예제 #1
0
    def __init__(self, model, query, name, data, key_prefix=None):
        """Build the mapping for one named entity from its configuration.

        The entity must be identified either by key references (``key`` /
        ``keys``) or by an ``id_column``, but not by both. The schema named
        in ``data`` is looked up in ``model`` and a ``PropertyMapping`` is
        created for every entry under ``properties``.

        Raises:
            InvalidMapping: if neither or both of keys and ``id_column`` are
                given, if the schema cannot be found in the model, or if a
                configured property does not exist on the schema.
        """
        self.model = model
        self.name = name
        self.data = data

        # SHA-1 state fed with the key prefix first, then the key literal.
        seed = sha1(key_bytes(key_prefix))
        seed.update(key_bytes(data.get("key_literal")))
        self.seed = seed

        self.keys = keys_values(data, "key", "keys")
        self.id_column = data.get("id_column")
        has_keys = len(self.keys) > 0
        has_id_column = self.id_column is not None
        if not has_keys and not has_id_column:
            raise InvalidMapping("No keys or ID: %r" % name)
        if has_keys and has_id_column:
            raise InvalidMapping(
                "Please use only keys or id_column, not both: %r" % name
            )

        schema_name = data.get("schema")
        self.schema = model.get(schema_name)
        if self.schema is None:
            raise InvalidMapping("Invalid schema: %s" % schema_name)

        # Collect every reference this entity uses: its keys, its ID column
        # and whatever each property mapping refers to.
        self.refs = set(self.keys)
        if self.id_column:
            self.refs.add(self.id_column)
        self.dependencies = set()
        self.properties = []
        for prop_name, prop_data in data.get("properties", {}).items():
            prop = self.schema.get(prop_name)
            if prop is None:
                raise InvalidMapping("Invalid property: %s" % prop_name)
            prop_mapping = PropertyMapping(query, prop_data, prop)
            self.properties.append(prop_mapping)
            self.refs.update(prop_mapping.refs)
            # Properties that point at another entity become dependencies.
            if prop_mapping.entity:
                self.dependencies.add(prop_mapping.entity)
예제 #2
0
    def __init__(self, model, query, name, data, key_prefix=None):
        """Build the mapping for one named entity from its configuration.

        The key references are gathered from the ``key`` and ``keys``
        settings, the schema named in ``data`` is looked up in ``model`` and
        a ``PropertyMapping`` is created for every entry under
        ``properties``.

        Raises:
            InvalidMapping: if no key is configured, if the schema cannot be
                found in the model, or if a configured property does not
                exist on the schema.
        """
        self.model = model
        self.name = name
        self.data = data

        # SHA-1 state fed with the key prefix first, then the key literal.
        self.seed = sha1(key_bytes(key_prefix))
        self.seed.update(key_bytes(data.get('key_literal')))

        # Build a fresh list rather than extending the helper's return value
        # in place: should ensure_list() hand back the very list stored in
        # ``data``, an in-place extend() would silently modify the caller's
        # configuration.
        self.keys = (list(ensure_list(data.get('key'))) +
                     list(ensure_list(data.get('keys'))))
        if not self.keys:
            raise InvalidMapping("No keys: %r" % name)

        self.schema = model.get(data.get('schema'))
        if self.schema is None:
            raise InvalidMapping("Invalid schema: %s" % data.get('schema'))

        # Collect every reference this entity uses: its keys plus whatever
        # each property mapping refers to.
        self.refs = set(self.keys)
        self.dependencies = set()
        self.properties = []
        # Distinct loop names keep the ``name`` parameter intact.
        for prop_name, prop_data in data.get('properties', {}).items():
            prop_schema = self.schema.get(prop_name)
            if prop_schema is None:
                raise InvalidMapping("Invalid property: %s" % prop_name)
            prop = PropertyMapping(query, prop_data, prop_schema)
            self.properties.append(prop)
            self.refs.update(prop.refs)
            # Properties that point at another entity become dependencies.
            if prop.entity:
                self.dependencies.add(prop.entity)
예제 #3
0
def get_source(mapping):
    """Return the source object matching the settings in ``mapping.data``.

    A ``database`` setting selects ``SQLSource``; otherwise a ``csv_url`` or
    ``csv_urls`` setting selects ``CSVSource``.

    Raises:
        InvalidMapping: if none of these settings is present.
    """
    config = mapping.data
    if 'database' in config:
        return SQLSource(mapping, config)
    if any(key in config for key in ('csv_url', 'csv_urls')):
        return CSVSource(mapping, config)
    raise InvalidMapping("Cannot determine mapping type")
예제 #4
0
    def __init__(self, query, data):
        """Set up a CSV source from the ``csv_url`` / ``csv_urls`` settings.

        Environment variable references in each URL are expanded, and
        identical URLs collapse into a single entry.

        Raises:
            InvalidMapping: if no URL is configured.
        """
        super(CSVSource, self).__init__(query, data)
        configured = keys_values(data, 'csv_url', 'csv_urls')
        self.urls = {os.path.expandvars(item) for item in configured}
        if len(self.urls) == 0:
            raise InvalidMapping("No CSV URLs are specified.")
예제 #5
0
    def bind(self):
        """Check that the entity referenced by this property is valid.

        Does nothing when the property does not reference an entity.
        Otherwise the entity with the referenced name is looked up among the
        query's entities and its schema is checked against the range of this
        property.

        Raises:
            InvalidMapping: if the referenced entity has an incompatible
                schema, or if no entity with that name exists in the query.
        """
        if self.entity is None:
            return

        # Figure out if the schema types of the referenced entities
        # are of a type compatible with the range of this property.
        # For example, an asset can be owned by a legal entity, but
        # not by a bank payment or a ship.
        for entity in self.query.entities:
            if entity.name != self.entity:
                continue
            if not entity.schema.is_a(self.schema.range):
                # Report the entity's name here: ``self.name`` is the name
                # of the property, not of the entity being checked.
                raise InvalidMapping("The entity [%s] must be a %s (not %s)" %
                                     (self.entity, self.schema.range, entity.schema.name))  # noqa
            return

        raise InvalidMapping("No entity [%s] for property [%s]"
                             % (self.entity, self.name))
예제 #6
0
    def __init__(
        self,
        model: "Model",
        query: "QueryMapping",
        name: str,
        data: Dict[str, Any],
        key_prefix: Optional[str] = None,
    ) -> None:
        """Build the mapping for one named entity from its configuration.

        The entity must be identified either by key references (``key`` /
        ``keys``) or by an ``id_column``, but not by both. The schema named
        in ``data`` is looked up in ``model`` and a ``PropertyMapping`` is
        created for every entry under ``properties``.

        Raises:
            InvalidMapping: if neither or both of keys and ``id_column`` are
                given, if no schema is named or it cannot be found in the
                model, or if a configured property does not exist on the
                schema.
        """
        self.model = model
        self.name = name

        # SHA-1 state fed with the key prefix first, then the key literal.
        seed = sha1(key_bytes(key_prefix))
        seed.update(key_bytes(data.get("key_literal")))
        self.seed = seed

        self.keys = keys_values(data, "key", "keys")
        self.id_column = stringify(data.get("id_column"))
        has_keys = len(self.keys) > 0
        has_id_column = self.id_column is not None
        if not has_keys and not has_id_column:
            raise InvalidMapping("No keys or ID: %r" % name)
        if has_keys and has_id_column:
            raise InvalidMapping(
                "Please use only keys or id_column, not both: %r" % name
            )

        schema_name = stringify(data.get("schema"))
        if schema_name is None:
            raise InvalidMapping("No schema: %s" % name)
        resolved_schema = model.get(schema_name)
        if resolved_schema is None:
            raise InvalidMapping("Invalid schema: %s" % schema_name)
        self.schema = resolved_schema

        # Collect every reference this entity uses: its keys, its ID column
        # and whatever each property mapping refers to.
        self.refs = set(self.keys)
        if self.id_column:
            self.refs.add(self.id_column)
        self.dependencies: Set[str] = set()
        self.properties: List[PropertyMapping] = []
        for prop_name, prop_data in data.get("properties", {}).items():
            prop = self.schema.get(prop_name)
            if prop is None:
                raise InvalidMapping("Invalid property: %s" % prop_name)
            prop_mapping = PropertyMapping(query, prop_data, prop)
            self.properties.append(prop_mapping)
            self.refs.update(prop_mapping.refs)
            # Properties that point at another entity become dependencies.
            if prop_mapping.entity:
                self.dependencies.add(prop_mapping.entity)
예제 #7
0
    def __init__(self, query, data):
        """Set up a CSV source from the ``csv_url`` / ``csv_urls`` settings.

        Environment variable references in each URL are expanded, and
        identical URLs collapse into a single entry.

        Raises:
            InvalidMapping: if no URL is configured.
        """
        super(CSVSource, self).__init__(query, data)
        # Combine into a fresh list rather than extending the helper's
        # return value in place: should ensure_list() hand back the very
        # list stored in ``data``, an in-place extend() would silently
        # modify the caller's configuration.
        urls = (list(ensure_list(data.get('csv_url'))) +
                list(ensure_list(data.get('csv_urls'))))
        self.urls = set(os.path.expandvars(url) for url in urls)

        if not self.urls:
            raise InvalidMapping("No CSV URLs are specified.")
예제 #8
0
    def __init__(self, query: "QueryMapping", data: Dict[str, Any]) -> None:
        """Set up a CSV source from the ``csv_url`` / ``csv_urls`` settings.

        Environment variable references in each URL are expanded, and
        identical URLs collapse into a single entry. The ``filters`` and
        ``filters_not`` attributes (presumably set by the base class; confirm
        there) are then pre-parsed with ``_parse_filters``.

        Raises:
            InvalidMapping: if no URL is configured.
        """
        super().__init__(query, data)
        configured = keys_values(data, "csv_url", "csv_urls")
        self.urls: Set[str] = {
            cast(str, os.path.expandvars(item)) for item in configured
        }
        if len(self.urls) == 0:
            raise InvalidMapping("No CSV URLs are specified.")

        self.filters_set = self._parse_filters(self.filters)
        self.filters_not_set = self._parse_filters(self.filters_not)
예제 #9
0
    def __init__(self, model, data, key_prefix=None):
        """Build all entity mappings of a query and order them for mapping.

        An ``EntityMapping`` is created for every entry under ``entities``,
        each one is bound, and the mappings are finally re-ordered so that
        every entity comes after the entities it depends on.

        Raises:
            InvalidMapping: if no entity is configured or if the entity
                dependencies cannot be resolved into an order.
        """
        self.model = model
        self.data = data

        self.refs = set()
        self.entities = []
        for entity_name, entity_data in data.get('entities', {}).items():
            mapped = EntityMapping(model, self, entity_name, entity_data,
                                   key_prefix=key_prefix)
            self.entities.append(mapped)
            self.refs.update(mapped.refs)

        if len(self.entities) == 0:
            raise InvalidMapping("No entity mappings are defined.")

        # Check if the provided links satisfy the ranges of the given
        # properties (e.g. the owner of a company must be a legal person)
        for mapped in self.entities:
            mapped.bind()

        # Do dependency resolution, i.e. find the right order to
        # map these entities. This is needed to resolve entity IDs
        # in dependent entities.
        pending = self.entities
        self.entities = []
        resolved = set()
        while pending:
            # Take the first pending entity whose dependencies are all met.
            ready = next(
                (item for item in pending
                 if item.dependencies.issubset(resolved)),
                None,
            )
            if ready is None:
                # Nothing can make progress any more.
                raise InvalidMapping("Circular entity dependency detected.")
            pending.remove(ready)
            self.entities.append(ready)
            resolved.add(ready.name)
예제 #10
0
 def read_csv_url(self, url):
     """Yield the records that ``read_csv`` produces for the CSV at ``url``.

     ``http``/``https`` URLs are streamed with ``requests`` and decoded as
     UTF-8; for any other URL the path component is opened as a local
     text file.

     Raises:
         InvalidMapping: if the HTTP response is not OK.
     """
     parsed_url = requests.utils.urlparse(url)
     log.info("Loading: %s", url)
     if parsed_url.scheme in ['http', 'https']:
         # A streamed response keeps its connection until the body is
         # fully read or the response is closed. The ``with`` block closes
         # it on errors and when the consumer abandons this generator.
         with requests.get(url, stream=True) as res:
             if not res.ok:
                 raise InvalidMapping("Failed to open CSV: %s" % url)
             # Always decode as UTF-8, whatever encoding was detected.
             res.encoding = 'utf-8'
             lines = res.iter_lines(decode_unicode=True)
             yield from self.read_csv(lines)
     else:
         with io.open(parsed_url.path, 'r') as fh:
             yield from self.read_csv(fh)
예제 #11
0
 def read_csv_url(self, url: str) -> Generator[Record, None, None]:
     """Yield the records that ``read_csv`` produces for the CSV at ``url``.

     ``http``/``https`` URLs are streamed with ``requests`` and decoded as
     UTF-8; for any other URL the path component is opened as a local
     text file.

     Raises:
         InvalidMapping: if the HTTP response is not OK.
     """
     parsed_url = urlparse(url)
     log.info("Loading: %s", url)
     if parsed_url.scheme in ["http", "https"]:
         # A streamed response keeps its connection until the body is
         # fully read or the response is closed. The ``with`` block closes
         # it on errors and when the consumer abandons this generator.
         with requests.get(url, stream=True) as res:
             if not res.ok:
                 raise InvalidMapping("Failed to open CSV: %s" % url)
             # Always decode as UTF-8, whatever encoding was detected.
             res.encoding = "utf-8"
             lines = res.iter_lines(decode_unicode=True)
             yield from self.read_csv(lines)
     else:
         with io.open(parsed_url.path, "r") as fh:
             yield from self.read_csv(fh)
예제 #12
0
    def __init__(
        self, meta: MetaData, data: Union[str, Dict[str, str]]
    ) -> None:
        """Load a table and index its aliased columns by reference name.

        ``data`` is either a table name or a dict with a ``table`` entry and
        an optional ``alias`` entry; the alias defaults to the table name.
        Every column of the aliased table is given a generated ``col_...``
        label and stored in ``self.refs`` under both ``<alias>.<column>``
        and the bare column name.

        Raises:
            InvalidMapping: if no table is named.
        """
        config = {"table": data} if isinstance(data, str) else data
        table_ref = config.get("table")
        if table_ref is None:
            raise InvalidMapping("Query has no table!")
        alias_ref = config.get("alias", table_ref)
        self.table = Table(table_ref, meta, autoload=True)
        self.alias = self.table.alias(alias_ref)

        self.refs: Dict[str, Label[Any]] = {}
        for column in self.alias.columns:
            labeled = column.label("col_%s" % uuid4().hex[:10])
            # The same labelled column answers to both spellings.
            qualified = "%s.%s" % (alias_ref, column.name)
            for ref in (qualified, column.name):
                self.refs[ref] = labeled
예제 #13
0
 def read_csv(self, url):
     """Yield the rows of the CSV document at ``url`` as dictionaries.

     ``http``/``https`` URLs are streamed with ``requests`` and decoded as
     UTF-8; for any other URL the path component is opened as a local
     text file. Rows are parsed by ``DictReader`` with
     ``skipinitialspace`` enabled.

     Raises:
         InvalidMapping: if the HTTP response is not OK.
     """
     parsed_url = requests.utils.urlparse(url)
     log.info("Loading: %s", url)
     if parsed_url.scheme in ['http', 'https']:
         # A streamed response keeps its connection until the body is
         # fully read or the response is closed. The ``with`` block closes
         # it on errors and when the consumer abandons this generator.
         with requests.get(url, stream=True) as res:
             if not res.ok:
                 raise InvalidMapping("Failed to open CSV: %s" % url)
             # Always decode as UTF-8, whatever encoding was detected.
             res.encoding = 'utf-8'
             lines = res.iter_lines(decode_unicode=True)
             for row in DictReader(lines, skipinitialspace=True):
                 yield row
     else:
         with io.open(parsed_url.path, 'r') as fh:
             for row in DictReader(fh, skipinitialspace=True):
                 yield row
예제 #14
0
 def read_csv_url(self, url):
     """Yield the records that ``read_csv`` produces for the CSV at ``url``.

     ``http``/``https`` URLs are streamed with ``requests`` and decoded as
     UTF-8; for any other URL the path component is opened as a local
     text file.

     Raises:
         InvalidMapping: if the HTTP response is not OK.
     """
     parsed_url = requests.utils.urlparse(url)
     log.info("Loading: %s", url)
     if parsed_url.scheme in ['http', 'https']:
         # A streamed response keeps its connection until the body is
         # fully read or the response is closed. The ``with`` block closes
         # it on errors and when the consumer abandons this generator.
         with requests.get(url, stream=True) as res:
             if not res.ok:
                 raise InvalidMapping("Failed to open CSV: %s" % url)
             # Always decode as UTF-8, whatever encoding was detected.
             res.encoding = 'utf-8'
             lines = res.iter_lines(decode_unicode=True)
             yield from self.read_csv(lines)
     else:
         # XXX: This is a security issue in conjunction with the
         # aleph mapping API because a user could map any file
         # from the server file system. Remove???
         with io.open(parsed_url.path, 'r') as fh:
             yield from self.read_csv(fh)
예제 #15
0
    def __init__(self, query: "QueryMapping", data: Dict[str, Any]) -> None:
        """Set up a SQL source: engine, metadata, tables and joins.

        The ``database`` setting has environment variable references
        expanded and is used as the engine URI. Each ``table`` / ``tables``
        entry becomes a ``QueryTable`` and the ``joins`` setting is kept as
        a list.

        Raises:
            InvalidMapping: if no ``database`` is configured.
        """
        super(SQLSource, self).__init__(query, data)
        database = data.get("database")
        if database is None:
            raise InvalidMapping("No database in SQL mapping!")
        self.database_uri = cast(str, os.path.expandvars(database))

        # URIs starting with "postgres" additionally get
        # server_side_cursors=True passed to the engine.
        is_postgres = self.database_uri.lower().startswith("postgres")
        kwargs = {"server_side_cursors": True} if is_postgres else {}
        self.engine = create_engine(self.database_uri,
                                    poolclass=NullPool,
                                    **kwargs)  # type: ignore
        self.meta = MetaData()
        self.meta.bind = self.engine

        table_refs = keys_values(data, "table", "tables")
        self.tables = [QueryTable(self.meta, ref) for ref in table_refs]
        self.joins = cast(List[Dict[str, str]], ensure_list(data.get("joins")))
예제 #16
0
 def source(self):
     """Return the source object matching the settings in ``self.data``.

     A ``database`` setting selects ``SQLSource``; otherwise a ``csv_url``
     or ``csv_urls`` setting selects ``CSVSource``.

     Raises:
         InvalidMapping: if none of these settings is present.
     """
     config = self.data
     if 'database' in config:
         return SQLSource(self, config)
     if any(key in config for key in ('csv_url', 'csv_urls')):
         return CSVSource(self, config)
     raise InvalidMapping("Cannot determine mapping type")
예제 #17
0
 def _get_source(self, data: Dict[str, Any]) -> Source:
     """Return the source object matching the settings in ``data``.

     A ``database`` setting selects ``SQLSource``; otherwise a ``csv_url``
     or ``csv_urls`` setting selects ``CSVSource``.

     Raises:
         InvalidMapping: if none of these settings is present.
     """
     if "database" in data:
         return SQLSource(self, data)
     has_csv = any(key in data for key in ("csv_url", "csv_urls"))
     if not has_csv:
         raise InvalidMapping("Cannot determine mapping type: %r" % data)
     return CSVSource(self, data)
예제 #18
0
 def get_column(self, ref: Optional[str]) -> Label[Any]:
     """Return the column registered under ``ref``.

     The tables are searched in order and the first one that knows the
     reference wins.

     Raises:
         InvalidMapping: if no table knows the reference.
     """
     for candidate in self.tables:
         known = candidate.refs
         if ref in known:
             return known[ref]
     raise InvalidMapping("Missing reference: %s" % ref)
예제 #19
0
 def source(self):
     """Return the source object matching the settings in ``self.data``.

     A ``database`` setting selects ``SQLSource``; otherwise a ``csv_url``
     or ``csv_urls`` setting selects ``CSVSource``.

     Raises:
         InvalidMapping: if none of these settings is present.
     """
     config = self.data
     if "database" in config:
         return SQLSource(self, config)
     if any(key in config for key in ("csv_url", "csv_urls")):
         return CSVSource(self, config)
     raise InvalidMapping("Cannot determine mapping type")
예제 #20
0
 def get_column(self, ref):
     """Return the column registered under ``ref``.

     The tables are searched in order and the first one that knows the
     reference wins.

     Raises:
         InvalidMapping: if no table knows the reference.
     """
     for candidate in self.tables:
         known = candidate.refs
         if ref in known:
             return known.get(ref)
     raise InvalidMapping("Missing reference: %s" % ref)