Example #1
def bulk_load(config):
    """Bulk load entities from a CSV file or SQL database.

    This is done by mapping the rows in the source data to entities and links
    which can be understood by the entity index.
    """
    for foreign_id, data in config.items():
        collection = Collection.by_foreign_id(foreign_id)
        if collection is None:
            collection = Collection.create({
                'foreign_id': foreign_id,
                'managed': True,
                'label': data.get('label') or foreign_id,
                'summary': data.get('summary'),
                'category': data.get('category'),
            })

        for role_fk in dict_list(data, 'roles', 'role'):
            role = Role.by_foreign_id(role_fk)
            if role is not None:
                Permission.grant(collection, role, True, False)
            else:
                log.warning("Could not find role: %s", role_fk)

        db.session.commit()
        update_collection(collection)

        for query in dict_list(data, 'queries', 'query'):
            load_query(collection, query)
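
For reference, a minimal sketch of the config shape bulk_load() consumes, inferred from the lookups above. The foreign ID, metadata, and role values are hypothetical:

config = {
    'my_dataset': {                      # hypothetical collection foreign_id
        'label': 'My sanctions dataset',
        'summary': 'Entities scraped from a sanctions list',
        'category': 'sanctions',
        'roles': ['guest'],              # role foreign IDs; granted (True, False) above
        'queries': [],                   # query mappings, see the examples below
    },
}
bulk_load(config)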
Example #2
    def __init__(self, mapper, name, data, schema):
        self.mapper = mapper
        self.name = name
        self.data = data
        self.schema = schema
        self.refs = dict_list(data, 'column', 'columns')
        self.literals = dict_list(data, 'literal', 'literals')
        self.join = data.get('join')

        # this is hacky, trying to generate refs from template
        self.template = data.get('template')
        if self.template is not None:
            self.formatter = Formatter(self.template)
            self.refs.extend(self.formatter.refs)
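
A sketch of the property specs this constructor reads, keyed off 'column'/'columns', 'literal'/'literals', 'join' and 'template'. The column names are hypothetical, and the template placeholder is not real Formatter syntax:

name_spec = {'column': 'full_name'}                 # a single source column
alias_spec = {'columns': ['name_en', 'name_ru'],
              'join': ' '}                          # 'join' presumably concatenates values
country_spec = {'literal': 'RU'}                    # a fixed value instead of a column
pattern_spec = {'template': '<string with column refs>'}  # Formatter extracts refs from it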
Example #3
    def __init__(self, dataset, data):
        super(DBQuery, self).__init__(dataset, data)

        tables = dict_list(data, 'table', 'tables')

        self.database_uri = os.path.expandvars(data.get('database'))
        self.tables = [QueryTable(self, f) for f in tables]
Example #4
    def __init__(self, dataset, data):
        super(CSVQuery, self).__init__(dataset, data)
        self.csv_urls = set()
        for csv_url in dict_list(data, 'csv_url', 'csv_urls'):
            self.csv_urls.add(os.path.expandvars(csv_url))

        if not len(self.csv_urls):
            log.warning("[%s]: no CSV URLs specified", dataset.name)
Example #5
def mapping_process(id):
    collection = get_db_collection(id, request.authz.WRITE)
    require(request.authz.is_admin)
    if not request.is_json:
        raise BadRequest()
    data = request.get_json().get(collection.foreign_id)
    for query in dict_list(data, 'queries', 'query'):
        try:
            model.make_mapping(query)
            bulk_load_query.apply_async([collection.id, query], priority=6)
        except InvalidMapping as invalid:
            raise BadRequest(invalid)
    return ('', 204)
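
A hypothetical request body for this endpoint: the JSON payload is keyed by the target collection's foreign_id, and each query is validated by model.make_mapping() before being queued. The query shape follows the CSVQuery/DBQuery examples:

payload = {
    'my_dataset': {                      # must match collection.foreign_id
        'queries': [
            {'csv_url': '$ALEPH_FIXTURES/persons.csv', 'entities': {}},
        ],
    },
}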
Example #6
File: entities.py Project: roukdanus/aleph
def bulk_load(config):
    """Bulk load entities from a CSV file or SQL database.

    This is done by mapping the rows in the source data to entities and links
    which can be understood by the entity index.
    """
    from aleph.logic.collections import create_collection
    for foreign_id, data in config.items():
        data['foreign_id'] = foreign_id
        data['label'] = data.get('label', foreign_id)
        collection = create_collection(data)
        for query in dict_list(data, 'queries', 'query'):
            bulk_load_query.apply_async([collection.id, query], priority=6)
Example #7
    def __init__(self, name, data):
        self.name = six.text_type(name)
        self.data = data
        self.label = data.get('label', name)
        self.info_url = data.get('info_url')
        self.category = data.get('category')
        self.roles = []
        self.entities_count = None
        self.public = False

        for role in dict_list(data, 'roles', 'role'):
            role_id = Role.load_id(role)
            if role_id is not None:
                self.roles.append(role_id)
            else:
                log.warning("Could not find role: %s", role)
            if role_id in get_public_roles():
                self.public = True

        if not len(self.roles):
            raise ValueError("No roles for dataset: %s" % self.name)

        self._queries = dict_list(data, 'queries', 'query')
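
A sketch of the dataset block this constructor reads; the values are hypothetical. Note that at least one 'roles' entry must resolve, or the constructor raises ValueError:

dataset_data = {
    'label': 'PEP register',
    'info_url': 'https://example.com/peps',
    'category': 'sanctions',
    'roles': ['guest'],                  # role foreign IDs resolved via Role.load_id()
    'queries': [],
}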
Example #8
    def __init__(self, query, data):
        self.query = query
        self.data = data
        self.keys = dict_list(data, 'keys', 'key')
        self.key_fingerprint = data.get('key_fingerprint', False)

        self.schema = schemata.get(data.get('schema'))
        if self.schema is None or self.schema.section != self.section:
            raise TypeError("Invalid schema: %r" % data.get('schema'))

        self.properties = []
        for name, prop in data.get('properties', {}).items():
            schema = self.schema.get(name)
            self.properties.append(MapperProperty(self, name, prop, schema))
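
A sketch of the mapper data this constructor reads; 'Person' is a plausible schema name, and the column names are hypothetical:

edata = {
    'schema': 'Person',                  # must exist in schemata and match self.section
    'keys': ['passport_number'],         # source columns combined into the entity key
    'key_fingerprint': True,             # presumably fingerprints key values first
    'properties': {
        'name': {'column': 'full_name'},
        'nationality': {'literal': 'RU'},
    },
}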
Example #9
File: query.py Project: wilbrodn/aleph
    def __init__(self, dataset, data):
        self.dataset = dataset
        self.data = data
        self.database_uri = os.path.expandvars(data.get('database'))

        tables = dict_list(data, 'table', 'tables')
        self.tables = [QueryTable(self, f) for f in tables]

        self.entities = []
        for ename, edata in data.get('entities').items():
            self.entities.append(EntityMapper(self, ename, edata))

        self.links = []
        for ldata in data.get('links', []):
            self.links.append(LinkMapper(self, ldata))
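
Putting the pieces together, a hypothetical top-level query mapping in the shape this constructor reads. 'entities' is required (the code calls .items() on it without a default), 'links' is optional, and the table entry format depends on QueryTable, so a bare table name is shown:

data = {
    'database': '$DATABASE_URI',         # expanded with os.path.expandvars
    'tables': ['persons'],               # each entry becomes a QueryTable
    'entities': {
        'person': {
            'schema': 'Person',
            'keys': ['id'],
            'properties': {'name': {'column': 'persons.name'}},
        },
    },
    'links': [],
}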
Example #10
File: entities.py Project: renesugar/aleph
def bulk_load(config):
    """Bulk load entities from a CSV file or SQL database.

    This is done by mapping the rows in the source data to entities and links
    which can be understood by the entity index.
    """
    for foreign_id, data in config.items():
        collection = Collection.by_foreign_id(foreign_id)
        if collection is None:
            data['foreign_id'] = foreign_id
            data['label'] = data.get('label', foreign_id)
            collection = Collection.create(data)

        db.session.commit()
        index_collection(collection)
        for query in dict_list(data, 'queries', 'query'):
            bulk_load_query.apply_async([collection.id, query], priority=6)
Example #11
File: entities.py Project: SiloGit/aleph
def bulk_load(config):
    """Bulk load entities from a CSV file or SQL database.

    This is done by mapping the rows in the source data to entities and links
    which can be understood by the entity index.
    """
    for foreign_id, data in config.items():
        collection = Collection.by_foreign_id(foreign_id)
        if collection is None:
            collection = Collection.create({
                'foreign_id': foreign_id,
                'label': data.get('label') or foreign_id,
                'summary': data.get('summary'),
                'category': data.get('category'),
                'managed': True,
            })

        db.session.commit()
        index_collection(collection)

        for query in dict_list(data, 'queries', 'query'):
            bulk_load_query.delay(collection.id, query)
Example #12
File: __init__.py Project: wilbrodn/aleph
    def __init__(self, schemata, section, name, data):
        assert section in self.SECTIONS, section
        self._schemata = schemata
        self.section = section
        self.name = name
        self.data = data
        self.label = data.get('label', name)
        self.plural = data.get('plural', self.label)
        self.icon = data.get('icon')
        # Do not show in listings:
        self.hidden = data.get('hidden', False)
        # Try to perform fuzzy matching. Fuzzy similarity search does not
        # make sense for entities which have a lot of similar names, such
        # as land plots, assets etc.
        self.fuzzy = data.get('fuzzy', True)
        self._extends = dict_list(data, 'extends')

        self._own_properties = []
        for name, prop in data.get('properties', {}).items():
            self._own_properties.append(SchemaProperty(self, name, prop))

        self.forward = data.get('forward', self.label)
        self.reverse = data.get('reverse', self.label)
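
A sketch of a schema definition in the shape this constructor reads, echoing the land-plot example from the comment above; the names are hypothetical:

schema_data = {
    'label': 'Land plot',
    'plural': 'Land plots',
    'icon': 'fa-map',
    'hidden': False,                     # still shown in listings
    'fuzzy': False,                      # many near-identical names, so skip fuzzy matching
    'extends': ['Asset'],                # parent schemata; dict_list also accepts a single name
    'properties': {
        'registrationNumber': {'label': 'Registration number'},
    },
}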