Esempio n. 1
0
def xref_collection(stage, collection):
    """Cross-reference all the entities and documents in a collection."""
    delete_xref(collection, sync=True)
    delete_entities(collection.id, origin=ORIGIN, sync=True)
    index_matches(collection, _query_entities(collection))
    index_matches(collection, _query_mentions(collection))
    reindex_collection(collection, sync=False)
Esempio n. 2
0
def load_mapping(stage, collection, mapping_id, sync=False):
    """Flush and reload all entities generated by a mapping."""
    mapping = Mapping.by_id(mapping_id)
    if mapping is None:
        return log.error("Could not find mapping: %s", mapping_id)
    origin = mapping_origin(mapping.id)
    aggregator = get_aggregator(collection)
    aggregator.delete(origin=origin)
    delete_entities(collection.id, origin=origin, sync=True)
    if mapping.disabled:
        return log.info("Mapping is disabled: %s", mapping_id)
    publish(
        Events.LOAD_MAPPING,
        params={"collection": collection, "table": mapping.table_id},
        channels=[collection, mapping.role],
        actor_id=mapping.role_id,
    )
    try:
        map_to_aggregator(collection, mapping, aggregator)
        mapping.set_status(status=Mapping.SUCCESS)
        db.session.commit()
        reindex_collection(collection, sync=sync)
    except Exception as exc:
        mapping.set_status(status=Mapping.FAILED, error=str(exc))
        db.session.commit()
        aggregator.delete(origin=origin)
    finally:
        aggregator.close()
Esempio n. 3
0
    def test_directory_with_file(self):
        _, headers = self.login(is_admin=True)
        meta = {
            'file_name': 'directory',
            'foreign_id': 'directory',
            'schema': 'Folder',
            'collection_id': self.col.id,
        }
        data = {'meta': json.dumps(meta)}
        res = self.client.post(self.url, data=data, headers=headers)
        assert res.status_code == 201, res
        assert 'id' in res.json, res.json
        directory = res.json['id']

        meta = {
            'file_name': 'subdirectory',
            'foreign_id': 'subdirectory',
            'parent': {
                'id': directory
            },
            'collection_id': self.col.id,
        }
        data = {'meta': json.dumps(meta)}
        res = self.client.post(self.url, data=data, headers=headers)
        assert res.status_code == 201, res
        reindex_collection(self.col)
        assert 'id' in res.json, res.json
        url = '/api/2/entities/%s' % res.json['id']
        res = self.client.get(url, headers=headers)
        assert res.status_code == 200, res
        props = res.json.get('properties')
        assert 'subdirectory' in props['fileName'], res.json
Esempio n. 4
0
    def test_directory_with_file(self):
        _, headers = self.login(is_admin=True)
        meta = {
            "file_name": "directory",
            "foreign_id": "directory",
            "schema": "Folder",
            "collection_id": self.col.id,
        }
        data = {"meta": json.dumps(meta)}
        res = self.client.post(self.url, data=data, headers=headers)
        assert res.status_code == 201, res
        assert "id" in res.json, res.json
        directory = res.json["id"]

        meta = {
            "file_name": "subdirectory",
            "foreign_id": "subdirectory",
            "parent": {
                "id": directory
            },
            "collection_id": self.col.id,
        }
        data = {"meta": json.dumps(meta)}
        res = self.client.post(self.url, data=data, headers=headers)
        assert res.status_code == 201, res
        reindex_collection(self.col)
        assert "id" in res.json, res.json
        url = "/api/2/entities/%s" % res.json["id"]
        res = self.client.get(url, headers=headers)
        assert res.status_code == 200, res
        props = res.json.get("properties")
        assert "subdirectory" in props["fileName"], res.json
Esempio n. 5
0
    def load_fixtures(self):
        self.admin = self.create_user(foreign_id='admin', is_admin=True)
        self.private_coll = self.create_collection(
            foreign_id='test_private',
            label="Private Collection",
            category='grey',
            creator=self.admin
        )
        self._banana = self.create_entity({
            'schema': 'Person',
            'properties': {
                'name': ['Banana'],
                'birthDate': '1970-08-21'
            }
        }, self.private_coll)
        self._banana2 = self.create_entity({
            'schema': 'Person',
            'properties': {
                'name': ['Banana'],
                'birthDate': '1970-03-21'
            }
        }, self.private_coll)
        self._banana3 = self.create_entity({
            'schema': 'Person',
            'properties': {
                'name': ['Banana'],
                'birthDate': '1970-05-21'
            }
        }, self.private_coll)
        user = Role.by_foreign_id(Role.SYSTEM_USER)
        Permission.grant(self.private_coll, user, True, False)
        self.public_coll = self.create_collection(
            foreign_id='test_public',
            label="Public Collection",
            category='news',
            creator=self.admin
        )
        self._kwazulu = self.create_entity({
            'schema': 'Company',
            'properties': {
                'name': ['KwaZulu'],
                'alias': ['kwazulu']
            }
        }, self.public_coll)
        visitor = Role.by_foreign_id(Role.SYSTEM_GUEST)
        Permission.grant(self.public_coll, visitor, True, False)
        db.session.commit()

        aggregator = get_aggregator(self.public_coll)
        aggregator.delete()
        aggregator.close()
        reindex_collection(self.public_coll, sync=True)

        aggregator = get_aggregator(self.private_coll)
        aggregator.delete()
        for sample in read_entities(self.get_fixture_path('samples.ijson')):
            aggregator.put(sample, fragment='sample')
        aggregator.close()
        reindex_collection(self.private_coll, sync=True)
Esempio n. 6
0
def xref_collection(stage, collection):
    """Cross-reference all the entities and documents in a collection."""
    log.info("[%s] Clearing previous xref state....", collection)
    delete_xref(collection, sync=True)
    delete_entities(collection.id, origin=ORIGIN, sync=True)
    index_matches(collection, _query_entities(collection))
    index_matches(collection, _query_mentions(collection))
    log.info("[%s] Xref done, re-indexing to reify mentions...", collection)
    reindex_collection(collection, sync=False)
Esempio n. 7
0
 def dispatch_task(self, collection, task):
     stage = task.stage
     payload = task.payload
     sync = task.context.get("sync", False)
     if stage.stage == OP_INDEX:
         index_many(stage, collection, sync=sync, **payload)
     if stage.stage == OP_LOAD_MAPPING:
         load_mapping(stage, collection, **payload)
     if stage.stage == OP_FLUSH_MAPPING:
         flush_mapping(stage, collection, sync=sync, **payload)
     if stage.stage == OP_REINGEST:
         reingest_collection(collection, job_id=stage.job.id, **payload)
     if stage.stage == OP_REINDEX:
         reindex_collection(collection, sync=sync, **payload)
     if stage.stage == OP_XREF:
         xref_collection(stage, collection)
     if stage.stage == OP_XREF_ITEM:
         xref_item(stage, collection, **payload)
     log.info("Task [%s]: %s (done)", task.job.dataset, stage.stage)
Esempio n. 8
0
def load_entities(foreign_id, infile, unsafe=False):
    """Load FtM entities from the specified iJSON file."""
    collection = ensure_collection(foreign_id, foreign_id)

    def read_entities():
        for idx in count(1):
            line = infile.readline()
            if not line:
                return
            if idx % 1000 == 0:
                log.info("[%s] Loaded %s entities from: %s", collection, idx,
                         infile.name)
            yield json.loads(line)

    role = Role.load_cli_user()
    bulk_write(collection,
               read_entities(),
               unsafe=unsafe,
               role_id=role.id,
               index=False)
    reindex_collection(collection)
Esempio n. 9
0
 def handle(self, task):
     stage = task.stage
     payload = task.payload
     collection = Collection.by_foreign_id(task.job.dataset.name)
     if collection is None:
         log.error("Collection not found: %s", task.job.dataset)
         return
     sync = task.context.get('sync', False)
     if stage.stage == OP_INDEX:
         index_many(stage, collection, sync=sync, **payload)
     if stage.stage == OP_LOAD_MAPPING:
         load_mapping(stage, collection, **payload)
     if stage.stage == OP_FLUSH_MAPPING:
         flush_mapping(stage, collection, sync=sync, **payload)
     if stage.stage == OP_REINGEST:
         reingest_collection(collection, job_id=stage.job.id, **payload)
     if stage.stage == OP_REINDEX:
         reindex_collection(collection, sync=sync, **payload)
     if stage.stage == OP_XREF:
         xref_collection(stage, collection)
     if stage.stage == OP_XREF_ITEM:
         xref_item(stage, collection, **payload)
     log.info("Task [%s]: %s (done)", task.job.dataset, stage.stage)
Esempio n. 10
0
def reindex_casefiles(flush=False):
    """Re-index all the casefile collections."""
    for collection in Collection.all_casefiles():
        log.info("[%s] Starting to re-index", collection)
        reindex_collection(collection, flush=flush)
Esempio n. 11
0
def reindex(foreign_id, flush=False):
    """Index all the aggregator contents for a collection."""
    collection = get_collection(foreign_id)
    reindex_collection(collection, flush=flush)
Esempio n. 12
0
def _reindex_collection(collection, flush=False):
    log.info("[%s] Starting to re-index", collection)
    try:
        reindex_collection(collection, flush=flush)
    except Exception:
        log.exception("Failed to re-index: %s", collection)
Esempio n. 13
0
    def load_fixtures(self):
        self.admin = self.create_user(foreign_id="admin", is_admin=True)
        self.private_coll = self.create_collection(
            foreign_id="test_private",
            label="Private Collection",
            category="grey",
            creator=self.admin,
        )
        self._banana = self.create_entity(
            {
                "schema": "Person",
                "properties": {
                    "name": ["Banana"],
                    "birthDate": "1970-08-21"
                },
            },
            self.private_coll,
        )
        self._banana2 = self.create_entity(
            {
                "schema": "Person",
                "properties": {
                    "name": ["Banana"],
                    "birthDate": "1970-03-21"
                },
            },
            self.private_coll,
        )
        self._banana3 = self.create_entity(
            {
                "schema": "Person",
                "properties": {
                    "name": ["Banana ba Nana"],
                    "birthDate": "1969-05-21",
                    "deathDate": "1972-04-23",
                },
            },
            self.private_coll,
        )
        user = Role.by_foreign_id(Role.SYSTEM_USER)
        Permission.grant(self.private_coll, user, True, False)
        self.public_coll = self.create_collection(
            foreign_id="test_public",
            label="Public Collection",
            category="news",
            creator=self.admin,
        )
        self._kwazulu = self.create_entity(
            {
                "schema": "Company",
                "properties": {
                    "name": ["KwaZulu"],
                    "alias": ["kwazulu"]
                },
            },
            self.public_coll,
        )
        visitor = Role.by_foreign_id(Role.SYSTEM_GUEST)
        Permission.grant(self.public_coll, visitor, True, False)
        db.session.commit()

        aggregator = get_aggregator(self.public_coll)
        aggregator.delete()
        aggregator.close()
        reindex_collection(self.public_coll, sync=True)

        aggregator = get_aggregator(self.private_coll)
        aggregator.delete()
        for sample in read_entities(self.get_fixture_path("samples.ijson")):
            aggregator.put(sample, fragment="sample")
        aggregator.close()
        reindex_collection(self.private_coll, sync=True)
Esempio n. 14
0
def op_reindex_handler(collection, task):
    sync = task.context.get("sync", False)
    reindex_collection(collection, sync=sync, **task.payload)