Ejemplo n.º 1
0
    def test_xref(self):
        """Cross-referencing ``coll_a`` must produce exactly two matches.

        Four entities are posted through the entities API (one into
        ``coll_a``, three into ``coll_b``). No ``Match`` rows may exist
        before ``xref_collection`` runs and two must exist afterwards.
        """
        _, headers = self.login(foreign_id=self.user.foreign_id)
        url = '/api/2/entities'

        # (schema, name, collection, nationality), listed in posting order.
        fixtures = (
            ('Person', 'Carlos Danger', self.coll_a, 'US'),
            ('Person', 'Carlos Danger', self.coll_b, 'US'),
            ('Company', 'Carlos Danger', self.coll_b, 'GB'),
            ('Person', 'Pure Risk', self.coll_b, 'US'),
        )
        for schema, name, collection, nationality in fixtures:
            body = {
                'schema': schema,
                'name': name,
                'collection_id': collection.id,
                'data': {
                    'nationality': nationality
                }
            }
            self.client.post(url,
                             data=json.dumps(body),
                             headers=headers,
                             content_type='application/json')

        matches = db.session.query(Match)
        assert matches.count() == 0, matches.count()

        xref_collection(self.coll_a)

        matches = db.session.query(Match)
        assert matches.count() == 2, matches.count()
Ejemplo n.º 2
0
    def test_summary(self):
        """Check the xref summary endpoint for three levels of access.

        An anonymous request for the ``obsidian`` summary is rejected with
        403. For the ``residents`` summary, anonymous and 'outsider'
        requests see a single 'Dabo Girls' result, while the 'creator'
        login sees both 'Obsidian Order' and 'Dabo Girls'.
        """
        def assert_restricted_view(res):
            # Only the 'Dabo Girls' collection may be listed.
            assert res.status_code == 200, res
            assert res.json['total'] == 1, res.json
            first = res.json['results'][0]['collection']
            assert 'Obsidian Order' not in first['label'], res.json
            assert 'Dabo Girls' in first['label'], res.json

        xref_collection(self.residents.id)
        denied = self.client.get(
            '/api/2/collections/%s/xref' % self.obsidian.id)
        assert denied.status_code == 403, denied

        # Not logged in
        resi_url = '/api/2/collections/%s/xref' % self.residents.id
        assert_restricted_view(self.client.get(resi_url))

        # Logged in as outsider (restricted access)
        _, headers = self.login(foreign_id='outsider')
        assert_restricted_view(self.client.get(resi_url, headers=headers))

        # Logged in as creator (all access)
        _, headers = self.login(foreign_id='creator')
        full = self.client.get(resi_url, headers=headers)
        assert full.status_code == 200, full
        assert full.json['total'] == 2, full.json
        labels = []
        for result in full.json['results']:
            labels.append(result['collection']['label'])
        assert 'Obsidian Order' in labels, full.json
        assert 'Dabo Girls' in labels, full.json
Ejemplo n.º 3
0
def handle_task(queue, payload, context):
    """Run the operation named by ``queue.operation`` on its collection.

    The collection is looked up by ``queue.dataset``; when none is found
    an error is logged and no operation runs. If anything raises, the
    task is put back on the queue as long as the retry counter stored in
    ``context`` is below ``settings.QUEUE_RETRY``, and the exception is
    re-raised. ``queue.task_done()`` is called on every exit path.
    """
    log.info("Task [%s]: %s (begin)", queue.dataset, queue.operation)
    try:
        collection = Collection.by_foreign_id(queue.dataset)
        if collection is None:
            log.error("Collection not found: %s", queue.dataset)
            return
        if queue.operation == OP_INDEX:
            index_aggregate(queue, collection)
        elif queue.operation == OP_BULKLOAD:
            bulk_load(queue, collection, payload)
        elif queue.operation == OP_PROCESS:
            process_collection(collection, **payload)
        elif queue.operation == OP_XREF:
            xref_collection(queue, collection, **payload)
        log.info("Task [%s]: %s (done)", queue.dataset, queue.operation)
    except (SystemExit, KeyboardInterrupt, Exception):
        # Interrupts are caught too, so an aborted task is re-queued
        # before the exception propagates.
        attempts = int(context.get('retries', 0))
        if attempts < settings.QUEUE_RETRY:
            log.info("Queueing failed task for re-try...")
            context['retries'] = attempts + 1
            queue.queue_task(payload, context)
        raise
    finally:
        queue.task_done()
Ejemplo n.º 4
0
 def test_xref_specific_collections(self):
     """Xref of ``coll_a`` restricted to ``coll_c`` yields one match."""
     before = db.session.query(Match)
     assert before.count() == 0, before.count()

     xref_collection(
         self.stage,
         self.coll_a,
         against_collection_ids=[self.coll_c.id],
     )

     after = db.session.query(Match)
     assert after.count() == 1, after.count()
Ejemplo n.º 5
0
    def test_matches(self):
        """Verify per-collection xref match listings for three access levels.

        Anonymous and 'outsider' requests receive the single match against
        ``self.dabo`` and a 403 for ``self.obsidian``; the 'creator' login
        receives one match against each of the two collections.
        """
        def fetch(other, **kwargs):
            # Matches of the residents collection against ``other``.
            return self.client.get('/api/2/collections/%s/xref/%s' %
                                   (self.residents.id, other.id),
                                   **kwargs)

        def assert_single_match(res, present, absent):
            # Exactly one result whose entity name contains ``present``
            # but not ``absent``.
            assert res.status_code == 200, res
            assert res.json['total'] == 1, res.json
            assert present in res.json['results'][0]['entity']['name']
            assert absent not in res.json['results'][0]['entity']['name']
            assert 'Tain' not in res.json['results'][0]['match']['name']
            assert 'MPella' not in res.json['results'][0]['match']['name']

        xref_collection(self.residents.id)

        # Not logged in
        assert_single_match(fetch(self.dabo), 'Leeta', 'Garak')
        denied = fetch(self.obsidian)
        assert denied.status_code == 403, denied

        # Logged in as outsider (restricted)
        _, headers = self.login('outsider')
        assert_single_match(fetch(self.dabo, headers=headers),
                            'Leeta', 'Garak')
        denied = fetch(self.obsidian, headers=headers)
        assert denied.status_code == 403, denied

        # Logged in as creator (all access)
        _, headers = self.login('creator')
        assert_single_match(fetch(self.dabo, headers=headers),
                            'Leeta', 'Garak')
        assert_single_match(fetch(self.obsidian, headers=headers),
                            'Garak', 'Leeta')
Ejemplo n.º 6
0
    def test_export(self):
        """The xlsx xref export is 403 anonymously and 202 for 'creator'."""
        xref.xref_collection(self.residents)
        export_url = "/api/2/collections/%s/xref.xlsx" % self.obsidian.id

        anonymous = self.client.post(export_url)
        assert anonymous.status_code == 403, anonymous

        _, auth_headers = self.login(foreign_id="creator")
        accepted = self.client.post(export_url, headers=auth_headers)
        assert accepted.status_code == 202, accepted
Ejemplo n.º 7
0
def xref(foreign_id, against=None):
    """Cross-reference all entities and documents in a collection.

    When ``against`` is given, each of its entries is resolved through
    ``get_collection`` and the resulting collection ids are passed on as
    ``against_collection_ids``; otherwise ``None`` is passed.
    """
    collection = get_collection(foreign_id)
    if against is None:
        target_ids = None
    else:
        target_ids = [get_collection(entry).id for entry in against]
    xref_collection(collection.id, against_collection_ids=target_ids)
Ejemplo n.º 8
0
    def test_export(self):
        """The xref export is 403 anonymously and 200 for 'creator'."""
        xref_collection(self.stage, self.residents)
        export_url = '/api/2/collections/%s/xref/export' % self.obsidian.id

        anonymous = self.client.get(export_url)
        assert anonymous.status_code == 403, anonymous

        _, auth_headers = self.login(foreign_id='creator')
        allowed = self.client.get(export_url, headers=auth_headers)
        assert allowed.status_code == 200, allowed
Ejemplo n.º 9
0
    def test_xref(self):
        """Cross-referencing ``coll_a`` by id yields three ``Match`` rows."""
        self.setup_entities()

        before = db.session.query(Match)
        assert before.count() == 0, before.count()

        # The index is flushed before xref runs.
        self.flush_index()
        xref_collection(self.coll_a.id)

        after = db.session.query(Match)
        assert after.count() == 3, after.count()
Ejemplo n.º 10
0
    def test_xref_specific_collections(self):
        """Xref of ``coll_a`` limited to ``coll_c`` yields one ``Match``."""
        self.setup_entities()

        before = db.session.query(Match)
        assert before.count() == 0, before.count()

        self.flush_index()
        xref_collection(
            self.coll_a.id,
            against_collection_ids=[self.coll_c.id],
        )

        after = db.session.query(Match)
        assert after.count() == 1, after.count()
Ejemplo n.º 11
0
    def test_matches(self):
        """Check the xref listing of ``residents`` at each access level.

        Anonymous access is 403 until ``grant_publish`` is called on the
        collection. Afterwards anonymous and 'outsider' requests see one
        result (the 'Leeta' entity), and 'creator' sees two results
        ('Garak' first, then 'Leeta').
        """
        def assert_captions(result, present, absent):
            # Entity caption holds ``present`` but not ``absent``; the
            # match caption holds neither 'Tain' nor 'MPella'.
            assert present in get_caption(result["entity"])
            assert absent not in get_caption(result["entity"])
            assert "Tain" not in get_caption(result["match"])
            assert "MPella" not in get_caption(result["match"])

        xref.xref_collection(self.residents)
        url = "/api/2/collections/%s/xref" % self.residents.id

        # Not logged in
        denied = self.client.get(url)
        assert denied.status_code == 403, denied

        self.grant_publish(self.residents)
        public = self.client.get(url)
        assert public.status_code == 200, public
        assert public.json["total"] == 1, public.json
        assert_captions(public.json["results"][0], "Leeta", "Garak")

        # Logged in as outsider (restricted)
        _, headers = self.login("outsider")

        limited = self.client.get(url, headers=headers)
        assert limited.status_code == 200, limited
        assert limited.json["total"] == 1, limited.json
        assert_captions(limited.json["results"][0], "Leeta", "Garak")

        # Logged in as creator (all access)
        _, headers = self.login("creator")

        full = self.client.get(url, headers=headers)
        assert full.status_code == 200, full
        assert full.json["total"] == 2, full.json
        assert_captions(full.json["results"][0], "Garak", "Leeta")
        assert_captions(full.json["results"][1], "Leeta", "Garak")
Ejemplo n.º 12
0
    def test_matches(self):
        """Check the xref listing of ``residents`` at each access level.

        Anonymous access is 403 until ``grant_publish`` is called on the
        collection. Afterwards anonymous and 'outsider' requests see one
        result (the 'Leeta' entity), and 'creator' sees two results
        ('Garak' first, then 'Leeta').
        """
        def assert_captions(result, present, absent):
            # Entity caption holds ``present`` but not ``absent``; the
            # match caption holds neither 'Tain' nor 'MPella'.
            assert present in get_caption(result['entity'])
            assert absent not in get_caption(result['entity'])
            assert 'Tain' not in get_caption(result['match'])
            assert 'MPella' not in get_caption(result['match'])

        xref.xref_collection(self.stage, self.residents)
        url = '/api/2/collections/%s/xref' % self.residents.id

        # Not logged in
        denied = self.client.get(url)
        assert denied.status_code == 403, denied

        self.grant_publish(self.residents)
        public = self.client.get(url)
        assert public.status_code == 200, public
        assert public.json['total'] == 1, public.json
        assert_captions(public.json['results'][0], 'Leeta', 'Garak')

        # Logged in as outsider (restricted)
        _, headers = self.login('outsider')

        limited = self.client.get(url, headers=headers)
        assert limited.status_code == 200, limited
        assert limited.json['total'] == 1, limited.json
        assert_captions(limited.json['results'][0], 'Leeta', 'Garak')

        # Logged in as creator (all access)
        _, headers = self.login('creator')

        full = self.client.get(url, headers=headers)
        assert full.status_code == 200, full
        assert full.json['total'] == 2, full.json
        assert_captions(full.json['results'][0], 'Garak', 'Leeta')
        assert_captions(full.json['results'][1], 'Leeta', 'Garak')
Ejemplo n.º 13
0
 def dispatch_task(self, collection, task):
     """Invoke the handler that matches ``task.stage.stage``.

     The task payload is expanded into keyword arguments for most
     handlers, and the ``sync`` flag from the task context is forwarded
     to the handlers that accept it. A completion line is logged even
     when no stage name matches.
     """
     stage = task.stage
     params = task.payload
     run_sync = task.context.get("sync", False)
     if stage.stage == OP_INDEX:
         index_many(stage, collection, sync=run_sync, **params)
     elif stage.stage == OP_LOAD_MAPPING:
         load_mapping(stage, collection, **params)
     elif stage.stage == OP_FLUSH_MAPPING:
         flush_mapping(stage, collection, sync=run_sync, **params)
     elif stage.stage == OP_REINGEST:
         reingest_collection(collection, job_id=stage.job.id, **params)
     elif stage.stage == OP_REINDEX:
         reindex_collection(collection, sync=run_sync, **params)
     elif stage.stage == OP_XREF:
         # The payload is not forwarded for this stage.
         xref_collection(stage, collection)
     elif stage.stage == OP_XREF_ITEM:
         xref_item(stage, collection, **params)
     log.info("Task [%s]: %s (done)", task.job.dataset, stage.stage)
Ejemplo n.º 14
0
Archivo: worker.py Proyecto: wdsn/aleph
 def handle(self, task):
     """Resolve the task's collection and run the handler for its stage.

     The collection is looked up by ``task.job.dataset.name``; when it is
     missing an error is logged and nothing else happens. Otherwise the
     handler matching ``task.stage.stage`` is called and a completion
     line is logged.
     """
     stage = task.stage
     params = task.payload
     collection = Collection.by_foreign_id(task.job.dataset.name)
     if collection is None:
         log.error("Collection not found: %s", task.job.dataset)
         return
     run_sync = task.context.get('sync', False)
     if stage.stage == OP_INDEX:
         index_aggregate(stage, collection, sync=run_sync, **params)
     elif stage.stage == OP_BULKLOAD:
         # bulk_load receives the payload as one positional argument.
         bulk_load(stage, collection, params)
     elif stage.stage == OP_PROCESS:
         process_collection(stage, collection, sync=run_sync, **params)
     elif stage.stage == OP_XREF:
         xref_collection(stage, collection, **params)
     elif stage.stage == OP_XREF_ITEM:
         xref_item(stage, collection, **params)
     log.info("Task [%s]: %s (done)", task.job.dataset, stage.stage)
Ejemplo n.º 15
0
 def handle(self, task):
     """Resolve the task's collection and run the handler for its stage.

     The collection is looked up by ``task.job.dataset.name``; when it is
     missing an error is logged and nothing else happens. Otherwise the
     handler matching ``task.stage.stage`` is called and a completion
     line is logged.
     """
     stage = task.stage
     params = task.payload
     collection = Collection.by_foreign_id(task.job.dataset.name)
     if collection is None:
         log.error("Collection not found: %s", task.job.dataset)
         return
     run_sync = task.context.get('sync', False)
     if stage.stage == OP_INDEX:
         index_many(stage, collection, sync=run_sync, **params)
     elif stage.stage == OP_LOAD_MAPPING:
         load_mapping(stage, collection, **params)
     elif stage.stage == OP_FLUSH_MAPPING:
         flush_mapping(stage, collection, sync=run_sync, **params)
     elif stage.stage == OP_REINGEST:
         reingest_collection(collection, job_id=stage.job.id, **params)
     elif stage.stage == OP_REINDEX:
         reindex_collection(collection, sync=run_sync, **params)
     elif stage.stage == OP_XREF:
         # The payload is not forwarded for this stage.
         xref_collection(stage, collection)
     elif stage.stage == OP_XREF_ITEM:
         xref_item(stage, collection, **params)
     log.info("Task [%s]: %s (done)", task.job.dataset, stage.stage)
Ejemplo n.º 16
0
 def handle(self, task):
     """Resolve the task's collection and run the handler for its stage.

     The collection is looked up by ``task.job.dataset.name``; when it is
     missing an error is logged and nothing else happens. Otherwise the
     handler matching ``task.stage.stage`` is called and a completion
     line is logged.
     """
     stage = task.stage
     params = task.payload
     collection = Collection.by_foreign_id(task.job.dataset.name)
     if collection is None:
         log.error("Collection not found: %s", task.job.dataset)
         return
     run_sync = task.context.get('sync', False)
     if stage.stage == OP_INDEX:
         index_aggregate(stage, collection, sync=run_sync, **params)
     elif stage.stage == OP_LOAD_MAPPING:
         load_mapping(stage, collection, **params)
     elif stage.stage == OP_FLUSH_MAPPING:
         flush_mapping(stage, collection, sync=run_sync, **params)
     elif stage.stage == OP_PROCESS:
         # 'reset' is removed from the payload so it is not forwarded
         # to process_collection as a keyword argument.
         if params.pop('reset', False):
             reset_collection(collection, sync=True)
         process_collection(stage, collection, sync=run_sync, **params)
     elif stage.stage == OP_XREF:
         xref_collection(stage, collection)
     elif stage.stage == OP_XREF_ITEM:
         xref_item(stage, collection, **params)
     log.info("Task [%s]: %s (done)", task.job.dataset, stage.stage)
Ejemplo n.º 17
0
    def test_xref_collection(self):
        """Xref of ``coll_a`` against ``coll_c`` must create one match.

        Six entities are posted through the entities API: one into
        ``coll_a``, three into ``coll_b`` and two into ``coll_c``. No
        ``Match`` rows may exist beforehand; after flushing the index and
        calling ``xref_collection`` exactly one must exist.
        """
        _, headers = self.login(foreign_id=self.user.foreign_id)
        url = '/api/2/entities'

        # (schema, name, collection, nationality), listed in posting order.
        fixtures = (
            ('Person', 'Carlos Danger', self.coll_a, 'US'),
            ('Person', 'Carlos Danger', self.coll_b, 'US'),
            ('LegalEntity', 'Carlos Danger', self.coll_b, 'GB'),
            ('Person', 'Pure Risk', self.coll_b, 'US'),
            ('LegalEntity', 'Carlof Danger', self.coll_c, 'FR'),
            ('Person', 'Dorian Gray', self.coll_c, 'GB'),
        )
        for schema, name, collection, nationality in fixtures:
            body = {
                'schema': schema,
                'name': name,
                'collection_id': collection.id,
                'properties': {
                    'nationality': nationality
                }
            }
            self.client.post(url,
                             data=json.dumps(body),
                             headers=headers,
                             content_type='application/json')

        matches = db.session.query(Match)
        assert matches.count() == 0, matches.count()

        self.flush_index()
        xref_collection(self.coll_a.id, self.coll_c.id)

        matches = db.session.query(Match)
        assert matches.count() == 1, matches.count()
Ejemplo n.º 18
0
def xref(foreign_id, against=None):
    """Cross-reference all entities and documents in a collection.

    ``against`` is passed through ``ensure_list``; every resulting entry
    is resolved with ``get_collection`` and the collection ids are handed
    to ``xref_collection`` as ``against_collection_ids``.
    """
    source = get_collection(foreign_id)
    against_ids = []
    for entry in ensure_list(against):
        against_ids.append(get_collection(entry).id)
    xref_collection(source.id, against_collection_ids=against_ids)
Ejemplo n.º 19
0
def op_xref_handler(collection, task):
    """Run ``xref_collection`` for ``collection`` using the task's stage."""
    stage = task.stage
    xref_collection(stage, collection)
Ejemplo n.º 20
0
 def test_xref(self):
     """``iter_matches`` yields nothing before xref and three items after."""
     found = list(iter_matches(self.coll_a, self.authz))
     assert len(found) == 0, len(found)

     xref_collection(self.stage, self.coll_a)

     found = list(iter_matches(self.coll_a, self.authz))
     assert len(found) == 3, len(found)
Ejemplo n.º 21
0
 def test_xref(self):
     """Cross-referencing ``coll_a`` creates three ``Match`` rows."""
     before = db.session.query(Match)
     assert before.count() == 0, before.count()

     xref_collection(self.stage, self.coll_a)

     after = db.session.query(Match)
     assert after.count() == 3, after.count()
Ejemplo n.º 22
0
def xref(foreign_id):
    """Cross-reference all entities and documents in a collection.

    The collection is resolved via ``get_collection`` and its id is
    passed to ``xref_collection``.
    """
    xref_collection(get_collection(foreign_id).id)
Ejemplo n.º 23
0
def xref(foreign_id):
    """Cross-reference all entities and documents in a collection.

    The collection is resolved via ``get_collection`` and the stage for
    ``OP_XREF`` is obtained via ``get_stage``; both are passed to
    ``xref_collection``.
    """
    target = get_collection(foreign_id)
    xref_collection(get_stage(target, OP_XREF), target)
Ejemplo n.º 24
0
 def test_csv(self):
     """An anonymous request for the CSV xref of ``obsidian`` gets 403."""
     xref_collection(self.stage, self.residents)
     csv_url = '/api/2/collections/%s/xref.csv' % self.obsidian.id
     response = self.client.get(csv_url)
     assert response.status_code == 403, response
Ejemplo n.º 25
0
def xref(foreign_id):
    """Cross-reference all entities and documents in a collection.

    Raises:
        ValueError: if ``Collection.by_foreign_id`` returns ``None`` for
            ``foreign_id``.
    """
    collection = Collection.by_foreign_id(foreign_id)
    if collection is not None:
        xref_collection(collection)
        return
    raise ValueError("No such collection: %r" % foreign_id)
Ejemplo n.º 26
0
def op_xref_handler(collection, task):
    """Run ``xref_collection`` on ``collection``; ``task`` is not used."""
    xref_collection(collection)