Beispiel #1
0
    def test_upload_csv_doc(self):
        """Upload the CSV fixture and check it is listed, described and served.

        Posts the file at ``self.csv_path`` to ``self.url`` as an admin,
        then reads the document back through the listing, detail and file
        endpoints.
        """
        _, headers = self.login(is_admin=True)
        meta = {
            'countries': ['de', 'us'],
            'languages': ['en'],
            'source_url': 'http://pudo.org/experts.csv'
        }
        # Scope the fixture handle to the upload request so that it is
        # closed deterministically, even if the request raises.
        with open(self.csv_path) as fh:
            data = {
                'meta': json.dumps(meta),
                'foo': (fh, 'experts.csv')
            }
            res = self.client.post(self.url, data=data, headers=headers)
        assert res.status_code == 200, (res, res.data)
        docs = res.json['documents']
        assert len(docs) == 1, docs
        assert docs[0]['file_name'] == 'experts.csv', docs
        flush_index()

        res = self.client.get('/api/2/documents', headers=headers)
        assert res.json['total'] == 1, res.json
        res = self.client.get('/api/2/documents/1', headers=headers)
        assert res.json['countries'] == ['de', 'us'], res.json
        res = self.client.get('/api/2/documents/1/file', headers=headers)
        assert 'Klaus Trutzel' in res.data
        assert 'text/csv' in res.content_type, res.content_type
Beispiel #2
0
 def test_entity_tags(self):
     """Two persons sharing a phone number yield one tag for that number.

     Creates both entities through the API, then asks for the tags of the
     entity created last.
     """
     _, headers = self.login(is_admin=True)
     entities_url = '/api/2/entities'
     created = None
     for name in ("Blaaaa blubb", "Nobody Man"):
         payload = {
             'schema': 'Person',
             'name': name,
             'properties': {
                 'phone': '+491769817271'
             },
             'collection_id': self.col.id
         }
         # Only the response of the last POST is used below.
         created = self.client.post(entities_url,
                                    data=json.dumps(payload),
                                    headers=headers,
                                    content_type='application/json')
     flush_index()
     tags_url = '/api/2/entities/%s/tags' % created.json['id']
     res = self.client.get(tags_url, headers=headers)
     assert res.status_code == 200, (res.status_code, res.json)
     results = res.json['results']
     assert len(results) == 1, results
     assert results[0]['value'] == '+491769817271', results
Beispiel #3
0
    def test_load_sqlite(self):
        """Bulk-load the ``kek`` SQLite fixture and search for one entity.

        Checks that exactly one collection is created with category
        ``scrape`` and that the search returns the expected entity id.
        """
        before = Collection.all().count()
        assert 0 == before, before

        sqlite_path = self.get_fixture_path('kek.sqlite')
        os.environ['ALEPH_TEST_BULK_DATABASE_URI'] = 'sqlite:///' + sqlite_path
        bulk_load(load_config_file(self.get_fixture_path('kek.yml')))

        after = Collection.all().count()
        assert 1 == after, after

        coll = Collection.by_foreign_id('kek')
        assert coll.category == 'scrape', coll.category

        _, headers = self.login(is_admin=True)
        flush_index()

        search_url = '/api/2/entities?q=friede+springer'
        res = self.client.get(search_url, headers=headers)
        assert res.status_code == 200, res
        assert res.json['total'] == 1, res.json
        res0 = res.json['results'][0]
        assert res0['id'] == '9895ccc1b3d6444ccc6371ae239a7d55c748a714', res0
Beispiel #4
0
 def test_similar_entity(self):
     """The similar-entities endpoint finds a near-identical name.

     Creates two persons with slightly different spellings and queries
     ``/similar`` for the entity created last.
     """
     _, headers = self.login(is_admin=True)
     entities_url = '/api/2/entities'
     created = None
     for name in ("Osama bin Laden", "Osama ben Ladyn"):
         payload = {
             'schema': 'Person',
             'name': name,
             'collection_id': self.col.id
         }
         # Only the response of the last POST is used below.
         created = self.client.post(entities_url,
                                    data=json.dumps(payload),
                                    headers=headers,
                                    content_type='application/json')
     flush_index()
     similar_url = '/api/2/entities/%s/similar' % created.json['id']
     res = self.client.get(similar_url, headers=headers)
     assert res.status_code == 200, (res.status_code, res.json)
     data = res.json
     assert len(data['results']) == 1, data
     assert 'Laden' in data['results'][0]['name'], data
     assert 'Pooh' not in res.data, res.data
Beispiel #5
0
def update_entity(entity):
    """Index ``entity``, enqueue its full update, then flush the index.

    The full update is dispatched via ``update_entity_full.apply_async``
    on the user queue with the entity id as its only argument.
    """
    index_entity(entity)
    task_args = [entity.id]
    routing = {'queue': USER_QUEUE, 'routing_key': USER_ROUTING_KEY}
    update_entity_full.apply_async(task_args, **routing)
    # needed to make the call to view() work:
    flush_index()
Beispiel #6
0
    def test_upload_html_doc(self):
        """Upload the HTML fixture and check it is listed, described and served.

        Posts ``samples/website.html`` to ``self.url`` as an admin, then
        reads the document back through the listing, detail and file
        endpoints.
        """
        html_path = self.get_fixture_path('samples/website.html')
        _, headers = self.login(is_admin=True)
        meta = {
            'countries': ['ru', 'us'],
            'languages': ['en'],
            'source_url':
            'https://en.wikipedia.org/wiki/How_does_one_patch_KDE2_under_FreeBSD%3F'  # noqa
        }
        # Scope the fixture handle to the upload request so that it is
        # closed deterministically, even if the request raises.
        with open(html_path) as fh:
            data = {'meta': json.dumps(meta), 'foo': fh}
            res = self.client.post(self.url, data=data, headers=headers)
        assert res.status_code == 200, (res, res.data)
        docs = res.json['documents']
        assert len(docs) == 1, docs
        assert docs[0]['type'] == 'html', docs
        flush_index()

        res = self.client.get('/api/2/documents', headers=headers)
        assert res.json['total'] == 1, res.json
        res = self.client.get('/api/2/documents/1', headers=headers)
        assert 'us' in res.json['countries'], res.json
        assert 'html' in res.json, res.json
        assert 'Wikipedia, the free encyclopedia' in res.json['html'], \
            res.json['html']
        res = self.client.get('/api/2/documents/1/file', headers=headers)
        assert 'KDE2' in res.data
        assert 'text/html' in res.content_type, res.content_type
0
 def test_delete_source(self):
     """Deleting collection 1000 removes its hit from the search results."""
     search_url = '/api/2/search?q="mention fruit"'
     collection = Collection.by_id(1000)
     before = self.client.get(search_url)
     assert before.json['total'] == 1, before.json
     delete_collection(collection.id)
     flush_index()
     after = self.client.get(search_url)
     assert after.json['total'] == 0, after.json
Beispiel #8
0
 def load_fixtures(self, file_name, process_documents=True):
     """Load a fixture file into the database and bring the index up to date.

     ``file_name`` is resolved through ``self.get_fixture_path``. When
     ``process_documents`` is true, every document is also analyzed.
     """
     fixture_path = self.get_fixture_path(file_name)
     fixture_data = loaders.load(fixture_path)
     # This is the module-level load_fixtures function, not this method.
     load_fixtures(db, fixture_data)
     db.session.commit()
     reindex_entities()
     if process_documents:
         for document in Document.all():
             analyze_document(document)
     flush_index()
 def test_index(self):
     """The collection listing is empty anonymously and has one entry as admin."""
     update_collection(self.col)
     flush_index()
     listing_url = '/api/2/collections'
     anonymous = self.client.get(listing_url)
     assert anonymous.status_code == 200, anonymous
     assert anonymous.json['total'] == 0, anonymous.json
     _, headers = self.login(is_admin=True)
     as_admin = self.client.get(listing_url, headers=headers)
     assert as_admin.status_code == 200, as_admin
     assert as_admin.json['total'] == 1, as_admin.json
Beispiel #10
0
 def test_suggest_entity(self):
     """A prefix query on ``_suggest`` returns the entity just created."""
     self.login(is_admin=True)
     payload = {
         'schema': 'Person',
         'name': "Osama bin Laden",
         'collection_id': self.col.id
     }
     # The response of the create call is not inspected by this test.
     self.client.post('/api/1/entities', data=json.dumps(payload),
                      content_type='application/json')
     flush_index()
     res = self.client.get('/api/1/entities/_suggest?prefix=osa')
     assert res.status_code == 200, (res.status_code, res.json)
     suggestions = res.json
     assert len(suggestions['results']) == 1, suggestions
     assert 'Laden' in suggestions['results'][0]['name'], suggestions
Beispiel #11
0
    def test_entity_references(self):
        """The ``Climate`` entity from the experts fixture has one reference group.

        Bulk-loads the experts CSV, looks the entity up by search and then
        checks its ``/references`` result.
        """
        csv_uri = 'file://' + self.get_fixture_path('experts.csv')
        os.environ['ALEPH_TEST_BULK_CSV'] = csv_uri
        bulk_load(load_config_file(self.get_fixture_path('experts.yml')))
        flush_index()

        found = self.client.get('/api/2/entities?q=Climate')
        assert found.json['total'] == 1, found.json
        grp_id = found.json['results'][0]['id']

        refs_url = '/api/2/entities/%s/references' % grp_id
        results = self.client.get(refs_url).json['results']
        assert len(results) == 1, results
        assert results[0]['count'] == 3, results
Beispiel #12
0
    def test_load_sqlite(self):
        """Bulk-load the ``kek`` config and search for one known entity."""
        before = Collection.all().count()
        assert 0 == before, before

        bulk_load(load_config_file(self.get_fixture_path('kek.yml')))
        flush_index()

        after = Collection.all().count()
        assert 1 == after, after

        res = self.client.get('/api/2/entities?q=friede+springer')
        assert res.status_code == 200, res
        assert res.json['total'] == 1, res.json
        first_hit = res.json['results'][0]
        expected_id = '9895ccc1b3d6444ccc6371ae239a7d55c748a714'
        assert first_hit['id'] == expected_id, first_hit
Beispiel #13
0
    def test_load_csv(self):
        """Bulk-load the experts CSV and search for one known entity."""
        before = Collection.all().count()
        assert 0 == before, before

        csv_uri = 'file://' + self.get_fixture_path('experts.csv')
        os.environ['ALEPH_TEST_BULK_CSV'] = csv_uri
        bulk_load(load_config_file(self.get_fixture_path('experts.yml')))
        flush_index()

        after = Collection.all().count()
        assert 1 == after, after

        res = self.client.get('/api/2/entities?q=Greenfield')
        assert res.status_code == 200, res
        assert res.json['total'] == 1, res.json
        first_hit = res.json['results'][0]
        expected_id = '6897ef1acd633c229d812c1c495f030d212c9081'
        assert first_hit['id'] == expected_id, first_hit
Beispiel #14
0
 def test_index(self):
     """Entity listing and facets are empty anonymously and populated as admin."""
     index_entity(self.ent)
     flush_index()
     by_collection = '/api/1/entities?facet=collections'
     anon = self.client.get(by_collection)
     assert anon.status_code == 200, anon
     assert anon.json['total'] == 0, anon.json
     assert len(anon.json['facets']['collections']['values']) == 0, anon.json
     self.login(is_admin=True)
     res = self.client.get(by_collection)
     assert res.status_code == 200, res
     assert res.json['total'] == 1, res.json
     # Read the facet only after the total check, so a failure is
     # reported by the same assertion as before.
     facet_values = res.json['facets']['collections']['values']
     assert len(facet_values) == 1, res.json
     col0 = facet_values[0]
     assert col0['id'] == str(self.col.id), res.json
     assert col0['label'] == self.col.label, res.json
     assert len(res.json['facets']) == 1, res.json
     res = self.client.get('/api/1/entities?facet=countries')
     assert len(res.json['facets']) == 1, res.json
     assert 'values' in res.json['facets']['countries'], res.json
Beispiel #15
0
    def test_upload_html_doc(self):
        """Upload an in-memory HTML snippet and read it back via the API."""
        _, headers = self.login(is_admin=True)
        upload = (StringIO("this is a futz with a banana"), 'futz.html')
        form = {
            'meta': json.dumps(self.meta),
            'foo': upload
        }
        res = self.client.post(self.url, data=form, headers=headers)
        assert res.status_code == 200, (res, res.data)
        docs = res.json['documents']
        assert len(docs) == 1, docs
        assert docs[0]['file_name'] == 'futz.html', docs
        flush_index()

        listing = self.client.get('/api/2/documents', headers=headers)
        assert listing.json['total'] == 1, listing.json
        detail = self.client.get('/api/2/documents/1', headers=headers)
        assert detail.json['countries'] == ['de', 'us'], detail.json
        served = self.client.get('/api/2/documents/1/file', headers=headers)
        assert 'futz with a banana' in served.data
        assert 'text/html' in served.content_type, served.content_type
Beispiel #16
0
def delete_pending(collection_id=None):
    """Delete pending entities together with the references pointing at them.

    :param collection_id: when not ``None``, only pending entities of this
        collection (and their references) are removed; otherwise pending
        entities of all collections are removed.

    The session is committed and the index flushed before returning.
    """
    pending = db.session.query(Entity.id)
    pending = pending.filter(Entity.state == Entity.STATE_PENDING)
    if collection_id is not None:
        pending = pending.filter(Entity.collection_id == collection_id)

    # References must go first: they are selected through a subquery on the
    # pending entities, which would match nothing once those rows are gone.
    refs = db.session.query(Reference)
    refs = refs.filter(Reference.entity_id.in_(pending))
    refs.delete(synchronize_session='fetch')

    pending.delete(synchronize_session='fetch')

    db.session.commit()
    flush_index()
Beispiel #17
0
    def setUp(self):
        """Create users, three collections and seven person entities.

        Two collections (``residents`` and ``dabo``) are granted to the
        guest role; ``obsidian`` receives no such grant. All entities are
        committed and indexed, and the index is flushed at the end.
        """
        super(XrefApiTestCase, self).setUp()
        self.creator = self.create_user(foreign_id='creator')
        self.outsider = self.create_user(foreign_id='outsider')
        self.guest = self.create_user(foreign_id=Role.SYSTEM_GUEST)

        def make_collection(data, public):
            # Flush before granting, as the original setup did.
            collection = Collection.create(data, role=self.creator)
            db.session.add(collection)
            db.session.flush()
            if public:
                Permission.grant(collection, self.guest, True, False)
            return collection

        def make_person(name, collection):
            entity = Entity.create({
                'schema': 'Person',
                'name': name,
            }, collection)
            db.session.add(entity)
            return entity

        # First public collection and entities
        self.residents = make_collection({
            'label': 'Residents of Habitat Ring',
            'foreign_id': 'test_residents'
        }, True)
        self.ent = make_person('Elim Garak', self.residents)
        self.ent2 = make_person('Leeta', self.residents)

        # Second public collection and entities
        self.dabo = make_collection({
            'label': 'Dabo Girls',
            'foreign_id': 'test_dabo'
        }, True)
        self.ent3 = make_person('MPella', self.dabo)
        self.ent4 = make_person('Leeta', self.dabo)
        self.ent5 = make_person('Mardah', self.dabo)

        # Private collection and entities
        self.obsidian = make_collection({
            'label': 'Obsidian Order',
            'foreign_id': 'test_obsidian',
            'category': 'leak'
        }, False)
        self.ent6 = make_person('Elim Garack', self.obsidian)
        self.ent7 = make_person('Enabran Tain', self.obsidian)

        db.session.commit()
        created = (self.ent, self.ent2, self.ent3, self.ent4,
                   self.ent5, self.ent6, self.ent7)
        for entity in created:
            index_entity(entity)
        flush_index()
Beispiel #18
0
 def flush_index(self):
     """Call the module-level ``flush_index`` function."""
     # Inside a method the bare name resolves to the global function,
     # not to this method, so this does not recurse.
     flush_index()
Beispiel #19
0
def update_entity(entity):
    """Index ``entity``, enqueue its full update, then flush the index.

    The full update is dispatched via ``update_entity_full.apply_async``
    with the entity id as its only argument and ``priority=7``.
    """
    index_entity(entity)
    task_args = [entity.id]
    update_entity_full.apply_async(task_args, priority=7)
    # needed to make the call to view() work:
    flush_index()