def load_fixtures(self):
    """Build the private and public fixture collections and index them.

    Creates an admin user, two collections (``test_private`` and
    ``test_public``) with one database entity each, grants a permission
    on each collection to a system role, and then runs processing for
    both. The private collection is additionally loaded with the
    entities from the ``samples.ijson`` fixture through its aggregator.
    """
    self.admin = self.create_user(foreign_id='admin', is_admin=True)

    # Private collection, holding a single Person entity.
    self.private_coll = self.create_collection(
        foreign_id='test_private',
        label="Private Collection",
        category='grey',
        casefile=False,
        creator=self.admin,
    )
    banana = {'schema': 'Person', 'properties': {'name': ['Banana']}}
    self._banana = Entity.create(banana, self.private_coll)
    # NOTE(review): the two flags passed to Permission.grant are
    # presumably (read, write) -- confirm against its signature.
    system_user = Role.by_foreign_id(Role.SYSTEM_USER)
    Permission.grant(self.private_coll, system_user, True, False)

    # Public collection, holding a single Company entity.
    self.public_coll = self.create_collection(
        foreign_id='test_public',
        label="Public Collection",
        category='news',
        casefile=False,
        creator=self.admin,
    )
    kwazulu = {
        'schema': 'Company',
        'properties': {'name': ['KwaZulu'], 'alias': ['kwazulu']},
    }
    self._kwazulu = Entity.create(kwazulu, self.public_coll)
    guest = Role.by_foreign_id(Role.SYSTEM_GUEST)
    Permission.grant(self.public_coll, guest, True, False)
    db.session.commit()

    # Public collection: drop its aggregator, then process synchronously.
    drop_aggregator(self.public_coll)
    public_stage = get_stage(self.public_coll, OP_PROCESS)
    process_collection(public_stage, self.public_coll,
                       ingest=False, sync=True)

    # Private collection: empty the aggregator, then write and index the
    # sample entities one at a time before processing the collection.
    aggregator = get_aggregator(self.private_coll)
    aggregator.delete()
    private_stage = get_stage(self.private_coll, OP_PROCESS)
    samples_path = self.get_fixture_path('samples.ijson')
    for sample in read_entities(samples_path):
        aggregator.put(sample, fragment='sample')
        index_aggregate(private_stage, self.private_coll,
                        entity_id=sample.id, sync=True)
    aggregator.close()
    process_collection(private_stage, self.private_coll,
                       ingest=False, sync=True)
def test_directory_with_file(self):
    """Upload a folder and a nested sub-folder, then fetch the sub-folder.

    Both uploads must answer 201. After the collection has been
    processed, the sub-folder entity must be retrievable and expose
    ``subdirectory`` in its ``fileName`` property.
    """
    _, headers = self.login(is_admin=True)

    def upload(meta):
        # The endpoint receives the metadata as a JSON-encoded form field.
        form = {'meta': json.dumps(meta)}
        return self.client.post(self.url, data=form, headers=headers)

    parent_res = upload({
        'file_name': 'directory',
        'foreign_id': 'directory',
        'schema': 'Folder',
        'collection_id': self.col.id,
    })
    assert parent_res.status_code == 201, parent_res
    assert 'id' in parent_res.json, parent_res.json
    directory = parent_res.json['id']

    child_res = upload({
        'file_name': 'subdirectory',
        'foreign_id': 'subdirectory',
        'parent': {'id': directory},
        'collection_id': self.col.id,
    })
    assert child_res.status_code == 201, child_res

    # Process the collection before looking the new entity up via the API.
    process_collection(get_stage(self.col, OP_PROCESS), self.col,
                       ingest=False)

    assert 'id' in child_res.json, child_res.json
    url = '/api/2/entities/%s' % child_res.json['id']
    entity_res = self.client.get(url, headers=headers)
    assert entity_res.status_code == 200, entity_res
    props = entity_res.json.get('properties')
    assert 'subdirectory' in props['fileName'], entity_res.json
def load_fixtures(self):
    """Build the private and public fixture collections and index them.

    Creates two collections (``test_private`` and ``test_public``) with
    one database entity each, grants a permission on each collection to
    a system role, and then runs processing for both. The private
    collection is additionally indexed with the entities read from the
    ``samples.ijson`` fixture.
    """
    # Private collection, holding a single Person entity.
    self.private_coll = Collection.create({
        'foreign_id': 'test_private',
        'label': "Private Collection",
        'category': 'grey',
    })
    self._banana = Entity.create(
        {'schema': 'Person', 'properties': {'name': ['Banana']}},
        self.private_coll,
    )
    # NOTE(review): the two flags passed to Permission.grant are
    # presumably (read, write) -- confirm against its signature.
    user = Role.by_foreign_id(Role.SYSTEM_USER)
    Permission.grant(self.private_coll, user, True, False)

    # Public collection, holding a single Company entity.
    self.public_coll = Collection.create({
        'foreign_id': 'test_public',
        'label': "Public Collection",
        'category': 'news',
    })
    self._kwazulu = Entity.create(
        {
            'schema': 'Company',
            'properties': {'name': ['KwaZulu'], 'alias': ['kwazulu']},
        },
        self.public_coll,
    )
    visitor = Role.by_foreign_id(Role.SYSTEM_GUEST)
    Permission.grant(self.public_coll, visitor, True, False)
    db.session.commit()

    # Process the public collection. The stage must belong to the same
    # collection that is being processed; previously it was taken from
    # the private collection, so stage and collection did not match.
    drop_aggregator(self.public_coll)
    stage = get_stage(self.public_coll, OP_PROCESS)
    process_collection(stage, self.public_coll, ingest=False, sync=True)

    # Index the sample entities into the private collection. The reader
    # is created before the aggregator is dropped, as in the original
    # statement order.
    stage = get_stage(self.private_coll, OP_INDEX)
    samples = read_entities(self.get_fixture_path('samples.ijson'))
    drop_aggregator(self.private_coll)
    index_entities(stage, self.private_coll, samples, sync=True)

    # Finally process the private collection itself.
    stage = get_stage(self.private_coll, OP_PROCESS)
    process_collection(stage, self.private_coll, ingest=False, sync=True)
def bulk_write(collection, iterable, job_id=None, unsafe=False):
    """Write a set of entities - given as dicts - to the index in bulk mode.

    This performs validation, but it is dangerous: the application has no
    control over key generation and a few other aspects of building the
    entity.

    Every item is turned into an entity proxy and passed through a
    namespace derived from the collection's ``foreign_id``. Unless
    ``unsafe`` is set, ``remove_checksums`` is applied to each entity.
    All items are prepared before anything is handed to the indexer.

    Raises:
        InvalidData: if an item in ``iterable`` is not a mapping.
    """
    namespace = Namespace(collection.foreign_id)
    stage = get_stage(collection, OP_INDEX, job_id=job_id)

    def prepare(item):
        # Reject anything that is not dict-like before building a proxy.
        if not is_mapping(item):
            raise InvalidData("Failed to read input data", errors=item)
        proxy = namespace.apply(model.get_proxy(item))
        return proxy if unsafe else remove_checksums(proxy)

    prepared = [prepare(item) for item in iterable]
    index_entities(stage, collection, prepared)
def test_entity_references(self):
    """Bulk-load the experts mapping and check one entity's references.

    Searching for ``Climate`` must yield exactly one result, and the
    references endpoint for that entity must return a single group with
    a count of 3.
    """
    # The CSV location is exported via this environment variable before
    # the mapping file is loaded.
    csv_uri = self.get_fixture_path('experts.csv').as_uri()
    os.environ['ALEPH_TEST_BULK_CSV'] = csv_uri
    mapping = load_mapping_file(self.get_fixture_path('experts.yml'))

    collection = self.create_collection()
    bulk_load(get_stage(collection, OP_BULKLOAD), collection,
              mapping.get('experts'))

    _, headers = self.login(is_admin=True)
    search_res = self.client.get(
        '/api/2/entities?filter:schemata=Thing&q=Climate',
        headers=headers,
    )
    assert search_res.json['total'] == 1, search_res.json
    grp_id = search_res.json['results'][0]['id']

    refs_res = self.client.get('/api/2/entities/%s/references' % grp_id,
                               headers=headers)
    groups = refs_res.json['results']
    assert len(groups) == 1, groups
    assert groups[0]['count'] == 3, groups
def process(foreign_id, sync=False):
    """Process documents and database entities and index them.

    Looks the collection up by ``foreign_id`` and runs
    ``process_collection`` on it with an ``OP_PROCESS`` stage, passing
    ``sync`` through unchanged.
    """
    target = get_collection(foreign_id)
    process_collection(get_stage(target, OP_PROCESS), target, sync=sync)
def reindex(foreign_id, sync=False):
    """Run ``index_aggregate`` for the collection named by ``foreign_id``.

    The collection is looked up by its foreign ID and indexed using an
    ``OP_PROCESS`` stage; ``sync`` is passed through unchanged.
    """
    target = get_collection(foreign_id)
    index_aggregate(get_stage(target, OP_PROCESS), target, sync=sync)
def xref(foreign_id):
    """Cross-reference all entities and documents in a collection.

    Looks the collection up by ``foreign_id`` and runs
    ``xref_collection`` on it with an ``OP_XREF`` stage.
    """
    target = get_collection(foreign_id)
    xref_collection(get_stage(target, OP_XREF), target)
def setUp(self):
    """Create three collections of Person entities for the xref API tests.

    Two users (``creator`` and ``outsider``) are created; ``creator``
    owns all three collections. Seven entities are created, committed
    and indexed, and an ``OP_XREF`` stage for the residents collection
    is stored on ``self.stage``.
    """
    super(XrefApiTestCase, self).setUp()
    # Lower the module-level score cutoff for these tests.
    xref.SCORE_CUTOFF = 0.01
    self.creator = self.create_user(foreign_id="creator")
    self.outsider = self.create_user(foreign_id="outsider")

    def person(name, collection):
        # Every fixture entity is a Person that only carries a name.
        data = {"schema": "Person", "properties": {"name": name}}
        return self.create_entity(data, collection)

    # First collection and entities.
    # NOTE(review): the original comment calls this collection public,
    # but no grant_publish() call for it is visible here -- confirm.
    self.residents = self.create_collection(
        label="Residents of Habitat Ring",
        foreign_id="test_residents",
        creator=self.creator,
    )
    self.ent = person("Elim Garak", self.residents)
    self.ent2 = person("Leeta", self.residents)

    # Second collection and entities; this one is published.
    self.dabo = self.create_collection(
        label="Dabo Girls",
        foreign_id="test_dabo",
        creator=self.creator,
    )
    self.grant_publish(self.dabo)
    self.ent3 = person("MPella", self.dabo)
    self.ent4 = person("Leeta", self.dabo)
    self.ent5 = person("Mardah", self.dabo)

    # Private collection and entities.
    self.obsidian = self.create_collection(
        label="Obsidian Order",
        foreign_id="test_obsidian",
        creator=self.creator,
    )
    self.ent6 = person("Elim Garak", self.obsidian)
    self.ent7 = person("Enabran Tain", self.obsidian)

    db.session.commit()
    created = (
        self.ent, self.ent2, self.ent3, self.ent4,
        self.ent5, self.ent6, self.ent7,
    )
    for entity in created:
        index_entity(entity)
    self.stage = get_stage(self.residents, OP_XREF)
def setUp(self):
    """Create three collections and POST five entities for the xref tests.

    One user owns all three (non-casefile) collections. An ``OP_XREF``
    stage for the first collection and the user's ``Authz`` are stored
    on the test case, and the entities are created through the
    ``/api/2/entities`` endpoint while logged in as that user.
    """
    super(XrefTestCase, self).setUp()
    self.user = self.create_user()
    self.coll_a = self.create_collection(creator=self.user, casefile=False)
    self.coll_b = self.create_collection(creator=self.user, casefile=False)
    self.coll_c = self.create_collection(creator=self.user, casefile=False)
    db.session.commit()

    self.stage = get_stage(self.coll_a, OP_XREF, job_id='unit_test')
    self.authz = Authz.from_role(self.user)
    _, headers = self.login(foreign_id=self.user.foreign_id)
    url = '/api/2/entities'

    # (schema, collection, properties) for each entity, in posting order.
    fixtures = (
        ('Person', self.coll_a,
         {'name': 'Carlos Danger', 'nationality': 'US'}),
        ('Person', self.coll_b,
         {'name': 'Carlos Danger', 'nationality': 'US'}),
        ('LegalEntity', self.coll_b,
         {'name': 'Carlos Danger', 'country': 'GB'}),
        ('Person', self.coll_b,
         {'name': 'Pure Risk', 'nationality': 'US'}),
        ('LegalEntity', self.coll_c,
         {'name': 'Carlos Danger', 'country': 'GB'}),
    )
    for schema, collection, properties in fixtures:
        payload = {
            'schema': schema,
            'collection_id': str(collection.id),
            'properties': properties,
        }
        self.client.post(url,
                         data=json.dumps(payload),
                         headers=headers,
                         content_type='application/json')
def setUp(self):
    """Create three collections of Person entities for the xref API tests.

    Two users (``creator`` and ``outsider``) are created; ``creator``
    owns all three collections. The first two collections are
    published, the third is not. Seven entities are created, committed
    and indexed, and an ``OP_XREF`` stage for the residents collection
    is stored on ``self.stage``.
    """
    super(XrefApiTestCase, self).setUp()
    self.creator = self.create_user(foreign_id='creator')
    self.outsider = self.create_user(foreign_id='outsider')

    def person(name, collection):
        # Every fixture entity is a Person that only carries a name.
        data = {'schema': 'Person', 'properties': {'name': name}}
        return self.create_entity(data, collection)

    # First public collection and entities
    self.residents = self.create_collection(
        label='Residents of Habitat Ring',
        foreign_id='test_residents',
        creator=self.creator,
    )
    self.grant_publish(self.residents)
    self.ent = person('Elim Garak', self.residents)
    self.ent2 = person('Leeta', self.residents)

    # Second public collection and entities
    self.dabo = self.create_collection(
        label='Dabo Girls',
        foreign_id='test_dabo',
        creator=self.creator,
    )
    self.grant_publish(self.dabo)
    self.ent3 = person('MPella', self.dabo)
    self.ent4 = person('Leeta', self.dabo)
    self.ent5 = person('Mardah', self.dabo)

    # Private collection and entities (no publish grant)
    self.obsidian = self.create_collection(
        label='Obsidian Order',
        foreign_id='test_obsidian',
        creator=self.creator,
    )
    self.ent6 = person('Elim Garak', self.obsidian)
    self.ent7 = person('Enabran Tain', self.obsidian)

    db.session.commit()
    created = (
        self.ent, self.ent2, self.ent3, self.ent4,
        self.ent5, self.ent6, self.ent7,
    )
    for entity in created:
        index_entity(entity)
    self.stage = get_stage(self.residents, OP_XREF)
def setUp(self):
    """Create a fresh collection and an ``OP_BULKLOAD`` stage for it."""
    super(BulkLoadTestCase, self).setUp()
    collection = self.create_collection()
    self.coll = collection
    self.stage = get_stage(collection, OP_BULKLOAD)
def setUp(self):
    """Create three collections and POST five entities for the xref tests.

    One user owns all three collections. An ``OP_XREF`` stage for the
    first collection and the user's ``Authz`` are stored on the test
    case, and the entities are created through the ``/api/2/entities``
    endpoint while logged in as that user.
    """
    super(XrefTestCase, self).setUp()
    self.user = self.create_user()
    self.coll_a = self.create_collection(creator=self.user)
    self.coll_b = self.create_collection(creator=self.user)
    self.coll_c = self.create_collection(creator=self.user)
    db.session.commit()

    self.stage = get_stage(self.coll_a, OP_XREF, job_id="unit_test")
    self.authz = Authz.from_role(self.user)
    _, headers = self.login(foreign_id=self.user.foreign_id)
    url = "/api/2/entities"

    # (schema, collection, properties) for each entity, in posting order.
    fixtures = (
        ("Person", self.coll_a,
         {"name": "Carlos Danger", "nationality": "US"}),
        ("Person", self.coll_b,
         {"name": "Carlos Danger", "nationality": "US"}),
        ("LegalEntity", self.coll_b,
         {"name": "Carlos Danger", "country": "GB"}),
        ("Person", self.coll_b,
         {"name": "Pure Risk", "nationality": "US"}),
        ("LegalEntity", self.coll_c,
         {"name": "Carlos Danger", "country": "GB"}),
    )
    for schema, collection, properties in fixtures:
        payload = {
            "schema": schema,
            "collection_id": str(collection.id),
            "properties": properties,
        }
        self.client.post(
            url,
            data=json.dumps(payload),
            headers=headers,
            content_type=JSON,
        )
def reindex(foreign_id):
    """Index all the aggregator contents for a collection.

    Looks the collection up by ``foreign_id``, runs ``index_aggregate``
    on it with an ``OP_PROCESS`` stage, and then calls
    ``update_collection`` on the same collection.
    """
    target = get_collection(foreign_id)
    index_aggregate(get_stage(target, OP_PROCESS), target)
    update_collection(target)