def crawl(self):
    """Synchronise the dashboard ticket lists into one collection.

    Requests from the ``all_closed`` and ``all_open`` endpoints are handed
    to ``self.update_entity``. Entities of the collection that were not
    returned during this run are deleted afterwards, and the collection is
    emitted together with the terms of all updated entities.
    """
    url = urljoin(self.host, '/ticket/all_closed/?format=json')
    collection = Collection.by_foreign_id(url, {
        'label': 'Investigative Dashboard Requests'
    })
    Permission.grant_foreign(collection, 'idashboard:occrp_staff',
                             True, False)
    # A set keeps the membership test in the clean-up loop cheap.
    existing_entities = set()
    terms = set()
    db.session.flush()
    for endpoint in ['all_closed', 'all_open']:
        url = urljoin(self.host, '/ticket/%s/?format=json' % endpoint)
        data = self.session.get(url).json()
        for req in data.get('paginator', {}).get('object_list'):
            ent = self.update_entity(req, collection)
            if ent is None:
                # update_entity() produced no entity for this request.
                continue
            terms.update(ent.terms)
            existing_entities.add(ent.id)
            log.info(" # %s", ent.name)

    # Drop entities that did not appear in either ticket list.
    for entity in collection.entities:
        if entity.id not in existing_entities:
            entity.delete()
    self.emit_collection(collection, terms)
def crawl_collection(self, collection):
    """Mirror one Spindle collection: its permissions and named entities.

    Collections without subjects are skipped. Local entities which are not
    part of the remote result are deleted before the collection is emitted.
    """
    if not len(collection.get('subjects', [])):
        return

    url = urljoin(self.URL, '/api/collections/%s' % collection.get('id'))
    # From here on ``collection`` is the local Collection object.
    collection = Collection.by_foreign_id(url, {
        'label': collection.get('title')
    })

    perms_res = requests.get('%s/permissions' % url, headers=self.HEADERS)
    for perm in perms_res.json().get('results', []):
        Permission.grant_foreign(collection, perm.get('role'),
                                 perm.get('read'), perm.get('write'))

    log.info(" > Spindle collection: %s", collection.label)
    entities_res = requests.get('%s/entities' % url, headers=self.HEADERS)
    terms = set()
    existing_entities = []
    for remote in entities_res.json().get('results', []):
        name = remote.get('name')
        if name is None:
            continue
        aliases = []
        for other_name in remote.get('other_names', []):
            aliases.append(other_name.get('alias'))
        ent = Entity.by_foreign_id(remote.get('id'), collection, {
            'name': name,
            'category': SCHEMATA.get(remote.get('$schema'), OTHER),
            'data': remote,
            'selectors': aliases
        })
        terms.update(ent.terms)
        existing_entities.append(ent.id)
        log.info(" # %s (%s)", ent.name, ent.category)

    # Remove local entities the remote collection did not return.
    for stale in collection.entities:
        if stale.id in existing_entities:
            continue
        stale.delete()
    self.emit_collection(collection, terms)
def crawl(self):
    """Synchronise the dashboard ticket lists into one collection.

    Requests from the ``all_closed`` and ``all_open`` endpoints are handed
    to ``self.update_entity``. Entities of the collection that were not
    returned during this run are deleted afterwards, and the collection is
    emitted together with the terms of all updated entities.
    """
    url = urljoin(self.host, '/ticket/all_closed/?format=json')
    collection = Collection.by_foreign_id(
        url, {'label': 'Investigative Dashboard Requests'})
    Permission.grant_foreign(collection, 'idashboard:occrp_staff',
                             True, False)
    # A set keeps the membership test in the clean-up loop cheap.
    existing_entities = set()
    terms = set()
    db.session.flush()
    for endpoint in ['all_closed', 'all_open']:
        url = urljoin(self.host, '/ticket/%s/?format=json' % endpoint)
        data = self.session.get(url).json()
        for req in data.get('paginator', {}).get('object_list'):
            ent = self.update_entity(req, collection)
            if ent is None:
                # update_entity() produced no entity for this request.
                continue
            terms.update(ent.terms)
            existing_entities.add(ent.id)
            log.info(" # %s", ent.name)

    # Drop entities that did not appear in either ticket list.
    for entity in collection.entities:
        if entity.id not in existing_entities:
            entity.delete()
    self.emit_collection(collection, terms)
def delete_collection(collection, keep_metadata=False, sync=False):
    """Delete the contents of a collection and, optionally, its metadata.

    When ``keep_metadata`` is true the linkages, permissions, the collection
    record itself and its index entry are left untouched.
    """
    cancel_queue(collection)
    store = get_aggregator(collection)
    try:
        store.drop()
    finally:
        # Close the aggregator even if dropping it failed.
        store.close()

    flush_notifications(collection, sync=sync)
    index.delete_entities(collection.id, sync=sync)
    xref_index.delete_xref(collection, sync=sync)

    # Re-use an existing deletion timestamp if the collection has one.
    deleted_at = collection.deleted_at
    if not deleted_at:
        deleted_at = datetime.utcnow()

    Entity.delete_by_collection(collection.id, deleted_at=deleted_at)
    Mapping.delete_by_collection(collection.id, deleted_at=deleted_at)
    Diagram.delete_by_collection(collection.id, deleted_at=deleted_at)
    Document.delete_by_collection(collection.id)

    drop_metadata = not keep_metadata
    if drop_metadata:
        # Considering linkages metadata for now, might be wrong:
        Linkage.delete_by_collection(collection.id)
        Permission.delete_by_collection(collection.id,
                                        deleted_at=deleted_at)
        collection.delete(deleted_at=deleted_at)
    db.session.commit()

    if drop_metadata:
        index.delete_collection(collection.id, sync=True)
        Authz.flush()
    refresh_collection(collection.id, sync=True)
def crawl_source(self, source):
    """Emit all entities of one OpenNames source into its collection.

    Sources listed in ``IGNORE_SOURCES`` are skipped entirely.
    """
    source_id = source.get('source_id')
    if source_id in IGNORE_SOURCES:
        return

    json_file = source.get('data', {}).get('json')
    url = urljoin(JSON_PATH, json_file)
    source_name = source.get('source') or source_id
    label = '%s - %s' % (source.get('publisher'), source_name)
    collection = self.find_collection(url, {'label': label})
    Permission.grant_foreign(collection, Role.SYSTEM_GUEST, True, False)
    log.info(" > OpenNames collection: %s", collection.label)

    scheme = 'opennames:%s' % source_id
    for record in requests.get(url).json().get('entities', []):
        # Rename the ``other_name`` key of each alias record to ``name``.
        other_names = []
        for alias in record.get('other_names', []):
            alias['name'] = alias.pop('other_name', None)
            other_names.append(alias)
        data = {
            'identifiers': [{
                'scheme': scheme,
                'identifier': record.get('uid')
            }],
            'other_names': other_names,
            'name': record.get('name'),
            '$schema': SCHEMA.get(record.get('type'),
                                  '/entity/entity.json#')
        }
        self.emit_entity(collection, data)
    self.emit_collection(collection)
def load_fixtures(self):
    """Create a private and a public collection with sample entities."""
    # Private collection, readable by the system user role.
    private_spec = {
        'foreign_id': 'test_private',
        'label': "Private Collection",
        'category': 'grey'
    }
    self.private_coll = Collection.create(private_spec)
    banana = {
        'schema': 'Person',
        'properties': {
            'name': ['Banana'],
        }
    }
    self._banana = Entity.create(banana, self.private_coll)
    user = Role.by_foreign_id(Role.SYSTEM_USER)
    Permission.grant(self.private_coll, user, True, False)

    # Public collection, readable by the guest role.
    public_spec = {
        'foreign_id': 'test_public',
        'label': "Public Collection",
        'category': 'news'
    }
    self.public_coll = Collection.create(public_spec)
    kwazulu = {
        'schema': 'Company',
        'properties': {
            'name': ['KwaZulu'],
            'alias': ['kwazulu']
        }
    }
    self._kwazulu = Entity.create(kwazulu, self.public_coll)
    visitor = Role.by_foreign_id(Role.SYSTEM_GUEST)
    Permission.grant(self.public_coll, visitor, True, False)
    db.session.commit()

    # Index the sample entities, then process both collections.
    sample_path = self.get_fixture_path('samples.ijson')
    samples = read_entities(sample_path)
    index_entities(self.private_coll, samples)
    for coll in (self.public_coll, self.private_coll):
        process_collection(coll, ingest=False, reset=True)
def crawl_source(self, source):
    """Emit all entities of one OpenNames source into its collection.

    Sources listed in ``IGNORE_SOURCES`` are skipped entirely.
    """
    source_id = source.get('source_id')
    if source_id in IGNORE_SOURCES:
        return

    json_file = source.get('data', {}).get('json')
    url = urljoin(JSON_PATH, json_file)
    source_name = source.get('source') or source_id
    label = '%s - %s' % (source.get('publisher'), source_name)
    collection = self.find_collection(url, {
        'label': label
    })
    Permission.grant_foreign(collection, Role.SYSTEM_GUEST, True, False)
    log.info(" > OpenNames collection: %s", collection.label)

    scheme = 'opennames:%s' % source_id
    for record in requests.get(url).json().get('entities', []):
        # Rename the ``other_name`` key of each alias record to ``name``.
        other_names = []
        for alias in record.get('other_names', []):
            alias['name'] = alias.pop('other_name', None)
            other_names.append(alias)
        data = {
            'identifiers': [{
                'scheme': scheme,
                'identifier': record.get('uid')
            }],
            'other_names': other_names,
            'name': record.get('name'),
            '$schema': SCHEMA.get(record.get('type'),
                                  '/entity/entity.json#')
        }
        self.emit_entity(collection, data)
    self.emit_collection(collection)
def update_permission(role, collection, read, write, editor_id=None):
    """Set a role's access to a collection and announce new read access.

    Returns the granted permission, or ``None`` when ``Permission.grant``
    yields none.
    """
    pre = Permission.by_collection_role(collection, role)
    post = Permission.grant(collection, role, read, write)
    db.session.commit()
    refresh_role(role)
    if post is None:
        return None

    # Only notify when the role could not read the collection before.
    could_read_before = not (pre is None or not pre.read)
    if could_read_before:
        return post

    params = {"role": role, "collection": collection}
    if role.foreign_id == Role.SYSTEM_GUEST:
        event, channels = Events.PUBLISH_COLLECTION, [GLOBAL]
    else:
        event, channels = Events.GRANT_COLLECTION, [role]
    publish(
        event,
        actor_id=editor_id,
        params=params,
        channels=channels,
    )
    return post
def delete_collection(collection_id, wait=False):
    """Delete a collection by ID along with its dependent objects.

    Logs an error and returns if no collection with that ID exists.
    """
    # Deleting a collection affects many associated objects and requires
    # checks, so this is done manually and in detail here.
    query = db.session.query(Collection)
    collection = query.filter(Collection.id == collection_id).first()
    if collection is None:
        log.error("No collection with ID: %r", collection_id)
        return

    log.info("Deleting collection [%r]: %r",
             collection.id, collection.label)
    deleted_at = datetime.utcnow()
    index_delete(collection_id, wait=wait)

    log.info("Deleting cross-referencing matches...")
    Match.delete_by_collection(collection_id)

    log.info("Deleting permissions...")
    Permission.delete_by_collection(collection_id, deleted_at=deleted_at)

    delete_documents(collection_id, wait=wait)
    delete_entities(collection_id, wait=wait)
    collection.delete(deleted_at=deleted_at)
    db.session.commit()
def crawl(self):
    """Synchronise dashboard tickets into a watchlist.

    Tickets whose type has no entry in ``REQUEST_TYPES`` are ignored.
    Entities not seen in this run are deleted, and the watchlist is emitted
    with the terms that differ between the previous and the updated set.
    """
    url = urljoin(self.host, '/ticket/all_closed/?format=json')
    watchlist = Watchlist.by_foreign_id(url, {
        'label': 'Investigative Dashboard Requests'
    })
    Permission.grant_foreign(watchlist, 'idashboard:occrp_staff',
                             True, False)
    existing_entities = []
    previous_terms = watchlist.terms
    updated_terms = set()
    db.session.flush()

    for endpoint in ['all_closed', 'all_open']:
        url = urljoin(self.host, '/ticket/%s/?format=json' % endpoint)
        payload = self.session.get(url).json()
        for req in payload.get('paginator', {}).get('object_list'):
            category = REQUEST_TYPES.get(req.get('ticket_type'))
            if category is None:
                continue
            foreign_id = str(req.get('id'))
            ent = Entity.by_foreign_id(foreign_id, watchlist, {
                'name': req.get('name'),
                'category': category,
                'data': req,
                'selectors': [req.get('name')]
            })
            updated_terms.update(ent.terms)
            existing_entities.append(ent.id)
            log.info(" # %s (%s)", ent.name, ent.category)

    watchlist.delete_entities(spare=existing_entities)
    changed = previous_terms.symmetric_difference(updated_terms)
    self.emit_watchlist(watchlist, changed)
def crawl_collection(self, collection):
    """Mirror one Spindle collection into a watchlist.

    Collections without subjects are skipped. The watchlist is emitted with
    the terms that differ between the previous and the updated set.
    """
    if not len(collection.get('subjects', [])):
        return

    url = urljoin(self.URL, '/api/collections/%s' % collection.get('id'))
    watchlist = Watchlist.by_foreign_id(url, {
        'label': collection.get('title')
    })

    perms_res = requests.get('%s/permissions' % url, headers=self.HEADERS)
    for perm in perms_res.json().get('results', []):
        Permission.grant_foreign(watchlist, perm.get('role'),
                                 perm.get('read'), perm.get('write'))

    log.info(" > Spindle collection: %s", watchlist.label)
    entities_res = requests.get('%s/entities' % url, headers=self.HEADERS)
    previous_terms = watchlist.terms
    updated_terms = set()
    existing_entities = []
    for remote in entities_res.json().get('results', []):
        name = remote.get('name')
        if name is None:
            continue
        aliases = []
        for other_name in remote.get('other_names', []):
            aliases.append(other_name.get('alias'))
        ent = Entity.by_foreign_id(remote.get('id'), watchlist, {
            'name': name,
            'category': SCHEMATA.get(remote.get('$schema'), OTHER),
            'data': remote,
            'selectors': aliases
        })
        updated_terms.update(ent.terms)
        existing_entities.append(ent.id)
        log.info(" # %s (%s)", ent.name, ent.category)

    watchlist.delete_entities(spare=existing_entities)
    changed = previous_terms.symmetric_difference(updated_terms)
    self.emit_watchlist(watchlist, changed)
def test_update_collections_via_doc_update(self):
    """Check which collection changes a document update accepts."""
    url = '/api/1/documents/1000'
    ores = self.client.get(url)
    user = self.login()
    Permission.grant_collection(1000, user, True, True)
    can_write = Collection.create({'label': "Write"}, user)
    no_write = Collection.create({'label': "No-write"})
    db.session.commit()

    def submit(payload):
        # POST the given document payload as JSON.
        return self.client.post(url, data=json.dumps(payload),
                                content_type='application/json')

    # Adding a collection the user can write to is accepted.
    data = ores.json.copy()
    data['collection_id'].append(can_write.id)
    res = submit(data)
    assert res.status_code == 200, res
    assert can_write.id in res.json['collection_id'], res.json

    # A collection without write access is not applied.
    data = ores.json.copy()
    data['collection_id'] = [no_write.id]
    res = submit(data)
    assert res.status_code == 200, res
    assert no_write.id not in res.json['collection_id'], res.json
    assert 1000 in res.json['collection_id'], res.json

    # An invalid collection ID is rejected.
    data = ores.json.copy()
    data['collection_id'] = ['foo']
    res = submit(data)
    assert res.status_code == 400, res
def crawl(self):
    """Synchronise dashboard tickets into a collection.

    Tickets whose type has no entry in ``REQUEST_TYPES`` are ignored.
    Entities of the collection not seen in this run are deleted before the
    collection is emitted with the gathered terms.
    """
    url = urljoin(self.host, '/ticket/all_closed/?format=json')
    collection = Collection.by_foreign_id(url, {
        'label': 'Investigative Dashboard Requests'
    })
    Permission.grant_foreign(collection, 'idashboard:occrp_staff',
                             True, False)
    existing_entities = []
    terms = set()
    db.session.flush()

    for endpoint in ['all_closed', 'all_open']:
        url = urljoin(self.host, '/ticket/%s/?format=json' % endpoint)
        payload = self.session.get(url).json()
        for req in payload.get('paginator', {}).get('object_list'):
            category = REQUEST_TYPES.get(req.get('ticket_type'))
            if category is None:
                continue
            foreign_id = str(req.get('id'))
            ent = Entity.by_foreign_id(foreign_id, collection, {
                'name': req.get('name'),
                'category': category,
                'data': req,
                'selectors': [req.get('name')]
            })
            terms.update(ent.terms)
            existing_entities.append(ent.id)
            log.info(" # %s (%s)", ent.name, ent.category)

    # Remove entities that were not returned by either endpoint.
    for stale in collection.entities:
        if stale.id in existing_entities:
            continue
        stale.delete()
    self.emit_collection(collection, terms)
def bulk_load(config):
    """Load entities in bulk for every collection described in ``config``.

    ``config`` maps collection foreign IDs to a description. A missing
    collection is created as a managed one, the listed roles get read
    access, and each configured query is passed to ``load_query``.
    """
    for foreign_id, data in config.items():
        collection = Collection.by_foreign_id(foreign_id)
        if collection is None:
            spec = {
                'foreign_id': foreign_id,
                'managed': True,
                'label': data.get('label') or foreign_id,
                'summary': data.get('summary'),
                'category': data.get('category'),
            }
            collection = Collection.create(spec)

        for role_fk in dict_list(data, 'roles', 'role'):
            role = Role.by_foreign_id(role_fk)
            if role is None:
                log.warning("Could not find role: %s", role_fk)
                continue
            Permission.grant(collection, role, True, False)

        db.session.commit()
        update_collection(collection)

        for query in dict_list(data, 'queries', 'query'):
            load_query(collection, query)
def load_fixtures(self):
    """Create an admin, two collections and their sample entities."""
    self.admin = self.create_user(foreign_id='admin', is_admin=True)

    # Private collection with three "Banana" persons, readable by users.
    self.private_coll = self.create_collection(
        foreign_id='test_private',
        label="Private Collection",
        category='grey',
        creator=self.admin
    )
    banana = {
        'schema': 'Person',
        'properties': {
            'name': ['Banana'],
            'birthDate': '1970-08-21'
        }
    }
    self._banana = self.create_entity(banana, self.private_coll)
    banana2 = {
        'schema': 'Person',
        'properties': {
            'name': ['Banana'],
            'birthDate': '1970-03-21'
        }
    }
    self._banana2 = self.create_entity(banana2, self.private_coll)
    banana3 = {
        'schema': 'Person',
        'properties': {
            'name': ['Banana'],
            'birthDate': '1970-05-21'
        }
    }
    self._banana3 = self.create_entity(banana3, self.private_coll)
    user = Role.by_foreign_id(Role.SYSTEM_USER)
    Permission.grant(self.private_coll, user, True, False)

    # Public collection with one company, readable by guests.
    self.public_coll = self.create_collection(
        foreign_id='test_public',
        label="Public Collection",
        category='news',
        creator=self.admin
    )
    kwazulu = {
        'schema': 'Company',
        'properties': {
            'name': ['KwaZulu'],
            'alias': ['kwazulu']
        }
    }
    self._kwazulu = self.create_entity(kwazulu, self.public_coll)
    visitor = Role.by_foreign_id(Role.SYSTEM_GUEST)
    Permission.grant(self.public_coll, visitor, True, False)
    db.session.commit()

    # Reset the public aggregator and re-index.
    public_store = get_aggregator(self.public_coll)
    public_store.delete()
    public_store.close()
    reindex_collection(self.public_coll, sync=True)

    # Load the sample entities into the private aggregator and re-index.
    private_store = get_aggregator(self.private_coll)
    private_store.delete()
    sample_path = self.get_fixture_path('samples.ijson')
    for sample in read_entities(sample_path):
        private_store.put(sample, fragment='sample')
    private_store.close()
    reindex_collection(self.private_coll, sync=True)
def cleanup_deleted():
    """Run ``cleanup_deleted`` on each model and commit the session."""
    from aleph.model import Alert, Entity, Collection
    from aleph.model import Permission, Role

    # The order of the models is kept as-is.
    for model in (Alert, Permission, Entity, Collection, Role):
        model.cleanup_deleted()
    db.session.commit()
def crawl_collection(self, collection):
    """Mirror one Spindle collection, normalising each entity first.

    Collections without subjects are skipped. Each remote entity is
    rewritten in place (schema mapped, relationship fields dropped, aliases
    renamed, Spindle ID turned into an identifier) and saved with merging.
    Local entities not saved in this run are deleted.
    """
    if not len(collection.get('subjects', [])):
        return

    url = urljoin(self.URL, '/api/collections/%s' % collection.get('id'))
    # From here on ``collection`` is the local Collection object.
    collection = Collection.by_foreign_id(url, {
        'label': collection.get('title')
    })

    perms_res = requests.get('%s/permissions' % url, headers=self.HEADERS)
    for perm in perms_res.json().get('results', []):
        Permission.grant_foreign(collection, perm.get('role'),
                                 perm.get('read'), perm.get('write'))

    log.info(" > Spindle collection: %s", collection.label)
    entities_res = requests.get('%s/entities' % url, headers=self.HEADERS)
    terms = set()
    existing_entities = []
    dropped_fields = ('members', 'memberships', 'assets', 'owners',
                      'family_first', 'family_second', 'social_first',
                      'social_second')
    for entity in entities_res.json().get('results', []):
        if entity.get('name') is None:
            continue

        entity['$schema'] = SCHEMATA.get(entity.get('$schema'), OTHER)
        if 'jurisdiction_code' in entity:
            code = entity['jurisdiction_code']
            entity['jurisdiction_code'] = code.lower()

        for field in dropped_fields:
            entity.pop(field, None)

        # Keep only the part of a timestamp before the 'T' separator.
        for date_field in ['birth_date']:
            if date_field in entity and 'T' in entity[date_field]:
                entity[date_field] = entity[date_field].split('T', 1)[0]

        for other_name in entity.get('other_names', []):
            alias = other_name.pop('alias', None)
            if alias is not None:
                other_name['name'] = alias

        entity['identifiers'] = [{
            'scheme': 'spindle',
            'identifier': entity.pop('id', None)
        }]

        ent = Entity.save(entity, collection_id=collection.id, merge=True)
        db.session.flush()
        terms.update(ent.terms)
        existing_entities.append(ent.id)
        log.info(" # %s", ent.name)

    # Remove local entities the remote collection did not return.
    for stale in collection.entities:
        if stale.id in existing_entities:
            continue
        stale.delete()
    self.emit_collection(collection, terms)
def delete_collection(collection, sync=False):
    """Reset a collection, delete its dependent records and index entry."""
    reset_collection(collection, sync=False)
    flush_notifications(collection)

    # Re-use an existing deletion timestamp if the collection has one.
    deleted_at = collection.deleted_at
    if not deleted_at:
        deleted_at = datetime.utcnow()

    Entity.delete_by_collection(collection.id, deleted_at=deleted_at)
    Document.delete_by_collection(collection.id)
    Permission.delete_by_collection(collection.id, deleted_at=deleted_at)
    collection.delete(deleted_at=deleted_at)
    db.session.commit()

    index.delete_collection(collection.id, sync=sync)
    Authz.flush()
def update_permission(role, collection, read, write):
    """Grant a role access to a collection and notify the role by e-mail.

    Returns the permission produced by ``Permission.grant``.
    """
    pre = Permission.by_collection_role(collection, role)
    post = Permission.grant(collection, role, read, write)
    db.session.commit()

    subject = collection.label
    link = collection_url(collection.id)
    notify_role_template(role, subject, 'email/permission.html',
                         url=link,
                         pre=pre,
                         post=post,
                         collection=collection)
    return post
def delete_collection(collection, sync=False):
    """Delete a collection with its entities, matches and permissions."""
    flush_notifications(collection)
    drop_aggregator(collection)

    # Re-use an existing deletion timestamp if the collection has one.
    deleted_at = collection.deleted_at
    if not deleted_at:
        deleted_at = datetime.utcnow()

    for model in (Entity, Match, Permission):
        model.delete_by_collection(collection.id, deleted_at=deleted_at)
    collection.delete(deleted_at=deleted_at)
    db.session.commit()

    index.delete_collection(collection.id, sync=sync)
    index.delete_entities(collection.id, sync=False)
    refresh_collection(collection.id)
    Authz.flush()
def cleanup_deleted():
    """Run ``cleanup_deleted`` on each model and commit the session."""
    from aleph.model import Alert, Entity, Collection
    from aleph.model import Permission, Role, Document
    from aleph.model import Diagram, Mapping

    # The order of the models is kept as-is.
    models = (Mapping, Diagram, Document, Alert,
              Permission, Entity, Collection, Role)
    for model in models:
        model.cleanup_deleted()
    db.session.commit()
def crawl(self):
    """Pass every closed and open dashboard ticket to ``update_entity``."""
    url = urljoin(self.host, '/ticket/all_closed/?format=json')
    coll = self.find_collection(
        url, {'label': 'Investigative Dashboard Requests'})
    Permission.grant_foreign(coll, 'idashboard:occrp_staff', True, False)

    for endpoint in ['all_closed', 'all_open']:
        url = urljoin(self.host, '/ticket/%s/?format=json' % endpoint)
        payload = self.session.get(url).json()
        tickets = payload.get('paginator', {}).get('object_list')
        for req in tickets:
            # TODO: get the ID API fixed.
            self.update_entity(req, coll)
    self.emit_collection(coll)
def delete_collection(collection, keep_metadata=False, sync=False):
    """Delete the contents of a collection and, optionally, its metadata.

    When ``keep_metadata`` is true the permissions, the collection record
    and its index entry are left untouched.
    """
    reset_collection(collection, sync=False)

    # Re-use an existing deletion timestamp if the collection has one.
    deleted_at = collection.deleted_at
    if not deleted_at:
        deleted_at = datetime.utcnow()

    Entity.delete_by_collection(collection.id, deleted_at=deleted_at)
    Mapping.delete_by_collection(collection.id, deleted_at=deleted_at)
    Diagram.delete_by_collection(collection.id, deleted_at=deleted_at)
    Document.delete_by_collection(collection.id)

    drop_metadata = not keep_metadata
    if drop_metadata:
        Permission.delete_by_collection(collection.id,
                                        deleted_at=deleted_at)
        collection.delete(deleted_at=deleted_at)
    db.session.commit()

    if drop_metadata:
        index.delete_collection(collection.id, sync=sync)
        Authz.flush()
    refresh_collection(collection.id, sync=True)
def load_fixtures(self):
    """Create an admin, two collections and process their entities."""
    self.admin = self.create_user(foreign_id='admin', is_admin=True)

    # Private collection, readable by the system user role.
    self.private_coll = self.create_collection(
        foreign_id='test_private',
        label="Private Collection",
        category='grey',
        casefile=False,
        creator=self.admin)
    banana = {
        'schema': 'Person',
        'properties': {
            'name': ['Banana'],
        }
    }
    self._banana = Entity.create(banana, self.private_coll)
    user = Role.by_foreign_id(Role.SYSTEM_USER)
    Permission.grant(self.private_coll, user, True, False)

    # Public collection, readable by the guest role.
    self.public_coll = self.create_collection(
        foreign_id='test_public',
        label="Public Collection",
        category='news',
        casefile=False,
        creator=self.admin)
    kwazulu = {
        'schema': 'Company',
        'properties': {
            'name': ['KwaZulu'],
            'alias': ['kwazulu']
        }
    }
    self._kwazulu = Entity.create(kwazulu, self.public_coll)
    visitor = Role.by_foreign_id(Role.SYSTEM_GUEST)
    Permission.grant(self.public_coll, visitor, True, False)
    db.session.commit()

    # Process the public collection from an empty aggregator.
    drop_aggregator(self.public_coll)
    stage = get_stage(self.public_coll, OP_PROCESS)
    process_collection(stage, self.public_coll, ingest=False, sync=True)

    # Load and index each sample in the private collection.
    store = get_aggregator(self.private_coll)
    store.delete()
    stage = get_stage(self.private_coll, OP_PROCESS)
    sample_path = self.get_fixture_path('samples.ijson')
    for sample in read_entities(sample_path):
        store.put(sample, fragment='sample')
        index_aggregate(stage, self.private_coll,
                        entity_id=sample.id, sync=True)
    store.close()
    process_collection(stage, self.private_coll, ingest=False, sync=True)
def permissions_index(id):
    """List the visible permissions of a collection.

    Visible group roles without a permission on the collection are appended
    as placeholder entries with no read or write access.
    """
    collection = get_db_collection(id, request.authz.WRITE)
    query = Permission.all()
    query = query.filter(Permission.collection_id == collection.id)
    permissions = []
    unassigned = []
    for group in Role.all_groups():
        if check_visible(group, request.authz):
            unassigned.append(group)

    for permission in query.all():
        if not check_visible(permission.role, request.authz):
            continue
        permissions.append(permission)
        if permission.role in unassigned:
            unassigned.remove(permission.role)

    # this workaround ensures that all groups are visible for the user to
    # select in the UI even if they are not currently associated with the
    # collection.
    for role in unassigned:
        placeholder = {
            'collection_id': collection.id,
            'write': False,
            'read': False,
            'role': role
        }
        permissions.append(placeholder)

    total = len(permissions)
    results = PermissionSchema().dump(permissions, many=True)
    return jsonify({
        'total': total,
        'results': results
    })
def index(id):
    """List the visible permissions of a collection.

    Group roles (plus system roles for admins) without a permission are
    appended as placeholders; public roles are left out for casefiles.
    """
    collection = get_db_collection(id, request.authz.WRITE)
    unassigned = Role.all_groups(request.authz).all()
    if request.authz.is_admin:
        unassigned.extend(Role.all_system())

    query = Permission.all()
    query = query.filter(Permission.collection_id == collection.id)
    permissions = []
    for permission in query.all():
        if not check_visible(permission.role, request.authz):
            continue
        permissions.append(permission)
        if permission.role in unassigned:
            unassigned.remove(permission.role)

    # this workaround ensures that all groups are visible for the user to
    # select in the UI even if they are not currently associated with the
    # collection.
    for role in unassigned:
        if collection.casefile and role.is_public:
            continue
        placeholder = {
            'collection_id': collection.id,
            'write': False,
            'read': False,
            'role_id': str(role.id)
        }
        permissions.append(placeholder)

    permissions = PermissionSerializer().serialize_many(permissions)
    total = len(permissions)
    return jsonify({'total': total, 'results': permissions})
def permissions_index(collection):
    """List the visible permissions of a collection.

    Visible group roles without a permission on the collection are appended
    as placeholder entries with no read or write access.
    """
    request.authz.require(request.authz.collection_write(collection))
    query = Permission.all()
    query = query.filter(Permission.collection_id == collection)

    permissions = []
    roles_seen = set()
    for permission in query.all():
        if not check_visible(permission.role):
            continue
        permissions.append(permission)
        roles_seen.add(permission.role.id)

    # this workaround ensures that all groups are visible for the user to
    # select in the UI even if they are not currently associated with the
    # collection.
    for role in Role.all_groups():
        if not check_visible(role):
            continue
        if role.id in roles_seen:
            continue
        roles_seen.add(role.id)
        permissions.append({
            'write': False,
            'read': False,
            'role': role,
            'role_id': role.id
        })

    total = len(permissions)
    return jsonify({'total': total, 'results': permissions})
def index(id):
    """List the visible permissions of a collection and audit the access.

    Visible group roles without a permission are appended as placeholders;
    public roles are left out for casefiles.
    """
    collection = get_db_collection(id, request.authz.WRITE)
    record_audit(Audit.ACT_COLLECTION, id=id)

    unassigned = []
    for group in Role.all_groups():
        if check_visible(group, request.authz):
            unassigned.append(group)

    query = Permission.all()
    query = query.filter(Permission.collection_id == collection.id)
    permissions = []
    for permission in query.all():
        if not check_visible(permission.role, request.authz):
            continue
        permissions.append(permission)
        if permission.role in unassigned:
            unassigned.remove(permission.role)

    # this workaround ensures that all groups are visible for the user to
    # select in the UI even if they are not currently associated with the
    # collection.
    for role in unassigned:
        if collection.casefile and role.is_public:
            continue
        placeholder = {
            'collection_id': collection.id,
            'write': False,
            'read': False,
            'role_id': str(role.id)
        }
        permissions.append(placeholder)

    permissions = PermissionSerializer().serialize_many(permissions)
    total = len(permissions)
    return jsonify({
        'total': total,
        'results': permissions
    })
def crawl(self):
    """Pass every closed and open dashboard ticket to ``update_entity``."""
    url = urljoin(self.host, '/ticket/all_closed/?format=json')
    coll = self.find_collection(url, {
        'label': 'Investigative Dashboard Requests'
    })
    Permission.grant_foreign(coll, 'idashboard:occrp_staff', True, False)

    for endpoint in ['all_closed', 'all_open']:
        url = urljoin(self.host, '/ticket/%s/?format=json' % endpoint)
        payload = self.session.get(url).json()
        tickets = payload.get('paginator', {}).get('object_list')
        for req in tickets:
            # TODO: get the ID API fixed.
            self.update_entity(req, coll)
    self.emit_collection(coll)
def permissions_index(collection):
    """Return all permissions of a collection; requires write access."""
    authz.require(authz.collection_write(collection))
    query = Permission.all().filter(Permission.collection_id == collection)
    total = query.count()
    return jsonify({
        'total': total,
        'results': query
    })
def source_permissions_index(source=None):
    """Return all permissions of a source; requires write access."""
    authz.require(authz.source_write(source))
    query = Permission.all()
    # Narrow down to permissions on this particular source.
    query = query.filter(Permission.resource_type == Permission.SOURCE)
    query = query.filter(Permission.resource_id == source)
    total = query.count()
    return jsonify({
        'total': total,
        'results': query
    })
def collection_permissions_index(collection=None):
    """Return all permissions of a collection; requires write access."""
    authz.require(authz.collection_write(collection))
    query = Permission.all()
    # Narrow down to permissions on this particular collection.
    query = query.filter(Permission.resource_type == Permission.COLLECTION)
    query = query.filter(Permission.resource_id == collection)
    total = query.count()
    return jsonify({
        'total': total,
        'results': query
    })
def permissions_index(collection):
    """Return the permissions of a collection whose role is visible."""
    request.authz.require(request.authz.collection_write(collection))
    query = Permission.all()
    query = query.filter(Permission.collection_id == collection)
    permissions = [perm for perm in query.all()
                   if check_visible(perm.role)]
    total = len(permissions)
    return jsonify({'total': total, 'results': permissions})
def crawl_source(self, source):
    """Save all entities of one OpenNames source into its collection.

    Sources listed in ``IGNORE_SOURCES`` are skipped. Entities of the
    collection that were not saved in this run are deleted afterwards.
    """
    source_id = source.get('source_id')
    if source_id in IGNORE_SOURCES:
        return

    json_file = source.get('data', {}).get('json')
    url = urljoin(JSON_PATH, json_file)
    source_name = source.get('source') or source_id
    label = '%s - %s' % (source.get('publisher'), source_name)
    collection = Collection.by_foreign_id(url, {'label': label})
    Permission.grant_foreign(collection, Role.SYSTEM_GUEST, True, False)
    log.info(" > OpenNames collection: %s", collection.label)

    terms = set()
    existing_entities = []
    db.session.flush()
    scheme = 'opennames:%s' % source_id
    for record in requests.get(url).json().get('entities', []):
        # Rename the ``other_name`` key of each alias record to ``name``.
        other_names = []
        for alias in record.get('other_names', []):
            alias['name'] = alias.pop('other_name', None)
            other_names.append(alias)
        data = {
            'identifiers': [{
                'scheme': scheme,
                'identifier': record.get('uid')
            }],
            'other_names': other_names,
            'name': record.get('name'),
            '$schema': SCHEMA.get(record.get('type'),
                                  '/entity/entity.json#')
        }
        ent = Entity.save(data, collection_id=collection.id, merge=True)
        db.session.flush()
        terms.update(ent.terms)
        existing_entities.append(ent.id)
        log.info(" # %s", ent.name)

    # Remove entities that the source did not contain.
    for stale in collection.entities:
        if stale.id in existing_entities:
            continue
        stale.delete()
    self.emit_collection(collection, terms)
def delete_collection_content(collection_id):
    """Delete a collection by ID with its entities, matches and permissions.

    Logs an error and returns if no collection with that ID exists.
    """
    # Deleting a collection affects many associated objects and requires
    # checks, so this is done manually and in detail here.
    query = db.session.query(Collection)
    collection = query.filter(Collection.id == collection_id).first()
    if collection is None:
        log.error("No collection with ID: %r", collection_id)
        return

    log.info("Deleting collection [%r]: %r",
             collection.id, collection.label)

    # Re-use an existing deletion timestamp if the collection has one.
    deleted_at = collection.deleted_at
    if not deleted_at:
        deleted_at = datetime.utcnow()

    for model in (Entity, Match, Permission):
        model.delete_by_collection(collection_id, deleted_at=deleted_at)
    index.delete_collection(collection_id)
    index.delete_entities(collection_id)
    collection.delete(deleted_at=deleted_at)
    db.session.commit()
def delete_collection(collection, keep_metadata=False, sync=False,
                      reset_sync=False):
    """Delete the contents of a collection and, optionally, its metadata.

    ``reset_sync`` is passed on to ``reset_collection``. When
    ``keep_metadata`` is true the linkages, permissions, the collection
    record and its index entry are left untouched.
    """
    reset_collection(collection, sync=reset_sync)

    # Re-use an existing deletion timestamp if the collection has one.
    deleted_at = collection.deleted_at
    if not deleted_at:
        deleted_at = datetime.utcnow()

    Entity.delete_by_collection(collection.id, deleted_at=deleted_at)
    Mapping.delete_by_collection(collection.id, deleted_at=deleted_at)
    Diagram.delete_by_collection(collection.id, deleted_at=deleted_at)
    Document.delete_by_collection(collection.id)

    drop_metadata = not keep_metadata
    if drop_metadata:
        # Considering this metadata for now, might be wrong:
        Linkage.delete_by_collection(collection.id)
        Permission.delete_by_collection(collection.id,
                                        deleted_at=deleted_at)
        collection.delete(deleted_at=deleted_at)
    db.session.commit()

    if drop_metadata:
        index.delete_collection(collection.id, sync=sync)
        Authz.flush()
    refresh_collection(collection.id, sync=True)
def crawl_item(self, item, sources, source):
    """Emit the file of one item into the source named in its metadata.

    The source is taken from the item's ``source.foreign_id`` metadata,
    falling back to ``source``; it is created and cached in ``sources`` on
    first use. Raises ``ValueError`` if neither provides a foreign ID.
    """
    source_data = item.meta.get('source', {})
    source_id = source_data.pop('foreign_id', source)
    if source_id is None:
        raise ValueError("No foreign_id for source given: %r" % item)

    if source_id not in sources:
        label = source_data.get('label', source_id)
        sources[source_id] = self.create_source(foreign_id=source_id,
                                                label=label)
        # Grant read access according to the source metadata flags.
        grants = (('public', Role.SYSTEM_GUEST),
                  ('users', Role.SYSTEM_USER))
        for flag, role in grants:
            if source_data.get(flag):
                Permission.grant_foreign(sources[source_id], role,
                                         True, False)

    log.info('Import: %r', item.identifier)
    meta = self.normalize_metadata(item)
    self.emit_file(sources[source_id], meta, item.data_path)
def update_permission(role, collection, read, write, editor_id=None):
    """Set a role's access to a collection and publish the change.

    A publish or grant event is sent when read access is newly given, and
    a revoke event when existing read access is taken away.
    """
    pre = Permission.by_collection_role(collection, role)
    post = Permission.grant(collection, role, read, write)
    params = {'role': role, 'collection': collection}

    had_read = pre is not None and pre.read
    if not had_read:
        if post.read:
            if role.is_public:
                publish(Events.PUBLISH_COLLECTION,
                        actor_id=editor_id,
                        params=params,
                        channels=[Notification.GLOBAL])
            else:
                publish(Events.GRANT_COLLECTION,
                        actor_id=editor_id,
                        params=params)
    elif not post.read:
        publish(Events.REVOKE_COLLECTION,
                actor_id=editor_id,
                params=params)

    db.session.commit()
    Authz.flush()
    return post
def crawl_source(self, source):
    """Save all entities of one OpenNames source into its collection.

    Sources listed in ``IGNORE_SOURCES`` are skipped. Entities of the
    collection that were not saved in this run are deleted afterwards.
    """
    source_id = source.get('source_id')
    if source_id in IGNORE_SOURCES:
        return

    json_file = source.get('data', {}).get('json')
    url = urljoin(JSON_PATH, json_file)
    source_name = source.get('source') or source_id
    label = '%s - %s' % (source.get('publisher'), source_name)
    collection = Collection.by_foreign_id(url, {
        'label': label
    })
    Permission.grant_foreign(collection, Role.SYSTEM_GUEST, True, False)
    log.info(" > OpenNames collection: %s", collection.label)

    terms = set()
    existing_entities = []
    db.session.flush()
    scheme = 'opennames:%s' % source_id
    for record in requests.get(url).json().get('entities', []):
        # Rename the ``other_name`` key of each alias record to ``name``.
        other_names = []
        for alias in record.get('other_names', []):
            alias['name'] = alias.pop('other_name', None)
            other_names.append(alias)
        data = {
            'identifiers': [{
                'scheme': scheme,
                'identifier': record.get('uid')
            }],
            'other_names': other_names,
            'name': record.get('name'),
            '$schema': SCHEMA.get(record.get('type'),
                                  '/entity/entity.json#')
        }
        ent = Entity.save(data, collection_id=collection.id, merge=True)
        db.session.flush()
        terms.update(ent.terms)
        existing_entities.append(ent.id)
        log.info(" # %s", ent.name)

    # Remove entities that the source did not contain.
    for stale in collection.entities:
        if stale.id in existing_entities:
            continue
        stale.delete()
    self.emit_collection(collection, terms)
def crawl_source(self, source):
    """Load the named entities of one OpenNames source into a collection.

    Sources listed in ``IGNORE_SOURCES`` and entities without a name are
    skipped. Other names and identity numbers become selectors. Entities of
    the collection that were not seen in this run are deleted afterwards.
    """
    source_id = source.get('source_id')
    if source_id in IGNORE_SOURCES:
        return

    json_file = source.get('data', {}).get('json')
    url = urljoin(JSON_PATH, json_file)
    source_name = source.get('source') or source_id
    label = '%s - %s' % (source.get('publisher'), source_name)
    collection = Collection.by_foreign_id(url, {
        'label': label
    })
    Permission.grant_foreign(collection, Role.SYSTEM_GUEST, True, False)
    log.info(" > OpenNames collection: %s", collection.label)

    terms = set()
    existing_entities = []
    db.session.flush()
    for record in requests.get(url).json().get('entities', []):
        name = record.get('name')
        if name is None:
            continue

        selectors = [alias.get('other_name')
                     for alias in record.get('other_names', [])]
        for identity in record.get('identities', []):
            number = identity.get('number')
            if number:
                selectors.append(number)

        ent = Entity.by_foreign_id(record.get('uid'), collection, {
            'name': name,
            'category': CATEGORIES.get(record.get('type'), OTHER),
            'data': record,
            'selectors': selectors
        })
        terms.update(ent.terms)
        existing_entities.append(ent.id)
        log.info(" # %s (%s)", ent.name, ent.category)

    # Remove entities that the source did not contain.
    for stale in collection.entities:
        if stale.id in existing_entities:
            continue
        stale.delete()
    self.emit_collection(collection, terms)
def delete_collection(collection, keep_metadata=False, sync=False):
    """Remove the contents of a collection and mark the collection deleted.

    The collection's queue is cancelled, its aggregator dropped, its
    notifications flushed, and its entities and xref results removed from
    the indexes. Mappings, entity sets, entities and documents belonging to
    the collection are then deleted, ``collection.delete`` is called and the
    session committed. Finally the authz cache is flushed and the
    collection refreshed.

    :param collection: the collection to delete.
    :param keep_metadata: when false, the collection's permissions and its
        own index entry are deleted as well.
    :param sync: forwarded to the notification and index deletion calls.
    """
    cancel_queue(collection)
    get_aggregator(collection).drop()
    flush_notifications(collection, sync=sync)
    index.delete_entities(collection.id, sync=sync)
    xref_index.delete_xref(collection, sync=sync)

    # Reuse an existing deletion timestamp if the collection has one.
    stamp = collection.deleted_at
    if not stamp:
        stamp = datetime.utcnow()

    Mapping.delete_by_collection(collection.id)
    EntitySet.delete_by_collection(collection.id, stamp)
    for model in (Entity, Document):
        model.delete_by_collection(collection.id)

    drop_metadata = not keep_metadata
    if drop_metadata:
        Permission.delete_by_collection(collection.id)
    collection.delete(deleted_at=stamp)
    db.session.commit()

    # The collection's index entry is only removed after the commit.
    if drop_metadata:
        index.delete_collection(collection.id, sync=True)
    Authz.flush()
    refresh_collection(collection.id)
def update_permission(role, collection, read, write, editor_id=None):
    """Update a roles permission to access a given collection.

    When the role gains read access it did not have before, an event is
    published: ``PUBLISH_COLLECTION`` on the global channel if the role is
    the system guest role, otherwise ``GRANT_COLLECTION`` on the role's own
    channel. The change is committed, the authz cache flushed and the role
    refreshed.

    :param role: role whose permission is changed.
    :param collection: collection the permission applies to.
    :param read: whether read access is granted.
    :param write: whether write access is granted.
    :param editor_id: passed as ``actor_id`` of any published event.
    :return: the permission returned by ``Permission.grant``.
    """
    pre = Permission.by_collection_role(collection, role)
    # Capture the previous read flag *before* granting. NOTE(review):
    # Permission.grant presumably updates and returns the same object that
    # by_collection_role just loaded; reading ``pre.read`` afterwards would
    # then report the new state and the event below could never fire for
    # an already existing permission.
    had_read = pre is not None and bool(pre.read)
    post = Permission.grant(collection, role, read, write)
    params = {'role': role, 'collection': collection}
    if not had_read and post.read:
        if role.foreign_id == Role.SYSTEM_GUEST:
            publish(Events.PUBLISH_COLLECTION,
                    actor_id=editor_id,
                    params=params,
                    channels=[Notification.GLOBAL])
        else:
            publish(Events.GRANT_COLLECTION,
                    actor_id=editor_id,
                    params=params,
                    channels=[role])
    db.session.commit()
    Authz.flush()
    refresh_role(role)
    return post
def crawl_item(self, item):
    """Ingest one crawled item into the collection named in its metadata.

    The item's ``source`` metadata must carry a ``foreign_id``. The first
    time a foreign id is seen a collection is created for it (labelled with
    the ``label`` entry, falling back to the foreign id), granted to the
    system guest role when ``public`` is set, and committed. The item's
    file is then handed to ``ingest_file`` with ``move=False``.

    :param item: object exposing ``meta`` (a dict), ``identifier`` and
        ``data_path``.
    :raises ValueError: if the source metadata has no ``foreign_id`` or it
        is ``None``.
    """
    coll_data = item.meta.get('source', {})
    # Pop with a default so that a missing key reaches the explicit
    # ValueError below instead of escaping as a bare KeyError.
    coll_fk = coll_data.pop('foreign_id', None)
    if coll_fk is None:
        # Tuple-wrap the argument so an item that is itself a tuple is
        # formatted as a single value.
        raise ValueError("No foreign_id for collection given: %r" % (item,))

    if coll_fk not in self.collections:
        label = coll_data.get('label', coll_fk)
        self.collections[coll_fk] = Collection.create({
            'foreign_id': coll_fk,
            'label': label
        })
        if coll_data.get('public'):
            Permission.grant_foreign(self.collections[coll_fk],
                                     Role.SYSTEM_GUEST,
                                     True, False)
        db.session.commit()

    log.info('Import: %r', item.identifier)
    meta = self.normalize_metadata(item)
    ingest_file(self.collections[coll_fk].id, meta,
                item.data_path, move=False)
def permissions_update(collection):
    """Set a role's read/write permission on a collection from the request.

    Requires write access to the collection. The request data is validated
    against ``permission.json#``; the role it names must exist, otherwise
    ``BadRequest`` is raised. The grant is committed, the request is logged
    and a JSON document containing the updated permission is returned.
    """
    authz.require(authz.collection_write(collection))

    payload = request_data()
    validate(payload, "permission.json#")

    role_query = Role.all().filter(Role.id == payload["role"])
    role = role_query.first()
    if role is None:
        raise BadRequest()

    can_read = payload["read"]
    can_write = payload["write"]
    permission = Permission.grant_collection(collection, role,
                                             can_read, can_write)
    db.session.commit()
    log_event(request)

    response = {"status": "ok", "updated": permission}
    return jsonify(response)
def crawl_source(self, source):
    """Import one OpenNames source into its own watchlist.

    Sources whose ``source_id`` appears in ``IGNORE_SOURCES`` are skipped.
    Every named entity of the downloaded dump is upserted into the
    watchlist, entities not seen in this run are deleted, and the watchlist
    is emitted with the symmetric difference between the terms it had
    before the run and the terms of the upserted entities.
    """
    if source.get('source_id') in IGNORE_SOURCES:
        return

    dump_url = urljoin(JSON_PATH, source.get('data', {}).get('json'))
    watchlist = Watchlist.by_foreign_id(dump_url, {
        'label': source.get('source_id')
    })
    Permission.grant_foreign(watchlist, Role.SYSTEM_GUEST, True, False)
    log.info(" > OpenNames collection: %s", watchlist.label)

    # Terms are read before any entity is touched.
    terms_before = watchlist.terms
    terms_after = set()
    kept_ids = []
    db.session.flush()

    payload = requests.get(dump_url).json()
    for record in payload.get('entities', []):
        if record.get('name') is None:
            continue

        # Selectors: all other names first, then non-empty identity numbers.
        selectors = [alias.get('other_name')
                     for alias in record.get('other_names', [])]
        selectors.extend(identity.get('number')
                         for identity in record.get('identities', [])
                         if identity.get('number'))

        ent = Entity.by_foreign_id(record.get('uid'), watchlist, {
            'name': record.get('name'),
            'category': CATEGORIES.get(record.get('type'), OTHER),
            'data': record,
            'selectors': selectors
        })
        terms_after.update(ent.terms)
        kept_ids.append(ent.id)
        log.info("  # %s (%s)", ent.name, ent.category)

    watchlist.delete_entities(spare=kept_ids)
    changed_terms = terms_before.symmetric_difference(terms_after)
    self.emit_watchlist(watchlist, changed_terms)
def update_permission(role, collection, read, write, editor_id=None):
    """Update a roles permission to access a given collection.

    Publishes an event when read access changes: ``PUBLISH_COLLECTION`` on
    the global channel when a public role gains read access,
    ``GRANT_COLLECTION`` when any other role gains it, and
    ``REVOKE_COLLECTION`` when a role that could read no longer can. The
    change is then committed, the authz cache flushed and the role
    refreshed.

    :param role: role whose permission is changed.
    :param collection: collection the permission applies to.
    :param read: whether read access is granted.
    :param write: whether write access is granted.
    :param editor_id: passed as ``actor_id`` of any published event.
    :return: the permission returned by ``Permission.grant``.
    """
    pre = Permission.by_collection_role(collection, role)
    # Capture the previous read flag *before* granting. NOTE(review):
    # Permission.grant presumably updates and returns the same object that
    # by_collection_role just loaded; comparing ``pre.read`` with
    # ``post.read`` afterwards would then compare a value with itself, so
    # neither the grant nor the revoke event could fire for an existing
    # permission.
    had_read = pre is not None and bool(pre.read)
    post = Permission.grant(collection, role, read, write)
    params = {'role': role, 'collection': collection}
    if not had_read and post.read:
        if role.is_public:
            publish(Events.PUBLISH_COLLECTION,
                    actor_id=editor_id,
                    params=params,
                    channels=[Notification.GLOBAL])
        else:
            publish(Events.GRANT_COLLECTION,
                    actor_id=editor_id,
                    params=params)
    elif had_read and not post.read:
        publish(Events.REVOKE_COLLECTION,
                actor_id=editor_id,
                params=params)
    db.session.commit()
    Authz.flush()
    refresh_role(role)
    return post
def sources(action):
    """Return the ids of the sources the current request may access.

    The READ and WRITE id sets are computed once and stored on the request
    object as ``auth_sources``; later calls reuse them. Admins get every
    source id in both sets. Otherwise the sets are filled from the source
    permissions of the request's roles, and write access additionally
    requires the request to be logged in.

    :param action: key to look up in the cached sets (``READ`` or
        ``WRITE``); any other value yields an empty list.
    """
    if hasattr(request, 'auth_sources'):
        return list(request.auth_sources.get(action, []))

    granted = {READ: set(), WRITE: set()}
    request.auth_sources = granted

    if is_admin():
        # Each row is a one-element tuple holding the source id.
        for (source_id,) in Source.all_ids():
            granted[READ].add(source_id)
            granted[WRITE].add(source_id)
    else:
        perms = (Permission.all()
                 .filter(Permission.role_id.in_(request.auth_roles))
                 .filter(Permission.resource_type == Permission.SOURCE))
        for perm in perms:
            if perm.read:
                granted[READ].add(perm.resource_id)
            if perm.write and request.logged_in:
                granted[WRITE].add(perm.resource_id)

    return list(granted.get(action, []))
def collections(action):
    """Return the ids of the collections the current request may access.

    The READ and WRITE id sets are computed once and stored on the request
    object as ``auth_collections``; later calls reuse them. Admins get
    every collection whose ``deleted_at`` is unset in both sets. Otherwise
    the sets are filled from the collection permissions of the request's
    roles: read or write permission gives read access, and write access
    additionally requires the request to be logged in.

    :param action: key to look up in the cached sets (``READ`` or
        ``WRITE``); any other value yields an empty list.
    """
    if hasattr(request, 'auth_collections'):
        return list(request.auth_collections.get(action, []))

    granted = {READ: set(), WRITE: set()}
    request.auth_collections = granted

    if is_admin():
        live_ids = Collection.all_ids().filter(Collection.deleted_at == None)  # noqa
        # Each row is a one-element tuple holding the collection id.
        for (col_id,) in live_ids:
            granted[READ].add(col_id)
            granted[WRITE].add(col_id)
    else:
        perms = Permission.all().filter(
            Permission.role_id.in_(request.auth_roles))
        perms = perms.filter(Permission.collection_id != None)  # noqa
        for perm in perms:
            # Write permission implies read access.
            if perm.read or perm.write:
                granted[READ].add(perm.collection_id)
            if perm.write and request.logged_in:
                granted[WRITE].add(perm.collection_id)

    return list(granted.get(action, []))
def permissions_update(collection):
    """Set a role's read/write permission on a collection from the request.

    Requires write access to the collection. The request data is validated
    against ``permission.json#``; the role it names must exist, otherwise
    ``BadRequest`` is raised. The grant is made against the collection's id
    and committed, and a JSON document containing the updated permission is
    returned.
    """
    authz.require(authz.collection_write(collection))

    payload = request_data()
    validate(payload, 'permission.json#')

    matching_roles = Role.all().filter(Role.id == payload['role'])
    role = matching_roles.first()
    if role is None:
        raise BadRequest()

    permission = Permission.grant_collection(
        collection.id,
        role,
        payload['read'],
        payload['write'],
    )
    db.session.commit()

    body = {'status': 'ok', 'updated': permission}
    return jsonify(body)
def setUp(self):
    """Create a test source and give the system user role full access.

    The source is stored on ``self.source``; the permission granting read
    and write on it is committed before the test runs.
    """
    super(SourcesApiTestCase, self).setUp()

    source = Source()
    source.foreign_id = "test"
    source.label = "Test Collection"
    source.category = "news"
    self.source = source
    db.session.add(source)
    # Flush before the source's id is read for the permission below.
    db.session.flush()

    access = Permission()
    access.role_id = Role.system(Role.SYSTEM_USER)
    access.read = True
    access.write = True
    access.resource_id = source.id
    access.resource_type = Permission.SOURCE
    db.session.add(access)
    db.session.commit()
def permissions_save(watchlist=None, source=None):
    """Set a role's read/write permission on a watchlist or a source.

    Write access to every resource that is passed in is required. The
    request data is validated against ``permissions_schema``; the role it
    names must exist. The grant is committed and a JSON document containing
    the updated permission is returned. When both resources are given the
    permission is granted on the watchlist.

    :param watchlist: watchlist to grant on, or ``None``.
    :param source: source to grant on, or ``None``.
    :raises BadRequest: if neither resource is given, or the role named in
        the request does not exist.
    """
    # Without a target no authorization check would run and the grant
    # would be attempted on a ``None`` resource, so reject the call early.
    if watchlist is None and source is None:
        raise BadRequest()

    if watchlist is not None:
        authz.require(authz.watchlist_write(watchlist))
    if source is not None:
        authz.require(authz.source_write(source))

    # Select the target with the same ``is not None`` test the checks above
    # use, so a falsy-but-present watchlist is not mistaken for a source.
    if watchlist is not None:
        resource_type, resource_id = Permission.WATCHLIST, watchlist
    else:
        resource_type, resource_id = Permission.SOURCE, source

    data = request_data()
    validate(data, permissions_schema)

    role = db.session.query(Role).filter(Role.id == data['role']).first()
    if role is None:
        raise BadRequest()

    permission = Permission.grant_resource(resource_type, resource_id, role,
                                           data['read'], data['write'])
    db.session.commit()
    return jsonify({
        'status': 'ok',
        'updated': permission
    })
def grant(self, collection, role, read, write):
    """Grant ``role`` the given read/write access on ``collection``.

    The grant is committed and ``update_collection`` is then called for
    the collection.
    """
    Permission.grant(collection, role, read, write)
    # NOTE(review): the commit presumably has to happen first so that
    # update_collection sees the stored permission — confirm.
    db.session.commit()
    update_collection(collection)