def test_linkage_index(self):
    role, headers = self.login()
    other, _ = self.login(foreign_id='other')
    profile_id = make_textid()
    coll = self.create_collection()
    self.grant_publish(coll)
    Linkage.save(profile_id, make_textid(), coll.id, role.id,
                 decision=True, decider_id=role.id)
    Linkage.save(profile_id, make_textid(), coll.id, self.group.id,
                 decision=True, decider_id=role.id)
    Linkage.save(profile_id, make_textid(), coll.id, other.id,
                 decision=True, decider_id=other.id)
    db.session.commit()
    res = self.client.get('/api/2/linkages', headers=headers)
    assert res.json['total'] == 2, res.json
    url = '/api/2/linkages?filter:context_id=%s' % role.id
    res = self.client.get(url, headers=headers)
    assert res.json['total'] == 1, res.json

def load_or_create(cls, foreign_id, type, name, email=None, is_admin=None):
    role = cls.by_foreign_id(foreign_id)
    if role is None:
        role = cls()
        role.foreign_id = foreign_id
        role.name = name or email
        role.type = type
        role.is_admin = False
        role.is_muted = False
        role.is_tester = False
        role.is_blocked = False
        role.notified_at = datetime.utcnow()
    if role.api_key is None:
        role.api_key = make_textid()
    if email is not None:
        role.email = email
    if is_admin is not None:
        role.is_admin = is_admin

    # see: https://github.com/alephdata/aleph/issues/111
    auto_admins = [a.lower() for a in settings.ADMINS]
    if email is not None and email.lower() in auto_admins:
        role.is_admin = True

    db.session.add(role)
    db.session.flush()
    return role

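# --- Hedged usage sketch (not part of the source) -------------------------
# How a login handler might call load_or_create, relying on the auto-admin
# promotion above. The foreign_id prefix, the Role.USER type constant and the
# surrounding names (Role model, db session) are illustrative assumptions;
# they are only meaningful inside an Aleph application context.
def example_provision_user(email):
    role = Role.load_or_create("password:%s" % email, Role.USER,
                               name=None, email=email)
    db.session.commit()
    # If the email appears in settings.ADMINS, the role comes back as admin.
    return role
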
def ingest_upload(collection_id):
    collection = obj_or_404(Collection.by_id(collection_id))
    request.authz.require(request.authz.collection_write(collection.id))
    log_event(request)
    crawler_run = make_textid()
    try:
        meta = json.loads(request.form.get('meta', '{}'))
    except Exception as ex:
        raise BadRequest(unicode(ex))

    documents = []
    for storage in request.files.values():
        sec_fn = os.path.join(upload_folder,
                              secure_filename(storage.filename))
        storage.save(sec_fn)
        content_hash = checksum(sec_fn)
        document = Document.by_keys(collection=collection,
                                    content_hash=content_hash)
        document.crawler = 'user_upload:%s' % request.authz.role.id
        document.crawler_run = crawler_run
        document.mime_type = storage.mimetype
        document.file_name = storage.filename
        try:
            meta = json.loads(request.form.get('meta', '{}'))
            validate(meta, 'metadata.json#')
            document.meta.update(meta)
        except Exception as ex:
            raise BadRequest(unicode(ex))
        ingest_document(document, sec_fn, user_queue=True)
        os.unlink(sec_fn)
        documents.append(document)
    return jsonify({'status': 'ok', 'documents': documents})

def load_or_create(cls, foreign_id, type, name, email=None, is_admin=None):
    role = cls.by_foreign_id(foreign_id)
    if role is None:
        role = cls()
        role.foreign_id = foreign_id
        role.name = name
        role.type = type
        role.is_admin = False
    if role.api_key is None:
        role.api_key = make_textid()
    role.email = email
    if is_admin is not None:
        role.is_admin = is_admin

    # see: https://github.com/alephdata/aleph/issues/111
    auto_admins = [a.lower() for a in get_config('AUTHZ_ADMINS')]
    if email is not None and email.lower() in auto_admins:
        role.is_admin = True

    db.session.add(role)
    db.session.flush()
    return role

def save(cls, data, collections, merge=False):
    ent = cls.by_id(data.get('id'))
    if 'state' not in data:
        data['state'] = cls.STATE_ACTIVE
    for identifier in data.get('identifiers', []):
        if ent is None:
            ent = cls.by_identifier(identifier.get('scheme'),
                                    identifier.get('identifier'),
                                    collections=collections)
    if ent is None:
        schema = data.get('$schema', cls._schema)
        cls = cls.get_schema_class(schema)
        ent = cls()
        ent.id = make_textid()

    if merge:
        for collection in ent.collections:
            if collection.id not in [c.id for c in collections]:
                collections.append(collection)

    if not len(collections):
        raise AttributeError("No collection specified.")

    ent.collections = collections
    ent.update(data, merge=merge)
    return ent

def save(cls, data, merge=False):
    ent = cls.by_id(data.get('id'))
    if 'state' not in data:
        data['state'] = cls.STATE_ACTIVE
    collections = data.pop('collections', [])
    for identifier in data.get('identifiers', []):
        if ent is None:
            ent = cls.by_identifier(identifier.get('scheme'),
                                    identifier.get('identifier'),
                                    collections=collections)
    if ent is None:
        schema = data.get('$schema', cls._schema)
        cls = cls.get_schema_class(schema)
        ent = cls()
        ent.id = make_textid()

    if merge:
        for collection in ent.collections:
            if collection.id not in [c.id for c in collections]:
                collections.append(collection)

    if not len(collections):
        raise AttributeError("No collection specified.")

    ent.collections = collections
    ent.update(data, merge=merge)
    return ent

def ingest_upload(collection_id):
    collection = obj_or_404(Collection.by_id(collection_id))
    request.authz.require(request.authz.collection_write(collection.id))
    log_event(request)
    try:
        meta = json.loads(request.form.get('meta', '{}'))
        meta['crawler_id'] = 'user_upload:%s' % request.authz.role.id
        meta['crawler_run'] = make_textid()
    except Exception as ex:
        raise BadRequest(unicode(ex))

    metas = []
    for storage in request.files.values():
        file_meta = meta.copy()
        file_meta['mime_type'] = storage.mimetype
        file_meta['file_name'] = storage.filename
        file_meta['source_path'] = storage.filename
        validate(file_meta, 'metadata.json#')
        file_meta = Metadata.from_data(file_meta)
        sec_fn = os.path.join(upload_folder,
                              secure_filename(storage.filename))
        storage.save(sec_fn)
        ingest_file(collection_id, file_meta, sec_fn, move=True,
                    queue=USER_QUEUE, routing_key=USER_ROUTING_KEY)
        metas.append(file_meta)
    return jsonify({'status': 'ok', 'metadata': metas})

def load_or_create(cls, foreign_id, type, name, email=None, is_admin=None):
    role = cls.by_foreign_id(foreign_id)
    if role is None:
        role = cls()
        role.foreign_id = foreign_id
        role.name = name or email
        role.type = type
        role.is_admin = False
        role.notified_at = datetime.utcnow()
    if role.api_key is None:
        role.api_key = make_textid()
    if email is not None:
        role.email = email
    if is_admin is not None:
        role.is_admin = is_admin

    # see: https://github.com/alephdata/aleph/issues/111
    auto_admins = [a.lower() for a in settings.ADMINS]
    if email is not None and email.lower() in auto_admins:
        role.is_admin = True

    db.session.add(role)
    db.session.flush()
    return role

def ingest_upload(collection_id):
    collection = obj_or_404(Collection.by_id(collection_id))
    authz.require(authz.collection_write(collection.id))
    log_event(request)
    try:
        meta = json.loads(request.form.get('meta', '{}'))
    except Exception as ex:
        raise BadRequest(unicode(ex))

    metas = []
    for storage in request.files.values():
        file_meta = meta.copy()
        file_meta['mime_type'] = storage.mimetype
        file_meta['file_name'] = storage.filename
        validate(file_meta, 'metadata.json#')
        file_meta = Metadata.from_data(file_meta)
        file_meta.crawler_id = 'user_upload:%s' % request.auth_role.id
        file_meta.crawler_run = make_textid()
        sec_fn = os.path.join(get_upload_folder(),
                              secure_filename(storage.filename))
        storage.save(sec_fn)
        ingest_file(collection.id, file_meta, sec_fn, move=True,
                    queue=USER_QUEUE, routing_key=USER_ROUTING_KEY)
        metas.append(file_meta)
    return jsonify({'status': 'ok', 'metadata': metas})

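# --- Hedged usage sketch (not part of the source) -------------------------
# A client-side call against the upload view above. The route and the
# "Authorization: ApiKey" header follow common Aleph conventions but are
# assumptions here, as are the host and API key; the 'meta' form field is
# parsed by the view into per-file metadata.
def example_upload_file(collection_id, api_key, path):
    import json
    import requests
    url = "https://aleph.example.org/api/2/collections/%s/ingest" % collection_id
    headers = {"Authorization": "ApiKey %s" % api_key}
    meta = {"title": "Uploaded document"}  # illustrative metadata
    with open(path, "rb") as fh:
        resp = requests.post(url,
                             headers=headers,
                             data={"meta": json.dumps(meta)},
                             files={"file": (path, fh)})
    resp.raise_for_status()
    return resp.json()
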
def create(cls, data, collection):
    ent = cls()
    ent.type = data.pop('schema', None)
    ent.id = make_textid()
    ent.collection = collection
    ent.update(data)
    ent.collection.touch()
    return ent

def create(cls, data, collection, authz):
    entityset = cls()
    entityset.id = make_textid()
    entityset.layout = {}
    entityset.role_id = authz.id
    entityset.collection_id = collection.id
    entityset.update(data)
    return entityset

def create(cls, data, role=None):
    foreign_id = data.get('foreign_id') or make_textid()
    collection = cls.by_foreign_id(foreign_id, deleted=True)
    if collection is None:
        collection = cls()
        collection.foreign_id = foreign_id
    collection.update(data, creator=role)
    collection.deleted_at = None
    return collection

def create(cls, data, collection, validate=True):
    entity = cls()
    entity_id = data.get('id') or make_textid()
    if not registry.entity.validate(entity_id):
        raise InvalidData(gettext("Invalid entity ID"))
    entity.id = collection.ns.sign(entity_id)
    entity.collection_id = collection.id
    entity.update(data, collection, validate=validate)
    return entity

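# --- Hedged illustration (not part of the source) -------------------------
# collection.ns is assumed to be a followthemoney Namespace, so sign()
# suffixes the plain text id with a signature derived from the collection,
# which keeps entity IDs from colliding across collections. The exact output
# shape shown in the comment is an assumption.
def example_signed_id(collection):
    raw_id = make_textid()
    signed = collection.ns.sign(raw_id)  # e.g. "<raw_id>.<signature>"
    return signed
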
def save(cls, data, merge=False):
    # Look up an existing record by id; otherwise resolve the concrete
    # schema class and assign a fresh text id before applying the data.
    # (The original mixed the names `link` and `ent`; one variable is used
    # consistently here.)
    ent = cls.by_id(data.get('id'))
    if ent is None:
        schema = data.get('$schema', cls._schema)
        cls = cls.get_schema_class(schema)
        ent = cls()
        ent.id = make_textid()
    ent.update(data, merge=merge)
    return ent

def create(cls, data, collection, role_id=None):
    entity = cls()
    entity_id = data.get("id") or make_textid()
    if not registry.entity.validate(entity_id):
        raise InvalidData(gettext("Invalid entity ID"))
    entity.id = collection.ns.sign(entity_id)
    entity.collection_id = collection.id
    entity.role_id = role_id
    entity.update(data, collection)
    return entity

def decide_xref(xref, decision=None, context_id=None, decider_id=None):
    """Store user feedback from an Xref result as a set of entity linkages.

    The problem here is that we're trying to translate a single pair-wise
    user decision into a merge or split decision regarding a cluster of
    entities. This works for most cases, with the exception that a profile,
    once established, cannot be split in a way that preserves what entities
    were linked to what other entities originally."""
    entity_id = xref.get('entity_id')
    collection_id = xref.get('collection_id')
    entity_linkage = Linkage.by_entity(entity_id,
                                       decision=True,
                                       collection_id=collection_id,
                                       context_id=context_id).first()

    match_id = xref.get('match_id')
    match_collection_id = xref.get('match_collection_id')
    match_linkage = Linkage.by_entity(match_id,
                                      decision=True,
                                      collection_id=match_collection_id,
                                      context_id=context_id).first()

    # If we are undecided, and we stay undecided, not much to change.
    if entity_linkage is None or match_linkage is None:
        if decision is None:
            return

    if entity_linkage is None and match_linkage is None:
        # Case 1: Neither entity is linked to a profile, make a new one.
        profile_id = make_textid()
        Linkage.save(profile_id, entity_id, collection_id, context_id,
                     decision=decision, decider_id=decider_id)
        Linkage.save(profile_id, match_id, match_collection_id, context_id,
                     decision=decision, decider_id=decider_id)
    elif entity_linkage is None and match_linkage is not None:
        # Case 2a: One entity is part of a profile, the other isn't.
        # Add the other entity to the existing profile.
        Linkage.save(match_linkage.profile_id, entity_id, collection_id,
                     context_id, decision=decision, decider_id=decider_id)
    elif entity_linkage is not None and match_linkage is None:
        # Case 2b: Like 2a, but the other way around.
        Linkage.save(entity_linkage.profile_id, match_id,
                     match_collection_id, context_id,
                     decision=decision, decider_id=decider_id)
    elif decision is True:
        # Case 3: Both entities are part of profiles. These now need to be
        # merged.
        Linkage.merge(entity_linkage.profile_id, match_linkage.profile_id)
    else:
        # Case 4: Both entities are part of profiles, and have been
        # judged not to be the same. Mark them as distinct.
        Linkage.save(entity_linkage.profile_id, match_id,
                     match_collection_id, context_id,
                     decision=decision, decider_id=decider_id)
        # Case 4b: Splitting an existing profile somewhat randomly.
        if entity_linkage.profile_id != match_linkage.profile_id:
            Linkage.save(match_linkage.profile_id, entity_id, collection_id,
                         context_id, decision=decision,
                         decider_id=decider_id)
    db.session.commit()

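# --- Hedged usage sketch (not part of the source) -------------------------
# How a review view might feed a single pairwise judgement back into
# decide_xref. The xref dict mirrors the keys read above; the reviewing
# user's role as both context and decider is an assumption, as is an open
# database session in the calling code.
def example_confirm_match(role, xref_result):
    decide_xref(xref_result,
                decision=True,
                context_id=role.id,
                decider_id=role.id)
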
def create(cls, data, collection):
    foreign_id = data.get('foreign_id')
    ent = cls.by_foreign_id(foreign_id, collection.id, deleted=True)
    if ent is None:
        ent = cls()
        ent.id = make_textid()
        ent.collection = collection
        ent.foreign_id = foreign_id
    ent.deleted_at = None
    ent.update(data)
    return ent

def create(cls, data, collection):
    foreign_ids = string_set(data.get('foreign_ids'))
    ent = cls.by_foreign_ids(foreign_ids, collection.id, deleted=True)
    if ent is None:
        ent = cls()
        ent.id = make_textid()
        ent.collection = collection
        ent.foreign_ids = foreign_ids
    ent.update(data)
    ent.deleted_at = None
    return ent

def create(cls, data, creator=None, created_at=None):
    foreign_id = data.get('foreign_id') or make_textid()
    collection = cls.by_foreign_id(foreign_id, deleted=True)
    if collection is None:
        collection = cls()
        collection.created_at = created_at
        collection.foreign_id = foreign_id
        collection.category = cls.DEFAULT
        collection.casefile = False
    collection.update(data, creator=creator)
    collection.deleted_at = None
    return collection

def create(cls, data, role=None, created_at=None):
    foreign_id = data.get('foreign_id') or make_textid()
    collection = cls.by_foreign_id(foreign_id, deleted=True)
    if collection is None:
        collection = cls()
        collection.created_at = created_at
        collection.foreign_id = foreign_id
        collection.category = cls.DEFAULT
        collection.casefile = False
    collection.update(data, creator=role)
    collection.deleted_at = None
    return collection

def create(cls, data, authz, created_at=None):
    foreign_id = data.get('foreign_id') or make_textid()
    collection = cls.by_foreign_id(foreign_id, deleted=True)
    if collection is None:
        collection = cls()
        collection.created_at = created_at
        collection.foreign_id = foreign_id
        collection.category = cls.DEFAULT_CATEGORY
        collection.casefile = True
        collection.creator_id = authz.id
    collection.update(data, authz)
    collection.deleted_at = None
    return collection

def create(cls, data, authz, created_at=None):
    foreign_id = data.get("foreign_id") or make_textid()
    collection = cls.by_foreign_id(foreign_id, deleted=True)
    if collection is None:
        collection = cls()
        collection.created_at = created_at
        collection.foreign_id = foreign_id
        collection.category = cls.CASEFILE
        collection.creator = authz.role
    collection.update(data, authz)
    collection.deleted_at = None
    if collection.creator is not None:
        Permission.grant(collection, collection.creator, True, True)
    return collection

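# --- Hedged usage sketch (not part of the source) -------------------------
# Creating a casefile for the requesting user. Per the variant above, the
# creator is taken from authz.role and is granted read and write permission.
# The Collection class name, the data payload and the commit are assumptions
# about the surrounding application context.
def example_create_casefile(authz):
    data = {"label": "Court filings"}  # illustrative payload
    collection = Collection.create(data, authz)
    db.session.commit()
    return collection
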
def create(cls, data, role=None):
    foreign_id = data.get('foreign_id') or make_textid()
    collection = cls.by_foreign_id(foreign_id, deleted=True)
    if collection is None:
        collection = cls()
        collection.foreign_id = foreign_id
        collection.creator = role
    collection.update(data)
    db.session.add(collection)
    db.session.flush()
    if role is not None:
        Permission.grant_collection(collection.id, role, True, True)
    collection.deleted_at = None
    return collection

def save(cls, data, collection_id=None, merge=False):
    ent = cls.by_id(data.get('id'))
    for identifier in data.get('identifiers', []):
        if ent is None:
            ent = cls.by_identifier(identifier.get('scheme'),
                                    identifier.get('identifier'),
                                    collection_id=collection_id)
    if ent is None:
        schema = data.get('$schema', cls._schema)
        cls = cls.get_schema_class(schema)
        ent = cls()
        ent.id = make_textid()

    if collection_id is not None:
        ent.collection_id = collection_id
    ent.update(data, merge=merge)
    return ent

def save(cls, data, collection, merge=False):
    ent = cls.by_id(data.get('id'))
    if ent is None:
        ent = cls()
        ent.type = data.pop('schema', None)
        if ent.type is None:
            raise ValueError("No schema provided.")
        ent.id = make_textid()

    if merge:
        data = merge_data(data, ent.to_dict())

    if collection is None:
        raise ValueError("No collection specified.")

    ent.collection = collection
    ent.update(data)
    return ent

def ingest_upload(collection_id):
    collection = obj_or_404(Collection.by_id(collection_id))
    authz.require(authz.collection_write(collection.id))
    log_event(request)
    try:
        meta = json.loads(request.form.get("meta", "{}"))
    except Exception as ex:
        raise BadRequest(unicode(ex))

    metas = []
    for storage in request.files.values():
        file_meta = meta.copy()
        file_meta["mime_type"] = storage.mimetype
        file_meta["file_name"] = storage.filename
        validate(file_meta, "metadata.json#")
        file_meta = Metadata.from_data(file_meta)
        file_meta.crawler_id = "user_upload:%s" % request.auth_role.id
        file_meta.crawler_run = make_textid()
        sec_fn = os.path.join(get_upload_folder(),
                              secure_filename(storage.filename))
        storage.save(sec_fn)
        ingest_file(collection.id, file_meta, sec_fn, move=True)
        metas.append(file_meta)
    return jsonify({"status": "ok", "metadata": metas})

def __init__(self):
    self.incremental = False
    self.crawler_run = make_textid()

def update(self, data):
    if self.id is None:
        self.id = make_textid()
    self.schema_update(data)

def entities_update(entityset_id):
    """
    ---
    post:
      summary: Update an entity and add it to the entity set.
      description: >
        Update the entity with id `entity_id`. If it does not exist it will
        be created. If the user cannot edit the given entity, it is merely
        added to the entity set. New entities are always created in the
        collection of the entity set.

        Aside from these idiosyncrasies, this is the same as
        `/api/2/entities/<id>`, but handles entity set membership
        transparently.
      parameters:
      - description: The entityset id.
        in: path
        name: entityset_id
        required: true
        schema:
          type: string
        example: 3a0d91ece2dce88ad3259594c7b642485235a048
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/EntityUpdate'
      responses:
        '200':
          description: OK
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Entity'
      tags:
      - Entity
    """
    entityset = get_entityset(entityset_id, request.authz.WRITE)
    data = parse_request("EntityUpdate")
    entity_id = data.get("id", make_textid())
    try:
        entity = get_index_entity(entity_id, request.authz.READ)
        collection = get_db_collection(entity.get("collection_id"),
                                       request.authz.READ)
    except NotFound:
        entity = None
        collection = entityset.collection
    tag_request(collection_id=entityset.collection_id)
    if entity is None or check_write_entity(entity, request.authz):
        if get_flag("validate", default=False):
            validate_entity(data)
        sync = get_flag("sync", default=True)
        entity_id = upsert_entity(data, collection,
                                  authz=request.authz, sync=sync)

    EntitySetItem.save(
        entityset,
        entity_id,
        collection_id=collection.id,
        added_by_id=request.authz.id,
    )
    db.session.commit()
    return entity_view(entity_id)

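# --- Hedged usage sketch (not part of the source) -------------------------
# Calling the view above over HTTP. The route, the "Authorization: ApiKey"
# header and the host are assumptions based on common Aleph conventions;
# the payload shape follows the EntityUpdate schema referenced above.
def example_add_entity_to_set(entityset_id, api_key):
    import requests
    url = ("https://aleph.example.org/api/2/entitysets/%s/entities"
           % entityset_id)
    headers = {"Authorization": "ApiKey %s" % api_key}
    body = {
        "schema": "Person",
        "properties": {"name": ["Jane Doe"]},
    }
    resp = requests.post(url, json=body, headers=headers,
                         params={"sync": "true"})
    resp.raise_for_status()
    return resp.json()
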