Ejemplo n.º 1
0
    def test_linkage_index(self):
        """The linkage index reports two of the three stored linkages for the
        logged-in role, and one once filtered down to the role's own context.
        """
        role, headers = self.login()
        other, _ = self.login(foreign_id='other')
        profile_id = make_textid()
        coll = self.create_collection()
        self.grant_publish(coll)

        # One linkage per context, all attached to the same profile. Each
        # pair is (context_id, decider_id).
        fixtures = (
            (role.id, role.id),
            (self.group.id, role.id),
            (other.id, other.id),
        )
        for context_id, decider_id in fixtures:
            Linkage.save(profile_id,
                         make_textid(),
                         coll.id,
                         context_id,
                         decision=True,
                         decider_id=decider_id)
        db.session.commit()

        # Unfiltered listing: presumably the linkage stored in the other
        # role's context is the one hidden from this user.
        index_res = self.client.get('/api/2/linkages', headers=headers)
        assert index_res.json['total'] == 2, index_res.json

        # Restricting to the role's own context leaves a single linkage.
        filtered_url = '/api/2/linkages?filter:context_id=%s' % role.id
        filtered_res = self.client.get(filtered_url, headers=headers)
        assert filtered_res.json['total'] == 1, filtered_res.json
Ejemplo n.º 2
0
    def load_or_create(cls, foreign_id, type, name, email=None, is_admin=None):
        """Return the role identified by ``foreign_id``, creating it if needed.

        A new role gets the given ``type``, a display name (``name``, falling
        back to ``email`` when ``name`` is falsy), every boolean flag switched
        off and ``notified_at`` set to the current UTC time. For any role,
        ``email`` and ``is_admin`` are applied only when they are not
        ``None``, and a missing API key is filled in. A role whose passed-in
        ``email`` appears in ``settings.ADMINS`` (compared lower-cased) is
        always made an admin.

        The role is added to the session and flushed; no commit happens here.
        """
        role = cls.by_foreign_id(foreign_id)

        if role is None:
            role = cls()
            role.foreign_id = foreign_id
            role.type = type
            role.name = name or email
            for flag in ('is_admin', 'is_muted', 'is_tester', 'is_blocked'):
                setattr(role, flag, False)
            role.notified_at = datetime.utcnow()

        if role.api_key is None:
            role.api_key = make_textid()

        has_email = email is not None
        if has_email:
            role.email = email

        if is_admin is not None:
            role.is_admin = is_admin

        # see: https://github.com/alephdata/aleph/issues/111
        auto_admins = [admin.lower() for admin in settings.ADMINS]
        if has_email and email.lower() in auto_admins:
            role.is_admin = True

        db.session.add(role)
        db.session.flush()
        return role
Ejemplo n.º 3
0
def ingest_upload(collection_id):
    """Ingest every file of a multipart upload into a collection.

    The collection is looked up by ``collection_id`` (404 if missing) and the
    request must be authorised to write to it. The optional ``meta`` form
    field is parsed as JSON once and applied to every uploaded file after
    being validated against ``metadata.json#``. Each file is saved into
    ``upload_folder`` under a sanitised name, matched to a ``Document`` by its
    content checksum and handed to ``ingest_document``.

    Returns a JSON response listing the documents.

    Raises:
        BadRequest: if ``meta`` is not valid JSON, or if validating/applying
            it to a document fails.
    """
    collection = obj_or_404(Collection.by_id(collection_id))
    request.authz.require(request.authz.collection_write(collection.id))
    log_event(request)
    crawler_run = make_textid()

    # The metadata is the same for every file, so it is parsed only once.
    try:
        meta = json.loads(request.form.get('meta', '{}'))
    except Exception as ex:
        raise BadRequest(unicode(ex))

    documents = []
    for storage in request.files.values():
        sec_fn = os.path.join(upload_folder, secure_filename(storage.filename))
        storage.save(sec_fn)
        try:
            content_hash = checksum(sec_fn)
            document = Document.by_keys(collection=collection,
                                        content_hash=content_hash)
            document.crawler = 'user_upload:%s' % request.authz.role.id
            document.crawler_run = crawler_run
            document.mime_type = storage.mimetype
            document.file_name = storage.filename

            try:
                validate(meta, 'metadata.json#')
                document.meta.update(meta)
            except Exception as ex:
                raise BadRequest(unicode(ex))

            ingest_document(document, sec_fn, user_queue=True)
        finally:
            # Remove the temporary copy even when validation or ingest
            # fails, so rejected uploads do not pile up in the folder.
            if os.path.exists(sec_fn):
                os.unlink(sec_fn)
        documents.append(document)
    return jsonify({'status': 'ok', 'documents': documents})
Ejemplo n.º 4
0
    def load_or_create(cls, foreign_id, type, name, email=None, is_admin=None):
        """Return the role identified by ``foreign_id``, creating it if needed.

        A new role is initialised with ``name``, ``type`` and ``is_admin``
        switched off. For any role, a missing API key is generated, and
        ``email`` / ``is_admin`` are applied only when a value other than
        ``None`` is passed. A role whose passed-in ``email`` appears in the
        ``AUTHZ_ADMINS`` configuration (compared lower-cased) is always made
        an admin.

        The role is added to the session and flushed; no commit happens here.
        """
        role = cls.by_foreign_id(foreign_id)

        if role is None:
            role = cls()
            role.foreign_id = foreign_id
            role.name = name
            role.type = type
            role.is_admin = False

        if role.api_key is None:
            role.api_key = make_textid()

        # Only overwrite the stored address when one was actually supplied;
        # assigning unconditionally would blank the e-mail of an existing
        # role whenever the caller leaves the argument out.
        if email is not None:
            role.email = email

        if is_admin is not None:
            role.is_admin = is_admin

        # see: https://github.com/alephdata/aleph/issues/111
        auto_admins = [a.lower() for a in get_config('AUTHZ_ADMINS')]
        if email is not None and email.lower() in auto_admins:
            role.is_admin = True

        db.session.add(role)
        db.session.flush()

        return role
Ejemplo n.º 5
0
    def save(cls, data, collections, merge=False):
        """Create or update an entity from ``data`` and return it.

        The entity is looked up by ``data['id']`` and, failing that, by the
        first entry of ``data['identifiers']`` that resolves. If nothing is
        found, a new instance of the class registered for ``data['$schema']``
        (default ``cls._schema``) is created with a fresh ID. ``data`` gets a
        default ``state`` of ``cls.STATE_ACTIVE`` when none is present.

        With ``merge`` set, collections the entity already belongs to are
        appended to the ``collections`` list that was passed in.

        Raises:
            AttributeError: if the resulting collection list is empty.
        """
        ent = cls.by_id(data.get('id'))
        if 'state' not in data:
            data['state'] = cls.STATE_ACTIVE

        # Stop probing identifiers as soon as one of them resolves.
        for identifier in data.get('identifiers', []):
            if ent is not None:
                break
            ent = cls.by_identifier(identifier.get('scheme'),
                                    identifier.get('identifier'),
                                    collections=collections)

        if ent is None:
            model = cls.get_schema_class(data.get('$schema', cls._schema))
            ent = model()
            ent.id = make_textid()

        if merge:
            known_ids = set(c.id for c in collections)
            for collection in ent.collections:
                if collection.id not in known_ids:
                    known_ids.add(collection.id)
                    collections.append(collection)

        if len(collections) == 0:
            raise AttributeError("No collection specified.")

        ent.collections = collections
        ent.update(data, merge=merge)
        return ent
Ejemplo n.º 6
0
    def save(cls, data, merge=False):
        """Create or update an entity from ``data`` and return it.

        ``data`` is modified in place: a missing ``state`` defaults to
        ``cls.STATE_ACTIVE`` and the ``collections`` key is popped off to
        become the entity's collection list. The entity is resolved by
        ``data['id']``, then by the first matching entry in
        ``data['identifiers']``; otherwise a new instance of the class
        registered for ``data['$schema']`` (default ``cls._schema``) is made.

        With ``merge`` set, collections the entity already has are kept.

        Raises:
            AttributeError: if the resulting collection list is empty.
        """
        ent = cls.by_id(data.get('id'))
        if 'state' not in data:
            data['state'] = cls.STATE_ACTIVE

        collections = data.pop('collections', [])
        for identifier in data.get('identifiers', []):
            if ent is None:
                scheme = identifier.get('scheme')
                value = identifier.get('identifier')
                ent = cls.by_identifier(scheme, value,
                                        collections=collections)

        if ent is None:
            schema = data.get('$schema', cls._schema)
            entity_cls = cls.get_schema_class(schema)
            ent = entity_cls()
            ent.id = make_textid()

        if merge:
            # Carry over memberships that the incoming list does not name.
            for current in ent.collections:
                listed = any(current.id == c.id for c in collections)
                if not listed:
                    collections.append(current)

        if len(collections) == 0:
            raise AttributeError("No collection specified.")

        ent.collections = collections
        ent.update(data, merge=merge)
        return ent
Ejemplo n.º 7
0
def ingest_upload(collection_id):
    """Queue every uploaded file for ingest into the given collection.

    The collection must exist (404 otherwise) and be writable by the
    requesting role. The ``meta`` form field is parsed as JSON and stamped
    with a ``crawler_id`` naming the uploading role plus one ``crawler_run``
    value shared by all files of this request. Each file gets its own copy of
    that metadata with MIME type, file name and source path added, is
    validated against ``metadata.json#``, saved into ``upload_folder`` and
    passed to ``ingest_file`` on the user queue.

    Returns a JSON response listing the metadata of the queued files.

    Raises:
        BadRequest: if the base metadata cannot be parsed or stamped.
    """
    collection = obj_or_404(Collection.by_id(collection_id))
    request.authz.require(request.authz.collection_write(collection.id))
    log_event(request)

    try:
        meta = json.loads(request.form.get('meta', '{}'))
        meta['crawler_id'] = 'user_upload:%s' % request.authz.role.id
        meta['crawler_run'] = make_textid()
    except Exception as ex:
        raise BadRequest(unicode(ex))

    metas = []
    for upload in request.files.values():
        original_name = upload.filename

        raw_meta = meta.copy()
        raw_meta['mime_type'] = upload.mimetype
        raw_meta['file_name'] = original_name
        raw_meta['source_path'] = original_name
        validate(raw_meta, 'metadata.json#')
        file_meta = Metadata.from_data(raw_meta)

        # Store the upload under a sanitised name before queueing it.
        safe_name = secure_filename(original_name)
        sec_fn = os.path.join(upload_folder, safe_name)
        upload.save(sec_fn)
        ingest_file(collection_id, file_meta, sec_fn, move=True,
                    queue=USER_QUEUE, routing_key=USER_ROUTING_KEY)
        metas.append(file_meta)

    return jsonify({'status': 'ok', 'metadata': metas})
Ejemplo n.º 8
0
Archivo: role.py Proyecto: pudo/aleph
    def load_or_create(cls, foreign_id, type, name, email=None, is_admin=None):
        """Look up a role by ``foreign_id`` and create it when it is missing.

        New roles start as non-admins, are named ``name`` (or ``email`` if
        ``name`` is falsy) and have ``notified_at`` set to the current UTC
        time. Regardless of whether the role is new, an absent API key is
        generated, and ``email`` / ``is_admin`` overwrite the stored values
        only when given. If the passed-in ``email`` is listed in
        ``settings.ADMINS`` (lower-cased comparison), admin rights are
        granted.

        The role is added to the session and flushed before it is returned.
        """
        role = cls.by_foreign_id(foreign_id)

        if role is None:
            role = cls()
            role.foreign_id, role.type = foreign_id, type
            role.name = name if name else email
            role.is_admin = False
            role.notified_at = datetime.utcnow()

        if role.api_key is None:
            role.api_key = make_textid()

        if email is not None:
            role.email = email

        if is_admin is not None:
            role.is_admin = is_admin

        # see: https://github.com/alephdata/aleph/issues/111
        admin_emails = [entry.lower() for entry in settings.ADMINS]
        grants_admin = email is not None and email.lower() in admin_emails
        if grants_admin:
            role.is_admin = True

        session = db.session
        session.add(role)
        session.flush()
        return role
Ejemplo n.º 9
0
def ingest_upload(collection_id):
    """Queue every uploaded file for ingest into the given collection.

    The collection must exist (404 otherwise) and be writable. The ``meta``
    form field is parsed as JSON and copied for each file, extended with the
    file's MIME type and name, validated against ``metadata.json#`` and
    turned into a ``Metadata`` object. Each file receives the uploading
    role as ``crawler_id`` and its own freshly generated ``crawler_run``.
    The file is saved into the upload folder under a sanitised name and
    passed to ``ingest_file`` on the user queue.

    Returns a JSON response listing the metadata of the queued files.

    Raises:
        BadRequest: if ``meta`` is not valid JSON.
    """
    collection = obj_or_404(Collection.by_id(collection_id))
    authz.require(authz.collection_write(collection.id))
    log_event(request)

    try:
        meta = json.loads(request.form.get('meta', '{}'))
    except Exception as ex:
        raise BadRequest(unicode(ex))

    metas = []
    for upload in request.files.values():
        raw_meta = meta.copy()
        raw_meta['mime_type'] = upload.mimetype
        raw_meta['file_name'] = upload.filename
        validate(raw_meta, 'metadata.json#')

        file_meta = Metadata.from_data(raw_meta)
        file_meta.crawler_id = 'user_upload:%s' % request.auth_role.id
        file_meta.crawler_run = make_textid()

        # Store the upload under a sanitised name before queueing it.
        safe_name = secure_filename(upload.filename)
        sec_fn = os.path.join(get_upload_folder(), safe_name)
        upload.save(sec_fn)
        ingest_file(collection.id, file_meta, sec_fn, move=True,
                    queue=USER_QUEUE, routing_key=USER_ROUTING_KEY)
        metas.append(file_meta)

    return jsonify({'status': 'ok', 'metadata': metas})
Ejemplo n.º 10
0
 def create(cls, data, collection):
     """Build a new entity of this class inside ``collection``.

     The ``schema`` key is removed from ``data`` and stored as the entity
     type; the remaining data is handed to ``update``. The entity's
     collection is touched afterwards. Returns the new entity.
     """
     entity = cls()
     entity.id = make_textid()
     entity.type = data.pop('schema', None)
     entity.collection = collection
     entity.update(data)
     entity.collection.touch()
     return entity
Ejemplo n.º 11
0
 def create(cls, data, collection, authz):
     """Build a new entity set in ``collection`` on behalf of ``authz``.

     The set starts with a fresh ID, an empty layout, ``authz.id`` as its
     role and ``collection.id`` as its collection, then ``data`` is applied
     via ``update``. Returns the new entity set.
     """
     entityset = cls()
     initial = (
         ('id', make_textid()),
         ('layout', {}),
         ('role_id', authz.id),
         ('collection_id', collection.id),
     )
     for attr, value in initial:
         setattr(entityset, attr, value)
     entityset.update(data)
     return entityset
Ejemplo n.º 12
0
 def create(cls, data, role=None):
     """Create a collection from ``data``, or revive a matching one.

     The collection is looked up by ``data['foreign_id']`` (a generated ID is
     used when that is missing or empty); the lookup passes ``deleted=True``,
     presumably so that soft-deleted collections are found as well. The
     result is updated with ``data`` and ``role`` as creator, and its
     ``deleted_at`` marker is cleared. Returns the collection.
     """
     foreign_id = data.get('foreign_id')
     if not foreign_id:
         foreign_id = make_textid()

     collection = cls.by_foreign_id(foreign_id, deleted=True)
     is_new = collection is None
     if is_new:
         collection = cls()
         collection.foreign_id = foreign_id

     collection.update(data, creator=role)
     collection.deleted_at = None
     return collection
Ejemplo n.º 13
0
 def create(cls, data, collection, validate=True):
     """Build a new entity in ``collection`` from ``data``.

     The entity ID is ``data['id']`` or, when that is missing or empty, a
     generated one. It must pass ``registry.entity.validate`` and is then
     signed with the collection's namespace before being stored.

     Raises:
         InvalidData: if the entity ID does not validate.
     """
     entity = cls()

     raw_id = data.get('id')
     if not raw_id:
         raw_id = make_textid()
     id_is_valid = registry.entity.validate(raw_id)
     if not id_is_valid:
         raise InvalidData(gettext("Invalid entity ID"))

     entity.id = collection.ns.sign(raw_id)
     entity.collection_id = collection.id
     entity.update(data, collection, validate=validate)
     return entity
Ejemplo n.º 14
0
    def save(cls, data, merge=False):
        """Create or update a link from ``data`` and return it.

        The link is looked up by ``data['id']``. When none exists, a new
        instance of the class registered for ``data['$schema']`` (default
        ``cls._schema``) is created with a fresh ID. In both cases ``data``
        is applied through ``update`` with the given ``merge`` flag.
        """
        link = cls.by_id(data.get('id'))
        if link is None:
            schema = data.get('$schema', cls._schema)
            cls = cls.get_schema_class(schema)
            # Bind the new object to ``link``: the update and return below
            # use that name, so a separate variable here would leave ``link``
            # as None for new links and the return value unbound for
            # existing ones.
            link = cls()
            link.id = make_textid()

        link.update(data, merge=merge)
        return link
Ejemplo n.º 15
0
 def create(cls, data, collection, role_id=None):
     """Build a new entity in ``collection`` from ``data``.

     The entity ID is ``data["id"]`` or, when that is missing or empty, a
     generated one. It must pass ``registry.entity.validate`` and is then
     signed with the collection's namespace. ``role_id`` is stored on the
     entity as given.

     Raises:
         InvalidData: if the entity ID does not validate.
     """
     entity = cls()

     requested_id = data.get("id")
     entity_id = requested_id if requested_id else make_textid()
     if not registry.entity.validate(entity_id):
         raise InvalidData(gettext("Invalid entity ID"))

     signed_id = collection.ns.sign(entity_id)
     entity.id = signed_id
     entity.collection_id = collection.id
     entity.role_id = role_id
     entity.update(data, collection)
     return entity
Ejemplo n.º 16
0
def decide_xref(xref, decision=None, context_id=None, decider_id=None):
    """Store user feedback from an Xref result as a set of entity linkages.
    The problem here is that we're trying to translate a single pair-wise
    user decision into a merge or split decision regarding a cluster of
    entities.

    This works for most cases, with the exception that a profile, once
    established, cannot be split in a way that preserves what entities
    were linked to what other entities originally.

    Args:
        xref: mapping read with ``.get`` for the keys ``entity_id``,
            ``collection_id``, ``match_id`` and ``match_collection_id``.
        decision: the judgement to record; ``True`` triggers a profile merge
            when both entities are already linked, ``None`` means undecided.
        context_id: context the linkages are looked up and stored under.
        decider_id: passed through to ``Linkage.save`` as the decider.

    Returns:
        None. The session is committed unless the function returns early
        because nothing was decided.
    """
    entity_id = xref.get('entity_id')
    collection_id = xref.get('collection_id')
    # Existing positive linkage (if any) of the entity in this context.
    entity_linkage = Linkage.by_entity(entity_id, decision=True,
                                       collection_id=collection_id,
                                       context_id=context_id).first()
    match_id = xref.get('match_id')
    match_collection_id = xref.get('match_collection_id')
    # Existing positive linkage (if any) of the match in this context.
    match_linkage = Linkage.by_entity(match_id, decision=True,
                                      collection_id=match_collection_id,
                                      context_id=context_id).first()

    # If we are undecided, and we stay undecided, not much to change.
    if entity_linkage is None or match_linkage is None:
        if decision is None:
            return

    if entity_linkage is None and match_linkage is None:
        # Case 1: Neither entity is linked to a profile, make a new one.
        profile_id = make_textid()
        Linkage.save(profile_id, entity_id, collection_id, context_id,
                     decision=decision, decider_id=decider_id)
        Linkage.save(profile_id, match_id, match_collection_id, context_id,
                     decision=decision, decider_id=decider_id)
    elif entity_linkage is None and match_linkage is not None:
        # Case 2a: One entity is part of a profile, the other isn't.
        # Add the other entity to the existing profile.
        Linkage.save(match_linkage.profile_id, entity_id, collection_id,
                     context_id, decision=decision, decider_id=decider_id)
    elif entity_linkage is not None and match_linkage is None:
        # Case 2b: Like 2a, but the other way around.
        Linkage.save(entity_linkage.profile_id, match_id, match_collection_id,
                     context_id, decision=decision, decider_id=decider_id)
    elif decision is True:
        # Case 3: Both entities are part of profiles. These now need to be
        # merged.
        Linkage.merge(entity_linkage.profile_id, match_linkage.profile_id)
    else:
        # Case 4: Both entities are part of profiles, and have been
        # judged not to be the same. Mark them as distinct.
        # NOTE(review): a decision of None with both entities linked also
        # ends up in this branch, since only ``decision is True`` is
        # handled above -- confirm that is intended.
        Linkage.save(entity_linkage.profile_id, match_id, match_collection_id,
                     context_id, decision=decision, decider_id=decider_id)
        # Case 4b: Splitting an existing profile somewhat randomly.
        if entity_linkage.profile_id != match_linkage.profile_id:
            Linkage.save(match_linkage.profile_id, entity_id, collection_id,
                         context_id, decision=decision, decider_id=decider_id)
    db.session.commit()
Ejemplo n.º 17
0
 def create(cls, data, collection):
     """Create an entity in ``collection``, or revive a matching one.

     The entity is looked up by ``data['foreign_id']`` within the collection;
     the lookup passes ``deleted=True``, presumably so that soft-deleted
     entities are found as well. A missing entity is created with a fresh ID.
     The ``deleted_at`` marker is cleared before ``data`` is applied.
     Returns the entity.
     """
     foreign_id = data.get('foreign_id')
     entity = cls.by_foreign_id(foreign_id, collection.id, deleted=True)

     if entity is None:
         entity = cls()
         entity.id = make_textid()
         entity.collection = collection
         entity.foreign_id = foreign_id

     entity.deleted_at = None
     entity.update(data)
     return entity
Ejemplo n.º 18
0
Archivo: entity.py Proyecto: pudo/aleph
 def create(cls, data, collection):
     """Return the entity for ``data['foreign_id']`` in ``collection``.

     An entity that already exists under that foreign ID is reused (the
     lookup is made with ``deleted=True``); otherwise a new one is set up
     with a generated ID, the collection and the foreign ID. Either way
     ``deleted_at`` is reset to ``None`` and then ``data`` is applied via
     ``update``.
     """
     foreign_id = data.get('foreign_id')
     found = cls.by_foreign_id(foreign_id, collection.id, deleted=True)

     if found is not None:
         ent = found
     else:
         ent = cls()
         ent.id = make_textid()
         ent.collection = collection
         ent.foreign_id = foreign_id

     ent.deleted_at = None
     ent.update(data)
     return ent
Ejemplo n.º 19
0
 def create(cls, data, collection):
     """Create an entity in ``collection``, or revive a matching one.

     ``data['foreign_ids']`` is normalised with ``string_set`` and used to
     look the entity up within the collection (with ``deleted=True``). A
     missing entity is created with a fresh ID and those foreign IDs.
     ``data`` is applied first, then ``deleted_at`` is cleared. Returns the
     entity.
     """
     foreign_ids = string_set(data.get('foreign_ids'))
     entity = cls.by_foreign_ids(foreign_ids, collection.id, deleted=True)

     if entity is None:
         entity = cls()
         entity.id = make_textid()
         entity.collection = collection
         entity.foreign_ids = foreign_ids

     entity.update(data)
     entity.deleted_at = None
     return entity
Ejemplo n.º 20
0
 def create(cls, data, creator=None, created_at=None):
     """Create a collection from ``data``, or revive a matching one.

     The collection is looked up by ``data['foreign_id']`` (a generated ID is
     used when that is missing or empty), with ``deleted=True`` passed to the
     lookup. A new collection is given ``created_at``, the foreign ID, the
     default category and ``casefile = False``. The collection is then
     updated with ``data`` and ``creator``, and ``deleted_at`` is cleared.
     """
     foreign_id = data.get('foreign_id')
     if not foreign_id:
         foreign_id = make_textid()

     collection = cls.by_foreign_id(foreign_id, deleted=True)
     if collection is None:
         collection = cls()
         defaults = (
             ('created_at', created_at),
             ('foreign_id', foreign_id),
             ('category', cls.DEFAULT),
             ('casefile', False),
         )
         for attr, value in defaults:
             setattr(collection, attr, value)

     collection.update(data, creator=creator)
     collection.deleted_at = None
     return collection
Ejemplo n.º 21
0
 def create(cls, data, role=None, created_at=None):
     """Return the collection for ``data['foreign_id']``, creating it if needed.

     When ``data`` carries no usable foreign ID a generated one is used. An
     existing collection (looked up with ``deleted=True``) is reused;
     otherwise a new one is set up with ``created_at``, the foreign ID, the
     default category and ``casefile = False``. ``data`` is then applied with
     ``role`` as the creator and ``deleted_at`` is reset to ``None``.
     """
     foreign_id = data.get('foreign_id') or make_textid()
     existing = cls.by_foreign_id(foreign_id, deleted=True)

     if existing is not None:
         collection = existing
     else:
         collection = cls()
         collection.created_at = created_at
         collection.foreign_id = foreign_id
         collection.category = cls.DEFAULT
         collection.casefile = False

     collection.update(data, creator=role)
     collection.deleted_at = None
     return collection
Ejemplo n.º 22
0
 def create(cls, data, authz, created_at=None):
     """Create a collection from ``data``, or revive a matching one.

     The collection is looked up by ``data['foreign_id']`` (a generated ID is
     used when that is missing or empty), with ``deleted=True`` passed to the
     lookup. A new collection is given ``created_at``, the foreign ID, the
     default category, ``casefile = True`` and ``authz.id`` as creator. The
     collection is then updated with ``data`` and ``authz``, and
     ``deleted_at`` is cleared.
     """
     foreign_id = data.get('foreign_id')
     if not foreign_id:
         foreign_id = make_textid()

     collection = cls.by_foreign_id(foreign_id, deleted=True)
     if collection is None:
         collection = cls()
         collection.foreign_id = foreign_id
         collection.created_at = created_at
         collection.creator_id = authz.id
         collection.category = cls.DEFAULT_CATEGORY
         collection.casefile = True

     collection.update(data, authz)
     collection.deleted_at = None
     return collection
Ejemplo n.º 23
0
 def create(cls, data, authz, created_at=None):
     """Create a collection from ``data``, or revive a matching one.

     The collection is looked up by ``data["foreign_id"]`` (a generated ID is
     used when that is missing or empty), with ``deleted=True`` passed to the
     lookup. A new collection is given ``created_at``, the foreign ID, the
     casefile category and ``authz.role`` as creator. After updating with
     ``data`` and clearing ``deleted_at``, a collection that has a creator
     gets a permission granted to that creator via ``Permission.grant``.
     """
     foreign_id = data.get("foreign_id")
     if not foreign_id:
         foreign_id = make_textid()

     collection = cls.by_foreign_id(foreign_id, deleted=True)
     if collection is None:
         collection = cls()
         collection.created_at = created_at
         collection.foreign_id = foreign_id
         collection.category = cls.CASEFILE
         collection.creator = authz.role

     collection.update(data, authz)
     collection.deleted_at = None

     creator = collection.creator
     if creator is None:
         return collection
     Permission.grant(collection, collection.creator, True, True)
     return collection
Ejemplo n.º 24
0
    def create(cls, data, role=None):
        """Return the collection for ``data['foreign_id']``, creating it if
        it does not exist yet.

        An existing collection (looked up with ``deleted=True``) only has its
        ``deleted_at`` marker cleared; ``data`` is not applied to it. A new
        collection gets the foreign ID (generated when missing or empty),
        ``role`` as creator and ``data`` applied, is added to the session and
        flushed, and ``role`` (when given) is granted a permission on it via
        ``Permission.grant_collection``.
        """
        foreign_id = data.get('foreign_id') or make_textid()
        collection = cls.by_foreign_id(foreign_id, deleted=True)

        # Existing collection: just undelete it.
        if collection is not None:
            collection.deleted_at = None
            return collection

        collection = cls()
        collection.foreign_id = foreign_id
        collection.creator = role
        collection.update(data)
        # Flush before granting: the permission call below uses the ID.
        db.session.add(collection)
        db.session.flush()

        if role is not None:
            Permission.grant_collection(collection.id, role, True, True)

        collection.deleted_at = None
        return collection
Ejemplo n.º 25
0
 def save(cls, data, collection_id=None, merge=False):
     """Create or update an entity from ``data`` and return it.

     The entity is looked up by ``data['id']`` and, failing that, by the
     first entry of ``data['identifiers']`` that resolves within
     ``collection_id``. If nothing is found, a new instance of the class
     registered for ``data['$schema']`` (default ``cls._schema``) is created
     with a fresh ID and, when given, the collection ID. ``data`` is then
     applied through ``update`` with the ``merge`` flag.
     """
     ent = cls.by_id(data.get('id'))

     # Stop probing identifiers as soon as one of them resolves.
     for identifier in data.get('identifiers', []):
         if ent is not None:
             break
         ent = cls.by_identifier(identifier.get('scheme'),
                                 identifier.get('identifier'),
                                 collection_id=collection_id)

     if ent is None:
         model = cls.get_schema_class(data.get('$schema', cls._schema))
         ent = model()
         ent.id = make_textid()
         if collection_id is not None:
             ent.collection_id = collection_id

     ent.update(data, merge=merge)
     return ent
Ejemplo n.º 26
0
 def save(cls, data, collection_id=None, merge=False):
     """Return the entity described by ``data``, creating it when unknown.

     Resolution order: ``data['id']`` first, then each entry of
     ``data['identifiers']`` (scheme + identifier, scoped to
     ``collection_id``) until one matches. An unknown entity is instantiated
     from the class registered for ``data['$schema']`` (default
     ``cls._schema``), given a generated ID and, if supplied, the collection
     ID. Finally ``data`` is applied via ``update(data, merge=merge)``.
     """
     ent = cls.by_id(data.get('id'))

     for identifier in data.get('identifiers', []):
         if ent is None:
             scheme = identifier.get('scheme')
             value = identifier.get('identifier')
             ent = cls.by_identifier(scheme, value,
                                     collection_id=collection_id)

     if ent is None:
         schema = data.get('$schema', cls._schema)
         entity_cls = cls.get_schema_class(schema)
         ent = entity_cls()
         ent.id = make_textid()
         if collection_id is not None:
             ent.collection_id = collection_id

     ent.update(data, merge=merge)
     return ent
Ejemplo n.º 27
0
    def save(cls, data, collection, merge=False):
        """Create or update an entity in ``collection`` and return it.

        The entity is looked up by ``data['id']``. A new entity takes its
        type from the ``schema`` key, which is removed from ``data``, and
        gets a fresh ID. With ``merge`` set, ``data`` is combined with the
        entity's current dictionary form through ``merge_data`` before being
        applied.

        Raises:
            ValueError: if a new entity has no schema, or if ``collection``
                is ``None``.
        """
        entity = cls.by_id(data.get('id'))
        if entity is None:
            entity = cls()
            entity.type = data.pop('schema', None)
            if entity.type is None:
                raise ValueError("No schema provided.")
            entity.id = make_textid()

        payload = merge_data(data, entity.to_dict()) if merge else data

        if collection is None:
            raise ValueError("No collection specified.")

        entity.collection = collection
        entity.update(payload)
        return entity
Ejemplo n.º 28
0
def ingest_upload(collection_id):
    """Hand every file of an upload request to ``ingest_file``.

    Requires an existing collection (404 otherwise) that the request may
    write to. The ``meta`` form field is decoded from JSON and serves as the
    template for each file's metadata, which additionally records the file's
    MIME type and name and must validate against ``metadata.json#``. Every
    file is tagged with the uploading role as ``crawler_id`` and a newly
    generated ``crawler_run``, written to the upload folder under a
    sanitised name and passed on with ``move=True``.

    Returns a JSON response listing the metadata of the ingested files.

    Raises:
        BadRequest: if ``meta`` is not valid JSON.
    """
    collection = obj_or_404(Collection.by_id(collection_id))
    authz.require(authz.collection_write(collection.id))
    log_event(request)

    raw_meta = request.form.get("meta", "{}")
    try:
        meta = json.loads(raw_meta)
    except Exception as ex:
        raise BadRequest(unicode(ex))

    metas = []
    for storage in request.files.values():
        per_file = meta.copy()
        per_file["mime_type"] = storage.mimetype
        per_file["file_name"] = storage.filename
        validate(per_file, "metadata.json#")

        file_meta = Metadata.from_data(per_file)
        file_meta.crawler_id = "user_upload:%s" % request.auth_role.id
        file_meta.crawler_run = make_textid()

        target_dir = get_upload_folder()
        sec_fn = os.path.join(target_dir, secure_filename(storage.filename))
        storage.save(sec_fn)
        ingest_file(collection.id, file_meta, sec_fn, move=True)
        metas.append(file_meta)

    return jsonify({"status": "ok", "metadata": metas})
Ejemplo n.º 29
0
 def __init__(self):
     """Start a non-incremental run identified by a fresh ``crawler_run``."""
     self.crawler_run = make_textid()
     self.incremental = False
Ejemplo n.º 30
0
 def update(self, data):
     """Apply ``data`` through ``schema_update``, assigning an ID first if
     the object does not have one yet."""
     needs_id = self.id is None
     if needs_id:
         self.id = make_textid()
     self.schema_update(data)
Ejemplo n.º 31
0
def entities_update(entityset_id):
    """
    ---
    post:
      summary: Update an entity and add it to the entity set.
      description: >
        Update the entity with id `entity_id`. If it does not exist it will be
        created. If the user cannot edit the given entity, it is merely added
        to the entity set. New entities are always created in the collection of
        the entity set.

        Aside from these idiosyncracies, this is the same as `/api/2/entities/<id>`,
        but handles entity set membership transparently.
      parameters:
      - description: The entityset id.
        in: path
        name: entityset_id
        required: true
        schema:
          type: string
        example: 3a0d91ece2dce88ad3259594c7b642485235a048
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/EntityUpdate'
      responses:
        '200':
          description: OK
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Entity'
      tags:
      - Entity
    """
    entityset = get_entityset(entityset_id, request.authz.WRITE)
    data = parse_request("EntityUpdate")
    entity_id = data.get("id", make_textid())

    # Resolve the entity and the collection it lives in; an unknown entity
    # (or collection) means it will be created in the set's own collection.
    try:
        entity = get_index_entity(entity_id, request.authz.READ)
        source_collection_id = entity.get("collection_id")
        collection = get_db_collection(source_collection_id,
                                       request.authz.READ)
    except NotFound:
        entity = None
        collection = entityset.collection
    tag_request(collection_id=entityset.collection_id)

    # Only write the entity itself when it is new or the user may edit it;
    # otherwise it is merely attached to the set below.
    may_write = entity is None or check_write_entity(entity, request.authz)
    if may_write:
        if get_flag("validate", default=False):
            validate_entity(data)
        entity_id = upsert_entity(
            data,
            collection,
            authz=request.authz,
            sync=get_flag("sync", default=True),
        )

    EntitySetItem.save(entityset,
                       entity_id,
                       collection_id=collection.id,
                       added_by_id=request.authz.id)
    db.session.commit()
    return entity_view(entity_id)
Ejemplo n.º 32
0
 def __init__(self):
     """Set up the instance with a newly generated ``crawler_run`` value and
     incremental mode switched off."""
     run_id = make_textid()
     self.incremental = False
     self.crawler_run = run_id