Beispiel #1
0
    def merge(self, other):
        """Merge ``other`` into this entity and delete ``other``.

        The schema, foreign ids, creation date and data of both entities
        are combined onto ``self``. If the names differ, ``other.name`` is
        merged into the data under the ``alias`` key. Alerts referencing
        ``other`` are re-pointed to ``self`` before ``other`` is deleted
        and the session is committed.

        Raises:
            ValueError: if ``other`` is the same entity as ``self``, or if
                the two entities belong to different collections.
        """
        if self.id == other.id:
            raise ValueError("Cannot merge an entity with itself.")
        if self.collection_id != other.collection_id:
            raise ValueError(
                "Cannot merge entities from different collections.")  # noqa

        self.schema = model.precise_schema(self.schema, other.schema)
        # Combine the foreign ids of BOTH entities; the previous code passed
        # ``self.foreign_ids`` twice and so dropped the ids of ``other``.
        self.foreign_ids = string_set(self.foreign_ids, other.foreign_ids)
        self.created_at = min((self.created_at, other.created_at))
        self.updated_at = datetime.utcnow()

        data = merge_data(self.data, other.data)
        if self.name != other.name:
            # Keep the name of the merged-away entity as an alias.
            data = merge_data(data, {'alias': [other.name]})
        self.data = data

        # update alerts
        # Imported here rather than at module level (presumably to avoid a
        # circular import between the model modules).
        from aleph.model.alert import Alert
        q = db.session.query(Alert).filter(Alert.entity_id == other.id)
        q.update({Alert.entity_id: self.id})

        # delete source entities
        other.delete()
        db.session.add(self)
        db.session.commit()
        db.session.refresh(other)
Beispiel #2
0
 def test_merge_list(self):
     """Merging two dicts with list values keeps items from both lists."""
     merged = merge_data({'lst': ['a', 'b', 'c']},
                         {'lst': ['c', 'd', 'e']})
     # 'a' is only in the first list, 'e' only in the second, 'c' in both.
     for expected in ('a', 'c', 'e'):
         assert expected in merged['lst'], merged
Beispiel #3
0
def bulk_load_query(collection_id, query):
    """Map the records of ``query`` to entities and index them in bulk.

    If no collection exists for ``collection_id`` a warning is logged and
    nothing is loaded. Otherwise every record of the mapping source is
    mapped to entities, which are buffered by id and flushed to the index
    whenever the buffer reaches ``BULK_PAGE`` entries. The collection is
    re-indexed at the end.
    """
    collection = Collection.by_id(collection_id)
    if collection is None:
        log.warning("Collection does not exist: %s", collection_id)
        return

    mapping = model.make_mapping(query, key_prefix=collection.foreign_id)
    pending = {}
    emitted = 0
    for record_no, record in enumerate(mapping.source.records, 1):
        for fragment in mapping.map(record).values():
            key = fragment.get('id')
            if key is not None:
                # Tabular sources frequently emit the same entity several
                # times in quick succession (e.g. one row per director of
                # the same company), so fold each new fragment into the
                # version already buffered under that id.
                pending[key] = merge_data(fragment, pending.get(key, {}))
                emitted += 1

        if record_no % 1000 == 0:
            log.info("[%s] Loaded %s records, %s entities...",
                     collection.foreign_id, record_no, emitted)

        # Flush the buffer once it has grown to a full page.
        if len(pending) >= BULK_PAGE:
            index_bulk(collection, pending, chunk_size=BULK_PAGE)
            pending = {}

    # Flush whatever is left over after the last full page.
    if pending:
        index_bulk(collection, pending, chunk_size=BULK_PAGE)

    # Update collection stats
    index_collection(collection)
Beispiel #4
0
 def test_merge_value(self):
     """For a scalar under the same key, the second argument's value wins."""
     before = {'foo': 'bar'}
     after = {'foo': 'quux'}
     merged = merge_data(before, after)
     assert merged['foo'] == 'quux', merged
Beispiel #5
0
def update(entity_id):
    """Apply the request payload to an entity and return it serialized.

    The entity is fetched with WRITE authorization. When the ``merge``
    flag is set, the submitted properties are merged with the entity's
    existing data before the update is applied and committed.
    """
    entity = get_db_entity(entity_id, request.authz.WRITE)
    payload = parse_request(EntityUpdateSchema)
    if get_flag('merge'):
        payload['properties'] = merge_data(payload.get('properties'),
                                           entity.data)
    entity.update(payload)
    db.session.commit()
    # ``sync`` defaults to True when the flag is absent.
    refreshed = update_entity(entity, sync=get_flag('sync', True))
    return EntitySerializer.jsonify(refreshed)
Beispiel #6
0
 def merge(self, left, right):
     """Return a new entity dict combining ``left`` and ``right``.

     Properties are merged with ``merge_data``, the schema is chosen via
     ``precise_schema``, and the id is taken from ``left`` with ``right``
     as the fallback.
     """
     merged_props = merge_data(left.get('properties'),
                               right.get('properties'))
     merged_schema = self.precise_schema(left.get('schema'),
                                         right.get('schema'))
     fallback_id = right.get('id')
     combined = {'id': left.get('id', fallback_id)}
     combined['schema'] = merged_schema
     combined['properties'] = merged_props
     return combined
Beispiel #7
0
 def test_merge_different(self):
     """Keys that exist in only one input are both present after merging."""
     merged = merge_data({'foo': 'quux'}, {'bar': 'quux'})
     for key in ('foo', 'bar'):
         assert merged[key] == 'quux', merged
Beispiel #8
0
def update(id):
    """Update an entity from the request payload and return its view.

    The entity is fetched with WRITE authorization. If the ``merge``
    query argument is truthy, the submitted properties are merged with
    the entity's existing data. After committing, both the entity and its
    collection are passed to their update functions.
    """
    entity = get_db_entity(id, request.authz.WRITE)
    payload = parse_request(schema=EntityUpdateSchema)
    merge_requested = as_bool(request.args.get('merge'))
    if merge_requested:
        payload['properties'] = merge_data(payload.get('properties'),
                                           entity.data)
    entity.update(payload)
    db.session.commit()
    update_entity(entity)
    update_collection(entity.collection)
    return view(entity.id)
Beispiel #9
0
def update(entity_id):
    """Apply the request payload to an entity and return it serialized.

    The entity is fetched with WRITE authorization and the request is
    tagged with the entity's collection id. When the ``merge`` flag is
    set, the submitted properties are merged with the entity's existing
    data before the update is applied and committed.
    """
    entity = get_db_entity(entity_id, request.authz.WRITE)
    tag_request(collection_id=entity.collection_id)
    payload = parse_request(EntityUpdateSchema)
    if get_flag('merge'):
        payload['properties'] = merge_data(payload.get('properties'),
                                           entity.data)
    entity.update(payload)
    db.session.commit()
    # ``sync`` defaults to True when the flag is absent.
    refreshed = update_entity(entity, sync=get_flag('sync', True))
    return EntitySerializer.jsonify(refreshed)
Beispiel #10
0
def update(id):
    """Update an entity from the request payload and return its view.

    ``get_entity`` returns a pair of which only the second element (the
    entity) is used here. If the ``merge`` query argument is truthy, the
    submitted ``data`` is merged with the entity's existing data, with
    missing or empty values on either side treated as an empty dict.
    """
    _, entity = get_entity(id, request.authz.WRITE)
    payload = parse_request(schema=EntitySchema)
    if as_bool(request.args.get('merge')):
        submitted = payload.get('data') or {}
        stored = entity.data or {}
        payload['data'] = merge_data(submitted, stored)
    entity.update(payload)
    db.session.commit()
    update_entity(entity)
    update_collection(entity.collection)
    return view(entity.id)
Beispiel #11
0
def update(id):
    """Apply the request payload to an entity and return it serialized.

    The entity is fetched with WRITE authorization. When the ``merge``
    flag is set, the submitted properties are merged with the entity's
    existing data. The ``sync`` flag is forwarded to ``update_entity``.
    """
    entity = get_db_entity(id, request.authz.WRITE)
    payload = parse_request(EntityUpdateSchema)
    # Read the sync flag up front, before the merge flag is consulted.
    sync = get_flag('sync')
    if get_flag('merge'):
        payload['properties'] = merge_data(payload.get('properties'),
                                           entity.data)
    entity.update(payload)
    db.session.commit()
    refreshed = update_entity(entity, sync=sync)
    return serialize_data(refreshed, CombinedSchema)
Beispiel #12
0
def update(entity_id):
    """
    ---
    post:
      summary: Update an entity
      description: >
        Update the entity with id `entity_id`. This only applies to
        entities which are backed by a database row, i.e. not any
        entities resulting from a mapping or bulk load.
      parameters:
      - in: path
        name: entity_id
        required: true
        schema:
          type: string
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/EntityUpdate'
      responses:
        '200':
          description: OK
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Entity'
      tags:
      - Entity
    """
    # NOTE: the docstring above is an OpenAPI spec fragment and is left
    # untouched, since it may be consumed at runtime.
    db_entity = get_db_entity(entity_id, request.authz.WRITE)
    tag_request(collection_id=db_entity.collection_id)
    payload = parse_request('EntityUpdate')
    if get_flag('merge'):
        # Combine the submitted properties with the stored entity data.
        payload['properties'] = merge_data(payload.get('properties'),
                                           db_entity.data)
    db_entity.update(payload)
    db.session.commit()
    # ``sync`` defaults to True when the flag is absent.
    update_entity(db_entity, sync=get_flag('sync', True))
    # Re-read the entity from the index (READ authorization) for the
    # response rather than serializing the database object.
    indexed = get_index_entity(entity_id, request.authz.READ)
    return EntitySerializer.jsonify(indexed)
Beispiel #13
0
def finalize_index(proxy, context, texts):
    """Build the final index document for an entity proxy.

    Property values are appended to ``texts`` (which is modified in
    place), except for properties whose type name is one of ``entity``,
    ``date``, ``url``, ``country`` or ``language``. The proxy's full dict
    is merged with ``context``, then ``name``, ``text`` and
    ``fingerprints`` are set, an empty ``created_at`` is filled from
    ``updated_at``, and ``id`` is removed. The result is passed through
    ``clean_dict`` and returned.
    """
    skipped_types = ['entity', 'date', 'url', 'country', 'language']
    for prop, value in proxy.itervalues():
        if prop.type.name not in skipped_types:
            texts.append(value)

    doc = merge_data(context, proxy.to_full_dict())
    doc['name'] = proxy.caption
    doc['text'] = index_form(texts)

    # Fingerprint every name, drop the ones that produced nothing, and
    # de-duplicate the rest.
    generated = [fingerprints.generate(name) for name in doc.get('names', [])]
    unique_fps = {fp for fp in generated if fp is not None}
    doc['fingerprints'] = list(unique_fps)

    created_at = doc.get('created_at')
    if not created_at:
        doc['created_at'] = doc.get('updated_at')
    doc.pop('id', None)
    return clean_dict(doc)
Beispiel #14
0
 def test_merge_objects(self):
     """Nested dicts are merged key-wise rather than replaced wholesale."""
     merged = merge_data({'data': {'nested': True}},
                         {'data': {'banana': 'hello'}})
     # Both nested keys must be present with truthy values.
     for key in ('nested', 'banana'):
         assert merged['data'][key], merged