Beispiel #1
0
    def merge(self, other):
        """Merge ``other`` into this entity and persist the result.

        The schema, foreign IDs, creation timestamp and data of both
        entities are combined onto ``self``. Alerts that reference ``other``
        are re-pointed at ``self``, ``other`` is deleted, and the session is
        committed.

        Raises:
            ValueError: if ``other`` has the same id as this entity, or if
                the two entities belong to different collections.
        """
        if self.id == other.id:
            raise ValueError("Cannot merge an entity with itself.")
        if self.collection_id != other.collection_id:
            raise ValueError(
                "Cannot merge entities from different collections.")  # noqa

        self.schema = model.precise_schema(self.schema, other.schema)
        # Combine the foreign IDs of *both* entities; previously ``self``'s
        # IDs were merged with themselves and ``other``'s were dropped.
        self.foreign_ids = string_set(self.foreign_ids, other.foreign_ids)
        # Keep the earlier of the two creation timestamps.
        self.created_at = min((self.created_at, other.created_at))
        self.updated_at = datetime.utcnow()

        data = merge_data(self.data, other.data)
        if self.name != other.name:
            # Keep the other entity's name around as an alias.
            data = merge_data(data, {'alias': [other.name]})
        self.data = data

        # update alerts
        # Imported inside the method rather than at module level
        # (presumably to avoid a circular import -- TODO confirm).
        from aleph.model.alert import Alert
        q = db.session.query(Alert).filter(Alert.entity_id == other.id)
        q.update({Alert.entity_id: self.id})

        # delete source entities
        other.delete()
        db.session.add(self)
        db.session.commit()
        # Reload ``other`` so the caller's reference reflects the committed
        # state.
        db.session.refresh(other)
Beispiel #2
0
    def test_model_precise_schema(self):
        """Check which schema name ``model.precise_schema`` picks for a pair."""
        # ((left, right), expected result)
        expectations = (
            (('Thing', 'Thing'), 'Thing'),
            (('Thing', 'Person'), 'Person'),
            (('Person', 'Thing'), 'Person'),
            (('Person', 'Company'), 'LegalEntity'),
        )
        for (left, right), expected in expectations:
            assert model.precise_schema(left, right) == expected

        # This pairing is expected to be rejected with InvalidData.
        with assert_raises(InvalidData):
            model.precise_schema('Person', 'Directorship')
Beispiel #3
0
def compare(left, right):
    """Score how likely it is that two entities are the same.

    ``left`` and ``right`` are mappings read via ``.get('schema')`` and
    ``.get('properties', {})``.

    Returns ``0`` when the right entity's schema is not among the left
    schema's ``matchable_schemata``. Otherwise the fingerprint score and the
    weighted per-property scores are added up, clamped to ``[0.0, 1.0]`` and
    scaled by ``0.9``.
    """
    schema_l = model.get(left.get('schema'))
    schema_r = model.get(right.get('schema'))
    candidates = list(schema_l.matchable_schemata)
    if schema_r not in candidates:
        return 0

    common = model.precise_schema(schema_l, schema_r)
    total = compare_fingerprints(left, right) * FP_WEIGHT
    props_l = left.get('properties', {})
    props_r = right.get('properties', {})

    for prop_name, prop in common.properties.items():
        weight = MATCH_WEIGHTS.get(prop.type, 0)
        # Property types without a (non-zero) weight do not affect the score.
        if weight != 0:
            values_l = props_l.get(prop_name)
            values_r = props_r.get(prop_name)
            similarity = prop.type.compare_sets(values_l, values_r)
            total = total + similarity * weight

    # Clamp into [0.0, 1.0] before applying the final 0.9 scaling.
    capped = min(1.0, total)
    bounded = max(0.0, capped)
    return bounded * 0.9
 def test_model_precise_schema(self):
     """Check the schema name ``model.precise_schema`` returns for name pairs."""
     # Identical inputs come back unchanged.
     assert model.precise_schema('Thing', 'Thing') == 'Thing'
     # Thing paired with Person yields Person in either argument order.
     assert model.precise_schema('Thing', 'Person') == 'Person'
     assert model.precise_schema('Person', 'Thing') == 'Person'
     # Person paired with Company yields LegalEntity.
     assert model.precise_schema('Person', 'Company') == 'LegalEntity'