Esempio n. 1
0
    def post(self, request, domain):
        """
        Get the best matched entity for a given query.

        Candidates are the entities of the domain whose attributes contain
        ``block_attrs``. Their ``query_field`` values are handed to the
        scorer together with ``query_value``, and the entity holding the
        winning value is returned. Entities that do not carry
        ``query_field`` cannot be scored and are skipped.

        If the entity is an alias of another entity, the aliased entity is
        returned, unless ``return_canonical`` is falsy.

        Responses:
            400 if the scorer cannot be imported.
            404 if the domain does not exist.
            200 with an empty object if there is nothing to score.
            200 with ``entity``, ``match_score`` and ``aliased`` otherwise.
        """
        data = request.data.copy()
        query_field = data.get("query_field")
        query_value = data.get("query_value")
        return_canonical = data.get("return_canonical", True)
        block_attrs = data.get("block_attrs", {})
        scorer_class = data.get("scorer", "fuzzywuzzy.default_process")

        try:
            scorer = import_class("crosswalk.scorers.{}".format(scorer_class))
        except ImportError:
            return Response("Invalid scorer.",
                            status=status.HTTP_400_BAD_REQUEST)

        try:
            domain = Domain.objects.get(slug=domain)
        except ObjectDoesNotExist:
            return Response("Domain not found.",
                            status=status.HTTP_404_NOT_FOUND)

        entities = Entity.objects.filter(domain=domain)
        entities = entities.filter(attributes__contains=block_attrs)

        # Indexing attributes directly would raise KeyError for any entity
        # in the block that lacks the queried attribute, so only entities
        # that actually carry it are scored.
        entity_values = [
            e.attributes[query_field]
            for e in entities
            if query_field in e.attributes
        ]

        # Covers both "no entities in the block" and "none can be scored".
        if not entity_values:
            return Response({}, status=status.HTTP_200_OK)

        match, score = scorer(query_value, entity_values)

        entity = entities.filter(**{
            "attributes__{}".format(query_field): match
        }).first()

        aliased = False

        if return_canonical:
            # Follow the alias chain up to the entity that aliases nothing.
            while entity.alias_for:
                aliased = True
                entity = entity.alias_for

        return Response(
            {
                "entity": EntitySerializer(entity).data,
                "match_score": score,
                "aliased": aliased,
            },
            status=status.HTTP_200_OK,
        )
Esempio n. 2
0
    def post(self, request, domain):
        """
        Look up the single entity whose ``query_field`` equals
        ``query_value`` within the block described by ``block_attrs``.

        When the entity found is an alias and ``return_canonical`` is truthy
        (the default), the alias chain is followed and the entity at its end
        is returned instead.

        Responses:
            404 if the domain does not exist or nothing matches.
            403 if more than one entity matches.
            200 with ``entity`` and ``aliased`` otherwise.
        """
        payload = request.data.copy()
        query_field = payload.get("query_field")
        query_value = payload.get("query_value")
        block_attrs = payload.get("block_attrs", {})
        return_canonical = payload.get("return_canonical", True)

        try:
            domain = Domain.objects.get(slug=domain)
        except ObjectDoesNotExist:
            return Response(
                "Domain not found.", status=status.HTTP_404_NOT_FOUND
            )

        # Narrow down step by step: domain, then block, then exact value.
        field_lookup = {"attributes__{}".format(query_field): query_value}
        matches = (
            Entity.objects.filter(domain=domain)
            .filter(attributes__contains=block_attrs)
            .filter(**field_lookup)
        )

        if matches.count() > 1:
            return Response(
                "Found more than one entity. Be more specific?",
                status=status.HTTP_403_FORBIDDEN,
            )
        if matches.count() == 0:
            return Response(
                "Match not found.", status=status.HTTP_404_NOT_FOUND
            )

        entity = matches.first()
        aliased = False

        if return_canonical:
            # Walk up until reaching an entity that is not itself an alias.
            while entity.alias_for:
                entity = entity.alias_for
                aliased = True

        body = {"entity": EntitySerializer(entity).data, "aliased": aliased}
        return Response(body, status=status.HTTP_200_OK)
Esempio n. 3
0
    def post(self, request, domain):
        """
        Create several entities in one domain with a single bulk insert.

        The request body is iterated and each item becomes the attributes of
        one entity (presumably a list of flat dicts; confirm against the
        client). An optional ``uuid`` key is removed from each item and
        passed to the entity as its ``uuid``.

        Responses:
            404 if the domain does not exist.
            400 if any item has nested attributes or uses a reserved key;
            nothing is created in that case.
            200 with an ``entities`` list otherwise.
        """
        try:
            domain = Domain.objects.get(slug=domain)
        except ObjectDoesNotExist:
            # A missing domain is an error, not a success: answer 404 like
            # the other views do instead of 200 with an error string.
            return Response(
                "Domain not found.", status=status.HTTP_404_NOT_FOUND
            )

        entities = request.data.copy()

        entity_objects = []

        for entity in entities:
            uuid = entity.pop("uuid", None)

            # Validate entity attributes before creating in bulk
            try:
                validate_shallow_dict(entity)
            except NestedAttributesError:
                return Response(
                    "Cannot create entity with nested attributes.",
                    status=status.HTTP_400_BAD_REQUEST,
                )
            try:
                validate_no_reserved_keys(entity)
            except ReservedKeyError:
                return Response(
                    "Reserved key found in entity attributes.",
                    status=status.HTTP_400_BAD_REQUEST,
                )

            entity_objects.append(
                Entity(
                    uuid=uuid,
                    domain=domain,
                    attributes=entity,
                    created_by=request.user,
                ))

        # Only reached when every item validated, so the insert is
        # all-or-nothing with respect to validation errors.
        created_entities = Entity.objects.bulk_create(entity_objects)

        return Response(
            {
                "entities": [{
                    "entity": EntitySerializer(entity).data,
                    "created": True
                } for entity in created_entities]
            },
            status=status.HTTP_200_OK,
        )
Esempio n. 4
0
    def post(self, request, domain):
        """
        POST searches for an entity based on criteria. If only one entity is
        returned from query, it is updated. If more than one, return 403.

        The entity is looked up by ``block_attrs`` within the domain, and
        ``update_attrs`` is merged over its existing attributes (keys in
        ``update_attrs`` win).

        Responses:
            404 if the domain does not exist or no entity matches.
            403 if more than one entity matches.
            400 if ``update_attrs`` fails validation.
            200 with the updated ``entity`` otherwise.
        """
        data = request.data.copy()

        try:
            domain = Domain.objects.get(slug=domain)
        except ObjectDoesNotExist:
            return Response("Domain not found.",
                            status=status.HTTP_404_NOT_FOUND)

        entities = Entity.objects.filter(domain=domain)
        entities = entities.filter(
            attributes__contains=data.get("block_attrs", {}))

        # Count once; the same number decides both error branches.
        match_count = entities.count()

        if match_count == 0:
            return Response("Entity not found.",
                            status=status.HTTP_404_NOT_FOUND)

        if match_count > 1:
            return Response(
                "Found more than one entity. Be more specific?",
                status=status.HTTP_403_FORBIDDEN,
            )

        entity = entities.first()
        update_attrs = data.get("update_attrs", {})

        # validate data
        try:
            full_validation(update_attrs)
        except (NestedAttributesError, ReservedKeyError):
            return Response(
                "Update data could not be validated.",
                status=status.HTTP_400_BAD_REQUEST,
            )

        entity.attributes = {**entity.attributes, **update_attrs}
        entity.save()

        # Serialize the instance that was just saved. Re-running the
        # block_attrs query at this point could return nothing (or another
        # entity) when update_attrs changed one of the blocked attributes.
        return Response(
            {"entity": EntitySerializer(entity).data},
            status=status.HTTP_200_OK,
        )
Esempio n. 5
0
    def post(self, request, domain):
        """
        Create an alias if an entity is found above a certain match threshold
        or create a new entity.

        Candidates are the entities of the domain whose attributes contain
        ``block_attrs``. Their ``query_field`` values are scored against
        ``query_value``. Entities that do not carry ``query_field`` cannot be
        scored and are skipped. When nothing can be scored, a new entity is
        created and ``match_score`` is ``None``.

        ``threshold`` is compared directly with the score, so the request
        has to supply it as a number.

        Responses:
            400 if the scorer cannot be imported.
            404 if the domain does not exist.
            403 if the best-matching value belongs to several entities.
            409 if the matched entity already has exactly the attributes
            that would be created.
            200 with ``entity``, ``created``, ``aliased`` and
            ``match_score`` otherwise. When an alias is created, ``entity``
            is the matched entity (or the end of its alias chain if
            ``return_canonical`` is truthy), not the new alias.
        """
        user = request.user
        data = request.data.copy()
        query_field = data.get("query_field")
        query_value = data.get("query_value")
        block_attrs = data.get("block_attrs", {})
        create_attrs = data.get("create_attrs", {})
        return_canonical = data.get("return_canonical", True)
        threshold = data.get("threshold")
        scorer_class = data.get("scorer", "fuzzywuzzy.default_process")

        try:
            scorer = import_class("crosswalk.scorers.{}".format(scorer_class))
        except ImportError:
            return Response(
                "Invalid scorer.", status=status.HTTP_400_BAD_REQUEST
            )

        try:
            domain = Domain.objects.get(slug=domain)
        except ObjectDoesNotExist:
            return Response(
                "Domain not found.", status=status.HTTP_404_NOT_FOUND
            )

        attributes = {
            **{query_field: query_value},
            **block_attrs,
            **create_attrs,
        }

        # Find the best match for a query
        entities = Entity.objects.filter(domain=domain)
        entities = entities.filter(attributes__contains=block_attrs)

        # Only entities that carry the queried attribute can be scored;
        # indexing the others would raise KeyError.
        entity_values = [
            e.attributes[query_field]
            for e in entities
            if query_field in e.attributes
        ]

        entity = None
        score = None

        # With nothing to score there is no match at all; skip straight to
        # creating a new entity instead of calling the scorer on an empty
        # list and dereferencing a missing entity.
        if entity_values:
            match, score = scorer(query_value, entity_values)

            candidates = entities.filter(
                **{"attributes__{}".format(query_field): match}
            )

            if candidates.count() > 1:
                return Response(
                    "More than one alias candiate for entity.",
                    status=status.HTTP_403_FORBIDDEN,
                )

            entity = candidates.first()

            if entity is not None and entity.attributes == attributes:
                return Response(
                    "Entity appears to already exist.",
                    status=status.HTTP_409_CONFLICT,
                )

        if entity is not None and score > threshold:
            aliased = True
            alias = Entity(
                attributes=attributes,
                alias_for=entity,
                created_by=user,
                domain=domain,
            )
            alias.save()
            if return_canonical:
                # Report the end of the alias chain rather than the
                # directly matched entity.
                while entity.alias_for:
                    entity = entity.alias_for
        else:
            aliased = False
            entity = Entity(
                attributes=attributes, created_by=user, domain=domain
            )
            entity.save()

        return Response(
            {
                "entity": EntitySerializer(entity).data,
                "created": True,
                "aliased": aliased,
                "match_score": score,
            },
            status=status.HTTP_200_OK,
        )
    def post(self, request, domain):
        """
        Return the entity whose ``query_field`` equals ``query_value``
        inside the block given by ``block_attrs``, creating it when no such
        entity exists.

        A newly created entity gets the query pair, ``block_attrs`` and
        ``create_attrs`` as attributes (later ones override earlier ones);
        a ``uuid`` key in ``create_attrs`` is taken out and used as the
        entity's ``uuid``.

        When the resulting entity is an alias and ``return_canonical`` is
        truthy (the default), the entity at the end of the alias chain is
        returned instead.

        Responses:
            404 if the domain does not exist.
            403 if more than one entity matches.
            200 with ``entity``, ``created`` and ``aliased`` otherwise.
        """
        creator = request.user
        payload = request.data.copy()
        query_field = payload.get("query_field")
        query_value = payload.get("query_value")
        block_attrs = payload.get("block_attrs", {})
        create_attrs = payload.get("create_attrs", {})
        return_canonical = payload.get("return_canonical", True)

        try:
            domain = Domain.objects.get(slug=domain)
        except ObjectDoesNotExist:
            return Response("Domain not found.",
                            status=status.HTTP_404_NOT_FOUND)

        # Narrow down step by step: domain, then block, then exact value.
        field_lookup = {"attributes__{}".format(query_field): query_value}
        matches = (
            Entity.objects.filter(domain=domain)
            .filter(attributes__contains=block_attrs)
            .filter(**field_lookup)
        )

        if matches.count() == 0:
            # Nothing there yet: build the entity from the request.
            new_uuid = create_attrs.pop("uuid", None)
            new_attributes = {
                query_field: query_value,
                **block_attrs,
                **create_attrs,
            }
            entity = Entity(
                uuid=new_uuid,
                attributes=new_attributes,
                created_by=creator,
                domain=domain,
            )
            entity.save()
            created = True
        elif matches.count() > 1:
            return Response(
                "Found more than one entity. Be more specific?",
                status=status.HTTP_403_FORBIDDEN,
            )
        else:
            entity = matches[0]
            created = False

        aliased = False
        if return_canonical:
            # Walk up until reaching an entity that is not itself an alias.
            while entity.alias_for:
                entity = entity.alias_for
                aliased = True

        body = {
            "entity": EntitySerializer(entity).data,
            "created": created,
            "aliased": aliased,
        }
        return Response(body, status=status.HTTP_200_OK)
    def post(self, request, domain):
        """
        Get the best matched entity for a given query or create an entity if
        one is not found above a certain match threshold.

        Candidates are the entities of the domain whose attributes contain
        ``block_attrs``. Their ``query_field`` values are scored against
        ``query_value``. Entities that do not carry ``query_field`` cannot be
        scored and are skipped. A new entity is created when nothing can be
        scored (``match_score`` is then ``None``) or when the best score is
        below ``threshold``.

        ``threshold`` is compared directly with the score, so the request
        has to supply it as a number.

        If the entity is an alias of another entity, the aliased entity is
        returned, unless ``return_canonical`` is falsy.

        Responses:
            400 if the scorer cannot be imported.
            404 if the domain does not exist.
            200 with ``entity``, ``created``, ``match_score`` and
            ``aliased`` otherwise.
        """
        user = request.user
        data = request.data.copy()
        query_field = data.get("query_field")
        query_value = data.get("query_value")
        block_attrs = data.get("block_attrs", {})
        create_attrs = data.get("create_attrs", {})
        threshold = data.get("threshold")
        return_canonical = data.get("return_canonical", True)
        scorer_class = data.get("scorer", "fuzzywuzzy.default_process")

        try:
            scorer = import_class("crosswalk.scorers.{}".format(scorer_class))
        except ImportError:
            return Response(
                "Invalid scorer.", status=status.HTTP_400_BAD_REQUEST
            )

        try:
            domain = Domain.objects.get(slug=domain)
        except ObjectDoesNotExist:
            return Response(
                "Domain not found.", status=status.HTTP_404_NOT_FOUND
            )

        entities = Entity.objects.filter(domain=domain)
        entities = entities.filter(attributes__contains=block_attrs)

        # Only entities that carry the queried attribute can be scored;
        # indexing the others would raise KeyError.
        entity_values = [
            e.attributes[query_field]
            for e in entities
            if query_field in e.attributes
        ]

        if entity_values:
            match, score = scorer(query_value, entity_values)

            entity = entities.filter(
                **{"attributes__{}".format(query_field): match}
            ).first()

            # A best score below the threshold means "no good match".
            created = bool(score < threshold)
        else:
            # Nothing to compare against: always create.
            score = None
            created = True

        if created:
            uuid = create_attrs.pop("uuid", None)
            entity = Entity(
                uuid=uuid,
                attributes={
                    **{query_field: query_value},
                    **block_attrs,
                    **create_attrs,
                },
                created_by=user,
                domain=domain,
            )
            entity.save()

        aliased = False
        if return_canonical:
            # Follow the alias chain up to the entity that aliases nothing.
            while entity.alias_for:
                aliased = True
                entity = entity.alias_for

        return Response(
            {
                "entity": EntitySerializer(entity).data,
                "created": created,
                "match_score": score,
                "aliased": aliased,
            },
            status=status.HTTP_200_OK,
        )