def post(self, request, domain):
    """
    Return the entity that best matches a query, as ranked by a scorer.

    Candidates are the domain's entities whose attributes contain
    ``block_attrs``. Unless ``return_canonical`` is falsy, an alias is
    followed through ``alias_for`` until a non-alias entity is reached.
    """
    payload = request.data.copy()
    query_field = payload.get("query_field")
    query_value = payload.get("query_value")
    block_attrs = payload.get("block_attrs", {})
    return_canonical = payload.get("return_canonical", True)
    scorer_name = payload.get("scorer", "fuzzywuzzy.default_process")

    try:
        scorer = import_class("crosswalk.scorers.{}".format(scorer_name))
    except ImportError:
        return Response("Invalid scorer.", status=status.HTTP_400_BAD_REQUEST)

    try:
        domain = Domain.objects.get(slug=domain)
    except ObjectDoesNotExist:
        return Response("Domain not found.", status=status.HTTP_404_NOT_FOUND)

    candidates = Entity.objects.filter(domain=domain)
    candidates = candidates.filter(attributes__contains=block_attrs)

    # Nothing to score against: answer with an empty payload.
    if candidates.count() == 0:
        return Response({}, status=status.HTTP_200_OK)

    choices = []
    for candidate in candidates:
        choices.append(candidate.attributes[query_field])
    match, score = scorer(query_value, choices)

    # Look the winning value back up to recover its entity.
    lookup = {"attributes__{}".format(query_field): match}
    entity = candidates.filter(**lookup).first()

    aliased = False
    if return_canonical:
        while entity.alias_for:
            aliased = True
            entity = entity.alias_for

    body = {
        "entity": EntitySerializer(entity).data,
        "match_score": score,
        "aliased": aliased,
    }
    return Response(body, status=status.HTTP_200_OK)
def post(self, request, domain):
    """
    Return the single entity whose ``query_field`` equals ``query_value``.

    Candidates are limited to the domain's entities whose attributes
    contain ``block_attrs``. Unless ``return_canonical`` is falsy, an
    alias is followed through ``alias_for`` to the entity it points at.
    """
    payload = request.data.copy()
    query_field = payload.get("query_field")
    query_value = payload.get("query_value")
    block_attrs = payload.get("block_attrs", {})
    return_canonical = payload.get("return_canonical", True)

    try:
        domain = Domain.objects.get(slug=domain)
    except ObjectDoesNotExist:
        return Response("Domain not found.", status=status.HTTP_404_NOT_FOUND)

    exact_lookup = {"attributes__{}".format(query_field): query_value}
    matches = Entity.objects.filter(domain=domain)
    matches = matches.filter(attributes__contains=block_attrs)
    matches = matches.filter(**exact_lookup)

    aliased = False

    # Ambiguous queries are rejected rather than guessed at.
    if matches.count() > 1:
        return Response(
            "Found more than one entity. Be more specific?",
            status=status.HTTP_403_FORBIDDEN,
        )
    if matches.count() == 0:
        return Response("Match not found.", status=status.HTTP_404_NOT_FOUND)

    entity = matches.first()
    if return_canonical:
        while entity.alias_for:
            aliased = True
            entity = entity.alias_for

    body = {"entity": EntitySerializer(entity).data, "aliased": aliased}
    return Response(body, status=status.HTTP_200_OK)
def post(self, request, domain):
    """
    Create several entities in a domain with a single bulk insert.

    The request body is iterated as a collection of attribute mappings.
    An optional "uuid" key is removed from each mapping and passed to the
    new entity as its ``uuid``; the remaining keys become its attributes.

    Every mapping is validated before anything is written, so one invalid
    item rejects the whole request and no entity is created.

    Responses:
        200 -- {"entities": [{"entity": ..., "created": True}, ...]}
        400 -- an item has nested attributes or uses a reserved key
        404 -- no domain with the given slug
    """
    try:
        domain = Domain.objects.get(slug=domain)
    except ObjectDoesNotExist:
        # A missing domain is an error, not a success: report 404 like the
        # other handlers that look a domain up by slug.
        return Response("Domain not found.", status=status.HTTP_404_NOT_FOUND)

    entities = request.data.copy()
    entity_objects = []

    for entity in entities:
        uuid = entity.pop("uuid", None)

        # Validate entity attributes before creating in bulk
        try:
            validate_shallow_dict(entity)
        except NestedAttributesError:
            return Response(
                "Cannot create entity with nested attributes.",
                status=status.HTTP_400_BAD_REQUEST,
            )

        try:
            validate_no_reserved_keys(entity)
        except ReservedKeyError:
            return Response(
                "Reserved key found in entity attributes.",
                status=status.HTTP_400_BAD_REQUEST,
            )

        entity_objects.append(
            Entity(
                uuid=uuid,
                domain=domain,
                attributes=entity,
                created_by=request.user,
            )
        )

    created_entities = Entity.objects.bulk_create(entity_objects)

    return Response(
        {
            "entities": [
                {"entity": EntitySerializer(entity).data, "created": True}
                for entity in created_entities
            ]
        },
        status=status.HTTP_200_OK,
    )
def post(self, request, domain):
    """
    POST searches for an entity based on criteria. If only one entity is
    returned from query, it is updated. If more than one, return 403.

    Request data keys read here:
        block_attrs  -- attributes the entity must contain (default {})
        update_attrs -- attributes merged over the entity's existing
                        attributes (default {})

    Responses:
        200 -- {"entity": ...} for the updated entity
        400 -- update_attrs failed validation
        403 -- more than one entity matched block_attrs
        404 -- no such domain, or no entity matched block_attrs
    """
    data = request.data.copy()

    try:
        domain = Domain.objects.get(slug=domain)
    except ObjectDoesNotExist:
        return Response("Domain not found.", status=status.HTTP_404_NOT_FOUND)

    entities = Entity.objects.filter(domain=domain)
    entities = entities.filter(attributes__contains=data.get("block_attrs", {}))

    # Count once; each count() call on an unevaluated queryset is a query.
    match_count = entities.count()
    if match_count == 0:
        return Response("Entity not found.", status=status.HTTP_404_NOT_FOUND)
    if match_count > 1:
        return Response(
            "Found more than one entity. Be more specific?",
            status=status.HTTP_403_FORBIDDEN,
        )

    entity = entities.first()
    update_attrs = data.get("update_attrs", {})

    # validate data
    try:
        full_validation(update_attrs)
    except (NestedAttributesError, ReservedKeyError):
        return Response(
            "Update data could not be validated.",
            status=status.HTTP_400_BAD_REQUEST,
        )

    entity.attributes = {**entity.attributes, **update_attrs}
    entity.save()

    # Serialize the instance that was just saved. Re-running the
    # block_attrs query here would miss it (or return a different row)
    # whenever update_attrs changes one of the blocked attributes.
    return Response(
        {"entity": EntitySerializer(entity).data},
        status=status.HTTP_200_OK,
    )
def post(self, request, domain):
    """
    Create an alias if an entity is found above a certain match threshold
    or create a new entity.

    Request data keys read here:
        query_field, query_value -- the attribute and value to match on
        block_attrs      -- attributes candidates must contain (default {})
        create_attrs     -- extra attributes for the created entity
                            (default {})
        threshold        -- score the best match must exceed to be aliased
        scorer           -- name under ``crosswalk.scorers``
                            (default "fuzzywuzzy.default_process")
        return_canonical -- follow ``alias_for`` on the matched entity
                            before returning it (default True)

    When no candidate entities exist, a new entity is created and
    ``match_score`` is None.

    Responses:
        200 -- {"entity", "created", "aliased", "match_score"}
        400 -- the scorer could not be imported
        403 -- the best-matching value belongs to more than one entity
        404 -- no domain with the given slug
        409 -- the matched entity already has exactly these attributes
    """
    user = request.user
    data = request.data.copy()
    query_field = data.get("query_field")
    query_value = data.get("query_value")
    block_attrs = data.get("block_attrs", {})
    create_attrs = data.get("create_attrs", {})
    return_canonical = data.get("return_canonical", True)
    # NOTE(review): threshold has no default; confirm callers always send it.
    threshold = data.get("threshold")
    scorer_class = data.get("scorer", "fuzzywuzzy.default_process")

    try:
        scorer = import_class("crosswalk.scorers.{}".format(scorer_class))
    except ImportError:
        return Response("Invalid scorer.", status=status.HTTP_400_BAD_REQUEST)

    try:
        domain = Domain.objects.get(slug=domain)
    except ObjectDoesNotExist:
        return Response("Domain not found.", status=status.HTTP_404_NOT_FOUND)

    # Find the best match for a query
    entities = Entity.objects.filter(domain=domain)
    entities = entities.filter(attributes__contains=block_attrs)
    entity_values = [e.attributes[query_field] for e in entities]

    entity = None
    score = None
    # Only score when there is something to score against; with no
    # candidates there is no entity to alias and a new one is created.
    if entity_values:
        match, score = scorer(query_value, entity_values)
        entities = entities.filter(
            **{"attributes__{}".format(query_field): match}
        )

        if entities.count() > 1:
            return Response(
                "More than one alias candiate for entity.",
                status=status.HTTP_403_FORBIDDEN,
            )

        entity = entities.first()

    attributes = {
        **{query_field: query_value},
        **block_attrs,
        **create_attrs,
    }

    if entity is not None and entity.attributes == attributes:
        return Response(
            "Entity appears to already exist.",
            status=status.HTTP_409_CONFLICT,
        )

    if entity is not None and score > threshold:
        aliased = True
        alias = Entity(
            attributes=attributes,
            alias_for=entity,
            created_by=user,
            domain=domain,
        )
        alias.save()
        # The response carries the matched entity (optionally resolved to
        # its canonical entity), not the alias that was just created.
        if return_canonical:
            while entity.alias_for:
                entity = entity.alias_for
    else:
        aliased = False
        entity = Entity(attributes=attributes, created_by=user, domain=domain)
        entity.save()

    return Response(
        {
            "entity": EntitySerializer(entity).data,
            "created": True,
            "aliased": aliased,
            "match_score": score,
        },
        status=status.HTTP_200_OK,
    )
def post(self, request, domain):
    """
    Return the entity whose ``query_field`` equals ``query_value``,
    creating it when no such entity exists.

    Candidates are limited to the domain's entities whose attributes
    contain ``block_attrs``. Unless ``return_canonical`` is falsy, an
    alias is followed through ``alias_for`` to the entity it points at.
    """
    payload = request.data.copy()
    user = request.user
    query_field = payload.get("query_field")
    query_value = payload.get("query_value")
    block_attrs = payload.get("block_attrs", {})
    create_attrs = payload.get("create_attrs", {})
    return_canonical = payload.get("return_canonical", True)

    try:
        domain = Domain.objects.get(slug=domain)
    except ObjectDoesNotExist:
        return Response("Domain not found.", status=status.HTTP_404_NOT_FOUND)

    exact_lookup = {"attributes__{}".format(query_field): query_value}
    matches = Entity.objects.filter(domain=domain)
    matches = matches.filter(attributes__contains=block_attrs)
    matches = matches.filter(**exact_lookup)

    aliased = False

    if matches.count() == 0:
        # No match: build the entity from the query plus the supplied
        # attributes. A "uuid" in create_attrs is used as the entity uuid
        # and kept out of the stored attributes.
        created = True
        uuid = create_attrs.pop("uuid", None)
        new_attributes = {
            query_field: query_value,
            **block_attrs,
            **create_attrs,
        }
        entity = Entity(
            uuid=uuid,
            attributes=new_attributes,
            created_by=user,
            domain=domain,
        )
        entity.save()
    else:
        created = False
        if matches.count() > 1:
            return Response(
                "Found more than one entity. Be more specific?",
                status=status.HTTP_403_FORBIDDEN,
            )
        entity = matches[0]

    if return_canonical:
        while entity.alias_for:
            aliased = True
            entity = entity.alias_for

    body = {
        "entity": EntitySerializer(entity).data,
        "created": created,
        "aliased": aliased,
    }
    return Response(body, status=status.HTTP_200_OK)
def post(self, request, domain):
    """
    Return the best-scoring entity for a query, or create a new entity
    when there are no candidates or the best score is below ``threshold``.

    Candidates are the domain's entities whose attributes contain
    ``block_attrs``. Unless ``return_canonical`` is falsy, an alias is
    followed through ``alias_for`` to the entity it points at.
    """
    payload = request.data.copy()
    user = request.user
    query_field = payload.get("query_field")
    query_value = payload.get("query_value")
    block_attrs = payload.get("block_attrs", {})
    create_attrs = payload.get("create_attrs", {})
    threshold = payload.get("threshold")
    return_canonical = payload.get("return_canonical", True)
    scorer_name = payload.get("scorer", "fuzzywuzzy.default_process")

    try:
        scorer = import_class("crosswalk.scorers.{}".format(scorer_name))
    except ImportError:
        return Response("Invalid scorer.", status=status.HTTP_400_BAD_REQUEST)

    try:
        domain = Domain.objects.get(slug=domain)
    except ObjectDoesNotExist:
        return Response("Domain not found.", status=status.HTTP_404_NOT_FOUND)

    candidates = Entity.objects.filter(domain=domain)
    candidates = candidates.filter(attributes__contains=block_attrs)

    if candidates.count() == 0:
        # Nothing to compare against, so a new entity is always created.
        score = None
        created = True
    else:
        choices = [candidate.attributes[query_field] for candidate in candidates]
        match, score = scorer(query_value, choices)
        lookup = {"attributes__{}".format(query_field): match}
        entity = candidates.filter(**lookup).first()
        created = bool(score < threshold)

    if created:
        # A "uuid" in create_attrs is used as the entity uuid and kept out
        # of the stored attributes.
        uuid = create_attrs.pop("uuid", None)
        new_attributes = {
            query_field: query_value,
            **block_attrs,
            **create_attrs,
        }
        entity = Entity(
            uuid=uuid,
            attributes=new_attributes,
            created_by=user,
            domain=domain,
        )
        entity.save()

    aliased = False
    if return_canonical:
        while entity.alias_for:
            aliased = True
            entity = entity.alias_for

    body = {
        "entity": EntitySerializer(entity).data,
        "created": created,
        "match_score": score,
        "aliased": aliased,
    }
    return Response(body, status=status.HTTP_200_OK)