def update_nodes(self, entitytypeid, data):
    self.resource.prune(entitytypes=[entitytypeid])

    if self.schema is None:
        self.schema = Entity.get_mapping_schema(self.resource.entitytypeid)

    for value in data[entitytypeid]:
        baseentity = None
        for newentity in value['nodes']:
            entity = Entity()
            if newentity['entitytypeid'] in self.schema:
                entity.create_from_mapping(
                    self.resource.entitytypeid,
                    self.schema[newentity['entitytypeid']]['steps'],
                    newentity['entitytypeid'],
                    newentity['value'],
                    newentity['entityid'])
                if baseentity is None:
                    baseentity = entity
                else:
                    baseentity.merge(entity)

        if entitytypeid == 'COMPONENT.E18':
            production_entities = self.resource.find_entities_by_type_id('PRODUCTION.E12')
            if len(production_entities) > 0:
                self.resource.merge_at(baseentity, 'PRODUCTION.E12')
            else:
                self.resource.merge_at(baseentity, self.resource.entitytypeid)
        else:
            self.resource.merge_at(baseentity, self.resource.entitytypeid)

    self.resource.trim()

def test_save(self):
    val = models.Values.objects.get(value='Legal')
    python_object = {
        "entityid": "",
        "entitytypeid": "PERSON.E1",
        "value": "",
        "property": "P1",
        "child_entities": [{
            "entityid": "",
            "entitytypeid": "NAME.E1",
            "value": "Alexei",
            "property": "P1",
            "child_entities": [{
                "entityid": "",
                "entitytypeid": "NAME_TYPE.E1",
                "value": val.pk,
                "property": "P1",
                "child_entities": []
            }]
        }]
    }

    entity = Entity(python_object)
    entity._save()

    self.assertNotEqual(python_object['entityid'], entity.entityid)

    entity = Entity().get(entity.entityid)
    self.assertEqual(entity.child_entities[0].value, 'Alexei')
    self.assertEqual(entity.child_entities[0].child_entities[0].value, val.pk)

def test_save(self):
    python_object = {
        "entityid": "",
        "entitytypeid": "CAR.E1",
        "value": "",
        "property": "P1",
        "child_entities": [{
            "entityid": "",
            "entitytypeid": "MAKE.E1",
            "value": "Porsche",
            "property": "P1",
            "child_entities": [{
                "entityid": "",
                "entitytypeid": "MODEL.E1",
                "value": "911",
                "property": "P1",
                "child_entities": []
            }]
        }]
    }

    entity = Entity(python_object)
    entity._save()

    self.assertNotEqual(python_object['entityid'], entity.entityid)

    entity = Entity().get(entity.entityid)
    self.assertEqual(entity.child_entities[0].value, 'Porsche')
    self.assertEqual(entity.child_entities[0].child_entities[0].value, '911')

def validate_headers(self, workbook, skip_resourceid_col=False, resource_type=None):
    # NOTE: when resource_type == 'relations' every sheet is assumed to be named
    # 'RELATIONS'; otherwise q below would be referenced before assignment.
    if resource_type != 'relations':
        q = Entity().get_mapping_schema(resource_type)
    result = {'success': True, 'errors': []}

    for sheet in workbook.worksheets:
        if sheet.title == 'RELATIONS':
            restypenodes = RELATION_HEADER
        else:
            restypenodes = set(q.keys())
        for header in sheet.iter_cols(max_row=1):
            nodename = header[0].value
            if nodename is not None:
                if skip_resourceid_col and nodename == 'RESOURCEID':
                    continue
                if nodename not in restypenodes:
                    msg = "{} is not a valid {} node name".format(nodename, resource_type)
                    result['errors'].append(msg)

    if result['errors']:
        result['success'] = False
    return result

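# Usage sketch (illustrative, not from the original source): validating the
# headers of an import workbook before ingesting it. Assumes openpyxl is
# available; 'import_data.xlsx' is a hypothetical path and 'importer' stands
# in for whatever object defines validate_headers above.
import openpyxl

workbook = openpyxl.load_workbook('import_data.xlsx')
result = importer.validate_headers(workbook, skip_resourceid_col=True,
                                   resource_type='HERITAGE_RESOURCE_GROUP.E27')
if not result['success']:
    for error in result['errors']:
        print error
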
def test_form_classes(self):
    print "\n\n==== TESTING ALL FORM CLASSES ===="

    # These are considered exceptions because they are often included
    # in a resource type but actually enter data for a different resource
    # type. So they will throw errors below, even if they are valid.
    form_exceptions = [
        'man-made',
        'man-made-component',
        'related-files',
        'related-resources',
        'file-upload'
    ]

    for restype in sorted(settings.RESOURCE_TYPE_CONFIGS()):
        print "\n\n--- {} FORMS ---".format(restype)
        q = Entity().get_mapping_schema(restype)
        restypenodes = set(q.keys())

        res = Resource({"entitytypeid": restype})
        for group in res.form_groups:
            for form in group['forms']:
                invalid_nodes = []
                fclass = form['class']
                formid = fclass.get_info()['id']
                if formid in form_exceptions:
                    continue
                print "\nFORM:", formid

                template_errors = self.test_template(formid, restypenodes)
                print "{} invalid node{} in the template".format(
                    len(template_errors), "s" if len(template_errors) != 1 else "")
                if len(template_errors) > 0:
                    print " ", template_errors

                a = res.get_form(formid)
                try:
                    a.load("en-US")
                except Exception as e:
                    print "ERROR LOADING THIS FORMS.PY CLASS"
                    traceback.print_exc()
                    continue

                for key in a.data:
                    if key not in restypenodes and "." in key:
                        invalid_nodes.append(key)
                    domains = a.data[key].get('domains', [])
                    for domainnode in domains:
                        if domainnode not in restypenodes:
                            invalid_nodes.append(domainnode)

                print "{} invalid node{} in the forms.py class".format(
                    len(invalid_nodes), "s" if len(invalid_nodes) != 1 else "")
                if len(invalid_nodes) > 0:
                    print " ", invalid_nodes

def make_full_value_lookup(self, restype):
    q = Entity().get_mapping_schema(restype)
    restypenodes = set(q.keys())

    outdict = {}
    for node_name in restypenodes:
        node_obj = EntityTypes.objects.get(pk=node_name)
        if node_obj.businesstablename == "domains":
            outdict[node_name] = self.get_label_lookup(node_obj.conceptid_id, return_entity=True)

    with open("full_label_lookup.json", 'wb') as out:
        json.dump(outdict, out, indent=1)
    return outdict

def schema(self):
    try:
        return self._schema
    except AttributeError:
        self._schema = Entity.get_mapping_schema(self.resource.entitytypeid)
        return self._schema

def EntityTypes(request, entitytypeid):
    entityschema = []

    if entitytypeid == '':
        return HttpResponse(JSONSerializer().serialize({}, ensure_ascii=True, indent=4))
    else:
        if request.GET.get('f') is None:
            return render(request, 'graph.htm', {})
        else:
            entityschema = {entitytypeid: Entity.get_mapping_schema(entitytypeid)}

            if request.GET.get('f') == 'json':
                return HttpResponse(JSONSerializer().serialize(entityschema, ensure_ascii=True, indent=4), content_type='application/json')

            if request.GET.get('f') == 'd3':
                d3Schema = d3Obj()
                d3Schema.name = entitytypeid

                for assestAttr in entityschema[entitytypeid]:
                    d3ObjAssestAttr = d3Obj()
                    d3ObjAssestAttr.name = assestAttr

                    for step in entityschema[entitytypeid][assestAttr]['steps']:
                        d3ObjStep = d3Obj()
                        d3ObjStep.name = step['entitytypedomain'] + ' ' + step['propertyid'] + ' ' + step['entitytyperange']
                        d3ObjAssestAttr.children.append(d3ObjStep)

                    d3Schema.children.append(d3ObjAssestAttr)

                return HttpResponse(JSONSerializer().serialize(d3Schema, ensure_ascii=True, indent=4))

def EntityTypes(request, entitytypeid):
    entityschema = []

    if entitytypeid == '':
        return HttpResponse(JSONSerializer().serialize({}, ensure_ascii=True, indent=4))
    else:
        if request.GET.get('f') is None:
            return render_to_response('graph.htm', {}, context_instance=RequestContext(request))
        else:
            entityschema = {entitytypeid: Entity.get_mapping_schema(entitytypeid)}

            if request.GET.get('f') == 'json':
                return HttpResponse(JSONSerializer().serialize(entityschema, ensure_ascii=True, indent=4), content_type='application/json')

            if request.GET.get('f') == 'd3':
                d3Schema = d3Obj()
                d3Schema.name = entitytypeid

                for assestAttr in entityschema[entitytypeid]:
                    d3ObjAssestAttr = d3Obj()
                    d3ObjAssestAttr.name = assestAttr

                    for step in entityschema[entitytypeid][assestAttr]['steps']:
                        d3ObjStep = d3Obj()
                        d3ObjStep.name = step['entitytypedomain'] + ' ' + step['propertyid'] + ' ' + step['entitytyperange']
                        d3ObjAssestAttr.children.append(d3ObjStep)

                    d3Schema.children.append(d3ObjAssestAttr)

                return HttpResponse(JSONSerializer().serialize(d3Schema, ensure_ascii=True, indent=4))

def delete_index(self):
    """
    removes an entity from the search index
    """
    se = SearchEngineFactory().create()
    se.delete(index='entity', doc_type=self.entitytypeid, id=self.entityid)
    se.delete(index='resource', doc_type=self.entitytypeid, id=self.entityid)
    se.delete(index='maplayers', doc_type=self.entitytypeid, id=self.entityid)

    def delete_indexes(entity):
        if entity.businesstablename == 'strings' or entity.businesstablename == 'domains':
            se.delete_terms(entity.entityid)

    entity = Entity().get(self.entityid)
    entity.traverse(delete_indexes)

def insert_actors(settings=None):
    if not settings:
        from django.conf import settings

    logging.warning("INSERTING ACTORS")
    resource_entity_type = 'HERITAGE_RESOURCE_GROUP.E27'
    mapping_schema = Entity.get_mapping_schema(resource_entity_type)

    # access settings to determine which actor nodes should correspond to
    # editors of which pre-existing nodes
    for entry in settings.ACTOR_NODES:
        # find all entities of the parent type
        actor_entitytypeid = entry[0]
        parent_entitytypeid = entry[1]
        source_entitytypeid = entry[2]
        mapping_step_to_actor = mapping_schema[actor_entitytypeid]['steps'][-1]

        parent_entities = models.Entities.objects.filter(entitytypeid=parent_entitytypeid).iterator()
        for parent_entity_model in parent_entities:
            # check whether an actor node already exists
            parent_entity = Entity().get(parent_entity_model.entityid)
            actors = parent_entity.find_entities_by_type_id(actor_entitytypeid)
            if len(actors) == 0:
                # get the root resource
                root_resource_model = get_resource_for_entity(parent_entity_model, resource_entity_type)
                if not root_resource_model:
                    continue

                # find the earliest edit to the node that the data originated at
                edits = models.EditLog.objects.filter(
                    resourceid=root_resource_model.entityid,
                    attributeentitytypeid=source_entitytypeid).order_by('timestamp')
                first_edit = edits[0]
                actor_name = '%s %s' % (first_edit.user_firstname, first_edit.user_lastname)

                # create the actor node
                parent_entity.add_child_entity(
                    actor_entitytypeid, mapping_step_to_actor['propertyid'], actor_name, '')

                # logging.warning("\n\nParent after insert")
                log_entity(parent_entity)
                parent_entity._save()

                root_resource = Resource()
                root_resource.get(root_resource_model.entityid)

def create(id='', value=''):
    entity = Entity()
    entity.entityid = id
    entity.entitytypeid = 'TEST.E1'
    entity.property = 'P1'
    entity.value = value
    entity.child_entities = []
    return entity

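# Usage sketch (illustrative, not from the original source): composing a small
# parent/child graph with the create() helper above. Assumes, as the tests in
# this section do, that Entity exposes a mutable child_entities list.
parent = create(value='parent value')
child = create(value='child value')
parent.child_entities.append(child)

assert parent.child_entities[0].value == 'child value'
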
def update_nodes(self, entitytypeid, data, dataKey=None):
    if dataKey is None:
        dataKey = entitytypeid
    self.resource.prune(entitytypes=[entitytypeid])

    if self.schema is None:
        self.schema = Entity.get_mapping_schema(self.resource.entitytypeid)

    for value in data[dataKey]:
        baseentity = None
        for newentity in value['nodes']:
            if type(newentity) is list:
                for newentityitem in newentity:
                    baseentitygroup = None
                    for newsubentity in newentityitem:
                        entity = Entity()
                        if newsubentity['entitytypeid'] in self.schema:
                            entity.create_from_mapping(
                                self.resource.entitytypeid,
                                self.schema[newsubentity['entitytypeid']]['steps'],
                                newsubentity['entitytypeid'],
                                newsubentity['value'],
                                '')
                            if baseentitygroup is None:
                                baseentitygroup = entity
                            else:
                                baseentitygroup.merge(entity)

                    if baseentity is None:
                        baseentity = baseentitygroup
                    else:
                        baseentity.merge_at(baseentitygroup, entitytypeid)
            else:
                entity = Entity()
                if newentity['entitytypeid'] in self.schema:
                    entity.create_from_mapping(
                        self.resource.entitytypeid,
                        self.schema[newentity['entitytypeid']]['steps'],
                        newentity['entitytypeid'],
                        newentity['value'],
                        '')
                    if baseentity is None:
                        baseentity = entity
                    else:
                        baseentity.merge(entity)

        self.resource.merge_at(baseentity, self.resource.entitytypeid)

    self.resource.trim()

def createBacklogIds():
    entitytype = archesmodels.EntityTypes.objects.get(pk="ACTOR.E39")
    type = 'ACTOR'
    all_entities = archesmodels.Entities.objects.filter(entitytypeid__exact=entitytype)
    entities = []
    errors = []

    for count, entity in enumerate(all_entities, 1):
        if count % 5000 == 0:
            print "%s resources inspected" % count
        try:
            relation = archesmodels.Relations.objects.get(
                ruleid=archesmodels.Rules.objects.get(
                    entitytypedomain=entitytype,
                    entitytyperange="EAMENA_ID.E42").ruleid,
                entityiddomain=entity.entityid)
        except ObjectDoesNotExist:
            entities.append(entity)

    print "There are %s resources and %s which do not have a EAMENA_ID.E42" % (all_entities.count(), len(entities))

    for count, entity in enumerate(entities, 1):
        if count % 1000 == 0:
            print "%s UniqueIds created" % count

        entity2 = archesmodels.Entities()
        entity2.entitytypeid = archesmodels.EntityTypes.objects.get(pk="EAMENA_ID.E42")
        entity2.entityid = str(uuid.uuid4())
        entity2.save()

        rule = archesmodels.Rules.objects.get(
            entitytypedomain=entity.entitytypeid,
            entitytyperange=entity2.entitytypeid,
            propertyid='P1')
        archesmodels.Relations.objects.get_or_create(
            entityiddomain=entity, entityidrange=entity2, ruleid=rule)

        uniqueidmodel = Entity._get_model('uniqueids')
        uniqueidmodelinstance = uniqueidmodel()
        uniqueidmodelinstance.entityid = entity2
        uniqueidmodelinstance.id_type = type
        try:
            lastID = uniqueidmodel.objects.filter(id_type__exact=type).latest()
            IdInt = int(lastID.val) + 1
            uniqueidmodelinstance.val = str(IdInt)
        except ObjectDoesNotExist:
            print "The resource %s has been assigned the first ID with entityid %s" % (entity.entityid, entity2.entityid)
            uniqueidmodelinstance.val = str(1)
        uniqueidmodelinstance.order_date = datetime.datetime.now()
        uniqueidmodelinstance.save()

        zerosLength = settings.ID_LENGTH if settings.ID_LENGTH > len(uniqueidmodelinstance.val) else len(uniqueidmodelinstance.val)
        value = type + "-" + uniqueidmodelinstance.val.zfill(zerosLength)
        # ReindexResource(entity.entityid, entity2.entityid, value)
        try:
            resource = Resource().get(entity.entityid)
            resource.index()
        except Exception as e:
            if e not in errors:
                errors.append(e)

    if len(errors) > 0:
        print errors[0], ':', len(errors)

def update_nodes(self, entitytypeid, data, dataKey=None):
    if dataKey is None:
        dataKey = entitytypeid
    self.resource.prune(entitytypes=[entitytypeid])

    if self.schema is None:
        self.schema = Entity.get_mapping_schema(self.resource.entitytypeid)

    for value in data[dataKey]:
        baseentity = None
        for newentity in value['nodes']:
            entity = Entity()
            if newentity['entitytypeid'] in self.schema:
                entity.create_from_mapping(
                    self.resource.entitytypeid,
                    self.schema[newentity['entitytypeid']]['steps'],
                    newentity['entitytypeid'],
                    newentity['value'],
                    newentity['entityid'])
                if baseentity is None:
                    baseentity = entity
                else:
                    baseentity.merge(entity)

        self.resource.merge_at(baseentity, self.resource.entitytypeid)

    self.resource.trim()

def test_init_entity_from_python(self):
    """
    Test to see that a python dict can be parsed into an Entity instance
    """
    python_object = {
        "entityid": "1234",
        "entitytypeid": "TEST.E1",
        "property": "P1",
        "value": "123",
        "child_entities": []
    }

    entity = Entity(python_object)

    self.assertEqual(entity.entityid, '1234')
    self.assertEqual(entity.entitytypeid, 'TEST.E1')
    self.assertEqual(entity.property, 'P1')
    self.assertEqual(entity.value, '123')
    self.assertEqual(entity.child_entities, [])

def prepare_documents_for_report_index(self, geom_entities=[]):
    """
    Generates a list of specialized resource based documents to support resource reports
    """
    geojson_geom = None
    if len(geom_entities) > 0:
        geojson_geom = {
            'type': 'GeometryCollection',
            'geometries': [geom_entity['value'] for geom_entity in geom_entities]
        }

    entity_dict = Entity()
    entity_dict.property = self.property
    entity_dict.entitytypeid = self.entitytypeid
    entity_dict.entityid = self.entityid
    entity_dict.primaryname = self.get_primary_name()
    entity_dict.geometry = geojson_geom
    entity_dict.graph = self.dictify(keys=['label', 'value'])
    return [JSONSerializer().serializeToPython(entity_dict)]

def save(self, user={}, note='', resource_uuid=''):
    """
    Saves a resource back to the db
    """
    newentity = False
    timestamp = datetime.now()

    if self.entityid != '' and resource_uuid == '':
        entity_pre_save = Entity().get(self.entityid)
    else:
        newentity = True
        self.entityid = resource_uuid

    self.trim()
    self._save()

    if not newentity:
        diff = self.diff(entity_pre_save)

        for entity in diff['deleted_nodes']:
            if entity.label != '' or entity.value != '':
                edit = archesmodels.EditLog()
                edit.editlogid = str(uuid.uuid4())
                edit.resourceentitytypeid = self.entitytypeid
                edit.resourceid = self.entityid
                edit.userid = getattr(user, 'id', '')
                edit.user_email = getattr(user, 'email', '')
                edit.user_firstname = getattr(user, 'first_name', '')
                edit.user_lastname = getattr(user, 'last_name', '')
                edit.note = note
                edit.timestamp = timestamp
                edit.attributeentitytypeid = entity.entitytypeid
                edit.edittype = 'delete'
                edit.oldvalue = entity.label if entity.label != '' else entity.value
                edit.save()
            entity._delete(delete_root=True)

        for entity in diff['updated_nodes']:
            if entity['from'].label != '' or entity['to'].label != '' or entity['from'].value != '' or entity['to'].value != '':
                edit = archesmodels.EditLog()
                edit.editlogid = str(uuid.uuid4())
                edit.resourceentitytypeid = self.entitytypeid
                edit.resourceid = self.entityid
                edit.userid = getattr(user, 'id', '')
                edit.user_email = getattr(user, 'email', '')
                edit.user_firstname = getattr(user, 'first_name', '')
                edit.user_lastname = getattr(user, 'last_name', '')
                edit.note = note
                edit.timestamp = timestamp
                edit.attributeentitytypeid = entity['from'].entitytypeid
                edit.edittype = 'update'
                edit.oldvalue = entity['from'].label if entity['from'].label != '' else entity['from'].value
                edit.newvalue = entity['to'].label if entity['to'].label != '' else entity['to'].value
                edit.save()

        for entity in diff['inserted_nodes']:
            if entity.label != '' or entity.value != '':
                edit = archesmodels.EditLog()
                edit.editlogid = str(uuid.uuid4())
                edit.resourceentitytypeid = self.entitytypeid
                edit.resourceid = self.entityid
                edit.userid = getattr(user, 'id', '')
                edit.user_email = getattr(user, 'email', '')
                edit.user_firstname = getattr(user, 'first_name', '')
                edit.user_lastname = getattr(user, 'last_name', '')
                edit.note = note
                edit.timestamp = timestamp
                edit.attributeentitytypeid = entity.entitytypeid
                edit.edittype = 'insert'
                edit.oldvalue = None
                edit.newvalue = entity.label if entity.label != '' else entity.value
                edit.save()
    else:
        for entity in self.flatten():
            if entity.label != '' or entity.value != '':
                edit = archesmodels.EditLog()
                edit.editlogid = str(uuid.uuid4())
                edit.resourceentitytypeid = self.entitytypeid
                edit.resourceid = self.entityid
                edit.userid = getattr(user, 'id', '')
                edit.user_email = getattr(user, 'email', '')
                edit.user_firstname = getattr(user, 'first_name', '')
                edit.user_lastname = getattr(user, 'last_name', '')
                edit.note = note
                edit.timestamp = timestamp
                edit.attributeentitytypeid = entity.entitytypeid
                edit.edittype = 'create'
                edit.oldvalue = None
                edit.newvalue = entity.label if entity.label != '' else entity.value
                edit.save()

    return self

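# Illustrative note (inferred from how save() consumes the diff above, not a
# confirmed API): self.diff(entity_pre_save) is expected to return a dict of
# the following shape:
#
#   {
#       'deleted_nodes':  [<Entity>, ...],
#       'updated_nodes':  [{'from': <Entity>, 'to': <Entity>}, ...],
#       'inserted_nodes': [<Entity>, ...],
#   }
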
def prepare_documents_for_search_index(self):
    """
    Generates a list of specialized resource based documents to support resource search
    """
    document = Entity()
    document.property = self.property
    document.entitytypeid = self.entitytypeid
    document.entityid = self.entityid
    document.value = self.value
    document.label = self.label
    document.businesstablename = self.businesstablename
    document.primaryname = self.get_primary_name()
    document.child_entities = []
    document.dates = []
    document.domains = []
    document.geometries = []
    document.numbers = []

    for entity in self.flatten():
        if entity.entityid != self.entityid:
            if entity.businesstablename == 'domains':
                value = archesmodels.Values.objects.get(pk=entity.value)
                entity_copy = entity.copy()
                entity_copy.conceptid = value.conceptid_id
                document.domains.append(entity_copy)
            elif entity.businesstablename == 'dates':
                document.dates.append(entity)
            elif entity.businesstablename == 'numbers':
                document.numbers.append(entity)
            elif entity.businesstablename == 'geometries':
                entity.value = JSONDeserializer().deserialize(fromstr(entity.value).json)
                document.geometries.append(entity)
            else:
                document.child_entities.append(entity)

    return [JSONSerializer().serializeToPython(document)]

def prepare_documents_for_search_index(self):
    """
    Generates a list of specialized resource based documents to support resource search
    """
    document = Entity()
    document.property = self.property
    document.entitytypeid = self.entitytypeid
    document.entityid = self.entityid
    document.value = self.value
    document.label = self.label
    document.businesstablename = self.businesstablename
    document.primaryname = self.get_primary_name()
    document.child_entities = []
    document.dates = []
    document.extendeddates = []
    document.domains = []
    document.geometries = []
    document.numbers = []

    for entity in self.flatten():
        if entity.entityid != self.entityid:
            if entity.businesstablename == 'domains':
                value = archesmodels.Values.objects.get(pk=entity.value)
                entity_copy = entity.copy()
                entity_copy.conceptid = value.conceptid_id
                document.domains.append(entity_copy)
            elif entity.businesstablename == 'dates':
                document.dates.append(entity)
                document.extendeddates.append(entity)
            elif entity.businesstablename == 'numbers':
                document.numbers.append(entity)
            elif entity.businesstablename == 'geometries':
                entity.value = JSONDeserializer().deserialize(fromstr(entity.value).json)
                document.geometries.append(entity)
            else:
                document.child_entities.append(entity)
                if entity.entitytypeid in settings.EXTENDED_DATE_NODES:
                    document.extendeddates.append(entity)

    doc = JSONSerializer().serializeToPython(document)
    # documents = super(Resource, self).prepare_documents_for_search_index()
    # for doc in documents:

    ## index dates to extended date mapping
    for entity in doc['extendeddates']:
        date = date_to_int(entity['value'])
        entity['value'] = date

    ## index dates groups to extended date groups mapping
    doc['extendeddategroups'] = []
    for branch, labels in settings.INDEXED_DATE_BRANCH_FORMATIONS.iteritems():
        for nodes in self.get_nodes(branch, keys=['value']):
            doc['extendeddategroups'].append({
                'value': date_to_int(nodes[labels[0]]),
                'conceptid': nodes[labels[1]]
            })

    return [doc]

def Entities(request, entityid):
    entity = []
    if request.method == 'GET':
        if entityid == '':
            pass
        else:
            entity = Entity().get(entityid)
    else:
        if 'edit' not in request.user.user_groups:
            raise Exception('User must be logged in to insert, update, or delete entities')

    if request.method == 'POST':
        if len(request.FILES) > 0:
            jsondata = JSONDeserializer().deserialize(request.POST.get('json'))
        else:
            jsondata = JSONDeserializer().deserialize(request.body)

        if not isinstance(jsondata, list):
            jsondata = [jsondata]

        for entityjson in jsondata:
            entity = Entity(entityjson)
            if entity.entityid != '':
                entity.delete_index()
            entity.save(username=request.user.username, uploadedfile=request.FILES.get('file', None))
            entity.index()
    elif request.method == 'DELETE':
        jsondata = JSONDeserializer().deserialize(request.body)

        if not isinstance(jsondata, list):
            jsondata = [jsondata]

        for entityjson in jsondata:
            entity = Entity(entityjson)
            entity.delete_index()
            entity.delete(delete_root=entity.get_rank() == 0)

    return HttpResponse(JSONSerializer().serialize(entity, ensure_ascii=True, indent=4))

def migrate(settings=None):
    if not settings:
        from django.conf import settings

    suffix = '_altered_nodes.csv'
    errors = []

    for path in settings.ADDITIONAL_RESOURCE_GRAPH_LOCATIONS:
        if os.path.exists(path):
            print '\nLOADING NODE MIGRATION INFO (%s)' % (path)
            print '--------------'
            for f in listdir(path):
                if isfile(join(path, f)) and f.endswith(suffix):
                    path_to_file = join(path, f)
                    basepath = path_to_file[:-18]
                    name = basepath.split(os.sep)[-1]
                    migrations = get_list_dict(basepath + '_altered_nodes.csv', [
                        'OLDENTITYTYPEID', 'NEWENTITYTYPEID',
                        'GROUPROOTNODEOLD', 'GROUPROOTNODENEW'
                    ])

                    # Identify nodes which must be migrated
                    resource_entity_type = 'HERITAGE_RESOURCE_GROUP.E27'
                    mapping_schema = Entity.get_mapping_schema(resource_entity_type)

                    # group migrations by groupRootNodeNew
                    groups = groupby(migrations, lambda x: (x['GROUPROOTNODEOLD'], x['GROUPROOTNODENEW']))
                    for group_root_node_ids, group_migrations in groups:
                        # Convert group_migrations to a list as we need to iterate it multiple times
                        group_migrations_list = []
                        for group_migration in group_migrations:
                            group_migrations_list.append(group_migration)

                        group_root_node_id = group_root_node_ids[0]
                        new_group_root_node_id = group_root_node_ids[1]

                        # Find all entities with the old group root node
                        group_root_entities = models.Entities.objects.filter(entitytypeid=group_root_node_id)
                        print "ENTITIES COUNT: ", group_root_entities.count()
                        for group_root_entity_model in group_root_entities.iterator():
                            # Create a new subgraph for each of the migration steps,
                            # then merge them together at the group root node

                            # get full resource graph for the root entity
                            try:
                                group_root_entity = Entity(group_root_entity_model.pk)
                            except:
                                print "Faulty group entity's ID %s and entitytype %s" % (group_root_entity_model.pk, group_root_entity_model.entitytypeid)
                                continue

                            new_group_root_entity = Entity().create_from_mapping(
                                resource_entity_type,
                                mapping_schema[new_group_root_node_id]['steps'],
                                new_group_root_node_id, '')

                            if group_migrations_list[0]['NEWENTITYTYPEID'] != new_group_root_node_id:
                                # create a node for the new group root
                                group_root_is_new_data_node = False
                            else:
                                group_root_is_new_data_node = True

                            # get the root resource graph for this entity
                            resource_model = get_resource_for_entity(group_root_entity, resource_entity_type)
                            if not resource_model:
                                continue
                            resource = Resource().get(resource_model.entityid)

                            for group_migration in group_migrations_list:
                                # get individual entities to be migrated in the source group
                                old_entities = group_root_entity.find_entities_by_type_id(group_migration['OLDENTITYTYPEID'])
                                for old_entity in old_entities:
                                    date_on = False
                                    # Create the corresponding entity in the new schema
                                    new_entity = Entity()

                                    # Disturbance dates need to be mapped to different nodes depending on
                                    # the value of the now obsolete DISTURBANCE_DATE_TYPE.E55
                                    if group_migration['OLDENTITYTYPEID'] in ['DISTURBANCE_DATE_END.E49', 'DISTURBANCE_DATE_START.E49']:
                                        date_type_node = group_root_entity.find_entities_by_type_id('DISTURBANCE_DATE_TYPE.E55')
                                        if date_type_node:
                                            if date_type_node[0].label == 'Occurred before':
                                                new_entity_type_id = 'DISTURBANCE_DATE_OCCURRED_BEFORE.E61'
                                            elif date_type_node[0].label == 'Occurred on':
                                                if group_migration['OLDENTITYTYPEID'] == 'DISTURBANCE_DATE_START.E49':
                                                    date_on = True
                                                else:
                                                    new_entity_type_id = 'DISTURBANCE_DATE_OCCURRED_ON.E61'
                                        else:
                                            new_entity_type_id = group_migration['NEWENTITYTYPEID']
                                    else:
                                        new_entity_type_id = group_migration['NEWENTITYTYPEID']

                                    old_value = old_entity.value
                                    if old_entity.businesstablename == 'domains':
                                        # in some cases we move from domains to strings.
                                        newEntityType = models.EntityTypes.objects.get(entitytypeid=new_entity_type_id)
                                        if newEntityType.businesstablename == 'strings':
                                            old_value = old_entity.label

                                    if not date_on:
                                        new_entity.create_from_mapping(
                                            resource_entity_type,
                                            mapping_schema[new_entity_type_id]['steps'],
                                            new_entity_type_id, old_value)

                                        # In some cases a newly created data node is the new group root.
                                        # In this case we should discard the previously created new group
                                        # root and use this one instead.
                                        if new_group_root_node_id == new_entity_type_id:
                                            new_group_root_entity = new_entity
                                            group_root_is_new_data_node = True

                                        # UNUSED
                                        # # If there is a node to be inserted, do it here
                                        # # if 'INSERT_NODE_RULE' in group_migration:
                                        # #     entityttypeid_to_insert = group_migration['INSERT_NODE_RULE'][1][1]
                                        # #     value_to_insert = group_migration['INSERT_NODE_RULE'][1][2]
                                        # #
                                        # #     inserted_entity = Entity()
                                        # #     inserted_entity.create_from_mapping(resource_entity_type, mapping_schema[entityttypeid_to_insert]['steps'], entityttypeid_to_insert, value_to_insert)
                                        # #
                                        # #     new_entity.merge(inserted_entity)

                                        # If there is a node in common with the existing node further down
                                        # the chain than the group root node, merge there
                                        # follow links back from the parent
                                        shouldnt_merge_with_group_root = group_root_is_new_data_node and new_group_root_node_id == new_entity_type_id
                                        if not shouldnt_merge_with_group_root:
                                            has_merged = False
                                            reversed_steps = mapping_schema[new_entity_type_id]['steps'][::-1]
                                            for step in reversed_steps:
                                                # find the entitytypedomain in the new_group_root_entity
                                                if not has_merged:
                                                    mergeable_nodes = new_group_root_entity.find_entities_by_type_id(step['entitytypedomain'])
                                                    if len(mergeable_nodes) > 0:
                                                        new_group_root_entity.merge_at(new_entity, step['entitytypedomain'])
                                                        has_merged = True
                                                        new_entity = None
                                                        # gc.collect()
                                            if not has_merged:
                                                logging.warning("Unable to merge newly created entity")

                            # merge the new group root entity into the resource
                            resource.merge_at(new_group_root_entity, resource_entity_type)
                            logging.warning("SAVING RESOURCE, %s", resource)

                            # save the resource
                            resource.trim()
                            try:
                                resource._save()
                                resource = None
                            except Exception as e:
                                logging.warning("Error saving resource")
                                logging.warning(e)
                                errors.append("Error saving %s. Error was %s" % (resource, e))

                            group_root_entity.clear()
                            group_root_entity = None
                            new_group_root_entity.clear()
                            new_group_root_entity = None
                        # end for group root
                        # resource.index()
                        # logging.warning("SAVED RESOURCE, %s", resource)

    utils.write_to_file(os.path.join(settings.PACKAGE_ROOT, 'logs', 'migration_errors.txt'), '')
    if len(errors) > 0:
        # utils.write_to_file(os.path.join(settings.PACKAGE_ROOT, 'logs', 'migration_errors.txt'), '\n'.join(errors))
        print "\n\nERROR: There were errors migrating some resources. See below"
        print errors

def build_search_results_dsl(request):
    # Results are sorted in ascending order by the value of SITE_ID.E42, which is
    # displayed as the primary name of Heritage Resources. Must go back to this method
    # once the new Automatic Resource ID has been fully developed (AZ 10/08/16).
    # Update 06/09/16: EAMENA_ID.E42 now used as sorting criterion.
    sorting = {
        "child_entities.label": {
            "order": "asc",
            "nested_path": "child_entities",
            "nested_filter": {
                "term": {"child_entities.entitytypeid": "EAMENA_ID.E42"}
            }
        }
    }

    term_filter = request.GET.get('termFilter', '')
    spatial_filter = JSONDeserializer().deserialize(request.GET.get('spatialFilter', None))
    export = request.GET.get('export', None)
    page = 1 if request.GET.get('page') == '' else int(request.GET.get('page', 1))
    temporal_filter = JSONDeserializer().deserialize(request.GET.get('temporalFilter', None))
    boolean_search = request.GET.get('booleanSearch', '')
    filter_and_or = JSONDeserializer().deserialize(request.GET.get('termFilterAndOr', ''))
    filter_grouping = JSONDeserializer().deserialize(request.GET.get('termFilterGroup', ''))

    filter_combine_flags = JSONDeserializer().deserialize(request.GET.get('termFilterCombineWithPrev', ''))
    # Ignore first entry as it is a dummy
    filter_combine_flags = filter_combine_flags[1:]
    # filter_combine_flags = [False, True, False, False, False]

    # filter_groups = JSONDeserializer().deserialize(request.GET.get('termFilterGroups', ''))
    # Not here yet, so put in some bogus data
    # filter_groups = [
    #     'NAME.E41',
    #     'NAME.E41',
    #     'DISTURBANCE_STATE.E3',
    #     'THREAT_STATE.E3'
    # ]

    se = SearchEngineFactory().create()

    if export is not None:
        limit = settings.SEARCH_EXPORT_ITEMS_PER_PAGE
    else:
        limit = settings.SEARCH_ITEMS_PER_PAGE

    query = Query(se, start=limit * int(page - 1), limit=limit)
    boolquery = Bool()
    boolfilter = Bool()
    is_empty_temporal_filter = True

    # store each search term in an array initially. These will be combined based on
    # the global and/or and the optional groupings
    terms_queries = []

    # logging.warning("-------QUERY-------")
    if term_filter != '' or not is_empty_temporal_filter:
        for index, select_box in enumerate(JSONDeserializer().deserialize(term_filter)):
            selectbox_boolfilter = Bool()

            groupid = filter_grouping[index]
            if groupid != 'No group':
                # build a nested query against the nested_entities

                # build a nested query for each resource type
                for resourcetype in settings.RESOURCE_TYPE_CONFIGS().keys():
                    # trace the path from each term to the group root
                    term_paths = []
                    for term in select_box:
                        # trace path from group root to this term
                        if term['type'] == 'concept':
                            # get all the parent concepts for this value i.e. the field
                            concept_relations = models.ConceptRelations.objects.filter(
                                conceptidto=term['value'], relationtype="member")
                            for relation in concept_relations:
                                term_parent_concept = models.Concepts.objects.get(conceptid=relation.conceptidfrom)
                                # get the steps from the root to that concept
                                if term_parent_concept.nodetype.nodetype == "Collection":
                                    term_schema = Entity.get_mapping_schema_to(term_parent_concept.legacyoid)
                                elif term_parent_concept.nodetype.nodetype == 'Concept':
                                    # need to get at the parent until we reach the root collection.
                                    # concepts are arranged hierarchically
                                    parent_relations_to = models.ConceptRelations.objects.filter(
                                        conceptidto=term_parent_concept.conceptid, relationtype='member')
                                    grandparent = models.Concepts.objects.filter(
                                        conceptid=parent_relations_to[0].conceptidfrom)
                                    term_schema = Entity.get_mapping_schema_to(grandparent[0].legacyoid)

                                # this path begins at the root, and ends up at the node in question
                                if resourcetype in term_schema:
                                    term_path = term_schema[resourcetype]['steps']
                                    term_paths.append({
                                        'term': term,
                                        'path': term_path
                                    })
                                    break
                        elif term['type'] == 'term':
                            concept = models.Concepts.objects.get(conceptid=term['context'])
                            term_schema = Entity.get_mapping_schema_to(concept.legacyoid)
                            if resourcetype in term_schema:
                                term_path = term_schema[resourcetype]['steps']
                                term_paths.append({
                                    'term': term,
                                    'path': term_path
                                })
                        elif term['type'] == 'string':
                            term_schema = Entity.get_mapping_schema_to(groupid)
                            if resourcetype in term_schema:
                                term_path = term_schema[resourcetype]['steps']
                                term_paths.append({
                                    'term': term,
                                    'path': term_path
                                })

                    if 'year_min_max' in temporal_filter[index] and len(temporal_filter[index]['year_min_max']) == 2:
                        start_date = date(temporal_filter[index]['year_min_max'][0], 1, 1)
                        end_date = date(temporal_filter[index]['year_min_max'][1], 12, 31)
                        if start_date:
                            start_date = start_date.isoformat()
                        if end_date:
                            end_date = end_date.isoformat()

                        if 'inverted' not in temporal_filter[index]:
                            inverted_temporal_filter = False
                        else:
                            if temporal_filter[index]['inverted']:
                                inverted_temporal_filter = True
                            else:
                                inverted_temporal_filter = False

                        term_paths.append({
                            'term': {
                                'date_operator': '3',
                                'start_date': start_date,
                                'end_date': end_date,
                                'type': 'date',
                                'inverted': inverted_temporal_filter
                            },
                            'path': term_path
                        })

                    if 'filters' in temporal_filter[index]:
                        term_schema = Entity.get_mapping_schema_to(groupid)
                        if resourcetype in term_schema:
                            term_path = term_schema[resourcetype]['steps']

                        for temporal_filter_item in temporal_filter[index]['filters']:
                            date_type = ''
                            searchdate = ''
                            date_operator = ''
                            for node in temporal_filter_item['nodes']:
                                if node['entitytypeid'] == 'DATE_COMPARISON_OPERATOR.E55':
                                    date_operator = node['value']
                                elif node['entitytypeid'] == 'date':
                                    searchdate = node['value']
                                else:
                                    date_type = node['value']

                            date_value = datetime.strptime(searchdate, '%Y-%m-%d').isoformat()

                            if 'inverted' not in temporal_filter[index]:
                                inverted_temporal_filter = False
                            else:
                                if temporal_filter[index]['inverted']:
                                    inverted_temporal_filter = True
                                else:
                                    inverted_temporal_filter = False

                            term_paths.append({
                                'term': {
                                    'date_operator': date_operator,
                                    'date_value': date_value,
                                    'type': 'date',
                                    'inverted': inverted_temporal_filter
                                },
                                'path': term_path
                            })

                    # combine the traced path to build a nested query
                    group_query = nested_query_from_pathed_values(term_paths, 'nested_entity.child_entities')
                    # add nested query to overall query
                    selectbox_boolfilter.should(group_query)

                # logging.warning("BOX QUERY - %s", JSONSerializer().serialize(selectbox_boolfilter, indent=2))
            else:
                for term in select_box:
                    if term['type'] == 'term':
                        entitytype = models.EntityTypes.objects.get(conceptid_id=term['context'])
                        boolfilter_nested = Bool()
                        boolfilter_nested.must(Terms(field='child_entities.entitytypeid', terms=[entitytype.pk]))
                        boolfilter_nested.must(Match(field='child_entities.value', query=term['value'], type='phrase'))
                        nested = Nested(path='child_entities', query=boolfilter_nested)
                        if filter_and_or[index] == 'or':
                            if not term['inverted']:
                                selectbox_boolfilter.should(nested)
                        else:
                            if term['inverted']:
                                selectbox_boolfilter.must_not(nested)
                            else:
                                selectbox_boolfilter.must(nested)
                    elif term['type'] == 'concept':
                        concept_ids = _get_child_concepts(term['value'])
                        terms = Terms(field='domains.conceptid', terms=concept_ids)
                        nested = Nested(path='domains', query=terms)
                        if filter_and_or[index] == 'or':
                            if not term['inverted']:
                                selectbox_boolfilter.should(nested)
                        else:
                            if term['inverted']:
                                selectbox_boolfilter.must_not(nested)
                            else:
                                selectbox_boolfilter.must(nested)
                    elif term['type'] == 'string':
                        boolquery2 = Bool()  # This bool contains the subset of nested string queries on both domains and child_entities paths
                        boolfilter_folded = Bool()  # This bool searches by string in child_entities, where free text strings get indexed
                        boolfilter_folded2 = Bool()  # This bool searches by string in the domains path, where controlled vocabulary concepts get indexed

                        boolfilter_folded.should(Match(field='child_entities.value', query=term['value'], type='phrase_prefix', fuzziness='AUTO', operator='and'))
                        boolfilter_folded.should(Match(field='child_entities.value.folded', query=term['value'], type='phrase_prefix', fuzziness='AUTO', operator='and'))
                        boolfilter_folded.should(Match(field='child_entities.value.folded', query=term['value'], fuzziness='AUTO', operator='and'))
                        nested = Nested(path='child_entities', query=boolfilter_folded)

                        boolfilter_folded2.should(Match(field='domains.label', query=term['value'], type='phrase_prefix', fuzziness='AUTO', operator='and'))
                        boolfilter_folded2.should(Match(field='domains.label.folded', query=term['value'], type='phrase_prefix', fuzziness='AUTO', operator='and'))
                        boolfilter_folded2.should(Match(field='domains.label.folded', query=term['value'], fuzziness='AUTO', operator='and'))
                        nested2 = Nested(path='domains', query=boolfilter_folded2)

                        boolquery2.should(nested)
                        boolquery2.should(nested2)

                        if filter_and_or[index] == 'or':
                            if not term['inverted']:
                                # use boolfilter here instead of boolquery because boolquery
                                # can't be combined with other boolfilters using boolean OR
                                selectbox_boolfilter.should(boolquery2)
                        else:
                            if term['inverted']:
                                selectbox_boolfilter.must_not(boolquery2)
                            else:
                                selectbox_boolfilter.must(boolquery2)

                if 'year_min_max' in temporal_filter[index] and len(temporal_filter[index]['year_min_max']) == 2:
                    start_date = date(temporal_filter[index]['year_min_max'][0], 1, 1)
                    end_date = date(temporal_filter[index]['year_min_max'][1], 12, 31)
                    if start_date:
                        start_date = start_date.isoformat()
                    if end_date:
                        end_date = end_date.isoformat()
                    range = Range(field='dates.value', gte=start_date, lte=end_date)
                    nested = Nested(path='dates', query=range)

                    if 'inverted' not in temporal_filter[index]:
                        temporal_filter[index]['inverted'] = False

                    if temporal_filter[index]['inverted']:
                        selectbox_boolfilter.must_not(nested)
                    else:
                        selectbox_boolfilter.must(nested)

                if 'filters' in temporal_filter[index]:
                    for temporal_filter_item in temporal_filter[index]['filters']:
                        date_type = ''
                        searchdate = ''
                        date_operator = ''
                        for node in temporal_filter_item['nodes']:
                            if node['entitytypeid'] == 'DATE_COMPARISON_OPERATOR.E55':
                                date_operator = node['value']
                            elif node['entitytypeid'] == 'date':
                                searchdate = node['value']
                            else:
                                date_type = node['value']

                        date_value = datetime.strptime(searchdate, '%Y-%m-%d').isoformat()

                        if date_operator == '1':  # equals query
                            range = Range(field='dates.value', gte=date_value, lte=date_value)
                        elif date_operator == '0':  # greater than query
                            range = Range(field='dates.value', lt=date_value)
                        elif date_operator == '2':  # less than query
                            range = Range(field='dates.value', gt=date_value)

                        nested = Nested(path='dates', query=range)

                        if 'inverted' not in temporal_filter[index]:
                            temporal_filter[index]['inverted'] = False

                        if temporal_filter[index]['inverted']:
                            selectbox_boolfilter.must_not(nested)
                        else:
                            selectbox_boolfilter.must(nested)

            terms_queries.append(selectbox_boolfilter)
            # if not selectbox_boolfilter.empty:
            #     if boolean_search == 'or':
            #         boolfilter.should(selectbox_boolfilter)
            #     else:
            #         boolfilter.must(selectbox_boolfilter)

        # We now have individual query terms for each of the search components.
        # Combine into one group now. Start by building an array of groups which
        # will be combined according to the global And/Or. Queries within one of
        # these groups will be combined by the complement of the global And/Or.
        # We may end up with [ [A,B], [C], [D,E] ], which would translate to either:
        #   (A || B) && C && (D || E)
        # or
        #   (A && B) || C || (D && E)
        # for global AND or OR respectively
        # logging.warning("TERMS QUERIES %s", terms_queries)
        bool_components = []
        for i, term_query in enumerate(terms_queries):
            if i == 0:
                bool_components.append([term_query])
            else:
                should_group_with_previous = filter_combine_flags[i - 1]
                if should_group_with_previous:
                    bool_components[-1].append(term_query)
                else:
                    bool_components.append([term_query])
        # logging.warning("BOOL COMPONENTS %s", bool_components)

        # Now build the ES queries
        for bool_component in bool_components:
            if len(bool_component) == 1:
                # just combine this on its own
                q = bool_component[0]
            else:
                q = Bool()
                for sub_component in bool_component:
                    if boolean_search == 'or':
                        # apply the OPPOSITE of the global boolean operator
                        q.must(sub_component)
                    else:
                        q.should(sub_component)

            # combine to the overall query according to the global boolean operator
            if boolean_search == 'or':
                boolfilter.should(q)
            else:
                boolfilter.must(q)

    if 'geometry' in spatial_filter and 'type' in spatial_filter['geometry'] and spatial_filter['geometry']['type'] != '':
        geojson = spatial_filter['geometry']
        if geojson['type'] == 'bbox':
            coordinates = [[geojson['coordinates'][0], geojson['coordinates'][3]],
                           [geojson['coordinates'][2], geojson['coordinates'][1]]]
            geoshape = GeoShape(field='geometries.value', type='envelope', coordinates=coordinates)
            nested = Nested(path='geometries', query=geoshape)
        else:
            buffer = spatial_filter['buffer']
            geojson = JSONDeserializer().deserialize(_buffer(geojson, buffer['width'], buffer['unit']).json)
            geoshape = GeoShape(field='geometries.value', type=geojson['type'], coordinates=geojson['coordinates'])
            nested = Nested(path='geometries', query=geoshape)

        if 'inverted' not in spatial_filter:
            spatial_filter['inverted'] = False

        if spatial_filter['inverted']:
            boolfilter.must_not(nested)
        else:
            boolfilter.must(nested)

    if not boolquery.empty:
        query.add_query(boolquery)

    if not boolfilter.empty:
        query.add_filter(boolfilter)

    # Sorting criterion added to query (AZ 10/08/16)
    query.dsl.update({'sort': sorting})
    # logging.warning("-=-==-=-===-=--=-==-=-===-=- query: -=-==-=-===-=--=-==-=-===-=-> %s", query)

    return query

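# Illustrative sketch (not from the original source) of the combine-flags
# grouping performed above: a True flag at position i-1 folds term i into the
# previous group; groups are then combined with the global and/or while their
# members are combined with its complement, e.g. for a global OR,
# [['A', 'B'], ['C']] translates to (A && B) || C.
def group_by_combine_flags(terms, combine_flags):
    groups = []
    for i, term in enumerate(terms):
        if i > 0 and combine_flags[i - 1]:
            groups[-1].append(term)
        else:
            groups.append([term])
    return groups

assert group_by_combine_flags(['A', 'B', 'C'], [True, False]) == [['A', 'B'], ['C']]
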
def build_search_results_dsl(request):
    # Results are sorted in ascending order by the value of SITE_ID.E42, which is
    # displayed as the primary name of Heritage Resources. Must go back to this method
    # once the new Automatic Resource ID has been fully developed (AZ 10/08/16).
    # Update 06/09/16: EAMENA_ID.E42 now used as sorting criterion.
    sorting = {
        "child_entities.label": {
            "order": "asc",
            "nested_path": "child_entities",
            "nested_filter": {
                "term": {"child_entities.entitytypeid": "EAMENA_ID.E42"}
            }
        }
    }

    term_filter = request.GET.get('termFilter', '')
    spatial_filter = JSONDeserializer().deserialize(request.GET.get('spatialFilter', None))
    export = request.GET.get('export', None)
    page = 1 if request.GET.get('page') == '' else int(request.GET.get('page', 1))
    temporal_filter = JSONDeserializer().deserialize(request.GET.get('temporalFilter', None))
    boolean_search = request.GET.get('booleanSearch', '')
    filter_and_or = JSONDeserializer().deserialize(request.GET.get('termFilterAndOr', ''))
    filter_grouping = JSONDeserializer().deserialize(request.GET.get('termFilterGroup', ''))

    filter_combine_flags = JSONDeserializer().deserialize(request.GET.get('termFilterCombineWithPrev', ''))
    # Ignore first entry as it is a dummy
    filter_combine_flags = filter_combine_flags[1:]
    # filter_combine_flags = [False, True, False, False, False]

    # filter_groups = JSONDeserializer().deserialize(request.GET.get('termFilterGroups', ''))
    # Not here yet, so put in some bogus data
    # filter_groups = [
    #     'NAME.E41',
    #     'NAME.E41',
    #     'DISTURBANCE_STATE.E3',
    #     'THREAT_STATE.E3'
    # ]

    se = SearchEngineFactory().create()

    if export is not None:
        limit = settings.SEARCH_EXPORT_ITEMS_PER_PAGE
    else:
        limit = settings.SEARCH_ITEMS_PER_PAGE

    query = Query(se, start=limit * int(page - 1), limit=limit)
    boolquery = Bool()
    boolfilter = Bool()
    is_empty_temporal_filter = True

    # store each search term in an array initially. These will be combined based on
    # the global and/or and the optional groupings
    terms_queries = []

    # logging.warning("-------QUERY-------")
    if term_filter != '' or not is_empty_temporal_filter:
        for index, select_box in enumerate(JSONDeserializer().deserialize(term_filter)):
            selectbox_boolfilter = Bool()

            groupid = filter_grouping[index]
            if groupid != 'No group':
                # build a nested query against the nested_entities

                # trace the path from each term to the group root
                term_paths = []
                for term in select_box:
                    # trace path from group root to this term
                    if term['type'] == 'concept':
                        # get the parent concept for this value i.e. the field
                        term_parent_concept = Concept.get_parent_concept(term['value'])
                        # get the steps from the root to that concept
                        if term_parent_concept.nodetype.nodetype == "Collection":
                            term_schema = Entity.get_mapping_schema_to(term_parent_concept.legacyoid)
                        elif term_parent_concept.nodetype.nodetype == 'Concept':
                            # need to get at the parent until we reach the root collection.
                            # concepts are arranged hierarchically
                            parent_relations_to = models.ConceptRelations.objects.filter(
                                conceptidto=term_parent_concept.conceptid, relationtype='member')
                            grandparent = models.Concepts.objects.filter(
                                conceptid=parent_relations_to[0].conceptidfrom)
                            term_schema = Entity.get_mapping_schema_to(grandparent[0].legacyoid)

                        # this path begins at the root, and ends up at the node in question
                        term_path = term_schema['HERITAGE_RESOURCE_GROUP.E27']['steps']
                        term_paths.append({
                            'term': term,
                            'path': term_path
                        })
                    elif term['type'] == 'term':
                        concept = models.Concepts.objects.get(conceptid=term['context'])
                        term_schema = Entity.get_mapping_schema_to(concept.legacyoid)
                        term_path = term_schema['HERITAGE_RESOURCE_GROUP.E27']['steps']
                        term_paths.append({
                            'term': term,
                            'path': term_path
                        })
                    elif term['type'] == 'string':
                        term_schema = Entity.get_mapping_schema_to(groupid)
                        term_path = term_schema['HERITAGE_RESOURCE_GROUP.E27']['steps']
                        term_paths.append({
                            'term': term,
                            'path': term_path
                        })

                if 'year_min_max' in temporal_filter[index] and len(temporal_filter[index]['year_min_max']) == 2:
                    start_date = date(temporal_filter[index]['year_min_max'][0], 1, 1)
                    end_date = date(temporal_filter[index]['year_min_max'][1], 12, 31)
                    if start_date:
                        start_date = start_date.isoformat()
                    if end_date:
                        end_date = end_date.isoformat()

                    if 'inverted' not in temporal_filter[index]:
                        inverted_temporal_filter = False
                    else:
                        if temporal_filter[index]['inverted']:
                            inverted_temporal_filter = True
                        else:
                            inverted_temporal_filter = False

                    term_paths.append({
                        'term': {
                            'date_operator': '3',
                            'start_date': start_date,
                            'end_date': end_date,
                            'type': 'date',
                            'inverted': inverted_temporal_filter
                        },
                        'path': term_path
                    })

                if 'filters' in temporal_filter[index]:
                    term_schema = Entity.get_mapping_schema_to(groupid)
                    term_path = term_schema['HERITAGE_RESOURCE_GROUP.E27']['steps']

                    for temporal_filter_item in temporal_filter[index]['filters']:
                        date_type = ''
                        searchdate = ''
                        date_operator = ''
                        for node in temporal_filter_item['nodes']:
                            if node['entitytypeid'] == 'DATE_COMPARISON_OPERATOR.E55':
                                date_operator = node['value']
                            elif node['entitytypeid'] == 'date':
                                searchdate = node['value']
                            else:
                                date_type = node['value']

                        date_value = datetime.strptime(searchdate, '%Y-%m-%d').isoformat()

                        if 'inverted' not in temporal_filter[index]:
                            inverted_temporal_filter = False
                        else:
                            if temporal_filter[index]['inverted']:
                                inverted_temporal_filter = True
                            else:
                                inverted_temporal_filter = False

                        term_paths.append({
                            'term': {
                                'date_operator': date_operator,
                                'date_value': date_value,
                                'type': 'date',
                                'inverted': inverted_temporal_filter
                            },
                            'path': term_path
                        })

                # combine the traced path to build a nested query
                group_query = nested_query_from_pathed_values(term_paths, 'nested_entity.child_entities')
                # add nested query to overall query
                selectbox_boolfilter.must(group_query)

                # logging.warning("BOX QUERY - %s", JSONSerializer().serialize(selectbox_boolfilter, indent=2))
            else:
                for term in select_box:
                    if term['type'] == 'term':
                        entitytype = models.EntityTypes.objects.get(conceptid_id=term['context'])
                        boolfilter_nested = Bool()
                        boolfilter_nested.must(Terms(field='child_entities.entitytypeid', terms=[entitytype.pk]))
                        boolfilter_nested.must(Match(field='child_entities.value', query=term['value'], type='phrase'))
                        nested = Nested(path='child_entities', query=boolfilter_nested)
                        if filter_and_or[index] == 'or':
                            if not term['inverted']:
                                selectbox_boolfilter.should(nested)
                        else:
                            if term['inverted']:
                                selectbox_boolfilter.must_not(nested)
                            else:
                                selectbox_boolfilter.must(nested)
                    elif term['type'] == 'concept':
                        concept_ids = _get_child_concepts(term['value'])
                        terms = Terms(field='domains.conceptid', terms=concept_ids)
                        nested = Nested(path='domains', query=terms)
                        if filter_and_or[index] == 'or':
                            if not term['inverted']:
                                selectbox_boolfilter.should(nested)
                        else:
                            if term['inverted']:
                                selectbox_boolfilter.must_not(nested)
                            else:
                                selectbox_boolfilter.must(nested)
                    elif term['type'] == 'string':
                        boolquery2 = Bool()  # This bool contains the subset of nested string queries on both domains and child_entities paths
                        boolfilter_folded = Bool()  # This bool searches by string in child_entities, where free text strings get indexed
                        boolfilter_folded2 = Bool()  # This bool searches by string in the domains path, where controlled vocabulary concepts get indexed

                        boolfilter_folded.should(Match(field='child_entities.value', query=term['value'], type='phrase_prefix', fuzziness='AUTO', operator='and'))
                        boolfilter_folded.should(Match(field='child_entities.value.folded', query=term['value'], type='phrase_prefix', fuzziness='AUTO', operator='and'))
                        boolfilter_folded.should(Match(field='child_entities.value.folded', query=term['value'], fuzziness='AUTO', operator='and'))
                        nested = Nested(path='child_entities', query=boolfilter_folded)

                        boolfilter_folded2.should(Match(field='domains.label', query=term['value'], type='phrase_prefix', fuzziness='AUTO', operator='and'))
                        boolfilter_folded2.should(Match(field='domains.label.folded', query=term['value'], type='phrase_prefix', fuzziness='AUTO', operator='and'))
                        boolfilter_folded2.should(Match(field='domains.label.folded', query=term['value'], fuzziness='AUTO', operator='and'))
                        nested2 = Nested(path='domains', query=boolfilter_folded2)

                        boolquery2.should(nested)
                        boolquery2.should(nested2)

                        if filter_and_or[index] == 'or':
                            if not term['inverted']:
                                # use boolfilter here instead of boolquery because boolquery
                                # can't be combined with other boolfilters using boolean OR
                                selectbox_boolfilter.should(boolquery2)
                        else:
                            if term['inverted']:
                                selectbox_boolfilter.must_not(boolquery2)
                            else:
                                selectbox_boolfilter.must(boolquery2)

                if 'year_min_max' in temporal_filter[index] and len(temporal_filter[index]['year_min_max']) == 2:
                    start_date = date(temporal_filter[index]['year_min_max'][0], 1, 1)
                    end_date = date(temporal_filter[index]['year_min_max'][1], 12, 31)
                    if start_date:
                        start_date = start_date.isoformat()
                    if end_date:
                        end_date = end_date.isoformat()
                    range = Range(field='dates.value', gte=start_date, lte=end_date)
                    nested = Nested(path='dates', query=range)

                    if 'inverted' not in temporal_filter[index]:
                        temporal_filter[index]['inverted'] = False

                    if temporal_filter[index]['inverted']:
                        selectbox_boolfilter.must_not(nested)
                    else:
                        selectbox_boolfilter.must(nested)

                if 'filters' in temporal_filter[index]:
                    for temporal_filter_item in temporal_filter[index]['filters']:
                        date_type = ''
                        searchdate = ''
                        date_operator = ''
                        for node in temporal_filter_item['nodes']:
                            if node['entitytypeid'] == 'DATE_COMPARISON_OPERATOR.E55':
                                date_operator = node['value']
                            elif node['entitytypeid'] == 'date':
                                searchdate = node['value']
                            else:
                                date_type = node['value']

                        date_value = datetime.strptime(searchdate, '%Y-%m-%d').isoformat()

                        if date_operator == '1':  # equals query
                            range = Range(field='dates.value', gte=date_value, lte=date_value)
                        elif date_operator == '0':  # greater than query
                            range = Range(field='dates.value', lt=date_value)
                        elif date_operator == '2':  # less than query
                            range = Range(field='dates.value', gt=date_value)

                        nested = Nested(path='dates', query=range)

                        if 'inverted' not in temporal_filter[index]:
                            temporal_filter[index]['inverted'] = False

                        if temporal_filter[index]['inverted']:
                            selectbox_boolfilter.must_not(nested)
                        else:
                            selectbox_boolfilter.must(nested)

            terms_queries.append(selectbox_boolfilter)
            # if not selectbox_boolfilter.empty:
            #     if boolean_search == 'or':
            #         boolfilter.should(selectbox_boolfilter)
            #     else:
            #         boolfilter.must(selectbox_boolfilter)

        # We now have individual query terms for each of the search components.
        # Combine into one group now. Start by building an array of groups which
        # will be combined according to the global And/Or. Queries within one of
        # these groups will be combined by the complement of the global And/Or.
        # We may end up with [ [A,B], [C], [D,E] ], which would translate to either:
        #   (A || B) && C && (D || E)
        # or
        #   (A && B) || C || (D && E)
        # for global AND or OR respectively
        # logging.warning("TERMS QUERIES %s", terms_queries)
        bool_components = []
        for i, term_query in enumerate(terms_queries):
            if i == 0:
                bool_components.append([term_query])
            else:
                should_group_with_previous = filter_combine_flags[i - 1]
                if should_group_with_previous:
                    bool_components[-1].append(term_query)
                else:
                    bool_components.append([term_query])
        # logging.warning("BOOL COMPONENTS %s", bool_components)

        # Now build the ES queries
        for bool_component in bool_components:
            if len(bool_component) == 1:
                # just combine this on its own
                q = bool_component[0]
            else:
                q = Bool()
                for sub_component in bool_component:
                    if boolean_search == 'or':
                        # apply the OPPOSITE of the global boolean operator
                        q.must(sub_component)
                    else:
                        q.should(sub_component)

            # combine to the overall query according to the global boolean operator
            if boolean_search == 'or':
                boolfilter.should(q)
            else:
                boolfilter.must(q)

    if 'geometry' in spatial_filter and 'type' in spatial_filter['geometry'] and spatial_filter['geometry']['type'] != '':
        geojson = spatial_filter['geometry']
        if geojson['type'] == 'bbox':
            coordinates = [[geojson['coordinates'][0], geojson['coordinates'][3]],
                           [geojson['coordinates'][2], geojson['coordinates'][1]]]
            geoshape = GeoShape(field='geometries.value', type='envelope', coordinates=coordinates)
            nested = Nested(path='geometries', query=geoshape)
        else:
            buffer = spatial_filter['buffer']
            geojson = JSONDeserializer().deserialize(_buffer(geojson, buffer['width'], buffer['unit']).json)
            geoshape = GeoShape(field='geometries.value', type=geojson['type'], coordinates=geojson['coordinates'])
            nested = Nested(path='geometries', query=geoshape)

        if 'inverted' not in spatial_filter:
            spatial_filter['inverted'] = False

        if spatial_filter['inverted']:
            boolfilter.must_not(nested)
        else:
            boolfilter.must(nested)

    if not boolquery.empty:
        query.add_query(boolquery)

    if not boolfilter.empty:
        query.add_filter(boolfilter)

    # Sorting criterion added to query (AZ 10/08/16)
    query.dsl.update({'sort': sorting})
    # logging.warning("-=-==-=-===-=--=-==-=-===-=- query: -=-==-=-===-=--=-==-=-===-=-> %s", query)

    return query

def search_terms(request):
    lang = request.GET.get('lang', settings.LANGUAGE_CODE)
    query = build_search_terms_dsl(request)
    results = query.search(index='term', doc_type='value')

    group_root_node = request.GET.get('group_root_node', '')
    delete_results = []
    for result in results['hits']['hits']:
        prefLabel = get_preflabel_from_conceptid(result['_source']['context'], lang)
        result['_source']['options']['context_label'] = prefLabel['value']

        entity_type = None
        # if a group is selected we have to filter out the results that don't belong to the selected group
        if group_root_node != 'No group':
            entities = []
            if 'conceptid' in result['_source']['options']:
                # concept: find the entity_type to check if it is connected to the selected group
                valueid = result['_source']['options']["conceptid"]
                value_relations_to = models.ConceptRelations.objects.filter(conceptidto=valueid, relationtype='member')
                if value_relations_to:
                    for value_relations_to_concept in value_relations_to:
                        value_parent_concept = models.Concepts.objects.filter(conceptid=value_relations_to_concept.conceptidfrom)
                        parent_relations_to = models.ConceptRelations.objects.filter(conceptidto=value_parent_concept[0].conceptid, relationtype='member')

                        if value_parent_concept[0].nodetype.nodetype == 'Concept':
                            # need to get at the parent until we reach the root collection.
                            # concepts are arranged hierarchically
                            grandparent = models.Concepts.objects.filter(conceptid=parent_relations_to[0].conceptidfrom)
                            entity_type = grandparent[0].legacyoid
                            entities.append(entity_type)
                        elif value_parent_concept[0].nodetype.nodetype == 'Collection':
                            entity_type = value_parent_concept[0].legacyoid
                            entities.append(entity_type)
                        else:
                            logging.warning("Not a concept or collection")
            else:
                # not a concept - possibly a name field or similar. Use the context
                entity_type = models.EntityTypes.objects.filter(conceptid=result['_source']['context'])
                entities.append(entity_type)

            delete_result = True
            # check if the entity_type is under the selected root group node
            # so that it can be deleted later
            if entities:
                for entity_type in entities:
                    res = Entity().get_mapping_schema_to(entity_type)
                    # search parents for group_root_node
                    for resourcetype in settings.RESOURCE_TYPE_CONFIGS().keys():
                        if resourcetype in res:
                            for parent in res[resourcetype]['steps']:
                                if parent['entitytyperange'] == group_root_node:
                                    delete_result = False
                                    break

            if delete_result:
                delete_results.append(result)

    # delete the flagged results
    for result in delete_results:
        results['hits']['hits'].remove(result)

    results['hits']['total'] = len(results['hits']['hits'])

    return JSONResponse(results)
