def update_nodes(self, entitytypeid, data):
    """Replace every node of ``entitytypeid`` on the resource with entities
    rebuilt from the posted form ``data``.

    Existing nodes of the given type are pruned first; each node group in
    ``data[entitytypeid]`` is then recreated from the mapping schema, the
    nodes of a group merged together, and the result merged back into the
    resource graph.  ``COMPONENT.E18`` groups attach to an existing
    ``PRODUCTION.E12`` node when the resource has one; everything else
    (including components on resources without a production node) merges at
    the resource root.

    :param entitytypeid: entity type id of the nodes being replaced
    :param data: dict keyed by entity type id; each value is a list of
        ``{'nodes': [{'entitytypeid', 'value', 'entityid'}, ...]}`` groups
    """
    # Drop the stale nodes of this type before rebuilding them.
    self.resource.prune(entitytypes=[entitytypeid])

    # Lazily cache the mapping schema for this resource graph.
    if self.schema is None:
        self.schema = Entity.get_mapping_schema(self.resource.entitytypeid)

    for value in data[entitytypeid]:
        baseentity = None
        for newentity in value['nodes']:
            entity = Entity()
            if newentity['entitytypeid'] in self.schema:
                entity.create_from_mapping(
                    self.resource.entitytypeid,
                    self.schema[newentity['entitytypeid']]['steps'],
                    newentity['entitytypeid'],
                    newentity['value'],
                    newentity['entityid'])
                if baseentity is None:
                    baseentity = entity
                else:
                    baseentity.merge(entity)

        # Robustness: skip groups where no node matched the schema rather
        # than merging None into the graph.
        if baseentity is None:
            continue

        # Pick the merge target once instead of duplicating the merge_at
        # call across branches.
        mergenodeid = self.resource.entitytypeid
        if entitytypeid == 'COMPONENT.E18':
            production_entities = self.resource.find_entities_by_type_id('PRODUCTION.E12')
            if len(production_entities) > 0:
                mergenodeid = 'PRODUCTION.E12'
        self.resource.merge_at(baseentity, mergenodeid)

    self.resource.trim()
def update_nodes(self, entitytypeid, data, dataKey=None):
    """Replace every node of ``entitytypeid`` on the resource with entities
    rebuilt from the posted form ``data``.

    Supports two shapes inside each group's ``'nodes'`` list: a plain node
    dict, or a nested list of node lists describing repeating sub-groups.
    Sub-groups are built independently and grafted onto the base entity at
    the ``entitytypeid`` node.

    :param entitytypeid: entity type id of the nodes being replaced
    :param data: dict of posted node groups
    :param dataKey: key into ``data`` holding the groups; defaults to
        ``entitytypeid``
    """
    if dataKey is None:
        dataKey = entitytypeid

    # Drop the stale nodes of this type before rebuilding them.
    self.resource.prune(entitytypes=[entitytypeid])

    # Lazily cache the mapping schema for this resource graph.
    if self.schema is None:
        self.schema = Entity.get_mapping_schema(self.resource.entitytypeid)

    # BUG FIX: previously indexed data[entitytypeid], silently ignoring a
    # caller-supplied dataKey; unchanged behavior for the default case.
    for value in data[dataKey]:
        baseentity = None
        for newentity in value['nodes']:
            if isinstance(newentity, list):
                # Nested list: build each repeating sub-group separately,
                # then merge it onto the base entity at the group root.
                for newentityitem in newentity:
                    baseentitygroup = None
                    for newsubentity in newentityitem:
                        entity = Entity()
                        if newsubentity['entitytypeid'] in self.schema:
                            entity.create_from_mapping(
                                self.resource.entitytypeid,
                                self.schema[newsubentity['entitytypeid']]['steps'],
                                newsubentity['entitytypeid'],
                                newsubentity['value'],
                                '')
                            if baseentitygroup is None:
                                baseentitygroup = entity
                            else:
                                baseentitygroup.merge(entity)
                    if baseentity is None:
                        baseentity = baseentitygroup
                    else:
                        baseentity.merge_at(baseentitygroup, entitytypeid)
            else:
                entity = Entity()
                if newentity['entitytypeid'] in self.schema:
                    entity.create_from_mapping(
                        self.resource.entitytypeid,
                        self.schema[newentity['entitytypeid']]['steps'],
                        newentity['entitytypeid'],
                        newentity['value'],
                        '')
                    if baseentity is None:
                        baseentity = entity
                    else:
                        baseentity.merge(entity)

        # Robustness: skip groups where no node matched the schema rather
        # than merging None into the graph.
        if baseentity is not None:
            self.resource.merge_at(baseentity, self.resource.entitytypeid)

    self.resource.trim()
def update_nodes(self, entitytypeid, data, dataKey=None):
    """Replace every node of ``entitytypeid`` on the resource with entities
    rebuilt from the posted form ``data``.

    Existing nodes of the given type are pruned, each group of posted nodes
    is recreated from the mapping schema and merged together, and the
    result is merged back into the resource at its root node.

    :param entitytypeid: entity type id of the nodes being replaced
    :param data: dict of posted node groups; each group is
        ``{'nodes': [{'entitytypeid', 'value', 'entityid'}, ...]}``
    :param dataKey: key into ``data`` holding the groups; defaults to
        ``entitytypeid``
    """
    if dataKey is None:
        dataKey = entitytypeid

    # Drop the stale nodes of this type before rebuilding them.
    self.resource.prune(entitytypes=[entitytypeid])

    # Lazily cache the mapping schema for this resource graph.
    if self.schema is None:
        self.schema = Entity.get_mapping_schema(self.resource.entitytypeid)

    # BUG FIX: previously indexed data[entitytypeid], silently ignoring a
    # caller-supplied dataKey; unchanged behavior for the default case.
    for value in data[dataKey]:
        baseentity = None
        for newentity in value['nodes']:
            entity = Entity()
            if newentity['entitytypeid'] in self.schema:
                entity.create_from_mapping(
                    self.resource.entitytypeid,
                    self.schema[newentity['entitytypeid']]['steps'],
                    newentity['entitytypeid'],
                    newentity['value'],
                    newentity['entityid'])
                if baseentity is None:
                    baseentity = entity
                else:
                    baseentity.merge(entity)

        # Robustness: skip groups where no node matched the schema rather
        # than merging None into the graph.
        if baseentity is not None:
            self.resource.merge_at(baseentity, self.resource.entitytypeid)

    self.resource.trim()
def migrate(settings=None):
    """Migrate resource graphs whose node structure changed between schema
    versions.

    Scans ``settings.ADDITIONAL_RESOURCE_GRAPH_LOCATIONS`` for
    ``*_altered_nodes.csv`` files; each row maps an old entity type to its
    replacement and names the old/new group root nodes.  For every stored
    entity under an old group root, a new subgraph is built from the mapping
    schema, populated with the migrated values, merged into the owning
    resource and saved.  Save failures are collected in ``errors`` and
    printed at the end rather than aborting the run.

    :param settings: Django settings module; the project settings are
        imported when not supplied.
    """
    if not settings:
        from django.conf import settings

    suffix = '_altered_nodes.csv'
    errors = []

    for path in settings.ADDITIONAL_RESOURCE_GRAPH_LOCATIONS:
        if os.path.exists(path):
            print '\nLOADING NODE MIGRATION INFO (%s)' % (path)
            print '--------------'
            for f in listdir(path):
                if isfile(join(path, f)) and f.endswith(suffix):
                    path_to_file = join(path, f)
                    # Strip the 18-character '_altered_nodes.csv' suffix.
                    basepath = path_to_file[:-18]
                    name = basepath.split(os.sep)[-1]
                    migrations = get_list_dict(basepath + '_altered_nodes.csv', ['OLDENTITYTYPEID', 'NEWENTITYTYPEID', 'GROUPROOTNODEOLD', 'GROUPROOTNODENEW'])

                    # Identify nodes which must be migrated.
                    resource_entity_type = 'HERITAGE_RESOURCE_GROUP.E27'
                    mapping_schema = Entity.get_mapping_schema(resource_entity_type)

                    # Group migrations by (old, new) group root node.
                    # NOTE(review): itertools.groupby only groups adjacent
                    # rows — this assumes the CSV is already sorted by these
                    # two columns; confirm against the data files.
                    groups = groupby(migrations, lambda x: (x['GROUPROOTNODEOLD'], x['GROUPROOTNODENEW']))

                    for group_root_node_ids, group_migrations in groups:
                        # Convert group_migrations to a list as we need to
                        # iterate it multiple times (groupby yields one-shot
                        # iterators).
                        group_migrations_list = []
                        for group_migration in group_migrations:
                            group_migrations_list.append(group_migration)

                        group_root_node_id = group_root_node_ids[0]
                        new_group_root_node_id = group_root_node_ids[1]

                        # Find all entities with the old group root node.
                        group_root_entities = models.Entities.objects.filter(entitytypeid=group_root_node_id)
                        print "ENTITIES COUNT: ", group_root_entities.count()
                        for group_root_entity_model in group_root_entities.iterator():
                            # Create a new subgraph for each of the migration
                            # steps, then merge them together at the group
                            # root node.

                            # Get the full resource graph for the root entity.
                            try:
                                group_root_entity = Entity(group_root_entity_model.pk)
                            except:
                                print "Faulty group entity's ID %s and entitytype %s" % (group_root_entity_model.pk, group_root_entity_model.entitytypeid)
                                continue
                            new_group_root_entity = Entity().create_from_mapping(resource_entity_type, mapping_schema[new_group_root_node_id]['steps'], new_group_root_node_id, '')
                            if group_migrations_list[0]['NEWENTITYTYPEID'] != new_group_root_node_id:
                                # Create a node for the new group root.
                                group_root_is_new_data_node = False
                            else:
                                group_root_is_new_data_node = True

                            # Get the root resource graph for this entity;
                            # orphaned entities are skipped.
                            resource_model = get_resource_for_entity(group_root_entity, resource_entity_type)
                            if not resource_model:
                                continue
                            resource = Resource().get(resource_model.entityid)

                            for group_migration in group_migrations_list:
                                # Get individual entities to be migrated in
                                # the source group.
                                old_entities = group_root_entity.find_entities_by_type_id(group_migration['OLDENTITYTYPEID'])
                                for old_entity in old_entities:
                                    date_on = False
                                    # Create the corresponding entity in the
                                    # new schema.
                                    new_entity = Entity()
                                    # Disturbance dates need to be mapped to
                                    # different nodes depending on the value
                                    # of the now obsolete
                                    # DISTURBANCE_DATE_TYPE.E55.
                                    if group_migration['OLDENTITYTYPEID'] in ['DISTURBANCE_DATE_END.E49', 'DISTURBANCE_DATE_START.E49']:
                                        date_type_node = group_root_entity.find_entities_by_type_id('DISTURBANCE_DATE_TYPE.E55')
                                        if date_type_node:
                                            if date_type_node[0].label == 'Occurred before':
                                                new_entity_type_id = 'DISTURBANCE_DATE_OCCURRED_BEFORE.E61'
                                            elif date_type_node[0].label == 'Occurred on':
                                                # 'Occurred on' keeps only the
                                                # end date; the start date is
                                                # dropped via the date_on flag.
                                                if group_migration['OLDENTITYTYPEID'] == 'DISTURBANCE_DATE_START.E49':
                                                    date_on = True
                                                else:
                                                    new_entity_type_id = 'DISTURBANCE_DATE_OCCURRED_ON.E61'
                                            # NOTE(review): labels other than
                                            # 'Occurred before'/'Occurred on'
                                            # leave new_entity_type_id from a
                                            # previous iteration — confirm the
                                            # vocabulary has only these two.
                                        else:
                                            new_entity_type_id = group_migration['NEWENTITYTYPEID']
                                    else:
                                        new_entity_type_id = group_migration['NEWENTITYTYPEID']

                                    old_value = old_entity.value
                                    if old_entity.businesstablename == 'domains':
                                        # In some cases we move from domains to
                                        # strings: carry the human-readable
                                        # label instead of the domain value.
                                        newEntityType = models.EntityTypes.objects.get(entitytypeid=new_entity_type_id)
                                        if newEntityType.businesstablename == 'strings':
                                            old_value = old_entity.label
                                    if not date_on:
                                        new_entity.create_from_mapping(resource_entity_type, mapping_schema[new_entity_type_id]['steps'], new_entity_type_id, old_value)

                                        # In some cases a newly created data
                                        # node is the new group root. In this
                                        # case we should discard the previously
                                        # created new group root and use this
                                        # one instead.
                                        if new_group_root_node_id == new_entity_type_id:
                                            new_group_root_entity = new_entity
                                            group_root_is_new_data_node = True

                                        # UNUSED
                                        # # If there is a node to be inserted, do it here
                                        # # if 'INSERT_NODE_RULE' in group_migration:
                                        # # entityttypeid_to_insert = group_migration['INSERT_NODE_RULE'][1][1]
                                        # # value_to_insert = group_migration['INSERT_NODE_RULE'][1][2]
                                        # #
                                        # # inserted_entity = Entity()
                                        # # inserted_entity.create_from_mapping(resource_entity_type, mapping_schema[entityttypeid_to_insert]['steps'], entityttypeid_to_insert, value_to_insert)
                                        # #
                                        # # new_entity.merge(inserted_entity)

                                        # If there is a node in common with the
                                        # existing node further down the chain
                                        # than the group root node, merge there
                                        # — follow links back from the parent.
                                        shouldnt_merge_with_group_root = group_root_is_new_data_node and new_group_root_node_id == new_entity_type_id
                                        if not shouldnt_merge_with_group_root:
                                            has_merged = False
                                            reversed_steps = mapping_schema[new_entity_type_id]['steps'][::-1]
                                            for step in reversed_steps:
                                                # Find the entitytypedomain in
                                                # the new_group_root_entity and
                                                # merge at the first match only.
                                                if not has_merged:
                                                    mergeable_nodes = new_group_root_entity.find_entities_by_type_id(step['entitytypedomain'])
                                                    if len(mergeable_nodes) > 0:
                                                        new_group_root_entity.merge_at(new_entity, step['entitytypedomain'])
                                                        has_merged = True
                                            new_entity = None
                                            # gc.collect()
                                            if not has_merged:
                                                logging.warning("Unable to merge newly created entity")

                            # Merge the new group root entity into the resource.
                            resource.merge_at(new_group_root_entity, resource_entity_type)
                            logging.warning("SAVING RESOURCE, %s", resource)

                            # Save the resource; collect (don't raise) errors
                            # so the remaining resources still migrate.
                            resource.trim()
                            try:
                                resource._save()
                                resource = None
                            except Exception as e:
                                logging.warning("Error saving resource")
                                logging.warning(e)
                                errors.append("Error saving %s. Error was %s" % (resource, e))
                            # Release graph references before the next
                            # (potentially large) resource is loaded.
                            group_root_entity.clear()
                            group_root_entity = None
                            new_group_root_entity.clear()
                            new_group_root_entity = None
                            # end for group root
                            # resource.index()
                            # logging.warning("SAVED RESOURCE, %s", resource)

    # Truncate the error log, then report any collected failures.
    utils.write_to_file(os.path.join(settings.PACKAGE_ROOT, 'logs', 'migration_errors.txt'), '')
    if len(errors) > 0:
        # utils.write_to_file(os.path.join(settings.PACKAGE_ROOT, 'logs', 'migration_errors.txt'), '\n'.join(errors))
        print "\n\nERROR: There were errors migrating some resources. See below"
        print errors