def handle(self, *args, **options):
    total_orphans_count = 0
    for topic in Topic.objects.all():
        # Skip common & energy as usual
        if topic.slug in ["common", "energy"]:
            continue
        self.stdout.write("Topic: %s" % topic)
        orphans_count = 0
        try:
            for Model in topic.get_models():
                try:
                    for field in utils.iterate_model_fields(Model):
                        if field["rel_type"] and field["direction"] == "out" and "through" in field["rules"]:
                            ids = []
                            for entity in Model.objects.all():
                                ids.extend([_.id for _ in entity.node.relationships.all()])
                            Properties = field["rules"]["through"]
                            for info in Properties.objects.all():
                                if info._relationship not in ids:
                                    self.stdout.write("\t%s is an orphan property of the model %s." % (info._NodeModel__node, Model))
                                    orphans_count += 1
                                    total_orphans_count += 1
                                    if options["fix"]:
                                        self.stdout.write("\tremoving %s" % info)
                                        info.delete()
                except Exception as e:
                    self.stderr.write("\tError with fields of %s (%s)" % (entity, e))
            if orphans_count > 0:
                self.stdout.write("\tfound %d orphans" % orphans_count)
        except Exception as e:
            self.stderr.write("\tError with model %s (%s)" % (Model.__name__, e))
    self.stdout.write("TOTAL: found %d orphans" % total_orphans_count)
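# Usage sketch (assumption): handle() above belongs to a Django management
# command; "find_orphans" is a hypothetical command name for illustration.
from django.core.management import call_command

call_command("find_orphans", fix=False)  # dry run: report orphan properties only
call_command("find_orphans", fix=True)   # report and delete them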
def formfield_for_choice_field(self, db_field, request, **kwargs):
    if db_field.name == 'name' and hasattr(request, "topic_id"):
        # We add choices for this field using the current topic's models
        kwargs["choices"] = []
        # Get the current topic with the ID set in the parent form
        topic = Topic.objects.get(id=request.topic_id)
        # Get the topic's models
        models = topic.get_models()
        for model in models:
            model_name = getattr(model._meta, "verbose_name").title()
            subset = []
            # Retrieve every field for this model
            for field in utils.iterate_model_fields(model):
                if field["type"] != 'AutoField':
                    choice = [
                        field["name"],
                        field["verbose_name"].title(),
                    ]
                    # Add "..." at the end of relationship fields
                    if field["type"] == 'Relationship':
                        choice[1] += "..."
                    subset.append(choice)
            # Add the choice subset only if it contains elements
            if len(subset):
                kwargs["choices"].append((model_name, subset))
    return super(SearchTermInline, self).formfield_for_choice_field(db_field, request, **kwargs)
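# Illustration (assumption): for a topic exposing a hypothetical "Person" model
# with a "name" CharField and a "based_in" Relationship, the grouped choices
# built above would look like:
example_choices = [
    ("Person", [
        ["name", "Name"],
        ["based_in", "Based In..."],  # "..." marks a Relationship field
    ]),
]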
def delete_entity(*args, **kwargs):
    fields = utils.iterate_model_fields(kwargs.get('instance').__class__)
    for field in fields:
        if field["rel_type"] and "through" in field["rules"] and field["rules"]["through"] is not None:
            Properties = field["rules"]["through"]
            for info in Properties.objects.all():
                info.delete()
    update_topic_cache(*args, **kwargs)
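# Wiring sketch (assumption): delete_entity() reads kwargs["instance"], which
# matches the signature of Django model signals, so it is presumably connected
# as a receiver, e.g.:
from django.db.models.signals import pre_delete

pre_delete.connect(delete_entity)  # optionally pass sender=<model> to scope it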
def get_columns(model):
    edges = dict()
    columns = []
    for field in utils.iterate_model_fields(model):
        if field['type'] != 'Relationship':
            if field['name'] not in ['id']:
                columns.append(field['name'])
        else:
            edges[field['rel_type']] = [field['model'], field['name'], field['related_model']]
    return (columns, edges)
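# Usage sketch (assumption: "Person" is a hypothetical topic model):
# columns, edges = get_columns(Person)
# columns -> ['name', 'image', ...]                           # literal fields, minus 'id'
# edges   -> {'BASED_IN': ['Person', 'based_in', 'Country']}  # rel_type -> [model, field, related model]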
def alter_detail_data_to_serialize(self, request, bundle, nested=False):
    model = self.get_model()
    # Get relationship fields
    fields = [f for f in model._meta.fields if f.get_internal_type() == 'Relationship']
    node_rels = bundle.obj.node.relationships.all()
    # If the nested parameter is True, this set collects the nodes to retrieve
    node_to_retrieve = set()
    # Resolve relationships manually
    for field in fields:
        # Get relationships for this field
        field_rels = [rel for rel in node_rels[:] if rel.type == field._type]
        # Filter relationships to keep only the well-oriented ones:
        # get the related field information
        related_field = [f for f in iterate_model_fields(model)
                         if "rel_type" in f and f["rel_type"] == field._type
                         and "name" in f and f["name"] == field._BoundRelationship__attname]
        if related_field:
            # Note (edouard): check some assertions in case I forgot something
            assert len(related_field) == 1, related_field
            assert related_field[0]["direction"]
            # Choose the end point to check
            end_point_side = "start" if related_field[0]["direction"] == "out" else "end"
            # Filter the relationships
            field_rels = [rel for rel in field_rels if getattr(rel, end_point_side).id == bundle.obj.id]
        # Get node ids for those relationships
        field_opposites = [graph.opposite(rel, bundle.obj.id) for rel in field_rels]
        # Save the list into properties
        bundle.data[field.name] = field_opposites
        # Nested mode: we need to retrieve every node
        if nested:
            node_to_retrieve = set(list(node_to_retrieve) + field_opposites)
    # There are nodes to extract from the graph
    if len(node_to_retrieve):
        # Build the query to get all nodes in one request
        query = "start n=node(%s) RETURN ID(n), n" % ",".join(map(str, node_to_retrieve))
        # Get all nodes as raw values to avoid unintended requests to the graph
        nodes = connection.query(query, returns=(int, dict))
        # Helper lambda to retrieve a node
        retrieve_node = lambda idx: next(n[1]["data"] for n in nodes if n[0] == idx)
        # Populate the relationship fields with their node instances
        for field in fields:
            # Retrieve the list of ids
            for i, idx in enumerate(bundle.data[field.name]):
                rel_node = retrieve_node(idx)
                # Save the id, which is not a node property
                rel_node["id"] = idx
                # Update the value
                bundle.data[field.name][i] = self.validate(rel_node, field.target_model, allow_missing=True)
    # Show additional fields following the model's rules
    rules = request.current_topic.get_rules().model(self.get_model()).all()
    # All additional relationships
    for key in rules:
        # Filter rules to keep only Neomatch instances.
        # Neomatch is a class to programmatically create a search related to this node.
        if isinstance(rules[key], Neomatch):
            bundle.data[key] = rules[key].query(bundle.obj.id)
    return bundle
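# Effect sketch (assumption: a hypothetical "based_in" Relationship field):
# before:               bundle.data == {"name": "Ada"}
# after (nested=False): bundle.data["based_in"] == [12, 34]        # opposite node ids
# after (nested=True):  bundle.data["based_in"] == [{...}, {...}]  # validated node payloads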
def iterate_fields(model, is_relationship):
    for field in [f for f in utils.iterate_model_fields(model)
                  if (f['type'].lower() == 'relationship') == is_relationship]:
        if "search_terms" in field["rules"]:
            yield [{
                'name': field['name'],
                'label': st,
                'subject': model._meta.object_name
            } for st in field["rules"]["search_terms"]]
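# Usage sketch (assumption: "Person" is a hypothetical model): the generator
# yields one list per field, so flatten it to get individual search terms.
# terms = [term
#          for group in iterate_fields(Person, is_relationship=True)
#          for term in group]
# Each term is a dict such as:
# {'name': 'based_in', 'label': 'based in', 'subject': 'Person'}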
def rdf_search_query(self, subject, predicate, obj):
    identifier = obj["id"] if "id" in obj else obj
    # Retrieve all models in the current topic
    all_models = dict((model.__name__, model) for model in self.get_models())
    # If the received identifier describes a literal value
    if self.is_registered_literal(predicate["name"]):
        # Get the field name into the database
        field_name = predicate["name"]
        # Build the request
        query = """
            START root=node(*)
            MATCH (root)<-[:`<<INSTANCE>>`]-(type)
            WHERE HAS(root.name)
            AND HAS(root.{field})
            AND root.{field} = {value}
            AND type.model_name = {model}
            AND type.app_label = '{app}'
            RETURN DISTINCT ID(root) as id, root.name as name, type.model_name as model
        """.format(field=field_name,
                   value=identifier,
                   model=subject["name"],
                   app=self.app_label())
    # If the received identifier describes a relationship
    elif self.is_registered_relationship(predicate["name"]):
        fields = utils.iterate_model_fields(all_models[predicate["subject"]])
        # Get the field name into the database
        relationships = [field for field in fields if field["name"] == predicate["name"]]
        # We didn't find the predicate
        if not len(relationships):
            return {'errors': 'Unknown predicate type'}
        relationship = relationships[0]["rel_type"]
        # Query to get every result
        query = u"""
            START st=node({id})
            MATCH (st){is_out}-[:`{relationship}`]-{is_in}(root)<-[:`<<INSTANCE>>`]-(type)
            WHERE HAS(root.name)
            AND HAS(st.name)
            AND type.app_label = '{app}'
            RETURN DISTINCT ID(root) as id, root.name as name, type.model_name as model
        """.format(
            relationship=relationship,
            id=identifier,
            app=self.app_label(),
            is_out='<' if relationships[0]['direction'] == 'out' else '',
            is_in='>' if relationships[0]['direction'] == 'in' else '')
    else:
        return {'errors': 'Unknown predicate type: %s' % predicate["name"]}
    return connection.cypher(query).to_dicts()
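# Input shape sketch (assumption, inferred from the attribute accesses above):
subject = {"name": "Person"}                            # model whose instances are searched
predicate = {"name": "based_in", "subject": "Person"}   # literal field or relationship name
obj = {"id": 42}                                        # node id, or a raw literal value
# results = topic.rdf_search_query(subject, predicate, obj)
# -> [{"id": ..., "name": ..., "model": ...}, ...]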
def field(self):
    cache_key = "%s__field" % self.name
    field = utils.topic_cache.get(self.topic, cache_key)
    if field is None and self.name:
        topic_models = self.topic.get_models()
        for model in topic_models:
            # Retrieve every field for this model
            for f in utils.iterate_model_fields(model):
                if f["name"] == self.name:
                    field = f
                    # Yes, this is ugly, but this field is creating pickling errors.
                    field["rules"]["through"] = None
        utils.topic_cache.set(self.topic, cache_key, field)
    return field
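# Caching note (assumption): the resolved field description is memoized per
# topic under the key "<name>__field", so repeated lookups skip the model
# scan. E.g. a term named "based_in" is stored and fetched as:
# utils.topic_cache.get(topic, "based_in__field")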
def summary_forms(self, bundle, request):
    available_resources = {}
    # Get the model's rules manager
    rulesManager = self.topic.get_rules()
    # Fetch every registered model to print out its rules
    for model in self.topic.get_models():
        name = model.__name__.lower()
        rules = rulesManager.model(model).all()
        verbose_name = getattr(model._meta, "verbose_name", name)
        verbose_name_plural = getattr(model._meta, "verbose_name_plural", verbose_name + "s")
        # Build the field list first; the rules loop below appends to it
        fields = [field.copy() for field in utils.iterate_model_fields(model)]
        fields = [self.sanitize_field(field) for field in fields]
        for key in rules:
            # Filter rules to keep only Neomatch
            if isinstance(rules[key], Neomatch):
                fields.append({
                    "name": key,
                    "type": "ExtendedRelationship",
                    "verbose_name": rules[key].title,
                    "rules": {},
                    "related_model": rules[key].target_model.__name__
                })
        available_resources[name] = {
            'help_text': getattr(model, "_description", None),
            'topic': getattr(model, "_topic", self.topic.slug) or self.topic.slug,
            'model': getattr(model, "__name__", ""),
            'verbose_name': verbose_name,
            'verbose_name_plural': verbose_name_plural,
            'name': name,
            'fields': fields,
            'rules': rules,
            'index': getattr(model, "__idx__", 0)
        }
    return available_resources
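# Output shape sketch (assumption: a topic with a single hypothetical "Person"
# model; values abbreviated):
# summary_forms(bundle, request) ->
# {
#     "person": {
#         "help_text": None,
#         "topic": "my-topic",
#         "model": "Person",
#         "verbose_name": "person",
#         "verbose_name_plural": "persons",
#         "name": "person",
#         "fields": [...],   # sanitized field descriptions, plus Neomatch entries
#         "rules": {...},
#         "index": 0,
#     }
# }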
def process_bulk_parsing_and_save_as_model(topic, files, start_time=None):
    """
    Job which parses uploaded content, validates and saves it as models
    """
    start_time = start_time if start_time is not None else time.time()
    entities = {}
    relations = []
    errors = []
    id_mapping = {}
    nb_lines = 0
    file_reading_progression = 0
    job = get_current_job()

    # Define exceptions
    class Error(Exception):
        """
        Generic custom exception for this endpoint. Includes the topic.
        """
        def __init__(self, **kwargs):
            """ Set the topic and add all the parameters as attributes """
            self.topic = topic.title
            for key, value in kwargs.items():
                setattr(self, key, value)

        def __str__(self):
            return str(self.__dict__)

    class WarningCastingValueFail(Error): pass
    class WarningValidationError(Error): pass
    class WarningKeyUnknown(Error): pass
    class WarningInformationIsMissing(Error): pass
    class AttributeDoesntExist(Error): pass
    class WrongCSVSyntax(Error): pass
    class ColumnUnknow(Error): pass
    class ModelDoesntExist(Error): pass
    class RelationDoesntExist(Error): pass

    try:
        assert type(files) in (tuple, list), type(files)
        assert len(files) > 0, "You need to upload at least one file."
        assert type(files[0]) in (tuple, list)
        assert len(files[0]) == 2
        # Retrieve all models in the current topic
        all_models = dict((model.__name__, model) for model in topic.get_models())
        # Iterate over all files and dissociate entity .csv files from relation .csv files
        for file in files:
            if type(file) is tuple:
                file_name = file[0]
                file = file[1]
            else:
                raise Exception("Files must be (file_name, file) tuples.")
            csv_reader = utils.open_csv(file)
            header = csv_reader.next()
            assert len(header) > 1, "{file_name} header should have at least 2 columns".format(file_name=file_name)
            assert header[0].endswith("_id"), (
                "{file_name}: first column should have a header like <model_name>_id, got {first_col}"
                .format(file_name=file_name, first_col=header[0]))
            if len(header) >= 3 and header[0].endswith("_id") and header[2].endswith("_id"):
                # This is a relationship file
                relations.append((file_name, file))
            else:
                # This is an entities file
                model_name = utils.to_class_name(header[0].replace("_id", ""))
                if model_name in all_models.keys():
                    entities[model_name] = (file_name, file)
                else:
                    raise ModelDoesntExist(model=model_name,
                                           file=file_name,
                                           models_availables=all_models.keys())
            nb_lines += len(file) - 1  # -1 removes the header

        # First iterate over entities
        logger.debug("BulkUpload: creating entities")
        for entity, (file_name, file) in entities.items():
            csv_reader = utils.open_csv(file)
            header = csv_reader.next()
            # Check that all columns map to an existing model field
            fields = utils.get_model_fields(all_models[entity])
            fields_types = {}
            for field in fields:
                fields_types[field['name']] = field['type']
            field_names = [field['name'] for field in fields]
            columns = []
            for column in header[1:]:
                column = utils.to_underscores(column)
                if column not in field_names and not column.endswith("__sources__"):
                    raise ColumnUnknow(file=file_name,
                                       column=column,
                                       model=entity,
                                       attributes_available=field_names)
                if column.endswith("__sources__"):
                    column_type = "__sources__"
                    column = column[:-len("__sources__")]
                    if column not in field_names:
                        raise ColumnUnknow(file=file_name,
                                           column=column,
                                           model=entity,
                                           attributes_available=field_names)
                else:
                    column_type = fields_types.get(column, None)
                columns.append((column, column_type))
            else:
                # Here, we know that all columns are valid
                for row in csv_reader:
                    data = {}
                    sources = {}
                    entity_id = row[0]
                    for i, (column, column_type) in enumerate(columns):
                        value = str(row[i + 1]).decode('utf-8')
                        # Cast the value if needed
                        if value:
                            try:
                                if "Integer" in column_type:
                                    value = int(value)
                                # TODO: cast float
                                if "Date" in column_type:
                                    value = datetime.datetime(*map(int, re.split(r'[^\d]', value)[:3])).replace(tzinfo=utc)
                            except Exception as e:
                                errors.append(WarningCastingValueFail(
                                    column_name=column,
                                    value=value,
                                    type=column_type,
                                    data=data,
                                    model=entity,
                                    file=file_name,
                                    line=csv_reader.line_num,
                                    error=str(e)))
                                break
                            if column_type == "__sources__":
                                sources[column] = value
                            else:
                                data[column] = value
                    else:
                        # Instantiate a model (for-else: runs only if no cast error broke the loop)
                        try:
                            item = all_models[entity].objects.create(**data)
                            # Map the object with the ID defined in the .csv
                            id_mapping[(entity, entity_id)] = item
                            # Create sources
                            for sourced_field, reference in sources.items():
                                for ref in reference.split("||"):
                                    FieldSource.objects.create(individual=item.id,
                                                               field=sourced_field,
                                                               reference=ref)
                            # FIXME: job can be accessed somewhere else (i.e. detective/topics/common/jobs.py:JobResource)
                            # Concurrent accesses are not secure here.
                            # For now we refresh the job just before saving it.
                            file_reading_progression += 1
                            if job:
                                job.refresh()
                                job.meta["file_reading_progression"] = (float(file_reading_progression) / float(nb_lines)) * 100
                                job.meta["file_reading"] = file_name
                                job.save()
                        except Exception as e:
                            errors.append(WarningValidationError(
                                data=data,
                                model=entity,
                                file=file_name,
                                line=csv_reader.line_num,
                                error=str(e)))

        inserted_relations = 0
        # Then iterate over relations
        logger.debug("BulkUpload: creating relations")
        for file_name, file in relations:
            # Create a csv reader
            csv_reader = utils.open_csv(file)
            csv_header = csv_reader.next()
            relation_name = utils.to_underscores(csv_header[1])
            model_from = utils.to_class_name(csv_header[0].replace("_id", ""))
            model_to = utils.to_class_name(csv_header[2].replace("_id", ""))
            properties_name = csv_header[3:]
            # Retrieve ModelProperties from the related model
            ModelProperties = topic.get_rules().model(all_models[model_from]).field(relation_name).get("through")
            # Check that the relation actually exists between the two models
            try:
                getattr(all_models[model_from], relation_name)
            except Exception as e:
                raise RelationDoesntExist(
                    file=file_name,
                    model_from=model_from,
                    model_to=model_to,
                    relation_name=relation_name,
                    fields_available=[field['name'] for field in utils.iterate_model_fields(all_models[model_from])],
                    error=str(e))
            for row in csv_reader:
                id_from = row[0]
                id_to = row[2]
                properties = [p.decode('utf-8') for p in row[3:]]
                if id_to and id_from:
                    try:
                        instance_from = id_mapping[(model_from, id_from)]
                        instance_to = id_mapping[(model_to, id_to)]
                        getattr(instance_from, relation_name).add(instance_to)
                        # Add properties if needed
                        if ModelProperties and properties_name and properties:
                            # Save the relationship to create an id
                            instance_from.save()
                            # Retrieve this id
                            relation_id = next(rel.id for rel in instance_from.node.relationships.outgoing()
                                               if rel.end.id == instance_to.id)
                            # Properties of the relationship
                            relation_args = {
                                "_endnodes": [id_mapping[(model_from, id_from)].id, instance_to.id],
                                "_relationship": relation_id,
                            }
                            # Pair the properties with their names
                            relation_args.update(zip(properties_name, properties))
                            try:
                                ModelProperties.objects.create(**relation_args)
                            except TypeError as e:
                                errors.append(AttributeDoesntExist(
                                    file=file_name,
                                    line=csv_reader.line_num,
                                    model_from=model_from,
                                    id_from=id_from,
                                    model_to=model_to,
                                    id_to=id_to,
                                    relation_args=relation_args,
                                    error=str(e)))
                        # Update the job
                        inserted_relations += 1
                        file_reading_progression += 1
                        if job:
                            job.refresh()
                            job.meta["file_reading_progression"] = (float(file_reading_progression) / float(nb_lines)) * 100
                            job.meta["file_reading"] = file_name
                            job.save()
                    except KeyError as e:
                        errors.append(WarningKeyUnknown(
                            file=file_name,
                            line=csv_reader.line_num,
                            model_from=model_from,
                            id_from=id_from,
                            model_to=model_to,
                            id_to=id_to,
                            relation_name=relation_name,
                            error=str(e)))
                    except Exception as e:
                        # Unknown error: we break the process to alert the user
                        raise Error(
                            file=file_name,
                            line=csv_reader.line_num,
                            model_from=model_from,
                            id_from=id_from,
                            model_to=model_to,
                            id_to=id_to,
                            relation_name=relation_name,
                            error=str(e))
                else:
                    # A key is missing (id_from or id_to) but we don't want to stop the parsing.
                    # We store the wrong line to return it to the user.
                    errors.append(WarningInformationIsMissing(
                        file=file_name,
                        row=row,
                        line=csv_reader.line_num,
                        id_to=id_to,
                        id_from=id_from))

        # Save everything
        saved = 0
        logger.debug("BulkUpload: saving %d objects" % len(id_mapping))
        if job:
            job.refresh()
            job.meta["objects_to_save"] = len(id_mapping)
            job.save()
        for item in id_mapping.values():
            item.save()
            saved += 1
            if job:
                job.refresh()
                job.meta["saving_progression"] = saved
                job.save()
        if job:
            job.refresh()
        if job and "track" in job.meta:
            from django.core.mail import send_mail
            user = User.objects.get(pk=job.meta["user"])
            send_mail("upload finished", "your upload just finished",
                      settings.DEFAULT_FROM_EMAIL, (user.email,))
        return {
            'duration': (time.time() - start_time),
            'inserted': {
                'objects': saved,
                'links': inserted_relations
            },
            "errors": sorted([dict([(e.__class__.__name__, str(e.__dict__))]) for e in errors])
        }
    except Exception as e:
        import traceback
        logger.error(traceback.format_exc())
        message = str(e.__dict__) if e.__dict__ else e.message
        return {"errors": [{e.__class__.__name__: message}]}
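# Input sketch (assumption, inferred from the assertions and header checks
# above): `files` is a list of (file_name, rows) pairs; a 2-column
# "<model>_id, ..." header marks an entities file, while a 3-column
# "<model>_id, <relation>, <model>_id" header marks a relations file.
example_files = [
    ("persons.csv", [
        ["person_id", "name"],
        ["1", "Ada Lovelace"],
    ]),
    ("countries.csv", [
        ["country_id", "name"],
        ["2", "United Kingdom"],
    ]),
    ("person_based_in_country.csv", [
        ["person_id", "based_in", "country_id"],
        ["1", "", "2"],
    ]),
]
# report = process_bulk_parsing_and_save_as_model(topic, example_files)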
def get_patch(self, request, **kwargs):
    pk = kwargs["pk"]
    # This should be a POST request
    self.method_check(request, allowed=['post'])
    self.throttle_check(request)
    # User must be authenticated
    self.is_authenticated(request)
    bundle = self.build_bundle(request=request)
    # The user must be allowed to update this model
    self.authorized_update_detail(self.get_object_list(bundle.request), bundle)
    # Get the node's data using the REST API
    try:
        node = connection.nodes.get(pk)
    # Node not found
    except client.NotFoundError:
        raise Http404("Not found.")
    # Load every relationship only when we need to update a relationship
    node_rels = None
    # Parse the body only if it is a string
    body = json.loads(request.body) if type(request.body) is str else request.body
    # Copy data to allow dictionary resizing
    data = body.copy()
    # Received per-field sources
    if "field_sources" in data:
        # field_sources must not be treated here, see the patch_source method
        field_sources = data.pop("field_sources")
    # Validate data.
    # If it fails, it will raise a ValidationError
    data = self.validate(data)
    # Get the author list (or a new list if missing)
    author_list = node.properties.get("_author", [])
    # This is the first time the current user edits this node
    if int(request.user.id) not in author_list:
        # Add the author to the author list
        data["_author"] = author_list + [request.user.id]
    # @TODO check that 'node' is an instance of 'model'
    # Set new values on the node
    for field_name in data:
        field = self.get_model_field(field_name)
        field_value = data[field_name]
        # The value can be a list of IDs for a relationship
        if field.get_internal_type() == 'Relationship':
            # Pluck ids from the list (excluding the node itself)
            field_ids = [value for value in field_value if value != int(pk)]
            # Prefetch all relationships
            if node_rels is None:
                node_rels = node.relationships.all()
            # Get the relationship name
            rel_type = self.get_model_field(field_name)._type
            # We don't want to add this relation twice, so we extract
            # every node connected to the current one through this type
            # of relationship. "existing_rels_id" will contain the ids of
            # every node related to this one.
            existing_rels = [rel for rel in node_rels if rel.type == rel_type]
            existing_rels_id = [graph.opposite(rel, pk) for rel in existing_rels]
            # Get every id from "field_ids" that is not in
            # the list of existing relationships "existing_rels_id".
            new_rels_id = set(field_ids).difference(existing_rels_id)
            # Get every id from "existing_rels_id" that is no longer
            # in the new list of relationships "field_ids".
            old_rels_id = set(existing_rels_id).difference(field_ids)
            # Start a transaction to batch import values
            with connection.transaction(commit=False) as tx:
                # Convert ids of related nodes to *node* instances
                new_rels_node = [connection.nodes.get(idx) for idx in new_rels_id]
                # Convert ids of unrelated nodes to *relationship* instances
                old_rels = []
                # Convert the id list into relationship instances
                for idx in old_rels_id:
                    # Find the relationships that match this id
                    matches = [rel for rel in existing_rels if graph.connected(rel, idx)]
                    # Merge the list of relationships
                    old_rels = old_rels + matches
                # Commit changes when every field has been treated
                tx.commit()
            # Start a transaction to batch insert/delete values
            with connection.transaction(commit=False) as tx:
                # Then create the new relationships (using node instances)
                # Outgoing relationships
                if field.direction == 'out':
                    for n in new_rels_node:
                        connection.relationships.create(node, rel_type, n)
                # Incoming relationships
                elif field.direction == 'in':
                    for n in new_rels_node:
                        connection.relationships.create(n, rel_type, node)
                # Then delete the old relationships (using relationship instances)
                for rel in old_rels:
                    rel.delete()
                # Commit changes when every field has been treated
                tx.commit()
        # Or a literal value
        # (integer, date, url, email, etc.)
        else:
            # Current model
            model = self.get_model()
            # Fields
            fields = {x['name']: x for x in iterate_model_fields(model)}
            # Remove the value
            if field_value in [None, '']:
                if field_name == 'image' and fields[field_name]['type'] == 'URLField':
                    self.remove_node_file(node, field_name, True)
                # The field may not exist (yet)
                try:
                    node.delete(field_name)
                # It's OK, it just means we don't have to remove it
                except client.NotFoundError:
                    pass
            # We simply update the node property
            # (the value is already validated)
            else:
                if field_name in fields:
                    if 'is_rich' in fields[field_name]['rules'] and fields[field_name]['rules']['is_rich']:
                        data[field_name] = field_value = bleach.clean(
                            field_value,
                            tags=("br", "blockquote", "ul", "ol", "li", "b", "i", "u", "a", "p"),
                            attributes={'*': ("class",), 'a': ("href", "target")})
                    if field_name == 'image' and fields[field_name]['type'] == 'URLField':
                        self.remove_node_file(node, field_name, True)
                        try:
                            image_file = download_url(data[field_name])
                            path = default_storage.save(os.path.join(settings.UPLOAD_ROOT, image_file.name), image_file)
                            data[field_name] = field_value = path.replace(settings.MEDIA_ROOT, "")
                        except (UnavailableImage, NotAnImage, OversizedFile):
                            data[field_name] = field_value = ""
                node.set(field_name, field_value)
    # Update the cache
    topic_cache.incr_version(request.current_topic)
    # And return the cleaned data
    return self.create_response(request, data)
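# Request-body sketch (assumption): the JSON body mixes literal fields and
# relationship fields (lists of node ids); the field names are hypothetical.
example_body = {
    "name": "Ada Lovelace",   # literal property: validated, then node.set()
    "based_in": [12, 34],     # Relationship: ids are diffed against existing rels
    "field_sources": [],      # popped here; handled by patch_source instead
}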