Beispiel #1
0
 def handle(self, *args, **options):
     """
     Scan every topic's models for orphan "through" properties — relationship
     property rows whose underlying graph relationship no longer exists — and
     delete them when the --fix option is given.
     """
     total_orphans_count = 0
     for topic in Topic.objects.all():
         # escape common & energy as usual
         if topic.slug in ["common", "energy"]: continue
         self.stdout.write("Topic: %s" % (topic))
         orphans_count = 0
         try:
             for Model in topic.get_models():
                 # Initialized so the error handler below cannot hit a
                 # NameError when the exception fires before the entity loop.
                 entity = None
                 try:
                     for field in utils.iterate_model_fields(Model):
                         # Only outgoing relationship fields that declare a
                         # "through" properties model can have orphans
                         if field["rel_type"] and field["direction"] == "out" and "through" in field["rules"]:
                             # Collect the ids of every live relationship
                             ids = []
                             for entity in Model.objects.all():
                                 ids.extend([_.id for _ in entity.node.relationships.all()])
                             Properties = field["rules"]["through"]
                             for info in Properties.objects.all():
                                 # A property row pointing to a dead relationship is an orphan
                                 if info._relationship not in ids:
                                     self.stdout.write("\t%s is an orphan property of the model %s." % (info._NodeModel__node, Model))
                                     orphans_count += 1
                                     total_orphans_count += 1
                                     if options["fix"]:
                                         self.stdout.write("\tremoving %s" % (info))
                                         info.delete()
                 except Exception as e:
                     self.stderr.write("\tError with fields of %s (%s)" % (entity, e))
             if orphans_count > 0:
                 self.stdout.write("\tfound %d orphans" % (orphans_count))
         except Exception as e:
             # Model is a class, so use __name__ directly: Model.__class__.__name__
             # would print the metaclass name, not the model's.
             self.stderr.write("\tError with model %s (%s)" % (Model.__name__, e))
     self.stdout.write("TOTAL: found %d orphans" % (total_orphans_count))
Beispiel #2
0
 def formfield_for_choice_field(self, db_field, request, **kwargs):
     """
     Populate the 'name' field choices with every selectable field of every
     model belonging to the topic carried by the request.
     """
     if db_field.name == 'name' and hasattr(request, "topic_id"):
         # The parent form stored the topic id on the request
         current_topic = Topic.objects.get(id=request.topic_id)
         choices = []
         for model in current_topic.get_models():
             group_title = getattr(model._meta, "verbose_name").title()
             group = []
             for field in utils.iterate_model_fields(model):
                 # The automatic primary key is not selectable
                 if field["type"] == 'AutoField':
                     continue
                 entry = [field["name"], field["verbose_name"].title()]
                 # Relationship fields are suffixed with an ellipsis
                 if field["type"] == 'Relationship':
                     entry[1] += "..."
                 group.append(entry)
             # Only expose models that actually have selectable fields
             if group:
                 choices.append((group_title, group))
         kwargs["choices"] = choices
     return super(SearchTermInline,
                  self).formfield_for_choice_field(db_field, request,
                                                   **kwargs)
Beispiel #3
0
def delete_entity(*args, **kwargs):
    """
    Signal handler: when an entity is deleted, remove the rows of its
    relationship fields' "through" property models, then refresh the
    topic cache.

    NOTE(review): this deletes *all* rows of each through model, not only
    the ones attached to the deleted instance — confirm against callers.
    """
    fields = utils.iterate_model_fields(kwargs.get('instance').__class__)
    for field in fields:
        # Only relationship fields that declare a "through" properties model
        if field["rel_type"] and field["rules"].get("through") is not None:
            Properties = field["rules"]["through"]
            for info in Properties.objects.all():
                info.delete()
    update_topic_cache(*args, **kwargs)
Beispiel #4
0
def delete_entity(*args, **kwargs):
    """
    Signal handler: when an entity is deleted, remove the rows of its
    relationship fields' "through" property models, then refresh the
    topic cache.

    NOTE(review): this deletes *all* rows of each through model, not only
    the ones attached to the deleted instance — confirm against callers.
    """
    fields = utils.iterate_model_fields(kwargs.get('instance').__class__)
    for field in fields:
        # Only relationship fields that declare a "through" properties model
        if field["rel_type"] and field["rules"].get("through") is not None:
            Properties = field["rules"]["through"]
            for info in Properties.objects.all():
                info.delete()
    update_topic_cache(*args, **kwargs)
Beispiel #5
0
 def get_columns(model):
     """
     Split a model's fields into plain property names and relationship edges.

     Returns a (columns, edges) tuple: `columns` lists the non-id,
     non-relationship field names; `edges` maps each rel_type to
     [model, field name, related model].
     """
     edges = {}
     columns = []
     for field in utils.iterate_model_fields(model):
         if field['type'] == 'Relationship':
             edges[field['rel_type']] = [field['model'], field['name'], field['related_model']]
         elif field['name'] != 'id':
             columns.append(field['name'])
     return (columns, edges)
Beispiel #6
0
 def alter_detail_data_to_serialize(self, request, bundle, nested=False):
     """
     Enrich a detail bundle with this node's relationships.

     For every 'Relationship' field of the model, stores the list of
     opposite node ids in bundle.data[field.name]; when `nested` is True,
     those nodes are fetched in a single Cypher query and the ids are
     replaced by validated node data. Finally, every Neomatch rule of the
     model is evaluated and stored under its rule key.
     """
     model = self.get_model()
     # Get relationships fields
     fields = [ f for f in model._meta.fields if f.get_internal_type() == 'Relationship']
     node_rels = bundle.obj.node.relationships.all()
     # Ids of the related nodes to fetch when the nested parameter is True
     node_to_retreive = set()
     # Resolve relationships manually
     for field in fields:
         # Relationships of the node carrying this field's type
         field_rels = [ rel for rel in node_rels[:] if rel.type == field._type]
         # Filter relationships to keep only the well-oriented ones:
         # first get the related field information
         related_field = [f for f in iterate_model_fields(model) if "rel_type" in f and f["rel_type"] == field._type and "name" in f and f["name"] == field._BoundRelationship__attname]
         if related_field:
             # Note (edouard): check some assertions in case I forgot something
             assert len(related_field) == 1, related_field
             assert related_field[0]["direction"]
             # Choose which relationship end must be the current node:
             # outgoing fields anchor at "start", incoming at "end"
             end_point_side = "start" if related_field[0]["direction"] == "out" else "end"
             # Keep only the relationships anchored on this node
             field_rels = [rel for rel in field_rels if getattr(rel, end_point_side).id == bundle.obj.id]
         # Ids of the nodes at the other end of those relationships
         field_oposites = [ graph.opposite(rel, bundle.obj.id) for rel in field_rels ]
         # Save the list into the bundle's properties
         bundle.data[field.name] = field_oposites
         # Nested mode: remember every id so the node can be fetched below
         if nested: node_to_retreive = set(list(node_to_retreive) + field_oposites)
     # There are nodes to extract from the graph
     if len(node_to_retreive):
         # Build the query to get all nodes in one request
         query = "start n=node(%s) RETURN ID(n), n" % ",".join(map(str, node_to_retreive))
         # Get all nodes as raw values to avoid unintended requests to the graph
         nodes = connection.query(query, returns=(int, dict))
         # Helper lambda to retrieve a fetched node's data by id
         retreive_node = lambda idx: next(n[1]["data"] for n in nodes if n[0] == idx)
         # Populate the relationship fields with their node instances
         for field in fields:
             # Replace each id of the list with the validated node data
             for i, idx in enumerate(bundle.data[field.name]):
                 rel_node = retreive_node(idx)
                 # Save the id, which is not a node property
                 rel_node["id"] = idx
                 # Update value
                 bundle.data[field.name][i] = self.validate(rel_node, field.target_model, allow_missing=True)
     # Show additional fields following the model's rules
     rules = request.current_topic.get_rules().model(self.get_model()).all()
     # All additional relationships
     for key in rules:
         # Filter rules to keep only Neomatch instances.
         # Neomatch is a class to programmatically create a search related
         # to this node.
         if isinstance(rules[key], Neomatch):
             bundle.data[key] = rules[key].query(bundle.obj.id)
     return bundle
Beispiel #7
0
 def iterate_fields(model, is_relationship):
     """
     Yield search-term descriptors for the model's fields.

     Keeps relationship fields when `is_relationship` is True, plain fields
     otherwise; for each kept field declaring "search_terms" in its rules,
     yields the list of {'name', 'label', 'subject'} dicts (one per term).
     """
     for candidate in utils.iterate_model_fields(model):
         # Keep only the requested family of fields
         if (candidate['type'].lower() == 'relationship') != is_relationship:
             continue
         if "search_terms" not in candidate["rules"]:
             continue
         yield [
             {
                 'name': candidate['name'],
                 'label': term,
                 'subject': model._meta.object_name
             }
             for term in candidate["rules"]["search_terms"]
         ]
Beispiel #8
0
    def rdf_search_query(self, subject, predicate, obj):
        """
        Build and run a Cypher query resolving an RDF-like triple within
        this topic.

        `predicate["name"]` is either a registered literal (node property)
        or a registered relationship; `obj` is either a dict carrying an
        "id" or a literal value. Returns the matching nodes as dicts, or
        an {'errors': ...} dict for an unknown predicate.
        """
        identifier = obj["id"] if "id" in obj else obj
        # retrieve all models in current topic, indexed by class name
        all_models = dict(
            (model.__name__, model) for model in self.get_models())
        # If the received predicate describes a literal value
        if self.is_registered_literal(predicate["name"]):
            # Get the field name into the database
            field_name = predicate["name"]
            # Build the request.
            # NOTE(review): values are interpolated directly into the query
            # string (no parameter binding) — callers must not pass
            # untrusted input here. Also {model} is unquoted while '{app}'
            # is quoted; presumably subject["name"] is trusted — confirm.
            query = """
                START root=node(*)
                MATCH (root)<-[:`<<INSTANCE>>`]-(type)
                WHERE HAS(root.name)
                AND HAS(root.{field})
                AND root.{field} = {value}
                AND type.model_name = {model}
                AND type.app_label = '{app}'
                RETURN DISTINCT ID(root) as id, root.name as name, type.model_name as model
            """.format(field=field_name,
                       value=identifier,
                       model=subject["name"],
                       app=self.app_label())

        # If the received predicate describes a relationship
        elif self.is_registered_relationship(predicate["name"]):
            fields = utils.iterate_model_fields(
                all_models[predicate["subject"]])
            # Get the relationship field matching the predicate name
            relationships = [
                field for field in fields if field["name"] == predicate["name"]
            ]
            # We didn't find the predicate
            if not len(relationships):
                return {'errors': 'Unkown predicate type'}
            relationship = relationships[0]["rel_type"]
            # Query to get every result; the field's direction decides on
            # which side the arrow head goes
            query = u"""
                START st=node({id})
                MATCH (st){is_out}-[:`{relationship}`]-{is_in}(root)<-[:`<<INSTANCE>>`]-(type)
                WHERE HAS(root.name)
                AND HAS(st.name)
                AND type.app_label = '{app}'
                RETURN DISTINCT ID(root) as id, root.name as name, type.model_name as model
            """.format(
                relationship=relationship,
                id=identifier,
                app=self.app_label(),
                is_out='<' if relationships[0]['direction'] == 'out' else '',
                is_in='>' if relationships[0]['direction'] == 'in' else '')
        else:
            return {'errors': 'Unkown predicate type: %s' % predicate["name"]}
        return connection.cypher(query).to_dicts()
Beispiel #9
0
 def get_columns(model):
     """
     Split a model's fields into plain property names and relationship edges.

     Returns a (columns, edges) tuple: `columns` lists the non-id,
     non-relationship field names; `edges` maps each rel_type to
     [model, field name, related model].
     """
     edges = {}
     columns = []
     for field in utils.iterate_model_fields(model):
         if field['type'] == 'Relationship':
             edges[field['rel_type']] = [field['model'], field['name'], field['related_model']]
         elif field['name'] != 'id':
             columns.append(field['name'])
     return (columns, edges)
Beispiel #10
0
 def rdf_search_query(self, subject, predicate, obj):
     """
     Build and run a Cypher query resolving an RDF-like triple
     (subject, predicate, obj) within this topic.

     Returns the matching nodes as a list of dicts, or an
     {'errors': ...} dict when the predicate is unknown.
     """
     identifier = obj["id"] if "id" in obj else obj
     # retrieve all models in current topic, indexed by class name
     all_models = dict((model.__name__, model) for model in self.get_models())
     # If the received predicate describes a literal value
     if self.is_registered_literal(predicate["name"]):
         # Get the field name into the database
         field_name = predicate["name"]
         # Build the request.
         # NOTE(review): values are interpolated directly into the query
         # string (no parameter binding) — don't pass untrusted input here.
         query = """
             START root=node(*)
             MATCH (root)<-[:`<<INSTANCE>>`]-(type)
             WHERE HAS(root.name)
             AND HAS(root.{field})
             AND root.{field} = {value}
             AND type.model_name = {model}
             AND type.app_label = '{app}'
             RETURN DISTINCT ID(root) as id, root.name as name, type.model_name as model
         """.format(
             field=field_name,  # fixed: was `ield_name`, a NameError
             value=identifier,
             model=subject["name"],
             app=self.app_label()
         )

     # If the received predicate describes a relationship
     elif self.is_registered_relationship(predicate["name"]):
         fields        = utils.iterate_model_fields( all_models[predicate["subject"]] )
         # Get the relationship field matching the predicate name
         relationships = [ field for field in fields if field["name"] == predicate["name"] ]
         # We didn't find the predicate
         if not len(relationships): return {'errors': 'Unknown predicate type'}
         relationship  = relationships[0]["rel_type"]
         # Query to get every result; the field's direction decides on
         # which side the arrow head goes
         query = u"""
             START st=node({id})
             MATCH (st){is_out}-[:`{relationship}`]-{is_in}(root)<-[:`<<INSTANCE>>`]-(type)
             WHERE HAS(root.name)
             AND HAS(st.name)
             AND type.app_label = '{app}'
             RETURN DISTINCT ID(root) as id, root.name as name, type.model_name as model
         """.format(
             relationship=relationship,
             id=identifier,
             app=self.app_label(),
             is_out='<' if relationships[0]['direction'] == 'out' else '',
             is_in='>' if relationships[0]['direction'] == 'in' else ''
         )
     else:
         return {'errors': 'Unknown predicate type: %s' % predicate["name"]}
     return connection.cypher(query).to_dicts()
Beispiel #11
0
 def field(self):
     """
     Look up this search term's field descriptor among the topic's models,
     memoized in the topic cache. Returns None when no field matches.
     """
     cache_key = "%s__field" % (self.name)
     field     = utils.topic_cache.get(self.topic, cache_key)
     if field is None and self.name:
         # Scan every model of the topic; the last matching field wins
         topic_models = self.topic.get_models()
         for model in topic_models:
             for f in utils.iterate_model_fields(model):
                 if f["name"] == self.name:
                     field = f
         # Guard: the name may not match any field; the original crashed
         # here with a TypeError ('NoneType' object does not support item
         # assignment) instead of returning None.
         if field is not None:
             # NOTE(review): this mutates the shared field dict returned by
             # iterate_model_fields; kept as-is because the "through" model
             # creates pickling errors in the cache otherwise.
             field["rules"]["through"] = None
             utils.topic_cache.set(self.topic, cache_key, field)
     return field
Beispiel #12
0
 def field(self):
     """
     Look up this search term's field descriptor among the topic's models,
     memoized in the topic cache. Returns None when no field matches.
     """
     cache_key = "%s__field" % (self.name)
     field = utils.topic_cache.get(self.topic, cache_key)
     if field is None and self.name:
         # Scan every model of the topic; the last matching field wins
         topic_models = self.topic.get_models()
         for model in topic_models:
             for f in utils.iterate_model_fields(model):
                 if f["name"] == self.name:
                     field = f
         # Guard: the name may not match any field; the original crashed
         # here with a TypeError ('NoneType' object does not support item
         # assignment) instead of returning None.
         if field is not None:
             # NOTE(review): this mutates the shared field dict returned by
             # iterate_model_fields; kept as-is because the "through" model
             # creates pickling errors in the cache otherwise.
             field["rules"]["through"] = None
             utils.topic_cache.set(self.topic, cache_key, field)
     return field
Beispiel #13
0
    def summary_forms(self, bundle, request):
        """
        Describe every model of the topic for the client-side forms:
        names, fields (including Neomatch pseudo-fields), rules, index...
        Returns a dict keyed by lowercased model name.
        """
        available_resources = {}
        # Get the model's rules manager
        rulesManager = self.topic.get_rules()
        # Fetch every registered model
        # to print out its rules
        for model in self.topic.get_models():
            name = model.__name__.lower()
            rules = rulesManager.model(model).all()
            verbose_name = getattr(model._meta, "verbose_name", name)
            verbose_name_plural = getattr(model._meta, "verbose_name_plural",
                                          verbose_name + "s")
            # Start from a copy of the model's own fields. This must happen
            # BEFORE the rules loop: the original referenced `fields` before
            # assignment (NameError on the first Neomatch rule, and the
            # appended entries were discarded by the later reassignment).
            fields = [
                field.copy() for field in utils.iterate_model_fields(model)
            ]
            for key in rules:
                # Filter rules to keep only Neomatch
                if isinstance(rules[key], Neomatch):
                    fields.append({
                        "name": key,
                        "type": "ExtendedRelationship",
                        "verbose_name": rules[key].title,
                        "rules": {},
                        "related_model": rules[key].target_model.__name__
                    })
            fields = [self.sanitize_field(field) for field in fields]

            available_resources[name] = {
                'help_text': getattr(model, "_description", None),
                'topic': getattr(model, "_topic", self.topic.slug)
                or self.topic.slug,
                'model': getattr(model, "__name__", ""),
                'verbose_name': verbose_name,
                'verbose_name_plural': verbose_name_plural,
                'name': name,
                'fields': fields,
                'rules': rules,
                'index': getattr(model, "__idx__", 0)
            }

        return available_resources
Beispiel #14
0
 def handle(self, *args, **options):
     """
     Scan every topic's models for orphan "through" properties — relationship
     property rows whose underlying graph relationship no longer exists — and
     delete them when the --fix option is given.
     """
     total_orphans_count = 0
     for topic in Topic.objects.all():
         # escape common & energy as usual
         if topic.slug in ["common", "energy"]: continue
         self.stdout.write("Topic: %s" % (topic))
         orphans_count = 0
         try:
             for Model in topic.get_models():
                 # Initialized so the error handler below cannot hit a
                 # NameError when the exception fires before the entity loop.
                 entity = None
                 try:
                     for field in utils.iterate_model_fields(Model):
                         # Only outgoing relationship fields that declare a
                         # "through" properties model can have orphans
                         if field["rel_type"] and field["direction"] == "out" and "through" in field["rules"]:
                             # Collect the ids of every live relationship
                             ids = []
                             for entity in Model.objects.all():
                                 ids.extend([_.id for _ in entity.node.relationships.all()])
                             Properties = field["rules"]["through"]
                             for info in Properties.objects.all():
                                 # A property row pointing to a dead relationship is an orphan
                                 if info._relationship not in ids:
                                     self.stdout.write("\t%s is an orphan property of the model %s." % (info._NodeModel__node, Model))
                                     orphans_count += 1
                                     total_orphans_count += 1
                                     if options["fix"]:
                                         self.stdout.write("\tremoving %s" % (info))
                                         info.delete()
                 except Exception as e:
                     self.stderr.write("\tError with fields of %s (%s)" % (entity, e))
             if orphans_count > 0:
                 self.stdout.write("\tfound %d orphans" % (orphans_count))
         except Exception as e:
             # Model is a class, so use __name__ directly: Model.__class__.__name__
             # would print the metaclass name, not the model's.
             self.stderr.write("\tError with model %s (%s)" % (Model.__name__, e))
     self.stdout.write("TOTAL: found %d orphans" % (total_orphans_count))
Beispiel #15
0
 def formfield_for_choice_field(self, db_field, request, **kwargs):
     """
     Populate the 'name' field choices with every selectable field of every
     model belonging to the topic carried by the request.
     """
     if db_field.name == 'name' and hasattr(request, "topic_id"):
         # The parent form stored the topic id on the request
         current_topic = Topic.objects.get(id=request.topic_id)
         choices = []
         for model in current_topic.get_models():
             group_title = getattr(model._meta, "verbose_name").title()
             group = []
             for field in utils.iterate_model_fields(model):
                 # The automatic primary key is not selectable
                 if field["type"] == 'AutoField':
                     continue
                 entry = [field["name"], field["verbose_name"].title()]
                 # Relationship fields are suffixed with an ellipsis
                 if field["type"] == 'Relationship':
                     entry[1] += "..."
                 group.append(entry)
             # Only expose models that actually have selectable fields
             if group:
                 choices.append((group_title, group))
         kwargs["choices"] = choices
     return super(SearchTermInline,
                  self).formfield_for_choice_field(db_field, request,
                                                   **kwargs)
Beispiel #16
0
    def summary_forms(self, bundle, request):
        """
        Describe every model of the topic for the client-side forms:
        names, fields (including Neomatch pseudo-fields), rules, index...
        Returns a dict keyed by lowercased model name.
        """
        available_resources = {}
        # Get the model's rules manager
        rulesManager = self.topic.get_rules()
        # Fetch every registered model
        # to print out its rules
        for model in self.topic.get_models():
            name                = model.__name__.lower()
            rules               = rulesManager.model(model).all()
            verbose_name        = getattr(model._meta, "verbose_name", name)
            verbose_name_plural = getattr(model._meta, "verbose_name_plural", verbose_name + "s")
            # Start from a copy of the model's own fields. This must happen
            # BEFORE the rules loop: the original referenced `fields` before
            # assignment (NameError on the first Neomatch rule, and the
            # appended entries were discarded by the later reassignment).
            fields = [ field.copy() for field in utils.iterate_model_fields(model) ]
            for key in rules:
                # Filter rules to keep only Neomatch
                if isinstance(rules[key], Neomatch):
                    fields.append({
                        "name"         : key,
                        "type"         : "ExtendedRelationship",
                        "verbose_name" : rules[key].title,
                        "rules"        : {},
                        "related_model": rules[key].target_model.__name__
                    })
            fields = [ self.sanitize_field(field) for field in fields ]

            available_resources[name] = {
                'help_text'           : getattr(model, "_description", None),
                'topic'               : getattr(model, "_topic", self.topic.slug) or self.topic.slug,
                'model'               : getattr(model, "__name__", ""),
                'verbose_name'        : verbose_name,
                'verbose_name_plural' : verbose_name_plural,
                'name'                : name,
                'fields'              : fields,
                'rules'               : rules,
                'index'               : getattr(model, "__idx__", 0)
            }

        return available_resources
Beispiel #17
0
 def iterate_fields(model, is_relationship):
     """
     Yield search-term descriptors for the model's fields.

     Keeps relationship fields when `is_relationship` is True, plain fields
     otherwise; for each kept field declaring "search_terms" in its rules,
     yields the list of {'name', 'label', 'subject'} dicts (one per term).
     """
     for candidate in utils.iterate_model_fields(model):
         # Keep only the requested family of fields
         if (candidate['type'].lower() == 'relationship') != is_relationship:
             continue
         if "search_terms" not in candidate["rules"]:
             continue
         yield [
             {
                 'name': candidate['name'],
                 'label': term,
                 'subject': model._meta.object_name
             }
             for term in candidate["rules"]["search_terms"]
         ]
Beispiel #18
0
def process_bulk_parsing_and_save_as_model(topic, files, start_time=None):
    """
    Job which parses uploaded content, validates and saves them as model
    """

    start_time = start_time != None and start_time or time.time()
    entities = {}
    relations = []
    errors = []
    id_mapping = {}
    nb_lines = 0
    file_reading_progression = 0
    job = get_current_job()

    # Define Exceptions
    class Error(Exception):
        """
        Generic Custom Exception for this endpoint.
        Include the topic.
        """
        def __init__(self, **kwargs):
            """ set the topic and add all the parameters as attributes """
            self.topic = topic.title
            for key, value in kwargs.items():
                setattr(self, key, value)

        def __str__(self):
            return self.__dict__

    class WarningCastingValueFail(Error):
        pass

    class WarningValidationError(Error):
        pass

    class WarningKeyUnknown(Error):
        pass

    class WarningInformationIsMissing(Error):
        pass

    class AttributeDoesntExist(Error):
        pass

    class WrongCSVSyntax(Error):
        pass

    class ColumnUnknow(Error):
        pass

    class ModelDoesntExist(Error):
        pass

    class RelationDoesntExist(Error):
        pass

    try:
        assert type(files) in (tuple, list), type(files)
        assert len(files) > 0, "You need to upload at least one file."
        assert type(files[0]) in (tuple, list)
        assert len(files[0]) == 2

        # retrieve all models in current topic
        all_models = dict(
            (model.__name__, model) for model in topic.get_models())
        # iterate over all files and dissociate entities .csv from relations .csv
        for file in files:
            if type(file) is tuple:
                file_name = file[0]
                file = file[1]
            else:
                raise Exception()
            csv_reader = utils.open_csv(file)
            header = csv_reader.next()
            assert len(
                header
            ) > 1, "{file_name} header should have at least 2 columns"
            assert header[0].endswith(
                "_id"
            ), "{file_name} : First column should begin with a header like <model_name>_id. Actually {first_col}".format(
                file_name=file_name, first_col=header[0])
            if len(header) >= 3 and header[0].endswith(
                    "_id") and header[2].endswith("_id"):
                # this is a relationship file
                relations.append((file_name, file))
            else:
                # this is an entities file
                model_name = utils.to_class_name(header[0].replace("_id", ""))
                if model_name in all_models.keys():
                    entities[model_name] = (file_name, file)
                else:
                    raise ModelDoesntExist(model=model_name,
                                           file=file_name,
                                           models_availables=all_models.keys())
            nb_lines += len(file) - 1  # -1 removes headers

        # first iterate over entities
        logger.debug("BulkUpload: creating entities")
        for entity, (file_name, file) in entities.items():
            csv_reader = utils.open_csv(file)
            header = csv_reader.next()
            # must check that all columns map to an existing model field
            fields = utils.get_model_fields(all_models[entity])
            fields_types = {}
            for field in fields:
                fields_types[field['name']] = field['type']
            field_names = [field['name'] for field in fields]
            columns = []
            for column in header[1:]:
                column = utils.to_underscores(column)
                if not column in field_names and not column.endswith(
                        "__sources__"):
                    raise ColumnUnknow(file=file_name,
                                       column=column,
                                       model=entity,
                                       attributes_available=field_names)
                    break
                if column.endswith("__sources__"):
                    column_type = "__sources__"
                    column = column[:-len("__sources__")]
                    if not column in field_names:
                        raise ColumnUnknow(file=file_name,
                                           column=column,
                                           model=entity,
                                           attributes_available=field_names)
                        break
                else:
                    column_type = fields_types.get(column, None)
                columns.append((column, column_type))
            else:
                # here, we know that all columns are valid
                for row in csv_reader:
                    data = {}
                    sources = {}
                    entity_id = row[0]
                    for i, (column, column_type) in enumerate(columns):
                        value = str(row[i + 1]).decode('utf-8')
                        # cast value if needed
                        if value:
                            try:
                                if "Integer" in column_type:
                                    value = int(value)
                                # TODO: cast float
                                if "Date" in column_type:
                                    value = datetime.datetime(*map(
                                        int,
                                        re.split('[^\d]', value)[:3])).replace(
                                            tzinfo=utc)

                            except Exception as e:
                                e = WarningCastingValueFail(
                                    column_name=column,
                                    value=value,
                                    type=column_type,
                                    data=data,
                                    model=entity,
                                    file=file_name,
                                    line=csv_reader.line_num,
                                    error=str(e))
                                errors.append(e)
                                break
                            if column_type == "__sources__":
                                sources[column] = value
                            else:
                                data[column] = value
                    else:
                        # instanciate a model
                        try:
                            item = all_models[entity].objects.create(**data)
                            # map the object with the ID defined in the .csv
                            id_mapping[(entity, entity_id)] = item
                            # create sources
                            for sourced_field, reference in sources.items():
                                for ref in reference.split("||"):
                                    FieldSource.objects.create(
                                        individual=item.id,
                                        field=sourced_field,
                                        reference=ref)
                            # FIXME: job can be accessed somewhere else (i.e detective/topics/common/jobs.py:JobResource)
                            # Concurrent access are not secure here.
                            # For now we refresh the job just before saving it.
                            file_reading_progression += 1
                            if job:
                                job.refresh()
                                job.meta["file_reading_progression"] = (
                                    float(file_reading_progression) /
                                    float(nb_lines)) * 100
                                job.meta["file_reading"] = file_name
                                job.save()
                        except Exception as e:
                            errors.append(
                                WarningValidationError(
                                    data=data,
                                    model=entity,
                                    file=file_name,
                                    line=csv_reader.line_num,
                                    error=str(e)))

        inserted_relations = 0
        # then iterate over relations
        logger.debug("BulkUpload: creating relations")
        for file_name, file in relations:
            # create a csv reader
            csv_reader = utils.open_csv(file)
            csv_header = csv_reader.next()
            relation_name = utils.to_underscores(csv_header[1])
            model_from = utils.to_class_name(csv_header[0].replace("_id", ""))
            model_to = utils.to_class_name(csv_header[2].replace("_id", ""))
            properties_name = csv_header[3:]
            # retrieve ModelProperties from related model
            ModelProperties = topic.get_rules().model(
                all_models[model_from]).field(relation_name).get("through")
            # check that the relation actually exists between the two objects
            try:
                getattr(all_models[model_from], relation_name)
            except Exception as e:
                raise RelationDoesntExist(
                    file=file_name,
                    model_from=model_from,
                    model_to=model_to,
                    relation_name=relation_name,
                    fields_available=[
                        field['name'] for field in utils.iterate_model_fields(
                            all_models[model_from])
                    ],
                    error=str(e))
            for row in csv_reader:
                id_from = row[0]
                id_to = row[2]
                properties = [p.decode('utf-8') for p in row[3:]]
                if id_to and id_from:
                    try:
                        instance_from = id_mapping[(model_from, id_from)]
                        instance_to = id_mapping[(model_to, id_to)]
                        getattr(instance_from, relation_name).add(instance_to)
                        # add properties if needed
                        if ModelProperties and properties_name and properties:
                            # save the relationship to create an id
                            instance_from.save()
                            # retrieve this id
                            relation_id = next(
                                rel.id for rel in
                                instance_from.node.relationships.outgoing()
                                if rel.end.id == instance_to.id)
                            # properties of the relationship
                            relation_args = {
                                "_endnodes": [
                                    id_mapping[(model_from, id_from)].id,
                                    instance_to.id
                                ],
                                "_relationship":
                                relation_id,
                            }
                            # Pairwise the properties with their names
                            relation_args.update(
                                zip(properties_name, properties))
                            try:
                                ModelProperties.objects.create(**relation_args)
                            except TypeError as e:
                                errors.append(
                                    AttributeDoesntExist(
                                        file=file_name,
                                        line=csv_reader.line_num,
                                        model_from=model_from,
                                        id_from=id_from,
                                        model_to=model_to,
                                        id_to=id_to,
                                        relation_args=relation_args,
                                        error=str(e)))
                        # update the job
                        inserted_relations += 1
                        file_reading_progression += 1
                        if job:
                            job.refresh()
                            job.meta["file_reading_progression"] = (
                                float(file_reading_progression) /
                                float(nb_lines)) * 100
                            job.meta["file_reading"] = file_name
                            job.save()
                    except KeyError as e:
                        errors.append(
                            WarningKeyUnknown(file=file_name,
                                              line=csv_reader.line_num,
                                              model_from=model_from,
                                              id_from=id_from,
                                              model_to=model_to,
                                              id_to=id_to,
                                              relation_name=relation_name,
                                              error=str(e)))
                    except Exception as e:
                        # Error unknown, we break the process to alert the user
                        raise Error(file=file_name,
                                    line=csv_reader.line_num,
                                    model_from=model_from,
                                    id_from=id_from,
                                    model_to=model_to,
                                    id_to=id_to,
                                    relation_name=relation_name,
                                    error=str(e))
                else:
                    # A key is missing (id_from or id_to) but we don't want to stop the parsing.
                    # Then we store the wrong line to return it to the user.
                    errors.append(
                        WarningInformationIsMissing(file=file_name,
                                                    row=row,
                                                    line=csv_reader.line_num,
                                                    id_to=id_to,
                                                    id_from=id_from))

        # Save everything
        saved = 0
        logger.debug("BulkUpload: saving %d objects" % (len(id_mapping)))
        if job:
            job.refresh()
            job.meta["objects_to_save"] = len(id_mapping)
            job.save()
        for item in id_mapping.values():
            item.save()
            saved += 1
            if job:
                job.refresh()
                job.meta["saving_progression"] = saved
                job.save()
        if job: job.refresh()
        if job and "track" in job.meta:
            from django.core.mail import send_mail
            user = User.objects.get(pk=job.meta["user"])
            send_mail("upload finished", "your upload just finished",
                      settings.DEFAULT_FROM_EMAIL, (user.email, ))
        return {
            'duration': (time.time() - start_time),
            'inserted': {
                'objects': saved,
                'links': inserted_relations
            },
            "errors":
            sorted([
                dict([(e.__class__.__name__, str(e.__dict__))]) for e in errors
            ])
        }

    except Exception as e:
        import traceback
        logger.error(traceback.format_exc())
        if e.__dict__:
            message = str(e.__dict__)
        else:
            message = e.message
        return {"errors": [{e.__class__.__name__: message}]}
Beispiel #19
0
def process_bulk_parsing_and_save_as_model(topic, files, start_time=None):
    """
    Job which parses uploaded content, validates and saves them as model.

    Arguments:
        topic      -- the Topic instance owning the models to populate.
        files      -- sequence of (file_name, file) pairs. A file whose header
                      has a "<model>_id" column in positions 0 and 2 is treated
                      as a relation file, any other file as an entity file.
        start_time -- optional start timestamp (epoch seconds) used to compute
                      the reported duration; defaults to now.

    Returns a dict with the 'duration', the 'inserted' objects/links counts
    and the list of non-fatal 'errors'. Any fatal exception is logged and
    returned inside 'errors' instead of being raised to the caller.
    """
    # NOTE: 0 is falsy but a valid timestamp, so test explicitly against None
    # (the previous `x != None and x or y` idiom dropped falsy values)
    start_time               = time.time() if start_time is None else start_time
    entities                 = {}  # model name -> (file_name, file)
    relations                = []  # list of (file_name, file)
    errors                   = []  # non-fatal errors, reported to the caller
    id_mapping               = {}  # (model name, csv id) -> model instance
    nb_lines                 = 0   # total data rows, for progression reporting
    file_reading_progression = 0
    job                      = get_current_job()

    # Define Exceptions
    class Error (Exception):
        """
        Generic Custom Exception for this endpoint.
        Include the topic.
        """
        def __init__(self, **kwargs):
            """ set the topic and add all the parameters as attributes """
            self.topic = topic.title
            for key, value in kwargs.items():
                setattr(self, key, value)
        def __str__(self):
            # __str__ must return a string; returning the attribute dict
            # itself (as before) raises a TypeError when the exception is
            # printed or formatted
            return str(self.__dict__)

    class WarningCastingValueFail     (Error): pass
    class WarningValidationError      (Error): pass
    class WarningKeyUnknown           (Error): pass
    class WarningInformationIsMissing (Error): pass
    class AttributeDoesntExist        (Error): pass
    class WrongCSVSyntax              (Error): pass
    class ColumnUnknow                (Error): pass
    class ModelDoesntExist            (Error): pass
    class RelationDoesntExist         (Error): pass

    try:
        assert type(files) in (tuple, list), type(files)
        assert len(files) > 0, "You need to upload at least one file."
        assert type(files[0]) in (tuple, list)
        assert len(files[0]) == 2

        # retrieve all models in current topic
        all_models = dict((model.__name__, model) for model in topic.get_models())
        # iterate over all files and dissociate entities .csv from relations .csv
        for file in files:
            if type(file) is tuple:
                file_name = file[0]
                file      = file[1]
            else:
                raise Exception()
            csv_reader = utils.open_csv(file)
            header     = csv_reader.next()
            # (the template below was previously never formatted)
            assert len(header) > 1, "{file_name} header should have at least 2 columns".format(file_name=file_name)
            assert header[0].endswith("_id"), "{file_name} : First column should begin with a header like <model_name>_id. Actually {first_col}".format(file_name=file_name, first_col=header[0])
            if len(header) >= 3 and header[0].endswith("_id") and header[2].endswith("_id"):
                # this is a relationship file
                relations.append((file_name, file))
            else:
                # this is an entities file
                model_name = utils.to_class_name(header[0].replace("_id", ""))
                if model_name in all_models.keys():
                    entities[model_name] = (file_name, file)
                else:
                    raise ModelDoesntExist(model=model_name, file=file_name, models_availables=all_models.keys())
            nb_lines += len(file) - 1  # -1 removes headers

        # first iterate over entities
        logger.debug("BulkUpload: creating entities")
        for entity, (file_name, file) in entities.items():
            csv_reader = utils.open_csv(file)
            header     = csv_reader.next()
            # must check that all columns map to an existing model field
            fields       = utils.get_model_fields(all_models[entity])
            fields_types = {}
            for field in fields:
                fields_types[field['name']] = field['type']
            field_names = [field['name'] for field in fields]
            columns     = []
            for column in header[1:]:
                column = utils.to_underscores(column)
                if not column in field_names and not column.endswith("__sources__"):
                    # (previously followed by an unreachable `break`)
                    raise ColumnUnknow(file=file_name, column=column, model=entity, attributes_available=field_names)
                if column.endswith("__sources__"):
                    # a "<field>__sources__" column holds references for <field>
                    column_type = "__sources__"
                    column = column[:-len("__sources__")]
                    if not column in field_names:
                        raise ColumnUnknow(file=file_name, column=column, model=entity, attributes_available=field_names)
                else:
                    column_type = fields_types.get(column, None)
                columns.append((column, column_type))
            # here, we know that all columns are valid
            # (the loop above never `break`s, so its former `else` clause
            # always executed; the body is now simply inlined after it)
            for row in csv_reader:
                data      = {}
                sources   = {}
                entity_id = row[0]
                for i, (column, column_type) in enumerate(columns):
                    value = str(row[i+1]).decode('utf-8')
                    # cast value if needed
                    if value:
                        try:
                            if "Integer" in column_type:
                                value = int(value)
                            # TODO: cast float
                            if "Date" in column_type:
                                value = datetime.datetime(*map(int, re.split('[^\d]', value)[:3])).replace(tzinfo=utc)

                        except Exception as e:
                            # covers both failed casts and a None column_type
                            e = WarningCastingValueFail(
                                column_name = column,
                                value       = value,
                                type        = column_type,
                                data        = data, model=entity,
                                file        = file_name,
                                line        = csv_reader.line_num,
                                error       = str(e)
                            )
                            errors.append(e)
                            # skip this row: the for/else below only
                            # instanciates when no break occurred
                            break
                        if column_type == "__sources__":
                            sources[column] = value
                        else:
                            data[column] = value
                else:
                    # instanciate a model
                    try:
                        item = all_models[entity].objects.create(**data)
                        # map the object with the ID defined in the .csv
                        id_mapping[(entity, entity_id)] = item
                        # create sources ("||" separates multiple references)
                        for sourced_field, reference in sources.items():
                            for ref in reference.split("||"):
                                FieldSource.objects.create(individual=item.id, field=sourced_field, reference=ref)
                        # FIXME: job can be accessed somewhere else (i.e detective/topics/common/jobs.py:JobResource)
                        # Concurrent access are not secure here.
                        # For now we refresh the job just before saving it.
                        file_reading_progression += 1
                        if job:
                            job.refresh()
                            job.meta["file_reading_progression"] = (float(file_reading_progression) / float(nb_lines)) * 100
                            job.meta["file_reading"] = file_name
                            job.save()
                    except Exception as e:
                        errors.append(
                            WarningValidationError(
                                data  = data,
                                model = entity,
                                file  = file_name,
                                line  = csv_reader.line_num,
                                error = str(e)
                            )
                        )

        inserted_relations = 0
        # then iterate over relations
        logger.debug("BulkUpload: creating relations")
        for file_name, file in relations:
            # create a csv reader
            csv_reader      = utils.open_csv(file)
            csv_header      = csv_reader.next()
            relation_name   = utils.to_underscores(csv_header[1])
            model_from      = utils.to_class_name(csv_header[0].replace("_id", ""))
            model_to        = utils.to_class_name(csv_header[2].replace("_id", ""))
            properties_name = csv_header[3:]
            # retrieve ModelProperties from related model
            ModelProperties = topic.get_rules().model(all_models[model_from]).field(relation_name).get("through")
            # check that the relation actually exists between the two objects
            try:
                getattr(all_models[model_from], relation_name)
            except Exception as e:
                raise RelationDoesntExist(
                    file             = file_name,
                    model_from       = model_from,
                    model_to         = model_to,
                    relation_name    = relation_name,
                    fields_available = [field['name'] for field in utils.iterate_model_fields(all_models[model_from])],
                    error            = str(e))
            for row in csv_reader:
                id_from    = row[0]
                id_to      = row[2]
                properties = [p.decode('utf-8') for p in row[3:]]
                if id_to and id_from:
                    try:
                        instance_from = id_mapping[(model_from, id_from)]
                        instance_to   = id_mapping[(model_to, id_to)]
                        getattr(instance_from, relation_name).add(instance_to)
                        # add properties if needed
                        if ModelProperties and properties_name and properties:
                            # save the relationship to create an id
                            instance_from.save()
                            # retrieve this id
                            relation_id = next(rel.id for rel in instance_from.node.relationships.outgoing() if rel.end.id == instance_to.id)
                            # properties of the relationship
                            relation_args = {
                                "_endnodes"     : [id_mapping[(model_from, id_from)].id, instance_to.id],
                                "_relationship" : relation_id,
                            }
                            # Pairwise the properties with their names
                            relation_args.update(zip(properties_name, properties))
                            try:
                                ModelProperties.objects.create(**relation_args)
                            except TypeError as e:
                                errors.append(
                                    AttributeDoesntExist(
                                        file          = file_name,
                                        line          = csv_reader.line_num,
                                        model_from    = model_from,
                                        id_from       = id_from,
                                        model_to      = model_to,
                                        id_to         = id_to,
                                        relation_args = relation_args,
                                        error         = str(e)
                                    )
                                )
                        # update the job
                        inserted_relations += 1
                        file_reading_progression += 1
                        if job:
                            job.refresh()
                            job.meta["file_reading_progression"] = (float(file_reading_progression) / float(nb_lines)) * 100
                            job.meta["file_reading"] = file_name
                            job.save()
                    except KeyError as e:
                        errors.append(
                            WarningKeyUnknown(
                                file          = file_name,
                                line          = csv_reader.line_num,
                                model_from    = model_from,
                                id_from       = id_from,
                                model_to      = model_to,
                                id_to         = id_to,
                                relation_name = relation_name,
                                error         = str(e)
                            )
                        )
                    except Exception as e:
                        # Error unknown, we break the process to alert the user
                        raise Error(
                            file          = file_name,
                            line          = csv_reader.line_num,
                            model_from    = model_from,
                            id_from       = id_from,
                            model_to      = model_to,
                            id_to         = id_to,
                            relation_name = relation_name,
                            error         = str(e))
                else:
                    # A key is missing (id_from or id_to) but we don't want to stop the parsing.
                    # Then we store the wrong line to return it to the user.
                    errors.append(
                        WarningInformationIsMissing(
                            file=file_name, row=row, line=csv_reader.line_num, id_to=id_to, id_from=id_from
                        )
                    )

        # Save everything
        saved = 0
        logger.debug("BulkUpload: saving %d objects" % (len(id_mapping)))
        if job:
            job.refresh()
            job.meta["objects_to_save"] = len(id_mapping)
            job.save()
        for item in id_mapping.values():
            item.save()
            saved += 1
            if job:
                job.refresh()
                job.meta["saving_progression"] = saved
                job.save()
        if job: job.refresh()
        if job and "track" in job.meta:
            # the user asked to be warned by email when the upload is over
            from django.core.mail import send_mail
            user = User.objects.get(pk=job.meta["user"])
            send_mail("upload finished", "your upload just finished", settings.DEFAULT_FROM_EMAIL, (user.email,))
        return {
            'duration' : (time.time() - start_time),
            'inserted' : {
                'objects' : saved,
                'links'   : inserted_relations
            },
            "errors" : sorted([dict([(e.__class__.__name__, str(e.__dict__))]) for e in errors])
        }

    except Exception as e:
        # fatal error: log the traceback and report it to the caller
        import traceback
        logger.error(traceback.format_exc())
        if e.__dict__:
            message = str(e.__dict__)
        else:
            message = e.message
        return {
            "errors" : [{e.__class__.__name__ : message}]
        }
Beispiel #20
0
 def get_patch(self, request, **kwargs):
     """
     Partially update the node identified by kwargs["pk"] with the JSON
     body of the (POST) request.

     Literal fields are validated then written to the node (or removed
     when the new value is empty); relationship fields are diffed against
     the node's existing relationships so that only missing links are
     created and only obsolete ones are deleted.

     Returns a response containing the validated data.
     Raises Http404 when the node doesn't exist.
     """
     pk = kwargs["pk"]
     # This should be a POST request
     self.method_check(request, allowed=['post'])
     self.throttle_check(request)
     # User must be authenticated
     self.is_authenticated(request)
     bundle = self.build_bundle(request=request)
     # User allowed to update this model
     self.authorized_update_detail(self.get_object_list(bundle.request), bundle)
     # Get the node's data using the rest API
     try: node = connection.nodes.get(pk)
     # Node not found
     except client.NotFoundError: raise Http404("Not found.")
     # Load every relationship only when we need to update a relationship
     node_rels = None
     # Parse only body string
     body = json.loads(request.body) if type(request.body) is str else request.body
     # Copy data to allow dictionary resizing
     data = body.copy()
     # Received per-field sources
     if "field_sources" in data:
         # field_sources must not be treated here, see patch_source method
         field_sources = data.pop("field_sources")
     # Validate data.
     # If it fails, it will raise a ValidationError
     data = self.validate(data)
     # Get author list (or a new array if the node has none yet)
     author_list = node.properties.get("_author", [])
     # This is the first time the current user edit this node
     if int(request.user.id) not in author_list:
         # Add the author to the author list
         data["_author"] = author_list + [request.user.id]
     # @TODO check that 'node' is an instance of 'model'
     # Set new values to the node
     for field_name in data:
         field       = self.get_model_field(field_name)
         field_value = data[field_name]
         # The value can be a list of ID for relationship.
         # NOTE: was `is 'Relationship'` — identity comparison with a string
         # literal is implementation-dependent; use equality.
         if field.get_internal_type() == 'Relationship':
             # Pluck id from the list, excluding the node itself.
             # NOTE: was `is not int(pk)` — identity comparison between ints
             # only holds for CPython's small-int cache; use != instead.
             field_ids = [ value for value in field_value if value != int(pk) ]
             # Prefetch all relationship
             if node_rels is None: node_rels = node.relationships.all()
             # Get relationship name
             rel_type = self.get_model_field(field_name)._type
             # We don't want to add this relation twice so we extract
             # every node connected to the current one through this type
             # of relationship. "existing_rels_id" will contain the ids of
             # every node related to this one.
             existing_rels = [ rel for rel in node_rels if rel.type == rel_type ]
             existing_rels_id = [ graph.opposite(rel, pk) for rel in existing_rels ]
             # Get every ids from "field_ids" that ain't not in
             # the list of existing relationship "existing_rel_id".
             new_rels_id = set(field_ids).difference(existing_rels_id)
             # Get every ids from "existing_rels_id" that ain't no more
             # in the new list of relationships "field_ids".
             old_rels_id = set(existing_rels_id).difference(field_ids)
             # Start a transaction to batch import values
             with connection.transaction(commit=False) as tx:
                 # Convert ids or related node to *node* instances
                 new_rels_node = [ connection.nodes.get(idx) for idx in new_rels_id ]
                 # Convert ids or unrelated node to *relationships* instances
                 old_rels    = []
                 # Convert ids list into relationship instances
                 for idx in old_rels_id:
                     # Find the relationship that match with this id
                     matches = [ rel for rel in existing_rels if graph.connected(rel, idx) ]
                     # Merge the list of relationships
                     old_rels = old_rels + matches
             # Commit change when every field was treated
             tx.commit()
             # Start a transaction to batch insert/delete values
             with connection.transaction(commit=False) as tx:
                 # Then create the new relationships (using nodes instances)
                 # Outcoming relationship
                 if field.direction == 'out':
                     [ connection.relationships.create(node, rel_type, n) for n in new_rels_node ]
                 # Incoming relationship
                 elif field.direction == 'in':
                     [ connection.relationships.create(n, rel_type, node) for n in new_rels_node ]
                 # Then delete the old relationships (using relationships instance)
                 [ rel.delete() for rel in old_rels ]
             # Commit change when every field was treated
             tx.commit()
         # Or a literal value
         # (integer, date, url, email, etc)
         else:
             # Current model
             model = self.get_model()
             # Fields
             fields = { x['name'] : x for x in iterate_model_fields(model) }
             # Remove the values
             if field_value in [None, '']:
                 if field_name == 'image' and fields[field_name]['type'] == 'URLField':
                     self.remove_node_file(node, field_name, True)
                 # The field may not exists (yet)
                 try:
                     node.delete(field_name)
                 # It's OK, it just means we don't have to remove it
                 except client.NotFoundError: pass
             # We simply update the node property
             # (the value is already validated)
             else:
                 if field_name in fields:
                     # Sanitize rich-text values to a small HTML whitelist
                     if 'is_rich' in fields[field_name]['rules'] and fields[field_name]['rules']['is_rich']:
                         data[field_name] = field_value = bleach.clean(field_value,
                                                                       tags=("br", "blockquote", "ul", "ol",
                                                                             "li", "b", "i", "u", "a", "p"),
                                                                       attributes={
                                                                           '*': ("class",),
                                                                           'a': ("href", "target")
                                                                       })
                     # Image URLs are downloaded and stored locally;
                     # on any download failure the field is emptied
                     if field_name == 'image' and fields[field_name]['type'] == 'URLField':
                         self.remove_node_file(node, field_name, True)
                         try:
                             image_file = download_url(data[field_name])
                             path = default_storage.save(os.path.join(settings.UPLOAD_ROOT, image_file.name) , image_file)
                             data[field_name] = field_value = path.replace(settings.MEDIA_ROOT, "")
                         except UnavailableImage:
                             data[field_name] = field_value = ""
                         except NotAnImage:
                             data[field_name] = field_value = ""
                         except OversizedFile:
                             data[field_name] = field_value = ""
                 node.set(field_name, field_value)
     # update the cache
     topic_cache.incr_version(request.current_topic)
     # And returns cleaned data
     return self.create_response(request, data)