Esempio n. 1
0
    def from_rdf(self, json_ld_node):
        """Resolve a JSON-LD concept node to the id of a concept value.

        The concept URI is read from the node's ``'id'`` key and the label
        from its ``str(RDFS.label)`` key. A concept id is extracted from the
        URI when it has the form ``http(s)://<host>/concepts/<hex-and-dashes>``.

        Lookup order:
          1. ``get_valueids_from_concept_label(label, concept_id)`` when a
             label is present; the ``"id"`` of the first hit is returned.
          2. ``get_preflabel_from_conceptid(concept_id, lang=None)`` when a
             concept id is known but the label is missing, empty, or matched
             nothing; its ``'id'`` is returned.

        Returns ``None`` when neither lookup applies (no concept id, and no
        label that resolves to a value).
        """
        # FIXME: SHOULD be able to handle cases when the label is not supplied,
        # or if the label does not match any label from the ConceptValue,
        # either by instantiating a keyword without a concept_id or by
        # looking for, say, an external identifier attached to the concept and
        # building upon that.
        import re

        concept_uri = json_ld_node.get('id')
        label = json_ld_node.get(str(RDFS.label))

        concept_id = None
        pattern = re.compile(
            r"(http|https)://(?P<host>[^/]*)/concepts/(?P<concept_id>[A-Fa-f0-9\-]*)/?$"
        )
        # A blank node carries no 'id'; re.match would raise TypeError on None.
        match = pattern.match(concept_uri) if concept_uri else None
        if match is not None:
            concept_id = match.group("concept_id")

        # FIXME: assert that the type of this node is a E55_Type?

        # FIXME when pyld supports uppercase lang in strings, include
        # language handling here.

        if label:
            # Could be:
            #  - Blank node E55_Type with a label - a Keyword
            #  - Concept ID URI, with a label - a conventional Concept
            # find a matching Concept Value to the label
            values = get_valueids_from_concept_label(label, concept_id)
            if values:
                return values[0]["id"]

            if concept_id:
                # The format call must be applied to the string, not to the
                # (None) return value of print().
                print(
                    "FAILED TO FIND MATCHING LABEL '{0}' FOR CONCEPT '{1}'".format(
                        label, concept_id)
                )
                # Drop the unusable label so the prefLabel fallback below runs.
                label = None
            else:
                print("No Concept ID URI supplied for rdf")

        if concept_id and not label:
            # Got a concept URI but the label is nonexistent, empty,
            # or cannot be resolved in Arches.
            value = get_preflabel_from_conceptid(concept_id, lang=None)
            return value['id']

        # A keyword of some type: either there was no RDFS:label literal value
        # or the keyword cannot be found amongst the current Arches
        # ConceptValues. Nothing to resolve.
        return None
Esempio n. 2
0
    def from_rdf(self, json_ld_node):
        """Resolve a JSON-LD concept node to the id of a concept value.

        ``json_ld_node`` may be a single node (dict) or a list of nodes, in
        which case only the first entry is used. The concept URI is read from
        ``'@id'`` and the label node from ``str(RDFS.label)``; the label entry
        is removed from the node so that it is not recursed into later.

        The concept id comes either from an Arches-style URI
        (``http(s)://<host>/concepts/<hex-and-dashes>``) or, failing that, from
        a single ``models.Value`` row in the ``identifiers`` category whose
        value equals the URI.

        Lookup order:
          1. ``get_valueids_from_concept_label(label, concept_id, lang)``;
             the ``"id"`` of the first hit is returned.
          2. With a concept id but no hit: a unique ``models.Value`` row whose
             value equals the label; its primary key is returned as a string.
          3. With a concept id and no usable label:
             ``get_preflabel_from_conceptid(concept_id, lang=lang)``; its
             ``'id'`` is returned.

        Returns ``None`` when nothing can be resolved, including for an empty
        list.
        """
        # FIXME: SHOULD be able to handle cases when the label is not supplied,
        # or if the label does not match any label from the ConceptValue,
        # either by instantiating a keyword without a concept_id or by
        # looking for, say, an external identifier attached to the concept and
        # building upon that.
        import re

        try:
            # assume a list, and as this is a ConceptDataType, assume a single entry
            json_ld_node = json_ld_node[0]
        except KeyError:
            # A dict has no key 0: it already is the node.
            pass
        except IndexError:
            # Empty list: there is no node to resolve.
            return None

        concept_uri = json_ld_node.get('@id')
        label_key = str(RDFS.label)
        label_node = json_ld_node.get(label_key)

        # Consume the labels, such that we don't recurse into them
        if label_node:
            del json_ld_node[label_key]

        concept_id = lang = label = None
        pattern = re.compile(
            r"(http|https)://(?P<host>[^/]*)/concepts/(?P<concept_id>[A-Fa-f0-9\-]*)/?$"
        )

        # A blank node has no '@id'; re.match would raise TypeError on None.
        if concept_uri:
            match = pattern.match(concept_uri)
            if match is not None:
                concept_id = match.group("concept_id")
            else:
                # could be an external id, rather than an Arches only URI
                hits = list(
                    models.Value.objects.all().filter(
                        value__exact=str(concept_uri),
                        valuetype__category="identifiers"))
                if len(hits) == 1:
                    concept_id = hits[0].concept_id
                    # Still need to find the label or prefLabel for this concept
                elif hits:
                    print(
                        "ERROR: Multiple hits for {0} external identifier in RDM:".
                        format(concept_uri))
                    for hit in hits:
                        # Uses concept_id, the same attribute read on the
                        # single-hit path above.
                        print("ConceptValue {0}, Concept {1} - '{2}'".format(
                            hit.valueid, hit.concept_id, hit.value))
                else:
                    print(
                        "ERROR: No hits for {0} external identifier in RDM".
                        format(concept_uri))

        if label_node:
            label, lang = get_value_from_jsonld(label_node)
            if label:
                # Could be:
                #  - Blank node E55_Type with a label - a Keyword
                #  - Concept ID URI, with a label - a conventional Concept
                #  - Concept ID via an external URI, hosted in Arches
                # find a matching Concept Value to the label
                values = get_valueids_from_concept_label(
                    label, concept_id, lang)
                if values:
                    return values[0]["id"]

                if concept_id:
                    # No search hit: attempt an exact label match via the DB.
                    # NOTE(review): this match is not restricted to
                    # concept_id - confirm that is intended.
                    hits = list(
                        models.Value.objects.all().filter(value__exact=label))
                    if len(hits) == 1:
                        return str(hits[0].pk)
                    # Drop the unusable label so the prefLabel fallback runs.
                    label = None
                else:
                    print("No Concept ID URI supplied for rdf")

        if concept_id and not label:
            # Got a concept URI but the label is nonexistent, empty,
            # or cannot be resolved in Arches.
            value = get_preflabel_from_conceptid(concept_id, lang=lang)
            return value['id']

        if concept_id is None and not label:
            # A keyword of some type: either there was no RDFS:label literal
            # value or the keyword cannot be found amongst the current Arches
            # ConceptValues.
            print(
                "Concept lookup in from_rdf FAILED: No concept id found and no label either"
            )

        return None
Esempio n. 3
0
    def parse_and_validate_resources(self, request):
        """Parse an uploaded CSV and validate each mapped cell against its node.

        Reads two things from ``request``:
          * ``POST['column_name_to_node_data_map']`` - JSON object mapping a
            CSV column name to node data (``node_id`` and optionally ``flag``
            and ``args``);
          * ``FILES['uploaded_file']`` - the CSV file, decoded as UTF-8.

        For every CSV row the result holds the raw cell values (``row_data``),
        the converted values (``parsed_data``), a GeoJSON FeatureCollection
        built from columns flagged ``format_location`` (``location_data``),
        and validation errors keyed by node id (``errors``). Concept and
        concept-list cells are converted from a label to a value id when
        ``get_valueids_from_concept_label`` returns an exact label match.

        Columns that have no entry in the mapping, or whose ``node_id`` is
        falsy, are ignored.

        Returns a ``JSONResponse`` with ``node_ids_to_column_names_map`` and
        ``data`` (the list of parsed rows).
        """
        datatype_factory = DataTypeFactory()

        column_name_to_node_data_map = json.loads(
            request.POST.get('column_name_to_node_data_map'))

        uploaded_file = request.FILES.get('uploaded_file')
        decoded_file = uploaded_file.read().decode('utf-8').splitlines()

        # node_id -> (node, datatype). The node and its datatype do not change
        # between rows, so look each one up once instead of once per cell.
        node_cache = {}

        parsed_rows = []

        for row_dict in csv.DictReader(decoded_file):
            row_data = {}
            parsed_row_data = {}
            errors = {}
            location_data = {
                "type": "FeatureCollection",
                "features": [{
                    "type": "Feature",
                    "properties": {},
                    "geometry": {
                        "type": "Point",
                        "coordinates": [0, 0]
                    }
                }]
            }
            coordinates = location_data['features'][0]['geometry']['coordinates']

            for key, value in row_dict.items():
                # Unmapped columns (including DictReader's None key for surplus
                # fields) are skipped rather than raising KeyError.
                node_data = column_name_to_node_data_map.get(key)
                if not node_data or not node_data['node_id']:
                    continue

                node_id = node_data['node_id']

                # edge case for converting columns into complex node values
                if node_data.get('flag') == 'format_location':
                    # NOTE(review): 'x' is written to index 1 and 'y' to
                    # index 0, whereas GeoJSON positions are [x, y] - confirm
                    # this ordering is intended.
                    # NOTE(review): float() raises ValueError on an empty or
                    # non-numeric cell; no error entry is recorded for that.
                    if 'x' in node_data['args']:
                        coordinates[1] = float(value)
                    if 'y' in node_data['args']:
                        coordinates[0] = float(value)

                    row_data[node_id] = location_data
                    # should be correct after all iterations
                    parsed_row_data[node_id] = location_data
                    continue

                row_data[node_id] = value

                if node_id not in node_cache:
                    node = models.Node.objects.get(pk=node_id)
                    node_cache[node_id] = (
                        node, datatype_factory.get_instance(node.datatype))
                node, datatype = node_cache[node_id]

                if isinstance(datatype,
                              (ConceptDataType, ConceptListDataType)):
                    # `get_valueids_from_concept_label` returns a list including
                    # concepts where the value is a partial match; keep only an
                    # exact match (the last one, if there are several).
                    exact_match = None
                    for value_datum in get_valueids_from_concept_label(value):
                        if value_datum['value'] == value:
                            exact_match = value_datum

                    if exact_match:
                        value = exact_match['id']  # value_id

                    if isinstance(datatype, ConceptListDataType):
                        value = [value]

                try:
                    validation_errors = datatype.validate(value, node=node)
                except Exception as e:
                    # Best effort: a failing validator must not abort the whole
                    # upload, so the problem is only reported on stdout.
                    print(str(e))
                else:
                    if validation_errors:
                        errors[node_id] = {
                            'errors': validation_errors,
                            'node_id': node_id,
                            'cell_value': value,
                        }

                parsed_row_data[node_id] = value

            parsed_rows.append({
                'row_id': str(uuid.uuid4()),
                'row_data': row_data,
                'location_data': location_data,
                'parsed_data': parsed_row_data,
                'errors': errors,
            })

        return JSONResponse({
            'node_ids_to_column_names_map': {
                node_data['node_id']: column_name
                for column_name, node_data in column_name_to_node_data_map.items()
            },
            'data': parsed_rows
        })
Esempio n. 4
0
    def from_rdf(self, json_ld_node):
        """Resolve a JSON-LD concept node to the id of a concept value.

        Expects a label and a concept URI within ``json_ld_node``, but might
        not always get them both. ``json_ld_node`` may be a single node (dict)
        or a list of nodes, in which case only the first entry is used.

        The concept id comes either from an Arches-style URI
        (``http(s)://<host>/concepts/<hex-and-dashes>``) or, failing that, from
        ``models.Value`` rows in the ``identifiers`` category whose value
        equals the URI (the first row is used when several match).

        Lookup order:
          1. ``get_valueids_from_concept_label(label, concept_id, lang)``;
             the ``"id"`` of the first hit is returned.
          2. With a concept id but no hit: a unique ``models.Value`` row whose
             value equals the label; its primary key is returned as a string.
          3. With a concept id and no usable label:
             ``get_preflabel_from_conceptid(concept_id, lang=lang)``, then any
             ``models.Value`` row belonging to that concept.

        Returns ``None`` when nothing can be resolved, including for an empty
        list.
        """
        import re

        try:
            # assume a list, and as this is a ConceptDataType, assume a single entry
            json_ld_node = json_ld_node[0]
        except KeyError:
            # A dict has no key 0: it already is the node.
            pass
        except IndexError:
            # Empty list: there is no node to resolve.
            return None

        concept_uri = json_ld_node.get("@id")
        label_node = json_ld_node.get(str(RDFS.label))
        concept_id = lang = label = None

        # FIXME: This should use settings for host and check for UUID
        pattern = re.compile(r"(http|https)://(?P<host>[^/]*)/concepts/(?P<concept_id>[A-Fa-f0-9\-]*)/?$")

        # A blank node has no "@id"; re.match would raise TypeError on None.
        if concept_uri:
            match = pattern.match(concept_uri)
            if match is not None:
                concept_id = match.group("concept_id")
            else:
                # could be an external id, rather than an Arches only URI
                hits = list(models.Value.objects.all().filter(value__exact=str(concept_uri), valuetype__category="identifiers"))
                if len(hits) > 1:
                    print("ERROR: Multiple hits for {0} external identifier in RDM:".format(concept_uri))
                    for hit in hits:
                        # Uses concept_id, the same attribute read below.
                        print("ConceptValue {0}, Concept {1} - '{2}'".format(hit.valueid, hit.concept_id, hit.value))
                if hits:
                    # With several hits: just try the first one and hope.
                    concept_id = hits[0].concept_id
                else:
                    # Indexing hits[0] here would raise IndexError.
                    print("ERROR: No hits for {0} external identifier in RDM".format(concept_uri))

        if label_node:
            label, lang = get_value_from_jsonld(label_node)
            if label:
                values = get_valueids_from_concept_label(label, concept_id, lang)
                if values:
                    return values[0]["id"]

                if concept_id:
                    # NOTE(review): this match is not restricted to
                    # concept_id - confirm that is intended.
                    hits = list(models.Value.objects.all().filter(value__exact=label))
                    if len(hits) == 1:
                        return str(hits[0].pk)
                    # Drop the unusable label so the prefLabel fallback runs.
                    label = None
                else:
                    print("No Concept ID URI supplied for rdf")

        if not concept_id or label:
            # No concept_id means not in RDM at all
            return None

        value = get_preflabel_from_conceptid(concept_id, lang=lang)
        if value["id"]:
            return value["id"]

        # Last resort: any value attached to this concept. Restricting the
        # query to the concept avoids returning an unrelated value.
        # NOTE(review): assumes concept_id is an acceptable lookup value for
        # Value.concept_id - confirm for ids taken from the URI regex.
        fallback = models.Value.objects.filter(concept_id=concept_id).first()
        if fallback is not None:
            return str(fallback.pk)

        print(f"No labels for concept: {concept_id}!")
        return None
Esempio n. 5
0
	def __replace_node_uuids(self, data, nodes, uid=''):
		"""Recursively rewrite node keys and concept labels in *data* to ids.

		* ``data`` - a dict, a list, or a leaf value. Dicts and lists are
		  rebuilt; any other value is returned unchanged.
		* ``nodes`` - mapping from a key used in ``data`` to a dict holding
		  ``'nodeid'`` and optionally ``'name'`` and ``'values'``. Each entry
		  of ``'values'`` is a dict with ``'label'`` and ``'valueid'``.
		* ``uid`` - identifier passed to ``__warn`` / ``__error``; a ``'_'``
		  entry in a dict replaces it for that dict and everything below it.

		For a dict, every key found in ``nodes`` is replaced by that node's
		``'nodeid'``. When the node lists ``'values'``, string values (or
		string items of a list value) are matched case-insensitively against
		the labels and replaced by the matching ``'valueid'``; a match that
		differs from the label spelling is reported through ``__warn`` and a
		value without a match through ``__error``. A scalar string without a
		label match is additionally looked up with
		``get_valueids_from_concept_label`` and replaced when that returns
		exactly one hit.

		The input structures are not modified.
		"""

		def valid_values_help(candidates):
			# Help text listing every accepted label for a concept node.
			labels = ["'" + candidate['label'] + "'" for candidate in candidates]
			return 'Valid values: ' + (', '.join(labels)) + '.'

		passed_uid = uid

		if isinstance(data, dict):

			if '_' in data:
				passed_uid = data['_']

			ret = {}
			# Iterate items so that non-string keys are read with the key
			# object itself rather than with its string form.
			for keyobj, value in data.items():
				key = str(keyobj)
				node_name = key
				values = []
				if keyobj in nodes:
					node = nodes[keyobj]
					if 'name' in node:
						node_name = node['name']
					if 'values' in node:
						values = node['values']
					key = node['nodeid']

				if not values:
					# Not a concept node: just recurse into the value.
					ret[key] = self.__replace_node_uuids(value, nodes, passed_uid)
					continue

				if isinstance(value, list):
					# NOTE(review): unlike the scalar branch below, list items
					# are compared without strip() and get no
					# get_valueids_from_concept_label fallback.
					converted = []
					for item in value:
						if isinstance(item, str):
							replaced = False
							for potential_value in values:
								if potential_value['label'].casefold() == item.casefold():
									if potential_value['label'] != item:
										self.__warn(passed_uid, "Invalid concept value '" + str(item) + "'", "Did you mean '" + str(potential_value['label']) + "'?")
									item = potential_value['valueid']
									replaced = True
									# First match wins; do not compare the
									# value id against the remaining labels.
									break
							if not replaced:
								# Report the offending item, not the whole list.
								error_text = 'Invalid concept value "' + str(item) + '" for "' + str(node_name) + '".'
								self.__error(passed_uid, error_text, valid_values_help(values))
						if isinstance(item, dict):
							item = self.__replace_node_uuids(item, nodes, passed_uid)
						converted.append(item)
					ret[key] = converted
					continue

				replaced = False
				if isinstance(value, str):
					wanted = value.casefold().strip()
					for potential_value in values:
						if potential_value['label'].casefold().strip() == wanted:
							if potential_value['label'] != value:
								self.__warn(passed_uid, "Invalid concept value '" + str(value) + "'", "Did you mean '" + str(potential_value['label']) + "'?")
							value = potential_value['valueid']
							replaced = True
							break
					if not replaced:
						# No label of this node matched: accept the label only
						# if the lookup helper resolves it unambiguously.
						potential_values = get_valueids_from_concept_label(value)
						if len(potential_values) == 1:
							value = potential_values[0]['id']
							replaced = True
				if not replaced:
					error_text = 'Invalid concept value "' + str(value) + '" for "' + str(node_name) + '".'
					self.__error(passed_uid, error_text, valid_values_help(values))
				# The value is kept even when it could not be resolved.
				ret[key] = value
			return ret

		if isinstance(data, list):
			return [self.__replace_node_uuids(item, nodes, passed_uid) for item in data]

		return data