def from_rdf(self, json_ld_node):
    """Resolve a JSON-LD concept node to the id of a matching concept value.

    The node is expected to carry a concept URI under ``'id'`` and a label
    under ``str(RDFS.label)``; either may be missing.

    Resolution order:
      1. If a label is present, look it up (scoped to the concept id when one
         could be parsed from the URI) and return the first hit's ``"id"``.
      2. If a concept id is known but the label is missing, empty, or could not
         be matched, return the ``'id'`` of the concept's preferred label.
      3. Otherwise return ``None``.

    Args:
        json_ld_node: mapping-like JSON-LD node (must support ``.get``).

    Returns:
        The value id found by one of the lookups above, or ``None``.
    """
    # FIXME: SHOULD be able to handle cases when the label is not supplied,
    # or if the label does not match any label from the ConceptValue,
    # either by instantiating a keyword without a concept_id or by looking
    # for, say, an external identifier attached to the concept and building
    # upon that.
    import re

    concept_uri = json_ld_node.get('id')
    label = json_ld_node.get(str(RDFS.label))
    concept_id = None

    concept_uri_pattern = re.compile(
        r"(http|https)://(?P<host>[^/]*)/concepts/(?P<concept_id>[A-Fa-f0-9\-]*)/?$"
    )
    # A blank node has no URI; matching None would raise TypeError.
    m = concept_uri_pattern.match(concept_uri) if isinstance(concept_uri, str) else None
    if m is not None:
        concept_id = m.groupdict().get("concept_id")

    # FIXME: assert that the type of this node is a E55_Type?
    # FIXME when pyld supports uppercase lang in strings, include
    # language handling here.

    if label:
        # Could be:
        #  - Blank node E55_Type with a label - a Keyword
        #  - Concept ID URI, with a label - a conventional Concept
        # find a matching Concept Value to the label
        values = get_valueids_from_concept_label(label, concept_id)
        if values:
            return values[0]["id"]

        if concept_id:
            # format() must be applied to the string, not to print()'s None result
            print(
                "FAILED TO FIND MATCHING LABEL '{0}' FOR CONCEPT '{1}'".format(
                    label, concept_id
                )
            )
            label = None
        else:
            print("No Concept ID URI supplied for rdf")

    if concept_id and not label:
        # got a concept URI but the label is nonexistent (or empty)
        # or cannot be resolved in Arches
        value = get_preflabel_from_conceptid(concept_id, lang=None)
        return value['id']

    # A keyword of some type: either there was no RDFS:label literal value
    # to note, or the keyword cannot be found amongst the current Arches
    # ConceptValues. Nothing can be resolved.
    return None
def from_rdf(self, json_ld_node):
    """Resolve a JSON-LD concept node to the id of a matching concept value.

    The node is expected to carry a concept URI under ``'@id'`` and a label
    node under ``str(RDFS.label)``; either may be missing. When a list (or
    tuple) is passed only its first entry is used; an empty one yields ``None``.

    Side effect: the label entry is deleted from ``json_ld_node`` so that the
    caller does not recurse into it.

    Resolution order:
      1. Derive a concept id from the URI, either from the ``/concepts/<id>``
         pattern or, failing that, from a single ``Value`` row in the
         "identifiers" category whose value equals the URI.
      2. If a label is present, look it up (scoped by concept id and language)
         and return the first hit's ``"id"``; if that fails and a concept id is
         known, fall back to an exact database match on the label and return
         its primary key when there is exactly one.
      3. If a concept id is known but no usable label remains, return the
         ``'id'`` of the concept's preferred label.
      4. Otherwise print a diagnostic and return ``None``.

    Args:
        json_ld_node: mapping-like JSON-LD node, or a list/tuple containing one.

    Returns:
        The value id found by one of the lookups above, or ``None``.
    """
    # FIXME: SHOULD be able to handle cases when the label is not supplied,
    # or if the label does not match any label from the ConceptValue,
    # either by instantiating a keyword without a concept_id or by looking
    # for, say, an external identifier attached to the concept and building
    # upon that.
    import re

    # As this is a ConceptDataType, assume a single entry when given a list.
    # An empty list previously escaped as an uncaught IndexError.
    if isinstance(json_ld_node, (list, tuple)):
        if not json_ld_node:
            return None
        json_ld_node = json_ld_node[0]

    label_key = str(RDFS.label)
    concept_uri = json_ld_node.get('@id')
    label_node = json_ld_node.get(label_key)

    # Consume the labels, such that we don't recurse into them
    if label_node:
        del json_ld_node[label_key]

    concept_id = lang = label = None

    concept_uri_pattern = re.compile(
        r"(http|https)://(?P<host>[^/]*)/concepts/(?P<concept_id>[A-Fa-f0-9\-]*)/?$"
    )
    # A blank node has no URI; matching None would raise TypeError.
    m = concept_uri_pattern.match(concept_uri) if isinstance(concept_uri, str) else None
    if m is not None:
        concept_id = m.groupdict().get("concept_id")
    elif concept_uri is not None:
        # could be an external id, rather than an Arches only URI
        hits = list(
            models.Value.objects.filter(
                value__exact=str(concept_uri),
                valuetype__category="identifiers",
            )
        )
        if len(hits) == 1:
            # Still need to find the label or prefLabel for this concept
            concept_id = hits[0].concept_id
        elif hits:
            # Only report ambiguity when there really is more than one hit.
            print(
                "ERROR: Multiple hits for {0} external identifier in RDM:".format(
                    concept_uri
                )
            )
            for hit in hits:
                print(
                    "ConceptValue {0}, Concept {1} - '{2}'".format(
                        hit.valueid, hit.concept_id, hit.value
                    )
                )

    if label_node:
        label, lang = get_value_from_jsonld(label_node)
        if label:
            # Could be:
            #  - Blank node E55_Type with a label - a Keyword
            #  - Concept ID URI, with a label - a conventional Concept
            #  - Concept ID via an external URI, hosted in Arches
            # find a matching Concept Value to the label
            values = get_valueids_from_concept_label(label, concept_id, lang)
            if values:
                return values[0]["id"]

            if concept_id:
                # Attempt a match from the label via the DB
                hits = list(models.Value.objects.filter(value__exact=label))
                if len(hits) == 1:
                    return str(hits[0].pk)
                label = None
            else:
                print("No Concept ID URI supplied for rdf")

    if concept_id and not label:
        # got a concept URI but the label is nonexistent (or empty)
        # or cannot be resolved in Arches
        value = get_preflabel_from_conceptid(concept_id, lang=lang)
        return value['id']

    if concept_id is None and (label is None or label == ""):
        # A keyword of some type: either there was no RDFS:label literal value
        # to note, or the keyword cannot be found amongst the current Arches
        # ConceptValues.
        print(
            "Concept lookup in from_rdf FAILED: No concept id found and no label either"
        )
    return None
def parse_and_validate_resources(self, request):
    """Parse an uploaded CSV and validate each cell against its mapped node.

    Reads two things from ``request``:
      * ``POST['column_name_to_node_data_map']``: JSON object mapping a CSV
        column name to node data (``node_id`` and optionally ``flag`` / ``args``).
      * ``FILES['uploaded_file']``: the CSV file, decoded as UTF-8.

    For every CSV row:
      * Columns flagged ``'format_location'`` are folded into one GeoJSON
        FeatureCollection holding a single Point.
      * Every other mapped column is validated with the datatype of its node.
        For concept / concept-list datatypes the cell text is first replaced by
        the id of an exactly matching concept label, when one exists.
      * Columns without a mapping, or with an empty ``node_id``, are ignored.

    Returns:
        JSONResponse with ``node_ids_to_column_names_map`` (node id -> column
        name) and ``data`` (one entry per row with ``row_id``, ``row_data``,
        ``location_data``, ``parsed_data`` and ``errors``).
    """
    datatype_factory = DataTypeFactory()

    column_name_to_node_data_map = json.loads(
        request.POST.get('column_name_to_node_data_map')
    )

    uploaded_file = request.FILES.get('uploaded_file')
    decoded_file = uploaded_file.read().decode('utf-8').splitlines()

    # node_id -> (node, datatype). The same columns recur on every row, so the
    # node and its datatype are fetched once per node instead of once per cell.
    node_cache = {}

    parsed_rows = []
    for row_dict in csv.DictReader(decoded_file):
        row_data = {}
        parsed_row_data = {}
        errors = {}

        location_data = {
            "type": "FeatureCollection",
            "features": [{
                "type": "Feature",
                "properties": {},
                "geometry": {
                    "type": "Point",
                    "coordinates": [0, 0]
                }
            }]
        }
        coordinates = location_data['features'][0]['geometry']['coordinates']

        for key, value in row_dict.items():
            # Unmapped columns (including DictReader's None key for surplus
            # cells) are skipped, just like columns mapped to an empty node_id.
            node_data = column_name_to_node_data_map.get(key)
            if not node_data or not node_data.get('node_id'):
                continue
            node_id = node_data['node_id']

            # edge case for converting columns into complex node values
            if node_data.get('flag') == 'format_location':
                # NOTE(review): 'x' is written to index 1 and 'y' to index 0,
                # while GeoJSON positions are ordered [x, y]. Behavior is kept
                # as-is; confirm whether this swap is intentional.
                if 'x' in node_data['args']:
                    coordinates[1] = float(value)
                if 'y' in node_data['args']:
                    coordinates[0] = float(value)

                row_data[node_id] = location_data
                # should be correct after all iterations
                parsed_row_data[node_id] = location_data
                continue

            row_data[node_id] = value

            if node_id not in node_cache:
                node = models.Node.objects.get(pk=node_id)
                node_cache[node_id] = (
                    node,
                    datatype_factory.get_instance(node.datatype),
                )
            node, datatype = node_cache[node_id]

            if isinstance(datatype, (ConceptDataType, ConceptListDataType)):
                # `get_valueids_from_concept_label` returns a list including
                # concepts where the value is a partial match; keep only exact
                # matches (the last one wins, as before).
                exact_matches = [
                    value_datum
                    for value_datum in get_valueids_from_concept_label(value)
                    if value_datum['value'] == value
                ]
                if exact_matches:
                    value = exact_matches[-1]['id']  # value_id

                if isinstance(datatype, ConceptListDataType):
                    value = [value]

            try:
                validation_errors = datatype.validate(value, node=node)
                if validation_errors:
                    errors[node_id] = {
                        'errors': validation_errors,
                        'node_id': node_id,
                        'cell_value': value,
                    }
            except Exception as e:
                # Best effort: a failing validator must not abort the upload.
                print(str(e))

            parsed_row_data[node_id] = value

        parsed_rows.append({
            'row_id': str(uuid.uuid4()),
            'row_data': row_data,
            'location_data': location_data,
            'parsed_data': parsed_row_data,
            'errors': errors,
        })

    return JSONResponse({
        'node_ids_to_column_names_map': {
            v['node_id']: k for k, v in column_name_to_node_data_map.items()
        },
        'data': parsed_rows
    })
def from_rdf(self, json_ld_node):
    """Resolve a JSON-LD concept node to the id of a matching concept value.

    Expects a label and a concept URI within the node, but might not always
    get them both. When a list (or tuple) is passed only its first entry is
    used; an empty one yields ``None``.

    Resolution order:
      1. Derive a concept id from the ``'@id'`` URI, either from the
         ``/concepts/<id>`` pattern or from ``Value`` rows in the "identifiers"
         category whose value equals the URI (the first row is used when
         several match).
      2. If a label is present, look it up (scoped by concept id and language)
         and return the first hit's ``"id"``; if that fails and a concept id is
         known, fall back to an exact database match on the label and return
         its primary key when there is exactly one.
      3. If a concept id is known but no usable label remains, return the id
         of the concept's preferred label, or failing that the primary key of
         any ``Value`` row belonging to that concept.
      4. Otherwise return ``None`` (no concept id means not in the RDM at all).

    Args:
        json_ld_node: mapping-like JSON-LD node, or a list/tuple containing one.

    Returns:
        The value id found by one of the lookups above, or ``None``.
    """
    import re

    # As this is a ConceptDataType, assume a single entry when given a list.
    # An empty list previously escaped as an uncaught IndexError.
    if isinstance(json_ld_node, (list, tuple)):
        if not json_ld_node:
            return None
        json_ld_node = json_ld_node[0]

    concept_uri = json_ld_node.get("@id")
    label_node = json_ld_node.get(str(RDFS.label))
    concept_id = lang = label = None

    # FIXME: This should use settings for host and check for UUID
    concept_uri_pattern = re.compile(
        r"(http|https)://(?P<host>[^/]*)/concepts/(?P<concept_id>[A-Fa-f0-9\-]*)/?$"
    )
    # A blank node has no URI; matching None would raise TypeError.
    m = concept_uri_pattern.match(concept_uri) if isinstance(concept_uri, str) else None
    if m is not None:
        concept_id = m.groupdict().get("concept_id")
    elif concept_uri is not None:
        # could be an external id, rather than an Arches only URI
        hits = list(
            models.Value.objects.filter(
                value__exact=str(concept_uri),
                valuetype__category="identifiers",
            )
        )
        if len(hits) > 1:
            print("ERROR: Multiple hits for {0} external identifier in RDM:".format(concept_uri))
            for hit in hits:
                print("ConceptValue {0}, Concept {1} - '{2}'".format(hit.valueid, hit.concept_id, hit.value))
        if hits:
            # With several hits, just try the first one and hope. With none,
            # concept_id stays None (indexing an empty list used to raise).
            concept_id = hits[0].concept_id

    if label_node:
        label, lang = get_value_from_jsonld(label_node)
        if label:
            values = get_valueids_from_concept_label(label, concept_id, lang)
            if values:
                return values[0]["id"]

            if concept_id:
                hits = list(models.Value.objects.filter(value__exact=label))
                if len(hits) == 1:
                    return str(hits[0].pk)
                label = None
            else:
                print("No Concept ID URI supplied for rdf")

    if not concept_id or label:
        # No concept_id means not in RDM at all; a label that is still set here
        # could not be matched and there is no concept to fall back on.
        return None

    value = get_preflabel_from_conceptid(concept_id, lang=lang)
    if value["id"]:
        return value["id"]

    # Fall back to any value of *this* concept. The lookup used to be
    # unfiltered and returned the first row of the whole Value table.
    fallback = models.Value.objects.filter(concept_id=concept_id).first()
    if fallback is not None:
        return str(fallback.pk)

    print(f"No labels for concept: {concept_id}!")
    return None
def __replace_node_uuids(self, data, nodes, uid=''):
    """Recursively swap node keys for node ids and concept labels for value ids.

    * Lists are processed item by item.
    * Dicts are rebuilt: a key found in ``nodes`` is replaced by that entry's
      ``'nodeid'``. When the entry lists concept ``'values'`` (dicts with
      ``'label'`` and ``'valueid'``), string values are matched against the
      labels, ignoring case and surrounding whitespace, and replaced by the
      matching ``'valueid'``. An inexact match is reported through
      ``self.__warn``; no match is reported through ``self.__error``.
    * Scalar strings that match no listed label are additionally looked up with
      ``get_valueids_from_concept_label`` and accepted when exactly one hit is
      returned.
    * Anything else is returned unchanged.

    Args:
        data: nested structure of dicts / lists / scalars to convert.
        nodes: mapping of key -> node description (``'nodeid'`` plus optional
            ``'name'`` and ``'values'``).
        uid: identifier passed to warnings/errors; replaced by ``data['_']``
            for a dict that has such a key (and for everything nested in it).

    Returns:
        A new structure with keys and concept values replaced. The input
        containers are not modified.
    """
    if isinstance(data, list):
        return [self.__replace_node_uuids(item, nodes, uid) for item in data]
    if not isinstance(data, dict):
        return data

    passed_uid = data['_'] if '_' in data else uid

    def match_label(text, candidates):
        # Return (True, valueid) for the first candidate whose label equals
        # `text` ignoring case/whitespace, else (False, None). Stopping at the
        # first match avoids re-comparing the substituted value id.
        wanted = text.casefold().strip()
        for candidate in candidates:
            if candidate['label'].casefold().strip() == wanted:
                if candidate['label'] != text:
                    self.__warn(
                        passed_uid,
                        "Invalid concept value '" + str(text) + "'",
                        "Did you mean '" + str(candidate['label']) + "'?",
                    )
                return True, candidate['valueid']
        return False, None

    def report_invalid(bad_value, node_name, candidates):
        # Emit the "invalid concept value" error together with the valid labels.
        error_text = 'Invalid concept value "' + str(bad_value) + '" for "' + str(node_name) + '".'
        error_help = ''
        if len(candidates) > 0:
            labels = ["'" + candidate['label'] + "'" for candidate in candidates]
            error_help = error_help + 'Valid values: ' + (', '.join(labels)) + '.'
        self.__error(passed_uid, error_text, error_help)

    ret = {}
    # items() avoids re-indexing `data` with str(key), which raised KeyError
    # for non-string keys.
    for keyobj, value in data.items():
        key = str(keyobj)
        node_name = key
        values = []

        if keyobj in nodes:
            node = nodes[keyobj]
            if 'name' in node:
                node_name = node['name']
            if 'values' in node:
                values = node['values']
            key = node['nodeid']

        if not values:
            # Not a concept node: just descend.
            ret[key] = self.__replace_node_uuids(value, nodes, passed_uid)
            continue

        if isinstance(value, list):
            converted = []
            for item in value:
                if isinstance(item, str):
                    found, value_id = match_label(item, values)
                    if found:
                        item = value_id
                    else:
                        # Report the offending item, not the whole list.
                        report_invalid(item, node_name, values)
                elif isinstance(item, dict):
                    item = self.__replace_node_uuids(item, nodes, passed_uid)
                converted.append(item)
            ret[key] = converted
            continue

        replaced = False
        if isinstance(value, str):
            replaced, value_id = match_label(value, values)
            if not replaced:
                # Not one of the node's own labels; accept an unambiguous
                # hit from the concept label search instead.
                potential_values = get_valueids_from_concept_label(value)
                if len(potential_values) == 1:
                    value_id = potential_values[0]['id']
                    replaced = True
            if replaced:
                value = value_id
        if not replaced:
            report_invalid(value, node_name, values)
        ret[key] = value

    return ret