Ejemplo n.º 1
0
def import_concepts(reference_data):
    """
    Save the concepts, concept values and concept relations found in
    reference_data.

    Arguments:
    reference_data -- indexable collection whose item 0 holds the concept
        records under the 'concepts' key, item 1 the value records under the
        'values' key and item 2 the relation records under the 'relations' key

    Any value type that is not yet present in DValueType is created with the
    'undefined' category and the 'arches' namespace before the value is saved.
    Relations that reference a concept which was not part of this import are
    skipped.

    """

    concepts = reference_data[0]['concepts']
    values = reference_data[1]['values']
    relations = reference_data[2]['relations']

    # saved concepts keyed by id, so relations can be resolved further down
    concept_objs = {}
    for concept in concepts:
        concept_obj = Concept()
        concept_obj.id = concept['conceptid']
        concept_obj.nodetype = concept['nodetype']
        concept_obj.legacyoid = concept['legacyoid']
        concept_obj.save()

        concept_objs[concept_obj.id] = concept_obj

    # a set keeps the membership test below constant-time for every value
    existing_valuetypes = set(o.valuetype for o in models.DValueType.objects.all())
    for value in values:
        if value['valuetype'] not in existing_valuetypes:
            models.DValueType.objects.create(valuetype=value['valuetype'], category='undefined', namespace='arches')
            existing_valuetypes.add(value['valuetype'])

        conceptvalue_obj = ConceptValue()
        conceptvalue_obj.id = value['valueid']
        conceptvalue_obj.conceptid = value['conceptid']
        conceptvalue_obj.type = value['valuetype']
        conceptvalue_obj.value = value['value']
        conceptvalue_obj.language = value['languageid']
        conceptvalue_obj.save()

    for relation in relations:
        # only relate concepts that were both created by this import
        if relation['conceptidfrom'] in concept_objs and relation['conceptidto'] in concept_objs:
            conceptfrom = concept_objs[relation['conceptidfrom']]
            conceptto = concept_objs[relation['conceptidto']]
            conceptfrom.add_relation(conceptto, relation['relationtype'])
Ejemplo n.º 2
0
    def test_create_concept(self):
        """
        Exercise create, read, update and delete on a single Concept

        """

        created = Concept()
        created.nodetype = 'Concept'
        # no 'id' or 'conceptid' is supplied for the value
        pref_label = ConceptValue({
            'type': 'prefLabel',
            'category': 'label',
            'value': 'test pref label',
            'language': 'en-US'
        })
        created.values = [pref_label]
        created.save()

        # read back what was just saved
        fetched = Concept().get(id=created.id)
        self.assertEqual(fetched.id, created.id)
        self.assertEqual(fetched.values[0].value, 'test pref label')

        # change the label text and save again
        first_value = created.values[0]
        first_value.value = 'updated pref label'
        created.values[0] = first_value
        created.save()

        fetched = Concept().get(id=created.id)
        self.assertEqual(fetched.values[0].value, 'updated pref label')

        # once deleted, fetching by the same id has to fail
        fetched.delete(delete_self=True)
        with self.assertRaises(models.Concept.DoesNotExist):
            Concept().get(id=fetched.id)
Ejemplo n.º 3
0
    def get_concepts(self, uris):
        """
        Get a list of concepts given a list of AAT uris like http://vocab.getty.edu/aat/300380087

        Arguments:
        uris -- comma separated string of AAT uris

        Returns a list with one Concept per uri.  Each concept carries the
        prefLabel and scopeNote values returned by the SPARQL query (limited
        to self.allowed_languages) plus the uri itself as a dcterms identifier
        value.

        Raises an Exception as soon as the query for a uri returns no results.

        """

        default_lang = settings.LANGUAGE_CODE
        dcterms_identifier_type = DValueType.objects.get(
            valuetype=str(DCTERMS.identifier).replace(str(DCTERMS), ''),
            namespace='dcterms')

        # the AAT expects language codes to be all lower case; the filter is
        # the same for every uri, so it is built once up front
        lang_filter = ','.join('"%s"' % lang.lower() for lang in self.allowed_languages)

        concepts = []
        for uri in uris.split(','):
            query = """
                SELECT ?value ?type WHERE {
                  {
                    <%s> skos:prefLabel ?value .
                    BIND('prefLabel' AS ?type)
                  }
                  UNION
                  {
                    <%s> skos:scopeNote [rdf:value ?value] .
                    BIND('scopeNote' AS ?type)
                  }
                  FILTER (lang(?value) in (%s)) 
                }""" % (uri, uri, lang_filter)
            results = self.perform_sparql_query(query)
            bindings = results["results"]["bindings"]

            if not bindings:
                # the query is embedded in html, so its angle brackets are
                # replaced with complete (semicolon terminated) entities
                raise Exception(
                    _("<strong>Error in SPARQL query:</strong><br>Test this query directly by pasting the query below into the Getty's own SPARQL endpoint at <a href='http://vocab.getty.edu/sparql' target='_blank'>http://vocab.getty.edu/sparql</a><i><pre>%s</pre></i>Query returned 0 results, please check the query for errors.  You may need to add the appropriate languages into the database for this query to work<br><br>"
                      ) % (query.replace('<', '&lt;').replace('>', '&gt;')))

            concept = Concept()
            concept.nodetype = 'Concept'
            for result in bindings:
                concept.addvalue({
                    'type': result["type"]["value"],
                    'value': result["value"]["value"],
                    'language': result["value"]["xml:lang"]
                })
            # keep the source uri on the concept as its dcterms identifier
            concept.addvalue({
                'value': uri,
                'language': default_lang,
                'type': dcterms_identifier_type.valuetype,
                'category': dcterms_identifier_type.category
            })
            concepts.append(concept)

        return concepts
Ejemplo n.º 4
0
    def get_concepts(self, uris):
        """
        Get a list of concepts given a list of AAT uris like http://vocab.getty.edu/aat/300380087

        Arguments:
        uris -- comma separated string of AAT uris

        Returns a list with one Concept per uri, each carrying the prefLabel
        and scopeNote values returned by the SPARQL query (limited to
        self.allowed_languages).

        Raises an Exception as soon as the query for a uri returns no results.

        """

        # the AAT expects language codes to be all lower case, and lang()
        # comparisons in the FILTER are exact string matches; the filter is
        # the same for every uri, so it is built once up front
        lang_filter = ','.join('"%s"' % lang.lower() for lang in self.allowed_languages)

        concepts = []
        for uri in uris.split(','):
            query = """
                SELECT ?value ?type WHERE {
                  {
                    <%s> skos:prefLabel ?value .
                    BIND('prefLabel' AS ?type)
                  }
                  UNION
                  {
                    <%s> skos:scopeNote [rdf:value ?value] .
                    BIND('scopeNote' AS ?type)
                  }
                  FILTER (lang(?value) in (%s)) 
                }""" % (uri, uri, lang_filter)
            results = self.perform_sparql_query(query)
            bindings = results["results"]["bindings"]

            if not bindings:
                # the query is embedded in html, so its angle brackets are
                # replaced with complete (semicolon terminated) entities
                raise Exception(_("<strong>Error in SPARQL query:</strong><br>Test this query directly by pasting the query below into the Getty's own SPARQL endpoint at <a href='http://vocab.getty.edu/sparql' target='_blank'>http://vocab.getty.edu/sparql</a><i><pre>%s</pre></i>Query returned 0 results, please check the query for errors.  You may need to add the appropriate languages into the database for this query to work<br><br>") % (query.replace('<', '&lt;').replace('>', '&gt;')))

            concept = Concept()
            concept.nodetype = 'Concept'
            for result in bindings:
                concept.addvalue({
                    'type': result["type"]["value"],
                    'value': result["value"]["value"],
                    'language': result["value"]["xml:lang"]
                })
            concepts.append(concept)

        return concepts
Ejemplo n.º 5
0
def import_concepts(reference_data):
    """
    Persist concepts, then their values, then the relations between them.

    reference_data is read positionally: item 0 provides the 'concepts'
    records, item 1 the 'values' records and item 2 the 'relations' records.
    Value types missing from DValueType are created on the fly, and a
    relation is only added when both of its concepts were saved by this call.

    """

    concept_rows = reference_data[0]['concepts']
    value_rows = reference_data[1]['values']
    relation_rows = reference_data[2]['relations']

    # pass 1: concepts, remembered by id for the relation pass
    saved_by_id = {}
    for row in concept_rows:
        new_concept = Concept()
        new_concept.id = row['conceptid']
        new_concept.nodetype = row['nodetype']
        new_concept.legacyoid = row['legacyoid']
        new_concept.save()
        saved_by_id[new_concept.id] = new_concept

    # pass 2: values, registering unknown value types first
    known_types = []
    for valuetype_record in models.DValueType.objects.all():
        known_types.append(valuetype_record.valuetype)

    for row in value_rows:
        valuetype = row['valuetype']
        if valuetype not in known_types:
            models.DValueType.objects.create(
                valuetype=valuetype,
                category='undefined',
                namespace='arches')
            known_types.append(valuetype)

        new_value = ConceptValue()
        new_value.id = row['valueid']
        new_value.conceptid = row['conceptid']
        new_value.type = valuetype
        new_value.value = row['value']
        new_value.language = row['languageid']
        new_value.save()

    # pass 3: relations between concepts saved in pass 1
    for row in relation_rows:
        if row['conceptidfrom'] not in saved_by_id:
            continue
        if row['conceptidto'] not in saved_by_id:
            continue
        source = saved_by_id[row['conceptidfrom']]
        target = saved_by_id[row['conceptidto']]
        source.add_relation(target, row['relationtype'])
Ejemplo n.º 6
0
    def test_create_concept(self):
        """
        Round-trip a Concept through save, get, update and delete

        """

        initial_text = "test pref label"
        updated_text = "updated pref label"

        source_concept = Concept()
        source_concept.nodetype = "Concept"
        # the value is built without an id or a conceptid
        label_spec = {
            "type": "prefLabel",
            "category": "label",
            "value": initial_text,
            "language": "en-US",
        }
        source_concept.values = [ConceptValue(label_spec)]
        source_concept.save()

        reloaded = Concept().get(id=source_concept.id)

        self.assertEqual(reloaded.id, source_concept.id)
        self.assertEqual(reloaded.values[0].value, initial_text)

        # edit the stored label in place and persist the change
        edited = source_concept.values[0]
        edited.value = updated_text
        source_concept.values[0] = edited
        source_concept.save()
        reloaded = Concept().get(id=source_concept.id)

        self.assertEqual(reloaded.values[0].value, updated_text)

        # a deleted concept can no longer be retrieved
        reloaded.delete(delete_self=True)
        with self.assertRaises(models.Concept.DoesNotExist):
            Concept().get(id=reloaded.id)
Ejemplo n.º 7
0
def load_authority_file(cursor, path_to_authority_files, filename, auth_file_to_entity_concept_mapping):
    """
    Load one authority document csv file, plus its optional '.values.csv'
    companion, as concepts; then save and index the concepts and insert
    their relations.

    Arguments:
    cursor -- database cursor used to insert rows into concepts.relations
    path_to_authority_files -- directory that contains the file
    filename -- name of the authority document csv file
    auth_file_to_entity_concept_mapping -- mapping keyed by authority file
        name; each entry is an iterable of dicts with an
        'ENTITYTYPE_CONCEPTID' key

    Returns the list of error messages collected while loading (empty when
    nothing went wrong).  Row level problems are recorded and do not stop
    the load.

    """

    print(filename.upper())

    value_types = models.ValueTypes.objects.all()
    filepath = os.path.join(path_to_authority_files, filename)
    values_filepath = filepath.replace('.csv', '.values.csv')
    unicodecsv.field_size_limit(sys.maxint)
    errors = []
    lookups = Lookups()

    #create nodes for each authority document file and relate them to the authority document node in the concept schema
    auth_doc_file_name = str(filename)
    display_file_name = string.capwords(auth_doc_file_name.replace('_',' ').replace('AUTHORITY DOCUMENT.csv', '').strip())
    if auth_doc_file_name.upper() != 'ARCHES RESOURCE CROSS-REFERENCE RELATIONSHIP TYPES.E32.CSV':
        top_concept = Concept()
        top_concept.id = str(uuid.uuid4())
        top_concept.nodetype = 'Concept'
        top_concept.legacyoid = auth_doc_file_name
        top_concept.addvalue({'value':display_file_name, 'language': settings.LANGUAGE_CODE, 'type': 'prefLabel', 'category': 'label'})
        lookups.add_relationship(source='00000000-0000-0000-0000-000000000001', type='hasTopConcept', target=top_concept.id)

    else:
        # the cross-reference relationship types file reuses an existing concept as its top concept
        top_concept = Concept().get(id = '00000000-0000-0000-0000-000000000005')
        top_concept.legacyoid = 'ARCHES RESOURCE CROSS-REFERENCE RELATIONSHIP TYPES.E32.csv'

    lookups.add_lookup(concept=top_concept, rownum=0)

    try:
        with open(filepath, 'rU') as f:
            rows = unicodecsv.DictReader(f, fieldnames=['CONCEPTID','PREFLABEL','ALTLABELS','PARENTCONCEPTID','CONCEPTTYPE','PROVIDER'],
                encoding='utf-8-sig', delimiter=',', restkey='ADDITIONAL', restval='MISSING')
            rows.next() # skip header row
            for row in rows:
                try:
                    # NOTE(review): restval supplies the *value* of absent fields, so this
                    # key test looks like it can never be true -- confirm the intended check
                    if 'MISSING' in row:
                        raise Exception('The row wasn\'t parsed properly. Missing %s' % (row['MISSING']))
                    else:
                        legacyoid = row[u'CONCEPTID']
                        concept = Concept()
                        # keep the id from the file when it already is a uuid, otherwise mint one
                        concept.id = legacyoid if is_uuid(legacyoid) == True else str(uuid.uuid4())
                        concept.nodetype = 'Concept'# if row[u'CONCEPTTYPE'].upper() == 'INDEX' else 'Collection'
                        concept.legacyoid = row[u'CONCEPTID']
                        concept.addvalue({'value':row[u'PREFLABEL'], 'language': settings.LANGUAGE_CODE, 'type': 'prefLabel', 'category': 'label'})
                        if row['CONCEPTTYPE'].lower() == 'collector':
                            concept.addvalue({'value':row[u'PREFLABEL'], 'language': settings.LANGUAGE_CODE, 'type': 'collector', 'category': 'label'})
                        if row[u'ALTLABELS'] != '':
                            altlabel_list = row[u'ALTLABELS'].split(';')
                            for altlabel in altlabel_list:
                                concept.addvalue({'value':altlabel, 'language': settings.LANGUAGE_CODE, 'type': 'altLabel', 'category': 'label'})

                        parent_concept_id = lookups.get_lookup(legacyoid=row[u'PARENTCONCEPTID']).id
                        lookups.add_relationship(source=parent_concept_id, type='narrower', target=concept.id, rownum=rows.line_num)
                        # don't add a member relationship between a top concept and it's children
                        if parent_concept_id != top_concept.id:
                            lookups.add_relationship(source=parent_concept_id, type='member', target=concept.id, rownum=rows.line_num)

                        # add the member relationship from the E55 type (typically) to their top members
                        if auth_doc_file_name in auth_file_to_entity_concept_mapping and row[u'PARENTCONCEPTID'] == auth_doc_file_name:
                            for entitytype_info in auth_file_to_entity_concept_mapping[auth_doc_file_name]:
                                lookups.add_relationship(source=entitytype_info['ENTITYTYPE_CONCEPTID'], type='member', target=concept.id, rownum=rows.line_num)

                        # NOTE(review): this validation runs after the relationships above have
                        # already been queued for the row -- confirm that is intended
                        if row[u'PARENTCONCEPTID'] == '' or (row[u'CONCEPTTYPE'].upper() != 'INDEX' and row[u'CONCEPTTYPE'].upper() != 'COLLECTOR'):
                            raise Exception('The row has invalid values.')

                        lookups.add_lookup(concept=concept, rownum=rows.line_num)

                except Exception as e:
                    errors.append('ERROR in row %s: %s' % (rows.line_num, str(e)))

    except UnicodeDecodeError as e:
        errors.append('ERROR: Make sure the file is saved with UTF-8 encoding\n%s\n%s' % (str(e), traceback.format_exc()))
    except Exception as e:
        errors.append('ERROR: %s\n%s' % (str(e), traceback.format_exc()))

    if len(errors) > 0:
        errors.insert(0, 'ERRORS IN FILE: %s\n' % (filename))
        errors.append('\n\n\n\n')

    try:
        # try and open the values file if it exists
        if exists(values_filepath):
            with open(values_filepath, 'rU') as f:
                rows = unicodecsv.DictReader(f, fieldnames=['CONCEPTID','VALUE','VALUETYPE','PROVIDER'],
                    encoding='utf-8-sig', delimiter=',', restkey='ADDITIONAL', restval='MISSING')
                rows.next() # skip header row
                for row in rows:
                    try:
                        if 'ADDITIONAL' in row:
                            raise Exception('The row wasn\'t parsed properly. Additional fields found %s.  Add quotes to values that have commas in them.' % (row['ADDITIONAL']))
                        else:
                            row_valuetype = row[u'VALUETYPE'].strip()
                            # register value types that aren't known yet
                            if row_valuetype not in value_types.values_list('valuetype', flat=True):
                                valuetype = models.ValueTypes()
                                valuetype.valuetype = row_valuetype
                                valuetype.category = 'undefined'
                                valuetype.namespace = 'arches'
                                valuetype.save()

                            value_types = models.ValueTypes.objects.all()
                            concept = lookups.get_lookup(legacyoid=row[u'CONCEPTID'])
                            category = value_types.get(valuetype=row_valuetype).category
                            # store the stripped value type, i.e. the same one that was
                            # registered and looked up just above
                            concept.addvalue({'value':row[u'VALUE'], 'type': row_valuetype, 'category': category})

                    except Exception as e:
                        errors.append('ERROR in row %s (%s): %s' % (rows.line_num, str(e), row))

    except UnicodeDecodeError as e:
        errors.append('ERROR: Make sure the file is saved with UTF-8 encoding\n%s\n%s' % (str(e), traceback.format_exc()))
    except Exception as e:
        errors.append('ERROR: %s\n%s' % (str(e), traceback.format_exc()))

    if len(errors) > 0:
        errors.insert(0, 'ERRORS IN FILE: %s\n' % (values_filepath if False else filename.replace('.csv', '.values.csv')))
        errors.append('\n\n\n\n')


    # insert and index the concepts
    for key in lookups.lookup:
        try:
            lookups.lookup[key]['concept'].save()
        except Exception as e:
            errors.append('ERROR in row %s (%s):\n%s\n' % (lookups.lookup[key]['rownum'], str(e), traceback.format_exc()))

        # NOTE(review): the concept is indexed even when the save above failed -- confirm
        lookups.lookup[key]['concept'].index(scheme=top_concept)

    # insert the concept relations; the values are handed to the database driver
    # as parameters instead of being spliced into the statement text
    sql = """
        INSERT INTO concepts.relations(conceptidfrom, conceptidto, relationtype)
        VALUES (%s, %s, %s);
    """
    for relation in lookups.concept_relationships:
        # rendered as text first, which is how they used to reach the statement
        params = ['%s' % relation['source'], '%s' % relation['target'], '%s' % relation['type']]
        try:
            cursor.execute(sql, params)
        except Exception as e:
            errors.append('ERROR in row %s (%s):\n%s\n' % (relation['rownum'], str(e), traceback.format_exc()))

    if len(errors) > 0:
        errors.insert(0, 'ERRORS IN FILE: %s\n' % (filename))
        errors.append('\n\n\n\n')

    return errors
Ejemplo n.º 8
0
    def test_prefLabel(self):
        """
        Check which label get_preflabel hands back for a requested language, given different sets of values

        """

        def make_values(*specs):
            # each spec is a (type, value, language) triple; every value is in the 'label' category
            return [
                ConceptValue({
                    'type': valuetype,
                    'category': 'label',
                    'value': text,
                    'language': language
                }) for valuetype, text, language in specs
            ]

        concept = Concept()
        concept.nodetype = 'Concept'

        # exact matches exist for every requested language
        concept.values = make_values(
            ('prefLabel', 'test pref label en-US', 'en-US'),
            ('prefLabel', 'test pref label en', 'en'),
            ('prefLabel', 'test pref label es-SP', 'es-SP'),
            ('altLabel', 'test alt label en-US', 'en-US'))

        self.assertEqual(
            concept.get_preflabel(lang='en-US').value, 'test pref label en-US')
        self.assertEqual(
            concept.get_preflabel(lang='en').value, 'test pref label en')
        self.assertEqual(concept.get_preflabel().value,
                         'test pref label %s' % (test_settings.LANGUAGE_CODE))

        # should pick the base language if it can't find the more specific version
        concept.values = make_values(
            ('prefLabel', 'test pref label en', 'en'),
            ('prefLabel', 'test pref label es', 'es-SP'),
            ('altLabel', 'test alt label en-US', 'en-US'))

        self.assertEqual(
            concept.get_preflabel(lang='en-US').value, 'test pref label en')

        # no english prefLabel at all, only an en-US altLabel
        concept.values = make_values(
            ('prefLabel', 'test pref label es', 'es-SP'),
            ('altLabel', 'test alt label en-US', 'en-US'))

        self.assertEqual(
            concept.get_preflabel(lang='en-US').value, 'test alt label en-US')

        # no english prefLabel at all, only an en altLabel
        concept.values = make_values(
            ('prefLabel', 'test pref label es', 'es-SP'),
            ('altLabel', 'test alt label en', 'en'))

        self.assertEqual(
            concept.get_preflabel(lang='en-US').value, 'test alt label en')

        # base language requested, only the region specific prefLabel exists
        concept.values = make_values(
            ('prefLabel', 'test pref label en-US', 'en-US'),
            ('prefLabel', 'test pref label es', 'es-SP'),
            ('altLabel', 'test alt label en-US', 'en-US'))

        self.assertEqual(
            concept.get_preflabel(lang='en').value, 'test pref label en-US')
Ejemplo n.º 9
0
    def test_prefLabel(self):
        """
        Run get_preflabel against a series of value sets and compare the label it returns for each requested language

        """

        concept = Concept()
        concept.nodetype = 'Concept'

        pref, alt = 'prefLabel', 'altLabel'

        # each scenario pairs the values to put on the concept, as
        # (type, value, language) triples, with the lookups to perform, as
        # (get_preflabel keyword arguments, expected label text) pairs
        scenarios = [
            (
                [
                    (pref, 'test pref label en-US', 'en-US'),
                    (pref, 'test pref label en', 'en'),
                    (pref, 'test pref label es-SP', 'es-SP'),
                    (alt, 'test alt label en-US', 'en-US'),
                ],
                [
                    ({'lang': 'en-US'}, 'test pref label en-US'),
                    ({'lang': 'en'}, 'test pref label en'),
                    ({}, 'test pref label %s' % (test_settings.LANGUAGE_CODE)),
                ],
            ),
            (
                # should pick the base language if it can't find the more specific version
                [
                    (pref, 'test pref label en', 'en'),
                    (pref, 'test pref label es', 'es-SP'),
                    (alt, 'test alt label en-US', 'en-US'),
                ],
                [({'lang': 'en-US'}, 'test pref label en')],
            ),
            (
                [
                    (pref, 'test pref label es', 'es-SP'),
                    (alt, 'test alt label en-US', 'en-US'),
                ],
                [({'lang': 'en-US'}, 'test alt label en-US')],
            ),
            (
                [
                    (pref, 'test pref label es', 'es-SP'),
                    (alt, 'test alt label en', 'en'),
                ],
                [({'lang': 'en-US'}, 'test alt label en')],
            ),
            (
                [
                    (pref, 'test pref label en-US', 'en-US'),
                    (pref, 'test pref label es', 'es-SP'),
                    (alt, 'test alt label en-US', 'en-US'),
                ],
                [({'lang': 'en'}, 'test pref label en-US')],
            ),
        ]

        for value_specs, lookups in scenarios:
            # the values are only built when their scenario is reached
            concept.values = [
                ConceptValue({
                    'type': valuetype,
                    'category': 'label',
                    'value': text,
                    'language': language
                }) for valuetype, text, language in value_specs
            ]
            for kwargs, expected in lookups:
                self.assertEqual(concept.get_preflabel(**kwargs).value, expected)