def _do_test(self, input, output, urn_fallback=False):
    """Run IRILink on *input* and check the result against *output*.

    If *output* is an Exception instance, executing the link must raise
    the same exception type with the same args. Otherwise the parsed
    result, restricted to the keys *output* defines, must equal *output*.
    """
    if isinstance(output, Exception):
        # Expected failure: same exception type, same args.
        with pytest.raises(type(output)) as excinfo:
            IRILink().execute(input)
        assert excinfo.value.args == output.args
    else:
        parsed = IRILink(urn_fallback=urn_fallback).execute(input)
        # Compare only on the keys the expectation specifies.
        subset = {key: value for key, value in parsed.items() if key in output}
        assert subset == output
def normalize(self, node, graph):
    """Validate and canonicalize a work-identifier node's URI.

    Nodes whose URI fails to parse, or whose authority/scheme marks them
    as identifying something other than a work (issn, orcid.org, mailto),
    are removed from *graph*. Otherwise the node's attrs are rewritten
    with the normalized IRI, authority, and scheme.
    """
    original_uri = node.attrs['uri']

    try:
        ret = IRILink().execute(original_uri)
    except ValueError as e:
        logger.warning('Discarding invalid identifier %s with error %s', original_uri, e)
        graph.remove(node)
        return

    # ISSNs, ORCIDs, and mailto links do not identify creative works.
    if ret['authority'] in {'issn', 'orcid.org'} or ret['scheme'] in {'mailto'}:
        logger.warning('Discarding %s %s as an invalid identifier for works', ret['authority'], ret['IRI'])
        graph.remove(node)
        return

    if original_uri != ret['IRI']:
        logger.debug('Normalized %s to %s', original_uri, ret['IRI'])

    node.attrs = {
        'uri': ret['IRI'],
        'host': ret['authority'],
        'scheme': ret['scheme'],
    }
def add_source_identifier(self, source_id, jsonld, root_ref):
    """Attach a 'workidentifier' node for *source_id* to the JSON-LD graph.

    The identifier URI is normalized via IRILink (with URN fallback).
    If a workidentifier node with the same URI is already present in
    jsonld['@graph'], nothing is added.
    """
    uri = IRILink(urn_fallback=True).execute(str(source_id))['IRI']

    # Deduplicate: bail out if this URI is already in the graph.
    for existing in jsonld['@graph']:
        if existing['@type'].lower() == 'workidentifier' and existing['uri'] == uri:
            return

    identifier_ref = {
        '@id': '_:' + uuid.uuid4().hex,
        '@type': 'workidentifier',
    }
    identifier = {'uri': uri, 'creative_work': root_ref, **identifier_ref}

    # Link the new identifier back onto the root work and into the graph.
    ctx.pool[root_ref].setdefault('identifiers', []).append(identifier_ref)
    jsonld['@graph'].append(identifier)
def _parse(self, *args, **kwargs):
    """Split self.attrs['uri'] into scheme/host components when requested.

    The 'parse' flag is always popped from attrs; the URI is parsed only
    when the flag is truthy.
    """
    should_parse = self.attrs.pop('parse')
    if not should_parse:
        return
    parsed = IRILink().execute(self.attrs['uri'])
    self.attrs['scheme'] = parsed['scheme']
    self.attrs['host'] = parsed['authority']
def test_benchmark(self, input, benchmark):
    """Benchmark IRILink parsing of *input* via the pytest-benchmark fixture."""
    link = IRILink()
    benchmark(link.execute, input)
def handle(self, id_file, *args, **options):
    """Reassign creative-work primary keys according to a YAML mapping.

    *id_file* is a readable YAML document mapping desired work IDs to
    lists of (app_label, identifier) pairs. For each entry, the work
    currently owning any of the normalized identifiers is moved to the
    desired ID. When the desired ID is already occupied, the two works
    are swapped through a freshly reserved temporary ID. Foreign-key
    triggers on all referencing tables are disabled for the duration of
    the update and re-enabled in a second transaction.
    """
    # safe_load: the mapping file contains only plain scalars/sequences,
    # and yaml.load without an explicit Loader is unsafe (arbitrary
    # object construction) and deprecated since PyYAML 5.1.
    id_map = yaml.safe_load(id_file)

    temp_id_query = 'select nextval(%s);'
    # All tables/columns with a FK onto share_creativework.id.
    fk_query = '''
        SELECT kcu.table_name, kcu.column_name
        FROM information_schema.table_constraints AS tc
        JOIN information_schema.constraint_column_usage AS ccu USING (constraint_schema, constraint_name)
        JOIN information_schema.key_column_usage AS kcu USING (constraint_schema, constraint_name)
        WHERE tc.constraint_type = 'FOREIGN KEY' AND ccu.table_name = %s AND ccu.column_name = %s;
    '''
    work_id_query = '''
        SELECT DISTINCT wi.creative_work_id
        FROM share_workidentifier wi
        WHERE wi.uri IN %s;
    '''

    with transaction.atomic():
        with connection.cursor() as c:
            # Reserve an unused ID to swap through when a target ID is taken.
            c.execute(temp_id_query, ['share_creativework_id_seq'])
            temp_id = c.fetchone()[0]

            c.execute(fk_query, ['share_creativework', 'id'])
            foreign_keys = c.fetchall()
            tables = ['share_creativework', *[t for t, _ in foreign_keys]]
            self.disable_triggers(c, tables)

            for new_id, identifiers in id_map.items():
                uris = []
                for app_label, identifier in identifiers:
                    # IRI normalization depends on the active source config.
                    Context().config = apps.get_app_config(app_label)
                    uris.append(IRILink(urn_fallback=True).execute(identifier)['IRI'])

                c.execute(work_id_query, [tuple(uris)])
                old_id = c.fetchone()
                if not old_id:
                    print('Skipping {}! No work found for identifiers: {}'.format(new_id, uris))
                    continue
                old_id = old_id[0]
                if old_id == new_id:
                    # Already where it should be.
                    continue

                print('Moving {} to {}...'.format(old_id, new_id))
                if AbstractCreativeWork.objects.filter(id=new_id).exists():
                    # Target ID occupied: three-way swap via the temp ID.
                    self.update_id(c, new_id, temp_id, foreign_keys)
                    self.update_id(c, old_id, new_id, foreign_keys)
                    self.update_id(c, temp_id, old_id, foreign_keys)
                else:
                    self.update_id(c, old_id, new_id, foreign_keys)

    # Cannot update a table and then alter it in the same transaction
    with transaction.atomic():
        with connection.cursor() as c:
            self.enable_triggers(c, tables)