예제 #1
0
 def _do_test(self, input, output, urn_fallback=False):
     """Run ``IRILink`` on ``input`` and check the outcome against ``output``.

     If ``output`` is an exception instance, executing the link must raise
     an exception matching ``type(output)`` whose ``args`` equal
     ``output.args``. Otherwise ``output`` is treated as a mapping: the
     link's result, restricted to the keys that ``output`` contains, must
     equal ``output``. Result keys that ``output`` does not mention are
     ignored.

     NOTE(review): ``urn_fallback`` is only forwarded on the success path;
     the expected-failure path always builds ``IRILink()`` with defaults.
     Confirm that this is intentional.
     """
     if not isinstance(output, Exception):
         result = IRILink(urn_fallback=urn_fallback).execute(input)
         # Keep only the fields the expectation actually talks about.
         relevant = {}
         for key, value in result.items():
             if key in output:
                 relevant[key] = value
         assert relevant == output
         return

     with pytest.raises(type(output)) as caught:
         IRILink().execute(input)
     assert caught.value.args == output.args
예제 #2
0
    def normalize(self, node, graph):
        """Canonicalize the node's ``uri`` attribute, or drop the node.

        The URI in ``node.attrs['uri']`` is run through ``IRILink``. The node
        is removed from ``graph`` (and nothing else happens) when:

        * ``IRILink`` raises ``ValueError`` for the URI, or
        * the parsed authority is ``issn`` / ``orcid.org``, or the parsed
          scheme is ``mailto``.

        Otherwise ``node.attrs`` is replaced wholesale with a dict holding
        the parsed IRI (``uri``), authority (``host``) and ``scheme``.
        """
        original_uri = node.attrs['uri']
        try:
            parsed = IRILink().execute(original_uri)
        except ValueError as error:
            logger.warning('Discarding invalid identifier %s with error %s',
                           original_uri, error)
            graph.remove(node)
            return

        authority = parsed['authority']
        # The scheme is only consulted when the authority did not already
        # disqualify the identifier.
        rejected = (authority in {'issn', 'orcid.org'}
                    or parsed['scheme'] in {'mailto'})
        if rejected:
            logger.warning(
                'Discarding %s %s as an invalid identifier for works',
                authority, parsed['IRI'])
            graph.remove(node)
            return

        canonical = parsed['IRI']
        if original_uri != canonical:
            logger.debug('Normalized %s to %s', original_uri, canonical)

        node.attrs = {
            'uri': canonical,
            'host': authority,
            'scheme': parsed['scheme'],
        }
예제 #3
0
    def add_source_identifier(self, source_id, jsonld, root_ref):
        """Attach a ``workidentifier`` node for ``source_id`` to the graph.

        ``source_id`` is stringified and converted to an IRI via
        ``IRILink(urn_fallback=True)``. If ``jsonld['@graph']`` already holds
        a node whose ``@type`` is ``workidentifier`` (case-insensitive) with
        that same ``uri``, nothing is changed. Otherwise a new blank-node
        identifier is created, a reference to it is appended to the
        ``identifiers`` list of ``ctx.pool[root_ref]``, and the full node
        (pointing back at ``root_ref`` via ``creative_work``) is appended to
        ``jsonld['@graph']``.
        """
        uri = IRILink(urn_fallback=True).execute(str(source_id))['IRI']

        # Bail out as soon as an equivalent identifier node is found.
        for entry in jsonld['@graph']:
            if entry['@type'].lower() == 'workidentifier' and entry['uri'] == uri:
                return

        reference = {
            '@id': '_:' + uuid.uuid4().hex,
            '@type': 'workidentifier',
        }
        # Same key order as a literal with the reference unpacked last.
        node = {'uri': uri, 'creative_work': root_ref}
        node.update(reference)

        ctx.pool[root_ref].setdefault('identifiers', []).append(reference)
        jsonld['@graph'].append(node)
예제 #4
0
 def _parse(self, *args, **kwargs):
     """Derive ``scheme`` and ``host`` attributes from ``self.attrs['uri']``.

     The ``parse`` entry is always removed from ``self.attrs`` (a missing
     entry raises ``KeyError``). Only when its value is truthy is the URI
     run through ``IRILink`` and the resulting ``scheme`` and ``authority``
     stored as ``scheme`` and ``host``. Positional and keyword arguments
     are accepted but unused.
     """
     should_parse = self.attrs.pop('parse')
     if not should_parse:
         return

     link_parts = IRILink().execute(self.attrs['uri'])
     self.attrs['scheme'] = link_parts['scheme']
     self.attrs['host'] = link_parts['authority']
예제 #5
0
 def test_benchmark(self, input, benchmark):
     """Hand ``IRILink.execute`` and ``input`` to the ``benchmark`` callable."""
     link = IRILink()
     benchmark(link.execute, input)
예제 #6
0
    def handle(self, id_file, *args, **options):
        """Move creative works onto the primary keys listed in a YAML id map.

        ``id_file`` is parsed as YAML into a mapping of ``new_id`` to a list
        of ``(app_label, identifier)`` pairs. For each entry the identifiers
        are converted to IRIs, the work currently owning any of those IRIs is
        looked up in ``share_workidentifier``, and that work's id is changed
        to ``new_id`` via ``self.update_id``. If another work already uses
        ``new_id``, the two ids are swapped through a temporary id drawn from
        ``share_creativework_id_seq``.

        Triggers on ``share_creativework`` and on every table with a foreign
        key to ``share_creativework.id`` are disabled for the duration of the
        moves and re-enabled in a second transaction.

        Entries with no identifiers, or whose identifiers match no work, are
        reported and skipped. An empty id file results in no moves.
        """
        # NOTE(review): ``yaml.load`` without an explicit ``Loader`` can build
        # arbitrary Python objects from tagged input and is rejected outright
        # by PyYAML 6+. Prefer ``yaml.safe_load`` once it is confirmed that id
        # files only ever contain plain mappings/lists/scalars.
        # An empty document parses to ``None``; treat it as an empty map.
        id_map = yaml.load(id_file) or {}

        temp_id_query = 'select nextval(%s);'

        fk_query = '''
            SELECT kcu.table_name, kcu.column_name
            FROM information_schema.table_constraints AS tc
            JOIN information_schema.constraint_column_usage AS ccu
                USING (constraint_schema, constraint_name)
            JOIN information_schema.key_column_usage AS kcu
                USING (constraint_schema, constraint_name)
            WHERE tc.constraint_type = 'FOREIGN KEY'
                AND ccu.table_name = %s
                AND ccu.column_name = %s;
        '''

        work_id_query = '''
            SELECT DISTINCT wi.creative_work_id
            FROM share_workidentifier wi
            WHERE wi.uri IN %s;
        '''

        with transaction.atomic():
            with connection.cursor() as c:
                # Reserve an id that no existing work uses, for swaps.
                c.execute(temp_id_query, ['share_creativework_id_seq'])
                temp_id = c.fetchone()[0]

                # (table, column) pairs that reference share_creativework.id.
                c.execute(fk_query, ['share_creativework', 'id'])
                foreign_keys = c.fetchall()

                tables = ['share_creativework', *[t for t, _ in foreign_keys]]

                self.disable_triggers(c, tables)

                for new_id, identifiers in id_map.items():
                    uris = []
                    # ``identifiers`` is ``None`` when the YAML key has no value.
                    for app_label, identifier in identifiers or []:
                        # Presumably IRILink consults this config while
                        # parsing -- verify against Context/IRILink.
                        Context().config = apps.get_app_config(app_label)
                        uris.append(
                            IRILink(
                                urn_fallback=True).execute(identifier)['IRI'])

                    # An empty tuple cannot be rendered as a valid ``IN (...)``
                    # list; running the query would raise and abort the whole
                    # transaction, so only query when there is something to
                    # look for.
                    old_id = None
                    if uris:
                        c.execute(work_id_query, [tuple(uris)])
                        # Only the first matching work is considered.
                        old_id = c.fetchone()

                    if not old_id:
                        print('Skipping {}! No work found for identifiers: {}'.
                              format(new_id, uris))
                        continue

                    old_id = old_id[0]
                    if old_id == new_id:
                        continue

                    print('Moving {} to {}...'.format(old_id, new_id))
                    if AbstractCreativeWork.objects.filter(
                            id=new_id).exists():
                        # ``new_id`` is taken: swap the two works via temp_id.
                        self.update_id(c, new_id, temp_id, foreign_keys)
                        self.update_id(c, old_id, new_id, foreign_keys)
                        self.update_id(c, temp_id, old_id, foreign_keys)
                    else:
                        self.update_id(c, old_id, new_id, foreign_keys)

        # Cannot update a table and then alter it in the same transaction
        with transaction.atomic():
            with connection.cursor() as c:
                self.enable_triggers(c, tables)