예제 #1
0
def test_normalize_pid():
    """Check ``idutils.normalize_pid`` against every fixture identifier.

    Each row of ``identifiers`` is unpacked as ``(value, schemes,
    normalized_value, url_value)``.  The value is normalized with the first
    expected scheme; when the fixture's ``normalized_value`` is falsy, the
    input value itself is the expected result.
    """
    for i, expected_schemes, normalized_value, url_value in identifiers:
        # Compute the expectation first: ``x == normalized_value or i``
        # parses as ``(x == normalized_value) or i`` and would pass for any
        # non-empty ``i`` regardless of what normalize_pid returned.
        expected = normalized_value or i
        assert idutils.normalize_pid(i, expected_schemes[0]) == expected

    assert idutils.normalize_pid(None, 'handle') is None
예제 #2
0
def test_normalize_pid():
    """Verify persistent identifier normalization for all fixtures.

    For every ``(value, schemes, normalized_value, url_value)`` row in
    ``identifiers`` the value is normalized using ``schemes[0]``.  A falsy
    ``normalized_value`` in the fixture means the input is expected back
    unchanged.
    """
    for i, expected_schemes, normalized_value, url_value in identifiers:
        # Parenthesised on purpose: without the parentheses Python evaluates
        # ``(result == normalized_value) or i``, which is truthy whenever
        # ``i`` is a non-empty string, so the assertion could never fail.
        assert idutils.normalize_pid(i, expected_schemes[0]) == (
            normalized_value or i)

    assert idutils.normalize_pid(None, 'handle') is None
예제 #3
0
def test_to_url():
    """Check landing-URL generation with the default and the https scheme."""
    for i, expected_schemes, normalized_value, url_value in identifiers:
        scheme = expected_schemes[0]

        default_url = idutils.to_url(idutils.normalize_pid(i, scheme), scheme)
        assert default_url == url_value

        https_url = idutils.to_url(
            idutils.normalize_pid(i, scheme), scheme, url_scheme='https')
        if scheme in ['purl', 'url']:
            # If the value is already a URL its scheme is preserved
            expected_https = url_value
        else:
            expected_https = url_value.replace('http://', 'https://')
        assert https_url == expected_https
 def lookup_re3data(self):
     """Fetch re3data repository metadata for the current DataCite client.

     Resolves ``self.client_id`` to a repository DOI through
     ``RepositoryHelper.DATACITE_REPOSITORIES``, queries the re3data API
     with the normalized DOI, follows the first ``<link>`` element of the
     answer and stores the linked record's content in
     ``self.re3metadata_raw`` before calling
     ``self.parseRepositoryMetadata()``.

     Nothing is done unless both ``self.client_id`` and ``self.pid_scheme``
     are truthy.  A warning is logged when no DOI is known for the client or
     when the API answer carries no usable link.
     """
     if self.client_id and self.pid_scheme:
         re3doi = RepositoryHelper.DATACITE_REPOSITORIES.get(
             self.client_id)  # {client_id,re3doi}
         # pid -> clientId -> repo doi-> re3id, and query repository metadata from re3api
         if re3doi:
             # Normalize only once a DOI is known to exist.
             short_re3doi = idutils.normalize_pid(
                 re3doi, scheme='doi')  # https://doi.org/10.17616/R3XS37
             self.logger.info('Found match re3data (DOI-based) record')
             query_url = Preprocessor.RE3DATA_API + '?query=' + short_re3doi  # https://re3data.org/api/beta/repositories?query=
             q = RequestHelper(url=query_url)
             q.setAcceptType(AcceptTypes.xml)
             re_source, xml = q.content_negotiate(metric_id='RE3DATA')
             # NOTE(review): assumes content_negotiate returned a response
             # object exposing ``.content`` - confirm against RequestHelper.
             root = etree.fromstring(xml.content)
             # <link href="https://www.re3data.org/api/beta/repository/r3d100010134" rel="self" />
             links = root.xpath('//link')
             # Indexing [0] / ['href'] directly raises on an empty result or
             # a link without href, which made the None check unreachable.
             re3link = links[0].attrib.get('href') if links else None
             if re3link is not None:
                 self.logger.info('Found match re3data metadata record')
                 # query repository metadata
                 q2 = RequestHelper(url=re3link)
                 q2.setAcceptType(AcceptTypes.xml)
                 re3_source, re3_response = q2.content_negotiate(
                     metric_id='RE3DATA')
                 self.re3metadata_raw = re3_response.content
                 self.parseRepositoryMetadata()
             else:
                 self.logger.warning(
                     'No re3data repository link found in re3data response')
         else:
             self.logger.warning(
                 'No DOI of client id is available from datacite api')
예제 #5
0
 def normalize_value(self, data, **kwargs):
     """Replace ``data['ID']`` with its normalized form for ``data['IDScheme']``.

     Returns ``data`` on success.  Any exception is logged as a warning and
     ``None`` is returned instead.
     """
     try:
         normalized = idutils.normalize_pid(data['ID'], data['IDScheme'])
         data['ID'] = normalized
     except Exception:
         current_app.logger.warning(
             'Failed to normalize PID value.', extra={'data': data})
         return None
     return data
예제 #6
0
    def normalize_identifier(self, data, **kwargs):
        """Normalize ``data["identifier"]`` according to ``data["scheme"]``.

        ``data`` is returned in every case; it is left untouched when the
        identifier is missing or empty.
        """
        identifier = data.get("identifier")
        if not identifier:
            # The identifier may be empty when it is not required.
            return data

        # Reaching this point means `scheme` is set; otherwise validation
        # would have failed earlier.
        data["identifier"] = idutils.normalize_pid(identifier, data["scheme"])
        return data
예제 #7
0
    def normalize_identifier(self, data, **kwargs):
        """Rewrite the ``identifier`` entry of ``data`` in normalized form.

        The scheme is read from ``data["scheme"]``.  An absent or empty
        identifier leaves ``data`` unchanged.  ``data`` is always returned.
        """
        raw_identifier = data.get("identifier")

        # An optional identifier may be empty; the scheme is expected to be
        # present by now (an earlier step should have failed otherwise).
        if raw_identifier:
            data["identifier"] = idutils.normalize_pid(
                raw_identifier, data["scheme"])

        return data
예제 #8
0
    def _deserialize(self, value, attr, data):
        """Validate a persistent identifier and optionally normalize it.

        The parent deserializer's result is stripped of surrounding
        whitespace and its schemes are detected.  ``self.fail`` is invoked
        with ``'invalid_scheme'`` when ``self.scheme`` is set but was not
        detected, and with ``'invalid_pid'`` when no scheme was detected.
        With ``self.normalize`` truthy the value normalized for the first
        detected scheme is returned, otherwise the stripped value.
        """
        stripped = super(PersistentId, self)._deserialize(value, attr, data)
        stripped = stripped.strip()

        detected = idutils.detect_identifier_schemes(stripped)
        if self.scheme and self.scheme.lower() not in detected:
            self.fail('invalid_scheme', scheme=self.scheme)
        if not detected:
            self.fail('invalid_pid')

        if self.normalize:
            return idutils.normalize_pid(stripped, detected[0])
        return stripped
예제 #9
0
    def _deserialize(self, value, attr, data):
        """Deserialize, validate and (optionally) normalize a PID value.

        Reports ``'invalid_scheme'`` through ``self.fail`` if a scheme is
        configured on the field and not among the detected ones, and
        ``'invalid_pid'`` if nothing was detected at all.
        """
        pid = super(PersistentId, self)._deserialize(value, attr, data).strip()
        found_schemes = idutils.detect_identifier_schemes(pid)

        scheme_mismatch = (
            self.scheme and self.scheme.lower() not in found_schemes)
        if scheme_mismatch:
            self.fail('invalid_scheme', scheme=self.scheme)
        if not found_schemes:
            self.fail('invalid_pid')

        if not self.normalize:
            return pid
        return idutils.normalize_pid(pid, found_schemes[0])
 def __init__(self, idstring):
     """Classify ``idstring`` and derive its schemes, URL and normalized id.

     Only non-empty strings longer than four characters that are not purely
     numeric are analysed.  ``idutils`` is consulted first; when it detects
     no scheme, the value is matched against the ``prefix:suffix`` shape and
     looked up in ``self.IDENTIFIERS_ORG_DATA``.

     Attributes written here: ``identifier``, ``normalized_id`` and,
     depending on the outcome, ``identifier_schemes``, ``method``,
     ``preferred_schema``, ``identifier_url`` and ``is_persistent``.

     NOTE(review): ``preferred_schema`` is read at the end even when no
     branch assigned it, so it presumably has a class-level default -
     confirm against the class definition.
     """
     self.identifier = idstring
     self.normalized_id = self.identifier
     if self.identifier and isinstance(self.identifier, str):
         if len(self.identifier) > 4 and not self.identifier.isnumeric():
             #workaround to resolve lsids:
             #idutils.LANDING_URLS['lsid'] ='http://www.lsid.info/resolver/?lsid={pid}'
             #workaround to recognize https purls
             if 'https://purl.' in self.identifier:
                 self.identifier = self.identifier.replace(
                     'https:', 'http:')
             # Raw string: '\.' in a plain literal is an invalid escape
             # sequence (same runtime value, but it triggers a warning).
             generic_identifiers_org_pattern = r'^([a-z0-9\._]+):(.+)'
             # idutils check
             self.identifier_schemes = idutils.detect_identifier_schemes(
                 self.identifier)
             # identifiers.org check
             if not self.identifier_schemes:
                 self.method = 'identifiers.org'
                 idmatch = re.search(generic_identifiers_org_pattern,
                                     self.identifier)
                 if idmatch:
                     found_prefix = idmatch[1]
                     found_suffix = idmatch[2]
                     if found_prefix in self.IDENTIFIERS_ORG_DATA:
                         registry_entry = self.IDENTIFIERS_ORG_DATA[
                             found_prefix]
                         # The scheme is only accepted when the suffix
                         # matches the registered pattern; URL and
                         # normalized id are set for any known prefix.
                         if re.search(registry_entry['pattern'],
                                      found_suffix):
                             self.identifier_schemes = [
                                 found_prefix, 'identifiers_org'
                             ]
                             self.preferred_schema = found_prefix
                         self.identifier_url = str(
                             registry_entry['url_pattern']).replace(
                                 '{$id}', found_suffix)
                         self.normalized_id = found_prefix.lower(
                         ) + ':' + found_suffix
             else:
                 # preferred schema: this branch is only reached with at
                 # least one detected scheme, so no further emptiness
                 # checks are needed.
                 if (len(self.identifier_schemes) > 1
                         and 'url' in self.identifier_schemes):  # ['doi', 'url']
                     self.identifier_schemes.remove('url')
                 self.preferred_schema = self.identifier_schemes[0]
                 self.normalized_id = idutils.normalize_pid(
                     self.identifier, self.preferred_schema)
                 self.identifier_url = idutils.to_url(
                     self.identifier, self.preferred_schema)
             if (self.preferred_schema in Mapper.VALID_PIDS.value
                     or self.preferred_schema in self.IDENTIFIERS_ORG_DATA):
                 self.is_persistent = True
 def __call__(self, form, field, submit=False, fields=None):
     """Normalize ``field.data`` in place using the applicable scheme.

     The scheme comes from, in order of precedence: the form field named
     by ``self.scheme_field``, the fixed ``self.scheme``, or the first
     scheme detected from the field's data.  Nothing is changed when no
     scheme is found or the field is empty.
     """
     if self.scheme_field:
         scheme = getattr(form, self.scheme_field).data
     elif self.scheme:
         scheme = self.scheme
     else:
         detected = idutils.detect_identifier_schemes(field.data)
         scheme = detected[0] if detected else None

     if scheme and field.data:
         field.data = idutils.normalize_pid(field.data, scheme=scheme)
예제 #12
0
 def __call__(self, form, field, submit=False, fields=None):
     """Rewrite the field's data as a normalized persistent identifier.

     Scheme lookup order: ``self.scheme_field`` on the form, then
     ``self.scheme``, then auto-detection on ``field.data``.  The field is
     left alone if no scheme can be determined or it has no data.
     """
     chosen_scheme = None
     if self.scheme_field:
         chosen_scheme = getattr(form, self.scheme_field).data
     elif self.scheme:
         chosen_scheme = self.scheme
     else:
         candidates = idutils.detect_identifier_schemes(field.data)
         if candidates:
             chosen_scheme = candidates[0]

     if not chosen_scheme or not field.data:
         return
     field.data = idutils.normalize_pid(field.data, scheme=chosen_scheme)
    def lookup_re3data(self):
        """Retrieve the re3data metadata record for the DataCite client.

        The client id is mapped to a repository DOI via
        ``RepositoryHelper.DATACITE_REPOSITORIES``.  If that value is a DOI,
        the re3data API is queried with it, the first ``<link>`` of the
        answer is followed, the response is stored in
        ``self.re3metadata_raw`` and ``self.parseRepositoryMetadata()`` is
        called.  Errors while handling the API answer are logged as
        warnings instead of being raised.
        """
        if not (self.client_id and self.pid_scheme):
            return

        re3doi = RepositoryHelper.DATACITE_REPOSITORIES.get(
            self.client_id)  # {client_id,re3doi}
        if re3doi:
            if idutils.is_doi(re3doi):
                short_re3doi = idutils.normalize_pid(
                    re3doi, scheme='doi')  # https://doi.org/10.17616/R3XS37
            else:
                # Not a DOI: treat it like a missing entry.
                re3doi = None

        if not re3doi:
            self.logger.warning(
                'FsF-R1.3-01M : No DOI of client id is available from datacite api'
            )
            return

        # pid -> clientId -> repo doi-> re3id, and query repository metadata from re3api
        self.logger.info(
            'FsF-R1.3-01M : Found match re3data (DOI-based) record')
        # https://re3data.org/api/beta/repositories?query=
        query_url = Preprocessor.RE3DATA_API + '?query=' + short_re3doi
        search_request = RequestHelper(url=query_url)
        search_request.setAcceptType(AcceptTypes.xml)
        re_source, xml = search_request.content_negotiate(metric_id='RE3DATA')
        try:
            if isinstance(xml, bytes):
                xml = xml.decode().encode()
            root = etree.fromstring(xml)

            # <link href="https://www.re3data.org/api/beta/repository/r3d100010134" rel="self" />
            first_link = root.xpath('//link')[0]
            re3link = first_link.attrib['href']
            if re3link is not None:
                found_message = (
                    'FsF-R1.3-01M : Found match re3data metadata record -: '
                    + str(re3link))
                self.logger.info(found_message)
                # query repository metadata
                record_request = RequestHelper(url=re3link)
                record_request.setAcceptType(AcceptTypes.xml)
                re3_source, re3_response = record_request.content_negotiate(
                    metric_id='RE3DATA')
                self.re3metadata_raw = re3_response
                self.parseRepositoryMetadata()
        except Exception as e:
            malformed_message = (
                'FsF-R1.3-01M : Malformed re3data (DOI-based) record received: '
                + str(e))
            self.logger.warning(malformed_message)
예제 #14
0
파일: fuji.py 프로젝트: huberrob/fuji
    def check_identifiers(self):
        """Evaluate metrics FsF-F1-01D and FsF-F1-02D for ``self.uid``.

        FsF-F1-01D passes when a known identifier scheme is detected for
        ``self.uid``.  FsF-F1-02D passes when at least one detected scheme
        is listed in ``self.validpids``.  The identifier is then resolved
        with an HTTP GET; on status 200 the final URL and body are stored in
        ``self.landing_url`` / ``self.landing_html``.

        Both result dicts are always appended to ``self.results``; failures
        are recorded in ``self.error`` rather than raised.
        """
        uuidresult = {'id': 1, 'metric_id': 'FsF-F1-01D', 'passed': False}
        pidresult = {'id': 2, 'metric_id': 'FsF-F1-02D', 'passed': False}
        try:
            # try to find an identifier schema for the given string
            foundpids = id.detect_identifier_schemes(self.uid)
            if foundpids:
                # a detected schema means the id is recognised by idutils
                uuidresult['passed'] = True
                uuidresult['output'] = {
                    'uuid': self.uid,
                    'uuid_schema': foundpids
                }
                # schemas that are also in our valid pid list make it a pid
                realpids = [
                    value for value in foundpids if value in self.validpids
                ]
                if realpids:
                    pidresult['passed'] = True
                if foundpids[0] == 'url':
                    self.pid_url = self.uid
                else:
                    # Find an actionable representation of the pid (URL).
                    # The identifier being checked is self.uid; the former
                    # bare name `pid` was never defined.  Fall back to the
                    # first detected scheme when none is a valid pid scheme
                    # instead of indexing an empty list.
                    url_scheme = realpids[0] if realpids else foundpids[0]
                    self.pid_url = id.to_url(self.uid, scheme=url_scheme)
                # Now we try to perform a HTTP GET request
                r = requests.get(self.pid_url)
                if r.status_code == 200:
                    if realpids:
                        self.pid = id.normalize_pid(
                            self.uid, scheme=realpids[0])
                    self.landing_url = r.url
                    self.landing_html = r.text
                    # NOTE(review): without a valid pid scheme self.pid is
                    # not assigned here and keeps its previous value.
                    pidresult['output'] = {
                        'pid': self.pid,
                        'resolved_url': self.landing_url,
                        'pid_schema': realpids
                    }
                else:
                    self.error.append('FsF-F1: HTTP Error: ' +
                                      str(r.status_code))
        except Exception as err:
            # Exception rather than BaseException, so KeyboardInterrupt and
            # SystemExit are not swallowed.
            self.error.append('FsF-F1: Failed to check the given identifier' +
                              str(err))

        self.results.append(uuidresult)
        self.results.append(pidresult)
예제 #15
0
 def __init__(self, idstring):
     """Classify ``idstring`` and derive its schemes, URL and normalized id.

     Strings longer than four characters that are not purely numeric are
     checked with ``idutils`` first; when no scheme is detected, the value
     is matched against the ``prefix:suffix`` shape and looked up in
     ``self.IDENTIFIERS_ORG_DATA``.  Any other input (including ``None``
     and non-string values) only sets ``identifier`` and ``normalized_id``.

     NOTE(review): ``preferred_schema`` is read at the end even when no
     branch assigned it, so it presumably has a class-level default -
     confirm against the class definition.
     """
     self.identifier = idstring
     self.normalized_id = self.identifier
     # The isinstance guard keeps None / non-string input from raising in
     # len() or .isnumeric().
     if (isinstance(self.identifier, str) and len(self.identifier) > 4
             and not self.identifier.isnumeric()):
         # Raw string: '\.' in a plain literal is an invalid escape
         # sequence (same runtime value, but it triggers a warning).
         generic_identifiers_org_pattern = r'^([a-z0-9\._]+):(.+)'
         # idutils check
         self.identifier_schemes = idutils.detect_identifier_schemes(
             self.identifier)
         # identifiers.org check
         if not self.identifier_schemes:
             self.method = 'identifiers.org'
             idmatch = re.search(generic_identifiers_org_pattern,
                                 self.identifier)
             if idmatch:
                 found_prefix = idmatch[1]
                 found_suffix = idmatch[2]
                 if found_prefix in self.IDENTIFIERS_ORG_DATA:
                     registry_entry = self.IDENTIFIERS_ORG_DATA[found_prefix]
                     # The scheme is only accepted when the suffix matches
                     # the registered pattern; URL and normalized id are
                     # set for any known prefix.
                     if re.search(registry_entry['pattern'], found_suffix):
                         self.identifier_schemes = [
                             found_prefix, 'identifiers_org'
                         ]
                         self.preferred_schema = found_prefix
                     self.identifier_url = str(
                         registry_entry['url_pattern']).replace(
                             '{$id}', found_suffix)
                     self.normalized_id = found_prefix.lower(
                     ) + ':' + found_suffix
         else:
             # preferred schema: at least one scheme was detected here
             if (len(self.identifier_schemes) > 1
                     and 'url' in self.identifier_schemes):  # ['doi', 'url']
                 self.identifier_schemes.remove('url')
             self.preferred_schema = self.identifier_schemes[0]
             self.normalized_id = idutils.normalize_pid(
                 self.identifier, self.preferred_schema)
             self.identifier_url = idutils.to_url(self.identifier,
                                                  self.preferred_schema)
         if (self.preferred_schema in Mapper.VALID_PIDS.value
                 or self.preferred_schema in self.IDENTIFIERS_ORG_DATA):
             self.is_persistent = True
예제 #16
0
def test_tourl():
    """Check that each normalized fixture identifier maps to its URL."""
    for i, expected_schemes, normalized_value, url_value in identifiers:
        scheme = expected_schemes[0]
        normalized = idutils.normalize_pid(i, scheme)
        assert idutils.to_url(normalized, scheme) == url_value
예제 #17
0
def test_idempotence():
    """Check that a normalized value is still detected as its scheme."""
    for i, expected_schemes, normalized_value, url_value in identifiers:
        scheme = expected_schemes[0]
        detected = idutils.detect_identifier_schemes(
            idutils.normalize_pid(i, scheme))
        assert scheme in detected
예제 #18
0
파일: common.py 프로젝트: slint/zenodo
 def normalize_identifier(self, data):
     """Normalize ``data['identifier']`` in place for ``data['scheme']``."""
     identifier, scheme = data['identifier'], data['scheme']
     data['identifier'] = idutils.normalize_pid(identifier, scheme)
예제 #19
0
 def normalize_identifier(self, data):
     """Overwrite the identifier in ``data`` with its normalized form.

     Both the ``'identifier'`` and ``'scheme'`` keys must be present.
     """
     normalized = idutils.normalize_pid(data['identifier'], data['scheme'])
     data['identifier'] = normalized
예제 #20
0
def test_tourl():
    """Verify URL generation from normalized identifiers."""
    for i, expected_schemes, normalized_value, url_value in identifiers:
        primary_scheme = expected_schemes[0]
        generated_url = idutils.to_url(
            idutils.normalize_pid(i, primary_scheme), primary_scheme)
        assert generated_url == url_value
예제 #21
0
def test_idempotence():
    """Verify that normalization keeps the identifier's scheme detectable."""
    for i, expected_schemes, normalized_value, url_value in identifiers:
        primary_scheme = expected_schemes[0]
        val_norm = idutils.normalize_pid(i, primary_scheme)
        schemes_after = idutils.detect_identifier_schemes(val_norm)
        assert primary_scheme in schemes_after
예제 #22
0
def update_metadata(id_value: str, scheme: str, data: dict,
                    create_identity_events: bool = True,
                    create_missing_groups: bool = True,
                    providers: List[str] = None,
                    link_publication_date: str = None):
    """Merge ``data`` into the metadata of the group owning an identifier.

    The identifier is normalized first.  When ``create_identity_events`` is
    set, an "IsIdenticalTo" event is emitted for every *other* identifier
    listed under ``data['Identifier']``, in chunks of 100.  Finally ``data``
    is merged into the group's ``data`` and committed.

    :param id_value: Identifier value; normalized with ``scheme``.
    :param scheme: Identifier scheme; lower-cased before use.
    :param data: Metadata to merge.  An optional ``'Identifier'`` list of
        dicts with ``'ID'`` and ``'IDScheme'`` keys is read from it.
    :param create_identity_events: Whether to emit identity events.
    :param create_missing_groups: Whether to create the identifier and its
        groups when no group exists yet.
    :param providers: Link provider names; defaults to ``['unknown']``.
    :param link_publication_date: Publication date for the events;
        defaults to the current time in ISO format.
    """
    from ..events.api import EventAPI
    scheme = scheme.lower()
    id_value = idutils.normalize_pid(id_value, scheme)

    target_identifiers = set()
    for i in data.get('Identifier', []):
        value, target_scheme = i['ID'], i['IDScheme'].lower()
        value = idutils.normalize_pid(value, target_scheme)
        target_identifiers.add((value, target_scheme))

    # Check if there are identity links that can be created:
    if create_identity_events and len(target_identifiers) > 0:
        events = []
        providers = providers or ['unknown']
        providers = [{'Name': provider} for provider in providers]
        link_publication_date = link_publication_date or \
            datetime.now().isoformat()
        source_id_obj = {'ID': id_value, 'IDScheme': scheme}
        for target_value, target_scheme in target_identifiers:
            # An identifier is never linked to itself.
            if (id_value, scheme) != (target_value, target_scheme):
                target_id_obj = {'ID': target_value, 'IDScheme': target_scheme}
                payload = {
                    'RelationshipType': {
                        'Name': 'IsRelatedTo',
                        'SubTypeSchema': 'DataCite',
                        'SubType': 'IsIdenticalTo'
                    },
                    'Target': {
                        'Identifier': target_id_obj,
                        'Type': {'Name': 'unknown'}
                    },
                    'LinkProvider': providers,
                    'Source': {
                        'Identifier': source_id_obj,
                        'Type': {'Name': 'unknown'}
                    },
                    'LinkPublicationDate': link_publication_date,
                }
                events.append(payload)
        for event_chunk in chunks(events, 100):
            try:
                EventAPI.handle_event(
                    list(event_chunk), no_index=True, eager=True)
            except ValueError as exc:
                # Record the failed chunk and continue with the next one.
                error_obj = ErrorMonitoring(
                    origin="update_metadata", error=repr(exc),
                    n_retries=99, payload=event_chunk)
                db.session.add(error_obj)
                db.session.commit()
                current_app.logger.exception(
                    'Error while processing identity event')

    id_group = get_group_from_id(id_value, scheme)
    if not id_group and create_missing_groups:
        identifier = Identifier(
            value=id_value, scheme=scheme).fetch_or_create_id()
        db.session.commit()
        id_group, _ = get_or_create_groups(identifier)
        db.session.commit()
    if not id_group:
        # No group exists and none was created: there is nothing to update.
        # Previously this fell through to an AttributeError on ``.data``.
        current_app.logger.warning(
            'No group found for identifier; metadata not updated.')
        return
    id_group.data.update(data)
    db.session.commit()