Ejemplo n.º 1
0
    def _dump_common_links(self, obj):
        """Dump common links for deposits and records."""
        links = {}
        m = obj.get('metadata', {})

        doi = m.get('doi')
        if doi:
            links['badge'] = ui_link_for('badge', doi=quote(doi))
            links['doi'] = idutils.to_url(doi, 'doi')

        conceptdoi = m.get('conceptdoi')
        if conceptdoi:
            links['conceptbadge'] = ui_link_for('badge', doi=quote(conceptdoi))
            links['conceptdoi'] = idutils.to_url(conceptdoi, 'doi')

        files = m.get('_files', [])
        for f in files:
            if f.get('type') in thumbnail_exts:
                try:
                    links['thumb250'] = self._thumbnail_url(f, 250)
                    # First previewable image is used for preview.
                except RuntimeError:
                    pass
                break

        return links
Ejemplo n.º 2
0
    def _dump_common_links(self, obj):
        """Dump common links for deposits and records."""
        links = {}
        m = obj.get('metadata', {})

        doi = m.get('doi')
        if doi:
            links['badge'] = ui_link_for('badge', doi=quote(doi))
            links['doi'] = idutils.to_url(doi, 'doi', 'https')

        conceptdoi = m.get('conceptdoi')
        if conceptdoi:
            links['conceptbadge'] = ui_link_for('badge', doi=quote(conceptdoi))
            links['conceptdoi'] = idutils.to_url(conceptdoi, 'doi', 'https')

        files = m.get('_files', [])
        for f in files:
            if f.get('type') in thumbnail_exts:
                try:
                    links['thumb250'] = self._thumbnail_url(f, 250)
                    # First previewable image is used for preview.
                except RuntimeError:
                    pass
                break

        return links
Ejemplo n.º 3
0
 def get_id(self, obj):
     """Return the https URL of the person's ORCID, else of their GND.

     ``missing`` is returned when neither identifier is set.
     """
     for scheme in ('orcid', 'gnd'):
         value = obj.get(scheme)
         if value:
             return idutils.to_url(value, scheme, 'https')
     return missing
Ejemplo n.º 4
0
 def get_id(self, obj):
     """Resolve the person's identifier to an https URL.

     The ORCID takes precedence over the GND; ``missing`` is returned when
     the person has neither.
     """
     orcid_value = obj.get('orcid')
     gnd_value = obj.get('gnd')
     if not orcid_value and not gnd_value:
         return missing
     if orcid_value:
         return idutils.to_url(orcid_value, 'orcid', 'https')
     return idutils.to_url(gnd_value, 'gnd', 'https')
Ejemplo n.º 5
0
def test_to_url():
    """Check URL generation with the default and the https URL scheme."""
    for raw, schemes, _normalized, http_url in identifiers:
        scheme = schemes[0]
        pid = idutils.normalize_pid(raw, scheme)
        assert idutils.to_url(pid, scheme) == http_url

        if scheme in ['purl', 'url']:
            # If the value is already a URL its scheme is preserved
            https_url = http_url
        else:
            https_url = http_url.replace('http://', 'https://')
        assert idutils.to_url(pid, scheme, url_scheme='https') == https_url
Ejemplo n.º 6
0
def pid_url(identifier, scheme=None):
    """Turn a persistent identifier into a link.

    When ``scheme`` is not given, the first scheme detected by ``idutils``
    is used. An empty string is returned when no scheme is known or the
    identifier is empty.
    """
    if scheme is None:
        try:
            scheme = idutils.detect_identifier_schemes(identifier)[0]
        except IndexError:
            # Nothing detected: scheme stays None.
            pass
    if not (scheme and identifier):
        return ""
    return idutils.to_url(identifier, scheme)
Ejemplo n.º 7
0
def pid_url(identifier, scheme=None):
    """Build the link for a persistent identifier.

    The scheme is taken from ``scheme`` or, if that is ``None``, from the
    first scheme ``idutils`` detects. Without a scheme or an identifier
    the result is an empty string.
    """
    resolved_scheme = scheme
    if resolved_scheme is None:
        try:
            detected = idutils.detect_identifier_schemes(identifier)[0]
        except IndexError:
            detected = None
        resolved_scheme = detected
    return (idutils.to_url(identifier, resolved_scheme)
            if resolved_scheme and identifier else "")
Ejemplo n.º 8
0
def build_id_info(id_):
    """Describe an identifier as a dict with ID, IDScheme and, if any, IDURL.

    URL generation is best effort: any error raised while building the URL
    is ignored and the ``IDURL`` key is simply left out.
    """
    info = dict(ID=id_.value, IDScheme=id_.scheme)
    try:
        url = idutils.to_url(id_.value, id_.scheme)
    except Exception:
        pass
    else:
        if url:
            info['IDURL'] = url
    return info
Ejemplo n.º 9
0
    def dump_links(self, obj):
        """Dump links.

        Starts from the links already present on ``obj`` and adds the DOI
        badge and resolver links. Inside a request context the bucket, API
        and HTML links are added as well.

        :param obj: Mapping with optional ``links`` and ``metadata`` keys.
        :returns: The links dictionary (also outside a request context).
        """
        links = obj.get('links', {})
        m = obj.get('metadata', {})

        doi = m.get('doi')
        if current_app and doi:
            links['badge'] = "{base}/badge/doi/{value}.svg".format(
                base=current_app.config.get('THEME_SITEURL'),
                value=quote(doi),
            )
            links['doi'] = idutils.to_url(doi, 'doi')

        if not has_request_context():
            # The remaining links need the request and URL routing. Return
            # what was built so far; previously the function fell off its
            # end here, returning None and dropping the links above.
            return links

        if is_deposit(m):
            bucket_id = m.get('_buckets', {}).get('deposit')
            # Only link to the record when the deposit has a pid.
            recid = m.get('recid') if m.get('_deposit', {}).get('pid') \
                else None
            api_key = 'record'
            html_key = 'record_html'
        else:
            bucket_id = m.get('_buckets', {}).get('record')
            recid = m.get('recid')
            api_key = None
            html_key = 'html'

        if bucket_id:
            try:
                links['bucket'] = url_for(
                    'invenio_files_rest.bucket_api',
                    bucket_id=bucket_id,
                    _external=True,
                )
            except BuildError:
                # Endpoint not routable: leave the bucket link out.
                pass

        if recid:
            try:
                if api_key:
                    links[api_key] = url_for(
                        'invenio_records_rest.recid_item',
                        pid_value=recid,
                        _external=True,
                    )
                if html_key:
                    links[html_key] = \
                        current_app.config['RECORDS_UI_ENDPOINT'].format(
                        host=request.host,
                        scheme=request.scheme,
                        pid_value=recid,
                    )
            except BuildError:
                pass

        return links
Ejemplo n.º 10
0
    def dump_links(self, obj):
        """Dump links.

        The DOI badge and resolver links are added to the links already
        present on ``obj``; the bucket, API and HTML links are added only
        inside a request context.

        :param obj: Mapping with optional ``links`` and ``metadata`` keys.
        :returns: The links dictionary, with or without a request context.
        """
        links = obj.get('links', {})
        m = obj.get('metadata', {})

        doi = m.get('doi')
        if current_app and doi:
            links['badge'] = "{base}/badge/doi/{value}.svg".format(
                base=current_app.config.get('THEME_SITEURL'),
                value=quote(doi),
            )
            links['doi'] = idutils.to_url(doi, 'doi')

        if has_request_context():
            if is_deposit(m):
                bucket_id = m.get('_buckets', {}).get('deposit')
                # Only link to the record when the deposit has a pid.
                recid = m.get('recid') if m.get('_deposit', {}).get('pid') \
                    else None
                api_key = 'record'
                html_key = 'record_html'
            else:
                bucket_id = m.get('_buckets', {}).get('record')
                recid = m.get('recid')
                api_key = None
                html_key = 'html'

            if bucket_id:
                try:
                    links['bucket'] = url_for(
                        'invenio_files_rest.bucket_api',
                        bucket_id=bucket_id,
                        _external=True,
                    )
                except BuildError:
                    # Endpoint not routable: leave the bucket link out.
                    pass

            if recid:
                try:
                    if api_key:
                        links[api_key] = url_for(
                            'invenio_records_rest.recid_item',
                            pid_value=recid,
                            _external=True,
                        )
                    if html_key:
                        links[html_key] = \
                            current_app.config['RECORDS_UI_ENDPOINT'].format(
                            host=request.host,
                            scheme=request.scheme,
                            pid_value=recid,
                        )
                except BuildError:
                    pass

        # Returned unconditionally: with the return nested in the branch
        # above, calls made outside a request context got None and lost
        # the badge/doi links.
        return links
Ejemplo n.º 11
0
    def _dump_common_links(self, obj):
        """Dump common links for deposits and records."""
        links = {}
        m = obj.get('metadata', {})

        doi = m.get('doi')
        if doi:
            links['badge'] = \
                "{base}/badge/doi/{value}.svg".format(
                    base=current_app.config.get('THEME_SITEURL'),
                    value=quote(doi))
            links['doi'] = idutils.to_url(doi, 'doi')

        conceptdoi = m.get('conceptdoi')
        if conceptdoi:
            links['conceptbadge'] = \
                "{base}/badge/doi/{value}.svg".format(
                    base=current_app.config.get('THEME_SITEURL'),
                    value=quote(conceptdoi))
            links['conceptdoi'] = idutils.to_url(conceptdoi, 'doi')
        return links
Ejemplo n.º 12
0
    def _dump_common_links(self, obj):
        """Dump common links for deposits and records."""
        links = {}
        m = obj.get('metadata', {})

        doi = m.get('doi')
        if doi:
            links['badge'] = \
                "{base}/badge/doi/{value}.svg".format(
                    base=current_app.config.get('THEME_SITEURL'),
                    value=quote(doi))
            links['doi'] = idutils.to_url(doi, 'doi')

        conceptdoi = m.get('conceptdoi')
        if conceptdoi:
            links['conceptbadge'] = \
                "{base}/badge/doi/{value}.svg".format(
                    base=current_app.config.get('THEME_SITEURL'),
                    value=quote(conceptdoi))
            links['conceptdoi'] = idutils.to_url(conceptdoi, 'doi')
        return links
 def __init__(self, idstring):
     """Analyse ``idstring`` and record its schemes, URL and normalised form.

     Detection is attempted with ``idutils`` first. When that finds no
     scheme, the value is matched as ``prefix:suffix`` against
     ``IDENTIFIERS_ORG_DATA``. Values that are empty, not ``str``, purely
     numeric or at most four characters long are stored unchanged and not
     analysed any further.

     :param idstring: Identifier to analyse.
     """
     self.identifier = idstring
     self.normalized_id = self.identifier
     if not (self.identifier and isinstance(self.identifier, str)):
         return
     if len(self.identifier) <= 4 or self.identifier.isnumeric():
         return

     # Workaround to recognise https PURLs: continue with the http form.
     if 'https://purl.' in self.identifier:
         self.identifier = self.identifier.replace('https:', 'http:')

     # idutils check
     self.identifier_schemes = idutils.detect_identifier_schemes(
         self.identifier)
     if self.identifier_schemes:
         # Preferred schema: drop the generic 'url' scheme when a more
         # specific one was detected as well, e.g. ['doi', 'url'].
         if (len(self.identifier_schemes) > 1
                 and 'url' in self.identifier_schemes):
             self.identifier_schemes.remove('url')
         self.preferred_schema = self.identifier_schemes[0]
         self.normalized_id = idutils.normalize_pid(
             self.identifier, self.preferred_schema)
         self.identifier_url = idutils.to_url(self.identifier,
                                              self.preferred_schema)
     else:
         # identifiers.org check
         self.method = 'identifiers.org'
         # Raw string: a plain literal containing '\.' relies on Python
         # passing the unknown escape through, which is deprecated.
         idmatch = re.search(r'^([a-z0-9\._]+):(.+)', self.identifier)
         if idmatch:
             found_prefix = idmatch[1]
             found_suffix = idmatch[2]
             if found_prefix in self.IDENTIFIERS_ORG_DATA:
                 registry_entry = self.IDENTIFIERS_ORG_DATA[found_prefix]
                 # The scheme is only accepted when the suffix matches
                 # the registry pattern; URL and normalised id are set
                 # for every known prefix.
                 if re.search(registry_entry['pattern'], found_suffix):
                     self.identifier_schemes = [
                         found_prefix, 'identifiers_org'
                     ]
                     self.preferred_schema = found_prefix
                 self.identifier_url = str(
                     registry_entry['url_pattern']).replace(
                         '{$id}', found_suffix)
                 self.normalized_id = (found_prefix.lower() + ':' +
                                       found_suffix)

     # NOTE(review): ``preferred_schema`` is read here even when nothing
     # above assigned it; presumably the class defines a default - confirm.
     if (self.preferred_schema in Mapper.VALID_PIDS.value
             or self.preferred_schema in self.IDENTIFIERS_ORG_DATA):
         self.is_persistent = True
Ejemplo n.º 14
0
def _serialize_identifiers(ids, relations=None):
    """Serialize related and alternate identifiers to URLs.

    :param ids: List of related_identifier or alternate_identifier objects.
    :param relations: if not None, will only select IDs of specific relation
    :returns: List of identifiers in schema.org format.
    :rtype dict:
    """
    relations = relations or []
    ids = [{'@type': 'CreativeWork',
             '@id': idutils.to_url(i['identifier'], i['scheme'], 'https')}
            for i in ids if (not relations or i['relation'] in relations) and 'scheme' in i]
    return [id_ for id_ in ids if id_['@id']]
Ejemplo n.º 15
0
def pid_url(identifier, scheme=None, url_scheme="https"):
    """Turn a persistent identifier into a link.

    When ``scheme`` is ``None`` the first scheme detected by ``idutils``
    is used. Failures during URL generation are logged as warnings and an
    empty string is returned, as it is when scheme or identifier is empty.
    """
    if scheme is None:
        try:
            scheme = idutils.detect_identifier_schemes(identifier)[0]
        except IndexError:
            # Nothing detected: scheme stays None.
            pass
    try:
        if not (scheme and identifier):
            return ""
        return idutils.to_url(identifier, scheme, url_scheme=url_scheme)
    except Exception:
        current_app.logger.warning(
            "URL generation for identifier {0} failed.".format(identifier),
            exc_info=True,
        )
        return ""
Ejemplo n.º 16
0
def _serialize_identifiers(ids, relations=None):
    """Serialize related and alternate identifiers to URLs.

    :param ids: List of related_identifier or alternate_identifier objects.
    :param relations: if not None, will only select IDs of specific relation
    :returns: List of identifiers in schema.org format.
    :rtype dict:
    """
    relations = relations or []
    ids = [{'@type': 'CreativeWork',
             '@id': idutils.to_url(i['identifier'], i['scheme'], 'https')}
            for i in ids if (not relations or i['relation'] in relations) and 'scheme' in i]
    return [id_ for id_ in ids if id_['@id']]
Ejemplo n.º 17
0
def pid_url(identifier, scheme=None, url_scheme='https'):
    """Build the link for a persistent identifier.

    The scheme is ``scheme`` or, if that is ``None``, the first scheme
    ``idutils`` detects. An empty string is returned when scheme or
    identifier is empty, or when URL generation fails (which is logged as
    a warning).
    """
    resolved_scheme = scheme
    if resolved_scheme is None:
        try:
            resolved_scheme = idutils.detect_identifier_schemes(
                identifier)[0]
        except IndexError:
            resolved_scheme = None
    try:
        if resolved_scheme and identifier:
            link = idutils.to_url(
                identifier, resolved_scheme, url_scheme=url_scheme)
            return link
    except Exception:
        message = 'URL generation for identifier {0} failed.'.format(
            identifier)
        current_app.logger.warning(message, exc_info=True)
    return ''
Ejemplo n.º 18
0
def get_related_identifiers_url(record: Record, doi_prefix: str) -> List[Dict]:
    """Create related identifiers URL.

    Args:
        record (Record): Record API Object from where the related
        identifiers will be extracted.

        doi_prefix (str): GEO Knowledge Hub DOI Prefix.

    Returns:
        List[Dict]: Deep copies of the record's related identifiers, each
        with an extra ``url`` key. The URL is empty when the entry has no
        identifier, and falls back to the raw identifier when resolving it
        raises an error.

    Note:
        The `doi_prefix` is used to check if the items are managed by the GEO Knowledge Hub.
    """
    # extracting related identifiers
    related_identifiers = py_.get(record, "metadata.related_identifiers", [])

    new_related_identifiers = []
    for related_identifier in related_identifiers:
        related_identifier_obj = py_.set_(py_.clone_deep(related_identifier),
                                          "url", "")

        # Entries without an identifier have nothing to resolve; they are
        # kept with an empty URL instead of failing on a missing key.
        identifier = related_identifier.get("identifier")
        if identifier:
            scheme = related_identifier.get("scheme")
            try:
                if idutils.is_url(identifier):
                    related_identifier_obj["url"] = identifier
                else:
                    # checking if the doi is internal
                    if idutils.is_doi(identifier):
                        identifier_split = identifier.split("/")

                        if doi_prefix and identifier_split[0] == doi_prefix:
                            related_identifier_obj["url"] = posixpath.join(
                                "/records", identifier_split[1])

                    if not related_identifier_obj["url"]:
                        related_identifier_obj["url"] = idutils.to_url(
                            identifier, scheme, "https")
            except Exception:
                # Best effort: expose the raw identifier when no URL can be
                # built. ``Exception`` rather than ``BaseException`` so that
                # KeyboardInterrupt and SystemExit are not swallowed.
                related_identifier_obj["url"] = identifier
        new_related_identifiers.append(related_identifier_obj)
    return new_related_identifiers
Ejemplo n.º 19
0
    def check_identifiers(self):
        """Run the FsF-F1-01D and FsF-F1-02D checks on ``self.uid``.

        FsF-F1-01D passes when an identifier scheme is detected for
        ``self.uid``. FsF-F1-02D passes when one of the detected schemes is
        listed in ``self.validpids``. The identifier is then resolved with
        an HTTP GET; on a 200 response the landing URL and HTML are stored.
        Both result dicts are appended to ``self.results`` and failures are
        recorded in ``self.error``.
        """
        uuidresult = {'id': 1, 'metric_id': 'FsF-F1-01D', 'passed': False}
        pidresult = {'id': 2, 'metric_id': 'FsF-F1-02D', 'passed': False}
        try:
            # try to find an identifier schema for the given string
            foundpids = id.detect_identifier_schemes(self.uid)
            if len(foundpids) > 0:
                # if schema found we have an id which can be found by idutils
                uuidresult['passed'] = True
                uuidresult['output'] = {
                    'uuid': self.uid,
                    'uuid_schema': foundpids
                }
                # now we check if the schema is listed in our valid pid
                # list; in this case it is also a pid
                realpids = [
                    value for value in foundpids if value in self.validpids
                ]
                if len(realpids) > 0:
                    pidresult['passed'] = True
                if foundpids[0] == 'url':
                    self.pid_url = self.uid
                else:
                    # we try to find an actionable representation of the
                    # pid (URL). The identifier is ``self.uid`` (the name
                    # ``pid`` used here before is not defined in this
                    # method); without a valid PID scheme the first
                    # detected scheme is used instead of indexing an empty
                    # list.
                    url_scheme = realpids[0] if realpids else foundpids[0]
                    self.pid_url = id.to_url(self.uid, scheme=url_scheme)
                # Now we try to perform a HTTP GET request
                r = requests.get(self.pid_url)
                if r.status_code == 200:
                    if len(realpids) > 0:
                        self.pid = id.normalize_pid(self.uid,
                                                    scheme=realpids[0])
                    self.landing_url = r.url
                    self.landing_html = r.text
                    pidresult['output'] = {
                        'pid': self.pid,
                        'resolved_url': self.landing_url,
                        'pid_schema': realpids
                    }
                else:
                    self.error.append('FsF-F1: HTTP Error: ' +
                                      str(r.status_code))
        except Exception as err:
            # ``Exception`` rather than ``BaseException`` so that
            # KeyboardInterrupt and SystemExit still propagate.
            self.error.append('FsF-F1: Failed to check the given identifier' +
                              str(err))

        self.results.append(uuidresult)
        self.results.append(pidresult)
Ejemplo n.º 20
0
 def __init__(self, idstring):
     """Analyse ``idstring`` and record its schemes, URL and normalised form.

     Detection is attempted with ``idutils`` first. When that finds no
     scheme, the value is matched as ``prefix:suffix`` against
     ``IDENTIFIERS_ORG_DATA``. Values that are not ``str``, purely numeric
     or at most four characters long are stored unchanged and not analysed
     any further.

     :param idstring: Identifier to analyse.
     """
     self.identifier = idstring
     self.normalized_id = self.identifier
     # Non-string input (e.g. None) cannot be analysed; without this guard
     # ``len()`` / ``isnumeric()`` below would raise.
     if not isinstance(self.identifier, str):
         return
     if len(self.identifier) <= 4 or self.identifier.isnumeric():
         return

     # idutils check
     self.identifier_schemes = idutils.detect_identifier_schemes(
         self.identifier)
     if self.identifier_schemes:
         # Preferred schema: drop the generic 'url' scheme when a more
         # specific one was detected as well, e.g. ['doi', 'url'].
         if (len(self.identifier_schemes) > 1
                 and 'url' in self.identifier_schemes):
             self.identifier_schemes.remove('url')
         self.preferred_schema = self.identifier_schemes[0]
         self.normalized_id = idutils.normalize_pid(
             self.identifier, self.preferred_schema)
         self.identifier_url = idutils.to_url(self.identifier,
                                              self.preferred_schema)
     else:
         # identifiers.org check
         self.method = 'identifiers.org'
         # Raw string: a plain literal containing '\.' relies on Python
         # passing the unknown escape through, which is deprecated.
         idmatch = re.search(r'^([a-z0-9\._]+):(.+)', self.identifier)
         if idmatch:
             found_prefix = idmatch[1]
             found_suffix = idmatch[2]
             if found_prefix in self.IDENTIFIERS_ORG_DATA:
                 registry_entry = self.IDENTIFIERS_ORG_DATA[found_prefix]
                 # The scheme is only accepted when the suffix matches
                 # the registry pattern; URL and normalised id are set
                 # for every known prefix.
                 if re.search(registry_entry['pattern'], found_suffix):
                     self.identifier_schemes = [
                         found_prefix, 'identifiers_org'
                     ]
                     self.preferred_schema = found_prefix
                 self.identifier_url = str(
                     registry_entry['url_pattern']).replace(
                         '{$id}', found_suffix)
                 self.normalized_id = (found_prefix.lower() + ':' +
                                       found_suffix)

     # NOTE(review): ``preferred_schema`` is read here even when nothing
     # above assigned it; presumably the class defines a default - confirm.
     if (self.preferred_schema in Mapper.VALID_PIDS.value
             or self.preferred_schema in self.IDENTIFIERS_ORG_DATA):
         self.is_persistent = True
Ejemplo n.º 21
0
Archivo: dcat.py Proyecto: xbee/zenodo
class DCATSerializer(object):
    """Serialize records as DCAT by transforming their DataCite XML."""

    def __init__(self, datacite_serializer):
        """Keep the DataCite serializer whose output gets transformed."""
        self.datacite_serializer = datacite_serializer

    @cached_property
    def xslt_transform_func(self):
        """Return the DCAT XSLT transformation function."""
        with resource_stream('zenodo.modules.records',
                             'data/datacite-to-dcat-ap.xsl') as stylesheet:
            parsed = ET.XML(stylesheet.read())
        return ET.XSLT(parsed)

    # Maps a tag template to a callable ``(file, record) -> text``; a
    # falsy result means the element is not emitted for that file.
    FILES_FIELDS = {
        '{{{dcat}}}downloadURL': lambda f, r: ui_link_for(
            'record_file', id=r['recid'], filename=f['key']),
        '{{{dcat}}}mediaType': lambda f, r: mimetypes.guess_type(f['key'])[0],
        '{{{dcat}}}byteSize': lambda f, r: str(f['size']),
        '{{{dcat}}}accessURL': lambda f, r: idutils.to_url(
            r['doi'], 'doi', url_scheme='https'),
        # TODO: there's also "spdx:checksum", but it's not in the W3C spec yet
    }

    def _add_files(self, root, files, record):
        """Append one distribution element per file to ``root[0]``."""
        namespaces = root.nsmap
        for file_ in files:
            wrapper = ET.SubElement(
                root[0], '{{{dcat}}}distribution'.format(**namespaces))
            distribution = ET.SubElement(
                wrapper, '{{{dcat}}}Distribution'.format(**namespaces))

            for tag_template, build_text in self.FILES_FIELDS.items():
                text = build_text(file_, record)
                if not text:
                    continue
                field = ET.SubElement(
                    distribution, tag_template.format(**namespaces))
                field.text = text

    def _etree_tostring(self, root):
        """Render ``root`` as pretty-printed UTF-8 XML text."""
        raw = ET.tostring(
            root,
            pretty_print=True,
            xml_declaration=True,
            encoding='utf-8',
        )
        return raw.decode('utf-8')

    def transform_with_xslt(self, pid, record, search_hit=False, **kwargs):
        """Transform record with XSLT.

        The record is first turned into DataCite XML, then run through the
        XSLT; file distributions are injected afterwards when file data is
        available.
        """
        files_data = None
        serializer = self.datacite_serializer
        if search_hit:
            dc_record = serializer.transform_search_hit(
                pid, record, **kwargs)
            if '_files' in record['_source']:
                files_data = record['_source']['_files']
            elif '_files' in record:
                files_data = record['_files']
        else:
            dc_record = serializer.transform_record(pid, record, **kwargs)
            # for single-record serialization check file read permissions
            may_list_files = (
                isinstance(record, Record)
                and '_files' in record
                and (not has_request_context()
                     or has_read_files_permission(current_user, record))
            )
            if may_list_files:
                files_data = record['_files']

        dc_etree = self.datacite_serializer.schema.dump_etree(dc_record)
        dc_namespace = self.datacite_serializer.schema.ns[None]
        dc_etree.tag = '{{{0}}}resource'.format(dc_namespace)
        dcat_etree = self.xslt_transform_func(dc_etree).getroot()

        # Inject files in results (since the XSLT can't do that by default)
        if files_data:
            file_owner = record['_source'] if search_hit else record
            self._add_files(
                root=dcat_etree, files=files_data, record=file_owner)

        return dcat_etree

    def serialize(self, pid, record, **kwargs):
        """Serialize a single record.

        :param pid: Persistent identifier instance.
        :param record: Record instance.
        """
        transformed = self.transform_with_xslt(pid, record, **kwargs)
        return self._etree_tostring(transformed)

    def serialize_search(self, pid_fetcher, search_result, **kwargs):
        """Serialize a search result.

        :param pid_fetcher: Persistent identifier fetcher.
        :param search_result: Elasticsearch search result.
        :param links: Dictionary of links to add to response.
        """
        rendered = [
            self._etree_tostring(
                self.transform_with_xslt(
                    pid_fetcher(hit['_id'], hit['_source']),
                    hit,
                    search_hit=True,
                    **kwargs))
            for hit in search_result['hits']['hits']
        ]
        return '\n'.join(rendered)

    def serialize_oaipmh(self, pid, record):
        """Serialize a single record for OAI-PMH."""
        return self.transform_with_xslt(pid, record, search_hit=True)
Ejemplo n.º 22
0
def pid_url(related_identifier):
    """Return the URL for a related identifier, or "" if it is incomplete.

    Both the ``identifier`` and the ``scheme`` entries must be set.
    """
    scheme = related_identifier.get('scheme')
    identifier = related_identifier.get('identifier')
    if not scheme or not identifier:
        return ""
    return idutils.to_url(identifier, scheme)
Ejemplo n.º 23
0
def _serialize_subjects(ids):
    """Serialize subjects to URLs."""
    return [{
        '@type': 'CreativeWork',
        '@id': idutils.to_url(i['identifier'], i['scheme'], 'https')
    } for i in ids if 'scheme' in i]
Ejemplo n.º 24
0
 def apply_rule(item, rule):
     """Return a deep copy of ``rule`` with 'link' set to the item's URL."""
     applied = copy.deepcopy(rule)
     link = idutils.to_url(item['identifier'], item['scheme'], 'https')
     applied['link'] = link
     return applied
Ejemplo n.º 25
0
def _serialize_subjects(ids):
    """Serialize subjects to URLs."""
    return [{'@type': 'CreativeWork',
             '@id': idutils.to_url(i['identifier'], i['scheme'], 'https')}
            for i in ids if 'scheme' in i]
Ejemplo n.º 26
0
def test_tourl():
    """Check that each normalized identifier yields its expected URL."""
    for raw, schemes, _normalized, expected_url in identifiers:
        scheme = schemes[0]
        pid = idutils.normalize_pid(raw, scheme)
        assert idutils.to_url(pid, scheme) == expected_url
Ejemplo n.º 27
0
 def access_url(_, record):
     """Return ``(None, attributes)`` pointing rdf:resource at the DOI URL."""
     doi_url = idutils.to_url(record['doi'], 'doi', url_scheme='https')
     attributes = {'{{{rdf}}}resource'.format(**ns): doi_url}
     return None, attributes
Ejemplo n.º 28
0
    def evaluate(self):
        """Evaluate metric FsF-F1-02D (identifier persistence).

        Resolves the object identifier to a URL, requests the landing page,
        collects signposting links from the ``Link`` response header and
        stores the outcome on ``self.result``: the test passes when a
        persistent identifier scheme is known (``self.fuji.pid_scheme``),
        and the full score is earned when the landing page additionally
        answered with HTTP 200.
        """
        self.result = Persistence(id=self.fuji.count,
                                  metric_identifier=self.metric_identifier,
                                  metric_name=self.metric_name)
        self.output = PersistenceOutput()
        # ======= CHECK IDENTIFIER PERSISTENCE =======
        self.logger.info(
            'FsF-F1-02D : PID schemes-based assessment supported by the assessment service - {}'
            .format(Mapper.VALID_PIDS.value))

        # Stays None when the identifier is neither PID-based nor a URL, so
        # that no request is attempted with an unbound target.
        check_url = None
        if self.fuji.pid_scheme is not None:
            check_url = idutils.to_url(self.fuji.id,
                                       scheme=self.fuji.pid_scheme)
        elif self.fuji.id_scheme == 'url':
            check_url = self.fuji.id

        # ======= RETRIEVE METADATA FROM LANDING PAGE =======
        r = None
        if check_url is not None:
            requestHelper = RequestHelper(check_url, self.logger)
            requestHelper.setAcceptType(AcceptTypes.html)  # request
            _neg_source, self.fuji.extruct_result = requestHelper.content_negotiate(
                'FsF-F1-02D')
            r = requestHelper.getHTTPResponse()
        signposting_pid = None
        if r:
            self.fuji.landing_url = requestHelper.redirect_url
            if r.status == 200:
                # identify signposting links in header
                header_link_string = requestHelper.getHTTPResponse().getheader(
                    'Link')
                if header_link_string is not None:
                    self.logger.info(
                        'FsF-F1-02D : Found signposting links in response header of landingpage'
                    )

                    for preparsed_link in header_link_string.split(','):
                        found_type, type_match = None, None
                        found_rel, rel_match = None, None
                        parsed_link = preparsed_link.strip().split(';')
                        found_link = parsed_link[0].strip()
                        for link_prop in parsed_link[1:]:
                            # Parameters are usually written after '; ', so
                            # strip before testing the parameter name.
                            link_prop = str(link_prop).strip()
                            if link_prop.startswith('rel="'):
                                rel_match = re.search('rel="(.*?)"',
                                                      link_prop)
                            elif link_prop.startswith('type="'):
                                type_match = re.search('type="(.*?)"',
                                                       link_prop)
                        if type_match:
                            found_type = type_match[1]
                        if rel_match:
                            found_rel = rel_match[1]
                        signposting_link_dict = {
                            # drop the first and last character of the link
                            # target (the enclosing '<' and '>')
                            'url': found_link[1:-1],
                            'type': found_type,
                            'rel': found_rel
                        }
                        if found_link:
                            self.fuji.signposting_header_links.append(
                                signposting_link_dict)

                # check if there is a cite-as signposting link
                if self.fuji.pid_scheme is None:
                    signposting_pid_link = self.fuji.get_signposting_links(
                        'cite-as')
                    if signposting_pid_link:
                        signposting_pid = signposting_pid_link[0].get('url')
                    if signposting_pid:
                        # The whole URL is inspected; indexing it with [0]
                        # would only pass its first character.
                        found_ids = idutils.detect_identifier_schemes(
                            signposting_pid)
                        if len(found_ids) > 1:
                            if 'url' in found_ids:
                                found_ids.remove('url')
                            found_id = found_ids[0]
                            if found_id in Mapper.VALID_PIDS.value:
                                self.logger.info(
                                    'FsF-F1-02D : Found object identifier in signposting header links'
                                )
                                self.fuji.pid_scheme = found_id

                up = urlparse(self.fuji.landing_url)
                self.fuji.landing_origin = '{uri.scheme}://{uri.netloc}'.format(
                    uri=up)
                self.fuji.landing_html = requestHelper.getResponseContent()

                self.output.resolved_url = self.fuji.landing_url  # url is active, although the identifier is not based on a pid scheme
                self.output.resolvable_status = True
                self.logger.info(
                    'FsF-F1-02D : Object identifier active (status code = 200)'
                )
                self.fuji.isMetadataAccessible = True
            else:
                # Any other status counts as inaccessible. The code is read
                # from ``r.status``, the attribute already used for the
                # 200 check above.
                self.fuji.isMetadataAccessible = False
                self.logger.warning(
                    "Resource inaccessible, identifier returned http status code: {code}"
                    .format(code=r.status))
        else:
            self.fuji.isMetadataAccessible = False
            self.logger.warning(
                "FsF-F1-02D :Resource inaccessible, no response received from: {}"
                .format(check_url))

        if self.fuji.pid_scheme is not None:
            # short_pid = id.normalize_pid(self.id, scheme=pid_scheme)
            if signposting_pid is None:
                self.fuji.pid_url = idutils.to_url(self.fuji.id,
                                                   scheme=self.fuji.pid_scheme)
            else:
                self.fuji.pid_url = signposting_pid
            self.output.pid_scheme = self.fuji.pid_scheme
            self.result.test_status = 'pass'
            self.output.pid = self.fuji.pid_url
            self.setEvaluationCriteriumScore('FsF-F1-02D-1', 0, 'pass')
            if self.fuji.isMetadataAccessible:
                self.setEvaluationCriteriumScore('FsF-F1-02D-2', 1, 'pass')
                self.score.earned = self.total_score  # identifier should be based on a persistence scheme and resolvable

            self.logger.log(
                self.fuji.LOG_SUCCESS,
                'FsF-F1-02D : Persistence identifier scheme - {}'.format(
                    self.fuji.pid_scheme))
        else:
            self.score.earned = 0
            self.logger.warning(
                'FsF-F1-02D : Not a persistent identifier scheme - {}'.format(
                    self.fuji.id_scheme))

        self.result.score = self.score
        self.result.metric_tests = self.metric_tests
        self.result.output = self.output
Ejemplo n.º 29
0
 def apply_rule(item, rule):
     """Copy ``rule`` deeply and set its 'link' to the URL of ``item``."""
     rule_copy = copy.deepcopy(rule)
     identifier, scheme = item['identifier'], item['scheme']
     rule_copy['link'] = idutils.to_url(identifier, scheme)
     return rule_copy
Ejemplo n.º 30
0
 def get_doi(self, obj):
     """Return the https URL of the record's DOI, or ``missing`` if unset."""
     metadata = obj['metadata']
     if not metadata.get('doi'):
         return missing
     return idutils.to_url(metadata['doi'], 'doi', 'https')
Ejemplo n.º 31
0
def test_tourl():
    """Verify the URL generated for every identifier fixture."""
    for value, expected_schemes, _normalized_value, wanted in identifiers:
        first_scheme = expected_schemes[0]
        normalized = idutils.normalize_pid(value, first_scheme)
        generated = idutils.to_url(normalized, first_scheme)
        assert generated == wanted
Ejemplo n.º 32
0
 def _serialize(self, value, attr, obj):
     if value is None:
         return None
     return idutils.to_url(value, 'doi')
Ejemplo n.º 33
0
 def get_doi(self, obj):
     """Resolve the record's DOI to an https URL.

     ``missing`` is returned when the metadata has no DOI.
     """
     record_metadata = obj['metadata']
     has_doi = bool(record_metadata.get('doi'))
     if has_doi:
         return idutils.to_url(record_metadata['doi'], 'doi', 'https')
     return missing
Ejemplo n.º 34
0
def pid_url(related_identifier):
    """Build the URL of a related identifier.

    An empty string is returned unless both ``scheme`` and ``identifier``
    are set on the entry.
    """
    pid_value = related_identifier.get('identifier')
    pid_scheme = related_identifier.get('scheme')
    return (idutils.to_url(pid_value, pid_scheme)
            if pid_scheme and pid_value else "")
Ejemplo n.º 35
0
 def _serialize(self, value, attr, obj):
     if value is None:
         return None
     return idutils.to_url(value, 'doi')
Ejemplo n.º 36
0
 def apply_rule(item, rule):
     """Return a deep copy of ``rule`` whose "link" is the item's URL."""
     applied = copy.deepcopy(rule)
     url = idutils.to_url(item["identifier"], item["scheme"])
     applied["link"] = url
     return applied