def make_biorxiv_pages(pub):
    """Build the title-match page for a bioRxiv or medRxiv preprint.

    A page is produced only when the DOI starts with ``10.1101/`` and the
    genre is ``posted-content``.  The Crossref ``institution`` name decides
    whether the page is attributed to the medRxiv or the bioRxiv endpoint;
    anything other than ``medRxiv`` (including a missing institution) is
    treated as bioRxiv.

    :param pub: publication record; reads ``doi``, ``genre``, ``title``,
        ``normalized_title`` and ``crossref_api_raw_new``.
    :return: ``[page]`` with one unsaved ``page.PageTitleMatch``, or ``[]``
        when the publication does not qualify or ``_existing_page`` reports
        a match for the same url / pmh_id.
    """
    if not (pub.doi.startswith('10.1101/') and pub.genre == 'posted-content'):
        return []

    url = 'https://doi.org/{}'.format(pub.doi)

    pmh_page = page.PageTitleMatch()
    pmh_page.url = url
    pmh_page.doi = pub.doi
    pmh_page.title = pub.title
    pmh_page.normalized_title = pub.normalized_title
    pmh_page.authors = _pmh_authors(pub)
    pmh_page.scrape_version = 'submittedVersion'
    pmh_page.scrape_metadata_url = url

    # Both servers share the 10.1101 prefix, so the Crossref institution
    # name is the only thing that tells them apart here.
    if 'medRxiv' in _crossref_institution_names(pub.crossref_api_raw_new):
        pmh_page.pmh_id = _medrxiv_pmh_id(pub.doi)
        pmh_page.endpoint_id = medrxiv_endpoint_id
    else:
        pmh_page.pmh_id = _biorxiv_pmh_id(pub.doi)
        pmh_page.endpoint_id = biorxiv_endpoint_id

    if _existing_page(page.PageTitleMatch, pmh_page.url, pmh_page.pmh_id):
        return []

    return [pmh_page]


def _crossref_institution_names(crossref_record):
    """Return the institution names found in a Crossref work record.

    Accepts ``institution`` as either a single object or a list of objects,
    and tolerates a missing record or a missing / null ``institution`` key
    by returning an empty list instead of raising.
    """
    institution = (crossref_record or {}).get('institution') or []

    if isinstance(institution, dict):
        institution = [institution]

    return [
        entry.get('name') for entry in institution if isinstance(entry, dict)
    ]
def make_research_square_pages(pub):
    """Build the title-match page for a Research Square preprint.

    Only publications whose DOI starts with ``10.21203/rs.`` and whose genre
    is ``posted-content`` qualify.

    :param pub: publication record; reads ``doi``, ``genre``, ``title`` and
        ``normalized_title``.
    :return: ``[page]`` with one unsaved ``page.PageTitleMatch``, or ``[]``
        when the publication does not qualify or ``_existing_page`` reports
        a match for the same url / pmh_id.
    """
    is_research_square_preprint = (
        pub.doi.startswith('10.21203/rs.') and pub.genre == 'posted-content'
    )
    if not is_research_square_preprint:
        return []

    doi_url = u'https://doi.org/{}'.format(pub.doi)

    candidate = page.PageTitleMatch()
    candidate.pmh_id = _research_square_pmh_id(pub.doi)
    candidate.url = doi_url
    candidate.doi = pub.doi
    candidate.title = pub.title
    candidate.normalized_title = pub.normalized_title
    candidate.authors = _pmh_authors(pub)
    candidate.endpoint_id = research_square_endpoint_id
    candidate.scrape_version = 'submittedVersion'
    candidate.scrape_metadata_url = doi_url

    already_known = _existing_page(
        page.PageTitleMatch, candidate.url, candidate.pmh_id
    )
    return [] if already_known else [candidate]
def make_authorea_pages(pub):
    """Build the title-match page for an Authorea preprint.

    A publication qualifies when its publisher is set and contains
    ``authorea`` (case-insensitive) and its genre is ``posted-content``.
    The DOI prefix is not inspected.

    :param pub: publication record; reads ``publisher``, ``genre``, ``doi``,
        ``title`` and ``normalized_title``.
    :return: ``[page]`` with one unsaved ``page.PageTitleMatch``, or ``[]``
        when the publication does not qualify or ``_existing_page`` reports
        a match for the same url / pmh_id.
    """
    is_authorea_preprint = (
        pub.publisher
        and 'authorea' in pub.publisher.lower()
        and pub.genre == 'posted-content'
    )
    if not is_authorea_preprint:
        return []

    doi_url = 'https://doi.org/{}'.format(pub.doi)

    candidate = page.PageTitleMatch()
    candidate.pmh_id = _authorea_pmh_id(pub.doi)
    candidate.url = doi_url
    candidate.doi = pub.doi
    candidate.title = pub.title
    candidate.normalized_title = pub.normalized_title
    candidate.authors = _pmh_authors(pub)
    candidate.endpoint_id = authorea_endpoint_id
    candidate.scrape_version = 'submittedVersion'
    candidate.scrape_metadata_url = doi_url

    already_known = _existing_page(
        page.PageTitleMatch, candidate.url, candidate.pmh_id
    )
    return [] if already_known else [candidate]
def make_eartharxiv_pages(pub):
    """Build the title-match page for an EarthArXiv preprint.

    A publication qualifies when its DOI starts with ``10.31223/``, its
    publisher is set and contains ``california digital library``
    (case-insensitive), and its genre is ``posted-content``.

    :param pub: publication record; reads ``doi``, ``publisher``, ``genre``,
        ``title`` and ``normalized_title``.
    :return: ``[page]`` with one unsaved ``page.PageTitleMatch``, or ``[]``
        when the publication does not qualify or ``_existing_page`` reports
        a match for the same url / pmh_id.
    """
    is_eartharxiv_preprint = (
        pub.doi.startswith('10.31223/')
        and pub.publisher
        and 'california digital library' in pub.publisher.lower()
        and pub.genre == 'posted-content'
    )
    if not is_eartharxiv_preprint:
        return []

    doi_url = 'https://doi.org/{}'.format(pub.doi)

    candidate = page.PageTitleMatch()
    candidate.pmh_id = _eartharxiv_pmh_id(pub.doi)
    candidate.url = doi_url
    candidate.doi = pub.doi
    candidate.title = pub.title
    candidate.normalized_title = pub.normalized_title
    candidate.authors = _pmh_authors(pub)
    candidate.endpoint_id = eartharxiv_endpoint_id
    candidate.scrape_version = 'submittedVersion'
    candidate.scrape_metadata_url = doi_url

    already_known = _existing_page(
        page.PageTitleMatch, candidate.url, candidate.pmh_id
    )
    return [] if already_known else [candidate]