Example #1
0
def get_info_by_DOI(DOI: str) -> Dict:
    """Fetch article metadata for a DOI via MetaPub, falling back to Crossref.

    The public attributes of the MetaPub article object are copied into a
    dict.  If MetaPub raises ``MetaPubError``, the Crossref API is queried
    instead, retrying up to five times.  In both cases the dict is passed
    through ``add_retrieval_information`` together with the name of the
    source that was used.

    Args:
        DOI: The DOI to look up.

    Returns:
        Whatever ``add_retrieval_information`` returns for the collected
        metadata dict.
    """
    article_dict = {}
    fetch = PubMedFetcher()
    try:
        article = fetch.article_by_doi(DOI)
        # Copy every public attribute; getattr() performs the same lookup the
        # former eval('article.' + name) did without evaluating built strings.
        for info in dir(article):
            if not info.startswith('_'):
                article_dict[info] = getattr(article, info)
        article_dict = add_retrieval_information(article_dict, 'MetaPub',
                                                 'DOI', DOI)
    except MetaPubError:
        # MetaPub failed: fall back to the Crossref API, retrying up to
        # five times (e.g. on timeouts).
        works = Works()
        for _ in range(5):
            try:
                crossref_record = works.doi(DOI)
            except Exception:
                # Best-effort retry; unlike a bare ``except`` this lets
                # KeyboardInterrupt / SystemExit propagate.
                continue
            # works.doi() yields None for an unknown DOI; keep the dict
            # collected so far instead of replacing it with None.
            if crossref_record is not None:
                article_dict = crossref_record
            break
        article_dict = add_retrieval_information(article_dict, 'Crossref',
                                                 'DOI', DOI)
    return article_dict
Example #2
0
def cite_it(bot, chat_id, doi):
    """Send the BibTeX citation for ``doi`` to a chat as a ``.bib`` document.

    If Crossref reports no agency for the DOI, or ``get_bib`` does not find
    the document, a message is sent instead.  Otherwise the BibTeX text is
    written to ``downloads/<doi with '/' replaced by '-'>.bib`` and that
    file is sent to the chat.

    Args:
        bot: Object exposing ``send_message`` and ``send_document``.
        chat_id: Identifier of the chat to answer in.
        doi: DOI to cite.

    Returns:
        ``SEARCH_RESULTLS`` in every case.
    """
    works = Works()
    if not works.agency(doi):
        bot.send_message(
            chat_id=chat_id,
            text="Этот документ не входит в базу цитирования CrossRef...")
        return SEARCH_RESULTLS

    found, meta_bib = get_bib(doi)
    if not found:
        bot.send_message(chat_id=chat_id, text="Документ не найден...")
        return SEARCH_RESULTLS

    bot.send_message(chat_id=chat_id, text="Цитирование по CrossRef:")
    bib_path = os.path.join('downloads', doi.replace('/', '-') + '.bib')
    with open(bib_path, 'w+') as downloaded_file:
        downloaded_file.write(meta_bib)
    # Re-open for reading inside a context manager so the handle is closed
    # once the upload call returns (it was previously leaked).
    with open(bib_path, 'rb') as bib_file:
        bot.send_document(chat_id=chat_id, document=bib_file)
    return SEARCH_RESULTLS
Example #3
0
def getRefByDOI(doi, debug=False):
    """Return a BibTeX string for ``doi`` built from its Crossref record.

    Only the record keys listed in ``ALL_BIBTEX_ATTR`` are kept.  A rendered
    form from ``renderBibFromDict`` is stored under ``'newrender'`` before
    the dict is passed to ``convertDictToBibStr``.

    Args:
        doi: DOI to look up.
        debug: Unused; kept for interface compatibility.

    Returns:
        The BibTeX string, or ``None`` when Crossref has no record for the
        DOI or the record has none of the wanted attributes.
    """
    record = Works().doi(doi)
    if record is None:
        # Unknown DOI: there is nothing to convert.
        return None
    ref = {key: value for key, value in record.items() if key in ALL_BIBTEX_ATTR}
    # Check emptiness before adding 'newrender'; afterwards the dict is never
    # empty, which made the original guard always true.
    if not ref:
        return None
    ref['newrender'] = renderBibFromDict(ref)
    return convertDictToBibStr(ref)
Example #4
0
    def parse_reference(cls, reference, bibtex_parser=None):
        """Resolve a DOI found in ``reference`` into a flat metadata dict.

        The first match of ``const.DOI_RE`` in the reference text is looked
        up on Crossref.  The result carries ``raw_reference``, ``doi``,
        ``author`` and ``title``, plus ``journal``, ``volume`` and ``year``
        when the record provides them.

        Args:
            reference: Raw reference text to search for a DOI.
            bibtex_parser: Unused here; kept for interface compatibility.

        Returns:
            The metadata dict, or ``None`` when no DOI is found, Crossref
            returns nothing for it, or the record has no title.
        """
        ret = None
        crossref_match = const.DOI_RE.search(reference)

        if crossref_match:
            matched_doi = crossref_match.group(0)
            works = Works(etiquette=const.CROSSREF_ETIQUETTE)
            doi = works.doi(matched_doi)

            if doi:
                ret = {'raw_reference': reference, 'doi': matched_doi}

                ret['author'] = ''
                if 'author' in doi:
                    ret['author'] = ', '.join([
                        f'{author.get("given", "")} {author.get("family", "")}'
                        for author in doi['author']
                    ])

                if 'title' in doi and doi["title"]:
                    ret['title'] = doi['title'][0]
                else:
                    logger.warning(f"No Title available for {crossref_match} ")
                    return None

                if 'container-title' in doi and doi['container-title']:
                    ret['journal'] = doi['container-title'][0]

                if 'volume' in doi:
                    # Crossref delivers the volume as a plain string, not a
                    # list; indexing it with [0] kept only the first
                    # character (e.g. "12" became "1").
                    ret['volume'] = doi['volume']

                if 'published-online' in doi:
                    ret['year'] = doi['published-online']['date-parts'][0][0]

        return ret
Example #5
0
def recursiveReferenceAdd(sql, cursor, item):
    """Store ``item`` and, recursively, every work it references.

    ``item`` is added through ``addToResearchTable`` unless ``isInTable``
    reports it as already present.  Each entry of ``item['reference']`` that
    carries a DOI (key ``'doi'`` or ``'DOI'``) is resolved through Crossref
    and added the same way, and the relation between the two works is
    recorded with ``addToReferenceTable``.

    Args:
        sql: Connection object; ``commit()`` is called before returning.
        cursor: Cursor handed to the table helper functions.
        item: Crossref work record.

    Returns:
        The id returned by ``addToResearchTable``, or ``-1`` when the item
        is a duplicate (in which case nothing is committed here).
    """
    works = Works()

    # Check for duplicates in the database.
    # NOTE(review): explicit ``== False`` kept because isInTable's exact
    # return contract is not visible from this function.
    tableEntry = isInTable(cursor, item)
    if tableEntry[0] == False:
        returnId = addToResearchTable(cursor, item)
    else:
        print("Got duplicate")
        # TODO: Allow a duplicate to check the references and update them
        return -1

    # Continue checking references recursively.
    if 'reference' in item:
        for reference in item['reference']:
            # A present lowercase key wins over the uppercase one, even when
            # its value is None (same precedence as before).
            if 'doi' in reference:
                referenceDoi = reference['doi']
            elif 'DOI' in reference:
                referenceDoi = reference['DOI']
            else:
                continue
            if referenceDoi is None:
                continue

            referencedItem = works.doi(referenceDoi)
            if referencedItem is None:
                # Crossref has no record for this DOI; nothing to add.
                continue

            returnId2 = recursiveReferenceAdd(sql, cursor, referencedItem)
            # Add relations between research papers
            if returnId != -1 and returnId2 != -1:
                addToReferenceTable(cursor, returnId, returnId2)

    sql.commit()
    return returnId
Example #6
0
def search(request):
    """Render search results for the ``search`` GET parameter.

    Without a keyword the bare search page is rendered.  If ``Sea`` already
    holds rows for the keyword (case-insensitive match) they are shown
    directly.  Otherwise Crossref is queried by title, up to 500 hits are
    parsed with ``searchDo.parse_result`` and saved as ``Sea`` rows, and the
    stored rows are then rendered.
    """
    keyword = request.GET.get('search')
    if keyword is None:
        return render(request, 'search.html', {})

    stored = Sea.objects.filter(keyword__iexact=keyword)
    if len(stored) != 0:
        print('数据库Sea中找到记录')
        return render(request, 'search.html', {
            'results': stored,
            'keyword': keyword
        })

    print('数据库Sea未中找到记录')
    crossref_hits = Works().query(title=keyword)
    for position, hit in enumerate(crossref_hits):
        if position > 499:
            break
        parsed = searchDo.parse_result(keyword, hit)
        # Store the parsed hit in the database.
        Sea(**parsed).save()
        print(str(position) + ' record is ok')

    stored = Sea.objects.filter(keyword__iexact=keyword)
    return render(request, 'search.html', {
        'results': stored,
        'keyword': keyword
    })
Example #7
0
def vis_cit(request):
    """Prepare citation data for the posted keyword and answer with a file id.

    The ``keyword`` POST parameter is translated to a DOI with
    ``doi_trans``.  If ``cit_find`` reports no existing record, the DOI is
    checked against Crossref, ``citDo.citdo`` is run for it and a ``Cit``
    row is saved.

    Returns:
        ``HttpResponse('TitleError')`` when no DOI could be derived,
        ``HttpResponse('DoiError')`` when Crossref does not know the DOI,
        otherwise an ``HttpResponse`` carrying the value from ``file_find``.
    """
    keyword = request.POST.get('keyword')
    cit_doi = doi_trans(keyword)
    if cit_doi is None:
        return HttpResponse('TitleError')

    # Look up the corresponding file name, e.g. 1, 2, 3.
    cit_file_id = file_find(cit_doi)
    print(cit_file_id)

    # Look for an existing record in the cit database.
    if cit_find(cit_doi) == 'existed':
        print("cit数据库中找到记录")
        return HttpResponse(cit_file_id)

    print("cit数据库中无记录")
    if Works().doi(cit_doi) is None:
        # The 'DoiError' signal used to be assigned but never sent; return it
        # like the 'TitleError' case so the client can see the failure.
        return HttpResponse('DoiError')

    citDo.citdo(cit_doi, cit_file_id)
    cit_record = Cit(cit_doi=cit_doi, create_ctime=datetime.now())
    cit_record.save()
    return HttpResponse(cit_file_id)
Example #8
0
def vis_ref(request):
    """Prepare reference data for the posted keyword and answer with a file id.

    The ``keyword`` POST parameter is translated to a DOI with
    ``doi_trans``.  If ``ref_find`` reports no existing record, the DOI is
    checked against Crossref, ``refDo2.refdo`` is run for it and a ``Ref``
    row is saved.

    Returns:
        ``HttpResponse('TitleError')`` when no DOI could be derived,
        ``HttpResponse('DoiError')`` when Crossref does not know the DOI,
        otherwise an ``HttpResponse`` carrying the value from ``file_find``.
    """
    keyword = request.POST.get('keyword')
    ref_doi = doi_trans(keyword)
    if ref_doi is None:
        return HttpResponse('TitleError')

    # Look up the corresponding file name, e.g. 1, 2, 3.
    ref_file_id = file_find(ref_doi)
    print(ref_file_id)

    # Look for an existing record in the ref database.
    if ref_find(ref_doi) == 'existed':
        print("ref数据库中找到记录")
        return HttpResponse(ref_file_id)

    print("ref数据库中无记录")
    if Works().doi(ref_doi) is None:
        # The 'DoiError' signal used to be assigned but never sent; return it
        # like the 'TitleError' case so the client can see the failure.
        return HttpResponse('DoiError')

    refDo2.refdo(ref_doi, ref_file_id)
    ref_record = Ref(ref_doi=ref_doi, create_rtime=datetime.now())
    ref_record.save()
    return HttpResponse(ref_file_id)
Example #9
0
def fetch_bibtex_by_fulltext_crossref(txt, **kw):
    """Search Crossref with free text and return the best hit as BibTeX.

    The query is sorted by Crossref's score and issued as a single HTTP
    request.  When several items come back, each is re-scored with
    ``_crossref_score`` against ``txt`` and the first item with the highest
    positive score wins (the first item if none scores above zero).

    Args:
        txt: Free text to search for.
        **kw: Extra keyword arguments forwarded to ``Works.query``.

    Returns:
        The stripped output of ``crossref_to_bibtex`` for the chosen item.

    Raises:
        ValueError: If the response contains no items.
    """
    client = Works(etiquette=my_etiquette)
    logger.debug(six.u('crossref fulltext seach:\n') + six.u(txt))

    # Fetch only the first page of results directly instead of iterating the
    # query object.
    query = client.query(txt, **kw).sort('score')
    response = query.do_http_request('get',
                                     query.url,
                                     custom_header=str(query.etiquette))
    candidates = json.loads(response.text)['message']['items']

    if len(candidates) == 0:
        raise ValueError('crossref fulltext: no results')

    best = candidates[0]
    if len(candidates) > 1:
        best_score = 0
        for candidate in candidates:
            candidate_score = _crossref_score(txt, candidate)
            if candidate_score > best_score:
                best_score, best = candidate_score, candidate
        logger.info('score: ' + str(best_score))

    # convert to bibtex
    return crossref_to_bibtex(best).strip()
Example #10
0
 def all_articles(self):
     """Return an ``Article`` for each Crossref work under ``self.doi_prefix``.

     Works without a ``published-online`` entry containing ``date-parts``
     are skipped.  An article is marked ``'complete'`` only when its PDF
     link, DOI, title and date are all non-empty.

     Returns:
         List of ``Article`` objects.

     Raises:
         ServerError: If anything fails while querying Crossref or while
             processing its results.
     """
     articles = []
     try:
         works = Works(etiquette=Prompt.etiquette)
         if __debug__:
             log(f'asking Crossref for all works by {self.doi_prefix}')
         for item in works.filter(prefix=self.doi_prefix):
             doi = item.get('DOI', '')
             # ``or ['']`` also covers a title key that is present but empty
             # or None; indexing it directly raised and aborted the whole
             # listing.  Such works now end up 'incomplete'.
             title = (item.get('title') or [''])[0]
             online = item.get('published-online', None)
             if not online or 'date-parts' not in online:
                 if __debug__:
                     log(f'skipping {doi} lacking published-online')
                 continue
             date = '-'.join(
                 format(x, '02') for x in online['date-parts'][0])
             if __debug__:
                 log(f'keeping publication {doi} dated {date}')
             pdf = pdf_link(item.get('link', []))
             jats = ''
             image = ''
             basename = tail_of_doi(doi)
             status = 'complete' if all([pdf, doi, title, date
                                         ]) else 'incomplete'
             articles.append(
                 Article(self.issn, doi, date, title, basename, pdf, jats,
                         image, status))
     except Exception as ex:
         if __debug__: log(f'crossref API exception: {str(ex)}')
         # Chain the original exception so its traceback is preserved.
         raise ServerError(f'Failed to get data from Crossref: {str(ex)}') from ex
     return articles
Example #11
0
    def __init__(self, isbn):
        """Store the ISBN, create a Crossref client and load the metadata.

        ``book_metadata`` and ``chapters_data`` are filled, in that order,
        from ``get_book_metadata()`` and ``get_chapters_data()``.
        """
        self.isbn = isbn
        self.works = Works()

        # Book-level metadata is fetched before the chapter data.
        book_metadata = self.get_book_metadata()
        self.book_metadata = book_metadata
        chapters_data = self.get_chapters_data()
        self.chapters_data = chapters_data
Example #12
0
def fetch_bibtex_by_doi(doi):
    """Return the BibTeX text Crossref's transform endpoint serves for ``doi``.

    The request goes through ``Works.do_http_request`` with the string form
    of the client's etiquette as ``custom_header``.  The response text is
    returned stripped; the response status is not checked.
    """
    endpoint = ("http://api.crossref.org/works/" + doi +
                "/transform/application/x-bibtex")
    client = Works(etiquette=my_etiquette)
    response = client.do_http_request('get',
                                      endpoint,
                                      custom_header=str(client.etiquette))
    return response.text.strip()
Example #13
0
def checkDoiJournalArticle(doi):
    """Look up ``doi`` on Crossref and summarise type, year and authors.

    Args:
        doi: DOI to look up.

    Returns:
        Tuple ``(isJournal, publicationDate, doi, author)``:
        ``isJournal`` is the DOI when the record type is
        ``'journal-article'`` and ``""`` otherwise; ``publicationDate`` is
        the earlier of the print and online publication years (9999 when the
        record has neither), or 0 when the lookup or its processing fails;
        ``author`` is the record's author list, or ``[]``.
    """
    isJournal = ""
    publicationDate = 0
    printDate = 9999
    onlineDate = 9999
    works = Works()
    author = []
    try:
        data = works.doi(doi)
        if data is None:
            # Unknown DOI: report it explicitly instead of relying on a
            # TypeError being swallowed further down.
            print('DOI NOT FOUND: ', doi)
            return isJournal, publicationDate, doi, author
        if data.get('type') == 'journal-article':
            isJournal = doi
        if 'author' in data:
            author = data['author']
        if 'published-print' in data:
            printDate = data['published-print']['date-parts'][0][0]
        if 'published-online' in data:
            onlineDate = data['published-online']['date-parts'][0][0]
        publicationDate = min(printDate, onlineDate)
        return isJournal, publicationDate, doi, author
    except KeyboardInterrupt:
        # Ctrl-C ends the program quietly, as before.
        raise SystemExit
    except Exception:
        # Best-effort: request errors or malformed records still yield the
        # values gathered so far.  ``Exception`` (not a bare except) keeps
        # SystemExit and friends propagating.
        print('DOI NOT FOUND: ', doi)
        return isJournal, publicationDate, doi, author
def getTitle(eventdoi):
    """Return the Crossref title of ``eventdoi`` as a single string.

    The record's ``'title'`` value is joined with no separator.
    """
    record = Works().doi(eventdoi)
    return ''.join(record['title'])
def cross(L):
    """Append the DOI of every Crossref hit for the query "cancer" to ``L``.

    Prints "no" once before querying, then "yes" and the DOI for each hit.
    ``L`` is modified in place; nothing is returned.
    """
    client = Works()
    print("no")
    for hit in client.query("cancer"):
        print("yes")
        hit_doi = hit["DOI"]
        print(hit_doi)
        L.append(hit_doi)
Example #16
0
def search_doi(journal_title, start_date, end_date, print_issn, online_issn, count):
    """
    Collect a sample of DOIs for a journal within a publication date range.

    The online ISSN is tried first. The print ISSN is only queried when it is
    given and the online ISSN produced no DOI.

    :param journal_title: The title of the journal
    :param start_date: The start date in the format yyyy-mm-dd
    :param end_date: The end date in the format yyyy-mm-dd
    :param print_issn: The International Standard Serial Number for the print journal
    :param online_issn: The International Standard Serial Number for the online journal
    :param count: How many doi the method should try to find
    :return: A list of doi, this can be empty, but should be no larger than count;
        the string 'Both ISSNs are empty' when neither ISSN is given
    """
    works = Works()
    received_doi = []

    if print_issn == '' and online_issn == '':
        return 'Both ISSNs are empty'

    # Online ISSN first; any later ISSN is skipped once DOIs were found.
    for issn in (online_issn, print_issn):
        if issn == '' or received_doi:
            continue
        sampled = works.query(journal_title).filter(
            issn=issn,
            from_pub_date=start_date,
            until_pub_date=end_date).sample(count).select('DOI')
        for entry in sampled:
            received_doi.append(entry['DOI'])

    return received_doi
Example #17
0
def fetch_json_by_doi(doi):
    """Return the parsed JSON that Crossref's transform endpoint serves for ``doi``.

    The request goes through ``Works.do_http_request`` with the string form
    of the client's etiquette as ``custom_header``.

    Args:
        doi: DOI to look up.

    Returns:
        The decoded JSON document.

    Raises:
        ValueError: If the response body is not valid JSON
            (``json.JSONDecodeError`` is a subclass).
    """
    import json  # local import so the function does not rely on file-level imports

    url = "http://api.crossref.org/works/" + doi + "/transform/application/json"
    work = Works(etiquette=my_etiquette)
    jsontxt = work.do_http_request('get',
                                   url,
                                   custom_header=str(work.etiquette)).text
    # The former ``jsontxt.dumps(json)`` always raised AttributeError because
    # ``str`` has no ``dumps``; decode the response text instead.
    return json.loads(jsontxt)
Example #18
0
    def form_valid(self, form):
        """Save the submitted publication, filling fields from Crossref or an ISBN lookup.

        When the publication has a DOI that Crossref knows, year, title,
        first author, subject and URL are taken from the Crossref record.
        Otherwise, when it has a valid ISBN-13, year, title and first author
        come from ``meta``.  If the form's ``crossref`` flag is set together
        with a DOI or ISBN but neither lookup applied, the form is rendered
        again with an error message and nothing is saved.

        Returns:
            A redirect to the publication detail page on success, or the
            re-rendered form on lookup failure.
        """
        self.publication = form.save(commit=False)
        self.publication.created_by = self.request.user
        works = Works()
        if self.publication.DOI != "" and works.doi_exists(self.publication.DOI):
            paper_data_result = works.doi(self.publication.DOI)
            self.publication.publication_year = str(paper_data_result.get('created').get('date-parts')[0][0])
            # A record may lack a title or authors; keep what the form
            # supplied instead of failing on ``None[0]`` / ``[][0]``.
            titles = paper_data_result.get('title') or []
            if titles:
                self.publication.title = titles[0]
            authors = paper_data_result.get('author') or []
            if authors:
                first_author = authors[0]
                self.publication.author = f"{first_author.get('given')},{first_author.get('family')}"
            sub = paper_data_result.get("subject", [self.publication.subject])
            self.publication.subject = ', '.join([str(elem) for elem in sub])
            self.publication.URL = paper_data_result.get('URL')

        elif self.publication.ISBN != "" and is_isbn13(self.publication.ISBN):
            book_data_result = meta(self.publication.ISBN)
            self.publication.publication_year = book_data_result.get('Year')
            self.publication.title = book_data_result.get('Title')
            # Same guard as above for a book without listed authors.
            book_authors = book_data_result.get('Authors') or []
            if book_authors:
                self.publication.author = book_authors[0]

        elif self.publication.crossref and (self.publication.DOI or self.publication.ISBN):
            messages.error(self.request, 'DOI/ISBN no encontrado. Cargar datos y desmarcar el campo "tiene DOI/ISBN"')
            return render(self.request, 'bibliography/publication_form.html', {'form': form})
        self.publication.save()
        messages.success(self.request, "Registro realizado con exito")

        return redirect('bibliography:publication_detail', pk=self.publication.pk)
Example #19
0
def get_crossref_metadata(file_name):
    """Fetch Crossref metadata for every research-article DOI listed in a JSON file.

    ``file_name`` holds a list of journal objects with the keys
    ``"journal_title"`` and ``"research_articles"``.  For each journal, the
    Crossref records of its DOIs are written as a JSON array to
    ``../data/json_files/crossref_api/<title>_crossref_metadata.json``, where
    spaces in the title become underscores and ``/`` and ``:`` are removed.

    The output file is opened in append mode, so an existing file gets a
    further array appended rather than being replaced.
    """
    works = Works()
    with open(file_name, "r", encoding="utf-8") as journals_articles:
        journals = json.load(journals_articles)
        for journal in journals:
            research_dois = journal["research_articles"]

            # Derive a file-system friendly name from the journal title.
            new_file_name = f'{journal["journal_title"]}_crossref_metadata.json'
            for unwanted, replacement in ((" ", "_"), ("/", ""), (":", "")):
                new_file_name = new_file_name.replace(unwanted, replacement)

            last_index = len(research_dois) - 1
            with open(f'../data/json_files/crossref_api/{new_file_name}',
                      "a",
                      encoding="utf-8") as fd:
                fd.write("[")
                for index, doi in enumerate(research_dois):
                    # One Crossref request per DOI, streamed straight to disk.
                    json.dump(works.doi(doi), fd)
                    if index < last_index:
                        fd.write(",")
                fd.write("]")
Example #20
0
def consulta(lista_isbns, servico):
    # Look up each ISBN in `lista_isbns` through the service named by
    # `servico`: 'gbooks', 'openl' or 'crref'.
    # For 'gbooks' and 'openl', the metadata from isbnlib is formatted with
    # the 'json' bibformatter and stored in `isbn_dict` under str(isbn);
    # ISBNs the service does not know are reported with print().
    # NOTE(review): as visible here the function ends without returning
    # `isbn_dict` and the 'crref' branch discards its results; the snippet
    # looks truncated, so confirm against the full source.
    formatador_json = bibformatters['json']
    isbn_dict = {}

    if servico == 'gbooks':
        for isbn in lista_isbns:
            try:
                # 'goob' is the service name passed to isbnlib for this branch.
                data = isbnlib.meta(isbn, service='goob')
                isbn_dict[str(isbn)] = formatador_json(data)
            except isbnlib.dev.DataNotFoundAtServiceError:
                print("Entrada com ISBN %s não foi encontrada no serviço %s." % (isbn, servico))    
            except isbnlib.dev._exceptions.NoDataForSelectorError:
                print("Entrada com ISBN %s não foi encontrada no serviço %s." % (isbn, servico))    
    elif servico == 'openl':
        for isbn in lista_isbns:
            try:
                # NOTE(review): the stripped copy `a` is never used; the
                # lookup below still receives the unstripped `isbn`.
                a = isbn
                a = a.strip()
                data = isbnlib.meta(isbn, service = 'openl')
                isbn_dict[str(isbn)] = formatador_json(data)
            except isbnlib.dev.DataNotFoundAtServiceError:                    
                print("Entrada com ISBN %s não foi encontrada no serviço %s." % (isbn, servico))
    elif servico == 'crref':
        for isbn in lista_isbns:
            a = isbn
            a = a.strip()
            works = Works()
            # Filter Crossref works by the stripped ISBN and collect the hits.
            l = works.filter(isbn = str(a))
            b = []
            for item in l:
                b.append((item))
Example #21
0
def fetch_bibtex_by_doi(doi):
    """Return the BibTeX text Crossref's transform endpoint serves for ``doi``.

    The request goes through ``Works.do_http_request`` with a ``user-agent``
    header built from the client's etiquette.

    Raises:
        DOIRequestError: If the response is not OK; the message holds the
            repr of the DOI followed by the response text.
    """
    endpoint = ("http://api.crossref.org/works/" + doi +
                "/transform/application/x-bibtex")
    client = Works(etiquette=my_etiquette)
    headers = {'user-agent': str(client.etiquette)}
    response = client.do_http_request('get', endpoint, custom_header=headers)
    if not response.ok:
        raise DOIRequestError(repr(doi) + ': ' + response.text)
    return response.text.strip()
Example #22
0
def getMetadataFromDOI(id):
    """Return metadata for the DOI ``id`` with any ``'id'`` key removed.

    ``Works.doi`` is tried first; when it yields ``None`` the lookup falls
    back to ``CrossRefClient.doi2json``.
    """
    metadata = Works().doi(id)
    if metadata is None:
        fallback_client = CrossRefClient()
        metadata = fallback_client.doi2json(id)

    # Drop the 'id' entry if there is one; a missing key is not an error.
    metadata.pop('id', None)
    return metadata
Example #23
0
 def __init__(self, context):
     """Initialise the resource on ``doi_metadata.db`` with a Crossref client.

     The parent initialiser receives ``context`` and the database path
     under ``DataResource.data_dir``.  The per-run bookkeeping containers
     start out empty.
     """
     super().__init__(context, DataResource.data_dir / 'doi_metadata.db')
     self._handled = set()  # all the dois that are checked in this run
     self._added = set()  # all the dois that are added in this run
     self._errors = {}
     self.works = Works(etiquette=Etiquette(
         'SYNTH transform', '0.1',
         'https://github.com/NaturalHistoryMuseum/synth_transform',
         '*****@*****.**'))
def get_xref(doi):
    """Return the Crossref record for ``doi``.

    A falsy result is reported on stdout and still returned.

    Args:
        doi: DOI to look up.

    Returns:
        The value from ``Works.doi``, or ``None`` when the lookup raised
        ``ValueError`` (the error is printed).
    """
    xrefapi = Works()
    try:
        xref_data = xrefapi.doi(doi)
    except ValueError as e:
        # print() with one pre-formatted argument behaves the same on
        # Python 2 and 3.
        print("Error: {}".format(e))
        return None
    if not xref_data:
        print("Could not get data for DOI: {}".format(doi))
        # Show the result already received instead of issuing a second
        # request just to print it.
        print(xref_data)
    return xref_data
Example #25
0
 def request_doi_api():
     """Build a QLabel linking to the URL of the DOI in ``parameters["source"][prop]``.

     Returns ``None`` when the Crossref lookup yields nothing.
     """
     address = parameters["source"][prop]
     data = Works().doi(address)
     if not data:
         return None
     value = data["URL"]
     markup = "".join(
         ["<html><a href=\"", value, "\">", value, "</a></html>"])
     return QLabel(markup)
Example #26
0
def write_bibtex_v1(bibtex_file, dois):
    """Write the Crossref BibTeX entry of each DOI in ``dois`` to ``bibtex_file``.

    Each entry is followed by a newline.  Responses whose text starts with
    ``'Resource'`` are skipped.  The file is overwritten.
    """
    etiquette = Etiquette('VTLibraries', 0.1, 'https://lib.vt.edu/', '*****@*****.**')
    client = Works(etiquette=etiquette)
    with open(bibtex_file, 'w') as bib:
        for doi in dois:
            endpoint = ("http://api.crossref.org/works/" + doi +
                        "/transform/application/x-bibtex")
            response = client.do_http_request(
                'get', endpoint, custom_header=str(client.etiquette))
            entry = response.text
            if entry.startswith('Resource'):
                continue
            bib.write(entry)
            bib.write('\n')
Example #27
0
def cross_doi(publication):
    """Fill ``publication`` from the Crossref record of ``publication.DOI``.

    Sets ``publication_year`` (from the record's ``created`` date), ``title``,
    ``author`` (``"given,family"`` of the first author), ``subject`` (first
    subject) and ``URL``.  Title, author and subject keep their current value
    when the record lacks them or lists them as empty.

    Args:
        publication: Object with ``DOI``, ``subject`` and the attributes
            listed above; it is modified in place.
    """
    works = Works()
    paper_data_result = works.doi(publication.DOI)
    publication.publication_year = str(
        paper_data_result.get('created').get('date-parts')[0][0])

    # A record may lack these lists or carry them empty; indexing them
    # directly raised TypeError / IndexError.
    titles = paper_data_result.get('title') or []
    if titles:
        publication.title = titles[0]

    authors = paper_data_result.get('author') or []
    if authors:
        first_author = authors[0]
        publication.author = f"{first_author.get('given')},{first_author.get('family')}"

    subjects = paper_data_result.get("subject") or [publication.subject]
    publication.subject = subjects[0]
    publication.URL = paper_data_result.get('URL')
Example #28
0
	def query_crossref(self, author = None, pub = None):
		"""Query Crossref by title and/or author.

		Returns the result of ``Works.query`` for whichever of ``pub``
		(matched as the title) and ``author`` is given, or ``None`` when
		neither is given.
		"""
		from crossref.restful import Works
		works = Works()

		criteria = {}
		if pub is not None:
			criteria['title'] = pub
		if author is not None:
			criteria['author'] = author

		if not criteria:
			return None
		return works.query(**criteria)
Example #29
0
def get_crossref_records(dois):
    """Return a dict mapping each DOI in ``dois`` to its Crossref record.

    DOIs for which ``Works.doi`` yields ``None`` are reported on stdout and
    left out of the result.
    """
    client = Works()
    print('searching crossref for all DOIs, this might take a few minutes...')
    crossref_records = {}
    for doi in dois:
        record = client.doi(doi)
        if record is None:
            print('missing crossref record for', doi)
            continue
        crossref_records[doi] = record
    return crossref_records
Example #30
0
 def __init__(self, context):
     """Initialise the resource on ``output_dois.db`` with a Crossref client.

     The parent initialiser receives ``context`` and the database path
     under ``DataResource.data_dir``.  The bookkeeping containers start
     out empty.
     """
     super().__init__(context, DataResource.data_dir / 'output_dois.db')
     self._handled = set()
     self._added = set()
     self._errors = {}
     self._methods = {}
     self.works = Works(etiquette=Etiquette(
         'SYNTH transform', '0.1',
         'https://github.com/NaturalHistoryMuseum/synth_transform',
         '*****@*****.**'))