def get_info_by_DOI(DOI: str) -> Dict:
    """Fetch article metadata for a DOI via metapub, falling back to Crossref.

    Tries PubMed (metapub) first and copies every public attribute of the
    fetched article into a dict.  On MetaPubError it falls back to the
    Crossref REST API, retrying up to 5 times on request failures.  The
    data source is recorded via add_retrieval_information().

    :param DOI: the DOI string to look up
    :return: dict with the retrieved article information (may be empty)
    """
    article_dict = {}
    fetch = PubMedFetcher()
    try:
        article = fetch.article_by_doi(DOI)
        # Copy every public attribute of the article object into the dict.
        # getattr() replaces the original eval('article.' + info), which was
        # slower and a code-injection hazard.
        for info in dir(article):
            if info[0] != '_':
                article_dict[info] = getattr(article, info)
        # Add data retrieval info to the dict
        article_dict = add_retrieval_information(article_dict, 'MetaPub', 'DOI', DOI)
    except MetaPubError:
        # If it does not work via Metapub, do it via Crossref API.
        # If there is a timeout (or other request error), try again (5 times).
        for _ in range(5):
            try:
                works = Works()
                article_dict = works.doi(DOI)
                break
            except Exception:
                # deliberate best-effort retry; errors are swallowed
                pass
        # Add data retrieval info to the dict
        article_dict = add_retrieval_information(article_dict, 'Crossref', 'DOI', DOI)
    return article_dict
def cite_it(bot, chat_id, doi):
    """Returns citation for given DOI.

    Sends the citation as a .bib document through the Telegram *bot* to
    *chat_id*; always returns the SEARCH_RESULTLS conversation state.
    """
    works = Works()
    if not works.agency(doi):
        bot.send_message(
            chat_id=chat_id,
            text="Этот документ не входит в базу цитирования CrossRef...")
        return SEARCH_RESULTLS
    else:
        # Fetched for its lookup side effect; kept to preserve behavior.
        record = works.doi(doi)
        found, meta_bib = get_bib(doi)
        if not found:
            bot.send_message(chat_id=chat_id, text="Документ не найден...")
            return SEARCH_RESULTLS
        bot.send_message(chat_id=chat_id, text="Цитирование по CrossRef:")
        # DOIs contain '/', which is not allowed in file names.
        filename = doi.replace('/', '-')
        bib_path = os.path.join('downloads', filename + '.bib')
        with open(bib_path, 'w+') as downloaded_file:
            downloaded_file.write(meta_bib)
        # Re-open inside `with` so the handle is closed after sending —
        # the original leaked the file object passed to send_document.
        with open(bib_path, 'rb') as bib_file:
            bot.send_document(chat_id=chat_id, document=bib_file)
        return SEARCH_RESULTLS
def getRefByDOI(doi, debug=False):
    """Resolve *doi* via Crossref and return it rendered as a bibtex string.

    :param doi: DOI to look up
    :param debug: unused, kept for interface compatibility
    :return: bibtex string, or None when Crossref has no record for the DOI
             (the original raised AttributeError on ``None.items()`` then)
    """
    works = Works()
    ref = works.doi(doi)
    if not ref:
        return None
    # Keep only the keys that map to bibtex attributes.
    ref = {key: value for key, value in ref.items() if key in ALL_BIBTEX_ATTR}
    ref['newrender'] = renderBibFromDict(ref)
    return convertDictToBibStr(ref)
def parse_reference(cls, reference, bibtex_parser=None):
    """Parse a raw reference string by extracting its DOI and resolving it via Crossref.

    :param reference: free-text reference that may contain a DOI
    :param bibtex_parser: unused here, kept for interface compatibility
    :return: dict with raw_reference/doi/author/title (plus journal/volume/year
             when available), or None when no DOI matches or the record has
             no title
    """
    ret = None
    crossref_match = const.DOI_RE.search(reference)
    if crossref_match:
        works = Works(etiquette=const.CROSSREF_ETIQUETTE)
        doi = works.doi(crossref_match.group(0))
        if doi:
            ret = {'raw_reference': reference}
            ret['doi'] = crossref_match.group(0)
            ret['author'] = ''
            if 'author' in doi:
                ret['author'] = ', '.join([
                    f'{author.get("given", "")} {author.get("family", "")}'
                    for author in doi['author']
                ])
            if 'title' in doi and doi["title"]:
                ret['title'] = doi['title'][0]
            else:
                # A record without a title is unusable downstream.
                logger.warning(f"No Title available for {crossref_match} ")
                return None
            if 'container-title' in doi and doi['container-title']:
                ret['journal'] = doi['container-title'][0]
            if 'volume' in doi:
                # Crossref's 'volume' is a plain string; the original took
                # [0], which kept only its first character (e.g. '1' of '12').
                ret['volume'] = doi['volume']
            if 'published-online' in doi:
                ret['year'] = doi['published-online']['date-parts'][0][0]
    return ret
# NOTE: Python 2 code (print statement, xrange).
def recursiveReferenceAdd(sql, cursor, item):
    """Insert a Crossref work into the research table and recurse into its references.

    Returns the database id of the inserted row, or -1 for duplicates
    (see the TODO below — duplicates currently short-circuit).
    """
    works = Works()
    returnId = -1
    # Check for duplicates in the database
    tableEntry = isInTable(cursor,item)
    if tableEntry[0] == False:
        returnId = addToResearchTable(cursor,item)
    else:
        print "Got duplicate"
        returnId = tableEntry[1]
        return -1#returnId #TODO: Allow a duplicate to check the references and update them
    # Continue checking references recursively
    if 'reference' in item:
        for i in xrange(len(item['reference'])):
            # Crossref reference entries use either 'doi' or 'DOI' as the key,
            # hence the two parallel branches below.
            if 'doi' in item['reference'][i]:
                if item['reference'][i]['doi'] != None:
                    returnId2 = recursiveReferenceAdd(sql,cursor,works.doi(item['reference'][i]['doi']))
                    # Add relations between research papers
                    if (returnId != -1 and returnId2 != -1):
                        addToReferenceTable(cursor, returnId, returnId2)
            elif 'DOI' in item['reference'][i]:
                if item['reference'][i]['DOI'] != None:
                    returnId2 = recursiveReferenceAdd(sql,cursor,works.doi(item['reference'][i]['DOI']))
                    #Add relations between research papers
                    if (returnId != -1 and returnId2 != -1):
                        addToReferenceTable(cursor, returnId, returnId2)
    sql.commit()
    return returnId
def search(request):
    """Django view: search publications by keyword, caching Crossref results.

    Checks the local Sea table first; on a miss, queries Crossref by title,
    stores up to 500 parsed results, then re-queries the table and renders
    the results page.
    """
    keyword = request.GET.get('search')
    if (keyword == None):
        # No search term supplied: render an empty search page.
        return render(request, 'search.html', {})
    else:
        find_keyword = Sea.objects.filter(keyword__iexact=keyword)
        l = len(find_keyword)
        if (l != 0):
            # Cache hit in the local Sea table.
            print('数据库Sea中找到记录')
            return render(request, 'search.html', {
                'results': find_keyword,
                'keyword': keyword
            })
        else:
            # Cache miss: query Crossref by title and persist the results.
            print('数据库Sea未中找到记录')
            works = Works()
            w1 = works.query(title=keyword)
            for index, item in enumerate(w1):
                # Cap the crawl at 500 records.
                if (index > 499):
                    break
                result = searchDo.parse_result(keyword, item)
                # Store in the database
                sea_record = Sea(**result)
                sea_record.save()
                print(str(index) + ' record is ok')
            find_keyword = Sea.objects.filter(keyword__iexact=keyword)
            return render(request, 'search.html', {
                'results': find_keyword,
                'keyword': keyword
            })
def vis_cit(request):
    """Django view: build (and cache) citation-visualisation data for a title.

    Translates the POSTed keyword (a title) to a DOI.  Returns 'TitleError'
    when the title cannot be resolved, 'DoiError' when Crossref has no record
    for the DOI, otherwise the id of the generated citation file.
    """
    keyword = request.POST.get('keyword')
    cit_doi = doi_trans(keyword)
    if (cit_doi == None):
        signal = 'TitleError'
        return HttpResponse(signal)
    else:
        # Map the DOI to its data-file id (e.g. 1, 2, 3).
        cit_file_id = file_find(cit_doi)
        print(cit_file_id)
        # Check whether the Cit table already has this DOI cached.
        cit_flag = cit_find(cit_doi)
        if (cit_flag == 'existed'):
            print("cit数据库中找到记录")
            return HttpResponse(cit_file_id)
        else:
            print("cit数据库中无记录")
            wk = Works()
            item = wk.doi(cit_doi)
            if (item == None):
                # BUGFIX: the original set `signal` but fell through and
                # returned cit_file_id anyway; report the error instead.
                signal = 'DoiError'
                return HttpResponse(signal)
            else:
                citDo.citdo(cit_doi, cit_file_id)
                cit_record = Cit(cit_doi=cit_doi, create_ctime=datetime.now())
                cit_record.save()
            return HttpResponse(cit_file_id)
def vis_ref(request):
    """Django view: build (and cache) reference-visualisation data for a title.

    Translates the POSTed keyword (a title) to a DOI.  Returns 'TitleError'
    when the title cannot be resolved, 'DoiError' when Crossref has no record
    for the DOI, otherwise the id of the generated reference file.
    """
    keyword = request.POST.get('keyword')
    ref_doi = doi_trans(keyword)
    if (ref_doi == None):
        signal = 'TitleError'
        return HttpResponse(signal)
    else:
        # Map the DOI to its data-file id (e.g. 1, 2, 3).
        ref_file_id = file_find(ref_doi)
        print(ref_file_id)
        # Check whether the Ref table already has this DOI cached.
        ref_flag = ref_find(ref_doi)
        if (ref_flag == 'existed'):
            print("ref数据库中找到记录")
            return HttpResponse(ref_file_id)
        else:
            print("ref数据库中无记录")
            wk = Works()
            item = wk.doi(ref_doi)
            if (item == None):
                # BUGFIX: the original set `signal` but fell through and
                # returned ref_file_id anyway; report the error instead.
                signal = 'DoiError'
                return HttpResponse(signal)
            else:
                refDo2.refdo(ref_doi, ref_file_id)
                ref_record = Ref(ref_doi=ref_doi, create_rtime=datetime.now())
                ref_record.save()
            return HttpResponse(ref_file_id)
def fetch_bibtex_by_fulltext_crossref(txt, **kw):
    """Full-text search Crossref for *txt* and return the best match as bibtex.

    Queries Crossref sorted by score, re-scores the candidates locally with
    _crossref_score, and converts the winner to bibtex.

    :raises ValueError: when the search returns no results
    """
    client = Works(etiquette=my_etiquette)
    logger.debug(six.u('crossref fulltext seach:\n') + six.u(txt))
    query = client.query(txt, **kw).sort('score')
    raw = query.do_http_request('get', query.url,
                                custom_header=str(query.etiquette)).text
    items = json.loads(raw)['message']['items']
    if not items:
        raise ValueError('crossref fulltext: no results')
    best = items[0]
    if len(items) > 1:
        # Re-rank locally; the first item remains the default winner.
        top_score = 0
        for candidate in items:
            candidate_score = _crossref_score(txt, candidate)
            if candidate_score > top_score:
                top_score = candidate_score
                best = candidate
        logger.info('score: ' + str(top_score))
    # convert to bibtex
    return crossref_to_bibtex(best).strip()
def all_articles(self):
    """Return Article records for every work under this journal's DOI prefix.

    Works lacking a published-online date are skipped.  Any failure while
    talking to Crossref is re-raised as a ServerError.
    """
    collected = []
    try:
        client = Works(etiquette=Prompt.etiquette)
        if __debug__: log(f'asking Crossref for all works by {self.doi_prefix}')
        for record in client.filter(prefix=self.doi_prefix):
            doi = record.get('DOI', '')
            title = record.get('title', [''])[0]
            online = record.get('published-online', None)
            if not online or 'date-parts' not in online:
                if __debug__: log(f'skipping {doi} lacking published-online')
                continue
            date = '-'.join(format(x, '02') for x in online['date-parts'][0])
            if __debug__: log(f'keeping publication {doi} dated {date}')
            pdf = pdf_link(record.get('link', []))
            basename = tail_of_doi(doi)
            # Complete only when every key field is present.
            status = 'complete' if all([pdf, doi, title, date]) else 'incomplete'
            # jats and image are always empty at this stage.
            collected.append(Article(self.issn, doi, date, title, basename,
                                     pdf, '', '', status))
    except Exception as ex:
        if __debug__: log(f'crossref API exception: {str(ex)}')
        raise ServerError(f'Failed to get data from Crossref: {str(ex)}')
    return collected
def __init__(self, isbn):
    """Store the ISBN, create a Crossref client, and preload all metadata."""
    self.isbn = isbn
    self.works = Works()
    # Fetch book-level and chapter-level metadata up front.
    self.book_metadata = self.get_book_metadata()
    self.chapters_data = self.get_chapters_data()
def fetch_bibtex_by_doi(doi):
    """Fetch the bibtex record for *doi* from Crossref's transform endpoint."""
    endpoint = f"http://api.crossref.org/works/{doi}/transform/application/x-bibtex"
    client = Works(etiquette=my_etiquette)
    response = client.do_http_request('get', endpoint,
                                      custom_header=str(client.etiquette))
    return response.text.strip()
def checkDoiJournalArticle(doi):
    """Check via Crossref whether *doi* refers to a journal article.

    :return: tuple (isJournal, publicationDate, doi, author) where isJournal
             is the DOI itself for journal articles (else ""), publicationDate
             is the earlier of the print/online year (0 when the lookup fails)
             and author is the Crossref author list (possibly empty).
    """
    isJournal = ""
    publicationDate = 0
    printDate = 9999
    onlineDate = 9999
    works = Works()
    author = []
    try:
        data = works.doi(doi)
        if 'type' in data:
            if data['type'] == 'journal-article':
                isJournal = doi
        if 'author' in data:
            author = data['author']
        if 'published-print' in data:
            printDate = data['published-print']['date-parts'][0][0]
        if 'published-online' in data:
            onlineDate = data['published-online']['date-parts'][0][0]
        # Earliest known publication year (9999 sentinels mean "unknown").
        publicationDate = min(printDate, onlineDate)
        return isJournal, publicationDate, doi, author
    except KeyboardInterrupt:
        exit()
    except Exception:
        # Narrowed from a bare `except:` which would also swallow SystemExit;
        # lookup/parse failures fall through to the defaults.
        print('DOI NOT FOUND: ', doi)
        return isJournal, publicationDate, doi, author
def getTitle(eventdoi):
    """Fetch the Crossref record for *eventdoi* and return its title as one string."""
    record = Works().doi(eventdoi)
    # Crossref returns the title as a list of strings; concatenate them.
    return ''.join(record['title'])
def cross(L):
    """Append the DOI of every Crossref work matching "cancer" to *L* (mutates L)."""
    client = Works()
    print("no")
    for record in client.query("cancer"):
        print("yes")
        doi = record["DOI"]
        print(doi)
        L.append(doi)
def search_doi(journal_title, start_date, end_date, print_issn, online_issn, count):
    """
    A defined number of doi are put into a list.
    The online ISSN is tried first; if it yields nothing, the print ISSN is used.
    :param journal_title: The title of the journal
    :param start_date: The start date in the format yyyy-mm-dd
    :param end_date: The end date in the format yyyy-mm-dd
    :param print_issn: The International Standard Serial Number for the print journal
    :param online_issn: The International Standard Serial Number for the online journal
    :param count: How many doi the method should try to find
    :return: A list of doi, this can be empty, but should be no larger than count
    """
    works = Works()

    def _sample_dois(issn):
        # Sample up to `count` DOIs for this ISSN within the date window.
        query = works.query(journal_title).filter(
            issn=issn, from_pub_date=start_date, until_pub_date=end_date)
        return [entry['DOI'] for entry in query.sample(count).select('DOI')]

    if print_issn == '' and online_issn == '':
        return 'Both ISSNs are empty'
    received_doi = []
    if online_issn != '':
        # online ISSN exists
        received_doi = _sample_dois(online_issn)
    if print_issn != '' and not received_doi:
        # print ISSN exists and the online lookup found nothing
        received_doi = _sample_dois(print_issn)
    return received_doi
def fetch_json_by_doi(doi):
    """Fetch the Crossref record for *doi* and return it parsed from JSON.

    The original ended with ``jsontxt.dumps(json)``, which always raised
    AttributeError (str has no ``dumps``); the intent was clearly to parse
    the response body with the json module.
    """
    url = "http://api.crossref.org/works/" + doi + "/transform/application/json"
    work = Works(etiquette=my_etiquette)
    jsontxt = work.do_http_request('get', url, custom_header=str(work.etiquette)).text
    return json.loads(jsontxt)
def form_valid(self, form):
    """Django form handler: enrich a publication from Crossref (DOI) or isbnlib (ISBN).

    Saves the form without committing, fills in bibliographic fields from the
    matching external record, then persists and redirects to the detail view.
    When the user claims a DOI/ISBN exists but neither resolved, re-renders
    the form with an error message.
    """
    self.publication = form.save(commit=False)
    self.publication.created_by = self.request.user
    works = Works()
    if self.publication.DOI != "" and works.doi_exists(self.publication.DOI):
        # DOI path: populate from the Crossref record.
        paper_data_result = works.doi(self.publication.DOI)
        self.publication.publication_year = str(paper_data_result.get('created').get('date-parts')[0][0])
        self.publication.title = paper_data_result.get('title')[0]
        # First author only, as "given,family".
        self.publication.author = f"{paper_data_result.get('author')[0].get('given')},{paper_data_result.get('author')[0].get('family')}"
        # Fall back to the user-entered subject when Crossref has none.
        sub = paper_data_result.get("subject", [self.publication.subject])
        self.publication.subject = ', '.join([str(elem) for elem in sub])
        self.publication.URL = paper_data_result.get('URL')
    elif self.publication.ISBN != "" and is_isbn13(self.publication.ISBN):
        # ISBN path: populate from isbnlib metadata.
        book_data_result = meta(self.publication.ISBN)
        self.publication.publication_year = book_data_result.get('Year')
        self.publication.title = book_data_result.get('Title')
        self.publication.author = book_data_result.get('Authors')[0]
    elif self.publication.crossref and (self.publication.DOI or self.publication.ISBN):
        # The user marked "has DOI/ISBN" but neither lookup succeeded.
        messages.error(self.request, 'DOI/ISBN no encontrado. Cargar datos y desmarcar el campo "tiene DOI/ISBN"')
        return render(self.request, 'bibliography/publication_form.html', {'form': form})
    self.publication.save()
    messages.success(self.request, "Registro realizado con exito")
    return redirect('bibliography:publication_detail', pk=self.publication.pk)
def get_crossref_metadata(file_name):
    """Fetch Crossref metadata for every research-article DOI listed per journal.

    Reads a JSON file containing one object per journal (with its title and a
    list of research-article DOIs), queries Crossref for each DOI, and writes
    one JSON-array file per journal under ../data/json_files/crossref_api/.
    """
    # work needs to be defined for crossref
    works = Works()
    # open the json that contains the full list of dois of which we need metadata
    with open(file_name, "r", encoding="utf-8") as journals_articles:
        json_data = json.load(journals_articles)
    # One object per journal; each carries the list of research-article DOIs.
    for journal in json_data:  # renamed from `object`, which shadowed the builtin
        id_list_research = journal["research_articles"]
        # Build a filesystem-safe output name from the journal title.
        new_file_name = (
            f'{journal["journal_title"]}_crossref_metadata.json').replace(
                " ", "_").replace("/", "").replace(":", "")
        # Open in "w": the original's "a" mode appended a second "[...]"
        # array on every re-run, producing invalid JSON.
        with open(f'../data/json_files/crossref_api/{new_file_name}', "w",
                  encoding="utf-8") as fd:
            fd.write("[")
            # Make one Crossref request per DOI (renamed from `id`, a builtin).
            for index, article_doi in enumerate(id_list_research):
                record = works.doi(article_doi)
                json.dump(record, fd)
                if index < len(id_list_research) - 1:
                    fd.write(",")
            fd.write("]")
def consulta(lista_isbns, servico):
    """Look up book metadata for a list of ISBNs on the selected service.

    servico chooses the backend: 'gbooks' (Google Books via isbnlib),
    'openl' (Open Library via isbnlib) or 'crref' (Crossref).

    NOTE(review): the function builds ``isbn_dict`` but has no return
    statement, so it always returns None; the 'crref' branch collects
    results into ``b`` and then discards them.  Looks unfinished — confirm
    against the callers before relying on it.
    """
    formatador_json = bibformatters['json']
    isbn_dict = {}
    if servico == 'gbooks':
        for isbn in lista_isbns:
            try:
                data = isbnlib.meta(isbn, service='goob')
                isbn_dict[str(isbn)] = formatador_json(data)
            except isbnlib.dev.DataNotFoundAtServiceError:
                print("Entrada com ISBN %s não foi encontrada no serviço %s." % (isbn, servico))
            except isbnlib.dev._exceptions.NoDataForSelectorError:
                print("Entrada com ISBN %s não foi encontrada no serviço %s." % (isbn, servico))
    elif servico == 'openl':
        for isbn in lista_isbns:
            try:
                a = isbn
                a = a.strip()
                data = isbnlib.meta(isbn, service = 'openl')
                isbn_dict[str(isbn)] = formatador_json(data)
            except isbnlib.dev.DataNotFoundAtServiceError:
                print("Entrada com ISBN %s não foi encontrada no serviço %s." % (isbn, servico))
    elif servico == 'crref':
        for isbn in lista_isbns:
            a = isbn
            a = a.strip()
            works = Works()
            # Crossref lookup by ISBN; results are materialised into b
            # but never used (see NOTE in the docstring).
            l = works.filter(isbn = str(a))
            b = []
            for item in l:
                b.append((item))
def fetch_bibtex_by_doi(doi):
    """Fetch the bibtex record for *doi* via Crossref's transform endpoint.

    :raises DOIRequestError: when the HTTP request does not succeed
    """
    endpoint = "http://api.crossref.org/works/" + doi + "/transform/application/x-bibtex"
    client = Works(etiquette=my_etiquette)
    reply = client.do_http_request(
        'get', endpoint, custom_header={'user-agent': str(client.etiquette)})
    if not reply.ok:
        raise DOIRequestError(repr(doi) + ': ' + reply.text)
    return reply.text.strip()
def getMetadataFromDOI(id):
    """Look up Crossref metadata for the given DOI, falling back to CrossRefClient."""
    metadata = Works().doi(id)
    if metadata is None:
        # Fallback client; drop its synthetic 'id' field so the result
        # matches the Works() record shape.
        fallback = CrossRefClient()
        metadata = fallback.doi2json(id)
        metadata.pop('id', None)
    return metadata
def __init__(self, context):
    """Set up the DOI-metadata cache DB and a politely-identified Crossref client."""
    super().__init__(context, DataResource.data_dir / 'doi_metadata.db')
    contact = Etiquette(
        'SYNTH transform', '0.1',
        'https://github.com/NaturalHistoryMuseum/synth_transform',
        '*****@*****.**')
    self.works = Works(etiquette=contact)
    self._handled = set()  # all the dois that are checked in this run
    self._added = set()    # all the dois that are added in this run
    self._errors = {}
# NOTE: Python 2 code (print statements).
def get_xref(doi):
    """Fetch the Crossref record for *doi*.

    Returns the record dict, a falsy value when Crossref has no data, or
    None (implicitly) when the lookup raises ValueError.
    """
    xrefapi = Works()
    try:
        xref_data = xrefapi.doi(doi)
        if not xref_data:
            # Empty/falsy response: report it (the second print re-queries
            # Crossref to show the raw result).
            print "Could not get data for DOI:", doi
            print xrefapi.doi(doi)
        return xref_data
    except ValueError as e:
        # Swallowed deliberately; caller receives None.
        print "Error: {}".format(e)
def request_doi_api():
    """Resolve the configured DOI via Crossref and wrap its URL in a clickable QLabel.

    Returns None when Crossref has no record for the DOI.
    """
    record = Works().doi(parameters["source"][prop])
    if not record:
        return None
    link = record["URL"]
    return QLabel("<html><a href=\"" + link + "\">" + link + "</a></html>")
def write_bibtex_v1(bibtex_file, dois):
    """Write bibtex entries for every DOI in *dois* into *bibtex_file*.

    DOIs that Crossref cannot resolve (responses starting with 'Resource')
    are silently skipped.
    """
    contact = Etiquette('VTLibraries', 0.1, 'https://lib.vt.edu/', '*****@*****.**')
    client = Works(etiquette=contact)
    with open(bibtex_file, 'w') as bib:
        for doi in dois:
            endpoint = f"http://api.crossref.org/works/{doi}/transform/application/x-bibtex"
            entry = client.do_http_request(
                'get', endpoint, custom_header=str(client.etiquette)).text
            # Error responses from the API begin with 'Resource'.
            if not entry.startswith('Resource'):
                bib.write(entry)
                bib.write('\n')
def cross_doi(publication):
    """Populate a publication's bibliographic fields from its Crossref record (in place)."""
    record = Works().doi(publication.DOI)
    publication.publication_year = str(record.get('created').get('date-parts')[0][0])
    publication.title = record.get('title')[0]
    # First author only, formatted as "given,family".
    first_author = record.get('author')[0]
    publication.author = f"{first_author.get('given')},{first_author.get('family')}"
    # Keep the existing subject when Crossref provides none.
    publication.subject = record.get("subject", [publication.subject])[0]
    publication.URL = record.get('URL')
def query_crossref(self, author = None, pub = None):
    """Query Crossref by author and/or publication title.

    Returns a Works query object, or None when both arguments are omitted.
    """
    from crossref.restful import Works
    works = Works()
    if author is not None and pub is not None:
        return works.query(title=pub, author=author)
    if author is not None:
        return works.query(author=author)
    if pub is not None:
        return works.query(title=pub)
    return None
def get_crossref_records(dois):
    """Fetch the Crossref record for each DOI; unresolved DOIs are reported and skipped.

    :return: dict mapping each resolvable DOI to its Crossref record
    """
    works = Works()
    records = {}
    print('searching crossref for all DOIs, this might take a few minutes...')
    for doi in dois:
        record = works.doi(doi)
        if record is None:
            print('missing crossref record for', doi)
        else:
            records[doi] = record
    return (records)
def __init__(self, context):
    """Set up the output-DOIs cache DB and a politely-identified Crossref client."""
    super().__init__(context, DataResource.data_dir / 'output_dois.db')
    contact = Etiquette(
        'SYNTH transform', '0.1',
        'https://github.com/NaturalHistoryMuseum/synth_transform',
        '*****@*****.**')
    self.works = Works(etiquette=contact)
    self._handled = set()  # DOIs examined during this run
    self._added = set()    # DOIs added during this run
    self._errors = {}
    self._methods = {}