def call_gui(args=None):
    """Function that runs the PhysBiblio GUI"""
    # these two imports must stay here,
    # so they start after the profile has been loaded properly:
    try:
        import physbiblio.gui.mainWindow
        from physbiblio.database import pBDB
    except ImportError:
        print("Could not find physbiblio and its modules!")
        raise
    try:
        app = QApplication(sys.argv)
        mainWin = physbiblio.gui.mainWindow.MainWindow()
        sys.excepthook = mainWin.errormessage.emit
        mainWin.show()
        mainWin.raise_()
        if pbConfig.params["openSinceLastUpdate"] != __version__:
            mainWin.recentChanges()
            pbConfig.globalDb.config.update(
                "openSinceLastUpdate", __version__)
            pbConfig.globalDb.commit()
        sys.exit(app.exec_())
    except NameError:
        pBLogger.critical("NameError:", exc_info=True)
    except SystemExit:
        pBDB.closeDB()
        pBLogger.info(apstr.closeMainW)

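
# Usage sketch: a minimal entry-point guard that starts the GUI.
# The guard itself is not part of the original function above; it
# assumes this module is run as a script.
if __name__ == "__main__":
    call_gui()
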
def openLink(self, key, arg="arxiv", fileArg=None):
    """Uses the getLink method to generate the web link
    and opens it in an external application

    Parameters:
        key, arg, fileArg as in the getLink method
    """
    if isinstance(key, list):
        for k in key:
            self.openLink(k, arg, fileArg)
    else:
        if arg == "file":
            self.getLink(key, arg=arg, fileArg=fileArg)
            return
        elif arg == "link":
            link = key
        else:
            link = self.getLink(key, arg=arg, fileArg=fileArg)
        if link:
            if self.webApp != "":
                pBLogger.info(vstr.opening % link)
                try:
                    subprocess.Popen(
                        [self.webApp, link],
                        stdout=subprocess.PIPE,
                        stderr=subprocess.STDOUT,
                    )
                except OSError:
                    pBLogger.warning(vstr.openingFailed % ("link", key))
        else:
            pBLogger.warning(vstr.errorLink % (arg, key))

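
# Usage sketch for openLink, hedged: `pBView` stands for an instance of
# the class this method is bound to (the instance name is a guess, not
# confirmed by the code above). Passing a list of keys triggers the
# recursive branch, opening one link per entry.
#
#     pBView.openLink("someBibKey", arg="arxiv")
#     pBView.openLink(["keyA", "keyB"], arg="doi")
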
def getGenericInfo(self, string, fields,
                   rows=pbConfig.params["maxExternalAPIResults"]):
    """Use the unofficial python client for the ADS API
    to obtain a list of results from a given search string

    Parameters:
        string: the search string
        fields: a list with the names of the required fields
        rows: the number of rows to obtain

    Output:
        a list of ads objects with the obtained entries
    """
    ads.config.token = pbConfig.params["ADSToken"]
    try:
        self.q = ads.SearchQuery(q=string, fl=fields, rows=rows)
        l = list(self.q)
    except ads.exceptions.APIResponseError:
        pBLogger.exception(self.unauthorized)
    except Exception:
        pBLogger.exception(self.genericFetchError, exc_info=True)
    else:
        pBLogger.info(self.getLimitInfo())
        return l
    return []

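
# Usage sketch, assuming `adsLoader` is an instance of the class that
# defines getGenericInfo (hypothetical name); the field names are
# standard ADS API fields. A valid ADSToken must be set in pbConfig.
#
#     results = adsLoader.getGenericInfo(
#         "title:neutrino", ["bibcode", "title", "first_author"], rows=5)
#     for r in results:
#         print(r.bibcode, r.title)
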
def editCategory(parentObject, mainWinObject,
                 editIdCat=None, useParentCat=None):
    """Open a dialog (`EditCategoryDialog`) to edit a category
    and process the output.

    Parameters:
        parentObject: the parent widget
        mainWinObject: the object which has
            the `statusBarMessage` and `setWindowTitle` methods
        editIdCat: the id of the category to be edited,
            or `None` to create a new category
        useParentCat: the parent category (if any)
            of the one to be edited
    """
    if editIdCat is not None:
        edit = pBDB.cats.getDictByID(editIdCat)
    else:
        edit = None
    newCatWin = EditCategoryDialog(
        parentObject, category=edit, useParentCat=useParentCat)
    newCatWin.exec_()
    if newCatWin.result:
        data = {}
        for k, v in newCatWin.textValues.items():
            if k == "parentCat":
                try:
                    s = str(newCatWin.selectedCats[0])
                except IndexError:
                    s = "0"
            else:
                s = "%s" % v.text()
            data[k] = s
        if data["name"].strip() != "":
            if "idCat" in data.keys():
                pBLogger.info(cwstr.updateCat % data["idCat"])
                pBDB.cats.update(data, data["idCat"])
            else:
                pBDB.cats.insert(data)
            message = cwstr.catSaved
            mainWinObject.setWindowTitle(cwstr.winTitleModified)
            try:
                parentObject.recreateTable()
            except AttributeError:
                pBLogger.debug(
                    cwstr.noAttribute % ("parentObject", "recreateTable"),
                    exc_info=True)
        else:
            message = cwstr.emptyName
    else:
        message = cwstr.noModifications
    try:
        mainWinObject.statusBarMessage(message)
    except AttributeError:
        pBLogger.debug(
            cwstr.noAttribute % ("mainWinObject", "statusBarMessage"),
            exc_info=True)

def exportLast(self, fileName):
    """Export the last queried entries into a .bib file,
    if the list is not empty.

    Parameters:
        fileName: the name of the output bibtex file
    """
    if pBDB.bibs.lastFetched:
        self.exportRows(fileName, pBDB.bibs.lastFetched)
    else:
        pBLogger.info(exstr.noLastSel)

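
# Usage sketch: exportLast acts on pBDB.bibs.lastFetched, so a query
# must run first. `pBExport` is an assumed exporter instance
# (hypothetical name), and the fetch call shown is just one way of
# filling lastFetched.
#
#     pBDB.bibs.getByBibtex("someBibKey")  # fills lastFetched
#     pBExport.exportLast("/tmp/selection.bib")
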
def changeBackend(self, wantBackend):
    """Changes the matplotlib backend currently in use.

    Parameters:
        wantBackend: a string that defines the wanted backend
    """
    if wantBackend != matplotlib.get_backend():
        matplotlib.use(wantBackend, warn=False, force=True)
        from matplotlib import pyplot as plt
        pBLogger.info(isstr.changeBackend % matplotlib.get_backend())

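
# Usage sketch: switch to the non-interactive "agg" backend before
# producing batch plots (`plotter` is a hypothetical instance name).
# Note that recent matplotlib versions dropped the `warn` keyword used
# above, so on those versions the call would need to be
# matplotlib.use(wantBackend, force=True).
#
#     plotter.changeBackend("agg")
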
def cli():
    """Open a command line interface.
    Many initial imports allow the user
    to automatically access the useful classes.
    """
    vars = globals().copy()
    vars.update(locals())
    shell = code.InteractiveConsole(vars)
    shell.interact(clistr.activate)
    pBDB.closeDB()
    pBLogger.info(clistr.close)

def editExperiment(parentObject, mainWinObject, editIdExp=None):
    """Open a dialog (`EditExperimentDialog`) to edit an experiment
    and process the output.

    Parameters:
        parentObject: the parent widget
        mainWinObject: the object which has
            the `statusBarMessage` and `setWindowTitle` methods
        editIdExp: the id of the experiment to be edited,
            or `None` to create a new one
    """
    if editIdExp is not None:
        edit = pBDB.exps.getDictByID(editIdExp)
    else:
        edit = None
    newExpWin = EditExperimentDialog(parentObject, experiment=edit)
    newExpWin.exec_()
    if newExpWin.result:
        data = {}
        for k, v in newExpWin.textValues.items():
            s = "%s" % v.text()
            data[k] = s
        if data["name"].strip() != "":
            if "idExp" in data.keys():
                pBLogger.info(ewstr.updateExp % data["idExp"])
                pBDB.exps.update(data, data["idExp"])
            else:
                pBDB.exps.insert(data)
            message = ewstr.expSaved
            mainWinObject.setWindowTitle(ewstr.winTitleModified)
            try:
                parentObject.recreateTable()
            except AttributeError:
                pBLogger.debug(
                    ewstr.noAttribute % ("parentObject", "recreateTable"),
                    exc_info=True
                )
        else:
            message = ewstr.emptyName
    else:
        message = ewstr.noModifications
    try:
        mainWinObject.statusBarMessage(message)
    except AttributeError:
        pBLogger.debug(
            ewstr.noAttribute % ("mainWinObject", "statusBarMessage"),
            exc_info=True
        )

def exportRows(self, fileName, rows):
    """Export the given entries into a .bib file.

    Parameters:
        fileName: the name of the output bibtex file
        rows: the list of entries to be exported
    """
    self.backupCopy(fileName)
    if rows != []:
        try:
            with codecs.open(fileName, "w", "utf-8") as bibfile:
                for q in rows:
                    bibfile.write(q["bibtex"] + "\n")
        except Exception:
            pBLogger.exception(exstr.errorExport)
            self.restoreBackupCopy(fileName)
    else:
        pBLogger.info(exstr.noElement)
    self.rmBackupCopy(fileName)

def parse_accents_record(record):
    """Function that reads the fields inside a bibtex dictionary
    and translates all the known unicode characters
    into latex commands.

    Parameters:
        record: the bibtex dictionary generated by bibtexparser

    Output:
        the dictionary after having processed all the fields
    """
    for val in record:
        if val != "ID" and len(record[val].strip()) > 0:
            tmp = utf8tolatex(record[val], non_ascii_only=True)
            if tmp != record[val]:
                pBLogger.info(pastr.converting % record["ID"])
                pBLogger.info(pastr.infodashes + tmp)
                accents_changed.append(record["ID"])
            record[val] = tmp
    return record

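
# Usage sketch: parse_accents_record can be plugged into bibtexparser
# as a per-record customization, so every loaded entry gets its unicode
# characters converted on the fly. This assumes an `accents_changed`
# list exists at module level (as the appends above suggest) and that
# "refs.bib" is a placeholder file name.
#
#     import bibtexparser
#     from bibtexparser.bparser import BibTexParser
#
#     parser = BibTexParser(common_strings=True)
#     parser.customization = parse_accents_record
#     with open("refs.bib") as f:
#         bibData = bibtexparser.loads(f.read(), parser)
#     print(accents_changed)  # IDs of the entries that were modified
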
def retrieveUrlFirst(self, string):
    """Retrieves the first (only) result
    from the content of the given web page.

    Parameters:
        string: the search string (the DOI)

    Output:
        returns the bibtex string
    """
    url = self.createUrl(string)
    pBLogger.info(self.searchInfo % (string, url))
    text = self.textFromUrl(url, self.headers)
    if "<title>Error: DOI Not Found</title>" in text:
        return ""
    try:
        return parse_accents_str(text[:])
    except Exception:
        pBLogger.exception(self.genericError)
        return ""

def onLoad(self):
    """Get current selection and (eventually) load new profile"""
    prof, desc = self.combo.currentText().split(pmstr.splitter)
    newProfile = pbConfig.profiles[prof]
    if prof != pbConfig.currentProfileName:
        pBLogger.info(pmstr.changingProfile)
        pbConfig.reInit(prof, newProfile)
        pBDB.reOpenDB(pbConfig.currentDatabase)
        self.parent().reloadConfig()
        self.parent().closeAllTabs()
        try:
            self.parent().catListWin.close()
        except AttributeError:
            pass
        try:
            self.parent().expListWin.close()
        except AttributeError:
            pass
        self.parent().reloadMainContent()
    self.close()

def retrieveUrlFirst(self, string):
    """Retrieves the first (only) result
    from the content of the given web page.

    Parameters:
        string: the search string (the ISBN)

    Output:
        returns the bibtex string
    """
    self.urlArgs["isbn"] = string
    url = self.createUrl()
    pBLogger.info(self.searchInfo % (string, url))
    text = self.textFromUrl(url)
    if "Not found" in text:
        return ""
    try:
        return parse_accents_str(text[:])
    except Exception:
        pBLogger.exception(self.genericError)
        return ""

def run(self):
    """Start the receiver,
    import the required entries and finish
    """
    self.receiver.start()
    db = bibtexparser.bibdatabase.BibDatabase()
    inserted = []
    failed = []
    for key in sorted(self.found):
        if not self.runningImport:
            continue
        el = self.found[key]
        if pBDB.bibs.loadAndInsert(el["bibpars"]["eprint"]):
            try:
                newKey = pBDB.bibs.getByKey(key)[0]["bibkey"]
            except IndexError:
                newKey = pBDB.bibs.getByBibtex(key)[0]["bibkey"]
            inserted.append(newKey)
        else:
            db.entries = [{
                "ID": el["bibpars"]["eprint"],
                "ENTRYTYPE": "article",
                "title": el["bibpars"]["title"],
                "author": el["bibpars"]["author"],
                "archiveprefix": "arXiv",
                "eprint": el["bibpars"]["eprint"],
                "primaryclass": el["bibpars"]["primaryclass"],
            }]
            entry = pbWriter.write(db)
            data = pBDB.bibs.prepareInsert(entry)
            if pBDB.bibs.insert(data):
                pBLogger.info(thestr.elementInserted % key)
                inserted.append(key)
            else:
                pBLogger.warning(thestr.elementFailed % key)
                failed.append(key)
                continue
        try:
            eid = pBDB.bibs.updateInspireID(key)
            pBDB.bibs.searchOAIUpdates(
                0,
                entries=pBDB.bibs.getByBibkey(key),
                force=True,
                reloadAll=True,
            )
            newKey = pBDB.bibs.getByKey(key)[0]["bibkey"]
            if key != newKey:
                inserted[-1] = newKey
        except Exception:
            pBLogger.warning(thestr.failedComplete % (key), exc_info=True)
            failed.append(key)
    pBLogger.info(thestr.elementImported % (inserted))
    pBLogger.info(thestr.errorsEntries % (failed))
    self.parent().importArXivResults = (inserted, failed)
    time.sleep(0.1)
    self.receiver.running = False

def getBibtexs(self, bibcodes):
    """Obtain a string containing the bibtex entries
    for all the requested bibcodes

    Parameter:
        bibcodes: a single bibcode
            (string containing the ADS identifier of a given entry)
            or a list of bibcodes

    Output:
        a string with all the bibtex entries
    """
    ads.config.token = pbConfig.params["ADSToken"]
    try:
        self.q = ads.ExportQuery(bibcodes=bibcodes, format="bibtex")
        export = self.q.execute()
    except ads.exceptions.APIResponseError:
        pBLogger.exception(self.unauthorized)
    except Exception:
        pBLogger.exception(self.genericExportError, exc_info=True)
    else:
        pBLogger.info(self.getLimitInfo())
        return export
    return ""

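
# Usage sketch: getBibtexs accepts a single bibcode or a list of them
# (`adsLoader` as in the earlier sketch, hypothetical instance name;
# the bibcodes below are placeholders).
#
#     single = adsLoader.getBibtexs("someBibcode")
#     several = adsLoader.getBibtexs(["bibcode1", "bibcode2"])
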
def plotStats(
    self,
    paper=False,
    author=False,
    show=False,
    save=False,
    path=".",
    markPapers=False,
    pickVal=6,
):
    """Plot the collected information, using matplotlib.pyplot.

    Parameters:
        paper (boolean, default False):
            plot statistics for the last analyzed paper
        author (boolean, default False):
            plot statistics for the last analyzed author
        show (boolean, default False):
            True to show the plots in a separate window
            (with matplotlib.pyplot.show())
        save (boolean, default False):
            True to save the plots into files.
        path (string): where to save the plots
        markPapers (boolean, default False):
            True to draw vertical lines at the dates
            corresponding to a paper appearing
        pickVal (float, default 6): the picker tolerance

    Output:
        False if paper==False and author==False,
        the matplotlib.pyplot figure containing the citation plot
            if paper==True,
        a list of matplotlib.pyplot figures containing
            the various plots if author==True
    """
    if paper and self.paperPlotInfo is not None:
        if len(self.paperPlotInfo["citList"][0]) > 0:
            pBLogger.info(isstr.plotPaper % self.paperPlotInfo["id"])
            fig, ax = plt.subplots()
            plt.plot(
                self.paperPlotInfo["citList"][0],
                self.paperPlotInfo["citList"][1],
                picker=True,
                pickradius=pickVal,
            )
            fig.autofmt_xdate()
            if save:
                pdf = PdfPages(
                    osp.join(path, self.paperPlotInfo["id"] + ".pdf"))
                pdf.savefig()
                pdf.close()
            if show:
                plt.show()
            plt.close()
            return fig
    elif author and self.authorPlotInfo is not None:
        pBLogger.info(isstr.plotAuthor % self.authorPlotInfo["name"])
        try:
            ymin = min(
                int(self.authorPlotInfo["allLi"][0][0].strftime("%Y")) - 2,
                int(self.authorPlotInfo["paLi"][0][0].strftime("%Y")) - 2,
            )
            ymax = max(
                int(self.authorPlotInfo["allLi"][0][-1].strftime("%Y")) + 2,
                int(self.authorPlotInfo["paLi"][0][-1].strftime("%Y")) + 2,
            )
        except Exception:
            try:
                ymin = int(
                    self.authorPlotInfo["paLi"][0][0].strftime("%Y")) - 2
                ymax = int(
                    self.authorPlotInfo["paLi"][0][-1].strftime("%Y")) + 2
            except Exception:
                pBLogger.warning(isstr.noPublications)
                return False
        figs = []
        if len(self.authorPlotInfo["paLi"][0]) > 0:
            fig, ax = plt.subplots()
            plt.title(isstr.paperNumber)
            plt.plot(
                self.authorPlotInfo["paLi"][0],
                self.authorPlotInfo["paLi"][1],
                picker=True,
                pickradius=pickVal,
            )
            fig.autofmt_xdate()
            if save:
                pdf = PdfPages(
                    osp.join(path,
                             self.authorPlotInfo["name"] + "_papers.pdf"))
                pdf.savefig()
                pdf.close()
            if show:
                plt.show()
            plt.close()
            figs.append(fig)
        if len(self.authorPlotInfo["paLi"][0]) > 0:
            fig, ax = plt.subplots()
            plt.title(isstr.paperYear)
            ax.hist(
                [int(q.strftime("%Y"))
                 for q in self.authorPlotInfo["paLi"][0]],
                bins=range(ymin, ymax),
                picker=True,
            )
            ax.get_xaxis().get_major_formatter().set_useOffset(False)
            plt.xlim([ymin, ymax])
            if save:
                pdf = PdfPages(
                    osp.join(path,
                             self.authorPlotInfo["name"]
                             + "_yearPapers.pdf"))
                pdf.savefig()
                pdf.close()
            if show:
                plt.show()
            plt.close()
            figs.append(fig)
        if len(self.authorPlotInfo["allLi"][0]) > 0:
            fig, ax = plt.subplots()
            plt.title(isstr.totalCitations)
            plt.plot(
                self.authorPlotInfo["allLi"][0],
                self.authorPlotInfo["allLi"][1],
                picker=True,
                pickradius=pickVal,
            )
            fig.autofmt_xdate()
            if save:
                pdf = PdfPages(
                    osp.join(path,
                             self.authorPlotInfo["name"] + "_allCit.pdf"))
                pdf.savefig()
                pdf.close()
            if show:
                plt.show()
            plt.close()
            figs.append(fig)
        if len(self.authorPlotInfo["allLi"][0]) > 0:
            fig, ax = plt.subplots()
            plt.title(isstr.citationsYear)
            ax.hist(
                [int(q.strftime("%Y"))
                 for q in self.authorPlotInfo["allLi"][0]],
                bins=range(ymin, ymax),
                picker=True,
            )
            ax.get_xaxis().get_major_formatter().set_useOffset(False)
            plt.xlim([ymin, ymax])
            if save:
                pdf = PdfPages(
                    osp.join(path,
                             self.authorPlotInfo["name"] + "_yearCit.pdf"))
                pdf.savefig()
                pdf.close()
            if show:
                plt.show()
            plt.close()
            figs.append(fig)
        if len(self.authorPlotInfo["meanLi"][0]) > 0:
            fig, ax = plt.subplots()
            plt.title(isstr.meanCitations)
            plt.plot(
                self.authorPlotInfo["meanLi"][0],
                self.authorPlotInfo["meanLi"][1],
                picker=True,
                pickradius=pickVal,
            )
            fig.autofmt_xdate()
            if markPapers:
                for q in self.authorPlotInfo["paLi"][0]:
                    plt.axvline(
                        datetime.datetime(
                            int(q.strftime("%Y")),
                            int(q.strftime("%m")),
                            int(q.strftime("%d")),
                        ),
                        color="k",
                        ls="--",
                    )
            if save:
                pdf = PdfPages(
                    osp.join(path,
                             self.authorPlotInfo["name"] + "_meanCit.pdf"))
                pdf.savefig()
                pdf.close()
            if show:
                plt.show()
            plt.close()
            figs.append(fig)
        if len(self.authorPlotInfo["aI"].keys()) > 0:
            fig, ax = plt.subplots()
            plt.title(isstr.citationsPaper)
            for i, p in enumerate(self.authorPlotInfo["aI"].keys()):
                try:
                    plt.plot(
                        self.authorPlotInfo["aI"][p]["citingPapersList"][0],
                        self.authorPlotInfo["aI"][p]["citingPapersList"][1],
                    )
                except Exception:
                    pBLogger.exception(isstr.errorPlotting)
            fig.autofmt_xdate()
            if save:
                pdf = PdfPages(
                    osp.join(path,
                             self.authorPlotInfo["name"] + "_paperCit.pdf"))
                pdf.savefig()
                pdf.close()
            if show:
                plt.show()
            plt.close()
            figs.append(fig)
        return figs
    else:
        pBLogger.info(isstr.noPlot)
        return False

def paperStats(
    self,
    paperID,
    plot=False,
    verbose=1,
    paperDate=None,
    reset=True,
    pbMax=None,
    pbVal=None,
):
    """Function that gets the data and
    constructs the statistics for a given paper.

    Parameters:
        paperID (string): the INSPIRE-HEP id of the paper (a number)
        plot (boolean): whether or not the citations
            should be plotted (default False)
        verbose (int, default 1): increase the verbosity level
        paperDate (datetime, optional): the date
            at which the paper was published
        reset (boolean, default True): True to delete
            all previous existing data
            (passed as False when processing a list of IDs)
        pbMax (callable, optional): a function to set the maximum
            of a progress bar in the GUI, if possible
        pbVal (callable, optional): a function to set the value
            of a progress bar in the GUI, if possible

    Output:
        a dictionary containing all the desired information.
        The structure is the following:
        {
            "id": the paper ID,
            "aI": the list of creation dates for all the papers,
                in INSPIRE-HEP order,
            "citList": the ordered list of citing papers,
            "fig" (only if `plot` is True): contains the figure.
                See self.plotStats
        }
    """
    if reset:
        self.allInfoP = {}
        self.citingPapersList = [[], []]
    if isinstance(paperID, list):
        self.runningPaperStats = True
        try:
            pbMax(len(paperID))
        except TypeError:
            pass
        for ia, a in enumerate(paperID):
            try:
                pbVal(ia + 1)
            except TypeError:
                pass
            if self.runningPaperStats:
                self.paperStats(a, reset=False)
        self.paperPlotInfo["id"] = paperID
        return self.paperPlotInfo
    if verbose > 0:
        pBLogger.info(isstr.paperStats % paperID)
    url = (pbConfig.inspireLiteratureAPI
           + "?q=refersto:recid:"
           + paperID
           + self.paperStatsOpts
           + str(self.maxPerPage))
    data = self.JsonFromUrl(url)
    recid_citingPapers = [a["id"] for a in data]
    if paperDate is not None:
        self.citingPapersList[0].append(paperDate)
    for i, p in enumerate(recid_citingPapers):
        self.allInfoP[p] = {}
        self.allInfoP[p]["date"] = dateutil.parser.parse(
            data[i]["created"])
        self.citingPapersList[0].append(
            self.allInfoP[p]["date"].replace(tzinfo=pytz.UTC))
    for i, p in enumerate(sorted(self.citingPapersList[0])):
        self.citingPapersList[0][i] = p
        self.citingPapersList[1].append(i + 1)
    self.citingPapersList[0].append(
        datetime.datetime.fromordinal(
            datetime.date.today().toordinal()).replace(tzinfo=pytz.UTC))
    try:
        self.citingPapersList[1].append(self.citingPapersList[1][-1])
    except IndexError:
        self.citingPapersList[1].append(0)
    self.paperPlotInfo = {
        "id": paperID,
        "aI": self.allInfoP,
        "citList": self.citingPapersList,
    }
    if plot:
        self.paperPlotInfo["fig"] = self.plotStats(paper=True)
    if verbose > 0:
        pBLogger.info(isstr.doneE)
    return self.paperPlotInfo

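
# Usage sketch: citation statistics for one INSPIRE record
# (`inspireStats` is an assumed instance of the class these methods
# belong to; "123456" is a placeholder recid). With plot=True the
# resulting figure is stored under the "fig" key.
#
#     info = inspireStats.paperStats("123456", plot=True)
#     dates, counts = info["citList"]
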
def exportForTexFile(
    self,
    texFileName,
    outFileName,
    overwrite=False,
    autosave=True,
    updateExisting=False,
    removeUnused=False,
    reorder=False,
    newOperation=True,
):
    r"""Reads a .tex file looking for the \cite{} commands,
    collects the bibtex entries cited in the text
    and stores them in a bibtex file.
    The entries are taken from the database first,
    or from INSPIRE-HEP if possible.
    The downloaded entries are saved in the database.

    Parameters:
        texFileName: the name (or a list of names)
            of the considered .tex file(s)
        outFileName: the name of the output file,
            where the required entries will be added
        overwrite (boolean, default False): if True,
            the previous version of the file is replaced
            and no backup copy is created
        autosave (boolean, default True): if True,
            the changes to the database are automatically saved.
        updateExisting (boolean, default False): if True,
            remove duplicates and update entries
            that have been changed in the DB
        removeUnused (boolean, default False): if True,
            remove bibtex entries that are no longer cited
            in the tex files
        reorder (boolean, default False): if True,
            reorder (not update!) the bibtex entries
            in the bib files before adding the new ones
        newOperation (boolean, default True):
            reset self.existingBibsList and read the .bib file
            content. Time consuming! Better to just keep it updated
            when using multiple tex files...

    Output:
        a tuple (requiredBibkeys, missing, retrieved, notFound,
        unexpected, newKeys, warnings, totalCites) if successful,
        False if errors occurred
    """
    db = bibtexparser.bibdatabase.BibDatabase()

    def printOutput(reqBibkeys, miss, retr, nFound, unexp, nKeys,
                    warn, totalCites, full=False):
        """Print information on the process"""
        pBLogger.info(exstr.resume)
        if totalCites is not None:
            pBLogger.info(exstr.keysFound % totalCites)
        pBLogger.info(exstr.newKeysFound % len(reqBibkeys))
        j = ", "
        if full:
            pBLogger.info(j.join(reqBibkeys))
        if len(miss) > 0:
            pBLogger.info(exstr.missingEntries % len(miss))
            if full:
                pBLogger.info(j.join(miss))
        if len(retr) > 0:
            pBLogger.info(exstr.retrievedEntries % len(retr))
            pBLogger.info(j.join(retr))
        if len(nFound) > 0:
            pBLogger.info(exstr.entriesNotFound % len(nFound))
            pBLogger.info(j.join(nFound))
        if len(unexp) > 0:
            pBLogger.info(exstr.unexpectedForEntries % len(unexp))
            pBLogger.info(j.join(unexp))
        if len(nKeys.keys()) > 0:
            pBLogger.info(
                exstr.nonMatchingEntries % len(nKeys.keys())
                + "\n".join(
                    ["'%s' => '%s'" % (k, n) for k, n in nKeys.items()])
            )
        pBLogger.info(exstr.totalWarnings % warn)

    def saveEntryOutBib(a, m=None):
        """Remove unwanted fields and add the bibtex entry
        to the output file

        Parameters:
            a: the bibtex entry
            m: the ID (bibtex key) of the entry,
                if it is not the default one
        """
        entry = (
            bibtexparser.bparser.BibTexParser(common_strings=True)
            .parse(a)
            .entries[0]
        )
        for u in self.unwantedFields:
            try:
                del entry[u]
            except KeyError:
                pass
        if m is not None:
            m = m.strip()
            if m != entry["ID"].strip():
                entry["ID"] = m
        db.entries = [entry]
        bibf = pbWriter.write(db)
        try:
            with open(outFileName, "a") as o:
                o.write(bibf)
            pBLogger.info(exstr.entryInserted % m)
        except IOError:
            pBLogger.exception(exstr.errorWrite % outFileName)
            return False

    def removeUnusedBibtexs(existingBibsDict):
        r"""Function that reads the list of bibtex entries
        in the existing .bib file and removes
        the ones that are not inside \cite commands
        """
        newDict = {}
        notFound = []
        for k, v in existingBibsDict.items():
            if k in self.allCitations:
                newDict[k] = existingBibsDict[k]
            else:
                notFound.append(k)
        db.entries = [
            newDict[k]
            for k in sorted(
                [e["ID"] for e in newDict.values()],
                key=lambda s: s.lower()
            )
        ]
        bibf = pbWriter.write(db)
        try:
            with open(outFileName, "w") as o:
                o.write(exstr.byPhysbiblio + bibf)
            pBLogger.info(exstr.entriesRemoved % notFound)
        except IOError:
            pBLogger.exception(exstr.errorWrite % outFileName)

    self.exportForTexFlag = True
    pBLogger.info(exstr.startEFTF)
    pBLogger.info(exstr.readFrom % texFileName)
    pBLogger.info(exstr.saveTo % outFileName)
    if autosave:
        pBLogger.info(exstr.autoSave)

    missing = []
    newKeys = {}
    notFound = []
    requiredBibkeys = []
    retrieved = []
    unexpected = []
    warnings = 0
    totalCites = 0

    # if overwrite, reset the output file
    if overwrite:
        updateExisting = False
        removeUnused = False
        reorder = False
        try:
            with open(outFileName, "w") as o:
                o.write(exstr.byPhysbiblio)
        except IOError:
            pBLogger.exception(exstr.cannotWrite)
            return False

    # read previous content of output file, if any
    try:
        with open(outFileName, "r") as f:
            existingBibText = f.readlines()
    except IOError:
        pBLogger.error(exstr.cannotRead % outFileName)
        try:
            open(outFileName, "w").close()
        except IOError:
            pBLogger.exception(exstr.cannotCreate % outFileName)
            return False
        existingBibText = ""

    # this is time consuming if there are many entries.
    # Do not load it every time for multiple texs!
    if newOperation:
        self.allCitations = set([])
        if existingBibText != "":
            self.existingBibsList = pBDB.bibs.parseAllBibtexs(
                existingBibText, verbose=False
            )
        else:
            self.existingBibsList = []
    # work with dictionary, so that if there are repeated entries
    # (entries with same ID) they are automatically discarded
    existingBibsDict = CaseInsensitiveDict()
    for e in self.existingBibsList:
        existingBibsDict[e["ID"]] = e

    # if requested, do some cleaning
    if updateExisting or reorder:
        # update entry from DB if existing
        if updateExisting:
            for k, v in existingBibsDict.items():
                e = pBDB.bibs.getByBibtex(k, saveQuery=False)
                if len(e) > 0 and e[0]["bibtexDict"] != v:
                    existingBibsDict[k] = e[0]["bibtexDict"]
                    if existingBibsDict[k]["ID"].lower() != k.lower():
                        existingBibsDict[k]["ID"] = k
        # write new (updated) bib content
        # (so also repeated entries are removed)
        db.entries = [
            existingBibsDict[k]
            for k in sorted(
                [e["ID"] for e in existingBibsDict.values()],
                key=lambda s: s.lower(),
            )
        ]
        bibf = pbWriter.write(db)
        try:
            with open(outFileName, "w") as o:
                o.write(exstr.byPhysbiblio + bibf)
            pBLogger.info(exstr.outputUpdated)
        except IOError:
            pBLogger.exception(exstr.errorWrite % outFileName)

    # if there is a list of tex files, run this function
    # for each of them...no updateExisting and removeUnused!
    if isinstance(texFileName, list):
        if len(texFileName) == 0:
            return False
        elif len(texFileName) == 1:
            texFileName = texFileName[0]
        else:
            for t in texFileName:
                req, m, ret, nF, un, nK, w, cits = self.exportForTexFile(
                    t,
                    outFileName,
                    overwrite=False,
                    autosave=autosave,
                    updateExisting=False,
                    removeUnused=False,
                    reorder=False,
                    newOperation=False,
                )
                requiredBibkeys += req
                missing += m
                retrieved += ret
                notFound += nF
                unexpected += un
                for k, v in nK.items():
                    newKeys[k] = v
                warnings += w
            pBLogger.info(exstr.doneAllTexs)
            if removeUnused:
                removeUnusedBibtexs(existingBibsDict)
            printOutput(
                requiredBibkeys,
                missing,
                retrieved,
                notFound,
                unexpected,
                newKeys,
                warnings,
                len(self.allCitations),
                full=True,
            )
            return (
                requiredBibkeys,
                missing,
                retrieved,
                notFound,
                unexpected,
                newKeys,
                warnings,
                len(self.allCitations),
            )

    # read the texFile
    keyscont = ""
    try:
        with open(texFileName) as r:
            keyscont += r.read()
    except IOError:
        pBLogger.exception(exstr.errorNoFile % texFileName)
        return False

    # extract \cite* commands
    matchKeys = r"([0-9A-Za-z_\-':\+\.\&]+)"
    cite = re.compile(
        r"\\(cite|citep|citet)\{([\n ]*" + matchKeys + r"[,]?[\n ]*)*\}",
        re.MULTILINE,
    )  # find \cite{...}
    citeKeys = re.compile(
        matchKeys, re.MULTILINE
    )  # find the keys inside \cite{...}
    citaz = [m for m in cite.finditer(keyscont) if m != ""]
    pBLogger.info(exstr.citeFound % len(citaz))

    # extract required keys from \cite* commands
    for c in citaz:
        try:
            for e in [l.group(1) for l in citeKeys.finditer(c.group())]:
                e = e.strip()
                if e == "" or e in ["cite", "citep", "citet"]:
                    continue
                self.allCitations.add(e)
                if e not in requiredBibkeys:
                    try:
                        # this is just to check if already present
                        tmp = existingBibsDict[e]
                    except KeyError:
                        requiredBibkeys.append(e)
        except (IndexError, AttributeError, TypeError):
            pBLogger.warning(exstr.errorCitation % c.group())
    pBLogger.info(
        exstr.newKeysTotal % (len(requiredBibkeys), len(self.allCitations))
    )

    # if True, remove unused bibtex entries
    if removeUnused:
        removeUnusedBibtexs(existingBibsDict)

    # check what is missing in the database and insert/import
    # what is needed:
    for m in requiredBibkeys:
        if m.strip() == "":
            continue
        entry = pBDB.bibs.getByBibtex(m)
        entryMissing = len(entry) == 0
        if not self.exportForTexFlag:
            # if flag set, stop execution and
            # go to the end skipping everything
            continue
        elif not entryMissing:
            # if already in the database, just insert it as it is
            bibtex = entry[0]["bibtex"]
            bibtexDict = entry[0]["bibtexDict"]
        else:
            # if no entry is found, mark it as missing
            missing.append(m)
            # if not present, try INSPIRE import
            pBLogger.info(exstr.keyMissing % m)
            newWeb = pBDB.bibs.loadAndInsert(m, returnBibtex=True)
            newCheck = pBDB.bibs.getByBibtex(m, saveQuery=False)
            # if the import worked, insert the entry
            if len(newCheck) > 0:
                # if key is not matching,
                # just replace it in the exported bib
                # and print a message
                if m.strip().lower() != newCheck[0]["bibkey"].lower():
                    warnings += 1
                    newKeys[m] = newCheck[0]["bibkey"]
                if newCheck[0]["bibkey"] not in retrieved:
                    retrieved.append(newCheck[0]["bibkey"])
                pBDB.catBib.insert(
                    pbConfig.params["defaultCategories"],
                    newCheck[0]["bibkey"]
                )
                bibtex = newCheck[0]["bibtex"]
                bibtexDict = newCheck[0]["bibtexDict"]
            else:
                # if nothing found, add a warning for the end
                warnings += 1
                notFound.append(m)
                continue
            pBLogger.info("\n")
        # save in output file
        try:
            bibtexDict["ID"] = m
            self.existingBibsList.append(bibtexDict)
            saveEntryOutBib(bibtex, m)
        except Exception:
            unexpected.append(m)
            pBLogger.exception(exstr.unexpectedEntry % m)

    if autosave:
        pBDB.commit()
    printOutput(
        requiredBibkeys,
        missing,
        retrieved,
        notFound,
        unexpected,
        newKeys,
        warnings,
        len(self.allCitations),
    )
    return (
        requiredBibkeys,
        missing,
        retrieved,
        notFound,
        unexpected,
        newKeys,
        warnings,
        len(self.allCitations),
    )

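
# Usage sketch: collect all the \cite'd entries of a paper into its
# .bib file (`pBExport` is an assumed exporter instance; the file names
# are placeholders). Passing a list of tex files triggers the recursive
# branch above with newOperation=False for each file.
#
#     pBExport.exportForTexFile(
#         ["main.tex", "appendix.tex"], "biblio.bib", removeUnused=True)
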
def arxivDaily(self, category):
    """Read the daily RSS feed for a given category

    Parameter:
        category: the selected category (see `self.categories`)
    """
    if "." in category:
        main, sub = category.split(".")
    else:
        main = category
        sub = ""
    url = self.urlRss
    if main not in self.categories.keys():
        pBLogger.warning(self.mainCatNotFound % main)
        return False
    else:
        url += main
    if sub != "" and sub not in self.categories[main]:
        pBLogger.warning(self.subCatNotFound % sub)
        return False
    elif sub != "" and sub in self.categories[main]:
        url += "." + sub
    pBLogger.info(url)
    text = self.textFromUrl(url)
    if text is None:
        pBLogger.warning(self.emptyUrl)
        return False
    author = re.compile("(>|&gt;)([^/]*)(</a>|&lt;/a&gt;)")
    additionalInfo = re.compile(
        r" \(arXiv:([0-9\.v]*) \[([\-\.a-zA-Z]*)\]([ A-Z]*)\)")
    if sys.version_info[0] < 3:
        text = text.decode("utf-8")
    try:
        data = feedparser.parse(parse_accents_str(text))
        entries = []
        for element in data.entries:
            tmp = {}
            tmp["eprint"] = element["id"].split("/")[-1]
            tmp["abstract"] = (
                element["summary"]
                .replace("\n", " ")
                .replace("<p>", "")
                .replace("</p>", ""))
            tmp["authors"] = [
                m.group(2)
                for m in author.finditer(element["authors"][0]["name"])
                if m != ""
            ]
            tmp["author"] = (
                " and ".join(tmp["authors"])
                if len(tmp["authors"]) < pbConfig.params["maxAuthorNames"]
                else " and ".join(
                    tmp["authors"][0:pbConfig.params["maxAuthorNames"]]
                    + ["others"]))
            tmp["replacement"] = "UPDATED" in element["title"]
            tmp["primaryclass"] = [
                m.group(2)
                for m in additionalInfo.finditer(element["title"])
                if m != ""
            ][0]
            tmp["cross"] = (
                "CROSS LISTED" in element["title"]
                or category.lower() not in tmp["primaryclass"].lower())
            tmp["version"] = [
                m.group(1)
                for m in additionalInfo.finditer(element["title"])
                if m != ""
            ][0]
            parenthesis = [
                m.group()
                for m in additionalInfo.finditer(element["title"])
                if m != ""
            ][0]
            tmp["title"] = element["title"].replace(parenthesis, "")
            entries.append(tmp)
        return entries
    except Exception:
        pBLogger.error(self.cannotParseRSS % text, exc_info=True)
        return False

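
# Usage sketch: read today's feed for one category and keep only the
# new, non-replacement entries (`arxiv` is an assumed instance of the
# class this method belongs to).
#
#     entries = arxiv.arxivDaily("hep-ph")
#     if entries:
#         new = [e for e in entries if not e["replacement"]]
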
def arxivRetriever(self, string, searchType="all",
                   additionalArgs=None, fullDict=False):
    """Reads the feed content got from arxiv into a dictionary,
    used to return a bibtex.

    Parameters:
        string: the search string
        searchType: the search method in arxiv API (default 'all').
            The possible values are:
                ti -> Title
                au -> Author
                abs -> Abstract
                co -> Comment
                jr -> Journal Reference
                cat -> Subject Category
                rn -> Report Number
                id -> Id (use id_list instead)
                all -> All of the above
        additionalArgs: a dictionary of additional arguments
            that can be passed to self.urlArgs (default None)
        fullDict (boolean): return the bibtex dictionary
            in addition to the bibtex text (default False)

    Output:
        the bibtex text
        (optional, depending on fullDict):
            the bibtex dictionary
    """
    if additionalArgs:
        for k, v in additionalArgs.items():
            self.urlArgs[k] = v
    self.urlArgs["search_query"] = searchType + ":" + string
    url = self.createUrl()
    pBLogger.info(self.searchInfo % (searchType, string, url))
    text = parse_accents_str(self.textFromUrl(url))
    try:
        data = feedparser.parse(text)
        db = BibDatabase()
        db.entries = []
        dictionaries = []
        for entry in data["entries"]:
            dictionary = {}
            idArx = (
                entry["id"]
                .replace("http://arxiv.org/abs/", "")
                .replace("https://arxiv.org/abs/", ""))
            pos = idArx.find("v")
            if pos >= 0:
                idArx = idArx[0:pos]
            dictionary["ENTRYTYPE"] = "article"
            dictionary["ID"] = idArx
            dictionary["archiveprefix"] = "arXiv"
            dictionary["title"] = entry["title"]
            dictionary["arxiv"] = idArx
            try:
                dictionary["doi"] = entry["arxiv_doi"]
            except KeyError as e:
                pBLogger.debug("KeyError: %s" % e)
            dictionary["abstract"] = entry["summary"].replace("\n", " ")
            dictionary["authors"] = " and ".join(
                [au["name"] for au in entry["authors"]])
            dictionary["primaryclass"] = entry[
                "arxiv_primary_category"]["term"]
            year = self.getYear(dictionary["arxiv"])
            if year is not None:
                dictionary["year"] = year
            db.entries.append(dictionary)
            dictionaries.append(dictionary)
        if fullDict:
            dictionary = dictionaries[0]
            for d in dictionaries:
                if string in d["arxiv"]:
                    dictionary = d
            return pbWriter.write(db), dictionary
        else:
            return pbWriter.write(db)
    except Exception:
        # intercept all other possible errors
        pBLogger.exception(self.genericError)
        if fullDict:
            return "", {}
        else:
            return ""

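
# Usage sketch: search arXiv by title and get both the bibtex string
# and the parsed dictionary (`arxiv` as above; the search string is a
# placeholder).
#
#     bibtex, d = arxiv.arxivRetriever(
#         "neutrino masses", searchType="ti", fullDict=True)
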
def authorStats(self, authorName, plot=False, reset=True,
                pbMax=None, pbVal=None):
    """Function that gets the data and constructs
    the statistics for a given author.

    Parameters:
        authorName: the author name as identified into INSPIRE-HEP,
            or a list of author names
            (it calls itself recursively for all the list elements)
        plot (boolean, default False): True to call self.plotStats
        reset (boolean, default True): True to delete
            all previous existing data
            (passed as False when processing a list of authors)
        pbMax (callable, optional): a function to set the maximum
            of a progress bar in the GUI, if possible
        pbVal (callable, optional): a function to set the value
            of a progress bar in the GUI, if possible

    Output:
        a dictionary containing all the statistic information.
        For a single author, the structure is the following:
        {
            "name": the author name,
            "aI": the complete information, including the dictionaries
                with the single papers info (see self.paperStats),
                the citations and the corresponding dates,
            "paLi": a list of [id, date] of the papers
                associated with the author,
            "allLi": the complete list of [date, total citations]
                with all the citations to the papers,
            "meanLi": the complete list of
                [date, total citations/number of papers]
                computed at each point from "allLi" content,
            "h": the h-index,
            "figs" (only if `plot` is True): contains the figures.
                See self.plotStats
        }
    """
    if reset:
        self.allInfoA = {}
        self.authorPapersList = [[], []]
        self.allCitations = []
    if isinstance(authorName, list):
        try:
            pbMax(len(authorName))
        except TypeError:
            pass
        for ia, a in enumerate(authorName):
            try:
                pbVal(ia + 1)
            except TypeError:
                pass
            self.authorStats(a, reset=False)
        self.authorPlotInfo["name"] = authorName
        return self.authorPlotInfo
    pBLogger.info(isstr.authorStats % authorName)
    url = (pbConfig.inspireLiteratureAPI
           + "?q=author:"
           + authorName
           + self.authorStatsOpts
           + str(self.maxPerPage))
    data = self.JsonFromUrl(url)
    recid_authorPapers = sorted(["%s" % a["id"] for a in data])
    tot = len(recid_authorPapers)
    pBLogger.info(isstr.authorStatsProcess % tot)
    self.runningAuthorStats = True
    try:
        pbMax(len(recid_authorPapers))
    except TypeError:
        pass
    for i, p in enumerate(recid_authorPapers):
        try:
            pbVal(i + 1)
        except TypeError:
            pass
        if not self.runningAuthorStats:
            pBLogger.info(isstr.stopReceived)
            break
        time.sleep(1)
        if p in self.allInfoA.keys():
            continue
        self.allInfoA[p] = {}
        self.allInfoA[p]["date"] = dateutil.parser.parse(
            data[i]["created"])
        self.authorPapersList[0].append(self.allInfoA[p]["date"])
        pBLogger.info(isstr.authorStatsLooking
                      % (i + 1, tot, 100.0 * (i + 1) / tot, p))
        paperInfo = self.paperStats(
            p, verbose=0, paperDate=self.allInfoA[p]["date"])
        self.allInfoA[p]["infoDict"] = paperInfo["aI"]
        self.allInfoA[p]["citingPapersList"] = paperInfo["citList"]
        for c, v in self.allInfoA[p]["infoDict"].items():
            self.allCitations.append(v["date"])
    pBLogger.info("")
    self.authorPapersList[1] = []
    for i, p in enumerate(sorted(self.authorPapersList[0])):
        self.authorPapersList[0][i] = p
        self.authorPapersList[1].append(i + 1)
    pBLogger.info(isstr.savingCitations)
    allCitList = [[], []]
    meanCitList = [[], []]
    currPaper = 0
    for i, d in enumerate(sorted(self.allCitations)):
        if (currPaper < len(self.authorPapersList[0]) - 1
                and d >= self.authorPapersList[0][currPaper + 1]):
            currPaper += 1
        allCitList[0].append(d)
        allCitList[1].append(i + 1)
        meanCitList[0].append(d)
        meanCitList[1].append(
            (i + 1.0) / self.authorPapersList[1][currPaper])
    hind = 0
    citations = [
        len(self.allInfoA[k]["citingPapersList"][0]) - 2
        for k in self.allInfoA.keys()
    ]
    for h in range(len(citations)):
        if len([a for a in citations if a >= h]) >= h:
            hind = h
    self.authorPlotInfo = {
        "name": authorName,
        "aI": self.allInfoA,
        "paLi": self.authorPapersList,
        "allLi": allCitList,
        "meanLi": meanCitList,
        "h": hind,
    }
    if plot:
        self.authorPlotInfo["figs"] = self.plotStats(author=True)
    pBLogger.info(isstr.authorStatsCompleted % authorName)
    return self.authorPlotInfo

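
# Usage sketch: full citation statistics for an author (`inspireStats`
# as above; the author string is a placeholder for an INSPIRE author
# identifier). The h-index is computed from the per-paper citation
# counts collected in the loop above.
#
#     stats = inspireStats.authorStats("e.author.1", plot=True)
#     print(stats["h"], len(stats["paLi"][0]))
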