예제 #1
0
 def removeUnusedBibtexs(existingBibsDict):
     r"""Rewrite the output .bib file keeping only the cited entries.

     The entries whose key is not in `self.allCitations`
     (i.e. not inside \cite commands) are discarded and their keys
     are reported in the log. The remaining entries are written
     sorted by their case-insensitive "ID".

     Parameters:
         existingBibsDict: a dictionary of bibtex entries
             (each one with an "ID" field). The sorted "ID" values
             are used to index the dictionary again, so the keys
             are expected to coincide with the "ID" of the entries
     """
     keptEntries = {}
     droppedKeys = []
     for bibkey, entry in existingBibsDict.items():
         if bibkey not in self.allCitations:
             droppedKeys.append(bibkey)
             continue
         keptEntries[bibkey] = entry
     sortedIds = sorted(
         (entry["ID"] for entry in keptEntries.values()),
         key=lambda bibId: bibId.lower(),
     )
     db.entries = [keptEntries[bibId] for bibId in sortedIds]
     bibContent = pbWriter.write(db)
     try:
         with open(outFileName, "w") as outFile:
             outFile.write(exstr.byPhysbiblio + bibContent)
             pBLogger.info(exstr.entriesRemoved % droppedKeys)
     except IOError:
         pBLogger.exception(exstr.errorWrite % outFileName)
예제 #2
0
def call_gui(args=None):
    """Start the PhysBiblio graphical interface.

    Creates the Qt application and shows the main window.
    If the stored "openSinceLastUpdate" parameter differs from
    the current version, the recent changes are shown and
    the stored value is updated.
    When the application exits, the database is closed.

    Parameters:
        args: not used inside this function
    """
    # these two imports are intentionally local:
    # they must run only after the profile has been loaded properly
    try:
        import physbiblio.gui.mainWindow
        from physbiblio.database import pBDB
    except ImportError:
        print("Could not find physbiblio and its modules!")
        raise
    try:
        qtApp = QApplication(sys.argv)
        window = physbiblio.gui.mainWindow.MainWindow()
        # uncaught exceptions are forwarded to the main window signal
        sys.excepthook = window.errormessage.emit
        window.show()
        window.raise_()
        versionChanged = pbConfig.params["openSinceLastUpdate"] != __version__
        if versionChanged:
            window.recentChanges()
            pbConfig.globalDb.config.update("openSinceLastUpdate", __version__)
            pbConfig.globalDb.commit()
        exitCode = qtApp.exec_()
        sys.exit(exitCode)
    except NameError:
        pBLogger.critical("NameError:", exc_info=True)
    except SystemExit:
        # raised by sys.exit above when the event loop ends
        pBDB.closeDB()
        pBLogger.info(apstr.closeMainW)
예제 #3
0
        def saveEntryOutBib(a, m=None):
            """Remove unwanted fields and append the bibtex entry
            to the output file

            Parameters:
                a: the bibtex entry (a string that is parsed
                    with bibtexparser; only the first entry is used)
                m: the ID (bibtex key) of the entry,
                    if it is not the default one

            Output:
                False if the output file cannot be written,
                None otherwise
            """
            entry = (
                bibtexparser.bparser.BibTexParser(common_strings=True)
                .parse(a)
                .entries[0]
            )
            for u in self.unwantedFields:
                # fields that are not present in the entry are ignored
                entry.pop(u, None)
            if m is not None:
                m = m.strip()
                if m != entry["ID"].strip():
                    entry["ID"] = m
            # key reported in the log: the requested one if given,
            # otherwise the one of the parsed entry,
            # so that the message never reports "None"
            loggedKey = m if m is not None else entry["ID"]
            db.entries = [entry]
            bibf = pbWriter.write(db)
            try:
                with open(outFileName, "a") as o:
                    o.write(bibf)
                    pBLogger.info(exstr.entryInserted % loggedKey)
            except IOError:
                pBLogger.exception(exstr.errorWrite % outFileName)
                return False
예제 #4
0
    def openLink(self, key, arg="arxiv", fileArg=None):
        """Generate the web link with the getLink method
        and open it with the external web application.

        If `key` is a list, each element is processed separately.
        If `arg` is "file", getLink is called and nothing else is done.
        If `arg` is "link", `key` itself is used as the link.

        Parameters:
            key, arg, fileArg as in the getLink method
        """
        if isinstance(key, list):
            for singleKey in key:
                self.openLink(singleKey, arg, fileArg)
            return
        if arg == "file":
            self.getLink(key, arg=arg, fileArg=fileArg)
            return
        if arg == "link":
            url = key
        else:
            url = self.getLink(key, arg=arg, fileArg=fileArg)
        if not url:
            pBLogger.warning(vstr.errorLink % (arg, key))
            return
        # nothing is opened when no web application is configured
        if self.webApp != "":
            pBLogger.info(vstr.opening % url)
            command = [self.webApp, url]
            try:
                subprocess.Popen(
                    command,
                    stdout=subprocess.PIPE,
                    stderr=subprocess.STDOUT,
                )
            except OSError:
                pBLogger.warning(vstr.openingFailed % ("link", key))
예제 #5
0
    def getGenericInfo(self,
                       string,
                       fields,
                       rows=pbConfig.params["maxExternalAPIResults"]):
        """Query the ADS API (through the `ads` python client)
        and collect the results of a given search string

        Parameters:
            string: the search string
            fields: a list with the names of the required fields
            rows: the number of rows to obtain

        Output:
            a list with the objects produced by the query,
            or an empty list if the query raised an exception
        """
        ads.config.token = pbConfig.params["ADSToken"]
        try:
            self.q = ads.SearchQuery(q=string, fl=fields, rows=rows)
            entries = list(self.q)
        except ads.exceptions.APIResponseError:
            pBLogger.exception(self.unauthorized)
            return []
        except Exception:
            pBLogger.exception(self.genericFetchError, exc_info=True)
            return []
        # reached only when the query succeeded
        pBLogger.info(self.getLimitInfo())
        return entries
예제 #6
0
def editCategory(parentObject, mainWinObject, editIdCat=None, useParentCat=None):
    """Show an `EditCategoryDialog` to edit or create a category,
    then save the result in the database and report a status message.

    Parameters:
        parentObject: the parent widget
        mainWinObject: the object which has
            the `statusBarMessage` and `setWindowTitle` methods
        editIdCat: the id of the category to be edited,
            or `None` to create a new category
        useParentCat: the parent category (if any)
            of the one to be edited
    """
    category = None if editIdCat is None else pBDB.cats.getDictByID(editIdCat)
    dialog = EditCategoryDialog(
        parentObject, category=category, useParentCat=useParentCat
    )
    dialog.exec_()
    if not dialog.result:
        message = cwstr.noModifications
    else:
        data = {}
        for field, widget in dialog.textValues.items():
            if field == "parentCat":
                # the parent is the first selected category, "0" if none
                try:
                    value = str(dialog.selectedCats[0])
                except IndexError:
                    value = "0"
            else:
                value = "%s" % widget.text()
            data[field] = value
        if data["name"].strip() == "":
            message = cwstr.emptyName
        else:
            # an "idCat" field means that an existing category is updated
            if "idCat" in data:
                pBLogger.info(cwstr.updateCat % data["idCat"])
                pBDB.cats.update(data, data["idCat"])
            else:
                pBDB.cats.insert(data)
            message = cwstr.catSaved
            mainWinObject.setWindowTitle(cwstr.winTitleModified)
            try:
                parentObject.recreateTable()
            except AttributeError:
                pBLogger.debug(
                    cwstr.noAttribute % ("parentObject", "recreateTable"),
                    exc_info=True,
                )
    try:
        mainWinObject.statusBarMessage(message)
    except AttributeError:
        pBLogger.debug(
            cwstr.noAttribute % ("mainWinObject", "statusBarMessage"),
            exc_info=True,
        )
예제 #7
0
    def exportLast(self, fileName):
        """Write the last queried entries (`pBDB.bibs.lastFetched`)
        into a .bib file through `exportRows`.
        If there are no entries, only a log message is produced.

        Parameters:
            fileName: the name of the output bibtex file
        """
        if not pBDB.bibs.lastFetched:
            pBLogger.info(exstr.noLastSel)
            return
        self.exportRows(fileName, pBDB.bibs.lastFetched)
예제 #8
0
    def changeBackend(self, wantBackend):
        """Changes the matplotlib backend currently in use,
        if it differs from the requested one.

        Parameters:
            wantBackend: a string that defines the wanted backend
        """
        if wantBackend == matplotlib.get_backend():
            return
        try:
            # the `warn` argument is accepted only by older
            # matplotlib releases
            matplotlib.use(wantBackend, warn=False, force=True)
        except TypeError:
            # releases in which the `warn` argument has been removed
            matplotlib.use(wantBackend, force=True)
        # pyplot is imported after the switch; the name is local
        # and is not used further inside this method
        from matplotlib import pyplot as plt

        pBLogger.info(isstr.changeBackend % matplotlib.get_backend())
예제 #9
0
파일: cli.py 프로젝트: steog88/PhysBiblio
def cli():
    """Open a command line interface.

    The namespace of the console is a copy of the module globals:
    the many initial imports allow the user
    to automatically access the useful classes.
    When the console is closed, the database is closed as well.
    """
    # a copy of the globals is all the console needs.
    # The namespace dictionary is deliberately not added to itself
    # under the name `vars`, so that the `vars` builtin
    # stays usable inside the console
    namespace = globals().copy()
    shell = code.InteractiveConsole(namespace)
    shell.interact(clistr.activate)
    pBDB.closeDB()
    pBLogger.info(clistr.close)
예제 #10
0
def editExperiment(parentObject, mainWinObject, editIdExp=None):
    """Show an `EditExperimentDialog` to edit or create an experiment,
    then save the result in the database and report a status message.

    Parameters:
        parentObject: the parent widget
        mainWinObject: the object which has
            the `statusBarMessage` and `setWindowTitle` methods
        editIdExp: the id of the experiment to be edited,
            or `None` to create a new one
    """
    experiment = None if editIdExp is None else pBDB.exps.getDictByID(editIdExp)
    dialog = EditExperimentDialog(parentObject, experiment=experiment)
    dialog.exec_()
    if not dialog.result:
        message = ewstr.noModifications
    else:
        data = {
            field: "%s" % widget.text()
            for field, widget in dialog.textValues.items()
        }
        if data["name"].strip() == "":
            message = ewstr.emptyName
        else:
            # an "idExp" field means that an existing experiment is updated
            if "idExp" in data:
                pBLogger.info(ewstr.updateExp % data["idExp"])
                pBDB.exps.update(data, data["idExp"])
            else:
                pBDB.exps.insert(data)
            message = ewstr.expSaved
            mainWinObject.setWindowTitle(ewstr.winTitleModified)
            try:
                parentObject.recreateTable()
            except AttributeError:
                pBLogger.debug(
                    ewstr.noAttribute % ("parentObject", "recreateTable"),
                    exc_info=True,
                )
    try:
        mainWinObject.statusBarMessage(message)
    except AttributeError:
        pBLogger.debug(
            ewstr.noAttribute % ("mainWinObject", "statusBarMessage"),
            exc_info=True,
        )
예제 #11
0
    def exportRows(self, fileName, rows):
        """Export the given entries into a .bib file.

        `backupCopy` is called before writing: if the export fails
        `restoreBackupCopy` is called, and in any case
        `rmBackupCopy` is called at the end.

        Parameters:
            fileName: the name of the output bibtex file
            rows: the list of entries to be exported
                (the "bibtex" item of each one is written)
        """
        self.backupCopy(fileName)
        if rows != []:
            try:
                with codecs.open(fileName, "w", "utf-8") as bibfile:
                    for q in rows:
                        bibfile.write(q["bibtex"] + "\n")
            except Exception:
                # `exception` already appends the traceback of the
                # exception being handled: extra positional arguments
                # would only be used to %-format the message
                # NOTE(review): assumes exstr.errorExport
                # has no % placeholder - confirm
                pBLogger.exception(exstr.errorExport)
                self.restoreBackupCopy(fileName)
        else:
            pBLogger.info(exstr.noElement)
        self.rmBackupCopy(fileName)
예제 #12
0
def parse_accents_record(record):
    """Function that reads the fields inside a bibtex dictionary
    and translates all the known unicode characters into latex commands.

    The "ID" field and the fields that contain only whitespace
    are left untouched. When a field is modified, the change is logged
    and the "ID" of the record is appended to `accents_changed`.

    Parameters:
        record: the bibtex dictionary generated by bibtexparser
            (modified in place)

    Output:
        the dictionary after having processed all the fields
    """
    for field in record:
        if field == "ID" or len(record[field].strip()) == 0:
            continue
        converted = utf8tolatex(record[field], non_ascii_only=True)
        if converted != record[field]:
            pBLogger.info(pastr.converting % record["ID"])
            # the converted text is logged as a string:
            # encoding it to bytes would make the concatenation
            # with a string fail on Python 3
            pBLogger.info(pastr.infodashes + converted)
            accents_changed.append(record["ID"])
        record[field] = converted
    return record
예제 #13
0
파일: doi.py 프로젝트: steog88/PhysBiblio
    def retrieveUrlFirst(self, string):
        """Download the page associated to the given DOI
        and return its content as a bibtex string.

        Parameters:
            string: the search string (the DOI)

        Output:
            the bibtex string (after `parse_accents_str`),
            or an empty string if the page reports that
            the DOI was not found or if the conversion fails
        """
        url = self.createUrl(string)
        pBLogger.info(self.searchInfo % (string, url))
        page = self.textFromUrl(url, self.headers)
        if "<title>Error: DOI Not Found</title>" in page:
            return ""
        try:
            bibtex = parse_accents_str(page[:])
        except Exception:
            pBLogger.exception(self.genericError)
            bibtex = ""
        return bibtex
예제 #14
0
 def onLoad(self):
     """Read the profile selected in the combo box and,
     if it is not the current one, load it.
     In any case the tabs and the category/experiment windows
     of the parent are closed, the main content is reloaded
     and this window is closed.
     """
     profileName, _description = self.combo.currentText().split(
         pmstr.splitter
     )
     newProfile = pbConfig.profiles[profileName]
     if profileName != pbConfig.currentProfileName:
         pBLogger.info(pmstr.changingProfile)
         pbConfig.reInit(profileName, newProfile)
         pBDB.reOpenDB(pbConfig.currentDatabase)
         self.parent().reloadConfig()
     self.parent().closeAllTabs()
     # the list windows may not exist: in such case nothing is closed
     for winName in ("catListWin", "expListWin"):
         try:
             getattr(self.parent(), winName).close()
         except AttributeError:
             pass
     self.parent().reloadMainContent()
     self.close()
예제 #15
0
    def retrieveUrlFirst(self, string):
        """Download the page associated to the given ISBN
        and return its content as a bibtex string.

        Parameters:
            string: the search string (the ISBN),
                stored in `self.urlArgs["isbn"]`

        Output:
            the bibtex string (after `parse_accents_str`),
            or an empty string if the page contains "Not found"
            or if the conversion fails
        """
        self.urlArgs["isbn"] = string
        url = self.createUrl()
        pBLogger.info(self.searchInfo % (string, url))
        page = self.textFromUrl(url)
        if "Not found" in page:
            return ""
        try:
            bibtex = parse_accents_str(page[:])
        except Exception:
            pBLogger.exception(self.genericError)
            bibtex = ""
        return bibtex
예제 #16
0
 def run(self):
     """Start the receiver,
     import the required entries and finish.

     For each key of `self.found` (in sorted order), the entry
     is first imported with `pBDB.bibs.loadAndInsert` using its eprint.
     If that fails, a minimal bibtex entry is built from
     the stored arXiv information and inserted,
     then an attempt to complete it is performed.
     The lists of inserted and failed keys are logged and stored
     in `self.parent().importArXivResults`.
     """
     self.receiver.start()
     db = bibtexparser.bibdatabase.BibDatabase()
     inserted = []
     failed = []
     for key in sorted(self.found):
         if not self.runningImport:
             # the import is no longer running:
             # the remaining keys are skipped
             continue
         bibpars = self.found[key]["bibpars"]
         if pBDB.bibs.loadAndInsert(bibpars["eprint"]):
             try:
                 newKey = pBDB.bibs.getByKey(key)[0]["bibkey"]
             except IndexError:
                 newKey = pBDB.bibs.getByBibtex(key)[0]["bibkey"]
             inserted.append(newKey)
         else:
             # fallback: create the entry from the arXiv information
             db.entries = [{
                 "ID": bibpars["eprint"],
                 "ENTRYTYPE": "article",
                 "title": bibpars["title"],
                 "author": bibpars["author"],
                 "archiveprefix": "arXiv",
                 "eprint": bibpars["eprint"],
                 "primaryclass": bibpars["primaryclass"],
             }]
             entry = pbWriter.write(db)
             data = pBDB.bibs.prepareInsert(entry)
             if pBDB.bibs.insert(data):
                 pBLogger.info(thestr.elementInserted % key)
                 inserted.append(key)
             else:
                 pBLogger.warning(thestr.elementFailed % key)
                 failed.append(key)
                 continue
             try:
                 pBDB.bibs.updateInspireID(key)
                 pBDB.bibs.searchOAIUpdates(
                     0,
                     entries=pBDB.bibs.getByBibkey(key),
                     force=True,
                     reloadAll=True,
                 )
                 newKey = pBDB.bibs.getByKey(key)[0]["bibkey"]
                 if key != newKey:
                     # the key changed during the update
                     inserted[-1] = newKey
             except Exception:
                 # the entry stays in `inserted`, but it is also
                 # reported among the failures because
                 # it could not be completed.
                 # Only `Exception` is caught, so that
                 # KeyboardInterrupt and SystemExit are not swallowed
                 pBLogger.warning(thestr.failedComplete % (key),
                                  exc_info=True)
                 failed.append(key)
     pBLogger.info(thestr.elementImported % (inserted))
     pBLogger.info(thestr.errorsEntries % (failed))
     self.parent().importArXivResults = (inserted, failed)
     time.sleep(0.1)
     self.receiver.running = False
예제 #17
0
    def getBibtexs(self, bibcodes):
        """Ask the ADS API (through the `ads` python client)
        for the bibtex export of the requested bibcodes

        Parameter:
            bibcodes: a single bibcode
                (string containing the ADS identifier of a given entry)
                or a list of bibcodes

        Output:
            the result of the export query,
            or an empty string if the query raised an exception
        """
        ads.config.token = pbConfig.params["ADSToken"]
        try:
            self.q = ads.ExportQuery(bibcodes=bibcodes, format="bibtex")
            exported = self.q.execute()
        except ads.exceptions.APIResponseError:
            pBLogger.exception(self.unauthorized)
            return ""
        except Exception:
            pBLogger.exception(self.genericExportError, exc_info=True)
            return ""
        # reached only when the export succeeded
        pBLogger.info(self.getLimitInfo())
        return exported
예제 #18
0
    def plotStats(
        self,
        paper=False,
        author=False,
        show=False,
        save=False,
        path=".",
        markPapers=False,
        pickVal=6,
    ):
        """Plot the collected information, using matplotlib.pyplot.

        The paper branch is considered first: the author plots
        are produced only if the paper plot is not requested
        or `self.paperPlotInfo` is None.

        Parameters:
            paper (boolean, default False): plot statistics
                for the last analyzed paper
            author (boolean, default False): plot statistics
                for the last analyzed author
            show (boolean, default False): True to show the plots
                in a separate window (with matplotlib.pyplot.show())
            save (boolean, default False): True to save the plots into files.
            path (string): where to save the plots
            markPapers (boolean, default False): True to draw
                a vertical lines at the dates
                corresponding to a paper appearing
            pickVal (float, default 6): the picker tolerance

        Output:
            False if no plot can be produced (neither the paper
                nor the author branch applies, or the author
                has no publication dates),
            None if the paper plot is requested
                but the list of citations is empty,
            the matplotlib.pyplot figure containing
                the citation plot if paper==True,
            a list of matplotlib.pyplot figures containing
                the various plots if author==True
        """
        if paper and self.paperPlotInfo is not None:
            # single plot: citations of the paper as a function of time
            if len(self.paperPlotInfo["citList"][0]) > 0:
                pBLogger.info(isstr.plotPaper % self.paperPlotInfo["id"])
                fig, ax = plt.subplots()
                plt.plot(
                    self.paperPlotInfo["citList"][0],
                    self.paperPlotInfo["citList"][1],
                    picker=True,
                    pickradius=pickVal,
                )
                fig.autofmt_xdate()
                if save:
                    pdf = PdfPages(
                        osp.join(path, self.paperPlotInfo["id"] + ".pdf"))
                    pdf.savefig()
                    pdf.close()
                if show:
                    plt.show()
                plt.close()
                return fig
        elif author and self.authorPlotInfo is not None:
            pBLogger.info(isstr.plotAuthor % self.authorPlotInfo["name"])
            # range of years for the histograms: two years before
            # the first date and two years after the last one
            try:
                ymin = min(
                    int(self.authorPlotInfo["allLi"][0][0].strftime("%Y")) - 2,
                    int(self.authorPlotInfo["paLi"][0][0].strftime("%Y")) - 2,
                )
                ymax = max(
                    int(self.authorPlotInfo["allLi"][0][-1].strftime("%Y")) +
                    2,
                    int(self.authorPlotInfo["paLi"][0][-1].strftime("%Y")) + 2,
                )
            except:
                # fallback: use only the dates in "paLi"
                try:
                    ymin = int(
                        self.authorPlotInfo["paLi"][0][0].strftime("%Y")) - 2
                    ymax = int(
                        self.authorPlotInfo["paLi"][0][-1].strftime("%Y")) + 2
                except:
                    pBLogger.warning(isstr.noPublications)
                    return False
            figs = []
            # plot of the "paLi" data as a function of time
            if len(self.authorPlotInfo["paLi"][0]) > 0:
                fig, ax = plt.subplots()
                plt.title(isstr.paperNumber)
                plt.plot(
                    self.authorPlotInfo["paLi"][0],
                    self.authorPlotInfo["paLi"][1],
                    picker=True,
                    pickradius=pickVal,
                )
                fig.autofmt_xdate()
                if save:
                    pdf = PdfPages(
                        osp.join(path,
                                 self.authorPlotInfo["name"] + "_papers.pdf"))
                    pdf.savefig()
                    pdf.close()
                if show:
                    plt.show()
                plt.close()
                figs.append(fig)

            # histogram of the years of the "paLi" dates
            if len(self.authorPlotInfo["paLi"][0]) > 0:
                fig, ax = plt.subplots()
                plt.title(isstr.paperYear)
                ax.hist(
                    [
                        int(q.strftime("%Y"))
                        for q in self.authorPlotInfo["paLi"][0]
                    ],
                    bins=range(ymin, ymax),
                    picker=True,
                )
                ax.get_xaxis().get_major_formatter().set_useOffset(False)
                plt.xlim([ymin, ymax])
                if save:
                    pdf = PdfPages(
                        osp.join(
                            path,
                            self.authorPlotInfo["name"] + "_yearPapers.pdf"))
                    pdf.savefig()
                    pdf.close()
                if show:
                    plt.show()
                plt.close()
                figs.append(fig)

            # plot of the "allLi" data as a function of time
            if len(self.authorPlotInfo["allLi"][0]) > 0:
                fig, ax = plt.subplots()
                plt.title(isstr.totalCitations)
                plt.plot(
                    self.authorPlotInfo["allLi"][0],
                    self.authorPlotInfo["allLi"][1],
                    picker=True,
                    pickradius=pickVal,
                )
                fig.autofmt_xdate()
                if save:
                    pdf = PdfPages(
                        osp.join(path,
                                 self.authorPlotInfo["name"] + "_allCit.pdf"))
                    pdf.savefig()
                    pdf.close()
                if show:
                    plt.show()
                plt.close()
                figs.append(fig)

            # histogram of the years of the "allLi" dates
            if len(self.authorPlotInfo["allLi"][0]) > 0:
                fig, ax = plt.subplots()
                plt.title(isstr.citationsYear)
                ax.hist(
                    [
                        int(q.strftime("%Y"))
                        for q in self.authorPlotInfo["allLi"][0]
                    ],
                    bins=range(ymin, ymax),
                    picker=True,
                )
                ax.get_xaxis().get_major_formatter().set_useOffset(False)
                plt.xlim([ymin, ymax])
                if save:
                    pdf = PdfPages(
                        osp.join(path,
                                 self.authorPlotInfo["name"] + "_yearCit.pdf"))
                    pdf.savefig()
                    pdf.close()
                if show:
                    plt.show()
                plt.close()
                figs.append(fig)

            # plot of the "meanLi" data as a function of time,
            # optionally with a vertical line at each "paLi" date
            if len(self.authorPlotInfo["meanLi"][0]) > 0:
                fig, ax = plt.subplots()
                plt.title(isstr.meanCitations)
                plt.plot(
                    self.authorPlotInfo["meanLi"][0],
                    self.authorPlotInfo["meanLi"][1],
                    picker=True,
                    pickradius=pickVal,
                )
                fig.autofmt_xdate()
                if markPapers:
                    for q in self.authorPlotInfo["paLi"][0]:
                        plt.axvline(
                            datetime.datetime(
                                int(q.strftime("%Y")),
                                int(q.strftime("%m")),
                                int(q.strftime("%d")),
                            ),
                            color="k",
                            ls="--",
                        )
                if save:
                    pdf = PdfPages(
                        osp.join(path,
                                 self.authorPlotInfo["name"] + "_meanCit.pdf"))
                    pdf.savefig()
                    pdf.close()
                if show:
                    plt.show()
                plt.close()
                figs.append(fig)

            # one line for each element of "aI", all in the same figure
            if len(self.authorPlotInfo["aI"].keys()) > 0:
                fig, ax = plt.subplots()
                plt.title(isstr.citationsPaper)
                for i, p in enumerate(self.authorPlotInfo["aI"].keys()):
                    try:
                        plt.plot(
                            self.authorPlotInfo["aI"][p]["citingPapersList"]
                            [0],
                            self.authorPlotInfo["aI"][p]["citingPapersList"]
                            [1],
                        )
                    except:
                        # a failure on one element is logged
                        # and does not stop the loop
                        pBLogger.exception(isstr.errorPlotting)
                fig.autofmt_xdate()
                if save:
                    pdf = PdfPages(
                        osp.join(path, self.authorPlotInfo["name"] +
                                 "_paperCit.pdf"))
                    pdf.savefig()
                    pdf.close()
                if show:
                    plt.show()
                plt.close()
                figs.append(fig)
            return figs
        else:
            pBLogger.info(isstr.noPlot)
            return False
예제 #19
0
    def paperStats(
        self,
        paperID,
        plot=False,
        verbose=1,
        paperDate=None,
        reset=True,
        pbMax=None,
        pbVal=None,
    ):
        """Function that gets the data and
        constructs the statistics for a given paper.

        If `paperID` is a list, the function calls itself
        on each element (with `reset=False`) and returns
        the information accumulated after the last call.

        Parameters:
            paperID (string): the INSPIRE-HEP id of the paper (a number),
                or a list of ids
            plot (boolean): whether or not the citations
                should be plotted (default False)
            verbose (int, default 1): increase the verbosity level
            paperDate (datetime, optional): the date of at which
                the paper was published
            reset (boolean, default True): True to delete
                all previous existing data
                (used as False when processing a list of IDs)
            pbMax (callable, optional): a function to set the maximum
                of a progress bar in the GUI, if possible
            pbVal (callable, optional): a function to set the value
                of a progress bar in the GUI, if possible

        Output:
            a dictionary containing all the desired information.
            The structure is the following:
            {
                "id": the paper ID,
                "aI": the list of creation date for all the papers,
                    in INSPIRE-HEP order,
                "citList": the ordered list of citing papers,
                "fig" (only if `plot` is True): contains the figure.
                    See self.plotStats
            }
        """
        if reset:
            self.allInfoP = {}
            self.citingPapersList = [[], []]
        if isinstance(paperID, list):
            self.runningPaperStats = True
            # pbMax and pbVal may be None: calling them
            # raises a TypeError, which is ignored
            try:
                pbMax(len(paperID))
            except TypeError:
                pass
            for ia, a in enumerate(paperID):
                try:
                    pbVal(ia + 1)
                except TypeError:
                    pass
                # the remaining ids are skipped
                # if self.runningPaperStats becomes false
                if self.runningPaperStats:
                    self.paperStats(a, reset=False)
            self.paperPlotInfo["id"] = paperID
            return self.paperPlotInfo
        if verbose > 0:
            pBLogger.info(isstr.paperStats % paperID)
        url = (pbConfig.inspireLiteratureAPI + "?q=refersto:recid:" + paperID +
               self.paperStatsOpts + str(self.maxPerPage))
        data = self.JsonFromUrl(url)
        recid_citingPapers = [a["id"] for a in data]
        if paperDate is not None:
            self.citingPapersList[0].append(paperDate)
        # store the creation date of each citing paper
        for i, p in enumerate(recid_citingPapers):
            self.allInfoP[p] = {}
            self.allInfoP[p]["date"] = dateutil.parser.parse(
                data[i]["created"])
            self.citingPapersList[0].append(
                self.allInfoP[p]["date"].replace(tzinfo=pytz.UTC))
        # rewrite the dates in sorted order and
        # append the progressive counter to the second list
        for i, p in enumerate(sorted(self.citingPapersList[0])):
            self.citingPapersList[0][i] = p
            self.citingPapersList[1].append(i + 1)
        # last point: the current day (at midnight, UTC)
        self.citingPapersList[0].append(
            datetime.datetime.fromordinal(
                datetime.date.today().toordinal()).replace(tzinfo=pytz.UTC))
        # the last counter is repeated for the current day,
        # or 0 is used if the list of counters is empty
        try:
            self.citingPapersList[1].append(self.citingPapersList[1][-1])
        except IndexError:
            self.citingPapersList[1].append(0)
        self.paperPlotInfo = {
            "id": paperID,
            "aI": self.allInfoP,
            "citList": self.citingPapersList,
        }
        if plot:
            self.paperPlotInfo["fig"] = self.plotStats(paper=True)
        if verbose > 0:
            pBLogger.info(isstr.doneE)
        return self.paperPlotInfo
예제 #20
0
    def exportForTexFile(
        self,
        texFileName,
        outFileName,
        overwrite=False,
        autosave=True,
        updateExisting=False,
        removeUnused=False,
        reorder=False,
        newOperation=True,
    ):
        """Reads a .tex file looking for the \cite{} commands,
        collects the bibtex entries cited in the text and
        stores them in a bibtex file.
        The entries are taken from the database first,
        or from INSPIRE-HEP if possible.
        The downloaded entries are saved in the database.

        Parameters:
            texFileName: the name (or a list of names)
                of the considered .tex file(s)
            outFileName: the name of the output file,
                where the required entries will be added
            overwrite (boolean, default False):
                if True, the previous version of the file is replaced
                and no backup copy is created
            autosave (boolean, default True):
                if True, the changes to the database are automatically saved.
            updateExisting (boolean, default False):
                if True, remove duplicates and update entries
                that have been chenged in the DB
            removeUnused (boolean, default False):
                if True, remove bibtex entries that are no more cited
                in the tex files
            reorder (boolean, default False):
                if True, reorder (not update!) the bibtex entries
                in the bib files before adding the new ones
            newOperation (boolean, default True):
                reset the self.existingBibsList and read file .bib content.
                Time consuming! better to just keep it updated
                when using multiple texs...

        Output:
            True if successful, False if errors occurred
        """
        db = bibtexparser.bibdatabase.BibDatabase()

        def printOutput(
            reqBibkeys, miss, retr, nFound, unexp, nKeys, warn, totalCites, full=False
        ):
            """Log a summary of the export process

            Parameters:
                reqBibkeys: the list of new keys that were required
                miss: the list of keys that were missing
                retr: the list of keys that were retrieved
                nFound: the list of keys that were not found
                unexp: the list of keys for which
                    something unexpected happened
                nKeys: a dictionary {requested key: obtained key}
                    for the non matching entries
                warn: the total number of warnings
                totalCites: the total number of keys found
                    (not logged if None)
                full (boolean, default False):
                    if True, log also the content of `reqBibkeys`
                    and `miss`, not only their lengths
            """
            sep = ", "

            def logCountAndKeys(template, keys):
                """Log the number of keys and then the keys themselves"""
                pBLogger.info(template % len(keys))
                pBLogger.info(sep.join(keys))

            pBLogger.info(exstr.resume)
            if totalCites is not None:
                pBLogger.info(exstr.keysFound % totalCites)
            pBLogger.info(exstr.newKeysFound % len(reqBibkeys))
            if full:
                pBLogger.info(sep.join(reqBibkeys))
            if miss:
                pBLogger.info(exstr.missingEntries % len(miss))
                if full:
                    pBLogger.info(sep.join(miss))
            if retr:
                logCountAndKeys(exstr.retrievedEntries, retr)
            if nFound:
                logCountAndKeys(exstr.entriesNotFound, nFound)
            if unexp:
                logCountAndKeys(exstr.unexpectedForEntries, unexp)
            if nKeys:
                header = exstr.nonMatchingEntries % len(nKeys)
                pairs = "\n".join(
                    "'%s' => '%s'" % (old, new) for old, new in nKeys.items()
                )
                pBLogger.info(header + pairs)
            pBLogger.info(exstr.totalWarnings % warn)

        def saveEntryOutBib(a, m=None):
            """Remove unwanted fields and append the bibtex entry
            to the output file

            Parameters:
                a: the bibtex entry (only the first record
                    parsed from it is used)
                m: the ID (bibtex key) of the entry,
                    if it is not the default one

            Output:
                True if the entry has been written,
                False if the output file could not be written
            """
            entry = (
                bibtexparser.bparser.BibTexParser(common_strings=True)
                .parse(a)
                .entries[0]
            )
            for u in self.unwantedFields:
                entry.pop(u, None)
            if m is not None:
                m = m.strip()
                if m != entry["ID"].strip():
                    entry["ID"] = m
            db.entries = [entry]
            bibf = pbWriter.write(db)
            try:
                with open(outFileName, "a") as o:
                    o.write(bibf)
            except IOError:
                pBLogger.exception(exstr.errorWrite % outFileName)
                return False
            else:
                # when no explicit key is given, report the key of the
                # entry itself instead of logging "None"
                pBLogger.info(exstr.entryInserted % (entry["ID"] if m is None else m))
                return True

        def removeUnusedBibtexs(existingBibsDict):
            r"""Rewrite the output .bib file keeping only the entries
            whose key is in `self.allCitations`,
            i.e. the ones that appear inside \cite commands.
            The kept entries are written sorted by ID (case insensitive).

            Parameter:
                existingBibsDict: a dictionary {bibtex key: bibtex entry}
                    with the current content of the .bib file
            """
            kept = []
            notFound = []
            for k, v in existingBibsDict.items():
                if k in self.allCitations:
                    kept.append(v)
                else:
                    notFound.append(k)
            # sort the entries themselves: looking them up again by e["ID"]
            # fails with a KeyError when the ID and the dictionary key
            # differ only by case
            db.entries = sorted(kept, key=lambda e: e["ID"].lower())
            bibf = pbWriter.write(db)
            try:
                with open(outFileName, "w") as o:
                    o.write(exstr.byPhysbiblio + bibf)
                    pBLogger.info(exstr.entriesRemoved % notFound)
            except IOError:
                pBLogger.exception(exstr.errorWrite % outFileName)

        self.exportForTexFlag = True
        pBLogger.info(exstr.startEFTF)
        pBLogger.info(exstr.readFrom % texFileName)
        pBLogger.info(exstr.saveTo % outFileName)
        if autosave:
            pBLogger.info(exstr.autoSave)

        missing = []
        newKeys = {}
        notFound = []
        requiredBibkeys = []
        retrieved = []
        unexpected = []
        warnings = 0
        totalCites = 0

        # if overwrite, reset the output file
        if overwrite:
            updateExisting = False
            removeUnused = False
            reorder = False
            try:
                with open(outFileName, "w") as o:
                    o.write(exstr.byPhysbiblio)
            except IOError:
                pBLogger.exception(exstr.cannotWrite)
                return False

        # read previous content of output file, if any
        try:
            with open(outFileName, "r") as f:
                existingBibText = f.readlines()
        except IOError:
            pBLogger.error(exstr.cannotRead % outFileName)
            try:
                open(outFileName, "w").close()
            except IOError:
                pBLogger.exception(exstr.cannotCreate % outFileName)
                return False
            existingBibText = ""

        # this is time consuming if there are many entries.
        # Do not load it every time for multiple texs!
        if newOperation:
            self.allCitations = set([])
            if existingBibText != "":
                self.existingBibsList = pBDB.bibs.parseAllBibtexs(
                    existingBibText, verbose=False
                )
            else:
                self.existingBibsList = []
        # work with dictionary, so that if there are repeated entries
        # (entries with same ID) they are automatically discarded
        existingBibsDict = CaseInsensitiveDict()
        for e in self.existingBibsList:
            existingBibsDict[e["ID"]] = e

        # if requested, do some cleaning
        if updateExisting or reorder:
            # update entry from DB if existing
            if updateExisting:
                for k, v in existingBibsDict.items():
                    e = pBDB.bibs.getByBibtex(k, saveQuery=False)
                    if len(e) > 0 and e[0]["bibtexDict"] != v:
                        existingBibsDict[k] = e[0]["bibtexDict"]
                        if existingBibsDict[k]["ID"].lower() != k.lower():
                            existingBibsDict[k]["ID"] = k

            # write new (updated) bib content
            # (so also repeated entries are removed)
            db.entries = [
                existingBibsDict[k]
                for k in sorted(
                    [e["ID"] for e in existingBibsDict.values()],
                    key=lambda s: s.lower(),
                )
            ]
            bibf = pbWriter.write(db)
            try:
                with open(outFileName, "w") as o:
                    o.write(exstr.byPhysbiblio + bibf)
                    pBLogger.info(exstr.outputUpdated)
            except IOError:
                pBLogger.exception(exstr.errorWrite % outFileName)

        # if there is a list of tex files, run this function
        # for each of them...no updateExisting and removeUnused!
        if isinstance(texFileName, list):
            if len(texFileName) == 0:
                return False
            elif len(texFileName) == 1:
                texFileName = texFileName[0]
            else:
                for t in texFileName:
                    req, m, ret, nF, un, nK, w, cits = self.exportForTexFile(
                        t,
                        outFileName,
                        overwrite=False,
                        autosave=autosave,
                        updateExisting=False,
                        removeUnused=False,
                        reorder=False,
                        newOperation=False,
                    )
                    requiredBibkeys += req
                    missing += m
                    retrieved += ret
                    notFound += nF
                    unexpected += un
                    for k, v in nK.items():
                        newKeys[k] = v
                    warnings += w
                pBLogger.info(exstr.doneAllTexs)
                if removeUnused:
                    removeUnusedBibtexs(existingBibsDict)
                printOutput(
                    requiredBibkeys,
                    missing,
                    retrieved,
                    notFound,
                    unexpected,
                    newKeys,
                    warnings,
                    len(self.allCitations),
                    full=True,
                )
                return (
                    requiredBibkeys,
                    missing,
                    retrieved,
                    notFound,
                    unexpected,
                    newKeys,
                    warnings,
                    len(self.allCitations),
                )

        # read the texFile
        keyscont = ""
        try:
            with open(texFileName) as r:
                keyscont += r.read()
        except IOError:
            pBLogger.exception(exstr.errorNoFile % texFileName)
            return False

        # extract \cite* commands
        matchKeys = "([0-9A-Za-z_\-':\+\.\&]+)"
        cite = re.compile(
            "\\\\(cite|citep|citet)\{([\n ]*" + matchKeys + "[,]?[\n ]*)*\}",
            re.MULTILINE,
        )  # find \cite{...}
        citeKeys = re.compile(
            matchKeys, re.MULTILINE
        )  # find the keys inside \cite{...}
        citaz = [m for m in cite.finditer(keyscont) if m != ""]
        pBLogger.info(exstr.citeFound % len(citaz))

        # extract required keys from \cite* commands
        for c in citaz:
            try:
                for e in [l.group(1) for l in citeKeys.finditer(c.group())]:
                    e = e.strip()
                    if e == "" or e in ["cite", "citep", "citet"]:
                        continue
                    self.allCitations.add(e)
                    if e not in requiredBibkeys:
                        try:
                            # this it's just to check if already present
                            tmp = existingBibsDict[e]
                        except KeyError:
                            requiredBibkeys.append(e)
            except (IndexError, AttributeError, TypeError):
                pBLogger.warning(exstr.errorCitation % c.group())
                a = []
        pBLogger.info(
            exstr.newKeysTotal % (len(requiredBibkeys), len(self.allCitations))
        )

        # if True, remove unused bibtex entries
        if removeUnused:
            removeUnusedBibtexs(existingBibsDict)

        # check what is missing in the database and insert/import
        # what is needed:
        for m in requiredBibkeys:
            if m.strip() == "":
                continue
            entry = pBDB.bibs.getByBibtex(m)
            entryMissing = len(entry) == 0
            if not self.exportForTexFlag:
                # if flag set, stop execution and
                # go to the end skipping everything
                continue
            elif not entryMissing:
                # if already in the database, just insert it as it is
                bibtex = entry[0]["bibtex"]
                bibtexDict = entry[0]["bibtexDict"]
            else:
                # if no entry is found, mark it as missing
                missing.append(m)
                # if not present, try INSPIRE import
                pBLogger.info(exstr.keyMissing % m)
                newWeb = pBDB.bibs.loadAndInsert(m, returnBibtex=True)
                newCheck = pBDB.bibs.getByBibtex(m, saveQuery=False)

                # if the import worked, insert the entry
                if len(newCheck) > 0:
                    # if key is not matching,
                    # just replace it in the exported bib and print a message
                    if m.strip().lower() != newCheck[0]["bibkey"].lower():
                        warnings += 1
                        newKeys[m] = newCheck[0]["bibkey"]
                    if newCheck[0]["bibkey"] not in retrieved:
                        retrieved.append(newCheck[0]["bibkey"])
                    pBDB.catBib.insert(
                        pbConfig.params["defaultCategories"], newCheck[0]["bibkey"]
                    )
                    bibtex = newCheck[0]["bibtex"]
                    bibtexDict = newCheck[0]["bibtexDict"]
                else:
                    # if nothing found, add a warning for the end
                    warnings += 1
                    notFound.append(m)
                    continue
                pBLogger.info("\n")
            # save in output file
            try:
                bibtexDict["ID"] = m
                self.existingBibsList.append(bibtexDict)
                saveEntryOutBib(bibtex, m)
            except:
                unexpected.append(m)
                pBLogger.exception(exstr.unexpectedEntry % m)

        if autosave:
            pBDB.commit()
        printOutput(
            requiredBibkeys,
            missing,
            retrieved,
            notFound,
            unexpected,
            newKeys,
            warnings,
            len(self.allCitations),
        )
        return (
            requiredBibkeys,
            missing,
            retrieved,
            notFound,
            unexpected,
            newKeys,
            warnings,
            len(self.allCitations),
        )
예제 #21
0
 def printOutput(
     reqBibkeys, miss, retr, nFound, unexp, nKeys, warn, totalCites, full=False
 ):
     """Log a summary of the export process

     Parameters:
         reqBibkeys: the list of new keys that were required
         miss: the list of keys that were missing
         retr: the list of keys that were retrieved
         nFound: the list of keys that were not found
         unexp: the list of keys for which something unexpected happened
         nKeys: a dictionary {requested key: obtained key}
             for the non matching entries
         warn: the total number of warnings
         totalCites: the total number of keys found (not logged if None)
         full (boolean, default False): if True, log also the content
             of `reqBibkeys` and `miss`, not only their lengths
     """
     sep = ", "

     def logCountAndKeys(template, keys):
         """Log the number of keys and then the keys themselves"""
         pBLogger.info(template % len(keys))
         pBLogger.info(sep.join(keys))

     pBLogger.info(exstr.resume)
     if totalCites is not None:
         pBLogger.info(exstr.keysFound % totalCites)
     pBLogger.info(exstr.newKeysFound % len(reqBibkeys))
     if full:
         pBLogger.info(sep.join(reqBibkeys))
     if miss:
         pBLogger.info(exstr.missingEntries % len(miss))
         if full:
             pBLogger.info(sep.join(miss))
     if retr:
         logCountAndKeys(exstr.retrievedEntries, retr)
     if nFound:
         logCountAndKeys(exstr.entriesNotFound, nFound)
     if unexp:
         logCountAndKeys(exstr.unexpectedForEntries, unexp)
     if nKeys:
         header = exstr.nonMatchingEntries % len(nKeys)
         pairs = "\n".join(
             "'%s' => '%s'" % (old, new) for old, new in nKeys.items()
         )
         pBLogger.info(header + pairs)
     pBLogger.info(exstr.totalWarnings % warn)
예제 #22
0
파일: arxiv.py 프로젝트: steog88/PhysBiblio
    def arxivDaily(self, category):
        """Read daily RSS feed for a given category

        Parameter:
            category: the selected category (see `self.categories`),
                in the form "main" or "main.sub"

        Output:
            a list of dictionaries, one per feed entry, with keys
            "eprint", "abstract", "authors", "author", "replacement",
            "primaryclass", "cross", "version", "title";
            False if the category is not valid, the url content is empty
            or the feed cannot be parsed
        """
        # partition never fails: a category with more than one dot ends up
        # with an unknown sub-category instead of raising a ValueError
        main, _, sub = category.partition(".")
        if main not in self.categories:
            pBLogger.warning(self.mainCatNotFound % main)
            return False
        if sub != "" and sub not in self.categories[main]:
            pBLogger.warning(self.subCatNotFound % sub)
            return False
        url = self.urlRss + main
        if sub != "":
            url += "." + sub
        pBLogger.info(url)
        text = self.textFromUrl(url)
        if text is None:
            pBLogger.warning(self.emptyUrl)
            return False
        author = re.compile(r"(>|&gt;)([^/]*)(</a>|&lt;/a&gt;)")
        # matches " (arXiv:<version> [<primary class>]<FLAGS>)" in the title
        additionalInfo = re.compile(
            r" \(arXiv:([0-9\.v]*) \[([\-\.a-zA-Z]*)\]([ A-Z]*)\)")
        if sys.version_info[0] < 3:
            text = text.decode("utf-8")
        try:
            data = feedparser.parse(parse_accents_str(text))
            entries = []
            for element in data.entries:
                title = element["title"]
                # a single search gives the same first match that was
                # previously extracted three times with finditer
                info = additionalInfo.search(title)
                if info is None:
                    # as before, an unexpected title makes the whole
                    # parsing fail (handled by the except below)
                    raise ValueError(
                        "Missing arXiv information in title: %r" % title)
                authors = [
                    m.group(2)
                    for m in author.finditer(element["authors"][0]["name"])
                ]
                maxAuthors = pbConfig.params["maxAuthorNames"]
                # NOTE(review): with exactly maxAuthorNames authors,
                # "others" is still appended -- confirm this is intended
                if len(authors) < maxAuthors:
                    authorStr = " and ".join(authors)
                else:
                    authorStr = " and ".join(
                        authors[0:maxAuthors] + ["others"])
                primaryclass = info.group(2)
                tmp = {}
                tmp["eprint"] = element["id"].split("/")[-1]
                tmp["abstract"] = (element["summary"].replace(
                    "\n", " ").replace("<p>", "").replace("</p>", ""))
                tmp["authors"] = authors
                tmp["author"] = authorStr
                tmp["replacement"] = "UPDATED" in title
                tmp["primaryclass"] = primaryclass
                tmp["cross"] = ("CROSS LISTED" in title
                                or category.lower()
                                not in primaryclass.lower())
                tmp["version"] = info.group(1)
                tmp["title"] = title.replace(info.group(), "")
                entries.append(tmp)
            return entries
        except Exception:
            pBLogger.error(self.cannotParseRSS % text, exc_info=True)
            return False
예제 #23
0
파일: arxiv.py 프로젝트: steog88/PhysBiblio
    def arxivRetriever(self,
                       string,
                       searchType="all",
                       additionalArgs=None,
                       fullDict=False):
        """Reads the feed content got from arxiv into a dictionary,
        used to return a bibtex.

        Parameters:
            string: the search string
            searchType: the search method in arxiv API (default 'all').
                The possible values are:
                ti  -> Title
                au  -> Author
                abs -> Abstract
                co  -> Comment
                jr  -> Journal Reference
                cat -> Subject Category
                rn  -> Report Number
                id  -> Id (use id_list instead)
                all -> All of the above
            additionalArgs: a dictionary of additional arguments
                that are stored in self.urlArgs (default None)
            fullDict (boolean): return the bibtex dictionary in addition
                to the bibtex text (default False)

        Output:
            the bibtex text
            (optional, depending on fullDict): the bibtex dictionary
                of the last entry whose arxiv id contains `string`,
                or of the first entry if none matches.
            If an error occurs while processing the feed,
            "" (or "", {} if fullDict is True) is returned
        """
        if additionalArgs:
            for k, v in additionalArgs.items():
                self.urlArgs[k] = v
        self.urlArgs["search_query"] = searchType + ":" + string
        url = self.createUrl()
        pBLogger.info(self.searchInfo % (searchType, string, url))
        text = parse_accents_str(self.textFromUrl(url))
        try:
            data = feedparser.parse(text)
            db = BibDatabase()
            db.entries = []
            dictionaries = []
            for entry in data["entries"]:
                idArx = (entry["id"].replace("http://arxiv.org/abs/",
                                             "").replace(
                                                 "https://arxiv.org/abs/", ""))
                # remove only a trailing version tag ("v" + digits):
                # cutting at the first "v" would truncate old-style ids
                # whose archive name contains a "v" (e.g. "solv-int/...")
                base, sep, version = idArx.rpartition("v")
                if sep and version.isdigit():
                    idArx = base
                dictionary = {}
                dictionary["ENTRYTYPE"] = "article"
                dictionary["ID"] = idArx
                dictionary["archiveprefix"] = "arXiv"
                dictionary["title"] = entry["title"]
                dictionary["arxiv"] = idArx
                try:
                    dictionary["doi"] = entry["arxiv_doi"]
                except KeyError as e:
                    # the doi is optional
                    pBLogger.debug("KeyError: %s" % e)
                dictionary["abstract"] = entry["summary"].replace("\n", " ")
                dictionary["authors"] = " and ".join(
                    [au["name"] for au in entry["authors"]])
                dictionary["primaryclass"] = entry["arxiv_primary_category"][
                    "term"]
                year = self.getYear(dictionary["arxiv"])
                if year is not None:
                    dictionary["year"] = year
                db.entries.append(dictionary)
                dictionaries.append(dictionary)
            if fullDict:
                # an empty result raises IndexError here,
                # which is handled by the generic except below
                dictionary = dictionaries[0]
                for d in dictionaries:
                    if string in d["arxiv"]:
                        dictionary = d
                return pbWriter.write(db), dictionary
            else:
                return pbWriter.write(db)
        except Exception:  # intercept all other possible errors
            pBLogger.exception(self.genericError)
            if fullDict:
                return "", {}
            else:
                return ""
예제 #24
0
    def authorStats(self,
                    authorName,
                    plot=False,
                    reset=True,
                    pbMax=None,
                    pbVal=None):
        """Function that gets the data and
        constructs the statistics for a given author.

        Parameters:
            authorName: the author name as identified into INSPIRE-HEP,
                or a list of author names
                (it calls itself recursively for all the list elements)
            plot (boolean, default False): True to call self.plotStats
            reset (boolean, default True): True to delete
                all previous existing data
                (used as False when processing a list of authors)
            pbMax (callable, optional): a function to set the maximum
                of a progress bar in the GUI, if possible
            pbVal (callable, optional): a function to set the value
                of a progress bar in the GUI, if possible

        Output:
            a dictionary containing all the statistic information.
            For a single author, the structure is the following:
            {
                "name": the author name,
                "aI": The complete information,
                    including the dictionaries with the single papers info
                    (see self.paperStats), the citations
                    and the corresponding dates,
                "paLi": a list of [id, date] of the papers
                    associated with the author,
                "allLi": the complete list of [date, total citations]
                    with all the citations to the papers,
                "meanLi": the complete list of
                    [date, total citations/number of papers]
                    computed at each point from "allLi" content,
                "h": the h-index,
                "figs" (only if `plot` is True): contains the figures.
                    See self.plotStats
            }
        """
        if reset:
            self.allInfoA = {}
            self.authorPapersList = [[], []]
            self.allCitations = []
        if isinstance(authorName, list):
            # pbMax/pbVal may be None: calling them raises TypeError
            try:
                pbMax(len(authorName))
            except TypeError:
                pass
            for ia, a in enumerate(authorName):
                try:
                    pbVal(ia + 1)
                except TypeError:
                    pass
                self.authorStats(a, reset=False)
            self.authorPlotInfo["name"] = authorName
            return self.authorPlotInfo
        pBLogger.info(isstr.authorStats % authorName)
        url = (pbConfig.inspireLiteratureAPI + "?q=author:" + authorName +
               self.authorStatsOpts + str(self.maxPerPage))
        data = self.JsonFromUrl(url)
        # sort the records themselves (by id, as strings), so that each id
        # stays paired with its own "created" date: indexing the unsorted
        # data with the position in the sorted list of ids mixes the papers
        papers = sorted(data, key=lambda rec: "%s" % rec["id"])
        tot = len(papers)
        pBLogger.info(isstr.authorStatsProcess % tot)
        self.runningAuthorStats = True
        try:
            pbMax(tot)
        except TypeError:
            pass
        for i, record in enumerate(papers):
            p = "%s" % record["id"]
            try:
                pbVal(i + 1)
            except TypeError:
                pass
            if not self.runningAuthorStats:
                pBLogger.info(isstr.stopReceived)
                break
            if p in self.allInfoA:
                # already processed: nothing is requested, no need to wait
                continue
            time.sleep(1)
            self.allInfoA[p] = {}
            self.allInfoA[p]["date"] = dateutil.parser.parse(
                record["created"])
            self.authorPapersList[0].append(self.allInfoA[p]["date"])
            pBLogger.info(isstr.authorStatsLooking % (i + 1, tot, 100.0 *
                                                      (i + 1) / tot, p))
            paperInfo = self.paperStats(p,
                                        verbose=0,
                                        paperDate=self.allInfoA[p]["date"])
            self.allInfoA[p]["infoDict"] = paperInfo["aI"]
            self.allInfoA[p]["citingPapersList"] = paperInfo["citList"]
            self.allCitations.extend(
                v["date"] for v in self.allInfoA[p]["infoDict"].values())
            pBLogger.info("")
        # papers ordered by date, with the cumulative number of papers
        self.authorPapersList[0].sort()
        self.authorPapersList[1] = list(
            range(1,
                  len(self.authorPapersList[0]) + 1))
        pBLogger.info(isstr.savingCitations)
        allCitList = [[], []]
        meanCitList = [[], []]
        paperDates, paperCounts = self.authorPapersList
        currPaper = 0
        for i, d in enumerate(sorted(self.allCitations)):
            # "while" and not "if": more than one paper may have appeared
            # between two consecutive citations
            while (currPaper < len(paperDates) - 1
                   and d >= paperDates[currPaper + 1]):
                currPaper += 1
            allCitList[0].append(d)
            allCitList[1].append(i + 1)
            meanCitList[0].append(d)
            meanCitList[1].append((i + 1.0) / paperCounts[currPaper])
        # NOTE(review): the "- 2" presumably discounts two non-citation
        # elements added by self.paperStats -- confirm
        citations = [
            len(info["citingPapersList"][0]) - 2
            for info in self.allInfoA.values()
        ]
        # h-index: the largest h such that h papers have at least h
        # citations each. All the ranks up to the number of papers are
        # considered, so that h can be equal to the number of papers
        hind = 0
        for rank, cites in enumerate(sorted(citations, reverse=True),
                                     start=1):
            if cites < rank:
                break
            hind = rank
        self.authorPlotInfo = {
            "name": authorName,
            "aI": self.allInfoA,
            "paLi": self.authorPapersList,
            "allLi": allCitList,
            "meanLi": meanCitList,
            "h": hind,
        }
        if plot:
            self.authorPlotInfo["figs"] = self.plotStats(author=True)
        pBLogger.info(isstr.authorStatsCompleted % authorName)
        return self.authorPlotInfo