Esempio n. 1
0
    def findEntry(self, journal, volume, page):
        """Return the first stored record matching a journal, volume and page.

        Every record in ``self.records`` is checked in turn:

        * ``str(record.get("volume"))`` must equal ``volume``;
        * the text before the first ``-`` in ``str(record.get("pages"))``,
          stripped of whitespace, must equal ``page``;
        * when ``PDFGetGlobals.get_journal(journal)`` yields a truthy object,
          its ``name`` must equal the ``name`` of the journal object looked
          up from the record's ``"journal"`` field.

        The record fields are converted with ``str`` before comparison, so
        ``volume`` and ``page`` have to be passed as string values to match.

        Returns the matching record, or ``None`` when no record matches.
        """
        from papers.pdfglobals import PDFGetGlobals

        for label in self.records:
            try:
                record = self.records[label]

                if volume != str(record.get("volume")):
                    continue

                # Only the first page of a "first-last" range is compared.
                first_page = str(record.get("pages")).split("-")[0].strip()
                if page != first_page:
                    continue

                # Both lookups stay inside the try block so that a lookup
                # failure rejects this record instead of aborting the search.
                jobj = PDFGetGlobals.get_journal(journal)
                jrec = PDFGetGlobals.get_journal(str(record.get("journal")))

                # A record whose journal cannot be resolved can never match
                # a resolved requested journal.
                if jobj and (jrec is None or jobj.name != jrec.name):
                    continue

                return record
            except Exception:
                # Deliberate best effort: a record that cannot be inspected
                # is treated as a non-match and the search moves on.
                continue

        return None
Esempio n. 2
0
def parse_reference(text):
    """Parse a citation string into journal, year, volume, page and authors.

    Processing steps, in order:

    1. A fixed list of textual substitutions is applied to ``text``.
    2. If a later line starts with a ``(<digits>)`` marker, only the text
       before that line is kept.
    3. The journal is located with ``PDFGetGlobals.find_journal_in_entry``.
    4. The first ``YYYY, <digits>, <digits>`` triple is read as
       year, volume and page.
    5. After removing the journal and the number triple (and anything up to
       and including a leading ``(<digits>)`` marker), the remainder is split
       on ``;`` into authors, each expected as ``Lastname, Initials``.

    Returns a dict with the keys ``"volume"`` (int), ``"page"``
    (``Page(page)``), ``"year"`` (int), ``"authors"`` (list of
    ``"Lastname, INITIALS"`` strings) and ``"journal"`` (the value returned
    by ``find_journal_in_entry``).

    Returns ``None``, after writing a message to ``sys.stderr``, when the
    journal, the number triple or the author list cannot be parsed.
    """
    from papers.pdfglobals import PDFGetGlobals as pdfglobals

    # An ordered sequence rather than a dict: the substitutions interact
    # (" and" produces ", ", which can create a new ".," pair), so the result
    # depends on the order in which they are applied.
    replacements = (
        (".,", "."),
        (" and", ", "),
        (".-", ". "),
    )
    for old, new in replacements:
        text = text.replace(old, new)

    # Keep only the text preceding a line that begins with "(<digits>)".
    match = re.search(r"^(.*?)\n\s*[\(]\d+[\)]", text, re.DOTALL)
    if match:
        text = match.group(1)

    journal = pdfglobals.find_journal_in_entry(text)
    if not journal:
        sys.stderr.write("Could not find properly formatted journal\n")
        return None

    # First "year, volume, page" triple, with an optional trailing period.
    match = re.search(r"^.*?(\d{4}[,]\s*\d+[,]\s*\d+[.]?)", text, re.DOTALL)
    if not match:
        sys.stderr.write("Could not find properly formatted volume, page, year\n")
        return None
    numbers = match.group(1)

    match = re.search(r"(\d{4})[,].*?(\d+)[,].*?(\d+)", numbers, re.DOTALL)
    if not match:
        sys.stderr.write("Could not find properly formatted volume, page, year\n")
        return None
    year, volume, page = match.groups()

    # Throw out the journal and the number triple; what is left should be
    # the author list, possibly preceded by a "(<digits>)" marker.
    new_text = text.replace(journal, "").replace(numbers, "")
    match = re.search(r"[\(]\d+[\)](.*)", new_text, re.DOTALL)
    if match:
        new_text = match.group(1)

    # Get the authors.
    authors = []
    for chunk in new_text.split(";"):
        if not chunk.strip():
            # Blank piece, e.g. from a trailing ";" -- nothing to parse.
            continue

        parts = chunk.split(",")
        if len(parts) != 2:
            # Not of the form "Lastname, Initials": report it the same way
            # as every other parse failure instead of raising ValueError
            # from tuple unpacking.
            sys.stderr.write("Could not find properly formatted authors\n")
            return None

        lastname, initials = parts
        # Drop whitespace and periods character by character: " J. A." -> "JA".
        initials = "".join(ch.strip().strip(".") for ch in initials)
        authors.append("%s, %s" % (lastname.strip(), initials))

    if not authors:
        sys.stderr.write("Could not find properly formatted authors\n")
        return None

    return {
        "volume": int(volume),
        "page": Page(page),
        "year": int(year),
        "authors": authors,
        "journal": journal,
    }
Esempio n. 3
0
 def __init__(self, value):
     """Initialise the field from the journal looked up for ``value``.

     ``value`` is resolved through ``PDFGetGlobals.get_journal``; the
     resulting journal's ``name`` followed by a literal ``*`` is passed
     to ``WOKField.__init__``.
     """
     # Imported under an alias that does not shadow the built-in globals().
     from papers.pdfglobals import PDFGetGlobals as pdfglobals

     # NOTE(review): if get_journal can return None for an unknown journal,
     # the attribute access below raises AttributeError -- confirm whether
     # callers rely on that before adding explicit handling.
     jobj = pdfglobals.get_journal(value)
     WOKField.__init__(self, "%s*" % jobj.name)