def walkISI(files, archive, notes):
    """Parse ISI saved-record files and attach a PDF to each parsed article.

    Every file in *files* is read in full and fed to a ``SavedRecordParser``
    built around *archive*. Each article the parser yields is then given a
    PDF: an existing ``"<abbrev> <volume> <start>.pdf"`` file in the current
    working directory is reused, otherwise ``download_pdf`` is asked to
    fetch it. The parser's archive is committed once at the end.

    Args:
        files: Iterable of paths to the saved-record files to read.
        archive: Object handed to ``SavedRecordParser``; it is reachable
            afterwards as ``parser.archive`` and must support ``len()`` and
            ``commit()``.
        notes: Passed through unchanged to ``parser.feed`` for every file.
    """
    from papers.pdfget import download_pdf

    parser = SavedRecordParser(archive)
    for filename in files:
        # Close each handle deterministically instead of leaking one
        # descriptor per input file until garbage collection.
        with open(filename) as handle:
            text = handle.read()
        parser.feed(text, notes)

    println("%d new articles\n" % len(parser.archive))

    for article in parser:
        journal = article.get_journal()
        abbrev = article.get_abbrev()
        volume = article.get_volume()
        start = article.get_start_page()

        name = "%s %d %s" % (abbrev, volume, start)
        println("Downloading %s" % name)

        # Check to see if we already have it on disk before downloading.
        path = name + ".pdf"
        if os.path.isfile(path):
            println(" -> exists %s" % path)
            article.set_pdf(path)
            continue

        # The issue number is passed as 0: don't require an issue.
        path = download_pdf(ISIArticle.get_journal(journal), volume, 0, start)
        if path:
            println(" -> %s" % path)
            article.set_pdf(path)
        else:
            # NOTE(review): written without a trailing newline, unlike the
            # println() calls above -- confirm whether that is intended.
            sys.stdout.write(" -> FAILED")

    parser.archive.commit()
def store(self, download=False, notes=None, keywords=None):
    """Store ``self.article`` in the archive, optionally fetching its PDF.

    The local archive is searched first via ``self.archive.find_match``.
    If a local match exists it replaces ``self.article``. Otherwise the
    master archive is queried through an ``ArchiveRequest``. The article is
    added to the local archive only when neither lookup finds a match.

    Args:
        download: When true, try to download the PDF. The download is
            skipped if a matching article (local or master) reports
            ``has_pdf()``.
        notes: Notes passed to ``self.add_notes`` when non-empty. ``None``
            (the default) is treated as an empty list.
        keywords: Keywords passed to ``self.add_keywords`` when non-empty.
            ``None`` (the default) is treated as an empty list.
    """
    # Fresh lists per call: a mutable default would be shared between calls
    # and could be altered by add_notes()/add_keywords().
    if notes is None:
        notes = []
    if keywords is None:
        keywords = []

    # Capture the identifying fields up front: self.article may be swapped
    # for the local match below, and these values describe the request.
    journal = self.article.get_journal()
    volume = self.article.get_volume()
    page = self.article.get_page()
    year = self.article.get_year()
    name = "%s %d %s (%d)" % (self.article.get_abbrev(), volume, page, year)

    local_match = self.archive.find_match(self.article)
    master_match = None
    if local_match:
        # Only download if the stored copy has no PDF yet.
        download = download and not local_match.has_pdf()
        self.article = local_match
        println("Already have article %s in local archive\n" % name)
    else:
        # Not known locally: query the master archive.
        master_match = ArchiveRequest(self.article).run()
        if master_match:
            println("Already have article %s in master archive\n" % name)
            # Only download if the master copy has no PDF yet.
            download = download and not master_match.has_pdf()
            # NOTE: unlike the local case, self.article is deliberately
            # left as-is (the original had the swap commented out).

    if not local_match and not master_match:
        self.archive.add(self.article)

    if download:
        path = download_pdf(journal, volume=volume, page=page)
        if path:
            println(" -> downloaded %s\n" % path)
            self.article.set_pdf(path)

    if keywords:
        self.add_keywords(keywords)
    if notes:
        self.add_notes(notes)

    println("Completed storage of %s\n%s\n%s\n" % (name, keywords, notes))