コード例 #1
0
ファイル: isi.py プロジェクト: jjwilke/PySkyNet
def walkISI(files, archive, notes):
    """Parse saved ISI record files and attach a PDF to each parsed article.

    Every file in ``files`` is read in full and fed to a
    ``SavedRecordParser`` built on ``archive``. After each file, the parser
    is iterated and, for every article it yields, a PDF named
    ``"<abbrev> <volume> <start page>.pdf"`` is looked up as a relative
    path; when no such file exists, ``download_pdf`` is asked to fetch one.
    ``parser.archive.commit()`` is called once after all files are handled.

    Args:
        files: Iterable of paths to saved-record text files.
        archive: Archive object handed to ``SavedRecordParser``.
        notes: Passed through unchanged to ``parser.feed``.
    """
    from papers.pdfget import download_pdf

    parser = SavedRecordParser(archive)

    for record_file in files:
        # Read via a context manager so the handle is closed right away
        # rather than being left for the garbage collector.
        with open(record_file) as handle:
            text = handle.read()
        parser.feed(text, notes)
        println("%d new articles\n" % len(parser.archive))

        for article in parser:
            journal = article.get_journal()
            abbrev = article.get_abbrev()
            volume = article.get_volume()
            start = article.get_start_page()
            name = "%s %d %s" % (abbrev, volume, start)
            println("Downloading %s" % name)

            # Check to see if we already have it on disk before downloading.
            path = name + ".pdf"
            if os.path.isfile(path):
                println(" -> exists %s" % path)
                article.set_pdf(path)
                continue

            # Issue is passed as 0: don't require issue.
            path = download_pdf(ISIArticle.get_journal(journal), volume, 0, start)
            if path:
                println(" -> %s" % path)
                article.set_pdf(path)
            else:
                # NOTE(review): unlike the other messages this goes straight
                # to sys.stdout with no trailing newline -- confirm intended.
                sys.stdout.write(" -> FAILED")
    parser.archive.commit()
コード例 #2
0
ファイル: isi.py プロジェクト: jjwilke/PySkyNet
    def store(self, download = False, notes = [], keywords = []):
        """Record ``self.article`` in the archive, optionally fetching its PDF.

        The local archive is searched first via ``self.archive.find_match``;
        on a hit, ``self.article`` is replaced by the matching entry. On a
        miss, an ``ArchiveRequest`` is run against the master archive, and
        only when that also finds nothing is the article added to
        ``self.archive``. A requested download is skipped whenever the
        matched entry (local or master) reports that it already has a PDF.

        Args:
            download: Truthy to try fetching the PDF with ``download_pdf``.
            notes: Forwarded to ``self.add_notes`` when non-empty.
            keywords: Forwarded to ``self.add_keywords`` when non-empty.

        The default lists are never modified here; they are only forwarded
        when non-empty and echoed in the final message.
        """
        article = self.article
        journal = article.get_journal()
        volume = article.get_volume()
        page = article.get_page()
        year = article.get_year()
        abbrev = article.get_abbrev()
        name = "%s %d %s (%d)" % (abbrev, volume, page, year)

        local_match = self.archive.find_match(article)
        if local_match:
            # Only keep the download request if the local copy lacks a pdf.
            if download:
                download = not local_match.has_pdf()
            self.article = local_match
            println("Already have article %s in local archive\n" % name)
        else:
            # Nothing local: query the master archive.
            master_match = ArchiveRequest(article).run()
            if master_match:
                println("Already have article %s in master archive\n" % name)
                # Only keep the download request if master lacks a pdf.
                if download:
                    download = not master_match.has_pdf()
            else:
                # Unknown everywhere, so register it locally.
                self.archive.add(article)

        if download:
            pdf_path = download_pdf(journal, volume=volume, page=page)
            if pdf_path:
                println(" -> downloaded %s\n" % pdf_path)
                self.article.set_pdf(pdf_path)

        if keywords:
            self.add_keywords(keywords)
        if notes:
            self.add_notes(notes)

        summary = (name, keywords, notes)
        println("Completed storage of %s\n%s\n%s\n" % summary)