예제 #1
0
파일: isi.py 프로젝트: jjwilke/PySkyNet
 def __init__(self, archive, journal=None, author=None, year=None, volume=None, page=None, notes=None, download=False, keywords=None):
     """Set up the server request, the target archive and the search.

     Only the search criteria that are truthy (journal, author, year,
     volume, page) are kept in ``self.kwargs`` and handed to ``WOKSearch``.
     ``notes``, ``download`` and ``keywords`` are stored unchanged on the
     instance.
     """
     ServerRequest.__init__(self, ISIServer.REQUEST_PORT, ISIAnswer)
     self.archive = Archive(archive)
     self.download = download
     self.notes = notes
     self.keywords = keywords

     # Table of (keyword name, supplied value); falsy values are left out
     # so the search is built only from criteria the caller provided.
     criteria = (
         ("journal", journal),
         ("author", author),
         ("year", year),
         ("volume", volume),
         ("page", page),
     )
     self.kwargs = {}
     for field, value in criteria:
         if value:
             self.kwargs[field] = value

     self.search = WOKSearch(**self.kwargs)
예제 #2
0
파일: isi.py 프로젝트: jjwilke/PySkyNet
 def __init__(self, name):
     """Create the Archive for *name* together with a MasterArchive and a Library."""
     self.archive = Archive(name)
     self.master = MasterArchive()
     self.lib = Library()
예제 #3
0
파일: isi.py 프로젝트: jjwilke/PySkyNet
class SavedRecordParser:
    """Parse saved bibliographic records from text into archive articles.

    ``feed`` splits the text into records delimited by ``PT J`` ... ``ER``,
    fills a freshly created article from the two-letter field tags found in
    each record, and adds the article to the archive unless the master
    archive already has it.
    """

    def __init__(self, name):
        """Create the Archive for *name* together with a MasterArchive and a Library."""
        self.archive = Archive(name)
        self.master = MasterArchive()
        self.lib = Library()

    def __iter__(self):
        """Iterate over the underlying archive."""
        return iter(self.archive)

    def __getitem__(self, index):
        """Return the archive item at *index*."""
        return self.archive.__getitem__(index)

    def add_pdf(self, path):
        """Forward *path* to the archive's ``add_pdf``."""
        self.archive.add_pdf(path)

    def get_text(self, text, start, stop):
        """Return the stripped text between the *start* and *stop* tags.

        A tag only matches at the beginning of a line and must be followed
        by a single space; both tags are upper-cased before matching.
        Returns ``None`` when the tag pair is not found in *text*.
        """
        pattern = "\n%s (.*?)\n%s " % (start.upper(), stop.upper())
        found = re.compile(pattern, re.DOTALL).search(text)
        if found is None:
            return None
        return found.group(1).strip()

    def exclude_entry(self, entry, exclude):
        """Return True if any item of *exclude* occurs in *entry*."""
        return any(exc in entry for exc in exclude)

    def get_entry(self, attr, method=None, default=None, require=True, exclude=(), entries=()):
        """Extract one field from the current record and set it on the article.

        The (start, stop) tag pairs in *entries* are tried in order; the
        first non-empty, non-excluded match is optionally converted with
        *method* and passed to ``self.article.set_<attr>``.

        Args:
            attr: attribute name; selects the ``set_<attr>`` article setter.
            method: optional callable applied to the raw text before setting.
            default: value to set when no tag pair matches (ignored if None).
            require: when true and nothing was set, raise ``ISIError``.
            exclude: substrings that disqualify a matched entry.
            entries: sequence of (start_tag, stop_tag) pairs to try.

        Raises:
            ISIError: if *require* is true, no pair matched and no default
                was given. The current record is written to stderr first.
        """
        setter = getattr(self.article, "set_%s" % attr)
        tried = []
        for start, stop in entries:
            tried.append("%s->%s" % (start, stop))
            entry = self.get_text(self.block, start, stop)
            if not entry or self.exclude_entry(entry, exclude):
                continue
            if method:
                entry = method(entry)
            setter(entry)
            return

        if default is not None:
            setter(default)
            return

        if require:
            sys.stderr.write("ERROR: %s\n" % self.block)
            msg = "no %s for tags\n" % attr
            msg += "\n".join(tried)
            raise ISIError(msg)

    def feed(self, text, notes):
        """Parse every record in *text* and add new articles to the archive.

        Each record is processed independently: any exception raised while
        handling a record is reported on stderr together with the record
        text, and processing continues with the next record.

        Args:
            text: text containing ``PT J`` ... ``ER`` delimited records.
            notes: value passed to ``set_notes`` on every created article.
        """
        # Patterns and converters do not depend on the record, so they are
        # built once instead of once per loop iteration.
        record_re = re.compile(r"PT\sJ(.*?)\nER", re.DOTALL)
        number_re = re.compile(r"(\d+)")

        def get_number(value):
            # First run of digits; raises AttributeError when there is none,
            # which the per-record handler below reports.
            return number_re.search(value).groups()[0]

        def get_int(value):
            return int(get_number(value))

        def get_page(value):
            return Page(get_number(value))

        def split_authors(value):
            return get_authors(value, "\n", ",")

        for block in record_re.findall(text):
            try:
                self.block = block
                self.article = self.archive.create_article()

                self.get_entry("journal", entries=(("so", "la"), ("so", "ab"), ("so", "sn")))
                self.get_entry("volume", method=int, entries=(("vl", "is"), ("vl", "bp")))
                self.get_entry("issue", method=get_int, require=False, entries=(("is", "bp"),))
                self.get_entry("start_page", method=get_page, exclude=("art. no.",), entries=(("bp", "ep"), ("bp", "ut"), ("ar", "di"), ("ar", "ut")))
                self.get_entry("end_page", method=get_page, require=False, entries=(("ep", "di"), ("ep", "ut")))

                self.get_entry("authors", method=split_authors, entries=(("af", "ti"), ("au", "ti"), ("au", "so")))

                self.get_entry("title", method=Cleanup.clean_title, entries=(("ti", "so"),))
                self.get_entry("abstract", method=clean_entry, require=False, entries=(("ab", "sn"),))
                self.get_entry("year", method=int, entries=(("py", "vl"), ("py", "tc")))

                self.get_entry("doi", require=False, entries=(("di", "pg"), ("di", "ut"), ("di", "er")))

                self.article.set_notes(notes)

                # The display name is built before the archive is touched, so
                # a failure here keeps the article out of the archive.
                journal = ISIArticle.get_journal(self.article.get_journal())
                volume = self.article.get_volume()
                page = self.article.get_page()
                name = "%s %d %s" % (journal, volume, page)
                if not self.master.has(self.article):
                    self.archive.test_and_add(self.article)
                else:
                    println("%s exists in archive\n" % name)
            except Exception as error:
                # Deliberately best-effort: one bad record must not stop the
                # rest of the text from being processed.
                sys.stderr.write("ERROR: %s\n%s\n" % (error, block))
예제 #4
0
파일: isi.py 프로젝트: jjwilke/PySkyNet
class WOKParser(WOKObject, ServerRequest):
    """Run a search through the ISI server and store the matching article data.

    Commands are wrapped in ``ISIServerCommand`` and sent with
    ``ServerRequest.run``; results are stored in the archive given at
    construction time.
    """

    def __init__(self, archive, journal=None, author=None, year=None, volume=None, page=None, notes=None, download=False, keywords=None):
        """Set up the server request, the target archive and the search.

        Only the search criteria that are truthy (journal, author, year,
        volume, page) are kept in ``self.kwargs`` and handed to
        ``WOKSearch``. ``notes``, ``download`` and ``keywords`` are stored
        unchanged and later passed to ``article.store``.
        """
        ServerRequest.__init__(self, ISIServer.REQUEST_PORT, ISIAnswer)
        self.archive = Archive(archive)
        self.download = download
        self.notes = notes
        self.keywords = keywords
        self.kwargs = {}
        if journal:
            self.kwargs["journal"] = journal
        if author:
            self.kwargs["author"] = author
        if year:
            self.kwargs["year"] = year
        if volume:
            self.kwargs["volume"] = volume
        if page:
            self.kwargs["page"] = page
        self.search = WOKSearch(**self.kwargs)

    def run(self, method, args=None):
        """Send the command *method* with *args* to the server and return its response."""
        cmd = ISIServerCommand(method, args)
        return ServerRequest.run(self, cmd)

    def pick_article(self, articles):
        """Return the first article matching every search criterion, else None.

        For each criterion in ``self.kwargs`` that the article has as an
        attribute, the attribute's string form must equal
        ``WOKField.get(key, value)`` (or ``str(value)`` when that is falsy).
        Criteria the article has no attribute for are ignored.
        """
        for article in articles:
            for key, value in self.kwargs.items():
                if not hasattr(article, key):
                    # Nothing to compare against; don't use this key for matching.
                    continue

                actual = str(getattr(article, key))
                expected = WOKField.get(key, value) or str(value)
                if not expected == actual:
                    break
            else:
                # No criterion failed for this article.
                return article
        return None

    def store_article(self):
        """Fetch the current text from the server, build a WOKArticle and store it.

        Returns the created article; it is stored only when it is truthy.
        """
        block = self.run("get_text")
        article = WOKArticle(self.archive, block)
        if article:
            article.store(self.download, self.notes, self.keywords)
        return article

    def run_citedrefs(self):
        """Search for the configured article and walk all of its references.

        Runs the search, picks the matching article, opens it and its
        reference list, walks the references on the first page and on every
        following page, then commits the archive.

        Raises:
            ISIError: when no article matches the search criteria, or when
                a server step raises it; the error is logged to stderr and
                re-raised with its original traceback.
        """
        try:
            self.run("isi_search", self.search)
            articles = self.run("get_articles")

            article = self.pick_article(articles)
            if not article:
                raise ISIError("Could not find article with given specifications")

            self.run("open_article", article.title)
            nrefs = self.run("open_references")

            # get all possible refs on this page
            self.walk_references()

            # if there are more pages, go through those as well
            # NOTE(review): pages appear to hold 30 references each; if nrefs
            # is the total reference count, `<` would skip a last page that
            # holds exactly one reference (e.g. nrefs == 31) -- confirm
            # against the server's "open_references" result.
            nstart = 31
            onclick = 2
            while nstart < nrefs:
                self.run("go_to_next_page", onclick)
                self.walk_references()

                nstart += 30
                onclick += 1

            self.archive.commit()
        except KeyboardInterrupt:
            # Pass the interrupt through unchanged.
            raise
        except ISIError as error:
            # The ISIError raised above carries only a message, so `block`
            # may be absent; fall back to an empty string rather than letting
            # an AttributeError hide the real failure.
            block = getattr(error, "block", "")
            sys.stderr.write("ERROR: %s\nFailed on block:\n%s\n" % (error, block))
            raise