def __init__(self, archive, journal=None, author=None, year=None, volume=None, page=None, notes=None, download=False, keywords=None):
    """Set up a server request and the search it will run.

    archive  -- name handed to Archive() to open the target archive
    journal, author, year, volume, page
             -- optional search criteria; only truthy values are kept
    notes, download, keywords
             -- stored unchanged on the instance for later use
    """
    ServerRequest.__init__(self, ISIServer.REQUEST_PORT, ISIAnswer)
    self.archive = Archive(archive)
    self.download = download
    self.notes = notes
    self.keywords = keywords

    # Collect only the criteria the caller actually supplied; the same
    # mapping is both kept on the instance and forwarded to WOKSearch.
    criteria = (
        ("journal", journal),
        ("author", author),
        ("year", year),
        ("volume", volume),
        ("page", page),
    )
    self.kwargs = {}
    for field_name, field_value in criteria:
        if field_value:
            self.kwargs[field_name] = field_value

    self.search = WOKSearch(**self.kwargs)
def __init__(self, name):
    """Open the archive called *name* plus the master archive and library.

    The three objects are created in this fixed order: Archive(name),
    MasterArchive(), Library().
    """
    # Archive that parsed articles are added to.
    self.archive = Archive(name)
    # Master archive, kept alongside the named one.
    self.master = MasterArchive()
    self.lib = Library()
class SavedRecordParser:
    """Parse saved ISI records and add the resulting articles to an archive.

    A record is the text between a "PT J" header and the following "ER"
    line.  Individual fields are pulled out of a record by looking for the
    text between two upper-cased field tags (see get_text).
    """

    def __init__(self, name):
        """Open the archive called *name*, the master archive and the library."""
        self.archive = Archive(name)
        self.master = MasterArchive()
        self.lib = Library()

    def __iter__(self):
        """Iterate over the underlying archive."""
        return iter(self.archive)

    def __getitem__(self, index):
        """Return item *index* of the underlying archive."""
        return self.archive.__getitem__(index)

    def add_pdf(self, path):
        """Forward *path* to the archive's add_pdf."""
        self.archive.add_pdf(path)

    def get_text(self, text, start, stop):
        """Return the stripped text between tags *start* and *stop*.

        Both tags are upper-cased.  The start tag must begin a line and be
        followed by a space; the stop tag must begin a later line and be
        followed by a space.  Returns None when the pair is not found.
        """
        pattern = r"\n%s (.*?)\n%s " % (start.upper(), stop.upper())
        match = re.compile(pattern, re.DOTALL).search(text)
        if not match:
            return None
        return match.group(1).strip()

    def exclude_entry(self, entry, exclude):
        """Return True if any string in *exclude* occurs inside *entry*."""
        return any(exc in entry for exc in exclude)

    def get_entry(self, attr, method=None, default=None, require=True,
                  exclude=(), entries=()):
        """Extract one field from the current block and store it on the article.

        attr    -- attribute name; the value is passed to article.set_<attr>
        method  -- optional callable applied to the raw text before storing
        default -- stored when no tag pair matches (ignored if None)
        require -- when true and nothing was stored, raise ISIError
        exclude -- substrings that disqualify an otherwise matching entry
        entries -- (start_tag, stop_tag) pairs tried in order; first hit wins

        Raises ISIError when the field is required, no pair produced a
        usable entry, and no default was given.
        """
        setter = getattr(self.article, "set_%s" % attr)
        tried = []
        for start, stop in entries:
            tried.append("%s->%s" % (start, stop))
            entry = self.get_text(self.block, start, stop)
            if entry and not self.exclude_entry(entry, exclude):
                if method:
                    entry = method(entry)
                setter(entry)
                return

        if default is not None:
            setter(default)
            return

        if require:
            sys.stderr.write("ERROR: %s\n" % self.block)
            msg = "no %s for tags\n" % attr
            msg += "\n".join(tried)
            raise ISIError(msg)

    def feed(self, text, notes):
        """Parse every record in *text* and add new articles to the archive.

        Each record is turned into an article created by the archive; *notes*
        is attached to every article.  Articles already present in the master
        archive are reported and not added.  Any exception raised while
        handling one record is written to stderr together with the record,
        and parsing continues with the next record.
        """
        # Compile once; both patterns are reused for every record.
        record_re = re.compile(r"PT\sJ(.*?)\nER", re.DOTALL)
        number_re = re.compile(r"(\d+)")

        def get_number(value):
            # First run of digits; raises AttributeError when there is none,
            # which the per-record handler below reports.
            return number_re.search(value).groups()[0]

        def get_page(value):
            return Page(get_number(value))

        for block in record_re.findall(text):
            try:
                self.block = block
                self.article = self.archive.create_article()
                clean_title = Cleanup.clean_title

                self.get_entry("journal",
                               entries=(("so", "la"), ("so", "ab"), ("so", "sn")))
                self.get_entry("volume", method=int,
                               entries=(("vl", "is"), ("vl", "bp")))
                self.get_entry("issue", method=lambda x: int(get_number(x)),
                               require=False, entries=(("is", "bp"),))
                # A "bp" field containing "art. no." is skipped so that the
                # "ar" tag pairs further down the list are tried instead.
                self.get_entry("start_page", method=get_page,
                               exclude=("art. no.",),
                               entries=(("bp", "ep"), ("bp", "ut"),
                                        ("ar", "di"), ("ar", "ut")))
                self.get_entry("end_page", method=get_page, require=False,
                               entries=(("ep", "di"), ("ep", "ut")))
                self.get_entry("authors",
                               method=lambda x: get_authors(x, "\n", ","),
                               entries=(("af", "ti"), ("au", "ti"), ("au", "so")))
                self.get_entry("title", method=clean_title,
                               entries=(("ti", "so"),))
                self.get_entry("abstract", method=clean_entry, require=False,
                               entries=(("ab", "sn"),))
                self.get_entry("year", method=int,
                               entries=(("py", "vl"), ("py", "tc")))
                self.get_entry("doi", require=False,
                               entries=(("di", "pg"), ("di", "ut"), ("di", "er")))

                self.article.set_notes(notes)

                journal = ISIArticle.get_journal(self.article.get_journal())
                volume = self.article.get_volume()
                page = self.article.get_page()
                name = "%s %d %s" % (journal, volume, page)

                if not self.master.has(self.article):
                    self.archive.test_and_add(self.article)
                else:
                    println("%s exists in archive\n" % name)
            except Exception as error:
                # Deliberately best-effort: report the bad record and move on.
                sys.stderr.write("ERROR: %s\n%s\n" % (error, block))
class WOKParser(WOKObject, ServerRequest):
    """Drive a WOK search through the server and store the articles found.

    The search criteria given to the constructor are used twice: to build
    the WOKSearch that is sent to the server, and to pick the matching
    article out of the search results (see pick_article).
    """

    def __init__(self, archive, journal=None, author=None, year=None, volume=None, page=None, notes=None, download=False, keywords=None):
        """Set up the server request and the search it will run.

        archive  -- name handed to Archive() to open the target archive
        journal, author, year, volume, page
                 -- optional search criteria; only truthy values are kept
        notes, download, keywords
                 -- passed to article.store() by store_article
        """
        ServerRequest.__init__(self, ISIServer.REQUEST_PORT, ISIAnswer)
        self.archive = Archive(archive)
        self.download = download
        self.notes = notes
        self.keywords = keywords

        self.kwargs = {}
        criteria = (
            ("journal", journal),
            ("author", author),
            ("year", year),
            ("volume", volume),
            ("page", page),
        )
        for key, value in criteria:
            if value:
                self.kwargs[key] = value

        self.search = WOKSearch(**self.kwargs)

    def run(self, method, args=None):
        """Wrap *method* and *args* in an ISIServerCommand, run it, return the response."""
        cmd = ISIServerCommand(method, args)
        return ServerRequest.run(self, cmd)

    def pick_article(self, articles):
        """Return the first article whose attributes match the search criteria.

        Criteria naming an attribute the article does not have are ignored.
        Each remaining criterion is compared as a string: the article's
        attribute against WOKField.get(key, value), falling back to
        str(value) when that lookup returns something falsy.  Returns None
        when no article matches.
        """
        for article in articles:
            for key, value in self.kwargs.items():
                if not hasattr(article, key):
                    # Not an attribute of the article: don't use it for matching.
                    continue
                found = str(getattr(article, key))
                wanted = WOKField.get(key, value)
                if not wanted:
                    wanted = str(value)
                if not wanted == found:
                    break
            else:
                # Inner loop finished without a mismatch.
                return article
        return None

    def store_article(self):
        """Fetch the current text from the server, build a WOKArticle and store it.

        Returns the WOKArticle; it is stored only when it is truthy.
        """
        block = self.run("get_text")
        article = WOKArticle(self.archive, block)
        if article:
            article.store(self.download, self.notes, self.keywords)
        return article

    def run_citedrefs(self):
        """Walk every page of cited references of the searched article.

        Runs the search, opens the matching article and its reference list,
        calls walk_references() for each page, then commits the archive.

        Raises ISIError when no article matches the criteria or when any
        step raises it; the error is reported on stderr and re-raised.
        """
        try:
            self.run("isi_search", self.search)
            articles = self.run("get_articles")
            article = self.pick_article(articles)
            if not article:
                raise ISIError("Could not find article with given specifications")

            self.run("open_article", article.title)
            nrefs = self.run("open_references")

            # Get all possible refs on this page.
            self.walk_references()

            # If there are more pages, go through those as well.  The
            # counters advance by 30 per page, starting from entry 31.
            nstart = 31
            onclick = 2
            while nstart < nrefs:
                self.run("go_to_next_page", onclick)
                self.walk_references()
                nstart += 30
                onclick += 1

            self.archive.commit()
        except KeyboardInterrupt:
            # Let a user interrupt propagate untouched.
            raise
        except ISIError as error:
            # NOTE(review): not every ISIError is known to carry a `block`
            # attribute (the one raised above is built from a message only),
            # so read it defensively rather than fail inside the handler.
            sys.stderr.write("ERROR: %s\nFailed on block:\n%s\n" % (error, getattr(error, "block", None)))
            # Bare raise keeps the original traceback.
            raise