def update(item):
    """Fetch recent PubMed papers for ``item.keyword`` and link new ones to ``item``.

    Builds a ``PubMedFetcher`` for the item's keyword (10 documents, sorted by
    ``"pub+date"``), passes every fetched paper through ``store_paper`` and,
    for each stored paper whose ``id`` is not already among ``item.papers``,
    pushes the paper onto ``item`` and pushes ``item`` onto the paper's
    subscriptions.

    Args:
        item: Object exposing ``keyword``, ``papers`` (elements with an ``id``
            attribute) and ``update(push__papers=...)``.
            NOTE(review): the ``push__`` keywords look like MongoEngine atomic
            update operators -- confirm against the model definitions.

    Returns:
        None. The function works purely through side effects on ``item`` and
        the stored paper documents.
    """
    # A set gives O(1) membership tests instead of rescanning a list for every
    # fetched paper. Assumes the ids are hashable.
    linked_ids = {existing.id for existing in item.papers}

    fetcher = PubMedFetcher(item.keyword, num_of_documents=10, sort="pub+date")
    for paper in fetcher.papers.values():
        stored = store_paper(paper)
        if stored.id in linked_ids:
            continue
        item.update(push__papers=stored)
        stored.update(push__subscriptions=item)
        # Remember the id so the same stored paper is never linked twice
        # within a single run.
        linked_ids.add(stored.id)
def process_one(self, item):
    """Parse one PubMed text record into a paper dict and try to store it.

    The record is matched against a single regular expression that extracts
    the PMID, publication year/month/day, title, author block, journal name
    and abstract. Newlines are removed from the title and abstract. Author
    names are taken from every ``name ml "..."`` entry in the author block.

    Args:
        item: Text of a single record.

    Returns:
        A dict with the keys ``Source``, ``PMID``, ``Title``, ``Author``
        (list of names), ``Journal``, ``Year``, ``Date`` (``datetime``),
        ``Abstract`` and ``URL``; ``DBID`` is added when ``store_paper``
        succeeds. Returns ``None`` when the record is shorter than 30
        characters, does not match the expected layout, has no
        ``name ml`` authors, or carries an invalid calendar date.
    """
    # Records shorter than 30 characters are skipped without logging.
    if len(item) < 30:
        return None

    # Raw strings keep \d, \{ and \s as regex escapes rather than (invalid)
    # string escapes; the pattern itself is unchanged.
    match = re.search(
        r'pmid (\d+).+?year (\d+).+?month (\d+).+?day (\d+)'
        r'.+?title.+?name "(.+?)"'
        r'.+?authors \{(.+?)\},\s*from journal'
        r'.+?name "(.+?)"'
        r'.+?abstract "(.+?)"',
        item,
        re.DOTALL,
    )
    if not match or not match.group(6):
        # The number in this message came from a per-call local that was
        # always 1; the literal keeps the log output unchanged.
        logging.warning("Parse error. #%d", 1)
        return None

    authors = re.findall(r'name ml "(.+?)"', match.group(6))
    if not authors:
        return None

    pmid = match.group(1)
    year = int(match.group(2))
    month = int(match.group(3))
    day = int(match.group(4))
    try:
        published = datetime(year, month, day)
    except ValueError:
        # e.g. month 13 or day 0: treat as an unparseable record instead of
        # letting the exception escape the parser.
        logging.warning(
            "Parse error. Invalid date %d-%d-%d for PMID %s",
            year, month, day, pmid,
        )
        return None

    h = {
        "Source": "PubMed",
        "PMID": pmid,
        "Title": match.group(5).replace("\n", "").strip(),
        "Author": authors,
        "Journal": match.group(7),
        "Year": year,
        "Date": published,
        "Abstract": match.group(8).replace("\n", "").strip(),
        "URL": "http://www.ncbi.nlm.nih.gov/pubmed/" + pmid,
    }

    # Storing is best-effort: a failure is logged (with traceback) and the
    # parsed dict is still returned, just without "DBID".
    try:
        paper_mongo = store_paper(h)
        h["DBID"] = str(paper_mongo.id)
    except Exception:
        logging.exception("Store paper failed: %s", h["PMID"])
    return h