def process(self):
    """Handle a request: run the PubMed query found in the URL.

    Extracts the ``query`` parameter from ``self.path``, runs it through a
    Pubmed client, and returns the matching articles serialized as JSON.
    """
    query_params = urlparse.parse_qs(urlparse.urlparse(self.path).query)
    client = Pubmed("*****@*****.**")
    # NOTE(review): parse_qs values are lists (e.g. ["term"]); this passes
    # the list straight through -- confirm Pubmed.query accepts that form.
    client.query(query_params["query"])
    client.fetch()
    client.parse()
    return json.dumps(convert_articles(client.articles))
def get_pubmed_words(self, pubmed_id):
    '''
    Return a dict in the same format as get_field_words:
    k = field (a Pubmed text tag), v = sanitized list of words.

    Tags the fetched record does not carry are silently skipped
    (best effort), preserving the original try/except behaviour.
    '''
    words = dict()
    pubmed = Pubmed(pubmed_id).populate()
    _missing = object()  # sentinel, so a tag whose value is None is still kept
    for tag in Pubmed.text_tags:
        # getattr with a default swallows only AttributeError -- exactly
        # what the original try/except caught -- without the unused
        # exception binding and per-iteration exception handling.
        value = getattr(pubmed, tag, _missing)
        if value is not _missing:
            words[tag] = value
    return words
def insert(self, pmid, user):
    """Insert a PubMed reference and its satellite rows for *pmid*.

    Inserts (or reuses) the journal and reference rows, then adds the
    abstract, the authors in order, and the ref_type. Returns the
    reference_no of the (new or pre-existing) reference row.

    NOTE(review): ``self`` is called as a constructor below, so this
    appears to be bound to the model class rather than an instance --
    confirm against the caller.
    """
    medline = FetchMedline([pmid])
    records = medline.get_records()
    ## it is weird you can't do record = records[0]??
    # records is an iterator, not a sequence, so it cannot be indexed;
    # this loop just binds the (expected single) record for [pmid].
    # NOTE(review): if multiple records were ever returned, only the
    # last would be kept -- confirm the single-record assumption.
    for rec in records:
        record = rec
    # get pubmed instance
    pubmed = Pubmed(record)
    # insert journal
    journal_no = Journal.insert(pubmed.journal_abbrev, user)
    # insert reference -- reuse the existing row if this pmid is already stored
    ref_no = 0
    ref_query = self.query.filter_by(pubmed=pmid)
    if ref_query.first():
        ref_no = ref_query.first().reference_no
    else:
        ref_entry = self(user, pubmed.publish_status, pubmed.citation,
                         pubmed.year, pmid, 'PubMed script',
                         pubmed.pdf_status, pubmed.pages, pubmed.volume,
                         pubmed.title, pubmed.issue, journal_no)
        db.session.add(ref_entry)
        db.session.commit()
        ref_no = ref_entry.reference_no
    # insert abstract
    Abstract.insert(ref_no, pubmed.abstract_txt)
    # insert authors, preserving their order on the paper
    order = 0
    for name in pubmed.authors:
        order += 1
        author_no = Author.insert(name, user)
        AuthorEditor.insert(author_no, ref_no, order)
    # insert ref_type
    RefType.insert(pubmed.pub_type, ref_no, 'NCBI', user)
    return ref_no
# First download nltk data if it is not already on disk.
home = os.environ["HOME"]
if not os.path.exists("%s/nltk_data" % home):
    import nltk
    nltk.download('all')

# Download the NeuroSynth database listing and collect its unique PMIDs.
df = pandas.read_csv("database.txt", sep="\t")
pmids = df.id.unique().tolist()
# Parenthesized single-argument print works identically on Python 2 and 3.
print("NeuroSynth database has %s unique PMIDs" % (len(pmids)))

# Download abstract text in two batches.
email = "*****@*****.**"
pm = Pubmed(email, pmc=False)
articles1 = pm.get_many_articles(pmids[:10000])
articles2 = pm.get_many_articles(pmids[10000:])
articles = articles1.copy()
articles.update(articles2)
if not os.path.exists("articles.pkl"):
    # 'with' guarantees the pickle handle is closed (the original
    # `pickle.dump(articles, open(...))` leaked the file object).
    with open("articles.pkl", "wb") as pkl_file:
        pickle.dump(articles, pkl_file)

# Write articles to file, one per line, e.g.:
#   88390|"<text><p>sentence1</p><p>sentence2</p></text>"
# We should use utf-8 http://www.postgresql.org/docs/9.0/static/multibyte.html
filey = open(output_file, "wb")  # intentionally left open; used (and presumably closed) later in the script
count = 0