예제 #1
0
 def process(self):
   """Run a Pubmed search for the ``query`` parameter of the request path.

   The query string of ``self.path`` is parsed, its ``query`` entry is
   handed to a ``Pubmed`` client, the results are fetched and parsed, and
   the converted articles are returned serialized as a JSON string.
   """
   query_string=urlparse.urlparse(self.path).query
   request_args=urlparse.parse_qs(query_string)
   client=Pubmed("*****@*****.**")
   # parse_qs maps every key to a list of values, so a list is passed on here
   client.query(request_args["query"])
   client.fetch()
   client.parse()
   converted=convert_articles(client.articles)
   return json.dumps(converted)
예제 #2
0
    def get_pubmed_words(self, pubmed_id):
        """Collect the text fields of a populated Pubmed record into a dict.

        The result uses the same layout as get_field_words: each key is a
        field (tag) name and each value is whatever the populated Pubmed
        object stores under that attribute (described by the original author
        as a sanitized list of words; this method does no sanitizing itself).

        Tags listed in Pubmed.text_tags that the record does not provide are
        silently skipped.
        """
        record = Pubmed(pubmed_id).populate()
        # Sentinel default: tells "attribute is absent" apart from any real value
        absent = object()
        found = dict()
        for field in Pubmed.text_tags:
            value = getattr(record, field, absent)
            if value is not absent:
                found[field] = value
        return found
예제 #3
0
    def insert(self, pmid, user):
        """Insert the reference for a PubMed ID together with its related rows.

        Fetches the Medline record for ``pmid`` and then inserts, in order:
        the journal, the reference itself (unless a reference with this
        ``pubmed`` id already exists), its abstract, its authors and its
        reference type.

        Args:
            pmid: PubMed identifier to fetch and store.
            user: passed through to the Journal, Author and RefType inserts
                and to the newly created reference entry.

        Returns:
            The ``reference_no`` of the existing or newly created reference.

        Raises:
            ValueError: if no Medline record is returned for ``pmid``.
        """
        medline = FetchMedline([pmid])

        # The records are consumed by iteration (the original author noted
        # that records[0] does not work); if several come back, the last wins.
        record = None
        for rec in medline.get_records():
            record = rec
        if record is None:
            # Without this guard an empty result surfaced later as an
            # UnboundLocalError on ``record``.
            raise ValueError("no Medline record found for PMID %s" % pmid)

        # get pubmed instance
        pubmed = Pubmed(record)

        # insert journal
        journal_no = Journal.insert(pubmed.journal_abbrev, user)

        # insert reference -- look it up once and reuse the row instead of
        # running the same query a second time to read reference_no
        existing = self.query.filter_by(pubmed=pmid).first()
        if existing:
            ref_no = existing.reference_no
        else:
            # NOTE(review): ``self`` is called like the model class here, so
            # this is presumably invoked as a classmethod -- confirm.
            ref_entry = self(user, pubmed.publish_status, pubmed.citation,
                             pubmed.year, pmid, 'PubMed script',
                             pubmed.pdf_status, pubmed.pages, pubmed.volume,
                             pubmed.title, pubmed.issue, journal_no)
            db.session.add(ref_entry)
            db.session.commit()
            ref_no = ref_entry.reference_no

        # insert abstract
        Abstract.insert(ref_no, pubmed.abstract_txt)

        # insert authors, numbering them from 1 in the order given
        for order, name in enumerate(pubmed.authors, 1):
            author_no = Author.insert(name, user)
            AuthorEditor.insert(author_no, ref_no, order)

        # insert ref_type
        RefType.insert(pubmed.pub_type, ref_no, 'NCBI', user)

        return ref_no
예제 #4
0
# First make sure the NLTK data is available. The existence of ~/nltk_data
# is used as the marker that a previous download already happened.
home = os.environ["HOME"]
if not os.path.exists("%s/nltk_data" % home):
    import nltk
    nltk.download('all')

# Load the NeuroSynth database table (tab-separated) from the working
# directory and collect the unique PMIDs from its "id" column.
df = pandas.read_csv("database.txt", sep="\t")
pmids = df.id.unique().tolist()

# Parenthesised single-argument form prints the same on Python 2 and 3.
print("NeuroSynth database has %s unique PMIDs" % (len(pmids)))

# Download abstract text. The PMIDs are requested in two batches (the first
# 10000, then the rest) and the two results are merged into one mapping.
# NOTE(review): the batch split is presumably there to limit request size
# -- confirm against Pubmed.get_many_articles.
email = "*****@*****.**"
pm = Pubmed(email, pmc=False)
articles1 = pm.get_many_articles(pmids[:10000])
articles2 = pm.get_many_articles(pmids[10000:])
articles = articles1.copy()
articles.update(articles2)

# Cache the downloaded articles; an existing cache file is never overwritten.
if not os.path.exists("articles.pkl"):
    # Context manager so the file is flushed and closed deterministically.
    with open("articles.pkl", "wb") as pickle_file:
        pickle.dump(articles, pickle_file)

# Write articles to file. Intended row format, per the examples below:
#   <pmid>|"<text><p>sentence</p>...</text>"
# (the dangling <p> before </text> in the examples is presumably a typo -- confirm)
#88390|"<text><p>sentence1</p><p>sentence2</p><p></text>"
#88390|"<text><p>sentence1</p><p>sentence2</p><p></text>"
# The output should be UTF-8 encoded, see
# http://www.postgresql.org/docs/9.0/static/multibyte.html

# Opened in binary mode; output_file is defined outside this section.
filey = open(output_file,"wb")
count = 0