def search(self):
    """Run the configured search and populate ``self.items`` with DBItems.

    Only the 'PubMed' database is currently supported: the search term is
    submitted to PubMed, and for every hit both the GenBank sequence record
    and the Medline citation record are downloaded and wrapped together in
    a DBItem, keyed by the PubMed id.
    """
    if self.database == 'PubMed':
        # Imports kept local so the (heavy) Biopython modules are loaded
        # only when a search is actually performed.
        from Bio import PubMed
        from Bio import GenBank
        from Bio import Medline

        # Ids of matching citations, capped at self.maxResults.
        search_ids = PubMed.search_for(self.searchTerm,
                                       max_ids=self.maxResults)

        # Dictionary-style access to GenBank records, parsed into feature
        # records; self.type selects the NCBI database to query.
        gb_parser = GenBank.FeatureParser()
        ncbi_dict = GenBank.NCBIDictionary(self.type, 'genbank',
                                           parser=gb_parser)

        # Dictionary-style access to Medline citations; delay=1.0 keeps a
        # one-second pause between requests to be polite to NCBI servers.
        ml_parser = Medline.RecordParser()
        medline_dict = PubMed.Dictionary(delay=1.0, parser=ml_parser)

        # 'pmid' instead of 'id' so the builtin id() is not shadowed.
        for pmid in search_ids:
            ml_record = medline_dict[pmid]
            gb_record = ncbi_dict[pmid]
            new_item = DBItem(self.project,
                              seq=gb_record.seq,
                              descript=gb_record.description,
                              id=pmid,
                              record=ml_record)
            self.items[pmid] = new_item
"""Example script showing how to interact with PubMed.""" # standard library import string # biopython from Bio import PubMed from Bio import Medline # do the search and get the ids search_term = 'orchid' orchid_ids = PubMed.search_for(search_term) print orchid_ids # access Medline through a dictionary interface that returns PubMed Records rec_parser = Medline.RecordParser() medline_dict = PubMed.Dictionary(parser=rec_parser) for id in orchid_ids[0:5]: cur_record = medline_dict[id] print 'title:', string.rstrip(cur_record.title) print 'authors:', cur_record.authors print 'source:', string.strip(cur_record.source) print
# NOTE(review): fragment — continues an option-parsing loop that begins
# before this chunk; 'opt' and 'arg' presumably come from getopt-style
# parsing in code not visible here. Indentation reconstructed — confirm.
    elif opt == '-c':
        # Only report the number of hits; skip downloading citations.
        count_only = 1
    elif opt == '-d':
        # Delay (seconds) to pause between successive PubMed requests.
        try:
            delay = float(arg)
        except ValueError:
            print "Delay must be a floating point value"
            sys.exit(0)
        if delay < 0:
            print "Delay cannot be negative"
            sys.exit(0)

if help:
    print_usage()
    sys.exit(0)

# Perform the actual PubMed query for the requested term.
print "Doing a PubMed search for %s..." % repr(query)
ids = PubMed.search_for(query)
print "Found %d citations" % len(ids)
if count_only:
    # -c was given: stop after reporting the hit count.
    sys.exit(0)

# Download each citation, honouring the user-supplied inter-request delay.
pm = PubMed.Dictionary(delay=delay)
for id in ids:
    try:
        print pm[id]
    except KeyError, x:
        # A failed download is reported but is not fatal; move on.
        print "Couldn't download %s, %s" % (id, x)
    sys.stdout.flush()
# NOTE(review): fragment — this is the tail of a chunk-writing loop that
# begins before this view; 'chunk', 'chunkid', 'fakename', 'useless_lines',
# 'outfile', 'chunkmap', 'skipped' and 'known_articles' are bound earlier.
    # True when any of the boilerplate marker strings occurs in the chunk.
    if reduce(operator.or_, [x in chunk.lower() for x in useless_lines]):
        continue
    if len(chunk) < 2:
        # Too short to be useful text.
        continue
    # Emit "<zero-padded id>|<text>" and remember which file the id maps to.
    outfile.write('%010d|%s\n' % (chunkid, chunk))
    chunkmap[fakename].append(chunkid)
    chunkid += 1

outfile.close()

print "Saving chunkmap"
pickle.dump(chunkmap, open(outmapname, "wb"), pickle.HIGHEST_PROTOCOL)
print "These files couldn't be processed:"
print '\n'.join(skipped)

print "Opening (or creating) cache in", sys.argv[2]
# file_mode='c' — presumably "create if missing"; verify StringDBDict docs.
the_cache=StringDBDict(os.path.join(sys.argv[2], DEFAULT_CACHE_NAME),
                       file_mode='c')

# Fetch only the articles not already present in the cache.
PubMed.download_many([str(x) for x in known_articles
                      if str(x) not in the_cache.keys()],
                     download_callback, parser=Medline.RecordParser())

mti_filename=sys.argv[1]+'.mti'
print "Finished processing the cache. Using the cache to build", \
      mti_filename
mti_file=open(mti_filename, "w")
chunkmap={}
# Matches literal backslash-x hex escapes such as "\x1f" left in the text.
hexfinder=re.compile(r'\\x[a-f0-9][a-f0-9]', re.IGNORECASE)
for article in known_articles:
    try:
        article_record=the_cache[str(article)]
    except KeyError:
        print "Article doesn't exist in cache. Skipping."
        continue
    if article_record.abstract=='':
        print "Article", article, "has no abstract. Skipping."
        # NOTE(review): fragment ends here — a 'continue' presumably follows.
help="output format." , action = "store_true" ) parser.add_option("-l", "--library", dest="library", help="library to add medline entries to." , metavar="FILE" ) parser.set_defaults( pmid = None, format= "oo", clipboard = False, library = None, ) (options, args) = Experiment.Start( parser ) if options.pmid: ids = PubMed.search_for(options.pmid) outlines = [] medline_parser = Medline.RecordParser() medline_dict = PubMed.Dictionary(parser = medline_parser) for id in ids: this_record = medline_dict[id] year = this_record.publication_date.split(" ")[0] last_names = map( lambda x: x.split(" ")[0], this_record.authors) if options.format == "oo":
"""Example script showing how to interact with PubMed.""" # standard library import string # biopython from Bio import PubMed from Bio import Medline # do the search and get the ids search_term = 'orchid' orchid_ids = PubMed.search_for(search_term) print orchid_ids # access Medline through a dictionary interface that returns PubMed Records rec_parser = Medline.RecordParser() medline_dict = PubMed.Dictionary(parser = rec_parser) for id in orchid_ids[0:5]: cur_record = medline_dict[id] print 'title:', string.rstrip(cur_record.title) print 'authors:', cur_record.authors print 'source:', string.strip(cur_record.source) print