# NOTE(review): this line is a whitespace-collapsed fragment of a Python 2
# script (print statements). The original indentation is lost, so the nesting
# of the statements below cannot be determined from here, and the fragment
# both starts inside an enclosing loop (see the `continue` statements) and
# ends inside the body of `for article in known_articles:`.
#
# Visible steps, in order:
#   1. Skip the current `chunk` when any entry of `useless_lines` occurs in
#      `chunk.lower()`, or when `len(chunk) < 2`.
#   2. Write the chunk to `outfile` as '<chunkid zero-padded to 10 digits>|<chunk>',
#      record `chunkid` under `chunkmap[fakename]`, and increment `chunkid`.
#   3. Close `outfile`, pickle `chunkmap` to `outmapname` with
#      pickle.HIGHEST_PROTOCOL, and print the entries of `skipped`.
#      NOTE(review): the file object passed to pickle.dump is never closed
#      explicitly.
#   4. Open a StringDBDict at os.path.join(sys.argv[2], DEFAULT_CACHE_NAME)
#      with file_mode='c', then call PubMed.download_many for every
#      str(x) in `known_articles` that is not already a key of the cache,
#      passing `download_callback` and a Medline.RecordParser().
#   5. Open sys.argv[1] + '.mti' for writing, reset `chunkmap` to an empty
#      dict, and compile `hexfinder`, a case-insensitive regex matching a
#      literal backslash, 'x', and two hex digits (its use is outside this view).
#   6. For each article: look up the_cache[str(article)]; on KeyError print a
#      message and `continue`; when the record's `abstract` is the empty
#      string, print a "has no abstract" message. The remainder of the loop
#      body is not visible here.
if reduce(operator.or_, [x in chunk.lower() for x in useless_lines]): continue if len(chunk) < 2: continue outfile.write('%010d|%s\n' % (chunkid, chunk)) chunkmap[fakename].append(chunkid) chunkid += 1 outfile.close() print "Saving chunkmap" pickle.dump(chunkmap, open(outmapname, "wb"), pickle.HIGHEST_PROTOCOL) print "These files couldn't be processed:" print '\n'.join(skipped) print "Opening (or creating) cache in", sys.argv[2] the_cache=StringDBDict(os.path.join(sys.argv[2], DEFAULT_CACHE_NAME), file_mode='c') PubMed.download_many([str(x) for x in known_articles if str(x) not in the_cache.keys()], download_callback, parser=Medline.RecordParser()) mti_filename=sys.argv[1]+'.mti' print "Finished processing the cache. Using the cache to build", \ mti_filename mti_file=open(mti_filename, "w") chunkmap={} hexfinder=re.compile(r'\\x[a-f0-9][a-f0-9]', re.IGNORECASE) for article in known_articles: try: article_record=the_cache[str(article)] except KeyError: print "Article doesn't exist in cache. Skipping." continue if article_record.abstract=='': print "Article", article, "has no abstract. Skipping."
# NOTE(review): this line is a whitespace-collapsed fragment of a Python 2
# script. The original indentation is lost, so the nesting of the statements
# cannot be determined from here, and the leading `else:` belongs to a
# conditional that starts outside this view.
#
# Visible steps, in order:
#   1. `else: raise "unknown format"` -- fallback branch of the unseen
#      conditional. NOTE(review): string exceptions were removed in
#      Python 2.6; this statement raises TypeError there instead of the
#      intended error -- confirm the target interpreter.
#   2. When `options.clipboard` is set, pass the newline-joined `outlines` to
#      `dcop klipper klipper setClipboardContents` through os.system;
#      otherwise print them. NOTE(review): the text is interpolated into a
#      double-quoted shell argument without any escaping.
#   3. When `options.library` is set: start with an empty `nids` list. If the
#      library file exists, collect `pmids` from its lines that begin with
#      "PMID" (last character dropped -- presumably the newline -- then the
#      second space-separated field), append every entry of `ids` not in
#      `pmids` to `nids`, and open the file in append mode; otherwise open it
#      in write mode. NOTE(review): as the statements read, `nids` is not
#      filled in the branch where the file does not exist -- verify against
#      the original indentation.
#   4. Call PubMed.download_many(nids, f), where `f` writes str(x) surrounded
#      by newlines to `outfile` and ignores its first argument; then close
#      `outfile` and print how many entries were added.
#   5. Call Experiment.Stop().
else: raise "unknown format" if options.clipboard: os.system('dcop klipper klipper setClipboardContents \"%s\"' % '\n'.join(outlines)) else: print "\n".join(outlines) if options.library: nids = [] if os.path.exists( options.library ): pmids = map(lambda x: x[:-1].split(" ")[1], filter(lambda x: re.match( "PMID", x), open(options.library).readlines())) for id in ids: if id in pmids: continue nids.append(id) outfile = open(options.library, "a") else: outfile = open(options.library, "w") f = lambda id, x: outfile.write( "\n%s\n" % (str(x))) PubMed.download_many( nids, f ) outfile.close() print "added %i out of %i entries to library %s" % (len(nids), len(ids), options.library) Experiment.Stop()