Esempio n. 1
0
         if reduce(operator.or_, 
                   [x in chunk.lower() for x in useless_lines]): continue
         if len(chunk) < 2: continue
         outfile.write('%010d|%s\n' % (chunkid, chunk))
         chunkmap[fakename].append(chunkid)
         chunkid += 1
 outfile.close()
 print "Saving chunkmap"
 pickle.dump(chunkmap, open(outmapname, "wb"), pickle.HIGHEST_PROTOCOL)
 print "These files couldn't be processed:"
 print '\n'.join(skipped)
 print "Opening (or creating) cache in", sys.argv[2]
 the_cache=StringDBDict(os.path.join(sys.argv[2], DEFAULT_CACHE_NAME),
                        file_mode='c')
 PubMed.download_many([str(x) for x in known_articles if str(x) not in 
                       the_cache.keys()], download_callback,
                      parser=Medline.RecordParser())
 mti_filename=sys.argv[1]+'.mti'
 print "Finished processing the cache. Using the cache to build", \
        mti_filename
 mti_file=open(mti_filename, "w")
 chunkmap={}
 hexfinder=re.compile(r'\\x[a-f0-9][a-f0-9]', re.IGNORECASE)
 # Walk every known article and look its record up in the cache. Articles
 # missing from the cache are reported and skipped; articles whose abstract
 # is an empty string are reported as well.
 for article in known_articles:
     try:
         # The cache is keyed by the string form of the article id.
         article_record=the_cache[str(article)]
     except KeyError:
         print "Article doesn't exist in cache. Skipping."
         continue
     if article_record.abstract=='':
         print "Article", article, "has no abstract. Skipping."
Esempio n. 2
0
        else:
            # Raising a plain string is itself a TypeError since Python 2.6,
            # which would hide the real message; raise an exception object.
            raise ValueError("unknown format")
        
    if options.clipboard:
        os.system('dcop klipper klipper setClipboardContents \"%s\"' % '\n'.join(outlines))
    else:
        print "\n".join(outlines)

    if options.library:

        nids = []
        if os.path.exists( options.library ):
            pmids = map(lambda x: x[:-1].split(" ")[1], filter(lambda x: re.match( "PMID", x), open(options.library).readlines()))
            for id in ids:
                if id in pmids: continue
                nids.append(id)
            outfile = open(options.library, "a")                            
        else:
            outfile = open(options.library, "w")            

        f = lambda id, x: outfile.write( "\n%s\n" % (str(x)))
        PubMed.download_many( nids, f )
        outfile.close()

        print "added %i out of %i entries to library %s" % (len(nids), len(ids), options.library)
        
    Experiment.Stop()