def generate_pickles(save_path, limit_to): if not os.path.exists(save_path): os.mkdir(save_path) print "Loading from DB" print "pentaplets" pentaplets = p.get_report_kplets(profile_id2code, limit_to=limit_to) print "quadruplets" quadruplets = q.get_report_kplets(profile_id2code, limit_to=limit_to) print "triplets" triplets = tr.get_report_kplets(profile_id2code, limit_to=limit_to) print "duplets" duplets = d.get_report_kplets(profile_id2code, limit_to=limit_to) print "Dumping to files" dump_file = os.path.join(save_path, 'duplets.p.bz2') print dump_file t.dump_compressed_pickle(dump_file, duplets) dump_file = os.path.join(save_path, 'triplets.p.bz2') print dump_file t.dump_compressed_pickle(dump_file, triplets) dump_file = os.path.join(save_path, 'quadruplets.p.bz2') print dump_file t.dump_compressed_pickle(dump_file, quadruplets) dump_file = os.path.join(save_path, 'pentaplets.p.bz2') print dump_file t.dump_compressed_pickle(dump_file, pentaplets)
def merging_pipeline_for_order(order, load_from_db=False):
    """Load kplets of the given order (2..5) and run basic + iterative
    within-order merging, dumping each merge result as a compressed pickle.

    NOTE(review): this file re-defines merging_pipeline_for_order several
    more times below; this first definition is shadowed and never the
    effective binding at import time.

    Relies on module-level globals: data_path, profile_id2code, and the
    p / q / t / merging helper modules.

    order        -- kplet size: 2 duplets .. 5 pentaplets.
    load_from_db -- if True, orders 4/5 are queried from the DB; see the
                    bug note below for orders 2/3.
    """
    limit_to = 1000000000
    print "starting for ", order
    if load_from_db:
        print "Loading kplets from DB"
        if order == 2:
            kplet_file = 'duplets.p.bz2'
            # NOTE(review): BUG -- the load below is commented out, so for
            # order 2 this branch never assigns `kplets` and the function
            # crashes with NameError at len(kplets) further down.
            #kplets = t.load_compressed_pickle(os.path.join(data_path, kplet_file))
        elif order == 3:
            kplet_file = 'triplets.p.bz2'
            # NOTE(review): same BUG as order 2 -- `kplets` never assigned.
            #kplets = t.load_compressed_pickle(os.path.join(data_path, kplet_file))
        elif order == 4:
            kplet_file = 'quadruplets.p.bz2'
            kplets = q.get_report_kplets(profile_id2code, limit_to=limit_to)
        elif order == 5:
            kplet_file = 'pentaplets.p.bz2'
            kplets = p.get_report_kplets(profile_id2code, limit_to=limit_to)
    else:
        print "Loading kplets from pickle file"
        # kplet_file also names the output files below ("basic_merged_" etc.).
        if order == 2:
            kplet_file = 'duplets.p.bz2'
        elif order == 3:
            kplet_file = 'triplets.p.bz2'
        elif order == 4:
            kplet_file = 'quadruplets.p.bz2'
        elif order == 5:
            kplet_file = 'pentaplets.p.bz2'
        # NOTE(review): an unsupported order leaves kplet_file unbound here.
        kplet_file_full = os.path.join(data_path, kplet_file)
        print kplet_file_full
        kplets = t.load_compressed_pickle(kplet_file_full)

    # print "Starting for", kplet_file
    # print "Loading kplets"
    # kplets = t.load_compressed_pickle(os.path.join(data_path, kplet_file))

    print "No of kplets:", len(kplets)

    # Dead code retained from an earlier seed-filtering experiment:
    #print "Loading file2genes"
    #_file2genes = {}
    #for _f in os.listdir(neighborhood_files_path):
    #    _file2genes[_f] = dt.get_wgs_file(os.path.join(neighborhood_files_path, _f))
    # print 'Filtering'
    # kplets = filter_seed(kplets, _file2genes)
    # print "No of kplets:", len(kplets)
    # fname = os.path.join(data_path, kplet_file.split('.')[0]+'_seed.p.bz2')
    # print 'Dumping', fname
    # t.dump_compressed_pickle(fname, kplets)

    print "Basic merging"
    merged_lists = merging.basic_merge_within_orders(kplets)
    fname = os.path.join(data_path, "basic_merged_" + kplet_file)
    print "Dumping basic merging: ", fname
    t.dump_compressed_pickle(fname, merged_lists)

    print "Iterative merging"
    # Iterative merging refines the basic merge result in place of it.
    merged_lists = merging.merge_kplets_within_orders_iterative(merged_lists)
    fname = os.path.join(data_path, "iterative_merged_" + kplet_file)
    print "Dumping Iterative merging: ", fname
    t.dump_compressed_pickle(fname, merged_lists)
def generate_pickle_order(order, save_path, limit_to): print "Loading from DB" if order == 2: print 'duplets' data_file = 'duplets.p.bz2' kplets = d.get_report_kplets(profile_id2code, limit_to=limit_to) elif order == 3: print 'triplets' data_file = 'triplets.p.bz2' kplets = tr.get_report_kplets(profile_id2code, limit_to=limit_to) elif order == 4: print 'quadruplets' data_file = 'quadruplets.p.bz2' kplets = q.get_report_kplets(profile_id2code, limit_to=limit_to) elif order == 5: print 'pentaplets' data_file = 'pentaplets.p.bz2' kplets = p.get_report_kplets(profile_id2code, limit_to=limit_to) # # block for work aorund of too bign pentaplet # print 'Loading file2genes' # neighborhood_files_path = os.path.join(gv.project_data_path,'CRISPR/datasets/crispr/wgs') # _file2genes = {} # for _f in os.listdir(neighborhood_files_path): # _file2genes[_f] = dt.get_wgs_file(os.path.join(neighborhood_files_path, _f)) # # kplets = filter_seed(kplets, _file2genes) # dump_file = os.path.join(save_path, data_file) # print "Dumtiping to file", dump_file # t.dump_compressed_pickle(kplets, dump_file) # print "Finished" # sys.exit() dump_file = os.path.join(save_path, data_file) print "Dumping to file", dump_file t.dump_compressed_pickle(dump_file, kplets) print "Finished"
def merging_pipeline_for_order(order, data_path, load_from_db=False): limit_to = 1000000000 print "starting for ", order if load_from_db: print "Loading kplets from DB" if order == 2: kplet_file = 'duplets.p.bz2' #kplets = t.load_compressed_pickle(os.path.join(data_path, kplet_file)) elif order == 3: kplet_file = 'triplets.p.bz2' #kplets = t.load_compressed_pickle(os.path.join(data_path, kplet_file)) elif order == 4: kplet_file = 'quadruplets.p.bz2' kplets = q.get_report_kplets(profile_id2code, limit_to=limit_to) elif order == 5: kplet_file = 'pentaplets.p.bz2' kplets = p.get_report_kplets(profile_id2code, limit_to=limit_to) else: print "Loading kplets from pickle file" if order == 2: kplet_file = 'duplets.p.bz2' elif order == 3: kplet_file = 'triplets.p.bz2' elif order == 4: kplet_file = 'quadruplets.p.bz2' elif order == 5: kplet_file = 'pentaplets.p.bz2' kplet_file_full = os.path.join(data_path, kplet_file) print "Loading :",kplet_file_full kplets = t.load_compressed_pickle(kplet_file_full) print "No of kplets:", len(kplets) #print "Loading file2genes" # tic = time.time() # print "Basic merging" # merged_lists = merging.basic_merge_within_orders(kplets) # print "Basic merging done. Merged lists:", len(merged_lists) # # fname = os.path.join(data_path, "basic_merged_"+kplet_file) # # print "Dumping basic merging: ", fname # # t.dump_compressed_pickle(fname, merged_lists) # # print "Iterative merging" # merged_lists = merging.merge_kplets_within_orders_iterative(merged_lists) # print "Iterative merging done. 
Merged lists:", len(merged_lists) # # fname = os.path.join(data_path, "iterative_merged_"+kplet_file) # # print "Dumping Iterative merging: ",fname # # t.dump_compressed_pickle(fname, merged_lists) # print "Completed in:", time.time()-tic, "(s)" csv_kplet_file = os.path.join(data_path, kplet_file.split('.')[0]+".csv") csv_merged_lists_file = os.path.join(data_path, "iterative_merged_"+kplet_file.split('.')[0]+".csv") print "Writing kplets to csv file:" print csv_kplet_file t.write_kplets_to_csv(kplets, csv_kplet_file) tic = time.time() print "Starting kpletmerger with params:" print "kpletmerger", csv_kplet_file, csv_merged_lists_file print "\n\n" sp.call(["kpletmerger",csv_kplet_file,csv_merged_lists_file]) print "\n\n" print "Completed in:", time.time()-tic, "(s)"
def merging_pipeline_for_order(order, data_path, load_from_db=False):
    """Load kplets of the given order, dump them to CSV, and run the
    external `kpletmerger` tool on the CSV files.

    NOTE(review): near-identical duplicate of the previous definition of
    this function in the same file; itself shadowed by one more definition
    below, so this version is never the effective binding.

    order        -- kplet size: 2 duplets .. 5 pentaplets.
    data_path    -- directory holding the kplet pickles and CSV outputs.
    load_from_db -- if True, orders 4/5 are queried from the DB; see the
                    bug note below for orders 2/3.
    """
    limit_to = 1000000000
    print "starting for ", order
    if load_from_db:
        print "Loading kplets from DB"
        if order == 2:
            kplet_file = 'duplets.p.bz2'
            # NOTE(review): BUG -- the load is commented out, so order 2 with
            # load_from_db=True never assigns `kplets`; NameError at
            # len(kplets) below.
            #kplets = t.load_compressed_pickle(os.path.join(data_path, kplet_file))
        elif order == 3:
            kplet_file = 'triplets.p.bz2'
            # NOTE(review): same BUG as order 2 -- `kplets` never assigned.
            #kplets = t.load_compressed_pickle(os.path.join(data_path, kplet_file))
        elif order == 4:
            kplet_file = 'quadruplets.p.bz2'
            kplets = q.get_report_kplets(profile_id2code, limit_to=limit_to)
        elif order == 5:
            kplet_file = 'pentaplets.p.bz2'
            kplets = p.get_report_kplets(profile_id2code, limit_to=limit_to)
    else:
        print "Loading kplets from pickle file"
        if order == 2:
            kplet_file = 'duplets.p.bz2'
        elif order == 3:
            kplet_file = 'triplets.p.bz2'
        elif order == 4:
            kplet_file = 'quadruplets.p.bz2'
        elif order == 5:
            kplet_file = 'pentaplets.p.bz2'
        # NOTE(review): an unsupported order leaves kplet_file unbound here.
        kplet_file_full = os.path.join(data_path, kplet_file)
        print "Loading :", kplet_file_full
        kplets = t.load_compressed_pickle(kplet_file_full)

    print "No of kplets:", len(kplets)

    # Dead code retained from the in-process merging experiment; the work is
    # now delegated to the external kpletmerger binary below.
    #print "Loading file2genes"
    # tic = time.time()
    # print "Basic merging"
    # merged_lists = merging.basic_merge_within_orders(kplets)
    # print "Basic merging done. Merged lists:", len(merged_lists)
    # # fname = os.path.join(data_path, "basic_merged_"+kplet_file)
    # # print "Dumping basic merging: ", fname
    # # t.dump_compressed_pickle(fname, merged_lists)
    #
    # print "Iterative merging"
    # merged_lists = merging.merge_kplets_within_orders_iterative(merged_lists)
    # print "Iterative merging done. Merged lists:", len(merged_lists)
    # # fname = os.path.join(data_path, "iterative_merged_"+kplet_file)
    # # print "Dumping Iterative merging: ",fname
    # # t.dump_compressed_pickle(fname, merged_lists)
    # print "Completed in:", time.time()-tic, "(s)"

    csv_kplet_file = os.path.join(data_path, kplet_file.split('.')[0] + ".csv")
    csv_merged_lists_file = os.path.join(
        data_path, "iterative_merged_" + kplet_file.split('.')[0] + ".csv")

    print "Writing kplets to csv file:"
    print csv_kplet_file
    t.write_kplets_to_csv(kplets, csv_kplet_file)

    tic = time.time()
    print "Starting kpletmerger with params:"
    print "kpletmerger", csv_kplet_file, csv_merged_lists_file
    print "\n\n"
    # kpletmerger reads the kplet CSV and writes the merged lists CSV.
    sp.call(["kpletmerger", csv_kplet_file, csv_merged_lists_file])
    print "\n\n"
    print "Completed in:", time.time() - tic, "(s)"
def merging_pipeline_for_order(order, load_from_db=False): limit_to = 1000000000 print "starting for ", order if load_from_db: print "Loading kplets from DB" if order == 2: kplet_file = 'duplets.p.bz2' #kplets = t.load_compressed_pickle(os.path.join(data_path, kplet_file)) elif order == 3: kplet_file = 'triplets.p.bz2' #kplets = t.load_compressed_pickle(os.path.join(data_path, kplet_file)) elif order == 4: kplet_file = 'quadruplets.p.bz2' kplets = q.get_report_kplets(profile_id2code, limit_to=limit_to) elif order == 5: kplet_file = 'pentaplets.p.bz2' kplets = p.get_report_kplets(profile_id2code, limit_to=limit_to) else: print "Loading kplets from pickle file" if order == 2: kplet_file = 'duplets.p.bz2' elif order == 3: kplet_file = 'triplets.p.bz2' elif order == 4: kplet_file = 'quadruplets.p.bz2' elif order == 5: kplet_file = 'pentaplets.p.bz2' kplet_file_full = os.path.join(data_path, kplet_file) print kplet_file_full kplets = t.load_compressed_pickle(kplet_file_full) # print "Starting for", kplet_file # print "Loading kplets" # kplets = t.load_compressed_pickle(os.path.join(data_path, kplet_file)) print "No of kplets:", len(kplets) #print "Loading file2genes" #_file2genes = {} #for _f in os.listdir(neighborhood_files_path): # _file2genes[_f] = dt.get_wgs_file(os.path.join(neighborhood_files_path, _f)) # print 'Filtering' # kplets = filter_seed(kplets, _file2genes) # print "No of kplets:", len(kplets) # fname = os.path.join(data_path, kplet_file.split('.')[0]+'_seed.p.bz2') # print 'Dumping', fname # t.dump_compressed_pickle(fname, kplets) print "Basic merging" merged_lists = merging.basic_merge_within_orders(kplets) fname = os.path.join(data_path, "basic_merged_"+kplet_file) print "Dumping basic merging: ", fname t.dump_compressed_pickle(fname, merged_lists) print "Iterative merging" merged_lists = merging.merge_kplets_within_orders_iterative(merged_lists) fname = os.path.join(data_path, "iterative_merged_"+kplet_file) print "Dumping Iterative merging: ",fname 
t.dump_compressed_pickle(fname, merged_lists)