sys.path.append('/home/hudaiber/Projects/lib/BioPy/') sys.path.append('/home/hudaiber/Projects/SystemFiles/') import global_variables as gv sys.path.append(gv.project_code_path) from lib.db.archea import db_tools, neighborhoods_path from lib.utils import tools as t import os target_profiles = t.target_profiles() target_profiles = [l.strip() for l in open('/Volumes/pan1/patternquest/Projects/NewSystems/data/Archea/arCOG/selected_arcogs.txt').readlines()] profile2def = t.map_profile2def() gid2arcog_cdd = t.map_gid2arcog_cdd() neighborhood_files_path = neighborhoods_path() neighborhood_files_path = '/Volumes/pan1/patternquest/Projects/NewSystems/data/Archea/genes_and_flanks/win_10/pty/' def write_to_xls(xls_file, kplets): community = set() [community.update(kplet.codes) for kplet in kplets] _file2kplets = {} for kplet in kplets: for f in kplet.files: if f in _file2kplets: _file2kplets[f].append(kplet) else:
# dump_file = bz2.BZ2File(os.path.join(save_path, 'pentaplets_merged_across.p.bz2'), 'w') # pickle.dump(pentaplets, dump_file) # dump_file = bz2.BZ2File(os.path.join(save_path, 'quadruplets_merged_across.p.bz2'), 'w') # pickle.dump(quadruplets, dump_file) # dump_file = bz2.BZ2File(os.path.join(save_path, 'triplets_merged_across.p.bz2'), 'w') # pickle.dump(triplets, dump_file) # dump_file = bz2.BZ2File(os.path.join(save_path, 'duplets_merged_across.p.bz2'), 'w') # pickle.dump(duplets, dump_file) if __name__ == '__main__': print 'Pre-Loading dictionaries' target_profiles = t.bacteria_target_profiles() profile2def = t.map_cdd_profile2def() gid2arcog_cdd = t.map_gid2arcog_cdd() neighborhood_files_path = neighborhoods_path() profile_id2code = map_id2cdd() # for limit_to, report_dir in zip([300, 500, 1000, 100000],['top_300', 'top_500', 'top_1000', 'top_100000']): # # print "Limit_to:", limit_to # print # generate_plots(limit_to, report_dir, target_profiles, profile2def, gid2arcog_cdd, neighborhood_files_path, profile_id2code) # print 'Done' # print "------------------------" data_path = os.path.join(gv.project_data_path, 'Bacteria/pickle/') print 'Generating pickles' for limit_to in [100000]: