sys.path.append('/home/hudaiber/Projects/lib/BioPy/')
    sys.path.append('/home/hudaiber/Projects/SystemFiles/')

import global_variables as gv
sys.path.append(gv.project_code_path)

from lib.db.archea import db_tools, neighborhoods_path
from lib.utils import tools as t
import os

# Module-level values computed once at import time.
# NOTE(review): the value returned by t.target_profiles() is replaced right
# below by the list read from selected_arcogs.txt. The call is kept in case
# it has side effects -- confirm and drop it if it does not.
target_profiles = t.target_profiles()

# One entry per line of the file, with surrounding whitespace stripped.
# The file is read inside a `with` block so the handle is closed as soon as
# the list is built instead of being left open for the life of the process.
with open('/Volumes/pan1/patternquest/Projects/NewSystems/data/Archea/arCOG/selected_arcogs.txt') as _profiles_file:
    target_profiles = [l.strip() for l in _profiles_file]

profile2def = t.map_profile2def()
gid2arcog_cdd = t.map_gid2arcog_cdd()
neighborhood_files_path = neighborhoods_path()

# NOTE(review): this hard-coded path overrides the value returned by
# neighborhoods_path() above -- confirm which of the two is intended.
neighborhood_files_path = '/Volumes/pan1/patternquest/Projects/NewSystems/data/Archea/genes_and_flanks/win_10/pty/'


def write_to_xls(xls_file, kplets):

    community = set()
    [community.update(kplet.codes) for kplet in kplets]
    _file2kplets = {}
    for kplet in kplets:
        for f in kplet.files:
            if f in _file2kplets:
                _file2kplets[f].append(kplet)
            else:
Exemple #2
0
    # dump_file = bz2.BZ2File(os.path.join(save_path, 'pentaplets_merged_across.p.bz2'), 'w')
    # pickle.dump(pentaplets, dump_file)
    # dump_file = bz2.BZ2File(os.path.join(save_path, 'quadruplets_merged_across.p.bz2'), 'w')
    # pickle.dump(quadruplets, dump_file)
    # dump_file = bz2.BZ2File(os.path.join(save_path, 'triplets_merged_across.p.bz2'), 'w')
    # pickle.dump(triplets, dump_file)
    # dump_file = bz2.BZ2File(os.path.join(save_path, 'duplets_merged_across.p.bz2'), 'w')
    # pickle.dump(duplets, dump_file)


if __name__ == '__main__':

    print 'Pre-Loading dictionaries'
    target_profiles = t.bacteria_target_profiles()
    profile2def = t.map_cdd_profile2def()
    gid2arcog_cdd = t.map_gid2arcog_cdd()
    neighborhood_files_path = neighborhoods_path()
    profile_id2code = map_id2cdd()

    # for limit_to, report_dir in zip([300, 500, 1000, 100000],['top_300', 'top_500', 'top_1000', 'top_100000']):
    #
    #     print "Limit_to:", limit_to
    #     print
    #     generate_plots(limit_to, report_dir, target_profiles, profile2def, gid2arcog_cdd, neighborhood_files_path, profile_id2code)
    #     print 'Done'
    #     print "------------------------"

    data_path = os.path.join(gv.project_data_path, 'Bacteria/pickle/')

    print 'Generating pickles'
    for limit_to in [100000]: