Beispiel #1
0
def get_kegg_biomart_id(sp_latin):
    """Return the KEGG and BioMart identifiers for a Latin species name.

    ``sp_latin`` is split on underscores.  The KEGG identifier is looked up
    in the object loaded from ``KEGG_ORGANISM_JSON`` and is an empty string
    when ``sp_latin`` is not a key of it.  The BioMart identifier is the
    first character of the first name part followed by the whole second
    part.

    Returns:
        A ``(kegg_sp, biomart_sp)`` tuple.

    Raises:
        IndexError: if ``sp_latin`` has no second underscore-separated part
            or its first part is empty.
    """
    organism_map = load_fn_to_obj(KEGG_ORGANISM_JSON)
    kegg_sp = organism_map[sp_latin] if sp_latin in organism_map else ''
    name_parts = sp_latin.split('_')
    biomart_sp = '{0}{1}'.format(name_parts[0][0], name_parts[1])
    return kegg_sp, biomart_sp
Beispiel #2
0
# args.proj_names holds the selected project names as one comma-separated
# string.
proj_name_list = args.proj_names.split(',')

# Record the WGC id <-> sample id pairing (columns 2 and 5 of the
# tab-separated info table) for every row whose first column is one of the
# selected projects.
with open(all_info_file) as all_info_file_if:
    for n, eachline in enumerate(all_info_file_if):
        if n == 0:
            # The first line is skipped (treated as a header).
            continue
        eachline_info = eachline.strip().split('\t')
        if eachline_info[0] not in proj_name_list:
            continue
        wgc_id = eachline_info[1]
        sample_id = eachline_info[4]
        wgc_to_sample_dict[wgc_id] = sample_id
        sample_to_wgc_dict[sample_id] = wgc_id

# Start from an empty record; it is replaced by the content of
# 'rawdata_number.json' (relative to the working directory) only when that
# file exists and is not empty.
sample_number_dict = {}
if os.path.isfile('rawdata_number.json'):
    if os.stat('rawdata_number.json').st_size:
        sample_number_dict = python_tools.load_fn_to_obj(
            'rawdata_number.json')

sample_dict = {}
# Visit the sub-directories of every data directory registered in
# data_dir_dict.  A key is processed when args.ALL is set or when the key is
# listed in seq_number_list.
for each_num in data_dir_dict:
    if args.ALL or each_num in seq_number_list:
        each_dir_list = data_dir_dict[each_num]
        for each_dir in each_dir_list:
            rawdata_list = os.listdir(each_dir)
            for each_rawdata_dir in rawdata_list:
                each_fq_dir = os.path.join(each_dir, each_rawdata_dir)
                # Plain files next to the raw-data directories are ignored.
                if os.path.isdir(each_fq_dir):
                    # Search the directory name once and reuse the match
                    # object instead of running the same regex twice.
                    wgc_match = re.search(r'(WGC\d{6})', each_rawdata_dir)
                    if wgc_match:
                        # "WGC" followed by six digits, taken from the
                        # directory name.
                        wgc_id = wgc_match.group(1)
Beispiel #3
0
import sys
import os
import python_tools

# Exactly three positional arguments are required; otherwise the usage line
# is printed and the script stops.
if len(sys.argv) != 4:
    # Single parenthesised argument: valid both as a Python 2 print statement
    # and as a Python 3 print() call, and produces the same output in both.
    print('    python ' + sys.argv[0] +
          ' exp.table gene.interpro.annotate exp.annotate.table')
    # NOTE(review): the exit status is 0 even though the arguments are
    # wrong -- confirm whether callers rely on this before changing it.
    sys.exit(0)

exp_table = sys.argv[1]      # input table, read line by line
interpro_json = sys.argv[2]  # file handed to python_tools.load_fn_to_obj
exp_anno = sys.argv[3]       # output path, opened for writing

# Loaded object is indexed by gene id further down.
interpro_dict = python_tools.load_fn_to_obj(interpro_json)
# Output handle used by the loop that follows.
exp_anno_inf = open(exp_anno, 'w')
# Walk the expression table line by line: the first line gets three extra
# column titles appended and is written out; for every other line a
# three-slot annotation string is built from interpro_dict.
with open(exp_table) as exp_table_inf:
    for n, eachline in enumerate(exp_table_inf):
        eachline = eachline.strip()
        if n == 0:
            # Header line: keep the original text and append the titles of
            # the three annotation columns.
            exp_anno_inf.write(
                '%s\tGene_name\tInterpro_ID\tInterpro_description\n' %
                eachline)
        else:
            eachline_inf = eachline.split('\t')
            # The first tab-separated field is the lookup key.
            gene_id = eachline_inf[0]
            # One placeholder per appended column; '--' remains wherever no
            # annotation is found.
            annotate_list = ['--'] * 3
            if gene_id in interpro_dict:
                for m, each in enumerate(annotate_list):
                    # Only non-empty entries replace the placeholder.  Entry
                    # m is joined with commas, so it is presumably a list of
                    # strings -- confirm against whatever writes the JSON.
                    if interpro_dict[gene_id][m]:
                        annotate_list[m] = ','.join(interpro_dict[gene_id][m])
            # Tab-joined annotation columns for this gene.
            annotate_out = '\t'.join(annotate_list)
import os
import python_tools

# Exactly three positional arguments are required; otherwise the usage line
# is printed and the script stops.
if len(sys.argv) != 4:
    # Single parenthesised argument: valid both as a Python 2 print statement
    # and as a Python 3 print() call, and produces the same output in both.
    print('    python ' + sys.argv[0] +
          ' gene.interpro.id interpro.map gene.interpro.des')
    # NOTE(review): the exit status is 0 even though the arguments are
    # wrong -- confirm whether callers rely on this before changing it.
    sys.exit(0)

gene_interpro_id_file = sys.argv[1]   # first positional argument
interpro_map = sys.argv[2]            # map file, plain text or *json
gene_interpro_des_file = sys.argv[3]  # third positional argument

# Build interpro_map_dict from the map file.  A path ending in 'json' is
# loaded directly; any other path is parsed as two tab-separated columns
# (key, value) and the resulting dict is also written to '<path>.json'.
if not interpro_map.endswith('json'):
    interpro_map_dict = {}
    with open(interpro_map) as interpro_map_inf:
        for eachline in interpro_map_inf:
            eachline_inf = eachline.strip().split('\t')
            # A repeated key keeps the value from its last line.
            interpro_map_dict[eachline_inf[0]] = eachline_inf[1]
    interpro_map_json = '%s.json' % interpro_map
    python_tools.write_obj_to_json(interpro_map_dict, interpro_map_json)
else:
    interpro_map_dict = python_tools.load_fn_to_obj(interpro_map)

# Starts empty; the code that fills it is not visible in this part of the
# file.
gene_inf_dict = {}
with open(gene_interpro_id_file) as gene_interpro_id:
    for n, eachline in enumerate(gene_interpro_id):
        # The first line is skipped (treated as a header).
        if n != 0:
            # This file is split on commas, whereas the plain-text interpro
            # map is split on tabs.
            eachline_inf = eachline.strip().split(',')
            gene_id = eachline_inf[0]    # first column
            gene_name = eachline_inf[1]  # second column