def get_kegg_biomart_id(sp_latin):
    """Return the KEGG and BioMart-style identifiers for a species.

    Args:
        sp_latin: Species name whose parts are separated by underscores,
            e.g. ``'homo_sapiens'``.

    Returns:
        A ``(kegg_sp, biomart_sp)`` tuple of strings.

        ``kegg_sp`` is the value stored under ``sp_latin`` in the mapping
        loaded from ``KEGG_ORGANISM_JSON``, or ``''`` when the species is
        not present in that mapping.

        ``biomart_sp`` is the first character of the first name part
        followed by the second name part (``'homo_sapiens'`` ->
        ``'hsapiens'``). It is ``''`` when ``sp_latin`` has no underscore
        or an empty first part, instead of raising ``IndexError``.
    """
    kegg_map_dict = load_fn_to_obj(KEGG_ORGANISM_JSON)
    kegg_sp = kegg_map_dict.get(sp_latin, '')

    biomart_sp = ''
    name_parts = sp_latin.split('_')
    # Only derive the BioMart id when both pieces it is built from exist;
    # indexing blindly would fail on names such as 'human' or '_sapiens'.
    if len(name_parts) >= 2 and name_parts[0]:
        biomart_sp = '{0}{1}'.format(name_parts[0][0], name_parts[1])
    return kegg_sp, biomart_sp
# Collect WGC ids for the selected projects and scan the raw-data
# directories for sub-directories named after a WGC id.
# `args`, `all_info_file`, `wgc_to_sample_dict`, `sample_to_wgc_dict`,
# `data_dir_dict` and `seq_number_list` are defined outside this section.

# Project names arrive as one comma-separated command-line value.
proj_name_list = args.proj_names.split(',')

# Fill the two-way WGC id <-> sample id lookup from the tab-separated info
# table, keeping only rows whose first column is a selected project.
with open(all_info_file) as all_info_file_if:
    for n, eachline in enumerate(all_info_file_if):
        # The first line is skipped (presumably a header row -- confirm).
        if n != 0:
            eachline_info = eachline.strip().split('\t')
            if eachline_info[0] in proj_name_list:
                # Field index 1 is read as the WGC id, index 4 as the
                # sample id.
                wgc_id = eachline_info[1]
                sample_id = eachline_info[4]
                wgc_to_sample_dict[wgc_id] = sample_id
                sample_to_wgc_dict[sample_id] = wgc_id

# Reuse 'rawdata_number.json' only when it exists and is non-empty;
# otherwise start from an empty mapping.
if os.path.isfile('rawdata_number.json') and os.stat(
        'rawdata_number.json').st_size:
    sample_number_dict = python_tools.load_fn_to_obj('rawdata_number.json')
else:
    sample_number_dict = {}

sample_dict = {}
for each_num in data_dir_dict:
    # With args.ALL set every key is processed; otherwise only the keys
    # listed in seq_number_list.
    if args.ALL or each_num in seq_number_list:
        each_dir_list = data_dir_dict[each_num]
        for each_dir in each_dir_list:
            rawdata_list = os.listdir(each_dir)
            for each_rawdata_dir in rawdata_list:
                each_fq_dir = os.path.join(each_dir, each_rawdata_dir)
                # Only directories are considered; plain files are ignored.
                if os.path.isdir(each_fq_dir):
                    # The directory name must contain 'WGC' followed by six
                    # digits; that match becomes the WGC id.
                    if re.search(r'(WGC\d{6})', each_rawdata_dir):
                        wgc_id = re.search(r'(WGC\d{6})',
                                           each_rawdata_dir).groups()[0]
import sys
import os
import python_tools

# Append InterPro annotation columns to an expression table.
# Command line: <script> exp.table gene.interpro.annotate exp.annotate.table
# NOTE(review): the `print` statement below is Python 2 syntax.

# Exactly three positional arguments are required; otherwise the usage line
# is printed and the script exits with status 0.
if not len(sys.argv) == 4:
    print ' python ' + sys.argv[
        0] + ' exp.table gene.interpro.annotate exp.annotate.table'
    sys.exit(0)

exp_table = sys.argv[1]
interpro_json = sys.argv[2]
exp_anno = sys.argv[3]

# Annotation lookup keyed by gene id; each value is indexed 0..2 below.
interpro_dict = python_tools.load_fn_to_obj(interpro_json)

# Output handle is opened without a context manager.
# NOTE(review): confirm it is closed once writing is finished.
exp_anno_inf = open(exp_anno, 'w')
with open(exp_table) as exp_table_inf:
    for n, eachline in enumerate(exp_table_inf):
        eachline = eachline.strip()
        if n == 0:
            # First line: extend it with the three annotation column titles.
            exp_anno_inf.write(
                '%s\tGene_name\tInterpro_ID\tInterpro_description\n' % eachline)
        else:
            eachline_inf = eachline.split('\t')
            # The first tab-separated field is used as the gene id.
            gene_id = eachline_inf[0]
            # '--' is the placeholder for each of the three annotation
            # columns when nothing is available.
            annotate_list = ['--'] * 3
            if gene_id in interpro_dict:
                for m, each in enumerate(annotate_list):
                    # Non-empty entries are comma-joined (presumably each is
                    # a list of strings -- confirm against the JSON schema).
                    if interpro_dict[gene_id][m]:
                        annotate_list[m] = ','.join(interpro_dict[gene_id][m])
            annotate_out = '\t'.join(annotate_list)
import os
import python_tools

# Load an InterPro map and begin reading a gene-to-InterPro id table.
# Command line: <script> gene.interpro.id interpro.map gene.interpro.des
# NOTE(review): `sys` is used below but no `import sys` is visible in this
# section -- confirm it is imported.
# NOTE(review): the `print` statement below is Python 2 syntax.

# Exactly three positional arguments are required; otherwise the usage line
# is printed and the script exits with status 0.
if not len(sys.argv) == 4:
    print ' python ' + sys.argv[
        0] + ' gene.interpro.id interpro.map gene.interpro.des'
    sys.exit(0)

gene_interpro_id_file = sys.argv[1]
interpro_map = sys.argv[2]
gene_interpro_des_file = sys.argv[3]

interpro_map_dict = {}
if interpro_map.endswith('json'):
    # A path ending in 'json' is loaded directly as the mapping.
    interpro_map_dict = python_tools.load_fn_to_obj(interpro_map)
else:
    # Otherwise the map is read as tab-separated text: field 0 -> field 1.
    with open(interpro_map) as interpro_map_inf:
        for eachline in interpro_map_inf:
            eachline_inf = eachline.strip().split('\t')
            interpro_map_dict[eachline_inf[0]] = eachline_inf[1]
    # The parsed mapping is written next to the input as '<interpro_map>.json'.
    interpro_map_json = '%s.json' % interpro_map
    python_tools.write_obj_to_json(interpro_map_dict, interpro_map_json)

gene_inf_dict = {}
with open(gene_interpro_id_file) as gene_interpro_id:
    for n, eachline in enumerate(gene_interpro_id):
        # The first line is skipped (presumably a header row -- confirm).
        if n != 0:
            # Unlike the map file, this table is split on commas.
            eachline_inf = eachline.strip().split(',')
            gene_id = eachline_inf[0]
            gene_name = eachline_inf[1]