# NOTE(review): this span is a whitespace-collapsed fragment (line breaks and
# indentation were lost). It is kept verbatim because its block nesting cannot
# be recovered with certainty from this view.
#
# What the visible statements do:
#   * when `crf` is set, `contigfile` is opened for writing as `f`;
#   * for each entry `i` of `contigs`, `dict_seq` is built by walking the
#     characters of i[1] and storing each one under a running index that
#     starts at i[0][0] and is incremented by 1;
#   * `mtvcf_main_analysis` and `get_consensus_single` produce
#     `consensus_single`; every entry `p_info` whose p_info[0] is already a
#     key of `dict_seq` patches it: a 'mism' entry overwrites that position
#     with p_info[1][0], an 'ins' entry stores p_info[1][0][1:] under the key
#     p_info[0]+'.1'.
#
# NOTE(review): the keys of dict_seq come from `nuc_index`, which is advanced
# with `+= 1`, so they are presumably integers. p_info[0] has to equal one of
# those keys to reach the 'ins' branch, which would make `p_info[0]+'.1'` an
# int + str TypeError -- str(p_info[0]) may have been intended; confirm.
# NOTE(review): none of the arguments passed to mtvcf_main_analysis depend on
# the loop variable `i`; if the call really sits inside the loop it is
# recomputed per contig -- confirm the nesting before hoisting it.
if crf: f=open(contigfile,'w') x=1 for i in contigs: #print "A contig, ", i if crf: string_seq = i[1] #print "String seq is", string_seq nuc_index = i[0][0] dict_seq = {} # the sequence string at for nuc in string_seq: dict_seq[nuc_index] = nuc nuc_index += 1 #print "original dict_seq is", dict_seq # add info for consensus dictionary mut_events = mtvcf_main_analysis(mt_table, sam_file, sample_name, tail=tail) consensus_single = get_consensus_single(mut_events[mut_events.keys()[0]],hf=hf) #print consensus_single # alter dict_seq keys for the implementation # of the consensus information # #print "CONSENSUS SINGLE: ", consensus_single for p_info in consensus_single: if p_info[0] in dict_seq.keys(): #print "P_INFO: ", p_info # maybe I don't need to consider mismatch but I'll do anyway if p_info[-1] == 'mism': dict_seq[p_info[0]] = p_info[1][0] # check THIS elif p_info[-1] == 'ins': # in the consensus, the ins is reported as the nuc of pos of the ins + the inserted bases dict_seq[p_info[0]+'.1'] = p_info[1][0][1:]
# NOTE(review): whitespace-collapsed fragment (line breaks and indentation
# lost), kept verbatim. It stops at `elif p_info[-1] == 'ins':` with no body
# visible, so the insertion handling is outside this view.
#
# What the visible statements do:
#   * `x` is set to 1, then each entry `i` of `contigs` is processed;
#   * when `crf` is set, `dict_seq` is built from the characters of i[1],
#     keyed by a running index that starts at i[0][0] and grows by 1;
#   * `mtvcf_main_analysis` and `get_consensus_single` produce
#     `consensus_single`; for every entry `p_info` whose p_info[0] is already
#     a key of `dict_seq`, a 'mism' entry overwrites that position with
#     p_info[1][0].
#
# NOTE(review): none of the arguments passed to mtvcf_main_analysis depend on
# the loop variable `i`; if the call really sits inside the loop it is
# recomputed per contig -- confirm the nesting before hoisting it.
# NOTE(review): `p_info[0] in dict_seq.keys()` can be written
# `p_info[0] in dict_seq`, which avoids building a key list on Python 2.
x = 1 for i in contigs: #print "A contig, ", i if crf: string_seq = i[1] #print "String seq is", string_seq nuc_index = i[0][0] dict_seq = {} # the sequence string at for nuc in string_seq: dict_seq[nuc_index] = nuc nuc_index += 1 #print "original dict_seq is", dict_seq # add info for consensus dictionary mut_events = mtvcf_main_analysis(mt_table, sam_file, sample_name, tail=tail) consensus_single = get_consensus_single( mut_events[mut_events.keys()[0]], hf=hf) #print consensus_single # alter dict_seq keys for the implementation # of the consensus information # #print "CONSENSUS SINGLE: ", consensus_single for p_info in consensus_single: if p_info[0] in dict_seq.keys(): #print "P_INFO: ", p_info # maybe I don't need to consider mismatch but I'll do anyway if p_info[-1] == 'mism': dict_seq[p_info[0]] = p_info[1][0] # check THIS elif p_info[-1] == 'ins':
# NOTE(review): whitespace-collapsed fragment (line breaks and indentation
# lost), kept verbatim. It begins inside a comment whose start is not visible
# and ends inside the `for i in contigs:` loop.
#
# What the visible statements do:
#   * open `basext + '.sam'` for reading as `sam_file` and read every line of
#     `tablefile` into `mt_table`;
#   * if `sample_name` is a list, replace it with its first element;
#   * call `mtvcf_main_analysis(mt_table, sam_file, sample_name, tail=tail,
#     Q=mqual, minrd=cov)` and keep the result in `mut_events`;
#   * print the heteroplasmy range (`hf_min`, `hf_max`);
#   * load `VCF_dict` from '../VCF_dict_tmp' with ast.literal_eval when that
#     file exists, otherwise start from an empty dict;
#   * initialise `contigs_wdict`, open `contigfile` for writing when `crf` is
#     set, set `x` to 1 and start iterating over `contigs`.
#
# NOTE(review): `sam_handle` and `mt_table_handle` are assigned but not used
# anywhere in the visible span.
# NOTE(review): the handles behind `open(tablefile, 'r').readlines()` and
# `open('../VCF_dict_tmp', 'r').read()` are never closed explicitly; `with`
# blocks would release them deterministically.
# NOTE(review): `type(sample_name) == (list)` is an exact-type comparison;
# isinstance(sample_name, list) is the usual spelling (it would also accept
# list subclasses -- confirm that is acceptable).
# [((contig1_start, contig1_end), dict_seq = {pos : nuc, ...}), ((contig2_start, contig2_end), dict_seq = {pos : nuc, ...}), ...] # # so that each dict_seq can be handled with the Consensus dict information for ambiguities and indels. # SAMFILE, MT-TABLE FOR MTVCF_GENERATOR. # Sample name is defined as sample_name = os.getcwd().split('/')[-1].split('_')[1] sam_handle = basext + '.sam' mt_table_handle = tablefile sam_file = open(basext + '.sam', 'r') mt_table = open(tablefile, 'r').readlines() if type(sample_name) == (list): sample_name = sample_name[0] mut_events = mtvcf_main_analysis(mt_table, sam_file, sample_name, tail=tail, Q=mqual, minrd=cov) print "Heteroplasmic range for IUPAC in consensus is = {0} - {1}\n".format( hf_min, hf_max) if os.path.exists('../VCF_dict_tmp'): VCF_dict = ast.literal_eval(open('../VCF_dict_tmp', 'r').read()) # global VCF dict else: VCF_dict = {} # global VCF dict contigs_wdict = [] if crf: f = open(contigfile, 'w') x = 1 for i in contigs: #initialize new_i new_i = i
# Read the SAM alignments and the mt-table, run the variant caller on them and
# merge the resulting mutation events into the global VCF dict that is cached
# on disk in ../VCF_dict_tmp.
#
# NOTE(review): this span arrived with its line breaks and indentation
# collapsed, so the layout below is a reconstruction. Two assumptions to
# confirm against the original file:
#   * the lone `#` before `sam_file = open(...)` is an empty comment line, i.e.
#     the SAM file is opened here;
#   * only the "Updating..." print and the update itself are guarded by
#     `if mut_events`; the dict is written back unconditionally.
#
# Every file is now handled through a `with` block so its handle is released
# even if reading, parsing or writing raises; previously the handle used to
# load the cached dict was never closed. The file names stay bound (to closed
# file objects) exactly as before.
# Single-argument print(...) produces the same output under the Python 2 print
# statement used by this file.
print('=============================')
print("")
#
with open(basext+'.sam', 'r') as sam_file:
    sam = sam_file.readlines()
with open(tablefile, 'r') as mt_table_file:
    mt_table = mt_table_file.readlines()

# Calling of indels and mismatches. In the case of indels, mt_table (file that was generated in a
# previous step) is not used. However, for calling mismatches I think it is.
print(" -Calling mtvcf_main_analysis...")
mut_events = mtVariantCaller.mtvcf_main_analysis(mt_table, sam, sample_name, cov, indel_obs, tail)
print(" -mtvcf_main_analysis DONE")

# Build the cache path once; it is used for the existence check, the read and
# the write.
vcf_dict_path = '..'+os.sep+'VCF_dict_tmp'
if os.path.exists(vcf_dict_path):
    with open(vcf_dict_path, 'r') as vcf_dict_file:
        VCF_dict = ast.literal_eval(vcf_dict_file.read())  # global VCF dict
    print("Mutation events will be appended to existing global VCF dict ../VCF_dict_tmp")
else:
    VCF_dict = {}  # global VCF dict
    print("Creating new global VCF dict ../VCF_dict_tmp")

# Skip the merge when the caller returned nothing (empty or None).
if mut_events:
    print("Updating the VCF dict...")
    VCF_dict.update(mut_events)

# Persist the (possibly updated) dict as its Python literal representation so
# that it can be read back with ast.literal_eval.
with open(vcf_dict_path, 'w') as mut_events_cellar:
    mut_events_cellar.write(str(VCF_dict))