converted = classifydict(value) for nucleotide, valuen in converted.items(): if int(valuen[0]) > 0 and float(valuen[2]) < float(args.minqual): count +=1 # now check for synonymity, move this to functions alternativeSeq = MutableSeq(str(element.seq[start:stop]), generic_dna) ## print nucleotide alternativeSeq = mutateSequence(alternativeSeq,sub_element,nucleotide,start) alternativeSeq = Seq(str(alternativeSeq), generic_dna) if len(alternativeSeq)%3 != 0: overlap = len(alternativeSeq)%3 alternativeSeq = alternativeSeq[:-int(overlap)] altprot = alternativeSeq.translate() altprot = list2dict(altprot[0:len(protein)],0) protposition = int((sub_element-start)/3) try: if protein[protposition] != altprot[protposition]: positionList.append(sub_element) synonym = 'NonSynon' if protein[protposition] == altprot[protposition]: synonym = 'Syn' except: synonym = 'Unknown' # correct against python specific error of starting count at 0. truepos = int(sub_element)+1
def determine_synonymous(nuc_muts_on_branch, parent_diffs_from_ref,
                         reference_gene_locations, reference_gene_codon,
                         reference_sequence_nt, reference_sequence_aa):
    """
    Check every nucleotide mutation that occurred on a branch to determine
    whether or not it is synonymous.

    For each mutation, every difference from the reference that the parents
    carry inside the affected codon is applied to the reference codon, giving
    the codon as it was prior to this branch. The branch mutation is then made
    to that codon, the codon is translated, and the result is compared with
    the reference amino acid.

    Parameters:
        nuc_muts_on_branch: iterable of mutation strings of the form
            ``<from><position><to>`` (e.g. ``"A123G"``). The position is
            1-based; ``-`` on either side marks an indel and is skipped.
        parent_diffs_from_ref: mapping of 1-based genome position (int, or
            anything ``int()`` accepts) to the nucleotide the parent carries
            there (``-`` for a deletion).
        reference_gene_locations: mapping of zero-based genome index to the
            name of the gene covering that index. Indices missing from this
            mapping are treated as noncoding.
        reference_gene_codon: mapping of zero-based genome index to a
            sequence whose item 0 is the codon number within the gene and
            item 1 is the position (0-2) within that codon.
        reference_sequence_nt: mapping of gene name to its reference
            nucleotide sequence.
        reference_sequence_aa: mapping of gene name to its reference amino
            acid sequence.

    Returns:
        A dictionary where the key is a gene and the value is a list of
        synonymous mutations in this gene. Mutations outside every gene are
        collected under the key ``'noncoding'``.
    """
    # Normalise the position keys once. This lets the three positions of a
    # codon be looked up directly, and it works whether the caller keyed the
    # mapping by int or by str.
    parent_nt_by_pos = {
        int(pos): nt for pos, nt in parent_diffs_from_ref.items()
    }

    # synonymous (and noncoding) mutations to add to the tree
    syn_muts = {}

    for mut in nuc_muts_on_branch:
        # don't care about indels because they are obviously not synonymous
        if mut[-1] == '-' or mut[0] == '-':
            continue

        # mutation strings are 1-based, the reference lookups are zero-based
        mut_index = int(mut[1:-1]) - 1

        if mut_index not in reference_gene_locations:
            syn_muts.setdefault('noncoding', []).append(mut)
            continue

        mut_gene = reference_gene_locations[mut_index]
        codon_info = reference_gene_codon[mut_index]
        mut_codon_num = codon_info[0]
        mut_codon_pos = codon_info[1]

        # reference amino acid and nucleotides of the affected codon
        codon_ref_aa = reference_sequence_aa[mut_gene][mut_codon_num]
        codon_start = mut_codon_num * 3
        codon = list(
            str(reference_sequence_nt[mut_gene][codon_start:codon_start + 3]))

        # Apply EVERY parent difference that falls inside this codon. The
        # edits accumulate on the same codon, so an earlier difference
        # (notably a deletion) is not lost when a second one is applied.
        genome_codon_start = mut_index - mut_codon_pos
        for offset in range(3):
            # parent differences are keyed by 1-based genome position
            parent_nt = parent_nt_by_pos.get(genome_codon_start + offset + 1)
            if parent_nt is not None:
                codon[offset] = parent_nt

        # if deletion (or seq error) has happened at neighboring nucleotide
        # the codon cannot be translated meaningfully, so skip the mutation
        if '-' in codon:
            continue

        codon[mut_codon_pos] = mut[-1]
        codon_mutated_translation = Seq("".join(codon)).translate()

        # NOTE(review): the mutated codon is compared with the REFERENCE
        # amino acid, not with the translation of the parent codon. If a
        # parent already changed this amino acid, a reversion is reported as
        # synonymous. Kept as-is; confirm this is the intended definition.
        if str(codon_ref_aa) == str(codon_mutated_translation):
            syn_muts.setdefault(mut_gene, []).append(mut)

    return syn_muts