Ejemplo n.º 1
0
                    converted = classifydict(value)
                    for nucleotide, valuen in converted.items():
                        if int(valuen[0]) > 0 and float(valuen[2]) < float(args.minqual):
                            count +=1


                            # Now check whether the substitution is synonymous. TODO: move this into a function.
                            alternativeSeq = MutableSeq(str(element.seq[start:stop]), generic_dna)
##                            print nucleotide
                            alternativeSeq = mutateSequence(alternativeSeq,sub_element,nucleotide,start)
                            alternativeSeq = Seq(str(alternativeSeq), generic_dna)
                            if len(alternativeSeq)%3 != 0:
                                overlap = len(alternativeSeq)%3
                                alternativeSeq = alternativeSeq[:-int(overlap)]
                            
                            altprot = alternativeSeq.translate()
                            altprot = list2dict(altprot[0:len(protein)],0)

                            protposition = int((sub_element-start)/3)
                            try:
                                if protein[protposition] != altprot[protposition]:
                                    positionList.append(sub_element)
                                    synonym = 'NonSynon'
                                if protein[protposition] == altprot[protposition]:
                                    synonym = 'Syn'
                            except:
                                synonym = 'Unknown'

                            # Convert Python's 0-based index to a 1-based sequence position.

                            truepos = int(sub_element)+1
Ejemplo n.º 2
0
def determine_synonymous(nuc_muts_on_branch, parent_diffs_from_ref,
                         reference_gene_locations, reference_gene_codon,
                         reference_sequence_nt, reference_sequence_aa):
    """
    Collect the synonymous and noncoding nucleotide substitutions on a branch.

    For each substitution, the codon it falls in is taken from the reference
    gene sequence, every parent difference that lands in that codon is
    applied to it, and then the substitution itself is applied. The mutated
    codon is translated and compared with the reference amino acid at that
    codon; the substitution is recorded when the two are equal.

    Args:
        nuc_muts_on_branch: iterable of mutation strings of the form
            ``<from><position><to>`` (e.g. ``"A123G"``). The position is
            1-based: it is decremented before being used as a key into
            ``reference_gene_locations`` / ``reference_gene_codon``.
        parent_diffs_from_ref: mapping of 1-based genome position to the
            nucleotide the parent carries there. Keys may be ints or
            int-like strings.
        reference_gene_locations: mapping of 0-based genome position to the
            gene at that position. Positions absent from this mapping are
            treated as noncoding.
        reference_gene_codon: mapping of 0-based genome position to a
            sequence whose first two items are the codon number within the
            gene and the position of the nucleotide within that codon.
        reference_sequence_nt: mapping of gene to its reference nucleotide
            sequence (sliceable, convertible with ``str``).
        reference_sequence_aa: mapping of gene to its reference amino-acid
            sequence, indexable by codon number.

    Returns:
        dict mapping each gene to the list of its synonymous mutations, plus
        a ``'noncoding'`` key listing mutations outside every gene. Keys are
        only present when at least one mutation was recorded for them.
    """
    # Index the parent differences by integer position once. Looking values
    # up through this dict (instead of the original mapping) keeps the
    # function working when the incoming keys are strings.
    parent_diffs_by_pos = {
        int(pos): nt for pos, nt in parent_diffs_from_ref.items()
    }

    # synonymous (and noncoding) mutations to add to the tree
    syn_muts = {}

    for mut in nuc_muts_on_branch:
        # Gaps on either side are indels, which are never synonymous.
        if mut[0] == '-' or mut[-1] == '-':
            continue

        mut_index = int(mut[1:-1]) - 1  # 0-based genome position

        if mut_index not in reference_gene_locations:
            syn_muts.setdefault('noncoding', []).append(mut)
            continue

        mut_gene = reference_gene_locations[mut_index]
        mut_codon_num = reference_gene_codon[mut_index][0]
        mut_codon_pos = reference_gene_codon[mut_index][1]

        # Reference amino acid and nucleotides of the affected codon.
        # NOTE(review): the result is compared against the *reference* amino
        # acid, not the translation of the parent's codon -- confirm this is
        # the intended definition of "synonymous" here.
        codon_ref_aa = reference_sequence_aa[mut_gene][mut_codon_num]
        codon_start = mut_codon_num * 3
        codon_ref_nt = reference_sequence_nt[mut_gene][
            codon_start:codon_start + 3]

        # Apply EVERY parent difference that falls inside this codon, so the
        # codon reflects the genome immediately before this branch.
        codon = list(str(codon_ref_nt))
        codon_genome_start = mut_index - mut_codon_pos
        for offset in range(len(codon)):
            one_based_pos = codon_genome_start + offset + 1
            if one_based_pos in parent_diffs_by_pos:
                codon[offset] = parent_diffs_by_pos[one_based_pos]

        # A deletion (or sequencing error) at a neighbouring nucleotide makes
        # the codon untranslatable; skip the mutation.
        if '-' in ''.join(codon):
            continue

        codon[mut_codon_pos] = mut[-1]
        codon_mutated_translation = Seq(''.join(codon)).translate()

        if str(codon_ref_aa) == str(codon_mutated_translation):
            syn_muts.setdefault(mut_gene, []).append(mut)

    return syn_muts