Esempio n. 1
0
def translateSixFrame(seq):
    """Translate seq in six frames, print the result and return it.

    Args:
        seq: passed unchanged to ``sixframes`` of PyCogent's default
            genetic code (presumably a cogent DNA sequence object --
            TODO confirm against callers).

    Returns:
        Whatever ``standard_code.sixframes(seq)`` produces. The result was
        previously discarded (None was returned), so callers that ignore
        the return value are unaffected.
    """
    # Function-scope import: PyCogent is only loaded when this is called.
    from cogent.core.genetic_code import DEFAULT as standard_code

    translations = standard_code.sixframes(seq)
    # Parenthesised single-argument form prints the same text under the
    # Python 2 print statement and the Python 3 print function.
    print(translations)
    return translations
Esempio n. 2
0
def translateSixFrame(seq):
    """Translate seq in six frames, print the result and return it.

    Args:
        seq: passed unchanged to ``sixframes`` of PyCogent's default
            genetic code (presumably a cogent DNA sequence object --
            TODO confirm against callers).

    Returns:
        Whatever ``standard_code.sixframes(seq)`` produces. The result was
        previously discarded (None was returned), so callers that ignore
        the return value are unaffected.
    """
    # Function-scope import: PyCogent is only loaded when this is called.
    from cogent.core.genetic_code import DEFAULT as standard_code

    translations = standard_code.sixframes(seq)
    # Parenthesised single-argument form prints the same text under the
    # Python 2 print statement and the Python 3 print function.
    print(translations)
    return translations
            if not protein in dna_sequence_dic:
                nucleotide_not_found.append(protein)
                continue

            sequence_with_stop_codons = DNA.makeSequence(dna_sequence_dic[protein])

            #Check if the sequence is the right one, and check for in frame stops
            #It seems that in JGI annotation, when scaffolds are joined, the resulted proteins do not match
            #the DNA sequence
            #Right now, I'll just remove those sequences, and deal with that later

            if len(sequence_with_stop_codons) % 3 == 0:
                seq_no_stop_codon = sequence_with_stop_codons.withoutTerminalStopCodon()

                #Check for in-frame stop codons
                stops_frame = standard_code.getStopIndices(seq_no_stop_codon, start=0)

                if len(stops_frame) > 0:
                    inframe_stops.append([cluster, genome_id, protein_id])

                else:
                    curated_protein_list[protein] = seq_no_stop_codon

            else:
                frameshift_cases.append([cluster, genome_id, protein_id])

        if len(curated_protein_list) < 2:  # Only take those clusters with 3 sequences or more
            clusters_too_short.append(cluster)
            continue

        #Alignments and output data
Esempio n. 4
0
                continue

            sequence_with_stop_codons = DNA.makeSequence(
                dna_sequence_dic[protein])

            #Check if the sequence is the right one, and check for in frame stops
            #It seems that in JGI annotation, when scaffolds are joined, the resulted proteins do not match
            #the DNA sequence
            #Right now, I'll just remove those sequences, and deal with that later

            if len(sequence_with_stop_codons) % 3 == 0:
                seq_no_stop_codon = sequence_with_stop_codons.withoutTerminalStopCodon(
                )

                #Check for in-frame stop codons
                stops_frame = standard_code.getStopIndices(seq_no_stop_codon,
                                                           start=0)

                if len(stops_frame) > 0:
                    inframe_stops.append([cluster, genome_id, protein_id])

                else:
                    curated_protein_list[protein] = seq_no_stop_codon

            else:
                frameshift_cases.append([cluster, genome_id, protein_id])

        if len(curated_protein_list
               ) < 2:  # Only take those clusters with 3 sequences or more
            clusters_too_short.append(cluster)
            continue