Example #1
0
def calculate_tt_ratio(fasta_data: List[str]) -> float:
    """
    Compute the transition/transversion ratio between two DNA strands.

    :param fasta_data: Two equal length DNA sequences in FASTA format
    :return: Transition/transversion ratio, rounded to 4th digit
    """
    # Unpacking into two names fails unless exactly two strands were parsed.
    reference, other = parse_fasta(fasta_data)
    counts = reference.transitions_transversions(other)
    n_transitions, n_transversions = counts
    # NOTE(review): a pair of strands with no transversions makes this
    # division fail (presumably ZeroDivisionError) - confirm callers never
    # pass such input.
    return round(n_transitions / n_transversions, 4)
Example #2
0
def test_parse_fasta():
    """
    Checks that parse_fasta yields one (sequence, tag) record per FASTA tag,
    with the lines following a tag concatenated into a single sequence
    :return:
    """
    fasta_lines = [">Tag1", "ATGC", "CGTA", "GGCC", ">Tag2", "ATGC", "AATT"]
    parsed = [
        (record.sequence, record.tag)
        for record in parse_fasta(fasta_lines)
    ]
    assert parsed == [("ATGCCGTAGGCC", "Tag1"), ("ATGCAATT", "Tag2")]
Example #3
0
def calculate_max_gc_content(fasta_data: List[str]) -> Tuple[str, float]:
    """
    Find the DNA strand with the highest GC content.

    :param fasta_data: - a list of DNA sequences in FASTA format
    :return: the tag and gc content of DNA strand with highest GC content
    """
    # Pair every tag with its GC content so the winner keeps its label.
    tagged_scores = [
        (record.tag, record.gc_content())
        for record in parse_fasta(fasta_data)
    ]
    # Compare on the score only; on ties max() keeps the earliest entry.
    best_tag, best_score = max(tagged_scores, key=lambda pair: pair[1])
    return best_tag, best_score
Example #4
0
def get_possible_proteins(fasta_data: List[str]) -> str:
    """
    List every distinct protein obtainable from the strand's ORFs.

    :param fasta_data: A DNA sequence in FASTA format
    :return: a formatted set of possible proteins encoded in DNA,
        one protein per line (duplicates removed, order unspecified)
    """
    # Single-target unpacking fails unless exactly one strand was parsed.
    [strand] = parse_fasta(fasta_data)
    # A set drops duplicate proteins that come from different ORFs.
    unique_proteins = set()
    for frame in strand.search_for_orf():
        rna = frame.transcribe()
        unique_proteins.add(rna.translate_to_protein().sequence)
    return '\n'.join(unique_proteins)
Example #5
0
def splice_and_translate(fasta_data: List[str]) -> str:
    """
    Remove the introns from a gene and translate what remains.

    :param fasta_data: A DNA sequence of gene, followed by several intron sequences, in FASTA format
    :return: A protein sequence obtained after translating spliced RNA
    """
    # The input is DNA, so gene and introns alike are transcribed first.
    transcripts = [dna.transcribe() for dna in parse_fasta(fasta_data)]
    # The first transcript is the gene itself; the rest are its introns.
    spliced, *intron_transcripts = transcripts
    # Cut the introns out one at a time, in the order they were given.
    for intron_rna in intron_transcripts:
        spliced = spliced.splice(intron_rna)
    # Only the spliced RNA is translated into the final protein.
    return spliced.translate_to_protein().sequence
Example #6
0
from rps.sequence_problems.parsing import parse_fasta
import rps.sequence_problems.consensus_and_profile as problem

# Sample input: seven tagged DNA strings of eight nucleotides each,
# laid out as alternating FASTA tag / sequence lines.
dataset = [
    ">Rosalind_1",
    "ATCCAGCT",
    ">Rosalind_2",
    "GGGCAACT",
    ">Rosalind_3",
    "ATGGATCT",
    ">Rosalind_4",
    "AAGCAACC",
    ">Rosalind_5",
    "TTGGAACT",
    ">Rosalind_6",
    "ATGCCATT",
    ">Rosalind_7",
    "ATGGCACT",
]

# Parsed once at import time; the tests in this module reuse the result.
sequences = parse_fasta(dataset)


def test_get_profile_matrix():
    """
    Checks the per-position nucleotide counts of the sample dataset
    :return:
    """
    # Expected count of each nucleotide at each of the eight positions.
    expected_counts = {
        "A": [5, 1, 0, 0, 5, 5, 0, 0],
        "C": [0, 0, 1, 4, 2, 0, 6, 1],
        "G": [1, 1, 6, 3, 0, 1, 0, 0],
        "T": [1, 5, 0, 0, 0, 1, 1, 6],
    }
    profile = problem.get_profile_matrix(sequences)
    for nucleotide, counts in expected_counts.items():
        assert getattr(profile, nucleotide) == counts


def test_get_consensus_string():
    """
    Checks the consensus string derived from the sample dataset's profile
    :return:
    """
    profile = problem.get_profile_matrix(sequences)
    consensus = problem.get_consensus_string(profile)
    assert consensus == 'ATGCAACT'