def test_dotplot_raw_seq_basic():
    """Check the metrics reported by ``DotPlot.raw_sequence()`` for two raw sequences.

    The image and alignment getters are also called and must both return
    ``str`` objects. Every result entry is compared with the expected value
    registered under the same name.
    """
    form = {
        'seq-name-1': "xxx",
        'seq-content-1': "CGTAACAAGGTTTCCGTAGGTGAACCTGCGGAAGGATCATTGATGAGACCGTGGAATAAACGATCGAGTG",
        'seq-name-2': "yyy",
        'seq-content-2': "AATCTGGAGGACCTGTGGTAACTCAGCTCGTCGTGGCACTGCTTTTGTCGTGACCCTGCTTTGTTGTTGG",
    }

    plot = DotPlot(form)
    results = plot.raw_sequence()

    # Expected metrics, expanded into the list-of-dicts shape that
    # find_value_by_name receives.
    expected_results = [
        {'name': label, 'value': number}
        for label, number in (
            ("Sequence xxx length", 70),
            ("Sequence yyy length", 70),
            ("Coverage [%]", 100),
            ("Average identity [%]", 61.4),
            ("Fragmental identity [%]", 61.4),
        )
    ]

    image = plot.get_dot_plot_image()
    alignments = plot.get_alignments()
    assert isinstance(image, str)
    assert isinstance(alignments, str)

    for entry in results:
        label = entry.get('name')
        actual = entry.get('value')
        assert find_value_by_name(expected_results, label) == actual
def test_consensus_sequence_raw_seq_basic(muscle_command_line_mock, muscle_standard_seq_return_value):
    """Check ``ConsensusSequence.raw_sequence()`` against a mocked MUSCLE call.

    The mock is configured so that calling the object returned by
    ``muscle_command_line_mock`` yields ``muscle_standard_seq_return_value``.
    Every result entry is compared with the expected value registered under
    the same name.
    """
    command_line = muscle_command_line_mock.return_value
    command_line.return_value = muscle_standard_seq_return_value

    form = {
        'sequences': """>gi|2765658
CGTAACAAGGTTTCCGTAGGTGAACCTGCGGAAGGATCATTGATGAGACCGTGGAATAAACGATCGAGTG
AATCCGGAGGACCGGTGTACTCAGCTCACCGGGGGCATTGCTCCCGTGGTGACCCTGATTTGTTGTTGGG""",
    }

    consensus_entry = {
        'name': "Consensus sequence",
        'value':
            "CGTAACAAGGTTTCCGTAGGTGAACCTGCGGAAGGATCATTGATGAGACCGTGGAATAAACGATCGAGTGAATCCGGAGGACCGGTGT"
            "ACTCAGCTCACCGGGGGCATTGCTCCCGTGGTGACCCTGATTTGTTGTTGGG",
    }
    length_entry = {'name': "Sequence length", 'value': 140}
    expected_results = [consensus_entry, length_entry]

    results = ConsensusSequence(form).raw_sequence()

    for entry in results:
        label = entry.get('name')
        actual = entry.get('value')
        assert find_value_by_name(expected_results, label) == actual
def test_dotplot_genebank_basic():
    """Check the metrics reported by ``DotPlot.genebank_seq()`` for two sequence ids.

    The image and alignment getters are also called and must both return
    ``str`` objects. Every result entry is compared with the expected value
    registered under the same name.
    """
    form = {'seq-name-1': "2765658", 'seq-name-2': "2765657"}

    plot = DotPlot(form)
    results = plot.genebank_seq()

    expected_results = [
        {'name': label, 'value': number}
        for label, number in (
            ("Seq id. Z78533.1 C.irapeanum 5.8S rRNA gen ... length [bp]", 740),
            ("Seq id. Z78532.1 C.californicum 5.8S rRNA ... length [bp]", 753),
            ("Coverage [%]", 98.3),
            ("Average identity [%]", 82.4),
            ("Fragmental identity [%]", 83.1),
        )
    ]

    image = plot.get_dot_plot_image()
    alignments = plot.get_alignments()
    assert isinstance(image, str)
    assert isinstance(alignments, str)

    for entry in results:
        label = entry.get('name')
        actual = entry.get('value')
        assert find_value_by_name(expected_results, label) == actual
def test_hw_basic():
    """Check ``HardyWeinbergCalculation.calculate()`` for a small genotype count set.

    Every result entry is compared with the expected value registered under
    the same name.
    """
    form = {"ho": 4, "he": 3, "rho": 2, "alfa": 0.05}

    results = HardyWeinbergCalculation(form).calculate()

    status_message = (
        "Distribution consistent with Hardy Weinberg's law at the level of significance: 0.05"
    )
    expected_results = [
        {'name': label, 'value': number}
        for label, number in (
            ("expected number of homozygotes", 3.36),
            ("expected number of heterozygotes", 4.28),
            ("expected number of rare homozygotes", 1.36),
            ("p", 0.61111),
            ("q", 0.38889),
            ("p-value", 0.66931),
            ("Chi-square value", 0.803),
            ("Yate`s chi-square value", 0.16133),
            ("Yate`s p-value", 0.9225),
            ("status", status_message),
        )
    ]

    for entry in results:
        label = entry.get('name')
        actual = entry.get('value')
        assert find_value_by_name(expected_results, label) == actual
def test_pic_codominant_basic():
    """Check the "H" and "PIC" values from ``Codominant.calculate()`` for three alleles.

    Every result entry is compared with the expected value registered under
    the same name.
    """
    form = {
        "count": 3,
        "allele-0": 4,
        "allele-1": 2,
        "allele-2": 3,
    }

    results = Codominant(form).calculate()

    expected_results = [
        {'name': "H", 'value': 0.642},
        {'name': "PIC", 'value': 0.5676},
    ]

    for entry in results:
        label = entry.get('name')
        actual = entry.get('value')
        assert find_value_by_name(expected_results, label) == actual
def test_sequences_tools_complement():
    """Check ``SequencesTools.calculate()`` in "complement" mode for three records.

    Every result entry is compared with the expected value registered under
    the same record header.
    """
    # FASTA-style input: one ">" header line per record, followed by its
    # sequence lines.
    fasta_text = """>2765658
CGTAACAAGGTTTCCGTAGGTGAACCTGCGGAAGGATCATTGATGAGACCGTGGAATAAACGATCGAGTG
AATCCGGAGGACCGGTGTACTCAGCTCACCGGGGGCATTGCTCCCGTGGTGACCCTGATTTGTTGTTGGG
CCGCCTCGGGAGCGTCCATGGCGGGTTTGAACCTCTAGCCCGGCGCAGTTTGGGCGCCAAGCCATATGAA
AGCATCACCGGCGAATGGCATTGTCTTCCCCAAAACCCGGAGCGGCGGCGTGCTGTCGCGTGCCCAATGA
>2765657
CGTAACAAGGTTTCCGTAGGTGAACCTGCGGAAGGATCATTGTTGAGACAACAGAATATATGATCGAGTG
AATCTGGAGGACCTGTGGTAACTCAGCTCGTCGTGGCACTGCTTTTGTCGTGACCCTGCTTTGTTGTTGG
GCCTCCTCAAGAGCTTTCATGGCAGGTTTGAACTTTAGTACGGTGCAGTTTGCGCCAAGTCATATAAAGC
>2765656
CGTAACAAGGTTTCCGTAGGTGAACCTGCGGAAGGATCATTGTTGAGACAGCAGAACATACGATCGAGTG
AATCCGGAGGACCCGTGGTTACACGGCTCACCGTGGCTTTGCTCTCGTGGTGAACCCGGTTTGCGACCGG
GCCGCCTCGGGAACTTTCATGGCGGGTTTGAACGTCTAGCGCGGCGCAGTTTGCGCCAAGTCATATGGAG"""
    form = {'type': "complement", 'sequences': fasta_text}

    results = SequencesTools(form).calculate()

    complement_58 = (
        "GCATTGTTCCAAAGGCATCCACTTGGACGCCTTCCTAGTAACTACTCTGGCACCTTATTTGCTAGCTCACTTAGGCCTCCTGGCCACATGA"
        "GTCGAGTGGCCCCCGTAACGAGGGCACCACTGGGACTAAACAACAACCCGGCGGAGCCCTCGCAGGTACCGCCCAAACTTGGAGATCGGGCC"
        "GCGTCAAACCCGCGGTTCGGTATACTTTCGTAGTGGCCGCTTACCGTAACAGAAGGGGTTTTGGGCCTCGCCGCCGCACGACAGCGCACGGGT"
        "TACT"
    )
    complement_57 = (
        "GCATTGTTCCAAAGGCATCCACTTGGACGCCTTCCTAGTAACAACTCTGTTGTCTTATATACTAGCTCACTTAGACCTCCTGGACACCATTGAGTC"
        "GAGCAGCACCGTGACGAAAACAGCACTGGGACGAAACAACAACCCGGAGGAGTTCTCGAAAGTACCGTCCAAACTTGAAATCATGCCACGTCAAA"
        "CGCGGTTCAGTATATTTCG"
    )
    complement_56 = (
        "GCATTGTTCCAAAGGCATCCACTTGGACGCCTTCCTAGTAACAACTCTGTCGTCTTGTATGCTAGCTCACTTAGGCCTCCTGGGCACCAATGT"
        "GCCGAGTGGCACCGAAACGAGAGCACCACTTGGGCCAAACGCTGGCCCGGCGGAGCCCTTGAAAGTACCGCCCAAACTTGCAGATCGCGCCGCG"
        "TCAAACGCGGTTCAGTATACCTC"
    )
    expected_results = [
        {'name': ">2765658", 'value': complement_58},
        {'name': ">2765657", 'value': complement_57},
        {'name': ">2765656", 'value': complement_56},
    ]

    for entry in results:
        label = entry.get('name')
        actual = entry.get('value')
        assert find_value_by_name(expected_results, label) == actual
def test_sequences_tools_transcription():
    """Check ``SequencesTools.calculate()`` in "transcription" mode for three records.

    Every result entry is compared with the expected value registered under
    the same record header.
    """
    # FASTA-style input: one ">" header line per record, followed by its
    # sequence lines.
    fasta_text = """>2765658
CGTAACAAGGTTTCCGTAGGTGAACCTGCGGAAGGATCATTGATGAGACCGTGGAATAAACGATCGAGTG
AATCCGGAGGACCGGTGTACTCAGCTCACCGGGGGCATTGCTCCCGTGGTGACCCTGATTTGTTGTTGGG
CCGCCTCGGGAGCGTCCATGGCGGGTTTGAACCTCTAGCCCGGCGCAGTTTGGGCGCCAAGCCATATGAA
AGCATCACCGGCGAATGGCATTGTCTTCCCCAAAACCCGGAGCGGCGGCGTGCTGTCGCGTGCCCAATGA
>2765657
CGTAACAAGGTTTCCGTAGGTGAACCTGCGGAAGGATCATTGTTGAGACAACAGAATATATGATCGAGTG
AATCTGGAGGACCTGTGGTAACTCAGCTCGTCGTGGCACTGCTTTTGTCGTGACCCTGCTTTGTTGTTGG
GCCTCCTCAAGAGCTTTCATGGCAGGTTTGAACTTTAGTACGGTGCAGTTTGCGCCAAGTCATATAAAGC
>2765656
CGTAACAAGGTTTCCGTAGGTGAACCTGCGGAAGGATCATTGTTGAGACAGCAGAACATACGATCGAGTG
AATCCGGAGGACCCGTGGTTACACGGCTCACCGTGGCTTTGCTCTCGTGGTGAACCCGGTTTGCGACCGG
GCCGCCTCGGGAACTTTCATGGCGGGTTTGAACGTCTAGCGCGGCGCAGTTTGCGCCAAGTCATATGGAG"""
    form = {'type': "transcription", 'sequences': fasta_text}

    results = SequencesTools(form).calculate()

    transcript_58 = (
        "CGUAACAAGGUUUCCGUAGGUGAACCUGCGGAAGGAUCAUUGAUGAGACCGUGGAAUAAACGAUCGAGUGAAUCCGGAGGACCGGUGUACUCAGCUC"
        "ACCGGGGGCAUUGCUCCCGUGGUGACCCUGAUUUGUUGUUGGGCCGCCUCGGGAGCGUCCAUGGCGGGUUUGAACCUCUAGCCCGGCGCAGUUUGGG"
        "CGCCAAGCCAUAUGAAAGCAUCACCGGCGAAUGGCAUUGUCUUCCCCAAAACCCGGAGCGGCGGCGUGCUGUCGCGUGCCCAAUGA"
    )
    transcript_57 = (
        "CGUAACAAGGUUUCCGUAGGUGAACCUGCGGAAGGAUCAUUGUUGAGACAACAGAAUAUAUGAUCGAGUGAAUCUGGAGGACCUGUGGUAACUC"
        "AGCUCGUCGUGGCACUGCUUUUGUCGUGACCCUGCUUUGUUGUUGGGCCUCCUCAAGAGCUUUCAUGGCAGGUUUGAACUUUAGUACGGUGCAGU"
        "UUGCGCCAAGUCAUAUAAAGC"
    )
    transcript_56 = (
        "CGUAACAAGGUUUCCGUAGGUGAACCUGCGGAAGGAUCAUUGUUGAGACAGCAGAACAUACGAUCGAGUGAAUCCGGAGGACCCGUGGUUACACG"
        "GCUCACCGUGGCUUUGCUCUCGUGGUGAACCCGGUUUGCGACCGGGCCGCCUCGGGAACUUUCAUGGCGGGUUUGAACGUCUAGCGCGGCGCAGUU"
        "UGCGCCAAGUCAUAUGGAG"
    )
    expected_results = [
        {'name': ">2765658", 'value': transcript_58},
        {'name': ">2765657", 'value': transcript_57},
        {'name': ">2765656", 'value': transcript_56},
    ]

    for entry in results:
        label = entry.get('name')
        actual = entry.get('value')
        assert find_value_by_name(expected_results, label) == actual
def test_chi_sqaure_basic():
    """Check ``ChiSquareCalculation.calculate()`` for a 2x2 contingency table.

    The table is passed both row-wise and column-wise, together with its
    dimensions and the sum of all fields. Every result entry is compared with
    the expected value registered under the same name.
    """
    data = {
        'row-0': [4.0, 4.0],
        'row-1': [2.0, 3.0],
        'column-0': [4.0, 2.0],
        # The second column needs its own key: assigning it to 'column-0'
        # again would overwrite the first column and leave 'column-1' unset.
        'column-1': [4.0, 3.0],
        'width': 2,
        'height': 2,
        'field_sum': 13,
    }

    expected_results = [
        {'name': "coefficient of contingency type", 'value': "Phi"},
        {'name': "dof", 'value': 1},
        {'name': "Chi square", 'value': 0.12381},
        {'name': "Chi square p-value", 'value': 0.72494},
        {'name': "Chi-square correlation", 'value': 0.09759},
        {'name': "Yate`s Chi square", 'value': 0.04836},
        {'name': "Yate`s Chi square p-value", 'value': 0.82594},
        {'name': "Yate`s chi-square correlation", 'value': 0.06099},
    ]

    chi_square = ChiSquareCalculation(data)
    results = chi_square.calculate()

    for result in results:
        name = result.get('name')
        value = result.get('value')
        expected_value = find_value_by_name(expected_results, name)
        assert expected_value == value
def test_sequences_tools_translation_to_amino_acid():
    """Check ``SequencesTools.calculate()`` in "translation_to_amino_acid" mode.

    Every result entry is compared with the expected value registered under
    the same record header.
    """
    # FASTA-style input: one ">" header line per record, followed by its
    # sequence lines.
    # NOTE(review): the first record ends with a lowercase "q" - confirm
    # whether this is a deliberate invalid-character case or a typo.
    fasta_text = """>2765658
CGTAACAAGGTTTCCGTAGGTGAACCTGCGGAAGGATCATTGATGAGACCGTGGAATAAACGATCGAGTG
AATCCGGAGGACCGGTGTACTCAGCTCACCGGGGGCATTGCTCCCGTGGTGACCCTGATTTGTTGTTGGG
CCGCCTCGGGAGCGTCCATGGCGGGTTTGAACCTCTAGCCCGGCGCAGTTTGGGCGCCAAGCCATATGAA
AGCATCACCGGCGAATGGCATTGTCTTCCCCAAAACCCGGAGCGGCGGCGTGCTGTCGCGTGCCCAATGAq
>2765657
CGTAACAAGGTTTCCGTAGGTGAACCTGCGGAAGGATCATTGTTGAGACAACAGAATATATGATCGAGTG
AATCTGGAGGACCTGTGGTAACTCAGCTCGTCGTGGCACTGCTTTTGTCGTGACCCTGCTTTGTTGTTGG
GCCTCCTCAAGAGCTTTCATGGCAGGTTTGAACTTTAGTACGGTGCAGTTTGCGCCAAGTCATATAAAGC
>2765656
CGTAACAAGGTTTCCGTAGGTGAACCTGCGGAAGGATCATTGTTGAGACAGCAGAACATACGATCGAGTG
AATCCGGAGGACCCGTGGTTACACGGCTCACCGTGGCTTTGCTCTCGTGGTGAACCCGGTTTGCGACCGG
GCCGCCTCGGGAACTTTCATGGCGGGTTTGAACGTCTAGCGCGGCGCAGTTTGCGCCAAGTCATATGGAG"""
    form = {'type': "translation_to_amino_acid", 'sequences': fasta_text}

    results = SequencesTools(form).calculate()

    expected_results = [
        {'name': header, 'value': protein}
        for header, protein in (
            (
                ">2765658",
                "RNKVSVGEPAEGSLMRPWNKRSSESGGPVYSAHRGHCSRGDPDLLLGRLGSVHGGFEPLARRSLGAKPYESITGEWHCLPQNPERRRAVACPM",
            ),
            (
                ">2765657",
                "RNKVSVGEPAEGSLLRQQNI*SSESGGPVVTQLVVALLLS*PCFVVGPPQELSWQV*TLVRCSLRQVI*S",
            ),
            (
                ">2765656",
                "RNKVSVGEPAEGSLLRQQNIRSSESGGPVVTRLTVALLSW*TRFATGPPRELSWRV*TSSAAQFAPSHME",
            ),
        )
    ]

    for entry in results:
        label = entry.get('name')
        actual = entry.get('value')
        assert find_value_by_name(expected_results, label) == actual
def test_pic_dominant_basic():
    """Check the "PIC" value from ``Dominant.calculate()`` for a single marker.

    Every result entry is compared with the expected value registered under
    the same name.
    """
    form = {"amplified_marker": 2, "absence_marker": 3}

    results = Dominant(form).calculate()

    expected_results = [{'name': "PIC", 'value': 0.48}]

    for entry in results:
        label = entry.get('name')
        actual = entry.get('value')
        assert find_value_by_name(expected_results, label) == actual
def test_chi_sqaure_goodness_basic():
    """Check ``ChiSquareGoodness.calculate()`` for three observed/expected counts.

    Every result entry is compared with the expected value registered under
    the same name.
    """
    data = {
        'observed': [4.0, 3.0, 2.0],
        'expected': [3.0, 2.0, 4.0],
    }

    # Each name appears exactly once. A second "Chi square p-value" entry
    # carrying 0.72494 (the value used by the contingency-table test) was
    # removed: for chi-square 1.83333 with 2 degrees of freedom the p-value
    # is exp(-1.83333 / 2) ~= 0.39985.
    expected_results = [
        {'name': "Chi square", 'value': 1.83333},
        {'name': "Chi square p-value", 'value': 0.39985},
        {'name': "dof", 'value': 2},
        {'name': "Yate`s Chi square", 'value': 0.77083},
        {'name': "Yate`s Chi square p-value", 'value': 0.68017},
    ]

    chi_square_goodness = ChiSquareGoodness(data['observed'], data['expected'])
    results = chi_square_goodness.calculate()

    for result in results:
        name = result.get('name')
        value = result.get('value')
        expected_value = find_value_by_name(expected_results, name)
        assert expected_value == value