def main(input_file, out, reference, ambiguous):
    """Calculate allele frequencies from the given input multialignment.

    Input can be either a fasta or csv file with multialigned sequences,
    which may or may not contain the reference sequence in the first
    position. In the latter case, an additional reference sequence file is
    needed, either in fasta or csv format.

    Args:
        input_file: path of the multialignment file; the text after its last
            dot selects the parser and is matched case-insensitively
            ("fasta" or "csv").
        out: destination handed to ``AlleleFreqs.to_csv``.
        reference: forwarded unchanged to the ``AlleleFreqs`` constructor.
        ambiguous: forwarded unchanged to the ``AlleleFreqs`` constructor.

    Returns:
        0 when the frequencies were written, 1 when the input extension is
        not recognised.
    """
    # Lower-case the extension so "sample.FASTA" or "sample.Csv" are handled
    # exactly like their lower-case spellings instead of being rejected.
    input_ext = input_file.rsplit(".", 1)[-1].lower()

    if input_ext == "fasta":
        loader = AlleleFreqs.from_fasta
    elif input_ext == "csv":
        loader = AlleleFreqs.from_csv
    else:
        click.echo("Input not recognised. "
                   "Please provide either a fasta or csv file.")
        return 1

    a = loader(input_file, reference, ambiguous)
    a.to_csv(out)
    click.echo(f"Allele frequencies saved to {out}.")
    return 0
def setUp(self) -> None:
    """Create the fixture from the fasta alignment plus a separate reference."""
    allele_freqs = AlleleFreqs.from_fasta(
        sequences=REAL_ALG_L6_NOREF_FASTA,
        reference=REAL_RSRS_FASTA,
    )
    self.af = allele_freqs
def setUp(self) -> None:
    """Create two fixtures from in-memory data: default and ambiguous."""
    reference_seq = Reference("AAG-CTNGGGCATTTCAGGGTGAGCCCGGGCAATACAGGG-TAT")
    alignment = MultiAlignment(SAMPLE_SEQUENCES_DICT)
    # Same alignment and reference for both; only the ``ambiguous`` flag differs.
    self.af = AlleleFreqs(multialg=alignment, reference=reference_seq)
    self.af_amb = AlleleFreqs(
        multialg=alignment,
        reference=reference_seq,
        ambiguous=True,
    )
def setUp(self) -> None:
    """Create the fixture from a single fasta file (no separate reference)."""
    allele_freqs = AlleleFreqs.from_fasta(sequences=REAL_ALG_L6_FASTA)
    self.af = allele_freqs
def setUp(self) -> None:
    """Create the fixture from the csv alignment plus a separate csv reference."""
    allele_freqs = AlleleFreqs.from_csv(
        sequences=SAMPLE_MULTIALG_NOREF_CSV,
        reference=SAMPLE_REF_CSV,
    )
    self.af = allele_freqs
class TestBasic(unittest.TestCase):
    """Tests for AlleleFreqs built directly from in-memory objects.

    Two fixtures are created from the same alignment and reference:
    ``self.af`` (default settings) and ``self.af_amb`` (``ambiguous=True``).
    """

    def setUp(self) -> None:
        ref = Reference("AAG-CTNGGGCATTTCAGGGTGAGCCCGGGCAATACAGGG-TAT")
        alg = MultiAlignment(SAMPLE_SEQUENCES_DICT)
        self.af = AlleleFreqs(multialg=alg, reference=ref)
        self.af_amb = AlleleFreqs(multialg=alg, reference=ref, ambiguous=True)

    def test_df(self):
        # Given/When
        exp_df = sample_sequences_df()
        # Then
        pdtest.assert_frame_equal(self.af.df, exp_df)

    def test_frequencies(self):
        # Given/When
        exp_freqs = sample_sequences_freqs()
        # Then
        pdtest.assert_frame_equal(self.af.frequencies, exp_freqs)

    def test_frequencies_ambiguous(self):
        # Given/When
        exp_freqs = sample_sequences_freqs_amb()
        # Then
        pdtest.assert_frame_equal(self.af_amb.frequencies, exp_freqs)

    def test__get_frequencies(self):
        # Given
        test_freq = pd.Series({
            'A': 0.2,
            'C': 0.2,
            'G': 0.1,
            'T': 0.3,
            '-': 0.1,
            'N': 0.1
        })
        exp_freq = {
            'A': 0.2,
            'C': 0.2,
            'G': 0.1,
            'T': 0.3,
            'gap': 0.1,
            'oth': 0.1
        }
        # When
        result = self.af._get_frequencies(test_freq)
        # Then
        # The helper only returns a bool, so it has to be asserted on;
        # otherwise this test could never fail.
        self.assertTrue(
            self._dict_almost_equal(exp_freq, result),
            msg=f"expected {exp_freq}, got {result}",
        )

    def test_to_csv(self):
        # Given/When
        self.af.to_csv(TEST_CSV)
        result = pd.read_csv(TEST_CSV)
        expected = pd.read_csv(SAMPLE_FREQUENCIES)
        # Then
        pdtest.assert_frame_equal(result, expected)

    def test_to_csv_ambiguous(self):
        # Given/When
        self.af_amb.to_csv(TEST_CSV)
        result = pd.read_csv(TEST_CSV)
        expected = pd.read_csv(SAMPLE_FREQUENCIES_AMB)
        # Then
        pdtest.assert_frame_equal(result, expected)

    @staticmethod
    def _dict_almost_equal(expected: dict, result: dict, acc=10**-8) -> bool:
        """Compare two dictionaries and ensure that all their values are the
        same, accounting for some fluctuation up to the given accuracy value.

        Args:
            expected: expected dictionary
            result: resulting dictionary
            acc: accuracy to use [default: 10**-8]

        Returns:
            True when both dictionaries have the same keys and every pair of
            values differs by less than ``acc``; False otherwise.
        """
        # Different key sets can never be "almost equal".
        if expected.keys() != result.keys():
            return False
        # Every value must be within tolerance; one mismatch fails the check.
        return all(abs(expected[key] - result[key]) < acc for key in expected)
def setUp(self) -> None:
    """Create the fixture from a single csv file (no separate reference)."""
    allele_freqs = AlleleFreqs.from_csv(sequences=SAMPLE_MULTIALG_CSV)
    self.af = allele_freqs
def setUp(self) -> None:
    """Create the fixture from the fasta alignment plus a separate fasta reference."""
    allele_freqs = AlleleFreqs.from_fasta(
        sequences=SAMPLE_MULTIALG_NOREF_FASTA,
        reference=SAMPLE_REF_FASTA,
    )
    self.af = allele_freqs
def setUp(self) -> None:
    """Create the fixture from a single fasta file (no separate reference)."""
    allele_freqs = AlleleFreqs.from_fasta(sequences=SAMPLE_MULTIALG_FASTA)
    self.af = allele_freqs