def test_find_longest_orf_stopsites(self):
    """Compare the stop site of a '*'-terminated sequence with an unterminated one.

    The stop site is expected to be the '*' position when one is present
    and the last AA otherwise, so the two results differ by exactly one.
    """
    with_stop = find_longest_orfs(['AMEATBALL*'])
    without_stop = find_longest_orfs(['AMEATBALL'])
    # Element 2 of the returned tuple holds the stop sites.
    self.assertEqual(with_stop[2], without_stop[2] + 1)
def test_convert_nt_output_format(self):
    """Check that convert_start_stop_to_nt returns three int64 arrays.

    Runs the fixture in 'test_data/test_frames.fa' through translation,
    ORF finding and minimum-length filtering, then checks the length of
    the conversion output tuple and the dtype of each element. Each
    property is asserted separately so a failure names the exact check.
    """
    sequences = read_fasta('test_data/test_frames.fa')
    (ids, aa_frames, frame, strand, seq_length_nt,
     seq_length) = translate_all_frames(sequences, both_strands=False)
    (orf_sequence, start_sites, stop_sites, orf_length,
     last_aa_is_stop) = find_longest_orfs(aa_frames)

    # filter data by minimum orf length
    keep = orf_length >= 6
    (aa_frames, frame, strand, seq_length_nt, ids, seq_length, start_sites,
     stop_sites, orf_sequence, last_aa_is_stop, orf_length) = filter_objects(
         keep, aa_frames, frame, strand, seq_length_nt, ids, seq_length,
         start_sites, stop_sites, orf_sequence, last_aa_is_stop, orf_length)

    output = convert_start_stop_to_nt(start_sites, stop_sites,
                                      seq_length_nt, orf_length, frame,
                                      last_aa_is_stop)

    # A length-3 tuple is expected.
    self.assertEqual(len(output), 3)
    # Every element must be an int64 numpy array.
    for index, array in enumerate(output):
        self.assertEqual(
            array.dtype, 'int64',
            msg='output[{}] has an unexpected dtype'.format(index))
def test_add_upstream_aa_exactupstream(self):
    """Test with upstream AAs of exactly min_upstream_length.

    'BEAR' (4 AAs) precedes the ORF and min_upstream_length is 4, so the
    full input sequence is the expected result.
    """
    aa_sequence = np.array(['BEARMEATBALLS'])
    orf_results = find_longest_orfs(aa_sequence)
    orf_sequence, start_sites, stop_sites, orf_length, _ = orf_results

    extended = add_upstream_aas(
        aa_sequence,
        stop_sites,
        start_sites,
        orf_sequence,
        orf_length,
        min_upstream_length=4,
    )
    self.assertEqual(extended[0], 'BEARMEATBALLS')
def test_add_upstream_aa_noupstream(self):
    """Test with no viable upstream AAs.

    The expected result is the bare ORF 'MEATBALLS', without any of the
    residues that precede it in the input.
    """
    aa_sequence = np.array(['BEAREATS*MEATBALLS'])
    orf_results = find_longest_orfs(aa_sequence)
    orf_sequence, start_sites, stop_sites, orf_length, _ = orf_results

    extended = add_upstream_aas(
        aa_sequence,
        stop_sites,
        start_sites,
        orf_sequence,
        orf_length,
        min_upstream_length=5,
    )
    self.assertEqual(extended[0], 'MEATBALLS')
def test_add_upstream_aa(self):
    """Test the expected output for a sequence with a long upstream region.

    With min_upstream_length=5 the whole input sequence is the expected
    result.
    """
    aa_sequence = np.array(['ALONGERUPSTREAMMEATBALL'])
    orf_results = find_longest_orfs(aa_sequence)
    orf_sequence, start_sites, stop_sites, orf_length, _ = orf_results

    extended = add_upstream_aas(
        aa_sequence,
        stop_sites,
        start_sites,
        orf_sequence,
        orf_length,
        min_upstream_length=5,
    )
    self.assertEqual(extended[0], 'ALONGERUPSTREAMMEATBALL')
def test_find_longest_orf_output_format(self):
    """Check that find_longest_orfs returns five arrays of the expected dtypes.

    The length of the output tuple and the dtype of each element are
    asserted separately so a failure names the exact check.
    """
    long_orf = find_longest_orfs(['AMEATBALL'])

    # Expected numpy dtype of each element, in output order.
    expected_dtypes = ('<U8', 'int64', 'int64', 'int64', 'bool')

    # A length-5 tuple is expected.
    self.assertEqual(len(long_orf), len(expected_dtypes))
    for index, (array, expected) in enumerate(zip(long_orf,
                                                  expected_dtypes)):
        self.assertEqual(
            array.dtype, expected,
            msg='long_orf[{}] has an unexpected dtype'.format(index))
def test_convert_utr_nt(self):
    """Check the utr3_length values returned by convert_start_stop_to_nt.

    Runs the fixture in 'test_data/test_frames.fa' through translation,
    ORF finding and minimum-length filtering before the conversion.
    """
    fasta_records = read_fasta('test_data/test_frames.fa')
    (ids, aa_frames, frame, strand, seq_length_nt,
     seq_length) = translate_all_frames(fasta_records, both_strands=False)
    (orf_sequence, start_sites, stop_sites, orf_length,
     last_aa_is_stop) = find_longest_orfs(aa_frames)

    # filter data by minimum orf length
    long_enough = orf_length >= 6
    (aa_frames, frame, strand, seq_length_nt, ids, seq_length, start_sites,
     stop_sites, orf_sequence, last_aa_is_stop, orf_length) = filter_objects(
         long_enough, aa_frames, frame, strand, seq_length_nt, ids,
         seq_length, start_sites, stop_sites, orf_sequence, last_aa_is_stop,
         orf_length)

    # Only the third returned value is inspected here.
    _, _, utr3_length = convert_start_stop_to_nt(
        start_sites, stop_sites, seq_length_nt, orf_length, frame,
        last_aa_is_stop)

    expected_utr3 = np.array([5, 4, 3])
    matches = utr3_length == expected_utr3
    self.assertTrue(np.all(matches))
def test_add_upstream_aa_multi(self):
    """Test add_upstream_aas with multiple input sequences.

    With min_upstream_length=5, each of the three inputs is expected to
    come back as its full sequence.
    """
    aa_sequence = np.array([
        'ALONGERUPSTREAMMEATBALL',
        'TWODOZENMEATBALLS',
        'BROWNBEARMAULSGIANTSQUID',
    ])
    orf_results = find_longest_orfs(aa_sequence)
    orf_sequence, start_sites, stop_sites, orf_length, _ = orf_results

    extended = add_upstream_aas(
        aa_sequence,
        stop_sites,
        start_sites,
        orf_sequence,
        orf_length,
        min_upstream_length=5,
    )

    expected = np.array([
        'ALONGERUPSTREAMMEATBALL',
        'TWODOZENMEATBALLS',
        'BROWNBEARMAULSGIANTSQUID',
    ])
    matches = extended[0] == expected
    self.assertTrue(np.all(matches))
def test_add_upstream_aa_output(self):
    """Check that add_upstream_aas returns three arrays of the expected types.

    The length of the output tuple and the dtype of each element are
    asserted separately so a failure names the exact check.
    """
    aa_sequence = np.array(['ALONGERUPSTREAMMEATBALL'])
    (orf_sequence, start_sites, stop_sites, orf_length,
     last_aa_is_stop) = find_longest_orfs(aa_sequence)

    output = add_upstream_aas(aa_sequence, stop_sites, start_sites,
                              orf_sequence, orf_length,
                              min_upstream_length=5)

    # All three outputs must be generated.
    self.assertEqual(len(output), 3)
    # First element is a string array; its itemsize is not checked.
    self.assertEqual(output[0].dtype.type, np.str_,
                     msg='output[0] is not a numpy string array')
    # The remaining two elements are int64 arrays.
    self.assertEqual(output[1].dtype, 'int64',
                     msg='output[1] has an unexpected dtype')
    self.assertEqual(output[2].dtype, 'int64',
                     msg='output[2] has an unexpected dtype')
def test_find_longest_orf_multiple(self):
    """Check that one ORF sequence is returned per input sequence."""
    # Named aa_sequences rather than `input` to avoid shadowing the builtin.
    aa_sequences = ['AMEATBALL*', 'TWOMEATBALLS']
    result = find_longest_orfs(aa_sequences)
    self.assertEqual(len(result[0]), len(aa_sequences))
def test_find_longest_orf_trimmed(self):
    """Check that the last '*' is trimmed from the ORF sequence."""
    # Element 0 of the returned tuple holds the ORF sequences.
    orf_sequence = find_longest_orfs(['AMEATBALL*'])[0]
    expected = ['MEATBALL']
    self.assertEqual(orf_sequence, expected)