def test_seq_subset_load_from_all_genes(self):
    """Load a 'features' subset of type 'genes' with no tag filter.

    Writes ``self.record`` to ``self.gbk_filename`` as GenBank, loads the
    subset through ``dataset_load.seq_subset_load`` and expects two entries.
    """
    seqfile_ops.write_genbank(self.gbk_filename, self.record)
    subset_mode = 'features'
    # NOTE(review): ('genes') is a plain string, not a one-element tuple.
    # Left unchanged because how seq_subset_load interprets 'types' is not
    # visible from here -- confirm whether ('genes',) was intended.
    subset_args = {'types': ('genes'), 'tags': {}}
    subset, _ = dataset_load.seq_subset_load(
        self.gbk_filename, subset_mode, subset_args)
    # Compare by value: identity checks on ints only pass by virtue of
    # CPython's small-integer cache.
    self.assertEqual(len(subset), 2)
def test_seq_subset_load_from_chop_by_size(self):
    """Load a 'size' subset with size 5 and chop_mode 'exact_size'.

    Writes ``self.single_record`` to ``self.single_q_file`` as FASTA, loads
    the subset through ``dataset_load.seq_subset_load`` and expects ten
    entries, the first one having the id ``'temp_1_0-5'``.
    """
    seqfile_ops.write_fasta(self.single_q_file, self.single_record)
    subset_mode = 'size'
    subset_args = {'size': 5, 'chop_mode': 'exact_size'}
    subset, _ = dataset_load.seq_subset_load(
        self.single_q_file, subset_mode, subset_args)
    # Compare by value: identity checks on ints only pass by virtue of
    # CPython's small-integer cache.
    self.assertEqual(len(subset), 10)
    self.assertEqual(subset[0].id, 'temp_1_0-5')
def test_seq_subset_load_from_cds_by_locus_tag(self):
    """Load a 'features' subset of type 'CDS' filtered by locus_tag.

    Writes ``self.record`` to ``self.gbk_filename`` as GenBank, requests the
    locus tags 'locustag 1' and 'locustag 4', and expects a single entry
    with the id ``'temp_5-10'``.
    """
    seqfile_ops.write_genbank(self.gbk_filename, self.record)
    subset_mode = 'features'
    # NOTE(review): ('CDS') is a plain string, not a one-element tuple.
    # Left unchanged because how seq_subset_load interprets 'types' is not
    # visible from here -- confirm whether ('CDS',) was intended.
    subset_args = {
        'types': ('CDS'),
        'tags': {'locus_tag': ['locustag 1', 'locustag 4']},
    }
    subset, _ = dataset_load.seq_subset_load(
        self.gbk_filename, subset_mode, subset_args)
    # Compare by value: identity checks on ints only pass by virtue of
    # CPython's small-integer cache.
    self.assertEqual(len(subset), 1)
    self.assertEqual(subset[0].id, 'temp_5-10')
def test_seq_subset_load_from_mixed_features(self):
    """Load a 'features' subset of types 'CDS' and 'gene' with two tag filters.

    Writes ``self.record`` to ``self.gbk_filename`` as GenBank, filters on
    locus_tag 'locustag 3' and product 'product 2', and expects two entries.
    """
    seqfile_ops.write_genbank(self.gbk_filename, self.record)
    subset_mode = 'features'
    # NOTE(review): ('locustag 3') and (('product 2')) are plain strings,
    # not tuples. Left unchanged because how seq_subset_load interprets tag
    # values is not visible from here -- confirm the intended container.
    subset_args = {
        'types': ('CDS', 'gene'),
        'tags': {'locus_tag': ('locustag 3'), 'product': (('product 2'))},
    }
    subset, _ = dataset_load.seq_subset_load(
        self.gbk_filename, subset_mode, subset_args)
    # Compare by value: identity checks on ints only pass by virtue of
    # CPython's small-integer cache.
    self.assertEqual(len(subset), 2)
def test_seq_subset_load_from_multifasta(self):
    """Load a 'flatfile' subset from a multi-record FASTA file.

    Writes ``self.multi_records`` to ``self.multi_q_file`` as FASTA, loads
    it with no subset arguments, and expects five entries whose ids match
    ``self.multi_records`` position by position. The returned subset file
    is expected to equal the input file name.
    """
    seqfile_ops.write_fasta(self.multi_q_file, self.multi_records)
    subset_mode = 'flatfile'
    subset_args = None
    subset, subset_file = dataset_load.seq_subset_load(
        self.multi_q_file, subset_mode, subset_args)
    # Compare by value: identity checks on ints only pass by virtue of
    # CPython's small-integer cache.
    self.assertEqual(len(subset), 5)
    for index, record in enumerate(subset):
        self.assertEqual(record.id, self.multi_records[index].id)
    # Value comparison: the test cares that the same file name comes back,
    # not that it is the very same string object.
    self.assertEqual(subset_file, self.multi_q_file)
def test_seq_subset_load_from_coords(self):
    """Load a 'coordinates' subset driven by a coordinates file.

    Writes ``self.single_record`` to ``self.single_q_file`` as FASTA and
    ``self.str_contents`` to ``self.coords_file``, then loads the subset
    passing that file with ``header=1`` and ``columns=(1, 2)``. Expects
    three entries, the first with id ``'temp_1_0-10'`` and the third with
    sequence ``'TTTGGCGCTCGCGGCGGG'``.
    """
    seqfile_ops.write_fasta(self.single_q_file, self.single_record)
    # Context manager guarantees the handle is closed (and the contents
    # flushed) even if the write raises.
    with open(self.coords_file, 'w') as coords_handle:
        coords_handle.write(self.str_contents)
    subset_mode = 'coordinates'
    subset_args = {'file': self.coords_file, 'header': 1, 'columns': (1, 2)}
    subset, _ = dataset_load.seq_subset_load(
        self.single_q_file, subset_mode, subset_args)
    # Compare by value: identity checks on ints only pass by virtue of
    # CPython's small-integer cache.
    self.assertEqual(len(subset), 3)
    self.assertEqual(subset[0].id, 'temp_1_0-10')
    self.assertEqual(str(subset[2].seq), 'TTTGGCGCTCGCGGCGGG')