def test_noop__no_intervals(self):
    """An empty interval list must leave the sequence untouched."""
    bases = ''.join(random.choice('ATGC') for _ in range(200))
    original_record = SeqRecord(Seq(bases, generic_dna))
    working_record = copy.deepcopy(original_record)

    # No intervals are passed, so the fixer is given nothing to act on.
    automated_intergenic_gc_fixer(working_record, [])

    self.assertEqual(
        str(original_record.seq),
        str(working_record.seq))
def test_noop__small_interval(self):
    """An interval over an all-GC sequence must leave it untouched."""
    # Bases are drawn only from G and C, so there are no changes to make.
    bases = ''.join(random.choice('GC') for _ in range(200))
    original_record = SeqRecord(Seq(bases, generic_dna))
    working_record = copy.deepcopy(original_record)

    target_intervals = [(100, 125)]
    automated_intergenic_gc_fixer(working_record, target_intervals)

    self.assertEqual(
        str(original_record.seq),
        str(working_record.seq))
def test_change_some(self):
    """Keep changing positions until target is met.

    Starts from an all-AT sequence (GC of 0), runs the fixer on the
    interval (100, 125), and checks that every 100-base window centered
    on a position in that interval reports a GC value of at least 30.
    """
    # Repeat to make sure not stochastic result
    # ...though it still could be :)
    for _trial in range(100):
        # Sequence is all AT so some will have to change.
        # A generator keeps its loop variable out of the method scope, so
        # it cannot clobber the trial counter.
        seq = Seq(
            ''.join(random.choice('AT') for _ in range(200)),
            generic_dna)
        orig_seq_record = SeqRecord(seq)
        seq_record = copy.deepcopy(orig_seq_record)
        self.assertEqual(0, GC(seq_record.seq))

        interval_list = [(100, 125)]
        automated_intergenic_gc_fixer(seq_record, interval_list)

        for center in range(100, 125):
            # Compute once; reused for both the check and the message.
            window_gc = GC(seq_record.seq[center - 50:center + 50])
            self.assertGreaterEqual(
                window_gc, 30,
                "GC for window centered at %d is %f" % (
                    center, window_gc))

        # The fixer must have modified the sequence to reach the target.
        self.assertNotEqual(
            str(orig_seq_record.seq), str(seq_record.seq))
def test_change_all(self):
    """Change all positions around intervals.

    With the local-window lower bound set to 1.0, every 100-base window
    centered on a position in the interval (100, 125) is expected to
    report a GC value of exactly 100 after fixing.
    """
    # Sequence is all AT, so every base in the checked windows has to
    # change for those windows to reach a GC of 100.
    seq = Seq(
        ''.join(random.choice('AT') for _ in range(200)),
        generic_dna)
    orig_seq_record = SeqRecord(seq)
    seq_record = copy.deepcopy(orig_seq_record)
    self.assertLess(GC(seq_record.seq), 30)

    interval_list = [(100, 125)]

    constraint_obj = GCContentConstraints()
    constraint_obj.local_window_lower_bound = 1.0
    constraint_obj.local_window_upper_bound = 1.1  # no upper bound

    automated_intergenic_gc_fixer(
        seq_record, interval_list,
        gc_content_constraint_obj=constraint_obj)

    for center in range(100, 125):
        # Compute once; reused for both the check and the message.
        window_gc = GC(seq_record.seq[center - 50:center + 50])
        self.assertEqual(
            100, window_gc,
            "GC for window centered at %d is %f" % (center, window_gc))

    self.assertNotEqual(str(orig_seq_record.seq), str(seq_record.seq))
def test_avoid_changes_in_shadows(self):
    """Avoid changing bases in CDS features.

    Requests 100% GC around a single position that lies inside a CDS
    "shadow", then checks that the shadowed bases are unchanged and that
    the surrounding window only reaches the GC value attainable from the
    bases outside the shadow.
    """
    # Sequence is all AT so some will have to change.
    seq = Seq(
        ''.join(random.choice('AT') for _ in range(200)),
        generic_dna)
    orig_seq_record = SeqRecord(seq)

    # We will expect a 60 base shadow from 60 - 120. This is the CDS
    # and 20 bases upstream.
    feature_1_loc = FeatureLocation(80, 120, strand=1)
    feature_1 = SeqFeature(feature_1_loc, type='CDS', id=1)
    orig_seq_record.features.append(feature_1)

    seq_record = copy.deepcopy(orig_seq_record)
    self.assertEqual(0, GC(seq_record.seq))

    # Hit just one position.
    interval_list = [(100, 101)]

    # Aim for 100% to hit all bases possible.
    constraint_obj = GCContentConstraints()
    constraint_obj.local_window_lower_bound = 1.0
    constraint_obj.local_window_upper_bound = 1.1  # no upper bound

    automated_intergenic_gc_fixer(
        seq_record, interval_list,
        gc_content_constraint_obj=constraint_obj)

    # Expect window centered at 100 (bases 50 - 150) to have GC 40: the
    # 60 shadowed bases must stay AT, which leaves only the 40 bases
    # outside the shadow available to change.
    window_seq = seq_record.seq[50:150]
    self.assertEqual(40, GC(window_seq))

    # Make sure shadow seq is unchanged.
    self.assertEqual(
        str(feature_1.extract(orig_seq_record.seq)),
        str(feature_1.extract(seq_record.seq)))