def test_insert_sequence_no_features(self):
    """Insert bases into a record that carries no features.

    Only the resulting sequence string is checked here; feature
    bookkeeping is exercised by a separate test.
    """
    original_record = SeqRecord(Seq('ATGTTTGGG', generic_dna))
    bases_to_insert = Seq('CCC', generic_dna)

    # Index 6 falls between the 'ATGTTT' prefix and the 'GGG' suffix.
    result_record = insert_sequence(original_record, bases_to_insert, 6)

    self.assertEqual('ATGTTTCCCGGG', str(result_record.seq))
def test_insert_sequence_no_features(self):
    """Check the spliced sequence when the record has no features."""
    insert_at = 6
    insertion = Seq('CCC', generic_dna)
    record = SeqRecord(Seq('ATGTTTGGG', generic_dna))

    updated = insert_sequence(record, insertion, insert_at)

    # 'CCC' is expected between the first six bases and the trailing 'GGG'.
    observed = str(updated.seq)
    self.assertEqual('ATGTTTCCCGGG', observed)
def test_insert_sequence_update_features(self):
    """Features must extract the same bases before and after an insertion.

    Three CDS features are attached to the record: one ending before the
    insertion index, one starting exactly at it, and one after it.
    """
    expected_bases_by_id = {1: 'ATG', 2: 'GGG', 3: 'AAATTT'}
    seq_record = SeqRecord(Seq('ATGTTTGGGAAATTT', generic_dna))

    def _check_extracted_bases(feature, record):
        # The feature id keys the bases the feature originally covered.
        self.assertEqual(
            expected_bases_by_id[feature.id],
            str(feature.extract(record).seq))

    # (feature id, start, end) -- a mix of positions around the insertion.
    feature_specs = (
        (1, 0, 3),
        (2, 6, 9),
        (3, 9, 15),
    )
    for feature_id, start, end in feature_specs:
        feature = SeqFeature(
            FeatureLocation(start, end), type='CDS', id=feature_id)
        seq_record.features.append(feature)
        # Sanity-check the fixture before any insertion happens.
        _check_extracted_bases(feature, seq_record)

    # Perform the insertion of 'CCC' at index 6.
    updated_seq_record = insert_sequence(
        seq_record, Seq('CCC', generic_dna), 6)

    # Sequence is spliced, feature count is unchanged, and every feature
    # still extracts its original bases from the updated record.
    self.assertEqual('ATGTTTCCCGGGAAATTT', str(updated_seq_record.seq))
    self.assertEqual(
        len(seq_record.features), len(updated_seq_record.features))
    for feature in updated_seq_record.features:
        _check_extracted_bases(feature, updated_seq_record)
def test_insert_sequence_update_features(self):
    """Verify feature extraction is preserved across an insertion."""
    record = SeqRecord(Seq('ATGTTTGGGAAATTT', generic_dna))
    bases_for_feature = {1: 'ATG', 2: 'GGG', 3: 'AAATTT'}

    def _verify(feature, target_record):
        wanted = bases_for_feature[feature.id]
        extracted = feature.extract(target_record)
        self.assertEqual(wanted, str(extracted.seq))

    def _attach_cds(feature_id, start, end):
        # Build a CDS feature, attach it, and confirm it extracts the
        # expected bases from the not-yet-modified record.
        location = FeatureLocation(start, end)
        cds = SeqFeature(location, type='CDS', id=feature_id)
        record.features.append(cds)
        _verify(cds, record)

    # Features on either side of (and touching) the insertion index.
    _attach_cds(1, 0, 3)
    _attach_cds(2, 6, 9)
    _attach_cds(3, 9, 15)

    # Insert 'CCC' at index 6.
    insertion = Seq('CCC', generic_dna)
    updated_record = insert_sequence(record, insertion, 6)

    # Conditions that must survive the insertion.
    self.assertEqual('ATGTTTCCCGGGAAATTT', str(updated_record.seq))
    self.assertEqual(len(record.features), len(updated_record.features))
    for updated_feature in updated_record.features:
        _verify(updated_feature, updated_record)
def insert_frt_site(genome_record, upstream_frt_seq, upstream_insert_pos,
                    downstream_frt_seq, downstream_insert_pos,
                    feature_id_prefix='', upstream_validation_seq=None,
                    downstream_validation_seq=None):
    """Inserts an upstream and a downstream FRT site into a copy of a genome.

    Args:
        genome_record: The genome_record to start with. This is not mutated
            in this method; the insertions are applied to a deep copy.
        upstream_frt_seq: Sequence inserted as the upstream FRT site.
            Expected to be one of FRT_OPTIONS.
        upstream_insert_pos: Insert position for the upstream FRT site.
        downstream_frt_seq: Sequence inserted as the downstream FRT site.
            Expected to be one of FRT_OPTIONS.
        downstream_insert_pos: Insert position for the downstream FRT site.
            NOTE: This is the insert position before the upstream is
            inserted. This method will account for that.
        feature_id_prefix: Prefix for the BioPython feature id. The ids
            used are the prefix followed by '_upstream_frt' and
            '_downstream_frt'. Recommended: 'seg_3' for the FRT sites of
            segment 3.
        upstream_validation_seq: The next n bases after the start of the
            upstream FRT site. This is used as a sanity check to make sure
            the FRT is being inserted in the right place. Skipped when
            empty or None.
        downstream_validation_seq: The n bases before the downstream site.
            Used for sanity checking the insertion. Skipped when empty or
            None.

    Returns:
        A modified SeqRecord with the change.

    Raises:
        AssertionError: If a validation sequence is given and does not
            match the bases found in genome_record at the corresponding
            position. The message reports the bases actually found.
    """
    # Maybe check the validation bases. These checks raise explicitly
    # rather than using the `assert` statement so that they are still
    # enforced when Python runs with -O (which strips assert statements).
    if upstream_validation_seq:
        upstream_window_end = (
            upstream_insert_pos + len(upstream_validation_seq))
        actual_upstream_seq = str(
            genome_record.seq[upstream_insert_pos:upstream_window_end])
        if upstream_validation_seq != actual_upstream_seq:
            raise AssertionError("Actual %s" % actual_upstream_seq)

    if downstream_validation_seq:
        downstream_window_start = (
            downstream_insert_pos - len(downstream_validation_seq))
        actual_downstream_seq = str(
            genome_record.seq[downstream_window_start:downstream_insert_pos])
        if downstream_validation_seq != actual_downstream_seq:
            raise AssertionError("Actual %s" % actual_downstream_seq)

    # Adjust the downstream insert position to account for the upstream
    # one being inserted first.
    # NOTE(review): the shift is applied unconditionally, so it is only
    # correct when upstream_insert_pos <= downstream_insert_pos -- confirm
    # that callers guarantee this ordering.
    adjusted_downstream_insert_pos = (
        downstream_insert_pos + len(upstream_frt_seq))

    # Make a copy of the record so we can modify it.
    updated_genome_record = copy.deepcopy(genome_record)

    # Insert the upstream frt.
    updated_genome_record = insert_sequence(
        updated_genome_record,
        upstream_frt_seq,
        upstream_insert_pos,
        safe_features=[],
        insert_feature_type=FRT_FEATURE_TYPE,
        insert_feature_id=feature_id_prefix + '_upstream_frt',
        insert_feature_strand=1)

    # Insert the downstream frt.
    updated_genome_record = insert_sequence(
        updated_genome_record,
        downstream_frt_seq,
        adjusted_downstream_insert_pos,
        safe_features=[],
        insert_feature_type=FRT_FEATURE_TYPE,
        insert_feature_id=feature_id_prefix + '_downstream_frt',
        insert_feature_strand=1)

    return updated_genome_record