def test_insert_sequence_no_features(self):
        """Inserting into a feature-less record splices the bases in place."""
        record = SeqRecord(Seq('ATGTTTGGG', generic_dna))
        payload = Seq('CCC', generic_dna)

        # Index 6 is the boundary between 'ATGTTT' and 'GGG'.
        result = insert_sequence(record, payload, 6)

        self.assertEqual('ATGTTTCCCGGG', str(result.seq))
    def test_insert_sequence_no_features(self):
        """Check a plain insertion on a record that carries no features."""
        original_record = SeqRecord(Seq('ATGTTTGGG', generic_dna))

        # Splice 'CCC' in at index 6, i.e. right after 'ATGTTT'.
        inserted_bases = Seq('CCC', generic_dna)
        new_record = insert_sequence(original_record, inserted_bases, 6)

        expected = 'ATGTTTCCCGGG'
        self.assertEqual(expected, str(new_record.seq))
    def test_insert_sequence_update_features(self):
        """Features must still extract their original bases after an insert."""
        record = SeqRecord(Seq('ATGTTTGGGAAATTT', generic_dna))

        # Bases each feature is expected to cover, keyed by feature id.
        expected_bases_by_id = {1: 'ATG', 2: 'GGG', 3: 'AAATTT'}

        def _check_feature(feat, rec):
            expected = expected_bases_by_id[feat.id]
            self.assertEqual(expected, str(feat.extract(rec).seq))

        # A mix of features: one ahead of the insertion point (index 6),
        # one starting exactly at it, and one further downstream. Each is
        # verified against the untouched record as soon as it is attached.
        for feat_id, start, end in ((1, 0, 3), (2, 6, 9), (3, 9, 15)):
            location = FeatureLocation(start, end)
            feat = SeqFeature(location, type='CDS', id=feat_id)
            record.features.append(feat)
            _check_feature(feat, record)

        # Insert 'CCC' at index 6.
        payload = Seq('CCC', generic_dna)
        result = insert_sequence(record, payload, 6)

        # The sequence grows, the feature count is unchanged, and every
        # feature still maps onto the bases it covered before.
        self.assertEqual('ATGTTTCCCGGGAAATTT', str(result.seq))
        self.assertEqual(len(record.features), len(result.features))
        for feat in result.features:
            _check_feature(feat, result)
    def test_insert_sequence_update_features(self):
        """Verify feature coordinates are kept consistent across an insert."""
        source_record = SeqRecord(Seq('ATGTTTGGGAAATTT', generic_dna))

        # Feature id -> bases the feature should always extract.
        bases_for_feature = {
            1: 'ATG',
            2: 'GGG',
            3: 'AAATTT',
        }

        def _assert_extracts_expected(feature, parent_record):
            wanted = bases_for_feature[feature.id]
            found = str(feature.extract(parent_record).seq)
            self.assertEqual(wanted, found)

        def _add_cds(feature_id, start, end):
            # Attach a CDS feature to the source record and confirm it
            # extracts the expected bases before anything is inserted.
            cds = SeqFeature(
                FeatureLocation(start, end), type='CDS', id=feature_id)
            source_record.features.append(cds)
            _assert_extracts_expected(cds, source_record)

        # Features on both sides of the insertion point (index 6).
        _add_cds(1, 0, 3)
        _add_cds(2, 6, 9)
        _add_cds(3, 9, 15)

        # Run the insertion of 'CCC' at index 6.
        new_bases = Seq('CCC', generic_dna)
        inserted_at = 6
        modified_record = insert_sequence(
            source_record, new_bases, inserted_at)

        # Invariants: expected sequence, same number of features, and each
        # feature still extracts its original bases from the new record.
        self.assertEqual('ATGTTTCCCGGGAAATTT', str(modified_record.seq))
        original_count = len(source_record.features)
        self.assertEqual(original_count, len(modified_record.features))
        for feature in modified_record.features:
            _assert_extracts_expected(feature, modified_record)
Example #5
0
def insert_frt_site(genome_record,
                    upstream_frt_seq,
                    upstream_insert_pos,
                    downstream_frt_seq,
                    downstream_insert_pos,
                    feature_id_prefix='',
                    upstream_validation_seq=None,
                    downstream_validation_seq=None):
    """Inserts an upstream and a downstream FRT site into a genome copy.

    Args:
        genome_record: The genome_record to start with. This is not mutated
            in this method; the insertions are applied to a deep copy.
        upstream_frt_seq: Sequence to insert as the upstream FRT site.
            Expected to be one of FRT_OPTIONS.
        upstream_insert_pos: Insert position for the upstream FRT site.
        downstream_frt_seq: Sequence to insert as the downstream FRT site.
            Expected to be one of FRT_OPTIONS.
        downstream_insert_pos: Insert position for the downstream FRT site.
            NOTE: This is the insert position before the upstream is inserted.
                This method will account for that by shifting it by
                len(upstream_frt_seq), which assumes the downstream position
                is not before the upstream one.
        feature_id_prefix: Prefix for the BioPython feature id.
            Recommended: 'seg_3' for the upstream FRT site of
            segment 3. The ids used are the prefix followed by
            '_upstream_frt' and '_downstream_frt'.
        upstream_validation_seq: The next n bases after the start of the
            upstream FRT site. This is used as a sanity check to make sure the
            FRT is being inserted in the right place. Skipped when falsy.
        downstream_validation_seq: The n bases before the downstream site.
            Used for sanity checking the insertion. Skipped when falsy.

    Returns:
        A modified SeqRecord with the change.

    Raises:
        AssertionError: If a validation sequence is given and does not match
            the bases found in genome_record at the corresponding position.
            The message reports the bases actually found.
    """
    # Maybe check the validation bases. These checks raise explicitly rather
    # than using the `assert` statement so that they still run when Python
    # is started with -O; the exception type and message are unchanged.
    if upstream_validation_seq:
        upstream_window_end = (upstream_insert_pos +
                               len(upstream_validation_seq))
        actual_upstream_seq = str(
            genome_record.seq[upstream_insert_pos:upstream_window_end])
        if not (upstream_validation_seq == actual_upstream_seq):
            raise AssertionError("Actual %s" % actual_upstream_seq)
    if downstream_validation_seq:
        downstream_window_start = (downstream_insert_pos -
                                   len(downstream_validation_seq))
        actual_downstream_seq = str(
            genome_record.seq[downstream_window_start:downstream_insert_pos])
        if not (downstream_validation_seq == actual_downstream_seq):
            raise AssertionError("Actual %s" % actual_downstream_seq)

    # Adjust the downstream insert position to account for the upstream
    # one being inserted first.
    adjusted_downstream_insert_pos = (downstream_insert_pos +
                                      len(upstream_frt_seq))

    # Make a copy of the record so we can modify it.
    updated_genome_record = copy.deepcopy(genome_record)

    # Insert the upstream frt.
    updated_genome_record = insert_sequence(
        updated_genome_record,
        upstream_frt_seq,
        upstream_insert_pos,
        safe_features=[],
        insert_feature_type=FRT_FEATURE_TYPE,
        insert_feature_id=feature_id_prefix + '_upstream_frt',
        insert_feature_strand=1)

    # Insert the downstream frt.
    updated_genome_record = insert_sequence(
        updated_genome_record,
        downstream_frt_seq,
        adjusted_downstream_insert_pos,
        safe_features=[],
        insert_feature_type=FRT_FEATURE_TYPE,
        insert_feature_id=feature_id_prefix + '_downstream_frt',
        insert_feature_strand=1)

    return updated_genome_record