def test_fix_overlaps_third_feature_none_overlap(self):
        """Tests fix_overlaps() when a third feature follows an overlapping
        pair without overlapping either of them.

        Layout: feature 1 (strand=1) and feature 2 (strand=-1) share the
        bases from overlap_start_pos to the end of feature 1; feature 3
        (strand=1) starts after feature 2 plus a stretch of junk sequence.
        After the fix, the record sequence must be the plain concatenation
        of the three feature sequences (with the junk between features 2
        and 3), and every feature must still pass _assert_feature_seq.
        """
        feature_1_seq = 'ATGTTTGGG'
        feature_2_seq = 'GGGCCCAAAGTA'
        inter_f2_f3_junk = 'GTAGCTATCTATCTGGTTAAATC'
        feature_3_seq = 'ATGAAACCCTTTGGGTTTCCCAAA'
        overlap_start_pos = 6
        # Only the non-overlapping head of feature 1 is laid down; its tail
        # is supplied by the first bases of feature 2.
        whole_seq = (feature_1_seq[:overlap_start_pos] + feature_2_seq +
                     inter_f2_f3_junk + feature_3_seq)
        seq = Seq(whole_seq, generic_dna)
        seq_record = SeqRecord(seq)

        feature_id_to_seq_map = {
            1: feature_1_seq,
            2: feature_2_seq,
            3: feature_3_seq,
        }

        feature_1_loc = FeatureLocation(0, len(feature_1_seq), strand=1)
        feature_1 = SeqFeature(feature_1_loc, type='CDS', id=1)
        seq_record.features.append(feature_1)
        self._assert_feature_seq(feature_1, seq_record, feature_id_to_seq_map)

        feature_2_loc = FeatureLocation(overlap_start_pos,
                                        overlap_start_pos + len(feature_2_seq),
                                        strand=-1)
        feature_2 = SeqFeature(feature_2_loc, type='CDS', id=2)
        seq_record.features.append(feature_2)
        self._assert_feature_seq(feature_2, seq_record, feature_id_to_seq_map)

        feature_3_start = feature_2_loc.end + len(inter_f2_f3_junk)
        feature_3_end = feature_3_start + len(feature_3_seq)
        feature_3_loc = FeatureLocation(feature_3_start,
                                        feature_3_end,
                                        strand=1)
        feature_3 = SeqFeature(feature_3_loc, type='CDS', id=3)
        seq_record.features.append(feature_3)
        self._assert_feature_seq(feature_3, seq_record, feature_id_to_seq_map)

        # Build and use the overlap fixer on a copy, so the input record
        # built above is left as it was.
        updated_seq_record = copy.deepcopy(seq_record)
        refactor_context = RefactorContext(updated_seq_record)
        refactor_context.set_forbidden_codon_set(set(['GGG']))
        cpf = ConflictingPairFixer(refactor_context)
        cpf.fix_overlaps()

        EXPECTED_SEQUENCE = (feature_1_seq + feature_2_seq + inter_f2_f3_junk +
                             feature_3_seq)

        self.assertEqual(EXPECTED_SEQUENCE, str(updated_seq_record.seq))
        for feature_id in feature_id_to_seq_map:
            new_feature = get_feature_by_id(updated_seq_record, feature_id)
            self._assert_feature_seq(new_feature, updated_seq_record,
                                     feature_id_to_seq_map)
Beispiel #2
0
    def test_remove_site_in_coding_feature(self):
        """Tests removing a restriction enzyme that falls in a coding
        region.

        A single CDS is built that spans BEFORE, the enzyme's recognition
        site and the first three bases of AFTER. After the removal the site
        must no longer be found in the record and the feature's translation
        must be unchanged.
        """
        RESTRICTION_ENZYME = Restriction.BsmBI
        BEFORE = 'ATGTTTGGGCCCAAATTTGGGAAATTTGGGAAATTTGGGAAATTTGGGAAATTTGGG'
        SITE_SEQ = RESTRICTION_ENZYME.site
        AFTER = 'TAGAAAAAAAAAAAAAAAA'
        SEQ = Seq(BEFORE + SITE_SEQ + AFTER, generic_dna)
        seq_record = SeqRecord(SEQ)

        refactor_context = RefactorContext(seq_record)

        feature_1_loc = FeatureLocation(0,
                                        len(BEFORE) + len(SITE_SEQ) + 3,
                                        strand=1)
        feature_1 = SeqFeature(feature_1_loc, type='CDS', id='1')
        seq_record.features.append(feature_1)
        FEATURE_1_SEQ_ORIG = feature_1.extract(str(seq_record.seq))
        # Floor division keeps this an int (it is used as a list repeat
        # count below) regardless of the interpreter's division semantics.
        FEATURE_1_NUM_CODONS = len(feature_1) // 3

        # Compute fake feature profile: one constant value per codon for
        # each profile type.
        fake_profile_values_map = {}
        fake_profile_values_map[feature_1.id] = {
            GCContentFeatureProfile.get_name(): [0.2] * FEATURE_1_NUM_CODONS,
            SecondaryStructureFeatureProfile.get_name():
            [-10] * FEATURE_1_NUM_CODONS,
            CodonRarityFeatureProfile.get_name(): [0.5] * FEATURE_1_NUM_CODONS,
        }
        refactor_context.set_feature_id_to_profile_values_map(
            fake_profile_values_map)

        # Sanity check: exactly one site is present before the removal.
        occurrences = find_restriction_site_occurrences(
            seq_record, RESTRICTION_ENZYME)
        self.assertEqual(1, len(occurrences))

        result = _remove_site_in_coding_feature(refactor_context, seq_record,
                                                occurrences[0], feature_1)

        self.assertTrue(result['is_success'])

        seq_record = result['updated_genome_record']

        # The original feature location is reused to extract the updated
        # sequence from the returned record.
        FEATURE_1_SEQ_UPDATED = feature_1.extract(str(seq_record.seq))

        occurrences = find_restriction_site_occurrences(
            seq_record, RESTRICTION_ENZYME)
        self.assertEqual(0, len(occurrences))
        self.assertEqual(translate_custom(FEATURE_1_SEQ_ORIG),
                         translate_custom(FEATURE_1_SEQ_UPDATED))
Beispiel #3
0
def fix_homology_issues(genome_record, ids_to_fix=None):
    """Finds pairs of copied features created during genome refactoring
    and muddles the upstream original (near the 3' terminus) in order
    to decrease the probability of "snap-back" during insertion.

    Args:
        genome_record: The record to process. It is deep-copied first; the
            copy is what gets handed to the resolution step and returned.
        ids_to_fix: Optional collection of copy ids. When non-empty, only
            pairs whose 'copy_id' is in this collection are resolved. When
            None or empty, every pair found is resolved.

    Returns:
        A copy of genome_record with homology issues resolved.
    """
    resolved_genome_record = copy.deepcopy(genome_record)

    refactor_context = RefactorContext(resolved_genome_record)

    # Identify features to check for homology issues. These are
    # generally (always?) features that have had the head/RBS portions
    # copied in order to split apart large overlaps.
    homology_pair_obj_list = find_features_to_check_for_homology(
        resolved_genome_record)

    # Resolve homologies, optionally restricted to the requested copy ids.
    for pair_obj in homology_pair_obj_list:
        copy_id = pair_obj['copy_id']
        if ids_to_fix and copy_id not in ids_to_fix:
            continue
        resolve_single_homology_issue(
            refactor_context,
            pair_obj,
        )

    return resolved_genome_record
    def test_fix_overlaps_simple(self):
        """Tests fix_overlaps() on a single pair of overlapping features.

        Feature 1 (strand=1) and feature 2 (strand=-1) share the bases from
        overlap_start_pos to the end of feature 1. After the fix, the record
        sequence must be the two feature sequences laid end to end, feature 1
        must end exactly where feature 2 starts, and both features must
        still pass _assert_feature_seq.
        """
        feature_1_seq = 'ATGTTTGGG'
        feature_2_seq = 'GGGCCCAAAGTA'
        overlap_start_pos = 6
        # Only the non-overlapping head of feature 1 is laid down; its tail
        # is supplied by the first bases of feature 2.
        whole_seq = feature_1_seq[:overlap_start_pos] + feature_2_seq
        seq = Seq(whole_seq, generic_dna)
        seq_record = SeqRecord(seq)

        feature_id_to_seq_map = {
            1: feature_1_seq,
            2: feature_2_seq,
        }

        feature_1_loc = FeatureLocation(0, len(feature_1_seq), strand=1)
        feature_1 = SeqFeature(feature_1_loc, type='CDS', id=1)
        seq_record.features.append(feature_1)
        self._assert_feature_seq(feature_1, seq_record, feature_id_to_seq_map)

        feature_2_loc = FeatureLocation(overlap_start_pos,
                                        overlap_start_pos + len(feature_2_seq),
                                        strand=-1)
        feature_2 = SeqFeature(feature_2_loc, type='CDS', id=2)
        seq_record.features.append(feature_2)
        self._assert_feature_seq(feature_2, seq_record, feature_id_to_seq_map)

        # Build and use the overlap fixer on a copy, so the input record
        # built above is left as it was.
        updated_seq_record = copy.deepcopy(seq_record)
        refactor_context = RefactorContext(updated_seq_record)
        refactor_context.set_forbidden_codon_set(set(['GGG']))
        cpf = ConflictingPairFixer(refactor_context)
        cpf.fix_overlaps()

        EXPECTED_SEQUENCE = feature_1_seq + feature_2_seq

        self.assertEqual(EXPECTED_SEQUENCE, str(updated_seq_record.seq))
        new_feature_1 = get_feature_by_id(updated_seq_record, feature_1.id)
        new_feature_2 = get_feature_by_id(updated_seq_record, feature_2.id)
        # The two features must now be adjacent rather than overlapping.
        self.assertEqual(new_feature_1.location.end,
                         new_feature_2.location.start)
        self._assert_feature_seq(new_feature_1, updated_seq_record,
                                 feature_id_to_seq_map)
        self._assert_feature_seq(new_feature_2, updated_seq_record,
                                 feature_id_to_seq_map)
    def test_swap_region_seq(self):
        """Tests that swap_region_seq() replaces the given region at the
        given start position with the new sequence.
        """
        context = RefactorContext(SeqRecord(Seq('ATGTTTGGG', generic_dna)))
        replacement = 'TTAGGA'

        # Swap the 'TTTGGG' that starts at position 3 for the replacement.
        swapped_record = swap_region_seq(context, 3, 'TTTGGG', replacement)

        self.assertEqual('ATGTTAGGA', str(swapped_record.seq))
Beispiel #6
0
def replace_codons_in_feature_subset(
        genome_record,
        essential_feature_ids,
        codons_to_remove,
        original_codon_usage_memex,
        refactored_codon_usage_memex,
        feature_id_to_profile_values_map,
        range_start,
        range_end,
        tmp_result_file,
        debug):
    """Work on a subset of the features to fix.

    Runs replace_codons_in_single_feature() for each feature id in
    essential_feature_ids[range_start:range_end] and pickles the collected
    results to tmp_result_file.

    Args:
        genome_record: Record wrapped in the RefactorContext used for every
            feature in the subset.
        essential_feature_ids: Indexable sequence of feature ids; the range
            arguments index into it.
        codons_to_remove: Not used in this function body.
        original_codon_usage_memex: Not used in this function body.
        refactored_codon_usage_memex: Not used in this function body.
        feature_id_to_profile_values_map: Installed on the RefactorContext
            before any feature is processed.
        range_start: Index of the first feature to process (inclusive).
        range_end: Index one past the last feature to process.
        tmp_result_file: Path the results dict (feature id -> result) is
            pickled to.
        debug: When true, at most the first feature of the range is
            processed.

    Raises:
        AssertionError: If range_end < range_start.
    """
    assert range_end >= range_start

    results = {}

    refactor_context = RefactorContext(genome_record)
    refactor_context.set_feature_id_to_profile_values_map(
        feature_id_to_profile_values_map)

    # Code path that helps debug across a limited range when debug is True.
    # Clamp to range_end so an empty range stays empty instead of reaching
    # one feature past the subset assigned to this call.
    effective_range_start = range_start
    if debug:
        effective_range_end = min(range_end, range_start + 1)
    else:
        effective_range_end = range_end

    num_features = len(essential_feature_ids)
    for feature_index in range(effective_range_start, effective_range_end):
        print('Fixing feature %d of  %d' % (feature_index + 1, num_features))
        feature_id = essential_feature_ids[feature_index]
        print('Feature id: %s' % feature_id)

        result = replace_codons_in_single_feature(refactor_context, feature_id)

        # Add the results to the growing dictionary.
        results[feature_id] = result

    # Write results to file as soon as we're done. Binary mode keeps the
    # pickle stream intact on every platform and pickle protocol.
    with open(tmp_result_file, 'wb') as fh:
        pickle.dump(results, fh)
Beispiel #7
0
def perform_final_steps(refactor_context, seg_start, seg_end,
        upstream_flanking_seq=None, downstream_flanking_seq=None,
        validation_start_seq=None, validation_end_seq=None,
        ignore_problems_in_feature_ids=None, report_prefix=None):
    """Runs the final clean-up passes over a segment of the genome record
    held by refactor_context and returns the updated record.

    Steps, in order:
        * check recoding is complete before starting
        * fix GC content
        * remove homopolymer runs
        * remove restriction sites
        * run the GC content pass again in debug mode (this is where the
          GC content report file is requested)
        * check recoding is still complete
        * (optional) add end pieces (e.g. FRT sites)

    Both recoding checks compare against the module-level
    ORIGINAL_GENOME_RECORD.

    Args:
        refactor_context: The RefactorContext.
        seg_start: The first position (pythonic) in the genome_record contained
            within refactor_context for the segment.
        seg_end: End position (pythonic) for the segment.
        upstream_flanking_seq: Sequence to insert at the head of the segment.
        downstream_flanking_seq: Sequence to insert at the tail of the segment.
        validation_start_seq: Optional sequence at the start of the segment
            to sanity check the start position. We lack a ui.
        validation_end_seq: Optional sequence at the end of the segment
            to sanity check the end position.
        ignore_problems_in_feature_ids: Feature ids that the client
            is aware may have problems so that we can ignore. Defaults to
            an empty list.
        report_prefix: Optional prefix forwarded to the homopolymer and
            restriction site removal steps, and used to name the GC content
            report (report_prefix + 'gc_content.csv'). When None, no
            report_file is passed to the final GC content pass.

    Returns:
        An updated SeqRecord reflecting changes.

    Raises:
        AssertionError: If a validation sequence is given and does not match
            the record at the corresponding segment boundary.
    """
    # Avoid a shared mutable default; callers that omit the argument still
    # get an empty list.
    if ignore_problems_in_feature_ids is None:
        ignore_problems_in_feature_ids = []

    updated_genome_record = copy.deepcopy(refactor_context.get_genome_record())

    # Check features are conserved before we start.
    check_recoding_is_complete(ORIGINAL_GENOME_RECORD, updated_genome_record,
            ignore_problems_in_feature_ids=ignore_problems_in_feature_ids,
            interval=(seg_start, seg_end))

    orig_seq = str(updated_genome_record.seq)

    if validation_start_seq:
        assert validation_start_seq == orig_seq[
                seg_start:seg_start + len(validation_start_seq)]

    if validation_end_seq:
        assert validation_end_seq == orig_seq[
                seg_end - len(validation_end_seq):seg_end]

    updated_refactor_context = RefactorContext(updated_genome_record)

    # Fix GC content.
    # NOTE(review): this first pass is given the caller's refactor_context
    # rather than updated_refactor_context; every later pass uses the
    # latter. Confirm this is intentional.
    GC_CONTENT_CONSTRAINT_OBJ = GCContentConstraints()
    updated_genome_record = fix_gc_content(
            refactor_context,
            GC_CONTENT_CONSTRAINT_OBJ,
            start_bound=seg_start,
            end_bound=seg_end,
            debug=False)
    updated_refactor_context.set_genome_record(updated_genome_record)

    # Remove homopolymer runs.
    remove_homopolymer_result = remove_homopolymer_runs(
            updated_refactor_context, start_bound=seg_start, end_bound=seg_end,
            report_prefix=report_prefix)
    updated_genome_record = remove_homopolymer_result['updated_genome_record']
    flagged_h_runs = remove_homopolymer_result['flagged']
    updated_refactor_context.set_genome_record(updated_genome_record)
    print('Flagged homopolyer runs:')
    PRETTY_PRINTER.pprint(flagged_h_runs)

    # Remove restriction sites.
    remove_res_sites_result = remove_restriction_sites(
            updated_refactor_context, RESTRICTION_ENZYME_SITES_TO_REMOVE,
            start_bound=seg_start, end_bound=seg_end,
            report_prefix=report_prefix)
    updated_genome_record = remove_res_sites_result['updated_genome_record']
    updated_refactor_context.set_genome_record(updated_genome_record)
    flagged_res_sites = remove_res_sites_result['flagged']
    print('Flagged restriction sites:')
    PRETTY_PRINTER.pprint(flagged_res_sites)

    # Generate the GC content report after all fixes are done. The report
    # path can only be built when a prefix was supplied; concatenating onto
    # the default None would raise TypeError after all the work above.
    gc_report_kwargs = {}
    if report_prefix is not None:
        gc_report_kwargs['report_file'] = report_prefix + 'gc_content.csv'
    updated_genome_record = fix_gc_content(
            updated_refactor_context,
            GC_CONTENT_CONSTRAINT_OBJ,
            start_bound=seg_start,
            end_bound=seg_end,
            debug=True,
            **gc_report_kwargs)

    # Check features are conserved.
    print('Checking translation/rna/forbidden codons ...')
    check_recoding_is_complete(ORIGINAL_GENOME_RECORD, updated_genome_record,
            ignore_problems_in_feature_ids=ignore_problems_in_feature_ids,
            interval=(seg_start, seg_end))

    # Maybe insert FRT sites.
    if upstream_flanking_seq or downstream_flanking_seq:
        updated_genome_record = insert_frt_site(
                updated_genome_record,
                upstream_flanking_seq,
                seg_start,
                downstream_flanking_seq,
                seg_end,
                feature_id_prefix='seg2',
                upstream_validation_seq=validation_start_seq,
                downstream_validation_seq=validation_end_seq)

    return updated_genome_record
Beispiel #8
0
        except AssertionError:
            print 'WARNING: Could not find %s' % string.upper(row['original'])
            # Probably couldn't find it because overlaps, just continue on.
            continue


if __name__ == '__main__':
    from Bio import SeqIO

    from biopython_util import get_genome_record
    from refactor_context import RefactorContext

    genome_record = get_genome_record(
            '../data/completed_segments/seg2/2013_03_06_20_16_04_mds42_refactored.gbk')
    genome_record.name =  genome_record.name[:-3] + 'seg2'
    refactor_context = RefactorContext(genome_record)

    SEG_START = 100863
    SEG_END = 148475
    UPSTREAM_FLANKING_SEQ = 'CAGCCTTGTTTCGCCAGAATGCCAGTCAGCATAAGGGAGAGCTCAAGGCAGAAGTTCCTATTCCGAAGTTCCTATTCTCATATAAGTATAGGAACTTC'
    DOWNSTREAM_FLANKING_SEQ = 'CCTGTTGACAATTAATCATCGGCATAGTATATCGGCATAGTATAATACGACAAGGTGAGGAACTAAACCCAGGAGGCAGATCATGAGTCTGAAAGAAAAAACACAATCTCTGTTTGCCAACGCATTTGGCTACCCTGCCACTCACACCATTCAGGCGCCTGGCCGCGTGAATTTGATTGGTGAACACACCGACTACAACGACGGTTTCGTTCTGCCCTGCGCGATTGATTATCAAACCGTGATCAGTTGTGCACCACGCGATGACCGTAAAGTTCGCGTGATGGCAGCCGATTATGAAAATCAGCTCGACGAGTTTTCCCTCGATGCGCCCATTGTCGCACATGAAAACTATCAATGGGCTAACTACGTTCGTGGCGTGGTGAAACATCTGCAACTGCGTAACAACAGCTTCGGCGGCGTGGACATGGTGATCAGCGGCAATGTGCCGCAGGGTGCCGGGTTAAGTTCTTCCGCTTCACTGGAAGTCGCGGTCGGAACCGTATTGCAGCAGCTTTATCATCTGCCGCTGGACGGCGCACAAATCGCGCTTAACGGTCAGGAAGCAGAAAACCAGTTTGTAGGCTGTAACTGCGGGATCATGGATCAGCTAATTTCCGCGCTCGGCAAGAAAGATCATGCCTTGCTGATCGATTGCCGCTCACTGGGGACCAAAGCAGTTTCCATGCCCAAAGGTGTGGCTGTCGTCATCATCAACAGTAACTTCAAACGTACCCTGGTTGGCAGCGAATACAACACCCGTCGTGAACAGTGCGAAACCGGTGCGCGTTTCTTCCAGCAGCCAGCCCTGCGTGATGTCACCATTGAAGAGTTCAACGCTGTTGCGCATGAACTGGACCCGATCGTGGCAAAACGCGTGCGTCATATACTGACTGAAAACGCCCGCACCGTTGAAGCTGCCAGCGCGCTGGAGCAAGGCGACCTGAAACGTATGGGCGAGTTGATGGCGGAGTCTCATGCCTCTATGCGCGATGATTTCGAAATCACCGTGCCGCAAATTGACACTCTGGTAGAAATCGTCAAAGCTGTGATTGGCGACAAAGGTGGCGTACGCATGACCGGCGGCGGATTTGGCGGCTGTATCGTCGCGCTGATCCCGGAAGAGCTGGTGCCTGCCGTACAGCAAGCTGTCGCTGAACAATATGAAGCAAAAACAGGTATTAAAGAGACTTTTTACGTTTGTAAACCATCACAAGGAGCAGGACAGTGCTGAAAAAAAAAACCCCGCCCCTGACAGGGCGGGGTTTTTTTTGAAGTTCCTATTCCGAAGTTCCTATTCTATCAGAAGTATAGGAACTTCAGTGCGGATTTCGTATTTGCAGCTCGTCAGTACTTTCAGAATCATGGCCT'
    VALIDATION_START_SEQ = 'GAGGCCGACGATGATTACGGCCTCAGG'
    VALIDATION_END_SEQ = 'TTAATCATTTGACGTCCCTTGT'

    updated_genome_record = perform_final_steps(refactor_context, SEG_START,
            SEG_END, UPSTREAM_FLANKING_SEQ, DOWNSTREAM_FLANKING_SEQ,
            VALIDATION_START_SEQ, VALIDATION_END_SEQ)

    genome_output_file = (
            '../data/completed_segments/seg2/seg2_final_with_flanking.gbk')
    with open(genome_output_file, 'w') as output_fh:
Beispiel #9
0
def main():
    """Muddles the end of every feature flagged in AGN_DEBUG_FILE and
    writes the resulting record to OUTFILE in GenBank format.

    A feature is flagged when its row in the CSV has 'Separate' equal to
    '1'; the row's 'ID' column gives the feature id. The record is loaded
    from RECODED_PATH.
    """
    with open(AGN_DEBUG_FILE) as agn_debug_fh:
        source_ids_to_muddle = [
            row['ID'] for row in csv.DictReader(agn_debug_fh)
            if row['Separate'] == '1'
        ]
    print(source_ids_to_muddle)

    record = get_genome_record(RECODED_PATH)
    refactor_context = RefactorContext(record)

    for source_feature_id in source_ids_to_muddle:
        muddle_end(get_feature_by_id(record, source_feature_id), record,
                   refactor_context, 20)

    # The block below is commented out and never runs. It appears to be an
    # alternative way of choosing which features to muddle, driven by the
    # RBS-copy / head-copy features in the record; kept for reference.

    # rbs_cp_features = [feature for feature in record.features if
    #         feature.type == InsertType.FIX_OVERLAP_RBS_COPY]

    # overlap_head_cp_features = [feature for feature in record.features if
    #         feature.type == InsertType.FIX_OVERLAP_HEAD_COPY]

    # print 'rbs', len(rbs_cp_features)
    # print 'head', len(overlap_head_cp_features)

    # head_cp_feature_ids = set()
    # for head_feature in overlap_head_cp_features:
    #     source_feature_id = re.match(r'(?P<feature_id>.*)_' + InsertType.FIX_OVERLAP_HEAD_COPY, head_feature.id).group('feature_id')
    #     head_cp_feature_ids.add(source_feature_id)

    # count = 0
    # for rbs_cp_feature in rbs_cp_features:
    #     downstream = False
    #     match = re.match(r'(?P<feature_id>.*)_upstream_' + InsertType.FIX_OVERLAP_RBS_COPY, rbs_cp_feature.id)
    #     if not match:
    #         downstream = True
    #         match = re.match(r'(?P<feature_id>.*)_downstream_' + InsertType.FIX_OVERLAP_RBS_COPY, rbs_cp_feature.id)
    #     source_feature_id = match.group('feature_id')
    #     if source_feature_id in head_cp_feature_ids:
    #         print 'HAS HEAD_CP', source_feature_id
    #         continue
    #     source_feature = get_feature_by_id(record, source_feature_id)
    #     num_part = re.match(r'.*_(?P<num>[0-9]+)', source_feature_id).group('num')

    #     if source_feature.strand == 1:
    #         assert source_feature.location.start > rbs_cp_feature.location.start
    #         actual_source_id = ID_ROOT + str(int(num_part) - 1)
    #     else:
    #         assert source_feature.location.start < rbs_cp_feature.location.start
    #         actual_source_id = ID_ROOT + str(int(num_part) + 1)
    #     try:
    #         actual_source_feature = get_feature_by_id(record, actual_source_id)
    #     except:
    #         print 'NOT MUDDLING', rbs_cp_feature.id
    #         continue

    #     if actual_source_feature.strand != rbs_cp_feature.strand:
    #         print 'NOT MUDDLING', rbs_cp_feature.id
    #         continue

    #     print 'MUDDLING', rbs_cp_feature.id
    #     muddle_end(actual_source_feature, record, refactor_context, len(rbs_cp_feature))

    #     count += 1

    # print 'COUNT', count

    with open(OUTFILE, 'w') as fh:
        SeqIO.write(record, fh, 'genbank')
Beispiel #10
0
def refactor_with_min_overlap_fixes_and_preserve_rbs(genome_record,
                                                     tmp_file_prefix,
                                                     debug=False):
    """Refactoring strategy that only fixes overlaps when it's necessary
    for removing forbidden codons and/or preserving rbs strength.

    This is the second big iteration of our strategy following discussion
    on 2/6/13. Notable differences from the initial strategy:
        * Before this we were just pulling apart all overlaps and copying the
          RBS site. However, intuitively we have concerns that this may
          introduce many unnecessary changes.
        * We are now also taking into account coding features that are close
          enough to each other (even if not overlapping) where re-coding
          could affect one of the feature's RBS regions.

    Super high-level algorithm overview:
        1. Fix overlaps.
        2. Recode each gene to remove forbidden codons.

    Medium-level algorithm overview:
        * Identify all pairs of coding regions that are either overlapping,
          or are close enough (< 20 bp), where recoding one may affect
          translation of the other.
            - For each of these pairs:
                * If there are no forbidden codons in the affected regions:
                    - Nothing to do, mark the pair as resolved.
                * If there are forbidden codons:
                    - Do an exhaustive search over the affected region
                      and try to find a path of synonymous codon
                      substitutions that don't require physical separation.
                      If success, perform the change, and mark any changed
                      codons as "fixed" so that they are not changed in the
                      second half of the overall algorithm where we do the bulk
                      forbidden codon removal.
                    - Otherwise, we need to separate:
                        * If overlap < 4 bp, find minimum amount to copy that
                          resolves any issues, and lock affected RBS regions
                          so that they are not changed.
                        * Otherwise, need to copy overlap + 15 bp upstream
                          of ATG, and to help prevent snap-back:
                            - muddle old start codon in upstream gene
                            - muddle bases in copied region that are not part
                              of RBS
        * Perform synonymous swapping as before, but this time respecting
          locked-in regions from first half of algorithm.

    Args:
        genome_record: The genome record to refactor. A deep copy is taken
            up front and used as the reference for every validation check.
        tmp_file_prefix: Prefix handed to _write_output() for each
            intermediate/final write and to replace_forbidden_codons().
        debug: Forwarded to replace_forbidden_codons().

    Returns:
        Nothing. Results are emitted through _write_output() after each of
        the three stages.
    """
    # Make a copy of the original for validation.
    original_genome_record = copy.deepcopy(genome_record)

    # Context object to be passed around to different methods.
    refactor_context = RefactorContext(genome_record)

    ###########################################################################
    # Fix overlaps
    ###########################################################################

    cfp = ConflictingPairFixer(
        refactor_context,
        cache=USE_CACHE,
        include_close_features=True,
        single_iteration=DEBUG_SINGLE_ITERATION,
        force_separate_AGN=True,
        agn_separation_data_file=AGN_SEPARATION_DATA_FILE)
    genome_record = cfp.fix_overlaps()
    # Point the shared context at the record returned by the overlap fixer
    # so the codon replacement stage below works on it.
    refactor_context.set_genome_record(genome_record)

    # Write the output before going on to next step in case there is an error
    # later, so we can at least have partial results.
    _write_output(genome_record, {}, tmp_file_prefix)

    # Validate that overlaps were fixed correctly before moving on.
    check_all(original_genome_record, genome_record)

    ###########################################################################
    # Swap out remaining forbidden codons
    # NOTE: Some were already replaced while fixing overlaps.
    ###########################################################################

    (genome_record,
     metadata) = replace_forbidden_codons(refactor_context,
                                          num_cores=NUM_CORES,
                                          tmp_file_prefix=tmp_file_prefix,
                                          debug=debug)

    # Write the output just in case again.
    _write_output(genome_record, metadata, tmp_file_prefix)

    # Check that forbidden codons were removed.
    check_forbidden_codons_removed(genome_record, CODONS_TO_REMOVE)

    # Validate that we're still good after codon replacement.
    check_all(original_genome_record, genome_record)

    ###########################################################################
    # Resolve homology issues
    ###########################################################################

    # fix_homology_issues() is given only the record, not refactor_context.
    genome_record = fix_homology_issues(genome_record)

    # Write the final output, overriding the intermediate write above.
    _write_output(genome_record, metadata, tmp_file_prefix)

    # Validation checks.
    check_forbidden_codons_removed(genome_record, CODONS_TO_REMOVE)
    check_all(original_genome_record, genome_record)

    print 'Done.'