def test_complex_sequences():
    """Diff two long sequences related by a series of edits.

    A 50kb random sequence is passed through ``smu.copy`` to obtain the
    reference, then a chain of ``smu`` edits (insert, reverse, swap, delete)
    produces the second sequence. The merged diff blocks are checked for
    their count, for the operation reported at a few known positions, and
    for round-trip reconstruction of both sequences.
    """
    reference = smu.copy(
        random_dna_sequence(50000, seed=123), 25000, 30000, 50000
    )

    # Each edit is applied on top of the previous one, so the order of the
    # statements (and of the random draws) matters.
    edited = smu.insert(reference, 39000, random_dna_sequence(100))
    edited = smu.insert(edited, 38000, random_dna_sequence(100))
    edited = smu.reverse(edited, 30000, 35000)
    edited = smu.swap(edited, (30000, 35000), (45000, 480000))
    edited = smu.delete(edited, 20000, 2000)
    edited = smu.insert(edited, 10000, random_dna_sequence(2000))
    edited = smu.insert(edited, 0, 1000 * "A")
    diff_blocks = DiffBlocks.from_sequences(reference, edited).merged()

    def s2_length(block):
        """Return the length of the block's location on the second sequence."""
        location_start, location_end, _ = block.s2_location.to_tuple()
        return location_end - location_start

    blocks = diff_blocks.blocks
    assert len(blocks) == 15

    # The 1000 "A" prepended last show up as the very first block.
    assert blocks[0].operation == "insert"
    assert s2_length(blocks[0]) == 1000
    assert blocks[1].operation == "equal"

    # The 2000-nucleotide random insertion.
    assert blocks[2].operation == "insert"
    assert s2_length(blocks[2]) == 2000

    middle_operations = sorted(blocks[i].operation for i in (6, 7))
    assert middle_operations == ["change", "transpose"]
    final_operations = sorted(blocks[i].operation for i in (-1, -2))
    assert final_operations == ["change", "reverse"]

    # The blocks must hold enough information to rebuild both sequences.
    rebuilt_1, rebuilt_2 = diff_blocks.reconstruct_sequences_from_blocks(
        diff_blocks.blocks
    )
    assert rebuilt_1 == reference
    assert rebuilt_2 == edited
Exemple #2
0
    def work(self):
        """Diff the two submitted sequences and return a plot and a record.

        Reads ``self.data.sequence1`` and ``self.data.sequence2`` as records,
        computes their difference blocks, plots them, and adds the diffs as
        ``misc_feature`` annotations to the second record. Progress is
        reported through ``self.logger``.

        Returns
        -------
        dict
          ``'record'``: a dict with the annotated second record serialized as
          a string (``'data'``), a file name and a mimetype.
          ``'figure_data'``: the diff plot as returned by
          ``matplotlib_figure_to_svg_base64_data``.
        """
        data = self.data

        self.logger(message='Reading the files...')
        seq_1 = records_from_data_files([data.sequence1])[0]
        seq_2 = records_from_data_files([data.sequence2])[0]

        self.logger(message='Computing the difference blocks...')
        diff_blocks = DiffBlocks.from_sequences(seq_1, seq_2)

        # This step plots the blocks; the message used to be a copy-paste
        # duplicate of the previous one.
        self.logger(message='Plotting the difference blocks...')
        ax = diff_blocks.plot(figure_width=data.figure_width)
        # plot() may hand back several axes instead of a single one
        # (presumably a tuple/list -- TODO confirm); keep the first.
        if not hasattr(ax, 'figure'):
            ax = ax[0]
        ax.set_title("%s, with annotated diffs to %s" %
                     (seq_2.name, seq_1.name))
        figure_data = matplotlib_figure_to_svg_base64_data(ax.figure,
                                                           bbox_inches="tight")

        # Annotate the second record with the diffs, all typed as
        # "misc_feature".
        diff_features = diff_blocks.diffs_as_features()
        for f in diff_features:
            f.type = "misc_feature"
        seq_2.features += diff_features
        genbank_data = record_to_formated_string(seq_2)

        return {
            'record': {
                'data': genbank_data,
                'name': 'diff.gb',
                'mimetype': 'application/genbank'
            },
            'figure_data': figure_data
        }
def plot_optimization_changes(problem):
    """Plot the differences between a problem's sequence before and after.

    The sequence before (``problem.sequence_before``) and the record returned
    by ``problem.to_record()`` are compared with Geneblocks' ``DiffBlocks``.
    The diffs are merged using gaps proportional to the sequence length, then
    plotted with ``SpecAnnotationsTranslator``.

    Parameters
    ----------
    problem
      Object providing ``sequence_before`` and ``to_record()`` (presumably a
      DnaOptimizationProblem -- TODO confirm).

    Returns
    -------
    The second element returned by ``DiffBlocks.plot`` (the diffs axis).

    Raises
    ------
    ImportError
      If Geneblocks is not available.
    """
    if not GENEBLOCKS_AVAILABLE:
        # The message previously named a different function
        # ("plot_differences"), which was misleading for users.
        raise ImportError(
            "Install Geneblocks to use plot_optimization_changes()")
    sequence_before = sequence_to_biopython_record(problem.sequence_before)
    sequence_after = problem.to_record()
    diffs = DiffBlocks.from_sequences(sequence_before, sequence_after)
    # Merging thresholds scale with the sequence: a twentieth of its length,
    # with a floor of 2.
    span = max(2, len(sequence_after) / 20)
    diffs = diffs.merged(blocks_per_span=(3, span),
                         replace_gap=span / 2,
                         change_gap=span / 2)
    _, diffs_ax = diffs.plot(
        translator_class=SpecAnnotationsTranslator,
        annotate_inline=True,
        figure_width=15,
    )
    return diffs_ax
def write_optimization_report(target, problem, project_name="unnammed",
                              constraints_evaluations=None,
                              objectives_evaluations=None,
                              figure_width=20, max_features_in_plots=300):
    """Write an optimization report with a PDF summary, plots, and genbanks.

    The following files are written under ``target``: ``before_after.pdf``
    (annotated plots of the sequence before and after), ``before.gb`` and
    ``after.gb``, ``final_sequence.gb`` and ``Report.pdf``.

    Parameters
    ----------
    target
      Path to a directory or zip file, or "@memory" for returning raw data of
      a zip file created in-memory. Any non-string value is used directly as
      the root of the file tree.

    problem
      The DnaOptimizationProblem to report on. Both its current sequence and
      its ``sequence_before`` are used.

    project_name
      Name of the project that will appear on the PDF report

    constraints_evaluations
      Precomputed constraints evaluations. If None provided, they will be
      computed again from the problem.

    objectives_evaluations
      Precomputed objectives evaluations. If None provided, they will be
      computed again from the problem.

    figure_width
      Width of the report's figure, in inches. The more annotations there will
      be in the figure, the wider it should be. The default should work for
      most cases.

    max_features_in_plots
      Limit to the number of features to plot (plots with thousands of features
      may take ages to plot)

    Returns
    -------
    The value of ``root._close()`` when ``target`` is a string (presumably the
    raw zip data for "@memory" -- see ``target``), else None.

    Raises
    ------
    ImportError
      If the "PDF Reports" or "Sequenticon" extras are not available.
    """
    if not PDF_REPORTS_AVAILABLE:
        raise ImportError(install_extras_message("PDF Reports"))
    if not SEQUENTICON_AVAILABLE:
        raise ImportError(install_extras_message("Sequenticon"))
    if constraints_evaluations is None:
        constraints_evaluations = problem.constraints_evaluations()
    if objectives_evaluations is None:
        objectives_evaluations = problem.objectives_evaluations()
    if isinstance(target, str):
        root = flametree.file_tree(target, replace=True)
    else:
        root = target
    translator = SpecAnnotationsTranslator()
    # CREATE FIGURES AND GENBANKS
    # The diffs figure is optional: it is only produced with Geneblocks.
    diffs_figure_data = None
    sequence_before = sequence_to_biopython_record(problem.sequence_before)
    if GENEBLOCKS_AVAILABLE:
        sequence_after = problem.to_record()
        contract_under = max(3, int(len(sequence_after) / 10))
        diffs = DiffBlocks.from_sequences(sequence_before, sequence_after,
                                          use_junk_over=50,
                                          contract_under=contract_under)
        _, diffs_ax = diffs.plot()
        diffs_figure_data = pdf_tools.figure_data(diffs_ax.figure, fmt='svg')
        plt.close(diffs_ax.figure)

    with PdfPages(root._file("before_after.pdf").open("wb")) as pdf_io:

        # Bound to an explicit name because the edits are needed again for
        # the PDF report, after the loop below. The report used to rely on
        # the loop variable still holding the "After" edits.
        sequence_edits = problem.sequence_edits_as_features()
        figures_data = [
            (
                "Before",
                sequence_before,
                problem.constraints_before,
                problem.objectives_before,
                []
            ),
            (
                "After",
                sequence_to_biopython_record(problem.sequence),
                constraints_evaluations,
                objectives_evaluations,
                sequence_edits
            )
        ]

        # Set by the "Before" iteration, so that the "After" figure is
        # created with the same height.
        plot_height = None
        for (title, record, constraints, objectives, edits) in figures_data:

            full_title = (
                "{title}:        {nfailing} constraints failing (in red)"
                "        Total Score: {score:.01E} {bars}").format(
                title=title, score=objectives.scores_sum(),
                nfailing=len(constraints.filter("failing").evaluations),
                bars="" if (title == "Before") else
                "       (bars indicate edits)"
            )
            ax = None
            if title == "After":
                # First draw the edits alone on a fresh axis, with a level
                # offset; the evaluations are then drawn on that same axis.
                record.features += edits
                graphical_record = translator.translate_record(record)
                fig, ax = plt.subplots(1, figsize=(figure_width, plot_height))
                graphical_record.plot(ax=ax, level_offset=-0.3)
                record.features = []

            record.features += constraints.success_and_failures_as_features()
            record.features += objectives.success_and_failures_as_features()

            graphical_record = translator.translate_record(record)
            ax, _ = graphical_record.plot(ax=ax, figure_width=figure_width)
            ax.set_title(full_title, loc="left", fontdict=TITLE_FONTDICT)
            plot_height = ax.figure.get_size_inches()[1]
            pdf_io.savefig(ax.figure, bbox_inches="tight")
            plt.close(ax.figure)

            # The genbank file gets the evaluations, the edits and the
            # locations of the constraints breaches.
            record.features += edits
            breaches_locations = \
                constraints.filter("failing") \
                           .locations_as_features(label_prefix="Breach from",
                                                  merge_overlapping=True)
            record.features += breaches_locations

            SeqIO.write(record, root._file(title.lower() + ".gb").open("w"),
                        "genbank")

            if breaches_locations != []:
                # Extra page showing only the breaches.
                record.features = breaches_locations
                graphical_record = translator.translate_record(record)
                if len(graphical_record.features) > max_features_in_plots:
                    # Keep the longest features (start - end is the negated
                    # length, so the sort puts the longest first).
                    features = sorted(graphical_record.features,
                                      key=lambda f: f.start - f.end)
                    new_ft = features[:max_features_in_plots]
                    graphical_record.features = new_ft
                    # Leading space: the message is appended to the title.
                    message = " (only %d features shown)" % \
                              max_features_in_plots
                else:
                    message = ""
                ax, _ = graphical_record.plot(figure_width=figure_width)
                ax.set_title(title + ": Constraints breaches locations"
                             + message, loc="left", fontdict=TITLE_FONTDICT)
                pdf_io.savefig(ax.figure, bbox_inches="tight")
                plt.close(ax.figure)

    # CREATE PDF REPORT
    html = report_writer.pug_to_html(
        path=os.path.join(ASSETS_DIR, "optimization_report.pug"),
        project_name=project_name,
        problem=problem,
        constraints_evaluations=constraints_evaluations,
        objectives_evaluations=objectives_evaluations,
        # Sum of the lengths of the edit features.
        edits=sum(len(f) for f in sequence_edits),
        diffs_figure_data=diffs_figure_data,
        sequenticons={
            label: sequenticon(seq, output_format="html_image", size=24)
            for label, seq in [("before", problem.sequence_before),
                               ("after", problem.sequence)]
        }
    )
    problem.to_record(root._file("final_sequence.gb").open("w"),
                      with_constraints=False,
                      with_objectives=False)

    report_writer.write_report(html, root._file("Report.pdf"))
    if isinstance(target, str):
        return root._close()
Exemple #5
0
import os
from geneblocks import DiffBlocks, load_record

# Load the two records to compare from the "sequences" directory.
seq_1, seq_2 = [
    load_record(os.path.join("sequences", filename))
    for filename in ("sequence1.gb", "sequence2.gb")
]

# Compute the differences between the records, plot them, and save the
# figure that holds the first of the two returned axes.
diff_blocks = DiffBlocks.from_sequences(seq_1, seq_2)
ax1, ax2 = diff_blocks.plot(figure_width=8)
ax1.figure.savefig("diff_blocks.png", bbox_inches='tight')
from geneblocks import DiffBlocks, CommonBlocks, random_dna_sequence
import geneblocks.sequence_modification_utils as smu
import matplotlib.pyplot as plt
import numpy

# Fixed seed, so that the same sequences are generated at each run.
numpy.random.seed(1)

# GENERATE 2 "SISTER" SEQUENCES FOR THE EXAMPLE
# seq1 is a random 50kb sequence passed through smu.copy.
seq1 = smu.copy(random_dna_sequence(50000), 25000, 30000, 50000)

# seq2 is seq1 after a chain of edits. Each edit applies to the result of
# the previous one, so the order of these statements matters.
seq2 = smu.insert(seq1, 39000, random_dna_sequence(100))
seq2 = smu.insert(seq2, 38000, random_dna_sequence(100))
seq2 = smu.reverse(seq2, 30000, 35000)
seq2 = smu.swap(seq2, (30000, 35000), (45000, 480000))
seq2 = smu.delete(seq2, 20000, 2000)
seq2 = smu.insert(seq2, 10000, random_dna_sequence(2000))
seq2 = smu.insert(seq2, 0, 1000 * "A")

# FIND COMMON BLOCKS AND DIFFS
common_blocks = CommonBlocks.from_sequences({'seq1': seq1, 'seq2': seq2})
diff_blocks = DiffBlocks.from_sequences(seq1, seq2).merged()

# PLOT EVERYTHING
# Three stacked axes: all but the last show the common blocks, the last one
# shows the diffs.
fig, axes = plt.subplots(3, 1, figsize=(16, 8))
diffs_ax = axes[-1]
common_blocks.plot_common_blocks(axes=axes[:-1])
diff_blocks.plot(ax=diffs_ax, separate_axes=False)
diffs_ax.set_xlabel("Changes in seq2 vs. seq1")
fig.savefig("complex_sequences.png", bbox_inches='tight')