def test_complex_sequences():
    """Check the merged diff blocks found between two heavily edited sequences.

    A 50kb random sequence (with an internal copy) is edited through a series
    of insertions, a reversal, a swap and a deletion. The test then checks the
    number and nature of the merged diff blocks, and that both sequences can
    be rebuilt from these blocks.
    """
    original = random_dna_sequence(50000, seed=123)
    original = smu.copy(original, 25000, 30000, 50000)

    # Derive the second sequence, one edit at a time.
    edited = original
    edited = smu.insert(edited, 39000, random_dna_sequence(100))
    edited = smu.insert(edited, 38000, random_dna_sequence(100))
    edited = smu.reverse(edited, 30000, 35000)
    # NOTE(review): 480000 is far beyond the sequence length and may be a typo
    # for 48000. It is kept as-is because the expectations below depend on it.
    edited = smu.swap(edited, (30000, 35000), (45000, 480000))
    edited = smu.delete(edited, 20000, 2000)
    edited = smu.insert(edited, 10000, random_dna_sequence(2000))
    edited = smu.insert(edited, 0, 1000 * "A")

    diff_blocks = DiffBlocks.from_sequences(original, edited).merged()
    blocks = diff_blocks.blocks

    def s2_span(block):
        """Return the length of the block's location in the second sequence."""
        start, end, _ = block.s2_location.to_tuple()
        return end - start

    assert len(blocks) == 15

    # The two outermost insertions come first, separated by an equal block.
    assert blocks[0].operation == "insert"
    assert s2_span(blocks[0]) == 1000
    assert blocks[1].operation == "equal"
    assert blocks[2].operation == "insert"
    assert s2_span(blocks[2]) == 2000

    # The order within these pairs is not checked, only their content.
    middle_operations = sorted(blocks[i].operation for i in (6, 7))
    assert middle_operations == ["change", "transpose"]
    final_operations = sorted(blocks[i].operation for i in (-1, -2))
    assert final_operations == ["change", "reverse"]

    rebuilt_1, rebuilt_2 = diff_blocks.reconstruct_sequences_from_blocks(
        diff_blocks.blocks
    )
    assert rebuilt_1 == original
    assert rebuilt_2 == edited
def work(self):
    """Compare two sequence records and return an annotated record and figure.

    The records are read from ``self.data.sequence1`` and
    ``self.data.sequence2``. The differences are plotted and also added as
    ``misc_feature`` annotations to the second record.

    Returns
    -------
    dict
      A dictionary with a ``'record'`` entry (Genbank data, file name and
      mimetype of the annotated second record) and a ``'figure_data'`` entry
      (the plot, as returned by ``matplotlib_figure_to_svg_base64_data``).
    """
    data = self.data

    self.logger(message='Reading the files...')
    seq_1 = records_from_data_files([data.sequence1])[0]
    seq_2 = records_from_data_files([data.sequence2])[0]

    self.logger(message='Computing the difference blocks...')
    diff_blocks = DiffBlocks.from_sequences(seq_1, seq_2)

    # This stage used to repeat the previous message (copy-paste slip).
    self.logger(message='Plotting the difference blocks...')
    ax = diff_blocks.plot(figure_width=data.figure_width)
    if not hasattr(ax, 'figure'):
        # The plot returned a collection of axes: keep the first one.
        ax = ax[0]
    ax.set_title("%s, with annotated diffs to %s" % (seq_2.name, seq_1.name))
    figure_data = matplotlib_figure_to_svg_base64_data(
        ax.figure, bbox_inches="tight"
    )

    # Annotate the second record with the diffs (this mutates seq_2).
    diff_features = diff_blocks.diffs_as_features()
    for feature in diff_features:
        feature.type = "misc_feature"
    seq_2.features += diff_features
    genbank_data = record_to_formated_string(seq_2)

    return {
        'record': {
            'data': genbank_data,
            'name': 'diff.gb',
            'mimetype': 'application/genbank'
        },
        'figure_data': figure_data
    }
def plot_optimization_changes(problem):
    """Plot the differences between a problem's initial and current sequence.

    Parameters
    ----------
    problem
      An object providing ``sequence_before`` and ``to_record()``.

    Returns
    -------
    diffs_ax
      The second element returned by the ``plot`` method of the merged
      DiffBlocks.

    Raises
    ------
    ImportError
      If Geneblocks is not available.
    """
    if not GENEBLOCKS_AVAILABLE:
        # The message used to refer to a non-existent "plot_differences()".
        raise ImportError(
            "Install Geneblocks to use plot_optimization_changes()"
        )
    sequence_before = sequence_to_biopython_record(problem.sequence_before)
    sequence_after = problem.to_record()
    diffs = DiffBlocks.from_sequences(sequence_before, sequence_after)

    # The merging parameters scale with the sequence length (1/20th of it,
    # with a minimum of 2).
    span = max(2, len(sequence_after) / 20)
    diffs = diffs.merged(
        blocks_per_span=(3, span),
        replace_gap=span / 2,
        change_gap=span / 2,
    )
    _, diffs_ax = diffs.plot(
        translator_class=SpecAnnotationsTranslator,
        annotate_inline=True,
        figure_width=15,
    )
    return diffs_ax
def write_optimization_report(target, problem, project_name="unnammed",
                              constraints_evaluations=None,
                              objectives_evaluations=None,
                              figure_width=20, max_features_in_plots=300):
    """Write an optimization report with a PDF summary, plots, and genbanks.

    Parameters
    ----------
    target
      Path to a directory or zip file, or "@memory" for returning raw
      data of a zip file created in-memory. Any non-string value is used
      directly as the root of the file tree to write in.

    problem
      A DnaOptimizationProblem to be solved and optimized

    project_name
      Name of the project that will appear on the PDF report

    constraints_evaluations
      Precomputed constraints evaluations. If None provided, they will be
      computed again from the problem.

    objectives_evaluations
      Precomputed objectives evaluations. If None provided, they will be
      computed again from the problem.

    figure_width
      Width of the report's figure, in inches. The more annotations there will
      be in the figure, the wider it should be. The default should work for
      most cases.

    max_features_in_plots
      Limit to the number of features to plot (plots with thousands of features
      may take ages to plot)

    Returns
    -------
    The value of ``root._close()`` when ``target`` is a string, else None.

    Raises
    ------
    ImportError
      If the PDF Reports or Sequenticon extras are not available.
    """
    if not PDF_REPORTS_AVAILABLE:
        raise ImportError(install_extras_message("PDF Reports"))
    if not SEQUENTICON_AVAILABLE:
        raise ImportError(install_extras_message("Sequenticon"))
    if constraints_evaluations is None:
        constraints_evaluations = problem.constraints_evaluations()
    if objectives_evaluations is None:
        objectives_evaluations = problem.objectives_evaluations()
    if isinstance(target, str):
        root = flametree.file_tree(target, replace=True)
    else:
        root = target
    translator = SpecAnnotationsTranslator()

    # CREATE FIGURES AND GENBANKS

    diffs_figure_data = None
    sequence_before = sequence_to_biopython_record(problem.sequence_before)
    if GENEBLOCKS_AVAILABLE:
        sequence_after = problem.to_record()
        contract_under = max(3, int(len(sequence_after) / 10))
        diffs = DiffBlocks.from_sequences(sequence_before, sequence_after,
                                          use_junk_over=50,
                                          contract_under=contract_under)
        _, diffs_ax = diffs.plot()
        diffs_figure_data = pdf_tools.figure_data(diffs_ax.figure, fmt='svg')
        plt.close(diffs_ax.figure)

    # Kept in an explicit variable because the edits are needed again after
    # the plotting loop, to report the total edited length.
    sequence_edits = problem.sequence_edits_as_features()

    # NOTE(review): the handles returned by ``.open(...)`` here and below are
    # never closed explicitly - confirm that the file tree flushes them (on
    # garbage collection or on ``root._close()``) for every kind of target.
    with PdfPages(root._file("before_after.pdf").open("wb")) as pdf_io:

        figures_data = [
            (
                "Before",
                sequence_before,
                problem.constraints_before,
                problem.objectives_before,
                []
            ),
            (
                "After",
                sequence_to_biopython_record(problem.sequence),
                constraints_evaluations,
                objectives_evaluations,
                sequence_edits
            )
        ]

        # Height of the previous figure, reused for the "After" edits panel.
        plot_height = None
        for (title, record, constraints, objectives, edits) in figures_data:

            full_title = (
                "{title}: {nfailing} constraints failing (in red)"
                " Total Score: {score:.01E} {bars}").format(
                title=title, score=objectives.scores_sum(),
                nfailing=len(constraints.filter("failing").evaluations),
                bars="" if (title == "Before") else " (bars indicate edits)"
            )

            ax = None
            if title == "After":
                # First draw the edits alone, shifted down, then reset the
                # features so the specifications are plotted on the same ax.
                record.features += edits
                graphical_record = translator.translate_record(record)
                _, ax = plt.subplots(1, figsize=(figure_width, plot_height))
                graphical_record.plot(ax=ax, level_offset=-0.3)
                record.features = []

            record.features += constraints.success_and_failures_as_features()
            record.features += objectives.success_and_failures_as_features()

            graphical_record = translator.translate_record(record)
            ax, _ = graphical_record.plot(ax=ax, figure_width=figure_width)
            ax.set_title(full_title, loc="left", fontdict=TITLE_FONTDICT)
            plot_height = ax.figure.get_size_inches()[1]
            pdf_io.savefig(ax.figure, bbox_inches="tight")
            plt.close(ax.figure)

            # The Genbank file gets the edits and breaches as extra features.
            record.features += edits
            breaches_locations = constraints.filter(
                "failing"
            ).locations_as_features(
                label_prefix="Breach from", merge_overlapping=True
            )
            record.features += breaches_locations

            SeqIO.write(record, root._file(title.lower() + ".gb").open("w"),
                        "genbank")

            if breaches_locations != []:
                record.features = breaches_locations
                graphical_record = translator.translate_record(record)
                if len(graphical_record.features) > max_features_in_plots:
                    # Keep only the longest features (sorted by decreasing
                    # length) to bound the plotting time.
                    features = sorted(graphical_record.features,
                                      key=lambda f: f.start - f.end)
                    graphical_record.features = \
                        features[:max_features_in_plots]
                    # The leading space separates the notice from the title
                    # (it was previously glued to the word "locations").
                    message = " (only %d features shown)" % \
                        max_features_in_plots
                else:
                    message = ""
                ax, _ = graphical_record.plot(figure_width=figure_width)
                ax.set_title(
                    title + ": Constraints breaches locations" + message,
                    loc="left", fontdict=TITLE_FONTDICT
                )
                pdf_io.savefig(ax.figure, bbox_inches="tight")
                plt.close(ax.figure)

    # CREATE PDF REPORT

    html = report_writer.pug_to_html(
        path=os.path.join(ASSETS_DIR, "optimization_report.pug"),
        project_name=project_name,
        problem=problem,
        constraints_evaluations=constraints_evaluations,
        objectives_evaluations=objectives_evaluations,
        edits=sum(len(f) for f in sequence_edits),
        diffs_figure_data=diffs_figure_data,
        sequenticons={
            label: sequenticon(seq, output_format="html_image", size=24)
            for label, seq in [("before", problem.sequence_before),
                               ("after", problem.sequence)]
        }
    )
    problem.to_record(root._file("final_sequence.gb").open("w"),
                      with_constraints=False,
                      with_objectives=False)
    report_writer.write_report(html, root._file("Report.pdf"))
    if isinstance(target, str):
        return root._close()
import os
from geneblocks import DiffBlocks, load_record

# Load the two Genbank records to compare from the "sequences" folder.
record_1, record_2 = (
    load_record(os.path.join("sequences", filename))
    for filename in ("sequence1.gb", "sequence2.gb")
)

# Compute the differences between the records and plot them. Only the first
# of the two returned axes is needed to reach the figure and save it.
blocks = DiffBlocks.from_sequences(record_1, record_2)
first_ax, _ = blocks.plot(figure_width=8)
first_ax.figure.savefig("diff_blocks.png", bbox_inches='tight')
from geneblocks import DiffBlocks, CommonBlocks, random_dna_sequence
import geneblocks.sequence_modification_utils as smu
import matplotlib.pyplot as plt
import numpy

# Fix the random seed so that every run produces the same sequences.
numpy.random.seed(1)

# BUILD THE REFERENCE SEQUENCE

reference = random_dna_sequence(50000)
reference = smu.copy(reference, 25000, 30000, 50000)

# DERIVE A "SISTER" SEQUENCE THROUGH SUCCESSIVE EDITS

sister = reference
sister = smu.insert(sister, 39000, random_dna_sequence(100))
sister = smu.insert(sister, 38000, random_dna_sequence(100))
sister = smu.reverse(sister, 30000, 35000)
# NOTE(review): 480000 exceeds the sequence length, possibly a typo for 48000.
sister = smu.swap(sister, (30000, 35000), (45000, 480000))
sister = smu.delete(sister, 20000, 2000)
sister = smu.insert(sister, 10000, random_dna_sequence(2000))
sister = smu.insert(sister, 0, 1000 * "A")

# COMPUTE THE COMMON BLOCKS AND THE DIFFERENCES

sequences = {'seq1': reference, 'seq2': sister}
common_blocks = CommonBlocks.from_sequences(sequences)
diff_blocks = DiffBlocks.from_sequences(reference, sister).merged()

# PLOT: COMMON BLOCKS ON THE FIRST TWO AXES, DIFFS ON THE LAST ONE

fig, axes = plt.subplots(3, 1, figsize=(16, 8))
diffs_ax = axes[-1]
common_blocks.plot_common_blocks(axes=axes[:-1])
diff_blocks.plot(ax=diffs_ax, separate_axes=False)
diffs_ax.set_xlabel("Changes in seq2 vs. seq1")
fig.savefig("complex_sequences.png", bbox_inches='tight')