def test_plot_with_gc_content(tmpdir):
    """Render a record's annotations above its local GC-content profile.

    A two-row figure sharing the x axis is built: the top ax receives the
    annotations plot (drawn without ruler) and the bottom ax receives the
    sliding-window GC percentage. The figure is saved as ``with_plot.png``
    inside ``tmpdir``.
    """
    fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(8, 4), sharex=True)

    # Parse the genbank file, plot annotations
    record = SeqIO.read(example_genbank, "genbank")
    graphic_record = BiopythonTranslator().translate_record(record)
    # plot() is also exercised without an explicit ax; the figure attached
    # to the returned ax is closed at the end of the test.
    ax, levels = graphic_record.plot()
    graphic_record.plot(ax=ax1, with_ruler=False)

    # Plot the local GC content
    def plot_local_gc_content(record, window_size, ax):
        """Fill ``ax`` with the GC percentage of each ``window_size`` window."""

        def gc_content(seq):
            # Only upper-case "G"/"C" characters are counted.
            return 100.0 * sum(1 for c in seq if c in "GC") / len(seq)

        n_windows = len(record.seq) - window_size
        yy = [
            gc_content(record.seq[start:start + window_size])
            for start in range(n_windows)
        ]
        # Place each value at the centre of its window (25 when
        # window_size is 50) instead of relying on a hard-coded offset.
        xx = np.arange(n_windows) + window_size // 2
        ax.fill_between(xx, yy, alpha=0.3)
        ax.set_ylabel("GC(%)")

    plot_local_gc_content(record, window_size=50, ax=ax2)

    # Resize the figure to the right height
    target_file = os.path.join(str(tmpdir), "with_plot.png")
    fig.tight_layout()
    fig.savefig(target_file)

    # Release both figures so that they do not pile up across the test run.
    plt.close(fig)
    plt.close(ax.figure)
def test_from_genbank(tmpdir):
    """Translate the example Genbank file, check it, and plot it to a PNG.

    ``example_genbank`` is handed directly to ``translate_record``; the
    resulting graphic record must hold 11 features. The plot is saved as
    ``from_genbank.png`` inside ``tmpdir``.
    """
    translator = BiopythonTranslator()
    graphic_record = translator.translate_record(example_genbank)
    assert len(graphic_record.features) == 11

    ax, _ = graphic_record.plot(figure_width=10)
    figure = ax.figure
    figure.tight_layout()
    output_path = os.path.join(str(tmpdir), "from_genbank.png")
    figure.savefig(output_path)
def plot(self, ax=None):
    """Draw this fragment and its features on a Matplotlib ax.

    ``ax`` is forwarded as-is to the graphic record's ``plot`` method (the
    default ``None`` lets that method provide the ax). The ax returned by
    that call is returned.
    """
    translator = BiopythonTranslator()
    drawn_ax, _ = translator.translate_record(self).plot(
        ax=ax, strand_in_label_threshold=7
    )
    return drawn_ax
def plot_seq(record, annot_residuei=8, title='', xlabel='', plotp=None):
    """Plot a record with its sequence, a translation and a highlight bar.

    Parameters
    ----------
    record
      Record passed to ``BiopythonTranslator().translate_record``.
    annot_residuei
      Index used to position the red highlight bar, which spans x from
      ``3 * annot_residuei - 3.5`` to ``3 * annot_residuei - 0.5``
      (presumably the three nucleotides of that 1-based residue -- confirm
      with callers).
    title
      Title set on the ax.
    xlabel
      Label set on the x axis.
    plotp
      Where to save the figure, as a PNG. Nothing is saved when ``None``.
    """
    from dna_features_viewer import BiopythonTranslator

    graphic_record = BiopythonTranslator().translate_record(record)
    ax, _ = graphic_record.plot(
        figure_width=12.5,
        annotate_inline=True,
        level_offset=0.5,
    )
    graphic_record.plot_sequence(ax=ax)
    graphic_record.plot_translation(ax=ax, location=[0, 45])

    # Thick red bar drawn at y=-2 to mark the position of interest.
    bar_center = annot_residuei * 3
    ax.plot(
        [bar_center - 3.5, bar_center - 0.5],
        [-2, -2],
        lw=5,
        color='r',
    )
    ax.set_title(title)
    ax.set_xlabel(xlabel)

    if plotp is not None:
        # Act on the ax's own figure rather than on pyplot's "current
        # figure" so the right figure is always the one laid out and saved.
        ax.figure.tight_layout()
        ax.figure.savefig(plotp, format='png')
def draw_features(rec):
    """Plot the features of ``rec``, titled with ``rec.id``, and show it."""
    from dna_features_viewer import BiopythonTranslator

    translator = BiopythonTranslator()
    feature_map = translator.translate_record(rec)
    # The returned values are not needed: title and display go through
    # the pyplot interface.
    _axes, _levels = feature_map.plot(figure_width=20)
    plt.title(rec.id)
    plt.show()
def generate_map(genbank_path="Genome.gb", output_path="solution.jpg"):
    """Plot a Genbank record as a circular map and save it as an image.

    Parameters
    ----------
    genbank_path
      Path to the Genbank file to read (default ``"Genome.gb"``).
    output_path
      Path of the image file to write (default ``"solution.jpg"``).
    """
    record = SeqIO.read(genbank_path, "genbank")
    graphic_record = BiopythonTranslator().translate_record(
        record, record_class=CircularGraphicRecord
    )
    graphic_record.labels_spacing = 20
    ax, _ = graphic_record.plot(
        figure_width=15, figure_height=15, draw_line=True
    )
    ax.figure.savefig(output_path)
import matplotlib.pyplot as plt
from dna_features_viewer import BiopythonTranslator
from Bio import SeqIO
import numpy as np

fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(8, 4), sharex=True)

# Parse the genbank file, plot annotations
record = SeqIO.read("example_sequence.gb", "genbank")
graphic_record = BiopythonTranslator().translate_record(record)
ax, levels = graphic_record.plot()
graphic_record.plot(ax=ax1, with_ruler=False)


# Plot the local GC content
def plot_local_gc_content(record, window_size, ax):
    """Fill ``ax`` with the GC percentage of each ``window_size`` window."""

    def gc_content(s):
        # Only upper-case "G"/"C" characters are counted.
        return 100.0 * sum(1 for c in s if c in "GC") / len(s)

    n_windows = len(record.seq) - window_size
    yy = [
        gc_content(record.seq[start:start + window_size])
        for start in range(n_windows)
    ]
    # Place each value at the centre of its window (25 when window_size is
    # 50) instead of relying on a hard-coded offset.
    xx = np.arange(n_windows) + window_size // 2
    ax.fill_between(xx, yy, alpha=0.3)
    ax.set_ylabel("GC(%)")


plot_local_gc_content(record, window_size=50, ax=ax2)

# Resize the figure to the right height
fig.tight_layout()
fig.savefig("with_plot.png")
def full_assembly_report(
    parts,
    target,
    enzyme="BsmBI",
    max_assemblies=40,
    connector_records=(),
    include_fragments_plots="on_failure",
    include_parts_plots="on_failure",
    include_fragments_connection_graph="on_failure",
    include_assembly_plots=True,
    n_expected_assemblies=None,
    no_skipped_parts=False,
    fragments_filters="auto",
    assemblies_prefix="assembly",
    show_overhangs_in_graph=True,
    show_overhangs_in_genbank=True,
    mix_class="restriction",
):
    """Write a full assembly report in a folder or a zip.

    The report contains the final sequence(s) of the assembly in Genbank format
    as well as a .csv report on all assemblies produced and PDF figures
    to allow a quick overview or diagnostic.

    Folder ``assemblies`` contains the final assemblies (Genbank, plus one PDF
    each when ``include_assembly_plots`` is set), file ``parts_graph.pdf``
    contains a schematic view of how the parts assemble together, file
    ``report.csv`` lists all assemblies, folder ``fragments`` contains the
    plots of all fragments of the mix, and folder ``provided_parts`` contains
    the original input (genbanks and plots of all parts provided for the
    assembly mix). Files ``connections_graph.pdf`` and
    ``non_linear_nodes.csv`` are written when applicable.

    Parameters
    ----------

    parts
      List of Biopython records representing the parts, potentially on entry
      vectors. All the parts provided should have different attributes ``name``
      as it is used to name the files.

    target
      Either a path to a folder, or to a zip file, or ``@memory`` to return
      a string representing zip data (the latter is particularly useful for
      website backends).

    enzyme
      Name of the enzyme to be used in the assembly

    max_assemblies
      Maximal number of assemblies to consider. If there are more than this
      the additional ones won't be returned.

    connector_records
      List of connector records (a connector is a part that can bridge a gap
      between two other parts), from which only the essential elements to form
      an assembly will be automatically selected and added to the other parts.

    include_fragments_plots, include_parts_plots, include_fragments_connection_graph
      These parameters control the rendering of extra figures which are
      great for troubleshooting, but not strictly necessary, and they slow
      down the report generation considerably. They can be True, False, or
      "on_failure" to be True only if no assembly was found or if the number
      of assemblies differs from ``n_expected_assemblies`` (when provided).

    include_assembly_plots
      If true, a PDF plot of each assembly is saved next to its Genbank file.

    n_expected_assemblies
      Expected number of assemblies. No exception is raised if this number is
      not met, however, the parameters above which are set to "on_failure"
      will then cause extra plots to be plotted.

    no_skipped_parts
      If True, a ``FragmentSetContainsPartsFilter`` built from the names of
      the parts is used as fragments-sets filter when computing assemblies.

    fragments_filters
      Fragments filters to be used to filter out fragments before looking for
      assemblies. If left to auto, fragments containing the enzyme site will
      be filtered out.

    assemblies_prefix
      Prefix for the file names of all assemblies. Spaces are replaced by
      underscores and the prefix is cut to 18 characters. Assemblies will be
      named ``PRE_001.gb``, ``PRE_002.gb``, ``PRE_003.gb`` where ``PRE`` is
      the prefix, or simply ``PRE.gb`` if there is a single assembly.

    show_overhangs_in_graph
      Passed as ``with_overhangs`` when plotting the slots graph.

    show_overhangs_in_genbank
      Passed as ``annotate_homologies`` when computing the assemblies.

    mix_class
      Either "restriction" (then ``RestrictionLigationMix`` is used) or a
      class called as ``mix_class(parts, enzyme, fragments_filters=...)``.

    Returns
    -------

    n_constructs
      The number of assemblies written in the report. When ``target`` is
      ``@memory`` the tuple ``(n_constructs, data)`` is returned instead,
      where ``data`` is the result of closing the report.

    Raises
    ------

    ValueError
      If several parts share the same name.

    AssemblyError
      Re-raised when the automatic selection of connectors fails, after the
      parts graph and connections graph have been written to the report.
    """
    # Make prefix Genbank friendly
    assemblies_prefix = assemblies_prefix.replace(" ", "_")[:18]

    if mix_class == "restriction":
        mix_class = RestrictionLigationMix

    part_names = [p.name for p in parts]
    # Counter keys are unique and keep first-occurrence order, so the names
    # appear in a deterministic order in the error message.
    non_unique = [e for (e, count) in Counter(part_names).items() if count > 1]
    if non_unique:
        raise ValueError(
            "All parts provided should have different names. "
            "Assembly (%s) contains several times the parts %s "
            % (" ".join(part_names), ", ".join(non_unique))
        )

    if fragments_filters == "auto":
        fragments_filters = [NoRestrictionSiteFilter(enzyme)]

    report = file_tree(target, replace=True)
    assemblies_dir = report._dir("assemblies")

    mix = mix_class(parts, enzyme, fragments_filters=fragments_filters)
    if len(connector_records):
        try:
            mix.autoselect_connectors(connector_records)
        except AssemblyError:
            # Write diagnostic figures before propagating the error.
            ax = mix.plot_slots_graph(
                with_overhangs=show_overhangs_in_graph,
                show_missing=True,
                highlighted_parts=part_names,
            )
            f = report._file("parts_graph.pdf")
            ax.figure.savefig(f.open("wb"), format="pdf", bbox_inches="tight")
            plt.close(ax.figure)

            # PLOT CONNEXIONS GRAPH (BIGGER, MORE INFOS)
            ax = mix.plot_connections_graph()
            f = report._file("connections_graph.pdf")
            ax.figure.savefig(f.open("wb"), format="pdf", bbox_inches="tight")
            plt.close(ax.figure)
            raise

    # ASSEMBLIES
    filters = (FragmentSetContainsPartsFilter(part_names),)
    assemblies = mix.compute_circular_assemblies(
        annotate_homologies=show_overhangs_in_genbank,
        fragments_sets_filters=filters if no_skipped_parts else (),
    )
    # Keep at most max_assemblies assemblies, then order them by sequence.
    assemblies = sorted(
        [asm for (_, asm) in zip(range(max_assemblies), assemblies)],
        key=lambda asm: str(asm.seq),
    )
    assemblies_data = []
    single_assembly = len(assemblies) == 1
    for i, asm in enumerate(assemblies):
        if single_assembly:
            name = assemblies_prefix
        else:
            name = "%s_%03d" % (assemblies_prefix, (i + 1))
        asm.name = asm.id = name
        assemblies_data.append(
            dict(
                assembly_name=name,
                parts=" & ".join([name_fragment(f_) for f_ in asm.fragments]),
                number_of_parts=len(asm.fragments),
                assembly_size=len(asm),
            )
        )
        write_record(asm, assemblies_dir._file(name + ".gb"), "genbank")
        if include_assembly_plots:
            gr_record = AssemblyTranslator().translate_record(asm)
            ax, _ = gr_record.plot(figure_width=16)
            ax.set_title(name)
            ax.set_ylim(top=ax.get_ylim()[1] + 1)
            ax.figure.savefig(
                assemblies_dir._file(name + ".pdf").open("wb"),
                format="pdf",
                bbox_inches="tight",
            )
            plt.close(ax.figure)

    # A failure is "no assembly", or "not the expected number of assemblies".
    is_failure = (len(assemblies) == 0) or (
        (n_expected_assemblies is not None)
        and (len(assemblies) != n_expected_assemblies)
    )
    if include_fragments_plots == "on_failure":
        include_fragments_plots = is_failure
    if include_parts_plots == "on_failure":
        include_parts_plots = is_failure
    if include_fragments_connection_graph == "on_failure":
        include_fragments_connection_graph = is_failure

    # PROVIDED PARTS
    if include_parts_plots:
        provided_parts_dir = report._dir("provided_parts")
        for part in parts:
            linear = record_is_linear(part, default=False)
            ax, _ = plot_cuts(part, enzyme, linear=linear)
            f = provided_parts_dir._file(part.name + ".pdf").open("wb")
            ax.figure.savefig(f, format="pdf", bbox_inches="tight")
            plt.close(ax.figure)
            gb_file = provided_parts_dir._file(part.name + ".gb")
            write_record(part, gb_file, "genbank")

    # FRAGMENTS
    if include_fragments_plots:
        fragments_dir = report._dir("fragments")
        # Fragments sharing a name get numbered files: name_01, name_02...
        seenfragments = defaultdict(int)
        for fragment in mix.fragments:
            gr = BiopythonTranslator().translate_record(fragment)
            ax, _ = gr.plot()
            name = name_fragment(fragment)
            seenfragments[name] += 1
            file_name = "%s_%02d.pdf" % (name, seenfragments[name])
            ax.figure.savefig(
                fragments_dir._file(file_name).open("wb"),
                format="pdf",
                bbox_inches="tight",
            )
            plt.close(ax.figure)

    # PLOT CONNEXIONS GRAPH (BIGGER, MORE INFOS)
    if include_fragments_connection_graph:
        ax = mix.plot_connections_graph()
        f = report._file("connections_graph.pdf")
        ax.figure.savefig(f.open("wb"), format="pdf", bbox_inches="tight")
        plt.close(ax.figure)

    # List the slots whose node in the slots graph does not have exactly
    # two neighbours, along with these neighbours.
    graph = mix.slots_graph(with_overhangs=False)
    slots_dict = {
        s: "|".join(list(pts)) for s, pts in mix.compute_slots().items()
    }
    non_linear_slots = [
        (slots_dict[n], "|".join([slots_dict[b] for b in graph.neighbors(n)]))
        for n in graph.nodes()
        if graph.degree(n) != 2
    ]

    # PLOT SLOTS GRAPH
    if len(connector_records):
        highlighted_parts = part_names
    else:
        highlighted_parts = []
    ax = mix.plot_slots_graph(
        with_overhangs=show_overhangs_in_graph,
        show_missing=True,
        highlighted_parts=highlighted_parts,
    )
    f = report._file("parts_graph.pdf")
    ax.figure.savefig(f.open("wb"), format="pdf", bbox_inches="tight")
    plt.close(ax.figure)

    if non_linear_slots:
        report._file("non_linear_nodes.csv").write(
            "\n".join(
                ["part,neighbours"]
                + [
                    "%s,%s" % (part, neighbours)
                    for part, neighbours in non_linear_slots
                ]
            )
        )

    df = pandas.DataFrame.from_records(
        assemblies_data,
        columns=["assembly_name", "number_of_parts", "assembly_size", "parts"],
    )
    df.to_csv(report._file("report.csv").open("w"), index=False)
    n_constructs = len(df)
    if target == "@memory":
        return n_constructs, report._close()
    if isinstance(target, str):
        report._close()
    return n_constructs
y="neg_log10_chi_square_pvalue", hue="mutation_type", data=pval_ann, style="mutation_type", markers=markers, palette=palette, ax=ax0) sns.scatterplot(x="POS", y="neg_log10_chi_square_pvalue", hue="mutation_type", data=pval_ann, style="mutation_type", markers=markers, palette=palette, ax=ax1) record.plot(ax=ax2) plt.xlabel('Position') ax1.set_ylabel('Negative log10 p-value') ax0.set_ylabel('') box = ax2.get_position() box.y0 += .08 box.y1 += .08 ax2.set_position(box) ax1.set_ylim(10**-4.9, 17.49) ax0.set_ylim(34.9, 45.01) # ax1.set_yscale('log', basey=10) ax1.legend(handles=legend_elements, loc="upper left", ncol=3, framealpha=0.5, fancybox=True,
from dna_features_viewer import BiopythonTranslator

# Translate the Genbank file (given by its path) into a graphic record,
# plot it on a 10-unit-wide figure, and save the result as a PNG.
translator = BiopythonTranslator()
graphic_record = translator.translate_record("example_sequence.gb")
ax, _ = graphic_record.plot(figure_width=10)
fig = ax.figure
fig.tight_layout()
fig.savefig("from_genbank.png")
def full_assembly_report(parts, target, enzyme="BsmBI", max_assemblies=40,
                         connector_records=(), include_fragments=True,
                         include_parts=True, fragments_filters='auto',
                         assemblies_prefix='assembly',
                         show_overhangs_in_graph=True,
                         show_overhangs_in_genbank=False,
                         mix_class="restriction"):
    """Write a full assembly report in a folder or a zip.

    The report contains the final sequence(s) of the assembly in Genbank format
    as well as a .csv report on all assemblies produced and PDF figures
    to allow a quick overview or diagnostic.

    Folder ``assemblies`` contains the final assemblies (Genbank and PDF),
    file ``parts_graph.pdf`` contains a schematic view of how the parts
    assemble together, file ``report.csv`` lists all assemblies, folder
    ``fragments`` contains the plots of all fragments of the mix, and folder
    ``provided_parts`` contains the original input (genbanks and plots of all
    parts provided for the assembly mix).

    Parameters
    ----------

    parts
      List of Biopython records representing the parts, potentially on entry
      vectors. All the parts provided should have different attributes ``name``
      as it is used to name the files.

    target
      Either a path to a folder, or to a zip file, or ``@memory`` to return
      a string representing zip data (the latter is particularly useful for
      website backends).

    enzyme
      Name of the enzyme to be used in the assembly

    max_assemblies
      Maximal number of assemblies to consider. If there are more than this
      the additional ones won't be returned.

    connector_records
      List of connector records (a connector is a part that can bridge a gap
      between two other parts), from which only the essential elements to form
      an assembly will be automatically selected and added to the other parts.

    include_fragments
      If true, one PDF plot per fragment of the mix is written in folder
      ``fragments``.

    include_parts
      If true, the Genbank and a PDF plot of each provided part are written
      in folder ``provided_parts``.

    fragments_filters
      Fragments filters to be used to filter out fragments before looking for
      assemblies. If left to auto, fragments containing the enzyme site will
      be filtered out.

    assemblies_prefix
      Prefix for the file names of all assemblies. They will be named
      ``PRE_001.gb``, ``PRE_002.gb``, ``PRE_003.gb`` where ``PRE`` is the
      prefix, or simply ``PRE.gb`` if there is a single assembly.

    show_overhangs_in_graph
      Passed as ``with_overhangs`` when plotting the slots graph.

    show_overhangs_in_genbank
      Passed as ``annotate_homologies`` when computing the assemblies.

    mix_class
      Either "restriction" (then ``RestrictionLigationMix`` is used) or a
      class called as ``mix_class(parts, enzyme, fragments_filters=...)``.

    Returns
    -------

    n_constructs
      The number of assemblies written in the report. When ``target`` is
      ``@memory`` the tuple ``(n_constructs, data)`` is returned instead,
      where ``data`` is the result of closing the report.

    Raises
    ------

    ValueError
      If several parts share the same name.
    """
    if mix_class == "restriction":
        mix_class = RestrictionLigationMix

    part_names = [p.name for p in parts]
    non_unique = [e for (e, count) in Counter(part_names).items() if count > 1]
    if non_unique:
        raise ValueError("All parts provided should have different names. "
                         "Assembly (%s) contains several times the parts %s " %
                         (" ".join(part_names), ", ".join(non_unique)))

    if fragments_filters == 'auto':
        fragments_filters = [NoRestrictionSiteFilter(enzyme)]

    report = file_tree(target, replace=True)
    assemblies_dir = report._dir("assemblies")

    mix = mix_class(parts, enzyme, fragments_filters=fragments_filters)
    if len(connector_records):
        mix.autoselect_connectors(connector_records)

    # PROVIDED PARTS
    if include_parts:
        provided_parts_dir = report._dir("provided_parts")
        for part in parts:
            # Parts without a ``linear`` attribute are treated as circular.
            linear = getattr(part, 'linear', False)
            ax, _ = plot_cuts(part, enzyme, linear=linear)
            f = provided_parts_dir._file(part.name + ".pdf").open('wb')
            ax.figure.savefig(f, format='pdf', bbox_inches="tight")
            plt.close(ax.figure)
            gb_file = provided_parts_dir._file(part.name + ".gb")
            SeqIO.write(part, gb_file.open('w'), 'genbank')

    # FRAGMENTS
    if include_fragments:
        fragments_dir = report._dir("fragments")
        # Fragments sharing a name get numbered files: name_01, name_02...
        seenfragments = defaultdict(int)
        for fragment in mix.fragments:
            gr = BiopythonTranslator().translate_record(fragment)
            ax, _ = gr.plot()
            name = name_fragment(fragment)
            seenfragments[name] += 1
            file_name = "%s_%02d.pdf" % (name, seenfragments[name])
            ax.figure.savefig(fragments_dir._file(file_name).open('wb'),
                              format='pdf', bbox_inches="tight")
            plt.close(ax.figure)

    # GRAPH
    ax = plot_slots_graph(mix, with_overhangs=show_overhangs_in_graph,
                          show_missing=True)
    f = report._file('parts_graph.pdf')
    ax.figure.savefig(f.open('wb'), format='pdf', bbox_inches='tight')
    plt.close(ax.figure)

    # ASSEMBLIES
    assemblies = mix.compute_circular_assemblies(
        annotate_homologies=show_overhangs_in_genbank)
    # Keep at most max_assemblies assemblies, then order them by sequence.
    assemblies = sorted(
        [asm for (_, asm) in zip(range(max_assemblies), assemblies)],
        key=lambda asm: str(asm.seq))
    assemblies_data = []
    single_assembly = len(assemblies) == 1
    for i, asm in enumerate(assemblies):
        if single_assembly:
            name = assemblies_prefix
        else:
            name = '%s_%03d' % (assemblies_prefix, (i + 1))
        assemblies_data.append(
            dict(name=name,
                 parts=" & ".join([name_fragment(f_) for f_ in asm.fragments]),
                 number_of_parts=len(asm.fragments),
                 assembly_size=len(asm)))
        SeqIO.write(asm, assemblies_dir._file(name + '.gb').open('w'),
                    'genbank')
        gr_record = AssemblyTranslator().translate_record(asm)
        ax, _ = gr_record.plot(figure_width=16)
        ax.set_title(name)
        ax.figure.savefig(assemblies_dir._file(name + '.pdf').open('wb'),
                          format='pdf', bbox_inches='tight')
        plt.close(ax.figure)

    df = pandas.DataFrame.from_records(
        assemblies_data,
        columns=['name', 'number_of_parts', 'assembly_size', 'parts'])
    # Hand pandas an opened text handle, like every other write of this
    # report, rather than the file-tree file object itself.
    df.to_csv(report._file('report.csv').open('w'), index=False)
    n_constructs = len(df)
    if target == '@memory':
        return n_constructs, report._close()
    return n_constructs