def records_from_data_files(filepaths=None, folder=None):
    """Automatically convert files or a folder's content to Biopython records.

    Parameters
    ----------
    filepaths
      List of paths to sequence files (Genbank, Fasta, zip archives...).
      Ignored when ``folder`` is provided.

    folder
      Path to a folder whose files will all be converted to records.

    Returns
    -------
    A list of Biopython SeqRecords with normalized ``.id``, ``.name`` and a
    ``.file_name`` attribute derived from the source file name.
    """
    if folder is not None:
        filepaths = [f._path for f in flametree.file_tree(folder)._all_files]
    # Placeholder ids/names written by various exporters; records carrying
    # one of these get renamed after their source file instead.
    # (Hoisted out of the loop: it was needlessly rebuilt for every record.)
    UNKNOWN_IDS = [
        "None",
        "",
        "<unknown id>",
        ".",
        "EXPORTED",
        "<unknown name>",
        "Exported",
    ]
    records = []
    for filepath in filepaths:
        filename = os.path.basename(filepath)
        if filename.lower().endswith("zip"):
            records += records_from_zip_file(filepath)
            continue
        recs, fmt = records_from_file(filepath)
        single_record = len(recs) == 1
        for i, record in enumerate(recs):
            # BUGFIX: use splitext instead of "".join(filename.split(".")[:-1]),
            # which silently dropped the dots of multi-dot file names
            # (e.g. "my.part.gb" became "mypart" instead of "my.part").
            name_no_extension = os.path.splitext(filename)[0]
            name = name_no_extension + ("" if single_record else ("%04d" % i))
            name = name.replace(" ", "_")
            if has_dna_alphabet:  # Biopython <1.78
                record.seq.alphabet = DNAAlphabet()
            record.annotations["molecule_type"] = "DNA"
            # Sorry for this part, it took a lot of "whatever works".
            # Keep your part names under 20c and pointless, and everything
            # will be good.
            if str(record.id).strip() in UNKNOWN_IDS:
                record.id = name
            if str(record.name).strip() in UNKNOWN_IDS:
                record.name = name
            record.file_name = name_no_extension
        records += recs
    return records
def one_backbone(self):
    """Swap each insert into the donor backbone and return the result(s).

    Reads one backbone record and one or more insert records from
    ``self.data``, auto-selects a restriction enzyme when requested, then
    swaps every insert into the backbone. Returns a dict holding a single
    Genbank file when there is one insert, or a zip of Genbanks otherwise.
    """
    self.logger(message="Reading Data...")
    data = self.data
    backbone = records_from_data_files([data.backbone])[0]
    inserts = [records_from_data_files([f])[0] for f in data.inserts]
    records = inserts + [backbone]
    for record in records:
        record.linear = False  # Trick
    if data.enzyme == "Autoselect":
        possible_enzymes = ["BsaI", "BsmBI", "BbsI"]
        data.enzyme = autoselect_enzyme(records, enzymes=possible_enzymes)
    zip_root = flametree.file_tree('@memory')
    for insert in inserts:
        record = swap_donor_vector_part(backbone, insert, data.enzyme)
        record.id = insert.id
        write_record(record, zip_root._file(autoname_genbank_file(record)),
                     'genbank')
    if len(inserts) == 1:
        f = zip_root._all_files[0]
        # BUGFIX: renamed from ``data`` — the old name shadowed the
        # ``self.data`` parameters object read at the top of the method.
        file_data = f.read('rb')
        return {
            'file': {
                'data': data_to_html_data(file_data, 'genbank'),
                'name': f._name,
                'mimetype': 'application/genbank'
            },
            'success': 'true',
            # BUGFIX: fixed user-facing typo ("succesful").
            'summary': 'Swapping successful !'
        }
    else:
        return {
            'file': {
                'data': data_to_html_data(zip_root._close(), 'zip'),
                'name': 'donor_swap_genbanks.zip',
                'mimetype': 'application/zip'
            },
            'success': 'yeah!',
            'summary': 'none yet'
        }
def records_from_zip_file(zip_file):
    """Return all records contained in a zip archive of sequence files.

    Each .gb/.fa/.dna file in the archive is tried successively as a
    text-based record file, then a Snapgene file, then a "crazydoc" doc
    file. Records whose id is a known placeholder are renamed after their
    file of origin.

    Raises
    ------
    ValueError
      If a file matches none of the supported formats.
    """
    zip_file = flametree.file_tree(file_to_filelike_object(zip_file))
    records = []
    for f in zip_file._all_files:
        ext = f._extension.lower()
        if ext in ['gb', 'fa', 'dna']:
            try:
                new_records, fmt = string_to_record(f.read())
            # BUGFIX: narrowed the bare ``except:`` clauses (which also
            # caught KeyboardInterrupt/SystemExit) to ``except Exception``.
            except Exception:
                content_stream = BytesIO(f.read('rb'))
                try:
                    record = snapgene_file_to_seqrecord(
                        fileobject=content_stream)
                    new_records, fmt = [record], 'snapgene'
                except Exception:
                    try:
                        parser = crazydoc.CrazydocParser(
                            ['highlight_color', 'bold', 'underline'])
                        new_records = parser.parse_doc_file(content_stream)
                        fmt = 'doc'
                    except Exception:
                        raise ValueError("Format not recognized for file "
                                         + f._path)
            single_record = len(new_records) == 1
            for i, record in enumerate(new_records):
                name = record.id
                if name in [
                        None, '', "<unknown id>", '.', ' ',
                        "<unknown name>"
                ]:
                    number = ('' if single_record else ("%04d" % i))
                    name = f._name_no_extension.replace(" ", "_") + number
                name = name.split(".")[0]
                record.id = name
                record.name = name
                record.file_name = f._name_no_extension
            records += new_records
    # BUGFIX: removed a leftover debug print of all (name, id) pairs.
    return records
def validation_analysis(self, data, clones_observations):
    """Build and return an in-memory zip report of clone validations.

    Validates every clone against the expected band patterns (using the
    band cutoffs and tolerance from ``data``), then writes a PDF of the
    validation patterns, optionally a PDF of digestion cut maps, and a
    plate map of validation successes.
    """
    observations = clones_observations
    validations = observations.validate_all_clones(
        min_band_cutoff=data.bandsRange[0],
        max_band_cutoff=data.bandsRange[1],
        relative_tolerance=data.tolerance)

    # CREATE A ZIP WITH VALIDATION REPORTS
    report_root = flametree.file_tree('@memory')
    self.logger(message="Generating the validation report...")
    patterns_pdf = observations.plot_all_validations_patterns(validations)
    report_root._file('validations.pdf').write(patterns_pdf)
    if data.includeDigestionPlots:
        self.logger(message="Plotting cuts maps...")
        # One (construct record, digestion) pair per digestion of each
        # construct in the observations.
        construct_digestion_pairs = [
            (observations.constructs_records[construct], digestion)
            for construct, digestions
            in observations.constructs_digestions.items()
            for digestion in digestions
        ]
        pdf_data = plot_all_constructs_cuts_maps(construct_digestion_pairs)
        report_root._file('digestions.pdf').write(pdf_data)
    self.logger(message="Generating the success plate map...")
    ax = observations.plot_validations_plate_map(validations)
    ax.figure.savefig(report_root._file('success_map.pdf').open('wb'),
                      format='pdf',
                      bbox_inches='tight')
    self.logger(message="All done !")
    return {
        'zip_file': {
            'data': data_to_html_data(report_root._close(), 'zip'),
            'name': 'validation_report.zip',
            'mimetype': 'application/zip'
        },
        'success': 'yeah!'
    }
def generate_batch_report(groups, target="@memory", group_naming="group",
                          plot_format="pdf"):
    """Generate a report with CSV and plot describing a groups batch.

    Parameters
    ----------
    groups
      A (ordered) dict {group_name: [elements in the group]}.

    target
      Either path to a folder, or a zip file, or "@memory" to return
      raw data of a zip file containing the report.

    group_naming
      Word that will replace "group" in the report, e.g. "assembly",
      "team", etc.

    plot_format
      Format of the plot (pdf, png, jpeg, etc.).

    Returns
    -------
    The value of ``root._close()``: raw zip data when ``target`` is
    "@memory" or a zip path.
    """
    root = flametree.file_tree(target)
    # One CSV row per group: "group_name,element1,element2,..."
    csv = ("%s,elements\n" % group_naming) + "\n".join(
        ",".join([group] + list(elements))
        for group, elements in groups.items()
    )
    root._file("%ss.csv" % group_naming).write(csv)
    ax = plot_batch(groups)
    ax.set_title("Elements per %s" % group_naming)
    ax.figure.savefig(
        root._file("%ss.%s" % (group_naming, plot_format)).open("wb"),
        bbox_inches="tight",
        format=plot_format,
    )
    return root._close()
import os
from dnacauldron import load_record
import flametree

# Folder containing the app's sequence data files.
data_path = os.path.join("app", "data")
data_dir = flametree.file_tree(data_path)

# # with open(data_path, "r") as f:
#     DATA = f.read()

# Connector parts: every Genbank file under genbank/connectors, each loaded
# as a circular record named after its file (without extension).
connector_records = [
    load_record(f._path, linear=False, id=f._name_no_extension)
    for f in data_dir.genbank.connectors._all_files
    if f._extension == "gb"
]
# Receiving backbone, loaded as a circular record and flagged so downstream
# code can tell it apart from inserts/connectors.
backbone = load_record(data_dir.genbank.hc_amp_backbone_gb._path,
                       linear=False, id='hc_amp_backbone')
backbone.is_backbone = True
def work(self):
    """Rearray wells from a source plate into a destination plate.

    NOTE(review): this method looks unfinished — several names
    (``dest_filelike``, ``destination_name``, ``rearraying_type``,
    ``picklist_data``, ``assembly_plan``, ``destination_plate``) are read
    but never defined in this scope, so most code paths would raise
    NameError as written. Flagged inline below.
    """
    self.logger(message="Reading Data...")
    data = self.data
    source_filelike = file_to_filelike_object(data.source_plate)
    source = plate_from_content_spreadsheet(source_filelike)
    source.name = data.source_name
    if ((data.destination_plate is not None) and
            ((data.rearraying_type == 'map') or
             (data.destination_type == 'existing'))):
        # NOTE(review): ``dest_filelike`` and ``destination_name`` are
        # undefined here — presumably meant to be derived from
        # ``data.destination_plate`` / ``data.destination_name``; confirm.
        destination = plate_from_content_spreadsheet(dest_filelike)
        destination.name = destination_name
    else:
        destination = get_plate_class(data.destination_size)()
        destination.name = destination_name
    # NOTE(review): bare ``rearraying_type`` — probably should be
    # ``data.rearraying_type`` as in the condition above.
    if rearraying_type == 'map':
        # for well in destination.iter_wells():
        #     well.content.volume *= 1e-6
        picklist = PickList()
        for well in source.iter_wells():
            if well.is_empty:
                continue
            part = (well.content.components_as_string())
            # Locate the unique destination well holding the same content.
            # (The lambda is evaluated immediately inside this iteration,
            # so closing over the loop variable ``part`` is safe here.)
            destination_well = destination.find_unique_well(
                condition=lambda w: w.content.components_as_string(
                ) == part)
            picklist.add_transfer(well, destination_well,
                                  destination_well.volume)
            destination_well.empty_completely()
        picklist.execute()
        picklist_to_tecan_evo_picklist_file(picklist,
                                            "rearray_2018-10-02.gwl")
        plate_to_content_spreadsheet(destination,
                                     "destination_after_picklist.xlsx")
    else:
        pass
    # NOTE(review): when the 'map' branch is not taken, ``picklist`` is
    # undefined below (the else-branch is a bare ``pass``).
    future_plates = picklist.execute(inplace=False)

    def text(w):
        # Label each well with its volume, prefixed by the construct name
        # when one is recorded in the well's data.
        txt = human_volume(w.content.volume)
        if 'construct' in w.data:
            txt = "\n".join([w.data.construct, txt])
        return txt

    plotter = PlateTextPlotter(text)
    # NOTE(review): ``destination_plate`` is undefined — the local above is
    # named ``destination``.
    ax, _ = plotter.plot_plate(future_plates[destination_plate],
                               figsize=(20, 8))
    ziproot = flametree.file_tree("@memory", replace=True)
    ax.figure.savefig(ziproot._file("final_mixplate.pdf").open('wb'),
                      format="pdf", bbox_inches="tight")
    plt.close(ax.figure)
    # NOTE(review): ``picklist_data`` and ``assembly_plan`` are undefined
    # in this scope — likely copy-pasted from the picklist-generator
    # version of this method.
    picklist_to_assembly_mix_report(
        picklist,
        ziproot._file("assembly_mix_picklist_report.pdf").open('wb'),
        data=picklist_data)
    assembly_plan.write_report(
        ziproot._file("assembly_plan_summary.pdf").open('wb'))
    if data.dispenser_machine == 'labcyte_echo':
        picklist_to_labcyte_echo_picklist_file(
            picklist, ziproot._file("ECHO_picklist.csv").open('w'))
    else:
        picklist_to_tecan_evo_picklist_file(
            picklist, ziproot._file("EVO_picklist.gwl").open('w'))
    zip_data = ziproot._close()
    return {
        'file': {
            'data': data_to_html_data(zip_data, 'zip'),
            'name': 'assemblies.zip',
            'mimetype': 'application/zip'
        },
        'success': True
    }
def write_no_solution_report(target, problem, error, file_content=None,
                             file_path=None):
    """Write a report on incompatibility found in the problem's constraints.

    The report comprises a PDF of plots of the sequence (global constraints,
    local constraints around the problem) and an annotated genbank.

    Parameters
    ----------
    target
      Either a path to a folder, or a path to a zip archive, or "@memory"
      to return raw data of a zip archive containing the report.

    problem
      A DnaOptimizationProblem

    error
      A NoSolutionError (carries a message and a location)

    file_content
      Raw content of the original sequence file, copied verbatim into the
      report. Read from ``file_path`` when left to None.

    file_path
      Path of the original sequence file; its basename, prefixed with a
      short hash of the content, names the copy placed in the report.

    Returns
    -------
    Raw zip data when ``target`` is a string (e.g. "@memory"), else None.

    Raises
    ------
    ImportError
      When matplotlib is not installed.
    """
    if not MATPLOTLIB_AVAILABLE:
        raise ImportError(install_extras_message("Matplotlib"))
    if isinstance(target, str):
        root = flametree.file_tree(target, replace=True)
    else:
        root = target

    # TRANSFER THE ORIGINAL FILE
    file_hash = None
    if file_path is not None:
        if file_content is None:
            with open(file_path, "rb") as f:
                file_content = f.read()
        basename = os.path.basename(file_path)
        # Hash prefix disambiguates files sharing a basename.
        file_hash = hashlib.md5(file_content).hexdigest()[:8]
        root._file("_".join([file_hash, basename])).write(file_content)

    translator = SpecAnnotationsTranslator()
    with PdfPages(root._file("plots.pdf").open("wb")) as pdf_io:

        # PLOT GLOBAL LOCATION OF ERROR
        record = problem.to_record()
        # BUGFIX: removed a second, redundant instantiation of
        # SpecAnnotationsTranslator (one was already created just above).
        graphical_record = translator.translate_record(record)
        ax, _ = graphical_record.plot(figure_width=min(20, 0.3 * len(record)))
        if len(record) < 60:
            graphical_record.plot_sequence(ax)
        if error.location is None:
            raise error
        start, end, strand = error.location.to_tuple()
        ax.fill_between([start, end], -10, 10, zorder=-1000,
                        facecolor="#ffcccc")
        title = "\n".join(
            textwrap.wrap(
                "No solution found in zone [%d, %d]:%s"
                % (start, end, str(error)),
                width=120,
            ))
        ax.set_title(title, fontdict=TITLE_FONTDICT)
        pdf_io.savefig(ax.figure, bbox_inches="tight", alpha=0.5)
        plt.close(ax.figure)

        # CREATE AND SAVE THE LOCAL CONSTRAINTS BREACHES RECORD
        record = error.problem.to_record(
            with_original_spec_features=False,
            with_constraints=False,
            with_objectives=False,
        )
        # Zoom on a small window around the error's location.
        start = max(0, error.location.start - 5)
        end = min(len(record), error.location.end + 4)
        focus_location = Location(start, end)

        def is_in_focus(location):
            return location.overlap_region(focus_location) is not None

        evals = error.problem.constraints_evaluations()
        passing = evals.filter("passing")
        record.features += passing.success_and_failures_as_features()
        failing = evals.filter("failing")
        record.features += failing.locations_as_features(
            label_prefix="BREACH", locations_filter=is_in_focus)
        SeqIO.write(
            record,
            root._file("local_constraints_breaches.gb").open("w"),
            "genbank",
        )

        # CREATE A FIGURE OF THE LOCAL CONSTRAINTS BREACHES AS A NEW PDF PAGE
        graphical_record = translator.translate_record(record)
        graphical_record = graphical_record.crop((start, end))
        figure_width = min(20, 0.3 * (end - start))
        ax, _ = graphical_record.plot(figure_width=figure_width)
        graphical_record.plot_sequence(ax)
        ax.set_title(
            "Local constraints breaches in [%d, %d]" % (start, end)
            + " (green = passing constraints)",
            fontdict=TITLE_FONTDICT,
        )
        ax.set_ylim(top=ax.get_ylim()[1] + 1)
        pdf_io.savefig(ax.figure, bbox_inches="tight", alpha=0.5)
        plt.close(ax.figure)

    root._file("logs.txt").write(problem.logger.dump_logs())

    # returns zip data if target == '@memory'
    if isinstance(target, str):
        return root._close()
def write_no_solution_report(target, problem, error):
    """Write a report on incompatibility found in the problem's constraints.

    The report comprises a PDF of plots of the sequence (global constraints,
    local constraints around the problem) and an annotated genbank.

    Parameters
    ----------
    target
      Either a path to a folder, or a path to a zip archive, or "@memory"
      to return raw data of a zip archive containing the report.

    problem
      A DnaOptimizationProblem

    error
      A NoSolutionError (carries a message and a location)

    Returns
    -------
    Raw zip data when ``target`` is a string (e.g. "@memory"), else None.

    Raises
    ------
    ImportError
      When matplotlib is not installed.
    """
    if not MATPLOTLIB_AVAILABLE:
        raise ImportError(install_extras_message("Matplotlib"))
    if isinstance(target, str):
        root = flametree.file_tree(target, replace=True)
    else:
        root = target
    translator = SpecAnnotationsTranslator()
    with PdfPages(root._file("plots.pdf").open("wb")) as pdf_io:

        # PLOT GLOBAL LOCATION OF ERROR
        record = problem.to_record()
        # BUGFIX: removed a second, redundant instantiation of
        # SpecAnnotationsTranslator (one was already created just above).
        graphical_record = translator.translate_record(record)
        ax, _ = graphical_record.plot(figure_width=min(20, 0.3*len(record)))
        if len(record) < 60:
            graphical_record.plot_sequence(ax)
        if error.location is None:
            raise error
        start, end, strand = error.location.to_tuple()
        ax.fill_between([start, end], -10, 10, zorder=-1000,
                        facecolor='#ffeeee')
        title = "\n".join(textwrap.wrap(
            "No solution found in zone [%d, %d]: %s" %
            (start, end, str(error)), width=120)
        )
        ax.set_title(title, fontdict=TITLE_FONTDICT)
        pdf_io.savefig(ax.figure, bbox_inches="tight", alpha=0.5)
        plt.close(ax.figure)

        # PLOT LOCAL CONSTRAINTS BREACHES
        evals = error.problem.constraints_evaluations()
        record = error.problem.to_record(
            with_original_spec_features=False,
            with_constraints=False,
            with_objectives=False)
        record.features += evals.filter('passing') \
                                .success_and_failures_as_features()
        record.features += evals.filter('failing') \
                                .locations_as_features(label_prefix="BREACH")
        # Zoom on a small window around the error's location.
        start = max(0, error.location.start - 5)
        end = min(len(record), error.location.end + 4)
        graphical_record = translator.translate_record(record)
        graphical_record = graphical_record.crop((start, end))
        ax, _ = graphical_record.plot(figure_width=min(20, 0.3*(end - start)))
        graphical_record.plot_sequence(ax)
        ax.set_title("Local constraints breaches in [%d, %d]" % (start, end)
                     + " (green = passing constraints)",
                     fontdict=TITLE_FONTDICT)
        pdf_io.savefig(ax.figure, bbox_inches="tight", alpha=0.5)
        plt.close(ax.figure)

    # WRITE GENBANK
    record = problem.to_record(with_original_spec_features=False,
                               with_constraints=True,
                               with_objectives=True)
    evals = problem.constraints_evaluations()
    record.features += evals.filter('passing') \
                            .success_and_failures_as_features()
    record.features += evals.filter('failing') \
                            .locations_as_features(label_prefix="BREACH")
    SeqIO.write(record, root._file("constraints breaches.gb").open("w"),
                "genbank")
    root._file('logs.txt').write(problem.logger.dump_logs())

    # returns zip data if target == '@memory'
    if isinstance(target, str):
        return root._close()
def work(self):
    """Generate an assembly picklist (and reports) from an assembly plan.

    Reads the assembly-plan spreadsheet and part information from
    ``self.data``, builds a liquid-handler picklist from the source plate,
    and returns a zip archive with the mixplate map, spreadsheets, reports
    and the machine (ECHO or EVO) picklist file.
    """
    self.logger(message="Reading Data...")
    data = self.data

    # Reading picklist
    picklist_filelike = file_to_filelike_object(data.picklist)
    if data.picklist.name.endswith('.csv'):
        csv = picklist_filelike.read().decode()
        rows = [l.split(',') for l in csv.split("\n") if len(l)]
    else:
        dataframe = pandas.read_excel(picklist_filelike)
        rows = [row for i, row in dataframe.iterrows()]
    # One assembly per row: first cell is the construct name, the rest are
    # part names (blank/placeholder cells filtered out).
    assembly_plan = AssemblyPlan(OrderedDict([
        (row[0], [
            str(e).strip() for e in row[1:]
            if str(e).strip() not in ['-', 'nan', '']
        ])
        for row in rows
        if row[0] not in ['nan', 'Construct name', 'constructs', 'construct']
    ]))
    for assembly, parts in assembly_plan.assemblies.items():
        assembly_plan.assemblies[assembly] = [
            part.replace(" ", "_") for part in parts
        ]

    # Reading part infos
    if len(data.parts_infos):
        first_file = data.parts_infos[0]
        if first_file.name.endswith(('.csv', '.xls', '.xlsx')):
            # Spreadsheet of part sizes.
            first_file_filelike = file_to_filelike_object(first_file)
            if first_file.name.endswith('.csv'):
                dataframe = pandas.read_csv(first_file_filelike)
            else:
                dataframe = pandas.read_excel(first_file_filelike)
            parts_data = {
                row.part: {'size': row['size']}
                for i, row in dataframe.iterrows()
            }
        else:
            # Sequence files: one record per part.
            records = records_from_data_files(data.parts_infos)
            if data.use_file_names_as_ids:
                for r in records:
                    r.id = r.name = r.file_name
            parts_data = {
                rec.id.replace(" ", "_"): {'record': rec}
                for rec in records
            }
        assembly_plan.parts_data = parts_data
        parts_without_data = assembly_plan.parts_without_data()
        if len(parts_without_data):
            return {
                'success': False,
                'message': 'Some parts have no provided record or data.',
                'missing_parts': parts_without_data
            }

    # Reading protocol: convert the requested per-part quantity to moles
    # or grams depending on the selected unit.
    if data.quantity_unit == 'fmol':
        part_mol = data.part_quantity * 1e-15
        part_g = None
    elif data.quantity_unit == 'nM':
        part_mol = data.part_quantity * data.total_volume * 1e-15
        part_g = None
    elif data.quantity_unit == 'ng':
        part_mol = None
        part_g = data.part_quantity * 1e-9

    self.logger(message='Generating picklist')
    picklist_generator = AssemblyPicklistGenerator(
        part_mol=part_mol,
        part_g=part_g,
        complement_to=data.total_volume * 1e-6,
        buffer_volume=data.buffer_volume * 1e-6,
        volume_rounding=2.5e-9,
        minimal_dispense_volume=5e-9
    )
    source_filelike = file_to_filelike_object(data.source_plate)
    source_plate = plate_from_content_spreadsheet(source_filelike)
    for well in source_plate.iter_wells():
        if well.is_empty:
            continue
        quantities = well.content.quantities
        # BUGFIX: dict views are not subscriptable in Python 3 —
        # ``quantities.items()[0]`` raised a TypeError.
        part, quantity = next(iter(quantities.items()))
        quantities.pop(part)
        quantities[part.replace(" ", "_")] = quantity
    source_plate.name = "Source"
    self.logger(message="Generating Picklist...")
    destination_plate = Plate4ti0960("Mixplate")
    if data.destination_plate:
        dest_filelike = file_to_filelike_object(data.destination_plate)
        destination_plate = plate_from_content_spreadsheet(dest_filelike)
    destination_wells = (
        well for well in destination_plate.iter_wells(direction='column')
        if well.is_empty
    )
    picklist, picklist_data = picklist_generator.make_picklist(
        assembly_plan,
        source_wells=source_plate.iter_wells(),
        destination_wells=destination_wells
    )
    if picklist is None:
        return {
            'success': False,
            'message': 'Some parts in the assembly plan have no '
                       'corresponding well.',
            'picklist_data': picklist_data,
            'missing_parts': picklist_data.get('missing_parts', None)
        }
    future_plates = picklist.execute(inplace=False)

    def text(w):
        # Well label: volume, prefixed by construct name when present.
        txt = human_volume(w.content.volume)
        if 'construct' in w.data:
            txt = "\n".join([w.data.construct, txt])
        return txt

    plotter = PlateTextPlotter(text)
    ax, _ = plotter.plot_plate(future_plates[destination_plate],
                               figsize=(20, 8))
    ziproot = flametree.file_tree("@memory", replace=True)

    # MIXPLATE MAP PLOT
    ax.figure.savefig(
        ziproot._file("final_mixplate.pdf").open('wb'),
        format="pdf",
        bbox_inches="tight")
    plt.close(ax.figure)
    plate_to_platemap_spreadsheet(
        future_plates[destination_plate],
        lambda w: w.data.get('construct', ''),
        filepath=ziproot._file('final_mixplate.xls').open('wb'))
    self.logger(message="Writing report...")

    # ASSEMBLY REPORT
    picklist_to_assembly_mix_report(
        picklist,
        ziproot._file("assembly_mix_picklist_report.pdf").open('wb'),
        data=picklist_data)
    assembly_plan.write_report(
        ziproot._file("assembly_plan_summary.pdf").open('wb'))

    # MACHINE PICKLIST
    if data.dispenser_machine == 'labcyte_echo':
        picklist_to_labcyte_echo_picklist_file(
            picklist, ziproot._file("ECHO_picklist.csv").open('w'))
    else:
        picklist_to_tecan_evo_picklist_file(
            picklist, ziproot._file("EVO_picklist.gwl").open('w'))
    # Include a verbatim copy of the source plate spreadsheet.
    raw = file_to_filelike_object(data.source_plate).read()
    f = ziproot._file(data.source_plate.name)
    f.write(raw, mode='wb')
    zip_data = ziproot._close()
    return {
        'file': {
            'data': data_to_html_data(zip_data, 'zip'),
            'name': 'picklist.zip',
            'mimetype': 'application/zip'
        },
        'success': True
    }
def load_records_from_files(files=None, folder=None,
                            use_file_names_as_ids=False):
    """Automatically convert files or a folder's content to biopython records.

    Parameters
    ----------
    files
      A list of path to files. A ``folder`` can be provided instead.

    folder
      A path to a folder containing sequence files.

    use_file_names_as_ids
      If True, for every file containing a single record, the file name
      (without extension) will be set as the record's ID.

    Returns
    -------
    A list of Biopython SeqRecords.

    Raises
    ------
    IOError
      If one of the provided file paths does not exist.
    """
    if files is not None:
        for file in files:
            if isinstance(file, str) and not os.path.exists(file):
                raise IOError("File %s not found" % file)
    if folder is not None:
        files = [f._path for f in flametree.file_tree(folder)._all_files]
    # Placeholder ids/names written by various exporters; records carrying
    # one of these get renamed after their source file instead.
    # (Hoisted out of the loop: it was needlessly rebuilt for every record.)
    UNKNOWN_IDS = [
        "None",
        "",
        "<unknown id>",
        ".",
        "EXPORTED",
        "<unknown name>",
        "Exported",
    ]
    records = []
    for filepath in files:
        filename = os.path.basename(filepath)
        if filename.lower().endswith("zip"):
            records += _load_records_from_zip_file(
                filepath, use_file_names_as_ids=use_file_names_as_ids)
            continue
        recs, fmt = load_records_from_file(filepath)
        single_record = len(recs) == 1
        for i, record in enumerate(recs):
            # BUGFIX: splitext keeps the dots of multi-dot file names,
            # unlike the previous "".join(filename.split(".")[:-1]).
            name_no_extension = os.path.splitext(filename)[0]
            name = name_no_extension + (
                "" if single_record else ("%04d" % i))
            name = name.replace(" ", "_")
            if has_dna_alphabet:  # Biopython <1.78
                record.seq.alphabet = DNAAlphabet()
            record.annotations["molecule_type"] = "DNA"
            # Sorry for this part, it took a lot of "whatever works".
            # Keep your part names under 20c and pointless, and everything
            # will be good.
            if str(record.id).strip() in UNKNOWN_IDS:
                record.id = name
            # BUGFIX: this second check was a copy-paste duplicate of the
            # record.id test above; it now normalizes record.name instead.
            if str(record.name).strip() in UNKNOWN_IDS:
                record.name = name
            record.file_name = name_no_extension
            if use_file_names_as_ids and single_record:
                # record.source_file is assumed to be set by
                # load_records_from_file — TODO confirm.
                basename = os.path.basename(record.source_file)
                basename_no_extension = os.path.splitext(basename)[0]
                record.id = basename_no_extension
        records += recs
    return records
def full_assembly_plan_report(assembly_plan, target, part_records=None,
                              enzyme="autoselect",
                              assert_single_assemblies=True, logger="bar",
                              connector_records=(), fail_silently=True,
                              errors_with_traceback=False, **report_kwargs):
    """Makes a full report for a plan (list of single assemblies)

    Parameters
    ----------
    assembly_plan
      A list ``[('name', [parts])...]`` or a dict ``{name: [parts]}`` where
      the parts are either records, or simply part names (in that case you
      must provide the records in ``parts_records``)

    part_records
      A dict {part_name: part_record}.

    target
      Either a path to a folder, or to a zip file, or ``@memory`` to return
      a string representing zip data (the latter is particularly useful for
      website backends).

    enzyme
      Name of the enzyme to be used in the assembly, or "autoselect" to
      pick an enzyme per assembly.

    assert_single_assemblies
      If True, an assembly producing a number of constructs different from
      one is reported as an error.

    logger
      A proglog logger or "bar" for a default progress-bar logger.

    connector_records
      List of connector records (a connector is a part that can bridge a
      gap between two other parts), from which only the essential elements
      to form an assembly will be automatically selected and added to the
      other parts.

    fail_silently
      If True, errors in individual assemblies are collected and reported
      instead of raised.

    errors_with_traceback
      If True, the (string of the) traceback is appended to each collected
      error message.

    **report_kwargs
      Any other parameter of ``full_assembly_report``. For instance:
      include_fragments_plots, include_parts_plot, include_assembly_plots

    Returns
    -------
    errored_assemblies, zip_data
      List of (assembly name, error message) pairs, and binary zip data
      (or None if the target is not "@memory")
    """
    logger = default_bar_logger(logger)
    if isinstance(assembly_plan, list):
        assembly_plan = OrderedDict(assembly_plan)
    if isinstance(list(assembly_plan.values())[0][0], str):
        # The plan refers to parts by name: resolve each name to a record,
        # copying the record so its name/id can be overwritten safely.
        if not hasattr(part_records, "items"):
            part_records = {r.name: r for r in part_records}
        for part in list(part_records):
            part_records[part] = deepcopy(part_records[part])
            part_records[part].name = part_records[part].id = part
        assembly_plan = OrderedDict([
            (name, [part_records[p] for p in parts])
            for name, parts in assembly_plan.items()
        ])
    root = file_tree(target, replace=True)
    all_records_folder = root._dir("all_records")
    errored_assemblies = []
    assemblies = list(assembly_plan.items())
    selected_enzymes = []  # Used to keep track of autoselected enzymes
    for asm_name, parts in logger.iter_bar(assembly=assemblies):
        if enzyme == "autoselect":
            selected_enzyme = autoselect_enzyme(parts)
            selected_enzymes.append((asm_name, selected_enzyme))
        else:
            selected_enzyme = enzyme
        asm_folder = root._dir(asm_name)
        try:
            n = full_assembly_report(
                parts,
                target=asm_folder,
                assemblies_prefix=asm_name,
                enzyme=selected_enzyme,
                connector_records=connector_records,
                n_expected_assemblies=1 if assert_single_assemblies else None,
                **report_kwargs)
            if assert_single_assemblies and (n != 1):
                raise ValueError("%s assemblies found instead of 1 for %s."
                                 % (n, asm_name))
            # Collect all assembly genbanks in a common folder.
            for f in asm_folder.assemblies._all_files:
                if f._extension == "gb":
                    f.copy(all_records_folder)
        except Exception as err:
            if fail_silently:
                err_string = str(err)
                if errors_with_traceback:
                    err_string += str(err.__traceback__)
                # BUGFIX: record err_string (which may carry the traceback)
                # instead of plain str(err) — the previous code built
                # err_string and then discarded it, making
                # errors_with_traceback a no-op.
                errored_assemblies.append((asm_name, err_string))
            else:
                raise err
    if len(errored_assemblies):
        root._file("errored_assemblies.txt").write("\n\n".join(
            ["%s: %s" % (name, error)
             for name, error in errored_assemblies]))
    # Summarize the plan from the per-assembly report.csv files.
    f = root._file("assembly_plan.csv")
    f.write("construct, parts")
    all_parts = []
    for f_ in root._all_files:
        if f_._name_no_extension == "report":
            first_row = f_.read("r").split("\n")[1].split(",")
            if len(first_row) == 4:
                name, _, _, parts = first_row
                parts = parts.split(" & ")
                all_parts += parts
                f.write("\n" + ",".join([name] + parts))
    all_parts = sorted(set(all_parts))
    root._file("all_parts.csv").write(",\n".join(all_parts))
    if enzyme == "autoselect":
        root._file("selected_enzymes_per_construct.csv").write(",\n".join(
            [",".join(selection) for selection in selected_enzymes]))
    return errored_assemblies, root._close()
def full_assembly_report(parts, target, enzyme="BsmBI", max_assemblies=40,
                         connector_records=(), include_fragments=True,
                         include_parts=True, fragments_filters='auto',
                         assemblies_prefix='assembly',
                         show_overhangs_in_graph=True,
                         show_overhangs_in_genbank=False,
                         mix_class="restriction"):
    """Write a full assembly report in a folder or a zip.

    The report contains the final sequence(s) of the assembly in Genbank
    format as well as a .csv report on all assemblies produced and PDF
    figures to allow a quick overview or diagnostic.

    Folder ``assemblies`` contains the final assemblies, ``assembly_graph``
    contains a schematic view of how the parts assemble together, folder
    ``fragments`` contains the details of all fragments produced by the
    enzyme digestion, and folder ``provided_parts`` contains the original
    input (genbanks of all parts provided for the assembly mix).

    Parameters
    ----------
    parts
      List of Biopython records representing the parts, potentially on
      entry vectors. All the parts provided should have different
      attributes ``name`` as it is used to name the files.

    target
      Either a path to a folder, or to a zip file, or ``@memory`` to return
      a string representing zip data (the latter is particularly useful for
      website backends).

    enzyme
      Name of the enzyme to be used in the assembly

    max_assemblies
      Maximal number of assemblies to consider. If there are more than this
      the additional ones won't be returned.

    connector_records
      List of connector records (a connector is a part that can bridge a
      gap between two other parts), from which only the essential elements
      to form an assembly will be automatically selected and added to the
      other parts.

    include_fragments
      If True, a ``fragments`` folder with the digestion fragments (plots)
      is included in the report.

    include_parts
      If True, a ``provided_parts`` folder with plots and genbanks of the
      input parts is included in the report.

    fragments_filters
      Fragments filters to be used to filter out fragments before looking
      for assemblies. If left to auto, fragments containing the enzyme site
      will be filtered out.

    assemblies_prefix
      Prefix for the file names of all assemblies. They will be named
      ``PRE01.gb``,``PRE02.gb``, ``PRE03.gb`` where ``PRE`` is the prefix.

    show_overhangs_in_graph
      If True, the parts graph displays the overhang sequences.

    show_overhangs_in_genbank
      If True, homology regions are annotated in the output genbanks.

    mix_class
      The class of assembly mix to use; "restriction" selects
      RestrictionLigationMix.

    Returns
    -------
    The number of constructs, and additionally the raw zip data when
    ``target`` is "@memory".

    Raises
    ------
    ValueError
      If several provided parts share the same name.
    """
    if mix_class == "restriction":
        mix_class = RestrictionLigationMix

    part_names = [p.name for p in parts]
    non_unique = [e for (e, count) in Counter(part_names).items()
                  if count > 1]
    if len(non_unique) > 0:
        raise ValueError("All parts provided should have different names. "
                         "Assembly (%s) contains several times the parts %s "
                         % (" ".join(part_names), ", ".join(non_unique)))
    if fragments_filters == 'auto':
        fragments_filters = [NoRestrictionSiteFilter(enzyme)]

    report = file_tree(target, replace=True)
    assemblies_dir = report._dir("assemblies")

    mix = mix_class(parts, enzyme, fragments_filters=fragments_filters)
    if len(connector_records):
        mix.autoselect_connectors(connector_records)

    # PROVIDED PARTS
    if include_parts:
        provided_parts_dir = report._dir("provided_parts")
        for part in parts:
            linear = part.linear if hasattr(part, 'linear') else False
            ax, gr = plot_cuts(part, enzyme, linear=linear)
            f = provided_parts_dir._file(part.name + ".pdf").open('wb')
            ax.figure.savefig(f, format='pdf', bbox_inches="tight")
            plt.close(ax.figure)
            gb_file = provided_parts_dir._file(part.name + ".gb")
            SeqIO.write(part, gb_file.open('w'), 'genbank')

    # FRAGMENTS
    if include_fragments:
        fragments_dir = report._dir("fragments")
        # IDIOM FIX: defaultdict(int) instead of defaultdict(lambda *a: 0).
        seenfragments = defaultdict(int)
        for fragment in mix.fragments:
            gr = BiopythonTranslator().translate_record(fragment)
            ax, pos = gr.plot()
            name = name_fragment(fragment)
            # Number repeated fragment names so files don't overwrite.
            seenfragments[name] += 1
            file_name = "%s_%02d.pdf" % (name, seenfragments[name])
            ax.figure.savefig(fragments_dir._file(file_name).open('wb'),
                              format='pdf', bbox_inches="tight")
            plt.close(ax.figure)

    # GRAPH
    ax = plot_slots_graph(mix, with_overhangs=show_overhangs_in_graph,
                          show_missing=True)
    f = report._file('parts_graph.pdf')
    ax.figure.savefig(f.open('wb'), format='pdf', bbox_inches='tight')
    plt.close(ax.figure)

    # ASSEMBLIES
    assemblies = mix.compute_circular_assemblies(
        annotate_homologies=show_overhangs_in_genbank)
    # Cap at max_assemblies, then sort by sequence for stable output order.
    assemblies = sorted(
        [asm for (i, asm) in zip(range(max_assemblies), assemblies)],
        key=lambda asm: str(asm.seq))
    assemblies_data = []
    i_asm = list(zip(range(max_assemblies), assemblies))
    for i, asm in i_asm:
        if len(i_asm) == 1:
            name = assemblies_prefix
        else:
            name = '%s_%03d' % (assemblies_prefix, (i + 1))
        assemblies_data.append(dict(
            name=name,
            parts=" & ".join([name_fragment(f_) for f_ in asm.fragments]),
            number_of_parts=len(asm.fragments),
            assembly_size=len(asm)))
        SeqIO.write(asm, assemblies_dir._file(name + '.gb').open('w'),
                    'genbank')
        gr_record = AssemblyTranslator().translate_record(asm)
        ax, gr = gr_record.plot(figure_width=16)
        ax.set_title(name)
        ax.figure.savefig(assemblies_dir._file(name + '.pdf').open('wb'),
                          format='pdf', bbox_inches='tight')
        plt.close(ax.figure)
    df = pandas.DataFrame.from_records(
        assemblies_data,
        columns=['name', 'number_of_parts', 'assembly_size', 'parts'])
    df.to_csv(report._file('report.csv'), index=False)
    n_constructs = len(df)
    if target == '@memory':
        return n_constructs, report._close()
    else:
        return n_constructs
def test_assembly_report(tmpdir):
    """End-to-end test: build an ECHO picklist and reports from sample data."""
    data_path = os.path.join("tests", "data", "assembly_report")
    root = flametree.file_tree(data_path)
    # NOTE(review): pandas.read_excel has no ``index`` parameter
    # (``index_col`` is likely meant) — confirm against the pandas version
    # pinned for these tests.
    df = pandas.read_excel(root.example_picklist_xls.open('rb'), index=0)
    # One assembly per spreadsheet row: first cell is the construct name,
    # the rest are part names (blank/placeholder cells filtered out).
    assembly_plan = AssemblyPlan(
        OrderedDict([(row[0], [e for e in row[1:]
                               if str(e) not in ['-', 'nan']])
                     for i, row in df.iterrows()
                     if row[0] not in ['nan', 'Construct name']]))
    parts_zip = flametree.file_tree(root.emma_parts_zip._path)

    def read(f):
        # Load a genbank part record and name it after its file.
        record = SeqIO.read(f.open('r'), 'genbank')
        record.id = f._name_no_extension
        return record

    parts_data = {
        f._name_no_extension: {
            'record': read(f)
        }
        for f in parts_zip._all_files if f._extension == 'gb'
    }
    assembly_plan.parts_data = parts_data
    source_plate = plate_from_content_spreadsheet(
        root.example_echo_plate_xlsx._path)
    source_plate.name = "Source"
    for well in source_plate.iter_wells():
        if not well.is_empty:
            content = well.content.components_as_string()
            # Scale the recorded quantity by 1e-3 — presumably a unit
            # conversion for the spreadsheet values; TODO confirm.
            well.content.quantities[content] *= 1e-3
    destination_plate = Plate4ti0960("Mixplate")
    picklist_generator = AssemblyPicklistGenerator(
        part_mol=1.3e-15,
        complement_to=1e-6,
        buffer_volume=300e-9,
        volume_rounding=2.5e-9,
        minimal_dispense_volume=5e-9)
    picklist, data = picklist_generator.make_picklist(
        assembly_plan,
        source_wells=source_plate.iter_wells(),
        destination_wells=destination_plate.iter_wells(direction='column'),
        complement_well=source_plate.wells.O24,
        buffer_well=source_plate.wells.P24)
    future_plates = picklist.execute(inplace=False)

    def text(w):
        # Well label: volume, prefixed by construct name when present.
        txt = human_volume(w.content.volume)
        if 'construct' in w.data:
            txt = "\n".join([w.data.construct, txt])
        return txt

    plotter = PlateTextPlotter(text)
    ax, _ = plotter.plot_plate(future_plates[destination_plate],
                               figsize=(20, 8))
    # Write all outputs into a zip file under pytest's tmpdir.
    ziproot = flametree.file_tree(os.path.join(str(tmpdir), 'a.zip'))
    ax.figure.savefig(ziproot._file("final_mixplate.pdf").open('wb'),
                      format="pdf",
                      bbox_inches="tight")
    plt.close(ax.figure)
    picklist_to_assembly_mix_report(
        picklist,
        ziproot._file("assembly_mix_picklist_report.pdf").open('wb'),
        data=data)
    assembly_plan.write_report(
        ziproot._file("assembly_plan_summary.pdf").open('wb'))
    picklist_to_labcyte_echo_picklist_file(
        picklist, ziproot._file("ECHO_picklist.csv").open('w'))
    ziproot._close()
def test_directory(tmpdir):
    """Exercise the disk backend of flametree: create, read, replace,
    delete, copy and move files and directories in a real on-disk tree.
    """
    # CREATE AND POPULATE A DIRECTORY FROM SCRATCH
    dir_path = os.path.join(str(tmpdir), "test_dir")
    root = file_tree(dir_path)
    assert root._file_manager.__class__ == DiskFileManager
    root._file("Readme.md").write("This is a test zip")
    root._dir("texts")._dir("shorts")._file("bla.txt").write("bla bla bla")
    root.texts.shorts._file("bli.txt").write("bli bli bli")
    root.texts.shorts._file("blu.txt").write("blu blu blu")
    # READ AN EXISTING FILE (two ways)
    assert root.texts.shorts.bla_txt.read() == "bla bla bla"
    with root.texts.shorts.bla_txt.open("r") as f:
        assert f.read() == "bla bla bla"
    # TEST REPLACE BEHAVIOR (replace=False)
    # _dir() replaces an existing directory by default, so the second
    # _dir("trash") call wipes bla.txt; with replace=False the existing
    # directory is reused and both files survive.
    root._dir("trash")._file("bla.txt").write("bla bla bla")
    root._dir("trash")._file("blu.txt").write("blu blu blu")
    assert root.trash._filenames == ["blu.txt"]
    root.trash._delete()
    root._dir("trash")._file("bla.txt").write("bla bla bla")
    root._dir("trash", replace=False)._file("blu.txt").write("blu blu blu")
    assert set(root.trash._filenames) == set(["bla.txt", "blu.txt"])
    # READ AN EXISTING DIRECTORY
    root = file_tree(dir_path)  # re-scan the tree from disk
    assert set([f._name for f in root._all_files]) == ALL_FILES
    # APPEND TO AN EXISTING DIRECTORY
    root._dir("newdir")._file("new_file.txt").write("I am a new file")
    root = file_tree(dir_path)
    assert set([f._name for f in root._all_files
                ]) == ALL_FILES.union(set(["new_file.txt"]))
    # TEST DELETION: both the OS path and the in-tree node must go away.
    path = root.newdir.new_file_txt._path
    assert os.path.exists(path)
    root.newdir.new_file_txt.delete()
    assert not os.path.exists(path)
    assert not any([f._path == path for f in root.newdir._files])
    # TEST DIRECTORY COPYING
    root._dir("new_folder")
    root.texts._copy(root.new_folder)
    assert [d._name for d in root.new_folder._dirs] == ["texts"]
    # TEST DIRECTORY MOVING
    root._dir("newer_folder")
    root.new_folder.texts._move(root.newer_folder)
    assert [d._name for d in root.new_folder._dirs] == []
    assert [d._name for d in root.newer_folder._dirs] == ["texts"]
    # TEST FILE COPYING
    root._dir("newest_folder")
    root.newer_folder.texts.shorts.bla_txt.copy(root.newest_folder)
    assert [f._name for f in root.newest_folder._files] == ["bla.txt"]
    # TEST FILE MOVING: bla.txt leaves shorts/, only bli/blu remain.
    root._dir("newester_folder")
    root.newer_folder.texts.shorts.bla_txt.move(root.newester_folder)
    assert [f._name for f in root.newester_folder._files] == ["bla.txt"]
    remaining_files = set([d._name for d in root.newer_folder._all_files])
    assert remaining_files == set(["bli.txt", "blu.txt"])
def parts_ids_from_geneart_records_dir(folder): records_root = flametree.file_tree(folder) return { dir_._name.split("_")[0]: "_".join(dir_._name.split("_")[1:]) for dir_ in records_root._dirs }
def write_optimization_report(target, problem, project_name="unnammed",
                              constraints_evaluations=None,
                              objectives_evaluations=None,
                              figure_width=20, max_features_in_plots=300):
    """Write an optimization report with a PDF summary, plots, and genbanks.

    Parameters
    ----------
    target
      Path to a directory or zip file, or "@memory" for returning raw data
      of a zip file created in-memory.
    problem
      The DnaOptimizationProblem whose optimization is reported on.
    project_name
      Name of the project, displayed on the PDF report.
    constraints_evaluations
      Precomputed constraints evaluations. If None, they are computed
      again from the problem.
    objectives_evaluations
      Precomputed objectives evaluations. If None, they are computed
      again from the problem.
    figure_width
      Width of the report's figures, in inches. The more annotations
      there are, the wider it should be.
    max_features_in_plots
      Limit to the number of features shown in the breaches plots (plots
      with thousands of features may take ages to render).

    Returns
    -------
    The raw zip data if ``target`` was a string (e.g. "@memory"),
    else None.
    """
    if constraints_evaluations is None:
        constraints_evaluations = problem.constraints_evaluations()
    if objectives_evaluations is None:
        objectives_evaluations = problem.objectives_evaluations()
    if isinstance(target, str):
        root = flametree.file_tree(target, replace=True)
    else:
        root = target
    translator = SpecAnnotationsTranslator()
    # CREATE FIGURES AND GENBANKS
    with PdfPages(root._file("before_after.pdf").open("wb")) as pdf_io:
        figures_data = [
            ("Before", sequence_to_biopython_record(problem.sequence_before),
             problem.constraints_before, problem.objectives_before, []),
            ("After", sequence_to_biopython_record(problem.sequence),
             constraints_evaluations, objectives_evaluations,
             problem.sequence_edits_as_features())
        ]
        plot_height = None
        for (title, record, constraints, objectives, edits) in figures_data:
            full_title = (
                "{title}: {nfailing} constraints failing (in red)"
                " Total Score: {score:.01E} {bars}").format(
                    title=title, score=objectives.scores_sum(),
                    nfailing=len(constraints.filter("failing").evaluations),
                    bars="" if (title == "Before")
                    else " (bars indicate edits)")
            ax = None
            if title == "After":
                # Plot the edit bars on their own level first, then reuse
                # the same axes for the constraints/objectives plot below.
                record.features += edits
                graphical_record = translator.translate_record(record)
                fig, ax = plt.subplots(1, figsize=(figure_width,
                                                   plot_height))
                graphical_record.plot(ax=ax, level_offset=-0.3)
                record.features = []
            record.features += constraints.success_and_failures_as_features()
            record.features += objectives.success_and_failures_as_features()
            graphical_record = translator.translate_record(record)
            ax, _ = graphical_record.plot(ax=ax, figure_width=figure_width)
            ax.set_title(full_title, loc="left", fontdict=TITLE_FONTDICT)
            # Remember this figure's height so the "After" edits subplot
            # is created with the same height.
            plot_height = ax.figure.get_size_inches()[1]
            pdf_io.savefig(ax.figure, bbox_inches="tight")
            plt.close(ax.figure)
            record.features += edits
            breaches_locations = \
                constraints.filter("failing") \
                           .locations_as_features(label_prefix="Breach from",
                                                  merge_overlapping=True)
            record.features += breaches_locations
            SeqIO.write(record, root._file(title.lower() + ".gb").open("w"),
                        "genbank")
            if breaches_locations != []:
                record.features = breaches_locations
                graphical_record = translator.translate_record(record)
                if len(graphical_record.features) > max_features_in_plots:
                    # Keep only the longest features (start - end is most
                    # negative for the longest) to bound plotting time.
                    features = sorted(graphical_record.features,
                                      key=lambda f: f.start - f.end)
                    new_ft = features[:max_features_in_plots]
                    graphical_record.features = new_ft
                    # Leading space fixes the previous title which read
                    # "...locations(only N features shown)".
                    message = " (only %d features shown)" % \
                              max_features_in_plots
                else:
                    message = ""
                ax, _ = graphical_record.plot(figure_width=figure_width)
                ax.set_title(title + ": Constraints breaches locations"
                             + message,
                             loc="left", fontdict=TITLE_FONTDICT)
                pdf_io.savefig(ax.figure, bbox_inches="tight")
                plt.close(ax.figure)
    # CREATE PDF REPORT
    # Note: `edits` is the edits list of the last ("After") iteration.
    html = report_writer.pug_to_html(
        path=os.path.join(ASSETS_DIR, "optimization_report.pug"),
        project_name=project_name,
        problem=problem,
        constraints_evaluations=constraints_evaluations,
        objectives_evaluations=objectives_evaluations,
        edits=sum(len(f) for f in edits),
        sequenticons={
            label: sequenticon(seq, output_format="html_image", size=24)
            for label, seq in [("before", problem.sequence_before),
                               ("after", problem.sequence)]
        })
    report_writer.write_report(html, root._file("Report.pdf"))
    problem.to_record(root._file("final_sequence.gb").open("w"),
                      with_constraints=False,
                      with_objectives=False)
    # Returns zip data if target == '@memory'
    if isinstance(target, str):
        return root._close()
def extract_from_input(
    filename=None,
    directory=None,
    construct_list=None,
    direct_sense=True,
    output_path=None,
    min_sequence_length=20,
):
    """Extract features from input and return in a dictionary.

    Optionally save the features in separate files.

    Parameters
    ==========
    file
      Input sequence file (Genbank).
    directory
      Directory name containing input sequence files.
    construct_list
      A list of SeqRecords.
    direct_sense
      If True: make antisense features into direct-sense in the exported
      files.
    output_path
      Path for the exported feature and report files.
    min_sequence_length
      Discard sequences with length less than this integer.
    """
    genbank_id_limit = 20  # GenBank format hard limit for name
    # Exactly one of the three input sources is used, in this priority:
    # construct_list > filename > directory.
    if construct_list:
        pass
    elif filename:
        input_record = load_record(filename,
                                   record_id="auto",
                                   upperize=False,
                                   id_cutoff=genbank_id_limit)
        construct_list = [input_record]
    elif directory:
        construct_list = records_from_data_files(filepaths=None,
                                                 folder=directory)
    else:
        raise TypeError(
            "Specify one of 'construct_list', 'filename' or 'directory'.")
    records_dict = dict()  # {unique record name: list of feature records}
    recordname_list = []   # names seen so far, for duplicate counting
    for input_record in construct_list:
        records = extract_features(input_record, direct_sense)
        record_name = input_record.name[0:genbank_id_limit]
        # This part makes the key (used as dir name) unique by appending
        # a copynumber:
        number_of_name_occurrences = recordname_list.count(record_name)
        if number_of_name_occurrences:
            key = "%s_%s" % (record_name, number_of_name_occurrences + 1)
        else:
            key = record_name
        recordname_list.append(record_name)
        records_dict[key] = records
    # make_part_dict returns (all_parts_dict, report); the report is
    # post-processed into a dataframe-like summary.
    parts_report = make_part_dict(records_dict,
                                  min_sequence_length=min_sequence_length)
    processed_report = process_report(parts_report[1])
    all_parts_dict = parts_report[0]
    records_dict["all_parts"] = list(all_parts_dict.values())
    if output_path is not None:
        # One subdirectory per input record, one .gb file per feature.
        root = flametree.file_tree(output_path)
        for key, records in records_dict.items():
            record_dir = root._dir(key)
            record_name_alnum_list = []
            for record in records:
                # Sanitize the record name for use as a file name.
                record_name_alnum = "".join(
                    x if x.isalnum() else "_" for x in record.name)
                # This part makes the filename unique by appending a
                # copynumber:
                number_of_occurrences = record_name_alnum_list.count(
                    record_name_alnum)
                if number_of_occurrences:
                    record_filename = "%s_%s.gb" % (
                        record_name_alnum,
                        number_of_occurrences + 1,
                    )
                else:
                    record_filename = record_name_alnum + ".gb"
                record_name_alnum_list.append(record_name_alnum)
                record_file_path = record_dir._file(record_filename)
                try:
                    write_record(record, record_file_path, fmt="genbank")
                except Exception as err:
                    # Best-effort export: report the failure and keep
                    # writing the remaining records.
                    print("Error writing", record_filename, str(err))
        processed_report.to_csv(root._file("report.csv").open("w"))
    records_dict["processed_report"] = processed_report
    return records_dict
import flametree from dnacauldron import RestrictionLigationMix, load_record data_root = flametree.file_tree(".").data.select_connectors parts = [ load_record(f._path, linear=False, id=f._name_no_extension[:15]) for f in data_root.parts_missing_connectors._all_files if f._extension == "gb" ] connectors = [ load_record(f._path, linear=False, id=f._name_no_extension[:15]) for f in data_root.connectors._all_files if f._extension == "gb" ] mix = RestrictionLigationMix(parts, enzyme='BsmBI') selected_connectors = mix.autoselect_connectors(connectors) print("Selected connectors: ", ", ".join([c.id for c in selected_connectors]))
def test_pandas(tmpdir): path = str(tmpdir) root = file_tree("@memory") root._file('test.csv').write("A,B,C\n1,2,3\n4,5,6") dataframe = pandas.read_csv(root.test_csv.open('r'))
def write_optimization_report(target, problem, project_name="unnammed",
                              constraints_evaluations=None,
                              objectives_evaluations=None,
                              figure_width=20,
                              max_features_in_plots=300):
    """Write an optimization report with a PDF summary, plots, and genbanks.

    Parameters
    ----------
    target
      Path to a directory or zip file, or "@memory" for returning raw data
      of a zip file created in-memory.

    problem
      A DnaOptimizationProblem to be solved and optimized

    project_name
      Name of the project that will appear on the PDF report

    constraints_evaluations
      Precomputed constraints evaluations. If None provided, they will be
      computed again from the problem.

    objectives_evaluations
      Precomputed objectives evaluations. If None provided, they will be
      computed again from the problem.

    figure_width
      Width of the report's figure, in inches. The more annotations there
      will be in the figure, the wider it should be. The default should
      work for most cases.

    max_features_in_plots
      Limit to the number of features to plot (plots with thousands of
      features may take ages to plot)
    """
    # Both optional backends are required for this report.
    if not PDF_REPORTS_AVAILABLE:
        raise ImportError(install_extras_message("PDF Reports"))
    if not SEQUENTICON_AVAILABLE:
        raise ImportError(install_extras_message("Sequenticon"))
    if constraints_evaluations is None:
        constraints_evaluations = problem.constraints_evaluations()
    if objectives_evaluations is None:
        objectives_evaluations = problem.objectives_evaluations()
    if isinstance(target, str):
        root = flametree.file_tree(target, replace=True)
    else:
        root = target
    translator = SpecAnnotationsTranslator()
    # CREATE FIGURES AND GENBANKS
    diffs_figure_data = None
    sequence_before = sequence_to_biopython_record(problem.sequence_before)
    if GENEBLOCKS_AVAILABLE:
        # Optional before/after diff figure (only when geneblocks is
        # installed); embedded as SVG in the PDF report.
        sequence_after = problem.to_record()
        contract_under = max(3, int(len(sequence_after) / 10))
        diffs = DiffBlocks.from_sequences(sequence_before, sequence_after,
                                          use_junk_over=50,
                                          contract_under=contract_under)
        _, diffs_ax = diffs.plot()
        diffs_figure_data = pdf_tools.figure_data(diffs_ax.figure,
                                                  fmt='svg')
        plt.close(diffs_ax.figure)
    with PdfPages(root._file("before_after.pdf").open("wb")) as pdf_io:
        figures_data = [
            (
                "Before", sequence_before,
                problem.constraints_before, problem.objectives_before,
                []
            ),
            (
                "After", sequence_to_biopython_record(problem.sequence),
                constraints_evaluations, objectives_evaluations,
                problem.sequence_edits_as_features()
            )
        ]
        plot_height = None
        for (title, record, constraints, objectives, edits) in figures_data:
            full_title = (
                "{title}: {nfailing} constraints failing (in red)"
                " Total Score: {score:.01E} {bars}").format(
                    title=title, score=objectives.scores_sum(),
                    nfailing=len(constraints.filter("failing").evaluations),
                    bars="" if (title == "Before") else
                    " (bars indicate edits)"
            )
            ax = None
            if title == "After":
                # Plot the edit bars on their own level first, then reuse
                # the same axes for the constraints/objectives plot below.
                record.features += edits
                graphical_record = translator.translate_record(record)
                fig, ax = plt.subplots(1, figsize=(figure_width,
                                                   plot_height))
                graphical_record.plot(ax=ax, level_offset=-0.3)
                record.features = []
            record.features += constraints.success_and_failures_as_features()
            record.features += objectives.success_and_failures_as_features()
            graphical_record = translator.translate_record(record)
            ax, _ = graphical_record.plot(ax=ax, figure_width=figure_width)
            ax.set_title(full_title, loc="left", fontdict=TITLE_FONTDICT)
            # Remember this figure's height so the "After" edits subplot
            # is created with the same height.
            plot_height = ax.figure.get_size_inches()[1]
            pdf_io.savefig(ax.figure, bbox_inches="tight")
            plt.close(ax.figure)
            record.features += edits
            breaches_locations = \
                constraints.filter("failing") \
                           .locations_as_features(label_prefix="Breach from",
                                                  merge_overlapping=True)
            record.features += breaches_locations
            SeqIO.write(record, root._file(title.lower() + ".gb").open("w"),
                        "genbank")
            if breaches_locations != []:
                # Extra page showing only where constraints are breached.
                record.features = breaches_locations
                graphical_record = translator.translate_record(record)
                if len(graphical_record.features) > max_features_in_plots:
                    # Keep only the longest features (start - end is most
                    # negative for the longest) to bound plotting time.
                    features = sorted(graphical_record.features,
                                      key=lambda f: f.start - f.end)
                    new_ft = features[:max_features_in_plots]
                    graphical_record.features = new_ft
                    message = "(only %d features shown)" % \
                              max_features_in_plots
                else:
                    message = ""
                ax, _ = graphical_record.plot(figure_width=figure_width)
                ax.set_title(title + ": Constraints breaches locations"
                             + message,
                             loc="left", fontdict=TITLE_FONTDICT)
                pdf_io.savefig(ax.figure, bbox_inches="tight")
                plt.close(ax.figure)
    # CREATE PDF REPORT
    # Note: `edits` is the edits list of the last ("After") iteration.
    html = report_writer.pug_to_html(
        path=os.path.join(ASSETS_DIR, "optimization_report.pug"),
        project_name=project_name,
        problem=problem,
        constraints_evaluations=constraints_evaluations,
        objectives_evaluations=objectives_evaluations,
        edits=sum(len(f) for f in edits),
        diffs_figure_data=diffs_figure_data,
        sequenticons={
            label: sequenticon(seq, output_format="html_image", size=24)
            for label, seq in [("before", problem.sequence_before),
                               ("after", problem.sequence)]
        }
    )
    problem.to_record(root._file("final_sequence.gb").open("w"),
                      with_constraints=False,
                      with_objectives=False)
    report_writer.write_report(html, root._file("Report.pdf"))
    # Return the raw zip data when the target was e.g. "@memory".
    if isinstance(target, str):
        return root._close()
def work(self):
    """Build an assembly picklist from an uploaded plan, parts info and a
    source plate, then return a zip with plots, reports, and the picklist
    for the selected dispenser machine.
    """
    self.logger(message="Reading Data...")
    data = self.data
    # Reading picklist
    picklist_filelike = file_to_filelike_object(data.picklist)
    if data.picklist.name.endswith(".csv"):
        csv = picklist_filelike.read().decode()
        rows = [l.split(",") for l in csv.split("\n") if len(l)]
    else:
        dataframe = pandas.read_excel(picklist_filelike)
        rows = [row for i, row in dataframe.iterrows()]
    # One assembly per row: first cell is the construct name, the other
    # cells are part names; '-', empty and NaN cells are skipped, and
    # header-looking rows are ignored.
    assembly_plan = AssemblyPlan(
        OrderedDict(
            [
                (
                    row[0],
                    [
                        str(e).strip()
                        for e in row[1:]
                        if str(e).strip() not in ["-", "nan", ""]
                    ],
                )
                for row in rows
                if row[0] not in ["nan", "Construct name", "constructs",
                                  "construct"]
            ]
        )
    )
    # Normalize part names (spaces -> underscores) to match well content.
    for assembly, parts in assembly_plan.assemblies.items():
        assembly_plan.assemblies[assembly] = [
            part.replace(" ", "_") for part in parts
        ]
    # Reading part infos
    if len(data.parts_infos):
        first_file = data.parts_infos[0]
        if first_file.name.endswith((".csv", ".xls", ".xlsx")):
            # Spreadsheet of part sizes (columns: part, size).
            first_file_filelike = file_to_filelike_object(first_file)
            if first_file.name.endswith(".csv"):
                dataframe = pandas.read_csv(first_file_filelike)
            else:
                dataframe = pandas.read_excel(first_file_filelike)
            parts_data = {
                row.part: {"size": row["size"]}
                for i, row in dataframe.iterrows()
            }
        else:
            # Sequence files: use the records themselves as part data.
            records = records_from_data_files(data.parts_infos)
            if data.use_file_names_as_ids:
                for r in records:
                    r.id = r.name = r.file_name
            parts_data = {
                rec.id.replace(" ", "_"): {"record": rec}
                for rec in records
            }
        assembly_plan.parts_data = parts_data
        parts_without_data = assembly_plan.parts_without_data()
        if len(parts_without_data):
            return {
                "success": False,
                "message": "Some parts have no provided record or data.",
                "missing_parts": parts_without_data,
            }
    # Reading protocol
    # NOTE(review): if data.quantity_unit is none of fmol/nM/ng,
    # part_mol/part_g stay undefined and the generator call below raises
    # NameError — confirm the form restricts the unit to these values.
    if data.quantity_unit == "fmol":
        part_mol = data.part_quantity * 1e-15
        part_g = None
    if data.quantity_unit == "nM":
        part_mol = data.part_quantity * data.total_volume * 1e-15
        part_g = None
    if data.quantity_unit == "ng":
        part_mol = None
        part_g = data.part_quantity * 1e-9
        # Backbone:part molar ratio calculation is not performed in this
        # case. This ensures no change regardless of form input:
        data.part_backbone_ratio = 1
    self.logger(message="Generating picklist")
    picklist_generator = AssemblyPicklistGenerator(
        part_mol=part_mol,
        part_g=part_g,
        complement_to=data.total_volume * 1e-6,
        buffer_volume=data.buffer_volume * 1e-6,
        volume_rounding=2.5e-9,
        minimal_dispense_volume=5e-9,
    )
    backbone_name_list = data.backbone_name.split(",")
    source_filelike = file_to_filelike_object(data.source_plate)
    source_plate = plate_from_content_spreadsheet(source_filelike)
    # Re-key each well's single component with underscores instead of
    # spaces, so it matches the normalized assembly plan part names.
    for well in source_plate.iter_wells():
        if well.is_empty:
            continue
        quantities = well.content.quantities
        part, quantity = list(quantities.items())[0]
        quantities.pop(part)
        # NOTE(review): this assignment is redundant — both branches of
        # the `if` below overwrite the same key.
        quantities[part.replace(" ", "_")] = quantity
        if part in backbone_name_list:
            # This section multiplies the backbone concentration with the
            # part:backbone molar ratio. This tricks the calculator into
            # making a picklist with the desired ratio.
            # For example, a part:backbone = 2:1 will multiply the
            # backbone concentration by 2, therefore half as much of it
            # will be added to the well.
            quantities[part.replace(" ", "_")] = \
                quantity * data.part_backbone_ratio
        else:
            quantities[part.replace(" ", "_")] = quantity
    source_plate.name = "Source"
    self.logger(message="Generating Picklist...")
    destination_plate = Plate4ti0960("Mixplate")
    if data.destination_plate:
        # An uploaded destination plate replaces the default empty one;
        # only its empty wells are used as targets.
        dest_filelike = file_to_filelike_object(data.destination_plate)
        destination_plate = plate_from_content_spreadsheet(dest_filelike)
    destination_wells = (
        well
        for well in destination_plate.iter_wells(direction="column")
        if well.is_empty
    )
    picklist, picklist_data = picklist_generator.make_picklist(
        assembly_plan,
        source_wells=source_plate.iter_wells(),
        destination_wells=destination_wells,
    )
    if picklist is None:
        return {
            "success": False,
            "message": "Some parts in the assembly plan have no "
            "corresponding well.",
            "picklist_data": picklist_data,
            "missing_parts": picklist_data.get("missing_parts", None),
        }
    future_plates = picklist.execute(inplace=False)

    def text(w):
        # Well label: construct name (when present) above the volume.
        txt = human_volume(w.content.volume)
        if "construct" in w.data:
            txt = "\n".join([w.data.construct, txt])
        return txt

    plotter = PlateTextPlotter(text)
    ax, _ = plotter.plot_plate(future_plates[destination_plate],
                               figsize=(20, 8))
    ziproot = flametree.file_tree("@memory", replace=True)
    # MIXPLATE MAP PLOT
    ax.figure.savefig(
        ziproot._file("final_mixplate.pdf").open("wb"),
        format="pdf",
        bbox_inches="tight",
    )
    plt.close(ax.figure)
    plate_to_platemap_spreadsheet(
        future_plates[destination_plate],
        lambda w: w.data.get("construct", ""),
        filepath=ziproot._file("final_mixplate.xls").open("wb"),
    )
    self.logger(message="Writing report...")
    # ASSEMBLY REPORT
    picklist_to_assembly_mix_report(
        picklist,
        ziproot._file("assembly_mix_picklist_report.pdf").open("wb"),
        data=picklist_data,
    )
    assembly_plan.write_report(
        ziproot._file("assembly_plan_summary.pdf").open("wb")
    )
    # MACHINE PICKLIST
    if data.dispenser_machine == "labcyte_echo":
        picklist_to_labcyte_echo_picklist_file(
            picklist, ziproot._file("ECHO_picklist.csv").open("w")
        )
    else:
        picklist_to_tecan_evo_picklist_file(
            picklist, ziproot._file("EVO_picklist.gwl").open("w")
        )
    # Include a copy of the uploaded source plate for traceability.
    raw = file_to_filelike_object(data.source_plate).read()
    f = ziproot._file(data.source_plate.name)
    f.write(raw, mode="wb")
    zip_data = ziproot._close()
    return {
        "file": {
            "data": data_to_html_data(zip_data, "zip"),
            "name": "picklist.zip",
            "mimetype": "application/zip",
        },
        "success": True,
    }
def write_optimization_report(
    target,
    problem,
    project_name="unnammed",
    plot_figure=True,
    constraints_evaluations=None,
    objectives_evaluations=None,
    figure_width=20,
    max_features_in_plots=300,
    file_path=None,
    file_content=None,
):
    """Write an optimization report with a PDF summary, plots, and genbanks.

    Parameters
    ----------
    target
      Path to a directory or zip file, or "@memory" for returning raw data
      of a zip file created in-memory.

    problem
      A DnaOptimizationProblem to be solved and optimized

    project_name
      Name of the project that will appear on the PDF report

    plot_figure
      If True (and geneblocks is available), include a figure of the
      sequence changes in the report.

    constraints_evaluations
      Precomputed constraints evaluations. If None provided, they will be
      computed again from the problem.

    objectives_evaluations
      Precomputed objectives evaluations. If None provided, they will be
      computed again from the problem.

    figure_width
      Width of the report's figure, in inches. The more annotations there
      will be in the figure, the wider it should be. The default should
      work for most cases.

    max_features_in_plots
      Limit to the number of features to plot (plots with thousands of
      features may take ages to plot)

    file_path
      Path to the file from which the problem was created

    file_content
      Raw content of the original file. If None, it is read from
      ``file_path``.
    """
    # Both optional backends are required for this report.
    if not PDF_REPORTS_AVAILABLE:
        raise ImportError(install_extras_message("PDF Reports"))
    if not SEQUENTICON_AVAILABLE:
        raise ImportError(install_extras_message("Sequenticon"))
    if constraints_evaluations is None:
        constraints_evaluations = problem.constraints_evaluations()
    if objectives_evaluations is None:
        objectives_evaluations = problem.objectives_evaluations()
    if isinstance(target, str):
        root = flametree.file_tree(target, replace=True)
    else:
        root = target
    # TRANSFER THE ORIGINAL FILE
    # A short md5 prefix makes the copied file's name unique and lets the
    # report reference exactly which input produced it.
    file_hash = None
    if file_path is not None:
        if file_content is None:
            with open(file_path, "rb") as f:
                file_content = f.read()
        basename = os.path.basename(file_path)
        file_hash = hashlib.md5(file_content).hexdigest()[:8]
        root._file("_".join([file_hash, basename])).write(file_content)
    # CREATE FIGURES AND GENBANKS
    diffs_figure_data = None
    if GENEBLOCKS_AVAILABLE and plot_figure:
        diffs_ax = plot_optimization_changes(problem)
        diffs_figure_data = pdf_tools.figure_data(diffs_ax.figure,
                                                  fmt="svg")
        plt.close(diffs_ax.figure)
    # GENERATE AND SAVE THE CONSTRAINTS SUMMARY
    constraints_before_after = constraints_before_after_dataframe(
        problem=problem, constraints_evaluations=constraints_evaluations)
    filename = "constraints_before_and_after.csv"
    constraints_before_after.to_csv(root._file(filename).open("w"),
                                    index=False)
    # GENERATE AND SAVE THE OBJECTIVES SUMMARY
    objectives_before_after = objectives_before_after_dataframe(
        problem=problem, objectives_evaluations=objectives_evaluations)
    filename = "objectives_before_and_after.csv"
    objectives_before_after.to_csv(root._file(filename).open("w"),
                                   index=False)
    # CREATE PDF REPORT
    html = report_writer.pug_to_html(
        path=os.path.join(ASSETS_DIR, "optimization_report.pug"),
        project_name=project_name,
        problem=problem,
        constraints_evaluations=constraints_evaluations,
        objectives_evaluations=objectives_evaluations,
        constraints_before_after=constraints_before_after,
        objectives_before_after=objectives_before_after,
        edits=problem.sequence_edits_as_array().sum(),
        diffs_figure_data=diffs_figure_data,
        file_hash=file_hash,
        sequenticons={
            label: sequenticon(seq, output_format="html_image", size=24)
            for label, seq in [
                ("before", problem.sequence_before),
                ("after", problem.sequence),
            ]
        },
    )
    report_writer.write_report(html, root._file("Report.pdf"))
    # CREATE THE "SEQUENCE EDITS" REPORT
    # Genbank of the final sequence, annotated with edits and with the
    # locations of the initially-failing constraints.
    record = problem.to_record(with_sequence_edits=True)
    breaches = problem.constraints_before.filter("failing")
    breaches_locations = breaches.locations_as_features(
        label_prefix="Breach from", merge_overlapping=True)
    record.features += breaches_locations
    SeqIO.write(record,
                root._file("final_sequence_with_edits.gb").open("w"),
                "genbank")
    # CREATE THE "FINAL SEQUENCE" REPORT
    problem.to_record(
        root._file("final_sequence.gb").open("w"),
        with_constraints=False,
        with_objectives=False,
    )
    # Return the raw zip data when the target was e.g. "@memory".
    if isinstance(target, str):
        return root._close()
import flametree # for getting/writing files and folders from dnacauldron import RestrictionLigationMix, load_record, write_record root = flametree.file_tree('.') parts = [ load_record(f._path, linear=False) for f in root.data.assemblies._all_files ] mix = RestrictionLigationMix(parts, enzyme='BsmBI') assemblies_records = mix.compute_circular_assemblies() output_folder = root._dir('output_data')._dir('combinatorial_assemblies') for i, record in enumerate(assemblies_records): output = output_folder._file("assembly_%03d.gb" % i) write_record(record, output, "genbank") print ("%d combinatorial assembly genbanks written in output_data/assemblies" % (i + 1))
def work(self): data = self.data figures = [] self.logger(message="Generating report...") records = records_from_data_files(data.files) constraints = [ dc.AvoidPattern("BsaI_site"), dc.AvoidPattern("BsmBI_site"), dc.AvoidPattern("BbsI_site"), dc.AvoidPattern("SapI_site"), dc.AvoidPattern("8x1mer"), dc.AvoidPattern("5x3mer"), dc.AvoidPattern("9x2mer"), dc.AvoidHairpins(stem_size=20, hairpin_window=200), dc.EnforceGCContent(mini=0.3, maxi=0.7, window=100), dc.EnforceGCContent(mini=0.1, maxi=0.9, window=100), dc.UniquifyAllKmers(k=15), ] dataframe = cr.constraints_breaches_dataframe(constraints, records) spreadsheet_io = BytesIO() dataframe.to_excel(spreadsheet_io) records = cr.records_from_breaches_dataframe(dataframe, records) zipped_records = flametree.file_tree("@memory") if data.include_genbanks: for record in records: target = zipped_records._file("%s.gb" % record.id) write_record(record, target) pdf_io = BytesIO() cr.breaches_records_to_pdf(records, pdf_io, logger=self.logger) return { "pdf_report": { "data": data_to_html_data( pdf_io.getvalue(), "pdf", filename="manufacturability_report.pdf", ), "name": "manufacturability_report.pdf", "mimetype": "application/pdf", }, "records": { "data": data_to_html_data( zipped_records._close(), "zip", filename="manufacturability_annotated_records.zip", ), "name": "manufacturability_annotated_records.zip", "mimetype": "application/zip", }, "spreadsheet": { "data": data_to_html_data( spreadsheet_io.getvalue(), "xlsx", filename="manufacturability_report.xlsx", ), "name": "manufacturability_report.xlsx", "mimetype": "vnd.openxmlformats-officedocument.spreadsheetml.sheet", }, }
def write_report(
    self,
    target,
    folder_name="auto",
    assembly_report_writer="default",
    logger="bar",
    include_original_parts_records=True,
):
    """Write a comprehensive simulation report to a folder or zip file.

    Parameters
    ----------
    target
      A folder path, a zip-file path, or ``"@memory"``; with
      ``"@memory"`` a virtual zip is written and its raw data returned.
    folder_name
      Name of the report folder created inside the target (a folder
      inside a folder, which can be very practical). ``"auto"`` derives
      it from the assembly plan's name.
    assembly_report_writer
      Either ``"default"`` or any AssemblyReportWriter instance.
    logger
      ``"bar"`` for a progress bar, None, or any Proglog logger.
    include_original_parts_records
      If true, the original provided part records are included in the
      report (larger files, better traceability).
    """
    if assembly_report_writer == "default":
        # All records go into one folder for the whole plan, so skip
        # per-assembly part records.
        assembly_report_writer = AssemblyReportWriter(
            include_part_records=False)
    logger = proglog.default_bar_logger(logger)
    if folder_name == "auto":
        folder_name = self.assembly_plan.name + "_simulation"
    root = file_tree(target)._dir(folder_name, replace=True)
    self._write_assembly_reports(root, assembly_report_writer,
                                 logger=logger)
    for error_type in ("error", "warning"):
        self._write_errors_spreadsheet(root, error_type=error_type)
    self._write_all_required_parts(root)
    self._write_construct_summary_spreadsheet(root)
    self._write_assembly_plan_spreadsheets(root)
    self._write_summary_stats(root)
    if len(self.cancelled):
        self._write_cancelled_assemblies(root)
    if include_original_parts_records:
        self._write_all_required_parts_records(root)
    if not self.has_single_level:
        self._plot_assembly_graph(root)
    if assembly_report_writer.include_pdf_report:
        if not PDF_REPORTS_AVAILABLE:
            raise ImportError(
                "Could not load PDF Reports. Install with `pip install pdf_reports`"
                " to generate a PDF report.")
        write_simulation_pdf_report(root._file("Report.pdf"),
                                    self._calculate_simulation_info())
    if target == "@memory":
        return root._close()
import flametree # for getting/writing files and folders from dnacauldron import RestrictionLigationMix, load_record, write_record root = flametree.file_tree(".") parts = [ load_record(f._path, topology="circular") for f in root.data.assemblies._all_files ] mix = RestrictionLigationMix(parts, enzyme="BsmBI") assemblies_records = mix.compute_circular_assemblies() output_folder = root._dir("output_data")._dir("combinatorial_assemblies") for i, record in enumerate(assemblies_records): output = output_folder._file("assembly_%03d.gb" % i) write_record(record, output, "genbank") print( "%d combinatorial assembly genbanks written in output_data/assemblies" % (i + 1) )
def full_assembly_report(
    parts,
    target,
    enzyme="BsmBI",
    max_assemblies=40,
    connector_records=(),
    include_fragments_plots="on_failure",
    include_parts_plots="on_failure",
    include_fragments_connection_graph="on_failure",
    include_assembly_plots=True,
    n_expected_assemblies=None,
    no_skipped_parts=False,
    fragments_filters="auto",
    assemblies_prefix="assembly",
    show_overhangs_in_graph=True,
    show_overhangs_in_genbank=True,
    mix_class="restriction",
):
    """Write a full assembly report in a folder or a zip.

    The report contains the final sequence(s) of the assembly in Genbank
    format as well as a .csv report on all assemblies produced and PDF
    figures to allow a quick overview or diagnostic.

    Folder ``assemblies`` contains the final assemblies, ``assembly_graph``
    contains a schematic view of how the parts assemble together, folder
    ``fragments`` contains the details of all fragments produced by the
    enzyme digestion, and folder ``provided_parts`` contains the original
    input (genbanks of all parts provided for the assembly mix).

    Parameters
    ----------

    parts
      List of Biopython records representing the parts, potentially on
      entry vectors. All the parts provided should have different
      attributes ``name`` as it is used to name the files.

    target
      Either a path to a folder, or to a zip file, or ``@memory`` to return
      a string representing zip data (the latter is particularly useful for
      website backends).

    enzyme
      Name of the enzyme to be used in the assembly

    max_assemblies
      Maximal number of assemblies to consider. If there are more than this
      the additional ones won't be returned.

    fragments_filters
      Fragments filters to be used to filter out fragments before looking
      for assemblies. If left to auto, fragments containing the enzyme
      site will be filtered out.

    connector_records
      List of connector records (a connector is a part that can bridge a
      gap between two other parts), from which only the essential elements
      to form an assembly will be automatically selected and added to the
      other parts.

    assemblies_prefix
      Prefix for the file names of all assemblies. They will be named
      ``PRE01.gb``,``PRE02.gb``, ``PRE03.gb`` where ``PRE`` is the prefix.

    include_parts_plots, include_assembly_plots
      These two parameters control the rendering of extra figures which are
      great for troubleshooting, but not strictly necessary, and they slow
      down the report generation considerably. They can be True, False, or
      "on_failure" to be True only if the number of assemblies differs from
      n_expected_assemblies

    n_expected_assemblies
      Expected number of assemblies. No exception is raised if this number
      is not met, however, if parameters ``include_parts_plots`` and
      ``include_assembly_plots`` are set to "on_failure", then extra plots
      will be plotted.

    include_fragments_connection_graph
      True, False, or "on_failure" — whether to plot the (bigger, more
      detailed) fragments connections graph.

    no_skipped_parts
      If True, only assemblies that contain every provided part are kept
      (see the FragmentSetContainsPartsFilter below).

    show_overhangs_in_graph, show_overhangs_in_genbank
      Whether overhangs appear in the slots graph / as annotations in the
      output genbanks.

    mix_class
      "restriction" (default, resolved to RestrictionLigationMix) or any
      alternative mix class with the same constructor signature.

    Returns
    -------
    n_constructs, or (n_constructs, zip_data) when target is "@memory".
    """
    # Make prefix Genbank friendly
    assemblies_prefix = assemblies_prefix.replace(" ", "_")[:18]
    if mix_class == "restriction":
        mix_class = RestrictionLigationMix
    part_names = [p.name for p in parts]
    # Duplicate part names would make output files overwrite each other.
    non_unique = [e for (e, count) in Counter(part_names).items()
                  if count > 1]
    non_unique = list(set(non_unique))
    if len(non_unique) > 0:
        raise ValueError("All parts provided should have different names. "
                         "Assembly (%s) contains several times the parts %s "
                         % (" ".join(part_names), ", ".join(non_unique)))
    if fragments_filters == "auto":
        fragments_filters = [NoRestrictionSiteFilter(enzyme)]

    report = file_tree(target, replace=True)

    assemblies_dir = report._dir("assemblies")

    mix = mix_class(parts, enzyme, fragments_filters=fragments_filters)
    if len(connector_records):
        try:
            mix.autoselect_connectors(connector_records)
        except AssemblyError as err:
            # Connector selection failed: dump diagnostic graphs before
            # re-raising so the user can see what is missing.
            ax = mix.plot_slots_graph(
                with_overhangs=show_overhangs_in_graph,
                show_missing=True,
                highlighted_parts=part_names,
            )
            f = report._file("parts_graph.pdf")
            ax.figure.savefig(f.open("wb"), format="pdf",
                              bbox_inches="tight")
            plt.close(ax.figure)

            # PLOT CONNEXIONS GRAPH (BIGGER, MORE INFOS)
            ax = mix.plot_connections_graph()
            f = report._file("connections_graph.pdf")
            ax.figure.savefig(f.open("wb"), format="pdf",
                              bbox_inches="tight")
            plt.close(ax.figure)

            raise err

    # ASSEMBLIES
    filters = (FragmentSetContainsPartsFilter(part_names), )
    assemblies = mix.compute_circular_assemblies(
        annotate_homologies=show_overhangs_in_genbank,
        fragments_sets_filters=filters if no_skipped_parts else (),
    )
    # zip(range(max_assemblies), ...) truncates the (possibly infinite)
    # assemblies generator to at most max_assemblies items.
    assemblies = sorted(
        [asm for (i, asm) in zip(range(max_assemblies), assemblies)],
        key=lambda asm: str(asm.seq),
    )
    assemblies_data = []
    i_asm = list(zip(range(max_assemblies), assemblies))
    for i, asm in i_asm:
        # A single assembly keeps the bare prefix as its name.
        if len(i_asm) == 1:
            name = assemblies_prefix
        else:
            name = "%s_%03d" % (assemblies_prefix, (i + 1))
        asm.name = asm.id = name
        assemblies_data.append(
            dict(
                assembly_name=name,
                parts=" & ".join([name_fragment(f_)
                                  for f_ in asm.fragments]),
                number_of_parts=len(asm.fragments),
                assembly_size=len(asm),
            ))
        write_record(asm, assemblies_dir._file(name + ".gb"), "genbank")
        if include_assembly_plots:
            gr_record = AssemblyTranslator().translate_record(asm)
            ax, gr = gr_record.plot(figure_width=16)
            ax.set_title(name)
            ax.set_ylim(top=ax.get_ylim()[1] + 1)
            ax.figure.savefig(
                assemblies_dir._file(name + ".pdf").open("wb"),
                format="pdf",
                bbox_inches="tight",
            )
            plt.close(ax.figure)

    # Resolve the "on_failure" plot options: a run "fails" when no assembly
    # was found, or the count differs from the expected number.
    is_failure = (len(assemblies) == 0) or (
        (n_expected_assemblies is not None)
        and (len(assemblies) != n_expected_assemblies))
    if include_fragments_plots == "on_failure":
        include_fragments_plots = is_failure
    if include_parts_plots == "on_failure":
        include_parts_plots = is_failure
    if include_fragments_connection_graph == "on_failure":
        include_fragments_connection_graph = is_failure

    # PROVIDED PARTS
    if include_parts_plots:
        provided_parts_dir = report._dir("provided_parts")
        for part in parts:
            linear = record_is_linear(part, default=False)
            ax, gr = plot_cuts(part, enzyme, linear=linear)
            f = provided_parts_dir._file(part.name + ".pdf").open("wb")
            ax.figure.savefig(f, format="pdf", bbox_inches="tight")
            plt.close(ax.figure)
            gb_file = provided_parts_dir._file(part.name + ".gb")
            write_record(part, gb_file, "genbank")

    # FRAGMENTS
    if include_fragments_plots:
        fragments_dir = report._dir("fragments")
        # Counter used to disambiguate fragments sharing the same name.
        seenfragments = defaultdict(lambda *a: 0)
        for fragment in mix.fragments:
            gr = BiopythonTranslator().translate_record(fragment)
            ax, _ = gr.plot()
            name = name_fragment(fragment)
            seenfragments[name] += 1
            file_name = "%s_%02d.pdf" % (name, seenfragments[name])
            ax.figure.savefig(
                fragments_dir._file(file_name).open("wb"),
                format="pdf",
                bbox_inches="tight",
            )
            plt.close(ax.figure)

    # PLOT CONNEXIONS GRAPH (BIGGER, MORE INFOS)
    if include_fragments_connection_graph:
        ax = mix.plot_connections_graph()
        f = report._file("connections_graph.pdf")
        ax.figure.savefig(f.open("wb"), format="pdf", bbox_inches="tight")
        plt.close(ax.figure)

    graph = mix.slots_graph(with_overhangs=False)
    slots_dict = {
        s: "|".join(list(pts))
        for s, pts in mix.compute_slots().items()
    }
    # Slots whose degree differs from 2 break the expected linear/circular
    # chain of parts — reported in non_linear_nodes.csv below.
    non_linear_slots = [(slots_dict[n],
                         "|".join([slots_dict[b]
                                   for b in graph.neighbors(n)]))
                        for n in graph.nodes() if graph.degree(n) != 2]

    # PLOT SLOTS GRAPH
    if len(connector_records):
        highlighted_parts = part_names
    else:
        highlighted_parts = []
    ax = mix.plot_slots_graph(
        with_overhangs=show_overhangs_in_graph,
        show_missing=True,
        highlighted_parts=highlighted_parts,
    )
    f = report._file("parts_graph.pdf")
    ax.figure.savefig(f.open("wb"), format="pdf", bbox_inches="tight")
    plt.close(ax.figure)

    if len(non_linear_slots):
        report._file("non_linear_nodes.csv").write(
            "\n".join(["part,neighbours"] + [
                "%s,%s" % (part, neighbours)
                for part, neighbours in non_linear_slots
            ]))

    df = pandas.DataFrame.from_records(
        assemblies_data,
        columns=["assembly_name", "number_of_parts", "assembly_size",
                 "parts"],
    )
    df.to_csv(report._file("report.csv").open("w"), index=False)
    n_constructs = len(df)
    if target == "@memory":
        return n_constructs, report._close()
    else:
        # For folder/zip-path targets, flush/close the tree but only
        # return the construct count.
        if isinstance(target, str):
            report._close()
        return n_constructs
def generate_report(records, digestions, ladder, group_by="digestions",
                    show_band_sizes=False, full_report=True):
    """Simulate the digestions of the records and build a visual report.

    Parameters
    ----------
    records
      Biopython records of the sequences to digest (``record.id`` is used
      as the label).
    digestions
      List of digestions, each one a list/tuple of enzyme names.
    ladder
      Bandwagon ladder used to migrate and scale the band patterns.
    group_by
      "digestions" for one subplot per digestion (one lane per record),
      anything else to group by record instead.
    show_band_sizes
      If True, each band in the summary figure is labeled with its size.
    full_report
      If True, detail/summary PDFs are written into an in-memory zip whose
      raw data is returned along with the preview.

    Returns
    -------
    preview, report
      ``preview`` is base64 SVG data of the summary figure; ``report`` is
      the zip archive's raw data, or None when ``full_report`` is False.
    """
    files_contents = []
    # Nested mapping: {category_1: {category_2: band_sizes}}.
    all_patterns = defaultdict(lambda *a: {})
    zip_root = flametree.file_tree("@memory")
    with PdfPages(zip_root._file("Details.pdf").open("wb")) as details_pdf:
        for record in records:
            record_label = record.id
            for enzymes in digestions:
                enzymes_label = " + ".join(sorted(enzymes))
                basename = "%s--%s" % (record_label.replace(
                    " ", "_"), "+".join(enzymes))
                record_digestion = annotate_digestion_bands(
                    record, enzymes, ladder)
                # FIX: bind the current digestion via a default argument.
                # A plain closure (lambda fh: write_record(record_digestion,
                # ...)) late-binds the loop variable, so every stored writer
                # would write the *last* digestion of the loop.
                files_contents.append([
                    ("genbanks", basename + ".gb"),
                    lambda fh, rec=record_digestion: write_record(
                        rec, fh, "genbank")
                ])
                if full_report:
                    (ax, _, _) = plot_record_digestion(
                        record_digestion, ladder, record_label,
                        enzymes_label)
                    details_pdf.savefig(ax.figure, bbox_inches="tight")
                    plt.close(ax.figure)
                bands = sorted([
                    f.qualifiers["band_size"]
                    for f in record_digestion.features
                    if f.qualifiers.get("band_size", False)
                ])
                if group_by == "digestions":
                    all_patterns[enzymes_label][record_label] = bands
                else:
                    all_patterns[record_label][enzymes_label] = bands
    # One subplot per first-level category, one lane per second-level one.
    Y = len(all_patterns)
    X = len(list(all_patterns.values())[0])
    fig, axes = plt.subplots(Y, 1, figsize=(0.9 * X, 3 * Y))
    if Y == 1:
        # plt.subplots returns a bare Axes (not a list) when Y == 1.
        axes = [axes]
    bands_props = {"band_thickness": 2.5}
    if show_band_sizes:
        bands_props.update(dict(label='=size', label_fontdict=dict(size=6)))
    for ax, (cat1, cat2s) in zip(axes, sorted(all_patterns.items())):
        pattern_set = bw.BandsPatternsSet(patterns=[
            bw.BandsPattern(_bands,
                            ladder=ladder,
                            # Lane labels only on the topmost subplot.
                            label=cat2 if (ax == axes[0]) else None,
                            label_fontdict=dict(rotation=70),
                            global_bands_props=bands_props)
            for cat2, _bands in cat2s.items()
        ],
                                          ladder=ladder,
                                          ladder_ticks=4,
                                          ticks_fontdict=dict(size=9),
                                          label=cat1)
        pattern_set.plot(ax)
    preview = matplotlib_figure_to_svg_base64_data(fig, bbox_inches="tight")
    if full_report:
        fig.savefig(zip_root._file("summary.pdf").open("wb"),
                    format="pdf",
                    bbox_inches="tight")
        report = zip_root._close()
    else:
        report = None
    return preview, report
def work(self):
    """Run the digestion-selection job described by ``self.data``.

    Returns a dict with the summary figure (base64 SVG data), the selected
    digestions and their score, plus an optional PDF of the cut maps.
    """
    self.logger(message="Exploring possible digestions...")
    data = self.data
    ladder = LADDERS[data.ladder]
    records = records_from_data_files(data.files)
    for rec in records:
        set_record_topology(rec, data.topology)

    self.logger(message="Initializing...")
    if data.goal == "ideal":
        min_bands, max_bands = data.bands_range
        problem = IdealDigestionsProblem(
            sequences=records,
            enzymes=data.possible_enzymes,
            ladder=ladder,
            min_bands=min_bands,
            max_bands=max_bands,
            max_enzymes_per_digestion=data.max_enzymes,
        )
    else:
        problem = SeparatingDigestionsProblem(
            sequences=records,
            enzymes=data.possible_enzymes,
            ladder=ladder,
            max_enzymes_per_digestion=data.max_enzymes,
        )

    self.logger(message="Selecting digestions...")
    score, selected_digestions = problem.select_digestions(
        max_digestions=data.max_digestions, search="full")

    if data.show_bands_sizes:
        bands_props = dict(label="=size", label_fontdict=dict(size=6))
    else:
        bands_props = None
    axes = problem.plot_digestions(
        selected_digestions,
        patterns_props={"label_fontdict": {"rotation": 35}},
        bands_props=bands_props,
    )
    figure_data = matplotlib_figure_to_svg_base64_data(
        axes[0].figure, bbox_inches="tight")

    pdf_data = None
    if data.plot_cuts:
        # Rebuild an unnamed ladder from the same bands for the cut maps.
        cuts_ladder = bandwagon.custom_ladder(None, ladder.bands)
        self.logger(message="Plotting cuts maps...")
        zip_root = flametree.file_tree("@memory")
        bandwagon.plot_records_digestions(
            target=zip_root._file("Details.pdf").open("wb"),
            ladder=cuts_ladder,
            records_and_digestions=[
                (rec, digestion)
                for rec in records
                for digestion in selected_digestions
            ],
        )
        raw_pdf = zip_root["Details.pdf"].read("rb")
        pdf_data = data_to_html_data(raw_pdf, datatype="pdf")

    return {
        "figure_data": figure_data,
        "digestions": selected_digestions,
        "score": score,
        "pdf_data": pdf_data,
    }