def get_help(self, widget, remote=False, try_path=False):
    """
    Refresh the help text and the command suggestion for the selected MSA tool.

    Returns immediately while no genes are loaded.

    :param widget: the widget that emitted the signal. It is only queried
        (via ``get_active_text()``) on the local branch when ``try_path`` is set.
    :param remote: if truthy, use the remote algorithm selector and point the
        helper at the matching script below ``repo.TOOLS / 'MSA_clients'``.
    :param try_path: on the local branch, take the executable from ``widget``
        instead of the ``shutil.which`` result.
    :return: None
    """
    data = self.data
    iface = self.iface
    if not data.genes:
        return

    if remote:
        # NOTE(review): try_path is never consulted on this branch.
        data.msa.algo = repo.toalgo(iface.remote_algo.get_active_text())
        client_script = repo.TOOLS / 'MSA_clients' / (data.msa.algo + '.py')
        launcher = '%s %s ' % (sys.executable, client_script)
        self.set_helpers(launcher, iface.remote_help, data.msa.remote_cmd,
                         data.msa.algo, True, iface.remote_cmd)
        return

    data.msa.algo = repo.toalgo(iface.msa_algo.get_active_text())
    on_path = shutil.which(data.msa.algo)
    exe = widget.get_active_text() if try_path else on_path

    if not exe:
        # nothing to run: clear the path box, explain why, offer no command
        iface.msa_exe.unselect_all()
        not_found = (data.msa.algo
                     + ' was not found on your system $PATH. You can try '
                     + 'manually specifying the path to the executable.')
        iface.msa_help.get_buffer().props.text = not_found
        iface.msa_cmd.get_buffer().props.text = ''
        return

    # fetch the --help output and show it in the lookup field on the right
    iface.msa_exe.set_filename(exe)
    help_cmd = '%s --help; exit 0' % exe
    self.set_helpers(help_cmd, iface.msa_help, data.msa.cmd,
                     data.msa.algo, False, iface.msa_cmd)
def __init__(self):
    """
    Set up the MSA page: combo box columns, persistence of the edited
    command lines, and all signal handlers and accelerators.
    """
    super().__init__()
    data = self.data
    iface = self.iface

    iface.msa_algo.set_entry_text_column(0)
    iface.msa_algo.set_id_column(0)
    iface.remote_algo.set_id_column(0)

    def _remember_cmd(combo_name, store_name):
        """
        Build a focus-out handler that stores the stripped text of the
        command view under the currently selected algorithm.
        Both the combo box and the target dict are looked up when the
        handler fires, not when it is created.
        """

        def _on_focus_out(text_view, *_ignored):
            getattr(data.msa, store_name).update({
                repo.toalgo(getattr(iface, combo_name).get_active_text()):
                    text_view.get_buffer().props.text.strip()
            })

        return _on_focus_out

    iface.msa_cmd.connect('focus_out_event',
                          _remember_cmd('msa_algo', 'cmd'))
    iface.remote_cmd.connect('focus_out_event',
                             _remember_cmd('remote_algo', 'remote_cmd'))

    # algorithm and file choosers
    iface.msa_algo.connect('changed', self.get_help)
    iface.remote_algo.connect('changed', self.get_help, True)
    iface.msa_import.connect('file-set', self.load_msa)
    # NOTE(review): registered with remote=True and try_path=True, so
    # get_help takes its remote branch here - confirm this is intended.
    iface.msa_exe.connect('file-set', self.get_help, True, True)

    # build buttons: click handler plus the Return accelerator
    for button, extra_args in ((iface.msa_build, ()),
                               (iface.remote_build, (True,))):
        button.connect('clicked', self.start_align, *extra_args)
        self.bind_accelerator(self.accelerators, button, 'Return')

    data.msa.stack_child_name = iface.align_stack.get_visible_child_name()
def reload_ui_state(self):
    """
    Push the stored MSA page state (``self.data.msa``) back into the widgets:
    visible stack child, both algorithm selections, the optional executable
    and import file names, and the stored command line for each selection.
    """
    state = self.data.msa
    ui = self.iface

    def _stored_cmd(store, algo_id):
        # command text stored for this algorithm, or '' if there is none
        return store.get(repo.toalgo(algo_id), '')

    ui.align_stack.set_visible_child_name(state.stack_child_name)

    ui.msa_algo.set_active_id(state.msa_algo_id)
    if state.msa_exe_filename:
        ui.msa_exe.set_filename(state.msa_exe_filename)

    ui.remote_algo.set_active_id(state.remote_algo_id)
    if state.msa_import_filename:
        ui.msa_import.set_filename(state.msa_import_filename)

    local_cmd = _stored_cmd(state.cmd, state.msa_algo_id)
    ui.msa_cmd.get_buffer().props.text = local_cmd

    remote_cmd = _stored_cmd(state.remote_cmd, state.remote_algo_id)
    ui.remote_cmd.get_buffer().props.text = remote_cmd
def load_msa(self, widget):
    """
    Import an existing MSA chosen in a file chooser.

    The selected file is copied to ``self.wd / repo.PATHS.import_msa``. On
    success the dataset is reduced to the single pseudo-gene ``'import'``
    whose ids are the record ids of the copied FASTA file, an aligner is
    prepared, a two-column ``metadata.tsv`` is written and the UI state saved.

    If the copy fails for any reason other than source and target being the
    same file, the error is shown and logged and the method returns without
    touching the dataset, so a missing or stale import file is never parsed.

    :param widget: the file chooser; ``get_filename()`` supplies the source.
    :return: None
    """
    data = self.data
    iface = self.iface
    target = self.wd / repo.PATHS.import_msa

    try:
        Path.mkdir(self.wd / repo.PATHS.import_msa.parent, exist_ok=True)
        shutil.copy(widget.get_filename(), target)
    except shutil.SameFileError:
        # the chosen file already is the import target; nothing to copy
        pass
    except Exception as ex:
        # report and abort: continuing would hash and parse a file that
        # was not (re-)imported
        self.show_notification(str(ex))
        LOG.error(ex)
        return

    self.get_hashes(repo.PATHS.import_msa, PAGE)

    data.genes = ['import']
    import_ids = {record.id for record in SeqIO.parse(target, 'fasta')}
    data.gene_ids = {'import': import_ids}

    # only the aligner object is needed here, not the suggested command
    iface.aligner, _ = self.get_msa_build_cmd(
        repo.toalgo(iface.msa_algo.get_active_text()), self.wd, data.genes)

    # write a metadata.tsv
    with open(self.wd / repo.PATHS.tsv, 'w') as metadata:
        metadata.write('id\tgene\n')
        for _id in import_ids:
            metadata.write('%s\timport\n' % _id)

    LOG.debug('using imported MSA')
    self.set_changed(PAGE, False)
    self.save_ui_state()
def _do_gbl3(self, shared_ids, arg, gbar, barspace):
    """
    Trim the per-gene MSAs, concatenate them and render the before/after plots.

    For every gene in ``data.genes`` the raw MSA is read, records whose id is
    in ``data.gbl.ignore_ids`` are dropped (and backed up), the alignment is
    appended column-wise to one integer array, and - unless the preset is
    ``'skip'`` - the command ``arg % raw_msa`` is run and its mask file is
    parsed to collect the retained column indices. Afterwards the MSAs are
    concatenated via ``iface.aligner``, hashed, and plotted into the left and
    right viewports. Progress is reported through ``iface.text``,
    ``iface.i`` and ``iface.frac``.

    :param shared_ids: record ids expected in every MSA; compared against
        ``sorted(records.keys())``, so presumably a sorted list - TODO confirm
    :param arg: %-format template of the trimming command, filled with the
        raw MSA path and executed through the shell
    :param gbar: if truthy, draw a gene marker bar below the plots and save a
        legend for it; otherwise draw x-axis ticks on the MSA plot itself
    :param barspace: divided by ``data.gbl_shape[1]`` to get the fraction of
        the figure reserved at the bottom for the marker bar
    :return: always True; ``self.stop_gbl(errors)`` is scheduled via
        ``GObject.idle_add`` before returning
    """
    data = self.data
    iface = self.iface
    errors = list()
    msa_lens = list()  # number of columns of each gene's raw MSA, in order
    blocks = list()  # retained column indices in the concatenated array
    # start with zero columns; each gene's MSA is stacked to the right
    array = np.empty(shape=(len(shared_ids), 0), dtype=int)
    for gene in data.genes:
        iface.text = '%s: read MSA' % gene
        LOG.debug(iface.text)
        raw_msa = (self.wd / gene / ('%s_raw_msa.fasta' % gene)).resolve()
        msa = self.wd / gene / ('%s_msa.fasta' % gene)
        records = {r.id: r for r in SeqIO.parse(raw_msa, 'fasta')}
        # ids present in this MSA that are marked to be ignored
        take_out = {
            _id
            for _id in records.keys() if _id in data.gbl.ignore_ids
        }
        # remove them from records, keeping the removed records around
        take_out = {_id: records.pop(_id) for _id in take_out}
        if take_out:
            # write newly dropped sequences to backup file
            new_take_out = {
                _id: r
                for _id, r in take_out.items()
                if _id not in iface.tempspace.bak_ignore
            }
            # append mode: earlier backups in this file are kept
            with open(self.wd / gene / ('%s_raw_msa_dropped.fasta' % gene),
                      'a') as fasta:
                SeqIO.write(new_take_out.values(), fasta, 'fasta')
            # overwrite MSA without all dropped samples
            with open(raw_msa, 'w') as fasta:
                SeqIO.write(records.values(), fasta, 'fasta')
        if sorted(records.keys()) != shared_ids:
            # abort the whole run on the first mismatching gene
            errors.append(
                'MSA for %s does not match the dataset, please re-build.'
                % gene)
            sleep(.1)
            GObject.idle_add(self.stop_gbl, errors)
            return True
        # one row per shared id, in shared_ids order
        ar = np.array([
            repo.seqtoint(records[_id].seq.upper()) for _id in shared_ids
        ])
        msa_lens.append(ar.shape[1])
        # get the array columns that are not only gaps
        usable_sites = [
            i for i in range(ar.shape[1])
            if set(ar[:, i]) != {repo.toint('-')}
        ]
        array = np.hstack((
            array,
            ar,
        ))  # shared.SEP would need to be stacked here
        del ar
        # store (columns, rows) of the concatenated array so far
        data.msa_shape[:2] = array.shape[::-1]
        if iface.gbl_preset.get_active_text() == 'skip':
            # no trimming: the raw MSA is used as the final MSA
            LOG.debug('skipping %s' % gene)
            shutil.copy(raw_msa, msa)
            iface.i += 1
            continue
        iface.text = '%s: run Gblocks' % gene
        LOG.debug(iface.text)
        with open(self.wd / gene / 'gblocks.log', 'w') as log_handle:
            try:
                LOG.debug(arg % raw_msa)
                # NOTE(review): the command string is interpreted by the
                # shell; confirm that raw_msa paths are quoted inside arg
                subprocess.run(arg % raw_msa,
                               shell=True,
                               check=True,
                               stdout=log_handle,
                               stderr=log_handle)
            except (OSError, subprocess.CalledProcessError) as e:
                # record the failure and move on to the next gene;
                # iface.i is not advanced for this gene
                errors.append(str(e))
                log_handle.write(str(e))
                continue
        # parse result
        iface.text = '%s: parse result' % gene
        LOG.debug(iface.text)
        shutil.move(raw_msa.with_suffix('.fasta.txt'), msa)
        # get the good blocks from the last pseudo-sequence in the text mask file
        # NOTE(review): if the mask file yields no records, mask keeps the
        # value from a previous gene or is unbound - confirm this cannot happen
        for pseudo_seq in SeqIO.parse(
                raw_msa.with_suffix('.fasta.txtMask'), 'fasta'):
            mask = pseudo_seq
        # map the Gblocks mask to the original MSA sites
        line_blocks = [
            usable_sites[i] for i, char in enumerate(mask.seq)
            if char == '#'
        ]
        LOG.debug(line_blocks)
        if not line_blocks:
            err = '%s: no good blocks' % gene
            LOG.error(err)
            errors.append(err)
            continue
        # offset of this gene's first column in the concatenated array:
        # the summed lengths of all genes before it
        shift = sum(msa_lens[:-1])
        blocks.extend([i + shift for i in line_blocks])
        iface.i += 1
    data.msa_lens = msa_lens
    iface.text = 'concatenating MSAs'
    # remember which ids were ignored in this run, for the next backup check
    iface.tempspace.bak_ignore = {i for i in data.gbl.ignore_ids}
    if 'aligner' not in iface:
        # NOTE(review): cmd is not used afterwards in this method
        iface.aligner, cmd = self.get_msa_build_cmd(
            repo.toalgo(iface.msa_algo.get_active_text()), self.wd,
            data.genes)
    iface.aligner.reset_paths(self.wd, self.wd / repo.PATHS.msa)
    data.msa_shape[2], data.msa_shape[3] = iface.aligner.concat_msa(
        gui=shared_ids)
    iface.text = 'computing SHA256 hash'
    LOG.debug(iface.text)
    self.get_hashes(repo.PATHS.msa, PAGE)
    iface.i += 1
    iface.text = 'plot MSAs'
    LOG.debug(iface.text)
    data.gbl_shape[0] = array.shape[1] * self.get_hadj()
    # make gaps transparent
    array = np.ma.masked_where(array > repo.toint('else'), array)
    # create a transparency mask
    gbl_mask = np.full(array.shape, repo.ALPHA)
    # retained columns are drawn fully opaque
    gbl_mask[:, blocks] = 1
    data.msa_shape[2] = len(blocks)  # for completeness
    LOG.debug('msa shape: %s' % str(data.msa_shape))
    # share of columns that was retained
    x_ratio = data.msa_shape[2] / data.msa_shape[0]
    LOG.debug('x ratio: %.3f' % x_ratio)
    # adjust maximum size
    # lower the scale until the larger dimension times scale is <= 2**14
    scale = 6
    while max(data.msa_shape[:2]) * scale > 2**14:
        scale -= 1
    LOG.debug('scaling gbl with %d' % scale)
    if iface.gbl_preset.get_active_text() != 'skip':
        # two passes: left = all columns with the transparency mask,
        # right = retained columns only, fully opaque.
        # blocks and x_ratio are re-bound by the loop; the zip arguments
        # are built once, before the first iteration, from the outer values.
        for alpha, blocks, gtk_bin, png_path, x_ratio, width \
                in zip([gbl_mask, 1], [range(array.shape[1]), blocks],
                       [iface.gbl_left_vp, iface.gbl_right_vp],
                       [repo.PATHS.left, repo.PATHS.right], [1, x_ratio],
                       [data.msa_shape[0], data.msa_shape[2]]):
            # figure size is set explicitly below, not via figsize/dpi
            f = Figure()
            f.set_facecolor('none')
            f.set_figheight(data.msa_shape[1] / repo.DPI * 5)
            f.set_figwidth(max(1, width) / repo.DPI)
            # leave room at the bottom for the gene legend bar
            b = barspace / data.gbl_shape[1]
            ax = f.add_axes([0, b, 1, 1 - b])
            # NOTE(review): mat is not used afterwards in this method
            mat = ax.matshow(array[:, blocks],
                             alpha=alpha,
                             cmap=ListedColormap(repo.colors),
                             vmin=-.5,
                             vmax=len(repo.colors) - .5,
                             aspect='auto')
            if not gbar:
                LOG.debug('adding ticks')
                # hide the frame, keep only x ticks at every 100th column
                [
                    ax.spines[t].set_visible(False)
                    for t in ['left', 'right', 'top', 'bottom']
                ]
                ax.yaxis.set_visible(False)
                ax.xaxis.set_ticks_position('bottom')
                ax.tick_params(colors=iface.FG,
                               pad=.2,
                               length=0,
                               labelsize=1)
                ax.xaxis.set_ticks([
                    i for i in range(1, array[:, blocks].shape[1] - 1)
                    if not i % 100
                ])
            else:
                ax.axis('off')
                LOG.debug('adding gene marker bar')
                # build matrix of gene indicators
                gm = [[i] * l for i, l in enumerate(msa_lens)]
                if x_ratio == 1:
                    # add the spacer
                    for i in range(len(data.genes) - 1):
                        gm[i] += [-1] * len(repo.SEP)
                # flatten the per-gene lists into one row
                gm = [gi for gl in gm for gi in gl]
                # turn into np array
                gm = np.vstack([np.array(gm)] * 2)
                # make spacer transparent
                gm = np.ma.masked_where(gm < 0, gm)
                # trim the array early
                gm = gm[:, blocks]
                gene_colors = get_cmap('GnBu', len(data.genes))
                # plot the marker bar onto the MSA graphic
                bax = f.add_axes([0, b * .4, 1, b / 3])
                # NOTE(review): bar is not used afterwards in this method
                bar = bax.pcolormesh(gm, cmap=gene_colors)
                # frame is hidden spine by spine so the x ticks stay visible
                [
                    bax.spines[t].set_visible(False)
                    for t in ['left', 'right', 'top', 'bottom']
                ]
                bax.yaxis.set_visible(False)
                bax.xaxis.set_ticks_position('bottom')
                bax.tick_params(colors=iface.FG,
                                pad=.2,
                                length=0,
                                labelsize=1)
                bax.xaxis.set_ticks(
                    [i for i in range(1, gm.shape[1] - 1) if not i % 100])
                # plot a legend for the gene marker bar
                with plt.rc_context({
                        'axes.edgecolor': iface.FG,
                        'xtick.color': iface.FG
                }):
                    iface.text = 'gene marker bar'
                    LOG.debug(iface.text)
                    Path.mkdir(self.wd / repo.PATHS.phylo_msa.parent,
                               exist_ok=True)
                    fig = plt.figure(figsize=(4, .2))
                    cax = fig.add_subplot(111)
                    # one tick at the centre of each gene's colour segment
                    cbar = ColorbarBase(
                        ax=cax,
                        cmap=gene_colors,
                        orientation='horizontal',
                        ticks=[(.5 / len(data.genes) +
                                j * 1 / len(data.genes))
                               for j in range(len(data.genes))])
                    cbar.ax.set_xticklabels(data.genes)
                    fig.savefig(self.wd / repo.PATHS.gbar,
                                transparent=True,
                                bbox_inches='tight',
                                pad_inches=0,
                                dpi=600)
                    plt.close(fig)
                    del fig, cbar
            iface.i += 1
            iface.text = 'save PNG'
            LOG.debug(iface.text)
            Path.mkdir(self.wd / png_path.parent, exist_ok=True)
            f.savefig(self.wd / png_path,
                      transparent=True,
                      dpi=scale * repo.DPI,
                      bbox_inches='tight',
                      pad_inches=0.00001)
            if iface.rasterize.props.active:
                iface.text = 'place PNG'
                LOG.debug(iface.text)
                # NOTE(review): data.gbl_shape[0] already includes
                # get_hadj() from above - confirm the second factor is wanted
                self.load_image(
                    iface.zoomer, PAGE, gtk_bin, self.wd / png_path,
                    data.gbl_shape[0] * x_ratio * self.get_hadj(),
                    data.gbl_shape[1])
            else:
                iface.text = 'place vector'
                LOG.debug(iface.text)
                canvas = FigureCanvas(f)
                canvas.set_size_request(
                    max(len(blocks) * self.get_hadj(), -1),
                    data.gbl_shape[1])  # width, height
                try:
                    # replace whatever the container currently shows
                    ch = gtk_bin.get_child()
                    if ch:
                        gtk_bin.remove(ch)
                    gtk_bin.add(canvas)
                except Exception as ex:
                    LOG.error(ex)
            iface.i += 1
            gtk_bin.realize()
            gtk_bin.show_all()
    # re-size
    LOG.debug('re-sizing again')
    for wi in [iface.gbl_left, iface.gbl_right]:
        wi.set_max_content_height(data.gbl_shape[1])
    self.load_colorbar(iface.palplot2)
    # show the gene legend only when the marker bar was requested
    if gbar:
        self.load_colorbar(iface.gbar1, gbar=True)
        iface.gbar1.set_visible(True)
    else:
        iface.gbar1.set_visible(False)
    iface.text = 'idle'
    iface.frac = 1
    sleep(.1)
    GObject.idle_add(self.stop_gbl, errors)
    return True