def plot_parent_coding_hist(self, fhand):
    """Draw the histogram of the parental coding support indexes.

    The values stored in ``self.indexes`` are binned into 20 bars with a
    logarithmic count scale, a vertical line marks
    ``self.parent_index_threshold`` and the result is handed, together
    with ``fhand``, to ``_print_figure`` (no legend).
    """
    figure = Figure()
    hist_axes = figure.add_subplot(111)

    hist_options = {'fill': True, 'log': True, 'bins': 20, 'rwidth': 1}
    hist_axes.hist(self.indexes, **hist_options)
    hist_axes.axvline(x=self.parent_index_threshold)

    hist_axes.set_xlabel('support index for parental coding')
    hist_axes.set_ylabel('num. SNPs')
    _print_figure(hist_axes, figure, fhand, plot_legend=False)
def plot_failed_freq_dist(self, fhand):
    """Draw the distribution of the failed frequencies.

    ``self._failed_freqs`` is plotted as a 20-bin histogram with a
    logarithmic count scale and ``self.max_failed_freq`` is marked with a
    vertical line.  The figure is passed to ``_print_figure`` along with
    ``fhand`` and without a legend.
    """
    figure = Figure()
    freq_axes = figure.add_subplot(111)

    # Label the axes first; this is independent of the drawing calls.
    freq_axes.set_xlabel('% of adjacent SNPs segregating differently')
    freq_axes.set_ylabel('num. SNPs')

    freq_axes.hist(self._failed_freqs, fill=True, log=True, bins=20,
                   rwidth=1)
    freq_axes.axvline(x=self.max_failed_freq)

    _print_figure(freq_axes, figure, fhand, plot_legend=False)
def draw_hist(self, fhand):
    """Draw the histogram of the scores collected in ``self._scores``.

    A 20-bin histogram with a logarithmic count scale is drawn, a vertical
    line is placed at ``self.threshold`` and the figure is given to
    ``_print_figure`` together with ``fhand`` (legend disabled).
    """
    score_figure = Figure()
    score_axes = score_figure.add_subplot(111)

    score_axes.hist(
        self._scores,
        fill=True,
        log=True,
        bins=20,
        rwidth=1,
    )
    score_axes.axvline(x=self.threshold)

    score_axes.set_xlabel('% complexity score')
    score_axes.set_ylabel('num. SNPs')

    _print_figure(score_axes, score_figure, fhand, plot_legend=False)
def _plot_segregation_debug(plot_info, fhand):
    """Draw stacked genotype bars for a tested SNP and its neighbours.

    Two stacked bar charts are written to ``fhand``: the upper one with the
    raw 'AA', 'Aa' and 'aa' counts of every SNP and the lower one with the
    same counts as a fraction of the SNP total.  SNPs whose ``'result'`` is
    true are filled with green shades and the others with red shades.  The
    first SNP whose ``'close_snp'`` value is ``False`` gets its fill and
    edge colours swapped so that it stands out.

    plot_info -- list of dicts with the keys 'pos', 'close_snp', 'result',
                 'AA', 'Aa' and 'aa'.  It is sorted IN PLACE by 'pos'.
    fhand -- writable file object; it is flushed after printing the figure.

    Raises ValueError when no item has ``'close_snp'`` equal to ``False``.
    """
    greens = ['#2d6e12', '#76b75b', '#164900']
    reds = ['#8f0007', '#fb1f2a', '#b90009']
    grays = ['0.5', '0.25', '0.75']

    fig = Figure(figsize=(10, 4))
    axes1 = fig.add_subplot(211)
    axes2 = fig.add_subplot(212)

    plot_info.sort(key=itemgetter('pos'))
    is_close_snp = [geno_info['close_snp'] for geno_info in plot_info]
    tested_snp_idx = is_close_snp.index(False)
    passed = [geno_info['result'] for geno_info in plot_info]
    total_cnts = [geno_info['AA'] + geno_info['Aa'] + geno_info['aa']
                  for geno_info in plot_info]

    num_snvs = len(passed)
    bottoms = [0] * num_snvs
    freq_bottoms = [0] * num_snvs
    lefts = list(range(num_snvs))
    for idx, geno in enumerate(('AA', 'Aa', 'aa')):
        heights = [geno_info[geno] for geno_info in plot_info]
        backcolors = [greens[idx] if pss else reds[idx] for pss in passed]
        edgecolors = [grays[idx]] * num_snvs
        # The tested SNP is highlighted by exchanging fill and edge colours.
        backcolors[tested_snp_idx], edgecolors[tested_snp_idx] = \
            edgecolors[tested_snp_idx], backcolors[tested_snp_idx]

        axes1.bar(lefts, heights, bottom=bottoms, color=backcolors,
                  edgecolor=edgecolors)

        # float() keeps the ratio a true division with integer counts and a
        # SNP without any genotype call contributes an empty bar instead of
        # raising ZeroDivisionError.
        freq_heights = [height / float(total_cnt) if total_cnt else 0.0
                        for height, total_cnt in zip(heights, total_cnts)]
        axes2.bar(lefts, freq_heights, bottom=freq_bottoms, color=backcolors,
                  edgecolor=edgecolors)

        bottoms = [bot + heig for bot, heig in zip(bottoms, heights)]
        freq_bottoms = [bot + heig
                        for bot, heig in zip(freq_bottoms, freq_heights)]

    # Real booleans: the 'on'/'off' strings are not understood as booleans
    # by current matplotlib releases.
    axes1.tick_params(axis='y', which='both', left=False, right=False)
    axes2.tick_params(axis='y', which='both', left=False, right=False)

    canvas = FigureCanvas(fig)
    canvas.print_figure(fhand)
    fhand.flush()
def plot_haplotypes(vcf_fhand, plot_fhand, genotype_mode=REFERENCE,
                    filter_alleles_gt=FILTER_ALLELES_GT):
    """Plot the alleles of every sample along the SNVs of a VCF file.

    One subplot per sample is stacked vertically.  The x axis is the
    1-based order of the SNV in the file and the y values are the ones
    produced by ``_flatten_data`` from the alleles that ``_get_alleles``
    returns for each call.  The figure is printed at 300 dpi.

    vcf_fhand -- file object handed to ``VCFReader``.
    plot_fhand -- writable file object for the image; flushed at the end.
    genotype_mode -- accepted for interface compatibility; this function
                     does not use it.
    filter_alleles_gt -- forwarded to ``_get_alleles``.

    Raises ValueError when the VCF file provides no SNV or no sample.
    """
    reader = VCFReader(vcf_fhand)

    # collect data
    genotypes = None
    samples = []
    for snv in reader.parse_snvs():
        if genotypes is None:
            # The samples are taken from the calls of the first SNV.
            genotypes = {}
            for call in snv.calls:
                genotypes[call.sample] = []
                samples.append(call.sample)

        for call in snv.calls:
            alleles = _get_alleles(call, filter_alleles_gt=filter_alleles_gt)
            genotypes[call.sample].append(alleles)

    if not samples:
        # Without this check the code below failed with an unrelated
        # NameError/TypeError when the file had no SNV.
        raise ValueError('No genotypes to plot: the VCF has no SNVs or '
                         'no samples')

    # draw
    n_samples = len(samples)
    # Figure width grows with the number of SNVs, clamped to [8, 100].
    xsize = len(genotypes[samples[-1]]) / 100
    xsize = max(8, min(100, xsize))
    # Two units of height per sample, at most 100.
    ysize = min(n_samples * 2, 100)

    fig = Figure(figsize=(xsize, ysize))

    for index, sample in enumerate(samples):
        # matplotlib numbers the subplots starting at 1, not at 0.
        axes = fig.add_subplot(n_samples, 1, index + 1)
        axes.set_title(sample)
        y_data = genotypes[sample]
        x_data = [i + 1 for i in range(len(y_data))]
        x_data, y_data = _flatten_data(x_data, y_data)

        axes.plot(x_data, y_data, marker='o', linestyle='None',
                  markersize=3.0, markeredgewidth=0, markerfacecolor='red')
        ylim = axes.get_ylim()
        ylim = ylim[0] - 0.1, ylim[1] + 0.1
        axes.set_ylim(ylim)
        # Real booleans: the 'on'/'off' strings are not understood as
        # booleans by current matplotlib releases.
        axes.tick_params(axis='x', bottom=False, top=False, which='both',
                         labelbottom=False)
        axes.tick_params(axis='y', left=True, right=False, labelleft=False)
        axes.set_ylabel(sample)

    canvas = FigureCanvas(fig)
    canvas.print_figure(plot_fhand, dpi=300)
    plot_fhand.flush()
def _plot_hist(self, fhand, values, xlabel, ylabel, min_value=None,
               max_value=None):
    """Draw a 20-bin histogram of ``values`` with a logarithmic count scale.

    fhand -- passed on to ``_print_figure`` with the axes and the figure.
    values -- data given to the histogram.
    xlabel, ylabel -- labels for the axes.
    min_value, max_value -- when not None, a vertical line is drawn at each
                            of them (``min_value`` first).
    """
    figure = Figure()
    hist_axes = figure.add_subplot(111)
    hist_axes.hist(values, fill=True, log=True, bins=20, rwidth=1)

    for limit in (min_value, max_value):
        if limit is None:
            continue
        hist_axes.axvline(x=limit)

    hist_axes.set_xlabel(xlabel)
    hist_axes.set_ylabel(ylabel)
    _print_figure(hist_axes, figure, fhand, plot_legend=False)
def plot_smooth_hist(self, fhand):
    """Draw a 2D histogram of smooth index vs. recombinations.

    The central panel is a log-normalised 2D histogram of
    ``self._smoothes`` (x) against ``self._recombs`` (y) with its colour
    bar on the right.  A histogram of the x values is attached below it
    and a horizontal histogram of the y values to its left.  Red lines mark
    ``self.smooth_threhsold`` and, when it is not None,
    ``self.recomb_threshold``.  The figure is finally handed to
    ``_print_figure`` together with ``fhand``.
    """
    bins = 20
    fig = Figure()
    axes2 = fig.add_subplot(111)
    x = self._smoothes
    y = self._recombs

    image = axes2.hist2d(x, y, bins=bins, norm=LogNorm())[-1]
    # The marginal histograms carry the ticks and labels, so they are all
    # disabled in the central panel.  Real booleans are used because the
    # 'off' strings are not understood by current matplotlib releases.
    axes2.tick_params(which='both', bottom=False, top=False,
                      labelbottom=False, right=False, left=False,
                      labelleft=False)
    xmin2d, xmax2d = axes2.get_xlim()
    ymin2d, ymax2d = axes2.get_ylim()
    axes2.vlines(self.smooth_threhsold, ymin2d, ymax2d, color='r')
    if self.recomb_threshold is not None:
        axes2.hlines(self.recomb_threshold, xmin2d, xmax2d, color='r')

    divider = make_axes_locatable(axes2)
    cax = divider.append_axes("right", size="5%", pad=0.05)
    fig.colorbar(image, cax=cax)

    # Marginal histogram of the smooth index, below the central panel.
    axes = divider.append_axes('bottom', size=2, pad=0.1, sharex=axes2)
    axes.hist(x, fill=True, log=True, bins=bins, rwidth=1)
    axes.set_xlabel('Smooth index')
    axes.set_ylabel('Num. SNPs')
    axes.set_xlim(xmin2d, xmax2d)
    ymin, ymax = axes.get_ylim()
    axes.vlines(self.smooth_threhsold, ymin, ymax, color='r')

    # Marginal histogram of the recombinations, left of the central panel.
    axes = divider.append_axes('left', size=2, pad=0.1, sharey=axes2)
    axes.hist(y, orientation='horizontal', fill=True, log=True, bins=bins,
              rwidth=1)
    axes.set_ylabel('Num. recombs.')
    axes.set_xlabel('Num. SNPs')
    axes.set_ylim(ymin2d, ymax2d)
    xmin, xmax = axes.get_xlim()
    if self.recomb_threshold is not None:
        axes.hlines(self.recomb_threshold, xmin, xmax, color='r')

    # The figure is handed over only once every artist is in place; doing
    # it before the last limits and threshold line were set left them out.
    # NOTE(review): assumes _print_figure is what renders the figure.
    _print_figure(axes, fig, fhand, plot_legend=False)
def plot_recomb_at_0_dist_hist(self, fhand):
    """Draw a stacked histogram of the values in ``self.recomb_rates``.

    The 'ok', 'ok_conf_is_None' and 'not_ok' datasets are stacked, each
    with its own label and colour, in a 20-bin histogram with a logarithmic
    count scale.  Datasets that evaluate to false (e.g. empty lists) are
    left out together with their label and colour.  The figure is handed
    to ``_print_figure`` with ``fhand``.
    """
    figure = Figure()
    hist_axes = figure.add_subplot(111)

    rates = self.recomb_rates
    candidates = [
        (rates['ok'], 'OK', (0.3, 1, 0.3)),
        (rates['ok_conf_is_None'], 'OK conf. is none', (0.3, 1, 0.6)),
        (rates['not_ok'], 'Not OK', (1, 0.3, 0.3)),
    ]
    # Only the datasets that actually hold values are drawn.
    to_draw = [candidate for candidate in candidates if bool(candidate[0])]

    data = [dataset for dataset, _, _ in to_draw]
    labels = [label for _, label, _ in to_draw]
    colors = [color for _, _, color in to_draw]

    hist_axes.hist(data, stacked=True, fill=True, log=True, bins=20,
                   label=labels, rwidth=1, color=colors)
    _print_figure(hist_axes, figure, fhand)
def get_canvas_and_axes(figure_size=FIGURE_SIZE, left=0.1, right=0.9, top=0.9,
                        bottom=0.1, plot_type=111):
    """Return a matplotlib canvas and an axes instance.

    figure_size -- size given to the ``Figure`` (defaults to FIGURE_SIZE).
    left, right, top, bottom -- margins passed to ``subplots_adjust``.
    plot_type -- subplot specification passed to ``add_subplot``.

    Raises OptionalRequirementError when the matplotlib names are not
    available (the NameError raised on their use is translated).
    """
    try:
        # Honour the requested size; the module constant used to be passed
        # here, which silently ignored the ``figure_size`` argument.
        fig = Figure(figsize=figure_size)
        canvas = FigureCanvas(fig)
    except NameError:
        msg = 'Matplotlib module is required to draw graphical histograms'
        raise OptionalRequirementError(msg)

    axes = fig.add_subplot(plot_type)
    fig.subplots_adjust(left=left, right=right, top=top, bottom=bottom)

    return canvas, axes
def _plot_segregation_debug(plot_info, fhand):
    """Draw stacked genotype bars for a tested SNP and its neighbours.

    Two stacked bar charts are written to ``fhand``: the upper one with the
    raw 'AA', 'Aa' and 'aa' counts of every SNP and the lower one with the
    same counts as a fraction of the SNP total.  SNPs whose ``'result'`` is
    true are filled with green shades and the others with red shades.  The
    first SNP whose ``'close_snp'`` value is ``False`` gets its fill and
    edge colours swapped so that it stands out.

    plot_info -- list of dicts with the keys 'pos', 'close_snp', 'result',
                 'AA', 'Aa' and 'aa'.  It is sorted IN PLACE by 'pos'.
    fhand -- writable file object; it is flushed after printing the figure.

    Raises ValueError when no item has ``'close_snp'`` equal to ``False``.
    """
    greens = ['#2d6e12', '#76b75b', '#164900']
    reds = ['#8f0007', '#fb1f2a', '#b90009']
    grays = ['0.5', '0.25', '0.75']

    fig = Figure(figsize=(10, 4))
    axes1 = fig.add_subplot(211)
    axes2 = fig.add_subplot(212)

    plot_info.sort(key=itemgetter('pos'))
    is_close_snp = [geno_info['close_snp'] for geno_info in plot_info]
    tested_snp_idx = is_close_snp.index(False)
    passed = [geno_info['result'] for geno_info in plot_info]
    total_cnts = [geno_info['AA'] + geno_info['Aa'] + geno_info['aa']
                  for geno_info in plot_info]

    num_snvs = len(passed)
    bottoms = [0] * num_snvs
    freq_bottoms = [0] * num_snvs
    lefts = list(range(num_snvs))
    for idx, geno in enumerate(('AA', 'Aa', 'aa')):
        heights = [geno_info[geno] for geno_info in plot_info]
        backcolors = [greens[idx] if pss else reds[idx] for pss in passed]
        edgecolors = [grays[idx]] * num_snvs
        # The tested SNP is highlighted by exchanging fill and edge colours.
        backcolors[tested_snp_idx], edgecolors[tested_snp_idx] = \
            edgecolors[tested_snp_idx], backcolors[tested_snp_idx]

        axes1.bar(lefts, heights, bottom=bottoms, color=backcolors,
                  edgecolor=edgecolors)

        # float() keeps the ratio a true division with integer counts and a
        # SNP without any genotype call contributes an empty bar instead of
        # raising ZeroDivisionError.
        freq_heights = [height / float(total_cnt) if total_cnt else 0.0
                        for height, total_cnt in zip(heights, total_cnts)]
        axes2.bar(lefts, freq_heights, bottom=freq_bottoms, color=backcolors,
                  edgecolor=edgecolors)

        bottoms = [bot + heig for bot, heig in zip(bottoms, heights)]
        freq_bottoms = [bot + heig
                        for bot, heig in zip(freq_bottoms, freq_heights)]

    # Real booleans: the 'on'/'off' strings are not understood as booleans
    # by current matplotlib releases.
    axes1.tick_params(axis='y', which='both', left=False, right=False)
    axes2.tick_params(axis='y', which='both', left=False, right=False)

    canvas = FigureCanvas(fig)
    canvas.print_figure(fhand)
    fhand.flush()
def plot_recomb_at_0_dist_hist(self, fhand):
    """Draw a stacked histogram of the values in ``self.recomb_rates``.

    The 'ok', 'ok_conf_is_None' and 'not_ok' datasets are stacked, each
    with its own label and colour, in a 20-bin histogram with a logarithmic
    count scale.  Datasets that evaluate to false (e.g. empty lists) are
    left out together with their label and colour.  The figure is handed
    to ``_print_figure`` with ``fhand``.
    """
    figure = Figure()
    hist_axes = figure.add_subplot(111)

    rates = self.recomb_rates
    candidates = [
        (rates['ok'], 'OK', (0.3, 1, 0.3)),
        (rates['ok_conf_is_None'], 'OK conf. is none', (0.3, 1, 0.6)),
        (rates['not_ok'], 'Not OK', (1, 0.3, 0.3)),
    ]
    # Only the datasets that actually hold values are drawn.
    to_draw = [candidate for candidate in candidates if bool(candidate[0])]

    data = [dataset for dataset, _, _ in to_draw]
    labels = [label for _, label, _ in to_draw]
    colors = [color for _, _, color in to_draw]

    hist_axes.hist(data, stacked=True, fill=True, log=True, bins=20,
                   label=labels, rwidth=1, color=colors)
    _print_figure(hist_axes, figure, fhand)
def get_fig_and_canvas(num_rows=1, num_cols=1, figsize=None):
    """Return a matplotlib figure and the canvas bound to it.

    When ``figsize`` is None the size is derived from the grid: 7.5 units
    of width per column and 5.0 units of height per row, with the height
    capped at 320.0.

    Raises OptionalRequirementError when the matplotlib names are not
    available (the NameError raised on their use is translated).
    """
    if figsize is None:
        row_height, col_width, max_height = 5.0, 7.5, 320.0
        total_height = row_height * num_rows
        total_width = col_width * num_cols
        figsize = (total_width,
                   max_height if total_height > max_height else total_height)

    try:
        fig = Figure(figsize=figsize)
        canvas = FigureCanvas(fig)
    except NameError:
        msg = 'Matplotlib module is required to draw graphical histograms'
        raise OptionalRequirementError(msg)
    return fig, canvas
def _calc_ajusted_recomb(dists, recombs, max_recomb, max_zero_dist_recomb,
                         alpha_recomb_0, plot_fhand=None):
    """Estimate the recombination at distance 0 with successive fits.

    ``_fit_kosambi`` is run up to three times:

      1. with all the points,
      2. only with the points whose recombination, as estimated by the
         first fit, is lower than ``max_recomb``,
      3. without the points whose residual in the second fit is NaN or an
         outlier (outside 1.5 times the interquartile range).

    The second fitted parameter (``popt[1]``) is taken as the recombination
    at distance 0 and ``pcov[1, 1]`` as its variance.

    dists, recombs -- sequences with the distances and recombinations.
    max_recomb -- limit used to select the points of the second fit.
    max_zero_dist_recomb -- the SNP is accepted when the absolute
        recombination at distance 0 is not higher than this value.
    alpha_recomb_0 -- when not None a confidence interval for the
        recombination at distance 0 is built from the t distribution and
        the SNP is also accepted if the interval contains 0.
    plot_fhand -- when true a figure with the points and the fitted curves
        is built and given to ``_print_figure``.

    Returns a tuple ``(recomb_at_dist_0, snp_ok, info)``.  When a fit fails
    or no close point is left it is ``(None, False, info)`` and ``info``
    has the keys 'kosambi_fit_ok' (False) and 'reason_no_fit'.  Otherwise
    ``info`` has 'kosambi_fit_ok' (True) and 'conf_interval' (a tuple or
    None).
    """
    if plot_fhand:
        fig = Figure()
        axes = fig.add_subplot(111)
        # set_axis_bgcolor is not available in current matplotlib releases;
        # set_facecolor is preferred and the old name is kept as a fallback
        # for old installations.
        set_background = getattr(axes, 'set_facecolor', None)
        if set_background is None:
            set_background = axes.set_axis_bgcolor
        set_background((1, 0.6, 0.6))
        axes.scatter(dists, recombs, c='r', label='For 1st fit')
    else:
        axes = None
        fig = None
        set_background = None
    plotting = fig is not None

    def fit_failed(reason):
        # _print_figure is called on every exit path, as it always was.
        _print_figure(axes, fig, plot_fhand)
        return None, False, {'kosambi_fit_ok': False,
                             'reason_no_fit': reason}

    # first rough interpolation
    # we remove the physical distances with high recombination rates because
    # they're not very informative. e.g. more than 40 cM will not discriminate
    # between false recombination due to hidden segregation in the parents and
    # true recombination
    dists = array(dists)
    recombs = array(recombs)
    recomb_rate = 1e-7
    popt, pcov = _fit_kosambi(dists, recombs, init_params=[recomb_rate, 0])
    if popt is None:
        return fit_failed('1st fit failed')

    est_recombs = _kosambi(dists, popt[0], popt[1])
    if plotting:
        axes.plot(dists, est_recombs, label='1st fit', c='r')

    # now we perform a second fit but only with those markers that are a
    # distance that results in a recombination fraction lower than max_recomb
    close_markers = est_recombs < max_recomb
    close_recombs = recombs[close_markers]
    close_dists = dists[close_markers]

    if plotting:
        axes.scatter(close_dists, close_recombs, c='b', label='For 2nd fit')

    if len(close_dists) < 1:
        # This marker is so bad that their closest markers are at a large
        # distance
        return fit_failed('no close region left')

    if len(close_dists) != len(dists):
        # If we've removed any points we fit again
        popt, pcov = _fit_kosambi(close_dists, close_recombs,
                                  init_params=popt)
        if popt is None:
            return fit_failed('2nd fit failed')

    est_close_recombs = _kosambi(close_dists, popt[0], popt[1])
    residuals = close_recombs - est_close_recombs
    if plotting:
        axes.plot(close_dists, est_close_recombs, c='b', label='2nd_fit')

    # we exclude the markers with a residual outlier
    quartile_25, quartile_75 = percentile(residuals, [25, 75])
    iqr = quartile_75 - quartile_25
    outlier_thrld = [quartile_25 - iqr * 1.5, quartile_75 + iqr * 1.5]
    ok_markers = [idx for idx, res in enumerate(residuals)
                  if (not isnan(res) and
                      (outlier_thrld[0] < res < outlier_thrld[1]))]
    ok_recombs = close_recombs[ok_markers]
    ok_dists = close_dists[ok_markers]

    if plotting:
        axes.scatter(ok_dists, ok_recombs, c='g', label='For 3rd fit')

    if len(ok_dists) != len(close_dists):
        # If we've removed any points we fit again
        popt, pcov = _fit_kosambi(ok_dists, ok_recombs, init_params=popt)
        if popt is None:
            return fit_failed('3rd fit failed')

    var_recomb_at_dist_0 = pcov[1, 1]
    recomb_at_dist_0 = popt[1]
    ok_color = (0.3, 1, 0.6)
    conf_interval = None
    if isinf(var_recomb_at_dist_0):
        # recomb is 0 for all points and the variance is inf
        snp_ok = bool(abs(recomb_at_dist_0) < 0.01)
    else:
        if alpha_recomb_0 is not None:
            num_data_points = len(ok_dists)
            num_params = len(popt)
            deg_of_freedom = max(0, num_data_points - num_params)
            tval = t.ppf(1.0 - alpha_recomb_0 / 2., deg_of_freedom)
            std_dev = var_recomb_at_dist_0 ** 0.5
            conf_interval = (recomb_at_dist_0 - std_dev * tval,
                             recomb_at_dist_0 + std_dev * tval)

        if abs(recomb_at_dist_0) <= max_zero_dist_recomb:
            snp_ok = True
            ok_color = (0.3, 1, 0.3)
        elif conf_interval is not None:
            # Also accepted when 0 lies inside the confidence interval.
            snp_ok = bool(conf_interval[0] < 0 < conf_interval[1])
        else:
            snp_ok = False

    if plotting:
        if conf_interval is not None:
            # There is nothing to draw when no interval was calculated.
            axes.vlines(0, conf_interval[0], conf_interval[1],
                        label='conf. interval')
        set_background(ok_color if snp_ok else (1, 0.3, 0.3))

    est2_recombs = _kosambi(ok_dists, popt[0], popt[1])
    if plotting:
        axes.plot(ok_dists, est2_recombs, c='g', label='3rd_fit')
    _print_figure(axes, fig, plot_fhand)
    return recomb_at_dist_0, snp_ok, {'kosambi_fit_ok': True,
                                      'conf_interval': conf_interval}
def _calc_ajusted_recomb(dists, recombs, max_recomb, max_zero_dist_recomb,
                         alpha_recomb_0, plot_fhand=None):
    """Estimate the recombination at distance 0 with successive fits.

    ``_fit_kosambi`` is run up to three times:

      1. with all the points,
      2. only with the points whose recombination, as estimated by the
         first fit, is lower than ``max_recomb``,
      3. without the points whose residual in the second fit is NaN or an
         outlier (outside 1.5 times the interquartile range).

    The second fitted parameter (``popt[1]``) is taken as the recombination
    at distance 0 and ``pcov[1, 1]`` as its variance.

    dists, recombs -- sequences with the distances and recombinations.
    max_recomb -- limit used to select the points of the second fit.
    max_zero_dist_recomb -- the SNP is accepted when the absolute
        recombination at distance 0 is not higher than this value.
    alpha_recomb_0 -- when not None a confidence interval for the
        recombination at distance 0 is built from the t distribution and
        the SNP is also accepted if the interval contains 0.
    plot_fhand -- when true a figure with the points and the fitted curves
        is built and given to ``_print_figure``.

    Returns a tuple ``(recomb_at_dist_0, snp_ok, info)``.  When a fit fails
    or no close point is left it is ``(None, False, info)`` and ``info``
    has the keys 'kosambi_fit_ok' (False) and 'reason_no_fit'.  Otherwise
    ``info`` has 'kosambi_fit_ok' (True) and 'conf_interval' (a tuple or
    None).
    """
    if plot_fhand:
        fig = Figure()
        axes = fig.add_subplot(111)
        # set_axis_bgcolor is not available in current matplotlib releases;
        # set_facecolor is preferred and the old name is kept as a fallback
        # for old installations.
        set_background = getattr(axes, 'set_facecolor', None)
        if set_background is None:
            set_background = axes.set_axis_bgcolor
        set_background((1, 0.6, 0.6))
        axes.scatter(dists, recombs, c='r', label='For 1st fit')
    else:
        axes = None
        fig = None
        set_background = None
    plotting = fig is not None

    def fit_failed(reason):
        # _print_figure is called on every exit path, as it always was.
        _print_figure(axes, fig, plot_fhand)
        return None, False, {'kosambi_fit_ok': False,
                             'reason_no_fit': reason}

    # first rough interpolation
    # we remove the physical distances with high recombination rates because
    # they're not very informative. e.g. more than 40 cM will not discriminate
    # between false recombination due to hidden segregation in the parents and
    # true recombination
    dists = array(dists)
    recombs = array(recombs)
    recomb_rate = 1e-7
    popt, pcov = _fit_kosambi(dists, recombs, init_params=[recomb_rate, 0])
    if popt is None:
        return fit_failed('1st fit failed')

    est_recombs = _kosambi(dists, popt[0], popt[1])
    if plotting:
        axes.plot(dists, est_recombs, label='1st fit', c='r')

    # now we perform a second fit but only with those markers that are a
    # distance that results in a recombination fraction lower than max_recomb
    close_markers = est_recombs < max_recomb
    close_recombs = recombs[close_markers]
    close_dists = dists[close_markers]

    if plotting:
        axes.scatter(close_dists, close_recombs, c='b', label='For 2nd fit')

    if len(close_dists) < 1:
        # This marker is so bad that their closest markers are at a large
        # distance
        return fit_failed('no close region left')

    if len(close_dists) != len(dists):
        # If we've removed any points we fit again
        popt, pcov = _fit_kosambi(close_dists, close_recombs,
                                  init_params=popt)
        if popt is None:
            return fit_failed('2nd fit failed')

    est_close_recombs = _kosambi(close_dists, popt[0], popt[1])
    residuals = close_recombs - est_close_recombs
    if plotting:
        axes.plot(close_dists, est_close_recombs, c='b', label='2nd_fit')

    # we exclude the markers with a residual outlier
    quartile_25, quartile_75 = percentile(residuals, [25, 75])
    iqr = quartile_75 - quartile_25
    outlier_thrld = [quartile_25 - iqr * 1.5, quartile_75 + iqr * 1.5]
    ok_markers = [idx for idx, res in enumerate(residuals)
                  if (not isnan(res) and
                      (outlier_thrld[0] < res < outlier_thrld[1]))]
    ok_recombs = close_recombs[ok_markers]
    ok_dists = close_dists[ok_markers]

    if plotting:
        axes.scatter(ok_dists, ok_recombs, c='g', label='For 3rd fit')

    if len(ok_dists) != len(close_dists):
        # If we've removed any points we fit again
        popt, pcov = _fit_kosambi(ok_dists, ok_recombs, init_params=popt)
        if popt is None:
            return fit_failed('3rd fit failed')

    var_recomb_at_dist_0 = pcov[1, 1]
    recomb_at_dist_0 = popt[1]
    ok_color = (0.3, 1, 0.6)
    conf_interval = None
    if isinf(var_recomb_at_dist_0):
        # recomb is 0 for all points and the variance is inf
        snp_ok = bool(abs(recomb_at_dist_0) < 0.01)
    else:
        if alpha_recomb_0 is not None:
            num_data_points = len(ok_dists)
            num_params = len(popt)
            deg_of_freedom = max(0, num_data_points - num_params)
            tval = t.ppf(1.0 - alpha_recomb_0 / 2., deg_of_freedom)
            std_dev = var_recomb_at_dist_0 ** 0.5
            conf_interval = (recomb_at_dist_0 - std_dev * tval,
                             recomb_at_dist_0 + std_dev * tval)

        if abs(recomb_at_dist_0) <= max_zero_dist_recomb:
            snp_ok = True
            ok_color = (0.3, 1, 0.3)
        elif conf_interval is not None:
            # Also accepted when 0 lies inside the confidence interval.
            snp_ok = bool(conf_interval[0] < 0 < conf_interval[1])
        else:
            snp_ok = False

    if plotting:
        if conf_interval is not None:
            # There is nothing to draw when no interval was calculated.
            axes.vlines(0, conf_interval[0], conf_interval[1],
                        label='conf. interval')
        set_background(ok_color if snp_ok else (1, 0.3, 0.3))

    est2_recombs = _kosambi(ok_dists, popt[0], popt[1])
    if plotting:
        axes.plot(ok_dists, est2_recombs, c='g', label='3rd_fit')
    _print_figure(axes, fig, plot_fhand)
    return recomb_at_dist_0, snp_ok, {'kosambi_fit_ok': True,
                                      'conf_interval': conf_interval}