Esempio n. 1
0
 def plot_parent_coding_hist(self, fhand):
     'It draws a log-scaled histogram of the parental coding support indexes'
     figure = Figure()
     hist_axes = figure.add_subplot(111)
     hist_options = {'fill': True, 'log': True, 'bins': 20, 'rwidth': 1}
     hist_axes.hist(self.indexes, **hist_options)
     # the threshold is marked with a vertical line
     hist_axes.axvline(x=self.parent_index_threshold)
     hist_axes.set_ylabel('num. SNPs')
     hist_axes.set_xlabel('support index for parental coding')
     _print_figure(hist_axes, figure, fhand, plot_legend=False)
Esempio n. 2
0
 def plot_parent_coding_hist(self, fhand):
     'It draws a log-scaled histogram of the parental coding support indexes'
     figure = Figure()
     hist_axes = figure.add_subplot(111)
     hist_options = {'fill': True, 'log': True, 'bins': 20, 'rwidth': 1}
     hist_axes.hist(self.indexes, **hist_options)
     # the threshold is marked with a vertical line
     hist_axes.axvline(x=self.parent_index_threshold)
     hist_axes.set_ylabel('num. SNPs')
     hist_axes.set_xlabel('support index for parental coding')
     _print_figure(hist_axes, figure, fhand, plot_legend=False)
Esempio n. 3
0
 def plot_failed_freq_dist(self, fhand):
     'It draws a log-scaled histogram of the failed frequencies'
     figure = Figure()
     hist_axes = figure.add_subplot(111)
     hist_options = {'fill': True, 'log': True, 'bins': 20, 'rwidth': 1}
     hist_axes.hist(self._failed_freqs, **hist_options)
     # the maximum failed frequency is marked with a vertical line
     hist_axes.axvline(x=self.max_failed_freq)
     hist_axes.set_ylabel('num. SNPs')
     hist_axes.set_xlabel('% of adjacent SNPs segregating differently')
     _print_figure(hist_axes, figure, fhand, plot_legend=False)
Esempio n. 4
0
 def plot_failed_freq_dist(self, fhand):
     'It draws a log-scaled histogram of the failed frequencies'
     figure = Figure()
     hist_axes = figure.add_subplot(111)
     hist_options = {'fill': True, 'log': True, 'bins': 20, 'rwidth': 1}
     hist_axes.hist(self._failed_freqs, **hist_options)
     # the maximum failed frequency is marked with a vertical line
     hist_axes.axvline(x=self.max_failed_freq)
     hist_axes.set_ylabel('num. SNPs')
     hist_axes.set_xlabel('% of adjacent SNPs segregating differently')
     _print_figure(hist_axes, figure, fhand, plot_legend=False)
Esempio n. 5
0
 def draw_hist(self, fhand):
     'It draws a log-scaled histogram of the scores with the threshold marked'
     figure = Figure()
     hist_axes = figure.add_subplot(111)
     hist_options = {'fill': True, 'log': True, 'bins': 20, 'rwidth': 1}
     hist_axes.hist(self._scores, **hist_options)
     # the threshold is marked with a vertical line
     hist_axes.axvline(x=self.threshold)
     hist_axes.set_ylabel('num. SNPs')
     hist_axes.set_xlabel('% complexity score')
     _print_figure(hist_axes, figure, fhand, plot_legend=False)
Esempio n. 6
0
 def draw_hist(self, fhand):
     'It draws a log-scaled histogram of the scores with the threshold marked'
     figure = Figure()
     hist_axes = figure.add_subplot(111)
     hist_options = {'fill': True, 'log': True, 'bins': 20, 'rwidth': 1}
     hist_axes.hist(self._scores, **hist_options)
     # the threshold is marked with a vertical line
     hist_axes.axvline(x=self.threshold)
     hist_axes.set_ylabel('num. SNPs')
     hist_axes.set_xlabel('% complexity score')
     _print_figure(hist_axes, figure, fhand, plot_legend=False)
Esempio n. 7
0
    def _plot_segregation_debug(plot_info, fhand):
        """It draws the genotype counts and frequencies of a group of SNPs.

        plot_info is a list of dicts, one per SNP, with the keys 'pos',
        'close_snp', 'result', 'AA', 'Aa' and 'aa'. The list is sorted in
        place by 'pos'.

        Two stacked bar plots are written to fhand: the upper one with the
        genotype counts and the lower one with the genotype frequencies. The
        bars are green when 'result' is true and red otherwise. The first SNP
        with a false 'close_snp' gets its face and edge colors swapped.

        It raises a ValueError if every SNP has a true 'close_snp'.
        """
        greens = ['#2d6e12', '#76b75b', '#164900']
        reds = ['#8f0007', '#fb1f2a', '#b90009']
        grays = ['0.5', '0.25', '0.75']
        fig = Figure(figsize=(10, 4))
        axes1 = fig.add_subplot(211)
        axes2 = fig.add_subplot(212)
        plot_info.sort(key=itemgetter('pos'))

        is_close_snp = [geno_info['close_snp'] for geno_info in plot_info]
        tested_snp_idx = is_close_snp.index(False)
        passed = [geno_info['result'] for geno_info in plot_info]
        total_cnts = [
            geno_info['AA'] + geno_info['Aa'] + geno_info['aa']
            for geno_info in plot_info
        ]

        num_snvs = len(passed)
        bottoms = [0] * num_snvs
        freq_bottoms = [0] * num_snvs
        lefts = list(range(num_snvs))
        for idx, geno in enumerate(('AA', 'Aa', 'aa')):
            heights = [geno_info[geno] for geno_info in plot_info]
            backcolors = [greens[idx] if pss else reds[idx] for pss in passed]
            edgecolors = [grays[idx]] * num_snvs
            # the tested SNP is highlighted by swapping its colors
            backcolors[tested_snp_idx], edgecolors[tested_snp_idx] = \
                edgecolors[tested_snp_idx], backcolors[tested_snp_idx]
            axes1.bar(lefts,
                      heights,
                      bottom=bottoms,
                      color=backcolors,
                      edgecolor=edgecolors)

            # float() avoids the integer division truncation and a SNP
            # without genotypes gets a 0 frequency instead of a
            # ZeroDivisionError
            freq_heights = [
                float(height) / total_cnt if total_cnt else 0.0
                for height, total_cnt in zip(heights, total_cnts)
            ]
            axes2.bar(lefts,
                      freq_heights,
                      bottom=freq_bottoms,
                      color=backcolors,
                      edgecolor=edgecolors)

            bottoms = [bot + heig for bot, heig in zip(bottoms, heights)]
            freq_bottoms = [
                bot + heig for bot, heig in zip(freq_bottoms, freq_heights)
            ]

        # booleans are used because the 'off' strings are not understood as
        # False by recent Matplotlib versions
        axes1.tick_params(axis='y', which='both', left=False, right=False)
        axes2.tick_params(axis='y', which='both', left=False, right=False)

        canvas = FigureCanvas(fig)
        canvas.print_figure(fhand)
        fhand.flush()
Esempio n. 8
0
def plot_haplotypes(vcf_fhand, plot_fhand, genotype_mode=REFERENCE,
                    filter_alleles_gt=FILTER_ALLELES_GT):
    """It plots the alleles of every sample for all the SNVs of a VCF.

    The SNVs are read from vcf_fhand and the figure, with one subplot per
    sample, is written to plot_fhand. The samples are taken from the calls
    of the first SNV. filter_alleles_gt is passed to _get_alleles.
    genotype_mode is accepted, but it is not used by this function.

    It raises a ValueError if no sample genotypes are found in the VCF.
    """
    reader = VCFReader(vcf_fhand)

    # collect data
    genotypes = None
    samples = []
    for snv in reader.parse_snvs():
        if genotypes is None:
            genotypes = {}
            for call in snv.calls:
                genotypes[call.sample] = []
                samples.append(call.sample)

        for call in snv.calls:
            alleles = _get_alleles(call, filter_alleles_gt=filter_alleles_gt)
            genotypes[call.sample].append(alleles)

    if not samples:
        raise ValueError('No genotypes found in the VCF, nothing to plot')

    # draw
    n_samples = len(samples)
    # the figure width grows with the number of SNVs, between 8 and 100
    n_snvs = len(genotypes[samples[-1]])
    xsize = min(max(n_snvs / 100, 8), 100)
    ysize = min(n_samples * 2, 100)
    figure_size = (xsize, ysize)

    fig = Figure(figsize=figure_size)

    # the subplot numbers start at 1
    for index, sample in enumerate(samples, start=1):
        axes = fig.add_subplot(n_samples, 1, index)
        axes.set_title(sample)
        y_data = genotypes[sample]
        x_data = [i + 1 for i in range(len(y_data))]
        x_data, y_data = _flatten_data(x_data, y_data)

        axes.plot(x_data, y_data, marker='o',
                  linestyle='None', markersize=3.0, markeredgewidth=0,
                  markerfacecolor='red')
        # some room is left over and under the points
        ylim = axes.get_ylim()
        ylim = ylim[0] - 0.1, ylim[1] + 0.1
        axes.set_ylim(ylim)
        # booleans are used because the 'on' and 'off' strings are not
        # understood by recent Matplotlib versions
        axes.tick_params(axis='x', bottom=False, top=False, which='both',
                         labelbottom=False)
        axes.tick_params(axis='y', left=True, right=False, labelleft=False)
        axes.set_ylabel(sample)

    canvas = FigureCanvas(fig)
    canvas.print_figure(plot_fhand, dpi=300)
    plot_fhand.flush()
Esempio n. 9
0
 def _plot_hist(self, fhand, values, xlabel, ylabel, min_value=None,
                max_value=None):
     'It draws a log-scaled histogram with optional vertical limit lines'
     figure = Figure()
     hist_axes = figure.add_subplot(111)
     hist_axes.hist(values, fill=True, log=True, bins=20, rwidth=1)
     # a vertical line is drawn for every given limit, the minimum first
     for limit in (min_value, max_value):
         if limit is not None:
             hist_axes.axvline(x=limit)
     hist_axes.set_xlabel(xlabel)
     hist_axes.set_ylabel(ylabel)
     _print_figure(hist_axes, figure, fhand, plot_legend=False)
Esempio n. 10
0
    def plot_smooth_hist(self, fhand):
        """It plots the smooth indexes against the number of recombinations.

        The figure written to fhand has a 2D histogram (log color scale) of
        self._smoothes vs self._recombs with its colorbar, plus one
        histogram for each variable below and to the left of it. The
        thresholds (self.smooth_threhsold and, if it is not None,
        self.recomb_threshold) are marked with red lines.
        """
        bins = 20
        fig = Figure()
        axes2 = fig.add_subplot(111)
        x = self._smoothes
        y = self._recombs
        # the last item returned by hist2d is the one given to the colorbar
        image = axes2.hist2d(x, y, bins=bins, norm=LogNorm())[-1]

        # booleans are used because the 'off' strings are not understood as
        # False by recent Matplotlib versions
        axes2.tick_params(
            which='both',  # both major and minor ticks are affected
            bottom=False,  # ticks along the bottom edge are off
            top=False,  # ticks along the top edge are off
            labelbottom=False,
            right=False,
            left=False,
            labelleft=False)
        xmin2d, xmax2d = axes2.get_xlim()
        ymin2d, ymax2d = axes2.get_ylim()
        axes2.vlines(self.smooth_threhsold, ymin2d, ymax2d, color='r')
        if self.recomb_threshold is not None:
            axes2.hlines(self.recomb_threshold, xmin2d, xmax2d, color='r')

        divider = make_axes_locatable(axes2)
        cax = divider.append_axes("right", size="5%", pad=0.05)
        fig.colorbar(image, cax=cax)

        # histogram of the smooth indexes, below the 2D histogram
        axes = divider.append_axes('bottom', size=2, pad=0.1, sharex=axes2)
        axes.hist(x, fill=True, log=True, bins=bins, rwidth=1)
        axes.set_xlabel('Smooth index')
        axes.set_ylabel('Num. SNPs')
        axes.set_xlim(xmin2d, xmax2d)
        ymin, ymax = axes.get_ylim()
        axes.vlines(self.smooth_threhsold, ymin, ymax, color='r')

        # histogram of the recombinations, to the left of the 2D histogram
        axes = divider.append_axes('left', size=2, pad=0.1, sharey=axes2)
        axes.hist(y,
                  orientation='horizontal',
                  fill=True,
                  log=True,
                  bins=bins,
                  rwidth=1)
        axes.set_ylabel('Num. recombs.')
        axes.set_xlabel('Num. SNPs')
        axes.set_ylim(ymin2d, ymax2d)
        xmin, xmax = axes.get_xlim()
        if self.recomb_threshold is not None:
            axes.hlines(self.recomb_threshold, xmin, xmax, color='r')
        # the figure is written once everything has been drawn, otherwise
        # the limits and the threshold line set above would be left out
        _print_figure(axes, fig, fhand, plot_legend=False)
Esempio n. 11
0
 def plot_recomb_at_0_dist_hist(self, fhand):
     'It draws a stacked log-scaled histogram of the recombination rates'
     figure = Figure()
     hist_axes = figure.add_subplot(111)
     # every series is a tuple with its data, its label and its color
     series = [
         (self.recomb_rates['ok'], 'OK', (0.3, 1, 0.3)),
         (self.recomb_rates['ok_conf_is_None'], 'OK conf. is none',
          (0.3, 1, 0.6)),
         (self.recomb_rates['not_ok'], 'Not OK', (1, 0.3, 0.3)),
     ]
     # the empty datasets are not drawn
     drawn = [item for item in series if item[0]]
     data = [item[0] for item in drawn]
     labels = [item[1] for item in drawn]
     colors = [item[2] for item in drawn]
     hist_axes.hist(data, stacked=True, fill=True, log=True, bins=20,
                    label=labels, rwidth=1, color=colors)
     _print_figure(hist_axes, figure, fhand)
Esempio n. 12
0
def get_canvas_and_axes(figure_size=FIGURE_SIZE, left=0.1, right=0.9, top=0.9,
                        bottom=0.1, plot_type=111):
    """It returns a matplotlib canvas and axes instance.

    figure_size is the size given to the figure. left, right, top and bottom
    are passed to the subplots_adjust method of the figure and plot_type is
    the subplot specification given to add_subplot.

    It raises an OptionalRequirementError if the matplotlib names are not
    available.
    """
    try:
        # the size requested by the caller is used, not the module default
        fig = Figure(figsize=figure_size)
        canvas = FigureCanvas(fig)
    except NameError:
        msg = 'Matplotlib module is required to draw graphical histograms'
        raise OptionalRequirementError(msg)

    axes = fig.add_subplot(plot_type)
    fig.subplots_adjust(left=left, right=right, top=top, bottom=bottom)

    return canvas, axes
Esempio n. 13
0
    def plot_smooth_hist(self, fhand):
        """It plots the smooth indexes against the number of recombinations.

        The figure written to fhand has a 2D histogram (log color scale) of
        self._smoothes vs self._recombs with its colorbar, plus one
        histogram for each variable below and to the left of it. The
        thresholds (self.smooth_threhsold and, if it is not None,
        self.recomb_threshold) are marked with red lines.
        """
        bins = 20
        fig = Figure()
        axes2 = fig.add_subplot(111)
        x = self._smoothes
        y = self._recombs
        # the last item returned by hist2d is the one given to the colorbar
        image = axes2.hist2d(x, y, bins=bins,
                             norm=LogNorm())[-1]

        # booleans are used because the 'off' strings are not understood as
        # False by recent Matplotlib versions
        axes2.tick_params(
            which='both',      # both major and minor ticks are affected
            bottom=False,      # ticks along the bottom edge are off
            top=False,         # ticks along the top edge are off
            labelbottom=False,
            right=False, left=False, labelleft=False)
        xmin2d, xmax2d = axes2.get_xlim()
        ymin2d, ymax2d = axes2.get_ylim()
        axes2.vlines(self.smooth_threhsold, ymin2d, ymax2d, color='r')
        if self.recomb_threshold is not None:
            axes2.hlines(self.recomb_threshold, xmin2d, xmax2d, color='r')

        divider = make_axes_locatable(axes2)
        cax = divider.append_axes("right", size="5%", pad=0.05)
        fig.colorbar(image, cax=cax)

        # histogram of the smooth indexes, below the 2D histogram
        axes = divider.append_axes('bottom', size=2, pad=0.1, sharex=axes2)
        axes.hist(x, fill=True, log=True, bins=bins, rwidth=1)
        axes.set_xlabel('Smooth index')
        axes.set_ylabel('Num. SNPs')
        axes.set_xlim(xmin2d, xmax2d)
        ymin, ymax = axes.get_ylim()
        axes.vlines(self.smooth_threhsold, ymin, ymax, color='r')

        # histogram of the recombinations, to the left of the 2D histogram
        axes = divider.append_axes('left', size=2, pad=0.1, sharey=axes2)
        axes.hist(y, orientation='horizontal', fill=True, log=True, bins=bins,
                  rwidth=1)
        axes.set_ylabel('Num. recombs.')
        axes.set_xlabel('Num. SNPs')
        axes.set_ylim(ymin2d, ymax2d)
        xmin, xmax = axes.get_xlim()
        if self.recomb_threshold is not None:
            axes.hlines(self.recomb_threshold, xmin, xmax, color='r')
        # the figure is written once everything has been drawn, otherwise
        # the limits and the threshold line set above would be left out
        _print_figure(axes, fig, fhand, plot_legend=False)
Esempio n. 14
0
 def _plot_hist(self,
                fhand,
                values,
                xlabel,
                ylabel,
                min_value=None,
                max_value=None):
     'It draws a log-scaled histogram with optional vertical limit lines'
     figure = Figure()
     hist_axes = figure.add_subplot(111)
     hist_axes.hist(values, fill=True, log=True, bins=20, rwidth=1)
     # a vertical line is drawn for every given limit, the minimum first
     for limit in (min_value, max_value):
         if limit is not None:
             hist_axes.axvline(x=limit)
     hist_axes.set_xlabel(xlabel)
     hist_axes.set_ylabel(ylabel)
     _print_figure(hist_axes, figure, fhand, plot_legend=False)
Esempio n. 15
0
def get_canvas_and_axes(figure_size=FIGURE_SIZE,
                        left=0.1,
                        right=0.9,
                        top=0.9,
                        bottom=0.1,
                        plot_type=111):
    """It returns a matplotlib canvas and axes instance.

    figure_size is the size given to the figure. left, right, top and bottom
    are passed to the subplots_adjust method of the figure and plot_type is
    the subplot specification given to add_subplot.

    It raises an OptionalRequirementError if the matplotlib names are not
    available.
    """
    try:
        # the size requested by the caller is used, not the module default
        fig = Figure(figsize=figure_size)
        canvas = FigureCanvas(fig)
    except NameError:
        msg = 'Matplotlib module is required to draw graphical histograms'
        raise OptionalRequirementError(msg)

    axes = fig.add_subplot(plot_type)
    fig.subplots_adjust(left=left, right=right, top=top, bottom=bottom)

    return canvas, axes
Esempio n. 16
0
    def _plot_segregation_debug(plot_info, fhand):
        """It draws the genotype counts and frequencies of a group of SNPs.

        plot_info is a list of dicts, one per SNP, with the keys 'pos',
        'close_snp', 'result', 'AA', 'Aa' and 'aa'. The list is sorted in
        place by 'pos'.

        Two stacked bar plots are written to fhand: the upper one with the
        genotype counts and the lower one with the genotype frequencies. The
        bars are green when 'result' is true and red otherwise. The first SNP
        with a false 'close_snp' gets its face and edge colors swapped.

        It raises a ValueError if every SNP has a true 'close_snp'.
        """
        greens = ['#2d6e12', '#76b75b', '#164900']
        reds = ['#8f0007', '#fb1f2a', '#b90009']
        grays = ['0.5', '0.25', '0.75']
        fig = Figure(figsize=(10, 4))
        axes1 = fig.add_subplot(211)
        axes2 = fig.add_subplot(212)
        plot_info.sort(key=itemgetter('pos'))

        is_close_snp = [geno_info['close_snp'] for geno_info in plot_info]
        tested_snp_idx = is_close_snp.index(False)
        passed = [geno_info['result'] for geno_info in plot_info]
        total_cnts = [geno_info['AA'] + geno_info['Aa'] + geno_info['aa']
                      for geno_info in plot_info]

        num_snvs = len(passed)
        bottoms = [0] * num_snvs
        freq_bottoms = [0] * num_snvs
        lefts = list(range(num_snvs))
        for idx, geno in enumerate(('AA', 'Aa', 'aa')):
            heights = [geno_info[geno] for geno_info in plot_info]
            backcolors = [greens[idx] if pss else reds[idx] for pss in passed]
            edgecolors = [grays[idx]] * num_snvs
            # the tested SNP is highlighted by swapping its colors
            backcolors[tested_snp_idx], edgecolors[tested_snp_idx] = \
                edgecolors[tested_snp_idx], backcolors[tested_snp_idx]
            axes1.bar(lefts, heights, bottom=bottoms, color=backcolors,
                      edgecolor=edgecolors)

            # float() avoids the integer division truncation and a SNP
            # without genotypes gets a 0 frequency instead of a
            # ZeroDivisionError
            freq_heights = [float(height) / total_cnt if total_cnt else 0.0
                            for height, total_cnt in zip(heights, total_cnts)]
            axes2.bar(lefts, freq_heights, bottom=freq_bottoms,
                      color=backcolors, edgecolor=edgecolors)

            bottoms = [bot + heig for bot, heig in zip(bottoms, heights)]
            freq_bottoms = [bot + heig
                            for bot, heig in zip(freq_bottoms, freq_heights)]

        # booleans are used because the 'off' strings are not understood as
        # False by recent Matplotlib versions
        axes1.tick_params(axis='y', which='both', left=False, right=False)
        axes2.tick_params(axis='y', which='both', left=False, right=False)

        canvas = FigureCanvas(fig)
        canvas.print_figure(fhand)
        fhand.flush()
Esempio n. 17
0
 def plot_recomb_at_0_dist_hist(self, fhand):
     'It draws a stacked log-scaled histogram of the recombination rates'
     figure = Figure()
     hist_axes = figure.add_subplot(111)
     # every series is a tuple with its data, its label and its color
     series = [
         (self.recomb_rates['ok'], 'OK', (0.3, 1, 0.3)),
         (self.recomb_rates['ok_conf_is_None'], 'OK conf. is none',
          (0.3, 1, 0.6)),
         (self.recomb_rates['not_ok'], 'Not OK', (1, 0.3, 0.3)),
     ]
     # the empty datasets are not drawn
     drawn = [item for item in series if item[0]]
     data = [item[0] for item in drawn]
     labels = [item[1] for item in drawn]
     colors = [item[2] for item in drawn]
     hist_axes.hist(data,
                    stacked=True,
                    fill=True,
                    log=True,
                    bins=20,
                    label=labels,
                    rwidth=1,
                    color=colors)
     _print_figure(hist_axes, figure, fhand)
Esempio n. 18
0
def get_fig_and_canvas(num_rows=1, num_cols=1, figsize=None):
    'It returns a matplotlib figure and a canvas created for it'
    if figsize is None:
        # default size: 7.5 wide per column and 5 high per row, with the
        # height limited to 320
        width = 7.5 * num_cols
        height = min(5.0 * num_rows, 320.0)
        figsize = (width, height)
    try:
        figure = Figure(figsize=figsize)
        figure_canvas = FigureCanvas(figure)
    except NameError:
        raise OptionalRequirementError(
            'Matplotlib module is required to draw graphical histograms')
    return figure, figure_canvas
Esempio n. 19
0
def plot_haplotypes(vcf_fhand,
                    plot_fhand,
                    genotype_mode=REFERENCE,
                    filter_alleles_gt=FILTER_ALLELES_GT):
    """It plots the alleles of every sample for all the SNVs of a VCF.

    The SNVs are read from vcf_fhand and the figure, with one subplot per
    sample, is written to plot_fhand. The samples are taken from the calls
    of the first SNV. filter_alleles_gt is passed to _get_alleles.
    genotype_mode is accepted, but it is not used by this function.

    It raises a ValueError if no sample genotypes are found in the VCF.
    """
    reader = VCFReader(vcf_fhand)

    # collect data
    genotypes = None
    samples = []
    for snv in reader.parse_snvs():
        if genotypes is None:
            genotypes = {}
            for call in snv.calls:
                genotypes[call.sample] = []
                samples.append(call.sample)

        for call in snv.calls:
            alleles = _get_alleles(call, filter_alleles_gt=filter_alleles_gt)
            genotypes[call.sample].append(alleles)

    if not samples:
        raise ValueError('No genotypes found in the VCF, nothing to plot')

    # draw
    n_samples = len(samples)
    # the figure width grows with the number of SNVs, between 8 and 100
    n_snvs = len(genotypes[samples[-1]])
    xsize = min(max(n_snvs / 100, 8), 100)
    ysize = min(n_samples * 2, 100)
    figure_size = (xsize, ysize)

    fig = Figure(figsize=figure_size)

    # the subplot numbers start at 1
    for index, sample in enumerate(samples, start=1):
        axes = fig.add_subplot(n_samples, 1, index)
        axes.set_title(sample)
        y_data = genotypes[sample]
        x_data = [i + 1 for i in range(len(y_data))]
        x_data, y_data = _flatten_data(x_data, y_data)

        axes.plot(x_data,
                  y_data,
                  marker='o',
                  linestyle='None',
                  markersize=3.0,
                  markeredgewidth=0,
                  markerfacecolor='red')
        # some room is left over and under the points
        ylim = axes.get_ylim()
        ylim = ylim[0] - 0.1, ylim[1] + 0.1
        axes.set_ylim(ylim)
        # booleans are used because the 'on' and 'off' strings are not
        # understood by recent Matplotlib versions
        axes.tick_params(axis='x',
                         bottom=False,
                         top=False,
                         which='both',
                         labelbottom=False)
        axes.tick_params(axis='y', left=True, right=False, labelleft=False)
        axes.set_ylabel(sample)

    canvas = FigureCanvas(fig)
    canvas.print_figure(plot_fhand, dpi=300)
    plot_fhand.flush()
Esempio n. 20
0
def _calc_ajusted_recomb(dists, recombs, max_recomb, max_zero_dist_recomb,
                         alpha_recomb_0, plot_fhand=None):
    """It estimates the recombination at distance 0 with up to three fits.

    The recombinations are fitted against the distances with _fit_kosambi.
    The first fit uses all the points, the second one only the points whose
    estimated recombination is lower than max_recomb and the third one
    leaves out the points whose residual is an outlier (further than 1.5
    times the interquartile range from the quartiles).

    It returns a tuple with three items: the recombination at distance 0
    (the second fitted parameter), whether the SNP is considered OK and a
    dict with the fit information. When a fit fails or no close point is
    left it returns (None, False, info) with the cause under the
    'reason_no_fit' key.

    If alpha_recomb_0 is not None a confidence interval for the
    recombination at distance 0 is calculated and the SNP is also OK when 0
    is inside that interval.

    If plot_fhand is given the points and the fitted curves are plotted.
    """
    # first rough interpolation
    # we remove the physical distances with high recombination rates because
    # they're not very informative. e.g. more than 40 cM will not discriminate
    # between false recombination due to hidden segregation in the parents and
    # true recombination

    if plot_fhand:
        fig = Figure()
        axes = fig.add_subplot(111)
        # set_axis_bgcolor was removed from Matplotlib, the newer versions
        # have set_facecolor instead
        set_bgcolor = getattr(axes, 'set_facecolor', None)
        if set_bgcolor is None:
            set_bgcolor = axes.set_axis_bgcolor
        set_bgcolor((1, 0.6, 0.6))
        axes.scatter(dists, recombs, c='r', label='For 1st fit')
    else:
        axes = None
        fig = None
        set_bgcolor = None

    dists = array(dists)
    recombs = array(recombs)
    recomb_rate = 1e-7
    popt, pcov = _fit_kosambi(dists, recombs, init_params=[recomb_rate, 0])
    if popt is None:
        _print_figure(axes, fig, plot_fhand)
        return None, False, {'kosambi_fit_ok': False,
                             'reason_no_fit': '1st fit failed'}

    est_dists = dists
    est_recombs = _kosambi(est_dists, popt[0], popt[1])

    if fig:
        axes.plot(est_dists, est_recombs, label='1st fit', c='r')

    # now we perform a second fit but only with those markers that are a
    # distance that results in a recombination fraction lower than max_recomb
    close_markers = est_recombs < max_recomb
    close_recombs = recombs[close_markers]
    close_dists = dists[close_markers]

    if plot_fhand:
        axes.scatter(close_dists, close_recombs, c='b', label='For 2nd fit')

    if len(close_dists) < 1:
        # This marker is so bad that their closest markers are at a large
        # distance
        _print_figure(axes, fig, plot_fhand)
        return None, False, {'kosambi_fit_ok': False,
                             'reason_no_fit': 'no close region left'}

    if len(close_dists) != len(dists):
        # If we've removed any points we fit again
        popt, pcov = _fit_kosambi(close_dists, close_recombs, init_params=popt)
    if popt is None:
        _print_figure(axes, fig, plot_fhand)
        return None, False, {'kosambi_fit_ok': False,
                             'reason_no_fit': '2nd fit failed'}

    est_close_recombs = _kosambi(close_dists, popt[0], popt[1])

    residuals = close_recombs - est_close_recombs
    if fig:
        axes.plot(close_dists, est_close_recombs, c='b', label='2nd_fit')

    # we exclude the markers with a residual outlier
    quartile_25, quartile_75 = percentile(residuals, [25, 75])
    iqr = quartile_75 - quartile_25
    outlayer_thrld = [quartile_25 - iqr * 1.5, quartile_75 + iqr * 1.5]
    ok_markers = [
        idx for idx, res in enumerate(residuals)
        if (not isnan(res) and (outlayer_thrld[0] < res < outlayer_thrld[1]))
    ]
    ok_recombs = close_recombs[ok_markers]
    ok_dists = close_dists[ok_markers]

    if fig:
        axes.scatter(ok_dists, ok_recombs, c='g', label='For 3rd fit')

    if len(ok_dists) != len(close_dists):
        # If we've removed any points we fit again
        popt, pcov = _fit_kosambi(ok_dists, ok_recombs, init_params=popt)
    if popt is None:
        _print_figure(axes, fig, plot_fhand)
        return None, False, {'kosambi_fit_ok': False,
                             'reason_no_fit': '3rd fit failed'}
    # popt is not None from here on
    var_recomb_at_dist_0 = pcov[1, 1]
    recomb_at_dist_0 = popt[1]
    # background color for the SNPs that are OK, it is changed below when
    # the recombination at distance 0 is within max_zero_dist_recomb
    ok_color = (0.3, 1, 0.6)
    if isinf(var_recomb_at_dist_0):
        conf_interval = None
        if abs(recomb_at_dist_0) < 0.01:
            # recomb is 0 for all points and the variance is inf
            snp_ok = True
        else:
            snp_ok = False
    else:
        if alpha_recomb_0 is None:
            conf_interval = None
            if abs(recomb_at_dist_0) <= max_zero_dist_recomb:
                snp_ok = True
                ok_color = (0.3, 1, 0.3)
            else:
                snp_ok = False
        else:
            # confidence interval from the t distribution and the variance
            # of the fitted parameter
            num_data_points = len(ok_dists)
            num_params = len(popt)
            deg_of_freedom = max(0, num_data_points - num_params)
            tval = t.ppf(1.0 - alpha_recomb_0 / 2., deg_of_freedom)
            std_dev = var_recomb_at_dist_0 ** 0.5
            conf_interval = (recomb_at_dist_0 - std_dev * tval,
                             recomb_at_dist_0 + std_dev * tval)

            if abs(recomb_at_dist_0) <= max_zero_dist_recomb:
                snp_ok = True
                ok_color = (0.3, 1, 0.3)
            elif conf_interval[0] < 0 < conf_interval[1]:
                snp_ok = True
            else:
                snp_ok = False
            if plot_fhand:
                axes.vlines(0, conf_interval[0], conf_interval[1],
                            label='conf. interval')

    if plot_fhand:
        color = ok_color if snp_ok else (1, 0.3, 0.3)
        set_bgcolor(color)

    est2_recombs = _kosambi(ok_dists, popt[0], popt[1])

    if fig:
        axes.plot(ok_dists, est2_recombs, c='g', label='3rd_fit')
        _print_figure(axes, fig, plot_fhand)
    return recomb_at_dist_0, snp_ok, {'kosambi_fit_ok': True,
                                      'conf_interval': conf_interval}
Esempio n. 21
0
def _calc_ajusted_recomb(dists,
                         recombs,
                         max_recomb,
                         max_zero_dist_recomb,
                         alpha_recomb_0,
                         plot_fhand=None):
    """It estimates the recombination at distance 0 with up to three fits.

    The recombinations are fitted against the distances with _fit_kosambi.
    The first fit uses all the points, the second one only the points whose
    estimated recombination is lower than max_recomb and the third one
    leaves out the points whose residual is an outlier (further than 1.5
    times the interquartile range from the quartiles).

    It returns a tuple with three items: the recombination at distance 0
    (the second fitted parameter), whether the SNP is considered OK and a
    dict with the fit information. When a fit fails or no close point is
    left it returns (None, False, info) with the cause under the
    'reason_no_fit' key.

    If alpha_recomb_0 is not None a confidence interval for the
    recombination at distance 0 is calculated and the SNP is also OK when 0
    is inside that interval.

    If plot_fhand is given the points and the fitted curves are plotted.
    """
    # first rough interpolation
    # we remove the physical distances with high recombination rates because
    # they're not very informative. e.g. more than 40 cM will not discriminate
    # between false recombination due to hidden segregation in the parents and
    # true recombination

    if plot_fhand:
        fig = Figure()
        axes = fig.add_subplot(111)
        # set_axis_bgcolor was removed from Matplotlib, the newer versions
        # have set_facecolor instead
        set_bgcolor = getattr(axes, 'set_facecolor', None)
        if set_bgcolor is None:
            set_bgcolor = axes.set_axis_bgcolor
        set_bgcolor((1, 0.6, 0.6))
        axes.scatter(dists, recombs, c='r', label='For 1st fit')
    else:
        axes = None
        fig = None
        set_bgcolor = None

    dists = array(dists)
    recombs = array(recombs)
    recomb_rate = 1e-7
    popt, pcov = _fit_kosambi(dists, recombs, init_params=[recomb_rate, 0])
    if popt is None:
        _print_figure(axes, fig, plot_fhand)
        return None, False, {
            'kosambi_fit_ok': False,
            'reason_no_fit': '1st fit failed'
        }

    est_dists = dists
    est_recombs = _kosambi(est_dists, popt[0], popt[1])

    if fig:
        axes.plot(est_dists, est_recombs, label='1st fit', c='r')

    # now we perform a second fit but only with those markers that are a
    # distance that results in a recombination fraction lower than max_recomb
    close_markers = est_recombs < max_recomb
    close_recombs = recombs[close_markers]
    close_dists = dists[close_markers]

    if plot_fhand:
        axes.scatter(close_dists, close_recombs, c='b', label='For 2nd fit')

    if len(close_dists) < 1:
        # This marker is so bad that their closest markers are at a large
        # distance
        _print_figure(axes, fig, plot_fhand)
        return None, False, {
            'kosambi_fit_ok': False,
            'reason_no_fit': 'no close region left'
        }

    if len(close_dists) != len(dists):
        # If we've removed any points we fit again
        popt, pcov = _fit_kosambi(close_dists, close_recombs, init_params=popt)
    if popt is None:
        _print_figure(axes, fig, plot_fhand)
        return None, False, {
            'kosambi_fit_ok': False,
            'reason_no_fit': '2nd fit failed'
        }

    est_close_recombs = _kosambi(close_dists, popt[0], popt[1])

    residuals = close_recombs - est_close_recombs
    if fig:
        axes.plot(close_dists, est_close_recombs, c='b', label='2nd_fit')

    # we exclude the markers with a residual outlier
    quartile_25, quartile_75 = percentile(residuals, [25, 75])
    iqr = quartile_75 - quartile_25
    outlayer_thrld = [quartile_25 - iqr * 1.5, quartile_75 + iqr * 1.5]
    ok_markers = [
        idx for idx, res in enumerate(residuals)
        if (not isnan(res) and (outlayer_thrld[0] < res < outlayer_thrld[1]))
    ]
    ok_recombs = close_recombs[ok_markers]
    ok_dists = close_dists[ok_markers]

    if fig:
        axes.scatter(ok_dists, ok_recombs, c='g', label='For 3rd fit')

    if len(ok_dists) != len(close_dists):
        # If we've removed any points we fit again
        popt, pcov = _fit_kosambi(ok_dists, ok_recombs, init_params=popt)
    if popt is None:
        _print_figure(axes, fig, plot_fhand)
        return None, False, {
            'kosambi_fit_ok': False,
            'reason_no_fit': '3rd fit failed'
        }
    # popt is not None from here on
    var_recomb_at_dist_0 = pcov[1, 1]
    recomb_at_dist_0 = popt[1]
    # background color for the SNPs that are OK, it is changed below when
    # the recombination at distance 0 is within max_zero_dist_recomb
    ok_color = (0.3, 1, 0.6)
    if isinf(var_recomb_at_dist_0):
        conf_interval = None
        if abs(recomb_at_dist_0) < 0.01:
            # recomb is 0 for all points and the variance is inf
            snp_ok = True
        else:
            snp_ok = False
    else:
        if alpha_recomb_0 is None:
            conf_interval = None
            if abs(recomb_at_dist_0) <= max_zero_dist_recomb:
                snp_ok = True
                ok_color = (0.3, 1, 0.3)
            else:
                snp_ok = False
        else:
            # confidence interval from the t distribution and the variance
            # of the fitted parameter
            num_data_points = len(ok_dists)
            num_params = len(popt)
            deg_of_freedom = max(0, num_data_points - num_params)
            tval = t.ppf(1.0 - alpha_recomb_0 / 2., deg_of_freedom)
            std_dev = var_recomb_at_dist_0**0.5
            conf_interval = (recomb_at_dist_0 - std_dev * tval,
                             recomb_at_dist_0 + std_dev * tval)

            if abs(recomb_at_dist_0) <= max_zero_dist_recomb:
                snp_ok = True
                ok_color = (0.3, 1, 0.3)
            elif conf_interval[0] < 0 < conf_interval[1]:
                snp_ok = True
            else:
                snp_ok = False
            if plot_fhand:
                axes.vlines(0,
                            conf_interval[0],
                            conf_interval[1],
                            label='conf. interval')

    if plot_fhand:
        color = ok_color if snp_ok else (1, 0.3, 0.3)
        set_bgcolor(color)

    est2_recombs = _kosambi(ok_dists, popt[0], popt[1])

    if fig:
        axes.plot(ok_dists, est2_recombs, c='g', label='3rd_fit')
        _print_figure(axes, fig, plot_fhand)
    return recomb_at_dist_0, snp_ok, {
        'kosambi_fit_ok': True,
        'conf_interval': conf_interval
    }