Esempio n. 1
0
def quartile_concentration_ratio(well, channel_num=0, threshold=None, peaks=None, min_events=4000):
    """
    Return the ratio of the concentration measured in the last quarter of
    the peaks to the concentration measured in the first quarter.

    :param well: The well to analyze.  Supplies the default peaks (via
                 accepted_peaks), the default threshold and the droplet volume.
    :param channel_num: Channel index used for the threshold lookup and clustering.
    :param threshold: Amplitude threshold separating positives from negatives.
                      A falsy value (None or 0) means "use the threshold stored
                      on the well's channel statistics".
    :param peaks: Peaks to analyze; defaults to accepted_peaks(well).
    :param min_events: Minimum number of peaks required to compute a ratio.
    :return: last-quartile concentration / first-quartile concentration, or
             None if there are fewer than min_events peaks, no usable
             threshold, or either concentration is NaN or zero.
    """
    if peaks is None:
        peaks = accepted_peaks(well)

    if len(peaks) < min_events:
        return None

    if not threshold:
        threshold = well.channels[channel_num].statistics.threshold

    if not threshold:
        return None

    # Floor division: a float is not a valid slice index on Python 3, and
    # this matches the integer division the expression performed on Python 2.
    quartile_size = len(peaks) // 4
    first_quartile = peaks[:quartile_size]
    last_quartile = peaks[len(peaks)-quartile_size:]

    first_pos, first_neg = cluster_1d(first_quartile, channel_num, threshold)
    fq_conc = concentration(len(first_pos), len(first_neg), droplet_vol=well.droplet_volume) # could be nan
    last_pos, last_neg = cluster_1d(last_quartile, channel_num, threshold)
    lq_conc = concentration(len(last_pos), len(last_neg), droplet_vol=well.droplet_volume) # could be nan

    # if conc is nan or zero, we can't compute a real ratio
    if math.isnan(fq_conc) or math.isnan(lq_conc) or fq_conc == 0 or lq_conc == 0:
        return None
    return lq_conc/fq_conc
Esempio n. 2
0
def _pooled_vic_mean(qplate, control_channels, default):
    """
    Return the droplet-weighted mean VIC amplitude over the accepted peaks
    of the given control wells, or `default` if no droplets are available.
    """
    weighted = []
    for c in control_channels:
        amps = vic_amplitudes(accepted_peaks(qplate.wells[c.well.well_name]))
        # Skip empty wells: np.mean of an empty sequence is NaN, which would
        # otherwise poison the pooled mean (and the threshold) for the plate.
        if len(amps) > 0:
            weighted.append((len(amps), np.mean(amps)*len(amps)))

    if not weighted:
        return default
    return sum([w[1] for w in weighted])/sum([w[0] for w in weighted])


def _per_ten_thousand(count, total):
    """
    Return `count` expressed per 10000 of `total`; 0.0 when `total` is zero.
    """
    # NOTE(review): 0.0 for an empty well avoids a ZeroDivisionError and stays
    # numeric for consumers -- confirm that is the preferred placeholder.
    if total == 0:
        return 0.0
    return 10000*(float(count)/float(total))


def fpfn_by_bin(plate_objects, vic_channels, sample_names, bin_func):
    """
    Compute false positive / false negative droplet counts per bin.

    Channels are grouped by bin_func, then by plate.  For each plate a
    threshold is placed 1/4 of the way from the mean amplitude of the
    negative control wells to the mean amplitude of the positive control
    wells, and the FP and FN wells are clustered on channel 1 at that
    threshold.

    :param plate_objects: Mapping of (dirname, basename) of the plate file
                          to the plate object holding the wells.
    :param vic_channels: The channel records to analyze.
    :param sample_names: Unused; kept for interface compatibility.
    :param bin_func: Function mapping a channel record to its bin key.
    :return: A defaultdict mapping bin key to a list with one
             (fp_counts, fn_counts, threshold) tuple per plate, where
             fp_counts entries are (well id, positive count, positives per
             10000 droplets, well) and fn_counts entries are
             (well id, negative count, negatives per 10000 droplets).
    """
    bin_wells = defaultdict(list)
    # divide into plates
    for bin_key, group in groupinto(vic_channels, bin_func):
        # this is a wacky grouping, but for reuse in plate_objects (why did I not pick plate ids again?)
        plate_groups = groupinto(group, lambda c: (c.well.plate.file.dirname, c.well.plate.file.basename))
        for plate_id, channels in plate_groups:
            qplate = plate_objects[plate_id]
            positives = [c for c in channels if c.well.well_name in fpfn_positive_well_names]
            negatives = [c for c in channels if c.well.well_name in fpfn_negative_well_names]

            # compute a threshold which is 1/4 between the positive and negative means for the plate;
            # fall back to the extremes (32767 / 0) when a control group has no droplets
            positive_mean = _pooled_vic_mean(qplate, positives, 32767)
            negative_mean = _pooled_vic_mean(qplate, negatives, 0)
            threshold = ((3*negative_mean)+positive_mean)/4

            fps = [c for c in channels if c.well.well_name in fpfn_fp_well_names]
            fns = [c for c in channels if c.well.well_name in fpfn_fn_well_names]

            fp_counts = []
            fn_counts = []

            for f in fps:
                fp_well = qplate.wells[f.well.well_name]
                pos, neg = cluster_1d(accepted_peaks(fp_well), 1, threshold)
                fp_counts.append((f.well.id, len(pos),
                                  _per_ten_thousand(len(pos), len(pos)+len(neg)),
                                  fp_well))

            for f in fns:
                pos, neg = cluster_1d(accepted_peaks(qplate.wells[f.well.well_name]), 1, threshold)
                fn_counts.append((f.well.id, len(neg),
                                  _per_ten_thousand(len(neg), len(pos)+len(neg))))

            bin_wells[bin_key].append((fp_counts, fn_counts, threshold))

    return bin_wells
Esempio n. 3
0
def dnr_by_bin(plate_objects, fam_channels, sample_names, bin_func):
    """
    @deprecated - use metrics

    Build, for every bin produced by bin_func over fam_channels, a list of
    five [level, result] slots.  The i-th sample name fills the i-th slot of
    each bin it has channels in with (mean concentration, per-well
    (conc, low, high) tuples); slots without data keep None.
    Concentrations are floored at 0.0001.
    """
    # TODO: do sample names here
    bin_plots = {}
    for bin_key in set(bin_func(chan) for chan in fam_channels):
        bin_plots[bin_key] = [[0.001, None], [0.01, None], [0.1, None], [1, None], [5, None]]

    # one binned grouping per sample name, in sample_names order
    grouped_by_sample = [groupinto(sample_name_channel_filter(fam_channels, name), bin_func)
                         for name in sample_names]

    for slot, binned_channels in enumerate(grouped_by_sample):
        for bin_key, channels in binned_channels:
            concentrations = []
            for chan in channels:
                plate_key = (chan.well.plate.file.dirname, chan.well.plate.file.basename)
                well = plate_objects[plate_key].wells[chan.well.well_name]
                # TODO: use dynamic threshold or keep 4000?
                positives, negatives = cluster_1d(accepted_peaks(well), 0, 4000)
                conc, clow, chigh = concentration_interval(len(positives), len(negatives),
                                                           droplet_vol=well.droplet_volume)
                concentrations.append(tuple(max(value, 0.0001) for value in (conc, clow, chigh)))

            if concentrations:
                conc_mean = np.mean([entry[0] for entry in concentrations])
                bin_plots[bin_key][slot][1] = (conc_mean, concentrations)

    return bin_plots
Esempio n. 4
0
def well_statistics(qlwell, override_thresholds=None):
    """
    Build a WellStatisticsUI holding one WellChannelStatisticsUI per channel
    of the well.

    For a channel without an override threshold, the threshold and
    concentration values stored on the channel statistics are used and the
    positives/negatives come from well_observed_positives_negatives.  For a
    channel with a (truthy) override threshold, the accepted peaks are
    re-clustered at that threshold and the concentration values recomputed.

    :param qlwell: The well to summarize.
    :param override_thresholds: Optional per-channel sequence of thresholds;
                                None means no overrides.
    :return: The populated WellStatisticsUI.
    """
    from pyqlb.nstats import concentration, concentration_interval
    from pyqlb.nstats.peaks import cluster_1d
    from pyqlb.nstats.well import well_observed_positives_negatives, accepted_peaks

    thresholds = override_thresholds
    if thresholds is None:
        thresholds = [None]*len(qlwell.channels)

    wellui = WellStatisticsUI(channels=[])
    for idx, channel in enumerate(qlwell.channels):
        override = thresholds[idx]
        if override:
            # recompute everything from the accepted peaks at the override threshold
            positives, negatives = cluster_1d(accepted_peaks(qlwell), idx, override)
            threshold = override
            num_pos, num_neg = len(positives), len(negatives)
            conc = concentration(num_pos, num_neg, qlwell.droplet_volume)
            conc_interval = concentration_interval(num_pos, num_neg, qlwell.droplet_volume)
        else:
            # reuse what is already stored on the channel
            positives, negatives, _ = well_observed_positives_negatives(qlwell, idx)
            stored = channel.statistics
            threshold = stored.threshold
            conc = stored.concentration
            conc_interval = (stored.concentration,
                             stored.concentration_lower_bound,
                             stored.concentration_upper_bound)

        wellui.channels.append(
            WellChannelStatisticsUI(positives=len(positives),
                                    negatives=len(negatives),
                                    threshold=threshold,
                                    concentration=conc,
                                    concentration_interval=conc_interval))

    return wellui
Esempio n. 5
0
def count_positives_in_well(well, channel):
    """
    Count the positive events in a well for the given channel.

    The accepted peaks of the well are clustered at the threshold stored on
    the channel statistics.

    :param well: The well to analyze.
    :param channel: Channel index.
    :return: A (number of positives, number of positives + negatives) tuple.
    """
    from pyqlb.nstats.peaks import cluster_1d
    threshold = well.channels[channel].statistics.threshold
    positives, negatives = cluster_1d(accepted_peaks(well), channel, threshold)
    return len(positives), len(positives)+len(negatives)
Esempio n. 6
0
def fam_variation_splits(well, threshold=None):
    """
    Fit gaussians to the amplitudes returned by fam_amplitudes for a well:
    overall, for the first half of the amplitudes and for the second half.

    If threshold is given, only the peaks that cluster_1d classifies as
    positive on channel 0 at that threshold are used; otherwise all of
    well.peaks are used.

    Returns an 8-tuple: the gaussian parameters (A, mu, sigma) overall,
    then for the first half of the amplitudes, then for the second half;
    the overall mean, mean of the first half and the
    mean of the second half, and the number of peaks on each half.
    """
    from scipy.optimize import curve_fit

    if threshold is not None:
        positives, negatives = cluster_1d(well.peaks, 0, threshold)
        peaks = positives
    else:
        peaks = well.peaks

    amps = fam_amplitudes(peaks)
    # Floor division: a float is not a valid slice index on Python 3, and
    # this matches the integer division the expression performed on Python 2.
    midpoint = len(amps) // 2
    first_half = amps[:midpoint]
    second_half = amps[midpoint:]

    def histogram(values):
        # 257-bin histogram; returns (counts, bin edges, bin centers)
        counts, edges = np.histogram(values, bins=amp_bins(values, num_bins=257))
        return counts, edges, bin_centers(edges)

    def fit_gaussian(values, counts, edges, centers):
        # initial guess: tallest bin, sample mean, one bin width
        (amp, mean, sigma), _ = curve_fit(gauss, centers, counts,
                                          p0=[max(counts), np.mean(values), edges[1]-edges[0]])
        return (amp, mean, sigma)

    fvals, fpos, fcenters = histogram(first_half)
    svals, spos, scenters = histogram(second_half)
    avals, apos, acenters = histogram(amps)

    first_fit = fit_gaussian(first_half, fvals, fpos, fcenters)
    second_fit = fit_gaussian(second_half, svals, spos, scenters)
    overall_fit = fit_gaussian(amps, avals, apos, acenters)

    # NOTE(review): max(...)/3 is integer division on Python 2 but true
    # division on Python 3 -- confirm which cutoff peak_count expects.
    return (overall_fit,
            first_fit,
            second_fit,
            np.mean(amps),
            np.mean(first_half),
            np.mean(second_half),
            peak_count(fvals, min_peak_val=max(fvals)/3),
            peak_count(svals, min_peak_val=max(svals)/3))
Esempio n. 7
0
def stats_for_qlp_well(well, compute_clusters=False, override_thresholds=None):
    """
    Return statistics about a QLWell object read from a QLP file.
    The QLWell object should have a populated `peaks` attribute (reading from QLBs won't work)

    :param well: The QLWell to analyze.
    :param compute_clusters: If True, the second return value is the result
                             of well_cluster_peaks(); otherwise it is a
                             placeholder dict with empty peak lists.
    :param override_thresholds: Optional per-channel thresholds, indexed by
                                channel (0 and 1).  A falsy value means no
                                overrides, i.e. (None, None).
    :return: A (statistics, clusters) tuple; statistics is the object built
             by well_statistics() with additional well-level and per-channel
             attributes attached.
    """
    from pyqlb.nstats.peaks import cluster_1d, channel_amplitudes
    from pyqlb.nstats.well import accepted_peaks, above_min_amplitude_peaks, well_channel_sp_values, well_cluster_peaks
    from pyqlb.nstats.well import well_observed_positives_negatives, well_s2d_values, getClusters
    from pyqlb.nstats.well import high_flier_droplets, low_flier_droplets, singleRain_droplets, doubleRain_droplets, diagonal_scatter
    from numpy import mean as np_mean, std as np_std

    if not override_thresholds:
        override_thresholds = (None, None)

    statistics = well_statistics(well, override_thresholds=override_thresholds)
    accepted = len(accepted_peaks(well))
    num_above_min = len(above_min_amplitude_peaks(well))

    ## polydispersity and s/p values, per channel
    for chan in (0, 1):
        if num_above_min > 0 and accepted > 0:
            if well.sum_amplitude_bins:
                peaksets, _, _ = revb_polydisperse_peaks(well, chan, threshold=override_thresholds[chan])
            else:
                peaksets, _, _ = polydisperse_peaks(well, chan, threshold=override_thresholds[chan])
            poly_peaks = sum([len(peakset) for peakset in peaksets])
            statistics[chan].revb_polydispersity_pct = 100*float(poly_peaks)/num_above_min
        else:
            statistics[chan].revb_polydispersity_pct = 0

        s, p_plus, p, p_minus = well_channel_sp_values(well, chan, override_threshold=override_thresholds[chan])
        statistics[chan].s_value = s
        statistics[chan].p_plus = p_plus
        statistics[chan].p_plus_drops = int(p_plus*accepted) if p_plus is not None else None
        statistics[chan].p = p
        statistics[chan].p_drops = int(p*accepted) if p is not None else None
        statistics[chan].p_minus = p_minus
        statistics[chan].p_minus_drops = int(p_minus*accepted) if p_minus is not None else None

    ## compute s2d plots
    s2d_vals = well_s2d_values(well, thresholds=override_thresholds)
    statistics[0].s2d_value = s2d_vals[0] if s2d_vals is not None else None
    statistics[1].s2d_value = s2d_vals[1] if s2d_vals is not None else None

    ## compute extra cluster metrics
    metric_clusters = getClusters(well, override_thresholds)
    dscatter = diagonal_scatter(metric_clusters)
    statistics.diagonal_scatter = dscatter[1] if dscatter is not None else None
    statistics.diagonal_scatter_pct = dscatter[2] * 100 if dscatter is not None else None
    for channel in [0, 1]:
        high_fliers = high_flier_droplets(metric_clusters, channel)
        statistics[channel].high_flier_value = high_fliers[1] if high_fliers is not None else None
        statistics[channel].high_flier_pct = high_fliers[2] * 100 if high_fliers is not None else None

        low_fliers = low_flier_droplets(metric_clusters, channel)
        statistics[channel].low_flier_value = low_fliers[1] if low_fliers is not None else None
        statistics[channel].low_flier_pct = low_fliers[2] * 100 if low_fliers is not None else None

        singleRain = singleRain_droplets(metric_clusters, channel)
        statistics[channel].single_rain_value = singleRain[1] if singleRain is not None else None
        statistics[channel].single_rain_pct = singleRain[2] * 100 if singleRain is not None else None

        doubleRain = doubleRain_droplets(metric_clusters, channel)
        statistics[channel].double_rain_value = doubleRain[1] if doubleRain is not None else None
        statistics[channel].double_rain_pct = doubleRain[2] * 100 if doubleRain is not None else None

    if compute_clusters:
        clusters = well_cluster_peaks(well, override_thresholds)
    else:
        clusters = {'positive_peaks': {'positive_peaks': [], 'negative_peaks': []},
                    'negative_peaks': {'positive_peaks': [], 'negative_peaks': []}}

    # cheap hack
    statistics.alg_version = "%s.%s/%s.%s" % (well.statistics.peak_alg_major_version,
                                              well.statistics.peak_alg_minor_version,
                                              well.statistics.quant_alg_major_version,
                                              well.statistics.quant_alg_minor_version)
    statistics.ref_copy_num = well.ref_copy_num
    statistics[0].decision_tree = well.channels[0].decision_tree_verbose
    statistics[1].decision_tree = well.channels[1].decision_tree_verbose
    # end cheap hack

    # SNR
    for chan in (0, 1):
        if override_thresholds[chan]:
            # TODO add this to pyqlb.nstats.well instead
            pos, neg = cluster_1d(accepted_peaks(well), chan, override_thresholds[chan])
        else:
            pos, neg, unknown = well_observed_positives_negatives(well, chan)

        for attr, coll in (('positive_snr', pos), ('negative_snr', neg)):
            # Check the collection being measured (not always the positives):
            # an empty collection has no mean/std, so its SNR is reported as 0.
            if len(coll) > 0:
                amps = channel_amplitudes(coll, chan)
                amp_mean = np_mean(amps)
                amp_std = np_std(amps)
                if amp_std > 0:
                    setattr(statistics[chan], attr, amp_mean/amp_std)
                else:
                    # zero spread: report a large sentinel instead of dividing by zero
                    setattr(statistics[chan], attr, 10000)
            else:
                setattr(statistics[chan], attr, 0)

    for channel in [0, 1]:
        means, stds = total_events_amplitude_vals(well, channel)
        statistics[channel].total_events_amplitude_mean = means
        statistics[channel].total_events_amplitude_stdev = stds

    return statistics, clusters