def quartile_concentration_ratio(well, channel_num=0, threshold=None, peaks=None, min_events=4000):
    """
    Return the ratio of the concentration computed from the last quarter of
    a well's peaks to the concentration computed from the first quarter.

    The quarters are positional slices of `peaks` (the first and the last
    ``len(peaks) // 4`` entries).

    :param well: The well to analyze.  Supplies the accepted peaks (when
                 `peaks` is not given), the stored channel threshold (when
                 `threshold` is not given) and the droplet volume.
    :param channel_num: The channel to cluster on.
    :param threshold: The positive/negative amplitude threshold.  Any falsy
                      value (None or 0) means "use the threshold stored in
                      the well's channel statistics".
    :param peaks: The peaks to analyze.  Defaults to accepted_peaks(well).
    :param min_events: The minimum number of peaks required to compute a ratio.
    :return: last-quartile concentration / first-quartile concentration, or
             None if there are fewer than `min_events` peaks, no usable
             threshold, or either concentration is nan or zero.
    """
    if peaks is None:
        peaks = accepted_peaks(well)

    if len(peaks) < min_events:
        return None

    if not threshold:
        threshold = well.channels[channel_num].statistics.threshold
        if not threshold:
            return None

    # Floor division: the quartile size is used as a slice bound, which must
    # be an integer (plain `/` yields a float under true division).
    quartile_size = len(peaks) // 4
    first_quartile = peaks[:quartile_size]
    last_quartile = peaks[len(peaks) - quartile_size:]

    first_pos, first_neg = cluster_1d(first_quartile, channel_num, threshold)
    # could be nan
    fq_conc = concentration(len(first_pos), len(first_neg), droplet_vol=well.droplet_volume)

    last_pos, last_neg = cluster_1d(last_quartile, channel_num, threshold)
    # could be nan
    lq_conc = concentration(len(last_pos), len(last_neg), droplet_vol=well.droplet_volume)

    # if conc is nan or zero, we can't compute a real ratio
    if math.isnan(fq_conc) or math.isnan(lq_conc) or fq_conc == 0 or lq_conc == 0:
        return None
    return lq_conc / fq_conc
def _pooled_vic_mean(qplate, channels, default):
    """
    Return the mean VIC amplitude over the accepted peaks of all wells named
    by `channels` on `qplate`, weighting each well by its event count.

    Wells with no accepted events are skipped: np.mean() of an empty
    sequence is nan, and nan * 0 would otherwise poison the pooled mean.
    Returns `default` if there are no events at all.
    """
    total_amplitude = 0.0
    total_events = 0
    for channel in channels:
        amps = vic_amplitudes(accepted_peaks(qplate.wells[channel.well.well_name]))
        if len(amps) == 0:
            continue
        total_amplitude += np.mean(amps) * len(amps)
        total_events += len(amps)

    if total_events == 0:
        return default
    return total_amplitude / total_events


def _per_10000(count, total):
    """
    Return `count` expressed per 10000 of `total`; 0.0 when `total` is zero.
    """
    if total == 0:
        return 0.0
    return 10000 * (float(count) / float(total))


def fpfn_by_bin(plate_objects, vic_channels, sample_names, bin_func):
    """
    Compute false positive / false negative counts per plate, grouped by bin.

    For each bin (as computed by `bin_func`) and each plate within that bin,
    a threshold is derived from the plate's positive and negative control
    wells, and the plate's FP and FN wells are clustered on channel 1
    against that threshold.

    :param plate_objects: Mapping of (dirname, basename) of a plate file to
                          the plate object whose `wells` are analyzed.
    :param vic_channels: The channel records to analyze.
    :param sample_names: Unused; kept for interface compatibility.
    :param bin_func: Function mapping a channel record to its bin key.
    :return: A defaultdict mapping bin key to a list with one
             (fp_counts, fn_counts, threshold) tuple per plate.  Each
             fp_counts entry is (well id, positive count, positives per
             10000 events, plate well object); each fn_counts entry is
             (well id, negative count, negatives per 10000 events).

    A control well without accepted events is ignored when computing the
    threshold, and an FP/FN well without events reports a rate of 0.0.
    """
    bin_wells = defaultdict(list)

    # divide into plates
    for bin_key, group in groupinto(vic_channels, bin_func):
        # this is a wacky grouping, but for reuse in plate_objects (why did I not pick plate ids again?)
        plate_groups = groupinto(group, lambda c: (c.well.plate.file.dirname, c.well.plate.file.basename))
        for plate_id, channels in plate_groups:
            qplate = plate_objects[plate_id]
            positives = [c for c in channels if c.well.well_name in fpfn_positive_well_names]
            negatives = [c for c in channels if c.well.well_name in fpfn_negative_well_names]

            # compute a threshold which is 1/4 between the positive and negative means for the plate
            positive_mean = _pooled_vic_mean(qplate, positives, 32767)
            negative_mean = _pooled_vic_mean(qplate, negatives, 0)
            threshold = ((3*negative_mean)+positive_mean)/4

            fps = [c for c in channels if c.well.well_name in fpfn_fp_well_names]
            fns = [c for c in channels if c.well.well_name in fpfn_fn_well_names]

            fp_counts = []
            for f in fps:
                qwell = qplate.wells[f.well.well_name]
                pos, neg = cluster_1d(accepted_peaks(qwell), 1, threshold)
                fp_counts.append((f.well.id,
                                  len(pos),
                                  _per_10000(len(pos), len(pos) + len(neg)),
                                  qwell))

            fn_counts = []
            for f in fns:
                qwell = qplate.wells[f.well.well_name]
                pos, neg = cluster_1d(accepted_peaks(qwell), 1, threshold)
                fn_counts.append((f.well.id,
                                  len(neg),
                                  _per_10000(len(neg), len(pos) + len(neg))))

            bin_wells[bin_key].append((fp_counts, fn_counts, threshold))

    return bin_wells
def dnr_by_bin(plate_objects, fam_channels, sample_names, bin_func):
    """
    @deprecated - use metrics

    Compute per-bin, per-sample concentration summaries.

    The result maps each bin key (bin_func applied to a channel) to a list
    of five two-element lists.  For the i-th name in `sample_names`, the
    second element of slot i is replaced with (mean concentration, list of
    (conc, low, high) tuples) when at least one matching channel falls into
    the bin.  Each concentration value is floored at 0.0001.  Only five
    slots exist, so only the first five sample names can be recorded.
    """
    bin_keys = set(bin_func(channel) for channel in fam_channels)

    # TODO: do sample names here
    bin_plots = {}
    for key in bin_keys:
        bin_plots[key] = [[0.001, None], [0.01, None], [0.1, None], [1, None], [5, None]]

    # group each sample's channels by bin up front
    per_sample = []
    for sample_name in sample_names:
        matching = sample_name_channel_filter(fam_channels, sample_name)
        per_sample.append((sample_name, groupinto(matching, bin_func)))

    for slot, (_sample, binned) in enumerate(per_sample):
        for key, channels in binned:
            intervals = []
            for channel in channels:
                plate_file = channel.well.plate.file
                qplate = plate_objects[(plate_file.dirname, plate_file.basename)]
                well = qplate.wells[channel.well.well_name]
                # TODO: use dynamic threshold or keep 4000?
                pos, neg = cluster_1d(accepted_peaks(well), 0, 4000)
                conc, clow, chigh = concentration_interval(len(pos), len(neg),
                                                           droplet_vol=well.droplet_volume)
                intervals.append((max(conc, 0.0001), max(clow, 0.0001), max(chigh, 0.0001)))

            if intervals:
                mean_conc = np.mean([entry[0] for entry in intervals])
                bin_plots[key][slot][1] = (mean_conc, intervals)

    return bin_plots
def well_statistics(qlwell, override_thresholds=None):
    """
    Build a WellStatisticsUI summarizing every channel of a well.

    :param qlwell: The well to summarize.
    :param override_thresholds: Optional sequence with one threshold per
                                channel.  A falsy entry (or omitting the
                                argument) means the channel's stored
                                statistics are reported as-is; otherwise
                                the accepted peaks are re-clustered at the
                                given threshold and the concentration and
                                its interval are recomputed.
    :return: A WellStatisticsUI with one WellChannelStatisticsUI per channel.
    """
    from pyqlb.nstats import concentration, concentration_interval
    from pyqlb.nstats.peaks import cluster_1d
    from pyqlb.nstats.well import well_observed_positives_negatives, accepted_peaks

    if override_thresholds is None:
        override_thresholds = [None] * len(qlwell.channels)

    summary = WellStatisticsUI(channels=[])
    for channel_index, channel in enumerate(qlwell.channels):
        override = override_thresholds[channel_index]
        if override:
            # recompute everything at the caller-supplied threshold
            positives, negatives = cluster_1d(accepted_peaks(qlwell), channel_index, override)
            threshold = override
            conc = concentration(len(positives), len(negatives), qlwell.droplet_volume)
            conc_interval = concentration_interval(len(positives), len(negatives),
                                                   qlwell.droplet_volume)
        else:
            # report what is already stored on the channel
            positives, negatives, _unclassified = well_observed_positives_negatives(qlwell, channel_index)
            stored = channel.statistics
            threshold = stored.threshold
            conc = stored.concentration
            conc_interval = (stored.concentration,
                             stored.concentration_lower_bound,
                             stored.concentration_upper_bound)

        summary.channels.append(WellChannelStatisticsUI(positives=len(positives),
                                                        negatives=len(negatives),
                                                        threshold=threshold,
                                                        concentration=conc,
                                                        concentration_interval=conc_interval))

    return summary
def count_positives_in_well(well, channel):
    """
    Return (number of positives, total number of clustered events) for the
    given channel of a well.

    The well's accepted peaks are split into positives and negatives using
    the threshold stored in the channel's statistics; the total is the sum
    of both groups.

    :param well: The well to analyze.
    :param channel: The index of the channel to cluster on.
    """
    from pyqlb.nstats.peaks import cluster_1d

    threshold = well.channels[channel].statistics.threshold
    positives, negatives = cluster_1d(accepted_peaks(well), channel, threshold)
    num_positives = len(positives)
    return num_positives, num_positives + len(negatives)
def _fit_amplitude_gaussian(amps):
    """
    Histogram `amps` into amp_bins(..., num_bins=257) and fit `gauss` to the
    histogram.  Returns ((A, mu, sigma), histogram counts).
    """
    from scipy.optimize import curve_fit

    edges = amp_bins(amps, num_bins=257)
    counts, positions = np.histogram(amps, bins=edges)
    centers = bin_centers(positions)
    # initial guess: tallest bin, sample mean, one bin width
    (amp, mean, sigma), _covar = curve_fit(gauss, centers, counts,
                                           p0=[max(counts), np.mean(amps), positions[1]-positions[0]])
    return (amp, mean, sigma), counts


def fam_variation_splits(well, threshold=None):
    """
    Returns an 8-tuple: the gaussian parameters (A, mu, sigma) overall,
    then for the first half of the amplitudes, then for the second
    half; the overall mean, mean of the first half and the mean
    of the second half, and the number of peaks on each half.

    :param well: The well whose `peaks` are analyzed.
    :param threshold: If not None, only the peaks that cluster_1d classifies
                      as positive on channel 0 at this threshold are used;
                      otherwise all of the well's peaks are used.
    """
    if threshold is not None:
        positives, negatives = cluster_1d(well.peaks, 0, threshold)
        peaks = positives
    else:
        peaks = well.peaks

    amps = fam_amplitudes(peaks)
    # Floor division: the midpoint is a slice bound and must be an integer
    # (plain `/` yields a float under true division).
    midpoint = len(amps) // 2
    first_half = amps[:midpoint]
    second_half = amps[midpoint:]

    first_fit, fvals = _fit_amplitude_gaussian(first_half)
    second_fit, svals = _fit_amplitude_gaussian(second_half)
    overall_fit, _avals = _fit_amplitude_gaussian(amps)

    return (overall_fit,
            first_fit,
            second_fit,
            np.mean(amps),
            np.mean(first_half),
            np.mean(second_half),
            peak_count(fvals, min_peak_val=max(fvals)/3),
            peak_count(svals, min_peak_val=max(svals)/3))
def stats_for_qlp_well(well, compute_clusters=False, override_thresholds=None):
    """
    Return statistics about a QLWell object read from a QLP file.
    The QLWell object should have a populated `peaks` attribute (reading from QLBs won't work)

    :param well: The well to analyze.
    :param compute_clusters: If true, the second return value is the result
                             of well_cluster_peaks(); otherwise it is a
                             placeholder dict of empty peak lists.
    :param override_thresholds: Optional pair of per-channel thresholds.
                                A falsy value means (None, None), i.e. no
                                override on either channel.
    :return: A (statistics, clusters) tuple.  `statistics` is the object
             returned by well_statistics(), decorated with additional
             well-level and per-channel (0 and 1) attributes.
    """
    from pyqlb.nstats.peaks import cluster_1d, channel_amplitudes
    from pyqlb.nstats.well import accepted_peaks, above_min_amplitude_peaks, well_channel_sp_values, well_cluster_peaks
    from pyqlb.nstats.well import well_observed_positives_negatives, well_s2d_values, getClusters
    from pyqlb.nstats.well import high_flier_droplets, low_flier_droplets, singleRain_droplets, doubleRain_droplets, diagonal_scatter
    from numpy import mean as np_mean, std as np_std

    if not override_thresholds:
        override_thresholds = (None, None)

    statistics = well_statistics(well, override_thresholds=override_thresholds)
    accepted = len(accepted_peaks(well))
    num_above_min = len(above_min_amplitude_peaks(well))

    # polydispersity and s/p values, per channel
    for channel in (0, 1):
        channel_stats = statistics[channel]
        channel_threshold = override_thresholds[channel]

        if num_above_min > 0 and accepted > 0:
            if well.sum_amplitude_bins:
                peaksets, boundaries, amps = revb_polydisperse_peaks(well, channel, threshold=channel_threshold)
            else:
                peaksets, boundaries, width_gates = polydisperse_peaks(well, channel, threshold=channel_threshold)
            poly_peaks = sum([len(p) for p in peaksets])
            channel_stats.revb_polydispersity_pct = 100*float(poly_peaks)/num_above_min
        else:
            channel_stats.revb_polydispersity_pct = 0

        s, p_plus, p, p_minus = well_channel_sp_values(well, channel, override_threshold=channel_threshold)
        channel_stats.s_value = s
        channel_stats.p_plus = p_plus
        channel_stats.p_plus_drops = int(p_plus*accepted) if p_plus is not None else None
        channel_stats.p = p
        channel_stats.p_drops = int(p*accepted) if p is not None else None
        channel_stats.p_minus = p_minus
        channel_stats.p_minus_drops = int(p_minus*accepted) if p_minus is not None else None

    ## compute s2d plots
    s2d_vals = well_s2d_values(well, thresholds=override_thresholds)
    statistics[0].s2d_value = s2d_vals[0] if s2d_vals is not None else None
    statistics[1].s2d_value = s2d_vals[1] if s2d_vals is not None else None

    ## compute extra cluster metrics
    clusters = getClusters(well, override_thresholds)
    dscatter = diagonal_scatter(clusters)
    statistics.diagonal_scatter = dscatter[1] if dscatter is not None else None
    statistics.diagonal_scatter_pct = dscatter[2] * 100 if dscatter is not None else None

    for channel in (0, 1):
        channel_stats = statistics[channel]

        high_fliers = high_flier_droplets(clusters, channel)
        channel_stats.high_flier_value = high_fliers[1] if high_fliers is not None else None
        channel_stats.high_flier_pct = high_fliers[2] * 100 if high_fliers is not None else None

        low_fliers = low_flier_droplets(clusters, channel)
        channel_stats.low_flier_value = low_fliers[1] if low_fliers is not None else None
        channel_stats.low_flier_pct = low_fliers[2] * 100 if low_fliers is not None else None

        singleRain = singleRain_droplets(clusters, channel)
        channel_stats.single_rain_value = singleRain[1] if singleRain is not None else None
        channel_stats.single_rain_pct = singleRain[2] * 100 if singleRain is not None else None

        doubleRain = doubleRain_droplets(clusters, channel)
        channel_stats.double_rain_value = doubleRain[1] if doubleRain is not None else None
        channel_stats.double_rain_pct = doubleRain[2] * 100 if doubleRain is not None else None

    # the clusters used for the metrics above are not what gets returned
    if compute_clusters:
        clusters = well_cluster_peaks(well, override_thresholds)
    else:
        clusters = {'positive_peaks': {'positive_peaks': [], 'negative_peaks': []},
                    'negative_peaks': {'positive_peaks': [], 'negative_peaks': []}}

    # cheap hack
    statistics.alg_version = "%s.%s/%s.%s" % (well.statistics.peak_alg_major_version,
                                              well.statistics.peak_alg_minor_version,
                                              well.statistics.quant_alg_major_version,
                                              well.statistics.quant_alg_minor_version)
    statistics.ref_copy_num = well.ref_copy_num
    statistics[0].decision_tree = well.channels[0].decision_tree_verbose
    statistics[1].decision_tree = well.channels[1].decision_tree_verbose
    # end cheap hack

    # SNR
    for chan in (0, 1):
        if override_thresholds[chan]:
            # TODO add this to pyqlb.nstats.well instead
            pos, neg = cluster_1d(accepted_peaks(well), chan, override_thresholds[chan])
        else:
            pos, neg, unknown = well_observed_positives_negatives(well, chan)

        for attr, coll in (('positive_snr', pos), ('negative_snr', neg)):
            # Guard on the cluster actually being measured.  Guarding on
            # `pos` for both attributes took the mean/std of an empty
            # negative cluster (nan) and zeroed negative_snr for wells
            # that simply had no positives.
            if len(coll) > 0:
                amps = channel_amplitudes(coll, chan)
                amp_mean = np_mean(amps)
                amp_std = np_std(amps)
                if amp_std > 0:
                    setattr(statistics[chan], attr, amp_mean/amp_std)
                else:
                    # no spread in the amplitudes: report a fixed large value
                    setattr(statistics[chan], attr, 10000)
            else:
                setattr(statistics[chan], attr, 0)

    for channel in (0, 1):
        means, stds = total_events_amplitude_vals(well, channel)
        statistics[channel].total_events_amplitude_mean = means
        statistics[channel].total_events_amplitude_stdev = stds

    return statistics, clusters