def dye_by_bin(plate_objects, analyzed_wells, eventful_wells, analyzed_sample_names,
               eventful_sample_names, bin_func, channel='FAM'):
    """
    @deprecated -- use metrics

    Summarize dye amplitudes per bin for each sample group.

    Returns a dict keyed by bin (as computed by bin_func over analyzed_wells).
    Each value is a list of six [concentration, stats] pairs, where stats is
    either None or a (mean, stdev) tuple of the pooled amplitudes.

    :param plate_objects: Mapping from (dirname, basename) of a plate file to
                          the loaded plate object.
    :param analyzed_wells: Wells used both to derive the bins and to build the
                           analyzed sample groups.
    :param eventful_wells: Wells used to build the eventful sample groups.
    :param analyzed_sample_names: Sample names to group from analyzed_wells.
    :param eventful_sample_names: Sample names to group from eventful_wells.
    :param bin_func: Function mapping a well to its bin key.
    :param channel: 'VIC' selects VIC amplitudes; anything else selects FAM.
    """
    # assumes standard dye concentrations: the i-th sample group fills the
    # i-th slot below, so more than six groups is not supported here.
    bin_plots = {}
    for bin_key in set(bin_func(w) for w in analyzed_wells):
        bin_plots[bin_key] = [[3, None], [10, None], [30, None],
                              [100, None], [300, None], [500, None]]

    groups = [(name, groupinto(sample_name_filter(analyzed_wells, name), bin_func))
              for name in analyzed_sample_names]
    groups += [(name, groupinto(sample_name_filter(eventful_wells, name), bin_func))
               for name in eventful_sample_names]

    def well_amplitudes(w):
        # resolve the database well to the loaded plate's well, then pick the channel
        qplate = plate_objects[(w.plate.file.dirname, w.plate.file.basename)]
        peaks = qplate.wells[w.well_name].peaks
        if channel == 'VIC':
            return vic_amplitudes(peaks)
        return fam_amplitudes(peaks)

    for slot, (sample, group) in enumerate(groups):
        for bin_key, wells in group:
            per_well = [well_amplitudes(w) for w in wells]
            if not per_well:
                # no wells in this bin for this sample: leave the slot as None
                continue
            if len(per_well) == 1:
                pooled = per_well[0]
            else:
                pooled = np.concatenate(per_well)
            bin_plots[bin_key][slot][1] = (np.mean(pooled), np.std(pooled))

    return bin_plots
def compute(self, qlwell, qlwell_channel, well_channel_metric, dyeset=None):
    """
    Fill in the positive/negative cluster statistics of a well channel metric
    from a dye calibration well.

    The calibration properties come from dyeset when it is supplied; otherwise
    they are chosen from the well's sample name. If neither applies, the
    metric is returned untouched.

    :param qlwell: The well to analyze.
    :param qlwell_channel: Not used by this method.
    :param well_channel_metric: The metric record to update; its channel_num
                                (0 or 1) selects which clusters are used.
    :param dyeset: Optional calibration properties overriding the sample-name
                   lookup.
    :return: well_channel_metric (the same object that was passed in).
    """
    calibration = dyeset
    if not calibration:
        if qlwell.sample_name == DYES_FAM_VIC_LABEL:
            calibration = DYES_FAM_VIC
        elif qlwell.sample_name == DYES_FAM_HEX_LABEL:
            calibration = DYES_FAM_HEX
        else:
            # do not know how to compute, return wcm
            return well_channel_metric

    blue_hi, blue_lo, green_hi, green_lo = single_well_calibration_clusters(qlwell, calibration)

    channel_num = well_channel_metric.channel_num
    if channel_num == 0:
        hi_cluster, lo_cluster, amplitudes_of = blue_hi, blue_lo, fam_amplitudes
    elif channel_num == 1:
        hi_cluster, lo_cluster, amplitudes_of = green_hi, green_lo, vic_amplitudes
    else:
        # only channels 0 and 1 have statistics defined
        return well_channel_metric

    hi_amplitudes = amplitudes_of(hi_cluster)
    lo_amplitudes = amplitudes_of(lo_cluster)

    well_channel_metric.positive_peaks = len(hi_cluster)
    well_channel_metric.positive_mean = np.mean(hi_amplitudes)
    well_channel_metric.positive_stdev = np.std(hi_amplitudes)
    well_channel_metric.negative_peaks = len(lo_cluster)
    well_channel_metric.negative_mean = np.mean(lo_amplitudes)
    well_channel_metric.negative_stdev = np.std(lo_amplitudes)
    # NOTE(review): the FAM widths are used for both channels, including the
    # channel 1 (green) cluster -- confirm this is intended and not a
    # copy-paste of the channel 0 branch.
    well_channel_metric.width_mean_hi = np.mean(fam_widths(hi_cluster))

    return well_channel_metric
def _pooled_vic_mean(qplate, channels, default):
    """Return the VIC amplitude mean pooled over the wells of channels, or default when they hold no accepted peaks."""
    total = 0.0
    count = 0
    for channel in channels:
        amps = vic_amplitudes(accepted_peaks(qplate.wells[channel.well.well_name]))
        if len(amps) > 0:
            # weight each well's mean by its peak count; skipping empty wells
            # keeps np.mean([]) == nan from poisoning the pooled value
            total += np.mean(amps) * len(amps)
            count += len(amps)
    if count == 0:
        return default
    return total / count


def _per_ten_thousand(count, total):
    """Scale count/total to events per 10000; 0.0 when total is zero."""
    if total == 0:
        return 0.0
    return 10000 * (float(count) / float(total))


def fpfn_by_bin(plate_objects, vic_channels, sample_names, bin_func):
    """
    Compute false positive / false negative counts per bin and per plate.

    For each plate within each bin, a threshold is placed one quarter of the
    way from the pooled negative-control mean to the pooled positive-control
    mean (VIC amplitudes of accepted peaks). The FP and FN wells of that plate
    are then split at that threshold with cluster_1d.

    :param plate_objects: Mapping from (dirname, basename) of a plate file to
                          the loaded plate object.
    :param vic_channels: Channel records to analyze.
    :param sample_names: Not used by this function.
    :param bin_func: Function mapping a channel record to its bin key.
    :return: A defaultdict mapping bin key to a list with one
             (fp_counts, fn_counts, threshold) tuple per plate.
             fp_counts entries are (well id, positive count, positives per
             10000, plate well); fn_counts entries are (well id, negative
             count, negatives per 10000). The per-10000 rate is 0.0 for a
             well with no clustered peaks.
    """
    bin_wells = defaultdict(list)

    for bin_key, group in groupinto(vic_channels, bin_func):
        # this is a wacky grouping, but for reuse in plate_objects
        # (why did I not pick plate ids again?)
        plate_groups = groupinto(group, lambda c: (c.well.plate.file.dirname, c.well.plate.file.basename))
        for plate_id, channels in plate_groups:
            qplate = plate_objects[plate_id]

            positives = [c for c in channels if c.well.well_name in fpfn_positive_well_names]
            negatives = [c for c in channels if c.well.well_name in fpfn_negative_well_names]

            # compute a threshold which is 1/4 between the positive and
            # negative means for the plate; the defaults apply when a plate
            # has no usable control peaks
            positive_mean = _pooled_vic_mean(qplate, positives, 32767)
            negative_mean = _pooled_vic_mean(qplate, negatives, 0)
            threshold = ((3*negative_mean)+positive_mean)/4

            fp_counts = []
            for f in channels:
                if f.well.well_name not in fpfn_fp_well_names:
                    continue
                plate_well = qplate.wells[f.well.well_name]
                pos, neg = cluster_1d(accepted_peaks(plate_well), 1, threshold)
                fp_counts.append((f.well.id,
                                  len(pos),
                                  _per_ten_thousand(len(pos), len(pos) + len(neg)),
                                  plate_well))

            fn_counts = []
            for f in channels:
                if f.well.well_name not in fpfn_fn_well_names:
                    continue
                plate_well = qplate.wells[f.well.well_name]
                pos, neg = cluster_1d(accepted_peaks(plate_well), 1, threshold)
                fn_counts.append((f.well.id,
                                  len(neg),
                                  _per_ten_thousand(len(neg), len(pos) + len(neg))))

            bin_wells[bin_key].append((fp_counts, fn_counts, threshold))

    return bin_wells
def cluster_csv(self, id=None, show_only_gated=True, *args, **kwargs):
    """
    Return a CSV download of per-peak times, amplitudes, widths and observed
    cluster assignments for a well.

    :param id: Passed through to the threshold-form well lookup.
    :param show_only_gated: Any value other than the string 'False' restricts
                            the output to accepted peaks; 'False' exports all
                            peaks and adds an '_all' suffix to the filename.
    :return: The CSV document as a string.
    """
    from pyqlb.nstats.well import accepted_peaks, well_observed_cluster_assignments
    from pyqlb.nstats.peaks import fam_amplitudes, fam_widths, vic_amplitudes, vic_widths, peak_times

    qlwell = self.__qlwell_from_threshold_form(id)

    # the flag arrives as text, so only the literal 'False' turns gating off
    gated_only = show_only_gated != 'False'
    if gated_only:
        peaks = accepted_peaks(qlwell)
    else:
        peaks = qlwell.peaks

    # NOTE(review): c.well is presumably populated by the well lookup above -- confirm
    plate_name = c.well.plate.plate.name
    well_name = c.well.well_name
    suffix = '' if gated_only else '_all'

    response.headers['Content-Type'] = 'text/csv'
    h.set_download_response_header(request, response,
                                   "%s_%s%s.csv" % (str(plate_name), str(well_name), suffix))

    out = StringIO.StringIO()
    csvwriter = csv_pkg.writer(out)
    csvwriter.writerows([
        ['Plate', plate_name],
        ['Well', well_name],
        [],
        ['Time', 'FAMAmplitude', 'FAMWidth', 'VICAmplitude', 'VICWidth', 'Cluster'],
        [],
    ])

    columns = (
        peak_times(peaks),
        fam_amplitudes(peaks),
        fam_widths(peaks),
        vic_amplitudes(peaks),
        vic_widths(peaks),
        well_observed_cluster_assignments(qlwell, peaks),
    )
    csvwriter.writerows(zip(*columns))

    document = out.getvalue()
    out.close()
    return document
def revb_extracluster_peaks(well, channel_num, threshold=None, pct_boundary=0.3, exclude_min_amplitude_peaks=True):
    """
    Return the peaks that are outside the clusters.  A superset of
    polydispersity peaks, meant primarily for dye wells, where there
    should be no biological basis for rain.

    A peak counts as inside a cluster when its width lies strictly between
    the width gates of its amplitude bin and its amplitude lies inside the
    rain boundaries (between neg and pos, or, when both middle boundaries are
    set, between neg and middle_low or between middle_high and pos).

    Returns a 3-tuple:
    * the extra-cluster peaks
    * the rain boundaries (pos, middle_high, middle_low, neg)
    * (mean FAM amplitude, mean VIC amplitude) of the peaks considered

    :param well: The well to analyze; its sum_amplitude_bins are read.
    :param channel_num: The channel whose widths and amplitudes are gated.
    :param threshold: Threshold override; when falsy, the channel's stored
                      threshold is used (or None if that is falsy too).
    :param pct_boundary: Passed through to rain_pvalues_thresholds.
    :param exclude_min_amplitude_peaks: When true, start from
                                        above_min_amplitude_peaks(well)
                                        instead of well.peaks.
    """
    def all_of(*conditions):
        # left-to-right logical AND of the given masks
        mask = conditions[0]
        for condition in conditions[1:]:
            mask = np.logical_and(mask, condition)
        return mask

    if not threshold:
        threshold = well.channels[channel_num].statistics.threshold or None

    if exclude_min_amplitude_peaks:
        peaks = above_min_amplitude_peaks(well)
    else:
        peaks = well.peaks

    # get rain_pvalues; only the four boundaries are needed here
    _, _, _, pos, middle_high, middle_low, neg = rain_pvalues_thresholds(
        peaks, channel_num=channel_num, threshold=threshold, pct_boundary=pct_boundary)

    binned_peaks = bin_peaks_by_amplitude(peaks, well.sum_amplitude_bins)

    extra_peaks = np.ndarray([0], dtype=peak_dtype(2))
    for bin_peaks, (min_gate, max_gate, boundary) in zip(binned_peaks, well.sum_amplitude_bins):
        widths = channel_widths(bin_peaks, channel_num)
        amplitudes = channel_amplitudes(bin_peaks, channel_num)
        width_ok = all_of(widths > min_gate, widths < max_gate)

        if middle_high and middle_low:
            # two clusters: the positive band and the negative band
            in_cluster = np.logical_or(
                all_of(width_ok, amplitudes > middle_high, amplitudes < pos),
                all_of(width_ok, amplitudes > neg, amplitudes < middle_low))
        else:
            in_cluster = all_of(width_ok, amplitudes > neg, amplitudes < pos)

        outside = np.extract(np.logical_not(in_cluster), bin_peaks)
        extra_peaks = np.hstack([extra_peaks, outside])

    mean_amplitudes = (np.mean(fam_amplitudes(peaks)), np.mean(vic_amplitudes(peaks)))
    return (extra_peaks, (pos, middle_high, middle_low, neg), mean_amplitudes)
def temporal2d(self, id=None, *args, **kwargs):
    """
    Render the temporal 2D view for a well.

    Stores (time, VIC amplitude, FAM amplitude) triples for the well's
    accepted peaks on the template context as c.tvf.

    :param id: Passed through to the threshold-form well lookup.
    :return: The rendered '/well/temporal2d.html' template.
    """
    from qtools.lib.nstats.peaks import accepted_peaks
    from pyqlb.nstats.peaks import peak_times, fam_amplitudes, vic_amplitudes

    qlwell = self.__qlwell_from_threshold_form(id)
    self.__set_threshold_context(qlwell)

    gated_peaks = accepted_peaks(qlwell)
    times = peak_times(gated_peaks)
    vic = vic_amplitudes(gated_peaks)
    fam = fam_amplitudes(gated_peaks)

    c.tvf = zip(times, vic, fam)
    return render('/well/temporal2d.html')
def test_single_well_calibration_clusters(self):
    """
    Check cluster sizes and mean amplitudes returned by
    single_well_calibration_clusters for the FAM/VIC and FAM/HEX fixture wells.
    """
    def check_sizes(clusters, expected_sizes):
        for cluster, expected in zip(clusters, expected_sizes):
            assert len(cluster) == expected

    def check_mean(amplitudes, expected, tolerance):
        assert abs(expected - np.mean(amplitudes)) < tolerance

    # FAM/VIC calibration well
    fam_hi, fam_lo, vic_hi, vic_lo = single_well_calibration_clusters(self.vic_well, DYES_FAM_VIC)
    check_sizes((fam_hi, fam_lo, vic_hi, vic_lo), (5495, 5201, 5326, 5257))
    check_mean(fam_amplitudes(fam_hi), 20000, 10)
    check_mean(fam_amplitudes(fam_lo), 2369, 10)
    check_mean(vic_amplitudes(vic_hi), 10000, 15)
    check_mean(vic_amplitudes(vic_lo), 2025, 10)

    # FAM/HEX calibration well; HEX is read through the VIC amplitude accessor
    fam_hi, fam_lo, hex_hi, hex_lo = single_well_calibration_clusters(self.hex_well, DYES_FAM_HEX)
    check_sizes((fam_hi, fam_lo, hex_hi, hex_lo), (3623, 3456, 3719, 3599))
    check_mean(fam_amplitudes(fam_hi), 20000, 10)
    check_mean(fam_amplitudes(fam_lo), 2790, 10)
    check_mean(vic_amplitudes(hex_hi), 10000, 15)
    check_mean(vic_amplitudes(hex_lo), 2010, 10)
def bin_peaks_by_amplitude(peaks, amplitude_bins, max_amplitude=32768):
    """
    Given a set of peaks and bins, bin the peaks into the bins by
    sum channel (FAM amplitude + VIC amplitude).

    Bin i covers sums from its own lower boundary (inclusive) up to the lower
    boundary of bin i+1 (exclusive); the last bin is capped by max_amplitude.
    Peaks whose sum falls below the first boundary or at/above max_amplitude
    are not placed in any bin.

    :param peaks: The peaks to bin.
    :param amplitude_bins: Sequence of bins; element [2] of each bin is read
                           as that bin's lower sum-amplitude boundary.
    :param max_amplitude: Exclusive upper limit of the last bin.
    :return: A list with one array of peaks per bin, in bin order. Empty when
             amplitude_bins is empty.
    """
    lower_bounds = [amplitude_bin[2] for amplitude_bin in amplitude_bins]
    if not lower_bounds:
        # nothing to bin into; avoids indexing the last boundary of an empty list
        return []

    amplitude_sums = fam_amplitudes(peaks) + vic_amplitudes(peaks)

    # each bin's upper edge is the next bin's lower edge; built with list
    # concatenation so it works whether zip returns a list or an iterator
    upper_bounds = lower_bounds[1:] + [max_amplitude]

    binned_peaks = []
    for lower, upper in zip(lower_bounds, upper_bounds):
        in_region = np.logical_and(amplitude_sums >= lower, amplitude_sums < upper)
        binned_peaks.append(np.extract(in_region, peaks))
    return binned_peaks
def single_well_calibration_clusters(qlwell, dye_cal_props):
    """
    Returns the clusters for the specified color calibration well.
    Returns them in ch0-HI, ch0-LO, ch1-HI, ch1-LO order.

    Each accepted peak is converted to polar coordinates using its
    color-uncorrected amplitudes. The angle decides the channel (at or above
    THETA_THRESHOLD is channel 0) and the magnitude, compared against that
    channel's expected_magnitude_threshold, decides HI versus LO. The
    returned clusters contain the accepted (color-corrected) peaks.

    :param qlwell: The well to analyze
    :param dye_cal_props: A list of dye properties representing the calibration
                          properties on the dyes for each channel.  (Should be a 2-tuple.)
    :return: A 4-tuple of peak arrays; four empty arrays when the well has no
             accepted peaks.
    """
    ok_peaks = accepted_peaks(qlwell)
    if len(ok_peaks) < 1:
        # pass back a 4-tuple of empty peaks
        return (ok_peaks, ok_peaks, ok_peaks, ok_peaks)

    # NOTE(review): accepted_peaks is evaluated a second time here; reuse
    # ok_peaks only once color_uncorrected_peaks is confirmed not to modify
    # its input.
    raw_peaks = color_uncorrected_peaks(accepted_peaks(qlwell), qlwell.color_compensation_matrix)

    # FAM is y, VIC is x.
    polars = np.array([cmath.polar(complex(vic, fam))
                       for vic, fam in zip(vic_amplitudes(raw_peaks), fam_amplitudes(raw_peaks))])
    magnitude = polars[..., 0]
    theta = polars[..., 1]

    blue_side = theta >= THETA_THRESHOLD
    green_side = theta < THETA_THRESHOLD
    blue_cutoff = dye_cal_props[0].expected_magnitude_threshold
    green_cutoff = dye_cal_props[1].expected_magnitude_threshold

    blue_hi = np.extract(np.logical_and(blue_side, magnitude >= blue_cutoff), ok_peaks)
    blue_lo = np.extract(np.logical_and(blue_side, magnitude < blue_cutoff), ok_peaks)
    green_hi = np.extract(np.logical_and(green_side, magnitude >= green_cutoff), ok_peaks)
    green_lo = np.extract(np.logical_and(green_side, magnitude < green_cutoff), ok_peaks)

    return blue_hi, blue_lo, green_hi, green_lo
def peak_csv(self, id=None, show_only_gated=True, *args, **kwargs):
    """
    Return a CSV download of per-peak times, amplitudes, widths and quality
    values for a well, preceded by the well's thresholds and gates.

    :param id: Passed through to the threshold-form well lookup.
    :param show_only_gated: Any value other than the string 'False' restricts
                            the output to accepted peaks; 'False' exports all
                            peaks and adds an '_all' suffix to the filename.
    :return: The CSV document as a string.
    """
    from qtools.lib.nstats.peaks import accepted_peaks
    from pyqlb.nstats.peaks import fam_amplitudes, fam_widths, fam_quality, vic_amplitudes, vic_widths, vic_quality, peak_times

    qlwell = self.__qlwell_from_threshold_form(id)

    # the flag arrives as text, so only the literal 'False' turns gating off
    gated_only = show_only_gated != 'False'
    if gated_only:
        peaks = accepted_peaks(qlwell)
    else:
        peaks = qlwell.peaks

    # NOTE(review): c.well is presumably populated by the well lookup above -- confirm
    plate_name = c.well.plate.plate.name
    well_name = c.well.well_name
    suffix = '' if gated_only else '_all'

    response.headers['Content-Type'] = 'text/csv'
    h.set_download_response_header(request, response,
                                   "%s_%s%s.csv" % (str(plate_name), str(well_name), suffix))

    fam_stats = qlwell.channels[0].statistics
    vic_stats = qlwell.channels[1].statistics

    out = StringIO.StringIO()
    csvwriter = csv_pkg.writer(out)
    csvwriter.writerows([
        ['Plate', plate_name],
        ['Well', well_name],
        [],
        ['FAMThreshold', fam_stats.threshold],
        ['VICThreshold', vic_stats.threshold],
        # width and quality gates are read from channel 0 only
        ['WidthGate', fam_stats.min_width_gate, fam_stats.max_width_gate],
        ['MinQualityGate', fam_stats.min_quality_gate],
        [],
        ['Time', 'FAMAmplitude', 'FAMWidth', 'FAMQuality', 'VICAmplitude', 'VICWidth', 'VICQuality'],
        [],
    ])

    columns = (
        peak_times(peaks),
        fam_amplitudes(peaks),
        fam_widths(peaks),
        fam_quality(peaks),
        vic_amplitudes(peaks),
        vic_widths(peaks),
        vic_quality(peaks),
    )
    csvwriter.writerows(zip(*columns))

    document = out.getvalue()
    out.close()
    return document
def revb_polydisperse_peaks(well, channel_num, threshold=None, pct_boundary=0.3, exclude_min_amplitude_peaks=True):
    """
    Computes polydispersity for a well which has amplitude bins defined.

    Returns a 3-tuple (4-tuple, 4-tuple, 2-tuple).

    The first 4-tuple is:
    * positive droplets, with widths above the upper width gate of the
      droplet's amplitude bin.
    * middle rain, with widths above the bin's upper width gate.
    * middle rain, with widths below the bin's lower width gate.
    * negative rain, with widths below the bin's lower width gate.

    The two middle-rain arrays stay empty unless both middle boundaries
    are set.

    The second 4-tuple is:
    * positive rain boundary
    * middle rain upper boundary (can be None)
    * middle rain lower boundary (can be None)
    * negative rain boundary

    The third 2-tuple is:
    * mean FAM amplitude
    * mean VIC amplitude

    This is for being able to draw approximate single-channel polydispersity
    graphs down the line (this does beg the question, is there a better 2D
    definition of polydispersity?)

    :param well: The well to analyze; must carry sum_amplitude_bins.
    :param channel_num: The channel whose widths and amplitudes are gated.
    :param threshold: Threshold override; when falsy, the channel's stored
                      threshold is used (or None if that is falsy too).
    :param pct_boundary: Passed through to rain_pvalues_thresholds.
    :param exclude_min_amplitude_peaks: When true, start from
                                        above_min_amplitude_peaks(well)
                                        instead of well.peaks.
    :raises ValueError: If amplitude bins are not defined on the well.
    """
    if not hasattr(well, 'sum_amplitude_bins') or len(well.sum_amplitude_bins) == 0:
        raise ValueError("No amplitude bins for this well.")

    if not threshold:
        threshold = well.channels[channel_num].statistics.threshold or None

    if exclude_min_amplitude_peaks:
        peaks = above_min_amplitude_peaks(well)
    else:
        peaks = well.peaks

    # only the four rain boundaries are needed from the p-value computation
    _, _, _, pos, middle_high, middle_low, neg = rain_pvalues_thresholds(
        peaks, channel_num=channel_num, threshold=threshold, pct_boundary=pct_boundary)

    binned_peaks = bin_peaks_by_amplitude(peaks, well.sum_amplitude_bins)

    # each list starts with an empty peak array so the final hstack always
    # yields an array of the peak dtype, even when nothing matched
    pos_parts = [np.ndarray([0], dtype=peak_dtype(2))]
    midhigh_parts = [np.ndarray([0], dtype=peak_dtype(2))]
    midlow_parts = [np.ndarray([0], dtype=peak_dtype(2))]
    neg_parts = [np.ndarray([0], dtype=peak_dtype(2))]

    for bin_peaks, (min_gate, max_gate, boundary) in zip(binned_peaks, well.sum_amplitude_bins):
        widths = channel_widths(bin_peaks, channel_num)
        amplitudes = channel_amplitudes(bin_peaks, channel_num)
        too_wide = widths > max_gate
        too_narrow = widths < min_gate

        pos_parts.append(np.extract(np.logical_and(too_wide, amplitudes > pos), bin_peaks))

        if middle_high and middle_low:
            in_middle = np.logical_and(amplitudes < middle_high, amplitudes > middle_low)
            midhigh_parts.append(np.extract(np.logical_and(too_wide, in_middle), bin_peaks))
            midlow_parts.append(np.extract(np.logical_and(too_narrow, in_middle), bin_peaks))

        neg_parts.append(np.extract(np.logical_and(too_narrow, amplitudes < neg), bin_peaks))

    polydisperse = (np.hstack(pos_parts),
                    np.hstack(midhigh_parts),
                    np.hstack(midlow_parts),
                    np.hstack(neg_parts))
    mean_amplitudes = (np.mean(fam_amplitudes(peaks)), np.mean(vic_amplitudes(peaks)))
    return (polydisperse, (pos, middle_high, middle_low, neg), mean_amplitudes)