def get_g2_dna_loc(log_dna, x_dna, log_edu, edu_cutoff,
                   dna_g1_loc, phase_candidates, nsmooth, ax):
    """Finds G2 phase peak location based on DNA content

    Parameters
    ----------
    log_dna : 1d array
        log DNA content across all cells in a well
    x_dna : 1d array
        uniformly spaced grid based on expected DNA content
    log_edu : 1d array
        log EdU intensities across all cells in a well
    edu_cutoff : numpy float
        EdU gate; only cells with log EdU below this value are used
    dna_g1_loc : float
        position of G1 peak in log DNA space
    phase_candidates : ndarray
        3-by-n array comprising n candidates for G1, S, and G2 peaks.
        Only entries [1, 0] and [2, 0] are read here.
    nsmooth : int
        smoothing parameter
    ax : subplot object
        if not None, the kernel density estimate is drawn on this axis

    Returns
    -------
    dna_g2_loc : numpy float
        position of G2 phase peak in log DNA space
    """
    log2 = np.log10(2)
    high_dna_bool = ((log_dna > dna_g1_loc + 0.4 * log2) &
                     (log_edu < edu_cutoff))
    f_dna = get_kde(log_dna[high_dna_bool], x_dna)
    peak_amp, peak_loc, _ = findpeaks(smooth.smooth(f_dna, nsmooth).tolist())
    # Remove lesser peaks: keep peaks taller than 10% of the tallest one.
    # (The threshold must come from the amplitudes, not the peak indices.)
    peak_loc = peak_loc[peak_amp > np.max(peak_amp / 10)]
    g2_loc_candidates = x_dna[peak_loc]
    g2_loc_candidates = g2_loc_candidates[
        g2_loc_candidates > (dna_g1_loc + 0.5 * log2)]

    if len(g2_loc_candidates) > 1:
        if np.any(phase_candidates[2, 0]):
            # A G2 candidate is available: take the peak closest to it.
            g2_loc = g2_loc_candidates[np.argmin(
                np.abs(g2_loc_candidates - phase_candidates[2, 0]))]
        else:
            # If an S candidate is available, prefer peaks above it;
            # otherwise keep every candidate in play.
            s_candidate = phase_candidates[1, 0]
            above_s = g2_loc_candidates > s_candidate
            if np.any(s_candidate) and np.any(above_s):
                g2_loc_candidates = g2_loc_candidates[above_s]
            # Pick the peak closest to twice the G1 DNA content.
            g2_loc = g2_loc_candidates[np.argmin(np.abs(
                g2_loc_candidates - dna_g1_loc - log2))]
    elif len(g2_loc_candidates) == 1:
        # Return a scalar, like every other branch.
        g2_loc = g2_loc_candidates[0]
    else:
        # No usable peak: assume G2 sits at twice the G1 DNA content.
        g2_loc = dna_g1_loc + log2

    if ax is not None:
        ax.plot(x_dna, f_dna, ':')
    return g2_loc
def get_ldrgates(ldrtxt, x_ldr=None):
    """Gating based on ldr intensities

    Parameters
    ----------
    ldrtxt : 1d array
        ldr txt feature across all cells in a well
    x_ldr : 1d array
        uniformly distributed 1d grid based on expected range of ldr txt.
        If None, a grid from -0.01 to max(ldrtxt) + 0.01 with step 0.0002
        is used.

    Returns
    -------
    ldr_gates : 1d array
        [-inf, ldr_cutoff], where ldr_cutoff is chosen from the trough to
        the right of the main peak of the kernel density estimate and
        multiples of the peak width
    """
    if x_ldr is None:
        mx = np.max(ldrtxt.tolist()) + 0.01
        x_ldr = np.arange(-0.01, mx, 0.0002)
    f_ldr = get_kde(ldrtxt, x_ldr)  # ldrtxt should be an array
    _, peak_loc, peak_width = findpeaks(f_ldr.tolist(), npeaks=1)

    # Work with scalar grid indices throughout: mixing the 1-element
    # arrays returned by findpeaks with scalars inside np.min([...]) /
    # np.max([...]) builds ragged arrays, which newer NumPy rejects.
    main_peak = peak_loc[0]
    ldrwidth_5x = main_peak + 5 * peak_width[0]
    ldrwidth_2p5 = main_peak + 2.5 * peak_width[0]

    # Find location of minimum on the right of the main peak
    f_neg = [-x for x in f_ldr[main_peak:]]
    _, trough_loc, _ = findpeaks(f_neg, npeaks=1)
    if np.any(trough_loc):
        # NOTE(review): the "- 1" looks like a 1-based indexing holdover;
        # kept as-is to preserve existing gate values - confirm.
        trough_loc = trough_loc[0] + main_peak - 1
    else:
        # If peakfinder cannot find a minimum, use ldrwidth_5x as default
        trough_loc = ldrwidth_5x

    # choose LDR cutoff based on half-proximal width and right trough of
    # peak, bounded below by index 3 and above by the grid length
    cutoff_index_1 = len(x_ldr) - 2
    cutoff_index_2 = np.max(
        [3, np.min([trough_loc, ldrwidth_5x]), ldrwidth_2p5])
    ldr_cutoff = x_ldr[np.min([cutoff_index_1, int(cutoff_index_2)])]
    return np.array([-np.inf, ldr_cutoff])
def get_low_edu_peaks(log_edu, px_edu, edu_shift, edu_g1_max,
                      log_dna, dna_g1_loc, nsmooth=5):
    """Returns peak for EdU intensities below edu_g1_max

    Parameters
    ----------
    log_edu : 1d array
        log EdU intensities across all cells in a well
    px_edu : 1d array
        uniformly spaced grid based on expected EdU range
    edu_shift : float
        expected difference between G1/G2 and S phases in log EdU space
        (not used by this function)
    edu_g1_max : float
        G1 phase gating (relative to S) based on EdU intensity
    log_dna : 1d array
        log DNA content across all cells in a well
    dna_g1_loc : float
        G1 position based on log DNA
    nsmooth : int
        smoothing parameter

    Returns
    -------
    low_edu_peaks : numpy float
        lowest sufficiently tall peak of the EdU density among cells near
        G1 with EdU below edu_g1_max; the median log EdU of those cells
        when no peak is found
    """
    grid_step = px_edu[1] - px_edu[0]
    near_g1 = (log_dna > dna_g1_loc - 1) & (log_dna < dna_g1_loc + 0.1)
    below_g1_max = log_edu < edu_g1_max

    # Prefer cells whose EdU clears the smoothing window at the bottom of
    # the grid; if that leaves nothing, drop the lower bound.
    selected = (near_g1 &
                (log_edu > 2 * nsmooth * grid_step) &
                below_g1_max)
    if not np.any(selected):
        selected = near_g1 & below_g1_max
    low_edu = log_edu[selected]

    f_edu_low = get_kde(low_edu, px_edu)
    bin_counts, _ = histc(low_edu, px_edu)
    # Zero the density wherever the smoothed histogram is (almost) empty.
    # Check discrepancy in array length when using 3
    sparse_bins = smooth.smooth(bin_counts, 2.99, 'flat') <= 1 / 3
    f_edu_low[sparse_bins] = 0

    smoothed = smooth.smooth(f_edu_low, nsmooth).tolist()
    edu_amp, edu_loc, _ = findpeaks(smoothed, npeaks=2)
    if not np.any(edu_loc):
        return np.median(low_edu)

    # Keep peaks above 30% of the tallest and report the left-most one.
    edu_loc = edu_loc[edu_amp > 0.3 * np.max(edu_amp)]
    return px_edu[edu_loc[np.argmin(edu_loc)]]
def get_brdugates(brdu, x_brdu=None, plotting=False):
    """Gating on minima of kernel density estimate of BrdU distribution

    Parameters
    ----------
    brdu : 1d array
        brdu intensities across all cells in a well
    x_brdu : 1d array
        1d grid of uniform size based on expected range of BrdU.
        If None, a grid from -0.01 to max(brdu) + 0.01 with step 1 is used.
    plotting : boolean
        if True, the density and the selected cutoff are drawn with plt

    Returns
    -------
    brdu_cutoff : numpy float
        BrdU value corresponding to gate on minima of kde
    """
    if x_brdu is None:
        mx = np.max(brdu.tolist()) + 0.01
        x_brdu = np.arange(-0.01, mx, 1)
    f_brdu = findpeaks.get_kde(brdu, x_brdu)  # brdu should be an array
    peak_amp, peak_loc, peak_width = findpeaks.findpeaks(
        f_brdu.tolist(), npeaks=1)

    # Search window (grid indices) for the trough: between 2.5 and 5 peak
    # widths to the right of the main peak.
    width_2p5 = int(peak_loc[0] + 2.5 * peak_width[0])
    width_5 = int(peak_loc[0] + 5 * peak_width[0])

    # Find location of minimum on the right
    f_neg = [-x for x in f_brdu[width_2p5:width_5]]
    _, trough_loc, _ = findpeaks.findpeaks(f_neg, npeaks=1)
    if np.any(trough_loc):
        # trough_loc is relative to the start of the search window, so the
        # offset back into the full grid is width_2p5.
        cutoff_index = width_2p5 + int(trough_loc[0])
    else:
        cutoff_index = width_2p5
    # Stay inside the grid when the window extends past its right edge.
    cutoff_index = min(cutoff_index, len(x_brdu) - 1)
    brdu_cutoff = x_brdu[cutoff_index]

    if plotting:
        plt.plot(x_brdu, f_brdu)
        plt.plot([brdu_cutoff, brdu_cutoff], [0, 0.5 * peak_amp[0]])
    return brdu_cutoff
def get_s_phase_dna_loc(log_dna, x_dna, dna_g1_loc, log_edu, edu_cutoff,
                        nsmooth=5, ax=None):
    """Finds S phase peak location based on DNA content

    Parameters
    ----------
    log_dna : 1d array
        log DNA content across all cells in a well
    x_dna : 1d array
        uniformly spaced grid based on expected DNA content
    dna_g1_loc : float
        position of G1 peak in log DNA space
    log_edu : 1d array
        log EdU intensities across all cells in a well
    edu_cutoff : float
        EdU gate; only cells with log EdU above this value are used
    nsmooth : int
        smoothing parameter
    ax : subplot object
        if not None, the kernel density estimate is drawn on this axis

    Returns
    -------
    dna_s_loc : numpy float
        position of S phase peak in log DNA space; falls back to
        dna_g1_loc + 0.5 * log10(2) when 10 or fewer cells qualify
    """
    log2 = np.log10(2)
    s_phase_bool = ((log_dna > dna_g1_loc - log2 * 0.5) &
                    (log_dna < dna_g1_loc + log2 * 1.5) &
                    (log_edu > edu_cutoff))
    s_phase_dna = log_dna[s_phase_bool]

    # Too few EdU-positive cells for a density estimate: place S halfway
    # (in log space) between G1 and twice the G1 DNA content.
    if len(s_phase_dna) <= 10:
        return dna_g1_loc + 0.5 * log2

    f_dna = get_kde(s_phase_dna, x_dna, bandwidth=0.0317)
    smoothed = smooth.smooth(f_dna, nsmooth, 'flat').tolist()
    _, dna_loc, _ = findpeaks(smoothed, npeaks=1)
    dna_s_loc = x_dna[dna_loc[0]]
    if ax is not None:
        ax.plot(x_dna, f_dna, '-.')
    return dna_s_loc
def get_g1_dna_peak(log_dna, x_dna, log_edu, edu_shift,
                    edu_s_min, edu_g1_max, phase_candidates, ax=None):
    """Get position of G1 peak in log DNA space

    Parameters
    ----------
    log_dna : 1d array
        log DNA content across all cells in a well
    x_dna : 1d array
        uniformly spaced grid based on expected DNA content
    log_edu : 1d array
        log EdU intensities across all cells in a well
    edu_shift : float
        expected difference between G1/G2 and S phases in log EdU space
    edu_s_min : float
        S phase gating based on EdU intensity
    edu_g1_max : float
        G1 gating based on EdU intensity
    phase_candidates : ndarray
        3-by-n array of peak candidates; only the first column is read
    ax : subplot object
        if not None, the density of all cells and the selected G1
        location are drawn on this axis

    Returns
    -------
    dna_g1_loc : numpy float
        position of G1 peak in log DNA space
    """
    f_dna = get_kde(log_dna, x_dna)
    # Restrict to low-EdU cells before looking for the G1 peak.
    log_dna_low_edu = log_dna[(log_edu < edu_s_min + 0.2 * edu_shift) &
                              (log_edu < edu_g1_max)]
    f_dna_low_edu = get_kde(log_dna_low_edu, x_dna)
    peak_amp, peak_loc, _ = findpeaks(f_dna_low_edu.tolist())
    # Remove lesser peaks, then keep at most the first three.
    peak_loc = peak_loc[peak_amp > np.max(peak_amp / 10)]
    dna_g1_loc = x_dna[peak_loc[:3]]

    if len(dna_g1_loc) > 1:
        if phase_candidates[0, 0]:
            # take the peak closest to the G1 candidate
            dna_g1_loc = dna_g1_loc[np.argmin(
                np.abs(dna_g1_loc - phase_candidates[0, 0]))]
        elif phase_candidates[1, 0]:
            # NOTE(review): this keeps peaks *above* the second candidate
            # row; if that row is the S candidate, "<" may have been
            # intended - confirm before changing.
            dna_g1_loc = np.max(
                dna_g1_loc[dna_g1_loc > phase_candidates[1, 0]])
        else:
            # take the lowest peak
            dna_g1_loc = np.min(dna_g1_loc)
    elif len(dna_g1_loc) == 1:
        # Return a scalar, like the multi-peak branches, so callers can
        # mix the result with other scalars.
        dna_g1_loc = dna_g1_loc[0]

    if not np.any(dna_g1_loc):
        # No peak found: derive the location from the candidates instead.
        dna_g1_loc = np.nanmin(phase_candidates[:, 0] - np.log10(1.2))

    if ax is not None:
        ax.plot(x_dna, f_dna)
        ax.plot(dna_g1_loc, .1, 'xk')
        ax.set_xlabel('log (DNA content)')
        ax.set_ylabel('kernel density estimate')
    return dna_g1_loc
def get_g2_location(log_dna, x_dna, ldrtxt, ldr_gates, g1_loc):
    """Computes location of G2 based on DNA content

    Parameters
    ----------
    log_dna : 1d array
        log DNA content of cells in a given well
    x_dna : 1d array
        expected distribution of DNA content (used as x-axis grid).
        If None, np.arange(2.5, 8, 0.02) is used.
    ldrtxt : 1d array
        ldr txt feature across all cells in a well
    ldr_gates : list of floats
        lower and upper LDR gates; cells outside are ignored
    g1_loc : numpy float
        G1 location on log DNA scale

    Returns
    -------
    g2_loc : numpy float
        G2 location on log DNA scale
    """
    if x_dna is None:
        x_dna = np.arange(2.5, 8, 0.02)
    log2 = np.log10(2)

    # Only consider the subset of cells with LDR intensity within
    # ldr_gates and DNA content > (g1_loc + 0.4 * log10(2)).
    within_ldr_gates = (ldr_gates[1] >= ldrtxt) & (ldrtxt >= ldr_gates[0])
    beyond_g1 = log_dna > (g1_loc + 0.4 * log2)
    log_dna_g2_range = log_dna[beyond_g1 & within_ldr_gates]

    f_smooth = smooth.smooth(get_kde(log_dna_g2_range, x_dna), 5, 'flat')
    peak_amp, peak_loc, _ = findpeaks(f_smooth.tolist())

    # Drop peaks below 10% of the tallest and those too close to G1.
    candidates = x_dna[peak_loc[peak_amp > np.max(peak_amp / 10)]]
    candidates = candidates[candidates > (g1_loc + 0.5 * log2)]

    expected_g2 = g1_loc + log2  # twice the G1 DNA content
    if len(candidates) == 0:
        return expected_g2
    if len(candidates) == 1:
        return candidates[0]
    # Several candidates: take the one closest to the expected position.
    return candidates[np.argmin(np.abs(candidates - expected_g2))]
def get_g1_location(log_dna, x_dna, ldrtxt, ldr_gates):
    """Computes location of G1 based on DNA content

    Parameters
    ----------
    log_dna : 1d array
        log DNA content of cells in a given well
    x_dna : 1d array
        expected distribution of DNA content (used as x-axis grid).
        If None, np.arange(2.5, 8, 0.02) is used.
    ldrtxt : 1d array
        ldr txt feature across all cells in a well
    ldr_gates : list of floats
        lower and upper LDR gates; cells outside are ignored

    Returns
    -------
    g1_loc : float
        G1 location on log DNA axis
    """
    if x_dna is None:
        x_dna = np.arange(2.5, 8, 0.02)
    log2 = np.log10(2)

    # Only consider subset of cells with LDR within ldr_gates
    log_dna_low_ldr = log_dna[(ldr_gates[1] >= ldrtxt) &
                              (ldrtxt >= ldr_gates[0])]
    f_dna_low_ldr = get_kde(log_dna_low_ldr, x_dna)
    dna_peaks_amp, dna_peaks_loc, _ = findpeaks(f_dna_low_ldr.tolist())

    # Remove lesser peaks and keep at most the first four. Locations and
    # amplitudes are truncated together so they stay aligned for the
    # element-wise sum below.
    keep = dna_peaks_amp > np.max(dna_peaks_amp / 10)
    dna_peaks_loc = dna_peaks_loc[keep][:4]
    dna_peaks_amp = dna_peaks_amp[keep][:4]
    xdna_loc = x_dna[dna_peaks_loc]

    # Score each peak: fraction of all cells in a window around the peak
    # plus the peak amplitude.
    dna_density = np.array([
        np.mean(np.array(log_dna > (x - 0.2 * log2)) &
                np.array(log_dna < (x + 1.2 * log2)))
        for x in xdna_loc
    ]) + dna_peaks_amp

    # Find G1 peak: with exactly two peaks take the lower one, otherwise
    # the best-scoring one.
    if len(xdna_loc) == 2:
        g1_loc = np.min(xdna_loc)
    else:
        g1_loc = xdna_loc[np.argmax(dna_density)]
    return g1_loc
def get_ph3_gates(ph3, cell_identity, x_ph3=None, ph3_cutoff=None):
    """Gating based on pH3 intensities

    Parameters
    ----------
    ph3 : 1d array
        ph3 intensities across all cells in a well
    cell_identity : 1d array
        membership of each cell in cell cycle phase (1=G1, 2=S, 3=G2)
    x_ph3 : 1d array
        uniformly distributed 1d grid based on expected range of pH3
        intensities. If None, np.arange(2.5, 8, 0.02) is used.
    ph3_cutoff : numpy float (optional)
        USER defined pH3 gating. Currently ignored: the value is always
        recomputed from the density.

    Returns
    -------
    f_ph3 : 1d array
        kernel density estimate of pH3 distribution
    ph3_cutoff : numpy float
        pH3 gating on kernel density minima
    ph3_lims : list of floats
        bounds on pH3 intensities used as x_lim for plots
    """
    if x_ph3 is None:
        x_ph3 = np.arange(2.5, 8, 0.02)
    log_ph3 = compute_log_ph3(ph3, x_ph3)
    grid_step = x_ph3[1] - x_ph3[0]
    bandwidth = 4 * grid_step

    # Prefer the density of G1/G2 cells when there are at least 10 of
    # them; otherwise (or if that estimate fails) use all cells.
    f_ph3 = None
    log_ph3_g12 = log_ph3[(cell_identity == 1) | (cell_identity == 3)]
    if len(log_ph3_g12) >= 10:
        try:
            f_ph3 = get_kde(log_ph3_g12, x_ph3, bandwidth)
        except np.linalg.LinAlgError as e:
            # Fall back for any LinAlgError rather than matching on the
            # message text, which would leave f_ph3 undefined on a miss.
            print(str(e))
    if f_ph3 is None:
        f_ph3 = get_kde(log_ph3, x_ph3, bandwidth)

    _, peak_loc, peak_width = findpeaks(f_ph3.tolist(), npeaks=3)

    # Enforce that no more than 30% of cells are in M-phase
    cum_idx = np.nonzero(np.cumsum(f_ph3) / np.sum(f_ph3) > 0.3)[0][0]
    min_idx = cum_idx - 5
    if np.any(peak_loc > min_idx):
        # Use the first peak at or beyond min_idx, and the width of that
        # same peak (select by location, not by comparing widths to an
        # index).
        first = np.nonzero(peak_loc >= min_idx)[0][0]
        peak_width = peak_width[first]
        peak_loc = np.max((peak_loc[first], cum_idx))
    else:
        peak_loc = min_idx
        peak_width = np.max(peak_width)

    # find minimum to the right of the selected peak
    f_ph3_neg = [-x for x in f_ph3[peak_loc:]]
    _, peak_loc_min, _ = findpeaks(f_ph3_neg, npeaks=1)
    if not np.any(peak_loc_min):  # Check
        peak_loc_min = np.array([0])  # Check
    # NOTE(review): the "- 1" looks like a 1-based indexing holdover;
    # kept as-is to preserve existing gate values - confirm.
    peak_loc_min = peak_loc_min + peak_loc - 1

    # Cutoff index: the trough, bounded to between 2 and 9 peak widths to
    # the right of the peak, and clamped to the grid.
    cutoff_index = math.ceil(np.max((
        np.min((peak_loc_min[0], peak_loc + 9 * peak_width)),
        peak_loc + 2 * peak_width)))
    cutoff_index = min(cutoff_index, len(x_ph3) - 1)
    ph3_cutoff = x_ph3[cutoff_index]

    if not np.any(ph3_cutoff):
        ph3_cutoff = x_ph3[np.nonzero(
            smooth.smooth(f_ph3, 5, 'flat') > 0.1 / len(ph3))[0][0] + 1]

    ph3_lims = (quantile(log_ph3, [5e-3, 0.995]) +
                [(3 * grid_step) * q for q in [-1, 10]])
    # Make sure the plotted range includes the cutoff.
    if np.max(ph3_lims) < ph3_cutoff:
        ph3_lims[1] = ph3_cutoff + 0.02
    return f_ph3, ph3_cutoff, ph3_lims
def evaluate_cell_cycle_phase(log_dna, dna_gates, x_dna, log_edu, edu_gates,
                              px_edu, dna_peaks=None, edu_peaks=None,
                              nsmooth=5, ax=None):
    """Evaluates cell cycle phase of each cell based on gatings

    Parameters
    ----------
    log_dna : 1d array
        log DNA content across all cells in a well
    dna_gates : list of floats
        four DNA gates in increasing order (sub-G1/G1, G1/S-dropout,
        S-dropout/G2, G2/beyond-G2 boundaries)
    x_dna : 1d array
        uniformly spaced grid based on expected DNA content
    log_edu : 1d array
        log EdU intensities across all cells in a well
    edu_gates : list of floats
        location of gates separating S and G1/G2 based on EdU intensities.
        Only edu_gates[0] is used for phase assignment.
    px_edu : 1d array
        uniformly spaced grid based on expected EdU intensities
    dna_peaks : list of floats
        G1, S and G2 peak locations; updated in place when given
    edu_peaks : list of floats
        EdU peak per phase (three entries); updated in place. Must be
        supplied whenever dna_peaks is supplied.
    nsmooth : int
        smoothing parameter
    ax : subplot object
        if not None, a pie chart of the fractions is drawn on this axis

    Returns
    -------
    fractions : dict
        dictionary where keys are cell cycle phases and values are
        fractions of cells in each phase
    cell_id : 1d array
        membership of each cell in cell cycle phase
        (0.5=subG1, 1=G1, 2=S, 2.1=S dropout, 3=G2, 3.1=beyond G2)
    peaks : list or None
        [dna_peaks, edu_peaks] when dna_peaks is given, otherwise None
    """
    # Each cell falls in at most one region (for increasing gates), so
    # the weighted sum of the region masks is the phase code itself.
    # The upper EdU gate (edu_gates[1]) is not applied.
    cell_id = (1 * ((log_dna > dna_gates[0]) &      # G1
                    (log_dna < dna_gates[1]) &
                    (log_edu < edu_gates[0])) +
               2 * ((log_dna >= dna_gates[0]) &     # S
                    (log_dna < dna_gates[3]) &
                    (log_edu >= edu_gates[0])) +
               2.1 * ((log_dna >= dna_gates[1]) &   # S dropout
                      (log_dna < dna_gates[2]) &
                      (log_edu < edu_gates[0])) +
               3 * ((log_dna >= dna_gates[2]) &     # G2
                    (log_dna < dna_gates[3]) &
                    (log_edu < edu_gates[0])) +
               0.5 * (log_dna < dna_gates[0]) +     # sub G1
               3.1 * (log_dna > dna_gates[3]))      # beyond G2

    phase_codes = zip(['subG1', 'G1', 'S', 'S_dropout', 'G2', 'beyondG2'],
                      [0.5, 1, 2, 2.1, 3, 3.1])
    fractions = {state: np.mean(cell_id == val) for state, val in phase_codes}

    if dna_peaks is not None:
        for ig in range(1, 4):
            in_phase = cell_id == ig
            if np.sum(in_phase) > 10:
                # Enough cells: locate the phase's DNA and EdU peaks.
                # Scalars are stored so later comparisons stay scalar.
                f_dna = get_kde(log_dna[in_phase], x_dna)
                _, dna_loc, _ = findpeaks(
                    smooth.smooth(f_dna, 3 * nsmooth).tolist(), npeaks=1)
                dna_peaks[ig - 1] = x_dna[dna_loc[0]]

                f_edu = get_kde(log_edu[in_phase], px_edu)
                _, edu_loc, _ = findpeaks(
                    smooth.smooth(f_edu, 3 * nsmooth).tolist(), npeaks=1)
                edu_peaks[ig - 1] = px_edu[edu_loc[0]]
            else:
                # Too few cells: use the midpoint of the two DNA gates
                # adjacent to this phase (gates ig-1 and ig).
                dna_peaks[ig - 1] = np.mean(dna_gates[ig - 1:ig + 1])
                edu_peaks[ig - 1] = np.mean((edu_gates[0],
                                             (ig == 2) * edu_gates[1]))
        # The S-phase EdU peak must sit above the EdU gate.
        edu_peaks[1] = np.max((edu_peaks[1], edu_gates[0] + 0.1))
        peaks = [dna_peaks, edu_peaks]
    else:
        peaks = None

    if ax is not None:
        # Materialise the dict views: pie() needs real sequences.
        ax.pie(list(fractions.values()), labels=list(fractions.keys()),
               autopct='%1.1f%%')
        ax.axis('equal')
    return fractions, cell_id, peaks
def get_dna_cutoff(log_dna, x_dna, log_edu, edu_cutoff, dna_g1_loc,
                   dna_s_loc, phase_candidates, nsmooth, ax):
    """Get DNA cutoff and return G2 peak location based on DNA cutoff

    Parameters
    ----------
    log_dna : 1d array
        log DNA content across all cells in a well
    x_dna : 1d array
        uniformly spaced grid based on expected DNA content
    log_edu : 1d array
        log EdU intensities across all cells in a well
    edu_cutoff : numpy float
        EdU gate; only cells with log EdU below this value are used
    dna_g1_loc : float
        position of G1 peak in log DNA space
    dna_s_loc : float
        position of S peak in log DNA space
    phase_candidates : ndarray
        3-by-n array comprising n candidates for G1, S and G2 peaks
    nsmooth : int
        smoothing parameter
    ax : subplot object
        if not None, the peak locations and the cutoff are marked on it

    Returns
    -------
    dna_cutoff : numpy float
        DNA gate between the G1 and G2 peaks
    dna_g2_loc : numpy float
        position of G2 phase peak in log DNA space
    """
    def _cutoff_from_peaks():
        # Default gate: the S location, kept at least 0.02 above G1 and
        # at least 0.02 below G2.
        above_g1 = np.max((dna_s_loc, dna_g1_loc + 0.02))
        return np.min((above_g1, dna_g2_loc - 0.02))

    low_edu = log_edu < edu_cutoff
    beyond_g1 = (log_dna > dna_g1_loc + 0.4 * np.log10(2)) & low_edu

    if not np.any(beyond_g1):
        # No low-EdU cells beyond G1: fall back to fixed offsets from G1.
        dna_g2_loc = dna_g1_loc + np.log10(2)
        dna_cutoff = dna_g1_loc + 0.3 * np.log10(2)
    else:
        dna_g2_loc = get_g2_dna_loc(log_dna, x_dna, log_edu, edu_cutoff,
                                    dna_g1_loc, phase_candidates,
                                    nsmooth, ax=ax)
        f_dna = get_kde(log_dna[low_edu], x_dna)
        smooth_f_dna = smooth.smooth(f_dna, nsmooth, 'flat')
        # Redo with half the window if smoothing lengthened the array.
        if len(smooth_f_dna) > len(f_dna):
            smooth_f_dna = smooth.smooth(f_dna, 0.5 * nsmooth, 'flat')

        # Troughs of the density are peaks of its negative.
        _, trough_loc, _ = findpeaks([-x for x in smooth_f_dna])
        trough_loc = np.array([p for p in trough_loc if p < len(x_dna)])
        if np.any(trough_loc):
            trough_dna = x_dna[trough_loc]
            dna_cutoff = trough_dna[(trough_dna > dna_g1_loc) &
                                    (trough_dna < dna_g2_loc)]
        else:
            dna_cutoff = _cutoff_from_peaks()

        if not np.any(dna_cutoff):
            # No trough between G1 and G2.
            dna_cutoff = _cutoff_from_peaks()
        elif isinstance(dna_cutoff, (list, np.ndarray)):
            # Several troughs qualify: use the first.
            dna_cutoff = dna_cutoff[0]

    if ax is not None:
        ax.plot([dna_g1_loc, dna_s_loc, dna_g2_loc], [0, 0, 0], 'xk')
        ax.plot(dna_cutoff, 0, 'xk')
    return dna_cutoff, dna_g2_loc
def get_high_edu_peaks(log_edu, px_edu, edu_shift, low_edu_peaks,
                       log_dna, dna_g1_loc, nsmooth=5):
    """Returns peak for EdU intensities above edu_shift values

    Parameters
    ----------
    log_edu : 1d array
        log EdU intensities across all cells in a well
    px_edu : 1d array
        uniformly spaced grid based on expected EdU range
    edu_shift : float
        expected difference between G1/G2 and S phases in log EdU space
    low_edu_peaks : float
        peak found within the range of EdU intensities below edu_g1_max
    log_dna : 1d array
        log DNA content across all cells in a well
    dna_g1_loc : float
        position of G1 peak in log DNA space
    nsmooth : int
        smoothing parameter

    Returns
    -------
    edu_peaks : list of floats
        [low_edu_peaks, high_edu_peaks]
    edu_cutoff : float
        EdU value separating the low and high EdU peaks
    edu_lims : 1d array
        EdU limits to define range of EdU
    edu_gates : 1d array
        [edu_cutoff, upper gate] based on EdU content
    """
    log2 = np.log10(2)
    high_edu_bool = ((log_dna > dna_g1_loc - log2 / 2) &
                     (log_dna < dna_g1_loc + log2 * 1.5) &
                     (log_edu > low_edu_peaks + edu_shift * 0.8))
    default_high_peak = low_edu_peaks + edu_shift

    # At least 10 high-EdU cells are needed for a density estimate.
    # (The original guard "any | sum >= 10" parsed as "(any | sum) >= 10",
    # which evaluates to this same condition.)
    if np.sum(high_edu_bool) >= 10:
        f_edu_high = get_kde(log_edu[high_edu_bool], px_edu)
        edu_amp, edu_loc, _ = findpeaks(smooth.smooth(
            f_edu_high, nsmooth, 'flat').tolist())
        # Remove lesser peaks
        high_edu_loc = edu_loc[edu_amp > np.max(edu_amp / 10)]
        beyond_shift = px_edu[high_edu_loc] > low_edu_peaks + edu_shift
        if np.any(beyond_shift):
            # first remaining peak beyond the expected shift
            high_edu_peaks = px_edu[high_edu_loc[
                np.nonzero(beyond_shift)[0][0]]]
        else:
            high_edu_peaks = default_high_peak

        # Cutoff: first trough of the all-cell density between the low
        # and high peaks.
        f_edu = get_kde(log_edu, px_edu)
        neg_f_edu = np.array([-x for x in f_edu])
        _, edu_loc, _ = findpeaks(neg_f_edu.tolist(), thresh=0.01)
        try:
            edu_cutoff = px_edu[edu_loc[
                np.nonzero((px_edu[edu_loc] > low_edu_peaks) &
                           (px_edu[edu_loc] < high_edu_peaks))[0][0]]]
        except IndexError:
            # No trough between the peaks: use the default high peak and
            # the midpoint between the two peaks.
            high_edu_peaks = default_high_peak
            edu_cutoff = np.mean((low_edu_peaks, high_edu_peaks))
        edu_lims = [px_edu[2],
                    np.min((2 * high_edu_peaks - edu_cutoff, px_edu[-2]))]
    else:
        high_edu_peaks = default_high_peak
        edu_cutoff = np.mean((low_edu_peaks, high_edu_peaks))
        edu_lims = [-0.02,
                    np.min((2 * high_edu_peaks - edu_cutoff + 0.1,
                            px_edu[-2]))]

    # Upper gate sits at least 1 log unit above the high peak.
    edu_gates = np.array(
        [edu_cutoff,
         high_edu_peaks + np.max((high_edu_peaks - edu_cutoff, 1))])
    # Make sure the limits include the upper gate.
    edu_lims[1] = np.max((edu_lims[1], edu_gates[1] + 0.1))
    edu_lims = np.array(edu_lims)
    edu_peaks = [low_edu_peaks, high_edu_peaks]
    return edu_peaks, edu_cutoff, edu_lims, edu_gates
def _edu_peaks_above_threshold(edu, x_edu):
    """Return the EdU density on x_edu and its peaks above 10% of its max."""
    # Note: Bandwidth = 90 reproduced MATLAB output
    f_edu = get_kde(edu, x_edu, bandwidth=90)
    peak_amp, peak_loc, peak_width = findpeaks(f_edu.tolist(), npeaks=2)
    # Build the mask once so amplitude, location and width stay aligned.
    keep = peak_amp > np.max(f_edu) / 10
    return f_edu, peak_amp[keep], peak_loc[keep], peak_width[keep]


def get_edu_gates(edu, px_edu=None, ax=None):
    """Returns estimate of max EdU for G1 gating and min EdU for S phase gating

    Parameters
    ----------
    edu : 1D array
        edu intensities across all cells in a given well
    px_edu : 1D array
        uniformly spaced grid based on expected EdU range.
        If None, np.arange(-0.2, 5.3, .02) is used.
    ax : subplot object
        if not None, a summary of the EdU gating is drawn on this axis

    Returns
    -------
    edu_shift : float
        difference between G1/2 and S phases
    offset_edu : float
        offset passed to compute_log_edu
    edu_g1_max : float
        G1 gating based on EdU intensity
    edu_s_min : float
        S phase gating based on EdU intensity
    """
    if px_edu is None:
        px_edu = np.arange(-0.2, 5.3, .02)
    x_edu = np.arange(-200, 4e3 + 1, 1)
    f_edu, _, peak_loc, peak_width = _edu_peaks_above_threshold(edu, x_edu)
    if peak_loc.size == 0:
        # No peak on the default grid: retry on a wider intensity range.
        x_edu = np.arange(-200, 2e4 + 1, 1)
        f_edu, _, peak_loc, peak_width = _edu_peaks_above_threshold(
            edu, x_edu)

    # Use the left-most peak.
    peak_width = peak_width[np.argmin(peak_loc)]
    peak_loc = x_edu[math.ceil(np.min(peak_loc))]

    # Find location of minimum on right
    f2_edu = None
    if np.any(edu > (peak_loc + 30)):
        edu_higher = edu[edu > peak_loc + 30]
        f2_edu = get_kde(edu_higher, x_edu, bandwidth=510)
        f2_edu_neg = [-x for x in f2_edu]
        _, peak_trough, _ = findpeaks(f2_edu_neg, npeaks=2)
        try:
            # trough whose grid index is closest to 500
            peak_trough = x_edu[math.ceil(
                peak_trough[np.argmin(
                    np.abs([x - 500 for x in peak_trough]))])]
        except ValueError:
            # no trough found
            peak_trough = 0
        peak_trough = np.max([peak_trough, peak_loc + 3 * peak_width])
    else:
        peak_trough = peak_loc + 3 * peak_width

    # Edu offset
    # ** Not entirely clear to me yet
    offset_edu = np.max((peak_loc - 1.5 * peak_width, 1))
    log_edu = compute_log_edu(edu, px_edu, offset_edu)

    s_min_log = np.log10(peak_loc + 2 * peak_width - offset_edu)
    g1_peak_log = np.log10(np.max((peak_loc - offset_edu, 1)))

    # EdU max for G1 gating
    edu_g1_max = np.max((
        np.log10(peak_trough - offset_edu),  # Expected EdU max for G1 (optn 1)
        quantile(log_edu, 0.2) + 0.1         # Expected EdU max for G1 (optn 2)
    ))
    # Edu min for S phase gating
    edu_s_min = np.max((
        s_min_log,          # Expected EdU min for S phase (optn 1)
        edu_g1_max - 0.1    # Expected EdU min for S phase (optn 2)
    ))
    # Expected difference between G1/G2 and S
    edu_shift = np.max((s_min_log - g1_peak_log, 1))

    # Plotting
    # --------
    if ax is not None:
        idx = np.random.permutation(len(edu))
        idx = idx[:np.min((len(edu), 1000))]
        edu = np.array(edu)
        ax.plot(edu[idx], log_edu[idx], '.c')
        # NOTE(review): plot scaling always uses the default grid, even
        # when a custom px_edu was supplied - confirm this is intended.
        px_edu = np.arange(-0.2, 5.3, .02)
        ax.plot(x_edu, np.max(px_edu) * (f_edu / np.max(f_edu)), 'k-')
        if f2_edu is not None:
            # Only defined when a high-EdU population exists.
            ax.plot(x_edu, np.max(px_edu) * (f2_edu / np.max(f2_edu)), 'k--')
        ax.plot([-200, 300, np.nan, -100, 500],
                [edu_s_min, edu_s_min, np.nan,
                 edu_shift + g1_peak_log, edu_shift + g1_peak_log],
                '-r')
        ax.plot([offset_edu, offset_edu], [0, 5], ':r')
        ax.plot([100, peak_trough, peak_trough],
                [edu_g1_max, edu_g1_max, 0], '--r')
        ax.set_ylim((px_edu[0], px_edu[-1]))
        ax.set_xlim([-200, np.max((peak_loc + 5 * peak_width, 500))])
        ax.set_xlabel('EdU intensity')
        ax.set_ylabel('log10 (EdU)')
    return edu_shift, offset_edu, edu_g1_max, edu_s_min