def fit(self, X, axis=None):
    """Accumulated (incremental) fit of the scaler.

    :param X: input array
    :param axis: axis or axes to reduce over; defaults to all but the last axis
    :return: self
    """
    self.axis = axis if axis is not None else tuple(range(len(X.shape) - 1))
    if isinstance(self.axis, int):
        self.axis = (self.axis,)
    aX = AutoScaler.void_scale(X, self.offset, self.threshold) if self.do_void_scale else X
    # Count the non-NaN entries and accumulate the first two moments.
    self.sum0 = (~np.isnan(aX)).sum(axis=self.axis) + self.sum0
    # bottleneck only accepts a single integer axis, so fall back to numpy
    # for multi-axis reductions or non-float64 input.
    self.sum1 = (bn.nansum(aX, axis=self.axis[0])
                 if len(self.axis) == 1 and aX.dtype == np.float64
                 else np.nansum(aX, axis=self.axis)) + self.sum1
    aX = aX * aX
    self.sum2 = (bn.nansum(aX.astype(np.float64), axis=self.axis[0])
                 if len(self.axis) == 1 and aX.dtype == np.float64
                 else np.nansum(aX, axis=self.axis)) + self.sum2
    self.mean = self.sum1 / self.sum0
    self.std = np.sqrt(self.sum2 / self.sum0 - self.mean * self.mean)
    self.fitted = True
    return self
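# A minimal sketch (not from the original source) of why the three running
# sums in fit() are enough for incremental scaling: mean = S1/S0 and
# var = S2/S0 - mean**2 match a one-shot nanmean over the concatenated
# batches. All names below are hypothetical.
import numpy as np

def demo_accumulated_moments(batches, axis=0):
    s0 = s1 = s2 = 0.0
    for b in batches:
        s0 = s0 + (~np.isnan(b)).sum(axis=axis)
        s1 = s1 + np.nansum(b, axis=axis)
        s2 = s2 + np.nansum(b * b, axis=axis)
    mean = s1 / s0
    std = np.sqrt(s2 / s0 - mean * mean)
    return mean, std

batches = [np.array([[1.0, np.nan], [2.0, 3.0]]), np.array([[4.0, 5.0]])]
mean, std = demo_accumulated_moments(batches)
assert np.allclose(mean, np.nanmean(np.vstack(batches), axis=0))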
def __count_nuc_per_position(self, mat):
    nuc_counts = {}
    nuc_counts["A"] = bn.nansum((mat == 1).astype(int), axis=0)
    nuc_counts["C"] = bn.nansum((mat == 2).astype(int), axis=0)
    nuc_counts["G"] = bn.nansum((mat == 3).astype(int), axis=0)
    nuc_counts["T"] = bn.nansum((mat == 4).astype(int), axis=0)
    return nuc_counts
def test_memory_leak():
    import resource

    arr = np.arange(1).reshape((1, 1))
    starting = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
    for i in range(1000):
        for axis in [None, 0, 1]:
            bn.nansum(arr, axis=axis)
            bn.nanargmax(arr, axis=axis)
            bn.nanargmin(arr, axis=axis)
            bn.nanmedian(arr, axis=axis)
            bn.nansum(arr, axis=axis)
            bn.nanmean(arr, axis=axis)
            bn.nanmin(arr, axis=axis)
            bn.nanmax(arr, axis=axis)
            bn.nanvar(arr, axis=axis)
    ending = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
    diff = ending - starting
    diff_bytes = diff * resource.getpagesize()
    print(diff_bytes)
    # For 1.3.0 release, this had value of ~100kB
    assert diff_bytes == 0
def srebin(oldWave, newWave, oldFlux, kind='linear'):
    nPix = len(newWave)  # Number of pixels in new binned spectrum
    newFlux = np.zeros(len(newWave))  # Set up array to store rebinned fluxes
    # Create a 1D interpolation object for finding the flux density at any given wavelength
    interpObj = interp1d(oldWave, oldFlux, kind=kind, bounds_error=False)
    # Calculate difference in wavelengths between each pixel on the new wavelength grid
    wavebindiffs = np.diff(newWave)
    # Repeat the last difference so that wavebindiffs is the same size as newWave
    wavebindiffs = np.hstack([wavebindiffs, wavebindiffs[-1]])
    wavebinleft = newWave - 0.5 * wavebindiffs   # Get left side wavelengths for each bin
    wavebinright = newWave + 0.5 * wavebindiffs  # Get right side wavelengths for each bin
    fluxbinleft = interpObj(wavebinleft)
    fluxbinright = interpObj(wavebinright)
    for i in range(nPix):  # Loop through each pixel on the new wavelength grid
        # Find old wavelength points that are inside the new bin
        useOldWaves = (oldWave >= wavebinleft[i]) & (oldWave <= wavebinright[i])
        nPoints = bn.nansum(useOldWaves)
        wavePoints = np.zeros(nPoints + 2)
        fluxPoints = np.zeros(nPoints + 2)
        wavePoints[0] = wavebinleft[i]
        wavePoints[1:-1] = oldWave[useOldWaves]
        wavePoints[-1] = wavebinright[i]
        fluxPoints[0] = fluxbinleft[i]
        fluxPoints[1:-1] = oldFlux[useOldWaves]
        fluxPoints[-1] = fluxbinright[i]
        # Trapezoidal integration across the bin, normalized by the bin width
        newFlux[i] = 0.5 * bn.nansum((fluxPoints[:-1] + fluxPoints[1:]) * np.diff(wavePoints)) / wavebindiffs[i]
    return newFlux
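# A small usage sketch (not from the original source): rebin a finely sampled
# flat spectrum onto a coarser grid and check that the flux density is
# preserved. Assumes the same imports srebin itself uses (numpy as np,
# scipy.interpolate.interp1d, bottleneck as bn).
oldWave = np.linspace(4000.0, 5000.0, 1001)
oldFlux = np.ones_like(oldWave)
newWave = np.linspace(4050.0, 4950.0, 101)
newFlux = srebin(oldWave, newWave, oldFlux)
assert np.allclose(newFlux, 1.0)  # flux density preserved for a flat input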
def switch_hidden(x, nloci, maxMOI, alleles_definitions_RR_arr: List[np.ndarray],
                  state: SiteInstanceState, rand: np.random.RandomState):
    '''
    Update the hidden/inferred alleles for the samples
    NOTE: Update takes place entirely via side-effects/in-place updates to the
    state (this function doesn't currently return anything)

    :param x: The index of the sample ID to update
    :param nloci: The number of loci in the dataset
    :param maxMOI: The maximum multiplicity of infection in the whole dataset
    :param alleles_definitions_RR_arr: ?
    :param state: The current state of the algorithm
    :param rand: The random number generator to use
    '''
    # Section A: If number of inferred alleles > 0
    # It will probably be more efficient to sum the two separately, because
    # concatenation could incur a memory-related performance cost if a new
    # memory block is created behind the scenes.
    inferred_allele_count = bn.nansum(state.hidden0) + bn.nansum(state.hiddenf)
    if inferred_allele_count <= 0:
        return

    sh_state = _setup_initial_state(x, nloci, maxMOI, alleles_definitions_RR_arr, state, rand)

    is_reinfection = state.classification[x] == SampleType.REINFECTION.value
    if is_reinfection:
        _update_reinfection(state, sh_state, x, maxMOI)
    else:
        _update_recrudescence(state, sh_state, x, maxMOI)
def test_memory_leak() -> None:
    import resource

    arr = np.arange(1).reshape((1, 1))
    n_attempts = 3
    results = []

    for _ in range(n_attempts):
        starting = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
        for _ in range(1000):
            for axis in [None, 0, 1]:
                bn.nansum(arr, axis=axis)
                bn.nanargmax(arr, axis=axis)
                bn.nanargmin(arr, axis=axis)
                bn.nanmedian(arr, axis=axis)
                bn.nansum(arr, axis=axis)
                bn.nanmean(arr, axis=axis)
                bn.nanmin(arr, axis=axis)
                bn.nanmax(arr, axis=axis)
                bn.nanvar(arr, axis=axis)
        ending = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
        diff = ending - starting
        diff_bytes = diff * resource.getpagesize()
        # For 1.3.0 release, this had value of ~100kB
        if diff_bytes:
            results.append(diff_bytes)
        else:
            break

    assert len(results) < n_attempts
def weighted_mean(_line):
    max_weight = 50
    # print(_line.shape)
    median_2d = bottleneck.nanmedian(_line, axis=1).reshape(_line.shape[0], 1).repeat(_line.shape[1], axis=1)
    std = bottleneck.nanstd(_line, axis=1)
    std_2d = std.reshape(_line.shape[0], 1).repeat(_line.shape[1], axis=1)

    weight_2d = numpy.fabs(std_2d / (_line - median_2d))
    # weight_2d[weight_2d > max_weight] = max_weight
    weight_2d[numpy.isinf(weight_2d)] = max_weight

    for i in range(3):
        avg = bottleneck.nansum(_line * weight_2d, axis=1) / bottleneck.nansum(weight_2d, axis=1)
        avg_2d = avg.reshape(_line.shape[0], 1).repeat(_line.shape[1], axis=1)

        std = numpy.sqrt(bottleneck.nansum(((_line - avg_2d)**2 * weight_2d), axis=1) / bottleneck.nansum(weight_2d, axis=1))
        std_2d = std.reshape(_line.shape[0], 1).repeat(_line.shape[1], axis=1)

        weight_2d = numpy.fabs(std_2d / (_line - avg_2d))
        # weight_2d[weight_2d > max_weight] = max_weight
        weight_2d[numpy.isinf(weight_2d)] = max_weight

    return bottleneck.nansum(_line * weight_2d, axis=1) / bottleneck.nansum(weight_2d, axis=1)
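# A minimal sketch (not from the original source) of the weighted-mean idea
# used above: weights are the spread divided by the deviation from a robust
# center, so outliers contribute little to the final row average.
import numpy
import bottleneck

line = numpy.array([[1.0, 1.1, 0.9, 50.0]])   # one row with an outlier
center = bottleneck.nanmedian(line, axis=1)[:, None]
spread = bottleneck.nanstd(line, axis=1)[:, None]
w = numpy.fabs(spread / (line - center))
w[numpy.isinf(w)] = 50                        # cap weights where value == center
wmean = bottleneck.nansum(line * w, axis=1) / bottleneck.nansum(w, axis=1)
# wmean stays near 1.0 rather than the plain mean of ~13.25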
def compute_entropy(U):
    HGauss0 = 0.5 + 0.5 * np.log(2 * np.pi)
    nSingVals = U.shape[1]
    H = np.empty(nSingVals, dtype='float64')

    for iBasisVector in range(nSingVals):
        kde = KDE(np.abs(U[:, iBasisVector]))
        kde.fit(gridsize=1000)

        pdf = kde.density
        x = kde.support
        dx = x[1] - x[0]

        # Calculate the Gaussian entropy
        pdfMean = nansum(x * pdf) * dx
        with np.errstate(invalid='ignore'):
            sigma = np.sqrt(nansum(((x - pdfMean)**2) * pdf) * dx)
        HGauss = HGauss0 + np.log(sigma)

        # Calculate vMatrix entropy
        pdf_pos = (pdf > 0)
        HVMatrix = -np.sum(xlogy(pdf[pdf_pos], pdf[pdf_pos])) * dx

        # Returned entropy is the difference between the V-Matrix entropy and
        # the Gaussian entropy of similar width (sigma)
        H[iBasisVector] = HVMatrix - HGauss

    return H
def MH_cluster_params(self, old_params, cells, trans_prob=False):
    """ Update cluster parameters

    Arguments:
        old_params (np.array): old values of the cluster parameters
        cells (np.array): data for cells in the cluster

    Returns:
        np.array: New cluster parameters
        float: Sum of MH decision parameters A
        int: Number of declined MH updates
    """
    # Propose new parameters from a truncated normal distribution
    std = np.random.choice(self.param_proposal_sd, size=self.muts_total)
    a = (TMIN - old_params) / std
    b = (TMAX - old_params) / std
    new_params = truncnorm.rvs(a, b, loc=old_params, scale=std) \
        .astype(np.float32)

    A = self._get_log_A(new_params, old_params, cells, a, b, std, trans_prob)

    u = np.log(np.random.random(self.muts_total))
    decline = u >= A
    new_params[decline] = old_params[decline]

    if trans_prob:
        A[decline] = np.log(-1 * np.expm1(A[decline]))
        return new_params, bn.nansum(A), bn.nansum(decline)
    else:
        return new_params, np.nan, bn.nansum(decline)
def expectation(gamma, alpha):
    """calculates the components needed for loglikelihood for each iteration
    of gamma and alpha

    gamma: np matrix of the estimated 'true' methylation proportions
    alpha: np matrix of estimated mixing proportions
    """
    individuals, tissues = alpha.shape
    sites = gamma.shape[1]

    # pre-defines probability matrices
    p0 = np.zeros((tissues, sites, individuals))
    p1 = np.zeros((tissues, sites, individuals))

    for n in range(individuals):
        for j in range(sites):
            p0_j = (1 - gamma[:, j]) * alpha[n, :]
            p0_j = p0_j / bn.nansum(p0_j)
            p1_j = gamma[:, j] * alpha[n, :]
            p1_j = p1_j / bn.nansum(p1_j)
            p0[:, j, n] = p0_j
            p1[:, j, n] = p1_j
    return p0, p1
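# A tiny sketch (not from the original source) of the per-site normalization
# performed in expectation(): each vector of responsibilities is rescaled to
# sum to 1 with bn.nansum, so NaN entries are ignored in the normalizer.
import numpy as np
import bottleneck as bn

p = np.array([0.2, np.nan, 0.6])
p_norm = p / bn.nansum(p)          # NaN ignored: normalizer is 0.8
assert np.isclose(bn.nansum(p_norm), 1.0)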
def _calc_ll(self, x, theta, flat=False):
    ll_FN = theta * self._Bernoulli_FN(x)
    ll_FP = (1 - theta) * self._Bernoulli_FP(x)
    ll_full = np.log(ll_FN + ll_FP)
    if flat:
        return bn.nansum(ll_full)
    else:
        return bn.nansum(ll_full, axis=1)
def update_parameters(self, step_no=None):
    # Iterate over all populated clusters
    declined_t = np.zeros(len(self.cells_per_cluster), dtype=int)
    for i, cl_id in enumerate(self.cells_per_cluster):
        self.parameters[cl_id], _, declined = self.MH_cluster_params(
            self.parameters[cl_id],
            np.argwhere(self.assignment == cl_id).flatten())
        declined_t[i] = declined
    return bn.nansum(declined_t), bn.nansum(self.muts_total - declined_t)
def _get_ltrans_prob_size_ratio_split(self, ltrans_prob_size, cluster_size):
    n_j = bn.nansum(self.rg_assignment) + 1
    n_i = self.rg_assignment.size + 2 - n_j
    # Eq. 5 in the paper, first term
    norm = bn.nansum(1 / np.append(cluster_size, [n_i, n_j]))
    ltrans_prob_rev = np.log(1 / n_i / norm) + np.log(1 / n_j / norm)
    return ltrans_prob_rev - ltrans_prob_size[0]
async def internal_periodic_callback():
    if sv_streamctrl.is_activated and sv_streamctrl.is_receiving:
        sv_rt.metadata, sv_rt.image = sv_streamctrl.get_stream_data(-1)
        sv_rt.thresholded_image, sv_rt.aggregated_image, sv_rt.reset = sv_imageproc.update(
            sv_rt.metadata, sv_rt.image)

    if sv_rt.image.shape == (1, 1):
        # skip client update if the current image is dummy
        return

    _, metadata = sv_rt.image, sv_rt.metadata
    thr_image, reset, aggr_image = sv_rt.thresholded_image, sv_rt.reset, sv_rt.aggregated_image

    sv_colormapper.update(aggr_image)
    sv_main.update(aggr_image)

    sv_spots.update(metadata)
    sv_resolrings.update(metadata)
    sv_intensity_roi.update(metadata)
    sv_saturated_pixels.update(metadata)
    sv_disabled_modules.update(metadata)

    sv_zoom1_proj_v.update(sv_zoom1.displayed_image)
    sv_zoom1_proj_h.update(sv_zoom1.displayed_image)
    sv_zoom2_proj_v.update(sv_zoom2.displayed_image)
    sv_zoom2_proj_h.update(sv_zoom2.displayed_image)

    # Deactivate auto histogram range if aggregation is on
    if sv_imageproc.aggregate_toggle.active:
        sv_hist.auto_toggle.active = []

    im_block1 = aggr_image[sv_zoom1.y_start:sv_zoom1.y_end, sv_zoom1.x_start:sv_zoom1.x_end]
    total_sum_zoom1 = bn.nansum(im_block1)
    im_block2 = aggr_image[sv_zoom2.y_start:sv_zoom2.y_end, sv_zoom2.x_start:sv_zoom2.x_end]
    total_sum_zoom2 = bn.nansum(im_block2)

    # Update total intensities plots
    sv_streamgraph.update([bn.nansum(aggr_image), total_sum_zoom1, total_sum_zoom2])

    if sv_streamctrl.is_activated and sv_streamctrl.is_receiving:
        # Update histograms
        if reset:
            sv_hist.update([im_block1, im_block2])
        else:
            im_block1 = thr_image[sv_zoom1.y_start:sv_zoom1.y_end, sv_zoom1.x_start:sv_zoom1.x_end]
            im_block2 = thr_image[sv_zoom2.y_start:sv_zoom2.y_end, sv_zoom2.x_start:sv_zoom2.x_end]
            sv_hist.update([im_block1, im_block2], accumulate=True)

    sv_metadata.update(metadata)
def get_lprior_full(self):
    lprior = self.DP_a_prior.logpdf(self.DP_a) \
        + bn.nansum(self.CRP_prior[
            np.fromiter(self.cells_per_cluster.values(), dtype=int)])
    if not self.beta_prior_uniform:
        cl_ids = np.fromiter(self.cells_per_cluster.keys(), dtype=int)
        lprior += bn.nansum(self.param_prior.logpdf(self.parameters[cl_ids]))
    return lprior
def ms_int(n_th, ranges, first_pt, last_pt, beta_c, theta_sq, divlR2, fovR2):
    """ms_int(n_th, ranges, first_pt, last_pt, beta_c, theta_sq, divlR2, fovR2)
    Compute the multiple integral for nth order scattering with a recursive
    python routine--note this does not contain the tau^(n-1)/(n-1)! term or
    factors for P(180,n)/P(180) and the overlap function.
    See 'http:\\lidar.ssec.wisc.edu/mscat/derivation' for details.
    n_th     = order of scattering.
    ranges   = range vector (column vector, in meters).
    first_pt = range index at which to start computation. Optical depth below
               first_pt does not contribute to the computed result.
    last_pt  = range index of the turn-around slab (i.e. range to single scatter).
    beta_c   = scattering cross section at each range (column vector).
    divlR2   = (divergence * range to single scatter / (2 * lambda))^2;
               divlR2 = (pi*divl*ranges(last_pt)/(lambda*2))^2; divergence in
               radians, range in meters, lambda in microns.
    fovR2    = (pi * full field-of-view * range to single scatter / (2 * lambda))^2;
               fovR2 = (pi*fov*ranges(last_pt)/2)^2; fov in radians, range in
               meters, lambda in microns.
    """
    dr = ranges[1] - ranges[0]
    npts = last_pt - first_pt + 1
    kern = np.zeros_like(ranges)
    if n_th == 2:
        # do innermost range and particle size integrals
        for i in range(first_pt, last_pt):  # integrate over range
            kern[i - first_pt] = np.exp(-fovR2
                / (theta_sq[i] * (ranges[last_pt] - ranges[i])**2 + divlR2))
        # no contribution at first_pt
        ms_integral = dr * (nansum(beta_c[first_pt:last_pt] * kern[0:npts - 1])
                            - beta_c[last_pt] * kern[npts - 1] / 2)
    else:
        # do one of the outer integrals
        n_th = n_th - 1
        sint = np.zeros_like(ranges)
        for i in range(first_pt, last_pt):
            s = divlR2 + theta_sq[i] * (ranges[last_pt] - ranges[i])**2
            sint[i] = ms_int(n_th, ranges, i, last_pt, beta_c, theta_sq, s, fovR2)
        first_term = beta_c[first_pt] * sint[first_pt] / 2.0
        if np.isnan(first_term):
            first_term = 0.0
        last_term = beta_c[last_pt] * sint[last_pt] / 2.0
        if np.isnan(last_term):
            last_term = 0.0
        ms_integral = dr * (nansum(beta_c[first_pt:last_pt] * sint[first_pt:last_pt])
                            - first_term - last_term)
    return ms_integral
def compute_aurocs_default(sum_in, study_ct_uniq, pheno, study_col, ct_col, compute_p):
    """Helper function to compute AUROCs from votes matrix of cells

    Arguments:
        sum_in {np.ndarray} -- votes matrix, cells x cell types votes
        study_ct_uniq {vector} -- vector of study_id|cell_type labels
        pheno {pd.DataFrame} -- dataframe with study_ct, study_id and ct_col for all cells
        study_col {str} -- String name of study_col in pheno
        ct_col {str} -- String name of cell type col in pheno

    Returns:
        pd.DataFrame -- ROCs for cell type x cell type labels
    """
    cell_nv = pd.DataFrame(index=study_ct_uniq)
    if compute_p:
        cell_p = pd.DataFrame(index=study_ct_uniq)
    for ct in study_ct_uniq:
        predicts_tmp = sum_in.copy()

        study, cellT = (pheno[pheno.study_ct == ct].drop_duplicates()[[
            study_col, ct_col
        ]].values[0])  # Don't want to split string in case of character issues

        slicer = pheno[study_col] == study
        pheno2 = pheno[slicer]
        predicts_tmp = predicts_tmp[slicer]
        predicts_tmp = bottleneck.nanrankdata(predicts_tmp, axis=0)

        filter_mat = np.zeros_like(predicts_tmp)
        filter_mat[pheno2.study_ct == ct] = 1

        predicts_tmp[filter_mat == 0] = 0

        n_p = bottleneck.nansum(filter_mat, axis=0)
        nn = filter_mat.shape[0] - n_p
        p = bottleneck.nansum(predicts_tmp, axis=0)
        roc = (p / n_p - (n_p + 1) / 2) / nn
        cell_nv[ct] = roc
        if compute_p:
            U = roc * n_p * nn
            Z = (np.abs(U - (n_p * nn / 2))) / np.sqrt(n_p * nn * (n_p + nn + 1) / 12)
            P = stats.norm.sf(Z)
            cell_p[ct] = P
        del predicts_tmp, filter_mat
        gc.collect()
    if compute_p:
        return cell_nv, cell_p
    return cell_nv
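# A compact sketch (not from the original source) of the rank-sum AUROC
# computed above: AUROC = (mean positive rank - (n_pos + 1) / 2) / n_neg,
# which equals the Mann-Whitney U statistic divided by n_pos * n_neg.
import numpy as np
import bottleneck

scores = np.array([0.1, 0.4, 0.35, 0.8])
labels = np.array([0, 0, 1, 1])
ranks = bottleneck.nanrankdata(scores)
n_pos = labels.sum()
n_neg = labels.size - n_pos
auroc = (ranks[labels == 1].sum() / n_pos - (n_pos + 1) / 2) / n_neg
assert np.isclose(auroc, 0.75)  # matches sklearn.metrics.roc_auc_score here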
def ms_sums(diameter, beta, depol, multiple_scatter_parameters, ms_obj):
    '''
    ms_sums(diameter, beta, depol, multiple_scatter_parameters, ms_obj):
    diameter = particle diameter profile or profiles
    beta     = extinction or backscatter cross section profile or profiles
    depol    = depolarization profile or profiles
    returns:
    ms_obj = summed profiles
        " .diameter_ice
        " .diameter_water
        " .beta_ice
        " .beta_water
        " .n_samples_ice
        " .n_samples_water
    '''
    is_ice = np.zeros_like(beta)
    is_water = np.zeros_like(beta)
    is_ice[depol >= multiple_scatter_parameters['h2o_depol_threshold']] = 1.0
    is_water[depol < multiple_scatter_parameters['h2o_depol_threshold']] = 1.0

    beta_ice = beta * is_ice
    beta_water = beta * is_water

    # compute diameter * beta for ice and water components
    if diameter is not None:
        diameter_ice = diameter * beta * is_ice
        diameter_water = diameter * beta * is_water
    # if diameter is not supplied by the lidar-radar retrieval, get it from
    # constants in multiple_scatter_parameters
    else:
        diameter_ice = hau.Z_Array(
            np.ones_like(beta) * multiple_scatter_parameters['mode_diameter_ice']
            * beta * is_ice)
        diameter_water = hau.Z_Array(
            np.ones_like(beta) * multiple_scatter_parameters['mode_diameter_water']
            * beta * is_water)

    if ms_obj is None:
        ms_obj = hau.Time_Z_Group()
        setattr(ms_obj, 'beta_ice', hau.Z_Array(nansum(beta_ice, 0)))
        setattr(ms_obj, 'beta_water', hau.Z_Array(nansum(beta_water, 0)))
        setattr(ms_obj, 'diameter_ice', hau.Z_Array(nansum(diameter_ice, 0)))
        setattr(ms_obj, 'diameter_water', hau.Z_Array(nansum(diameter_water, 0)))
        setattr(ms_obj, 'n_samples_ice',
                hau.Z_Array(np.sum(~np.isnan(beta_ice) * is_ice, 0)))
        setattr(ms_obj, 'n_samples_water',
                hau.Z_Array(np.sum(~np.isnan(beta_water) * is_water, 0)))
    else:
        ms_obj.beta_ice += nansum(beta_ice, 0)
        ms_obj.beta_water += nansum(beta_water, 0)
        ms_obj.diameter_ice += nansum(diameter_ice, 0)
        ms_obj.diameter_water += nansum(diameter_water, 0)
        ms_obj.n_samples_ice += np.sum(~np.isnan(beta) * is_ice, 0)
        ms_obj.n_samples_water += np.sum(~np.isnan(beta) * is_water, 0)
    return ms_obj
def calcOnOffParamMat(onMatIn, offMatIn, maskedMat):
    """
    Calculation of vector of SEFD values

    Parameters
    -------------
    onMatIn : array_like
        freq-time mat of On data
    offMatIn : array_like
        freq-time mat of Off data
    maskedMat : array_like
        binary mask (1 = flagged), same shape as the data

    Returns
    -------------
    float
        the on/off parameter used to derive the SEFD
    float
        power on
    float
        power off
    """
    assert len(onMatIn) == len(offMatIn), "both matrices should have the same size"
    assert len(onMatIn) == len(maskedMat), "mask should have the same size as the others"

    onMat = onMatIn
    offMat = offMatIn

    unq = np.unique(maskedMat)
    assert len(unq) == 2, "mask should be binary"
    assert np.all(unq == np.array([0, 1])), "mask should be binary"

    onMat[maskedMat == 1] = np.nan
    offMat[maskedMat == 1] = np.nan
    maskedArrOkNsamps = maskedMat.shape[1] - maskedMat.sum(axis=1)

    tmpMat = np.divide(offMat, (onMat - offMat))
    onoffparam = bn.nanmedian(tmpMat, axis=1)
    powOn = np.sqrt(bn.nansum(onMat * onMat, axis=1)) / maskedArrOkNsamps
    powOff = np.sqrt(bn.nansum(offMat * offMat, axis=1)) / maskedArrOkNsamps
    return onoffparam, powOn, powOff
def derive_transmission_2_tapers(self, x_0, show_progress=False, **kwargs):
    '''
    Derive transmission between two tapers with equal parameters.
    Following eq (3) from Crespo-Ballesteros M, Yang Y, Toropov N, Sumetsky M.
    Four-port SNAP microresonator device. Opt Lett 2019;44:3498.
    https://doi.org/10.1364/OL.44.003498.

    x_0 is the position of the input taper
    '''
    taper_D = self.D()
    taper_S = self.S()
    T = np.zeros((len(self.lambdas), len(self.x)))
    U = -2 * self.k0**2 * self.ERV * (1e-3) / self.R_0
    eigvals, eigvecs = self.solve_Shrodinger(U)
    ind_0 = np.argmin(abs(self.x - x_0))
    for ii, wavelength in enumerate(self.lambdas):
        if ii % 50 == 0 and show_progress:
            print('Deriving T for wl={}, {} of {}'.format(
                wavelength, ii, len(self.lambdas)))
        E = -2 * self.k0**2 * (wavelength - self.lambda_0) / self.lambda_0
        G = bn.nansum(
            np.transpose(eigvecs[:, ind_0]) * eigvecs /
            (E - eigvals + 1j * self.res_width_norm +
             (eigvecs[:, ind_0]**2 + eigvecs**2) * taper_D), 1)
        ComplexTransmission = (taper_S - 1j * self.taper_Csquared * G)
        T[ii, :] = abs(ComplexTransmission)**2
    self.need_to_update_transmission = False
    self.transmission = T
    if np.amax(T) > 1:
        print('Some error in the algorithm! Transmission became larger than 1')
    return self.x, self.lambdas, self.transmission
def _update_inhibition_radius(self):
    """Set the inhibition radius from the average receptive field size.

    The average receptive field size is the distance of the connected synapses
    with respect to their input column. In other words, it is the distance
    between a column and its input source averaged across all connected
    synapses. The distance used is the Euclidean distance. Refer to the
    initialization of self.syn_dist for more details.

    NOTE
        - This should only be called after phase 1.
        - The minimum inhibition radius is lower-bounded by 1.
    """
    self.inhibition_radius = max(
        bn.nansum(self.syn_dist * self.syn_c) / max(bn.nansum(self.syn_c), 1),
        1)
def pairwise_covariance(x_mat, y=None, correlation=False):
    x_mat = x_mat.copy()
    x_nan = np.isnan(x_mat)
    if y is not None:
        if y.shape[0] != 1:
            assert y.shape == x_mat.shape, \
                'y and x_mat should be of the same shape if y has more than one row'
            y_mat = y
        else:
            y_mat = np.tile(y, (x_mat.shape[0], 1))
        y_nan = np.isnan(y_mat)
        # Mask entries that are NaN in either array so both rows use the same samples
        x_mat[y_nan] = np.nan
        y_mat[x_nan] = np.nan
        pw_multiply = np.multiply(
            x_mat - bn.nanmean(x_mat, axis=1).reshape(-1, 1),
            y_mat - bn.nanmean(y_mat, axis=1).reshape(-1, 1))
        cov = bn.nansum(pw_multiply, axis=1) / (
            pw_multiply.shape[1] - np.isnan(pw_multiply).sum(axis=1) - 1)
        if correlation:
            return cov / np.multiply(bn.nanstd(x_mat, axis=1, ddof=1),
                                     bn.nanstd(y_mat, axis=1, ddof=1))
        return cov
    else:
        if correlation:
            return pd.DataFrame(x_mat).T.corr().values
        return pd.DataFrame(x_mat).T.cov().values
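# A quick usage sketch (not from the original source): row-wise covariance of
# x_mat against a single broadcast row y, with NaNs handled pairwise. Assumes
# numpy as np plus the imports pairwise_covariance itself uses.
import numpy as np

x = np.array([[1.0, 2.0, 3.0],
              [2.0, np.nan, 6.0]])
y = np.array([[1.0, 2.0, 3.0]])
cov = pairwise_covariance(x, y)
# Row 0 covaries perfectly with y; row 1 only uses the two non-NaN columns it
# shares with y.
assert np.isclose(cov[0], 1.0)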
def fast_helper(data, distances, rlimit, numan):
    size = data.shape
    rstep = rlimit / numan
    r1 = 0.0
    # we need to cast this as a float so that numba doesn't complain
    r2 = float(rstep)
    r_vec = np.zeros(numan, dtype=np.float32)
    mean_vec = np.zeros(numan, dtype=np.float32)
    error_vec = np.zeros(numan, dtype=np.float32)

    for k in range(numan):
        anlist = []
        for i in range(size[0]):
            for j in range(size[1]):
                if distances[i, j] > r1:
                    if distances[i, j] <= r2:
                        anlist.append(data[i, j])

        anarray = np.array(anlist, dtype=np.float32)
        mean_vec[k] = bn.nansum(anarray)
        error_vec[k] = bn.nanstd(anarray)
        r_vec[k] = (r1 + r2) * 0.5
        r1 = r2
        r2 += rstep

    return np.array([r_vec, mean_vec, error_vec])
def compute_varrat(featdictrow):
    """
    Returns the variance ratio and number of significant harmonics

    Inputs
    -----------------
    featdictrow:

    Returns
    -----------------
    varrat: float
        (varinit - sum(var_of_sines)) / varinit, where var_of_sines = Amp ** 2 / 2
    significant_harmonics: int
        number of harmonics of f1 that is not nan
    """
    tab = featdictrow['frequencies']
    peak1 = tab[tab['num'] == 1]
    amps = np.array(peak1['amplitude'])
    significant_harmonics = int(
        np.sum((peak1['harmonic'] > 0) & ~np.isnan(peak1['amplitude'])))

    varrat = (featdictrow['variance'] - nansum(amps**2 / 2)) / featdictrow['variance']
    if np.isnan(varrat):
        varrat = 1
    return varrat, significant_harmonics
def _update_inhibition_radius(self):
    """
    Sets the inhibition radius based off the average receptive field size.

    The average receptive field size is the distance of the connected synapses
    with respect to their input column. In other words, it is the distance
    between a column and its input source averaged across all connected
    synapses. The distance used is the Euclidean distance. Refer to the
    initialization of self.syn_dist for more details.

    NOTE
        - This should only be called after phase 1.
        - The minimum inhibition radius is lower-bounded by 1.
    """
    self.inhibition_radius = max(
        bn.nansum(self.syn_dist * self.syn_c) / max(bn.nansum(self.syn_c), 1),
        1)
def numba_cent(data, distances, maxd, numan):
    size = data.shape
    rstep = maxd / numan
    r1 = 0.0
    r2 = rstep

    stdarr = np.zeros(numan, dtype=np.float32)
    rarr = np.zeros(numan, dtype=np.float32)
    outarr = np.zeros(numan, dtype=np.float32)

    for k in range(numan):
        anlist = []
        for i in range(size[0]):
            for j in range(size[1]):
                if distances[i, j] > r1:
                    if distances[i, j] <= r2:
                        anlist.append(data[i, j])

        anarray = np.array(anlist, dtype=np.float32)
        outarr[k] = bn.nansum(anarray)
        stdarr[k] = bn.nanstd(anarray)
        rarr[k] = (r1 + r2) * 0.5
        r1 = r2
        r2 += rstep

    return np.array([rarr, outarr, stdarr])
def get_qs(self, state):
    logging.debug("Getting q-values for state %s" % str(state))
    self.q_searches += 1
    state = int(state)
    dist = 0
    distances = None
    table_ind = self._state_to_table.get(state)
    nsa = np.zeros(self.n_acts)
    sum_q = np.zeros(self.n_acts)
    if table_ind is not None:
        ind_nsa, ind_qs = self.table[table_ind]
        nsa += ind_nsa
        sum_q += np.nan_to_num(ind_qs) * nsa
    missing_qs = nsa == 0
    while np.any(missing_qs):
        dist += 1
        if dist == 5:
            distances = self.get_table_hamming_distances(state)
        if dist > self.n_z:
            self.dists += dist
            return np.zeros(self.n_acts)
        neighbours = self.get_table_hamming_neighbours(state, dist, distances)
        neighbours_nsa, neighbours_qs = self.table[neighbours]
        nsa[missing_qs] += np.sum(neighbours_nsa, 0)[missing_qs]
        sum_q[missing_qs] += (bn.nansum(neighbours_nsa * neighbours_qs, 0))[missing_qs]
        missing_qs = nsa == 0
    self.dists += dist
    return sum_q / nsa
def normalize_cells(X, ranked=True):
    """Scale the matrix so that all cells (rows) are zero-centered and have an
    l2-norm of 1

    Arguments:
        X {array} -- Cell x gene matrix (sparse or dense)

    Keyword Arguments:
        ranked {bool} -- Indicator whether to rank cells (default: {True})

    Returns:
        np.ndarray -- Cells x genes matrix of normalized cells
    """
    if sparse.issparse(X):
        res = X.toarray()
    else:
        res = X
    if ranked:
        res = bottleneck.rankdata(res, axis=1)
    avg = np.mean(res, axis=1)
    res -= avg[:, None]
    norm = np.sqrt(bottleneck.nansum(res**2, axis=1))[:, None]
    res /= norm
    return res
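# A short usage sketch (not from the original source): after normalize_cells,
# every row is zero-centered with unit l2-norm, so the dot product of two rows
# is their Pearson correlation (a rank correlation when ranked=True). Assumes
# numpy as np plus the imports normalize_cells itself uses.
import numpy as np

X = np.array([[3.0, 1.0, 2.0],
              [10.0, 30.0, 20.0]])
res = normalize_cells(X, ranked=True)
assert np.allclose((res**2).sum(axis=1), 1.0)
assert np.isclose(res[0] @ res[1], -1.0)  # perfectly anti-correlated ranks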
def _pixel_distribution(dataset, tolerance=0.001, min_frames=1000):
    """Estimate the distribution of pixel intensities for each channel.

    Parameters
    ----------
    tolerance : float
        The maximum relative error in the estimates that must be achieved for
        termination.
    min_frames: int
        The minimum number of frames that must be evaluated before termination.

    Returns
    -------
    mean_est : array
        Mean intensities of each channel.
    var_est :
        Variances of the intensity of each channel.
    """
    # TODO: separate distributions for each plane
    sums = np.zeros(dataset.frame_shape[-1]).astype(float)
    sum_squares = np.zeros_like(sums)
    counts = np.zeros_like(sums)
    t = 0
    for frame in it.chain.from_iterable(dataset):
        for plane in frame:
            if t > 0:
                mean_est = sums / counts
                var_est = (sum_squares / counts) - (mean_est ** 2)
                if t > min_frames and np.all(
                        np.sqrt(var_est / counts) / mean_est < tolerance):
                    break
            # im = np.concatenate(
            #     [np.expand_dims(x, 0) for x in plane],
            #     axis=0).astype(float)  # NOTE: integers overflow
            # sums += im.sum(axis=0).sum(axis=0)
            # sum_squares += (im ** 2).sum(axis=0).sum(axis=0)
            # cnt += np.prod(im.shape[0] * im.shape[1])
            sums += np.nan_to_num(nansum(nansum(plane, axis=0), axis=0))
            sum_squares += np.nan_to_num(
                nansum(nansum(plane ** 2, axis=0), axis=0))
            counts += np.isfinite(plane).sum(axis=0).sum(axis=0)
            t += 1
    assert np.all(mean_est > 0)
    assert np.all(var_est > 0)
    return mean_est, var_est
def GreenFunction(self, eigvals, eigvecs, wavelength, x1, x2):
    ind_1 = np.argmin(abs(self.x - x1))
    ind_2 = np.argmin(abs(self.x - x2))
    E = -2 * self.k0**2 * (wavelength - self.lambda_0) / self.lambda_0
    return bn.nansum(
        eigvecs[:, ind_1] * eigvecs[:, ind_2] /
        (E - eigvals + 1j * self.res_width_norm), 1)
def _rg_get_split_prob(self, cells):
    std = np.random.choice(self.param_proposal_sd, size=(2, self.muts_total))
    a = (0 - self.rg_params_split) / std
    b = (1 - self.rg_params_split) / std

    i = cells[0]
    cl_i = self.assignment[i]
    j = cells[-1]
    cl_j = self.assignment[j]
    S = cells[1:-1]

    # Get parameter transition probabilities
    prob_param_i = bn.nansum(
        self._get_log_A(
            self.parameters[cl_i], self.rg_params_split[0],
            np.append(S[np.argwhere(self.rg_assignment == 0)], i),
            a[0], b[0], std[0], True))
    prob_param_j = bn.nansum(
        self._get_log_A(
            self.parameters[cl_j], self.rg_params_split[1],
            np.append(S[np.argwhere(self.rg_assignment == 1)], j),
            a[1], b[1], std[1], True))

    # Get assignment transition probabilities
    ll = self._rg_get_ll(cells[1:-1],
                         (self.parameters[cl_i], self.parameters[cl_j]))
    n = cells.size
    prob_assign = np.zeros(S.size)
    assign = np.where(self.assignment[S] == cl_i, 0, 1)
    # Iterate over all observations k != [i, j]
    for obs in range(S.size):
        self.rg_assignment[obs] = -1
        n_j = bn.nansum(self.rg_assignment) + 2
        n_i = n - n_j - 1
        # Get normalized log probs of assigning an obs. to clusters i or j
        log_post = ll[obs] + self.log_CRP_prior([n_i, n_j], n, self.DP_a)
        log_probs = self._normalize_log(log_post)
        # assign to original cluster and add probability
        self.rg_assignment[obs] = assign[obs]
        prob_assign[obs] = log_probs[assign[obs]]

    return prob_param_i + prob_param_j + bn.nansum(prob_assign)
def _get_lprior_ratio_merge(self, cells):
    """ [eq. 8 in Jain and Neal, 2007] """
    n = cells.size
    n_j = bn.nansum(self.rg_assignment) + 1
    n_i = n - n_j
    # Cluster priors
    lprior_rate = gammaln(n) - np.log(self.DP_a)
    if n_i > 0:
        lprior_rate -= gammaln(n_i)
    if n_j > 0:
        lprior_rate -= gammaln(n_j)
    # Parameter priors
    if not self.beta_prior_uniform:
        cl_ids = self.assignment[[cells[0], cells[-1]]]
        lprior_rate += \
            bn.nansum(self.param_prior.logpdf(self.rg_params_merge)) \
            - bn.nansum(self.param_prior.logpdf(self.parameters[cl_ids]))
    return lprior_rate
def _get_lprior_ratio_split(self, cells):
    """ [eq. 7 in Jain and Neal, 2007] """
    n = self.rg_assignment.size + 2
    n_j = bn.nansum(self.rg_assignment) + 1
    n_i = n - n_j
    # Cluster assignment prior
    lprior_rate = np.log(self.DP_a) - gammaln(n)
    if n_i > 0:
        lprior_rate += gammaln(n_j)
    if n_j > 0:
        lprior_rate += gammaln(n_i)
    # Cluster parameter prior
    if not self.beta_prior_uniform:
        cl_id = self.assignment[cells[0]]
        lprior_rate += \
            bn.nansum(self.param_prior.logpdf(self.rg_params_split)) \
            - bn.nansum(self.param_prior.logpdf(self.parameters[cl_id]))
    return lprior_rate
def solo(p, slaydict, name, output_file, size, par0, fixed, flare):
    '''This is the minimizing function that is called by cutting_session. It
    constructs the necessary model galaxies, computes the moments of the
    lines, and returns a reduced chi-squared value quantifying the goodness
    of fit.

    The reduced chi-squared is computed across all moments and all heights.
    So if you are fitting 3 heights that each have N radial bins (as they
    should, if using correctly binned data) then the chi-squared will be
    computed with 3x3xN points (three from moments, three from heights).
    '''
    pl = list(p)
    pars = [par0[j] if j in fixed else pl.pop(0) for j in range(len(par0))]
    chis = np.array([])
    if flare:
        flarepars = {'h_zR': pars[-1], 'ftype': flare}
    else:
        flarepars = None
    output_file.write(str('{:11.4f} ' * len(pars)).format(*pars))
    for z in slaydict.keys():
        simfile = make_boring([pars[0]], [pars[1]], name=name, size=size, z=z,
                              h_dust=pars[3], kappa_0=pars[2], z_d=pars[4],
                              flarepars=flarepars, nofits=True)[0]
        radii, m1, m2, m3 = moments_notice(slaydict[z][0], simfile,
                                           skip_radii=slaydict[z][2],
                                           flip=slaydict[z][1], nofits=True)
        for moment in [m1]:  # [m1, m2, m3]:
            red_chi = (moment[0] - moment[2]) / moment[1]
            output_file.write('{:11.4f} '.format(bn.nansum(red_chi**2)))
            chis = np.r_[chis, red_chi]

    value = bn.nansum(chis**2) / (chis.size - p.size - 1)
    output_file.write('{:11.4f}\n'.format(value))
    print('v_r: {}\nh_rot: {}\nkappa_0: {}\nh_dust: {}\nz_dust: {}\nvalue: {}\n'
          .format(pars[0], pars[1], pars[2], pars[3], pars[4], value))
    return value
def _pixel_distribution(dataset, tolerance=0.001, min_frames=1000):
    """Estimate the distribution of pixel intensities for each channel.

    Parameters
    ----------
    tolerance : float
        The maximum relative error in the estimates that must be achieved for
        termination.
    min_frames: int
        The minimum number of frames that must be evaluated before termination.

    Returns
    -------
    mean_est : array
        Mean intensities of each channel.
    var_est :
        Variances of the intensity of each channel.
    """
    # TODO: separate distributions for each plane
    sums = np.zeros(dataset.frame_shape[-1]).astype(float)
    sum_squares = np.zeros_like(sums)
    counts = np.zeros_like(sums)
    t = 0
    for frame in it.chain.from_iterable(dataset):
        for plane in frame:
            if t > 0:
                mean_est = sums / counts
                var_est = (sum_squares / counts) - (mean_est**2)
                if t > min_frames and np.all(
                        np.sqrt(var_est / counts) / mean_est < tolerance):
                    break
            sums += np.nan_to_num(nansum(nansum(plane, axis=0), axis=0))
            sum_squares += np.nan_to_num(
                nansum(nansum(plane**2, axis=0), axis=0))
            counts += np.isfinite(plane).sum(axis=0).sum(axis=0)
            t += 1
    assert np.all(mean_est > 0)
    assert np.all(var_est > 0)
    return mean_est, var_est
def _normalize_log_probs(probs):
    max_i = bn.nanargmax(probs)
    try:
        exp_probs = np.exp(probs[np.arange(probs.size) != max_i]
                           - probs[max_i])
    except FloatingPointError:
        exp_probs = np.exp(
            np.clip(probs[np.arange(probs.size) != max_i] - probs[max_i],
                    log_EPSILON, 0))
    probs_norm = probs - probs[max_i] - np.log1p(bn.nansum(exp_probs))
    return np.exp(np.clip(probs_norm, log_EPSILON, 0))
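# A tiny sketch (not from the original source) of the log-sum-exp trick used
# in _normalize_log_probs: subtracting the max before exponentiating avoids
# underflow/overflow, and log1p accumulates the remaining terms stably.
import numpy as np
import bottleneck as bn

log_p = np.array([-1000.0, -1001.0, -1002.0])   # naive np.exp underflows to 0
m = bn.nanargmax(log_p)
rest = np.exp(log_p[np.arange(log_p.size) != m] - log_p[m])
norm = np.exp(log_p - log_p[m] - np.log1p(bn.nansum(rest)))
assert np.isclose(norm.sum(), 1.0)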
def create_mom0(outpath):
    npix = hp.nside2npix(1024)
    cK2nh = 1.28821496912415 * 1.822e18

    with tables.open_file(outpath, mode="a") as store:
        store.create_array(
            store.root,
            name='mom0',
            shape=(npix, 1),
            atom=tables.atom.Float32Atom())

        for i, row in enumerate(store.root.survey.iterrows()):
            store.root.mom0[i] = bn.nansum(row) * cK2nh
def create_mom0_from_table(table):
    """
    Create a moment-0 map from a given EBHIS pytable
    """
    nside = 1024
    mom0 = np.zeros(12 * nside**2, dtype=np.float32) * np.nan

    for i, row in enumerate(table.iterrows()):
        if not (i % 100000):
            print(i)
        mom0[row['HPXINDEX']] = bn.nansum(row['DATA'])

    return mom0
def score(self, NAFac=1, sizeFac=1e-4):
    # We also need to include NAFac and the number of reactions in the model
    # for the sizeFac.
    # time 1 only is taken into account
    # self.diff = np.square(self.measures[self.time] - self.simulated[self.time])
    diff = self.measures[self.time] - self.simulated[self.time]
    diff *= diff

    N = diff.shape[0] * diff.shape[1]
    Nna = np.isnan(diff).sum()
    N -= Nna

    # nInTot = number of edges on in the global model
    # nInTot = len(self.model.reactions)
    nInTot = self.nInputs  # should be correct
    nDataPts = diff.shape[0] * diff.shape[1]
    nDataP = N  # N points excluding the NAs if any
    # NAPen = NAFac * sum(self.simulated.isnull())

    # nInTot: number of inputs of the expanded model
    # nInputs: number of inputs of the cut model
    # In CNO:
    #   nDataPts = number of points irrespective of NA
    #   nDataP = sum(!is.na(CNOlist@signals[[timeIndex]]))
    #   nInputs = number of inputs of the cut model
    # For now, let's assume it is the same as the number of reactions.
    # TODO AND gates should count for 1 edge
    nInputs = self.number_edges

    sizePen = nDataPts * sizeFac * nInputs / float(nInTot)
    # self.debug("nDataPts=%s" % nDataPts)
    # self.debug("nInputs=%s" % nInputs)
    # self.debug("nInTot=%s" % nInTot)
    # self.debug('sizePen=%s' % sizePen)

    # TODO
    deviationPen = bn.nansum(diff) / 2.  # to be in agreement with CNO but wrong
    self.diff = diff / 2.
    # self.debug("deviationPen=%s" % deviationPen)
    # self.debug("Nna=%s" % Nna)
    # self.debug("nDataP=%s" % nDataP)
    deviationPen /= float(nDataP)
    # self.debug("deviationPen=%s" % deviationPen)
    S = deviationPen + sizePen / nDataP
    return S
def _phase1(self):
    """
    Execute phase 1 of the SP region. This phase is used to compute the
    overlap.

    Note - This should only be called once the input has been updated.
    """
    # Compute the connected synapse mask
    self.syn_c = self.p >= self.syn_th

    # Compute the overlaps
    self.overlap[:, 1:] = self.overlap[:, :-1]  # Shift
    self.overlap[:, 0] = bn.nansum(self.x[self.syn_map] * self.syn_c, 1)
    self.overlap[:, 0][self.overlap[:, 0] < self.seg_th] = 0
    self.overlap[:, 0] = self.overlap[:, 0] * self.boost
def group_mean(x, groups, axis=0):
    """
    Mean with groups along an axis.

    Parameters
    ----------
    x : ndarray
        Input data.
    groups : list
        List of group membership of each element along the axis.
    axis : int, {default: 0}
        axis along which the mean is calculated

    Returns
    -------
    idx : ndarray
        An array with the same shape as the input array where every element is
        replaced by the group mean along the given axis.
    """
    # Find set of unique groups
    ugroups = unique_group(groups)

    # Convert groups to a numpy array
    groups = np.asarray(groups)

    # Loop through unique groups and normalize
    xmean = np.nan * np.zeros(x.shape)
    for group in ugroups:
        idx = groups == group
        idxall = [slice(None)] * x.ndim
        idxall[axis] = idx
        idxall = tuple(idxall)  # modern numpy requires a tuple index
        if idx.sum() > 0:
            norm = 1.0 * (~np.isnan(x[idxall])).sum(axis)
            ns = bn.nansum(x[idxall], axis=axis) / norm
            xmean[idxall] = np.expand_dims(ns, axis)
    return xmean
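# A short usage sketch (not from the original source), assuming the helper
# unique_group(groups) returns the unique labels and numpy/bottleneck are
# imported as in group_mean above.
import numpy as np

x = np.array([[1.0, 2.0],
              [3.0, 4.0],
              [np.nan, 6.0]])
groups = ['a', 'b', 'a']
out = group_mean(x, groups, axis=0)
# Rows 0 and 2 share group 'a'; NaNs are excluded from the group mean.
assert np.allclose(out[0], [1.0, 4.0])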
def score(self):
    """Reference output from CNOR for the same model/data (T2 --> 0.03805909):

               Akt Hsp27   NFkB  Erk p90RSK Jnk cJun
        [1,] 0.0081  0.00 0.7396 0.04 0.0144   0    0
        [2,] 0.0324  0.09 0.0100 0.00 0.0000   0    0
        [3,] 0.0081  0.09 0.0100 0.04 0.0144   0    0
        [4,] 0.0081  0.00 0.7396 0.00 0.0000   0    0
        [5,] 0.0324  0.09 0.0100 0.00 0.0000   0    0
        [6,] 0.0081  0.09 0.0100 0.00 0.0000   0    0
        [7,] 0.0000  0.00 0.0000 0.04 0.0144   0    0
        [8,] 0.0000  0.09 0.0100 0.00 0.0000   0    0
        [9,] 0.0000  0.09 0.0100 0.04 0.0144   0    0
        [1] "------------"
        [1] 0.03805909

        [1,] 1 0 0 1 1 0 0
        [2,] 1 1 1 0 0 0 0
        [3,] 1 1 1 1 1 0 0
        [4,] 1 0 0 0 0 0 0
        [5,] 1 1 1 0 0 0 0
        [6,] 1 1 1 0 0 0 0
        [7,] 0 0 0 1 1 0 0
        [8,] 0 1 1 0 0 0 0
        [9,] 0 1 1 1 1 0 0

        [1] "nDataPts= 63"
        [1] "nInputs= 13"      okay
        [1] "nInTot= 22"       okay
        [1] "deviationPen= 2.394"
        [1] "NAPen= 0"
        [1] "sizePen= 0.00372272727272727"   okay
        [1] 0.03805909

    :return:
    """
    # time 1 only is taken into account
    # self.diff = np.square(self.measures[self.time] - self.simulated[self.time])
    diff1 = (self.measures[self.time] - self.simulated[self.time])**2

    if self._params['include_time_zero'] is True:
        diff0 = (self.measures[0] - self.simulated[0])**2
    else:
        diff0 = 0  # FIXME we could have an option to ignore time 0

    diff = diff1 + diff0
    N = diff.shape[0] * diff.shape[1]

    # FIXME Another issue with CNOR is that NAs are taken from the simulated
    # data only, not the data itself...
    Nna1 = np.isnan(diff1).sum()
    Nna0 = np.isnan(diff0).sum()

    # We should check for NAs in the measured and the simulated data, i.e. in
    # the diff as above, but in CNOR this comes from the simulated data only...
    Nna1 = np.isnan(self.simulated[self.time]).sum()
    # FIXME in CNOR, NAs at time 0 are ignored. Why?
    Nna = np.isnan(self.measures[self.time]).sum()
    N -= Nna

    # nInTot = number of edges on in the global model
    # nInTot = len(self.model.reactions)
    nInTot = self.nInputs  # should be correct
    nDataPts = diff.shape[0] * diff.shape[1]
    nDataP = N  # N points excluding the NAs if any
    # NAPen = NAFac * sum(self.simulated[self.time].isnull())

    # nInTot: number of inputs of the expanded model
    # nInputs: number of inputs of the cut model
    # In CNO:
    #   nDataPts = number of points irrespective of NA
    #   nDataP = sum(!is.na(CNOlist@signals[[timeIndex]]))
    #   nInputs = number of inputs of the cut model
    # For now, let's assume it is the same as the number of reactions.
    # TODO AND gates should count for 1 edge
    nInputs = self.number_edges

    # sizePen should be the same as in CNOR
    sizePen = nDataPts * self._params.sizeFac * nInputs / float(nInTot)

    debug = self.debug_score
    if debug:
        print("----")
        print("nDataPts=%s" % nDataPts)
        print("nInputs=%s" % nInputs)
        print("nInTot=%s" % nInTot)
        print("sizePen=%s" % sizePen)
        print("diff0=%s" % bn.nansum(diff0))
        print("diff1=%s" % bn.nansum(diff1))

    # TODO
    self.diff0 = diff0
    self.diff1 = diff1
    deviationPen = (bn.nansum(diff1) + bn.nansum(diff0)) / 2.
    # self.diff = diff / 2.
    # if self._params['include_time_zero'] is True:
    #     deviationPen *= 2  # does not really matter but agrees with CNOR

    if debug:
        print("deviationPen=%s" % deviationPen)
        print("Nna=(%s %s)" % (Nna0, Nna1))
        print("nDataP=%s" % nDataP)

    if nDataP != 0:
        deviationPen /= float(nDataP)
        S = deviationPen + sizePen / nDataP
    else:
        S = deviationPen

    self._na_contrib = Nna / float(nDataPts)
    S = S + self._params.NAFac * Nna1 / float(nDataPts)

    # print(self._previous_fit)
    if debug:
        print("score=%s" % S)

    return S
def correlation(arr1, arr2, axis=None):
    """
    Correlation between two Numpy arrays along the specified axis.

    This is not a cross correlation function. If the two input arrays have
    shape (n, m), for example, then the output will have shape (m,) if axis
    is 0 and shape (n,) if axis is 1.

    Parameters
    ----------
    arr1 : Numpy ndarray
        Input array.
    arr2 : Numpy ndarray
        Input array.
    axis : {int, None}, optional
        The axis along which to measure the correlation. The default, axis
        None, flattens the input arrays before finding the correlation and
        returning it as a scalar.

    Returns
    -------
    corr : Numpy ndarray, scalar
        The correlation between `arr1` and `arr2` along the specified axis.

    Examples
    --------
    Make two Numpy arrays:

    >>> a1 = np.array([[1, 2], [3, 4]])
    >>> a2 = np.array([[2, 1], [4, 3]])
    >>> a1
    array([[1, 2],
           [3, 4]])
    >>> a2
    array([[2, 1],
           [4, 3]])

    Find the correlation between the two arrays along various axes:

    >>> correlation(a1, a2)
    0.59999999999999998
    >>> correlation(a1, a2, axis=0)
    array([ 1.,  1.])
    >>> correlation(a1, a2, axis=1)
    array([-1., -1.])
    """
    mask = np.logical_or(np.isnan(arr1), np.isnan(arr2))
    if mask.any():
        # arr1 and/or arr2 contain NaNs, so use the slower NaN functions
        if axis is None:
            x1 = arr1.flatten()
            x2 = arr2.flatten()
            idx = ~mask.flatten()
            x1 = x1[idx]
            x2 = x2[idx]
            x1 = x1 - x1.mean()
            x2 = x2 - x2.mean()
            num = (x1 * x2).sum()
            den = np.sqrt((x1**2).sum() * (x2**2).sum())
        else:
            x1 = arr1.copy()
            x2 = arr2.copy()
            np.putmask(x1, mask, np.nan)
            np.putmask(x2, mask, np.nan)
            if axis == 0:
                x1 = x1 - bn.nanmean(x1, axis)
                x2 = x2 - bn.nanmean(x2, axis)
            else:
                idx = [slice(None)] * x1.ndim
                idx[axis] = None
                idx = tuple(idx)  # modern numpy requires a tuple index
                x1 = x1 - bn.nanmean(x1, axis)[idx]
                x2 = x2 - bn.nanmean(x2, axis)[idx]
            num = bn.nansum(x1 * x2, axis)
            den = np.sqrt(bn.nansum(x1**2, axis) * bn.nansum(x2**2, axis))
    else:
        # Neither arr1 nor arr2 contains NaNs, so use the faster non-NaN functions
        if axis is None:
            x1 = arr1.flatten()
            x2 = arr2.flatten()
            x1 = x1 - x1.mean()
            x2 = x2 - x2.mean()
            num = (x1 * x2).sum()
            den = np.sqrt((x1**2).sum() * (x2**2).sum())
        else:
            x1 = arr1
            x2 = arr2
            if axis == 0:
                x1 = x1 - x1.mean(axis)
                x2 = x2 - x2.mean(axis)
            else:
                idx = [slice(None)] * x1.ndim
                idx[axis] = None
                idx = tuple(idx)
                x1 = x1 - x1.mean(axis)[idx]
                x2 = x2 - x2.mean(axis)[idx]
            num = np.sum(x1 * x2, axis)
            den = np.sqrt(np.sum(x1**2, axis) * np.sum(x2**2, axis))
    return num / den
def nansum(array, axis=None):
    if isinstance(axis, tuple):
        # bottleneck only handles a single integer axis, so move the requested
        # axes to the front and reduce over them as one flattened axis
        array = _move_tuple_axes_first(array, axis=axis)
        axis = 0
    return bt.nansum(array, axis=axis)
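# A small sketch (not from the original source) of why the wrapper above is
# needed: bottleneck rejects tuple axes, so the axes must be collapsed first.
# The real _move_tuple_axes_first is not shown here; the stand-in below is a
# hypothetical definition only so the wrapper runs in isolation.
import numpy as np

def _move_tuple_axes_first(array, axis):
    other_axes = [i for i in range(array.ndim) if i not in axis]
    array = np.transpose(array, list(axis) + other_axes)
    # collapse the reduced axes into a single leading axis
    return array.reshape((-1,) + array.shape[len(axis):])

a = np.arange(24, dtype=float).reshape(2, 3, 4)
assert np.allclose(nansum(a, axis=(0, 1)), np.nansum(a, axis=(0, 1)))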
def _jll(self, X, i):
    """Joint log-likelihood of class i under a diagonal-covariance Gaussian model."""
    n_ij = self._n_ij[i] - 0.5 * bn.nansum(
        ((X - self.theta_[i, :]) ** 2) / (self.sigma_[i, :]), 1)
    n_ij[np.isnan(n_ij)] = 0
    return self._logprior[i] + n_ij
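# A minimal sketch (not from the original source) of the Gaussian naive Bayes
# joint log-likelihood that _jll computes: log P(c) plus the summed Gaussian
# log-densities, with the normalization constant playing the role of
# self._n_ij above. All values below are made up for illustration.
import numpy as np
import bottleneck as bn

x = np.array([[1.0, np.nan, 2.0]])           # NaN features are skipped
mu = np.array([1.5, 0.0, 2.5])
var = np.array([1.0, 1.0, 4.0])
log_prior = np.log(0.3)
n_i = -0.5 * np.nansum(np.log(2.0 * np.pi * var))
jll = log_prior + n_i - 0.5 * bn.nansum((x - mu) ** 2 / var, 1)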
def simulate(self, tick=1, debug=False, reactions=None):
    """ """
    # pandas is very convenient but slower than numpy.
    # The DataFrame instanciation is costly as well.
    # For small models, it has a non-negligible cost.

    # inhibitors will be changed if not ON
    # self.tochange = [x for x in self.model.nodes() if x not in self.stimuli_names
    #                  and x not in self.and_gates]

    # what about a species that is both inhibited and measured
    testVal = 1e-3

    values = self.values.copy()

    if self.debug:
        self.debug_values = []
    self.residuals = []
    self.penalties = []

    self.count = 0
    self.nSp = len(values)
    residual = 1.

    frac = 1.2
    # FIXME the +1 is to have the same results as in CellNOptR.
    # It means that, due to the cycles, you may not end up with the same
    # results. This happens if you have cycles with inhibitions and an odd
    # number of edges.
    if reactions is None:
        reactions = self.model.buffer_reactions
    self.number_edges = len(reactions)

    # 10 % of the time is spent here
    # predecessors = self.reactions_to_predecessors(reactions)
    predecessors = defaultdict(collections.deque)
    for r in reactions:
        k, v = self._reac2pred[r]
        predecessors[k].extend(v)

    # speed up
    keys = self.values.keys()
    length_predecessors = dict([(node, len(predecessors[node])) for node in keys])

    # if there is an inhibition/drug, the node is 0
    values = self.values.copy()
    for inh in self.inhibitors_names:
        if length_predecessors[inh] == 0:
            # values[inh] = np.array([np.nan for x in range(0, self.N)])
            # values[inh] = np.array([0 for x in range(0, self.N)])
            values[inh] = np.zeros(self.N)

    while (self.count < self.nSp * frac + 1.) and residual > testVal:
        self.previous = values.copy()
        # self.X0 = pd.DataFrame(self.values)
        # self.X0 = self.values.copy()

        # compute AND gates first. why?
        for node in self.and_gates:
            # replace NA by a large number so that the min is unchanged
            if length_predecessors[node] != 0:
                # TODO: not a min anymore but a consensus
                dummy = np.array([values[x].copy() for x in predecessors[node]])
                values[node] = consensus2(dummy.transpose())
            else:
                values[node] = self.previous[node]

        for node in self.tochange:
            # easy one, just the value of the predecessors
            if length_predecessors[node] == 0:
                pass  # nothing to change
            else:
                # TODO: here the negative edges are not 1-x anymore but just -x,
                # and this is not an AND but a consensus
                dummy = np.array([values[x] if (x, node) not in self.toflip
                                  else -values[x] for x in predecessors[node]])
                values[node] = accept_anything(dummy.transpose())

            # take inhibitors into account
            # TODO: do we want to change this behaviour?
            if node in self.inhibitors_names:
                values[node] *= 1 - self.inhibitors[node].values

        # 30 % of the time is spent here.
        # NAs are set automatically to zero because of the int16 cast, which
        # helps speed up the code a bit by removing the need to take care of
        # NAs. If we used nansum, NAs would be ignored even when 1 is compared
        # to NA.
        self.m1 = np.array([self.previous[k] for k in self.previous.keys()], dtype=np.int16)
        self.m2 = np.array([values[k] for k in self.previous.keys()], dtype=np.int16)
        residual = bn.nansum(np.square(self.m1 - self.m2))

        # TODO the stop criterion should account for the length of the path
        # from the species to the node itself, so count < nSp should be taken
        # into account whatever the residual is.
        if self.debug:
            self.debug_values.append(self.previous.copy())
        self.residuals.append(residual)
        self.count += 1

    if self.debug is True:
        # add the latest values simulated in the while loop
        self.debug_values.append(values.copy())

    # Need to set undefined values to NAs
    self.simulated[self.time] = np.array([values[k] for k in self.data.df.columns], dtype=float)
    self.prev = {}
    self.prev[self.time] = np.array([self.previous[k] for k in self.data.df.columns], dtype=float)
    mask = self.prev[self.time] != self.simulated[self.time]
    self.simulated[self.time][mask] = np.nan
    self.simulated[self.time] = self.simulated[self.time].transpose()
def compute(self, today, assets, out, base, weight):
    out[:] = nansum(base * weight, axis=0) / nansum(weight, axis=0)
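# A brief sketch (not from the original source) of the weighted average that
# compute() produces, written in plain numpy: each output column is the
# weight-weighted mean of `base` across assets (axis 0), NaNs ignored. Note
# that a NaN in `base` still leaves its weight in the denominator, matching
# the code above.
import numpy as np

base = np.array([[1.0, 2.0], [3.0, np.nan]])
weight = np.array([[1.0, 1.0], [3.0, 1.0]])
out = np.nansum(base * weight, axis=0) / np.nansum(weight, axis=0)
assert np.allclose(out, [2.5, 1.0])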
def run_wbm(k_s, k_g, et):
    """
    Run the WBM with the specified parameters.

    Args:
        k_s: Soil residence time (days).
        k_g: Groundwater residence time (days).
        et:  Correction factor applied to the ET data (dimensionless).

    Returns:
        Pandas dataframe of monthly river flows.
    """
    # Define dicts storing the number of days in each period. One dict for
    # leap years, the other for non-leap years
    days_in_month_dict = {1: 31, 2: 28, 3: 31, 4: 30, 5: 31, 6: 30,
                          7: 31, 8: 31, 9: 30, 10: 31, 11: 30, 12: 31}
    days_in_month_lpyr_dict = {1: 31, 2: 29, 3: 31, 4: 30, 5: 31, 6: 30,
                               7: 31, 8: 31, 9: 30, 10: 31, 11: 30, 12: 31}

    # Validate user input
    check_params()

    # Get array indices for bounding box
    xmin_idx, xmax_idx, ymin_idx, ymax_idx = get_grid_indices(xmin, xmax, ymin, ymax)

    # Open H5 file
    h5 = h5py.File(in_h5_path, 'r')

    # Get soil properties
    fc, sat, bfi = io.read_soil_properties(h5, xmin_idx, xmax_idx, ymin_idx, ymax_idx)

    # Get LU grid
    lu_grid = io.read_land_use(h5, lu_grid_path, xmin_idx, xmax_idx, ymin_idx, ymax_idx)

    # Get soil and groundwater time constant grids
    k_s = np.ones(shape=bfi.shape) * k_s
    k_g = np.ones(shape=bfi.shape) * k_g

    # Water between fc and sat
    phi = sat - fc

    # Loop over models
    for model in models:
        # Initial water level mid-way between fc and sat
        surf_lev = (fc + sat) / 2.
        gw_lev = np.zeros(shape=phi.shape)

        # Dict to store data
        data_dict = {'Date': [], 'of': [], 'ssf': [], 'gwf': [], 'Runoff_m3/s': []}

        # Loop over years
        for year in range(st_yr, end_yr + 1):
            for month in range(1, 13):
                # Get date
                date = dt.date(year, month, 1)

                # Get number of days in month, allowing for leap years
                if calendar.isleap(year):
                    days_in_month = days_in_month_lpyr_dict[month]
                else:
                    days_in_month = days_in_month_dict[month]

                # Get met data
                pptn, pet = io.read_met_data(h5, model, xmin_idx, xmax_idx,
                                             ymin_idx, ymax_idx, year, month)

                # Correct grass reference PET for land use and apply ET
                # correction factor
                pet = pet * lu_grid * et

                # Convert from mm/month to mm/day
                pptn = pptn / days_in_month
                pet = pet / days_in_month

                # Calculate HER
                her = pptn - pet  # mm/day

                # Get drainage params for this time step
                drainage_params = dr.calculate_drainage(her, surf_lev, gw_lev,
                                                        fc, days_in_month, phi,
                                                        k_s, k_g, bfi)

                # NB: the net_her value returned here is for the whole month
                # i.e. mm/month NOT mm/day like her, above
                net_her, of, ssf, gwf, surf_lev, gw_lev = drainage_params

                # Calculate monthly runoff
                ro = of + ssf + gwf

                # Apply mask
                ro[mask != site_code] = np.nan
                of[mask != site_code] = np.nan
                ssf[mask != site_code] = np.nan
                gwf[mask != site_code] = np.nan

                # Calculate monthly total in m3/s
                ro_mon = 1000. * bn.nansum(ro) / (days_in_month * 24 * 60 * 60)
                of_mon = 1000. * bn.nansum(of) / (days_in_month * 24 * 60 * 60)
                ssf_mon = 1000. * bn.nansum(ssf) / (days_in_month * 24 * 60 * 60)
                gwf_mon = 1000. * bn.nansum(gwf) / (days_in_month * 24 * 60 * 60)

                # Append to data dict
                data_dict['Date'].append(date)
                data_dict['of'].append(of_mon)
                data_dict['ssf'].append(ssf_mon)
                data_dict['gwf'].append(gwf_mon)
                data_dict['Runoff_m3/s'].append(ro_mon)

    # Close file
    h5.close()

    # Build df
    df = pd.DataFrame(data_dict)
    df.index = pd.to_datetime(df['Date'])
    del df['Date']

    return df
def interpolateFine(self, **kwargs):
    '''
    Interpolate NaNs (empty points) in the matrix. In contrast to
    :py:func:`interpolateFast` this function is comparatively slow. It can
    therefore take some minutes to finish the interpolation on mergeMatrices
    that have resolutions > 30 in each basisDimension.
    However this function enables you to:

    * blur between points (*focusExponent*). Thus it is possible to get
      smooth intersections between heavily scattered values.
    * extrapolate
    * limit the maximum interpolation/extrapolation distance (related to the
      unit of the chosen basisDimension)
    * weight distances in each basisDimension

    **Optional kwargs** ("keyword arguments") are:

    ================== =============== ========= ================
    Keyword            Type            Default   Description
    ================== =============== ========= ================
    *mergeName*        list(mergeDim)  [{all}]   one or more merge-dims to do the method on
    *focusExponent*    float           10        blurring between points (sharpness will increase with increasing focusExponent)
    *evalPointDensity* bool            True      weights the moments: 'moment = pointDensity*moment'
    *maxDistance*      dict            {None}    {basisName:maxDistance,..}
    *distanceFactor*   dict            {1}       {basisName:factor,..}
    ================== =============== ========= ================

    .. math:: moment = 1/distance^{focusExponent}
    '''
    # standard
    mergeNames = deepcopy(self.mergeNames)
    focus_exponent = 5
    eval_pointDesity = True
    max_interpolate_distance_dict = {}
    basis_distance_factor_dict = {}
    basis_distance_factor = []
    max_interpolate_distance = []
    for b in self._basis_dim:
        max_interpolate_distance_dict[b.name] = None
        basis_distance_factor_dict[b.name] = 1
        max_interpolate_distance.append(None)
        basis_distance_factor.append(1)
    # individual
    for key in kwargs:
        if key == "mergeName":
            mergeNames = []
            if type(kwargs[key]) != list and type(kwargs[key]) != tuple:
                kwargs[key] = [kwargs[key]]
            for m in kwargs[key]:
                if m not in self.mergeNames:
                    raise KeyError("ERROR: mergeName '%s' not known" % m)
                mergeNames.append(m)
        elif key == "method":
            if kwargs[key] not in ["nearest", "linear", "cubic"]:
                raise KeyError("ERROR: method '%s' not known" % kwargs[key])
            else:
                method = kwargs[key]
        elif key == "focusExponent":
            focus_exponent = float(abs(kwargs[key]))
        elif key == "evalPointDensity":
            eval_pointDesity = bool(kwargs[key])
        elif key == "maxDistance":
            for i in dict(kwargs[key]):
                if i not in max_interpolate_distance_dict.keys():
                    raise KeyError("basisName '%s' in maxDistance not known" % i)
                max_interpolate_distance_dict[i] = kwargs[key][i]
        elif key == "distanceFactor":
            for i in dict(kwargs[key]):
                if i not in basis_distance_factor_dict.keys():
                    raise KeyError("basisName '%s' in distanceFactor not known" % i)
                basis_distance_factor_dict[i] = kwargs[key][i]
        else:
            raise KeyError("keyword '%s' not known" % key)

    max_moment = 1 / (0.5 ** focus_exponent)
    momentMatrix = deepcopy(self.mergeMatrix[0])

    # generate lists from the dicts, sorted via indices
    for i in basis_distance_factor_dict:
        for n, b in enumerate(self._basis_dim):
            if b.name == i:
                basis_distance_factor[n] = basis_distance_factor_dict[i]
    for i in max_interpolate_distance_dict:
        for n, b in enumerate(self._basis_dim):
            if b.name == i:
                max_interpolate_distance[n] = max_interpolate_distance_dict[i]

    # normalize distances
    sumB = float(sum(basis_distance_factor))
    for n in range(len(basis_distance_factor)):
        basis_distance_factor[n] /= sumB

    # prepare regarded-array-slice
    n_regarded_cells = []
    regarded_cell_range = []
    abs_max_interpolation_dist = 0
    pos_corr = []
    for n, b in enumerate(self._basis_dim):
        basis_distance_factor.append(1)
        if max_interpolate_distance[n] is not None:
            cell_len = (b._include_range[1] - b._include_range[0]) / b.resolution
            nCells = int(max_interpolate_distance[n] / cell_len)
            if nCells == 0:
                nCells = 1
            n_regarded_cells.append(nCells)
            abs_max_interpolation_dist += nCells
        else:
            n_regarded_cells.append(1)  # dummy
        regarded_cell_range.append(0)
        pos_corr.append(0)
    abs_max_interpolation_dist **= 0.5
    print(abs_max_interpolation_dist)

    try:
        for m in mergeNames:
            merge_index = self.mergeNames.index(m)
            print("--> interpolate matrix %s" % m)
            mergeM = self.mergeMatrix[merge_index]
            new_mergeM = deepcopy(mergeM)
            densityM = self.densityMatrix[merge_index]
            status_counter = 0
            next_status = int(mergeM.size / 100)

            for nPoi, poi in enumerate(np.ndindex(mergeM.shape)):
                for n, b in enumerate(self._basis_dim):
                    # only work with a part of the matrix, depending on the
                    # size of the regarded cell range
                    if max_interpolate_distance[n] is not None:
                        start = poi[n] - n_regarded_cells[n]
                        if start < 0:
                            start = 0
                        stop = poi[n] + n_regarded_cells[n]
                        if stop > b.resolution - 1:
                            stop = b.resolution - 1
                        regarded_cell_range[n] = slice(start, stop)
                        pos_corr[n] = start
                    else:
                        regarded_cell_range[n] = slice(None, None)

                t_regarded_cell_range = tuple(regarded_cell_range)
                sliced_moment_matrix = momentMatrix[t_regarded_cell_range]

                for mergePosition, mergeValue in np.ndenumerate(mergeM[t_regarded_cell_range]):
                    # empty merge-positions have no moment
                    if np.isnan(mergeValue):
                        sliced_moment_matrix[mergePosition] = 0
                    else:
                        # calc distance of each mergePoint to the point of interest
                        distance = 0
                        for k in range(self.nBasis):
                            distance += (abs(poi[k] - (mergePosition[k] + pos_corr[k]))
                                         * basis_distance_factor[k]) ** 2
                        distance = distance ** 0.5
                        if distance == 0:
                            # on top of the point of interest
                            sliced_moment_matrix[mergePosition] = max_moment
                        elif (max_interpolate_distance[n] is not None
                              and distance > abs_max_interpolation_dist):
                            # no moments if too far away from existing points
                            sliced_moment_matrix[mergePosition] = np.nan
                        else:
                            # calc moments
                            sliced_moment_matrix[mergePosition] = 1 / (distance ** focus_exponent)

                if eval_pointDesity:
                    # multiply moment with number of points
                    sliced_moment_matrix *= densityM[t_regarded_cell_range]

                # normalize moments so that sum(moments) = 1
                nansum = bn.nansum(sliced_moment_matrix)
                if nansum != 0:
                    sliced_moment_matrix /= nansum
                # mergeMatrix[point] = sum(moments * matrix)
                new_mergeM[poi] = bn.nansum(mergeM[t_regarded_cell_range] * sliced_moment_matrix)

                status_counter += 1
                if status_counter == next_status:
                    # print status
                    _utils.statusBar(nPoi + 1, mergeM.size)
                    status_counter = 0

            self.mergeMatrix[merge_index] = new_mergeM

    except KeyboardInterrupt:
        print("...interrupted")
    print("done")
def parallel_compute(queue, shmem_buffer, shmem_results, size_x, size_y, len_filelist, operation):
    # queue, shmem_buffer, shmem_results, size_x, size_y, len_filelist = worker_args
    buffer = shmem_as_ndarray(shmem_buffer).reshape((size_x, size_y, len_filelist))
    result_buffer = shmem_as_ndarray(shmem_results).reshape((size_x, size_y))

    while (True):
        cmd_quit, line = queue.get()
        if (cmd_quit):
            queue.task_done()
            return

        if (operation == "median"):
            result_buffer[line, :] = numpy.median(buffer[line, :, :], axis=1)

        elif (operation == "medsigclip"):
            # Do not use (yet), is slow as hell
            # (masked arrays are pure python, not C like all the rest)
            _sigma_plus = numpy.ones(shape=(buffer.shape[1], buffer.shape[2])) * 1e9
            _sigma_minus = numpy.ones(shape=(buffer.shape[1], buffer.shape[2])) * 1e9
            _median = numpy.median(buffer[line, :, :], axis=1)

            nrep = 3
            valid_pixels = numpy.ma.MaskedArray(buffer[line, :, :])
            for rep in range(nrep):
                _median_2d = _median.reshape(_median.shape[0], 1).repeat(buffer.shape[2], axis=1)
                _min = _median_2d - 3 * _sigma_minus
                _max = _median_2d + 3 * _sigma_plus

                # valid_pixels = numpy.ma.masked_inside(buffer[line, :, :], _min, _max)
                valid = (buffer[line, :, :] > _min) & (buffer[line, :, :] < _max)
                valid_pixels = numpy.ma.array(buffer[line, :, :], mask=valid)

                _median = numpy.median(valid_pixels, axis=1)
                if (rep < nrep - 1):
                    _sigma_plus = scipy.stats.scoreatpercentile(valid_pixels, 84) - _median
                    _sigma_minus = _median - scipy.stats.scoreatpercentile(valid_pixels, 16)

            result_buffer[line, :] = _median

        elif (operation == "medclip"):
            intermediate = numpy.sort(buffer[line, :, :], axis=1)
            result_buffer[line, :] = numpy.median(intermediate[:, 1:-2], axis=1)

        elif (operation == "min"):
            result_buffer[line, :] = numpy.min(buffer[line, :, :], axis=1)

        elif (operation == "max"):
            result_buffer[line, :] = numpy.max(buffer[line, :, :], axis=1)

        elif (operation == "nanmean"):
            result_buffer[line, :] = scipy.stats.nanmean(buffer[line, :, :], axis=1)

        elif (operation == "nanmedian"):
            result_buffer[line, :] = scipy.stats.nanmedian(buffer[line, :, :], axis=1)

        elif (operation == "nanmean.bn"):
            x = numpy.array(buffer[line, :, :], dtype=numpy.float32)
            result_buffer[line, :] = bottleneck.nanmean(x, axis=1)

        elif (operation == "nanmedian.bn"):
            x = numpy.array(buffer[line, :, :], dtype=numpy.float32)
            result_buffer[line, :] = bottleneck.nanmedian(x, axis=1)

        elif (operation == "nansum.bn"):
            x = numpy.array(buffer[line, :, :], dtype=numpy.float32)
            result_buffer[line, :] = bottleneck.nansum(x, axis=1)

        else:
            result_buffer[line, :] = numpy.mean(buffer[line, :, :], axis=1)

        queue.task_done()
def _fit(self, X, y):
    self.X, y = self._check_params(X, y)
    n, p = X.shape
    self.y = y.reshape((n, 1))

    # list of selected features
    S = []
    # list of all features
    F = list(range(p))

    if self.n_features != 'auto':
        feature_mi_matrix = np.zeros((self.n_features, p))
    else:
        feature_mi_matrix = np.zeros((n, p))
    feature_mi_matrix[:] = np.nan
    S_mi = []

    # ----------------------------------------------------------------------
    # FIND FIRST FEATURE
    # ----------------------------------------------------------------------

    # check a range of ks (3-10), and choose the one with the max median MI
    k_min = 3
    k_max = 11
    xy_MI = np.zeros((k_max - k_min, p))
    xy_MI[:] = np.nan
    for i, k in enumerate(range(k_min, k_max)):
        xy_MI[i, :] = mi.get_first_mi_vector(self, k)
    xy_MI = bn.nanmedian(xy_MI, axis=0)

    # choose the best, add it to S, remove it from F
    S, F = self._add_remove(S, F, bn.nanargmax(xy_MI))
    S_mi.append(bn.nanmax(xy_MI))

    # notify user
    if self.verbose > 0:
        self._print_results(S, S_mi)

    # ----------------------------------------------------------------------
    # FIND SUBSEQUENT FEATURES
    # ----------------------------------------------------------------------

    # 'auto' has no fixed target size; it relies on the convergence break below
    # (in Python 3, comparing len(S) < 'auto' would raise a TypeError)
    n_features = np.inf if self.n_features == 'auto' else self.n_features
    while len(S) < n_features:
        # loop through the remaining unselected features and calculate MI
        s = len(S) - 1
        feature_mi_matrix[s, F] = mi.get_mi_vector(self, F, s)

        # make a decision based on the chosen FS algorithm
        fmm = feature_mi_matrix[:len(S), F]
        if self.method == 'JMI':
            selected = F[bn.nanargmax(bn.nansum(fmm, axis=0))]
        elif self.method == 'JMIM':
            selected = F[bn.nanargmax(bn.nanmin(fmm, axis=0))]
        elif self.method == 'MRMR':
            MRMR = xy_MI[F] - bn.nanmean(fmm, axis=0)
            selected = F[bn.nanargmax(MRMR)]

        # record the JMIM of the newly selected feature and add it to S
        S_mi.append(bn.nanmax(bn.nanmin(fmm, axis=0)))
        S, F = self._add_remove(S, F, selected)

        # notify user
        if self.verbose > 0:
            self._print_results(S, S_mi)

        # if n_features == 'auto', check S_mi to decide when to stop
        if self.n_features == 'auto' and len(S) > 10:
            # smooth the 1st derivative of the MI values of previously selected features
            MI_dd = signal.savgol_filter(S_mi[1:], 9, 2, 1)
            # does the mean of the last 5 converge to 0?
            if np.abs(np.mean(MI_dd[-5:])) < 1e-3:
                break

    # ----------------------------------------------------------------------
    # SAVE RESULTS
    # ----------------------------------------------------------------------

    self.n_features_ = len(S)
    self.support_ = np.zeros(p, dtype=bool)  # np.bool is deprecated
    self.support_[S] = 1
    self.ranking_ = S
    self.mi_ = S_mi

    return self
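# --- An illustrative toy example (not from the original source) of the three
# selection criteria used above, on a hand-made MI matrix `fmm` whose rows are
# already-selected features and whose columns are candidates: JMI ranks
# candidates by the *sum* of MI with the selected set, JMIM by the *worst
# case* (min), and MRMR by relevance minus mean redundancy.
import numpy as np
import bottleneck as bn

fmm = np.array([[0.9, 0.2, np.nan],
                [0.1, 0.8, 0.5]])
relevance = np.array([0.7, 0.6, 0.4])           # xy_MI restricted to candidates

jmi  = bn.nanargmax(bn.nansum(fmm, axis=0))     # sums [1.0, 1.0, 0.5] -> 0
jmim = bn.nanargmax(bn.nanmin(fmm, axis=0))     # mins [0.1, 0.2, 0.5] -> 2
mrmr = bn.nanargmax(relevance - bn.nanmean(fmm, axis=0))  # [0.2, 0.1, -0.1] -> 0
print(jmi, jmim, mrmr)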
def _simulate(self, reactions=None, time=None, ntic=None):
    """Run the Boolean simulation until convergence or ntic ticks."""
    # pandas is very convenient but slower than numpy, and the DataFrame
    # instanciation is costly as well; for small models it has a
    # non-negligible cost.

    if time is None:
        time = self.time

    # what about a species that is both inhibited and measured?
    testVal = 1e-3

    values = self.values  # !! reference; must be reset when calling _init_values / simulate()

    if self.debug:
        self.debug_values = [values.copy()]
    self.residuals = []
    self.penalties = []

    self.count = 0
    self.nSp = len(values)
    residual = 1.
    frac = 1.2
    # _shift is set to +1. FIXME: +1 is to have the same results as in
    # CellNOptR. Depending on the cycles, you may not end up with the same
    # results; this happens if you have cycles with inhibitions and an odd
    # number of edges.
    if reactions is None:
        reactions = self.model.buffer_reactions
    self.number_edges = len(reactions) + sum([this.count('^') for this in reactions])

    # 10% of the time is spent here
    predecessors = defaultdict(collections.deque)
    for r in reactions:
        k, v = self._reac2pred[r]
        predecessors[k].extend(v)

    # speed up
    keys = sorted(self.values.keys())
    length_predecessors = dict([(node, len(predecessors[node])) for node in keys])

    # if there is an inhibition/drug, the node is 0. FIXME: is this required?
    for inh in self.inhibitors_names:
        if length_predecessors[inh] == 0:
            values[inh] = np.zeros(self.N)

    # To get the same results as in CellNOptR, it is sometimes required to
    # add one more count; to have the same results at time 0 as in
    # LiverDREAM, +3 is required.
    if ntic is None:
        ntic = self.nSp * frac + self._shift
    else:
        # we want to use ntic as the unique stopping criterion
        testVal = -1

    while (self.count < ntic) and residual > testVal:
        self.previous = values.copy()

        # Compute AND gates first; they should be updated before the other
        # nodes (the paradoxical drug effects are handled further below).
        for node in self.and_gates:
            # replace NA by a large number so that the min is unchanged;
            # there are always predecessors
            if length_predecessors[node] != 0:
                values[node] = bn.nanmin(
                    np.array([self.previous[x] for x in predecessors[node]]),
                    axis=0)
            else:
                values[node] = self.previous[node]

        for node in self.tochange:
            # easy one, just the value of the predecessors
            if length_predecessors[node] == 0:
                pass  # nothing to change
            else:
                dummy = np.array([self.previous[x] if (x, node) not in self.toflip
                                  else 1 - self.previous[x]
                                  for x in predecessors[node]])
                try:
                    values[node] = bn.nanmax(dummy, axis=0)
                except TypeError:
                    # in some simple cases, the dtype must be reset
                    values[node] = bn.nanmax(dummy.astype('int'), axis=0)

            # take inhibitors into account
            if node in self.inhibitors_names and node not in self.inhibitors_failed:
                # if the inhibitor is on (1), multiply by 0;
                # if the inhibitor is not active (0), do nothing
                values[node] *= 1 - self.inhibitors[node].values

            # paradoxical effects induced by drugs?
            for inh in self.paradoxical.keys():
                if node in self.paradoxical[inh]:
                    values[node][(self.inhibitors[inh] == 1).values] = 1
            for inh in self.repressors.keys():
                if node in self.repressors[inh]:
                    values[node][(self.inhibitors[inh] == 1).values] = 0

        # Here NAs are set automatically to zero because of the int16 cast,
        # which helps speeding up the code by removing the need to take care
        # of NAs. If we used nansum, NAs would be ignored even when 1 is
        # compared to NA.
        self.m1 = np.array([self.previous[k] for k in keys], dtype=np.int16)
        self.m2 = np.array([values[k] for k in keys], dtype=np.int16)
        residual = bn.nansum(np.square(self.m1 - self.m2))

        # TODO: the stop criterion should account for the path length from
        # the species to the node itself, so count < nSp should be taken
        # into account whatever the residual is.
        if self.debug:
            self.debug_values.append(self.values.copy())
        self.residuals.append(residual)
        if self.stopcount:
            if self.count < 10:
                residual += 1
        self.count += 1

    # Need to set undefined values to NAs
    mask = self.m1 != self.m2
    data = np.array([values[k] for k in keys], dtype=float)
    data[mask] = np.nan
    self.dd = data

    indices = [keys.index(x) for x in self.data.df.columns]
    if time == 0:
        self.simulated[0] = data[indices, :].transpose()
    else:
        self.simulated[self.time] = data[indices, :].transpose()
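# --- A minimal sketch (not from the original source) of the convergence test
# used above: the network is iterated until the int16-cast state matrices of
# two consecutive ticks agree, with the residual computed as bn.nansum of the
# squared element-wise difference.
import numpy as np
import bottleneck as bn

previous = np.array([[0, 1, 1], [1, 0, 1]], dtype=np.int16)
current  = np.array([[0, 1, 1], [1, 1, 1]], dtype=np.int16)
residual = bn.nansum(np.square(previous - current))
print(residual)   # 1 -> not yet converged; 0 would stop the loop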
def update(self):
    # get merge-extract
    for n, m in enumerate(self.show_merge):
        if self.show_merge_as_density[m]:
            self.merge_extract = self.densityMatrix[m][tuple(self.basis_dim_plot_range)]
        else:
            self.merge_extract = self.mergeMatrix[m][tuple(self.basis_dim_plot_range)]
        for b in range(len(self._basis_dim) - 1, -1, -1):
            # basis dim to concentrate
            if b not in self.show_basis:
                pos_corr = self.concentrate_basis_dim[:b].count("pos")
                if self.concentrate_basis_dim[b] == "sum":
                    self.merge_extract = bn.nansum(self.merge_extract, b - pos_corr)
                elif self.concentrate_basis_dim[b] == "mean":
                    self.merge_extract = bn.nanmean(self.merge_extract, b - pos_corr)
                elif self.concentrate_basis_dim[b] == "max":
                    self.merge_extract = bn.nanmax(self.merge_extract, b - pos_corr)
                elif self.concentrate_basis_dim[b] == "min":
                    self.merge_extract = bn.nanmin(self.merge_extract, b - pos_corr)
        # initialise before the loop so it is defined even when the loop
        # body never runs
        basis_time_index = None
        for b in range(len(self._basis_dim) - 2, -1, -1):
            # Check from end to start whether to roll the axes: the time-axis
            # has to be the last one. Don't roll the last basis-dim
            # (start with len(self._basis_dim) - 2).
            if b not in self.show_basis and self.concentrate_basis_dim[b] == "time":
                # reshape the matrix
                self.merge_extract = np.rollaxis(self.merge_extract, b, 0)
                basis_time_index = b
                break  # no need to continue; only one dim can be 'time'
        if len(self.show_basis) == 1:
            basis_extract = self.basisMatrix[self.show_basis[0]][
                self.basis_dim_plot_range[self.show_basis[0]]]
            if self.scale_plot:
                self.plot.enableAutoRange('xy', True)
            else:
                if self.enableAutoRangeX:
                    self.plot.enableAutoRange('x', True)
                if self.enableAutoRangeY:
                    self.plot.enableAutoRange('y', True)
            if self.transpose_axes:
                self.curves[n].setData(self.merge_extract, basis_extract)
            else:
                self.curves[n].setData(basis_extract, self.merge_extract)
        elif len(self.show_basis) >= 2:
            # calc scale and zero-position for the axes-ticks
            x0 = self._basis_dim[self.show_basis[0]]._include_range[0]
            x1 = self._basis_dim[self.show_basis[0]]._include_range[1]
            y0 = self._basis_dim[self.show_basis[1]]._include_range[0]
            y1 = self._basis_dim[self.show_basis[1]]._include_range[1]
            xscale = (x1 - x0) / self._basis_dim[self.show_basis[0]].resolution
            yscale = (y1 - y0) / self._basis_dim[self.show_basis[1]].resolution
            args = {'pos': [x0, y0], 'scale': [xscale, yscale]}
            if self.transpose_axes:
                args = {'pos': [y0, x0], 'scale': [yscale, xscale]}
            # set time-ticks
            if basis_time_index is not None:
                args["xvals"] = self.basisMatrix[basis_time_index]
            if self.enableAutoRangeX:
                self.view.enableAutoRange('x', True)
            if self.enableAutoRangeY:
                self.view.enableAutoRange('y', True)
            # By default autoLevels (the color level of the merge-dims) is
            # True and calculated by pyqtgraph, but that only works for
            # arrays without NaN values: the calculated color level is wrong
            # when the real values are beyond the NaN replacement (zero).
            # Therefore the color level is calculated here explicitly
            # whenever NaNs are in the array:
            anynan = bn.anynan(self.merge_extract)
            if anynan:
                mmin = bn.nanmin(self.merge_extract)
                mmax = bn.nanmax(self.merge_extract)
                if np.isnan(mmin):
                    mmin, mmax = 0, 0
                self.plot.setLevels(mmin, mmax)
                args["autoLevels"] = False
                # the following does not work with this version of pyqtgraph:
                # args["levels"] = [mmin, mmax]
                self.merge_extract = _utils.nanToZeros(self.merge_extract)
            if self.transpose_axes:
                self.plot.setImage(self.merge_extract.transpose(),
                                   autoRange=self.scale_plot, **args)
            else:
                self.plot.setImage(self.merge_extract,
                                   autoRange=self.scale_plot, **args)
            if anynan:
                # scale the histogram to the new range
                self.plot.ui.histogram.vb.setYRange(mmin, mmax)
        self.scale_plot = False
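# --- A minimal sketch (not from the original source) of the color-level
# workaround used above: compute the display levels with bn.nanmin/bn.nanmax
# *before* replacing NaNs with zeros, so the zeros introduced for display
# cannot distort the color scale. bn.anynan is a fast guard for the NaN-free
# case; np.nan_to_num stands in for the project's _utils.nanToZeros helper.
import numpy as np
import bottleneck as bn

img = np.array([[5.0, np.nan], [7.0, 9.0]])
if bn.anynan(img):
    levels = (bn.nanmin(img), bn.nanmax(img))   # (5.0, 9.0), NaN ignored
    img = np.nan_to_num(img)                    # display copy: NaN -> 0
else:
    levels = (img.min(), img.max())
print(levels)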
def extract_orders(specfile, wavfile, regionfile, outdir,
                   onlyorders=None, wavmin=1000.0, wavmax=10000.0):
    """
    Go through all the orders, extracting each one
    """
    imhdu = pyfits.open(specfile + ".fits")[0]
    badpixhdu = pyfits.open(specfile + ".fits")["BADPIX"]
    wavhdu, = pyfits.open(wavfile + ".fits")
    regions = pyregion.open(regionfile + ".reg")
    # Tilt angles from the horizontal
    tilts = [r.coord_list[4] for r in regions]
    # Fractional y shift of box center from nearest grid point
    ypix_fractions = [r.coord_list[1] - int(r.coord_list[1]) for r in regions]
    wide_filters = regions.get_filter()
    # Restrict attention to the center of each slit
    regions = pyregion.ShapeList([shrink_box(region) for region in regions])
    filters = regions.get_filter()
    ordernames = [box.attr[1]["text"] for box in regions]

    #
    # Auto-identify contiguous regions in the wavelength map
    #
    # All pixels that have a valid wavelength
    wavmask = (wavhdu.data >= wavmin) & (wavhdu.data <= wavmax)
    labels, nlabels = ndi.label(wavmask, structure=np.ones((3, 3)))
    # save a copy of the labels for debugging
    pyfits.PrimaryHDU(labels).writeto("orders-labels.fits", clobber=True)

    print("Number of order boxes found: ", len(ordernames))
    print("Number of objects found: ", nlabels)

    for widefilter, orderfilter, ordername, tilt, ypix_frac in zip(
            wide_filters, filters, ordernames, tilts, ypix_fractions):
        # All pixels that we think are in the central part
        # of the slit in this order ...
        ordermask = orderfilter.mask(wavhdu.data.shape)
        # ... and the same for the entire order (adding some padding)
        widemask = dilate_mask(widefilter.mask(wavhdu.data.shape), 3)
        iorder = int(ordername.split()[-1])
        if not (onlyorders is None or iorder in onlyorders):
            # Option for skipping all but some orders
            continue

        # First find wavelengths that ought to fall in the order
        orderwavs = wavhdu.data[ordermask & wavmask]
        if len(orderwavs):
            print("{}: {:.2f}-{:.2f}".format(
                ordername, orderwavs.min(), orderwavs.max()))
        else:
            print("{}: No valid wavelengths found".format(ordername))

        # Second, look at wavelengths in the contiguous wavelength box
        # that we found
        label = box2label[iorder]
        orderwavs = wavhdu.data[labels == label]
        if len(orderwavs):
            print("Label {}: {:.2f}-{:.2f}".format(
                label, orderwavs.min(), orderwavs.max()))
        else:
            print("{}*: No valid wavelengths found".format(ordername))
        print()

        # enclosing rectangle around this entire order
        bbox, = ndi.find_objects(widemask.astype(int))
        # Add a few more pixels at the top to give equal top/bottom margins.
        # We have to do it like this since slice.stop is read-only
        # and tuples are immutable.
        start, stop, step = bbox[0].indices(wavhdu.data.shape[0])
        bbox = (slice(start, stop + 6), bbox[1])
        imorder = imhdu.data.copy()[bbox]
        badpixorder = badpixhdu.data.copy()[bbox]
        wavorder = wavhdu.data.copy()[bbox]

        # Construct a mask of all pixels that both methods say
        # should be in this order
        if iorder <= ordermax:
            # These orders are the easiest to deal with
            m = widemask & (labels == label)
            # This mask is just for the central strip,
            # which is what we need for the wavs
            cm = ordermask & (labels == label)
        else:
            m = labels == label
            cm = labels == label
        m = m[bbox]
        cm = cm[bbox]
        print("Number of good wavelength pixels found in order box: ",
              np.sum(m), np.sum(cm))
        mm = widemask[bbox]  # less stringent mask

        # Use a single average wavelength for each column
        ny, nx = wavorder.shape
        meanwav = bn.nansum(wavorder * cm, axis=0) / bn.nansum(cm, axis=0)
        meanwav = np.vstack([meanwav] * ny)

        #
        # Remove the horizontal tilt of the orders
        #
        # First the linear tilt
        yshifts = np.arange(nx) * np.tan(np.radians(tilt))
        # Then the parabolic residual distortion
        yshifts += parabolic_distorsion(iorder) * (
            2 * np.arange(nx).astype(float) / nx - 1.0) ** 2
        # Finally, align the box center to the pixel grid
        yshifts += ypix_frac
        jshifts = yshifts.astype(int)   # required shift of each column
        jshiftset = set(jshifts)
        # Amount to trim off the top of the strip at the end
        jtrim = jshifts.max()

        if INTERPOLATE is not None:
            # These are the grid points we want
            grid_x, grid_y = np.meshgrid(
                np.arange(nx, dtype=float),
                np.arange(ny, dtype=float)
            )
            # And these are the coordinates we currently have.
            # Note that only the y's change, not the x's.
            x, y = grid_x, grid_y - yshifts[np.newaxis, :]
            # Interpolate image onto new grid
            imorder = scipy.interpolate.griddata(
                (x.ravel(), y.ravel()), imorder.ravel(),
                (grid_x, grid_y), method=INTERPOLATE
            )
            # Use nearest-neighbor for the masks,
            # so they don't get converted to reals
            badpixorder = scipy.interpolate.griddata(
                (x.ravel(), y.ravel()), badpixorder.ravel(),
                (grid_x, grid_y), method="nearest"
            )
            m = scipy.interpolate.griddata(
                (x.ravel(), y.ravel()), m.ravel(),
                (grid_x, grid_y), method="nearest"
            )
            mm = scipy.interpolate.griddata(
                (x.ravel(), y.ravel()), mm.ravel(),
                (grid_x, grid_y), method="nearest"
            )
        else:
            jshifts = np.vstack([jshifts] * ny)  # Expand back to 2D
            for jshift in jshiftset:
                # Consider each unique value of jshift and split it up into
                # one or more contiguous chunks that have this value of jshift
                chunklabels, nlabels = ndi.label(jshifts == jshift)
                for chunk in ndi.find_objects(chunklabels):
                    # apply the shift to all the arrays
                    # (except meanwav, which is constant in y)
                    imorder[chunk] = np.roll(imorder[chunk], -jshift, axis=0)
                    badpixorder[chunk] = np.roll(badpixorder[chunk], -jshift, axis=0)
                    m[chunk] = np.roll(m[chunk], -jshift, axis=0)
                    mm[chunk] = np.roll(mm[chunk], -jshift, axis=0)

        # Trim the useless space off the top
        imorder = imorder[:-jtrim, :]
        badpixorder = badpixorder[:-jtrim, :]
        meanwav = meanwav[:-jtrim, :]

        # And save each order to FITS files
        pri = pyfits.PrimaryHDU()
        sci = pyfits.ImageHDU(imorder, name='SCI')
        bad = pyfits.ImageHDU(badpixorder, name='BAD')
        wav = pyfits.ImageHDU(meanwav, name='WAV')
        outfile = "{}-order{}.fits".format(os.path.split(specfile)[-1], iorder)
        outfile = os.path.join(outdir, outfile)
        pyfits.HDUList([pri, sci, wav, bad]).writeto(outfile, clobber=True)
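# --- A minimal sketch (not from the original source) of the masked column
# average used above for `meanwav`: multiplying by a Boolean mask zeroes the
# excluded pixels, and dividing the bn.nansum of the values by the bn.nansum
# of the mask gives the per-column mean over the masked-in pixels only (a
# column with no masked-in pixels would divide by zero and yield NaN/inf).
import numpy as np
import bottleneck as bn

wav = np.array([[4000.0, 4001.0],
                [4002.0, 4003.0]])
cm = np.array([[True, False],
               [True, True]])
meanwav = bn.nansum(wav * cm, axis=0) / bn.nansum(cm, axis=0)
print(meanwav)   # [4001., 4003.]: column means over masked-in pixels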
def _simulate(self, reactions=None, time=None, ntic=None):
    """reactions is a list of parameters between 0 and 1"""
    if reactions is None:
        reactions = [1] * self.N_reactions
    self.parameters_in = reactions[:]
    self.number_edges = self.N_reactions

    if time is None:
        time = self.time

    # what about a species that is both inhibited and measured?
    testVal = 1e-3

    values = self.values  # !! reference; must be reset when calling _init_values / simulate()

    if self.debug:
        self.debug_values = [values.copy()]
    self.residuals = []
    self.penalties = []

    self.count = 0
    self.nSp = len(values)
    residual = 1.
    frac = 1.2

    # all reactions are always on, but for now keep this:
    reactions = self.model.buffer_reactions

    # speed up
    keys = sorted(self.values.keys())

    if ntic is None:
        ntic = self.nSp * frac + self._shift
    else:
        # we want to use ntic as the unique stopping criterion
        testVal = -1

    # CCK81: inhibitors_failed = ['mTOR', 'PI3K'] but has effects on AKT
    # CCK81: repressors['mTOR'] = ['AKT']
    while (self.count < ntic) and residual > testVal:
        self.previous = values.copy()

        # 1. for each AND edge and for each normal edge,
        #    get the inputs and multiply by the strength (parameter)
        # 2. for each node, combine the contributions of its predecessors
        for node in self.input_edges.keys():
            # the hot spot is here, in the for loops and function_edges calls
            M = len(self.input_edges[node])
            if M > 1:
                count = 0
                data = np.zeros((M, self.N))
                for i in self.input_edges[node]:
                    data[count] = self.function_edges[i](i)
                    count += 1
                values[node] = np.sum(data, axis=0)
            else:
                i = self.input_edges[node][0]
                values[node] = self.function_edges[i](i)

            if node in self.inhibitors_names and node not in self.inhibitors_failed:
                # if the inhibitor is on (1), multiply by 0;
                # if the inhibitor is not active (0), do nothing
                mask = self.inhibitors[node].values == 1
                values[node][mask] *= 0  # could be optimised as well

            # could be a value between 0 and 1, or even negative if we
            # have negative values
            for inh in self.repressors.keys():
                if node in self.repressors[inh]:
                    values[node][(self.inhibitors[inh] == 1).values] -= 1
            values[node] = np.clip(values[node], -1, 1)

        # Here NAs are set automatically to zero because of the int16 cast,
        # which helps speeding up the code by removing the need to take care
        # of NAs. If we used nansum, NAs would be ignored even when 1 is
        # compared to NA.
        self.m1 = np.array([self.previous[k] for k in keys], dtype=np.int16)
        self.m2 = np.array([values[k] for k in keys], dtype=np.int16)
        residual = bn.nansum(np.square(self.m1 - self.m2))

        # TODO: the stop criterion should account for the path length from
        # the species to the node itself, so count < nSp should be taken
        # into account whatever the residual is.
        if self.debug:
            self.debug_values.append(self.values.copy())
        self.residuals.append(residual)
        if self.stopcount:
            if self.count < 10:
                residual += 1
        self.count += 1

    # Need to set undefined values to NAs
    mask = self.m1 != self.m2
    data = np.array([values[k] for k in keys], dtype=float)
    data[mask] = np.nan
    self.dd = data

    indices = [keys.index(x) for x in self.data.df.columns]
    if time == 0:
        self.simulated[0] = data[indices, :].transpose()
    else:
        self.simulated[self.time] = data[indices, :].transpose()
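# --- A minimal sketch (not from the original source) of the node update rule
# used by the Boolean simulator further above: AND gates take bn.nanmin over
# their inputs, OR-like nodes take bn.nanmax over their (possibly negated)
# predecessors, and an active inhibitor forces the node to 0. Each column is
# one experiment; NaN inputs are ignored by the nan-aware reducers.
import numpy as np
import bottleneck as bn

pred = np.array([[1.0, np.nan, 0.0],    # predecessor A, per experiment
                 [0.0, 1.0, 1.0]])      # predecessor B, per experiment
and_gate = bn.nanmin(pred, axis=0)      # [0., 1., 0.]
or_node = bn.nanmax(pred, axis=0)       # [1., 1., 1.]
inhibited = np.array([1, 0, 0])         # inhibitor on in experiment 0
or_node *= 1 - inhibited                # -> [0., 1., 1.]
print(and_gate, or_node)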