def r45(y, e, vv, p, asdf1, dataType, r41, nlhc):
    o, a, definiteRange = func82(y, e, vv, asdf1, dataType, p, r41)
    if p.debug and any(a + 1e-15 < o):
        p.warn('interval lower bound exceeds upper bound, it seems to be FuncDesigner kernel bug')
    if p.debug and any(logical_xor(isnan(o), isnan(a))):
        p.err('bug in FuncDesigner intervals engine')

    m, n = e.shape
    o, a = o.reshape(2*n, m).T, a.reshape(2*n, m).T

    if p.probType not in ('SNLE', 'NLSP') and asdf1.isUncycled and not p.probType.startswith('MI') \
    and len(p._discreteVarsList) == 0:  # for SNLE fo = 0
        # TODO:
        # handle constraints with restricted domain and matrix definiteRange

        if all(definiteRange):
            # TODO: if o has at least one -inf => prob is unbounded
            tmp1 = o[nlhc == 0] if nlhc is not None else o
            if tmp1.size != 0:
                tmp1 = nanmin(tmp1)
                ## to prevent roundoff issues ##
                tmp1 += 1e-14 * abs(tmp1)
                if tmp1 == 0:
                    tmp1 = 1e-300
                ######################
                r41 = nanmin((r41, tmp1))
            else:
                pass
    return o, a, r41
def updateNodes(nodesToUpdate, fo):
    if len(nodesToUpdate) == 0:
        return
    a_tmp = array([node.a for node in nodesToUpdate])
    Tmp = a_tmp
    Tmp[Tmp > fo] = fo
    o_tmp = array([node.o for node in nodesToUpdate])
    Tmp -= o_tmp
    Tmp[Tmp < 1e-300] = 1e-300
    Tmp[o_tmp > fo] = nan
    tnlh_all_new = -log2(Tmp)
    del Tmp, a_tmp
    tnlh_all_new += vstack([node.tnlhf for node in nodesToUpdate])  #tnlh_fixed[ind_update]
    tnlh_curr_best = nanmin(tnlh_all_new, 1)
    o_tmp[o_tmp > fo] = -inf
    M = atleast_1d(nanmax(o_tmp, 1))
    for j, node in enumerate(nodesToUpdate):
        node.fo = fo
        node.tnlh_curr = tnlh_all_new[j]
        node.tnlh_curr_best = tnlh_curr_best[j]
        node.th_key = M[j]
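
# Illustrative sketch (not part of the original sources): a minimal numeric
# example of the transform used in updateNodes above. Upper bounds are clipped
# at the incumbent value fo, the residual width (a - o) is floored at 1e-300 so
# log2 stays finite, and boxes whose lower bound already exceeds fo become nan.
# All array values below are hypothetical toy data.
import numpy as np

o = np.array([[0.0, 0.5], [1.2, 0.1]])   # interval lower bounds, one row per node
a = np.array([[2.0, 1.0], [3.0, 0.4]])   # interval upper bounds, one row per node
fo = 1.0                                  # current incumbent objective value

Tmp = a.copy()
Tmp[Tmp > fo] = fo
Tmp -= o
Tmp[Tmp < 1e-300] = 1e-300
Tmp[o > fo] = np.nan
tnlh = -np.log2(Tmp)                      # large where the box looks unpromising
print(tnlh)
print(np.nanmin(tnlh, 1))                 # per-node best value, as in the loop above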
def minimum(self, domain, domain_ind=slice(None)):
    c = self.c
    oovars = set(self.d.keys()) | set(self.d2.keys())
    Vals = domain.values()
    n = np.asarray(Vals[0][0] if type(Vals) == list else next(iter(Vals))[0]).size
    active_domain_ind = type(domain_ind) == np.ndarray
    r = np.zeros(domain_ind.size if active_domain_ind else n) + c
    for k in oovars:
        l, u = domain[k][0][domain_ind], domain[k][1][domain_ind]
        d1, d2 = self.d.get(k, 0.0), self.d2.get(k, None)
        if active_domain_ind:
            if type(d1) == np.ndarray and d1.size != 1:
                d1 = d1[domain_ind]
            if type(d2) == np.ndarray and d2.size != 1:
                d2 = d2[domain_ind]
        if d2 is None:
            r += where(d1 > 0, l, u) * d1
            continue
        rr = np.vstack(((d2 * l + d1) * l, (d2 * u + d1) * u))
        #rr.sort(axis=0)
        #r_min, r_max = rr
        r_min = nanmin(rr, axis=0)
        tops = -d1 / (2.0 * d2)
        ind_inside = logical_and(l < tops, tops < u)
        if any(ind_inside):
            top_vals = (d2 * tops + d1) * tops
            ind_m = logical_and(ind_inside, r_min > top_vals)
            r_min = where(ind_m, top_vals, r_min)
        r += r_min
    return r
def scale(values, min=0, max=1):
    """Return values scaled to [min, max]"""
    minval = np.float_(bn.nanmin(values))
    ptp = bn.nanmax(values) - minval
    if ptp == 0:
        return np.clip(values, min, max)
    return (-minval + values) / ptp * (max - min) + min
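
# Illustrative usage (assumes the scale() helper above is in scope): NaNs pass
# through bn.nanmin/bn.nanmax untouched, so only the finite values drive the
# rescaling while NaN entries stay NaN in the output.
import numpy as np

values = np.array([1.0, np.nan, 3.0, 5.0])
print(scale(values))          # -> [0.0, nan, 0.5, 1.0]
print(scale(values, 0, 10))   # -> [0.0, nan, 5.0, 10.0]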
def mul_handle_nan(R, R1, R2, domain):
    if all(np.isfinite(R1)) and all(np.isfinite(R2)):
        return R
    RR = R.resolve()[0]
    R2_is_scalar = isscalar(R2)
    ind = logical_or(np.isnan(RR[0]), np.isnan(RR[1]))
#    ind_z1 = logical_or(lb1 == 0, ub1 == 0)
#    ind_z2 = logical_or(lb2 == 0, ub2 == 0)
#    ind_i1 = logical_or(np.isinf(lb1), np.isinf(ub1))
#    ind_i2 = logical_or(np.isinf(lb2), np.isinf(ub2))
#    ind = logical_or(logical_and(ind_z1, ind_i2), logical_and(ind_z2, ind_i1))
    if any(ind):
        lb1, ub1 = R1
        lb2, ub2 = (R2, R2) if R2_is_scalar or R2.size == 1 else R2
        lb1, lb2, ub1, ub2 = lb1[ind], lb2[ind], ub1[ind], ub2[ind]
        R1, R2 = R1[:, ind], R2[:, ind]
        t = np.vstack((lb1 * lb2, ub1 * lb2, lb1 * ub2, ub1 * ub2))
        t_min, t_max = np.atleast_1d(nanmin(t, 0)), np.atleast_1d(nanmax(t, 0))
        # !!!!!!!!!!!!!!!!1 TODO: check it
        t = np.vstack((t_min, t_max))
        update_mul_inf_zero(R1, R2, t)
        t_min, t_max = t
        definiteRange_Tmp = \
            R.definiteRange if type(R.definiteRange) == bool or R.definiteRange.size == 1 \
            else R.definiteRange[ind]
        R_Tmp_nan = boundsurf(surf({}, t_min), surf({}, t_max), definiteRange_Tmp, domain)
        R = R_Tmp_nan if all(ind) \
            else boundsurf_join((ind, logical_not(ind)), (R_Tmp_nan, R.extract(logical_not(ind))))
    return R
def quickMinMax(self, data):
    """
    Estimate the min/max values of *data* by subsampling.
    Returns [(min, max), ...] with one item per channel
    """
    while data.size > 1e6:
        ax = np.argmax(data.shape)
        sl = [slice(None)] * data.ndim
        sl[ax] = slice(None, None, 2)
        data = data[tuple(sl)]  # index with a tuple; indexing with a list of slices is invalid in modern numpy
    cax = self.axes['c']
    if cax is None:
        return [(float(nanmin(data)), float(nanmax(data)))]
    else:
        return [(float(nanmin(data.take(i, axis=cax))),
                 float(nanmax(data.take(i, axis=cax))))
                for i in range(data.shape[-1])]
def quickMinMax(self, data):
    """
    Estimate the min/max values of *data* by subsampling.
    """
    while data.size > 1e6:
        ax = np.argmax(data.shape)
        sl = [slice(None)] * data.ndim
        sl[ax] = slice(None, None, 2)
        data = data[tuple(sl)]  # index with a tuple; indexing with a list of slices is invalid in modern numpy
    return nanmin(data), nanmax(data)
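
# Illustrative sketch of the subsampling idea used by quickMinMax above:
# repeatedly halving the largest axis keeps at most ~1e6 elements while
# preserving a reasonable min/max estimate. Self-contained; plain numpy
# stands in for whatever nanmin/nanmax the methods above import.
import numpy as np

data = np.random.rand(4000, 600)          # 2.4M values
while data.size > 1e6:
    ax = np.argmax(data.shape)
    sl = [slice(None)] * data.ndim
    sl[ax] = slice(None, None, 2)
    data = data[tuple(sl)]
print(data.shape, float(np.nanmin(data)), float(np.nanmax(data)))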
def _phase2(self):
    """
    Execute phase 2 of the SP region. This phase is used to compute the
    active columns.

    Note - This should only be called after phase 1 has been called and
    after the inhibition radius and neighborhood have been updated.
    """
    # Shift the outputs
    self.y[:, 1:] = self.y[:, :-1]
    self.y[:, 0] = 0

    # Calculate k
    #   - For a column to be active its overlap must be at least as large
    #     as the overlap of the k-th largest column in its neighborhood.
    k = self._get_num_cols()

    if self.global_inhibition:
        # The neighborhood is all columns, thus the set of active columns
        # is simply columns that have an overlap >= the k-th largest in the
        # entire region

        # Compute the winning column indexes
        if self.learn:
            # Randomly break ties
            ix = np.argpartition(-self.overlap[:, 0] -
                                 self.prng.uniform(.1, .2, self.ncolumns), k - 1)[:k]
        else:
            # Choose the same set of columns each time
            ix = np.argpartition(-self.overlap[:, 0], k - 1)[:k]

        # Set the active columns
        self.y[ix, 0] = self.overlap[ix, 0] > 0
    else:
        # The neighborhood is bounded by the inhibition radius, therefore
        # each column's neighborhood must be considered
        for i in xrange(self.ncolumns):
            # Get the neighbors
            ix = np.where(self.neighbors[i])[0]

            # Compute the minimum top overlap
            if ix.shape[0] <= k:
                # Desired number of candidates is at or below the desired
                # activity level, so find the overall min
                m = max(bn.nanmin(self.overlap[ix, 0]), 1)
            else:
                # Desired number of candidates is above the desired
                # activity level, so find the k-th largest
                m = max(-np.partition(-self.overlap[ix, 0], k - 1)[k - 1], 1)

            # Set the column activity
            if self.overlap[i, 0] >= m:
                self.y[i, 0] = True
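
# Illustrative sketch of the global-inhibition step above:
# np.argpartition(-overlap, k - 1)[:k] returns the indices of the k largest
# overlaps in O(n), and adding small uniform noise breaks ties randomly
# during learning. The names below are hypothetical toy data.
import numpy as np

overlap = np.array([3.0, 0.0, 5.0, 2.0, 5.0, 1.0])
k = 3
rng = np.random.RandomState(0)
ix = np.argpartition(-(overlap + rng.uniform(.1, .2, overlap.size)), k - 1)[:k]
active = np.zeros(overlap.size, dtype=bool)
active[ix] = overlap[ix] > 0    # a winning column must still have positive overlap
print(sorted(ix.tolist()), active)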
def __call__(self, data):
    """
    Remove columns with constant values from the data set and return
    the resulting data table.

    Parameters
    ----------
    data : an input data set
    """
    oks = bn.nanmin(data.X, axis=0) != bn.nanmax(data.X, axis=0)
    atts = [data.domain.attributes[i] for i, ok in enumerate(oks) if ok]
    domain = Orange.data.Domain(atts, data.domain.class_vars, data.domain.metas)
    return Orange.data.Table(domain, data)
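
# Illustrative sketch of the constant-column test above: a column is constant
# (ignoring NaNs) exactly when its nanmin equals its nanmax, and the boolean
# mask then selects the columns to keep. Plain numpy arrays stand in for the
# Orange table machinery.
import numpy as np
import bottleneck as bn

X = np.array([[1.0,    7.0, 2.0],
              [np.nan, 7.0, 2.0],
              [3.0,    7.0, 2.0]])
oks = bn.nanmin(X, axis=0) != bn.nanmax(X, axis=0)
print(oks)          # -> [ True False False]: columns 1 and 2 are constant
print(X[:, oks])    # keeps only the varying first column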
def interval(domain, dtype):
    lb_ub, definiteRange = inp._interval(domain, dtype)
    lb, ub = lb_ub[0], lb_ub[1]
    ind1, ind2 = lb < 0.0, ub > 0.0
    ind = logical_and(ind1, ind2)
    tmp = vstack((lb, ub))
    TMP = func(tmp)
    t_min, t_max = atleast_1d(nanmin(TMP, 0)), atleast_1d(nanmax(TMP, 0))
    if any(ind):
        F0 = func(0.0)
        t_min[atleast_1d(logical_and(ind, t_min > F0))] = F0
        t_max[atleast_1d(logical_and(ind, t_max < F0))] = F0
    return vstack((t_min, t_max)), definiteRange
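
# Illustrative sketch of the interval rule above: for an even-shaped function
# (here abs), evaluating only the endpoints can miss the extremum at 0 when an
# interval straddles zero, so the endpoint minimum is corrected against
# F0 = func(0). Self-contained numpy stand-in, mirroring the t_min half.
import numpy as np

func = np.abs
lb = np.array([-2.0, 1.0])
ub = np.array([1.0, 3.0])
TMP = func(np.vstack((lb, ub)))
t_min, t_max = np.nanmin(TMP, 0), np.nanmax(TMP, 0)
ind = (lb < 0.0) & (ub > 0.0)          # intervals containing 0
F0 = func(0.0)
t_min[ind & (t_min > F0)] = F0
print(t_min, t_max)                    # -> [0. 1.] [2. 3.]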
def mul_interval(self, other, isOtherOOFun, Prod, domain, dtype):
    lb1_ub1, definiteRange = self._interval(domain, dtype, ia_surf_level = 2)

    if isOtherOOFun:
        lb2_ub2, definiteRange2 = other._interval(domain, dtype, ia_surf_level = 2)
        definiteRange = logical_and(definiteRange, definiteRange2)
    else:
        lb2_ub2 = other

    if type(lb2_ub2) in (boundsurf, boundsurf2) or type(lb1_ub1) in (boundsurf, boundsurf2):
        if type(lb2_ub2) in (boundsurf, boundsurf2) and type(lb1_ub1) in (boundsurf, boundsurf2):
            resolveSchedule = domain.resolveSchedule.get(Prod, ())
            r = lb1_ub1.__mul__(lb2_ub2, resolveSchedule)
        else:
            r = lb1_ub1 * lb2_ub2
        r.definiteRange = definiteRange
        return r, r.definiteRange
    elif isscalar(other) or (type(other) == ndarray and other.size == 1):
        r = lb1_ub1 * other if other >= 0 else lb1_ub1[::-1] * other
        return r, definiteRange

    lb1, ub1 = lb1_ub1
    lb2, ub2 = lb2_ub2 if isOtherOOFun else (other, other)

    firstPositive = all(lb1 >= 0)
    firstNegative = all(ub1 <= 0)
    secondPositive = all(lb2 >= 0)
    secondNegative = all(ub2 <= 0)
    if firstPositive and secondPositive:
        t = vstack((lb1 * lb2, ub1 * ub2))
    elif firstNegative and secondNegative:
        t = vstack((ub1 * ub2, lb1 * lb2))
    elif firstPositive and secondNegative:
        t = vstack((lb2 * ub1, lb1 * ub2))
    elif firstNegative and secondPositive:
        t = vstack((lb1 * ub2, lb2 * ub1))
        #t = vstack((lb1 * other, ub1 * other) if other >= 0 else (ub1 * other, lb1 * other))
    elif isOtherOOFun:
        t = vstack((lb1 * lb2, ub1 * lb2, lb1 * ub2, ub1 * ub2))  # TODO: improve it
        t = vstack((nanmin(t, 0), nanmax(t, 0)))
    else:
        t = vstack((lb1 * other, ub1 * other))  # TODO: improve it
        t.sort(axis=0)

    #assert isinstance(t_min, ndarray) and isinstance(t_max, ndarray), 'Please update numpy to more recent version'
    if isOtherOOFun:
        update_mul_inf_zero(lb1_ub1, lb2_ub2, t)

    return t, definiteRange
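
# Illustrative sketch of the generic case in mul_interval above: when neither
# operand has a fixed sign, all four endpoint products are formed and reduced
# per column with nanmin/nanmax. Self-contained toy data.
import numpy as np

lb1, ub1 = np.array([-1.0, 2.0]), np.array([3.0, 4.0])
lb2, ub2 = np.array([-2.0, -5.0]), np.array([1.0, -1.0])
t = np.vstack((lb1 * lb2, ub1 * lb2, lb1 * ub2, ub1 * ub2))
bounds = np.vstack((np.nanmin(t, 0), np.nanmax(t, 0)))
print(bounds)   # -> [[ -6. -20.], [  3.  -2.]]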
def func5(an, nn, g, p):
    m = len(an)
    if m <= nn:
        return an, g
    mino = np.array([node.key for node in an])
    if nn == 1:  # box-bound probs with exact interval analysis
        ind = argmin(mino)
        assert ind in (0, 1), 'error in interalg engine'
        g = nanmin((mino[1 - ind], g))
        an = [an[ind]]  # keep only the single best node (ind is a scalar index, not iterable)
    elif m > nn:
        if p.solver.dataHandling == 'raw':
            ind = argsort(mino)
            th = mino[ind[nn]]
            ind2 = where(mino < th)[0]
            g = nanmin((th, g))
            #an = take(an, ind2, axis=0, out=an[:ind2.size])
            an = [an[i] for i in ind2]  #an[ind2]
        else:
            g = nanmin((mino[nn], g))
            an = an[:nn]
    return an, g
def div_interval(self, other, Div, domain, dtype):
    lb2_ub2, definiteRange2 = other._interval(domain, dtype, ia_surf_level = 2)
    secondIsBoundsurf = isinstance(lb2_ub2, boundsurf)

    lb1_ub1, definiteRange1 = self._interval(domain, dtype, ia_surf_level = 2)  # if type(lb2_ub2)==ndarray else 1)
    firstIsBoundsurf = type(lb1_ub1) in (boundsurf, boundsurf2)

#    if type(lb1_ub1) == boundsurf2:
#        lb1_ub1 = lb1_ub1.to_linear()

    # TODO: mention in doc definiteRange result for 0 / 0
    definiteRange = logical_and(definiteRange1, definiteRange2)

    tmp = None
    if not firstIsBoundsurf and secondIsBoundsurf:
        # TODO: check handling zeros
        if not hasattr(other, '_inv'):
            other._inv = other ** -1  #1.0/other
#            other._inv.engine_convexity = other._inv.engine_monotonity = -1
        Tmp = pow_const_interval(other, other._inv, -1, domain, dtype)[0]
        if isinstance(Tmp, boundsurf):
            tmp = lb1_ub1 * Tmp  #lb2_ub2 ** -1
    elif firstIsBoundsurf and not secondIsBoundsurf:  # and (t1_positive or t1_negative or t2_positive or t2_negative):
        # TODO: handle zeros
        Tmp2 = 1.0 / lb2_ub2
        Tmp2.sort(axis=0)
        tmp = lb1_ub1 * Tmp2
        #tmp = lb1_ub1 * (1.0 / tmp2[::-1])
    elif firstIsBoundsurf and secondIsBoundsurf:
        tmp = lb1_ub1.__div__(lb2_ub2, domain.resolveSchedule.get(Div, ()))
    if tmp is not None:
        if type(tmp) in (boundsurf, boundsurf2):
            tmp.definiteRange = definiteRange
            return tmp, tmp.definiteRange
        else:
            return tmp, definiteRange

    tmp1 = lb1_ub1.resolve()[0] if firstIsBoundsurf else lb1_ub1
    tmp2 = lb2_ub2.resolve()[0] if secondIsBoundsurf else lb2_ub2
    lb1, ub1 = tmp1[0], tmp1[1]
    lb2, ub2 = tmp2[0], tmp2[1]

    tmp = vstack((td(lb1, lb2), td(lb1, ub2), td(ub1, lb2), td(ub1, ub2)))
    r = vstack((nanmin(tmp, 0), nanmax(tmp, 0)))
    update_div_zero(lb1, ub1, lb2, ub2, r)
    return r, definiteRange
def func9(an, fo, g, p):
    #ind = searchsorted(ar, fo, side='right')
    if p.probType in ('NLSP', 'SNLE') and p.maxSolutions != 1:
        mino = atleast_1d([node.key for node in an])
        ind = mino > 0
        if not any(ind):
            return an, g
        else:
            g = nanmin((g, nanmin(mino[ind])))
            ind2 = where(logical_not(ind))[0]
            #an = take(an, ind2, axis=0, out=an[:ind2.size])
            #an = asarray(an[ind2])
            an = [an[i] for i in ind2]
            return an, g
    elif p.solver.dataHandling == 'sorted':
        #OLD
        mino = [node.key for node in an]
        ind = bisect_right(mino, fo)
        if ind == len(mino):
            return an, g
        else:
            g = nanmin((g, nanmin(atleast_1d(mino[ind]))))
            return an[:ind], g
    elif p.solver.dataHandling == 'raw':
        #NEW
        mino = atleast_1d([node.key for node in an])
        r10 = mino > fo
        if not any(r10):
            return an, g
        else:
            ind = where(r10)[0]
            g = nanmin((g, nanmin(atleast_1d(mino)[ind])))
            #an = asarray(an)
            ind2 = where(logical_not(r10))[0]
            #an = take(an, ind2, axis=0, out=an[:ind2.size])
            an = [an[i] for i in ind2]
            return an, g

        # NEW 2
#        curr_tnlh = [node.tnlh_curr for node in an]
#        import warnings
#        warnings.warn('! fix g')
        return an, g
    else:
        assert 0, 'incorrect nodes remove approach'
def create_wl_calibration_plot(wls_data, hdulist, plotfile):
    fig = pl.figure()
    ax = fig.add_subplot(111)

    spec_combined = wls_data['spec_combined']

    # find wavelength range to plot
    l_min, l_max = numpy.min(spec_combined[:,0]), numpy.max(spec_combined[:,0])
    # and use this range for the plot
    ax.set_xlim((l_min, l_max))

    # also find good min and max ranges
    fluxrange = numpy.nanpercentile(spec_combined[:,1], [3, 99.5])
    f_min, f_max = bottleneck.nanmin(spec_combined[:,1]), bottleneck.nanmax(spec_combined[:,1])
    # ax.set_ylim((0.9*f_min if f_min > 1 else 1, 1.1*f_max))
    # ax.set_ylim((100, 1.1*f_max))
    ax.set_ylim((fluxrange[0] if fluxrange[0] > 1. else 1., 1.1*f_max))

    # plot the actual spectrum we extracted for calibration
    ax.plot(spec_combined[:,0], spec_combined[:,1], "-g")

    # now draw vertical lines showing where the arc lines from the catalog are
    for catline in wls_data['linelist_ref']:
        ax.axvline(x=catline[0], color='grey')

    # set the y-scale to be logarithmic
    ax.set_yscale('log')

    # add some labels
    ax.set_xlabel("Wavelength [angstroems]")
    ax.set_ylabel("flux [counts]")
    ax.set_title("name of file")

    fig.subplots_adjust(left=0.09, bottom=0.08, right=0.98, top=0.93,
                        wspace=None, hspace=None)
    #fig.tight_layout(pad=0.1)

    if plotfile is not None:
        fig.savefig(plotfile)
    else:
        fig.show()
        pl.show()
def __init__(self, lightcurve, fit_mean=False):
    """
    Parameters:
        lightcurve (:class:`lightkurve.LightCurve`): Lightcurve to estimate power spectrum for.
        fit_mean (boolean, optional): If True, include a constant offset in the
            periodogram model (passed on to ``LombScargle``).
    """
    # Store the input settings:
    self.fit_mean = fit_mean

    # Calculate standard properties of the timeseries:
    indx = np.isfinite(lightcurve.flux)
    self.df = 1 / (86400 * (nanmax(lightcurve.time[indx]) - nanmin(lightcurve.time[indx])))  # Hz
    self.nyquist = 1 / (2 * 86400 * nanmedian(np.diff(lightcurve.time[indx])))  # Hz
    self.standard = None

    # Create LombScargle object of timeseries, where time is in seconds:
    self.ls = LombScargle(lightcurve.time[indx] * 86400, lightcurve.flux[indx],
                          center_data=True, fit_mean=self.fit_mean)

    # Calculate a better estimate of the fundamental frequency spacing:
    self.df = self.fundamental_spacing_integral()

    # Calculate standard power density spectrum:
    # Start by calculating a complete un-scaled power spectrum:
    self.standard = self.powerspectrum(oversampling=1, nyquist_factor=1, scale=None)

    # Use the un-scaled power spectrum to find the normalisation factor
    # which will ensure that Parseval's theorem holds:
    N = len(self.ls.t)
    tot_MS = np.sum((self.ls.y - nanmean(self.ls.y))**2) / N
    tot_lomb = np.sum(self.standard[1])
    self.normfactor = tot_MS / tot_lomb

    # Re-scale the standard power spectrum to being in power density:
    self.standard = list(self.standard)
    self.standard[1] *= self.normfactor / (self.df * 1e6)
    self.standard = tuple(self.standard)
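
# Illustrative sketch of the frequency-grid quantities computed above: with
# time in days, the fundamental spacing is 1/(86400 * baseline) Hz and the
# Nyquist estimate uses the median cadence. Self-contained numpy; the cadence
# and baseline values are hypothetical.
import numpy as np

time = np.arange(0.0, 27.4, 1.0 / 48)                         # days, 30-min cadence
df = 1 / (86400 * (np.nanmax(time) - np.nanmin(time)))        # Hz
nyquist = 1 / (2 * 86400 * np.nanmedian(np.diff(time)))       # Hz
print(df * 1e6, nyquist * 1e6)                                # in microhertz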
def transformed(self, data):
    if data.X.shape[0] == 0:
        return data.X
    data = data.copy()

    if self.method == Normalize.Vector:
        nans = np.isnan(data.X)
        nan_num = nans.sum(axis=1, keepdims=True)
        ys = data.X
        if np.any(nan_num > 0):
            # interpolate nan elements for normalization
            x = getx(data)
            ys = interp1d_with_unknowns_numpy(x, ys, x)
            ys = np.nan_to_num(ys)  # edge elements can still be zero
        data.X = sknormalize(ys, norm='l2', axis=1, copy=False)
        if np.any(nan_num > 0):
            # keep nans where they were
            data.X[nans] = float("nan")
    elif self.method == Normalize.Area:
        norm_data = Integrate(methods=self.int_method,
                              limits=[[self.lower, self.upper]])(data)
        data.X /= norm_data.X
        replace_infs(data.X)
    elif self.method == Normalize.SNV:
        data.X = (data.X - bottleneck.nanmean(data.X, axis=1).reshape(-1, 1)) / \
            bottleneck.nanstd(data.X, axis=1).reshape(-1, 1)
        replace_infs(data.X)
    elif self.method == Normalize.Attribute:
        if self.attr in data.domain and isinstance(
                data.domain[self.attr], Orange.data.ContinuousVariable):
            ndom = Orange.data.Domain([data.domain[self.attr]])
            factors = data.transform(ndom)
            data.X /= factors.X
            replace_infs(data.X)
            nd = data.domain[self.attr]
        else:  # invalid attribute for normalization
            data.X *= float("nan")
    elif self.method == Normalize.MinMax:
        min = bottleneck.nanmin(data.X, axis=1).reshape(-1, 1)
        max = bottleneck.nanmax(data.X, axis=1).reshape(-1, 1)
        data.X = data.X / (max - min)
        replace_infs(data.X)
    return data.X
def __call__(self, data):
    if data.domain != self.domain:
        data = data.from_table(self.domain, data)
    x = getx(data)
    data = data.copy()

    if self.limits == 1:
        x_sorter = np.argsort(x)
        lim_min = np.searchsorted(x, self.lower, sorter=x_sorter, side="left")
        lim_max = np.searchsorted(x, self.upper, sorter=x_sorter, side="right")
        limits = [lim_min, lim_max]
        y_s = data.X[:, x_sorter][:, limits[0]:limits[1]]
    else:
        y_s = data.X

    if self.method == Normalize.MinMax:
        data.X /= nanmax(np.abs(y_s), axis=1).reshape((-1, 1))
    elif self.method == Normalize.Vector:
        # zero offset correction applies to entire spectrum, regardless of limits
        y_offsets = nanmean(data.X, axis=1).reshape((-1, 1))
        data.X -= y_offsets
        y_s -= y_offsets
        rssq = np.sqrt(nansum(y_s**2, axis=1).reshape((-1, 1)))
        data.X /= rssq
    elif self.method == Normalize.Offset:
        data.X -= nanmin(y_s, axis=1).reshape((-1, 1))
    elif self.method == Normalize.Attribute:
        # attr normalization applies to entire spectrum, regardless of limits
        # meta indices are -ve and start at -1
        if self.attr not in (None, "None", ""):
            attr_index = -1 - data.domain.index(self.attr)
            factors = data.metas[:, attr_index].astype(float)
            data.X /= factors[:, None]
    return data.X
def __div__(self, other, resolveSchedule=()):
    isBoundSurf = isinstance(other, boundsurf)
    assert isBoundSurf
    r = aux_mul_div_boundsurf((self, other), operator.truediv, resolveSchedule)

#    return r
#    ind_inf_z = logical_or(logical_or(R2[0]==0, R2[1]==0), logical_or(isinf(R1[0]), isinf(R1[1])))
    #(R2[0]==0) | (R2[1]==0) | (isinf(R2[0])) | (isinf(R2[1])) | (isinf(R1[0])) | isinf(R1[1])

    isBoundsurf = isinstance(r, boundsurf)
    rr = r.resolve()[0] if isBoundsurf else r  #[0]

#    import pylab, numpy
#    xx = numpy.linspace(-1, 0, 1000)
#    t = r.l.d.keys()[0]
#    tmp = r
#    pylab.plot(xx, tmp.l.d2.get(t, 0.0)*xx**2 + tmp.l.d.get(t, 0.0)*xx + tmp.l.c, 'r')
#    pylab.plot(xx, tmp.u.d2.get(t, 0.0)*xx**2 + tmp.u.d.get(t, 0.0)*xx + tmp.u.c, 'b')
#    pylab.grid()
#    pylab.show()

    # nans may be from other computations from a level below, although
    ind_nan = logical_or(isnan(rr[0]), isnan(rr[1]))
    if not any(ind_nan) or not isBoundsurf:
        return r  #if isBoundsurf else rr

    Ind_finite = where(logical_not(ind_nan))[0]
    r_finite = r.extract(Ind_finite)
    ind_nan = where(ind_nan)[0]
    R1 = self.resolve()[0]
    R2 = other.resolve()[0]
    lb1, ub1, lb2, ub2 = R1[0, ind_nan], R1[1, ind_nan], R2[0, ind_nan], R2[1, ind_nan]
    tmp = np.vstack((td(lb1, lb2), td(lb1, ub2), td(ub1, lb2), td(ub1, ub2)))
    R = np.vstack((nanmin(tmp, 0), nanmax(tmp, 0)))
    update_div_zero(lb1, ub1, lb2, ub2, R)
    b = boundsurf(surf({}, R[0]), surf({}, R[1]), False, self.domain)
    r = boundsurf_join((ind_nan, Ind_finite), (b, r_finite))
    definiteRange = logical_and(self.definiteRange, other.definiteRange)
    r.definiteRange = definiteRange
    return r
def distance_curves(x, ys, q1):
    """
    Distances to the curves.

    :param x: x values of curves (they have to be sorted).
    :param ys: y values of multiple curves sharing x values.
    :param q1: a point to measure distance to.
    :return:
    """
    # convert curves into a series of startpoints and endpoints
    xp = rolling_window(x, 2)
    ysp = rolling_window(ys, 2)

    r = bottleneck.nanmin(distance_line_segment(xp[:, 0], ysp[:, :, 0],
                                                xp[:, 1], ysp[:, :, 1],
                                                q1[0], q1[1]), axis=1)
    return r
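
# Illustrative sketch: rolling_window and distance_line_segment are defined
# elsewhere in the original module, so this hypothetical stand-in shows the
# same idea self-contained — compute the distance from a point to every
# segment of one polyline and reduce with min.
import numpy as np

def point_segment_dist(x0, y0, x1, y1, px, py):
    # project the point onto each segment, clamp to [0, 1], measure distance
    dx, dy = x1 - x0, y1 - y0
    t = np.clip(((px - x0) * dx + (py - y0) * dy) / (dx**2 + dy**2), 0, 1)
    return np.hypot(x0 + t * dx - px, y0 + t * dy - py)

x = np.array([0.0, 1.0, 2.0, 3.0])
y = np.array([0.0, 1.0, 0.0, 1.0])
d = point_segment_dist(x[:-1], y[:-1], x[1:], y[1:], 1.5, 0.0)
print(d.min())   # distance from (1.5, 0) to the polyline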
def pow_oofun_interval(self, other, domain, dtype):
    # TODO: handle discrete cases
    lb1_ub1, definiteRange1 = self._interval(domain, dtype, ia_surf_level = 2)
    lb2_ub2, definiteRange2 = other._interval(domain, dtype, ia_surf_level = 2)
    if isinstance(lb1_ub1, boundsurf) or isinstance(lb2_ub2, boundsurf):
        r = (lb2_ub2 * lb1_ub1.log()).exp()
        return r, r.definiteRange

    lb1, ub1 = lb1_ub1  #[0], lb1_ub1[1]
    lb2, ub2 = lb2_ub2  #[0], lb2_ub2[1]
    T = vstack((lb1 ** lb2, lb1 ** ub2, ub1 ** lb2, ub1 ** ub2))
    t_min, t_max = nanmin(T, 0), nanmax(T, 0)
    definiteRange = logical_and(definiteRange1, definiteRange2)

    ind1 = lb1 < 0
    if any(ind1):
        definiteRange = logical_and(definiteRange, logical_not(ind1))
        ind2 = ub1 >= 0
        t_min[atleast_1d(logical_and(logical_and(ind1, ind2),
                                     logical_and(t_min > 0.0, ub2 > 0.0)))] = 0.0
        t_max[atleast_1d(logical_and(ind1, logical_not(ind2)))] = nan
        t_min[atleast_1d(logical_and(ind1, logical_not(ind2)))] = nan
    return vstack((t_min, t_max)), definiteRange
def _update_online_orbits(self):
    """."""
    posx, posy = self._get_orbit_from_processes()
    posx /= 1000
    posy /= 1000
    nanx = _np.isnan(posx)
    nany = _np.isnan(posy)
    posx[nanx] = self.ref_orbs['X'][nanx]
    posy[nany] = self.ref_orbs['Y'][nany]
    if self._ring_extension > 1:
        posx = _np.tile(posx, (self._ring_extension, ))
        posy = _np.tile(posy, (self._ring_extension, ))
    orbs = {'X': posx, 'Y': posy}

    for plane in ('X', 'Y'):
        with self._lock_raw_orbs:
            raws = self.raw_orbs
            raws[plane].append(orbs[plane])
            raws[plane] = raws[plane][-self._smooth_npts:]
            if not raws[plane]:
                return
            if self._smooth_meth == self._csorb.SmoothMeth.Average:
                orb = _np.mean(raws[plane], axis=0)
            else:
                orb = _np.median(raws[plane], axis=0)
        self.smooth_orb[plane] = orb
    self.new_orbit.set()

    for plane in ('X', 'Y'):
        orb = self.smooth_orb[plane]
        dorb = orb - self.ref_orbs[plane]
        self.run_callbacks(f'SlowOrb{plane:s}-Mon', _np.array(orb))
        self.run_callbacks(f'DeltaOrb{plane:s}Avg-Mon', _bn.nanmean(dorb))
        self.run_callbacks(f'DeltaOrb{plane:s}Std-Mon', _bn.nanstd(dorb))
        self.run_callbacks(f'DeltaOrb{plane:s}Min-Mon', _bn.nanmin(dorb))
        self.run_callbacks(f'DeltaOrb{plane:s}Max-Mon', _bn.nanmax(dorb))
def posterize(self, **kwargs):
    '''
    This method discretizes/posterizes the values in the mergeMatrix (of a
    given mergeDimension) to a given amount of values (e.g. nValues=4)
    or to a given list of values (e.g. values=[1,2,4])

    **Optional kwargs** ("keyword arguments") are:

    ================== ================= ========= ================
    Keyword            Type              Default   Description
    ================== ================= ========= ================
    *mergeName*        list(mergeNames)  [{all}]   one or more merge-dims to do the method on
    *nValues*          int               5         the amount of different values
    *values*           list              None      given different values
    ================== ================= ========= ================
    '''
    #standard
    mergeNames = deepcopy(self.mergeNames)
    nValues = 5
    values = None
    #individual
    for key in kwargs:
        if key == "mergeName":
            mergeNames = []
            if type(kwargs[key]) != list and type(kwargs[key]) != tuple:
                kwargs[key] = [ kwargs[key] ]
            for m in kwargs[key]:
                #_utils.checkClassInstance(m,mergeDimension)
                if m not in self.mergeNames:
                    exit("ERROR: mergeName %s not known" %m)
                mergeNames.append(m)
        elif key == "nValues":
            nValues = int(kwargs[key])
        elif key == "values":
            values = list(kwargs[key])
        else:
            raise KeyError("keyword '%s' not known" %key)

    print "-> Posterize mergeMatrices..."
    for i in mergeNames:
        #which mergeMatrix is involved? ... find index
        merge_index = self.mergeNames.index(i)
        m = self.mergeMatrix[merge_index]
        if values == None:
            #create values-list
            values_i = np.linspace(bn.nanmin(m), bn.nanmax(m), nValues)
        else:
            values_i = np.array(values)
        print "... %s to the values %s" %(i, values_i)
        #do posterizing
        for x in np.nditer(m, op_flags=['readwrite']):
            diff = values_i - x
            diff = abs(diff)
            nearest = diff.argmin()
            x[...] = values_i[ nearest ]
def nanmin(array, axis=None):
    if isinstance(axis, tuple):
        array = _move_tuple_axes_first(array, axis=axis)
        axis = 0
    return bt.nanmin(array, axis=axis)
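
# Illustrative sketch of the tuple-axis workaround above: bottleneck only
# accepts an integer axis, so the helper (here a hypothetical stand-in for
# _move_tuple_axes_first) moves the requested axes to the front and collapses
# them into a single leading axis before reducing along axis 0.
import numpy as np
import bottleneck as bt

def move_tuple_axes_first(array, axis):
    other = [i for i in range(array.ndim) if i not in axis]
    return np.transpose(array, list(axis) + other).reshape(
        (-1,) + tuple(array.shape[i] for i in other))

arr = np.random.rand(3, 4, 5)
out = bt.nanmin(move_tuple_axes_first(arr, (0, 2)), axis=0)
print(out.shape)                                   # -> (4,)
print(np.allclose(out, np.nanmin(arr, axis=(0, 2))))  # -> True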
def time_nanmin(self, dtype, shape, order, axis):  # renamed: the body benchmarks nanmin, not nanmax
    bn.nanmin(self.arr, axis=axis)
def func12(an, maxActiveNodes, p, Solutions, vv, varTols, fo):
    solutions, r6 = Solutions.solutions, Solutions.coords
    if len(an) == 0:
        return array([]), array([]), array([]), array([])
    _in = an
    if r6.size != 0:
        r11, r12 = r6 - varTols, r6 + varTols
    y, e, S = [], [], []
    Tnlhf_curr_local = []
    n = p.n
    N = 0
    maxSolutions = p.maxSolutions

#    new = 1
#    # new
#    if new and p.probType in ('MOP', 'SNLE', 'GLP', 'NLP', 'MINLP') and p.maxSolutions == 1:
#
#
#        return y, e, _in, _s

    while True:
        an1Candidates, _in = func3(_in, maxActiveNodes, p.solver.dataHandling)
        #print nanmax(2**(-an1Candidates[0].tnlh_curr)), nanmax(2**(-an1Candidates[-1].tnlh_curr))

        yc, ec, oc, ac, SIc = asarray([t.y for t in an1Candidates]), \
            asarray([t.e for t in an1Candidates]), \
            asarray([t.o for t in an1Candidates]), \
            asarray([t.a for t in an1Candidates]), \
            asarray([t._s for t in an1Candidates])

        if p.probType == 'MOP':
            tnlhf_curr = asarray([t.tnlh_all for t in an1Candidates])
            tnlhf = None
        elif p.solver.dataHandling == 'raw':
            tnlhf = asarray([t.tnlhf for t in an1Candidates])
            tnlhf_curr = asarray([t.tnlh_curr for t in an1Candidates])
        else:
            tnlhf, tnlhf_curr = None, None

        if p.probType != 'IP':
            #nlhc = asarray([t.nlhc for t in an1Candidates])
            #residual = asarray([t.residual for t in an1Candidates])
            residual = None
            indT = func4(p, yc, ec, oc, ac, fo, tnlhf_curr)
            if an1Candidates[0].indtc is not None:
                indtc = asarray([t.indtc for t in an1Candidates])
                indT = logical_or(indT, indtc)
        else:
            residual = None
            indT = None

        t, _s, indD = func1(tnlhf, tnlhf_curr, residual, yc, ec, oc, ac, SIc, p, indT)

        new = 0
        nn = 0
        if new and p.probType in ('MOP', 'SNLE', 'NLSP', 'GLP', 'NLP', 'MINLP') and p.maxSolutions == 1:
            arr = tnlhf_curr if p.solver.dataHandling == 'raw' else oc
            M = arr.shape[0]
            w = arange(M)
            Midles = 0.5 * (yc[w, t] + ec[w, t])
            arr_1, arr2 = arr[w, t], arr[w, n + t]
            Arr = hstack((arr_1, arr2))
            ind = np.argsort(Arr)
            Ind = set(ind[:maxActiveNodes])

            tag_all, tag_1, tag_2 = [], [], []
            sn = []
            # TODO: get rid of the cycles
            for i in range(M):
                cond1, cond2 = i in Ind, (i + M) in Ind
                if cond1:
                    if cond2:
                        tag_all.append(i)
                    else:
                        tag_1.append(i)
                else:
                    if cond2:
                        tag_2.append(i)
                    else:
                        sn.append(an1Candidates[i])

            list_lx, list_ux = [], []
            _s_new = []
            updateTC = an1Candidates[0].indtc is not None
            isRaw = p.solver.dataHandling == 'raw'

            for i in tag_1:
                node = an1Candidates[i]
                I = t[i]
#                if node.o[n+I] >= node.o[I]:
#                    print '1'
#                else:
#                    print i, I, node.o[n+I], node.o[I], node.key, node.a[n+I], node.a[I], node.nlhc[n+I], node.nlhc[I]
                node.key = node.o[n + I]
                node._s = _s[i]
                if isRaw:
                    node.tnlh_curr[I] = node.tnlh_curr[n + I]
                    node.tnlh_curr_best = nanmin(node.tnlh_curr)
                #assert node.o[n+I] >= node.o[I]
                #lx, ux = node.y, node.e
                lx, ux = yc[i], ec[i]
                if nn:
                    #node.o[I], node.a[I] = node.o[n+I], node.a[n+I]
                    node.o[I], node.a[I] = node.o[n + I], node.a[n + I]
                    node.o[node.o < node.o[n + I]], node.a[node.a > node.a[n + I]] = node.o[n + I], node.a[n + I]
                else:
                    node.o[n + I], node.a[n + I] = node.o[I], node.a[I]
                    node.o[node.o < node.o[I]], node.a[node.a > node.a[I]] = node.o[I], node.a[I]
#                if p.solver.dataHandling == 'raw':
                for Attr in ('nlhf', 'nlhc', 'tnlhf', 'tnlh_curr', 'tnlh_all'):
                    r = getattr(node, Attr, None)
                    if r is not None:
                        if nn:
                            r[I] = r[n + I]
                        else:
                            r[n + I] = r[I]
                mx = ux.copy()
                mx[I] = Midles[i]  #0.5*(lx[I] + ux[I])
                list_lx.append(lx)
                list_ux.append(mx)
                node.y = lx.copy()
                node.y[I] = Midles[i]  #0.5*(lx[I] + ux[I])
                if updateTC:
                    node.indtc = True
                _s_new.append(node._s)
                sn.append(node)

            for i in tag_2:
                node = an1Candidates[i]
                I = t[i]
                node.key = node.o[I]
                node._s = _s[i]
                # for raw only
                if isRaw:
                    node.tnlh_curr[n + I] = node.tnlh_curr[I]
                    node.tnlh_curr_best = nanmin(node.tnlh_curr)
                #assert node.o[I] >= node.o[n+I]
                #lx, ux = node.y, node.e
                lx, ux = yc[i], ec[i]
                if nn:
                    node.o[n + I], node.a[n + I] = node.o[I], node.a[I]
                    node.o[node.o < node.o[I]], node.a[node.a > node.a[I]] = node.o[I], node.a[I]
                else:
                    node.o[I], node.a[I] = node.o[n + I], node.a[n + I]
                    node.o[node.o < node.o[n + I]], node.a[node.a > node.a[n + I]] = node.o[n + I], node.a[n + I]
                for Attr in ('nlhf', 'nlhc', 'tnlhf', 'tnlh_curr', 'tnlh_all'):
                    r = getattr(node, Attr, None)
                    if r is not None:
                        if nn:
                            r[n + I] = r[I]
                        else:
                            r[I] = r[n + I]
                mx = lx.copy()
                mx[I] = Midles[i]  #0.5*(lx[I] + ux[I])
                list_lx.append(mx)
                list_ux.append(ux)
                node.e = ux.copy()
                node.e[I] = Midles[i]  #0.5*(lx[I] + ux[I])
                if updateTC:
                    node.indtc = True
                _s_new.append(node._s)
                sn.append(node)

            for i in tag_all:
                node = an1Candidates[i]
                I = t[i]
                #lx, ux = node.y, node.e
                lx, ux = yc[i], ec[i]
                mx = ux.copy()
                mx[I] = Midles[i]  #0.5 * (lx[I] + ux[I])
                list_lx.append(lx)
                list_ux.append(mx)
                mx = lx.copy()
                mx[I] = Midles[i]  #0.5 * (lx[I] + ux[I])
                #mx[n+ t] = 0.5 * (lx[n + t] + ux[n + t])
                list_lx.append(mx)
                list_ux.append(ux)
                #_s_new += [_s[i]] * 2
                _s_new.append(_s[i])
                _s_new.append(_s[i])

#            print 'y_new:', vstack(list_lx)
#            print 'e_new:', vstack(list_ux)
#            print '_s_new:', hstack(_s)

            _in = sn + _in.tolist()
            if p.solver.dataHandling == 'sorted':
                _in.sort(key=lambda obj: obj.key)
            else:
                #pass
                _in.sort(key=lambda obj: obj.tnlh_curr_best)

#            print 'tag 1:', len(tag_1), 'tag 2:', len(tag_2), 'tag all:', len(tag_all)
#            print 'lx:', list_lx
#            print 'sn lx:', [node.y for node in sn]
#            print 'ux:', list_ux
#            print 'sn ux:', [node.e for node in sn]
#            print '-'*10
            #print '!', vstack(list_lx), vstack(list_ux), hstack(_s_new)
            NEW_lx, NEW_ux, NEW__in, NEW__s = \
                vstack(list_lx), vstack(list_ux), array(_in), hstack(_s_new)
            return NEW_lx, NEW_ux, NEW__in, NEW__s

        NewD = 1
        if NewD and indD is not None:
            s4d = _s[indD]
            sf = _s[logical_not(indD)]
            _s = hstack((s4d, s4d, sf))
            yf, ef = yc[logical_not(indD)], ec[logical_not(indD)]
            yc, ec = yc[indD], ec[indD]
            t = t[indD]
        else:
            _s = tile(_s, 2)

        #yc, ec, tnlhf_curr_local = func2(yc, ec, t, vv, tnlhf_curr)
        yc, ec = func2(yc, ec, t, vv)

        if NewD and indD is not None:
            yc = vstack((yc, yf))
            ec = vstack((ec, ef))

        if maxSolutions == 1 or len(solutions) == 0:
            #y, e, Tnlhf_curr_local = yc, ec, tnlhf_curr_local
            y, e = yc, ec
            break

        # TODO: change cycle variable if len(solutions) >> maxActiveNodes
        for i in range(len(solutions)):
            ind = logical_and(all(yc >= r11[i], 1), all(ec <= r12[i], 1))
            if any(ind):
                j = where(logical_not(ind))[0]
                lj = j.size
                yc = take(yc, j, axis=0, out=yc[:lj])
                ec = take(ec, j, axis=0, out=ec[:lj])
                _s = _s[j]
#                if tnlhf_curr_local is not None:
#                    tnlhf_curr_local = tnlhf_curr_local[j]
        y.append(yc)
        e.append(ec)
        S.append(_s)
        #Tnlhf_curr_local.append(tnlhf_curr_local)
        N += yc.shape[0]
        if len(_in) == 0 or N >= maxActiveNodes:
            y, e, _s = vstack(y), vstack(e), hstack(S)
            #Tnlhf_curr_local = hstack(Tnlhf_curr_local)
            break

#    if Tnlhf_curr_local is not None and len(Tnlhf_curr_local) != 0 and Tnlhf_curr_local[0] is not None:
#        #print len(where(isfinite(Tnlhf_curr_local))[0]), Tnlhf_curr_local.size
#        pass
#    print 'y_prev:', y
#    print 'e_prev:', e
#    print '_s_prev:', hstack(_s)
    #print 'prev!', y, e, _s
#    from numpy import array_equal
#    if not array_equal(NEW_lx.sort(), y.sort()):
#        pass
#    if not array_equal(NEW_ux.sort(), e.sort()):
#        pass
#    if not array_equal(NEW__s.sort(), _s.sort()):
#        pass
    #, NEW_ux, NEW__in, NEW__s
    return y, e, _in, _s
def autoZoom(self, **kwargs):
    '''
    **Required kwargs** ("keyword arguments") are:

    ================== =============== ============= ================
    Keyword            Type            Example       Description
    ================== =============== ============= ================
    *mergeName*        str             myMergeName   the name of the merge-dim to do the method on
    *value*            float/string    *max*         The merge-value to zoom in. Type 'min' or 'max' to use the equivalent extreme value in the matrix
    *scale*            string          *'relative'*  **OR** *'absolute'*
    *level*            float           0.3           The relative zoom-level; 0.3 means +-30% around the zoom-point
    ================== =============== ============= ================

    **Optional kwargs** ("keyword arguments") are:

    ================== ================= ============ ================
    Keyword            Type              Default      Description
    ================== ================= ============ ================
    *basisNames*       list(basisNames)  [{all}]      Which basisDimensions get a new scale.
    *operator*         string            "=="         The zoom-point is defined as the first point in matrix where a value is "==" (equal), ">" (bigger) etc. than the given *value*
    ================== ================= ============ ================
    '''
    #standard
    mergeDim = None
    value = None
    scale = None
    level = None
    operator = "=="
    basisDim = range(self.nBasis)
    #individual
    for key in kwargs:
        if key == "mergeName":
            if kwargs[key] not in self.mergeNames:
                raise KeyError("ERROR: mergeName %s not known" %kwargs[key])
            mergeDim = self.mergeNames.index(kwargs[key])
        elif key == "value":
            if type(kwargs[key]) == str:
                if kwargs[key] != "max" and kwargs[key] != "min":
                    exit("ERROR: 'value' can only be 'max', 'min' or a float")
                value = kwargs[key]
            else:
                value = str(kwargs[key])
        elif key == "scale":
            if kwargs[key] != "absolute" and kwargs[key] != "relative":
                exit("ERROR: 'scale' in method 'autoZoom' has to be 'absolute' or 'relative'")
            scale = str(kwargs[key])
        elif key == "level":
            level = abs(float(kwargs[key]))
        elif key == "operator":
            operator = str(kwargs[key])
        elif key == "basisNames":
            basisDim = list(kwargs[key])
            for n,b in enumerate(basisDim):
                if b not in self.basisNames:
                    exit("ERROR: the given basisDimension %s does not belong to those from target" %b)
                basisDim[n] = self.basisNames.index(b)
        else:
            raise KeyError("keyword '%s' not known" %key)
    _utils.checkRequiredArgs({
        "mergeName": mergeDim,
        "value": value,
        "scale": scale,
        "level": level})
    #which mergeMatrix is involved?
    m = self.mergeMatrix[mergeDim]
    #prepare value
    if value == "max":
        value = bn.nanmax(m)
    elif value == "min":
        value = bn.nanmin(m)
    if np.isnan(value):
        raise ValueError("cannot autoZoom to nan")
    #get position in matrix
    #try:
    positions = np.argwhere(eval(str(value) + operator + "m"))[0]
    print "\n... do autoZoom for basisDimensions at a mergeValue of %s" %value
    for n,p in enumerate(positions):
        if n in basisDim:
            #get basis-values at those positions
            zoompoint = self.basisMatrix[n][p]
            #calc. the new range
            if scale == "relative":
                basis_range = self._basis_dim[n]._include_range[1] - self._basis_dim[n]._include_range[0]
                zoomrange = [zoompoint - abs(basis_range*level), zoompoint + abs(basis_range*level)]
            elif scale == "absolute":
                zoomrange = [zoompoint - level, zoompoint + level]
            ampl = zoomrange[1] - zoomrange[0]  # amplitude of the new range (was undefined for the 'absolute' branch)
            #define a new include_range for that basisDim
            print "%s --> %s (offset: %s, amplitude: %s)" %(self._basis_dim[n].name, zoomrange, zoompoint, ampl)
            self._basis_dim[n]._includeRange(zoomrange)
        else:
            print "ignored %s for autozoom" %self._basis_dim[n].name
def _fit(self, X, y):
    self.X, y = self._check_params(X, y)
    n, p = X.shape
    self.y = y.reshape((n, 1))

    # list of selected features
    S = []
    # list of all features
    F = [v for v in range(p)]

    if self.n_features != 'auto':
        feature_mi_matrix = np.zeros((self.n_features, p))
    else:
        feature_mi_matrix = np.zeros((n, p))
    feature_mi_matrix[:] = np.nan
    S_mi = []

    # ---------------------------------------------------------------------
    # FIND FIRST FEATURE
    # ---------------------------------------------------------------------

    # check a range of ks (3-10), and choose the one with the max median MI
    k_min = 3
    k_max = 11
    xy_MI = np.zeros((k_max - k_min, p))
    xy_MI[:] = np.nan
    for i, k in enumerate(range(k_min, k_max)):
        xy_MI[i, :] = mi.get_first_mi_vector(self, k)
    xy_MI = bn.nanmedian(xy_MI, axis=0)

    # choose the best, add it to S, remove it from F
    S, F = self._add_remove(S, F, bn.nanargmax(xy_MI))
    S_mi.append(bn.nanmax(xy_MI))

    # notify user
    if self.verbose > 0:
        self._print_results(S, S_mi)

    # ---------------------------------------------------------------------
    # FIND SUBSEQUENT FEATURES
    # ---------------------------------------------------------------------

    while len(S) < self.n_features if not isinstance(self.n_features, str) else True:
        # loop through the remaining unselected features and calculate MI
        s = len(S) - 1
        feature_mi_matrix[s, F] = mi.get_mi_vector(self, F, s)

        # make decision based on the chosen FS algorithm
        fmm = feature_mi_matrix[:len(S), F]
        if self.method == 'JMI':
            selected = F[bn.nanargmax(bn.nansum(fmm, axis=0))]
        elif self.method == 'JMIM':
            selected = F[bn.nanargmax(bn.nanmin(fmm, axis=0))]
        elif self.method == 'MRMR':
            MRMR = xy_MI[F] - bn.nanmean(fmm, axis=0)
            selected = F[bn.nanargmax(MRMR)]

        # record the JMIM of the newly selected feature and add it to S
        S_mi.append(bn.nanmax(bn.nanmin(fmm, axis=0)))
        S, F = self._add_remove(S, F, selected)

        # notify user
        if self.verbose > 0:
            self._print_results(S, S_mi)

        # if n_features == 'auto', let's check the S_mi to stop
        if self.n_features == 'auto' and len(S) > 10:
            # smooth the 1st derivative of the MI values of previously sel
            MI_dd = signal.savgol_filter(S_mi[1:], 9, 2, 1)
            # does the mean of the last 5 converge to 0?
            if np.abs(np.mean(MI_dd[-5:])) < 1e-3:
                break

    # ---------------------------------------------------------------------
    # SAVE RESULTS
    # ---------------------------------------------------------------------

    self.n_features_ = len(S)
    self.support_ = np.zeros(p, dtype=np.bool)
    self.support_[S] = 1
    self.ranking_ = S
    self.mi_ = S_mi

    return self
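
# Illustrative sketch of the JMIM selection rule used in _fit above: for each
# candidate feature (column), take the minimum joint MI against the already
# selected features (rows), and pick the candidate maximising that minimum.
# The MI values below are hypothetical; real ones come from mi.get_mi_vector.
import numpy as np
import bottleneck as bn

fmm = np.array([[0.9, 0.2, np.nan],
                [0.4, 0.8, 0.3]])    # rows: selected features, cols: candidates
F = [10, 11, 12]                     # indices of the remaining candidate features
selected = F[bn.nanargmax(bn.nanmin(fmm, axis=0))]
print(selected)                      # -> 10 (its worst-case MI 0.4 beats 0.2 and 0.3)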
def f(self, *args, **kwargs):
    return bn.nanmin(*args, **kwargs)

def argf(self, *args, **kwargs):
    return bn.nanargmin(*args, **kwargs)
def plot(self, experiment, **kwargs):
    """Plot a faceted histogram view of a channel"""

    if not experiment:
        raise util.CytoflowViewError("No experiment specified")

    if not self.channel:
        raise util.CytoflowViewError("Must specify a channel")

    if self.channel not in experiment.data:
        raise util.CytoflowViewError("Channel {0} not in the experiment"
                                     .format(self.channel))

    if self.xfacet and self.xfacet not in experiment.conditions:
        raise util.CytoflowViewError("X facet {0} not in the experiment"
                                     .format(self.xfacet))

    if self.yfacet and self.yfacet not in experiment.conditions:
        raise util.CytoflowViewError("Y facet {0} not in the experiment"
                                     .format(self.yfacet))

    if self.huefacet and self.huefacet not in experiment.conditions:
        raise util.CytoflowViewError("Hue facet {0} not in the experiment"
                                     .format(self.huefacet))

    if self.subset:
        try:
            data = experiment.query(self.subset).data.reset_index()
        except:
            raise util.CytoflowViewError("Subset string '{0}' isn't valid"
                                         .format(self.subset))

        if len(experiment.data) == 0:
            raise util.CytoflowViewError("Subset string '{0}' returned no events"
                                         .format(self.subset))
    else:
        data = experiment.data

    # get the scale
    scale = util.scale_factory(self.scale, experiment, self.channel)
    scaled_data = scale(data[self.channel])
    #print scaled_data

    kwargs.setdefault('histtype', 'stepfilled')
    kwargs.setdefault('alpha', 0.5)
    kwargs.setdefault('antialiased', True)

    # estimate a "good" number of bins; see cytoflow.utility.num_hist_bins
    # for a reference.
    num_bins = util.num_hist_bins(scaled_data)

    # clip num_bins to (50, 1000)
    num_bins = max(min(num_bins, 1000), 50)

    xmin = bottleneck.nanmin(scaled_data)
    xmax = bottleneck.nanmax(scaled_data)

    if (self.huefacet
            and "bins" in experiment.metadata[self.huefacet]
            and experiment.metadata[self.huefacet]["bin_scale"] == self.scale):
        # if we color facet by the result of a BinningOp and we don't
        # match the BinningOp bins with the histogram bins, we get
        # gnarly aliasing.

        # each color gets at least one bin.  however, if the estimated
        # number of bins for the histogram is much larger than the
        # number of colors, sub-divide each color into multiple bins.
        bins = experiment.metadata[self.huefacet]["bins"]
        bins = np.append(bins, xmax)

        num_hues = len(data[self.huefacet].unique())
        bins_per_hue = math.ceil(num_bins / num_hues)

        new_bins = [xmin]
        for end in [b for b in bins if (b > xmin and b <= xmax)]:
            new_bins = np.append(new_bins,
                                 np.linspace(new_bins[-1], end, bins_per_hue + 1,
                                             endpoint=True)[1:])

        bins = scale.inverse(new_bins)
    else:
        bin_width = (xmax - xmin) / num_bins
        bins = scale.inverse(np.arange(xmin, xmax, bin_width))
        bins = np.append(bins, scale.inverse(xmax))

    # take care of a rare rounding error, where the last observation is
    # a liiiitle bit more than the last bin, which makes plt.hist() puke
    bins[-1] += 1

    kwargs.setdefault('bins', bins)

    # mask out the data that's not in the scale domain
    data = data[~np.isnan(scaled_data)]

    g = sns.FacetGrid(data,
                      size=6,
                      aspect=1.5,
                      col=(self.xfacet if self.xfacet else None),
                      row=(self.yfacet if self.yfacet else None),
                      hue=(self.huefacet if self.huefacet else None),
                      col_order=(np.sort(data[self.xfacet].unique()) if self.xfacet else None),
                      row_order=(np.sort(data[self.yfacet].unique()) if self.yfacet else None),
                      hue_order=(np.sort(data[self.huefacet].unique()) if self.huefacet else None),
                      legend_out=False,
                      sharex=False,
                      sharey=False)

    # set the scale for each set of axes; can't just call plt.xscale()
    for ax in g.axes.flatten():
        ax.set_xscale(self.scale, **scale.mpl_params)

    g.map(plt.hist, self.channel, **kwargs)

    # if we have a hue facet and a lot of hues, make a color bar instead
    # of a super-long legend.
    if self.huefacet:
        current_palette = mpl.rcParams['axes.color_cycle']
        if len(g.hue_names) > len(current_palette):
            plot_ax = plt.gca()
            cmap = mpl.colors.ListedColormap(sns.color_palette("husl",
                                                               n_colors=len(g.hue_names)))
            cax, _ = mpl.colorbar.make_axes(plt.gca())
            norm = mpl.colors.Normalize(vmin=np.min(g.hue_names),
                                        vmax=np.max(g.hue_names),
                                        clip=False)
            mpl.colorbar.ColorbarBase(cax, cmap=cmap, norm=norm,
                                      label=self.huefacet)
            plt.sca(plot_ax)
        else:
            g.add_legend(title=self.huefacet)
def r14(p, nlhc, residual, definiteRange, y, e, vv, asdf1, C, r40, g, nNodes, \
        r41, fTol, Solutions, varTols, _in, dataType, \
        maxNodes, _s, indTC, xRecord):

    isSNLE = p.probType in ('NLSP', 'SNLE')

    maxSolutions, solutions, coords = Solutions.maxNum, Solutions.solutions, Solutions.coords
    if len(p._discreteVarsNumList):
        y, e = adjustDiscreteVarBounds(y, e, p)

    o, a, r41 = r45(y, e, vv, p, asdf1, dataType, r41, nlhc)

    fo_prev = float(0 if isSNLE else min((r41, r40 - (fTol if maxSolutions == 1 else 0))))
    if fo_prev > 1e300:
        fo_prev = 1e300

    y, e, o, a, _s, indTC, nlhc, residual = func7(y, e, o, a, _s, indTC, nlhc, residual)

    if y.size == 0:
        return _in, g, fo_prev, _s, Solutions, xRecord, r41, r40

    nodes = func11(y, e, nlhc, indTC, residual, o, a, _s, p)
    #nodes, g = func9(nodes, fo_prev, g, p)
    #y, e = func4(y, e, o, a, fo)

    if p.solver.dataHandling == 'raw':
        tmp = o.copy()
        tmp[tmp > fo_prev] = -inf
        M = atleast_1d(nanmax(tmp, 1))
        for i, node in enumerate(nodes):
            node.th_key = M[i]
        if not isSNLE:
            for node in nodes:
                node.fo = fo_prev
        if nlhc is not None:
            for i, node in enumerate(nodes):
                node.tnlhf = node.nlhf + node.nlhc
        else:
            for i, node in enumerate(nodes):
                node.tnlhf = node.nlhf  # TODO: improve it

        an = hstack((nodes, _in))

        #tnlh_fixed = vstack([node.tnlhf for node in an])
        tnlh_fixed_local = vstack([node.tnlhf for node in nodes])  #tnlh_fixed[:len(nodes)]

        tmp = a.copy()
        tmp[tmp > fo_prev] = fo_prev
        tmp2 = tmp - o
        tmp2[tmp2 < 1e-300] = 1e-300
        tmp2[o > fo_prev] = nan
        tnlh_curr = tnlh_fixed_local - log2(tmp2)
        tnlh_curr_best = nanmin(tnlh_curr, 1)
        for i, node in enumerate(nodes):
            node.tnlh_curr = tnlh_curr[i]
            node.tnlh_curr_best = tnlh_curr_best[i]

        # TODO: use it instead of code above
        #tnlh_curr = tnlh_fixed_local - log2(where() - o)
    else:
        tnlh_curr = None

    # TODO: don't calculate PointVals for zero-p regions
    PointVals, PointCoords = getr4Values(vv, y, e, tnlh_curr, asdf1, C, p.contol, dataType, p)

    if PointVals.size != 0:
        xk, Min = r2(PointVals, PointCoords, dataType)
    else:
        # all points have been removed by func7
        xk = p.xk
        Min = nan

    if r40 > Min:
        r40 = Min
        xRecord = xk.copy()  # TODO: is copy required?
    if r41 > Min:
        r41 = Min

    fo = float(0 if isSNLE else min((r41, r40 - (fTol if maxSolutions == 1 else 0))))

    if p.solver.dataHandling == 'raw':
        if fo != fo_prev and not isSNLE:
            fos = array([node.fo for node in an])

            #prev
            #ind_update = where(fos > fo + 0.01*fTol)[0]

            #new
            th_keys = array([node.th_key for node in an])
            delta_fos = fos - fo
            ind_update = where(10 * delta_fos > fos - th_keys)[0]

            nodesToUpdate = an[ind_update]
            update_nlh = True if ind_update.size != 0 else False
#            print 'o MB:', float(o_tmp.nbytes) / 1e6
#            print 'percent:', 100 * float(ind_update.size) / len(an)
            if update_nlh:
#                from time import time
#                tt = time()
                updateNodes(nodesToUpdate, fo)
#                if not hasattr(p, 'Time'):
#                    p.Time = time() - tt
#                else:
#                    p.Time += time() - tt

        tmp = asarray([node.key for node in an])
        r10 = where(tmp > fo)[0]
        if r10.size != 0:
            mino = [an[i].key for i in r10]
            mmlf = nanmin(asarray(mino))
            g = nanmin((g, mmlf))

        NN = atleast_1d([node.tnlh_curr_best for node in an])
        r10 = logical_or(isnan(NN), NN == inf)

        if any(r10):
            ind = where(logical_not(r10))[0]
            an = an[ind]
            #tnlh = take(tnlh, ind, axis=0, out=tnlh[:ind.size])
            #NN = take(NN, ind, axis=0, out=NN[:ind.size])
            NN = NN[ind]

        if not isSNLE or p.maxSolutions == 1:
            #pass
            astnlh = argsort(NN)
            an = an[astnlh]
#            print(an[0].nlhc, an[0].tnlh_curr_best)

        # Changes
#        if NN.size != 0:
#            ind = searchsorted(NN, an[0].tnlh_curr_best + 1)
#            tmp1, tmp2 = an[:ind], an[ind:]
#            arr = [node.key for node in tmp1]
#            Ind = argsort(arr)
#            an = hstack((tmp1[Ind], tmp2))
        #print [node.tnlh_curr_best for node in an[:10]]
    else:  #if p.solver.dataHandling == 'sorted':
        if isSNLE and p.maxSolutions != 1:
            an = hstack((nodes, _in))
        else:
            nodes.sort(key=lambda obj: obj.key)
            if len(_in) == 0:
                an = nodes
            else:
                arr1 = [node.key for node in _in]
                arr2 = [node.key for node in nodes]
                r10 = searchsorted(arr1, arr2)
                an = insert(_in, r10, nodes)

#    if p.debug:
#        arr = array([node.key for node in an])
#        #print arr[0]
#        assert all(arr[1:] >= arr[:-1])

    if maxSolutions != 1:
        Solutions = r46(o, a, PointCoords, PointVals, fTol, varTols, Solutions)

        p._nObtainedSolutions = len(solutions)
        if p._nObtainedSolutions > maxSolutions:
            solutions = solutions[:maxSolutions]
            p.istop = 0
            p.msg = 'user-defined maximal number of solutions (p.maxSolutions = %d) has been exceeded' % p.maxSolutions
            return an, g, fo, None, Solutions, xRecord, r41, r40

    #p.iterfcn(xk, Min)
    p.iterfcn(xRecord, r40)
    if p.istop != 0:
        return an, g, fo, None, Solutions, xRecord, r41, r40

    if isSNLE and maxSolutions == 1 and Min <= fTol:
        # TODO: rework it for nonlinear systems with non-bound constraints
        p.istop, p.msg = 1000, 'required solution has been obtained'
        return an, g, fo, None, Solutions, xRecord, r41, r40

    an, g = func9(an, fo, g, p)

    nn = maxNodes  #1 if asdf1.isUncycled and all(isfinite(o)) and p._isOnlyBoxBounded and not p.probType.startswith('MI') else maxNodes
    an, g = func5(an, nn, g, p)
    nNodes.append(len(an))

    return an, g, fo, _s, Solutions, xRecord, r41, r40
def apply(self, experiment):
    """Applies the binning to an experiment.

    Parameters
    ----------
    experiment : Experiment
        the old_experiment to which this op is applied

    Returns
    -------
        a new experiment, the same as old_experiment but with a new
        column the same as the operation name.  The bool is True if the
        event's measurement in self.channel is greater than self.low and
        less than self.high; it is False otherwise.
    """
    if not experiment:
        raise util.CytoflowOpError("no experiment specified")

    if not self.name:
        raise util.CytoflowOpError("name is not set")

    if self.name in experiment.data.columns:
        raise util.CytoflowOpError(
            "name {0} is in the experiment already".format(self.name))

    if self.bin_count_name and self.bin_count_name in experiment.data.columns:
        raise util.CytoflowOpError(
            "bin_count_name {0} is in the experiment already".format(
                self.bin_count_name))

    if not self.channel:
        raise util.CytoflowOpError("channel is not set")

    if self.channel not in experiment.data.columns:
        raise util.CytoflowOpError(
            "channel {0} isn't in the experiment".format(self.channel))

    if not self.num_bins and not self.bin_width:
        raise util.CytoflowOpError("must set either bin number or width")

    if self.bin_width \
       and not (self.scale == "linear" or self.scale == "log"):
        raise util.CytoflowOpError(
            "Can only use bin_width with linear or log scale")

    scale = util.scale_factory(self.scale, experiment, channel=self.channel)
    scaled_data = scale(experiment.data[self.channel])

    scaled_min = bn.nanmin(scaled_data)
    scaled_max = bn.nanmax(scaled_data)
    num_bins = self.num_bins if self.num_bins else \
        int((scaled_max - scaled_min) / self.bin_width)  # np.linspace needs an integer bin count

    if num_bins > self._max_num_bins:
        raise util.CytoflowOpError(
            "Too many bins! To increase this limit, "
            "change _max_num_bins (currently {})".format(self._max_num_bins))

    scaled_bins = np.linspace(start=scaled_min, stop=scaled_max, num=num_bins)

    if len(scaled_bins) < 2:
        raise util.CytoflowOpError("Must have more than one bin")

    # put the data in bins
    bin_idx = np.digitize(scaled_data, scaled_bins[1:-1])

    # now, back into data space
    bins = scale.inverse(scaled_bins)

    new_experiment = experiment.clone()
    new_experiment.add_condition(self.name, "float", bins[bin_idx])

    # if we're log-scaled (for example), don't label data that isn't
    # showable on a log scale!
    # new_experiment.data.ix[np.isnan(scaled_data), self.name] = np.nan
    # new_experiment.data.dropna(inplace = True)

    # keep track of the bins we used, for prettier plotting later.
    new_experiment.metadata[self.name]["bin_scale"] = self.scale
    new_experiment.metadata[self.name]["bins"] = bins

    if self.bin_count_name:
        # TODO - this is a HUGE memory hog?!
        # TODO - fix this, then turn it on by default
        agg_count = new_experiment.data.groupby(self.name).count()
        agg_count = agg_count[agg_count.columns[0]]

        # have to make the condition a float64, because if we're in log
        # space there may be events that have NaN as the bin number.
        new_experiment.add_condition(
            self.bin_count_name,
            "float64",
            new_experiment[self.name].map(agg_count))

    new_experiment.history.append(
        self.clone_traits(transient=lambda t: True))
    return new_experiment
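
# Illustrative sketch of the binning scheme above: bin edges are laid out with
# linspace between the NaN-aware extremes and each event is assigned to a bin
# with digitize over the interior edges only, so the first and last bins absorb
# boundary values. Self-contained numpy toy data.
import numpy as np
import bottleneck as bn

data = np.array([0.1, 0.4, 0.45, 0.9, np.nan])
edges = np.linspace(bn.nanmin(data), bn.nanmax(data), num=5)
bin_idx = np.digitize(data, edges[1:-1])
print(edges)            # -> [0.1 0.3 0.5 0.7 0.9]
print(bin_idx)          # per-event bin numbers; the NaN sorts into the last bin
print(edges[bin_idx])   # the bin value that would be recorded for each event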
def update(self):
    #get merge-extract
    basis_time_index = None
    for n,m in enumerate(self.show_merge):
        if self.show_merge_as_density[m]:
            self.merge_extract = self.densityMatrix[m][tuple(self.basis_dim_plot_range)]
        else:
            self.merge_extract = self.mergeMatrix[m][tuple(self.basis_dim_plot_range)]
        for b in range(len(self._basis_dim)-1, -1, -1):
            #basis dim to concentrate
            if b not in self.show_basis:
                pos_corr = self.concentrate_basis_dim[:b].count("pos")
                if self.concentrate_basis_dim[b] == "sum":
                    self.merge_extract = bn.nansum(self.merge_extract, b-pos_corr)
                elif self.concentrate_basis_dim[b] == "mean":
                    self.merge_extract = bn.nanmean(self.merge_extract, b-pos_corr)
                elif self.concentrate_basis_dim[b] == "max":
                    self.merge_extract = bn.nanmax(self.merge_extract, b-pos_corr)
                elif self.concentrate_basis_dim[b] == "min":
                    self.merge_extract = bn.nanmin(self.merge_extract, b-pos_corr)
        for b in range(len(self._basis_dim)-2, -1, -1):
            # check from end to start whether to roll the axis; the time-axis
            # has to be the last one. Dont roll the last basis-dim
            # (start with len(self._basis_dim)-2)
            if b not in self.show_basis and self.concentrate_basis_dim[b] == "time":
                #reshape the matrix
                self.merge_extract = np.rollaxis(self.merge_extract, b, 0)
                basis_time_index = b
                break  # no need to continue iterating; only one dim can be 'time'

        if len(self.show_basis) == 1:
            basis_extract = self.basisMatrix[self.show_basis[0]][self.basis_dim_plot_range[self.show_basis[0]]]
            if self.scale_plot == True:
                self.plot.enableAutoRange('xy', True)
            else:
                if self.enableAutoRangeX:
                    self.plot.enableAutoRange('x', True)
                    #self.plot.setXRange(
                    #    self._basis_dim[self.show_basis[0]]._include_range[0],
                    #    self._basis_dim[self.show_basis[0]]._include_range[1])
                if self.enableAutoRangeY:
                    self.plot.enableAutoRange('y', True)
            if self.transpose_axes:
                self.curves[n].setData(self.merge_extract, basis_extract)
            else:
                self.curves[n].setData(basis_extract, self.merge_extract)

        elif len(self.show_basis) >= 2:
            #calc scale and zero-position for axes-tics
            x0 = self._basis_dim[self.show_basis[0]]._include_range[0]
            x1 = self._basis_dim[self.show_basis[0]]._include_range[1]
            y0 = self._basis_dim[self.show_basis[1]]._include_range[0]
            y1 = self._basis_dim[self.show_basis[1]]._include_range[1]
            xscale = (x1-x0) / self._basis_dim[self.show_basis[0]].resolution
            yscale = (y1-y0) / self._basis_dim[self.show_basis[1]].resolution
            args = {'pos': [x0, y0], 'scale': [xscale, yscale]}
            if self.transpose_axes:
                args = {'pos': [y0, x0], 'scale': [yscale, xscale]}
            #set time-ticks
            if basis_time_index != None:
                args["xvals"] = self.basisMatrix[basis_time_index]
            if self.enableAutoRangeX:
                self.view.enableAutoRange('x', True)
                #self.view.setXRange(
                #    self._basis_dim[self.show_basis[0]]._include_range[0],
                #    self._basis_dim[self.show_basis[0]]._include_range[1])
            if self.enableAutoRangeY:
                self.view.enableAutoRange('y', True)

            # by default autoLevels (the color level of the merge-dims) is True,
            # calculated by pyqtgraph; but that only works on arrays without
            # nan-values - the calculated color level is wrong when the real
            # values lie beyond the nan-replacement (zero). Therefore calculate
            # the color level here whenever nans are in the array:
            anynan = bn.anynan(self.merge_extract)
            if anynan:
                mmin = bn.nanmin(self.merge_extract)
                mmax = bn.nanmax(self.merge_extract)
                if np.isnan(mmin):
                    mmin, mmax = 0, 0
                self.plot.setLevels(mmin, mmax)
                args["autoLevels"] = False
                ##the following line doesnt work with my version of pyQtGraph
                #args["levels"] = [mmin, mmax]  #np.nanmin(merge_extract), np.nanmax(merge_extract)
                self.merge_extract = _utils.nanToZeros(self.merge_extract)

            if self.transpose_axes:
                self.plot.setImage(self.merge_extract.transpose(),
                                   autoRange=self.scale_plot, **args)
            else:
                self.plot.setImage(self.merge_extract,
                                   autoRange=self.scale_plot, **args)
            if anynan:
                # scale the histogram to the new range
                self.plot.ui.histogram.vb.setYRange(mmin, mmax)
    self.scale_plot = False
def _fit(self, X, y):
    self.X, y = self._check_params(X, y)
    n, p = X.shape
    self.y = y.reshape((n, 1))

    # list of selected features
    S = []
    # list of all features
    F = range(p)

    if self.n_features != 'auto':
        feature_mi_matrix = np.zeros((self.n_features, p))
    else:
        feature_mi_matrix = np.zeros((n, p))
    feature_mi_matrix[:] = np.nan
    S_mi = []

    # ----------------------------------------------------------------------
    # FIND FIRST FEATURE
    # ----------------------------------------------------------------------

    # check a range of ks (3-10), and choose the one with the max median MI
    k_min = 3
    k_max = 11
    xy_MI = np.zeros((k_max - k_min, p))
    xy_MI[:] = np.nan
    for i, k in enumerate(range(k_min, k_max)):
        xy_MI[i, :] = mi.get_first_mi_vector(self, k)
    xy_MI = bn.nanmedian(xy_MI, axis=0)

    # choose the best, add it to S, remove it from F
    S, F = self._add_remove(S, F, bn.nanargmax(xy_MI))
    S_mi.append(bn.nanmax(xy_MI))

    # notify user
    if self.verbose > 0:
        self._print_results(S, S_mi)

    # ----------------------------------------------------------------------
    # FIND SUBSEQUENT FEATURES
    # ----------------------------------------------------------------------

    while len(S) < self.n_features:
        # loop through the remaining unselected features and calculate MI
        s = len(S) - 1
        feature_mi_matrix[s, F] = mi.get_mi_vector(self, F, s)

        # make decision based on the chosen FS algorithm
        fmm = feature_mi_matrix[:len(S), F]
        if self.method == 'JMI':
            selected = F[bn.nanargmax(bn.nansum(fmm, axis=0))]
        elif self.method == 'JMIM':
            selected = F[bn.nanargmax(bn.nanmin(fmm, axis=0))]
        elif self.method == 'MRMR':
            MRMR = xy_MI[F] - bn.nanmean(fmm, axis=0)
            selected = F[bn.nanargmax(MRMR)]

        # record the JMIM of the newly selected feature and add it to S
        S_mi.append(bn.nanmax(bn.nanmin(fmm, axis=0)))
        S, F = self._add_remove(S, F, selected)

        # notify user
        if self.verbose > 0:
            self._print_results(S, S_mi)

        # if n_features == 'auto', let's check the S_mi to stop
        if self.n_features == 'auto' and len(S) > 10:
            # smooth the 1st derivative of the MI values of previously sel
            MI_dd = signal.savgol_filter(S_mi[1:], 9, 2, 1)
            # does the mean of the last 5 converge to 0?
            if np.abs(np.mean(MI_dd[-5:])) < 1e-3:
                break

    # ----------------------------------------------------------------------
    # SAVE RESULTS
    # ----------------------------------------------------------------------

    self.n_features_ = len(S)
    self.support_ = np.zeros(p, dtype=np.bool)
    self.support_[S] = 1
    self.ranking_ = S
    self.mi_ = S_mi

    return self
def func12(an, maxActiveNodes, p, Solutions, vv, varTols, fo):
    solutions, r6 = Solutions.solutions, Solutions.coords
    if len(an) == 0:
        return array([]), array([]), array([]), array([])
    _in = an
    if r6.size != 0:
        r11, r12 = r6 - varTols, r6 + varTols
    y, e, S = [], [], []
    Tnlhf_curr_local = []
    n = p.n
    N = 0
    maxSolutions = p.maxSolutions

#    new = 1
#    # new
#    if new and p.probType in ('MOP', 'SNLE', 'GLP', 'NLP', 'MINLP') and p.maxSolutions == 1:
#
#
#        return y, e, _in, _s

    while True:
        an1Candidates, _in = func3(_in, maxActiveNodes, p.solver.dataHandling)
        #print nanmax(2**(-an1Candidates[0].tnlh_curr)), nanmax(2**(-an1Candidates[-1].tnlh_curr))

        yc, ec, oc, ac, SIc = asarray([t.y for t in an1Candidates]), \
            asarray([t.e for t in an1Candidates]), \
            asarray([t.o for t in an1Candidates]), \
            asarray([t.a for t in an1Candidates]), \
            asarray([t._s for t in an1Candidates])

        if p.probType == 'MOP':
            tnlhf_curr = asarray([t.tnlh_all for t in an1Candidates])
            tnlhf = None
        elif p.solver.dataHandling == 'raw':
            tnlhf = asarray([t.tnlhf for t in an1Candidates])
            tnlhf_curr = asarray([t.tnlh_curr for t in an1Candidates])
        else:
            tnlhf, tnlhf_curr = None, None

        if p.probType != 'IP':
            #nlhc = asarray([t.nlhc for t in an1Candidates])
            indtc = asarray([t.indtc for t in an1Candidates])
            #residual = asarray([t.residual for t in an1Candidates])
            residual = None
            indT = func4(p, yc, ec, oc, ac, fo, tnlhf_curr)
            if indtc[0] is not None:
                indT = logical_or(indT, indtc)
        else:
            residual = None
            indT = None

        t, _s, indD = func1(tnlhf, tnlhf_curr, residual, yc, ec, oc, ac, SIc, p, indT)

        new = 0
        nn = 0
        if new and p.probType in ('MOP', 'SNLE', 'NLSP', 'GLP', 'NLP', 'MINLP') and p.maxSolutions == 1:
            arr = tnlhf_curr if p.solver.dataHandling == 'raw' else oc
            M = arr.shape[0]
            w = arange(M)
            Midles = 0.5 * (yc[w, t] + ec[w, t])
            arr_1, arr2 = arr[w, t], arr[w, n + t]
            Arr = hstack((arr_1, arr2))
            ind = np.argsort(Arr)
            Ind = set(ind[:maxActiveNodes])

            tag_all, tag_1, tag_2 = [], [], []
            sn = []
            # TODO: get rid of the cycles
            for i in range(M):
                cond1, cond2 = i in Ind, (i + M) in Ind
                if cond1:
                    if cond2:
                        tag_all.append(i)
                    else:
                        tag_1.append(i)
                else:
                    if cond2:
                        tag_2.append(i)
                    else:
                        sn.append(an1Candidates[i])

            list_lx, list_ux = [], []
            _s_new = []
            updateTC = an1Candidates[0].indtc is not None
            isRaw = p.solver.dataHandling == 'raw'

            for i in tag_1:
                node = an1Candidates[i]
                I = t[i]
#                if node.o[n+I] >= node.o[I]:
#                    print '1'
#                else:
#                    print i, I, node.o[n+I], node.o[I], node.key, node.a[n+I], node.a[I], node.nlhc[n+I], node.nlhc[I]
                node.key = node.o[n + I]
                node._s = _s[i]
                if isRaw:
                    node.tnlh_curr[I] = node.tnlh_curr[n + I]
                    node.tnlh_curr_best = nanmin(node.tnlh_curr)
                #assert node.o[n+I] >= node.o[I]
                #lx, ux = node.y, node.e
                lx, ux = yc[i], ec[i]
                if nn:
                    #node.o[I], node.a[I] = node.o[n+I], node.a[n+I]
                    node.o[I], node.a[I] = node.o[n + I], node.a[n + I]
                    node.o[node.o < node.o[n + I]], node.a[node.a > node.a[n + I]] = node.o[n + I], node.a[n + I]
                else:
                    node.o[n + I], node.a[n + I] = node.o[I], node.a[I]
                    node.o[node.o < node.o[I]], node.a[node.a > node.a[I]] = node.o[I], node.a[I]
#                if p.solver.dataHandling == 'raw':
                for Attr in ('nlhf', 'nlhc', 'tnlhf', 'tnlh_curr', 'tnlh_all'):
                    r = getattr(node, Attr, None)
                    if r is not None:
                        if nn:
                            r[I] = r[n + I]
                        else:
                            r[n + I] = r[I]
                mx = ux.copy()
                mx[I] = Midles[i]  #0.5*(lx[I] + ux[I])
                list_lx.append(lx)
                list_ux.append(mx)
                node.y = lx.copy()
                node.y[I] = Midles[i]  #0.5*(lx[I] + ux[I])
                if updateTC:
                    node.indtc = True
                _s_new.append(node._s)
                sn.append(node)

            for i in tag_2:
                node = an1Candidates[i]
                I = t[i]
                node.key = node.o[I]
                node._s = _s[i]
                # for raw only
                if isRaw:
                    node.tnlh_curr[n + I] = node.tnlh_curr[I]
                    node.tnlh_curr_best = nanmin(node.tnlh_curr)
                #assert node.o[I] >= node.o[n+I]
                #lx, ux = node.y, node.e
                lx, ux = yc[i], ec[i]
                if nn:
                    node.o[n + I], node.a[n + I] = node.o[I], node.a[I]
                    node.o[node.o < node.o[I]], node.a[node.a > node.a[I]] = node.o[I], node.a[I]
                else:
                    node.o[I], node.a[I] = node.o[n + I], node.a[n + I]
                    node.o[node.o < node.o[n + I]], node.a[node.a > node.a[n + I]] = node.o[n + I], node.a[n + I]
                for Attr in ('nlhf', 'nlhc', 'tnlhf', 'tnlh_curr', 'tnlh_all'):
                    r = getattr(node, Attr, None)
                    if r is not None:
                        if nn:
                            r[n + I] = r[I]
                        else:
                            r[I] = r[n + I]
                mx = lx.copy()
                mx[I] = Midles[i]  #0.5*(lx[I] + ux[I])
                list_lx.append(mx)
                list_ux.append(ux)
                node.e = ux.copy()
                node.e[I] = Midles[i]  #0.5*(lx[I] + ux[I])
                if updateTC:
                    node.indtc = True
                _s_new.append(node._s)
                sn.append(node)

            for i in tag_all:
                node = an1Candidates[i]
                I = t[i]
                #lx, ux = node.y, node.e
                lx, ux = yc[i], ec[i]
                mx = ux.copy()
                mx[I] = Midles[i]  #0.5 * (lx[I] + ux[I])
                list_lx.append(lx)
                list_ux.append(mx)
                mx = lx.copy()
                mx[I] = Midles[i]  #0.5 * (lx[I] + ux[I])
                #mx[n+ t] = 0.5 * (lx[n + t] + ux[n + t])
                list_lx.append(mx)
                list_ux.append(ux)
                #_s_new += [_s[i]] * 2
                _s_new.append(_s[i])
                _s_new.append(_s[i])

#            print 'y_new:', vstack(list_lx)
#            print 'e_new:', vstack(list_ux)
#            print '_s_new:', hstack(_s)

            _in = sn + _in.tolist()
            if p.solver.dataHandling == 'sorted':
                _in.sort(key=lambda obj: obj.key)
            else:
                #pass
                _in.sort(key=lambda obj: obj.tnlh_curr_best)

#            print 'tag 1:', len(tag_1), 'tag 2:', len(tag_2), 'tag all:', len(tag_all)
#            print 'lx:', list_lx
#            print 'sn lx:', [node.y for node in sn]
#            print 'ux:', list_ux
#            print 'sn ux:', [node.e for node in sn]
#            print '-'*10
            #print '!', vstack(list_lx), vstack(list_ux), hstack(_s_new)
            NEW_lx, NEW_ux, NEW__in, NEW__s = \
                vstack(list_lx), vstack(list_ux), array(_in), hstack(_s_new)
            return NEW_lx, NEW_ux, NEW__in, NEW__s

        NewD = 1
        if NewD and indD is not None:
            s4d = _s[indD]
            sf = _s[logical_not(indD)]
            _s = hstack((s4d, s4d, sf))
            yf, ef = yc[logical_not(indD)], ec[logical_not(indD)]
            yc, ec = yc[indD], ec[indD]
            t = t[indD]
        else:
            _s = tile(_s, 2)

        yc, ec, tnlhf_curr_local = func2(yc, ec, t, vv, tnlhf_curr)

        if NewD and indD is not None:
            yc = vstack((yc, yf))
            ec = vstack((ec, ef))

        if maxSolutions == 1 or len(solutions) == 0:
            y, e, Tnlhf_curr_local = yc, ec, tnlhf_curr_local
            break

        # TODO: change cycle variable if len(solutions) >> maxActiveNodes
        for i in range(len(solutions)):
            ind = logical_and(all(yc >= r11[i], 1), all(ec <= r12[i], 1))
            if any(ind):
                j = where(logical_not(ind))[0]
                lj = j.size
                yc = take(yc, j, axis=0, out=yc[:lj])
                ec = take(ec, j, axis=0, out=ec[:lj])
                _s = _s[j]
#                if tnlhf_curr_local is not None:
#                    tnlhf_curr_local = tnlhf_curr_local[j]
        y.append(yc)
        e.append(ec)
        S.append(_s)
        #Tnlhf_curr_local.append(tnlhf_curr_local)
        N += yc.shape[0]
        if len(_in) == 0 or N >= maxActiveNodes:
            y, e, _s = vstack(y), vstack(e), hstack(S)
            #Tnlhf_curr_local = hstack(Tnlhf_curr_local)
            break

#    if Tnlhf_curr_local is not None and len(Tnlhf_curr_local) != 0 and Tnlhf_curr_local[0] is not None:
#        #print len(where(isfinite(Tnlhf_curr_local))[0]), Tnlhf_curr_local.size
#        pass
#    print 'y_prev:', y
#    print 'e_prev:', e
#    print '_s_prev:', hstack(_s)
    #print 'prev!', y, e, _s
#    from numpy import array_equal
#    if not array_equal(NEW_lx.sort(), y.sort()):
#        pass
#    if not array_equal(NEW_ux.sort(), e.sort()):
#        pass
#    if not array_equal(NEW__s.sort(), _s.sort()):
#        pass
    #, NEW_ux, NEW__in, NEW__s
    return y, e, _in, _s
def update(self, input_data, accumulate=False):
    """Trigger an update for the histogram plots.

    Args:
        input_data (ndarray): Source values for histogram plots.
        accumulate (bool, optional): Add together bin values of the previous
            and current data. Defaults to False.
    """
    if self.auto_toggle.active and not accumulate:  # automatic
        # find the lowest and the highest value in input data
        lower = 0
        upper = 1

        for data in input_data:
            min_val = bn.nanmin(data)
            min_val = 0 if np.isnan(min_val) else min_val
            lower = min(lower, min_val)

            max_val = bn.nanmax(data)
            max_val = 1 if np.isnan(max_val) else max_val
            upper = max(upper, max_val)

        self.lower_spinner.value = int(np.floor(lower))
        self.upper_spinner.value = int(np.ceil(upper))

    # get histogram counts and update plots
    for i, data in enumerate(input_data):
        # np.histogram on 16M values can take around 0.5 sec, which is too much,
        # thus reduce the number of processed values (not the ideal
        # implementation, but should be good enough)
        ratio = np.sqrt(data.size / 2_000_000)
        if ratio > 1:
            shape_x, shape_y = data.shape
            stride_x = ratio * shape_x / shape_y
            stride_y = ratio * shape_y / shape_x
            if stride_x < 1:
                stride_y = int(np.ceil(stride_y * stride_x))
                stride_x = 1
            elif stride_y < 1:
                stride_x = int(np.ceil(stride_y * stride_x))
                stride_y = 1
            else:
                stride_x = int(np.ceil(stride_x))
                stride_y = int(np.ceil(stride_y))
            data = data[::stride_y, ::stride_x]

        next_counts, edges = np.histogram(
            data, bins=self.nbins, range=(self.lower, self.upper))

        if self.log10counts_toggle.active:
            next_counts = np.log10(next_counts, where=next_counts > 0)

        if accumulate:
            self._counts[i] += next_counts
        else:
            self._counts[i] = next_counts

        self._plot_sources[i].data.update(
            left=edges[:-1], right=edges[1:], top=self._counts[i])
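# A self-contained sketch (my own toy numbers, not part of the class above) of
# the stride arithmetic in `update`: the strides are chosen so that
# stride_x * stride_y ~= ratio**2 = data.size / 2_000_000, i.e. the decimated
# array keeps roughly 2M values regardless of the aspect ratio.
import numpy as np

data = np.empty((4000, 8000))              # 32M values
ratio = np.sqrt(data.size / 2_000_000)     # = 4.0
shape_x, shape_y = data.shape
stride_x = ratio * shape_x / shape_y       # 2.0
stride_y = ratio * shape_y / shape_x       # 8.0
sub = data[::int(np.ceil(stride_y)), ::int(np.ceil(stride_x))]
print(sub.size)                            # -> 2000000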
def prepare_window_metrics(self, bq_threshold=20, low_qual_bq_thr=10):
    if self.window_array_seq is None:
        self.__generate_window_arrays()

    read_num = self.window_array_seq.shape[0]
    if read_num != 0:
        # all bases in reads, including soft-clipping
        self.all_bases = (self.window_array_seq != 0)
        # Coverage: mask unmapped, N and clipped bases, count everything else as aligned
        self.aligned_mask = np.logical_or(self.window_array_seq < 1,
                                          self.window_array_seq == 5)
        # Exclude dels from SNV
        self.snv_mask = np.logical_or(self.aligned_mask, self.window_array_seq == 6)
        # Exclude low quality bases
        self.quality_mask = self.window_array_qual < bq_threshold

        # translate ref seq to numeric code
        self.ref_code = np.array(
            [int(e) for e in string.translate(self.window_ref_seq, self.trans_tab)],
            dtype=int)
        # identify positions in ref seq that are not determined (i.e. N bases
        # in ref) and exclude them
        self.genomic_N_pos = np.zeros_like(self.window_array_seq)
        self.genomic_N_pos[:, (self.ref_code == 5)] = 1
        # define the array of non-N-ref positions with valid (non-del, aligned) bases
        self.valid_ref_snv_mask = np.logical_or(self.snv_mask, self.genomic_N_pos)
        self.valid_pos_seq_array = np.copy(self.window_array_seq)
        self.valid_pos_qual_array = np.copy(self.window_array_qual)

        # determine differences between ref seq and valid positions
        self.snv = np.array(
            (self.valid_pos_seq_array.transpose()
             - self.ref_code[:, np.newaxis]).transpose() != 0,
            dtype=float)
        self.snv[self.valid_ref_snv_mask] = np.nan
        self.valid_pos_seq_array[self.valid_ref_snv_mask] = np.nan
        self.valid_pos_qual_array[self.valid_ref_snv_mask] = np.nan

        # compute coverage per position in the window
        self.coverage_per_pos = bn.nansum(
            np.logical_not(self.aligned_mask).astype(int), axis=0)
        # some reads have a higher number of mismatches - identify them
        self.mismatch_reads = bn.nansum(self.snv, axis=1) > 2
        self.mq_per_read = np.array(
            [e["mq"] for e in self.read_characteristics], dtype=int)
        self.low_qual_bases = (self.window_array_qual < low_qual_bq_thr)

        # quality values
        self.read_mean_bq = bn.nanmean(self.valid_pos_qual_array, axis=1)
        self.read_min_bq = bn.nanmin(self.valid_pos_qual_array, axis=1)
        self.read_lowqual_bases = np.logical_and(
            np.logical_not(self.valid_ref_snv_mask), self.low_qual_bases)
        self.read_lowqual_base_count = bn.nansum(
            self.read_lowqual_bases.astype(int), axis=1)

        # identify positions that contain mismatches
        self.snv_pos = bn.nansum(self.snv, axis=0) > 0

        # define array of high-quality bases and compute SNV on them
        self.valid_ref_snv_good_qual_mask = np.logical_or(
            self.valid_ref_snv_mask, self.quality_mask)
        self.snv_bq_filtered = np.copy(self.snv)
        self.snv_bq_filtered[self.valid_ref_snv_good_qual_mask] = np.nan
        self.snv_pos_bq_filtered = bn.nansum(self.snv_bq_filtered, axis=0) > 0

        # quality of aligned positions only
        self.qual_aligned_mask = np.logical_or(self.window_array_seq == 0,
                                               self.genomic_N_pos)
        self.qual_aligned_lowqual_bases = np.logical_and(
            np.logical_not(self.qual_aligned_mask), self.low_qual_bases)

        # from the array of high-quality bases also identify "bad" reads and
        # compute SNV on the rest
        self.good_read_mask = np.zeros_like(self.window_array_seq)
        self.good_read_mask[np.logical_or(
            np.logical_or(self.read_lowqual_base_count > 2, self.mismatch_reads),
            self.mq_per_read == 0), :] = 1

        # SNV from high-quality aligned bases and high-quality reads
        self.valid_ref_snv_good_qual_good_reads_mask = np.logical_or(
            self.good_read_mask, self.valid_ref_snv_good_qual_mask)
        self.snv_bq_filtered_good_reads = np.copy(self.snv)
        self.snv_bq_filtered_good_reads[
            self.valid_ref_snv_good_qual_good_reads_mask] = np.nan
        self.snv_pos_bq_filtered_good_reads = bn.nansum(
            self.snv_bq_filtered_good_reads, axis=0) > 0

        # SNV from high-quality aligned bases and high-quality reads with
        # known SNP positions excluded
        self.known_snp_mask = np.zeros_like(self.window_array_seq)
        self.known_snp_mask[:, self.known_snp_pos] = 1
        self.valid_ref_snv_good_qual_good_reads_no_known_site_mask = np.logical_or(
            self.valid_ref_snv_good_qual_good_reads_mask, self.known_snp_mask)
        self.snv_bq_filtered_good_reads_no_snp = np.copy(self.snv)
        self.snv_bq_filtered_good_reads_no_snp[
            self.valid_ref_snv_good_qual_good_reads_no_known_site_mask] = np.nan
        self.snv_pos_bq_filtered_good_reads_no_snp = bn.nansum(
            self.snv_bq_filtered_good_reads_no_snp, axis=0) > 0

        self.pos_median_bq = bn.nanmedian(self.valid_pos_qual_array, axis=0)
        self.pos_min_bq = bn.nanmin(self.valid_pos_qual_array, axis=0)
        self.pos_lowqual_base_count = bn.nansum(
            self.read_lowqual_bases.astype(int), axis=0)

        # note that for now we only count start positions
        # FIXME: check if we can code this in the window sequence matrix,
        # possibly using a bit-overlay matrix or similar (i.e. code insertion
        # flanking nucleotides with different higher bits)
        self.window_ins_positions = {}
        self.window_del_positions = {}
        tmp_insert_sizes = {}
        for e in self.read_characteristics:
            insertions = e["indels"]["I"]
            deletions = e["indels"]["D"]
            rstart = e["read_start"] + e["start_position_offset"]
            for ii in insertions.keys():
                genomic_pos = ii + rstart
                if self.window_start <= genomic_pos <= self.window_end:
                    try:
                        self.window_ins_positions[genomic_pos] += 1
                    except KeyError:
                        self.window_ins_positions[genomic_pos] = 1
            for ii in deletions.keys():
                genomic_pos = ii + rstart
                if self.window_start <= genomic_pos <= self.window_end:
                    try:
                        self.window_del_positions[genomic_pos] += 1
                    except KeyError:
                        self.window_del_positions[genomic_pos] = 1
            # make sure we consider insert sizes only once per fragment in the
            # same window
            tmp_insert_sizes[e["read_id"][0]] = np.abs(
                e["insert_size"]) if e["insert_size"] != 0 else np.nan

        self.insert_sizes_unique = np.array(tmp_insert_sizes.values(), dtype=float)

    self.window_description["read_number"] = read_num
def func1(tnlhf, tnlhf_curr, residual, y, e, o, a, _s_prev, p, indT):
    m, n = y.shape
    w = arange(m)
    if p.probType == 'IP':
        oc_modL, oc_modU = o[:, :n], o[:, n:]
        ac_modL, ac_modU = a[:, :n], a[:, n:]
        # TODO: handle nans
        mino = where(oc_modL < oc_modU, oc_modL, oc_modU)
        maxa = where(ac_modL < ac_modU, ac_modU, ac_modL)

        # pick the variable with the smallest total enclosure width over both halves
        tmp = a[:, 0:n] - o[:, 0:n] + a[:, n:] - o[:, n:]
        t = nanargmin(tmp, 1)
        d = 0.5 * tmp[w, t]

        ind = 2 ** (1.0 / n) * d >= _s_prev
        _s = nanmin(maxa - mino, 1)
        indD = None
    else:
        if p.solver.dataHandling == 'sorted':
            _s = func13(o, a)
            t = nanargmin(a, 1) % n
            d = nanmax([a[w, t] - o[w, t], a[w, n + t] - o[w, n + t]], 0)
            ## !!!! Don't replace it by (_s_prev/d - 1) to omit rounding errors ###
            ind = d >= _s_prev / 2 ** (1.0e-12 / n)
            indD = empty(m, bool)
            indD.fill(True)
        elif p.solver.dataHandling == 'raw':
            if p.probType == 'MOP':
                t = p._t[:m]
                p._t = p._t[m:]
                d = _s = p.__s[:m]
                p.__s = p.__s[m:]
            else:
                T = tnlhf_curr
                tnlh_curr_1, tnlh_curr_2 = T[:, 0:n], T[:, n:]
                TNHL_curr_min = where(
                    logical_or(tnlh_curr_1 < tnlh_curr_2, isnan(tnlh_curr_2)),
                    tnlh_curr_1, tnlh_curr_2)
                t = nanargmin(TNHL_curr_min, 1)
                T = tnlhf
                d = nanmin(vstack((T[w, t], T[w, n + t])), 0)
                _s = d
                #!#!#!#! Don't replace it by _s_prev - d <= ... to omit inf-inf = nan !#!#!#
                ind = _s_prev <= d + 1.0 / n
                indQ = d >= _s_prev - 1.0 / n
                indD = logical_or(indQ, logical_not(indT))
        else:
            assert 0

    if any(ind):
        r10 = where(ind)[0]
        bs = e[r10] - y[r10]
        t[r10] = nanargmax(bs, 1)  # ordinary numpy.argmax can be used as well

    return t, _s, indD
def cr_min(data):
    """Row-wise minimum, ignoring NaNs ("bk" is the bottleneck module)."""
    return bk.nanmin(data, axis=1)
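# Usage sketch for cr_min above (assuming bottleneck is imported as "bk",
# matching the function body): the reduction runs along axis=1, so each row
# collapses to a single value and NaNs are skipped.
import numpy as np
import bottleneck as bk

data = np.array([[1.0, np.nan],
                 [3.0, 2.0]])
print(cr_min(data))  # -> [1. 2.]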
def ndcombine(arr, mask=None, copy=True, blank=np.nan, offsets=None,
              thresholds=[-np.inf, np.inf], zero=None, scale=None, weight=None,
              zero_kw={'cenfunc': 'median', 'stdfunc': 'std', 'std_ddof': 1},
              scale_kw={'cenfunc': 'median', 'stdfunc': 'std', 'std_ddof': 1},
              zero_to_0th=True, scale_to_0th=True, zero_section=None,
              scale_section=None, reject=None, cenfunc='median', sigma=[3., 3.],
              maxiters=3, ddof=1, nkeep=1, maxrej=None, n_minmax=[1, 1],
              rdnoise=0., gain=1., snoise=0., pclip=-0.5, combine='average',
              dtype='float32', memlimit=2.5e+9, irafmode=True, verbose=False,
              full=False, return_variance=False):
    if copy:
        arr = arr.copy()

    if np.array(arr).ndim == 1:
        raise ValueError("1-D array combination is not supported!")

    _mask = _set_mask(arr, mask)  # _mask is propagated through this function
    sigma_lower, sigma_upper = _set_sigma(sigma)
    nkeep, maxrej = _set_keeprej(arr, nkeep, maxrej, axis=0)
    cenfunc = _set_cenfunc(cenfunc)
    reject_fullname = _set_reject_name(reject)
    maxiters = int(maxiters)
    ddof = int(ddof)
    combfunc = _set_combfunc(combine, nameonly=False, nan=True)

    if verbose and reject is not None:
        print("- Rejection")
        if thresholds != [-np.inf, np.inf]:
            print(f"-- thresholds (low, upp) = {thresholds}")
        print(f"-- {reject=} ({irafmode=})")
        print(f"-- params: {nkeep=}, {maxrej=}, {maxiters=}, {cenfunc=}")
        if reject_fullname == "sigclip":
            print(f"    (for sigclip): {sigma=}, {ddof=}")
        elif reject_fullname == "ccdclip":
            print(f"    (for ccdclip): {gain=}, {rdnoise=}, {snoise=}")
        # elif reject_fullname == "pclip":
        #     print(f"    (for pclip)  : spclip={pclip}")
        # elif reject_fullname == "minmax":
        #     print(f"    (for minmaxclip): n_minmax={n_minmax}")

    # == 01 - Thresholding + Initial masking ======================================== #
    # Updating mask: _mask = _mask | mask_thresh
    mask_thresh = _set_thresh_mask(arr=arr, mask=_mask, thresholds=thresholds,
                                   update_mask=True)

    # TODO: remove this np.nan and instead, let `get_zsw` accept a mask.
    arr[_mask] = np.nan
    # ------------------------------------------------------------------------------- #

    # == 02 - Calculate zero, scale, weights ======================================== #
    # This should be done before rejection but after threshold masking.
    zeros, scales, weights = get_zsw(arr=arr, zero=zero, scale=scale,
                                     weight=weight, zero_kw=zero_kw,
                                     scale_kw=scale_kw, zero_to_0th=zero_to_0th,
                                     scale_to_0th=scale_to_0th,
                                     zero_section=zero_section,
                                     scale_section=scale_section)
    arr = do_zs(arr, zeros=zeros, scales=scales)
    # ------------------------------------------------------------------------------- #

    # == 03 - Rejection ============================================================= #
    if isinstance(reject_fullname, str):
        if reject_fullname == 'sigclip':
            _mask_rej = sigclip_mask(arr, mask=_mask, sigma_lower=sigma_lower,
                                     sigma_upper=sigma_upper, maxiters=maxiters,
                                     ddof=ddof, nkeep=nkeep, maxrej=maxrej,
                                     cenfunc=cenfunc, axis=0,
                                     irafmode=irafmode, full=full)
        elif reject_fullname == 'minmax':
            _mask_rej = minmax_mask(arr, mask=_mask, n_minmax=n_minmax, full=full)
        elif reject_fullname == 'ccdclip':
            _mask_rej = ccdclip_mask(arr, mask=_mask, sigma_lower=sigma_lower,
                                     sigma_upper=sigma_upper,
                                     scale_ref=np.mean(scales),
                                     zero_ref=np.mean(zeros),
                                     maxiters=maxiters, ddof=ddof, nkeep=nkeep,
                                     maxrej=maxrej, cenfunc=cenfunc, axis=0,
                                     gain=gain, rdnoise=rdnoise, snoise=snoise,
                                     irafmode=irafmode, full=True)
        elif reject_fullname == 'pclip':
            pass
        else:
            raise ValueError("reject not understood.")
        if full:
            _mask_rej, low, upp, nit, rejcode = _mask_rej
        # _mask is a subset of _mask_rej, so the pixels masked PURELY due to
        # the rejection are:
        mask_rej = _mask_rej ^ _mask
    elif reject_fullname is None:
        mask_rej = _set_mask(arr, None)
        if full:
            low = bn.nanmin(arr, axis=0)
            upp = bn.nanmax(arr, axis=0)
            nit = None
            rejcode = None
    else:
        raise ValueError("reject not understood.")

    if reject is not None and verbose:
        print("Done.")

    _mask |= mask_rej
    # ------------------------------------------------------------------------------- #

    # TODO: add "grow" rejection here?

    # == 04 - Combine =============================================================== #
    # Replace rejected / masked pixels with NaN instead of copying the array,
    # to reduce memory use.
    if verbose:
        print("- Combining")
        print(f"-- combine = {combine}")
    arr[_mask] = np.nan

    # Combine and calculate sigma
    comb = combfunc(arr, axis=0)
    if verbose:
        print("Done.")

    if full:
        if verbose:
            print("- Error calculation")
            print("-- to skip this, use `full=False`")
            print(f"-- return_variance={return_variance}, ddof={ddof}")
        if return_variance:
            err = bn.nanvar(arr, ddof=ddof, axis=0)
        else:
            err = bn.nanstd(arr, ddof=ddof, axis=0)
        if verbose:
            print("Done.")
        return comb, err, mask_rej, mask_thresh, low, upp, nit, rejcode
    else:
        return comb
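# A hedged usage sketch for ndcombine above: median-combine a small image stack
# with sigma clipping. The helper names inside ndcombine (_set_mask, get_zsw,
# sigclip_mask, ...) come from its host module and are not redefined here; the
# 'sigclip' reject name is assumed to be accepted by _set_reject_name.
import numpy as np

stack = np.random.default_rng(0).normal(100.0, 5.0, size=(5, 64, 64))
stack[0, 10, 10] = 1e6  # a cosmic-ray-like outlier that should be rejected

comb = ndcombine(stack, reject='sigclip', sigma=[3., 3.], combine='median')
print(comb.shape)  # -> (64, 64); the outlier pixel is clipped before combining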
def plot(self, experiment, **kwargs):
    """Plot a faceted histogram view of a channel"""

    if not experiment:
        raise util.CytoflowViewError("No experiment specified")

    if not self.channel:
        raise util.CytoflowViewError("Must specify a channel")

    if self.channel not in experiment.data:
        raise util.CytoflowViewError(
            "Channel {0} not in the experiment".format(self.channel))

    if self.xfacet and self.xfacet not in experiment.conditions:
        raise util.CytoflowViewError(
            "X facet {0} not in the experiment".format(self.xfacet))

    if self.yfacet and self.yfacet not in experiment.conditions:
        raise util.CytoflowViewError(
            "Y facet {0} not in the experiment".format(self.yfacet))

    if self.huefacet and self.huefacet not in experiment.conditions:
        raise util.CytoflowViewError(
            "Hue facet {0} not in the experiment".format(self.huefacet))

    facets = [x for x in (self.xfacet, self.yfacet, self.huefacet) if x]
    if len(facets) != len(set(facets)):
        raise util.CytoflowViewError("Can't reuse facets")

    col_wrap = kwargs.pop('col_wrap', None)

    if col_wrap and self.yfacet:
        raise util.CytoflowViewError(
            "Can't set yfacet and col_wrap at the same time.")

    if col_wrap and not self.xfacet:
        raise util.CytoflowViewError("Must set xfacet to use col_wrap.")

    if self.subset:
        try:
            data = experiment.query(self.subset).data.reset_index()
        except util.CytoflowError as e:
            raise util.CytoflowViewError(str(e))
        except Exception as e:
            raise util.CytoflowViewError(
                "Subset string '{0}' isn't valid".format(self.subset))
        if len(data) == 0:
            raise util.CytoflowViewError(
                "Subset string '{0}' returned no events".format(self.subset))
    else:
        data = experiment.data

    # get the scale
    scale = kwargs.pop('scale', None)
    if scale is None:
        scale = util.scale_factory(self.scale, experiment, channel=self.channel)
    scaled_data = scale(data[self.channel])

    kwargs.setdefault('histtype', 'stepfilled')
    kwargs.setdefault('alpha', 0.5)
    kwargs.setdefault('antialiased', True)

    # estimate a "good" number of bins; see cytoflow.utility.num_hist_bins
    # for a reference.
    num_bins = util.num_hist_bins(scaled_data)

    # clip num_bins to (50, 1000)
    num_bins = max(min(num_bins, 1000), 50)

    xmin = bottleneck.nanmin(scaled_data)
    xmax = bottleneck.nanmax(scaled_data)

    if (self.huefacet
            and "bins" in experiment.metadata[self.huefacet]
            and experiment.metadata[self.huefacet]["bin_scale"] == self.scale):
        # if we color facet by the result of a BinningOp and we don't match
        # the BinningOp bins with the histogram bins, we get gnarly aliasing.

        # each color gets at least one bin.  however, if the estimated number
        # of bins for the histogram is much larger than the number of colors,
        # sub-divide each color into multiple bins.
        bins = experiment.metadata[self.huefacet]["bins"]
        bins = np.append(bins, xmax)

        num_hues = len(data[self.huefacet].unique())
        bins_per_hue = math.ceil(num_bins / num_hues)

        new_bins = [xmin]
        for end in [b for b in bins if (b > xmin and b <= xmax)]:
            new_bins = np.append(
                new_bins,
                np.linspace(new_bins[-1], end, bins_per_hue + 1,
                            endpoint=True)[1:])

        bins = scale.inverse(new_bins)
    else:
        bin_width = (xmax - xmin) / num_bins
        bins = scale.inverse(np.arange(xmin, xmax, bin_width))
        bins = np.append(bins, scale.inverse(xmax))

    # take care of a rare rounding error, where the first observation is less
    # than the first bin or the last observation is more than the last bin,
    # which makes plt.hist() puke
    bins[-1] += 1
    bins[0] -= 1

    kwargs.setdefault('bins', bins)

    # mask out the data that's not in the scale domain
    data = data[~np.isnan(scaled_data)]

    # adjust the limits to clip extreme values
    min_quantile = kwargs.pop("min_quantile", 0.001)
    max_quantile = kwargs.pop("max_quantile", 0.999)
    xlim = kwargs.pop("xlim", None)
    if xlim is None:
        xlim = (data[self.channel].quantile(min_quantile),
                data[self.channel].quantile(max_quantile))

    sharex = kwargs.pop("sharex", True)
    sharey = kwargs.pop("sharey", True)

    cols = col_wrap if col_wrap else \
        len(data[self.xfacet].unique()) if self.xfacet else 1

    g = sns.FacetGrid(data,
                      size=6 / cols,
                      aspect=1.5,
                      col=(self.xfacet if self.xfacet else None),
                      row=(self.yfacet if self.yfacet else None),
                      hue=(self.huefacet if self.huefacet else None),
                      col_order=(np.sort(data[self.xfacet].unique())
                                 if self.xfacet else None),
                      row_order=(np.sort(data[self.yfacet].unique())
                                 if self.yfacet else None),
                      hue_order=(np.sort(data[self.huefacet].unique())
                                 if self.huefacet else None),
                      col_wrap=col_wrap,
                      legend_out=False,
                      sharex=sharex,
                      sharey=sharey,
                      xlim=xlim)

    # set the scale for each set of axes; can't just call plt.xscale()
    for ax in g.axes.flatten():
        ax.set_xscale(self.scale, **scale.mpl_params)

    legend = kwargs.pop('legend', True)

    g.map(plt.hist, self.channel, **kwargs)

    # if we are sharing y axes, make sure the y scale is the same for each
    if sharey:
        fig = plt.gcf()
        fig_y_max = float("-inf")
        for ax in fig.get_axes():
            _, ax_y_max = ax.get_ylim()
            if ax_y_max > fig_y_max:
                fig_y_max = ax_y_max
        for ax in fig.get_axes():
            ax.set_ylim(None, fig_y_max)

    # if we are sharing x axes, make sure the x scale is the same for each
    if sharex:
        fig = plt.gcf()
        fig_x_min = float("inf")
        fig_x_max = float("-inf")
        for ax in fig.get_axes():
            ax_x_min, ax_x_max = ax.get_xlim()
            if ax_x_min < fig_x_min:
                fig_x_min = ax_x_min
            if ax_x_max > fig_x_max:
                fig_x_max = ax_x_max
        for ax in fig.get_axes():
            ax.set_xlim(fig_x_min, fig_x_max)

    # if we have a hue facet, the y scaling is frequently wrong.
    if self.huefacet:
        h = np.histogram(data[self.channel], bins=bins)
        ymax = np.max(h[0])
        plt.ylim(0, 1.1 * ymax)

    # if we have a hue facet and a lot of hues, make a color bar instead of a
    # super-long legend.
    if self.huefacet and legend:
        current_palette = mpl.rcParams['axes.color_cycle']
        if util.is_numeric(experiment.data[self.huefacet]) and \
                len(g.hue_names) > len(current_palette):
            plot_ax = plt.gca()
            cmap = mpl.colors.ListedColormap(
                sns.color_palette("husl", n_colors=len(g.hue_names)))
            cax, _ = mpl.colorbar.make_axes(plt.gca())
            norm = mpl.colors.Normalize(vmin=np.min(g.hue_names),
                                        vmax=np.max(g.hue_names),
                                        clip=False)
            mpl.colorbar.ColorbarBase(cax, cmap=cmap, norm=norm,
                                      label=self.huefacet)
            plt.sca(plot_ax)
        else:
            g.add_legend(title=self.huefacet)

    return g
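# A self-contained sketch (toy numbers of my own) of the bin-subdivision logic
# in `plot` above: every BinningOp edge is kept, and `bins_per_hue` sub-edges
# are inserted between consecutive edges, so histogram bins never straddle a
# hue bin boundary (which is what causes the aliasing mentioned in the comment).
import math
import numpy as np

xmin, xmax = 0.0, 3.0
hue_bins = np.array([1.0, 2.0, 3.0])     # BinningOp edges inside (xmin, xmax]
num_bins, num_hues = 9, 3
bins_per_hue = math.ceil(num_bins / num_hues)

new_bins = [xmin]
for end in [b for b in hue_bins if xmin < b <= xmax]:
    new_bins = np.append(
        new_bins,
        np.linspace(new_bins[-1], end, bins_per_hue + 1, endpoint=True)[1:])
print(new_bins)  # -> [0., 1/3, 2/3, 1., 4/3, 5/3, 2., 7/3, 8/3, 3.]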
def __solver__(self, p):
    isMOP = p.probType == 'MOP'
    if isMOP:
        from interalgMOP import r14MOP
    isODE = p.probType == 'ODE'
    isSNLE = p.probType in ('NLSP', 'SNLE')

    if not p.__isFiniteBoxBounded__() and not isODE:
        p.err('''
        solver %s requires finite lb, ub: lb <= x <= ub
        (you can use "implicitBounds")''' % self.__name__)
    if p.probType in ('LP', 'MILP'):
        p.err("the solver can't handle problems of type " + p.probType)
    if not p.isFDmodel:
        p.err('solver %s can handle only FuncDesigner problems' % self.__name__)

    dataType = self.dataType
    if type(dataType) == str:
        if not hasattr(np, dataType):
            p.pWarn('your architecture has no type "%s", float64 will be used instead'
                    % dataType)
            dataType = 'float64'
        dataType = getattr(np, dataType)
        self.dataType = dataType

    isIP = p.probType == 'IP'
    if isIP:
        pb = r14IP
        p._F = asarray(0, self.dataType)
        p._residual = 0.0
        f_int = p.user.f[0].interval(p.domain, self.dataType)
        p._r0 = prod(p.ub - p.lb) * (f_int.ub - f_int.lb)
        p._volume = 0.0
        p.kernelIterFuncs.pop(IS_NAN_IN_X)
    elif isMOP:
        pb = r14MOP
    else:
        pb = r14

    for val in p._x0.values():
        if isinstance(val, (list, tuple, np.ndarray)) and len(val) > 1:
            p.pWarn('''
            solver %s currently can handle only single-element variables,
            use oovars(n) instead of oovar(size=n),
            otherwise the correct result is not guaranteed''' % self.__name__)

    vv = list(p._freeVarsList)
    x0 = dict([(v, p._x0[v]) for v in vv])

    for val in x0.values():
        if isinstance(val, (list, tuple, np.ndarray)) and len(val) > 1:
            p.err('''
            solver %s currently can handle only single-element variables,
            use oovars(n) instead of oovar(size=n)''' % self.__name__)

    point = p.point

    p.kernelIterFuncs.pop(SMALL_DELTA_X, None)
    p.kernelIterFuncs.pop(SMALL_DELTA_F, None)
    p.kernelIterFuncs.pop(MAX_NON_SUCCESS, None)

    if not bottleneck_is_present and not isODE:
        p.pWarn('''
        installation of Python module "bottleneck"
        (http://berkeleyanalytics.com/bottleneck,
        available via easy_install, takes several minutes for compilation)
        could speedup the solver %s''' % self.__name__)

    n = p.n
    maxSolutions = p.maxSolutions
    if maxSolutions == 0:
        maxSolutions = 10**50
    if maxSolutions != 1 and p.fEnough != -np.inf:
        p.warn('''
        using the solver interalg with non-single solutions mode
        is not adjusted with the fEnough stop criterion yet, it will be omitted''')
        p.kernelIterFuncs.pop(FVAL_IS_ENOUGH)

    nNodes = []
    p.extras['nNodes'] = nNodes
    nActiveNodes = []
    p.extras['nActiveNodes'] = nActiveNodes

    Solutions = Solution()
    Solutions.maxNum = maxSolutions
    Solutions.solutions = []
    Solutions.coords = np.array([]).reshape(0, n)
    p.solutions = Solutions

    lb, ub = asarray(p.lb, dataType).copy(), asarray(p.ub, dataType).copy()

    fTol = p.fTol
    if isIP or isODE:
        if p.ftol is None:
            if fTol is not None:
                p.ftol = fTol
            else:
                p.err('interalg requires user-supplied ftol (required precision)')
        if fTol is None:
            fTol = p.ftol
        elif fTol != p.ftol:
            p.err('you have provided both ftol and fTol')

    if fTol is None and not isMOP:  # TODO: require tols for MOP
        fTol = 1e-7
        p.warn('solver %s requires p.fTol value (required objective function tolerance); 10^-7 will be used'
               % self.__name__)

    xRecord = 0.5 * (lb + ub)
    adjustr4WithDiscreteVariables(xRecord.reshape(1, -1), p)

    r40 = np.inf
    y = lb.reshape(1, -1)
    e = ub.reshape(1, -1)
    r41 = np.inf

    # TODO: maybe rework it, especially for the constrained case
    fStart = self.fStart

    # TODO: remove it after proper SNLE handling implementation
    if isSNLE:
        r41 = 0.0
        eqs = [fd_abs(elem) for elem in p.user.f]
        asdf1 = fd_sum(eqs)
        # TODO: check it, for reducing calculations
        # C.update([elem == 0 for elem in p.user.f])
    elif isMOP:
        asdf1 = p.user.f
        Solutions.F = []
        if point(p.x0).isFeas(altLinInEq=False):
            Solutions.solutions.append(p.x0.copy())
            Solutions.coords = asarray(Solutions.solutions)
            Solutions.F.append(p.f(p.x0))
        p._solutions = Solutions
    elif not isODE:
        asdf1 = p.user.f[0]
        if p.goal in ('max', 'maximum'):
            asdf1 = -asdf1
            if p.fOpt is not None:
                p.fOpt = -p.fOpt
        if fStart is not None and fStart < r40:
            r41 = fStart
        for X0 in [point(xRecord), point(p.x0)]:
            if X0.isFeas(altLinInEq=False) and X0.f() < r40:
                r40 = X0.f()
        if p.isFeas(p.x0):
            tmp = asdf1(p._x0)
            if tmp < r41:
                r41 = tmp
        if p.fOpt is not None:
            if p.fOpt > r41:
                p.warn('user-provided fOpt seems to be incorrect, ')
            r41 = p.fOpt

    domain = oopoint([(v, [p.lb[i], p.ub[i]]) for i, v in enumerate(vv)],
                     skipArrayCast=True)
    domain.dictOfFixedFuncs = p.dictOfFixedFuncs

    if self.dataHandling == 'auto':
        if isIP or isODE:
            self.dataHandling = 'sorted'
        elif isMOP or p.hasLogicalConstraints:
            self.dataHandling = 'raw'
        else:
            r = p.user.f[0].interval(domain, self.dataType)
            M = np.max((np.max(np.atleast_1d(np.abs(r.lb))),
                        np.max(np.atleast_1d(np.abs(r.ub)))))
            for (c, func, lb, ub, tol) in p._FD.nonBoxCons:
                # !!!!!!!!!!!!!!!!!!!! check it - mb 2nd condition is incorrect
                if hasattr(c, '_unnamedBooleanOOFunNumber'):
                    continue
                r = func.interval(domain, self.dataType)
                M = np.max((M, np.max(np.atleast_1d(np.abs(r.lb)))))
                M = np.max((M, np.max(np.atleast_1d(np.abs(r.ub)))))
            self.dataHandling = 'raw' if M < 1e5 else 'sorted'

        # TODO: is it required yet?
        if not isMOP and not p.hasLogicalConstraints:
            p._isOnlyBoxBounded = p.__isNoMoreThanBoxBounded__()
            if isODE or (asdf1.isUncycled and p._isOnlyBoxBounded
                         and np.all(np.isfinite(p.user.f[0].interval(domain).lb))):
                self.dataHandling = 'sorted'

    if self.dataHandling == 'sorted' and p.hasLogicalConstraints:
        p.warn("interalg: for general logical constraints only dataHandling='raw' mode works")
        self.dataHandling = 'raw'

    self.maxActiveNodes = int(self.maxActiveNodes)
    self.maxNodes = int(self.maxNodes)

    _in = np.array([], object)

    g = np.inf
    C = p._FD.nonBoxConsWithTolShift
    C0 = p._FD.nonBoxCons

    if isSNLE:
        C += [(elem == 0, elem, -(elem.tol if elem.tol != 0 else p.ftol),
               (elem.tol if elem.tol != 0 else p.ftol)) for elem in p.user.f]
        C0 += [(elem == 0, elem, 0, 0, (elem.tol if elem.tol != 0 else p.ftol))
               for elem in p.user.f]

    # TODO: handle fixed variables here
    varTols = p.variableTolerances
    if Solutions.maxNum != 1:
        if not isSNLE:
            p.err('''
            "search several solutions" mode is unimplemented
            for the prob type %s yet''' % p.probType)
        if any(varTols == 0):
            p.err('''
            for the mode "search all solutions"
            you have to provide all non-zero tolerances
            for each variable (oovar)''')

    pnc = 0
    an = []
    maxNodes = self.maxNodes

    # TODO: change for constrained probs
    _s = atleast_1d(inf)

    if isODE or (isIP and p.n == 1):
        interalg_ODE_routine(p, self)
        return

    while 1:
        if len(C0) != 0:
            y, e, nlhc, residual, definiteRange, indT, _s = \
                processConstraints(C0, y, e, _s, p, dataType)
        else:
            nlhc, residual, definiteRange, indT = None, None, True, None

        if y.size != 0:
            an, g, fo, _s, Solutions, xRecord, r41, r40 = \
                pb(p, nlhc, residual, definiteRange, y, e, vv, asdf1, C, r40, g,
                   nNodes, r41, fTol, Solutions, varTols, _in,
                   dataType, maxNodes, _s, indT, xRecord)
            if _s is None:
                break
        else:
            an = _in
            fo = 0.0 if isSNLE or isMOP else \
                min((r41, r40 - (fTol if Solutions.maxNum == 1 else 0.0)))

        pnc = max((len(np.atleast_1d(an)), pnc))

        if isIP:
            y, e, _in, _s = \
                func12(an, self.maxActiveNodes, p, Solutions, vv, varTols, np.inf)
        else:
            y, e, _in, _s = \
                func12(an, self.maxActiveNodes, p, Solutions, vv, varTols, fo)
        nActiveNodes.append(y.shape[0] / 2)

        if y.size == 0:
            if len(Solutions.coords) > 1:
                p.istop, p.msg = 1001, 'all solutions have been obtained'
            else:
                p.istop, p.msg = 1000, 'solution has been obtained'
            break
    ############# End of main cycle ###############

    if not isSNLE and not isIP and not isMOP:
        if p._bestPoint.betterThan(p.point(p.xk)):
            p.iterfcn(p._bestPoint)
        else:
            p.iterfcn(p.xk)

    ff = p.fk  # ff may be not assigned yet

    if isIP:
        p.xk = np.array([np.nan] * p.n)
        p.rk = p._residual
        p.fk = p._F

    isFeas = len(Solutions.F) != 0 if isMOP else \
        p.isFeas(p.xk) if not isIP else p.rk < fTol
    if not isFeas and p.istop > 0:
        p.istop, p.msg = -1000, 'no feasible solution has been obtained'

    o = asarray([t.o for t in an])
    if o.size != 0:
        g = nanmin([nanmin(o), g])
    if not isMOP:
        p.extras['isRequiredPrecisionReached'] = \
            True if ff - g < fTol and isFeas else False
        # and (k is False or (isSNLE and (p._nObtainedSolutions >= maxSolutions or maxSolutions==1)))
    if not isMOP and not p.extras['isRequiredPrecisionReached'] and p.istop > 0:
        p.istop = -1
        p.msg = 'required precision is not guaranteed'

    # TODO: simplify it
    if not isMOP:
        tmp = [nanmin(np.hstack((ff, g, o.flatten()))),
               np.asscalar(np.array(ff))]
        if p.goal in ['max', 'maximum']:
            tmp = (-tmp[1], -tmp[0])
        p.extras['extremumBounds'] = tmp if not isIP else 'unimplemented for IP yet'

    p.solutions = [p._vector2point(s) for s in Solutions.coords] if not isMOP else \
        MOPsolutions([p._vector2point(s) for s in Solutions.coords])
    if isMOP:
        for i, s in enumerate(p.solutions):
            s.useAsMutable = True
            for j, goal in enumerate(p.user.f):
                s[goal] = Solutions.F[i][j]
            s.useAsMutable = False
        p.solutions.values = np.asarray(Solutions.F)
        p.solutions.coords = Solutions.coords
    if not isMOP and p.maxSolutions == 1:
        delattr(p, 'solutions')
    if isSNLE and p.maxSolutions != 1:
        for v in p._categoricalVars:
            for elem in r.solutions:
                elem.useAsMutable = True
                elem[v] = v.aux_domain[elem[v]]
                elem.useAsMutable = False

    if p.iprint >= 0 and not isMOP:
        s = 'Solution with required tolerance %0.1e \n is%s guaranteed' \
            % (fTol, '' if p.extras['isRequiredPrecisionReached'] else ' NOT')
        if not isIP and p.maxSolutions == 1:
            s += ' (obtained precision: %0.1e)' % np.abs(tmp[1] - tmp[0])
        if not p.extras['isRequiredPrecisionReached'] and pnc == self.maxNodes:
            s += '\nincrease maxNodes (current value %d)' % self.maxNodes
        p.info(s)
def simulate(self, tick=1, reactions=None):
    """Run the boolean-network simulation.

    `reactions` is a list of reactions (one per edge); if None, the model's
    buffered reactions are used.
    """
    # pandas is very convenient but slower than numpy.
    # The DataFrame instantiation is costly as well;
    # for small models, it has a non-negligible cost.

    # what about a species that is both inhibited and measured?
    testVal = 1e-3
    values = self.values.copy()

    if self.debug:
        self.debug_values = []
    self.residuals = []
    self.penalties = []

    self.count = 0
    self.nSp = len(values)
    residual = 1.

    frac = 1.2
    # FIXME: the +1 below is to have the same results as in CellNOptR.
    # Due to cycles, you may otherwise not end up with the same results;
    # this happens if you have cycles with inhibitions and an odd number of edges.
    if reactions is None:
        reactions = self.model.buffer_reactions
    self.number_edges = len(reactions)

    # about 10% of the time is spent here
    predecessors = defaultdict(collections.deque)
    for r in reactions:
        k, v = self._reac2pred[r]
        predecessors[k].extend(v)

    # speed up
    keys = self.values.keys()
    length_predecessors = dict([(node, len(predecessors[node])) for node in keys])

    # if there is an inhibition/drug, the node is 0
    values = self.values.copy()
    for inh in self.inhibitors_names:
        if length_predecessors[inh] == 0:
            values[inh] = np.zeros(self.N)

    while (self.count < self.nSp * frac + 1.) and residual > testVal:
        self.previous = values.copy()

        # compute AND gates first
        for node in self.and_gates:
            # replace NA by a large number so that min is unchanged;
            # there are always predecessors
            if length_predecessors[node] != 0:
                values[node] = bn.nanmin(
                    np.array([values[x] for x in predecessors[node]]), axis=0)
            else:
                values[node] = self.previous[node]

        for node in self.tochange:
            # easy one, just the value of predecessors
            if length_predecessors[node] == 0:
                pass  # nothing to change
            else:
                # TODO: if only one input, no need for that, just propagate the signal
                dummy = np.array([values[x] if (x, node) not in self.toflip
                                  else 1 - values[x]
                                  for x in predecessors[node]])
                values[node] = bn.nanmax(dummy, axis=0)

            # take inhibitors into account
            if node in self.inhibitors_names:
                # if the inhibitor is on (1), multiply by 0;
                # if the inhibitor is not active (0), do nothing
                values[node] *= 1 - self.inhibitors[node].values

        # here NAs are set automatically to zero because of the int16 cast,
        # but it helps speeding up the code a bit by removing the need to take
        # care of NAs; if we use nansum, NAs are ignored even when 1 is
        # compared to NA
        self.m1 = np.array([self.previous[k] for k in keys], dtype=np.int16)
        self.m2 = np.array([values[k] for k in keys], dtype=np.int16)
        residual = np.nansum(np.square(self.m1 - self.m2))

        # TODO: the stop criterion should account for the length of the path
        # from the species to the node itself, so count < nSp should be taken
        # into account whatever the residual is.
        if self.debug:
            self.debug_values.append(self.previous.copy())
        self.residuals.append(residual)
        self.count += 1

    if self.debug is True:
        # add the latest values simulated in the while loop
        self.debug_values.append(values.copy())

    # Need to set undefined values to NAs
    self.simulated[self.time] = np.array(
        [values[k] for k in self.data.df.columns], dtype=float)
    self.prev = {}
    self.prev[self.time] = np.array(
        [self.previous[k] for k in self.data.df.columns], dtype=float)
    mask = self.prev[self.time] != self.simulated[self.time]
    self.simulated[self.time][mask] = np.nan
    self.simulated[self.time] = self.simulated[self.time].transpose()
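# A minimal sketch (toy data, not part of the class above) of the update rule
# used in `simulate`: AND gates take the element-wise NaN-aware minimum of
# their inputs, ordinary nodes take the NaN-aware maximum (a logical OR), and
# flipped (inhibiting) edges would contribute 1 - value instead.
import numpy as np
import bottleneck as bn

inputs = np.array([[1.0, 0.0, np.nan],
                   [1.0, 1.0, 0.0]])    # two predecessor nodes, three samples
and_gate = bn.nanmin(inputs, axis=0)    # -> [1., 0., 0.]
or_node = bn.nanmax(inputs, axis=0)     # -> [1., 1., 0.]
print(and_gate, or_node)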
def r14MOP(p, nlhc, residual, definiteRange, y, e, vv, asdf1, C, r40, g, nNodes,
           r41, fTol, Solutions, varTols, _in, dataType,
           maxNodes, _s, indTC, xRecord):

    assert p.probType == 'MOP'

    if len(p._discreteVarsNumList):
        y, e = adjustDiscreteVarBounds(y, e, p)

    if p.nProc != 1 and getattr(p, 'pool', None) is None:
        p.pool = Pool(processes=p.nProc)
    elif p.nProc == 1:
        p.pool = None

    targets = p.targets  # TODO: check it
    m, n = y.shape
    ol, al = [[] for k in range(m)], [[] for k in range(m)]
    for i, t in enumerate(targets):
        o, a, definiteRange = func82(y, e, vv, t.func, dataType, p)
        o, a = o.reshape(2 * n, m).T, a.reshape(2 * n, m).T
        for j in range(m):
            ol[j].append(o[j])
            al[j].append(a[j])

    nlhf = r43(targets, Solutions.F, ol, al, p.pool, p.nProc)

    fo_prev = 0
    # TODO: remove NaN nodes here

    if y.size == 0:
        return _in, g, fo_prev, _s, Solutions, xRecord, r41, r40

    nodes = func11(y, e, nlhc, indTC, residual, ol, al, _s, p)

    assert p.solver.dataHandling == 'raw', '"sorted" mode is unimplemented for MOP yet'

    if nlhf is None:
        new_nodes_tnlh_all = nlhc
    elif nlhc is None:
        new_nodes_tnlh_all = nlhf
    else:
        new_nodes_tnlh_all = nlhf + nlhc

    asdf1 = [t.func for t in p.targets]
    r5F, r5Coords = getr4Values(vv, y, e, new_nodes_tnlh_all, asdf1, C,
                                p.contol, dataType, p)

    nIncome, nOutcome = r44(Solutions, r5Coords, r5F, targets, p.solver.sigma)
    fo = 0  # unused for MOP

    # TODO: better handling of nlhc for unconstrained probs
    if len(_in) != 0:
        an = hstack((nodes, _in))
    else:
        an = atleast_1d(nodes)

    hasNewParetoNodes = nIncome != 0
    if hasNewParetoNodes:
        ol2 = [node.o for node in an]
        al2 = [node.a for node in an]
        nlhc2 = [node.nlhc for node in an]
        nlhf2 = r43(targets, Solutions.F, ol2, al2, p.pool, p.nProc)
        tnlh_all = asarray(nlhc2) if nlhf2 is None else \
            nlhf2 if nlhc2[0] is None else asarray(nlhc2) + nlhf2
    else:
        tnlh_all = vstack([new_nodes_tnlh_all] + [node.tnlh_all for node in _in]) \
            if len(_in) != 0 else new_nodes_tnlh_all

    for i, node in enumerate(nodes):
        node.tnlh_all = tnlh_all[i]

    r10 = logical_not(any(isfinite(tnlh_all), 1))
    if any(r10):
        ind = where(logical_not(r10))[0]
        an = asarray(an[ind])
        tnlh_all = take(tnlh_all, ind, axis=0, out=tnlh_all[:ind.size])

    T1, T2 = tnlh_all[:, :tnlh_all.shape[1] // 2], tnlh_all[:, tnlh_all.shape[1] // 2:]
    T = where(logical_or(T1 < T2, isnan(T2)), T1, T2)
    t = nanargmin(T, 1)
    w = arange(t.size)
    NN = T[w, t].flatten()
    for i, node in enumerate(an):
        node.tnlh_all = tnlh_all[i]
        node.tnlh_curr_best = NN[i]
    astnlh = argsort(NN)
    an = an[astnlh]

    p._t = t

    # TODO: form _s on another level (for active nodes only), to reduce calculations
    if len(an) != 0:
        nlhf_fixed = asarray([node.nlhf for node in an])
        nlhc_fixed = asarray([node.nlhc for node in an])
        T = nlhf_fixed + nlhc_fixed if nlhc_fixed[0] is not None else nlhf_fixed
        p.__s = nanmin(vstack((T[w, t], T[w, n + t])), 0)
    else:
        p.__s = array([])

    p._frontLength = len(Solutions.F)
    p._nIncome = nIncome
    p._nOutcome = nOutcome
    p.iterfcn(p.x0)
    if p.istop != 0:
        return an, g, fo, None, Solutions, xRecord, r41, r40

    nn = maxNodes
    an, g = func5(an, nn, g, p)
    nNodes.append(len(an))

    return an, g, fo, _s, Solutions, xRecord, r41, r40
def time_nanmax(self, dtype, shape):
    bn.nanmax(self.arr)
def r14(p, nlhc, residual, definiteRange, y, e, vv, asdf1, C, r40, g, nNodes,
        r41, fTol, Solutions, varTols, _in, dataType,
        maxNodes, _s, indTC, xRecord):

    isSNLE = p.probType in ('NLSP', 'SNLE')
    maxSolutions, solutions, coords = \
        Solutions.maxNum, Solutions.solutions, Solutions.coords

    if len(p._discreteVarsNumList):
        y, e = adjustDiscreteVarBounds(y, e, p)

    o, a, r41 = r45(y, e, vv, p, asdf1, dataType, r41, nlhc)

    fo_prev = float(0 if isSNLE else
                    min((r41, r40 - (fTol if maxSolutions == 1 else 0))))
    if fo_prev > 1e300:
        fo_prev = 1e300

    y, e, o, a, _s, indTC, nlhc, residual = \
        func7(y, e, o, a, _s, indTC, nlhc, residual)

    if y.size == 0:
        return _in, g, fo_prev, _s, Solutions, xRecord, r41, r40

    nodes = func11(y, e, nlhc, indTC, residual, o, a, _s, p)

    if p.solver.dataHandling == 'raw':
        tmp = o.copy()
        tmp[tmp > fo_prev] = -inf
        M = atleast_1d(nanmax(tmp, 1))
        for i, node in enumerate(nodes):
            node.th_key = M[i]

        if not isSNLE:
            for node in nodes:
                node.fo = fo_prev

        if nlhc is not None:
            for i, node in enumerate(nodes):
                node.tnlhf = node.nlhf + node.nlhc
        else:
            for i, node in enumerate(nodes):
                node.tnlhf = node.nlhf  # TODO: improve it

        an = hstack((nodes, _in))

        tnlh_fixed_local = vstack([node.tnlhf for node in nodes])

        tmp = a.copy()
        tmp[tmp > fo_prev] = fo_prev
        tmp2 = tmp - o
        tmp2[tmp2 < 1e-300] = 1e-300
        tmp2[o > fo_prev] = nan
        tnlh_curr = tnlh_fixed_local - log2(tmp2)
        tnlh_curr_best = nanmin(tnlh_curr, 1)
        for i, node in enumerate(nodes):
            node.tnlh_curr = tnlh_curr[i]
            node.tnlh_curr_best = tnlh_curr_best[i]
    else:
        tnlh_curr = None

    # TODO: don't calculate PointVals for zero-p regions
    PointVals, PointCoords = getr4Values(vv, y, e, tnlh_curr, asdf1, C,
                                         p.contol, dataType, p)

    if PointVals.size != 0:
        xk, Min = r2(PointVals, PointCoords, dataType)
    else:
        # all points have been removed by func7
        xk = p.xk
        Min = nan

    if r40 > Min:
        r40 = Min
        xRecord = xk.copy()  # TODO: is copy required?
    if r41 > Min:
        r41 = Min

    fo = float(0 if isSNLE else
               min((r41, r40 - (fTol if maxSolutions == 1 else 0))))

    if p.solver.dataHandling == 'raw':
        if fo != fo_prev and not isSNLE:
            fos = array([node.fo for node in an])
            th_keys = array([node.th_key for node in an])
            delta_fos = fos - fo
            ind_update = where(10 * delta_fos > fos - th_keys)[0]
            nodesToUpdate = an[ind_update]
            update_nlh = ind_update.size != 0
            if update_nlh:
                updateNodes(nodesToUpdate, fo)

        tmp = asarray([node.key for node in an])
        r10 = where(tmp > fo)[0]
        if r10.size != 0:
            mino = [an[i].key for i in r10]
            mmlf = nanmin(asarray(mino))
            g = nanmin((g, mmlf))

        NN = atleast_1d([node.tnlh_curr_best for node in an])
        r10 = logical_or(isnan(NN), NN == inf)
        if any(r10):
            ind = where(logical_not(r10))[0]
            an = an[ind]
            NN = NN[ind]

        if not isSNLE or p.maxSolutions == 1:
            astnlh = argsort(NN)
            an = an[astnlh]
    else:  # dataHandling == 'sorted'
        if isSNLE and p.maxSolutions != 1:
            an = hstack((nodes, _in))
        else:
            nodes.sort(key=lambda obj: obj.key)
            if len(_in) == 0:
                an = nodes
            else:
                arr1 = [node.key for node in _in]
                arr2 = [node.key for node in nodes]
                r10 = searchsorted(arr1, arr2)
                an = insert(_in, r10, nodes)

    if maxSolutions != 1:
        Solutions = r46(o, a, PointCoords, PointVals, fTol, varTols, Solutions)

        p._nObtainedSolutions = len(solutions)
        if p._nObtainedSolutions > maxSolutions:
            solutions = solutions[:maxSolutions]
            p.istop = 0
            p.msg = 'user-defined maximal number of solutions (p.maxSolutions = %d) has been exceeded' % p.maxSolutions
            return an, g, fo, None, Solutions, xRecord, r41, r40

    p.iterfcn(xRecord, r40)
    if p.istop != 0:
        return an, g, fo, None, Solutions, xRecord, r41, r40
    if isSNLE and maxSolutions == 1 and Min <= fTol:
        # TODO: rework it for nonlinear systems with non-bound constraints
        p.istop, p.msg = 1000, 'required solution has been obtained'
        return an, g, fo, None, Solutions, xRecord, r41, r40

    an, g = func9(an, fo, g, p)
    nn = maxNodes
    an, g = func5(an, nn, g, p)
    nNodes.append(len(an))

    return an, g, fo, _s, Solutions, xRecord, r41, r40
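# A toy illustration (my own numbers, under my reading of r14 above) of the
# "tnlh" bookkeeping: the upper bound `a` of each box is clipped at the
# incumbent `fo`, and -log2(a - o) is taken, so a small tnlh marks a box whose
# objective enclosure [o, a] is still wide; `an` is then sorted ascending on
# this score, processing the widest boxes first.
import numpy as np

o = np.array([0.0, 0.2])          # lower bounds of two boxes
a = np.array([1.0, 0.25])         # upper bounds
fo = 0.5                          # current incumbent value
tmp = np.minimum(a, fo) - o
tmp[tmp < 1e-300] = 1e-300
tnlh = -np.log2(tmp)
print(tnlh)                       # -> [1.0, ~4.32]; the tighter box scores higher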
def iqg(Self, domain, dtype=float, lb=None, ub=None, UB=None):
    if type(domain) != ooPoint:
        domain = ooPoint(domain, skipArrayCast=True)
        domain.isMultiPoint = True
    domain.useSave = True
    r0 = Self.interval(domain, dtype, resetStoredIntervals=False)

    r0.lb, r0.ub = atleast_1d(r0.lb).copy(), atleast_1d(r0.ub).copy()  # is copy required?

    # TODO: get rid of useSave
    domain.useSave = False

    # TODO: rework it with indexation of required data
    if lb is not None and ub is not None:
        ind = logical_or(logical_or(r0.ub < lb, r0.lb > ub),
                         all(logical_and(r0.lb >= lb, r0.ub <= ub)))
    elif UB is not None:
        ind = r0.lb > UB
    else:
        ind = None

    useSlicing = False

    if ind is not None:
        if all(ind):
            return {}, r0
        j = where(~ind)[0]
        # DOESN'T WORK FOR FIXED OOVARS AND DefiniteRange != TRUE YET
        if 0 and j.size < 0.85 * ind.size:  # at least 15% of values to skip
            useSlicing = True
            tmp = []
            for key, val in domain.storedIntervals.items():
                Interval, definiteRange = val
                if type(definiteRange) not in (bool, bool_):
                    definiteRange = definiteRange[j]
                tmp.append((key, (Interval[:, j], definiteRange)))
            _storedIntervals = dict(tmp)

            Tmp = []
            for key, val in domain.storedSums.items():
                # TODO: rework it
                R0, DefiniteRange0 = val.pop(-1)
                R0 = R0[:, j]
                if type(DefiniteRange0) not in (bool, bool_):
                    DefiniteRange0 = DefiniteRange0[j]
                tmp = []
                for k, v in val.items():
                    # TODO: rework it
                    v = v[:, j]
                    tmp.append((k, v))
                val = dict(tmp)
                val[-1] = (R0, DefiniteRange0)
                Tmp.append((key, val))
            _storedSums = dict(Tmp)

            Tmp = []
            for key, val in domain.items():
                lb_, ub_ = val
                # TODO: rework it when lb, ub will be implemented as 2-dimensional
                Tmp.append((key, (lb_[j], ub_[j])))
            dictOfFixedFuncs = domain.dictOfFixedFuncs
            domain2 = ooPoint(Tmp, skipArrayCast=True)
            domain2.storedSums = _storedSums
            domain2.storedIntervals = _storedIntervals
            domain2.dictOfFixedFuncs = dictOfFixedFuncs
            domain2.isMultiPoint = True
            domain = domain2

    domain.useAsMutable = True

    r = {}
    Dep = (Self._getDep() if not Self.is_oovar else set([Self])).intersection(domain.keys())

    for i, v in enumerate(Dep):
        domain.modificationVar = v
        r_l, r_u = _iqg(Self, domain, dtype, r0)
        if useSlicing and r_l is not r0:  # r_l is r0 when array_equal(lb, ub)
            lf1, lf2, uf1, uf2 = r_l.lb, r_u.lb, r_l.ub, r_u.ub
            Lf1, Lf2, Uf1, Uf2 = Copy(r0.lb), Copy(r0.lb), Copy(r0.ub), Copy(r0.ub)
            Lf1[:, j], Lf2[:, j], Uf1[:, j], Uf2[:, j] = lf1, lf2, uf1, uf2
            r_l.lb, r_u.lb, r_l.ub, r_u.ub = Lf1, Lf2, Uf1, Uf2
            if type(r0.definiteRange) not in (bool, bool_):
                d1, d2 = r_l.definiteRange, r_u.definiteRange
                D1, D2 = atleast_1d(r0.definiteRange).copy(), \
                    atleast_1d(r0.definiteRange).copy()
                D1[j], D2[j] = d1, d2
                r_l.definiteRange, r_u.definiteRange = D1, D2

        r[v] = r_l, r_u
        if not Self.isUncycled:
            lf1, lf2, uf1, uf2 = r_l.lb, r_u.lb, r_l.ub, r_u.ub
            lf, uf = nanmin(vstack((lf1, lf2)), 0), nanmax(vstack((uf1, uf2)), 0)
            if i == 0:
                L, U = lf.copy(), uf.copy()
            else:
                L[L < lf] = lf[L < lf].copy()
                U[U > uf] = uf[U > uf].copy()

    if not Self.isUncycled:
        for R in r.values():
            r1, r2 = R
            if type(r1.lb) != np.ndarray:
                r1.lb, r2.lb, r1.ub, r2.ub = \
                    atleast_1d(r1.lb), atleast_1d(r2.lb), \
                    atleast_1d(r1.ub), atleast_1d(r2.ub)
            r1.lb[r1.lb < L] = L[r1.lb < L]
            r2.lb[r2.lb < L] = L[r2.lb < L]
            r1.ub[r1.ub > U] = U[r1.ub > U]
            r2.ub[r2.ub > U] = U[r2.ub > U]

        r0.lb[r0.lb < L] = L[r0.lb < L]
        r0.ub[r0.ub > U] = U[r0.ub > U]

    # for more safety
    domain.useSave = True
    domain.useAsMutable = False
    domain.modificationVar = None
    domain.storedIntervals = {}

    return r, r0
def filter_position_1d(time, flux, star_movement, timescale_position_smooth=None, dt=None):
    """Filter the lightcurve for correlations in the star's position on the CCD."""

    # Check input:
    assert len(time) == len(flux), "TIME and FLUX should have the same number of elements."

    if timescale_position_smooth is not None and dt is None:
        dt = median(diff(time))

    # Settings:
    # num_knots = 15
    # min_points_per_knot = 3
    # spline_degree = 2
    # sigma_clip_spline = 4.0

    # Build up xpos chunk by chunk of the timeseries:
    xpos = np.empty_like(time, dtype='float64')
    for chk, chunk in enumerate(star_movement['chunks']):
        # Extract needed information:
        cl = star_movement['curvelength'][chk]  # sorted in position
        indx_possort = star_movement['indx_possort'][chk]
        indx_timesort = star_movement['indx_timesort'][chk]

        # Create a smooth curve of flux as a function of curvelength;
        # the resulting "xp" will be sorted by position.
        fl = flux[chunk][indx_possort]

        """indx_finite = isfinite(cl) & isfinite(fl)
        knots = spline_set_knots(cl[indx_finite], num_knots)

        # Create the fixed knots for the spline function:
        knots = np.linspace(nanmin(cl[indx_finite]), nanmax(cl[indx_finite]), num_knots+2)[1:-2]

        # Remove knots if there is not at least 3 points between them:
        newknots = array([], dtype='float64')
        for i in range(len(knots)-1):
            indx_data_between_knots = (knots[i] < cl[indx_finite]) & (cl[indx_finite] < knots[i+1])
            if sum(indx_data_between_knots) > min_points_per_knot:
                newknots = append(newknots, knots[i])
        knots = newknots

        # Do a spline where all points are given the same weight:
        spline = LSQUnivariateSpline(cl[indx_finite], fl[indx_finite], knots, w=None, k=spline_degree)

        # Begin iterating so we can change the weights:
        for iterations in range(2):
            # Calculate weight of points based on their distance to
            # the previously calculated spline:
            d = np.abs(fl[indx_finite] - spline(cl[indx_finite]))
            s = mad_to_sigma * median(d)
            w = 0.5*(np.sign(sigma_clip_spline - d/s) + 1)  # Heaviside cutoff-function

            # Recalculate the spline, using the weights:
            spline = LSQUnivariateSpline(cl[indx_finite], fl[indx_finite], knots, w=w, k=spline_degree)

        # Evaluate the spline function at the curvelengths of the datapoints:
        # The spline function will return NaN if passed a NaN
        xp = spline(cl)
        """

        lowess_frac = 0.1 / (nanmax(cl[np.isfinite(fl)]) - nanmin(cl[np.isfinite(fl)]))
        xp = lowess(fl, cl, frac=lowess_frac, it=3, is_sorted=True, return_sorted=False)

        # Sort back into time-sorting and put NaNs back,
        # then low-pass filter the result:
        if timescale_position_smooth is None:
            xpos[chunk] = xp[indx_timesort]
        else:
            xpos[chunk] = moving_nanmedian(time[chunk], xp[indx_timesort],
                                           timescale_position_smooth, dt=dt)

    # Return the final time-sorted series:
    return xpos
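# A hedged, self-contained illustration of the LOWESS step above, using
# statsmodels' lowess (the bare `lowess` name in filter_position_1d is assumed
# to refer to this function): frac = 0.1 / span makes the smoothing window
# cover roughly 0.1 units of curvelength, assuming roughly uniform sampling,
# regardless of the chunk's extent.
import numpy as np
from statsmodels.nonparametric.smoothers_lowess import lowess

cl = np.linspace(0.0, 2.0, 200)                 # curvelength, span = 2.0
fl = np.sin(2 * np.pi * cl) + 0.1 * np.random.randn(200)
frac = 0.1 / (np.nanmax(cl) - np.nanmin(cl))    # -> 0.05 of the points
xp = lowess(fl, cl, frac=frac, it=3, is_sorted=True, return_sorted=False)
print(xp.shape)                                 # (200,) smoothed flux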
def __solver__(self, p):
    isMOP = p.probType == 'MOP'
    if isMOP:
        from interalgMOP import r14MOP
    #isOpt = p.probType in ['NLP', 'NSP', 'GLP', 'MINLP']
    isODE = p.probType == 'ODE'
    isSNLE = p.probType in ('NLSP', 'SNLE')

    if not p.__isFiniteBoxBounded__() and not isODE:
        p.err('''
        solver %s requires finite lb, ub: lb <= x <= ub
        (you can use "implicitBounds")''' % self.__name__)
#    if p.fixedVars is not None:
#        p.err('solver %s cannot handle FuncDesigner problems with some variables declared as fixed' % self.__name__)
    if p.probType in ('LP', 'MILP'):
        p.err("the solver can't handle problems of type " + p.probType)
    if not p.isFDmodel:
        p.err('solver %s can handle only FuncDesigner problems' % self.__name__)

    dataType = self.dataType
    if type(dataType) == str:
        if not hasattr(np, dataType):
            p.pWarn('your architecture has no type "%s", float64 will be used instead' % dataType)
            dataType = 'float64'
        dataType = getattr(np, dataType)
    self.dataType = dataType

    isIP = p.probType == 'IP'
    if isIP:
        pb = r14IP
        p._F = asarray(0, self.dataType)
        p._residual = 0.0
        f_int = p.user.f[0].interval(p.domain, self.dataType)
        p._r0 = prod(p.ub - p.lb) * (f_int.ub - f_int.lb)
        p._volume = 0.0
        p.kernelIterFuncs.pop(IS_NAN_IN_X)
    elif isMOP:
        pb = r14MOP
    else:
        pb = r14

    for val in p._x0.values():
        if isinstance(val, (list, tuple, np.ndarray)) and len(val) > 1:
            p.pWarn('''
            solver %s currently can handle only single-element variables,
            use oovars(n) instead of oovar(size=n),
            otherwise the correct result is not guaranteed''' % self.__name__)

    vv = list(p._freeVarsList)
    x0 = dict([(v, p._x0[v]) for v in vv])

    for val in x0.values():
        if isinstance(val, (list, tuple, np.ndarray)) and len(val) > 1:
            p.err('''
            solver %s currently can handle only single-element variables,
            use oovars(n) instead of oovar(size=n)''' % self.__name__)

    point = p.point

    p.kernelIterFuncs.pop(SMALL_DELTA_X, None)
    p.kernelIterFuncs.pop(SMALL_DELTA_F, None)
    p.kernelIterFuncs.pop(MAX_NON_SUCCESS, None)

    if not bottleneck_is_present and not isODE:
        p.pWarn('''
        installation of Python module "bottleneck"
        (http://berkeleyanalytics.com/bottleneck,
        available via easy_install, takes several minutes for compilation)
        could speed up the solver %s''' % self.__name__)

    n = p.n

    maxSolutions = p.maxSolutions
    if maxSolutions == 0:
        maxSolutions = 10**50
    if maxSolutions != 1 and p.fEnough != -np.inf:
        p.warn('''
        the solver interalg in non-single-solutions mode
        is not adjusted to the fEnough stop criterion yet, it will be omitted''')
        p.kernelIterFuncs.pop(FVAL_IS_ENOUGH)

    nNodes = []
    p.extras['nNodes'] = nNodes
    nActiveNodes = []
    p.extras['nActiveNodes'] = nActiveNodes

    Solutions = Solution()
    Solutions.maxNum = maxSolutions
    Solutions.solutions = []
    Solutions.coords = np.array([]).reshape(0, n)
    p.solutions = Solutions

    lb, ub = asarray(p.lb, dataType).copy(), asarray(p.ub, dataType).copy()

    fTol = p.fTol
    if isIP or isODE:
        if p.ftol is None:
            if fTol is not None:
                p.ftol = fTol
            else:
                p.err('interalg requires user-supplied ftol (required precision)')
        if fTol is None:
            fTol = p.ftol
        elif fTol != p.ftol:
            p.err('you have provided both ftol and fTol')

    if fTol is None and not isMOP:  # TODO: require tols for MOP
        fTol = 1e-7
        p.warn('solver %s requires p.fTol value (required objective function tolerance); 10^-7 will be used' % self.__name__)

    xRecord = 0.5 * (lb + ub)
    adjustr4WithDiscreteVariables(xRecord.reshape(1, -1), p)

    r40 = np.inf

    y = lb.reshape(1, -1)
    e = ub.reshape(1, -1)
    r41 = np.inf

    # TODO: maybe rework it, especially for the constrained case
    fStart = self.fStart

    # TODO: remove it after proper SNLE handling implementation
    if isSNLE:
        r41 = 0.0
        # asdf1 = None
        eqs = [fd_abs(elem) for elem in p.user.f]
        asdf1 = fd_sum(eqs)
        # TODO: check it, for reducing calculations
        #C.update([elem == 0 for elem in p.user.f])
    elif isMOP:
        asdf1 = p.user.f
        Solutions.F = []
        if point(p.x0).isFeas(altLinInEq=False):
            Solutions.solutions.append(p.x0.copy())
            Solutions.coords = asarray(Solutions.solutions)
            Solutions.F.append(p.f(p.x0))
        p._solutions = Solutions
    elif not isODE:
        asdf1 = p.user.f[0]

        #if p.fOpt is not None:  fOpt = p.fOpt
        if p.goal in ('max', 'maximum'):
            asdf1 = -asdf1
            if p.fOpt is not None:
                p.fOpt = -p.fOpt

        if fStart is not None and fStart < r40:
            r41 = fStart

        for X0 in [point(xRecord), point(p.x0)]:
            if X0.isFeas(altLinInEq=False) and X0.f() < r40:
                r40 = X0.f()

        if p.isFeas(p.x0):
            tmp = asdf1(p._x0)
            if tmp < r41:
                r41 = tmp

        if p.fOpt is not None:
            if p.fOpt > r41:
                p.warn('user-provided fOpt seems to be incorrect')
            r41 = p.fOpt

#    if isSNLE:
#        if self.dataHandling == 'raw':
#            p.pWarn('''
#            this interalg data handling approach ("%s")
#            is unimplemented for SNLE yet, dropping to "sorted"''' % self.dataHandling)
#
#        # handles 'auto' as well
#        self.dataHandling = 'sorted'

    domain = oopoint([(v, [p.lb[i], p.ub[i]]) for i, v in enumerate(vv)], skipArrayCast=True)
    domain.dictOfFixedFuncs = p.dictOfFixedFuncs
    #from FuncDesigner.ooFun import BooleanOOFun, SmoothFDConstraint

    if self.dataHandling == 'auto':
        if isIP or isODE:
            self.dataHandling = 'sorted'
        elif isMOP or p.hasLogicalConstraints:
            self.dataHandling = 'raw'
        else:
            r = p.user.f[0].interval(domain, self.dataType)
            M = np.max((np.max(np.atleast_1d(np.abs(r.lb))), np.max(np.atleast_1d(np.abs(r.ub)))))
            for (c, func, lb, ub, tol) in p._FD.nonBoxCons:  #[Elem[1] for Elem in p._FD.nonBoxCons]:
                # !!!!!!!!!!!!!!!!!!!! check it - mb 2nd condition is incorrect
                #if isinstance(c, BooleanOOFun) and not isinstance(c, SmoothFDConstraint): continue
                if hasattr(c, '_unnamedBooleanOOFunNumber'):
                    continue
                r = func.interval(domain, self.dataType)
                M = np.max((M, np.max(np.atleast_1d(np.abs(r.lb)))))
                M = np.max((M, np.max(np.atleast_1d(np.abs(r.ub)))))
            self.dataHandling = 'raw' if M < 1e5 else 'sorted'
            #self.dataHandling = 'sorted' if isIP or (p.__isNoMoreThanBoxBounded__() and n < 50) else 'raw'

        # TODO: is it required yet?
        if not isMOP and not p.hasLogicalConstraints:
            p._isOnlyBoxBounded = p.__isNoMoreThanBoxBounded__()
            if isODE or (asdf1.isUncycled and p._isOnlyBoxBounded and np.all(np.isfinite(p.user.f[0].interval(domain).lb))):
                #maxNodes = 1
                self.dataHandling = 'sorted'

    if self.dataHandling == 'sorted' and p.hasLogicalConstraints:
        p.warn("interalg: for general logical constraints only dataHandling='raw' mode works")
        self.dataHandling = 'raw'

    self.maxActiveNodes = int(self.maxActiveNodes)
#    if self.maxActiveNodes < 2:
#        p.warn('maxActiveNodes should be at least 2 while you have provided %d. Setting it to 2.' % self.maxActiveNodes)
    self.maxNodes = int(self.maxNodes)

    _in = np.array([], object)

    g = np.inf

    C = p._FD.nonBoxConsWithTolShift
    C0 = p._FD.nonBoxCons

#    if isOpt:
#        r = []
#        for (elem, lb, ub, tol) in C0:
#            if tol == 0: tol = p.contol
#            if lb == ub:
#                r.append(fd_max((fd_abs(elem-lb)-tol, 0)) * (fTol/tol))
#            elif lb == -inf:
#                r.append(fd_max((0, elem-ub-tol)) * (fTol/tol))
#            elif ub == inf:
#                r.append(fd_max((0, lb-elem-tol)) * (fTol/tol))
#            else:
#                p.err('finite box constraints are unimplemented for interalg yet')
    #p._cons_obj = 1e100 * fd_sum(r) if len(r) != 0 else None
    #p._cons_obj = fd_sum(r) if len(r) != 0 else None

    if isSNLE:
        C += [(elem == 0, elem, -(elem.tol if elem.tol != 0 else p.ftol), (elem.tol if elem.tol != 0 else p.ftol)) for elem in p.user.f]
        C0 += [(elem == 0, elem, 0, 0, (elem.tol if elem.tol != 0 else p.ftol)) for elem in p.user.f]

    # TODO: handle fixed variables here
    varTols = p.variableTolerances
    if Solutions.maxNum != 1:
        if not isSNLE:
            p.err('''
            "search several solutions" mode is unimplemented
            for the prob type %s yet''' % p.probType)
        if any(varTols == 0):
            p.err('''
            for the mode "search all solutions"
            you have to provide all non-zero tolerances
            for each variable (oovar)
            ''')

    pnc = 0
    an = []
    maxNodes = self.maxNodes

    # TODO: change for constrained probs
    _s = atleast_1d(inf)

    if isODE or (isIP and p.n == 1):
        interalg_ODE_routine(p, self)
        return

    while 1:
        if len(C0) != 0:
            y, e, nlhc, residual, definiteRange, indT, _s = processConstraints(C0, y, e, _s, p, dataType)
        else:
            nlhc, residual, definiteRange, indT = None, None, True, None

        if y.size != 0:
            an, g, fo, _s, Solutions, xRecord, r41, r40 = \
            pb(p, nlhc, residual, definiteRange, y, e, vv, asdf1, C, r40, g, \
               nNodes, r41, fTol, Solutions, varTols, _in, \
               dataType, maxNodes, _s, indT, xRecord)
            if _s is None:
                break
        else:
            an = _in
            fo = 0.0 if isSNLE or isMOP else min((r41, r40 - (fTol if Solutions.maxNum == 1 else 0.0)))

        pnc = max((len(np.atleast_1d(an)), pnc))

        if isIP:
            y, e, _in, _s = \
                func12(an, self.maxActiveNodes, p, Solutions, vv, varTols, np.inf)
        else:
            y, e, _in, _s = \
                func12(an, self.maxActiveNodes, p, Solutions, vv, varTols, fo)
        nActiveNodes.append(y.shape[0]/2)
        if y.size == 0:
            if len(Solutions.coords) > 1:
                p.istop, p.msg = 1001, 'all solutions have been obtained'
            else:
                p.istop, p.msg = 1000, 'solution has been obtained'
            break
    ############# End of main cycle ###############

    if not isSNLE and not isIP and not isMOP:
        if p._bestPoint.betterThan(p.point(p.xk)):
            p.iterfcn(p._bestPoint)
        else:
            p.iterfcn(p.xk)

    ff = p.fk  # ff may be not assigned yet
#    ff = p._bestPoint.f()
#    p.xk = p._bestPoint.x

    if isIP:
        p.xk = np.array([np.nan]*p.n)
        p.rk = p._residual
        p.fk = p._F

    isFeas = len(Solutions.F) != 0 if isMOP else p.isFeas(p.xk) if not isIP else p.rk < fTol

    if not isFeas and p.istop > 0:
        p.istop, p.msg = -1000, 'no feasible solution has been obtained'

    o = asarray([t.o for t in an])
    if o.size != 0:
        g = nanmin([nanmin(o), g])
    if not isMOP:
        p.extras['isRequiredPrecisionReached'] = \
            True if ff - g < fTol and isFeas else False
        # and (k is False or (isSNLE and (p._nObtainedSolutions >= maxSolutions or maxSolutions==1)))
        if not p.extras['isRequiredPrecisionReached'] and p.istop > 0:
            p.istop = -1
            p.msg = 'required precision is not guaranteed'

    # TODO: simplify it
    if not isMOP:
        # np.asscalar is removed in modern NumPy; .item() is the equivalent
        tmp = [nanmin(np.hstack((ff, g, o.flatten()))), np.array(ff).item()]
        if p.goal in ['max', 'maximum']:
            tmp = (-tmp[1], -tmp[0])
        p.extras['extremumBounds'] = tmp if not isIP else 'unimplemented for IP yet'

    p.solutions = [p._vector2point(s) for s in Solutions.coords] if not isMOP else \
        MOPsolutions([p._vector2point(s) for s in Solutions.coords])
    if isMOP:
        for i, s in enumerate(p.solutions):
            s.useAsMutable = True
            for j, goal in enumerate(p.user.f):
                s[goal] = Solutions.F[i][j]
            s.useAsMutable = False
        p.solutions.values = np.asarray(Solutions.F)
        p.solutions.coords = Solutions.coords
    if not isMOP and p.maxSolutions == 1:
        delattr(p, 'solutions')
    if isSNLE and p.maxSolutions != 1:
        for v in p._categoricalVars:
            for elem in r.solutions:
                elem.useAsMutable = True
                elem[v] = v.aux_domain[elem[v]]
                elem.useAsMutable = False
    if p.iprint >= 0 and not isMOP:
#        s = 'Solution with required tolerance %0.1e \n is%s guaranteed (obtained precision: %0.1e)' \
#            % (fTol, '' if p.extras['isRequiredPrecisionReached'] else ' NOT', tmp[1]-tmp[0])
        s = 'Solution with required tolerance %0.1e \n is%s guaranteed' \
            % (fTol, '' if p.extras['isRequiredPrecisionReached'] else ' NOT')
        if not isIP and p.maxSolutions == 1:
            s += ' (obtained precision: %0.1e)' % np.abs(tmp[1]-tmp[0])
        if not p.extras['isRequiredPrecisionReached'] and pnc == self.maxNodes:
            s += '\nincrease maxNodes (current value %d)' % self.maxNodes
        p.info(s)
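# Usage sketch for the solver above, assuming the classic OpenOpt/FuncDesigner
# API (GLP problem class, oovar, p.solve); class names and keyword arguments
# follow the old OpenOpt docs and may differ between releases. The objective,
# bounds and options here are hypothetical.
from FuncDesigner import oovar
from openopt import GLP

x, y = oovar('x'), oovar('y')
obj = (x - 0.1)**2 + (y + 0.2)**2
startPoint = {x: 0.0, y: 0.0}
constraints = [x > -1, x < 1, y > -1, y < 1]   # finite box bounds, as required above

p = GLP(obj, startPoint, constraints=constraints, fTol=1e-7)
r = p.solve('interalg', maxNodes=150000, iprint=10)
print(r.xf, r.ff)                               # solution point and objective value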
def _grid_plot(self, experiment, grid, **kwargs):

    kwargs.setdefault('histtype', 'stepfilled')
    kwargs.setdefault('alpha', 0.5)
    kwargs.setdefault('antialiased', True)

    # estimate a "good" number of bins; see cytoflow.utility.num_hist_bins
    # for a reference.
    scale = kwargs.pop('scale')[self.channel]
    lim = kwargs.pop('lim')[self.channel]

    scaled_data = scale(experiment[self.channel])
    num_bins = kwargs.pop('num_bins', util.num_hist_bins(scaled_data))
    num_bins = util.num_hist_bins(scaled_data) if num_bins is None else num_bins

    # clip num_bins to (100, 1000)
    num_bins = max(min(num_bins, 1000), 100)

    if (self.huefacet
            and "bins" in experiment.metadata[self.huefacet]
            and experiment.metadata[self.huefacet]["bin_scale"] == self.scale):

        # if we color facet by the result of a BinningOp and we don't
        # match the BinningOp bins with the histogram bins, we get
        # gnarly aliasing.

        # each color gets at least one bin.  however, if the estimated
        # number of bins for the histogram is much larger than the
        # number of colors, sub-divide each color into multiple bins.
        bins = experiment.metadata[self.huefacet]["bins"]
        scaled_bins = scale(bins)

        num_hues = len(experiment[self.huefacet].unique())
        bins_per_hue = math.floor(num_bins / num_hues)

        if bins_per_hue == 1:
            new_bins = scaled_bins
        else:
            new_bins = []
            for idx in range(1, len(scaled_bins)):
                new_bins = np.append(
                    new_bins,
                    np.linspace(scaled_bins[idx - 1], scaled_bins[idx],
                                bins_per_hue + 1, endpoint=False))

        bins = scale.inverse(new_bins)
    else:
        xmin = bottleneck.nanmin(scaled_data)
        xmax = bottleneck.nanmax(scaled_data)
        bins = scale.inverse(np.linspace(xmin, xmax, num=int(num_bins), endpoint=True))

    kwargs.setdefault('bins', bins)
    kwargs.setdefault('orientation', 'vertical')

    if ('linewidth' not in kwargs) or ('linewidth' in kwargs and kwargs['linewidth'] is None):
        kwargs['linewidth'] = 0 if kwargs['histtype'] == "stepfilled" else 2

    # if we have a hue facet, the y scaling is frequently wrong.  this
    # will capture the maximum bin count of each call to plt.hist, so
    # we don't have to compute the histogram multiple times
    count_max = []

    def hist_lims(*args, **kwargs):
        # there's some bug in the above code where we get data that isn't
        # in the range of `bins`, which makes hist() puke.  so get rid
        # of it.
        bins = kwargs.get('bins')
        new_args = []
        for x in args:
            x = x[x > bins[0]]
            x = x[x < bins[-1]]
            new_args.append(x)

        if scale.name != "linear" and kwargs.get("density"):
            kwargs["density"] = False
            counts, _ = np.histogram(new_args, bins=kwargs["bins"])
            kwargs["weights"] = counts / np.sum(counts)
            n, _, _ = plt.hist(kwargs["bins"][:-1], **kwargs)
        else:
            n, _, _ = plt.hist(*new_args, **kwargs)
        count_max.append(max(n))

    grid.map(hist_lims, self.channel, **kwargs)

    ret = {}
    if kwargs['orientation'] == 'vertical':
        ret['xscale'] = scale
        ret['xlim'] = lim
        ret['ylim'] = (0, 1.05 * max(count_max))
    else:
        ret['yscale'] = scale
        ret['ylim'] = lim
        ret['xlim'] = (0, 1.05 * max(count_max))

    return ret
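# Illustrative sketch (plain NumPy, not cytoflow): how the per-hue bin
# subdivision above behaves on toy edges. With endpoint=False each hue
# segment contributes bins_per_hue + 1 left edges, so adjacent segments tile
# the axis without overlapping. The edges here are hypothetical.
import numpy as np

scaled_bins = np.array([0.0, 1.0, 2.0])   # hypothetical hue-bin edges
bins_per_hue = 2
new_bins = []
for idx in range(1, len(scaled_bins)):
    new_bins = np.append(new_bins,
                         np.linspace(scaled_bins[idx - 1], scaled_bins[idx],
                                     bins_per_hue + 1, endpoint=False))
print(new_bins)   # [0. 0.333 0.667 1. 1.333 1.667]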
def apply(self, experiment):
    """Applies the binning to an experiment.

    Parameters
    ----------
    experiment : Experiment
        the old_experiment to which this op is applied

    Returns
    -------
    a new experiment, the same as old_experiment but with a new column
    with the name of the operation, containing the index of the bin that
    each event's measurement in self.channel falls into.
    """
    if not experiment:
        raise util.CytoflowOpError("no experiment specified")

    if not self.name:
        raise util.CytoflowOpError("name is not set")

    if self.name in experiment.data.columns:
        raise util.CytoflowOpError("name {0} is in the experiment already"
                                   .format(self.name))

    if self.bin_count_name and self.bin_count_name in experiment.data.columns:
        raise util.CytoflowOpError("bin_count_name {0} is in the experiment already"
                                   .format(self.bin_count_name))

    if not self.channel:
        raise util.CytoflowOpError("channel is not set")

    if self.channel not in experiment.data.columns:
        raise util.CytoflowOpError("channel {0} isn't in the experiment"
                                   .format(self.channel))

    if self.num_bins is Undefined and self.bin_width is Undefined:
        raise util.CytoflowOpError("must set either bin number or width")

    if self.num_bins is Undefined \
       and not (self.scale == "linear" or self.scale == "log"):
        raise util.CytoflowOpError("Can only use bin_width with linear or log scale")

    scale = util.scale_factory(self.scale, experiment, self.channel)
    scaled_data = scale(experiment.data[self.channel])

    channel_min = bn.nanmin(scaled_data)
    channel_max = bn.nanmax(scaled_data)

    num_bins = self.num_bins if self.num_bins is not Undefined else \
               (channel_max - channel_min) / self.bin_width

    # np.linspace requires an integer bin count
    bins = np.linspace(start=channel_min, stop=channel_max, num=int(num_bins))

    # bins need to be internal; drop the first and last one
    bins = bins[1:-1]

    new_experiment = experiment.clone()
    new_experiment.add_condition(self.name,
                                 "int",
                                 np.digitize(scaled_data, bins))

    # if we're log-scaled (for example), don't label data that isn't
    # showable on a log scale!  (.ix was removed from pandas; .loc is
    # the equivalent for this boolean-mask assignment)
    new_experiment.data.loc[np.isnan(scaled_data), self.name] = np.nan

    # keep track of the bins we used, for pretty plotting later.
    new_experiment.metadata[self.name]["bin_scale"] = self.scale
    new_experiment.metadata[self.name]["bins"] = bins

    if self.bin_count_name:
        # TODO - this is a HUGE memory hog?!
        agg_count = new_experiment.data.groupby(self.name).count()
        agg_count = agg_count[agg_count.columns[0]]

        # have to make the condition a float64, because if we're in log
        # space there may be events that have NaN as the bin number.
        new_experiment.add_condition(
            self.bin_count_name,
            "float64",
            new_experiment[self.name].map(agg_count))

    new_experiment.history.append(self.clone_traits())
    return new_experiment
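# Quick NumPy illustration of why the first and last edges are dropped above:
# with only the internal edges, np.digitize maps out-of-range values into the
# first and last bins instead of creating extra out-of-range bins.
import numpy as np

edges = np.linspace(0., 1., 5)        # [0, .25, .5, .75, 1]
internal = edges[1:-1]                # [.25, .5, .75]
data = np.array([-0.1, 0.3, 0.6, 1.2])
print(np.digitize(data, internal))    # [0 1 2 3]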
def apply(self, experiment):
    """
    Applies the binning to an experiment.

    Parameters
    ----------
    experiment : Experiment
        the old_experiment to which this op is applied

    Returns
    -------
    Experiment
        A new experiment with a condition column named :attr:`name`, which
        contains the location of the left-most edge of the bin that the
        event is in.  If :attr:`bin_count_name` is set, another column
        is added with that name as well, containing the number of
        events in the same bin as the event.
    """
    if experiment is None:
        raise util.CytoflowOpError('experiment', "no experiment specified")

    if not self.name:
        raise util.CytoflowOpError('name', "Name is not set")

    if self.name in experiment.data.columns:
        raise util.CytoflowOpError(
            'name',
            "Name {} is in the experiment already".format(self.name))

    if self.bin_count_name and self.bin_count_name in experiment.data.columns:
        raise util.CytoflowOpError(
            'bin_count_name',
            "bin_count_name {} is in the experiment already".format(self.bin_count_name))

    if not self.channel:
        raise util.CytoflowOpError('channel', "channel is not set")

    if self.channel not in experiment.data.columns:
        raise util.CytoflowOpError(
            'channel',
            "channel {} isn't in the experiment".format(self.channel))

    if not self.num_bins and not self.bin_width:
        raise util.CytoflowOpError('num_bins', "must set either bin number or width")

    if self.bin_width \
       and not (self.scale == "linear" or self.scale == "log"):
        raise util.CytoflowOpError(
            'scale',
            "Can only use bin_width with linear or log scale")

    scale = util.scale_factory(self.scale, experiment, channel=self.channel)
    scaled_data = scale(experiment.data[self.channel])

    scaled_min = bn.nanmin(scaled_data)
    scaled_max = bn.nanmax(scaled_data)

    num_bins = self.num_bins if self.num_bins else \
               (scaled_max - scaled_min) / self.bin_width

    if num_bins > self._max_num_bins:
        raise util.CytoflowOpError(
            None,
            "Too many bins! To increase this limit, "
            "change _max_num_bins (currently {})".format(self._max_num_bins))

    # np.linspace requires an integer bin count
    scaled_bins = np.linspace(start=scaled_min, stop=scaled_max, num=int(num_bins))

    if len(scaled_bins) < 2:
        raise util.CytoflowOpError('num_bins', "Must have more than one bin")

    # put the data in bins
    bin_idx = np.digitize(scaled_data, scaled_bins[1:-1])

    # now, back into data space
    bins = scale.inverse(scaled_bins)

    new_experiment = experiment.clone()
    new_experiment.add_condition(self.name, "float", bins[bin_idx])

    # keep track of the bins we used, for prettier plotting later.
    new_experiment.metadata[self.name]["bin_scale"] = self.scale
    new_experiment.metadata[self.name]["bins"] = bins

    if self.bin_count_name:
        # TODO - this is a HUGE memory hog?!
        # TODO - fix this, then turn it on by default
        agg_count = new_experiment.data.groupby(self.name).count()
        agg_count = agg_count[agg_count.columns[0]]

        # have to make the condition a float64, because if we're in log
        # space there may be events that have NaN as the bin number.
        new_experiment.add_condition(
            self.bin_count_name,
            "float64",
            new_experiment[self.name].map(agg_count))

    new_experiment.history.append(self.clone_traits(transient=lambda _: True))
    return new_experiment
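# Usage sketch for the operation above, assuming the cytoflow package and an
# already-imported Experiment `ex` with a "Y2-A" channel; the experiment,
# channel name, and bin width are illustrative, not taken from this file.
import cytoflow as flow

binning = flow.BinningOp(name="Bin",
                         channel="Y2-A",
                         scale="log",
                         bin_width=0.2)        # one bin per 0.2 decades
ex_binned = binning.apply(ex)                  # adds a "Bin" condition column
print(ex_binned.data["Bin"].unique())          # left edges of the occupied bins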
def _grid_plot(self, experiment, grid, xlim, ylim, xscale, yscale, **kwargs):

    kwargs.setdefault('histtype', 'stepfilled')
    kwargs.setdefault('alpha', 0.5)
    kwargs.setdefault('antialiased', True)

    # estimate a "good" number of bins; see cytoflow.utility.num_hist_bins
    # for a reference.
    scaled_data = xscale(experiment[self.channel])
    num_bins = util.num_hist_bins(scaled_data)

    # clip num_bins to (100, 1000)
    num_bins = max(min(num_bins, 1000), 100)

    if (self.huefacet
            and "bins" in experiment.metadata[self.huefacet]
            and experiment.metadata[self.huefacet]["bin_scale"] == self.scale):

        # if we color facet by the result of a BinningOp and we don't
        # match the BinningOp bins with the histogram bins, we get
        # gnarly aliasing.

        # each color gets at least one bin.  however, if the estimated
        # number of bins for the histogram is much larger than the
        # number of colors, sub-divide each color into multiple bins.
        bins = experiment.metadata[self.huefacet]["bins"]
        scaled_bins = xscale(bins)

        num_hues = len(experiment[self.huefacet].unique())
        bins_per_hue = math.floor(num_bins / num_hues)

        if bins_per_hue == 1:
            new_bins = scaled_bins
        else:
            new_bins = []
            for idx in range(1, len(scaled_bins)):
                new_bins = np.append(
                    new_bins,
                    np.linspace(scaled_bins[idx - 1], scaled_bins[idx],
                                bins_per_hue + 1, endpoint=False))

        bins = xscale.inverse(new_bins)
    else:
        xmin = bottleneck.nanmin(scaled_data)
        xmax = bottleneck.nanmax(scaled_data)
        bins = xscale.inverse(np.linspace(xmin, xmax, num=num_bins, endpoint=True))
        bins = np.append(bins, xscale.inverse(xmax))

    kwargs.setdefault('bins', bins)

    # if we have a hue facet, the y scaling is frequently wrong.  this
    # will capture the maximum bin count of each call to plt.hist, so
    # we don't have to compute the histogram multiple times
    ymax = []

    def hist_lims(*args, **kwargs):
        # there's some bug in the above code where we get data that isn't
        # in the range of `bins`, which makes hist() puke.  so get rid
        # of it.
        bins = kwargs.get('bins')
        new_args = []
        for x in args:
            x = x[x > bins[0]]
            x = x[x < bins[-1]]
            new_args.append(x)
        n, _, _ = plt.hist(*new_args, **kwargs)
        ymax.append(max(n))

    grid.map(hist_lims, self.channel, **kwargs)

    plt.ylim(0, 1.05 * max(ymax))

    return {}
def ndcombine(
        arr, mask=None, copy=True,
        blank=np.nan,
        offsets=None,
        thresholds=[-np.inf, np.inf],
        zero=None, scale=None, weight=None, statsec=None,
        zero_kw={'cenfunc': 'median', 'stdfunc': 'std', 'std_ddof': 1},
        scale_kw={'cenfunc': 'median', 'stdfunc': 'std', 'std_ddof': 1},
        zero_to_0th=True, scale_to_0th=True,
        scale_sample=None, zero_sample=None,
        reject=None,
        cenfunc='median',
        sigma=[3., 3.], maxiters=3, ddof=1, nkeep=1, maxrej=None,
        n_minmax=[1, 1],
        rdnoise=0., gain=1., snoise=0.,
        pclip=-0.5,
        combine='average',
        dtype='float32',
        memlimit=2.5e+9,
        irafmode=True,
        verbose=False,
        full=False,
):
    if copy:
        arr = arr.copy()

    if np.array(arr).ndim == 1:
        raise ValueError("1-D array combination is not supported!")

    _mask = _set_mask(arr, mask)  # _mask is propagated through this function.
    sigma_lower, sigma_upper = _set_sigma(sigma)
    nkeep, maxrej = _set_keeprej(arr, nkeep, maxrej, axis=0)
    cenfunc = _set_cenfunc(cenfunc)
    reject = _set_reject_name(reject)
    maxiters = int(maxiters)
    ddof = int(ddof)

    ndim = arr.ndim
    ncombine = arr.shape[0]

    combfunc = _set_combfunc(combine, nameonly=False, nan=True)

    # == 01 - Thresholding + Initial masking ================================ #
    # Updating mask: _mask = _mask | mask_thresh
    mask_thresh = _set_thresh_mask(
        arr=arr, mask=_mask, thresholds=thresholds, update_mask=True)

    # if safemode:
    #     # Backup the pixels which are rejected by thresholding and
    #     # initial mask for future restoration (see below) for debugging
    #     # purpose.
    #     backup_thresh = arr[mask_thresh]
    #     backup_thresh_inmask = arr[_mask]

    arr[_mask] = np.nan
    # ----------------------------------------------------------------------- #

    # == 02 - Calculate zero, scale, weights ================================ #
    # This should be done before rejection but after threshold masking.
    zeros, scales, weights = get_zsw(
        arr=arr, zero=zero, scale=scale, weight=weight,
        zero_kw=zero_kw, scale_kw=scale_kw,
        zero_to_0th=zero_to_0th, scale_to_0th=scale_to_0th)
    arr = do_zs(arr, zeros=zeros, scales=scales)
    # ----------------------------------------------------------------------- #

    # == 02 - Rejection ===================================================== #
    if isinstance(reject, str):
        if reject == 'sigclip':
            _mask_rej, low, upp, nit, rejcode = sigclip_mask(
                arr, mask=_mask,
                sigma_lower=sigma_lower, sigma_upper=sigma_upper,
                maxiters=maxiters, ddof=ddof,
                nkeep=nkeep, maxrej=maxrej,
                cenfunc=cenfunc, axis=0,
                irafmode=irafmode, full=True)
            # _mask is a subset of _mask_rej, so the pixels which are
            # masked PURELY due to the rejection are:
            mask_rej = _mask_rej ^ _mask
        elif reject == 'minmax':
            pass
        elif reject == 'ccdclip':
            _mask_rej, low, upp, nit, rejcode = ccdclip_mask(
                arr, mask=_mask,
                sigma_lower=sigma_lower, sigma_upper=sigma_upper,
                scale_ref=np.mean(scales), zero_ref=np.mean(zeros),
                maxiters=maxiters, ddof=ddof,
                nkeep=nkeep, maxrej=maxrej,
                cenfunc=cenfunc, axis=0,
                gain=gain, rdnoise=rdnoise, snoise=snoise,
                irafmode=irafmode, full=True)
            # _mask is a subset of _mask_rej, so the pixels which are
            # masked PURELY due to the rejection are:
            mask_rej = _mask_rej ^ _mask
        elif reject == 'pclip':
            pass
        else:
            raise ValueError("reject not understood.")
    elif reject is None:
        mask_rej = _set_mask(arr, None)
        low = bn.nanmin(arr, axis=0)
        upp = bn.nanmax(arr, axis=0)
        nit = None
        rejcode = None
    else:
        raise ValueError("reject not understood.")

    _mask |= mask_rej
    # ----------------------------------------------------------------------- #

    # TODO: add "grow" rejection here?

    # == 03 - combine ======================================================= #
    # Replace rejected/masked pixels with NaN (and optionally back them up for
    # debugging). This is done to reduce memory (instead of _arr = arr.copy()).
    # backup_nan = arr[_mask]
    arr[_mask] = np.nan

    # Combine and calc sigma
    comb = combfunc(arr, axis=0)
    if full:
        sigma = bn.nanstd(arr, axis=0)

    # Restore NaN-replaced pixels of arr for debugging purpose.
    # arr[_mask] = backup_nan
    # arr[mask_thresh] = backup_thresh_inmask

    if full:
        return comb, sigma, mask_rej, mask_thresh, low, upp, nit, rejcode
    else:
        return comb
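# Usage sketch for ndcombine on a synthetic image stack; the helper functions
# it calls (_set_mask, get_zsw, sigclip_mask, ...) must come from the same
# module, and the data here are made up for illustration.
import numpy as np

rng = np.random.default_rng(0)
stack = rng.normal(loc=100., scale=5., size=(10, 32, 32)).astype('float32')
stack[3, 5, 5] = 10000.    # a cosmic-ray-like outlier in one frame

combined = ndcombine(stack, reject='sigclip', sigma=[3., 3.],
                     combine='median', full=False)
print(combined.shape)      # (32, 32); the outlier is clipped before combining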
def fit(self, X, y):
    """
    Fits the MI_FS feature selection with the chosen MI_FS method.

    Parameters
    ----------
    X : array-like, shape = [n_samples, n_features]
        The training input samples.

    y : array-like, shape = [n_samples]
        The target values.
    """
    # Check if n_jobs is negative
    if self.n_jobs < 0:
        self.n_jobs = NUM_CORES - self.n_jobs

    self.X, y = self._check_params(X, y)
    n, p = X.shape
    self.y = y.reshape((n, 1))

    # list of selected features
    S = []
    # list of all features
    F = list(range(p))

    if self.n_features != 'auto':
        feature_mi_matrix = np.zeros((self.n_features, p))
    else:
        feature_mi_matrix = np.zeros((n, p))
    feature_mi_matrix[:] = np.nan
    S_mi = []

    # ---------------------------------------------------------------------
    # FIND FIRST FEATURE
    # ---------------------------------------------------------------------

    xy_MI = np.array(mimy.get_first_mi_vector(self, self.k))
    #print(xy_MI)
    #xy_MI[np.where(np.isnan(xy_MI))] = 0.
    #print("first", sorted(enumerate(xy_MI), key=lambda x: x[1], reverse=True)[0])

    # choose the best, add it to S, remove it from F
    S, F = self._add_remove(S, F, bn.nanargmax(xy_MI))
    S_mi.append(bn.nanmax(xy_MI))

    # notify user
    if self.verbose > 0:
        self._print_results(S, S_mi)

    # ---------------------------------------------------------------------
    # FIND SUBSEQUENT FEATURES
    # ---------------------------------------------------------------------

    if self.n_features == 'auto':
        n_features = np.inf
    else:
        n_features = self.n_features

    while len(S) < n_features:
        # loop through the remaining unselected features and calculate MI
        s = len(S) - 1
        # Calculate the s-th row of feature_mi_matrix, which contains the
        # JMI score of the last element in S with all remaining features in F
        feature_mi_matrix[s, F] = mimy.get_mi_vector(self, F, S[-1])

        # make decision based on the chosen FS algorithm
        fmm = feature_mi_matrix[:len(S), F]

        if self.method == 'JMI':
            # which feature in F has the largest cumulative \sum_{s \in S} JMI score
            selected = F[bn.nanargmax(bn.nansum(fmm, axis=0))]
            # Find out which pair of features the jmim belongs to
            if self.verbose > 0:
                jmim = bn.nanmax(bn.nanmin(fmm, axis=0))
                jmi_vals = fmm[:, bn.nanargmax(bn.nanmin(fmm, axis=0))]
                jmi_idx = np.where(jmi_vals == jmim)[0]
                print(jmim, S[jmi_idx[0]], selected)
        elif self.method == 'JMIM':
            if bn.allnan(bn.nanmin(fmm, axis=0)):
                break
            selected = F[bn.nanargmax(bn.nanmin(fmm, axis=0))]
            # Find out which pair of features the jmim belongs to
            if self.verbose > 0:
                jmim = bn.nanmax(bn.nanmin(fmm, axis=0))
                jmi_vals = fmm[:, bn.nanargmax(bn.nanmin(fmm, axis=0))]
                jmi_idx = np.where(jmi_vals == jmim)[0]
                print(jmim, S[jmi_idx[0]], selected)
        elif self.method == 'MRMR':
            if bn.allnan(bn.nanmean(fmm, axis=0)):
                break
            MRMR = xy_MI[F] - bn.nanmean(fmm, axis=0)
            selected = F[bn.nanargmax(MRMR)]
            S_mi.append(bn.nanmax(MRMR))

        # record the JMIM of the newly selected feature and add it to S
        if self.method != 'MRMR':
            S_mi.append(bn.nanmax(bn.nanmin(fmm, axis=0)))
        S, F = self._add_remove(S, F, selected)

        # notify user
        if self.verbose > 0:
            self._print_results(S, S_mi)

        # if n_features == 'auto', check S_mi to decide when to stop
        if self.n_features == 'auto' and len(S) > 10:
            # smooth the 1st derivative of the MI values of the previously
            # selected features
            MI_dd = signal.savgol_filter(S_mi[1:], 9, 2, 1)
            # does the mean of the last 5 converge to 0?
            if np.abs(np.mean(MI_dd[-5:])) < 1e-3:
                break

    # ---------------------------------------------------------------------
    # SAVE RESULTS
    # ---------------------------------------------------------------------

    self.n_features_ = len(S)
    self._support_mask = np.zeros(p, dtype=bool)  # np.bool was removed in NumPy >= 1.24
    self._support_mask[S] = True
    self.ranking_ = S
    self.mi_ = S_mi

    return self
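# Usage sketch for the selector above, assuming it is the fit() method of the
# `mifs` package (https://github.com/danielhomola/mifs); the class name and
# constructor arguments follow that package and may differ in other forks.
# The dataset is synthetic.
from sklearn.datasets import make_classification
import mifs

X, y = make_classification(n_samples=500, n_features=20, n_informative=5,
                           random_state=0)
selector = mifs.MutualInformationFeatureSelector(method='JMIM',
                                                 n_features=5, verbose=0)
selector.fit(X, y)
print(selector.ranking_)           # indices of the selected features, in order
X_reduced = selector.transform(X)  # keep only the selected columns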