def cumprod(self, axis=None):
    """
    Return cumulative product over requested axis as DataFrame

    Parameters
    ----------
    axis : {0, 1}
        0 for row-wise, 1 for column-wise

    Returns
    -------
    y : DataFrame
    """
    if axis is None:
        axis = self._default_stat_axis
    else:
        axis = self._get_axis_number(axis)

    y = self.values.copy()
    if not issubclass(y.dtype.type, np.int_):
        mask = np.isnan(self.values)
        np.putmask(y, mask, 1.0)
        result = y.cumprod(axis)
        np.putmask(result, mask, np.nan)
    else:
        result = y.cumprod(axis)
    return self._wrap_array(result, self.axes, copy=False)
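# The NaN-handling pattern above can be demonstrated standalone: putmask
# replaces masked entries with the multiplicative identity before the scan,
# then restores NaN afterwards. A minimal sketch in plain NumPy (the helper
# name nan_cumprod is ours, not from the source):
import numpy as np

def nan_cumprod(a, axis=0):
    a = np.asarray(a, dtype=float).copy()
    mask = np.isnan(a)
    np.putmask(a, mask, 1.0)       # neutral element for the product
    out = a.cumprod(axis)
    np.putmask(out, mask, np.nan)  # reinstate missing values
    return out

# nan_cumprod([1.0, np.nan, 2.0]) -> array([ 1., nan,  2.])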
def cum_fit(distr, xvals, alpha, thresh):
    """
    Integral of the fitted function above a given value (reverse CDF)

    The fitted function is normalized to 1 above threshold

    Parameters
    ----------
    xvals : sequence of floats
        Values where the function is to be evaluated
    alpha : float
        The fitted parameter
    thresh : float
        Threshold value applied to fitted values

    Returns
    -------
    cum_fit : array of floats
        Reverse CDF of fitted function at the requested xvals
    """
    xvals = numpy.array(xvals)
    cum_fit = cum_fndict[distr](xvals, alpha, thresh)
    # set fitted values below threshold to 0
    numpy.putmask(cum_fit, xvals < thresh, 0.)
    return cum_fit
def nankurt(values, axis=None, skipna=True):
    # note: dtype.type is a class, so issubclass (not isinstance) is the
    # correct floating-point check here
    if not issubclass(values.dtype.type, np.floating):
        values = values.astype('f8')

    mask = isnull(values)
    count = _get_counts(mask, axis)

    if skipna:
        values = values.copy()
        np.putmask(values, mask, 0)

    A = values.sum(axis) / count
    B = (values ** 2).sum(axis) / count - A ** 2
    C = (values ** 3).sum(axis) / count - A ** 3 - 3 * A * B
    D = (values ** 4).sum(axis) / count - A ** 4 - 6 * B * A * A - 4 * C * A

    B = _zero_out_fperr(B)
    C = _zero_out_fperr(C)
    D = _zero_out_fperr(D)

    result = (((count * count - 1.) * D / (B * B) - 3 * ((count - 1.) ** 2)) /
              ((count - 2.) * (count - 3.)))
    if isinstance(result, np.ndarray):
        result = np.where(B == 0, 0, result)
        result[count < 4] = np.nan
        return result
    else:
        result = 0 if B == 0 else result
        if count < 4:
            return np.nan
        return result
def nankurt(values, axis=None, skipna=True):
    mask = isnull(values)
    if not is_floating_dtype(values):
        values = values.astype('f8')

    count = _get_counts(mask, axis)

    if skipna:
        values = values.copy()
        np.putmask(values, mask, 0)

    A = values.sum(axis) / count
    B = (values ** 2).sum(axis) / count - A ** 2
    C = (values ** 3).sum(axis) / count - A ** 3 - 3 * A * B
    D = (values ** 4).sum(axis) / count - A ** 4 - 6 * B * A * A - 4 * C * A

    B = _zero_out_fperr(B)
    D = _zero_out_fperr(D)

    if not isinstance(B, np.ndarray):
        # if B is a scalar, check these corner cases first before doing division
        if count < 4:
            return np.nan
        if B == 0:
            return 0

    result = (((count * count - 1.) * D / (B * B) - 3 * ((count - 1.) ** 2)) /
              ((count - 2.) * (count - 3.)))

    if isinstance(result, np.ndarray):
        result = np.where(B == 0, 0, result)
        result[count < 4] = np.nan

    return result
def fit_fn(distr, xvals, alpha, thresh):
    """
    The fitted function normalized to 1 above threshold

    To normalize to a given total count multiply by the count.

    Parameters
    ----------
    xvals : sequence of floats
        Values where the function is to be evaluated
    alpha : float
        The fitted parameter
    thresh : float
        Threshold value applied to fitted values

    Returns
    -------
    fit : array of floats
        Fitted function at the requested xvals
    """
    xvals = numpy.array(xvals)
    fit = fitfn_dict[distr](xvals, alpha, thresh)
    # set fitted values below threshold to 0
    numpy.putmask(fit, xvals < thresh, 0.)
    return fit
def _get_values(values, skipna, fill_value=None, fill_value_typ=None,
                isfinite=False, copy=True):
    """ utility to get the values view, mask, dtype
        if necessary copy and mask using the specified fill_value
        copy = True will force the copy """
    values = _values_from_object(values)
    if isfinite:
        mask = _isfinite(values)
    else:
        mask = isnull(values)

    dtype = values.dtype
    dtype_ok = _na_ok_dtype(dtype)

    # get our fill value (in case we need to provide an alternative dtype for it)
    fill_value = _get_fill_value(dtype, fill_value=fill_value,
                                 fill_value_typ=fill_value_typ)

    if skipna:
        if copy:
            values = values.copy()
        if dtype_ok:
            np.putmask(values, mask, fill_value)
        # promote if needed
        else:
            values, changed = com._maybe_upcast_putmask(values, mask, fill_value)
    elif copy:
        values = values.copy()

    values = _view_if_needed(values)
    return values, mask, dtype
def nanskew(values, axis=None, skipna=True):
    # note: dtype.type is a class, so issubclass (not isinstance) is the
    # correct floating-point check here
    if not issubclass(values.dtype.type, np.floating):
        values = values.astype('f8')

    mask = isnull(values)
    count = _get_counts(mask, axis)

    if skipna:
        values = values.copy()
        np.putmask(values, mask, 0)

    A = values.sum(axis) / count
    B = (values ** 2).sum(axis) / count - A ** 2
    C = (values ** 3).sum(axis) / count - A ** 3 - 3 * A * B

    # floating point error
    B = _zero_out_fperr(B)
    C = _zero_out_fperr(C)

    result = ((np.sqrt((count ** 2 - count)) * C) /
              ((count - 2) * np.sqrt(B) ** 3))
    if isinstance(result, np.ndarray):
        result = np.where(B == 0, 0, result)
        result[count < 3] = np.nan
        return result
    else:
        result = 0 if B == 0 else result
        if count < 3:
            return np.nan
        return result
def pct_change(self, periods=1, fill_method='pad', limit=None, freq=None,
               **kwds):
    """
    Percent change over given number of periods

    Parameters
    ----------
    periods : int, default 1
        Periods to shift for forming percent change
    fill_method : str, default 'pad'
        How to handle NAs before computing percent changes
    limit : int, default None
        The number of consecutive NAs to fill before stopping
    freq : DateOffset, timedelta, or offset alias string, optional
        Increment to use from time series API (e.g. 'M' or BDay())

    Returns
    -------
    chg : Series or DataFrame
    """
    if fill_method is None:
        data = self
    else:
        data = self.fillna(method=fill_method, limit=limit)
    rs = data / data.shift(periods=periods, freq=freq, **kwds) - 1
    if freq is None:
        mask = com.isnull(self.values)
        np.putmask(rs.values, mask, np.nan)
    return rs
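# The final putmask re-masks positions that were NA in the *original* input,
# so a forward-filled gap does not masquerade as a 0% change. A minimal sketch
# of that step in plain NumPy (values illustrative):
import numpy as np

prices = np.array([10.0, np.nan, 12.0, 9.0])
filled = np.array([10.0, 10.0, 12.0, 9.0])    # after pad-filling
rs = filled[1:] / filled[:-1] - 1             # shift-and-divide, periods=1
np.putmask(rs, np.isnan(prices[1:]), np.nan)  # restore NaN where input was NA
# rs -> array([  nan,  0.2 , -0.25])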
def cummin(self, axis=None, skipna=True):
    """
    Return DataFrame of cumulative min over requested axis.

    Parameters
    ----------
    axis : {0, 1}
        0 for row-wise, 1 for column-wise
    skipna : boolean, default True
        Exclude NA/null values. If an entire row/column is NA, the result
        will be NA

    Returns
    -------
    y : DataFrame
    """
    if axis is None:
        axis = self._default_stat_axis
    else:
        axis = self._get_axis_number(axis)

    y = self.values.copy()
    if not issubclass(y.dtype.type, np.integer):
        mask = np.isnan(self.values)

        if skipna:
            np.putmask(y, mask, np.inf)

        result = np.minimum.accumulate(y, axis)

        if skipna:
            np.putmask(result, mask, np.nan)
    else:
        result = np.minimum.accumulate(y, axis)

    return self._wrap_array(result, self.axes, copy=False)
def nanall(values, axis=None, skipna=True):
    mask = isnull(values)

    if skipna:
        values = values.copy()
        np.putmask(values, mask, True)

    return values.all(axis)
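# True is the identity element for `all`, so overwriting missing entries
# with True makes them transparent to the reduction. A standalone check
# using an object array with None standing in for NA (isnull above is a
# pandas-internal helper, so we build the mask by hand here):
import numpy as np

vals = np.array([True, None, True], dtype=object)
mask = np.array([v is None for v in vals])
np.putmask(vals, mask, True)
print(vals.all())  # True: the missing entry was skipped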
def _reindex_index(self, index, method, copy, level, fill_value=np.nan,
                   limit=None):
    if level is not None:
        raise Exception('Reindex by level not supported for sparse')

    if self.index.equals(index):
        if copy:
            return self.copy()
        else:
            return self

    if len(self.index) == 0:
        return SparseDataFrame(index=index, columns=self.columns)

    indexer = self.index.get_indexer(index, method, limit=limit)
    indexer = com._ensure_platform_int(indexer)
    mask = indexer == -1
    need_mask = mask.any()

    new_series = {}
    for col, series in self.iteritems():
        values = series.values
        new = values.take(indexer)
        if need_mask:
            np.putmask(new, mask, fill_value)
        new_series[col] = new

    return SparseDataFrame(new_series, index=index, columns=self.columns,
                           default_fill_value=self.default_fill_value)
def _make_labels(self):
    if self._was_factor:  # pragma: no cover
        raise Exception('Should not call this method grouping by level')
    else:
        values = self.grouper
        if values.dtype != np.object_:
            values = values.astype('O')

        # khash
        rizer = lib.Factorizer(len(values))
        labels, counts = rizer.factorize(values, sort=False)

        uniques = Index(rizer.uniques, name=self.name)
        if self.sort and len(counts) > 0:
            sorter = uniques.argsort()
            reverse_indexer = np.empty(len(sorter), dtype=np.int32)
            reverse_indexer.put(sorter, np.arange(len(sorter)))

            mask = labels < 0
            labels = reverse_indexer.take(labels)
            np.putmask(labels, mask, -1)

            uniques = uniques.take(sorter)
            counts = counts.take(sorter)

        self._labels = labels
        self._group_index = uniques
        self._counts = counts
def _map(f, arr, na_mask=False, na_value=np.nan, dtype=object):
    from pandas.core.series import Series

    if not len(arr):
        return np.ndarray(0, dtype=dtype)

    if isinstance(arr, Series):
        arr = arr.values
    if not isinstance(arr, np.ndarray):
        arr = np.asarray(arr, dtype=object)
    if na_mask:
        mask = isnull(arr)
        try:
            result = lib.map_infer_mask(arr, f, mask.view(np.uint8))
        except (TypeError, AttributeError):
            def g(x):
                try:
                    return f(x)
                except (TypeError, AttributeError):
                    return na_value
            return _map(g, arr, dtype=dtype)
        if na_value is not np.nan:
            np.putmask(result, mask, na_value)
        if result.dtype == object:
            result = lib.maybe_convert_objects(result)
        return result
    else:
        return lib.map_infer(arr, f)
def _factorize_keys(lk, rk, sort=True):
    if com._is_int_or_datetime_dtype(lk) and com._is_int_or_datetime_dtype(rk):
        klass = lib.Int64Factorizer
        lk = com._ensure_int64(lk)
        rk = com._ensure_int64(rk)
    else:
        klass = lib.Factorizer
        lk = com._ensure_object(lk)
        rk = com._ensure_object(rk)

    rizer = klass(max(len(lk), len(rk)))

    llab = rizer.factorize(lk)
    rlab = rizer.factorize(rk)

    count = rizer.get_count()

    if sort:
        uniques = rizer.uniques.to_array()
        llab, rlab = _sort_labels(uniques, llab, rlab)

    # NA group
    lmask = llab == -1
    lany = lmask.any()
    rmask = rlab == -1
    rany = rmask.any()

    if lany or rany:
        if lany:
            np.putmask(llab, lmask, count)
        if rany:
            np.putmask(rlab, rmask, count)
        count += 1

    return llab, rlab, count
def factorize(values, sort=False, order=None, na_sentinel=-1):
    """
    Encode input values as an enumerated type or categorical variable

    Parameters
    ----------
    values : sequence
    sort : boolean, default False
        Sort the uniques (and relabel accordingly)
    order : (unused here)
    na_sentinel : int, default -1
        Label used for missing values

    Returns
    -------
    labels, uniques, counts
    """
    hash_klass, values = _get_hash_table_and_cast(values)

    uniques = []
    table = hash_klass(len(values))
    labels, counts = table.get_labels(values, uniques, 0, na_sentinel)

    uniques = com._asarray_tuplesafe(uniques)
    if sort and len(counts) > 0:
        sorter = uniques.argsort()
        reverse_indexer = np.empty(len(sorter), dtype=np.int32)
        reverse_indexer.put(sorter, np.arange(len(sorter)))

        mask = labels < 0
        labels = reverse_indexer.take(labels)
        np.putmask(labels, mask, -1)

        uniques = uniques.take(sorter)
        counts = counts.take(sorter)

    return labels, uniques, counts
def getFrequency(T, D, start, stop, dir='Y'):
    import scipy.ndimage

    freq_list = []
    N = len(D[:, 0])
    print("Fitting from T : ", T[start], " - ", T[stop])
    # Note: we assume constant time-steps!
    for n in range(N):
        time_series = D[n, start:stop]
        FS = np.fft.rfft(time_series)
        # Get maximum frequency
        m = np.argmax(abs(FS))
        fftfreq = np.fft.fftfreq(len(time_series), d=(T[-10] - T[-11]))
        abs_freq = 2. * np.pi * fftfreq[m]  # needs sqrt(2.) from velocity normalization
        # Get sign of frequency by taking gradient of phase shift
        # (how to deal with jump?)
        time_series = scipy.ndimage.gaussian_filter(time_series, 0.01)
        grad = np.gradient(time_series, T[-10] - T[-11])
        # remove jump values
        np.putmask(grad, abs(grad) > 1.05 * abs_freq, 0.)
        np.putmask(grad, abs(grad) < 0.95 * abs_freq, 0.)
        sig = -np.sign(sum(grad))
        freq_list.append(sig * abs_freq)
    print("Getting Frequency from T = ", T[start], " to T = ", T[stop])
    return np.array(freq_list)
def remapRaster(infile, out_file, lookup):
    '''remap raster values to those in lookup table'''
    inmap = gdal.Open(infile)
    rows = inmap.RasterYSize
    cols = inmap.RasterXSize
    map_arr = inmap.ReadAsArray()

    # remap values
    remap_dict = df.getDictfromCSV(lookup, '\t', 1, 0)
    remap_dict[0] = 2000      # ag
    remap_dict[255] = 32767   # nodata
    map_out = map_arr.astype(np.int16)
    print('input map labels', np.unique(map_out))
    for r in remap_dict:
        print('reclassifying', r, ': ', remap_dict[r])
        outval = int(remap_dict[r])
        temp = np.equal(map_out, int(r))
        np.putmask(map_out, temp, outval)
        temp = None
    print('output map labels', np.unique(map_out))

    # output raster
    driver = inmap.GetDriver()
    outDs = driver.Create(out_file, cols, rows, 1, GDT_Int16)
    outDs.SetGeoTransform(inmap.GetGeoTransform())
    outDs.SetProjection(inmap.GetProjection())
    outband = outDs.GetRasterBand(1)
    outband.WriteArray(map_out, 0, 0)
    outband.SetNoDataValue(32767)
    outband.FlushCache()
def binary_search_np(A, B):
    # assume A and B are numpy arrays; A must be sorted for searchsorted
    idx2 = np.minimum(len(A) - 1, np.searchsorted(A, B))
    idx1 = np.maximum(0, idx2 - 1)
    idx2_is_better = np.abs(A[idx1] - B) > np.abs(A[idx2] - B)
    np.putmask(idx1, idx2_is_better, idx2)
    return A[idx1]
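# Usage sketch: returns, for each query in B, the nearest value in the
# sorted array A (ties resolve toward the lower neighbour, since putmask
# only switches when idx2 is strictly better; values illustrative):
import numpy as np

A = np.array([0.0, 1.0, 4.0, 9.0])
B = np.array([2.4, 8.0, -1.0])
print(binary_search_np(A, B))  # [1. 9. 0.]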
def normalize_phase(phase):
    """
    Normalize phase to the range [-pi, pi].

    Parameters
    ----------
    phase : array of float
        Phase to normalize.

    Returns
    -------
    array of float
        Normalized phases.
    """
    # Convert to range [-2*pi, 2*pi].
    out = np.fmod(phase, 2.0 * np.pi)

    # Remove nans
    nans = np.isnan(out)
    np.putmask(out, nans, 0)

    # Convert to range [-pi, pi]
    out[out < -np.pi] += 2.0 * np.pi
    out[out > np.pi] -= 2.0 * np.pi

    # Put nans back
    np.putmask(out, nans, np.nan)

    return out
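# Quick check of the wrap-around behaviour (values illustrative); note the
# temporary zeroing of NaNs keeps the comparison lines from tripping over
# invalid-value warnings:
import numpy as np

phase = np.array([3.5 * np.pi, -1.5 * np.pi, np.nan])
print(normalize_phase(phase))  # approximately [-pi/2, pi/2, nan]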
def ave_array_2d(x, y, z, nxbin, xlow, xhigh, nybin, ylow, yhigh,
                 completeness=None):
    nx = len(x)
    ny = len(y)
    if nx != ny:
        print('Error: len(x) != len(y)')
        return

    xstep = float(xhigh - xlow) / nxbin
    ystep = float(yhigh - ylow) / nybin
    x_bin = N.arange(nxbin) * xstep + xlow + xstep / 2.0
    y_bin = N.arange(nybin) * ystep + ylow + ystep / 2.0
    d_bin = N.zeros((nybin, nxbin), N.float64)
    z_bin = N.zeros((nybin, nxbin), N.float64)

    for k in range(nx):
        jbin_index = int((x[k] - xlow) / xstep)
        ibin_index = int((y[k] - ylow) / ystep)
        if completeness is None:
            c = 1
        else:
            c = completeness[k]
        if 0 <= jbin_index < nxbin and 0 <= ibin_index < nybin:
            d_bin[ibin_index, jbin_index] += 1.0 / c
            z_bin[ibin_index, jbin_index] += z[k]

    z_bin /= d_bin
    N.putmask(z_bin, d_bin < 1, 0.0)
    return x_bin, y_bin, z_bin
def test_frame_getitem_setitem_boolean(self, multiindex_dataframe_random_data):
    frame = multiindex_dataframe_random_data
    df = frame.T.copy()
    values = df.values

    result = df[df > 0]
    expected = df.where(df > 0)
    tm.assert_frame_equal(result, expected)

    df[df > 0] = 5
    values[values > 0] = 5
    tm.assert_almost_equal(df.values, values)

    df[df == 5] = 0
    values[values == 5] = 0
    tm.assert_almost_equal(df.values, values)

    # a df that needs alignment first
    df[df[:-1] < 0] = 2
    np.putmask(values[:-1], values[:-1] < 0, 2)
    tm.assert_almost_equal(df.values, values)

    with pytest.raises(TypeError, match='boolean values only'):
        df[df * 0] = 2
def deviance(self, Y, mu, scale=1.):
    '''
    Poisson deviance function

    Parameters
    ----------
    Y : array-like
        Endogenous response variable
    mu : array-like
        Fitted mean response variable
    scale : float, optional
        An optional scale argument

    Returns
    -------
    deviance : float
        The deviance function at (Y, mu) as defined below.

    Notes
    -----
    If a constant term is included it is defined as

    :math:`deviance = 2*\\sum_{i}(Y*\\log(Y/\\mu))`
    '''
    if np.any(Y == 0):
        retarr = np.zeros(Y.shape)
        Ymu = Y / mu
        mask = Ymu != 0
        YmuMasked = Ymu[mask]
        Ymasked = Y[mask]
        np.putmask(retarr, mask, Ymasked * np.log(YmuMasked) / scale)
        return 2 * np.sum(retarr)
    else:
        return 2 * np.sum(Y * np.log(Y / mu)) / scale
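# The masked branch exists to avoid log(0) when the response contains
# zeros: putmask writes the per-observation terms only where Y/mu != 0.
# A standalone check of that branch (self is unused in the body, so the
# function can be exercised directly; values illustrative):
import numpy as np

Y = np.array([0.0, 2.0, 4.0])
mu = np.array([1.0, 2.0, 2.0])
print(deviance(None, Y, mu))  # 2 * (0 + 2*log(1) + 4*log(2)) ~= 5.545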
def usefullness(data, targetClass, otherClass=None, **args):
    '''A feature score for discrete data

    optional arguments:
    threshold
    fraction
    '''
    if 'threshold' in args:
        threshold = args['threshold']
    else:
        threshold = 5
    if 'fraction' in args:
        fraction = args['fraction']
    else:
        fraction = 0.0

    Y, targetClassSize, otherClassSize, otherI, feature = parseArgs(
        data, targetClass, otherClass, **args)

    threshold = max(threshold, fraction * float(targetClassSize))

    s1 = featureCount(data, targetClass=targetClass, Y=Y, feature=feature)
    s2 = featureCount(data, I=otherI, Y=Y, feature=feature) / float(otherClassSize)
    s2 = 1 - s2
    numpy.putmask(s2, numpy.less(s1, threshold), 0.0)

    return s2
def get_closure_phase(infile='L401323_SB349_uv.dppp.MS',
                      triangle=['TS001', 'DE601HBA', 'DE605HBA']):
    a = inspect.stack()
    stacklevel = 0
    for k in range(len(a)):
        # str.find replaces the Python 2 string.find used originally
        if a[k][1].find('ipython console') > 0:
            stacklevel = k
    myf = sys._getframe(stacklevel).f_globals
    myf['__last_task'] = 'mytask'
    myf['taskname'] = 'mytask'
    tb = myf['tb']

    oroot = infile.split('uv')[0]
    for lfile in np.sort(glob.glob(oroot + '*ms')):
        os.system('ms2uvfits in=' + lfile + ' out=' +
                  lfile.replace('ms', 'fits') + ' writesyscal=F')
        if lfile == infile:
            continue
        tb.open(lfile + '/ANTENNA')
        names = tb.getcol('NAME')
        trnum = []
        for itr in range(3):
            trnum.append(np.argwhere(names == triangle[itr])[0][0])
        tb.close()
        trnum.sort()

        tb.open(lfile)
        ant1 = tb.getcol('ANTENNA1')
        ant2 = tb.getcol('ANTENNA2')
        data = tb.getcol('DATA')
        ph12 = +np.angle(data[0, 0, (ant1 == trnum[0]) & (ant2 == trnum[1])])
        ph23 = +np.angle(data[0, 0, (ant1 == trnum[1]) & (ant2 == trnum[2])])
        ph31 = -np.angle(data[0, 0, (ant1 == trnum[0]) & (ant2 == trnum[2])])
        clph = ph12 + ph23 + ph31
        # wrap the closure phase back into [-pi, pi]
        np.putmask(clph, clph > np.pi, clph - 2. * np.pi)
        np.putmask(clph, clph < -np.pi, clph + 2. * np.pi)
        # np.savetxt(lfile.replace('ms', 'txt'), np.unwrap(clph))
        np.savetxt(lfile.replace('ms', 'txt'), clph)
def _reindex_index(self, index, method, copy):
    if self.index.equals(index):
        if copy:
            return self.copy()
        else:
            return self

    if len(self.index) == 0:
        return SparseDataFrame(index=index, columns=self.columns)

    indexer = self.index.get_indexer(index, method)
    mask = indexer == -1
    need_mask = mask.any()

    new_series = {}
    for col, series in self.iteritems():
        values = series.values
        new = values.take(indexer)
        if need_mask:
            np.putmask(new, mask, nan)
        new_series[col] = new

    return SparseDataFrame(new_series, index=index, columns=self.columns,
                           default_fill_value=self.default_fill_value)
def map(self, arg):
    """
    Map values of Series using input correspondence (which can be a dict,
    Series, or function).

    Parameters
    ----------
    arg : function, dict, or Series

    Returns
    -------
    y : Series
        same index as caller
    """
    if isinstance(arg, (dict, Series)):
        if isinstance(arg, dict):
            arg = Series(arg)

        indexer, mask = tseries.getMergeVec(self, arg.index.indexMap)
        newValues = arg.view(np.ndarray).take(indexer)
        # ~mask is the boolean complement; the unary minus originally used
        # here is rejected on boolean arrays by modern NumPy
        np.putmask(newValues, ~mask, np.nan)

        newSer = Series(newValues, index=self.index)
        return newSer
    else:
        return Series([arg(x) for x in self], index=self.index)
def returns(prices, method='simple', periods=1, fill_method='pad', limit=None,
            freq=None):
    """
    compute the returns for the specified prices.
    method: [simple, compound, log], compound is log
    """
    if method not in ('simple', 'compound', 'log'):
        raise ValueError(
            "Invalid method type. Valid values are ('simple', 'compound', 'log')")

    if method == 'simple':
        return prices.pct_change(periods=periods, fill_method=fill_method,
                                 limit=limit, freq=freq)
    else:
        if freq is not None:
            raise NotImplementedError("TODO: implement this logic if needed")

        if isinstance(prices, pd.Series):
            if fill_method is None:
                data = prices
            else:
                data = prices.fillna(method=fill_method, limit=limit)

            data = np.log(data / data.shift(periods=periods))
            mask = pd.isnull(prices.values)
            np.putmask(data.values, mask, np.nan)
            return data
        else:
            return pd.DataFrame(
                {name: returns(col, method, periods, fill_method, limit, freq)
                 for name, col in prices.items()},
                columns=prices.columns,
                index=prices.index)
def golub(data, targetClass, otherClass, **args):
    '''The Golub feature score:
    s = (mu1 - mu2) / sqrt(sigma1^2 + sigma2^2)
    '''
    if 'Y' in args:
        Y = args['Y']
        targetClassSize = numpy.sum(numpy.equal(Y, targetClass))
        otherClassSize = numpy.sum(numpy.equal(Y, otherClass))
    else:
        Y = None
        targetClassSize = data.labels.classSize[targetClass]
        otherClassSize = data.labels.classSize[otherClass]

    m1 = numpy.array(featureMean(data, targetClass, Y))
    m2 = numpy.array(featureMean(data, otherClass, Y))
    s1 = numpy.array(featureStd(data, targetClass, Y))
    s2 = numpy.array(featureStd(data, otherClass, Y))

    s = numpy.sqrt(s1 ** 2 + s2 ** 2)
    m = (m1 + m2) / 2.0
    # perfect features will have s[i] = 0, so need to take care of that:
    numpy.putmask(s, numpy.equal(s, 0), m)
    # features that are zero will still have s[i] = 0 so:
    numpy.putmask(s, numpy.equal(s, 0), 1)

    g = (m1 - m2) / s
    return g
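# The two putmask calls guard the denominator: features that separate the
# classes perfectly have s == 0 and fall back to the pooled mean m, and
# all-zero features (where m is also 0) fall back to 1 so the division is
# always defined. A minimal numeric check of that guard (values illustrative):
import numpy

s = numpy.array([0.5, 0.0, 0.0])
m = numpy.array([1.0, 2.0, 0.0])
numpy.putmask(s, numpy.equal(s, 0), m)
numpy.putmask(s, numpy.equal(s, 0), 1)
print(s)  # [0.5 2.  1. ]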
def makeGridDomain(cLon, cLat, minLon, maxLon, minLat, maxLat,
                   margin=2, resolution=0.01):
    """
    Generate a grid of the distance and angle of a grid of points
    surrounding a storm centre given the location of the storm.
    The grid margin and grid size can be set in configuration files.
    xMargin, yMargin and gridSize are in degrees
    """
    if (type(cLon) == list or type(cLat) == list or
            type(cLon) == np.ndarray or type(cLat) == np.ndarray):
        raise TypeError("Input values must be scalar values")
    gridSize = int(resolution * 1000)
    minLon_ = int(1000 * (minLon)) - int(1000 * margin)
    maxLon_ = int(1000 * (maxLon)) + int(1000 * margin) + 1
    minLat_ = int(1000 * (minLat)) - int(1000 * margin)
    maxLat_ = int(1000 * (maxLat)) + int(1000 * margin) + 1

    xGrid = np.array(np.arange(minLon_, maxLon_, gridSize), dtype=int)
    yGrid = np.array(np.arange(minLat_, maxLat_, gridSize), dtype=int)

    R = gridLatLonDist(cLon, cLat, xGrid / 1000., yGrid / 1000.)
    # avoid a zero distance at the storm centre
    np.putmask(R, R == 0, 1e-30)
    theta = np.pi / 2. - gridLatLonBear(cLon, cLat, xGrid / 1000., yGrid / 1000.)

    return R, theta
def computeDailyMean(dicoBand, nbBandByDay, typeData):
    def meanCalc(values):
        return np.nanmean(values)

    mean = {}
    footprint = np.array([[0, 1, 0],
                          [1, 0, 1],
                          [0, 1, 0]])
    # dict views are not indexable in Python 3, so materialize the items once
    bands = list(dicoBand.items())
    for i in range(len(bands) // nbBandByDay):
        # do not include the last band, which corresponds to 00h-->3h
        maxRange = nbBandByDay + i * nbBandByDay
        for j in range(i * nbBandByDay, maxRange):
            if "array" in locals():
                array = array + bands[j][1]
                np.putmask(bands[j][1], bands[j][1] == 0, 0)
                mask = mask + (bands[j][1] > 0).astype(int)
            else:
                array = bands[j][1]
                np.putmask(bands[j][1], bands[j][1] == 0, 0)
                mask = (bands[j][1] > 0).astype(int)
        mean[i] = array
        del array
        # using np.nanmean directly would be much simpler
        mean[i] = mean[i] / mask
        indices = np.where(np.isnan(mean[i]))
        results = ndimage.generic_filter(mean[i], meanCalc, footprint=footprint)
        for row, col in zip(*indices):
            mean[i][row, col] = results[row, col]
    return mean
def set_data(self, data, **args):
    if args.get("skipIfSame", 1):
        if checksum(data) == checksum(self.raw_data):
            return

    self.domain_data_stat = []
    self.attr_values = {}
    self.original_data = None
    self.scaled_data = None
    self.no_jittering_scaled_data = None
    self.valid_data_array = None

    self.raw_data = None
    self.have_data = False
    self.data_has_class = False
    self.data_has_continuous_class = False
    self.data_has_discrete_class = False
    self.data_class_name = None
    self.data_domain = None
    self.data_class_index = None

    if data is None:
        return

    full_data = data
    self.raw_data = data
    len_data = data and len(data) or 0

    self.attribute_names = [attr.name for attr in full_data.domain]
    self.attribute_name_index = dict(
        [(full_data.domain[i].name, i) for i in range(len(full_data.domain))])
    self.attribute_flip_info = {}

    self.data_domain = full_data.domain
    self.data_has_class = bool(full_data.domain.class_var)
    self.data_has_continuous_class = full_data.domain.has_continuous_class
    self.data_has_discrete_class = full_data.domain.has_discrete_class
    self.data_class_name = self.data_has_class and full_data.domain.class_var.name
    if self.data_has_class:
        self.data_class_index = self.attribute_name_index[self.data_class_name]
    self.have_data = bool(self.raw_data and len(self.raw_data) > 0)

    self.domain_data_stat = getCached(full_data, DomainBasicStats, (full_data,))

    sort_values_for_discrete_attrs = args.get("sort_values_for_discrete_attrs", 1)

    for index in range(len(full_data.domain)):
        attr = full_data.domain[index]
        if attr.is_discrete:
            self.attr_values[attr.name] = [0, len(attr.values)]
        elif attr.is_continuous:
            self.attr_values[attr.name] = [self.domain_data_stat[index].min,
                                           self.domain_data_stat[index].max]

    if 'no_data' in args:
        return

    # the original_data, no_jittering_scaled_data and validArray are arrays
    # that we can cache so that other visualization widgets don't need to
    # compute them. The scaled_data on the other hand has to be computed for
    # each widget separately because of different jitter_continuous and
    # jitter_size values
    if getCached(data, "visualizationData"):
        self.original_data, self.no_jittering_scaled_data, self.valid_data_array = \
            getCached(data, "visualizationData")
    else:
        no_jittering_data = np.c_[full_data.X, full_data.Y].T
        valid_data_array = ~np.isnan(no_jittering_data)
        original_data = no_jittering_data.copy()

        for index in range(len(data.domain)):
            attr = data.domain[index]
            if attr.is_discrete:
                # see if the values for discrete attributes have to be resorted
                variable_value_indices = get_variable_value_indices(
                    data.domain[index], sort_values_for_discrete_attrs)
                if 0 in [i == variable_value_indices[attr.values[i]]
                         for i in range(len(attr.values))]:
                    # make the array contiguous, otherwise the putmask
                    # function does not work
                    line = no_jittering_data[index].copy()
                    indices = [np.where(line == val, 1, 0)
                               for val in range(len(attr.values))]
                    for i in range(len(attr.values)):
                        np.putmask(line, indices[i],
                                   variable_value_indices[attr.values[i]])
                    no_jittering_data[index] = line  # save the changed array
                    original_data[index] = line      # reorder also the values in the original data
                no_jittering_data[index] = ((no_jittering_data[index] * 2.0 + 1.0)
                                            / float(2 * len(attr.values)))
            elif attr.is_continuous:
                # if all values are the same then prevent division by zero
                diff = self.domain_data_stat[index].max - \
                    self.domain_data_stat[index].min or 1
                no_jittering_data[index] = (no_jittering_data[index] -
                                            self.domain_data_stat[index].min) / diff

        self.original_data = original_data
        self.no_jittering_scaled_data = no_jittering_data
        self.valid_data_array = valid_data_array

    if data:
        setCached(data, "visualizationData",
                  (self.original_data, self.no_jittering_scaled_data,
                   self.valid_data_array))

    # compute the scaled_data arrays
    scaled_data = self.no_jittering_scaled_data

    # Random generators for jittering
    random = np.random.RandomState(seed=self.jitter_seed)
    rand_seeds = random.random_integers(0, 2 ** 30 - 1, size=len(data.domain))
    for index, rseed in zip(list(range(len(data.domain))), rand_seeds):
        # Need to use a different seed for each feature
        random = np.random.RandomState(seed=rseed)
        attr = data.domain[index]
        if attr.is_discrete:
            scaled_data[index] += (self.jitter_size /
                                   (50.0 * max(1, len(attr.values)))) * \
                (random.rand(len(full_data)) - 0.5)
        elif attr.is_continuous and self.jitter_continuous:
            scaled_data[index] += self.jitter_size / 50.0 * \
                (0.5 - random.rand(len(full_data)))
            scaled_data[index] = np.absolute(scaled_data[index])  # fix values below zero
            # fix values above 1 by reflecting them back below 1
            ind = np.where(scaled_data[index] > 1.0, 1, 0)
            np.putmask(scaled_data[index], ind,
                       2.0 - np.compress(ind, scaled_data[index]))

    self.scaled_data = scaled_data[:, :len_data]
def mvstdnormcdf(lower, upper, corrcoef, **kwds):
    '''standardized multivariate normal cumulative distribution function

    This is a wrapper for scipy.stats.kde.mvn.mvndst which calculates
    a rectangular integral over a standardized multivariate normal
    distribution.

    This function assumes standardized scale, that is the variance in each
    dimension is one, but correlation can be arbitrary, covariance =
    correlation matrix

    Parameters
    ----------
    lower, upper : array_like, 1d
        lower and upper integration limits with length equal to the number
        of dimensions of the multivariate normal distribution. It can contain
        -np.inf or np.inf for open integration intervals
    corrcoef : float or array_like
        specifies correlation matrix in one of three ways, see notes
    optional keyword parameters to influence integration
        * maxpts : int, maximum number of function values allowed. This
          parameter can be used to limit the time. A sensible strategy is to
          start with `maxpts` = 1000*N, and then increase `maxpts` if ERROR
          is too large.
        * abseps : float, absolute error tolerance.
        * releps : float, relative error tolerance.

    Returns
    -------
    cdfvalue : float
        value of the integral

    Notes
    -----
    The correlation matrix corrcoef can be given in 3 different ways.
    If the multivariate normal is two-dimensional then only the correlation
    coefficient needs to be provided.
    For general dimension the correlation matrix can be provided either as
    a one-dimensional array of the upper triangular correlation coefficients
    stacked by rows, or as a full square correlation matrix

    See Also
    --------
    mvnormcdf : cdf of multivariate normal distribution without
        standardization

    Examples
    --------
    >>> print(mvstdnormcdf([-np.inf,-np.inf], [0.0,np.inf], 0.5))
    0.5
    >>> corr = [[1.0, 0, 0.5],[0,1,0],[0.5,0,1]]
    >>> print(mvstdnormcdf([-np.inf,-np.inf,-100.0], [0.0,0.0,0.0], corr, abseps=1e-6))
    0.166666399198
    >>> print(mvstdnormcdf([-np.inf,-np.inf,-100.0],[0.0,0.0,0.0],corr, abseps=1e-8))
    something wrong completion with ERROR > EPS and MAXPTS function values used;
                        increase MAXPTS to decrease ERROR; 1.048330348e-006
    0.166666546218
    >>> print(mvstdnormcdf([-np.inf,-np.inf,-100.0],[0.0,0.0,0.0], corr,
    ...                    maxpts=100000, abseps=1e-8))
    0.166666588293
    '''
    n = len(lower)
    # don't know if converting to array is necessary,
    # but it makes ndim check possible
    lower = np.array(lower)
    upper = np.array(upper)
    corrcoef = np.array(corrcoef)

    correl = np.zeros(int(n * (n - 1) / 2.0))  # dtype necessary?

    if (lower.ndim != 1) or (upper.ndim != 1):
        raise ValueError('can handle only 1D bounds')
    if len(upper) != n:
        raise ValueError('bounds have different lengths')
    if n == 2 and corrcoef.size == 1:
        correl = corrcoef
        # case scalar rho
    elif corrcoef.ndim == 1 and len(corrcoef) == n * (n - 1) / 2.0:
        # case flat corr
        correl = corrcoef
    elif corrcoef.shape == (n, n):
        # case square corr
        correl = corrcoef[np.tril_indices(n, -1)]
        # for ii in range(n):
        #     for jj in range(ii):
        #         correl[jj + ((ii-2)*(ii-1))/2] = corrcoef[ii, jj]
    else:
        raise ValueError('corrcoef has incorrect dimension')

    if 'maxpts' not in kwds:
        if n > 2:
            kwds['maxpts'] = 10000 * n

    lowinf = np.isneginf(lower)
    uppinf = np.isposinf(upper)
    infin = 2.0 * np.ones(n)

    np.putmask(infin, lowinf, 0)  # infin.putmask(0, lowinf)
    np.putmask(infin, uppinf, 1)  # infin.putmask(1, uppinf)
    # this has to be last
    np.putmask(infin, lowinf * uppinf, -1)

    # remove infs
    # np.putmask(lower, lowinf, -100)
    # np.putmask(upper, uppinf, 100)

    error, cdfvalue, inform = scipy.stats.kde.mvn.mvndst(lower, upper, infin,
                                                         correl, **kwds)
    if inform:
        print('something wrong', informcode[inform], error)

    return cdfvalue
def all(self, *, skipna: bool = True, **kwargs):
    """
    Return whether all elements are True.

    Returns True unless there is at least one element that is False.
    By default, NAs are skipped. If ``skipna=False`` is specified and
    missing values are present, similar :ref:`Kleene logic <boolean.kleene>`
    is used as for logical operations.

    Parameters
    ----------
    skipna : bool, default True
        Exclude NA values. If the entire array is NA and `skipna` is
        True, then the result will be True, as for an empty array.
        If `skipna` is False, the result will still be False if there is
        at least one element that is False, otherwise NA will be returned
        if there are NA's present.
    **kwargs : any, default None
        Additional keywords have no effect but might be accepted for
        compatibility with NumPy.

    Returns
    -------
    bool or :attr:`pandas.NA`

    See Also
    --------
    numpy.all : Numpy version of this method.
    BooleanArray.any : Return whether any element is True.

    Examples
    --------
    The result indicates whether all elements are True (and by default
    skips NAs):

    >>> pd.array([True, True, pd.NA]).all()
    True
    >>> pd.array([True, False, pd.NA]).all()
    False
    >>> pd.array([], dtype="boolean").all()
    True
    >>> pd.array([pd.NA], dtype="boolean").all()
    True

    With ``skipna=False``, the result can be NA if this is logically
    required (whether ``pd.NA`` is True or False influences the result):

    >>> pd.array([True, True, pd.NA]).all(skipna=False)
    <NA>
    >>> pd.array([True, False, pd.NA]).all(skipna=False)
    False
    """
    kwargs.pop("axis", None)
    nv.validate_all((), kwargs)

    values = self._data.copy()
    np.putmask(values, self._mask, True)
    result = values.all()

    if skipna:
        return result
    else:
        if not result or len(self) == 0 or not self._mask.any():
            return result
        else:
            return self.dtype.na_value
def optimize_SLOW_Separation(self, attrIndices, anchorData, XAnchors=None,
                             YAnchors=None):
    if not self.graph.haveData or len(self.graph.rawData) == 0 \
            or not self.graph.dataHasDiscreteClass:
        return anchorData, (XAnchors, YAnchors)
    validData = self.graph.getValidList(attrIndices)
    selectedData = numpy.compress(validData,
                                  numpy.take(self.graph.noJitteringScaledData,
                                             attrIndices, axis=0),
                                  axis=1)

    # `is None` instead of `== None`: equality against None is ambiguous
    # for numpy arrays
    if XAnchors is None:
        XAnchors = numpy.array([a[0] for a in anchorData], float)
    if YAnchors is None:
        YAnchors = numpy.array([a[1] for a in anchorData], float)

    transProjData = self.graph.createProjectionAsNumericArray(
        attrIndices, validData=validData, XAnchors=XAnchors,
        YAnchors=YAnchors, scaleFactor=self.graph.scaleFactor,
        normalize=self.graph.normalizeExamples, useAnchorData=1)
    if transProjData is None:
        return anchorData, (XAnchors, YAnchors)

    projData = numpy.transpose(transProjData)
    x_positions = projData[0]
    x_positions2 = numpy.array(x_positions)
    y_positions = projData[1]
    y_positions2 = numpy.array(y_positions)
    classData = projData[2]
    classData2 = numpy.array(classData)

    FXs = numpy.zeros(len(x_positions), float)  # forces
    FYs = numpy.zeros(len(x_positions), float)
    GXs = numpy.zeros(len(anchorData), float)   # gradients
    GYs = numpy.zeros(len(anchorData), float)

    rotateArray = list(range(len(x_positions)))
    rotateArray = rotateArray[1:] + [0]
    for i in range(len(x_positions) - 1):
        x_positions2 = numpy.take(x_positions2, rotateArray)
        y_positions2 = numpy.take(y_positions2, rotateArray)
        classData2 = numpy.take(classData2, rotateArray)
        dx = x_positions2 - x_positions
        dy = y_positions2 - y_positions
        rs2 = dx ** 2 + dy ** 2
        rs2 += numpy.where(rs2 == 0.0, 0.0001, 0.0)  # replace zeros to avoid divisions by zero
        rs = numpy.sqrt(rs2)

        F = numpy.zeros(len(x_positions), float)
        classDiff = numpy.where(classData == classData2, 1, 0)
        numpy.putmask(F, classDiff, 150 * self.attractG * rs2)
        numpy.putmask(F, 1 - classDiff, -self.repelG / rs2)
        FXs += F * dx / rs
        FYs += F * dy / rs

    # compute gradient for all anchors
    GXs = numpy.array([sum(FXs * selectedData[i])
                       for i in range(len(anchorData))], float)
    GYs = numpy.array([sum(FYs * selectedData[i])
                       for i in range(len(anchorData))], float)

    m = max(max(abs(GXs)), max(abs(GYs)))
    GXs /= (20 * m)
    GYs /= (20 * m)

    newXAnchors = XAnchors + GXs
    newYAnchors = YAnchors + GYs

    # normalize so that the anchor most far away will lie on the circle
    m = math.sqrt(max(newXAnchors ** 2 + newYAnchors ** 2))
    newXAnchors /= m
    newYAnchors /= m
    return [(newXAnchors[i], newYAnchors[i], anchorData[i][2])
            for i in range(len(anchorData))], (newXAnchors, newYAnchors)
def optimize_LDA_Separation(self, attrIndices, anchorData, XAnchors=None,
                            YAnchors=None):
    if not self.graph.haveData or len(self.graph.rawData) == 0 \
            or not self.graph.dataHasDiscreteClass:
        return anchorData, (XAnchors, YAnchors)
    classCount = len(self.graph.dataDomain.classVar.values)
    validData = self.graph.getValidList(attrIndices)
    selectedData = numpy.compress(validData,
                                  numpy.take(self.graph.noJitteringScaledData,
                                             attrIndices, axis=0),
                                  axis=1)

    # `is None` instead of `== None`: equality against None is ambiguous
    # for numpy arrays
    if XAnchors is None:
        XAnchors = numpy.array([a[0] for a in anchorData], float)
    if YAnchors is None:
        YAnchors = numpy.array([a[1] for a in anchorData], float)

    transProjData = self.graph.createProjectionAsNumericArray(
        attrIndices, validData=validData, XAnchors=XAnchors,
        YAnchors=YAnchors, scaleFactor=self.graph.scaleFactor,
        normalize=self.graph.normalizeExamples, useAnchorData=1)
    if transProjData is None:
        return anchorData, (XAnchors, YAnchors)

    projData = numpy.transpose(transProjData)
    x_positions, y_positions, classData = projData[0], projData[1], projData[2]

    averages = []
    for i in range(classCount):
        ind = classData == i
        xpos = numpy.compress(ind, x_positions)
        ypos = numpy.compress(ind, y_positions)
        xave = numpy.sum(xpos) / len(xpos)
        yave = numpy.sum(ypos) / len(ypos)
        averages.append((xave, yave))

    # compute the positions of all the points. we will try to move all
    # points so that the center will be in the (0,0)
    xCenterVector = -numpy.sum(x_positions) / len(x_positions)
    yCenterVector = -numpy.sum(y_positions) / len(y_positions)
    centerVectorLength = math.sqrt(xCenterVector * xCenterVector +
                                   yCenterVector * yCenterVector)

    meanDestinationVectors = []
    for i in range(classCount):
        xDir = 0.0
        yDir = 0.0
        rs = 0.0
        for j in range(classCount):
            if i == j:
                continue
            r = math.sqrt((averages[i][0] - averages[j][0]) ** 2 +
                          (averages[i][1] - averages[j][1]) ** 2)
            if r == 0.0:
                xDir += math.cos((i / float(classCount)) * 2 * math.pi)
                yDir += math.sin((i / float(classCount)) * 2 * math.pi)
                r = 0.0001
            else:
                xDir += (1 / r ** 3) * ((averages[i][0] - averages[j][0]))
                yDir += (1 / r ** 3) * ((averages[i][1] - averages[j][1]))
            # rs += 1/r

        # actualDirAmpl = math.sqrt(xDir**2 + yDir**2)
        # s = abs(xDir) + abs(yDir)
        # xDir = rs * (xDir/s)
        # yDir = rs * (yDir/s)
        meanDestinationVectors.append((xDir, yDir))

    maxLength = math.sqrt(max([x ** 2 + y ** 2
                               for (x, y) in meanDestinationVectors]))
    # normalize destination vectors to some normal values
    meanDestinationVectors = [(x / (2 * maxLength), y / (2 * maxLength))
                              for (x, y) in meanDestinationVectors]
    # add destination vectors to the class averages
    meanDestinationVectors = [(meanDestinationVectors[i][0] + averages[i][0],
                               meanDestinationVectors[i][1] + averages[i][1])
                              for i in range(len(meanDestinationVectors))]
    # meanDestinationVectors = [(x + xCenterVector/5, y + yCenterVector/5)
    #                           for (x, y) in meanDestinationVectors]
    # center mean values
    meanDestinationVectors = [(x + xCenterVector, y + yCenterVector)
                              for (x, y) in meanDestinationVectors]

    FXs = numpy.zeros(len(x_positions), float)  # forces
    FYs = numpy.zeros(len(x_positions), float)

    for c in range(classCount):
        ind = (classData == c)
        numpy.putmask(FXs, ind, meanDestinationVectors[c][0] - x_positions)
        numpy.putmask(FYs, ind, meanDestinationVectors[c][1] - y_positions)

    # compute gradient for all anchors
    GXs = numpy.array([sum(FXs * selectedData[i])
                       for i in range(len(anchorData))], float)
    GYs = numpy.array([sum(FYs * selectedData[i])
                       for i in range(len(anchorData))], float)

    m = max(max(abs(GXs)), max(abs(GYs)))
    GXs /= (20 * m)
    GYs /= (20 * m)

    newXAnchors = XAnchors + GXs
    newYAnchors = YAnchors + GYs

    # normalize so that the anchor most far away will lie on the circle
    m = math.sqrt(max(newXAnchors ** 2 + newYAnchors ** 2))
    newXAnchors /= m
    newYAnchors /= m

    # self.parentWidget.updateGraph()
    """
    for a in range(len(anchorData)):
        x = anchorData[a][0]; y = anchorData[a][1]
        self.parentWidget.graph.addCurve("lll%i" % i, QColor(0, 0, 0),
            QColor(0, 0, 0), 10, style=QwtPlotCurve.Lines,
            symbol=QwtSymbol.NoSymbol, xData=[x, x + GXs[a]],
            yData=[y, y + GYs[a]], forceFilledSymbols=1, lineWidth=3)

    for i in range(classCount):
        self.parentWidget.graph.addCurve("lll%i" % i, QColor(0, 0, 0),
            QColor(0, 0, 0), 10, style=QwtPlotCurve.Lines,
            symbol=QwtSymbol.NoSymbol,
            xData=[averages[i][0], meanDestinationVectors[i][0]],
            yData=[averages[i][1], meanDestinationVectors[i][1]],
            forceFilledSymbols=1, lineWidth=3)
        self.parentWidget.graph.addCurve("lll%i" % i, QColor(0, 0, 0),
            QColor(0, 0, 0), 10, style=QwtPlotCurve.Lines,
            xData=[averages[i][0], averages[i][0]],
            yData=[averages[i][1], averages[i][1]],
            forceFilledSymbols=1, lineWidth=5)
    """
    # self.parentWidget.graph.repaint()
    # self.graph.anchorData = [(newXAnchors[i], newYAnchors[i], anchorData[i][2])
    #                          for i in range(len(anchorData))]
    # self.graph.updateData(attrs, 0)
    return [(newXAnchors[i], newYAnchors[i], anchorData[i][2])
            for i in range(len(anchorData))], (newXAnchors, newYAnchors)
def lastrank(a, axis=-1):
    """
    The ranking of the last element along the axis, ignoring NaNs.

    The ranking is normalized to be between -1 and 1 instead of the more
    common 1 and N. The results are adjusted for ties.

    Parameters
    ----------
    a : ndarray
        Input array. If `a` is not an array, a conversion is attempted.
    axis : int, optional
        The axis over which to rank. By default (axis=-1) the ranking
        (and reducing) is performed over the last axis.

    Returns
    -------
    d : array
        In the case of, for example, a 2d array of shape (n, m) and
        axis=1, the output will contain the rank (normalized to be between
        -1 and 1 and adjusted for ties) of the last element of each row.
        The output in this example will have shape (n,).

    Examples
    --------
    What is the rank of the last element (the value 3 in this example)?
    It is the largest element so the rank is 1.0:

    >>> import numpy as np
    >>> from la.afunc import lastrank
    >>> x1 = np.array([1, 2, 3])
    >>> lastrank(x1)
    1.0

    Now let's try an example where the last element has the smallest value:

    >>> x2 = np.array([3, 2, 1])
    >>> lastrank(x2)
    -1.0

    Here's an example where the last element is not the minimum or maximum
    value:

    >>> x3 = np.array([1, 3, 4, 5, 2])
    >>> lastrank(x3)
    -0.5

    """
    a = np.array(a, copy=False)
    ndim = a.ndim
    if a.size == 0:
        # At least one dimension has length 0
        shape = list(a.shape)
        shape.pop(axis)
        r = np.empty(shape, dtype=a.dtype)
        r.fill(np.nan)
        if (r.ndim == 0) and (r.size == 1):
            r = np.nan
        return r
    indlast = [slice(None)] * ndim
    indlast[axis] = slice(-1, None)
    indlast = tuple(indlast)
    indlast2 = [slice(None)] * ndim
    indlast2[axis] = -1
    indlast2 = tuple(indlast2)
    n = (~np.isnan(a)).sum(axis)
    a_indlast = a[indlast]
    g = (a_indlast > a).sum(axis)
    e = (a_indlast == a).sum(axis)
    r = (g + g + e - 1.0) / 2.0
    r = r / (n - 1.0)
    r = 2.0 * (r - 0.5)
    if ndim == 1:
        if n == 1:
            r = 0
        if np.isnan(a[indlast2]):  # elif?
            r = np.nan
    else:
        np.putmask(r, n == 1, 0)
        np.putmask(r, np.isnan(a[indlast2]), np.nan)
    return r
def read_a_field(self, fnum, debug=False):
    """
    Read a field from the MDV file.

    Parameters
    ----------
    fnum : int
        Field number to read.
    debug : bool
        True to print debugging information, False to suppress.

    Returns
    -------
    field_data : array
        Field data. This data is also stored as a object attribute under
        the field name.

    See Also
    --------
    read_all_fields : Read all fields in the MDV file.
    """
    field_header = self.field_headers[fnum]

    # if the field has already been read, return it
    if self.fields_data[fnum] is not None:
        if debug:
            print("Getting data from the object.")
        return self.fields_data[fnum]

    # field has not yet been read, populate the object and return
    if debug:
        print("No data found in object, populating")

    nz = field_header['nz']
    ny = field_header['ny']
    nx = field_header['nx']

    # read the header
    field_data = np.zeros([nz, ny, nx], dtype='float32')
    self.fileptr.seek(field_header['field_data_offset'])
    self._get_levels_info(nz)  # dict not used, but need to seek.

    for sw in range(nz):
        if debug:
            print("doing levels ", sw)

        # get the compressed level data
        compr_info = self._get_compression_info()
        if compr_info['magic_cookie'] == 0xfe0103fd:
            # Run length encoding only has 20 bytes of compression
            # information with slightly different order, back up
            # 4 bytes to read all of the compressed data.
            self.fileptr.seek(-4, 1)
            compr_data = self.fileptr.read(compr_info['spare'][0])
        else:
            compr_data = self.fileptr.read(compr_info['nbytes_coded'])

        encoding_type = field_header['encoding_type']
        if encoding_type == ENCODING_INT8:
            fmt = '>%iB' % (nx * ny)
            np_form = '>B'
        elif encoding_type == ENCODING_INT16:
            fmt = '>%iH' % (nx * ny)
            np_form = '>H'
        elif encoding_type == ENCODING_FLOAT32:
            fmt = '>%if' % (nx * ny)
            np_form = '>f'
        else:
            raise NotImplementedError('encoding: ', encoding_type)

        # decompress the level data
        if compr_info['magic_cookie'] == 0xf7f7f7f7:
            cd_fobj = BytesIO(compr_data)
            gzip_file_handle = gzip.GzipFile(fileobj=cd_fobj)
            decompr_data = gzip_file_handle.read(struct.calcsize(fmt))
            gzip_file_handle.close()
        elif compr_info['magic_cookie'] == 0xf5f5f5f5:
            decompr_data = zlib.decompress(compr_data)
        elif compr_info['magic_cookie'] == 0xf6f6f6f6:
            # ZLIB_NOT_COMPRESSED
            decompr_data = compr_data
        elif compr_info['magic_cookie'] == 0xfe0103fd:
            # Run length encoding of 8-bit data.
            # Compression info is in a different order, namely
            # int32 : RL8_FLAG (0xfe0103fd)
            # int32 : key
            # int32 : nbytes_array (bytes of encoded data with header)
            # int32 : nbytes_full (bytes of unencoded data, no header)
            # int32 : nbytes_coded (bytes of encoded data, no header)
            key = compr_info['nbytes_uncompressed']
            decompr_size = compr_info['nbytes_coded']
            decompr_data = _decode_rle8(compr_data, key, decompr_size)
        else:
            raise NotImplementedError('unsupported compression mode')
            # With sample data it should be possible to write a
            # decompressor for other modes, the compression magic
            # cookies for these modes are:
            # 0x2f2f2f2f : TA_NOT_COMPRESSED
            # 0xf8f8f8f8 : GZIP_NOT_COMPRSSED
            # 0xf3f3f3f3 : BZIP_COMPRESSED
            # 0xf4f4f4f4 : BZIP_NOT_COMPRESSED

        # read the decompressed data, reshape and mask
        # (np.frombuffer is the non-deprecated equivalent of np.fromstring)
        sw_data = np.frombuffer(decompr_data, np_form).astype('float32')
        sw_data.shape = (ny, nx)
        mask = sw_data == field_header['bad_data_value']
        np.putmask(sw_data, mask, [np.NaN])

        # scale and offset the data, store in field_data
        scale = field_header['scale']
        bias = field_header['bias']
        field_data[sw, :, :] = sw_data * scale + bias

    # store data as object attribute and return
    self.fields_data[fnum] = field_data
    return field_data
def replace_nans(self, orig, filtered_values):
    new = orig.copy()
    np.putmask(new, np.isnan(new), filtered_values)
    return new.data
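# putmask takes the replacement values element-wise (cycling if shorter),
# so filtered_values should line up with the positions being repaired.
# Minimal check with plain arrays (note: `.data` on a bare ndarray is the
# raw buffer; the method presumably operates on a wrapper type whose
# `.data` is the underlying array):
import numpy as np

orig = np.array([1.0, np.nan, 3.0, np.nan])
filtered = np.array([9.0, 8.0, 7.0, 6.0])
new = orig.copy()
np.putmask(new, np.isnan(new), filtered)
print(new)  # [1. 8. 3. 6.]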
def nanskew(values, axis=None, skipna=True, mask=None):
    """
    Compute the sample skewness.

    The statistic computed here is the adjusted Fisher-Pearson standardized
    moment coefficient G1. The algorithm computes this coefficient directly
    from the second and third central moment.

    Parameters
    ----------
    values : ndarray
    axis : int, optional
    skipna : bool, default True
    mask : ndarray[bool], optional
        nan-mask if known

    Returns
    -------
    result : float64
        Unless input is a float array, in which case use the same
        precision as the input array.

    Examples
    --------
    >>> import pandas.core.nanops as nanops
    >>> s = pd.Series([1, np.nan, 1, 2])
    >>> nanops.nanskew(s)
    1.7320508075688787
    """
    values = lib.values_from_object(values)
    mask = _maybe_get_mask(values, skipna, mask)
    if not is_float_dtype(values.dtype):
        values = values.astype("f8")
        count = _get_counts(values.shape, mask, axis)
    else:
        count = _get_counts(values.shape, mask, axis, dtype=values.dtype)

    if skipna and mask is not None:
        values = values.copy()
        np.putmask(values, mask, 0)

    mean = values.sum(axis, dtype=np.float64) / count
    if axis is not None:
        mean = np.expand_dims(mean, axis)

    adjusted = values - mean
    if skipna and mask is not None:
        np.putmask(adjusted, mask, 0)
    adjusted2 = adjusted ** 2
    adjusted3 = adjusted2 * adjusted
    m2 = adjusted2.sum(axis, dtype=np.float64)
    m3 = adjusted3.sum(axis, dtype=np.float64)

    # floating point error
    #
    # #18044 in _libs/windows.pyx calc_skew follow this behavior
    # to fix the fperr to treat m2 <1e-14 as zero
    m2 = _zero_out_fperr(m2)
    m3 = _zero_out_fperr(m3)

    with np.errstate(invalid="ignore", divide="ignore"):
        result = (count * (count - 1) ** 0.5 / (count - 2)) * (m3 / m2 ** 1.5)

    dtype = values.dtype
    if is_float_dtype(dtype):
        result = result.astype(dtype)

    if isinstance(result, np.ndarray):
        result = np.where(m2 == 0, 0, result)
        result[count < 3] = np.nan
        return result
    else:
        result = 0 if m2 == 0 else result
        if count < 3:
            return np.nan
        return result
def do_draw(self, data):
    if 0 in self.shape:
        return np.zeros(dtype=self.dtype, shape=self.shape)

    # Reset this flag for each test case to emit warnings from set_element
    self._report_overflow = True

    # This could legitimately be a np.empty, but the performance gains for
    # that would be so marginal that there's really not much point risking
    # undefined behaviour shenanigans.
    result = np.zeros(shape=self.array_size, dtype=self.dtype)

    if self.fill.is_empty:
        # We have no fill value (either because the user explicitly
        # disabled it or because the default behaviour was used and our
        # elements strategy does not produce reusable values), so we must
        # generate a fully dense array with a freshly drawn value for each
        # entry.
        if self.unique:
            seen = set()
            elements = cu.many(data, min_size=self.array_size,
                               max_size=self.array_size,
                               average_size=self.array_size)
            i = 0
            while elements.more():
                # We assign first because this means we check for
                # uniqueness after numpy has converted it to the relevant
                # type for us. Because we don't increment the counter on
                # a duplicate we will overwrite it on the next draw.
                self.set_element(data, result, i)
                if result[i] not in seen:
                    seen.add(result[i])
                    i += 1
                else:
                    elements.reject()
        else:
            for i in hrange(len(result)):
                self.set_element(data, result, i)
    else:
        # We draw numpy arrays as "sparse with an offset". We draw a
        # collection of index assignments within the array and assign
        # fresh values from our elements strategy to those indices. If at
        # the end we have not assigned every element then we draw a single
        # value from our fill strategy and use that to populate the
        # remaining positions with that strategy.
        elements = cu.many(
            data,
            min_size=0,
            max_size=self.array_size,
            # sqrt isn't chosen for any particularly principled reason. It
            # just grows reasonably quickly but sublinearly, and for small
            # arrays it represents a decent fraction of the array size.
            average_size=math.sqrt(self.array_size),
        )

        needs_fill = np.full(self.array_size, True)
        seen = set()

        while elements.more():
            i = cu.integer_range(data, 0, self.array_size - 1)
            if not needs_fill[i]:
                elements.reject()
                continue
            self.set_element(data, result, i)
            if self.unique:
                if result[i] in seen:
                    elements.reject()
                    continue
                else:
                    seen.add(result[i])
            needs_fill[i] = False

        if needs_fill.any():
            # We didn't fill all of the indices in the early loop, so we
            # put a fill value into the rest.

            # We have to do this hilarious little song and dance to work
            # around numpy's special handling of iterable values. If the
            # value here were e.g. a tuple then neither array creation
            # nor putmask would do the right thing. But by creating an
            # array of size one and then assigning the fill value as a
            # single element, we both get an array with the right value in
            # it and putmask will do the right thing by repeating the
            # values of the array across the mask.
            one_element = np.zeros(shape=1, dtype=self.dtype)
            self.set_element(data, one_element, 0, self.fill)
            fill_value = one_element[0]
            if self.unique:
                try:
                    is_nan = np.isnan(fill_value)
                except TypeError:
                    is_nan = False

                if not is_nan:
                    raise InvalidArgument(
                        'Cannot fill unique array with non-NaN '
                        'value %r' % (fill_value,))

            np.putmask(result, needs_fill, one_element)

    return result.reshape(self.shape)
def _get_values(
    values: np.ndarray,
    skipna: bool,
    fill_value: Any = None,
    fill_value_typ: Optional[str] = None,
    mask: Optional[np.ndarray] = None,
) -> Tuple[np.ndarray, Optional[np.ndarray], np.dtype, np.dtype, Any]:
    """
    Utility to get the values view, mask, dtype, dtype_max, and fill_value.

    If both mask and fill_value/fill_value_typ are not None and skipna is
    True, the values array will be copied.

    For input arrays of boolean or integer dtypes, copies will only occur if
    a precomputed mask, a fill_value/fill_value_typ, and skipna=True are
    provided.

    Parameters
    ----------
    values : ndarray
        input array to potentially compute mask for
    skipna : bool
        boolean for whether NaNs should be skipped
    fill_value : Any
        value to fill NaNs with
    fill_value_typ : str
        Set to '+inf' or '-inf' to handle dtype-specific infinities
    mask : Optional[np.ndarray]
        nan-mask if known

    Returns
    -------
    values : ndarray
        Potential copy of input value array
    mask : Optional[ndarray[bool]]
        Mask for values, if deemed necessary to compute
    dtype : dtype
        dtype for values
    dtype_max : dtype
        platform independent dtype
    fill_value : Any
        fill value used
    """
    # _get_values is only called from within nanops, and in all cases
    # with scalar fill_value.  This guarantee is important for the
    # maybe_upcast_putmask call below
    assert is_scalar(fill_value)

    mask = _maybe_get_mask(values, skipna, mask)

    if is_datetime64tz_dtype(values):
        # lib.values_from_object returns M8[ns] dtype instead of tz-aware,
        # so this case must be handled separately from the rest
        dtype = values.dtype
        values = getattr(values, "_values", values)
    else:
        values = lib.values_from_object(values)
        dtype = values.dtype

    if is_datetime_or_timedelta_dtype(values) or is_datetime64tz_dtype(values):
        # changing timedelta64/datetime64 to int64 needs to happen after
        # finding `mask` above
        values = getattr(values, "asi8", values)
        values = values.view(np.int64)

    dtype_ok = _na_ok_dtype(dtype)

    # get our fill value (in case we need to provide an alternative
    # dtype for it)
    fill_value = _get_fill_value(dtype, fill_value=fill_value,
                                 fill_value_typ=fill_value_typ)

    copy = (mask is not None) and (fill_value is not None)

    if skipna and copy:
        values = values.copy()
        if dtype_ok:
            np.putmask(values, mask, fill_value)
        # promote if needed
        else:
            values, _ = maybe_upcast_putmask(values, mask, fill_value)

    # return a platform independent precision dtype
    dtype_max = dtype
    if is_integer_dtype(dtype) or is_bool_dtype(dtype):
        dtype_max = np.int64
    elif is_float_dtype(dtype):
        dtype_max = np.float64

    return values, mask, dtype, dtype_max, fill_value
def _bins_to_cuts(
    x,
    bins,
    right: bool = True,
    labels=None,
    precision: int = 3,
    include_lowest: bool = False,
    dtype=None,
    duplicates: str = "raise",
    ordered: bool = True,
):
    if not ordered and labels is None:
        raise ValueError("'labels' must be provided if 'ordered = False'")

    if duplicates not in ["raise", "drop"]:
        raise ValueError(
            "invalid value for 'duplicates' parameter, valid options are: raise, drop"
        )

    if isinstance(bins, IntervalIndex):
        # we have a fast-path here
        ids = bins.get_indexer(x)
        result = Categorical.from_codes(ids, categories=bins, ordered=True)
        return result, bins

    unique_bins = algos.unique(bins)
    if len(unique_bins) < len(bins) and len(bins) != 2:
        if duplicates == "raise":
            raise ValueError(
                f"Bin edges must be unique: {repr(bins)}.\n"
                f"You can drop duplicate edges by setting the 'duplicates' kwarg"
            )
        else:
            bins = unique_bins

    side = "left" if right else "right"
    ids = ensure_int64(bins.searchsorted(x, side=side))

    if include_lowest:
        ids[x == bins[0]] = 1

    na_mask = isna(x) | (ids == len(bins)) | (ids == 0)
    has_nas = na_mask.any()

    if labels is not False:
        if not (labels is None or is_list_like(labels)):
            raise ValueError(
                "Bin labels must either be False, None or passed in as a "
                "list-like argument"
            )
        elif labels is None:
            labels = _format_labels(
                bins, precision, right=right, include_lowest=include_lowest,
                dtype=dtype
            )
        elif ordered and len(set(labels)) != len(labels):
            raise ValueError(
                "labels must be unique if ordered=True; pass ordered=False for duplicate labels"  # noqa
            )
        else:
            if len(labels) != len(bins) - 1:
                raise ValueError(
                    "Bin labels must be one fewer than the number of bin edges"
                )
        if not is_categorical_dtype(labels):
            labels = Categorical(
                labels,
                categories=labels if len(set(labels)) == len(labels) else None,
                ordered=ordered,
            )
        # TODO: handle mismatch between categorical label order and pandas.cut order.
        np.putmask(ids, na_mask, 0)
        result = algos.take_nd(labels, ids - 1)
    else:
        result = ids - 1
        if has_nas:
            result = result.astype(np.float64)
            np.putmask(result, na_mask, np.nan)

    return result, bins
def _highly_variable_genes_seurat_v3(
    adata: AnnData,
    layer: Optional[str] = None,
    n_top_genes: int = 2000,
    batch_key: Optional[str] = None,
    check_values: bool = True,
    span: float = 0.3,
    subset: bool = False,
    inplace: bool = True,
) -> Optional[pd.DataFrame]:
    """\
    See `highly_variable_genes`.

    For further implementation details see https://www.overleaf.com/read/ckptrbgzzzpg

    Returns
    -------
    Depending on `inplace` returns calculated metrics (:class:`~pd.DataFrame`)
    or updates `.var` with the following fields:

    highly_variable : bool
        boolean indicator of highly-variable genes
    **means**
        means per gene
    **variances**
        variance per gene
    **variances_norm**
        normalized variance per gene, averaged in the case of multiple batches
    highly_variable_rank : float
        rank of the gene according to normalized variance, median rank in the
        case of multiple batches
    highly_variable_nbatches : int
        if batch_key is given, this denotes in how many batches genes are
        detected as HVG
    """
    try:
        from skmisc.loess import loess
    except ImportError:
        raise ImportError(
            'Please install the skmisc package via `pip install --user scikit-misc`'
        )

    X = adata.layers[layer] if layer is not None else adata.X
    if check_values and not check_nonnegative_integers(X):
        warnings.warn(
            "`flavor='seurat_v3'` expects raw count data, but non-integers were found.",
            UserWarning,
        )

    if batch_key is None:
        batch_info = pd.Categorical(np.zeros(adata.shape[0], dtype=int))
    else:
        batch_info = adata.obs[batch_key].values

    norm_gene_vars = []
    for b in np.unique(batch_info):
        ad = adata[batch_info == b]
        X = ad.layers[layer] if layer is not None else ad.X

        mean, var = _get_mean_var(X)
        not_const = var > 0
        estimat_var = np.zeros(adata.shape[1], dtype=np.float64)

        y = np.log10(var[not_const])
        x = np.log10(mean[not_const])
        model = loess(x, y, span=span, degree=2)
        model.fit()
        estimat_var[not_const] = model.outputs.fitted_values
        reg_std = np.sqrt(10 ** estimat_var)

        batch_counts = X.astype(np.float64).copy()
        # clip large values as in Seurat
        N = np.sum(batch_info == b)
        vmax = np.sqrt(N)
        clip_val = reg_std * vmax + mean
        if sp_sparse.issparse(batch_counts):
            batch_counts = sp_sparse.csr_matrix(batch_counts)
            mask = batch_counts.data > clip_val[batch_counts.indices]
            batch_counts.data[mask] = clip_val[batch_counts.indices[mask]]
        else:
            clip_val_broad = np.broadcast_to(clip_val, batch_counts.shape)
            np.putmask(
                batch_counts,
                batch_counts > clip_val_broad,
                clip_val_broad,
            )

        if sp_sparse.issparse(batch_counts):
            squared_batch_counts_sum = np.array(batch_counts.power(2).sum(axis=0))
            batch_counts_sum = np.array(batch_counts.sum(axis=0))
        else:
            squared_batch_counts_sum = np.square(batch_counts).sum(axis=0)
            batch_counts_sum = batch_counts.sum(axis=0)

        norm_gene_var = (1 / ((N - 1) * np.square(reg_std))) * (
            (N * np.square(mean))
            + squared_batch_counts_sum
            - 2 * batch_counts_sum * mean
        )
        norm_gene_vars.append(norm_gene_var.reshape(1, -1))

    norm_gene_vars = np.concatenate(norm_gene_vars, axis=0)
    # argsort twice gives ranks; a small rank means most variable
    ranked_norm_gene_vars = np.argsort(np.argsort(-norm_gene_vars, axis=1), axis=1)
    # this is done in SelectIntegrationFeatures() in Seurat v3
    ranked_norm_gene_vars = ranked_norm_gene_vars.astype(np.float32)
    num_batches_high_var = np.sum(
        (ranked_norm_gene_vars < n_top_genes).astype(int), axis=0
    )
    ranked_norm_gene_vars[ranked_norm_gene_vars >= n_top_genes] = np.nan
    ma_ranked = np.ma.masked_invalid(ranked_norm_gene_vars)
    median_ranked = np.ma.median(ma_ranked, axis=0).filled(np.nan)

    df = pd.DataFrame(index=np.array(adata.var_names))
    df['highly_variable_nbatches'] = num_batches_high_var
    df['highly_variable_rank'] = median_ranked
    df['variances_norm'] = np.mean(norm_gene_vars, axis=0)
    # note: `mean` and `var` hold the values from the last batch processed
    df['means'] = mean
    df['variances'] = var

    df.sort_values(
        ['highly_variable_rank', 'highly_variable_nbatches'],
        ascending=[True, False],
        na_position='last',
        inplace=True,
    )
    df['highly_variable'] = False
    # positional indexing: the index holds gene names, so a label-based
    # integer slice would fail on modern pandas
    df.iloc[: int(n_top_genes), df.columns.get_loc('highly_variable')] = True
    df = df.loc[adata.var_names]

    if inplace or subset:
        adata.uns['hvg'] = {'flavor': 'seurat_v3'}
        logg.hint(
            'added\n'
            '    \'highly_variable\', boolean vector (adata.var)\n'
            '    \'highly_variable_rank\', float vector (adata.var)\n'
            '    \'means\', float vector (adata.var)\n'
            '    \'variances\', float vector (adata.var)\n'
            '    \'variances_norm\', float vector (adata.var)'
        )
        adata.var['highly_variable'] = df['highly_variable'].values
        adata.var['highly_variable_rank'] = df['highly_variable_rank'].values
        adata.var['means'] = df['means'].values
        adata.var['variances'] = df['variances'].values
        adata.var['variances_norm'] = df['variances_norm'].values.astype(
            'float64', copy=False
        )
        if batch_key is not None:
            adata.var['highly_variable_nbatches'] = df[
                'highly_variable_nbatches'].values
        if subset:
            adata._inplace_subset_var(df['highly_variable'].values)
    else:
        if batch_key is None:
            df = df.drop(['highly_variable_nbatches'], axis=1)
        return df
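# A minimal usage sketch: the helper above is reached through scanpy's public
# API (requires the scikit-misc dependency). The dataset choice is
# illustrative only.
import scanpy as sc

adata = sc.datasets.pbmc3k()                      # raw counts in adata.X
sc.pp.highly_variable_genes(
    adata, flavor='seurat_v3', n_top_genes=2000,  # rank genes on raw counts
)
print(adata.var[['highly_variable', 'variances_norm']].head())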
def nanvar(values, axis=None, skipna=True, ddof=1, mask=None):
    """
    Compute the variance along given axis while ignoring NaNs

    Parameters
    ----------
    values : ndarray
    axis : int, optional
    skipna : bool, default True
    ddof : int, default 1
        Delta Degrees of Freedom. The divisor used in calculations is
        N - ddof, where N represents the number of elements.
    mask : ndarray[bool], optional
        nan-mask if known

    Returns
    -------
    result : float
        Unless input is a float array, in which case use the same
        precision as the input array.

    Examples
    --------
    >>> import pandas.core.nanops as nanops
    >>> s = pd.Series([1, np.nan, 2, 3])
    >>> nanops.nanvar(s)
    1.0
    """
    values = lib.values_from_object(values)
    dtype = values.dtype
    mask = _maybe_get_mask(values, skipna, mask)
    if is_any_int_dtype(values):
        values = values.astype("f8")
        if mask is not None:
            values[mask] = np.nan

    if is_float_dtype(values):
        count, d = _get_counts_nanvar(values.shape, mask, axis, ddof,
                                      values.dtype)
    else:
        count, d = _get_counts_nanvar(values.shape, mask, axis, ddof)

    if skipna and mask is not None:
        values = values.copy()
        np.putmask(values, mask, 0)

    # xref GH10242
    # Compute variance via two-pass algorithm, which is stable against
    # cancellation errors and relatively accurate for small numbers of
    # observations.
    #
    # See https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
    avg = _ensure_numeric(values.sum(axis=axis, dtype=np.float64)) / count
    if axis is not None:
        avg = np.expand_dims(avg, axis)

    sqr = _ensure_numeric((avg - values) ** 2)
    if mask is not None:
        np.putmask(sqr, mask, 0)
    result = sqr.sum(axis=axis, dtype=np.float64) / d

    # Return variance as np.float64 (the datatype used in the accumulator),
    # unless we were dealing with a float array, in which case use the same
    # precision as the original values array.
    if is_float_dtype(dtype):
        result = result.astype(dtype)
    return _wrap_results(result, values.dtype)
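# A brief cross-check sketch: nanvar backs the public Series/DataFrame
# ``.var()``, so the private nanops call and the public method agree.
import numpy as np
import pandas as pd

s = pd.Series([1, np.nan, 2, 3])
print(s.var())        # 1.0 -- the NaN is skipped, ddof=1 by default
print(s.var(ddof=0))  # 0.666... -- population variance of the 3 valid values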
def lcdnorm4(arr_in, neighborhood,
             contrast=DEFAULT_CONTRAST,
             divisive=DEFAULT_DIVISIVE,
             stretch=DEFAULT_STRETCH,
             threshold=DEFAULT_THRESHOLD,
             stride=DEFAULT_STRIDE,
             arr_out=None):
    """4D Local Contrast Divisive Normalization

    XXX: docstring
    """
    assert arr_in.ndim == 4
    assert len(neighborhood) == 2
    assert isinstance(contrast, bool)
    assert isinstance(divisive, bool)
    assert contrast or divisive

    in_imgs, inh, inw, ind = arr_in.shape
    nbh, nbw = neighborhood
    assert nbh <= inh
    assert nbw <= inw

    nb_size = 1. * nbh * nbw * ind

    if arr_out is not None:
        assert arr_out.dtype == arr_in.dtype
        assert arr_out.shape == (in_imgs,
                                 1 + (inh - nbh) // stride,
                                 1 + (inw - nbw) // stride,
                                 ind)

    # -- prepare arr_out
    lys = nbh // 2
    lxs = nbw // 2
    rys = (nbh - 1) // 2
    rxs = (nbw - 1) // 2
    # stride over the spatial axes (height and width), not the image axis
    _arr_out = arr_in[:, lys:inh - rys, lxs:inw - rxs][:, ::stride, ::stride]

    # -- Contrast Normalization
    if contrast:
        # -- local sums
        arr_sum = arr_in.sum(-1)
        arr_sum = view_as_windows(
            arr_sum, (1, 1, nbw)).sum(-1)[:, :, ::stride, 0, 0]
        arr_sum = view_as_windows(
            arr_sum, (1, nbh, 1)).sum(-2)[:, ::stride, :, 0]
        # -- remove the mean
        _arr_out = _arr_out - arr_sum / nb_size

    # -- Divisive (gain) Normalization
    if divisive:
        # -- local sums of squares
        arr_ssq = (arr_in ** 2.0).sum(-1)
        arr_ssq = view_as_windows(
            arr_ssq, (1, 1, nbw)).sum(-1)[:, :, ::stride, 0, 0]
        arr_ssq = view_as_windows(
            arr_ssq, (1, nbh, 1)).sum(-2)[:, ::stride, :, 0]

        # -- divide by the euclidean norm
        if contrast:
            l2norms = (arr_ssq - (arr_sum ** 2.0) / nb_size)
        else:
            l2norms = arr_ssq

        np.putmask(l2norms, l2norms < 0., 0.)
        l2norms = np.sqrt(l2norms) + EPSILON

        if stretch != 1:
            _arr_out *= stretch
            l2norms *= stretch

        np.putmask(l2norms, l2norms < (threshold + EPSILON), 1.0)
        _arr_out = _arr_out / l2norms

    if arr_out is not None:
        arr_out[:] = _arr_out
    else:
        arr_out = _arr_out

    assert arr_out.shape[0] == in_imgs
    return arr_out
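# A compact, self-contained sketch of the same idea for a single 2D image,
# using scipy.ndimage.uniform_filter for the local sums. The epsilon and
# threshold values here are illustrative, not the module's defaults.
import numpy as np
from scipy.ndimage import uniform_filter


def lcdnorm2(img, size=9, threshold=1.0, eps=1e-5):
    local_mean = uniform_filter(img, size)
    centered = img - local_mean                      # contrast normalization
    local_sqmean = uniform_filter(img ** 2, size)
    var = np.clip(local_sqmean - local_mean ** 2, 0.0, None)
    norms = np.sqrt(var * size * size) + eps         # local L2 norms
    np.putmask(norms, norms < threshold + eps, 1.0)  # leave weak patches alone
    return centered / norms                          # divisive normalization


out = lcdnorm2(np.random.rand(64, 64).astype(np.float32))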
def nankurt(values, axis=None, skipna=True, mask=None):
    """
    Compute the sample excess kurtosis

    The statistic computed here is the adjusted Fisher-Pearson standardized
    moment coefficient G2, computed directly from the second and fourth
    central moment.

    Parameters
    ----------
    values : ndarray
    axis : int, optional
    skipna : bool, default True
    mask : ndarray[bool], optional
        nan-mask if known

    Returns
    -------
    result : float64
        Unless input is a float array, in which case use the same
        precision as the input array.

    Examples
    --------
    >>> import pandas.core.nanops as nanops
    >>> s = pd.Series([1, np.nan, 1, 3, 2])
    >>> nanops.nankurt(s)
    -1.2892561983471076
    """
    values = lib.values_from_object(values)
    mask = _maybe_get_mask(values, skipna, mask)
    if not is_float_dtype(values.dtype):
        values = values.astype("f8")
        count = _get_counts(values.shape, mask, axis)
    else:
        count = _get_counts(values.shape, mask, axis, dtype=values.dtype)

    if skipna and mask is not None:
        values = values.copy()
        np.putmask(values, mask, 0)

    mean = values.sum(axis, dtype=np.float64) / count
    if axis is not None:
        mean = np.expand_dims(mean, axis)

    adjusted = values - mean
    if skipna and mask is not None:
        np.putmask(adjusted, mask, 0)
    adjusted2 = adjusted ** 2
    adjusted4 = adjusted2 ** 2
    m2 = adjusted2.sum(axis, dtype=np.float64)
    m4 = adjusted4.sum(axis, dtype=np.float64)

    with np.errstate(invalid="ignore", divide="ignore"):
        adj = 3 * (count - 1) ** 2 / ((count - 2) * (count - 3))
        numer = count * (count + 1) * (count - 1) * m4
        denom = (count - 2) * (count - 3) * m2 ** 2

    # floating point error
    #
    # #18044 in _libs/windows.pyx calc_kurt follows this behavior
    # to fix the fperr to treat denom < 1e-14 as zero
    numer = _zero_out_fperr(numer)
    denom = _zero_out_fperr(denom)

    if not isinstance(denom, np.ndarray):
        # if ``denom`` is a scalar, check these corner cases first before
        # doing division
        if count < 4:
            return np.nan
        if denom == 0:
            return 0

    with np.errstate(invalid="ignore", divide="ignore"):
        result = numer / denom - adj

    dtype = values.dtype
    if is_float_dtype(dtype):
        result = result.astype(dtype)

    if isinstance(result, np.ndarray):
        result = np.where(denom == 0, 0, result)
        result[count < 4] = np.nan

    return result
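# A quick cross-check sketch: pandas' ``.kurt()`` (which dispatches to
# nankurt) matches scipy's bias-corrected excess kurtosis on the non-NaN
# values; the printed digits may differ in the last decimal place.
import numpy as np
import pandas as pd
from scipy import stats

s = pd.Series([1, np.nan, 1, 3, 2])
print(s.kurt())                                             # -1.289256...
print(stats.kurtosis(s.dropna(), fisher=True, bias=False))  # same G2 statistic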
def test_percentile_between(self):

    quintiles = range(5)
    filter_names = ['pct_' + str(q) for q in quintiles]
    iter_quintiles = list(zip(filter_names, quintiles))
    terms = {
        name: self.f.percentile_between(q * 20.0, (q + 1) * 20.0)
        for name, q in iter_quintiles
    }

    # Test with 5 columns and no NaNs.
    eye5 = eye(5, dtype=float64)
    expected = {}
    for name, quintile in iter_quintiles:
        if quintile < 4:
            # There are four 0s and one 1 in each row, so the first 4
            # quintiles should be all the locations with zeros in the input
            # array.
            expected[name] = ~eye5.astype(bool)
        else:
            # The top quintile should match the sole 1 in each row.
            expected[name] = eye5.astype(bool)

    self.check_terms(
        terms=terms,
        expected=expected,
        initial_workspace={self.f: eye5},
        mask=self.build_mask(ones((5, 5))),
    )

    # Test with 6 columns, no NaNs, and one masked entry per day.
    eye6 = eye(6, dtype=float64)
    mask = array([[1, 1, 1, 1, 1, 0],
                  [0, 1, 1, 1, 1, 1],
                  [1, 0, 1, 1, 1, 1],
                  [1, 1, 0, 1, 1, 1],
                  [1, 1, 1, 0, 1, 1],
                  [1, 1, 1, 1, 0, 1]], dtype=bool)

    expected = {}
    for name, quintile in iter_quintiles:
        if quintile < 4:
            # Should keep all values that were 0 in the base data and were
            # 1 in the mask.
            expected[name] = mask & ~eye6.astype(bool)
        else:
            # The top quintile should match the sole 1 in each row.
            expected[name] = eye6.astype(bool)

    self.check_terms(
        terms=terms,
        expected=expected,
        initial_workspace={self.f: eye6},
        mask=self.build_mask(mask),
    )

    # Test with 6 columns, no mask, and one NaN per day.  Should have the
    # same outcome as if we had masked the NaNs.
    # In particular, the NaNs should never pass any filters.
    eye6_withnans = eye6.copy()
    putmask(eye6_withnans, ~mask, nan)
    expected = {}
    for name, quintile in iter_quintiles:
        if quintile < 4:
            # Should keep all values that were 0 in the base data and were
            # 1 in the mask.
            expected[name] = mask & (~eye6.astype(bool))
        else:
            # Should keep all the 1s in the base data.
            expected[name] = eye6.astype(bool)

    self.check_terms(
        terms,
        expected,
        # pass the NaN'd data, unmasked, as the comments above describe
        initial_workspace={self.f: eye6_withnans},
        mask=self.build_mask(ones((6, 6))),
    )
            q = -np.log(1.0 / np.square(J_pic) + 1.0e-16) / 2.0
            break
        if case("lnF(a)"):
            q = -np.log(1.0 / np.square(J_a) + 1.0e-16) / 2.0
            break
        if case("lnF(p)"):
            q = -np.log(1.0 / np.square(J_p) + 1.0e-16) / 2.0
            break
        if case("entropy"):
            q = np.cumsum(S, axis=0)
            break
        if case("lyapunov"):
            q = S
            break

    np.putmask(q, np.isnan(q), 0.0)
    np.putmask(q, np.isinf(q), 0.0)

    ny, nx = q.shape

    # antialias using median filter; guard against a zero step for small
    # images
    osy = max(ny // 500, 1)
    osx = max(nx // 1000, 1)
    if osy != 1 or osx != 1:
        aakernel = [(osy & (~1)) + 1, (osx & (~1)) + 1]
        q = medfilt(q, aakernel)[::osy, ::osx]
        ny, nx = q.shape
        print("Antialiased with median kernel %s; output size is (%i,%i) pixels"
              % (aakernel, ny, nx))
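# On NumPy >= 1.17 the two putmask calls above can be collapsed into a
# single call; a small illustrative sketch.
import numpy as np

q = np.array([1.0, np.nan, np.inf, -np.inf])
q = np.nan_to_num(q, nan=0.0, posinf=0.0, neginf=0.0)
print(q)  # [1. 0. 0. 0.]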
def v1like_norm(hin, conv_mode, kshape, threshold):
    """V1LIKE local normalization

    Each pixel in the input image is divisively normalized by the L2 norm
    of the pixels in a local neighborhood around it, and the result of this
    division is placed in the output image.

    Inputs:
      hin -- a 3-dimensional array (width X height X rgb)
      kshape -- kernel shape (tuple), e.g. (3, 3) for a 3x3 normalization
                neighborhood
      threshold -- magnitude threshold; if the vector's length is below it,
                   the vector doesn't get resized, e.g. 1.

    Outputs:
      hout -- a normalized 3-dimensional array (width X height X rgb)
    """
    eps = 1e-5
    kh, kw = kshape
    dtype = hin.dtype
    hsrc = hin[:].copy()

    # -- prepare hout
    hin_h, hin_w, hin_d = hin.shape
    hout_h = hin_h  # - kh + 1
    hout_w = hin_w  # - kw + 1

    if conv_mode != "same":
        hout_h = hout_h - kh + 1
        hout_w = hout_w - kw + 1

    hout_d = hin_d
    hout = np.empty((hout_h, hout_w, hout_d), 'float32')

    # -- compute numerator (hnum) and divisor (hdiv)
    # sum kernel
    hin_d = hin.shape[-1]
    kshape3d = list(kshape) + [hin_d]
    ker = np.ones(kshape3d, dtype=dtype)
    size = ker.size

    # compute sum-of-squares via separable 1D convolutions
    hsq = hsrc ** 2.
    # hssq = conv(hsq, ker, conv_mode).astype(dtype)
    kerH = ker[:, 0, 0][:, None]
    kerW = ker[0, :, 0][None, :]
    kerD = ker[0, 0, :][None, None, :]
    hssq = conv(
        conv(conv(hsq, kerD, 'valid')[:, :, 0].astype(dtype),
             kerW, conv_mode),
        kerH, conv_mode).astype(dtype)
    hssq = hssq[:, :, None]

    # compute hnum and hdiv
    ys = kh // 2
    xs = kw // 2
    hout_h, hout_w, hout_d = hout.shape[-3:]
    hs = hout_h
    ws = hout_w
    hsum = conv(
        conv(conv(hsrc, kerD, 'valid')[:, :, 0].astype(dtype),
             kerW, conv_mode),
        kerH, conv_mode).astype(dtype)
    hsum = hsum[:, :, None]

    if conv_mode == 'same':
        hnum = hsrc - (hsum / size)
    else:
        hnum = hsrc[ys:ys + hs, xs:xs + ws] - (hsum / size)

    val = (hssq - (hsum ** 2.) / size)
    val[val < 0] = 0
    hdiv = val ** (1. / 2) + eps

    # -- apply normalization
    # 'volume' threshold
    np.putmask(hdiv, hdiv < (threshold + eps), 1.)
    result = (hnum / hdiv)

    hout[:] = result
    return hout
    now = time.time()

    out.fill(0)

    grid(out, (0, 0.5, 1), size=1, n=10)
    frustum(out, depth_intrinsics)
    axes(out, view([0, 0, 0]), state.rotation, size=0.1, thickness=1)

    if not state.scale or out.shape[:2] == (h, w):
        pointcloud(out, verts, texcoords, color_source)
    else:
        tmp = np.zeros((h, w, 3), dtype=np.uint8)
        pointcloud(tmp, verts, texcoords, color_source)
        tmp = cv2.resize(tmp, out.shape[:2][::-1],
                         interpolation=cv2.INTER_NEAREST)
        np.putmask(out, tmp > 0, tmp)

    if any(state.mouse_btns):
        axes(out, view(state.pivot), state.rotation, thickness=4)

    dt = time.time() - now

    cv2.setWindowTitle(
        state.WIN_NAME,
        "RealSense (%dx%d) %dFPS (%.2fms) %s" %
        (w, h, 1.0 / dt, dt * 1000, "PAUSED" if state.paused else ""))

    cv2.imshow(state.WIN_NAME, out)
    key = cv2.waitKey(1)

    if key == ord("r"):
        state.reset()
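# The ``np.putmask(out, tmp > 0, tmp)`` idiom above overlays the non-zero
# channels of a rendered image onto a background; a tiny standalone sketch.
import numpy as np

out = np.full((2, 2, 3), 50, dtype=np.uint8)  # grey background
tmp = np.zeros((2, 2, 3), dtype=np.uint8)
tmp[0, 0] = (255, 200, 100)                   # one rendered pixel
np.putmask(out, tmp > 0, tmp)                 # copy channels where tmp != 0
print(out[0, 0])                              # [255 200 100]
print(out[1, 1])                              # [50 50 50]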
def _get_values(
    values: np.ndarray,
    skipna: bool,
    fill_value: Any = None,
    fill_value_typ: Optional[str] = None,
    mask: Optional[np.ndarray] = None,
) -> Tuple[np.ndarray, Optional[np.ndarray], np.dtype, np.dtype, Any]:
    """
    Utility to get the values view, mask, dtype, dtype_max, and fill_value.

    If both mask and fill_value/fill_value_typ are not None and skipna is
    True, the values array will be copied.

    For input arrays of boolean or integer dtypes, copies will only occur
    if a precomputed mask, a fill_value/fill_value_typ, and skipna=True are
    provided.

    Parameters
    ----------
    values : ndarray
        input array to potentially compute mask for
    skipna : bool
        boolean for whether NaNs should be skipped
    fill_value : Any
        value to fill NaNs with
    fill_value_typ : str
        Set to '+inf' or '-inf' to handle dtype-specific infinities
    mask : Optional[np.ndarray]
        nan-mask if known

    Returns
    -------
    values : ndarray
        Potential copy of input value array
    mask : Optional[ndarray[bool]]
        Mask for values, if deemed necessary to compute
    dtype : np.dtype
        dtype for values
    dtype_max : np.dtype
        platform independent dtype
    fill_value : Any
        fill value used
    """
    # _get_values is only called from within nanops, and in all cases with
    # a scalar fill_value. This guarantee is important for the np.where
    # call below.
    assert is_scalar(fill_value)
    values = extract_array(values, extract_numpy=True)

    mask = _maybe_get_mask(values, skipna, mask)

    dtype = values.dtype

    datetimelike = False
    if needs_i8_conversion(values.dtype):
        # changing timedelta64/datetime64 to int64 needs to happen after
        # finding `mask` above
        values = np.asarray(values.view("i8"))
        datetimelike = True

    dtype_ok = _na_ok_dtype(dtype)

    # get our fill value (in case we need to provide an alternative
    # dtype for it)
    fill_value = _get_fill_value(
        dtype, fill_value=fill_value, fill_value_typ=fill_value_typ
    )

    if skipna and (mask is not None) and (fill_value is not None):
        if mask.any():
            if dtype_ok or datetimelike:
                values = values.copy()
                np.putmask(values, mask, fill_value)
            else:
                # np.where will promote if needed
                values = np.where(~mask, values, fill_value)

    # return a platform independent precision dtype
    dtype_max = dtype
    if is_integer_dtype(dtype) or is_bool_dtype(dtype):
        dtype_max = np.dtype(np.int64)
    elif is_float_dtype(dtype):
        dtype_max = np.dtype(np.float64)

    return values, mask, dtype, dtype_max, fill_value
def factorize(values, sort=False, order=None, na_sentinel=-1, size_hint=None):
    """
    Encode input values as an enumerated type or categorical variable

    Parameters
    ----------
    values : ndarray (1-d)
        Sequence
    sort : boolean, default False
        Sort by values
    order : deprecated
    na_sentinel : int, default -1
        Value to mark "not found"
    size_hint : hint to the hashtable sizer

    Returns
    -------
    labels : the indexer to the original array
    uniques : ndarray (1-d) or Index
        the unique values. Index is returned when passed values is Index or
        Series

    note: an array of Periods will ignore sort as it returns an always
    sorted PeriodIndex
    """
    if order is not None:
        msg = "order is deprecated. See " \
              "https://github.com/pydata/pandas/issues/6926"
        warn(msg, FutureWarning, stacklevel=2)

    from pandas import Index, Series, DatetimeIndex

    vals = np.asarray(values)

    # localize to UTC
    is_datetimetz = com.is_datetimetz(values)
    if is_datetimetz:
        values = DatetimeIndex(values)
        vals = values.tz_localize(None)

    is_datetime = com.is_datetime64_dtype(vals)
    is_timedelta = com.is_timedelta64_dtype(vals)
    (hash_klass, vec_klass), vals = _get_data_algo(vals, _hashtables)

    table = hash_klass(size_hint or len(vals))
    uniques = vec_klass()
    labels = table.get_labels(vals, uniques, 0, na_sentinel, True)

    labels = com._ensure_platform_int(labels)

    uniques = uniques.to_array()

    if sort and len(uniques) > 0:
        try:
            sorter = uniques.argsort()
        except TypeError:
            # unorderable in py3 if mixed str/int
            t = hash_klass(len(uniques))
            t.map_locations(com._ensure_object(uniques))

            # order ints before strings
            ordered = np.concatenate([
                np.sort(np.array([e for i, e in enumerate(uniques) if f(e)],
                                 dtype=object))
                for f in [lambda x: not isinstance(x, string_types),
                          lambda x: isinstance(x, string_types)]
            ])
            sorter = com._ensure_platform_int(
                t.lookup(com._ensure_object(ordered)))

        reverse_indexer = np.empty(len(sorter), dtype=np.int_)
        reverse_indexer.put(sorter, np.arange(len(sorter)))

        mask = labels < 0
        labels = reverse_indexer.take(labels)
        np.putmask(labels, mask, -1)

        uniques = uniques.take(sorter)

    if is_datetimetz:
        # reset tz
        uniques = DatetimeIndex(uniques.astype('M8[ns]')).tz_localize(
            values.tz)
    elif is_datetime:
        uniques = uniques.astype('M8[ns]')
    elif is_timedelta:
        uniques = uniques.astype('m8[ns]')

    if isinstance(values, Index):
        uniques = values._shallow_copy(uniques, name=None)
    elif isinstance(values, Series):
        uniques = Index(uniques)

    return labels, uniques
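# A short usage sketch of the public API that wraps this routine.
import pandas as pd

labels, uniques = pd.factorize(['b', 'a', 'c', 'b'])
print(labels)   # [0 1 2 0] -- first-seen order
print(uniques)  # ['b' 'a' 'c']

labels, uniques = pd.factorize(['b', 'a', 'c', 'b'], sort=True)
print(labels)   # [1 0 2 1] -- labels follow the sorted uniques
print(uniques)  # ['a' 'b' 'c']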
def clip(x, ext_min, ext_max):
    # clamp `x` to [ext_min, ext_max] in place and return it
    np.putmask(x, x < ext_min, ext_min)
    np.putmask(x, x > ext_max, ext_max)
    return x
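# The same clamp is available as a single standard call; ``out=x`` keeps
# the in-place, mutate-and-return behavior of the helper above.
import numpy as np

x = np.array([-2.0, 0.5, 3.0])
np.clip(x, 0.0, 1.0, out=x)
print(x)  # [0.  0.5 1. ]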
def EliminarFondo(Imagen_Color, Imagen_Profundidad, Distancia, Color_Contorno):
    # remove the background in place: any pixel whose depth exceeds
    # `Distancia` is painted with `Color_Contorno` in every channel
    Filas, Columnas, Dimensiones = Imagen_Color.shape
    for i in range(Dimensiones):
        auxiliar = Imagen_Color[:, :, i]
        np.putmask(auxiliar, Imagen_Profundidad > Distancia, Color_Contorno)
        # `auxiliar` is a view, so putmask already mutated Imagen_Color;
        # the write-back is redundant but harmless
        Imagen_Color[:, :, i] = auxiliar
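# An illustrative call of the function above with synthetic data: a 4x4 RGB
# frame and a depth map; pixels farther than 2.0 are painted black.
import numpy as np

color = np.full((4, 4, 3), 200, dtype=np.uint8)
depth = np.linspace(0.5, 3.5, 16).reshape(4, 4)
EliminarFondo(color, depth, Distancia=2.0, Color_Contorno=0)
print(color[0, 0], color[3, 3])  # [200 200 200] [0 0 0]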
def blend(rgb, shade, shade_type=None):
    """
    Provides several "shading" options based on shade_type dict
    *rgb* array of colors to 'shade', shape (nx, 3) or (nx, ny, 3)
    *shade* N&B array, shape similar to rgb but last dim is 1
    *shade_type* {"Lch": x1, "overlay": x2, "pegtop": x3}
        x1, x2, x3 positive scalars, the proportion of each shading method
        in the final image.
    """
    if shade_type is None:
        shade_type = {"Lch": 4., "overlay": 4., "pegtop": 1.}

    blend_T = float(sum(
        shade_type.get(key, 0.) for key in ["Lch", "overlay", "pegtop"]))

    is_image = (len(rgb.shape) == 3)
    if is_image:
        imx, imy, ichannel = rgb.shape
        if ichannel != 3:
            raise ValueError("expected rgb array")
        rgb = np.copy(rgb.reshape(imx * imy, 3))
        shade = np.copy(shade.reshape(imx * imy, 1))

    XYZ = Color_tools.rgb_to_XYZ(rgb[:, 0:3])

    # zeros_like avoids referencing imx/imy, which are only defined for
    # image-shaped input
    XYZ_overlay = np.zeros_like(XYZ)
    XYZ_pegtop = np.zeros_like(XYZ)
    XYZ_Lch = np.zeros_like(XYZ)

    ref_white = Color_tools.D50_ref_white

    if shade_type.get("overlay", 0.) != 0:
        low = 2. * shade * XYZ
        high = ref_white * 100. - 2. * (1. - shade) * (ref_white * 100. - XYZ)
        XYZ_overlay = np.where(XYZ <= 0.5 * ref_white * 100., low, high)

    if shade_type.get("pegtop", 0.) != 0:
        XYZ_pegtop = 2. * shade * XYZ + (1. - 2. * shade) * XYZ ** 2 / ref_white

    if shade_type.get("Lch", 0.) != 0:
        shade = 2. * shade - 1.
        Lab = Color_tools.XYZ_to_CIELab(XYZ)
        L = Lab[:, 0, np.newaxis]
        a = Lab[:, 1, np.newaxis]
        b = Lab[:, 2, np.newaxis]
        np.putmask(L, shade > 0, L + shade * (100. - L))  # lighten
        np.putmask(L, shade < 0, L * (1. + shade))        # darken
        np.putmask(a, shade > 0, a - shade ** 2 * a)      # lighten
        np.putmask(a, shade < 0, a * (1. - shade ** 2))   # darken
        np.putmask(b, shade > 0, b - shade ** 2 * b)      # lighten
        np.putmask(b, shade < 0, b * (1. - shade ** 2))   # darken
        Lab[:, 0] = L[:, 0]
        Lab[:, 1] = a[:, 0]
        Lab[:, 2] = b[:, 0]
        XYZ_Lch = Color_tools.CIELab_to_XYZ(Lab)

    XYZ = (XYZ_overlay * shade_type["overlay"] +
           XYZ_pegtop * shade_type["pegtop"] +
           XYZ_Lch * shade_type["Lch"]) / blend_T

    # Convert modified XYZ back to rgb.
    blend = Color_tools.XYZ_to_rgb(XYZ)
    if is_image:
        blend = blend.reshape([imx, imy, 3])
    return blend
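# The pegtop soft-light formula on its own, applied to a greyscale ramp;
# shade = 0.5 leaves values unchanged, lower darkens, higher lightens.
import numpy as np

v = np.linspace(0.0, 1.0, 5)  # base channel values in [0, 1]
for shade in (0.25, 0.5, 0.75):
    print(shade, 2 * shade * v + (1 - 2 * shade) * v ** 2)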
def patch_image(t_in, s_out, cm=0):
    # fill the region(s) marked with color `cm` by exploiting periodic
    # tiling and symmetries of the rest of the grid
    try:
        t = t_in.copy()
        ty, tx = t.shape
        if cm > 0:
            m = mask_rect(t == cm)
        else:
            m = (t == cm)
        tile = get_tile(t, m)
        if tile.size > 2 and s_out == t.shape:
            rt = np.tile(tile, (1 + ty // tile.shape[0],
                                1 + tx // tile.shape[1]))[0:ty, 0:tx]
            if (rt[~m] == t[~m]).all():
                return rt
        for i in range(6):
            m = (t == cm)
            t -= cm
            if tx == ty:
                a = np.maximum(t, t.T)
                if (a[~m] == t[~m]).all():
                    t = a.copy()
                a = np.maximum(t, np.flip(t).T)
                if (a[~m] == t[~m]).all():
                    t = a.copy()
            a = np.maximum(t, np.flipud(t))
            if (a[~m] == t[~m]).all():
                t = a.copy()
            a = np.maximum(t, np.fliplr(t))
            if (a[~m] == t[~m]).all():
                t = a.copy()
            t += cm
            m = (t == cm)
            lms = measure.label(m.astype('uint8'))
            for l in range(1, lms.max() + 1):
                lm = np.argwhere(lms == l)
                x_min = max(0, lm[:, 1].min() - 1)
                x_max = min(lm[:, 1].max() + 2, t.shape[0])
                y_min = max(0, lm[:, 0].min() - 1)
                y_max = min(lm[:, 0].max() + 2, t.shape[1])
                gap = t[y_min:y_max, x_min:x_max]
                sy, sx = gap.shape
                if i == 1:
                    sy //= 2
                    y_max = y_min + sx
                    gap = t[y_min:y_max, x_min:x_max]
                    sy, sx = gap.shape
                # candidate windows of the gap's shape taken from everywhere
                # in the grid; windows containing `cm` are filtered out
                allst = as_strided(t, shape=(ty, tx, sy, sx),
                                   strides=2 * t.strides)
                allst = allst.reshape(-1, sy, sx)
                allst = np.array(
                    [a for a in allst if np.count_nonzero(a == cm) == 0])
                gm = (gap != cm)
                for a in allst:
                    if sx == sy:
                        fpd = a.T
                        fad = np.flip(a).T
                        if i == 1:
                            gm[sy - 1, 0] = gm[0, sx - 1] = False
                        if (fpd[gm] == gap[gm]).all():
                            gm = (gap != cm)
                            np.putmask(gap, ~gm, fpd)
                            t[y_min:y_max, x_min:x_max] = gap
                            break
                        if i == 1:
                            gm[0, 0] = gm[sy - 1, sx - 1] = False
                        if (fad[gm] == gap[gm]).all():
                            gm = (gap != cm)
                            np.putmask(gap, ~gm, fad)
                            t[y_min:y_max, x_min:x_max] = gap
                            break
                    fud = np.flipud(a)
                    flr = np.fliplr(a)
                    if i == 1:
                        gm[sy - 1, 0] = gm[0, sx - 1] = False
                        gm[0, 0] = gm[sy - 1, sx - 1] = False
                    if (a[gm] == gap[gm]).all():
                        gm = (gap != cm)
                        np.putmask(gap, ~gm, a)
                        t[y_min:y_max, x_min:x_max] = gap
                        break
                    elif (fud[gm] == gap[gm]).all():
                        gm = (gap != cm)
                        np.putmask(gap, ~gm, fud)
                        t[y_min:y_max, x_min:x_max] = gap
                        break
                    elif (flr[gm] == gap[gm]).all():
                        gm = (gap != cm)
                        np.putmask(gap, ~gm, flr)
                        t[y_min:y_max, x_min:x_max] = gap
                        break
        if s_out == t.shape:
            return t
        else:
            m = (t_in == cm)
            return np.resize(t[m], crop_min(m).shape)
    except Exception:
        return np.resize(t_in, s_out)
def safe_sort(values, labels=None, na_sentinel=-1, assume_unique=False):
    """
    Sort ``values`` and reorder corresponding ``labels``.
    ``values`` should be unique if ``labels`` is not None.
    Safe for use with mixed types (int, str), orders ints before strs.

    .. versionadded:: 0.19.0

    Parameters
    ----------
    values : list-like
        Sequence; must be unique if ``labels`` is not None.
    labels : list_like
        Indices to ``values``. All out of bound indices are treated as
        "not found" and will be masked with ``na_sentinel``.
    na_sentinel : int, default -1
        Value in ``labels`` to mark "not found".
        Ignored when ``labels`` is None.
    assume_unique : bool, default False
        When True, ``values`` are assumed to be unique, which can speed up
        the calculation. Ignored when ``labels`` is None.

    Returns
    -------
    ordered : ndarray
        Sorted ``values``
    new_labels : ndarray
        Reordered ``labels``; returned when ``labels`` is not None.

    Raises
    ------
    TypeError
        * If ``values`` is not list-like or if ``labels`` is neither None
          nor list-like
        * If ``values`` cannot be sorted
    ValueError
        * If ``labels`` is not None and ``values`` contain duplicates.
    """
    if not is_list_like(values):
        raise TypeError("Only list-like objects are allowed to be passed "
                        "to safe_sort as values")
    values = np.array(values, copy=False)

    def sort_mixed(values):
        # order ints before strings, safe in py3
        str_pos = np.array([isinstance(x, string_types) for x in values],
                           dtype=bool)
        nums = np.sort(values[~str_pos])
        strs = np.sort(values[str_pos])
        return _ensure_object(np.concatenate([nums, strs]))

    sorter = None
    if compat.PY3 and lib.infer_dtype(values) == 'mixed-integer':
        # unorderable in py3 if mixed str/int
        ordered = sort_mixed(values)
    else:
        try:
            sorter = values.argsort()
            ordered = values.take(sorter)
        except TypeError:
            # try this anyway
            ordered = sort_mixed(values)

    # labels:
    if labels is None:
        return ordered

    if not is_list_like(labels):
        raise TypeError("Only list-like objects or None are allowed to be "
                        "passed to safe_sort as labels")
    labels = _ensure_platform_int(np.asarray(labels))

    from pandas import Index
    if not assume_unique and not Index(values).is_unique:
        raise ValueError("values should be unique if labels is not None")

    if sorter is None:
        # mixed types
        (hash_klass, _), values = _get_data_algo(values, _hashtables)
        t = hash_klass(len(values))
        t.map_locations(values)
        sorter = _ensure_platform_int(t.lookup(ordered))

    reverse_indexer = np.empty(len(sorter), dtype=np.int_)
    reverse_indexer.put(sorter, np.arange(len(sorter)))

    mask = (labels < -len(values)) | (labels >= len(values)) | \
           (labels == na_sentinel)

    # (Out of bound indices will be masked with `na_sentinel` next, so we
    # may deal with them here without performance loss using `mode='wrap'`.)
    new_labels = reverse_indexer.take(labels, mode='wrap')
    np.putmask(new_labels, mask, na_sentinel)

    return ordered, _ensure_platform_int(new_labels)
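# A usage sketch; safe_sort is pandas-internal, so the import path depends
# on the pandas version (pandas.core.algorithms here, circa 0.19).
import numpy as np
from pandas.core.algorithms import safe_sort

ordered, new_labels = safe_sort(
    np.array(['b', 'a', 'c']),  # unique values
    labels=[0, 1, 2, 2, -1],    # indices into values; -1 means "not found"
)
print(ordered)     # ['a' 'b' 'c']
print(new_labels)  # [1 0 2 2 -1]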
def shade_layer(normal, theta_LS, phi_LS, shininess=0., ratio_specular=0.,
                **kwargs):
    """
    *normal* flat array of normal vect
    shade_dict:
        "theta_LS" angle of incoming light [0, 360]
        "phi_LS" azimuth of incoming light [0, 90]; 90 is vertical
        "shininess" material coefficient for specular
        "ratio_specular" ratio of specular to lambert

    Returns
    *shade* array of light intensity, greyscale image (values between 0 and 1)

    https://en.wikipedia.org/wiki/Blinn%E2%80%93Phong_reflection_model
    """
    if "LS_coords" in kwargs.keys():
        # LS is localized somewhere in the image; compute theta_LS
        # as a vector
        LSx, LSy = kwargs["LS_coords"]
        (ix, ixx, iy, iyy) = kwargs["chunk_slice"]
        chunk_mask = kwargs["chunk_mask"]
        nx = kwargs["nx"]
        ny = kwargs["ny"]
        nx_grid = (np.arange(ix, ixx, dtype=np.float32) / nx) - 0.5
        ny_grid = (np.arange(iy, iyy, dtype=np.float32) / ny) - 0.5
        ny_vec, nx_vec = np.meshgrid(ny_grid, nx_grid)
        theta_LS = -np.ravel(np.arctan2(LSy - ny_vec, nx_vec - LSx)) + np.pi
        if chunk_mask is not None:
            theta_LS = theta_LS[chunk_mask]
    else:
        # Default case: LS at infinity, incoming angle provided in degrees
        theta_LS = theta_LS * np.pi / 180.
    phi_LS = phi_LS * np.pi / 180.

    if "exp_map" in kwargs.keys():
        raise ValueError()  # debug
        # Normal angle correction in case of exponential map
        if kwargs["exp_map"]:
            (ix, ixx, iy, iyy) = kwargs["chunk_slice"]
            chunk_mask = kwargs["chunk_mask"]
            nx = kwargs["nx"]
            ny = kwargs["ny"]
            nx_grid = (np.arange(ix, ixx, dtype=np.float32) / nx) - 0.5
            ny_grid = (np.arange(iy, iyy, dtype=np.float32) / ny) - 0.5
            ny_vec, nx_vec = np.meshgrid(ny_grid, nx_grid)
            expmap_angle = np.ravel(np.exp(-1j * ny_vec * np.pi * 2.))
            if chunk_mask is not None:
                expmap_angle = expmap_angle[chunk_mask]
            normal = normal * expmap_angle

    # k_ambient = - 1. / (2. * ratio_specular + 1.)
    k_lambert = 1.  # - 2. * k_ambient
    k_spec = ratio_specular * k_lambert

    # Light source coordinates
    LSx = np.cos(theta_LS) * np.cos(phi_LS)
    LSy = np.sin(theta_LS) * np.cos(phi_LS)
    LSz = np.sin(phi_LS)

    # Normal vector coordinates - Lambert shading
    nx = normal.real
    ny = normal.imag
    nz = np.sqrt(1. - nx ** 2 - ny ** 2)

    if "inverse_n" in kwargs.keys():
        if kwargs["inverse_n"]:
            nx = -nx
            ny = -ny

    lambert = LSx * nx + LSy * ny + LSz * nz
    np.putmask(lambert, lambert < 0., 0.)

    # half-way vector coordinates - Blinn-Phong shading
    specular = np.zeros_like(lambert)
    if ratio_specular != 0.:
        phi_half = (np.pi * 0.5 + phi_LS) * 0.5
        half_x = np.cos(theta_LS) * np.sin(phi_half)
        half_y = np.sin(theta_LS) * np.sin(phi_half)
        half_z = np.cos(phi_half)
        spec_angle = half_x * nx + half_y * ny + half_z * nz
        np.putmask(spec_angle, spec_angle < 0., 0.)
        specular = np.power(spec_angle, shininess)

    res = k_lambert * lambert + k_spec * specular  # + k_ambient
    # res[normal == 0.] = 0.5 * (np.nanmin(res) + np.nanmax(res))
    try:
        np.putmask(res, normal == 0.,
                   np.nanmin(res) + 0.5 * (np.nanmax(res) - np.nanmin(res)))
    except ValueError:
        pass

    return res  # k_ambient + k_lambert * lambert + k_spec * specular
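# A minimal call sketch for the function above: normals are encoded as
# complex numbers (real = nx, imag = ny), so a slightly tilted surface lit
# from 45 degrees gives a constant Lambert intensity of about 0.91.
import numpy as np

tilted = np.full(5, 0.3 + 0.2j)
print(shade_layer(tilted, theta_LS=45., phi_LS=45.))  # ~0.9095 per pixel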
def replace_atom_types(z):
    # map any atomic number not in the known set to the sentinel -1
    np.putmask(z, np.isin(z, list(self.atom_types), invert=True), -1)
    return z
def f(x):
    # materialize `x` with the target dtype, then overwrite the masked
    # positions with the fill value (`pa`, `mask`, `self` come from the
    # enclosing scope)
    x = pa.array(x, dtype=self.dtype)
    np.putmask(x, mask, self.fill_value)
    return x
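# The common thread in the snippets above: np.putmask mutates its first
# argument in place, while np.where allocates a new array.
import numpy as np

x = np.array([1.0, np.nan, 3.0])
mask = np.isnan(x)

np.putmask(x, mask, 0.0)     # in place: x is now [1. 0. 3.]
y = np.where(mask, -1.0, x)  # new array: [ 1. -1.  3.]; x is unchanged
print(x, y)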