def test_result_values(self):
    tgt = [np.percentile(d, 28) for d in _rdat]
    res = np.nanpercentile(_ndat, 28, axis=1)
    assert_almost_equal(res, tgt)
    tgt = [np.percentile(d, (28, 98)) for d in _rdat]
    res = np.nanpercentile(_ndat, (28, 98), axis=1)
    assert_almost_equal(res, tgt)
def _auto_limits(self):

    if self.component_data is None:
        return

    exclude = (100 - self.percentile) / 2.

    # For subsets in 'data' mode, we want to compute the limits based on
    # the full dataset, not just the subset.
    if isinstance(self.data, Subset):
        data_values = self.data.data[self.component_id]
    else:
        data_values = self.data[self.component_id]

    try:
        lower = np.nanpercentile(data_values, exclude)
        upper = np.nanpercentile(data_values, 100 - exclude)
    except AttributeError:  # Numpy < 1.9
        data_values = data_values[~np.isnan(data_values)]
        lower = np.percentile(data_values, exclude)
        upper = np.percentile(data_values, 100 - exclude)

    if isinstance(self.data, Subset):
        lower = 0

    self.set_limits(lower, upper)
def shift_mask_data(X, Y, upper_percentile=70, lower_percentile=30, n_fwd_days=1):
    # Shift X to match factors at t to returns at t+n_fwd_days
    # (we want to predict future returns after all)
    shifted_X = np.roll(X, n_fwd_days + 1, axis=0)

    # Slice off rolled elements
    X = shifted_X[n_fwd_days + 1:]
    Y = Y[n_fwd_days + 1:]

    n_time, n_stocks, n_factors = X.shape

    # Look for biggest up and down movers
    upper = np.nanpercentile(Y, upper_percentile, axis=1)[:, np.newaxis]
    lower = np.nanpercentile(Y, lower_percentile, axis=1)[:, np.newaxis]

    upper_mask = (Y >= upper)
    lower_mask = (Y <= lower)

    mask = upper_mask | lower_mask  # This also drops nans
    mask = mask.flatten()

    # Only try to predict whether a stock moved up/down relative to other stocks
    Y_binary = np.zeros(n_time * n_stocks)
    Y_binary[upper_mask.flatten()] = 1
    Y_binary[lower_mask.flatten()] = -1

    # Flatten X
    X = X.reshape((n_time * n_stocks, n_factors))

    # Drop stocks that did not move much (i.e. are in the 30th to 70th percentile)
    X = X[mask]
    Y_binary = Y_binary[mask]

    return X, Y_binary
def qmap_mean_departure(x, sample1, sample2, meinequantilen, sample_size,
                        return_mean=False, linear=True):
    from support_functions import qstats

    s1d = x[sample1]  # truth (sample1)
    s2d = x[sample2]  # biased (sample2)

    # add 0 and 100
    meinequantilen = np.unique(np.concatenate([[0], meinequantilen, [100]]))

    qb = np.nanpercentile(s1d, meinequantilen)  # truth
    qa = np.nanpercentile(s2d, meinequantilen)  # biased

    mean1 = np.copy(qb)
    mean2 = np.copy(qa)

    # Mean of quantile boxes( not 0 and 100 )
    count1, m1 = qstats(s1d, meinequantilen[1:-1], counts=sample_size)
    count2, m2 = qstats(s2d, meinequantilen[1:-1], counts=sample_size)

    # only missing ?
    mean1[:-1] = m1
    mean2[:-1] = m2

    # interpolation of bin-means
    if linear:
        m1d = np.interp(s2d, qb[1:], mean1[:-1])  # interpolate bin-means onto the data
        m2d = np.interp(s2d, qa[1:], mean2[:-1])
    else:
        tck = interpolate.splrep(qb[1:], mean1[:-1], s=0)
        m1d = interpolate.splev(s2d, tck, der=0)
        tck = interpolate.splrep(qa[1:], mean2[:-1], s=0)
        m2d = interpolate.splev(s2d, tck, der=0)

    # difference
    if return_mean:
        return m1, m2

    return m1d - m2d  # one value
def simpleStats(y, axis=None):
    """ Computes simple statistics

    Computes the mean, median, min, max, standard deviation, and
    interquartile range of a numpy array y.

    Args:
        y (array): A Numpy array
        axis (int, tuple of ints): Optional. Axis or axes along which the
            statistics are computed; the default is to compute over the
            flattened array. If a tuple of ints, the statistics are
            performed over multiple axes.

    Returns:
        The mean, median, min, max, standard deviation and IQR by columns
    """
    # make sure that y is an array
    y = np.array(y, dtype='float64')

    # Perform the various calculations
    mean = np.nanmean(y, axis=axis)
    std = np.nanstd(y, axis=axis)
    median = np.nanmedian(y, axis=axis)
    min_ = np.nanmin(y, axis=axis)
    max_ = np.nanmax(y, axis=axis)
    IQR = np.nanpercentile(y, 75, axis=axis) - np.nanpercentile(y, 25, axis=axis)

    return mean, median, min_, max_, std, IQR
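# Hypothetical usage sketch for simpleStats above (assumes numpy is imported
# as np and the function is in scope); illustrates per-column statistics with
# NaNs ignored.
if __name__ == '__main__':
    _demo = np.array([[1.0, 2.0, np.nan],
                      [4.0, 5.0, 6.0],
                      [7.0, 8.0, 9.0]])
    _mean, _median, _min, _max, _std, _iqr = simpleStats(_demo, axis=0)
    print(_mean, _median, _iqr)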
def update_values(self, use_default_modifiers=False, **properties):

    if not any(prop in properties for prop in ('attribute', 'percentile', 'log')):
        self.set(percentile='Custom')
        return

    if use_default_modifiers:
        percentile = 100
        log = False
    else:
        percentile = self.percentile or 100
        log = self.log or False

    if percentile == 'Custom' or self.data is None:

        self.set(percentile=percentile, log=log)

    else:

        exclude = (100 - percentile) / 2.
        data_values = self.data_values

        try:
            lower = np.nanpercentile(data_values, exclude)
            upper = np.nanpercentile(data_values, 100 - exclude)
        except AttributeError:  # Numpy < 1.9
            data_values = data_values[~np.isnan(data_values)]
            lower = np.percentile(data_values, exclude)
            upper = np.percentile(data_values, 100 - exclude)

        self.set(lower=lower, upper=upper, percentile=percentile, log=log)
def test_multiple_percentiles(self):
    perc = [50, 100]
    mat = np.ones((4, 3))
    nan_mat = np.nan * mat
    # For checking consistency in higher dimensional case
    large_mat = np.ones((3, 4, 5))
    large_mat[:, 0:2:4, :] = 0
    large_mat[:, :, 3:] *= 2
    for axis in [None, 0, 1]:
        for keepdim in [False, True]:
            with warnings.catch_warnings(record=True) as w:
                warnings.simplefilter('always')
                val = np.percentile(mat, perc, axis=axis, keepdims=keepdim)
                nan_val = np.nanpercentile(nan_mat, perc, axis=axis,
                                           keepdims=keepdim)
                assert_equal(nan_val.shape, val.shape)

                val = np.percentile(large_mat, perc, axis=axis,
                                    keepdims=keepdim)
                nan_val = np.nanpercentile(large_mat, perc, axis=axis,
                                           keepdims=keepdim)
                assert_equal(nan_val, val)

    megamat = np.ones((3, 4, 5, 6))
    assert_equal(np.nanpercentile(megamat, perc, axis=(1, 2)).shape, (2, 3, 6))
def print_stats(array):
    print
    print "5th percentile of data is: " + '\t\t\t' + str(round(np.nanpercentile(array, 5), 2)) + "um"
    print "95th percentile of data is: " + '\t\t\t' + str(round(np.nanpercentile(array, 95), 2)) + "um"
    print "Peak-to-peak amplitude of structure is: " + '\t' + str(round(np.nanpercentile(array, 95) - np.nanpercentile(array, 5), 2)) + "um"
    print "Half peak-to-peak amplitude of structure is: " + '\t' + str(round((np.nanpercentile(array, 95) - np.nanpercentile(array, 5)) / 2, 2)) + "um"
    print
def display(self, keys=None, live=True, scale=False):
    """ plot the training data """
    if keys is None:
        keys = self.headers
    plt.clf()
    fig, axes = plt.subplots(1, len(keys), figsize=(len(keys) * 5, 5),
                             squeeze=False)
    counter = 0
    for fig_j, c in enumerate(self.categories):
        for fig_i, h in enumerate(keys):
            ax1 = axes[0, fig_i]
            ax1.plot(self.ys[c][h].x, self.ys[c][h].y, colors[fig_j])
            if scale:
                m = np.nanpercentile(self.ys[c][h].y, 25, interpolation="higher")
                M = np.nanpercentile(self.ys[c][h].y, 75, interpolation="higher")
                ax1.set_ylim([0, 1.5 * M])
            val = self.ys[c][h].y[-1]
            # ax1.set_title(h + ": " + str(val))
            if counter == 0:
                ax1.set_title("{0} : {1:.3f}".format(h, val))
            # ax1.annotate(self.ys[h][-1], xy=( , np.mean(self.ys[h]) ) )
            counter += 1
    fig.tight_layout()
    if live:
        display.clear_output(wait=True)
        display.display(plt.gcf())
        plt.close()
    else:
        plt.plot()
        plt.show()
def Tukey_outliers(set_of_means, FDR=0.005, supporting_interval=0.5, verbose=False):
    """
    Performs Tukey quartile test for outliers from a normal distribution with
    defined false discovery rate

    :param set_of_means:
    :param FDR:
    :return:
    """
    # false discovery rate v.s. expected falses v.s. power
    q1_q3 = norm.interval(supporting_interval)
    # TODO: this is not necessary: we can perfectly well fit it with proper params to FDR
    FDR_q1_q3 = norm.interval(1 - FDR)
    multiplier = (FDR_q1_q3[1] - q1_q3[1]) / (q1_q3[1] - q1_q3[0])
    l_means = len(set_of_means)

    q1 = np.nanpercentile(set_of_means, 50 * (1 - supporting_interval))
    q3 = np.nanpercentile(set_of_means, 50 * (1 + supporting_interval))
    high_fence = q3 + multiplier * (q3 - q1)
    low_fence = q1 - multiplier * (q3 - q1)

    if verbose:
        print 'FDR:', FDR
        print 'q1_q3', q1_q3
        print 'FDRq1_q3', FDR_q1_q3
        print 'q1, q3', q1, q3
        print 'fences', high_fence, low_fence

    if verbose:
        print "FDR: %s %%, expected outliers: %s, outlier 5%% confidence interval: %s" % \
            (FDR * 100, FDR * l_means, poisson.interval(0.95, FDR * l_means))

    ho = (set_of_means < low_fence).nonzero()[0]
    lo = (set_of_means > high_fence).nonzero()[0]

    return lo, ho
def _auto_limits(self):

    if self.data is None:
        return

    if self.attribute is None:
        return

    if self.subset_mode == 'outline':
        self.set_limits(0, 1)
        return

    exclude = (100 - self.percentile) / 2.

    # For subsets in 'data' mode, we want to compute the limits based on
    # the full dataset, not just the subset.
    if self.subset_mode == 'data':
        data_values = self.data.data[self.attribute]
    else:
        data_values = self.data[self.attribute]

    try:
        lower = np.nanpercentile(data_values, exclude)
        upper = np.nanpercentile(data_values, 100 - exclude)
    except AttributeError:  # Numpy < 1.9
        data_values = data_values[~np.isnan(data_values)]
        lower = np.percentile(data_values, exclude)
        upper = np.percentile(data_values, 100 - exclude)

    if self.subset_mode == 'data':
        self.set_limits(0, upper)
    else:
        self.set_limits(lower, upper)
def _rescale_imshow_rgb(darray, vmin, vmax, robust):
    assert robust or vmin is not None or vmax is not None
    # There's a cyclic dependency via DataArray, so we can't import from
    # xarray.ufuncs in global scope.
    from xarray.ufuncs import maximum, minimum
    # Calculate vmin and vmax automatically for `robust=True`
    if robust:
        if vmax is None:
            vmax = np.nanpercentile(darray, 100 - ROBUST_PERCENTILE)
        if vmin is None:
            vmin = np.nanpercentile(darray, ROBUST_PERCENTILE)
    # If not robust and one bound is None, calculate the default other bound
    # and check that an interval between them exists.
    elif vmax is None:
        vmax = 255 if np.issubdtype(darray.dtype, np.integer) else 1
        if vmax < vmin:
            raise ValueError(
                'vmin=%r is less than the default vmax (%r) - you must supply '
                'a vmax > vmin in this case.' % (vmin, vmax))
    elif vmin is None:
        vmin = 0
        if vmin > vmax:
            raise ValueError(
                'vmax=%r is less than the default vmin (0) - you must supply '
                'a vmin < vmax in this case.' % vmax)
    # Scale interval [vmin .. vmax] to [0 .. 1], with darray as 64-bit float
    # to avoid precision loss, integer over/underflow, etc with extreme inputs.
    # After scaling, downcast to 32-bit float. This substantially reduces
    # memory usage after we hand `darray` off to matplotlib.
    darray = ((darray.astype('f8') - vmin) / (vmax - vmin)).astype('f4')
    return minimum(maximum(darray, 0), 1)
def timeseries(iData, zoneMap, std=None):
    '''Make zone-wise averaging of input data
    input: 3D matrix(Layers x Width x Height) and map of zones (W x H)
    output: 2D matrices(L x WH) with mean and std
    '''
    # reshape input cube into 2D matrix
    r, h, w = iData.shape
    iData, notNanDataI = cube2flat(iData)

    # get unique values of not-nan labels
    uniqZones = np.unique(zoneMap[np.isfinite(zoneMap)])

    zoneNum = np.zeros((r, uniqZones.size))
    zoneMean = np.zeros((r, uniqZones.size))
    zoneStd = np.zeros((r, uniqZones.size))
    zoneP16 = np.zeros((r, uniqZones.size))
    zoneP84 = np.zeros((r, uniqZones.size))

    # in each zone: get all values from input data, average the not-nan data
    for i in range(uniqZones.size):
        zi = uniqZones[i]
        if not np.isnan(zi):
            zoneData = iData[:, zoneMap.flat == zi]
            zoneNum[:, i] = zi
            if std is not None:
                # filter out of maxSTD values
                outliers = (np.abs(zoneData.T - zoneMean[:, i]) >
                            zoneStd[:, i] * std).T
                zoneData[outliers] = np.nan
            zoneMean[:, i] = np.nanmean(zoneData, axis=1)
            zoneStd[:, i] = np.nanstd(zoneData, axis=1)
            zoneP16[:, i] = np.nanpercentile(zoneData, 16, axis=1)
            zoneP84[:, i] = np.nanpercentile(zoneData, 84, axis=1)

    return zoneMean, zoneStd, zoneNum, zoneP16, zoneP84
def test_multiple_percentiles(self):
    perc = [50, 100]
    mat = np.ones((4, 3))
    nan_mat = np.nan * mat
    # For checking consistency in higher dimensional case
    large_mat = np.ones((3, 4, 5))
    large_mat[:, 0:2:4, :] = 0
    large_mat[:, :, 3:] *= 2
    for axis in [None, 0, 1]:
        for keepdim in [False, True]:
            with suppress_warnings() as sup:
                sup.filter(RuntimeWarning, "All-NaN slice encountered")
                val = np.percentile(mat, perc, axis=axis, keepdims=keepdim)
                nan_val = np.nanpercentile(nan_mat, perc, axis=axis,
                                           keepdims=keepdim)
                assert_equal(nan_val.shape, val.shape)

                val = np.percentile(large_mat, perc, axis=axis,
                                    keepdims=keepdim)
                nan_val = np.nanpercentile(large_mat, perc, axis=axis,
                                           keepdims=keepdim)
                assert_equal(nan_val, val)

    megamat = np.ones((3, 4, 5, 6))
    assert_equal(np.nanpercentile(megamat, perc, axis=(1, 2)).shape, (2, 3, 6))
def truncate_range(data, percMin=0.25, percMax=99.75, discard_zeros=True):
    """Truncate too low and too high values.

    Parameters
    ----------
    data : np.ndarray
        Image to be truncated.
    percMin : float
        Percentile minimum.
    percMax : float
        Percentile maximum.
    discard_zeros : bool
        Discard voxels with value 0 from truncation.

    Returns
    -------
    data : np.ndarray
        Truncated data.
    pMin : float
        Minimum truncation threshold which is used.
    pMax : float
        Maximum truncation threshold which is used.

    """
    if discard_zeros:
        msk = ~np.isclose(data, 0.)
        pMin, pMax = np.nanpercentile(data[msk], [percMin, percMax])
    else:
        pMin, pMax = np.nanpercentile(data, [percMin, percMax])
    temp = data[~np.isnan(data)]
    temp[temp < pMin], temp[temp > pMax] = pMin, pMax  # truncate min and max
    data[~np.isnan(data)] = temp
    if discard_zeros:
        data[~msk] = 0  # put back masked out voxels
    return data, pMin, pMax
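# Hypothetical usage sketch for truncate_range above (assumes numpy is
# imported as np and the function is in scope); clips an image to its
# 0.25-99.75 percentile range while leaving zero-valued (masked) voxels at 0.
if __name__ == '__main__':
    _img = np.random.randn(64, 64) * 100.
    _img[:4, :4] = 0.  # pretend these voxels are masked out
    _clipped, _pMin, _pMax = truncate_range(_img, percMin=0.25, percMax=99.75)
    print(_pMin, _pMax)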
def _compute(self, arrays, dates, assets, mask):
    """
    For each row in the input, compute a mask of all values falling between
    the given percentiles.
    """
    # TODO: Review whether there's a better way of handling small numbers
    # of columns.
    data = arrays[0].copy().astype(float64)
    data[~mask] = nan

    # FIXME: np.nanpercentile **should** support computing multiple bounds
    # at once, but there's a bug in the logic for multiple bounds in numpy
    # 1.9.2.  It will be fixed in 1.10.
    # c.f. https://github.com/numpy/numpy/pull/5981
    lower_bounds = nanpercentile(
        data,
        self._min_percentile,
        axis=1,
        keepdims=True,
    )
    upper_bounds = nanpercentile(
        data,
        self._max_percentile,
        axis=1,
        keepdims=True,
    )
    return (lower_bounds <= data) & (data <= upper_bounds)
def test_percentile_nasty_partitions(self):
    # Test percentile with nasty partitions: divide up 5 assets into
    # quartiles.
    # There isn't a nice mathematical definition of correct behavior here,
    # so for now we guarantee the behavior of numpy.nanpercentile.  This is
    # mostly for regression testing in case we write our own specialized
    # percentile calculation at some point in the future.
    data = arange(25, dtype=float).reshape(5, 5) % 4
    quartiles = range(4)
    filter_names = ['pct_' + str(q) for q in quartiles]

    graph = TermGraph(
        {
            name: self.f.percentile_between(q * 25.0, (q + 1) * 25.0)
            for name, q in zip(filter_names, quartiles)
        }
    )
    results = self.run_graph(
        graph,
        initial_workspace={self.f: data},
        mask=self.build_mask(ones((5, 5))),
    )

    for name, quartile in zip(filter_names, quartiles):
        result = results[name]
        lower = quartile * 25.0
        upper = (quartile + 1) * 25.0
        expected = and_(
            nanpercentile(data, lower, axis=1, keepdims=True) <= data,
            data <= nanpercentile(data, upper, axis=1, keepdims=True),
        )
        check_arrays(result, expected)
def test_result_values(self):
    tgt = [np.percentile(d, 28) for d in _rdat]
    res = np.nanpercentile(_ndat, 28, axis=1)
    assert_almost_equal(res, tgt)
    # Transpose the array to fit the output convention of numpy.percentile
    tgt = np.transpose([np.percentile(d, (28, 98)) for d in _rdat])
    res = np.nanpercentile(_ndat, (28, 98), axis=1)
    assert_almost_equal(res, tgt)
def doCalc(self):
    self.median = float(np.nanmedian(self.list_values))
    self.average = float(np.nanmean(self.list_values))
    self.mode = float(stats.mode(self.list_values, nan_policy='omit')[0])
    # self.average = self.sum / self.len
    self.CI['min'] = float(np.nanpercentile(self.list_values, 5))
    self.CI['max'] = float(np.nanpercentile(self.list_values, 95))
def plot_quantile_returns_violin(return_by_q, ylim_percentiles=None, ax=None):
    """
    Plots a violin box plot of period wise returns for factor quantiles.

    Parameters
    ----------
    return_by_q : pd.DataFrame - MultiIndex
        DataFrame with date and quantile as rows MultiIndex,
        forward return windows as columns, returns as values.
    ylim_percentiles : tuple of integers
        Percentiles of observed data to use as y limits for plot.
    ax : matplotlib.Axes, optional
        Axes upon which to plot.

    Returns
    -------
    ax : matplotlib.Axes
        The axes that were plotted on.
    """
    return_by_q = return_by_q.copy()

    if ylim_percentiles is not None:
        ymin = (np.nanpercentile(return_by_q.values,
                                 ylim_percentiles[0]) * DECIMAL_TO_BPS)
        ymax = (np.nanpercentile(return_by_q.values,
                                 ylim_percentiles[1]) * DECIMAL_TO_BPS)
    else:
        ymin = None
        ymax = None

    if ax is None:
        f, ax = plt.subplots(1, 1, figsize=(18, 6))

    unstacked_dr = (return_by_q
                    .multiply(DECIMAL_TO_BPS))

    unstacked_dr.columns = unstacked_dr.columns.set_names('forward_periods')
    unstacked_dr = unstacked_dr.stack()
    unstacked_dr.name = 'return'
    unstacked_dr = unstacked_dr.reset_index()

    sns.violinplot(data=unstacked_dr,
                   x='factor_quantile',
                   hue='forward_periods',
                   y='return',
                   orient='v',
                   cut=0,
                   inner='quartile',
                   ax=ax)
    ax.set(xlabel='', ylabel='Return (bps)',
           title="Period Wise Return By Factor Quantile",
           ylim=(ymin, ymax))

    ax.axhline(0.0, linestyle='-', color='black', lw=0.7, alpha=0.6)

    return ax
def qmap_departure(x, sample1, sample2, meinequantilen, sample_size,
                   sample3=None, return_mean=False, linear=True, verbose=0):
    from support_functions import qstats
    #
    s1d = x[sample1]  # truth (sample1)
    s2d = x[sample2]  # biased (sample2)
    #
    # add 0 and 100
    meinequantilen = np.unique(np.concatenate([[0], meinequantilen, [100]]))
    # Be sure to remove 0,100 now
    # Mean of quantile boxes( not 0 and 100 )
    count1, m1 = qstats(s1d, meinequantilen[1:-1], counts=sample_size)
    count2, m2 = qstats(s2d, meinequantilen[1:-1], counts=sample_size)
    ok1 = count1[:-1] > sample_size
    ok2 = count2[:-1] > sample_size
    # Enough data to calculate ?
    if not np.any(ok1 & ok2):
        if sample3 is not None:
            return np.zeros(x[sample3].shape)  # return only zeros
        else:
            return np.zeros(s2d.shape)
    #
    if verbose > 1:
        print "Quantiles:", meinequantilen
        print "Sample 1: ", count1
        print "Sample 2: ", count2
    #
    qb = np.nanpercentile(s1d, meinequantilen)  # truth
    qa = np.nanpercentile(s2d, meinequantilen)  # biased
    #
    diffs = qb - qa          # Difference of quantiles (1st and last for interp)
    xp = qa
    xp[:-1] = m2             # x points of the interpolation ( ? NAN )
    diffs[:-1] = m1 - m2     # y points of the interpolation

    if return_mean:
        return m1, m2

    # interpolate quantile differences
    # how to handle end-point ?
    # if not extrapolate:
    #     diffs = diffs[1:-1]  # trim
    #     xp = xp[1:-1]  # trim

    # Spline or linear interpolation
    if not linear:
        tck = interpolate.splrep(xp, diffs, s=0)
        if sample3 is not None:
            out = interpolate.splev(x[sample3], tck, der=0)  # does this retain nan ?
        else:
            out = interpolate.splev(s2d, tck, der=0)
    #
    else:
        # to all data in sample / but not when missing!
        if sample3 is not None:
            out = np.interp(x[sample3], xp, diffs)
        else:
            out = np.interp(s2d, xp, diffs)

    # turn missing into zero
    return np.where(np.isfinite(out), out, 0.)  # size of sample 2 or sample 3
    # no adjustment
def apply(self, predictions, dimension=0):
    """Peak detection

    Parameters
    ----------
    predictions : SlidingWindowFeature
        Predictions returned by segmentation approaches.

    Returns
    -------
    segmentation : Timeline
        Partition.
    """
    if len(predictions.data.shape) == 1:
        y = predictions.data
    elif predictions.data.shape[1] == 1:
        y = predictions.data[:, 0]
    else:
        y = predictions.data[:, dimension]

    if self.log_scale:
        y = np.exp(y)

    sw = predictions.sliding_window

    precision = sw.step
    order = max(1, int(np.rint(self.min_duration / precision)))
    indices = scipy.signal.argrelmax(y, order=order)[0]

    if self.scale == 'absolute':
        mini = 0
        maxi = 1

    elif self.scale == 'relative':
        mini = np.nanmin(y)
        maxi = np.nanmax(y)

    elif self.scale == 'percentile':
        mini = np.nanpercentile(y, 1)
        maxi = np.nanpercentile(y, 99)

    threshold = mini + self.alpha * (maxi - mini)

    peak_time = np.array([sw[i].middle for i in indices if y[i] > threshold])

    n_windows = len(y)
    start_time = sw[0].start
    end_time = sw[n_windows].end

    boundaries = np.hstack([[start_time], peak_time, [end_time]])
    segmentation = Timeline()
    for i, (start, end) in enumerate(pairwise(boundaries)):
        segment = Segment(start, end)
        segmentation.add(segment)

    return segmentation
def setCVflagByGroup(args, wide, dat):
    # Split design file by treatment group
    pdfOut = PdfPages(args.CVplot)
    CV = pd.DataFrame(index=wide.index)
    for title, group in dat.design.groupby(args.group):
        # Filter the wide file into a new dataframe
        currentFrame = wide[group.index]

        # Change dat.sampleIDs to match the design file
        dat.sampleIDs = group.index

        CV['cv_' + title], CVcutoff = setCVflag(args, currentFrame, dat,
                                                groupName=title)

    CV['cv'] = CV.apply(np.max, axis=1)

    if not args.CVcutoff:
        CVcutoff = np.nanpercentile(CV['cv'].values, q=90)
        CVcutoff = round(CVcutoff, -int(floor(log(abs(CVcutoff), 10))) + 2)
    else:
        CVcutoff = float(args.CVcutoff)

    for title, group in dat.design.groupby(args.group):
        fig, ax = plt.subplots()
        xmin = -np.nanpercentile(CV['cv_' + title].values, 99) * 0.2
        xmax = np.nanpercentile(CV['cv_' + title].values, 99) * 1.5
        ax.set_xlim(xmin, xmax)
        CV['cv_' + title].plot(kind='hist', range=(xmin, xmax), bins=15,
                               normed=1, color='grey', label="CV histogram")
        CV['cv_' + title].plot(kind='kde',
                               title="Density Plot of Coefficients of Variation in " +
                                     args.group + " " + title,
                               ax=ax, label="CV density")
        plt.axvline(x=CVcutoff, color='red', linestyle='dashed',
                    label="Cutoff at: {0}".format(CVcutoff))
        plt.legend()
        pdfOut.savefig(fig, bbox_inches='tight')
        plt.close(fig)

    fig, ax = plt.subplots()
    xmin = -np.nanpercentile(CV['cv'].values, 99) * 0.2
    xmax = np.nanpercentile(CV['cv'].values, 99) * 1.5
    ax.set_xlim(xmin, xmax)

    # Create flag file instance
    CVflag = Flags(index=CV['cv'].index)

    for title, group in dat.design.groupby(args.group):
        CV['cv_' + title].plot(kind='kde',
                               title="Density Plot of Coefficients of Variation by " +
                                     args.group,
                               ax=ax, label="CV density in group " + title)

        # Create new flag row for each group
        CVflag.addColumn(column='flag_feature_big_CV_' + title,
                         mask=((CV['cv_' + title].get_values() > CVcutoff) |
                               CV['cv_' + title].isnull()))

    plt.axvline(x=CVcutoff, color='red', linestyle='dashed',
                label="Cutoff at: {0}".format(CVcutoff))
    plt.legend()
    pdfOut.savefig(fig, bbox_inches='tight')
    plt.close(fig)
    pdfOut.close()

    # Write flag file
    CVflag.df_flags.to_csv(args.CVflag, sep='\t')
def normalize_linear(np_array, lower_percentile, upper_percentile):
    lower_bound = np.nanpercentile(np_array, lower_percentile)
    upper_bound = np.nanpercentile(np_array, upper_percentile)
    np_array[np_array < lower_bound] = lower_bound
    np_array[np_array > upper_bound] = upper_bound
    np_array = np_array - lower_bound
    np_array = np_array / (upper_bound - lower_bound)
    return np_array
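# Hypothetical usage sketch for normalize_linear above (assumes numpy is
# imported as np and the function is in scope): values are clipped to the
# 2nd-98th percentile range and rescaled to [0, 1]. Note that the clipping
# step modifies the input array in place.
if __name__ == '__main__':
    _arr = np.array([0.0, 1.0, 2.0, 5.0, 100.0])
    print(normalize_linear(_arr, 2, 98))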
def get_dataframe(self, attr, measure='mean', sum=False, cum=False):
    """
    :rtype NDFrame
    """
    values = []
    for name in self.names:
        market_periods_by_date = self.periods_by_market_and_date[name]
        values.append([market_periods_by_date[date][attr] for date in self.dates])
    values = array(values)
    # shape is names-dates-samples

    if cum:
        # Accumulate over the dates, the second axis.
        # shape is the same: names-dates-samples
        values = values.cumsum(axis=1)

    if sum:
        # Sum over the names, the first axis.
        # shape is dates-samples
        values = values.sum(axis=0)
        pass

    if measure == 'mean':
        values = values.mean(axis=-1)
    elif measure == 'std':
        values = values.std(axis=-1)
    elif measure == 'quantile':
        assert self.confidence_interval is not None
        low_percentile = (100 - self.confidence_interval) / 2.0
        high_percentile = 100 - low_percentile
        mean = values.mean(axis=-1)
        low = mean - nanpercentile(values, q=low_percentile, axis=-1)
        high = nanpercentile(values, q=high_percentile, axis=-1) - mean
        errors = []
        if sum:
            # Need to return 2-len(dates) sized array, for a Series.
            errors.append([low, high])
        else:
            # Need to return len(names)-2-len(dates) sized array, for a DataFrame.
            for i in range(len(self.names)):
                errors.append([low[i], high[i]])
        values = array(errors)
        return values
    # elif measure == 'direct':
    #     raise NotImplementedError()
    #     if len(values) == 1:
    #         values = values[0]
    #     else:
    #         raise NotImplementedError()
    #     return DataFrame(values, index=dates, columns=names)
    else:
        raise Exception("Measure '{}' not supported".format(measure))

    if sum:
        return Series(values, index=self.dates)
    else:
        return DataFrame(values.T, index=self.dates, columns=self.names)
def _get_power_range(power_dict):
    # Calculate the power data range across each channel
    max_db = {}
    min_db = {}
    for channel, channel_data in power_dict.iteritems():
        all_power_data = np.concatenate(channel_data)
        max_db[channel] = np.nanpercentile(all_power_data, ZPLSPlot.upper_percentile)
        min_db[channel] = np.nanpercentile(all_power_data, ZPLSPlot.lower_percentile)
    return min_db, max_db
def stats(arr):
    af = arr.flatten()
    box_bot = np.nanpercentile(af, 25.0)
    box_top = np.nanpercentile(af, 75.0)
    box_center = np.nanpercentile(af, 50.0)   # np.median(af)
    flier_low = np.nanpercentile(af, 0.0)     # np.min(af)
    flier_high = np.nanpercentile(af, 100.0)  # np.max(af)
    return flier_low, box_bot, box_center, box_top, flier_high
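# Hypothetical usage sketch for stats above (assumes numpy is imported as np
# and the function is in scope): returns the min, quartiles, and max of the
# flattened array, ignoring NaNs.
if __name__ == '__main__':
    _demo = np.array([[1.0, np.nan, 3.0], [5.0, 7.0, 9.0]])
    print(stats(_demo))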
def test():
    import glob
    import analysis.experiment as exp
    reload(exp)

    fn = exp.filename()
    print fn

    fn = filename(dtype='img')
    print fn
    print glob.glob(fn)

    data = exp.load(wid=0)
    print data.shape

    import matplotlib.pyplot as plt
    plt.figure(1)
    plt.clf()
    img = exp.load_img(t=200000)
    plt.imshow(img, cmap='gray')

    # animate movie
    import time

    fig, ax = plt.subplots()
    figimg = ax.imshow(img, cmap='gray')
    plt.show()

    for t in range(200000, 300000):
        figimg.set_data(exp.load_img(t=t))
        ax.set_title('t=%d' % t)
        time.sleep(0.001)
        fig.canvas.draw()
        fig.canvas.flush_events()

    reload(exp)
    sbins = exp.stage_bins(wid=[0, 1])
    d = exp.load_stage_binned(wid=[0, 1], nbins_per_stage=10)

    a = exp.load_aligned(wid=all, align='time', dtype='speed')
    a_th = np.nanpercentile(a, 85)
    a[a > a_th] = a_th
    a[np.isnan(a)] = -1.0

    import analysis.plot as fplt
    fplt.plot_array(a)

    a = exp.load_aligned(wid=all, align='L2', dtype='speed')
    a_th = np.nanpercentile(a, 85)
    a[a > a_th] = a_th
    a[np.isnan(a)] = -1.0

    import analysis.plot as fplt
    fplt.plot_array(a)
def contourf_date_lat(self, ax, whichcolumn='wind', updown='up', **kwargs):
    """ A contourf of multiple-day wind versus date and latitude.

    Args:
        ax: axis handle
        whichcolumn: string, 'wind', 'winde', 'windn'.
        updown: string, 'up' or 'down'
        **kwargs: for contourf
    Return:
        hc: handle of the contourf plot
    ----------------------------------------
    Note: x axis is days from '2000-1-1'
    """
    from matplotlib.ticker import AutoMinorLocator
    from scipy.interpolate import griddata
    if not self.empty:
        # self['epochday'] = (self.index-self.index.min())/pd.Timedelta('1D')
        self['epochday'] = ((self.index - pd.Timestamp('2000-1-1')) /
                            pd.Timedelta('1D'))
        btime = self['epochday'].min()
        etime = self['epochday'].max()

        isup, isdown = mf.updown(self.lat)
        tmp = self[isup] if updown == 'up' else self[isdown]

        ut0 = np.arange(np.floor(btime), np.floor(etime) + 1 + 0.5 / 24, 0.5 / 24)
        lat0 = np.arange(-90, 91, 3)
        ut, lat = np.meshgrid(ut0, lat0)
        windt = griddata((tmp['epochday'], tmp.lat), tmp[whichcolumn],
                         (ut, lat), method='linear', rescale=True)
        for index, k in enumerate(ut0):
            fp = abs(tmp['epochday'] - k) < 0.5 / 24
            if not fp.any():
                windt[:, index] = np.nan

        hc = ax.contourf(
            ut, lat, windt,
            levels=np.linspace(np.nanpercentile(windt, 1),
                               np.nanpercentile(windt, 99), 11),
            **kwargs)

        ax.set_xlim(np.floor(btime), np.floor(etime) + 1)
        ax.set_xticks(np.arange(np.floor(btime), np.floor(etime) + 2))
        ax.set_xticklabels(pd.date_range(
            tmp.index[0],
            tmp.index[-1] + pd.Timedelta('1d')).strftime('%j'))
        ax.set_ylim(-90, 90)
        ax.set_yticks(np.arange(-90, 91, 30))
        ax.xaxis.set_minor_locator(AutoMinorLocator(4))
        ax.yaxis.set_minor_locator(AutoMinorLocator(3))
        ax.tick_params(which='both', width=1.2)
        ax.tick_params(which='major', length=7)
        ax.tick_params(which='minor', length=4)
        ax.set_title('LT: {:.1f}'.format(tmp['LT'].median()))
        ax.set_xlabel('Day of {:d}'.format(tmp.index[0].year), fontsize=14)
        ax.set_ylabel('Latitude', fontsize=14)
        return hc  # , windt
def LST_compare(day, t, ndi_thres):
    # get matching radiometer time step for flight
    dateCol = met.SelectTimestep(metTime, flightDates[day][t])

    # get meteorological variables
    Rlu = metData['Rl_up_Wm2_EC'][dateCol][0]
    Rld = metData['Rl_down_Wm2_EC'][dateCol][0]

    # calculate LST from radiometer
    # Trad_EC_v1 = ((Rlu - emisAtm * sb * (1 - emis) * Ta**4)/(emis* \
    #               sb))**(1./4) - 273.16
    Trad_EC = ((Rlu - (1 - emis) * Rld) / (emis * sb)) ** (1. / 4) - 273.16

    # get UAV LST data
    image_TIR = 'TEMP_Mosaic_%s_%s_rect_rs03_noV_lowElev' % (t, day)
    im_TIR = os.path.join(direct_TIR, image_TIR + '.tif')
    fid_TIR = gdal.Open(im_TIR, gdal.GA_ReadOnly)
    lst_UAV = fid_TIR.GetRasterBand(1).ReadAsArray()
    lst_UAV[lst_UAV <= -99] = np.nan

    # get NDI data
    image_ndi = 'NDI_%s_%s_rs03_unclip' % (day, t)
    im_ndi = os.path.join(direct_ndi, image_ndi + '.tif')
    fid_ndi = gdal.Open(im_ndi, gdal.GA_ReadOnly)
    ndi = fid_ndi.GetRasterBand(1).ReadAsArray()
    ndi[ndi <= ndi_thres] = 0
    ndi_mask = ndi > 0

    # mask lst by ndi
    lst_UAV[~ndi_mask] = np.nan

    # The 10 % quantile is now used for comparison. To avoid that the
    # coolboxes are also evaluated, the lowest values are removed beforehand.
    lower = lst_UAV.flatten()
    without_lowest = lower.copy()
    without_lowest[lower <= np.nanpercentile(lower, 0.5)] = np.nan
    low_LST = np.nanpercentile(without_lowest, 10)
    # low_LST = np.nanpercentile(lower, 10)
    UAV_LST = np.nanmean(lst_UAV[lst_UAV < low_LST])

    LST_UAV.append(round(UAV_LST, 1))
    LST_EC.append(round(Trad_EC, 1))
    Days.append(day[1:])
    Dates.append(day[1:] + '_' + t[1:])

    compareTrad[day + '_' + t] = {
        'UAV': round(np.nanmean(UAV_LST), 1),
        'EC': round(Trad_EC, 1),
        'Day': day[1:]}

    return LST_UAV, LST_EC, Days, compareTrad
def summarize_values(values, summary_type=None, threshold=5e-2, decimal_count=1, display_n=True): s = {} total_len = np.nansum([len(values[x]) for x in values]) for key in values: s[key] = '' v = values[key] if summary_type == 1.0: # numeric data1 = values['Dood - totaal'][~values['Dood - totaal'].isna()] data2 = values['Levend ontslagen en niet heropgenomen - totaal'][ ~values['Levend ontslagen en niet heropgenomen - totaal'].isna( )] normalresult1 = ss.normaltest(data1) normalresult2 = ss.normaltest(data2) if normalresult1.pvalue < threshold or normalresult2.pvalue < threshold: # not normal: use median if len(v) - np.nansum(v.isna()) > 0: n = len(v) - np.nansum(v.isna()) median = np.nanmedian(v) iqr1 = np.nanpercentile(v, 25) iqr3 = np.nanpercentile(v, 75) if display_n: s[key] = [ format( str(round(median, decimal_count)) + ' (' + str(round(iqr1, decimal_count)) + '-' + str(round(iqr3, decimal_count)) + ')' + '\n(n=' + str(n) + ')') ] else: s[key] = [ format( str(round(median, decimal_count)) + ' (' + str(round(iqr1, decimal_count)) + '-' + str(round(iqr3, decimal_count)) + ')') ] else: # normal: use mean if len(v) - np.nansum(v.isna()) > 0: n = len(v) - np.nansum(v.isna()) p = (len(v) - np.nansum(v.isna())) / total_len * 100 mean = np.nanmean(v) std = np.nanstd(v) if display_n: s[key] = [ format( str(round(mean, decimal_count)) + ' ± ' + str(round(std, decimal_count)) + '\n(n=' + str(n) + ')') ] else: s[key] = [ format( str(round(mean, decimal_count)) + ' ± ' + str(round(std, decimal_count))) ] elif summary_type == 2.0: # binary n = len(v) - np.nansum( v.isna()) # total n available for this variable p = sum(v == 1) / n * 100 # percentage True if n == 0: p = 0 if display_n: s[key] = [format(str(int(p)) + '%' + '\n(n=' + str(n) + ')')] else: s[key] = [format(str(int(p)) + '%')] elif summary_type == 3.0: # binary n = len(v) - np.nansum(v.isna()) median = np.nanmedian(v) iqr1 = np.nanpercentile(v, 25) iqr3 = np.nanpercentile(v, 75) s[key] = [ format( str(round(median, decimal_count)) + ' (' + str(round(iqr1, decimal_count)) + '-' + str(round(iqr3, decimal_count))) ] if display_n: s[key] = [ format( str(round(median, decimal_count)) + ' (' + str(round(iqr1, decimal_count)) + '-' + str(round(iqr3, decimal_count)) + '\n(n=' + str(n) + ')') ] else: s[key] = [ format( str(round(median, decimal_count)) + ' (' + str(round(iqr1, decimal_count)) + '-' + str(round(iqr3, decimal_count))) ] elif summary_type == 4.0: try: n = len(v) - np.nansum(v.isna()) median = np.nanmedian(v) iqr1 = np.nanpercentile(v, 25) iqr3 = np.nanpercentile(v, 75) s[key] = [ format( str(round(median, decimal_count)) + ' (' + str(round(iqr1, decimal_count)) + '-' + str(round(iqr3, decimal_count))) ] if display_n: s[key] = [ format( str(round(median, decimal_count)) + ' (' + str(round(iqr1, decimal_count)) + '-' + str(round(iqr3, decimal_count)) + '\n(n=' + str(n) + ')') ] else: s[key] = [ format( str(round(median, decimal_count)) + ' (' + str(round(iqr1, decimal_count)) + '-' + str(round(iqr3, decimal_count))) ] except EXC as Exception: v = [float(v1) for v1 in v if v1 is not None] n = len(v) - np.nansum(v.isna()) median = np.nanmedian(v) iqr1 = np.nanpercentile(v, 25) iqr3 = np.nanpercentile(v, 75) if display_n: s[key] = [ format( str(round(median, decimal_count)) + ' (' + str(round(iqr1, decimal_count)) + '-' + str(round(iqr3, decimal_count)) + '\n(n=' + str(n) + ')') ] else: s[key] = [ format( str(round(median, decimal_count)) + ' (' + str(round(iqr1, decimal_count)) + '-' + str(round(iqr3, decimal_count))) ] elif 
summary_type is None or summary_type == 'n_percn_meansd_medianiqr': if len(v) - np.nansum(v.isna()) > 0: n = len(v) - np.nansum(v.isna()) p = (len(v) - np.nansum(v.isna())) / total_len * 100 mean = np.nanmean(v) std = np.nanstd(v) median = np.nanmedian(v) iqr1 = np.nanpercentile(v, 25) iqr3 = np.nanpercentile(v, 75) s[key] = [ format('n = ' + str(n) + '\n' + str(int(p)) + '%\n' + str(round(mean, decimal_count)) + ' ± ' + str(round(std, decimal_count)) + '\n' + str(round(median, decimal_count)) + ' (' + str(round(iqr1, decimal_count)) + '-' + str(round(iqr3, decimal_count)) + ')') ] elif len(v) - np.nansum(v.isna()) == 0: n = len(v) - np.nansum(v.isna()) p = (len(v) - np.nansum(v.isna())) / total_len * 100 s[key] = [format('n = ' + str(n) + ';\n' + str(int(p)) + '%')] else: s[key] = ['n/a'] return s
def summary_plot(shap_values, features, feature_names=None, max_display=20, plot_type="dot", color="#ff0052", axis_color="#333333", title=None, alpha=1, show=True, sort=True): """ Create a SHAP summary plot, colored by feature values when they are provided. Parameters ---------- shap_values : numpy.array Matrix of SHAP values (# samples x # features) features : numpy.array or pandas.DataFrame or list Matrix of feature values (# samples x # features) or a feature_names list as shorthand feature_names : list Names of the features (length # features) max_display : int How many top features to include in the plot plot_type : "dot" (default) or "violin" What type of summary plot to produce """ # convert from a DataFrame or other types if str(type(features)) == "<class 'pandas.core.frame.DataFrame'>": if feature_names is None: feature_names = features.columns features = features.as_matrix() elif str(type(features)) == "<class 'list'>": if feature_names is None: feature_names = features features = None elif len(features.shape) == 1 and feature_names is None: feature_names = features features = None if sort: # order features by the sum of their effect magnitudes feature_order = np.argsort(np.sum(np.abs(shap_values), axis=0)[:-1]) feature_order = feature_order[-min(max_display, len(feature_order)):] else: feature_order = np.flip( np.arange(min(max_display, shap_values.shape[1] - 1)), 0) row_height = 0.4 pl.gcf().set_size_inches(7, len(feature_order) * row_height + 0.6) pl.axvline(x=0, color="#999999", zorder=-1) if plot_type == "dot": for pos, i in enumerate(feature_order): pl.axhline(y=pos, color="#cccccc", lw=0.5, dashes=(1, 5), zorder=-1) shaps = shap_values[:, i] N = len(shaps) hspacing = (np.max(shaps) - np.min(shaps)) / 200 curr_bin = [] nbins = 100 quant = np.round(nbins * (shap_values[:, i] - np.min(shaps)) / (np.max(shaps) - np.min(shaps) + 1e-8)) inds = np.argsort(quant + np.random.randn(N) * 1e-6) layer = 0 last_bin = -1 ys = np.zeros(N) for ind in inds: if quant[ind] != last_bin: layer = 0 ys[ind] = layer * ((layer % 2) * 2 - 1) layer += 1 last_bin = quant[ind] ys *= row_height / np.max(ys + 1) if features is not None: vmin = np.nanpercentile(features[:, i], 5) vmax = np.nanpercentile(features[:, i], 95) assert features.shape[0] == len( shaps ), "Feature and SHAP matrices must have the same number of rows!" 
pl.scatter(shaps, pos + ys, cmap=red_blue, vmin=vmin, vmax=vmax, s=16, c=np.nan_to_num(features[:, i]), alpha=alpha, linewidth=0, zorder=3) else: pl.scatter(shaps, pos + ys, s=16, alpha=alpha, linewidth=0, zorder=3, color=color) elif plot_type == "violin": for pos, i in enumerate(feature_order): pl.axhline(y=pos, color="#cccccc", lw=0.5, dashes=(1, 5), zorder=-1) if features is not None: global_low = np.nanpercentile( shap_values[:, :len(feature_names)].flatten(), 1) global_high = np.nanpercentile( shap_values[:, :len(feature_names)].flatten(), 99) for pos, i in enumerate(feature_order): shaps = shap_values[:, i] shap_min, shap_max = np.min(shaps), np.max(shaps) rng = shap_max - shap_min xs = np.linspace( np.min(shaps) - rng * 0.2, np.max(shaps) + rng * 0.2, 100) if np.std(shaps) < (global_high - global_low) / 100: ds = gaussian_kde(shaps + np.random.randn(len(shaps)) * (global_high - global_low) / 100)(xs) else: ds = gaussian_kde(shaps)(xs) ds /= np.max(ds) * 3 values = features[:, i] window_size = max(10, len(values) // 20) smooth_values = np.zeros(len(xs) - 1) for j in range(len(xs) - 1): smooth_values[j] = np.mean( values[max(0, j - window_size):min(len(xs), j + window_size)]) vmin = np.nanpercentile(values, 5) vmax = np.nanpercentile(values, 95) # smooth_values -= np.nanpercentile(smooth_values, 5) # smooth_values /= np.nanpercentile(smooth_values, 95) smooth_values -= vmin smooth_values /= vmax - vmin for i in range(len(xs) - 1): if ds[i] > 0.05 or ds[i + 1] > 0.05: pl.fill_between([xs[i], xs[i + 1]], [pos + ds[i], pos + ds[i + 1]], [pos - ds[i], pos - ds[i + 1]], color=red_blue(smooth_values[i]), zorder=2) vmin = np.nanpercentile(values, 5) vmax = np.nanpercentile(values, 95) pl.scatter(shaps, np.ones(shap_values.shape[0]) * pos, s=9, cmap=red_blue, vmin=vmin, vmax=vmax, c=values, alpha=alpha, linewidth=0, zorder=3) else: parts = pl.violinplot(shap_values[:, feature_order], range(len(feature_order)), points=200, vert=False, widths=0.7, showmeans=False, showextrema=False, showmedians=False) for pc in parts['bodies']: pc.set_facecolor(color) pc.set_edgecolor('none') pc.set_alpha(alpha) pl.gca().xaxis.set_ticks_position('bottom') pl.gca().yaxis.set_ticks_position('none') pl.gca().spines['right'].set_visible(False) pl.gca().spines['top'].set_visible(False) pl.gca().spines['left'].set_visible(False) pl.gca().tick_params(color=axis_color, labelcolor=axis_color) pl.yticks(range(len(feature_order)), [feature_names[i] for i in feature_order], fontsize=13) pl.gca().tick_params('y', length=20, width=0.5, which='major') pl.gca().tick_params('x', labelsize=11) pl.ylim(-1, len(feature_order)) pl.xlabel("SHAP value (impact on model output)", fontsize=13) pl.tight_layout() if show: pl.show()
def GetPoissonEstimates(bins, SNFinalPos, SNFinalNeg, LimitN, MinSN): ProbPoisson = [] ProbPoissonE1 = [] ProbPoissonE2 = [] ProbNegativeOverPositive = [] ProbNegativeOverPositiveE1 = [] ProbNegativeOverPositiveE2 = [] ProbPoissonExpected = [] ProbPoissonExpectedE1 = [] ProbPoissonExpectedE2 = [] ProbNegativeOverPositiveDif = [] ProbNegativeOverPositiveDifE1 = [] ProbNegativeOverPositiveDifE2 = [] PurityPoisson = [] Nnegative = [] NnegativeReal = [] NPositive = [] Nnegative_e1 = [] Nnegative_e2 = [] for sn in bins: if len(SNFinalPos[SNFinalPos >= sn]) > 0: Fraction, FractionE1, FractionE2 = GetPoissonErrorGivenMeasurements( len(SNFinalNeg[SNFinalNeg >= sn]), len(SNFinalPos[SNFinalPos >= sn])) if Fraction > 1.0: Fraction = 1.0 FractionE1 = 0.0 FractionE2 = 0.0 else: pass ProbNegativeOverPositive.append(Fraction) ProbNegativeOverPositiveE1.append(FractionE1) ProbNegativeOverPositiveE2.append(FractionE2) elif len(SNFinalNeg[SNFinalNeg >= sn]) > 0: ProbNegativeOverPositive.append(1.0) ProbNegativeOverPositiveE1.append(0.0) ProbNegativeOverPositiveE2.append(0.0) else: ProbNegativeOverPositive.append(0.0) ProbNegativeOverPositiveE1.append(0.0) ProbNegativeOverPositiveE2.append(0.0) if len(SNFinalPos[(SNFinalPos >= sn) & (SNFinalPos < sn + 0.1)]) > 0: Fraction, FractionE1, FractionE2 = GetPoissonErrorGivenMeasurements( len(SNFinalNeg[(SNFinalNeg >= sn) & (SNFinalNeg < sn + 0.1)]), len(SNFinalPos[(SNFinalPos >= sn) & (SNFinalPos < sn + 0.1)])) if Fraction > 1.0: Fraction = 1.0 FractionE1 = 0.0 FractionE2 = 0.0 else: pass ProbNegativeOverPositiveDif.append(min(1.0, Fraction)) ProbNegativeOverPositiveDifE1.append(FractionE1) ProbNegativeOverPositiveDifE2.append(FractionE2) elif len(SNFinalNeg[(SNFinalNeg >= sn) & (SNFinalNeg < sn + 0.1)]) > 0: ProbNegativeOverPositiveDif.append(1.0) ProbNegativeOverPositiveDifE1.append(0.0) ProbNegativeOverPositiveDifE2.append(0.0) else: ProbNegativeOverPositiveDif.append(0.0) ProbNegativeOverPositiveDifE1.append(0.0) ProbNegativeOverPositiveDifE2.append(0.0) k = len(SNFinalNeg[SNFinalNeg >= sn]) aux = scipy.special.gammaincinv(k + 1, [0.16, 0.5, 0.84]) NnegativeReal.append(k) Nnegative.append(aux[1]) Nnegative_e1.append(aux[1] - aux[0]) Nnegative_e2.append(aux[2] - aux[1]) NPositive.append(1.0 * len(SNFinalPos[SNFinalPos >= sn])) Nnegative = np.array(Nnegative) NPositive = np.array(NPositive) NnegativeReal = np.array(NnegativeReal) Nnegative_e1 = np.array(Nnegative_e1) Nnegative_e2 = np.array(Nnegative_e2) MinSNtoFit = min(bins) UsableBins = len( Nnegative[bins >= MinSNtoFit][Nnegative[bins >= MinSNtoFit] > LimitN]) AuxiliarOutput = open('SN_UsedInFit.dat', 'w') print('Min SN to do the fit:', round(MinSNtoFit, 1), ', Number of usable bins:', UsableBins) AuxiliarOutput.write( str(round(MinSNtoFit, 1)) + ' ' + str(UsableBins) + '\n') if UsableBins < 6: print('*** We are using ', UsableBins, ' points for the fitting of the negative counts ***') print( '*** We usually get good results with 6 points, try reducing the parameter -MinSN ***' ) while UsableBins > 6: MinSNtoFit = MinSNtoFit + 0.1 UsableBins = len(Nnegative[bins >= MinSNtoFit][ Nnegative[bins >= MinSNtoFit] > LimitN]) print('Min SN to do the fit:', round(MinSNtoFit, 1), ', Number of usable bins:', UsableBins) AuxiliarOutput.write( str(round(MinSNtoFit, 1)) + ' ' + str(UsableBins) + '\n') if MinSNtoFit > max(bins): print('No negative points to do the fit') exit() AuxiliarOutput.close() if UsableBins >= 3: try: # popt, pcov = curve_fit(NegativeRate, bins[bins>=MinSNtoFit][Nnegative[bins>=MinSNtoFit]>LimitN], 
Nnegative[bins>=MinSNtoFit][Nnegative[bins>=MinSNtoFit]>LimitN],p0=[1e6,1]) popt, pcov = curve_fit( NegativeRateLog, bins[bins >= MinSNtoFit][ Nnegative[bins >= MinSNtoFit] > LimitN], np.log10(Nnegative[bins >= MinSNtoFit][ Nnegative[bins >= MinSNtoFit] > LimitN]), p0=[1e6, 1], sigma=np.log10( np.average([ Nnegative_e1[bins >= MinSNtoFit][ Nnegative[bins >= MinSNtoFit] > LimitN], Nnegative_e2[bins >= MinSNtoFit][ Nnegative[bins >= MinSNtoFit] > LimitN] ], axis=0)), absolute_sigma=False) perr = np.sqrt(np.diag(pcov)) # print popt,popt/perr,not np.isfinite(perr[0]) CounterFitTries = 0 while not np.isfinite(perr[0]): print('*** curve_fit failed to converge ... ***') NewParameter1 = np.power(10, np.random.uniform(1, 9)) NewParameter2 = np.random.uniform(0.1, 2.0) print('*** New Initial Estimates for the fitting (random):', round(NewParameter1), round(NewParameter2, 2), ' ***') # popt, pcov = curve_fit(NegativeRate, bins[bins>=MinSNtoFit][Nnegative[bins>=MinSNtoFit]>LimitN], Nnegative[bins>=MinSNtoFit][Nnegative[bins>=MinSNtoFit]>LimitN],p0=[NewParameter1,NewParameter2]) popt, pcov = curve_fit( NegativeRateLog, bins[bins >= MinSNtoFit][ Nnegative[bins >= MinSNtoFit] > LimitN], np.log10(Nnegative[bins >= MinSNtoFit][ Nnegative[bins >= MinSNtoFit] > LimitN]), p0=[NewParameter1, NewParameter2], sigma=np.log10( np.average([ Nnegative_e1[bins >= MinSNtoFit][ Nnegative[bins >= MinSNtoFit] > LimitN], Nnegative_e2[bins >= MinSNtoFit][ Nnegative[bins >= MinSNtoFit] > LimitN] ], axis=0)), absolute_sigma=False) perr = np.sqrt(np.diag(pcov)) print('*** New Results: N:', round(popt[0]), ' +/- ', round(perr[0]), ' Sigma:', round(popt[1], 2), ' +/- ', round(perr[1], 2), ' ***') CounterFitTries += 1 if CounterFitTries > 100: print('*** Over 100 attemps and no good fit *** ') break except: print('Fitting failed for LimitN:' + str(LimitN) + ' and ' + str(MinSN) + '... Will force LimitN=0') # popt, pcov = curve_fit(NegativeRate, bins[Nnegative>0], Nnegative[Nnegative>0],p0=[1e6,1]) popt, pcov = curve_fit(NegativeRateLog, bins[Nnegative > 0], np.log10(Nnegative[Nnegative > 0]), p0=[1e6, 1], sigma=np.log10( np.average([ Nnegative_e1[Nnegative > 0], Nnegative_e2[Nnegative > 0] ], axis=0)), absolute_sigma=False) perr = np.sqrt(np.diag(pcov)) # print popt,popt/perr,not np.isfinite(perr[0]) CounterFitTries = 0 while not np.isfinite(perr[0]): print('*** curve_fit failed to converge ... ***') NewParameter1 = np.power(10, np.random.uniform(1, 9)) NewParameter2 = np.random.uniform(0.1, 2.0) print('*** New Initial Estimates for the fitting (random):', round(NewParameter1), round(NewParameter2, 2), ' ***') # popt, pcov = curve_fit(NegativeRate, bins[Nnegative>0], Nnegative[Nnegative>0],p0=[NewParameter1,NewParameter2]) popt, pcov = curve_fit(NegativeRateLog, bins[Nnegative > 0], np.log10(Nnegative[Nnegative > 0]), p0=[NewParameter1, NewParameter2], sigma=np.log10( np.average([ Nnegative_e1[Nnegative > 0], Nnegative_e2[Nnegative > 0] ], axis=0)), absolute_sigma=False) perr = np.sqrt(np.diag(pcov)) print('*** New Results: N:', round(popt[0]), ' +/- ', round(perr[0]), ' Sigma:', round(popt[1], 2), ' +/- ', round(perr[1], 2), ' ***') CounterFitTries += 1 if CounterFitTries > 100: print('*** Over 100 attemps and no good fit *** ') break else: print('Number of usable bins is less than 3 for LimitN:' + str(LimitN) + ' and ' + str(MinSN) + '... 
Will force LimitN=0') # popt, pcov = curve_fit(NegativeRate, bins[Nnegative>0], Nnegative[Nnegative>0],p0=[1e6,1]) popt, pcov = curve_fit( NegativeRateLog, bins[Nnegative > 0], np.log10(Nnegative[Nnegative > 0]), p0=[1e6, 1], sigma=np.log10( np.average( [Nnegative_e1[Nnegative > 0], Nnegative_e2[Nnegative > 0]], axis=0)), absolute_sigma=False) perr = np.sqrt(np.diag(pcov)) # print popt,popt/perr,not np.isfinite(perr[0]) CounterFitTries = 0 while not np.isfinite(perr[0]): print('*** curve_fit failed to converge ... ***') NewParameter1 = np.power(10, np.random.uniform(1, 9)) NewParameter2 = np.random.uniform(0.1, 2.0) print('*** New Initial Estimates for the fitting (random):', round(NewParameter1), round(NewParameter2, 2), ' ***') # popt, pcov = curve_fit(NegativeRate, bins[Nnegative>0], Nnegative[Nnegative>0],p0=[NewParameter1,NewParameter2]) popt, pcov = curve_fit(NegativeRateLog, bins[Nnegative > 0], np.log10(Nnegative[Nnegative > 0]), p0=[NewParameter1, NewParameter2], sigma=np.log10( np.average([ Nnegative_e1[Nnegative > 0], Nnegative_e2[Nnegative > 0] ], axis=0)), absolute_sigma=False) perr = np.sqrt(np.diag(pcov)) print('*** New Results: N:', round(popt[0]), ' +/- ', round(perr[0]), ' Sigma:', round(popt[1], 2), ' +/- ', round(perr[1], 2), ' ***') CounterFitTries += 1 if CounterFitTries > 100: print('*** Over 100 attemps and no good fit *** ') break NegativeFitted = NegativeRate(bins, popt[0], popt[1]) SNPeakGaussian = (popt / np.sqrt(np.diag(pcov)))[0] # print 'SNPeakGaussian',SNPeakGaussian,popt,np.sqrt(np.diag(pcov)) # print curve_fit(NegativeRate, bins[bins>=MinSNtoFit][Nnegative[bins>=MinSNtoFit]>LimitN], Nnegative[bins>=MinSNtoFit][Nnegative[bins>=MinSNtoFit]>LimitN],p0=[1e6,1],sigma=np.average([Nnegative_e1[bins>=MinSNtoFit][Nnegative[bins>=MinSNtoFit]>LimitN],Nnegative_e2[bins>=MinSNtoFit][Nnegative[bins>=MinSNtoFit]>LimitN]],axis=0),absolute_sigma=False) # print curve_fit(NegativeRateLog, # bins[bins>=MinSNtoFit][Nnegative[bins>=MinSNtoFit]>LimitN], # np.log10(Nnegative[bins>=MinSNtoFit][Nnegative[bins>=MinSNtoFit]>LimitN]), # p0=[1e6,1], # sigma=np.log10(np.average([Nnegative_e1[bins>=MinSNtoFit][Nnegative[bins>=MinSNtoFit]>LimitN],Nnegative_e2[bins>=MinSNtoFit][Nnegative[bins>=MinSNtoFit]>LimitN]],axis=0)), # absolute_sigma=False) for i in range(len(bins)): aux = [] auxExpected = [] for j in range(1000): lamb = np.random.normal(NegativeFitted[i], NegativeFitted[i] / SNPeakGaussian) while lamb < 0: lamb = np.random.normal(NegativeFitted[i], NegativeFitted[i] / SNPeakGaussian) aux.append(1 - scipy.special.gammaincc(0 + 1, lamb)) if i == len(bins) - 1: if NPositive[i] > 0: auxExpected.append(1.0 - max(0, NPositive[i] - lamb) / NPositive[i]) else: auxExpected.append(0.0) else: # lamb2 = lamb - np.random.normal(NegativeFitted[i+1],NegativeFitted[i+1]/SNPeakGaussian) lamb2 = (NegativeFitted[i] - NegativeFitted[i + 1]) * lamb / NegativeFitted[i] while lamb2 < 0: lamb2 = lamb - np.random.normal( NegativeFitted[i + 1], NegativeFitted[i + 1] / SNPeakGaussian) if (NPositive[i] - NPositive[i + 1]) > 0: auxExpected.append( 1.0 - max(0, (NPositive[i] - NPositive[i + 1]) - lamb2) / (NPositive[i] - NPositive[i + 1])) else: auxExpected.append(0.0) # auxExpected.append(1.0-max(0,0.7 - lamb2)/0.7) PP = np.nanpercentile(aux, [16, 50, 84]) PPExpected = np.nanpercentile(auxExpected, [16, 50, 84]) ProbPoisson.append(PP[1]) ProbPoissonE1.append(PP[1] - PP[0]) ProbPoissonE2.append(PP[2] - PP[1]) ProbPoissonExpected.append(PPExpected[1]) ProbPoissonExpectedE1.append(PPExpected[1] - PPExpected[0]) 
ProbPoissonExpectedE2.append(PPExpected[2] - PPExpected[1]) # if i<len(bins)-1: # print bins[i],PPExpected,NegativeFitted[i],NPositive[i],NPositive[i+1] if NPositive[i] > 0: PurityPoisson.append( max((NPositive[i] - NegativeFitted[i]) / NPositive[i], 0)) else: PurityPoisson.append(0.0) ProbPoisson = np.array(ProbPoisson) ProbPoissonE1 = np.array(ProbPoissonE1) ProbPoissonE2 = np.array(ProbPoissonE2) ProbNegativeOverPositive = np.array(ProbNegativeOverPositive) ProbNegativeOverPositiveE1 = np.array(ProbNegativeOverPositiveE1) ProbNegativeOverPositiveE2 = np.array(ProbNegativeOverPositiveE2) ProbNegativeOverPositiveDif = np.array(ProbNegativeOverPositiveDif) ProbNegativeOverPositiveDifE1 = np.array(ProbNegativeOverPositiveDifE1) ProbNegativeOverPositiveDifE2 = np.array(ProbNegativeOverPositiveDifE2) ProbPoissonExpected = np.array(ProbPoissonExpected) ProbPoissonExpectedE1 = np.array(ProbPoissonExpectedE1) ProbPoissonExpectedE2 = np.array(ProbPoissonExpectedE2) PurityPoisson = np.array(PurityPoisson) output = [ bins, ProbPoisson, ProbNegativeOverPositive, PurityPoisson, NPositive, Nnegative, Nnegative_e1, Nnegative_e2, NegativeFitted, NnegativeReal, ProbPoissonE1, ProbPoissonE2, ProbNegativeOverPositiveE1, ProbNegativeOverPositiveE2, ProbNegativeOverPositiveDif, ProbNegativeOverPositiveDifE1, ProbNegativeOverPositiveDifE2, ProbPoissonExpected, ProbPoissonExpectedE1, ProbPoissonExpectedE2 ] return output
def cutoff(self, recorded):
    if not recorded:
        return None
    return np.nanpercentile(list(recorded.values()), (1 - 1 / self.rf) * 100)
def plot_representational_similarity(rs, dims=None, dim_labels=None,
                                     colors=None, dim_order=None, labels=True):
    if np.all(np.isnan(rs)):
        return  # if rs is all NaN (happens with only 1 cell), there is nothing to plot

    if dim_order is not None:
        rsr = np.arange(len(rs)).reshape(*map(len, dims))
        rsrt = rsr.transpose(dim_order)
        ri = rsrt.flatten()
        rs = rs[ri, :][:, ri]

        dims = np.array(dims)[dim_order]
        colors = np.array(colors)[dim_order]
        dim_labels = np.array(dim_labels)[dim_order]

    # force the color map to be centered at zero
    clim = np.nanpercentile(rs, [5.0, 95.0], axis=None)
    vrange = max(abs(clim[0]), abs(clim[1]))

    rs = rs.copy()
    np.fill_diagonal(rs, np.nan)

    if labels:
        grid = ImageGrid(plt.gcf(), 111,
                         nrows_ncols=(1, 1),
                         cbar_location="right",
                         cbar_mode="single",
                         cbar_size="7%",
                         cbar_pad=0.05)
        for ax in grid:
            pass
    else:
        ax = plt.gca()

    im = ax.imshow(rs, interpolation='nearest', cmap='RdBu_r',
                   vmin=-vrange, vmax=vrange)

    ax.set_xticklabels([])
    ax.set_yticklabels([])
    ax.set_xticks([])
    ax.set_yticks([])

    if labels:
        cbar = ax.cax.colorbar(im)
        cbar.set_label_text('stimulus correlation')

    if dims is not None:
        dim_labels = ["%s(%s)" % (dim_labels[i],
                                  ', '.join(map(float_label, dims[i].tolist())))
                      for i in range(len(dims))]
        dim_handlers = [DimensionPatchHandler(dims[i], colors[i], 'w')
                        for i in range(len(dims))]

        n = len(rs)
        for cell_i in range(n):
            idx = np.unravel_index(cell_i, map(len, dims))

            start = -(len(dims)) * 2
            width = 1.8
            for dim_i, color in enumerate(colors):
                v_i = idx[dim_i]
                rgb = dim_handlers[dim_i].dim_color(v_i)

                r = mpatches.Rectangle((start + dim_i * width, cell_i - .5),
                                       width, 1.2,
                                       facecolor=rgb, linewidth=0)
                r.set_clip_on(False)
                ax.add_patch(r)

                r = mpatches.Rectangle((cell_i - .5, start + dim_i * width),
                                       1.2, width,
                                       facecolor=rgb, linewidth=0)
                r.set_clip_on(False)
                ax.add_patch(r)

        if labels:
            patches = [mpatches.Patch(label=dim_labels[i])
                       for i in range(len(dims))]
            ax.legend(handles=patches,
                      handler_map=dict(zip(patches, dim_handlers)),
                      loc='upper left', bbox_to_anchor=(0, 0),
                      ncol=2, fontsize=9, frameon=False)

    if labels:
        plt.subplots_adjust(left=0.07, right=.88, wspace=0.0, hspace=0.0)
def plot_budget(Gvel, buddif, buddiv3, budadv3, Utemp, Vtemp, spath, ts):
    """
    Diagnostic figures for the budget - projected properly
    """
    f = plt.figure(figsize=[9, 9])
    # dtarg = dt.datetime(2015,1,1)
    # t_p1 = B1.condy.get_index(dtarg)
    # t_p2 = B2.condy.get_index(dtarg)
    # t_p3 = B3.condy.get_index(dtarg)
    vlim = 0.4
    a_no = 30
    # a_sc = 3.9e-1
    # a_sc = 2*np.nanmax(np.hypot(Utemp,Vtemp))
    # print(a_sc)
    a_sc = 2.5 * np.nanpercentile(np.hypot(Utemp, Vtemp), [90])[0]
    m = Gvel.mplot
    p_rng = np.nanpercentile(buddif, [2, 98])
    pr = np.max(np.abs(p_rng))
    p_rng = [-pr, pr]

    ### intensification
    plt.subplot(2, 2, 1)
    m.pcolormesh(Gvel.xptp, Gvel.yptp, buddif, cmap='RdBu', rasterized=True)
    m.colorbar(location='bottom')
    plt.clim(p_rng)
    m.drawcoastlines()
    plt.title('Intensification ' + ts.strftime('%Y%m%d'))

    ### DIVERGENCE
    plt.subplot(2, 2, 2)
    rm = int(Gvel.m / a_no)
    rn = int(Gvel.n / a_no)
    ra = np.sqrt(rm + rn)
    ra = ra * a_sc
    m.pcolormesh(Gvel.xptp, Gvel.yptp, buddiv3, cmap='RdBu', rasterized=True)
    plt.clim(p_rng)
    m.colorbar(location='bottom')
    ur, vr = Gvel.rotate_vectors_to_plot(Utemp, Vtemp)
    m.quiver(Gvel.xpts[::rm, ::rn], Gvel.ypts[::rm, ::rn],
             ur[::rm, ::rn], vr[::rm, ::rn], scale=ra, width=0.005)
    m.drawcoastlines()
    plt.title('Divergence ' + ts.strftime('%Y%m%d'))

    ### ADVECTION
    plt.subplot(2, 2, 3)
    rm = int(Gvel.m / a_no)
    rn = int(Gvel.n / a_no)
    ra = np.sqrt(rm + rn)
    ra = ra * a_sc
    m.pcolormesh(Gvel.xptp, Gvel.yptp, budadv3, cmap='RdBu', rasterized=True)
    plt.clim(p_rng)
    m.colorbar(location='bottom')
    m.quiver(Gvel.xpts[::rm, ::rn], Gvel.ypts[::rm, ::rn],
             ur[::rm, ::rn], vr[::rm, ::rn], scale=ra, width=0.005)
    m.drawcoastlines()
    plt.title('Advection ' + ts.strftime('%Y%m%d'))

    ### residual
    plt.subplot(2, 2, 4)
    m.pcolormesh(Gvel.xptp, Gvel.yptp, buddif - buddiv3 - budadv3,
                 cmap='RdBu', rasterized=True)
    plt.clim(p_rng)
    m.colorbar(location='bottom')
    m.drawcoastlines()
    plt.title('Residual ' + ts.strftime('%Y%m%d'))

    f.savefig(spath + 'Budget_components_' + ts.strftime('%Y%m%d') + '.pdf',
              bbox_inches='tight')
    print('Saving figure: ' + spath + 'Budget_components_' +
          ts.strftime('%Y%m%d') + '.pdf')
def plot_budget_square(Gvel, buddif, buddiv3, budadv3, Utemp, Vtemp, spath, ts):
    """
    Diagnostic figures for the budget - square grid
    """
    f = plt.figure(figsize=[9, 9])
    # dtarg = dt.datetime(2015,1,1)
    # t_p1 = B1.condy.get_index(dtarg)
    # t_p2 = B2.condy.get_index(dtarg)
    # t_p3 = B3.condy.get_index(dtarg)
    vlim = 0.4
    a_no = 20
    # a_sc = 3.9e-1
    # a_sc = 2*np.nanmax(np.hypot(Utemp,Vtemp))
    a_sc = 2.5 * np.nanpercentile(np.hypot(Utemp, Vtemp), [90])[0]
    # print(a_sc)
    p_rng = np.nanpercentile(buddif, [2, 98])
    pr = np.max(np.abs(p_rng))
    p_rng = [-pr, pr]
    Gvel.get_square_points()

    ### intensification
    plt.subplot(2, 2, 1)
    plt.pcolormesh(Gvel.xsq, Gvel.ysq, buddif, cmap='RdBu', rasterized=True)
    plt.colorbar(orientation="horizontal")
    plt.clim(p_rng)
    plt.title('Intensification ' + ts.strftime('%Y%m%d'))

    ### DIVERGENCE
    plt.subplot(2, 2, 2)
    rm = int(Gvel.m / a_no)
    rn = int(Gvel.n / a_no)
    ra = np.sqrt(rm + rn)
    ra = ra * a_sc
    plt.pcolormesh(Gvel.xsq, Gvel.ysq, buddiv3, cmap='RdBu', rasterized=True)
    plt.clim(p_rng)
    plt.colorbar(orientation="horizontal")
    plt.quiver(Gvel.xsq[::rm, ::rn], Gvel.ysq[::rm, ::rn],
               Utemp[::rm, ::rn], Vtemp[::rm, ::rn], scale=ra, width=0.005)
    plt.title('Divergence ' + ts.strftime('%Y%m%d'))

    ### ADVECTION
    plt.subplot(2, 2, 3)
    plt.pcolormesh(Gvel.xsq, Gvel.ysq, budadv3, cmap='RdBu', rasterized=True)
    plt.clim(p_rng)
    plt.colorbar(orientation="horizontal")
    plt.quiver(Gvel.xsq[::rm, ::rn], Gvel.ysq[::rm, ::rn],
               Utemp[::rm, ::rn], Vtemp[::rm, ::rn], scale=ra, width=0.005)
    plt.title('Advection ' + ts.strftime('%Y%m%d'))

    ### residual
    plt.subplot(2, 2, 4)
    plt.pcolormesh(Gvel.xsq, Gvel.ysq, buddif - buddiv3 - budadv3,
                   cmap='RdBu', rasterized=True)
    plt.clim(p_rng)
    plt.colorbar(orientation="horizontal")
    plt.title('Residual ' + ts.strftime('%Y%m%d'))

    f.savefig(spath + 'Budget_components_square_' + ts.strftime('%Y%m%d') + '.pdf',
              bbox_inches='tight')
    print('Saving figure: ' + spath + 'Budget_components_square_' +
          ts.strftime('%Y%m%d') + '.pdf')
def make_aperture_image(label, filter_list, center_ra, center_dec, major_diam, minor_diam, pos_angle): """ Make a picture of the galaxy with the apertures overlaid Currently just does one given aperture, but should eventually do the various annuli for each filter Parameters ---------- label : string label associated with the galaxy, both for finding image/data files and saving the aperture image (e.g., 'ngc24_offset_') filter_list : list of strings filters for the galaxy center_ra, center_dec : float coordinates of the center of the galaxy (degrees) major_diam, minor_diam : float major and minor axes for the galaxy ellipse (arcsec) pos_angle : float position angle of the galaxy ellipse ("position angle increases counterclockwise from North (PA=0)") """ counts_im = label + 'sk.fits' exp_im = label + 'ex.fits' # get the image HDUs hdu_list = [] for filt in filter_list: with fits.open(label+filt+'_sk.fits') as hdu_counts, fits.open(label+filt+'_ex.fits') as hdu_ex: hdu_list.append(fits.ImageHDU(data=hdu_counts[1].data/hdu_ex[1].data, header=hdu_counts[1].header)) # if there's more than one filter, do reprojection if len(filter_list) > 1: for f in range(1,len(filter_list)): new_array, _ = reproject_interp(hdu_list[f], hdu_list[0].header) hdu_list[f] = fits.ImageHDU(data=new_array, header=hdu_list[0].header) # normalize the images for f in range(len(filter_list)): # subtract mode # - do a sigma clip pix_clip = sigma_clip(hdu_list[f].data, sigma=2.5, iters=3) # - calculate biweight biweight_clip = biweight_location(pix_clip.data[~pix_clip.mask]) # - subtraction new_array = hdu_list[f].data - biweight_clip # set anything below 0 to 0 new_array[new_array < 0] = 0 # set 95th percentile to 1 new_array = new_array/np.nanpercentile(new_array, 95) # save it hdu_list[f].data = new_array # add the images together im_sum = np.mean([hdu_list[f].data for f in range(len(filter_list))], axis=0) # make it into an HDU hdu_sum = fits.ImageHDU(data=log_image(im_sum, 0, np.nanpercentile(im_sum, 99.5)), header=hdu_list[0].header) # make an image fig = aplpy.FITSFigure(hdu_sum) fig.show_grayscale() fig.axis_labels.hide_x() fig.axis_labels.hide_y() fig.tick_labels.hide_x() fig.tick_labels.hide_y() fig.frame.set_linewidth(0) # aperture ellipses fig.show_ellipses(center_ra, center_dec, major_diam/3600, minor_diam/3600, angle=90+pos_angle, edgecolor='red', linewidth=2) fig.save(label+'aperture_image.pdf')
def get_features_object(self): ''' This method will calculate features that characterize the rapideye scene, and will be useful later. It will populate the values into the database. ''' array = self.get_raster().read_data_file_as_array() data_array = numpy.array(array) features_array = [] for i in range(data_array.shape[0]): band = data_array[i, :, :].ravel() band = band[band != 0] features_array.append(numpy.nanpercentile(band, 10)) features_array.append(numpy.nanpercentile(band, 25)) features_array.append(numpy.nanpercentile(band, 50)) features_array.append(numpy.nanpercentile(band, 75)) features_array.append(numpy.nanpercentile(band, 90)) features_array.append(numpy.mean(band)) features_array.append(numpy.min(band) * 1.0) features_array.append(numpy.max(band) * 1.0) geotransform = self.get_raster().get_geotransform() features_array.append(geotransform[0]) features_array.append(geotransform[3]) features_array.append((self.get_aquisition_date() - datetime.datetime(1970, 1, 1)).total_seconds()) tile_id = self.get_sensor().get_attribute(TILE_ID) raster_path = self.file_dictionary[_IMAGE] features = RapideyeFeatures(band_1_quant_10=features_array[0], band_1_quant_25=features_array[1], band_1_quant_50=features_array[2], band_1_quant_75=features_array[3], band_1_quant_90=features_array[4], band_1_mean=features_array[5], band_1_min=features_array[6], band_1_max=features_array[7], band_2_quant_10=features_array[8], band_2_quant_25=features_array[9], band_2_quant_50=features_array[10], band_2_quant_75=features_array[11], band_2_quant_90=features_array[12], band_2_mean=features_array[13], band_2_min=features_array[14], band_2_max=features_array[15], band_3_quant_10=features_array[16], band_3_quant_25=features_array[17], band_3_quant_50=features_array[18], band_3_quant_75=features_array[19], band_3_quant_90=features_array[20], band_3_mean=features_array[21], band_3_min=features_array[22], band_3_max=features_array[23], band_4_quant_10=features_array[24], band_4_quant_25=features_array[25], band_4_quant_50=features_array[26], band_4_quant_75=features_array[27], band_4_quant_90=features_array[28], band_4_mean=features_array[29], band_4_min=features_array[30], band_4_max=features_array[31], band_5_quant_10=features_array[32], band_5_quant_25=features_array[33], band_5_quant_50=features_array[34], band_5_quant_75=features_array[35], band_5_quant_90=features_array[36], band_5_mean=features_array[37], band_5_min=features_array[38], band_5_max=features_array[39], top=features_array[40], left=features_array[41], time=features_array[42], footprint=tile_id, path=raster_path) return features
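# get_features_object above computes its per-band quantiles only after discarding
# zero-valued (nodata) pixels, so fill values do not drag the low percentiles down.
# A compact sketch of that per-band loop (synthetic raster, hypothetical helper name):
import numpy as np

def band_quantile_features(data_array, nodata=0):
    """10/25/50/75/90th percentiles per band, ignoring nodata pixels."""
    features = []
    for band in data_array:  # data_array shape: (bands, rows, cols)
        values = band.ravel().astype(float)
        values = values[values != nodata]
        features.extend(np.nanpercentile(values, [10, 25, 50, 75, 90]))
    return features

raster = np.random.randint(0, 255, size=(5, 64, 64))
print(len(band_quantile_features(raster)))  # 5 bands x 5 quantiles = 25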
par = (par - par_min) / (par_max - par_min) red, green, blue = cm.jet(par)[:, :3].T alpha = r2 > r2_thr red[~alpha] = np.nan green[~alpha] = np.nan blue[~alpha] = np.nan return cortex.VertexRGB( red=red, green=green, blue=blue, subject='fsaverage', alpha=alpha.astype(float) * 0.7) for sid in range(10): p = pars[sid, :, 0] par_min = np.nanpercentile(p, 10) par_max = np.nanpercentile(p, 90) extra_subjects[f'mu_s{sid}'] = get_thr_map(pars[sid, :, 0], r2s[sid], par_min=par_min, par_max=par_max) ds = cortex.Dataset( r2=r2s_mean_v, corr=corrs_mean_v, # r2_cv=r2s_cv_v, # r2_trialwise=r2s_trialwise_v, # corrs_cv_mean=corrs_cv_mean_v, mu_log=pars_mean_v_log, mu_log_thr=weighted_mu_rgb_v, weighted_sd=weighted_sd, weighted_amplitude=weighted_amplitude, **extra_subjects)
def plot_mean_quantile_returns_spread_time_series(mean_returns_spread, std_err=None, bandwidth=1, ax=None): """ Plots mean period wise returns for factor quantiles. Parameters ---------- mean_returns_spread : pd.Series Series with difference between quantile mean returns by period. std_err : pd.Series Series with standard error of difference between quantile mean returns each period. bandwidth : float Width of displayed error bands in standard deviations. ax : matplotlib.Axes, optional Axes upon which to plot. Returns ------- ax : matplotlib.Axes The axes that were plotted on. """ if isinstance(mean_returns_spread, pd.DataFrame): if ax is None: ax = [None for a in mean_returns_spread.columns] ymin, ymax = (None, None) for (i, a), (name, fr_column) in zip(enumerate(ax), mean_returns_spread.iteritems()): stdn = None if std_err is None else std_err[name] a = plot_mean_quantile_returns_spread_time_series(fr_column, std_err=stdn, ax=a) ax[i] = a curr_ymin, curr_ymax = a.get_ylim() ymin = curr_ymin if ymin is None else min(ymin, curr_ymin) ymax = curr_ymax if ymax is None else max(ymax, curr_ymax) for a in ax: a.set_ylim([ymin, ymax]) return ax if mean_returns_spread.isnull().all(): return ax periods = mean_returns_spread.name title = ('Top Minus Bottom Quantile Mean Return ({} Period Forward Return)' .format(periods if periods is not None else "")) if ax is None: f, ax = plt.subplots(figsize=(18, 6)) mean_returns_spread_bps = mean_returns_spread * DECIMAL_TO_BPS mean_returns_spread_bps.plot(alpha=0.4, ax=ax, lw=0.7, color='forestgreen') mean_returns_spread_bps.rolling(window=22).mean().plot(color='orangered', alpha=0.7, ax=ax) ax.legend(['mean returns spread', '1 month moving avg'], loc='upper right') if std_err is not None: std_err_bps = std_err * DECIMAL_TO_BPS upper = mean_returns_spread_bps.values + (std_err_bps * bandwidth) lower = mean_returns_spread_bps.values - (std_err_bps * bandwidth) ax.fill_between(mean_returns_spread.index, lower, upper, alpha=0.3, color='steelblue') ylim = np.nanpercentile(abs(mean_returns_spread_bps.values), 95) ax.set(ylabel='Difference In Quantile Mean Return (bps)', xlabel='', title=title, ylim=(-ylim, ylim)) ax.axhline(0.0, linestyle='-', color='black', lw=1, alpha=0.8) return ax
def plot_quantile_returns_bar(mean_ret_by_q, by_group=False, ylim_percentiles=None, ax=None): """ Plots mean period wise returns for factor quantiles. Parameters ---------- mean_ret_by_q : pd.DataFrame DataFrame with quantile, (group) and mean period wise return values. by_group : bool Disaggregated figures by group. ylim_percentiles : tuple of integers Percentiles of observed data to use as y limits for plot. ax : matplotlib.Axes, optional Axes upon which to plot. Returns ------- ax : matplotlib.Axes The axes that were plotted on. """ mean_ret_by_q = mean_ret_by_q.copy() if ylim_percentiles is not None: ymin = (np.nanpercentile(mean_ret_by_q.values, ylim_percentiles[0]) * DECIMAL_TO_BPS) ymax = (np.nanpercentile(mean_ret_by_q.values, ylim_percentiles[1]) * DECIMAL_TO_BPS) else: ymin = None ymax = None if by_group: num_group = len(mean_ret_by_q.index.get_level_values('group').unique()) if ax is None: v_spaces = ((num_group - 1) // 2) + 1 f, ax = plt.subplots(v_spaces, 2, sharex=False, sharey=True, figsize=(18, 6 * v_spaces)) ax = ax.flatten() for a, (sc, cor) in zip(ax, mean_ret_by_q.groupby(level='group')): (cor.xs(sc, level='group').multiply(DECIMAL_TO_BPS).plot(kind='bar', title=sc, ax=a)) a.set(xlabel='', ylabel='Mean Return (bps)', ylim=(ymin, ymax)) if num_group < len(ax): ax[-1].set_visible(False) return ax else: if ax is None: f, ax = plt.subplots(1, 1, figsize=(18, 6)) (mean_ret_by_q.multiply(DECIMAL_TO_BPS).plot( kind='bar', title="Mean Period Wise Return By Factor Quantile", ax=ax)) ax.set(xlabel='', ylabel='Mean Return (bps)', ylim=(ymin, ymax)) return ax
def do_stats(time,statf,data,drr,hem,filename,sheetname,min_occ): year=time.year month=time.month mat=[] row=[''] for stat in statf: if isinstance(stat,str): row.append(stat) elif isinstance(stat,list): for p in stat: row.append('P'+str(p)) else: row.append('Main direction') mat.append(row) # monthly stats for mo in range(1,13): idx=month==mo if any(idx): row=[datetime.date(1900, mo, 1).strftime('%B')] for stat in statf: if stat=='n': tmp=data[idx] row.append('%.2f'%len(tmp[~np.isnan(tmp)])) elif isinstance(stat,str): fct=getattr(np, 'nan'+stat) row.append('%.2f'%fct(data[idx])) elif isinstance(stat,list): perc=list(np.nanpercentile(data[idx],stat)) row+=['%.2f'%x for x in perc] else: if not isinstance(drr,str): #for min_occ in [15,10,5,1]: occ=do_occurence(drr[idx].values,min_occ) # if len(occ)>0: # break row.append(', '.join(occ)) mat.append(row) # Do seasons if hem=='South hemisphere(Summer/Winter)': seas=[((month<=3) | (month>=10))] # Summer: October to March seas.append(((month>=4) & (month<=9))) # Winter: April to September sea_names=['Summer','Winter'] elif hem=='South hemisphere 4 seasons': seas=[(month>=6) & (month <=8)]# winter seas.append((month>=9) & (month <=11))# spring seas.append((month>=12) | (month<=2))#summer seas.append((month>=3) & (month<=5))# autumn sea_names=['Winter','Spring','Summer','Autumn'] elif hem =='North hemishere(Summer/Winter)': seas=[(month>=4) & (month<=9)] # Winter: April to September seas.append((month<=3) | (month>=10)) # Summer: October to March sea_names=['Summer','Winter'] elif hem=='North hemisphere moosoon(SW,NE,Hot season)': seas=[(month>=5) & (month<=10)] # SW: May to Oct seas.append((month<=2) | (month>=11)) # SE: Nov to Feb seas.append((month==3) | (month==4)) # Hot: March and April sea_names=['SW monsoon','NE monsoon','Hot season'] elif hem=='North hemisphere 4 seasons': seas=[(month>=12) | (month<=2)] # winter seas.append((month>=3) & (month<=5)) # spring seas.append((month>=6) & (month <=8)) # summer seas.append((month>=9) & (month <=11)) # autumn sea_names=['Winter','Spring','Summer','Autumn'] elif hem == 'Yearly': unique_year=np.unique(year) seas=[] sea_names=[] for y in unique_year: seas.append(year==y) sea_names.append('%i' % y) for i,idx in enumerate(seas): if any(idx): row=[sea_names[i]] for stat in statf: if stat=='n': tmp=data[idx] row.append('%.2f'%len(tmp[~np.isnan(tmp)])) elif isinstance(stat,str): fct=getattr(np, 'nan'+stat) row.append('%.2f'%fct(data[idx])) elif isinstance(stat,list): perc=list(np.nanpercentile(data[idx],stat)) row+=['%.2f'%x for x in perc] else: if not isinstance(drr,str): #for min_occ in [15,10,5,1]: occ=do_occurence(drr[idx].values,min_occ) # if len(occ)>0: # break row.append(', '.join(occ)) mat.append(row) # %% Do total row=['Total'] for stat in statf: if stat=='n': row.append('%.2f'%len(data[~np.isnan(data)])) elif isinstance(stat,str): fct=getattr(np, 'nan'+stat) row.append('%.2f'%fct(data)) elif isinstance(stat,list): perc=list(np.nanpercentile(data,stat)) row+=['%.2f'%x for x in perc] else: if not isinstance(drr,str): #for min_occ in [15,10,5,1]: occ=do_occurence(drr.values,min_occ) # if len(occ)>0: # break row.append(', '.join(occ)) mat.append(row) create_table(filename,sheetname,np.array(mat))
def calc_wd_age(teff, e_teff, logg, e_logg, n_mc=2000, model_wd='DA', feh='p0.00', vvcrit='0.0', model_ifmr='Cummings_2018', return_distributions=False): ''' Calculated white dwarfs ages with a frequentist approch. Starts from normal dristribution of teff and logg based on the errors and passes the full distribution through the same process to get a distribution of ages. ''' if (not isinstance(teff, np.ndarray)): teff = np.array([teff]) e_teff = np.array([e_teff]) logg = np.array([logg]) e_logg = np.array([e_logg]) N = len(teff) teff_dist, logg_dist = [], [] for i in range(N): if (np.isnan(teff[i] + e_teff[i] + logg[i] + e_logg[i])): teff_dist.append(np.nan) logg_dist.append(np.nan) else: teff_dist.append(np.random.normal(teff[i], e_teff[i], n_mc)) logg_dist.append(np.random.normal(logg[i], e_logg[i], n_mc)) teff_dist, logg_dist = np.array(teff_dist), np.array(logg_dist) cooling_age_dist, final_mass_dist = calc_cooling_age(teff_dist, logg_dist, n_mc, N, model=model_wd) initial_mass_dist = calc_initial_mass(model_ifmr, final_mass_dist, n_mc) ms_age_dist = calc_ms_age(initial_mass_dist, feh=feh, vvcrit=vvcrit) total_age_dist = cooling_age_dist + ms_age_dist mask = np.logical_or( np.logical_or(ms_age_dist / 1e9 > 13.8, total_age_dist / 1e9 > 13.8), cooling_age_dist / 1e9 > 13.8) cooling_age_dist[mask] = np.copy(cooling_age_dist[mask]) * np.nan final_mass_dist[mask] = np.copy(final_mass_dist[mask]) * np.nan initial_mass_dist[mask] = np.copy(initial_mass_dist[mask]) * np.nan ms_age_dist[mask] = np.copy(ms_age_dist[mask]) * np.nan total_age_dist[mask] = np.copy(total_age_dist[mask]) * np.nan results = Table() results['final_mass_median'] = np.array( [np.nanpercentile(x, 50) for x in final_mass_dist]) results['final_mass_err_high'] = np.array([ np.nanpercentile(x, 84.1345) - np.nanpercentile(x, 50) for x in final_mass_dist ]) results['final_mass_err_low'] = np.array([ np.nanpercentile(x, 50) - np.nanpercentile(x, 15.8655) for x in final_mass_dist ]) results['initial_mass_median'] = np.array( [np.nanpercentile(x, 50) for x in initial_mass_dist]) results['initial_mass_err_high'] = np.array([ np.nanpercentile(x, 84.1345) - np.nanpercentile(x, 50) for x in initial_mass_dist ]) results['initial_mass_err_low'] = np.array([ np.nanpercentile(x, 50) - np.nanpercentile(x, 15.8655) for x in initial_mass_dist ]) results['cooling_age_median'] = np.array( [np.nanpercentile(x, 50) for x in cooling_age_dist]) results['cooling_age_err_high'] = np.array([ np.nanpercentile(x, 84.1345) - np.nanpercentile(x, 50) for x in cooling_age_dist ]) results['cooling_age_err_low'] = np.array([ np.nanpercentile(x, 50) - np.nanpercentile(x, 15.8655) for x in cooling_age_dist ]) results['ms_age_median'] = np.array( [np.nanpercentile(x, 50) for x in ms_age_dist]) results['ms_age_err_high'] = np.array([ np.nanpercentile(x, 84.1345) - np.nanpercentile(x, 50) for x in ms_age_dist ]) results['ms_age_err_low'] = np.array([ np.nanpercentile(x, 50) - np.nanpercentile(x, 15.8655) for x in ms_age_dist ]) results['total_age_median'] = np.array( [np.nanpercentile(x, 50) for x in total_age_dist]) results['total_age_err_high'] = np.array([ np.nanpercentile(x, 84.1345) - np.nanpercentile(x, 50) for x in total_age_dist ]) results['total_age_err_low'] = np.array([ np.nanpercentile(x, 50) - np.nanpercentile(x, 15.8655) for x in total_age_dist ]) if (return_distributions): results['final_mass_dist'] = final_mass_dist results['initial_mass_dist'] = initial_mass_dist results['cooling_age_dist'] = cooling_age_dist results['ms_age_dist'] = ms_age_dist 
results['total_age_dist'] = total_age_dist return results
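# Every summary column built above follows the same recipe: the median of a Monte
# Carlo distribution plus asymmetric errors taken at the 15.8655th and 84.1345th
# percentiles (the Gaussian +/- 1 sigma equivalents), with NaNs from the 13.8 Gyr
# cut ignored. A hedged helper capturing that pattern (the function name is ours,
# not part of the original module):
import numpy as np

def median_and_one_sigma(dist):
    """Median with asymmetric 1-sigma-equivalent errors, ignoring NaNs."""
    lo, med, hi = np.nanpercentile(dist, [15.8655, 50, 84.1345])
    return med, med - lo, hi - med

rng = np.random.default_rng(0)
ages = rng.normal(8.0, 1.5, size=2000)  # toy age distribution in Gyr
ages[:50] = np.nan                      # draws rejected elsewhere
median, err_low, err_high = median_and_one_sigma(ages)
print(f"{median:.2f} +{err_high:.2f} / -{err_low:.2f} Gyr")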
def threemultis(): # K2-198 # ---------------------------------------------# print('K2-198') if not os.path.isfile('{}/results/K2-198.fits'.format(PACKAGEDIR)): tpfs = lk.search_targetpixelfile('K2-198').download_all() clcs = [] # Corrected Light Curves for idx, tpf in enumerate(tpfs): tpf = tpf[10:] tpf = tpf[np.in1d( tpf.time, tpf.to_lightcurve(aperture_mask='all').remove_nans().time)] tpf = tpf[tpf.to_lightcurve().normalize().flux > 0.8] aper = tpf.create_threshold_mask() tpf.plot(aperture_mask=aper) mask = utils.planet_mask(tpf.time, 'K2-198') clc = fit.PLD(tpf, planet_mask=mask, trim=1, ndraws=1000, logrho_mu=np.log10(150), aperture=aper) pickle.dump( clc, open('{}/results/K2-198_{}.p'.format(PACKAGEDIR, idx), 'wb')) clcs.append(clc) clc = clcs[0].append(clcs[1]) clc.to_fits('{}/results/K2-198.fits'.format(PACKAGEDIR), overwrite=True) clc.to_csv('{}/results/K2-198.csv'.format(PACKAGEDIR)) else: print('file exists') df = pd.read_csv('{}/results/K2-198.csv'.format(PACKAGEDIR)) clc = lk.KeplerLightCurve(df.time, df.flux, df.flux_err) #_run(clc, 'K2-198') # K2-168 # ---------------------------------------------# print('K2-168') if not os.path.isfile('{}/results/K2-168.fits'.format(PACKAGEDIR)): tpf = lk.search_targetpixelfile('K2-168').download() tpf = tpf[10:] tpf = tpf[np.in1d( tpf.time, tpf.to_lightcurve(aperture_mask='all').remove_nans().time)] tpf = tpf[tpf.to_lightcurve().normalize().flux > 0.8] mask = utils.planet_mask(tpf.time, 'K2-168') aper = np.nanmedian(tpf.flux, axis=0) > 30 # First pass, remove some very bad outliers bad = np.zeros(len(tpf.time), bool) for count in range(2): pld_lc = tpf[~bad].to_corrector('pld').correct( aperture_mask=aper, cadence_mask=mask[~bad]) pld_lc = pld_lc.flatten(31, mask=~mask[~bad]) bad |= np.in1d( tpf.time, pld_lc.time[np.abs(pld_lc.flux - 1) > 5 * np.std(pld_lc.flux - 1)]) tpf = tpf[~bad] mask = mask[~bad] clc = fit.PLD(tpf, planet_mask=mask, trim=0, aperture=aper, logrho_mu=np.log(1)) clc.to_fits('{}/results/K2-168.fits'.format(PACKAGEDIR)) clc.to_csv('{}/results/K2-168.csv'.format(PACKAGEDIR)) pickle.dump(clc, open('{}/results/K2-168.p'.format(PACKAGEDIR), 'wb')) else: print('file exists') df = pd.read_csv('{}/results/K2-168.csv'.format(PACKAGEDIR)) clc = lk.KeplerLightCurve(df.time, df.flux, df.flux_err) #_run(clc, 'K2-168') # K2-43 # ---------------------------------------------# print('K2-43') if not os.path.isfile('{}/results/K2-43.fits'.format(PACKAGEDIR)): # Trim out some pixels which have a bleed column on them raw_tpf = lk.search_targetpixelfile('K2-43').download() hdu = deepcopy(raw_tpf.hdu) for name in hdu[1].columns.names: if (len(hdu[1].data[name].shape) == 3): hdu[1].data[name][:, :, :4] = np.nan fits.HDUList(hdus=list(hdu)).writeto('hack.fits', overwrite=True) tpf = lk.KeplerTargetPixelFile('hack.fits', quality_bitmask=raw_tpf.quality_bitmask) os.remove('hack.fits') tpf = tpf[10:] tpf = tpf[np.in1d( tpf.time, tpf.to_lightcurve(aperture_mask='all').remove_nans().time)] tpf = tpf[tpf.to_lightcurve().normalize().flux > 0.8] mask = utils.planet_mask(tpf.time, 'K2-43') aper = np.nan_to_num(np.nanpercentile(tpf.flux, 95, axis=(0))) > 50 # First pass, remove some very bad outliers bad = np.zeros(len(tpf.time), bool) for count in range(2): pld_lc = tpf[~bad].to_corrector('pld').correct( aperture_mask=aper, cadence_mask=mask[~bad]) pld_lc = pld_lc.flatten(31, mask=~mask[~bad]) bad |= np.in1d( tpf.time, pld_lc.time[np.abs(pld_lc.flux - 1) > 5 * np.std(pld_lc.flux - 1)]) tpf = tpf[~bad] mask = mask[~bad] clc = fit.PLD(tpf, 
planet_mask=mask, trim=1, aperture=aper, logrho_mu=np.log(30)) clc.to_fits('{}/results/K2-43.fits'.format(PACKAGEDIR)) clc.to_csv('{}/results/K2-43.csv'.format(PACKAGEDIR)) pickle.dump(clc, open('{}/results/K2-43.p'.format(PACKAGEDIR), 'wb')) else: print('file exists') df = pd.read_csv('{}/results/K2-43.csv'.format(PACKAGEDIR)) clc = lk.KeplerLightCurve(df.time, df.flux, df.flux_err)
print("-------- Analyse par Cateorie-----------") print(kinds) #je veux la taille moyenne, mediane, perecentile 70, percentile 30 calcByCategory = {} for category in categories: # print(dataByCategory[category]) calc = {} data = np.array(dataByCategory[category], dtype=np.float) meanvalues = np.nanmean(data, axis=0) medianvalues = np.nanmedian(data, axis=0) sdvalues = np.nanstd(data, axis=0) varvalues = np.nanvar(data, axis=0) minvalues = np.nanmin(data, axis=0) maxvalues = np.nanmax(data, axis=0) percentile25values = np.nanpercentile(data, 25, axis=0) percentile75values = np.nanpercentile(data, 75, axis=0) lengthvalues = np.count_nonzero(~np.isnan(data), axis=0) for i in range(len(kinds)): calc["mean-" + kinds[i]] = meanvalues[i] calc["median-" + kinds[i]] = medianvalues[i] calc["sd-" + kinds[i]] = sdvalues[i] calc["var-" + kinds[i]] = varvalues[i] calc["min-" + kinds[i]] = minvalues[i] calc["max-" + kinds[i]] = maxvalues[i] calc["percentile25-" + kinds[i]] = percentile25values[i] calc["percentile75-" + kinds[i]] = percentile75values[i] calc["length-" + kinds[i]] = lengthvalues[i] calcByCategory[category] = calc # print(calcByCategory)
def nanpercentile(arr, axis=0): return np.nanpercentile(arr, PERCENTILES, axis=axis)
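# The one-liner above depends on a module-level PERCENTILES constant that is not
# shown in this snippet; the value below is only an assumption used to make the
# wrapper runnable in isolation.
import numpy as np

PERCENTILES = [5, 25, 50, 75, 95]  # assumed - the real module defines its own list

def nanpercentile(arr, axis=0):
    return np.nanpercentile(arr, PERCENTILES, axis=axis)

x = np.array([[1.0, np.nan, 3.0, 4.0, 10.0]])
print(nanpercentile(x, axis=1))  # one row per requested percentile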
def cpg_heatmap(df,
                methylated_color: str = 'rgb(215,48,39)',
                unmethylated_color: str = 'rgb(33,102,172)',
                ambiguous_color: str = 'rgb(240,240,240)',
                lim_llr: float = 10,
                min_diff_llr: float = 1,
                fig_width: int = None,
                fig_height: int = None):
    """
    Plot the values per CpG as a heatmap
    """
    # Cannot cluster with fewer than 2 columns
    if len(df.columns) <= 1:
        return None

    # Fill missing values with 0 = ambiguous methylation
    df = df.fillna(0)

    # Prepare subplot area
    fig = make_subplots(rows=1, cols=2, shared_yaxes=True, column_widths=[0.95, 0.05],
                        specs=[[{"type": "heatmap"}, {"type": "scatter"}]])

    # Plot dendrogram
    dendrogram = ff.create_dendrogram(df.values, labels=df.index, orientation='left',
                                      color_threshold=0, colorscale=["grey"])
    for data in dendrogram.data:
        fig.add_trace(data, row=1, col=2)

    # Reorder rows
    labels_ordered = np.flip(dendrogram.layout['yaxis']['ticktext'])
    df = df.reindex(labels_ordered)

    # Define lim_llr if not given = symmetrical 2nd/98th percentile
    if not lim_llr:
        lim_llr = max(np.absolute(np.nanpercentile(df.values, [2, 98])))

    # Define colorscale
    offset = min_diff_llr / lim_llr * 0.5
    colorscale = [[0.0, unmethylated_color],
                  [0.5 - offset, ambiguous_color],
                  [0.5 + offset, ambiguous_color],
                  [1.0, methylated_color]]

    # Plot heatmap
    heatmap = go.Heatmap(name="heatmap", x=df.columns, y=df.index, z=df.values,
                         zmin=-lim_llr, zmax=lim_llr, zmid=0,
                         colorscale=colorscale, colorbar_title="Median LLR")
    fig.add_trace(heatmap, row=1, col=1)

    # Tweak figure layout
    fig.update_layout(
        dict1={'showlegend': False,
               'hovermode': 'closest',
               "plot_bgcolor": 'rgba(0,0,0,0)',
               "width": fig_width,
               "height": fig_height,
               "margin": {"t": 50, "b": 50}},
        xaxis2={"fixedrange": True, 'showgrid': False, 'showline': False,
                "showticklabels": False, 'zeroline': False, 'ticks': ""},
        yaxis2={"fixedrange": True, 'showgrid': False, 'showline': False,
                "showticklabels": False, 'zeroline': False, 'ticks': "",
                "automargin": True},
        xaxis={"fixedrange": False, "domain": [0, 0.95],
               "showticklabels": False, "title": "CpG positions"},
        yaxis={"fixedrange": True, "domain": [0, 1],
               "ticks": "outside", "automargin": True})

    return fig
def plot_dstats(Gvel, Utemp, Vtemp, InputV, ddiv, dcrl, dshr, spath, ts): """ Diagnostic figures for the drift statistics - projected properly """ f = plt.figure(figsize=[9, 3]) # dtarg = dt.datetime(2015,1,1) # t_p1 = B1.condy.get_index(dtarg) # t_p2 = B2.condy.get_index(dtarg) # t_p3 = B3.condy.get_index(dtarg) vlim = 0.4 a_no = 30 # a_sc = 3.9e-1 # a_sc = 2*np.nanmax(np.hypot(Utemp,Vtemp)) a_sc = 2 * np.nanpercentile(np.hypot(Utemp, Vtemp), [90])[0] rm = int(Gvel.m / a_no) rn = int(Gvel.n / a_no) ra = np.sqrt(rm + rn) ra = ra * a_sc m = Gvel.mplot plt.subplot(1, 3, 1) m.pcolormesh(Gvel.xptp, Gvel.yptp, ddiv, cmap='RdBu', rasterized=True) m.colorbar(location='bottom') p_rng = np.nanpercentile(ddiv, [40, 60]) pr = np.max(np.abs(p_rng)) p_rng = [-pr, pr] # plt.clim(p_rng) # plt.clim([0.0,1.0]) m.drawcoastlines() ur, vr = Gvel.rotate_vectors_to_plot(Utemp, Vtemp) m.quiver(Gvel.xpts[::rm, ::rn], Gvel.ypts[::rm, ::rn], ur[::rm, ::rn], vr[::rm, ::rn], scale=ra, width=0.005) plt.ylabel(InputV.name) plt.title('Drift div. ' + ts.strftime('%Y%m%d')) plt.subplot(1, 3, 2) m.pcolormesh(Gvel.xptp, Gvel.yptp, dcrl, cmap='RdBu', rasterized=True) m.colorbar(location='bottom') p_rng = np.nanpercentile(dcrl, [8, 92]) pr = np.max(np.abs(p_rng)) p_rng = [-pr, pr] plt.clim(p_rng) # plt.clim([0.0,5.0]) m.drawcoastlines() m.quiver(Gvel.xpts[::rm, ::rn], Gvel.ypts[::rm, ::rn], ur[::rm, ::rn], vr[::rm, ::rn], scale=ra, width=0.005) plt.ylabel(InputV.name) plt.title('Drift curl ' + ts.strftime('%Y%m%d')) plt.subplot(1, 3, 3) rm = int(Gvel.m / a_no) rn = int(Gvel.n / a_no) ra = np.sqrt(rm + rn) ra = ra * a_sc m.pcolormesh(Gvel.xptp, Gvel.yptp, dshr, cmap='YlGnBu', rasterized=True) m.colorbar(location='bottom') p_rng = np.nanpercentile(dshr, [0, 87]) plt.clim(p_rng) # plt.clim([0.0,0.3]) m.drawcoastlines() m.quiver(Gvel.xpts[::rm, ::rn], Gvel.ypts[::rm, ::rn], ur[::rm, ::rn], vr[::rm, ::rn], scale=ra, width=0.005) plt.ylabel(InputV.name) plt.title('Drift shear ' + ts.strftime('%Y%m%d')) f.savefig(spath + 'Drift_statistics_' + ts.strftime('%Y%m%d') + '.pdf', bbox_inches='tight') print('Saving figure: ' + spath + 'Drift_statistics_' + ts.strftime('%Y%m%d') + '.pdf')
def main():
    pass  # For compatibility between running under Spyder and the CLI

    #%%
    pl.ion()

    fname = [u'demo_behavior.h5']
    if fname[0] in ['demo_behavior.h5']:
        # TODO: todocument
        fname = [download_demo(fname[0])]
    # TODO: todocument
    m = cm.load(fname[0], is_behavior=True)

    #%% load, rotate and eliminate useless pixels
    m = m.transpose([0, 2, 1])
    m = m[:, 150:, :]

    #%% visualize movie
    m.play()

    #%% select interesting portion of the FOV (draw a polygon on the figure that pops up, when done press enter)
    # TODO: Put the message below into the image
    print("Please draw a polygon delimiting the ROI on the image that will be displayed after the image; press enter when done")
    mask = np.array(behavior.select_roi(np.median(m[::100], 0), 1)[0], np.float32)

    #%%
    n_components = 4  # number of movements looked for
    resize_fact = 0.5  # for computational efficiency movies are downsampled
    # number of standard deviations above mean for the magnitude that are considered enough to measure the angle in polar coordinates
    num_std_mag_for_angle = .6
    only_magnitude = False  # if only interested in factorizing over the magnitude
    method_factorization = 'dict_learn'  # could also use nmf
    # number of iterations for the dictionary learning algorithm (Mairal et al., 2010)
    max_iter_DL = -30

    spatial_filter_, time_trace_, of_or = cm.behavior.behavior.extract_motor_components_OF(m, n_components, mask=mask,
                                                                                           resize_fact=resize_fact,
                                                                                           only_magnitude=only_magnitude,
                                                                                           verbose=True,
                                                                                           method_factorization='dict_learn',
                                                                                           max_iter_DL=max_iter_DL)

    #%%
    mags, dircts, dircts_thresh, spatial_masks_thrs = cm.behavior.behavior.extract_magnitude_and_angle_from_OF(
        spatial_filter_, time_trace_, of_or, num_std_mag_for_angle=num_std_mag_for_angle,
        sav_filter_size=3, only_magnitude=only_magnitude)

    #%%
    idd = 0
    axlin = pl.subplot(n_components, 2, 2)
    for mag, dirct, spatial_filter in zip(mags, dircts_thresh, spatial_filter_):
        pl.subplot(n_components, 2, 1 + idd * 2)
        min_x, min_y = np.min(np.where(mask), 1)

        spfl = spatial_filter
        spfl = cm.movie(spfl[None, :, :]).resize(1 / resize_fact, 1 / resize_fact, 1).squeeze()
        max_x, max_y = np.add((min_x, min_y), np.shape(spfl))

        mask[min_x:max_x, min_y:max_y] = spfl
        mask[mask < np.nanpercentile(spfl, 70)] = np.nan
        pl.imshow(m[0], cmap='gray')
        pl.imshow(mask, alpha=.5)
        pl.axis('off')

        axelin = pl.subplot(n_components, 2, 2 + idd * 2, sharex=axlin)
        pl.plot(mag / 10, 'k')
        dirct[mag < 0.5 * np.std(mag)] = np.nan
        pl.plot(dirct, 'r-', linewidth=2)
        idd += 1
second_lower_percentile_dissip_med = [None] * number_of_profiles second_upper_percentile_dissip_med = [None] * number_of_profiles """ #compute statistical properties of the saved values for index in range(total_number_of_valid_profiles): number_of_zero_flux += np.sum(np.abs(BB_flux_list[index]) == 0) amount_of_missing_values += np.sum(np.isnan(BB_flux_list[index])) #count the number of flux data points mean_Osborn_flux[index] = np.nanmean(Osborn_flux_list[index]) mean_Shih_flux[index] = np.nanmean(Shih_flux_list[index]) mean_BB_flux[index] = np.nanmean(BB_flux_list[index]) median_flux[index] = np.nanmedian(BB_flux_list[index]) upper_percentile_flux[index] = np.nanpercentile( BB_flux_list[index], flux_percentile) lower_percentile_flux[index] = np.nanpercentile( BB_flux_list[index], 100 - flux_percentile) second_upper_percentile_flux[index] = np.nanpercentile( BB_flux_list[index], second_flux_percentile) second_lower_percentile_flux[index] = np.nanpercentile( BB_flux_list[index], 100 - second_flux_percentile) """ mean_min_flux[index] = np.nanmean(oxygen_flux_statistic[index][:,0],axis=0) median_min_flux[index] = np.nanmedian(oxygen_flux_statistic[index][:,0],axis=0) upper_percentile_min_flux[index] = np.nanpercentile(oxygen_flux_statistic[index][:,0], flux_percentile) lower_percentile_min_flux[index] = np.nanpercentile(oxygen_flux_statistic[index][:,0], 100-flux_percentile) """ #bathymetrie_mean[index] = np.nanmean(bathymetrie_statistic[index])
def get_rid_outlier(np_array, lower_percentile, upper_percentile):
    lower_bound = np.nanpercentile(np_array, lower_percentile)
    upper_bound = np.nanpercentile(np_array, upper_percentile)
    np_array[np_array < lower_bound] = lower_bound
    np_array[np_array > upper_bound] = upper_bound
    return np_array
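# get_rid_outlier winsorises in place: anything outside the chosen percentiles is
# pinned to the percentile bounds, and the caller's array is mutated because no
# copy is made. A non-mutating variant of the same idea using np.clip (our sketch,
# not the original author's code):
import numpy as np

def clip_outliers(values, lower_percentile=1, upper_percentile=99):
    """Return a winsorised copy; NaNs are ignored when finding the bounds."""
    lower, upper = np.nanpercentile(values, [lower_percentile, upper_percentile])
    return np.clip(values, lower, upper)

x = np.array([-50.0, 1.0, 2.0, 3.0, 4.0, 500.0])
print(clip_outliers(x, 10, 90))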
    hax = ax.imshow(cutamp, cm.Greys_r)
else:
    cutphi = as_strided(phi[ibeg:iend, jbeg:jend]) * rad2mm
    if arguments["--wrap"] is not None:
        cutphi = np.mod(cutphi + float(arguments["--wrap"]),
                        2 * float(arguments["--wrap"])) - float(arguments["--wrap"])
        vmax = float(arguments["--wrap"])
        vmin = -vmax
    elif (arguments["--vmax"] is not None) or (arguments["--vmin"] is not None):
        if arguments["--vmax"] is not None:
            vmax = float(arguments["--vmax"])  # np.float is deprecated
        if arguments["--vmin"] is not None:
            vmin = float(arguments["--vmin"])
    else:
        vmax = np.nanpercentile(cutphi, 98)
        vmin = np.nanpercentile(cutphi, 2)

    cax = ax.imshow(cutphi, cmap, interpolation='nearest', vmax=vmax, vmin=vmin, alpha=0.6)
    divider = make_axes_locatable(ax)
    c = divider.append_axes("right", size="5%", pad=0.05)
    plt.colorbar(cax, cax=c)

if arguments["--title"] == None:
    fig.canvas.set_window_title(infile)
else:
latr = np.deg2rad(rlat) weights = np.cos(latr) for st in range(len(ttt_rain_dates)): if weightlats: zonmean_ttt = np.ma.mean(masked_rain[st, :, :], axis=1) regmean_ttt = np.ma.average(zonmean_ttt, weights=weights) reg_ttt_mean[st] = regmean_ttt else: reg_ttt_mean[st] = np.ma.mean(masked_rain[st, :, :]) # Getting a long term sum or mean tottttrain = np.nansum(reg_ttt_mean) rainperttt = np.nanmean(reg_ttt_mean) per75rain = np.nanpercentile(reg_ttt_mean, 75) if raintype == 'totrain': yvals[cnt] = tottttrain elif raintype == 'rainperttt': yvals[cnt] = rainperttt elif raintype == 'perc75': yvals[cnt] = per75rain ### Put name into string list if dset == 'noaa': if aspect == 'rain': modnm[cnt] = name + '/' + rainname else: modnm[cnt] = name else:
text_labels = [ 'Seiners', 'Trawlers and dredgers', 'Fixed gear', 'Drifting longlines', 'Squid jiggers', 'Pole and line, and trollers', 'Unclassified', 'All' ] for i, varname in enumerate([ 'seiners', 'trawlers_and_dredgers', 'fixed_gear', 'drifting_longlines', 'squid_jigger', 'pole_and_line_and_trollers', 'fishing', 'all' ]): axis = ax.flatten()[i] grid_data = np.copy(assumption_A[i]) grid_data[grid_data == 0] = np.nan p05 = np.nanpercentile(grid_data, 5) p95 = np.nanpercentile(grid_data, 95) heatmap = axis.pcolormesh(lon_bnd, lat_bnd, grid_data / p95, cmap=cmr.chroma_r, norm=colors.LogNorm(vmin=1e-3, vmax=1)) axis.add_feature(land_10m) axis.text(98, 26, text_labels[i], c='w', horizontalalignment='right') axis.axis('off')
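# The fishing-effort panels above normalise each grid by its own 95th percentile
# before applying a logarithmic colour scale, which keeps gear types with very
# different absolute densities visually comparable. A stripped-down sketch of that
# normalisation (synthetic data; cmasher's colormap replaced by a stock one):
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import colors

grid = np.random.lognormal(mean=0.0, sigma=2.0, size=(90, 180))
grid[grid < 0.05] = 0        # pretend these cells saw no activity
grid[grid == 0] = np.nan     # exclude them from the percentile and the plot

p95 = np.nanpercentile(grid, 95)
plt.pcolormesh(grid / p95, cmap='viridis', norm=colors.LogNorm(vmin=1e-3, vmax=1))
plt.colorbar(label='effort relative to the 95th percentile')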
def dependence_plot(ind, shap_values, features, feature_names=None, display_features=None, interaction_index="auto", color="#ff0052", axis_color="#333333", dot_size=16, alpha=1, title=None, show=True): """ Create a SHAP dependence plot, colored by an interaction feature. Parameters ---------- ind : int Index of the feature to plot. shap_values : numpy.array Matrix of SHAP values (# samples x # features) features : numpy.array or pandas.DataFrame Matrix of feature values (# samples x # features) feature_names : list Names of the features (length # features) display_features : numpy.array or pandas.DataFrame Matrix of feature values for visual display (such as strings instead of coded values) interaction_index : "auto" or int The index of the feature used to color the plot. """ # convert from DataFrames if we got any if str(type(features)) == "<class 'pandas.core.frame.DataFrame'>": if feature_names is None: feature_names = features.columns features = features.as_matrix() if str(type(display_features)) == "<class 'pandas.core.frame.DataFrame'>": if feature_names is None: feature_names = display_features.columns display_features = display_features.as_matrix() elif display_features is None: display_features = features # allow vectors to be passed if len(shap_values.shape) == 1: shap_values = np.reshape(shap_values, len(shap_values), 1) if len(features.shape) == 1: features = np.reshape(features, len(features), 1) # get both the raw and display feature values xv = features[:, ind] xd = display_features[:, ind] s = shap_values[:, ind] if type(xd[0]) == str: name_map = {} for i in range(len(xv)): name_map[xd[i]] = xv[i] xnames = list(name_map.keys()) # allow a single feature name to be passed alone if type(feature_names) == str: feature_names = [feature_names] name = feature_names[ind] # guess what other feature as the stongest interaction with the plotted feature if interaction_index == "auto": interaction_index = approx_interactions(ind, shap_values, features)[0] # get both the raw and display color values cv = features[:, interaction_index] cd = display_features[:, interaction_index] if type(cd[0]) == str: cname_map = {} for i in range(len(cv)): cname_map[cd[i]] = cv[i] cnames = list(cname_map.keys()) clow = np.nanpercentile(features[:, interaction_index], 5) chigh = np.nanpercentile(features[:, interaction_index], 95) # the actual scatter plot, TODO: adapt the dot_size to the number of data points pl.scatter(xv, s, s=dot_size, linewidth=0, c=features[:, interaction_index], cmap=red_blue, alpha=alpha, vmin=clow, vmax=chigh) # draw the color bar if type(cd[0]) == str: cb = pl.colorbar(ticks=[cname_map[n] for n in cnames]) cb.set_ticklabels(cnames) else: cb = pl.colorbar() cb.set_label(feature_names[interaction_index], size=13) cb.ax.tick_params(labelsize=11) cb.set_alpha(1) cb.draw_all() # make the plot more readable pl.gcf().set_size_inches(7.5, 5) pl.xlabel(name, color=axis_color, fontsize=13) pl.ylabel("SHAP value for " + name, color=axis_color, fontsize=13) if title != None: pl.title(title, color=axis_color, fontsize=13) pl.gca().xaxis.set_ticks_position('bottom') pl.gca().yaxis.set_ticks_position('left') pl.gca().spines['right'].set_visible(False) pl.gca().spines['top'].set_visible(False) pl.gca().tick_params(color=axis_color, labelcolor=axis_color, labelsize=11) for spine in pl.gca().spines.values(): spine.set_edgecolor(axis_color) if type(xd[0]) == str: pl.xticks([name_map[n] for n in xnames], xnames, rotation='vertical', fontsize=11) if show: pl.show()
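# One detail worth isolating from dependence_plot: the colour range for the
# interaction feature is clamped to its 5th-95th percentiles (clow/chigh), so a
# handful of extreme feature values cannot wash out the colour scale. The same
# robust vmin/vmax idea on toy data (not SHAP output):
import numpy as np
import matplotlib.pyplot as pl

x = np.random.randn(500)
shap_vals = 0.5 * x + 0.1 * np.random.randn(500)   # stand-in for SHAP values
c = np.random.randn(500)
c[:5] = 50                                         # a few extreme colouring values

clow, chigh = np.nanpercentile(c, [5, 95])
pl.scatter(x, shap_vals, c=c, s=16, linewidth=0, cmap='coolwarm', vmin=clow, vmax=chigh)
pl.colorbar()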
thetav_dd_stat = {} thetav_p_dd_stat = {} thetal_dd_stat = {} thetal_p_dd_stat = {} theta_dd_stat = {} theta_p_dd_stat = {} ww_dd_stat = {} ww_p_dd_stat = {} qv_dd_stat = {} ql_dd_stat = {} qt_dd_stat = {} qt_p_dd_stat = {} ###################################### Lets's Begin ############################################## ####################################### Get Data ################################################# percenval_ud = np.array([[np.nanpercentile(WW[it,iz,WW[it,iz,:,:] <= dd_thres], percent) for iz in range(WW.shape[1])] for it in range(WW.shape[0])]) percenval_dd = np.array([[np.nanpercentile(WW[it,iz,WW[it,iz,:,:] <= dd_thres], percent) for iz in range(WW.shape[1])] for it in range(WW.shape[0])]) # Initialise lists for the updraught stats # variables thetav_ud_stat = [] thetav_p_ud_stat = [] thetal_ud_stat = [] thetal_p_ud_stat = [] theta_ud_stat = [] theta_p_ud_stat = [] ww_ud_stat = [] ww_p_ud_stat = [] qv_ud_stat = [] mcl_ud_stat = [] qcld_ud_stat = []
def cutoff(self, recorded) -> Optional[Union[int, float, complex, np.ndarray]]:
    if not recorded:
        return None
    return np.nanpercentile(list(recorded.values()), (1 - 1 / self.rf) * 100)
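# cutoff() above converts a "keep roughly the top 1/rf of values" rule into a
# percentile: with rf = 4, (1 - 1/4) * 100 = 75, so it returns the 75th percentile
# of the recorded scores. A tiny standalone illustration (class and attribute
# names are assumptions made for the example):
import numpy as np

class TopFractionCutoff:
    def __init__(self, rf):
        self.rf = rf  # keep roughly the top 1/rf of recorded values

    def cutoff(self, recorded):
        if not recorded:
            return None
        return np.nanpercentile(list(recorded.values()), (1 - 1 / self.rf) * 100)

scores = {"a": 1.0, "b": 2.0, "c": 3.0, "d": 4.0}
print(TopFractionCutoff(rf=4).cutoff(scores))  # 75th percentile -> 3.25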
def calc_percentiles(ln_ms_age, ln_cooling_age, ln_total_age, initial_mass, final_mass, high_perc, low_perc, datatype='log'): if (datatype == 'log'): ms_age_median = np.nanpercentile(ln_ms_age, 50) ms_age_err_low = ms_age_median - np.nanpercentile(ln_ms_age, low_perc) ms_age_err_high = np.nanpercentile(ln_ms_age, high_perc) - ms_age_median cooling_age_median = np.nanpercentile(ln_cooling_age, 50) cooling_age_err_low = cooling_age_median - np.nanpercentile( ln_cooling_age, low_perc) cooling_age_err_high = np.nanpercentile(ln_cooling_age, high_perc) - cooling_age_median total_age_median = np.nanpercentile(ln_total_age, 50) total_age_err_low = total_age_median - np.nanpercentile( ln_total_age, low_perc) total_age_err_high = np.nanpercentile(ln_total_age, high_perc) - total_age_median initial_mass_median = np.nanpercentile(initial_mass, 50) initial_mass_err_low = initial_mass_median - np.nanpercentile( initial_mass, low_perc) initial_mass_err_high = np.nanpercentile( initial_mass, high_perc) - initial_mass_median final_mass_median = np.nanpercentile(final_mass, 50) final_mass_low = final_mass_median - np.nanpercentile( final_mass, low_perc) final_mass_high = np.nanpercentile(final_mass, high_perc) - final_mass_median if (datatype == 'Gyr'): ms_age_median = np.nanpercentile((10**ln_ms_age) / 1e9, 50) ms_age_err_low = ms_age_median - np.nanpercentile( (10**ln_ms_age) / 1e9, low_perc) ms_age_err_high = np.nanpercentile( (10**ln_ms_age) / 1e9, high_perc) - ms_age_median cooling_age_median = np.nanpercentile((10**ln_cooling_age) / 1e9, 50) cooling_age_err_low = cooling_age_median - np.nanpercentile( (10**ln_cooling_age) / 1e9, low_perc) cooling_age_err_high = np.nanpercentile( (10**ln_cooling_age) / 1e9, high_perc) - cooling_age_median total_age_median = np.nanpercentile((10**ln_total_age) / 1e9, 50) total_age_err_low = total_age_median - np.nanpercentile( (10**ln_total_age) / 1e9, low_perc) total_age_err_high = np.nanpercentile( (10**ln_total_age) / 1e9, high_perc) - total_age_median initial_mass_median = np.nanpercentile(initial_mass, 50) initial_mass_err_low = initial_mass_median - np.nanpercentile( initial_mass, low_perc) initial_mass_err_high = np.nanpercentile( initial_mass, high_perc) - initial_mass_median final_mass_median = np.nanpercentile(final_mass, 50) final_mass_low = final_mass_median - np.nanpercentile( final_mass, low_perc) final_mass_high = np.nanpercentile(final_mass, high_perc) - final_mass_median return [ ms_age_median, ms_age_err_low, ms_age_err_high, cooling_age_median, cooling_age_err_low, cooling_age_err_high, total_age_median, total_age_err_low, total_age_err_high, initial_mass_median, initial_mass_err_low, initial_mass_err_high, final_mass_median, final_mass_low, final_mass_high ]
def acolite_map(inputfile=None, output=None, parameters=None, dpi=300, ext='png', mapped=True, max_dim = 1000, limit=None, auto_range=False, range_percentiles=(5,95), dataset_rescale=False, map_title=True, map_colorbar=False, map_colorbar_orientation='vertical',#'horizontal', rgb_rhot = False, rgb_rhos = False, red_wl = 660, green_wl = 560, blue_wl = 480, rgb_min = [0.0]*3, rgb_max = [0.15]*3, rgb_pan_sharpen = False, map_parameters_pan=True, map_fillcolor='White', map_scalepos = 'LR', map_scalebar = True, map_scalecolor='Black', map_scalecolor_rgb='White', map_scalelen=None, map_projection='tmerc', map_colorbar_edge=True, map_points=None, return_image=False, map_raster=False): import os, copy import datetime, time, dateutil.parser from acolite.shared import datascl,nc_data,nc_datasets,nc_gatts,qmap,closest_idx from acolite.acolite import pscale import acolite as ac from numpy import nanpercentile, log10, isnan, dstack from scipy.ndimage import zoom import matplotlib if not os.path.exists(inputfile): print('File {} not found.'.format(inputfile)) return(False) ## run through maps maps = {'rhot':rgb_rhot,'rhos':rgb_rhos, 'parameters':parameters != None} if all([maps[m] == False for m in maps]): return() ## get parameter scaling psc = pscale() ## read netcdf info l2w_datasets = nc_datasets(inputfile) print(l2w_datasets) gatts = nc_gatts(inputfile) if 'MISSION_INDEX' in gatts: sat, sen = gatts['MISSION'], gatts['MISSION_INDEX'] stime = dateutil.parser.parse(gatts['IMAGING_DATE']+' '+gatts['IMAGING_TIME']) obase = '{}_{}_{}'.format(sat, sen, stime.strftime('%Y_%m_%d_%H_%M_%S')) else: sp = gatts['sensor'].split('_') if 'sensor' in gatts else gatts['SATELLITE_SENSOR'].split('_') sat, sen = sp[0], sp[1] stime = dateutil.parser.parse(gatts['isodate'] if 'isodate' in gatts else gatts['ISODATE']) obase = gatts['output_name'] if 'output_name' in gatts else gatts['obase'] ## find pan sharpening dataset if rgb_pan_sharpen: if sat not in ['L7','L8']: rgb_pan_sharpen = False tmp = os.path.splitext(inputfile) l1_pan_ncdf = '{}L1R_pan{}'.format(tmp[0][0:-3],tmp[1]) if os.path.exists(l1_pan_ncdf): pan_data = nc_data(l1_pan_ncdf, 'rhot_pan') else: print('L1 pan NetCDF file not found') rgb_pan_sharpen=False if output is not None: odir = output else: odir = gatts['output_dir'] if not os.path.exists(odir): os.makedirs(odir) scf= 1. rescale = 1.0 #if dataset_rescale or mapped: lon = nc_data(inputfile, 'lon') if mapped: lat = nc_data(inputfile, 'lat') if rgb_pan_sharpen: lon_pan = zoom(lon, zoom=2, order=1) lat_pan = zoom(lat, zoom=2, order=1) ## set up mapping info if True: from numpy import linspace, tile, ceil, isnan, nan mask_val = -9999.9999 from scipy.ndimage.interpolation import map_coordinates ## rescale to save memory dims = lon.shape dsc = (dims[0]/max_dim, dims[1]/max_dim) scf/=max(dsc) if rgb_pan_sharpen: scf = 1.0 if (scf < 1.) 
and dataset_rescale: sc_dims = (int(ceil(dims[0] * scf)), int(ceil(dims[1] * scf))) xdim = linspace(0,dims[1],sc_dims[1]).reshape(1,sc_dims[1]) ydim = linspace(0,dims[0],sc_dims[0]).reshape(sc_dims[0],1) xdim = tile(xdim, (sc_dims[0],1)) ydim = tile(ydim, (1,sc_dims[1])) resc = [ydim,xdim] xdim, ydim = None, None lon = map_coordinates(lon, resc, mode='nearest') lat = map_coordinates(lat, resc, mode='nearest') else: rescale = scf ## run through parameters for mi in maps: if not maps[mi]: continue if mi == 'parameters': if rgb_pan_sharpen: if map_parameters_pan & mapped: lon = lon_pan * 1.0 lon_pan = None lat = lat_pan * 1.0 lat_pan = None pan_data, lon_pan, lat_pan = None, None, None print('Mapping {}'.format(mi)) if type(parameters) is not list: parameters=[parameters] for pid, par in enumerate(parameters): pard = None ## check if this parameter exists if par not in l2w_datasets: print('Parameter {} not in file {}.'.format(par, inputfile)) continue print('Mapping {}'.format(par)) ## read data data = nc_data(inputfile, par) if (rgb_pan_sharpen) & (map_parameters_pan): data = zoom(data, zoom=2, order=1) ## rescale data if (scf != 1.0) and dataset_rescale: data[isnan(data)] = mask_val data = map_coordinates(data, resc, cval=mask_val) data[data <= int(mask_val)] = nan data[data <= 0] = nan data_range = nanpercentile(data, range_percentiles) ## get parameter mapping configuration if par in psc: pard = copy.deepcopy(psc[par]) else: tmp = par.split('_') par_generic = '_'.join((tmp[0:-1]+['*'])) if par_generic in psc: pard = copy.deepcopy(psc[par_generic]) try: ## add wavelength to generic name wave = int(tmp[len(tmp)-1]) pard['name'] = '{} ({} nm)'.format(pard['name'], wave) except: pass else: pard= {'color table':'default', 'min':data_range[0], 'max':data_range[1], 'log': False, 'name':par, 'unit':'', 'parameter':par} if pard['color table'] == 'default': pard['color table']='viridis' ctfile = "{}/{}/{}.txt".format(ac.config['pp_data_dir'], 'Shared/ColourTables', pard['color table']) if os.path.exists(ctfile): from matplotlib.colors import ListedColormap from numpy import loadtxt pard['color table'] = ListedColormap(loadtxt(ctfile)/255.) 
if 'title' not in pard: pard['title']='{} [{}]'.format(pard['name'],pard['unit']) if auto_range: pard['min']=data_range[0] pard['max']=data_range[1] if isnan(pard['min']): pard['min']=data_range[0] if isnan(pard['max']): pard['max']=data_range[1] ## outputfile outputfile = '{}/{}_{}.png'.format(odir,obase,par) if map_title: title = '{} {}/{} {}'.format(pard['name'], sat, sen, stime.strftime('%Y-%m-%d (%H:%M UTC)')) else: title = None ## use qmap option if mapped: range = (pard['min'], pard['max']) if 'limit' in gatts: limit = gatts['limit'] if ('xx' not in locals()): xx, yy, m = qmap(data, lon, lat, outputfile=outputfile, title=title, rescale=rescale, colorbar=map_colorbar_orientation, colorbar_edge=map_colorbar_edge, cmap=pard['color table'], label=pard['title'], range=range, log = pard['log'], map_fillcolor=map_fillcolor, limit=limit, dpi=dpi, points=map_points, projection=map_projection, scalebar=map_scalebar, scalepos=map_scalepos, scalecolor=map_scalecolor, scalelen=map_scalelen) else: xx, yy, m = qmap(data, lon, lat, outputfile=outputfile, title=title, rescale=rescale, colorbar=map_colorbar_orientation, colorbar_edge=map_colorbar_edge, cmap=pard['color table'], label=pard['title'], range=range, log = pard['log'], map_fillcolor=map_fillcolor, limit=limit, dpi=dpi, points=map_points, projection=map_projection, scalebar=map_scalebar, scalepos=map_scalepos, scalecolor=map_scalecolor, scalelen=map_scalelen, xx=xx, yy=yy, m=m) else: import matplotlib.cm as cm from matplotlib.colors import ListedColormap cmap = cm.get_cmap(pard['color table']) cmap.set_bad(map_fillcolor) cmap.set_under(map_fillcolor) if not map_raster: ## set up plot fig = matplotlib.figure.Figure() canvas = matplotlib.backends.backend_agg.FigureCanvasAgg(fig) ax = fig.add_subplot(111) print(pard['min'], pard['max']) if pard['log']: from matplotlib.colors import LogNorm cax = ax.imshow(data, vmin=pard['min'], vmax=pard['max'], cmap=cmap, norm=LogNorm(vmin=pard['min'], vmax=pard['max'])) else: cax = ax.imshow(data, vmin=pard['min'], vmax=pard['max'], cmap=cmap) if map_colorbar: if map_colorbar_orientation == 'vertical': cbar = fig.colorbar(cax, orientation='vertical') cbar.ax.set_ylabel(pard['title']) else: cbar = fig.colorbar(cax, orientation='horizontal') cbar.ax.set_xlabel(pard['title']) if map_title: ax.set_title(title) ax.axis('off') canvas.print_figure(outputfile, dpi=dpi, bbox_inches='tight') else: from PIL import Image ## rescale for mapping if pard['log']: from numpy import log10 datasc = datascl(log10(data), dmin=log10(pard['min']), dmax=log10(pard['max'])) else: datasc = datascl(data, dmin=pard['min'], dmax=pard['max']) d = cmap(datasc) for wi in (0,1,2): ## convert back to 8 bit channels (not ideal) d_ = datascl(d[:,:,wi], dmin=0, dmax=1) if wi == 0: im = d_ else: im = dstack((im,d_)) img = Image.fromarray(im) ## output image img.save(outputfile) print('Wrote {}'.format(outputfile)) else: print('Mapping RGB {}'.format(mi)) ## RGBs waves = [float(ds.split('_')[1]) for ds in l2w_datasets if ds[0:4] == mi] if len(waves) == 0: print('No appropriate datasets found for RGB {} in {}'.format(mi, inputfile)) continue ## read datasets for wi, wl in enumerate([red_wl, green_wl, blue_wl]): idx, wave = closest_idx(waves, wl) cpar = '{}_{}'.format(mi, int(wave)) ## read data data = nc_data(inputfile, cpar) if rgb_pan_sharpen: data = zoom(data, zoom=2, order=1) if wi == 0: vis_i = data * 1.0 else: vis_i += data if wi == 2: vis_i /= 3 pan_i = vis_i/pan_data vis_i = None ## rescale data if (scf != 1.0) and dataset_rescale: 
data[isnan(data)] = mask_val data = map_coordinates(data, resc, cval=mask_val) data[data <= int(mask_val)] = nan data[data <= 0] = nan ## stack image if wi == 0: image = data else: image = dstack((image,data)) ## rescale data between 0 and 1 for wi in (2,1,0): if rgb_pan_sharpen: image[:,:,wi] /= pan_i image[:,:,wi] = datascl(image[:,:,wi], dmin=rgb_min[wi], dmax=rgb_max[wi])/255. par = r'$\rho_{}$'.format(mi[3]) + ' RGB' if map_title: title = '{} {}/{} {}'.format(par, sat, sen, stime.strftime('%Y-%m-%d (%H:%M UTC)')) else: title = None ## outputfile if rgb_pan_sharpen: outputfile = '{}/{}_rgb_{}_pan.png'.format(odir,obase,mi) else: outputfile = '{}/{}_rgb_{}.png'.format(odir,obase,mi) # use qmap option if mapped: if 'limit' in gatts: limit = gatts['limit'] if rgb_pan_sharpen: ret = qmap(image, lon_pan, lat_pan, outputfile=outputfile, title=title, rescale=rescale, colorbar=map_colorbar_orientation, colorbar_edge=map_colorbar_edge, limit=limit, dpi=dpi, points=map_points, projection=map_projection, scalebar=map_scalebar, scalepos=map_scalepos, scalecolor=map_scalecolor_rgb, scalelen=map_scalelen) ret = None else: if ('xx' not in locals()): xx, yy, m = qmap(image, lon, lat, outputfile=outputfile, title=title, rescale=rescale, colorbar=map_colorbar_orientation, colorbar_edge=map_colorbar_edge, limit=limit, dpi=dpi, points=map_points, projection=map_projection, scalebar=map_scalebar, scalepos=map_scalepos, scalecolor=map_scalecolor_rgb, scalelen=map_scalelen) else: xx, yy, m = qmap(image, lon, lat, outputfile=outputfile, title=title, rescale=rescale, colorbar=map_colorbar_orientation, colorbar_edge=map_colorbar_edge, limit=limit, dpi=dpi, points=map_points, projection=map_projection, scalebar=map_scalebar, scalepos=map_scalepos, scalecolor=map_scalecolor_rgb, scalelen=map_scalelen, xx=xx, yy=yy, m=m) else: if not map_raster: ## set up plot fig = matplotlib.figure.Figure() canvas = matplotlib.backends.backend_agg.FigureCanvasAgg(fig) ax = fig.add_subplot(111) ax.imshow(image) image = None if map_title: ax.set_title(title) ax.axis('off') canvas.print_figure(outputfile, dpi=dpi, bbox_inches='tight') else: from PIL import Image for wi in (0,1,2): # convert again to 8 bit channels (not ideal) data = datascl(image[:,:,wi], dmin=0, dmax=1) if wi == 0: im = data else: im = dstack((im,data)) img = Image.fromarray(im) img.save(outputfile) print('Wrote {}'.format(outputfile))