def XWrap(x,ifold,fill_value=0):
    """
    Extend and wrap array.

    Fold the 1-D array `x` every `ifold` indices.  There will typically be
    a hanging part of the array; it is padded out to a full row.

    The number of rows is always ``floor(x.size / ifold) + 1``, so an input
    whose length is an exact multiple of `ifold` gets one extra row that
    consists only of padding.

    Parameters
    ----------
    x : input array (plain ndarray or masked array)
    ifold : Wrap array after ifold indices.
    fill_value : value written into the padding of a plain array.  For a
        masked array the padding is masked instead.

    Return
    ------
    xwrap : Wrapped array with `ifold` columns.
    """
    ncad = x.size  # Number of cadences
    nrow = int(np.floor(ncad / ifold) + 1)
    nExtend = nrow * ifold - ncad  # Number of padding elements

    # isinstance (rather than an exact type comparison) so that subclasses
    # of MaskedArray also keep their mask through ma.hstack.
    if isinstance(x, np.ma.MaskedArray):
        pad = ma.empty(nExtend)
        pad.mask = True
        x = ma.hstack((x, pad))
    else:
        pad = np.empty(nExtend)
        pad[:] = fill_value
        x = np.hstack((x, pad))

    xwrap = x.reshape(nrow, -1)
    return xwrap
def compute_spec_fiducial(wcslist):
    """
    Compute the fiducial of the combined footprint of a list of WCSs.

    For a celestial footprint this is the center.
    For a spectral footprint, it is the beginning of the range.

    This function assumes all WCSs have the same output coordinate frame;
    the axes types are read from the first WCS only.
    Build-7 workaround.

    Returns a tuple ``(spatial_fiducial, spectral_fiducial)``.  Entries
    whose footprint contains no non-zero value are left as allocated by
    ``np.empty``.
    """
    axes_types = wcslist[0].output_frame.axes_type
    kinds = np.array(axes_types)
    spatial_axes = kinds == 'SPATIAL'
    spectral_axes = kinds == 'SPECTRAL'

    per_wcs = [spec_footprint(w, bounding_box=w.bounding_box)
               for w in wcslist]
    footprints = ma.hstack(per_wcs)
    spatial_footprint = footprints[spatial_axes]
    spectral_footprint = footprints[spectral_axes]

    fiducial = np.empty(len(axes_types))

    if spatial_footprint.any():
        # Average the footprint points as unit vectors so that the
        # longitude wrap-around does not bias the center.
        lon_deg, lat_deg = spatial_footprint
        lon = np.deg2rad(lon_deg)
        lat = np.deg2rad(lat_deg)
        cos_lat = np.cos(lat)
        x_mean = np.mean(cos_lat * np.cos(lon))
        y_mean = np.mean(cos_lat * np.sin(lon))
        z_mean = np.mean(np.sin(lat))
        lon_fiducial = np.rad2deg(np.arctan2(y_mean, x_mean)) % 360.0
        horizontal = np.sqrt(x_mean**2 + y_mean**2)
        lat_fiducial = np.rad2deg(np.arctan2(z_mean, horizontal))
        fiducial[spatial_axes] = lon_fiducial, lat_fiducial

    if spectral_footprint.any():
        fiducial[spectral_axes] = spectral_footprint.min()

    return fiducial[spatial_axes], fiducial[spectral_axes]
def compute_spec_fiducial(wcslist, domain=None):
    """
    Compute the fiducial of the combined footprint of a list of WCSs.

    For a celestial footprint this is the center.
    For a spectral footprint, it is the beginning of the range.

    This function assumes all WCSs have the same output coordinate frame;
    the axes types are read from the first WCS only.  `domain` is passed
    unchanged to ``spec_footprint`` for every WCS.
    Build-7 workaround.

    Returns a tuple ``(spatial_fiducial, spectral_fiducial)``.  Entries
    whose footprint contains no non-zero value are left as allocated by
    ``np.empty``.
    """
    axes_types = wcslist[0].output_frame.axes_type
    kinds = np.array(axes_types)
    spatial_axes = kinds == 'SPATIAL'
    spectral_axes = kinds == 'SPECTRAL'

    per_wcs = [spec_footprint(w, domain=domain) for w in wcslist]
    footprints = ma.hstack(per_wcs)
    spatial_footprint = footprints[spatial_axes]
    spectral_footprint = footprints[spectral_axes]

    fiducial = np.empty(len(axes_types))

    if spatial_footprint.any():
        # Mean of the footprint points on the unit sphere, converted back
        # to longitude/latitude in degrees.
        lon_deg, lat_deg = spatial_footprint
        lon = np.deg2rad(lon_deg)
        lat = np.deg2rad(lat_deg)
        cos_lat = np.cos(lat)
        x_mean = np.mean(cos_lat * np.cos(lon))
        y_mean = np.mean(cos_lat * np.sin(lon))
        z_mean = np.mean(np.sin(lat))
        lon_fiducial = np.rad2deg(np.arctan2(y_mean, x_mean)) % 360.0
        horizontal = np.sqrt(x_mean ** 2 + y_mean ** 2)
        lat_fiducial = np.rad2deg(np.arctan2(z_mean, horizontal))
        fiducial[spatial_axes] = lon_fiducial, lat_fiducial

    if spectral_footprint.any():
        fiducial[spectral_axes] = spectral_footprint.min()

    return fiducial[spatial_axes], fiducial[spectral_axes]
def _get_corr_arr(self):
    """
    Return ``MatSpearman`` evaluated on the horizontally stacked
    ``self.var_arr_list``.

    A progress message is printed on every call.
    """
    print('redrawing cross correl')
    stacked = ma.hstack(self.var_arr_list)
    # @kelidas: ma.corrcoef was not used here because it returns small
    # differences between the ma and numpy corrcoef results.
    return MatSpearman(stacked)
def _angles(self, U, V, eps=0.001): xy = self.ax.transData.transform(self.XY) uv = ma.hstack((U[:, np.newaxis], V[:, np.newaxis])).filled(0) xyp = self.ax.transData.transform(self.XY + eps * uv) dxy = xyp - xy ang = ma.arctan2(dxy[:, 1], dxy[:, 0]) return ang
def _angles(self, U, V, eps=0.001): xy = self.ax.transData.transform(self.XY) uv = ma.hstack((U[:,np.newaxis], V[:,np.newaxis])).filled(0) xyp = self.ax.transData.transform(self.XY + eps * uv) dxy = xyp - xy ang = ma.arctan2(dxy[:,1], dxy[:,0]) return ang
def __call__(self, data, weight_id=0, progress_callback=None):
    """
    Train a self-organizing map on `data` and return it as a ``SOMMap``.

    The data array from ``data.toNumpyMA()`` is extended with class
    columns (an indicator matrix for a discrete class, a single column for
    a continuous class, nothing when there is no class variable), the map
    is initialized and trained by ``Solver``, and the class columns are
    stripped from the node vectors again.

    Parameters
    ----------
    data : data table providing ``toNumpyMA()`` and ``domain``
    weight_id : not used by this method
    progress_callback : forwarded to the solver

    Raises
    ------
    TypeError
        If ``domain.class_var`` is of an unsupported kind.
    """
    array, classes, _ = data.toNumpyMA()
    domain = data.domain
    class_var = domain.class_var

    if isinstance(class_var, Orange.feature.Discrete):
        # Discrete class (extend the data with class indicator matrix)
        nval = len(class_var.values)
        ext = ma.zeros((len(array), nval))
        # NOTE(review): only rows whose class entry is flagged in
        # `classes.mask` are marked here -- confirm that this is the
        # intended polarity of the mask.
        rows = [i for i, m in enumerate(classes.mask) if m]
        cols = [int(c) for c, m in zip(classes, classes.mask) if m]
        ext[(rows, cols)] = 1.0
    elif isinstance(class_var, Orange.feature.Continuous):
        # Continuous class, just add the one column (what about multitarget)
        nval = 1
        ext = ma.zeros((len(array), nval))
        ext[:, 0] = classes
    elif class_var is None:
        # No class var
        nval = 0
        ext = ma.zeros((len(array), nval))
    else:
        raise TypeError("Unsuported `class_var` %r" % domain.class_var)

    array = ma.hstack((array, ext))

    som = Map(self.map_shape, topology=self.topology)
    if self.initialize == Map.InitializeLinear:
        som.initialize_map_linear(array)
    else:
        som.initialize_map_random(array)

    solver = Solver(batch_train=self.batch_train, eps=self.eps,
                    neighbourhood=self.neighbourhood,
                    radius_ini=self.radius_ini,
                    radius_fin=self.radius_fin,
                    learning_rate=self.learning_rate,
                    epoch=self.epochs)
    som = solver(array, som, progress_callback=progress_callback)

    # Remove class columns from the vectors.  With nval == 0 the slice
    # [:-0] would be [:0] and wipe every vector, so skip it in that case.
    if nval:
        for node in som:
            node.vector = node.vector[:-nval]
    return SOMMap(som, data)
def _build_crossvalidation_iterator(config, y_train, y_test=None): """ Returns a crossvalidation iterator, which contains a list of (train_indices, test_indices) that can be used to slice a dataset to perform crossvalidation. Additionally, returns the original data that was passed in and a mask specifying what data points should be used for validation. The method of splitting for CV is determined by what is specified in the conf file. The splitting of data in train/test/validate set is not done in this function- here we only return a mask for the validation data and an iterator for the train/test data. The full text is provided as a parameter so that joblib can cache the call to this function. """ cv_type = config['type'] k = config['k'] dataset_size = len(y_train) if y_test is not None: logging.warning('You have requested test set to be used for evaluation.') if cv_type != 'test_set' and cv_type != 'subsampled_test_set': raise ValueError('Wrong crossvalidation type. Only test_set ' 'or subsampled_test_set are permitted with a test set') train_indices = range(dataset_size) test_indices = range(dataset_size, dataset_size + len(y_test)) y_train = hstack([y_train, y_test]) dataset_size += len(y_test) random_state = config['random_state'] if k < 0: logging.warning('crossvalidation.k not specified, defaulting to 1') k = 1 if cv_type == 'kfold': iterator = cross_validation.KFold(dataset_size, n_folds=int(k), random_state=random_state) elif cv_type == 'skfold': iterator = cross_validation.StratifiedKFold(y_train, n_folds=int(k), random_state=random_state) elif cv_type == 'oracle': iterator = LeaveNothingOut(dataset_size) elif cv_type == 'test_set' and y_test is not None: iterator = PredefinedIndicesIterator(train_indices, test_indices) elif cv_type == 'subsampled_test_set' and y_test is not None: iterator = SubsamplingPredefinedIndicesIterator(y_train, train_indices, test_indices, int(k), config['sample_size'], config['random_state']) else: raise ValueError('Unrecognised 
crossvalidation type %(cv_type)s. The supported types are kfold, skfold, ' 'test_set, subsampled_test_set and oracle') return iterator, y_train
def join_full(self, dm_new):
    """
    Combines the content of two Datamats.

    If a parameter of the Datamats is not equal or does not exist in
    one, it is promoted to a field.

    If the two Datamats have different fields, the missing ones are
    created on the other Datamat with ``add_field_like`` before the
    fields are concatenated.

    Both `self` and `dm_new` may be modified (parameter promotion and
    field creation happen in place); the concatenated result is stored
    in `self`.

    Parameters
        dm_new : instance of Datamat
            This Datamat is added to the current one.

    Capacity to use superset of fields added by rmuil 2012/01/30
    """
    # Check if parameters are equal. If not, promote them to fields.
    # Iterate over snapshots: parameter_to_field may modify _parameters,
    # which is not allowed while iterating a live dict view.
    for nm, val in list(self._parameters.items()):
        if nm in dm_new._parameters:
            if val != dm_new._parameters[nm]:
                self.parameter_to_field(nm)
                dm_new.parameter_to_field(nm)
        else:
            self.parameter_to_field(nm)
    for nm, val in list(dm_new._parameters.items()):
        if nm in self._parameters:
            if val != self._parameters[nm]:
                self.parameter_to_field(nm)
                dm_new.parameter_to_field(nm)
        else:
            dm_new.parameter_to_field(nm)

    # Deal with mismatch in the fields
    # First those in self that do not exist in new...
    for field in self._fields[:]:
        if field not in dm_new._fields:
            dm_new.add_field_like(field, self.field(field))
    # ... then those in the new that do not exist in self.
    for field in dm_new._fields[:]:
        if field not in self._fields:
            self.add_field_like(field, dm_new.field(field))

    # Concatenate fields
    for field in self._fields:
        self.__dict__[field] = ma.hstack((self.__dict__[field],
                                          dm_new.__dict__[field]))

    # Update _num_fix
    self._num_fix += dm_new._num_fix
def clean_outliers(self):
    """
    Function to remove outliers.

    For every row of ``self.xs`` the scores at the percentiles
    ``100 - self.outlier_perc`` and ``self.outlier_perc`` are computed
    over the whole row.  All values outside that interval are masked,
    except for the last ``self.keep_n_values`` entries of the row, which
    are kept untouched.  The result replaces ``self.xs``.

    Parameters
    ----------
    self.outlier_perc : integer
        Percentile value for mstats.scoreatpercentile function.
    self.keep_n_values : integer
        Number of trailing values per row that are never masked.
    self.rows_N : integer
        Number of rows of ``self.xs`` to process.
    """
    # Outliers using percentiles - num_rows * [bound, bound]
    outlier_all = ma.array(
        [[mstats.scoreatpercentile(self.xs[i, :], 100 - self.outlier_perc),
          mstats.scoreatpercentile(self.xs[i, :], self.outlier_perc)]
         for i in range(self.rows_N)])

    cleaned = []
    for i in range(self.rows_N):
        row = self.xs[i, :]
        # Explicit split index: slicing with -keep_n_values breaks when
        # keep_n_values == 0 ([:-0] is empty and [-0:] is the whole row).
        split = max(row.shape[0] - self.keep_n_values, 0)
        head = ma.masked_outside(row[:split],
                                 outlier_all[i, 0], outlier_all[i, 1])
        cleaned.append(ma.hstack((head, row[split:])))
    self.xs = ma.array(cleaned)
def XWrap2(x,P0,fill_value=0,pow2=False):
    """
    Extend and wrap array.

    Fold the 1-D array `x` every `P0` indices.  There will typically be a
    hanging part of the array; it is padded out to a full row.

    The number of rows is always ``floor(x.size / P0) + 1``, so an input
    whose length is an exact multiple of `P0` gets one extra row that
    consists only of padding.

    Parameters
    ----------
    x : input array (plain ndarray or masked array)
    P0 : Base period, units of elements
    fill_value : value written into the padding of a plain array.  For a
        masked array the padding is masked instead.
    pow2 : If true, pad out nRows so that it's the next power of 2.
        The extra rows are masked, so the result is then always a masked
        array.

    Return
    ------
    xwrap : Wrapped array with `P0` columns.
    """
    ncad = x.size  # Number of cadences
    # floor(...) + 1 is used instead of ceil; see the note above about
    # the extra row for exact multiples.
    nrow = int(np.floor(ncad / P0) + 1)
    nExtend = nrow * P0 - ncad  # Number of padding elements

    # isinstance (rather than an exact type comparison) so that subclasses
    # of MaskedArray also keep their mask through ma.hstack.
    if isinstance(x, np.ma.MaskedArray):
        pad = ma.empty(nExtend)
        pad.mask = True
        x = ma.hstack((x, pad))
    else:
        pad = np.empty(nExtend)
        pad[:] = fill_value
        x = np.hstack((x, pad))

    xwrap = x.reshape(nrow, -1)

    if pow2:
        k = np.ceil(np.log2(nrow)).astype(int)
        nrow2 = 2**k
        fill = ma.empty((nrow2 - nrow, P0))
        fill[:] = fill_value
        fill.mask = True
        xwrap = ma.vstack([xwrap, fill])

    return xwrap
def XWrap2(x, P0, fill_value=0, pow2=False):
    """
    Extend and wrap array.

    Fold the 1-D array `x` every `P0` indices.  There will typically be a
    hanging part of the array; it is padded out to a full row.

    ``floor(x.size / P0) + 1`` rows are always produced, so an input
    whose length is an exact multiple of `P0` gets one extra row made up
    entirely of padding.

    Parameters
    ----------
    x : input array (plain ndarray or masked array)
    P0 : Base period, units of elements
    fill_value : value written into the padding of a plain array.  For a
        masked array the padding is masked instead.
    pow2 : If true, pad out nRows so that it's the next power of 2.
        The extra rows are masked, so the result is then always a masked
        array.

    Return
    ------
    xwrap : Wrapped array with `P0` columns.
    """
    ncad = x.size  # Number of cadences
    nrow = int(np.floor(ncad / P0) + 1)
    nExtend = nrow * P0 - ncad  # Number of padding elements

    # Pad out remainder of array.  isinstance is used so that subclasses
    # of MaskedArray are also stacked with ma.hstack and keep their mask.
    if isinstance(x, np.ma.MaskedArray):
        pad = ma.empty(nExtend)
        pad.mask = True
        x = ma.hstack((x, pad))
    else:
        pad = np.empty(nExtend)
        pad[:] = fill_value
        x = np.hstack((x, pad))

    xwrap = x.reshape(nrow, -1)

    if pow2:
        k = np.ceil(np.log2(nrow)).astype(int)
        nrow2 = 2**k
        fill = ma.empty((nrow2 - nrow, P0))
        fill[:] = fill_value
        fill.mask = True
        xwrap = ma.vstack([xwrap, fill])

    return xwrap
def join(self, fm_new):
    """
    Append the content of another Datamat to this one.

    Only fields present in both Datamats survive: a field that exists in
    just one of them is removed from that Datamat (with a warning) before
    the remaining fields are concatenated.  Both `self` and `fm_new` may
    therefore be modified.

    Parameters
        fm_new : instance of Datamat
            This Datamat is added to the current one.
    """
    # Drop the fields of self that the new Datamat does not have ...
    for name in self._fields[:]:
        if name in fm_new._fields:
            continue
        self.rm_field(name)
        warn("field '%s' doesn't exist in target DataMat, removing." % name)

    # ... and the fields of the new Datamat that self does not have.
    for name in fm_new._fields[:]:
        if name in self._fields:
            continue
        fm_new.rm_field(name)
        warn("field '%s' doesn't exist in source DataMat, removing." % name)

    # Concatenate the common fields.
    for name in self._fields:
        own = self.__dict__[name]
        other = fm_new.__dict__[name]
        self.__dict__[name] = ma.hstack((own, other))

    # Update _num_fix
    self._num_fix += fm_new._num_fix
def bin(lc, nbin=8):
    """
    Bin the light curve for faster computation of GP

    Compute the mean of every `nbin` measurements of ``lc['f']`` (padding
    the end with masked values if the length is not a multiple of
    `nbin`).  Non-finite fluxes are ignored in the means.  Return only the
    valid datapoints.

    Parameters
    ----------
    lc : record array with fields ``'t'`` and ``'f'``
    nbin : number of measurements per bin (default 8)

    Returns
    -------
    (x, y) : ``lc['t']`` sampled at the start of every bin and the bin
        means, restricted to bins that contain at least one valid
        measurement.
    """
    fm = ma.masked_invalid(lc['f'])
    rem = np.remainder(lc.size, nbin)
    if rem > 0:  # if points don't divide evenly, pad with masked values
        npad = nbin - rem
        pad = ma.masked_array(np.zeros(npad), True)
        fm = ma.hstack([fm, pad])

    y = fm.reshape(-1, nbin).mean(axis=1)
    x = lc['t'][::nbin]
    # getmaskarray always yields a full boolean array, so the indexing
    # below is safe even if the mask is the scalar `nomask`.
    b = ~ma.getmaskarray(y)
    return x[b], y.data[b]
def dt(t0):
    """
    Return ``t0.f`` minus a trend fitted segment by segment.

    The flux ``f`` and ``TIME`` of a deep copy of `t0` are masked with
    ``fmask``.  ``sepseg`` labels the time axis, every contiguous
    unmasked run of that label is passed to ``spldtm``, and the results
    are written back into a copy of the flux at the indices given by
    ``sL2id``.  The difference between the masked flux and that trend is
    returned.
    """
    table = copy.deepcopy(t0)
    flux = ma.masked_array(table.f, mask=table.fmask)
    time = ma.masked_array(table.TIME, mask=table.fmask)

    segments = ma.notmasked_contiguous(sepseg(time))
    # If there is only one slice.
    if isinstance(segments, slice):
        segments = [segments]

    indices = sL2id(segments)
    trend = flux.copy()
    pieces = [spldtm(time[seg], flux[seg]) for seg in segments]
    trend[indices] = ma.hstack(pieces)
    return flux - trend
def __call__(self, data, weight_id=0, progress_callback=None):
    """
    Train a self-organizing map on `data` and return it as a ``SOMMap``.

    The array from ``data.toNumpyMA()`` is extended with class columns
    (indicator matrix for a discrete class, one column for a continuous
    class, no column without a class variable).  The map is initialized
    linearly or randomly depending on ``self.initialize``, trained by
    ``Solver``, and the class columns are removed from the node vectors.

    Parameters
    ----------
    data : data table providing ``toNumpyMA()`` and ``domain``
    weight_id : not used by this method
    progress_callback : forwarded to the solver

    Raises
    ------
    TypeError
        If ``domain.class_var`` is of an unsupported kind.
    """
    array, classes, _ = data.toNumpyMA()
    domain = data.domain

    if isinstance(domain.class_var, Orange.feature.Discrete):
        # Discrete class (extend the data with class indicator matrix)
        nval = len(data.domain.class_var.values)
        ext = ma.zeros((len(array), nval))
        # NOTE(review): rows are selected where `classes.mask` is true --
        # confirm that this is the intended polarity of the mask.
        rows = [i for i, m in enumerate(classes.mask) if m]
        cols = [int(c) for c, m in zip(classes, classes.mask) if m]
        ext[(rows, cols)] = 1.0
    elif isinstance(domain.class_var, Orange.feature.Continuous):
        # Continuous class, just add the one column (what about multitarget)
        nval = 1
        ext = ma.zeros((len(array), nval))
        ext[:, 0] = classes
    elif domain.class_var is None:
        # No class var
        nval = 0
        ext = ma.zeros((len(array), nval))
    else:
        raise TypeError("Unsuported `class_var` %r" % domain.class_var)

    array = ma.hstack((array, ext))

    som = Map(self.map_shape, topology=self.topology)
    if self.initialize == Map.InitializeLinear:
        som.initialize_map_linear(array)
    else:
        som.initialize_map_random(array)

    solver = Solver(batch_train=self.batch_train, eps=self.eps,
                    neighbourhood=self.neighbourhood,
                    radius_ini=self.radius_ini,
                    radius_fin=self.radius_fin,
                    learning_rate=self.learning_rate,
                    epoch=self.epochs)
    som = solver(array, som, progress_callback=progress_callback)

    # Remove class columns from the vectors.  Skip this when there are no
    # class columns: vector[:-0] equals vector[:0], i.e. an empty vector.
    if nval:
        for node in som:
            node.vector = node.vector[:-nval]
    return SOMMap(som, data)
def estimate_cell_edges(x):
    """Convert one-dimensional vector x of size n into n + 1, where the input
    describes the centres of the cells, and the output is an estimate of the
    edges of the cell.

    The centres are extended by one extrapolated centre at each end, and
    the edges are the midpoints of neighbouring centres.  An edge flagged
    by ``nan_or_masked`` that sits next to a good edge is re-estimated
    from that neighbour and the local centre spacing."""
    spacing = ma.diff(x)
    # centres (with extra centres padded at the ends by linear interpolation)
    leading = x[0] - atleast_1d(spacing[0])
    trailing = x[-1] + atleast_1d(spacing[-1])
    x_c = ma.hstack((leading, x, trailing))
    # _f is notation from MITgcm (implies faces)
    x_f = (x_c[1:] + x_c[:-1])/2
    dx_c = np.diff(x_c)

    # Catch nan or masked values and estimate edge using dx from previous or
    # next cell
    bad_left = nan_or_masked(x_f[:-1])
    bad_right = nan_or_masked(x_f[1:])
    nan_before = ma.where(ma.logical_and(bad_left, ~bad_right))[0]
    nan_after = ma.where(ma.logical_and(~bad_left, bad_right))[0]

    x_f[nan_before] = x_f[nan_before + 1] - dx_c[nan_before + 1]
    x_f[nan_after + 1] = x_f[nan_after] + dx_c[nan_after]

    return x_f
def __call__(self, examples, weightID=0, progressCallback=None):
    """
    Train a self-organizing map on `examples` and return a ``SOMMap``.

    The data array from ``examples.toNumpyMA()`` is extended with one
    column per value of the class variable, the map is initialized and
    trained by ``Solver``, and the extra columns are removed from every
    node vector afterwards.  `weightID` is not used by this method.
    """
    data, classes, w = examples.toNumpyMA()
    nval = len(examples.domain.classVar.values)

    # Class indicator columns appended to the data.
    ext = ma.zeros((len(data), nval))
    flagged_rows = [row for row, flag in enumerate(classes.mask) if flag]
    flagged_cols = [int(value)
                    for value, flag in zip(classes, classes.mask) if flag]
    ext[(flagged_rows, flagged_cols)] = 1.0
    data = ma.hstack((data, ext))

    som = Map(self.map_shape, topology=self.topology)
    if self.initialize == Map.InitializeLinear:
        som.initialize_map_linear(data)
    else:
        som.initialize_map_random(data)

    settings = dict(batch_train=self.batch_train,
                    eps=self.eps,
                    neighbourhood=self.neighbourhood,
                    radius_ini=self.radius_ini,
                    radius_fin=self.radius_fin,
                    learning_rate=self.learning_rate,
                    epoch=self.epochs)
    trainer = Solver(**settings)
    som = trainer(data, som, progressCallback=progressCallback)

    # Strip the class columns from the node vectors again.
    for node in som:
        node.vector = node.vector[:-nval]
    return SOMMap(som, examples)
def gaussianSmooothNormalisedCorrelation(obs, wrf, sigma=20, sigmaWRF=5, thres=15, showImage=True, saveImage=True, outputFolder="", outputType="correlation", *args, **kwargs):
    """
    Use normalised correlation to study the similarity between obs and wrf.

    Both patterns are thresholded and gaussian-smoothed, the smoothed wrf
    pattern is moment-normalised against obs (once as is and once with an
    extra rotation of pi), and the better of the two alignments is used.
    A composite image of the intermediate matrices is optionally saved
    and/or shown.

    codes from armor.tests.gaussianSmoothNormalisedCorrelation2

    input:
        obs, wrf     = the two pattern objects to compare
        sigma        = sigma for obs
        sigmaWRF     = sigma for wrf
        thres        = threshold passed to setThreshold for both patterns
        showImage    = if true, draw the composite image with pylab
        saveImage    = if true, save the composite image
        outputFolder = folder for the image; if empty, obs.imageFolder is
                       used, falling back to
                       pattern.defaultOutputFolderForImages
        outputType   = "correlation"/"corr" to return the correlation;
                       anything else returns the first value of regress()

    output:
        the correlation (or regression result), or -999 if a SyntaxError
        is caught in the main section.

    side effects:
        obs.matrix is replaced by its smoothed version during the call and
        restored from a copy at the end; obs.backupMatrix() is called.
        NOTE(review): the restore is not in a ``finally`` clause, so any
        exception other than SyntaxError leaves obs.matrix smoothed.
    """
    if outputFolder =="":
        try:
            outputFolder = obs.imageFolder
        except AttributeError:
            outputFolder = pattern.defaultOutputFolderForImages
    if showImage:
        import pylab
        pylab.ion()
    k = obs     # alias
    w = wrf
    # keep the original obs matrix: it is shown in the top row of the
    # image and restored at the end
    matrix0 = copy.copy(k.matrix)
    k.getCentroid()
    k.setThreshold(thres)   #2014-05-30
    k.matrix = k.gaussianFilter(sigma).matrix
    #k.matrix = 100.* (k.matrix>=thres)
    # clear the mask of the smoothed obs matrix
    k.matrix.mask = np.zeros(k.matrix.shape)
    #k.makeImage(closeAll=True)
    #pylab.draw()
    #correlations = []
    w.getCentroid()
    w.setThreshold(thres)   #2014-05-30
    w1 = w.gaussianFilter(sigmaWRF)
    topRowName = w.name + ', gaussian(' + str(sigmaWRF) + ') and ' + k.name
    # top row of the image: raw wrf, smoothed wrf, original obs
    topRow = ma.hstack([w.matrix, w1.matrix, matrix0])
    #w1.matrix = 100.*(w1.matrix>=thres)
    w1.matrix.mask = np.zeros(w1.matrix.shape)
    try:
        ############################################
        # key lines
        w2 = w1.momentNormalise(k)
        # second candidate: same normalisation with an extra half turn
        w3 = w1.momentNormalise(k, extraAngle=np.pi)
        if outputType=="correlation" or outputType=="corr":
            corr = w2.corr(k)
            corr2 = w3.corr(k)
            # keep whichever alignment correlates better with obs
            if corr2 > corr:
                print '180 degree switch: '
                print ' ', k.name, w.name ,corr, corr2, '\n................................'
                corr = corr2
                w2 = w3
            returnValue= corr
        #elif outputType=="regression" or outputType=="regress":
        else:
            x, residuals = w2.regress(k)
            x2, residuals2 = w3.regress(k)
            # keep whichever alignment has the smaller residuals
            if residuals2 < residuals:
                print '180 degree switch: '
                print ' ', k.name, w.name, residuals2, "<", residuals, '\n................................'
                x = x2
                w2 = w3
            returnValue = x
        #
        #############################################
        #######
        # making the output image
        # bottom row: smoothed wrf, normalised wrf, smoothed obs
        w2.matrix = ma.hstack([w1.matrix, w2.matrix, k.matrix])
        w2.name = w.name + ', normalised, and ' + k.name + '\nnormalised '
        if outputType=="corr" or outputType=="correlation":
            w2.name += 'correlation: ' + str(corr)
        w2.matrix = ma.vstack([w2.matrix, topRow])
        w2.name = topRowName + '\n' + "bottom row:" + w2.name
        w2.imagePath = outputFolder + w.name + '_' + k.name + '_sigma' + str(sigma) + '_thres' + str(thres) + '.png'
        w2.vmin= -20.
        w2.vmax = 100.
        if saveImage:
            w2.saveImage()
        if showImage:
            w2.makeImage(closeAll=True)
            pylab.draw()
        #
        ############################################
    #except IndexError:
    except SyntaxError:
        # sentinel value returned when the main section fails this way
        returnValue = -999
    # restoring the matrix
    k.backupMatrix('gaussian smooth normalised correlations, sigma='+ str(sigma) + 'threshold=' + str(thres))
    k.matrix = matrix0
    return returnValue
def hstack(x):
    """Stack the sequence of arrays `x` horizontally with ``ma.hstack``."""
    stacked = ma.hstack(x)
    return stacked
def join(self, fm_new, minimal_subset=True):
    """
    Adds content of a new Datamat to this Datamat.

    Fields that exist in only one of the two Datamats are handled
    according to `minimal_subset`:

    * true (the default): the mismatching field is removed from the
      Datamat that has it;
    * false (deprecated, emits a DeprecationWarning): the field is created
      on the other Datamat with ``add_field_like``.

    If ``SUBJECTINDEX`` is a field and the first subject index of
    `fm_new` already occurs in `self`, all subject indices of `fm_new`
    are set to ``self.SUBJECTINDEX.max() + 1`` before joining.

    Both `self` and `fm_new` may be modified.

    Parameters
        fm_new : instance of Datamat
            This Datamat is added to the current one.
        minimal_subset : if true, remove fields which don't exist in both,
            instead of filling in missing elements (defaults to True)

    Capacity to use superset of fields added by rmuil 2012/01/30
    """
    # Deal with mismatch in the fields
    # First those in self that do not exist in new...
    for name in self._fields[:]:
        if name in fm_new._fields:
            continue
        if minimal_subset:
            self.rm_field(name)
        else:
            warnings.warn("This option is deprecated. Clean and Filter your data before it is joined.", DeprecationWarning)
            fm_new.add_field_like(name, self.field(name))

    # ... then those in the new that do not exist in self.
    for name in fm_new._fields[:]:
        if name in self._fields:
            continue
        if minimal_subset:
            fm_new.rm_field(name)
        else:
            warnings.warn("This option is deprecated. Clean and Filter your data before it is joined.", DeprecationWarning)
            self.add_field_like(name, fm_new.field(name))

    # Give the new data a fresh subject index if its first one is taken.
    if ('SUBJECTINDEX' in self._fields[:]
            and fm_new.SUBJECTINDEX[0] in self.SUBJECTINDEX):
        fm_new.SUBJECTINDEX[:] = self.SUBJECTINDEX.max()+1

    # Concatenate fields
    for name in self._fields:
        own = self.__dict__[name]
        other = fm_new.__dict__[name]
        self.__dict__[name] = ma.hstack((own, other))

    # Update _num_fix
    self._num_fix += fm_new._num_fix
def moving_average(dat, columns=None, window='hanning', size=5,
                   normalize=True, **kwargs):
    """
    Calculates window-averaged time-series.

    Every selected series has its mean removed, masked samples set to
    zero, its edges mirrored by `size` samples, and is then convolved
    with the window.  The mean is added back and the original mask
    (plus any NaN produced by the convolution) is re-applied.

    Parameters
    ----------
    dat : array like, record array
        Array with data.  A 1-D array is treated as a single series.
        For a record array the fields are the series.  For a plain
        N-D array each entry along the first axis is treated as one
        series, consistent with the ``dat[col]`` indexing used below.
    columns : array like
        Keys used to index `dat`.  If omitted, all fields (record array)
        or all series (plain array) are processed.
    window : string, array like
        The window to apply. Can be either an array or a string. If a
        string, creates a window of length given by `size` parameter.
        Valid strings are: `boxcar`, `hanning`, `lanczos`.  An array
        passed here is not modified.
    size : integer, optional
        Size of the window. Default window size is 5.  When the window is
        given by name, an even size is reduced to the next lower odd
        integer.  `size` is also the number of samples mirrored at each
        edge.
    normalize : bool, optional
        If `True` (default) normalizes window to have unit integral.
    kwargs : optional
        Additional arguments depending on the selected window function
        (forwarded to `lanczos`).

    Returns
    -------
    Copy of `dat` with the selected series replaced by their windowed
    averages.  For 1-D input the single averaged series is returned.

    Raises
    ------
    ValueError
        If `window` is an unknown name or neither a string nor an ndarray.
    """
    # Checks for data columns.
    ndim = None
    if columns is None:
        columns = dat.dtype.names
        if columns is None:
            ndim = dat.ndim
            if dat.ndim == 1:
                dat = ma.asarray([dat])
            dat = ma.masked_invalid(dat)
            # One series per entry of the first axis (matches dat[col]).
            columns = arange(dat.shape[0])

    # Initializes result array.
    Dat = copy(dat)

    # `basestring` only exists on Python 2.
    try:
        string_types = basestring
    except NameError:
        string_types = str

    # If window parameter is given as a string, calculate window array.
    if isinstance(window, string_types):
        # Makes sure that window size is odd and an integer
        size = 2 * ((int(size) - 1) // 2) + 1
        if window == 'hanning':
            window = hanning(size)
        elif window == 'boxcar':
            window = ones(size)
        elif window == 'lanczos':
            window = lanczos(size, **kwargs)
        else:
            raise ValueError('Invalid window `{}`.'.format(window))
    elif not isinstance(window, ndarray):
        raise ValueError('Invalid window.')

    # Normalize window to avoid input of variance.  A new array is
    # created so that a window passed in by the caller is left untouched.
    if normalize:
        window = window * (1.0 / window.sum())

    # Walks through each column/variable in data array.
    for col in columns:
        series = dat[col]
        # Full boolean mask, also for input that carries no mask.
        mask = ma.getmaskarray(series)
        mean = series.mean()
        y = series - mean
        y[mask] = 0
        # Mirror the edges to avoid edge effects.
        y = ma.hstack([y[:size][::-1], y, y[-size:][::-1]])
        # Calculates the windowed-average. In case that input values are
        # complex, calculates the complex window average.
        if iscomplex(y).any():
            Y = convolve(y.real, window, mode='same') + \
                1j * convolve(y.imag, window, mode='same')
        else:
            Y = convolve(y, window, mode='same')
        # Update result array with window-averaged values (mirrored
        # edges removed).
        Y = Y[size:-size]
        mask = mask | isnan(Y)
        Dat[col] = ma.masked_where(mask, Y) + mean

    if ndim == 1:
        return Dat[0]
    else:
        return Dat
k.matrix = 100.* (k.matrix>=thres) k.matrix.mask = np.zeros(k.matrix.shape) #k.vmax=2 #k.vmin=-2 #k.makeImage(closeAll=True) #pylab.draw() correlations = [] for w in wrf.listTemp: #try: # LOAD w, smooth by gaussian , and get threshold w.load() w.setThreshold(0) w.getCentroid() w1 = w.gaussianFilter(sigma) topRowName = w.name + ', gaussian(' + str(sigma) + ') and ' + k.name topRow = ma.hstack([w.matrix, w1.matrix, k.matrix0]) #k.load() #k.setThreshold(0) #topRow = ma.hstack([w.matrix, w1.matrix, k.matrix]) w1.matrix = 100.*(w1.matrix>=thres) w1.matrix.mask = np.zeros(w1.matrix.shape) #w1.vmax = 2 #w1.vmin =-2 #w.makeImage(closeAll=True) #pylab.draw() #print "w.matrix.shape, w.matrix.mask.shape", w.matrix.shape, w.matrix.mask.shape try: ############################################ # punchlines w2 = w1.momentNormalise(k) corr = w2.corr(k)
def moving_average(dat, columns=None, window='hanning', size=5,
                   normalize=True, **kwargs):
    """
    Calculates window-averaged time-series.

    Every selected series has its mean removed, masked samples set to
    zero, its edges mirrored by `size` samples, and is then convolved
    with the window.  The mean is added back and the original mask
    (plus any NaN produced by the convolution) is re-applied.

    Parameters
    ----------
    dat : array like, record array
        Array with data.  A 1-D array is treated as a single series.
        For a record array the fields are the series.  For a plain
        N-D array each entry along the first axis is treated as one
        series, consistent with the ``dat[col]`` indexing used below.
    columns : array like
        Keys used to index `dat`.  If omitted, all fields (record array)
        or all series (plain array) are processed.
    window : string, array like
        The window to apply. Can be either an array or a string. If a
        string, creates a window of length given by `size` parameter.
        Valid strings are: `boxcar`, `hanning`, `lanczos`.  An array
        passed here is not modified.
    size : integer, optional
        Size of the window. Default window size is 5.  When the window is
        given by name, an even size is reduced to the next lower odd
        integer.  `size` is also the number of samples mirrored at each
        edge.
    normalize : bool, optional
        If `True` (default) normalizes window to have unit integral.
    kwargs : optional
        Additional arguments depending on the selected window function
        (forwarded to `lanczos`).

    Returns
    -------
    Copy of `dat` with the selected series replaced by their windowed
    averages.  For 1-D input the single averaged series is returned.

    Raises
    ------
    ValueError
        If `window` is an unknown name or neither a string nor an ndarray.
    """
    # Checks for data columns.  Identity tests are used because `==`
    # against an array-valued `columns` compares element-wise.
    ndim = None
    if columns is None:
        columns = dat.dtype.names
        if columns is None:
            ndim = dat.ndim
            if dat.ndim == 1:
                dat = ma.asarray([dat])
            dat = ma.masked_invalid(dat)
            # One series per entry of the first axis (matches dat[col]).
            columns = arange(dat.shape[0])

    # Initializes result array.
    Dat = copy(dat)

    # `basestring` only exists on Python 2.
    try:
        string_types = basestring
    except NameError:
        string_types = str

    # If window parameter is given as a string, calculate window array.
    if isinstance(window, string_types):
        # Makes sure that window size is odd and an integer
        size = 2 * ((int(size) - 1) // 2) + 1
        if window == 'hanning':
            window = hanning(size)
        elif window == 'boxcar':
            window = ones(size)
        elif window == 'lanczos':
            window = lanczos(size, **kwargs)
        else:
            raise ValueError('Invalid window `{}`.'.format(window))
    elif not isinstance(window, ndarray):
        raise ValueError('Invalid window.')

    # Normalize window to avoid input of variance.  A new array is
    # created so that a window passed in by the caller is left untouched.
    if normalize:
        window = window * (1.0 / window.sum())

    # Walks through each column/variable in data array.
    for col in columns:
        series = dat[col]
        # Full boolean mask, also for input that carries no mask.
        mask = ma.getmaskarray(series)
        mean = series.mean()
        y = series - mean
        y[mask] = 0
        # Mirror the edges to avoid edge effects.
        y = ma.hstack([y[:size][::-1], y, y[-size:][::-1]])
        # Calculates the windowed-average. In case that input values are
        # complex, calculates the complex window average.
        if iscomplex(y).any():
            Y = convolve(y.real, window, mode='same') + \
                1j * convolve(y.imag, window, mode='same')
        else:
            Y = convolve(y, window, mode='same')
        # Update result array with window-averaged values (mirrored
        # edges removed).
        Y = Y[size:-size]
        mask = mask | isnan(Y)
        Dat[col] = ma.masked_where(mask, Y) + mean

    if ndim == 1:
        return Dat[0]
    else:
        return Dat
def __init__(self,conf,sysVars=None,stateVars=None): self.verbose_solver = False if conf.get('SolverVerbos',0) > 0: self.verbose_solver = True self.QmaxConst = deepcopy(self.QmaxConst_cf) UnControlledTank = [k for k in self.CSOT_cf.iterkeys() if self.CSOT_cf[k]['Status'] in ['m','uc']] self.ControlledTank = [k for k in self.CSOT_cf.iterkeys() if self.CSOT_cf[k]['Status']=='c'] self.ConfigTanks = [x for x in self.CSOT_cf.iterkeys() if self.CSOT_cf[x]['Status'] in ['c','m','uc']] #-----------------------------------------------------------------------------# #-------------------------Parameters------------------------------------------# #-----------------------------------------------------------------------------# # TO DO : get default from config # control time period self.t_update = conf['ControlTimeperiod'] #[s]; ToDo: This should come from the caller somehow (can change over time) self.unitConv = {'l/s':float(self.t_update)/1000.} # "*" [l/s] -> [m3/control-period] ; "/" [m3/control-period] -> [l/s] self.unitConv['m3/h'] = float(self.t_update)/3600. # "*" [m3/h] -> [m3/control-period] ; "/" [m3/control-period] -> [m3/h] # extend CSOT_cf with calculated volume limits for switching controllable <-> controlled # Update CSOT_cf Qmaintenance with sysVars if existing. for ti in self.ConfigTanks: CSOTi_cf = self.CSOT_cf[ti] # 1. 
Update if existing Qmaintenance and hPon/off first because they are used in Cbl/C switching for opcVi,cfKi in [["Qmaintenance","Qmaintenance"],["hPon","h_Pon"],["hPoff","h_Poff"]]: #Check config key existence if not cfKi in CSOTi_cf: continue vari = ':'.join((ti,opcVi)) #Check opcVariable existence if vari in sysVars: value = sysVars[vari].value if value != None: unitConv = 1.0 if cfKi in ("Qmaintenance",): unitConv = self.unitConv['m3/h'] / self.unitConv['l/s'] CSOTi_cf[cfKi] = value * unitConv # Extent or (re)init volume and InEst limits for Cbl/C switching if ti in self.ControlledTank: self._init_CblC_Config(ti,CSOTi_cf) # Parameters used in the subgoals definition cOP = conf['ParamOptiProb'] ArtOptOv = -10.0; self.Yref = cOP['YRef'] #[l/s]; ToDo: should come from system variables (parameter to be modified by user) self.Yref=self.Yref*self.unitConv['l/s'] #[m3/control-period] self.MaxWWTP= cOP['MaxWWTP'] #[l/s]; ToDo: should come from system variables (parameter to be modified by user) self.MaxWWTP=self.MaxWWTP*self.unitConv['l/s'] #[m3/control-period] self.NbRUB = len(self.ControlledTank+UnControlledTank) #self.InflowForecast = True #No Assumption self.InflowForecast = False #Assumption constant #-----------------------------------------------------------------------------# #-----------------------------------------------------------------------------# #-----------------------------------------------------------------------------# #-------------------------Parameters------------------------------------------# #-----------------------------------------------------------------------------# self.VolMax = np.array([self.CSOT_cf[x]['Volume'] for x in self.ConfigTanks]) self.OutMax = np.array([self.CSOT_cf[x]['Qmax'] for x in self.ConfigTanks]) self.OutMax = self.OutMax*self.unitConv['l/s'] self.StepDelayed = np.array([self.CSOT_cf[x]['FlowTimeToSink'] for x in self.ConfigTanks]) self.StepDelayed = (self.StepDelayed/(float(self.t_update)/60)).round() self.MaxLag = 
int(np.max(self.StepDelayed)) # number of GPC iterations over the Control horizon self.NbXPart = self.NbRUB*self.MaxLag for k in self.QmaxConst: for ik,iv in k[0].iteritems(): k[0][ik]=iv/((self.t_update)/60) OutMIN = 0; QMaint = np.array([self.CSOT_cf[x]['Qmaintenance'] for x in self.ConfigTanks]) QMaint = QMaint*self.unitConv['l/s'] self.QMaint_extend = np.ravel( np.tile(np.c_[QMaint],self.MaxLag)) #-----------------------------------------------------------------------------# #-----------------------------------------------------------------------------# #-----------------------------------------------------------------------------# #-------------------------Variables Initialisation----------------------------# #-----------------------------------------------------------------------------# # TO DO : Give some values (current volumes, few history values for outflows) #Init the default values for the alogrithm history self._init_AlgHistory() self.RUB_inEST = np.zeros((self.NbRUB,1)) self.Ov = np.zeros((self.NbRUB,1)) self.Ov_Real = np.zeros((self.NbRUB,1)) self.OutComm = np.zeros((self.NbRUB,1)) if sysVars: self.updateAlgHistory(sysVars) self.OutComm = self.RUB_OUT_Real[[-1],:].T self.Volumes = self.Volumes_Real #-----------------------------------------------------------------------------# #-----------------------------------------------------------------------------# #-----------------------------------------------------------------------------# #-------------------------Problem Formulation---------------------------------# #-----------------------------------------------------------------------------# #--- unchanged matrices between 2 successive steps ---------------------------# #--- as long as the network structure or parameters does not change ----------# #--- Decision Variables-----------------------# # NbTanks*MaxLag first variables represent the water volume over the prediction horizon grouped by tanks # The following NbTanks*MaxLag variables represent 
the outflow volume over the prediction horizon grouped by tanks # The last NbTanks*MaxLag variables represent the overflow volume over the prediction horizon grouped by tanks self.x = cvxpy.Variable(3*self.NbXPart,1,name='x') #--- Matrix C (from the Equality Constraint : Cx = D )-----------------------# C_Vol=np.identity(self.NbXPart)-np.diag(np.ones(self.NbXPart-1),-1) k=np.arange(self.MaxLag+1,self.NbXPart,self.MaxLag) C_Vol[k-1,k-2]=0 C_Out=np.identity(self.NbXPart) self.TkinCascade =[[ idx , self.ConfigTanks.index(self.CSOT_cf[val]['Sink']) ] for idx, val in enumerate(self.ConfigTanks) if self.CSOT_cf[val]['Sink'] not in 'KAHe' ] for i in range(len(self.TkinCascade)): tku = self.TkinCascade[i][0] tkd = self.TkinCascade[i][1] Ctemp = -np.eye(self.MaxLag) C_Out[tkd*self.MaxLag:(tkd+1)*self.MaxLag , tku*self.MaxLag:(tku+1)*self.MaxLag] \ = np.hstack(( Ctemp[:,self.StepDelayed[tku]:] , np.zeros((self.MaxLag,self.StepDelayed[tku])) )) C_Ov=np.identity(self.NbXPart) self.C = np.hstack([C_Vol,C_Out,C_Ov]) del(C_Vol,C_Out,C_Ov) self.EQCxD_L = self.C*self.x #--- CostFunction : min |Ax-B|-------------------------# Av = np.array([]) for i in range(self.NbRUB): Av_temp = np.tile(-self.VolMax[i]*np.identity(self.MaxLag),(1,self.NbRUB)) #use positive coeff and multiply by (-1) only at the end ? 
Av = np.concatenate([x for x in [Av,Av_temp] if x.size > 0]) del(Av_temp) self.Aout = np.array([]) for i in range(self.NbRUB): if self.CSOT_cf[self.ConfigTanks[i]]['Sink'] == 'KAHe': Atemp = np.identity(self.MaxLag) if self.StepDelayed[i]>1: Aout1 = np.zeros((self.StepDelayed[i]-1,self.MaxLag)) Aout2 = np.zeros((self.MaxLag-(self.StepDelayed[i]-1),self.MaxLag)) # Aout_temp = np.vstack([Aout1,Aout2]) Aout_temp = np.vstack([Aout1,np.identity(self.MaxLag),Aout2]) else: Aout_temp = np.vstack([np.identity(self.MaxLag),np.zeros((self.MaxLag,self.MaxLag))]) else :#tanks in cascade Aout_temp = np.zeros((2*self.MaxLag,self.MaxLag)) self.Aout = np.concatenate([x for x in [self.Aout,Aout_temp] if x.size > 0],1) del(Aout_temp) A_Ov = np.identity(self.NbXPart) Zsq = np.zeros((self.NbXPart,self.NbXPart)) Zrect = np.zeros((2*self.MaxLag,self.NbXPart)) self.A=np.vstack([np.hstack([Av,Zsq,Zsq]), np.hstack([Zrect,self.Aout,Zrect]), np.hstack([Zsq,Zsq,A_Ov])]) del(Av,A_Ov,Zsq,Zrect) self.A0 = np.array([]) for i in range(self.NbRUB): if self.CSOT_cf[self.ConfigTanks[i]]['Sink'] == 'KAHe': Atemp = np.identity(self.MaxLag) else :#tanks in cascade Atemp = np.zeros((self.MaxLag,self.MaxLag)) if self.StepDelayed[i]: A01 = np.flipud(Atemp[:,(self.MaxLag+2-self.StepDelayed[i]-1):] ) else : A01 = np.flipud(Atemp[:,(self.MaxLag+2-self.StepDelayed[i]-1+1):] ) self.A0 = np.concatenate([x for x in [self.A0,A01] if x.size > 0],1) del(A01,Atemp) self.A0 = np.vstack((self.A0,np.zeros((self.MaxLag,self.A0.shape[1])))) self.Bv = np.zeros((self.NbXPart,1)) self.B_Ov = ArtOptOv*np.ones((self.NbXPart,1)) # weighting coefficients self._update_CFWeight(conf['CostFunctionWeights']) #-----------------------------------------------------------------------------# #-------------------------Constraints-----------------------------------------# #--- constraints on Vol, Out, Ov--------------------------------# #--- x >= l --------------------------------# l_Vol = np.zeros([self.NbXPart,1]) l_Out = 
OutMIN*np.ones([self.NbXPart,1]) l_Ov = np.zeros([self.NbXPart,1]) LowerLimit = np.vstack([l_Vol,l_Out,l_Ov]) LowerLimit.flatten() # DF it probably exists a simple way to do that del(l_Vol,l_Out,l_Ov) #--- x <= u --------------------------------# u_Vol = np.tile(self.VolMax,(self.MaxLag,1)) u_Out = np.tile(self.OutMax,(self.MaxLag,1)) UpperLimit = np.hstack([u_Vol,u_Out]) UpperLimit = UpperLimit.T.reshape((self.NbXPart*2,1)) # DF it probably exists a simple way to do that del(u_Vol,u_Out) #--- Hard constraints: l<=x<=u --- self.constr_cf = ma.hstack([self.x[kk] >= LowerLimit[kk] for kk in range(3*self.NbXPart)]) for kk in range(2*self.NbXPart) : self.constr_cf = ma.hstack((self.constr_cf, self.x[kk] <= UpperLimit[kk])) ### ----- Debug output ---- #print 'Constrains block - 1' #--- Maximum InFlow to the WWTP over the prediction horizon --- #---'Constrains block - 2' TS2 = np.zeros((2*self.MaxLag,self.x.size[0])) TS2[:,self.NbXPart:2*self.NbXPart]=self.Aout for k in range(self.NbRUB): ColIdx = self.NbXPart-1+(k+1)*self.MaxLag RowIdx = np.where(TS2[:,ColIdx]==1)[0] if RowIdx: TS2[RowIdx[0]:-1,ColIdx] = 1 self.LEQ2_L = TS2*self.x #--- Maximum Flow in pipes prediction horizon --- #---'Constrains block - 3.1' IND_U = np.reshape(np.c_[self.NbXPart:2*self.NbXPart],(self.MaxLag,self.NbRUB),order='F') NbQMaxConst = len(self.QmaxConst) self.TS31 = np.zeros((0,self.x.size[0])) self.TS32 = np.zeros((0,self.x.size[0])) self.Qmax31 = np.zeros((0,1)) self.TanksName31 = [] for k in xrange( NbQMaxConst ): U_Ind = np.array([self.ConfigTanks.index(kk) for kk in self.QmaxConst[k][0].keys() if kk in self.ConfigTanks]) if U_Ind.size == 0: continue IND_Utmp = IND_U[:,U_Ind] #new addition U_Delay = np.array([self.QmaxConst[k][0][self.ConfigTanks[kk]] for kk in U_Ind]) Qmax = self.QmaxConst[k][1] Ninputs = U_Ind.size IndNode = np.tile(IND_U[-1,U_Ind],(2*self.MaxLag,1)) for kk in xrange(Ninputs): IndNode[U_Delay[kk]+np.arange(self.MaxLag),kk] = IND_Utmp[:,kk] IndNode = 
IndNode[U_Delay.max()+range(self.MaxLag),:] IndNode = IndNode.astype(int) for Ii in IndNode: if len(Ii) == 0: continue ts = np.zeros((1,self.x.size[0])) ts[0,Ii] = 1 self.TS31 = np.vstack((self.TS31, ts)) self.Qmax31 = np.vstack( (self.Qmax31, Qmax) ) self.TanksName31 = self.TanksName31 + [set(self.QmaxConst_cf[k][0].keys())]*IndNode.shape[0] #---'Constrains block - 3.2 Maxgap = U_Delay.max() - U_Delay.min() U_Delay = U_Delay - U_Delay.min() if Maxgap > 0: LHS = np.tile(np.c_[1:Maxgap+1],(Ninputs,1)).T - np.tile(U_Delay,(Maxgap,1)) for kk in xrange(Maxgap): FL = LHS[kk,:] > 0 xidx = np.diag(IND_U[np.ix_(LHS[kk,FL]-1,U_Ind[FL])]) ts = np.zeros((1,self.x.size[0])) ts[0,xidx] = 1 self.TS32 = np.vstack((self.TS32, ts)) ### ----- Debug output ---- #print 'Constrains block - 3.2 k=%s/%s %s' % (k,NbQMaxConst,NM) if self.TS32.shape[0] > 0 : self.LEQ32_L = self.TS32*self.x else : self.LEQ32_L = None for kk in range(self.Qmax31.size) : self.constr_cf = ma.hstack(( self.constr_cf , self.TS31[kk,:].reshape(1,self.NbXPart*3)*self.x <= self.Qmax31[kk] )) self.updateStructOptPB()
def join(self, fm_new, minimal_subset=True):
    """
    Adds content of a new Datamat to this Datamat.

    If the two Datamats have different fields, the mismatching fields are
    removed from both when 'minimal_subset' is true. Otherwise (deprecated)
    the Datamat lacking a field gets one created via ``add_field_like``
    before the data is concatenated.

    If both Datamats carry a 'SUBJECTINDEX' field and the first subject
    index of ``fm_new`` already occurs in this Datamat, every subject index
    of ``fm_new`` is overwritten with ``self.SUBJECTINDEX.max() + 1``.

    Note that both ``self`` and ``fm_new`` are modified in place.

    Parameters
    fm_new : instance of Datamat
        This Datamat is added to the current one.
    minimal_subset : if true, remove fields which don't exist in both,
        instead of filling in the missing field on the other Datamat
        (defaults to True)

    Capacity to use superset of fields added by rmuil 2012/01/30
    """
    # NOTE: promotion of mismatching parameters to fields used to happen
    # here but is disabled; parameters are currently left untouched.

    deprecation_msg = (
        "This option is deprecated. Clean and Filter your data before it is joined.")

    # Deal with mismatch in the fields.
    # First those in self that do not exist in new...
    # Iterate over a copy: rm_field presumably edits self._fields in place
    # (TODO confirm against the Datamat implementation).
    for field in self._fields[:]:
        if field not in fm_new._fields:
            if minimal_subset:
                self.rm_field(field)
            else:
                warnings.warn(deprecation_msg, DeprecationWarning)
                fm_new.add_field_like(field, self.field(field))

    # ... then those in the new that do not exist in self.
    for field in fm_new._fields[:]:
        if field not in self._fields:
            if minimal_subset:
                fm_new.rm_field(field)
            else:
                warnings.warn(deprecation_msg, DeprecationWarning)
                self.add_field_like(field, fm_new.field(field))

    if 'SUBJECTINDEX' in self._fields:
        new_subjects = fm_new.SUBJECTINDEX
        # An empty Datamat has no first subject index to compare; skip the
        # renumbering instead of failing with an IndexError.
        if len(new_subjects) > 0 and new_subjects[0] in self.SUBJECTINDEX:
            new_subjects[:] = self.SUBJECTINDEX.max() + 1

    # Concatenate fields
    for field in self._fields:
        self.__dict__[field] = ma.hstack(
            (self.__dict__[field], fm_new.__dict__[field]))

    # Update _num_fix
    self._num_fix += fm_new._num_fix
def _build_crossvalidation_iterator(config, x_vals, y_vals, x_test=None,
                                    y_test=None):
    """
    Returns a crossvalidation iterator, which contains a list of
    (train_indices, test_indices) that can be used to slice
    a dataset to perform crossvalidation. Additionally, returns the
    validation indices and the data that was passed in, restricted to the
    points that are not reserved for validation.

    The method of splitting for CV is determined by what is specified in
    the conf file (``config['type']``, ``config['k']`` and, depending on
    the type, ``config['ratio']``, ``config['sample_size']`` and
    ``config['random_state']``). The splitting of data in
    train/test/validate set is not done in this function- here we only
    return the validation indices and an iterator for the train/test data.

    When both ``x_test`` and ``y_test`` are given, they are appended to
    ``x_vals``/``y_vals`` and only the 'test_set' and
    'subsampled_test_set' types are permitted; any other type logs an
    error and exits the process.

    The full text is provided as a parameter so that joblib can cache the
    call to this function.

    Returns a tuple ``(iterator, validation_indices, x_vals, y_vals)``.
    Raises ValueError if the crossvalidation type is not recognised.
    """
    logging.info('Building crossvalidation iterator')
    cv_type = config['type']
    k = config['k']

    if (config['validation_slices'] != '' and
            config['validation_slices'] is not None):
        # the data should be treated as a stream, which means that it should
        # not be reordered and it should be split into a seen portion and an
        # unseen portion separated by a virtual 'now' point in the stream
        validation_data = get_named_object(config['validation_slices'])
        validation_data = validation_data(x_vals, y_vals)
    else:
        validation_data = [(0, 0)]

    # Flatten the (head, tail) slices into one list of indices. Written as a
    # comprehension because tuple-unpacking lambda parameters do not exist
    # in Python 3 and a list cannot be concatenated with a range there.
    validation_indices = [index
                          for head, tail in validation_data
                          for index in range(head, tail)]

    if x_test is not None and y_test is not None:
        logging.warning(
            'You have requested test set to be used for evaluation.')
        if cv_type != 'test_set' and cv_type != 'subsampled_test_set':
            logging.error('Wrong crossvalidation type. Only test_set '
                          'or subsampled_test_set are permitted with a test set')
            sys.exit(1)

        x_vals = list(x_vals)
        # The test points are appended after the training points, so their
        # indices directly follow the training indices.
        train_indices = list(range(len(x_vals)))
        test_indices = list(range(len(x_vals), len(x_vals) + len(x_test)))
        x_vals.extend(x_test)
        y_vals = hstack([y_vals, y_test])

    mask = np.zeros(y_vals.shape[0])  # we only mask the rows
    mask[validation_indices] = 1  # mask has 1 where the data point should be
    # used for validation and not for training/testing
    seen_data_mask = mask == 0
    dataset_size = np.sum(seen_data_mask)
    targets_seen = y_vals[seen_data_mask]

    if k < 0:
        logging.warning('crossvalidation.k not specified, defaulting to 1')
        k = 1

    if cv_type == 'kfold':
        iterator = cross_validation.KFold(dataset_size, int(k))
    elif cv_type == 'skfold':
        iterator = cross_validation.StratifiedKFold(targets_seen, int(k))
    elif cv_type == 'loo':
        iterator = cross_validation.LeaveOneOut(dataset_size, int(k))
    elif cv_type == 'bootstrap':
        ratio = config['ratio']
        # Check the ratio itself: k has already been clamped above, so a
        # test on k could never trigger this default.
        if ratio is None or ratio < 0:
            logging.warning(
                'crossvalidation.ratio not specified,defaulting to 0.8')
            ratio = 0.8
        iterator = cross_validation.Bootstrap(dataset_size,
                                              n_iter=int(k),
                                              train_size=ratio)
    elif cv_type == 'oracle':
        iterator = LeaveNothingOut(dataset_size)
    elif cv_type == 'test_set' and x_test is not None and y_test is not None:
        iterator = PredefinedIndicesIterator(train_indices, test_indices)
    elif cv_type == 'subsampled_test_set' and \
            x_test is not None and y_test is not None:
        iterator = SubsamplingPredefinedIndicesIterator(
            y_vals, train_indices, test_indices, int(k),
            config['sample_size'], config['random_state'])
    else:
        raise ValueError(
            'Unrecognised crossvalidation type \'%(cv_type)s\'. The supported '
            'types are \'kfold\', \'skfold\', \'loo\', \'bootstrap\', '
            '\'test_set\', \'subsampled_test_set\' and \'oracle\''
            % {'cv_type': cv_type})

    # Pick out the non-validation data from x_vals. This requires x_vals
    # to be cast to a format that supports slicing, such as the compressed
    # sparse row format (converting to that is also fast).
    # NOTE(review): the candidate indices are drawn from the length of
    # targets_seen (validation points already removed) and validation
    # indices are then subtracted again; y_vals is also indexed as 2-D here
    # although it is masked along axis 0 above. Left unchanged - confirm the
    # intended shapes against the callers.
    seen_indices = range(targets_seen.shape[0])
    seen_indices = sorted(set(seen_indices) - set(validation_indices))
    x_vals = [x_vals[index] for index in seen_indices]
    # y_vals is a row vector, need to transpose it to get the same shape as
    # x_vals
    y_vals = y_vals[:, seen_indices].transpose()

    return iterator, validation_indices, x_vals, y_vals