def transforms_available(self):
    """
    Return the transformations that can be requested through ``transform``.

    A ``tx.transforms`` helper is built from this object's ``x`` and ``D``
    attributes with an empty parameter dictionary, and the result of its own
    ``transforms_available()`` call is handed back unchanged.

    Returns
    -------
    The collection reported by ``tx.transforms.transforms_available()``.
    ``transform`` calls ``.keys()`` on it, so it is presumably a dictionary
    keyed by transform-function name -- confirm against transforms.py.
    """
    helper = tx.transforms(self.x, self.D, {})
    return helper.transforms_available()
def transform(self, source_name, txfm_fcn, txfm_name, **kwargs):
    """
    Run the transform ``txfm_fcn`` on the data matrix stored under ``source_name``.

    For example, ``oe.data.transform('parent', 'zscore', 'zscore_parent', axis=0)``
    runs the zscore transform on the 'parent' matrix with ``axis=0`` and makes
    the result available as ``oe.data.D['zscore_parent']``.

    On success three entries are added (or overwritten) under ``txfm_name``:
    ``self.x`` (transformed x), ``self.params`` (the parameters reported by the
    transform object) and ``self.D`` (the transformed data).

    Parameters
    ----------
    source_name: string
        the name of the source data, for example 'parent', or 'log2'
    txfm_fcn: string
        the name of the transform function. See transforms.py or run
        oe.data.transforms_available() for the list
    txfm_name: string
        the name you want to use in the data object dictionary
        oe.data.D['name'] to access the transformed data

    Other Parameters
    ----------------
    **Keep_NaN: boolean
        Default True: the transformed data is stored even if it contains NaNs.
        Set to False (or 0) to discard a transform whose output contains NaNs.
    **Keep_Inf: boolean
        Default True: the transformed data is stored even if it contains
        infinite values. Set to False (or 0) to discard a transform whose
        output contains infinite values.

    All keyword arguments (including Keep_NaN / Keep_Inf when given) are
    passed through unchanged to ``tx.transforms`` as its parameter dictionary.

    Returns
    -------
    Whatever the selected transform function returns, or None when the
    result was discarded because of Keep_NaN=False / Keep_Inf=False.

    Warnings
    --------
    UserWarning
        if NaNs or infinite values are present in the transformed data

    Raises
    ------
    ValueError
        if the transform function does not exist OR if the data source does
        not exist by source_name

    Examples
    --------
    >>> import pandas as pd
    >>> import openensembles as oe
    >>> df = pd.read_csv(file)
    >>> d = oe.data(df, df.columns)
    >>> d.transform('parent', 'zscore', 'zscore')
    >>> d.transform('zscore', 'PCA', 'pca', n_components=3)
    """
    # Validate both names before doing any work so nothing is stored on error.
    if source_name not in self.D:
        raise ValueError(
            "ERROR: the source you requested for transformation does not exist by that name %s"
            % (source_name,))

    available = self.transforms_available()
    if txfm_fcn not in available:
        raise ValueError(
            "The transform function you requested does not exist, currently the following are supported %s"
            % (list(available.keys())))

    # Both flags default to keeping the transform even when the output is not finite.
    keep_nan = kwargs.get('Keep_NaN', True)
    keep_inf = kwargs.get('Keep_Inf', True)

    txfm = tx.transforms(self.x[source_name], self.D[source_name], kwargs)
    outputs = getattr(txfm, txfm_fcn)()

    # count_nonzero works for any array rank, unlike the nested built-in sum()
    # which only yields a scalar for 2-D input.
    num_nans = int(np.count_nonzero(np.isnan(txfm.data_out)))
    if num_nans:
        warnings.warn(
            "WARNING: transformation %s resulted in %d NaN values" % (txfm_fcn, num_nans),
            UserWarning)
        if not keep_nan:
            print(
                "Transformation %s resulted in %d NaN values, and you requested not to keep a transformation with NaNs"
                % (txfm_fcn, num_nans))
            return None

    num_infs = int(np.count_nonzero(np.isinf(txfm.data_out)))
    if num_infs:
        warnings.warn(
            "WARNING: transformation %s resulted in %d Inf values" % (txfm_fcn, num_infs),
            UserWarning)
        if not keep_inf:
            # Discarded silently; the warning above already reports the Inf count.
            return None

    # Only reached when the result is being kept: register it under txfm_name.
    self.x[txfm_name] = txfm.x_out
    self.params[txfm_name] = txfm.var_params
    self.D[txfm_name] = txfm.data_out
    return outputs