예제 #1
0
    def transforms_available(self):
        """
        Report the transformations that can be requested by name.

        A throwaway ``tx.transforms`` object is built from this object's
        ``x`` and ``D`` attributes with an empty parameter dictionary, and
        its own ``transforms_available()`` result is passed straight back.

        Returns
        -------
        The collection produced by ``tx.transforms.transforms_available()``
        (presumably a mapping keyed by transform name, since ``transform``
        calls ``.keys()`` on it -- confirm against transforms.py).
        """
        return tx.transforms(self.x, self.D, {}).transforms_available()
예제 #2
0
    def transform(self, source_name, txfm_fcn, txfm_name, **kwargs):
        """
        Run the transform ``txfm_fcn`` on the data matrix stored under
        ``source_name`` and store the result under ``txfm_name``.

        For example, oe.data.transform('parent', 'zscore', 'zscore_parent', axis=0)
        runs the zscore transform on the 'parent' data with ``axis=0`` and the
        transformed data is then available as oe.data.D['zscore_parent'].
        On successful completion new entries keyed by ``txfm_name`` are added
        to ``self.x``, ``self.params`` and ``self.D``.

        Parameters
        ----------
        source_name: string
            the name of the source data, for example 'parent', or 'log2'
        txfm_fcn: string
            the name of the transform function. See transforms.py or run
            oe.data.transforms_available() for the list
        txfm_name: string
            the name you want to use in the data object dictionary
            oe.data.D['name'] to access the transformed data

        Other Parameters
        ----------------
        **Keep_NaN: boolean
            Default Keep_NaN=True: the transformed data is stored even if it
            contains NaNs. Set to False (or 0) to discard a transformation
            whose output contains NaNs.
        **Keep_Inf: boolean
            Default Keep_Inf=True: the transformed data is stored even if it
            contains infinite values. Set to False (or 0) to discard a
            transformation whose output contains infinite values.
        **kwargs
            All keyword arguments (including Keep_NaN / Keep_Inf when given)
            are handed unchanged to ``tx.transforms`` as the transform's
            variable parameters.

        Returns
        -------
        Whatever the transform function itself returns, or None when the
        result was discarded because of Keep_NaN=False / Keep_Inf=False.

        Warnings
        --------
        A UserWarning is issued when NaNs or infinite values are produced.

        Raises
        ------
        ValueError
            if the transform function does not exist OR if the data source
            does not exist by source_name

        Examples
        --------
        >>> import pandas as pd
        >>> import openensembles as oe
        >>> df = pd.read_csv(file)
        >>> d = oe.data(df, df.columns)
        >>> d.transform('parent', 'zscore', 'zscore')
        >>> d.transform('zscore', 'PCA', 'pca', n_components=3)
        """
        # The source must exist before anything else is attempted.
        if source_name not in self.D:
            raise ValueError(
                "ERROR: the source you requested for transformation does not exist by that name %s"
                % (source_name))

        available = self.transforms_available()
        if txfm_fcn not in available:
            raise ValueError(
                "The transform function you requested does not exist, currently the following are supported %s"
                % (list(available.keys())))

        # Default is to keep a transform even if NaN / infinite values appear.
        keep_nan = kwargs.get('Keep_NaN', 1)
        keep_inf = kwargs.get('Keep_Inf', 1)

        txfm = tx.transforms(self.x[source_name], self.D[source_name], kwargs)
        outputs = getattr(txfm, txfm_fcn)()

        # Count over the whole array. The previous sum(sum(...)) was only
        # correct for 2-D output: it raised TypeError on 1-D or empty results
        # and produced an array (not a count) for higher dimensions.
        num_nans = int(np.count_nonzero(np.isnan(txfm.data_out)))
        if num_nans:
            warnings.warn(
                "WARNING: transformation %s resulted in %d NaN values" %
                (txfm_fcn, num_nans), UserWarning)
            if not keep_nan:
                print(
                    "Transformation %s resulted in %d NaN values, and you requested not to keep a transformation with NaNs"
                    % (txfm_fcn, num_nans))
                return None

        num_inf = int(np.count_nonzero(np.isinf(txfm.data_out)))
        if num_inf:
            warnings.warn(
                "WARNING: transformation %s resulted in %d Inf values" %
                (txfm_fcn, num_inf), UserWarning)
            if not keep_inf:
                # Discarded silently; the warning above already reports it.
                return None

        # Only register the new entries once the result has been accepted.
        self.x[txfm_name] = txfm.x_out
        self.params[txfm_name] = txfm.var_params
        self.D[txfm_name] = txfm.data_out
        return outputs