コード例 #1
0
ファイル: ingest.py プロジェクト: bhayden53/calcloud
def transformer(inputs):
    """Scale the two continuous features of ``inputs`` and label every feature.

    The first two entries (n_files, total_mb) go through a yeo-johnson power
    transform with fixed lambdas and are then standardized with fixed mean and
    standard-deviation constants (per the original notes, these values were
    calculated for each variable prior to model training). Entries 2 through 8
    are copied through unchanged.

    Args:
        inputs: indexable sequence with at least nine entries, in the order
            n_files, total_mb, drizcorr, pctecorr, crsplit, subarray,
            detector, dtype, instr.

    Returns:
        dict keyed by feature name: "x_files" and "x_size" hold the scaled
        values rounded to 5 decimals; the remaining seven keys hold the
        corresponding entries of ``inputs`` as given.
    """
    # One sample laid out as a single row with two columns (n_files, total_mb).
    sample = np.array([inputs[0], inputs[1]]).reshape(1, -1)

    # standardize=False: the transformer only applies the power transform;
    # the zero-mean / unit-variance step is done by hand below.
    power = PowerTransformer(standardize=False)
    # The lambdas are assigned directly rather than learned through fit().
    power.lambdas_ = np.array([-1.51, -0.12])
    transformed = power.transform(sample)

    # normalization constants (zero mean, unit variance)
    files_mean, files_sigma = 0.5682815234265285, 0.04222565843608133
    size_mean, size_sigma = 1.6250374589283951, 1.0396138451086632

    features = {
        "x_files": np.round((transformed[0, 0] - files_mean) / files_sigma, 5),
        "x_size": np.round((transformed[0, 1] - size_mean) / size_sigma, 5),
    }

    # Remaining features keep their position-based values from ``inputs``.
    passthrough = (
        "drizcorr",
        "pctecorr",
        "crsplit",
        "subarray",
        "detector",
        "dtype",
        "instr",
    )
    for position, name in enumerate(passthrough, start=2):
        features[name] = inputs[position]
    return features
コード例 #2
0
    def transformer(self, pt_data):
        """Build the model input row from ``self.inputs``.

        The first two entries of ``self.inputs`` (n_files, total_mb) go through
        a yeo-johnson power transform and are then standardized; the lambdas,
        means and standard deviations are read from ``pt_data`` (per the
        original notes, calculated for each variable prior to model training).
        Entries 2 through 8 are appended unchanged.

        Args:
            pt_data: mapping that provides "f_lambda", "s_lambda", "f_mean",
                "f_sigma", "s_mean" and "s_sigma".

        Returns: X inputs as 2D-array (one row, nine columns) for generating
        predictions
        """
        raw = self.inputs

        # One sample laid out as a single row with two columns (n_files, total_mb).
        sample = np.array([raw[0], raw[1]]).reshape(1, -1)

        # standardize=False: only the power transform is applied here; the
        # zero-mean / unit-variance step is done by hand below.
        power = PowerTransformer(standardize=False)
        # The lambdas are assigned directly rather than learned through fit().
        power.lambdas_ = np.array([pt_data["f_lambda"], pt_data["s_lambda"]])
        transformed = power.transform(sample)

        # normalization (zero mean, unit variance)
        scaled_files = np.round(
            (transformed[0, 0] - pt_data["f_mean"]) / pt_data["f_sigma"], 5
        )
        scaled_size = np.round(
            (transformed[0, 1] - pt_data["s_mean"]) / pt_data["s_sigma"], 5
        )

        # Scaled continuous values first, then entries 2..8 in their original order.
        row = [scaled_files, scaled_size] + [raw[i] for i in range(2, 9)]
        return np.array(row).reshape(1, -1)