def transformer(inputs, pt_data=None):
    """Power-transform and standardize the two continuous input features.

    The first two entries of ``inputs`` (``n_files`` and ``total_mb``) are run
    through scikit-learn's ``PowerTransformer`` (Yeo-Johnson by default) using
    lambdas computed prior to model training, then standardized with the
    matching mean and standard deviation and rounded to 5 decimals. Entries
    2 through 8 are passed through unchanged.

    Args:
        inputs: Indexable sequence read at positions 0 through 8, in the order
            (n_files, total_mb, drizcorr, pctecorr, crsplit, subarray,
            detector, dtype, instr).
        pt_data: Optional mapping with the keys ``"f_lambda"``, ``"s_lambda"``,
            ``"f_mean"``, ``"f_sigma"``, ``"s_mean"`` and ``"s_sigma"``
            (``f_*`` apply to n_files, ``s_*`` to total_mb). When omitted, the
            values that were previously hard-coded in this function are used.

    Returns:
        dict: The transformed features under ``"x_files"`` and ``"x_size"``,
        plus the seven pass-through values under ``"drizcorr"``,
        ``"pctecorr"``, ``"crsplit"``, ``"subarray"``, ``"detector"``,
        ``"dtype"`` and ``"instr"``.

    Raises:
        KeyError: If a caller-supplied ``pt_data`` lacks one of the keys above.
    """
    if pt_data is None:
        pt_data = {
            "f_lambda": -1.51,
            "s_lambda": -0.12,
            "f_mean": 0.5682815234265285,
            "f_sigma": 0.04222565843608133,
            "s_mean": 1.6250374589283951,
            "s_sigma": 1.0396138451086632,
        }

    n_files, total_mb = inputs[0], inputs[1]

    # One sample with two columns, the layout PowerTransformer.transform takes.
    x = np.array([[n_files], [total_mb]]).reshape(1, -1)

    # lambdas_ is assigned by hand instead of calling fit() so that the
    # stored training-time lambdas are reused for inference.
    pt = PowerTransformer(standardize=False)
    pt.lambdas_ = np.array([pt_data["f_lambda"], pt_data["s_lambda"]])
    xt = pt.transform(x)

    # Standardize (subtract the mean, divide by sigma) with the stored stats.
    x_files = np.round((xt[0, 0] - pt_data["f_mean"]) / pt_data["f_sigma"], 5)
    x_size = np.round((xt[0, 1] - pt_data["s_mean"]) / pt_data["s_sigma"], 5)

    return {
        "x_files": x_files,
        "x_size": x_size,
        "drizcorr": inputs[2],
        "pctecorr": inputs[3],
        "crsplit": inputs[4],
        "subarray": inputs[5],
        "detector": inputs[6],
        "dtype": inputs[7],
        "instr": inputs[8],
    }
def transformer(self, pt_data):
    """Build the single-row feature array used for generating predictions.

    The two continuous features at indices 0 and 1 of ``self.inputs``
    (``n_files`` and ``total_mb``) are passed through scikit-learn's
    ``PowerTransformer`` (Yeo-Johnson by default) using the lambdas in
    ``pt_data``, then standardized with the supplied mean and standard
    deviation and rounded to 5 decimals. Entries 2 through 8 of
    ``self.inputs`` are appended unchanged.

    Args:
        pt_data: Mapping that provides ``"f_lambda"``, ``"s_lambda"``,
            ``"f_mean"``, ``"f_sigma"``, ``"s_mean"`` and ``"s_sigma"``
            (``f_*`` apply to n_files, ``s_*`` to total_mb).

    Returns:
        numpy.ndarray: 2D array with a single row holding the two
        transformed features followed by the seven pass-through values.
    """
    raw = self.inputs
    file_count = raw[0]
    megabytes = raw[1]

    # One sample with two columns, the layout PowerTransformer.transform takes.
    continuous = np.array([[file_count], [megabytes]]).reshape(1, -1)

    # lambdas_ is assigned by hand instead of calling fit() so that the
    # supplied training-time lambdas are reused.
    power = PowerTransformer(standardize=False)
    power.lambdas_ = np.array([pt_data["f_lambda"], pt_data["s_lambda"]])
    transformed = power.transform(continuous)

    # Standardize each transformed feature with its own mean and sigma.
    files_mean, files_sigma = pt_data["f_mean"], pt_data["f_sigma"]
    size_mean, size_sigma = pt_data["s_mean"], pt_data["s_sigma"]
    scaled_files = np.round((transformed[0, 0] - files_mean) / files_sigma, 5)
    scaled_size = np.round((transformed[0, 1] - size_mean) / size_sigma, 5)

    # Positions 2..8 are copied over as-is, in their original order.
    passthrough = [raw[idx] for idx in range(2, 9)]
    row = [scaled_files, scaled_size] + passthrough
    return np.array(row).reshape(1, -1)