def test_scaler_without_centering():
    rng = np.random.RandomState(42)
    X = rng.randn(4, 5)
    X[:, 0] = 0.0  # first feature is always zero
    X_csr = sparse.csr_matrix(X)
    X_csc = sparse.csc_matrix(X)

    assert_raises(ValueError, StandardScaler().fit, X_csr)

    null_transform = StandardScaler(with_mean=False, with_std=False, copy=True)
    X_null = null_transform.fit_transform(X_csr)
    assert_array_equal(X_null.data, X_csr.data)
    X_orig = null_transform.inverse_transform(X_null)
    assert_array_equal(X_orig.data, X_csr.data)

    scaler = StandardScaler(with_mean=False).fit(X)
    X_scaled = scaler.transform(X, copy=True)
    assert_false(np.any(np.isnan(X_scaled)))

    scaler_csr = StandardScaler(with_mean=False).fit(X_csr)
    X_csr_scaled = scaler_csr.transform(X_csr, copy=True)
    assert_false(np.any(np.isnan(X_csr_scaled.data)))

    scaler_csc = StandardScaler(with_mean=False).fit(X_csc)
    X_csc_scaled = scaler_csc.transform(X_csc, copy=True)
    assert_false(np.any(np.isnan(X_csc_scaled.data)))

    assert_equal(scaler.mean_, scaler_csr.mean_)
    assert_array_almost_equal(scaler.std_, scaler_csr.std_)

    assert_equal(scaler.mean_, scaler_csc.mean_)
    assert_array_almost_equal(scaler.std_, scaler_csc.std_)

    assert_array_almost_equal(
        X_scaled.mean(axis=0), [0., -0.01, 2.24, -0.35, -0.78], 2)
    assert_array_almost_equal(X_scaled.std(axis=0), [0., 1., 1., 1., 1.])

    X_csr_scaled_mean, X_csr_scaled_std = mean_variance_axis0(X_csr_scaled)
    assert_array_almost_equal(X_csr_scaled_mean, X_scaled.mean(axis=0))
    assert_array_almost_equal(X_csr_scaled_std, X_scaled.std(axis=0))

    # Check that X has not been modified (copy)
    assert_true(X_scaled is not X)
    assert_true(X_csr_scaled is not X_csr)

    X_scaled_back = scaler.inverse_transform(X_scaled)
    assert_true(X_scaled_back is not X)
    assert_true(X_scaled_back is not X_scaled)
    assert_array_almost_equal(X_scaled_back, X)

    X_csr_scaled_back = scaler_csr.inverse_transform(X_csr_scaled)
    assert_true(X_csr_scaled_back is not X_csr)
    assert_true(X_csr_scaled_back is not X_csr_scaled)
    assert_array_almost_equal(X_csr_scaled_back.toarray(), X)

    X_csc_scaled_back = scaler_csc.inverse_transform(X_csc_scaled.tocsc())
    assert_true(X_csc_scaled_back is not X_csc)
    assert_true(X_csc_scaled_back is not X_csc_scaled)
    assert_array_almost_equal(X_csc_scaled_back.toarray(), X)
def preprocess(self):
    sc = StandardScaler()
    sc.fit(self.X_train)
    X_train_std = sc.transform(self.X_train)
    X_test_std = sc.transform(self.X_test)
    self.train_dataset = self.Dataset(data=X_train_std, target=self.y_train)
    self.test_dataset = self.Dataset(data=X_test_std, target=self.y_test)
def imputeAndScale(X_train, X_test):
    imp = Imputer()
    X_train = imp.fit_transform(X_train)
    X_test = imp.transform(X_test)
    scaler = StandardScaler().fit(X_train)
    X_test = scaler.transform(X_test)
    X_train = scaler.transform(X_train)
    return X_train, X_test
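# An equivalent sketch of imputeAndScale as a scikit-learn Pipeline, assuming
# the modern sklearn.impute.SimpleImputer in place of the deprecated Imputer
# and X_train / X_test as in the function above; chaining the two steps makes
# it impossible to accidentally refit on the test set.
from sklearn.impute import SimpleImputer
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

prep = make_pipeline(SimpleImputer(), StandardScaler())
X_train_prepped = prep.fit_transform(X_train)  # statistics come from train only
X_test_prepped = prep.transform(X_test)        # the same statistics are reused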
def xval(feature_file, removed_columns=None):
    """
    Run stratified 5-fold cross-validation on a feature file.

    :param feature_file: feature file
    :param removed_columns: index of feature columns to remove
    """
    module_logger.info('------ Load feature data ::: {}'.format(feature_file))
    clf = svm_clf()
    fs = numpy.loadtxt(feature_file, delimiter='\t', skiprows=1)
    _, n = fs.shape
    iX = fs[:, 0]
    X = fs[:, 1:n - 1]
    y = fs[:, n - 1]
    if removed_columns is not None and len(removed_columns) > 0:
        X = numpy.delete(X, removed_columns, 1)
    module_logger.info('------ data dimension ::: {} ::: {}'.format(X.shape, n))
    y_true = numpy.array([])
    y_out = numpy.array([])
    y_prob = numpy.array([])
    y_i = numpy.array([])
    std_scaler = StandardScaler()
    skf = StratifiedKFold(n_splits=5)
    for train_index, test_index in skf.split(X, y):
        # print("TRAIN:", train_index, "TEST:", test_index)
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = y[train_index], y[test_index]
        std_scaler.fit(X_train)
        X_train_scaled = std_scaler.transform(X_train, copy=True)
        X_test_scaled = std_scaler.transform(X_test, copy=True)
        clf.fit(X_train_scaled, y_train)
        y_pred = clf.predict(X_test_scaled)
        y_logp = clf.predict_proba(X_test_scaled)
        y_true = numpy.hstack((y_true, y_test))
        y_out = numpy.hstack((y_out, y_pred))
        y_prob = numpy.hstack((y_prob, numpy.max(y_logp, axis=1)))
        iX_test = iX[test_index]
        y_i = numpy.hstack((y_i, iX_test))
    return write_prediction_output(y_i, y_true, y_out,
                                   feature_file.replace('.csv', '_pred.csv'),
                                   y_prob)
def prepare_time_data(data):
    data_scaler = StandardScaler()
    data_concat = np.concatenate(data, axis=0)
    data_scaler.fit(data_concat)
    new_data = [data_scaler.transform(data_) for data_ in data]
    return data_scaler, new_data
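# A minimal usage sketch for prepare_time_data above, assuming each sequence is
# a (timesteps, features) array; the two random sequences here are illustrative
# only. The scaler is fit on all timesteps pooled together, so every sequence
# is transformed with the same mean/std and each one can be inverted later.
import numpy as np

sequences = [np.random.randn(10, 3), np.random.randn(7, 3)]  # variable lengths
scaler, scaled_sequences = prepare_time_data(sequences)
restored = [scaler.inverse_transform(s) for s in scaled_sequences]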
def normalize_features(self, scaler: StandardScaler = None) -> StandardScaler:
    '''
    Normalizes the features of the dataset using a StandardScaler
    (subtract mean, divide by standard deviation).

    If a scaler is provided, uses that scaler to perform the normalization.
    Otherwise fits a scaler to the features in the dataset and then performs
    the normalization.

    :param scaler: A fitted StandardScaler. Used if provided. Otherwise a
        StandardScaler is fit on this dataset and is then used.
    :return: A fitted StandardScaler. If a scaler is provided, this is the
        same scaler. Otherwise, this is a scaler fit on this dataset.
    '''
    if not self.data or not self.data[0].features:
        return None

    if not scaler:
        scaler = StandardScaler()
        features = np.vstack([d.features for d in self.data])
        scaler.fit(features)

    for d in self.data:
        d.set_features(scaler.transform(d.features.reshape(1, -1))[0])

    return scaler
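# A minimal sketch of the intended train/test pattern for normalize_features,
# assuming hypothetical train_data and test_data objects that expose the method
# above: fit the scaler on the training split, then pass it in for the test
# split so both are normalized with the training statistics.
scaler = train_data.normalize_features()       # fits and returns a new scaler
test_data.normalize_features(scaler=scaler)    # reuses the fitted scaler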
def __stdScaler(self):
    all_cols = list(self.data_df.columns.values)
    for col in all_cols:
        if col not in self.non_numeric_cols and col != 'time_to_failure':
            stdScaler = StandardScaler()
            stdScaler.fit(self.data_df[[col]])
            self.data_df[col] = stdScaler.transform(self.data_df[[col]])
    print('Standard Scaler applied ... ')
def obtain_sets(self, psychological_construct, percentage):
    index = self.get_index(psychological_construct)
    logging.info("Psychological construct under analysis:" + psychological_construct)
    negative_students, positive_students = self.get_instances(index)
    train_set, dev_set, test_set = self.divide_sets(negative_students,
                                                    positive_students,
                                                    percentage)
    train_set_x, train_set_y = self.get_x_and_y(train_set, index)
    logging.info("Training set shape:" + str(train_set_x.shape))
    if self.norm == z_norm_literal:
        logging.info("Z-Normalizing")
        reshaped_train_set_x = self.reshape_numpy_array(train_set_x)
        scaler = StandardScaler()
        scaler.fit(reshaped_train_set_x)
        normalized_reshaped_train_x = scaler.transform(reshaped_train_set_x)
        normalized_train_set_x = np.reshape(
            normalized_reshaped_train_x,
            (train_set_x.shape[0], train_set_x.shape[1],
             train_set_x.shape[2], train_set_x.shape[3]))
    dev_set_x, dev_set_y = self.get_x_and_y(dev_set, index)
    if self.norm == z_norm_literal:
        logging.info("Z-Normalizing")
        reshaped_dev_x = self.reshape_numpy_array(dev_set_x)
        normalized_reshaped_dev_x = scaler.transform(reshaped_dev_x)
        normalized_dev_x = np.reshape(
            normalized_reshaped_dev_x,
            (dev_set_x.shape[0], dev_set_x.shape[1],
             dev_set_x.shape[2], dev_set_x.shape[3]))
    test_set_x, test_set_y = self.get_x_and_y(test_set, index, test_flag=True)
    if self.norm == z_norm_literal:
        logging.info("Z-Normalizing")
        reshaped_test_x = self.reshape_numpy_array(test_set_x)
        normalized_reshaped_test_x = scaler.transform(reshaped_test_x)
        normalized_test_x = np.reshape(
            normalized_reshaped_test_x,
            (test_set_x.shape[0], test_set_x.shape[1],
             test_set_x.shape[2], test_set_x.shape[3]))
        return (normalized_train_set_x, train_set_y, normalized_dev_x,
                dev_set_y, normalized_test_x, test_set_y)
    else:
        return (train_set_x, train_set_y, dev_set_x, dev_set_y,
                test_set_x, test_set_y)
def test_simple_poly_dataset_scaled_cv(self):
    model = Model.create_model(
        model_type=Model.MODEL_TYPE_SVR,
        cross_validation=True,
        feature_scaling=True,
        C_range=[0.00001, 0.0001, 0.001, 0.01, 0.1, 1, 10],
        kernel=Model.KERNEL_RBF
    )
    train_dataset, test_dataset = test_datasets.get_simple_polynomial_datasets(n=1000)

    scaler = StandardScaler()
    scaler.fit(train_dataset.data)
    print("Train mean: " + str(scaler.transform(train_dataset.data).mean(axis=0)))
    print("Test mean: " + str(scaler.transform(test_dataset.data).mean(axis=0)))
    print("Train std: " + str(scaler.transform(train_dataset.data).std(axis=0)))
    print("Test std: " + str(scaler.transform(test_dataset.data).std(axis=0)))

    self._test_dataset(model, train_dataset, test_dataset, 0,
                       title="SVR with RBF kernel, scaled CV on poly dataset")
def main():
    args = parse()
    n_rollout = args.nrollout
    n_epoch = args.epoch
    savename = args.savename if args.savename is not None \
        else 'model-' + str(n_rollout) + 'unroll'
    np.random.seed(1098)
    path = args.filename
    names = ['target_pos', 'target_speed', 'pos', 'vel', 'effort']
    with h5py.File(path, 'r') as f:
        (target_pos, target_speed, pos, vel, effort) = [
            [np.array(val) for val in f[name].values()] for name in names]
    x_target = np.array(target_pos)
    x_first = np.array([pos_[0] for pos_ in pos])
    x_speed = np.array(target_speed).reshape((-1, 1))
    aux_output = [np.ones(eff.shape[0]).reshape((-1, 1)) for eff in effort]
    x = np.concatenate((x_target, x_first, x_speed), axis=1)
    input_scaler = StandardScaler()
    x = input_scaler.fit_transform(x)
    output_scaler = StandardScaler()
    effort_concat = np.concatenate([a for a in effort], axis=0)
    output_scaler.fit(effort_concat)
    effort = [output_scaler.transform(eff) for eff in effort]
    y = pad_sequences(effort, padding='post', value=0.)
    aux_output = pad_sequences(aux_output, padding='post', value=0.)
    x, x_test, y, y_test, y_aux, y_aux_test = train_test_split(
        x, y, aux_output, test_size=0.2)
    y_mask, y_test_mask = [this_y[:, :, 0] for this_y in (y_aux, y_aux_test)]
    y_aux_mask, y_aux_test_mask = [
        np.ones(this_y.shape[:2]) for this_y in (y_aux, y_aux_test)
    ]
    model = MyModel(train=[x, [y, y_aux]],
                    val=[x_test, [y_test, y_aux_test]],
                    train_mask=[y_mask, y_aux_mask],
                    val_mask=[y_test_mask, y_aux_test_mask],
                    max_unroll=n_rollout,
                    name=savename)
    if not os.path.exists('save'):
        os.makedirs('save')
    if args.train:
        model.fit(nb_epoch=n_epoch, batch_size=32)
    elif args.resume:
        model.resume(nb_epoch=n_epoch, batch_size=32)
def evalOne(parameters):
    all_obs = []
    all_pred = []
    for location in locations:
        trainX, testX, trainY, testY = splitDataForXValidation(
            location, "location", data, all_features, "target")
        normalizer_X = StandardScaler()
        trainX = normalizer_X.fit_transform(trainX)
        testX = normalizer_X.transform(testX)
        normalizer_Y = StandardScaler()
        trainY = normalizer_Y.fit_transform(trainY)
        testY = normalizer_Y.transform(testY)
        model = BaggingRegressor(
            base_estimator=SVR(kernel='rbf', C=parameters["C"], cache_size=5000),
            max_samples=parameters["max_samples"],
            n_estimators=parameters["n_estimators"],
            verbose=0,
            n_jobs=-1)
        model.fit(trainX, trainY)
        prediction = model.predict(testX)
        prediction = normalizer_Y.inverse_transform(prediction)
        testY = normalizer_Y.inverse_transform(testY)
        all_obs.extend(testY)
        all_pred.extend(prediction)
    return rmseEval(all_obs, all_pred)[1]
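# An alternative sketch for the target-scaling pattern above, assuming
# scikit-learn's TransformedTargetRegressor is available: it scales y before
# fitting and inverts the scaling on predict, removing the manual
# inverse_transform bookkeeping (the bagging wrapper and hyperparameters are
# omitted here for brevity).
from sklearn.compose import TransformedTargetRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVR

model = TransformedTargetRegressor(regressor=SVR(kernel='rbf'),
                                   transformer=StandardScaler())
# model.fit(trainX, trainY); model.predict(testX) returns original units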
def test_scalar():
    from sklearn.preprocessing import MinMaxScaler, StandardScaler
    scaler = StandardScaler()
    training = pd.read_csv(TRAIN_FEATURES_CSV, nrows=200000)
    test = pd.read_csv(TEST_FEATURES_CSV)
    # normalize the values column by column; StandardScaler expects 2D input,
    # so each column is passed as a single-column frame
    for column in TOTAL_TRAINING_FEATURE_COLUMNS:
        training[column] = scaler.fit_transform(training[[column]])
        test[column] = scaler.transform(test[[column]])
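# An alternative sketch, assuming the same TRAIN_FEATURES_CSV / TEST_FEATURES_CSV
# frames and column list as above: fit a single scaler on all feature columns
# at once instead of refitting per column, which leaves one set of statistics
# to persist and reuse.
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
training[TOTAL_TRAINING_FEATURE_COLUMNS] = scaler.fit_transform(
    training[TOTAL_TRAINING_FEATURE_COLUMNS])
test[TOTAL_TRAINING_FEATURE_COLUMNS] = scaler.transform(
    test[TOTAL_TRAINING_FEATURE_COLUMNS])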
def evalOne(parameters):
    all_obs = []
    all_pred = []
    for location in locations:
        trainX, testX, trainY, testY = splitDataForXValidation(
            location, "location", data, all_features, "target")
        normalizer_X = StandardScaler()
        trainX = normalizer_X.fit_transform(trainX)
        testX = normalizer_X.transform(testX)
        normalizer_Y = StandardScaler()
        trainY = normalizer_Y.fit_transform(trainY)
        testY = normalizer_Y.transform(testY)
        layers = []
        for _ in range(0, parameters["hidden_layers"]):
            layers.append(Layer(parameters["hidden_type"],
                                units=parameters["hidden_neurons"]))
        layers.append(Layer("Linear"))
        model = Regressor(layers=layers,
                          learning_rate=parameters["learning_rate"],
                          n_iter=parameters["iteration"],
                          random_state=42)
        X = np.array(trainX)
        y = np.array(trainY)
        model.fit(X, y)
        prediction = model.predict(testX)
        prediction = normalizer_Y.inverse_transform(prediction)
        testY = normalizer_Y.inverse_transform(testY)
        print("location: " + str(location) + " -> " +
              str(rmseEval(prediction, testY)[1]))
        all_obs.extend(testY)
        all_pred.extend(prediction)
    return rmseEval(all_obs, all_pred)[1]
def neural_net_2(train, test, val, train_out, test_out, val_out, BigSigma_inv):
    clf = MLPClassifier(solver='sgd',
                        alpha=1e-5,
                        hidden_layer_sizes=(100, 1),
                        activation='logistic',
                        batch_size=BATCH_HUMAN,
                        shuffle=True,
                        max_iter=5000)
    scaler = StandardScaler()
    scaler.fit(train)
    train1 = scaler.transform(train)
    # apply same transformation to test and validation data
    test = scaler.transform(test)
    val = scaler.transform(val)
    train_out = train_out.astype(float)
    clf.fit(X=train1, y=train_out)
    predict_test = clf.predict(test)
    predict_val = clf.predict(val)
    print("TEST ERMS ACCURACY", mean_squared_error(test_out, predict_test),
          acc_manual(test_out, predict_test))
    print("VAL ERMS ACCURACY", mean_squared_error(val_out, predict_val),
          acc_manual(val_out, predict_val))
def test_center_kernel():
    """Test that KernelCenterer is equivalent to StandardScaler
    in feature space"""
    rng = np.random.RandomState(0)
    X_fit = rng.random_sample((5, 4))
    scaler = StandardScaler(with_std=False)
    scaler.fit(X_fit)
    X_fit_centered = scaler.transform(X_fit)
    K_fit = np.dot(X_fit, X_fit.T)

    # center fit time matrix
    centerer = KernelCenterer()
    K_fit_centered = np.dot(X_fit_centered, X_fit_centered.T)
    K_fit_centered2 = centerer.fit_transform(K_fit)
    assert_array_almost_equal(K_fit_centered, K_fit_centered2)

    # center predict time matrix
    X_pred = rng.random_sample((2, 4))
    K_pred = np.dot(X_pred, X_fit.T)
    X_pred_centered = scaler.transform(X_pred)
    K_pred_centered = np.dot(X_pred_centered, X_fit_centered.T)
    K_pred_centered2 = centerer.transform(K_pred)
    assert_array_almost_equal(K_pred_centered, K_pred_centered2)
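# A standalone numpy sketch of the identity the test above relies on: centering
# X in feature space is the same as conjugating the Gram matrix with the
# centering matrix H = I - (1/n) * ones((n, n)), i.e. K_centered = H @ K @ H.
# All names below (X, H, K) are local to this sketch.
import numpy as np

rng = np.random.RandomState(0)
X = rng.random_sample((5, 4))
n = X.shape[0]
H = np.eye(n) - np.ones((n, n)) / n      # symmetric centering matrix
K = X @ X.T                              # uncentered Gram matrix
X_centered = X - X.mean(axis=0)          # centering in feature space
np.testing.assert_allclose(H @ K @ H, X_centered @ X_centered.T)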
def train_test(feature_file, test_file, removed_columns=None):
    """
    Train a model on a feature file and evaluate it on a test file.

    :param feature_file: feature file
    :param test_file: test file
    :param removed_columns: index of feature columns to remove
    """
    module_logger.info('------ Train/test model ::: {} ::: {}'.format(
        feature_file, test_file))
    clf = svm_clf()
    fs = numpy.loadtxt(feature_file, delimiter='\t', skiprows=1)
    _, n = fs.shape
    X_train = fs[:, 1:n - 1]
    y_train = fs[:, n - 1]
    fs = numpy.loadtxt(test_file, delimiter='\t', skiprows=1)
    _, n = fs.shape
    X_test = fs[:, 1:n - 1]
    y_test = fs[:, n - 1]
    y_i = fs[:, 0]
    if removed_columns is not None and len(removed_columns) > 0:
        X_test = numpy.delete(X_test, removed_columns, 1)
        X_train = numpy.delete(X_train, removed_columns, 1)
    module_logger.info('------ data dimension ::: {} ::: {} ::: {}'.format(
        X_train.shape, X_test.shape, n))
    std_scaler = StandardScaler()
    std_scaler.fit(X_train)
    X_train_scaled = std_scaler.transform(X_train, copy=True)
    X_test_scaled = std_scaler.transform(X_test, copy=True)
    clf.fit(X_train_scaled, y_train)
    y_pred = clf.predict(X_test_scaled)
    y_logp = clf.predict_proba(X_test_scaled)
    return write_prediction_output(y_i, y_test, y_pred,
                                   test_file.replace('.csv', '_pred.csv'),
                                   y_logp)
def test_scale_sparse_with_mean_raise_exception():
    rng = np.random.RandomState(42)
    X = rng.randn(4, 5)
    X_csr = sparse.csr_matrix(X)

    # check scaling and fit with direct calls on sparse data
    assert_raises(ValueError, scale, X_csr, with_mean=True)
    assert_raises(ValueError, StandardScaler(with_mean=True).fit, X_csr)

    # check transform and inverse_transform after a fit on a dense array
    scaler = StandardScaler(with_mean=True).fit(X)
    assert_raises(ValueError, scaler.transform, X_csr)

    X_transformed_csr = sparse.csr_matrix(scaler.transform(X))
    assert_raises(ValueError, scaler.inverse_transform, X_transformed_csr)
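# A small sketch of the behaviour the test above pins down, assuming scipy and
# scikit-learn are available: centering would densify a sparse matrix, so only
# with_mean=False is supported on sparse input.
import numpy as np
from scipy import sparse
from sklearn.preprocessing import StandardScaler

X_csr = sparse.csr_matrix(np.random.RandomState(0).randn(4, 5))
X_scaled = StandardScaler(with_mean=False).fit_transform(X_csr)  # works
# StandardScaler(with_mean=True).fit(X_csr) would raise ValueError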
class StandardScalerImpl():
    def __init__(self, copy=True, with_mean=True, with_std=True):
        self._hyperparams = {
            'copy': copy,
            'with_mean': with_mean,
            'with_std': with_std
        }
        self._wrapped_model = Op(**self._hyperparams)

    def fit(self, X, y=None):
        if y is not None:
            self._wrapped_model.fit(X, y)
        else:
            self._wrapped_model.fit(X)
        return self

    def transform(self, X):
        return self._wrapped_model.transform(X)
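# A minimal usage sketch for the wrapper above, assuming Op is an alias for
# sklearn.preprocessing.StandardScaler (as the hyperparameter names suggest);
# the toy array is illustrative only.
import numpy as np

X = np.array([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]])
impl = StandardScalerImpl(with_mean=True, with_std=True)
X_scaled = impl.fit(X).transform(X)  # fit returns self, so the calls chain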
def _proccess_input(self, target_pos, target_speed, pos, vel, effort):
    x_target = np.array(target_pos)
    x_first = np.array([pos_[0] for pos_ in pos])
    x_speed = np.array(target_speed).reshape((-1, 1))
    aux_output = [np.ones(eff.shape[0]).reshape((-1, 1)) for eff in effort]
    x = np.concatenate((x_target, x_first, x_speed), axis=1)
    input_scaler = StandardScaler()
    x = input_scaler.fit_transform(x)
    output_scaler = StandardScaler()
    effort_concat = np.concatenate([a for a in effort], axis=0)
    output_scaler.fit(effort_concat)
    effort = [output_scaler.transform(eff) for eff in effort]
    y = pad_sequences(effort, padding='post', value=0.)
    aux_output = pad_sequences(aux_output, padding='post', value=0.)
    x, x_test, y, y_test, y_aux, y_aux_test = train_test_split(
        x, y, aux_output, test_size=0.2)
    return x, x_test, y, y_test, y_aux, y_aux_test
class CreateStandardScaler(CreateModel):
    def fit(self, data, args):
        self.model = StandardScaler()
        with Timer() as t:
            self.model.fit(data.X_train, data.y_train)
        return t.interval

    def test(self, data):
        assert self.model is not None
        return self.model.transform(data.X_test)

    def predict(self, data):
        with Timer() as t:
            self.predictions = self.test(data)
        data.learning_task = LearningTask.REGRESSION
        return t.interval
def test_iris(self):
    train_X, test_X, train_y, test_y = data_io.get_iris_train_test()
    print("train_X's shape = %s, train_y's shape = %s" % (train_X.shape, train_y.shape))
    print("test_X's shape = %s, test_y's shape = %s" % (test_X.shape, test_y.shape))

    print("Applying standard scaling ...")
    scaler = StandardScaler()
    train_X = scaler.fit_transform(train_X)
    test_X = scaler.transform(test_X)

    # train_X = test_X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
    # train_y = test_y = np.array([0, 1, 1, 0])
    # train_X = test_X = np.array([[0], [1]])
    # train_y = test_y = np.array([0, 1])

    layers = [100]
    clf = MLPClassifier(layers, batch_size=train_X.shape[0],
                        n_epochs=100, learning_rate=0.1)
    print("clf: %s" % clf)

    print("Fitting ...")
    clf.fit(train_X, train_y)

    print("Predicting ...")
    pred_y = clf.predict(test_X)
    print("y = %s" % test_y)
    print("pred_y = \n%s" % pred_y)
    # pred_proba_y = clf.predict_proba(test_X)
    # print("pred_proba_y = \n%s" % pred_proba_y)

    accuracy = accuracy_score(test_y, pred_y)
    print("Accuracy = %g%%" % (100 * accuracy))
    self.assertGreaterEqual(accuracy, 0.89)
def split_train_validation_test(multi_time_series_df, valid_start_time,
                                test_start_time, features, time_step_lag=1,
                                horizon=1, target='target',
                                time_format='%Y-%m-%d %H:%M:%S', freq='H'):
    if not isinstance(features, list) or len(features) < 1:
        raise Exception("Bad input for features. It must be a list of "
                        "dataframe columns to use")

    train = multi_time_series_df.copy()[multi_time_series_df.index < valid_start_time]
    train_features = train[features]
    train_targets = train[target]

    # X_scaler = MinMaxScaler()
    # target_scaler = MinMaxScaler()
    # y_scaler = MinMaxScaler()
    X_scaler = StandardScaler()
    target_scaler = StandardScaler()
    y_scaler = StandardScaler()

    # 'load' is our key target. If it is in features, then we scale it.
    # If it is not 'load', then we scale the first column.
    if 'load' in features:
        tg = train[['load']]
        y_scaler.fit(tg)
    else:
        tg = train[target]
        # scale the first column
        y_scaler.fit(tg.values.reshape(-1, 1))

    train[target] = target_scaler.fit_transform(train_targets)
    X_scaler.fit(train_features)
    train[features] = X_scaler.transform(train_features)

    tensor_structure = {'X': (range(-time_step_lag + 1, 1), features)}
    train_inputs = TimeSeriesTensor(train, target=target, H=horizon, freq=freq,
                                    tensor_structure=tensor_structure)
    print(train_inputs.dataframe.head())

    look_back_dt = dt.datetime.strptime(valid_start_time, time_format) \
        - dt.timedelta(hours=time_step_lag - 1)
    valid = multi_time_series_df.copy()[
        (multi_time_series_df.index >= look_back_dt)
        & (multi_time_series_df.index < test_start_time)]
    valid_features = valid[features]
    valid[features] = X_scaler.transform(valid_features)
    tensor_structure = {'X': (range(-time_step_lag + 1, 1), features)}
    valid_inputs = TimeSeriesTensor(valid, target=target, H=horizon, freq=freq,
                                    tensor_structure=tensor_structure)
    print(valid_inputs.dataframe.head())

    # test set
    # look_back_dt = dt.datetime.strptime(test_start_time, '%Y-%m-%d %H:%M:%S') - dt.timedelta(hours=time_step_lag - 1)
    test = multi_time_series_df.copy()[test_start_time:]
    test_features = test[features]
    test[features] = X_scaler.transform(test_features)
    test_inputs = TimeSeriesTensor(test, target=target, H=horizon, freq=freq,
                                   tensor_structure=tensor_structure)

    print("time lag:", time_step_lag, "original_feature:", len(features))
    return train_inputs, valid_inputs, test_inputs, y_scaler
def test_scaler_int():
    # test that scaler converts integer input to floating
    # for both sparse and dense matrices
    rng = np.random.RandomState(42)
    X = rng.randint(20, size=(4, 5))
    X[:, 0] = 0  # first feature is always zero
    X_csr = sparse.csr_matrix(X)
    X_csc = sparse.csc_matrix(X)

    null_transform = StandardScaler(with_mean=False, with_std=False, copy=True)
    with warnings.catch_warnings(record=True):
        X_null = null_transform.fit_transform(X_csr)
    assert_array_equal(X_null.data, X_csr.data)
    X_orig = null_transform.inverse_transform(X_null)
    assert_array_equal(X_orig.data, X_csr.data)

    with warnings.catch_warnings(record=True):
        scaler = StandardScaler(with_mean=False).fit(X)
        X_scaled = scaler.transform(X, copy=True)
    assert_false(np.any(np.isnan(X_scaled)))

    with warnings.catch_warnings(record=True):
        scaler_csr = StandardScaler(with_mean=False).fit(X_csr)
        X_csr_scaled = scaler_csr.transform(X_csr, copy=True)
    assert_false(np.any(np.isnan(X_csr_scaled.data)))

    with warnings.catch_warnings(record=True):
        scaler_csc = StandardScaler(with_mean=False).fit(X_csc)
        X_csc_scaled = scaler_csc.transform(X_csc, copy=True)
    assert_false(np.any(np.isnan(X_csc_scaled.data)))

    assert_equal(scaler.mean_, scaler_csr.mean_)
    assert_array_almost_equal(scaler.std_, scaler_csr.std_)

    assert_equal(scaler.mean_, scaler_csc.mean_)
    assert_array_almost_equal(scaler.std_, scaler_csc.std_)

    assert_array_almost_equal(
        X_scaled.mean(axis=0), [0., 1.109, 1.856, 21., 1.559], 2)
    assert_array_almost_equal(X_scaled.std(axis=0), [0., 1., 1., 1., 1.])

    X_csr_scaled_mean, X_csr_scaled_std = mean_variance_axis0(
        X_csr_scaled.astype(np.float64))
    assert_array_almost_equal(X_csr_scaled_mean, X_scaled.mean(axis=0))
    assert_array_almost_equal(X_csr_scaled_std, X_scaled.std(axis=0))

    # Check that X has not been modified (copy)
    assert_true(X_scaled is not X)
    assert_true(X_csr_scaled is not X_csr)

    X_scaled_back = scaler.inverse_transform(X_scaled)
    assert_true(X_scaled_back is not X)
    assert_true(X_scaled_back is not X_scaled)
    assert_array_almost_equal(X_scaled_back, X)

    X_csr_scaled_back = scaler_csr.inverse_transform(X_csr_scaled)
    assert_true(X_csr_scaled_back is not X_csr)
    assert_true(X_csr_scaled_back is not X_csr_scaled)
    assert_array_almost_equal(X_csr_scaled_back.toarray(), X)

    X_csc_scaled_back = scaler_csc.inverse_transform(X_csc_scaled.tocsc())
    assert_true(X_csc_scaled_back is not X_csc)
    assert_true(X_csc_scaled_back is not X_csc_scaled)
    assert_array_almost_equal(X_csc_scaled_back.toarray(), X)
loadData("/data/york_hour_2013.csv", ["timestamp", "atc"], data, columns) all_features = deepcopy(columns) all_features.remove("target") all_features.remove("location") output = open(OUTPUT_DATA_FILE, 'w') output.write("location,observation,prediction\n") for location in locations: print(str(location)) trainX, testX, trainY, testY = splitDataForXValidation( location, "location", data, all_features, "target") normalizer_X = StandardScaler() trainX = normalizer_X.fit_transform(trainX) testX = normalizer_X.transform(testX) normalizer_Y = StandardScaler() trainY = normalizer_Y.fit_transform(trainY) testY = normalizer_Y.transform(testY) model = BaggingRegressor(base_estimator=SVR(kernel='rbf', C=40, cache_size=5000), max_samples=4200, n_estimators=10, verbose=0, n_jobs=-1) model.fit(trainX, trainY) prediction = model.predict(testX) prediction = normalizer_Y.inverse_transform(prediction) testY = normalizer_Y.inverse_transform(testY)
del preds
print(y.shape)
"""
y = DataFrame(clf1.predict(dataTest))
print("Prediction done")
res = DataFrame(np.nan, index=range(len(ids)), columns=["Id", "Response"])
res["Id"] = ids
res["Response"] = y.values
res.to_csv("submission1.csv", index=False)

# Scale
print("Scaling")
dataTest = imputer.transform(dataTest)
dataTest = scaler.transform(dataTest)
print("Predicting")
"""
preds = []
for i in range(6):
    dtest = dataTest.iloc[bounds[i]:bounds[i + 1]]
    y_pred = clf2.predict(dtest)
    del dtest
    preds.append(DataFrame(y_pred))
    gc.collect()
print(preds)
y = concat(preds, axis=0, copy=False)
del dataTest
del preds
    loss=loss_function)

predicted_values = []
real_values = []
for student in students_gender_train:
    train_students = students_gender_train - set([student])
    print(train_students)
    test_student = set([student])
    print(test_student)
    train_x, train_y = dataset_loader.get_x_and_y(
        students_set=train_students, index=index, test_flag=False)
    test_x, test_y = dataset_loader.get_x_and_y(
        students_set=test_student, index=index, test_flag=True)
    reshaped_train_set_x = dataset_loader.reshape_numpy_array(train_x)
    scaler = StandardScaler()
    scaler.fit(reshaped_train_set_x)
    normalized_reshaped_train_x = scaler.transform(reshaped_train_set_x)
    normalized_train_set_x = np.reshape(
        normalized_reshaped_train_x,
        (train_x.shape[0], train_x.shape[1], train_x.shape[2], train_x.shape[3]))
    reshaped_test_x = dataset_loader.reshape_numpy_array(test_x)
    normalized_reshaped_test_x = scaler.transform(reshaped_test_x)
    normalized_test_x = np.reshape(
        normalized_reshaped_test_x,
        (test_x.shape[0], test_x.shape[1], test_x.shape[2], test_x.shape[3]))
    predicted_values.extend(
        cnn_classifier.train(normalized_train_set_x, train_y,
                             normalized_test_x, test_y, student=student))
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import Lasso
from mpl_toolkits.mplot3d import Axes3D

irisdata = load_iris()
iris_X = irisdata.data
iris_y = irisdata.target

scale = StandardScaler()
scale.fit(iris_X)
iris_x = scale.transform(iris_X)

pca = PCA(n_components=3)
iris_x = pca.fit_transform(iris_x)

fig = plt.figure()
ax = fig.add_subplot(111)
# ax.scatter(iris_x[:, 0], iris_x[:, 1], iris_x[:, 2], marker='o', c=iris_y)

x_tran, x_test, y_tran, y_test = train_test_split(iris_x, iris_y,
                                                  test_size=0.3,
                                                  random_state=42)
result = {}
test_number = len(y_test)
for i in range(1, 11, 1):
    clf = Lasso(alpha=i / 10).fit(x_tran, y_tran)
    y_pre = clf.predict(x_test)
    result[i / 10] = sum(m < 0.5 for m in abs(y_test - y_pre)) / test_number
print(result)
ax.plot(list(result.keys()), list(result.values()))
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import load_breast_cancer
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
import mglearn

cancer = load_breast_cancer()

scaler = StandardScaler()
scaler.fit(cancer.data)
X_scaled = scaler.transform(cancer.data)

pca = PCA(n_components=2)
pca.fit(X_scaled)
X_pca = pca.transform(X_scaled)
print("original {}, reduction {}".format(X_scaled.shape, X_pca.shape))

plt.figure(figsize=(8, 8))
mglearn.discrete_scatter(X_pca[:, 0], X_pca[:, 1], cancer.target)
plt.legend(["malignant (cancer)", "benign"], loc="best")
plt.gca().set_aspect("equal")
plt.xlabel("1st principal component")
plt.ylabel("2nd principal component")
plt.draw()

print("PCA PC shape: {}".format(pca.components_.shape))
print("PCA PC {}".format(pca.components_))

plt.matshow(pca.components_, cmap='viridis')
plt.yticks([0, 1], ["first principal component", "second principal component"])
plt.colorbar()
class SkRanker(Ranker, SkLearner):
    '''
    Basic ranker wrapping scikit-learn functions
    '''

    def train(self, dataset_filename,
              scale=True,
              feature_selector=None,
              feature_selection_params={},
              feature_selection_threshold=.25,
              learning_params={},
              optimize=True,
              optimization_params={},
              scorers=['f1_score'],
              attribute_set=None,
              class_name=None,
              metaresults_prefix="./0-",
              **kwargs):
        plot_filename = "{}{}".format(metaresults_prefix, "featureselection.pdf")
        data, labels = dataset_to_instances(dataset_filename, attribute_set,
                                            class_name, **kwargs)
        learner = self.learner

        # the class must remember the attribute_set and the class_name
        # in order to reproduce the vectors
        self.attribute_set = attribute_set
        self.class_name = class_name

        # scale data to the mean
        if scale:
            log.info("Scaling datasets...")
            log.debug("Data shape before scaling: {}".format(data.shape))
            self.scaler = StandardScaler()
            data = self.scaler.fit_transform(data)
            log.debug("Data shape after scaling: {}".format(data.shape))
            log.debug("Mean: {} , Std: {}".format(self.scaler.mean_,
                                                  self.scaler.std_))

        # avoid any NaNs and Infs that may have occurred due to the scaling
        data = np.nan_to_num(data)

        # feature selection
        if isinstance(feature_selection_params, basestring):
            feature_selection_params = eval(feature_selection_params)
        self.featureselector, data, metadata = self.run_feature_selection(
            data, labels, feature_selector, feature_selection_params,
            feature_selection_threshold, plot_filename)

        # initialize learning method and scoring functions and optimize
        self.learner, self.scorers = self.initialize_learning_method(
            learner, data, labels, learning_params, optimize,
            optimization_params, scorers)

        log.info("Data shape before fitting: {}".format(data.shape))

        self.learner.fit(data, labels)
        self.fit = True
        return metadata

    def get_model_description(self):
        params = {}
        if self.scaler:
            params = self.scaler.get_params(deep=True)
        try:
            # these are for SVC
            if self.learner.kernel == "rbf":
                params["gamma"] = self.learner.gamma
                params["C"] = self.learner.C
                for i, n_support in enumerate(self.learner.n_support_):
                    params["n_{}".format(i)] = n_support
                log.debug(len(self.learner.dual_coef_))
                return params
            elif self.learner.kernel == "linear":
                coefficients = self.learner.coef_
                att_coefficients = {}
                for attname, coeff in zip(
                        self.attribute_set.get_names_pairwise(),
                        coefficients[0]):
                    att_coefficients[attname] = coeff
                return att_coefficients
        except AttributeError:
            pass
        try:
            # adaboost etc
            params = self.learner.get_params()
            numeric_params = OrderedDict()
            for key, value in params.iteritems():
                try:
                    value = float(value)
                except ValueError:
                    continue
                numeric_params[key] = value
            return numeric_params
        except:
            pass
        return {}

    def get_ranked_sentence(self, parallelsentence,
                            critical_attribute="rank_predicted",
                            new_rank_name="rank_hard",
                            del_orig_class_att=False,
                            bidirectional_pairs=False,
                            ties=True,
                            reconstruct='hard'):
        """
        """
        if type(self.learner) == str:
            if self.classifier:
                self.learner = self.classifier

        # this is to provide backwards compatibility for old models
        # whose classes used different attribute names
        try:
            self.learner._dual_coef_ = self.learner.dual_coef_
            self.learner._intercept_ = self.learner.intercept_
        except AttributeError:
            # it's ok if the model doesn't have these variables
            pass

        try:
            # backwards compatibility for old LogisticRegression
            try_classes = self.learner.classes_
        except AttributeError:
            self.learner.classes_ = [-1, 1]

        # de-compose multiranked sentence into pairwise comparisons
        pairwise_parallelsentences = parallelsentence.get_pairwise_parallelsentences(
            bidirectional_pairs=bidirectional_pairs,
            class_name=self.class_name,
            ties=ties)

        if len(parallelsentence.get_translations()) == 1:
            log.warning("Parallelsentence has only one target sentence")
            parallelsentence.tgt[0].add_attribute(new_rank_name, 1)
            return parallelsentence, {}
        elif len(parallelsentence.get_translations()) == 0:
            return parallelsentence, {}

        # list that will hold the pairwise parallel sentences
        # including the learner's decision
        classified_pairwise_parallelsentences = []
        resultvector = {}

        for pairwise_parallelsentence in pairwise_parallelsentences:
            # convert pairwise parallel sentence into an orange instance
            instance = parallelsentence_to_instance(
                pairwise_parallelsentence, attribute_set=self.attribute_set)

            # scale data instance to mean, based on trained scaler
            if self.scaler:
                try:
                    instance = np.nan_to_num(instance)
                    instance = self.scaler.transform(instance)
                except ValueError as e:
                    log.error("Could not transform instance: {}, scikit replied: {}".format(instance, e))
                    # raise ValueError(e)
                    pass
            try:
                if self.featureselector:
                    instance = np.nan_to_num(instance)
                    instance = self.featureselector.transform(instance)
            except AttributeError:
                pass
            log.debug('Instance = {}'.format(instance))

            # make sure no NaN or inf appears in the instance
            instance = np.nan_to_num(instance)

            # run learner for this instance
            predicted_value = self.learner.predict(instance)
            try:
                distribution = dict(zip(self.learner.classes_,
                                        self.learner.predict_proba(instance)[0]))
            except AttributeError:
                # if learner does not support per-class probability
                # (e.g. LinearSVC) assign 0.5
                distribution = dict([(cl, 0.5) for cl in self.learner.classes_])
            log.debug("Distribution: {}".format(distribution))
            log.debug("Predicted value: {}".format(predicted_value))

            # even if we have a binary learner, it may be that it cannot decide
            # between two classes; for us, this means a tie
            if not bidirectional_pairs and distribution \
                    and len(distribution) == 2 and float(distribution[1]) == 0.5:
                predicted_value = 0
                distribution[predicted_value] = 0.5

            log.debug("{}, {}, {}".format(
                pairwise_parallelsentence.get_system_names(),
                predicted_value, distribution))

            # gather several metadata from the classification, which may be needed
            resultvector.update({
                'systems': pairwise_parallelsentence.get_system_names(),
                'value': predicted_value,
                'distribution': distribution,
                'confidence': distribution[int(predicted_value)],
                # 'instance': instance,
            })

            # add the new predicted ranks as attributes of the new pairwise sentence
            pairwise_parallelsentence.add_attributes({
                "rank_predicted": predicted_value,
                "prob_-1": distribution[-1],
                "prob_1": distribution[1],
            })

            classified_pairwise_parallelsentences.append(pairwise_parallelsentence)

        # gather all classified pairwise comparisons into one parallel sentence again
        sentenceset = CompactPairwiseParallelSentenceSet(
            classified_pairwise_parallelsentences)
        if reconstruct == 'hard':
            log.debug("Applying hard reconstruction to produce rank {}".format(new_rank_name))
            ranked_sentence = sentenceset.get_multiranked_sentence(
                critical_attribute=critical_attribute,
                new_rank_name=new_rank_name,
                del_orig_class_att=del_orig_class_att)
        else:
            attribute1 = "prob_-1"
            attribute2 = "prob_1"
            log.debug("Applying soft reconstruction to produce rank {}".format(new_rank_name))
            try:
                ranked_sentence = sentenceset.get_multiranked_sentence_with_soft_ranks(
                    attribute1, attribute2, critical_attribute, new_rank_name,
                    normalize_ranking=False)
            except:
                raise ValueError("Sentenceset {} from {} caused exception".format(
                    classified_pairwise_parallelsentences, parallelsentence))
        return ranked_sentence, resultvector
from sklearn.datasets import load_boston
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
# In ElasticNet, we have two important parameters: alpha and l1_ratio
from sklearn.linear_model import ElasticNet
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from matplotlib import cm
from matplotlib.ticker import LinearLocator

bostondata = load_boston()
boston_X = bostondata.data
boston_y = bostondata.target

scale = StandardScaler()
scale.fit(boston_X)
boston_x = scale.transform(boston_X)

pca = PCA(n_components=3)
# boston_x = pca.fit_transform(boston_x)

fig = plt.figure()
ax = fig.add_subplot(projection='3d')
# ax.scatter(boston_x[:, 0], boston_x[:, 1], boston_x[:, 2], marker='o', c=boston_y)

x_tran, x_test, y_tran, y_test = train_test_split(boston_x, boston_y,
                                                  test_size=0.3,
                                                  random_state=42)
result = []
z = np.zeros(shape=(10, 10))
test_number = len(y_test)
for i in range(1, 11, 1):
    for j in range(1, 11, 1):
        clf = ElasticNet(alpha=i / 10, l1_ratio=j / 10).fit(x_tran, y_tran)
        y_pre = clf.predict(x_test)
        result.append([i, j, clf.score(x_test, y_test)])
        z[i - 1, j - 1] = clf.score(x_test, y_test)
def train_and_test(alpha, predictors, predictor_params, x_filename, y_filename,
                   n_users, percTest, featureset_to_use, diff_weighting, phi,
                   force_balanced_classes, do_scaling, optimise_predictors,
                   report, conf_report=None):
    # all_X = numpy.loadtxt(x_filename, delimiter=",")
    all_X = numpy.load(x_filename + ".npy")
    all_y = numpy.loadtxt(y_filename, delimiter=",")
    print("loaded X and y files", x_filename, y_filename)

    if numpy.isnan(all_X).any():
        print("nan in", x_filename)
        exit()
    if numpy.isnan(all_y).any():
        print("nan in", y_filename)
        exit()

    # print("selecting balanced subsample")
    print("t t split")
    X_train, X_test, y_train, y_test = train_test_split(
        all_X, all_y, test_size=percTest, random_state=666)

    # feature extraction
    # test = SelectKBest(score_func=chi2, k=100)
    # kb = test.fit(X_train, y_train)
    # # summarize scores
    # numpy.set_printoptions(precision=3)
    # print(kb.scores_)
    # features = kb.transform(X_train)
    # mask = kb.get_support()
    # # summarize selected features
    # print(features.shape)
    # X_train = X_train[:, mask]
    # X_test = X_test[:, mask]

    scaler = StandardScaler()
    rdim = FeatureAgglomeration(n_clusters=100)
    if do_scaling:
        # input(X_train.shape)
        X_train = rdim.fit_transform(X_train)
        X_test = rdim.transform(X_test)
        X_train = scaler.fit_transform(X_train)
        X_test = scaler.transform(X_test)
        with open('../../../isaac_data_files/qutor_scaler.pkl', 'wb') as output:
            pickle.dump(scaler, output, pickle.HIGHEST_PROTOCOL)
        with open('../../../isaac_data_files/qutor_rdim.pkl', 'wb') as output:
            pickle.dump(rdim, output, pickle.HIGHEST_PROTOCOL)

    # print("feature reduction...")
    # pc = PCA(n_components=100)
    # X_train = pc.fit_transform(X_train)
    # X_test = pc.transform(X_test)

    classes = numpy.unique(y_train)
    sample_weights = None
    if force_balanced_classes:
        X_train, y_train = balanced_subsample(X_train, y_train, 1.0)  # 0.118

    print("X_train shape:", X_train.shape)
    print("X_test shape:", X_test.shape)

    print("tuning classifier ...")
    for ix, p in enumerate(predictors):
        print(type(p))
        print(p.get_params().keys())
        if optimise_predictors == True and len(predictor_params[ix]) > 1:
            pbest = run_random_search(p, X_train, y_train, predictor_params[ix])
        else:
            pbest = p.fit(X_train, y_train)
        predictors[ix] = pbest

    print("pickling classifier ...")
    for ix, p in enumerate(predictors):
        p_name = predictor_params[ix]['name']
        with open('../../../isaac_data_files/p_{}_{}_{}.pkl'.format(
                p_name, alpha, phi), 'wb') as output:
            pickle.dump(p, output, pickle.HIGHEST_PROTOCOL)
    print("done!")

    # report.write("* ** *** |\| \` | | |) /; `|` / |_| *** ** *\n")
    # report.write("* ** *** | | /_ |^| |) || | \ | | *** ** *\n")
    # report.write("RUNS,P,FB,WGT,ALPHA,PHI,SCL,0p,0r,0F,0supp,1p,1r,1F,1supp,avg_p,avg_r,avg_F,#samples\n")
    for ix, p in enumerate(predictors):
        report.write(",".join(map(str, (
            all_X.shape[0],
            str(p).replace(",", ";").replace("\n", ""),
            force_balanced_classes, diff_weighting, alpha, phi, do_scaling))))

        y_pred_tr = p.predict(X_train)
        y_pred = p.predict(X_test)

        # for x, y, yp in zip(X_train, y_test, y_pred):
        if conf_report:
            conf_report.write(str(p).replace(",", ";").replace("\n", "") + "\n")
            conf_report.write(str(alpha) + "," + str(phi) + "\n")
            conf_report.write(str(confusion_matrix(y_test, y_pred)) + "\n")
            conf_report.write("\n")

        # p = precision_score(y_test, y_pred, average=None, labels=classes)
        # r = recall_score(y_test, y_pred, average=None, labels=classes)
        # F = f1_score(y_test, y_pred, average=None, labels=classes)
        p, r, F, s = precision_recall_fscore_support(
            y_test, y_pred, labels=classes, average=None,
            warn_for=('precision', 'recall', 'f-score'))
        avp, avr, avF, _ = precision_recall_fscore_support(
            y_test, y_pred, labels=classes, average='weighted',
            warn_for=('precision', 'recall', 'f-score'))
        for ix, c in enumerate(classes):
            report.write(",{},{},{},{},{},".format(c, p[ix], r[ix], F[ix], s[ix]))
        report.write("{},{},{},{}\n".format(avp, avr, avF, numpy.sum(s)))
        # report.write(classification_report(y_test, y_pred) + "\n")
        # report.write("------END OF CLASSIFIER------\n")
        report.flush()
    return X_train, X_test, y_pred_tr, y_pred, y_test, scaler
        tf.keras.metrics.AUC(name='auc')
    ])

save_best_callback = tf.keras.callbacks.ModelCheckpoint(
    './model-{epoch:02d}-{acc:.2f}.hdf5',
    monitor='acc',
    verbose=1,
    save_best_only=True,
    save_weights_only=False,
    save_freq=1)

logdir = os.path.join('tflogs',
                      datetime.datetime.now().strftime('%Y%m%d-%H%M%S'))
tb_train_callback = tf.keras.callbacks.TensorBoard(logdir,
                                                   histogram_freq=1,
                                                   profile_batch=0)

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)

model.fit(
    X_train_scaled,
    y_train,
    class_weight=class_weight,
    # batch_size=64,
    validation_split=0.1,
    callbacks=[save_best_callback, tb_train_callback],
    epochs=50)

# model = tf.keras.models.load_model('./model-35-0.88.hdf5')
X_test_scaled = scaler.transform(X_test)
model.evaluate(X_test_scaled, y_test)
# print(np.round(model.predict(X_test)))
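# A follow-on sketch, assuming joblib is available: the ModelCheckpoint above
# only saves the Keras model, so the fitted scaler has to be persisted
# separately to reproduce the same preprocessing at inference time; the
# './scaler.joblib' path is illustrative only.
import joblib

joblib.dump(scaler, './scaler.joblib')
# later, before predicting on new data:
# scaler = joblib.load('./scaler.joblib')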