def cross_validataion_cnn(t_span, height_span, image_size, downsample_size,
                          cnn_model, initial_weights, augment,
                          test_ratio=0.2, limit=10000):
    """K-fold cross validation for a CNN-like model, aggregated over t_span.

    For each t in ``t_span`` the training data is loaded, normalized by
    ``NORM_X``/``NORM_Y``, and split into ``K = int(1 / test_ratio)`` folds.
    The model weights are reset to ``initial_weights`` before every fold so
    folds do not leak trained state into each other.  Out-of-fold predictions
    are collected per t and averaged column-wise across all t.

    Args:
        t_span: iterable of lead times to evaluate.
        height_span: passed through to ``load_training_data``.
        image_size, downsample_size: passed through to ``load_training_data``.
        cnn_model: a compiled Keras-style model (fit/predict interface).
        initial_weights: weights restored before each fold via ``reset_weights``.
        augment: if truthy, augment the training split with
            ``augment_training_data(..., mode='image')``.
        test_ratio: fold fraction; K = int(1 / test_ratio).
        limit: max number of records to load per t.

    Returns:
        (avg_Y_pred, Y): across-t averaged out-of-fold predictions (1-D) and
        the normalized targets of the last processed t.
    """
    K = int(1 / test_ratio)
    Y_pred_collection = None
    for t in t_span:
        X, Y = load_training_data(t=t, height_span=height_span,
                                  image_size=image_size,
                                  downsample_size=downsample_size,
                                  limit=limit)
        X = X / NORM_X
        Y = Y / NORM_Y
        k_fold = KFold(K)
        Y_pred = np.zeros((len(Y), 1))
        for k, (train, test) in enumerate(k_fold.split(X, Y)):
            # Start every fold from the same initial weights.
            reset_weights(cnn_model, initial_weights)
            train_X, train_Y = preprocessing_data(X[train], Y[train])
            test_X, test_Y = X[test], Y[test]
            if augment:
                train_X, train_Y = augment_training_data(train_X, train_Y,
                                                         image_size,
                                                         mode='image')
            early_stop = EarlyStopping(monitor='loss', patience=0)
            cnn_model.fit(train_X, train_Y,
                          batch_size=32,
                          epochs=200,
                          verbose=1,
                          validation_data=(test_X, test_Y),
                          callbacks=[early_stop])
            Y_pred[test] = cnn_model.predict(test_X).reshape(-1, 1)
            # Flatten both sides: a (k,) target against a (k, 1) prediction
            # would silently broadcast to a (k, k) matrix inside rmse.
            print("cv {} rmse: {}".format(
                k, rmse(Y_pred[test].reshape(-1) * NORM_Y,
                        np.asarray(Y[test]).reshape(-1) * NORM_Y)))
        if Y_pred_collection is None:
            Y_pred_collection = Y_pred
        else:
            Y_pred_collection = np.concatenate((Y_pred_collection, Y_pred),
                                               axis=1)
    avg_Y_pred = np.mean(Y_pred_collection, axis=1)
    # Report the summary RMSEs in denormalized units, consistent with the
    # per-fold prints above (previously these were in normalized units).
    Y_flat = np.asarray(Y).reshape(-1)
    print("t:{} h:{} rmse:{}".format(
        t, height_span,
        rmse(Y_flat * NORM_Y, Y_pred.reshape(-1) * NORM_Y)))
    print("avg rmse:{}".format(rmse(Y_flat * NORM_Y, avg_Y_pred * NORM_Y)))
    return avg_Y_pred, Y
def time_sensitive_validataion_avg_aggregate_4_viewpoints(
        t_span, height_span, image_size, learner, augment, downsample_size,
        test_ratio=0.2, data_preprocess=None, limit=10000):
    """Chronological hold-out validation averaged over 4 viewpoints and t_span.

    The earliest ``1 - test_ratio`` fraction of the records trains the
    learner and the remaining tail is held out, preserving time order.  The
    learner is fit once per viewpoint; the per-viewpoint predictions are
    averaged, and those per-t averages are averaged again across ``t_span``.

    Args:
        t_span: iterable of lead times to evaluate.
        height_span, image_size, downsample_size, limit: passed through to
            ``load_training_data_sklearn_4_viewpoints``.
        learner: sklearn-style estimator (fit/predict interface); refit per
            viewpoint without resetting — assumes fit() reinitializes state.
        augment: if truthy, augment the training split with
            ``augment_training_data(..., mode='flatten')``.
        test_ratio: fraction of records held out at the end of the series.
        data_preprocess: forwarded to ``preprocessing_data(methods=...)``.

    Returns:
        (avg_Y_pred, Y[test]): across-t averaged hold-out predictions (1-D)
        and the hold-out targets of the last processed t.
    """
    Y_pred_collection = None
    for t in t_span:
        Xs, Y = load_training_data_sklearn_4_viewpoints(
            t=t, height_span=height_span, image_size=image_size,
            downsample_size=downsample_size, limit=limit)
        Y_1D = Y.reshape(-1)
        num_of_recs = len(Y_1D)
        split = int(num_of_recs * (1 - test_ratio))
        train = range(split)
        test = range(split, num_of_recs)
        Y_pred_4_viewpoints = np.zeros((len(test), 4))
        for idx, X in enumerate(Xs):
            X, _ = preprocessing_data(X, Y, methods=data_preprocess)
            if not augment:
                learner.fit(X[train], Y_1D[train])
            else:
                augment_X, augment_Y = augment_training_data(
                    X[train], Y_1D[train], image_size, mode='flatten')
                learner.fit(augment_X, augment_Y)
            Y_pred_4_viewpoints[:, idx] = learner.predict(X[test]).reshape(-1)
            # Compare flat (k,) arrays: Y[test] is likely (k, 1) and would
            # otherwise broadcast to a (k, k) matrix inside rmse.
            print("view point {} rmse: {}".format(
                idx + 1,
                rmse(Y_pred_4_viewpoints[:, idx], Y_1D[test])))
        Y_pred = np.mean(Y_pred_4_viewpoints, axis=1).reshape(-1, 1)
        print("t:{} h:{} rmse:{}".format(
            t, height_span, rmse(Y[test], Y_pred)))
        if Y_pred_collection is None:
            Y_pred_collection = Y_pred
        else:
            Y_pred_collection = np.concatenate((Y_pred_collection, Y_pred),
                                               axis=1)
    avg_Y_pred = np.mean(Y_pred_collection, axis=1)
    # avg_Y_pred is (k,); compare against the flat targets for the same reason.
    print("avg rmse:{}".format(rmse(Y_1D[test], avg_Y_pred)))
    return avg_Y_pred, Y[test]
def time_sensitive_validataion_avg_aggregate(t_span, height_span, image_size,
                                             learner, augment,
                                             downsample_size, test_ratio=0.2,
                                             data_preprocess=None,
                                             limit=10000):
    """Chronological hold-out validation, averaged over all t in t_span.

    The earliest ``1 - test_ratio`` fraction of the records trains the
    learner and the remaining tail is held out, preserving time order.  The
    per-t hold-out predictions are collected as columns, averaged, and the
    targets, the average, and the raw per-t columns are dumped to
    ``result_cache.txt``.

    Returns:
        (avg_Y_pred, Y[test]): the (k, 1) averaged hold-out predictions and
        the hold-out targets of the last processed t.
    """
    Y_pred_collection = None
    for t in t_span:
        X, Y = load_training_data_sklearn(t=t, height_span=height_span,
                                          image_size=image_size,
                                          downsample_size=downsample_size,
                                          limit=limit)
        X, Y = preprocessing_data(X, Y, methods=data_preprocess)
        Y_1D = Y.reshape(-1)
        num_of_recs = len(Y_1D)
        cut = int(num_of_recs * (1 - test_ratio))
        train = range(cut)
        test = range(cut, num_of_recs)
        if augment:
            X_train, Y_train = augment_training_data(X[train], Y_1D[train],
                                                     image_size,
                                                     mode='flatten')
        else:
            X_train, Y_train = X[train], Y_1D[train]
        learner.fit(X_train, Y_train)
        Y_pred = learner.predict(X[test]).reshape(-1, 1)
        if Y_pred_collection is None:
            Y_pred_collection = Y_pred
        else:
            Y_pred_collection = np.concatenate((Y_pred_collection, Y_pred),
                                               axis=1)
    avg_Y_pred = np.mean(Y_pred_collection, axis=1).reshape(-1, 1)
    # Both prints refer to the last t of the loop.
    print("t:{} h:{} rmse:{}".format(t, height_span, rmse(Y[test], Y_pred)))
    print("avg rmse:{}".format(rmse(Y[test], avg_Y_pred)))
    np.savetxt("result_cache.txt",
               np.concatenate((Y[test], avg_Y_pred, Y_pred_collection),
                              axis=1),
               fmt="%.5f")
    return avg_Y_pred, Y[test]
def cross_validataion_avg_aggregate(t_span, height_span, image_size, learner,
                                    augment, downsample_size, test_ratio=0.2,
                                    data_preprocess=None, limit=10000):
    """K-fold cross validation of a sklearn-style learner, aggregated over t.

    For each t in ``t_span``, out-of-fold predictions over all
    ``K = int(1 / test_ratio)`` folds are collected into one column; the
    columns are stacked across t and their row-wise mean is reported.

    Args:
        t_span: iterable of lead times to evaluate.
        height_span, image_size, downsample_size, limit: passed through to
            ``load_training_data_sklearn``.
        learner: sklearn-style estimator (fit/predict interface); refit per
            fold without resetting — assumes fit() reinitializes state.
        augment: if truthy, augment the training split with
            ``augment_training_data(..., mode='flatten')``.
        test_ratio: fold fraction; K = int(1 / test_ratio).
        data_preprocess: forwarded to ``preprocessing_data(methods=...)``.

    Returns:
        (Y_pred_collection, Y): the (N, len(t_span)) matrix of out-of-fold
        predictions and the targets of the last processed t.
    """
    K = int(1 / test_ratio)
    Y_pred_collection = None
    for t in t_span:
        X, Y = load_training_data_sklearn(t=t, height_span=height_span,
                                          image_size=image_size,
                                          downsample_size=downsample_size,
                                          limit=limit)
        X, Y = preprocessing_data(X, Y, methods=data_preprocess)
        Y_1D = Y.reshape(-1)
        k_fold = KFold(K)
        Y_pred = np.zeros((len(Y), 1))
        for k, (train, test) in enumerate(k_fold.split(X, Y_1D)):
            if not augment:
                learner.fit(X[train], Y_1D[train])
            else:
                augment_X, augment_Y = augment_training_data(
                    X[train], Y_1D[train], image_size, mode='flatten')
                learner.fit(augment_X, augment_Y)
            Y_pred[test] = learner.predict(X[test]).reshape(-1, 1)
            print("cv {} rmse: {}".format(k, rmse(Y_pred[test], Y[test])))
        if Y_pred_collection is None:
            Y_pred_collection = Y_pred
        else:
            Y_pred_collection = np.concatenate((Y_pred_collection, Y_pred),
                                               axis=1)
    avg_Y_pred = np.mean(Y_pred_collection, axis=1)
    print("t:{} h:{} rmse:{}".format(t, height_span, rmse(Y, Y_pred)))
    # avg_Y_pred is (N,) after np.mean(axis=1); compare against the flat
    # targets so a (N, 1) Y cannot broadcast into a (N, N) matrix in rmse.
    print("avg rmse:{}".format(rmse(Y_1D, avg_Y_pred)))
    return Y_pred_collection, Y
def time_sensitive_validataion_cnn(t_span, height_span, image_size,
                                   downsample_size, cnn_models,
                                   initial_weights, augment, holdout=0.1):
    """Chronological hold-out validation for an ensemble of CNN models.

    The earliest ``1 - holdout`` fraction of the records trains each model
    and the remaining tail is held out, preserving time order.  Every model
    in ``cnn_models`` is reset to ``initial_weights``, trained, and its
    hold-out predictions are averaged into a per-t ensemble prediction;
    those are then averaged again across ``t_span``.

    Args:
        t_span: iterable of lead times to evaluate.
        height_span, image_size, downsample_size: passed through to
            ``load_training_data`` (record limit fixed at 10000).
        cnn_models: list of compiled Keras-style models (fit/predict).
        initial_weights: weights restored per model via ``reset_weights``.
        augment: if truthy, augment the training split with
            ``augment_training_data(..., mode='image')``.
        holdout: fraction of records held out at the end of the series.

    Returns:
        (avg_Y_pred, Y[test]): across-t averaged hold-out predictions (1-D)
        and the hold-out targets of the last processed t.
    """
    Y_pred_collection = None
    for t in t_span:
        X, Y = load_training_data(t=t, height_span=height_span,
                                  image_size=image_size,
                                  downsample_size=downsample_size,
                                  limit=10000)
        num_of_recs = len(Y)
        split = int(num_of_recs * (1 - holdout))
        train = range(split)
        test = range(split, num_of_recs)
        if not augment:
            train_X, train_Y = X[train], Y[train]
        else:
            train_X, train_Y = augment_training_data(X[train], Y[train],
                                                     image_size,
                                                     mode='image')
        Y_pred = None
        for idx, cnn_model in enumerate(cnn_models):
            # Each ensemble member starts from the same initial weights.
            reset_weights(cnn_model, initial_weights)
            cnn_model.fit(train_X, train_Y,
                          batch_size=256,
                          epochs=5,
                          verbose=1,
                          validation_data=(X[test], Y[test]))
            Y_pred_each = cnn_model.predict(X[test]).reshape(-1, 1)
            # Flatten both sides: a (k,) target against the (k, 1)
            # prediction would broadcast to a (k, k) matrix inside rmse.
            print("model {} rmse: {}".format(
                idx, rmse(np.asarray(Y[test]).reshape(-1),
                          Y_pred_each.reshape(-1))))
            if Y_pred is None:
                Y_pred = Y_pred_each
            else:
                Y_pred = np.concatenate((Y_pred, Y_pred_each), axis=1)
        # Ensemble prediction for this t: mean over member columns.
        Y_pred = np.mean(Y_pred, axis=1).reshape(-1, 1)
        if Y_pred_collection is None:
            Y_pred_collection = Y_pred
        else:
            Y_pred_collection = np.concatenate((Y_pred_collection, Y_pred),
                                               axis=1)
    avg_Y_pred = np.mean(Y_pred_collection, axis=1)
    Y_test_flat = np.asarray(Y[test]).reshape(-1)
    print("t:{} h:{} rmse:{}".format(
        t, height_span, rmse(Y_test_flat, Y_pred.reshape(-1))))
    print("avg rmse:{}".format(rmse(Y_test_flat, avg_Y_pred)))
    return avg_Y_pred, Y[test]