def train(net_type, generator_fn_str, dataset_file, build_net_fn, featurized=True):
    d = Dataset(dataset_file + 'train.pgn')
    generator_fn = getattr(d, generator_fn_str)
    d_test = Dataset(dataset_file + 'test.pgn')
    X_val, y_val = d_test.load(generator_fn.__name__,
                               featurized=featurized,
                               refresh=False,
                               board=net_type)
    board_num_channels = X_val[0].shape[1] if net_type == 'to' else X_val[0].shape[0]
    model = build_net_fn(board_num_channels=board_num_channels, net_type=net_type)
    start_time = str(int(time.time()))
    try:
        plot_model(model, start_time, net_type)
    except Exception:
        print("Skipping plot")

    from keras.callbacks import ModelCheckpoint
    checkpointer = ModelCheckpoint(
        filepath=get_filename_for_saving(start_time, net_type),
        verbose=2,
        save_best_only=True)

    model.fit_generator(generator_fn(featurized=featurized, board=net_type),
                        samples_per_epoch=SAMPLES_PER_EPOCH,
                        nb_epoch=NUMBER_EPOCHS,
                        callbacks=[checkpointer],
                        validation_data=(X_val, y_val),
                        verbose=VERBOSE_LEVEL)
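# The constants and the checkpoint-path helper referenced above are defined
# elsewhere in the project. A minimal sketch of plausible definitions follows;
# the values are assumptions, and only the directory layout is inferred from
# the checkpoint paths loaded by the evaluation scripts in this repo.
SAMPLES_PER_EPOCH = 100000
NUMBER_EPOCHS = 100
VERBOSE_LEVEL = 1


def get_filename_for_saving(start_time, net_type):
    # Layout assumed from paths like "saved/policy/1481219504/94-4.61.hdf5";
    # Keras' ModelCheckpoint fills in {epoch} and {val_loss} at save time.
    return "saved/%s/%s/{epoch:02d}-{val_loss:.2f}.hdf5" % (net_type, start_time)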
def main(args):
    dataset_root = args.dataset
    num_repeats = args.repeats
    models = args.models
    print(models)

    log_dir = "logs"
    results_dir = "results"
    models_dir = "models"

    dataset = Dataset(dataset_root=dataset_root)
    train_X, train_y = dataset.load()
    test_X, test_y = dataset.load(split="test")

    for model_type in models:
        print(">>>>>>>>>>>>> Running experiments for '{}'".format(model_type))
        _log_dir = os.path.join(log_dir, model_type)
        precision, recall, f1, model = run_experiment(
            repeats=num_repeats,
            model_type=model_type,
            train_data={"X": train_X, "y": train_y},
            test_data={"X": test_X, "y": test_y},
            tb_log_dir=_log_dir)

        print(">>>>>>>>>>>>> Writing results for '{}'".format(model_type))
        with open(os.path.join(results_dir, model_type + ".txt"), "w") as res:
            line = "{}:\n".format(model_type)
            line += "Precision: {:.5f} (+/-{:.5f}) \n Recall: {:.5f} (+/-{:.5f}) \n F1: {:.5f} (+/-{:.5f})\n".format(
                precision[0], precision[1], recall[0], recall[1], f1[0], f1[1])
            line += "--------------------------------------------------------------------------------------------------------------------------------- \n"
            res.write(line)

        print(">>>>>>>>>>>>> Saving the model: {}.h5".format(model_type))
        model.model.save(os.path.join(models_dir, model_type + ".h5"))
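# A minimal CLI sketch matching the attributes this main() reads
# (args.dataset, args.repeats, args.models); the flag names and defaults are
# assumptions, not the project's actual parser.
if __name__ == "__main__":
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument("--dataset", required=True, help="dataset root directory")
    parser.add_argument("--repeats", type=int, default=5, help="runs per model type")
    parser.add_argument("--models", nargs="+", help="model types to evaluate")
    main(parser.parse_args())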
def validate(model_hdf5, net_type, generator_fn_str, dataset_file, featurized=True):
    from keras.models import load_model
    import data

    d_test = Dataset(dataset_file + 'test.pgn')
    X_val, y_val = d_test.load(generator_fn_str,
                               featurized=featurized,
                               refresh=False,
                               board="both")
    boards = data.board_from_state(X_val)

    if net_type == "from":
        model_from = load_model("saved/" + model_hdf5)
        y_hat_from = model_from.predict(X_val)
        num_correct = 0
        for i in range(len(boards)):
            if y_val[0][i, np.argmax(y_hat_from[i])] > 0:
                num_correct += 1
        print(num_correct / len(boards))
    elif net_type == "to":
        model_to = load_model("saved/" + model_hdf5)
        y_hat_to = model_to.predict([
            X_val,
            y_val[0].reshape(y_val[0].shape[0], 1, X_val.shape[2], X_val.shape[3])
        ])
        num_correct = 0
        for i in range(len(boards)):
            if y_val[1][i, np.argmax(y_hat_to[i])] > 0:
                num_correct += 1
        print(num_correct / len(boards))
    elif net_type == "from_to":
        model_from = load_model("saved/" + model_hdf5[0])
        model_to = load_model("saved/" + model_hdf5[1])
        y_hat_from = model_from.predict(X_val)
        for i in range(len(boards)):
            from_square = np.argmax(y_hat_from[i])
            y_max_from = np.zeros((1, 1, X_val.shape[2], X_val.shape[3]))
            y_max_from.flat[from_square] = 1
            y_hat_to = model_to.predict([np.expand_dims(X_val[i], 0), y_max_from])
            to_square = np.argmax(y_hat_to)
            move_attempt = data.move_from_action(from_square, to_square)
            if boards[i].is_legal(move_attempt):
                print("YAY")
            else:
                print("BOO")
                print(move_attempt)
                # ground-truth move for this position
                move = data.move_from_action(np.argmax(y_val[0][i]),
                                             np.argmax(y_val[1][i]))
                print(move)
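# Hypothetical invocation of the joint evaluation mode: the checkpoint file
# names here are placeholders, while the generator name and data prefix follow
# the joint-accuracy script further down in this repo.
# validate(["from_net.hdf5", "to_net.hdf5"], "from_to",
#          "white_state_action_sl", "data/small_")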
def main(args: argparse.Namespace):
    dataset = Dataset(args.train)
    model_args = {
        'hidden_size': args.hidden_size,
        'input_size': args.input_size,
        'feature_size': len(dataset.dataframe.columns),
    }
    train_x, train_y = dataset(**model_args)

    test_frame = dataset.load(args.test)
    test = np.concatenate(
        (train_x[0],
         dataset.transform(
             np.hstack((test_frame, np.zeros((len(test_frame), 1)))))))

    # output directory (taken from args.output, following the companion
    # training script's convention; `out` was never defined in this excerpt)
    out = Path(args.output)
    out.mkdir(exist_ok=True, parents=True)

    model = Model.from_file(args.weight)

    # Test sequences
    print('Testing ...')
    for index in range(len(test) - args.input_size):
        test_input = np.expand_dims(test[index:index + args.input_size], 0)
        pred = model.predict(test_input).squeeze()
        test[index + args.input_size, -1] = pred

    test_frame[dataset.dataframe.columns[-1]] = dataset.inverse_transform(
        test[args.input_size:, -1])
    test_frame.to_csv(str(out.joinpath('test-prediction.csv')), index=None)

    prediction = test_frame[dataset.dataframe.columns[-1]]
    prediction += abs(prediction.min())

    label = pd.read_csv(args.label).values[:, -1]
    label = MinMaxScaler().fit_transform(label.reshape(-1, 1))
    prediction = MinMaxScaler().fit_transform(prediction.values.reshape(-1, 1))

    mse = mean_squared_error(label, prediction)
    mae = mean_absolute_error(label, prediction)
    mape = mean_absolute_percentage_error(label, prediction, args.epsilon)
    print(f'MSE: {mse:.4f}, MAE: {mae:.4f}, MAPE: {mape:.4f}')
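# mean_absolute_percentage_error is imported from elsewhere in the project;
# this is a minimal sketch of what it plausibly computes. Only the signature
# (an optional third epsilon argument) is known from the call sites; assuming
# epsilon guards the division when a label is zero.
def mean_absolute_percentage_error(y_true, y_pred, epsilon=1e-7):
    y_true = np.asarray(y_true, dtype=float).ravel()
    y_pred = np.asarray(y_pred, dtype=float).ravel()
    return np.mean(np.abs((y_true - y_pred) / (y_true + epsilon)))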
def _extract_feature():
    # extract readability attributes for each of the eight essay sets
    for i in range(1, 9):
        readability_feature_extract.readability_attributes(
            train_dataset.data[str(i)])
        readability_feature_extract.readability_attributes(
            dev_dataset.data[str(i)])
        readability_feature_extract.readability_attributes(
            test_dataset.data[str(i)])


def output_feature(dataset, cover_feature_names, file):
    dataset_features = {}
    for i in range(1, 9):
        for sample in dataset.data[str(i)]:
            features = []
            # features.append(sample['essay_id'])
            # features.append(sample['essay_set'])
            for feature_name in cover_feature_names:
                if feature_name in sample.keys():
                    features.append(sample[feature_name])
            dataset_features[sample['essay_id']] = features
    with open(file, 'wb') as f:
        pickle.dump(dataset_features, f)


train_dataset = Dataset.load("../../data/essay_data/train-entire.p")
dev_dataset = Dataset.load("../../data/essay_data/dev-entire.p")
test_dataset = Dataset.load("../../data/essay_data/test-entire.p")

_extract_feature()

Dataset.save_feature(train_dataset, '../../data/essay_data/train-feature-5.p')
Dataset.save_feature(dev_dataset, '../../data/essay_data/dev-feature-5.p')
Dataset.save_feature(test_dataset, '../../data/essay_data/test-feature-5.p')
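# output_feature is defined above but never called in this excerpt; a
# hypothetical invocation with a caller-chosen feature subset would look like:
# output_feature(train_dataset, ['token_count', 'unique_token_count'],
#                '../../data/essay_data/train-feature-subset.p')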
class GazeDirection(object):
    """Main class, retrieving video frames from the webcam, acquiring data
    and estimating the look direction.
    """

    ROLLING_WINDOW_LENGTH = 3

    def __init__(self):
        self.dataset = Dataset()
        self.cap = None
        self.showMoments = False
        self.showEvaluation = False
        self.bufferFace = Buffer(self.ROLLING_WINDOW_LENGTH)
        self.bufferLeftEye = Buffer(self.ROLLING_WINDOW_LENGTH)
        self.bufferRightEye = Buffer(self.ROLLING_WINDOW_LENGTH)

    def startCapture(self):
        """Start the webcam recording"""
        self.cap = cv2.VideoCapture(0)

    def stopCapture(self):
        """Stop the camera recording"""
        # When everything is done, release the capture
        self.cap.release()
        cv2.destroyAllWindows()

    def run(self):
        """Main loop"""
        self.startCapture()
        data_collector = DataCollector(self.dataset)

        keepLoop = True
        while keepLoop:
            pressed_key = cv2.waitKey(1)
            img = self.getCameraImage()

            face, left_eye, right_eye = img.detectEyes(
                self.bufferFace, self.bufferLeftEye, self.bufferRightEye)
            if face:
                face.draw(img)
            if left_eye:
                left_eye.draw(face)
            if right_eye:
                right_eye.draw(face)

            # Controls
            if pressed_key & 0xFF == ord('q'):
                keepLoop = False
            if pressed_key & 0xFF == ord('s'):
                self.dataset.save()
            if pressed_key & 0xFF == ord('l'):
                self.dataset.load()
            if pressed_key & 0xFF == ord('m'):
                self.showMoments = not self.showMoments
            if pressed_key & 0xFF == ord('e'):
                self.showEvaluation = not self.showEvaluation

            data_collector.step(img.canvas, pressed_key, left_eye, right_eye)

            txt = 'Dataset: {} (s)ave - (l)oad'.format(len(self.dataset))
            cv2.putText(img.canvas, txt, (21, img.canvas.shape[0] - 29),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.7, (32, 32, 32), 2)
            cv2.putText(img.canvas, txt, (20, img.canvas.shape[0] - 30),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 126, 255), 2)

            if left_eye and right_eye:
                direction = self.dataset.estimateDirection(
                    left_eye.computeMomentVectors(),
                    right_eye.computeMomentVectors())
                txt = 'Estimated direction: {}'.format(direction.name)
                cv2.putText(img.canvas, txt, (21, img.canvas.shape[0] - 49),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.7, (32, 32, 32), 2)
                cv2.putText(img.canvas, txt, (20, img.canvas.shape[0] - 50),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 126, 255), 2)

            img.show()

            if self.showEvaluation:
                fig = self.dataset.showValidationScoreEvolution()
                plt.show()
                self.showEvaluation = False

            if self.showMoments:
                fig = self.dataset.drawVectorizedMoments()
                plt.show()
                # cv2.imshow('moments', self.fig2cv(fig))
                # plt.close(fig)
                self.showMoments = False

        self.stopCapture()

    def fig2cv(self, fig):
        """Convert a matplotlib figure to a cv2 image that can be displayed

        Args:
            fig (plt.Figure): Original matplotlib figure

        Returns:
            cv2.Image: Converted cv2 image
        """
        fig.canvas.draw()
        img = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)
        img = img.reshape(fig.canvas.get_width_height()[::-1] + (3, ))
        # img is RGB; convert to OpenCV's default BGR
        img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
        return img

    def getCameraImage(self):
        """Retrieve the current frame from the webcam

        Returns:
            Image: Image frame captured
        """
        # Capture frame-by-frame
        ret, frame = self.cap.read()
        frame = cv2.resize(frame, (640, 480))
        frame = cv2.flip(frame, 1)
        return Image(frame)
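# Buffer is defined elsewhere in the project; a minimal sketch under the
# assumption that it keeps the last N detections for smoothing (the actual
# interface consumed inside Image.detectEyes is not shown in this excerpt).
from collections import deque


class Buffer(object):
    def __init__(self, length):
        self._values = deque(maxlen=length)

    def add(self, value):
        self._values.append(value)

    def mean(self):
        # rolling average over the window; None until a value arrives
        if not self._values:
            return None
        return sum(self._values) / len(self._values)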
        self.train_X = self.train_X.reshape(
            (self.train_X.shape[0], n_steps, 1, n_length, self.n_features))
        self.test_X = self.test_X.reshape(
            (self.test_X.shape[0], n_steps, 1, n_length, self.n_features))

        # define model
        model = Sequential()
        model.add(ConvLSTM2D(filters=64, kernel_size=(1, 3), activation='relu',
                             input_shape=(n_steps, 1, n_length, self.n_features)))
        model.add(Dropout(0.5))
        model.add(Flatten())
        model.add(Dense(100, activation='relu'))
        model.add(Dense(self.n_outputs, activation='softmax'))
        model.compile(loss='categorical_crossentropy',
                      optimizer='adam',
                      metrics=['accuracy'])
        model.summary()
        input()  # pause so the summary can be inspected before training
        super().build(model)


if __name__ == "__main__":
    dataset_root = "/home/icirc/Desktop/KJ/Class_Project/Machine_Learning_3_term_project/project/human_activity_recognition-master/code/home/icirc/Desktop/KJ/Class_Project/Machine_Learning_3_term_project/project/LSTM-Human-Activity-Recognition-master/data/UCI HAR Dataset"
    dataset = Dataset(dataset_root=dataset_root)
    train_X, train_y = dataset.load()
    test_X, test_y = dataset.load(split="test")
    # lstm = LSTM(train_data={"X": train_X, "y": train_y},
    #             test_data={"X": test_X, "y": test_y})
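# Shape sanity check for the ConvLSTM2D input above, assuming the usual split
# of UCI HAR's 128-step windows into 4 subsequences of 32 steps each (the
# actual n_steps / n_length values are set elsewhere in the class):
import numpy as np

windows = np.zeros((10, 128, 9))            # (samples, timesteps, features)
n_steps, n_length, n_features = 4, 32, 9    # n_steps * n_length must equal 128
reshaped = windows.reshape((windows.shape[0], n_steps, 1, n_length, n_features))
assert reshaped.shape == (10, 4, 1, 32, 9)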
def main(): """ Load data, train and evaluate a model """ args = get_args() if args.classifier_type in ['cnn', 'ffn']: # PyTorch run_pkg = 'pytorch' else: run_pkg = 'sklearn' if not args.text_emb_type: emb_type_name = args.post_emb_type else: emb_type_name = args.text_emb_type name_parts = [ args.model_name, args.features.replace(',', '+'), emb_type_name, args.classifier_type ] name_parts = [n for n in name_parts if n is not None] exp_name = '_'.join(name_parts).strip('_') exp_output_dirpath = os.path.join(args.output_dirpath, exp_name) model_outpath = f'/projects/tumblr_community_identity/models/{exp_name}.pkl' # Load trained embedding models # TODO: Move the loading of things elsewhere (load_embeddings) if (args.text_emb_type == 'unigrams' or args.text_emb_type is None) and \ (args.post_emb_type == 'unigrams' or args.post_emb_type == 'tags' or \ args.post_emb_type is None): word_embs = None graph_embs = None sent_embs = None else: print("Loading embeddings...") emb_loader = EmbeddingLoader(args.post_emb_type, args.text_emb_type) load_word_embs, load_graph_embs, graph_embs = False, False, None load_sent_embs, sent_embs = False, None if args.post_emb_type != 'unigrams': load_word_embs = True if 'graph' in args.features: load_graph_embs = True if 'text' in args.features and args.text_emb_type in [ 'fasttext', 'bert' ]: load_sent_embs = True emb_loader.load(word_embs=load_word_embs, graph_embs=load_graph_embs, sent_embs=load_sent_embs) if load_graph_embs: graph_embs = emb_loader.graph_embs if load_sent_embs: sent_embs = emb_loader.sent_embs word_embs = emb_loader.word_embs # Load and filter dataset print("Loading and filtering data...") dataset = Dataset() dataset.load(args.data_location, args.task) if args.load_preprocessed: id2token = load_pickle(args.load_preprocessed) user_filter = set(list(id2token.keys())) dataset.filter(user_ids=user_filter, word_filter=word_embs.wv, word_filter_min=args.word_filter_min, preprocessed_descs=id2token) else: if args.text_emb_type and args.post_emb_type and \ args.text_emb_type != 'unigrams' and args.post_emb_type != 'unigrams': dataset.filter(word_filter=word_embs.wv, word_filter_min=args.word_filter_min) elif 'comms' in args.features: dataset.load_filter_communities() # Extract features print("Extracting features...") post_ngrams, post_tags, text_ngrams = False, False, False if run_pkg == 'pytorch': extractor = FeatureExtractor(args.features, word_embs=word_embs, graph_embs=graph_embs, sent_embs=sent_embs, word_inds=True, padding_size=30) dataset = extractor.extract(dataset, run_pkg, dev=True) else: if args.post_emb_type == 'unigrams': post_ngrams = True elif args.post_emb_type == 'tags': post_tags = True if args.text_emb_type == 'unigrams': text_ngrams = True extractor = FeatureExtractor(args.features, word_embs=word_embs, graph_embs=graph_embs, sent_embs=sent_embs, post_ngrams=post_ngrams, post_tags=post_tags, text_ngrams=text_ngrams, select_k=args.feature_selection_k, post_tag_pca=args.post_tag_pca, post_tag_lda=args.post_tag_lda) dataset = extractor.extract(dataset, run_pkg, dev=True) # Run model print("Running model...") data_outpath = f'../tmp/{exp_name}_data.pkl' dataset.save(data_outpath) print(f"\tSaved dataset folds to {data_outpath}") experiment = Experiment(extractor, dataset, args.classifier_type, args.use_cuda, args.epochs, sfs_k=args.forward_feature_selection_k) experiment.run() # Print output if experiment.dev_score: print(f'\tDev set score: {experiment.dev_score: .4f}') print(f'\tTest set score: {experiment.test_score: .4f}') # Save 
settings, output if run_pkg == 'sklearn': dataset.save_settings(exp_output_dirpath) experiment.save_output(exp_output_dirpath) experiment.save_model(model_outpath)
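# load_pickle is imported from the project's utilities; a minimal sketch of
# what the call above plausibly does (the real helper may differ):
def load_pickle(path):
    import pickle
    with open(path, 'rb') as f:
        return pickle.load(f)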
def main(args: argparse.Namespace):
    init(args.seed)
    dataset = Dataset(args.train)
    model_args = {
        'hidden_size': args.hidden_size,
        'input_size': args.input_size,
        'feature_size': len(dataset.dataframe.columns),
        'nested': args.nested,
        'dropout': args.dropout,
    }
    optim_args = {
        'lr': args.lr,
        'beta_1': args.beta_1,
        'beta_2': args.beta_2,
        'decay': args.decay,
    }
    train_x, train_y = dataset(**model_args)

    test_frame = dataset.load(args.test)
    label = test_frame.values[:, -1]
    test = np.concatenate((train_x[0], dataset.transform(
        np.hstack((test_frame.values[:, :-1],
                   np.zeros((len(test_frame), 1)))))))

    if args.use_test:
        test_y = test_frame.values[args.input_size:, -1]
        test_x = np.empty((len(test_frame) - args.input_size,
                           args.input_size,
                           len(dataset.dataframe.columns)))
        # fill every window, one per row of test_x (the original looped over
        # range(len(test_y) - args.input_size), leaving the tail uninitialized)
        for i in range(len(test_x)):
            test_x[i] = test_frame.values[i:i + args.input_size]
        train_x = np.vstack((train_x, test_x))
        train_y = np.concatenate((train_y, test_y))

    out = Path(args.output)
    out.mkdir(exist_ok=True, parents=True)

    if args.model:
        print(f'Model load from {args.model} ...')
        model = Model.from_file(args.model)
    else:
        model = Model(model_args, optim_args)

    if not args.silence:
        model.summary()

    # Train sequences
    print('Training ...')
    model.fit(train_x, train_y,
              epochs=args.epoch,
              shuffle=False,
              batch_size=args.batch,
              verbose=not args.silence,
              callbacks=model.callbacks(early_stop=not args.no_stop))
    model.save(str(out.joinpath('model.h5')))

    # Test sequences
    print('Testing ...')
    for index in tqdm(range(len(test) - args.input_size)):
        test_input = np.expand_dims(test[index:index + args.input_size], 0)
        pred = model.predict(test_input).squeeze()
        test[index + args.input_size, -1] = pred

    test_frame[dataset.dataframe.columns[-1]] = test[args.input_size:, -1]
    test_frame.to_csv(str(out.joinpath('prediction.csv')), index=None)
    test_frame[dataset.dataframe.columns[-1]] = dataset.inverse_transform(
        test[args.input_size:, -1])
    test_frame.to_csv(str(out.joinpath('prediction-scaled.csv')), index=None)

    if not args.no_fig:
        def scaler(values: np.ndarray) -> np.ndarray:
            min_, max_ = values.min(), values.max()
            return (values - min_) / (max_ - min_)

        import matplotlib
        matplotlib.use('Agg')  # select the non-interactive backend before pyplot
        import matplotlib.pyplot as plt

        label_scale = scaler(label)
        prediction = scaler(test_frame.values[:, -1])
        x_range = np.arange(np.size(prediction, 0))
        error = mean_absolute_percentage_error(label_scale, prediction)

        plt.title(f'MAPE: {error:.4}')
        plt.ylim(0, 1)
        plt.plot(x_range, label_scale, c='r')
        plt.plot(x_range, prediction, c='b')
        plt.savefig(str(out.joinpath('figure.jpg')), dpi=400)
        print(f'MAPE: {error:.4}')
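# init is imported from the project's utilities; a plausible sketch of the
# seeding helper called at the top of main() (the real version may also seed
# the deep-learning backend):
def init(seed: int):
    import random
    random.seed(seed)
    np.random.seed(seed)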
import copy
import itertools
import pickle

import numpy as np
import pandas as pd
from scipy.stats import wasserstein_distance
from sklearn.model_selection import KFold, StratifiedKFold
from sklearn.preprocessing import StandardScaler

import metrics
import regression
from data import Dataset, split_sentence

train_dataset = Dataset.load("../../data/essay_data/train-feature-5.p")
dev_dataset = Dataset.load("../../data/essay_data/dev-feature-5.p")
test_dataset = Dataset.load("../../data/essay_data/test-feature-5.p")

feature = [
    'token_count', 'unique_token_count', 'no_stop_count', 'comma_count',
    'special_count', 'noun_count', 'verb_count', 'adv_count', 'adj_count',
    'pron_count', 'word_avg_len',
import numpy as np
from tqdm import tqdm


def get_joint_accuracy(y_from, y_to, y_from_true, y_to_true):
    """Count a prediction as correct if the true (from, to) pair appears in
    the top `num_top` entries of the joint move distribution."""
    num_top = 3
    score = np.zeros((y_to.shape[0],))
    for i in tqdm(range(y_to.shape[0])):
        from_square_true = np.where(y_from_true[i] == 1)[0]
        to_square_true = np.where(y_to_true[i] == 1)[0]
        # joint distribution over moves: P(from) x P(to)
        p = np.outer(y_from[i], y_to[i])
        p_shape = p.shape
        p = p.reshape((-1,))
        for j, idx in enumerate(np.argsort(p)[::-1]):
            if j >= num_top:
                break
            from_square, to_square = np.unravel_index(idx, p_shape)
            if from_square == from_square_true and to_square == to_square_true:
                score[i] = 1
                break
    print("Joint move accuracy: %f" % (score.sum() / score.shape[0]))


if __name__ == '__main__':
    d_test = Dataset('data/small_test.pgn')
    X_val, y_from_val, y_to_val = d_test.load(
        'white_state_action_sl', featurized=True, refresh=False)

    from keras.models import load_model
    model = load_model("./saved/policy/1481219504/94-4.61.hdf5")
    y_from, y_to = model.predict(X_val, verbose=1)
    get_joint_accuracy(y_from, y_to, y_from_val, y_to_val)
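    # Hypothetical extra check: per-head top-1 accuracies, as a baseline next
    # to the joint top-3 score (the one-hot targets make argmax comparable).
    from_acc = (y_from.argmax(axis=1) == y_from_val.argmax(axis=1)).mean()
    to_acc = (y_to.argmax(axis=1) == y_to_val.argmax(axis=1)).mean()
    print("From accuracy: %f, to accuracy: %f" % (from_acc, to_acc))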