# Imports assumed by this fragment (load_data is a project-local helper):
import os
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import gridspec
from keras.models import model_from_json


def evaluate_autoencoder(model_dir, test_data_dir):
    # Rebuild the trained autoencoder from its serialized architecture and weights.
    weights_file = os.path.join(model_dir, "autoencoder_weights.hdf5")
    json_file = os.path.join(model_dir, "autoencoder_model.json")
    with open(json_file, 'r') as f:
        json_string = f.read()
    autoencoder = model_from_json(json_string)
    autoencoder.load_weights(weights_file)

    # Scale pixels to [0, 1] and drop the singleton sequence axis:
    # (N, 1, H, W, 1) -> (N, H, W, 1).
    originaldata = load_data(test_data_dir)
    data = originaldata.astype(np.float32) / 255
    data = np.reshape(data, (data.shape[0],) + data.shape[2:])

    # Reconstruct the frames and rescale back to 8-bit pixel values.
    decoded = (autoencoder.predict(data) * 255).astype(np.uint8)

    # Plot each reconstruction next to its original frame.
    gs = gridspec.GridSpec(1, 2)
    for i in range(20):
        image = decoded[i, :, :, 0]
        plt.subplot(gs[0])
        plt.imshow(image, interpolation='none')
        plt.subplot(gs[1])
        plt.imshow(originaldata[i, 0, :, :, 0])
        plt.savefig(os.path.join(model_dir, 'plot_' + str(i) + '.png'))
        plt.clf()
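# `load_data` is a project-local helper that is not shown in this fragment.
# The sketch below is an assumption about its behavior, inferred from the
# reshape and the `originaldata[i, 0, :, :, 0]` indexing above: it reads
# grayscale frames into a uint8 array of shape (N, 1, H, W, 1). The PNG
# layout and PIL usage are illustrative guesses, not the project's loader.
def load_data_sketch(data_dir):
    from PIL import Image
    frames = []
    for name in sorted(os.listdir(data_dir)):
        if name.endswith('.png'):
            img = np.asarray(Image.open(os.path.join(data_dir, name)).convert('L'))
            frames.append(img[np.newaxis, :, :, np.newaxis])  # (1, H, W, 1)
    return np.stack(frames)  # (N, 1, H, W, 1)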
def set_train_data(self, raw=False):
    """Sets the train_data attribute to the ADL data corresponding to the
    dataset for this experiment."""
    split_by_vid_or_class = 'Split_by_class'
    vid_class = 'NonFall'
    data = load_data(split_by_vid_or_class=split_by_vid_or_class, raw=raw,
                     img_width=self.img_width, img_height=self.img_height,
                     vid_class=vid_class, dset=self.dset)
    self.train_data = data
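# Minimal usage sketch for set_train_data(). The Experiment class name and
# constructor arguments are hypothetical; only the attributes the method
# actually reads (img_width, img_height, dset) are taken from the code above:
#
#   exp = Experiment(img_width=64, img_height=64, dset='Thermal')  # hypothetical
#   exp.set_train_data(raw=False)
#   autoencoder.fit(exp.train_data, exp.train_data, ...)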
# Imports needed by this fragment (load_data is a project-local helper):
from keras.layers import Input, Conv2D, MaxPooling2D, UpSampling2D
from keras.models import Model
from keras.callbacks import ModelCheckpoint, TensorBoard


def train_autoencoder(trainingData, outDir):
    # Load training data
    print("Loading and normalizing data...")
    data = load_data(trainingData)
    # Drop the singleton sequence axis and scale pixels to [0, 1].
    data = np.reshape(data, (data.shape[0],) + data.shape[2:])
    data = data.astype(np.float32) / 255
    print("Finished loading data!")

    # Initialize the model: a small convolutional autoencoder that compresses
    # 48x64 grayscale frames through two pooling stages and reconstructs them.
    print("Initializing model...")
    inputs = Input(shape=(48, 64, 1))  # renamed from `input` to avoid shadowing the builtin
    autoencoder_layers = Conv2D(3, 3, padding='same', activation='relu')(inputs)
    autoencoder_layers = MaxPooling2D()(autoencoder_layers)
    autoencoder_layers = Conv2D(5, 3, padding='same', activation='relu')(autoencoder_layers)
    autoencoder_layers = MaxPooling2D()(autoencoder_layers)
    autoencoder_layers = UpSampling2D()(autoencoder_layers)
    autoencoder_layers = Conv2D(3, 3, padding='same', activation='relu')(autoencoder_layers)
    autoencoder_layers = UpSampling2D()(autoencoder_layers)
    autoencoder_layers = Conv2D(1, 3, padding='same', activation='sigmoid')(autoencoder_layers)
    autoencoder = Model(inputs=inputs, outputs=autoencoder_layers)
    autoencoder.compile(optimizer='sgd', loss='binary_crossentropy')
    print("Done!")

    batch_size = 256
    nb_epoch = 100

    print("Training model...")
    weights_file = os.path.join(outDir, "autoencoder_weights.hdf5")
    json_file = os.path.join(outDir, "autoencoder_model.json")
    tbLogPath = os.path.join(outDir, "tensorboard_logs")
    callbacks = [
        ModelCheckpoint(filepath=weights_file, monitor='val_loss', save_best_only=True),
        # histogram_freq must be an integer, so use floor division.
        TensorBoard(log_dir=tbLogPath, histogram_freq=nb_epoch // 10, batch_size=batch_size)
    ]
    autoencoder.fit(data, data, batch_size=batch_size, epochs=nb_epoch,
                    verbose=2, callbacks=callbacks, validation_split=0.1)

    json_string = autoencoder.to_json()
    with open(json_file, "w") as f:
        f.write(json_string)
    print("Done!")
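# Minimal usage sketch for train_autoencoder()/evaluate_autoencoder(); the
# directory paths are hypothetical. Training writes autoencoder_model.json
# and autoencoder_weights.hdf5 into the output directory, which evaluation
# then reads back alongside a held-out test set.
if __name__ == "__main__":
    out_dir = "models/autoencoder_bb"  # assumed layout
    train_autoencoder("data/train", out_dir)
    evaluate_autoencoder(out_dir, "data/test")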
def train(config):
    # Load training data
    print("Loading and normalizing data...")
    training_data = os.path.join(config.data_dir, "train")
    # State-vector models consume float32 vectors directly; frame-based
    # models load raw uint8 pixels and are normalized below.
    data_type = np.uint8 if config.model_type != ModelType.STATE_VECTOR else np.float32
    data = load_data(training_data, num_of_samples=config.sequences, dtype=data_type)
    data = normalize_data(data, config.model_type)
    print("Finished loading data!")
    input_shape = data.shape[2:]

    # Initialize and train model
    print("Initializing model...")
    factory = PredNetModelBuilder(config)
    factory.set_input_shape(input_shape)
    factory.set_tensorboard_verbosity(write_grads=True, write_images=True)
    #factory.add_pretrained_autoencoder("C:\\Users\\Alberto\\Projects\\Keras_models\\autoencoder_bb\\autoencoder_model.json",
    #                                   "C:\\Users\\Alberto\\Projects\\Keras_models\\autoencoder_bb\\autoencoder_weights.hdf5")
    factory.trainable_autoencoder = False
    model, callbacks = factory.build_model(config.model_type)
    print("Done!")

    print("Training model...")
    # PredNet trains against an all-zero dummy target: the model's output is
    # its own prediction error, which the loss drives toward zero.
    y = np.zeros((data.shape[0], 1), np.float32)
    model.fit(data, y, config.batch_size, config.epochs, verbose=2,
              callbacks=callbacks, validation_split=0.1)
    print("Done!")

    print("Saving model...")
    json_string = model.to_json()
    if not os.path.isdir(config.model_dir):
        os.mkdir(config.model_dir)
    json_file = os.path.join(config.model_dir, "prednet_model.json")
    with open(json_file, "w") as f:
        f.write(json_string)
    print("Done!")
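# `normalize_data` is a project-local helper not shown in this fragment. A
# minimal sketch of its assumed behavior, inferred from the dtype handling
# above and the `(X * 255)` de-normalization in evaluate() below: frame-based
# models scale uint8 pixels to [0, 1], while state-vector inputs are already
# float32 and pass through unchanged.
def normalize_data_sketch(data, model_type):
    if model_type == ModelType.STATE_VECTOR:
        return data  # already float32 feature vectors
    return data.astype(np.float32) / 255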
    # Persist the pretrained MLP weights so the full model can be
    # initialized from them later.
    np.save('MLP_WE/mlp_2_weights', pretrain_model.get_layer('mlp_2').get_weights())
    np.save('MLP_WE/mlp_3_weights', pretrain_model.get_layer('mlp_3').get_weights())
    np.save('MLP_WE/mlp_user_embed_weights', pretrain_model.get_layer('MLP_user_embed').get_weights())
    np.save('MLP_WE/mlp_item_embed_weights', pretrain_model.get_layer('MLP_item_embed').get_weights())

    hit_rate_accuracy = evaluation.evaluate_integer_input(
        'input/testing_data.npy', pretrain_model, 'hit_rate', 'input/int_mat.npy')
    print('MLP produces a hit-rate accuracy of: ' + str(hit_rate_accuracy))


if __name__ == '__main__':
    # Build the interaction matrix on first run, then reuse the cached copy.
    try:
        interaction_mx = np.load('input/int_mat.npy')
    except IOError:
        data_management.load_data()
        interaction_mx = np.load('input/int_mat.npy')
    inputs, labels = data_management.training_data_generation(
        'input/training_data.npy', 'input/int_mat.npy', 5)
    data_management.load_data(file_path='../data/movielens/ratings.dat')
    train_mlp(num_predictive_factors=8,
              batch_size=256,
              epochs=2,
              interaction_mx=interaction_mx,
              inputs=inputs,
              labels=labels)
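# Sketch of how the weights saved above would be restored into the full
# model. np.save() on the list returned by get_weights() stores a pickled
# object array, so loading it back needs allow_pickle=True and a .tolist()
# to recover the list of arrays Keras expects. The function name and the
# assumption that the full model reuses the same layer names are illustrative.
def load_pretrained_mlp_weights_sketch(full_model):
    for layer_name, path in [('mlp_2', 'MLP_WE/mlp_2_weights.npy'),
                             ('mlp_3', 'MLP_WE/mlp_3_weights.npy'),
                             ('MLP_user_embed', 'MLP_WE/mlp_user_embed_weights.npy'),
                             ('MLP_item_embed', 'MLP_WE/mlp_item_embed_weights.npy')]:
        weights = np.load(path, allow_pickle=True).tolist()
        full_model.get_layer(layer_name).set_weights(weights)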
        num_predictive_factors = int(arg)
        print("Number of predictive factors is " + str(num_predictive_factors))
    elif opt in ("-b", "--bsize"):
        batch_size = int(arg)
        print("Batch size is " + str(batch_size))
    elif opt in ("-e", "--epoch"):
        num_pretrain_epochs = int(arg)
        print("Number of training epochs for pretrain and full model is " + str(num_pretrain_epochs))
        num_final_epochs = num_pretrain_epochs

data_management.load_data()
interaction_mx = np.load('input/int_mat.npy')
inputs, labels = data_management.training_data_generation('input/training_data.npy', 'input/int_mat.npy')
# Ratings run from 1 to 5, so one-hot encode into 6 classes (index 0 unused).
labels = keras.utils.to_categorical(labels, 6)

# Pretrain MLP
MLP.train_mlp(num_predictive_factors=num_predictive_factors,
              batch_size=batch_size,
              epochs=num_pretrain_epochs,
              interaction_mx=interaction_mx,
              inputs=inputs,
              labels=labels)
# Pretrain GMF
GMF.train_gmf(num_predictive_factors=num_predictive_factors,
              batch_size=batch_size,
              epochs=num_pretrain_epochs,
              interaction_mx=interaction_mx,
              inputs=inputs,
              labels=labels)

# Fuse the pretrained branches; check out the shared-vision guide at
# https://keras.io/getting-started/functional-api-guide/
user_input = Input(shape=(1,), name='user_input')
item_input = Input(shape=(1,), name='item_input')
    sequences = tokenizer.texts_to_sequences(articles)
    # Pad the sequences to a uniform length.
    padded = pad_sequences(sequences, maxlen=max_length,
                           padding=padding_type, truncating=trunc_type)
    return padded


if __name__ == "__main__":
    # Load the data.
    my_articles, my_labels = load_data(data_path=DATA_PATH, file_name=FILENAME)
    # Clean the data.
    my_articles = clean_articles(articles=my_articles, stopwords=STOPWORDS)
    # Split articles.
    train_articles, validation_articles, test_articles = split_articles(
        articles=my_articles, train_portion=TRAIN_PORTION, test_size=TEST_SIZE)
    # Split labels.
    train_labels, validation_labels, test_labels = split_labels(
        labels=my_labels, train_portion=TRAIN_PORTION, test_size=TEST_SIZE)

    print("Train articles:", len(train_articles))
    print("Validation articles:", len(validation_articles))
    print("Test articles:", len(test_articles))
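    # The padding helper above relies on a fitted `tokenizer` plus the
    # constants max_length, padding_type and trunc_type being in scope. A
    # minimal sketch of that setup; the vocabulary size, OOV token, lengths
    # and the tensorflow.keras import path are assumptions, not the
    # project's actual constants. pad_sequences is already imported by this
    # module (it is used in the helper above).
    from tensorflow.keras.preprocessing.text import Tokenizer
    tokenizer = Tokenizer(num_words=10000, oov_token="<OOV>")
    tokenizer.fit_on_texts(train_articles)  # fit on training text only to avoid leakage
    max_length, padding_type, trunc_type = 200, 'post', 'post'
    train_sequences = tokenizer.texts_to_sequences(train_articles)
    train_padded = pad_sequences(train_sequences, maxlen=max_length,
                                 padding=padding_type, truncating=trunc_type)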
__author__ = 'Rex'

# usage:
# python inspect_blockdata [height]

import sys
sys.path.append('..')

import data_management as dm

if __name__ == "__main__":
    # Load the exchange for the requested block height; if no valid height
    # is given (or loading fails), fall back to the most recent one.
    try:
        height = int(sys.argv[1])
        exchange = dm.load_data(str(height), dm.BLOCKS_FOLDER)
    except Exception:
        exchange = dm.pop_exchange()
    print(exchange.exchange)
def evaluate(config):
    # Select the PredNet variant class that matches the model type.
    if config.model_type == ModelType.PREDNET or config.model_type == ModelType.SINGLE_PIXEL_ACTIVATION:
        PredNet = PredNetVanilla
    elif config.model_type == ModelType.CONV_PREDNET:
        PredNet = ConvPredNet
    elif config.model_type == ModelType.AMPLIF_ERROR:
        PredNet = AmplifiedErrorPredNet
    elif config.model_type == ModelType.STATE_VECTOR:
        PredNet = RNN
    elif config.model_type == ModelType.CONCAT_PREDNET:
        PredNet = ConcatPredNet

    # Build paths with os.path.join instead of hard-coded "\\" separators so
    # they also work outside Windows.
    weights_file = os.path.join(results_folder, str(config), "prednet_weights.hdf5")
    json_file = os.path.join(results_folder, str(config), "prednet_model.json")
    batch_size = 8

    # Load test data
    test_set_dir = os.path.join(config.data_dir, "test")
    data_type = 'uint8' if config.model_type != ModelType.STATE_VECTOR else 'float32'
    data = load_data(test_set_dir, dtype=data_type)
    X_test = normalize_data(data, config.model_type)
    nt = X_test.shape[1]

    # Load the trained model, registering the custom layer classes that
    # model_from_json cannot resolve on its own.
    with open(json_file, 'r') as f:
        json_string = f.read()
    if config.model_type != ModelType.STATE_VECTOR and config.model_type != ModelType.CONCAT_PREDNET:
        custom_objects = {PredNet.__name__: PredNet}
    elif config.model_type == ModelType.STATE_VECTOR:
        custom_objects = {StateVectorPredNetCell.__name__: StateVectorPredNetCell}
    else:  # ModelType.CONCAT_PREDNET
        custom_objects = {PredNetVanilla.__name__: PredNetVanilla,
                          ConcatPredNet.__name__: ConcatPredNet}
    model = model_from_json(json_string, custom_objects=custom_objects)
    model.load_weights(weights_file)

    # Create a testing model that outputs frame predictions instead of errors.
    layer_config = model.layers[1].get_config()
    if config.model_type == ModelType.STATE_VECTOR:
        cell_config = layer_config['cell']['config']
        cell_config['output_mode'] = 'prediction'
        layer_config['cell'] = StateVectorPredNetCell(**cell_config)
        test_prednet = PredNet(weights=model.layers[1].get_weights(), **layer_config)
    else:
        layer_config['output_mode'] = 'prediction'
        test_prednet = PredNet(weights=model.layers[1].get_weights(), **layer_config)
    test_prednet.extrap_start_time = config.infer_pred_start
    test_prednet.extrap_end_time = config.infer_pred_end
    input_shape = list(model.layers[0].batch_input_shape[1:])
    input_shape[0] = nt
    inputs = Input(shape=tuple(input_shape))
    predictions = test_prednet(inputs)
    test_model = Model(inputs=inputs, outputs=predictions)

    # Make predictions
    X_hat = test_model.predict(X_test, batch_size)
    if config.model_type != ModelType.STATE_VECTOR:
        # Move channels last if needed, then rescale to 8-bit pixel values.
        data_format = layer_config.get('data_format', layer_config.get('dim_ordering'))
        if data_format == 'channels_first':
            X_test = np.transpose(X_test, (0, 1, 3, 4, 2))
            X_hat = np.transpose(X_hat, (0, 1, 3, 4, 2))
        X_test = (X_test * 255).astype(np.uint8)
        X_hat = (X_hat * 255).astype(np.uint8)
    else:
        X_test, X_hat = image_from_state_vector(X_test), image_from_state_vector(X_hat)

    # Compare the MSE of the PredNet predictions against simply repeating the
    # last frame, and write the results to prediction_scores.txt.
    scores_file = os.path.join(results_folder, str(config), 'prediction_scores.txt')
    mse_model, mse_prev = compute_and_save_mse(X_test, X_hat, scores_file)

    plots_dir = os.path.join(results_folder, str(config), 'prediction_plots')
    plot_predictions(X_test, X_hat, plots_dir, config)
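# `compute_and_save_mse` is a project helper not shown in this fragment. A
# minimal sketch of the assumed computation, following the standard PredNet
# evaluation: compare the model's MSE against the trivial baseline of
# predicting each frame as a copy of the previous one.
def compute_and_save_mse_sketch(X_test, X_hat, scores_file):
    X_test = X_test.astype(np.float64) / 255
    X_hat = X_hat.astype(np.float64) / 255
    mse_model = np.mean((X_test[:, 1:] - X_hat[:, 1:]) ** 2)   # skip frame 0: no prior context
    mse_prev = np.mean((X_test[:, :-1] - X_test[:, 1:]) ** 2)  # copy-last-frame baseline
    with open(scores_file, 'w') as f:
        f.write("Model MSE: %f\n" % mse_model)
        f.write("Previous Frame MSE: %f" % mse_prev)
    return mse_model, mse_prev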
def __signature__project(dsv, seed, perplexity):
    # Cheap fingerprint of the dataset plus the t-SNE parameters, used as
    # the cache key below.
    return hash(dsv[:5].tobytes()), seed, perplexity


def project(dsv, seed=1, perplexity=30):
    # Reuse a cached 2-D t-SNE projection when one exists for this exact
    # dataset/parameter combination. The original stored results under
    # `signature` without ever computing it (a NameError) and looked entries
    # up by row count alone; both are replaced by a lookup on the key.
    signature = __signature__project(dsv, seed, perplexity)
    if signature in project.cache:
        return project.cache[signature]
    temp = TSNE(n_components=2, perplexity=perplexity, random_state=seed).fit_transform(dsv)
    project.cache[signature] = (temp.T[0], temp.T[1])
    return project.cache[signature]


project.cache = load_data("projection_cache")


def plot_clusters(projection, clustering):
    # Passing clustering=None simply plots the projection without colors.
    try:
        clustering = clustering.labels_
    except AttributeError:
        pass
    plt.scatter(*projection, c=clustering)
    plt.show()


def complement_customers(df, a_clusterings, b_clusterings):
    groups = split_to_groups(df)
    try: