def bimodal_fusion(dataset):
    """Run the fused-MFCC experiment: autoencoder encoding followed by a linear SVM.

    The two 'concat' MFCC arrays returned by ``load_data`` are stacked row-wise
    into one feature matrix, and label column 0 is repeated once so that the
    label vector lines up with the stacked rows. The data is split 67/33 with a
    fixed seed, an ``Autoencoder`` is fitted on the training part only, and a
    ``LinearSVM`` is trained and tested on the encoded features.

    :param dataset: dataset identifier forwarded to ``load_data``, to the
        ``Autoencoder`` constructor and into the SVM run name.
    """
    arch_tag = 'concat_audio'

    # NOTE: the raw-audio variant of this experiment loads with 'audio'
    # instead of 'mfcc' and stacks the two resulting arrays the same way.
    mfcc_first, mfcc_second, labels = load_data(dataset, 'mfcc', 'concat', verbose=True)

    features = np.vstack((mfcc_first, mfcc_second))
    first_label_column = labels[:, 0]
    targets = np.hstack((first_label_column, first_label_column))

    print("--" * 20)
    print("processed data shape", features.shape)
    print("processed label shape", targets.shape)
    print("--" * 20)

    train_X, test_X, train_y, test_y = train_test_split(
        features,
        targets,
        test_size=0.33,
        random_state=42,
    )
    # Both parts must expose the same feature width to the autoencoder.
    assert train_X.shape[1] == test_X.shape[1]

    autoencoder = Autoencoder(dataset, arch_tag, train_X.shape[1])
    autoencoder.build_model()
    autoencoder.train_model(train_X)

    encoded_train = autoencoder.transform(train_X)
    encoded_test = autoencoder.transform(test_X)

    classifier = LinearSVM('%s_baseline_%s' % (dataset, arch_tag))
    classifier.train(encoded_train, train_y)
    classifier.test(encoded_test, test_y)
def baseline(dataset):
    """Run the audio-only baseline: autoencoder encoding followed by a linear SVM.

    Frame-level audio data is loaded, flattened with ``flatten_data`` and paired
    with label column 0. The data is split 67/33 with a fixed seed, an
    ``Autoencoder`` is fitted on the training part only, and a ``LinearSVM`` is
    trained and tested on the encoded features.

    :param dataset: dataset identifier forwarded to ``load_data``, to the
        ``Autoencoder`` constructor and into the SVM run name.
    """
    modality = 'audio'

    # load_data returns four values in 'frame' mode; only the first (audio)
    # and the last (labels) are used here.
    audio_frames, _, _, labels = load_data(dataset, modality, 'frame', verbose=True)

    features = flatten_data(audio_frames, image=False)
    targets = labels[:, 0]

    print("--" * 20)
    print("processed data shape", features.shape)
    print("processed label shape", targets.shape)
    print("--" * 20)

    train_X, test_X, train_y, test_y = train_test_split(
        features,
        targets,
        test_size=0.33,
        random_state=42,
    )
    # Both parts must expose the same feature width to the autoencoder.
    assert train_X.shape[1] == test_X.shape[1]

    autoencoder = Autoencoder(dataset, modality, train_X.shape[1])
    autoencoder.build_model()
    autoencoder.train_model(train_X)

    encoded_train = autoencoder.transform(train_X)
    encoded_test = autoencoder.transform(test_X)

    classifier = LinearSVM('%s_baseline_%s' % (dataset, modality))
    classifier.train(encoded_train, train_y)
    classifier.test(encoded_test, test_y)
def __init__(self, dataset_name, arch_name, input_dim_A, input_dim_V):
    """Set up the layer sizes of a two-input (A / V) autoencoder.

    :param dataset_name: forwarded unchanged to ``Autoencoder.__init__``.
    :param arch_name: architecture label; the base class receives it prefixed
        as ``'bimodal_<arch_name>'``.
    :param input_dim_A: input width of the A branch.
    :param input_dim_V: input width of the V branch.

    Each branch gets two hidden widths: the input width scaled by
    ``hidden_ratio`` and by ``hidden_ratio`` squared, both truncated to int.
    The shared width is a quarter of each branch's second hidden width, summed
    and truncated to int.
    """
    # The base initializer is invoked explicitly with 0 as its third argument.
    # NOTE(review): ``self.config`` and ``self.hidden_ratio`` are read below and
    # are presumably populated by this call - confirm against the base class.
    Autoencoder.__init__(self, dataset_name, 'bimodal_%s' % arch_name, 0)
    self.save_dir = self.config['autoencoder']['save_dir_bimodal']

    self.input_dim_A = input_dim_A
    self.input_dim_V = input_dim_V

    ratio = self.hidden_ratio
    ratio_squared = ratio**2

    self.hidden_dim_A = [
        int(input_dim_A * ratio),
        int(input_dim_A * ratio_squared),
    ]
    self.hidden_dim_V = [
        int(input_dim_V * ratio),
        int(input_dim_V * ratio_squared),
    ]

    deepest_A = self.hidden_dim_A[1]
    deepest_V = self.hidden_dim_V[1]
    self.hidden_dim_shared = int(deepest_A / 4 + deepest_V / 4)
def mnist_test(self):
    """Smoke-test the autoencoder pipeline on the Keras MNIST digits.

    Loads the MNIST train/test images (labels are discarded), flattens both
    with ``flatten_data``, then builds, trains and visualises an
    ``Autoencoder`` sized to the flattened width. ``self`` is not used.

    :raises AssertionError: if the train and test images do not share the same
        per-sample dimensions, before or after flattening.
    """
    from keras.datasets import mnist

    print("running autoencoders on MNIST data")
    (X_train, _), (X_test, _) = mnist.load_data()
    assert X_train.shape[1:] == X_test.shape[1:]

    X_train = flatten_data(X_train)
    X_test = flatten_data(X_test)
    # Compare only the per-sample dimensions: the MNIST splits contain a
    # different number of samples, so comparing the full shapes (as this check
    # previously did) can never succeed.
    assert X_train.shape[1:] == X_test.shape[1:], (
        "flattened train/test feature dimensions differ: %s vs %s"
        % (X_train.shape[1:], X_test.shape[1:])
    )

    mnist_ae = Autoencoder('12', '12', X_train.shape[1])
    mnist_ae.build_model()
    # The test images are passed as the second argument of train_model.
    # NOTE(review): presumably used as validation data - confirm.
    mnist_ae.train_model(X_train, X_test)
    mnist_ae.vis_model(X_test)
### Processing Images
# Load the training and test images at the configured height/width; each call
# also returns the ids that belong to the loaded images.
logging.info(f"Reading and Processing Images from {train_dir}")
train_data, train_ids = read_images_in_dir(train_dir, img_height, img_width)

logging.info(f"Reading and Processing Images from {test_dir}")
test_data, test_ids = read_images_in_dir(test_dir, img_height, img_width)

# Pixel normalisation (to the 0-1 range, per the original note) is delegated
# to transform_images.
logging.info(f"Normalizing the images!")
trans_train_data = transform_images(train_data)
trans_test_data = transform_images(test_data)

### Setting up CNN Autoencoder Model
# Build, compile and fit; the transformed test images are handed to fit()
# as its second argument.
logging.info('Setting up the Autoencoder')
autoencoder = Autoencoder()
autoencoder.set_architecture(img_width, img_height, img_channel)
autoencoder.compile_autoencoder()
autoencoder.fit(trans_train_data, trans_test_data)

## Encoded layer for both the train and test data
logging.info('Putting Images through the encoded layer')
encoded_train = autoencoder.encoder_predict(trans_train_data)
encoded_test = autoencoder.encoder_predict(trans_test_data)

## Collapse every encoded image into one row, i.e. shape
## (#imgs, product of the remaining encoder output dims) - input for KNN
train_flat_shape = (-1, np.prod(encoded_train.shape[1:]))
test_flat_shape = (-1, np.prod(encoded_test.shape[1:]))
encoded_train_flat = encoded_train.reshape(train_flat_shape)
encoded_test_flat = encoded_test.reshape(test_flat_shape)

## Save model in pickle
import os

from src.configreader import ConfigReader
from src.dataset import Dataset
from src.autoencoder import Autoencoder

if __name__ == "__main__":
    # Training entry point: read config.json next to this script, build the
    # dataset iterators and the model, train for a fixed number of steps and
    # checkpoint the model periodically and once more at the end.
    config_path = os.path.join(os.path.dirname(__file__), "config.json")
    config_obj = ConfigReader(config_path)

    dataset = Dataset(config_obj)
    x_train = dataset.load_train_data()
    x_val = dataset.load_val_data()
    x_eval = dataset.load_eval_data()

    model = Autoencoder(config_obj, dataset)
    model.set_iterators(x_train, x_val, eval_from_input_iterator=x_eval)

    # The save path does not change during training, so look it up once.
    model_save_path = config_obj.data.get_string("model_save_path")

    for i in range(12000):
        # The evaluation is quite time intensive; turning it off increases
        # the speed, so it only runs on every 500th step (never on step 0).
        do_evaluation = i % 500 == 0 and i > 0
        stats = model.train(do_evaluation)
        print("{}: {}".format(i, stats["loss"]))
        if "val_loss" in stats:
            print("Val loss: {}".format(stats["val_loss"]))
            print("IO: {}, l1: {}".format(stats['iou'], stats["eval_l1"]))
        # Checkpoint on every 1000th step. The previous condition
        # (`i % 1000 and i > 0`) was inverted: it saved on every step that was
        # NOT a multiple of 1000 and skipped the multiples themselves.
        if i % 1000 == 0 and i > 0:
            model.save(model_save_path)

    # Always persist the final state after the last training step.
    model.save(model_save_path)
"and the empty/full block encoding to set the block empty/full right away." ) parser.add_argument("--store_as_npy", help="Usually the output is saved as a .hdf5 container, " "using this will save the output as .npy", action="store_true") args = parser.parse_args() config_path = os.path.join(os.path.dirname(__file__), "config.json") config_obj = ConfigReader(config_path) dataset = Dataset(config_obj) dataset.batch_size = args.batch_size model = Autoencoder(config_obj, dataset) model.set_iterators(eval_from_placeholder=True) model.load(config_obj.data.get_string("model_save_path")) input_ones = np.ones( [1, dataset.input_size(), dataset.input_size(), dataset.input_size(), 1]) full_block_latent = model.encode_from_placeholder( input_ones * -dataset.truncation_threshold) empty_block_latent = model.encode_from_placeholder( input_ones * dataset.truncation_threshold) data_iterator = dataset.load_custom_data( args.data_path,
# Shape handed from the linear decoder to the convolutional decoder
# (channels, height, width) and its flattened size (3 * 3 * 32 = 288).
cnn_feature_shape = (32, 3, 3)
flat_features = 3 * 3 * 32

# Linear encoder: flattened CNN features -> 64 -> latent code
encoder_lin = nn.Sequential(
    nn.Linear(flat_features, 64),
    nn.ReLU(True),
    nn.Linear(64, encoded_space_dim),
)

# Linear decoder: latent code -> 64 -> flattened CNN features
decoder_lin = nn.Sequential(
    nn.Linear(encoded_space_dim, 64),
    nn.ReLU(True),
    nn.Linear(64, flat_features),
    nn.ReLU(True),
)

# Convolutional decoder: three stride-2 transposed convolutions. For a 3x3
# input the spatial size grows 3 -> 7 -> 14 -> 28 while the channels shrink
# 32 -> 16 -> 8 -> 1.
decoder_cnn = nn.Sequential(
    nn.ConvTranspose2d(32, 16, 3, stride=2, output_padding=0),
    nn.ReLU(True),
    nn.ConvTranspose2d(16, 8, 3, stride=2, padding=1, output_padding=1),
    nn.ReLU(True),
    nn.ConvTranspose2d(8, 1, 3, stride=2, padding=1, output_padding=1),
)

# Instantiate the network from its four parts
net = Autoencoder(
    encoder_cnn=encoder_cnn,
    encoder_lin=encoder_lin,
    decoder_lin=decoder_lin,
    decoder_cnn=decoder_cnn,
    lin_to_cnn=cnn_feature_shape,
)

# Show the network
print(net)

### Some examples
# Take an input image (remember to add the batch dimension).
# NOTE(review): the image is read from index 1 of the sample - confirm the
# dataset's item layout.
img = test_dataset[0][1].unsqueeze(0)
print('Original image shape:', img.shape)

# Encode the image
img_enc = net.encode(img)
print('Encoded image shape:', img_enc.shape)

# Decode the image
dec_img = net.decode(img_enc)
print('Decoded image shape:', dec_img.shape)
train_data, val_data = dprep.get_prepped_training_data(FLAGS=FLAGS) test_data = dprep.get_prepped_testing_data(FLAGS=FLAGS) print('got prepped training and testing data') train_iter = train_data.make_initializable_iterator() val_iter = val_data.make_initializable_iterator() test_iter = test_data.make_initializable_iterator() print('made iterators') train_x = train_iter.get_next() val_x = val_iter.get_next() test_x = test_iter.get_next() print('got next for all iterators') model = Autoencoder(FLAGS=FLAGS) print('made model') train_op, train_loss_op = model._optimizer(train_x) pred_op, test_loss_op = model._validation_loss(val_x, test_x) print('starting session') with tf.Session() as sess: sess.run(tf.global_variables_initializer()) train_loss = 0 test_loss = 0 for epoch in range(FLAGS.num_epoch): sess.run(train_iter.initializer) for batch_nr in range(num_batches):
type=int, default=4, help="Number of threads to use in the input pipeline.") args = parser.parse_args() config_path = os.path.join(os.path.dirname(__file__), "config.json") config_obj = ConfigReader(config_path) dataset = Dataset(config_obj) dataset.batch_size = args.batch_size data_iterator = dataset.load_custom_data(args.path, fast_inference=True, num_threads=args.threads) model = Autoencoder(config_obj, dataset) model.set_iterators(eval_from_input_iterator=data_iterator, eval_from_placeholder=True, eval_uses_fast_inference=True) model.load(config_obj.data.get_string("model_save_path")) model.summary() input_ones = np.ones( [1, dataset.input_size(), dataset.input_size(), dataset.input_size(), 1]) full_block_latent = model.encode_from_placeholder( input_ones * -dataset.truncation_threshold) empty_block_latent = model.encode_from_placeholder(