Esempio n. 1
0
def bimodal_fusion(dataset):
    """Run the autoencoder + linear SVM pipeline on concatenated MFCC data.

    The two MFCC arrays returned by ``load_data`` are stacked row-wise into
    one sample matrix, and the first label column is repeated once per array
    so that every stacked row has a target. The samples are split 67/33 with
    a fixed seed, an ``Autoencoder`` is trained on the training split only,
    and a ``LinearSVM`` is trained and tested on the encoded features.

    Args:
        dataset: Dataset identifier; forwarded to ``load_data`` and
            ``Autoencoder`` and used as the prefix of the SVM name.
    """
    # NOTE: the 'audio' concat features were loaded here previously
    # (``load_data(dataset, 'audio', 'concat', verbose=True)``); the MFCC
    # features are the ones currently in use.
    mfcc_first, mfcc_second, labels = load_data(
        dataset, 'mfcc', 'concat', verbose=True)

    # Stack both feature arrays; duplicate the first label column to match.
    features = np.vstack((mfcc_first, mfcc_second))
    first_label_column = labels[:, 0]
    targets = np.hstack((first_label_column, first_label_column))

    banner = "--" * 20
    print(banner)
    print("processed data shape", features.shape)
    print("processed label shape", targets.shape)
    print(banner)

    split = train_test_split(features, targets, test_size=0.33,
                             random_state=42)
    X_train, X_test, y_train, y_test = split

    # Both splits must expose the same number of features.
    n_features = X_train.shape[1]
    assert n_features == X_test.shape[1]

    # NOTE(review): the tag says "audio" although MFCC features are loaded
    # above -- confirm the naming is intentional.
    model_tag = 'concat_audio'

    autoencoder = Autoencoder(dataset, model_tag, n_features)
    autoencoder.build_model()
    autoencoder.train_model(X_train)

    encoded_train = autoencoder.transform(X_train)
    encoded_test = autoencoder.transform(X_test)

    classifier = LinearSVM('%s_baseline_%s' % (dataset, model_tag))
    classifier.train(encoded_train, y_train)
    classifier.test(encoded_test, y_test)
Esempio n. 2
0
def baseline(dataset):
    """Run the autoencoder + linear SVM baseline on frame-level audio data.

    The audio frames returned by ``load_data`` are flattened with
    ``flatten_data(..., image=False)`` and paired with the first label
    column. The samples are split 67/33 with a fixed seed, an ``Autoencoder``
    is trained on the training split only, and a ``LinearSVM`` is trained and
    tested on the encoded features.

    Args:
        dataset: Dataset identifier; forwarded to ``load_data`` and
            ``Autoencoder`` and used as the prefix of the SVM name.
    """
    modality = 'audio'

    # load_data returns four values here; only the first (audio) and the
    # last (labels) are used.
    audio_frames, _, _, labels = load_data(dataset, modality, 'frame',
                                           verbose=True)

    features = flatten_data(audio_frames, image=False)
    targets = labels[:, 0]

    banner = "--" * 20
    print(banner)
    print("processed data shape", features.shape)
    print("processed label shape", targets.shape)
    print(banner)

    split = train_test_split(features, targets, test_size=0.33,
                             random_state=42)
    X_train, X_test, y_train, y_test = split

    # Both splits must expose the same number of features.
    n_features = X_train.shape[1]
    assert n_features == X_test.shape[1]

    autoencoder = Autoencoder(dataset, modality, n_features)
    autoencoder.build_model()
    autoencoder.train_model(X_train)

    encoded_train = autoencoder.transform(X_train)
    encoded_test = autoencoder.transform(X_test)

    classifier = LinearSVM('%s_baseline_%s' % (dataset, modality))
    classifier.train(encoded_train, y_train)
    classifier.test(encoded_test, y_test)
Esempio n. 3
0
 def __init__(self, dataset_name, arch_name, input_dim_A, input_dim_V):
     """Configure the layer sizes of the two-branch (A / V) autoencoder.

     Args:
         dataset_name: Forwarded unchanged to ``Autoencoder.__init__``.
         arch_name: Architecture label; the base class receives it as
             ``'bimodal_<arch_name>'``.
         input_dim_A: Input dimensionality of the "A" branch
             (presumably audio -- confirm against callers).
         input_dim_V: Input dimensionality of the "V" branch
             (presumably visual -- confirm against callers).
     """
     # The base initializer is handed 0 as its input dimension; the
     # per-branch dimensions are stored on the instance below instead.
     Autoencoder.__init__(self, dataset_name, 'bimodal_%s' % arch_name, 0)
     self.save_dir = self.config['autoencoder']['save_dir_bimodal']
     self.input_dim_A = input_dim_A
     self.input_dim_V = input_dim_V

     ratio = self.hidden_ratio

     def two_level_sizes(input_dim):
         # Two hidden layers: input scaled by the ratio, then by its square.
         return [int(input_dim * ratio), int(input_dim * ratio**2)]

     self.hidden_dim_A = two_level_sizes(self.input_dim_A)
     self.hidden_dim_V = two_level_sizes(self.input_dim_V)

     # Shared layer: a quarter of each branch's deepest hidden size, summed.
     deepest_A = self.hidden_dim_A[1]
     deepest_V = self.hidden_dim_V[1]
     self.hidden_dim_shared = int(deepest_A / 4 + deepest_V / 4)
    def mnist_test(self):
        """Smoke-test the autoencoder pipeline on the Keras MNIST images.

        Loads MNIST (labels are discarded), flattens the images with
        ``flatten_data``, then builds and trains an ``Autoencoder`` on the
        training images, passing the test images as the second argument of
        ``train_model``, and finally calls ``vis_model`` on the test images.

        Raises:
            AssertionError: If the train and test splits disagree on the
                per-sample shape, before or after flattening.
        """
        # Function-scope import: keras is only required when this is called.
        from keras.datasets import mnist

        print("running autoencoders on MNIST data")
        (X_train, _), (X_test, _) = mnist.load_data()

        # Per-image shape must agree between the two splits.
        assert X_train.shape[1:] == X_test.shape[1:]

        X_train = flatten_data(X_train)
        X_test = flatten_data(X_test)

        # Only the feature dimension has to match. The splits hold different
        # numbers of samples (60000 vs 10000 for MNIST), so comparing the
        # full shapes -- as was done before -- could never succeed.
        assert X_train.shape[1] == X_test.shape[1]

        mnist_ae = Autoencoder('12', '12', X_train.shape[1])
        mnist_ae.build_model()
        mnist_ae.train_model(X_train, X_test)
        mnist_ae.vis_model(X_test)
Esempio n. 5
0
### Processing Images
# Read the train and test images from their directories. Each call returns
# the image data together with the matching ids.
# (presumably images are resized to img_height x img_width -- confirm in
# read_images_in_dir)
logging.info(f"Reading and Processing Images from {train_dir}")
train_data, train_ids = read_images_in_dir(train_dir, img_height, img_width)

logging.info(f"Reading and Processing Images from {test_dir}")
test_data, test_ids = read_images_in_dir(test_dir, img_height, img_width)

# Normalize the image pixels to 0-1
logging.info(f"Normalizing the images!")
trans_train_data = transform_images(train_data)
trans_test_data = transform_images(test_data)

### Setting up CNN Autoencoder Model
# NOTE(review): set_architecture receives width before height, whereas
# read_images_in_dir above receives height before width -- confirm both
# argument orders are intended.
logging.info('Setting up the Autoencoder')
autoencoder = Autoencoder()
autoencoder.set_architecture(img_width, img_height, img_channel)
autoencoder.compile_autoencoder()
# The transformed test set is passed as the second argument
# (presumably used as validation data -- confirm against Autoencoder.fit).
autoencoder.fit(trans_train_data, trans_test_data)

## Encoded layer for both the train and test data
# Run both sets through the encoder half only to obtain their encodings.
logging.info('Putting Images through the encoded layer')
encoded_train = autoencoder.encoder_predict(trans_train_data)
encoded_test = autoencoder.encoder_predict(trans_test_data)

## Flatten each encoded image to one row, giving arrays of shape
## (#imgs, product of the encoder output dimensions) -- the input for KNN.
encoded_train_flat = encoded_train.reshape(
    (-1, np.prod(encoded_train.shape[1:])))
encoded_test_flat = encoded_test.reshape((-1, np.prod(encoded_test.shape[1:])))

## Save model in pickle
Esempio n. 6
0
import os
from src.configreader import ConfigReader
from src.dataset import Dataset
from src.autoencoder import Autoencoder

if __name__ == "__main__":
    # Training entry point: read config.json next to this file, load the
    # train/val/eval data, then train the autoencoder for a fixed number of
    # iterations with periodic evaluation and checkpointing.

    config_path = os.path.join(os.path.dirname(__file__), "config.json")

    config_obj = ConfigReader(config_path)

    dataset = Dataset(config_obj)
    x_train = dataset.load_train_data()
    x_val = dataset.load_val_data()
    x_eval = dataset.load_eval_data()
    model = Autoencoder(config_obj, dataset)

    model.set_iterators(x_train, x_val, eval_from_input_iterator=x_eval)

    # The checkpoint location does not change during training; look it up
    # once instead of on every save.
    model_save_path = config_obj.data.get_string("model_save_path")

    for i in range(12000):
        # The evaluation is quite time-intensive; turning it off for most
        # iterations increases the speed. Evaluate every 500th step only.
        do_evaluation = i % 500 == 0 and i > 0
        stats = model.train(do_evaluation)
        print("{}: {}".format(i, stats["loss"]))
        if "val_loss" in stats:
            print("Val loss: {}".format(stats["val_loss"]))
            print("IO: {}, l1: {}".format(stats['iou'], stats["eval_l1"]))
        # Checkpoint every 1000th iteration. The previous condition
        # (`i % 1000 and i > 0`) was truthy for every step that is NOT a
        # multiple of 1000, so the model was saved on almost every iteration.
        if i % 1000 == 0 and i > 0:
            model.save(model_save_path)

    # Final save after the last iteration.
    model.save(model_save_path)
    "and the empty/full block encoding to set the block empty/full right away."
)
# Boolean flag: when given, args.store_as_npy is True (output as .npy
# instead of the default .hdf5 container, per the help text).
parser.add_argument("--store_as_npy",
                    help="Usually the output is saved as a .hdf5 container, "
                    "using this will save the output as .npy",
                    action="store_true")
args = parser.parse_args()

# The configuration file is expected next to this script.
config_path = os.path.join(os.path.dirname(__file__), "config.json")

config_obj = ConfigReader(config_path)

# Override the dataset's batch size with the command-line value.
dataset = Dataset(config_obj)
dataset.batch_size = args.batch_size

# Only the placeholder-based evaluation path is set up here; no input
# iterators are passed.
model = Autoencoder(config_obj, dataset)
model.set_iterators(eval_from_placeholder=True)

# Restore the weights from the path configured as "model_save_path".
model.load(config_obj.data.get_string("model_save_path"))

# A single cubic block (batch of 1, one trailing channel) filled with ones,
# with edge length dataset.input_size().
input_ones = np.ones(
    [1, dataset.input_size(),
     dataset.input_size(),
     dataset.input_size(), 1])
# Encode a block that is uniformly -truncation_threshold ("full") and one
# that is uniformly +truncation_threshold ("empty"), to obtain the latent
# codes of these two constant blocks.
# (presumably the sign convention is negative = inside / occupied -- confirm
# against the dataset definition)
full_block_latent = model.encode_from_placeholder(
    input_ones * -dataset.truncation_threshold)
empty_block_latent = model.encode_from_placeholder(
    input_ones * dataset.truncation_threshold)

data_iterator = dataset.load_custom_data(
    args.data_path,
Esempio n. 8
0
# Linear encoder: maps the flattened 3*3*32 features down to the latent
# vector of size encoded_space_dim via one hidden layer of 64 units.
encoder_lin = nn.Sequential(nn.Linear(3 * 3 * 32, 64), nn.ReLU(True),
                            nn.Linear(64, encoded_space_dim))
# Linear decoder: mirror of the linear encoder, expanding the latent vector
# back to 3*3*32 features (with a trailing ReLU).
decoder_lin = nn.Sequential(nn.Linear(encoded_space_dim, 64), nn.ReLU(True),
                            nn.Linear(64, 3 * 3 * 32), nn.ReLU(True))
# Convolutional decoder: three stride-2 transposed convolutions reducing the
# channels 32 -> 16 -> 8 -> 1. For a 3x3 input (as suggested by
# lin_to_cnn=(32, 3, 3) below) the spatial size grows 3 -> 7 -> 14 -> 28.
# No activation follows the last layer.
decoder_cnn = nn.Sequential(
    nn.ConvTranspose2d(32, 16, 3, stride=2, output_padding=0), nn.ReLU(True),
    nn.ConvTranspose2d(16, 8, 3, stride=2, padding=1, output_padding=1),
    nn.ReLU(True),
    nn.ConvTranspose2d(8, 1, 3, stride=2, padding=1, output_padding=1))
# Instantiate the network from its four parts. encoder_cnn is defined
# outside this excerpt. lin_to_cnn is presumably the (channels, height,
# width) shape the linear decoder output is reshaped to before the
# convolutional decoder -- confirm in Autoencoder.
net = Autoencoder(encoder_cnn=encoder_cnn,
                  encoder_lin=encoder_lin,
                  decoder_lin=decoder_lin,
                  decoder_cnn=decoder_cnn,
                  lin_to_cnn=(32, 3, 3))
# Show the network
print(net)

### Some examples
# Take an input image (remember to add the batch dimension)
# NOTE(review): element [1] of the first sample is used as the image here;
# torchvision-style datasets yield (image, label), where the image would be
# element [0] -- confirm the sample layout of test_dataset.
img = test_dataset[0][1].unsqueeze(0)
print('Original image shape:', img.shape)
# Encode the image
img_enc = net.encode(img)
print('Encoded image shape:', img_enc.shape)
# Decode the image
dec_img = net.decode(img_enc)
print('Decoded image shape:', dec_img.shape)
Esempio n. 9
0
# Build the input pipelines: training and validation data come from one
# call, the testing data from a second one. All are configured via FLAGS.
train_data, val_data = dprep.get_prepped_training_data(FLAGS=FLAGS)
test_data = dprep.get_prepped_testing_data(FLAGS=FLAGS)
print('got prepped training and testing data')

# Initializable iterators have to be (re)initialized by running their
# `.initializer` op inside a session before elements can be fetched.
train_iter = train_data.make_initializable_iterator()
val_iter = val_data.make_initializable_iterator()
test_iter = test_data.make_initializable_iterator()
print('made iterators')

# Symbolic "next element" handles for each pipeline; each session run that
# evaluates one of them advances the corresponding iterator.
train_x = train_iter.get_next()
val_x = val_iter.get_next()
test_x = test_iter.get_next()
print('got next for all iterators')

model = Autoencoder(FLAGS=FLAGS)
print('made model')

# Graph ops for training (update op + loss) and for evaluation
# (predictions + loss).
# NOTE(review): both calls use underscore-prefixed (private by convention)
# methods of the model from outside the class.
train_op, train_loss_op = model._optimizer(train_x)
pred_op, test_loss_op = model._validation_loss(val_x, test_x)

print('starting session')
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    train_loss = 0
    test_loss = 0

    for epoch in range(FLAGS.num_epoch):
        sess.run(train_iter.initializer)

        for batch_nr in range(num_batches):
                    type=int,
                    default=4,
                    help="Number of threads to use in the input pipeline.")
# Parse the command-line options declared above.
args = parser.parse_args()

# The configuration file is expected next to this script.
config_path = os.path.join(os.path.dirname(__file__), "config.json")

config_obj = ConfigReader(config_path)

# Override the dataset's batch size with the command-line value, then open
# the user-supplied data in fast-inference mode using the requested number
# of input-pipeline threads.
dataset = Dataset(config_obj)
dataset.batch_size = args.batch_size
data_iterator = dataset.load_custom_data(args.path,
                                         fast_inference=True,
                                         num_threads=args.threads)

model = Autoencoder(config_obj, dataset)

# Enable evaluation both from the custom data iterator and from a
# placeholder (the latter is used for the constant blocks encoded below).
model.set_iterators(eval_from_input_iterator=data_iterator,
                    eval_from_placeholder=True,
                    eval_uses_fast_inference=True)

# Restore the weights from the path configured as "model_save_path".
model.load(config_obj.data.get_string("model_save_path"))
model.summary()

# A single cubic block (batch of 1, one trailing channel) filled with ones,
# with edge length dataset.input_size().
input_ones = np.ones(
    [1, dataset.input_size(),
     dataset.input_size(),
     dataset.input_size(), 1])
# Latent code of a block that is uniformly -truncation_threshold ("full").
# (presumably the sign convention is negative = inside / occupied -- confirm
# against the dataset definition)
full_block_latent = model.encode_from_placeholder(
    input_ones * -dataset.truncation_threshold)
empty_block_latent = model.encode_from_placeholder(