# Assumed imports for the examples in this file (module paths follow the
# pynet-style package layout these snippets come from; adjust to your install):
#   from pynet.datasets.mnist import Mnist
#   from pynet.datasets.preprocessor import GCN
#   from pynet.model import AutoEncoder
#   from pynet.layer import Tanh, RELU
#   from pynet.learning_method import AdaGrad, SGD
#   from pynet.learning_rule import LearningRule
#   from pynet.cost import Cost
#   from pynet.train_object import TrainObject


def autoencoder():
    # build the dataset with the batch size and preprocessor
    data = Mnist(train_valid_test_ratio=[8, 1, 1], batch_size=100,
                 preprocessor=GCN())

    # for an AutoEncoder, the inputs and targets must be the same
    train = data.get_train()
    data.set_train(train.X, train.X)
    valid = data.get_valid()
    data.set_valid(valid.X, valid.X)
    test = data.get_test()
    data.set_test(test.X, test.X)

    # build the autoencoder
    ae = AutoEncoder(input_dim=data.feature_size(), rand_seed=123)
    h1_layer = Tanh(dim=500, name="h1_layer", W=None, b=None)
    # add the encoding layer
    ae.add_encode_layer(h1_layer)
    # the decoding layer mirrors the encoder with tied weights: W = h1_layer.W.T
    h1_mirror = Tanh(dim=ae.input_dim, name="h1_mirror", W=h1_layer.W.T, b=None)
    # add the decoding mirror layer
    ae.add_decode_layer(h1_mirror)

    # build the learning method
    learning_method = AdaGrad(learning_rate=0.1, momentum=0.9)

    # set the learning rule
    learning_rule = LearningRule(
        max_col_norm=10,
        L1_lambda=None,
        L2_lambda=None,
        training_cost=Cost(type="mse"),
        learning_rate_decay_factor=None,
        stopping_criteria={
            "max_epoch": 300,
            "epoch_look_back": 10,
            "cost": Cost(type="error"),
            "percent_decrease": 0.01,
        },
    )

    # put all the components into a TrainObject
    train_object = TrainObject(model=ae,
                               dataset=data,
                               learning_rule=learning_rule,
                               learning_method=learning_method)
    # finally run the training
    train_object.run()
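
# For intuition: with tied weights, the network above reconstructs an input
# as x_hat = tanh(tanh(x*W + b1)*W.T + b2). Below is a minimal numpy sketch
# of that forward pass -- the function and variable names are illustrative
# and not part of the library API.
import numpy as np

def tied_ae_fprop(x, W, b_enc, b_dec):
    """One tanh encode layer plus its weight-tied tanh mirror."""
    h = np.tanh(x.dot(W) + b_enc)        # encode: (n, 784) -> (n, 500)
    return np.tanh(h.dot(W.T) + b_dec)   # decode with the transposed weights

# usage: random weights at MNIST sizes, just to check the shapes
rng = np.random.RandomState(123)
W = rng.uniform(-0.05, 0.05, size=(784, 500))
x = rng.rand(10, 784)
x_hat = tied_ae_fprop(x, W, np.zeros(500), np.zeros(784))
assert x_hat.shape == x.shape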
# This builder assumes the dataset modules are imported under short aliases,
# e.g. preproc (preprocessors), data_noise (noise classes), and mnist, spec,
# tf, i2r (dataset packages); the exact paths depend on the package layout.
def build_dataset(self):
    dataset = None

    preprocessor = None if self.state.dataset.preprocessor.type is None else \
        getattr(preproc, self.state.dataset.preprocessor.type)()
    noise = None if self.state.dataset.dataset_noise.type is None else \
        getattr(data_noise, self.state.dataset.dataset_noise.type)()

    if self.state.dataset.dataset_noise.type == 'Gaussian':
        noise.std = self.state.dataset.dataset_noise.std

    if self.state.dataset.preprocessor.type == 'Scale':
        preprocessor.max = self.state.dataset.preprocessor.global_max
        preprocessor.min = self.state.dataset.preprocessor.global_min
        preprocessor.buffer = self.state.dataset.preprocessor.buffer
        preprocessor.scale_range = self.state.dataset.preprocessor.scale_range

    if self.state.dataset.type == 'Mnist':
        dataset = Mnist(train_valid_test_ratio=self.state.dataset.train_valid_test_ratio,
                        preprocessor=preprocessor,
                        noise=noise,
                        batch_size=self.state.dataset.batch_size,
                        num_batches=self.state.dataset.num_batches,
                        iter_class=self.state.dataset.iter_class,
                        rng=self.state.dataset.rng)
        # for an autoencoder, inputs and targets must be the same
        train = dataset.get_train()
        dataset.set_train(train.X, train.X)
        valid = dataset.get_valid()
        dataset.set_valid(valid.X, valid.X)
        test = dataset.get_test()
        dataset.set_test(test.X, test.X)

    elif self.state.dataset.type[:12] == 'Mnist_Blocks':
        dataset = getattr(mnist, self.state.dataset.type)(
            feature_size=self.state.dataset.feature_size,
            target_size=self.state.dataset.feature_size,
            train_valid_test_ratio=self.state.dataset.train_valid_test_ratio,
            preprocessor=preprocessor,
            noise=noise,
            batch_size=self.state.dataset.batch_size,
            num_batches=self.state.dataset.num_batches,
            iter_class=self.state.dataset.iter_class,
            rng=self.state.dataset.rng)

    elif self.state.dataset.type[:4] == 'P276':
        dataset = getattr(spec, self.state.dataset.type)(
            train_valid_test_ratio=self.state.dataset.train_valid_test_ratio,
            preprocessor=preprocessor,
            noise=noise,
            batch_size=self.state.dataset.batch_size,
            num_batches=self.state.dataset.num_batches,
            iter_class=self.state.dataset.iter_class,
            rng=self.state.dataset.rng)
        train = dataset.get_train()
        dataset.set_train(train.X, train.X)
        valid = dataset.get_valid()
        dataset.set_valid(valid.X, valid.X)
        test = dataset.get_test()
        dataset.set_test(test.X, test.X)

    elif self.state.dataset.type[:5] == 'Laura':
        dataset = getattr(spec, self.state.dataset.type)(
            feature_size=self.state.dataset.feature_size,
            target_size=self.state.dataset.feature_size,
            train_valid_test_ratio=self.state.dataset.train_valid_test_ratio,
            num_blocks=self.state.dataset.num_blocks,
            preprocessor=preprocessor,
            noise=noise,
            batch_size=self.state.dataset.batch_size,
            num_batches=self.state.dataset.num_batches,
            iter_class=self.state.dataset.iter_class,
            rng=self.state.dataset.rng)

    elif self.state.dataset.type[:18] == 'TransFactor_Blocks':
        dataset = getattr(tf, self.state.dataset.type)(
            feature_size=self.state.dataset.feature_size,
            target_size=self.state.dataset.feature_size,
            one_hot=self.state.dataset.one_hot,
            num_blocks=self.state.dataset.num_blocks,
            train_valid_test_ratio=self.state.dataset.train_valid_test_ratio,
            preprocessor=preprocessor,
            noise=noise,
            batch_size=self.state.dataset.batch_size,
            num_batches=self.state.dataset.num_batches,
            iter_class=self.state.dataset.iter_class,
            rng=self.state.dataset.rng)

    elif self.state.dataset.type[:11] == 'TransFactor':
        dataset = getattr(tf, self.state.dataset.type)(
            # feature_size=self.state.dataset.feature_size,
            # target_size=self.state.dataset.feature_size,
            train_valid_test_ratio=self.state.dataset.train_valid_test_ratio,
            preprocessor=preprocessor,
            noise=noise,
            batch_size=self.state.dataset.batch_size,
            num_batches=self.state.dataset.num_batches,
            iter_class=self.state.dataset.iter_class,
            rng=self.state.dataset.rng)
        train = dataset.get_train()
        dataset.set_train(train.X, train.X)
        valid = dataset.get_valid()
        dataset.set_valid(valid.X, valid.X)
        test = dataset.get_test()
        dataset.set_test(test.X, test.X)

    elif self.state.dataset.type[:13] == 'I2R_Posterior':
        dataset = getattr(i2r, self.state.dataset.type)(
            train_valid_test_ratio=self.state.dataset.train_valid_test_ratio,
            preprocessor=preprocessor,
            noise=noise,
            batch_size=self.state.dataset.batch_size,
            num_batches=self.state.dataset.num_batches,
            iter_class=self.state.dataset.iter_class,
            rng=self.state.dataset.rng)

    return dataset
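
# build_dataset() reads everything from a nested self.state.dataset config.
# A minimal sketch of the structure it expects, using SimpleNamespace in
# place of whatever config object the experiments actually pass in -- the
# field names are exactly those read above; the values are illustrative only:
from types import SimpleNamespace as NS

state = NS(dataset=NS(
    type='Mnist',
    train_valid_test_ratio=[8, 1, 1],
    batch_size=100,
    num_batches=None,
    iter_class='SequentialSubsetIterator',  # iterator class name, illustrative
    rng=None,
    feature_size=784,   # read by the *_Blocks and Laura branches
    num_blocks=None,    # read by the Laura and TransFactor_Blocks branches
    one_hot=False,      # read by the TransFactor_Blocks branch
    preprocessor=NS(type='Scale', global_max=1.0, global_min=0.0,
                    buffer=0.1, scale_range=[-1, 1]),
    dataset_noise=NS(type='Gaussian', std=0.1),
))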
# Earlier variant of build_dataset(): it draws noise classes from the `noisy`
# module instead of `data_noise`, and covers fewer dataset types.
def build_dataset(self):
    dataset = None

    preprocessor = None if self.state.dataset.preprocessor.type is None else \
        getattr(preproc, self.state.dataset.preprocessor.type)()

    # if self.state.dataset.noise.type == 'BlackOut' or self.state.dataset.noise.type == 'MaskOut':
    #     noise = None if self.state.dataset.noise.type is None else \
    #         getattr(noisy, self.state.dataset.noise.type)(ratio=self.state.dataset.noise.ratio)
    # else:
    #     noise = getattr(noisy, self.state.dataset.noise.type)()
    noise = None if self.state.dataset.dataset_noise.type is None else \
        getattr(noisy, self.state.dataset.dataset_noise.type)()

    if self.state.dataset.preprocessor.type == 'Scale':
        preprocessor.max = self.state.dataset.preprocessor.global_max
        preprocessor.min = self.state.dataset.preprocessor.global_min
        preprocessor.buffer = self.state.dataset.preprocessor.buffer
        preprocessor.scale_range = self.state.dataset.preprocessor.scale_range

    if self.state.dataset.type == 'Mnist':
        dataset = Mnist(train_valid_test_ratio=self.state.dataset.train_valid_test_ratio,
                        preprocessor=preprocessor,
                        noise=noise,
                        batch_size=self.state.dataset.batch_size,
                        num_batches=self.state.dataset.num_batches,
                        iter_class=self.state.dataset.iter_class,
                        rng=self.state.dataset.rng)
        # for an autoencoder, inputs and targets must be the same
        train = dataset.get_train()
        dataset.set_train(train.X, train.X)
        valid = dataset.get_valid()
        dataset.set_valid(valid.X, valid.X)
        test = dataset.get_test()
        dataset.set_test(test.X, test.X)

    elif self.state.dataset.type[:12] == 'Mnist_Blocks':
        dataset = getattr(mnist, self.state.dataset.type)(
            feature_size=self.state.dataset.feature_size,
            target_size=self.state.dataset.feature_size,
            train_valid_test_ratio=self.state.dataset.train_valid_test_ratio,
            preprocessor=preprocessor,
            noise=noise,
            batch_size=self.state.dataset.batch_size,
            num_batches=self.state.dataset.num_batches,
            iter_class=self.state.dataset.iter_class,
            rng=self.state.dataset.rng)

    elif self.state.dataset.type[:4] == 'P276':
        dataset = getattr(spec, self.state.dataset.type)(
            train_valid_test_ratio=self.state.dataset.train_valid_test_ratio,
            preprocessor=preprocessor,
            noise=noise,
            batch_size=self.state.dataset.batch_size,
            num_batches=self.state.dataset.num_batches,
            iter_class=self.state.dataset.iter_class,
            rng=self.state.dataset.rng)
        train = dataset.get_train()
        dataset.set_train(train.X, train.X)
        valid = dataset.get_valid()
        dataset.set_valid(valid.X, valid.X)
        test = dataset.get_test()
        dataset.set_test(test.X, test.X)

    elif self.state.dataset.type[:5] == 'Laura':
        dataset = getattr(spec, self.state.dataset.type)(
            feature_size=self.state.dataset.feature_size,
            target_size=self.state.dataset.feature_size,
            train_valid_test_ratio=self.state.dataset.train_valid_test_ratio,
            preprocessor=preprocessor,
            noise=noise,
            batch_size=self.state.dataset.batch_size,
            num_batches=self.state.dataset.num_batches,
            iter_class=self.state.dataset.iter_class,
            rng=self.state.dataset.rng)

    elif self.state.dataset.type[:18] == 'TransFactor_Blocks':
        dataset = getattr(tf, self.state.dataset.type)(
            feature_size=self.state.dataset.feature_size,
            target_size=self.state.dataset.feature_size,
            train_valid_test_ratio=self.state.dataset.train_valid_test_ratio,
            preprocessor=preprocessor,
            noise=noise,
            batch_size=self.state.dataset.batch_size,
            num_batches=self.state.dataset.num_batches,
            iter_class=self.state.dataset.iter_class,
            rng=self.state.dataset.rng)

    elif self.state.dataset.type[:11] == 'TransFactor':
        dataset = getattr(tf, self.state.dataset.type)(
            # feature_size=self.state.dataset.feature_size,
            # target_size=self.state.dataset.feature_size,
            train_valid_test_ratio=self.state.dataset.train_valid_test_ratio,
            preprocessor=preprocessor,
            noise=noise,
            batch_size=self.state.dataset.batch_size,
            num_batches=self.state.dataset.num_batches,
            iter_class=self.state.dataset.iter_class,
            rng=self.state.dataset.rng)
        train = dataset.get_train()
        dataset.set_train(train.X, train.X)
        valid = dataset.get_valid()
        dataset.set_valid(valid.X, valid.X)
        test = dataset.get_test()
        dataset.set_test(test.X, test.X)

    return dataset
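
# The builders above share one dispatch pattern: match a prefix of the
# configured type string, then resolve the concrete dataset class by name
# with getattr(module, name). A stripped-down, self-contained illustration
# of that pattern -- the classes and names here are hypothetical stand-ins,
# not library classes:
import types

datasets = types.ModuleType('datasets')

class MnistPlain(object):
    def __init__(self, **kwargs):
        self.kwargs = kwargs

class MnistBlocks(object):
    def __init__(self, **kwargs):
        self.kwargs = kwargs

# register the classes on the stand-in module under their config names
datasets.Mnist = MnistPlain
datasets.Mnist_Blocks_500 = MnistBlocks

def build(type_name):
    # longer prefixes are tested first, just as 'TransFactor_Blocks' is
    # tested before 'TransFactor' above; otherwise the general case would
    # shadow the specific one
    if type_name[:12] == 'Mnist_Blocks':
        return getattr(datasets, type_name)(feature_size=784)
    elif type_name == 'Mnist':
        return getattr(datasets, type_name)()
    raise ValueError('unknown dataset type: %s' % type_name)

print(type(build('Mnist_Blocks_500')).__name__)  # -> MnistBlocks
print(type(build('Mnist')).__name__)             # -> MnistPlain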
def stacked_autoencoder():
    name = "Stacked_AE"

    # =====[ Train the first layer of the stacked autoencoder ]=====#
    print("Start training First Layer of AutoEncoder")

    # build the dataset with the batch size
    data = Mnist(train_valid_test_ratio=[8, 1, 1], batch_size=100)

    # for an AutoEncoder, the inputs and targets must be the same
    train = data.get_train()
    data.set_train(train.X, train.X)
    valid = data.get_valid()
    data.set_valid(valid.X, valid.X)
    test = data.get_test()
    data.set_test(test.X, test.X)

    # build the first autoencoder
    ae = AutoEncoder(input_dim=data.feature_size(), rand_seed=123)
    h1_layer = RELU(dim=500, name="h1_layer", W=None, b=None)
    # add the encoding layer
    ae.add_encode_layer(h1_layer)
    # the decoding mirror layer shares tied weights: W = h1_layer.W.T
    h1_mirror = RELU(dim=ae.input_dim, name="h1_mirror", W=h1_layer.W.T, b=None)
    ae.add_decode_layer(h1_mirror)

    # build the learning method
    learning_method = SGD(learning_rate=0.001, momentum=0.9)

    # set the learning rule
    learning_rule = LearningRule(
        max_col_norm=10,
        L1_lambda=None,
        L2_lambda=None,
        training_cost=Cost(type="mse"),
        learning_rate_decay_factor=None,
        stopping_criteria={
            "max_epoch": 3,
            "epoch_look_back": 1,
            "cost": Cost(type="error"),
            "percent_decrease": 0.01,
        },
    )

    # put all the components into a TrainObject and run the training
    train_object = TrainObject(model=ae,
                               dataset=data,
                               learning_rule=learning_rule,
                               learning_method=learning_method)
    train_object.run()

    # =====[ Train the second layer of the stacked autoencoder ]=====#
    print("Start training Second Layer of AutoEncoder")

    # forward-propagate the data through the trained first layer: its encoded
    # outputs become the inputs (and targets) of the second layer
    reduced_train_X = ae.encode(train.X)
    reduced_valid_X = ae.encode(valid.X)
    reduced_test_X = ae.encode(test.X)

    data.set_train(X=reduced_train_X, y=reduced_train_X)
    data.set_valid(X=reduced_valid_X, y=reduced_valid_X)
    data.set_test(X=reduced_test_X, y=reduced_test_X)

    # create a new autoencoder that takes the encoded outputs of the first one
    ae2 = AutoEncoder(input_dim=data.feature_size(), rand_seed=None)
    h2_layer = RELU(dim=100, name="h2_layer", W=None, b=None)
    ae2.add_encode_layer(h2_layer)
    h2_mirror = RELU(dim=h1_layer.dim, name="h2_mirror", W=h2_layer.W.T, b=None)
    ae2.add_decode_layer(h2_mirror)

    train_object = TrainObject(model=ae2,
                               dataset=data,
                               learning_rule=learning_rule,
                               learning_method=learning_method)
    train_object.run()

    # =====[ Fine-tuning ]=====#
    print("Fine Tuning")

    # reload the raw dataset and fine-tune the full stack end-to-end
    data = Mnist()
    train = data.get_train()
    data.set_train(train.X, train.X)
    valid = data.get_valid()
    data.set_valid(valid.X, valid.X)
    test = data.get_test()
    data.set_test(test.X, test.X)

    # stack the pretrained layers into one autoencoder: encode 784 -> 500 -> 100,
    # then decode 100 -> 500 -> 784 through the mirror layers
    ae3 = AutoEncoder(input_dim=data.feature_size(), rand_seed=None)
    ae3.add_encode_layer(h1_layer)
    ae3.add_encode_layer(h2_layer)
    ae3.add_decode_layer(h2_mirror)
    ae3.add_decode_layer(h1_mirror)

    train_object = TrainObject(model=ae3,
                               dataset=data,
                               learning_rule=learning_rule,
                               learning_method=learning_method)
    train_object.run()
    print("Training Done")
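
# Once fine-tuned, the stacked model is a 784 -> 100 feature extractor.
# A short usage sketch, assuming stacked_autoencoder() is changed to end with
# `return ae3`, and that encode()/decode() run the full layer stacks
# (encode() is used that way for the single-layer model above; decode()
# is an assumed counterpart):
ae3 = stacked_autoencoder()
data = Mnist(train_valid_test_ratio=[8, 1, 1], batch_size=100)
test_X = data.get_test().X
codes = ae3.encode(test_X)    # one 100-dim code per example
recons = ae3.decode(codes)    # back to the 784-dim input space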