import numpy as np
import scipy.sparse as spp
import theano.tensor as T

from pylearn2.costs.cost import MethodCost
from pylearn2.datasets.dense_design_matrix import DenseDesignMatrix
from pylearn2.datasets.mnist import MNIST
from pylearn2.datasets.svhn import SVHN
from pylearn2.models.mlp import MLP, Sigmoid, RectifiedLinear, \
    ConvRectifiedLinear
from pylearn2.monitor import Monitor
from pylearn2.termination_criteria import And, EpochCounter, MonitorBased
from pylearn2.train import Train
from pylearn2.train_extensions.best_params import MonitorBasedSaveBest
# These two live in sgd.py in the pylearn2 revision this code targets;
# later revisions moved them.
from pylearn2.training_algorithms.sgd import SGD, MomentumAdjustor, \
    ExponentialDecayOverEpoch

# Project-local classes and helpers (My_CIFAR10, FaceEmo, My_Tanh,
# My_Softmax, NoisyRELU, GaussianRELU, expand) are assumed importable from
# this package; their module paths are not shown in this file.


class HPS:
    def __init__(self, state, base_channel_names=['train_objective'],
                 save_prefix="model_", cache_dataset=True):
        self.cache_dataset = cache_dataset
        self.dataset_cache = {}
        self.state = state
        self.mbsb_channel_name = \
            self.state.term_array.early_stopping.save_best_channel
        self.base_channel_names = base_channel_names
        self.save_prefix = save_prefix
        # TODO: store this in data for each experiment or dataset.

    def run(self):
        (model, learner, algorithm) = self.get_config()
        print 'learning'
        learner.main_loop()
        print 'End of model training'

    def get_config(self):
        # dataset
        self.load_dataset()
        # model
        self.load_model()
        # monitor
        self.setup_monitor()
        # training algorithm
        algorithm = self.get_train()
        # extensions
        extensions = self.get_extensions()
        # learner
        learner = Train(dataset=self.train_ddm,
                        model=self.model,
                        algorithm=algorithm,
                        extensions=extensions)
        return (self.model, learner, algorithm)

    def load_dataset(self):
        # TODO: we might need other variables identifying what kind of extra
        # preprocessing was done, such as feature products and the number of
        # features kept based on mutual information.
        if self.state.dataset == 'mnist':
            self.test_ddm = MNIST(which_set='test', one_hot=True)
            dataset = MNIST(which_set='train', shuffle=True, one_hot=True)
            # Hold out the last 10,000 of the 60,000 training examples as a
            # validation set.
            train_X, valid_X = np.split(dataset.X, [50000])
            train_y, valid_y = np.split(dataset.y, [50000])
            self.train_ddm = DenseDesignMatrix(X=train_X, y=train_y)
            self.valid_ddm = DenseDesignMatrix(X=valid_X, y=valid_y)
        elif self.state.dataset == 'svhn':
            self.train_ddm = SVHN(which_set='splitted_train')
            self.test_ddm = SVHN(which_set='test')
            self.valid_ddm = SVHN(which_set='valid')
        elif self.state.dataset == 'cifar10':
            self.train_ddm = My_CIFAR10(which_set='train', one_hot=True)
            self.test_ddm = None
            self.valid_ddm = My_CIFAR10(which_set='test', one_hot=True)
        elif self.state.dataset == 'faceEmo':
            self.train_ddm = FaceEmo(which_set='train', one_hot=True)
            self.test_ddm = None
            self.valid_ddm = FaceEmo(which_set='test', one_hot=True)

        if self.train_ddm is not None:
            self.nvis = self.train_ddm.X.shape[1]
            self.nout = self.train_ddm.y.shape[1]
            print "nvis, nout :", self.nvis, self.nout
            self.ntrain = self.train_ddm.X.shape[0]
            print "ntrain :", self.ntrain
        if self.valid_ddm is not None:
            self.nvalid = self.valid_ddm.X.shape[0]
            print "nvalid :", self.nvalid
        if self.test_ddm is not None:
            self.ntest = self.test_ddm.X.shape[0]
            print "ntest :", self.ntest

    def load_model(self):
        # Dispatch on model_class, e.g. 'mlp' -> self.get_model_mlp().
        model_class = self.state.model_class
        fn = getattr(self, 'get_model_' + model_class)
        self.model = fn()
        return self.model
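    # Model construction: get_model_mlp() below walks self.state.layers,
    # expands each entry into a layer spec, and dispatches on its layer_class
    # field to the matching get_layer_<layer_class> builder (the same getattr
    # convention used by load_model and get_train).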
    def get_model_mlp(self):
        self.dropout = False
        self.input_include_probs = {}
        self.input_scales = {}
        self.weight_decay = False
        self.weight_decays = {}
        self.l1_weight_decay = False
        self.l1_weight_decays = {}
        nnet_layers = self.state.layers
        input_space_id = self.state.input_space_id
        nvis = self.nvis
        self.batch_size = self.state.batch_size
        # TODO: add input_space as a config option.
        input_space = None
        # TODO: topo_view is always False for the moment.
        self.topo_view = False
        assert nvis is not None
        layers = []
        for i, layer in enumerate(nnet_layers.values()):
            layer = expand(layer)
            layer = self.get_layer(layer, i)
            layers.append(layer)
        # create MLP
        print layers
        model = MLP(layers=layers, input_space=input_space, nvis=nvis,
                    batch_size=self.batch_size)
        self.mlp = model
        return model

    def get_layer(self, layer, layer_id):
        layer_class = layer.layer_class
        layer_name = layer.layer_name
        dropout_scale = layer.dropout_scale
        dropout_prob = layer.dropout_probability
        weight_decay = layer.weight_decay
        l1_weight_decay = layer.l1_weight_decay
        fn = getattr(self, 'get_layer_' + layer_class)
        if layer_name is None:
            layer_name = layer_class + str(layer_id)
            layer.layer_name = layer_name
        layer = fn(layer)
        # per-layer cost function parameters:
        if dropout_scale is not None:
            self.dropout = True
            self.input_scales[layer_name] = dropout_scale
        if dropout_prob is not None:
            self.dropout = True
            # The config stores a drop probability; MLP dropout wants the
            # complementary include probability.
            self.input_include_probs[layer_name] = (1. - dropout_prob)
        if weight_decay is not None:
            self.weight_decay = True
            self.weight_decays[layer_name] = weight_decay
        if l1_weight_decay is not None:
            self.l1_weight_decay = True
            self.l1_weight_decays[layer_name] = l1_weight_decay
        return layer

    def get_layer_sigmoid(self, layer):
        return Sigmoid(layer_name=layer.layer_name, dim=layer.dim,
                       irange=layer.irange, istdev=layer.istdev,
                       sparse_init=layer.sparse_init,
                       sparse_stdev=layer.sparse_stdev,
                       include_prob=layer.include_prob,
                       init_bias=layer.init_bias,
                       W_lr_scale=layer.W_lr_scale,
                       b_lr_scale=layer.b_lr_scale,
                       max_col_norm=layer.max_col_norm,
                       max_row_norm=layer.max_row_norm)

    def get_layer_tanh(self, layer):
        return My_Tanh(layer_name=layer.layer_name, dim=layer.dim,
                       irange=layer.irange, istdev=layer.istdev,
                       sparse_init=layer.sparse_init,
                       sparse_stdev=layer.sparse_stdev,
                       include_prob=layer.include_prob,
                       init_bias=layer.init_bias,
                       W_lr_scale=layer.W_lr_scale,
                       b_lr_scale=layer.b_lr_scale,
                       max_col_norm=layer.max_col_norm,
                       max_row_norm=layer.max_row_norm)

    def get_layer_rectifiedlinear(self, layer):
        # TODO: left_slope is hard-coded to 0.0; it should be set by the user.
        layer.left_slope = 0.0
        return RectifiedLinear(layer_name=layer.layer_name, dim=layer.dim,
                               irange=layer.irange, istdev=layer.istdev,
                               sparse_init=layer.sparse_init,
                               sparse_stdev=layer.sparse_stdev,
                               include_prob=layer.include_prob,
                               init_bias=layer.init_bias,
                               W_lr_scale=layer.W_lr_scale,
                               b_lr_scale=layer.b_lr_scale,
                               max_col_norm=layer.max_col_norm,
                               max_row_norm=layer.max_row_norm,
                               left_slope=layer.left_slope,
                               use_bias=layer.use_bias)
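    # Example (hypothetical) of the flattened layer spec each get_layer_*
    # method consumes. Field names follow the attribute reads above; a
    # jobman-style DD is assumed for attribute access:
    #
    #   layer = DD(layer_class='rectifiedlinear', layer_name=None, dim=500,
    #              irange=0.05, istdev=None, sparse_init=None,
    #              sparse_stdev=1.0, include_prob=1.0, init_bias=0.0,
    #              W_lr_scale=None, b_lr_scale=None, max_col_norm=None,
    #              max_row_norm=None, use_bias=True, dropout_probability=0.5,
    #              dropout_scale=None, weight_decay=None,
    #              l1_weight_decay=None)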
    def get_layer_softmax(self, layer):
        return My_Softmax(layer_name=layer.layer_name, n_classes=layer.dim,
                          irange=layer.irange, istdev=layer.istdev,
                          sparse_init=layer.sparse_init,
                          init_bias_target_marginals=layer.init_bias,
                          W_lr_scale=layer.W_lr_scale,
                          b_lr_scale=layer.b_lr_scale,
                          max_col_norm=layer.max_col_norm,
                          max_row_norm=layer.max_row_norm)

    def get_layer_noisyRELU(self, layer):
        return NoisyRELU(
            dim=layer.dim,
            layer_name=layer.layer_name,
            irange=layer.irange,
            sparse_init=layer.sparse_init,
            W_lr_scale=layer.W_lr_scale,
            b_lr_scale=layer.b_lr_scale,
            mask_weights=None,
            max_row_norm=layer.max_row_norm,
            max_col_norm=layer.max_col_norm,
            use_bias=True,
            noise_factor=layer.noise_factor,
            desired_active_rate=layer.desired_active_rate,
            adjust_threshold_factor=layer.adjust_threshold_factor)

    def get_layer_convRectifiedLinear(self, layer):
        return ConvRectifiedLinear(
            layer_name=layer.layer_name,
            output_channels=layer.output_channels,
            irange=layer.irange,
            kernel_shape=layer.kernel_shape,
            pool_shape=layer.pool_shape,
            pool_stride=layer.pool_stride,
            max_kernel_norm=layer.max_kernel_norm)

    def get_layer_gaussianRELU(self, layer):
        return GaussianRELU(
            dim=layer.dim,
            layer_name=layer.layer_name,
            irange=layer.irange,
            sparse_init=layer.sparse_init,
            W_lr_scale=layer.W_lr_scale,
            b_lr_scale=layer.b_lr_scale,
            mask_weights=None,
            max_row_norm=layer.max_row_norm,
            max_col_norm=layer.max_col_norm,
            use_bias=True,
            desired_active_rate=layer.desired_active_rate,
            adjust_threshold_factor=layer.adjust_threshold_factor,
            noise_std=layer.noise_std)

    def setup_monitor(self):
        if self.topo_view:
            print "topo view"
            self.minibatch = T.as_tensor_variable(
                self.valid_ddm.get_batch_topo(self.batch_size),
                name='minibatch')
        else:
            print "design view"
            batch = self.valid_ddm.get_batch_design(self.batch_size)
            if isinstance(batch, spp.csr_matrix):
                print "sparse2"
                self.minibatch = \
                    self.model.get_input_space().make_batch_theano()
                print type(self.minibatch)
            else:
                self.minibatch = T.as_tensor_variable(
                    self.valid_ddm.get_batch_design(self.batch_size),
                    name='minibatch')
        self.target = T.matrix('target')
        self.monitor = Monitor.get_monitor(self.model)
        self.log_channel_names = []
        self.log_channel_names.extend(self.base_channel_names)

    def get_train(self):
        # Dispatch on train_class, e.g. 'sgd' -> self.get_train_sgd().
        train_class = self.state.train_class
        fn = getattr(self, 'get_train_' + train_class)
        return fn()

    def get_train_sgd(self):
        cost = MethodCost('cost_from_X')
        num_train_batch = (self.ntrain / self.batch_size)
        print "num training batches:", num_train_batch
        termination_criterion = self.get_terminations()
        monitoring_dataset = {}
        for dataset_id in self.state.monitoring_dataset:
            if dataset_id == 'test' and self.test_ddm is not None:
                monitoring_dataset['test'] = self.test_ddm
            elif dataset_id == 'valid' and self.valid_ddm is not None:
                monitoring_dataset['valid'] = self.valid_ddm
        if not monitoring_dataset:
            monitoring_dataset = None
        return SGD(learning_rate=self.state.learning_rate,
                   batch_size=self.state.batch_size,
                   cost=cost,
                   batches_per_iter=num_train_batch,
                   monitoring_dataset=monitoring_dataset,
                   termination_criterion=termination_criterion,
                   init_momentum=self.state.init_momentum,
                   train_iteration_mode=self.state.train_iteration_mode)
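    # Note: self.state.monitoring_dataset is expected to be an iterable of
    # dataset ids ('valid', 'test'); ids whose dataset was not loaded are
    # skipped, and SGD receives None (no monitoring) when nothing matched.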
    def get_terminations(self):
        if 'term_array' not in self.state:
            return None
        terminations = []
        for term_obj in self.state.term_array.values():
            fn = getattr(self, 'get_term_' + term_obj.term_class)
            terminations.append(fn(term_obj))
        # Multiple criteria are combined conjunctively.
        if len(terminations) > 1:
            return And(terminations)
        return terminations[0]

    def get_term_epochcounter(self, term_obj):
        return EpochCounter(term_obj.max_epochs)

    def get_term_monitorbased(self, term_obj):
        print 'monitor_based'
        return MonitorBased(
            prop_decrease=term_obj.proportional_decrease,
            N=term_obj.max_epochs,
            channel_name=term_obj.channel_name)

    def get_extensions(self):
        if 'ext_array' not in self.state:
            return []
        extensions = []
        for ext_obj in self.state.ext_array.values():
            fn = getattr(self, 'get_ext_' + ext_obj.ext_class)
            extensions.append(fn(ext_obj))
        # monitor-based save best
        print 'save best channel', self.mbsb_channel_name
        if self.mbsb_channel_name is not None:
            self.save_path = self.save_prefix + str(self.state.config_id) \
                + "_optimum.pkl"
            extensions.append(MonitorBasedSaveBest(
                channel_name=self.mbsb_channel_name,
                save_path=self.save_path))
        return extensions

    def get_ext_exponentialdecayoverepoch(self, ext_obj):
        return ExponentialDecayOverEpoch(
            decay_factor=ext_obj.decay_factor,
            min_lr_scale=ext_obj.min_lr_scale)

    def get_ext_momentumadjustor(self, ext_obj):
        return MomentumAdjustor(
            final_momentum=ext_obj.final_momentum,
            start=ext_obj.start_epoch,
            saturate=ext_obj.saturate_epoch)
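
# A minimal usage sketch (hypothetical; the real experiment configs are
# built elsewhere). It assumes `state` is an attribute-accessible mapping
# such as jobman's DD, carrying the fields read by the methods above;
# every value shown is an illustrative placeholder, not a tested config:
#
#   from jobman import DD
#
#   state = DD(
#       dataset='mnist',
#       model_class='mlp',
#       train_class='sgd',
#       batch_size=100,
#       learning_rate=0.01,
#       init_momentum=0.5,
#       train_iteration_mode='shuffled_sequential',
#       monitoring_dataset=['valid'],
#       input_space_id=None,
#       config_id=0,
#       layers=DD(h0=DD(layer_class='rectifiedlinear', ...),
#                 y=DD(layer_class='softmax', ...)),
#       term_array=DD(early_stopping=DD(
#           term_class='monitorbased',
#           proportional_decrease=0.01,
#           max_epochs=30,
#           channel_name='valid_y_misclass',
#           save_best_channel='valid_y_misclass')),
#       ext_array=DD(moment=DD(
#           ext_class='momentumadjustor',
#           final_momentum=0.9, start_epoch=1, saturate_epoch=10)))
#
#   HPS(state).run()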