def __init__(self, dataset_name, model_name, optimizer_name, trial_num):
    """
    :param dataset_name: name of the dataset
    :type dataset_name: str
    :param model_name: name of the model
    :type model_name: str
    :param optimizer_name: name of the optimizer
    :type optimizer_name: str
    :param trial_num: current number of repeated trials
    :type trial_num: int
    """
    # get optimized hyperparameters
    with open(
            f'../params/{dataset_name}_{model_name}_{optimizer_name}/result.json'
    ) as f:
        params = json.load(f)

    # get instances
    self.dataset = Datasets.get(dataset_name)
    self.model = Models.get(model_name, dataset=self.dataset)
    self.optimizer = Optimizers.get(optimizer_name, params=params)

    # get config
    with open('./config.json') as f:
        config = json.load(f)

    # get constants
    c = config['constants'][dataset_name][model_name]
    self.loss = c['loss']
    self.batch_size = c['batch_size']
    self.epochs = c['epochs']

    # configure and initialize directory
    d = self.main_dir = (
        f'../data/{dataset_name}_{model_name}_{optimizer_name}/trial{trial_num}')
    if os.path.exists(d):
        shutil.rmtree(d)
    os.makedirs(d)

    # configure hyperdash experiment
    self.hd_exp = HyperdashExperiment(
        dataset_name,
        api_key_getter=lambda: config['hyperdash']['api_key'])
    self.hd_exp.param('dataset_name', dataset_name)
    self.hd_exp.param('model_name', model_name)
    self.hd_exp.param('optimizer_name', optimizer_name)
    self.hd_exp.param('trial_num', trial_num)
    for k, v in params.items():
        self.hd_exp.param(k, v)

    # set callbacks
    self.callbacks = [
        Hyperdash(['accuracy', 'loss', 'val_accuracy', 'val_loss'],
                  self.hd_exp),
        TensorBoard(log_dir=f'{self.main_dir}/tensorboard'),
        TimeLogger(filename=f'{self.main_dir}/time.csv'),
        CSVLogger(filename=f'{self.main_dir}/result.csv', append=True)
    ]
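# A possible driver for the class above (a sketch, not repo code): the class
# name Experiment and the dataset/model/optimizer names are hypothetical;
# get_batch() and the compile/fit pattern mirror the objective() further below.
exp = Experiment('mnist', 'cnn', 'adam', trial_num=0)
(x_train, y_train), *_ = exp.dataset.get_batch()
exp.model.compile(loss=exp.loss, optimizer=exp.optimizer, metrics=['accuracy'])
exp.model.fit(x_train, y_train,
              batch_size=exp.batch_size,
              epochs=exp.epochs,
              callbacks=exp.callbacks,
              validation_split=0.2)
exp.hd_exp.end()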
def objective(self, params):
    """
    objective function to optimize

    :param params: hyperparameters for the optimizer
    :return: negated maximum validation accuracy (the tuner minimizes)
    :rtype: float
    """
    # get instances
    dataset = Datasets.get(self.dataset_name)
    model = Models.get(self.model_name, dataset=dataset)
    optimizer = Optimizers.get(self.optimizer_name, params=params)

    # configure hyperdash experiment
    hd_exp = HyperdashExperiment(
        self.dataset_name,
        api_key_getter=lambda: self.config['hyperdash']['api_key'])
    hd_exp.param('dataset_name', self.dataset_name)
    hd_exp.param('model_name', self.model_name)
    hd_exp.param('optimizer_name', self.optimizer_name)
    for k, v in params.items():
        hd_exp.param(k, v)

    # set callbacks
    callbacks = [
        Hyperdash(['accuracy', 'loss', 'val_accuracy', 'val_loss'], hd_exp),
        EarlyStopping('val_accuracy', patience=10, min_delta=0.01, verbose=1),
        TerminateOnNaN()
    ]

    # get data
    (x_train, y_train), *_ = dataset.get_batch()

    # start learning
    model.compile(loss=self.loss, optimizer=optimizer, metrics=['accuracy'])
    history = model.fit(x_train, y_train,
                        batch_size=self.batch_size,
                        epochs=self.epochs,
                        callbacks=callbacks,
                        validation_split=0.2,
                        verbose=2)

    # stop hyperdash experiment
    hd_exp.end()

    # negate so a minimizing tuner maximizes validation accuracy
    return -np.max(np.array(history.history['val_accuracy']))
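# The search loop that calls objective() is not shown in this excerpt; the
# negated return value suggests a minimizing tuner such as hyperopt. A sketch
# under that assumption (the Tuner constructor and the space keys 'lr' and
# 'beta_1' are hypothetical, not taken from this repo):
import numpy as np
from hyperopt import fmin, hp, tpe

space = {
    'lr': hp.loguniform('lr', np.log(1e-5), np.log(1e-1)),  # hypothetical key
    'beta_1': hp.uniform('beta_1', 0.8, 0.999),             # hypothetical key
}
tuner = Tuner('mnist', 'cnn', 'adam')  # hypothetical constructor
best = fmin(fn=tuner.objective, space=space, algo=tpe.suggest, max_evals=50)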
def objective(self, params):
    """
    objective function to optimize

    :param params: hyperparameters for the optimizer
    :return: log10 of the minimum mean distance to the optimum
    :rtype: float
    """
    # get instances
    benchmark = Benchmarks.get(self.benchmark_name)
    optimizer = Optimizers.get(self.optimizer_name,
                               benchmark=benchmark,
                               params=params)

    # initialize coordinates (random seed fixed to 0 for reproducibility)
    np.random.seed(0)
    coords = np.array(
        [np.random.rand(100).astype(float) * 10 - 5 for _ in range(2)])
    optimum = np.array(benchmark.optimum).reshape(2, 1)

    # update coordinates
    dists_mean_min = np.inf
    wait = 0
    patience = 10
    for i in range(10000):
        coords = optimizer.update(coords)
        if i % 100 == 0:
            # Euclidean distance of each point to the optimum
            dists = np.sqrt(np.sum((coords - optimum) ** 2.0, axis=0))

            # terminate on nan
            if np.any(np.isnan(dists)):
                break

            # early stopping
            if np.mean(dists) > dists_mean_min:
                wait += 1
                if wait > patience:
                    break
            else:
                wait = 0
                dists_mean_min = np.mean(dists)

    # return the minimum mean distance on a log10 scale
    return np.log10(dists_mean_min)
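# For reference, a minimal self-contained stand-in for the Benchmarks /
# Optimizers interfaces used above (hypothetical; the repo's real classes are
# not shown). It runs plain gradient descent on the sphere function, whose
# optimum is the origin:
import numpy as np

class SphereBenchmark:
    optimum = [0.0, 0.0]

    @staticmethod
    def grad(coords):
        return 2.0 * coords  # gradient of f(x) = sum(x ** 2)

class GradientDescent:
    def __init__(self, benchmark, lr=0.01):
        self.benchmark, self.lr = benchmark, lr

    def update(self, coords):
        return coords - self.lr * self.benchmark.grad(coords)

opt = GradientDescent(SphereBenchmark())
coords = np.random.rand(2, 100) * 10 - 5
for _ in range(1000):
    coords = opt.update(coords)
optimum = np.array(SphereBenchmark.optimum).reshape(2, 1)
dists = np.sqrt(np.sum((coords - optimum) ** 2, axis=0))
print(np.log10(np.mean(dists)))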
def __init__(self, benchmark_name, optimizer_name):
    """
    :param benchmark_name: name of the benchmark
    :type benchmark_name: str
    :param optimizer_name: name of the optimizer
    :type optimizer_name: str
    """
    # get optimized hyperparameters
    with open(f'../params/{benchmark_name}_{optimizer_name}/result.json') as f:
        params = json.load(f)

    # get instances
    self.benchmark = Benchmarks.get(benchmark_name)
    self.optimizer = Optimizers.get(optimizer_name,
                                    benchmark=self.benchmark,
                                    params=params)

    # configure and initialize directory
    d = self.main_dir = f'../data/{benchmark_name}_{optimizer_name}'
    if os.path.exists(d):
        shutil.rmtree(d)
    os.makedirs(d)
def __init__(self, name, hyper, load_params=False):
    if load_params:
        try:
            with open(name + '.params', 'rb') as f:
                model_values, hyper, curves = pickle.load(f)
        except IOError as e:
            # fail fast: without the file, model_values/curves would be
            # undefined below
            print("Error opening file: ", e)
            raise
    else:
        model_values = {}
        curves = {
            'CD error': [],
            'MSE 1': [],
            'MSE 2': [],
            'log likelihood': [],
            'validation error': []
        }
    std_err = {}

    # initialize random number generators (NumPy and Theano)
    self.np_rng = np.random.RandomState(hyper['seed'])
    self.theano_rng = RandomStreams(hyper['seed'])

    self.name = name
    self.model_values = model_values
    self.hyperparameters = hyper
    self.monitoring_curves = curves
    self.params = OrderedDict()
    self.params_shp = OrderedDict()

    # select the gradient update rule
    opt = Optimizers()
    if hyper['learner'] == 'amsgrad':
        # 'amsgrad' is served by the Adam update rule here
        self.update_opt = opt.adam_updates
    elif hyper['learner'] == 'momentum':
        self.update_opt = opt.momentum_updates
    elif hyper['learner'] == 'rmsprop':
        self.update_opt = opt.rmsprop_updates
    else:
        self.update_opt = opt.sgd_updates
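# The loader above unpickles a (model_values, hyper, curves) triple, so a
# matching checkpoint writer presumably exists elsewhere in the repo. A
# minimal sketch of one, under that assumption (pickle and the attributes
# set above are assumed available; reshaping via params_shp mirrors the
# parameter dump in the training script further below):
def save_params(self):
    # collect current parameter values, reshaped to their stored shapes
    model_values = {
        name: param.eval().reshape(self.params_shp[name])
        for name, param in self.params.items()
    }
    to_file = (model_values, self.hyperparameters, self.monitoring_curves)
    with open(self.name + '.params', 'wb') as f:
        pickle.dump(to_file, f, protocol=pickle.HIGHEST_PROTOCOL)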
def __init__(self, name, hyper, load_params=False):
    if load_params:
        try:
            with open(name + '.params', 'rb') as f:
                model_values, hyper, curves = pickle.load(f)
        except IOError as e:
            # fail fast: without the file, model_values/curves would be
            # undefined below
            print("Error opening file: ", e)
            raise
    else:
        model_values = {}
        curves = {'CD error': [], 'log likelihood': []}

    # initialize random number generators (NumPy and Theano)
    self.np_rng = np.random.RandomState(hyper['seed'])
    self.theano_rng = T.shared_randomstreams.RandomStreams(hyper['seed'])

    self.name = name
    self.model_values = model_values
    self.hyperparameters = hyper
    self.monitoring_curves = curves
    self.model_params = OrderedDict()
    self.model_params_shapes = OrderedDict()
    self.opt = Optimizers()
        self.y[indices[have_train_num:]], self.class_num)
    train_data = DataLoader(x_train, y_train, 3)
    test_data = DataLoader(x_test, y_test, 3)

    # build the network
    model = NeuralNetwork()
    model.add_layer(Layer(4, 8, 'relu'))
    model.add_layer(Layer(8, 8, 'relu'))
    model.add_layer(Layer(8, 3))

    # build the loss function and optimizer
    lr = 0.01
    loss = Loss(loss='cross_entropy_with_logits')
    optimizer = Optimizers(optimizer='sgd', learning_rate=lr)
    model.compile(loss=loss, optimizer=optimizer)

    # train the model
    num_epochs = 1600
    batch_size = 64
    train_loss = []
    test_loss = []
    for epoch in range(num_epochs):
        for x, y in train_data.get_batch(batch_size):
            loss = model.fit(x, y)
            train_loss.append(loss)
        t_loss, n, right_num = 0., 0, 0
        for x, y in test_data.get_batch(batch_size, shuffle=False):
            y_pred = model(x)
            right_num += np.sum(
def main(data):
    # optimizer
    opt = Optimizers()

    # sampler
    theano_rng = RandomStreams(999)

    # import dataset
    n_samples = data.attrs['n_rows']
    lr = 1e-3
    batch_size = 128
    x_data = [
        data['purpose'], data['avg_speed'], data['duration'],
        data['trip_km'], data['n_coord'], data['interval'], data['dow'],
        data['startdistrict'], data['enddistrict']
    ]
    y_data = [data['mode']]

    params = OrderedDict()
    params_shp = OrderedDict()
    outputs = []
    inputs = []
    asc_params = []
    asc_params_m = []
    beta_params_f = []
    beta_params_s = []
    beta_params_sf = []
    beta_params = []
    beta_params_m = []

    # alternative-specific constants; the last alternative is masked out as
    # the reference level
    for var in y_data:
        name = 'asc_' + var.name.strip('/')
        asc_shp = var['data'][:].squeeze().shape[1:]
        print('y', name, asc_shp)
        outputs.append(init_tensor((), name))
        mask = np.ones(asc_shp, DTYPE_FLOATX)
        mask[-1] = 0.
        asc_value = np.zeros(asc_shp, DTYPE_FLOATX) * mask
        asc_params.append(shared(asc_value, name))
        asc_params_m.append(shared(mask, name + '_mask'))
        params[name] = asc_params[-1]
        params_shp[name] = asc_shp

    # beta coefficients and their sigmas (for the random-coefficient draws),
    # stored flat and reshaped symbolically
    for var in x_data:
        name = 'beta_' + var.name.strip('/')
        shp = var['data'].shape[1:] + asc_shp
        print('x', name, shp)
        inputs.append(init_tensor(var['data'].shape[1:], name))
        mask = np.ones(shp, DTYPE_FLOATX)
        mask[..., -1] = 0.
        mask = mask.flatten()
        beta_value = np.zeros(np.prod(shp), DTYPE_FLOATX) * mask
        sigma_value = np.ones(np.prod(shp), DTYPE_FLOATX) * mask
        beta_params_f.append(shared(beta_value, name))
        beta_params_sf.append(shared(sigma_value, name + '_sigma'))
        beta_params.append(T.reshape(beta_params_f[-1], shp))
        beta_params_s.append(T.reshape(beta_params_sf[-1], shp))
        beta_params_m.append(shared(mask, name + '_mask'))
        params[name] = beta_params_f[-1]
        params[name + '_sigma'] = beta_params_sf[-1]
        params_shp[name] = shp
        params_shp[name + '_sigma'] = shp

    # compute the utility function
    utility = 0.
    h_utility = 0.
    for x, b, s in zip(inputs, beta_params, beta_params_s):
        normal_sample = b[..., None] + T.sqr(s)[..., None] * theano_rng.normal(
            size=b.eval().shape + (1, ), avg=0., std=1., dtype=DTYPE_FLOATX)
        ax = [np.arange(x.ndim)[1:], np.arange(b.ndim)[:-1]]
        utility += T.tensordot(x, normal_sample, axes=ax)
        if x.ndim > 2:
            h_utility += T.tensordot(x, b + T.sqr(s), axes=[[1, 2], [0, 1]])
        else:
            h_utility += T.tensordot(x, b + T.sqr(s), axes=[[1], [0]])

    for y, asc in zip(outputs, asc_params):
        utility += asc[None, ..., None]
        h_utility += asc

    (d1, d2, d3) = utility.shape
    utility = utility.reshape((d1 * d3, d2))
    p_y_given_x = T.nnet.softmax(utility)
    hessian_prob = T.nnet.softmax(h_utility)

    # note: y here is the last output tensor from the loop above
    # (there is a single output variable)
    hessian_nll = T.log(hessian_prob)
    hessian_cr = hessian_nll[T.arange(y.shape[0]), y]
    hessian_cost = -T.sum(hessian_cr)

    nll = T.log(p_y_given_x).reshape((d3, d1, d2))
    nll = nll[:, T.arange(y.shape[0]), y]
    cost = -T.sum(T.mean(nll, axis=0))

    gparams = asc_params + beta_params_f + beta_params_sf
    grads = T.grad(cost, gparams)

    # mask gradient updates; the beta masks also apply to the sigma params
    mask = asc_params_m + beta_params_m + beta_params_m
    for j, g in enumerate(grads):
        grads[j] = g * mask[j]

    # create list of updates to iterate over
    updates = opt.sgd_updates(gparams, grads, lr)

    # symbolic expressions for the standard errors, taken from the
    # diagonal of the Hessian of the negative log-likelihood
    stderrs = []
    hessian = T.hessian(cost=hessian_cost, wrt=gparams)
    stderr = [T.sqrt(f) for f in [T.diag(2. / h) for h in hessian]]
    stderrs.extend(stderr)

    tensors = inputs + outputs
    shared_x = [shared(var['data'][:], borrow=True) for var in x_data]
    shared_y = [T.cast(shared(var['label'][:]), 'int32') for var in y_data]
    shared_variables = shared_x + shared_y

    i = T.lscalar('index')
    start_idx = i * batch_size
    end_idx = (i + 1) * batch_size

    print('constructing Theano computational graph...')
    train = theano.function(
        inputs=[i],
        outputs=cost,
        updates=updates,
        givens={
            key: val[start_idx:end_idx]
            for key, val in zip(tensors, shared_variables)
        },
        name='train',
        allow_input_downcast=True,
    )
    std_err = theano.function(
        inputs=[],
        outputs=stderrs,
        givens={key: val[:] for key, val in zip(tensors, shared_variables)},
        name='std errors',
        allow_input_downcast=True,
    )

    # train model
    print('training the model...')
    curves = []
    n_batches = n_samples // batch_size
    epochs = 100
    epoch = 0
    t0 = time.time()
    while epoch < epochs:
        epoch += 1
        batch_costs = []
        for batch in range(n_batches):
            batch_costs.append(train(batch))
        epoch_cost = np.sum(batch_costs)
        curves.append((epoch, epoch_cost))
        minutes, seconds = divmod(time.time() - t0, 60.)
        hours, minutes = divmod(minutes, 60.)
        print(("epoch {0:d} loglikelihood "
               "{1:.3f} time {hh:02d}:{mm:02d}:{ss:05.2f}").format(
                   epoch, epoch_cost,
                   hh=int(hours), mm=int(minutes), ss=seconds))

        if (epoch % 5) == 0:
            print('checkpoint')
            param_values = {}
            for name, param in params.items():
                param_shp = params_shp[name]
                param_values[name] = param.eval().reshape(param_shp)
                np.savetxt('params/{}.csv'.format(name),
                           param_values[name].squeeze(),
                           fmt='%.3f', delimiter=',')
            to_file = param_values, curves
            path = 'params/epoch_{0:d}.params'.format(epoch)
            with open(path, 'wb') as f:
                pickle.dump(to_file, f, protocol=pickle.HIGHEST_PROTOCOL)

    # save parameters and stderrs to .csv
    stderrs = std_err()
    params_list = [p for p in asc_params + beta_params_f + beta_params_sf]
    param_names = [p.name for p in asc_params + beta_params_f + beta_params_sf]
    for se, param, name in zip(stderrs, params_list, param_names):
        v = param.eval().squeeze()
        shp = v.shape
        path = 'params/stderrs_{}.csv'.format(name)
        np.savetxt(path, se.reshape(shp), fmt='%.3f', delimiter=',')
        path = 'params/tstat_{}.csv'.format(name)
        np.savetxt(path, v / se.reshape(shp), fmt='%.3f', delimiter=',')
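# Optimizers.sgd_updates() above is repo code not shown in this excerpt; a
# minimal sketch of the vanilla Theano SGD rule it presumably implements (an
# assumption, consistent with the Optimizers class used elsewhere here):
class Optimizers:
    def sgd_updates(self, params, grads, learning_rate):
        # one (shared_variable, new_value) pair per parameter, consumed by
        # theano.function(updates=...)
        return [(p, p - learning_rate * g) for p, g in zip(params, grads)]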
def __init__(self, experiment):
    self.experiment = experiment
    processing = experiment['processing']

    # configure dataset
    self.dataset = DatasetFactory(
        name=experiment['dataset'],
        flat=processing['flat'],
        concat=processing['concat'],
        expand=processing['expand'],
        normalize=processing['normalize'],
    )

    # configure the state of the experiment
    self.state = LoaderState(
        id_exp=self.experiment_name(),
        epochs=experiment['epochs'],
        dataset=self.dataset,
        valid_exp=experiment['exp'],
        url=experiment['dir']
    ).state

    # compiler parameters
    optimizer = experiment['optimizer']
    opt_params = experiment['opt_params']
    loss = experiment['loss']
    metrics = [m for m in experiment['metrics'] if m != 'f1_score']
    history_metrics = [m.lower() for m in experiment['metrics']
                       if m != 'f1_score']
    metrics.append(f1_score)
    self.compiler_params = {
        'optimizer': Optimizers(optimizer, opt_params).optimizer(),
        'loss': loss,
        'metrics': metrics
    }

    # configure training
    callbacks = []
    history_metrics.insert(0, 'loss')
    history_metrics.append('f1_score')
    for m in list(history_metrics):
        history_metrics.append('val_' + m)
    callbacks.append(HistoryCheckpoint(
        self.experiment['dir'], self.state, history_metrics))
    callbacks.append(WeightsCheckpoint(self.experiment['dir'], self.state))
    if experiment['decay']:
        callbacks.append(ReduceLROnPlateau(monitor='val_loss',
                                           factor=0.1,
                                           patience=50,
                                           min_lr=0.1e-3))

    datagen = None
    if experiment['data_augmentation']:
        datagen = ImageDataGenerator(width_shift_range=0.1,
                                     height_shift_range=0.1,
                                     horizontal_flip=True)

    self.trainner = Trainner(
        epochs=experiment['epochs'],
        batch_size=experiment['batch'],
        data_augmentation=datagen,
        callbacks=callbacks,
        dir_path=experiment['dir'],
        state=self.state
    )
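# Optimizers(optimizer, opt_params).optimizer() above is a repo wrapper whose
# definition is not shown; a minimal sketch of such a factory over Keras
# optimizers, under the assumption that opt_params holds keyword arguments
# accepted by the chosen optimizer (e.g. lr):
from keras.optimizers import SGD, Adam, RMSprop

class Optimizers:
    _registry = {'sgd': SGD, 'adam': Adam, 'rmsprop': RMSprop}

    def __init__(self, name, opt_params):
        self.name, self.opt_params = name, opt_params

    def optimizer(self):
        # look up the class by name and instantiate it with the given params
        return self._registry[self.name.lower()](**self.opt_params)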
        data['valid']['X'], data['valid']['Y'],\
        data['test']['X'], data['test']['Y'],

    # Initialize network
    np.random.seed(1234)
    network = Network(num_hidden, sizes,
                      activation_choice=activation,
                      output_choice=output_choice,
                      loss_choice=loss)
    model_name = '{}-{}-{}-{}-{}-{}-{}-{}.npy'.format(
        num_hidden, ','.join([str(word) for word in sizes]), activation,
        output_choice, batch_size, loss, opt, lr)
    if pretrained_path is not None:
        network.load(path=pretrained_path)
    optimizer = Optimizers(network.theta.shape[0], opt, lr, momentum)

    # Train
    print('Training has started')
    num_epochs = 20
    num_batches = int(float(train_X.shape[1]) / batch_size)
    steps = 0
    lr_min = 0.00001
    loss_history = [np.inf]
    prev_loss = np.inf
    indices = np.arange(train_X.shape[1])
    for epoch in range(num_epochs):
        steps = 0
        np.random.shuffle(indices)
        train_X, train_Y = train_X[:, indices], train_Y[indices]
        epoch_loss = []
def run(self, x, y, epochs, batch_size=None, lr=0.001, optimizer='adam',
        rho1=0.9, rho2=0.999, shuffle=True):
    """
    Description
    -----------
    MLP training.

    Parameters
    ----------
    x : ndarray
    y : ndarray
    epochs : int
    batch_size : int, optional
    lr : float, optional
    optimizer : str, optional
        One of 'sgd', 'sgd_momentum', 'adagrad', 'rmsprop' or 'adam'.
    rho1 : float, optional
        First-moment decay rate.
    rho2 : float, optional
        Second-moment decay rate.
    shuffle : bool, optional

    Returns
    -------
    history : list
        Loss function values throughout the training.
    """
    # if batch_size was not given, use full-batch training
    if batch_size is None:
        batch_size = self.__n_images

    # initialize the optimizer
    opt = Optimizers(optimizer, self.__w, self.__n_layers)

    history = []
    # run the epochs
    for i in range(1, epochs + 1):
        total_loss = 0

        # shuffle the data
        if shuffle:
            x, y = self.__shuffling_data(x, y)

        # indexes delimiting the current batch of training images
        ini_batch = 0
        end_batch = batch_size

        # iterate over batches until n_images is reached
        while ini_batch < self.__n_images:
            # forward pass
            s = self.__forward_pass(x[ini_batch:end_batch])

            # accumulate the training loss
            total_loss += self.__loss.forward(s, y[ini_batch:end_batch])

            # backward pass
            self.__backward_pass(self.__loss, y[ini_batch:end_batch],
                                 opt, lr, rho1, rho2, i)

            # advance the batch indexes, clamping the last batch
            ini_batch += batch_size
            end_batch = min(end_batch + batch_size, self.__n_images)

        # loss history
        print(f'Train_Loss [{i}]: {round(total_loss / self.__n_images, 6)}')
        history.append(total_loss / self.__n_images)

    return history
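# A minimal usage sketch (assumption): the enclosing class is referred to here
# as MLP and its constructor argument is hypothetical; only run()'s signature
# comes from the code above.
import numpy as np

x = np.random.rand(100, 784)                    # 100 flattened images
y = np.eye(10)[np.random.randint(0, 10, 100)]   # one-hot labels
mlp = MLP(layer_sizes=[784, 128, 10])           # hypothetical constructor
history = mlp.run(x, y, epochs=10, batch_size=32, lr=1e-3, optimizer='adam')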