def train(save_path, model, lr=0.1, batch_size=128, callbacks=()):
    """Train a cross-entropy classifier built from the model registry.

    Args:
        save_path: directory where the training loop writes checkpoints/logs.
        model: name of a constructor in ``models.__dict__``.
        lr: SGD learning rate.
        batch_size: minibatch size for the dataset generators.
        callbacks: iterable of callback names resolved via ``get_callback``.
            (Default changed from a mutable ``[]`` to an immutable ``()`` —
            mutable defaults are shared across calls.)
    """
    # Create dynamically dataset generators
    train, valid, test, meta_data = get_dataset(batch_size=batch_size)

    # Create dynamically model
    model = models.__dict__[model]()
    summary(model)

    loss_function = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(model.parameters(), lr=lr)

    # Create dynamically callbacks
    callbacks_constructed = []
    for name in callbacks:
        clbk = get_callback(name, verbose=0)
        if clbk is not None:
            callbacks_constructed.append(clbk)

    # Pass everything to the training loop.
    # Ceiling division: one extra step for a final partial batch.
    steps_per_epoch = (len(meta_data['x_train']) - 1) // batch_size + 1
    # NOTE(review): `valid=test` — the validation split produced above is
    # never used; confirm monitoring on the test set is intentional.
    training_loop(model=model,
                  optimizer=optimizer,
                  loss_function=loss_function,
                  metrics=[acc],
                  train=train,
                  valid=test,
                  meta_data=meta_data,
                  steps_per_epoch=steps_per_epoch,
                  save_path=save_path,
                  config=_CONFIG,
                  use_tb=True,
                  custom_callbacks=callbacks_constructed)
def train(config, save_path):
    """Build the dataset, model and optimizer described by ``config`` and run
    the training loop, storing all artifacts under ``save_path``.
    """
    # Dataset generators for the configured dataset (seeded, center-preprocessed).
    train, test, meta_data = datasets(dataset=config['dataset'],
                                      batch_size=config['batch_size'],
                                      augmented=config['augmented'],
                                      preprocessing='center',
                                      seed=config['seed'])

    # Instantiate the requested architecture from the model registry.
    builder = models.__dict__[config['model']]
    net = builder(**config.get('model_kwargs', {}))
    summary(net)

    # Careful: MSE loss is deliberate here — the network ends in logsoftmax.
    criterion = torch.nn.MSELoss()
    sgd = torch.optim.SGD(net.parameters(), lr=config['lr'])
    model = Model(net, sgd, criterion, [acc])

    callbacks = [LRSchedule(lr_schedule=config['lr_schedule'])]

    # Whole sweeps over x_train, truncated to an integer number of batches.
    steps_per_epoch = int(len(meta_data['x_train']) / config['batch_size'])

    # Call training loop (warning: the test split doubles as the validation
    # split here — please don't do this).
    training_loop(model=model,
                  train=train,
                  valid=test,
                  save_path=save_path,
                  n_epochs=config['n_epochs'],
                  save_freq=1,
                  reload=config['reload'],
                  use_tb=True,
                  steps_per_epoch=steps_per_epoch,
                  custom_callbacks=callbacks)
def train(save_path, data_class, label_mode='multiclass_cancer_sides',
          batch_size=128, callbacks=('BreastDataLoader',)):
    """Train on the breast-imaging data pipeline.

    Args:
        save_path: output directory (also receives 'output_log.log').
        data_class: 'data_with_segmentations_gin' or 'data_gin'.
        label_mode: label scheme forwarded to the training loop.
        batch_size: minibatch size for the data loader.
        callbacks: iterable of callback names resolved via ``get_callback``.
            (Default changed from a mutable list to a tuple — mutable
            defaults are shared across calls.)
    """
    # Create dynamically dataset generators
    data_loader = data.__dict__[data_class](
        logger_breast_ori(save_path, 'output_log.log'),
        minibatch_size=batch_size)

    # Create dynamically callbacks
    callbacks_constructed = []
    for name in callbacks:
        clbk = get_callback(name, verbose=0)
        if clbk is not None:
            callbacks_constructed.append(clbk)

    # Oversample the training indices unless plain ('normal') sampling is on.
    if data_loader.parameters['train_sampling_mode'] == 'normal':
        training_oversampled_indices = data_loader.data_list_training
    else:
        training_oversampled_indices = data_loader.train_sampler.sample_indices(
            data_loader.get_train_labels_cancer('multiclass_cancer_sides'),
            random_seed=0)

    # Ceiling division: count a final partial batch as one step.
    steps_per_epoch = (len(training_oversampled_indices) - 1) // batch_size + 1
    validation_steps = (len(data_loader.data_list_validation) - 1) // batch_size + 1
    logger.info('samples_per_training_epoch=%d; steps_per_epoch=%d'
                % (len(training_oversampled_indices), steps_per_epoch))
    logger.info('samples_per_evaluation_epoch=%d; validation_steps=%d'
                % (len(data_loader.data_list_validation), validation_steps))

    training_loop(meta_data=None,
                  label_mode=label_mode,
                  steps_per_epoch=steps_per_epoch,
                  validation_steps=validation_steps,
                  data_loader=data_loader,
                  save_path=save_path,
                  config=_CONFIG,
                  custom_callbacks=callbacks_constructed)
def train(save_path, model, batch_size=128, seed=777, callbacks=(),
          resume=True, evaluate=True):
    """Supervised training via a PyTorch Lightning-style module.

    Args:
        save_path: checkpoint/output directory.
        model: name of a constructor in ``models.__dict__``.
        batch_size: minibatch size for the dataset generators.
        seed: dataset seed.
        callbacks: iterable of callback names resolved via ``get_callback``.
            (Default changed from a mutable ``[]`` to ``()`` — mutable
            defaults are shared across calls.)
        resume: if False, refuse to run when a previous run's checkpoint exists.
        evaluate: run test evaluation after training and dump results to JSON.

    Raises:
        IOError: when ``resume`` is False and ``last.ckpt`` already exists.
    """
    # Create dynamically dataset generators
    train, valid, test, meta_data = get_dataset(batch_size=batch_size, seed=seed)

    # Create dynamically model
    model = models.__dict__[model]()
    summary(model)

    # Create dynamically callbacks
    callbacks_constructed = []
    for name in callbacks:
        clbk = get_callback(name, verbose=0)
        if clbk is not None:
            callbacks_constructed.append(clbk)

    # Guard against silently overwriting a previous run.
    if not resume and os.path.exists(os.path.join(save_path, "last.ckpt")):
        raise IOError(
            "Please clear folder before running or pass train.resume=True")

    # Create module and pass to training
    checkpoint_callback = ModelCheckpoint(
        filepath=os.path.join(save_path, "weights"),
        verbose=True,
        save_last=True,  # For resumability
        monitor='valid_acc',
        mode='max')
    pl_module = supervised_training.SupervisedLearning(model, meta_data=meta_data)
    trainer = training_loop(train, valid,
                            pl_module=pl_module,
                            checkpoint_callback=checkpoint_callback,
                            callbacks=callbacks_constructed,
                            save_path=save_path)

    # Evaluate
    if evaluate:
        # trainer.test returns a one-element list of metric dicts.
        results, = trainer.test(test_dataloaders=test)
        logger.info(results)
        with open(os.path.join(save_path, "eval_results.json"), "w") as f:
            json.dump(results, f)
def train(save_path, model, lr_splitting_by=None, lrs=None, wd=0, lr=0.1,
          batch_size=128, n_epochs=100, weights=None, fb_method=False,
          callbacks=(), optimizer='sgd', scheduler=None,
          freeze_all_but_this_layer=None, mode='train'):
    """Train ('train' mode) or evaluate (any other mode) a CheXNet-COVID model.

    Args:
        save_path: output directory for the training/evaluation loop.
        model: name of a constructor in ``models.__dict__``.
        lr_splitting_by: if given, rebuild the optimizer with per-layer-group
            learning rates ``lrs`` via ``create_optimizer``.
        lrs: learning rates matching ``lr_splitting_by`` groups.
        wd: weight decay.
        lr: base learning rate.
        batch_size: minibatch size.
        n_epochs: number of training epochs.
        weights: class-weight list; drives ``target_indice`` selection.
        fb_method: enables the FB method; selects the positive class index.
        callbacks: iterable of callback names (default changed from mutable
            ``[]`` to ``()`` — mutable defaults are shared across calls).
        optimizer: 'sgd' or 'adam'.
        scheduler: 'cosine' for cosine annealing, else passed through.
        freeze_all_but_this_layer: name prefix of the only layer(s) to train.
        mode: 'train' runs training; anything else runs evaluation only.
    """
    # Create dynamically dataset generators
    train, valid, test, meta_data = get_chexnet_covid(batch_size=batch_size)

    # Create dynamically model
    model = models.__dict__[model]()
    summary(model)

    loss_function = torch.nn.BCELoss()

    if freeze_all_but_this_layer is not None:
        # First freeze all layers
        logger.info("Freezing all layers")
        for i, parameter in enumerate(model.parameters()):
            parameter.requires_grad = False
        # Unfreeze layers whose name matches the requested prefix
        for i, (name, parameter) in enumerate(model.named_parameters()):
            if name.startswith(freeze_all_but_this_layer):
                parameter.requires_grad = True
                logger.info("Unfreezing {}: {}".format(name, parameter.shape))

    # `optimizer` is rebound from its string value to the optimizer object.
    if optimizer == 'sgd':
        optimizer = torch.optim.SGD(model.parameters(), lr=lr, weight_decay=wd)
    elif optimizer == 'adam':
        optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=wd)

    if scheduler == 'cosine':
        scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
            optimizer, n_epochs)

    # NOTE(review): the cosine scheduler above is bound to the optimizer
    # created before this point; if create_optimizer returns a *new*
    # optimizer, the scheduler still steps the old one — confirm intended.
    if lr_splitting_by is not None:
        optimizer, _ = create_optimizer(optimizer, model, lr_splitting_by, lrs)

    # Create dynamically callbacks
    callbacks_constructed = []
    for name in callbacks:
        clbk = get_callback(name, verbose=0)
        if clbk is not None:
            logger.info(name)  # was a bare print(); use the module logger
            callbacks_constructed.append(clbk)

    # Pass everything to the training loop
    if train is not None:
        steps_per_epoch = len(train)
    else:
        steps_per_epoch = None

    # Pick the target class index for weighted / FB-method training.
    target_indice = None
    if fb_method:
        target_indice = weights.index(1) if 1 in weights else 0
    elif weights is not None:
        target_indice = 0

    if mode == 'train':
        assert train is not None, "please provide train data"
        assert valid is not None, "please provide validation data"
        training_loop(
            model=model,
            optimizer=optimizer,
            scheduler=scheduler,
            loss_function=loss_function,
            metrics=[acc_chexnet_covid],
            train=train,
            valid=valid,
            test=test,
            meta_data=meta_data,
            steps_per_epoch=steps_per_epoch,
            n_epochs=n_epochs,
            save_path=save_path,
            config=_CONFIG,
            use_tb=True,
            custom_callbacks=callbacks_constructed,
            fb_method=fb_method,
            target_indice=target_indice,
        )
    else:
        assert test is not None, "please provide test data for evaluation"
        evaluation_loop(
            model=model,
            optimizer=optimizer,
            loss_function=loss_function,
            metrics=[acc_chexnet_covid],
            test=test,
            meta_data=meta_data,
            save_path=save_path,
            config=_CONFIG,
            custom_callbacks=callbacks_constructed,
            target_indice=target_indice,
        )
def train(config, save_path):
    """Compile a Keras model per ``config``, wire up Lanczos/eigenvalue and
    common callbacks, optionally restore (model and optimizer) weights, and
    run the training loop.
    """
    # Seed both TF's graph-level RNG and NumPy for reproducibility.
    tf.set_random_seed(config['seed'])
    np.random.seed(config['seed'])
    (train, valid, test, meta_data), (model, model_inference, _) = init_data_and_model(config)
    # Number of distinct labels observed in the training targets.
    n_classes = len(set(meta_data['y_train'].reshape(-1, )))
    model.summary()
    # NOTE(review): opt_kwargs strings are eval()'d — only safe for trusted configs.
    if config['optim'] == "sgd":
        optimizer = SGD(lr=config['lr'], momentum=config['m'])
    elif config['optim'] == "nsgd":
        opt_kwargs = eval(config['opt_kwargs'])
        optimizer = NSGD(lr=config['lr'], momentum=config['m'], **opt_kwargs)
    elif config['optim'] == "adam":
        opt_kwargs = eval(config['opt_kwargs'])
        optimizer = Adam(lr=config['lr'], **opt_kwargs)
    else:
        raise NotImplementedError()
    model.compile(optimizer=optimizer, loss=config['loss'],
                  metrics=['accuracy', config['loss']])
    # Mirror compilation on the inference model, when one exists.
    if model_inference is not None:
        model_inference.summary()
        model_inference.compile(optimizer=optimizer, loss=config['loss'],
                                metrics=['accuracy', config['loss']])
    # Expose the learning rate as an extra reported metric.
    model.metrics_names.append("lr")
    model.metrics_tensors.append(optimizer.lr)
    # Config Lanczos: -1 means "use the whole training set".
    if config['lanczos_top_K_N'] == -1:
        config['lanczos_top_K_N'] = len(meta_data['x_train'])
    # Optional fixed 10k-sample slice for measuring train loss in callbacks.
    if config['measure_train_loss']:
        train_eval = [
            meta_data['x_train'][0:10000],
            np_utils.to_categorical(meta_data['y_train'][0:10000],
                                    int(n_classes))
        ]
    else:
        train_eval = None
    callbacks, lanczos_clbk = add_lanczos_callbacks(
        config=config, save_path=save_path, meta_data=meta_data, model=model,
        model_inference=model_inference, n_classes=int(n_classes))
    callbacks += add_eigenloss_callback(config=config, save_path=save_path,
                                        top_eigenvalues_clbk=lanczos_clbk,
                                        model_inference=model_inference,
                                        meta_data=meta_data,
                                        n_classes=int(n_classes))
    callbacks += add_common_callbacks(config=config, save_path=save_path,
                                      meta_data=meta_data, model=model,
                                      model_inference=model_inference,
                                      train=train_eval,
                                      n_classes=int(n_classes))
    # NSGD needs extra wiring to the sharpest-eigenvalue callback.
    if isinstance(optimizer, NSGD):
        callbacks += config_alex(config=config, optimizer=optimizer,
                                 top_eigenvalues_clbk=lanczos_clbk,
                                 model=model)
    # epoch_size == -1 means "one pass over the full training set".
    if config['epoch_size'] != -1:
        epoch_size = config['epoch_size']
    else:
        epoch_size = len(meta_data['x_train'])
    # NOTE(review): true division — steps_per_epoch may be a float; confirm
    # the training loop accepts non-integer step counts.
    steps_per_epoch = epoch_size / config['batch_size']
    if config['reduce_callback']:
        kwargs = eval(config['reduce_callback_kwargs'])
        callbacks.append(PickableReduceLROnPlateau(**kwargs))
    if config['load_weights_from']:
        logger.info("Loading weights from " + config['load_weights_from'])
        logger.info("Loading weights")
        model.load_weights(config['load_weights_from'])
        logger.info("Loaded weights")
        ## Don't load opt
        if config['load_opt_weights']:
            with h5py.File(config['load_weights_from']) as f:
                if 'optimizer_weights' in f:
                    # build train function (to get weight updates)
                    model._make_train_function(
                    )  # Note: might need call to model
                    optimizer_weights_group = f['optimizer_weights']
                    optimizer_weight_names = [
                        n.decode('utf8')
                        for n in optimizer_weights_group.attrs['weight_names']
                    ]
                    optimizer_weight_values = [
                        optimizer_weights_group[n]
                        for n in optimizer_weight_names
                    ]
                    model.optimizer.set_weights(optimizer_weight_values)
                else:
                    logger.error("No optimizer weights in wieghts file!")
                    raise Exception()
    training_loop(model=model, train=train, steps_per_epoch=steps_per_epoch,
                  save_freq=config['save_freq'], checkpoint_monitor="val_acc",
                  epochs=config['n_epochs'], save_path=save_path,
                  reload=config['reload'], valid=valid,
                  custom_callbacks=callbacks, verbose=2)
def train(save_path, model, datasets=('cifar10',), optimizer="SGD",
          data_seed=777, seed=777, batch_size=128, lr=0.0, wd=0.0,
          nesterov=False, checkpoint_monitor='val_categorical_accuracy:0',
          loss='ce', steps_per_epoch=-1, momentum=0.9, testing=False,
          testing_reload_best_val=True, callbacks=()):
    """Train a Keras classifier on the requested datasets.

    Args:
        save_path: output directory (checkpoints, eval_results.json).
        model: name of a constructor in ``models.__dict__``.
        datasets: dataset names; each resolved via ``get_dataset``.
            (Default changed from a mutable list to a tuple — mutable
            defaults are shared across calls.)
        optimizer: "SGD" or "Adam".
        data_seed, seed: dataset / global RNG seeds.
        batch_size, lr, wd, nesterov, momentum: optimizer hyper-parameters.
        checkpoint_monitor: metric name the checkpointer watches.
        loss: only 'ce' (categorical crossentropy) is supported.
        steps_per_epoch: -1 means "one full pass over the first dataset".
        testing: run val/test evaluation after training.
        testing_reload_best_val: reload the best-on-validation weights first.
        callbacks: callback names (default changed from ``[]`` to ``()``).

    Raises:
        NotImplementedError: unknown loss, optimizer, or callback name.
    """
    np.random.seed(seed)

    # Create dataset generators (seeded); rebinds `datasets` to loaded tuples.
    datasets = [
        get_dataset(d, seed=data_seed, batch_size=batch_size) for d in datasets
    ]

    # Create model (shape/class count taken from the first dataset's metadata).
    model = models.__dict__[model](
        input_shape=datasets[0][-1]['input_shape'],
        n_classes=datasets[0][-1]['num_classes'])
    logger.info("# of parameters " +
                str(sum([np.prod(p.shape) for p in model.trainable_weights])))
    model.summary()

    if loss == 'ce':
        loss_function = tf.keras.losses.categorical_crossentropy
    else:
        raise NotImplementedError()

    if optimizer == "SGD":
        optimizer = SGD(learning_rate=lr, momentum=momentum, nesterov=nesterov)
    elif optimizer == "Adam":
        optimizer = Adam(learning_rate=lr)
    else:
        raise NotImplementedError()

    # Create callbacks
    callbacks_constructed = []
    for name in callbacks:
        clbk = get_callback(name, verbose=0)
        if clbk is not None:
            callbacks_constructed.append(clbk)
        else:
            raise NotImplementedError(f"Did not find callback {name}")

    # Pass everything to the training loop
    metrics = [categorical_accuracy]
    if steps_per_epoch == -1:
        # Ceiling division over the training-set size.
        steps_per_epoch = (datasets[0][-1]['n_examples_train'] + batch_size -
                           1) // batch_size
    training_loop(model=model,
                  optimizer=optimizer,
                  loss_function=loss_function,
                  metrics=metrics,
                  datasets=datasets,
                  weight_decay=wd,
                  save_path=save_path,
                  config=_CONFIG,
                  steps_per_epoch=steps_per_epoch,
                  use_tb=True,
                  checkpoint_monitor=checkpoint_monitor,
                  custom_callbacks=callbacks_constructed,
                  seed=seed)

    if testing:
        if testing_reload_best_val:
            model = restore_model(model,
                                  os.path.join(save_path, "model_best_val.h5"))
        m_val = evaluate(model, [datasets[0][1]], loss_function, metrics)
        m_test = evaluate(model, [datasets[0][2]], loss_function, metrics)
        logger.info("Saving")
        eval_results = {}
        for k in m_test:
            eval_results['test_' + k] = float(m_test[k])
        for k in m_val:
            eval_results['val_' + k] = float(m_val[k])
        logger.info(eval_results)
        # Use a context manager so the file handle is closed deterministically
        # (was json.dump(..., open(...)) with no close).
        with open(os.path.join(save_path, "eval_results.json"), "w") as f:
            json.dump(eval_results, f)
def train(config, save_path):
    """Train a stateful-LSTM PTB language model: compile it per ``config``,
    attach Lanczos/common callbacks plus a per-epoch state reset, optionally
    restore weights, and run the training loop.
    """
    # Seed TF's graph-level RNG and NumPy for reproducibility.
    tf.set_random_seed(config['seed'])
    np.random.seed(config['seed'])
    (train, valid, test, meta_data), (model, model_inference) = init_data_and_model(config)
    logger.info("len(train_data)=" + str(len(meta_data['train_data'])))
    vocab_size = config['vocab_size']
    # epoch_size == -1 means "derive steps from the full token stream".
    if config['epoch_size'] == -1:
        steps_per_epoch = (
            (len(meta_data['train_data']) // config['batch_size']) -
            1) // config['num_steps']
    else:
        # NOTE(review): true division here — may yield a float step count;
        # confirm the training loop accepts it.
        steps_per_epoch = config['epoch_size'] / config['batch_size']
    # steps_per_epoch = (((config['num_steps'] * config['epoch_size']) // config['batch_size']) - 1) // config[
    #     'num_steps']
    logger.info("steps_per_epoch=" + str(steps_per_epoch))
    model.summary()
    logger.info("Running!")
    # Peek at the auxiliary generator to log the batch shapes.
    logger.info(next(meta_data['train_2'])[0].shape)
    logger.info(next(meta_data['train_2'])[1].shape)
    # NOTE(review): opt_kwargs strings are eval()'d — only safe for trusted configs.
    if config['opt'] == 'ptbsgd':
        optimizer = PtbSGD(lr=config['lr'], decay=config['lr_decay'],
                           clipnorm=config['max_grad_norm'],
                           epoch_size=steps_per_epoch,
                           max_epoch=config['max_epoch'])
    elif config['opt'] == 'sgd':
        opt_kw = eval(config['opt_kwargs'])
        optimizer = SGD(momentum=config['m'], lr=config['lr'], **opt_kw)
    elif config['opt'] == "nsgd":
        opt_kwargs = eval(config['opt_kwargs'])
        optimizer = NSGD(lr=config['lr'], momentum=config['m'], **opt_kwargs)
    else:
        raise NotImplementedError()
    model.compile(loss='categorical_crossentropy', optimizer=optimizer,
                  metrics=["accuracy", perplexity])
    model_inference.compile(loss='categorical_crossentropy',
                            optimizer=optimizer,
                            metrics=["accuracy", perplexity])
    # Expose the learning rate as an extra reported metric.
    model.metrics_names.append("lr")
    model.metrics_tensors.append(optimizer.lr)
    # Persist the vocabulary so checkpoints can be decoded later.
    with open(os.path.join(save_path, 'vocab.bin'), 'wb') as f:
        pickle.dump(meta_data['word_to_id'], f)
    logger.info('Training with {} size'.format(config['hidden_size']))
    # IDEA: Change to cls inside lanczos if neeed, simple
    callbacks, lanczos_clbk = add_lanczos_callbacks(
        config=config, save_path=save_path, meta_data=meta_data, model=model,
        model_inference=model_inference, n_classes=config['vocab_size'])
    # NSGD needs extra wiring to the sharpest-eigenvalue callback.
    if isinstance(optimizer, NSGD):
        callbacks += config_alex(config=config, optimizer=optimizer,
                                 top_eigenvalues_clbk=lanczos_clbk,
                                 model=model)
    callbacks += add_common_callbacks(config=config, save_path=save_path,
                                      meta_data=meta_data, model=model,
                                      model_inference=model_inference,
                                      train=None, n_classes=vocab_size)

    # We use stateful LSTM and model PTB as seq-2-seq,
    # so the recurrent state must be cleared at each epoch boundary.
    def reset_model(epoch, logs):
        model.reset_states()

    callbacks += [LambdaCallbackPickable(on_epoch_end=reset_model)]
    if config['load_weights_from']:
        logger.info("Loading weights from " + config['load_weights_from'])
        logger.info("Loading weights")
        model.load_weights(config['load_weights_from'])
        logger.info("Loaded weights")
        ## Don't load opt
        if config['load_opt_weights']:
            with h5py.File(config['load_weights_from']) as f:
                if 'optimizer_weights' in f:
                    # build train function (to get weight updates)
                    model._make_train_function(
                    )  # Note: might need call to model
                    optimizer_weights_group = f['optimizer_weights']
                    optimizer_weight_names = [
                        n.decode('utf8')
                        for n in optimizer_weights_group.attrs['weight_names']
                    ]
                    optimizer_weight_values = [
                        optimizer_weights_group[n]
                        for n in optimizer_weight_names
                    ]
                    model.optimizer.set_weights(optimizer_weight_values)
                else:
                    logger.error("No optimizer weights in wieghts file!")
                    raise Exception()
    training_loop(model=model, train=train, steps_per_epoch=steps_per_epoch,
                  save_freq=config['save_freq'], checkpoint_monitor="val_acc",
                  epochs=config['n_epochs'], save_path=save_path,
                  reload=config['reload'],
                  validation_steps=meta_data['num_steps_valid'], valid=valid,
                  custom_callbacks=callbacks, verbose=2)
def train(config, save_path):
    """Compile a Keras model per ``config``, attach Lanczos/eigenloss/common
    callbacks (plus optional decompose-step and random-label analyses),
    optionally restore weights, and run the training loop.
    """
    from tensorflow.python.client import device_lib
    print(device_lib.list_local_devices())

    # Seed TF's graph-level RNG and NumPy for reproducibility.
    tf.set_random_seed(config['seed'])
    np.random.seed(config['seed'])
    (train, valid, test, meta_data), (model, model_inference,
                                      meta_model) = init_data_and_model(config)
    # Number of distinct labels observed in the training targets.
    n_classes = len(set(meta_data['y_train'].reshape(-1, )))
    model.summary()
    model.steerable_variables['bs'] = meta_data['batch_size_np']

    # NOTE(review): opt_kwargs strings are eval()'d — only safe for trusted
    # configs. (A second, unreachable "nsgd" branch was removed here.)
    if config['optim'] == "sgd":
        optimizer = SGD(lr=config['lr'], momentum=config['m'])
    elif config['optim'] == "nsgd":
        opt_kwargs = eval(config['opt_kwargs'])
        optimizer = NSGD(lr=config['lr'], momentum=config['m'], **opt_kwargs)
    elif config['optim'] == 'rmsprop':
        optimizer = RMSprop(lr=config['lr'])
    elif config['optim'] == 'adam':
        optimizer = Adam(lr=config['lr'])
    else:
        raise NotImplementedError()

    model.compile(optimizer=optimizer, loss='categorical_crossentropy',
                  metrics=['accuracy', 'categorical_crossentropy'])
    if model_inference is not None:
        model_inference.summary()
        model_inference.compile(
            optimizer=optimizer, loss='categorical_crossentropy',
            metrics=['accuracy', 'categorical_crossentropy'])
    # Expose the learning rate as an extra reported metric.
    model.metrics_names.append("lr")
    model.metrics_tensors.append(optimizer.lr)

    # Config Lanczos: -1 means "use the whole training set".
    if config['lanczos_top_K_N'] == -1:
        config['lanczos_top_K_N'] = len(meta_data['x_train'])
    # Optional fixed 10k-sample slice for measuring train loss in callbacks.
    if config['measure_train_loss']:
        train_eval = [
            meta_data['x_train'][0:10000],
            np_utils.to_categorical(meta_data['y_train'][0:10000],
                                    int(n_classes))
        ]
    else:
        train_eval = None

    callbacks, lanczos_clbk = add_lanczos_callbacks(
        config=config, save_path=save_path, meta_data=meta_data, model=model,
        model_inference=model_inference, n_classes=int(n_classes))
    callbacks += add_eigenloss_callback(config=config, save_path=save_path,
                                        top_eigenvalues_clbk=lanczos_clbk,
                                        meta_data=meta_data,
                                        n_classes=int(n_classes),
                                        model_inference=model_inference)
    callbacks += add_common_callbacks(config=config, save_path=save_path,
                                      meta_data=meta_data, model=model,
                                      model_inference=model_inference,
                                      train=train_eval,
                                      n_classes=int(n_classes))
    # NSGD needs extra wiring to the sharpest-eigenvalue callback.
    if isinstance(optimizer, NSGD):
        callbacks += config_alex(config=config, optimizer=optimizer,
                                 top_eigenvalues_clbk=lanczos_clbk,
                                 model=model)

    if config['decompose_analysis']:
        kw = eval(config['decompose_analysis_kw'])
        # NOTE(review): class count is hard-coded to 10 here, unlike the
        # n_classes used everywhere else — confirm this is intended.
        X, y = meta_data['x_train'], np_utils.to_categorical(
            meta_data['y_train'], 10)
        callbacks.append(
            DecomposeStepAnalysis(X=X, y=y,
                                  batch_size=config['batch_size'],
                                  save_path=save_path,
                                  sharpest_clbk=lanczos_clbk, **kw))

    # epoch_size == -1 means "one pass over the full training set".
    if config['epoch_size'] != -1:
        epoch_size = config['epoch_size']
    else:
        epoch_size = len(meta_data['x_train'])
    # NOTE(review): true division — steps_per_epoch may be a float; confirm
    # the training loop accepts non-integer step counts.
    steps_per_epoch = epoch_size / config['batch_size']

    if config['reduce_callback']:
        kwargs = eval(config['reduce_callback_kwargs'])
        callbacks.append(PickableReduceLROnPlateau(**kwargs))

    if config.get("random_Y_fraction", 0.0) > 0:
        logger.info("Adding random label evaluation")
        x_train = meta_data['x_train']
        y_train = np_utils.to_categorical(meta_data['y_train'], int(n_classes))
        callbacks += add_random_labels_evaluation(
            config=config, model=model, x_train=x_train, y_train=y_train,
            ids_random_train=meta_data['ids_random_train'])

    if config['load_weights_from']:
        logger.info("Loading weights from " + config['load_weights_from'])
        logger.info("Loading weights")
        model.load_weights(config['load_weights_from'])
        logger.info("Loaded weights")
        ## Don't load opt
        if config['load_opt_weights']:
            with h5py.File(config['load_weights_from']) as f:
                if 'optimizer_weights' in f:
                    # build train function (to get weight updates)
                    model._make_train_function(
                    )  # Note: might need call to model
                    optimizer_weights_group = f['optimizer_weights']
                    optimizer_weight_names = [
                        n.decode('utf8')
                        for n in optimizer_weights_group.attrs['weight_names']
                    ]
                    optimizer_weight_values = [
                        optimizer_weights_group[n]
                        for n in optimizer_weight_names
                    ]
                    model.optimizer.set_weights(optimizer_weight_values)
                else:
                    logger.error("No optimizer weights in weights file!")
                    raise Exception()

    training_loop(model=model, train=train, steps_per_epoch=steps_per_epoch,
                  save_freq=config['save_freq'], checkpoint_monitor="val_acc",
                  epochs=config['n_epochs'], save_path=save_path,
                  reload=config['reload'], valid=valid,
                  custom_callbacks=callbacks, verbose=2)