def train(cfg):
    print('Preparing to train {} on {} data...'.format(cfg['main']['model_name'],
                                                       cfg['main']['dataset_name']))
    np.random.seed(1337)  # for reproducibility

    # Constrain TensorFlow GPU memory so allocation grows on demand
    print('Tensorflow backend detected; Applying memory usage constraints')
    ss = K.tf.Session(config=K.tf.ConfigProto(gpu_options=K.tf.GPUOptions(allow_growth=True)))
    K.set_session(ss)
    ss.run(K.tf.global_variables_initializer())
    K.set_learning_phase(1)

    # Build the dataset and its generator(s)
    Prostate_dataset = select_dataset(cfg)
    if not cfg["validation"]["do_validation"]:
        image_gen = Prostate_dataset.create_generator()
        val_data = None
    else:
        image_gen, val_data = Prostate_dataset.create_generator_with_validation()

    model = select_model(cfg, Prostate_dataset)
    # model.summary()

    # Two-branch model: each branch gets its own categorical cross-entropy,
    # weighted equally in the total loss
    myAdam = keras.optimizers.Adam(lr=1e-4)
    model.compile(optimizer=myAdam,
                  loss={'Radboud_Branch': 'categorical_crossentropy',
                        'Karolinska_Branch': 'categorical_crossentropy'},
                  loss_weights=[0.5, 0.5],
                  metrics=['accuracy'])
    print('Created image generator from dataset {}!'.format(cfg['main']['dataset_name']))

    # Callbacks: TF checkpointing, Keras model saving, LR scheduling, TensorBoard
    tf_saver = tf.train.Saver(max_to_keep=2)
    checkpoint_callback = TFCheckPointCallback(tf_saver, ss, cfg)
    modelsaver_callback = SaveKerasModel(cfg)
    learningRate_callback = SelectLRCallBack(cfg)
    tbCallBack = TensoBoardCallback(cfg)

    model.fit_generator(image_gen,
                        steps_per_epoch=Prostate_dataset.sample_number // cfg['training']['batch_size'],
                        epochs=cfg['training']['epochs'],
                        callbacks=[checkpoint_callback, learningRate_callback,
                                   modelsaver_callback, tbCallBack],
                        validation_data=val_data)
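
# A minimal sketch (hypothetical values) of the nested cfg dict that train()
# expects; only the keys the function actually dereferences are shown.
example_cfg = {
    'main': {'model_name': 'two_branch_cnn', 'dataset_name': 'PANDA'},
    'validation': {'do_validation': True},
    'training': {'batch_size': 16, 'epochs': 50},
}
# train(example_cfg)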
args.o2n = o2n
args.n2o = n2o

# argparse.Namespace does not support item access, so use getattr/setattr to
# normalize the literal string 'None' (as passed on the command line) to None
for key in ['train_labels_path', 'val_labels_path']:
    if getattr(args, key) == 'None':
        setattr(args, key, None)
settings.set_settings(args)

# args.output_path = tempfile.TemporaryDirectory().name
os.makedirs(args.output_path)

train_loader, val_loader, labelled_train_loader = dataset.get_data_loaders(args)
model = models.select_model(args)

# Raw data layout: 3 leading columns, then 7 columns per template
def SIDX(template_id):
    return 3 + (template_id - 1) * 7

def EIDX(template_id):
    return 3 + (template_id - 1) * 7 + 7

# for i in val_loader.dataset.raw_data[:, SIDX(6)]:
#     print(i)
# print(val_loader.dataset.raw_data[:, SIDX(2)])
# my_score, max_score, similarity, rank, conditional_rank, mean, std
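
# Worked example of the column arithmetic encoded by SIDX/EIDX above:
# template t spans columns [3 + 7*(t - 1), 3 + 7*t), so template 1 occupies
# columns [3, 10) and template 2 occupies [10, 17).
assert SIDX(1) == 3 and EIDX(1) == 10
assert SIDX(2) == 10 and EIDX(2) == 17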
# Constrain TensorFlow GPU memory so allocation grows on demand
ss = K.tf.Session(config=K.tf.ConfigProto(gpu_options=K.tf.GPUOptions(allow_growth=True)))
K.set_session(ss)

# debugging parameters
interim_testing = False

# parameters
dataset_name = 'mscoco'
load_mscoco = False
dw = 512
dh = 512
model_name = 'enet_unpooling'
pw = os.path.join('models', dataset_name, 'enet_unpooling', 'weights',
                  '{}_best.h5'.format(model_name))

# Build the segmenter and restore its best weights
nc = datasets.load(dataset_name).num_classes()
autoencoder = models.select_model(model_name=model_name)
segmenter, model_name = autoencoder.build(nc=nc, w=dw, h=dh)
segmenter.load_weights(pw)

if load_mscoco:
    data = load_mscoco_data(segmenter=segmenter)
else:
    # First CLI argument: a text file listing image filenames, one per line,
    # resolved relative to the directory containing the text file
    txt_file = sys.argv[1]
    image_dir = os.path.dirname(txt_file)
    with open(txt_file) as fin:
        image_filenames = [os.path.join(image_dir, line.rstrip('\n')) for line in fin]
    data = load_arbitrary_data(segmenter=segmenter, image_filenames=image_filenames)

data_gen = data['data_gen']
if interim_testing:
    for idx, item in enumerate(data_gen):
        pass  # loop body not shown in this excerpt
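
# Hypothetical invocation sketch for the list-file branch above (script and
# file names assumed, not from the original):
#     python infer.py data/images.txt
# where data/images.txt lists one image filename per line; each name is
# resolved relative to data/, the directory containing the list file.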
def main():
    # build parser and check arguments
    args = _build_parser()
    _check_args(args)

    # Setup Estimator
    '''Estimator name:
        xgb:   XGBoost Classifier
        lgb:   LightGBM Classifier
        log:   Logistic Regression
        knn:   KNeighbors Classifier
        rfo:   RandomForest Classifier
        ada:   AdaBoost Classifier
        ext:   ExtraTrees Classifier
        svc:   Support Vector Classifier
        keras: Keras Neural Networks
    '''
    if args.estimator != 'all':
        estimators = [args.estimator]
    else:
        estimators = ['xgb', 'lgb', 'log', 'rfo', 'ext', 'ada', 'knn', 'svc']

    # Training neural nets with keras
    if args.train_nn:
        estimator_name = 'keras'
        print('Training %s...' % estimator_name)
        params = {
            'n_features': n_features,
            'n_classes': n_classes,
            'dropout': args.dropout,
            'hidden_unit': args.hidden_unit,
            'n_layers': args.layers,
            'optimizer': args.optimizer,
            'init': args.init,
            'batch_size': args.batch_size,
            'epochs': args.epochs,
        }
        estimator = keras_model(**params)
        train_kwargs = {
            'X_train': X_train,
            'y_train': y_train,
            'X_val': X_val,
            'y_val': y_val,
            'score_name': args.score,
            'num': args.num,
        }
        _ = estimator.train(**train_kwargs)
        print('params: \n', params)

    # Training random search CV with scikit-learn models
    if args.train_random:
        for estimator_name in estimators:
            print('Training %s...' % estimator_name)
            if estimator_name != 'keras':
                seed = args.seed if args.seed is not None else np.random.randint(100)
                estimator, params = select_model(estimator_name, n_features,
                                                 n_classes, seed)
                # kwargs dict for train and predict
                train_kwargs = {
                    'estimator': estimator,
                    'params': params,
                    'X_train': X_train,
                    'y_train': y_train,
                    'X_val': X_val,
                    'y_val': y_val,
                    'n_iter': args.n_iter,
                    'score_name': args.score,
                }
                # Train model and predict results
                best_params, best_score, val_score = random_model(**train_kwargs)
                timestamp = get_timestamp()
                # Write params to file
                write_params(estimator_name, best_params, best_score, val_score,
                             timestamp, args.num)
            else:
                # Hyperopt search space for the Keras model; note hp.quniform
                # samples floats, so integer params may need casting downstream
                space_params = {
                    'n_features': n_features,
                    'n_classes': n_classes,
                    'dropout': hp.uniform('dropout', .20, .80),
                    'hidden_unit': hp.quniform('hidden_unit', 10, 50, q=1),
                    'n_layers': hp.choice('n_layers', [1, 2, 3, 4]),
                    'optimizer': hp.choice('optimizer', ['adam', 'adadelta', 'sgd']),
                    'init': hp.choice('init', ['glorot_uniform', 'normal', 'uniform']),
                    'batch_size': hp.choice('batch_size', [16, 32, 64, 128]),
                    'epochs': hp.quniform('epochs', 100, 1000, q=1),
                    'score_name': args.score,
                    'num': args.num,
                }
                trials = Trials()
                best_params = fmin(random_nn, space_params, algo=tpe.suggest,
                                   max_evals=args.n_iter, trials=trials)
                print('best_params \n', best_params)

    # Evaluate with ensemble method and predict result
    if args.predict:
        eva_kwargs = {
            'estimators': estimators,
            'threshold': args.threshold,
            'X_train': X_train,
            'y_train': y_train,
            'X_val': X_val,
            'y_val': y_val,
            'X_test': X_test,
            'score_name': args.score,
            'n_classes': n_classes,
        }
        # Predict with ensemble voting and write result
        prediction = ensemble(**eva_kwargs)
        if args.ensemble == 'vote':
            result = prediction.vote()
        elif args.ensemble == 'stack':
            result = prediction.stack(args.num_imp)
        timestamp = get_timestamp()
        write_result(result, label_list, timestamp)
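
# A minimal sketch (assumed helper, not in the original) for decoding what
# fmin() returns above: hp.choice entries come back as indices and hp.quniform
# as floats, so hyperopt's space_eval maps best_params back to concrete values.
from hyperopt import space_eval

def resolve_best_params(space_params, best_params):
    """Map fmin()'s raw result back onto the search space's actual values."""
    best = space_eval(space_params, best_params)
    best['epochs'] = int(best['epochs'])            # quniform yields floats
    best['hidden_unit'] = int(best['hidden_unit'])
    return best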
def main(args):
    # Store name of experiment
    exp_name = args.exp_name
    exp_name = '{}_r{}_p{}_n{}_i{}_k{}'.format(exp_name, args.rho,
                                               args.pos_reward, args.neg_reward,
                                               args.class_imbalance,
                                               args.kldiv_lambda)

    # Create a directory for the output path
    args.output_path = os.path.join(args.output_path, args.exp_name)
    os.makedirs(args.output_path, exist_ok=True)
    utils.LOG_FILE = os.path.join(args.output_path, 'log.txt')
    LEARNING_PROFILE_FILE = os.path.join(args.output_path, 'learning_curve.txt')
    lpf = open(LEARNING_PROFILE_FILE, 'a')
    args.lpf = lpf

    # Set up logging to both the log file and the console
    logging.basicConfig(filename=utils.LOG_FILE,
                        filemode='a',
                        format='%(levelname)s :: %(asctime)s - %(message)s',
                        level=args.log_level,
                        datefmt='%d/%m/%Y %I:%M:%S %p')
    console = logging.StreamHandler()
    console.setLevel(args.log_level)
    formatter = logging.Formatter('%(levelname)s :: %(asctime)s - %(message)s',
                                  datefmt='%d/%m/%Y %I:%M:%S %p')
    console.setFormatter(formatter)
    logging.getLogger().addHandler(console)
    logging.info('Beginning code for experiment {} and storing stuff in {}'.format(
        exp_name, args.output_path))
    logging.info('Loaded arguments as \n{}'.format(str(pprint.pformat(args))))

    # Begin of main code
    train_loader, val_loader, labelled_train_loader = dataset.get_data_loaders(args)
    model = models.select_model(args)
    my_eval_fn = compute.get_evaluation_function(args)

    # Optimize only the parameters that require gradients
    if args.optim == 'sgd':
        optimizer = optim.SGD(filter(lambda p: p.requires_grad, model.parameters()),
                              momentum=args.momentum,
                              lr=args.lr,
                              weight_decay=args.decay)
    else:
        optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()),
                               lr=args.lr,
                               weight_decay=args.decay)

    checkpoint_file = os.path.join(args.output_path,
                                   '{}_checkpoint.pth'.format(exp_name))
    best_checkpoint_file = os.path.join(args.output_path,
                                        '{}_best_checkpoint.pth'.format(exp_name))
    logging.info('Saving checkpoints at {} and best checkpoint at: {}'.format(
        checkpoint_file, best_checkpoint_file))

    start_epoch = 0
    best_score = -9999999

    # Load checkpoint if present in input arguments
    if args.checkpoint != '':
        logging.info('Starting from checkpoint: {}'.format(args.checkpoint))
        cp = torch.load(args.checkpoint)
        start_epoch = cp['epoch'] + 1
        model.load_state_dict(cp['model'])
        # optimizer.load_state_dict(cp['optimizer'])  # TODO: why not do this?
        best_score = cp['best_score']
        # Reset the learning rate and weight decay to the current arguments
        for param_group in optimizer.param_groups:
            param_group['lr'] = args.lr
            param_group['weight_decay'] = args.decay

    num_epochs = args.num_epochs
    logging.info('Beginning train/validate cycle')

    # Evaluate once before training starts
    time1 = time.time()
    if val_loader is not None:
        record, metric_idx, headers = compute.compute(start_epoch - 1, model,
                                                      val_loader, optimizer,
                                                      'eval', eval_fn=my_eval_fn,
                                                      args=args)
        if args.log_eval is not None:
            handler = open(args.log_eval, "a")
            print(','.join([
                str(round(x, 6)) if isinstance(x, float) else str(x)
                for x in record
            ]), file=handler)
            handler.close()
    print("Time taken:", time.time() - time1)
    if args.only_eval:
        logging.info('Ran only eval mode, now exiting')
        exit(0)

    # Start TRAINING
    for epoch in range(start_epoch, num_epochs):
        logging.info('Beginning epoch {}'.format(epoch))
        if labelled_train_loader is not None:
            record, metric_idx, _ = compute.compute(epoch, model,
                                                    labelled_train_loader,
                                                    optimizer, 'train_sup',
                                                    eval_fn=my_eval_fn, args=args)
        if train_loader is not None:
            record, metric_idx, _ = compute.compute(
                epoch, model, train_loader, optimizer, 'train_un',
                eval_fn=my_eval_fn, args=args,
                labelled_train_loader=labelled_train_loader)
        if val_loader is not None:
            record, metric_idx, _ = compute.compute(epoch, model, val_loader,
                                                    None, 'eval',
                                                    eval_fn=my_eval_fn, args=args)

        # Track the best validation score and checkpoint every epoch
        is_best = False
        logging.info('Best score: {}, This score: {}'.format(
            best_score, record[metric_idx]))
        if record[metric_idx] > best_score:
            best_score = record[metric_idx]
            is_best = True
        utils.save_checkpoint(
            {
                'epoch': epoch,
                'best_score': best_score,
                'model': model.state_dict(),
                'optimizer': optimizer.state_dict(),
                'is_best': is_best
            }, epoch, is_best, checkpoint_file, best_checkpoint_file)

    args.lpf.close()
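
# A minimal sketch (not part of the original) for the commented-out TODO in
# main(): restoring optimizer state when resuming. If it is restored, the state
# tensors should also be moved onto the model's device, since torch.load may
# have placed them elsewhere.
import torch

def restore_optimizer(optimizer, cp, device):
    """Load optimizer state from a checkpoint dict and move it to `device`."""
    optimizer.load_state_dict(cp['optimizer'])
    for state in optimizer.state.values():
        for k, v in state.items():
            if torch.is_tensor(v):
                state[k] = v.to(device)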
def __init__(self, config, train_loader, val_loader, model_name):
    self.config = config
    self.train_loader = train_loader
    self.val_loader = val_loader
    self.model_name = model_name
    self.num_train = len(self.train_loader.dataset)
    self.num_valid = len(self.val_loader.dataset)
    self.saved_model_dir = self.config.saved_model_dir
    self.output_model_dir = self.config.output_model_dir
    self.best_model = config.best_model
    self.use_gpu = self.config.use_gpu

    # Either resume from a saved checkpoint or initialize a fresh model
    if self.config.resume:
        print("LOADING SAVED MODEL")
        self.net = select_model(self.model_name, self.config.file_type)
        self.loadCheckpoint()
    else:
        print("INITIALIZING NEW MODEL")
        self.net = select_model(self.model_name, self.config.file_type)

    self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    self.net = self.net.to(self.device)
    self.total_epochs = config.epochs

    # The LSTM-based models are paired with NLLLoss (which expects
    # log-probabilities) and Adam; the rest use CrossEntropyLoss and SGD
    if self.model_name == "lstm_model" or self.model_name == "cnn_lstm_model":
        print("NLL LOSS")
        self.criterion = nn.NLLLoss()
        self.optimizer = optim.Adam(self.net.parameters(), lr=0.0001)
        loss_name = "nll_loss"
        lr = 0.0001
    else:
        self.criterion = nn.CrossEntropyLoss()
        self.optimizer = optim.SGD(self.net.parameters(),
                                   lr=self.config.lr,
                                   momentum=self.config.momentum,
                                   weight_decay=self.config.weight_decay)
        loss_name = "crossentropy_loss"
        lr = self.config.lr

    self.num_params = sum([p.data.nelement() for p in self.net.parameters()])
    self.batch_size = self.config.batch_size
    self.train_paitence = config.train_paitence
    self.num_classes = 10

    # Log the run configuration to Weights & Biases unless debugging
    if not self.config.debug:
        self.experiment = wandb.init(project="audio_classification")
        hyper_params = {
            "model_name": self.model_name,
            "file_type": self.config.file_type,
            "dataset": self.config.dataset,
            "batch_size": self.config.batch_size,
            "num_epochs": self.total_epochs,
            "loss_function": loss_name,
            "learning_rate": lr,
            "momentum": self.config.momentum,
            "weight_decay": self.config.weight_decay
        }
        self.experiment.config.update(hyper_params)
        wandb.watch(self.net)
        # summary(self.net, input_size=(1, 128, 216))

    print('[*] Number of model parameters: {:,}'.format(self.num_params))
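
# Hypothetical usage sketch (names assumed, not from the original): the
# enclosing trainer class expects a config object exposing the attributes read
# above, plus PyTorch DataLoaders for the train and validation splits.
# trainer = Trainer(config=cfg, train_loader=train_dl, val_loader=val_dl,
#                   model_name="cnn_lstm_model")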