def train(args):
    """Load a model from a checkpoint (a genome description ``*.txt`` or a
    serialized model file), train it on the configured environment, and save
    the best-validation-accuracy model.

    Args:
        args: namespace providing at least ``checkpoint``, ``env``,
            ``epochs`` and ``log_step``; ``split_ratio`` is overwritten here.
    """
    assert args.checkpoint is not None
    # Renamed from `path`: the original reused the same name later for the
    # save path, shadowing the load directory.
    load_dir = os.path.join(settings.PROJECT_ROOT, settings.LOAD_DIR)
    # A checkpoint ending in 'txt' holds a genome description; anything else
    # is treated as a serialized model file.
    if args.checkpoint.endswith('txt'):
        with open(os.path.join(load_dir, args.checkpoint), 'r') as f:
            genome = f.readlines()
        model = common.load_genome(genome, args)
    else:
        model = common.load_model(os.path.join(load_dir, args.checkpoint))

    args.split_ratio = 0.99  # NOTE(review): silently overrides caller's value
    env = getattr(envs, args.env)(args)
    trainer = Trainer(env, model, args)
    logger = Logger('MAIN', args=args)
    logger.log("Begin training {}".format(args.checkpoint))

    best_acc = 0
    for epoch in range(args.epochs):
        trainer.train()
        # Validation and logging run only every `log_step` epochs.
        if epoch % args.log_step == 0:
            logger.log("Training statistics for epoch: {}".format(epoch))
            logger.scalar_summary(trainer.info.avg, epoch)
            trainer.info.reset()
            trainer.infer(test=True)
            acc = trainer.info.avg['Accuracy/Top1']
            trainer.info.reset()
            logger.log("Validation accuracy: {}".format(acc))
            if acc > best_acc:
                best_acc = acc
                # Fixed: `'model.pth'.format(epoch)` was a no-op — the string
                # has no placeholder, so the epoch never reached the name.
                # The best model is intentionally overwritten in place.
                save_path = os.path.join(logger.log_dir, 'model.pth')
                logger.log("Saving model at epoch: {}".format(epoch))
                common.save_model(model, save_path)
def train(training_dataset: dataset.Word2VecDataset, config: configure.Config):
    """Run the word2vec training loop.

    Args:
        training_dataset (utils.dataset.Word2VecDataset): dataset to train on.
        config (utils.configure.Config): all parameters used during training.
    """
    # Prefer the GPU whenever CUDA is available.
    if torch.cuda.is_available():
        device = torch.device("cuda")
    else:
        device = torch.device("cpu")

    loader = data.DataLoader(training_dataset,
                             batch_size=config.batch_size,
                             shuffle=True)

    net = word2vec.init_word2vec_model(config.vocab_size,
                                       config.embedding_dim,
                                       config.batch_size,
                                       config.bag_size,
                                       config.mode).to(device)

    # One checkpoint per epoch, tagged with that epoch's average loss.
    for epoch in range(config.epochs):
        print(f"Epoch [{epoch + 1}/{config.epochs}]")
        epoch_avg_loss = executor.train_step(net, loader, device, config)
        common.save_model(net, config.save_path, epoch_avg_loss, epoch)
# Warm-start from a finetune checkpoint: keep only entries whose key contains
# 'model' and load them non-strictly (missing/extra keys tolerated).
state_all = torch.load(cfg.finetune)['model']
state_clip = {}  # only use backbone parameters
for k, v in state_all.items():
    if 'model' in k:
        state_clip[k] = v
net.load_state_dict(state_clip, strict=False)

# Resume full training state (weights + optimizer, if saved) from a checkpoint.
if cfg.resume is not None:
    dist_print('==> Resume model from ' + cfg.resume)
    resume_dict = torch.load(cfg.resume, map_location='cpu')
    net.load_state_dict(resume_dict['model'])
    if 'optimizer' in resume_dict.keys():
        optimizer.load_state_dict(resume_dict['optimizer'])
    # The epoch number is parsed from characters [2:5] of the checkpoint
    # file name; resume from the following epoch.
    # NOTE(review): assumes a fixed 'epNNN...'-style naming convention —
    # confirm against the corresponding save_model implementation.
    resume_epoch = int(os.path.split(cfg.resume)[1][2:5]) + 1
else:
    resume_epoch = 0

scheduler = get_scheduler(optimizer, cfg, len(train_loader))
dist_print(len(train_loader))
metric_dict = get_metric_dict(cfg)
loss_dict = get_loss_dict(cfg)
logger = get_logger(work_dir, cfg)
cp_projects(work_dir)

# Main loop: train one epoch, then checkpoint after every epoch.
for epoch in range(resume_epoch, cfg.epoch):
    train(net, train_loader, loss_dict, optimizer, scheduler, logger, epoch,
          metric_dict, cfg.use_aux)
    save_model(net, optimizer, epoch, work_dir, distributed)
logger.close()
# Warm-start from a finetune checkpoint: keep only entries whose key contains
# 'model' and load them non-strictly (missing/extra keys tolerated).
state_all = torch.load(cfg['finetune'])['model']
state_clip = {}  # only use backbone parameters
for k, v in state_all.items():
    if 'model' in k:
        state_clip[k] = v
net.load_state_dict(state_clip, strict=False)

# Resume full training state (weights + optimizer, if saved) from a checkpoint.
if cfg['resume'] is not None:
    logger.log('==> Resume model from ' + cfg['resume'])
    resume_dict = torch.load(cfg['resume'], map_location='cpu')
    net.load_state_dict(resume_dict['model'])
    if 'optimizer' in resume_dict:  # idiom: membership test, not .keys()
        optimizer.load_state_dict(resume_dict['optimizer'])
    # The epoch number is parsed from characters [2:5] of the checkpoint
    # file name; resume from the following epoch.
    # NOTE(review): assumes a fixed 'epNNN...'-style naming convention —
    # confirm against the corresponding save_model implementation.
    resume_epoch = int(os.path.split(cfg['resume'])[1][2:5]) + 1
else:
    resume_epoch = 0

scheduler = get_scheduler(optimizer, cfg['train'], len(train_loader))
# Fixed typo in the log message: 'Totoal' -> 'Total'.
logger.log('Train Datasets Total: %d' % len(train_loader))
metric_dict = get_metric_dict(cfg['dataset'])
loss_dict = get_loss_dict(cfg)

max_F = 0  # best validation F-score seen so far
for epoch in range(resume_epoch, cfg['train']['epoch']):
    train(net, train_loader, loss_dict, optimizer, scheduler, logger, epoch,
          metric_dict, cfg['dataset']['use_aux'], args.local_rank)
    save_model(net, optimizer, epoch)
    # Periodic validation; keep a separate copy of the best model so far.
    val_intervals = cfg['test']['val_intervals']
    if val_intervals > 0 and epoch % val_intervals == 0:
        F = test(net, test_loader, cfg['dataset'], cfg['log_path'], logger)
        if F > max_F:
            save_model(net, optimizer, epoch, 'model_best.pth')
            max_F = F
def train(args, x_train, y_train, x_val, y_val, x_test, y_test, fold_idx):
    """Train one model for a single cross-validation fold, then evaluate it.

    Builds (or reloads from ``args.model_file``) the model type selected by
    ``args.type``, fits it with checkpoint/LR-reduction/logging callbacks,
    reloads the best checkpoint, and reports test-set metrics.

    Args:
        args: parsed CLI namespace (num_classes, type, model_file, ckpt_dir,
            save_best, batch, epochs, shift, model hyper-parameters).
        x_train, y_train: training windows and integer labels.
        x_val, y_val: validation split.
        x_test, y_test: test split.
        fold_idx (int): index of the current fold, used in file names.

    Returns:
        float: test-set accuracy of the best restored model.
    """
    # One-hot encode the integer labels for categorical training.
    y_train_bin = to_categorical(y_train, num_classes=args.num_classes)
    y_val_bin = to_categorical(y_val, num_classes=args.num_classes)
    y_test_bin = to_categorical(y_test, num_classes=args.num_classes)

    print('train shape:', x_train.shape, y_train.shape)
    print('val shape:', x_val.shape, y_val.shape)
    print('test shape:', x_test.shape, y_test.shape)

    print('Build model...')
    n_timesteps, n_features = x_train.shape[-2], x_train.shape[-1]
    print(f'n_timesteps {n_timesteps}')
    print(f'n_features: {n_features}')
    n_outputs = y_train_bin.shape[1]

    # Resume from a previously saved model file when one exists.
    if os.path.isfile(args.model_file):
        model = load_model(args.model_file,
                           custom_objects={
                               'focal_loss_fixed': focal_loss(),
                               'ghm_class_loss': ghm.ghm_class_loss
                           })
    else:
        model = None

    # Fixed: the local variable was named `type`, shadowing the builtin.
    mtype = model_type[args.type]
    if mtype == ModelType.ANN:
        # Fixed: ANN was previously handled by TWO branches — the first only
        # reshaped the data and never assigned `folder_name`/`model`, so the
        # code below raised NameError, and the second ANN branch was
        # unreachable. Both are merged here.
        x_train = reshape_data(x_train)
        n_features = x_train.shape[1]
        folder_name = '/ann'
        if model is None:
            model = ann_model(n_features, n_outputs)
    elif mtype == ModelType.LSTM:
        folder_name = '/lstm'
        if model is None:
            model = lstm_model(n_timesteps, n_features, n_outputs,
                               args.hidden_size, args.dense_size)
    elif mtype == ModelType.CNN:
        folder_name = '/cnn'
        if model is None:
            model = cnn_model_base(n_timesteps, n_features, n_outputs,
                                   hidden_layers=args.hidden_layers,
                                   kernels=args.kernels,
                                   dropout_rate=args.dropout_rate,
                                   activation=args.activation)
    elif mtype == ModelType.CNNF:
        folder_name = '/cnn_feature'
        if model is None:
            model = cnn_features_model(n_timesteps, n_features, n_outputs,
                                       nb_features)
    elif mtype == ModelType.CNNM:
        folder_name = '/cnn_m'
        if model is None:
            model = cnn_model(n_timesteps, n_features, n_outputs)
    else:
        raise argparse.ArgumentTypeError(
            'Unsupported model type value encountered.')

    print(model.summary())

    ckpt_dir = args.ckpt_dir + folder_name
    log_dir = args.ckpt_dir + '/logs' + folder_name
    out_path = args.ckpt_dir + folder_name

    # Checkpointing: either keep a single best-weights file per fold, or save
    # every improvement with epoch/accuracy embedded in the file name.
    if not os.path.isdir(ckpt_dir):
        os.makedirs(ckpt_dir)
    if args.save_best:
        filepath = ckpt_dir + "/weights.best-" + str(fold_idx) + ".hdf5"
        fold_idx += 1
    else:
        filepath = ckpt_dir + "/weights-improvement-" \
                   "{epoch:02d}-{val_categorical_accuracy:.2f}.hdf5"
    checkpoint = ModelCheckpoint(filepath,
                                 monitor='val_categorical_accuracy',
                                 verbose=1,
                                 save_best_only=args.save_best,
                                 mode='max')
    lr_reduce = ReduceLROnPlateau(monitor='val_categorical_accuracy',
                                  factor=0.5, patience=20, verbose=0)
    # NOTE(review): `early_stopper` is created but never added to
    # `callbacks_list`, so early stopping never fires — confirm whether this
    # is intentional before wiring it in (doing so changes training runs).
    early_stopper = EarlyStopping(monitor='val_loss', patience=30, verbose=1,
                                  mode='min',
                                  restore_best_weights=args.save_best)
    if not os.path.isdir(log_dir):
        os.makedirs(log_dir)
    # NOTE(review): 'trainning.log' is misspelled, but renaming the file
    # would break any tooling that reads this path — left as-is.
    csv_log = CSVLogger(log_dir + '/trainning.log', separator=',',
                        append=False)
    tensorboard = TensorBoard(log_dir=log_dir)
    callbacks_list = [checkpoint, lr_reduce, csv_log, tensorboard]

    print('Train...')
    if mtype == ModelType.CNNF:
        # CNNF consumes the raw windows plus the hand-crafted feature matrix.
        model.fit([x_train, features_train], y_train_bin,
                  batch_size=args.batch,
                  epochs=args.epochs,
                  validation_data=([x_val, features_val], y_val_bin),
                  callbacks=callbacks_list)
    else:
        model.fit(x_train, y_train_bin,
                  batch_size=args.batch,
                  epochs=args.epochs,
                  validation_data=(x_val, y_val_bin),
                  callbacks=callbacks_list)

    save_model(model, 'har_cnn' + str(fold_idx), out_path)

    # Reload the best checkpoint before evaluating on the test split.
    model = load_model(filepath,
                       custom_objects={
                           'focal_loss_fixed': focal_loss(),
                           'ghm_class_loss': ghm.ghm_class_loss
                       })
    if mtype == ModelType.CNNF:
        loss, acc = model.evaluate([x_test, features_test], y_test_bin,
                                   batch_size=args.batch, verbose=1)
    else:
        loss, acc = model.evaluate(x_test, y_test_bin,
                                   batch_size=args.batch, verbose=1)
    print('Test loss:', loss)
    print('Test accuracy:', acc)
    print("model's inputs:", model.inputs)
    print("model's outputs:", model.outputs)

    # Per-class sample counts in the test split (classes 0..5).
    for cls in range(6):
        print(f'{cls} num: {np.sum(y_test == cls)}')

    if mtype == ModelType.CNNF:
        y_pred_probs = model.predict([x_test, features_test])
    else:
        y_pred_probs = model.predict(x_test)
    stats_evaluation(y_test, y_pred_probs,
                     num_classes=len(CategoryNames),
                     shift=args.shift,
                     show=False)
    return acc