def run_rf(hdf5, experiment):
    """Score every cross-validation fold of an experiment and average the results.

    For each fold stored under ``hdf5["experiments"][experiment]`` the
    training and validation splits returned by ``load_fold`` are pooled into
    one training set, ``run`` is called with that set plus the test split,
    and the per-fold results are averaged element-wise.

    Args:
        hdf5: Mapping-like store exposing ``"experiments"`` and ``"patients"``.
        experiment: Key of the experiment inside ``hdf5["experiments"]``.

    Returns:
        ``np.mean`` over the folds (``axis=0``) of the values returned by
        ``run``, converted with ``.tolist()``.
    """
    exp_storage = hdf5["experiments"][experiment]

    def _score_fold(fold):
        (train_x, train_y,
         valid_x, valid_y,
         test_x, test_y) = load_fold(hdf5["patients"], exp_storage, fold)
        # Validation data is not used for model selection here, so it is
        # simply appended to the training data.
        pooled_x = np.concatenate([train_x, valid_x])
        pooled_y = np.concatenate([train_y, valid_y])
        return run(pooled_x, pooled_y, test_x, test_y)

    per_fold = [_score_fold(fold) for fold in exp_storage]
    return np.mean(per_fold, axis=0).tolist()
def test_proposed():
    """Score the test split of every fold with its saved model and store the result.

    For each fold in ``range(st.max_fold)`` the checkpoint
    ``"<fold>_<multistream_mode>_<model_mode>_<tst_epoch>.ckpt"`` under
    ``st.tst_model_path`` is restored, the fold's test data is pushed through
    the network in batches of ``st.batch_size`` and column 0 of
    ``models.pred_sig`` is collected. The per-fold score vectors are
    concatenated in fold order and written with ``np.save`` to
    ``st.summ_path_root + "pm/<model_mode>_<multistream_mode>_pm.npy"``.

    The number of folds loaded now follows ``st.max_fold`` (the original
    always loaded exactly five, which raised ``IndexError`` for
    ``st.max_fold > 5``).

    Returns:
        None. The only result is the ``.npy`` file written to disk.
    """
    # Only the test split (last two items of ``load_fold``) is needed.
    all_tst_dat = []
    all_tst_lbl = []
    for fold_num in range(st.max_fold):
        _, _, fold_dat, fold_lbl = utils.load_fold(fold_num=fold_num)
        all_tst_dat.append(fold_dat)
        all_tst_lbl.append(fold_lbl)

    placeholders = {
        "bdat": tf.placeholder(shape=(None, 20, 20, 6), dtype=tf.float32,
                               name="bdat_place"),
        "mdat": tf.placeholder(shape=(None, 20, 20, 6), dtype=tf.float32,
                               name="mdat_place"),
        "tdat": tf.placeholder(shape=(None, 20, 20, 6), dtype=tf.float32,
                               name="tdat_place"),
        "lbl": tf.placeholder(shape=(None, 1), dtype=tf.int64),
        # Used as a feed key below, so presumably a placeholder/tensor
        # defined in the settings module -- TODO confirm.
        "train": st.is_training,
    }
    models = model.MGICNN(placeholders=placeholders)
    models.build_proposed()

    all_tst_pm = []
    with tf.Session() as sess:
        saver = tf.train.Saver()
        for cur_fold in range(st.max_fold):
            tst_dat = all_tst_dat[cur_fold]
            tst_lbl = all_tst_lbl[cur_fold]
            tst_pm = np.zeros(shape=len(tst_dat), dtype=np.float32)

            sess.run(tf.global_variables_initializer())
            saver.restore(
                sess=sess,
                save_path=os.path.join(
                    st.tst_model_path,
                    "%d_%d_%d_%d.ckpt" % (cur_fold, st.multistream_mode,
                                          st.model_mode, st.tst_epoch)))

            for tst_step in range(0, len(tst_dat), st.batch_size):
                batch = slice(tst_step, tst_step + st.batch_size)
                test_feed_dict = {
                    # Axis 1 of the data selects the stream fed to each input.
                    placeholders["bdat"]: tst_dat[batch, 0],
                    placeholders["mdat"]: tst_dat[batch, 1],
                    placeholders["tdat"]: tst_dat[batch, 2],
                    placeholders["lbl"]: tst_lbl[batch],
                    placeholders["train"]: False,
                }
                tst_pm[batch] = sess.run(
                    models.pred_sig, feed_dict=test_feed_dict)[:, 0]

            all_tst_pm.append(tst_pm)

    all_tst_pm = np.concatenate(all_tst_pm, axis=0)
    np.save(
        st.summ_path_root + "pm/%d_%d_pm.npy" % (st.model_mode,
                                                 st.multistream_mode),
        all_tst_pm)
def train_proposed():
    """Train the proposed model on one fold, checkpointing and testing every epoch.

    Per epoch the training data is visited once in a random order in batches
    of ``st.batch_size``. At most once per minute the loss and summary of the
    current batch are evaluated (with the training flag off) and logged.
    After each epoch a checkpoint is written and the confusion counts on the
    test split are printed.

    Fixes relative to the previous version:
      * FP and FN were swapped: a wrong prediction on a sample whose label is
        1 is a false negative, a wrong prediction on a label-0 sample is a
        false positive.
      * The "Saved in" message now shows the path that was actually written.
      * The summary writer is closed when training ends or fails, so buffered
        events are flushed.

    Returns:
        None. Results are checkpoints, summary events and console output.
    """
    trn_dat, trn_lbl, tst_dat, tst_lbl = utils.load_fold()

    placeholders = {
        "bdat": tf.placeholder(shape=(None, 20, 20, 6), dtype=tf.float32,
                               name="bdat_place"),
        "mdat": tf.placeholder(shape=(None, 20, 20, 6), dtype=tf.float32,
                               name="mdat_place"),
        "tdat": tf.placeholder(shape=(None, 20, 20, 6), dtype=tf.float32,
                               name="tdat_place"),
        "lbl": tf.placeholder(shape=(None, 1), dtype=tf.int64),
        # Used as a feed key below, so presumably a placeholder/tensor
        # defined in the settings module -- TODO confirm.
        "train": st.is_training,
    }
    models = model.MGICNN(placeholders=placeholders)
    models.build_proposed()

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    summary_writer = tf.summary.FileWriter(logdir=st.summ_path)
    saver = tf.train.Saver(max_to_keep=0)

    tst_true = tst_lbl[..., 0]
    tst_ones_true = tst_true == 1
    tst_zeros_true = tst_true == 0

    local_time = time()
    trn_cnt = 0
    try:
        with tf.Session(config=config) as sess:
            sess.run(tf.global_variables_initializer())
            for cur_epoch in range(st.epoch):
                rand_idx = np.random.permutation(len(trn_dat))
                for cur_step in range(0, len(trn_dat), st.batch_size):
                    cur_idx = rand_idx[cur_step:cur_step + st.batch_size]
                    feed_dict = {
                        placeholders["bdat"]: trn_dat[cur_idx, 0],
                        placeholders["mdat"]: trn_dat[cur_idx, 1],
                        placeholders["tdat"]: trn_dat[cur_idx, 2],
                        placeholders["lbl"]: trn_lbl[cur_idx],
                        placeholders["train"]: True,
                    }
                    sess.run(models.optim, feed_dict=feed_dict)

                    # Throttle logging to one summary per 60 seconds.
                    if time() - local_time > 60:
                        feed_dict.update({placeholders["train"]: False})
                        loss, summ = sess.run(
                            [models.loss, models.summary_op],
                            feed_dict=feed_dict)
                        summary_writer.add_summary(summ, global_step=trn_cnt)
                        print("\rEpoch %d, Step %d, Loss %f"
                              % (cur_epoch, cur_step, loss), end="")
                        local_time = time()
                    trn_cnt += 1

                ckpt_path = st.summ_path + "%d_%d_%d_%d.ckpt" % (
                    st.fold_num, st.multistream_mode, st.model_mode,
                    cur_epoch)
                saver.save(sess=sess, save_path=ckpt_path)

                tst_pred = np.zeros(shape=len(tst_dat), dtype=np.uint8)
                for tst_step in range(0, len(tst_dat), st.batch_size):
                    batch = slice(tst_step, tst_step + st.batch_size)
                    test_feed_dict = {
                        placeholders["bdat"]: tst_dat[batch, 0],
                        placeholders["mdat"]: tst_dat[batch, 1],
                        placeholders["tdat"]: tst_dat[batch, 2],
                        placeholders["lbl"]: tst_lbl[batch],
                        placeholders["train"]: False,
                    }
                    tst_pred[batch] = sess.run(
                        models.prediction, feed_dict=test_feed_dict)

                tst_correct = np.equal(tst_true, tst_pred)
                tst_wrong = np.not_equal(tst_true, tst_pred)
                tst_TP = np.count_nonzero(
                    np.logical_and(tst_correct, tst_ones_true))
                tst_TN = np.count_nonzero(
                    np.logical_and(tst_correct, tst_zeros_true))
                # Label 1 but predicted otherwise -> missed positive.
                tst_FN = np.count_nonzero(
                    np.logical_and(tst_wrong, tst_ones_true))
                # Label 0 but predicted otherwise -> false alarm.
                tst_FP = np.count_nonzero(
                    np.logical_and(tst_wrong, tst_zeros_true))
                print(
                    "\nSaved in %s" % ckpt_path,
                    "TP/FP/TN/FN (%d/%d/%d/%d)"
                    % (tst_TP, tst_FP, tst_TN, tst_FN))
    finally:
        summary_writer.close()
plt.figure(figsize=(8, 8)) plt.plot(fpr, tpr, color='red', lw=lw, label='ROC curve') plt.plot([0, 1], [0, 1], color='navy', lw=lw, linestyle='--') plt.xlim([0, 1.0]) plt.ylim([0, 1.05]) plt.xlabel('False Positive Rate', fontsize=16) plt.ylabel('True Positive Rate', fontsize=16) plt.title('AUC: %0.3f' % roc_auc, fontsize=16) plt.legend(loc="lower right", fontsize=16) plt.show() if __name__ == '__main__': pred = np.load("G:\\DL Project\\FN\\summary\\pred_result\\0_0_pm.npy") _, _, tst_dat1, tst_lbl1 = utils.load_fold(fold_num=0) _, _, tst_dat2, tst_lbl2 = utils.load_fold(fold_num=1) _, _, tst_dat3, tst_lbl3 = utils.load_fold(fold_num=2) _, _, tst_dat4, tst_lbl4 = utils.load_fold(fold_num=3) _, _, tst_dat5, tst_lbl5 = utils.load_fold(fold_num=4) all_tst_lbl = [tst_lbl1, tst_lbl2, tst_lbl3, tst_lbl4, tst_lbl5] all_tst_lbl = np.concatenate(all_tst_lbl, axis=0) """ # 统计每个fold的TP个数 index_1 = np.argwhere(all_tst_lbl[6574:7581] == 1) print(index_1[:, 0], " ", len(index_1[:, 0]), "\n") index_2 = np.argwhere(all_tst_lbl[7581:8455+7581] == 1) print(index_2[:, 0], " ", len(index_2[:, 0]), "\n") index_3 = np.argwhere(all_tst_lbl[8455+7581:7451+8455+7581] == 1)
def nn_results(hdf5, experiment, code_size_1, code_size_2, code_size_3):
    """Evaluate the saved 2500-1250-625 MLP of every CC200 fold and average the metrics.

    For each fold the fine-tuned network is rebuilt, its weights are restored
    from ``./data/cc200_tichu_2500_1250_625/<experiment>_<fold>_mlp.ckpt``,
    and the test split is scored. Intermediate values are printed for
    inspection.

    Changes relative to the previous version:
      * AUC is now computed on the raw network scores. Previously ``y_score``
        was a view into ``output`` and the in-place rounding loop rounded it
        to 4 decimals before ``roc_auc_score`` saw it.
      * The builtin ``list`` is no longer shadowed.
      * ``print`` uses the single-argument call form, which behaves the same
        on Python 2 and Python 3.
      * An unused second workbook and unused autoencoder paths were removed.

    Args:
        hdf5: Mapping-like store exposing ``"experiments"`` and ``"patients"``.
        experiment: Ignored; see the note at the top of the body.
        code_size_1: Unused in this function.
        code_size_2: Unused in this function.
        code_size_3: Unused in this function.

    Returns:
        ``["cc200_whole", accuracy, precision, recall, fscore, sensitivity,
        specificity, auc]`` where each metric is the mean over folds.
    """
    # NOTE(review): the caller's `experiment` is overridden and the dataset
    # key is hard-coded; kept as-is because the checkpoint directory below is
    # specific to cc200 as well.
    exp_storage = hdf5["experiments"]['cc200_whole']
    experiment = "cc200_whole"
    print(exp_storage)

    n_classes = 2
    results = []
    sheet_columns = ['']   # one spreadsheet column per appended sequence
    rounded_outputs = []   # per-fold network outputs rounded to 4 decimals

    for fold in exp_storage:
        experiment_cv = format_config("{experiment}_{fold}", {
            "experiment": experiment,
            "fold": fold,
        })
        print("experiment_cv")
        print(fold)

        X_train, y_train, \
            X_valid, y_valid, \
            X_test, y_test, test_pid = load_fold(hdf5["patients"],
                                                 exp_storage, fold)
        sheet_columns.append(test_pid)
        print("X_train")
        print(X_train.shape)

        y_test = np.array([to_softmax(n_classes, y) for y in y_test])

        nn_model_path = format_config(
            "./data/cc200_tichu_2500_1250_625/{experiment}_mlp.ckpt", {
                "experiment": experiment_cv,
            })

        try:
            model = nn(X_test.shape[1], n_classes, [
                {"size": 2500, "actv": tf.nn.tanh},
                {"size": 1250, "actv": tf.nn.tanh},
                {"size": 625, "actv": tf.nn.tanh},
            ])
            init = tf.global_variables_initializer()
            with tf.Session() as sess:
                sess.run(init)
                saver = tf.train.Saver(model["params"])
                print("savernn_model_path")
                print(nn_model_path)
                saver.restore(sess, nn_model_path)
                output = sess.run(model["output"], feed_dict={
                    model["input"]: X_test,
                    # All dropout inputs are fed 1.0 for evaluation.
                    model["dropouts"][0]: 1.0,
                    model["dropouts"][1]: 1.0,
                    model["dropouts"][2]: 1.0,
                })

            np.set_printoptions(suppress=True)
            y_score = output[:, 1]
            print("y_score")
            print(y_score)
            y_pred = np.argmax(output, axis=1)
            print("y_pred")
            print(y_pred)

            print("output")
            n_rows, n_cols = output.shape[0], output.shape[1]
            print(n_rows)
            print(n_cols)
            # Round into a separate array so `y_score` (a view of `output`)
            # keeps full precision for the AUC below.
            rounded = np.round(output, 4)
            aa = rounded[:, 0]
            print(type(aa))
            rounded_outputs.append(rounded)
            sheet_columns.append(y_pred)
            print("-------------------------------------")

            y_true = np.argmax(y_test, axis=1)
            sheet_columns.append(y_true)
            print("y_true")
            print(y_true)

            auc_score = roc_auc_score(y_true, y_score)
            print(auc_score)

            [[TN, FP], [FN, TP]] = confusion_matrix(
                y_true, y_pred, labels=[0, 1]).astype(float)
            accuracy = (TP + TN) / (TP + TN + FP + FN)
            print(TP)
            print(TN)
            print(FP)
            print(FN)
            specificity = TN / (FP + TN)
            precision = TP / (TP + FP)
            sensivity = recall = TP / (TP + FN)
            fscore = 2 * TP / (2 * TP + FP + FN)
            results.append([
                accuracy, precision, recall, fscore, sensivity, specificity,
                auc_score
            ])
        finally:
            reset()

    workbook = xlwt.Workbook(encoding='utf-8')
    booksheet = workbook.add_sheet('Sheet 1', cell_overwrite_ok=True)
    print(rounded_outputs)
    # Each collected sequence becomes one column of the sheet.
    for i, row in enumerate(sheet_columns):
        for j, col in enumerate(row):
            booksheet.write(j, i, col)
    # NOTE: the sheet is only built in memory; saving is currently disabled.
    # workbook.save('./data/dos_tichu_2500_1250_625_xlst.xls')

    return [experiment] + np.mean(results, axis=0).tolist()
def run_nn(hdf5, experiment, code_size_1, code_size_2):
    """Train the two-autoencoder pipeline on every fold of an experiment.

    For each fold stored under ``hdf5["experiments"][experiment]`` three
    stages run in sequence, each preceded by ``reset()``: the first
    autoencoder, the second autoencoder (given the first one's checkpoint),
    and the fine-tuning stage (given both autoencoder checkpoints).
    Checkpoints are placed under ``./data/models/`` and named after
    ``"<experiment>_<fold>"``.

    Args:
        hdf5: Mapping-like store exposing ``"experiments"`` and ``"patients"``.
        experiment: Key of the experiment inside ``hdf5["experiments"]``.
        code_size_1: Code size passed to the first autoencoder.
        code_size_2: Code size passed to the second autoencoder.

    Returns:
        None.
    """
    def _ckpt_path(template, experiment_cv):
        # Fill the fold-specific experiment name into a checkpoint template.
        return format_config(template, {"experiment": experiment_cv})

    exp_storage = hdf5["experiments"][experiment]

    for fold in exp_storage:
        experiment_cv = format_config("{experiment}_{fold}", {
            "experiment": experiment,
            "fold": fold,
        })

        (train_x, train_y,
         valid_x, valid_y,
         test_x, test_y) = load_fold(hdf5["patients"], exp_storage, fold)
        splits = (train_x, train_y, valid_x, valid_y, test_x, test_y)

        ae1_model_path = _ckpt_path(
            "./data/models/{experiment}_autoencoder-1.ckpt", experiment_cv)
        ae2_model_path = _ckpt_path(
            "./data/models/{experiment}_autoencoder-2.ckpt", experiment_cv)
        nn_model_path = _ckpt_path(
            "./data/models/{experiment}_mlp.ckpt", experiment_cv)

        # Stage 1: first autoencoder.
        reset()
        run_autoencoder1(experiment_cv, *splits,
                         model_path=ae1_model_path,
                         code_size=code_size_1)

        # Stage 2: second autoencoder, which is given stage 1's checkpoint.
        reset()
        run_autoencoder2(experiment_cv, *splits,
                         model_path=ae2_model_path,
                         prev_model_path=ae1_model_path,
                         prev_code_size=code_size_1,
                         code_size=code_size_2)

        # Stage 3: multilayer network using both pre-trained autoencoders.
        reset()
        run_finetuning(experiment_cv, *splits,
                       model_path=nn_model_path,
                       prev_model_1_path=ae1_model_path,
                       prev_model_2_path=ae2_model_path,
                       code_size_1=code_size_1,
                       code_size_2=code_size_2)
def run_nn(hdf5, experiment, code_size_1, code_size_2, code_size_3):
    """Train the three-autoencoder pipeline on every fold of the CC200 data.

    For each fold four stages run in sequence, each preceded by ``reset()``:
    autoencoders 1, 2 and 3 (each later one is given the previous one's
    checkpoint) and the fine-tuning stage (given all three checkpoints).
    Checkpoints are placed under ``./data/cc200_tichu_2500_1250_625/`` and
    named after ``"<experiment>_<fold>"``.

    Args:
        hdf5: Mapping-like store exposing ``"experiments"`` and ``"patients"``.
        experiment: Name used as the prefix of the checkpoint files. It does
            not select the data; see the note in the body.
        code_size_1: Code size passed to the first autoencoder.
        code_size_2: Code size passed to the second autoencoder.
        code_size_3: Code size passed to the third autoencoder.

    Returns:
        None.
    """
    # The dataset key is fixed to "cc200_whole". Earlier revisions kept
    # commented-out variants for "aal_whole" and "dosenbach160_whole", with
    # checkpoints under ./data/aal_tichu_2500_1250_625/ and
    # ./data/dosenbach160_tichu_2500_1250_625/ respectively.
    exp_storage = hdf5["experiments"]["cc200_whole"]

    def _ckpt_path(template, experiment_cv):
        # Fill the fold-specific experiment name into a checkpoint template.
        return format_config(template, {"experiment": experiment_cv})

    for fold in exp_storage:
        experiment_cv = format_config("{experiment}_{fold}", {
            "experiment": experiment,
            "fold": fold,
        })

        # `test_pid` is returned by load_fold but not needed for training.
        (train_x, train_y,
         valid_x, valid_y,
         test_x, test_y, test_pid) = load_fold(hdf5["patients"],
                                               exp_storage, fold)
        splits = (train_x, train_y, valid_x, valid_y, test_x, test_y)

        ae1_model_path = _ckpt_path(
            "./data/cc200_tichu_2500_1250_625/{experiment}_autoencoder-1.ckpt",
            experiment_cv)
        ae2_model_path = _ckpt_path(
            "./data/cc200_tichu_2500_1250_625/{experiment}_autoencoder-2.ckpt",
            experiment_cv)
        ae3_model_path = _ckpt_path(
            "./data/cc200_tichu_2500_1250_625/{experiment}_autoencoder-3.ckpt",
            experiment_cv)
        nn_model_path = _ckpt_path(
            "./data/cc200_tichu_2500_1250_625/{experiment}_mlp.ckpt",
            experiment_cv)

        # Stage 1: first autoencoder.
        reset()
        run_autoencoder1(experiment_cv, *splits,
                         model_path=ae1_model_path,
                         code_size=code_size_1)

        # Stage 2: second autoencoder, which is given stage 1's checkpoint.
        reset()
        run_autoencoder2(experiment_cv, *splits,
                         model_path=ae2_model_path,
                         prev_model_path=ae1_model_path,
                         prev_code_size=code_size_1,
                         code_size=code_size_2)

        # Stage 3: third autoencoder, which is given stage 2's checkpoint.
        reset()
        run_autoencoder3(experiment_cv, *splits,
                         model_path=ae3_model_path,
                         prev_model_path=ae2_model_path,
                         prev_code_size=code_size_2,
                         code_size=code_size_3)

        # Stage 4: multilayer network using all pre-trained autoencoders.
        reset()
        run_finetuning(experiment_cv, *splits,
                       model_path=nn_model_path,
                       prev_model_1_path=ae1_model_path,
                       prev_model_2_path=ae2_model_path,
                       prev_model_3_path=ae3_model_path,
                       code_size_1=code_size_1,
                       code_size_2=code_size_2,
                       code_size_3=code_size_3)
def nn_results(hdf5, experiment, code_size_1, code_size_2):
    """Evaluate the saved 1000-600 MLP of every fold and average the metrics.

    For each fold under ``hdf5["experiments"][experiment]`` the network is
    rebuilt, its weights are restored from
    ``./data/models/<experiment>_<fold>_mlp.ckpt``, the test split is scored
    and binary classification metrics are derived from the confusion matrix.

    Args:
        hdf5: Mapping-like store exposing ``"experiments"`` and ``"patients"``.
        experiment: Key of the experiment inside ``hdf5["experiments"]``.
        code_size_1: Unused in this function.
        code_size_2: Unused in this function.

    Returns:
        ``[experiment, accuracy, precision, recall, fscore, sensitivity,
        specificity]`` where each metric is the mean over folds.
    """
    exp_storage = hdf5["experiments"][experiment]
    n_classes = 2
    fold_metrics = []

    for fold in exp_storage:
        experiment_cv = format_config("{experiment}_{fold}", {
            "experiment": experiment,
            "fold": fold,
        })

        (X_train, y_train,
         X_valid, y_valid,
         X_test, y_test) = load_fold(hdf5["patients"], exp_storage, fold)
        onehot_test = np.array([to_softmax(n_classes, label)
                                for label in y_test])

        path_values = {"experiment": experiment_cv}
        # The autoencoder paths are computed as before but not used here;
        # only the MLP checkpoint is restored.
        ae1_model_path = format_config(
            "./data/models/{experiment}_autoencoder-1.ckpt",
            dict(path_values))
        ae2_model_path = format_config(
            "./data/models/{experiment}_autoencoder-2.ckpt",
            dict(path_values))
        nn_model_path = format_config(
            "./data/models/{experiment}_mlp.ckpt", dict(path_values))

        try:
            hidden_layers = [
                {"size": 1000, "actv": tf.nn.tanh},
                {"size": 600, "actv": tf.nn.tanh},
            ]
            model = nn(X_test.shape[1], n_classes, hidden_layers)
            init = tf.global_variables_initializer()

            with tf.Session() as sess:
                sess.run(init)
                saver = tf.train.Saver(model["params"])
                saver.restore(sess, nn_model_path)
                # Both dropout inputs are fed 1.0 for evaluation.
                feeds = {
                    model["input"]: X_test,
                    model["dropouts"][0]: 1.0,
                    model["dropouts"][1]: 1.0,
                }
                output = sess.run(model["output"], feed_dict=feeds)
                print(output)

            y_pred = np.argmax(output, axis=1)
            y_true = np.argmax(onehot_test, axis=1)

            # Row-major order of the 2x2 matrix: TN, FP, FN, TP.
            tn, fp, fn, tp = confusion_matrix(
                y_true, y_pred, labels=[0, 1]).astype(float).ravel()

            accuracy = (tp + tn) / (tp + tn + fp + fn)
            specificity = tn / (fp + tn)
            precision = tp / (tp + fp)
            recall = tp / (tp + fn)
            sensitivity = recall
            fscore = 2 * tp / (2 * tp + fp + fn)

            fold_metrics.append([
                accuracy, precision, recall, fscore, sensitivity, specificity
            ])
        finally:
            reset()

    return [experiment] + np.mean(fold_metrics, axis=0).tolist()
def test_proposed():
    """Score the test split of every fold with the latest checkpoint and save the result.

    The newest checkpoint in ``st.tst_model_path`` is restored for each fold
    in ``range(st.max_fold)``, the fold's test data is pushed through the
    network in batches of ``st.batch_size`` and column 0 of
    ``models.pred_sig`` is collected. The per-fold score vectors are
    concatenated in fold order and written with ``np.save`` to
    ``st.summ_path_root + "pred_result/<model_mode>_<multistream_mode>_pm.npy"``.

    Changes relative to the previous version:
      * The number of folds loaded follows ``st.max_fold`` (previously always
        five, which raised ``IndexError`` for ``st.max_fold > 5``).
      * The checkpoint lookup is done once instead of once per fold, and a
        missing checkpoint is reported with the directory that was searched.

    Raises:
        ValueError: If no checkpoint is found in ``st.tst_model_path``.

    Returns:
        None. The only result is the ``.npy`` file written to disk.
    """
    # tst_dat: test data, tst_lbl: labels; one entry per cross-validation fold.
    all_tst_dat = []
    all_tst_lbl = []
    for fold_num in range(st.max_fold):
        _, _, fold_dat, fold_lbl = utils.load_fold(fold_num=fold_num)
        all_tst_dat.append(fold_dat)
        all_tst_lbl.append(fold_lbl)
    print("data loading over!")

    placeholders = {
        "bdat": tf.compat.v1.placeholder(shape=(None, 20, 20, 6),
                                         dtype=tf.float32, name="bdat_place"),
        "mdat": tf.compat.v1.placeholder(shape=(None, 20, 20, 6),
                                         dtype=tf.float32, name="mdat_place"),
        "tdat": tf.compat.v1.placeholder(shape=(None, 20, 20, 6),
                                         dtype=tf.float32, name="tdat_place"),
        "lbl": tf.compat.v1.placeholder(shape=(None, 1), dtype=tf.int64),
        # Used as a feed key below, so presumably a placeholder/tensor
        # defined in the settings module -- TODO confirm.
        "train": st.is_training,
    }
    models = model.MGICNN(placeholders=placeholders)
    models.build_proposed()
    print("model loading over!")

    all_tst_pm = []
    with tf.compat.v1.Session() as sess:
        print("In sess!")
        saver = tf.compat.v1.train.Saver()

        # NOTE(review): every fold is scored with this same checkpoint. An
        # earlier revision restored a per-fold file named
        # "<fold_num>_<multistream_mode>_<model_mode>_<tst_epoch>.ckpt";
        # confirm that sharing one model across folds is intended.
        ckpt = tf.train.latest_checkpoint(st.tst_model_path)
        if ckpt is None:
            raise ValueError(
                "No checkpoint found in %s" % (st.tst_model_path,))

        for cur_fold in range(st.max_fold):
            tst_dat = all_tst_dat[cur_fold]
            tst_lbl = all_tst_lbl[cur_fold]
            tst_pm = np.zeros(shape=len(tst_dat), dtype=np.float32)

            sess.run(tf.compat.v1.global_variables_initializer())
            saver.restore(sess, ckpt)
            print("len of tst_data is:", len(tst_dat))

            for tst_step in range(0, len(tst_dat), st.batch_size):
                batch = slice(tst_step, tst_step + st.batch_size)
                test_feed_dict = {
                    # Axis 1 of the data selects the stream fed to each input.
                    placeholders["bdat"]: tst_dat[batch, 0],
                    placeholders["mdat"]: tst_dat[batch, 1],
                    placeholders["tdat"]: tst_dat[batch, 2],
                    placeholders["lbl"]: tst_lbl[batch],
                    placeholders["train"]: False,
                }
                tst_pm[batch] = sess.run(
                    models.pred_sig, feed_dict=test_feed_dict)[:, 0]

            all_tst_pm.append(tst_pm)

    # Concatenate the per-fold vectors along the first axis.
    all_tst_pm = np.concatenate(all_tst_pm, axis=0)
    print("test result's shape:", all_tst_pm.shape)
    np.save(
        st.summ_path_root + "pred_result/%d_%d_pm.npy" % (
            st.model_mode, st.multistream_mode),
        all_tst_pm)
    print("result saving done!")
def main(opt):
    """Train a ``DynamicAuthorLanguageModel`` and checkpoint it in ``opt.xp_dir``.

    Steps: pick the device, seed the RNGs, (re)create the experiment
    directory, load the corpus and the train/test folds, build model,
    optimizer and LR schedule, then train. Every ``opt.chkpt_interval``
    iterations the model is evaluated on the test fold and saved to
    ``<opt.xp_dir>/model.pth``; a final evaluation and save happen at the end.

    Several derived values are written back onto ``opt`` (``hostname``,
    ``manual_seed``, ``n_ex``, ``naut``, ``ntoken``, ``padding_idx``,
    ``model``, ``nparameters``, ``optimizer``) and are stored with each
    checkpoint.

    Changes relative to the previous version:
      * A train loader without a single batch now raises ``ValueError``;
        before, the outer ``while`` loop never terminated in that case.
      * ``opt.lr_scheduling_niter == 0`` no longer causes a
        ``ZeroDivisionError`` in the LR schedule; the factor stays at 1.0.
      * The hostname comes from ``platform.node()``, which also exists on
        platforms without ``os.uname``.
      * ``raise SystemExit(0)`` replaces the ``site``-provided ``exit(0)``.
      * Checkpoint writing is done by one local helper.

    Args:
        opt: Options namespace; read and updated in place.

    Raises:
        ValueError: If the training loader yields no batch.
        SystemExit: If the experiment folder exists and the user declines to
            erase it.
    """
    import platform  # function-scope import keeps this block self-contained

    opt.hostname = platform.node()

    # Device: a negative integer selects the CPU, anything else is exported
    # as CUDA_VISIBLE_DEVICES.
    if opt.device.lstrip('-').isdigit() and int(opt.device) <= -1:
        device = torch.device('cpu')
    else:
        os.environ["CUDA_VISIBLE_DEVICES"] = str(opt.device)
        device = torch.device('cuda')

    # Seed
    if opt.manual_seed is None:
        opt.manual_seed = random.randint(1, 10000)
    print(f"seed: {opt.manual_seed}")
    random.seed(opt.manual_seed)
    torch.manual_seed(opt.manual_seed)

    # Experiment directory
    if os.path.isdir(opt.xp_dir):
        answer = input(
            f'Experiment folder already exists at {opt.xp_dir}. Erase it? (y|n)'
        )
        if answer not in ('yes', 'y'):
            print('Terminating experiment...')
            raise SystemExit(0)
        shutil.rmtree(opt.xp_dir)
    os.makedirs(opt.xp_dir)
    print(f'Experiment directory created at {opt.xp_dir}')

    # ------------------------------------------------------------------
    # Data
    # ------------------------------------------------------------------
    print('Loading data...')
    corpus = load_corpus(opt)
    trainset = load_fold(corpus, 'train', opt.data_dir)
    trainloader = DataLoader(trainset,
                             batch_size=opt.batch_size,
                             collate_fn=text_collate,
                             shuffle=True,
                             pin_memory=True,
                             drop_last=True)
    if len(trainloader) == 0:
        # With drop_last=True a training set smaller than one batch yields
        # nothing, and the training loop below could never make progress.
        raise ValueError(
            f'Training set has {len(trainset)} examples, fewer than one '
            f'batch of size {opt.batch_size}')
    testset = load_fold(corpus, 'test', opt.data_dir)
    testloader = DataLoader(testset,
                            batch_size=opt.batch_size,
                            collate_fn=text_collate,
                            shuffle=False,
                            pin_memory=True)
    # Attributes derived from the data
    opt.n_ex = len(trainset)
    opt.naut = trainset.na
    opt.ntoken = corpus.vocab_size
    opt.padding_idx = Corpus.pad_id

    # ------------------------------------------------------------------
    # Model
    # ------------------------------------------------------------------
    print('Building model...')
    model = DynamicAuthorLanguageModel(
        opt.ntoken, opt.nwe, opt.naut, opt.nha, opt.nhat, opt.nhid_dyn,
        opt.nlayers_dyn, opt.cond_fusion, opt.nhid_lm, opt.nlayers_lm,
        opt.dropouti, opt.dropoutl, opt.dropoutw, opt.dropouto,
        opt.tie_weights, opt.padding_idx).to(device)
    opt.model = str(model)
    opt.nparameters = sum(p.nelement() for p in model.parameters())
    print(f'{opt.nparameters} parameters')

    # ------------------------------------------------------------------
    # Optimizer
    # ------------------------------------------------------------------
    model_params = list(model.named_parameters())
    # Parameters whose name contains one of these get no weight decay.
    no_wd = ['entity_embedding']
    optimizer_grouped_parameters = [{
        'params': [p for n, p in model_params
                   if not any(nd in n for nd in no_wd)],
        'weight_decay': opt.wd,
    }, {
        'params': [p for n, p in model_params
                   if any(nd in n for nd in no_wd)],
        'weight_decay': 0.0,
    }]
    optimizer = torch.optim.Adam(optimizer_grouped_parameters, lr=opt.lr)
    opt.optimizer = str(optimizer)

    # Learning-rate schedule: constant during burn-in (the scheduler is not
    # stepped), then linear decay to 0 over `lr_scheduling_niter` steps.
    niter = opt.lr_scheduling_burnin + opt.lr_scheduling_niter
    decay_niter = opt.lr_scheduling_niter

    def _lr_factor(i):
        if decay_niter <= 0:
            # No decay phase configured; avoid dividing by zero.
            return 1.0
        return max(0, (decay_niter - i) / decay_niter)

    lr_scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer,
                                                     lr_lambda=_lr_factor)

    def _save_checkpoint():
        # Persist model, optimizer state and options in one file.
        torch.save(
            {
                'model': model.state_dict(),
                'optimizer': optimizer.state_dict(),
                'opt': opt
            }, os.path.join(opt.xp_dir, 'model.pth'))

    # ------------------------------------------------------------------
    # Training
    # ------------------------------------------------------------------
    print('Training...')
    cudnn.benchmark = True
    assert niter > 0
    pb = tqdm(total=niter, ncols=0, desc='iter')
    itr = -1
    finished = False
    ppl_test = None
    while not finished:
        # Train until the next checkpoint boundary or the end of training.
        for batch in trainloader:
            itr += 1
            # Gradient step
            ppl_train = train_step(model, optimizer, batch, device, opt)
            # LR scheduling starts once burn-in is over
            if itr >= opt.lr_scheduling_burnin:
                lr_scheduler.step()
            # Progress bar
            pb.set_postfix(ppl_train=ppl_train,
                           ppl_test=ppl_test,
                           lr=optimizer.param_groups[0]['lr'])
            pb.update()
            # Leave the epoch early to evaluate/checkpoint
            if itr > 0 and itr % opt.chkpt_interval == 0:
                break
            if itr >= niter:
                finished = True
                break
        # Periodic evaluation and checkpoint
        if itr % opt.chkpt_interval == 0:
            with torch.no_grad():
                ppl_test = evaluate(model, testloader, device)
            _save_checkpoint()
    pb.close()

    with torch.no_grad():
        ppl_test = evaluate(model, testloader, device)
    print(f'Final test ppl: {ppl_test}')
    print('Saving model...')
    _save_checkpoint()
    print('Done')