def multi_parameter_tuning(args):
    lrs = [1e-2, 1e-3, 5e-3, 1e-4, 5e-4]
    hidden_sizes = [128, 256, 512]
    lr_decays = [0.9, 0.7, 0.5]
    trial = 0  # renamed from `iter` to avoid shadowing the built-in
    valid_metric = {}  # maps each saved model to its dev-set perplexity
    dev_data_src = read_corpus(args['dev_source'], source='src')
    dev_data_tgt = read_corpus(args['dev_target'], source='tgt')
    dev_data = list(zip(dev_data_src, dev_data_tgt))
    for i in lrs:
        for j in hidden_sizes:
            for k in lr_decays:
                print('Trial %d =================================================' % trial)
                arg_test = dict(args)  # copy so trials do not mutate the shared args
                arg_test['lr'], arg_test['hidden_size'], arg_test['lr_decay'] = i, j, k
                arg_test['save_to'] = 'model_lr_' + str(i) + '_hd_size_' + str(j) + '_lr_dys_' + str(k) + '.bin'
                run.train(arg_test)
                model = NMT.load(arg_test['save_to'])
                dev_ppl = run.evaluate_ppl(model, dev_data, batch_size=128)  # dev batch size can be a bit larger
                valid_metric[arg_test['save_to']] = dev_ppl
                print(arg_test['save_to'], ' validation: trial %d, dev. ppl %f' % (trial, dev_ppl), file=sys.stderr)
                trial += 1
    model = min(valid_metric, key=valid_metric.get)  # pass the method itself, not valid_metric.get()
    print('best_model is %s, ppl is %f' % (model, valid_metric[model]))
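# The three nested loops in multi_parameter_tuning can be collapsed with
# itertools.product. A minimal, standalone sketch (the hyperparameter values
# mirror the function above; the actual training call is omitted):
import itertools

lrs = [1e-2, 1e-3]
hidden_sizes = [128, 256]
lr_decays = [0.9, 0.5]
for lr, hidden, decay in itertools.product(lrs, hidden_sizes, lr_decays):
    print('would train with lr=%g, hidden_size=%d, lr_decay=%g' % (lr, hidden, decay))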
def main(): """ Starting point of the application """ hvd.init() params = parse_args(PARSER.parse_args()) set_flags(params) model_dir = prepare_model_dir(params) params.model_dir = model_dir logger = get_logger(params) model = Unet() dataset = Dataset(data_dir=params.data_dir, batch_size=params.batch_size, fold=params.crossvalidation_idx, augment=params.augment, gpu_id=hvd.rank(), num_gpus=hvd.size(), seed=params.seed) if 'train' in params.exec_mode: train(params, model, dataset, logger) if 'evaluate' in params.exec_mode: if hvd.rank() == 0: evaluate(params, model, dataset, logger) if 'predict' in params.exec_mode: if hvd.rank() == 0: predict(params, model, dataset, logger)
def main(params):
    """ Starting point of the application """
    backends = [StdOutBackend(Verbosity.VERBOSE)]
    if params.log_dir is not None:
        os.makedirs(params.log_dir, exist_ok=True)
        logfile = os.path.join(params.log_dir, "log.json")
        backends.append(JSONStreamBackend(Verbosity.VERBOSE, logfile))
    logger = Logger(backends)

    # Optimization flags
    os.environ['CUDA_CACHE_DISABLE'] = '0'
    os.environ['HOROVOD_GPU_ALLREDUCE'] = 'NCCL'
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    os.environ['TF_GPU_THREAD_MODE'] = 'gpu_private'
    os.environ['TF_SYNC_ON_FINISH'] = '0'
    os.environ['TF_AUTOTUNE_THRESHOLD'] = '2'

    hvd.init()  # init horovod

    # set gpu configurations
    if params.use_xla:
        tf.config.optimizer.set_jit(True)
    gpus = tf.config.experimental.list_physical_devices('GPU')
    for gpu in gpus:
        tf.config.experimental.set_memory_growth(gpu, True)
    if gpus:
        tf.config.experimental.set_visible_devices(gpus[hvd.local_rank()], 'GPU')
    if params.use_amp:
        tf.keras.mixed_precision.experimental.set_policy('mixed_float16')
    else:
        os.environ['TF_ENABLE_AUTO_MIXED_PRECISION'] = '0'

    # get dataset from tf.data api
    dataset = Dataset(batch_size=params.batch_size, gpu_id=hvd.rank(), num_gpus=hvd.size())

    # Build the model
    input_shape = (1024, 2048, 3)
    model = custom_unet(
        input_shape,
        num_classes=8,
        use_batch_norm=False,
        upsample_mode='deconv',  # 'deconv' or 'simple'
        use_dropout_on_upsampling=True,
        dropout=0.3,
        dropout_change_per_layer=0.0,
        filters=7,
        num_layers=4,
        output_activation='softmax')

    # do not use model.compile as we are using GradientTape
    # start training
    train(params, model, dataset, logger)
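# The comment in main() notes that model.compile is skipped because training
# runs under tf.GradientTape. A minimal sketch of one such distributed step
# with Horovod; the loss function, optimizer, and LR scaling below are
# assumptions, not the project's actual train():
import horovod.tensorflow as hvd
import tensorflow as tf

loss_fn = tf.keras.losses.CategoricalCrossentropy()
optimizer = tf.keras.optimizers.Adam(1e-4 * hvd.size())

@tf.function
def train_step(model, images, labels, first_batch):
    with tf.GradientTape() as tape:
        loss = loss_fn(labels, model(images, training=True))
    tape = hvd.DistributedGradientTape(tape)  # all-reduce gradients across workers
    grads = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(grads, model.trainable_variables))
    if first_batch:
        # broadcast initial state once variables have been created
        hvd.broadcast_variables(model.variables, root_rank=0)
        hvd.broadcast_variables(optimizer.variables(), root_rank=0)
    return loss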
def entry(): mpqa2_df = pd.read_csv(os.path.join(config.RESULTS_FOLDER, "folds/mpqa2.5fold.csv"), index_col=None) dev_docids = mpqa2_df.loc[mpqa2_df["fold"] == "dev", "docid"].values train_docids = mpqa2_df.loc[mpqa2_df["fold"] != "dev", "docid"].values test_docids = dev_docids dev_dataset = RoleLabelerDataset(dev_docids, mpqa2=True) train_dataset = RoleLabelerDataset(train_docids, mpqa2=True, ignore_negatives=True) train(train_dataset, dev_dataset, dev_dataset)
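# A self-contained sketch of the fold split used in entry(); the DataFrame
# below is a stand-in for folds/mpqa2.5fold.csv, which has docid and fold columns:
import pandas as pd

df = pd.DataFrame({"docid": ["d1", "d2", "d3"], "fold": ["dev", "train", "train"]})
dev_docids = df.loc[df["fold"] == "dev", "docid"].values
train_docids = df.loc[df["fold"] != "dev", "docid"].values
print(dev_docids, train_docids)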
def train_main():
    with mlflow.start_run():
        config = MlConfig(agent_name="trend-agent")
        # check_output returns bytes; decode and strip before logging
        git_hash = subprocess.check_output(['git', 'rev-parse', 'HEAD']).decode().strip()
        mlflow.log_param("git_hash", git_hash)
        for param in [a for a in dir(config) if not a.startswith('__')]:
            mlflow.log_param(param, getattr(config, param))
        network_spec = "auto"
        mlflow.log_param("network_spec", network_spec)
        # network_spec = conv_network(data_provider.load(0))
        # mlflow.log_param("network_spec", ',\n'.join([',\n'.join([json.dumps(part_ele) if not callable(part_ele) else str(part_ele) for part_ele in net_part])
        #                                              for net_part in network_spec]))
        train(config, network_spec)
def train_network(start_epoch, epochs, scheduler, model, train_loader, val_loader, optimizer, criterion, device, dtype, batch_size, log_interval, csv_logger, save_path, claimed_acc1, claimed_acc5, best_test): for epoch in trange(start_epoch, epochs + 1): if not isinstance(scheduler, CyclicLR): scheduler.step() train_loss = train(model, train_loader, epoch, optimizer, criterion, device, dtype, batch_size, log_interval, scheduler) test_loss = test(model, val_loader, criterion, device, dtype) csv_logger.write({ 'epoch': epoch + 1, 'val_loss': test_loss, 'train_loss': train_loss }) save_checkpoint( { 'epoch': epoch + 1, 'state_dict': model.state_dict(), 'best_prec1': best_test, 'optimizer': optimizer.state_dict() }, test_loss < best_test, filepath=save_path) if test_loss < best_test: best_test = test_loss csv_logger.write_text('Best loss is {}'.format(best_test))
def train_network(start_epoch, epochs, optim, model, train_loader, val_loader, criterion, device, dtype, writer, best_test, child, experiment_name, logger, save_path, local_rank): my_range = range if child else trange train_it, val_it = 0, 0 for epoch in my_range(start_epoch, epochs + 1): train_it, _, train_accuracy1, train_accuracy5 = train( model, train_loader, logger, writer, experiment_name, epoch, train_it, optim, criterion, device, dtype, child) val_it, _, val_accuracy1, val_accuracy5 = val(model, val_loader, logger, criterion, writer, experiment_name, epoch, val_it, device, dtype, child) optim.epoch_step() save_checkpoint( { 'epoch': epoch, 'state_dict': model.state_dict(), 'best_prec1': best_test, 'optimizer': optim.state_dict() }, val_accuracy1 > best_test, filepath=save_path, local_rank=local_rank) if val_accuracy1 > best_test: best_test = val_accuracy1 logger.debug('Best validation accuracy so far is {:.2f}% top-1'.format( best_test * 100.)) logger.info('Best accuracy is {:.2f}% top-1'.format(best_test * 100.))
def train_network(start_epoch, epochs, scheduler, model, train_loader, val_loader, test_loader, optimizer, criterion, device, dtype, batch_size, log_interval, csv_logger, save_path, best_val): for epoch in trange(start_epoch, epochs + 1): if not isinstance(scheduler, CyclicLR): scheduler.step() train_loss, train_mae, = train(model, train_loader, epoch, optimizer, criterion, device, dtype, batch_size, log_interval, scheduler) val_loss, val_mae = test(model, val_loader, criterion, device, dtype) test_loss, test_mae = test(model, test_loader, criterion, device, dtype) csv_logger.write({'epoch': epoch + 1, 'test_mae': test_mae, 'test_loss': test_loss, 'val_mae': val_mae, 'val_loss': val_loss, 'train_mae': train_mae, 'train_loss': train_loss}) save_checkpoint({'epoch': epoch + 1, 'state_dict': model.state_dict(), 'best_prec1': best_val, 'optimizer': optimizer.state_dict()}, val_mae < best_val, filepath=save_path) # csv_logger.plot_progress(claimed_acc1=claimed_acc1, claimed_acc5=claimed_acc5) csv_logger.plot_progress() if val_mae < best_val: best_val = val_mae csv_logger.write_text('Lowest mae is {:.2f}'.format(best_val))
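# The loops above store 'best_prec1': best_test in the checkpoint before
# best_test has been updated, so the persisted value lags one improvement
# behind. A hedged sketch of the "update first, then persist" variant; the
# checkpoint keys are assumptions, and torch.save stands in for save_checkpoint:
import torch

def checkpoint_if_best(model, optimizer, epoch, val_mae, best_val, path):
    is_best = val_mae < best_val
    best_val = min(val_mae, best_val)  # update before persisting
    torch.save({'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                'best_val_mae': best_val,
                'optimizer': optimizer.state_dict()}, path)
    return best_val, is_best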
def submit(identifier, policy_fn, seed, iter): client = Client(remote_base) # Create environment observation = client.env_create(crowdai_token, env_id="ProstheticsEnv") # IMPLEMENTATION OF YOUR CONTROLLER pi = train(identifier, policy_fn, 1, 1, seed, save_final=False, play=True, bend=0) load_state(identifier, iter) while True: ob = state_desc_to_ob(observation) action = pi.act(False, np.array(ob))[0].tolist() for _ in range(param.action_repeat): [observation, reward, done, info] = client.env_step(action, True) if done: break if done: observation = client.env_reset() if not observation: break client.submit()
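# submit() steps the remote environment param.action_repeat times per chosen
# action. The same idea as a small helper for a gym-style env; the env object
# here is hypothetical:
def step_with_repeat(env, action, repeat):
    total_reward, done, obs, info = 0.0, False, None, {}
    for _ in range(repeat):
        obs, reward, done, info = env.step(action)
        total_reward += reward
        if done:  # stop early at episode end
            break
    return obs, total_reward, done, info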
def train_network(start_epoch, epochs, scheduler, model, train_loader, val_loader, adv_data, optimizer, criterion, device, dtype, batch_size, log_interval, csv_logger, save_path, claimed_acc1, claimed_acc5, best_test): for epoch in trange(start_epoch, epochs + 1): train_loss, train_accuracy1, train_accuracy5, = train( model, train_loader, epoch, optimizer, criterion, device, dtype, batch_size, log_interval) if adv_data is not None: traina_loss, traina_accuracy1, traina_accuracy5, = train( model, adv_data, epoch, optimizer, criterion, device, dtype, batch_size, log_interval) test_loss, test_accuracy1, test_accuracy5 = test( model, val_loader, criterion, device, dtype) csv_logger.write({ 'epoch': epoch + 1, 'val_error1': 1 - test_accuracy1, 'val_error5': 1 - test_accuracy5, 'val_loss': test_loss, 'train_error1': 1 - train_accuracy1, 'train_error5': 1 - train_accuracy5, 'train_loss': train_loss }) save_checkpoint( { 'epoch': epoch + 1, 'state_dict': model.state_dict(), 'best_prec1': best_test, 'optimizer': optimizer.state_dict() }, test_accuracy1 > best_test, filepath=save_path) csv_logger.plot_progress(claimed_acc1=claimed_acc1, claimed_acc5=claimed_acc5) if test_accuracy1 > best_test: best_test = test_accuracy1 for layer in model.modules(): from layers import NoisedConv2D, NoisedLinear if isinstance(layer, NoisedConv2D) or isinstance( layer, NoisedLinear): print("Mean of alphas is {}".format(torch.mean(layer.alpha))) scheduler.step() csv_logger.write_text('Best accuracy is {:.2f}% top-1'.format(best_test * 100.))
def main(): parser = get_parser() config = parser.parse_args() if config.train: auto(config, 'train') val_config = parser.parse_args() auto(val_config, 'val') val_config.train = False run.train(config, val_config=val_config) else: if config.serve: auto(config, 'serve') config.fresh = True demo.demo(config) else: auto(config, 'test') run.test(config)
def main(): parser = ArgumentParser(description='train model from data') parser.add_argument('--mode', help='train or test', metavar='MODE', default='train') parser.add_argument('--config-path', help='config json path', metavar='DIR') parser.add_argument('--init-checkpoint', help='checkpoint file', metavar='FILE') parser.add_argument('--batch-size', help='batch size <default: 32>', metavar='INT', type=int, default=32) parser.add_argument('--epoch', help='epoch number <default: 10>', metavar='INT', type=int, default=10) parser.add_argument('--embedding-dim', help='embedding dimension <default: 256>', metavar='INT',type=int, default=256) parser.add_argument('--max-len', help='max length of a sentence <default: 90>', metavar='INT',type=int, default=90) parser.add_argument('--units', help='units <default: 512>', metavar='INT', type=int, default=512) parser.add_argument('--dev-split', help='<default: 0.1>', metavar='REAL', type=float, default=0.1) parser.add_argument('--optimizer', help='optimizer <default: adam>', metavar='STRING', default='adam') parser.add_argument('--learning-rate', help='learning rate <default: 0.001>', metavar='REAL', type=float, default=0.001) parser.add_argument('--dropout', help='dropout probability <default: 0>', metavar='REAL', type=float, default=.0) parser.add_argument('--method', help='content-based function <default: concat>', metavar='STRING', default='concat') parser.add_argument('--gpu-num', help='GPU number to use <default: 0>', metavar='INT', type=int, default=0) args = parser.parse_args() if args.mode == 'train': train(args) elif args.mode == 'test': test(args)
def train_network(start_epoch, epochs, scheduler, model, train_loader, val_loader, optimizer, criterion, device, dtype, batch_size, log_interval, csv_logger, save_path, claimed_acc1, claimed_acc5, best_test): for epoch in trange(start_epoch, epochs + 1): if not isinstance(scheduler, CyclicLR): scheduler.step() train_loss, train_accuracy1, train_accuracy5, = train( model, train_loader, epoch, optimizer, criterion, device, dtype, batch_size, log_interval, scheduler) test_loss, test_accuracy1, test_accuracy5 = test( model, val_loader, criterion, device, dtype) csv_logger.write({ 'epoch': epoch + 1, 'val_error1': 1 - test_accuracy1, 'val_error5': 1 - test_accuracy5, 'val_loss': test_loss, 'train_error1': 1 - train_accuracy1, 'train_error5': 1 - train_accuracy5, 'train_loss': train_loss }) save_checkpoint( { 'epoch': epoch + 1, 'state_dict': model.state_dict(), 'best_prec1': best_test, 'optimizer': optimizer.state_dict() }, test_accuracy1 > best_test, filepath=save_path) csv_logger.plot_progress(claimed_acc1=claimed_acc1, claimed_acc5=claimed_acc5) if test_accuracy1 > best_test: best_test = test_accuracy1 print('Best accuracy is {:.2f}% top-1'.format(best_test * 100.)) temp_dict = { 'epoch': epoch + 1, 'val_error1': 1 - test_accuracy1, 'val_error5': 1 - test_accuracy5, 'val_loss': test_loss, 'train_error1': 1 - train_accuracy1, 'train_error5': 1 - train_accuracy5, 'train_loss': train_loss } for x in temp_dict: print(x, ":", temp_dict[x]) csv_logger.write_text('Best accuracy is {:.2f}% top-1'.format(best_test * 100.))
def main(): """ Main function for running the whole network based on the parameters set in the "conf" dictionary in the config() function. First, the network is trained, and checked against a development set at the prefered increments, then the training progress is plotted, before the final evaluation is done on all three data sets. """ # Get parameters from config() function conf = config() # Get all data and split it into three sets. Format: (datasize, channels, height, width). X_train, Y_train, X_devel, Y_devel, X_test, Y_test = get_data(conf) # Test with keras if conf["keras"] == True: train_progress, devel_progress = run.kerasnet(conf, X_train, Y_train, X_devel, Y_devel, X_test, Y_test) plot_progress(conf, train_progress, devel_progress) sys.exit() # Run training and save weights and biases in params_dnn and params_cnn. conf, params_dnn, params_cnn, train_progress, devel_progress = run.train( conf, X_train, Y_train, X_devel, Y_devel, ) # Plot the progress of the network over training steps plot_progress(conf, train_progress, devel_progress) # Evaluate the network on all three data sets. If output=True, then the predictions made on the test set is saved. print("Evaluating train set") num_correct, num_evaluated = run.evaluate(conf, params_dnn, params_cnn, X_train, Y_train) print("CCR = {0:>5} / {1:>5} = {2:>6.4f}".format( num_correct, num_evaluated, num_correct / num_evaluated)) print("Evaluating development set") num_correct, num_evaluated = run.evaluate(conf, params_dnn, params_cnn, X_devel, Y_devel) print("CCR = {0:>5} / {1:>5} = {2:>6.4f}".format( num_correct, num_evaluated, num_correct / num_evaluated)) print("Evaluating test set") num_correct, num_evaluated = run.evaluate(conf, params_dnn, params_cnn, X_test, Y_test, output=conf["output"]) print("CCR = {0:>5} / {1:>5} = {2:>6.4f}".format( num_correct, num_evaluated, num_correct / num_evaluated))
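# CCR as printed in main() above is the correct classification rate:
# num_correct / num_evaluated. A standalone numeric example with made-up counts:
num_correct, num_evaluated = 4821, 5000
print("CCR = {0:>5} / {1:>5} = {2:>6.4f}".format(num_correct, num_evaluated, num_correct / num_evaluated))
# -> CCR =  4821 /  5000 = 0.9642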
def train_network(start_epoch, epochs, optim, model, train_loader, val_loader, criterion, mixup, device, dtype, batch_size, log_interval, csv_logger, save_path, claimed_acc1, claimed_acc5, best_test, local_rank, child): my_range = range if child else trange for epoch in my_range(start_epoch, epochs + 1): if not isinstance(optim.scheduler, CyclicLR) and not isinstance( optim.scheduler, CosineLR): optim.scheduler_step() train_loss, train_accuracy1, train_accuracy5, = train( model, train_loader, mixup, epoch, optim, criterion, device, dtype, batch_size, log_interval, child) test_loss, test_accuracy1, test_accuracy5 = test( model, val_loader, criterion, device, dtype, child) csv_logger.write({ 'epoch': epoch + 1, 'val_error1': 1 - test_accuracy1, 'val_error5': 1 - test_accuracy5, 'val_loss': test_loss, 'train_error1': 1 - train_accuracy1, 'train_error5': 1 - train_accuracy5, 'train_loss': train_loss }) save_checkpoint( { 'epoch': epoch + 1, 'state_dict': model.state_dict(), 'best_prec1': best_test, 'optimizer': optim.state_dict() }, test_accuracy1 > best_test, filepath=save_path, local_rank=local_rank) csv_logger.plot_progress(claimed_acc1=claimed_acc1, claimed_acc5=claimed_acc5) if test_accuracy1 > best_test: best_test = test_accuracy1 csv_logger.write_text('Best accuracy is {:.2f}% top-1'.format(best_test * 100.))
def train_network(start_epoch, epochs, scheduler, model, train_loader, val_loader, optimizer, criterion, device, dtype, batch_size, log_interval, csv_logger, save_path, claimed_acc1, claimed_acc5, best_test): for epoch in trange(start_epoch, epochs + 1): if not isinstance(scheduler, CyclicLR): scheduler.step() train_loss, train_accuracy1, train_accuracy5, = train(model, train_loader, epoch, optimizer, criterion, device, dtype, batch_size, log_interval, scheduler) test_loss, test_accuracy1, test_accuracy5 = test(model, val_loader, criterion, device, dtype) csv_logger.write({'epoch': epoch + 1, 'val_error1': 1 - test_accuracy1, 'val_error5': 1 - test_accuracy5, 'val_loss': test_loss, 'train_error1': 1 - train_accuracy1, 'train_error5': 1 - train_accuracy5, 'train_loss': train_loss}) save_checkpoint({'epoch': epoch + 1, 'state_dict': model.state_dict(), 'best_prec1': best_test, 'optimizer': optimizer.state_dict()}, test_accuracy1 > best_test, filepath=save_path) csv_logger.plot_progress(claimed_acc1=claimed_acc1, claimed_acc5=claimed_acc5) if test_accuracy1 > best_test: best_test = test_accuracy1 csv_logger.write_text('Best accuracy is {:.2f}% top-1'.format(best_test * 100.))
def main(): """Run the program according to specified configurations.""" conf = config() X_train, Y_train, X_devel, Y_devel, X_test, Y_test = get_data(conf) params, train_progress, devel_progress = run.train(conf, X_train, Y_train, X_devel, Y_devel) plot_progress(train_progress, devel_progress) print("Evaluating train set") num_correct, num_evaluated = run.evaluate(conf, params, X_train, Y_train) print("CCR = {0:>5} / {1:>5} = {2:>6.4f}".format(num_correct, num_evaluated, num_correct/num_evaluated)) print("Evaluating development set") num_correct, num_evaluated = run.evaluate(conf, params, X_devel, Y_devel) print("CCR = {0:>5} / {1:>5} = {2:>6.4f}".format(num_correct, num_evaluated, num_correct/num_evaluated)) print("Evaluating test set") num_correct, num_evaluated = run.evaluate(conf, params, X_test, Y_test) print("CCR = {0:>5} / {1:>5} = {2:>6.4f}".format(num_correct, num_evaluated, num_correct/num_evaluated))
import os
import csv

import numpy as np
import keras

from run import train

input_file = "./ag_news_csv/train.csv"
max_feature_length = 4096
num_classes = 4
embedding_size = 16
learning_rate = 0.001
batch_size = 100
num_epochs = 250

train(input_file, max_feature_length, num_classes, embedding_size, learning_rate, batch_size, num_epochs)
def main(): parser = argparse.ArgumentParser() parser.add_argument('--gpu', type=int, default=-1, help='the gpu will be used, e.g "0,1,2,3"') parser.add_argument('--max_iter', type=int, default=10, help='number of iterations') parser.add_argument('--decay_epoch', type=int, default=20, help='number of iterations') parser.add_argument('--test', type=bool, default=False, help='enable testing') parser.add_argument('--train_test', type=bool, default=True, help='enable testing') parser.add_argument('--show', type=bool, default=True, help='print progress') parser.add_argument('--init_std', type=float, default=0.1, help='weight initialization std') parser.add_argument('--init_lr', type=float, default=0.01, help='initial learning rate') parser.add_argument('--lr_decay', type=float, default=0.75, help='learning rate decay') parser.add_argument( '--final_lr', type=float, default=1E-5, help='learning rate will not decrease after hitting this threshold') parser.add_argument('--momentum', type=float, default=0.9, help='momentum rate') parser.add_argument('--max_grad_norm', type=float, default=3.0, help='maximum gradient norm') parser.add_argument('--hidden_dim', type=int, default=128, help='hidden layer dimension') parser.add_argument('--n_hidden', type=int, default=2, help='hidden numbers') dataset = 'assist2009_updated' if dataset == 'oj': parser.add_argument('--batch_size', type=int, default=5, help='the batch size') parser.add_argument('--qa_embed_dim', type=int, default=100, help='answer and question embedding dimensions') parser.add_argument( '--n_question', type=int, default=68, help='the number of unique questions in the dataset') parser.add_argument('--seqlen', type=int, default=200, help='the allowed maximum length of a sequence') parser.add_argument('--data_dir', type=str, default='./data/oj', help='data directory') parser.add_argument('--data_name', type=str, default='oj', help='data set name') parser.add_argument('--load', type=str, default='oj', help='model file to load') parser.add_argument('--save', type=str, default='oj', help='path to save model') elif dataset == 'assistments': parser.add_argument('--batch_size', type=int, default=32, help='the batch size') parser.add_argument('--qa_embed_dim', type=int, default=200, help='answer and question embedding dimensions') parser.add_argument( '--n_question', type=int, default=124, help='the number of unique questions in the dataset') parser.add_argument('--seqlen', type=int, default=200, help='the allowed maximum length of a sequence') parser.add_argument('--data_dir', type=str, default='./data/assistments', help='data directory') parser.add_argument('--data_name', type=str, default='assistments', help='data set name') parser.add_argument('--load', type=str, default='assistments', help='model file to load') parser.add_argument('--save', type=str, default='assistments', help='path to save model') elif dataset == 'assist2009_updated': parser.add_argument('--batch_size', type=int, default=32, help='the batch size') parser.add_argument('--qa_embed_dim', type=int, default=200, help='answer and question embedding dimensions') parser.add_argument( '--n_question', type=int, default=110, help='the number of unique questions in the dataset') parser.add_argument('--seqlen', type=int, default=200, help='the allowed maximum length of a sequence') parser.add_argument('--data_dir', type=str, default='../../dataset/assist2009_updated', help='data directory') parser.add_argument('--data_name', type=str, default='assist2009_updated', help='data set name') 
parser.add_argument('--load', type=str, default='assist2009_updated', help='model file to load') parser.add_argument('--save', type=str, default='assist2009_updated', help='path to save model') elif dataset == 'STATICS': parser.add_argument('--batch_size', type=int, default=10, help='the batch size') parser.add_argument('--qa_embed_dim', type=int, default=100, help='answer and question embedding dimensions') parser.add_argument( '--n_question', type=int, default=1223, help='the number of unique questions in the dataset') parser.add_argument('--seqlen', type=int, default=800, help='the allowed maximum length of a sequence') parser.add_argument('--data_dir', type=str, default='./data/STATICS', help='data directory') parser.add_argument('--data_name', type=str, default='STATICS', help='data set name') parser.add_argument('--load', type=str, default='STATICS', help='model file to load') parser.add_argument('--save', type=str, default='STATICS', help='path to save model') params = parser.parse_args() params.lr = params.init_lr print(params) dat = DataLoader(',', params.seqlen, 1, 0) # dat = DATA(n_question=params.n_question, seqlen=params.seqlen, separate_char=',') # train_data_path = params.data_dir + "/" + "builder_train.csv" # valid_data_path = params.data_dir + "/" + "builder_test.csv" train_data_path = params.data_dir + "/" + params.data_name + "_train1.csv" valid_data_path = params.data_dir + "/" + params.data_name + "_valid1.csv" # test_data_path = params.data_dir + "/" + params.data_name + "_test.csv" max_length, min_length, max_q_id = dat.scan_file(train_data_path) train_q_data, train_q_t_data, train_answer_data = dat.prepare_model_data( train_data_path, max_q_id) train_q_data = np.array(train_q_data) print(train_q_data.shape) train_q_t_data = np.array(train_q_t_data) train_answer_data = np.array(train_answer_data) valid_q_data, valid_q_t_data, valid_answer_data = dat.prepare_model_data( valid_data_path, max_q_id) valid_q_data = np.array(valid_q_data) valid_q_t_data = np.array(valid_q_t_data) valid_answer_data = np.array(valid_answer_data) # train_q_data, train_q_t_data, train_answer_data = dat.load_data(train_data_path) # valid_q_data, valid_q_t_data, valid_answer_data = dat.load_data(valid_data_path) # test_q_data, test_q_t_data, test_answer_data = dat.load_data(test_data_path) model = MODEL(n_question=params.n_question, hidden_dim=params.hidden_dim, x_embed_dim=params.qa_embed_dim, hidden_layers=params.n_hidden, gpu=params.gpu) model.init_embeddings() model.init_params() # model = torch.load(params.data_dir + "/save/"+params.save) # optimizer = optim.SGD(params=model.parameters(), lr=params.lr, momentum=params.momentum) optimizer = optim.Adam(params=model.parameters(), lr=params.lr, betas=(0.9, 0.9)) if params.gpu >= 0: print('device: ' + str(params.gpu)) torch.cuda.set_device(params.gpu) model.cuda() all_train_loss = {} all_train_accuracy = {} all_train_auc = {} all_valid_loss = {} all_valid_accuracy = {} all_valid_auc = {} best_valid_auc = 0 for idx in range(params.max_iter): train_loss, train_accuracy, train_auc = train(model, idx, params, optimizer, train_q_data, train_q_t_data, train_answer_data) print( 'Epoch %d/%d, loss : %3.5f, auc : %3.5f, accuracy : %3.5f' % (idx + 1, params.max_iter, train_loss, train_auc, train_accuracy)) valid_loss, valid_accuracy, valid_auc = test(model, params, optimizer, valid_q_data, valid_q_t_data, valid_answer_data) print('Epoch %d/%d, valid auc : %3.5f, valid accuracy : %3.5f' % (idx + 1, params.max_iter, valid_auc, valid_accuracy)) # test_loss, 
#                                            test_accuracy, test_auc = test(model, params, optimizer,
#                                                                           test_q_data, test_q_t_data, test_answer_data)
# print('Epoch %d/%d, test auc : %3.5f, test accuracy : %3.5f' % (
#     idx + 1, params.max_iter, test_auc, test_accuracy))
all_train_auc[idx + 1] = train_auc
all_train_accuracy[idx + 1] = train_accuracy
all_train_loss[idx + 1] = train_loss
all_valid_loss[idx + 1] = valid_loss
all_valid_accuracy[idx + 1] = valid_accuracy
all_valid_auc[idx + 1] = valid_auc

# output the epoch with the best validation auc
if valid_auc > best_valid_auc:
    print('%3.4f to %3.4f' % (best_valid_auc, valid_auc))
    best_valid_auc = valid_auc
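# The per-dataset branches in main() above re-register nearly identical
# arguments in every elif. An alternative sketch: one parser plus a dict of
# dataset presets (the values mirror the assist2009_updated branch; the other
# presets are omitted here):
import argparse

PRESETS = {
    'assist2009_updated': dict(batch_size=32, qa_embed_dim=200, n_question=110,
                               seqlen=200, data_dir='../../dataset/assist2009_updated'),
}

parser = argparse.ArgumentParser()
parser.add_argument('--dataset', default='assist2009_updated')
parser.add_argument('--batch_size', type=int)
parser.add_argument('--qa_embed_dim', type=int)
parser.add_argument('--n_question', type=int)
parser.add_argument('--seqlen', type=int)
parser.add_argument('--data_dir', type=str)
known, _ = parser.parse_known_args()
parser.set_defaults(**PRESETS.get(known.dataset, {}))
params = parser.parse_args()
print(params)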
            if torch.is_tensor(v):
                state[k] = v.to(device)
    print(f'Using Checkpoint')
else:
    last_epoch = 0
    valid_loss_list, train_loss_list = [], []
    model.apply(init_weights)

model.to(device)
for epoch in range(last_epoch, epochs):
    start_time = time.time()
    log.info(f'Epoch {epoch+1} training')
    train_loss = train(model, device, training_loader, optimizer, criterion, clip)
    log.info(f'\nEpoch {epoch + 1} validation')
    valid_loss, bleu_score = eval(model, device, valid_loader, criterion)
    train_loss_list.append(train_loss)
    valid_loss_list.append(valid_loss)
    end_time = time.time()
    epoch_mins, epoch_secs = epoch_time(start_time, end_time)
    # if valid_loss < best_valid_loss:
    #     best_valid_loss = valid_loss
    save_checkpoint(model_path / stage / f'decoder/model0epoch{epoch}', epoch, model, optimizer,
                    valid_loss_list, train_loss_list)
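# A fuller, self-contained sketch of the resume pattern the fragment above
# belongs to: restore model/optimizer state, then move any tensors held in
# the optimizer state onto the target device. The checkpoint keys here are
# assumptions:
import torch

def resume(model, optimizer, path, device):
    ckpt = torch.load(path, map_location=device)
    model.load_state_dict(ckpt['model'])
    optimizer.load_state_dict(ckpt['optimizer'])
    for state in optimizer.state.values():
        for k, v in state.items():
            if torch.is_tensor(v):
                state[k] = v.to(device)  # e.g. Adam's exp_avg buffers
    return ckpt.get('epoch', 0)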
exit()  # everything below is unreachable as written

# Set gpu
config = tf.ConfigProto()  # log_device_placement=True
config.gpu_options.allow_growth = True

# Create run instance
run = run.run(config, lr_size, ckpt_path, scale, args.batch, args.epochs,
              args.lr, args.fromscratch, fsrcnn_params, small, args.validdir)

if args.train:
    # if finetune, load model and train on general100
    if args.finetune:
        traindir = args.finetunedir
        augmented_path = "./augmented_general100"
        # augment (if not done before) and then load images
        data_utils.augment(traindir, save_path=augmented_path)
        run.train(augmented_path)

if args.test:
    run.testFromPb(args.image)
    # run.test(args.image)
    # run.upscale(args.image)

if args.export:
    run.export()

print("I ran successfully.")
netDQFront.apply(weights_init) netD.apply(weights_init) netQ.apply(weights_init) # optimizer optimG = t.optim.Adam([{'params': netG.parameters()}, {'params': netQ.parameters()}], lr=params.LR_GEN_Q) optimD = t.optim.Adam([{'params': netDQFront.parameters()}, {'params': netD.parameters()}], lr=params.LR_DIS) # fixed test noise testNoise = dataloader.create_continuous_noise(params.C_SIZE) for epoch_i in range(1, params.EPOCH+1): print('\nepoch', epoch_i) print('START:', time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())) print('loss=(G, D, Q), probability=(proT, proF1, proF2)') # train run.train(train_loader, netG, netDQFront, netD, netQ, optimG, optimD, epoch_i, testNoise, file_Gloss, file_Dloss, file_Qloss, file_proT, file_proF1, file_proF2) # close file file_Gloss.close() file_Dloss.close() file_Qloss.close() file_proT.close() file_proF1.close() file_proF2.close()
def train_one_dataset(params, file_name, train_q_data, train_qa_data, train_pid, valid_q_data,\ valid_qa_data, valid_pid, test_q_data, test_qa_data, test_pid): # ================================== model initialization ================================== model = load_model(params) optimizer = torch.optim.Adam(model.parameters(), lr=params.lr, betas=(0.9, 0.999), eps=1e-8) print("\n") # total_params = sum(p.numel() for p in model.parameters()) # print(f'{total_params:,} total parameters.') # total_trainable_params = sum( # p.numel() for p in model.parameters() if p.requires_grad) # print(f'{total_trainable_params:,} training parameters.') # ================================== start training ================================== all_train_loss = {} all_train_accuracy = {} all_train_auc = {} all_valid_loss = {} all_valid_accuracy = {} all_valid_auc = {} all_test_loss = {} all_test_accuracy = {} all_test_auc = {} best_valid_auc = 0 cur_train_auc = 0 cur_test_auc = 0 for idx in range(params.max_iter): # Train Model train_loss, train_accuracy, train_auc = train(model, params, optimizer, train_q_data, train_qa_data, train_pid, label='Train') # Validation step valid_loss, valid_accuracy, valid_auc = test(model, params, optimizer, valid_q_data, valid_qa_data, valid_pid, label='Valid') # Test step test_loss, test_accuracy, test_auc = test(model, params, optimizer, test_q_data, test_qa_data, test_pid, label='Test') print('epoch', idx + 1) print("\ttrain_auc\t", train_auc, "valid_auc\t", valid_auc, "\ttest_auc\t", test_auc) print("\ttrain_accuracy\t", train_accuracy, "valid_accuracy\t", valid_accuracy,\ "\ttest_accuracy\t", test_accuracy) print("\ttrain_loss\t", train_loss, "valid_loss\t", valid_loss, "test_loss\t", test_loss) try_makedirs('model') try_makedirs(os.path.join('model', params.model)) try_makedirs(os.path.join('model', params.model, params.save)) # all_valid_auc[idx + 1] = valid_auc # all_train_auc[idx + 1] = train_auc # all_test_auc[idx + 1] = test_auc # all_valid_loss[idx + 1] = valid_loss # all_train_loss[idx + 1] = train_loss # all_test_loss[idx + 1] = test_loss # all_valid_accuracy[idx + 1] = valid_accuracy # all_train_accuracy[idx + 1] = train_accuracy # all_test_accuracy[idx + 1] = test_accuracy # output the epoch with the best validation auc if valid_auc > best_valid_auc: path = os.path.join('model', params.model, params.save, file_name) + '_*' for i in glob.glob(path): os.remove(i) print(best_valid_auc, ' to ', valid_auc) best_valid_auc = valid_auc cur_train_auc = train_auc cur_test_auc = test_auc best_epoch = idx + 1 torch.save( { 'epoch': idx, 'model_state_dict': model.state_dict(), 'optimizer_state_dict': optimizer.state_dict(), 'loss': train_loss, }, os.path.join('model', params.model, params.save, file_name) + '_' + str(idx + 1)) if idx - best_epoch > 40: break print("cur_train_auc\t", cur_train_auc, "best_valid_auc\t", best_valid_auc, "\n", "cur_test_auc\t",\ cur_test_auc) try_makedirs('result') try_makedirs(os.path.join('result', params.model)) try_makedirs(os.path.join('result', params.model, params.save)) f_save_log = open( os.path.join('result', params.model, params.save, file_name), 'w') f_save_log.write("valid_auc:\n" + str(all_valid_auc) + "\n\n") f_save_log.write("train_auc:\n" + str(all_train_auc) + "\n\n") f_save_log.write("test_auc:\n" + str(all_test_auc) + "\n\n") f_save_log.write("valid_loss:\n" + str(all_valid_loss) + "\n\n") f_save_log.write("train_loss:\n" + str(all_train_loss) + "\n\n") f_save_log.write("test_loss:\n" + str(all_test_loss) + "\n\n") 
f_save_log.write("valid_accuracy:\n" + str(all_valid_accuracy) + "\n\n") f_save_log.write("train_accuracy:\n" + str(all_train_accuracy) + "\n\n") f_save_log.write("test_accuracy:\n" + str(all_test_accuracy) + "\n\n") f_save_log.close() return best_epoch
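# train_one_dataset above stops once validation AUC has not improved for
# 40 epochs (`if idx - best_epoch > 40: break`). The same patience rule,
# factored out:
def should_stop(current_epoch, best_epoch, patience=40):
    return current_epoch - best_epoch > patience

# usage inside the epoch loop: if should_stop(idx, best_epoch): break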
def train_one_dataset(params, file_name, train_q_data, train_qa_data, valid_q_data, valid_qa_data): ### ================================== model initialization ================================== g_model = MODEL(n_question=params.n_question, seqlen=params.seqlen, batch_size=params.batch_size, q_embed_dim=params.q_embed_dim, qa_embed_dim=params.qa_embed_dim, memory_size=params.memory_size, memory_key_state_dim=params.memory_key_state_dim, memory_value_state_dim=params.memory_value_state_dim, final_fc_dim=params.final_fc_dim) # create a module by given a Symbol net = mx.mod.Module(symbol=g_model.sym_gen(), data_names=['q_data', 'qa_data'], label_names=['target'], context=params.ctx) # create memory by given input shapes net.bind(data_shapes=[ mx.io.DataDesc(name='q_data', shape=(params.seqlen, params.batch_size), layout='SN'), mx.io.DataDesc(name='qa_data', shape=(params.seqlen, params.batch_size), layout='SN') ], label_shapes=[ mx.io.DataDesc(name='target', shape=(params.seqlen, params.batch_size), layout='SN') ]) # initial parameters with the default random initializer net.init_params(initializer=mx.init.Normal(sigma=params.init_std)) # decay learning rate in the lr_scheduler lr_scheduler = mx.lr_scheduler.FactorScheduler( step=20 * (train_q_data.shape[0] / params.batch_size), factor=0.667, stop_factor_lr=1e-5) net.init_optimizer(optimizer='sgd', optimizer_params={ 'learning_rate': params.lr, 'momentum': params.momentum, 'lr_scheduler': lr_scheduler }) for parameters in net.get_params()[0]: print parameters, net.get_params()[0][parameters].asnumpy().shape print "\n" ### ================================== start training ================================== all_train_loss = {} all_train_accuracy = {} all_train_auc = {} all_valid_loss = {} all_valid_accuracy = {} all_valid_auc = {} best_valid_auc = 0 for idx in xrange(params.max_iter): train_loss, train_accuracy, train_auc = train(net, params, train_q_data, train_qa_data, label='Train') valid_loss, valid_accuracy, valid_auc = test(net, params, valid_q_data, valid_qa_data, label='Valid') print 'epoch', idx + 1 print "valid_auc\t", valid_auc, "\ttrain_auc\t", train_auc print "valid_accuracy\t", valid_accuracy, "\ttrain_accuracy\t", train_accuracy print "valid_loss\t", valid_loss, "\ttrain_loss\t", train_loss net.save_checkpoint(prefix=os.path.join('model', params.save, file_name), epoch=idx + 1) # output the epoch with the best validation auc if valid_auc > best_valid_auc: best_valid_auc = valid_auc best_epoch = idx + 1 f_save_log = open(os.path.join('result', params.save, file_name), 'w') f_save_log.write("valid_auc:\n" + str(all_valid_auc) + "\n\n") f_save_log.write("train_auc:\n" + str(all_train_auc) + "\n\n") f_save_log.write("valid_loss:\n" + str(all_valid_loss) + "\n\n") f_save_log.write("train_loss:\n" + str(all_train_loss) + "\n\n") f_save_log.write("valid_accuracy:\n" + str(all_valid_accuracy) + "\n\n") f_save_log.write("train_accuracy:\n" + str(all_train_accuracy) + "\n\n") f_save_log.close() return best_epoch
from run import train, test import time from utilities import loadMainConfig if __name__ == '__main__': start = time.time() print(f"Model Name : {loadMainConfig('modelName')}") print("Start Training") train() print("Training Completed") print("Start Evaluating") test() print("Evaluating Completed") print(f"time spent: {time.time()-start}")
def main(): parser = argparse.ArgumentParser() parser.add_argument('--gpu', type=int, default=0) parser.add_argument('--max_iter', type=int, default=30, help='number of iterations') parser.add_argument('--decay_epoch', type=int, default=20, help='number of iterations') parser.add_argument('--test', type=bool, default=False, help='enable testing') parser.add_argument('--train_test', type=bool, default=True, help='enable testing') parser.add_argument('--show', type=bool, default=True, help='print progress') parser.add_argument('--init_std', type=float, default=0.1, help='weight initialization std') parser.add_argument('--init_lr', type=float, default=0.01, help='initial learning rate') parser.add_argument('--lr_decay', type=float, default=0.75, help='learning rate decay') parser.add_argument('--final_lr', type=float, default=1E-5, help='learning rate will not decrease after hitting this threshold') parser.add_argument('--momentum', type=float, default=0.9, help='momentum rate') parser.add_argument('--max_grad_norm', type=float, default=3.0, help='maximum gradient norm') parser.add_argument('--hidden_dim', type=int, default=64, help='hidden layer dimension') parser.add_argument('--n_hidden', type=int, default=2, help='hidden numbers') parser.add_argument('--dataset', type=str, default='assist2009_updated') parser.add_argument('--batch_size', type=int, default=32, help='the batch size') parser.add_argument('--qa_embed_dim', type=int, default=200, help='answer and question embedding dimensions') parser.add_argument('--dropout_rate', type=float, default=0.6) if parser.parse_args().dataset == 'assist2009_updated': # parser.add_argument('--batch_size', type=int, default=32, help='the batch size') # parser.add_argument('--qa_embed_dim', type=int, default=200, help='answer and question embedding dimensions') parser.add_argument('--n_question', type=int, default=110, help='the number of unique questions in the dataset') parser.add_argument('--seqlen', type=int, default=200, help='the allowed maximum length of a sequence') parser.add_argument('--data_dir', type=str, default='../dataset/assist2009_updated', help='data directory') parser.add_argument('--data_name', type=str, default='assist2009_updated', help='data set name') parser.add_argument('--load', type=str, default='assist2009_updated', help='model file to load') parser.add_argument('--save', type=str, default='assist2009_updated', help='path to save model') elif parser.parse_args().dataset == 'assist2015': # parser.add_argument('--batch_size', type=int, default=32, help='the batch size') # parser.add_argument('--qa_embed_dim', type=int, default=200, help='answer and question embedding dimensions') parser.add_argument('--n_question', type=int, default=100, help='the number of unique questions in the dataset') parser.add_argument('--seqlen', type=int, default=200, help='the allowed maximum length of a sequence') parser.add_argument('--data_dir', type=str, default='../dataset/assist2015', help='data directory') parser.add_argument('--data_name', type=str, default='assist2015', help='data set name') parser.add_argument('--load', type=str, default='assist2015', help='model file to load') parser.add_argument('--save', type=str, default='assist2015', help='path to save model') elif parser.parse_args().dataset == 'STATICS': # parser.add_argument('--batch_size', type=int, default=32, help='the batch size') # parser.add_argument('--qa_embed_dim', type=int, default=100, help='answer and question embedding dimensions') parser.add_argument('--n_question', 
type=int, default=1223, help='the number of unique questions in the dataset') parser.add_argument('--seqlen', type=int, default=200, help='the allowed maximum length of a sequence') parser.add_argument('--data_dir', type=str, default='../dataset/STATICS', help='data directory') parser.add_argument('--data_name', type=str, default='STATICS', help='data set name') parser.add_argument('--load', type=str, default='STATICS', help='model file to load') parser.add_argument('--save', type=str, default='STATICS', help='path to save model') elif parser.parse_args().dataset == 'synthetic': # parser.add_argument('--batch_size', type=int, default=32, help='the batch size') # parser.add_argument('--qa_embed_dim', type=int, default=100, help='answer and question embedding dimensions') parser.add_argument('--n_question', type=int, default=50, help='the number of unique questions in the dataset') parser.add_argument('--seqlen', type=int, default=200, help='the allowed maximum length of a sequence') parser.add_argument('--data_dir', type=str, default='../dataset/synthetic', help='data directory') parser.add_argument('--data_name', type=str, default='synthetic', help='data set name') parser.add_argument('--load', type=str, default='synthetic', help='model file to load') parser.add_argument('--save', type=str, default='synthetic', help='path to save model') elif parser.parse_args().dataset == 'assist2017': # parser.add_argument('--batch_size', type=int, default=32, help='the batch size') # parser.add_argument('--qa_embed_dim', type=int, default=100, help='answer and question embedding dimensions') parser.add_argument('--n_question', type=int, default=102, help='the number of unique questions in the dataset') parser.add_argument('--seqlen', type=int, default=200, help='the allowed maximum length of a sequence') parser.add_argument('--data_dir', type=str, default='../dataset/assist2017/train_valid_test', help='data directory') parser.add_argument('--data_name', type=str, default='assist2017', help='data set name') parser.add_argument('--load', type=str, default='assist2017', help='model file to load') parser.add_argument('--save', type=str, default='assist2017', help='path to save model') params = parser.parse_args() params.lr = params.init_lr print(params) dat = DATA(n_question=params.n_question, seqlen=params.seqlen, separate_char=',') if params.dataset != 'synthetic': train_data_path = params.data_dir + "/" + params.data_name + "_train1.csv" valid_data_path = params.data_dir + "/" + params.data_name + "_valid1.csv" test_data_path = params.data_dir + "/" + params.data_name + "_test.csv" else: train_data_path = params.data_dir + "/" + "naive_c5_q50_s4000_v0_train1.csv" valid_data_path = params.data_dir + "/" + "naive_c5_q50_s4000_v0_valid1.csv" test_data_path = params.data_dir + "/" + "naive_c5_q50_s4000_v0_test.csv" train_q_data, train_q_t_data, train_answer_data, train_repeated_time_gap, train_past_trail_counts,\ train_seq_time_gap = dat.load_data(train_data_path) valid_q_data, valid_q_t_data, valid_answer_data, valid_repeated_time_gap, valid_past_trail_counts,\ valid_seq_time_gap = dat.load_data(valid_data_path) test_q_data, test_q_t_data, test_answer_data, test_repeated_time_gap, test_past_trail_counts,\ test_seq_time_gap = dat.load_data(test_data_path) model = MODEL(batch_size=params.batch_size, seqlen=params.seqlen, n_question=params.n_question, hidden_dim=params.hidden_dim, x_embed_dim=params.qa_embed_dim, hidden_layers=params.n_hidden, dropout_rate=params.dropout_rate, gpu=params.gpu) 
model.init_embeddings() model.init_params() optimizer = optim.Adam(params=model.parameters(), lr=params.lr, betas=(0.9, 0.9)) if params.gpu >= 0: print('device: ' + str(params.gpu)) torch.cuda.set_device(params.gpu) model.cuda() # all_train_loss = {} # all_train_accuracy = {} # all_train_auc = {} # all_valid_loss = {} # all_valid_accuracy = {} # all_valid_auc = {} # all_test_loss = {} # all_test_accuracy = {} # all_test_auc = {} best_valid_auc = 0 cur_test_auc = 0 cur_train_auc = 0 for idx in range(params.max_iter): train_loss, train_accuracy, train_auc = train(model, params, optimizer, train_q_data, train_q_t_data, train_answer_data, train_repeated_time_gap,\ train_past_trail_counts, train_seq_time_gap) print('Epoch %d/%d, loss : %3.5f, auc : %3.5f, accuracy : %3.5f' % ( idx + 1, params.max_iter, train_loss, train_auc, train_accuracy)) valid_loss, valid_accuracy, valid_auc = test(model, params, optimizer, valid_q_data, valid_q_t_data, valid_answer_data, valid_repeated_time_gap,\ valid_past_trail_counts, valid_seq_time_gap) print('Epoch %d/%d, valid auc : %3.5f, valid accuracy : %3.5f' % ( idx + 1, params.max_iter, valid_auc, valid_accuracy)) test_loss, test_accuracy, test_auc = test(model, params, optimizer, test_q_data, test_q_t_data, test_answer_data, test_repeated_time_gap, test_past_trail_counts, test_seq_time_gap) print('Epoch %d/%d, test auc : %3.5f, test accuracy : %3.5f' % ( idx + 1, params.max_iter, test_auc, test_accuracy)) # all_train_auc[idx + 1] = train_auc # all_train_accuracy[idx + 1] = train_accuracy # all_train_loss[idx + 1] = train_loss # all_valid_loss[idx + 1] = valid_loss # all_valid_accuracy[idx + 1] = valid_accuracy # all_valid_auc[idx + 1] = valid_auc # all_test_loss[idx + 1] = test_loss # all_test_accuracy[idx + 1] = test_accuracy # all_test_auc[idx + 1] = test_auc if valid_auc > best_valid_auc: print('%3.4f to %3.4f' % (best_valid_auc, valid_auc)) best_valid_auc = valid_auc cur_test_auc = test_auc cur_train_auc = train_auc print('DATASET: {}, TRAIN AUC: {}, BEST VALID AUC: {}, TEST AUC: {}'.format(params.data_name, cur_train_auc, \ best_valid_auc, cur_test_auc))
def get(): return jsonify(score=train()), 200
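# A minimal, self-contained version of how such an endpoint is typically
# wired up in Flask; the route path and app setup are assumptions, and the
# train() below is a stand-in returning a dummy score so the sketch runs
# on its own:
from flask import Flask, jsonify

app = Flask(__name__)

def train():
    return 0.95  # stand-in for the project's training function

@app.route('/train')
def get():
    return jsonify(score=train()), 200

if __name__ == '__main__':
    app.run()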
def main(): parser = argparse.ArgumentParser() parser.add_argument('--gpu', type=int, default=0, help='the gpu will be used, e.g "0,1,2,3"') parser.add_argument('--max_iter', type=int, default=50, help='number of iterations') parser.add_argument('--decay_epoch', type=int, default=20, help='number of iterations') parser.add_argument('--test', type=bool, default=False, help='enable testing') parser.add_argument('--train_test', type=bool, default=True, help='enable testing') parser.add_argument('--show', type=bool, default=True, help='print progress') parser.add_argument('--init_std', type=float, default=0.1, help='weight initialization std') parser.add_argument('--init_lr', type=float, default=0.01, help='initial learning rate') parser.add_argument('--lr_decay', type=float, default=0.75, help='learning rate decay') parser.add_argument('--final_lr', type=float, default=1E-5, help='learning rate will not decrease after hitting this threshold') parser.add_argument('--momentum', type=float, default=0.9, help='momentum rate') parser.add_argument('--max_grad_norm', type=float, default=50.0, help='maximum gradient norm') # parser.add_argument('--final_fc_dim', type=float, default=200, help='hidden state dim for final fc layer') parser.add_argument('--first_k', type=int, default=8, help='first k question without loss calculation') parser.add_argument('--dataset', type=str, default='assist2009_updated') parser.add_argument('--train_set', type=int, default=1) parser.add_argument('--memory_size', type=int, default=20, help='memory size') parser.add_argument('--q_embed_dim', type=int, default=50, help='question embedding dimensions') parser.add_argument('--qa_embed_dim', type=int, default=200, help='answer and question embedding dimensions') if parser.parse_args().dataset == 'assist2009_updated': # memory_size: 20, q_embed_dim: 50, qa_embed_dim: 200 parser.add_argument('--batch_size', type=int, default=128, help='the batch size') parser.add_argument('--n_question', type=int, default=110, help='the number of unique questions in the dataset') parser.add_argument('--seqlen', type=int, default=200, help='the allowed maximum length of a sequence') parser.add_argument('--data_dir', type=str, default='../dataset/assist2009_updated', help='data directory') parser.add_argument('--data_name', type=str, default='assist2009_updated', help='data set name') parser.add_argument('--load', type=str, default='assist2009_updated', help='model file to load') parser.add_argument('--save', type=str, default='assist2009_updated', help='path to save model') parser.add_argument('--final_fc_dim', type=float, default=110, help='hidden state dim for final fc layer') elif parser.parse_args().dataset == 'assist2015': # memory_size: 50, q_embed_dim: 50, qa_embed_dim: 200 parser.add_argument('--batch_size', type=int, default=128, help='the batch size') parser.add_argument('--n_question', type=int, default=100, help='the number of unique questions in the dataset') parser.add_argument('--seqlen', type=int, default=200, help='the allowed maximum length of a sequence') parser.add_argument('--data_dir', type=str, default='../dataset/assist2015', help='data directory') parser.add_argument('--data_name', type=str, default='assist2015', help='data set name') parser.add_argument('--load', type=str, default='assist2015', help='model file to load') parser.add_argument('--save', type=str, default='assist2015', help='path to save model') parser.add_argument('--final_fc_dim', type=float, default=100, help='hidden state dim for final fc layer') elif 
parser.parse_args().dataset == 'assist2017': # memory_size: 20, q_embed_dim: 50, qa_embed_dim: 100 parser.add_argument('--batch_size', type=int, default=32, help='the batch size') parser.add_argument('--n_question', type=int, default=102, help='the number of unique questions in the dataset') parser.add_argument('--seqlen', type=int, default=200, help='the allowed maximum length of a sequence') parser.add_argument('--data_dir', type=str, default='../dataset/assist2017/train_valid_test/', help='data directory') parser.add_argument('--data_name', type=str, default='assist2017', help='data set name') parser.add_argument('--load', type=str, default='assist2017', help='model file to load') parser.add_argument('--save', type=str, default='assist2017', help='path to save model') parser.add_argument('--final_fc_dim', type=float, default=102, help='hidden state dim for final fc layer') elif parser.parse_args().dataset == 'STATICS': # memory_size: 50, q_embed_dim: 50, qa_embed_dim: 100 parser.add_argument('--batch_size', type=int, default=32, help='the batch size') parser.add_argument('--n_question', type=int, default=1223, help='the number of unique questions in the dataset') parser.add_argument('--seqlen', type=int, default=200, help='the allowed maximum length of a sequence') parser.add_argument('--data_dir', type=str, default='../dataset/STATICS', help='data directory') parser.add_argument('--data_name', type=str, default='STATICS', help='data set name') parser.add_argument('--load', type=str, default='STATICS', help='model file to load') parser.add_argument('--save', type=str, default='STATICS', help='path to save model') parser.add_argument('--final_fc_dim', type=float, default=1223, help='hidden state dim for final fc layer') elif parser.parse_args().dataset == 'synthetic': # memory_size: 20, q_embed_dim: 50, qa_embed_dim: 100 parser.add_argument('--batch_size', type=int, default=128, help='the batch size') parser.add_argument('--n_question', type=int, default=50, help='the number of unique questions in the dataset') parser.add_argument('--seqlen', type=int, default=200, help='the allowed maximum length of a sequence') parser.add_argument('--data_dir', type=str, default='../dataset/synthetic/', help='data directory') parser.add_argument('--data_name', type=str, default='synthetic', help='data set name') parser.add_argument('--load', type=str, default='synthetic', help='model file to load') parser.add_argument('--save', type=str, default='synthetic', help='path to save model') parser.add_argument('--final_fc_dim', type=float, default=50, help='hidden state dim for final fc layer') params = parser.parse_args() params.lr = params.init_lr params.memory_key_state_dim = params.q_embed_dim params.memory_value_state_dim = params.qa_embed_dim print(params) dat = DATA(n_question=params.n_question, seqlen=params.seqlen, separate_char=',') if params.dataset != 'synthetic': train_data_path = params.data_dir + "/" + params.data_name + "_train" + str(params.train_set) + ".csv" valid_data_path = params.data_dir + "/" + params.data_name + "_valid" + str(params.train_set) + ".csv" test_data_path = params.data_dir + "/" + params.data_name + "_test.csv" else: train_data_path = params.data_dir + "/" + "naive_c5_q50_s4000_v0_train" + str(params.train_set) + ".csv" valid_data_path = params.data_dir + "/" + "naive_c5_q50_s4000_v0_valid" + str(params.train_set) + ".csv" test_data_path = params.data_dir + "/" + "naive_c5_q50_s4000_v0_test.csv" train_q_data, train_qa_data, train_a_data = dat.load_data(train_data_path) 
valid_q_data, valid_qa_data, valid_a_data = dat.load_data(valid_data_path) test_q_data, test_qa_data, test_a_data = dat.load_data(test_data_path) params.memory_key_state_dim = params.q_embed_dim params.memory_value_state_dim = params.qa_embed_dim model = MODEL(n_question=params.n_question, batch_size=params.batch_size, q_embed_dim=params.q_embed_dim, qa_embed_dim=params.qa_embed_dim, memory_size=params.memory_size, memory_key_state_dim=params.memory_key_state_dim, memory_value_state_dim=params.memory_value_state_dim, final_fc_dim=params.final_fc_dim, first_k=params.first_k, gpu=params.gpu) model.init_embeddings() model.init_params() optimizer = optim.Adam(params=model.parameters(), lr=params.lr, betas=(0.9, 0.9)) if params.gpu >= 0: print('device: ' + str(params.gpu)) torch.cuda.set_device(params.gpu) model.cuda() best_valid_auc = 0 correspond_train_auc = 0 correspond_test_auc = 0 for idx in range(params.max_iter): train_loss, train_accuracy, train_auc = train(model, params, optimizer, train_q_data, train_qa_data, train_a_data) print('Epoch %d/%d, loss : %3.5f, auc : %3.5f, accuracy : %3.5f' % (idx + 1, params.max_iter, train_loss, train_auc, train_accuracy)) valid_loss, valid_accuracy, valid_auc = test(model, params, optimizer, valid_q_data, valid_qa_data, valid_a_data) print('Epoch %d/%d, valid auc : %3.5f, valid accuracy : %3.5f' % (idx + 1, params.max_iter, valid_auc, valid_accuracy)) test_loss, test_accuracy, test_auc = test(model, params, optimizer, test_q_data, test_qa_data, test_a_data) print('Epoch %d/%d, test auc : %3.5f, test accuracy : %3.5f' % (idx + 1, params.max_iter, test_auc, test_accuracy)) # output the epoch with the best validation auc if valid_auc > best_valid_auc: print('%3.4f to %3.4f' % (best_valid_auc, valid_auc)) best_valid_auc = valid_auc correspond_train_auc = train_auc correspond_test_auc = test_auc print("DATASET: {}, MEMO_SIZE: {}, Q_EMBED_SIZE: {}, QA_EMBED_SIZE: {}, LR: {}".format(params.data_name, params.memory_size, params.q_embed_dim, params.qa_embed_dim, params.init_lr)) print("BEST VALID AUC: {}, CORRESPOND TRAIN AUC: {}, CORRESPOND TEST AUC: {}".format(best_valid_auc, correspond_train_auc, correspond_test_auc))
def train_one_dataset(params, file_name, train_q_data, train_qa_data, valid_q_data, valid_qa_data):
    ### ================================== model initialization ==================================
    # build the model
    g_model = MODEL(n_question=params.n_question,
                    seqlen=params.seqlen,
                    batch_size=params.batch_size,
                    q_embed_dim=params.q_embed_dim,
                    qa_embed_dim=params.qa_embed_dim,
                    memory_size=params.memory_size,
                    memory_key_state_dim=params.memory_key_state_dim,
                    memory_value_state_dim=params.memory_value_state_dim,
                    final_fc_dim=params.final_fc_dim)
    # create a module from the Symbol
    # symbol: the network symbol
    # context: the device(s) to execute on
    # data_names: list of input data variable names
    # label_names: list of label variable names
    net = mx.mod.Module(symbol=g_model.sym_gen(),
                        data_names=['q_data', 'qa_data'],
                        label_names=['target'],
                        context=params.ctx)
    # intermediate-layer interface
    # create memory given the input shapes: allocate memory to set up the computation environment
    net.bind(data_shapes=[mx.io.DataDesc(name='q_data', shape=(params.seqlen, params.batch_size), layout='SN'),
                          mx.io.DataDesc(name='qa_data', shape=(params.seqlen, params.batch_size), layout='SN')],
             label_shapes=[mx.io.DataDesc(name='target', shape=(params.seqlen, params.batch_size), layout='SN')])
    # initialize parameters with the default random initializer
    net.init_params(initializer=mx.init.Normal(sigma=params.init_std))
    # decay learning rate in the lr_scheduler
    lr_scheduler = mx.lr_scheduler.FactorScheduler(step=20 * (train_q_data.shape[0] / params.batch_size),
                                                   factor=0.667, stop_factor_lr=1e-5)
    # initialize the optimizer
    net.init_optimizer(optimizer='sgd',
                       optimizer_params={'learning_rate': params.lr,
                                         'momentum': params.momentum,
                                         'lr_scheduler': lr_scheduler})
    for parameters in net.get_params()[0]:
        print(parameters, net.get_params()[0][parameters].asnumpy().shape)
    print("\n")
    ### ================================== start training ==================================
    all_train_loss = {}
    all_train_accuracy = {}
    all_train_auc = {}
    all_valid_loss = {}
    all_valid_accuracy = {}
    all_valid_auc = {}
    best_valid_auc = 0
    for idx in range(params.max_iter):
        train_loss, train_accuracy, train_auc = train(net, params, train_q_data, train_qa_data, label='Train')
        valid_loss, valid_accuracy, valid_auc = test(net, params, valid_q_data, valid_qa_data, label='Valid')
        print('epoch', idx + 1)
        print("valid_auc\t", valid_auc, "\ttrain_auc\t", train_auc)
        print("valid_accuracy\t", valid_accuracy, "\ttrain_accuracy\t", train_accuracy)
        print("valid_loss\t", valid_loss, "\ttrain_loss\t", train_loss)
        if not os.path.isdir('model'):
            os.makedirs('model')
        if not os.path.isdir(os.path.join('model', params.save)):
            os.makedirs(os.path.join('model', params.save))
        all_valid_auc[idx + 1] = valid_auc
        all_train_auc[idx + 1] = train_auc
        all_valid_loss[idx + 1] = valid_loss
        all_train_loss[idx + 1] = train_loss
        all_valid_accuracy[idx + 1] = valid_accuracy
        all_train_accuracy[idx + 1] = train_accuracy
        # output the epoch with the best validation auc
        if valid_auc > best_valid_auc:
            best_valid_auc = valid_auc
            best_epoch = idx + 1
            # here the checkpoint epoch is fixed at the default of 100;
            # we only save the model in epochs with better results
            net.save_checkpoint(prefix=os.path.join('model', params.save, file_name), epoch=100)
    if not os.path.isdir('result'):
        os.makedirs('result')
    if not os.path.isdir(os.path.join('result', params.save)):
        os.makedirs(os.path.join('result', params.save))
    f_save_log = open(os.path.join('result', params.save, file_name), 'w')
    f_save_log.write("valid_auc:\n" + str(all_valid_auc) + "\n\n")
    f_save_log.write("train_auc:\n" + str(all_train_auc) + "\n\n")
    f_save_log.write("valid_loss:\n" + str(all_valid_loss) + "\n\n")
    f_save_log.write("train_loss:\n" + str(all_train_loss) + "\n\n")
    f_save_log.write("valid_accuracy:\n" + str(all_valid_accuracy) + "\n\n")
    f_save_log.write("train_accuracy:\n" + str(all_train_accuracy) + "\n\n")
    f_save_log.close()
    return best_epoch
parser.add_argument('--data_split', type=str, default='train') parser.add_argument('--fullwiki', action='store_true') parser.add_argument('--prediction_file', type=str) parser.add_argument('--sp_threshold', type=float, default=0.3) config = parser.parse_args() def _concat(filename): if config.fullwiki: return 'fullwiki.{}'.format(filename) return filename # config.train_record_file = _concat(config.train_record_file) config.dev_record_file = _concat(config.dev_record_file) config.test_record_file = _concat(config.test_record_file) # config.train_eval_file = _concat(config.train_eval_file) config.dev_eval_file = _concat(config.dev_eval_file) config.test_eval_file = _concat(config.test_eval_file) if config.mode == 'train': train(config) elif config.mode == 'prepro': prepro(config) elif config.mode == 'test': test(config) elif config.mode == 'count': cnt_len(config)
source_helper = Helper( data_filepath='./data/LCSTS/train_source.npy', length_filepath='./data/LCSTS/train_source_length.npy', mode='source') target_input_helper = Helper( data_filepath='./data/LCSTS/train_target.npy', length_filepath='./data/LCSTS/train_target_length.npy', mode='target_input') target_output_helper = Helper( data_filepath='./data/LCSTS/train_target.npy', length_filepath='./data/LCSTS/train_target_length.npy', mode='target_output') valid_source_helper = Helper( data_filepath='./data/LCSTS/dev_source.npy', length_filepath='./data/LCSTS/dev_source_length.npy', mode='source') # valid_target_input_helper = Helper(data_filepath='./data/LCSTS/dev_target.npy', mode='target_input') valid_target_output_helper = Helper( data_filepath='./data/LCSTS/dev_target.npy', length_filepath='./data/LCSTS/dev_target_length.npy', mode='target_output') char_dict = json.load(open('./data/LCSTS/target_char_dict.json', 'r')) train(source_helper, target_input_helper, target_output_helper, valid_source_helper, valid_target_output_helper, char_dict) # if result.test: # source_helper = Helper(data_filepath='./data/LCSTS/test_source.npy', mode='source') # target_input_helper = Helper(data_filepath='./data/LCSTS/test_target.npy', mode='target_input') # target_output_helper = Helper(data_filepath='./data/LCSTS/test_target.npy', mode='target_output') # test(source_helper)