def train_network(start_epoch, epochs, scheduler, model, train_loader, val_loader, test_loader,
                  optimizer, criterion, device, dtype, batch_size, log_interval, csv_logger,
                  save_path, best_val):
    for epoch in trange(start_epoch, epochs + 1):
        if not isinstance(scheduler, CyclicLR):
            scheduler.step()
        train_loss, train_mae = train(model, train_loader, epoch, optimizer, criterion, device,
                                      dtype, batch_size, log_interval, scheduler)
        val_loss, val_mae = test(model, val_loader, criterion, device, dtype)
        test_loss, test_mae = test(model, test_loader, criterion, device, dtype)
        csv_logger.write({'epoch': epoch + 1,
                          'test_mae': test_mae,
                          'test_loss': test_loss,
                          'val_mae': val_mae,
                          'val_loss': val_loss,
                          'train_mae': train_mae,
                          'train_loss': train_loss})
        save_checkpoint({'epoch': epoch + 1,
                         'state_dict': model.state_dict(),
                         'best_prec1': best_val,
                         'optimizer': optimizer.state_dict()},
                        val_mae < best_val, filepath=save_path)
        # csv_logger.plot_progress(claimed_acc1=claimed_acc1, claimed_acc5=claimed_acc5)
        csv_logger.plot_progress()
        if val_mae < best_val:
            best_val = val_mae
    csv_logger.write_text('Lowest mae is {:.2f}'.format(best_val))
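The training loops in this file hand their state to a `save_checkpoint` helper. Below is a minimal sketch of such a helper in the common torchvision-example idiom; it is an assumption for illustration, not the project's actual implementation, and the filenames are placeholders.

import os
import shutil

import torch


def save_checkpoint(state, is_best, filepath='.', filename='checkpoint.pth.tar'):
    """Persist the latest training state; copy it to 'model_best.pth.tar'
    when it is the best result seen so far (sketch, assumed filenames)."""
    path = os.path.join(filepath, filename)
    torch.save(state, path)  # state holds epoch, state_dict, best metric, optimizer
    if is_best:
        shutil.copyfile(path, os.path.join(filepath, 'model_best.pth.tar'))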
def main(unused_argv):
    noise_list = [i for i in np.arange(0, 0.65, 0.05)]
    nn = basicNNModel()
    # for nt in ["uniform", "permutation"]:
    #     for ip in noise_list:
    #         train_nn(model=nn, noise_type=nt, incorrect_percent=ip)
    nlnn = basicNLNNModel()
    # for nt in ["uniform", "permutation"]:
    #     for ip in noise_list:
    #         train_nlnn(model=nlnn, noise_type=nt, incorrect_percent=ip)
    nlnn_true = basicNLNNTrueModel()
    # for nt in ["uniform", "permutation"]:
    #     for ip in noise_list:
    #         train_nlnn_true(model=nlnn_true, noise_type=nt, incorrect_percent=ip)
    acc_dict = {}
    acc_dict["uniform"] = [[], [], []]
    acc_dict["permutation"] = [[], [], []]
    for nt in ["uniform", "permutation"]:
        for ip in noise_list:
            acc_dict[nt][0].append(test(nn, model_path="logs/" + str(type(nn).__name__) + "/" + nt + "-" + str(ip) + "/model.ckpt-100"))
            acc_dict[nt][1].append(test(nlnn, model_path="logs/" + str(type(nlnn).__name__) + "/" + nt + "-" + str(ip) + "/model.ckpt-6"))
            acc_dict[nt][2].append(test(nlnn_true, model_path="logs/" + str(type(nlnn_true).__name__) + "/" + nt + "-" + str(ip) + "/model.ckpt-6"))
    plot_lines_chart(noise_list, acc_list=acc_dict["uniform"], saveDir="graphs/uniform/")
    plot_lines_chart(noise_list, acc_list=acc_dict["permutation"], saveDir="graphs/permutation/")
def test_one_dataset(params, file_name, test_q_data, test_qa_data, best_epoch):
    print("\n\nStart testing ......................\n Best epoch:", best_epoch)
    val_best_epochs.append(best_epoch)
    g_model = MODEL(n_question=params.n_question,
                    seqlen=params.seqlen,
                    batch_size=params.batch_size,
                    q_embed_dim=params.q_embed_dim,
                    qa_embed_dim=params.qa_embed_dim,
                    memory_size=params.memory_size,
                    memory_key_state_dim=params.memory_key_state_dim,
                    memory_value_state_dim=params.memory_value_state_dim,
                    final_fc_dim=params.final_fc_dim)
    # create a module from the given Symbol
    test_net = mx.mod.Module(symbol=g_model.sym_gen(),
                             data_names=['q_data', 'qa_data'],
                             label_names=['target'],
                             context=params.ctx)
    # allocate memory for the given input shapes
    test_net.bind(data_shapes=[
        mx.io.DataDesc(name='q_data', shape=(params.seqlen, params.batch_size), layout='SN'),
        mx.io.DataDesc(name='qa_data', shape=(params.seqlen, params.batch_size), layout='SN')],
        label_shapes=[mx.io.DataDesc(name='target', shape=(params.seqlen, params.batch_size), layout='SN')])
    arg_params, aux_params = load_params(prefix=os.path.join('model', params.load, file_name),
                                         epoch=best_epoch)
    test_net.init_params(arg_params=arg_params, aux_params=aux_params, allow_missing=False)
    test_loss, test_accuracy, test_auc = test(test_net, params, test_q_data, test_qa_data, label='Test')
    print("\ntest_auc\t", test_auc)
    print("test_accuracy\t", test_accuracy)
    print("test_loss\t", test_loss)
    val_auc_acc.append(test_auc)
    val_accuracy.append(test_accuracy)
    val_loss.append(test_loss)
def test_one_dataset(params, file_name, test_q_data, test_qa_data, test_tf_data, best_epoch, user_id):
    # print("\n\nStart testing ......................\n best_epoch:", best_epoch)
    g_model = MODEL(n_question=params.n_question,
                    seqlen=params.seqlen,
                    batch_size=params.batch_size,
                    q_embed_dim=params.q_embed_dim,
                    qa_embed_dim=params.qa_embed_dim,
                    memory_size=params.memory_size,
                    memory_key_state_dim=params.memory_key_state_dim,
                    memory_value_state_dim=params.memory_value_state_dim,
                    final_fc_dim=params.final_fc_dim)
    # create a module from the given Symbol
    test_net = mx.mod.Module(symbol=g_model.sym_gen(),
                             data_names=['q_data', 'qa_data'],
                             label_names=['target'],
                             context=params.ctx)
    # allocate memory for the given input shapes
    test_net.bind(data_shapes=[
        mx.io.DataDesc(name='q_data', shape=(params.seqlen, params.batch_size), layout='SN'),
        mx.io.DataDesc(name='qa_data', shape=(params.seqlen, params.batch_size), layout='SN')],
        label_shapes=[mx.io.DataDesc(name='target', shape=(params.seqlen, params.batch_size), layout='SN')])
    arg_params, aux_params = load_params(prefix=os.path.join('model', params.load, file_name),
                                         epoch=best_epoch)
    test_net.init_params(arg_params=arg_params, aux_params=aux_params, allow_missing=False)
    pred_list, target_list = run.test(test_net, params, test_q_data, test_qa_data, test_tf_data, label='Test')
    return pred_list, target_list
def test_one_dataset(params, file_name, test_q_data, test_qa_data):
    print("\n\nStart testing ......................")
    g_model = MODEL(n_question=params.n_question,
                    seqlen=params.seqlen,
                    batch_size=params.batch_size,
                    q_embed_dim=params.q_embed_dim,
                    qa_embed_dim=params.qa_embed_dim,
                    memory_size=params.memory_size,
                    memory_key_state_dim=params.memory_key_state_dim,
                    memory_value_state_dim=params.memory_value_state_dim,
                    final_fc_dim=params.final_fc_dim)
    # create a module from the given Symbol
    test_net = mx.mod.Module(symbol=g_model.sym_gen(),
                             data_names=['q_data', 'qa_data'],
                             label_names=['target'],
                             context=params.ctx)
    # allocate memory for the given input shapes
    test_net.bind(data_shapes=[
        mx.io.DataDesc(name='q_data', shape=(params.seqlen, params.batch_size), layout='SN'),
        mx.io.DataDesc(name='qa_data', shape=(params.seqlen, params.batch_size), layout='SN')],
        label_shapes=[mx.io.DataDesc(name='target', shape=(params.seqlen, params.batch_size), layout='SN')])
    arg_params, aux_params = load_params(prefix=os.path.join('model', params.load, file_name),
                                         epoch=100)
    test_net.init_params(arg_params=arg_params, aux_params=aux_params, allow_missing=False)
    test_loss, test_accuracy, test_auc = test(test_net, params, test_q_data, test_qa_data, label='Test')
    log_info = "\ntest_auc:\t{}\ntest_accuracy:\t{}\ntest_loss:\t{}\n".format(test_auc, test_accuracy, test_loss)
    print(log_info)
    f_save_log = open(os.path.join('result', params.save, file_name), 'a')
    f_save_log.write(log_info)
    f_save_log.close()
def test_one_dataset(params, file_name, test_q_data, test_qa_data, best_epoch):
    print("\n\nStart testing ......................\n Best epoch:", best_epoch)
    g_model = MODEL(n_question=params.n_question,
                    seqlen=params.seqlen,
                    batch_size=params.batch_size,
                    q_embed_dim=params.q_embed_dim,
                    qa_embed_dim=params.qa_embed_dim,
                    memory_size=params.memory_size,
                    memory_key_state_dim=params.memory_key_state_dim,
                    memory_value_state_dim=params.memory_value_state_dim,
                    final_fc_dim=params.final_fc_dim)
    # create a module from the given Symbol
    test_net = mx.mod.Module(symbol=g_model.sym_gen(),
                             data_names=['q_data', 'qa_data'],
                             label_names=['target'],
                             context=params.ctx)
    # allocate memory for the given input shapes
    test_net.bind(data_shapes=[
        mx.io.DataDesc(name='q_data', shape=(params.seqlen, params.batch_size), layout='SN'),
        mx.io.DataDesc(name='qa_data', shape=(params.seqlen, params.batch_size), layout='SN')],
        label_shapes=[mx.io.DataDesc(name='target', shape=(params.seqlen, params.batch_size), layout='SN')])
    arg_params, aux_params = load_params(prefix=os.path.join('model', params.load, file_name),
                                         epoch=best_epoch)
    test_net.init_params(arg_params=arg_params, aux_params=aux_params, allow_missing=False)
    test_loss, test_accuracy, test_auc = test(test_net, params, test_q_data, test_qa_data, label='Test')
    print("\ntest_auc\t", test_auc)
    print("test_accuracy\t", test_accuracy)
    print("test_loss\t", test_loss)
def train_network(start_epoch, epochs, scheduler, model, train_loader, val_loader, optimizer,
                  criterion, device, dtype, batch_size, log_interval, csv_logger, save_path,
                  claimed_acc1, claimed_acc5, best_test):
    for epoch in trange(start_epoch, epochs + 1):
        if not isinstance(scheduler, CyclicLR):
            scheduler.step()
        train_loss = train(model, train_loader, epoch, optimizer, criterion, device, dtype,
                           batch_size, log_interval, scheduler)
        test_loss = test(model, val_loader, criterion, device, dtype)
        csv_logger.write({'epoch': epoch + 1, 'val_loss': test_loss, 'train_loss': train_loss})
        save_checkpoint({'epoch': epoch + 1,
                         'state_dict': model.state_dict(),
                         'best_prec1': best_test,
                         'optimizer': optimizer.state_dict()},
                        test_loss < best_test, filepath=save_path)
        if test_loss < best_test:
            best_test = test_loss
    csv_logger.write_text('Best loss is {}'.format(best_test))
def mytest_one_dataset(params, file_name, test_q_data, test_qa_data, test_pid, best_epoch):
    print("\n\nStart testing ......................\n Best epoch:", best_epoch)
    model = load_model(params)
    checkpoint = torch.load(os.path.join('model', params.model, params.save, file_name) + '_' + str(best_epoch))
    model.load_state_dict(checkpoint['model_state_dict'])
    test_loss, test_accuracy, test_auc = test(model, params, None, test_q_data, test_qa_data,
                                              test_pid, label='Test')
    print("\ntest_auc\t", test_auc)
    print("test_accuracy\t", test_accuracy)
    print("test_loss\t", test_loss)
    # now delete all the saved model checkpoints for this run
    path = os.path.join('model', params.model, params.save, file_name) + '_*'
    for i in glob.glob(path):
        os.remove(i)
def main():
    parser = get_parser()
    config = parser.parse_args()
    if config.train:
        auto(config, 'train')
        val_config = parser.parse_args()
        auto(val_config, 'val')
        val_config.train = False
        run.train(config, val_config=val_config)
    elif config.serve:
        auto(config, 'serve')
        config.fresh = True
        demo.demo(config)
    else:
        auto(config, 'test')
        run.test(config)
def main():
    parser = ArgumentParser(description='train model from data')
    parser.add_argument('--mode', help='train or test', metavar='MODE', default='train')
    parser.add_argument('--config-path', help='config json path', metavar='DIR')
    parser.add_argument('--init-checkpoint', help='checkpoint file', metavar='FILE')
    parser.add_argument('--batch-size', help='batch size <default: 32>', metavar='INT', type=int, default=32)
    parser.add_argument('--epoch', help='epoch number <default: 10>', metavar='INT', type=int, default=10)
    parser.add_argument('--embedding-dim', help='embedding dimension <default: 256>', metavar='INT', type=int, default=256)
    parser.add_argument('--max-len', help='max length of a sentence <default: 90>', metavar='INT', type=int, default=90)
    parser.add_argument('--units', help='units <default: 512>', metavar='INT', type=int, default=512)
    parser.add_argument('--dev-split', help='<default: 0.1>', metavar='REAL', type=float, default=0.1)
    parser.add_argument('--optimizer', help='optimizer <default: adam>', metavar='STRING', default='adam')
    parser.add_argument('--learning-rate', help='learning rate <default: 0.001>', metavar='REAL', type=float, default=0.001)
    parser.add_argument('--dropout', help='dropout probability <default: 0>', metavar='REAL', type=float, default=.0)
    parser.add_argument('--method', help='content-based function <default: concat>', metavar='STRING', default='concat')
    parser.add_argument('--gpu-num', help='GPU number to use <default: 0>', metavar='INT', type=int, default=0)
    args = parser.parse_args()
    if args.mode == 'train':
        train(args)
    elif args.mode == 'test':
        test(args)
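A hypothetical invocation of the CLI above (assuming the script is saved as main.py; the flag values are illustrative, not from the source):

#   python main.py --mode train --config-path config.json --batch-size 64 --epoch 20
#   python main.py --mode test --config-path config.json --init-checkpoint ckpt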
def train_network(start_epoch, epochs, scheduler, model, train_loader, val_loader, optimizer,
                  criterion, device, dtype, batch_size, log_interval, csv_logger, save_path,
                  claimed_acc1, claimed_acc5, best_test):
    for epoch in trange(start_epoch, epochs + 1):
        if not isinstance(scheduler, CyclicLR):
            scheduler.step()
        train_loss, train_accuracy1, train_accuracy5 = train(model, train_loader, epoch, optimizer,
                                                             criterion, device, dtype, batch_size,
                                                             log_interval, scheduler)
        test_loss, test_accuracy1, test_accuracy5 = test(model, val_loader, criterion, device, dtype)
        csv_logger.write({'epoch': epoch + 1,
                          'val_error1': 1 - test_accuracy1,
                          'val_error5': 1 - test_accuracy5,
                          'val_loss': test_loss,
                          'train_error1': 1 - train_accuracy1,
                          'train_error5': 1 - train_accuracy5,
                          'train_loss': train_loss})
        save_checkpoint({'epoch': epoch + 1,
                         'state_dict': model.state_dict(),
                         'best_prec1': best_test,
                         'optimizer': optimizer.state_dict()},
                        test_accuracy1 > best_test, filepath=save_path)
        csv_logger.plot_progress(claimed_acc1=claimed_acc1, claimed_acc5=claimed_acc5)
        if test_accuracy1 > best_test:
            best_test = test_accuracy1
            print('Best accuracy is {:.2f}% top-1'.format(best_test * 100.))
        temp_dict = {'epoch': epoch + 1,
                     'val_error1': 1 - test_accuracy1,
                     'val_error5': 1 - test_accuracy5,
                     'val_loss': test_loss,
                     'train_error1': 1 - train_accuracy1,
                     'train_error5': 1 - train_accuracy5,
                     'train_loss': train_loss}
        for x in temp_dict:
            print(x, ":", temp_dict[x])
    csv_logger.write_text('Best accuracy is {:.2f}% top-1'.format(best_test * 100.))
def train_network(start_epoch, epochs, scheduler, model, train_loader, val_loader, adv_data,
                  optimizer, criterion, device, dtype, batch_size, log_interval, csv_logger,
                  save_path, claimed_acc1, claimed_acc5, best_test):
    for epoch in trange(start_epoch, epochs + 1):
        train_loss, train_accuracy1, train_accuracy5 = train(model, train_loader, epoch, optimizer,
                                                             criterion, device, dtype, batch_size,
                                                             log_interval)
        if adv_data is not None:
            traina_loss, traina_accuracy1, traina_accuracy5 = train(model, adv_data, epoch,
                                                                    optimizer, criterion, device,
                                                                    dtype, batch_size, log_interval)
        test_loss, test_accuracy1, test_accuracy5 = test(model, val_loader, criterion, device, dtype)
        csv_logger.write({'epoch': epoch + 1,
                          'val_error1': 1 - test_accuracy1,
                          'val_error5': 1 - test_accuracy5,
                          'val_loss': test_loss,
                          'train_error1': 1 - train_accuracy1,
                          'train_error5': 1 - train_accuracy5,
                          'train_loss': train_loss})
        save_checkpoint({'epoch': epoch + 1,
                         'state_dict': model.state_dict(),
                         'best_prec1': best_test,
                         'optimizer': optimizer.state_dict()},
                        test_accuracy1 > best_test, filepath=save_path)
        csv_logger.plot_progress(claimed_acc1=claimed_acc1, claimed_acc5=claimed_acc5)
        if test_accuracy1 > best_test:
            best_test = test_accuracy1
        from layers import NoisedConv2D, NoisedLinear
        for layer in model.modules():
            if isinstance(layer, NoisedConv2D) or isinstance(layer, NoisedLinear):
                print("Mean of alphas is {}".format(torch.mean(layer.alpha)))
        scheduler.step()
    csv_logger.write_text('Best accuracy is {:.2f}% top-1'.format(best_test * 100.))
def adv_train_network_alpha(start_epoch, epochs, scheduler, model, train_loader, val_loader,
                            optimizer, criterion, device, dtype, batch_size, log_interval,
                            csv_logger, save_path, claimed_acc1, claimed_acc5, best_test,
                            adv_method, eps, adv_w, normalize):
    # alpha_sched = np.concatenate((np.ones(epochs // 8), np.linspace(1, 0, epochs - 2 * (epochs // 8)),
    #                               np.zeros(epochs // 8)))
    alpha_sched = np.concatenate((np.ones(epochs // 8),
                                  np.logspace(0, -4, epochs - 2 * (epochs // 8)),
                                  np.zeros(epochs // 8 + 20)))
    for epoch in trange(start_epoch, epochs + 1):
        model.set_alpha(alpha_sched[epoch])
        tqdm.write("alpha={}".format(alpha_sched[epoch]))
        train_loss, train_accuracy1, train_accuracy5 = adv_train(model, train_loader, epoch,
                                                                 optimizer, criterion, device,
                                                                 dtype, batch_size, log_interval,
                                                                 adv_method, eps, adv_w, normalize,
                                                                 0.05, True, alpha_sched[epoch],
                                                                 alpha_sched[epoch + 1])
        test_loss, test_accuracy1, test_accuracy5 = test(model, val_loader, criterion, device, dtype)
        csv_logger.write({'epoch': epoch + 1,
                          'val_error1': 1 - test_accuracy1,
                          'val_error5': 1 - test_accuracy5,
                          'val_loss': test_loss,
                          'train_error1': 1 - train_accuracy1,
                          'train_error5': 1 - train_accuracy5,
                          'train_loss': train_loss})
        save_checkpoint({'epoch': epoch + 1,
                         'state_dict': model.state_dict(),
                         'best_prec1': best_test,
                         'optimizer': optimizer.state_dict()},
                        test_accuracy1 > best_test, filepath=save_path)
        csv_logger.plot_progress(claimed_acc1=claimed_acc1, claimed_acc5=claimed_acc5)
        if test_accuracy1 > best_test:
            best_test = test_accuracy1
        scheduler.step()
    csv_logger.write_text('Best accuracy is {:.2f}% top-1'.format(best_test * 100.))
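A quick sanity check of the alpha schedule above (a sketch; the `epochs` value is illustrative): the schedule holds alpha at 1 for the first eighth of training, decays it log-linearly from 1 to 1e-4, then pads with zeros; the extra 20 zeros keep the `alpha_sched[epoch + 1]` lookahead in range on the final epoch.

import numpy as np

epochs = 16  # illustrative value, not from the source
alpha_sched = np.concatenate((np.ones(epochs // 8),
                              np.logspace(0, -4, epochs - 2 * (epochs // 8)),
                              np.zeros(epochs // 8 + 20)))
print(len(alpha_sched))                 # 36: 2 ones + 12 log-spaced values + 22 zeros
print(alpha_sched[2], alpha_sched[3])   # 1.0 (decay start), then ~0.43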
def train_network(start_epoch, epochs, optim, model, train_loader, val_loader, criterion, mixup,
                  device, dtype, batch_size, log_interval, csv_logger, save_path, claimed_acc1,
                  claimed_acc5, best_test, local_rank, child):
    my_range = range if child else trange
    for epoch in my_range(start_epoch, epochs + 1):
        if not isinstance(optim.scheduler, CyclicLR) and not isinstance(optim.scheduler, CosineLR):
            optim.scheduler_step()
        train_loss, train_accuracy1, train_accuracy5 = train(model, train_loader, mixup, epoch,
                                                             optim, criterion, device, dtype,
                                                             batch_size, log_interval, child)
        test_loss, test_accuracy1, test_accuracy5 = test(model, val_loader, criterion, device,
                                                         dtype, child)
        csv_logger.write({'epoch': epoch + 1,
                          'val_error1': 1 - test_accuracy1,
                          'val_error5': 1 - test_accuracy5,
                          'val_loss': test_loss,
                          'train_error1': 1 - train_accuracy1,
                          'train_error5': 1 - train_accuracy5,
                          'train_loss': train_loss})
        save_checkpoint({'epoch': epoch + 1,
                         'state_dict': model.state_dict(),
                         'best_prec1': best_test,
                         'optimizer': optim.state_dict()},
                        test_accuracy1 > best_test, filepath=save_path, local_rank=local_rank)
        csv_logger.plot_progress(claimed_acc1=claimed_acc1, claimed_acc5=claimed_acc5)
        if test_accuracy1 > best_test:
            best_test = test_accuracy1
    csv_logger.write_text('Best accuracy is {:.2f}% top-1'.format(best_test * 100.))
def train_network(start_epoch, epochs, scheduler, model, train_loader, val_loader, optimizer,
                  criterion, device, dtype, batch_size, log_interval, csv_logger, save_path,
                  claimed_acc1, claimed_acc5, best_test):
    for epoch in trange(start_epoch, epochs + 1):
        if not isinstance(scheduler, CyclicLR):
            scheduler.step()
        train_loss, train_accuracy1, train_accuracy5 = train(model, train_loader, epoch, optimizer,
                                                             criterion, device, dtype, batch_size,
                                                             log_interval, scheduler)
        test_loss, test_accuracy1, test_accuracy5 = test(model, val_loader, criterion, device, dtype)
        csv_logger.write({'epoch': epoch + 1,
                          'val_error1': 1 - test_accuracy1,
                          'val_error5': 1 - test_accuracy5,
                          'val_loss': test_loss,
                          'train_error1': 1 - train_accuracy1,
                          'train_error5': 1 - train_accuracy5,
                          'train_loss': train_loss})
        save_checkpoint({'epoch': epoch + 1,
                         'state_dict': model.state_dict(),
                         'best_prec1': best_test,
                         'optimizer': optimizer.state_dict()},
                        test_accuracy1 > best_test, filepath=save_path)
        csv_logger.plot_progress(claimed_acc1=claimed_acc1, claimed_acc5=claimed_acc5)
        if test_accuracy1 > best_test:
            best_test = test_accuracy1
    csv_logger.write_text('Best accuracy is {:.2f}% top-1'.format(best_test * 100.))
default="./Test/t1.png") parser.add_argument('--scale', help='Scaling factor of the model', default=2) parser.add_argument('--epoch', help='Number of epochs during training', default=100) parser.add_argument('--lr', help='Sets the learning rate', default=0.01) args = parser.parse_args() ARGS = dict() ARGS["SCALE"] = int(args.scale) main_ckpt_dir = "./checkpoints" if not os.path.exists(main_ckpt_dir): os.makedirs(main_ckpt_dir) ARGS["CKPT_dir"] = main_ckpt_dir + "/checkpoint" + "_sc" + str(args.scale) ARGS["CKPT"] = ARGS["CKPT_dir"] + "/ESPCN_ckpt_sc" + str(args.scale) ARGS["TRAINDIR"] = args.traindir ARGS["EPOCH_NUM"] = int(args.epoch) ARGS["TESTIMG"] = args.testimg ARGS["LRATE"] = float(args.lr) if args.train: run.training(ARGS) elif args.test: run.test(ARGS) elif args.export: run.export(ARGS)
import time

from run import train, test
from utilities import loadMainConfig

if __name__ == '__main__':
    start = time.time()
    print(f"Model Name : {loadMainConfig('modelName')}")
    print("Start Training")
    train()
    print("Training Completed")
    print("Start Evaluating")
    test()
    print("Evaluating Completed")
    print(f"time spent: {time.time() - start}")
def main():
    args = get_args()
    device, dtype = args.device, args.dtype

    train_loader, val_loader = get_loaders(args.dataroot, args.batch_size, args.batch_size,
                                           args.input_size, args.workers, args.world_size,
                                           args.local_rank)
    model = MnasNet(n_class=args.num_classes, width_mult=args.scaling, drop_prob=0.0,
                    num_steps=len(train_loader) * args.epochs)
    num_parameters = sum([l.nelement() for l in model.parameters()])
    flops = flops_benchmark.count_flops(MnasNet, 1, device, dtype, args.input_size, 3,
                                        width_mult=args.scaling)
    if not args.child:
        print(model)
        print('number of parameters: {}'.format(num_parameters))
        print('FLOPs: {}'.format(flops))

    # define loss function (criterion) and optimizer
    criterion = CrossEntropyLoss()
    mixup = Mixup(args.num_classes, args.mixup, args.smooth_eps)
    model, criterion = model.to(device=device, dtype=dtype), criterion.to(device=device, dtype=dtype)
    if args.dtype == torch.float16:
        for module in model.modules():  # FP batchnorm
            if is_bn(module):
                module.to(dtype=torch.float32)

    if args.distributed:
        args.device_ids = [args.local_rank]
        dist.init_process_group(backend=args.dist_backend, init_method=args.dist_init,
                                world_size=args.world_size, rank=args.local_rank)
        model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.local_rank],
                                                          output_device=args.local_rank)
        print('Node #{}'.format(args.local_rank))
    else:
        model = torch.nn.parallel.DataParallel(model, device_ids=[args.local_rank],
                                               output_device=args.local_rank)

    optimizer_class = torch.optim.SGD
    optimizer_params = {"lr": args.learning_rate, "momentum": args.momentum,
                        "weight_decay": args.decay, "nesterov": True}
    if args.find_clr:
        optimizer = torch.optim.SGD(model.parameters(), args.learning_rate,
                                    momentum=args.momentum, weight_decay=args.decay, nesterov=True)
        find_bounds_clr(model, train_loader, optimizer, criterion, device, dtype,
                        min_lr=args.min_lr, max_lr=args.max_lr,
                        step_size=args.epochs_per_step * len(train_loader), mode=args.mode,
                        save_path=args.save_path)
        return

    if args.sched == 'clr':
        scheduler_class = CyclicLR
        scheduler_params = {"base_lr": args.min_lr, "max_lr": args.max_lr,
                            "step_size": args.epochs_per_step * len(train_loader),
                            "mode": args.mode}
    elif args.sched == 'multistep':
        scheduler_class = MultiStepLR
        scheduler_params = {"milestones": args.schedule, "gamma": args.gamma}
    elif args.sched == 'cosine':
        scheduler_class = CosineLR
        scheduler_params = {"max_epochs": args.epochs, "warmup_epochs": args.warmup,
                            "iter_in_epoch": len(train_loader)}
    elif args.sched == 'gamma':
        scheduler_class = StepLR
        scheduler_params = {"step_size": 30, "gamma": args.gamma}
    else:
        raise ValueError('Wrong scheduler!')

    optim = OptimizerWrapper(model, optimizer_class=optimizer_class,
                             optimizer_params=optimizer_params,
                             scheduler_class=scheduler_class,
                             scheduler_params=scheduler_params,
                             use_shadow_weights=args.dtype == torch.float16)
    best_test = 0

    # optionally resume from a checkpoint
    data = None
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume, map_location=device)
            args.start_epoch = checkpoint['epoch'] - 1
            best_test = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            optim.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(args.resume, checkpoint['epoch']))
        elif os.path.isdir(args.resume):
            checkpoint_path = os.path.join(args.resume, 'checkpoint{}.pth.tar'.format(args.local_rank))
            csv_path = os.path.join(args.resume, 'results{}.csv'.format(args.local_rank))
            print("=> loading checkpoint '{}'".format(checkpoint_path))
            checkpoint = torch.load(checkpoint_path, map_location=device)
            args.start_epoch = checkpoint['epoch'] - 1
            best_test = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            optim.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(checkpoint_path, checkpoint['epoch']))
            data = []
            with open(csv_path) as csvfile:
                reader = csv.DictReader(csvfile)
                for row in reader:
                    data.append(row)
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    if args.evaluate:
        loss, top1, top5 = test(model, val_loader, criterion, device, dtype, args.child)  # TODO
        return

    csv_logger = CsvLogger(filepath=args.save_path, data=data, local_rank=args.local_rank)
    csv_logger.save_params(sys.argv, args)

    claimed_acc1 = None
    claimed_acc5 = None
    if args.input_size in claimed_acc_top1:
        if args.scaling in claimed_acc_top1[args.input_size]:
            claimed_acc1 = claimed_acc_top1[args.input_size][args.scaling]
            if not args.child:
                csv_logger.write_text('Claimed accuracy is {:.2f}% top-1'.format(claimed_acc1 * 100.))
    train_network(args.start_epoch, args.epochs, optim, model, train_loader, val_loader, criterion,
                  mixup, device, dtype, args.batch_size, args.log_interval, csv_logger,
                  args.save_path, claimed_acc1, claimed_acc5, best_test, args.local_rank, args.child)
def main():
    args = get_args()
    device, dtype = args.device, args.dtype

    add_args = {'num_classes': args.num_classes}
    if args.cpni:
        add_args = {'weight_noise': args.weight_noise,
                    'act_noise_a': args.act_noise_a,
                    'act_noise_b': args.act_noise_b,
                    'rank': args.noise_rank}
    if args.dataset == torchvision.datasets.ImageNet:
        add_args['pretrained'] = True
    else:
        add_args['width'] = args.width
        add_args['num_classes'] = args.num_classes

    smoothing_args = {}
    if args.smoothing:
        smoothing_args = {'noise_sd': args.noise_sd, 'm_test': args.m_test, 'm_train': args.m_train}
    model = args.net(**smoothing_args, **add_args)
    num_parameters = sum([l.nelement() for l in model.parameters()])
    print(model)
    print("Number of parameters {}".format(num_parameters))

    train_loader, val_loader, adv_data = args.get_loaders(args.dataset, args.data, args.batch_size,
                                                          args.val_batch_size, args.workers,
                                                          args.adv_data)
    # define loss function (criterion) and optimizer
    criterion = torch.nn.CrossEntropyLoss()
    model, criterion = model.to(device=device, dtype=dtype), criterion.to(device=device, dtype=dtype)

    if args.opt == 'sgd':
        optimizer = torch.optim.SGD(model.parameters(), args.learning_rate, momentum=args.momentum,
                                    weight_decay=args.decay, nesterov=True)
    elif args.opt == 'adam':
        optimizer = torch.optim.Adam(model.parameters(), args.learning_rate, weight_decay=args.decay)
    else:
        raise ValueError('Wrong optimizer!')
    scheduler = MultiStepLR(optimizer, milestones=args.schedule, gamma=args.gamma)

    best_test = 0
    # optionally resume from a checkpoint
    data = None
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume, map_location=device)
            args.start_epoch = checkpoint['epoch'] - 1
            best_test = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(args.resume, checkpoint['epoch']))
        elif os.path.isdir(args.resume):
            checkpoint_path = os.path.join(args.resume, 'checkpoint.pth.tar')
            csv_path = os.path.join(args.resume, 'results.csv')
            print("=> loading checkpoint '{}'".format(checkpoint_path))
            checkpoint = torch.load(checkpoint_path, map_location=device)
            args.start_epoch = checkpoint['epoch'] - 1
            best_test = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(checkpoint_path, checkpoint['epoch']))
            data = []
            with open(csv_path) as csvfile:
                reader = csv.DictReader(csvfile)
                for row in reader:
                    data.append(row)
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))
    if args.zero_start:
        args.start_epoch = 0

    if args.evaluate:
        loss, top1, top5 = test(model, val_loader, criterion, device, dtype)  # TODO
        return

    csv_logger = CsvLogger(filepath=args.save_path, data=data)
    csv_logger.save_params(sys.argv, args)

    claimed_acc1 = None
    claimed_acc5 = None
    if args.adv:
        normalize = {'mean': np.array([0.491, 0.482, 0.447]),
                     'std': np.array([0.247, 0.243, 0.262])}
        if args.alpha:
            adv_train_network_alpha(args.start_epoch, args.epochs, scheduler, model, train_loader,
                                    val_loader, optimizer, criterion, device, dtype,
                                    args.batch_size, args.log_interval, csv_logger, args.save_path,
                                    claimed_acc1, claimed_acc5, best_test, args.attack, args.eps,
                                    0.5, normalize)
        else:
            a = smoothing_args
            a.update(add_args)
            a['width'] = args.width
            adv_train_network(args.start_epoch, args.epochs, scheduler, model, train_loader,
                              val_loader, optimizer, criterion, device, dtype, args.batch_size,
                              args.log_interval, csv_logger, args.save_path, claimed_acc1,
                              claimed_acc5, best_test, args.attack, args.eps, args.adv_w,
                              normalize, args, a)
    else:
        train_network(args.start_epoch, args.epochs, scheduler, model, train_loader, val_loader,
                      adv_data, optimizer, criterion, device, dtype, args.batch_size,
                      args.log_interval, csv_logger, args.save_path, claimed_acc1, claimed_acc5,
                      best_test)
from run import test

test(5)
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--gpu', type=int, default=-1, help='the gpu will be used, e.g "0,1,2,3"')
    parser.add_argument('--max_iter', type=int, default=10, help='number of iterations')
    parser.add_argument('--decay_epoch', type=int, default=20, help='number of iterations')
    parser.add_argument('--test', type=bool, default=False, help='enable testing')
    parser.add_argument('--train_test', type=bool, default=True, help='enable testing')
    parser.add_argument('--show', type=bool, default=True, help='print progress')
    parser.add_argument('--init_std', type=float, default=0.1, help='weight initialization std')
    parser.add_argument('--init_lr', type=float, default=0.01, help='initial learning rate')
    parser.add_argument('--lr_decay', type=float, default=0.75, help='learning rate decay')
    parser.add_argument('--final_lr', type=float, default=1E-5,
                        help='learning rate will not decrease after hitting this threshold')
    parser.add_argument('--momentum', type=float, default=0.9, help='momentum rate')
    parser.add_argument('--max_grad_norm', type=float, default=3.0, help='maximum gradient norm')
    parser.add_argument('--hidden_dim', type=int, default=128, help='hidden layer dimension')
    parser.add_argument('--n_hidden', type=int, default=2, help='hidden numbers')

    dataset = 'assist2009_updated'
    if dataset == 'oj':
        parser.add_argument('--batch_size', type=int, default=5, help='the batch size')
        parser.add_argument('--qa_embed_dim', type=int, default=100,
                            help='answer and question embedding dimensions')
        parser.add_argument('--n_question', type=int, default=68,
                            help='the number of unique questions in the dataset')
        parser.add_argument('--seqlen', type=int, default=200,
                            help='the allowed maximum length of a sequence')
        parser.add_argument('--data_dir', type=str, default='./data/oj', help='data directory')
        parser.add_argument('--data_name', type=str, default='oj', help='data set name')
        parser.add_argument('--load', type=str, default='oj', help='model file to load')
        parser.add_argument('--save', type=str, default='oj', help='path to save model')
    elif dataset == 'assistments':
        parser.add_argument('--batch_size', type=int, default=32, help='the batch size')
        parser.add_argument('--qa_embed_dim', type=int, default=200,
                            help='answer and question embedding dimensions')
        parser.add_argument('--n_question', type=int, default=124,
                            help='the number of unique questions in the dataset')
        parser.add_argument('--seqlen', type=int, default=200,
                            help='the allowed maximum length of a sequence')
        parser.add_argument('--data_dir', type=str, default='./data/assistments', help='data directory')
        parser.add_argument('--data_name', type=str, default='assistments', help='data set name')
        parser.add_argument('--load', type=str, default='assistments', help='model file to load')
        parser.add_argument('--save', type=str, default='assistments', help='path to save model')
    elif dataset == 'assist2009_updated':
        parser.add_argument('--batch_size', type=int, default=32, help='the batch size')
        parser.add_argument('--qa_embed_dim', type=int, default=200,
                            help='answer and question embedding dimensions')
        parser.add_argument('--n_question', type=int, default=110,
                            help='the number of unique questions in the dataset')
        parser.add_argument('--seqlen', type=int, default=200,
                            help='the allowed maximum length of a sequence')
        parser.add_argument('--data_dir', type=str, default='../../dataset/assist2009_updated',
                            help='data directory')
        parser.add_argument('--data_name', type=str, default='assist2009_updated', help='data set name')
        parser.add_argument('--load', type=str, default='assist2009_updated', help='model file to load')
        parser.add_argument('--save', type=str, default='assist2009_updated', help='path to save model')
    elif dataset == 'STATICS':
        parser.add_argument('--batch_size', type=int, default=10, help='the batch size')
        parser.add_argument('--qa_embed_dim', type=int, default=100,
                            help='answer and question embedding dimensions')
        parser.add_argument('--n_question', type=int, default=1223,
                            help='the number of unique questions in the dataset')
        parser.add_argument('--seqlen', type=int, default=800,
                            help='the allowed maximum length of a sequence')
        parser.add_argument('--data_dir', type=str, default='./data/STATICS', help='data directory')
        parser.add_argument('--data_name', type=str, default='STATICS', help='data set name')
        parser.add_argument('--load', type=str, default='STATICS', help='model file to load')
        parser.add_argument('--save', type=str, default='STATICS', help='path to save model')

    params = parser.parse_args()
    params.lr = params.init_lr
    print(params)

    dat = DataLoader(',', params.seqlen, 1, 0)
    # dat = DATA(n_question=params.n_question, seqlen=params.seqlen, separate_char=',')
    # train_data_path = params.data_dir + "/" + "builder_train.csv"
    # valid_data_path = params.data_dir + "/" + "builder_test.csv"
    train_data_path = params.data_dir + "/" + params.data_name + "_train1.csv"
    valid_data_path = params.data_dir + "/" + params.data_name + "_valid1.csv"
    # test_data_path = params.data_dir + "/" + params.data_name + "_test.csv"
    max_length, min_length, max_q_id = dat.scan_file(train_data_path)

    train_q_data, train_q_t_data, train_answer_data = dat.prepare_model_data(train_data_path, max_q_id)
    train_q_data = np.array(train_q_data)
    print(train_q_data.shape)
    train_q_t_data = np.array(train_q_t_data)
    train_answer_data = np.array(train_answer_data)
    valid_q_data, valid_q_t_data, valid_answer_data = dat.prepare_model_data(valid_data_path, max_q_id)
    valid_q_data = np.array(valid_q_data)
    valid_q_t_data = np.array(valid_q_t_data)
    valid_answer_data = np.array(valid_answer_data)
    # train_q_data, train_q_t_data, train_answer_data = dat.load_data(train_data_path)
    # valid_q_data, valid_q_t_data, valid_answer_data = dat.load_data(valid_data_path)
    # test_q_data, test_q_t_data, test_answer_data = dat.load_data(test_data_path)

    model = MODEL(n_question=params.n_question,
                  hidden_dim=params.hidden_dim,
                  x_embed_dim=params.qa_embed_dim,
                  hidden_layers=params.n_hidden,
                  gpu=params.gpu)
    model.init_embeddings()
    model.init_params()
    # model = torch.load(params.data_dir + "/save/" + params.save)
    # optimizer = optim.SGD(params=model.parameters(), lr=params.lr, momentum=params.momentum)
    optimizer = optim.Adam(params=model.parameters(), lr=params.lr, betas=(0.9, 0.9))

    if params.gpu >= 0:
        print('device: ' + str(params.gpu))
        torch.cuda.set_device(params.gpu)
        model.cuda()

    all_train_loss = {}
    all_train_accuracy = {}
    all_train_auc = {}
    all_valid_loss = {}
    all_valid_accuracy = {}
    all_valid_auc = {}
    best_valid_auc = 0

    for idx in range(params.max_iter):
        train_loss, train_accuracy, train_auc = train(model, idx, params, optimizer, train_q_data,
                                                      train_q_t_data, train_answer_data)
        print('Epoch %d/%d, loss : %3.5f, auc : %3.5f, accuracy : %3.5f' % (
            idx + 1, params.max_iter, train_loss, train_auc, train_accuracy))
        valid_loss, valid_accuracy, valid_auc = test(model, params, optimizer, valid_q_data,
                                                     valid_q_t_data, valid_answer_data)
        print('Epoch %d/%d, valid auc : %3.5f, valid accuracy : %3.5f' % (
            idx + 1, params.max_iter, valid_auc, valid_accuracy))
        # test_loss, test_accuracy, test_auc = test(model, params, optimizer, test_q_data,
        #                                           test_q_t_data, test_answer_data)
        # print('Epoch %d/%d, test auc : %3.5f, test accuracy : %3.5f' % (
        #     idx + 1, params.max_iter, test_auc, test_accuracy))
        all_train_auc[idx + 1] = train_auc
        all_train_accuracy[idx + 1] = train_accuracy
        all_train_loss[idx + 1] = train_loss
        all_valid_loss[idx + 1] = valid_loss
        all_valid_accuracy[idx + 1] = valid_accuracy
        all_valid_auc[idx + 1] = valid_auc

        # output the epoch with the best validation auc
        if valid_auc > best_valid_auc:
            print('%3.4f to %3.4f' % (best_valid_auc, valid_auc))
            best_valid_auc = valid_auc
def main():
    args = parser.parse_args()

    if args.seed is None:
        args.seed = random.randint(1, 10000)
    print("Random Seed: ", args.seed)
    random.seed(args.seed)
    torch.manual_seed(args.seed)
    if args.gpus:
        torch.cuda.manual_seed_all(args.seed)

    time_stamp = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
    if args.evaluate:
        args.results_dir = '/tmp'
    if args.save == '':
        args.save = time_stamp
    save_path = os.path.join(args.results_dir, args.save)
    if not os.path.exists(save_path):
        os.makedirs(save_path)

    if args.gpus is not None:
        args.gpus = [int(i) for i in args.gpus.split(',')]
        device = 'cuda:' + str(args.gpus[0])
        cudnn.benchmark = True
    else:
        device = 'cpu'

    if args.type == 'float64':
        dtype = torch.float64
    elif args.type == 'float32':
        dtype = torch.float32
    elif args.type == 'float16':
        dtype = torch.float16
    else:
        raise ValueError('Wrong type!')  # TODO int8

    if args.model == "recnn":
        print("Training RECNN")
        model = RECNN()
        ex_model = RECNN_Mask()
    else:
        print("Error: no model matched!")

    num_parameters = sum([l.nelement() for l in model.parameters()])
    print(model)
    print('number of parameters: {}'.format(num_parameters))

    # define loss function (criterion) and optimizer
    criterion = torch.nn.MSELoss()
    if args.gpus is not None:
        model = torch.nn.DataParallel(model, args.gpus)
        ex_model = torch.nn.DataParallel(ex_model, args.gpus)
    model.to(device=device, dtype=dtype)
    ex_model.to(device=device, dtype=dtype)
    criterion.to(device=device, dtype=dtype)

    optimizer = torch.optim.SGD(model.parameters(), args.learning_rate, momentum=args.momentum,
                                weight_decay=args.decay, nesterov=True)
    if args.find_clr:
        find_bounds_clr(model, train_loader, optimizer, criterion, device, dtype,
                        min_lr=args.min_lr, max_lr=args.max_lr,
                        step_size=args.epochs_per_step * len(train_loader), mode=args.mode,
                        save_path=save_path)
        return

    if args.clr:
        scheduler = CyclicLR(optimizer, base_lr=args.min_lr, max_lr=args.max_lr,
                             step_size=args.epochs_per_step * len(train_loader), mode=args.mode)
    else:
        scheduler = MultiStepLR(optimizer, milestones=args.schedule, gamma=args.gamma)

    best_test = 0
    # optionally resume from a checkpoint
    data = None
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume, map_location=device)
            args.start_epoch = checkpoint['epoch'] - 1
            best_test = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(args.resume, checkpoint['epoch']))
        elif os.path.isdir(args.resume):
            checkpoint_path = os.path.join(args.resume, 'model_best.pth.tar')
            csv_path = os.path.join(args.resume, 'results.csv')
            print("=> loading checkpoint '{}'".format(checkpoint_path))
            checkpoint = torch.load(checkpoint_path, map_location=device)
            args.start_epoch = checkpoint['epoch']
            best_test = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            ex_model.load_state_dict(checkpoint['state_dict'])
            print("=> loaded checkpoint '{}' (epoch {})".format(checkpoint_path, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    if args.extract_features:
        test_hdf5_list = [x for x in glob.glob(os.path.join(args.h5dir, 'test', '*.h5'))]
        test_hdf5_list.sort()
        print(test_hdf5_list)
        tcnt = 0
        for f in test_hdf5_list:
            h5_file = h5py.File(f, 'r')
            tcnt = tcnt + 1
            if tcnt == 1:
                testx = torch.from_numpy(np.array(h5_file['data']))
                testy = torch.from_numpy(np.array(h5_file['label']))
            else:
                testcx = torch.from_numpy(np.array(h5_file['data']))
                testcy = torch.from_numpy(np.array(h5_file['label']))
                testx = torch.cat((testx, testcx), 0)
                testy = torch.cat((testy, testcy), 0)
        tex_shape = testx.shape
        testx = testx.view(tex_shape[0], 1, tex_shape[1], tex_shape[2], tex_shape[3])
        testxy = torch.utils.data.TensorDataset(testx, testy)
        val_loader = torch.utils.data.DataLoader(testxy, batch_size=args.batch_size, shuffle=False)
        (test_features, test_preds, test_target) = extract_features(model, ex_model, val_loader,
                                                                    criterion, device, dtype)
        test_features_numpy = test_features.cpu().numpy()
        test_preds_numpy = test_preds.cpu().numpy()
        test_target_numpy = test_target.cpu().numpy()
        test_data = {'test_features': test_features_numpy,
                     'test_preds': test_preds_numpy,
                     'test_target': test_target_numpy}
        test_mat_filename = 'test' + args.setting
        scipy.io.savemat(test_mat_filename, test_data)

        train_hdf5_list = [x for x in glob.glob(os.path.join(args.h5dir, 'train', '*.h5'))]
        train_hdf5_list.sort()
        tcnt = 0
        for f in train_hdf5_list:
            h5_file = h5py.File(f, 'r')
            tcnt = tcnt + 1
            if tcnt == 1:
                trainx = torch.from_numpy(np.array(h5_file['data']))
                trainy = torch.from_numpy(np.array(h5_file['label']))
            else:
                traincx = torch.from_numpy(np.array(h5_file['data']))
                traincy = torch.from_numpy(np.array(h5_file['label']))
                trainx = torch.cat((trainx, traincx), 0)
                trainy = torch.cat((trainy, traincy), 0)
        trx_shape = trainx.shape
        trainx = trainx.view(trx_shape[0], 1, trx_shape[1], trx_shape[2], trx_shape[3])
        trainxy = torch.utils.data.TensorDataset(trainx, trainy)
        train_loader = torch.utils.data.DataLoader(trainxy, batch_size=args.batch_size, shuffle=False)
        (train_features, train_preds, train_target) = extract_features(model, ex_model, train_loader,
                                                                       criterion, device, dtype)
        train_features_numpy = train_features.cpu().numpy()
        train_preds_numpy = train_preds.cpu().numpy()
        train_target_numpy = train_target.cpu().numpy()
        train_data = {'train_features': train_features_numpy,
                      'train_preds': train_preds_numpy,
                      'train_target': train_target_numpy}
        train_mat_filename = 'train' + args.setting
        scipy.io.savemat(train_mat_filename, train_data)
        return

    if args.evaluate:
        loss, top1, top5 = test(model, val_loader, criterion, device, dtype)  # TODO
        return

    csv_logger = CsvLogger(filepath=save_path, data=data)
    csv_logger.save_params(sys.argv, args)
    claimed_acc1 = None
    claimed_acc5 = None
    best_test = 10000000
    train_network(args.start_epoch, args.epochs, scheduler, model, train_loader, val_loader,
                  optimizer, criterion, device, dtype, args.batch_size, args.log_interval,
                  csv_logger, save_path, claimed_acc1, claimed_acc5, best_test)
def main():
    args = parser.parse_args()

    if args.seed is None:
        args.seed = random.randint(1, 10000)
    print("Random Seed: ", args.seed)
    random.seed(args.seed)
    torch.manual_seed(args.seed)
    if args.gpus:
        torch.cuda.manual_seed_all(args.seed)

    time_stamp = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
    if args.evaluate:
        args.results_dir = '/tmp'
    if args.save == '':
        args.save = time_stamp
    save_path = os.path.join(args.results_dir, args.save)
    if not os.path.exists(save_path):
        os.makedirs(save_path)

    if args.gpus is not None:
        args.gpus = [int(i) for i in args.gpus.split(',')]
        device = 'cuda:' + str(args.gpus[0])
        cudnn.benchmark = True
    else:
        device = 'cpu'

    if args.type == 'float64':
        dtype = torch.float64
    elif args.type == 'float32':
        dtype = torch.float32
    elif args.type == 'float16':
        dtype = torch.float16
    else:
        raise ValueError('Wrong type!')  # TODO int8

    model = MobileNet2(input_size=args.input_size, scale=args.scaling)
    num_parameters = sum([l.nelement() for l in model.parameters()])
    print(model)
    print('number of parameters: {}'.format(num_parameters))
    print('FLOPs: {}'.format(
        flops_benchmark.count_flops(MobileNet2,
                                    args.batch_size // len(args.gpus) if args.gpus is not None else args.batch_size,
                                    device, dtype, args.input_size, 3, args.scaling)))

    train_loader, val_loader = get_loaders(args.dataroot, args.batch_size, args.batch_size,
                                           args.input_size, args.workers)
    # define loss function (criterion) and optimizer
    criterion = torch.nn.CrossEntropyLoss()
    if args.gpus is not None:
        model = torch.nn.DataParallel(model, args.gpus)
    model.to(device=device, dtype=dtype)
    criterion.to(device=device, dtype=dtype)

    optimizer = torch.optim.SGD(model.parameters(), args.learning_rate, momentum=args.momentum,
                                weight_decay=args.decay, nesterov=True)
    if args.find_clr:
        find_bounds_clr(model, train_loader, optimizer, criterion, device, dtype,
                        min_lr=args.min_lr, max_lr=args.max_lr,
                        step_size=args.epochs_per_step * len(train_loader), mode=args.mode,
                        save_path=save_path)
        return

    if args.clr:
        scheduler = CyclicLR(optimizer, base_lr=args.min_lr, max_lr=args.max_lr,
                             step_size=args.epochs_per_step * len(train_loader), mode=args.mode)
    else:
        scheduler = MultiStepLR(optimizer, milestones=args.schedule, gamma=args.gamma)

    best_test = 0
    # optionally resume from a checkpoint
    data = None
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume, map_location=device)
            args.start_epoch = checkpoint['epoch'] - 1
            best_test = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(args.resume, checkpoint['epoch']))
        elif os.path.isdir(args.resume):
            checkpoint_path = os.path.join(args.resume, 'checkpoint.pth.tar')
            csv_path = os.path.join(args.resume, 'results.csv')
            print("=> loading checkpoint '{}'".format(checkpoint_path))
            checkpoint = torch.load(checkpoint_path, map_location=device)
            args.start_epoch = checkpoint['epoch'] - 1
            best_test = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(checkpoint_path, checkpoint['epoch']))
            data = []
            with open(csv_path) as csvfile:
                reader = csv.DictReader(csvfile)
                for row in reader:
                    data.append(row)
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    if args.evaluate:
        loss, top1, top5 = test(model, val_loader, criterion, device, dtype)  # TODO
        return

    csv_logger = CsvLogger(filepath=save_path, data=data)
    csv_logger.save_params(sys.argv, args)

    claimed_acc1 = None
    claimed_acc5 = None
    if args.input_size in claimed_acc_top1:
        if args.scaling in claimed_acc_top1[args.input_size]:
            claimed_acc1 = claimed_acc_top1[args.input_size][args.scaling]
            claimed_acc5 = claimed_acc_top5[args.input_size][args.scaling]
            csv_logger.write_text('Claimed accuracies are: {:.2f}% top-1, {:.2f}% top-5'.format(
                claimed_acc1 * 100., claimed_acc5 * 100.))
    train_network(args.start_epoch, args.epochs, scheduler, model, train_loader, val_loader,
                  optimizer, criterion, device, dtype, args.batch_size, args.log_interval,
                  csv_logger, save_path, claimed_acc1, claimed_acc5, best_test)
def train_one_dataset(params, file_name, train_q_data, train_qa_data, train_pid,
                      valid_q_data, valid_qa_data, valid_pid,
                      test_q_data, test_qa_data, test_pid):
    # ================================== model initialization ==================================
    model = load_model(params)
    optimizer = torch.optim.Adam(model.parameters(), lr=params.lr, betas=(0.9, 0.999), eps=1e-8)
    print("\n")
    # total_params = sum(p.numel() for p in model.parameters())
    # print(f'{total_params:,} total parameters.')
    # total_trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
    # print(f'{total_trainable_params:,} training parameters.')

    # ================================== start training ==================================
    all_train_loss = {}
    all_train_accuracy = {}
    all_train_auc = {}
    all_valid_loss = {}
    all_valid_accuracy = {}
    all_valid_auc = {}
    all_test_loss = {}
    all_test_accuracy = {}
    all_test_auc = {}
    best_valid_auc = 0
    cur_train_auc = 0
    cur_test_auc = 0

    for idx in range(params.max_iter):
        # train step
        train_loss, train_accuracy, train_auc = train(model, params, optimizer, train_q_data,
                                                      train_qa_data, train_pid, label='Train')
        # validation step
        valid_loss, valid_accuracy, valid_auc = test(model, params, optimizer, valid_q_data,
                                                     valid_qa_data, valid_pid, label='Valid')
        # test step
        test_loss, test_accuracy, test_auc = test(model, params, optimizer, test_q_data,
                                                  test_qa_data, test_pid, label='Test')

        print('epoch', idx + 1)
        print("\ttrain_auc\t", train_auc, "valid_auc\t", valid_auc, "\ttest_auc\t", test_auc)
        print("\ttrain_accuracy\t", train_accuracy, "valid_accuracy\t", valid_accuracy,
              "\ttest_accuracy\t", test_accuracy)
        print("\ttrain_loss\t", train_loss, "valid_loss\t", valid_loss, "test_loss\t", test_loss)

        try_makedirs('model')
        try_makedirs(os.path.join('model', params.model))
        try_makedirs(os.path.join('model', params.model, params.save))

        # all_valid_auc[idx + 1] = valid_auc
        # all_train_auc[idx + 1] = train_auc
        # all_test_auc[idx + 1] = test_auc
        # all_valid_loss[idx + 1] = valid_loss
        # all_train_loss[idx + 1] = train_loss
        # all_test_loss[idx + 1] = test_loss
        # all_valid_accuracy[idx + 1] = valid_accuracy
        # all_train_accuracy[idx + 1] = train_accuracy
        # all_test_accuracy[idx + 1] = test_accuracy

        # output the epoch with the best validation auc
        if valid_auc > best_valid_auc:
            path = os.path.join('model', params.model, params.save, file_name) + '_*'
            for i in glob.glob(path):
                os.remove(i)
            print(best_valid_auc, ' to ', valid_auc)
            best_valid_auc = valid_auc
            cur_train_auc = train_auc
            cur_test_auc = test_auc
            best_epoch = idx + 1
            torch.save({'epoch': idx,
                        'model_state_dict': model.state_dict(),
                        'optimizer_state_dict': optimizer.state_dict(),
                        'loss': train_loss},
                       os.path.join('model', params.model, params.save, file_name) + '_' + str(idx + 1))
        if idx - best_epoch > 40:
            break

    print("cur_train_auc\t", cur_train_auc, "best_valid_auc\t", best_valid_auc, "\n",
          "cur_test_auc\t", cur_test_auc)
    try_makedirs('result')
    try_makedirs(os.path.join('result', params.model))
    try_makedirs(os.path.join('result', params.model, params.save))
    f_save_log = open(os.path.join('result', params.model, params.save, file_name), 'w')
    f_save_log.write("valid_auc:\n" + str(all_valid_auc) + "\n\n")
    f_save_log.write("train_auc:\n" + str(all_train_auc) + "\n\n")
    f_save_log.write("test_auc:\n" + str(all_test_auc) + "\n\n")
    f_save_log.write("valid_loss:\n" + str(all_valid_loss) + "\n\n")
    f_save_log.write("train_loss:\n" + str(all_train_loss) + "\n\n")
    f_save_log.write("test_loss:\n" + str(all_test_loss) + "\n\n")
    f_save_log.write("valid_accuracy:\n" + str(all_valid_accuracy) + "\n\n")
    f_save_log.write("train_accuracy:\n" + str(all_train_accuracy) + "\n\n")
    f_save_log.write("test_accuracy:\n" + str(all_test_accuracy) + "\n\n")
    f_save_log.close()
    return best_epoch
        find_bounds_clr(model, train_loader, optimizer, criterion, device, dtype,
                        min_lr=min_lr, max_lr=max_lr,
                        step_size=epochs_per_step * len(train_loader), mode=mode,
                        save_path=save_path)
        return

    if args.clr:
        scheduler = CyclicLR(optimizer, base_lr=args.min_lr, max_lr=args.max_lr,
                             step_size=args.epochs_per_step * len(train_loader), mode=args.mode)
    else:
        scheduler = MultiStepLR(optimizer, milestones=args.schedule, gamma=args.gamma)

    best_test = 0
    if evaluate == 'true':
        loss, top1, top5 = test(model, val_loader, criterion, device, dtype)  # TODO
        return

    csv_logger = CsvLogger(filepath=save_path, data=data)
    csv_logger.save_params(sys.argv, args)

    claimed_acc1 = None
    claimed_acc5 = None
    if input_size in claimed_acc_top1:
        if scaling in claimed_acc_top1[input_size]:
            claimed_acc1 = claimed_acc_top1[input_size][scaling]
            claimed_acc5 = claimed_acc_top5[input_size][scaling]
            csv_logger.write_text('Claimed accuracies are: {:.2f}% top-1, {:.2f}% top-5'.format(
                claimed_acc1 * 100., claimed_acc5 * 100.))
    train_network(start_epoch, epochs, scheduler, model, train_loader, val_loader, optimizer,
                  criterion, device, dtype, batch_size, log_interval, csv_logger, save_path,
                  claimed_acc1, claimed_acc5,
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--gpu', type=int, default=0)
    parser.add_argument('--max_iter', type=int, default=30, help='number of training epochs')
    parser.add_argument('--decay_epoch', type=int, default=20, help='epoch interval for learning rate decay')
    # NOTE: argparse's type=bool does not parse "False" as False (any non-empty
    # string is truthy); kept as in the original code.
    parser.add_argument('--test', type=bool, default=False, help='enable testing')
    parser.add_argument('--train_test', type=bool, default=True, help='enable testing during training')
    parser.add_argument('--show', type=bool, default=True, help='print progress')
    parser.add_argument('--init_std', type=float, default=0.1, help='weight initialization std')
    parser.add_argument('--init_lr', type=float, default=0.01, help='initial learning rate')
    parser.add_argument('--lr_decay', type=float, default=0.75, help='learning rate decay')
    parser.add_argument('--final_lr', type=float, default=1e-5,
                        help='learning rate will not decrease after hitting this threshold')
    parser.add_argument('--momentum', type=float, default=0.9, help='momentum rate')
    parser.add_argument('--max_grad_norm', type=float, default=3.0, help='maximum gradient norm')
    parser.add_argument('--hidden_dim', type=int, default=64, help='hidden layer dimension')
    parser.add_argument('--n_hidden', type=int, default=2, help='number of hidden layers')
    parser.add_argument('--dataset', type=str, default='assist2009_updated')
    parser.add_argument('--batch_size', type=int, default=32, help='the batch size')
    parser.add_argument('--qa_embed_dim', type=int, default=200, help='answer and question embedding dimensions')
    parser.add_argument('--dropout_rate', type=float, default=0.6)

    # Dataset-specific arguments. parse_known_args (rather than parse_args) is used
    # so that dataset-specific flags on the command line do not error out before
    # they have been added to the parser.
    dataset = parser.parse_known_args()[0].dataset
    if dataset == 'assist2009_updated':
        parser.add_argument('--n_question', type=int, default=110, help='the number of unique questions in the dataset')
        parser.add_argument('--seqlen', type=int, default=200, help='the allowed maximum length of a sequence')
        parser.add_argument('--data_dir', type=str, default='../dataset/assist2009_updated', help='data directory')
        parser.add_argument('--data_name', type=str, default='assist2009_updated', help='data set name')
        parser.add_argument('--load', type=str, default='assist2009_updated', help='model file to load')
        parser.add_argument('--save', type=str, default='assist2009_updated', help='path to save model')
    elif dataset == 'assist2015':
        parser.add_argument('--n_question', type=int, default=100, help='the number of unique questions in the dataset')
        parser.add_argument('--seqlen', type=int, default=200, help='the allowed maximum length of a sequence')
        parser.add_argument('--data_dir', type=str, default='../dataset/assist2015', help='data directory')
        parser.add_argument('--data_name', type=str, default='assist2015', help='data set name')
        parser.add_argument('--load', type=str, default='assist2015', help='model file to load')
        parser.add_argument('--save', type=str, default='assist2015', help='path to save model')
    elif dataset == 'STATICS':
        # (original commented suggestion: qa_embed_dim=100)
        parser.add_argument('--n_question', type=int, default=1223, help='the number of unique questions in the dataset')
        parser.add_argument('--seqlen', type=int, default=200, help='the allowed maximum length of a sequence')
        parser.add_argument('--data_dir', type=str, default='../dataset/STATICS', help='data directory')
        parser.add_argument('--data_name', type=str, default='STATICS', help='data set name')
        parser.add_argument('--load', type=str, default='STATICS', help='model file to load')
        parser.add_argument('--save', type=str, default='STATICS', help='path to save model')
    elif dataset == 'synthetic':
        # (original commented suggestion: qa_embed_dim=100)
        parser.add_argument('--n_question', type=int, default=50, help='the number of unique questions in the dataset')
        parser.add_argument('--seqlen', type=int, default=200, help='the allowed maximum length of a sequence')
        parser.add_argument('--data_dir', type=str, default='../dataset/synthetic', help='data directory')
        parser.add_argument('--data_name', type=str, default='synthetic', help='data set name')
        parser.add_argument('--load', type=str, default='synthetic', help='model file to load')
        parser.add_argument('--save', type=str, default='synthetic', help='path to save model')
    elif dataset == 'assist2017':
        # (original commented suggestion: qa_embed_dim=100)
        parser.add_argument('--n_question', type=int, default=102, help='the number of unique questions in the dataset')
        parser.add_argument('--seqlen', type=int, default=200, help='the allowed maximum length of a sequence')
        parser.add_argument('--data_dir', type=str, default='../dataset/assist2017/train_valid_test', help='data directory')
        parser.add_argument('--data_name', type=str, default='assist2017', help='data set name')
        parser.add_argument('--load', type=str, default='assist2017', help='model file to load')
        parser.add_argument('--save', type=str, default='assist2017', help='path to save model')

    params = parser.parse_args()
    params.lr = params.init_lr
    print(params)

    dat = DATA(n_question=params.n_question, seqlen=params.seqlen, separate_char=',')
    if params.dataset != 'synthetic':
        train_data_path = params.data_dir + "/" + params.data_name + "_train1.csv"
        valid_data_path = params.data_dir + "/" + params.data_name + "_valid1.csv"
        test_data_path = params.data_dir + "/" + params.data_name + "_test.csv"
    else:
        train_data_path = params.data_dir + "/" + "naive_c5_q50_s4000_v0_train1.csv"
        valid_data_path = params.data_dir + "/" + "naive_c5_q50_s4000_v0_valid1.csv"
        test_data_path = params.data_dir + "/" + "naive_c5_q50_s4000_v0_test.csv"

    train_q_data, train_q_t_data, train_answer_data, train_repeated_time_gap, \
        train_past_trail_counts, train_seq_time_gap = dat.load_data(train_data_path)
    valid_q_data, valid_q_t_data, valid_answer_data, valid_repeated_time_gap, \
        valid_past_trail_counts, valid_seq_time_gap = dat.load_data(valid_data_path)
    test_q_data, test_q_t_data, test_answer_data, test_repeated_time_gap, \
        test_past_trail_counts, test_seq_time_gap = dat.load_data(test_data_path)

    model = MODEL(batch_size=params.batch_size, seqlen=params.seqlen, n_question=params.n_question,
                  hidden_dim=params.hidden_dim, x_embed_dim=params.qa_embed_dim,
                  hidden_layers=params.n_hidden, dropout_rate=params.dropout_rate, gpu=params.gpu)
    model.init_embeddings()
    model.init_params()
    # NOTE: betas=(0.9, 0.9) deviates from Adam's usual (0.9, 0.999); kept as in the original.
    optimizer = optim.Adam(params=model.parameters(), lr=params.lr, betas=(0.9, 0.9))

    if params.gpu >= 0:
        print('device: ' + str(params.gpu))
        torch.cuda.set_device(params.gpu)
        model.cuda()

    best_valid_auc = 0
    cur_test_auc = 0
    cur_train_auc = 0
    for idx in range(params.max_iter):
        train_loss, train_accuracy, train_auc = train(
            model, params, optimizer, train_q_data, train_q_t_data, train_answer_data,
            train_repeated_time_gap, train_past_trail_counts, train_seq_time_gap)
        print('Epoch %d/%d, loss : %3.5f, auc : %3.5f, accuracy : %3.5f' % (
            idx + 1, params.max_iter, train_loss, train_auc, train_accuracy))
        valid_loss, valid_accuracy, valid_auc = test(
            model, params, optimizer, valid_q_data, valid_q_t_data, valid_answer_data,
            valid_repeated_time_gap, valid_past_trail_counts, valid_seq_time_gap)
        print('Epoch %d/%d, valid auc : %3.5f, valid accuracy : %3.5f' % (
            idx + 1, params.max_iter, valid_auc, valid_accuracy))
        test_loss, test_accuracy, test_auc = test(
            model, params, optimizer, test_q_data, test_q_t_data, test_answer_data,
            test_repeated_time_gap, test_past_trail_counts, test_seq_time_gap)
        print('Epoch %d/%d, test auc : %3.5f, test accuracy : %3.5f' % (
            idx + 1, params.max_iter, test_auc, test_accuracy))
        # keep the train/test AUC from the epoch with the best validation AUC
        if valid_auc > best_valid_auc:
            print('%3.4f to %3.4f' % (best_valid_auc, valid_auc))
            best_valid_auc = valid_auc
            cur_test_auc = test_auc
            cur_train_auc = train_auc

    print('DATASET: {}, TRAIN AUC: {}, BEST VALID AUC: {}, TEST AUC: {}'.format(
        params.data_name, cur_train_auc, best_valid_auc, cur_test_auc))
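# Re-parsing the command line inside the argument definitions (as above) works but
# is fragile and verbose. A minimal alternative sketch using a dict of per-dataset
# defaults plus parser.set_defaults; DATASET_DEFAULTS and add_dataset_args are
# illustrative helpers (not part of this repository), with values copied from the
# branches above:
DATASET_DEFAULTS = {
    'assist2009_updated': dict(n_question=110, data_dir='../dataset/assist2009_updated'),
    'assist2015': dict(n_question=100, data_dir='../dataset/assist2015'),
    'STATICS': dict(n_question=1223, data_dir='../dataset/STATICS'),
    'synthetic': dict(n_question=50, data_dir='../dataset/synthetic'),
    'assist2017': dict(n_question=102, data_dir='../dataset/assist2017/train_valid_test'),
}

def add_dataset_args(parser, dataset):
    parser.add_argument('--n_question', type=int)
    parser.add_argument('--seqlen', type=int, default=200)
    parser.add_argument('--data_dir', type=str)
    parser.add_argument('--data_name', type=str)
    parser.add_argument('--load', type=str)
    parser.add_argument('--save', type=str)
    # set_defaults fills in the per-dataset values in one shot, with no extra
    # parse_args round-trip
    parser.set_defaults(data_name=dataset, load=dataset, save=dataset,
                        **DATASET_DEFAULTS[dataset])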
# NOTE: the array literal below is truncated in the source; its assignment target
# is not shown in this excerpt.
# ([0.0008591]), ([0.00097999]), ([0.00183782]), ([0.00078619]), ([0.00076167]),
# ([0.00071047]), ([0.00117489]), ([0.00098078]), ([0.00138367]), ([0.00075473])])

for n in range(r.number_of_user):
    # candidate CPU frequency (workload divided by D_n), capped at the per-user maximum f_n
    f_opt = min((r.X_n[n] + r.Y_n[n]) / r.D_n[n], r.f_n[n])
    # quadratic-in-frequency local-execution energy: g * k * cycles * f^2
    e_n = r.g * r.k * (r.X_n[n] + r.Y_n[n]) * math.pow(f_opt, 2)
    r.loc_only_e[n] = e_n

"""
for n in range(r.number_of_user):
    if r.full_offload[n] == 1:
        r.X_n[n] = r.X_n[n] + r.Y_n[n]
        r.Y_n[n] = 0.0
        r.B[n] = r.A[n]
"""

# full 0.012 0.006
g = int((27 + run * 5) / r.number_of_user)
r.set_initial_sub_channel(27 + run * 5, 3, chs=None)
r.set_multipliers(step=0.0002, p_adjust=1.5, v_n=1, var_k=math.pow(10, -10),
                  delta_l_n=0.5 * math.pow(10, -16),
                  delta_var_k=math.pow(10, -18),
                  delta_d_n_k=math.pow(10, -15))
r.set_initial_values()
r.run(run, t=1, t_max=8000, t_delay=2500, t_stable=3000)
ec_o[run], ec_l[run], ec_i[run] = test(r)
run = run + 1

print(ec_o)
print(ec_l)
print("ec_i=", list(ec_i))
print(r.full_offload)  # [6, 6, 10, 6, 13, 6, 8, 7, 10, 10]
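# The loop above instantiates the common dynamic-power model for local execution,
# E = kappa * C * f^2 for C CPU cycles at frequency f. A self-contained sketch of
# that model; the constants below are illustrative, not taken from the simulation:
import math

def local_energy(cycles, deadline, f_max, kappa=1e-28):
    # lowest frequency that meets the deadline, capped at the hardware maximum
    f_opt = min(cycles / deadline, f_max)
    return kappa * cycles * math.pow(f_opt, 2)

# e.g. 1e9 cycles with a 0.5 s deadline on a 2 GHz core -> 0.4 J
print(local_energy(1e9, 0.5, 2e9))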
def train_one_dataset(params, file_name, train_q_data, train_qa_data, valid_q_data, valid_qa_data):
    # ================================== model initialization ==================================
    g_model = MODEL(n_question=params.n_question,
                    seqlen=params.seqlen,
                    batch_size=params.batch_size,
                    q_embed_dim=params.q_embed_dim,
                    qa_embed_dim=params.qa_embed_dim,
                    memory_size=params.memory_size,
                    memory_key_state_dim=params.memory_key_state_dim,
                    memory_value_state_dim=params.memory_value_state_dim,
                    final_fc_dim=params.final_fc_dim)
    # create a module from the Symbol
    net = mx.mod.Module(symbol=g_model.sym_gen(),
                        data_names=['q_data', 'qa_data'],
                        label_names=['target'],
                        context=params.ctx)
    # allocate memory for the given input shapes
    net.bind(data_shapes=[
        mx.io.DataDesc(name='q_data', shape=(params.seqlen, params.batch_size), layout='SN'),
        mx.io.DataDesc(name='qa_data', shape=(params.seqlen, params.batch_size), layout='SN')],
        label_shapes=[mx.io.DataDesc(name='target', shape=(params.seqlen, params.batch_size), layout='SN')])
    # initialize parameters with a normal random initializer
    net.init_params(initializer=mx.init.Normal(sigma=params.init_std))
    # decay the learning rate in the lr_scheduler (step must be an integer)
    lr_scheduler = mx.lr_scheduler.FactorScheduler(
        step=20 * (train_q_data.shape[0] // params.batch_size), factor=0.667, stop_factor_lr=1e-5)
    net.init_optimizer(optimizer='sgd',
                       optimizer_params={'learning_rate': params.lr,
                                         'momentum': params.momentum,
                                         'lr_scheduler': lr_scheduler})
    for parameters in net.get_params()[0]:
        print(parameters, net.get_params()[0][parameters].asnumpy().shape)
    print("\n")

    # ================================== start training ==================================
    all_train_loss = {}
    all_train_accuracy = {}
    all_train_auc = {}
    all_valid_loss = {}
    all_valid_accuracy = {}
    all_valid_auc = {}
    best_valid_auc = 0
    best_epoch = 0
    for idx in range(params.max_iter):
        train_loss, train_accuracy, train_auc = train(net, params, train_q_data, train_qa_data, label='Train')
        valid_loss, valid_accuracy, valid_auc = test(net, params, valid_q_data, valid_qa_data, label='Valid')
        print('epoch', idx + 1)
        print("valid_auc\t", valid_auc, "\ttrain_auc\t", train_auc)
        print("valid_accuracy\t", valid_accuracy, "\ttrain_accuracy\t", train_accuracy)
        print("valid_loss\t", valid_loss, "\ttrain_loss\t", train_loss)
        # record per-epoch metrics so the log file written below is not empty
        all_train_auc[idx + 1] = train_auc
        all_valid_auc[idx + 1] = valid_auc
        all_train_loss[idx + 1] = train_loss
        all_valid_loss[idx + 1] = valid_loss
        all_train_accuracy[idx + 1] = train_accuracy
        all_valid_accuracy[idx + 1] = valid_accuracy
        net.save_checkpoint(prefix=os.path.join('model', params.save, file_name), epoch=idx + 1)
        # remember the epoch with the best validation auc
        if valid_auc > best_valid_auc:
            best_valid_auc = valid_auc
            best_epoch = idx + 1

    f_save_log = open(os.path.join('result', params.save, file_name), 'w')
    f_save_log.write("valid_auc:\n" + str(all_valid_auc) + "\n\n")
    f_save_log.write("train_auc:\n" + str(all_train_auc) + "\n\n")
    f_save_log.write("valid_loss:\n" + str(all_valid_loss) + "\n\n")
    f_save_log.write("train_loss:\n" + str(all_train_loss) + "\n\n")
    f_save_log.write("valid_accuracy:\n" + str(all_valid_accuracy) + "\n\n")
    f_save_log.write("train_accuracy:\n" + str(all_train_accuracy) + "\n\n")
    f_save_log.close()
    return best_epoch
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--gpu', type=int, default=0, help='the gpu will be used, e.g "0,1,2,3"')
    parser.add_argument('--max_iter', type=int, default=50, help='number of training epochs')
    parser.add_argument('--decay_epoch', type=int, default=20, help='epoch interval for learning rate decay')
    parser.add_argument('--test', type=bool, default=False, help='enable testing')
    parser.add_argument('--train_test', type=bool, default=True, help='enable testing during training')
    parser.add_argument('--show', type=bool, default=True, help='print progress')
    parser.add_argument('--init_std', type=float, default=0.1, help='weight initialization std')
    parser.add_argument('--init_lr', type=float, default=0.01, help='initial learning rate')
    parser.add_argument('--lr_decay', type=float, default=0.75, help='learning rate decay')
    parser.add_argument('--final_lr', type=float, default=1e-5,
                        help='learning rate will not decrease after hitting this threshold')
    parser.add_argument('--momentum', type=float, default=0.9, help='momentum rate')
    parser.add_argument('--max_grad_norm', type=float, default=50.0, help='maximum gradient norm')
    parser.add_argument('--first_k', type=int, default=8, help='first k questions excluded from the loss calculation')
    parser.add_argument('--dataset', type=str, default='assist2009_updated')
    parser.add_argument('--train_set', type=int, default=1)
    parser.add_argument('--memory_size', type=int, default=20, help='memory size')
    parser.add_argument('--q_embed_dim', type=int, default=50, help='question embedding dimensions')
    parser.add_argument('--qa_embed_dim', type=int, default=200, help='answer and question embedding dimensions')

    # Dataset-specific arguments; final_fc_dim is a layer width, hence type=int.
    dataset = parser.parse_known_args()[0].dataset
    if dataset == 'assist2009_updated':
        # memory_size: 20, q_embed_dim: 50, qa_embed_dim: 200
        parser.add_argument('--batch_size', type=int, default=128, help='the batch size')
        parser.add_argument('--n_question', type=int, default=110, help='the number of unique questions in the dataset')
        parser.add_argument('--seqlen', type=int, default=200, help='the allowed maximum length of a sequence')
        parser.add_argument('--data_dir', type=str, default='../dataset/assist2009_updated', help='data directory')
        parser.add_argument('--data_name', type=str, default='assist2009_updated', help='data set name')
        parser.add_argument('--load', type=str, default='assist2009_updated', help='model file to load')
        parser.add_argument('--save', type=str, default='assist2009_updated', help='path to save model')
        parser.add_argument('--final_fc_dim', type=int, default=110, help='hidden state dim for final fc layer')
    elif dataset == 'assist2015':
        # memory_size: 50, q_embed_dim: 50, qa_embed_dim: 200
        parser.add_argument('--batch_size', type=int, default=128, help='the batch size')
        parser.add_argument('--n_question', type=int, default=100, help='the number of unique questions in the dataset')
        parser.add_argument('--seqlen', type=int, default=200, help='the allowed maximum length of a sequence')
        parser.add_argument('--data_dir', type=str, default='../dataset/assist2015', help='data directory')
        parser.add_argument('--data_name', type=str, default='assist2015', help='data set name')
        parser.add_argument('--load', type=str, default='assist2015', help='model file to load')
        parser.add_argument('--save', type=str, default='assist2015', help='path to save model')
        parser.add_argument('--final_fc_dim', type=int, default=100, help='hidden state dim for final fc layer')
    elif dataset == 'assist2017':
        # memory_size: 20, q_embed_dim: 50, qa_embed_dim: 100
        parser.add_argument('--batch_size', type=int, default=32, help='the batch size')
        parser.add_argument('--n_question', type=int, default=102, help='the number of unique questions in the dataset')
        parser.add_argument('--seqlen', type=int, default=200, help='the allowed maximum length of a sequence')
        parser.add_argument('--data_dir', type=str, default='../dataset/assist2017/train_valid_test/', help='data directory')
        parser.add_argument('--data_name', type=str, default='assist2017', help='data set name')
        parser.add_argument('--load', type=str, default='assist2017', help='model file to load')
        parser.add_argument('--save', type=str, default='assist2017', help='path to save model')
        parser.add_argument('--final_fc_dim', type=int, default=102, help='hidden state dim for final fc layer')
    elif dataset == 'STATICS':
        # memory_size: 50, q_embed_dim: 50, qa_embed_dim: 100
        parser.add_argument('--batch_size', type=int, default=32, help='the batch size')
        parser.add_argument('--n_question', type=int, default=1223, help='the number of unique questions in the dataset')
        parser.add_argument('--seqlen', type=int, default=200, help='the allowed maximum length of a sequence')
        parser.add_argument('--data_dir', type=str, default='../dataset/STATICS', help='data directory')
        parser.add_argument('--data_name', type=str, default='STATICS', help='data set name')
        parser.add_argument('--load', type=str, default='STATICS', help='model file to load')
        parser.add_argument('--save', type=str, default='STATICS', help='path to save model')
        parser.add_argument('--final_fc_dim', type=int, default=1223, help='hidden state dim for final fc layer')
    elif dataset == 'synthetic':
        # memory_size: 20, q_embed_dim: 50, qa_embed_dim: 100
        parser.add_argument('--batch_size', type=int, default=128, help='the batch size')
        parser.add_argument('--n_question', type=int, default=50, help='the number of unique questions in the dataset')
        parser.add_argument('--seqlen', type=int, default=200, help='the allowed maximum length of a sequence')
        parser.add_argument('--data_dir', type=str, default='../dataset/synthetic/', help='data directory')
        parser.add_argument('--data_name', type=str, default='synthetic', help='data set name')
        parser.add_argument('--load', type=str, default='synthetic', help='model file to load')
        parser.add_argument('--save', type=str, default='synthetic', help='path to save model')
        parser.add_argument('--final_fc_dim', type=int, default=50, help='hidden state dim for final fc layer')

    params = parser.parse_args()
    params.lr = params.init_lr
    params.memory_key_state_dim = params.q_embed_dim
    params.memory_value_state_dim = params.qa_embed_dim
    print(params)

    dat = DATA(n_question=params.n_question, seqlen=params.seqlen, separate_char=',')
    if params.dataset != 'synthetic':
        train_data_path = params.data_dir + "/" + params.data_name + "_train" + str(params.train_set) + ".csv"
        valid_data_path = params.data_dir + "/" + params.data_name + "_valid" + str(params.train_set) + ".csv"
        test_data_path = params.data_dir + "/" + params.data_name + "_test.csv"
    else:
        train_data_path = params.data_dir + "/" + "naive_c5_q50_s4000_v0_train" + str(params.train_set) + ".csv"
        valid_data_path = params.data_dir + "/" + "naive_c5_q50_s4000_v0_valid" + str(params.train_set) + ".csv"
        test_data_path = params.data_dir + "/" + "naive_c5_q50_s4000_v0_test.csv"

    train_q_data, train_qa_data, train_a_data = dat.load_data(train_data_path)
    valid_q_data, valid_qa_data, valid_a_data = dat.load_data(valid_data_path)
    test_q_data, test_qa_data, test_a_data = dat.load_data(test_data_path)

    model = MODEL(n_question=params.n_question,
                  batch_size=params.batch_size,
                  q_embed_dim=params.q_embed_dim,
                  qa_embed_dim=params.qa_embed_dim,
                  memory_size=params.memory_size,
                  memory_key_state_dim=params.memory_key_state_dim,
                  memory_value_state_dim=params.memory_value_state_dim,
                  final_fc_dim=params.final_fc_dim,
                  first_k=params.first_k,
                  gpu=params.gpu)
    model.init_embeddings()
    model.init_params()
    optimizer = optim.Adam(params=model.parameters(), lr=params.lr, betas=(0.9, 0.9))

    if params.gpu >= 0:
        print('device: ' + str(params.gpu))
        torch.cuda.set_device(params.gpu)
        model.cuda()

    best_valid_auc = 0
    correspond_train_auc = 0
    correspond_test_auc = 0
    for idx in range(params.max_iter):
        train_loss, train_accuracy, train_auc = train(model, params, optimizer,
                                                      train_q_data, train_qa_data, train_a_data)
        print('Epoch %d/%d, loss : %3.5f, auc : %3.5f, accuracy : %3.5f' % (
            idx + 1, params.max_iter, train_loss, train_auc, train_accuracy))
        valid_loss, valid_accuracy, valid_auc = test(model, params, optimizer,
                                                     valid_q_data, valid_qa_data, valid_a_data)
        print('Epoch %d/%d, valid auc : %3.5f, valid accuracy : %3.5f' % (
            idx + 1, params.max_iter, valid_auc, valid_accuracy))
        test_loss, test_accuracy, test_auc = test(model, params, optimizer,
                                                  test_q_data, test_qa_data, test_a_data)
        print('Epoch %d/%d, test auc : %3.5f, test accuracy : %3.5f' % (
            idx + 1, params.max_iter, test_auc, test_accuracy))
        # keep the train/test AUC from the epoch with the best validation AUC
        if valid_auc > best_valid_auc:
            print('%3.4f to %3.4f' % (best_valid_auc, valid_auc))
            best_valid_auc = valid_auc
            correspond_train_auc = train_auc
            correspond_test_auc = test_auc

    print("DATASET: {}, MEMO_SIZE: {}, Q_EMBED_SIZE: {}, QA_EMBED_SIZE: {}, LR: {}".format(
        params.data_name, params.memory_size, params.q_embed_dim, params.qa_embed_dim, params.init_lr))
    print("BEST VALID AUC: {}, CORRESPOND TRAIN AUC: {}, CORRESPOND TEST AUC: {}".format(
        best_valid_auc, correspond_train_auc, correspond_test_auc))
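# train() and test() are defined elsewhere in the repo. For reference, a minimal
# sketch of how epoch-level AUC and accuracy can be computed from flattened
# predicted probabilities and binary labels, assuming scikit-learn is available
# (the helper name and 0.5 threshold are illustrative):
import numpy as np
from sklearn import metrics

def auc_and_accuracy(pred_probs, labels, threshold=0.5):
    # pred_probs, labels: 1-D arrays with padded positions already removed
    auc = metrics.roc_auc_score(labels, pred_probs)
    accuracy = np.mean((pred_probs >= threshold).astype(int) == labels)
    return auc, accuracy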
def adv_train_network(start_epoch, epochs, scheduler, model, train_loader, val_loader, optimizer,
                      criterion, device, dtype, batch_size, log_interval, csv_logger, save_path,
                      claimed_acc1, claimed_acc5, best_test, adv_method, eps, adv_w, normalize,
                      args, subts_args=None):
    from layers import NoisedConv2D, NoisedLinear, NoisedConv2DColored

    att_object = adv_method(model, criterion)
    for epoch in trange(start_epoch, epochs + 1):
        train_loss, train_accuracy1, train_accuracy5 = adv_train(
            model, train_loader, epoch, optimizer, criterion, device, dtype,
            batch_size, log_interval, att_object, eps, adv_w, normalize, 0.05)
        test_loss, test_accuracy1, test_accuracy5 = test(model, val_loader, criterion, device, dtype)
        csv_logger.write({'epoch': epoch + 1,
                          'val_error1': 1 - test_accuracy1,
                          'val_error5': 1 - test_accuracy5,
                          'val_loss': test_loss,
                          'train_error1': 1 - train_accuracy1,
                          'train_error5': 1 - train_accuracy5,
                          'train_loss': train_loss})
        save_checkpoint({'epoch': epoch + 1,
                         'state_dict': model.state_dict(),
                         'best_prec1': best_test,
                         'optimizer': optimizer.state_dict()},
                        test_accuracy1 > best_test, filepath=save_path)
        csv_logger.plot_progress(claimed_acc1=claimed_acc1, claimed_acc5=claimed_acc5)
        if test_accuracy1 > best_test:
            best_test = test_accuracy1

        # report the learned noise magnitudes of the noised layers
        for layer in model.modules():
            if isinstance(layer, (NoisedConv2D, NoisedLinear)):
                tqdm.write("Mean of alphas is {}".format(torch.mean(layer.alpha)))
            if isinstance(layer, NoisedConv2DColored):
                try:
                    tqdm.write("Mean of alphas_diag_w is {}+-{} ({}) ".format(
                        torch.mean(torch.abs(layer.alphad_w)),
                        torch.std(torch.abs(layer.alphad_w)),
                        torch.max(torch.abs(layer.alphad_w))))
                    tqdm.write("Mean of alphas_factor_w is {}+-{} ({}) ".format(
                        torch.mean(torch.abs(layer.alphaf_w)),
                        torch.std(layer.alphaf_w),
                        torch.max(torch.abs(layer.alphaf_w))))
                except AttributeError:
                    pass
                try:
                    tqdm.write("Mean of alphas_diag_a is {}+-{} ({}) ".format(
                        torch.mean(torch.abs(layer.alphad_i)),
                        torch.std(torch.abs(layer.alphad_i)),
                        torch.max(torch.abs(layer.alphad_i))))
                    tqdm.write("Mean of alphas_factor_a is {}+-{} ({}) ".format(
                        torch.mean(torch.abs(layer.alphaf_i)),
                        torch.std(layer.alphaf_i),
                        torch.max(torch.abs(layer.alphaf_i))))
                except AttributeError:
                    pass
        scheduler.step()
    csv_logger.write_text('Best accuracy is {:.2f}% top-1'.format(best_test * 100.))
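# The attack interface behind adv_method/att_object is defined elsewhere in the
# repo. As a point of reference only, the classic one-step FGSM perturbation can
# be sketched as follows; the function name and eps default are illustrative, not
# this repository's API:
import torch

def fgsm_perturb(model, criterion, images, labels, eps=0.03):
    # move each input element by eps in the direction that increases the loss
    images = images.clone().detach().requires_grad_(True)
    model.zero_grad()
    loss = criterion(model(images), labels)
    loss.backward()
    adv = images + eps * images.grad.sign()
    return adv.clamp(0, 1).detach()  # keep pixels in a valid range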
def main():
    args = parser.parse_args()
    if args.gpus is not None:
        args.gpus = [int(i) for i in args.gpus.split(',')]
        device = 'cuda:' + str(args.gpus[0])
        cudnn.benchmark = True
    else:
        device = 'cpu'

    if args.type == 'float64':
        dtype = torch.float64
    elif args.type == 'float32':
        dtype = torch.float32
    elif args.type == 'float16':
        dtype = torch.float16
    else:
        raise ValueError('Wrong type!')  # TODO int8

    # 36 per-block settings, written compactly; identical to the original list
    arch_setting = [(24, 1, 1)] * 6 + \
                   [(32, 1, 2)] + [(32, 1, 1)] * 5 + \
                   [(64, 1, 2)] + [(64, 1, 1)] * 5 + \
                   [(96, 1, 1)] * 6 + \
                   [(160, 1, 2)] + [(160, 1, 1)] * 5 + \
                   [(320, 1, 1)] * 6

    print("Evaluate RCNet on CIFAR100")
    bin_setting = [5, 3, 0, 1, 0, 0, 5, 2, 0, 3, 3, 5,
                   5, 5, 3, 3, 5, 6, 5, 1, 6, 5, 4, 6,
                   5, 4, 6, 5, 4, 1, 5, 0, 0, 0, 0, 0]
    cfg4net = create_arch4net(bin_setting, arch_setting)
    model = RCNet(cfg4net)
    file_handler = RCNet

    num_parameters = sum([l.nelement() for l in model.parameters()])
    print(model)
    print('number of parameters: {}'.format(num_parameters))
    print('FLOPs: {}'.format(
        flops_benchmark.count_flops(
            file_handler,
            args.batch_size // len(args.gpus) if args.gpus is not None else args.batch_size,
            device, dtype, args.input_size, 3, args.scaling, cfg4net)))

    testset = torchvision.datasets.CIFAR100(root='./data', train=False, download=True,
                                            transform=transform_test)
    test_loader = torch.utils.data.DataLoader(testset, batch_size=args.batch_size,
                                              shuffle=False, num_workers=args.workers)

    # define loss function (criterion)
    criterion = torch.nn.CrossEntropyLoss()
    if args.gpus is not None:
        model = torch.nn.DataParallel(model, args.gpus)
    model.to(device=device, dtype=dtype)
    criterion.to(device=device, dtype=dtype)

    if os.path.isfile(args.load):
        print("=> loading checkpoint '{}'".format(args.load))
        checkpoint = torch.load(args.load, map_location=device)
        model.load_state_dict(checkpoint['state_dict'])
        loss, top1, top5 = test(model, test_loader, criterion, device, dtype)
        print("=> Test loss: {}, Top 1 accu: {}, Top 5 accu: {}".format(loss, top1, top5))
        return
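# Checkpoints saved from a DataParallel-wrapped model prefix every parameter key
# with 'module.', so load_state_dict fails if saving and loading disagree on the
# wrapping. A small helper (the name is illustrative, not part of this repo) that
# normalizes the keys:
def strip_module_prefix(state_dict):
    # remove the 'module.' prefix that torch.nn.DataParallel adds to parameter names
    return {k[len('module.'):] if k.startswith('module.') else k: v
            for k, v in state_dict.items()}

# usage: model.load_state_dict(strip_module_prefix(checkpoint['state_dict']))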
import time

import run
import run_switch
# import run_cython
import run_list

start = time.time()
run.test()
end = time.time()
py_time = end - start
print("run time = {}".format(py_time))

start = time.time()
run_list.test()
end = time.time()
py_time = end - start
print("run_list time = {}".format(py_time))

start = time.time()
run_switch.test()
end = time.time()
py_time = end - start
print("run_switch time = {}".format(py_time))

# start = time.time()
# run_cython.test()
# end = time.time()
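# Single time.time() deltas are noisy for short-running functions. The standard
# library's timeit repeats the measurement and lets you keep the best run; a
# minimal equivalent for the same three modules:
import timeit

for mod in ('run', 'run_list', 'run_switch'):
    # run test() 10 times per trial, 3 trials, keep the fastest trial
    best = min(timeit.repeat('m.test()', setup='import {} as m'.format(mod),
                             number=10, repeat=3))
    print('{} best of 3 x 10 calls: {:.4f}s'.format(mod, best))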
parser.add_argument('--data_split', type=str, default='train')
parser.add_argument('--fullwiki', action='store_true')
parser.add_argument('--prediction_file', type=str)
parser.add_argument('--sp_threshold', type=float, default=0.3)

config = parser.parse_args()

def _concat(filename):
    # in the fullwiki setting, record/eval files carry a 'fullwiki.' prefix
    if config.fullwiki:
        return 'fullwiki.{}'.format(filename)
    return filename

# config.train_record_file = _concat(config.train_record_file)
config.dev_record_file = _concat(config.dev_record_file)
config.test_record_file = _concat(config.test_record_file)
# config.train_eval_file = _concat(config.train_eval_file)
config.dev_eval_file = _concat(config.dev_eval_file)
config.test_eval_file = _concat(config.test_eval_file)

if config.mode == 'train':
    train(config)
elif config.mode == 'prepro':
    prepro(config)
elif config.mode == 'test':
    test(config)
elif config.mode == 'count':
    cnt_len(config)
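# For example (illustrative filename), the prefixing behaves as:
#   with --fullwiki:    _concat('dev_record.json') -> 'fullwiki.dev_record.json'
#   without --fullwiki: _concat('dev_record.json') -> 'dev_record.json'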
def train_one_dataset(params, file_name, train_q_data, train_qa_data, valid_q_data, valid_qa_data):
    # ================================== model initialization ==================================
    # create the model
    g_model = MODEL(n_question=params.n_question,
                    seqlen=params.seqlen,
                    batch_size=params.batch_size,
                    q_embed_dim=params.q_embed_dim,
                    qa_embed_dim=params.qa_embed_dim,
                    memory_size=params.memory_size,
                    memory_key_state_dim=params.memory_key_state_dim,
                    memory_value_state_dim=params.memory_value_state_dim,
                    final_fc_dim=params.final_fc_dim)
    # create a module from the Symbol:
    #   symbol: the network symbol
    #   context: the execution device (or list of devices)
    #   data_names: list of data variable names
    #   label_names: list of label variable names
    net = mx.mod.Module(symbol=g_model.sym_gen(),
                        data_names=['q_data', 'qa_data'],
                        label_names=['target'],
                        context=params.ctx)
    # allocate memory for the given input shapes, setting up the environment for computation
    net.bind(data_shapes=[
        mx.io.DataDesc(name='q_data', shape=(params.seqlen, params.batch_size), layout='SN'),
        mx.io.DataDesc(name='qa_data', shape=(params.seqlen, params.batch_size), layout='SN')],
        label_shapes=[mx.io.DataDesc(name='target', shape=(params.seqlen, params.batch_size), layout='SN')])
    # initialize parameters with the default random initializer
    net.init_params(initializer=mx.init.Normal(sigma=params.init_std))
    # decay the learning rate in the lr_scheduler (step must be an integer)
    lr_scheduler = mx.lr_scheduler.FactorScheduler(
        step=20 * (train_q_data.shape[0] // params.batch_size), factor=0.667, stop_factor_lr=1e-5)
    # initialize the optimizer
    net.init_optimizer(optimizer='sgd',
                       optimizer_params={'learning_rate': params.lr,
                                         'momentum': params.momentum,
                                         'lr_scheduler': lr_scheduler})
    for parameters in net.get_params()[0]:
        print(parameters, net.get_params()[0][parameters].asnumpy().shape)
    print("\n")

    # ================================== start training ==================================
    all_train_loss = {}
    all_train_accuracy = {}
    all_train_auc = {}
    all_valid_loss = {}
    all_valid_accuracy = {}
    all_valid_auc = {}
    best_valid_auc = 0
    best_epoch = 0
    for idx in range(params.max_iter):
        train_loss, train_accuracy, train_auc = train(net, params, train_q_data, train_qa_data, label='Train')
        valid_loss, valid_accuracy, valid_auc = test(net, params, valid_q_data, valid_qa_data, label='Valid')
        print('epoch', idx + 1)
        print("valid_auc\t", valid_auc, "\ttrain_auc\t", train_auc)
        print("valid_accuracy\t", valid_accuracy, "\ttrain_accuracy\t", train_accuracy)
        print("valid_loss\t", valid_loss, "\ttrain_loss\t", train_loss)
        if not os.path.isdir('model'):
            os.makedirs('model')
        if not os.path.isdir(os.path.join('model', params.save)):
            os.makedirs(os.path.join('model', params.save))
        all_valid_auc[idx + 1] = valid_auc
        all_train_auc[idx + 1] = train_auc
        all_valid_loss[idx + 1] = valid_loss
        all_train_loss[idx + 1] = train_loss
        all_valid_accuracy[idx + 1] = valid_accuracy
        all_train_accuracy[idx + 1] = train_accuracy
        # output the epoch with the best validation auc
        if valid_auc > best_valid_auc:
            best_valid_auc = valid_auc
            best_epoch = idx + 1
            # the epoch argument is fixed at 100, so each save overwrites the
            # previous one: only the model from the best epoch so far is kept
            net.save_checkpoint(prefix=os.path.join('model', params.save, file_name), epoch=100)

    if not os.path.isdir('result'):
        os.makedirs('result')
    if not os.path.isdir(os.path.join('result', params.save)):
        os.makedirs(os.path.join('result', params.save))
    f_save_log = open(os.path.join('result', params.save, file_name), 'w')
    f_save_log.write("valid_auc:\n" + str(all_valid_auc) + "\n\n")
    f_save_log.write("train_auc:\n" + str(all_train_auc) + "\n\n")
    f_save_log.write("valid_loss:\n" + str(all_valid_loss) + "\n\n")
    f_save_log.write("train_loss:\n" + str(all_train_loss) + "\n\n")
    f_save_log.write("valid_accuracy:\n" + str(all_valid_accuracy) + "\n\n")
    f_save_log.write("train_accuracy:\n" + str(all_train_accuracy) + "\n\n")
    f_save_log.close()
    return best_epoch