def test_2layer_net():
    params = init_toy_model()
    X, y = init_toy_data()
    Y_enc = ut.encode_labels(y)

    # Make the net
    layer_1 = layers.Linear(*params['W1'].T.shape, reg='frob', reg_param=0.05,
                            init_vals=(params['W1'].T, params['b1'].ravel()))
    act_1 = layers.Relu()
    layer_2 = layers.Linear(*params['W2'].T.shape, reg='frob', reg_param=0.05,
                            init_vals=(params['W2'].T, params['b2'].ravel()))
    net_2 = nn.Network([layer_1, act_1, layer_2], ls.CrossEntropy(), optim.SGD(lr=1e-5))

    scores = net_2.forward(X)
    correct_scores = np.asarray([
        [-1.07260209,  0.05083871, -0.87253915],
        [-2.02778743, -0.10832494, -1.52641362],
        [-0.74225908,  0.15259725, -0.39578548],
        [-0.38172726,  0.10835902, -0.17328274],
        [-0.64417314, -0.18886813, -0.41106892]])
    diff = np.sum(np.abs(scores - correct_scores))
    assert np.isclose(diff, 0.0, atol=1e-6)

    loss = net_2.loss(X, Y_enc)
    correct_loss = 1.071696123862817
    assert np.isclose(loss, correct_loss, atol=1e-8)
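For context, `ut.encode_labels` is assumed here to one-hot encode the integer class labels so they line up with the `(N, C)` score matrix; a minimal sketch of that assumption (the real helper in `ut` may differ):

import numpy as np

def encode_labels_sketch(y, num_classes=None):
    # One-hot encode integer labels: shape (N,) -> (N, C).
    y = np.asarray(y)
    if num_classes is None:
        num_classes = int(y.max()) + 1
    Y = np.zeros((len(y), num_classes))
    Y[np.arange(len(y)), y] = 1.0
    return Y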
def test_sample(self, filename):
    # Each sample file holds: lr, the shapes (p, q, r), W, b, the flat
    # gradient vector, then the expected W and b after one SGD step.
    with open(filename, 'r') as o:
        lr = float(o.readline().strip())
        p, q, r = list(map(int, o.readline().split()))
        W = np.zeros([p, q])
        b = np.zeros([r])
        grads_flat = np.zeros([p*q + r])
        W_ans = np.zeros([p, q])
        b_ans = np.zeros([r])
        for i in range(p):
            line = list(map(float, o.readline().split()))
            W[i, :] = line
        line = list(map(float, o.readline().split()))
        b[:] = line
        line = list(map(float, o.readline().split()))
        grads_flat[:] = line
        for i in range(p):
            line = list(map(float, o.readline().split()))
            W_ans[i, :] = line
        line = list(map(float, o.readline().split()))
        b_ans[:] = line

    model = TestModel(W, b)
    optimizer = optim.SGD(model, lr)
    optimizer.update(grads_flat)

    self.assertTrue(np.isclose(model.params()[0], W_ans).all(), filename)
    self.assertTrue(np.isclose(model.params()[1], b_ans).all(), filename)
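For reference, a minimal sketch of the `TestModel` this test is assumed to exercise: it simply wraps `W` and `b`, exposes them through `params()`, and `optimizer.update(grads_flat)` is expected to apply `param -= lr * grad` using slices of the flat gradient. The real classes in the project may differ; `sgd_step` below is a hypothetical name for illustration.

import numpy as np

class TestModelSketch:
    """Toy model holding a weight matrix W and a bias vector b."""

    def __init__(self, W, b):
        self.W = W
        self.b = b

    def params(self):
        # Parameters in the same order the flat gradient is packed: W first, then b.
        return [self.W, self.b]

    def sgd_step(self, grads_flat, lr):
        # What optimizer.update(grads_flat) is assumed to do for this model.
        n_w = self.W.size
        self.W -= lr * grads_flat[:n_w].reshape(self.W.shape)
        self.b -= lr * grads_flat[n_w:]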
def main(args):
    train_cfg = config_from_json(args.train_cfg)
    model_cfg = config_from_json(args.model_cfg)
    model_cfg.block_size = model_cfg.max_len // model_cfg.n_blocks
    set_seeds(train_cfg.seed)

    if model_cfg.projection not in ["dense", "cnn"]:
        if args.max_len == 0:
            model_cfg.reduced_max_len = model_cfg.max_len
        else:
            model_cfg.reduced_max_len = args.max_len
        if args.reduce_block_size:
            assert model_cfg.reduced_max_len % model_cfg.n_blocks == 0, \
                "reduced_max_len must be divisible by n_blocks"
            model_cfg.block_size = model_cfg.reduced_max_len // model_cfg.n_blocks
        else:
            assert model_cfg.reduced_max_len % model_cfg.block_size == 0, \
                "reduced_max_len must be divisible by the initial block_size"
            model_cfg.n_blocks = model_cfg.reduced_max_len // model_cfg.block_size
        print("max_len:", model_cfg.reduced_max_len,
              "block_size:", model_cfg.block_size,
              "n_blocks:", model_cfg.n_blocks)
    else:
        if args.max_len != 0:
            warnings.warn("Projection is incompatible with a reduced max_len, using default max_len")

    print("Loading dataset")
    (data, labels), criterion = get_data_and_optimizer_from_dataset(args.data_file, train_cfg.task)
    loader = GlueDataset(data, labels, train_cfg, model_cfg)
    model = BertInnerForSequenceClassification(model_cfg, loader.get_n_labels(), criterion)

    if train_cfg.optimizer == "lamb":
        # Use the fused Apex implementation only when an opt_level is configured.
        if train_cfg.opt_level != "" and train_cfg.opt_level is not None:
            optimizer = apex.optimizers.FusedLAMB(model.parameters(), **train_cfg.optimizer_parameters)
        else:
            optimizer = torch_optimizer.Lamb(model.parameters(), **train_cfg.optimizer_parameters)
    elif train_cfg.optimizer == "radam":
        optimizer = torch_optimizer.RAdam(model.parameters(), **train_cfg.optimizer_parameters)
    elif train_cfg.optimizer == "sgd":
        optimizer = optim.SGD(model.parameters(), **train_cfg.optimizer_parameters)
    else:
        optimizer = optim4GPU(train_cfg, model)

    trainer = GlueTrainer(loader, model, optimizer, args.save_dir, get_device(), train_cfg.parallel)
    if args.load_model != "":
        print("Loading checkpoint")
        trainer.load_model(args.load_model, args.load_dataset_state)

    if not args.eval:
        trainer.train(train_cfg)
    else:
        trainer.eval(train_cfg)
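`config_from_json` is assumed to expose the JSON keys as attributes (e.g. `model_cfg.max_len`, `model_cfg.n_blocks`); a minimal sketch under that assumption, not the project's actual loader:

import json
from types import SimpleNamespace

def config_from_json_sketch(path):
    # Read a JSON config file and return an attribute-style namespace.
    with open(path) as f:
        return SimpleNamespace(**json.load(f))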
def train_framework(train_set, val_set, test_set, epochs=1000, mini_batch_size=50, lr=0.001):
    # Turn autograd off
    torch.set_grad_enabled(False)

    # Net definition
    model = nn.Sequential(nn.Linear(2, 25, activation="relu"), F.ReLU(),
                          nn.Linear(25, 25, activation="relu"), F.ReLU(),
                          nn.Linear(25, 25, activation="relu"), F.ReLU(),
                          nn.Linear(25, 2, activation="relu"))

    # Training params
    opt = optim.SGD(lr, model)
    criterion = losses.LossMSE()

    # Train
    start_time = time.perf_counter()
    history = train_model(model, train_set[0], train_set[1], val_set[0], val_set[1],
                          criterion, opt, epochs, mini_batch_size, pytorch=False, verbose=True)
    end_time = time.perf_counter()

    # Compute final accuracies
    train_acc = compute_accuracy(model, train_set[0], train_set[1], pytorch=False)
    test_acc = compute_accuracy(model, test_set[0], test_set[1], pytorch=False)

    print("\tTraining time : %s s" % (end_time - start_time))
    print("\tAccuracy : train_acc = %s \t test_acc = %s" % (train_acc, test_acc))

    return history, end_time - start_time, (train_acc, test_acc)
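`compute_accuracy` with `pytorch=False` is assumed to run the custom model's forward pass and compare argmax predictions against the targets; a hedged sketch of that idea, assuming tensor inputs with either one-hot or integer labels:

def compute_accuracy_sketch(model, inputs, targets):
    # Fraction of samples whose predicted class (argmax over outputs) matches the target.
    outputs = model.forward(inputs)
    preds = outputs.argmax(dim=1)
    labels = targets.argmax(dim=1) if targets.dim() > 1 else targets
    return (preds == labels).float().mean().item()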
def test_2layer_grad():
    params = init_toy_model()
    X, y = init_toy_data()
    Y_enc = ut.encode_labels(y)

    # Make the net
    layer_1 = layers.Linear(*params['W1'].T.shape, reg='frob', reg_param=0.05,
                            init_vals=(params['W1'].T, params['b1'].ravel()))
    act_1 = layers.Relu()
    layer_2 = layers.Linear(*params['W2'].T.shape, reg='frob', reg_param=0.05,
                            init_vals=(params['W2'].T, params['b2'].ravel()))
    net_2 = nn.Network([layer_1, act_1, layer_2], ls.CrossEntropy(), optim.SGD(lr=1e-5))

    loss = net_2.loss(X, Y_enc)
    net_2.backward()

    def f_change_param(param_name, U):
        if param_name == 3:
            net_2.layers[0].params['b'] = U
        if param_name == 2:
            net_2.layers[0].params['W'] = U
        if param_name == 1:
            net_2.layers[2].params['b'] = U
        if param_name == 0:
            net_2.layers[2].params['W'] = U
        return net_2.loss(X, Y_enc)

    rel_errs = np.empty(4)
    for param_name in range(4):
        f = lambda U: f_change_param(param_name, U)
        if param_name == 3:
            pass_pars = net_2.layers[0].params['b']
        if param_name == 2:
            pass_pars = net_2.layers[0].params['W']
        if param_name == 1:
            pass_pars = net_2.layers[2].params['b']
        if param_name == 0:
            pass_pars = net_2.layers[2].params['W']
        param_grad_num = dutil.grad_check(f, pass_pars, epsilon=1e-5)
        rel_errs[param_name] = ut.rel_error(param_grad_num, net_2.grads[param_name])

    assert np.allclose(rel_errs, np.zeros(4), atol=1e-7)
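`dutil.grad_check` is assumed to be the usual central-difference numerical gradient; a minimal sketch of that technique (the real helper's signature and behavior may differ):

import numpy as np

def grad_check_sketch(f, x, epsilon=1e-5):
    # Estimate df/dx numerically, perturbing one entry of x at a time.
    grad = np.zeros_like(x)
    it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
    while not it.finished:
        idx = it.multi_index
        orig = x[idx]
        x[idx] = orig + epsilon
        f_plus = f(x)
        x[idx] = orig - epsilon
        f_minus = f(x)
        x[idx] = orig
        grad[idx] = (f_plus - f_minus) / (2 * epsilon)
        it.iternext()
    return grad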
    X_train, Y_train = generate_linear(n=100)
    X_test, Y_test = generate_linear(n=100)
elif args.dataset == 'xor':
    X_train, Y_train = generate_XOR_easy()
    X_test, Y_test = generate_XOR_easy()
else:
    raise RuntimeError('Dataset Not Found')

net = Net()

if args.criterion == 'mse':
    criterion = nn.MSE()
elif args.criterion == 'crossentropy':
    criterion = nn.CrossEntropy()
else:
    raise RuntimeError('Criterion Not Found')

if args.optimizer == 'sgd':
    optimizer = optim.SGD(net.parameters(), lr=args.lr, momentum=args.momentum)
elif args.optimizer == 'adagrad':
    optimizer = optim.Adagrad(net.parameters(), lr=args.lr)
else:
    raise RuntimeError('Optimizer Not Found')

model = Model(net, criterion, optimizer)
train_history = model.train(X_train, Y_train, epochs=args.epochs)
test_history = model.test(X_test, Y_test)
show_history(train_history)
show_result(X_test, Y_test, test_history['predict'])
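The data generators are assumed to follow the usual toy setup for this exercise: `generate_linear` labels uniform points by which side of the line y = x they fall on, and `generate_XOR_easy` places points along the two diagonals. A hedged sketch of the linear case only:

import numpy as np

def generate_linear_sketch(n=100):
    # Uniform points in the unit square; label 1 when the point lies on or above y = x.
    pts = np.random.uniform(0.0, 1.0, (n, 2))
    labels = (pts[:, 0] <= pts[:, 1]).astype(int).reshape(-1, 1)
    return pts, labels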
def run():
    with open(args.cfg_path) as f:
        cfg = json.load(f)

    os.environ["CUDA_VISIBLE_DEVICES"] = args.device_ids
    num_GPU = len(args.device_ids.split(','))
    batch_size_train = cfg['train_batch_size'] * num_GPU
    batch_size_valid = cfg['test_batch_size'] * num_GPU
    num_workers = args.num_workers * num_GPU
    data_path = cfg['data_path_40']

    if cfg['image_size'] % cfg['patch_size'] != 0:
        raise Exception('Image size is not divisible by patch size: {} / {}'.format(
            cfg['image_size'], cfg['patch_size']))
    patch_per_side = cfg['image_size'] // cfg['patch_size']
    grid_size = patch_per_side * patch_per_side

    model = MODELS[cfg['model']](num_classes=1, num_nodes=grid_size, use_crf=cfg['use_crf'])
    if args.resume:
        model = load_checkpoint(args, model)
    model = DataParallel(model, device_ids=None)
    model = model.to(device)

    loss_fn = nn.BCEWithLogitsLoss().to(device)
    # torch.optim.SGD takes no `l2_reg` argument; weight_decay already applies L2 regularization.
    optimizer = optim.SGD(model.parameters(), lr=cfg['lr'], momentum=cfg['momentum'],
                          weight_decay=1e-4)

    summary_train = {'epoch': 0, 'step': 0, 'fp': 0, 'tp': 0, 'Neg': 0, 'Pos': 0}
    summary_valid = {'loss': float('inf'), 'step': 0, 'acc': 0}
    summary_writer = SummaryWriter(log_path)
    loss_valid_best = float('inf')
    tumor_all = []
    paracancerous_all = []

    for epoch in range(args.start_epoch, args.end_epoch):
        dataset_train = GridImageDataset(data_path,
                                         cfg['json_path_train'],
                                         cfg['image_size'],
                                         cfg['patch_size'],
                                         cfg['crop_size'],
                                         rand_list=[])
        dataloader_train = DataLoader(dataset_train,
                                      batch_size=batch_size_train,
                                      num_workers=num_workers,
                                      drop_last=True,
                                      shuffle=True)
        dataset_valid = GridImageDataset(data_path,
                                         cfg['json_path_valid'],
                                         cfg['image_size'],
                                         cfg['patch_size'],
                                         cfg['crop_size'],
                                         way="valid")
        dataloader_valid = DataLoader(dataset_valid,
                                      batch_size=batch_size_valid,
                                      num_workers=num_workers,
                                      drop_last=True,
                                      shuffle=True)

        summary_train = train_epoch(epoch, summary_train, cfg, model, loss_fn,
                                    optimizer, dataloader_train)
        torch.save(
            {
                'epoch': summary_train['epoch'],
                'step': summary_train['step'],
                'state_dict': model.module.state_dict()
            }, (ckpt_path_save + '/' + str(epoch) + '.ckpt'))

        summary_writer.add_scalar('train/loss', summary_train['loss'], epoch)
        summary_writer.add_scalar('train/acc', summary_train['acc'], epoch)
        summary_writer.add_scalar('learning_rate', summary_train['lr'], epoch)
        summary_writer.add_scalar('train/Precision', summary_train['Precision'], epoch)
        summary_writer.add_scalar('train/Recall', summary_train['Recall'], epoch)
        summary_writer.add_scalar('train/F1', summary_train['F1'], epoch)

        if epoch % 2 == 0:
            summary_valid = valid_epoch(summary_valid, summary_writer, epoch,
                                        model, loss_fn, dataloader_valid)
            summary_writer.add_scalar('valid/loss', summary_valid['loss'], epoch)
            summary_writer.add_scalar('valid/acc', summary_valid['acc'], epoch)
            summary_writer.add_scalar('valid/Precision', summary_valid['Precision'], epoch)
            summary_writer.add_scalar('valid/Recall', summary_valid['Recall'], epoch)
            summary_writer.add_scalar('valid/F1', summary_valid['F1'], epoch)
            # summary_writer.add_scalar('learning_rate', lr, epoch)

        if summary_valid['loss'] < loss_valid_best:
            loss_valid_best = summary_valid['loss']
            torch.save(
                {
                    'epoch': summary_train['epoch'],
                    'step': summary_train['step'],
                    'state_dict': model.module.state_dict()
                }, os.path.join(ckpt_path_save, 'best.ckpt'))

    summary_writer.close()
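`load_checkpoint` is assumed to restore weights from a file saved in the same `{'epoch', 'step', 'state_dict'}` format used by the `torch.save` calls above, with `args.resume` holding the checkpoint path; a minimal sketch under those assumptions, not the project's actual resume logic:

import torch

def load_checkpoint_sketch(args, model):
    # Restore the bare (non-DataParallel) model weights from a saved checkpoint.
    ckpt = torch.load(args.resume, map_location='cpu')
    model.load_state_dict(ckpt['state_dict'])
    return model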
print("Generating dataset...") nPoints = 1000 train, train_label = generate_disk_dataset(nPoints) test, test_label = generate_disk_dataset(nPoints) # Select model print("Building the model...") model = m.Sequential(m.Linear(2, 25), m.ReLU(), m.Linear(25, 25), m.ReLU(), m.Linear(25, 25), m.ReLU(), m.Linear(25, 2)) #model = m.Sequential(m.Linear(2,25), m.ReLU(), m.Linear(25,2, bias = False)) #model = m.Sequential(m.Linear(2,128), m.ReLU(), m.Linear(128,2)) #model = m.Sequential(m.Linear(2,2)) # #Select optimizer # optim = o.GradientDescent(0.04) optim = o.SGD(10, 0.01) # optim = o.SGDWithRepetition(153,0.05) # #Select loss loss = l.MSE() # loss = l.CrossEntropyLoss() # #Train the model and plot the train loss evolution print("Training the model...") v = optim.train_model(model, train, train_label, loss, n_epochs=100) file = open('train_loss.out', 'w') json.dump(v, file) file.close() fig = plt.figure() plt.plot(