Example #1
def train_bvae(BVAE,optim,train_data_generator,test_data_generator,check_point_dir,epoch_num,writer,output_file_path):
        for epoch in range(epoch_num):
            #running_loss=0.0
            for i,batch in enumerate(train_data_generator,0):
                batch = batch.float().cuda()
                reconstruction,mean,std=BVAE(batch)
                loss=BVAE.compute_loss(batch,reconstruction,mean,std)
                writer.add_scalar("Loss/train", loss, epoch)
                #running_loss+=loss.item()
                optim.zero_grad()
                loss.backward()
                optim.step()
                """if(i%20==19):
                    with open(output_file_path,"a") as ofile:
                        ofile.write('[%d, %5d] loss: %.3f \n' %
                          (epoch + 1, i + 1, running_loss /20))
                        ofile.close()
                    running_loss = 0.0"""
            if(epoch%100==99):
                #print("saving!")
                state = {
                    'checkpoint_num': epoch,
                    'state_dict': BVAE.state_dict(),
                    'optimizer': optim.state_dict(),                
                }
                path=str(epoch+1)+".pt"
                saves=os.path.join(check_point_dir,path)
                torch.save(state,saves)
                with torch.no_grad():  # evaluation pass; no gradients needed
                    for i, batch in enumerate(test_data_generator, 0):
                        batch = batch.float().cuda()
                        reconstruction, mean, std = BVAE(batch)
                        loss = BVAE.compute_loss(batch, reconstruction, mean, std)
                        writer.add_scalar("Loss/test", loss, epoch)
        writer.flush()
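For completeness, a minimal sketch of resuming from one of the checkpoints written above, assuming the same BVAE and optim objects and the 'checkpoint_num'/'state_dict'/'optimizer' keys used in Example #1 (resume_bvae and the file name are hypothetical):

def resume_bvae(BVAE, optim, check_point_dir, checkpoint_name="100.pt"):
    # Restore both the model weights and the optimizer state saved by train_bvae.
    state = torch.load(os.path.join(check_point_dir, checkpoint_name))
    BVAE.load_state_dict(state['state_dict'])
    optim.load_state_dict(state['optimizer'])
    return state['checkpoint_num'] + 1  # epoch to resume training from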
Example #2
 def update_model_state(self, optim):
     """
     :return: dictionary of model parameters to be saved
     """
     return_dict = dict(self.states)  # copy; avoid mutating self.states in place
     return_dict.update({'state_dict': self.model.state_dict(), 'optimizer': optim.state_dict()})
     return return_dict
Example #3
    def save_model(self, model, model2, optim, optim2, epoch, batch_size):
        self.epoch = epoch
        self.batch_size = batch_size
        # Create the save directories if necessary
        save_dir = os.path.join(self.args['save_path'], 'models/')
        os.makedirs(save_dir, exist_ok=True)

        filename = 'gnn.pt'
        path = os.path.join(save_dir, filename)

        data = {
            'model': model.state_dict(),
            'model2': model2.state_dict(),
            'optimizer': optim.state_dict(),
            'optimizer2': optim2.state_dict(),
            'epoch': epoch + 1,
            'loss': self.loss_train,
            'overlap': self.overlap_train
        }
        torch.save(data, path)
        # if batch_size > 1:
        #     save_freq = 10000
        # elif batch_size == 1:
        #     save_freq = 40000
        # if (epoch>0 and epoch%save_freq==0):
        torch.save(data, os.path.join(save_dir, 'gnn_epoch_{}.pt'.format(epoch)))
        print('Model Saved.')
Example #4
def save_model(model,
               filename='trained_model',
               optimizers=None,
               savepoint=None,
               use_datetime=False,
               **kwargs):

    if optimizers is not None:
        if not isinstance(optimizers, list):
            optimizers = [optimizers]
    else:
        optimizers = []

    for k in kwargs:
        filename += '_' + k + '_%f' % kwargs[k]
    if '.' not in filename:
        filename += '.pth'

    path = savepoint

    create_folder(path)
    path = join(path, filename)

    save_dict = dict(model.state_dict())
    for i, optim in enumerate(optimizers):
        save_dict['optim_%i' % i] = optim.state_dict()

    torch.save(save_dict, path)
Example #5
def save(name):
    torch.save(
        {
            'opt': optim.state_dict(),
            'opt_f': feature_optim.state_dict(),
            'net': combined_model.state_dict()
        }, name)
Example #6
def save_model_best_acc(acc, best_acc, net, optim, epoch, save_path, filename):
    """Save a model and its optimizer if its accuracy is better than the saved one

    Args:
        acc (int): performance of the model to save
        best_acc (int): saved model best performance
        net (nn.Module): model to save
        optim (torch.optim): optimizer of the model to save
        epoch (int): number of epochs the model was trained for
        save_path (str): path on disk where to save the model to
        filename (str): filename on disk

    Returns:
        best_acc (int): the saved model best performance
    """
    if acc > best_acc:
        print('Saving ...')
        state = {
            'net': net.state_dict(),
            'acc': acc,
            'epoch': epoch,
            'optim': optim.state_dict()
        }
        torch.save(state, os.path.join(save_path, filename))
        best_acc = acc
    return best_acc
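A typical usage threads best_acc through the training loop; a short sketch (net, optim, train_one_epoch, evaluate, and the paths are hypothetical placeholders):

best_acc = 0.0
for epoch in range(num_epochs):
    train_one_epoch(net, optim)   # hypothetical training step
    acc = evaluate(net)           # hypothetical evaluation returning accuracy
    best_acc = save_model_best_acc(acc, best_acc, net, optim, epoch,
                                   save_path='checkpoints', filename='best.pt')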
Example #7
def save_model(model, optim, epoch, path):
    torch.save(
        {
            'epoch': epoch + 1,
            'state_dict': model.state_dict(),
            'optimizer': optim.state_dict()
        }, path)
Example #8
def save_model(net, optim, epoch, path):
    state_dict = net.state_dict()
    torch.save({
        'epoch': epoch + 1,
        'state_dict': state_dict,
        'optimizer': optim.state_dict(),
        }, path)
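The matching load step for the two savers above is symmetric; a minimal sketch, assuming net and optim were constructed the same way as at save time:

def load_model(net, optim, path):
    checkpoint = torch.load(path)
    net.load_state_dict(checkpoint['state_dict'])
    optim.load_state_dict(checkpoint['optimizer'])
    return checkpoint['epoch']  # epoch to resume from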
Example #9
def train():
    acc = 0
    for epoche in range(epoches):
        print("epoche:", epoche, " start")
        image, label = None, None
        for (image, label) in data_loader_train:
            # image = torchvision.transforms.functional.resize(image)
            res = lenet(image.cuda())
            loss = criterion(res, label.cuda())
            print("epoche :", epoche, " current acc=", acc, " custom loss=",
                  loss.item())
            loss_list.append(loss.item())
            optim.zero_grad()
            loss.backward()
            optim.step()
        acc = batch_test(image, label)
        if epoche % 5 == 0:
            torch.save(
                {
                    'epoch': epoche + 1,
                    'state_dict': lenet.state_dict(),
                    'optimizer': optim.state_dict()
                }, PATH + str(epoche))
        loss_np = np.array(loss_list)
        np.save("loss.npy", loss_np)
        print(loss_np.shape)
        if epoche % val_every_epoche == 0:
            val_list.append(validation())
            np.save("val.npy", np.array(val_list))
Example #10
def train(model, training_data, validation_data, test_data, optim, vocab_size,
          max_tensor_length):
    val_accrs = []
    if saved:
        val_accrs.append(savedModel['config']['maxacc'])
    test_accrs = []
    for i in range(n_epoch):
        start = time.time()
        train_accr, train_loss = train_epoch(model, training_data, optim)
        trainWriter.write(str(train_accr) + "\n")
        # trainWriter.write("\n")
        trainWriter.flush()
        lossWriter.write(str(train_loss) + '\n')
        lossWriter.flush()
        print('\n  - (Training)   accuracy: {accu:3.3f} %, '
              'elapse: {elapse:3.3f} min'.format(accu=100 * train_accr,
                                                 elapse=(time.time() - start) /
                                                 60))
        start = time.time()
        val_accr = eval_epoch(model, validation_data)
        validWriter.write(str(val_accr) + "\n")
        # validWriter.write("\n")
        validWriter.flush()
        print('\n  - (Validation)   accuracy: {accu:3.3f} %, '
              'elapse: {elapse:3.3f} min'.format(accu=100 * val_accr,
                                                 elapse=(time.time() - start) /
                                                 60))
        val_accrs.append(val_accr)
        # print("Accuracies so far: ", val_accrs)

        start = time.time()
        test_accr = test_epoch(model, test_data)
        testWriter.write(str(test_accr) + "\n")
        # validWriter.write("\n")
        testWriter.flush()
        print('\n  - (Test)   accuracy: {accu:3.3f} %, '
              'elapse: {elapse:3.3f} min'.format(accu=100 * test_accr,
                                                 elapse=(time.time() - start) /
                                                 60))
        test_accrs.append(test_accr)
        # print("Accuracies so far: ", val_accrs)

        model_state_dict = model.state_dict()
        config = {
            'max_src_seq_len': max_tensor_length,
            'vocab_size': vocab_size,
            'maxacc': max(val_accrs),
            'dropout': p_dropout
        }
        checkpoint = {
            'model': model_state_dict,
            'epoch': i,
            'optimizer': optim.state_dict(),
            'config': config
        }
        model_name = os.path.join(model_folder, "TypeModel.ckpt")
        if val_accr >= max(val_accrs):
            print("Save model at epoch ", i)
            torch.save(checkpoint, model_name)
Example #11
def save_model(model, optim, epoch, loss, path):
    torch.save(
        {
            'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optim.state_dict(),
            'loss': loss
        }, path)
Example #12
	def save_model(self, model, optim, epoch, best_score, model_path):
		"""save model to local file"""
		torch.save({
			'model_state_dict': model.state_dict(),
			'optimizer_state_dict': optim.state_dict(),
			'epoch': epoch,
			'best_score': best_score
		}, model_path)
Example #13
def save_checkpoint(model: nn.Module, optim: optimizer.Optimizer, epoch_id: int, step: int, best_score: float):
    torch.save({
        _MODEL_STATE_DICT: model.state_dict(),
        _OPTIMIZER_STATE_DICT: optim.state_dict(),
        _EPOCH: epoch_id,
        _STEP: step,
        _BEST_SCORE: best_score
    }, "./result/fr_en/checkpoint.tar")
Example #14
def save_model(model, optim, logs, ckpt_dir, filename):
    file_path = os.path.join(ckpt_dir, filename)
    state = {'model': model.state_dict(),
             'optim': optim.state_dict(),
             'logs': tuple(logs),
             'steps': len(logs)}
    torch.save(state, file_path)
    return
Example #15
	def save_branchyNet(self, path):
		dict_models_branches = {}
		dict_models_branches["model_main_state_dict"] = self.main.state_dict()
		for i, (model, optim) in enumerate(zip(self.models, self.optimizers), 1):
			dict_models_branches["model_branch_%s_state_dict"%(i)] = model.state_dict()
			dict_models_branches["optim_branch_%s_state_dict"%(i)] = optim.state_dict()

		torch.save(dict_models_branches, path)
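Restoring mirrors the same enumeration over branches; a sketch assuming a hypothetical load_branchyNet method on the same class, with the key layout used above:

	def load_branchyNet(self, path):
		state = torch.load(path)
		self.main.load_state_dict(state["model_main_state_dict"])
		for i, (model, optim) in enumerate(zip(self.models, self.optimizers), 1):
			model.load_state_dict(state["model_branch_%s_state_dict"%(i)])
			optim.load_state_dict(state["optim_branch_%s_state_dict"%(i)])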
Example #16
def save_state(net, optim):
    model_states = {'VAE_net': net.state_dict()}
    optim_states = {'VAE_optim': optim.state_dict()}
    states = {'model_states': model_states, 'optim_states': optim_states}

    file_path = "MI_estimator"
    with open(file_path, mode='wb+') as f:
        torch.save(states, f)
Example #17
def loop(data_loader, num_epochs=1000, save_every=1000, train_losses=None, test_losses=None, train_cnts=None, test_cnts=None, dummy=False):
    # avoid mutable default arguments; they persist across calls
    train_losses = [] if train_losses is None else train_losses
    test_losses = [] if test_losses is None else test_losses
    train_cnts = [] if train_cnts is None else train_cnts
    test_cnts = [] if test_cnts is None else test_cnts
    print("starting training loop for data with %s batches"%data_loader.num_batches)
    st = time.time()
    if len(train_losses):
        # resume cnt from last save
        last_save = train_cnts[-1]
        cnt = train_cnts[-1]
    else:
        last_save = 0
        cnt = 0
    v_xn, v_yn = data_loader.validation_data()
    v_x = Variable(torch.FloatTensor(np.swapaxes(v_xn,1,0))).to(DEVICE)
    v_y = Variable(torch.FloatTensor(np.swapaxes(v_yn,1,0))).to(DEVICE)

    if dummy:
        print("WARNING DUMMMY Validation")
        v_x, v_y = get_dummy_data(v_x, v_y)
    for e in range(num_epochs):
        ecnt = 0
        tst = round((time.time()-st)/60., 0)
        if e > 0:  # log progress once per epoch
            print("starting epoch %s, %s mins, loss %s, seen %s, last save at %s" %(e, tst, train_losses[-1], cnt, last_save))
        batch_loss = []
        for b in range(data_loader.num_batches):
            x, y = data_loader.next_batch()
            x = Variable(torch.FloatTensor(np.swapaxes(x,1,0))).to(DEVICE)
            y = Variable(torch.FloatTensor(np.swapaxes(y,1,0))).to(DEVICE)
            if dummy:
                y_pred, loss = train(v_x, v_y, validation=False)
                print('DUMMY test loss', cnt, loss)
            else:
                y_pred, loss = train(x.to(DEVICE),y.to(DEVICE),validation=False)
            train_cnts.append(cnt)
            train_losses.append(loss)

            if cnt % 100 == 0:  # validate every 100 examples
                valy_pred, val_mean_loss = train(v_x,v_y,validation=True)
                test_losses.append(val_mean_loss)
                test_cnts.append(cnt)
            if cnt-last_save >= save_every:
                last_save = cnt
                # find test loss
                print('epoch: {} saving after example {} train loss {} test loss {}'.format(e,cnt,loss,val_mean_loss))
                state = {
                        'train_cnts':train_cnts,
                        'train_losses':train_losses,
                        'test_cnts':  test_cnts,
                        'test_losses':test_losses,
                        'state_dict':lstm.state_dict(),
                        'optimizer':optim.state_dict(),
                         }
                basename = os.path.join(savedir, '%s_%015d'%(model_save_name,cnt))
                plot_losses(train_cnts, train_losses, test_cnts, test_losses, name=basename+'_loss.png')
                save_checkpoint(state, filename=basename+'.pkl')

            cnt+= x.shape[1]
            ecnt+= x.shape[1]
Example #18
	def save_branches(self, best_loss, best_acc, path):
		dict_models_branches = {}
		for i, (model, optim) in enumerate(zip(self.models, self.optimizers), 1):
			dict_models_branches["model_branch_%s_state_dict"%(i)] = model.state_dict()
			dict_models_branches["optim_branch_%s_state_dict"%(i)] = optim.state_dict()

		
		dict_models_branches["best_loss"] = best_loss
		dict_models_branches["best_acc"] = best_acc
		torch.save(dict_models_branches, path)
Example #19
def save_model(epoch, model, optim, filename):
    head, tail = os.path.split(filename)
    if not os.path.exists(head):
        os.makedirs(head)

    torch.save(
        {
            'epoch': epoch,
            'model': model.state_dict(),
            'optimizer': optim.state_dict()
        }, filename)
Example #20
def test(epoch):
    global best_prec1
    model.eval()
    loss = 0
    pred_y = []
    true_y = []

    correct = 0
    ema_correct = 0
    with torch.no_grad():
        for batch_idx, (data, target) in enumerate(target_loader):
            data, target = data.cuda(), target.cuda(non_blocking=True)
            data = data.unsqueeze(1)
            output = model(data)

            target = target.long()
            loss += criterion_cel(output, target).item()  # sum up batch loss

            pred = output.max(
                1, keepdim=True)[1]  # get the index of the max log-probability

            for i in range(len(pred)):
                pred_y.append(pred[i].item())
                true_y.append(target[i].item())

            correct += pred.eq(target.view_as(pred)).sum().item()

    loss /= len(target_loader.dataset)

    utils.cal_acc(true_y, pred_y, NUM_CLASSES)

    print(
        '\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
            loss, correct, len(target_loader.dataset),
            100. * correct / len(target_loader.dataset)))

    prec1 = 100. * correct / len(target_loader.dataset)
    if epoch % 1 == 0:
        is_best = prec1 > best_prec1
        best_prec1 = max(prec1, best_prec1)
        utils.save_checkpoint(
            {
                'epoch': epoch,
                'state_dict': model.state_dict(),
                'best_prec1': best_prec1,
                'optimizer': optimizer.state_dict(),
            }, is_best)
        if is_best:
            global best_gt_y
            global best_pred_y
            best_gt_y = true_y
            best_pred_y = pred_y
Example #21
def save_checkpoint(cr, sr, optim, epoch):
    path = 'checkpoint/L{}_QF{}.pth'.format(PENALTY, QF)
    if not os.path.exists("checkpoint/"):
        os.makedirs("checkpoint/")
    torch.save(
        {
            'epoch': epoch,
            'cr': cr.state_dict(),
            'sr': sr.state_dict(),
            'optim': optim.state_dict()
        }, path)
    print("Checkpoint saved to {}".format(path))
    return path
Example #22
def save_model(path, epoch, iteration, model, optim, optim_method, batch_size,
               mis_class):
    torch.save(
        {
            "model_type": "".join(model.__class__.__name__.split("_")),
            'epoch': epoch,
            'iteration': iteration,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optim.state_dict(),
            'optim_method': optim_method,
            'batch_size': batch_size,
            'mis_class': mis_class
        }, path)
Example #23
def save_checkpoint(args, model, optim, iter):
    cpt = {
        'iter': iter,
        'model_state': model.state_dict(),
        'optim_state': optim.state_dict()
    }

    cpt_name = get_checkpoint_name(args, iter)
    cpt_link = get_checkpoint_name(args, None)
    torch.save(cpt, cpt_name)
    if os.path.exists(cpt_link):
        os.remove(cpt_link)
    os.symlink(os.path.abspath(cpt_name), cpt_link)
Example #24
def build_optim(_model, train_args, checkpoint=None):
    saved_optimizer_state_dict = None
    if checkpoint:
        optim = checkpoint['optim']
        saved_optimizer_state_dict = optim.state_dict()
    else:
        optim = AdamW(_model.parameters(), lr=train_args.lr, eps=1e-8)

    if train_args.train_from is not None and saved_optimizer_state_dict is not None:
        optim.load_state_dict(saved_optimizer_state_dict)
        if train_args.device != 'cpu':
            for state in optim.state.values():
                for k, v in state.items():
                    if torch.is_tensor(v):
                        state[k] = v.cuda(device=train_args.device)
    return optim
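Resuming then mirrors saving; a sketch, assuming the checkpoint file stores the optimizer object under the 'optim' key as build_optim expects (the file path is illustrative):

checkpoint = torch.load('checkpoint.pt') if train_args.train_from is not None else None
optim = build_optim(model, train_args, checkpoint)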
Example #25
def checkpoint(model, model_store_folder, epoch_num, model_name, period, frames,
               bestLoss, loss, auc, optim, scheduler=None):
    print('Saving checkpoints...')
    save = {
        'epoch': epoch_num,
        'state_dict': model.state_dict(),
        'period': period,
        'frames': frames,
        'best_loss': bestLoss,
        'loss': loss,
        'opt_dict': optim.state_dict(),
        'scheduler_dict': scheduler.state_dict() if scheduler is not None else None,
    }

    suffix_latest = '{}.pth'.format(model_name)
    torch.save(save, '{}/{}'.format(model_store_folder, suffix_latest))
Example #26
    def log_and_save_epoch(self, model, optim, epoch, loss):
        epoch = epoch + 1
        self.logger.info(f'####################')
        self.logger.info(f'COMPLETED EPOCH: {epoch}')
        self.logger.info(f'####################')
        # Log document weights - check for sparsity
        doc_weights = model.doc_weights.weight
        proportions = F.softmax(doc_weights, dim=1)
        avg_s_score = np.mean(
            [utils.get_sparsity_score(p) for p in proportions])

        self.logger.info(f'DOCUMENT PROPORTIONS:\n {proportions}')
        self.logger.info(f'AVERAGE SPARSITY SCORE: {avg_s_score}\n')
        self.writer.add_scalar('avg_doc_prop_sparsity_score', avg_s_score,
                               epoch)

        _, max_indices = torch.max(proportions, dim=1)
        max_indices = list(max_indices.cpu().numpy())
        max_counter = Counter(max_indices)

        self.logger.info(
            f'MAXIMUM TOPICS AT INDICES, FREQUENCY: {max_counter}\n')
        self.logger.info(
            f'MOST FREQUENT MAX INDICES: {max_counter.most_common(10)}\n')

        if epoch % self.args.save_step == 0:
            # Visualize document embeddings
            self.writer.add_embedding(
                model.get_doc_vectors(),
                global_step=epoch,
                tag=f'de_epoch_{epoch}',
            )

            # Save checkpoint
            self.logger.info(f'Beginning to save checkpoint')
            self.saver.save_checkpoint({
                'epoch': epoch,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optim.state_dict(),
                'loss': loss,
            })
            self.logger.info(f'Finished saving checkpoint')
Example #27
def save_model(acc, net, optim, epoch, save_path, filename):
    """Save a model and its optimizer

    Args:
        acc (int): performance of the model to save
        net (nn.Module): model to save
        optim (torch.optim): optimizer of the model to save
        epoch (int): number of epochs the model was trained for
        save_path (str): path on disk where to save the model to
        filename (str): filename on disk
    """
    print('Saving ...')
    state = {
        'net': net.state_dict(),
        'acc': acc,
        'epoch': epoch,
        'optim': optim.state_dict()
    }
    torch.save(state, os.path.join(save_path, filename))
Example #28
def train_network(start_epoch, epochs, optim, model, train_loader, val_loader,
                  criterion, mixup, device, dtype, batch_size, log_interval,
                  csv_logger, save_path, claimed_acc1, claimed_acc5, best_test,
                  local_rank, child):
    my_range = range if child else trange
    for epoch in my_range(start_epoch, epochs + 1):
        if not isinstance(optim.scheduler, CyclicLR) and not isinstance(
                optim.scheduler, CosineLR):
            optim.scheduler_step()
        train_loss, train_accuracy1, train_accuracy5 = train(
            model, train_loader, mixup, epoch, optim, criterion, device, dtype,
            batch_size, log_interval, child)
        test_loss, test_accuracy1, test_accuracy5 = test(
            model, val_loader, criterion, device, dtype, child)
        csv_logger.write({
            'epoch': epoch + 1,
            'val_error1': 1 - test_accuracy1,
            'val_error5': 1 - test_accuracy5,
            'val_loss': test_loss,
            'train_error1': 1 - train_accuracy1,
            'train_error5': 1 - train_accuracy5,
            'train_loss': train_loss
        })
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                'best_prec1': best_test,
                'optimizer': optim.state_dict()
            },
            test_accuracy1 > best_test,
            filepath=save_path,
            local_rank=local_rank)

        csv_logger.plot_progress(claimed_acc1=claimed_acc1,
                                 claimed_acc5=claimed_acc5)

        if test_accuracy1 > best_test:
            best_test = test_accuracy1

    csv_logger.write_text('Best accuracy is {:.2f}% top-1'.format(best_test *
                                                                  100.))
Example #29
    def train_loop(self, num_batches=10):
        ecnt = 0
        batch_loss = []
        for b in range(self.data_loader.num_batches):
            xnp, ynp = self.data_loader.next_batch()
            x = Variable(torch.FloatTensor(xnp))
            y = Variable(torch.FloatTensor(ynp))
            y_pred, loss = train(x,y,validation=False)
            train_cnts.append(cnt)
            train_losses.append(loss)
            if cnt % 100 == 0:  # validate every 100 examples
                valy_pred, val_mean_loss = train(v_x,v_y,validation=True)
                test_losses.append(val_mean_loss)
                test_cnts.append(cnt)
            if cnt-last_save >= save_every:
                last_save = cnt
                # find test loss
                print('epoch: {} saving after example {} train loss {} test loss {}'.format(e,cnt,loss,val_mean_loss))
                state = {
                        'train_cnts':train_cnts,
                        'train_losses':train_losses,
                        'test_cnts':  test_cnts,
                        'test_losses':test_losses,
                        'state_dict':lstm.state_dict(),
                        'optimizer':optim.state_dict(),
                         }
                basename = os.path.join(savedir, '%s_%015d'%(model_save_name,cnt))
                n = 500
                plot_losses(rolling_average(train_cnts, n),
                            rolling_average(train_losses, n),
                            rolling_average(test_cnts, n),
                            rolling_average(test_losses, n), name=basename+'_loss.png')
                save_checkpoint(state, filename=basename+'.pkl')

            cnt+= x.shape[1]
            ecnt+= x.shape[1]


loop(data_loader, save_every=save_every, num_epochs=args.num_epochs,
     train_losses=train_losses, test_losses=test_losses,
     train_cnts=train_cnts, test_cnts=test_cnts, dummy=args.dummy)
Example #30
def save_checkpoint(acc, model, optim, epoch, index=False):
    # Save checkpoint.
    print('Saving..')

    if isinstance(model, nn.DataParallel):
        model = model.module

    state = {
        'net': model.state_dict(),
        'optimizer': optim.state_dict(),
        'acc': acc,
        'epoch': epoch,
        'rng_state': torch.get_rng_state()
    }

    if index:
        ckpt_name = 'ckpt_epoch' + str(epoch) + '_' + str(SEED) + '.t7'
    else:
        ckpt_name = 'ckpt_' + str(SEED) + '.t7'

    ckpt_path = os.path.join(LOGDIR, ckpt_name)
    torch.save(state, ckpt_path)
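Because this checkpoint also captures the RNG state, a faithful resume restores it as well; a minimal sketch (load_checkpoint and the path handling are hypothetical):

def load_checkpoint(model, optim, ckpt_path):
    state = torch.load(ckpt_path)
    model.load_state_dict(state['net'])
    optim.load_state_dict(state['optimizer'])
    torch.set_rng_state(state['rng_state'])  # reproduce the saved random stream
    return state['epoch'], state['acc']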