Example #1
def train_model(dataloaders, model, criterion, optimizer, device, num_epochs,
                dataset_size):
    model.to(device)
    since = time.time()
    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        for phase in ['train', 'test']:
            if phase == 'train':
                model.train()
            else:
                model.eval()

            running_loss = 0.0
            running_corrects = 0

            for inputs, labels in tqdm(dataloaders[phase]):
                inputs = inputs.to(device)
                labels = labels.to(device)
                optimizer.zero_grad()

                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(inputs)
                    _, pred = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)

                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(pred == labels.data)

            epoch_loss = running_loss / dataset_size[phase]
            epoch_acc = running_corrects.double() / dataset_size[phase]

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss,
                                                       epoch_acc))

            if phase == 'test' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())

        torch.save(
            best_model_wts,
            osp.join(Config['root_path'], Config['checkpoint_path'],
                     'model.pth'))
        print('Model saved at: {}'.format(
            osp.join(Config['root_path'], Config['checkpoint_path'],
                     'model.pth')))

    time_elapsed = time.time() - since
    print('Time taken to complete training: {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    print('Best acc: {:.4f}'.format(best_acc))
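A minimal sketch of how this function might be driven end to end. The FakeData dataset, ResNet-18 backbone, and Config paths below are stand-ins chosen for illustration, not taken from the original project; the imports also cover the module-level names (time, copy, osp, tqdm, Config) that the excerpt above relies on.

import copy
import os
import os.path as osp
import time

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, models, transforms
from tqdm import tqdm

# Hypothetical config with the keys the function expects; make sure the folder exists.
Config = {'root_path': '.', 'checkpoint_path': 'checkpoints'}
os.makedirs(osp.join(Config['root_path'], Config['checkpoint_path']), exist_ok=True)

# Dummy data standing in for a real image dataset.
data = {x: datasets.FakeData(size=64, image_size=(3, 224, 224), num_classes=2,
                             transform=transforms.ToTensor())
        for x in ['train', 'test']}
dataloaders = {x: DataLoader(data[x], batch_size=16, shuffle=(x == 'train'))
               for x in ['train', 'test']}
dataset_size = {x: len(data[x]) for x in ['train', 'test']}

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = models.resnet18()
model.fc = nn.Linear(model.fc.in_features, 2)
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

train_model(dataloaders, model, criterion, optimizer, device, num_epochs=2,
            dataset_size=dataset_size)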
Example #2
def save_checkpoint(epoch):
    model_folder = "../training/"
    model_out_path = model_folder + "epoch_{}.pth".format(epoch +
                                                          args.save_epoch_bias)
    if not os.path.exists(model_folder):
        os.makedirs(model_folder)
    torch.save(model.state_dict(), model_out_path)
    print("===> Checkpoint saved to {}".format(model_out_path))
Example #3
def train_iters():
    best_valid_acc = 0
    for e in range(EPOCHS):
        print(f"epoch: {e}")
        train_acc, train_loss, train_tgt, train_pred = train(model)
        print(f"training loss: {train_loss:.4f} | training accuracy: {train_acc:.4f} | training precision:"
              f" {precision_score(train_tgt, train_pred):.4f} | training recall:"
              f" {recall_score(train_tgt, train_pred):.4f}")
        valid_acc, valid_loss, expected, prediction = evaluate(model, valid_loader, valid_len)
        print(f"validation loss: {valid_loss:.4f} | validation accuracy: {valid_acc:.4f} | validation precision:"
              f" {precision_score(expected, prediction):.4f} | validation recall: "
              f"{recall_score(expected, prediction):.4f}")
        if best_valid_acc < valid_acc:
            print("new best model! improvement: %f" % (best_valid_acc - valid_acc))
            best_valid_acc = valid_acc
            torch.save(model.state_dict(), 'model.pt')
Example #4
def main():
    model = Net()
    if torch.cuda.is_available():
        model.cuda()
    model.apply(weights_init)

    if args.resume:
        if isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            model.load_state_dict(checkpoint['state_dict'])
            print("=> loaded checkpoint '{}'"
                  .format(args.resume))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    # data processing
    # handled directly inside train()
    # dataParser = DataParser(batch_size)
    loss_function = nn.L1Loss()
    optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=0.9, weight_decay=5e-4)
    # train_scheduler = optim.lr_scheduler.MultiStepLR(optimizer,milestones=settings.MILESTONES,gamma=0.2)#learning rate decay
    scheduler = lr_scheduler.StepLR(optimizer, step_size=args.stepsize, gamma=args.gamma)

    log = Logger(join(TMP_DIR, '%s-%d-log.txt' % ('Adam', args.lr)))
    sys.stdout = log
    train_loss = []
    train_loss_detail = []

    for epoch in range(args.start_epoch, args.maxepoch):
        if epoch == 0:
            print("Performing initial testing...")
            # left empty for now

        tr_avg_loss, tr_detail_loss = train(model=model, optimizer=optimizer, epoch=epoch, save_dir=join(TMP_DIR, 'epoch-%d-training-record' % epoch))
        test()

        log.flush()
        # Save checkpoint
        save_file = os.path.join(TMP_DIR, 'checkpoint_epoch{}.pth'.format(epoch))
        save_checkpoint({'epoch': epoch, 'state_dict': model.state_dict(), 'optimizer': optimizer.state_dict()},
                        filename=save_file)

        scheduler.step()  # adjust the learning rate automatically
        train_loss.append(tr_avg_loss)
        train_loss_detail += tr_detail_loss
Example #5
def train_model(dataloader, model, criterion, optimizer, device, num_epochs,
                dataset_size):
    model.to(device)

    for epoch in range(num_epochs):
        print('-' * 15)
        print('Epoch {}/{}'.format(epoch + 1, num_epochs))

        for phase in ['train', 'val']:  #train and validate every epoch
            if phase == 'train':
                model.train()
            else:
                model.eval()

            running_loss = 0.0

            for i in tqdm(range(len(dataloader[phase].dataset[0]))):
                inputs = dataloader[phase].dataset[0][i]
                #print(inputs.shape)
                labels = dataloader[phase].dataset[1][i]
                #print(labels.shape)
                inputs = inputs.unsqueeze(0)
                labels = labels.unsqueeze(0)
                inputs = inputs.to(device)
                labels = labels.to(device)
                optimizer.zero_grad()

                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(inputs)
                    loss = criterion(outputs, labels)

                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                running_loss += loss.item() * inputs.size(0)

            epoch_loss = running_loss / dataset_size[phase]

            print('{} Loss: {:.4f} '.format(phase, epoch_loss))

    # save the model
    #saved_model = copy.deepcopy(model.state_dict())
    with open(osp.join(Config['path'], "my_model.pth"), "wb") as output_file:
        torch.save(model.state_dict(), output_file)
Example #6
def train(epoch):
    print('#' * 15)
    print('Epoch {}, Latent Size {}'.format(epoch, model.latent_size))
    print('#' * 15)
    model.train()
    for index, (x, _) in enumerate(loader):
        x = x.mean(dim=1, keepdim=True).to(device)
        optimizer.zero_grad()
        x_generated, mu, logvar = model(x)
        loss = get_loss(x_generated, x, mu, logvar)
        loss.backward()
        optimizer.step()
        if index % 100 == 0:
            print('Loss at iteration {0}: {1:.4f}'.format(index, loss.item()))
    if epoch == 4:
        filename = 'epoch{}_ls{}.pkl'.format(epoch, model.latent_size)
        torch.save(model.state_dict(), os.path.join(weights_dir, filename))
    if epoch < 4:
        scheduler.step()
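get_loss is not shown in this excerpt; for a VAE it is normally the reconstruction term plus the KL divergence between the approximate posterior and a standard normal. A sketch of that standard objective is below; the binary cross-entropy reconstruction term assumes the decoder output has been passed through a sigmoid, which is an assumption rather than something the excerpt states.

import torch
import torch.nn.functional as F

def get_loss(x_generated, x, mu, logvar):
    # Reconstruction term: how closely the decoder output matches the input.
    recon = F.binary_cross_entropy(x_generated, x, reduction='sum')
    # KL divergence between N(mu, sigma^2) and N(0, 1), summed over batch and latent dims.
    kld = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())
    return recon + kld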
Example #7
def train_model(dataloaders, model, criterion, optimizer, device, num_epochs, dataset_size):
    model.to(device)
    since = time.time()
    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    acc_list = []
    loss_list = []
    test_acc_list= []
    test_loss_list = []

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        for phase in ['train', 'test']:
            if phase=='train':
                model.train()
            else:
                model.eval()

            running_loss = 0.0
            running_corrects = 0

            for input1, input2, labels in tqdm(dataloaders[phase], position=0, leave=True):
                input1 = input1.to(device)
                input2 = input2.to(device)
                labels = labels.to(device)
                optimizer.zero_grad()

                with torch.set_grad_enabled(phase=='train'):
                    outputs = model(input1, input2)
                    outputs = torch.reshape(outputs, (outputs.shape[0],))
                    outputs = outputs.type(torch.DoubleTensor)
                    labels = labels.type(torch.DoubleTensor)

                    pred = []
                    for i in outputs:
                        if i > 0.5:
                            pred.append(0)
                        else:
                            pred.append(1)

                    pred = torch.FloatTensor(pred)

                    loss = criterion(outputs,labels)

                    if phase=='train':
                        loss.backward()
                        optimizer.step()


                running_loss += loss.item() * input1.size(0)
                running_corrects += torch.sum(pred==labels.data)

            epoch_loss = running_loss / dataset_size[phase]
            epoch_acc = running_corrects.double() / dataset_size[phase]

            if phase == 'train':
                acc_list.append(epoch_acc.item())
                loss_list.append(epoch_loss)
            elif phase == 'test':
                test_acc_list.append(epoch_acc.item())
                test_loss_list.append(epoch_loss)
            
            print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc))

            if phase=='test' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())

        torch.save(best_model_wts, osp.join(Config['root_path'], Config['checkpoint_path'], 'model.pth'))
        print('Model saved at: {}'.format(osp.join(Config['root_path'], Config['checkpoint_path'], 'model.pth')))

    time_elapsed = time.time() - since
    print('Time taken to complete training: {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
    print('Best acc: {:.4f}'.format(best_acc))

    np.savetxt('acc_list.txt', acc_list)
    np.savetxt('test_acc_list.txt', test_acc_list)
    np.savetxt('loss_list.txt', loss_list)
    np.savetxt('test_loss_list.txt', test_loss_list)
Example #8
            epoch_start_time = time.time()
            train(args, model, train_dataset, epoch)
            val_loss = evaluate(args, model, test_dataset)
            print('-' * 89)
            print('| end of epoch {:3d} | time: {:5.2f}s | valid loss {:5.4f} | '.format(
                epoch, (time.time() - epoch_start_time), val_loss))
            print('-' * 89)

            generate_output(args,epoch,model,gen_dataset,startPoint=1500)

            if epoch % args.save_interval == 0:
                # Save the model if the validation loss is the best we've seen so far.
                is_best = val_loss < best_val_loss
                best_val_loss = min(val_loss, best_val_loss)
                model_dictionary = {'epoch': epoch,
                                    'best_loss': best_val_loss,
                                    'state_dict': model.state_dict(),
                                    'optimizer': optimizer.state_dict(),
                                    'args':args
                                    }
                model.save_checkpoint(model_dictionary, is_best)

    except KeyboardInterrupt:
        print('-' * 89)
        print('Exiting from training early')


# Calculate mean and covariance for each channel's prediction errors, and save them with the trained model
print('=> calculating mean and covariance')
means, covs = list(),list()
train_dataset = TimeseriesData.batchify(args, TimeseriesData.trainData, bsz=1)
for channel_idx in range(model.enc_input_size):
Example #9

if use_cuda:
    print('Using GPU')
    model.cuda()
else:
    print('Using CPU')

criterion = nn.CrossEntropyLoss()

# Observe that all parameters are being optimized
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

# Decay LR by a factor of 0.1 every 7 epochs
exp_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

#best_acc = 0
for epoch in range(1, epoch + 1):
    train(epoch)
    validation()
    #acc = 100. * correct / len(val_loader.dataset)
    #if acc > best_acc :
    #best_acc = acc
    #best_model = copy.deepcopy(model.state_dict())
    model_file = experiment + '/model_' + str(epoch) + '.pth'
    torch.save(model.state_dict(), model_file)
    print('Saved model to ' + model_file +
          '. You can run `python evaluate.py --model ' + model_file +
          '` to generate the Kaggle formatted csv file\n')

#model.load_state_dict(best_model)
Example #10
    num_minibatches = len(train_inputs) // minibatch_size

    for epoch in (range(30)):
        # Training
        print("Training")
        # Put the model in training mode
        m.train()
        start_train = time.time()

        for group in tqdm(range(num_minibatches)):
            total_loss = None
            optimizer.zero_grad()
            for i in range(group * minibatch_size,
                           (group + 1) * minibatch_size):
                input_seq = train_inputs[i]
                gold_seq = torch.tensor(train_outputs[i])
                prediction = m(input_seq)
                loss = m.compute_Loss(prediction, gold_seq)
                # Accumulate the loss over the minibatch (initialize on the first example)
                if total_loss is None:
                    total_loss = loss
                else:
                    total_loss += loss
            total_loss = total_loss / 3
            total_loss.backward()
            optimizer.step()
        print("Training time: {} for epoch {}".format(
            time.time() - start_train, epoch))

    torch.save(m.state_dict(), 'model_1.pt')
Example #11
def main():
    """Run training"""
    from model import model

    parser = argparse.ArgumentParser(
        description='PyTorch RNN Prediction Model on Time-series Dataset')
    parser.add_argument(
        '--data',
        type=str,
        default='ecg',
        help=
        'type of the dataset (ecg, gesture, power_demand, space_shuttle, respiration, nyc_taxi)'
    )
    parser.add_argument('--filename',
                        type=str,
                        default='chfdb_chf13_45590.pkl',
                        help='filename of the dataset')
    parser.add_argument(
        '--model',
        type=str,
        default='LSTM',
        help='type of recurrent net (RNN_TANH, RNN_RELU, LSTM, GRU, SRU)')
    parser.add_argument('--augment', type=bool, default=True, help='augment')
    parser.add_argument('--emsize',
                        type=int,
                        default=32,
                        help='size of rnn input features')
    parser.add_argument('--nhid',
                        type=int,
                        default=32,
                        help='number of hidden units per layer')
    parser.add_argument('--nlayers',
                        type=int,
                        default=2,
                        help='number of layers')
    parser.add_argument('--res_connection',
                        action='store_true',
                        help='residual connection')
    parser.add_argument('--lr',
                        type=float,
                        default=0.0002,
                        help='initial learning rate')
    parser.add_argument('--weight_decay',
                        type=float,
                        default=1e-4,
                        help='weight decay')
    parser.add_argument('--clip',
                        type=float,
                        default=10,
                        help='gradient clipping')
    parser.add_argument('--epochs',
                        type=int,
                        default=400,
                        help='upper epoch limit')
    parser.add_argument('--batch_size',
                        type=int,
                        default=64,
                        metavar='N',
                        help='batch size')
    parser.add_argument('--eval_batch_size',
                        type=int,
                        default=64,
                        metavar='N',
                        help='eval_batch size')
    parser.add_argument('--bptt', type=int, default=50, help='sequence length')
    parser.add_argument('--teacher_forcing_ratio',
                        type=float,
                        default=0.7,
                        help='teacher forcing ratio (deprecated)')
    parser.add_argument('--dropout',
                        type=float,
                        default=0.2,
                        help='dropout applied to layers (0 = no dropout)')
    parser.add_argument(
        '--tied',
        action='store_true',
        help='tie the word embedding and softmax weights (deprecated)')
    parser.add_argument('--seed', type=int, default=1111, help='random seed')
    parser.add_argument('--device',
                        type=str,
                        default='cuda',
                        help='cuda or cpu')
    parser.add_argument('--log_interval',
                        type=int,
                        default=10,
                        metavar='N',
                        help='report interval')
    parser.add_argument('--save_interval',
                        type=int,
                        default=10,
                        metavar='N',
                        help='save interval')
    parser.add_argument('--save_fig', action='store_true', help='save figure')
    parser.add_argument(
        '--resume',
        '-r',
        help=
        'use checkpoint model parameters as initial parameters (default: False)',
        action="store_true")
    parser.add_argument(
        '--pretrained',
        '-p',
        help=
        'use checkpoint model parameters and do not train anymore (default: False)',
        action="store_true")
    parser.add_argument('--prediction_window_size',
                        type=int,
                        default=10,
                        help='prediction_window_size')
    args = parser.parse_args()
    # Set the random seed manually for reproducibility.
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)

    ###############################################################################
    # Load data
    ###############################################################################
    TimeseriesData = preprocess_data.PickleDataLoad(
        data_type=args.data,
        filename=args.filename,
        augment_test_data=args.augment)
    train_dataset = TimeseriesData.batchify(args, TimeseriesData.trainData,
                                            args.batch_size)
    test_dataset = TimeseriesData.batchify(args, TimeseriesData.testData,
                                           args.eval_batch_size)
    gen_dataset = TimeseriesData.batchify(args, TimeseriesData.testData, 1)

    ###############################################################################
    # Build the model
    ###############################################################################
    feature_dim = TimeseriesData.trainData.size(1)
    model = model.RNNPredictor(rnn_type=args.model,
                               enc_inp_size=feature_dim,
                               rnn_inp_size=args.emsize,
                               rnn_hid_size=args.nhid,
                               dec_out_size=feature_dim,
                               nlayers=args.nlayers,
                               dropout=args.dropout,
                               tie_weights=args.tied,
                               res_connection=args.res_connection).to(
                                   args.device)
    optimizer = optim.Adam(model.parameters(),
                           lr=args.lr,
                           weight_decay=args.weight_decay)
    criterion = nn.MSELoss()

    ###############################################################################
    # Training code
    ###############################################################################
    def get_batch(args, source, i):
        seq_len = min(args.bptt, len(source) - 1 - i)
        data = source[i:i + seq_len]  # [ seq_len * batch_size * feature_size ]
        target = source[i + 1:i + 1 +
                        seq_len]  # [ (seq_len x batch_size x feature_size) ]
        return data, target

    def generate_output(args,
                        epoch,
                        model,
                        gen_dataset,
                        disp_uncertainty=True,
                        startPoint=500,
                        endPoint=3500):
        if args.save_fig:
            # Turn on evaluation mode which disables dropout.
            model.eval()
            hidden = model.init_hidden(1)
            outSeq = []
            upperlim95 = []
            lowerlim95 = []
            with torch.no_grad():
                for i in range(endPoint):
                    if i >= startPoint:
                        # if disp_uncertainty and epoch > 40:
                        #     outs = []
                        #     model.train()
                        #     for i in range(20):
                        #         out_, hidden_ = model.forward(out+0.01*Variable(torch.randn(out.size())).cuda(),hidden,noise=True)
                        #         outs.append(out_)
                        #     model.eval()
                        #     outs = torch.cat(outs,dim=0)
                        #     out_mean = torch.mean(outs,dim=0) # [bsz * feature_dim]
                        #     out_std = torch.std(outs,dim=0) # [bsz * feature_dim]
                        #     upperlim95.append(out_mean + 2.58*out_std/np.sqrt(20))
                        #     lowerlim95.append(out_mean - 2.58*out_std/np.sqrt(20))

                        out, hidden = model.forward(out, hidden)

                        #print(out_mean,out)

                    else:
                        out, hidden = model.forward(
                            gen_dataset[i].unsqueeze(0), hidden)
                    outSeq.append(out.data.cpu()[0][0].unsqueeze(0))

            outSeq = torch.cat(outSeq, dim=0)  # [seqLength * feature_dim]

            target = preprocess_data.reconstruct(gen_dataset.cpu(),
                                                 TimeseriesData.mean,
                                                 TimeseriesData.std)
            outSeq = preprocess_data.reconstruct(outSeq, TimeseriesData.mean,
                                                 TimeseriesData.std)
            # if epoch>40:
            #     upperlim95 = torch.cat(upperlim95, dim=0)
            #     lowerlim95 = torch.cat(lowerlim95, dim=0)
            #     upperlim95 = preprocess_data.reconstruct(upperlim95.data.cpu().numpy(),TimeseriesData.mean,TimeseriesData.std)
            #     lowerlim95 = preprocess_data.reconstruct(lowerlim95.data.cpu().numpy(),TimeseriesData.mean,TimeseriesData.std)

            plt.figure(figsize=(15, 5))
            for i in range(target.size(-1)):
                plt.plot(target[:, :, i].numpy(),
                         label='Target' + str(i),
                         color='black',
                         marker='.',
                         linestyle='--',
                         markersize=1,
                         linewidth=0.5)
                plt.plot(range(startPoint),
                         outSeq[:startPoint, i].numpy(),
                         label='1-step predictions for target' + str(i),
                         color='green',
                         marker='.',
                         linestyle='--',
                         markersize=1.5,
                         linewidth=1)
                # if epoch>40:
                #     plt.plot(range(startPoint, endPoint), upperlim95[:,i].numpy(), label='upperlim'+str(i),
                #              color='skyblue', marker='.', linestyle='--', markersize=1.5, linewidth=1)
                #     plt.plot(range(startPoint, endPoint), lowerlim95[:,i].numpy(), label='lowerlim'+str(i),
                #              color='skyblue', marker='.', linestyle='--', markersize=1.5, linewidth=1)
                plt.plot(range(startPoint, endPoint),
                         outSeq[startPoint:, i].numpy(),
                         label='Recursive predictions for target' + str(i),
                         color='blue',
                         marker='.',
                         linestyle='--',
                         markersize=1.5,
                         linewidth=1)

            plt.xlim([startPoint - 500, endPoint])
            plt.xlabel('Index', fontsize=15)
            plt.ylabel('Value', fontsize=15)
            plt.title('Time-series Prediction on ' + args.data + ' Dataset',
                      fontsize=18,
                      fontweight='bold')
            plt.legend()
            plt.tight_layout()
            plt.text(startPoint - 500 + 10,
                     target.min(),
                     'Epoch: ' + str(epoch),
                     fontsize=15)
            save_dir = Path(
                'result', args.data,
                args.filename).with_suffix('').joinpath('fig_prediction')
            save_dir.mkdir(parents=True, exist_ok=True)
            plt.savefig(
                save_dir.joinpath('fig_epoch' +
                                  str(epoch)).with_suffix('.png'))
            #plt.show()
            plt.close()
            return outSeq

        else:
            pass

    def evaluate_1step_pred(args, model, test_dataset):
        # Turn on evaluation mode which disables dropout.
        model.eval()
        total_loss = 0
        with torch.no_grad():
            hidden = model.init_hidden(args.eval_batch_size)
            for nbatch, i in enumerate(
                    range(0,
                          test_dataset.size(0) - 1, args.bptt)):

                inputSeq, targetSeq = get_batch(args, test_dataset, i)
                outSeq, hidden = model.forward(inputSeq, hidden)

                loss = criterion(outSeq.view(args.batch_size, -1),
                                 targetSeq.view(args.batch_size, -1))
                hidden = model.repackage_hidden(hidden)
                total_loss += loss.item()

        return total_loss / nbatch

    def train(args, model, train_dataset, epoch):

        with torch.enable_grad():
            # Turn on training mode which enables dropout.
            model.train()
            total_loss = 0
            start_time = time.time()
            hidden = model.init_hidden(args.batch_size)
            for batch, i in enumerate(
                    range(0,
                          train_dataset.size(0) - 1, args.bptt)):
                inputSeq, targetSeq = get_batch(args, train_dataset, i)
                # inputSeq: [ seq_len * batch_size * feature_size ]
                # targetSeq: [ seq_len * batch_size * feature_size ]

                # Starting each batch, we detach the hidden state from how it was previously produced.
                # If we didn't, the model would try backpropagating all the way to start of the dataset.
                hidden = model.repackage_hidden(hidden)
                hidden_ = model.repackage_hidden(hidden)
                optimizer.zero_grad()
                '''Loss1: Free running loss'''
                outVal = inputSeq[0].unsqueeze(0)
                outVals = []
                hids1 = []
                for i in range(inputSeq.size(0)):
                    outVal, hidden_, hid = model.forward(outVal,
                                                         hidden_,
                                                         return_hiddens=True)
                    outVals.append(outVal)
                    hids1.append(hid)
                outSeq1 = torch.cat(outVals, dim=0)
                hids1 = torch.cat(hids1, dim=0)
                loss1 = criterion(
                    outSeq1.contiguous().view(args.batch_size, -1),
                    targetSeq.contiguous().view(args.batch_size, -1))
                '''Loss2: Teacher forcing loss'''
                outSeq2, hidden, hids2 = model.forward(inputSeq,
                                                       hidden,
                                                       return_hiddens=True)
                loss2 = criterion(
                    outSeq2.contiguous().view(args.batch_size, -1),
                    targetSeq.contiguous().view(args.batch_size, -1))
                '''Loss3: Simplified Professor forcing loss'''
                loss3 = criterion(hids1.view(args.batch_size, -1),
                                  hids2.view(args.batch_size, -1).detach())
                '''Total loss = Loss1+Loss2+Loss3'''
                loss = loss1 + loss2 + loss3
                loss.backward()

                # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
                torch.nn.utils.clip_grad_norm_(model.parameters(), args.clip)
                optimizer.step()

                total_loss += loss.item()

                if batch % args.log_interval == 0 and batch > 0:
                    cur_loss = total_loss / args.log_interval
                    elapsed = time.time() - start_time
                    print(
                        '| epoch {:3d} | {:5d}/{:5d} batches | ms/batch {:5.4f} | '
                        'loss {:5.2f} '.format(
                            epoch, batch,
                            len(train_dataset) // args.bptt,
                            elapsed * 1000 / args.log_interval, cur_loss))
                    total_loss = 0
                    start_time = time.time()

    def evaluate(args, model, test_dataset):
        # Turn on evaluation mode which disables dropout.
        model.eval()
        with torch.no_grad():
            total_loss = 0
            hidden = model.init_hidden(args.eval_batch_size)
            nbatch = 1
            for nbatch, i in enumerate(
                    range(0,
                          test_dataset.size(0) - 1, args.bptt)):
                inputSeq, targetSeq = get_batch(args, test_dataset, i)
                # inputSeq: [ seq_len * batch_size * feature_size ]
                # targetSeq: [ seq_len * batch_size * feature_size ]
                hidden_ = model.repackage_hidden(hidden)
                '''Loss1: Free running loss'''
                outVal = inputSeq[0].unsqueeze(0)
                outVals = []
                hids1 = []
                for i in range(inputSeq.size(0)):
                    outVal, hidden_, hid = model.forward(outVal,
                                                         hidden_,
                                                         return_hiddens=True)
                    outVals.append(outVal)
                    hids1.append(hid)
                outSeq1 = torch.cat(outVals, dim=0)
                hids1 = torch.cat(hids1, dim=0)
                loss1 = criterion(
                    outSeq1.contiguous().view(args.batch_size, -1),
                    targetSeq.contiguous().view(args.batch_size, -1))
                '''Loss2: Teacher forcing loss'''
                outSeq2, hidden, hids2 = model.forward(inputSeq,
                                                       hidden,
                                                       return_hiddens=True)
                loss2 = criterion(
                    outSeq2.contiguous().view(args.batch_size, -1),
                    targetSeq.contiguous().view(args.batch_size, -1))
                '''Loss3: Simplified Professor forcing loss'''
                loss3 = criterion(hids1.view(args.batch_size, -1),
                                  hids2.view(args.batch_size, -1).detach())
                '''Total loss = Loss1+Loss2+Loss3'''
                loss = loss1 + loss2 + loss3

                total_loss += loss.item()

        return total_loss / (nbatch + 1)

    # Loop over epochs.
    if args.resume or args.pretrained:
        print("=> loading checkpoint ")
        checkpoint = torch.load(
            Path('save', args.data, 'checkpoint',
                 args.filename).with_suffix('.pth'))
        args, start_epoch, best_val_loss = model.load_checkpoint(
            args, checkpoint, feature_dim)
        optimizer.load_state_dict(checkpoint['optimizer'])
        del checkpoint
        epoch = start_epoch
        print("=> loaded checkpoint")
    else:
        epoch = 1
        start_epoch = 1
        best_val_loss = float('inf')
        print("=> Start training from scratch")
    print('-' * 89)
    print(args)
    print('-' * 89)

    if not args.pretrained:
        # At any point you can hit Ctrl + C to break out of training early.
        try:
            for epoch in range(start_epoch, args.epochs + 1):

                epoch_start_time = time.time()
                train(args, model, train_dataset, epoch)
                val_loss = evaluate(args, model, test_dataset)
                print('-' * 89)
                print(
                    '| end of epoch {:3d} | time: {:5.2f}s | valid loss {:5.4f} | '
                    .format(epoch, (time.time() - epoch_start_time), val_loss))
                print('-' * 89)

                generate_output(args,
                                epoch,
                                model,
                                gen_dataset,
                                startPoint=1500)

                if epoch % args.save_interval == 0:
                    # Save the model if the validation loss is the best we've seen so far.
                    is_best = val_loss < best_val_loss
                    best_val_loss = min(val_loss, best_val_loss)
                    model_dictionary = {
                        'epoch': epoch,
                        'best_loss': best_val_loss,
                        'state_dict': model.state_dict(),
                        'optimizer': optimizer.state_dict(),
                        'args': args
                    }
                    model.save_checkpoint(model_dictionary, is_best)

        except KeyboardInterrupt:
            print('-' * 89)
            print('Exiting from training early')

    # Calculate mean and covariance for each channel's prediction errors, and save them with the trained model
    print('=> calculating mean and covariance')
    means, covs = list(), list()
    train_dataset = TimeseriesData.batchify(args,
                                            TimeseriesData.trainData,
                                            bsz=1)
    for channel_idx in range(model.enc_input_size):
        mean, cov = fit_norm_distribution_param(
            args, model, train_dataset[:TimeseriesData.length], channel_idx)
        means.append(mean), covs.append(cov)
    model_dictionary = {
        'epoch': max(epoch, start_epoch),
        'best_loss': best_val_loss,
        'state_dict': model.state_dict(),
        'optimizer': optimizer.state_dict(),
        'args': args,
        'means': means,
        'covs': covs
    }
    model.save_checkpoint(model_dictionary, True)
    print('-' * 89)
Example #12
    model = model.to(device)
    l1_criterion = l1_criterion.to(device)

if args.pretrained:

    if os.path.isfile(args.pretrained):
        print("===> loading models '{}'".format(args.pretrained))
        checkpoint = torch.load(args.pretrained)
        new_state_dict = OrderedDict()
        for k, v in checkpoint.items():
            if 'module' in k:
                name = k[7:]
            else:
                name = k
            new_state_dict[name] = v
        model_dict = model.state_dict()
        pretrained_dict = {
            k: v
            for k, v in new_state_dict.items() if k in model_dict
        }

        for k, v in model_dict.items():
            if k not in pretrained_dict:
                print(k)
        model.load_state_dict(pretrained_dict, strict=True)

    else:
        print("===> no models found at '{}'".format(args.pretrained))

print("===> Setting Optimizer")
Example #13
def train_model(model,
                device,
                train_data_loader,
                valid_data_loader,
                criterion,
                optimizer,
                scheduler,
                num_epochs=5):
    """
    training

    Parameters
    --------------
    model : DogClassificationModel
        Network model to be trained.
    device : device
        cuda or cpu
    train_data_loader : dataloader
        dataloader for training
    valid_data_loader : dataloader
        dataloader for validation
    criterion : 
        Loss function.
    optimizer :
        Optimizer.
    scheduler : 
        Learning rate scheduler.
    num_epochs : int
        The number of epochs.

    Returns
    --------------
    model : DogClassificationModel
        Trained model.
    """
    since = time.time()
    model = model.to(device)

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        bar = tqdm(total=len(train_data_loader))
        bar.set_description("Epoch: {}/{}".format(epoch + 1, num_epochs))
        """
        Training Phase
        """
        model.train()

        running_loss = 0.0
        running_corrects = 0

        for j, (inputs, labels) in enumerate(train_data_loader):
            optimizer.zero_grad()
            tmp_loss_item = 0.0

            # training
            with torch.set_grad_enabled(True):
                outputs = model(inputs.to(device))
                torch.cuda.empty_cache()

                _, preds = torch.max(outputs, 1)
                loss = criterion(outputs, labels.to(device))

                # backward + optimize only if in training phase
                loss.backward()
                optimizer.step()

                tmp_loss_item = loss.item()

            # statistics
            running_loss += tmp_loss_item * inputs.size(0)
            running_corrects += torch.sum(preds.to('cpu') == labels.data)

            # progress bar
            bar.update(1)
            tmp_loss = float(running_loss /
                             (j + 1)) / 32  # 32: mini-batch size
            tmp_acc = float(running_corrects // (j + 1)) / 32
            bar.set_postfix(OrderedDict(loss=tmp_loss, acc=tmp_acc))

        # update learning rate scheduler
        scheduler.step()

        dataset_size = len(train_data_loader.dataset)
        epoch_loss = running_loss / dataset_size
        epoch_acc = running_corrects.double() / dataset_size
        """
        Validation Phase
        """
        model.eval()  # Set model to evaluation mode

        val_running_loss = 0.0
        val_running_corrects = 0

        # Iterate over data.
        for inputs, labels in valid_data_loader:
            val_inputs = inputs.to(device)
            val_labels = labels.to(device)

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward
            # track history if only in train
            with torch.no_grad():
                val_outputs = model(val_inputs)
                _, preds = torch.max(val_outputs, 1)
                loss = criterion(val_outputs, val_labels)

            # statistics
            val_running_loss += loss.item() * val_inputs.size(0)
            val_running_corrects += torch.sum(preds == val_labels.data)

        dataset_size = len(valid_data_loader.dataset)
        val_epoch_loss = val_running_loss / dataset_size
        val_epoch_acc = val_running_corrects.double() / dataset_size

        print('VALIDATION  Loss: {:.4f} Acc: {:.4f}'.format(
            val_epoch_loss, val_epoch_acc))
        print("Elapsed time: {} [sec]".format(time.time() - since))

        # deep copy the model
        if val_epoch_acc > best_acc:
            best_acc = val_epoch_acc
            best_model_wts = copy.deepcopy(model.state_dict())

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:.4f}'.format(best_acc))

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model
Example #14
        drop_last=False)
    LFW_valid_loader = torch.utils.data.DataLoader(
        dataset.ImageList_x(root=args.root_path,
                            fileList=LFW_valid_std,
                            transform=valid_transform),
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=args.workers,
        pin_memory=False,
        drop_last=False)

    # 4.4 load model
    model_dir = './imageset/label/model_ir_se50.pth'
    pretrained_dict = torch.load(model_dir)
    model = model.Backbone(num_layers=50, drop_ratio=0.6, mode='ir')
    model_dict = model.state_dict()
    pretrained_dict = {
        k: v
        for k, v in pretrained_dict.items() if k in model_dict
    }
    model_dict.update(pretrained_dict)  # merge pretrained weights into the current state dict
    model.load_state_dict(model_dict)
    model = torch.nn.DataParallel(model).to(args.device)

    # 4.5 set loss_function
    loss_function_A = torch.nn.MarginRankingLoss().to(args.device)
    loss_function_B = torch.nn.MSELoss().to(args.device)

    # 4.6 choose optimizer
    optimizer = torch.optim.SGD(model.parameters(),
                                lr=args.lr,
Example #15
def train(model, optimizer, epoch, save_dir):
    dataParser = DataParser(args.batch_size)
    batch_time = Averagvalue()
    data_time = Averagvalue()
    losses = Averagvalue()
    # switch to train mode
    model.train()
    end = time.time()
    epoch_loss = []
    counter = 0


    for batch_index, (images, labels_numpy) in enumerate(generate_minibatches(dataParser, True)):

        # measure data loading time
        data_time.update(time.time()-end)

        labels = []
        if torch.cuda.is_available():
            images = torch.from_numpy(images).cuda()
            for item in labels_numpy:
                labels.append(torch.from_numpy(item).cuda())
        else:
            images = torch.from_numpy(images)
            for item in labels_numpy:
                labels.append(torch.from_numpy(item))

        if torch.cuda.is_available():
            loss = torch.zeros(1).cuda()
        else:
            loss = torch.zeros(1)

        optimizer.zero_grad()
        outputs = model(images)
        # supervision from the four ground-truth maps

        for o in outputs[9:]:  # o2 o3 o4
            t_loss = cross_entropy_loss(o, labels[-1])
            loss = loss + t_loss
        counter += 1

        for c_index, c in enumerate(outputs[:8]):
            loss = loss + cross_entropy_loss(c, labels[c_index])
        loss = loss / 11
        loss.backward()
        acc_score = my_accuracy_score(outputs[9].cpu().detach().numpy(), labels[-1].cpu().detach().numpy())
        print('the acc is:', acc_score)

        # The block below is presumably meant to work around a batch size that is too small
        # if counter == args.itersize:
        #     optimizer.step()
        #     optimizer.zero_grad()
        #     counter = 0

        optimizer.step()
        optimizer.zero_grad()

        # measure the accuracy and record loss
        losses.update(loss.item(),images.size(0))
        epoch_loss.append(loss.item())
        batch_time.update(time.time()-end)
        end = time.time()

        # display and logging
        if not isdir(save_dir):
            os.makedirs(save_dir)
        if batch_index % args.print_freq ==0:
            info = 'Epoch: [{0}/{1}][{2}/{3}] '.format(epoch, args.maxepoch, batch_index, dataParser.steps_per_epoch) + \
                   'Time {batch_time.val:.3f} (avg:{batch_time.avg:.3f}) '.format(batch_time=batch_time) + \
                   'Loss {loss.val:f} (avg:{loss.avg:f}) '.format(
                       loss=losses)
            print(info)

        # torch.save(model,join(save_dir,"checkpoint.pth"))
    # save the parameters once per epoch
    save_checkpoint({'epoch': epoch, 'state_dict': model.state_dict(), 'optimizer': optimizer.state_dict()},
                    filename=join(save_dir, "epoch-%d-checkpoint.pth" % epoch))


    return losses.avg, epoch_loss
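cross_entropy_loss above is defined elsewhere in that project; edge-detection trainers with multiple side outputs like this one commonly use a class-balanced binary cross-entropy, since edge pixels are much rarer than background pixels. The sketch below shows that common formulation; the exact weighting in the original code, and whether the prediction is a logit or a probability, may differ.

import torch
import torch.nn.functional as F

def cross_entropy_loss(prediction, label):
    # Weight each pixel by the frequency of the opposite class so the sparse
    # edge pixels are not drowned out by the abundant background pixels.
    label = label.float()
    num_pos = torch.sum(label == 1).float()
    num_neg = torch.sum(label == 0).float()
    weight = torch.zeros_like(label)
    weight[label == 1] = num_neg / (num_pos + num_neg)
    weight[label == 0] = num_pos / (num_pos + num_neg)
    return F.binary_cross_entropy_with_logits(prediction, label, weight=weight, reduction='sum')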
Example #16
            optimizer.zero_grad()  # zero the gradients
            output = model(data)  # forward pass
            loss = criterion(output, target)  # compute the loss
            loss.backward()  # backpropagation
            optimizer.step()  # optimizer update step

            predictions = output.argmax(dim=1, keepdim=True).squeeze()  # predictions
            correct += (predictions == target).sum().item()  # count correct predictions
            accuracy = correct / (BATCH_SIZE * batch)  # running accuracy

            tepoch.set_postfix(loss=loss.item(), accuracy=100. * accuracy)

    if epoch % 15 == 0:
        print("Epoch done, evaluating:", epoch)
        torch.save(model.state_dict(), "./chkpoint_res.bin")  #每 15 epoch 保存一次
        model.eval()  #测试
        with tqdm(eval_dataloader, unit="batch") as eepoch:
            correct = 0
            batch = 0
            for data, target in eepoch:
                batch += 1
                eepoch.set_description(f"Epoch {epoch}")
                data, target = data.cuda(), target.cuda()
                output = model(data)
                predictions = output.argmax(dim=1, keepdim=True).squeeze()
                correct += (predictions == target).sum().item()
                accuracy = correct / (BATCH_SIZE * batch)

                eepoch.set_postfix(loss=loss.item(), accuracy=100. * accuracy)
Example #17
            print('-' * 89)
            print(
                '| end of epoch {:3d} | time: {:5.2f}s | valid loss {:5.4f} | '
                .format(epoch, (time.time() - epoch_start_time), val_loss))
            print('-' * 89)

            generate_output(args, epoch, model, gen_dataset, startPoint=1500)

            if epoch % args.save_interval == 0:
                # Save the model if the validation loss is the best we've seen so far.
                is_best = val_loss < best_val_loss
                best_val_loss = min(val_loss, best_val_loss)
                model_dictionary = {
                    'epoch': epoch,
                    'best_loss': best_val_loss,
                    'state_dict': model.state_dict(),
                    'optimizer': optimizer.state_dict(),
                    'args': args
                }
                model.save_checkpoint(model_dictionary, is_best)

    except KeyboardInterrupt:
        print('-' * 89)
        print('Exiting from training early')

# Calculate mean and covariance for each channel's prediction errors, and save them with the trained model
print('=> calculating mean and covariance')
means, covs = list(), list()
train_dataset = TimeseriesData.batchify(args, TimeseriesData.trainData, bsz=1)
for channel_idx in range(model.enc_input_size):
    mean, cov = fit_norm_distribution_param(
Example #18
    train()
    val_loss = evaluate(model, val_data)
    print("-" * 89)
    print("| end of epoch {:3d} | time: {:5.2f}s | valid loss {:5.4f} | "
          "".format(
              epoch,
              (time.time() - epoch_start_time),
              val_loss,
          ))
    print("-" * 89)

    if val_loss < best_val_loss:
        best_val_loss = val_loss
        best_model = model

    scheduler.step()

######################################################################
# Evaluate the model with the test dataset
# -------------------------------------
#
# Apply the best model to check the result with the test dataset.

test_loss = evaluate(best_model, test_data)
print("=" * 89)
print("| End of training | test loss {:5.2f} | test ppl {:8.2f}".format(
    test_loss, test_loss))
print("=" * 89)

torch.save(model.state_dict(), "model.pt")
Example #19
    start_time = time.time()
    train_iterator = DataIter(srclocationDatas[0:numTrainData],
                              trgLocationDatas[0:numTrainData], device, 2,
                              centerLocs)
    valid_iterator = DataIter(srclocationDatas[numTrainData:],
                              trgLocationDatas[numTrainData:], device, 2,
                              centerLocs)
    # train
    train_loss = train(model, train_iterator, optimizer, criterion, CLIP)
    # validate
    valid_loss = evaluate(model, valid_iterator, criterion)
    end_time = time.time()
    epoch_mins, epoch_secs = epoch_time(start_time, end_time)
    if valid_loss < best_valid_loss:
        best_valid_loss = valid_loss
        torch.save(model.state_dict(), 'my-model-test.pt')
    print("Epoch:", epoch + 1, "| Time:", epoch_mins, "m", epoch_secs, "s")
    print("\tTrain Loss:", train_loss)
    print("\tVal Loss:", valid_loss)

print('best_valid_loss is ')
print(best_valid_loss)

# Save the training and test data to txt files, and save mu and sig to .npy files
fileTest = open(homeDirectory + 'testPath.txt', mode='w')
fileTrain = open(homeDirectory + 'trainPath.txt', mode='w')
for testPath in paths[int(numTrainPaths):]:
    fileTest.write(testPath)
    fileTest.write('\n')
fileTest.close()
Example #20
    val_acc = 0.0

    model.train()
    for idx, (image, label) in enumerate(train_loader):
        optimizer.zero_grad()
        output = model(image.cuda())
        loss = loss_function(output, label.cuda())
        loss.backward()
        optimizer.step()
        print('Epoch: [{0}][{1}/{2}] loss: {3}'.format(epoch + 1, idx + 1,
                                                       len(train_loader),
                                                       loss.item()))

    model.eval()
    with torch.no_grad():
        for idx, (image, label) in enumerate(val_loader):
            output = model(image.cuda())
            for i in range(BATCH):
                pred = torch.max(output[i])
                for j in range(3):
                    if output[i][j] == pred and label[i][j] == 1.0:
                        val_acc += 1
                        break

    print('Epoch: [{0}] val_acc: {1}'.format(epoch + 1,
                                             val_acc / len(val_label)))
    if val_acc / len(val_label) > best_acc:
        best_acc = val_acc / len(val_label)
        torch.save(model.state_dict(), os.path.join('model', 'model.tar'))
    scheduler.step()