Example #1
def train(model):
    model = model.train()
    optimizer.zero_grad()

    losses = []
    accurate_preds = 0
    all_targets = []
    all_predictions = []
    for d in train_loader:
        inputs = d['input_ids'].to(device)
        masks = d['attention_mask'].to(device)
        all_targets.extend(list(d['targets'].squeeze().numpy()))
        targets = d['targets'].to(device)
        outputs = model(
            input_ids=inputs,
            attention_mask=masks
        )
        _, preds = torch.max(outputs, dim=1)
        loss = criterion(outputs, targets)
        all_predictions.extend(list(preds.cpu().squeeze().numpy()))
        accurate_preds += torch.sum(preds == targets)
        losses.append(loss.item())
        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
        optimizer.step()
        scheduler.step()
        optimizer.zero_grad()  # reset gradients so they do not accumulate across batches
    return accurate_preds / train_len, np.mean(losses), all_targets, all_predictions
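For reference, a minimal, self-contained sketch of the per-batch update order the loop above follows (zero the gradients, forward, loss, backward, clip, optimizer step, scheduler step); the tiny model and random data below are placeholders, not part of the original script:

import torch
import torch.nn as nn

model = nn.Linear(8, 2)
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-3)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10)
criterion = nn.CrossEntropyLoss()

for _ in range(3):                                   # stand-in for the DataLoader loop
    inputs, targets = torch.randn(4, 8), torch.randint(0, 2, (4,))
    optimizer.zero_grad()                            # reset gradients for this batch
    loss = criterion(model(inputs), targets)
    loss.backward()
    nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
    optimizer.step()
    scheduler.step()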
def train(args, data):
    loss_function = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

    for epoch in range(args.epochs):
        running_loss = 0.0

        for i, d in enumerate(data.trainloader, 0):

            inputs, labels = d
            inputs, labels = inputs.cuda(), labels.cuda()
            # Setting the gradients to 0
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = loss_function(outputs, labels)
            
            # Calculate the gradient based on all the parameters
            loss.backward()
            # Update all the parameters based on the gradients
            optimizer.step()

            running_loss += loss.item()
            if i % 2000 == 1999:    # print every 2000 mini-batches
                print('[%d, %5d] loss: %.3f' % (epoch + 1, i + 1, running_loss / 2000))
                running_loss = 0.0
Example #3
def train(args, model, train_dataset, epoch):

    with torch.enable_grad():
        # Turn on training mode which enables dropout.
        model.train()
        total_loss = 0
        start_time = time.time()
        hidden = model.init_hidden(args.batch_size)
        for batch, i in enumerate(range(0, train_dataset.size(0) - 1, args.bptt)):
            inputSeq, targetSeq = get_batch(args,train_dataset, i)
            # inputSeq: [ seq_len * batch_size * feature_size ]
            # targetSeq: [ seq_len * batch_size * feature_size ]

            # Starting each batch, we detach the hidden state from how it was previously produced.
            # If we didn't, the model would try backpropagating all the way to start of the dataset.
            hidden = model.repackage_hidden(hidden)
            hidden_ = model.repackage_hidden(hidden)
            optimizer.zero_grad()

            '''Loss1: Free running loss'''
            outVal = inputSeq[0].unsqueeze(0)
            outVals=[]
            hids1 = []
            for i in range(inputSeq.size(0)):
                outVal, hidden_, hid = model.forward(outVal, hidden_,return_hiddens=True)
                outVals.append(outVal)
                hids1.append(hid)
            outSeq1 = torch.cat(outVals,dim=0)
            hids1 = torch.cat(hids1,dim=0)
            loss1 = criterion(outSeq1.contiguous().view(args.batch_size,-1), targetSeq.contiguous().view(args.batch_size,-1))

            '''Loss2: Teacher forcing loss'''
            outSeq2, hidden, hids2 = model.forward(inputSeq, hidden, return_hiddens=True)
            loss2 = criterion(outSeq2.contiguous().view(args.batch_size, -1), targetSeq.contiguous().view(args.batch_size, -1))

            '''Loss3: Simplified Professor forcing loss'''
            loss3 = criterion(hids1.contiguous().view(args.batch_size,-1), hids2.contiguous().view(args.batch_size,-1).detach())

            '''Total loss = Loss1+Loss2+Loss3'''
            loss = loss1+loss2+loss3
            
            loss.backward()

            # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
            torch.nn.utils.clip_grad_norm_(model.parameters(), args.clip)
            optimizer.step()

            total_loss += loss.item()

            if batch % args.log_interval == 0 and batch > 0:
                cur_loss = total_loss / args.log_interval
                elapsed = time.time() - start_time
                print('| epoch {:3d} | {:5d}/{:5d} batches | ms/batch {:5.4f} | '
                      'loss {:5.5f} '.format(
                    epoch, batch, len(train_dataset) // args.bptt,
                                  elapsed * 1000 / args.log_interval, cur_loss))
                total_loss = 0
                start_time = time.time()
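The `repackage_hidden` calls above cut the autograd graph at each batch boundary; a common implementation of that helper (a sketch, the model's own method may differ) is:

import torch

def repackage_hidden(h):
    """Detach hidden states from their history so backprop stops at the batch boundary."""
    if isinstance(h, torch.Tensor):
        return h.detach()
    return tuple(repackage_hidden(v) for v in h)

# LSTM hidden state: a (h, c) tuple of [nlayers, batch, hidden] tensors.
hidden = (torch.zeros(2, 64, 32, requires_grad=True), torch.zeros(2, 64, 32, requires_grad=True))
hidden = repackage_hidden(hidden)  # gradients no longer flow past this point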
def train(args, model, train_dataset,epoch):

    with torch.enable_grad():
        # Turn on training mode which enables dropout.
        model.train()
        total_loss = 0
        start_time = time.time()
        hidden = model.init_hidden(args.batch_size)
        for batch, i in enumerate(range(0, train_dataset.size(0) - 1, args.bptt)):
            inputSeq, targetSeq = get_batch(args,train_dataset, i)
            # inputSeq: [ seq_len * batch_size * feature_size ]
            # targetSeq: [ seq_len * batch_size * feature_size ]

            # Starting each batch, we detach the hidden state from how it was previously produced.
            # If we didn't, the model would try backpropagating all the way to start of the dataset.
            hidden = model.repackage_hidden(hidden)
            hidden_ = model.repackage_hidden(hidden)
            optimizer.zero_grad()

            '''Loss1: Free running loss'''
            outVal = inputSeq[0].unsqueeze(0)
            outVals=[]
            hids1 = []
            for i in range(inputSeq.size(0)):
                outVal, hidden_, hid = model.forward(outVal, hidden_,return_hiddens=True)
                outVals.append(outVal)
                hids1.append(hid)
            outSeq1 = torch.cat(outVals,dim=0)
            hids1 = torch.cat(hids1,dim=0)
            loss1 = criterion(outSeq1.view(args.batch_size,-1), targetSeq.view(args.batch_size,-1))

            '''Loss2: Teacher forcing loss'''
            outSeq2, hidden, hids2 = model.forward(inputSeq, hidden, return_hiddens=True)
            loss2 = criterion(outSeq2.view(args.batch_size, -1), targetSeq.view(args.batch_size, -1))

            '''Loss3: Simplified Professor forcing loss'''
            loss3 = criterion(hids1.view(args.batch_size,-1), hids2.view(args.batch_size,-1).detach())

            '''Total loss = Loss1+Loss2+Loss3'''
            loss = loss1+loss2+loss3
            loss.backward()

            # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
            torch.nn.utils.clip_grad_norm_(model.parameters(), args.clip)
            optimizer.step()

            total_loss += loss.item()

            if batch % args.log_interval == 0 and batch > 0:
                cur_loss = total_loss / args.log_interval
                elapsed = time.time() - start_time
                print('| epoch {:3d} | {:5d}/{:5d} batches | ms/batch {:5.4f} | '
                      'loss {:5.2f} '.format(
                    epoch, batch, len(train_dataset) // args.bptt,
                                  elapsed * 1000 / args.log_interval, cur_loss))
                total_loss = 0
                start_time = time.time()
def train(args, model, train_dataset):
    # Turn on training mode which enables dropout.
    model.train()
    total_loss = 0
    start_time = time.time()
    hidden = model.init_hidden(args.batch_size)
    for batch, i in enumerate(range(0, train_dataset.size(0) - 1, args.bptt)):
        inputSeq, targetSeq = get_batch(train_dataset, i)
        # inputSeq: [ seq_len * batch_size * feature_size ]
        # targetSeq: [ seq_len * batch_size * feature_size ]

        # Starting each batch, we detach the hidden state from how it was previously produced.
        # If we didn't, the model would try backpropagating all the way to start of the dataset.
        hidden = model.repackage_hidden(hidden)
        optimizer.zero_grad()
        USE_TEACHER_FORCING = random.random() < args.teacher_forcing_ratio
        if USE_TEACHER_FORCING:
            outSeq, hidden = model.forward(inputSeq, hidden)
        else:
            outVal = inputSeq[0].unsqueeze(0)
            outVals = []
            for i in range(inputSeq.size(0)):
                outVal, hidden = model.forward(outVal, hidden)
                outVals.append(outVal)
            outSeq = torch.cat(outVals, dim=0)

        #print('outSeq:',outSeq.size())

        #print('targetSeq:', targetSeq.size())

        loss = criterion(outSeq.view(args.batch_size, -1),
                         targetSeq.view(args.batch_size, -1))
        loss.backward()

        # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
        torch.nn.utils.clip_grad_norm_(model.parameters(), args.clip)
        optimizer.step()

        # for p in model2_for_timeDiff.parameters():
        #    p.data.add_(-lr, p.grad.data)

        total_loss += loss.item()

        if batch % args.log_interval == 0 and batch > 0:
            cur_loss = total_loss / args.log_interval
            elapsed = time.time() - start_time
            print('| epoch {:3d} | {:5d}/{:5d} batches | ms/batch {:5.4f} | '
                  'loss {:5.2f} '.format(epoch, batch,
                                         len(train_dataset) // args.bptt,
                                         elapsed * 1000 / args.log_interval,
                                         cur_loss))
            total_loss = 0
            start_time = time.time()
Example #6
def train():
    # Turn on training mode which enables dropout.
    model.train()
    total_loss = 0.
    start_time = time.time()
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(args.batch_size)
    for batch, i in enumerate(range(0, train_data.size(0) - 1, args.bptt)):
        data, targets = get_batch(train_data, i)
        # Starting each batch, we detach the hidden state from how it was previously produced.
        # If we didn't, the model would try backpropagating all the way to start of the dataset.
        hidden = repackage_hidden(hidden)

        optimizer.zero_grad()

        output, hidden = model(data, hidden)
        loss = criterion(output.view(-1, ntokens), targets)
        loss.backward()

        # `clip_grad_norm_` helps prevent the exploding gradient problem in RNNs / LSTMs;
        # clip before the optimizer step so the clipped gradients are the ones applied.
        torch.nn.utils.clip_grad_norm_(model.parameters(), args.clip)
        optimizer.step()
        # for p in model.parameters():
        #     p.data.add_(-lr, p.grad.data)

        total_loss += loss.item()

        if batch % args.log_interval == 0 and batch > 0:
            cur_loss = total_loss / args.log_interval
            elapsed = time.time() - start_time
            try:
                print(
                    '| epoch {:3d} | {:5d}/{:5d} batches | lr {:02.2f} | ms/batch {:5.2f} | '
                    'loss {:5.2f} | ppl {:8.2f}'.format(
                        epoch, batch,
                        len(train_data) // args.bptt, lr,
                        elapsed * 1000 / args.log_interval, cur_loss,
                        math.exp(cur_loss)))
            except OverflowError:
                print(
                    '| epoch {:3d} | {:5d}/{:5d} batches | lr {:02.2f} | ms/batch {:5.2f} | '
                    'loss {:5.2f} | ppl      inf'.format(
                        epoch, batch,
                        len(train_data) // args.bptt, lr,
                        elapsed * 1000 / args.log_interval, cur_loss))
            total_loss = 0
            start_time = time.time()

        data.to("cpu")
        targets.to("cpu")
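math.exp overflows for losses above roughly 709, which is why the perplexity print is wrapped in try/except; a small guard helper (hypothetical, not part of the original script) makes the intent explicit:

import math

def safe_ppl(nll):
    """Perplexity exp(nll), falling back to inf when math.exp would overflow."""
    try:
        return math.exp(nll)
    except OverflowError:
        return float('inf')

print(safe_ppl(5.2))     # ~181.3
print(safe_ppl(1000.0))  # inf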
Example #7
def train(model, iterator, optimizer, criterion, clip):
    model.train()
    epoch_loss = 0
    len_iterator = 0
    for i, batch in enumerate(iterator):
        src = batch.src
        trg = batch.trg
        optimizer.zero_grad()
        output = model(src, trg)
        loss = criterion(output, trg)
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), clip)
        optimizer.step()
        epoch_loss += loss.item()
        len_iterator += 1
    return epoch_loss / len_iterator
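Depending on the decoder, `output` and `trg` usually need flattening before CrossEntropyLoss, often dropping the initial <sos> position; a sketch under assumed shapes (output: [trg_len, batch, vocab] logits, trg: [trg_len, batch] token indices):

import torch
import torch.nn as nn

criterion = nn.CrossEntropyLoss()
output = torch.randn(7, 4, 100)        # placeholder decoder logits
trg = torch.randint(0, 100, (7, 4))    # placeholder target token indices

vocab_size = output.shape[-1]
loss = criterion(output[1:].reshape(-1, vocab_size),  # skip the <sos> position
                 trg[1:].reshape(-1))
print(loss.item())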
def main():
    model = Net()
    if torch.cuda.is_available():
        model.cuda()
    else:
        pass
    model.apply(weights_init)

    if args.resume:
        if isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            model.load_state_dict(checkpoint['state_dict'])
            print("=> loaded checkpoint '{}'"
                  .format(args.resume))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    # Data processing
    # handled directly inside train()
    # dataParser = DataParser(batch_size)
    loss_function = nn.L1Loss()
    optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=0.9, weight_decay=5e-4)
    # train_scheduler = optim.lr_scheduler.MultiStepLR(optimizer,milestones=settings.MILESTONES,gamma=0.2)#learning rate decay
    scheduler = lr_scheduler.StepLR(optimizer, step_size=args.stepsize, gamma=args.gamma)

    log = Logger(join(TMP_DIR, '%s-%d-log.txt' % ('Adam', args.lr)))
    sys.stdout = log
    train_loss = []
    train_loss_detail = []

    for epoch in range(args.start_epoch, args.maxepoch):
        if epoch == 0:
            print("Performing initial testing...")
            # left empty for now

        tr_avg_loss, tr_detail_loss = train(model = model,optimizer = optimizer,epoch= epoch,save_dir=join(TMP_DIR, 'epoch-%d-training-record' % epoch))
        test()

        log.flush()
        # Save checkpoint
        save_file = os.path.join(TMP_DIR, 'checkpoint_epoch{}.pth'.format(epoch))
        save_checkpoint({'epoch': epoch, 'state_dict': model.state_dict(), 'optimizer': optimizer.state_dict()})

        scheduler.step()  # adjust the learning rate automatically
        train_loss.append(tr_avg_loss)
        train_loss_detail += tr_detail_loss
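`save_checkpoint` is not shown in this snippet; a minimal helper consistent with how it is called above (hypothetical name and default path) could be:

import torch

def save_checkpoint(state, filename='checkpoint.pth'):
    # Persist epoch, model weights, and optimizer state so training can resume later.
    torch.save(state, filename)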
Example #9
def train():
    model.train()  # Turn on the train mode
    total_loss = 0.0
    start_time = time.time()

    batches = np.random.permutation(
        range(0,
              train_data.size(0) - seq_len, seq_len))
    for batch_counter, i in enumerate(batches):
        data, targets = get_batch(train_data, i)
        optimizer.zero_grad()

        src_mask = model.generate_square_subsequent_mask(
            data.size(0)).to(device)

        output = model(
            data,
            src_mask=src_mask,
        )

        loss = criterion(output, targets)

        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), 0.5)
        optimizer.step()

        total_loss += loss.item()
        log_interval = 100
        if batch_counter % log_interval == 0 and batch_counter > 0:
            cur_loss = total_loss / log_interval
            elapsed = time.time() - start_time
            print("| epoch {:3d} | {:5d}/{:5d} batches | {:5.6f}"
                  "| ms/batch {:5.4f} | "
                  "loss {:5.4f}".format(
                      epoch,
                      batch_counter,
                      len(train_data) // seq_len,
                      scheduler.get_last_lr()[0],
                      elapsed * 1000 / log_interval,
                      cur_loss,
                  ))
            total_loss = 0
            start_time = time.time()
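`generate_square_subsequent_mask` produces the additive causal mask for the transformer; a standalone sketch equivalent in effect to what it returns:

import torch

def square_subsequent_mask(sz):
    # -inf above the diagonal, 0 on and below it: position i may only attend to j <= i.
    mask = torch.triu(torch.ones(sz, sz), diagonal=1)
    return mask.masked_fill(mask == 1, float('-inf'))

print(square_subsequent_mask(4))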
Example #10
def main(config):
    print('loading dataset')
    if config.dataset_path is None:
        if config.model == 'cbow':
            nlp_dataset = dataset.cbow_dataset.CBOWDataset(config)
        elif config.model == 'skip-gram':
            nlp_dataset = dataset.skipgram_dataset.SkipGramDataset(config)
        elif config.model == 'neg-sampling':
            nlp_dataset = dataset.negsampling_dataset.NegSamplingDataset(
                config)
        elif config.model == 'fast-text':
            nlp_dataset = dataset.fasttext_dataset.FastTextDataset(config)
        else:
            raise AssertionError('dataset should be one of w2v models.')
    else:
        with open(config.dataset_path, 'rb') as f:
            nlp_dataset = pickle.load(f)
    dataloader = DataLoader(
        nlp_dataset,
        batch_size=config.batch_size,
        shuffle=False,
        num_workers=0,
        collate_fn=collate_fn,
    )
    print('dataloader made')
    if config.model == 'neg-sampling':
        trainer = NegSamplingTrainer(dataloader, config)
    else:
        model = EmbeddingModule(len(nlp_dataset), config).to(config.device)
        criterion = nn.CrossEntropyLoss()
        optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)
        trainer = Trainer(dataloader, model, criterion, optimizer, config)
    print('start training')
    trainer.train()
    model = trainer.model
    with open('./checkpoints/w2v' + config.model + '_model.pkl', 'wb') as f:
        pickle.dump(model, f)
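Pickling the whole module ties the saved file to the exact class object; a more portable alternative (a sketch, not what this script does) is to save only the state_dict and rebuild the architecture before loading:

import torch
import torch.nn as nn

model = nn.Embedding(1000, 64)                 # placeholder for EmbeddingModule
torch.save(model.state_dict(), 'w2v_model.pt')

restored = nn.Embedding(1000, 64)              # rebuild the architecture, then load weights
restored.load_state_dict(torch.load('w2v_model.pt'))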
train_dataset = batchify(TimeseriesData.trainData, 1)[:10000]
test_dataset = batchify(TimeseriesData.testData, 1)


###############################################################################
# Build the model
###############################################################################

model = model.RNNPredictor(rnn_type = args.model, enc_inp_size=3, rnn_inp_size = args.emsize, rnn_hid_size = args.nhid,
                           dec_out_size=3,
                           nlayers = args.nlayers,)

if args.cuda:
    model.cuda()
optimizer = optim.Adam(model.parameters(), lr= 0.0001)
criterion = nn.MSELoss()
###############################################################################
# Training code
###############################################################################


def fit_norm_distribution_param(args, model, train_dataset, endPoint=10000):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    pasthidden = model.init_hidden(1)
    predictions = []
    organized = []
    errors = []
    #out = Variable(test_dataset[0].unsqueeze(0))
    for t in range(endPoint):
Example #12
def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)
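A quick usage check for the helper above; the small Sequential model is just an illustration:

import torch.nn as nn

net = nn.Sequential(nn.Linear(10, 5), nn.ReLU(), nn.Linear(5, 1))
print(count_parameters(net))  # 61 trainable parameters: (10*5 + 5) + (5*1 + 1)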
Example #13
def main():
    """Run training"""
    from model import model

    parser = argparse.ArgumentParser(
        description='PyTorch RNN Prediction Model on Time-series Dataset')
    parser.add_argument(
        '--data',
        type=str,
        default='ecg',
        help=
        'type of the dataset (ecg, gesture, power_demand, space_shuttle, respiration, nyc_taxi)'
    )
    parser.add_argument('--filename',
                        type=str,
                        default='chfdb_chf13_45590.pkl',
                        help='filename of the dataset')
    parser.add_argument(
        '--model',
        type=str,
        default='LSTM',
        help='type of recurrent net (RNN_TANH, RNN_RELU, LSTM, GRU, SRU)')
    parser.add_argument('--augment', type=bool, default=True, help='augment')
    parser.add_argument('--emsize',
                        type=int,
                        default=32,
                        help='size of rnn input features')
    parser.add_argument('--nhid',
                        type=int,
                        default=32,
                        help='number of hidden units per layer')
    parser.add_argument('--nlayers',
                        type=int,
                        default=2,
                        help='number of layers')
    parser.add_argument('--res_connection',
                        action='store_true',
                        help='residual connection')
    parser.add_argument('--lr',
                        type=float,
                        default=0.0002,
                        help='initial learning rate')
    parser.add_argument('--weight_decay',
                        type=float,
                        default=1e-4,
                        help='weight decay')
    parser.add_argument('--clip',
                        type=float,
                        default=10,
                        help='gradient clipping')
    parser.add_argument('--epochs',
                        type=int,
                        default=400,
                        help='upper epoch limit')
    parser.add_argument('--batch_size',
                        type=int,
                        default=64,
                        metavar='N',
                        help='batch size')
    parser.add_argument('--eval_batch_size',
                        type=int,
                        default=64,
                        metavar='N',
                        help='eval_batch size')
    parser.add_argument('--bptt', type=int, default=50, help='sequence length')
    parser.add_argument('--teacher_forcing_ratio',
                        type=float,
                        default=0.7,
                        help='teacher forcing ratio (deprecated)')
    parser.add_argument('--dropout',
                        type=float,
                        default=0.2,
                        help='dropout applied to layers (0 = no dropout)')
    parser.add_argument(
        '--tied',
        action='store_true',
        help='tie the word embedding and softmax weights (deprecated)')
    parser.add_argument('--seed', type=int, default=1111, help='random seed')
    parser.add_argument('--device',
                        type=str,
                        default='cuda',
                        help='cuda or cpu')
    parser.add_argument('--log_interval',
                        type=int,
                        default=10,
                        metavar='N',
                        help='report interval')
    parser.add_argument('--save_interval',
                        type=int,
                        default=10,
                        metavar='N',
                        help='save interval')
    parser.add_argument('--save_fig', action='store_true', help='save figure')
    parser.add_argument(
        '--resume',
        '-r',
        help=
        'use checkpoint model parameters as initial parameters (default: False)',
        action="store_true")
    parser.add_argument(
        '--pretrained',
        '-p',
        help=
        'use checkpoint model parameters and do not train anymore (default: False)',
        action="store_true")
    parser.add_argument('--prediction_window_size',
                        type=int,
                        default=10,
                        help='prediction_window_size')
    args = parser.parse_args()
    # Set the random seed manually for reproducibility.
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)

    ###############################################################################
    # Load data
    ###############################################################################
    TimeseriesData = preprocess_data.PickleDataLoad(
        data_type=args.data,
        filename=args.filename,
        augment_test_data=args.augment)
    train_dataset = TimeseriesData.batchify(args, TimeseriesData.trainData,
                                            args.batch_size)
    test_dataset = TimeseriesData.batchify(args, TimeseriesData.testData,
                                           args.eval_batch_size)
    gen_dataset = TimeseriesData.batchify(args, TimeseriesData.testData, 1)

    ###############################################################################
    # Build the model
    ###############################################################################
    feature_dim = TimeseriesData.trainData.size(1)
    model = model.RNNPredictor(rnn_type=args.model,
                               enc_inp_size=feature_dim,
                               rnn_inp_size=args.emsize,
                               rnn_hid_size=args.nhid,
                               dec_out_size=feature_dim,
                               nlayers=args.nlayers,
                               dropout=args.dropout,
                               tie_weights=args.tied,
                               res_connection=args.res_connection).to(
                                   args.device)
    optimizer = optim.Adam(model.parameters(),
                           lr=args.lr,
                           weight_decay=args.weight_decay)
    criterion = nn.MSELoss()

    ###############################################################################
    # Training code
    ###############################################################################
    def get_batch(args, source, i):
        seq_len = min(args.bptt, len(source) - 1 - i)
        data = source[i:i + seq_len]  # [ seq_len * batch_size * feature_size ]
        target = source[i + 1:i + 1 +
                        seq_len]  # [ (seq_len x batch_size x feature_size) ]
        return data, target

    def generate_output(args,
                        epoch,
                        model,
                        gen_dataset,
                        disp_uncertainty=True,
                        startPoint=500,
                        endPoint=3500):
        if args.save_fig:
            # Turn on evaluation mode which disables dropout.
            model.eval()
            hidden = model.init_hidden(1)
            outSeq = []
            upperlim95 = []
            lowerlim95 = []
            with torch.no_grad():
                for i in range(endPoint):
                    if i >= startPoint:
                        # if disp_uncertainty and epoch > 40:
                        #     outs = []
                        #     model.train()
                        #     for i in range(20):
                        #         out_, hidden_ = model.forward(out+0.01*Variable(torch.randn(out.size())).cuda(),hidden,noise=True)
                        #         outs.append(out_)
                        #     model.eval()
                        #     outs = torch.cat(outs,dim=0)
                        #     out_mean = torch.mean(outs,dim=0) # [bsz * feature_dim]
                        #     out_std = torch.std(outs,dim=0) # [bsz * feature_dim]
                        #     upperlim95.append(out_mean + 2.58*out_std/np.sqrt(20))
                        #     lowerlim95.append(out_mean - 2.58*out_std/np.sqrt(20))

                        out, hidden = model.forward(out, hidden)

                        #print(out_mean,out)

                    else:
                        out, hidden = model.forward(
                            gen_dataset[i].unsqueeze(0), hidden)
                    outSeq.append(out.data.cpu()[0][0].unsqueeze(0))

            outSeq = torch.cat(outSeq, dim=0)  # [seqLength * feature_dim]

            target = preprocess_data.reconstruct(gen_dataset.cpu(),
                                                 TimeseriesData.mean,
                                                 TimeseriesData.std)
            outSeq = preprocess_data.reconstruct(outSeq, TimeseriesData.mean,
                                                 TimeseriesData.std)
            # if epoch>40:
            #     upperlim95 = torch.cat(upperlim95, dim=0)
            #     lowerlim95 = torch.cat(lowerlim95, dim=0)
            #     upperlim95 = preprocess_data.reconstruct(upperlim95.data.cpu().numpy(),TimeseriesData.mean,TimeseriesData.std)
            #     lowerlim95 = preprocess_data.reconstruct(lowerlim95.data.cpu().numpy(),TimeseriesData.mean,TimeseriesData.std)

            plt.figure(figsize=(15, 5))
            for i in range(target.size(-1)):
                plt.plot(target[:, :, i].numpy(),
                         label='Target' + str(i),
                         color='black',
                         marker='.',
                         linestyle='--',
                         markersize=1,
                         linewidth=0.5)
                plt.plot(range(startPoint),
                         outSeq[:startPoint, i].numpy(),
                         label='1-step predictions for target' + str(i),
                         color='green',
                         marker='.',
                         linestyle='--',
                         markersize=1.5,
                         linewidth=1)
                # if epoch>40:
                #     plt.plot(range(startPoint, endPoint), upperlim95[:,i].numpy(), label='upperlim'+str(i),
                #              color='skyblue', marker='.', linestyle='--', markersize=1.5, linewidth=1)
                #     plt.plot(range(startPoint, endPoint), lowerlim95[:,i].numpy(), label='lowerlim'+str(i),
                #              color='skyblue', marker='.', linestyle='--', markersize=1.5, linewidth=1)
                plt.plot(range(startPoint, endPoint),
                         outSeq[startPoint:, i].numpy(),
                         label='Recursive predictions for target' + str(i),
                         color='blue',
                         marker='.',
                         linestyle='--',
                         markersize=1.5,
                         linewidth=1)

            plt.xlim([startPoint - 500, endPoint])
            plt.xlabel('Index', fontsize=15)
            plt.ylabel('Value', fontsize=15)
            plt.title('Time-series Prediction on ' + args.data + ' Dataset',
                      fontsize=18,
                      fontweight='bold')
            plt.legend()
            plt.tight_layout()
            plt.text(startPoint - 500 + 10,
                     target.min(),
                     'Epoch: ' + str(epoch),
                     fontsize=15)
            save_dir = Path(
                'result', args.data,
                args.filename).with_suffix('').joinpath('fig_prediction')
            save_dir.mkdir(parents=True, exist_ok=True)
            plt.savefig(
                save_dir.joinpath('fig_epoch' +
                                  str(epoch)).with_suffix('.png'))
            #plt.show()
            plt.close()
            return outSeq

        else:
            pass

    def evaluate_1step_pred(args, model, test_dataset):
        # Turn on evaluation mode which disables dropout.
        model.eval()
        total_loss = 0
        with torch.no_grad():
            hidden = model.init_hidden(args.eval_batch_size)
            for nbatch, i in enumerate(
                    range(0,
                          test_dataset.size(0) - 1, args.bptt)):

                inputSeq, targetSeq = get_batch(args, test_dataset, i)
                outSeq, hidden = model.forward(inputSeq, hidden)

                loss = criterion(outSeq.view(args.batch_size, -1),
                                 targetSeq.view(args.batch_size, -1))
                hidden = model.repackage_hidden(hidden)
                total_loss += loss.item()

        return total_loss / (nbatch + 1)

    def train(args, model, train_dataset, epoch):

        with torch.enable_grad():
            # Turn on training mode which enables dropout.
            model.train()
            total_loss = 0
            start_time = time.time()
            hidden = model.init_hidden(args.batch_size)
            for batch, i in enumerate(
                    range(0,
                          train_dataset.size(0) - 1, args.bptt)):
                inputSeq, targetSeq = get_batch(args, train_dataset, i)
                # inputSeq: [ seq_len * batch_size * feature_size ]
                # targetSeq: [ seq_len * batch_size * feature_size ]

                # Starting each batch, we detach the hidden state from how it was previously produced.
                # If we didn't, the model would try backpropagating all the way to start of the dataset.
                hidden = model.repackage_hidden(hidden)
                hidden_ = model.repackage_hidden(hidden)
                optimizer.zero_grad()
                '''Loss1: Free running loss'''
                outVal = inputSeq[0].unsqueeze(0)
                outVals = []
                hids1 = []
                for i in range(inputSeq.size(0)):
                    outVal, hidden_, hid = model.forward(outVal,
                                                         hidden_,
                                                         return_hiddens=True)
                    outVals.append(outVal)
                    hids1.append(hid)
                outSeq1 = torch.cat(outVals, dim=0)
                hids1 = torch.cat(hids1, dim=0)
                loss1 = criterion(
                    outSeq1.contiguous().view(args.batch_size, -1),
                    targetSeq.contiguous().view(args.batch_size, -1))
                '''Loss2: Teacher forcing loss'''
                outSeq2, hidden, hids2 = model.forward(inputSeq,
                                                       hidden,
                                                       return_hiddens=True)
                loss2 = criterion(
                    outSeq2.contiguous().view(args.batch_size, -1),
                    targetSeq.contiguous().view(args.batch_size, -1))
                '''Loss3: Simplified Professor forcing loss'''
                loss3 = criterion(hids1.view(args.batch_size, -1),
                                  hids2.view(args.batch_size, -1).detach())
                '''Total loss = Loss1+Loss2+Loss3'''
                loss = loss1 + loss2 + loss3
                loss.backward()

                # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
                torch.nn.utils.clip_grad_norm_(model.parameters(), args.clip)
                optimizer.step()

                total_loss += loss.item()

                if batch % args.log_interval == 0 and batch > 0:
                    cur_loss = total_loss / args.log_interval
                    elapsed = time.time() - start_time
                    print(
                        '| epoch {:3d} | {:5d}/{:5d} batches | ms/batch {:5.4f} | '
                        'loss {:5.2f} '.format(
                            epoch, batch,
                            len(train_dataset) // args.bptt,
                            elapsed * 1000 / args.log_interval, cur_loss))
                    total_loss = 0
                    start_time = time.time()

    def evaluate(args, model, test_dataset):
        # Turn on evaluation mode which disables dropout.
        model.eval()
        with torch.no_grad():
            total_loss = 0
            hidden = model.init_hidden(args.eval_batch_size)
            nbatch = 1
            for nbatch, i in enumerate(
                    range(0,
                          test_dataset.size(0) - 1, args.bptt)):
                inputSeq, targetSeq = get_batch(args, test_dataset, i)
                # inputSeq: [ seq_len * batch_size * feature_size ]
                # targetSeq: [ seq_len * batch_size * feature_size ]
                hidden_ = model.repackage_hidden(hidden)
                '''Loss1: Free running loss'''
                outVal = inputSeq[0].unsqueeze(0)
                outVals = []
                hids1 = []
                for i in range(inputSeq.size(0)):
                    outVal, hidden_, hid = model.forward(outVal,
                                                         hidden_,
                                                         return_hiddens=True)
                    outVals.append(outVal)
                    hids1.append(hid)
                outSeq1 = torch.cat(outVals, dim=0)
                hids1 = torch.cat(hids1, dim=0)
                loss1 = criterion(
                    outSeq1.contiguous().view(args.batch_size, -1),
                    targetSeq.contiguous().view(args.batch_size, -1))
                '''Loss2: Teacher forcing loss'''
                outSeq2, hidden, hids2 = model.forward(inputSeq,
                                                       hidden,
                                                       return_hiddens=True)
                loss2 = criterion(
                    outSeq2.contiguous().view(args.batch_size, -1),
                    targetSeq.contiguous().view(args.batch_size, -1))
                '''Loss3: Simplified Professor forcing loss'''
                loss3 = criterion(hids1.view(args.batch_size, -1),
                                  hids2.view(args.batch_size, -1).detach())
                '''Total loss = Loss1+Loss2+Loss3'''
                loss = loss1 + loss2 + loss3

                total_loss += loss.item()

        return total_loss / (nbatch + 1)

    # Loop over epochs.
    if args.resume or args.pretrained:
        print("=> loading checkpoint ")
        checkpoint = torch.load(
            Path('save', args.data, 'checkpoint',
                 args.filename).with_suffix('.pth'))
        args, start_epoch, best_val_loss = model.load_checkpoint(
            args, checkpoint, feature_dim)
        optimizer.load_state_dict((checkpoint['optimizer']))
        del checkpoint
        epoch = start_epoch
        print("=> loaded checkpoint")
    else:
        epoch = 1
        start_epoch = 1
        best_val_loss = float('inf')
        print("=> Start training from scratch")
    print('-' * 89)
    print(args)
    print('-' * 89)

    if not args.pretrained:
        # At any point you can hit Ctrl + C to break out of training early.
        try:
            for epoch in range(start_epoch, args.epochs + 1):

                epoch_start_time = time.time()
                train(args, model, train_dataset, epoch)
                val_loss = evaluate(args, model, test_dataset)
                print('-' * 89)
                print(
                    '| end of epoch {:3d} | time: {:5.2f}s | valid loss {:5.4f} | '
                    .format(epoch, (time.time() - epoch_start_time), val_loss))
                print('-' * 89)

                generate_output(args,
                                epoch,
                                model,
                                gen_dataset,
                                startPoint=1500)

                if epoch % args.save_interval == 0:
                    # Save the model if the validation loss is the best we've seen so far.
                    is_best = val_loss < best_val_loss
                    best_val_loss = min(val_loss, best_val_loss)
                    model_dictionary = {
                        'epoch': epoch,
                        'best_loss': best_val_loss,
                        'state_dict': model.state_dict(),
                        'optimizer': optimizer.state_dict(),
                        'args': args
                    }
                    model.save_checkpoint(model_dictionary, is_best)

        except KeyboardInterrupt:
            print('-' * 89)
            print('Exiting from training early')

    # Calculate mean and covariance for each channel's prediction errors, and save them with the trained model
    print('=> calculating mean and covariance')
    means, covs = list(), list()
    train_dataset = TimeseriesData.batchify(args,
                                            TimeseriesData.trainData,
                                            bsz=1)
    for channel_idx in range(model.enc_input_size):
        mean, cov = fit_norm_distribution_param(
            args, model, train_dataset[:TimeseriesData.length], channel_idx)
        means.append(mean), covs.append(cov)
    model_dictionary = {
        'epoch': max(epoch, start_epoch),
        'best_loss': best_val_loss,
        'state_dict': model.state_dict(),
        'optimizer': optimizer.state_dict(),
        'args': args,
        'means': means,
        'covs': covs
    }
    model.save_checkpoint(model_dictionary, True)
    print('-' * 89)
Example #14
            epoch_acc = running_corrects.double() / dataset_size[phase]

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc))

            if phase=='test' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())

        torch.save(best_model_wts, osp.join(Config['root_path'], Config['checkpoint_path'], 'model.pth'))
        print('Model saved at: {}'.format(osp.join(Config['root_path'], Config['checkpoint_path'], 'model.pth')))
    
    f.create_dataset('acc', data = np.array(learnning_curve))
    time_elapsed = time.time() - since
    print('Time taken to complete training: {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
    print('Best acc: {:.4f}'.format(best_acc))




if __name__=='__main__':

    dataloaders, classes, dataset_size = get_dataloader(debug=Config['debug'], batch_size=Config['batch_size'], num_workers=Config['num_workers'])
    num_ftrs = model.fc.in_features
    model.fc = nn.Linear(num_ftrs, classes)

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.RMSprop(model.parameters(), lr=Config['learning_rate'])
    device = torch.device('cuda:0' if torch.cuda.is_available() and Config['use_cuda'] else 'cpu')

    train_model(dataloaders, model, criterion, optimizer, device, num_epochs=Config['num_epochs'], dataset_size=dataset_size)
Example #15
    model = model.Backbone(num_layers=50, drop_ratio=0.6, mode='ir')
    model_dict = model.state_dict()
    pretrained_dict = {
        k: v
        for k, v in pretrained_dict.items() if k in model_dict
    }
    model_dict.update(pretrained_dict)  # update parameter
    model.load_state_dict(model_dict)  # load the merged dict so keys absent from the checkpoint keep their initial values
    model = torch.nn.DataParallel(model).to(args.device)

    # 4.5 set loss_function
    loss_function_A = torch.nn.MarginRankingLoss().to(args.device)
    loss_function_B = torch.nn.MSELoss().to(args.device)

    # 4.6 choose optimizer
    optimizer = torch.optim.SGD(model.parameters(),
                                lr=args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)
    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer,
                                                     milestones=[30, 60, 100],
                                                     gamma=0.1,
                                                     last_epoch=-1)

    # # 4.6.1 fixed the convolution layers
    # weight_p, bais_p = [], []
    # count = 0
    # for k in model.children():
    #     count += 1
    #     if count == 3:
    #         for name, p in k.named_parameters():
Example #16
import torch
from data import loader
from model import model, device
from loss import get_loss
import os

optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.1)
current_dir = os.path.dirname(__file__)
weights_dir = os.path.join(current_dir, '..', 'weights')


def train(epoch):
    print('#' * 15)
    print('Epoch {}, Latent Size {}'.format(epoch, model.latent_size))
    print('#' * 15)
    model.train()
    for index, (x, _) in enumerate(loader):
        x = x.mean(dim=1, keepdim=True).to(device)
        optimizer.zero_grad()
        x_generated, mu, logvar = model(x)
        loss = get_loss(x_generated, x, mu, logvar)
        loss.backward()
        optimizer.step()
        if index % 100 == 0:
            print('Loss at iteration {0}: {1:.4f}'.format(index, loss.item()))
    if epoch == 4:
        filename = 'epoch{}_ls{}.pkl'.format(epoch, model.latent_size)
        torch.save(model.state_dict(), os.path.join(weights_dir, filename))
    if epoch < 4:
        scheduler.step()
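`get_loss` is imported from loss.py and not shown; for a VAE like this one it typically combines a reconstruction term with the KL divergence to a standard normal, roughly as in this sketch (assumed form, and assuming the decoder outputs values in [0, 1]):

import torch
import torch.nn.functional as F

def vae_loss(x_generated, x, mu, logvar):
    # Reconstruction term plus KL(q(z|x) || N(0, I)), summed over the batch.
    recon = F.binary_cross_entropy(x_generated, x, reduction='sum')
    kld = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())
    return recon + kld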
            test_loss_list.append(epoch_loss)
            
            print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc))

            if phase=='test' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())

        torch.save(best_model_wts, osp.join(Config['root_path'], Config['checkpoint_path'], 'model.pth'))
        print('Model saved at: {}'.format(osp.join(Config['root_path'], Config['checkpoint_path'], 'model.pth')))

    time_elapsed = time.time() - since
    print('Time taken to complete training: {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
    print('Best acc: {:.4f}'.format(best_acc))

    np.savetxt('acc_list.txt',acc_list)
    np.savetxt('test_acc_list.txt',test_acc_list)
    np.savetxt('loss_list.txt',loss_list)
    np.savetxt('test_loss_list.txt',test_loss_list)

if __name__=='__main__':

    dataloaders, dataset_size = get_dataloader_compat(debug=False, batch_size=Config['batch_size'], num_workers=Config['num_workers'])

    criterion = nn.BCEWithLogitsLoss()
    optimizer = optim.Adam(model.parameters(), lr=Config['learning_rate'])
    device = torch.device('cuda:0' if torch.cuda.is_available() and Config['use_cuda'] else 'cpu')

    train_model(dataloaders, model, criterion, optimizer, device, num_epochs=Config['num_epochs'], dataset_size=dataset_size)

Example #18

train_data = batchify(train_data, batch_size)
val_data = batchify(val_data, eval_batch_size)
test_data = batchify(test_data, eval_batch_size)


def get_batch(source, batch_index):
    data = source[batch_index:batch_index + seq_len]
    # Shift target by one step.
    target = source[batch_index + 1:batch_index + 1 + seq_len]
    return data, target


criterion = nn.MSELoss()
optimizer = torch.optim.AdamW(model.parameters(), lr=lr)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 1.0, gamma=gamma)


def train():
    model.train()  # Turn on the train mode
    total_loss = 0.0
    start_time = time.time()

    batches = np.random.permutation(
        range(0,
              train_data.size(0) - seq_len, seq_len))
    for batch_counter, i in enumerate(batches):
        data, targets = get_batch(train_data, i)
        optimizer.zero_grad()
from model import model
from trainer import trainer
from dataset import dataset
from config import config
import torch.nn as nn
import torch.optim as optim
import time
import os

device = config.device
feature_extract = config.feature_extract
if __name__ == "__main__":

    model = model.LeNet(config.train_class_num).to(device)

    params_to_update = model.parameters()
    print("Params to learn:")

    if feature_extract:
        params_to_update = []
        for name, param in model.named_parameters():
            if param.requires_grad:
                params_to_update.append(param)
                print("\t", name)
    else:
        for name, param in model.named_parameters():
            if param.requires_grad:
                print("\t", name)

    optimizer_ft = optim.Adam(params_to_update, lr=0.001)
    criterion = nn.CrossEntropyLoss()
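When feature_extract is enabled, only parameters that still require gradients are collected for the optimizer; freezing the backbone beforehand is usually done with a helper along these lines (a sketch, name hypothetical):

import torch.nn as nn

def set_parameter_requires_grad(model, feature_extracting):
    # Freeze every existing parameter so only layers added afterwards get trained.
    if feature_extracting:
        for param in model.parameters():
            param.requires_grad = False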
Example #20
    total_loss = 0.
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(eval_batch_size)
    with torch.no_grad():
        for i in range(0, data_source.size(0) - 1, args.bptt):
            data, targets = get_batch(data_source, i)
            output, hidden = model(data, hidden)
            output_flat = output.view(-1, ntokens)
            total_loss += len(data) * criterion(output_flat, targets).item()
            hidden = repackage_hidden(hidden)
            data.to("cpu")
            targets.to("cpu")
    return total_loss / len(data_source)


optimizer = torch.optim.Adagrad(model.parameters(), lr=args.lr, lr_decay=1e-4, weight_decay=1e-5)


def train():
    # Turn on training mode which enables dropout.
    model.train()
    total_loss = 0.
    start_time = time.time()
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(args.batch_size)
    for batch, i in enumerate(range(0, train_data.size(0) - 1, args.bptt)):
        data, targets = get_batch(train_data, i)
        # Starting each batch, we detach the hidden state from how it was previously produced.
        # If we didn't, the model would try backpropagating all the way to start of the dataset.
        hidden = repackage_hidden(hidden)
Example #21
            target_transform=TransformVOCDetectionAnnotation(class_to_ind, False))

def collate_fn(batch):
    imgs, gt = zip(*batch)
    return imgs[0].unsqueeze(0), gt[0]

train_loader = torch.utils.data.DataLoader(
            train_data, batch_size=1, shuffle=True,
            num_workers=0, collate_fn=collate_fn)


val_loader = torch.utils.data.DataLoader(
            val_data, batch_size=1, shuffle=False,
            num_workers=0, collate_fn=collate_fn)

optimizer = optim.SGD(model.parameters(), lr=args.lr, 
                      momentum=args.momentum,
                      weight_decay=args.weight_decay)

def train(train_loader, model, optimizer, epoch):
  batch_time = AverageMeter()
  data_time = AverageMeter()
  losses = AverageMeter()

  model.train()
  end = time.time()
  for i, (im, gt) in enumerate(train_loader):
    adjust_learning_rate(optimizer, epoch)

    # measure data loading time
    data_time.update(time.time() - end)
Example #22
            temp = []
            temp_box_x = int(i[4]) - int(i[2])
            temp_box_y = int(i[5]) - int(i[3])
            temp.append(i[0])
            temp.append(min(temp_box_x, temp_box_y))
            data_outputs.append(i[1:2])
            data_inputs.append(temp)
    return data_inputs, data_outputs


## add more data: velocity , omega, past position
if __name__ == '__main__':
    train_inputs, train_outputs = read_input()
    m = m()
    m.load_state_dict(torch.load('model_1.pt'))
    optimizer = optim.Adam(m.parameters(), lr=0.005)
    minibatch_size = 3
    num_minibatches = len(train_inputs) // minibatch_size

    for epoch in (range(30)):
        # Training
        print("Training")
        # Put the model in training mode
        m.train()
        start_train = time.time()

        for group in tqdm(range(num_minibatches)):
            total_loss = None
            optimizer.zero_grad()
            for i in range(group * minibatch_size,
                           (group + 1) * minibatch_size):
Example #23
    total_loss = 0.
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(eval_batch_size)
    with torch.no_grad():
        for i in range(0, data_source.size(0) - 1, args.bptt):
            data, targets = get_batch(data_source, i)
            output, hidden = model(data, hidden)
            output_flat = output.view(-1, ntokens)
            total_loss += len(data) * criterion(output_flat, targets).item()
            hidden = repackage_hidden(hidden)
            data.to("cpu")
            targets.to("cpu")
    return total_loss / len(data_source)


optimizer = torch.optim.Adagrad(model.parameters(),
                                lr=args.lr,
                                lr_decay=1e-4,
                                weight_decay=1e-5)


def train():
    # Turn on training mode which enables dropout.
    model.train()
    total_loss = 0.
    start_time = time.time()
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(args.batch_size)
    for batch, i in enumerate(range(0, train_data.size(0) - 1, args.bptt)):
        data, targets = get_batch(train_data, i)
        # Starting each batch, we detach the hidden state from how it was previously produced.
gen_dataset = TimeseriesData.batchify(args, TimeseriesData.testData, 1)

###############################################################################
# Build the model
###############################################################################
feature_dim = TimeseriesData.trainData.size(1)
model = model.RNNPredictor(rnn_type=args.model,
                           enc_inp_size=feature_dim,
                           rnn_inp_size=args.emsize,
                           rnn_hid_size=args.nhid,
                           dec_out_size=feature_dim,
                           nlayers=args.nlayers,
                           dropout=args.dropout,
                           tie_weights=args.tied,
                           res_connection=args.res_connection).to(args.device)
optimizer = optim.Adam(model.parameters(),
                       lr=args.lr,
                       weight_decay=args.weight_decay)
criterion = nn.MSELoss()


###############################################################################
# Training code
###############################################################################
def get_batch(args, source, i):
    seq_len = min(args.bptt, len(source) - 1 - i)
    data = source[i:i + seq_len]  # [ seq_len * batch_size * feature_size ]
    target = source[i + 1:i + 1 +
                    seq_len]  # [ (seq_len x batch_size x feature_size) ]
    return data, target

###############################################################################
# Build the model
###############################################################################
feature_dim = TimeseriesData.trainData.size(1)
model = model.RNNPredictor(rnn_type = args.model,
                           enc_inp_size=feature_dim,
                           rnn_inp_size = args.emsize,
                           rnn_hid_size = args.nhid,
                           dec_out_size=feature_dim,
                           nlayers = args.nlayers,
                           dropout = args.dropout,
                           tie_weights= args.tied,
                           res_connection=args.res_connection).to(args.device)
optimizer = optim.Adam(model.parameters(), lr= args.lr,weight_decay=args.weight_decay)
criterion = nn.MSELoss()
###############################################################################
# Training code
###############################################################################
def get_batch(args,source, i):
    seq_len = min(args.bptt, len(source) - 1 - i)
    data = source[i:i+seq_len] # [ seq_len * batch_size * feature_size ]
    target = source[i+1:i+1+seq_len] # [ (seq_len x batch_size x feature_size) ]
    return data, target

def generate_output(args,epoch, model, gen_dataset, disp_uncertainty=True,startPoint=500, endPoint=3500):
    if args.save_fig:
        # Turn on evaluation mode which disables dropout.
        model.eval()
        hidden = model.init_hidden(1)
Example #26
    args.data + '/val_images', transform=data_transforms['val']),
                                         batch_size=args.batch_size,
                                         shuffle=False,
                                         num_workers=1)

# Neural network and optimizer
# We define neural net in model.py so that it can be reused by the evaluate.py script

from model import model
if use_cuda:
    print('Using GPU')
    model.cuda()
else:
    print('Using CPU')

optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum)


def train(epoch):
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        if use_cuda:
            data, target = data.cuda(), target.cuda()
        optimizer.zero_grad()
        output = model(data)
        criterion = torch.nn.CrossEntropyLoss(reduction='mean')
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        if batch_idx % args.log_interval == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
Example #27

###############################################################################
# Build the model
###############################################################################
feature_dim = TimeseriesData.trainData.size(1)
model = model.RNNPredictor(rnn_type = args.model,
                           enc_inp_size=feature_dim,
                           rnn_inp_size = args.emsize,
                           rnn_hid_size = args.nhid,
                           dec_out_size=feature_dim,
                           nlayers = args.nlayers,
                           dropout = args.dropout,
                           tie_weights= args.tied,
                           res_connection=args.res_connection).to(args.device)
optimizer = optim.Adam(model.parameters(), lr= args.lr,weight_decay=args.weight_decay)
criterion = nn.MSELoss()
###############################################################################
# Training code
###############################################################################
def get_batch(args,source, i):
    seq_len = min(args.bptt, len(source) - 1 - i)
    data = source[i:i+seq_len] # [ seq_len * batch_size * feature_size ]
    target = source[i+1:i+1+seq_len] # [ (seq_len x batch_size x feature_size) ]
    return data, target

def generate_output(args,epoch, model, gen_dataset, disp_uncertainty=True,startPoint=1, endPoint=300):
    if args.save_fig:
        # Turn on evaluation mode which disables dropout.
        model.eval()
        hidden = model.init_hidden(1)
Example #28
from model import model, nn
from data import load_data
from sklearn.metrics import precision_score, recall_score
from constants import *
from transformers import AdamW, get_linear_schedule_with_warmup
import numpy as np

train_loader, valid_loader, test_loader, train_len, valid_len, test_len = load_data()
model = model.to(device)
criterion = nn.CrossEntropyLoss()

# as per original bert paper, fine-tuning is done by Adam optimizer with weight decay
optimizer = AdamW(model.parameters(), lr=2e-5, correct_bias=False)
total_steps = len(train_loader) * EPOCHS
scheduler = get_linear_schedule_with_warmup(
    optimizer,
    num_warmup_steps=0,
    num_training_steps=total_steps
)


def train(model):
    model = model.train()
    optimizer.zero_grad()

    losses = []
    accurate_preds = 0
    all_targets = []
    all_predictions = []
    for d in train_loader:
        inputs = d['input_ids'].to(device)
print(model)

# Define the training and validation datasets and their dataloaders
eval_dataset = dsets.ImageFolder(EVAL_DATA_PATH,
                                 transform=preprocessing.transform)
train_dataset = dsets.ImageFolder(TRAIN_DATA_PATH,
                                  transform=preprocessing.transform)

eval_dataloader = DataLoader(eval_dataset, batch_size=BATCH_SIZE, shuffle=True)
train_dataloader = DataLoader(train_dataset,
                              batch_size=BATCH_SIZE,
                              shuffle=True)

# Use cross-entropy as the loss and SGD as the optimizer
criterion = NN.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

# Load the checkpoint from the previous training run
model.load_state_dict(torch.load("./chkpoint_res.bin"))

# Start training
for epoch in range(0, 100):
    model.train()
    with tqdm(train_dataloader, unit="batch") as tepoch:  # progress bar
        correct = 0
        batch = 0
        for data, target in tepoch:
            batch += 1
            tepoch.set_description(f"Epoch {epoch}")
            data, target = data.cuda(), target.cuda()  # move the batch to the GPU
Example #30
print('train dataset length : ', len(train_dataset))
print('test dataset length : ', len(test_dataset))

train_dataloader = data_loader.load_pathloss_dataset(train_dataset,
                                                     shuffle=True,
                                                     num_workers=12,
                                                     batch_size=batch_size,
                                                     type='RNN')
test_dataloader = data_loader.load_pathloss_dataset(test_dataset, shuffle=True, batch_size=batch_size,
                                                    num_workers=12, type='RNN')

model = model.VanillaLSTMNetwork(input_size=input_sequence).cuda()

criterion = nn.MSELoss().cuda()
optimizer = torch.optim.Adadelta(model.parameters(), lr=learning_rate)

writer = set_tensorboard_writer('../runs_rnn/model01-vanilla-sample12')

for epoch in range(num_epochs):
    for i, data in enumerate(train_dataloader):
        y_pred = model(data[:][0].cuda()).reshape(-1)
        y_data = data[:][1].cuda()
        loss = criterion(y_pred, y_data)
        optimizer.zero_grad()  # clear gradients from the previous iteration before backprop
        loss.backward()
        optimizer.step()

        # ...log the running loss during training
        writer.add_scalar('mseloss training loss',
                          loss / 1000,
                          epoch * len(train_dataloader) + i)
Example #31
        pretrained_dict = {
            k: v
            for k, v in new_state_dcit.items() if k in model_dict
        }

        for k, v in model_dict.items():
            if k not in pretrained_dict:
                print(k)
        model.load_state_dict(pretrained_dict, strict=True)

    else:
        print("===> no models found at '{}'".format(args.pretrained))

print("===> Setting Optimizer")

optimizer = optim.Adam(model.parameters(), lr=args.lr)


def train(epoch):
    model.train()
    utils.adjust_learning_rate(optimizer, epoch, args.step_size, args.lr,
                               args.gamma)
    print('epoch =', epoch, 'lr = ', optimizer.param_groups[0]['lr'])
    for iteration, (lr_tensor, hr_tensor) in enumerate(training_data_loader,
                                                       1):

        if args.cuda:
            lr_tensor = lr_tensor.to(device)  # ranges from [0, 1]
            hr_tensor = hr_tensor.to(device)  # ranges from [0, 1]

        optimizer.zero_grad()
test_dataset = preprocess_data.batchify(args, TimeseriesData.testData,
                                        args.eval_batch_size)
gen_dataset = preprocess_data.batchify(args, TimeseriesData.testData, 1)

###############################################################################
# Build the model
###############################################################################
model = model.RNNPredictor(rnn_type=args.model,
                           enc_inp_size=3,
                           rnn_inp_size=args.emsize,
                           rnn_hid_size=args.nhid,
                           dec_out_size=3,
                           nlayers=args.nlayers,
                           dropout=args.dropout,
                           tie_weights=args.tied)
print(list(model.parameters()))
if args.cuda:
    model.cuda()
optimizer = optim.Adam(model.parameters(), lr=args.lr)
criterion = nn.MSELoss()


###############################################################################
# Training code
###############################################################################
def get_batch(source, i, evaluation=False):
    seq_len = min(args.bptt, len(source) - 1 - i)
    data = Variable(
        source[i:i + seq_len],
        volatile=evaluation)  # [ seq_len * batch_size * feature_size ]
    target = Variable(