Example 1
while True:
    # Each pass of this outer loop trains a fresh model from scratch
    min_test_loss = 1.e6  # best test loss seen so far in this run

    loss = 0.0
    train_loss_seq = []
    test_loss_seq = []

    # Build the requested model and move it to the GPU if available
    if model_type == 'Transformer':
        model = TransformerModel(config)
    elif model_type == 'LSTM':
        model = LSTMModel(config)
    if cuda:
        model = model.cuda()

    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=config['train']['learning_rate'],
                                 weight_decay=config['train']['weight_decay'])
    criterion = torch.nn.MSELoss()

    optimizer.zero_grad()

    for it in range(n_iter):
        model.train()
        # Sample a training country at random for this iteration
        country = random.choice(train_countries)

        inp, target = get_data_tensor(data, country, measure_mode, output_mode=output_mode, cuda=cuda)

        # Forward pass and MSE loss against the target sequence
        out_nn, _ = get_net_output(inp, model_type, model, cuda)

        temp_loss = criterion(out_nn, target)
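
The excerpt stops at the loss computation. A minimal sketch of how the optimisation step could continue from here, reusing the excerpt's own names (the actual continuation is not shown in the source):

        # Backpropagate and update the weights for this sample
        optimizer.zero_grad()
        temp_loss.backward()
        optimizer.step()

        # Keep a running record of the training loss
        train_loss_seq.append(temp_loss.item())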
Example 2
    target = source[i + 1:i + 1 + seq_len].view(-1)
    return data, target
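
Only the last two lines of the batching helper survive above. They match the bptt-based batching helper from the official PyTorch transformer tutorial; for context, a reconstruction of the full helper (the seq_len clamping is an assumption based on that tutorial):

def get_batch(source, i):
    # Take a bptt-sized slice of the source as input, and the same slice
    # shifted by one position, flattened, as the prediction target.
    seq_len = min(bptt, len(source) - 1 - i)
    data = source[i:i + seq_len]
    target = source[i + 1:i + 1 + seq_len].view(-1)
    return data, target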


ntokens = len(TEXT.vocab.stoi)  # the size of vocabulary
emsize = 200  # embedding dimension
nhid = 200  # the dimension of the feedforward network model in nn.TransformerEncoder
nlayers = 2  # the number of nn.TransformerEncoderLayer in nn.TransformerEncoder
nhead = 2  # the number of heads in the multiheadattention models
dropout = 0.2  # the dropout value
model = TransformerModel(ntokens, emsize, nhead, nhid, nlayers,
                         dropout).to(device)

criterion = nn.CrossEntropyLoss()
lr = 5.0  # learning rate
optimizer = torch.optim.SGD(model.parameters(), lr=lr)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 1.0, gamma=0.95)


def train():
    model.train()  # Turn on the train mode
    total_loss = 0.
    start_time = time.time()
    ntokens = len(TEXT.vocab.stoi)
    for batch, i in enumerate(range(0, train_data.size(0) - 1, bptt)):
        data, targets = get_batch(train_data, i)
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output.view(-1, ntokens), targets)
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), 0.5)
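
The train() body above is cut off right after gradient clipping. A sketch of how such a step typically finishes, with the optimiser update, loss accumulation, and periodic logging (the log_interval value is an assumption):

        optimizer.step()

        total_loss += loss.item()
        # Periodic progress report (interval chosen here for illustration)
        log_interval = 200
        if batch % log_interval == 0 and batch > 0:
            cur_loss = total_loss / log_interval
            elapsed = time.time() - start_time
            print('| batch {:5d} | loss {:5.2f} | ms/batch {:5.2f}'.format(
                batch, cur_loss, elapsed * 1000 / log_interval))
            total_loss = 0.
            start_time = time.time()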
Example 3
    TRG.build_vocab(train_data, min_freq=2)

    # Create model
    model = TransformerModel(len(SRC.vocab), len(TRG.vocab), args.d_model,
                             args.n_head, args.num_enc_layers,
                             args.num_dec_layers, args.dim_feedforword,
                             args.dropout, args.activation).to(device)
    # Default training state; overwritten below when resuming from a checkpoint
    start_epoch, best_bleu = 0, 0.0
    if args.resume_model is not None:
        start_epoch, best_bleu = resume_model(model, args.resume_model)
    # Run the model parallelly
    if torch.cuda.device_count() > 1:
        logger.info("Using {} GPUs".format(torch.cuda.device_count()))
        model = nn.DataParallel(model)
    # Create loss criterion & optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=args.learning_rate)

    # Start training
    logger.info("Training Started".center(60, '#'))
    for epoch in range(start_epoch, args.epochs):
        # Train the model
        train(model, criterion, optimizer, train_iter, device, epoch, logger,
              args.log_interval, writer, TRG)
        # Test the model
        bleu = test(model, criterion, val_iter, device, epoch, logger,
                    args.log_interval, writer, TRG)
        # Save model
        # remember best BLEU and save checkpoint (higher BLEU is better)
        is_best = bleu > best_bleu
        best_bleu = max(bleu, best_bleu)
        save_checkpoint(
Example 4
            ],
                               axis=0)
            if loss_func == "cross_entropy":
                loss_vals = criterion(pred.transpose(1, 2), labels)

                # Avg of sum of token loss (after ignoring padding tokens)
                # loss = loss_vals
                loss = loss_vals.sum(axis=0).mean()

            elif loss_func == "label_smoothing":
                loss = criterion(pred, labels)

            loss.backward()
            # Clipping
            if clipping:
                torch.nn.utils.clip_grad_norm_(model.parameters(), clipping)
            optimizer.step()

            loss_val = loss.data.item()  # * batch.in_text.size(0)
            if verbose >= 2:
                if batch_idx % 500 == 0:
                    print("Train: {} loss={}".format(batch_idx, loss_val))
                    print("Input: {}".format(
                        denumericalize(batch.in_text, OUT_TEXT.vocab)[0]))
                    print("True: {}".format(
                        denumericalize(batch.out_text, OUT_TEXT.vocab)[0]))
                    print("Pred: {}".format(
                        denumericalize(pred.argmax(dim=2), OUT_TEXT.vocab)[0]))

            training_loss += loss_val
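
denumericalize and OUT_TEXT are not defined in this excerpt. Purely as an illustrative assumption, a helper of that shape, mapping a (batch, seq_len) tensor of token indices back to strings with a legacy torchtext vocab, could look like:

def denumericalize(index_tensor, vocab):
    # index_tensor: (batch, seq_len) LongTensor of vocabulary indices.
    # Returns one space-joined token string per example in the batch.
    return [
        " ".join(vocab.itos[idx] for idx in row)
        for row in index_tensor.tolist()
    ]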
Example 5
def main():
    args = parse_args()
    if args.deterministic:
        random.seed(0)
        torch.manual_seed(0)
        np.random.seed(0)
        torch.backends.cudnn.deterministic = True

    logging.basicConfig(
        format='%(asctime)s - %(levelname)s - %(name)s -   %(message)s',
        datefmt='%m/%d/%Y %H:%M:%S',
        level=logging.INFO)
    logger = logging.getLogger(__name__)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    args.gpu = 0

    TEXT = torchtext.data.Field(tokenize=get_tokenizer("basic_english"),
                                init_token='<sos>',
                                eos_token='<eos>',
                                lower=False)
    train_txt, val_txt, test_txt = torchtext.datasets.WikiText2.splits(
        TEXT, root=args.data_dir)
    TEXT.build_vocab(train_txt)

    model = TransformerModel(len(TEXT.vocab.stoi), args.em_size,
                             args.num_heads, args.hid_size,
                             args.num_layers).to(device)
    # model = torch.nn.DataParallel(model, dim=1)
    # optimiser = optim.Adam(model.parameters())
    # optimiser = Ranger(model.parameters())
    optimiser = RAdam(model.parameters())

    if args.eval:
        dataloaders = {
            "test":
            DataLoader(TextEvalDataset(test_txt, args.ngram, TEXT),
                       batch_size=args.eval_batch_size,
                       shuffle=False)
        }
        if args.resume:
            resume(model, args)

        test_loss, test_acc = eval_pll(device, model, dataloaders["test"],
                                       args)
        logger.info(f"Eval: Test Loss = {test_loss}, Test Acc = {test_acc}")
    else:
        dataloaders = {
            "train":
            DataLoader(TextTrainDataset(train_txt, args.ngram, TEXT,
                                        args.poisson_rate),
                       batch_size=args.train_batch_size,
                       shuffle=True),
            "val":
            DataLoader(TextEvalDataset(val_txt, args.ngram, TEXT),
                       batch_size=args.eval_batch_size,
                       shuffle=False),
            "test":
            DataLoader(TextEvalDataset(test_txt, args.ngram, TEXT),
                       batch_size=args.eval_batch_size,
                       shuffle=False)
        }
        args.start_epoch = 0
        args.best_acc = 1 / args.ngram
        if args.resume:
            resume(model, args, optimiser)

        # Create folder for the current model and save args
        model_dir = time.ctime().replace(" ", "_").replace(":", "_")
        args.model_dir = os.path.join("models", model_dir)
        os.makedirs(args.model_dir, exist_ok=True)
        with open(os.path.join(args.model_dir, "args.json"), "w") as f:
            json.dump(args.__dict__, f, indent=2)
        args.logger = logger
        train_pll(device, model, optimiser, dataloaders, args)
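
resume, train_pll, eval_pll, RAdam and the dataset classes are defined elsewhere in the project and are not shown here. As a hedged illustration only, a resume helper of the shape used above, assuming the checkpoint path lives in args.resume and was written with torch.save on a plain dict (the key names are assumptions), might look like:

def resume(model, args, optimiser=None):
    # Restore model (and optionally optimiser) state from a checkpoint dict.
    checkpoint = torch.load(args.resume, map_location="cpu")
    model.load_state_dict(checkpoint["model_state"])
    if optimiser is not None and "optimiser_state" in checkpoint:
        optimiser.load_state_dict(checkpoint["optimiser_state"])
    # Restore the bookkeeping fields used by the training loop above.
    args.start_epoch = checkpoint.get("epoch", 0)
    args.best_acc = checkpoint.get("best_acc", args.best_acc)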