Example 1
elif args.dataset_name == 'coildel':
    EMBED_DIM = 2
    num_classes = 100
    num_heads = 8
    depth = 6
    p, q = 1, 1

# k, num_heads, depth, seq_length, num_tokens, num_
model = Transformer(EMBED_DIM, num_heads, test_dataset.walklength, depth,
                    num_classes).to(device)

lr_warmup = 10000

lr = 1e-3

opt = torch.optim.Adam(lr=lr, params=model.parameters())
sch = torch.optim.lr_scheduler.LambdaLR(
    opt, lambda i: min(i / (lr_warmup / args.batch_size), 1.0))
loss_func = nn.NLLLoss()


def train_validate(model, loader, opt, loss_func, train, device):

    if train:
        model.train()
    else:
        model.eval()

    batch_loss = 0
    batch_acc = 0
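
The LambdaLR above applies a linear warmup: the base learning rate of 1e-3 is multiplied by min(step / (lr_warmup / batch_size), 1.0), so it ramps up from zero over roughly lr_warmup / batch_size optimizer steps and is then held constant. A minimal, self-contained sketch of the same schedule (the batch size of 32 is only an assumed value for illustration):

import torch
import torch.nn as nn

params = [nn.Parameter(torch.zeros(1))]        # dummy parameter group
opt = torch.optim.Adam(params, lr=1e-3)
warmup_steps = 10000 / 32                      # lr_warmup / assumed batch size
sch = torch.optim.lr_scheduler.LambdaLR(opt, lambda i: min(i / warmup_steps, 1.0))

for step in range(5):
    opt.step()
    sch.step()
    print(step, opt.param_groups[0]['lr'])     # grows linearly towards 1e-3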
Example 2
def main():
    device = torch.device("cpu" if hparams.no_cuda else "cuda")

    print("=== build model ===")
    start = time.time()
    model = Transformer(hparams.d_model, hparams.d_ff, vocab_size,
                        hparams.num_heads, hparams.num_layers, hparams.max_len,
                        hparams.dropout, EOS_id, PAD_id, device).to(device)
    end = time.time()
    print("=== build model done === {} seconds".format(end - start))

    train.global_step = 0

    #     train_dataset, val_dataset = split_data(train_path_en, train_path_de, hparams.validation_rate)
    train_dataset = make_dataset(train_path_en, train_path_de)
    val_dataset = make_dataset(val_path_en, val_path_de)

    train_loader = DataLoader(train_dataset,
                              batch_size=hparams.batch_size,
                              collate_fn=custom_collate,
                              shuffle=True,
                              num_workers=hparams.num_workers)
    val_loader = DataLoader(val_dataset,
                            batch_size=hparams.batch_size,
                            collate_fn=custom_collate,
                            num_workers=hparams.num_workers)

    criterion = torch.nn.NLLLoss(ignore_index=PAD_id,
                                 reduction="sum").to(device)
    optimizer = torch.optim.Adam(model.parameters(), hparams.lr)
    writer = SummaryWriter()

    for epoch in range(hparams.max_epochs):
        """train"""
        print("=== train start ===")
        start = time.time()

        loss, bleu_score = train(model, train_loader, criterion, optimizer,
                                 device, writer, epoch, hparams.print_steps)

        end = time.time()
        print("=== train done === {} seconds".format(end - start))
        print("epoch: {}/{}, loss: {}, bleu score: {}".format(
            epoch + 1, hparams.max_epochs, loss, bleu_score))

        torch.save(model.state_dict(), save_path)
        print("model saved to '{}'".format(os.path.abspath(save_path)))

        writer.add_scalar("Loss/train", loss, epoch + 1)
        writer.add_scalar("Bleu score/train", bleu_score, epoch + 1)
        """"""

        print("=== evaluation start ===")
        start = time.time()

        loss, bleu_score = evaluate(model, val_loader, criterion, optimizer,
                                    device, writer)

        end = time.time()
        print("=== evaluation done === {} seconds".format(end - start))
        print("epoch: {}/{}, loss: {}, bleu score: {}".format(
            epoch + 1, hparams.max_epochs, loss, bleu_score))

        writer.add_scalar("Loss/eval", loss, epoch + 1)
        writer.add_scalar("Bleu score/eval", bleu_score, epoch + 1)
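
NLLLoss above expects log-probabilities, so the Transformer is assumed to end in a log_softmax over the vocabulary; with ignore_index=PAD_id and reduction="sum" the loss is summed over the non-padding target tokens only. A minimal sketch of that combination (PAD_id = 0 is just a placeholder value):

import torch
import torch.nn.functional as F

PAD_id = 0                                      # placeholder padding index
logits = torch.randn(5, 8)                      # (target tokens, vocab size)
targets = torch.tensor([3, 1, 7, PAD_id, PAD_id])
criterion = torch.nn.NLLLoss(ignore_index=PAD_id, reduction="sum")
loss = criterion(F.log_softmax(logits, dim=-1), targets)
print(loss)                                     # NLL summed over the 3 non-PAD tokens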
Example 3
                        type=int,
                        default=350,
                        help="Number of epochs")
    args = parser.parse_args()

    train_iter, FR, EN, train_length = load_dataloaders(args)
    src_vocab = len(EN.vocab)
    trg_vocab = len(FR.vocab)

    cuda = torch.cuda.is_available()
    net = Transformer(src_vocab=src_vocab,
                      trg_vocab=trg_vocab,
                      d_model=args.d_model,
                      num=args.num,
                      n_heads=args.n_heads)
    for p in net.parameters():
        if p.dim() > 1:
            nn.init.xavier_uniform_(p)
    criterion = nn.CrossEntropyLoss(reduction="mean", ignore_index=1)
    optimizer = optim.Adam(net.parameters(),
                           lr=args.lr,
                           betas=(0.9, 0.98),
                           eps=1e-9)
    #scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=[10,20,30,40,50,100,200], gamma=0.7)
    scheduler = CosineWithRestarts(optimizer, T_max=500)
    if cuda:
        net.cuda()
    start_epoch, acc = load_state(net,
                                  optimizer,
                                  scheduler,
                                  args.model_no,
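
CosineWithRestarts above is a project-specific scheduler; PyTorch ships a comparable built-in, CosineAnnealingWarmRestarts. The sketch below pairs it with the same Xavier-uniform initialisation pattern used above (the Linear layer and T_0=500 are stand-ins for illustration only):

import torch
import torch.nn as nn

net = nn.Linear(4, 4)                           # stand-in for the Transformer
for p in net.parameters():                      # Xavier-uniform init for weight matrices
    if p.dim() > 1:
        nn.init.xavier_uniform_(p)

optimizer = torch.optim.Adam(net.parameters(), lr=1e-4, betas=(0.9, 0.98), eps=1e-9)
scheduler = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(optimizer, T_0=500)

for step in range(3):
    optimizer.step()
    scheduler.step()                            # cosine decay that restarts every T_0 steps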
Example 4
model = Transformer(len(SRC.vocab),
                    len(TGT.vocab),
                    N=ARGS.n_layers,
                    d_model=ARGS.d_model,
                    d_ff=4 * ARGS.d_model,
                    h=ARGS.n_heads,
                    dropout=ARGS.p_dropout).to(ARGS.device)
criterion = LabelSmoothing(size=len(TGT.vocab),
                           padding_idx=pad_idx,
                           smoothing=0.1).to(ARGS.device)

# train
if ARGS.run_mode == 'train':
    optimizer = NoamOpt(
        ARGS.d_model, 1, 2000,
        torch.optim.Adam(model.parameters(), lr=0, betas=(0.9, 0.98),
                         eps=1e-9))
    iter_cnt = 1
    min_norm_val_loss = math.inf
    model.train()
    for epoch in range(ARGS.n_epochs):
        for train_batch in train_iter:
            train_batch = utils.rebatch(pad_idx, train_batch)
            train_out = model(train_batch.src, train_batch.trg,
                              train_batch.src_mask, train_batch.trg_mask)
            train_loss = criterion(
                train_out.contiguous().view(-1, train_out.size(-1)),
                train_batch.trg_y.contiguous().view(-1)) / train_batch.ntokens

            train_loss.backward()
            optimizer.step()
            model.zero_grad()  # clear gradients so they do not accumulate across batches
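
NoamOpt above is assumed to implement the schedule from the Annotated Transformer, where the learning rate is factor * d_model^-0.5 * min(step^-0.5, step * warmup^-1.5); the call above passes factor=1 and warmup=2000. A minimal sketch of that rate function:

def noam_rate(step, d_model, factor=1, warmup=2000):
    # linear warmup for `warmup` steps, then decay proportional to step**-0.5
    return factor * (d_model ** -0.5) * min(step ** -0.5, step * warmup ** -1.5)

# e.g. with d_model = 512 the rate peaks around step == warmup
print(noam_rate(1, 512), noam_rate(2000, 512), noam_rate(20000, 512))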
Example 5
class ModelDev:
    def __init__(self, config):
        self.config = config
        self.prepare_dataloaders(config['data'])

        # self.model = MLP(config['MLP'])
        # self.model = MLP_3D(config['MLP'])
        # self.model = LSTM(config['LSTM'])
        self.model = Transformer(config['Trans'])
        print(self.model)

        self.model_name = config['train']['model_name']

        self.checkpoint_dir = './checkpoint_dir/{}/'.format(self.model_name)
        if not os.path.exists(self.checkpoint_dir):
            os.makedirs(self.checkpoint_dir)  # makedirs also creates the missing parent directory
        self.tb_log_dir = './tb_log/{}/'.format(self.model_name)
        if not os.path.exists(self.tb_log_dir):
            os.makedirs(self.tb_log_dir)

        self.optimal_metric = 100000
        self.cur_metric = 100000

        self.loss = nn.MSELoss()
        self.optim = optim.Adam(self.model.parameters(),
                                lr=self.config['train']['lr'],
                                betas=(0.5, 0.999))

    def prepare_dataloaders(self, config):
        data = Rossler(config)
        train_data = dataset(data.train_X, data.train_Y, data.train_Z,
                             config['w_size'])
        self.train_dataloader = DataLoader(train_data,
                                           batch_size=config['batch_size'],
                                           shuffle=True,
                                           drop_last=True)
        valid_data = dataset(data.valid_X, data.valid_Y, data.valid_Z,
                             config['w_size'])
        self.valid_dataloader = DataLoader(valid_data,
                                           batch_size=config['batch_size'],
                                           shuffle=False,
                                           drop_last=True)
        test_data = dataset(data.test_X, data.test_Y, data.test_Z,
                            config['w_size'])
        self.test_dataloader = DataLoader(test_data,
                                          batch_size=config['batch_size'],
                                          shuffle=False,
                                          drop_last=True)
        self.data = data

    def train(self):
        self.writer = SummaryWriter(self.tb_log_dir)
        for self.epoch in range(self.config['train']['epochs']):
            self.train_on_epoch()
            self.cur_metric = self.valid_on_epoch()
            print(self.cur_metric)
            if self.needToSave():
                self.saveWeights()

    def train_on_epoch(self):
        self.model.train()  # training mode
        LOSS = []
        for X_i, Y_i, Z_i, X_o, Y_o, Z_o in self.train_dataloader:
            X_i, Y_i, Z_i, X_o, Y_o, Z_o = self.cast_to_float(
                X_i, Y_i, Z_i, X_o, Y_o, Z_o)
            # perturb the inputs with Gaussian noise as a simple augmentation
            X_i += torch.normal(0, 0.1, X_i.shape)
            self.model.zero_grad()
            pred = self.model(X_i)
            loss = self.loss(pred, X_o)
            loss.backward()
            self.optim.step()
            LOSS.append(loss.data.cpu().numpy())
        self.writer.add_scalar('train Loss', np.mean(LOSS), self.epoch)

    def valid_on_epoch(self):
        self.model.train(False)
        LOSS = []
        with torch.no_grad():
            for X_i, Y_i, Z_i, X_o, Y_o, Z_o in self.valid_dataloader:
                X_i, Y_i, Z_i, X_o, Y_o, Z_o = self.cast_to_float(
                    X_i, Y_i, Z_i, X_o, Y_o, Z_o)
                pred = self.model(X_i)
                loss = self.loss(pred, X_o)
                LOSS.append(loss.data.cpu().numpy())
        self.writer.add_scalar('valid Loss', np.mean(LOSS), self.epoch)
        return np.mean(LOSS)

    def cast_to_float(self, X_i, Y_i, Z_i, X_o, Y_o, Z_o):
        X_i = X_i.float()
        Y_i = Y_i.float()
        Z_i = Z_i.float()
        X_o = X_o.float()
        Y_o = Y_o.float()
        Z_o = Z_o.float()
        return X_i, Y_i, Z_i, X_o, Y_o, Z_o

    def needToSave(self):
        if self.cur_metric < self.optimal_metric:
            self.optimal_metric = self.cur_metric
            return True
        return False

    def saveWeights(self, clean_previous=True):
        if clean_previous:
            files = glob(self.checkpoint_dir + '*.pth')
            for f in files:
                os.remove(f)
        torch.save(self.model.state_dict(),
                   '{}model_{}.pth'.format(self.checkpoint_dir, self.epoch))

    def test_MSE(self):
        self.model.train(False)
        self.load_weights()
        LOSS = []
        with torch.no_grad():
            for X_i, Y_i, Z_i, X_o, Y_o, Z_o in self.test_dataloader:
                X_i, Y_i, Z_i, X_o, Y_o, Z_o = self.cast_to_float(
                    X_i, Y_i, Z_i, X_o, Y_o, Z_o)
                pred = self.model(X_i)
                loss = self.loss(pred, X_o)
                LOSS.append(loss.data.cpu().numpy())
        return np.mean(LOSS)

    def test_a_window(self):
        self.model.train(False)
        self.load_weights()
        idx = 0
        with torch.no_grad():
            for X_i, Y_i, Z_i, X_o, Y_o, Z_o in self.test_dataloader:
                X_i, Y_i, Z_i, X_o, Y_o, Z_o = self.cast_to_float(
                    X_i, Y_i, Z_i, X_o, Y_o, Z_o)
                pred = self.model(X_i)
                show_a_test_window(X_i.data.numpy()[0, :],
                                   X_o.data.numpy()[0, :],
                                   pred.data.numpy()[0, :], idx,
                                   self.config['data']['stride'])
                idx += 1

    def test_long_window(self, length):
        self.model.train(False)
        self.load_weights()
        for start_idx in [100, 200, 300, 400, 500]:
            X_I = self.data.test_X[start_idx:start_idx + length]
            X_pred = X_I[:self.config['data']['w_size'] - 1]
            with torch.no_grad():
                while len(X_pred) < len(X_I):
                    nparray = np.array(X_pred[-self.config['data']['w_size'] +
                                              1:])
                    nparray = np.expand_dims(nparray, axis=0)
                    torchTensor = torch.FloatTensor(nparray)
                    pred = self.model(
                        torchTensor).data.squeeze().numpy().tolist()
                    X_pred.append(pred)
            show_long_window(X_I, X_pred, self.config['data']['stride'],
                             self.config['data']['w_size'], start_idx,
                             self.config['train']['model_name'])

    def load_weights(self):
        target_file = list(glob(self.checkpoint_dir + 'model*.pth'))[0]
        print('loading ', target_file)
        weights = torch.load(target_file)
        self.model.load_state_dict(weights)
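
test_long_window above runs a closed-loop rollout: the window is seeded with w_size - 1 ground-truth values and the model is then fed its own predictions. A stripped-down sketch of that pattern (assuming the model maps a window of past values to the next scalar value):

import torch

def rollout(model, seed, n_steps, w_size):
    # seed: ground-truth values, at least w_size - 1 of them
    preds = list(seed[:w_size - 1])
    with torch.no_grad():
        for _ in range(n_steps):
            window = torch.FloatTensor(preds[-(w_size - 1):]).unsqueeze(0)
            preds.append(model(window).squeeze().item())  # assumed scalar next-step output
    return preds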
Example 6
                    child_name = "{}.{}".format(name, n)
                param_trace(child_name, m, depth + 1, max_depth, threshold)

        param_trace('seq2seq', seq2seq, 0, max_depth=5, threshold=K * 100)

        exit()

    #  optimizer = optim.SGD(seq2seq.parameters(), lr=0.25)
    #  lr_scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=2, min_lr=1e-4,
    #                                                      verbose=True)

    #optimizer = optim.Adamax(seq2seq.parameters())

    T_ep = len(train_loader)
    #optimizer = optim.Adam(seq2seq.parameters(), lr=3e-4, betas=(0.9, 0.98), eps=1e-9)
    optimizer = optim.Adam(seq2seq.parameters(), lr=3e-4)
    lr_scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer,
                                                        T_max=T_ep * epochs,
                                                        eta_min=3e-6)
    if 'warmup' in cfg['train']:
        warmup_ep = cfg['train']['warmup']
        lr_scheduler = WarmupLR(optimizer,
                                init_scale=1e-3,
                                T_max=T_ep * warmup_ep,
                                after=lr_scheduler)

    if VIZ_ATTN:
        utils.makedirs('evals')
        evaluateAndShowAttentions(seq2seq,
                                  dset.in_lang,
                                  dset.out_lang,