Example #1
    def train(self):

        print("Loading vocab", self.vocab_path)
        vocab = WordVocab.load_vocab(self.vocab_path)
        print("vocab Size: ", len(vocab))

        print("\nLoading Train Dataset")
        logkey_train, logkey_valid, time_train, time_valid = generate_train_valid(
            self.output_path + "train",
            window_size=self.window_size,
            adaptive_window=self.adaptive_window,
            valid_size=self.valid_ratio,
            sample_ratio=self.sample_ratio,
            scale=self.scale,
            scale_path=self.scale_path,
            seq_len=self.seq_len,
            min_len=self.min_len,
        )

        train_dataset = LogDataset(logkey_train, time_train, vocab, seq_len=self.seq_len,
                                   corpus_lines=self.corpus_lines, on_memory=self.on_memory, mask_ratio=self.mask_ratio)

        print("\nLoading valid Dataset")
        valid_dataset = LogDataset(logkey_valid, time_valid, vocab, seq_len=self.seq_len, on_memory=self.on_memory, mask_ratio=self.mask_ratio)

        print("Creating Dataloader")
        self.train_data_loader = DataLoader(train_dataset, batch_size=self.batch_size, num_workers=self.num_workers,
                                      collate_fn=train_dataset.collate_fn, drop_last=True)
        self.valid_data_loader = DataLoader(valid_dataset, batch_size=self.batch_size, num_workers=self.num_workers,
                                       collate_fn=train_dataset.collate_fn, drop_last=True)
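        # Note: the dels below only drop the local names. The DataLoaders still
        # reference both datasets (and train_dataset additionally survives via its
        # bound collate_fn), so gc.collect() cannot reclaim them here.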
        del train_dataset
        del valid_dataset
        del logkey_train
        del logkey_valid
        del time_train
        del time_valid
        gc.collect()

        print("Building BERT model")
        bert = BERT(len(vocab), max_len=self.max_len, hidden=self.hidden, n_layers=self.layers, attn_heads=self.attn_heads,
                    is_logkey=self.is_logkey, is_time=self.is_time)

        print("Creating BERT Trainer")
        self.trainer = BERTTrainer(bert, len(vocab), train_dataloader=self.train_data_loader, valid_dataloader=self.valid_data_loader,
                              lr=self.lr, betas=(self.adam_beta1, self.adam_beta2), weight_decay=self.adam_weight_decay,
                              with_cuda=self.with_cuda, cuda_devices=self.cuda_devices, log_freq=self.log_freq,
                              is_logkey=self.is_logkey, is_time=self.is_time,
                              hypersphere_loss=self.hypersphere_loss)

        self.start_iteration(surfix_log="log2")

        self.plot_train_valid_loss("_log2")
Example #2
    def __init__(self, device=None, jit=False):
        self.device = device
        self.jit = jit
        args = parse_args(args=[
            '--train_dataset', 'data/corpus.small',
            '--test_dataset', 'data/corpus.small',
            '--vocab_path', 'data/vocab.small',
            '--output_path', 'bert.model',
        ]) # Avoid reading sys.argv here
        args.with_cuda = self.device == 'cuda'
        args.script = self.jit
        print("Loading Vocab", args.vocab_path)
        vocab = WordVocab.load_vocab(args.vocab_path)
        print("Vocab Size: ", len(vocab))

        train_dataset = BERTDataset(args.train_dataset, vocab, seq_len=args.seq_len,
                                    corpus_lines=args.corpus_lines, on_memory=args.on_memory)
        test_dataset = BERTDataset(args.test_dataset, vocab, seq_len=args.seq_len, on_memory=args.on_memory) \
            if args.test_dataset is not None else None

        print("Creating Dataloader")
        train_data_loader = DataLoader(train_dataset, batch_size=args.batch_size, num_workers=args.num_workers)
        test_data_loader = DataLoader(test_dataset, batch_size=args.batch_size, num_workers=args.num_workers) \
            if test_dataset is not None else None

        print("Building BERT model")
        bert = BERT(len(vocab), hidden=args.hidden, n_layers=args.layers, attn_heads=args.attn_heads)

        if args.script:
            print("Scripting BERT model")
            bert = torch.jit.script(bert)

        self.trainer = BERTTrainer(bert, len(vocab), train_dataloader=train_data_loader, test_dataloader=test_data_loader,
                                   lr=args.lr, betas=(args.adam_beta1, args.adam_beta2), weight_decay=args.adam_weight_decay,
                                   with_cuda=args.with_cuda, cuda_devices=args.cuda_devices, log_freq=args.log_freq, debug=args.debug)

        example_batch = next(iter(train_data_loader))
        self.example_inputs = example_batch['bert_input'].to(self.device), example_batch['segment_label'].to(self.device)
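
A hedged sketch of exercising this wrapper, assuming the enclosing class is named Model (only its __init__ is shown above) and that BERTTrainer.train(epoch) runs one epoch as in the other examples:

# Hypothetical driver; "Model" is an assumed name for the class above.
m = Model(device='cuda', jit=False)
m.trainer.train(0)           # one training epoch over data/corpus.small
inputs = m.example_inputs    # (bert_input, segment_label) tensors on the device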
Example #3
print("Creating Dataloader")
train_data_loader = DataLoader(train_dataset,
                               batch_size=args.batch_size,
                               num_workers=args.num_workers)
test_data_loader = DataLoader(test_dataset, batch_size=args.batch_size, num_workers=args.num_workers) \
    if test_dataset is not None else None

print("Building BERT model")
bert = BERT(len(vocab),
            hidden=args.hidden,
            n_layers=args.layers,
            attn_heads=args.attn_heads)

print("Creating BERT Trainer")
trainer = BERTTrainer(bert,
                      len(vocab),
                      train_dataloader=train_data_loader,
                      test_dataloader=test_data_loader,
                      lr=args.lr,
                      betas=(args.adam_beta1, args.adam_beta2),
                      weight_decay=args.adam_weight_decay)

print("Training Start")
for epoch in range(args.epochs):
    trainer.train(epoch)
    trainer.save(args.output_dir, epoch)

    if test_data_loader is not None:
        trainer.test(epoch)
Example #4
    train_data_loader = DataLoader(train_dataset,
                                   batch_size=args.batch_size,
                                   num_workers=args.num_workers)
    test_data_loader = DataLoader(test_dataset, batch_size=args.batch_size, num_workers=args.num_workers) \
        if test_dataset is not None else None

    print("Building BERT model")
    bert = BERT(len(vocab),
                hidden=args.hidden,
                n_layers=args.layers,
                attn_heads=args.attn_heads)

    print("Creating BERT Trainer")
    trainer = BERTTrainer(bert,
                          len(vocab),
                          train_dataloader=train_data_loader,
                          test_dataloader=test_data_loader,
                          lr=args.lr,
                          betas=(args.adam_beta1, args.adam_beta2),
                          weight_decay=args.adam_weight_decay,
                          with_cuda=args.with_cuda,
                          cuda_devices=args.cuda_devices,
                          log_freq=args.log_freq)

    print("Training Start")
    for epoch in range(args.epochs):
        trainer.train(epoch)
        trainer.save(epoch, args.output_path)

        if test_data_loader is not None:
            trainer.test(epoch)
Example #5
def train():
    parser = argparse.ArgumentParser()

    parser.add_argument("-c",
                        "--train_dataset",
                        type=str,
                        default='../data/data.bert',
                        help="train dataset for train bert")
    parser.add_argument("-t",
                        "--test_dataset",
                        type=str,
                        default=None,
                        help="test set for evaluate train set")
    parser.add_argument("-v",
                        "--vocab_path",
                        type=str,
                        default='../data/bert.vb',
                        help="built vocab model path with bert-vocab")
    parser.add_argument("-o",
                        "--output_path",
                        type=str,
                        default='../data/bert.model',
                        help="ex)output/bert.model")

    parser.add_argument("-hs",
                        "--hidden",
                        type=int,
                        default=256,
                        help="hidden size of transformer model")
    parser.add_argument("-l",
                        "--layers",
                        type=int,
                        default=8,
                        help="number of layers")
    parser.add_argument("-a",
                        "--attn_heads",
                        type=int,
                        default=8,
                        help="number of attention heads")
    parser.add_argument("-s",
                        "--seq_len",
                        type=int,
                        default=20,
                        help="maximum sequence len")

    parser.add_argument("-b",
                        "--batch_size",
                        type=int,
                        default=64,
                        help="number of batch_size")
    parser.add_argument("-e",
                        "--epochs",
                        type=int,
                        default=10,
                        help="number of epochs")
    parser.add_argument("-w",
                        "--num_workers",
                        type=int,
                        default=5,
                        help="dataloader worker size")

    parser.add_argument("--with_cuda",
                        type=bool,
                        default=True,
                        help="training with CUDA: true, or false")
    parser.add_argument("--log_freq",
                        type=int,
                        default=10,
                        help="printing loss every n iter: setting n")
    parser.add_argument("--corpus_lines",
                        type=int,
                        default=None,
                        help="total number of lines in corpus")
    parser.add_argument("--cuda_devices",
                        type=int,
                        nargs='+',
                        default=None,
                        help="CUDA device ids")
    parser.add_argument("--on_memory",
                        type=bool,
                        default=True,
                        help="Loading on memory: true or false")

    parser.add_argument("--lr",
                        type=float,
                        default=1e-3,
                        help="learning rate of adam")
    parser.add_argument("--adam_weight_decay",
                        type=float,
                        default=0.01,
                        help="weight_decay of adam")
    parser.add_argument("--adam_beta1",
                        type=float,
                        default=0.9,
                        help="adam first beta value")
    parser.add_argument("--adam_beta2",
                        type=float,
                        default=0.999,
                        help="adam first beta value")

    args = parser.parse_args()

    print("Loading Vocab", args.vocab_path)
    vocab = WordVocab.load_vocab(args.vocab_path)
    print("Vocab Size: ", len(vocab))

    print("Loading Train Dataset", args.train_dataset)
    train_dataset = BERTDataset(args.train_dataset,
                                vocab,
                                seq_len=args.seq_len,
                                corpus_lines=args.corpus_lines,
                                on_memory=args.on_memory)

    print("Loading Test Dataset", args.test_dataset)
    test_dataset = BERTDataset(args.test_dataset, vocab, seq_len=args.seq_len, on_memory=args.on_memory) \
        if args.test_dataset is not None else None

    print("Creating Dataloader")
    train_data_loader = DataLoader(train_dataset,
                                   batch_size=args.batch_size,
                                   num_workers=args.num_workers)
    test_data_loader = DataLoader(test_dataset, batch_size=args.batch_size, num_workers=args.num_workers) \
        if test_dataset is not None else None
    print("Building BERT model")
    bert = BERT(len(vocab),
                hidden=args.hidden,
                n_layers=args.layers,
                attn_heads=args.attn_heads)

    print("Creating BERT Trainer")
    trainer = BERTTrainer(bert,
                          len(vocab),
                          train_dataloader=train_data_loader,
                          test_dataloader=test_data_loader,
                          lr=args.lr,
                          betas=(args.adam_beta1, args.adam_beta2),
                          weight_decay=args.adam_weight_decay,
                          with_cuda=args.with_cuda,
                          cuda_devices=args.cuda_devices,
                          log_freq=args.log_freq)
    print("Training Start")
    for epoch in range(args.epochs):
        trainer.train(epoch)
        trainer.save(epoch, args.output_path)

        if test_data_loader is not None:
            trainer.test(epoch)
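
A caveat worth flagging on the parser above: argparse applies type to the raw string, and bool("False") is True, so --with_cuda False would still enable CUDA. A minimal workaround, assuming you control the script, is an explicit converter:

def str2bool(v):
    # argparse passes the raw string; bool() on any non-empty string is True,
    # so interpret the text explicitly.
    if isinstance(v, bool):
        return v
    if v.lower() in ("yes", "true", "t", "1"):
        return True
    if v.lower() in ("no", "false", "f", "0"):
        return False
    raise argparse.ArgumentTypeError("boolean value expected")

parser.add_argument("--with_cuda", type=str2bool, default=True,
                    help="train with CUDA: true or false")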
Example #6
def train():
    parser = argparse.ArgumentParser()

    parser.add_argument("-c",
                        "--train_dataset",
                        required=True,
                        type=str,
                        help="train dataset for train bert")
    parser.add_argument("-t",
                        "--valid_dataset",
                        type=str,
                        default=None,
                        help="valid set for evaluate train set")
    parser.add_argument("-v",
                        "--vocab_path",
                        required=True,
                        type=str,
                        help="built vocab model path with bert-vocab")
    parser.add_argument("-o",
                        "--output_path",
                        required=True,
                        type=str,
                        help="ex)output/bert.model")

    parser.add_argument("-hs",
                        "--hidden",
                        type=int,
                        default=256,
                        help="hidden size of transformer model")
    parser.add_argument("-l",
                        "--layers",
                        type=int,
                        default=8,
                        help="number of layers")
    parser.add_argument("-a",
                        "--attn_heads",
                        type=int,
                        default=8,
                        help="number of attention heads")
    parser.add_argument("-s",
                        "--seq_len",
                        type=int,
                        default=20,
                        help="maximum sequence len")

    parser.add_argument("-b",
                        "--batch_size",
                        type=int,
                        default=64,
                        help="number of batch_size")
    parser.add_argument("-e",
                        "--epochs",
                        type=int,
                        default=10,
                        help="number of epochs")
    parser.add_argument("-w",
                        "--num_workers",
                        type=int,
                        default=5,
                        help="dataloader worker size")

    parser.add_argument("--with_cuda",
                        type=bool,
                        default=True,
                        help="training with CUDA: true, or false")
    parser.add_argument("--log_freq",
                        type=int,
                        default=10,
                        help="printing loss every n iter: setting n")
    parser.add_argument("--corpus_lines",
                        type=int,
                        default=None,
                        help="total number of lines in corpus")
    parser.add_argument("--cuda_devices",
                        type=int,
                        nargs='+',
                        default=None,
                        help="CUDA device ids")
    parser.add_argument("--on_memory",
                        type=bool,
                        default=True,
                        help="Loading on memory: true or false")

    parser.add_argument("--lr",
                        type=float,
                        default=1e-3,
                        help="learning rate of adam")
    parser.add_argument("--adam_weight_decay",
                        type=float,
                        default=0.01,
                        help="weight_decay of adam")
    parser.add_argument("--adam_beta1",
                        type=float,
                        default=0.9,
                        help="adam first beta value")
    parser.add_argument("--adam_beta2",
                        type=float,
                        default=0.999,
                        help="adam first beta value")

    args = parser.parse_args(
        '-c ../data/corpus.small -t ../data/valid.small -v ../data/vocab.small -o ../output/bert.model'
        .split())

    print("Loading Vocab", args.vocab_path)
    vocab = WordVocab.load_vocab(args.vocab_path)
    print("Vocab Size: ", len(vocab))

    print("Loading Train Dataset", args.train_dataset)
    train_dataset = BERTDataset(args.train_dataset,
                                vocab,
                                seq_len=args.seq_len,
                                corpus_lines=args.corpus_lines,
                                on_memory=args.on_memory)

    print("Loading valid Dataset", args.valid_dataset)
    valid_dataset = BERTDataset(args.valid_dataset, vocab, seq_len=args.seq_len, on_memory=args.on_memory) \
        if args.valid_dataset is not None else None

    print("Creating Dataloader")
    train_data_loader = DataLoader(train_dataset,
                                   batch_size=args.batch_size,
                                   num_workers=args.num_workers)
    valid_data_loader = DataLoader(valid_dataset, batch_size=args.batch_size, num_workers=args.num_workers) \
        if valid_dataset is not None else None

    print("Building BERT model")
    bert = BERT(len(vocab),
                hidden=args.hidden,
                n_layers=args.layers,
                attn_heads=args.attn_heads)

    print("Creating BERT Trainer")
    trainer = BERTTrainer(bert,
                          len(vocab),
                          train_dataloader=train_data_loader,
                          valid_dataloader=valid_data_loader,
                          lr=args.lr,
                          betas=(args.adam_beta1, args.adam_beta2),
                          weight_decay=args.adam_weight_decay,
                          with_cuda=args.with_cuda,
                          cuda_devices=args.cuda_devices,
                          log_freq=args.log_freq)

    print("Training Start")
    best_loss = float('inf')
    for epoch in range(args.epochs):
        trainer.train(epoch)
        avg_loss = trainer.valid(epoch)
        if avg_loss < best_loss:
            best_loss = avg_loss
            trainer.save(epoch, args.output_path)
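
Because parse_args above is fed a hardcoded string, the flags cannot be overridden from the shell; to read the real command line instead, parse without arguments:

args = parser.parse_args()  # parse sys.argv instead of the hardcoded example string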
Example #7
print("Loading Vocab", args.vocab_path)
vocab = 257  # fixed vocab size used here in place of a loaded WordVocab
print("Vocab Size: ", vocab)

print("Loading Train Dataset", args.train_dataset)
train_dataset = BERTDataset(args.train_dataset, vocab, seq_len=args.seq_len,
                            corpus_lines=args.corpus_lines, on_memory=args.on_memory)

print("Loading Test Dataset", args.test_dataset)
test_dataset = BERTDataset(args.test_dataset, vocab, seq_len=args.seq_len, on_memory=args.on_memory) \
    if args.test_dataset is not None else None

print("Creating Dataloader")
train_data_loader = DataLoader(train_dataset, batch_size=args.batch_size, num_workers=args.num_workers)
test_data_loader = DataLoader(test_dataset, batch_size=args.batch_size, num_workers=args.num_workers) \
    if test_dataset is not None else None

print("Building BERT model")
bert = BERT(vocab, hidden=args.hidden, n_layers=args.layers, attn_heads=args.attn_heads)

print("Creating BERT Trainer")
trainer = BERTTrainer(bert, vocab, train_dataloader=train_data_loader, test_dataloader=test_data_loader,
                      lr=args.lr, betas=(args.adam_beta1, args.adam_beta2), weight_decay=args.adam_weight_decay,
                      with_cuda=args.with_cuda, cuda_devices=args.cuda_devices, log_freq=args.log_freq)

print("Training Start")

for epoch in range(1):  # single pass: load the checkpoint and benchmark one test epoch
    trainer.load(epoch, args.model_path)
    trainer.test(epoch, benchmark=True)
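
This variant reads args.model_path, which none of the parsers shown above define; a minimal sketch of the missing flag, with a placeholder default:

parser.add_argument("--model_path",
                    type=str,
                    default="output/bert.model.ep0",  # placeholder: point this at a saved checkpoint
                    help="saved checkpoint to load for benchmarking")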
Example #8
class Trainer:
    def __init__(self, options):
        self.device = options["device"]
        self.model_dir = options["model_dir"]
        self.model_path = options["model_path"]
        self.vocab_path = options["vocab_path"]
        self.output_path = options["output_dir"]
        self.window_size = options["window_size"]
        self.adaptive_window = options["adaptive_window"]
        self.sample_ratio = options["train_ratio"]
        self.valid_ratio = options["valid_ratio"]
        self.seq_len = options["seq_len"]
        self.max_len = options["max_len"]
        self.corpus_lines = options["corpus_lines"]
        self.on_memory = options["on_memory"]
        self.batch_size = options["batch_size"]
        self.num_workers = options["num_workers"]
        self.lr = options["lr"]
        self.adam_beta1 = options["adam_beta1"]
        self.adam_beta2 = options["adam_beta2"]
        self.adam_weight_decay = options["adam_weight_decay"]
        self.with_cuda = options["with_cuda"]
        self.cuda_devices = options["cuda_devices"]
        self.log_freq = options["log_freq"]
        self.epochs = options["epochs"]
        self.hidden = options["hidden"]
        self.layers = options["layers"]
        self.attn_heads = options["attn_heads"]
        self.is_logkey = options["is_logkey"]
        self.is_time = options["is_time"]
        self.scale = options["scale"]
        self.scale_path = options["scale_path"]
        self.n_epochs_stop = options["n_epochs_stop"]
        self.hypersphere_loss = options["hypersphere_loss"]
        self.mask_ratio = options["mask_ratio"]
        self.min_len = options["min_len"]

        print("Save options parameters")
        save_parameters(options, self.model_dir + "parameters.txt")

    def train(self):

        print("Loading vocab", self.vocab_path)
        vocab = WordVocab.load_vocab(self.vocab_path)
        print("vocab Size: ", len(vocab))

        print("\nLoading Train Dataset")
        logkey_train, logkey_valid, time_train, time_valid = generate_train_valid(
            self.output_path + "train",
            window_size=self.window_size,
            adaptive_window=self.adaptive_window,
            valid_size=self.valid_ratio,
            sample_ratio=self.sample_ratio,
            scale=self.scale,
            scale_path=self.scale_path,
            seq_len=self.seq_len,
            min_len=self.min_len,
        )

        train_dataset = LogDataset(logkey_train, time_train, vocab, seq_len=self.seq_len,
                                   corpus_lines=self.corpus_lines, on_memory=self.on_memory, mask_ratio=self.mask_ratio)

        print("\nLoading valid Dataset")
        valid_dataset = LogDataset(logkey_valid, time_valid, vocab, seq_len=self.seq_len, on_memory=self.on_memory, mask_ratio=self.mask_ratio)

        print("Creating Dataloader")
        self.train_data_loader = DataLoader(train_dataset, batch_size=self.batch_size, num_workers=self.num_workers,
                                      collate_fn=train_dataset.collate_fn, drop_last=True)
        self.valid_data_loader = DataLoader(valid_dataset, batch_size=self.batch_size, num_workers=self.num_workers,
                                       collate_fn=train_dataset.collate_fn, drop_last=True)
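        # Note: the dels below only drop the local names. The DataLoaders still
        # reference both datasets (and train_dataset additionally survives via its
        # bound collate_fn), so gc.collect() cannot reclaim them here.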
        del train_dataset
        del valid_dataset
        del logkey_train
        del logkey_valid
        del time_train
        del time_valid
        gc.collect()

        print("Building BERT model")
        bert = BERT(len(vocab), max_len=self.max_len, hidden=self.hidden, n_layers=self.layers, attn_heads=self.attn_heads,
                    is_logkey=self.is_logkey, is_time=self.is_time)

        print("Creating BERT Trainer")
        self.trainer = BERTTrainer(bert, len(vocab), train_dataloader=self.train_data_loader, valid_dataloader=self.valid_data_loader,
                              lr=self.lr, betas=(self.adam_beta1, self.adam_beta2), weight_decay=self.adam_weight_decay,
                              with_cuda=self.with_cuda, cuda_devices=self.cuda_devices, log_freq=self.log_freq,
                              is_logkey=self.is_logkey, is_time=self.is_time,
                              hypersphere_loss=self.hypersphere_loss)

        self.start_iteration(surfix_log="log2")

        self.plot_train_valid_loss("_log2")

    def start_iteration(self, surfix_log):
        print("Training Start")
        best_loss = float('inf')
        epochs_no_improve = 0
        for epoch in range(self.epochs):
            print("\n")
            if self.hypersphere_loss:
                center = self.calculate_center([self.train_data_loader, self.valid_data_loader])
                self.trainer.hyper_center = center

            _, train_dist = self.trainer.train(epoch)
            avg_loss, valid_dist = self.trainer.valid(epoch)
            self.trainer.save_log(self.model_dir, surfix_log)

            if self.hypersphere_loss:
                self.trainer.radius = self.trainer.get_radius(train_dist + valid_dist, self.trainer.nu)

            # save the model whenever validation loss improves; the hypersphere
            # artifacts below are only written after 10 warm-up epochs
            if avg_loss < best_loss:
                best_loss = avg_loss
                self.trainer.save(self.model_path)
                epochs_no_improve = 0

                if epoch > 10 and self.hypersphere_loss:
                    best_center = self.trainer.hyper_center
                    best_radius = self.trainer.radius
                    total_dist = train_dist + valid_dist

                    if best_center is None:
                        raise TypeError("center is None")

                    print("best radius", best_radius)
                    best_center_path = self.model_dir + "best_center.pt"
                    print("Save best center", best_center_path)
                    torch.save({"center": best_center, "radius": best_radius}, best_center_path)

                    total_dist_path = self.model_dir + "best_total_dist.pt"
                    print("save total dist: ", total_dist_path)
                    torch.save(total_dist, total_dist_path)
            else:
                epochs_no_improve += 1

            if epochs_no_improve == self.n_epochs_stop:
                print("Early stopping")
                break

    def calculate_center(self, data_loader_list):
        print("start calculate center")
        with torch.no_grad():
            outputs = 0
            total_samples = 0
            for data_loader in data_loader_list:
                total_length = len(data_loader)
                data_iter = tqdm.tqdm(enumerate(data_loader), total=total_length)
                for i, data in data_iter:
                    data = {key: value.to(self.device) for key, value in data.items()}

                    result = self.trainer.model.forward(data["bert_input"], data["time_input"])
                    cls_output = result["cls_output"]

                    outputs += torch.sum(cls_output.detach().clone(), dim=0)
                    total_samples += cls_output.size(0)

        center = outputs / total_samples

        return center

    def plot_train_valid_loss(self, surfix_log):
        train_loss = pd.read_csv(self.model_dir + f"train{surfix_log}.csv")
        valid_loss = pd.read_csv(self.model_dir + f"valid{surfix_log}.csv")
        sns.lineplot(x="epoch", y="loss", data=train_loss, label="train loss")
        sns.lineplot(x="epoch", y="loss", data=valid_loss, label="valid loss")
        plt.title("train and valid loss vs. epoch")
        plt.legend()
        plt.savefig(self.model_dir + "train_valid_loss.png")
        plt.show()
        print("plot done")