Example no. 1
def eval(tag_path, corpus_path):
    correct = 0
    total = 0
    acc_list = []
    model_name = MODEL_NAME
    embedding_dim = EMBEDDING_DIM
    hidden_dim = HIDDEN_DIM
    word_to_ix = WORD_TO_IX

    model = BiLSTM(len(word_to_ix), 5, embedding_dim, hidden_dim)
    checkpoint = torch.load(model_name)
    model.load_state_dict(checkpoint['model_state_dict'])
    model.eval()

    tag_to_ix = {'1': 0, '2': 1, '3': 2, '4': 3, '5': 4}
    sentences, tags = load_train_data(tag_path, corpus_path)
    labels = torch.tensor([[tag_to_ix[tag]] for tag in tags[:]])

    with torch.no_grad():
        for i, sen in enumerate(tqdm(sentences[:])):
            input = prepare_sequence(sen, word_to_ix)
            output = model(input)
            _, predicted = torch.max(output.data, 1)
            label = labels[i]
            total += label.size(0)
            correct += (predicted == label).sum().item()
            acc = round(100 * correct / total, 2)
            acc_list.append(acc)
    assert len(acc_list) == len(sentences)
    final_acc = acc
    plt.plot(list(range(len(tags))), acc_list)
    plt.xlabel('pred_num')
    plt.ylabel('accuracy / %')
    plt.show()
    return final_acc
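The prepare_sequence helper used above is not shown in these snippets. A minimal sketch of what it is assumed to do, mapping a token list to a LongTensor of vocabulary indices (the '<unk>' fallback is an assumption, not part of the original code):

import torch

def prepare_sequence(seq, word_to_ix, unk_token='<unk>'):
    # Look up each token; unseen tokens fall back to the assumed '<unk>' index (or 0).
    idxs = [word_to_ix.get(w, word_to_ix.get(unk_token, 0)) for w in seq]
    return torch.tensor(idxs, dtype=torch.long)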
Example no. 2
def predict(sentence):
    sentence = sentence.split()
    model_name = BEST_NAME
    embedding_dim = EMBEDDING_DIM
    hidden_dim = HIDDEN_DIM
    word_to_ix = WORD_TO_IX

    model = BiLSTM(len(word_to_ix), 5, embedding_dim, hidden_dim)
    checkpoint = torch.load(model_name)
    model.load_state_dict(checkpoint['model_state_dict'])
    model.eval()
    input = prepare_sequence(sentence, word_to_ix)
    with torch.no_grad():
        output = model(input)
        print(output)
        _, predicted = torch.max(output.data, 1)
        print(predicted)
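Since the other snippets map the ratings '1'–'5' to class indices 0–4 via tag_to_ix, the predicted index can be turned back into a star rating. A small illustrative sketch (ix_to_tag is introduced here for illustration only):

ix_to_tag = {0: '1', 1: '2', 2: '3', 3: '4', 4: '5'}

def predicted_rating(predicted):
    # Convert the argmax class index returned by torch.max back to a rating string.
    return ix_to_tag[predicted.item()]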
Example no. 3
def get_time_to_score(tsv_path, thing, model_path):
    time_to_count = {}
    time_to_scoresum = {}
    if thing == 'hair_dryer':
        id = '732252283'
    elif thing == 'microwave':
        id = '423421857'
    else:
        id = '246038397'

    with open('train_' + thing + '_word_to_ix.json', 'r') as j:
        word_to_ix = json.load(j)
    embedding_dim = EMBEDDING_DIM
    hidden_dim = HIDDEN_DIM
    model = BiLSTM(len(word_to_ix), 5, embedding_dim, hidden_dim)
    checkpoints = torch.load(model_path)
    model.load_state_dict(checkpoints['model_state_dict'])
    model.eval()

    with open(tsv_path, 'r') as f:
        reader = csv.reader(f, delimiter='\t')
        for i, r in enumerate(reader):
            if i == 0 or r[4] != id:
                continue
            month, _, year = r[14].split('/')
            if year not in {'2014', '2015'}:
                continue
            time = get_idx_by_year_month(int(year), int(month))
            if time < 8:
                continue
            sen = (r[12] + ' ' + r[13]).lower()
            sen = re.sub(r'[^A-Za-z0-9,.!]+', ' ', sen)
            input = prepare_sequence(sen.split(), word_to_ix)
            with torch.no_grad():
                output = model(input)
                _, predicted = torch.max(output.data, 1)
            pred_score = predicted.item()
            if time not in time_to_count:
                time_to_count[time] = 0
                time_to_scoresum[time] = 0.
            time_to_count[time] += 1
            time_to_scoresum[time] += pred_score
    time_to_scoremean = {}
    for time in time_to_count.keys():
        time_to_scoremean[time] = time_to_scoresum[time] / time_to_count[time]
    print(time_to_count)
    return time_to_scoremean
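A minimal sketch of how the returned time_to_scoremean dictionary could be plotted as an average predicted score per month index (the file paths below are hypothetical placeholders):

import matplotlib.pyplot as plt

scores = get_time_to_score('reviews.tsv', 'microwave', 'best_model.pt')  # hypothetical paths
times = sorted(scores)
plt.plot(times, [scores[t] for t in times], marker='o')
plt.xlabel('month index')
plt.ylabel('mean predicted score')
plt.show()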
Example no. 4
def model_load_test(test_df,
                    vocab_file,
                    embeddings_file,
                    pretrained_file,
                    test_prediction_dir,
                    test_prediction_name,
                    mode,
                    num_labels=2,
                    max_length=50,
                    gpu_index=0,
                    batch_size=128):

    device = torch.device(
        "cuda:{}".format(gpu_index) if torch.cuda.is_available() else "cpu")
    print(20 * "=", " Preparing for testing ", 20 * "=")
    if platform == "linux" or platform == "linux2":
        checkpoint = torch.load(pretrained_file)
    else:
        checkpoint = torch.load(pretrained_file, map_location=device)
    # Retrieving model parameters from checkpoint.
    embeddings = load_embeddings(embeddings_file)
    print("\t* Loading test data...")
    test_data = My_Dataset(test_df, vocab_file, max_length, mode)
    test_loader = DataLoader(test_data, shuffle=False, batch_size=batch_size)
    print("\t* Building model...")
    model = BiLSTM(embeddings,
                   num_labels=num_labels,
                   max_length=max_length,
                   device=device).to(device)
    model.load_state_dict(checkpoint["model"])
    print(20 * "=", " Testing BiLSTM model on device: {} ".format(device),
          20 * "=")
    batch_time, total_time, accuracy, predictions = test(model, test_loader)
    print(
        "\n-> Average batch processing time: {:.4f}s, total test time: {:.4f}s, accuracy: {:.4f}%\n"
        .format(batch_time, total_time, (accuracy * 100)))
    test_prediction = pd.DataFrame({'prediction': predictions})
    if not os.path.exists(test_prediction_dir):
        os.makedirs(test_prediction_dir)
    test_prediction.to_csv(os.path.join(test_prediction_dir,
                                        test_prediction_name),
                           index=False)
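A hedged usage sketch of model_load_test; every file name below is a hypothetical placeholder, not taken from the original project:

import pandas as pd

test_df = pd.read_csv('data/test.csv')  # hypothetical test set
model_load_test(test_df,
                vocab_file='data/vocab.txt',           # hypothetical
                embeddings_file='data/embeddings.pkl',  # hypothetical
                pretrained_file='models/best.pth.tar',  # hypothetical
                test_prediction_dir='output',
                test_prediction_name='test_prediction.csv',
                mode='test')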
Example no. 5
max_sent_length = 36  # set from the paper

# ---- Define Model, Loss, Optim ------
config = args
config.d_out = num_classes
config.n_directions = 2 if config.birnn else 1
print(config)
model = BiLSTM(config)
loss_function = nn.NLLLoss()
optimizer = optim.Adam(model.parameters(), lr=args.lr)

# ---- Test Model ------
if args.test:
    print("Test Mode: loading pre-trained model and testing on test set...")
    # model = torch.load(args.resume_snapshot, map_location=lambda storage, location: storage.cuda(args.gpu))
    model.load_state_dict(torch.load(args.resume_snapshot))
    test_acc = evaluate_dataset_batch(test_set, max_sent_length, model,
                                      w2v_map, label_to_ix)
    print("Accuracy: {}".format(test_acc))
    sys.exit(0)

# ---- Train Model ------
start = time.time()
best_val_acc = -1
iter = 0
header = '  Time Epoch Iteration     Loss   Train/Acc.   Val/Acc.'
print(header)
log_template = ' '.join('{:>6.0f},{:>5.0f},{:>9.0f},{:>9.6f}'.split(','))
dev_log_template = ' '.join(
    '{:>6.0f},{:>5.0f},{:>9.0f},{:>9.6f},{:9.6f},{:11.6f}'.split(','))
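The header and the two format templates above are presumably filled in inside the training loop; a sketch with illustrative values only, showing how one training row and one validation row would render:

elapsed, epoch_n, iteration = 12.0, 1, 100               # illustrative values
train_loss, train_acc, val_acc = 0.693147, 81.25, 79.50  # illustrative values
print(log_template.format(elapsed, epoch_n, iteration, train_loss))
print(dev_log_template.format(elapsed, epoch_n, iteration, train_loss, train_acc, val_acc))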
Example no. 6
def main():
    parser = argparse.ArgumentParser()

    parser.add_argument(
        '--model',
        type=str,
        default='rnn',
        help=
        "Available models are: 'rnn', 'cnn', 'bilstm', 'fasttext', and 'distilbert'\nDefault is 'rnn'"
    )
    parser.add_argument('--train_data_path',
                        type=str,
                        default="./data/train_clean.csv",
                        help="Path to the training data")
    parser.add_argument('--test_data_path',
                        type=str,
                        default="./data/dev_clean.csv",
                        help="Path to the test data")
    parser.add_argument('--seed', type=int, default=1234)
    parser.add_argument('--vectors',
                        type=str,
                        default='fasttext.simple.300d',
                        help="""
                                Pretrained vectors:
                                Visit 
                                https://github.com/pytorch/text/blob/9ce7986ddeb5b47d9767a5299954195a1a5f9043/torchtext/vocab.py#L146
                                for more 
                                """)
    parser.add_argument('--max_vocab_size', type=int, default=750)
    parser.add_argument('--batch_size', type=int, default=32)
    parser.add_argument('--bidirectional', type=bool, default=True)
    parser.add_argument('--dropout', type=float, default=0.5)
    parser.add_argument('--hidden_dim', type=int, default=64)
    parser.add_argument('--output_dim', type=int, default=1)
    parser.add_argument('--n_layers', type=int, default=2)
    parser.add_argument('--lr', type=float, default=1e-3)
    parser.add_argument('--n_epochs', type=int, default=5)
    parser.add_argument('--n_filters', type=int, default=100)
    parser.add_argument('--filter_sizes', type=list, default=[3, 4, 5])

    args = parser.parse_args()

    torch.manual_seed(args.seed)

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    ##########  BILSTM ##########

    if args.model == "bilstm":
        print('\nBiLSTM')
        TEXT = Field(tokenize='spacy')
        LABEL = LabelField(dtype=torch.float)
        data_fields = [("text", TEXT), ("label", LABEL)]

        train_data = TabularDataset(args.train_data_path,
                                    format='csv',
                                    fields=data_fields,
                                    skip_header=True,
                                    csv_reader_params={'delimiter': ","})

        test_data = TabularDataset(args.test_data_path,
                                   format='csv',
                                   fields=data_fields,
                                   skip_header=True,
                                   csv_reader_params={'delimiter': ","})

        train_data, val_data = train_data.split(split_ratio=0.8,
                                                random_state=random.seed(
                                                    args.seed))

        TEXT.build_vocab(train_data,
                         max_size=args.max_vocab_size,
                         vectors=args.vectors,
                         unk_init=torch.Tensor.normal_)
        LABEL.build_vocab(train_data)

        train_iterator, valid_iterator, test_iterator = BucketIterator.splits(
            (train_data, val_data, test_data),
            batch_size=args.batch_size,
            sort_key=lambda x: len(x.text),
            device=device)

        input_dim = len(TEXT.vocab)
        embedding_dim = get_embedding_dim(args.vectors)
        pad_idx = TEXT.vocab.stoi[TEXT.pad_token]
        unk_idx = TEXT.vocab.stoi[TEXT.unk_token]

        model = BiLSTM(input_dim, embedding_dim, args.hidden_dim,
                       args.output_dim, args.n_layers, args.bidirectional,
                       args.dropout, pad_idx)

        pretrained_embeddings = TEXT.vocab.vectors

        model.embedding.weight.data.copy_(pretrained_embeddings)
        model.embedding.weight.data[unk_idx] = torch.zeros(embedding_dim)
        model.embedding.weight.data[pad_idx] = torch.zeros(embedding_dim)

        optimizer = optim.Adam(model.parameters(), lr=args.lr)
        criterion = nn.BCEWithLogitsLoss()

        model.to(device)
        criterion.to(device)

        best_valid_loss = float('inf')

        print("\nTraining...")
        print("===========")
        for epoch in range(1, args.n_epochs + 1):

            start_time = time.time()

            train_loss, train_acc = train(model, train_iterator, optimizer,
                                          criterion)
            valid_loss, valid_acc = evaluate(model, valid_iterator, criterion)

            end_time = time.time()

            epoch_mins, epoch_secs = epoch_time(start_time, end_time)

            if valid_loss < best_valid_loss:
                best_valid_loss = valid_loss
                torch.save(model.state_dict(),
                           './checkpoints/{}-model.pt'.format(args.model))

            print(
                f'[Epoch: {epoch:02}] | Epoch Time: {epoch_mins}m {epoch_secs}s'
            )
            print(
                f'\tTrain Loss: {train_loss:.3f} | Train Acc: {train_acc*100:.2f}%'
            )
            print(
                f'\t Val. Loss: {valid_loss:.3f} |  Val. Acc: {valid_acc*100:.2f}%'
            )

        model.load_state_dict(
            torch.load('./checkpoints/{}-model.pt'.format(args.model)))

        test_loss, test_acc = evaluate(model, test_iterator, criterion)

        print('\nEvaluating...')
        print("=============")
        print(f'Test Loss: {test_loss:.3f} | Test Acc: {test_acc*100:.2f}%'
              )  # Test Loss: 0.139, Test Acc: 95.27%

    ##########  VANILLA RNN ##########

    else:
        print('\nVanilla RNN')
        TEXT = Field(tokenize='spacy')
        LABEL = LabelField(dtype=torch.float)
        data_fields = [("text", TEXT), ("label", LABEL)]

        train_data = TabularDataset(args.train_data_path,
                                    format='csv',
                                    fields=data_fields,
                                    skip_header=True,
                                    csv_reader_params={'delimiter': ","})

        test_data = TabularDataset(args.test_data_path,
                                   format='csv',
                                   fields=data_fields,
                                   skip_header=True,
                                   csv_reader_params={'delimiter': ","})

        train_data, val_data = train_data.split(split_ratio=0.8,
                                                random_state=random.seed(
                                                    args.seed))

        TEXT.build_vocab(train_data,
                         max_size=args.max_vocab_size,
                         vectors=args.vectors)
        LABEL.build_vocab(train_data)

        train_iterator, valid_iterator, test_iterator = BucketIterator.splits(
            (train_data, val_data, test_data),
            batch_size=args.batch_size,
            sort_key=lambda x: len(x.text),
            device=device)

        input_dim = len(TEXT.vocab)
        embedding_dim = get_embedding_dim(args.vectors)

        model = RNN(input_dim, embedding_dim, args.hidden_dim, args.output_dim)

        pretrained_embeddings = TEXT.vocab.vectors

        model.embedding.weight.data.copy_(pretrained_embeddings)

        optimizer = optim.Adam(model.parameters(), lr=args.lr)
        criterion = nn.BCEWithLogitsLoss()

        model.to(device)
        criterion.to(device)

        best_valid_loss = float('inf')

        print("\nTraining...")
        print("===========")
        for epoch in range(1, args.n_epochs + 1):

            start_time = time.time()

            train_loss, train_acc = train(model, train_iterator, optimizer,
                                          criterion)
            valid_loss, valid_acc = evaluate(model, valid_iterator, criterion)

            end_time = time.time()

            epoch_mins, epoch_secs = epoch_time(start_time, end_time)

            if valid_loss < best_valid_loss:
                best_valid_loss = valid_loss
                torch.save(model.state_dict(),
                           './checkpoints/{}-model.pt'.format(args.model))

            print(
                f'[Epoch: {epoch:02}] | Epoch Time: {epoch_mins}m {epoch_secs}s'
            )
            print(
                f'\tTrain Loss: {train_loss:.3f} | Train Acc: {train_acc*100:.2f}%'
            )
            print(
                f'\t Val. Loss: {valid_loss:.3f} |  Val. Acc: {valid_acc*100:.2f}%'
            )

        model.load_state_dict(
            torch.load('./checkpoints/{}-model.pt'.format(args.model)))

        test_loss, test_acc = evaluate(model, test_iterator, criterion)

        print('\nEvaluating...')
        print("=============")
        print(f'Test Loss: {test_loss:.3f} | Test Acc: {test_acc*100:.2f}%'
              )  # Test Loss: 0.138, Test Acc: 95.05%
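get_embedding_dim is not defined in this snippet. A hypothetical sketch, assuming the dimensionality can be parsed from a torchtext vectors name such as 'fasttext.simple.300d' or 'glove.6B.100d':

def get_embedding_dim(vectors_name):
    # Assumed helper: the trailing component ('300d', '100d', ...) encodes the dimension.
    return int(vectors_name.split('.')[-1].rstrip('d'))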
Example no. 7
def model_train_validate_test(train_df,
                              dev_df,
                              test_df,
                              embeddings_file,
                              vocab_file,
                              target_dir,
                              mode,
                              num_labels=2,
                              max_length=50,
                              epochs=50,
                              batch_size=128,
                              lr=0.0005,
                              patience=5,
                              max_grad_norm=10.0,
                              gpu_index=0,
                              if_save_model=False,
                              checkpoint=None):
    device = torch.device(
        "cuda:{}".format(gpu_index) if torch.cuda.is_available() else "cpu")
    print(20 * "=", " Preparing for training ", 20 * "=")
    # Directory where the model checkpoints will be saved
    if not os.path.exists(target_dir):
        os.makedirs(target_dir)
    # -------------------- Data loading ------------------- #
    print("\t* Loading training data...")
    train_data = My_Dataset(train_df, vocab_file, max_length, mode)
    train_loader = DataLoader(train_data, shuffle=True, batch_size=batch_size)
    print("\t* Loading validation data...")
    dev_data = My_Dataset(dev_df, vocab_file, max_length, mode)
    dev_loader = DataLoader(dev_data, shuffle=True, batch_size=batch_size)
    print("\t* Loading test data...")
    test_data = My_Dataset(test_df, vocab_file, max_length, mode)
    test_loader = DataLoader(test_data, shuffle=False, batch_size=batch_size)
    # -------------------- Model definition ------------------- #
    print("\t* Building model...")
    if (embeddings_file is not None):
        embeddings = load_embeddings(embeddings_file)
    else:
        embeddings = None
    model = BiLSTM(embeddings, num_labels=num_labels, device=device).to(device)
    total_params = sum(p.numel() for p in model.parameters())
    print(f'{total_params:,} total parameters.')
    total_trainable_params = sum(p.numel() for p in model.parameters()
                                 if p.requires_grad)
    print(f'{total_trainable_params:,} training parameters.')
    # -------------------- Preparation for training  ------------------- #
    criterion = nn.CrossEntropyLoss()
    # Keep only the parameters that require gradient updates
    parameters = filter(lambda p: p.requires_grad, model.parameters())
    # optimizer = optim.Adadelta(parameters, params["LEARNING_RATE"])
    optimizer = torch.optim.Adam(parameters, lr=lr)
    # optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                           mode="max",
                                                           factor=0.85,
                                                           patience=0)
    best_score = 0.0
    start_epoch = 1
    # Data for loss curves plot
    epochs_count = []
    train_losses = []
    valid_losses = []
    # Continuing training from a checkpoint if one was given as argument
    if checkpoint:
        checkpoint = torch.load(checkpoint)
        start_epoch = checkpoint["epoch"] + 1
        best_score = checkpoint["best_score"]
        print("\t* Training will continue on existing model from epoch {}...".
              format(start_epoch))
        model.load_state_dict(checkpoint["model"])
        optimizer.load_state_dict(checkpoint["optimizer"])
        epochs_count = checkpoint["epochs_count"]
        train_losses = checkpoint["train_losses"]
        valid_losses = checkpoint["valid_losses"]
    # Compute loss and accuracy before starting (or resuming) training.
    _, valid_loss, valid_accuracy, _ = validate(model, dev_loader, criterion)
    print("\t* Validation loss before training: {:.4f}, accuracy: {:.4f}%".
          format(valid_loss, (valid_accuracy * 100)))
    # -------------------- Training epochs ------------------- #
    print("\n", 20 * "=", "Training BiLSTM model on device: {}".format(device),
          20 * "=")
    patience_counter = 0
    for epoch in range(start_epoch, epochs + 1):
        epochs_count.append(epoch)
        print("* Training epoch {}:".format(epoch))
        epoch_time, epoch_loss, epoch_accuracy = train(model, train_loader,
                                                       optimizer, criterion,
                                                       epoch, max_grad_norm)
        train_losses.append(epoch_loss)
        print("-> Training time: {:.4f}s, loss = {:.4f}, accuracy: {:.4f}%".
              format(epoch_time, epoch_loss, (epoch_accuracy * 100)))
        print("* Validation for epoch {}:".format(epoch))
        epoch_time, epoch_loss, epoch_accuracy, _ = validate(
            model, dev_loader, criterion)
        valid_losses.append(epoch_loss)
        print("-> Valid. time: {:.4f}s, loss: {:.4f}, accuracy: {:.4f}%\n".
              format(epoch_time, epoch_loss, (epoch_accuracy * 100)))
        # Update the optimizer's learning rate with the scheduler.
        scheduler.step(epoch_accuracy)
        # Early stopping on validation accuracy.
        if epoch_accuracy < best_score:
            patience_counter += 1
        else:
            best_score = epoch_accuracy
            patience_counter = 0

            if (if_save_model):
                torch.save(
                    {
                        "epoch": epoch,
                        "model": model.state_dict(),
                        "best_score": best_score,
                        "epochs_count": epochs_count,
                        "train_losses": train_losses,
                        "valid_losses": valid_losses
                    }, os.path.join(target_dir, "best.pth.tar"))

                print("save model succesfully!\n")

            print("* Test for epoch {}:".format(epoch))
            _, _, test_accuracy, predictions = validate(
                model, test_loader, criterion)
            print("Test accuracy: {:.4f}%\n".format(test_accuracy))
            test_prediction = pd.DataFrame({'prediction': predictions})
            test_prediction.to_csv(os.path.join(target_dir,
                                                "test_prediction.csv"),
                                   index=False)

        if patience_counter >= patience:
            print("-> Early stopping: patience limit reached, stopping...")
            break
Example no. 8
class Seq_MNIST_Trainer():

    def __init__(self, trainer_params, args):
        self.args = args
        self.trainer_params = trainer_params
        
        random.seed(trainer_params.random_seed)
        torch.manual_seed(trainer_params.random_seed)
        if args.cuda:
            torch.cuda.manual_seed_all(trainer_params.random_seed)

        kwargs = {'num_workers': 1, 'pin_memory': True} if args.cuda else {}    
        self.train_data = seq_mnist_train(trainer_params)
        self.val_data = seq_mnist_val(trainer_params) 
        self.train_loader = DataLoader(self.train_data, batch_size=trainer_params.batch_size, shuffle=True, **kwargs)
        self.val_loader = DataLoader(self.val_data, batch_size=trainer_params.test_batch_size, shuffle=True, **kwargs)
        self.starting_epoch = 1
        self.prev_loss = 10000
    
        self.model = BiLSTM(trainer_params) 
        self.criterion = wp.CTCLoss(size_average=True)
        self.labels = [i for i in range(trainer_params.num_classes-1)]
        self.decoder = seq_mnist_decoder(labels=self.labels)

        if args.resume or args.eval or args.export:
            print("Loading model from {}".format(args.save_path))
            package = torch.load(args.save_path, map_location=lambda storage, loc: storage)
            self.model.load_state_dict(package['state_dict'])

        if args.cuda:
            torch.cuda.set_device(args.gpus)
            self.model = self.model.cuda()

        self.optimizer = optim.Adam(self.model.parameters(), lr=trainer_params.lr)

        if args.resume:
            self.optimizer.load_state_dict(package['optim_dict']) 
            self.starting_epoch = package['starting_epoch']
            self.prev_loss = package['prev_loss']
            if args.cuda:
                for state in self.optimizer.state.values():
                    for k, v in state.items():
                        if torch.is_tensor(v):
                            state[k] = v.cuda()

        if args.init_bn_fc_fusion:
            if not trainer_params.prefused_bn_fc:
                self.model.batch_norm_fc.init_fusion()
                self.trainer_params.prefused_bn_fc = True
            else:
                raise Exception("BN and FC are already fused.")

    def serialize(self, model, trainer_params, optimizer, starting_epoch, prev_loss):
        package = {'state_dict': model.state_dict(),
            'trainer_params': trainer_params,
            'optim_dict' : optimizer.state_dict(),
            'starting_epoch' : starting_epoch,
            'prev_loss': prev_loss
        }
        return package

    def save_model(self, epoch, loss_value):
        print("Model saved at: {}\n".format(self.args.save_path))
        self.prev_loss = loss_value
        torch.save(self.serialize(model=self.model, trainer_params=self.trainer_params, 
            optimizer=self.optimizer, starting_epoch=epoch + 1, prev_loss=self.prev_loss), self.args.save_path)


    def train(self, epoch):
        self.model.train()
        for i, (item) in enumerate(self.train_loader):
            data, labels, output_len, lab_len = item
            
            data = Variable(data.transpose(1,0), requires_grad=False)
            labels = Variable(labels.view(-1), requires_grad=False)
            output_len = Variable(output_len.view(-1), requires_grad=False)
            lab_len = Variable(lab_len.view(-1), requires_grad=False)
            
            if self.args.cuda:
                data = data.cuda()
 
            output = self.model(data)

            # print("Input = ", data.shape)
            # print("model output (x) = ", output)
            # print("GTs (y) = ", labels.type())
            # print("model output len (xs) = ", output_len.type())
            # print("GTs len (ys) = ", lab_len.type())
            # exit(0)

            loss = self.criterion(output, labels, output_len, lab_len)
            loss_value = loss.data[0]
            print("Loss value for epoch = {}/{} and batch {}/{} is = {:.4f}".format(epoch, 
                self.trainer_params.epochs, (i+1)*self.trainer_params.batch_size, len(self.train_data) , loss_value))
            
            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()
            if self.args.cuda:
                torch.cuda.synchronize()                   

    def test(self, epoch=0, save_model_flag=False):
        self.model.eval()
        loss_value = 0
        for i, (item) in enumerate(self.val_loader):           
            data, labels, output_len, lab_len = item
            
            data = Variable(data.transpose(1,0), requires_grad=False)
            labels = Variable(labels.view(-1), requires_grad=False)
            output_len = Variable(output_len.view(-1), requires_grad=False)
            lab_len = Variable(lab_len.view(-1), requires_grad=False)
            
            if self.args.cuda:
                data = data.cuda()

            output = self.model(data)
            
            # print("Input = ", data)
            # print("model output (x) = ", output.shape)
            # print("model output (x) = ", output)        
            # print("Label = ", labels)
            # print("model output len (xs) = ", output_len)
            # print("GTs len (ys) = ", lab_len)
            
            index = random.randint(0,self.trainer_params.test_batch_size-1)      
            label = labels[index*self.trainer_params.word_size:(index+1)*self.trainer_params.word_size].data.numpy()
            label = label-1
            prediction = self.decoder.decode(output[:,index,:], output_len[index], lab_len[index])
            accuracy = self.decoder.hit(prediction, label)

            print("Sample Label      = {}".format(self.decoder.to_string(label))) 
            print("Sample Prediction = {}".format(self.decoder.to_string(prediction)))
            print("Accuracy on Sample = {:.2f}%\n\n".format(accuracy))

            loss = self.criterion(output, labels, output_len, lab_len)
            loss_value += loss.data.numpy()

        loss_value /= (len(self.val_data)//self.trainer_params.test_batch_size)
        print("Average Loss Value for Val Data is = {:.4f}\n".format(float(loss_value)))
        
        if loss_value < self.prev_loss and save_model_flag:
            self.save_model(epoch, loss_value)

    def eval_model(self):
        self.test()


    def train_model(self):
        for epoch in range(self.starting_epoch, self.trainer_params.epochs + 1):
            self.train(epoch)
            self.test(epoch=epoch, save_model_flag=True)
            if epoch%20==0:
                self.optimizer.param_groups[0]['lr'] = self.optimizer.param_groups[0]['lr']*0.98

    def export_model(self, simd_factor, pe):
        self.model.eval()
        self.model.export('r_model_fw_bw.hpp', simd_factor, pe)

    def export_image(self, idx=100):
        img, label = self.val_data.images[:,idx,:], self.val_data.labels[0][idx]
        img = img.transpose(1, 0)
        label -= 1
        label = self.decoder.to_string(label)
        
        from PIL import Image
        from matplotlib import cm

        im = Image.fromarray(np.uint8(cm.gist_earth(img)*255))
        im.save('test_image.png')
        img = img.transpose(1, 0)

        img = np.reshape(img, (-1, 1))
        np.savetxt("test_image.txt", img, fmt='%.10f')
        f = open('test_image_gt.txt','w')
        f.write(label)
        f.close()
        print("Exported image with label = {}".format(label))
Example no. 9
import torch
import torchvision

from model import BiLSTM
from data import load_dataset
from config import model_name, device

if __name__ == "__main__":

    # the string to test!
    test_string = "<s> john can"

    # ########################
    # LOAD DATASET
    # ########################

    corpus, word_to_idx, idx_to_word, train_dataset = load_dataset()

    # ########################
    # TEST VARIABLES
    # ########################

    model = BiLSTM(len(corpus))
    model.load_state_dict(torch.load(model_name))

    model.eval()
    sentence = test_string.split()
    sentence = torch.tensor([[word_to_idx[w] for w in sentence]])

    s = model.sample(sentence)
    print(test_string.split() + s)
Example no. 10
char_to_id = mapping['char_to_id']
word_embeds = mapping['word_embeds']

model = BiLSTM(voca_size=len(word_to_id),
               word_emb_dim=100,
               pre_word_emb=word_embeds,
               char_emb_dim=25,
               char_lstm_dim=25,
               char_to_ix=char_to_id,
               n_cap=4,
               cap_emb_dim=8,
               hidden_dim=200,
               tag_to_ix=tag_to_id)

x = torch.load(model_path)
model.load_state_dict(x)

model.eval()


def test():
    test_sentences = loader.load_data(test_path, zeros=False)

    loader.update_tag_scheme(test_sentences, 'iob')

    test_data = loader.pepare_dataset(test_sentences, word_to_id, char_to_id,
                                      tag_to_id)

    print("%i sentences in test." % (len(test_data)))

    confusion_matrix = torch.zeros((len(tag_to_id) - 2, len(tag_to_id) - 2))
Example no. 11
    if 'cuda' in args.device:
        if torch.cuda.is_available():
            device = torch.device(args.device)
        else:
            print("cuda not available...")
    print("Using device {}".format(device))

    print("loading datasets...")
    n = None
    train_data = DataSource("train", n=n)
    print("loaded {} train data".format(len(train_data)))
    dev_data = DataSource("dev", n=n)
    print("loaded {} dev data".format(len(dev_data)))
    test_data = DataSource("test", n=n)
    print("loaded {} test data".format(len(test_data)))

    model = BiLSTM(128, device)
    print("allocated model")

    if args.restore == "":
        losses = train()
        print("graphing")
        graph_losses(losses)
    else:
        model.load_state_dict(torch.load(args.restore))
        print("loaded weights from {}".format(args.restore))

    confusion = evaluate()
    print(confusion)
    print("accuracy: {}".format(np.sum(np.diagonal(confusion))))
Example no. 12
    train_dataloader = create_dataloader("./data/wsj0_train",
                                         "./data/wsj0_train_merged_labels.npy",
                                         batch_size=batch_size,
                                         shuffle=True)

    test_dataloader = create_dataloader("./data/wsj0_test",
                                        None,
                                        batch_size=batch_size,
                                        test=True,
                                        shuffle=False)
    model = BiLSTM(40, 256, 47, 5, use_gpu=True)
    # model = Model(40, 47, 256)

    if checkpoint:
        model.load_state_dict(torch.load(checkpoint))

    model = model.cuda()
    ctc_loss = nn.CTCLoss()

    def criterion(out, label, data_len, label_len):
        loss = ctc_loss(out, label, data_len, label_len)
        reg_loss = 0
        for param in model.parameters():
            reg_loss += (param**2).sum()

        factor = 0.00001
        loss += factor * reg_loss
        return loss

    optimizer = Adam(model.parameters(), lr=1e-4, weight_decay=5e-5)
Example no. 13
def train():
    logging.basicConfig(level=logging.INFO,
                        filename='log.txt',
                        format='%(message)s')
    tag_path = TRAIN_TAG_PATH
    corpus_path = TRAIN_CORPUS_PATH
    save_model_name = MODEL_NAME
    best_model_name = BEST_NAME
    load_model_path = None
    embedding_dim = EMBEDDING_DIM
    hidden_dim = HIDDEN_DIM
    train_epoch = TRAIN_EPOCH
    word_to_ix = WORD_TO_IX
    start_epoch = 0
    best_score = 0.
    loss_info, train_avg_info, test_avg_info = [], [], []

    sentences, tags = load_train_data(tag_path, corpus_path)
    tag_to_ix = {'1': 0, '2': 1, '3': 2, '4': 3, '5': 4}
    label = torch.tensor([[tag_to_ix[tag]] for tag in tags])

    model = BiLSTM(len(word_to_ix), 5, embedding_dim, hidden_dim, dropout=0.3)
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    criterion = nn.CrossEntropyLoss()

    if load_model_path is not None:
        checkpoints = torch.load(load_model_path)
        model.load_state_dict(checkpoints['model_state_dict'])
        optimizer.load_state_dict(checkpoints['optim_state_dict'])
        start_epoch = checkpoints['epoch']

    start_time = time.time()
    logging.info('----------------------')
    for epoch in range(start_epoch, train_epoch):
        running_loss = 0.0
        for i, sen in enumerate(tqdm(sentences)):
            optimizer.zero_grad()
            input = prepare_sequence(sen, word_to_ix)
            output = model(input)
            loss = criterion(output, label[i])
            running_loss += loss.item()
            loss.backward()
            nn.utils.clip_grad_norm_(model.parameters(), 15)
            optimizer.step()

        torch.save(
            {
                'model_state_dict': model.state_dict(),
                'optim_state_dict': optimizer.state_dict(),
                'epoch': epoch + 1
            }, save_model_name)

        train_avg = eval(TRAIN_TAG_PATH, TRAIN_CORPUS_PATH)
        test_avg = eval(TEST_TAG_PATH, TEST_CORPUS_PATH)
        loss_info.append(running_loss)
        train_avg_info.append(train_avg)
        test_avg_info.append(test_avg)

        logging.info('********')
        logging.info('epoch: {}'.format(epoch + 1))
        logging.info('loss: {}'.format(running_loss))
        logging.info('train avg: {}'.format(train_avg))
        logging.info('test avg: {}'.format(test_avg))

        if test_avg > best_score:
            torch.save({
                'model_state_dict': model.state_dict(),
            }, best_model_name)
            best_score = test_avg
            print('save best')

    print('training time:', time.time() - start_time)
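loss_info, train_avg_info and test_avg_info are collected but never used in this snippet; a minimal sketch (assuming matplotlib, and that the three lists are made available after training, e.g. by returning them from train()) of plotting the loss and accuracy curves:

import matplotlib.pyplot as plt

def plot_training_curves(loss_info, train_avg_info, test_avg_info):
    # One point per completed epoch.
    epochs_axis = list(range(1, len(loss_info) + 1))
    plt.figure()
    plt.plot(epochs_axis, loss_info)
    plt.xlabel('epoch')
    plt.ylabel('running loss')
    plt.figure()
    plt.plot(epochs_axis, train_avg_info, label='train avg')
    plt.plot(epochs_axis, test_avg_info, label='test avg')
    plt.xlabel('epoch')
    plt.ylabel('accuracy / %')
    plt.legend()
    plt.show()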
Example no. 14
def getModelOptimizerTokenizer(model_type, vocab_file, embed_file=None, 
                               bert_config_file=None, init_checkpoint=None,
                               label_list=None,
                               do_lower_case=True,
                               num_train_steps=None,
                               learning_rate=None,
                               base_learning_rate=None,
                               warmup_proportion=None):
    if embed_file is not None:
        # Load pretrained embeddings if an embedding file is provided.
        embeddings = pickle.load(open(embed_file, 'rb'))
    else:
        embeddings = None

    if model_type == "BiLSTM":
        logger.info("model = BiLSTM")
        tokenizer = WordLevelTokenizer(vocab_file=vocab_file)
        model = BiLSTM(pretrain_embeddings=embeddings, freeze=True)
        optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=1e-4)
        # if an initial checkpoint is given, load the pretrained weights here
        if init_checkpoint is not None:
            logger.info("retraining with saved model.")
            checkpoint = torch.load(init_checkpoint, map_location='cpu')
            model.load_state_dict(checkpoint)
    elif model_type == "BERTSimple":
        logger.info("model = BERTSimple")
        tokenizer = WordLevelTokenizer(vocab_file=vocab_file)
        bert_config = BertConfig(hidden_size=300,
                                 num_hidden_layers=12,
                                 num_attention_heads=12,
                                 intermediate_size=3072,
                                 hidden_act="gelu",
                                 hidden_dropout_prob=0.1,
                                 attention_probs_dropout_prob=0.1,
                                 max_position_embeddings=512,
                                 type_vocab_size=2,
                                 initializer_range=0.02)
        if embed_file is None:
            raise ValueError("BERTSimple needs a pretrain embedding file.")
        model = \
            BertSimpleForSequenceClassification(bert_config,
                                                pretrain_embeddings=embeddings,
                                                num_labels=len(label_list),
                                                type_id_enable=True,
                                                position_enable=True)
        if init_checkpoint is not None:
            logger.info("retraining with saved model.")
            model.bert.load_state_dict(torch.load(init_checkpoint, map_location='cpu'))
        # instead of BERTAdam, we use Adam to be able to perform gs on bias
        optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=1e-4)
    elif model_type == "BERTPretrain":
        logger.info("model = BERTPretrain")
        if bert_config_file is not None:
            bert_config = BertConfig.from_json_file(bert_config_file)
        else:
            # default?
            bert_config = BertConfig(
                hidden_size=768,
                num_hidden_layers=12,
                num_attention_heads=12,
                intermediate_size=3072,
                hidden_act="gelu",
                hidden_dropout_prob=0.1,
                attention_probs_dropout_prob=0.1,
                max_position_embeddings=512,
                type_vocab_size=2,
                initializer_range=0.02
            )
        tokenizer = FullTokenizer(
            vocab_file=vocab_file, do_lower_case=do_lower_case, pretrain=False)
        # Overwrite the vocab size to be exact; this also saves space in case
        # the vocab size has shrunk.
        bert_config.vocab_size = len(tokenizer.vocab)
        # model and optimizer
        model = BertForSequenceClassification(bert_config, len(label_list))

        if init_checkpoint is not None:
            logger.info("retraining with saved model.")
            model.bert.load_state_dict(torch.load(init_checkpoint, map_location='cpu'))
        no_decay = ['bias', 'gamma', 'beta']
        optimizer_parameters = [
            {'params': [p for n, p in model.named_parameters() 
                if not any(nd in n for nd in no_decay)], 'weight_decay_rate': 0.01},
            {'params': [p for n, p in model.named_parameters() 
                if any(nd in n for nd in no_decay)], 'weight_decay_rate': 0.0}
            ]
            
        optimizer = BERTAdam(optimizer_parameters,
                            lr=learning_rate,
                            warmup=warmup_proportion,
                            t_total=num_train_steps)
    elif model_type == "ContextBERT":
        logger.info("model = ContextBERT")
        # this is the model we develop
        tokenizer = FullTokenizer(
            vocab_file=vocab_file, do_lower_case=do_lower_case, pretrain=False)
        if bert_config_file is not None:
            bert_config = BertConfig.from_json_file(bert_config_file)
        else:
            # default?
            bert_config = BertConfig(
                hidden_size=768,
                num_hidden_layers=12,
                num_attention_heads=12,
                intermediate_size=3072,
                hidden_act="gelu",
                hidden_dropout_prob=0.1,
                attention_probs_dropout_prob=0.1,
                max_position_embeddings=512,
                type_vocab_size=2,
                initializer_range=0.02
            )
        # Overwrite the vocab size to be exact; this also saves space in case
        # the vocab size has shrunk.
        bert_config.vocab_size = len(tokenizer.vocab)
        # model and optimizer
        model = ContextAwareBertForSequenceClassification(
                    bert_config, len(label_list),
                    init_weight=True)
        if init_checkpoint is not None:
            logger.info("retraining with saved model.")
            # only load fields that are available
            if "checkpoint" in init_checkpoint:
                # load the full model if it is not the original Google BERT pretrained checkpoint
                model.load_state_dict(torch.load(init_checkpoint, map_location='cpu'), strict=False)
            else:
                model.bert.load_state_dict(torch.load(init_checkpoint, map_location='cpu'), strict=False)
        #######################################################################
        # Instead of BERTAdam, we use Adam to be able to perform gs on bias.
        # We will have a smaller learning rate for the original BERT parameters
        # and a higher learning rate for the new parameters.
        # orignal_bert = BertForSequenceClassification(bert_config, len(label_list))
        # original_params = []
        # exclude_params = ["classifier.weight", "classifier.bias"]
        # for params in orignal_bert.named_parameters():
        #     if params not in exclude_params:
        #         original_params.append(params[0])
        # no_decay = ['bias', 'gamma', 'beta']
        # base_params_no_decay = list(map(lambda x: x[1],
        #                             list(filter(lambda kv: kv[0] in original_params \
        #                             and any(nd in kv[0] for nd in no_decay),
        #                             model.named_parameters()))))
        # base_params_decay = list(map(lambda x: x[1],
        #                             list(filter(lambda kv: kv[0] in original_params \
        #                             and not any(nd in kv[0] for nd in no_decay),
        #                             model.named_parameters()))))
        # params = list(map(lambda x: x[1], 
        #                   list(filter(lambda kv: kv[0] not in original_params \
        #                                 or kv[0] in exclude_params,
        #                   model.named_parameters()))))

        # optimizer_parameters = [
        #     {'params': base_params_decay, 'weight_decay_rate': 0.01},
        #     {'params': base_params_no_decay, 'weight_decay_rate': 0.0},
        #     {'params': params, 'lr': learning_rate, 'weight_decay_rate': 0.01}]
        # optimizer = BERTAdam(optimizer_parameters,
        #                      lr=base_learning_rate,
        #                      warmup=warmup_proportion,
        #                      t_total=num_train_steps)

        # no_decay = ['bias', 'gamma', 'beta']
        # optimizer_parameters = [
        #     {'params': [p for n, p in model.named_parameters() 
        #         if not any(nd in n for nd in no_decay)], 'weight_decay_rate': 0.01},
        #     {'params': [p for n, p in model.named_parameters() 
        #         if any(nd in n for nd in no_decay)], 'weight_decay_rate': 0.0}
        #     ]
        # optimizer = BERTAdam(optimizer_parameters,
        #                     lr=learning_rate,
        #                     warmup=warmup_proportion,
        #                     t_total=num_train_steps)
        optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=1e-4)
        #######################################################################
    elif model_type == "HeadwiseContextBERT":
        logger.info("model = HeadwiseContextBERT")
        # this is the model we develop
        tokenizer = FullTokenizer(
            vocab_file=vocab_file, do_lower_case=do_lower_case, pretrain=False)
        if bert_config_file is not None:
            bert_config = BertConfig.from_json_file(bert_config_file)
        else:
            # default?
            bert_config = BertConfig()
        # Overwrite the vocab size to be exact; this also saves space in case
        # the vocab size has shrunk.
        bert_config.vocab_size = len(tokenizer.vocab)
        # model and optimizer
        model = HeadwiseContextAwareBertForSequenceClassification(
                    bert_config, len(label_list),
                    init_weight=True)
        if init_checkpoint is not None:
            logger.info("retraining with saved model.")
            # only load fields that are available
            if "checkpoint" in init_checkpoint:
                logger.info("retraining with a checkpoint model instead.")
                # load the full model if it is not the original Google BERT pretrained checkpoint
                model.load_state_dict(torch.load(init_checkpoint, map_location='cpu'), strict=False)
            else:
                model.bert.load_state_dict(torch.load(init_checkpoint, map_location='cpu'), strict=False)
        no_decay = ['bias', 'gamma', 'beta']
        optimizer_parameters = [
            {'params': [p for n, p in model.named_parameters() 
                if not any(nd in n for nd in no_decay)], 'weight_decay_rate': 0.01},
            {'params': [p for n, p in model.named_parameters() 
                if any(nd in n for nd in no_decay)], 'weight_decay_rate': 0.0}
            ]
            
        optimizer = BERTAdam(optimizer_parameters,
                            lr=learning_rate,
                            warmup=warmup_proportion,
                            t_total=num_train_steps)
    else:
        logger.info("***** Not Support Model Type *****")
    return model, optimizer, tokenizer
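A hedged usage sketch for the BiLSTM branch of getModelOptimizerTokenizer; the file names and learning rate below are hypothetical placeholders:

model, optimizer, tokenizer = getModelOptimizerTokenizer(
    model_type='BiLSTM',
    vocab_file='vocab.txt',       # hypothetical vocabulary file
    embed_file='embeddings.pkl',  # hypothetical pickled embedding matrix
    learning_rate=1e-3)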