Example 1
def eval(tag_path, corpus_path):
    correct = 0
    total = 0
    acc_list = []
    model_name = MODEL_NAME
    embedding_dim = EMBEDDING_DIM
    hidden_dim = HIDDEN_DIM
    word_to_ix = WORD_TO_IX

    model = BiLSTM(len(word_to_ix), 5, embedding_dim, hidden_dim)
    checkpoint = torch.load(model_name)
    model.load_state_dict(checkpoint['model_state_dict'])
    model.eval()

    tag_to_ix = {'1': 0, '2': 1, '3': 2, '4': 3, '5': 4}
    sentences, tags = load_train_data(tag_path, corpus_path)
    labels = torch.tensor([[tag_to_ix[tag]] for tag in tags[:]])

    with torch.no_grad():
        for i, sen in enumerate(tqdm(sentences[:])):
            input = prepare_sequence(sen, word_to_ix)
            output = model(input)
            _, predicted = torch.max(output.data, 1)
            label = labels[i]
            total += label.size(0)
            correct += (predicted == label).sum().item()
            acc = round(100 * correct / total, 2)
            acc_list.append(acc)
    assert len(acc_list) == len(sentences)
    final_acc = acc
    plt.plot(list(range(len(tags))), acc_list)
    plt.xlabel('pred_num')
    plt.ylabel('accuracy / %')
    plt.show()
    return final_acc
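
Examples 1, 6, and 7 assume a prepare_sequence helper that turns a list of tokens into a tensor of vocabulary indices. A minimal sketch of such a helper, assuming an out-of-vocabulary fallback index (the original helper is not shown):

import torch

def prepare_sequence(tokens, word_to_ix, unk_index=0):
    # Map each token to its vocabulary index; unseen tokens fall back to unk_index.
    idxs = [word_to_ix.get(token, unk_index) for token in tokens]
    return torch.tensor(idxs, dtype=torch.long)
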
Example 2
    def train_bilstm(self):
        # Load Data
        pre_processor = PreProcessor(file_path=TEST_DATA_PATH)
        sentences, entities = pre_processor.run()

        n_test_data = len(sentences)

        test_generator = generate_data_by_batch(
            x=sentences,
            y=entities,
            n_classes=pre_processor.n_entities + 1,
            entity_to_index=pre_processor.entity_to_index,
            batch_size=BATCH_SIZE
        )

        bilstm = BiLSTM(n_class=pre_processor.n_entities + 1)
        bilstm.load()

        # Saving the model with `model.save()` doesn't store custom loss or metric functions. The model has to be
        # stored separately as "config" and "weight" files and loaded from both, which makes compiling before
        # evaluating an essential step. This issue appears to have existed since Keras 2.0.
        # https://github.com/keras-team/keras/issues/5916
        bilstm.model.compile(
            optimizer="nadam",
            loss="categorical_crossentropy",
            metrics=["accuracy", custom_f1, custom_precision, custom_recall]
        )
        bilstm.model.evaluate_generator(
            test_generator,
            steps=n_test_data//BATCH_SIZE,
            verbose=1,
        )
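
The Keras comment in Example 2 points to the usual workaround: serialize the architecture and the weights separately, then compile again after loading so the custom loss/metric functions are re-attached. A minimal sketch of that pattern, assuming standalone Keras, placeholder file names, and with the custom metric functions omitted for brevity:

from keras.models import model_from_json

def save_split(model, config_path="bilstm_config.json", weights_path="bilstm_weights.h5"):
    # The architecture goes to JSON and the weights to HDF5; custom metrics are not serialized.
    with open(config_path, "w") as f:
        f.write(model.to_json())
    model.save_weights(weights_path)

def load_split(config_path="bilstm_config.json", weights_path="bilstm_weights.h5"):
    with open(config_path) as f:
        model = model_from_json(f.read())
    model.load_weights(weights_path)
    # Compiling again is what re-attaches the loss and metrics before evaluate() or fit().
    model.compile(optimizer="nadam", loss="categorical_crossentropy", metrics=["accuracy"])
    return model
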
Example 3
def initialize_model(gpu, vocab_size, v_vec, emb_requires_grad, args):
    emb_dim = args.emb_dim
    h_dim = None
    class_num = 2
    is_gpu = True
    if gpu == -1:
        is_gpu = False
    if args.emb_type == 'ELMo' or args.emb_type == 'ELMoForManyLangs':
        bilstm = BiLSTM(emb_dim,
                        h_dim,
                        class_num,
                        vocab_size,
                        is_gpu,
                        v_vec,
                        emb_type=args.emb_type,
                        elmo_model_dir=args.emb_path)
    elif args.emb_type == 'None':
        bilstm = BiLSTM(emb_dim,
                        h_dim,
                        class_num,
                        vocab_size,
                        is_gpu,
                        v_vec,
                        emb_type=args.emb_type)
    else:
        bilstm = BiLSTM(emb_dim,
                        h_dim,
                        class_num,
                        vocab_size,
                        is_gpu,
                        v_vec,
                        emb_type=args.emb_type)
    if is_gpu:
        bilstm = bilstm.cuda()

    for m in bilstm.modules():
        print(m.__class__.__name__)
        weights_init(m)

    if args.emb_type != 'ELMo' and args.emb_type != 'ELMoForManyLangs' and args.emb_type != 'None':
        for param in bilstm.word_embed.parameters():
            param.requires_grad = emb_requires_grad

    return bilstm
Example 4
    def __init__(self, config):
        self.config = config
        self.output_path = os.path.join(self.config.BASE_DIR,
                                        self.config.output_path)

        self.w2ix, self.ix2t = self.load_vocab()  # load the index dictionaries
        self.vocab_size = len(self.w2ix)
        self.sequence_length = self.config.sequence_length

        self.model = BiLSTM(self.config, self.vocab_size)
        self.load_graph()
Example 5
    def __init__(self, trainer_params, args):
        self.args = args
        self.trainer_params = trainer_params

        random.seed(trainer_params.random_seed)
        torch.manual_seed(trainer_params.random_seed)
        if args.cuda:
            torch.cuda.manual_seed_all(trainer_params.random_seed)

        self.train_data = seq_mnist_train(trainer_params)
        self.val_data = seq_mnist_val(trainer_params)

        self.train_loader = DataLoader(self.train_data, batch_size=trainer_params.batch_size, \
                                        shuffle=True, num_workers=trainer_params.num_workers)

        self.val_loader = DataLoader(self.val_data, batch_size=trainer_params.test_batch_size, \
                                        shuffle=False, num_workers=trainer_params.num_workers)

        self.starting_epoch = 1
        self.prev_loss = 10000

        self.model = BiLSTM(trainer_params)
        self.criterion = wp.CTCLoss(size_average=False)
        self.labels = [i for i in range(trainer_params.num_classes - 1)]
        self.decoder = seq_mnist_decoder(labels=self.labels)
        self.optimizer = optim.Adam(self.model.parameters(),
                                    lr=trainer_params.lr)

        if args.cuda:
            torch.cuda.set_device(args.gpus)
            self.model = self.model.cuda()
            self.criterion = self.criterion.cuda()

        if args.resume or args.eval or args.export:
            print("Loading model from {}".format(args.resume))
            package = torch.load(args.resume,
                                 map_location=lambda storage, loc: storage)
            self.model.load_state_dict(package['state_dict'])
            self.optimizer.load_state_dict(package['optim_dict'])
            self.starting_epoch = package['starting_epoch']
            self.prev_loss = package['prev_loss']
            if args.cuda:
                for state in self.optimizer.state.values():
                    for k, v in state.items():
                        if torch.is_tensor(v):
                            state[k] = v.cuda()

        if args.init_bn_fc_fusion:
            if not trainer_params.prefused_bn_fc:
                self.model.batch_norm_fc.init_fusion()
                self.trainer_params.prefused_bn_fc = True
            else:
                raise Exception("BN and FC are already fused.")
Example 6
def predict(sentence):
    sentence = sentence.split()
    model_name = BEST_NAME
    embedding_dim = EMBEDDING_DIM
    hidden_dim = HIDDEN_DIM
    word_to_ix = WORD_TO_IX

    model = BiLSTM(len(word_to_ix), 5, embedding_dim, hidden_dim)
    checkpoint = torch.load(model_name)
    model.load_state_dict(checkpoint['model_state_dict'])
    input = prepare_sequence(sentence, word_to_ix)
    with torch.no_grad():
        output = model(input)
        print(output)
        _, predicted = torch.max(output.data, 1)
        print(predicted)
Example 7
def get_time_to_score(tsv_path, thing, model_path):
    time_to_count = {}
    time_to_scoresum = {}
    if thing == 'hair_dryer':
        id = '732252283'
    elif thing == 'microwave':
        id = '423421857'
    else:
        id = '246038397'

    with open('train_' + thing + '_word_to_ix.json', 'r') as j:
        word_to_ix = json.load(j)
    embedding_dim = EMBEDDING_DIM
    hidden_dim = HIDDEN_DIM
    model = BiLSTM(len(word_to_ix), 5, embedding_dim, hidden_dim)
    checkpoints = torch.load(model_path)
    model.load_state_dict(checkpoints['model_state_dict'])
    model.eval()

    with open(tsv_path, 'r') as f:
        reader = csv.reader(f, delimiter='\t')
        for i, r in enumerate(reader):
            if i == 0 or r[4] != id:
                continue
            month, _, year = r[14].split('/')
            if year not in {'2014', '2015'}:
                continue
            time = get_idx_by_year_month(int(year), int(month))
            if time < 8:
                continue
            sen = (r[12] + ' ' + r[13]).lower()
            sen = re.sub(r'[^A-Za-z0-9,.!]+', ' ', sen)
            input = prepare_sequence(sen.split(), word_to_ix)
            with torch.no_grad():
                output = model(input)
                _, predicted = torch.max(output.data, 1)
            pred_score = predicted.item()
            if time not in time_to_count:
                time_to_count[time] = 0
                time_to_scoresum[time] = 0.
            time_to_count[time] += 1
            time_to_scoresum[time] += pred_score
    time_to_scoremean = {}
    for time in time_to_count.keys():
        time_to_scoremean[time] = time_to_scoresum[time] / time_to_count[time]
    print(time_to_count)
    return time_to_scoremean
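
Examples 1, 6, and 7 load a checkpoint dictionary that contains a 'model_state_dict' entry. A minimal sketch of how the training side could write such a checkpoint (the file name and the optimizer entry are assumptions; the original training code is not shown):

import torch

def save_checkpoint(model, optimizer, epoch, path="bilstm_checkpoint.pt"):
    # Bundle the states into one dictionary so the loading code above can read 'model_state_dict'.
    torch.save({
        'epoch': epoch,
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
    }, path)
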
Example 8
def main():
    X_train, Y_train, X_valid, Y_valid, timestamp, close_prices = load_data(
        'data.csv', TIME_WINDOW)
    [X_train, Y_train, X_valid, Y_valid] = [
        torch.from_numpy(i.astype(np.float32))
        for i in [X_train, Y_train, X_valid, Y_valid]
    ]
    model = BiLSTM(feature_num=FEATURE_NUM, time_window=TIME_WINDOW - 1)
    dataset_train = torch.utils.data.TensorDataset(X_train, Y_train)
    dataset_valid = torch.utils.data.TensorDataset(X_valid, Y_valid)
    train_dataloader = torch.utils.data.DataLoader(dataset=dataset_train,
                                                   batch_size=BATCH_SIZE,
                                                   shuffle=False)
    valid_dataloader = torch.utils.data.DataLoader(dataset=dataset_valid,
                                                   batch_size=BATCH_SIZE,
                                                   shuffle=False)
    min_loss = train(model, train_dataloader, valid_dataloader)
    print(f'Best trained model has a loss of {min_loss:.5f}.')
Example 9
def model_load_test(test_df,
                    vocab_file,
                    embeddings_file,
                    pretrained_file,
                    test_prediction_dir,
                    test_prediction_name,
                    mode,
                    num_labels=2,
                    max_length=50,
                    gpu_index=0,
                    batch_size=128):

    device = torch.device(
        "cuda:{}".format(gpu_index) if torch.cuda.is_available() else "cpu")
    print(20 * "=", " Preparing for testing ", 20 * "=")
    if platform == "linux" or platform == "linux2":
        checkpoint = torch.load(pretrained_file)
    else:
        checkpoint = torch.load(pretrained_file, map_location=device)
    # Retrieving model parameters from checkpoint.
    embeddings = load_embeddings(embeddings_file)
    print("\t* Loading test data...")
    test_data = My_Dataset(test_df, vocab_file, max_length, mode)
    test_loader = DataLoader(test_data, shuffle=False, batch_size=batch_size)
    print("\t* Building model...")
    model = BiLSTM(embeddings,
                   num_labels=num_labels,
                   max_length=max_length,
                   device=device).to(device)
    model.load_state_dict(checkpoint["model"])
    print(20 * "=", " Testing BiLSTM model on device: {} ".format(device),
          20 * "=")
    batch_time, total_time, accuracy, predictions = test(model, test_loader)
    print(
        "\n-> Average batch processing time: {:.4f}s, total test time: {:.4f}s, accuracy: {:.4f}%\n"
        .format(batch_time, total_time, (accuracy * 100)))
    test_prediction = pd.DataFrame({'prediction': predictions})
    if not os.path.exists(test_prediction_dir):
        os.makedirs(test_prediction_dir)
    test_prediction.to_csv(os.path.join(test_prediction_dir,
                                        test_prediction_name),
                           index=False)
Example 10
def initialize_model(gpu, vocab_size, v_vec, dropout_ratio, n_layers, model,
                     statistics_of_each_case_type):
    is_gpu = True
    if gpu == -1:
        is_gpu = False
    if model == 'Base' or model == 'FT':
        bilstm = BiLSTM(vocab_size, v_vec, dropout_ratio, n_layers, gpu=is_gpu)
    elif model == 'OneH':
        bilstm = OneHot(vocab_size, v_vec, dropout_ratio, n_layers, gpu=is_gpu)
    elif model == 'FA':
        bilstm = FeatureAugmentation(vocab_size,
                                     v_vec,
                                     dropout_ratio,
                                     n_layers,
                                     gpu=is_gpu)
    elif model == 'CPS':
        bilstm = ClassProbabilityShift(
            vocab_size,
            v_vec,
            dropout_ratio,
            n_layers,
            statistics_of_each_case_type=statistics_of_each_case_type,
            gpu=is_gpu)
    elif model == 'MIX':
        bilstm = Mixture(
            vocab_size,
            v_vec,
            dropout_ratio,
            n_layers,
            statistics_of_each_case_type=statistics_of_each_case_type,
            gpu=is_gpu)
    if is_gpu:
        bilstm = bilstm.cuda()

    for m in bilstm.modules():
        print(m.__class__.__name__)
        weights_init(m)

    return bilstm
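
Examples 3 and 10 both iterate over bilstm.modules() and call a weights_init helper that is not shown. A minimal sketch of what such an initializer commonly looks like (an assumption, not the original implementation):

import torch.nn as nn

def weights_init(m):
    # Xavier-initialize linear layers; other module types keep their default initialization.
    if isinstance(m, nn.Linear):
        nn.init.xavier_uniform_(m.weight)
        if m.bias is not None:
            nn.init.zeros_(m.bias)
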
Example 11
embeddings = gensim.models.KeyedVectors.load_word2vec_format('../embeddings/german.model', binary=True)
print("Done.")

# loop through each word in embeddings
for word in embeddings.vocab:
    if word.lower() in words:
        vector = embeddings.wv[word]
        word_embeddings.append(vector)
        word2Idx[word] = len(word2Idx)

word_embeddings = np.array(word_embeddings)
print(f"Found embeddings for {word_embeddings.shape[0]} of {len(words)} words.")

train_sentences = format_to_tensor(train_sentences, word2Idx, label2Idx)

model = BiLSTM(word_embeddings=torch.FloatTensor(word_embeddings), num_classes=len(labels))
model.train()

epochs = 50
learning_rate = 0.015
momentum = 0.9

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, momentum=momentum)

def eval():
    correct = 0
    total = 0

    for tokens, true_labels in train_sentences:
        total += len(true_labels)
Example 12
def train_model(args,
                train_text=None,
                train_labels=None,
                eval_text=None,
                eval_labels=None,
                tokenizer=None):
    textattack.shared.utils.set_seed(args.random_seed)

    _make_directories(args.output_dir)

    num_gpus = torch.cuda.device_count()

    # Save logger writes to file
    log_txt_path = os.path.join(args.output_dir, "log.txt")
    fh = logging.FileHandler(log_txt_path)
    fh.setLevel(logging.DEBUG)
    logger.addHandler(fh)
    logger.info(f"Writing logs to {log_txt_path}.")

    train_examples_len = len(train_text)

    # label_id_len = len(train_labels)
    label_set = set(train_labels)
    args.num_labels = len(label_set)
    logger.info(
        f"Loaded dataset. Found: {args.num_labels} labels: {sorted(label_set)}"
    )

    if len(train_labels) != len(train_text):
        raise ValueError(
            f"Number of train examples ({len(train_text)}) does not match number of labels ({len(train_labels)})"
        )
    if len(eval_labels) != len(eval_text):
        raise ValueError(
            f"Number of teste xamples ({len(eval_text)}) does not match number of labels ({len(eval_labels)})"
        )

    if args.model == "gru":
        textattack.shared.logger.info(
            "Loading model: BiGRU")
        model = BiGRU()
        model.to(device)
    elif args.model == "lstm":
        textattack.shared.logger.info(
            "Loading model: BiLSTM")
        model = BiLSTM()
        model.to(device)

    # attack_class = attack_from_args(args)
    # We are adversarial training if the user specified an attack along with
    # the training args.
    # adversarial_training = (attack_class is not None) and (not args.check_robustness)

    # multi-gpu training
    if num_gpus > 1:
        model = torch.nn.DataParallel(model)
        logger.info("Using torch.nn.DataParallel.")
    logger.info(f"Training model across {num_gpus} GPUs")

    num_train_optimization_steps = (
        int(train_examples_len / args.batch_size / args.grad_accum_steps) *
        args.num_train_epochs)

    if args.model == "lstm" or args.model == "cnn" or args.model == "gru":

        def need_grad(x):
            return x.requires_grad

        optimizer = torch.optim.Adam(filter(need_grad, model.parameters()),
                                     lr=args.learning_rate)
        scheduler = None
    else:
        param_optimizer = list(model.named_parameters())
        no_decay = ["bias", "LayerNorm.bias", "LayerNorm.weight"]
        optimizer_grouped_parameters = [
            {
                "params": [
                    p for n, p in param_optimizer
                    if not any(nd in n for nd in no_decay)
                ],
                "weight_decay":
                0.01,
            },
            {
                "params": [
                    p for n, p in param_optimizer
                    if any(nd in n for nd in no_decay)
                ],
                "weight_decay":
                0.0,
            },
        ]

        optimizer = transformers.optimization.AdamW(
            optimizer_grouped_parameters, lr=args.learning_rate)

        scheduler = transformers.optimization.get_linear_schedule_with_warmup(
            optimizer,
            num_warmup_steps=args.warmup_proportion,
            num_training_steps=num_train_optimization_steps,
        )

    # Start Tensorboard and log hyperparams.
    from torch.utils.tensorboard import SummaryWriter

    tb_writer = SummaryWriter(args.output_dir)

    # Use Weights & Biases, if enabled.
    if args.enable_wandb:
        global wandb
        wandb = textattack.shared.utils.LazyLoader("wandb", globals(), "wandb")
        wandb.init(sync_tensorboard=True)

    # Save original args to file
    args_save_path = os.path.join(args.output_dir, "train_args.json")
    _save_args(args, args_save_path)
    logger.info(f"Wrote original training args to {args_save_path}.")

    tb_writer.add_hparams(
        {k: v
         for k, v in vars(args).items() if _is_writable_type(v)}, {})

    # Start training
    logger.info("***** Running training *****")
    # if augmenter:
    #     logger.info(f"\tNum original examples = {train_examples_len}")
    #     logger.info(f"\tNum examples after augmentation = {len(train_text)}")
    # else:
    #     logger.info(f"\tNum examples = {train_examples_len}")
    logger.info(f"\tNum examples = {train_examples_len}")
    logger.info(f"\tBatch size = {args.batch_size}")
    logger.info(f"\tMax sequence length = {args.max_length}")
    logger.info(f"\tNum steps = {num_train_optimization_steps}")
    logger.info(f"\tNum epochs = {args.num_train_epochs}")
    logger.info(f"\tLearning rate = {args.learning_rate}")

    eval_dataloader = _make_dataloader(tokenizer, eval_text, eval_labels,
                                       args.batch_size)
    train_dataloader = _make_dataloader(tokenizer, train_text, train_labels,
                                        args.batch_size)

    global_step = 0
    tr_loss = 0

    model.train()
    args.best_eval_score = 0
    args.best_eval_score_epoch = 0
    args.epochs_since_best_eval_score = 0

    def loss_backward(loss):
        if num_gpus > 1:
            loss = loss.mean(
            )  # mean() to average on multi-gpu parallel training
        if args.grad_accum_steps > 1:
            loss = loss / args.grad_accum_steps
        loss.backward()
        return loss

    # if args.do_regression:
    #     # TODO integrate with textattack `metrics` package
    #     loss_fct = torch.nn.MSELoss()
    # else:
    #     loss_fct = torch.nn.CrossEntropyLoss()
    loss_fct = torch.nn.CrossEntropyLoss()

    for epoch in tqdm.trange(int(args.num_train_epochs),
                             desc="Epoch",
                             position=0,
                             leave=True):
        # if adversarial_training:
        #     if epoch >= args.num_clean_epochs:
        #         if (epoch - args.num_clean_epochs) % args.attack_period == 0:
        #             # only generate a new adversarial training set every args.attack_period epochs
        #             # after the clean epochs
        #             logger.info("Attacking model to generate new training set...")

        #             adv_attack_results = _generate_adversarial_examples(
        #                 model_wrapper, attack_class, list(zip(train_text, train_labels))
        #             )
        #             adv_train_text = [r.perturbed_text() for r in adv_attack_results]
        #             train_dataloader = _make_dataloader(
        #                 tokenizer, adv_train_text, train_labels, args.batch_size
        #             )
        #     else:
        #         logger.info(f"Running clean epoch {epoch+1}/{args.num_clean_epochs}")

        prog_bar = tqdm.tqdm(train_dataloader,
                             desc="Iteration",
                             position=0,
                             leave=True)

        # Use these variables to track training accuracy during classification.
        correct_predictions = 0
        total_predictions = 0
        for step, batch in enumerate(prog_bar):
            ids1, ids2, msk1, msk2, labels = batch
            # input_ids, labels = batch
            labels = labels.to(device)
            # if isinstance(input_ids, dict):
            #     ## dataloader collates dict backwards. This is a workaround to get
            #     # ids in the right shape for HuggingFace models
            #     input_ids = {
            #         k: torch.stack(v).T.to(device) for k, v in input_ids.items()
            #     }
            #     logits = model(**input_ids)[0]
            # else:

            ids1 = ids1.to(device)
            ids2 = ids2.to(device)
            msk1 = msk1.to(device)
            msk2 = msk2.to(device)
            logits = model(ids1, ids2, msk1, msk2)

            # if args.do_regression:
            #     # TODO integrate with textattack `metrics` package
            #     loss = loss_fct(logits.squeeze(), labels.squeeze())
            # else:
            loss = loss_fct(logits, labels)
            pred_labels = logits.argmax(dim=-1)
            correct_predictions += (pred_labels == labels).sum().item()
            total_predictions += len(pred_labels)

            loss = loss_backward(loss)
            tr_loss += loss.item()

            if global_step % args.tb_writer_step == 0:
                tb_writer.add_scalar("loss", loss.item(), global_step)
                if scheduler is not None:
                    tb_writer.add_scalar("lr",
                                         scheduler.get_last_lr()[0],
                                         global_step)
                else:
                    tb_writer.add_scalar("lr", args.learning_rate, global_step)
            if global_step > 0:
                prog_bar.set_description(f"Loss {tr_loss/global_step}")
            if (step + 1) % args.grad_accum_steps == 0:
                optimizer.step()
                if scheduler is not None:
                    scheduler.step()
                optimizer.zero_grad()
            # Save model checkpoint to file.
            if (global_step > 0 and (args.checkpoint_steps > 0)
                    and (global_step % args.checkpoint_steps) == 0):
                _save_model_checkpoint(model, args.output_dir, global_step)

            # Inc step counter.
            global_step += 1

        # Print training accuracy, if we're tracking it.
        if total_predictions > 0:
            train_acc = correct_predictions / total_predictions
            logger.info(f"Train accuracy: {train_acc*100}%")
            tb_writer.add_scalar("epoch_train_score", train_acc, epoch)

        # Check accuracy after each epoch.
        # skip args.num_clean_epochs during adversarial training
        # if (not adversarial_training) or (epoch >= args.num_clean_epochs):
        if (epoch >= args.num_clean_epochs):
            eval_score = _get_eval_score(model, eval_dataloader, False)
            tb_writer.add_scalar("epoch_eval_score", eval_score, epoch)

            if args.checkpoint_every_epoch:
                _save_model_checkpoint(model, args.output_dir,
                                       args.global_step)

            logger.info(
                f"Eval {'pearson correlation' if args.do_regression else 'accuracy'}: {eval_score*100}%"
            )
            if eval_score > args.best_eval_score:
                args.best_eval_score = eval_score
                args.best_eval_score_epoch = epoch
                args.epochs_since_best_eval_score = 0
                _save_model(model, args.output_dir, args.weights_name,
                            args.config_name)
                logger.info(
                    f"Best acc found. Saved model to {args.output_dir}.")
                _save_args(args, args_save_path)
                logger.info(f"Saved updated args to {args_save_path}")
            else:
                args.epochs_since_best_eval_score += 1
                if (args.early_stopping_epochs >
                        0) and (args.epochs_since_best_eval_score >
                                args.early_stopping_epochs):
                    logger.info(
                        f"Stopping early since it's been {args.early_stopping_epochs} steps since validation acc increased"
                    )
                    break

        if args.check_robustness:
            samples_to_attack = list(zip(eval_text, eval_labels))
            samples_to_attack = random.sample(samples_to_attack, 1000)
            adv_attack_results = _generate_adversarial_examples(
                model_wrapper, attack_class, samples_to_attack)
            attack_types = [r.__class__.__name__ for r in adv_attack_results]
            attack_types = collections.Counter(attack_types)

            adv_acc = 1 - (attack_types["SkippedAttackResult"] /
                           len(adv_attack_results))
            total_attacks = (attack_types["SuccessfulAttackResult"] +
                             attack_types["FailedAttackResult"])
            adv_succ_rate = attack_types[
                "SuccessfulAttackResult"] / total_attacks
            after_attack_acc = attack_types["FailedAttackResult"] / len(
                adv_attack_results)

            tb_writer.add_scalar("robustness_test_acc", adv_acc, global_step)
            tb_writer.add_scalar("robustness_total_attacks", total_attacks,
                                 global_step)
            tb_writer.add_scalar("robustness_attack_succ_rate", adv_succ_rate,
                                 global_step)
            tb_writer.add_scalar("robustness_after_attack_acc",
                                 after_attack_acc, global_step)

            logger.info(f"Eval after-attack accuracy: {100*after_attack_acc}%")

    # read the saved model and report its eval performance
    logger.info(
        "Finished training. Re-loading and evaluating model from disk.")
    model_wrapper = model_from_args(args, args.num_labels)
    model = model_wrapper.model
    model.load_state_dict(
        torch.load(os.path.join(args.output_dir, args.weights_name)))
    eval_score = _get_eval_score(model, eval_dataloader, args.do_regression)
    logger.info(
        f"Saved model {'pearson correlation' if args.do_regression else 'accuracy'}: {eval_score*100}%"
    )

    if args.save_last:
        _save_model(model, args.output_dir, args.weights_name,
                    args.config_name)

    # end of training, save tokenizer
    try:
        tokenizer.save_pretrained(args.output_dir)
        logger.info(f"Saved tokenizer {tokenizer} to {args.output_dir}.")
    except AttributeError:
        logger.warn(
            f"Error: could not save tokenizer {tokenizer} to {args.output_dir}."
        )

    # Save a little readme with model info
    write_readme(args, args.best_eval_score, args.best_eval_score_epoch)

    _save_args(args, args_save_path)
    tb_writer.close()
    logger.info(f"Wrote final training args to {args_save_path}.")
Example 13
with tf.Graph().as_default():
    session = tf.Session()
    with session.as_default():
        # Define training procedure

        with tf.variable_scope('embedding'):
            embedding = tf.get_variable(
                'embedding',
                shape=word_embedding.shape,
                dtype=tf.float32,
                initializer=tf.constant_initializer(word_embedding),
                trainable=True)

        model = BiLSTM(FLAGS.seq_length, FLAGS.hidden_size, FLAGS.layer_num,
                       FLAGS.class_num, FLAGS.learning_rate,
                       FLAGS.l2_reg_lambda)

        train_writer = tf.summary.FileWriter(FLAGS.log_path + '/train',
                                             session.graph)
        dev_writer = tf.summary.FileWriter(FLAGS.log_path + '/dev',
                                           session.graph)
        merged = tf.summary.merge_all()
        saver = tf.train.Saver()

        session.run(tf.global_variables_initializer())
        session.run(tf.local_variables_initializer())

        # training loop, for each batch

        for step in range(FLAGS.epochs_num):
Example 14
def main(args):
    print "Running BiLSTM model"
    print args
    random.seed(args.seed)
    
    trainset = []
    devset = []

    print >> sys.stderr, "Loading dataset.."
    assert(os.path.isdir(args.datapath))
    
    word_vocab = []
    for fname in sorted(os.listdir(args.datapath)):
        if os.path.isdir(fname): 
            continue
        
        #if fname.endswith('train.ner.txt'):
        if fname.endswith('.ppi.txt'):
            print fname
            dataset, vocab = load_dataset(os.path.join(args.datapath,fname))
            word_vocab += vocab
            trainset += dataset
        
            print >> sys.stderr, "Loaded {} instances with a vocab size of {} from {}".format(len(dataset),len(vocab),fname)
    
    print "Loaded {} instances from data set".format(len(trainset))
    
    word_vocab = sorted(set(word_vocab))
    vocab_cache = os.path.join(args.datapath,'word_vocab.ner.txt')
    with open(vocab_cache,'w') as f:
        print "Saved vocab to", vocab_cache
        pickle.dump(word_vocab,f)
    
    embeddings = load_embeddings(args.embeddings_path, word_vocab, 200)
    
    labels = ['B-MISC','I-MISC','O']
    
    model_name = 'saved_model_autumn'
    if not os.path.exists('{}/scratch'.format(args.datapath)):
        os.mkdir('{}/scratch'.format(args.datapath))
            
    if os.path.exists('{}/{}'.format(args.datapath,model_name)):
        os.rename('{}/{}'.format(args.datapath,model_name),
            '{}/{}_{}'.format(args.datapath,model_name,int(time.time())))
        
    os.mkdir('{}/{}'.format(args.datapath,model_name))
    
    for j in range(num_ensembles):
        m = BiLSTM(labels=labels,
                    word_vocab=word_vocab,
                    word_embeddings=embeddings,
                        optimizer=args.optimizer,
                        embedding_size=200, 
                        char_embedding_size=32,
                        lstm_dim=200,
                        num_cores=args.num_cores,
                        embedding_factor=args.embedding_factor,
                        learning_rate=args.learning_rate,
                        decay_rate=args.decay_rate,
                        dropout_keep=args.keep_prob)
        
        training_samples = random.sample(trainset,len(trainset)/2)
        
        cut = int(0.8 * len(training_samples))
        X_train, y_train = zip(*training_samples[:cut]) 
        X_dev, y_dev = zip(*training_samples[cut:]) 
        
        print "Training on {}, tuning on {}".format(len(X_train),len(X_dev))
        
        m.fit(X_train, y_train, X_dev, y_dev,
                num_iterations=args.num_iterations,
                num_it_per_ckpt=args.num_it_per_ckpt,
                batch_size=args.batch_size,
                seed=j, fb2=True)
        
        save_path = '{}/{}/model_{}'.format(args.datapath,model_name,j)
        m.save(save_path)
        print "Saved model {} to {}".format(j,save_path)
Example 15
    if FLAGS.mode != 'distil' :
        # build the vocabulary
        word2idx, idx2word, vocab_path = create_vocabulary(FLAGS.vocab_size)
        create_data_ids(word2idx)
    else:
        # build the vocabulary (augmented data set)
        word2idx, idx2word, vocab_path = create_vocabulary_distil(FLAGS.vocab_size)
        create_data_ids_distil(word2idx)

    if not tf.gfile.Exists(FLAGS.model_save_dir):
        tf.gfile.MakeDirs(FLAGS.model_save_dir)
    # create the model object
    model = BiLSTM(vocab_size=FLAGS.vocab_size,
                   batch_size=FLAGS.batch_size,
                   embedding_size=FLAGS.num_embedding_units,
                   num_hidden_size=FLAGS.num_hidden_units,
                   maxlen=FLAGS.maxlen)
    # create the training object
    solver = Solver(model=model,
                    training_iter=FLAGS.train_step,
                    word2idx=word2idx,
                    idx2word=idx2word,
                    log_dir=FLAGS.log_dir,
                    model_save_dir=FLAGS.model_save_dir)

    if FLAGS.mode == 'train':
        solver.train()
    elif FLAGS.mode == 'test':
        solver.test()
    elif FLAGS.mode=='distil':
Example 16
    model_name, 'epochs',
    str(args.epochs), args.optimizer, 'lr',
    str(args.lr), 'hidden',
    str(args.hidden), 'layers',
    str(args.layers)
]
model_name = '_'.join(model_name)
model_path = os.path.join(save_model_dir, model_name)
print('writer_path:', writer_path)
print('save_model_dir:', save_model_dir)
print('model_name:', model_name)

if args.crf:
    model = LSTM_CRF(args.hidden, args.layers, args.dropout)
else:
    model = BiLSTM(args.hidden, 8, args.dropout, args.layers)
    criterion = nn.CrossEntropyLoss()
if args.load_model:
    model.load_state_dict(torch.load(model_path))
if args.optimizer == 'adam':
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
else:
    optimizer = torch.optim.SGD(model.parameters(),
                                lr=args.lr,
                                momentum=args.momentum)
lr_lambda = lambda epoch: 1 / (1 + (epoch + 1) * args.lr_decay)
scheduler = LambdaLR(optimizer, lr_lambda=lr_lambda)

if use_cuda:
    model = model.cuda()
    if not args.crf:
Example 17
    return epoch_loss / len(valid_it), epoch_acc / len(valid_it)


vocab_size = len(TEXT.vocab)
emb_dim = 50
hidden_dim = 50
out_dim = 1
lr = 1e-2
nlayers = 2
bidir = True
dropout = 0.3
model = BiLSTM(vocab_size,
               hidden_dim,
               emb_dim,
               out_dim,
               bsize,
               nlayers,
               bidir,
               dropout,
               gpu=gpu)

n_filters = 3
filter_sizes = [3, 4, 5]
modelc = CNN(vocab_size, emb_dim, n_filters, filter_sizes, out_dim, dropout)

optimizer = optim.Adam(model.parameters())  #no need to specify LR for adam
lossf = nn.BCEWithLogitsLoss()
ep = 5

modelatt = LSTMAttn(vocab_size, hidden_dim, emb_dim, out_dim, bsize, gpu=gpu)
Example 18
def main(options):

    use_cuda = (len(options.gpuid) >= 1)
    if options.gpuid:
        cuda.set_device(options.gpuid[0])

    train, dev, test, vocab = torch.load(open(options.data_file, 'rb'),
                                         pickle_module=dill)

    batched_train, batched_train_mask, _ = utils.tensor.advanced_batchize(
        train, options.batch_size, vocab.stoi["<pad>"])
    batched_dev, batched_dev_mask, _ = utils.tensor.advanced_batchize(
        dev, options.batch_size, vocab.stoi["<pad>"])

    vocab_size = len(vocab)

    if options.load_file:
        rnnlm = torch.load(options.load_file)
    else:
        rnnlm = BiLSTM(vocab_size)
    if use_cuda > 0:
        rnnlm.cuda()
    else:
        rnnlm.cpu()

    criterion = torch.nn.NLLLoss()
    optimizer = eval("torch.optim." + options.optimizer)(rnnlm.parameters(),
                                                         options.learning_rate)

    # main training loop
    last_dev_avg_loss = float("inf")
    rnnlm.train()
    for epoch_i in range(options.epochs):
        logging.info("At {0}-th epoch.".format(epoch_i))
        # srange generates a lazy sequence of shuffled range
        for i, batch_i in enumerate(utils.rand.srange(len(batched_train))):

            train_batch = Variable(
                batched_train[batch_i])  # of size (seq_len, batch_size)
            train_mask = Variable(batched_train_mask[batch_i])
            if use_cuda:
                train_batch = train_batch.cuda()
                train_mask = train_mask.cuda()

            sys_out_batch = rnnlm(
                train_batch
            )  # (seq_len, batch_size, vocab_size) # TODO: substitute this with your module
            train_in_mask = train_mask.view(-1)
            train_in_mask = train_in_mask.unsqueeze(1).expand(
                len(train_in_mask), vocab_size)
            train_out_mask = train_mask.view(-1)
            sys_out_batch = sys_out_batch.view(-1, vocab_size)
            train_out_batch = train_batch.view(-1)
            sys_out_batch = sys_out_batch.masked_select(train_in_mask).view(
                -1, vocab_size)
            train_out_batch = train_out_batch.masked_select(train_out_mask)
            loss = criterion(sys_out_batch, train_out_batch)
            logging.debug("loss at batch {0}: {1}".format(i, loss.data[0]))
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        # validation -- this is a crude estimation because there might be some padding at the end
        dev_loss = 0.0
        rnnlm.eval()
        for batch_i in range(len(batched_dev)):
            dev_batch = Variable(batched_dev[batch_i], volatile=True)
            dev_mask = Variable(batched_dev_mask[batch_i], volatile=True)
            if use_cuda:
                dev_batch = dev_batch.cuda()
                dev_mask = dev_mask.cuda()

            sys_out_batch = rnnlm(dev_batch)
            dev_in_mask = dev_mask.view(-1)
            dev_in_mask = dev_in_mask.unsqueeze(1).expand(
                len(dev_in_mask), vocab_size)
            dev_out_mask = dev_mask.view(-1)
            sys_out_batch = sys_out_batch.view(-1, vocab_size)
            dev_out_batch = dev_batch.view(-1)
            sys_out_batch = sys_out_batch.masked_select(dev_in_mask).view(
                -1, vocab_size)
            dev_out_batch = dev_out_batch.masked_select(dev_out_mask)
            loss = criterion(sys_out_batch, dev_out_batch)
            dev_loss += loss
        dev_avg_loss = dev_loss / len(batched_dev)
        logging.info(
            "Average loss value per instance is {0} at the end of epoch {1}".
            format(dev_avg_loss.data[0], epoch_i))

        #if (last_dev_avg_loss - dev_avg_loss).data[0] < options.estop:
        #  logging.info("Early stopping triggered with threshold {0} (previous dev loss: {1}, current: {2})".format(epoch_i, last_dev_avg_loss.data[0], dev_avg_loss.data[0]))
        #  break
        torch.save(
            rnnlm,
            open(
                options.model_file +
                ".nll_{0:.2f}.epoch_{1}".format(dev_avg_loss.data[0], epoch_i),
                'wb'),
            pickle_module=dill)
        last_dev_avg_loss = dev_avg_loss
Example 19
def main():
    parser = argparse.ArgumentParser()

    parser.add_argument(
        '--model',
        type=str,
        default='rnn',
        help=
        "Available models are: 'rnn', 'cnn', 'bilstm', 'fasttext', and 'distilbert'\nDefault is 'rnn'"
    )
    parser.add_argument('--train_data_path',
                        type=str,
                        default="./data/train_clean.csv",
                        help="Path to the training data")
    parser.add_argument('--test_data_path',
                        type=str,
                        default="./data/dev_clean.csv",
                        help="Path to the test data")
    parser.add_argument('--seed', type=int, default=1234)
    parser.add_argument('--vectors',
                        type=str,
                        default='fasttext.simple.300d',
                        help="""
                                Pretrained vectors:
                                Visit 
                                https://github.com/pytorch/text/blob/9ce7986ddeb5b47d9767a5299954195a1a5f9043/torchtext/vocab.py#L146
                                for more 
                                """)
    parser.add_argument('--max_vocab_size', type=int, default=750)
    parser.add_argument('--batch_size', type=int, default=32)
    parser.add_argument('--bidirectional', type=bool, default=True)
    parser.add_argument('--dropout', type=float, default=0.5)
    parser.add_argument('--hidden_dim', type=int, default=64)
    parser.add_argument('--output_dim', type=int, default=1)
    parser.add_argument('--n_layers', type=int, default=2)
    parser.add_argument('--lr', type=float, default=1e-3)
    parser.add_argument('--n_epochs', type=int, default=5)
    parser.add_argument('--n_filters', type=int, default=100)
    parser.add_argument('--filter_sizes', type=list, default=[3, 4, 5])

    args = parser.parse_args()

    torch.manual_seed(args.seed)

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    ##########  BILSTM ##########

    if args.model == "bilstm":
        print('\nBiLSTM')
        TEXT = Field(tokenize='spacy')
        LABEL = LabelField(dtype=torch.float)
        data_fields = [("text", TEXT), ("label", LABEL)]

        train_data = TabularDataset(args.train_data_path,
                                    format='csv',
                                    fields=data_fields,
                                    skip_header=True,
                                    csv_reader_params={'delimiter': ","})

        test_data = TabularDataset(args.test_data_path,
                                   format='csv',
                                   fields=data_fields,
                                   skip_header=True,
                                   csv_reader_params={'delimiter': ","})

        train_data, val_data = train_data.split(split_ratio=0.8,
                                                random_state=random.seed(
                                                    args.seed))

        TEXT.build_vocab(train_data,
                         max_size=args.max_vocab_size,
                         vectors=args.vectors,
                         unk_init=torch.Tensor.normal_)
        LABEL.build_vocab(train_data)

        train_iterator, valid_iterator, test_iterator = BucketIterator.splits(
            (train_data, val_data, test_data),
            batch_size=args.batch_size,
            sort_key=lambda x: len(x.text),
            device=device)

        input_dim = len(TEXT.vocab)
        embedding_dim = get_embedding_dim(args.vectors)
        pad_idx = TEXT.vocab.stoi[TEXT.pad_token]
        unk_idx = TEXT.vocab.stoi[TEXT.unk_token]

        model = BiLSTM(input_dim, embedding_dim, args.hidden_dim,
                       args.output_dim, args.n_layers, args.bidirectional,
                       args.dropout, pad_idx)

        pretrained_embeddings = TEXT.vocab.vectors

        model.embedding.weight.data.copy_(pretrained_embeddings)
        model.embedding.weight.data[unk_idx] = torch.zeros(embedding_dim)
        model.embedding.weight.data[pad_idx] = torch.zeros(embedding_dim)

        optimizer = optim.Adam(model.parameters(), lr=args.lr)
        criterion = nn.BCEWithLogitsLoss()

        model.to(device)
        criterion.to(device)

        best_valid_loss = float('inf')

        print("\nTraining...")
        print("===========")
        for epoch in range(1, args.n_epochs + 1):

            start_time = time.time()

            train_loss, train_acc = train(model, train_iterator, optimizer,
                                          criterion)
            valid_loss, valid_acc = evaluate(model, valid_iterator, criterion)

            end_time = time.time()

            epoch_mins, epoch_secs = epoch_time(start_time, end_time)

            if valid_loss < best_valid_loss:
                best_valid_loss = valid_loss
                torch.save(model.state_dict(),
                           './checkpoints/{}-model.pt'.format(args.model))

            print(
                f'[Epoch: {epoch:02}] | Epoch Time: {epoch_mins}m {epoch_secs}s'
            )
            print(
                f'\tTrain Loss: {train_loss:.3f} | Train Acc: {train_acc*100:.2f}%'
            )
            print(
                f'\t Val. Loss: {valid_loss:.3f} |  Val. Acc: {valid_acc*100:.2f}%'
            )

        model.load_state_dict(
            torch.load('./checkpoints/{}-model.pt'.format(args.model)))

        test_loss, test_acc = evaluate(model, test_iterator, criterion)

        print('\nEvaluating...')
        print("=============")
        print(f'Test Loss: {test_loss:.3f} | Test Acc: {test_acc*100:.2f}%'
              )  # Test Loss: 0.139, Test Acc: 95.27%

    ##########  VANILLA RNN ##########

    else:
        print('\nVanilla RNN')
        TEXT = Field(tokenize='spacy')
        LABEL = LabelField(dtype=torch.float)
        data_fields = [("text", TEXT), ("label", LABEL)]

        train_data = TabularDataset(args.train_data_path,
                                    format='csv',
                                    fields=data_fields,
                                    skip_header=True,
                                    csv_reader_params={'delimiter': ","})

        test_data = TabularDataset(args.test_data_path,
                                   format='csv',
                                   fields=data_fields,
                                   skip_header=True,
                                   csv_reader_params={'delimiter': ","})

        train_data, val_data = train_data.split(split_ratio=0.8,
                                                random_state=random.seed(
                                                    args.seed))

        TEXT.build_vocab(train_data,
                         max_size=args.max_vocab_size,
                         vectors=args.vectors)
        LABEL.build_vocab(train_data)

        train_iterator, valid_iterator, test_iterator = BucketIterator.splits(
            (train_data, val_data, test_data),
            batch_size=args.batch_size,
            sort_key=lambda x: len(x.text),
            device=device)

        input_dim = len(TEXT.vocab)
        embedding_dim = get_embedding_dim(args.vectors)

        model = RNN(input_dim, embedding_dim, args.hidden_dim, args.output_dim)

        pretrained_embeddings = TEXT.vocab.vectors

        model.embedding.weight.data.copy_(pretrained_embeddings)

        optimizer = optim.Adam(model.parameters(), lr=args.lr)
        criterion = nn.BCEWithLogitsLoss()

        model.to(device)
        criterion.to(device)

        best_valid_loss = float('inf')

        print("\nTraining...")
        print("===========")
        for epoch in range(1, args.n_epochs + 1):

            start_time = time.time()

            train_loss, train_acc = train(model, train_iterator, optimizer,
                                          criterion)
            valid_loss, valid_acc = evaluate(model, valid_iterator, criterion)

            end_time = time.time()

            epoch_mins, epoch_secs = epoch_time(start_time, end_time)

            if valid_loss < best_valid_loss:
                best_valid_loss = valid_loss
                torch.save(model.state_dict(),
                           './checkpoints/{}-model.pt'.format(args.model))

            print(
                f'[Epoch: {epoch:02}] | Epoch Time: {epoch_mins}m {epoch_secs}s'
            )
            print(
                f'\tTrain Loss: {train_loss:.3f} | Train Acc: {train_acc*100:.2f}%'
            )
            print(
                f'\t Val. Loss: {valid_loss:.3f} |  Val. Acc: {valid_acc*100:.2f}%'
            )

        model.load_state_dict(
            torch.load('./checkpoints/{}-model.pt'.format(args.model)))

        test_loss, test_acc = evaluate(model, test_iterator, criterion)

        print('\nEvaluating...')
        print("=============")
        print(f'Test Loss: {test_loss:.3f} | Test Acc: {test_acc*100:.2f}%'
              )  # Test Loss: 0.138, Test Acc: 95.05%
Example 20
print(str(datetime.now()), "Generating vocab")
vocab = Vocab(train_colors,
              min_count=min_count,
              add_padding=True,
              add_bos=True,
              add_eos=True)

embeddings = nn.Embedding(len(vocab.index2token),
                          embedding_size,
                          padding_idx=vocab.PAD.hash)

model = BiLSTM(
    embeddings=embeddings,
    hidden_size=hidden_size,
    num_labels=len(vocab),  #num_labels,
    bidirectional=bidirectional,
    num_layers=num_layers,
    color_representation_size=54)  #54)

model_id = str(int(time.time())) + "w_fourier"
save_path = os.path.join(output_path, model_id)
if not os.path.isdir(save_path):
    os.makedirs(save_path)

writer = SummaryWriter(save_path)

if cuda:
    model.cuda()

print(model)
Example 21
def model_train_validate_test(train_df,
                              dev_df,
                              test_df,
                              embeddings_file,
                              vocab_file,
                              target_dir,
                              mode,
                              num_labels=2,
                              max_length=50,
                              epochs=50,
                              batch_size=128,
                              lr=0.0005,
                              patience=5,
                              max_grad_norm=10.0,
                              gpu_index=0,
                              if_save_model=False,
                              checkpoint=None):
    device = torch.device(
        "cuda:{}".format(gpu_index) if torch.cuda.is_available() else "cpu")
    print(20 * "=", " Preparing for training ", 20 * "=")
    # path where the model will be saved
    if not os.path.exists(target_dir):
        os.makedirs(target_dir)
    # -------------------- Data loading ------------------- #
    print("\t* Loading training data...")
    train_data = My_Dataset(train_df, vocab_file, max_length, mode)
    train_loader = DataLoader(train_data, shuffle=True, batch_size=batch_size)
    print("\t* Loading validation data...")
    dev_data = My_Dataset(dev_df, vocab_file, max_length, mode)
    dev_loader = DataLoader(dev_data, shuffle=True, batch_size=batch_size)
    print("\t* Loading test data...")
    test_data = My_Dataset(test_df, vocab_file, max_length, mode)
    test_loader = DataLoader(test_data, shuffle=False, batch_size=batch_size)
    # -------------------- Model definition ------------------- #
    print("\t* Building model...")
    if (embeddings_file is not None):
        embeddings = load_embeddings(embeddings_file)
    else:
        embeddings = None
    model = BiLSTM(embeddings, num_labels=num_labels, device=device).to(device)
    total_params = sum(p.numel() for p in model.parameters())
    print(f'{total_params:,} total parameters.')
    total_trainable_params = sum(p.numel() for p in model.parameters()
                                 if p.requires_grad)
    print(f'{total_trainable_params:,} training parameters.')
    # -------------------- Preparation for training  ------------------- #
    criterion = nn.CrossEntropyLoss()
    # keep only the parameters that require gradient updates
    parameters = filter(lambda p: p.requires_grad, model.parameters())
    # optimizer = optim.Adadelta(parameters, params["LEARNING_RATE"])
    optimizer = torch.optim.Adam(parameters, lr=lr)
    # optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                           mode="max",
                                                           factor=0.85,
                                                           patience=0)
    best_score = 0.0
    start_epoch = 1
    # Data for loss curves plot
    epochs_count = []
    train_losses = []
    valid_losses = []
    # Continuing training from a checkpoint if one was given as argument
    if checkpoint:
        checkpoint = torch.load(checkpoint)
        start_epoch = checkpoint["epoch"] + 1
        best_score = checkpoint["best_score"]
        print("\t* Training will continue on existing model from epoch {}...".
              format(start_epoch))
        model.load_state_dict(checkpoint["model"])
        optimizer.load_state_dict(checkpoint["optimizer"])
        epochs_count = checkpoint["epochs_count"]
        train_losses = checkpoint["train_losses"]
        valid_losses = checkpoint["valid_losses"]
    # Compute loss and accuracy before starting (or resuming) training.
    _, valid_loss, valid_accuracy, _ = validate(model, dev_loader, criterion)
    print("\t* Validation loss before training: {:.4f}, accuracy: {:.4f}%".
          format(valid_loss, (valid_accuracy * 100)))
    # -------------------- Training epochs ------------------- #
    print("\n", 20 * "=", "Training BiLSTM model on device: {}".format(device),
          20 * "=")
    patience_counter = 0
    for epoch in range(start_epoch, epochs + 1):
        epochs_count.append(epoch)
        print("* Training epoch {}:".format(epoch))
        epoch_time, epoch_loss, epoch_accuracy = train(model, train_loader,
                                                       optimizer, criterion,
                                                       epoch, max_grad_norm)
        train_losses.append(epoch_loss)
        print("-> Training time: {:.4f}s, loss = {:.4f}, accuracy: {:.4f}%".
              format(epoch_time, epoch_loss, (epoch_accuracy * 100)))
        print("* Validation for epoch {}:".format(epoch))
        epoch_time, epoch_loss, epoch_accuracy, _ = validate(
            model, dev_loader, criterion)
        valid_losses.append(epoch_loss)
        print("-> Valid. time: {:.4f}s, loss: {:.4f}, accuracy: {:.4f}%\n".
              format(epoch_time, epoch_loss, (epoch_accuracy * 100)))
        # Update the optimizer's learning rate with the scheduler.
        scheduler.step(epoch_accuracy)
        # Early stopping on validation accuracy.
        if epoch_accuracy < best_score:
            patience_counter += 1
        else:
            best_score = epoch_accuracy
            patience_counter = 0

            if (if_save_model):
                torch.save(
                    {
                        "epoch": epoch,
                        "model": model.state_dict(),
                        "best_score": best_score,
                        "epochs_count": epochs_count,
                        "train_losses": train_losses,
                        "valid_losses": valid_losses
                    }, os.path.join(target_dir, "best.pth.tar"))

                print("save model succesfully!\n")

            print("* Test for epoch {}:".format(epoch))
            _, _, test_accuracy, predictions = validate(
                model, test_loader, criterion)
            print("Test accuracy: {:.4f}%\n".format(test_accuracy))
            test_prediction = pd.DataFrame({'prediction': predictions})
            test_prediction.to_csv(os.path.join(target_dir,
                                                "test_prediction.csv"),
                                   index=False)

        if patience_counter >= patience:
            print("-> Early stopping: patience limit reached, stopping...")
            break
Example 22
import tensorflow as tf
from model import BiLSTM

vocab_size = 10
num_classes_A = 5
num_classes_B = 2

data = tf.placeholder(tf.int32, [None, None])
target_A = tf.placeholder(tf.float32, [None, None, num_classes_A])
target_B = tf.placeholder(tf.float32, [None, None, num_classes_B])
current_target = tf.placeholder(tf.string)

model = BiLSTM(data, target_A, target_B, vocab_size)

x = [[0, 4, 1, 5, 9], [3, 4, 6, 2, 7]]

y_A = [[[1, 0, 0, 0, 0], [0, 0, 1, 0, 0], [1, 0, 0, 0, 0], [0, 0, 1, 0, 0],
        [0, 0, 0, 0, 1]],
       [[0, 1, 0, 0, 0], [0, 0, 1, 0, 0], [0, 0, 0, 1, 0], [0, 1, 0, 0, 0],
        [0, 0, 0, 1, 0]]]

y_B = [[[0, 1], [0, 1], [0, 1], [1, 0], [1, 0]],
       [[0, 1], [0, 1], [1, 0], [0, 1], [1, 0]]]

sess = tf.Session()
sess.run(tf.global_variables_initializer())

for i in range(10):
    print('-' * 20)
    print(sess.run(model.cost_A, feed_dict={data: x, target_A: y_A}))
    print(sess.run(model.cost_B, feed_dict={data: x, target_B: y_B}))
Example 23
                     shuffle=False)
novel_iter = Iterator(novel,
                      batch_size=1,
                      device=-1,
                      sort=False,
                      sort_within_batch=False,
                      repeat=False,
                      shuffle=False)

vocab = TEXT.vocab

vocab_size = len(vocab)
emb_dim = 300
hidden_dim = 64
emb_matrix = vocab.vectors
model = BiLSTM(vocab_size, hidden_dim, emb_dim, emb_matrix)

print("Computing the deep features...")
'''
features = []
for x in tqdm(data_iter):
    feature = model(x.comment_text)
    features.append(feature)

feats = []
for f in features:
    feats.append(f.detach().numpy())
feats = np.vstack(feats)
'''
novel_features = []
for x in tqdm(novel_iter):
Example 24
    if 'cuda' in args.device:
        if torch.cuda.is_available():
            device = torch.device(args.device)
        else:
            print("cuda not available...")
    print("Using device {}".format(device))

    print("loading datasets...")
    n = None
    train_data = DataSource("train", n=n)
    print("loaded {} train data".format(len(train_data)))
    dev_data = DataSource("dev", n=n)
    print("loaded {} dev data".format(len(dev_data)))
    test_data = DataSource("test", n=n)
    print("loaded {} test data".format(len(test_data)))

    model = BiLSTM(128, device)
    print("allocated model")

    if args.restore == "":
        losses = train()
        print("graphing")
        graph_losses(losses)
    else:
        model.load_state_dict(torch.load(args.restore))
        print("loaded weights from {}".format(args.restore))

    confusion = evaluate()
    print(confusion)
    print("accuracy: {}".format(np.sum(np.diagonal(confusion))))
Example 25
																		shuffle=True)
		dev_batches, num_dev_batches, num_dev_samples = get_batch(cfg.data_npy_path, cfg.filename_x_dev,
																  cfg.filename_y_dev, cfg.epochs,
																  cfg.maxlen, cfg.len_wv, cfg.batch_size[0],
																  cfg.num_classes, str(fold),
																  shuffle=False)

		# create an iterator of the correct shape and type
		iter = tf.data.Iterator.from_structure(train_batches.output_types, train_batches.output_shapes)
		xs, ys = iter.get_next()

		train_init_opt = iter.make_initializer(train_batches)
		dev_init_opt = iter.make_initializer(dev_batches)

		# index+=1
		model = BiLSTM(param)
		# print('xs')
		# print(xs)
		# print('ys')
		# print(ys)
		loss,train_opt,pred_train,train_summaries,global_step,lstm_cell_fw,x_check = model.train(xs,ys)
		logits_eval,probs_eval,pred_eval,ys = model.eval(xs,ys)

		#Variables for early stop
		dev_history = []

		dev_best = 0

		stop_times = 0

		logging.info('# Session')
Example 26
from model import BiLSTM
from utils import batch_iter, get_data
from vocab import Vocab
from seqeval.metrics import classification_report
from torch import optim
import numpy as np
import torch

x_train, x_valid, x_test, y_train, y_valid, y_test = get_data('time_delay')
train_data = list(zip(x_train, y_train))
vocab = Vocab.from_corpus(x_train)
tag_vocab = Vocab.from_corpus(y_train)

model = BiLSTM(vocab, tag_vocab, 100, 256)
torch.cuda.set_device(0)
model.cuda()
optimizer = optim.Adam(model.parameters(), lr=0.01)
for epoch in range(3):
    for sents, labels in batch_iter(train_data, 16):
        model.zero_grad()
        loss, acc = model(sents, labels)
        print("epoch {}:".format(epoch), loss, acc)
        loss.backward()
        optimizer.step()

test_data = list(zip(x_test, y_test))
preds = []
for sent, labels in test_data:
    pred = model.predict([sent])
    preds.append(pred.tolist()[0])
preds = [[tag_vocab.id2word[i] for i in sent] for sent in preds]
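
Example 26 imports classification_report from seqeval but the snippet ends before it is used. Assuming y_test holds lists of tag strings (matching the decoded preds above), the evaluation step would look like this sketch:

print(classification_report(y_test, preds))
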
Example 27
    def __init__(self, config):
        self.config = config

        self.load_data()  # load the data sets
        self.model = BiLSTM(self.config, self.vocab_size,
                            self.word_vectors)  # initialize the model
Example 28
mapping_file = ".\\dataset\\map_data.map"

mapping = {}
with open(mapping_file, 'rb') as f:
    mapping = cPickle.load(f)

word_to_id = mapping['word_to_id']
tag_to_id = mapping['tag_to_id']
char_to_id = mapping['char_to_id']
word_embeds = mapping['word_embeds']

model = BiLSTM(voca_size=len(word_to_id),
               word_emb_dim=100,
               pre_word_emb=word_embeds,
               char_emb_dim=25,
               char_lstm_dim=25,
               char_to_ix=char_to_id,
               n_cap=4,
               cap_emb_dim=8,
               hidden_dim=200,
               tag_to_ix=tag_to_id)

x = torch.load(model_path)
model.load_state_dict(x())

model.eval()


def test():
    test_sentences = loader.load_data(test_path, zeros=False)

    loader.update_tag_scheme(test_sentences, 'iob')
Example 29
# ---- Build Vocabulary ------
w2v_map = data.load_map("resources/w2v_map_SQ.pkl")
w2v_map['<pad>'] = np.zeros(300)
word_to_ix = data.load_map("resources/word_to_ix_SQ.pkl")
label_to_ix = data.load_map("resources/rel_to_ix_SQ.pkl")
vocab_size = len(word_to_ix)
num_classes = len(label_to_ix)
max_sent_length = 36  # set from the paper

# ---- Define Model, Loss, Optim ------
config = args
config.d_out = num_classes
config.n_directions = 2 if config.birnn else 1
print(config)
model = BiLSTM(config)
loss_function = nn.NLLLoss()
optimizer = optim.Adam(model.parameters(), lr=args.lr)

# ---- Test Model ------
if args.test:
    print("Test Mode: loading pre-trained model and testing on test set...")
    # model = torch.load(args.resume_snapshot, map_location=lambda storage, location: storage.cuda(args.gpu))
    model.load_state_dict(torch.load(args.resume_snapshot))
    test_acc = evaluate_dataset_batch(test_set, max_sent_length, model,
                                      w2v_map, label_to_ix)
    print("Accuracy: {}".format(test_acc))
    sys.exit(0)

# ---- Train Model ------
start = time.time()
Example 30
import torch
import torchvision

from model import BiLSTM
from data import load_dataset
from config import model_name, device

if __name__ == "__main__":

    # the string to test!
    test_string = "<s> john can"

    # ########################
    # LOAD DATASET
    # ########################

    corpus, word_to_idx, idx_to_word, train_dataset = load_dataset()

    # ########################
    # TEST VARIABLES
    # ########################

    model = BiLSTM(len(corpus))
    model.load_state_dict(torch.load(model_name))

    model.eval()
    sentence = test_string.split()
    sentence = torch.tensor([[word_to_idx[w] for w in sentence]])

    s = model.sample(sentence)
    print(test_string.split() + s)