예제 #1
0
    train_acc, train_loss = train_epoch(model, train_data_loader, loss_fn,
                                        optimizer, device, scheduler,
                                        len(df_train))

    print(f'Train loss {train_loss} accuracy {train_acc}')

    val_acc, val_loss = eval_model(model, val_data_loader, loss_fn, device,
                                   len(df_val))

    print(f'Val   loss {val_loss} accuracy {val_acc}')
    print()

    history['train_acc'].append(train_acc)
    history['train_loss'].append(train_loss)
    history['val_acc'].append(val_acc)
    history['val_loss'].append(val_loss)

    if val_acc > best_accuracy:
        torch.save(model.state_dict(), 'best_model_state.bin')
        best_accuracy = val_acc

test_acc, _ = eval_model(model, test_data_loader, loss_fn, device,
                         len(df_test))

print(test_acc.item())

y_review_texts, y_pred, y_pred_probs, y_test = get_predictions(
    model, test_data_loader)

print(classification_report(y_test, y_pred, target_names=[0, 1, 2]))
def run():
    df = pd.read_csv("inputs/reviews.csv")
    df["sentiment"] = df.score.apply(rating_to_sentiment)
    df_train, df_rem = train_test_split(df,
                                        test_size=0.1,
                                        random_state=config.RANDOM_SEED)
    df_val, df_test = train_test_split(df_rem,
                                       test_size=0.5,
                                       random_state=config.RANDOM_SEED)
    train_data_loader = create_data_loader(df_train, config.TOKENIZER,
                                           config.MAX_LEN, config.BATCH_SIZE)
    val_data_loader = create_data_loader(df_val, config.TOKENIZER,
                                         config.MAX_LEN, config.BATCH_SIZE)
    test_data_loader = create_data_loader(df_test, config.TOKENIZER,
                                          config.MAX_LEN, config.BATCH_SIZE)

    # data = next(iter(val_data_loader))
    # input_ids = data["input_ids"].to(config.DEVICE)
    # attention_mask = data["attention_mask"].to(config.DEVICE)
    # bert_model = BertModel.from_pretrained(config.BERT_NAME)

    model = SentimentClassifier(num_classes=len(class_labels))
    if config.LOAD_MODEL == True:
        model.load_state_dict(torch.load("best_model_state.bin"))
    model = model.to(config.DEVICE)

    optimizer = AdamW(model.parameters(), lr=2e-5, correct_bias=False)
    total_steps = len(train_data_loader) * config.EPOCHS
    scheduler = get_linear_schedule_with_warmup(optimizer,
                                                num_warmup_steps=0,
                                                num_training_steps=total_steps)
    loss_fn = nn.CrossEntropyLoss().to(config.DEVICE)

    history = defaultdict(list)
    best_accuracy = 0

    for epoch in range(config.EPOCHS):
        print(f"Epoch {epoch + 1}/{config.EPOCHS}")
        print("-" * 10)

        train_acc, train_loss = train_fn(
            model,
            train_data_loader,
            loss_fn,
            optimizer,
            config.DEVICE,
            scheduler,
            len(df_train),
        )

        print(f"Train loss {train_loss} accuracy {train_acc}")

        val_acc, val_loss = eval_fn(model, val_data_loader, loss_fn,
                                    config.DEVICE, len(df_val))

        print(f"Val   loss {val_loss} accuracy {val_acc}")
        print()

        history["train_acc"].append(train_acc)
        history["train_loss"].append(train_loss)
        history["val_acc"].append(val_acc)
        history["val_loss"].append(val_loss)

        if val_acc > best_accuracy:
            torch.save(model.state_dict(), "best_model_state.bin")
            best_accuracy = val_acc