                  verbose=100)  # tail of a truncated model.fit(...) call
        y_pred = model.predict(X_val)
        oof_preds[val_index] = y_pred
    # score the out-of-fold predictions; the tuner minimizes, so invert macro-F1
    f1 = f1_score(y, oof_preds, average="macro")
    return 1.0 - f1
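# (The top of this example is truncated in the source. A hypothetical
# reconstruction of the whole objective is sketched below, assuming an
# Optuna-style tuner and stratified K-fold CV; `build_model`, the fold
# count, and the random seed are assumptions, not the author's code.)
import numpy as np
from sklearn.metrics import f1_score
from sklearn.model_selection import StratifiedKFold


def objective(trial):
    oof_preds = np.zeros(len(y), dtype=y.dtype)
    skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
    for trn_index, val_index in skf.split(X, y):
        model = build_model(trial)  # hypothetical factory built from trial params
        model.fit(X.iloc[trn_index], y[trn_index])
        oof_preds[val_index] = model.predict(X.iloc[val_index])
    # macro-F1 over all out-of-fold predictions, inverted for minimization
    return 1.0 - f1_score(y, oof_preds, average="macro")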


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--exp", default="exp")
    parser.add_argument("--ntrial", default=200, type=int)
    parser.add_argument("--n_jobs", default=3, type=int)

    args = parser.parse_args()
    logger = get_logger(exp=args.exp)
    with timer("Load Data", logger):
        loader = DataLoader()

    with timer("tokenize", logger):
        loader.tokenize(tokenizer, {
            "stopwords": get_stopwords(),
            "include_verb": True
        })

    train, test = loader.load()
    X = train["tokenized"].fillna("")
    X_test = test["tokenized"].fillna("")
    y = train["label"].values
    y_test = test["label"].values

    with timer("vectorize", logger):

Example #2
import argparse

from sklearn.metrics import f1_score

from core.loader import DataLoader
from core.util import timer, get_logger
from core.model import lr_model
from core.preprocessing import get_stopwords, tokenizer

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--exp", default="exp")

    args = parser.parse_args()
    logger = get_logger(exp=args.exp)
    with timer("Load Data", logger):
        loader = DataLoader()

    with timer("tokenize", logger):
        loader.tokenize(tokenizer, {
            "stopwords": get_stopwords(),
            "include_verb": True
        })

    train, test = loader.load()
    X = train["tokenized"]
    X_test = test["tokenized"]
    y = train["label"]
    y_test = test["label"]

    model = lr_model()
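    # (The snippet ends here. Given the f1_score import above, the truncated
    # remainder presumably fits the model and scores the test split; a
    # minimal sketch, assuming lr_model() returns a scikit-learn-compatible
    # pipeline that accepts the tokenized strings directly:)
    with timer("train", logger):
        model.fit(X, y)

    y_pred = model.predict(X_test)
    logger.info("test macro-F1: %.4f" % f1_score(y_test, y_pred, average="macro"))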

Example #3
import argparse

from core.loader import DataLoader
from core.util import timer, get_logger
from core.nn.preprocessing import tokenizer
from core.nn.util import to_sequence, load_w2v
from core.nn.model import train_and_validate

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--exp", default="exp")
    parser.add_argument("--embedding")
    parser.add_argument("--device", default="cpu")
    parser.add_argument("--n_epochs", default=10, type=int)

    args = parser.parse_args()

    logger = get_logger(exp=args.exp)
    with timer("Load Data", logger):
        loader = DataLoader()

    with timer("tokenize", logger):
        loader.tokenize(tokenizer)

    train, test = loader.load()
    X = train["tokenized"]
    X_test = test["tokenized"]
    y = train["label"]
    y_test = test["label"]

    with timer("Convert to sequence", logger):
        X, X_test, word_index = to_sequence(X, X_test, max_features=80000)

    with timer("Load embedding", logger):