        verbose=100)
        y_pred = model.predict(X_val)
        oof_preds[val_index] = y_pred
    # Out-of-fold macro F1; the tuner minimizes, so return 1 - F1.
    f1 = f1_score(y, oof_preds, average="macro")
    return 1.0 - f1


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--exp", default="exp")
    parser.add_argument("--ntrial", default=200, type=int)
    parser.add_argument("--n_jobs", default=3, type=int)
    args = parser.parse_args()

    logger = get_logger(exp=args.exp)

    with timer("Load Data", logger):
        loader = DataLoader()
        with timer("tokenize", logger):
            loader.tokenize(tokenizer, {
                "stopwords": get_stopwords(),
                "include_verb": True
            })
        train, test = loader.load()

    # fillna("") guards against rows where tokenization produced no tokens.
    X = train["tokenized"].fillna("")
    X_test = test["tokenized"].fillna("")
    y = train["label"].values
    y_test = test["label"].values

    with timer("vectorize", logger):
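The tuning script above is cut off at the vectorize step, and the search loop itself is not shown. As a hedged sketch, assuming the `--ntrial` and `--n_jobs` flags feed an Optuna study (which the flag names and the minimized `1 - F1` objective suggest, but the excerpt does not confirm), and assuming the function whose tail appears above is named `objective` and takes an Optuna trial, the driver would look roughly like this:

# Hypothetical sketch: how --ntrial / --n_jobs could drive the search.
# Optuna itself is an assumption; the excerpt never names the tuner.
import optuna

def run_search(objective, n_trials, n_jobs):
    # The objective returns 1 - macro F1, so lower is better.
    study = optuna.create_study(direction="minimize")
    study.optimize(objective, n_trials=n_trials, n_jobs=n_jobs)
    return study.best_params

# e.g. best_params = run_search(objective, args.ntrial, args.n_jobs)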
import argparse

from sklearn.metrics import f1_score

from core.loader import DataLoader
from core.util import timer, get_logger
from core.model import lr_model
from core.preprocessing import get_stopwords, tokenizer

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--exp", default="exp")
    args = parser.parse_args()

    logger = get_logger(exp=args.exp)

    with timer("Load Data", logger):
        loader = DataLoader()
        with timer("tokenize", logger):
            loader.tokenize(tokenizer, {
                "stopwords": get_stopwords(),
                "include_verb": True
            })
        train, test = loader.load()

    X = train["tokenized"]
    X_test = test["tokenized"]
    y = train["label"]
    y_test = test["label"]

    model = lr_model()
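`lr_model` is imported from `core.model`, whose implementation is not shown in this excerpt. A minimal sketch of what such a factory could return, assuming a plain scikit-learn TF-IDF plus logistic regression pipeline (the internals here are assumptions, not the project's actual code):

# Hypothetical sketch of core.model.lr_model; the real implementation
# is not part of the excerpt. Assumes a TF-IDF + LogisticRegression pipeline.
from sklearn.pipeline import Pipeline
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression

def lr_model():
    return Pipeline([
        ("tfidf", TfidfVectorizer(ngram_range=(1, 2), min_df=2)),
        ("clf", LogisticRegression(max_iter=1000)),
    ])

A pipeline in this shape lets `model.fit(X, y)` consume the tokenized strings prepared above directly, with no separate vectorization step in the training script.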
import argparse

from core.loader import DataLoader
from core.util import timer, get_logger
from core.nn.preprocessing import tokenizer
from core.nn.util import to_sequence, load_w2v
from core.nn.model import train_and_validate

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--exp", default="exp")
    parser.add_argument("--embedding")
    parser.add_argument("--device", default="cpu")
    parser.add_argument("--n_epochs", default=10, type=int)
    args = parser.parse_args()
    assert args.embedding  # a pretrained embedding path is required

    logger = get_logger(exp=args.exp)

    with timer("Load Data", logger):
        loader = DataLoader()
        with timer("tokenize", logger):
            loader.tokenize(tokenizer)
        train, test = loader.load()

    X = train["tokenized"]
    X_test = test["tokenized"]
    y = train["label"]
    y_test = test["label"]

    with timer("Convert to sequence", logger):
        X, X_test, word_index = to_sequence(X, X_test, max_features=80000)

    with timer("Load embedding", logger):
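The excerpt ends as the pretrained vectors are loaded. `load_w2v` from `core.nn.util` is not shown; as a sketch under assumptions (gensim-format word2vec vectors, index 0 reserved for padding, and this particular signature), such a helper typically builds an embedding matrix aligned with the `word_index` returned by `to_sequence`:

# Hypothetical sketch of core.nn.util.load_w2v; the real helper and its
# signature are not shown in the excerpt.
import numpy as np
from gensim.models import KeyedVectors

def load_w2v(path, word_index, max_features=80000):
    w2v = KeyedVectors.load_word2vec_format(path, binary=path.endswith(".bin"))
    dim = w2v.vector_size
    # Row 0 stays all-zero for the padding index.
    embedding_matrix = np.zeros((max_features, dim), dtype=np.float32)
    for word, idx in word_index.items():
        if idx < max_features and word in w2v:
            embedding_matrix[idx] = w2v[word]
    return embedding_matrix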