import joblib
import numpy as np
import pandas as pd
import torch
import wandb
from sklearn import model_selection
from torch.optim import AdamW
from transformers import get_linear_schedule_with_warmup

# Project-local modules; the EntityModel import path is an assumption and may
# need adjusting to the repository layout.
import config
import dataset
import engine
import utils
from model import EntityModel


def run():
    sentences, pos, tag, enc_pos, enc_tag = utils.process_data(config.DATA_FILE)

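    # Persist the fitted label encoders so predicted ids can be decoded back to tags at inference time.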
    meta_data = {
        "enc_pos": enc_pos,
        "enc_tag": enc_tag
    }

    joblib.dump(meta_data, "meta.bin")

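    # Number of distinct POS and entity tag classes.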
    num_pos = len(list(enc_pos.classes_))
    num_tag = len(list(enc_tag.classes_))

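    # Hold out 10% of the sentences for validation.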
    (
        train_sentences,
        test_sentences,
        train_pos,
        test_pos,
        train_tag,
        test_tag
    ) = model_selection.train_test_split(sentences, pos, tag, random_state=42, test_size=0.1)

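    # Wrap each split in the project's EntityDataset.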
    train_dataset = dataset.EntityDataset(
        texts=train_sentences, pos=train_pos, tags=train_tag
    )

    test_dataset = dataset.EntityDataset(
        texts=test_sentences, pos=test_pos, tags=test_tag
    )

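    # Batch the datasets; the training loader uses more workers than the validation loader.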
    train_data_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=config.TRAIN_BATCH_SIZE, num_workers=4
    )

    test_data_loader = torch.utils.data.DataLoader(
        test_dataset, batch_size=config.VALID_BATCH_SIZE, num_workers=1
    )

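    # Use the GPU when available, otherwise fall back to the CPU.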
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    model = EntityModel(num_tag=num_tag, num_pos=num_pos)
    model.to(device)

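    # Apply weight decay to all parameters except biases and LayerNorm weights.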
    param_optimizer = list(model.named_parameters())
    no_decay = ["bias", "LayerNorm.bias", "LayerNorm.weight"]
    optimizer_param = [
        {
            "params": [
                p for n, p in param_optimizer if not any(nd in n for nd in no_decay)
            ],
            "weight_decay": 0.001,
        },
        {
            "params": [
                p for n, p in param_optimizer if any(nd in n for nd in no_decay)
            ],
            "weight_decay": 0.0,
        },
    ]

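    # One optimizer step per batch for config.EPOCHS epochs, with linear LR decay and no warmup.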
    num_train_steps = int(len(train_sentences) / config.TRAIN_BATCH_SIZE * config.EPOCHS)
    optimizer = AdamW(optimizer_param, lr=3e-5)
    scheduler = get_linear_schedule_with_warmup(
        optimizer, num_warmup_steps=0, num_training_steps=num_train_steps
    )

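    # Train, log metrics to the active wandb run (assumed to be initialised by the
    # caller), and checkpoint whenever the validation loss improves.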
    best_loss = np.inf
    for epoch in range(config.EPOCHS):
        train_loss = engine.train_fn(train_data_loader, model, optimizer, device, scheduler)
        # classify_enc_tag (the encoder for the auxiliary classification labels) is
        # assumed to be provided elsewhere in the project.
        test_loss, metrics, classification_metrics = engine.eval_with_metrics_combined(
            test_data_loader, model, device, enc_tag, classify_enc_tag
        )

        # Per-tag report as a DataFrame: tags as rows, precision/recall/f1-score as columns.
        df_metrics = pd.DataFrame.from_dict(metrics).transpose()

        # The report is assumed to end with micro-, macro- and weighted-average rows.
        weighted_average = df_metrics["f1-score"].iloc[-1]
        micro_average = df_metrics["f1-score"].iloc[-3]

        table = wandb.Table(dataframe=df_metrics.transpose())
        wandb.log({"Train loss": train_loss, "Valid loss": test_loss})
        wandb.log({"Validation Metric details": table})
        wandb.log({"Weighted average": weighted_average, "Micro average": micro_average})

        print(
            f"Train Loss = {train_loss} Valid Loss = {test_loss}\n"
            f"Metrics = {df_metrics}\n"
            f"Classify Metrics = {pd.DataFrame.from_dict(classification_metrics)}"
        )
        if test_loss < best_loss:
            torch.save(model.state_dict(), config.MODEL_PATH)
            model.save_pretrained_model(config.params["BASE_MODEL_PATH"] + "_finetuned_" + config.params["language"])
            config.TOKENIZER.save_pretrained(config.params["BASE_MODEL_PATH"] + "_finetuned_" + config.params["language"])
            best_loss = test_loss
            wandb.run.summary["best_valloss"] = best_loss