def test(args):
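    """Run inference on the test set and write predictions to output.csv.

    Relies on module-level names from the enclosing script: `config`, `logger`,
    `Preprocessor`, `Postprocessor`, `Classifier`, `SequentialSampler`,
    `DataLoader` and `pd` (pandas).
    """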
    from dataio.task_data import TaskData
    from predict.predictor import Predictor
    data = TaskData(args.test_data_num)
    labels, sents = data.read_data(
        raw_data_path=config["test_path"],
        data_dir=config["data_dir"],
        preprocessor=Preprocessor(config["preprocessor"])(
            stopwords_path=config["stopwords_path"],
            userdict_path=config["userdict_path"]),
        is_train=False)
    lines = list(zip(sents, labels))

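    # Build the postprocessor and load the label vocabulary; id2label maps
    # class indices back to label strings for decoding predictions.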
    processor = Postprocessor(
        config["postprocessor"])(do_lower_case=args.do_lower_case)
    label_list = processor.get_labels(config["data_dir"] / "labels.txt")
    id2label = {i: label for i, label in enumerate(label_list)}

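    # Turn the raw (sentence, label) pairs into examples and then fixed-length
    # features; both steps are cached on disk, keyed by the pretrain name and
    # the evaluation sequence length.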
    test_data = processor.get_test(lines=lines)
    test_examples = processor.create_examples(
        lines=test_data,
        example_type="test",
        cached_examples_file=config["data_dir"] /
        "cached_test_examples_{}".format(args.pretrain))
    test_features = processor.create_features(
        examples=test_examples,
        max_seq_len=args.eval_max_seq_len,
        cached_features_file=config["data_dir"] /
        "cached_test_features_{}_{}".format(args.eval_max_seq_len,
                                            args.pretrain))

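    # SequentialSampler preserves the original order, so predictions can later
    # be aligned row-by-row with the raw test CSV.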
    test_dataset = processor.create_dataset(test_features)
    test_sampler = SequentialSampler(test_dataset)
    test_dataloader = DataLoader(test_dataset,
                                 sampler=test_sampler,
                                 # batch size only affects inference throughput,
                                 # so the training value is reused here
                                 batch_size=args.train_batch_size)

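    # With no pretrained encoder, the vocabulary size has to come from the
    # processor's own tokenizer.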
    if config["pretrain"] == "Nopretrain":
        config["vocab_size"] = processor.vocab_size

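    # Look up the classifier architecture named in the config and instantiate
    # it with one output unit per label.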
    model = Classifier(config["classifier"], config["pretrain"],
                       config["checkpoint_dir"])(num_labels=len(label_list))

    ########### predict ###########
    logger.info("model predicting...")
    predictor = Predictor(model=model, logger=logger, n_gpu=args.n_gpu)
    logits, y_pred = predictor.predict(data=test_dataloader, thresh=0.5)

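    # y_pred is a 0/1 matrix (thresholded at 0.5 above); decode each row into
    # a comma-separated string of the active labels.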
    pred_labels = []
    for item in y_pred.tolist():
        tmp = []
        for i, v in enumerate(item):
            if v == 1:
                tmp.append(label_list[i])
        pred_labels.append(",".join(tmp))

    assert len(pred_labels) == y_pred.shape[0]
    df_pred_labels = pd.DataFrame(pred_labels, columns=["predict_labels"])

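    # Re-read the raw test CSV, truncated to the scored subset, so that raw
    # columns, predicted labels and per-label logits line up row-by-row.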
    df_test_raw = pd.read_csv(config["test_path"])
    if args.test_data_num > 0:
        df_test_raw = df_test_raw.head(args.test_data_num)
    df_labels = pd.DataFrame(logits, columns=label_list)
    df = pd.concat([df_test_raw, df_pred_labels, df_labels], axis=1)

    df.to_csv(config["result"] / "output.csv", index=False)
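
# --- Hypothetical usage sketch (not part of the original script). test()
# expects an argparse-style namespace; the attribute names below are the ones
# referenced inside the function, and the values are illustrative assumptions:
#
#   from argparse import Namespace
#   test(Namespace(test_data_num=0, do_lower_case=True, eval_max_seq_len=256,
#                  train_batch_size=8, pretrain="bert", n_gpu="0"))
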
from predict.predictor import Predictor
# Assumed import path for ModelDefinition; adjust to wherever it actually lives.
from model_definition import ModelDefinition
from time import time
import json

# Read the model definition
model_definition = ModelDefinition()

print("Reading exported model")
predictor = Predictor(model_definition)

# Sample input: an empty element (a sequence consisting entirely of padding)
sample_input = predictor.get_empty_element()  # renamed so the built-in input() is not shadowed
print(sample_input)
json_test = json.dumps(sample_input)

print("Prediction:", predictor.predict(sample_input))

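# Time predict_json() over many repetitions; the measured latency includes
# whatever JSON parsing the predictor performs internally.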
n_repetitions = 1000
print("Testing performance, n. repetitions:", n_repetitions)
start = time()
for i in range(n_repetitions):
    predictor.predict_json(json_test)
end = time()
print("Total time:", end - start, "s")
print("Prediction performance:", ((end - start) / n_repetitions) * 1000, "ms")

# RNN: Home computer (Linux):
# seq_len = 16, rnn_size = 64  -> With GPU: 1.7 ms  / With CPU: 0.85 ms
# seq_len = 64, rnn_size = 256 -> With GPU: 2.76 ms / With CPU: 4.24 ms

# RNN: Work computer (Windows 10):