Example no. 1
import json

# Project-local helpers (train, evaluation, get_features, get_dsid_from_dspid,
# get_label_map_from_dsid, prepare_features_for_model, upload_results,
# LABEL_SET_FID) are assumed to be defined elsewhere in this project.


def do_job(message):
    try:
        mid = message['mid']
        dspid = message['dspid']
        xfid = message['xfid']
        yfid = message['yfid'] 
        architecture_json_string = message['model_architecture']
        training_parameters = json.loads(message['training_parameters'])
    except Exception as e:
        # TODO: log error here.
        print('Could not parse message.')
        print(e)
        return False

    # TODO: add support for non-label values.
    if yfid != LABEL_SET_FID:
        print('No support yet for target features which are not string labels.')
        return False

    # data_dir = '/tmp/datasets/'
    data_dir = '/dev/tmp/datasets'
    x_feats_train, y_feats_train, x_feats_test, y_feats_test = get_features(xfid,
                                                                            yfid,
                                                                            dspid,
                                                                            data_dir)
    # Now we are assuming yfid == LABEL_SET_FID
    dsid = get_dsid_from_dspid(dspid)
    label_map = get_label_map_from_dsid(dsid)
    X, y = prepare_features_for_model(x_feats_train,
                                      y_feats_train,
                                      label_map)
    X_test, y_test = prepare_features_for_model(x_feats_test,
                                                y_feats_test,
                                                label_map)

    model = train.load_model_from_architecture_string(architecture_json_string)
    train.compile_model(model, training_parameters)
    # TODO: use k-fold cross validation:
    # https://github.com/fchollet/keras/issues/1711
    # and set "validation_split" in training_parameters
    # training_parameters["validation_split"] = 0.1
    hist = train.train_model(model, X, y, training_parameters)
    eval_dict = evaluation.get_evaluation_results_dictionary(model,
                                                             X_test,
                                                             y_test)

    # Upload model architecture, model parameters, hist.history, eval results.
    upload_results(mid, model, hist, eval_dict)
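
The k-fold TODO above is easy to sketch with scikit-learn. The snippet below is a hedged illustration only: build_model is a hypothetical factory standing in for the load_model_from_architecture_string/compile_model pair, and a Keras-style fit/evaluate interface is assumed rather than the project's actual train module. The lighter alternative the TODO mentions is simply training_parameters['validation_split'] = 0.1, which makes Keras's fit() hold out that fraction of the data for validation.

# Illustrative k-fold sketch (not the project's code): build_model is a
# hypothetical factory; a Keras-style fit/evaluate interface is assumed.
import numpy as np
from sklearn.model_selection import KFold

def kfold_scores(build_model, X, y, training_parameters, n_splits=5):
    scores = []
    for train_idx, val_idx in KFold(n_splits=n_splits, shuffle=True).split(X):
        model = build_model()  # fresh, freshly-compiled model per fold
        model.fit(X[train_idx], y[train_idx],
                  epochs=training_parameters.get('epochs', 10),
                  batch_size=training_parameters.get('batch_size', 32),
                  verbose=0)
        scores.append(model.evaluate(X[val_idx], y[val_idx], verbose=0))
    return np.mean(scores, axis=0)  # mean metrics across folds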
Example no. 2
import json
import os
import pickle

import numpy as np
import sklearn.metrics
import torch
from torch import optim

# Project-local helpers (BaselineCategoryClassifier, BaselineFocusClassifier,
# WikiqaBaselineReader, BaselineAnswerSelectionClassifier, train_model,
# get_label_score, filtered_ref_generator, load_full_embedding_with_vocab)
# are assumed to be importable from the surrounding package.


def main(config_path):
    with open(config_path, 'r') as fread:
        config_dict = json.load(fread)

    # path
    path_config = config_dict['Path']
    model_dir = path_config['model_dir']
    train = path_config['train']
    dev = path_config['dev']
    dev_ref = path_config['dev_ref']
    test = path_config['test']
    test_ref = path_config['test_ref']
    test_result = path_config['test_result']

    print('Loading question analysis models...')
    category_model = BaselineCategoryClassifier.load(
        path_config['category_model_config'])
    focus_model = BaselineFocusClassifier.load(
        path_config['focus_model_config'])

    words_embed, words_vocab = load_full_embedding_with_vocab(
        path_config['embed_dir'])
    with open(path_config['category_vocab'], 'rb') as fread:
        category_vocab = pickle.load(fread)

    # dataset
    dataset_config = config_dict['Dataset']
    pad_size = dataset_config['pad_size']
    batch_size = dataset_config['batch_size']

    print('Loading train data...')
    train_reader = WikiqaBaselineReader(train,
                                        category_model,
                                        focus_model,
                                        words_vocab.stoi,
                                        category_vocab.itos,
                                        PAD_TOKEN='<pad>',
                                        pad_size=pad_size)
    dev_reader = WikiqaBaselineReader(dev,
                                      category_model,
                                      focus_model,
                                      words_vocab.stoi,
                                      category_vocab.itos,
                                      PAD_TOKEN='<pad>',
                                      pad_size=pad_size)
    vocabs = {'q_words': words_vocab, 'a_words': words_vocab}
    train_reader.set_vocabs(vocabs)
    dev_reader.set_vocabs(vocabs)

    train_iterator = train_reader.get_dataset_iterator(batch_size, train=True)
    dev_iterator = dev_reader.get_dataset_iterator(batch_size,
                                                   train=False,
                                                   sort=False)

    # model
    model_config = config_dict['Model']
    conv_width = model_config['conv_width']
    out_channels = model_config['out_channels']
    hidden_size = model_config['hidden_size']
    cuda_device = model_config['cuda_device']

    clf = BaselineAnswerSelectionClassifier(words_embed=words_embed,
                                            out_channels=out_channels,
                                            conv_width=conv_width,
                                            hidden_size=hidden_size,
                                            cuda_device=cuda_device)

    # train
    train_config = config_dict['Train']
    num_epoch = train_config['epoch']
    weight_decay = train_config['weight_decay']
    lr = train_config['lr']
    early_stopping = train_config['early_stopping']

    input_names = [
        'q_words', 'a_words', 'q_word_over', 'a_word_over', 'q_sem_over',
        'a_sem_over'
    ]

    optimizer = optim.Adam(clf.parameters(),
                           lr=lr,
                           weight_decay=weight_decay,
                           eps=1e-5)
    if cuda_device is not None:
        clf.cuda(device=cuda_device)

    def callback(verbose=True):
        train_labels, train_scores = get_label_score(clf,
                                                     train_iterator,
                                                     cuda_device,
                                                     'label',
                                                     input_names=input_names)
        train_predicts = train_scores.argmax(axis=-1)
        train_scores = train_scores[:, 1]
        if verbose:
            print('train_acc: %.2f' %
                  sklearn.metrics.accuracy_score(train_labels, train_predicts))
            print(
                'train_precision: %.2f' %
                sklearn.metrics.precision_score(train_labels, train_predicts))
            print('train_average_precision: %.2f' %
                  sklearn.metrics.average_precision_score(
                      train_labels, train_scores))

        dev_labels, dev_scores = get_label_score(clf,
                                                 dev_iterator,
                                                 cuda_device,
                                                 'label',
                                                 input_names=input_names)
        dev_predicts = dev_scores.argmax(axis=-1)
        dev_scores = dev_scores[:, 1]
        if verbose:
            print('dev_acc: %.2f' %
                  sklearn.metrics.accuracy_score(dev_labels, dev_predicts))
            print('dev_precision: %.2f' %
                  sklearn.metrics.precision_score(dev_labels, dev_predicts))
            print('dev_average_precision: %.2f' %
                  sklearn.metrics.average_precision_score(
                      dev_labels, dev_scores))

        index = 0
        aps = []  # for mean average precision score
        rrs = []  # for mean reciprocal rank score

        for query_labels in filtered_ref_generator(dev_ref):
            query_scores = dev_scores[index:index + len(query_labels)]
            index += len(query_labels)

            aps.append(
                sklearn.metrics.average_precision_score(
                    query_labels, query_scores))
            query_rel_best = np.argmin(-query_scores * query_labels)
            rrs.append(
                1 /
                (np.argsort(np.argsort(-query_scores))[query_rel_best] + 1))

            # if verbose:
            #     print('DEBUGGING ap:', aps[-1])
            #     print('DEBUGGING rel_best:', query_rel_best)
            #     print('DEBUGGING score:', query_scores)
            #     print('DEBUGGING labels:', query_labels)
            #     print('DEBUGGING RR:', rrs[-1])
            #     print()

        if verbose:
            print('dev_MAP: %.2f' % np.mean(aps))
            print('dev_MRR: %.2f' % np.mean(rrs))

        return np.mean(aps)

    print('Training...')
    best_state_dict = train_model(clf,
                                  optimizer,
                                  train_iterator,
                                  label_name='label',
                                  num_epoch=num_epoch,
                                  cuda_device=cuda_device,
                                  early_stopping=early_stopping,
                                  input_names=input_names,
                                  callback=callback)
    print()

    if best_state_dict is not None:
        clf.load_state_dict(best_state_dict)

    torch.save(clf.state_dict(), os.path.join(model_dir, 'net.pt'))

    # test
    print('Loading test data...')
    test_reader = WikiqaBaselineReader(test,
                                       category_model,
                                       focus_model,
                                       words_vocab.stoi,
                                       category_vocab.itos,
                                       PAD_TOKEN='<pad>',
                                       pad_size=pad_size)
    test_reader.set_vocabs(vocabs)
    test_iterator = test_reader.get_dataset_iterator(batch_size,
                                                     train=False,
                                                     sort=False)

    print('Testing...')

    test_labels, test_scores = get_label_score(clf,
                                               test_iterator,
                                               cuda_device,
                                               'label',
                                               input_names=input_names)
    test_predicts = test_scores.argmax(axis=-1)
    test_scores = test_scores[:, 1]

    print('test_acc: %.2f' %
          sklearn.metrics.accuracy_score(test_labels, test_predicts))
    print('test_precision: %.2f' %
          sklearn.metrics.precision_score(test_labels, test_predicts))
    print('test_average_precision: %.2f' %
          sklearn.metrics.average_precision_score(test_labels, test_scores))

    index = 0
    aps = []  # for mean average precision score
    rrs = []  # for mean reciprocal rank score

    for query_labels in filtered_ref_generator(test_ref):
        query_scores = test_scores[index:index + len(query_labels)]
        index += len(query_labels)

        aps.append(
            sklearn.metrics.average_precision_score(query_labels,
                                                    query_scores))
        query_rel_best = np.argmin(-query_scores * query_labels)
        rrs.append(1 /
                   (np.argsort(np.argsort(-query_scores))[query_rel_best] + 1))

    print('test_MAP: %.2f' % np.mean(aps))
    print('test_MRR: %.2f' % np.mean(rrs))
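
The MRR bookkeeping above deserves a note, since it is easy to misread: np.argsort(np.argsort(-s)) maps each score to its 0-based rank in descending order, and np.argmin(-query_scores * query_labels) picks the index of the highest-scoring relevant answer (assuming positive scores and at least one positive label per query, which filtered_ref_generator presumably guarantees). A tiny self-contained check with toy values, not project data:

import numpy as np

scores = np.array([0.2, 0.9, 0.5])
labels = np.array([0, 0, 1])  # exactly one relevant answer

ranks = np.argsort(np.argsort(-scores))      # [2 0 1]: descending-order ranks
best_relevant = np.argmin(-scores * labels)  # -> 2, the top-scored relevant item
print(1 / (ranks[best_relevant] + 1))        # reciprocal rank: 0.5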
Example no. 3
def main(config_path):
    with open(config_path, 'r') as fread:
        config_dict = json.load(fread)

    # path
    path_config = config_dict['Path']
    model_dir = path_config['model_dir']
    train = path_config['train']
    dev = path_config['dev']
    dev_ref = path_config['dev_ref']
    test = path_config['test']
    test_ref = path_config['test_ref']

    # dataset
    dataset_config = config_dict['Dataset']
    batch_size = dataset_config['batch_size']

    print('Loading train data...')
    train_reader = WikiqaReader(train, PAD_TOKEN='<pad>')
    dev_reader = WikiqaReader(dev, PAD_TOKEN='<pad>')

    words_embed, words_vocab = load_full_embedding_with_vocab(
        path_config['embed_dir'])
    vocabs = {'q_words': words_vocab, 'a_words': words_vocab}
    train_reader.set_vocabs(vocabs)
    dev_reader.set_vocabs(vocabs)

    train_iterator = train_reader.get_dataset_iterator(batch_size, train=True)
    dev_iterator = dev_reader.get_dataset_iterator(batch_size,
                                                   train=False,
                                                   sort=False)

    test_reader = WikiqaReader(test, PAD_TOKEN='<pad>')
    test_reader.set_vocabs(vocabs)
    test_iterator = test_reader.get_dataset_iterator(batch_size,
                                                     train=False,
                                                     sort=False)

    # model
    model_config = config_dict['Model']
    conv_width = model_config['conv_width']
    out_channels = model_config['out_channels']
    hidden_size = model_config['hidden_size']
    cuda_device = model_config['cuda_device']
    dropout = model_config['dropout']
    h = model_config['h']

    clf = SelfAttentionCnnClassifier(words_embed=words_embed,
                                     out_channels=out_channels,
                                     conv_width=conv_width,
                                     hidden_size=hidden_size,
                                     cuda_device=cuda_device,
                                     h=h,
                                     dropout=dropout)

    # train
    train_config = config_dict['Train']
    num_epoch = train_config['epoch']
    weight_decay = train_config['weight_decay']
    lr = train_config['lr']
    early_stopping = train_config['early_stopping']
    factor = train_config['factor']
    warmup = train_config['warmup']

    input_names = ['q_words', 'a_words']

    # optimizer = optim.Adam(clf.parameters(), lr=lr, weight_decay=weight_decay, eps=1e-5)
    optimizer = NoamOpt(
        clf.len_embed, factor, warmup,
        optim.Adam(clf.parameters(), lr=0, weight_decay=weight_decay,
                   eps=1e-5))

    if cuda_device is not None:
        clf.cuda(device=cuda_device)

    def callback(verbose=True):
        train_labels, train_scores = get_label_score(clf,
                                                     train_iterator,
                                                     cuda_device,
                                                     'label',
                                                     input_names=input_names)
        train_predicts = train_scores.argmax(axis=-1)
        train_scores = train_scores[:, 1]
        if verbose:
            print('train_acc: %.2f' %
                  sklearn.metrics.accuracy_score(train_labels, train_predicts))
            print(
                'train_precision: %.2f' %
                sklearn.metrics.precision_score(train_labels, train_predicts))
            print('train_average_precision: %.2f' %
                  sklearn.metrics.average_precision_score(
                      train_labels, train_scores))

        dev_labels, dev_scores = get_label_score(clf,
                                                 dev_iterator,
                                                 cuda_device,
                                                 'label',
                                                 input_names=input_names)
        dev_predicts = dev_scores.argmax(axis=-1)
        dev_scores = dev_scores[:, 1]
        if verbose:
            print('dev_acc: %.2f' %
                  sklearn.metrics.accuracy_score(dev_labels, dev_predicts))
            print('dev_precision: %.2f' %
                  sklearn.metrics.precision_score(dev_labels, dev_predicts))
            print('dev_average_precision: %.2f' %
                  sklearn.metrics.average_precision_score(
                      dev_labels, dev_scores))

        index = 0
        dev_aps = []  # for mean average precision score
        rrs = []  # for mean reciprocal rank score

        for query_labels in filtered_ref_generator(dev_ref):
            query_scores = dev_scores[index:index + len(query_labels)]
            index += len(query_labels)

            dev_aps.append(
                sklearn.metrics.average_precision_score(
                    query_labels, query_scores))
            query_rel_best = np.argmin(-query_scores * query_labels)
            rrs.append(
                1 /
                (np.argsort(np.argsort(-query_scores))[query_rel_best] + 1))

        if verbose:
            print('dev_MAP: %.2f' % np.mean(dev_aps))
            print('dev_MRR: %.2f' % np.mean(rrs))

        test_labels, test_scores = get_label_score(clf,
                                                   test_iterator,
                                                   cuda_device,
                                                   'label',
                                                   input_names=input_names)
        test_predicts = test_scores.argmax(axis=-1)
        test_scores = test_scores[:, 1]
        if verbose:
            print('test_acc: %.2f' %
                  sklearn.metrics.accuracy_score(test_labels, test_predicts))
            print('test_precision: %.2f' %
                  sklearn.metrics.precision_score(test_labels, test_predicts))
            print('test_average_precision: %.2f' %
                  sklearn.metrics.average_precision_score(
                      test_labels, test_scores))

        index = 0
        test_aps = []  # for mean average precision score
        rrs = []  # for mean reciprocal rank score

        for query_labels in filtered_ref_generator(test_ref):
            query_scores = test_scores[index:index + len(query_labels)]
            index += len(query_labels)

            test_aps.append(
                sklearn.metrics.average_precision_score(
                    query_labels, query_scores))
            query_rel_best = np.argmin(-query_scores * query_labels)
            rrs.append(
                1 /
                (np.argsort(np.argsort(-query_scores))[query_rel_best] + 1))

        if verbose:
            print('test_MAP: %.2f' % np.mean(test_aps))
            print('test_MRR: %.2f' % np.mean(rrs))

        return np.mean(dev_aps)

    print('Training...')
    best_state_dict = train_model(clf,
                                  optimizer,
                                  train_iterator,
                                  label_name='label',
                                  num_epoch=num_epoch,
                                  cuda_device=cuda_device,
                                  early_stopping=early_stopping,
                                  input_names=input_names,
                                  callback=callback)
    print()

    if best_state_dict is not None:
        clf.load_state_dict(best_state_dict)

    torch.save(clf.state_dict(), os.path.join(model_dir, 'net.pt'))

    print('Testing...')

    test_labels, test_scores = get_label_score(clf,
                                               test_iterator,
                                               cuda_device,
                                               'label',
                                               input_names=input_names)
    test_predicts = test_scores.argmax(axis=-1)
    test_scores = test_scores[:, 1]

    print('test_acc: %.2f' %
          sklearn.metrics.accuracy_score(test_labels, test_predicts))
    print('test_precision: %.2f' %
          sklearn.metrics.precision_score(test_labels, test_predicts))
    print('test_average_precision: %.2f' %
          sklearn.metrics.average_precision_score(test_labels, test_scores))

    index = 0
    aps = []  # for mean average precision score
    rrs = []  # for mean reciprocal rank score

    for query_labels in filtered_ref_generator(test_ref):
        query_scores = test_scores[index:index + len(query_labels)]
        index += len(query_labels)

        aps.append(
            sklearn.metrics.average_precision_score(query_labels,
                                                    query_scores))
        query_rel_best = np.argmin(-query_scores * query_labels)
        rrs.append(1 /
                   (np.argsort(np.argsort(-query_scores))[query_rel_best] + 1))

    print('test_MAP: %.4f' % np.mean(aps))
    print('test_MRR: %.4f' % np.mean(rrs))
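
Example no. 3 swaps plain Adam for a NoamOpt wrapper. The project's own class is not shown; the sketch below is a minimal version of the usual Noam schedule from "Attention Is All You Need" (linear warmup, then inverse-square-root decay), with a constructor that mirrors the call site NoamOpt(clf.len_embed, factor, warmup, optim.Adam(...)). Treat it as an assumption about what NoamOpt does, not its actual implementation.

# Minimal sketch of a Noam learning-rate wrapper (an assumption about the
# NoamOpt used above, not the project's implementation).
class NoamOpt:
    def __init__(self, model_size, factor, warmup, optimizer):
        self.model_size = model_size
        self.factor = factor
        self.warmup = warmup
        self.optimizer = optimizer
        self._step = 0

    def rate(self, step=None):
        # lr = factor * model_size^-0.5 * min(step^-0.5, step * warmup^-1.5)
        step = self._step if step is None else step
        return (self.factor * self.model_size ** -0.5 *
                min(step ** -0.5, step * self.warmup ** -1.5))

    def step(self):
        self._step += 1
        for group in self.optimizer.param_groups:
            group['lr'] = self.rate()
        self.optimizer.step()

    def zero_grad(self):
        self.optimizer.zero_grad()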
Example no. 4
        '--out_dir',
        type=str,
        help='output directory where model hdf5 file will be saved')
    parser.add_argument('--path_to_image',
                        type=str,
                        help='path to the image to deliver predictions for')
    parser.add_argument('--model_location',
                        type=str,
                        help='path to the model hdf5 file')

    args = parser.parse_args()

    if args.mode == 'train' and (args.img_dir is None
                                 or args.metadata_dir is None
                                 or args.out_dir is None):
        parser.error("--train mode requires --img_dir and --metadata_dir.")

    if args.mode == 'predict' and (args.path_to_image is None
                                   or args.model_location is None):
        parser.error("--predict mode requires --path_to_image.")

    img_dir = "/kaggle/input/images-classification/data/images"
    metadata_dir = "/kaggle/input/images-classification/data/"
    out_dir = "/data/workspace/"

    if args.mode == 'train':
        train_model(args.img_dir, args.metadata_dir, args.out_dir)

    if args.mode == 'predict':
        predict(args.path_to_image, args.model_location)
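
Example no. 4 starts mid-way through its argument parser, so the head of the listing is missing. Below is a plausible reconstruction, hypothetical except for the option names the visible code itself references (--mode, --img_dir, --metadata_dir, --out_dir, --path_to_image, --model_location); the original listing then resumes at the '--out_dir' argument shown at the top of the example.

# Hypothetical reconstruction of the truncated parser head; only the
# option names are taken from the visible code, the rest is assumed.
import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--mode',
                    type=str,
                    choices=['train', 'predict'],
                    required=True,
                    help='whether to train a model or predict on an image')
parser.add_argument('--img_dir',
                    type=str,
                    help='directory containing the training images')
parser.add_argument('--metadata_dir',
                    type=str,
                    help='directory containing the image metadata')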
Example no. 5
    def train(self):
        return train.train_model(self.trial, self.params, self.model,
                                 self.optimizer, self.scheduler, self.device,
                                 self.data_loader, self.data_loader_test)
Example no. 6
def main(config_path):
    with open(config_path, 'r') as fread:
        config_dict = json.load(fread)

    path_config = config_dict['Path']
    model_dir = path_config['model_dir']
    vocab_dir = path_config['vocab_dir']
    train = path_config['train']

    # dataset
    dataset_config = config_dict['Dataset']
    pad_size = dataset_config['pad_size']
    batch_size = dataset_config['batch_size']

    print('Loading train data...')
    train_reader = QFocusReader(train, PAD_TOKEN='<pad>', pad_size=pad_size)
    train_reader.build_vocabs(vocab_dir)

    # model
    model_config = config_dict['Model']
    conv_width = model_config['conv_width']
    hidden_size = model_config['hidden_size']
    out_channels = model_config['out_channels']
    cuda_device = model_config['cuda_device']
    num_filters = model_config['num_filters']

    # load pretrained vocab
    words_embed, words_vocab = load_full_embedding_with_vocab(
        model_config['embed_dir'])
    train_reader.set_vocabs({'words': words_vocab})
    vocabs = train_reader.get_vocabs()  # will be used at test time

    train_config = config_dict['Train']
    num_epoch = train_config['epoch']
    weight_decay = train_config['weight_decay']
    lr = train_config['lr']
    kfold = train_config['kfold']

    # cross-val
    folds = train_reader.get_cross_val_dataset_iterator(batch_size=batch_size,
                                                        k_fold=kfold)
    fold_accs = []
    for test_idx in range(kfold):
        clf = BaselineFocusClassifier(words_embed=words_embed,
                                      out_channels=out_channels,
                                      cuda_device=cuda_device,
                                      conv_width=conv_width,
                                      hidden_size=hidden_size,
                                      num_filters=num_filters)
        optimizer = optim.Adam(clf.parameters(),
                               lr=lr,
                               weight_decay=weight_decay,
                               eps=1e-5)
        if cuda_device is not None:
            clf.cuda(device=cuda_device)

        train_iterator = [
            fold for fold_idx, fold in enumerate(folds) if fold_idx != test_idx
        ]
        train_model(clf,
                    optimizer,
                    train_iterator,
                    num_epoch=num_epoch,
                    cuda_device=cuda_device,
                    early_stopping=0,
                    label_name='focus')

        # test
        print('Testing...')
        acc = test_metric(clf,
                          folds[test_idx],
                          cuda_device,
                          label_name='focus')
        print('test accuracy:', acc)
        fold_accs.append(acc)

    print()
    print('test accuracies:', fold_accs)
    print('mean accuracy:', np.mean(fold_accs))
    print()

    print('Final Training...')

    clf = BaselineFocusClassifier(words_embed=words_embed,
                                  out_channels=out_channels,
                                  cuda_device=cuda_device,
                                  conv_width=conv_width,
                                  hidden_size=hidden_size,
                                  num_filters=num_filters)
    optimizer = optim.Adam(clf.parameters(),
                           lr=lr,
                           weight_decay=weight_decay,
                           eps=1e-5)
    if cuda_device is not None:
        clf.cuda(device=cuda_device)

    train_iterator = train_reader.get_dataset_iterator(batch_size)

    def callback(verbose=False):
        train_acc = test_metric(clf,
                                train_iterator,
                                cuda_device,
                                'focus',
                                return_info=False)
        if verbose:
            print('train_acc: %.3f' % train_acc)

    # train
    best_state_dict = train_model(clf,
                                  optimizer,
                                  train_iterator,
                                  num_epoch=num_epoch,
                                  cuda_device=cuda_device,
                                  label_name='focus',
                                  callback=callback)

    if best_state_dict is not None:
        clf.load_state_dict(best_state_dict)

    torch.save(clf.state_dict(), os.path.join(model_dir, 'net.pt'))
    print('Done!')
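
The fold loop in Example no. 6 is the standard leave-one-fold-out pattern: a fresh classifier and optimizer per fold, training on the other k-1 folds, scoring on the held-out one. Reduced to its skeleton (fold_train and fold_eval are hypothetical stand-ins for the train_model and test_metric calls above):

# Leave-one-fold-out skeleton; fold_train/fold_eval are hypothetical
# stand-ins for train_model and test_metric.
def cross_validate(folds, fold_train, fold_eval):
    accs = []
    for test_idx, test_fold in enumerate(folds):
        train_folds = [f for i, f in enumerate(folds) if i != test_idx]
        model = fold_train(train_folds)   # fresh model trained on k-1 folds
        accs.append(fold_eval(model, test_fold))
    return sum(accs) / len(accs)          # mean held-out accuracy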
Example no. 7
def main(config_path):
    with open(config_path, 'r') as fread:
        config_dict = json.load(fread)

    path_config = config_dict['Path']
    model_dir = path_config['model_dir']
    vocab_dir = path_config['vocab_dir']
    train = path_config['train']
    test = path_config['test']
    test_result = path_config['test_result']

    # dataset
    dataset_config = config_dict['Dataset']
    pad_size = dataset_config['pad_size']
    batch_size = dataset_config['batch_size']

    print('Loading train data...')
    train_reader = UIUCReader(train, PAD_TOKEN='<pad>', pad_size=pad_size)
    train_reader.build_vocabs(vocab_dir)
    train_iterator = train_reader.get_dataset_iterator(batch_size, train=True)

    # model
    model_config = config_dict['Model']
    conv_widths = model_config['conv_widths']
    hidden_size = model_config['hidden_size']
    out_channels = model_config['out_channels']
    cuda_device = model_config['cuda_device']
    # cuda_device = None # debugging
    out_size = len(train_reader.get_vocab('category'))

    # load pretrained vocab
    words_embed, words_vocab = load_full_embedding_with_vocab(
        model_config['embed_dir'])
    train_reader.set_vocabs({'words': words_vocab})
    vocabs = train_reader.get_vocabs()  # will be used at test time

    clf = BaselineCategoryClassifier(words_embed=words_embed,
                                     out_channels=out_channels,
                                     cuda_device=cuda_device,
                                     conv_widths=conv_widths,
                                     hidden_size=hidden_size,
                                     out_size=out_size)

    # train
    train_config = config_dict['Train']
    num_epoch = train_config['epoch']
    weight_decay = train_config['weight_decay']
    lr = train_config['lr']
    early_stopping = train_config['early_stopping']

    optimizer = optim.Adam(clf.parameters(),
                           lr=lr,
                           weight_decay=weight_decay,
                           eps=1e-5)
    if cuda_device is not None:
        clf.cuda(device=cuda_device)

    print('Loading test data...')
    test_reader = UIUCReader(test, PAD_TOKEN='<pad>', pad_size=pad_size)
    test_reader.set_vocabs(vocabs)
    test_iterator = test_reader.get_dataset_iterator(batch_size,
                                                     train=False,
                                                     sort=False)

    def callback(verbose=False):
        train_acc = test_metric(clf,
                                train_iterator,
                                cuda_device,
                                'category',
                                return_info=False)
        if verbose:
            print('train_acc: %.3f' % train_acc)

        test_acc = test_metric(clf,
                               test_iterator,
                               cuda_device,
                               'category',
                               return_info=False)
        if verbose:
            print('test_acc: %.3f' % test_acc)

        return test_acc

    print('Training...')
    best_state_dict = train_model(clf,
                                  optimizer,
                                  train_iterator,
                                  label_name='category',
                                  num_epoch=num_epoch,
                                  cuda_device=cuda_device,
                                  early_stopping=early_stopping,
                                  callback=callback)
    print()

    if best_state_dict is not None:
        clf.load_state_dict(best_state_dict)

    torch.save(clf.state_dict(), os.path.join(model_dir, 'net.pt'))

    # test
    print('Loading test data...')
    test_reader = UIUCReader(test, PAD_TOKEN='<pad>', pad_size=pad_size)
    test_reader.set_vocabs(vocabs)

    print('Testing...')
    acc, categories, predicts, sents = test_metric(
        clf,
        test_reader.get_dataset_iterator(batch_size),
        cuda_device,
        label_name='category',
        return_info=True)
    print('test accuracy:', acc)

    print('Writing test result...')
    with open(test_result, 'w') as fwrite:
        for category, predict, sent in zip(categories, predicts, sents):
            fwrite.write(
                '%s\t%s\t%s\n' %
                (test_reader.get_vocab('category').itos[category],
                 test_reader.get_vocab('category').itos[predict], ' '.join([
                     test_reader.get_vocab('words').itos[word] for word in sent
                 ])))

    print('Done!')
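
The result file written above is a three-column TSV: gold category, predicted category, and the space-joined sentence. Given that format, per-category accuracy can be recovered from the file alone; a small companion sketch follows (per_category_accuracy is a hypothetical helper, not part of the project):

# Companion sketch: parse the gold<TAB>predicted<TAB>sentence lines written
# above and compute accuracy per gold category. Hypothetical helper.
from collections import Counter

def per_category_accuracy(result_path):
    correct, total = Counter(), Counter()
    with open(result_path) as fread:
        for line in fread:
            gold, predicted, _sentence = line.rstrip('\n').split('\t', 2)
            total[gold] += 1
            correct[gold] += int(gold == predicted)
    return {category: correct[category] / total[category] for category in total}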