Code Example #1: quora_dataset_config7
def quora_dataset_config7(is_debug, output_log=None):
    from model.self_attention_model import PreprocessWrapper
    from read_data.quora_question_pair.load_data import load_parsed_quora_data
    train, valid, test, embedding_matrix, character_size, word_pad_id, character_pad_id = \
        load_parsed_quora_data(debug=is_debug,
                               word_vector_name="glove_300d",
                               n_gram=1)
    from model.feed_forward_network import FFNWithCrossCompare
    return {
        "model_fn": FFNWithCrossCompare,
        "model_dict": {
            "word_embedding": embedding_matrix,
        },
        "pre_process_module_fn": PreprocessWrapper,
        "pre_process_module_dict": {
            "pad_idx": word_pad_id,
            "character_pad_idx": character_pad_id,
        },
        "data": [train, valid, test],
        "batch_size": 128,
        "train_loss": BCELoss,
        "clip_norm": None,
        "name": "FFN_try",
        "optimizer": optim.Adam,
        "need_pad": True,
        "optimizer_dict": {"betas": (0.9, 0.999), "weight_decay": 3e-7, },
        "epcohes": 80,
        "lr": 1e-3,
        "evaluate_object_list": [SequenceBinaryClassExactMatch(gpu_index=get_gpu_index())],
        "epoch_ratio": 0.25,
    }
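
As a reading aid for the config functions in these examples: the snippets reference optim, nn, BCELoss, get_gpu_index, and the evaluation objects without importing them locally, because those are module-level imports in the original file. The sketch below is a hypothetical driver (train_from_config is not a repository function) showing how the returned dict is presumably consumed; the call signature of the preprocess wrapper is an assumption.

# Module-level imports assumed by the config functions in these examples; the
# repository-internal helpers (get_gpu_index, evaluation objects, ...) come from
# modules not shown here.
from torch import nn, optim
from torch.nn import BCELoss

# Hypothetical driver, for illustration only: shows how the config keys are
# presumably consumed by the repository's training loop.
def train_from_config(config):
    train_data, valid_data, test_data = config["data"]
    model = config["model_fn"](**config["model_dict"])
    # the preprocess wrapper is assumed to wrap the model and handle padding
    model = config["pre_process_module_fn"](model, **config["pre_process_module_dict"])
    optimizer = config["optimizer"](model.parameters(), lr=config["lr"],
                                    **config["optimizer_dict"])
    loss_fn = config["train_loss"]()         # e.g. BCELoss()
    for _ in range(config["epcohes"]):       # the key is spelled "epcohes" throughout these configs
        ...                                  # iterate train_data in config["batch_size"] chunks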
Code Example #2: snli_config2
def snli_config2(is_debug, output_log=None):
    from model.self_attention_model import PreprocessWrapper
    from read_data.snli.read_snli_experiment_data import load_dict_data
    from read_data.snli.load_snli_vocabulary import load_snli_vocabulary, load_snli_character_vocabulary
    train, valid, test = load_dict_data(debug=is_debug, )
    vocabulary = load_snli_vocabulary("glove_300d")
    character_vocabulary = load_snli_character_vocabulary(n_gram=1)
    from model.feed_forward_network import FFNWithCrossCompare
    return {
        "model_fn": FFNWithCrossCompare,
        "model_dict": {
            "word_embedding": vocabulary.embedding_matrix,
            "n_classes": 3,
            "hidden_size": 400,
        },
        "pre_process_module_fn": PreprocessWrapper,
        "pre_process_module_dict": {
            "pad_idx": vocabulary.word_to_id(vocabulary.pad),
            "character_pad_idx": character_vocabulary.character_to_id_dict[character_vocabulary.PAD],
        },
        "data": [train, valid, test],
        "batch_size": 32,
        "train_loss": nn.CrossEntropyLoss,
        "clip_norm": None,
        "name": "FFN_snli",
        "optimizer": optim.Adam,
        "need_pad": True,
        "optimizer_dict": {"betas": (0.9, 0.999), "weight_decay": 3e-7, },
        "epcohes": 80,
        "lr": 1e-3,
        "evaluate_object_list": [SequenceExactMatch(gpu_index=get_gpu_index())],
        "epoch_ratio": 0.1,
    }
Code Example #3: quora_dataset_config5
def quora_dataset_config5(is_debug, output_log=None):
    from model.self_attention_model import PreprocessWrapper
    from read_data.quora_question_pair.load_data import load_parsed_quora_data
    train, valid, test, embedding_matrix, character_size, word_pad_id, character_pad_id = \
        load_parsed_quora_data(debug=is_debug,
                               word_vector_name="glove_300d",
                               n_gram=1)
    from qanet.qanet import QANet
    return {
        "model_fn": QANet,
        "model_dict": {
            "word_embedding_matrix": embedding_matrix, "char_embedding_matrix": None,
            "params": {

                "word_embed_dim": 300,

                "highway_n_layers": 2,

                "hidden_size": 128,

                "embed_encoder_resize_kernel_size": 7,
                "embed_encoder_resize_pad": 3,

                "embed_encoder_n_blocks": 1,
                "embed_encoder_n_conv": 4,
                "embed_encoder_kernel_size": 7,
                "embed_encoder_pad": 3,
                "embed_encoder_conv_type": "depthwise_separable",
                "embed_encoder_with_self_attn": False,
                "embed_encoder_n_heads": 8,

                "model_encoder_n_blocks": 7,
                "model_encoder_n_conv": 2,
                "model_encoder_kernel_size": 7,
                "model_encoder_pad": 3,
                "model_encoder_conv_type": "depthwise_separable",
                "model_encoder_with_self_attn": False,
                "model_encoder_n_heads": 8,

                "batch_size": 32,
            }
        },
        "pre_process_module_fn": PreprocessWrapper,
        "pre_process_module_dict": {
            "pad_idx": word_pad_id,
            "character_pad_idx": character_pad_id,
        },
        "data": [train, valid, test],
        "batch_size": 64,
        "train_loss": BCELoss,
        "clip_norm": None,
        "name": "qa_net",
        "optimizer": optim.Adam,
        "need_pad": True,
        "optimizer_dict": {"betas": (0.8, 0.999), "weight_decay": 3e-7, },
        "epcohes": 80,
        "lr": 0.001,
        "evaluate_object_list": [SequenceBinaryClassExactMatch(gpu_index=get_gpu_index())],
        "epoch_ratio": 0.25,
    }
Code Example #4: quora_dataset_config3
def quora_dataset_config3(is_debug, output_log=None):
    from model.self_attention_model import SelfAttentionPairModel, PreprocessWrapper
    from read_data.quora_question_pair.load_data import load_parsed_quora_data
    train, valid, test, embedding_matrix, character_size, word_pad_id, character_pad_id = \
        load_parsed_quora_data(debug=is_debug,
                               word_vector_name="fasttext",
                               n_gram=1)
    return {
        "model_fn": SelfAttentionPairModel,
        "model_dict": {
            "word_embedding": embedding_matrix,
            "character_number": character_size,
            "character_embedding_dim": 600,
            "character_n_filters": 200,
            "character_kernel_size": 5,
            "character_padding": 2,
            "self_attention_layer": 5,
        },
        "pre_process_module_fn": PreprocessWrapper,
        "pre_process_module_dict": {
            "pad_idx": word_pad_id,
            "character_pad_idx": character_pad_id,
        },
        "data": [train, valid, test],
        "batch_size": 8,
        "train_loss": BCELoss,
        "clip_norm": 10,
        "name": "sequence_graph_encoder_decoder_for_method_name",
        "optimizer": optim.Adam,
        "need_pad": True,
        "optimizer_dict": {"betas": (0.8, 0.999), "weight_decay": 3e-7, },
        "epcohes": 80,
        "lr": 1e-4,
        "evaluate_object_list": [SequenceBinaryClassExactMatch(gpu_index=get_gpu_index())],
    }
Code Example #5: snli_config7
def snli_config7(is_debug, output_log=None):
    from model.sentence_pair_graph import ConcatPreprocessWrapper
    import numpy as np
    from read_data.snli.read_snli_experiment_data import load_dict_data
    from read_data.snli.load_snli_vocabulary import load_snli_vocabulary, load_snli_character_vocabulary
    train, valid, test = load_dict_data(debug=is_debug, )
    vocabulary = load_snli_vocabulary("glove_300d")
    # character_vocabulary = load_snli_character_vocabulary(n_gram=1)
    delimeter_idx = len(vocabulary.id_to_word_dict)
    summary_idx = len(vocabulary.id_to_word_dict) + 1
    embedding_matrix = vocabulary.embedding_matrix
    embedding_matrix = np.concatenate((embedding_matrix, np.random.randn(2, embedding_matrix.shape[1])), axis=0)
    from model.transformer_lm import dotdict
    return {
        "model_fn": TestModel,
        "model_dict": {
            "cfg": dotdict({
                'n_embd': 768,
                'n_head': 1,
                'n_layer': 1,
                'embd_pdrop': 0.1,
                'attn_pdrop': 0.1,
                'resid_pdrop': 0.1,
                'afn': 'gelu',
                'clf_pdrop': 0.1}),
            "clf_token": summary_idx, "vocabulary_size": embedding_matrix.shape[0],
            "n_ctx": 80 + 2
        },
        "pre_process_module_fn": ConcatPreprocessWrapper,
        "pre_process_module_dict": {
            "pad_idx": vocabulary.word_to_id(vocabulary.pad),
            "delimeter_idx": delimeter_idx,
            "summary_node_idx": summary_idx,
            "max_length": 80,
        },
        "data": [train, valid, test],
        "batch_size": 8,
        "train_loss": nn.CrossEntropyLoss,
        "clip_norm": 1,
        "name": "GGNNGraphModel_snli",
        "optimizer": optim.Adam,
        "need_pad": True,
        # "optimizer_dict": {
        #                    "schedule": 'warmup_linear',
        #                    "warmup": 0.002,
        #                    "t_total": (100//8)*300,
        #                    "b1": 0.9,
        #                    "b2": 0.999,
        #                    "e": 1e-8,
        #                    "l2": 0.01,
        #                    "vector_l2": 'store_true',
        #                    "max_grad_norm": 1},
        "optimizer_dict": {"betas": (0.9, 0.999), "weight_decay": 0.01, },
        "epcohes": 300,
        "lr": 6.25e-5,
        "evaluate_object_list": [SequenceExactMatch(gpu_index=get_gpu_index())],
        "epoch_ratio": 1,
        "scheduler_fn": None
    }
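
The transformer configuration above is packed into dotdict, imported from model.transformer_lm. Its definition is not shown in these examples; judging from the attribute-style access the transformer code needs (cfg.n_embd, cfg.n_head, ...), it presumably behaves like the usual attribute-access dict recipe, sketched below as an assumption rather than the repository's actual code.

# Minimal sketch of the assumed dotdict behaviour: a dict whose keys can also
# be read and written as attributes.
class dotdict(dict):
    __getattr__ = dict.get
    __setattr__ = dict.__setitem__
    __delattr__ = dict.__delitem__

cfg = dotdict({'n_embd': 768, 'n_head': 1})
assert cfg.n_embd == 768 and cfg['n_head'] == 1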
Code Example #6: snli_config6
def snli_config6(is_debug, output_log=None):
    from model.sentence_pair_graph import ConcatPreprocessWrapper
    import numpy as np
    from read_data.snli.read_snli_experiment_data import load_dict_data
    from read_data.snli.load_snli_vocabulary import load_snli_vocabulary, load_snli_character_vocabulary
    train, valid, test = load_dict_data(debug=is_debug, )
    vocabulary = load_snli_vocabulary("glove_300d")
    # character_vocabulary = load_snli_character_vocabulary(n_gram=1)
    delimeter_idx = len(vocabulary.id_to_word_dict)
    summary_idx = len(vocabulary.id_to_word_dict) + 1
    embedding_matrix = vocabulary.embedding_matrix
    embedding_matrix = np.concatenate((embedding_matrix, np.random.randn(2, embedding_matrix.shape[1])), axis=0)
    return {
        "model_fn": GGNNGraphModel,
        "model_dict": {
            "word_embedding": embedding_matrix,
            "max_length": 80,
            "hidden_state_size": 756,
            "n_classes": 3,
        },
        "pre_process_module_fn": ConcatPreprocessWrapper,
        "pre_process_module_dict": {
            "pad_idx": vocabulary.word_to_id(vocabulary.pad),
            "delimeter_idx": delimeter_idx,
            "summary_node_idx": summary_idx,
            "max_length": 80,
        },
        "data": [train, valid, test],
        "batch_size": 8,
        "train_loss": nn.CrossEntropyLoss,
        "clip_norm": 1,
        "name": "GGNNGraphModel_snli",
        "optimizer": OpenAIAdam,
        "need_pad": True,
        "optimizer_dict": {
                           "schedule": 'warmup_linear',
                           "warmup": 0.002,
                           "t_total": (100//8)*300,
                           "b1": 0.9,
                           "b2": 0.999,
                           "e": 1e-8,
                           "l2": 0.01,
                           "vector_l2": 'store_true',
                           "max_grad_norm": 1},
        "epcohes": 300,
        "lr": 6.25e-5,
        "evaluate_object_list": [SequenceExactMatch(gpu_index=get_gpu_index())],
        "epoch_ratio": 1,
        "scheduler_fn": None
    }
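
The OpenAIAdam settings used here (and in the sequence-transform configs further down) share one non-obvious value, t_total. A small worked example of the arithmetic, under the assumption that t_total counts the total optimizer steps the warmup_linear schedule runs over and warmup is a fraction of that total:

# Interpretation of the OpenAIAdam settings above (an assumption, not
# repository documentation): t_total = steps per epoch * epochs.
steps_per_epoch = 100 // 8            # 12 optimizer steps per epoch
t_total = steps_per_epoch * 300       # 3600 scheduled steps, matching (100 // 8) * 300
warmup_steps = 0.002 * t_total        # warmup=0.002 -> roughly 7 steps of linear warmup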
Code Example #7: snli_config5
def snli_config5(is_debug, output_log=None):
    from model.sentence_pair_graph import PreprocessWrapper
    from read_data.snli.read_snli_experiment_data import load_dict_data
    from read_data.snli.load_snli_vocabulary import load_snli_vocabulary, load_snli_character_vocabulary
    train, valid, test = load_dict_data(debug=is_debug, )
    vocabulary = load_snli_vocabulary("glove_300d")
    character_vocabulary = load_snli_character_vocabulary(n_gram=1)
    from model.sentence_pair_graph import SequenceGraphModelWithGraphAttention
    return {
        "model_fn": SequenceGraphModelWithGraphAttention,
        "model_dict": {
            "word_embedding": vocabulary.embedding_matrix,
            "character_number": len(character_vocabulary.character_to_id_dict),
            "mixed": True,
            "character_embedding_dim": 600,
            "character_n_filters": 200,
            "character_kernel_size": 5,
            "character_padding": 2,
            "hidden_size": 128,
            "graph_itr": 1,
            "dynamic_graph_n_layer": 2,
            "graph_attention_n_head": 6,
            "leaky_alpha": 0.2,
            "n_classes": 3,
        },
        "pre_process_module_fn": PreprocessWrapper,
        "pre_process_module_dict": {
            "pad_idx": vocabulary.word_to_id(vocabulary.pad),
            "character_pad_idx": character_vocabulary.character_to_id_dict[character_vocabulary.PAD],
        },
        "data": [train, valid, test],
        "batch_size": 80,
        "train_loss": nn.CrossEntropyLoss,
        "clip_norm": None,
        "name": "SequenceGraphModelWithGraphAttention_snli",
        "optimizer": optim.Adam,
        "need_pad": True,
        "optimizer_dict": {"betas": (0.9, 0.999), "weight_decay": 3e-7, },
        "epcohes": 160,
        "lr": 3e-3,
        "evaluate_object_list": [SequenceExactMatch(gpu_index=get_gpu_index())],
        "epoch_ratio": 0.1,
    }
Code Example #8: quora_dataset_config1
def quora_dataset_config1(is_debug, output_log=None):
    from model.sentence_pair_graph import SequenceGraphModel, PreprocessWrapper
    from read_data.quora_question_pair.load_data import load_parsed_quora_data
    train, valid, test, embedding_matrix, character_size, word_pad_id, character_pad_id = \
        load_parsed_quora_data(debug=is_debug,
                               word_vector_name="glove_300d",
                               n_gram=1)
    return {
        "model_fn": SequenceGraphModel,
        "model_dict": {
            "word_embedding": embedding_matrix,
            "character_number": character_size,
            "character_embedding_dim": 16,
            "character_n_filters": 32,
            "character_kernel_size": 5,
            "character_padding": 2,
            "n_link_type": 3,
            "hidden_state_size": 200,
            "n_dynamic_link_layer": 2,
            "n_fix_graph": 1,
            "graph_itr": 5,
            "n_classes": 2,
            "summary_node": False,
            "tie_weight": False,
        },
        "pre_process_module_fn": PreprocessWrapper,
        "pre_process_module_dict": {
            "pad_idx": word_pad_id,
            "character_pad_idx": character_pad_id,
            "summary_node": True,
        },
        "data": [train, valid, test],
        "batch_size": 8,
        "train_loss": BCELoss,
        "clip_norm": 10,
        "name": "sequence_graph_encoder_decoder_for_method_name",
        "optimizer": optim.Adam,
        "need_pad": True,
        "optimizer_dict": {"betas": (0.8, 0.999), "weight_decay": 3e-7, },
        "epcohes": 80,
        "lr": 1e-4,
        "evaluate_object_list": [SequenceBinaryClassExactMatch(gpu_index=get_gpu_index())],
    }
Code Example #9: quora_dataset_config4
def quora_dataset_config4(is_debug, output_log=None):
    from model.self_attention_model import PreprocessWrapper
    from model.graph_cluster_model import GraphClusterModel
    from read_data.quora_question_pair.load_data import load_parsed_quora_data
    train, valid, test, embedding_matrix, character_size, word_pad_id, character_pad_id = \
        load_parsed_quora_data(debug=is_debug,
                               word_vector_name="glove_300d",
                               n_gram=1)
    return {
        "model_fn": GraphClusterModel,
        "model_dict": {
            "word_embedding": embedding_matrix,
            "character_number": character_size,
            "character_embedding_dim": 16,
            "character_n_filters": 32,
            "character_kernel_size": 5,
            "character_padding": 2,
            "hidden_size": 128, "conv_type": "depthwise_separable",
            "resize_kernel_size": 7, "resize_pad_size": 3,
            "n_encoder_conv_layer": 2, "encoder_kernel_size": 7, "encoder_padding": 3,
            "n_self_attention_heads": 4, "route_number": 3, "n_capsules": 32,
            "capsules_dim": 128, "n_compare_layer": 2, "n_layer_output_conv": 2,
            "n_layer_output_feedforward": 3, "hidden_size_output_feedforward": 128, "n_classes": 2, "dropout": 0.2
        },
        "pre_process_module_fn": PreprocessWrapper,
        "pre_process_module_dict": {
            "pad_idx": word_pad_id,
            "character_pad_idx": character_pad_id,
        },
        "data": [train, valid, test],
        "batch_size": 16,
        "train_loss": BCELoss,
        "clip_norm": None,
        "name": "sequence_graph_encoder_decoder_for_method_name",
        "optimizer": optim.Adam,
        "need_pad": True,
        "optimizer_dict": {"betas": (0.8, 0.999), "weight_decay": 3e-7, },
        "epcohes": 80,
        "lr": 1e-2,
        "evaluate_object_list": [SequenceBinaryClassExactMatch(gpu_index=get_gpu_index())],
    }
Code Example #10: sequence_transform_data_config3
def sequence_transform_data_config3(is_debug, output_log=None):
    from model.encoder_decoder_graph import SEDWithInitialStatePreproceser
    import numpy as np
    from read_data.sequencec_transform_data.load_data import load_generated_random_target_data
    train, valid, test = load_generated_random_target_data(is_debug)
    valid.train = False
    test.train = False
    max_index = 10
    def new_id():
        nonlocal max_index
        max_index += 1
        return max_index
    max_length = 20
    begin_index = new_id()
    end_index = new_id()
    delimiter_index = new_id()
    pad_index = new_id()
    decoder_init_idx = new_id()
    for t in [train, valid, test]:
        t.end = [end_index]
    train_size = len(train)
    itr_num = 80
    batch_size = 14
    from model.transformer_lm import dotdict
    from model.encoder_decoder_graph import SEDWithInitialState
    return {
        "model_fn": SEDWithInitialState,
        "model_dict": {
            "cfg": dotdict({
                'n_embd': 768,
                'n_head': 12,
                'n_layer': 12,
                'embd_pdrop': 0.1,
                'attn_pdrop': 0.1,
                'resid_pdrop': 0.1,
                'afn': 'gelu',
                'clf_pdrop': 0.1}),
            "vocab": max_index + 1 + max_length * 2 + 4,
            "n_source_ctx": max_length + 2,
            "n_ctx": max_length * 2 + 4,
            "decoder_init_idx": decoder_init_idx,
        },
        "pre_process_module_fn": SEDWithInitialStatePreproceser,
        "pre_process_module_dict": {
            "begin_idx":  begin_index,
            "delimeter_idx": delimiter_index,
            "summary_idx": decoder_init_idx,
            "pad_idx": pad_index,
            "source_ctx": max_length+2,
            "position_embedding_base": max_index+1,
        },
        "data": [train, valid, test],
        "label_preprocess": lambda x: to_cuda(torch.LongTensor([PaddedList(t, fill_value=pad_index, shape=[max_length+1]) for t in x['y']])),
        "batch_size": batch_size,
        "train_loss": lambda: NCE_train_loss(ignore_index=pad_index),
        "clip_norm": 1,
        "name": "SEDWithInitialState",
        "optimizer": OpenAIAdam,
        "need_pad": True,
        "optimizer_dict": {
                           "schedule": 'warmup_linear',
                           "warmup": 0.002,
                           "t_total": (train_size//batch_size)*itr_num,
                           "b1": 0.9,
                           "b2": 0.999,
                           "e": 1e-8,
                           "l2": 0.01,
                           "vector_l2": 'store_true',
                           "max_grad_norm": 1},
        "epcohes": itr_num,
        "lr": 6.25e-5,
        "evaluate_object_list": [SequenceExactMatch(gpu_index=get_gpu_index(), ignore_token=pad_index),
                                 SequenceOutputIDToWord(vocab=None, file_path=output_log, ignore_token=pad_index)],
        "epoch_ratio": 1,
        "scheduler_fn": None
    }
Code Example #11: sequence_transform_data_config2
def sequence_transform_data_config2(is_debug, output_log=None):
    from model.encoder_decoder_graph import SequenceEncoderDecoderModelUseEncodePad
    import numpy as np
    from read_data.sequencec_transform_data.load_data import load_generated_random_target_data
    train, valid, test = load_generated_random_target_data(is_debug)
    valid.train = False
    test.train = False
    max_index = 10
    max_length = 20
    begin_index = 11
    end_index = 12
    delimiter_index = 13
    hole_index = 14
    pad_index = 15
    for t in [train, valid, test]:
        t.end = [end_index]
    from model.transformer_lm import dotdict
    from model.encoder_decoder_graph import SequencePreprocesserWithInputPad
    return {
        "model_fn": SequenceEncoderDecoderModelUseEncodePad,
        "model_dict": {
            "cfg": dotdict({
                'n_embd': 768,
                'n_head': 1,
                'n_layer': 1,
                'embd_pdrop': 0.1,
                'attn_pdrop': 0.1,
                'resid_pdrop': 0.1,
                'afn': 'gelu',
                'clf_pdrop': 0.1}),
            "vocab": 16 + max_length*2+4,
            "n_ctx": max_length*2+4,
            "encoder_length": max_length+2,
        },
        "pre_process_module_fn": SequencePreprocesserWithInputPad,
        "pre_process_module_dict": {
            "hole_idx": hole_index,
            "begin_idx":  begin_index,
            "delimeter_idx": delimiter_index,
            "pad_idx": pad_index,
            "max_length": max_length+2,
            "position_embedding_base": 16,
        },
        "data": [train, valid, test],
        "label_preprocess": lambda x: to_cuda(torch.LongTensor([PaddedList(t, fill_value=pad_index, shape=[max_length+1]) for t in x['y']])),
        "batch_size": 800,
        "train_loss": lambda: NCE_train_loss(ignore_index=pad_index),
        "clip_norm": 1,
        "name": "Transformer_seq_to_seq_model_use_random_target_use_encoder_pad",
        "optimizer": OpenAIAdam,
        "need_pad": True,
        "optimizer_dict": {
                           "schedule": 'warmup_linear',
                           "warmup": 0.002,
                           "t_total": (80000//800)*80,
                           "b1": 0.9,
                           "b2": 0.999,
                           "e": 1e-8,
                           "l2": 0.01,
                           "vector_l2": 'store_true',
                           "max_grad_norm": 1},
        "epcohes": 80,
        "lr": 6.25e-4,
        "evaluate_object_list": [SequenceExactMatch(gpu_index=get_gpu_index(), ignore_token=pad_index),
                                 SequenceOutputIDToWord(vocab=None, file_path=output_log, ignore_token=pad_index)],
        "epoch_ratio": 1,
        "scheduler_fn": None
    }
Code Example #12: snli_config3
def snli_config3(is_debug, output_log=None):
    from model.self_attention_model import PreprocessWrapper
    from read_data.snli.read_snli_experiment_data import load_dict_data
    from read_data.snli.load_snli_vocabulary import load_snli_vocabulary, load_snli_character_vocabulary
    train, valid, test = load_dict_data(debug=is_debug, )
    vocabulary = load_snli_vocabulary("glove_300d")
    character_vocabulary = load_snli_character_vocabulary(n_gram=1)
    from qanet.qanet import QANet
    return {
        "model_fn": QANet,
        "model_dict": {
            "word_embedding_matrix": vocabulary.embedding_matrix,
            "char_embedding_matrix": None,
            "params": {

                "word_embed_dim": 300,

                "highway_n_layers": 2,

                "hidden_size": 128,

                "embed_encoder_resize_kernel_size": 7,
                "embed_encoder_resize_pad": 3,

                "embed_encoder_n_blocks": 1,
                "embed_encoder_n_conv": 4,
                "embed_encoder_kernel_size": 7,
                "embed_encoder_pad": 3,
                "embed_encoder_conv_type": "depthwise_separable",
                "embed_encoder_with_self_attn": False,
                "embed_encoder_n_heads": 8,

                "model_encoder_n_blocks": 7,
                "model_encoder_n_conv": 2,
                "model_encoder_kernel_size": 7,
                "model_encoder_pad": 3,
                "model_encoder_conv_type": "depthwise_separable",
                "model_encoder_with_self_attn": False,
                "model_encoder_n_heads": 8,

                "batch_size": 128,
            }
        },
        "pre_process_module_fn": PreprocessWrapper,
        "pre_process_module_dict": {
            "pad_idx": vocabulary.word_to_id(vocabulary.pad),
            "character_pad_idx": character_vocabulary.character_to_id_dict[character_vocabulary.PAD],
        },
        "data": [train, valid, test],
        "batch_size": 128,
        "train_loss": nn.CrossEntropyLoss,
        "clip_norm": None,
        "name": "QANet_snli",
        "optimizer": optim.Adam,
        "need_pad": True,
        "optimizer_dict": {"betas": (0.9, 0.999), "weight_decay": 3e-7, },
        "epcohes": 80,
        "lr": 1e-5,
        "evaluate_object_list": [SequenceExactMatch(gpu_index=get_gpu_index())],
        "epoch_ratio": 0.1,
    }
Code Example #13: loss helper
def loss(log_probs, target):
    # nested closure: ignore_token is captured from the enclosing scope
    return calculate_accuracy_of_code_completion(log_probs, target, ignore_token=ignore_token, topk_range=(1, 1),
                                                 gpu_index=get_gpu_index())[1]
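
This last example is a nested closure lifted out of its context: ignore_token, calculate_accuracy_of_code_completion, and get_gpu_index all come from the enclosing module. A self-contained variant, assuming those helpers can be imported from the repository's utility code, would bind the token explicitly through a factory; make_top1_loss is a hypothetical name used only for illustration.

# Hypothetical factory (illustration only): makes the captured ignore_token an
# explicit argument instead of a free variable.
def make_top1_loss(ignore_token):
    def loss(log_probs, target):
        # entry [1] of the helper's return value is what the original closure uses
        return calculate_accuracy_of_code_completion(
            log_probs, target,
            ignore_token=ignore_token,
            topk_range=(1, 1),
            gpu_index=get_gpu_index())[1]
    return loss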