import torch
import torch.nn as nn
import torch.optim as optim
from torch.nn import BCELoss

# The torch imports above may duplicate module-level imports elsewhere in this
# file. The remaining names used below (get_gpu_index, OpenAIAdam,
# GGNNGraphModel, TestModel, NCE_train_loss, to_cuda, PaddedList,
# SequenceExactMatch, SequenceBinaryClassExactMatch, SequenceOutputIDToWord,
# calculate_accuracy_of_code_completion) are assumed to be imported or defined
# elsewhere in this module.


def quora_dataset_config7(is_debug, output_log=None):
    """FFNWithCrossCompare on the Quora question-pair dataset."""
    from model.self_attention_model import PreprocessWrapper
    from read_data.quora_question_pair.load_data import load_parsed_quora_data
    train, valid, test, embedding_matrix, character_size, word_pad_id, character_pad_id = \
        load_parsed_quora_data(debug=is_debug, word_vector_name="glove_300d", n_gram=1)
    from model.feed_forward_network import FFNWithCrossCompare
    return {
        "model_fn": FFNWithCrossCompare,
        "model_dict": {
            "word_embedding": embedding_matrix,
        },
        "pre_process_module_fn": PreprocessWrapper,
        "pre_process_module_dict": {
            "pad_idx": word_pad_id,
            "character_pad_idx": character_pad_id,
        },
        "data": [train, valid, test],
        "batch_size": 128,
        "train_loss": BCELoss,
        "clip_norm": None,
        "name": "FFN_try",
        "optimizer": optim.Adam,
        "need_pad": True,
        "optimizer_dict": {"betas": (0.9, 0.999), "weight_decay": 3e-7},
        "epcohes": 80,  # sic: key spelling kept for compatibility with the consumer of these configs
        "lr": 1e-3,
        "evaluate_object_list": [SequenceBinaryClassExactMatch(gpu_index=get_gpu_index())],
        "epoch_ratio": 0.25,
    }
def snli_config2(is_debug, output_log=None):
    """FFNWithCrossCompare on SNLI (3-way classification)."""
    from model.self_attention_model import PreprocessWrapper
    from read_data.snli.read_snli_experiment_data import load_dict_data
    from read_data.snli.load_snli_vocabulary import load_snli_vocabulary, load_snli_character_vocabulary
    train, valid, test = load_dict_data(debug=is_debug)
    vocabulary = load_snli_vocabulary("glove_300d")
    character_vocabulary = load_snli_character_vocabulary(n_gram=1)
    from model.feed_forward_network import FFNWithCrossCompare
    return {
        "model_fn": FFNWithCrossCompare,
        "model_dict": {
            "word_embedding": vocabulary.embedding_matrix,
            "n_classes": 3,
            "hidden_size": 400,
        },
        "pre_process_module_fn": PreprocessWrapper,
        "pre_process_module_dict": {
            "pad_idx": vocabulary.word_to_id(vocabulary.pad),
            "character_pad_idx": character_vocabulary.character_to_id_dict[character_vocabulary.PAD],
        },
        "data": [train, valid, test],
        "batch_size": 32,
        "train_loss": nn.CrossEntropyLoss,
        "clip_norm": None,
        "name": "FFN_snli",
        "optimizer": optim.Adam,
        "need_pad": True,
        "optimizer_dict": {"betas": (0.9, 0.999), "weight_decay": 3e-7},
        "epcohes": 80,
        "lr": 1e-3,
        "evaluate_object_list": [SequenceExactMatch(gpu_index=get_gpu_index())],
        "epoch_ratio": 0.1,
    }
def quora_dataset_config5(is_debug, output_log=None):
    """QANet adapted to Quora question-pair classification."""
    from model.self_attention_model import PreprocessWrapper
    from read_data.quora_question_pair.load_data import load_parsed_quora_data
    train, valid, test, embedding_matrix, character_size, word_pad_id, character_pad_id = \
        load_parsed_quora_data(debug=is_debug, word_vector_name="glove_300d", n_gram=1)
    from qanet.qanet import QANet
    return {
        "model_fn": QANet,
        "model_dict": {
            "word_embedding_matrix": embedding_matrix,
            "char_embedding_matrix": None,
            "params": {
                "word_embed_dim": 300,
                "highway_n_layers": 2,
                "hidden_size": 128,
                "embed_encoder_resize_kernel_size": 7,
                "embed_encoder_resize_pad": 3,
                "embed_encoder_n_blocks": 1,
                "embed_encoder_n_conv": 4,
                "embed_encoder_kernel_size": 7,
                "embed_encoder_pad": 3,
                "embed_encoder_conv_type": "depthwise_separable",
                "embed_encoder_with_self_attn": False,
                "embed_encoder_n_heads": 8,
                "model_encoder_n_blocks": 7,
                "model_encoder_n_conv": 2,
                "model_encoder_kernel_size": 7,
                "model_encoder_pad": 3,
                "model_encoder_conv_type": "depthwise_separable",
                "model_encoder_with_self_attn": False,
                "model_encoder_n_heads": 8,
                "batch_size": 32,  # note: differs from the top-level batch_size of 64
            }
        },
        "pre_process_module_fn": PreprocessWrapper,
        "pre_process_module_dict": {
            "pad_idx": word_pad_id,
            "character_pad_idx": character_pad_id,
        },
        "data": [train, valid, test],
        "batch_size": 64,
        "train_loss": BCELoss,
        "clip_norm": None,
        "name": "qa_net",
        "optimizer": optim.Adam,
        "need_pad": True,
        "optimizer_dict": {"betas": (0.8, 0.999), "weight_decay": 3e-7},
        "epcohes": 80,
        "lr": 0.001,
        "evaluate_object_list": [SequenceBinaryClassExactMatch(gpu_index=get_gpu_index())],
        "epoch_ratio": 0.25,
    }
def quora_dataset_config3(is_debug, output_log=None):
    """SelfAttentionPairModel on Quora question pairs with fastText vectors."""
    from model.self_attention_model import SelfAttentionPairModel, PreprocessWrapper
    from read_data.quora_question_pair.load_data import load_parsed_quora_data
    train, valid, test, embedding_matrix, character_size, word_pad_id, character_pad_id = \
        load_parsed_quora_data(debug=is_debug, word_vector_name="fasttext", n_gram=1)
    return {
        "model_fn": SelfAttentionPairModel,
        "model_dict": {
            "word_embedding": embedding_matrix,
            "character_number": character_size,
            "character_embedding_dim": 600,
            "character_n_filters": 200,
            "character_kernel_size": 5,
            "character_padding": 2,
            "self_attention_layer": 5,
        },
        "pre_process_module_fn": PreprocessWrapper,
        "pre_process_module_dict": {
            "pad_idx": word_pad_id,
            "character_pad_idx": character_pad_id,
        },
        "data": [train, valid, test],
        "batch_size": 8,
        "train_loss": BCELoss,
        "clip_norm": 10,
        "name": "sequence_graph_encoder_decoder_for_method_name",  # name appears copied from another experiment
        "optimizer": optim.Adam,
        "need_pad": True,
        "optimizer_dict": {"betas": (0.8, 0.999), "weight_decay": 3e-7},
        "epcohes": 80,
        "lr": 1e-4,
        "evaluate_object_list": [SequenceBinaryClassExactMatch(gpu_index=get_gpu_index())],
    }
def snli_config7(is_debug, output_log=None):
    """Single-block transformer classifier (TestModel) on SNLI, fed concatenated sentence pairs."""
    from model.sentence_pair_graph import ConcatPreprocessWrapper
    import numpy as np
    from read_data.snli.read_snli_experiment_data import load_dict_data
    from read_data.snli.load_snli_vocabulary import load_snli_vocabulary, load_snli_character_vocabulary
    train, valid, test = load_dict_data(debug=is_debug)
    vocabulary = load_snli_vocabulary("glove_300d")
    # character_vocabulary = load_snli_character_vocabulary(n_gram=1)
    # Two extra vocabulary entries: a delimiter between the sentences and a
    # summary/classification token appended at the end.
    delimeter_idx = len(vocabulary.id_to_word_dict)
    summary_idx = len(vocabulary.id_to_word_dict) + 1
    embedding_matrix = vocabulary.embedding_matrix
    embedding_matrix = np.concatenate((embedding_matrix, np.random.randn(2, embedding_matrix.shape[1])), axis=0)
    from model.transformer_lm import dotdict
    return {
        "model_fn": TestModel,
        "model_dict": {
            "cfg": dotdict({
                'n_embd': 768,
                'n_head': 1,
                'n_layer': 1,
                'embd_pdrop': 0.1,
                'attn_pdrop': 0.1,
                'resid_pdrop': 0.1,
                'afn': 'gelu',
                'clf_pdrop': 0.1}),
            "clf_token": summary_idx,
            "vocabulary_size": embedding_matrix.shape[0],
            "n_ctx": 80 + 2,
        },
        "pre_process_module_fn": ConcatPreprocessWrapper,
        "pre_process_module_dict": {
            "pad_idx": vocabulary.word_to_id(vocabulary.pad),
            "delimeter_idx": delimeter_idx,
            "summary_node_idx": summary_idx,
            "max_length": 80,
        },
        "data": [train, valid, test],
        "batch_size": 8,
        "train_loss": nn.CrossEntropyLoss,
        "clip_norm": 1,
        "name": "GGNNGraphModel_snli",  # name shared with snli_config6
        "optimizer": optim.Adam,
        "need_pad": True,
        # "optimizer_dict": {
        #     "schedule": 'warmup_linear',
        #     "warmup": 0.002,
        #     "t_total": (100 // 8) * 300,
        #     "b1": 0.9,
        #     "b2": 0.999,
        #     "e": 1e-8,
        #     "l2": 0.01,
        #     "vector_l2": 'store_true',
        #     "max_grad_norm": 1},
        "optimizer_dict": {"betas": (0.9, 0.999), "weight_decay": 0.01},
        "epcohes": 300,
        "lr": 6.25e-5,
        "evaluate_object_list": [SequenceExactMatch(gpu_index=get_gpu_index())],
        "epoch_ratio": 1,
        "scheduler_fn": None,
    }
def snli_config6(is_debug, output_log=None):
    """GGNNGraphModel on SNLI, trained with OpenAIAdam and a warmup-linear schedule."""
    from model.sentence_pair_graph import ConcatPreprocessWrapper
    import numpy as np
    from read_data.snli.read_snli_experiment_data import load_dict_data
    from read_data.snli.load_snli_vocabulary import load_snli_vocabulary, load_snli_character_vocabulary
    train, valid, test = load_dict_data(debug=is_debug)
    vocabulary = load_snli_vocabulary("glove_300d")
    # character_vocabulary = load_snli_character_vocabulary(n_gram=1)
    delimeter_idx = len(vocabulary.id_to_word_dict)
    summary_idx = len(vocabulary.id_to_word_dict) + 1
    embedding_matrix = vocabulary.embedding_matrix
    embedding_matrix = np.concatenate((embedding_matrix, np.random.randn(2, embedding_matrix.shape[1])), axis=0)
    return {
        "model_fn": GGNNGraphModel,
        "model_dict": {
            "word_embedding": embedding_matrix,
            "max_length": 80,
            "hidden_state_size": 756,
            "n_classes": 3,
        },
        "pre_process_module_fn": ConcatPreprocessWrapper,
        "pre_process_module_dict": {
            "pad_idx": vocabulary.word_to_id(vocabulary.pad),
            "delimeter_idx": delimeter_idx,
            "summary_node_idx": summary_idx,
            "max_length": 80,
        },
        "data": [train, valid, test],
        "batch_size": 8,
        "train_loss": nn.CrossEntropyLoss,
        "clip_norm": 1,
        "name": "GGNNGraphModel_snli",
        "optimizer": OpenAIAdam,
        "need_pad": True,
        "optimizer_dict": {
            "schedule": 'warmup_linear',
            "warmup": 0.002,
            "t_total": (100 // 8) * 300,  # (dataset_size // batch_size) * epochs, hardcoded for a 100-example run
            "b1": 0.9,
            "b2": 0.999,
            "e": 1e-8,
            "l2": 0.01,
            "vector_l2": 'store_true',  # looks copied from an argparse spec; any truthy value enables it
            "max_grad_norm": 1},
        "epcohes": 300,
        "lr": 6.25e-5,
        "evaluate_object_list": [SequenceExactMatch(gpu_index=get_gpu_index())],
        "epoch_ratio": 1,
        "scheduler_fn": None,
    }
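# Hedged note on the OpenAIAdam configs above and below: "optimizer_dict" is
# splatted into the optimizer constructor together with "lr". Assuming the
# common PyTorch port of OpenAIAdam, whose constructor takes
# (params, lr, schedule, warmup, t_total, b1, b2, e, l2, vector_l2,
# max_grad_norm), the call the driver makes for snli_config6 would be
# equivalent to:
#
#     OpenAIAdam(model.parameters(), lr=6.25e-5, schedule='warmup_linear',
#                warmup=0.002, t_total=(100 // 8) * 300, b1=0.9, b2=0.999,
#                e=1e-8, l2=0.01, vector_l2='store_true', max_grad_norm=1)
#
# t_total is the total number of optimizer steps the warmup-linear schedule
# spans, which is why the configs compute it as
# (dataset_size // batch_size) * epochs.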
def snli_config5(is_debug, output_log=None):
    """SequenceGraphModelWithGraphAttention on SNLI."""
    from model.sentence_pair_graph import PreprocessWrapper
    from read_data.snli.read_snli_experiment_data import load_dict_data
    from read_data.snli.load_snli_vocabulary import load_snli_vocabulary, load_snli_character_vocabulary
    train, valid, test = load_dict_data(debug=is_debug)
    vocabulary = load_snli_vocabulary("glove_300d")
    character_vocabulary = load_snli_character_vocabulary(n_gram=1)
    from model.sentence_pair_graph import SequenceGraphModelWithGraphAttention
    return {
        "model_fn": SequenceGraphModelWithGraphAttention,
        "model_dict": {
            "word_embedding": vocabulary.embedding_matrix,
            "character_number": len(character_vocabulary.character_to_id_dict),
            "mixed": True,
            "character_embedding_dim": 600,
            "character_n_filters": 200,
            "character_kernel_size": 5,
            "character_padding": 2,
            "hidden_size": 128,
            "graph_itr": 1,
            "dynamic_graph_n_layer": 2,
            "graph_attention_n_head": 6,
            "leaky_alpha": 0.2,
            "n_classes": 3,
        },
        "pre_process_module_fn": PreprocessWrapper,
        "pre_process_module_dict": {
            "pad_idx": vocabulary.word_to_id(vocabulary.pad),
            "character_pad_idx": character_vocabulary.character_to_id_dict[character_vocabulary.PAD],
        },
        "data": [train, valid, test],
        "batch_size": 80,
        "train_loss": nn.CrossEntropyLoss,
        "clip_norm": None,
        "name": "SequenceGraphModelWithGraphAttention_snli",
        "optimizer": optim.Adam,
        "need_pad": True,
        "optimizer_dict": {"betas": (0.9, 0.999), "weight_decay": 3e-7},
        "epcohes": 160,
        "lr": 3e-3,
        "evaluate_object_list": [SequenceExactMatch(gpu_index=get_gpu_index())],
        "epoch_ratio": 0.1,
    }
def quora_dataset_config1(is_debug, output_log=None):
    """SequenceGraphModel on Quora question pairs."""
    from model.sentence_pair_graph import SequenceGraphModel, PreprocessWrapper
    from read_data.quora_question_pair.load_data import load_parsed_quora_data
    train, valid, test, embedding_matrix, character_size, word_pad_id, character_pad_id = \
        load_parsed_quora_data(debug=is_debug, word_vector_name="glove_300d", n_gram=1)
    return {
        "model_fn": SequenceGraphModel,
        "model_dict": {
            "word_embedding": embedding_matrix,
            "character_number": character_size,
            "character_embedding_dim": 16,
            "character_n_filters": 32,
            "character_kernel_size": 5,
            "character_padding": 2,
            "n_link_type": 3,
            "hidden_state_size": 200,
            "n_dynamic_link_layer": 2,
            "n_fix_graph": 1,
            "graph_itr": 5,
            "n_classes": 2,
            "summary_node": False,  # note: differs from the preprocessor's summary_node=True below
            "tie_weight": False,
        },
        "pre_process_module_fn": PreprocessWrapper,
        "pre_process_module_dict": {
            "pad_idx": word_pad_id,
            "character_pad_idx": character_pad_id,
            "summary_node": True,
        },
        "data": [train, valid, test],
        "batch_size": 8,
        "train_loss": BCELoss,
        "clip_norm": 10,
        "name": "sequence_graph_encoder_decoder_for_method_name",
        "optimizer": optim.Adam,
        "need_pad": True,
        "optimizer_dict": {"betas": (0.8, 0.999), "weight_decay": 3e-7},
        "epcohes": 80,
        "lr": 1e-4,
        "evaluate_object_list": [SequenceBinaryClassExactMatch(gpu_index=get_gpu_index())],
    }
def quora_dataset_config4(is_debug, output_log=None):
    """GraphClusterModel (capsule routing over graph encodings) on Quora question pairs."""
    from model.self_attention_model import PreprocessWrapper
    from model.graph_cluster_model import GraphClusterModel
    from read_data.quora_question_pair.load_data import load_parsed_quora_data
    train, valid, test, embedding_matrix, character_size, word_pad_id, character_pad_id = \
        load_parsed_quora_data(debug=is_debug, word_vector_name="glove_300d", n_gram=1)
    return {
        "model_fn": GraphClusterModel,
        "model_dict": {
            "word_embedding": embedding_matrix,
            "character_number": character_size,
            "character_embedding_dim": 16,
            "character_n_filters": 32,
            "character_kernel_size": 5,
            "character_padding": 2,
            "hidden_size": 128,
            "conv_type": "depthwise_separable",
            "resize_kernel_size": 7,
            "resize_pad_size": 3,
            "n_encoder_conv_layer": 2,
            "encoder_kernel_size": 7,
            "encoder_padding": 3,
            "n_self_attention_heads": 4,
            "route_number": 3,
            "n_capsules": 32,
            "capsules_dim": 128,
            "n_compare_layer": 2,
            "n_layer_output_conv": 2,
            "n_layer_output_feedforward": 3,
            "hidden_size_output_feedforward": 128,
            "n_classes": 2,
            "dropout": 0.2,
        },
        "pre_process_module_fn": PreprocessWrapper,
        "pre_process_module_dict": {
            "pad_idx": word_pad_id,
            "character_pad_idx": character_pad_id,
        },
        "data": [train, valid, test],
        "batch_size": 16,
        "train_loss": BCELoss,
        "clip_norm": None,
        "name": "sequence_graph_encoder_decoder_for_method_name",
        "optimizer": optim.Adam,
        "need_pad": True,
        "optimizer_dict": {"betas": (0.8, 0.999), "weight_decay": 3e-7},
        "epcohes": 80,
        "lr": 1e-2,
        "evaluate_object_list": [SequenceBinaryClassExactMatch(gpu_index=get_gpu_index())],
    }
def sequence_transform_data_config3(is_debug, output_log=None):
    """Transformer encoder-decoder (SEDWithInitialState) on the random-target sequence-transform data."""
    from model.encoder_decoder_graph import SEDWithInitialStatePreproceser
    from read_data.sequencec_transform_data.load_data import load_generated_random_target_data
    train, valid, test = load_generated_random_target_data(is_debug)
    valid.train = False
    test.train = False
    max_index = 10

    def new_id():
        # Allocate the next free vocabulary id for a special token.
        nonlocal max_index
        max_index += 1
        return max_index

    max_length = 20
    begin_index = new_id()
    end_index = new_id()
    delimiter_index = new_id()
    pad_index = new_id()
    decoder_init_idx = new_id()
    for t in [train, valid, test]:
        t.end = [end_index]
    train_size = len(train)
    itr_num = 80
    batch_size = 14
    from model.transformer_lm import dotdict
    from model.encoder_decoder_graph import SEDWithInitialState
    return {
        "model_fn": SEDWithInitialState,
        "model_dict": {
            "cfg": dotdict({
                'n_embd': 768,
                'n_head': 12,
                'n_layer': 12,
                'embd_pdrop': 0.1,
                'attn_pdrop': 0.1,
                'resid_pdrop': 0.1,
                'afn': 'gelu',
                'clf_pdrop': 0.1}),
            "vocab": max_index + 1 + max_length * 2 + 4,
            "n_source_ctx": max_length + 2,
            "n_ctx": max_length * 2 + 4,
            "decoder_init_idx": decoder_init_idx,
        },
        "pre_process_module_fn": SEDWithInitialStatePreproceser,
        "pre_process_module_dict": {
            "begin_idx": begin_index,
            "delimeter_idx": delimiter_index,
            "summary_idx": decoder_init_idx,
            "pad_idx": pad_index,
            "source_ctx": max_length + 2,
            "position_embedding_base": max_index + 1,
        },
        "data": [train, valid, test],
        "label_preprocess": lambda x: to_cuda(torch.LongTensor(
            [PaddedList(t, fill_value=pad_index, shape=[max_length + 1]) for t in x['y']])),
        "batch_size": batch_size,
        "train_loss": lambda: NCE_train_loss(ignore_index=pad_index),
        "clip_norm": 1,
        "name": "SEDWithInitialState",
        "optimizer": OpenAIAdam,
        "need_pad": True,
        "optimizer_dict": {
            "schedule": 'warmup_linear',
            "warmup": 0.002,
            "t_total": (train_size // batch_size) * itr_num,
            "b1": 0.9,
            "b2": 0.999,
            "e": 1e-8,
            "l2": 0.01,
            "vector_l2": 'store_true',
            "max_grad_norm": 1},
        "epcohes": itr_num,
        "lr": 6.25e-5,
        "evaluate_object_list": [SequenceExactMatch(gpu_index=get_gpu_index(), ignore_token=pad_index),
                                 SequenceOutputIDToWord(vocab=None, file_path=output_log, ignore_token=pad_index)],
        "epoch_ratio": 1,
        "scheduler_fn": None,
    }
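# Worked check of the id layout in sequence_transform_data_config3: the data
# tokens occupy ids 0..10, and the five new_id() calls allocate 11..15
# (begin, end, delimiter, pad, decoder-init), so max_index + 1 == 16 base
# token ids. Positions are embedded as extra vocabulary entries starting at
# position_embedding_base == 16, one per context slot: n_ctx == 20 * 2 + 4
# == 44, giving vocab == 16 + 44 == 60. sequence_transform_data_config2 below
# hardcodes the same layout (special ids 11..15, base 16, vocab 16 + 44).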
def sequence_transform_data_config2(is_debug, output_log=None):
    """Transformer seq-to-seq model with encoder-side padding on the random-target data."""
    from model.encoder_decoder_graph import SequenceEncoderDecoderModelUseEncodePad
    from read_data.sequencec_transform_data.load_data import load_generated_random_target_data
    train, valid, test = load_generated_random_target_data(is_debug)
    valid.train = False
    test.train = False
    max_index = 10
    max_length = 20
    # Special token ids, hardcoded right after the data tokens 0..10.
    begin_index = 11
    end_index = 12
    delimiter_index = 13
    hole_index = 14
    pad_index = 15
    for t in [train, valid, test]:
        t.end = [end_index]
    from model.transformer_lm import dotdict
    from model.encoder_decoder_graph import SequencePreprocesserWithInputPad
    return {
        "model_fn": SequenceEncoderDecoderModelUseEncodePad,
        "model_dict": {
            "cfg": dotdict({
                'n_embd': 768,
                'n_head': 1,
                'n_layer': 1,
                'embd_pdrop': 0.1,
                'attn_pdrop': 0.1,
                'resid_pdrop': 0.1,
                'afn': 'gelu',
                'clf_pdrop': 0.1}),
            "vocab": 16 + max_length * 2 + 4,
            "n_ctx": max_length * 2 + 4,
            "encoder_length": max_length + 2,
        },
        "pre_process_module_fn": SequencePreprocesserWithInputPad,
        "pre_process_module_dict": {
            "hole_idx": hole_index,
            "begin_idx": begin_index,
            "delimeter_idx": delimiter_index,
            "pad_idx": pad_index,
            "max_length": max_length + 2,
            "position_embedding_base": 16,
        },
        "data": [train, valid, test],
        "label_preprocess": lambda x: to_cuda(torch.LongTensor(
            [PaddedList(t, fill_value=pad_index, shape=[max_length + 1]) for t in x['y']])),
        "batch_size": 800,
        "train_loss": lambda: NCE_train_loss(ignore_index=pad_index),
        "clip_norm": 1,
        "name": "Transformer_seq_to_seq_model_use_random_target_use_encoder_pad",
        "optimizer": OpenAIAdam,
        "need_pad": True,
        "optimizer_dict": {
            "schedule": 'warmup_linear',
            "warmup": 0.002,
            "t_total": (80000 // 800) * 80,
            "b1": 0.9,
            "b2": 0.999,
            "e": 1e-8,
            "l2": 0.01,
            "vector_l2": 'store_true',
            "max_grad_norm": 1},
        "epcohes": 80,
        "lr": 6.25e-4,
        "evaluate_object_list": [SequenceExactMatch(gpu_index=get_gpu_index(), ignore_token=pad_index),
                                 SequenceOutputIDToWord(vocab=None, file_path=output_log, ignore_token=pad_index)],
        "epoch_ratio": 1,
        "scheduler_fn": None,
    }
def snli_config3(is_debug, output_log=None):
    """QANet adapted to SNLI classification."""
    from model.self_attention_model import PreprocessWrapper
    from read_data.snli.read_snli_experiment_data import load_dict_data
    from read_data.snli.load_snli_vocabulary import load_snli_vocabulary, load_snli_character_vocabulary
    train, valid, test = load_dict_data(debug=is_debug)
    vocabulary = load_snli_vocabulary("glove_300d")
    character_vocabulary = load_snli_character_vocabulary(n_gram=1)
    from qanet.qanet import QANet
    return {
        "model_fn": QANet,
        "model_dict": {
            "word_embedding_matrix": vocabulary.embedding_matrix,
            "char_embedding_matrix": None,
            "params": {
                "word_embed_dim": 300,
                "highway_n_layers": 2,
                "hidden_size": 128,
                "embed_encoder_resize_kernel_size": 7,
                "embed_encoder_resize_pad": 3,
                "embed_encoder_n_blocks": 1,
                "embed_encoder_n_conv": 4,
                "embed_encoder_kernel_size": 7,
                "embed_encoder_pad": 3,
                "embed_encoder_conv_type": "depthwise_separable",
                "embed_encoder_with_self_attn": False,
                "embed_encoder_n_heads": 8,
                "model_encoder_n_blocks": 7,
                "model_encoder_n_conv": 2,
                "model_encoder_kernel_size": 7,
                "model_encoder_pad": 3,
                "model_encoder_conv_type": "depthwise_separable",
                "model_encoder_with_self_attn": False,
                "model_encoder_n_heads": 8,
                "batch_size": 128,
            }
        },
        "pre_process_module_fn": PreprocessWrapper,
        "pre_process_module_dict": {
            "pad_idx": vocabulary.word_to_id(vocabulary.pad),
            "character_pad_idx": character_vocabulary.character_to_id_dict[character_vocabulary.PAD],
        },
        "data": [train, valid, test],
        "batch_size": 128,
        "train_loss": nn.CrossEntropyLoss,
        "clip_norm": None,
        "name": "QANet_snli",
        "optimizer": optim.Adam,
        "need_pad": True,
        "optimizer_dict": {"betas": (0.9, 0.999), "weight_decay": 3e-7},
        "epcohes": 80,
        "lr": 1e-5,
        "evaluate_object_list": [SequenceExactMatch(gpu_index=get_gpu_index())],
        "epoch_ratio": 0.1,
    }
def loss(log_probs, target):
    # Uses top-1 exact-match accuracy as the training signal; `ignore_token`
    # is not defined in this scope and is presumably captured from the
    # enclosing config this function was lifted out of.
    return calculate_accuracy_of_code_completion(
        log_probs, target,
        ignore_token=ignore_token,
        topk_range=(1, 1),
        gpu_index=get_gpu_index())[1]
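# Hedged usage sketch: none of the configs above run anything by themselves;
# a driver elsewhere in this repository consumes the returned dict. The
# function below is hypothetical and only illustrates the keys the configs
# agree on ("model_fn"/"model_dict", "pre_process_module_fn"/
# "pre_process_module_dict", "optimizer"/"optimizer_dict"/"lr", "train_loss",
# "epcohes", "clip_norm"). The real trainer additionally handles "data",
# "batch_size", "need_pad", "label_preprocess", "epoch_ratio", "scheduler_fn",
# and the "evaluate_object_list" metrics; the assumption that the preprocess
# wrapper wraps the model is this sketch's, not necessarily the repository's.


def _example_train_sketch(config, batches):
    """Hypothetical driver showing how a config dict is wired together.

    `batches` stands in for the repository's real data loader and should
    yield (model_input, label) pairs already collated for the model.
    """
    # Build the model, then wrap it with the dataset-specific preprocessor
    # (assumed here to take the model as its first argument).
    model = config["model_fn"](**config["model_dict"])
    model = config["pre_process_module_fn"](model, **config["pre_process_module_dict"])
    # "lr" and "optimizer_dict" are splatted into the optimizer constructor.
    optimizer = config["optimizer"](model.parameters(), lr=config["lr"], **config["optimizer_dict"])
    # "train_loss" is a zero-argument factory: a loss class or a lambda.
    loss_fn = config["train_loss"]()
    for _epoch in range(config["epcohes"]):
        for model_input, label in batches:
            optimizer.zero_grad()
            loss = loss_fn(model(model_input), label)
            loss.backward()
            if config["clip_norm"] is not None:
                torch.nn.utils.clip_grad_norm_(model.parameters(), config["clip_norm"])
            optimizer.step()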