import torch.nn as nn
import torch.optim as optim

# The following names are used across these config functions and are assumed
# to be provided by the repo's shared modules (exact import paths are not
# shown in this excerpt): SequenceExactMatch, get_gpu_index, GGNNGraphModel,
# TestModel, OpenAIAdam, and the read_snli_split_* / parse_* helpers used by
# load_snli_data.


def snli_config2(is_debug, output_log=None):
    from model.self_attention_model import PreprocessWrapper
    from read_data.snli.read_snli_experiment_data import load_dict_data
    from read_data.snli.load_snli_vocabulary import load_snli_vocabulary, load_snli_character_vocabulary
    train, valid, test = load_dict_data(debug=is_debug)
    vocabulary = load_snli_vocabulary("glove_300d")
    character_vocabulary = load_snli_character_vocabulary(n_gram=1)
    from model.feed_forward_network import FFNWithCrossCompare
    return {
        "model_fn": FFNWithCrossCompare,
        "model_dict": {
            "word_embedding": vocabulary.embedding_matrix,
            "n_classes": 3,
            "hidden_size": 400,
        },
        "pre_process_module_fn": PreprocessWrapper,
        "pre_process_module_dict": {
            "pad_idx": vocabulary.word_to_id(vocabulary.pad),
            "character_pad_idx": character_vocabulary.character_to_id_dict[character_vocabulary.PAD],
        },
        "data": [train, valid, test],
        "batch_size": 32,
        "train_loss": nn.CrossEntropyLoss,
        "clip_norm": None,
        "name": "FFN_snli",
        "optimizer": optim.Adam,
        "need_pad": True,
        "optimizer_dict": {"betas": (0.9, 0.999), "weight_decay": 3e-7},
        "epcohes": 80,  # key name kept as-is; the training harness presumably reads this spelling
        "lr": 1e-3,
        "evaluate_object_list": [SequenceExactMatch(gpu_index=get_gpu_index())],
        "epoch_ratio": 0.1,
    }
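
# A minimal sketch of how one of these config dicts is presumably consumed.
# `run_config` is a hypothetical driver, not the repo's actual trainer; the
# key layout ("model_fn" + "model_dict", "optimizer" + "optimizer_dict") and
# the assumption that the preprocess wrapper takes the inner model as its
# first argument are read off the dicts defined in this file.
def run_config(config):
    model = config["model_fn"](**config["model_dict"])
    model = config["pre_process_module_fn"](model, **config["pre_process_module_dict"])
    optimizer = config["optimizer"](model.parameters(), lr=config["lr"],
                                    **config["optimizer_dict"])
    loss_fn = config["train_loss"]()
    train, valid, test = config["data"]
    return model, optimizer, loss_fn, (train, valid, test)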

def snli_config7(is_debug, output_log=None):
    from model.sentence_pair_graph import ConcatPreprocessWrapper
    import numpy as np
    from read_data.snli.read_snli_experiment_data import load_dict_data
    from read_data.snli.load_snli_vocabulary import load_snli_vocabulary, load_snli_character_vocabulary
    train, valid, test = load_dict_data(debug=is_debug)
    vocabulary = load_snli_vocabulary("glove_300d")
    # character_vocabulary = load_snli_character_vocabulary(n_gram=1)
    # Extend the GloVe matrix with two randomly initialized rows: one for the
    # delimiter token between the two sentences and one for the summary token.
    delimeter_idx = len(vocabulary.id_to_word_dict)
    summary_idx = len(vocabulary.id_to_word_dict) + 1
    embedding_matrix = vocabulary.embedding_matrix
    embedding_matrix = np.concatenate(
        (embedding_matrix, np.random.randn(2, embedding_matrix.shape[1])), axis=0)
    from model.transformer_lm import dotdict
    # TestModel is assumed to be imported at module level elsewhere in the repo.
    return {
        "model_fn": TestModel,
        "model_dict": {
            "cfg": dotdict({
                'n_embd': 768,
                'n_head': 1,
                'n_layer': 1,
                'embd_pdrop': 0.1,
                'attn_pdrop': 0.1,
                'resid_pdrop': 0.1,
                'afn': 'gelu',
                'clf_pdrop': 0.1,
            }),
            "clf_token": summary_idx,
            "vocabulary_size": embedding_matrix.shape[0],
            "n_ctx": 80 + 2,  # max_length plus the delimiter and summary tokens
        },
        "pre_process_module_fn": ConcatPreprocessWrapper,
        "pre_process_module_dict": {
            "pad_idx": vocabulary.word_to_id(vocabulary.pad),
            "delimeter_idx": delimeter_idx,
            "summary_node_idx": summary_idx,
            "max_length": 80,
        },
        "data": [train, valid, test],
        "batch_size": 8,
        "train_loss": nn.CrossEntropyLoss,
        "clip_norm": 1,
        "name": "GGNNGraphModel_snli",
        "optimizer": optim.Adam,
        "need_pad": True,
        # Alternative OpenAIAdam settings (as used in snli_config6), kept for reference:
        # "optimizer_dict": {
        #     "schedule": 'warmup_linear',
        #     "warmup": 0.002,
        #     "t_total": (100 // 8) * 300,
        #     "b1": 0.9,
        #     "b2": 0.999,
        #     "e": 1e-8,
        #     "l2": 0.01,
        #     "vector_l2": 'store_true',
        #     "max_grad_norm": 1},
        "optimizer_dict": {"betas": (0.9, 0.999), "weight_decay": 0.01},
        "epcohes": 300,
        "lr": 6.25e-5,
        "evaluate_object_list": [SequenceExactMatch(gpu_index=get_gpu_index())],
        "epoch_ratio": 1,
        "scheduler_fn": None,
    }
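
# `dotdict` from model.transformer_lm is presumably a dict subclass with
# attribute access, along the lines of the sketch below, so the transformer
# code can read cfg.n_embd instead of cfg["n_embd"]. This stand-in is for
# illustration only; the repo's actual class may differ in detail.
class DotDictSketch(dict):
    """Illustrative stand-in for model.transformer_lm.dotdict."""
    __getattr__ = dict.get          # missing keys read as None instead of raising
    __setattr__ = dict.__setitem__
    __delattr__ = dict.__delitem__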

def snli_config6(is_debug, output_log=None):
    from model.sentence_pair_graph import ConcatPreprocessWrapper
    import numpy as np
    from read_data.snli.read_snli_experiment_data import load_dict_data
    from read_data.snli.load_snli_vocabulary import load_snli_vocabulary, load_snli_character_vocabulary
    train, valid, test = load_dict_data(debug=is_debug)
    vocabulary = load_snli_vocabulary("glove_300d")
    # character_vocabulary = load_snli_character_vocabulary(n_gram=1)
    # Extend the GloVe matrix with two randomly initialized rows for the
    # delimiter and summary tokens, as in snli_config7.
    delimeter_idx = len(vocabulary.id_to_word_dict)
    summary_idx = len(vocabulary.id_to_word_dict) + 1
    embedding_matrix = vocabulary.embedding_matrix
    embedding_matrix = np.concatenate(
        (embedding_matrix, np.random.randn(2, embedding_matrix.shape[1])), axis=0)
    # GGNNGraphModel and OpenAIAdam are assumed to be imported at module level.
    return {
        "model_fn": GGNNGraphModel,
        "model_dict": {
            "word_embedding": embedding_matrix,
            "max_length": 80,
            "hidden_state_size": 756,
            "n_classes": 3,
        },
        "pre_process_module_fn": ConcatPreprocessWrapper,
        "pre_process_module_dict": {
            "pad_idx": vocabulary.word_to_id(vocabulary.pad),
            "delimeter_idx": delimeter_idx,
            "summary_node_idx": summary_idx,
            "max_length": 80,
        },
        "data": [train, valid, test],
        "batch_size": 8,
        "train_loss": nn.CrossEntropyLoss,
        "clip_norm": 1,
        "name": "GGNNGraphModel_snli",
        "optimizer": OpenAIAdam,
        "need_pad": True,
        "optimizer_dict": {
            "schedule": 'warmup_linear',
            "warmup": 0.002,
            # total optimizer steps, apparently sized for the 100-example debug
            # split: steps per epoch (100 // 8) times the 300 epochs below
            "t_total": (100 // 8) * 300,
            "b1": 0.9,
            "b2": 0.999,
            "e": 1e-8,
            "l2": 0.01,
            "vector_l2": 'store_true',  # truthy string (copied from an argparse spec); effectively enables vector_l2
            "max_grad_norm": 1,
        },
        "epcohes": 300,
        "lr": 6.25e-5,
        "evaluate_object_list": [SequenceExactMatch(gpu_index=get_gpu_index())],
        "epoch_ratio": 1,
        "scheduler_fn": None,
    }
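
# Sketch of the 'warmup_linear' schedule requested in the optimizer_dict
# above, as implemented in the original OpenAI / early huggingface OpenAIAdam
# code (reproduced from memory, so treat the exact shape as an assumption):
# the lr multiplier ramps from 0 to 1 over the first `warmup` fraction of
# t_total steps, then decays linearly back to 0.
def warmup_linear_sketch(progress, warmup=0.002):
    # progress = current_step / t_total, a fraction in [0, 1]
    if progress < warmup:
        return progress / warmup
    return 1.0 - progress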

def snli_config5(is_debug, output_log=None):
    from model.sentence_pair_graph import PreprocessWrapper
    from read_data.snli.read_snli_experiment_data import load_dict_data
    from read_data.snli.load_snli_vocabulary import load_snli_vocabulary, load_snli_character_vocabulary
    train, valid, test = load_dict_data(debug=is_debug)
    vocabulary = load_snli_vocabulary("glove_300d")
    character_vocabulary = load_snli_character_vocabulary(n_gram=1)
    from model.sentence_pair_graph import SequenceGraphModelWithGraphAttention
    return {
        "model_fn": SequenceGraphModelWithGraphAttention,
        "model_dict": {
            "word_embedding": vocabulary.embedding_matrix,
            "character_number": len(character_vocabulary.character_to_id_dict),
            "mixed": True,
            "character_embedding_dim": 600,
            "character_n_filters": 200,
            "character_kernel_size": 5,
            "character_padding": 2,  # (kernel_size - 1) // 2, so the conv preserves length
            "hidden_size": 128,
            "graph_itr": 1,
            "dynamic_graph_n_layer": 2,
            "graph_attention_n_head": 6,
            "leaky_alpha": 0.2,
            "n_classes": 3,
        },
        "pre_process_module_fn": PreprocessWrapper,
        "pre_process_module_dict": {
            "pad_idx": vocabulary.word_to_id(vocabulary.pad),
            "character_pad_idx": character_vocabulary.character_to_id_dict[character_vocabulary.PAD],
        },
        "data": [train, valid, test],
        "batch_size": 80,
        "train_loss": nn.CrossEntropyLoss,
        "clip_norm": None,
        "name": "SequenceGraphModelWithGraphAttention_snli",
        "optimizer": optim.Adam,
        "need_pad": True,
        "optimizer_dict": {"betas": (0.9, 0.999), "weight_decay": 3e-7},
        "epcohes": 160,
        "lr": 3e-3,
        "evaluate_object_list": [SequenceExactMatch(gpu_index=get_gpu_index())],
        "epoch_ratio": 0.1,
    }
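
# Quick standalone check of the character-CNN arithmetic above: with stride 1,
# kernel_size=5 and padding=2 give output length L + 2*2 - 5 + 1 = L, so the
# character sequence length is preserved. This assumes the model applies the
# character CNN over (batch, channels, length) tensors, as nn.Conv1d expects;
# the shapes here mirror character_embedding_dim=600 and character_n_filters=200.
def check_char_conv_shape():
    import torch
    conv = torch.nn.Conv1d(in_channels=600, out_channels=200,
                           kernel_size=5, padding=2)
    x = torch.randn(1, 600, 16)  # (batch, character_embedding_dim, word_length)
    assert conv(x).shape == (1, 200, 16)  # n_filters channels, same length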

def load_snli_data(is_debug, max_total_len=80, word_vector_name='glove_300d'):
    """
    Load the SNLI train/valid/test splits as pandas DataFrames with columns:

    sentence1: raw text of sentence 1
    tokens1: token list of sentence 1, split by ' '
    tokens_id1: token id list of sentence 1, with begin and end tokens
    character_ids1: character id lists for the tokens of sentence 1
    sentence2, tokens2, tokens_id2, character_ids2: the same fields for sentence 2
    tokens_len1: number of tokens in sentence 1
    tokens_len2: number of tokens in sentence 2
    total_len: tokens_len1 + tokens_len2
    gold_label: target label, one of ['entailment', 'neutral', 'contradiction']
    label: target label id in [0, 1, 2]

    :param is_debug: if True, keep only the first 100 rows of each split
    :param max_total_len: drop sentence pairs whose total_len is not below this
    :param word_vector_name: name of the pretrained word vectors to load
    :return: list of three DataFrames [train, valid, test]
    """
    train_df = read_snli_split_train_data()
    valid_df = read_snli_split_valid_data()
    test_df = read_snli_split_test_data()
    dfs = [train_df, valid_df, test_df]
    print('after read data: ')
    for df in dfs:
        print(len(df))
    vocab = load_snli_vocabulary(word_vector_name)
    character_vocab = load_snli_character_vocabulary(n_gram=1)
    dfs = [parse_tokens_id(df, vocab) for df in dfs]
    dfs = [parse_character_id(df, character_vocab) for df in dfs]
    dfs = [parse_label_id(df) for df in dfs]
    print('after parse token to id')
    dfs = [df[df['total_len'] < max_total_len] for df in dfs]
    print('after filter longer than {} data: '.format(max_total_len))
    for df in dfs:
        print(len(df))
    if is_debug:
        dfs = [df[:100] for df in dfs]
    return dfs
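
# Example use of load_snli_data. Debug mode keeps only the first 100 rows of
# each split, so this is cheap to run once the SNLI files and vocabularies are
# in place; the column accesses follow the docstring above.
def load_snli_data_example():
    train_df, valid_df, test_df = load_snli_data(is_debug=True, max_total_len=80)
    row = train_df.iloc[0]
    print(row["sentence1"], "|", row["sentence2"])
    print(row["gold_label"], "->", row["label"])  # string label and its id (mapping defined by parse_label_id)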

def snli_config3(is_debug, output_log=None):
    from model.self_attention_model import PreprocessWrapper
    from read_data.snli.read_snli_experiment_data import load_dict_data
    from read_data.snli.load_snli_vocabulary import load_snli_vocabulary, load_snli_character_vocabulary
    train, valid, test = load_dict_data(debug=is_debug)
    vocabulary = load_snli_vocabulary("glove_300d")
    character_vocabulary = load_snli_character_vocabulary(n_gram=1)
    from qanet.qanet import QANet
    return {
        "model_fn": QANet,
        "model_dict": {
            "word_embedding_matrix": vocabulary.embedding_matrix,
            "char_embedding_matrix": None,
            "params": {
                "word_embed_dim": 300,
                "highway_n_layers": 2,
                "hidden_size": 128,
                "embed_encoder_resize_kernel_size": 7,
                "embed_encoder_resize_pad": 3,
                "embed_encoder_n_blocks": 1,
                "embed_encoder_n_conv": 4,
                "embed_encoder_kernel_size": 7,
                "embed_encoder_pad": 3,
                "embed_encoder_conv_type": "depthwise_separable",
                "embed_encoder_with_self_attn": False,
                "embed_encoder_n_heads": 8,
                "model_encoder_n_blocks": 7,
                "model_encoder_n_conv": 2,
                "model_encoder_kernel_size": 7,
                "model_encoder_pad": 3,
                "model_encoder_conv_type": "depthwise_separable",
                "model_encoder_with_self_attn": False,
                "model_encoder_n_heads": 8,
                "batch_size": 128,
            },
        },
        "pre_process_module_fn": PreprocessWrapper,
        "pre_process_module_dict": {
            "pad_idx": vocabulary.word_to_id(vocabulary.pad),
            "character_pad_idx": character_vocabulary.character_to_id_dict[character_vocabulary.PAD],
        },
        "data": [train, valid, test],
        "batch_size": 128,
        "train_loss": nn.CrossEntropyLoss,
        "clip_norm": None,
        "name": "QANet_snli",
        "optimizer": optim.Adam,
        "need_pad": True,
        "optimizer_dict": {"betas": (0.9, 0.999), "weight_decay": 3e-7},
        "epcohes": 80,
        "lr": 1e-5,
        "evaluate_object_list": [SequenceExactMatch(gpu_index=get_gpu_index())],
        "epoch_ratio": 0.1,
    }
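
# The QANet config above selects "depthwise_separable" convolutions. As in
# the QANet paper, a depthwise-separable 1-D conv factors a dense convolution
# into a per-channel (depthwise) conv followed by a 1x1 pointwise conv,
# cutting the parameter count. A minimal sketch follows; the qanet package's
# own layer may differ in detail (e.g. bias handling or activation placement).
class DepthwiseSeparableConv1dSketch(nn.Module):
    def __init__(self, in_channels, out_channels, kernel_size, padding):
        super().__init__()
        # groups=in_channels makes each filter see exactly one input channel
        self.depthwise = nn.Conv1d(in_channels, in_channels, kernel_size,
                                   padding=padding, groups=in_channels)
        # 1x1 conv mixes information across channels
        self.pointwise = nn.Conv1d(in_channels, out_channels, kernel_size=1)

    def forward(self, x):  # x: (batch, channels, length)
        return self.pointwise(self.depthwise(x))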