def train_linear_start(train_story, train_questions, train_qstory, memory, model, loss, general_config):
    train_config = general_config.train_config

    # Remove softmax from memory
    for i in range(general_config.nhops):
        memory[i].mod_query.modules.pop()

    # Save settings
    nepochs2          = general_config.nepochs
    lrate_decay_step2 = general_config.lrate_decay_step
    init_lrate2       = train_config["init_lrate"]

    # Add new settings
    general_config.nepochs          = general_config.ls_nepochs
    general_config.lrate_decay_step = general_config.ls_lrate_decay_step
    train_config["init_lrate"]      = general_config.ls_init_lrate

    # Train with new settings
    train(train_story, train_questions, train_qstory, memory, model, loss, general_config)

    # Add softmax back
    for i in range(general_config.nhops):
        memory[i].mod_query.add(Softmax())

    # Restore old settings
    general_config.nepochs          = nepochs2
    general_config.lrate_decay_step = lrate_decay_step2
    train_config["init_lrate"]      = init_lrate2

    # Train with old settings
    train(train_story, train_questions, train_qstory, memory, model, loss, general_config)
def init_query_module(self):
    self.emb_query = LookupTable(self.voc_sz, self.in_dim)

    p = Parallel()
    p.add(self.emb_query)
    p.add(Identity())

    self.mod_query = Sequential()
    self.mod_query.add(p)
    self.mod_query.add(MatVecProd(True))
    self.mod_query.add(Softmax())
def init_query_module(self):
    self.emb_query = LookupTable(self.voc_sz, self.in_dim)

    s = Sequential()
    s.add(self.emb_query)
    s.add(ElemMult(self.config["weight"]))
    s.add(Sum(dim=1))

    p = Parallel()
    p.add(s)
    p.add(Identity())

    self.mod_query = Sequential()
    self.mod_query.add(p)
    self.mod_query.add(MatVecProd(True))
    self.mod_query.add(Softmax())
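# The query module above multiplies the word embeddings by a position-encoding
# (PE) weight via ElemMult(self.config["weight"]) before summing over words.
# A minimal NumPy sketch of that weight matrix, mirroring the construction in
# build_model further below; `position_encoding` is a hypothetical helper name,
# not part of the original code.
import numpy as np

def position_encoding(in_dim, max_words):
    # weight[i, j] = 1 + 4 * (i+1 - (d+1)/2) * (j+1 - (J+1)/2) / (d * J)
    weight = np.ones((in_dim, max_words), np.float32)
    for i in range(in_dim):
        for j in range(max_words):
            weight[i][j] = (i + 1 - (in_dim + 1) / 2) * (j + 1 - (max_words + 1) / 2)
    return 1 + 4 * weight / (in_dim * max_words)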
def train_linear_start(train_story, train_questions, train_qstory, memory, model, loss, general_config):
    """
    Run Memory Network Training with Linear Start.

    :param train_story: Tensor of Stories (Shape: (SENTENCE_SIZE, STORY_SIZE, NUM_STORIES))
    :param train_questions: Tensor of Questions (Shape: (14 (see parser.py), NUM_SAMPLES))
    :param train_qstory: Tensor of Q Indices within story (Shape: (SENTENCE_SIZE, NUM_SAMPLES))

    :param memory: Memory (Story) Network
    :param model: Sequential Query Network
    :param loss: Loss Network
    :param general_config: bAbI Configuration
    """
    train_config = general_config.train_config

    # Remove softmax from memory
    for i in range(general_config.nhops):
        memory[i].mod_query.modules.pop()

    # Save settings
    nepochs2          = general_config.nepochs
    lrate_decay_step2 = general_config.lrate_decay_step
    init_lrate2       = train_config["init_lrate"]

    # Add new settings
    general_config.nepochs          = general_config.ls_nepochs
    general_config.lrate_decay_step = general_config.ls_lrate_decay_step
    train_config["init_lrate"]      = general_config.ls_init_lrate

    # Train with new settings
    train(train_story, train_questions, train_qstory, memory, model, loss, general_config)

    # Add softmax back
    for i in range(general_config.nhops):
        memory[i].mod_query.add(Softmax())

    # Restore old settings
    general_config.nepochs          = nepochs2
    general_config.lrate_decay_step = lrate_decay_step2
    train_config["init_lrate"]      = init_lrate2

    # Train with old settings
    train(train_story, train_questions, train_qstory, memory, model, loss, general_config)
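# Hypothetical helpers (names are my own, not from the original code) that
# isolate the two toggles linear start relies on: popping the trailing Softmax
# of each hop's query module makes the memory addressing linear, and
# re-appending it restores softmax attention.
def remove_attention_softmax(memory, nhops):
    for i in range(nhops):
        memory[i].mod_query.modules.pop()   # drop the trailing Softmax

def restore_attention_softmax(memory, nhops):
    for i in range(nhops):
        memory[i].mod_query.add(Softmax())  # re-append the Softmax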
def build_model(general_config):
    """
    Build model

    NOTE: (for default config)
    1) Model's architecture (embedding B)
        LookupTable -> ElemMult -> Sum -> [ Duplicate -> { Parallel -> Memory -> Identity } -> AddTable ] -> LinearNB -> Softmax

    2) Memory's architecture
        a) Query module (embedding A)
            Parallel -> { LookupTable + ElemMult + Sum } -> Identity -> MatVecProd -> Softmax

        b) Output module (embedding C)
            Parallel -> { LookupTable + ElemMult + Sum } -> Identity -> MatVecProd
    """
    train_config = general_config.train_config
    dictionary   = general_config.dictionary
    use_bow      = general_config.use_bow
    nhops        = general_config.nhops
    add_proj     = general_config.add_proj
    share_type   = general_config.share_type
    enable_time  = general_config.enable_time
    add_nonlin   = general_config.add_nonlin

    in_dim    = train_config["in_dim"]
    out_dim   = train_config["out_dim"]
    max_words = train_config["max_words"]
    voc_sz    = train_config["voc_sz"]

    if not use_bow:
        print('We use PE')
        train_config["weight"] = np.ones((in_dim, max_words), np.float32)
        for i in range(in_dim):
            for j in range(max_words):
                train_config["weight"][i][j] = (i + 1 - (in_dim + 1) / 2) * \
                                               (j + 1 - (max_words + 1) / 2)
        train_config["weight"] = 1 + 4 * train_config["weight"] / (in_dim * max_words)

    memory = {}
    model = Sequential()
    model.add(LookupTable(voc_sz, in_dim))
    if not use_bow:
        if enable_time:
            print('We use TE')
            model.add(ElemMult(train_config["weight"][:, :-1]))
        else:
            model.add(ElemMult(train_config["weight"]))

    model.add(Sum(dim=1))

    proj = {}
    for i in range(nhops):
        if use_bow:
            memory[i] = MemoryBoW(train_config)
        else:
            memory[i] = MemoryL(train_config)

        # Override nil_word which is initialized in "self.nil_word = train_config["voc_sz"]"
        memory[i].nil_word = dictionary['nil']
        model.add(Duplicate())
        p = Parallel()
        p.add(memory[i])

        if add_proj:
            print('We add linear layer between internal states')
            proj[i] = LinearNB(in_dim, in_dim)
            p.add(proj[i])
        else:
            p.add(Identity())

        model.add(p)
        model.add(AddTable())
        if add_nonlin:
            print('We use non-linearity (RELU) to internal states')
            model.add(ReLU())

    model.add(LinearNB(out_dim, voc_sz, True))
    model.add(Softmax())

    # Share weights
    if share_type == 1:
        # Type 1: adjacent weight tying
        print('We use adjacent weight tying')
        memory[0].emb_query.share(model.modules[0])
        for i in range(1, nhops):
            memory[i].emb_query.share(memory[i - 1].emb_out)

        model.modules[-2].share(memory[len(memory) - 1].emb_out)

    elif share_type == 2:
        # Type 2: layer-wise weight tying
        print('We use layer-wise weight tying (RNN-style)')
        for i in range(1, nhops):
            memory[i].emb_query.share(memory[0].emb_query)
            memory[i].emb_out.share(memory[0].emb_out)

    if add_proj:
        for i in range(1, nhops):
            proj[i].share(proj[0])

    # Cost
    loss = CrossEntropyLoss()
    loss.size_average = False
    loss.do_softmax_bprop = True
    model.modules[-1].skip_bprop = True

    return memory, model, loss
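# A hypothetical call site showing how build_model and train_linear_start fit
# together; a minimal sketch only. `general_config` (assumed to expose a
# `linear_start` flag) and the parsed train_story / train_questions /
# train_qstory tensors come from the surrounding training script and are not
# defined here.
memory, model, loss = build_model(general_config)

if general_config.linear_start:
    train_linear_start(train_story, train_questions, train_qstory,
                       memory, model, loss, general_config)
else:
    train(train_story, train_questions, train_qstory,
          memory, model, loss, general_config)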
a1 = np.random.rand(2).astype('f')
b = torch.from_numpy(a1).type(FloatTensor)
try:
    a2 = b.data.numpy()
    assert np.array_equal(a1, a2) is True
except AssertionError:
    tests[0] = False

if TEST1:
    tests[1] = True
    for i in range(10):
        M = np.random.rand(224, 32)
        input_data = M
        input_data_torch = torch.from_numpy(M).type(FloatTensor)

        sfmx = Softmax()
        sfmx_torch = nn.Softmax(dim=0)

        result_1 = sfmx.fprop(input_data)
        result_2 = sfmx_torch.forward(input_data_torch)

        try:
            result_2_np = result_2.data.numpy()
            assert np.allclose(result_1, result_2_np)
        except AssertionError:
            tests[1] = False

if TEST2:
    tests[2] = True
    for i in range(10):
        M = np.random.rand(*matrix_batch_dim)
        V = np.random.rand(*vect_batch_dim)
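# The TEST2 block above is cut off after generating M and V. A hypothetical
# reference check for a batched matrix-vector product of the kind it appears
# to target, validating a NumPy einsum result against torch.matmul; the batch
# shapes and MatVecProd's exact fprop interface are assumptions, not taken
# from the original test file.
import numpy as np
import torch

batch, rows, cols = 8, 224, 32
M_ref = np.random.rand(batch, rows, cols).astype('f')
V_ref = np.random.rand(batch, cols).astype('f')

ref_np = np.einsum('brc,bc->br', M_ref, V_ref)            # batched M @ v in NumPy
ref_torch = torch.matmul(torch.from_numpy(M_ref),
                         torch.from_numpy(V_ref).unsqueeze(-1)).squeeze(-1)
assert np.allclose(ref_np, ref_torch.numpy(), atol=1e-5)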
def train_linear_start(train_story, train_questions, train_qstory, memory, model, loss, general_config, log_path='./'):
    train_config = general_config.train_config

    # Remove softmax from memory
    for i in range(general_config.nhops):
        memory[i].mod_query.modules.pop()

    # Save settings
    nepochs2          = general_config.nepochs
    lrate_decay_step2 = general_config.lrate_decay_step
    init_lrate2       = train_config["init_lrate"]

    # Add new settings
    general_config.nepochs          = general_config.ls_nepochs
    general_config.lrate_decay_step = general_config.ls_lrate_decay_step
    train_config["init_lrate"]      = general_config.ls_init_lrate

    print('Switching to linear-start lr config, nepoch: %d, lr: %f, decay: %f' %
          (general_config.nepochs, train_config["init_lrate"], general_config.lrate_decay_step))
    sys.stdout.flush()

    # Declare loggers
    train_logger = open(os.path.join(log_path, 'train.log'), 'w')
    train_logger.write('epoch batch_iter lr loss err\n')
    train_logger.flush()
    val_logger = open(os.path.join(log_path, 'val.log'), 'w')
    val_logger.write('epoch batch_iter lr loss err\n')
    val_logger.flush()

    # Train with new settings
    global_batch_iter = 0
    best_val_loss = 1000000.
    best_val_err = 1000000.
    train_logger, val_logger, best_model, best_memory, global_batch_iter, best_val_loss, best_val_err = \
        train(train_story, train_questions, train_qstory, memory, model, loss, general_config,
              train_logger, val_logger, global_batch_iter, best_val_loss, best_val_err)

    # When the validation loss stopped decreasing, the softmax layers were
    # re-inserted and training recommenced.
    # Add softmax back
    for i in range(general_config.nhops):
        memory[i].mod_query.add(Softmax())

    # Restore old settings
    general_config.nepochs          = nepochs2
    general_config.lrate_decay_step = lrate_decay_step2
    train_config["init_lrate"]      = init_lrate2

    print('Switching back to original lr config, nepoch: %d, lr: %f, decay: %f' %
          (general_config.nepochs, train_config["init_lrate"], general_config.lrate_decay_step))
    sys.stdout.flush()

    # Train with old settings
    train_logger, val_logger, best_model, best_memory, _, _, _ = \
        train(train_story, train_questions, train_qstory, memory, model, loss, general_config,
              train_logger, val_logger, global_batch_iter, best_val_loss, best_val_err)

    train_logger.close()
    val_logger.close()

    return best_model, best_memory
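# A hypothetical call site for the logging variant above; a minimal sketch
# only. The log directory name is an assumption, and the pre-built networks
# and parsed tensors come from the surrounding training script. train.log and
# val.log are written under log_path, and the best validation model/memory
# are returned.
import os

log_path = './logs/task_1'
os.makedirs(log_path, exist_ok=True)

best_model, best_memory = train_linear_start(
    train_story, train_questions, train_qstory,
    memory, model, loss, general_config, log_path=log_path)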