def step(self, batch_index, mode):
    """Run one training or testing step within an epoch.

    Args:
        batch_index (int): step number for the epoch
        mode (str): 'train' or 'test' based on the mode of the step

    Returns:
        dict: prediction, answers, current_loss, skipped flag, and a
        log string containing the parameter and gradient norms.
    """
    if mode == "train" and self.mode == "test":
        raise Exception("Cannot train during test mode")

    # Select the compiled Theano function and the matching data split.
    if mode == "train":
        theano_fn = self.train_fn
        inputs = self.train_input
        qs = self.train_q
        answers = self.train_answer
        input_masks = self.train_input_mask
    elif mode == "test":
        theano_fn = self.test_fn
        inputs = self.test_input
        qs = self.test_q
        answers = self.test_answer
        input_masks = self.test_input_mask
    else:
        raise Exception("Invalid mode")

    # Pull out the single example for this step; cast floats to the
    # configured Theano float dtype.
    inp = inputs[batch_index].astype(floatX)
    q = qs[batch_index].astype(floatX)
    ans = answers[batch_index]
    input_mask = input_masks[batch_index]

    skipped = 0
    grad_norm = float('NaN')  # stays NaN for test steps (no gradients)

    if mode == 'train':
        # Compute gradients first; if the max norm is NaN, skip the
        # update for this example instead of poisoning the parameters.
        grads = self.get_gradient_fn(inp, q, ans, input_mask)
        grad_norm = np.max([utils.get_norm(g) for g in grads])
        if np.isnan(grad_norm):
            print("==> gradient is nan at index %d." % batch_index)
            print("==> skipping")
            skipped = 1

    # Run the compiled function unless the step was skipped; the
    # sentinel [-1, -1] keeps the return shape consistent.
    ret = theano_fn(inp, q, ans, input_mask) if skipped == 0 else [-1, -1]

    param_norm = np.max([utils.get_norm(p.get_value()) for p in self.params])

    return {
        "prediction": np.array([ret[0]]),
        "answers": np.array([ans]),
        "current_loss": ret[1],
        "skipped": skipped,
        "log": "pn: %.3f \t gn: %.3f" % (param_norm, grad_norm),
    }
def step(self, batch_index, mode):
    """Run one batched training or testing step within an epoch.

    Args:
        batch_index (int): index of the mini-batch within the epoch;
            the slice starts at ``batch_index * self.batch_size``
        mode (str): 'train' or 'test'

    Returns:
        dict: prediction, answers, current_loss, skipped flag (always
        0 in this variant), and a log string with the parameter norm.

    Raises:
        Exception: if training is requested while in test mode, or if
            ``mode`` is neither 'train' nor 'test'.
    """
    if mode == "train" and self.mode == "test":
        raise Exception("Cannot train during test mode")

    if mode == "train":
        theano_fn = self.train_fn
        inputs = self.train_input
        qs = self.train_q
        answers = self.train_answer
        fact_counts = self.train_fact_count
        input_masks = self.train_input_mask
    # Was a second independent `if` with no else: an invalid mode fell
    # through and crashed with NameError on `theano_fn`. Use elif/else
    # so bad input fails loudly, consistent with the other variants.
    elif mode == "test":
        theano_fn = self.test_fn
        inputs = self.test_input
        qs = self.test_q
        answers = self.test_answer
        fact_counts = self.test_fact_count
        input_masks = self.test_input_mask
    else:
        raise Exception("Invalid mode")

    # Slice out the current mini-batch.
    start_index = batch_index * self.batch_size
    end_index = start_index + self.batch_size
    inp = inputs[start_index:end_index]
    q = qs[start_index:end_index]
    ans = answers[start_index:end_index]
    fact_count = fact_counts[start_index:end_index]
    input_mask = input_masks[start_index:end_index]

    # Pad / reshape the raw batch into the form the Theano graph expects.
    inp, q, ans, fact_count, input_mask = self._process_batch(
        inp, q, ans, fact_count, input_mask)

    ret = theano_fn(inp, q, ans, fact_count, input_mask)

    param_norm = np.max([utils.get_norm(x.get_value()) for x in self.params])

    return {
        "prediction": ret[0],
        "answers": ans,
        "current_loss": ret[1],
        "skipped": 0,
        "log": "pn: %.3f" % param_norm,
    }
def step(self, batch_index, mode):
    """Run one training or testing step within an epoch.

    Args:
        batch_index (int): step number for the epoch
        mode (str): 'train' or 'test' based on the mode of the step

    Returns:
        dict: prediction, answers, current_loss, skipped flag, and a
        log string containing the parameter and gradient norms.
    """
    if mode == "train" and self.mode == "test":
        raise Exception("Cannot train during test mode")

    if mode == "train":
        theano_fn = self.train_fn  # Theano function set
        inputs = self.train_input
        qs = self.train_q
        answers = self.train_answer
        input_masks = self.train_input_mask
    elif mode == "test":
        theano_fn = self.test_fn
        inputs = self.test_input
        qs = self.test_q
        answers = self.test_answer
        input_masks = self.test_input_mask
    else:
        raise Exception("Invalid mode")

    # TODO fix this hack!!!
    # The mem net expects a 2D array which for the word2vec is
    # (len(words), len(word2vec)), so the question is duplicated.
    q = [qs[batch_index], qs[batch_index]]
    ans = answers[batch_index]
    input_mask = input_masks[batch_index]
    # NOTE(review): the original built [inputs[batch_index]] * 2 here and
    # then immediately overwrote it with the first input via
    # inputs.__getitem__(0), so `batch_index` never affected `inp`.
    # The dead assignment is removed and the dunder call replaced with
    # plain indexing; confirm ignoring batch_index is intentional.
    inp = [inputs[0]]

    skipped = 0
    grad_norm = float('NaN')

    if mode == 'train':
        # Get and calculate the gradient function; skip the step if the
        # max gradient norm is NaN.
        gradient_value = self.get_gradient_fn(inp, q, ans, input_mask)
        grad_norm = np.max([utils.get_norm(x) for x in gradient_value])
        if np.isnan(grad_norm):
            print("==> gradient is nan at index %d." % batch_index)
            print("==> skipping")
            skipped = 1

    if skipped == 0:
        ret = theano_fn(inp, q, ans, input_mask)  # Run the theano function
    else:
        ret = [-1, -1]

    param_norm = np.max(
        [utils.get_norm(x.get_value()) for x in self.params])

    return {
        "prediction": np.array([ret[0]]),
        "answers": np.array([ans]),
        "current_loss": ret[1],
        "skipped": skipped,
        "log": "pn: %.3f \t gn: %.3f" % (param_norm, grad_norm)
    }
def step(self, batch_index, mode):
    """Run one training or testing step within an epoch.

    Args:
        batch_index (int): step number for the epoch
        mode (str): 'train' or 'test' based on the mode of the step

    Returns:
        dict: prediction, answers, current_loss, skipped flag, and a
        log string containing the parameter and gradient norms.
    """
    if mode == "train" and self.mode == "test":
        raise Exception("Cannot train during test mode")

    if mode == "train":
        theano_fn = self.train_fn  # Theano function set
        inputs = self.train_input
        qs = self.train_q
        answers = self.train_answer
        input_masks = self.train_input_mask
    elif mode == "test":
        theano_fn = self.test_fn
        inputs = self.test_input
        qs = self.test_q
        answers = self.test_answer
        input_masks = self.test_input_mask
    else:
        raise Exception("Invalid mode")

    # TODO fix this hack!!!
    # The mem net expects a 2D array which for the word2vec is
    # (len(words), len(word2vec)), so the question is duplicated.
    q = [qs[batch_index], qs[batch_index]]
    ans = answers[batch_index]
    input_mask = input_masks[batch_index]
    # NOTE(review): the original built [inputs[batch_index]] * 2 here and
    # then immediately overwrote it with the first input via
    # inputs.__getitem__(0), so `batch_index` never affected `inp`.
    # The dead assignment is removed and the dunder call replaced with
    # plain indexing; confirm ignoring batch_index is intentional.
    inp = [inputs[0]]

    skipped = 0
    grad_norm = float("NaN")

    if mode == "train":
        # Get and calculate the gradient function; skip the step if the
        # max gradient norm is NaN.
        gradient_value = self.get_gradient_fn(inp, q, ans, input_mask)
        grad_norm = np.max([utils.get_norm(x) for x in gradient_value])
        if np.isnan(grad_norm):
            print("==> gradient is nan at index %d." % batch_index)
            print("==> skipping")
            skipped = 1

    if skipped == 0:
        ret = theano_fn(inp, q, ans, input_mask)  # Run the theano function
    else:
        ret = [-1, -1]

    param_norm = np.max([utils.get_norm(x.get_value()) for x in self.params])

    return {
        "prediction": np.array([ret[0]]),
        "answers": np.array([ans]),
        "current_loss": ret[1],
        "skipped": skipped,
        "log": "pn: %.3f \t gn: %.3f" % (param_norm, grad_norm),
    }