Example 1
    def step(self, batch_index, mode):
        '''
        Run one step of an epoch: a training or a testing step, depending
        on the mode.

        Args:
            batch_index (int): step number within the epoch
            mode (str): 'train' or 'test', selecting which step to run

        Returns:
            dict with the prediction, answer, current loss, whether the
            batch was skipped, and a log string with the parameter and
            gradient norms
        '''
        if mode == "train" and self.mode == "test":
            raise Exception("Cannot train during test mode")
        
        if mode == "train":
            theano_fn = self.train_fn  # compiled Theano function for this mode
            inputs = self.train_input
            qs = self.train_q
            answers = self.train_answer
            input_masks = self.train_input_mask
        elif mode == "test":
            theano_fn = self.test_fn
            inputs = self.test_input
            qs = self.test_q
            answers = self.test_answer
            input_masks = self.test_input_mask
        else:
            raise Exception("Invalid mode")
            
        inp = inputs[batch_index].astype(floatX)
        q = qs[batch_index].astype(floatX)
        ans = answers[batch_index]
        input_mask = input_masks[batch_index]

        skipped = 0
        grad_norm = float('NaN')
        
        if mode == 'train':
            gradient_value = self.get_gradient_fn(inp, q, ans, input_mask)  # compute the gradients for this sample
            grad_norm = np.max([utils.get_norm(x) for x in gradient_value])
            
            if np.isnan(grad_norm):
                print("==> gradient is nan at index %d." % batch_index)
                print("==> skipping")
                skipped = 1
        
        if skipped == 0:
            ret = theano_fn(inp, q, ans, input_mask)  # run the compiled Theano function
        else:
            ret = [-1, -1]
        
        param_norm = np.max([utils.get_norm(x.get_value()) for x in self.params])
        
        return {"prediction": np.array([ret[0]]),
                "answers": np.array([ans]),
                "current_loss": ret[1],
                "skipped": skipped,
                "log": "pn: %.3f \t gn: %.3f" % (param_norm, grad_norm)
                }
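`utils.get_norm` is not shown in these examples. Below is a minimal sketch of what it plausibly computes, assuming it returns the L2 norm of an array (consistent with how it is applied to gradient values and parameter values above); `get_norm` here is an illustrative stand-in, not the original implementation:

    import numpy as np

    def get_norm(x):
        # Assumed behaviour: Euclidean (L2) norm of an arbitrary array.
        x = np.asarray(x)
        return np.sqrt(np.sum(x ** 2))

    # The NaN guard in step() then amounts to:
    #   grad_norm = np.max([get_norm(g) for g in gradient_value])
    #   if np.isnan(grad_norm): skip this batch instead of updating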
        
Example 2
    def step(self, batch_index, mode):
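        '''
        Run one batch-sized step of an epoch: a training or a testing
        step, depending on the mode.
        '''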
        if mode == "train" and self.mode == "test":
            raise Exception("Cannot train during test mode")

        if mode == "train":
            theano_fn = self.train_fn
            inputs = self.train_input
            qs = self.train_q
            answers = self.train_answer
            fact_counts = self.train_fact_count
            input_masks = self.train_input_mask
        if mode == "test":
            theano_fn = self.test_fn
            inputs = self.test_input
            qs = self.test_q
            answers = self.test_answer
            fact_counts = self.test_fact_count
            input_masks = self.test_input_mask

        start_index = batch_index * self.batch_size
        inp = inputs[start_index:start_index+self.batch_size]
        q = qs[start_index:start_index+self.batch_size]
        ans = answers[start_index:start_index+self.batch_size]
        fact_count = fact_counts[start_index:start_index+self.batch_size]
        input_mask = input_masks[start_index:start_index+self.batch_size]

        inp, q, ans, fact_count, input_mask = self._process_batch(inp, q, ans, fact_count, input_mask)
        ret = theano_fn(inp, q, ans, fact_count, input_mask)
        param_norm = np.max([utils.get_norm(x.get_value()) for x in self.params])

        return {"prediction": ret[0],
                "answers": ans,
                "current_loss": ret[1],
                "skipped": 0,
                "log": "pn: %.3f" % param_norm,
                }
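`self._process_batch` is not part of this snippet. Below is a hypothetical sketch of the padding such a helper typically performs for a memory network, assuming each sample carries a variable number of facts; every name and shape here is an assumption, not the original code:

    import numpy as np

    def process_batch(inp, q, ans, fact_count, input_mask):
        # Hypothetical stand-in for self._process_batch: pad every sample's
        # fact sequence to the batch maximum so theano_fn receives dense,
        # rectangular arrays instead of ragged Python lists.
        batch = len(inp)
        max_facts = max(fact_count)
        dim = len(inp[0][0])  # embedding dimension (assumed)
        inp_padded = np.zeros((batch, max_facts, dim), dtype='float32')
        mask_padded = np.zeros((batch, max_facts), dtype='int32')
        for i in range(batch):
            n = fact_count[i]
            inp_padded[i, :n] = inp[i][:n]
            mask_padded[i, :n] = input_mask[i][:n]
        return (inp_padded,
                np.asarray(q, dtype='float32'),
                np.asarray(ans, dtype='int32'),
                np.asarray(fact_count, dtype='int32'),
                mask_padded)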
Example 3
    def step(self, batch_index, mode):
        '''
        Run one step of an epoch: a training or a testing step, depending
        on the mode.

        Args:
            batch_index (int): step number within the epoch
            mode (str): 'train' or 'test', selecting which step to run

        Returns:
            dict with the prediction, answer, current loss, whether the
            batch was skipped, and a log string with the parameter and
            gradient norms
        '''
        if mode == "train" and self.mode == "test":
            raise Exception("Cannot train during test mode")

        if mode == "train":
            theano_fn = self.train_fn  # compiled Theano function for this mode
            inputs = self.train_input
            qs = self.train_q
            answers = self.train_answer
            input_masks = self.train_input_mask
        elif mode == "test":
            theano_fn = self.test_fn
            inputs = self.test_input
            qs = self.test_q
            answers = self.test_answer
            input_masks = self.test_input_mask
        else:
            raise Exception("Invalid mode")

        # TODO: fix this hack!
        # The mem net expects a 2D array, which for word2vec is
        # (len(words), len(word2vec)); the sample is duplicated here to
        # satisfy that shape.
        inp = [inputs[batch_index], inputs[batch_index]]
        q = [qs[batch_index], qs[batch_index]]
        ans = answers[batch_index]
        input_mask = input_masks[batch_index]

        skipped = 0
        grad_norm = float('NaN')

        if mode == 'train':
            gradient_value = self.get_gradient_fn(
                inp, q, ans,
                input_mask)  # compute the gradients for this sample
            grad_norm = np.max([utils.get_norm(x) for x in gradient_value])

            if np.isnan(grad_norm):
                print("==> gradient is nan at index %d." % batch_index)
                print("==> skipping")
                skipped = 1

        if skipped == 0:
            ret = theano_fn(inp, q, ans, input_mask)  # run the compiled Theano function
        else:
            ret = [-1, -1]

        param_norm = np.max(
            [utils.get_norm(x.get_value()) for x in self.params])

        return {
            "prediction": np.array([ret[0]]),
            "answers": np.array([ans]),
            "current_loss": ret[1],
            "skipped": skipped,
            "log": "pn: %.3f \t gn: %.3f" % (param_norm, grad_norm)
        }
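The TODO above concerns the input shape: the memory network expects each input as a 2-D array of shape (len(words), len(word2vec)), one embedding row per word. Below is a minimal sketch of building such an array from a word2vec-style lookup; `encode`, `word2vec`, and `dim` are illustrative assumptions, not part of the original code:

    import numpy as np

    def encode(words, word2vec, dim=50):
        # One embedding row per word; unknown words stay as zero rows.
        out = np.zeros((len(words), dim), dtype='float32')
        for i, w in enumerate(words):
            if w in word2vec:
                out[i] = word2vec[w]
        return out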
Example 4
    def step(self, batch_index, mode):
        """
            This function is one step in an epoch and will run a training or testing step depending on the parameter.

	    Args:
		batch_index (int): step number for the epoch
		mode (str): 'train' or 'test' based on the mode of 
	    Returns:
	        Dictionary of predictions, answers, loss, number skipped, and the normal and gradient parameters
	"""
        if mode == "train" and self.mode == "test":
            raise Exception("Cannot train during test mode")

        if mode == "train":
            theano_fn = self.train_fn  # compiled Theano function for this mode
            inputs = self.train_input
            qs = self.train_q
            answers = self.train_answer
            input_masks = self.train_input_mask
        elif mode == "test":
            theano_fn = self.test_fn
            inputs = self.test_input
            qs = self.test_q
            answers = self.test_answer
            input_masks = self.test_input_mask
        else:
            raise Exception("Invalid mode")

        # TODO: fix this hack!
        # The mem net expects a 2D array, which for word2vec is
        # (len(words), len(word2vec)); the sample is duplicated here to
        # satisfy that shape.
        inp = [inputs[batch_index], inputs[batch_index]]
        q = [qs[batch_index], qs[batch_index]]
        ans = answers[batch_index]
        input_mask = input_masks[batch_index]

        skipped = 0
        grad_norm = float("NaN")

        if mode == "train":
            gradient_value = self.get_gradient_fn(inp, q, ans, input_mask)  # compute the gradients for this sample
            grad_norm = np.max([utils.get_norm(x) for x in gradient_value])

            if np.isnan(grad_norm):
                print("==> gradient is nan at index %d." % batch_index)
                print("==> skipping")
                skipped = 1

        if skipped == 0:
            ret = theano_fn(inp, q, ans, input_mask)  # run the compiled Theano function
        else:
            ret = [-1, -1]

        param_norm = np.max([utils.get_norm(x.get_value()) for x in self.params])

        return {
            "prediction": np.array([ret[0]]),
            "answers": np.array([ans]),
            "current_loss": ret[1],
            "skipped": skipped,
            "log": "pn: %.3f \t gn: %.3f" % (param_norm, grad_norm),
        }
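Across all four variants, step() returns a dictionary with the same keys, so an epoch driver can treat them uniformly. Below is a minimal sketch of such a loop; `model` and `n_batches` are assumed names for illustration, not part of the original code:

    import numpy as np

    # Hypothetical epoch driver for any of the step() variants above.
    losses, skipped = [], 0
    for batch_index in range(n_batches):
        out = model.step(batch_index, mode="train")
        skipped += out["skipped"]
        if out["skipped"] == 0:
            losses.append(out["current_loss"])
            print("batch %d | %s" % (batch_index, out["log"]))
    print("epoch mean loss: %.3f (%d batches skipped)" % (np.mean(losses), skipped))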