Example #1
    def step(self, batch_index, mode):
        '''
        Run one step of an epoch: a training or a testing step,
        depending on the mode.

        Args:
            batch_index (int): step number within the epoch
            mode (str): 'train' or 'test', selecting which compiled
                function and data split to use
        Returns:
            Dictionary with predictions, answers, loss, whether the step
            was skipped, and the parameter and gradient norms
        '''
        if mode == "train" and self.mode == "test":
            raise Exception("Cannot train during test mode")
        
        if mode == "train":
            theano_fn = self.train_fn # compiled Theano training function
            inputs = self.train_input
            qs = self.train_q
            answers = self.train_answer
            input_masks = self.train_input_mask
        elif mode == "test":    
            theano_fn = self.test_fn
            inputs = self.test_input
            qs = self.test_q
            answers = self.test_answer
            input_masks = self.test_input_mask
        else:
            raise Exception("Invalid mode")
            
        inp = inputs[batch_index]
        q = qs[batch_index]
        ans = answers[batch_index]
        input_mask = input_masks[batch_index]

        skipped = 0
        grad_norm = float('NaN')
        
        if mode == 'train':
            gradient_value = self.get_gradient_fn(inp, q, ans, input_mask) # evaluate the compiled gradient function
            grad_norm = np.max([utils.get_norm(x) for x in gradient_value])
            
            if np.isnan(grad_norm):
                print("==> gradient is nan at index %d." % batch_index)
                print("==> skipping")
                skipped = 1
        
        if skipped == 0:
            ret = theano_fn(inp, q, ans, input_mask) # Run the theano function
        else:
            ret = [-1, -1]
        
        param_norm = np.max([utils.get_norm(x.get_value()) for x in self.params])
        
        return {"prediction": np.array([ret[0]]),
                "answers": np.array([ans]),
                "current_loss": ret[1],
                "skipped": skipped,
                "log": "pn: %.3f \t gn: %.3f" % (param_norm, grad_norm)
                }
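These step examples lean on a utils.get_norm helper that is not shown. A minimal sketch, assuming it reduces a NumPy array of gradients or parameters to a single scalar via the L2 (Frobenius) norm; the name and behavior are inferred from usage, not confirmed by the source:

import numpy as np

def get_norm(x):
    # Assumed helper: flatten the array and return its L2 norm.
    x = np.asarray(x, dtype=np.float64)
    return float(np.sqrt(np.sum(x ** 2)))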
Example #2
def conjugate_gradient(params, func, init_values, stop_condition=1e-2):
    # PRP (Polak-Ribière-Polyak) conjugate gradient
    values = Matrix(init_values)
    lam = Symbol('lam')
    beta = 0
    previous_d = 0
    previous_g = 0
    step = 0
    while True:
        g = get_grad(params, func)
        g = g.subs(dict(zip(params, list(values))))
        if get_norm(g) <= stop_condition:
            return list(values), func.subs(dict(zip(params, list(values))))
        if previous_g != 0:
            beta = (g.T * (g - previous_g)) / (get_norm(previous_g)**2)
            d = -g + beta[0] * previous_d
        else:
            d = -g
        lam_func = func.subs(dict(zip(params, list(values + lam * d))))
        lam_value = get_stagnation(lam_func)
        values = values + lam_value * d
        previous_d = d
        previous_g = g
        f_value = func.subs(dict(zip(params, list(values))))
        print('step: {}  params: {}  f: {}'.format(step, list(values),
                                                   f_value))
        step += 1
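conjugate_gradient assumes SymPy plus three helpers that are not shown: get_grad, get_norm, and get_stagnation. A minimal sketch under the assumption that get_grad builds the symbolic gradient, get_norm is the Euclidean norm of a column Matrix, and get_stagnation performs an exact line search by minimizing the one-variable function of lam:

from sympy import Matrix, Symbol, diff, solve

def get_grad(params, func):
    # Column vector of partial derivatives of func w.r.t. each parameter.
    return Matrix([diff(func, p) for p in params])

def get_norm(g):
    # Euclidean norm of a column Matrix.
    return g.norm()

def get_stagnation(lam_func):
    # Exact line search: among the real stationary points of the
    # one-variable function of lam, pick the one with the lowest value.
    lam = Symbol('lam')
    candidates = [s for s in solve(diff(lam_func, lam), lam) if s.is_real]
    return min(candidates, key=lambda s: lam_func.subs(lam, s))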
Example #3
    def step(self, batch_index, mode):
        '''
        Run one step of an epoch: a training or a testing step,
        depending on the mode.

        Args:
            batch_index (int): step number within the epoch
            mode (str): 'train' or 'test', selecting which compiled
                function and data split to use
        Returns:
            Dictionary with predictions, answers, loss, whether the step
            was skipped, and the parameter and gradient norms
        '''
        if mode == "train" and self.mode == "test":
            raise Exception("Cannot train during test mode")
        
        if mode == "train":
            theano_fn = self.train_fn # compiled Theano training function
            inputs = self.train_input
            qs = self.train_q
            answers = self.train_answer
            input_masks = self.train_input_mask
        elif mode == "test":    
            theano_fn = self.test_fn 
            inputs = self.test_input
            qs = self.test_q
            answers = self.test_answer
            input_masks = self.test_input_mask
        else:
            raise Exception("Invalid mode")
            
        inp = inputs[batch_index]
        q = qs[batch_index]
        ans = answers[batch_index]
        input_mask = input_masks[batch_index]

        skipped = 0
        grad_norm = float('NaN')
        
        if mode == 'train':
            gradient_value = self.get_gradient_fn(inp, q, ans, input_mask) # evaluate the compiled gradient function
            grad_norm = np.max([utils.get_norm(x) for x in gradient_value])
            
            if np.isnan(grad_norm):
                print("==> gradient is nan at index %d." % batch_index)
                print("==> skipping")
                skipped = 1
        
        if skipped == 0:
            ret = theano_fn(inp, q, ans, input_mask) # Run the theano function
        else:
            ret = [-1, -1]
        
        param_norm = np.max([utils.get_norm(x.get_value()) for x in self.params])
        
        return {"prediction": np.array([ret[0]]),
                "answers": np.array([ans]),
                "current_loss": ret[1],
                "skipped": skipped,
                "log": "pn: %.3f \t gn: %.3f" % (param_norm, grad_norm)
                }
Example #4
    def step(self, batch_index, mode):
        if mode == "train" and self.mode == "test":
            raise Exception("Cannot train during test mode")

        if mode == "train":
            theano_fn = self.train_fn
            inputs = self.train_input
            qs = self.train_q
            answers = self.train_answer
            input_masks = self.train_input_mask
            sgates = self.train_gates
        elif mode == "test":
            theano_fn = self.test_fn
            inputs = self.test_input
            qs = self.test_q
            answers = self.test_answer
            input_masks = self.test_input_mask
            sgates = self.test_gates
        else:
            raise Exception("Invalid mode")

        inp = inputs[batch_index]
        q = qs[batch_index]
        ans = answers[batch_index]
        input_mask = input_masks[batch_index]
        sgate = sgates[batch_index]

        skipped = 0
        grad_norm = float('NaN')

        if mode == 'train':
            gradient_value = self.get_gradient_fn(inp, q, ans, input_mask,
                                                  sgate)
            grad_norm = np.max([utils.get_norm(x) for x in gradient_value])

            if np.isnan(grad_norm):
                #print "==> gradient is nan at index %d." % batch_index
                #print "==> skipping"
                skipped = 1

        if skipped == 0:
            ret = theano_fn(inp, q, ans, input_mask, sgate)
        else:
            ret = [-1, -1, -1, -1, -1]

        param_norm = np.max(
            [utils.get_norm(x.get_value()) for x in self.params])

        return {
            "prediction": np.array([ret[0]]),
            "answers": np.array([ans]),
            "current_loss": ret[1],
            "log": "pn: %.3f" % param_norm,
            "skipped": skipped
        }
Example #5
    def step(self, batch_index, mode):
        if mode == "train" and self.mode == "test":
            raise Exception("Cannot train during test mode")
        
        if mode == "train":
            theano_fn = self.train_fn 
            inputs = self.train_input
            qs = self.train_q
            answers = self.train_answer
            fact_counts = self.train_fact_count
            input_masks = self.train_input_mask
        if mode == "test":    
            theano_fn = self.test_fn 
            inputs = self.test_input
            qs = self.test_q
            answers = self.test_answer
            fact_counts = self.test_fact_count
            input_masks = self.test_input_mask
        
        start_index = batch_index * self.batch_size    
        inp = inputs[start_index:start_index+self.batch_size]
        q = qs[start_index:start_index+self.batch_size]
        ans = answers[start_index:start_index+self.batch_size]
        fact_count = fact_counts[start_index:start_index+self.batch_size]
        input_mask = input_masks[start_index:start_index+self.batch_size]

        skipped = 0
        grad_norm = float('NaN')
        
        if mode == 'train':
            gradient_value = self.get_gradient_fn(inp, q, ans, fact_count, input_mask)
            grad_norm = np.max([utils.get_norm(x) for x in gradient_value])
            
            if np.isnan(grad_norm):
                print("==> gradient is nan at index %d." % batch_index)
                print("==> skipping")
                skipped = 1
        
        if skipped == 0:
            ret = theano_fn(inp, q, ans, fact_count, input_mask)
        else:
            ret = [float('NaN'), float('NaN')]
        
        param_norm = np.max([utils.get_norm(x.get_value()) for x in self.params])
        
        return {"prediction": ret[0],
                "answers": ans,
                "current_loss": ret[1],
                "skipped": skipped,
                "grad_norm": grad_norm,
                "param_norm": param_norm,
                "log": "",
                }
Example #6
 def step(self, batch_index, mode):
     if mode == "train" and self.mode == "test":
         raise Exception("Cannot train during test mode")
     
     if mode == "train":
         theano_fn = self.train_fn 
         inputs = self.train_input
         qs = self.train_q
         answers = self.train_answer
         input_masks = self.train_input_mask
         sgates = self.train_gates
     elif mode == "test":    
         theano_fn = self.test_fn 
         inputs = self.test_input
         qs = self.test_q
         answers = self.test_answer
         input_masks = self.test_input_mask
         sgates = self.test_gates
     else:
         raise Exception("Invalid mode")
     
     
     inp = inputs[batch_index]
     q = qs[batch_index]
     ans = answers[batch_index]
     input_mask = input_masks[batch_index]
     sgate = sgates[batch_index]
     
     skipped = 0
     grad_norm = float('NaN')
     
     if mode == 'train':
         gradient_value = self.get_gradient_fn(inp, q, ans, input_mask, sgate)
         grad_norm = np.max([utils.get_norm(x) for x in gradient_value])
         
         if np.isnan(grad_norm):
             #print "==> gradient is nan at index %d." % batch_index
             #print "==> skipping"
             skipped = 1
             
     if skipped == 0:
         ret = theano_fn(inp, q, ans, input_mask, sgate)
     else:
         ret = [-1, -1, -1, -1, -1]
         
     param_norm = np.max([utils.get_norm(x.get_value()) for x in self.params])
     
     return {"prediction": np.array([ret[0]]),
             "answers": np.array([ans]),
             "current_loss": ret[1],
             "log": "pn: %.3f" % param_norm,
             "skipped": skipped
             }
Example #7
    def step(self, batch_index, mode):
        if mode == "train" and self.mode == "test":
            raise Exception("Cannot train during test mode")
        
        if mode == "train":
            theano_fn = self.train_fn 
            inputs = self.train_input
            qs = self.train_q
            answers = self.train_answer
            input_masks = self.train_input_mask
        elif mode == "test":    
            theano_fn = self.test_fn 
            inputs = self.test_input
            qs = self.test_q
            answers = self.test_answer
            input_masks = self.test_input_mask
        else:
            raise Exception("Invalid mode")
            
        inp = inputs[batch_index]
        q = qs[batch_index]
        ans = answers[batch_index]
        input_mask = input_masks[batch_index]

        ret = theano_fn(inp, q, ans, input_mask)
        param_norm = np.max([utils.get_norm(x.get_value()) for x in self.params])
        
        return {"prediction": np.array([ret[0]]),
                "answers": np.array([ans]),
                "current_loss": ret[1],
                "skipped": 0,
                "log": "pn: %.3f" % param_norm,
                }
Example #8
    def step(self, batch_index, mode):
        if mode == "train" and self.mode == "test":
            raise Exception("Cannot train during test mode")

        if mode == "train":
            theano_fn = self.train_fn
        if mode == "test":
            theano_fn = self.test_fn

        inp, q, ans, ans_inp, ans_mask = self._process_batch_sind(
            batch_index, mode)

        ret = theano_fn(inp, q, ans, ans_mask, ans_inp)
        #theano_fn.profile.print_summary()
        #sys.exit()
        param_norm = np.max(
            [utils.get_norm(x.get_value()) for x in self.params])

        return {
            "prediction": ret[0],
            "answers": ans,
            "current_loss": ret[1],
            "skipped": 0,
            "log": "pn: %.3f" % param_norm,
        }
Example #9
    def step(self, batch_index, mode):
        if mode == "train" and self.mode == "test":
            raise Exception("Cannot train during test mode")
        
        if mode == "train":
            theano_fn = self.train_fn 
            inputs = self.train_input
            qs = self.train_q
            answers = self.train_answer
            fact_counts = self.train_fact_count
            input_masks = self.train_input_mask
        if mode == "test":    
            theano_fn = self.test_fn 
            inputs = self.test_input
            qs = self.test_q
            answers = self.test_answer
            fact_counts = self.test_fact_count
            input_masks = self.test_input_mask
        
        start_index = batch_index * self.batch_size    
        inp = inputs[start_index:start_index+self.batch_size]
        q = qs[start_index:start_index+self.batch_size]
        ans = answers[start_index:start_index+self.batch_size]
        fact_count = fact_counts[start_index:start_index+self.batch_size]
        input_mask = input_masks[start_index:start_index+self.batch_size]

        ret = theano_fn(inp, q, ans, fact_count, input_mask)
        param_norm = np.max([utils.get_norm(x.get_value()) for x in self.params])
        
        return {"prediction": ret[0],
                "answers": ans,
                "current_loss": ret[1],
                "skipped": 0,
                "log": "pn: %.3f" % param_norm,
                }
Example #10
    def step(self, batch_index, mode):
        if mode == "train" and self.mode == "test":
            raise Exception("Cannot train during test mode")

        if mode == "train":
            theano_fn = self.train_fn
            inputs = self.train_input
            qs = self.train_q
            answers = self.train_answer
            input_masks = self.train_input_mask
        elif mode == "test":
            theano_fn = self.test_fn
            inputs = self.test_input
            qs = self.test_q
            answers = self.test_answer
            input_masks = self.test_input_mask
        else:
            raise Exception("Invalid mode")

        inp = inputs[batch_index]
        q = qs[batch_index]
        ans = answers[batch_index]
        input_mask = input_masks[batch_index]

        ret = theano_fn(inp, q, ans, input_mask)
        param_norm = np.max(
            [utils.get_norm(x.get_value()) for x in self.params])

        return {
            "prediction": np.array([ret[0]]),
            "answers": np.array([ans]),
            "current_loss": ret[1],
            "skipped": 0,
            "log": "pn: %.3f" % param_norm,
        }
Example #11
def quasi_newton(params, func, init_values, stop_condition=1e-5):
    # BFGS (Broyden-Fletcher-Goldfarb-Shanno) quasi-Newton update
    values = Matrix(init_values)
    lam = Symbol('lam')
    next_g = 0
    next_values = 0
    h = eye(len(params))
    step = 0
    while True:
        g = get_grad(params, func)
        g = g.subs(dict(zip(params, list(values))))
        d = -h**(-1) * g
        lam_func = func.subs(dict(zip(params, list(values + lam * d))))
        lam_value = get_stagnation(lam_func)
        next_values = values + lam_value * d
        if get_norm(g) <= stop_condition:
            return list(values), func.subs(dict(zip(params, list(values))))
        else:
            next_g = get_grad(params, func)
            next_g = next_g.subs(dict(zip(params, list(next_values))))
            s = next_values - values
            y = next_g - g
            h = (eye(len(params)) - (s * y.T) /
                 (s.T * y)[0]) * h * (eye(len(params)) - (s * y.T) /
                                      (s.T * y)[0]).T + (s * s.T) / (s.T *
                                                                     y)[0]
        values = next_values
        f_value = func.subs(dict(zip(params, list(values))))
        print('step: {}  params: {}  f: {}'.format(step, list(values),
                                                   f_value))
        step += 1
Example #12
	def measure_velocity_sensor(self, poiList, rover):
		min_dist = self.min_sensor_dist_sqr
		max_dist = self.max_sensor_dist
		sum = np.zeros(4)
		for poi in poiList:

			# get quadrant of POI
			vect = utils.vect_sub(poi.pos, rover.pos)
			dist = utils.get_norm(vect)
			angle = utils.get_angle(vect) % (2 * math.pi)  # Between 0 to 2pi
			relative_angle = (angle - rover.heading + math.pi / 2) % (2 * math.pi)
			q = utils.get_quadrant(relative_angle) - 1

			# get relative velocity of POI to agent.
			rel_vel_vect = poi.vel_lin
			rel_pos_vect = utils.vect_sub(rover.pos, poi.pos)
			rel_pos_norm = utils.get_norm(rel_pos_vect)
			rel_pos_unit = [rel_pos_vect[0]/rel_pos_norm, rel_pos_vect[1]/rel_pos_norm]

			dot = np.dot(rel_pos_unit, rel_vel_vect)
			normalized_dot = poi.value * dot / rel_pos_norm**2
			sum[q] += normalized_dot

		return list(sum)
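measure_velocity_sensor relies on small 2-D geometry helpers from utils. A minimal sketch, assuming points are [x, y] pairs, angles are measured counter-clockwise from the positive x-axis, and quadrants are numbered 1-4 over [0, 2*pi); these signatures are inferred from usage, not confirmed by the source:

import math

def vect_sub(a, b):
    # Component-wise difference of two 2-D points.
    return [a[0] - b[0], a[1] - b[1]]

def get_norm(v):
    # Euclidean length of a 2-D vector.
    return math.hypot(v[0], v[1])

def get_angle(v):
    # Angle of the vector, counter-clockwise from the positive x-axis.
    return math.atan2(v[1], v[0])

def get_quadrant(angle):
    # Quadrant 1-4 for an angle already normalized to [0, 2*pi).
    return int(angle // (math.pi / 2)) + 1

def check_quadrant(angle, quadrant):
    # True if the normalized angle falls inside the given quadrant
    # (used by the rover sensor in a later example).
    return get_quadrant(angle) == quadrant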
Example #13
def newton(params, func, init_values, stop_condition=1e-2):
    values = Matrix(init_values)
    step = 0
    while True:
        g = get_grad(params, func)
        g = g.subs(dict(zip(params, list(values))))
        if get_norm(g) <= stop_condition:
            return list(values), func.subs(dict(zip(params, list(values))))
        h = get_hessian(params, func)
        h = h.subs(dict(zip(params, list(values))))
        values = values - h**(-1) * g
        f_value = func.subs(dict(zip(params, list(values))))
        print('step: {}  params: {}  f: {}'.format(step, list(values),
                                                   f_value))
        step += 1
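newton additionally assumes a get_hessian helper. A minimal sketch that builds the symbolic Hessian with SymPy; note that the library's own sympy.hessian(func, params) behaves the same way:

from sympy import Matrix, diff

def get_hessian(params, func):
    # Assumed helper: symbolic Hessian matrix of func.
    return Matrix([[diff(func, p1, p2) for p2 in params] for p1 in params])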
Example #14
 def return_sensor_rover(self, roverList, quadrant, max_dist=500):
     min_dist = 10
     sum = 0
     for rover in roverList:
         vect = utils.vect_sub(rover.pos, self.pos)
         dist = utils.get_norm(vect)
         angle = utils.get_angle(vect) % (2 * math.pi)  # Between 0 to 2pi
         relative_angle = (angle - self.heading + math.pi / 2) % (2 *
                                                                  math.pi)
         # print 'Vect: ', vect
         # print 'Angle: ', angle*360/2/math.pi, relative_angle*360/2/math.pi
         if dist < max_dist and utils.check_quadrant(
                 relative_angle, quadrant):
             # print 'I SEE YOU', quadrant
             sum += 1 / max(dist**2, min_dist**2)
     return sum
Example #15
def steepest_descent(params, func, init_values, stop_condition=1e-10):
    values = Matrix(init_values)
    lam = Symbol('lam')
    step = 0
    while True:
        g = get_grad(params, func)
        g = g.subs(dict(zip(params, list(values))))
        if get_norm(g) <= stop_condition:
            return list(values), func.subs(dict(zip(params, list(values))))
        lam_func = func.subs(dict(zip(params, list(values - lam * g))))
        lam_value = get_stagnation(lam_func)
        values = values - lam_value * g
        f_value = func.subs(dict(zip(params, list(values))))
        print('step: {}  params: {}  f: {}'.format(step, list(values),
                                                   f_value))
        step += 1
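A usage sketch for these optimizers on a simple quadratic, assuming the helper definitions sketched above; the objective and starting point here are illustrative only:

from sympy import symbols

x, y = symbols('x y')
f = (x - 1) ** 2 + 2 * (y + 3) ** 2
best, f_best = steepest_descent([x, y], f, [0, 0])
print(best, f_best)  # approaches [1, -3] with f near 0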
Example #16
def learn_lr_classifier(training_corpus):
        D = get_vocabulary_size()
        labels = get_labels()
        w = [0] * (D + 1)
        norm = 1.0
        num_iters = 0
        while norm > convergence_threshold:
                num_iters += 1
                if num_iters > max_iters:
                        break
                old_w = list(w)
                shuffled = list(training_corpus)
                shuffle(shuffled)
                for vector in shuffled:
                        label = 1.0 if float(vector[0]) == labels[0] else 0.0
                        prediction = get_prediction(vector[1:], w)
                        delta = label - prediction
                        update_weights(vector[1:], w, delta)
                norm = get_norm(w, old_w)
        return w
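learn_lr_classifier depends on module-level helpers and globals that are not shown. A minimal sketch, assuming a sigmoid prediction over an affine score, a plain stochastic-gradient update with a hypothetical learning_rate, and get_norm taken as the Euclidean distance between successive weight vectors:

import math

learning_rate = 0.1           # hypothetical global
convergence_threshold = 1e-4  # hypothetical global
max_iters = 100               # hypothetical global

def get_prediction(features, w):
    # Sigmoid of the affine score w[0] + sum_i w[i + 1] * x_i.
    z = w[0] + sum(wi * float(xi) for wi, xi in zip(w[1:], features))
    return 1.0 / (1.0 + math.exp(-z))

def update_weights(features, w, delta):
    # In-place SGD step on the bias and each feature weight.
    w[0] += learning_rate * delta
    for i, xi in enumerate(features):
        w[i + 1] += learning_rate * delta * float(xi)

def get_norm(w, old_w):
    # Euclidean distance between successive weight vectors.
    return math.sqrt(sum((a - b) ** 2 for a, b in zip(w, old_w)))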
Example #17
def main():
    """Fuction to compute 1-dimensional correlation matrix using kernel density
    estimation method
    """

    data = np.load(INPUT_PATH)
    data = get_norm(data)

    num_atoms = data.shape[1]

    corr_matrix = np.zeros((num_atoms, num_atoms))

    for row in range(num_atoms):
        # Compute only the lower triangle of the matrix
        for col in range(row):
            corr_matrix[row, col] = mi_kde(data, row, col)
            print(row, col, corr_matrix[row, col])

    corr_matrix = gen_corr_coef(corr_matrix, dim=1)

    np.save(file=OUTPUT_PATH, arr=corr_matrix)
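In this example get_norm evidently preprocesses the whole data array rather than returning a scalar. A minimal sketch under the assumption that it standardizes each column to zero mean and unit variance; the actual normalization used by the source may differ:

import numpy as np

def get_norm(data):
    # Assumed behavior: per-column z-score standardization.
    mean = data.mean(axis=0)
    std = data.std(axis=0)
    return (data - mean) / np.where(std == 0, 1.0, std)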
Example #18
    def step(self, batch_index, mode):
        if mode == "train" and self.mode == "test":
            raise Exception("Cannot train during test mode")

        if mode == "train":
            theano_fn = self.train_fn
        if mode == "test" or mode == 'val':
            theano_fn = self.test_fn

        q, ans, ans_inp, ans_mask, img_ids = self._process_batch_sind(
            batch_index, mode)

        ret = theano_fn(q, ans, ans_mask, ans_inp)
        param_norm = np.max(
            [utils.get_norm(x.get_value()) for x in self.params])

        return {
            "prediction": ret[0],
            "skipped": 0,
            "log": "pn: %.3f" % param_norm,
        }
Example #19
    def step(self, batch_index, mode):
        if mode == "train" and self.mode == "test":
            raise Exception("Cannot train during test mode")

        if mode == "train":
            theano_fn = self.train_fn
            inputs = self.train_input
            qs = self.train_q
            answers = self.train_answer
            fact_counts = self.train_fact_count
            input_masks = self.train_input_mask
        if mode == "test":
            theano_fn = self.test_fn
            inputs = self.test_input
            qs = self.test_q
            answers = self.test_answer
            fact_counts = self.test_fact_count
            input_masks = self.test_input_mask

        start_index = batch_index * self.batch_size
        inp = inputs[start_index:start_index + self.batch_size]
        q = qs[start_index:start_index + self.batch_size]
        ans = answers[start_index:start_index + self.batch_size]
        fact_count = fact_counts[start_index:start_index + self.batch_size]
        input_mask = input_masks[start_index:start_index + self.batch_size]

        inp, q, ans, fact_count, input_mask = self._process_batch(
            inp, q, ans, fact_count, input_mask)
        ret = theano_fn(inp, q, ans, fact_count, input_mask)
        param_norm = np.max(
            [utils.get_norm(x.get_value()) for x in self.params])

        return {
            "prediction": ret[0],
            "answers": ans,
            "current_loss": ret[1],
            "skipped": 0,
            "log": "pn: %.3f" % param_norm,
        }
Example #20
    def step(self, batch_index, mode):
        if mode == "train" and self.mode == "test":
            raise Exception("Cannot train during test mode")

        start_index = batch_index * self.batch_size

        inputs, qs, answers, fact_counts, input_masks, img_feats = \
            self.process_vqa_data(mode, start_index,
                                  start_index + self.batch_size)
        if mode == "train":
            theano_fn = self.train_fn
            # inputs = self.process_vqa_data(self.h5file['cap_train'][start_index:start_index+self.batch_size])
            # qs = self.process_vqa_data(self.h5file['ques_train'][start_index:start_index+self.batch_size])
            # answers = self.process_vqa_data(self.h5file['answers'][start_index:start_index+self.batch_size])
            # fact_counts = np.zeros(self.batch_size, dtype="int")
            # fact_counts.fill(20)
            # input_masks = process_masks(inputs)  # figure it out
        elif mode == "test":
            theano_fn = self.test_fn
            # inputs = self.process_vqa_data(self.h5file['cap_test'][start_index:start_index+self.batch_size])
            # qs = self.process_vqa_data(self.h5file['ques_test'][start_index:start_index+self.batch_size])
            # answers = self.process_vqa_data(self.h5file['ans_test'][start_index:start_index+self.batch_size])
            # fact_counts = np.zeros(self.batch_size, dtype="int")
            # fact_counts.fill(20)
            # input_masks = process_masks(inputs)  # figure it out

        inp, q, ans, fact_count, input_mask, img_feat = self._process_batch(
            inputs, qs, answers, fact_counts, input_masks, img_feats)
        img_feat = img_feat.reshape(
            (self.batch_size * self.img_seq_len, self.img_vector_size))
        ret = theano_fn(inp, q, ans, fact_count, input_mask, img_feat, self.lr)

        param_norm = np.max([utils.get_norm(x.get_value())
                             for x in self.params])

        return {"prediction": ret[0],
                "answers": ans,
                "current_loss": ret[1],
                "skipped": 0,
                "log": "pn: %.3f" % param_norm
                }
Example #21
        'W_mem_upd_in', 'W_mem_upd_hid', 'b_mem_upd', 'W_mem_hid_in',
        'W_mem_hid_hid', 'b_mem_hid', 'W_b', 'W_1', 'W_2', 'b_1', 'b_2', 'W_a'
    ]
    fig, ax = plt.subplots(figsize=(9, 4))
    with open(file_name, 'rb') as load_file:
        dict = pickle.load(load_file)
        loaded_params = dict['params']

        if flag:
            for i in range(len(params)):
                if params[i] in to_write:
                    out_obj[params[i]] = loaded_params[i]
            with open(sys.argv[2], 'wb') as save_file:
                pickle.dump(obj=out_obj, file=save_file, protocol=-1)
            print("finished dumping file to " + sys.argv[2])

        for (x, y) in zip(params, loaded_params):
            n = y.shape
            if len(n) == 1:
                n = n[0]
            else:
                n = n[0] * n[1]
            norm = utils.get_norm(y) / n ** 0.5
            print(x, ' shape: ', y.shape, ', norm: ', norm,
                  ', max: ', np.max(np.abs(y)))
            if len(y.shape) > 1:
                ax.imshow(y, cmap='Blues', interpolation='none')
                plt.title('Train. ' + x + ', norm ' + str(norm))
                fig.show()
                input_str = input("Press ENTER to continue.")
Example #22
def train(dataset, alpha, A_type, normalize_type, model_pretrained_params,
          model_type, batch_size, test_batch_size, negative_nums, item_emb_dim,
          hid_dim1, hid_dim2, hid_dim3, lr_emb, l2_emb, lr_gcn, l2_gcn, lr_cnn,
          l2_cnn, epochs, params_file_name):
    # init
    if dataset == 'LastFM':
        # use LastFM dataset
        data_obj = LastfmData()
    elif dataset == 'Diginetica':
        # use Diginetica dataset
        data_obj = DigineticaData()
    else:
        # use yoochoose1_64 dataset
        data_obj = YoochooseData(dataset=dataset)

    # gpu device
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # init A
    # A: type=scipy.sparse
    A = data_obj.get_decay_adj(
        data_obj.d, tail=None,
        alpha=alpha) if A_type == 'decay' else data_obj.get_gcn_adj(data_obj.d)
    # normalize the adj: type = 'random_walk' (row-normalized) or type = 'symmetric'
    if normalize_type == 'random_walk':
        print('----------------------------------')
        print('Normalize_type is random_walk:')
        A = spmx_1_normalize(A)
        print('----------------------------------')
    else:
        print('----------------------------------')
        print('Normalize_type is symmetric:')
        A = spmx_sym_normalize(A)
        print('----------------------------------')
    # transform the adj to a sparse cpu tensor
    A = spmx2torch_sparse_tensor(A)

    # get cpu tensor: labels
    labels = data_obj.get_labels(data_obj.d)

    # get cpu sparse tensor: session adj
    SI = data_obj.get_session_adj(data_obj.d, alpha=alpha)

    # load model pretrained params
    if model_pretrained_params == 'True':
        print('----------------------------------')
        if dataset == 'LastFM':
            # use LastFM params
            print('Use LastFM model pretrained params: ' + params_file_name +
                  '.pkl')
            pretrained_state_dict = torch.load('./lastfm_pretrained_params/' +
                                               params_file_name + '.pkl')
        elif dataset == 'Diginetica':
            # use Diginetica params
            print('Use Diginetica model pretrained params: ' +
                  params_file_name + '.pkl')
            pretrained_state_dict = torch.load('./dig_pretrained_params/' +
                                               params_file_name + '.pkl')
        else:
            # use yoochoose1_64 params
            print('Use yoochoose1_64 model pretrained params: ' +
                  params_file_name + '.pkl')
            pretrained_state_dict = torch.load('./yoo1_64_pretrained_params/' +
                                               params_file_name + '.pkl')
        print('----------------------------------')
    else:
        pretrained_state_dict = None

    # transform all tensor to cuda
    A = A.to(device)
    labels = labels.to(device)
    SI = SI.to(device)

    # define the evaluation objects
    evalution5 = Evaluation(k=5)
    evalution10 = Evaluation(k=10)
    evalution15 = Evaluation(k=15)
    evalution20 = Evaluation(k=20)

    # define the session data loaders
    trainloader = SessionDataloader(train_size=data_obj.train_size,
                                    test_size=data_obj.test_size,
                                    item_size=data_obj.item_size,
                                    labels=labels,
                                    batch_size=batch_size,
                                    train=True,
                                    negative_nums=negative_nums,
                                    shuffle=True)
    testloader = SessionDataloader(train_size=data_obj.train_size,
                                   test_size=data_obj.test_size,
                                   item_size=data_obj.item_size,
                                   labels=labels,
                                   batch_size=test_batch_size *
                                   data_obj.item_size,
                                   train=False,
                                   negative_nums=negative_nums,
                                   shuffle=False)

    # define model, then transform to cuda
    if model_type == 'sgncf1_cnn':
        # use sgncf1_cnn model:
        model = sgncf1_cnn(dataset_nums=data_obj.train_size +
                           data_obj.test_size,
                           item_nums=data_obj.item_size,
                           item_emb_dim=item_emb_dim,
                           hid_dim1=hid_dim1)
    else:
        # use sgncf2_cnn model:
        model = sgncf2_cnn(dataset_nums=data_obj.train_size +
                           data_obj.test_size,
                           item_nums=data_obj.item_size,
                           item_emb_dim=item_emb_dim,
                           hid_dim1=hid_dim1,
                           hid_dim2=hid_dim2)
    model.to(device)

    # update model_state_dict
    if pretrained_state_dict is not None:
        model_state_dict = model.state_dict()
        pretrained_state_dict = {
            k: v
            for k, v in pretrained_state_dict.items() if k in model_state_dict
        }
        model_state_dict.update(pretrained_state_dict)
        model.load_state_dict(model_state_dict)

    # define loss and optim
    criterion = nn.BCEWithLogitsLoss()
    if model_type == 'sgncf1_cnn':
        # use sgncf1 model parameters:
        optim_emb = optim.Adagrad([{
            'params': model.item_emb.parameters()
        }],
                                  lr=lr_emb,
                                  weight_decay=l2_emb)
        optim_gcn = optim.Adam([{
            'params': model.gconv1.parameters()
        }],
                               lr=lr_gcn,
                               weight_decay=l2_gcn)
        optim_cnn = optim.Adam([{
            'params': model.cnn_1d.parameters()
        }, {
            'params': model.fc.parameters()
        }],
                               lr=lr_cnn,
                               weight_decay=l2_cnn)
    else:
        # use sgncf2 model parameters:
        optim_emb = optim.Adagrad([{
            'params': model.item_emb.parameters()
        }],
                                  lr=lr_emb,
                                  weight_decay=l2_emb)
        optim_gcn = optim.Adam([{
            'params': model.gconv1.parameters()
        }, {
            'params': model.gconv2.parameters()
        }],
                               lr=lr_gcn,
                               weight_decay=l2_gcn)
        optim_cnn = optim.Adam([{
            'params': model.cnn_1d.parameters()
        }, {
            'params': model.fc.parameters()
        }],
                               lr=lr_cnn,
                               weight_decay=l2_cnn)

    # histories for the recall, mrr, and norm figures
    fig_recalls = []
    fig_mrrs = []
    fig_emb_norms = []
    fig_gcn_norms = []
    fig_cnn_norms = []
    fig_epochs = []

    # train epochs
    for epoch in range(epochs):
        # model training
        start = time.time()

        # test evaluation dicts
        r = {'5': [], '10': [], '15': [], '20': []}
        m = {'5': [], '10': [], '15': [], '20': []}

        # loss list
        losses = []

        model.train()
        for i, data in enumerate(trainloader):
            # zero optim
            optim_emb.zero_grad()
            optim_gcn.zero_grad()
            optim_cnn.zero_grad()

            # batch inputs
            batch_sidxes, batch_iidxes, batch_labels = data[:, 0].long().to(
                device), data[:,
                              1].long().to(device), data[:,
                                                         2].float().to(device)

            # predicting
            outs = model(batch_sidxes, batch_iidxes, A, SI)

            # loss
            loss = criterion(outs, batch_labels)

            # backward
            loss.backward()

            # optim step
            optim_emb.step()
            optim_gcn.step()
            optim_cnn.step()

            # losses
            losses.append(loss.item())

            # print loss, recall, mrr
            if i % 20 == 19:
                print('[{0: 2d}, {1:5d}, {2: 7d}], loss:{3:.4f}'.format(
                    epoch + 1, int(i * (batch_size / (negative_nums + 1))),
                    data_obj.train_size, np.mean(losses)))

        # print emb_norm, gcn_norm, cnn_norm
        emb_norm = get_norm(model, 'emb')
        gcn_norm = get_norm(model, 'gcn')
        cnn_norm = get_norm(model, 'cnn')
        fig_emb_norms.append(emb_norm)
        fig_gcn_norms.append(gcn_norm)
        fig_cnn_norms.append(cnn_norm)
        print('[gcn_norm]:{0:.4f}  [emb_norm]:{1:.4f}  [cnn_norm]:{2:.4f}'.
              format(gcn_norm, emb_norm, cnn_norm))

        # epoch time
        print('[epoch time]:{0:.4f}'.format(time.time() - start))

        # model eval
        model.eval()
        with torch.no_grad():
            for j, d in enumerate(testloader):
                # test batch inputs
                b_sidxes, b_iidxes, b_labels = d[0][:, 0].long().to(
                    device), d[0][:, 1].long().to(device), d[1].to(device)

                # predicting
                o = model(b_sidxes, b_iidxes, A, SI)
                o = o.view(-1, data_obj.item_size)

                # evaluation at k=5, 10, 15, 20
                r['5'].append(evalution5.evaluate(o, b_labels)[0])
                r['10'].append(evalution10.evaluate(o, b_labels)[0])
                r['15'].append(evalution15.evaluate(o, b_labels)[0])
                r['20'].append(evalution20.evaluate(o, b_labels)[0])
                m['5'].append(evalution5.evaluate(o, b_labels)[1])
                m['10'].append(evalution10.evaluate(o, b_labels)[1])
                m['15'].append(evalution15.evaluate(o, b_labels)[1])
                m['20'].append(evalution20.evaluate(o, b_labels)[1])

                # print test info
                # print('[{0: 2d}, {1: 5d}, {2: 7d}]'.format(epoch+1,
                #                                            j * test_batch_size,
                #                                            data_obj.test_size))

            # print test recall mrr
            print('[{0: 2d}]'.format(epoch + 1))
            print('[recall@5 ]:{0:.4f}  [mrr@5 ]:{1:.4f}'.format(
                np.sum(r['5']) / data_obj.test_size,
                np.sum(m['5']) / data_obj.test_size))
            print('[recall@10]:{0:.4f}  [mrr@10]:{1:.4f}'.format(
                np.sum(r['10']) / data_obj.test_size,
                np.sum(m['10']) / data_obj.test_size))
            print('[recall@15]:{0:.4f}  [mrr@15]:{1:.4f}'.format(
                np.sum(r['15']) / data_obj.test_size,
                np.sum(m['15']) / data_obj.test_size))
            print('[recall@20]:{0:.4f}  [mrr@20]:{1:.4f}'.format(
                np.sum(r['20']) / data_obj.test_size,
                np.sum(m['20']) / data_obj.test_size))

            # plot recall, mrr, and norms
            fig_epochs.append(epoch)
            fig_recalls.append(np.sum(r['20']) / data_obj.test_size)
            fig_mrrs.append(np.sum(m['20']) / data_obj.test_size)
            plt_evalution(fig_epochs,
                          fig_recalls,
                          fig_mrrs,
                          k=20,
                          alpha=alpha,
                          lr_emb=lr_emb,
                          l2_emb=l2_emb,
                          lr_gcn=lr_gcn,
                          l2_gcn=l2_gcn,
                          model_type=model_type,
                          lr_cnn=lr_cnn,
                          l2_cnn=l2_cnn)
            plt_norm(fig_epochs,
                     fig_emb_norms,
                     fig_gcn_norms,
                     fig_cnn_norms,
                     alpha=alpha,
                     lr_emb=lr_emb,
                     l2_emb=l2_emb,
                     lr_gcn=lr_gcn,
                     l2_gcn=l2_gcn,
                     model_type=model_type,
                     lr_cnn=lr_cnn,
                     l2_cnn=l2_cnn)
Example #23
                  'W_mem_res_in', 'W_mem_res_hid', 'b_mem_res', 
                  'W_mem_upd_in', 'W_mem_upd_hid', 'b_mem_upd',
                  'W_mem_hid_in', 'W_mem_hid_hid', 'b_mem_hid', 'W_b',
                  'W_1', 'W_2', 'b_1', 'b_2', 'W_a']  
    fig, ax = plt.subplots(figsize=(9, 4))
    with open(file_name, 'rb') as load_file:
        dict = pickle.load(load_file)
        loaded_params = dict['params']

        if flag:
            for i in range(len(params)):
                if params[i] in to_write:
                    out_obj[params[i]] = loaded_params[i]
            with open(sys.argv[2], 'wb') as save_file:
                pickle.dump(obj=out_obj, file=save_file, protocol=-1)
            print("finished dumping file to " + sys.argv[2])

        for (x, y) in zip(params, loaded_params):
            n = y.shape
            if len(n) == 1:
                n = n[0]
            else:
                n = n[0] * n[1]
            norm = utils.get_norm(y) / n ** 0.5
            print(x, ' shape: ', y.shape, ', norm: ', norm,
                  ', max: ', np.max(np.abs(y)))
            if len(y.shape) > 1:
                ax.imshow(y, cmap='Blues', interpolation='none')
                plt.title('Train. ' + x + ', norm ' + str(norm))
                fig.show()
                input_str = input("Press ENTER to continue.")
Example #24
def train(dataset, alpha, A_type, normalize_type, session_type,
          pretrained_item_emb, model_type, batch_size, shuffle, item_emb_dim,
          hid_dim1, hid_dim2, hid_dim3, lr_emb, lr_gcn, l2_emb, l2_gcn,
          epochs):
    # init
    if dataset == 'LastFM':
        # use LastFM dataset
        data_obj = LastfmData()
    elif dataset == 'Diginetica':
        # use Diginetica dataset
        data_obj = DigineticaData()
    else:
        # use yoochoose1_64 dataset
        data_obj = YoochooseData(dataset=dataset)

    # gpu device
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # init A
    # A: type=scipy.sparse
    A = data_obj.get_decay_adj(
        data_obj.d, tail=None,
        alpha=alpha) if A_type == 'decay' else data_obj.get_gcn_adj(data_obj.d)
    # normalize the adj: type = 'random_walk' (row-normalized) or type = 'symmetric'
    if normalize_type == 'random_walk':
        print('----------------------------------')
        print('Normalize_type is random_walk:')
        A = spmx_1_normalize(A)
        print('----------------------------------')
    else:
        print('----------------------------------')
        print('Normalize_type is symmetric:')
        A = spmx_sym_normalize(A)
        print('----------------------------------')
    # transform the adj to a sparse cpu tensor
    A = spmx2torch_sparse_tensor(A)

    # get cpu tensor: labels
    labels = data_obj.get_labels(data_obj.d)

    # get cpu tensor: item_idxes
    _, _, item_idxes = data_obj.get_indexes()

    if session_type == 'session_hot_items':
        # get cpu sparse tensor: session adj
        session_adj = data_obj.get_session_adj(data_obj.d, alpha=alpha)
    else:
        # if not use session adj, then session_adj = None
        session_adj = None

    if session_type == 'session_last_item':
        # get cpu LongTensor: session_last_item
        session_last_item = data_obj.get_session_last_item(data_obj.d).long()
    else:
        # if not use session_last_item, then session_last_item = None
        session_last_item = None

    # get pretrained_item_emb
    if pretrained_item_emb == 'True' and alpha != 0.0:
        print('----------------------------------')
        if dataset == 'yoochoose1_64':
            print('Use yoochoose1_64 pretrained item embedding: ' +
                  'pretrained_emb' + str(alpha) + '.pkl')
            pretrained_item_emb = torch.load(
                './yoo1_64_pretrained_item_emb/pretrained_emb' + str(alpha) +
                '.pkl')['item_emb.weight']
        elif dataset == 'yoochoose1_8':
            print('Use yoochoose1_8 pretrained item embedding: ' +
                  'pretrained_emb' + str(alpha) + '.pkl')
            pretrained_item_emb = torch.load(
                './yoo1_8_pretrained_item_emb/pretrained_emb' + str(alpha) +
                '.pkl')['item_emb.weight']
        elif dataset == 'LastFM':
            print('Use LastFM pretrained item embedding: ' + 'pretrained_emb' +
                  str(alpha) + '.pkl')
            pretrained_item_emb = torch.load(
                './lastfm_pretrained_item_emb/pretrained_emb' + str(alpha) +
                '.pkl')['item_emb.weight']
        else:
            print('Use Diginetica pretrained item embedding: ' +
                  'pretrained_emb' + str(alpha) + '.pkl')
            pretrained_item_emb = torch.load(
                './dig_pretrained_item_emb/pretrained_emb' + str(alpha) +
                '.pkl')['item_emb.weight']
        print('----------------------------------')
    else:
        print('----------------------------------')
        print('Not using pretrained item embedding:')
        pretrained_item_emb = None
        print('----------------------------------')

    # get cpu LongTensor: item_emb_idxes
    item_emb_idxes = torch.arange(data_obj.item_size).long()

    # transform all tensor to cuda
    A = A.to(device)
    labels = labels.to(device)
    item_idxes = item_idxes.to(device)
    item_emb_idxes = item_emb_idxes.to(device)
    if session_last_item is not None:
        session_last_item = session_last_item.to(device)
    if session_adj is not None:
        session_adj = session_adj.to(device)

    # define the evaluation objects
    evalution5 = Evaluation(k=5)
    evalution10 = Evaluation(k=10)
    evalution15 = Evaluation(k=15)
    evalution20 = Evaluation(k=20)

    # define the session datasets and loaders
    trainset = SessionDataset(train_size=data_obj.train_size,
                              test_size=data_obj.test_size,
                              train=True,
                              labels=labels)
    trainloader = DataLoader(dataset=trainset,
                             batch_size=batch_size,
                             shuffle=shuffle)
    testset = SessionDataset(train_size=data_obj.train_size,
                             test_size=data_obj.test_size,
                             train=False,
                             labels=labels)
    testloader = DataLoader(dataset=testset,
                            batch_size=batch_size,
                            shuffle=False)

    # define model, then transform to cuda
    if model_type == 'ngcf1_session_hot_items':
        # use ngcf1_session_hot_items model:
        model = ngcf1_session_hot_items(
            item_nums=data_obj.item_size,
            item_emb_dim=item_emb_dim,
            hid_dim1=hid_dim1,
            pretrained_item_emb=pretrained_item_emb)
    elif model_type == 'ngcf2_session_hot_items':
        # use ngcf2_session_hot_items model:
        model = ngcf2_session_hot_items(
            item_nums=data_obj.item_size,
            item_emb_dim=item_emb_dim,
            hid_dim1=hid_dim1,
            hid_dim2=hid_dim2,
            pretrained_item_emb=pretrained_item_emb)
    elif model_type == 'ngcf3_session_hot_items':
        # use ngcf3_session_hot_items model:
        model = ngcf3_session_hot_items(
            item_nums=data_obj.item_size,
            item_emb_dim=item_emb_dim,
            hid_dim1=hid_dim1,
            hid_dim2=hid_dim2,
            hid_dim3=hid_dim3,
            pretrained_item_emb=pretrained_item_emb)
    else:
        # use ngcf2_session_last_item model:
        model = ngcf2_session_last_item(
            item_nums=data_obj.item_size,
            item_emb_dim=item_emb_dim,
            hid_dim1=hid_dim1,
            hid_dim2=hid_dim2,
            pretrained_item_emb=pretrained_item_emb)
    model.to(device)

    # define loss and optim
    criterion = nn.CrossEntropyLoss()
    if model_type == 'ngcf1_session_hot_items':
        # use ngcf1_session_hot_items model parameters:
        optim_emb = optim.Adagrad([{
            'params': model.item_emb.parameters()
        }],
                                  lr=lr_emb,
                                  weight_decay=l2_emb)
        optim_gcn = optim.Adam([{
            'params': model.gconv1.parameters()
        }],
                               lr=lr_gcn,
                               weight_decay=l2_gcn)

    elif model_type == 'ngcf2_session_hot_items':
        # use ngcf2_session_hot_items model parameters:
        optim_emb = optim.Adagrad([{
            'params': model.item_emb.parameters()
        }],
                                  lr=lr_emb,
                                  weight_decay=l2_emb)
        optim_gcn = optim.Adam([{
            'params': model.gconv1.parameters()
        }, {
            'params': model.gconv2.parameters()
        }],
                               lr=lr_gcn,
                               weight_decay=l2_gcn)

    elif model_type == 'ngcf3_session_hot_items':
        # use ngcf3_session_hot_items model parameters:
        optim_emb = optim.Adagrad([{
            'params': model.item_emb.parameters()
        }],
                                  lr=lr_emb,
                                  weight_decay=l2_emb)
        optim_gcn = optim.Adam([{
            'params': model.gconv1.parameters()
        }, {
            'params': model.gconv2.parameters()
        }, {
            'params': model.gconv3.parameters()
        }],
                               lr=lr_gcn,
                               weight_decay=l2_gcn)

    else:
        # use ngcf2_session_last_item model parameters:
        optim_emb = optim.Adagrad([{
            'params': model.item_emb.parameters()
        }],
                                  lr=lr_emb,
                                  weight_decay=l2_emb)
        optim_gcn = optim.Adam([{
            'params': model.gconv1.parameters()
        }, {
            'params': model.gconv2.parameters()
        }],
                               lr=lr_gcn,
                               weight_decay=l2_gcn)

    # histories for the recall, mrr, and norm figures
    fig_recalls = []
    fig_mrrs = []
    fig_emb_norms = []
    fig_gcn_norms = []
    fig_epochs = []

    # train epochs
    for epoch in range(epochs):
        # model training
        start = time.time()

        # train evaluation dicts
        recall = {'5': [], '10': [], '15': [], '20': []}
        mrr = {'5': [], '10': [], '15': [], '20': []}

        # test evaluation dicts
        r = {'5': [], '10': [], '15': [], '20': []}
        m = {'5': [], '10': [], '15': [], '20': []}

        # loss list
        losses = []

        model.train()
        for i, data in enumerate(trainloader):
            # zero optim
            optim_emb.zero_grad()
            optim_gcn.zero_grad()

            # batch inputs
            batch_idxes, batch_labels = data[0].long().to(
                device), data[1].long().to(device)

            # predicting
            if model_type == 'ngcf1_session_hot_items':
                # use ngcf1_session_hot_items model to predict
                outs = model(batch_idxes, A, item_idxes, session_adj,
                             item_emb_idxes)
            elif model_type == 'ngcf2_session_hot_items':
                # use ngcf2_session_hot_items model to predict
                outs = model(batch_idxes, A, item_idxes, session_adj,
                             item_emb_idxes)
            elif model_type == 'ngcf3_session_hot_items':
                # use ngcf3_session_hot_items model to predict
                outs = model(batch_idxes, A, item_idxes, session_adj,
                             item_emb_idxes)
            else:
                # use ngcf2_session_last_item model to predict
                outs = model(batch_idxes, A, item_idxes, session_last_item,
                             item_emb_idxes)

            # loss
            loss = criterion(outs, batch_labels)

            # backward
            loss.backward()

            # optim step
            optim_emb.step()
            optim_gcn.step()

            # evaluation at k=5, 10, 15, 20
            recall['5'].append(evalution5.evaluate(outs, batch_labels)[0])
            recall['10'].append(evalution10.evaluate(outs, batch_labels)[0])
            recall['15'].append(evalution15.evaluate(outs, batch_labels)[0])
            recall['20'].append(evalution20.evaluate(outs, batch_labels)[0])
            mrr['5'].append(evalution5.evaluate(outs, batch_labels)[1])
            mrr['10'].append(evalution10.evaluate(outs, batch_labels)[1])
            mrr['15'].append(evalution15.evaluate(outs, batch_labels)[1])
            mrr['20'].append(evalution20.evaluate(outs, batch_labels)[1])

            # losses
            losses.append(loss.item())

            # print loss, recall, mrr
            if i % 50 == 49:
                print('[{0: 2d}, {1:5d}]  loss:{2:.4f}'.format(
                    epoch + 1, i + 1, np.mean(losses)))
                print('[recall@5 ]:{0:.4f}  [mrr@5 ]:{1:.4f}'.format(
                    np.mean(recall['5']), np.mean(mrr['5'])))
                print('[recall@10]:{0:.4f}  [mrr@10]:{1:.4f}'.format(
                    np.mean(recall['10']), np.mean(mrr['10'])))
                print('[recall@15]:{0:.4f}  [mrr@15]:{1:.4f}'.format(
                    np.mean(recall['15']), np.mean(mrr['15'])))
                print('[recall@20]:{0:.4f}  [mrr@20]:{1:.4f}'.format(
                    np.mean(recall['20']), np.mean(mrr['20'])))

        # print gcn_norm, emb_norm
        emb_norm = get_norm(model, 'emb')
        gcn_norm = get_norm(model, 'gcn')
        fig_emb_norms.append(emb_norm)
        fig_gcn_norms.append(gcn_norm)
        print('[gcn_norm]:{0:.4f}  [emb_norm]:{1:.4f}'.format(
            gcn_norm, emb_norm))

        # epoch time
        print('[epoch time]:{0:.4f}'.format(time.time() - start))

        # save model
        if epoch % 10 == 9:
            torch.save(
                model.state_dict(),
                'params' + model_type + '-Alpha' + str(alpha) + '_' +
                '_lr_emb' + str(lr_emb) + '_l2_emb' + str(l2_emb) + '_lr_gcn' +
                str(lr_gcn) + '_l2_gcn' + str(l2_gcn) + '.pkl')

        # model eval
        model.eval()
        with torch.no_grad():
            for j, d in enumerate(testloader):
                # test batch inputs
                b_idxes, b_labels = d[0].long().to(device), d[1].long().to(
                    device)

                # predicting
                if model_type == 'ngcf1_session_hot_items':
                    # use ngcf1_session_hot_items model to predict
                    o = model(b_idxes, A, item_idxes, session_adj,
                              item_emb_idxes)
                elif model_type == 'ngcf2_session_hot_items':
                    # use ngcf2_session_hot_items model to predict
                    o = model(b_idxes, A, item_idxes, session_adj,
                              item_emb_idxes)
                elif model_type == 'ngcf3_session_hot_items':
                    # use ngcf3_session_hot_items model to predict
                    o = model(b_idxes, A, item_idxes, session_adj,
                              item_emb_idxes)
                else:
                    # use ngcf2_session_last_item model to predict
                    o = model(b_idxes, A, item_idxes, session_last_item,
                              item_emb_idxes)

                # evaluation at k=5, 10, 15, 20
                r['5'].append(evalution5.evaluate(o, b_labels)[0])
                r['10'].append(evalution10.evaluate(o, b_labels)[0])
                r['15'].append(evalution15.evaluate(o, b_labels)[0])
                r['20'].append(evalution20.evaluate(o, b_labels)[0])
                m['5'].append(evalution5.evaluate(o, b_labels)[1])
                m['10'].append(evalution10.evaluate(o, b_labels)[1])
                m['15'].append(evalution15.evaluate(o, b_labels)[1])
                m['20'].append(evalution20.evaluate(o, b_labels)[1])

            # print test recall mrr
            print('[{0: 2d}]'.format(epoch + 1))
            print('[recall@5 ]:{0:.4f}  [mrr@5 ]:{1:.4f}'.format(
                np.mean(r['5']), np.mean(m['5'])))
            print('[recall@10]:{0:.4f}  [mrr@10]:{1:.4f}'.format(
                np.mean(r['10']), np.mean(m['10'])))
            print('[recall@15]:{0:.4f}  [mrr@15]:{1:.4f}'.format(
                np.mean(r['15']), np.mean(m['15'])))
            print('[recall@20]:{0:.4f}  [mrr@20]:{1:.4f}'.format(
                np.mean(r['20']), np.mean(m['20'])))

            # plot recall, mrr, and norms
            fig_epochs.append(epoch)
            fig_recalls.append(np.mean(r['20']))
            fig_mrrs.append(np.mean(m['20']))
            plt_evalution(fig_epochs,
                          fig_recalls,
                          fig_mrrs,
                          k=20,
                          alpha=alpha,
                          lr_emb=lr_emb,
                          l2_emb=l2_emb,
                          lr_gcn=lr_gcn,
                          l2_gcn=l2_gcn,
                          model_type=model_type)
            plt_norm(fig_epochs,
                     fig_emb_norms,
                     fig_gcn_norms,
                     alpha=alpha,
                     lr_emb=lr_emb,
                     l2_emb=l2_emb,
                     lr_gcn=lr_gcn,
                     l2_gcn=l2_gcn,
                     model_type=model_type)
Example #25
    def step(self, batch_idx, mode):
        if mode == "train" and self.mode == "test":
            raise Exception("Cannot train during test mode")

        if mode == "train":
            theano_fn = self.train_fn
            inputs = self.train_input
            qs = self.train_q
            answers = self.train_answer
            input_masks = self.train_input_mask
            qinfo = self.train_qinfo
        elif mode == "train_val":
            theano_fn = self.test_fn
            inputs = self.train_val_input
            qs = self.train_val_q
            answers = self.train_val_answer
            input_masks = self.test_input_mask
            qinfo = self.train_val_qinfo
        elif mode == 'test':
            theano_fn = self.test_fn
            inputs = self.test_input
            qs = self.test_q
            answers = self.test_answer
            input_masks = self.test_input_mask
            qinfo = self.test_qinfo
        else:
            raise Exception("Invalid mode")

        num_ma_opts = answers.shape[1]

        p_q = np.zeros((len(batch_idx), 300),
                       dtype='float32')  # question input vector
        target = np.zeros((len(batch_idx)))  # answer (as a single number)
        p_inp = np.zeros(
            (len(batch_idx), self.max_sent_len, self.sent_vector_size),
            dtype='float32')  # story statements
        p_ans = np.zeros((len(batch_idx), num_ma_opts, 300),
                         dtype='float32')  # multiple choice answers
        #b_qinfo = []
        input_mask = input_masks
        for b, bi in enumerate(batch_idx):
            inp = inputs[qinfo[bi]['qid']]
            q = qs[bi]
            ans = answers[bi]
            target[b] = qinfo[bi]['correct_option']
            for i in range(len(inp)):
                p_inp[b][i] = inp[i]
            for j in range(len(ans)):
                p_ans[b][j] = self.pos_encodings(ans[j])
            p_q[b] = self.pos_encodings(q)
            #b_qinfo.append(qinfo[bi])

        ret = theano_fn(p_inp, p_q, p_ans, target)
        param_norm = np.max(
            [utils.get_norm(x.get_value()) for x in self.params])

        return {
            "prediction": np.array(ret[0]),
            "answers": np.array(target),
            "current_loss": ret[1],
            "skipped": 0,
            "log": "pn: %.3f" % param_norm,
            "inp": np.array([inp]),
            "q": np.array([q]),
            "probabilities": np.array([ret[0]]),
            "attentions": np.array([ret[2]]),
        }
Example #26
	def get_vel_lin(self):
		return utils.get_norm(self.vel_lin)
Example #27
    def step(self, batch_index, mode):
        if mode == "train" and self.mode == "test":
            raise Exception("Cannot train during test mode")

        if mode == "train":
            theano_fn = self.train_fn
            inputs = self.train_input
            qs = self.train_q
            answers = self.train_answer
            ca = self.train_choices
            cb = self.train_choices
            #cc = self.train_choices
            #cd = self.train_choices
            input_masks = self.train_input_mask
        elif mode == "test":
            theano_fn = self.test_fn
            inputs = self.test_input
            qs = self.test_q
            answers = self.test_answer
            ca = self.test_choices
            cb = self.test_choices
            #cc = self.test_choices
            #cd = self.test_choices
            input_masks = self.test_input_mask
        else:
            raise Exception("Invalid mode")

        inp = inputs[batch_index]
        q = qs[batch_index]
        ans = answers[batch_index]
        ca = ca[batch_index][0]
        cb = cb[batch_index][1]
        #cc = cc[batch_index][2]
        #cd = cd[batch_index][3]
        input_mask = input_masks[batch_index]

        skipped = 0
        grad_norm = float('NaN')

        if mode == 'train':
            #gradient_value = self.get_gradient_fn(inp, q, ans, ca, cb, cc, cd, input_mask)
            gradient_value = self.get_gradient_fn(inp, q, ans, ca, cb,
                                                  input_mask)
            grad_norm = np.max([utils.get_norm(x) for x in gradient_value])

            if np.isnan(grad_norm):
                print("==> gradient is nan at index %d." % batch_index)
                print("==> skipping")
                skipped = 1

        if skipped == 0:
            #ret = theano_fn(inp, q, ans, ca, cb, cc, cd, input_mask)
            ret = theano_fn(inp, q, ans, ca, cb, input_mask)
        else:
            ret = [float('NaN'), float('NaN')]

        param_norm = np.max(
            [utils.get_norm(x.get_value()) for x in self.params])

        return {
            "prediction": np.array([ret[0]]),
            "answers": np.array([ans]),
            "current_loss": ret[1],
            "skipped": skipped,
            "log": "pn: %.3f \t gn: %.3f" % (param_norm, grad_norm)
        }
Example #28
    def step(self, batch_index, mode):
        if mode == "train" and self.mode == "test":
            raise Exception("Cannot train during test mode")
        
        if mode == "train":
            theano_fn = self.train_fn 
            inputs = self.train_input
            qs = self.train_q
            answers = self.train_answer
            choices = self.train_choices
            input_masks = self.train_input_mask
        elif mode == "test":    
            theano_fn = self.test_fn 
            inputs = self.test_input
            qs = self.test_q
            answers = self.test_answer
            choices = self.test_choices
            input_masks = self.test_input_mask
        elif mode == "dev":    
            theano_fn = self.test_fn 
            inputs = self.dev_input
            qs = self.dev_q
            answers = self.dev_answer
            choices = self.dev_choices
            input_masks = self.dev_input_mask
        else:
            raise Exception("Invalid mode")
            
        inp = inputs[batch_index]
        q = qs[batch_index]
        ans = answers[batch_index]
        ca = choices[batch_index][0]
        cb = choices[batch_index][1]
        cc = choices[batch_index][2]
        cd = choices[batch_index][3]
        input_mask = input_masks[batch_index]

        skipped = 0
        grad_norm = float('NaN')
        
        if mode == 'train':
            gradient_value = self.get_gradient_fn(inp, q, ans, ca, cb, cc, cd, input_mask)
            grad_norm = np.max([utils.get_norm(x) for x in gradient_value])
            
            if np.isnan(grad_norm):
                print("==> gradient is nan at index %d." % batch_index)
                print("==> skipping")
                skipped = 1
        
        if skipped == 0:
            ret = theano_fn(inp, q, ans, ca, cb, cc, cd, input_mask)
        else:
            ret = [float('NaN'), float('NaN')]
        
        param_norm = np.max([utils.get_norm(x.get_value()) for x in self.params])
        
        return {"prediction": np.array([ret[0]]),
                "answers": np.array([ans]),
                "current_loss": ret[1],
                "skipped": skipped,
                "log": "pn: %.3f \t gn: %.3f" % (param_norm, grad_norm)
                }