def step(self, batch_index, mode):
    '''
    This function is one step in an epoch and will run a training or
    testing step depending on the parameter.

    Args:
        batch_index (int): step number for the epoch
        mode (str): 'train' or 'test', depending on the mode of the run

    Returns:
        Dictionary of predictions, answers, loss, number skipped, and
        the parameter and gradient norms
    '''
    if mode == "train" and self.mode == "test":
        raise Exception("Cannot train during test mode")

    if mode == "train":
        theano_fn = self.train_fn  # Theano function set
        inputs = self.train_input
        qs = self.train_q
        answers = self.train_answer
        input_masks = self.train_input_mask
    elif mode == "test":
        theano_fn = self.test_fn
        inputs = self.test_input
        qs = self.test_q
        answers = self.test_answer
        input_masks = self.test_input_mask
    else:
        raise Exception("Invalid mode")

    inp = inputs[batch_index]
    q = qs[batch_index]
    ans = answers[batch_index]
    input_mask = input_masks[batch_index]

    skipped = 0
    grad_norm = float('NaN')

    if mode == 'train':
        gradient_value = self.get_gradient_fn(inp, q, ans, input_mask)  # Compute the gradients
        grad_norm = np.max([utils.get_norm(x) for x in gradient_value])
        if np.isnan(grad_norm):
            print("==> gradient is nan at index %d." % batch_index)
            print("==> skipping")
            skipped = 1

    if skipped == 0:
        ret = theano_fn(inp, q, ans, input_mask)  # Run the compiled Theano function
    else:
        ret = [-1, -1]

    param_norm = np.max([utils.get_norm(x.get_value()) for x in self.params])

    return {"prediction": np.array([ret[0]]),
            "answers": np.array([ans]),
            "current_loss": ret[1],
            "skipped": skipped,
            "log": "pn: %.3f \t gn: %.3f" % (param_norm, grad_norm),
            }
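# `utils.get_norm` is never shown in this section. A minimal sketch,
# assuming it simply returns the L2 (Frobenius) norm of a numpy array of
# parameters or gradients, which is all the trainers above need for the
# NaN check and the "pn"/"gn" log fields:
import numpy as np

def get_norm(x):
    # Assumed helper: flatten the array and take its Euclidean norm.
    x = np.array(x)
    return np.sum(x ** 2) ** 0.5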
def conjugate_gradient(params, func, init_values, stop_condition=1e-2):
    # * PRP (Polak-Ribiere-Polyak) variant
    values = Matrix(init_values)
    lam = Symbol('lam')
    beta = 0
    previous_d = 0
    previous_g = 0
    step = 0
    while True:
        g = get_grad(params, func)
        g = g.subs(dict(zip(params, list(values))))
        if get_norm(g) <= stop_condition:
            return list(values), func.subs(dict(zip(params, list(values))))
        if previous_g != 0:
            beta = (g.T * (g - previous_g)) / (get_norm(previous_g) ** 2)
            d = -g + beta[0] * previous_d
        else:
            d = -g
        # Exact line search along d: minimize func(values + lam * d) over lam
        lam_func = func.subs(dict(zip(params, list(values + lam * d))))
        lam_value = get_stagnation(lam_func)
        values = values + lam_value * d
        previous_d = d
        previous_g = g
        f_value = func.subs(dict(zip(params, list(values))))
        print('step: {} params: {} f: {}'.format(step, list(values), f_value))
        step += 1
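# The four classical optimizers in this section (conjugate_gradient,
# quasi_newton, newton, steepest_descent) all rely on helpers that are not
# shown here. Below are minimal sketches inferred from the call sites, not
# the original implementations; they assume SymPy throughout.
from sympy import Matrix, Symbol, diff, solve, sqrt

def get_grad(params, func):
    # Symbolic gradient of func as a column vector.
    return Matrix([diff(func, p) for p in params])

def get_hessian(params, func):
    # Symbolic Hessian matrix (used by newton).
    return Matrix([[diff(func, p1, p2) for p2 in params] for p1 in params])

def get_norm(g):
    # Euclidean norm of a (possibly symbolic) column vector.
    return sqrt(sum(x ** 2 for x in g))

def get_stagnation(lam_func):
    # Exact line search: stationary point of the one-dimensional function
    # of lam; assumes a unique real critical point (true for quadratics).
    lam = Symbol('lam')
    return solve(diff(lam_func, lam), lam)[0]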
def step(self, batch_index, mode):
    if mode == "train" and self.mode == "test":
        raise Exception("Cannot train during test mode")

    if mode == "train":
        theano_fn = self.train_fn
        inputs = self.train_input
        qs = self.train_q
        answers = self.train_answer
        input_masks = self.train_input_mask
        sgates = self.train_gates
    elif mode == "test":
        theano_fn = self.test_fn
        inputs = self.test_input
        qs = self.test_q
        answers = self.test_answer
        input_masks = self.test_input_mask
        sgates = self.test_gates
    else:
        raise Exception("Invalid mode")

    inp = inputs[batch_index]
    q = qs[batch_index]
    ans = answers[batch_index]
    input_mask = input_masks[batch_index]
    sgate = sgates[batch_index]

    skipped = 0
    grad_norm = float('NaN')

    if mode == 'train':
        gradient_value = self.get_gradient_fn(inp, q, ans, input_mask, sgate)
        grad_norm = np.max([utils.get_norm(x) for x in gradient_value])
        if np.isnan(grad_norm):
            # print("==> gradient is nan at index %d." % batch_index)
            # print("==> skipping")
            skipped = 1

    if skipped == 0:
        ret = theano_fn(inp, q, ans, input_mask, sgate)
    else:
        ret = [-1, -1, -1, -1, -1]

    param_norm = np.max([utils.get_norm(x.get_value()) for x in self.params])

    return {
        "prediction": np.array([ret[0]]),
        "answers": np.array([ans]),
        "current_loss": ret[1],
        "log": "pn: %.3f" % param_norm,
        "skipped": skipped
    }
def step(self, batch_index, mode):
    if mode == "train" and self.mode == "test":
        raise Exception("Cannot train during test mode")

    if mode == "train":
        theano_fn = self.train_fn
        inputs = self.train_input
        qs = self.train_q
        answers = self.train_answer
        fact_counts = self.train_fact_count
        input_masks = self.train_input_mask
    if mode == "test":
        theano_fn = self.test_fn
        inputs = self.test_input
        qs = self.test_q
        answers = self.test_answer
        fact_counts = self.test_fact_count
        input_masks = self.test_input_mask

    start_index = batch_index * self.batch_size
    inp = inputs[start_index:start_index + self.batch_size]
    q = qs[start_index:start_index + self.batch_size]
    ans = answers[start_index:start_index + self.batch_size]
    fact_count = fact_counts[start_index:start_index + self.batch_size]
    input_mask = input_masks[start_index:start_index + self.batch_size]

    skipped = 0
    grad_norm = float('NaN')

    if mode == 'train':
        gradient_value = self.get_gradient_fn(inp, q, ans, fact_count, input_mask)
        grad_norm = np.max([utils.get_norm(x) for x in gradient_value])
        if np.isnan(grad_norm):
            print("==> gradient is nan at index %d." % batch_index)
            print("==> skipping")
            skipped = 1

    if skipped == 0:
        ret = theano_fn(inp, q, ans, fact_count, input_mask)
    else:
        ret = [float('NaN'), float('NaN')]

    param_norm = np.max([utils.get_norm(x.get_value()) for x in self.params])

    return {"prediction": ret[0],
            "answers": ans,
            "current_loss": ret[1],
            "skipped": skipped,
            "grad_norm": grad_norm,
            "param_norm": param_norm,
            "log": "",
            }
def step(self, batch_index, mode):
    if mode == "train" and self.mode == "test":
        raise Exception("Cannot train during test mode")

    if mode == "train":
        theano_fn = self.train_fn
        inputs = self.train_input
        qs = self.train_q
        answers = self.train_answer
        input_masks = self.train_input_mask
    elif mode == "test":
        theano_fn = self.test_fn
        inputs = self.test_input
        qs = self.test_q
        answers = self.test_answer
        input_masks = self.test_input_mask
    else:
        raise Exception("Invalid mode")

    inp = inputs[batch_index]
    q = qs[batch_index]
    ans = answers[batch_index]
    input_mask = input_masks[batch_index]

    ret = theano_fn(inp, q, ans, input_mask)
    param_norm = np.max([utils.get_norm(x.get_value()) for x in self.params])

    return {"prediction": np.array([ret[0]]),
            "answers": np.array([ans]),
            "current_loss": ret[1],
            "skipped": 0,
            "log": "pn: %.3f" % param_norm,
            }
def step(self, batch_index, mode):
    if mode == "train" and self.mode == "test":
        raise Exception("Cannot train during test mode")

    if mode == "train":
        theano_fn = self.train_fn
    if mode == "test":
        theano_fn = self.test_fn

    inp, q, ans, ans_inp, ans_mask = self._process_batch_sind(batch_index, mode)
    ret = theano_fn(inp, q, ans, ans_mask, ans_inp)
    # theano_fn.profile.print_summary()
    # sys.exit()

    param_norm = np.max([utils.get_norm(x.get_value()) for x in self.params])

    return {
        "prediction": ret[0],
        "answers": ans,
        "current_loss": ret[1],
        "skipped": 0,
        "log": "pn: %.3f" % param_norm,
    }
def step(self, batch_index, mode):
    if mode == "train" and self.mode == "test":
        raise Exception("Cannot train during test mode")

    if mode == "train":
        theano_fn = self.train_fn
        inputs = self.train_input
        qs = self.train_q
        answers = self.train_answer
        fact_counts = self.train_fact_count
        input_masks = self.train_input_mask
    if mode == "test":
        theano_fn = self.test_fn
        inputs = self.test_input
        qs = self.test_q
        answers = self.test_answer
        fact_counts = self.test_fact_count
        input_masks = self.test_input_mask

    start_index = batch_index * self.batch_size
    inp = inputs[start_index:start_index + self.batch_size]
    q = qs[start_index:start_index + self.batch_size]
    ans = answers[start_index:start_index + self.batch_size]
    fact_count = fact_counts[start_index:start_index + self.batch_size]
    input_mask = input_masks[start_index:start_index + self.batch_size]

    ret = theano_fn(inp, q, ans, fact_count, input_mask)
    param_norm = np.max([utils.get_norm(x.get_value()) for x in self.params])

    return {"prediction": ret[0],
            "answers": ans,
            "current_loss": ret[1],
            "skipped": 0,
            "log": "pn: %.3f" % param_norm,
            }
def quasi_newton(params, func, init_values, stop_condition=1e-5):
    # * BFGS
    values = Matrix(init_values)
    lam = Symbol('lam')
    next_g = 0
    next_values = 0
    h = eye(len(params))  # approximation of the *inverse* Hessian
    step = 0
    while True:
        g = get_grad(params, func)
        g = g.subs(dict(zip(params, list(values))))
        # h already tracks the inverse Hessian, so no extra inversion is needed
        d = -h * g
        lam_func = func.subs(dict(zip(params, list(values + lam * d))))
        lam_value = get_stagnation(lam_func)
        next_values = values + lam_value * d
        if get_norm(g) <= stop_condition:
            return list(values), func.subs(dict(zip(params, list(values))))
        else:
            next_g = get_grad(params, func)
            next_g = next_g.subs(dict(zip(params, list(next_values))))
            s = next_values - values
            y = next_g - g
            # BFGS update of the inverse-Hessian approximation
            h = (eye(len(params)) - (s * y.T) / (s.T * y)[0]) * h \
                * (eye(len(params)) - (s * y.T) / (s.T * y)[0]).T \
                + (s * s.T) / (s.T * y)[0]
            values = next_values
        f_value = func.subs(dict(zip(params, list(values))))
        print('step: {} params: {} f: {}'.format(step, list(values), f_value))
        step += 1
def measure_velocity_sensor(self, poiList, rover):
    min_dist = self.min_sensor_dist_sqr
    max_dist = self.max_sensor_dist
    sums = np.zeros(4)
    for poi in poiList:
        # get quadrant of POI
        vect = utils.vect_sub(poi.pos, rover.pos)
        dist = utils.get_norm(vect)
        angle = utils.get_angle(vect) % (2 * math.pi)  # between 0 and 2*pi
        relative_angle = (angle - rover.heading + math.pi / 2) % (2 * math.pi)
        q = utils.get_quadrant(relative_angle) - 1
        # get relative velocity of POI to agent
        rel_vel_vect = poi.vel_lin
        rel_pos_vect = utils.vect_sub(rover.pos, poi.pos)
        rel_pos_norm = utils.get_norm(rel_pos_vect)
        rel_pos_unit = [rel_pos_vect[0] / rel_pos_norm,
                        rel_pos_vect[1] / rel_pos_norm]
        dot = np.dot(rel_pos_unit, rel_vel_vect)
        normalized_dot = poi.value * dot / rel_pos_norm ** 2
        sums[q] += normalized_dot
    return list(sums)
def newton(params, func, init_values, stop_condition=1e-2):
    values = Matrix(init_values)
    step = 0
    while True:
        g = get_grad(params, func)
        g = g.subs(dict(zip(params, list(values))))
        if get_norm(g) <= stop_condition:
            return list(values), func.subs(dict(zip(params, list(values))))
        h = get_hessian(params, func)
        h = h.subs(dict(zip(params, list(values))))
        # Newton step: x <- x - H^(-1) g
        values = values - h ** (-1) * g
        f_value = func.subs(dict(zip(params, list(values))))
        print('step: {} params: {} f: {}'.format(step, list(values), f_value))
        step += 1
def return_sensor_rover(self, roverList, quadrant, max_dist=500):
    min_dist = 10
    total = 0
    for rover in roverList:
        vect = utils.vect_sub(rover.pos, self.pos)
        dist = utils.get_norm(vect)
        angle = utils.get_angle(vect) % (2 * math.pi)  # between 0 and 2*pi
        relative_angle = (angle - self.heading + math.pi / 2) % (2 * math.pi)
        # print('Vect: ', vect)
        # print('Angle: ', angle * 360 / 2 / math.pi, relative_angle * 360 / 2 / math.pi)
        if dist < max_dist and utils.check_quadrant(relative_angle, quadrant):
            # print('I SEE YOU', quadrant)
            total += 1 / max(dist ** 2, min_dist ** 2)
    return total
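# The two rover sensors above call into a `utils` module that is not part
# of this section. Minimal sketches of those geometry helpers, inferred
# from the call sites (vect_sub, get_norm, get_angle, get_quadrant,
# check_quadrant); the real module may differ:
import math

def vect_sub(a, b):
    # Component-wise difference a - b of two 2-D points.
    return [a[0] - b[0], a[1] - b[1]]

def get_norm(v):
    # Euclidean length of a 2-D vector.
    return math.sqrt(v[0] ** 2 + v[1] ** 2)

def get_angle(v):
    # Heading of v in radians, measured from the positive x-axis.
    return math.atan2(v[1], v[0])

def get_quadrant(angle):
    # Quadrant 1-4 of an angle already reduced to [0, 2*pi).
    return int(angle // (math.pi / 2)) + 1

def check_quadrant(angle, quadrant):
    # True if the angle falls in the requested quadrant.
    return get_quadrant(angle) == quadrant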
def steepest_descent(params, func, init_values, stop_condition=1e-10):
    values = Matrix(init_values)
    lam = Symbol('lam')
    step = 0
    while True:
        g = get_grad(params, func)
        g = g.subs(dict(zip(params, list(values))))
        if get_norm(g) <= stop_condition:
            return list(values), func.subs(dict(zip(params, list(values))))
        # Exact line search along the negative gradient
        lam_func = func.subs(dict(zip(params, list(values - lam * g))))
        lam_value = get_stagnation(lam_func)
        values = values - lam_value * g
        f_value = func.subs(dict(zip(params, list(values))))
        print('step: {} params: {} f: {}'.format(step, list(values), f_value))
        step += 1
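# A quick usage sketch for the optimizers above, assuming the helper
# sketches given earlier; the quadratic and starting point are
# illustrative only, not from the original code.
from sympy import Symbol

x, y = Symbol('x'), Symbol('y')
f = (x - 1) ** 2 + 10 * (y + 2) ** 2

# All four routines share the same call shape:
best_params, best_f = steepest_descent([x, y], f, [0, 0])
print(best_params, best_f)  # expected to approach [1, -2] and 0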
def learn_lr_classifier(training_corpus):
    D = get_vocabulary_size()
    labels = get_labels()
    w = [0] * (D + 1)
    norm = 1.0
    num_iters = 0
    while norm > convergence_threshold:
        num_iters += 1
        if num_iters > max_iters:
            break
        old_w = list(w)
        shuffled = list(training_corpus)
        shuffle(shuffled)
        for vector in shuffled:
            label = 1.0 if float(vector[0]) == labels[0] else 0.0
            prediction = get_prediction(vector[1:], w)
            delta = label - prediction
            update_weights(vector[1:], w, delta)
        norm = get_norm(w, old_w)
    return w
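# Hedged sketches of the helpers this trainer assumes. The names and
# signatures (get_prediction, update_weights, get_norm) are taken from
# the call sites above, but the bodies and the learning_rate constant
# are guesses, not the original module.
import math

learning_rate = 0.1  # assumed constant step size

def get_prediction(features, w):
    # Sigmoid of the weighted sum; w[0] is the bias term.
    z = w[0] + sum(float(f) * wi for f, wi in zip(features, w[1:]))
    return 1.0 / (1.0 + math.exp(-z))

def update_weights(features, w, delta):
    # One stochastic gradient step on the logistic loss.
    w[0] += learning_rate * delta
    for i, f in enumerate(features):
        w[i + 1] += learning_rate * delta * float(f)

def get_norm(w, old_w):
    # Euclidean distance between successive weight vectors,
    # used here as the convergence measure.
    return sum((a - b) ** 2 for a, b in zip(w, old_w)) ** 0.5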
def main():
    """Function to compute the 1-dimensional correlation matrix using the
    kernel density estimation method."""
    data = np.load(INPUT_PATH)
    data = get_norm(data)
    num_atoms = data.shape[1]
    corr_matrix = np.zeros((num_atoms, num_atoms))
    for row in range(num_atoms):
        # Compute only the lower-triangular part of the matrix
        for col in range(row):
            corr_matrix[row, col] = mi_kde(data, row, col)
            print(row, col, corr_matrix[row, col])
    corr_matrix = gen_corr_coef(corr_matrix, dim=1)
    np.save(file=OUTPUT_PATH, arr=corr_matrix)
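# In this script `get_norm` has yet another meaning: it preprocesses the
# trajectory data before the KDE mutual-information estimate. A minimal
# sketch, assuming it z-scores each column; this is a guess from the call
# site, not the original implementation.
import numpy as np

def get_norm(data):
    # Standardize each column (atom) to zero mean and unit variance.
    return (data - data.mean(axis=0)) / data.std(axis=0)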
def step(self, batch_index, mode):
    if mode == "train" and self.mode == "test":
        raise Exception("Cannot train during test mode")

    if mode == "train":
        theano_fn = self.train_fn
    if mode == "test" or mode == 'val':
        theano_fn = self.test_fn

    q, ans, ans_inp, ans_mask, img_ids = self._process_batch_sind(batch_index, mode)
    ret = theano_fn(q, ans, ans_mask, ans_inp)

    param_norm = np.max([utils.get_norm(x.get_value()) for x in self.params])

    return {
        "prediction": ret[0],
        "skipped": 0,
        "log": "pn: %.3f" % param_norm,
    }
def step(self, batch_index, mode):
    if mode == "train" and self.mode == "test":
        raise Exception("Cannot train during test mode")

    if mode == "train":
        theano_fn = self.train_fn
        inputs = self.train_input
        qs = self.train_q
        answers = self.train_answer
        fact_counts = self.train_fact_count
        input_masks = self.train_input_mask
    if mode == "test":
        theano_fn = self.test_fn
        inputs = self.test_input
        qs = self.test_q
        answers = self.test_answer
        fact_counts = self.test_fact_count
        input_masks = self.test_input_mask

    start_index = batch_index * self.batch_size
    inp = inputs[start_index:start_index + self.batch_size]
    q = qs[start_index:start_index + self.batch_size]
    ans = answers[start_index:start_index + self.batch_size]
    fact_count = fact_counts[start_index:start_index + self.batch_size]
    input_mask = input_masks[start_index:start_index + self.batch_size]

    inp, q, ans, fact_count, input_mask = self._process_batch(
        inp, q, ans, fact_count, input_mask)

    ret = theano_fn(inp, q, ans, fact_count, input_mask)
    param_norm = np.max([utils.get_norm(x.get_value()) for x in self.params])

    return {
        "prediction": ret[0],
        "answers": ans,
        "current_loss": ret[1],
        "skipped": 0,
        "log": "pn: %.3f" % param_norm,
    }
def step(self, batch_index, mode):
    if mode == "train" and self.mode == "test":
        raise Exception("Cannot train during test mode")

    start_index = batch_index * self.batch_size
    inputs, qs, answers, fact_counts, input_masks, img_feats = \
        self.process_vqa_data(mode, start_index, start_index + self.batch_size)

    if mode == "train":
        theano_fn = self.train_fn
        # inputs = self.process_vqa_data(self.h5file['cap_train'][start_index:start_index+self.batch_size])
        # qs = self.process_vqa_data(self.h5file['ques_train'][start_index:start_index+self.batch_size])
        # answers = self.process_vqa_data(self.h5file['answers'][start_index:start_index+self.batch_size])
        # fact_counts = np.zeros(self.batch_size, dtype="int")
        # fact_counts.fill(20)
        # input_masks = process_masks(inputs)  # figure it out
    if mode == "test":
        theano_fn = self.test_fn
        # inputs = self.process_vqa_data(self.h5file['cap_test'][start_index:start_index+self.batch_size])
        # qs = self.process_vqa_data(self.h5file['ques_test'][start_index:start_index+self.batch_size])
        # answers = self.process_vqa_data(self.h5file['ans_test'][start_index:start_index+self.batch_size])
        # fact_counts = np.zeros(self.batch_size, dtype="int")
        # fact_counts.fill(20)
        # input_masks = process_masks(inputs)  # figure it out

    inp, q, ans, fact_count, input_mask, img_feat = self._process_batch(
        inputs, qs, answers, fact_counts, input_masks, img_feats)
    img_feat = img_feat.reshape(
        (self.batch_size * self.img_seq_len, self.img_vector_size))

    ret = theano_fn(inp, q, ans, fact_count, input_mask, img_feat, self.lr)
    param_norm = np.max([utils.get_norm(x.get_value()) for x in self.params])

    return {
        "prediction": ret[0],
        "answers": ans,
        "current_loss": ret[1],
        "skipped": 0,
        "log": "pn: %.3f" % param_norm
    }
def train(dataset, alpha, A_type, normalize_type, model_pretrained_params,
          model_type, batch_size, test_batch_size, negative_nums,
          item_emb_dim, hid_dim1, hid_dim2, hid_dim3,
          lr_emb, l2_emb, lr_gcn, l2_gcn, lr_cnn, l2_cnn,
          epochs, params_file_name):
    # init
    if dataset == 'LastFM':
        # use LastFM dataset
        data_obj = LastfmData()
    elif dataset == 'Diginetica':
        # use Diginetica dataset
        data_obj = DigineticaData()
    else:
        # use yoochoose1_64 dataset
        data_obj = YoochooseData(dataset=dataset)

    # gpu device
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # init A
    # A: type=scipy.sparse
    A = data_obj.get_decay_adj(data_obj.d, tail=None, alpha=alpha) \
        if A_type == 'decay' else data_obj.get_gcn_adj(data_obj.d)

    # normalize the adj, type = 'random_walk' (row sums to 1) or 'symmetric'
    if normalize_type == 'random_walk':
        print('----------------------------------')
        print('Normalize_type is random_walk:')
        A = spmx_1_normalize(A)
        print('----------------------------------')
    else:
        print('----------------------------------')
        print('Normalize_type is symmetric:')
        A = spmx_sym_normalize(A)
        print('----------------------------------')

    # transform the adj to a sparse cpu tensor
    A = spmx2torch_sparse_tensor(A)

    # get cpu tensor: labels
    labels = data_obj.get_labels(data_obj.d)

    # get cpu sparse tensor: session adj
    SI = data_obj.get_session_adj(data_obj.d, alpha=alpha)

    # load model pretrained params
    if model_pretrained_params == 'True':
        print('----------------------------------')
        if dataset == 'LastFM':
            # use LastFM params
            print('Use LastFM model pretrained params: ' + params_file_name + '.pkl')
            pretrained_state_dict = torch.load(
                './lastfm_pretrained_params/' + params_file_name + '.pkl')
        elif dataset == 'Diginetica':
            # use Diginetica params
            print('Use Diginetica model pretrained params: ' + params_file_name + '.pkl')
            pretrained_state_dict = torch.load(
                './dig_pretrained_params/' + params_file_name + '.pkl')
        else:
            # use yoochoose1_64 params
            print('Use yoochoose1_64 model pretrained params: ' + params_file_name + '.pkl')
            pretrained_state_dict = torch.load(
                './yoo1_64_pretrained_params/' + params_file_name + '.pkl')
        print('----------------------------------')
    else:
        pretrained_state_dict = None

    # transform all tensors to cuda
    A = A.to(device)
    labels = labels.to(device)
    SI = SI.to(device)

    # define the evaluation objects
    evalution5 = Evaluation(k=5)
    evalution10 = Evaluation(k=10)
    evalution15 = Evaluation(k=15)
    evalution20 = Evaluation(k=20)

    # define the data loaders
    trainloader = SessionDataloader(train_size=data_obj.train_size,
                                    test_size=data_obj.test_size,
                                    item_size=data_obj.item_size,
                                    labels=labels,
                                    batch_size=batch_size,
                                    train=True,
                                    negative_nums=negative_nums,
                                    shuffle=True)
    testloader = SessionDataloader(train_size=data_obj.train_size,
                                   test_size=data_obj.test_size,
                                   item_size=data_obj.item_size,
                                   labels=labels,
                                   batch_size=test_batch_size * data_obj.item_size,
                                   train=False,
                                   negative_nums=negative_nums,
                                   shuffle=False)

    # define model, then transform to cuda
    if model_type == 'sgncf1_cnn':
        # use sgncf1_cnn model:
        model = sgncf1_cnn(dataset_nums=data_obj.train_size + data_obj.test_size,
                           item_nums=data_obj.item_size,
                           item_emb_dim=item_emb_dim,
                           hid_dim1=hid_dim1)
    else:
        # use sgncf2_cnn model:
        model = sgncf2_cnn(dataset_nums=data_obj.train_size + data_obj.test_size,
                           item_nums=data_obj.item_size,
                           item_emb_dim=item_emb_dim,
                           hid_dim1=hid_dim1,
                           hid_dim2=hid_dim2)
    model.to(device)

    # update model_state_dict with the pretrained parameters
    if pretrained_state_dict is not None:
        model_state_dict = model.state_dict()
        pretrained_state_dict = {k: v for k, v in pretrained_state_dict.items()
                                 if k in model_state_dict}
        model_state_dict.update(pretrained_state_dict)
        model.load_state_dict(model_state_dict)

    # define loss and optim
    criterion = nn.BCEWithLogitsLoss()
    if model_type == 'sgncf1_cnn':
        # use sgncf1 model parameters:
        optim_emb = optim.Adagrad([{'params': model.item_emb.parameters()}],
                                  lr=lr_emb, weight_decay=l2_emb)
        optim_gcn = optim.Adam([{'params': model.gconv1.parameters()}],
                               lr=lr_gcn, weight_decay=l2_gcn)
        optim_cnn = optim.Adam([{'params': model.cnn_1d.parameters()},
                                {'params': model.fc.parameters()}],
                               lr=lr_cnn, weight_decay=l2_cnn)
    else:
        # use sgncf2 model parameters:
        optim_emb = optim.Adagrad([{'params': model.item_emb.parameters()}],
                                  lr=lr_emb, weight_decay=l2_emb)
        optim_gcn = optim.Adam([{'params': model.gconv1.parameters()},
                                {'params': model.gconv2.parameters()}],
                               lr=lr_gcn, weight_decay=l2_gcn)
        optim_cnn = optim.Adam([{'params': model.cnn_1d.parameters()},
                                {'params': model.fc.parameters()}],
                               lr=lr_cnn, weight_decay=l2_cnn)

    # figure recall, mrr, norm histories
    fig_recalls = []
    fig_mrrs = []
    fig_emb_norms = []
    fig_gcn_norms = []
    fig_cnn_norms = []
    fig_epochs = []

    # train epochs
    for epoch in range(epochs):
        # model training
        start = time.time()
        # test evaluation dict
        r = {'5': [], '10': [], '15': [], '20': []}
        m = {'5': [], '10': [], '15': [], '20': []}
        # loss list
        losses = []

        model.train()
        for i, data in enumerate(trainloader):
            # zero optim
            optim_emb.zero_grad()
            optim_gcn.zero_grad()
            optim_cnn.zero_grad()
            # batch inputs
            batch_sidxes = data[:, 0].long().to(device)
            batch_iidxes = data[:, 1].long().to(device)
            batch_labels = data[:, 2].float().to(device)
            # predicting
            outs = model(batch_sidxes, batch_iidxes, A, SI)
            # loss
            loss = criterion(outs, batch_labels)
            # backward
            loss.backward()
            # optim step
            optim_emb.step()
            optim_gcn.step()
            optim_cnn.step()
            # losses
            losses.append(loss.item())
            # print loss
            if i % 20 == 19:
                print('[{0: 2d}, {1:5d}, {2: 7d}], loss:{3:.4f}'.format(
                    epoch + 1,
                    int(i * (batch_size / (negative_nums + 1))),
                    data_obj.train_size,
                    np.mean(losses)))

        # print gcn_norm, emb_norm, cnn_norm once per epoch
        emb_norm = get_norm(model, 'emb')
        gcn_norm = get_norm(model, 'gcn')
        cnn_norm = get_norm(model, 'cnn')
        fig_emb_norms.append(emb_norm)
        fig_gcn_norms.append(gcn_norm)
        fig_cnn_norms.append(cnn_norm)
        print('[gcn_norm]:{0:.4f} [emb_norm]:{1:.4f} [cnn_norm]:{2:.4f}'.format(
            gcn_norm, emb_norm, cnn_norm))
        # epoch time
        print('[epoch time]:{0:.4f}'.format(time.time() - start))

        # model eval
        model.eval()
        with torch.no_grad():
            for j, d in enumerate(testloader):
                # test batch inputs
                b_sidxes = d[0][:, 0].long().to(device)
                b_iidxes = d[0][:, 1].long().to(device)
                b_labels = d[1].to(device)
                # predicting
                o = model(b_sidxes, b_iidxes, A, SI)
                o = o.view(-1, data_obj.item_size)
                # evaluation, k=5, 10, 15, 20
                r['5'].append(evalution5.evaluate(o, b_labels)[0])
                r['10'].append(evalution10.evaluate(o, b_labels)[0])
                r['15'].append(evalution15.evaluate(o, b_labels)[0])
                r['20'].append(evalution20.evaluate(o, b_labels)[0])
                m['5'].append(evalution5.evaluate(o, b_labels)[1])
                m['10'].append(evalution10.evaluate(o, b_labels)[1])
                m['15'].append(evalution15.evaluate(o, b_labels)[1])
                m['20'].append(evalution20.evaluate(o, b_labels)[1])
                # print test info
                # print('[{0: 2d}, {1: 5d}, {2: 7d}]'.format(epoch + 1,
                #                                            j * test_batch_size,
                #                                            data_obj.test_size))

        # print test recall, mrr
        print('[{0: 2d}]'.format(epoch + 1))
        print('[recall@5 ]:{0:.4f} [mrr@5 ]:{1:.4f}'.format(
            np.sum(r['5']) / data_obj.test_size,
            np.sum(m['5']) / data_obj.test_size))
        print('[recall@10]:{0:.4f} [mrr@10]:{1:.4f}'.format(
            np.sum(r['10']) / data_obj.test_size,
            np.sum(m['10']) / data_obj.test_size))
        print('[recall@15]:{0:.4f} [mrr@15]:{1:.4f}'.format(
            np.sum(r['15']) / data_obj.test_size,
            np.sum(m['15']) / data_obj.test_size))
        print('[recall@20]:{0:.4f} [mrr@20]:{1:.4f}'.format(
            np.sum(r['20']) / data_obj.test_size,
            np.sum(m['20']) / data_obj.test_size))

        # plt recall and mrr and norm
        fig_epochs.append(epoch)
        fig_recalls.append(np.sum(r['20']) / data_obj.test_size)
        fig_mrrs.append(np.sum(m['20']) / data_obj.test_size)
        plt_evalution(fig_epochs, fig_recalls, fig_mrrs, k=20, alpha=alpha,
                      lr_emb=lr_emb, l2_emb=l2_emb, lr_gcn=lr_gcn, l2_gcn=l2_gcn,
                      model_type=model_type, lr_cnn=lr_cnn, l2_cnn=l2_cnn)
        plt_norm(fig_epochs, fig_emb_norms, fig_gcn_norms, fig_cnn_norms,
                 alpha=alpha, lr_emb=lr_emb, l2_emb=l2_emb, lr_gcn=lr_gcn,
                 l2_gcn=l2_gcn, model_type=model_type, lr_cnn=lr_cnn,
                 l2_cnn=l2_cnn)
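# These PyTorch training loops call a different get_norm(model, group)
# than the numpy version used by the Theano trainers. A minimal sketch,
# assuming the group tags ('emb', 'gcn', 'cnn') match substrings of
# parameter names; the real matching rule is not shown in this section.
import torch

def get_norm(model, group):
    # L2 norm over all parameters whose name contains the group tag.
    total = 0.0
    for name, p in model.named_parameters():
        if group in name:
            total += p.detach().norm(2).item() ** 2
    return total ** 0.5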
    'W_mem_res_in', 'W_mem_res_hid', 'b_mem_res',
    'W_mem_upd_in', 'W_mem_upd_hid', 'b_mem_upd',
    'W_mem_hid_in', 'W_mem_hid_hid', 'b_mem_hid',
    'W_b', 'W_1', 'W_2', 'b_1', 'b_2', 'W_a']

fig, ax = plt.subplots(figsize=(9, 4))

with open(file_name, 'rb') as load_file:
    loaded = pickle.load(load_file)
    loaded_params = loaded['params']

if flag:
    for i in range(len(params)):
        if params[i] in to_write:
            out_obj[params[i]] = loaded_params[i]
    with open(sys.argv[2], 'wb') as save_file:
        pickle.dump(obj=out_obj, file=save_file, protocol=-1)
    print("finish dumping file to " + sys.argv[2])

for (x, y) in zip(params, loaded_params):
    n = y.shape
    if len(n) == 1:
        n = n[0]
    else:
        n = n[0] * n[1]
    # RMS-style norm: L2 norm scaled by the square root of the element count
    norm = utils.get_norm(y) / n ** 0.5
    print(x, ' shape: ', y.shape, ', norm: ', norm, ', max: ', np.max(np.abs(y)))
    if len(y.shape) > 1:
        ax.imshow(y, cmap='Blues', interpolation='none')
        plt.title('Train. ' + x + ', norm ' + str(norm))
        fig.show()
        input_str = input("Press ENTER to continue.")
def train(dataset, alpha, A_type, normalize_type, session_type,
          pretrained_item_emb, model_type, batch_size, shuffle,
          item_emb_dim, hid_dim1, hid_dim2, hid_dim3,
          lr_emb, lr_gcn, l2_emb, l2_gcn, epochs):
    # init
    if dataset == 'LastFM':
        # use LastFM dataset
        data_obj = LastfmData()
    elif dataset == 'Diginetica':
        # use Diginetica dataset
        data_obj = DigineticaData()
    else:
        # use yoochoose1_64 dataset
        data_obj = YoochooseData(dataset=dataset)

    # gpu device
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # init A
    # A: type=scipy.sparse
    A = data_obj.get_decay_adj(data_obj.d, tail=None, alpha=alpha) \
        if A_type == 'decay' else data_obj.get_gcn_adj(data_obj.d)

    # normalize the adj, type = 'random_walk' (row sums to 1) or 'symmetric'
    if normalize_type == 'random_walk':
        print('----------------------------------')
        print('Normalize_type is random_walk:')
        A = spmx_1_normalize(A)
        print('----------------------------------')
    else:
        print('----------------------------------')
        print('Normalize_type is symmetric:')
        A = spmx_sym_normalize(A)
        print('----------------------------------')

    # transform the adj to a sparse cpu tensor
    A = spmx2torch_sparse_tensor(A)

    # get cpu tensor: labels
    labels = data_obj.get_labels(data_obj.d)

    # get cpu tensor: item_idxes
    _, _, item_idxes = data_obj.get_indexes()

    if session_type == 'session_hot_items':
        # get cpu sparse tensor: session adj
        session_adj = data_obj.get_session_adj(data_obj.d, alpha=alpha)
    else:
        # if the session adj is not used, session_adj = None
        session_adj = None

    if session_type == 'session_last_item':
        # get cpu LongTensor: session_last_item
        session_last_item = data_obj.get_session_last_item(data_obj.d).long()
    else:
        # if session_last_item is not used, session_last_item = None
        session_last_item = None

    # get pretrained_item_emb
    if pretrained_item_emb == 'True' and alpha != 0.0:
        print('----------------------------------')
        if dataset == 'yoochoose1_64':
            print('Use yoochoose1_64 pretrained item embedding: '
                  + 'pretrained_emb' + str(alpha) + '.pkl')
            pretrained_item_emb = torch.load(
                './yoo1_64_pretrained_item_emb/pretrained_emb' + str(alpha)
                + '.pkl')['item_emb.weight']
        elif dataset == 'yoochoose1_8':
            print('Use yoochoose1_8 pretrained item embedding: '
                  + 'pretrained_emb' + str(alpha) + '.pkl')
            pretrained_item_emb = torch.load(
                './yoo1_8_pretrained_item_emb/pretrained_emb' + str(alpha)
                + '.pkl')['item_emb.weight']
        elif dataset == 'LastFM':
            print('Use LastFM pretrained item embedding: '
                  + 'pretrained_emb' + str(alpha) + '.pkl')
            pretrained_item_emb = torch.load(
                './lastfm_pretrained_item_emb/pretrained_emb' + str(alpha)
                + '.pkl')['item_emb.weight']
        else:
            print('Use Diginetica pretrained item embedding: '
                  + 'pretrained_emb' + str(alpha) + '.pkl')
            pretrained_item_emb = torch.load(
                './dig_pretrained_item_emb/pretrained_emb' + str(alpha)
                + '.pkl')['item_emb.weight']
        print('----------------------------------')
    else:
        print('----------------------------------')
        print('Not using a pretrained item embedding:')
        pretrained_item_emb = None
        print('----------------------------------')

    # get cpu LongTensor: item_emb_idxes
    item_emb_idxes = torch.arange(data_obj.item_size).long()

    # transform all tensors to cuda
    A = A.to(device)
    labels = labels.to(device)
    item_idxes = item_idxes.to(device)
    item_emb_idxes = item_emb_idxes.to(device)
    if session_last_item is not None:
        session_last_item = session_last_item.to(device)
    if session_adj is not None:
        session_adj = session_adj.to(device)

    # define the evaluation objects
    evalution5 = Evaluation(k=5)
    evalution10 = Evaluation(k=10)
    evalution15 = Evaluation(k=15)
    evalution20 = Evaluation(k=20)

    # define the data loaders
    trainset = SessionDataset(train_size=data_obj.train_size,
                              test_size=data_obj.test_size,
                              train=True,
                              labels=labels)
    trainloader = DataLoader(dataset=trainset, batch_size=batch_size,
                             shuffle=shuffle)
    testset = SessionDataset(train_size=data_obj.train_size,
                             test_size=data_obj.test_size,
                             train=False,
                             labels=labels)
    testloader = DataLoader(dataset=testset, batch_size=batch_size,
                            shuffle=False)

    # define model, then transform to cuda
    if model_type == 'ngcf1_session_hot_items':
        model = ngcf1_session_hot_items(item_nums=data_obj.item_size,
                                        item_emb_dim=item_emb_dim,
                                        hid_dim1=hid_dim1,
                                        pretrained_item_emb=pretrained_item_emb)
    elif model_type == 'ngcf2_session_hot_items':
        model = ngcf2_session_hot_items(item_nums=data_obj.item_size,
                                        item_emb_dim=item_emb_dim,
                                        hid_dim1=hid_dim1,
                                        hid_dim2=hid_dim2,
                                        pretrained_item_emb=pretrained_item_emb)
    elif model_type == 'ngcf3_session_hot_items':
        model = ngcf3_session_hot_items(item_nums=data_obj.item_size,
                                        item_emb_dim=item_emb_dim,
                                        hid_dim1=hid_dim1,
                                        hid_dim2=hid_dim2,
                                        hid_dim3=hid_dim3,
                                        pretrained_item_emb=pretrained_item_emb)
    else:
        model = ngcf2_session_last_item(item_nums=data_obj.item_size,
                                        item_emb_dim=item_emb_dim,
                                        hid_dim1=hid_dim1,
                                        hid_dim2=hid_dim2,
                                        pretrained_item_emb=pretrained_item_emb)
    model.to(device)

    # define loss and optim
    criterion = nn.CrossEntropyLoss()
    if model_type == 'ngcf1_session_hot_items':
        optim_emb = optim.Adagrad([{'params': model.item_emb.parameters()}],
                                  lr=lr_emb, weight_decay=l2_emb)
        optim_gcn = optim.Adam([{'params': model.gconv1.parameters()}],
                               lr=lr_gcn, weight_decay=l2_gcn)
    elif model_type == 'ngcf2_session_hot_items':
        optim_emb = optim.Adagrad([{'params': model.item_emb.parameters()}],
                                  lr=lr_emb, weight_decay=l2_emb)
        optim_gcn = optim.Adam([{'params': model.gconv1.parameters()},
                                {'params': model.gconv2.parameters()}],
                               lr=lr_gcn, weight_decay=l2_gcn)
    elif model_type == 'ngcf3_session_hot_items':
        optim_emb = optim.Adagrad([{'params': model.item_emb.parameters()}],
                                  lr=lr_emb, weight_decay=l2_emb)
        optim_gcn = optim.Adam([{'params': model.gconv1.parameters()},
                                {'params': model.gconv2.parameters()},
                                {'params': model.gconv3.parameters()}],
                               lr=lr_gcn, weight_decay=l2_gcn)
    else:
        # ngcf2_session_last_item parameters
        optim_emb = optim.Adagrad([{'params': model.item_emb.parameters()}],
                                  lr=lr_emb, weight_decay=l2_emb)
        optim_gcn = optim.Adam([{'params': model.gconv1.parameters()},
                                {'params': model.gconv2.parameters()}],
                               lr=lr_gcn, weight_decay=l2_gcn)

    # figure recall, mrr, norm histories
    fig_recalls = []
    fig_mrrs = []
    fig_emb_norms = []
    fig_gcn_norms = []
    fig_epochs = []

    # train epochs
    for epoch in range(epochs):
        # model training
        start = time.time()
        # train evaluation dict
        recall = {'5': [], '10': [], '15': [], '20': []}
        mrr = {'5': [], '10': [], '15': [], '20': []}
        # test evaluation dict
        r = {'5': [], '10': [], '15': [], '20': []}
        m = {'5': [], '10': [], '15': [], '20': []}
        # loss list
        losses = []

        model.train()
        for i, data in enumerate(trainloader):
            # zero optim
            optim_emb.zero_grad()
            optim_gcn.zero_grad()
            # batch inputs
            batch_idxes = data[0].long().to(device)
            batch_labels = data[1].long().to(device)
            # predicting; the *_session_hot_items models share one call signature
            if model_type in ('ngcf1_session_hot_items',
                              'ngcf2_session_hot_items',
                              'ngcf3_session_hot_items'):
                outs = model(batch_idxes, A, item_idxes, session_adj,
                             item_emb_idxes)
            else:
                # use ngcf2_session_last_item model to predict
                outs = model(batch_idxes, A, item_idxes, session_last_item,
                             item_emb_idxes)
            # loss
            loss = criterion(outs, batch_labels)
            # backward
            loss.backward()
            # optim step
            optim_emb.step()
            optim_gcn.step()
            # evaluation, k=5, 10, 15, 20
            recall['5'].append(evalution5.evaluate(outs, batch_labels)[0])
            recall['10'].append(evalution10.evaluate(outs, batch_labels)[0])
            recall['15'].append(evalution15.evaluate(outs, batch_labels)[0])
            recall['20'].append(evalution20.evaluate(outs, batch_labels)[0])
            mrr['5'].append(evalution5.evaluate(outs, batch_labels)[1])
            mrr['10'].append(evalution10.evaluate(outs, batch_labels)[1])
            mrr['15'].append(evalution15.evaluate(outs, batch_labels)[1])
            mrr['20'].append(evalution20.evaluate(outs, batch_labels)[1])
            # losses
            losses.append(loss.item())
            # print loss, recall, mrr
            if i % 50 == 49:
                print('[{0: 2d}, {1:5d}] loss:{2:.4f}'.format(
                    epoch + 1, i + 1, np.mean(losses)))
                print('[recall@5 ]:{0:.4f} [mrr@5 ]:{1:.4f}'.format(
                    np.mean(recall['5']), np.mean(mrr['5'])))
                print('[recall@10]:{0:.4f} [mrr@10]:{1:.4f}'.format(
                    np.mean(recall['10']), np.mean(mrr['10'])))
                print('[recall@15]:{0:.4f} [mrr@15]:{1:.4f}'.format(
                    np.mean(recall['15']), np.mean(mrr['15'])))
                print('[recall@20]:{0:.4f} [mrr@20]:{1:.4f}'.format(
                    np.mean(recall['20']), np.mean(mrr['20'])))

        # print gcn_norm, emb_norm once per epoch
        emb_norm = get_norm(model, 'emb')
        gcn_norm = get_norm(model, 'gcn')
        fig_emb_norms.append(emb_norm)
        fig_gcn_norms.append(gcn_norm)
        print('[gcn_norm]:{0:.4f} [emb_norm]:{1:.4f}'.format(gcn_norm, emb_norm))
        # epoch time
        print('[epoch time]:{0:.4f}'.format(time.time() - start))

        # save model
        if epoch % 10 == 9:
            torch.save(model.state_dict(),
                       'params' + model_type + '-Alpha' + str(alpha) + '_'
                       + '_lr_emb' + str(lr_emb) + '_l2_emb' + str(l2_emb)
                       + '_lr_gcn' + str(lr_gcn) + '_l2_gcn' + str(l2_gcn)
                       + '.pkl')

        # model eval
        model.eval()
        with torch.no_grad():
            for j, d in enumerate(testloader):
                # test batch inputs
                b_idxes = d[0].long().to(device)
                b_labels = d[1].long().to(device)
                # predicting
                if model_type in ('ngcf1_session_hot_items',
                                  'ngcf2_session_hot_items',
                                  'ngcf3_session_hot_items'):
                    o = model(b_idxes, A, item_idxes, session_adj,
                              item_emb_idxes)
                else:
                    # use ngcf2_session_last_item model to predict
                    o = model(b_idxes, A, item_idxes, session_last_item,
                              item_emb_idxes)
                # evaluation, k=5, 10, 15, 20
                r['5'].append(evalution5.evaluate(o, b_labels)[0])
                r['10'].append(evalution10.evaluate(o, b_labels)[0])
                r['15'].append(evalution15.evaluate(o, b_labels)[0])
                r['20'].append(evalution20.evaluate(o, b_labels)[0])
                m['5'].append(evalution5.evaluate(o, b_labels)[1])
                m['10'].append(evalution10.evaluate(o, b_labels)[1])
                m['15'].append(evalution15.evaluate(o, b_labels)[1])
                m['20'].append(evalution20.evaluate(o, b_labels)[1])

        # print test recall, mrr
        print('[{0: 2d}]'.format(epoch + 1))
        print('[recall@5 ]:{0:.4f} [mrr@5 ]:{1:.4f}'.format(
            np.mean(r['5']), np.mean(m['5'])))
        print('[recall@10]:{0:.4f} [mrr@10]:{1:.4f}'.format(
            np.mean(r['10']), np.mean(m['10'])))
        print('[recall@15]:{0:.4f} [mrr@15]:{1:.4f}'.format(
            np.mean(r['15']), np.mean(m['15'])))
        print('[recall@20]:{0:.4f} [mrr@20]:{1:.4f}'.format(
            np.mean(r['20']), np.mean(m['20'])))

        # plt recall and mrr and norm
        fig_epochs.append(epoch)
        fig_recalls.append(np.mean(r['20']))
        fig_mrrs.append(np.mean(m['20']))
        plt_evalution(fig_epochs, fig_recalls, fig_mrrs, k=20, alpha=alpha,
                      lr_emb=lr_emb, l2_emb=l2_emb, lr_gcn=lr_gcn,
                      l2_gcn=l2_gcn, model_type=model_type)
        plt_norm(fig_epochs, fig_emb_norms, fig_gcn_norms, alpha=alpha,
                 lr_emb=lr_emb, l2_emb=l2_emb, lr_gcn=lr_gcn, l2_gcn=l2_gcn,
                 model_type=model_type)
def step(self, batch_idx, mode):
    if mode == "train" and self.mode == "test":
        raise Exception("Cannot train during test mode")

    if mode == "train":
        theano_fn = self.train_fn
        inputs = self.train_input
        qs = self.train_q
        answers = self.train_answer
        input_masks = self.train_input_mask
        qinfo = self.train_qinfo
    elif mode == "train_val":
        theano_fn = self.test_fn
        inputs = self.train_val_input
        qs = self.train_val_q
        answers = self.train_val_answer
        input_masks = self.test_input_mask
        qinfo = self.train_val_qinfo
    elif mode == 'test':
        theano_fn = self.test_fn
        inputs = self.test_input
        qs = self.test_q
        answers = self.test_answer
        input_masks = self.test_input_mask
        qinfo = self.test_qinfo
    else:
        raise Exception("Invalid mode")

    num_ma_opts = answers.shape[1]

    p_q = np.zeros((len(batch_idx), 300), dtype='float32')  # question input vector
    target = np.zeros((len(batch_idx)))  # answer (as a single number)
    p_inp = np.zeros((len(batch_idx), self.max_sent_len, self.sent_vector_size),
                     dtype='float32')  # story statements
    p_ans = np.zeros((len(batch_idx), num_ma_opts, 300),
                     dtype='float32')  # multiple choice answers
    # b_qinfo = []
    input_mask = input_masks

    for b, bi in enumerate(batch_idx):
        inp = inputs[qinfo[bi]['qid']]
        q = qs[bi]
        ans = answers[bi]
        target[b] = qinfo[bi]['correct_option']
        for i in range(len(inp)):
            p_inp[b][i] = inp[i]
        for j in range(len(ans)):
            p_ans[b][j] = self.pos_encodings(ans[j])
        p_q[b] = self.pos_encodings(q)
        # b_qinfo.append(qinfo[bi])

    ret = theano_fn(p_inp, p_q, p_ans, target)
    param_norm = np.max([utils.get_norm(x.get_value()) for x in self.params])

    return {
        "prediction": np.array(ret[0]),
        "answers": np.array(target),
        "current_loss": ret[1],
        "skipped": 0,
        "log": "pn: %.3f" % param_norm,
        "inp": np.array([inp]),
        "q": np.array([q]),
        "probabilities": np.array([ret[0]]),
        "attentions": np.array([ret[2]]),
    }
def get_vel_lin(self):
    return utils.get_norm(self.vel_lin)
def step(self, batch_index, mode):
    if mode == "train" and self.mode == "test":
        raise Exception("Cannot train during test mode")

    if mode == "train":
        theano_fn = self.train_fn
        inputs = self.train_input
        qs = self.train_q
        answers = self.train_answer
        ca = self.train_choices
        cb = self.train_choices
        # cc = self.train_choices
        # cd = self.train_choices
        input_masks = self.train_input_mask
    elif mode == "test":
        theano_fn = self.test_fn
        inputs = self.test_input
        qs = self.test_q
        answers = self.test_answer
        ca = self.test_choices
        cb = self.test_choices
        # cc = self.test_choices
        # cd = self.test_choices
        input_masks = self.test_input_mask
    else:
        raise Exception("Invalid mode")

    inp = inputs[batch_index]
    q = qs[batch_index]
    ans = answers[batch_index]
    ca = ca[batch_index][0]
    cb = cb[batch_index][1]
    # cc = cc[batch_index][2]
    # cd = cd[batch_index][3]
    input_mask = input_masks[batch_index]

    skipped = 0
    grad_norm = float('NaN')

    if mode == 'train':
        # gradient_value = self.get_gradient_fn(inp, q, ans, ca, cb, cc, cd, input_mask)
        gradient_value = self.get_gradient_fn(inp, q, ans, ca, cb, input_mask)
        grad_norm = np.max([utils.get_norm(x) for x in gradient_value])
        if np.isnan(grad_norm):
            print("==> gradient is nan at index %d." % batch_index)
            print("==> skipping")
            skipped = 1

    if skipped == 0:
        # ret = theano_fn(inp, q, ans, ca, cb, cc, cd, input_mask)
        ret = theano_fn(inp, q, ans, ca, cb, input_mask)
    else:
        ret = [float('NaN'), float('NaN')]

    param_norm = np.max([utils.get_norm(x.get_value()) for x in self.params])

    return {
        "prediction": np.array([ret[0]]),
        "answers": np.array([ans]),
        "current_loss": ret[1],
        "skipped": skipped,
        "log": "pn: %.3f \t gn: %.3f" % (param_norm, grad_norm)
    }
def step(self, batch_index, mode):
    if mode == "train" and self.mode == "test":
        raise Exception("Cannot train during test mode")

    if mode == "train":
        theano_fn = self.train_fn
        inputs = self.train_input
        qs = self.train_q
        answers = self.train_answer
        choices = self.train_choices
        input_masks = self.train_input_mask
    elif mode == "test":
        theano_fn = self.test_fn
        inputs = self.test_input
        qs = self.test_q
        answers = self.test_answer
        choices = self.test_choices
        input_masks = self.test_input_mask
    elif mode == "dev":
        theano_fn = self.test_fn
        inputs = self.dev_input
        qs = self.dev_q
        answers = self.dev_answer
        choices = self.dev_choices
        input_masks = self.dev_input_mask
    else:
        raise Exception("Invalid mode")

    inp = inputs[batch_index]
    q = qs[batch_index]
    ans = answers[batch_index]
    ca = choices[batch_index][0]
    cb = choices[batch_index][1]
    cc = choices[batch_index][2]
    cd = choices[batch_index][3]
    input_mask = input_masks[batch_index]

    skipped = 0
    grad_norm = float('NaN')

    if mode == 'train':
        gradient_value = self.get_gradient_fn(inp, q, ans, ca, cb, cc, cd, input_mask)
        grad_norm = np.max([utils.get_norm(x) for x in gradient_value])
        if np.isnan(grad_norm):
            print("==> gradient is nan at index %d." % batch_index)
            print("==> skipping")
            skipped = 1

    if skipped == 0:
        ret = theano_fn(inp, q, ans, ca, cb, cc, cd, input_mask)
    else:
        ret = [float('NaN'), float('NaN')]

    param_norm = np.max([utils.get_norm(x.get_value()) for x in self.params])

    return {"prediction": np.array([ret[0]]),
            "answers": np.array([ans]),
            "current_loss": ret[1],
            "skipped": skipped,
            "log": "pn: %.3f \t gn: %.3f" % (param_norm, grad_norm)
            }