def optimize(self, dict):
    """Re-estimate ``mu`` in log space and write the result back into ``dict``.

    The optimisation variable is ``log(dict['mu'])``; the minimiser's solution
    is exponentiated, reshaped to ``(1, self.U)``, passed through
    ``utility().normalize_2d_array`` and stored under ``dict['mu']``.

    Args:
        dict: parameter mapping. The keys read here are 'phi', 'theta', 'pi',
            'bw', 'mu', 'samples' and 'Aij'. Only 'mu' is overwritten.

    Returns:
        None. The caller receives the result through the mutated ``dict``.
    """
    def emit(label, value):
        # Produces the same text as the Python 2 statement `print label, value`.
        print("%s %s" % (label, value))

    state = dict
    likelihood = self.likelihood

    # Quantities handed to the objective and its gradient.
    second_term = likelihood.second_term(state['phi'], state['theta'], state['pi'])
    fourth_term = likelihood.E_q_q_g(state['phi'])
    kernel_integral = likelihood.integral_pre_computed(state['bw'])
    objective_args = {
        'mu': state['mu'],
        'samples': state['samples'],
        'Aij': state['Aij'],
        'bw': state['bw'],
        'st': second_term,
        'ft': fourth_term,
        'K': kernel_integral,
    }

    # Starting point of the search: the current mu, moved to log space.
    log_started = datetime.now()
    gamma_mu = np.log(state['mu'])
    emit("gamma_mu : ", gamma_mu)
    log_finished = datetime.now()
    print('Duration log op : {}'.format(log_finished - log_started))

    solve_started = datetime.now()
    # `objective_args` is a dict, not a tuple; it is passed through `args`
    # unchanged, exactly as the objective and gradient expect to receive it.
    result = minimize(
        self.func_mu,
        x0=gamma_mu,
        jac=self.grad_mu,
        args=objective_args,
        options={'disp': True, 'maxiter': 100},
    )
    estimated_mu = np.exp(result.x)
    estimated_mu = np.reshape(estimated_mu, (1, self.U))
    estimated_mu = utility().normalize_2d_array(estimated_mu)
    state['mu'] = estimated_mu
    print(estimated_mu)
    solve_finished = datetime.now()
    print('minimize operation duration : {} '.format(solve_finished - solve_started))
def estimate_parameters(self, checkin_file, connections_file, num_community):
    """Initialise the model parameters and run the alternating estimation loop.

    Each of the five iterations learns per-user bandwidths, runs the
    pre-computation step, then optimises mu, theta, phi and Aij in that order
    through the corresponding ``parameter_estimation_*`` objects.

    Args:
        checkin_file: path handed to ``synthetic_data_processing`` to load the
            events; the first two characters of its basename tag the result
            files.
        connections_file: path handed to ``synthetic_data_processing`` together
            with ``checkin_file``.
        num_community: number of communities, stored as ``self.M``.

    Returns:
        None.

    Side effects:
        * Sets ``self.events``, ``self.checkins``, ``self.T``, ``self.X``,
          ``self.Y``, ``self.U``, ``self.V`` and ``self.M``.
        * Reads ``self.S`` and ``self.I``, which must already be set.
        * Writes ``pre_compute.p`` in the working directory on every iteration.
        * Writes ``Iteration/Itr_<i>_<xx>_results.p`` after each iteration and
          ``Iteration/Final_<xx>_results.p`` after the last one.
    """
    def emit(label, value):
        # Produces the same text as the Python 2 statement `print label, value`.
        print("%s %s" % (label, value))

    def dump(obj, path):
        # Close the handle deterministically instead of leaking it.
        with open(path, "wb") as handle:
            pickle.dump(obj, handle)

    def optimize_flattened(estimator, state, key, banner, timing_label):
        # The estimators work on a (1, n) row vector: flatten state[key], run
        # the estimator, then restore the original shape.
        started = time.time()
        original_shape = state[key].shape
        state[key] = np.reshape(state[key], (1, state[key].size))
        print(banner)
        estimator.optimize(state)
        restored = np.reshape(state[key], original_shape)
        state[key] = restored
        emit(timing_label, time.time() - started)
        return restored

    # get data
    (self.events, self.checkins, self.T, self.X, self.Y) = \
        synthetic_data_processing().get_checkins(checkin_file)
    [U_real, V_real] = synthetic_data_processing().get_params_from_file(
        checkin_file, connections_file)
    M_real = num_community
    num_iteration = 5

    # Random initialisation. The order of the np.random calls (mu, pi, theta,
    # phi) is kept so that seeded runs draw the same values as before.
    mu = np.random.uniform(0, 1, (1, U_real))
    mu = mu / np.sum(mu)

    # Aij = np.random.uniform(0,1,(U_real, U_real))
    Aij = np.ones((U_real, U_real))
    # Set for only significant users
    Aij = self.fix_Aij(Aij)
    # Normalise each column to sum to one; all-zero columns are left untouched.
    column_sums = np.sum(Aij, axis=0)
    nonzero = column_sums != 0
    Aij[:, nonzero] = Aij[:, nonzero] / column_sums[nonzero]
    Aij = np.asarray(Aij)

    # Rows of pi and theta are normalised to sum to one.
    pi = np.random.uniform(0, 1, (U_real, M_real))
    pi = pi / np.sum(pi, axis=1)[:, np.newaxis]

    theta = np.random.uniform(0, 1, (M_real, V_real))
    theta = theta / np.sum(theta, axis=1)[:, np.newaxis]

    phi = np.random.uniform(0, 1, (U_real, M_real))
    phi = utility().normalize_2d_array(phi)

    N = len(self.events)
    print("N : ", N)
    print("The shape is", mu.shape, Aij.shape, pi.shape, theta.shape)
    print("theta : ", theta)
    print("mu : ", mu)
    print("Aij : ", Aij)
    print("phi : ", phi)

    self.U = U_real
    self.V = V_real
    self.M = M_real

    # The return value is not used here; the call is kept in case it has
    # side effects that are not visible from this method.
    synthetic_data_processing().get_graph(checkin_file, connections_file, self.U)

    # Fail before the expensive loop, not at the first result dump.
    if not os.path.isdir("Iteration"):
        os.makedirs("Iteration")
    file_tag = str(os.path.basename(checkin_file)[:2])

    for iteration in range(num_iteration):
        print("Iteration in consideration", iteration)

        # learn bandwidth of each user
        bw_start_time = time.time()
        bw = [
            kernelParameters().kde(self.checkins[user][0],
                                   self.checkins[user][1]).bandwidth
            for user in range(0, self.U)
        ]
        emit("bandwidth learn time : ", time.time() - bw_start_time)

        # pre computation step
        pre_comp_time = time.time()
        dump([self.U, bw, self.events, Aij], "pre_compute.p")
        (pre_compute_map, pre_compute_Aij) = pre_computation().pre_compute(
            self.U, bw, self.events, Aij)
        emit("pre compute map : ", pre_compute_map)
        emit("pre compute Aij : ", pre_compute_Aij)
        emit("pre compute time : ", time.time() - pre_comp_time)

        # All four estimators are constructed from the same arguments.
        obj_cre_time = time.time()
        estimator_args = (self.S, self.M, self.U, self.V, self.I, self.T,
                          self.X, self.Y, self.events, self.checkins,
                          pre_compute_map, pre_compute_Aij)
        par_phi_obj = parameter_estimation_phi(*estimator_args)
        par_mu_obj = parameter_estimation_mu(*estimator_args)
        par_theta_obj = parameter_estimation_theta(*estimator_args)
        par_aij_obj = parameter_estimation_aij(*estimator_args)
        emit("object creation time :", time.time() - obj_cre_time)

        samples = par_phi_obj.getSamples(pi)
        main_dict = {'mu': mu[0, :], 'Aij': Aij, 'phi': phi, 'theta': theta,
                     'bw': bw, 'pi': pi, 'samples': samples}
        print("The original values are:----------------")

        # mu estimation: the optimiser stores a 2-D array under 'mu'. The 2-D
        # form is kept locally for the next iteration and row 0 is exposed to
        # the remaining steps.
        start_time = time.time()
        print("Started the mu optimization----------------------------------->checkpoint 1")
        par_mu_obj.optimize(main_dict)
        mu = main_dict['mu']
        main_dict['mu'] = mu[0, :]
        emit("MU time :", time.time() - start_time)

        # theta estimation
        theta = optimize_flattened(
            par_theta_obj, main_dict, 'theta',
            "Started the theta optimization----------------------------------->checkpoint 2",
            "THETA time :")

        # phi parameter estimation
        phi = optimize_flattened(
            par_phi_obj, main_dict, 'phi',
            "Started the phi optimization----------------------------------->checkpoint 3",
            " PHI time :")

        # aij estimation (the leftover pdb.set_trace() breakpoint that used to
        # stop every iteration here has been removed)
        Aij = optimize_flattened(
            par_aij_obj, main_dict, 'Aij',
            "Started the Aij optimization----------------------------------->checkpoint 4",
            "AIJ time :")

        dump(main_dict, "Iteration/Itr_" + str(iteration) + "_" + file_tag + "_results.p")

    # Snapshot of the parameters after the last iteration.
    dump(main_dict, "Iteration/Final_" + file_tag + "_results.p")
def get_checkins(self, checkins_file):
    """Load check-in events from a comma-separated file and rescale them.

    Every non-blank line is parsed as ``user,time,x,y[,int,...]``: the first
    field as ``int``, the next three as ``float`` and any further fields as
    ``int``. The first line of the file is parsed like any other line; no
    header is skipped. Parsed events are appended to ``self.events``.

    Afterwards, for every entry of ``self.events``, time/x/y are rescaled as
    ``(value - min) / (max - min)``. The entry is replaced by a list, and its
    scaled x, y and time are appended to ``self.checkins[user]``, a tuple
    ``(x_values, y_values, time_values)``.

    Args:
        checkins_file: path of the UTF-8 encoded input file.

    Returns:
        ``(self.events, self.checkins, self.T, self.X, self.Y)``; the last
        three are always ``1.0`` because the data has been rescaled.

    Raises:
        ValueError / IndexError: if a non-blank line cannot be parsed.
    """
    def rescale(value, low, span):
        # A zero span means all observed values were identical; map them to
        # 0.0 instead of failing with ZeroDivisionError.
        return (value - low) / span if span != 0 else 0.0

    # get min and max arguments
    # NOTE(review): the maxima start at 0.0, not -inf. If
    # utility().get_check is a plain running min/max, data that is entirely
    # negative would keep a maximum of 0.0 -- confirm against get_check.
    t_min = x_min = y_min = np.inf
    t_max = x_max = y_max = 0.0

    last_record = None
    # The context manager closes the file even when a line fails to parse.
    with io.open(checkins_file, "r", encoding="utf-8") as fin:
        for line in fin:
            line = line.rstrip()
            if not line:
                # Tolerate blank lines (e.g. a trailing newline at EOF).
                continue
            fields = line.split(",")
            # user, time, lat, lon, cat, com
            user = int(fields[0])
            when = float(fields[1])
            x = float(fields[2])
            y = float(fields[3])
            last_record = (user, when, x, y) + tuple(int(v) for v in fields[4:])
            self.events.append(last_record)

            (t_min, t_max) = utility().get_check(t_min, t_max, when)
            (x_min, x_max) = utility().get_check(x_min, x_max, x)
            (y_min, y_max) = utility().get_check(y_min, y_max, y)

    # Debug output: the last event that was parsed, if any.
    if last_record is not None:
        print(last_record)

    # scaling of t,x,y
    t_span = t_max - t_min
    x_span = x_max - x_min
    y_span = y_max - y_min
    for index, raw_event in enumerate(self.events):
        event = list(raw_event)
        event[1] = rescale(event[1], t_min, t_span)
        event[2] = rescale(event[2], x_min, x_span)
        event[3] = rescale(event[3], y_min, y_span)

        if event[0] not in self.checkins:
            self.checkins[event[0]] = ([], [], [])
        x_values, y_values, time_values = self.checkins[event[0]]
        x_values.append(event[2])
        y_values.append(event[3])
        time_values.append(event[1])

        self.events[index] = event

    # The extents are 1.0 because the data has been rescaled.
    self.T = 1.0
    self.X = 1.0
    self.Y = 1.0

    return (self.events, self.checkins, self.T, self.X, self.Y)
def custom_gradient_phi(self, *args):
    """Iteratively refine ``phi`` and write the final value to ``estimated_phi``.

    For up to ``self.I`` rounds the method:

    1. reshapes ``phi`` to ``(self.U, self.M)``, passes it through
       ``utility().normalize_2d_array`` and flattens it to ``(1, U * M)``;
    2. draws new samples with ``self.getSamples(phi)`` and stores them in
       ``args[0]['samples']``;
    3. evaluates ``self.likelihood_phi(args)`` and stops once two consecutive
       values differ by less than 0.01;
    4. otherwise runs a single SLSQP iteration on ``log(phi)`` under the
       equality constraint ``self.constraint_phi`` and takes ``exp`` of the
       solution as the new ``phi``.

    Args:
        *args: extra arguments forwarded to the objective, gradient and
            likelihood. ``args[0]`` must be a dict holding 'phi'; its
            'samples' entry is overwritten on every round.

    Returns:
        None. The result is written to the text file ``estimated_phi``:
        ``self.U``, then ``self.M``, then one line per item obtained by
        iterating over the final ``phi``.
    """
    def emit(label, value):
        # Produces the same text as the Python 2 statement `print label, value`.
        print("%s %s" % (label, value))

    obj_func_values = []
    phi = args[0]['phi']
    # The constraint list does not change between rounds.
    cons = [{'type': 'eq', 'fun': self.constraint_phi}]

    for t in range(0, self.I):
        if t % 50 == 0:
            emit("Iteration: ", t)
            emit("phi: ", phi)

        # new samples
        phi = np.reshape(phi, (self.U, self.M))
        phi = utility().normalize_2d_array(phi)
        phi = np.reshape(phi, (1, self.U * self.M))
        samples = self.getSamples(phi)

        # updated arguments: args[0] is a dict, so it can be updated in place
        # even though `args` itself is a tuple.
        args[0]['samples'] = samples

        # terminating condition
        obj_fun = self.likelihood_phi(args)
        obj_func_values.append(obj_fun)
        if t != 0 and np.abs(obj_func_values[t] - obj_func_values[t - 1]) < 0.01:
            print(obj_func_values[t])
            print(obj_func_values[t - 1])
            emit("phi : ", phi)
            break

        gamma_phi = np.log(phi)
        # x0 is passed as a 1-D vector: recent SciPy releases reject a 2-D x0,
        # and the solution `res.x` is 1-D in any case.
        res = minimize(self.func_phi,
                       x0=np.ravel(gamma_phi),
                       args=args,
                       method='SLSQP',
                       jac=self.grad_phi,
                       constraints=cons,
                       options={'maxiter': 1, 'eps': 1.0})
        phi = np.exp(res.x)

    # With self.I == 0 no objective value was ever computed.
    if obj_func_values:
        emit("obj func value : ", obj_func_values[-1])
    emit("final phi : ", phi)

    # `unicode` is the Python 2 text type, which io.open requires in text mode
    # there. The context manager closes the file even if a write fails.
    with io.open("estimated_phi", "w") as fout:
        fout.write(unicode(self.U) + u"\n")
        fout.write(unicode(self.M) + u"\n")
        for item in phi:
            fout.write(unicode(item) + u"\n")