Example #1
    def optimize(self, params):

        # pre-compute the likelihood terms that do not depend on mu
        second_term = self.likelihood.second_term(params['phi'], params['theta'],
                                                  params['pi'])
        fourth_term = self.likelihood.E_q_q_g(params['phi'])
        K = self.likelihood.integral_pre_computed(params['bw'])
        args = {
            'mu': params['mu'],
            'samples': params['samples'],
            'Aij': params['Aij'],
            'bw': params['bw'],
            'st': second_term,
            'ft': fourth_term,
            'K': K
        }
        start_time = datetime.now()
        # optimize in log space so that the recovered mu stays positive
        gamma_mu = np.log(params['mu'])
        print("gamma_mu : ", gamma_mu)
        end_time = datetime.now()
        print('Duration log op : {}'.format(end_time - start_time))

        start_time = datetime.now()
        estimated_mu = np.exp(
            minimize(self.func_mu,
                     x0=gamma_mu,
                     jac=self.grad_mu,
                     args=(args,),
                     options={
                         'disp': True,
                         'maxiter': 100
                     }).x)
        estimated_mu = np.reshape(estimated_mu, (1, self.U))
        estimated_mu = utility().normalize_2d_array(estimated_mu)

        params['mu'] = estimated_mu
        print(estimated_mu)
        end_time = datetime.now()
        print('minimize operation duration : {}'.format(end_time - start_time))
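
A note on the pattern above: mu is optimized in log space (gamma_mu = log(mu)) so that exp(x) is always positive, and the result is then reshaped and normalized. The following is a minimal, self-contained sketch of that reparameterization with scipy.optimize.minimize; the quadratic objective, the target vector, and all names here are illustrative stand-ins, not part of the original code.

# Sketch of log-space optimization of a positive parameter (illustrative only).
import numpy as np
from scipy.optimize import minimize

target = np.array([0.2, 0.5, 0.3])            # stand-in for the quantity mu should match

def func_mu(gamma_mu, args):
    # objective evaluated at mu = exp(gamma_mu), so mu is positive by construction
    mu = np.exp(gamma_mu)
    return np.sum((mu - args['target']) ** 2)

def grad_mu(gamma_mu, args):
    # chain rule: d/dgamma f(exp(gamma)) = f'(mu) * mu
    mu = np.exp(gamma_mu)
    return 2.0 * (mu - args['target']) * mu

gamma_mu = np.log(np.full(3, 1.0 / 3.0))      # start from a uniform, positive mu
res = minimize(func_mu, x0=gamma_mu, jac=grad_mu, args=({'target': target},),
               options={'disp': True, 'maxiter': 100})
estimated_mu = np.exp(res.x)
estimated_mu = estimated_mu / estimated_mu.sum()   # normalize, as the method above does
print(estimated_mu)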
Example #2
    def estimate_parameters(self, checkin_file, connections_file, num_community):

        # get data
        (self.events, self.checkins, self.T, self.X, self.Y) = synthetic_data_processing().get_checkins(checkin_file)

        [U_real, V_real] = synthetic_data_processing().get_params_from_file(checkin_file, connections_file)
        M_real = num_community
        num_iteration = 5
            
        # random initialisation of the parameters, each normalised appropriately
        mu = np.random.uniform(0, 1, (1, U_real))
        mu = mu / np.sum(mu)

        # Aij = np.random.uniform(0, 1, (U_real, U_real))
        Aij = np.ones((U_real, U_real))

        # Set for only significant users
        Aij = self.fix_Aij(Aij)

        # normalise every column of Aij that has a non-zero sum
        sum_Aij = np.sum(Aij, axis=0)
        for k in range(Aij.shape[0]):
            if sum_Aij[k] != 0:
                Aij[:, k] = Aij[:, k] / sum_Aij[k]

        # normalise each row of pi and theta
        pi = np.random.uniform(0, 1, (U_real, M_real))
        pi = pi / np.sum(pi, axis=1, keepdims=True)

        theta = np.random.uniform(0, 1, (M_real, V_real))
        theta = theta / np.sum(theta, axis=1, keepdims=True)

        phi = np.random.uniform(0, 1, (U_real, M_real))
        phi = utility().normalize_2d_array(phi)

        mu = np.asarray(mu)
        Aij = np.asarray(Aij)
        pi = np.asarray(pi)
        theta = np.asarray(theta)

        N = len(self.events)
        print("N : ", N)

        print("The shape is", mu.shape, Aij.shape, pi.shape, theta.shape)

        print("theta : ", theta)
        print("mu : ", mu)
        print("Aij : ", Aij)
        print("phi : ", phi)

        self.U = U_real
        self.V = V_real
        self.M = M_real

        graph = synthetic_data_processing().get_graph(checkin_file, connections_file, self.U)

        for iteration in range(num_iteration):
            
            print("Iteration in consideration", iteration)

            # learn a KDE bandwidth for each user from that user's check-in coordinates
            bw_start_time = time.time()
            bw = [kernelParameters().kde(self.checkins[user][0], self.checkins[user][1]).bandwidth
                  for user in range(0, self.U)]

            print("bandwidth learn time : ", (time.time() - bw_start_time))

            # pre-computation step: cache its inputs, then run it
            pre_comp_time = time.time()

            pickle.dump([self.U, bw, self.events, Aij], open("pre_compute.p", "wb"))
            (pre_compute_map, pre_compute_Aij) = pre_computation().pre_compute(self.U, bw, self.events, Aij)

            print("pre compute map : ", pre_compute_map)

            print("pre compute Aij : ", pre_compute_Aij)

            print("pre compute time : ", (time.time() - pre_comp_time))

            obj_cre_time = time.time()
            
            par_phi_obj = parameter_estimation_phi(self.S, self.M, self.U, self.V, self.I, self.T, self.X, self.Y, self.events,
                                                   self.checkins, pre_compute_map, pre_compute_Aij)

            par_mu_obj = parameter_estimation_mu(self.S, self.M, self.U, self.V, self.I, self.T, self.X, self.Y, self.events,
                                                 self.checkins, pre_compute_map, pre_compute_Aij)

            par_theta_obj = parameter_estimation_theta(self.S, self.M, self.U, self.V, self.I, self.T, self.X, self.Y,
                                                       self.events, self.checkins, pre_compute_map, pre_compute_Aij)

            par_aij_obj = parameter_estimation_aij(self.S, self.M, self.U, self.V, self.I, self.T, self.X, self.Y, self.events,
                                                   self.checkins, pre_compute_map, pre_compute_Aij)

            print "object creation time :", (time.time() - obj_cre_time)
            samples = par_phi_obj.getSamples(pi)

            main_dict = {'mu': mu[0, :], 'Aij': Aij, 'phi': phi, 'theta': theta, 'bw': bw, 'pi': pi, 'samples': samples}
            print("The original values are:----------------")

            # mu estimation (optimize updates main_dict['mu'] in place)
            start_time = time.time()
            print("Started the mu optimization----------------------------------->checkpoint 1")
            par_mu_obj.optimize(main_dict)
            mu = main_dict['mu']
            main_dict['mu'] = mu[0, :]
            print("MU time :", (time.time() - start_time))

            # theta estimation: flatten theta for the optimizer, then restore its shape
            start_time = time.time()
            temp_theta = main_dict['theta']
            theta = np.reshape(main_dict['theta'], (1, np.prod(main_dict['theta'].shape)))
            main_dict['theta'] = theta
            print("Started the theta optimization----------------------------------->checkpoint 2")
            par_theta_obj.optimize(main_dict)
            theta = np.reshape(main_dict['theta'], (temp_theta.shape[0], temp_theta.shape[1]))
            main_dict['theta'] = theta
            print("THETA time :", (time.time() - start_time))
            
            # phi estimation: same flatten / optimize / reshape pattern
            start_time = time.time()
            temp_phi = main_dict['phi']
            phi = np.reshape(main_dict['phi'], (1, np.prod(main_dict['phi'].shape)))
            main_dict['phi'] = phi
            print("Started the phi optimization----------------------------------->checkpoint 3")
            par_phi_obj.optimize(main_dict)
            phi = np.reshape(main_dict['phi'], (temp_phi.shape[0], temp_phi.shape[1]))
            main_dict['phi'] = phi
            print("PHI time :", (time.time() - start_time))

            # Aij estimation: same flatten / optimize / reshape pattern
            pdb.set_trace()  # debugging breakpoint before the Aij step
            start_time = time.time()
            temp_Aij = main_dict['Aij']
            Aij = np.reshape(main_dict['Aij'], (1, np.prod(main_dict['Aij'].shape)))
            main_dict['Aij'] = Aij
            print("Started the Aij optimization----------------------------------->checkpoint 4")
            par_aij_obj.optimize(main_dict)
            Aij = np.reshape(main_dict['Aij'], (temp_Aij.shape[0], temp_Aij.shape[1]))
            main_dict['Aij'] = Aij
            print("AIJ time :", (time.time() - start_time))
            
            pickle.dump(main_dict, open("Iteration/Itr_"+str(iteration)+"_"+str(os.path.basename(checkin_file)[:2])+"_results.p", "wb"))

        pickle.dump(main_dict, open("Iteration/Final_"+str(os.path.basename(checkin_file)[:2])+"_results.p", "wb"))
    def get_checkins(self, checkins_file):
        fin = io.open(checkins_file, "r", encoding="utf-8")

        # read header line
        line = fin.readline()

        # track the min and max values of t, x and y
        t_min = np.infty
        t_max = 0.0
        x_min = np.infty
        x_max = 0.0
        y_min = np.infty
        y_max = 0.0

        while line:
            line = line.rstrip()
            event = line.split(",")
            # user, time, lat, lon, cat, com
            event_tuple = (int(event[0]), float(event[1]), float(event[2]),
                           float(event[3]))
            for i in range(4, len(event)):
                entry = int(event[i])
                event_tuple = event_tuple + (entry, )

            self.events.append(event_tuple)

            (t_min, t_max) = utility().get_check(t_min, t_max, float(event[1]))
            (x_min, x_max) = utility().get_check(x_min, x_max, float(event[2]))
            (y_min, y_max) = utility().get_check(y_min, y_max, float(event[3]))


            line = fin.readline()

        fin.close()
        print(event_tuple)  # last event read, as a quick sanity check
        # self.T = t_max - t_min
        # self.X = x_max - x_min
        # self.Y = y_max - y_min

        # scaling of t,x,y

        for i in range(0, len(self.events)):
            event = list(self.events[i])

            event[1] = (event[1] - t_min) / (t_max - t_min)
            event[2] = (event[2] - x_min) / (x_max - x_min)
            event[3] = (event[3] - y_min) / (y_max - y_min)

            if event[0] in self.checkins:
                self.checkins[event[0]][0].append(event[2])
                self.checkins[event[0]][1].append(event[3])
                self.checkins[event[0]][2].append(event[1])
            else:
                lat = []
                lon = []
                t = []
                self.checkins[event[0]] = (lat, lon, t)
                self.checkins[event[0]][0].append(event[2])
                self.checkins[event[0]][1].append(event[3])
                self.checkins[event[0]][2].append(event[1])

            self.events[i] = event

        self.T = 1.0
        self.X = 1.0
        self.Y = 1.0
        # print "checkins : "
        # print self.checkins
        #
        # print "events : "
        # print self.events
        return (self.events, self.checkins, self.T, self.X, self.Y)
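
get_checkins above min-max scales the time and coordinate columns into [0, 1] (which is why T, X and Y are set to 1.0) and groups the scaled check-ins per user. The snippet below sketches that scaling and grouping on a few hand-made events; the data and dictionary layout are illustrative only.

# Sketch of min-max scaling and per-user grouping of check-ins (illustrative only).
import numpy as np

events = [
    (0, 12.0, 40.5, -74.0, 3),
    (1, 48.0, 40.7, -73.9, 1),
    (0, 96.0, 40.6, -74.1, 2),
]

cols = np.array([(t, x, y) for (_, t, x, y, *_rest) in events])
mins = cols.min(axis=0)
maxs = cols.max(axis=0)
scaled = (cols - mins) / (maxs - mins)        # each of t, x, y mapped to [0, 1]

checkins = {}
for (user, *_), (t, x, y) in zip(events, scaled):
    lat, lon, times = checkins.setdefault(user, ([], [], []))
    lat.append(x)
    lon.append(y)
    times.append(t)

print(checkins)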
Example #4
    def custom_gradient_phi(self, *args):

        G = np.zeros(((self.U * self.M), (self.U * self.M)))
        eta = 1.0
        phis = np.zeros((self.I, self.U * self.M))
        delta_lambda = np.zeros(self.I)
        obj_func_values = []
        phi = args[0]['phi']

        for t in range(0, self.I):

            phis[t] = phi

            if t % 50 == 0:
                print("Iteration: ", t)
                print("phi: ", phi)

            # new samples

            phi = np.reshape(phi, (self.U, self.M))
            phi = utility().normalize_2d_array(phi)
            phi = np.reshape(phi, (1, self.U * self.M))

            samples = self.getSamples(phi)

            # update the samples in the shared argument dict (args[0])
            args[0]['samples'] = samples

            # terminating condition: stop once the objective changes by less than 0.01
            obj_fun = self.likelihood_phi(args)
            # print("obj fun : ", obj_fun)
            obj_func_values.append(obj_fun)
            if t != 0 and np.abs(obj_func_values[t] - obj_func_values[t - 1]) < 0.01:
                print(obj_func_values[t])
                print(obj_func_values[t - 1])
                print("phi : ", phi)
                break

            cons = [{'type': 'eq', 'fun': self.constraint_phi}]

            gamma_phi = np.log(phi)

            res = minimize(self.func_phi,
                           x0=gamma_phi,
                           args=args,
                           method='SLSQP',
                           jac=self.grad_phi,
                           constraints=cons,
                           options={
                               'maxiter': 1,
                               'eps': 1.0
                           })

            phi_new = np.exp(res.x)

            # print ("phi new" , phi_new)

            phi = phi_new

        print "obj func value : ", (obj_func_values[len(obj_func_values) - 1])
        print "final phi : ", phi
        fout = io.open("estimated_phi", "w")

        fout.write(unicode(self.U) + u"\n")
        fout.write(unicode(self.M) + u"\n")
        for i in phi:
            fout.write(unicode(i) + u"\n")

        fout.close()
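
custom_gradient_phi above alternates drawing new samples with a single SLSQP step in log space, and stops once the objective value changes by less than 0.01 between iterations. The loop below sketches that "one optimizer step per outer iteration plus an objective-change stopping rule" on a toy objective; the objective, the threshold, and all names are illustrative, not the original likelihood.

# Sketch of a single-step-per-iteration SLSQP loop with an objective-change stop (illustrative only).
import numpy as np
from scipy.optimize import minimize

def objective(gamma):
    x = np.exp(gamma)                          # optimize in log space, as above
    return np.sum((x - np.array([0.3, 0.7])) ** 2)

gamma = np.log(np.array([0.5, 0.5]))
obj_values = []
for t in range(200):
    obj_values.append(objective(gamma))
    # terminating condition on the change of the objective value
    if t != 0 and abs(obj_values[t] - obj_values[t - 1]) < 0.01:
        break
    # one SLSQP step per outer iteration, mirroring options={'maxiter': 1} above
    res = minimize(objective, x0=gamma, method='SLSQP', options={'maxiter': 1})
    gamma = res.x

print("final x :", np.exp(gamma), "iterations :", t + 1)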