Example 1
0
 def evaluate_instances(self, theta=None, prc=False):
     """Score the evaluation instances and return metrics from a ResultAnalyzer.

     If `theta` is given it replaces the stored parameter vector. Returns
     AUPRC when `prc` is True, otherwise an (accuracy, AUC) pair; returns
     None (with a message) when no labeled data has been set.
     """
     if not self.labeled_data:
         print('I have no instance data. Must set them, and labels first')
         return None
     # An explicitly supplied theta overrides the stored one.
     if theta is not None:
         self.theta = theta
     analyzer = ResultAnalyzer()
     predictions = af.calculate_y(self.eval_data, self.theta)
     analyzer.addManyResults(self.y_known, predictions)
     return analyzer.auprc() if prc else (analyzer.accuracy(), analyzer.auc())
Example 2
0
 def get_instance_scores(self):
     """Return model scores for every instance held by self.data."""
     instances = self.data.get_instances()
     scores = af.calculate_y(instances, self.theta)
     return scores
    def train(self):
        """Optimize theta by mini-batch gradient descent on the paper's cost.

        Runs for `self.epochs` epochs over shuffled mini-batches, combining a
        pairwise-similarity cost with a group cost weighted by
        `self.alpha_balance`. Progress is printed for every mini-batch (the
        every-50-iterations check is currently commented out). The theta with
        the best training accuracy is saved via `io.save_theta` and detailed
        stats for it are printed at the end; the final (not best) theta is
        also saved with a '_last' suffix.

        Returns:
            (train_acc, group_acc, instance_acc, instance_auc) — attributes
            presumably populated by `self._print_progress`; TODO confirm.
        """
        print('Optimizing for ', self._param_str)
        self.total_iterations = 0
        accs = []  # per-iteration training accuracies (for terminate heuristics)
        #theta = np.random.random(self.embeddings_dimension)
        # Start from the zero vector (random init left commented out above).
        theta = np.zeros(self.embeddings_dimension)
        #theta=np.loadtxt('training_output/movies/rbf_100_300_300x100_10.0_0.04rbf0.7071_last_theta', delimiter=',')
        print(theta)
        best_theta = theta
        best_acc = 0
        # NOTE(review): `terminate` is never set to True here — the early-stop
        # code below is commented out, so all epochs always run.
        terminate = False

        for epoch in range(self.epochs):
            self.train_data.rewind_dataset(True)  # reset and shuffle data

            if terminate:
                break
            print('-------epoch ', epoch, '-----------')
            print(self._print_titles)

            # Each batch: X = instance features, gs/gl = group info
            # (presumably group scores/labels — TODO confirm against loader).
            X, gs, gl = self.train_data.get_next_batch()

            while X is not None:  # for each mini-batch # do gd step

                # Pairwise similarity matrix for this batch.
                W_ij = similarity.get_sim_matrix(X, self.similarity_fn,
                                                 self.sim_variance)

                # calculate y_hat and derivative
                Y_ij = af.calculate_y(X, theta)
                Y_der_ij = af.calculate_y_der(Y_ij, X)

                # calculate cost
                # Similarity term normalized by batch size squared; group term
                # scaled by alpha_balance and normalized by group count.
                similarity_cost = af.similarity_derivative(
                    Y_ij, Y_der_ij, W_ij) / (X.shape[0]**2)
                group_cost = self.alpha_balance * af.group_derivative(
                    Y_ij, Y_der_ij, gs, gl) / float(len(gs))
                #if self.total_iterations %8==0:
                theta_der = similarity_cost + group_cost
                #else:
                #theta_der = similarity_cost
                #print(theta_der)
                # new theta
                #
                # Momentum-style update with a learning rate decayed by epoch;
                # note theta is rebound (not mutated), so best_theta below
                # keeps its own snapshot.
                theta = self.momentum_value * theta - self.lr / (epoch +
                                                                 1) * theta_der
                #theta = theta - (1 - self.momentum_value) * self.lr / (epoch + 1) * theta_der#(1 - self.momentum_value) *

                self.total_iterations += 1

                # print progress
                #if self.total_iterations % 50 == 0:
                acc = self._print_progress(theta)
                accs.append(acc)
                # NOTE(review): `Jilu` is not defined in this block — it looks
                # like a module-level running-best accuracy log; verify it is
                # initialized (non-empty) before train() is called.
                if Jilu[-1] < acc:

                    Jilu.append(acc)
                else:
                    Jilu.append(Jilu[-1])
                if acc > best_acc:  # save best theta, based on training set
                    best_acc = acc
                    best_theta = theta
                    io.save_theta(theta,
                                  self.output_name + self._param_str,
                                  best=True)

                    #if self._terminate_conditions(theta, accs):
                #if self.total_iterations == 100:
                #   terminate = True
                #   break
                X, gs, gl = self.train_data.get_next_batch()

        # Persist the final theta regardless of whether it was the best one.
        io.save_theta(theta, self.output_name + self._param_str + '_last')

        print('\n\n\n\t\t\t---BEST THETA VALUE (in training group)---')

        # Detailed classification stats for the best theta found in training.
        self._print_progress(best_theta, print_details=True)
        return self.train_acc, self.group_acc, self.instance_acc, self.instance_auc