def fit_heuristic_h(self, h_classifier, x_train, y_train, verbose=0):
        """Fit ``h_classifier`` on examples gathered from per-instance greedy searches.

        For each training input a ``FlipBit`` is constructed (with ``"train"``
        as its fourth positional argument, ``self.initial_br``,
        ``self.h_reduction`` and the matching entry of ``y_train`` as the true
        output) and a greedy search of depth ``self.depth_of_search`` is run.
        The examples and labels the search object then reports are accumulated,
        stacked into a single CSR matrix and passed to ``h_classifier.fit``.

        Args:
            h_classifier: Object exposing ``fit(X, y)``; ``fit`` is called on it.
            x_train: Sequence of training inputs.
            y_train: True outputs, indexed in step with ``x_train``.
            verbose: When greater than 0, construction and fit timings are
                printed.

        Returns:
            The same ``h_classifier`` object that was passed in.
        """
        t_begin = time.clock()

        collected_examples = []
        collected_labels = []

        for position, instance in enumerate(x_train):
            searcher = FlipBit(
                instance,
                self.number_of_labels,
                self.scoring_function,
                "train",
                initial_br=self.initial_br,
                true_output=y_train[position],
                reduction=self.h_reduction,
            )
            # The return value is ignored: the search is run so that the
            # FlipBit object records the H training examples read back below.
            searcher.greedy_search(self.depth_of_search)

            collected_examples.extend(searcher.get_training_examples())
            collected_labels.extend(searcher.get_training_labels())

        t_built = time.clock()
        show_timings = verbose > 0
        if show_timings:
            summary = "H construction time: {0:.4f}, Examples: {1}"
            print(summary.format(t_built - t_begin, len(collected_examples)))

        stacked_examples = vstack(collected_examples, format="csr")
        h_classifier.fit(stacked_examples, collected_labels)
        t_fitted = time.clock()
        if show_timings:
            print("H heuristic train time: {0:.4f}".format(t_fitted - t_built))

        return h_classifier
    def fit_simplified(self, x_train, y_train):
        """Fit both the H and the C regressor from a single pass of greedy searches.

        For each training input one ``FlipBit`` is built with the matching
        entry of ``y_train`` as its true output and a greedy search of depth
        ``self.depth_of_search`` is run.  That one search supplies:

        * the H training set, from ``get_training_examples()`` and
          ``get_training_scores()`` of the search object;
        * the C training set, from every output returned by the search, paired
          with its loss against the true output.

        ``self.h_regressor`` and ``self.c_regressor`` are then fitted on the
        CSR-stacked examples.  Progress and timing lines are always printed.

        Args:
            x_train: Sized sequence of training inputs.
            y_train: True outputs, indexed in step with ``x_train``.
        """
        c_training_examples = []
        c_training_scores = []
        h_training_examples = []
        h_training_scores = []

        # NOTE(review): time.clock() exists on Python 2 but was removed in
        # Python 3.8; revisit if this module is ever ported.
        start_time = time.clock()
        # Single-argument print(...) behaves identically as a Python 2 print
        # statement and as a print function call, matching the other methods.
        print("Number of examples in training set: " + str(len(x_train)))
        for i, x in enumerate(x_train):
            flipbit = FlipBit(x, self.number_of_labels, self.scoring_function, true_output=y_train[i])
            outputs = flipbit.greedy_search(self.depth_of_search)
            h_training_examples.extend(flipbit.get_training_examples())
            h_training_scores.extend(flipbit.get_training_scores())

            # Every output visited by the search becomes one C training example.
            for output in outputs:
                example = construct_sparse_attributes(x, output)
                score = calculate_loss(self.scoring_function, output, y_train[i], self.number_of_labels)
                c_training_examples.append(example)
                c_training_scores.append(score)

        generating_end_time = time.clock()

        self.h_regressor.fit(vstack(h_training_examples, format='csr'), h_training_scores)
        print("Number of H regression learning examples: " + str(len(h_training_examples)))

        self.c_regressor.fit(vstack(c_training_examples, format='csr'), c_training_scores)
        print("Number of C regression learning examples: " + str(len(c_training_examples)))

        fit_time = time.clock()

        construction_time = generating_end_time - start_time
        learning_time = fit_time - generating_end_time
        print("Construction time: {0:.4f}, Learning HC time: {1:.4f}".format(construction_time, learning_time))
Esempio n. 3
0
    def fit_heuristic_h(self, h_classifier, x_train, y_train, verbose=0):
        """Build H training data with greedy searches and fit ``h_classifier``.

        One ``FlipBit`` is created per training input, receiving ``'train'`` as
        its fourth positional argument together with ``self.initial_br``,
        ``self.h_reduction`` and the corresponding true output from
        ``y_train``.  After a greedy search of depth ``self.depth_of_search``,
        the examples and labels exposed by the search object are appended to
        the running training set, which is finally stacked as a CSR matrix and
        given to ``h_classifier.fit``.

        Args:
            h_classifier: Object with a ``fit(X, y)`` method.
            x_train: Sequence of training inputs.
            y_train: True outputs aligned by index with ``x_train``.
            verbose: Timing lines are printed only when this is greater than 0.

        Returns:
            ``h_classifier`` itself, after ``fit`` has been called on it.
        """
        clock_at_start = time.clock()

        examples, labels = [], []
        for idx, sample in enumerate(x_train):
            search = FlipBit(
                sample,
                self.number_of_labels,
                self.scoring_function,
                'train',
                initial_br=self.initial_br,
                true_output=y_train[idx],
                reduction=self.h_reduction,
            )
            # Run greedy_search to construct H training examples; its return
            # value is not needed here.
            search.greedy_search(self.depth_of_search)

            examples.extend(search.get_training_examples())
            labels.extend(search.get_training_labels())

        clock_after_build = time.clock()
        if verbose > 0:
            build_seconds = clock_after_build - clock_at_start
            print("H construction time: {0:.4f}, Examples: {1}".format(build_seconds, len(examples)))

        design_matrix = vstack(examples, format='csr')
        h_classifier.fit(design_matrix, labels)
        clock_after_fit = time.clock()
        if verbose > 0:
            fit_seconds = clock_after_fit - clock_after_build
            print("H heuristic train time: {0:.4f}".format(fit_seconds))

        return h_classifier
    def fit(self, x_train, y_train):
        """Fit the H regressor, then use it to generate data for the C regressor.

        Phase 1 runs a greedy search per training input with the true output
        supplied to ``FlipBit`` and fits ``self.h_regressor`` on the examples
        and scores the searches recorded.

        Phase 2 repeats the searches, this time giving ``FlipBit`` the fitted
        ``self.h_regressor`` instead of the true output.  Every output returned
        by a search is turned into a C training example whose target is its
        loss against the true output; ``self.c_regressor`` is fitted on those.

        Timing for each step and the final example counts are always printed.

        Args:
            x_train: Sized sequence of training inputs.
            y_train: True outputs, indexed in step with ``x_train``.
        """
        h_examples, h_scores = [], []
        c_examples, c_scores = [], []

        t_start = time.clock()

        # Phase 1: searches with the true outputs produce the H training set.
        for idx, instance in enumerate(x_train):
            guided = FlipBit(instance, self.number_of_labels, self.scoring_function, true_output=y_train[idx])
            guided.greedy_search(self.depth_of_search)
            h_examples.extend(guided.get_training_examples())
            h_scores.extend(guided.get_training_scores())

        t_h_built = time.clock()
        print("H training examples construction time: {0:.4f}".format(t_h_built - t_start))

        h_matrix = vstack(h_examples, format='csr')
        self.h_regressor.fit(h_matrix, h_scores)
        t_h_fitted = time.clock()
        print("H heuristic train time: {0:.4f}".format(t_h_fitted - t_h_built))

        # Phase 2: searches driven by the fitted H regressor produce the C set.
        for idx, instance in enumerate(x_train):
            learned = FlipBit(instance, self.number_of_labels, self.scoring_function,
                              fitted_regressor=self.h_regressor)
            for candidate in learned.greedy_search(self.depth_of_search):
                attributes = construct_sparse_attributes(instance, candidate)
                loss = calculate_loss(self.scoring_function, candidate, y_train[idx], self.number_of_labels)
                c_examples.append(attributes)
                c_scores.append(loss)

        t_c_built = time.clock()
        print("C training examples construction time: {0:.4f}".format(t_c_built - t_h_fitted))

        c_matrix = vstack(c_examples, format='csr')
        self.c_regressor.fit(c_matrix, c_scores)
        t_c_fitted = time.clock()
        print("C heuristic train time: {0:.4f}".format(t_c_fitted - t_c_built))

        totals = "Training examples - Total: {0}, H: {1}, C: {2}"
        print(totals.format(len(x_train), len(h_examples), len(c_examples)))