def find_best_child_train(self, children):
    """Pick the lowest-true-loss child and collect pairwise H-training examples.

    The best child (by loss against ``self.true_output``) is compared with
    every other child; for each pair whose losses differ, two mirrored
    attribute-difference examples with +1/-1 sign labels are appended (with
    probability ``self.reduction``) to ``self.h_training_examples`` /
    ``self.h_training_labels``.

    Returns:
        The child with the lowest loss.
    """
    # float('inf') replaces sys.maxint, which was removed in Python 3
    # (and losses may be floats anyway).
    best_loss = float('inf')
    best_child = None
    for child in children:
        loss = calculate_loss(self.scoring_function, child,
                              self.true_output, self.number_of_labels)
        if loss < best_loss:
            best_loss = loss
            best_child = child
    # Compare only with best
    best_attributes = construct_sparse_attributes(self.attributes, best_child)
    for child in children:
        if child == best_child:
            continue
        loss = calculate_loss(self.scoring_function, child,
                              self.true_output, self.number_of_labels)
        if loss == best_loss:
            # Ties carry no ranking signal; skip them.
            continue
        # Subsample pairs to keep the training set small.
        if random.uniform(0.0, 1.0) <= self.reduction:
            attributes = construct_sparse_attributes(self.attributes, child)
            self.h_training_examples.append(attributes - best_attributes)
            self.h_training_labels.append(np.sign(loss - best_loss))
            self.h_training_examples.append(best_attributes - attributes)
            self.h_training_labels.append(np.sign(best_loss - loss))
    return best_child
def generate_examples_c(self, fitted_h_classifier, x_train, y_train, verbose=0):
    """Build pairwise training examples for the C (cost) classifier.

    For each training instance, runs a greedy FlipBit search guided by the
    fitted H classifier, finds the lowest-loss candidate output, and emits
    mirrored attribute-difference examples (best vs. every non-tied
    alternative) with +1/-1 sign labels.

    Returns:
        Tuple ``(c_training_x, c_training_y)`` of examples and labels.
    """
    # time.time() replaces time.clock(), which was removed in Python 3.8.
    c_start_time = time.time()
    c_training_x = []
    c_training_y = []
    for i in range(len(x_train)):  # xrange was removed in Python 3
        flipbit = FlipBit(x_train[i], self.number_of_labels,
                          self.scoring_function, 'test',
                          initial_br=self.initial_br,
                          fitted_classifier=fitted_h_classifier)
        # Get candidate outputs using the fitted H heuristic.
        outputs = flipbit.greedy_search(self.depth_of_search)
        best_loss = float('inf')  # sys.maxint was removed in Python 3
        best_output = None
        for output in outputs:
            loss = calculate_loss(self.scoring_function, output,
                                  y_train[i], self.number_of_labels)
            if loss < best_loss:
                best_loss = loss
                best_output = output
        output_1_attributes = construct_sparse_attributes(x_train[i], best_output)
        for output in outputs:
            if best_output == output:
                continue
            loss_2 = calculate_loss(self.scoring_function, output,
                                    y_train[i], self.number_of_labels)
            if best_loss == loss_2:
                # Ties carry no ranking signal; skip them.
                continue
            output_2_attributes = construct_sparse_attributes(x_train[i], output)
            c_training_x.append(output_1_attributes - output_2_attributes)
            c_training_y.append(np.sign(best_loss - loss_2))
            c_training_x.append(output_2_attributes - output_1_attributes)
            c_training_y.append(np.sign(loss_2 - best_loss))
    c_construction_end_time = time.time()
    if verbose > 0:
        print("C construction time: {0:.4f}, Examples: {1}".format(
            c_construction_end_time - c_start_time, len(c_training_x)))
    return c_training_x, c_training_y
def generate_examples_c(self, fitted_h_classifier, x_train, y_train, verbose=0):
    """Build pairwise training examples for the C (cost) classifier.

    NOTE(review): this is a duplicate of an earlier ``generate_examples_c``
    definition in this file — the later definition wins at runtime; consider
    removing one of the two.

    For each training instance, runs a greedy FlipBit search guided by the
    fitted H classifier, finds the lowest-loss candidate output, and emits
    mirrored attribute-difference examples (best vs. every non-tied
    alternative) with +1/-1 sign labels.

    Returns:
        Tuple ``(c_training_x, c_training_y)`` of examples and labels.
    """
    # time.time() replaces time.clock(), which was removed in Python 3.8.
    c_start_time = time.time()
    c_training_x = []
    c_training_y = []
    for i in range(len(x_train)):  # xrange was removed in Python 3
        flipbit = FlipBit(
            x_train[i],
            self.number_of_labels,
            self.scoring_function,
            "test",
            initial_br=self.initial_br,
            fitted_classifier=fitted_h_classifier,
        )
        # Get candidate outputs using the fitted H heuristic.
        outputs = flipbit.greedy_search(self.depth_of_search)
        best_loss = float('inf')  # sys.maxint was removed in Python 3
        best_output = None
        for output in outputs:
            loss = calculate_loss(self.scoring_function, output,
                                  y_train[i], self.number_of_labels)
            if loss < best_loss:
                best_loss = loss
                best_output = output
        output_1_attributes = construct_sparse_attributes(x_train[i], best_output)
        for output in outputs:
            if best_output == output:
                continue
            loss_2 = calculate_loss(self.scoring_function, output,
                                    y_train[i], self.number_of_labels)
            if best_loss == loss_2:
                # Ties carry no ranking signal; skip them.
                continue
            output_2_attributes = construct_sparse_attributes(x_train[i], output)
            c_training_x.append(output_1_attributes - output_2_attributes)
            c_training_y.append(np.sign(best_loss - loss_2))
            c_training_x.append(output_2_attributes - output_1_attributes)
            c_training_y.append(np.sign(loss_2 - best_loss))
    c_construction_end_time = time.time()
    if verbose > 0:
        print("C construction time: {0:.4f}, Examples: {1}".format(
            c_construction_end_time - c_start_time, len(c_training_x)))
    return c_training_x, c_training_y
def fit_simplified(self, x_train, y_train):
    """Fit the H and C regressors in a single pass over the training data.

    For every training instance a FlipBit greedy search (guided by true
    losses) is run; its internal (example, score) pairs train the H
    regressor, while each visited output's sparse attributes and true loss
    train the C regressor. Timing and example counts are printed.
    """
    c_training_examples = []
    c_training_scores = []
    h_training_examples = []
    h_training_scores = []
    # time.time() replaces time.clock(), which was removed in Python 3.8.
    start_time = time.time()
    # print() call syntax (and str.format) works on both Python 2 and 3,
    # unlike the original Python-2-only print statements.
    print("Number of examples in training set: {0}".format(len(x_train)))
    for i in range(len(x_train)):  # xrange was removed in Python 3
        flipbit = FlipBit(x_train[i], self.number_of_labels,
                          self.scoring_function, true_output=y_train[i])
        outputs = flipbit.greedy_search(self.depth_of_search)
        h_training_examples.extend(flipbit.get_training_examples())
        h_training_scores.extend(flipbit.get_training_scores())
        for output in outputs:
            example = construct_sparse_attributes(x_train[i], output)
            score = calculate_loss(self.scoring_function, output,
                                   y_train[i], self.number_of_labels)
            c_training_examples.append(example)
            c_training_scores.append(score)
    generating_end_time = time.time()
    self.h_regressor.fit(vstack(h_training_examples, format='csr'),
                         h_training_scores)
    print("Number of H regression learning examples: {0}".format(
        len(h_training_examples)))
    self.c_regressor.fit(vstack(c_training_examples, format='csr'),
                         c_training_scores)
    print("Number of C regression learning examples: {0}".format(
        len(c_training_examples)))
    fit_time = time.time()
    construction_time = generating_end_time - start_time
    learning_time = fit_time - generating_end_time
    print("Construction time: {0:.4f}, Learning HC time: {1:.4f}".format(
        construction_time, learning_time))
def predict_best_output(attributes, outputs, classifier):
    """Return the candidate output with the lowest decision-function score.

    Each candidate in ``outputs`` is combined with ``attributes`` into a
    sparse example and scored with ``classifier.decision_function``; the
    candidate with the minimal score wins.
    """
    scores = {}
    for i, output in enumerate(outputs):
        pretendent_attributes = construct_sparse_attributes(attributes, output)
        scores[i] = classifier.decision_function(pretendent_attributes)[0]
    # dict.iteritems() was removed in Python 3; taking min over the keys
    # with the score as the key function is equivalent on both versions.
    index_of_best = min(scores, key=scores.get)
    return outputs[index_of_best]
def predict_best_output(self, example, outputs):
    """Return the candidate output with the lowest C-regressor prediction.

    Each candidate in ``outputs`` is combined with ``example`` into a sparse
    attribute vector and scored by ``self.c_regressor.predict``; lower is
    better.
    """
    best_score = float('inf')  # sys.maxint was removed in Python 3
    best_output = None
    for output in outputs:
        attributes = construct_sparse_attributes(example, output)
        score = self.c_regressor.predict(attributes)
        if score < best_score:
            best_score = score
            best_output = output
    return best_output
def predict_best_output(attributes, outputs, classifier):
    """Return the candidate output with the lowest decision-function score.

    NOTE(review): this is a duplicate of an earlier ``predict_best_output``
    free-function definition in this file — consider removing one of the two.

    Each candidate in ``outputs`` is combined with ``attributes`` into a
    sparse example and scored with ``classifier.decision_function``; the
    candidate with the minimal score wins.
    """
    scores = {}
    for i, output in enumerate(outputs):
        pretendent_attributes = construct_sparse_attributes(attributes, output)
        scores[i] = classifier.decision_function(pretendent_attributes)[0]
    # dict.iteritems() was removed in Python 3; taking min over the keys
    # with the score as the key function is equivalent on both versions.
    index_of_best = min(scores, key=scores.get)
    return outputs[index_of_best]
def find_best_child(self, children):
    """Return the child with the lowest score (predicted or true loss).

    Exactly one of ``self.regressor`` (test time: score from the fitted H
    regressor) and ``self.true_output`` (train time: score is the true loss,
    and each (example, score) pair is recorded into
    ``self.h_training_examples`` / ``self.h_training_scores``) must be set.

    Raises:
        ValueError: if both or neither of regressor / true_output are set.
    """
    best_score = float('inf')  # sys.maxint was removed in Python 3
    best_child = None
    for child in children:
        example = construct_sparse_attributes(self.attributes, child)
        if (self.regressor is not None) and (self.true_output is None):
            score = self.regressor.predict(example)
        elif (self.true_output is not None) and (self.regressor is None):
            score = calculate_loss(self.scoring_function, child,
                                   self.true_output, self.number_of_labels)
            self.h_training_examples.append(example)
            self.h_training_scores.append(score)
        else:
            # This branch fires when BOTH are set as well as when neither
            # is, so the message states the exact requirement.
            raise ValueError(
                "Exactly one of regressor or true_output must be set.")
        if score < best_score:
            best_score = score
            best_child = child
    return best_child
def fit(self, x_train, y_train):
    """Fit the H heuristic and C cost regressors in two passes.

    Pass 1: run a true-loss-guided FlipBit greedy search per instance to
    collect H training (example, score) pairs, then fit ``self.h_regressor``.
    Pass 2: re-run the search guided by the fitted H regressor, score every
    visited output with its true loss, then fit ``self.c_regressor``.
    Timing for each phase is printed.
    """
    c_training_examples = []
    c_training_scores = []
    h_training_examples = []
    h_training_scores = []
    # time.time() replaces time.clock(), which was removed in Python 3.8.
    start_time = time.time()
    for i in range(len(x_train)):  # xrange was removed in Python 3
        flipbit = FlipBit(x_train[i], self.number_of_labels,
                          self.scoring_function, true_output=y_train[i])
        # Run greedy_search to construct H training examples.
        flipbit.greedy_search(self.depth_of_search)
        h_training_examples.extend(flipbit.get_training_examples())
        h_training_scores.extend(flipbit.get_training_scores())
    h_construction_end_time = time.time()
    print("H training examples construction time: {0:.4f}".format(
        h_construction_end_time - start_time))
    self.h_regressor.fit(vstack(h_training_examples, format='csr'),
                         h_training_scores)
    h_fit_end_time = time.time()
    print("H heuristic train time: {0:.4f}".format(
        h_fit_end_time - h_construction_end_time))
    for i in range(len(x_train)):
        flipbit = FlipBit(x_train[i], self.number_of_labels,
                          self.scoring_function,
                          fitted_regressor=self.h_regressor)
        # Get outputs using the fitted H heuristic.
        outputs = flipbit.greedy_search(self.depth_of_search)
        for output in outputs:
            example = construct_sparse_attributes(x_train[i], output)
            score = calculate_loss(self.scoring_function, output,
                                   y_train[i], self.number_of_labels)
            c_training_examples.append(example)
            c_training_scores.append(score)
    c_construction_end_time = time.time()
    print("C training examples construction time: {0:.4f}".format(
        c_construction_end_time - h_fit_end_time))
    self.c_regressor.fit(vstack(c_training_examples, format='csr'),
                         c_training_scores)
    c_fit_end_time = time.time()
    print("C heuristic train time: {0:.4f}".format(
        c_fit_end_time - c_construction_end_time))
    print("Training examples - Total: {0}, H: {1}, C: {2}".format(
        len(x_train), len(h_training_examples), len(c_training_examples)))