def run_opf_supervised(self):
    """Train an OPF classifier on the stored training data and score it.

    The class names in ``self.result_classes`` are encoded with a
    ``LabelEncoder`` and cast to int32 before being handed to
    ``libopf_py.OPF.fit`` together with ``self.result_data``.  The fit
    duration (seconds) is stored in
    ``self.params_output['time_classificator_fit']``.

    Test features are extracted in parallel: every entry returned by
    ``filesprocess.get_files(self.test_path)`` is paired with ``self`` and
    sent to ``func_star_extract_descriptors_predict_test`` through a
    ``multiprocessing.Pool``.  Each worker result is indexed as
    ``(histograms, labels)`` and both parts are concatenated.

    Side effects: sets ``self.opf_sup_cls`` and the ``accuracy``,
    ``precision``, ``recall`` and ``F1`` entries of ``self.params_output``,
    and logs progress through ``self.log``.

    NOTE(review): precision/recall/F1 are computed with the default
    averaging of ``metrics`` (presumably sklearn.metrics) -- confirm this is
    what is wanted for multi-class labels.

    Returns:
        tuple: ``(accuracy, precision, recall, f1)``.
    """
    le = preprocessing.LabelEncoder()
    le.fit(self.result_classes)
    cross_result_classes = le.transform(self.result_classes)
    cross_result_classes = cross_result_classes.astype(numpy.int32)

    self.log("Result data size: " + str(len(self.result_data)))
    self.log("Cross classes size: " + str(len(cross_result_classes)))
    self.log("Training OPF")

    self.opf_sup_cls = libopf_py.OPF()
    t_opf_start = time.time()
    self.opf_sup_cls.fit(self.result_data, cross_result_classes,
                         metric=self.distance_function)
    self.params_output['time_classificator_fit'] = time.time() - t_opf_start

    images_info = filesprocess.get_files(self.test_path)

    # Explicit (image_info, self) pairs: same items the izip/repeat
    # combination produced, without depending on the Python-2-only izip.
    worker_args = [(image_info, self) for image_info in images_info]

    pool = multiprocessing.Pool()
    try:
        preds_data = pool.map(func_star_extract_descriptors_predict_test,
                              worker_args)
    finally:
        # Always release the worker processes, even when extraction fails.
        pool.close()
        pool.terminate()

    labels_hist_array = []
    labels_test = []
    for data in preds_data:
        labels_hist_array.extend(data[0])
        labels_test.extend(data[1])
    labels_hist_array = numpy.asarray(labels_hist_array, numpy.float64)

    self.log("Generating predictions")
    prediction = self.opf_sup_cls.predict(labels_hist_array)
    # Map the integer predictions back to the original class names.
    labels_predicted = le.inverse_transform(prediction)

    accuracy = metrics.accuracy_score(labels_test, labels_predicted)
    precision = metrics.precision_score(labels_test, labels_predicted)
    recall = metrics.recall_score(labels_test, labels_predicted)
    f1 = metrics.f1_score(labels_test, labels_predicted)

    self.params_output['accuracy'] = accuracy
    self.params_output['precision'] = precision
    self.params_output['recall'] = recall
    self.params_output['F1'] = f1

    self.log("Accuracy: " + str(accuracy))
    self.log("Precision: " + str(precision))
    self.log("Recall: " + str(recall))
    self.log("F1: " + str(f1))
    return accuracy, precision, recall, f1
def run_test(self):
    """Train OPF on the stored descriptors and classify each test image.

    ``self.result_classes`` is label-encoded and cast to int32,
    ``self.result_data`` is converted to a float64 array, and both are used
    to fit ``libopf_py.OPF`` (stored as ``self.opf_cls``).  The fit duration
    (seconds) goes to ``self.params_output['time_classificator_fit']``.

    For every entry of ``filesprocess.get_files(self.test_path)`` the
    "OVERFEAT" descriptor is extracted; entries for which the extractor
    returns ``None`` are skipped.  The first element of the prediction is
    compared against the encoded folder name (second field of the entry).

    Side effects: prints the list of predicted labels, logs progress and
    scores through ``self.log`` and stores ``accuracy``, ``precision``,
    ``recall`` and ``F1`` in ``self.params_output``.

    NOTE(review): precision/recall/F1 are computed with the default
    averaging of ``metrics`` (presumably sklearn.metrics) -- confirm this is
    what is wanted for multi-class labels.

    Returns:
        tuple: ``(accuracy, precision, recall, f1)``.
    """
    le = preprocessing.LabelEncoder()
    le.fit(self.result_classes)
    cross_result_classes = le.transform(self.result_classes)
    cross_result_classes = cross_result_classes.astype(numpy.int32)
    result_data_array = numpy.asarray(self.result_data, numpy.float64)

    self.log("Creating OPF")
    self.opf_cls = libopf_py.OPF()
    t_opf_start = time.time()
    self.opf_cls.fit(result_data_array, cross_result_classes,
                     metric=self.distance_function)
    self.params_output['time_classificator_fit'] = time.time() - t_opf_start

    labels_test = []
    labels_predicted = []
    images_info = filesprocess.get_files(self.test_path)
    total_images = str(len(images_info))
    for idx, image_info in enumerate(images_info):
        self.log("Generating hist " + str(idx) + " of " + total_images)
        img_path = image_info[0]
        img_folder = image_info[1]
        _features, descriptors = featureextractor.extract_descriptor(
            img_path, None, "OVERFEAT", "OVERFEAT")
        # Identity test: `!= None` on a NumPy array compares elementwise
        # and cannot be used as an `if` condition.
        if descriptors is not None:
            descriptors = numpy.asarray(descriptors, numpy.float64)
            prediction = self.opf_cls.predict(descriptors)
            labels_predicted.append(prediction[0])
            labels_test.append(le.transform([img_folder])[0])

    # Single-argument call form prints the same on Python 2 and Python 3.
    print(labels_predicted)

    accuracy = metrics.accuracy_score(labels_test, labels_predicted)
    self.log("Accuracy: " + str(accuracy))
    precision = metrics.precision_score(labels_test, labels_predicted)
    self.log("Precision: " + str(precision))
    recall = metrics.recall_score(labels_test, labels_predicted)
    self.log("Recall: " + str(recall))
    f1 = metrics.f1_score(labels_test, labels_predicted)
    self.log("F1: " + str(f1))

    self.params_output['accuracy'] = accuracy
    self.params_output['precision'] = precision
    self.params_output['recall'] = recall
    self.params_output['F1'] = f1
    return accuracy, precision, recall, f1
def run_opf_supervised(self):
    """Train an OPF classifier on the stored training data and score it.

    The class names in ``self.result_classes`` are encoded with a
    ``LabelEncoder`` and cast to int32 before being handed to
    ``libopf_py.OPF.fit`` together with ``self.result_data``.  The fit
    duration (seconds) is stored in
    ``self.params_output['time_classificator_fit']``.

    For every entry of ``filesprocess.get_files(self.test_path)`` the
    descriptors are extracted with the configured thumbnail size, feature
    type and descriptor type, turned into a feature row by
    ``self.opf_predict(descriptors, self.n_clusters)`` and collected
    together with the entry's folder name (second field), which serves as
    the true label.  Entries whose descriptors are ``None`` are skipped.

    Side effects: sets ``self.opf_sup_cls`` and the ``accuracy``,
    ``precision``, ``recall`` and ``F1`` entries of ``self.params_output``,
    and logs progress through ``self.log``.

    NOTE(review): precision/recall/F1 are computed with the default
    averaging of ``metrics`` (presumably sklearn.metrics) -- confirm this is
    what is wanted for multi-class labels.

    Returns:
        tuple: ``(accuracy, precision, recall, f1)``.
    """
    le = preprocessing.LabelEncoder()
    le.fit(self.result_classes)
    cross_result_classes = le.transform(self.result_classes)
    cross_result_classes = cross_result_classes.astype(numpy.int32)

    self.log("Result data size: " + str(len(self.result_data)))
    self.log("Cross classes size: " + str(len(cross_result_classes)))
    self.log("Training OPF")

    self.opf_sup_cls = libopf_py.OPF()
    t_opf_start = time.time()
    self.opf_sup_cls.fit(self.result_data, cross_result_classes,
                         metric=self.distance_function)
    self.params_output['time_classificator_fit'] = time.time() - t_opf_start

    images_info = filesprocess.get_files(self.test_path)
    total_images = str(len(images_info))
    labels_test = []
    labels_hist_array = []
    for idx, image_info in enumerate(images_info):
        self.log("Generating hist test " + str(idx) + " of " + total_images)
        img_path = image_info[0]
        img_folder = image_info[1]
        _features, descriptors = featureextractor.extract_descriptor(
            img_path, self.thumbnail_size, self.feature_type,
            self.descriptor_type)
        # Identity test: `!= None` on a NumPy array compares elementwise
        # and cannot be used as an `if` condition.
        if descriptors is not None:
            label_hist = self.opf_predict(descriptors, self.n_clusters)
            labels_hist_array.append(label_hist)
            labels_test.append(img_folder)

    labels_hist_array = numpy.asarray(labels_hist_array, numpy.float64)

    self.log("Generating predictions")
    prediction = self.opf_sup_cls.predict(labels_hist_array)
    # Map the integer predictions back to the original class names.
    labels_predicted = le.inverse_transform(prediction)

    accuracy = metrics.accuracy_score(labels_test, labels_predicted)
    precision = metrics.precision_score(labels_test, labels_predicted)
    recall = metrics.recall_score(labels_test, labels_predicted)
    f1 = metrics.f1_score(labels_test, labels_predicted)

    self.params_output['accuracy'] = accuracy
    self.params_output['precision'] = precision
    self.params_output['recall'] = recall
    self.params_output['F1'] = f1

    self.log("Accuracy: " + str(accuracy))
    self.log("Precision: " + str(precision))
    self.log("Recall: " + str(recall))
    self.log("F1: " + str(f1))
    return accuracy, precision, recall, f1
def _opf():
    """Fit and score an OPF classifier, writing the results to ``opf_results[i]``.

    Uses ``data_train``, ``label_train``, ``data_test``, ``label_test``,
    ``opf_results`` and ``i`` from the surrounding scope.  Row ``i`` of
    ``opf_results`` receives:

    * column 0 -- precision
    * column 1 -- recall
    * column 2 -- F1
    * column 3 -- time spent in ``fit`` (seconds)

    A garbage collection pass is forced before returning.
    """
    # Labels are cast to int32 before being passed to libopf_py.
    train_labels = label_train.astype(np.int32)
    test_labels = label_test.astype(np.int32)

    classifier = libopf_py.OPF()

    fit_started = time()
    classifier.fit(data_train, train_labels)
    opf_results[i, 3] = time() - fit_started

    # Only the fit is timed; the timer that used to be restarted here was
    # never read, so it has been dropped.
    predicted = classifier.predict(data_test)

    opf_results[i, 0] = precision_score(test_labels, predicted)
    opf_results[i, 1] = recall_score(test_labels, predicted)
    opf_results[i, 2] = f1_score(test_labels, predicted)

    gc.collect()
def opf():
    """Fit OPF on a precomputed distance matrix and print an evaluation.

    Uses ``dist_train``, ``dist_test``, ``label_train`` and ``label_test``
    from the surrounding scope.  Prints the fit and predict durations, the
    classification report and the confusion matrix; returns nothing.
    """
    # OPF only supports 32 bits labels at the moment
    train_labels = label_train.astype(numpy.int32)
    test_labels = label_test.astype(numpy.int32)

    classifier = libopf_py.OPF()

    started = time.time()
    classifier.fit(dist_train, train_labels, precomputed_distance=True)
    fit_secs = time.time() - started
    print("OPF: time elapsed in fitting: %f secs" % (fit_secs))

    started = time.time()
    predicted = classifier.predict(dist_test)
    predict_secs = time.time() - started
    print("OPF: time elapsed in predicting: %f secs" % (predict_secs))

    report = classification_report(test_labels, predicted)
    print("Classification report for OPF:\n%s\n" % (report))

    matrix = confusion_matrix(test_labels, predicted)
    print("Confusion matrix:\n%s" % matrix)
def run_opf_supervised(self):
    """Train OPF on the stored training data and score it on the test images.

    ``self.result_classes`` is label-encoded and cast to int32, and
    ``self.result_data`` is converted to an array; both are used to fit a
    local ``libopf_py.OPF`` instance.  The fit duration (seconds) is stored
    in ``self.params_output['time_classificator_fit']``.

    For every entry of ``filesprocess.get_files(self.test_path)`` the
    descriptors are extracted, assigned to clusters by
    ``self.kmeans_cls.predict`` and summarised as a density-normalised
    histogram, which is the feature row given to the classifier.  Entries
    whose descriptors are ``None`` are skipped.  Scores are computed on the
    encoded (integer) labels.

    Side effects: stores ``accuracy``, ``precision``, ``recall`` and ``F1``
    in ``self.params_output`` and logs progress through ``self.log``.

    NOTE(review): precision/recall/F1 are computed with the default
    averaging of ``metrics`` (presumably sklearn.metrics) -- confirm this is
    what is wanted for multi-class labels.

    Returns:
        tuple: ``(accuracy, precision, recall, f1)``.
    """
    le = preprocessing.LabelEncoder()
    le.fit(self.result_classes)
    cross_result_classes = le.transform(self.result_classes)
    cross_result_classes = cross_result_classes.astype(numpy.int32)

    opf_classifier = libopf_py.OPF()
    result_data_array = numpy.array(self.result_data)

    self.log("Training OPF")
    t_opf_start = time.time()
    opf_classifier.fit(result_data_array, cross_result_classes,
                       metric=self.distance_function)
    self.params_output['time_classificator_fit'] = time.time() - t_opf_start

    images_info = filesprocess.get_files(self.test_path)
    total_images = str(len(images_info))

    # NOTE(review): a sequence passed as `bins` is read as bin *edges*, so
    # range(k) produces k-1 bins.  Left unchanged because the training rows
    # in self.result_data are built elsewhere and must use the same binning
    # -- confirm against the training code.
    bins = range(self.kmeans_k)

    labels_test = []
    labels_hist_array = []
    for idx, image_info in enumerate(images_info):
        self.log("Generating hist " + str(idx) + " of " + total_images)
        img_path = image_info[0]
        img_folder = image_info[1]
        label_test = le.transform([img_folder])
        _features, descriptors = featureextractor.extract_descriptor(
            img_path, self.thumbnail_size, self.feature_type,
            self.descriptor_type)
        # Identity test: `!= None` on a NumPy array compares elementwise
        # and cannot be used as an `if` condition.
        if descriptors is not None:
            labels = self.kmeans_cls.predict(descriptors)
            labels_hist = numpy.histogram(labels, bins=bins, density=True)[0]
            labels_hist_array.append(labels_hist)
            # transform() returns a 1-element array; keep the scalar so the
            # true labels form a flat list like the predictions.
            labels_test.append(label_test[0])

    labels_hist_array = numpy.asarray(labels_hist_array, numpy.float64)
    labels_predicted = opf_classifier.predict(labels_hist_array)

    accuracy = metrics.accuracy_score(labels_test, labels_predicted)
    precision = metrics.precision_score(labels_test, labels_predicted)
    recall = metrics.recall_score(labels_test, labels_predicted)
    f1 = metrics.f1_score(labels_test, labels_predicted)

    self.params_output['accuracy'] = accuracy
    self.params_output['precision'] = precision
    self.params_output['recall'] = recall
    self.params_output['F1'] = f1

    self.log("Accuracy: " + str(accuracy))
    self.log("Precision: " + str(precision))
    self.log("Recall: " + str(recall))
    self.log("F1: " + str(f1))
    return accuracy, precision, recall, f1