Example #1
    def run_opf_supervised(self):
        """Train a supervised OPF classifier on self.result_data and
        evaluate it on the images under self.test_path.

        Returns:
            (accuracy, precision, recall, f1) computed over the test set.

        Side effects: stores the fitted classifier in self.opf_sup_cls and
        writes timing/metric entries into self.params_output.
        """
        le = preprocessing.LabelEncoder()
        le.fit(self.result_classes)
        # libopf_py requires 32-bit integer labels.
        cross_result_classes = le.transform(self.result_classes)
        cross_result_classes = cross_result_classes.astype(numpy.int32)

        self.log("Result data size: " + str(len(self.result_data)))
        self.log("Cross classes size: " + str(len(cross_result_classes)))
        self.log("Training OPF")
        self.opf_sup_cls = libopf_py.OPF()
        t_opf_start = time.time()
        self.opf_sup_cls.fit(self.result_data,
                             cross_result_classes,
                             metric=self.distance_function)
        t_opf_end = time.time() - t_opf_start
        self.params_output['time_classificator_fit'] = t_opf_end

        images_info = filesprocess.get_files(self.test_path)
        labels_test = []
        labels_hist_array = []

        # Extract test descriptors in parallel; each worker returns a
        # (histograms, labels) pair for one image.
        pool = multiprocessing.Pool()
        try:
            preds_data = pool.map(
                func_star_extract_descriptors_predict_test,
                itertools.izip(images_info, itertools.repeat(self)))
            # Orderly shutdown: close() stops accepting work, join() waits
            # for the workers to exit (terminate() would kill them abruptly).
            pool.close()
            pool.join()
        except:
            pool.terminate()
            raise

        for data in preds_data:
            labels_hist_array.extend(data[0])
            labels_test.extend(data[1])

        labels_hist_array = numpy.asarray(labels_hist_array, numpy.float64)

        self.log("Generating predictions")
        prediction = self.opf_sup_cls.predict(labels_hist_array)
        # Map encoded integer predictions back to the original class names.
        labels_predicted = le.inverse_transform(prediction)

        accuracy = metrics.accuracy_score(labels_test, labels_predicted)
        precision = metrics.precision_score(labels_test, labels_predicted)
        recall = metrics.recall_score(labels_test, labels_predicted)
        f1 = metrics.f1_score(labels_test, labels_predicted)

        self.params_output['accuracy'] = accuracy
        self.params_output['precision'] = precision
        self.params_output['recall'] = recall
        self.params_output['F1'] = f1

        self.log("Accuracy: " + str(accuracy))
        self.log("Precision: " + str(precision))
        self.log("Recall: " + str(recall))
        self.log("F1: " + str(f1))
        return accuracy, precision, recall, f1
    def run_test(self):
        """Train an OPF classifier on self.result_data and evaluate it
        per-image using OVERFEAT descriptors extracted from self.test_path.

        Returns:
            (accuracy, precision, recall, f1) computed over the test set.

        Side effects: stores the fitted classifier in self.opf_cls and
        writes timing/metric entries into self.params_output.
        """
        le = preprocessing.LabelEncoder()
        le.fit(self.result_classes)
        # libopf_py requires 32-bit integer labels.
        cross_result_classes = le.transform(self.result_classes)
        cross_result_classes = cross_result_classes.astype(numpy.int32)

        result_data_array = numpy.asarray(self.result_data, numpy.float64)
        self.log("Creating OPF")
        self.opf_cls = libopf_py.OPF()
        t_opf_start = time.time()
        self.opf_cls.fit(result_data_array,
                         cross_result_classes,
                         metric=self.distance_function)
        t_opf_end = time.time() - t_opf_start
        self.params_output['time_classificator_fit'] = t_opf_end

        labels_test = []
        labels_predicted = []

        images_info = filesprocess.get_files(self.test_path)
        for idx, image_info in enumerate(images_info):
            self.log("Generating hist " + str(idx) + " of " +
                     str(len(images_info)))
            img_path = image_info[0]
            img_folder = image_info[1]  # folder name is the ground-truth class
            features, descriptors = featureextractor.extract_descriptor(
                img_path, None, "OVERFEAT", "OVERFEAT")
            # Images without descriptors are skipped entirely so that
            # labels_test and labels_predicted stay aligned.
            if descriptors is not None:
                descriptors = numpy.asarray(descriptors, numpy.float64)
                prediction = self.opf_cls.predict(descriptors)
                labels_predicted.append(prediction[0])
                label_trans = le.transform([img_folder])
                labels_test.append(label_trans[0])

        # Parenthesized form is valid in both Python 2 and 3.
        print(labels_predicted)
        accuracy = metrics.accuracy_score(labels_test, labels_predicted)
        self.log("Accuracy: " + str(accuracy))
        precision = metrics.precision_score(labels_test, labels_predicted)
        self.log("Precision: " + str(precision))
        recall = metrics.recall_score(labels_test, labels_predicted)
        self.log("Recall: " + str(recall))
        f1 = metrics.f1_score(labels_test, labels_predicted)
        self.log("F1: " + str(f1))

        self.params_output['accuracy'] = accuracy
        self.params_output['precision'] = precision
        self.params_output['recall'] = recall
        self.params_output['F1'] = f1

        return accuracy, precision, recall, f1
  def run_opf_supervised(self):
    """Train a supervised OPF classifier on self.result_data and evaluate
    it on self.test_path using per-image descriptor histograms produced by
    self.opf_predict.

    Returns:
        (accuracy, precision, recall, f1) computed over the test set.

    Side effects: stores the fitted classifier in self.opf_sup_cls and
    writes timing/metric entries into self.params_output.
    """
    le = preprocessing.LabelEncoder()
    le.fit(self.result_classes)
    # libopf_py requires 32-bit integer labels.
    cross_result_classes = le.transform(self.result_classes)
    cross_result_classes = cross_result_classes.astype(numpy.int32)

    self.log("Result data size: " + str(len(self.result_data)))
    self.log("Cross classes size: " + str(len(cross_result_classes)))
    self.log("Training OPF")
    self.opf_sup_cls = libopf_py.OPF()
    t_opf_start = time.time()
    self.opf_sup_cls.fit(self.result_data, cross_result_classes,
                         metric=self.distance_function)
    t_opf_end = time.time() - t_opf_start
    self.params_output['time_classificator_fit'] = t_opf_end

    images_info = filesprocess.get_files(self.test_path)
    labels_test = []
    labels_hist_array = []
    for idx, image_info in enumerate(images_info):
      self.log("Generating hist test " + str(idx) + " of " + str(len(images_info)))
      img_path = image_info[0]
      img_folder = image_info[1]  # folder name is the ground-truth class
      features, descriptors = featureextractor.extract_descriptor(
          img_path, self.thumbnail_size, self.feature_type, self.descriptor_type)
      # Images without descriptors are skipped so the two label lists
      # stay aligned.
      if descriptors is not None:
        label_hist = self.opf_predict(descriptors, self.n_clusters)
        labels_hist_array.append(label_hist)
        labels_test.append(img_folder)

    labels_hist_array = numpy.asarray(labels_hist_array, numpy.float64)

    self.log("Generating predictions")
    prediction = self.opf_sup_cls.predict(labels_hist_array)
    # Map encoded integer predictions back to the original class names.
    labels_predicted = le.inverse_transform(prediction)

    accuracy = metrics.accuracy_score(labels_test, labels_predicted)
    precision = metrics.precision_score(labels_test, labels_predicted)
    recall = metrics.recall_score(labels_test, labels_predicted)
    f1 = metrics.f1_score(labels_test, labels_predicted)

    self.params_output['accuracy'] = accuracy
    self.params_output['precision'] = precision
    self.params_output['recall'] = recall
    self.params_output['F1'] = f1

    self.log("Accuracy: " + str(accuracy))
    self.log("Precision: " + str(precision))
    self.log("Recall: " + str(recall))
    self.log("F1: " + str(f1))
    return accuracy, precision, recall, f1
Example #4
        def _opf():
            """Fit OPF on the enclosing scope's train split, record the fit
            time and test-set precision/recall/F1 into row i of opf_results."""
            # libopf_py only accepts 32-bit integer labels.
            train_labels = label_train.astype(np.int32)
            test_labels = label_test.astype(np.int32)
            classifier = libopf_py.OPF()
            start = time()
            classifier.fit(data_train, train_labels)

            opf_results[i, 3] = time() - start
            start = time()
            preds = classifier.predict(data_test)
            opf_results[i, 0] = precision_score(test_labels, preds)
            opf_results[i, 1] = recall_score(test_labels, preds)
            opf_results[i, 2] = f1_score(test_labels, preds)
            gc.collect()
Example #5
    def opf():
        """Fit OPF on precomputed train distances, predict the test split,
        and print timings plus a classification report and confusion matrix."""
        # libopf_py only supports 32-bit integer labels at the moment.
        train_labels = label_train.astype(numpy.int32)
        test_labels = label_test.astype(numpy.int32)

        clf = libopf_py.OPF()

        start = time.time()
        clf.fit(dist_train, train_labels, precomputed_distance=True)
        # alternative: clf.fit(..., learning="agglomerative", split=0.8)
        print("OPF: time elapsed in fitting: %f secs" % (time.time() - start))

        start = time.time()
        preds = clf.predict(dist_test)
        print("OPF: time elapsed in predicting: %f secs" % (time.time() - start))

        print("Classification report for OPF:\n%s\n" %
              (classification_report(test_labels, preds)))
        print("Confusion matrix:\n%s" %
              confusion_matrix(test_labels, preds))
  def run_opf_supervised(self):
    """Train a supervised OPF classifier on self.result_data and evaluate
    it on self.test_path using k-means bag-of-words histograms.

    Returns:
        (accuracy, precision, recall, f1) computed over the test set,
        with both truth and predictions in label-encoded integer form.

    Side effects: writes timing/metric entries into self.params_output.
    """
    le = preprocessing.LabelEncoder()
    le.fit(self.result_classes)
    # libopf_py requires 32-bit integer labels.
    cross_result_classes = le.transform(self.result_classes)
    cross_result_classes = cross_result_classes.astype(numpy.int32)

    O = libopf_py.OPF()
    result_data_array = numpy.array(self.result_data)
    self.log("Training OPF")
    t_opf_start = time.time()
    O.fit(result_data_array, cross_result_classes,
          metric=self.distance_function)
    t_opf_end = time.time() - t_opf_start
    self.params_output['time_classificator_fit'] = t_opf_end
    images_info = filesprocess.get_files(self.test_path)

    bins = range(self.kmeans_k)
    labels_test = []
    labels_hist_array = []
    for idx, image_info in enumerate(images_info):
      self.log("Generating hist " + str(idx) + " of " + str(len(images_info)))
      img_path = image_info[0]
      img_folder = image_info[1]  # folder name is the ground-truth class
      label_test = le.transform([img_folder])
      features, descriptors = featureextractor.extract_descriptor(
          img_path, self.thumbnail_size, self.feature_type, self.descriptor_type)
      # Images without descriptors are skipped so the two label lists
      # stay aligned.
      if descriptors is not None:
        labels = self.kmeans_cls.predict(descriptors)
        labels_hist = numpy.histogram(labels, bins=bins, density=True)[0]
        labels_hist_array.append(labels_hist)
        # Append the scalar encoded label, not the 1-element array that
        # le.transform returns (matches run_test; a list of arrays breaks
        # the sklearn metric calls below).
        labels_test.append(label_test[0])
    labels_hist_array = numpy.asarray(labels_hist_array, numpy.float64)
    labels_predicted = O.predict(labels_hist_array)

    accuracy = metrics.accuracy_score(labels_test, labels_predicted)
    precision = metrics.precision_score(labels_test, labels_predicted)
    recall = metrics.recall_score(labels_test, labels_predicted)
    f1 = metrics.f1_score(labels_test, labels_predicted)

    self.params_output['accuracy'] = accuracy
    self.params_output['precision'] = precision
    self.params_output['recall'] = recall
    self.params_output['F1'] = f1

    self.log("Accuracy: " + str(accuracy))
    self.log("Precision: " + str(precision))
    self.log("Recall: " + str(recall))
    self.log("F1: " + str(f1))
    return accuracy, precision, recall, f1