def train(self):
   """Collect one OVERFEAT descriptor per training image and fit a linear SVM.

   For every entry returned by ``filesprocess.get_files(self.train_path)``
   the first descriptor of the image (when extraction yields one) is stored
   in ``self.result_data`` and the entry's folder in ``self.result_classes``.
   Timing figures are written to ``self.params_output`` and the fitted
   classifier is kept in ``self.svm_cls``.
   """
   self.log("BoW Overfeat - Starting Train")
   self.log("train_path: " + self.train_path)

   self.log("Listing files to process")
   total_images_info = filesprocess.get_files(self.train_path)
   n_images = len(total_images_info)
   self.result_classes = []
   self.result_data = []
   t_ext_desc_sum = 0
   t_feature_extract_start = time.time()
   for idx, image_info in enumerate(total_images_info):
     self.log("Processing " + str(idx) + " of " + str(n_images))
     img_path = image_info[0]
     img_folder = image_info[1]
     t_ext_desc_start = time.time()
     _, descriptors = featureextractor.extract_descriptor(img_path, None, "OVERFEAT", "OVERFEAT")
     t_ext_desc_sum += time.time() - t_ext_desc_start
     # Identity test: "!= None" is evaluated element-wise when the
     # extractor hands back an array, which is not a usable truth value.
     if descriptors is not None:
       print("Debug 1:  %s" % (descriptors[0].shape,))
       self.result_data.append(descriptors[0])
       self.result_classes.append(img_folder)
   t_feature_extract_end = time.time() - t_feature_extract_start
   # Guard the per-image average so an empty file list cannot divide by zero.
   self.params_output['time_ext_desc_med'] = (t_ext_desc_sum / n_images) if n_images else 0.0
   self.params_output['time_feature_extraction'] = t_feature_extract_end
   self.log("Creating SVM")
   # NOTE(review): loss='l2' / class_weight='auto' look like spellings from
   # an older scikit-learn release - confirm against the installed version.
   self.svm_cls = svm.LinearSVC(C=1.0, loss='l2', class_weight='auto')
   t_svm_start = time.time()
   self.svm_cls.fit(self.result_data, self.result_classes)
   self.params_output['time_classificator_fit'] = time.time() - t_svm_start
  def run_test(self):
    labels_test = []
    labels_predicted = []

    images_info = filesprocess.get_files(self.test_path)
    for idx, image_info in enumerate(images_info):
      self.log("Generating hist " + str(idx) + " of " + str(len(images_info)))
      img_path = image_info[0]
      img_folder = image_info[1]
      features, descriptors = featureextractor.extract_descriptor(img_path, None,"OVERFEAT","OVERFEAT")
      if (descriptors != None):
        prediction = self.svm_cls.predict(descriptors)
        print "Debug 2: ", prediction.shape
        labels_predicted.append(prediction[0])
        labels_test.append(img_folder)

    print labels_predicted
    accuracy = metrics.accuracy_score(labels_test, labels_predicted)
    self.log("Accuracy: " + str(accuracy))
    precision = metrics.precision_score(labels_test, labels_predicted)
    self.log("Precision: " + str(precision))
    recall = metrics.recall_score(labels_test, labels_predicted)
    self.log("Recall: " + str(recall))
    f1 = metrics.f1_score(labels_test, labels_predicted)
    self.log("F1: " + str(f1))

    self.params_output['accuracy'] = accuracy
    self.params_output['precision'] = precision
    self.params_output['recall'] = recall
    self.params_output['F1'] = f1
    
    return accuracy, precision, recall, f1
    def train(self):
        """Collect one OVERFEAT descriptor per training image.

        For every entry returned by
        ``filesprocess.get_files(self.train_path)`` the first descriptor of
        the image (when extraction yields one) is stored in
        ``self.result_data`` and the entry's folder in
        ``self.result_classes``.  Extraction timings are written to
        ``self.params_output``.  No classifier is fitted here.
        """
        self.log("BoW Overfeat - Starting Train")
        self.log("train_path: " + self.train_path)

        self.log("Listing files to process")
        total_images_info = filesprocess.get_files(self.train_path)
        n_images = len(total_images_info)
        self.result_classes = []
        self.result_data = []
        t_ext_desc_sum = 0
        t_feature_extract_start = time.time()
        for idx, image_info in enumerate(total_images_info):
            self.log("Processing " + str(idx) + " of " + str(n_images))
            img_path = image_info[0]
            img_folder = image_info[1]
            t_ext_desc_start = time.time()
            _, descriptors = featureextractor.extract_descriptor(
                img_path, None, "OVERFEAT", "OVERFEAT")
            t_ext_desc_sum += time.time() - t_ext_desc_start
            # Identity test: "!= None" is evaluated element-wise when the
            # extractor hands back an array.
            if descriptors is not None:
                print("Debug 1:  %s" % (descriptors[0].shape,))
                self.result_data.append(descriptors[0])
                self.result_classes.append(img_folder)
        t_feature_extract_end = time.time() - t_feature_extract_start
        # Guard the per-image average so an empty file list cannot divide
        # by zero.
        self.params_output['time_ext_desc_med'] = (
            t_ext_desc_sum / n_images if n_images else 0.0)
        self.params_output['time_feature_extraction'] = t_feature_extract_end
Esempio n. 4
0
def extract(image_info, params):
    """Run descriptor extraction for a single image and discard the result.

    ``image_info[0]`` is the image path; ``params`` holds, in order, the
    feature type, the descriptor type and the thumbnail size.  Nothing is
    returned.
    """
    path = image_info[0]
    feat_kind, desc_kind, thumb_size = params[0], params[1], params[2]
    # The call result is unpacked, so the extractor must return a pair.
    _keypoints, _descriptors = featureextractor.extract_descriptor(
        path, thumb_size, feat_kind, desc_kind)
Esempio n. 5
0
def extract_descriptor_one(image_info, self):
  """Return the descriptors of one image as a list.

  ``image_info[0]`` is the image path; ``self`` supplies
  ``thumbnail_size``, ``feature_type`` and ``descriptor_type``.  An empty
  list is returned when the extractor yields no descriptors.
  """
  img_path = image_info[0]
  _, descriptors = featureextractor.extract_descriptor(img_path, self.thumbnail_size, self.feature_type, self.descriptor_type)
  # Identity test: "!= None" is evaluated element-wise on arrays.
  if descriptors is None:
    return []
  return list(descriptors)
Esempio n. 6
0
def extract_descriptor_one(image_info, self):
    """Return the descriptors of one image as a list.

    ``image_info[0]`` is the image path; ``self`` supplies
    ``thumbnail_size``, ``feature_type`` and ``descriptor_type``.  An empty
    list is returned when the extractor yields no descriptors.
    """
    img_path = image_info[0]
    _, descriptors = featureextractor.extract_descriptor(
        img_path, self.thumbnail_size, self.feature_type, self.descriptor_type)
    # Identity test: "!= None" is evaluated element-wise on arrays.
    if descriptors is None:
        return []
    return list(descriptors)
def extract_descriptors_predict_test(image_info, self):
  """Build the test histogram of one image via ``self.opf_predict``.

  Returns ``(labels_hist_array, labels_test)``: two lists holding the
  histogram and the image's folder, or two empty lists when the image
  yields no descriptors.
  """
  img_path = image_info[0]
  img_folder = image_info[1]
  labels_hist_array = []
  labels_test = []
  self.log("Processing (desc+predict test): " + str(img_path))
  _, descriptors = featureextractor.extract_descriptor(img_path, self.thumbnail_size, self.feature_type, self.descriptor_type)
  # "is not None" instead of "!= None" (element-wise on arrays); an empty
  # descriptor set has nothing to assign, so it is skipped like a missing one.
  if descriptors is not None and len(descriptors) > 0:
    labels_hist_array.append(self.opf_predict(descriptors, self.n_clusters))
    labels_test.append(img_folder)
  return (labels_hist_array, labels_test)
    def run_test(self):
        """Fit an OPF classifier on the training data and score the test set.

        ``self.result_classes`` is integer-encoded with a ``LabelEncoder``
        and, together with ``self.result_data``, used to fit
        ``self.opf_cls``.  Each test image is then described with OVERFEAT
        and classified; images without descriptors are left out.  The four
        scores are logged, stored in ``self.params_output`` and returned as
        ``(accuracy, precision, recall, f1)``.
        """
        le = preprocessing.LabelEncoder()
        le.fit(self.result_classes)
        cross_result_classes = le.transform(self.result_classes)
        cross_result_classes = cross_result_classes.astype(numpy.int32)

        result_data_array = numpy.asarray(self.result_data, numpy.float64)
        self.log("Creating OPF")
        self.opf_cls = libopf_py.OPF()
        t_opf_start = time.time()
        self.opf_cls.fit(result_data_array,
                         cross_result_classes,
                         metric=self.distance_function)
        self.params_output['time_classificator_fit'] = (
            time.time() - t_opf_start)

        labels_test = []
        labels_predicted = []

        images_info = filesprocess.get_files(self.test_path)
        n_images = len(images_info)
        for idx, image_info in enumerate(images_info):
            self.log("Generating hist " + str(idx) + " of " + str(n_images))
            img_path = image_info[0]
            img_folder = image_info[1]
            _, descriptors = featureextractor.extract_descriptor(
                img_path, None, "OVERFEAT", "OVERFEAT")
            # Identity test: "!= None" is evaluated element-wise on arrays.
            if descriptors is not None:
                descriptors = numpy.asarray(descriptors, numpy.float64)
                prediction = self.opf_cls.predict(descriptors)
                labels_predicted.append(prediction[0])
                # Encode the true label the same way as the training labels
                # so it is comparable with the integer predictions.
                labels_test.append(le.transform([img_folder])[0])

        print(labels_predicted)
        accuracy = metrics.accuracy_score(labels_test, labels_predicted)
        self.log("Accuracy: " + str(accuracy))
        precision = metrics.precision_score(labels_test, labels_predicted)
        self.log("Precision: " + str(precision))
        recall = metrics.recall_score(labels_test, labels_predicted)
        self.log("Recall: " + str(recall))
        f1 = metrics.f1_score(labels_test, labels_predicted)
        self.log("F1: " + str(f1))

        self.params_output['accuracy'] = accuracy
        self.params_output['precision'] = precision
        self.params_output['recall'] = recall
        self.params_output['F1'] = f1

        return accuracy, precision, recall, f1
Esempio n. 9
0
def extract_descriptors_predict_test(image_info, self):
    """Build the test histogram of one image via ``self.opf_predict``.

    Returns ``(labels_hist_array, labels_test)``: two lists holding the
    histogram and the image's folder, or two empty lists when the image
    yields no descriptors.
    """
    img_path = image_info[0]
    img_folder = image_info[1]
    labels_hist_array = []
    labels_test = []
    self.log("Processing (desc+predict test): " + str(img_path))
    _, descriptors = featureextractor.extract_descriptor(
        img_path, self.thumbnail_size, self.feature_type, self.descriptor_type)
    # "is not None" instead of "!= None" (element-wise on arrays); an empty
    # descriptor set has nothing to assign, so it is skipped like a missing
    # one.
    if descriptors is not None and len(descriptors) > 0:
        labels_hist_array.append(
            self.opf_predict(descriptors, self.n_clusters))
        labels_test.append(img_folder)
    return (labels_hist_array, labels_test)
def extract_descriptor_one(image_info, self):
  """Return the descriptors of one image with a matching list of labels.

  Returns ``(tmp_desc, labels_true)`` where ``tmp_desc`` lists the image's
  descriptors and ``labels_true`` repeats the image's folder once per
  descriptor.  Both lists are empty when the extractor yields nothing.
  """
  img_path = image_info[0]
  img_folder = image_info[1]
  self.log("Processing: " + str(img_path))
  _, descriptors = featureextractor.extract_descriptor(img_path, self.thumbnail_size, self.feature_type, self.descriptor_type)
  # Identity test: "!= None" is evaluated element-wise on arrays.
  tmp_desc = [] if descriptors is None else list(descriptors)
  labels_true = [img_folder] * len(tmp_desc)
  return (tmp_desc, labels_true)
  def run_opf_supervised(self):
    """Fit a supervised OPF on the training histograms and score the test set.

    ``self.result_classes`` is integer-encoded with a ``LabelEncoder`` and,
    with ``self.result_data``, used to fit ``self.opf_sup_cls``.  A
    histogram is built for every test image through ``self.opf_predict``;
    images without descriptors are left out.  Predictions are mapped back to
    the original labels before scoring.  The four scores are stored in
    ``self.params_output``, logged and returned as
    ``(accuracy, precision, recall, f1)``.
    """
    le = preprocessing.LabelEncoder()
    le.fit(self.result_classes)
    cross_result_classes = le.transform(self.result_classes)
    cross_result_classes = cross_result_classes.astype(numpy.int32)

    self.log("Result data size: " + str(len(self.result_data)))
    self.log("Cross classes size: " + str(len(cross_result_classes)))
    self.log("Training OPF")
    self.opf_sup_cls = libopf_py.OPF()
    # Hand the classifier a float64 array, matching what is passed to
    # predict() below.
    train_data = numpy.asarray(self.result_data, numpy.float64)
    t_opf_start = time.time()
    self.opf_sup_cls.fit(train_data, cross_result_classes, metric=self.distance_function)
    self.params_output['time_classificator_fit'] = time.time() - t_opf_start

    images_info = filesprocess.get_files(self.test_path)
    n_images = len(images_info)
    labels_test = []
    labels_hist_array = []
    for idx, image_info in enumerate(images_info):
      self.log("Generating hist test " + str(idx) + " of " + str(n_images))
      img_path = image_info[0]
      img_folder = image_info[1]
      _, descriptors = featureextractor.extract_descriptor(img_path, self.thumbnail_size, self.feature_type, self.descriptor_type)
      # "is not None" instead of "!= None" (element-wise on arrays); empty
      # descriptor sets are skipped like missing ones.
      if descriptors is not None and len(descriptors) > 0:
        labels_hist_array.append(self.opf_predict(descriptors, self.n_clusters))
        labels_test.append(img_folder)

    labels_hist_array = numpy.asarray(labels_hist_array, numpy.float64)

    self.log("Generating predictions")
    prediction = self.opf_sup_cls.predict(labels_hist_array)
    # Back to the original folder labels so they compare with labels_test.
    labels_predicted = le.inverse_transform(prediction)

    accuracy = metrics.accuracy_score(labels_test, labels_predicted)
    precision = metrics.precision_score(labels_test, labels_predicted)
    recall = metrics.recall_score(labels_test, labels_predicted)
    f1 = metrics.f1_score(labels_test, labels_predicted)

    self.params_output['accuracy'] = accuracy
    self.params_output['precision'] = precision
    self.params_output['recall'] = recall
    self.params_output['F1'] = f1

    self.log("Accuracy: " + str(accuracy))
    self.log("Precision: " + str(precision))
    self.log("Recall: " + str(recall))
    self.log("F1: " + str(f1))
    return accuracy, precision, recall, f1
Esempio n. 12
0
def extract_descriptor_one(image_info, self):
    """Return the descriptors of one image with a matching list of labels.

    Returns ``(tmp_desc, labels_true)`` where ``tmp_desc`` lists the image's
    descriptors and ``labels_true`` repeats the image's folder once per
    descriptor.  Both lists are empty when the extractor yields nothing.
    """
    img_path = image_info[0]
    img_folder = image_info[1]
    self.log("Processing: " + str(img_path))
    _, descriptors = featureextractor.extract_descriptor(
        img_path, self.thumbnail_size, self.feature_type, self.descriptor_type)
    # Identity test: "!= None" is evaluated element-wise on arrays.
    tmp_desc = [] if descriptors is None else list(descriptors)
    labels_true = [img_folder] * len(tmp_desc)
    return (tmp_desc, labels_true)
  def run_test(self):
    """Fit an OPF classifier on the training data and score the test set.

    ``self.result_classes`` is integer-encoded with a ``LabelEncoder`` and,
    together with ``self.result_data``, used to fit ``self.opf_cls``.  Each
    test image is then described with OVERFEAT and classified; images
    without descriptors are left out.  The four scores are logged, stored in
    ``self.params_output`` and returned as
    ``(accuracy, precision, recall, f1)``.
    """
    le = preprocessing.LabelEncoder()
    le.fit(self.result_classes)
    cross_result_classes = le.transform(self.result_classes)
    cross_result_classes = cross_result_classes.astype(numpy.int32)

    result_data_array = numpy.asarray(self.result_data, numpy.float64)
    self.log("Creating OPF")
    self.opf_cls = libopf_py.OPF()
    t_opf_start = time.time()
    self.opf_cls.fit(result_data_array, cross_result_classes, metric=self.distance_function)
    self.params_output['time_classificator_fit'] = time.time() - t_opf_start

    labels_test = []
    labels_predicted = []

    images_info = filesprocess.get_files(self.test_path)
    n_images = len(images_info)
    for idx, image_info in enumerate(images_info):
      self.log("Generating hist " + str(idx) + " of " + str(n_images))
      img_path = image_info[0]
      img_folder = image_info[1]
      _, descriptors = featureextractor.extract_descriptor(img_path, None, "OVERFEAT", "OVERFEAT")
      # Identity test: "!= None" is evaluated element-wise on arrays.
      if descriptors is not None:
        descriptors = numpy.asarray(descriptors, numpy.float64)
        prediction = self.opf_cls.predict(descriptors)
        labels_predicted.append(prediction[0])
        # Encode the true label the same way as the training labels so it
        # is comparable with the integer predictions.
        labels_test.append(le.transform([img_folder])[0])

    print(labels_predicted)
    accuracy = metrics.accuracy_score(labels_test, labels_predicted)
    self.log("Accuracy: " + str(accuracy))
    precision = metrics.precision_score(labels_test, labels_predicted)
    self.log("Precision: " + str(precision))
    recall = metrics.recall_score(labels_test, labels_predicted)
    self.log("Recall: " + str(recall))
    f1 = metrics.f1_score(labels_test, labels_predicted)
    self.log("F1: " + str(f1))

    self.params_output['accuracy'] = accuracy
    self.params_output['precision'] = precision
    self.params_output['recall'] = recall
    self.params_output['F1'] = f1

    return accuracy, precision, recall, f1
Esempio n. 14
0
def extract_descriptors_predict_test(image_info, self):
  """Classify one test image from its cluster-label histogram.

  The image's descriptors are assigned to clusters with
  ``self.kmeans_cls``, turned into a normalised histogram and classified by
  ``self.svm_cls``.  Returns ``(labels_predicted, labels_test)``: two lists
  holding the predicted label and the image's folder, or two empty lists
  when the image yields no descriptors.
  """
  img_path = image_info[0]
  img_folder = image_info[1]
  labels_predicted = []
  labels_test = []
  self.log("Processing (desc+predict test): " + str(img_path))
  _, descriptors = featureextractor.extract_descriptor(img_path, self.thumbnail_size, self.feature_type, self.descriptor_type)
  # "is not None" instead of "!= None" (element-wise on arrays); an empty
  # descriptor set cannot be clustered, so it is skipped like a missing one.
  if descriptors is not None and len(descriptors) > 0:
    labels = self.kmeans_cls.predict(descriptors)
    # NOTE(review): range(k) is read by numpy.histogram as k bin edges, i.e.
    # k-1 bins. Left as is because the histogram length must match what the
    # classifier was trained on.
    bins = range(self.kmeans_k)
    labels_hist = numpy.histogram(labels, bins=bins, density=True)[0]
    # predict() takes a 2-D (n_samples, n_features) array; the histogram is
    # a single sample.
    prediction = self.svm_cls.predict(labels_hist.reshape(1, -1))
    labels_predicted.append(prediction[0])
    labels_test.append(img_folder)

  return (labels_predicted, labels_test)
    def run_opf_supervised(self):
        """Fit a linear SVM on the training histograms and score the test set.

        ``self.svm_cls`` is fitted on ``self.result_data`` /
        ``self.result_classes``.  A histogram is built for every test image
        through ``self.opf_predict``; images without descriptors are left
        out.  The four scores are stored in ``self.params_output``, logged
        and returned as ``(accuracy, precision, recall, f1)``.
        """
        self.log("Creating SVM")
        # NOTE(review): loss='l2' / class_weight='auto' look like spellings
        # from an older scikit-learn release - confirm against the
        # installed version.
        self.svm_cls = svm.LinearSVC(C=1.0, loss='l2', class_weight='auto')
        t_svm_start = time.time()
        self.svm_cls.fit(self.result_data, self.result_classes)
        self.params_output['time_classificator_fit'] = (
            time.time() - t_svm_start)

        images_info = filesprocess.get_files(self.test_path)
        n_images = len(images_info)
        labels_test = []
        labels_hist_array = []
        for idx, image_info in enumerate(images_info):
            self.log("Generating hist test " + str(idx) + " of " +
                     str(n_images))
            img_path = image_info[0]
            img_folder = image_info[1]
            _, descriptors = featureextractor.extract_descriptor(
                img_path, self.thumbnail_size, self.feature_type,
                self.descriptor_type)
            # "is not None" instead of "!= None" (element-wise on arrays);
            # empty descriptor sets are skipped like missing ones.
            if descriptors is not None and len(descriptors) > 0:
                labels_hist_array.append(
                    self.opf_predict(descriptors, self.n_clusters))
                labels_test.append(img_folder)

        labels_hist_array = numpy.asarray(labels_hist_array, numpy.float64)

        self.log("Generating predictions")
        labels_predicted = self.svm_cls.predict(labels_hist_array)

        accuracy = metrics.accuracy_score(labels_test, labels_predicted)
        precision = metrics.precision_score(labels_test, labels_predicted)
        recall = metrics.recall_score(labels_test, labels_predicted)
        f1 = metrics.f1_score(labels_test, labels_predicted)

        self.params_output['accuracy'] = accuracy
        self.params_output['precision'] = precision
        self.params_output['recall'] = recall
        self.params_output['F1'] = f1

        self.log("Accuracy: " + str(accuracy))
        self.log("Precision: " + str(precision))
        self.log("Recall: " + str(recall))
        self.log("F1: " + str(f1))
        return accuracy, precision, recall, f1
Esempio n. 16
0
def extract_descriptors_predict_test(image_info, self):
    """Classify one test image from its cluster-label histogram.

    The image's descriptors are assigned to clusters with
    ``self.kmeans_cls``, turned into a normalised histogram and classified
    by ``self.svm_cls``.  Returns ``(labels_predicted, labels_test)``: two
    lists holding the predicted label and the image's folder, or two empty
    lists when the image yields no descriptors.
    """
    img_path = image_info[0]
    img_folder = image_info[1]
    labels_predicted = []
    labels_test = []
    self.log("Processing (desc+predict test): " + str(img_path))
    _, descriptors = featureextractor.extract_descriptor(
        img_path, self.thumbnail_size, self.feature_type, self.descriptor_type)
    # "is not None" instead of "!= None" (element-wise on arrays); an empty
    # descriptor set cannot be clustered, so it is skipped like a missing
    # one.
    if descriptors is not None and len(descriptors) > 0:
        labels = self.kmeans_cls.predict(descriptors)
        # NOTE(review): range(k) is read by numpy.histogram as k bin edges,
        # i.e. k-1 bins. Left as is because the histogram length must match
        # what the classifier was trained on.
        bins = range(self.kmeans_k)
        labels_hist = numpy.histogram(labels, bins=bins, density=True)[0]
        # predict() takes a 2-D (n_samples, n_features) array; the
        # histogram is a single sample.
        prediction = self.svm_cls.predict(labels_hist.reshape(1, -1))
        labels_predicted.append(prediction[0])
        labels_test.append(img_folder)

    return (labels_predicted, labels_test)
def extract_descriptors_and_predict(image_info, self):
  """Build the training histogram of one image via ``self.opf_predict``.

  Returns ``(result_classes, result_data, t_ext_desc, t_cluster_consult)``:
  the image's folder and histogram wrapped in lists (both empty when the
  image yields no descriptors), followed by the seconds spent on
  descriptor extraction and on the histogram step.
  """
  img_path = image_info[0]
  img_folder = image_info[1]
  self.log("Processing (desc+predict): " + str(img_path))
  result_classes = []
  result_data = []
  t_ext_desc_start = time.time()
  _, descriptors = featureextractor.extract_descriptor(img_path, self.thumbnail_size, self.feature_type, self.descriptor_type)
  t_ext_desc_end = time.time() - t_ext_desc_start
  t_cluster_consult_start = time.time()
  # Identity test: "!= None" is evaluated element-wise on arrays and the
  # result cannot be combined with "and".
  if descriptors is not None and len(descriptors) > 0:
    result_classes.append(img_folder)
    result_data.append(self.opf_predict(descriptors, self.n_clusters))
  t_cluster_consult_end = time.time() - t_cluster_consult_start
  return (result_classes, result_data, t_ext_desc_end, t_cluster_consult_end)
Esempio n. 18
0
def extract_descriptors_predict_test(image_info, params):
    """Build the test histogram and encoded label of one image.

    ``params[0]`` is the object providing ``log``, ``kmeans_cls``,
    ``kmeans_k`` and the extraction settings; ``params[1]`` is the label
    encoder used to transform the image's folder.  Returns
    ``(labels_hist_array, labels_test)``: two lists holding the histogram
    and the encoder's output for the folder, or two empty lists when the
    image yields no descriptors.
    """
    self = params[0]
    le = params[1]
    img_path = image_info[0]
    img_folder = image_info[1]
    self.log("Processing (desc+predict test): " + str(img_path))
    # Encoded before extraction; the value appended below is the encoder's
    # array result, not a scalar.
    label_test = le.transform([img_folder])
    labels_hist_array = []
    labels_test = []
    _, descriptors = featureextractor.extract_descriptor(
        img_path, self.thumbnail_size, self.feature_type, self.descriptor_type)
    # "is not None" instead of "!= None" (element-wise on arrays); an empty
    # descriptor set cannot be clustered, so it is skipped like a missing
    # one.
    if descriptors is not None and len(descriptors) > 0:
        labels = self.kmeans_cls.predict(descriptors)
        # NOTE(review): range(k) is read by numpy.histogram as k bin edges,
        # i.e. k-1 bins. Left as is because the histogram length must match
        # the training histograms.
        bins = range(self.kmeans_k)
        labels_hist = numpy.histogram(labels, bins=bins, density=True)[0]
        labels_hist_array.append(labels_hist)
        labels_test.append(label_test)
    return (labels_hist_array, labels_test)
Esempio n. 19
0
def extract_descriptors_and_predict(image_info, self):
    """Build the training histogram of one image via ``self.opf_predict``.

    Returns ``(result_classes, result_data, t_ext_desc, t_cluster_consult)``:
    the image's folder and histogram wrapped in lists (both empty when the
    image yields no descriptors), followed by the seconds spent on
    descriptor extraction and on the histogram step.
    """
    img_path = image_info[0]
    img_folder = image_info[1]
    self.log("Processing (desc+predict): " + str(img_path))
    result_classes = []
    result_data = []
    t_ext_desc_start = time.time()
    _, descriptors = featureextractor.extract_descriptor(
        img_path, self.thumbnail_size, self.feature_type, self.descriptor_type)
    t_ext_desc_end = time.time() - t_ext_desc_start
    t_cluster_consult_start = time.time()
    # Identity test: "!= None" is evaluated element-wise on arrays and the
    # result cannot be combined with "and".
    if descriptors is not None and len(descriptors) > 0:
        result_classes.append(img_folder)
        result_data.append(self.opf_predict(descriptors, self.n_clusters))
    t_cluster_consult_end = time.time() - t_cluster_consult_start
    return (result_classes, result_data, t_ext_desc_end, t_cluster_consult_end)
Esempio n. 20
0
    def run_test(self):
        """Classify every test image from its cluster histogram and score it.

        Descriptors are assigned to clusters with ``self.kmeans_cls``,
        turned into a normalised histogram and classified by
        ``self.svm_cls``.  Images without descriptors are left out.  The
        four scores are stored in ``self.params_output``, logged and
        returned as ``(accuracy, precision, recall, f1)``.
        """
        labels_test = []
        labels_predicted = []

        images_info = filesprocess.get_files(self.test_path)
        n_images = len(images_info)
        # NOTE(review): range(k) is read by numpy.histogram as k bin edges,
        # i.e. k-1 bins. Left as is because the histogram length must match
        # what the classifier was trained on.
        bins = range(self.kmeans_k)
        for idx, image_info in enumerate(images_info):
            self.log("Generating hist " + str(idx) + " of " + str(n_images))
            img_path = image_info[0]
            img_folder = image_info[1]
            _, descriptors = featureextractor.extract_descriptor(
                img_path, self.thumbnail_size, self.feature_type,
                self.descriptor_type)
            # "is not None" instead of "!= None" (element-wise on arrays);
            # empty descriptor sets cannot be clustered and are skipped.
            if descriptors is not None and len(descriptors) > 0:
                labels = self.kmeans_cls.predict(descriptors)
                labels_hist = numpy.histogram(labels, bins=bins,
                                              density=True)[0]
                # predict() takes a 2-D (n_samples, n_features) array; the
                # histogram is a single sample.
                prediction = self.svm_cls.predict(labels_hist.reshape(1, -1))
                labels_predicted.append(prediction[0])
                labels_test.append(img_folder)

        accuracy = metrics.accuracy_score(labels_test, labels_predicted)
        precision = metrics.precision_score(labels_test, labels_predicted)
        recall = metrics.recall_score(labels_test, labels_predicted)
        f1 = metrics.f1_score(labels_test, labels_predicted)

        self.params_output['accuracy'] = accuracy
        self.params_output['precision'] = precision
        self.params_output['recall'] = recall
        self.params_output['F1'] = f1

        self.log("Accuracy: " + str(accuracy))
        self.log("Precision: " + str(precision))
        self.log("Recall: " + str(recall))
        self.log("F1: " + str(f1))
        return accuracy, precision, recall, f1
  def run_opf_supervised(self):
    """Fit a linear SVM on the training histograms and score the test set.

    ``self.svm_cls`` is fitted on ``self.result_data`` /
    ``self.result_classes``.  A histogram is built for every test image
    through ``self.opf_predict``; images without descriptors are left out.
    The four scores are stored in ``self.params_output``, logged and
    returned as ``(accuracy, precision, recall, f1)``.
    """
    self.log("Creating SVM")
    # NOTE(review): loss='l2' / class_weight='auto' look like spellings from
    # an older scikit-learn release - confirm against the installed version.
    self.svm_cls = svm.LinearSVC(C=1.0, loss='l2', class_weight='auto')
    t_svm_start = time.time()
    self.svm_cls.fit(self.result_data, self.result_classes)
    self.params_output['time_classificator_fit'] = time.time() - t_svm_start

    images_info = filesprocess.get_files(self.test_path)
    n_images = len(images_info)
    labels_test = []
    labels_hist_array = []
    for idx, image_info in enumerate(images_info):
      self.log("Generating hist test " + str(idx) + " of " + str(n_images))
      img_path = image_info[0]
      img_folder = image_info[1]
      _, descriptors = featureextractor.extract_descriptor(img_path, self.thumbnail_size, self.feature_type, self.descriptor_type)
      # "is not None" instead of "!= None" (element-wise on arrays); empty
      # descriptor sets are skipped like missing ones.
      if descriptors is not None and len(descriptors) > 0:
        labels_hist_array.append(self.opf_predict(descriptors, self.n_clusters))
        labels_test.append(img_folder)

    labels_hist_array = numpy.asarray(labels_hist_array, numpy.float64)

    self.log("Generating predictions")
    labels_predicted = self.svm_cls.predict(labels_hist_array)

    accuracy = metrics.accuracy_score(labels_test, labels_predicted)
    precision = metrics.precision_score(labels_test, labels_predicted)
    recall = metrics.recall_score(labels_test, labels_predicted)
    f1 = metrics.f1_score(labels_test, labels_predicted)

    self.params_output['accuracy'] = accuracy
    self.params_output['precision'] = precision
    self.params_output['recall'] = recall
    self.params_output['F1'] = f1

    self.log("Accuracy: " + str(accuracy))
    self.log("Precision: " + str(precision))
    self.log("Recall: " + str(recall))
    self.log("F1: " + str(f1))
    return accuracy, precision, recall, f1
Esempio n. 22
0
def extract_descriptors_and_predict(image_info, self):
  """Build the cluster-label histogram of one training image.

  Descriptors are assigned to clusters with ``self.kmeans_cls`` and turned
  into a normalised histogram.  Returns
  ``(result_classes, result_data, t_ext_desc, t_cluster_consult)``: the
  image's folder and histogram wrapped in lists (both empty when the image
  yields no descriptors), followed by the seconds spent on descriptor
  extraction and on the histogram step.
  """
  img_path = image_info[0]
  img_folder = image_info[1]
  self.log("Processing: " + str(img_path))
  t_ext_desc_start = time.time()
  _, descriptors = featureextractor.extract_descriptor(img_path, self.thumbnail_size, self.feature_type, self.descriptor_type)
  t_ext_desc_end = time.time() - t_ext_desc_start

  result_classes = []
  result_data = []
  t_cluster_consult_start = time.time()
  # "is not None" instead of "!= None" (element-wise on arrays); an empty
  # descriptor set cannot be clustered, so it is skipped like a missing one.
  if descriptors is not None and len(descriptors) > 0:
    labels = self.kmeans_cls.predict(descriptors)
    # NOTE(review): range(k) is read by numpy.histogram as k bin edges, i.e.
    # k-1 bins. Left as is because train and test histograms must agree.
    bins = range(self.kmeans_k)
    labels_hist = numpy.histogram(labels, bins=bins, density=True)[0]
    result_classes.append(img_folder)
    result_data.append(labels_hist)
  t_cluster_consult_end = time.time() - t_cluster_consult_start
  self.log("Time cluster consult: " + str(t_cluster_consult_end))
  return (result_classes, result_data, t_ext_desc_end, t_cluster_consult_end)
Esempio n. 23
0
def extract_descriptors_and_predict(image_info, self):
    """Build the cluster-label histogram of one training image.

    Descriptors are assigned to clusters with ``self.kmeans_cls`` and turned
    into a normalised histogram.  Returns
    ``(result_classes, result_data, t_ext_desc, t_cluster_consult)``: the
    image's folder and histogram wrapped in lists (both empty when the
    image yields no descriptors), followed by the seconds spent on
    descriptor extraction and on the histogram step.
    """
    img_path = image_info[0]
    img_folder = image_info[1]
    self.log("Processing: " + str(img_path))
    t_ext_desc_start = time.time()
    _, descriptors = featureextractor.extract_descriptor(
        img_path, self.thumbnail_size, self.feature_type, self.descriptor_type)
    t_ext_desc_end = time.time() - t_ext_desc_start

    result_classes = []
    result_data = []
    t_cluster_consult_start = time.time()
    # "is not None" instead of "!= None" (element-wise on arrays); an empty
    # descriptor set cannot be clustered, so it is skipped like a missing
    # one.
    if descriptors is not None and len(descriptors) > 0:
        labels = self.kmeans_cls.predict(descriptors)
        # NOTE(review): range(k) is read by numpy.histogram as k bin edges,
        # i.e. k-1 bins. Left as is because train and test histograms must
        # agree.
        bins = range(self.kmeans_k)
        labels_hist = numpy.histogram(labels, bins=bins, density=True)[0]
        result_classes.append(img_folder)
        result_data.append(labels_hist)
    t_cluster_consult_end = time.time() - t_cluster_consult_start
    self.log("Time cluster consult: " + str(t_cluster_consult_end))
    return (result_classes, result_data, t_ext_desc_end, t_cluster_consult_end)
Esempio n. 24
0
  def run_test(self):
    """Classify every test image from its cluster histogram and score it.

    Descriptors are assigned to clusters with ``self.kmeans_cls``, turned
    into a normalised histogram and classified by ``self.svm_cls``.  Images
    without descriptors are left out.  The four scores are stored in
    ``self.params_output``, logged and returned as
    ``(accuracy, precision, recall, f1)``.
    """
    labels_test = []
    labels_predicted = []

    images_info = filesprocess.get_files(self.test_path)
    n_images = len(images_info)
    # NOTE(review): range(k) is read by numpy.histogram as k bin edges, i.e.
    # k-1 bins. Left as is because the histogram length must match what the
    # classifier was trained on.
    bins = range(self.kmeans_k)
    for idx, image_info in enumerate(images_info):
      self.log("Generating hist " + str(idx) + " of " + str(n_images))
      img_path = image_info[0]
      img_folder = image_info[1]
      _, descriptors = featureextractor.extract_descriptor(img_path, self.thumbnail_size, self.feature_type, self.descriptor_type)
      # "is not None" instead of "!= None" (element-wise on arrays); empty
      # descriptor sets cannot be clustered and are skipped.
      if descriptors is not None and len(descriptors) > 0:
        labels = self.kmeans_cls.predict(descriptors)
        labels_hist = numpy.histogram(labels, bins=bins, density=True)[0]
        # predict() takes a 2-D (n_samples, n_features) array; the histogram
        # is a single sample.
        prediction = self.svm_cls.predict(labels_hist.reshape(1, -1))
        labels_predicted.append(prediction[0])
        labels_test.append(img_folder)

    accuracy = metrics.accuracy_score(labels_test, labels_predicted)
    precision = metrics.precision_score(labels_test, labels_predicted)
    recall = metrics.recall_score(labels_test, labels_predicted)
    f1 = metrics.f1_score(labels_test, labels_predicted)

    self.params_output['accuracy'] = accuracy
    self.params_output['precision'] = precision
    self.params_output['recall'] = recall
    self.params_output['F1'] = f1

    self.log("Accuracy: " + str(accuracy))
    self.log("Precision: " + str(precision))
    self.log("Recall: " + str(recall))
    self.log("F1: " + str(f1))
    return accuracy, precision, recall, f1
Esempio n. 25
0
  def run_opf_supervised(self):
    """Fit an OPF classifier on the training histograms and score the test set.

    ``self.result_classes`` is integer-encoded with a ``LabelEncoder`` and,
    with ``self.result_data``, used to fit an OPF classifier.  Every test
    image is turned into a normalised cluster-label histogram with
    ``self.kmeans_cls``; images without descriptors are left out.  Scoring
    is done on the encoded labels.  The four scores are stored in
    ``self.params_output``, logged and returned as
    ``(accuracy, precision, recall, f1)``.
    """
    le = preprocessing.LabelEncoder()
    le.fit(self.result_classes)
    cross_result_classes = le.transform(self.result_classes)
    cross_result_classes = cross_result_classes.astype(numpy.int32)

    opf_cls = libopf_py.OPF()
    result_data_array = numpy.array(self.result_data)
    self.log("Training OPF")
    t_opf_start = time.time()
    opf_cls.fit(result_data_array, cross_result_classes, metric=self.distance_function)
    self.params_output['time_classificator_fit'] = time.time() - t_opf_start
    images_info = filesprocess.get_files(self.test_path)
    n_images = len(images_info)

    # NOTE(review): range(k) is read by numpy.histogram as k bin edges, i.e.
    # k-1 bins. Left as is because the histogram length must match the
    # training histograms.
    bins = range(self.kmeans_k)
    labels_test = []
    labels_hist_array = []
    for idx, image_info in enumerate(images_info):
      self.log("Generating hist " + str(idx) + " of " + str(n_images))
      img_path = image_info[0]
      img_folder = image_info[1]
      label_test = le.transform([img_folder])
      _, descriptors = featureextractor.extract_descriptor(img_path, self.thumbnail_size, self.feature_type, self.descriptor_type)
      # "is not None" instead of "!= None" (element-wise on arrays); empty
      # descriptor sets cannot be clustered and are skipped.
      if descriptors is not None and len(descriptors) > 0:
        labels = self.kmeans_cls.predict(descriptors)
        labels_hist = numpy.histogram(labels, bins=bins, density=True)[0]
        labels_hist_array.append(labels_hist)
        # transform() returns a one-element array; keep the scalar so the
        # metrics receive a flat list of labels.
        labels_test.append(label_test[0])
    labels_hist_array = numpy.asarray(labels_hist_array, numpy.float64)
    labels_predicted = opf_cls.predict(labels_hist_array)

    accuracy = metrics.accuracy_score(labels_test, labels_predicted)
    precision = metrics.precision_score(labels_test, labels_predicted)
    recall = metrics.recall_score(labels_test, labels_predicted)
    f1 = metrics.f1_score(labels_test, labels_predicted)

    self.params_output['accuracy'] = accuracy
    self.params_output['precision'] = precision
    self.params_output['recall'] = recall
    self.params_output['F1'] = f1

    self.log("Accuracy: " + str(accuracy))
    self.log("Precision: " + str(precision))
    self.log("Recall: " + str(recall))
    self.log("F1: " + str(f1))
    return accuracy, precision, recall, f1
Esempio n. 26
0
  def train(self):
    """Build the k-means vocabulary, the training histograms and the SVM.

    Steps, in order:
      1. pick the images used for the vocabulary (a random draw of
         ``self.n_sample_images`` when more are available, else all);
      2. pool their descriptors, keep at most
         ``self.n_sample_descriptors`` of them and fit ``self.kmeans_cls``;
      3. turn every training image into a normalised cluster-label
         histogram, filling ``self.result_data``, ``self.result_classes``
         and ``self.result_clustering``;
      4. fit ``self.svm_cls`` on those histograms.
    Timing figures and the vocabulary sample size are written to
    ``self.params_output``.
    """
    self.log("BoW - Starting Train")
    self.log("train_path: " + self.train_path)

    self.log("Listing files to process")
    total_images_info = filesprocess.get_files(self.train_path)
    n_total = len(total_images_info)

    # NOTE(review): randrange draws with replacement, so the same image can
    # be picked more than once - confirm that this is intended.
    if n_total > self.n_sample_images:
      images_info = [total_images_info[randrange(0, n_total)]
                     for _ in range(self.n_sample_images)]
    else:
      images_info = total_images_info

    self.log(str(len(images_info)) + " files to process.")

    tmp_desc = []
    for idx, image_info in enumerate(images_info):
      self.log("Processing " + str(idx) + " of " + str(len(images_info)))
      img_path = image_info[0]
      _, descriptors = featureextractor.extract_descriptor(img_path, self.thumbnail_size, self.feature_type, self.descriptor_type)
      # Identity test: "!= None" is evaluated element-wise on arrays.
      if descriptors is not None:
        tmp_desc.extend(descriptors)

    self.log("Desc lenght original: " + str(len(tmp_desc)))
    tmp_desc = numpy.array(tmp_desc)
    if len(tmp_desc) > self.n_sample_descriptors:
      # Sample from the whole pool. Permuting only
      # range(n_sample_descriptors) merely reshuffled the first rows and
      # never looked at the rest.
      rand = numpy.random.permutation(len(tmp_desc))[:self.n_sample_descriptors]
      tmp_desc = tmp_desc[rand]

    self.log("Desc lenght reduced: " + str(len(tmp_desc)))
    self.params_output['real_desc_size'] = len(tmp_desc)
    self.kmeans_cls = KMeans(init='k-means++', n_clusters=self.kmeans_k, n_init=10, n_jobs=-1)
    self.log("Kmeans fit")
    t_kmeans_start = time.time()
    self.kmeans_cls.fit(tmp_desc)
    t_kmeans_end = time.time() - t_kmeans_start
    self.params_output['time_cluster_fit'] = t_kmeans_end
    self.log("Time cluster fit: " + str(t_kmeans_end))

    self.log("Generating histograms")
    self.result_classes = []
    self.result_data = []
    self.result_clustering = []
    # Drop this function's reference to the sampled descriptor matrix; it is
    # not needed once the clusters are fitted.
    tmp_desc = []
    t_ext_desc_sum = 0
    t_cluster_consult_sum = 0
    # NOTE(review): range(k) is read by numpy.histogram as k bin edges, i.e.
    # k-1 bins. Left as is because the code that builds test histograms
    # must use the same binning.
    bins = range(self.kmeans_k)
    for idx, image_info in enumerate(total_images_info):
      self.log("Generating hist " + str(idx) + " of " + str(n_total))
      img_path = image_info[0]
      img_folder = image_info[1]
      t_ext_desc_start = time.time()
      _, descriptors = featureextractor.extract_descriptor(img_path, self.thumbnail_size, self.feature_type, self.descriptor_type)
      t_ext_desc_end = time.time() - t_ext_desc_start
      t_ext_desc_sum = t_ext_desc_sum + t_ext_desc_end
      self.log("Time desc extraction: " + str(t_ext_desc_end))
      t_cluster_consult_start = time.time()
      # Empty descriptor sets cannot be clustered and are skipped like
      # missing ones.
      if descriptors is not None and len(descriptors) > 0:
        labels = self.kmeans_cls.predict(descriptors)
        labels_hist = numpy.histogram(labels, bins=bins, density=True)[0]
        self.result_clustering.append((img_path, img_folder, labels_hist))
        self.result_classes.append(img_folder)
        self.result_data.append(labels_hist)
      t_cluster_consult_end = time.time() - t_cluster_consult_start
      t_cluster_consult_sum = t_cluster_consult_sum + t_cluster_consult_end
      self.log("Time cluster consult: " + str(t_cluster_consult_end))

    # Guard the per-image averages so an empty file list cannot divide by
    # zero.
    self.params_output['time_ext_desc_med'] = (t_ext_desc_sum / n_total) if n_total else 0.0
    self.params_output['time_cons_cluster_med'] = (t_cluster_consult_sum / n_total) if n_total else 0.0

    self.log("Creating SVM")
    # NOTE(review): loss='l2' / class_weight='auto' look like spellings from
    # an older scikit-learn release - confirm against the installed version.
    self.svm_cls = svm.LinearSVC(C=1.0, loss='l2', class_weight='auto')
    t_svm_start = time.time()
    self.svm_cls.fit(self.result_data, self.result_classes)
    self.params_output['time_classificator_fit'] = time.time() - t_svm_start
import featureextractor

n_chunk = 2000
dir_path = '/home/images/CALTECH256/train'
output_dir = ""
zipped = True
files = filesprocess.get_files(dir_path)
files_chunks = zip(*[iter(files)] * n_chunk)
print "Chunks: ", len(files_chunks)
for idx_chunk, chunk in enumerate(files_chunks):
    chunk_data = []
    for idx_file, file_info in enumerate(chunk):
        img_path = file_info[0]
        folder = file_info[1]
        print "Processing: ", idx_file, " - ", img_path
        keypoints, descriptors = featureextractor.extract_descriptor(
            img_path, (512, 512), "SURF", "SURF")
        data = {'path': img_path, 'folder': folder, 'descriptors': descriptors}
        chunk_data.append(data)
    print "Saving chunk ", idx_chunk
    output_filename = output_dir + "caltech256_" + str(idx_chunk) + ".pickle"

    if zipped:
        with open(output_filename + ".gz", 'wb') as fp:
            fp.write(
                zlib.compress(
                    pickle.dumps(chunk_data, pickle.HIGHEST_PROTOCOL), 9))
    else:
        with open(output_filename, 'wb') as fp:
            fp.write(pickle.dumps(chunk_data, pickle.HIGHEST_PROTOCOL))
    #output = open(output_filename, 'wb')
    #pickle.dump(chunk_data, output)
Esempio n. 28
0
  def train(self):
    """Fit the k-means vocabulary and build one label histogram per image.

    Descriptors are extracted from at most ``self.n_sample_images`` images
    listed under ``self.train_path``; at most ``self.n_sample_descriptors``
    of those descriptors are used to fit ``self.kmeans_cls``.  Every image
    under ``self.train_path`` is then described by a density histogram of
    the cluster labels predicted for its descriptors.

    Results are stored on the instance: ``self.result_data`` (histograms),
    ``self.result_classes`` (folder of each image) and
    ``self.result_clustering`` (``(path, folder, histogram)`` tuples).
    Timings and the number of descriptors actually used are written to
    ``self.params_output``.

    NOTE(review): unlike other ``train`` variants in this file, no SVM is
    fitted at the end of this method -- confirm that this is intentional.
    """
    self.log("BoW - Starting Train")
    self.log("train_path: " + self.train_path)
    self.kmeans_cls = KMeans(init='k-means++', n_clusters=self.kmeans_k, n_init=10, n_jobs=-1)
    self.log("Listing files to process")
    total_images_info = filesprocess.get_files(self.train_path)
    if len(total_images_info) > self.n_sample_images:
      # randrange draws with replacement, so an image may be picked twice.
      images_info = [total_images_info[randrange(0, len(total_images_info))]
                     for _ in range(self.n_sample_images)]
    else:
      images_info = total_images_info

    self.log(str(len(images_info)) + " files to process.")

    # Pool the descriptors of the sampled images to fit the vocabulary.
    tmp_desc = []
    for idx, image_info in enumerate(images_info):
      self.log("Processing " + str(idx) + " of " + str(len(images_info)))
      img_path = image_info[0]
      features, descriptors = featureextractor.extract_descriptor(img_path, self.thumbnail_size,self.feature_type,self.descriptor_type)
      # "is not None": comparing a numpy array with != is elementwise and
      # cannot be used as an if-condition.
      if descriptors is not None:
        tmp_desc.extend(descriptors)

    self.log("Desc lenght original: " + str(len(tmp_desc)))
    tmp_desc = numpy.array(tmp_desc)
    if len(tmp_desc) > self.n_sample_descriptors:
      # Permute ALL indices and keep the first n_sample_descriptors, so the
      # subsample is drawn from the whole pool and not only from its head.
      rand = numpy.random.permutation(len(tmp_desc))[0:self.n_sample_descriptors]
      tmp_desc = tmp_desc[rand]
    self.log("Desc lenght reduced: " + str(len(tmp_desc)))
    self.params_output['real_desc_size'] = len(tmp_desc)
    self.log("Kmeans fit")
    t_kmeans_start = time.time()
    self.kmeans_cls.fit(tmp_desc)
    t_kmeans_end = time.time() - t_kmeans_start
    self.params_output['time_cluster_fit'] = t_kmeans_end
    self.log("Time cluster fit: " + str(t_kmeans_end))

    self.log("Generating histograms")
    self.result_classes = []
    self.result_data = []
    self.result_clustering = []
    tmp_desc = []  # drop the reference to the pooled descriptor array
    t_ext_desc_sum = 0
    t_cluster_consult_sum = 0
    for idx, image_info in enumerate(total_images_info):
      self.log("Generating hist " + str(idx) + " of " + str(len(total_images_info)))
      img_path = image_info[0]
      img_folder = image_info[1]
      t_ext_desc_start = time.time()
      features, descriptors = featureextractor.extract_descriptor(img_path, self.thumbnail_size,self.feature_type,self.descriptor_type)
      t_ext_desc_end = time.time() - t_ext_desc_start
      t_ext_desc_sum = t_ext_desc_sum + t_ext_desc_end
      self.log("Time desc extraction: " + str(t_ext_desc_end))
      t_cluster_consult_start = time.time()
      # Images without any descriptor are skipped and get no histogram.
      if descriptors is not None and len(descriptors) > 0:
        labels = self.kmeans_cls.predict(descriptors)
        # NOTE(review): range(k) gives k edges, i.e. k - 1 bins, so labels
        # k - 2 and k - 1 share the last bin.  range(k + 1) may have been
        # intended; left unchanged because it alters the feature length and
        # the test-time histogram would have to change with it.
        bins = range(self.kmeans_k)
        labels_hist = numpy.histogram(labels, bins=bins, density=True)[0]
        self.result_clustering.append((img_path, img_folder, labels_hist))
        self.result_classes.append(img_folder)
        self.result_data.append(labels_hist)
      t_cluster_consult_end = time.time() - t_cluster_consult_start
      t_cluster_consult_sum = t_cluster_consult_sum + t_cluster_consult_end
      self.log("Time cluster consult: " + str(t_cluster_consult_end))

    # Per-image averages over every listed image, including skipped ones.
    self.params_output['time_ext_desc_med'] =  t_ext_desc_sum / len(total_images_info)
    self.params_output['time_cons_cluster_med'] =  t_cluster_consult_sum / len(total_images_info)
Esempio n. 29
0
import featureextractor
import sys

image_path = sys.argv[1]
print "Processing: ", image_path
kp, desc = featureextractor.extract_descriptor(image_path, (256, 256),
                                               feature_type="SIFT",
                                               descriptor_type="SIFT")
print "Descritores: ", len(desc)

n_samples = len(desc)
n_features = len(desc[0])
result_file = open("desc_opf.txt", "w")
result_file.write(str(n_samples) + " " + "0" + " " + str(n_features) + "\n")
for idx, dsc in enumerate(desc):
    result_file.write(str(idx) + " ")
    for d in dsc:
        result_file.write(str(d) + " ")
    result_file.write("\n")
result_file.close()
  def train(self):
    """Cluster sampled descriptors with OPF and describe every train image.

    Descriptors are extracted from at most ``self.n_sample_images`` images
    listed under ``self.train_path``; at most ``self.n_sample_descriptors``
    of them are passed to ``self.opf_cluster``, whose result is stored in
    ``self.best_k`` and ``self.n_clusters``.  Every image under
    ``self.train_path`` that yields descriptors is then passed through
    ``self.opf_predict`` and the result is stored as its feature row.

    Side effects: fills ``self.result_data`` (float64 array, one row per
    image), ``self.result_classes`` (folder of each image) and timing
    entries in ``self.params_output``.

    Returns:
      ``self.n_clusters`` as reported by ``self.opf_cluster``.
    """
    self.log("BoW Opf-Opf - Starting Train")
    self.log("train_path: " + self.train_path)
    self.log("Listing files to process")
    total_images_info = filesprocess.get_files(self.train_path)

    self.log("Total images: " + str(len(total_images_info)))
    if len(total_images_info) > self.n_sample_images:
      # randrange draws with replacement, so an image may be picked twice.
      images_info = [total_images_info[randrange(0, len(total_images_info))]
                     for _ in range(self.n_sample_images)]
    else:
      images_info = total_images_info

    self.log(str(len(images_info)) + " files to process.")

    # Extract the descriptors of the sampled images.
    tmp_desc = []
    for idx, image_info in enumerate(images_info):
      self.log("Processing " + str(idx) + " of " + str(len(images_info)))
      img_path = image_info[0]
      features, descriptors = featureextractor.extract_descriptor(img_path, self.thumbnail_size,self.feature_type,self.descriptor_type)

      # "is not None": comparing a numpy array with != is elementwise and
      # cannot be used as an if-condition.
      if descriptors is not None:
        self.log("Descriptors lenght: " + str(len(descriptors)))
        tmp_desc.extend(descriptors)

    tmp_desc = numpy.array(tmp_desc, numpy.float64)
    # Randomly keep at most n_sample_descriptors descriptors.
    self.log("Desc lenght original: " + str(len(tmp_desc)))
    if len(tmp_desc) > self.n_sample_descriptors:
      rand = numpy.random.permutation(len(tmp_desc))[0:self.n_sample_descriptors]
      tmp_desc = tmp_desc[rand]
    self.log("Desc lenght reduced: " + str(len(tmp_desc)))
    self.params_output['real_desc_size'] = len(tmp_desc)
    self.log("OPF - Clustering")
    self.best_k, self.n_clusters = self.opf_cluster(tmp_desc)

    # Describe every training image using the clustering obtained above.
    self.result_classes = []
    self.result_data = []
    self.log("Generating predictions")
    t_ext_desc_sum = 0
    t_cluster_consult_sum = 0
    for idx, image_info in enumerate(total_images_info):
      self.log("Generating hist train " + str(idx) + " of " + str(len(total_images_info)))
      img_path = image_info[0]
      img_folder = image_info[1]
      t_ext_desc_start = time.time()
      features, descriptors = featureextractor.extract_descriptor(img_path, self.thumbnail_size,self.feature_type,self.descriptor_type)
      t_ext_desc_end = time.time() - t_ext_desc_start
      t_ext_desc_sum = t_ext_desc_sum + t_ext_desc_end
      self.log("Time desc extraction: " + str(t_ext_desc_end))
      t_cluster_consult_start = time.time()
      # Images without any descriptor are skipped and get no feature row.
      if descriptors is not None and len(descriptors) > 0:
        label_hist = self.opf_predict(descriptors, self.n_clusters)
        self.result_classes.append(img_folder)
        self.result_data.append(label_hist)
      t_cluster_consult_end = time.time() - t_cluster_consult_start
      t_cluster_consult_sum = t_cluster_consult_sum + t_cluster_consult_end
      self.log("Time cluster consult: " + str(t_cluster_consult_end))

    # Per-image averages over every listed image, including skipped ones.
    self.params_output['time_ext_desc_med'] =  t_ext_desc_sum / len(total_images_info)
    self.params_output['time_cons_cluster_med'] =  t_cluster_consult_sum / len(total_images_info)
    self.result_data = numpy.asarray(self.result_data, numpy.float64)
    return self.n_clusters
def extract(image_info, params):
  """Run descriptor extraction for a single image.

  image_info: sequence whose first item is the image path.
  params: sequence laid out as (feature_type, descriptor_type,
    thumbnail_size).

  The extraction result is unpacked but not returned; the function
  always returns None.
  """
  path = image_info[0]
  feat_kind, desc_kind, thumb_size = params[0], params[1], params[2]
  _keypoints, _descriptors = featureextractor.extract_descriptor(
      path, thumb_size, feat_kind, desc_kind)
import featureextractor

n_chunk = 2000
dir_path = '/home/images/CALTECH256/train'
output_dir = ""
zipped = True
files = filesprocess.get_files(dir_path)
files_chunks = zip(*[iter(files)]*n_chunk)
print "Chunks: ", len(files_chunks)
for idx_chunk, chunk in enumerate(files_chunks):
  chunk_data = []
  for idx_file, file_info in enumerate(chunk):
    img_path = file_info[0]
    folder = file_info[1]
    print "Processing: ", idx_file, " - ", img_path
    keypoints, descriptors = featureextractor.extract_descriptor(img_path, (512,512), "SURF", "SURF")
    data = {'path':img_path, 'folder':folder, 'descriptors':descriptors}
    chunk_data.append(data)
  print "Saving chunk ", idx_chunk
  output_filename = output_dir + "caltech256_" + str(idx_chunk) + ".pickle"
  
  if zipped:
    with open(output_filename + ".gz", 'wb') as fp:
     fp.write(zlib.compress(pickle.dumps(chunk_data, pickle.HIGHEST_PROTOCOL),9))
  else:
    with open(output_filename, 'wb') as fp:
      fp.write(pickle.dumps(chunk_data, pickle.HIGHEST_PROTOCOL))
  #output = open(output_filename, 'wb')
  #pickle.dump(chunk_data, output)
  #output.close()
# img_path = '/home/images/CALTECH256/train/010.beer-mug/010_0004.jpg'
import featureextractor
import sys

image_path = sys.argv[1]
print "Processing: ", image_path
kp, desc = featureextractor.extract_descriptor(image_path, (256,256), feature_type="SIFT", descriptor_type="SIFT")
print "Descritores: ", len(desc)

n_samples = len(desc)
n_features = len(desc[0])
result_file = open("desc_opf.txt", "w")
result_file.write(str(n_samples) + " " + "0" + " " + str(n_features) + "\n")
for idx, dsc in enumerate(desc):
  result_file.write(str(idx) + " ")
  for d in dsc:
    result_file.write(str(d) + " ")
  result_file.write("\n")
result_file.close()