def precompute_compute(images_path, feature_type, descriptor_type, thumbnail_size, pool=None):
  """Extract features/descriptors for every image under images_path in parallel.

  A multiprocessing Pool may be supplied by the caller; otherwise a private
  one is created here and cleaned up before returning (the original leaked
  it: a locally-created Pool was never closed or joined).
  """
  images_info = filesprocess.get_files(images_path)
  # PEP 8: compare against None with `is`, not `==`.
  own_pool = pool is None
  if own_pool:
    pool = multiprocessing.Pool()
  pool.map(func_star_extract,
           itertools.izip(images_info,
                          itertools.repeat((feature_type, descriptor_type, thumbnail_size))))
  # Only manage the pool's lifetime when we created it ourselves.
  if own_pool:
    pool.close()
    pool.join()
    def train(self):
        self.log("BoW Overfeat - Starting Train")
        self.log("train_path: " + self.train_path)

        result_descriptors = []
        self.log("Listing files to process")
        total_images_info = filesprocess.get_files(self.train_path)
        #total_images_info = total_images_info[0:100]
        self.result_classes = []
        self.result_data = []
        t_ext_desc_sum = 0
        t_feature_extract_start = time.time()
        for idx, image_info in enumerate(total_images_info):
            self.log("Processing " + str(idx) + " of " +
                     str(len(total_images_info)))
            img_path = image_info[0]
            img_folder = image_info[1]
            t_ext_desc_start = time.time()
            features, descriptors = featureextractor.extract_descriptor(
                img_path, None, "OVERFEAT", "OVERFEAT")
            t_ext_desc_end = time.time() - t_ext_desc_start
            t_ext_desc_sum = t_ext_desc_sum + t_ext_desc_end
            if descriptors != None:
                print "Debug 1: ", descriptors[0].shape
                self.result_data.append(descriptors[0])
                self.result_classes.append(img_folder)
        t_feature_extract_end = time.time() - t_feature_extract_start
        self.params_output['time_ext_desc_med'] = t_ext_desc_sum / len(
            total_images_info)
        self.params_output['time_feature_extraction'] = t_feature_extract_end
  def process(self):
    if not os.path.exists(self.train_path):
        os.makedirs(self.train_path)

    if not os.path.exists(self.validation_path):
        os.makedirs(self.validation_path)

    if not os.path.exists(self.test_path):
        os.makedirs(self.test_path)

    images_info = filesprocess.get_files(self.dataset_path)
    a_train, a_test = train_test_split(images_info, test_size=self.test_size, random_state=42)

    print "Saving training data"
    for data in a_train:
      img_folder_path = self.train_path + "/" + data[1]
      img_filename = os.path.split(data[0])[1]
      src_path = data[0]
      dst_path = img_folder_path + "/" + img_filename
      print "Processing train: " + src_path
      if not os.path.exists(img_folder_path):
        os.makedirs(img_folder_path)
      shutil.copyfile(src_path, dst_path)
    
    print "Saving test data"
    for data in a_test:
      img_folder_path = self.test_path + "/" + data[1]
      img_filename = os.path.split(data[0])[1]
      src_path = data[0]
      dst_path = img_folder_path + "/" + img_filename
      print "Processing test: " + src_path
      if not os.path.exists(img_folder_path):
        os.makedirs(img_folder_path)
      shutil.copyfile(src_path, dst_path)
def generate_descriptors_file(path_to_process, path_to_result, batch_size, prefix="train_"):
  print "Generating descriptors file"
  if not os.path.exists(path_to_result):
      os.makedirs(path_to_result)
  images_info = filesprocess.get_files(path_to_process)
  print str(len(images_info)), " files to process."
  count_descriptors = 0
  count_files = 0;
  buffer = []
  for idx, image_info in enumerate(images_info):
    print "Processing ", idx, " of ", len(images_info)
    count_descriptors = count_descriptors + 1
    img_path = image_info[0]
    img_folder = image_info[1]
    f,d = featureextractor.extract_sift(img_path)
    if (d != None):
      p_temp = serializer.pickle_keypoints(f, d)
      buffer.append((img_path, img_folder, p_temp))
    
    if count_descriptors >= batch_size:
      file_name = prefix + str(count_files) + ".des.gz"
      file_path = path_to_result + "/" + file_name
      print "Saving file: ", file_path
      save_desc_file(buffer, file_path)
      count_files += 1
      count_descriptors = 0
      buffer = []

  return images_info
# Beispiel #5 (scrape separator; vote count: 0)
def generate_descriptors_file(path_to_process,
                              path_to_result,
                              batch_size,
                              prefix="train_"):
    print "Generating descriptors file"
    if not os.path.exists(path_to_result):
        os.makedirs(path_to_result)
    images_info = filesprocess.get_files(path_to_process)
    print str(len(images_info)), " files to process."
    count_descriptors = 0
    count_files = 0
    buffer = []
    for idx, image_info in enumerate(images_info):
        print "Processing ", idx, " of ", len(images_info)
        count_descriptors = count_descriptors + 1
        img_path = image_info[0]
        img_folder = image_info[1]
        f, d = featureextractor.extract_sift(img_path)
        if (d != None):
            p_temp = serializer.pickle_keypoints(f, d)
            buffer.append((img_path, img_folder, p_temp))

        if count_descriptors >= batch_size:
            file_name = prefix + str(count_files) + ".des.gz"
            file_path = path_to_result + "/" + file_name
            print "Saving file: ", file_path
            save_desc_file(buffer, file_path)
            count_files += 1
            count_descriptors = 0
            buffer = []

    return images_info
    def run_test(self):
        """Predict labels for all test images in parallel and store
        accuracy/precision/recall/F1 in self.params_output.

        Returns (accuracy, precision, recall, f1).
        """
        labels_test = []
        labels_predicted = []

        pool = multiprocessing.Pool()
        images_info = filesprocess.get_files(self.test_path)
        preds_data = pool.map(
            func_star_extract_descriptors_predict_test,
            itertools.izip(images_info, itertools.repeat(self)))

        # Each worker returns (predicted labels, true labels).
        for data in preds_data:
            labels_predicted.extend(data[0])
            labels_test.extend(data[1])

        # close() + join() lets workers exit cleanly; the original called
        # terminate() right after close(), which force-kills workers.
        pool.close()
        pool.join()

        accuracy = metrics.accuracy_score(labels_test, labels_predicted)
        precision = metrics.precision_score(labels_test, labels_predicted)
        recall = metrics.recall_score(labels_test, labels_predicted)
        f1 = metrics.f1_score(labels_test, labels_predicted)

        self.params_output['accuracy'] = accuracy
        self.params_output['precision'] = precision
        self.params_output['recall'] = recall
        self.params_output['F1'] = f1

        self.log("Accuracy: " + str(accuracy))
        self.log("Precision: " + str(precision))
        self.log("Recall: " + str(recall))
        self.log("F1: " + str(f1))
        return accuracy, precision, recall, f1
 def train(self):
   self.log("BoW Overfeat - Starting Train")
   self.log("train_path: " + self.train_path)
   
   result_descriptors = []
   self.log("Listing files to process")
   total_images_info = filesprocess.get_files(self.train_path)
   #total_images_info = total_images_info[0:100]
   self.result_classes = []
   self.result_data = []
   t_ext_desc_sum = 0
   t_feature_extract_start = time.time()
   for idx, image_info in enumerate(total_images_info):
     self.log("Processing " + str(idx) + " of " + str(len(total_images_info)))
     img_path = image_info[0]
     img_folder = image_info[1]
     t_ext_desc_start = time.time()
     features, descriptors = featureextractor.extract_descriptor(img_path, None,"OVERFEAT","OVERFEAT")
     t_ext_desc_end = time.time() - t_ext_desc_start
     t_ext_desc_sum = t_ext_desc_sum + t_ext_desc_end
     if descriptors != None:
       print "Debug 1: ", descriptors[0].shape
       self.result_data.append(descriptors[0])
       self.result_classes.append(img_folder)
   t_feature_extract_end = time.time() - t_feature_extract_start
   self.params_output['time_ext_desc_med'] =  t_ext_desc_sum / len(total_images_info)
   self.params_output['time_feature_extraction'] = t_feature_extract_end
   self.log("Creating SVM")
   self.svm_cls = svm.LinearSVC(C=1.0, loss='l2', class_weight='auto')
   t_svm_start = time.time()
   self.svm_cls.fit(self.result_data, self.result_classes)
   t_svm_end = time.time() - t_svm_start
   self.params_output['time_classificator_fit'] = t_svm_end
  def run_test(self):
    """Predict labels for all test images in parallel and store
    accuracy/precision/recall/F1 in self.params_output.

    Returns (accuracy, precision, recall, f1).
    """
    labels_test = []
    labels_predicted = []

    pool = multiprocessing.Pool()
    images_info = filesprocess.get_files(self.test_path)
    preds_data = pool.map(func_star_extract_descriptors_predict_test, 
                          itertools.izip(images_info, 
                          itertools.repeat(self)))

    # Each worker returns (predicted labels, true labels).
    for data in preds_data:
      labels_predicted.extend(data[0])
      labels_test.extend(data[1])

    # close() + join() lets workers exit cleanly; the original called
    # terminate() right after close(), which force-kills workers.
    pool.close()
    pool.join()

    accuracy = metrics.accuracy_score(labels_test, labels_predicted)
    precision = metrics.precision_score(labels_test, labels_predicted)
    recall = metrics.recall_score(labels_test, labels_predicted)
    f1 = metrics.f1_score(labels_test, labels_predicted)

    self.params_output['accuracy'] = accuracy
    self.params_output['precision'] = precision
    self.params_output['recall'] = recall
    self.params_output['F1'] = f1

    self.log("Accuracy: " + str(accuracy))
    self.log("Precision: " + str(precision))
    self.log("Recall: " + str(recall))
    self.log("F1: " + str(f1))
    return accuracy, precision, recall, f1
  def run_test(self):
    labels_test = []
    labels_predicted = []

    images_info = filesprocess.get_files(self.test_path)
    for idx, image_info in enumerate(images_info):
      self.log("Generating hist " + str(idx) + " of " + str(len(images_info)))
      img_path = image_info[0]
      img_folder = image_info[1]
      features, descriptors = featureextractor.extract_descriptor(img_path, None,"OVERFEAT","OVERFEAT")
      if (descriptors != None):
        prediction = self.svm_cls.predict(descriptors)
        print "Debug 2: ", prediction.shape
        labels_predicted.append(prediction[0])
        labels_test.append(img_folder)

    print labels_predicted
    accuracy = metrics.accuracy_score(labels_test, labels_predicted)
    self.log("Accuracy: " + str(accuracy))
    precision = metrics.precision_score(labels_test, labels_predicted)
    self.log("Precision: " + str(precision))
    recall = metrics.recall_score(labels_test, labels_predicted)
    self.log("Recall: " + str(recall))
    f1 = metrics.f1_score(labels_test, labels_predicted)
    self.log("F1: " + str(f1))

    self.params_output['accuracy'] = accuracy
    self.params_output['precision'] = precision
    self.params_output['recall'] = recall
    self.params_output['F1'] = f1
    
    return accuracy, precision, recall, f1
# Beispiel #10 (scrape separator; vote count: 0)
    def run_opf_supervised(self):
        """Train a supervised OPF classifier on self.result_data, evaluate
        it on the test set (feature extraction parallelized), and store the
        metrics in self.params_output.

        Returns (accuracy, precision, recall, f1).
        """
        # OPF needs integer labels; encode the class-name strings.
        le = preprocessing.LabelEncoder()
        le.fit(self.result_classes)
        cross_result_classes = le.transform(self.result_classes)
        cross_result_classes = cross_result_classes.astype(numpy.int32)

        self.log("Result data size: " + str(len(self.result_data)))
        self.log("Cross classes size: " + str(len(cross_result_classes)))
        self.log("Training OPF")
        self.opf_sup_cls = libopf_py.OPF()
        t_opf_start = time.time()
        self.opf_sup_cls.fit(self.result_data,
                             cross_result_classes,
                             metric=self.distance_function)
        t_opf_end = time.time() - t_opf_start
        self.params_output['time_classificator_fit'] = t_opf_end

        images_info = filesprocess.get_files(self.test_path)
        labels_test = []
        labels_hist_array = []

        pool = multiprocessing.Pool()
        preds_data = pool.map(
            func_star_extract_descriptors_predict_test,
            itertools.izip(images_info, itertools.repeat(self)))

        # Each worker returns (histograms, true labels).
        for data in preds_data:
            labels_hist_array.extend(data[0])
            labels_test.extend(data[1])

        labels_hist_array = numpy.asarray(labels_hist_array, numpy.float64)

        self.log("Generating predictions")
        prediction = self.opf_sup_cls.predict(labels_hist_array)
        labels_predicted = le.inverse_transform(prediction)

        # close() + join() lets workers exit cleanly; the original called
        # terminate() right after close(), which force-kills workers.
        pool.close()
        pool.join()

        accuracy = metrics.accuracy_score(labels_test, labels_predicted)
        precision = metrics.precision_score(labels_test, labels_predicted)
        recall = metrics.recall_score(labels_test, labels_predicted)
        f1 = metrics.f1_score(labels_test, labels_predicted)

        self.params_output['accuracy'] = accuracy
        self.params_output['precision'] = precision
        self.params_output['recall'] = recall
        self.params_output['F1'] = f1

        self.log("Accuracy: " + str(accuracy))
        self.log("Precision: " + str(precision))
        self.log("Recall: " + str(recall))
        self.log("F1: " + str(f1))
        return accuracy, precision, recall, f1
  def run_opf_supervised(self):
    """Train a supervised OPF classifier on self.result_data, evaluate it
    on the test set (feature extraction parallelized), and store the
    metrics in self.params_output.

    Returns (accuracy, precision, recall, f1).
    """
    # OPF needs integer labels; encode the class-name strings.
    le = preprocessing.LabelEncoder()
    le.fit(self.result_classes)
    cross_result_classes = le.transform(self.result_classes)
    cross_result_classes = cross_result_classes.astype(numpy.int32)

    self.log("Result data size: " + str(len(self.result_data)))
    self.log("Cross classes size: " + str(len(cross_result_classes)))
    self.log("Training OPF")
    self.opf_sup_cls = libopf_py.OPF()
    t_opf_start = time.time()
    self.opf_sup_cls.fit(self.result_data, cross_result_classes,metric=self.distance_function)
    t_opf_end = time.time() - t_opf_start
    self.params_output['time_classificator_fit'] = t_opf_end

    images_info = filesprocess.get_files(self.test_path)
    labels_test = []
    labels_hist_array = []

    pool = multiprocessing.Pool()
    preds_data = pool.map(func_star_extract_descriptors_predict_test, 
                          itertools.izip(images_info, 
                          itertools.repeat(self)))

    # Each worker returns (histograms, true labels).
    for data in preds_data:
      labels_hist_array.extend(data[0])
      labels_test.extend(data[1])

    labels_hist_array = numpy.asarray(labels_hist_array, numpy.float64)

    self.log("Generating predictions")
    prediction = self.opf_sup_cls.predict(labels_hist_array)
    labels_predicted = le.inverse_transform(prediction)

    # close() + join() lets workers exit cleanly; the original called
    # terminate() right after close(), which force-kills workers.
    pool.close()
    pool.join()

    accuracy = metrics.accuracy_score(labels_test, labels_predicted)
    precision = metrics.precision_score(labels_test, labels_predicted)
    recall = metrics.recall_score(labels_test, labels_predicted)
    f1 = metrics.f1_score(labels_test, labels_predicted)

    self.params_output['accuracy'] = accuracy
    self.params_output['precision'] = precision
    self.params_output['recall'] = recall
    self.params_output['F1'] = f1

    self.log("Accuracy: " + str(accuracy))
    self.log("Precision: " + str(precision))
    self.log("Recall: " + str(recall))
    self.log("F1: " + str(f1))
    return accuracy, precision, recall, f1
    def run_test(self):
        le = preprocessing.LabelEncoder()
        le.fit(self.result_classes)
        #list(le.classes_)
        cross_result_classes = le.transform(self.result_classes)
        cross_result_classes = cross_result_classes.astype(numpy.int32)

        result_data_array = numpy.asarray(self.result_data, numpy.float64)
        self.log("Creating OPF")
        self.opf_cls = libopf_py.OPF()
        t_opf_start = time.time()
        #self.svm_cls.fit(self.result_data, self.result_classes)
        self.opf_cls.fit(result_data_array,
                         cross_result_classes,
                         metric=self.distance_function)
        t_opf_end = time.time() - t_opf_start
        self.params_output['time_classificator_fit'] = t_opf_end

        labels_test = []
        labels_predicted = []

        images_info = filesprocess.get_files(self.test_path)
        for idx, image_info in enumerate(images_info):
            self.log("Generating hist " + str(idx) + " of " +
                     str(len(images_info)))
            img_path = image_info[0]
            img_folder = image_info[1]
            features, descriptors = featureextractor.extract_descriptor(
                img_path, None, "OVERFEAT", "OVERFEAT")
            if (descriptors != None):
                descriptors = numpy.asarray(descriptors, numpy.float64)
                prediction = self.opf_cls.predict(descriptors)
                labels_predicted.append(prediction[0])
                label_trans = le.transform([img_folder])
                labels_test.append(label_trans[0])

        print labels_predicted
        accuracy = metrics.accuracy_score(labels_test, labels_predicted)
        self.log("Accuracy: " + str(accuracy))
        precision = metrics.precision_score(labels_test, labels_predicted)
        self.log("Precision: " + str(precision))
        recall = metrics.recall_score(labels_test, labels_predicted)
        self.log("Recall: " + str(recall))
        f1 = metrics.f1_score(labels_test, labels_predicted)
        self.log("F1: " + str(f1))

        self.params_output['accuracy'] = accuracy
        self.params_output['precision'] = precision
        self.params_output['recall'] = recall
        self.params_output['F1'] = f1

        return accuracy, precision, recall, f1
# Beispiel #13 (scrape separator; vote count: 0)
def precompute_compute(images_path,
                       feature_type,
                       descriptor_type,
                       thumbnail_size,
                       pool=None):
    """Extract features/descriptors for every image under images_path in
    parallel.

    A multiprocessing Pool may be supplied by the caller; otherwise a
    private one is created here and cleaned up before returning (the
    original leaked it: a locally-created Pool was never closed or joined).
    """
    images_info = filesprocess.get_files(images_path)
    # PEP 8: compare against None with `is`, not `==`.
    own_pool = pool is None
    if own_pool:
        pool = multiprocessing.Pool()
    pool.map(
        func_star_extract,
        itertools.izip(
            images_info,
            itertools.repeat((feature_type, descriptor_type, thumbnail_size))))
    # Only manage the pool's lifetime when we created it ourselves.
    if own_pool:
        pool.close()
        pool.join()
  def run_opf_supervised(self):
    """Train a supervised OPF classifier on self.result_data, then build
    a cluster-label histogram for each test image (sequentially), predict
    with OPF, and store the metrics in self.params_output.

    Returns (accuracy, precision, recall, f1).
    """
    # OPF needs integer labels; encode the class-name strings.
    le = preprocessing.LabelEncoder()
    le.fit(self.result_classes)
    cross_result_classes = le.transform(self.result_classes)
    cross_result_classes = cross_result_classes.astype(numpy.int32)

    self.log("Result data size: " + str(len(self.result_data)))
    self.log("Cross classes size: " + str(len(cross_result_classes)))
    self.log("Training OPF")
    self.opf_sup_cls = libopf_py.OPF()
    t_opf_start = time.time()
    self.opf_sup_cls.fit(self.result_data, cross_result_classes,metric=self.distance_function)
    t_opf_end = time.time() - t_opf_start
    self.params_output['time_classificator_fit'] = t_opf_end

    images_info = filesprocess.get_files(self.test_path)
    labels_test = []
    labels_hist_array = []
    for idx, image_info in enumerate(images_info):
      self.log("Generating hist test " + str(idx) + " of " + str(len(images_info)))
      img_path = image_info[0]
      img_folder = image_info[1]
      features, descriptors = featureextractor.extract_descriptor(img_path, self.thumbnail_size,self.feature_type,self.descriptor_type)
      # `is not None`: `!= None` is ambiguous on array-like descriptors.
      if descriptors is not None:
        label_hist = self.opf_predict(descriptors, self.n_clusters)
        labels_hist_array.append(label_hist)
        labels_test.append(img_folder)

    labels_hist_array = numpy.asarray(labels_hist_array, numpy.float64)

    self.log("Generating predictions")
    prediction = self.opf_sup_cls.predict(labels_hist_array)
    labels_predicted = le.inverse_transform(prediction)

    accuracy = metrics.accuracy_score(labels_test, labels_predicted)
    precision = metrics.precision_score(labels_test, labels_predicted)
    recall = metrics.recall_score(labels_test, labels_predicted)
    f1 = metrics.f1_score(labels_test, labels_predicted)

    self.params_output['accuracy'] = accuracy
    self.params_output['precision'] = precision
    self.params_output['recall'] = recall
    self.params_output['F1'] = f1

    self.log("Accuracy: " + str(accuracy))
    self.log("Precision: " + str(precision))
    self.log("Recall: " + str(recall))
    self.log("F1: " + str(f1))
    return accuracy, precision, recall, f1
  def run_test(self):
    le = preprocessing.LabelEncoder()
    le.fit(self.result_classes)
    #list(le.classes_)
    cross_result_classes = le.transform(self.result_classes)
    cross_result_classes = cross_result_classes.astype(numpy.int32)

    result_data_array = numpy.asarray(self.result_data, numpy.float64)
    self.log("Creating OPF")
    self.opf_cls = libopf_py.OPF()
    t_opf_start = time.time()
    #self.svm_cls.fit(self.result_data, self.result_classes)
    self.opf_cls.fit(result_data_array, cross_result_classes,metric=self.distance_function)
    t_opf_end = time.time() - t_opf_start
    self.params_output['time_classificator_fit'] = t_opf_end

    labels_test = []
    labels_predicted = []

    images_info = filesprocess.get_files(self.test_path)
    for idx, image_info in enumerate(images_info):
      self.log("Generating hist " + str(idx) + " of " + str(len(images_info)))
      img_path = image_info[0]
      img_folder = image_info[1]
      features, descriptors = featureextractor.extract_descriptor(img_path, None,"OVERFEAT","OVERFEAT")
      if (descriptors != None):
        descriptors = numpy.asarray(descriptors, numpy.float64)
        prediction = self.opf_cls.predict(descriptors)
        labels_predicted.append(prediction[0])
        label_trans = le.transform([img_folder])
        labels_test.append(label_trans[0])

    print labels_predicted
    accuracy = metrics.accuracy_score(labels_test, labels_predicted)
    self.log("Accuracy: " + str(accuracy))
    precision = metrics.precision_score(labels_test, labels_predicted)
    self.log("Precision: " + str(precision))
    recall = metrics.recall_score(labels_test, labels_predicted)
    self.log("Recall: " + str(recall))
    f1 = metrics.f1_score(labels_test, labels_predicted)
    self.log("F1: " + str(f1))

    self.params_output['accuracy'] = accuracy
    self.params_output['precision'] = precision
    self.params_output['recall'] = recall
    self.params_output['F1'] = f1
    
    return accuracy, precision, recall, f1
    def run_opf_supervised(self):
        """Fit a linear SVM on self.result_data, then build a cluster-label
        histogram for each test image (sequentially), predict with the SVM,
        and store the metrics in self.params_output.

        Returns (accuracy, precision, recall, f1).
        """
        self.log("Creating SVM")
        self.svm_cls = svm.LinearSVC(C=1.0, loss='l2', class_weight='auto')
        t_svm_start = time.time()
        self.svm_cls.fit(self.result_data, self.result_classes)
        t_svm_end = time.time() - t_svm_start
        self.params_output['time_classificator_fit'] = t_svm_end

        images_info = filesprocess.get_files(self.test_path)
        labels_test = []
        labels_hist_array = []
        for idx, image_info in enumerate(images_info):
            self.log("Generating hist test " + str(idx) + " of " +
                     str(len(images_info)))
            img_path = image_info[0]
            img_folder = image_info[1]
            features, descriptors = featureextractor.extract_descriptor(
                img_path, self.thumbnail_size, self.feature_type,
                self.descriptor_type)
            # `is not None`: `!= None` is ambiguous on array-like values.
            if descriptors is not None:
                label_hist = self.opf_predict(descriptors, self.n_clusters)
                labels_hist_array.append(label_hist)
                labels_test.append(img_folder)

        labels_hist_array = numpy.asarray(labels_hist_array, numpy.float64)

        self.log("Generating predictions")
        labels_predicted = self.svm_cls.predict(labels_hist_array)

        accuracy = metrics.accuracy_score(labels_test, labels_predicted)
        precision = metrics.precision_score(labels_test, labels_predicted)
        recall = metrics.recall_score(labels_test, labels_predicted)
        f1 = metrics.f1_score(labels_test, labels_predicted)

        self.params_output['accuracy'] = accuracy
        self.params_output['precision'] = precision
        self.params_output['recall'] = recall
        self.params_output['F1'] = f1

        self.log("Accuracy: " + str(accuracy))
        self.log("Precision: " + str(precision))
        self.log("Recall: " + str(recall))
        self.log("F1: " + str(f1))
        return accuracy, precision, recall, f1
# Beispiel #17 (scrape separator; vote count: 0)
    def run_opf_supervised(self):
        """Fit a linear SVM on self.result_data, build the test histograms
        in parallel, predict with the SVM, and store the metrics in
        self.params_output.

        Returns (accuracy, precision, recall, f1).
        """
        self.log("Creating SVM")
        self.svm_cls = svm.LinearSVC(C=1.0, loss='l2', class_weight='auto')
        t_svm_start = time.time()
        self.svm_cls.fit(self.result_data, self.result_classes)
        t_svm_end = time.time() - t_svm_start
        self.params_output['time_classificator_fit'] = t_svm_end

        images_info = filesprocess.get_files(self.test_path)
        labels_test = []
        labels_hist_array = []

        pool = multiprocessing.Pool()
        preds_data = pool.map(
            func_star_extract_descriptors_predict_test,
            itertools.izip(images_info, itertools.repeat(self)))

        # Each worker returns (histograms, true labels).
        for data in preds_data:
            labels_hist_array.extend(data[0])
            labels_test.extend(data[1])

        labels_hist_array = numpy.asarray(labels_hist_array, numpy.float64)

        self.log("Generating predictions")
        labels_predicted = self.svm_cls.predict(labels_hist_array)

        # close() + join() lets workers exit cleanly; the original called
        # terminate() right after close(), which force-kills workers.
        pool.close()
        pool.join()

        accuracy = metrics.accuracy_score(labels_test, labels_predicted)
        precision = metrics.precision_score(labels_test, labels_predicted)
        recall = metrics.recall_score(labels_test, labels_predicted)
        f1 = metrics.f1_score(labels_test, labels_predicted)

        self.params_output['accuracy'] = accuracy
        self.params_output['precision'] = precision
        self.params_output['recall'] = recall
        self.params_output['F1'] = f1

        self.log("Accuracy: " + str(accuracy))
        self.log("Precision: " + str(precision))
        self.log("Recall: " + str(recall))
        self.log("F1: " + str(f1))
        return accuracy, precision, recall, f1
  def run_opf_supervised(self):
    """Fit a linear SVM on self.result_data, build the test histograms in
    parallel, predict with the SVM, and store the metrics in
    self.params_output.

    Returns (accuracy, precision, recall, f1).
    """
    self.log("Creating SVM")
    self.svm_cls = svm.LinearSVC(C=1.0, loss='l2', class_weight='auto')
    t_svm_start = time.time()
    self.svm_cls.fit(self.result_data, self.result_classes)
    t_svm_end = time.time() - t_svm_start
    self.params_output['time_classificator_fit'] = t_svm_end

    images_info = filesprocess.get_files(self.test_path)
    labels_test = []
    labels_hist_array = []

    pool = multiprocessing.Pool()
    preds_data = pool.map(func_star_extract_descriptors_predict_test, 
                          itertools.izip(images_info, 
                          itertools.repeat(self)))

    # Each worker returns (histograms, true labels).
    for data in preds_data:
      labels_hist_array.extend(data[0])
      labels_test.extend(data[1])

    labels_hist_array = numpy.asarray(labels_hist_array, numpy.float64)

    self.log("Generating predictions")
    labels_predicted = self.svm_cls.predict(labels_hist_array)

    # close() + join() lets workers exit cleanly; the original called
    # terminate() right after close(), which force-kills workers.
    pool.close()
    pool.join()

    accuracy = metrics.accuracy_score(labels_test, labels_predicted)
    precision = metrics.precision_score(labels_test, labels_predicted)
    recall = metrics.recall_score(labels_test, labels_predicted)
    f1 = metrics.f1_score(labels_test, labels_predicted)

    self.params_output['accuracy'] = accuracy
    self.params_output['precision'] = precision
    self.params_output['recall'] = recall
    self.params_output['F1'] = f1

    self.log("Accuracy: " + str(accuracy))
    self.log("Precision: " + str(precision))
    self.log("Recall: " + str(recall))
    self.log("F1: " + str(f1))
    return accuracy, precision, recall, f1
# Beispiel #19 (scrape separator; vote count: 0)
    def run_test(self):
        """Sequentially classify each test image: quantize its descriptors
        with k-means, histogram the cluster labels, predict with the SVM,
        and store the metrics in self.params_output.

        Returns (accuracy, precision, recall, f1).
        """
        labels_test = []
        labels_predicted = []

        images_info = filesprocess.get_files(self.test_path)
        for idx, image_info in enumerate(images_info):
            self.log("Generating hist " + str(idx) + " of " +
                     str(len(images_info)))
            img_path = image_info[0]
            img_folder = image_info[1]
            features, descriptors = featureextractor.extract_descriptor(
                img_path, self.thumbnail_size, self.feature_type,
                self.descriptor_type)
            # `is not None`: `!= None` is ambiguous on array-like values.
            if descriptors is not None:
                labels = self.kmeans_cls.predict(descriptors)
                bins = range(self.kmeans_k)
                labels_hist = numpy.histogram(labels, bins=bins,
                                              density=True)[0]
                prediction = self.svm_cls.predict(labels_hist)
                confidence = self.svm_cls.decision_function(labels_hist)
                labels_predicted.append(prediction[0])
                labels_test.append(img_folder)

        accuracy = metrics.accuracy_score(labels_test, labels_predicted)
        precision = metrics.precision_score(labels_test, labels_predicted)
        recall = metrics.recall_score(labels_test, labels_predicted)
        f1 = metrics.f1_score(labels_test, labels_predicted)

        self.params_output['accuracy'] = accuracy
        self.params_output['precision'] = precision
        self.params_output['recall'] = recall
        self.params_output['F1'] = f1

        self.log("Accuracy: " + str(accuracy))
        self.log("Precision: " + str(precision))
        self.log("Recall: " + str(recall))
        self.log("F1: " + str(f1))
        return accuracy, precision, recall, f1
  def run_opf_supervised(self):
    """Fit a linear SVM on self.result_data, then build a cluster-label
    histogram for each test image (sequentially), predict with the SVM,
    and store the metrics in self.params_output.

    Returns (accuracy, precision, recall, f1).
    """
    self.log("Creating SVM")
    self.svm_cls = svm.LinearSVC(C=1.0, loss='l2', class_weight='auto')
    t_svm_start = time.time()
    self.svm_cls.fit(self.result_data, self.result_classes)
    t_svm_end = time.time() - t_svm_start
    self.params_output['time_classificator_fit'] = t_svm_end

    images_info = filesprocess.get_files(self.test_path)
    labels_test = []
    labels_hist_array = []
    for idx, image_info in enumerate(images_info):
      self.log("Generating hist test " + str(idx) + " of " + str(len(images_info)))
      img_path = image_info[0]
      img_folder = image_info[1]
      features, descriptors = featureextractor.extract_descriptor(img_path, self.thumbnail_size,self.feature_type,self.descriptor_type)
      # `is not None`: `!= None` is ambiguous on array-like descriptors.
      if descriptors is not None:
        label_hist = self.opf_predict(descriptors, self.n_clusters)
        labels_hist_array.append(label_hist)
        labels_test.append(img_folder)

    labels_hist_array = numpy.asarray(labels_hist_array, numpy.float64)

    self.log("Generating predictions")
    labels_predicted = self.svm_cls.predict(labels_hist_array)

    accuracy = metrics.accuracy_score(labels_test, labels_predicted)
    precision = metrics.precision_score(labels_test, labels_predicted)
    recall = metrics.recall_score(labels_test, labels_predicted)
    f1 = metrics.f1_score(labels_test, labels_predicted)

    self.params_output['accuracy'] = accuracy
    self.params_output['precision'] = precision
    self.params_output['recall'] = recall
    self.params_output['F1'] = f1

    self.log("Accuracy: " + str(accuracy))
    self.log("Precision: " + str(precision))
    self.log("Recall: " + str(recall))
    self.log("F1: " + str(f1))
    return accuracy, precision, recall, f1
# Beispiel #21 (scrape separator; vote count: 0)
  def run_test(self):
    """Sequentially classify each test image: quantize its descriptors with
    k-means, histogram the cluster labels, predict with the SVM, and store
    the metrics in self.params_output.

    Returns (accuracy, precision, recall, f1).
    """
    labels_test = []
    labels_predicted = []

    images_info = filesprocess.get_files(self.test_path)
    for idx, image_info in enumerate(images_info):
      self.log("Generating hist " + str(idx) + " of " + str(len(images_info)))
      img_path = image_info[0]
      img_folder = image_info[1]
      features, descriptors = featureextractor.extract_descriptor(img_path, self.thumbnail_size,self.feature_type,self.descriptor_type)
      # `is not None`: `!= None` is ambiguous on array-like descriptors.
      if descriptors is not None:
        labels = self.kmeans_cls.predict(descriptors)
        bins = range(self.kmeans_k)
        labels_hist = numpy.histogram(labels, bins=bins, density=True)[0]
        prediction = self.svm_cls.predict(labels_hist)
        confidence = self.svm_cls.decision_function(labels_hist)
        labels_predicted.append(prediction[0])
        labels_test.append(img_folder)

    accuracy = metrics.accuracy_score(labels_test, labels_predicted)
    precision = metrics.precision_score(labels_test, labels_predicted)
    recall = metrics.recall_score(labels_test, labels_predicted)
    f1 = metrics.f1_score(labels_test, labels_predicted)

    self.params_output['accuracy'] = accuracy
    self.params_output['precision'] = precision
    self.params_output['recall'] = recall
    self.params_output['F1'] = f1

    self.log("Accuracy: " + str(accuracy))
    self.log("Precision: " + str(precision))
    self.log("Recall: " + str(recall))
    self.log("F1: " + str(f1))
    return accuracy, precision, recall, f1
    def process(self):
        """Split the dataset into train/test folders on disk.

        Ensures ``train_path``/``validation_path``/``test_path`` exist,
        splits the files found under ``dataset_path`` with a fixed
        random seed, then copies each image into a per-class subfolder
        of the train or test directory.
        """
        if not os.path.exists(self.train_path):
            os.makedirs(self.train_path)

        if not os.path.exists(self.validation_path):
            os.makedirs(self.validation_path)

        if not os.path.exists(self.test_path):
            os.makedirs(self.test_path)

        # get_files yields (file_path, folder_name) pairs; the folder
        # name is used as the class subdirectory below.
        images_info = filesprocess.get_files(self.dataset_path)
        # random_state=42 keeps the split reproducible across runs.
        a_train, a_test = train_test_split(images_info,
                                           test_size=self.test_size,
                                           random_state=42)

        print "Saving training data"
        for data in a_train:
            # data[0] is the source file path, data[1] the class folder.
            img_folder_path = self.train_path + "/" + data[1]
            img_filename = os.path.split(data[0])[1]
            src_path = data[0]
            dst_path = img_folder_path + "/" + img_filename
            print "Processing train: " + src_path
            if not os.path.exists(img_folder_path):
                os.makedirs(img_folder_path)
            shutil.copyfile(src_path, dst_path)

        print "Saving test data"
        for data in a_test:
            img_folder_path = self.test_path + "/" + data[1]
            img_filename = os.path.split(data[0])[1]
            src_path = data[0]
            dst_path = img_folder_path + "/" + img_filename
            print "Processing test: " + src_path
            if not os.path.exists(img_folder_path):
                os.makedirs(img_folder_path)
            shutil.copyfile(src_path, dst_path)
  def train(self):
    """Train the OPF-based bag-of-words model (sequential version).

    Randomly samples up to ``n_sample_images`` training images, extracts
    their descriptors, clusters a random subset of descriptors with OPF
    to build the codebook, then encodes every training image as a
    cluster-label histogram in ``self.result_data`` (class labels in
    ``self.result_classes``).

    Returns:
      The number of clusters found by OPF clustering.
    """
    self.log("BoW Opf-Opf - Starting Train")
    self.log("train_path: " + self.train_path)
    self.log("Listing files to process")
    total_images_info = filesprocess.get_files(self.train_path)

    self.log("Total images: " + str(len(total_images_info)))
    # Sample (with replacement) at most n_sample_images images for
    # codebook construction.
    images_info = []
    if len(total_images_info) > self.n_sample_images:
      for i in range(self.n_sample_images):
        random_index = randrange(0, len(total_images_info))
        images_info.append(total_images_info[random_index])
    else:
      images_info = total_images_info

    self.log(str(len(images_info)) + " files to process.")

    # Extract descriptors from the sampled images.
    tmp_desc = []
    labels_true = []
    for idx, image_info in enumerate(images_info):
      self.log("Processing " + str(idx) + " of " + str(len(images_info)))
      img_path = image_info[0]
      img_folder = image_info[1]
      features, descriptors = featureextractor.extract_descriptor(
          img_path, self.thumbnail_size, self.feature_type, self.descriptor_type)
      # "is not None": comparing a numpy array against None with "!="
      # is element-wise and ambiguous in a boolean context.
      if descriptors is not None:
        self.log("Descriptors lenght: " + str(len(descriptors)))
        for desc in descriptors:
          labels_true.append(img_folder)
          tmp_desc.append(desc)

    tmp_desc = numpy.array(tmp_desc, numpy.float64)
    labels_true = numpy.asarray(labels_true)
    # Randomly keep at most n_sample_descriptors descriptors.
    self.log("Desc lenght original: " + str(len(tmp_desc)))
    if len(tmp_desc) > self.n_sample_descriptors:
      rand = numpy.random.permutation(len(tmp_desc))[0:self.n_sample_descriptors]
      tmp_desc = tmp_desc[rand]
      labels_true = labels_true[rand]
    self.log("Desc lenght reduced: " + str(len(tmp_desc)))
    self.params_output['real_desc_size'] = len(tmp_desc)
    self.log("OPF - Clustering")
    self.best_k, self.n_clusters = self.opf_cluster(tmp_desc)

    # Encode every training image as a histogram over the OPF clusters.
    self.result_classes = []
    self.result_data = []
    self.log("Generating predictions")
    t_ext_desc_sum = 0
    t_cluster_consult_sum = 0
    for idx, image_info in enumerate(total_images_info):
      self.log("Generating hist train " + str(idx) + " of " + str(len(total_images_info)))
      img_path = image_info[0]
      img_folder = image_info[1]
      t_ext_desc_start = time.time()
      features, descriptors = featureextractor.extract_descriptor(
          img_path, self.thumbnail_size, self.feature_type, self.descriptor_type)
      t_ext_desc_end = time.time() - t_ext_desc_start
      t_ext_desc_sum = t_ext_desc_sum + t_ext_desc_end
      self.log("Time desc extraction: " + str(t_ext_desc_end))
      t_cluster_consult_start = time.time()
      if descriptors is not None and len(descriptors) > 0:
        label_hist = self.opf_predict(descriptors, self.n_clusters)
        self.result_classes.append(img_folder)
        self.result_data.append(label_hist)
      t_cluster_consult_end = time.time() - t_cluster_consult_start
      t_cluster_consult_sum = t_cluster_consult_sum + t_cluster_consult_end
      self.log("Time cluster consult: " + str(t_cluster_consult_end))

    # Average per-image timings for reporting.
    self.params_output['time_ext_desc_med'] = t_ext_desc_sum / len(total_images_info)
    self.params_output['time_cons_cluster_med'] = t_cluster_consult_sum / len(total_images_info)
    self.result_data = numpy.asarray(self.result_data, numpy.float64)
    return self.n_clusters
import cPickle as pickle
import cv2
import numpy
from PIL import Image
import zlib
import filesprocess
import featureextractor

n_chunk = 2000
dir_path = '/home/images/CALTECH256/train'
output_dir = ""
zipped = True
files = filesprocess.get_files(dir_path)
files_chunks = zip(*[iter(files)]*n_chunk)
print "Chunks: ", len(files_chunks)
for idx_chunk, chunk in enumerate(files_chunks):
  chunk_data = []
  for idx_file, file_info in enumerate(chunk):
    img_path = file_info[0]
    folder = file_info[1]
    print "Processing: ", idx_file, " - ", img_path
    keypoints, descriptors = featureextractor.extract_descriptor(img_path, (512,512), "SURF", "SURF")
    data = {'path':img_path, 'folder':folder, 'descriptors':descriptors}
    chunk_data.append(data)
  print "Saving chunk ", idx_chunk
  output_filename = output_dir + "caltech256_" + str(idx_chunk) + ".pickle"
  
  if zipped:
    with open(output_filename + ".gz", 'wb') as fp:
     fp.write(zlib.compress(pickle.dumps(chunk_data, pickle.HIGHEST_PROTOCOL),9))
  else:
import cPickle as pickle
import cv2
import numpy
from PIL import Image
import zlib
import filesprocess
import featureextractor

n_chunk = 2000
dir_path = '/home/images/CALTECH256/train'
output_dir = ""
zipped = True
files = filesprocess.get_files(dir_path)
files_chunks = zip(*[iter(files)] * n_chunk)
print "Chunks: ", len(files_chunks)
for idx_chunk, chunk in enumerate(files_chunks):
    chunk_data = []
    for idx_file, file_info in enumerate(chunk):
        img_path = file_info[0]
        folder = file_info[1]
        print "Processing: ", idx_file, " - ", img_path
        keypoints, descriptors = featureextractor.extract_descriptor(
            img_path, (512, 512), "SURF", "SURF")
        data = {'path': img_path, 'folder': folder, 'descriptors': descriptors}
        chunk_data.append(data)
    print "Saving chunk ", idx_chunk
    output_filename = output_dir + "caltech256_" + str(idx_chunk) + ".pickle"

    if zipped:
        with open(output_filename + ".gz", 'wb') as fp:
            fp.write(
    def train(self):
        """Train the k-means bag-of-words model using a worker pool.

        Samples training images, extracts descriptors in parallel, fits
        a k-means codebook on a random subset of descriptors, encodes
        every training image as a cluster histogram (also in parallel)
        and fits a linear SVM on the histograms.
        """
        self.log("BoW - Starting Train")
        self.log("train_path: " + self.train_path)

        self.log("Listing files to process")
        total_images_info = filesprocess.get_files(self.train_path)

        # Sample (with replacement) at most n_sample_images images.
        images_info = []
        if len(total_images_info) > self.n_sample_images:
            for i in range(self.n_sample_images):
                random_index = randrange(0, len(total_images_info))
                images_info.append(total_images_info[random_index])
        else:
            images_info = total_images_info

        self.log(str(len(images_info)) + " files to process.")

        pool = multiprocessing.Pool()
        tmp_desc = []
        descs_data = pool.map(
            func_star_extract_descriptor_one,
            itertools.izip(images_info, itertools.repeat(self)))

        for data in descs_data:
            tmp_desc.extend(data)

        self.log("Desc lenght original: " + str(len(tmp_desc)))
        tmp_desc = numpy.array(tmp_desc)
        if len(tmp_desc) > self.n_sample_descriptors:
            # BUG FIX: permutation(n_sample_descriptors) only shuffled
            # the first n_sample indices, so the "sample" was always the
            # first descriptors extracted.  Permute the full index range
            # and keep the first n_sample entries instead (matches the
            # other train() variants in this project).
            rand = numpy.random.permutation(
                len(tmp_desc))[0:self.n_sample_descriptors]
            tmp_desc = tmp_desc[rand]

        self.log("Desc lenght reduced: " + str(len(tmp_desc)))
        self.params_output['real_desc_size'] = len(tmp_desc)
        self.kmeans_cls = KMeans(init='k-means++',
                                 n_clusters=self.kmeans_k,
                                 n_init=10,
                                 n_jobs=-1)
        self.log("Kmeans fit")
        t_kmeans_start = time.time()
        self.kmeans_cls.fit(tmp_desc)
        t_kmeans_end = time.time() - t_kmeans_start
        self.params_output['time_cluster_fit'] = t_kmeans_end
        self.log("Time cluster fit: " + str(t_kmeans_end))

        self.log("Generating histograms")
        self.result_classes = []
        self.result_data = []
        # Each worker returns (result_classes, result_data,
        # t_ext_desc_end, t_cluster_consult_end).
        descs_data = pool.map(
            func_star_extract_descriptors_and_predict,
            itertools.izip(total_images_info, itertools.repeat(self)))

        t_ext_desc_sum = 0.0
        t_cluster_consult_sum = 0.0
        for data in descs_data:
            self.result_classes.extend(data[0])
            self.result_data.extend(data[1])
            t_ext_desc_sum = t_ext_desc_sum + data[2]
            t_cluster_consult_sum = t_cluster_consult_sum + data[3]

        # Average per-image timings for reporting.
        self.params_output['time_ext_desc_med'] = t_ext_desc_sum / len(
            total_images_info)
        self.params_output[
            'time_cons_cluster_med'] = t_cluster_consult_sum / len(
                total_images_info)

        self.log("Creating SVM")
        self.svm_cls = svm.LinearSVC(C=1.0, loss='l2', class_weight='auto')
        t_svm_start = time.time()
        self.svm_cls.fit(self.result_data, self.result_classes)
        t_svm_end = time.time() - t_svm_start
        self.params_output['time_classificator_fit'] = t_svm_end

        pool.close()
        pool.terminate()
  def train(self):
    """Train the OPF bag-of-words model using a multiprocessing pool.

    Pipeline: sample training images -> extract descriptors in parallel
    -> OPF-cluster a random descriptor subset to form the codebook ->
    encode every training image as a cluster histogram (also in
    parallel) into ``self.result_data`` / ``self.result_classes``.

    Returns:
      The number of clusters found by OPF clustering.
    """
    self.log("BoW Opf-Opf - Starting Train")
    self.log("train_path: " + self.train_path)
    result_descriptors = []  # NOTE(review): unused; kept for fidelity
    self.log("Listing files to process")
    total_images_info = filesprocess.get_files(self.train_path)

    self.log("Total images: " + str(len(total_images_info)))
    # Sample (with replacement) at most n_sample_images images for
    # codebook construction.
    images_info = []
    if len(total_images_info) > self.n_sample_images:
      for i in range(self.n_sample_images):
        random_index = randrange(0,len(total_images_info))
        images_info.append(total_images_info[random_index])
    else:
      images_info = total_images_info

    self.log(str(len(images_info)) + " files to process.")

    # Extract descriptors in parallel; each worker returns
    # (descriptors, labels) for one image.
    pool = multiprocessing.Pool()
    tmp_desc = []
    labels_true = []
    descs_data = pool.map(func_star_extract_descriptor_one,
                          itertools.izip(images_info,
                          itertools.repeat(self)))

    for data in descs_data:
      tmp_desc.extend(data[0])
      labels_true.extend(data[1])

    tmp_desc = numpy.array(tmp_desc, numpy.float64)
    labels_true = numpy.asarray(labels_true)
    # Randomly keep at most n_sample_descriptors descriptors.
    self.log("Desc lenght original: " + str(len(tmp_desc)))
    if len(tmp_desc) > self.n_sample_descriptors:
      rand = numpy.random.permutation(len(tmp_desc))[0:self.n_sample_descriptors]
      tmp_desc = tmp_desc[rand]
      labels_true = labels_true[rand]
    self.log("Desc lenght reduced: " + str(len(tmp_desc)))
    self.params_output['real_desc_size'] = len(tmp_desc)
    self.log("OPF - Clustering")
    self.best_k, self.n_clusters = self.opf_cluster(tmp_desc)

    # Encode every training image as a histogram over the OPF clusters.
    self.log("Generating predictions")
    self.result_classes = []
    self.result_data = []
    descs_data = []
    # Each worker returns (result_classes, result_data,
    # t_ext_desc_end, t_cluster_consult_end).
    descs_data = pool.map(func_star_extract_descriptors_and_predict,
                          itertools.izip(total_images_info,
                          itertools.repeat(self)))

    t_ext_desc_sum = 0.0
    t_cluster_consult_sum = 0.0
    for data in descs_data:
      self.result_classes.extend(data[0])
      self.result_data.extend(data[1])
      t_ext_desc_sum = t_ext_desc_sum + data[2]
      t_cluster_consult_sum = t_cluster_consult_sum + data[3]

    # Average per-image timings for reporting.
    self.params_output['time_ext_desc_med'] =  t_ext_desc_sum / len(total_images_info)
    self.params_output['time_cons_cluster_med'] =  t_cluster_consult_sum / len(total_images_info)
    self.result_data = numpy.asarray(self.result_data, numpy.float64)

    pool.close()
    pool.terminate()

    return self.n_clusters  #, self.best_k
Beispiel #28
0
  def train(self):
    """Train the k-means bag-of-words model (sequential version).

    Samples training images, extracts descriptors, fits a k-means
    codebook on a random subset of descriptors, encodes every training
    image as a cluster histogram and fits a linear SVM on the
    histograms.
    """
    self.log("BoW - Starting Train")
    self.log("train_path: " + self.train_path)

    self.log("Listing files to process")
    total_images_info = filesprocess.get_files(self.train_path)

    # Sample (with replacement) at most n_sample_images images.
    images_info = []
    if len(total_images_info) > self.n_sample_images:
      for i in range(self.n_sample_images):
        random_index = randrange(0, len(total_images_info))
        images_info.append(total_images_info[random_index])
    else:
      images_info = total_images_info

    self.log(str(len(images_info)) + " files to process.")

    tmp_desc = []
    for idx, image_info in enumerate(images_info):
      self.log("Processing " + str(idx) + " of " + str(len(images_info)))
      img_path = image_info[0]
      img_folder = image_info[1]
      features, descriptors = featureextractor.extract_descriptor(
          img_path, self.thumbnail_size, self.feature_type, self.descriptor_type)
      # "is not None": "!= None" on numpy arrays is element-wise.
      if descriptors is not None:
        for desc in descriptors:
          tmp_desc.append(desc)

    self.log("Desc lenght original: " + str(len(tmp_desc)))
    tmp_desc = numpy.array(tmp_desc)
    if len(tmp_desc) > self.n_sample_descriptors:
      # BUG FIX: permutation(n_sample_descriptors) only shuffled the
      # first n_sample indices, so the "sample" was always the first
      # descriptors extracted.  Permute the full index range and keep
      # the first n_sample entries (matches the OPF train() variants).
      rand = numpy.random.permutation(len(tmp_desc))[0:self.n_sample_descriptors]
      tmp_desc = tmp_desc[rand]

    self.log("Desc lenght reduced: " + str(len(tmp_desc)))
    self.params_output['real_desc_size'] = len(tmp_desc)
    self.kmeans_cls = KMeans(init='k-means++', n_clusters=self.kmeans_k, n_init=10, n_jobs=-1)
    self.log("Kmeans fit")
    t_kmeans_start = time.time()
    self.kmeans_cls.fit(tmp_desc)
    t_kmeans_end = time.time() - t_kmeans_start
    self.params_output['time_cluster_fit'] = t_kmeans_end
    self.log("Time cluster fit: " + str(t_kmeans_end))

    self.log("Generating histograms")
    self.result_classes = []
    self.result_data = []
    self.result_clustering = []
    t_ext_desc_sum = 0
    t_cluster_consult_sum = 0
    for idx, image_info in enumerate(total_images_info):
      self.log("Generating hist " + str(idx) + " of " + str(len(total_images_info)))
      img_path = image_info[0]
      img_folder = image_info[1]
      t_ext_desc_start = time.time()
      features, descriptors = featureextractor.extract_descriptor(
          img_path, self.thumbnail_size, self.feature_type, self.descriptor_type)
      t_ext_desc_end = time.time() - t_ext_desc_start
      t_ext_desc_sum = t_ext_desc_sum + t_ext_desc_end
      self.log("Time desc extraction: " + str(t_ext_desc_end))
      t_cluster_consult_start = time.time()
      if descriptors is not None:
        labels = self.kmeans_cls.predict(descriptors)
        bins = range(self.kmeans_k)
        labels_hist = numpy.histogram(labels, bins=bins, density=True)[0]
        self.result_clustering.append((img_path, img_folder, labels_hist))
        self.result_classes.append(img_folder)
        self.result_data.append(labels_hist)
      t_cluster_consult_end = time.time() - t_cluster_consult_start
      t_cluster_consult_sum = t_cluster_consult_sum + t_cluster_consult_end
      self.log("Time cluster consult: " + str(t_cluster_consult_end))

    # Average per-image timings for reporting.
    self.params_output['time_ext_desc_med'] = t_ext_desc_sum / len(total_images_info)
    self.params_output['time_cons_cluster_med'] = t_cluster_consult_sum / len(total_images_info)

    self.log("Creating SVM")
    self.svm_cls = svm.LinearSVC(C=1.0, loss='l2', class_weight='auto')
    t_svm_start = time.time()
    self.svm_cls.fit(self.result_data, self.result_classes)
    t_svm_end = time.time() - t_svm_start
    self.params_output['time_classificator_fit'] = t_svm_end
  def train(self):
    """Train the k-means bag-of-words model using a worker pool.

    Samples training images, extracts descriptors in parallel, fits a
    k-means codebook on a random subset of descriptors, encodes every
    training image as a cluster histogram (also in parallel) and fits a
    linear SVM on the histograms.
    """
    self.log("BoW - Starting Train")
    self.log("train_path: " + self.train_path)

    self.log("Listing files to process")
    total_images_info = filesprocess.get_files(self.train_path)

    # Sample (with replacement) at most n_sample_images images.
    images_info = []
    if len(total_images_info) > self.n_sample_images:
      for i in range(self.n_sample_images):
        random_index = randrange(0, len(total_images_info))
        images_info.append(total_images_info[random_index])
    else:
      images_info = total_images_info

    self.log(str(len(images_info)) + " files to process.")

    pool = multiprocessing.Pool()
    tmp_desc = []
    descs_data = pool.map(func_star_extract_descriptor_one,
                          itertools.izip(images_info,
                          itertools.repeat(self)))

    for data in descs_data:
      tmp_desc.extend(data)

    self.log("Desc lenght original: " + str(len(tmp_desc)))
    tmp_desc = numpy.array(tmp_desc)
    if len(tmp_desc) > self.n_sample_descriptors:
      # BUG FIX: permutation(n_sample_descriptors) only shuffled the
      # first n_sample indices, so the "sample" was always the first
      # descriptors extracted.  Permute the full index range and keep
      # the first n_sample entries (matches the OPF train() variants).
      rand = numpy.random.permutation(len(tmp_desc))[0:self.n_sample_descriptors]
      tmp_desc = tmp_desc[rand]

    self.log("Desc lenght reduced: " + str(len(tmp_desc)))
    self.params_output['real_desc_size'] = len(tmp_desc)
    self.kmeans_cls = KMeans(init='k-means++', n_clusters=self.kmeans_k, n_init=10, n_jobs=-1)
    self.log("Kmeans fit")
    t_kmeans_start = time.time()
    self.kmeans_cls.fit(tmp_desc)
    t_kmeans_end = time.time() - t_kmeans_start
    self.params_output['time_cluster_fit'] = t_kmeans_end
    self.log("Time cluster fit: " + str(t_kmeans_end))

    self.log("Generating histograms")
    self.result_classes = []
    self.result_data = []
    # Each worker returns (result_classes, result_data,
    # t_ext_desc_end, t_cluster_consult_end).
    descs_data = pool.map(func_star_extract_descriptors_and_predict,
                          itertools.izip(total_images_info,
                          itertools.repeat(self)))

    t_ext_desc_sum = 0.0
    t_cluster_consult_sum = 0.0
    for data in descs_data:
      self.result_classes.extend(data[0])
      self.result_data.extend(data[1])
      t_ext_desc_sum = t_ext_desc_sum + data[2]
      t_cluster_consult_sum = t_cluster_consult_sum + data[3]

    # Average per-image timings for reporting.
    self.params_output['time_ext_desc_med'] = t_ext_desc_sum / len(total_images_info)
    self.params_output['time_cons_cluster_med'] = t_cluster_consult_sum / len(total_images_info)

    self.log("Creating SVM")
    self.svm_cls = svm.LinearSVC(C=1.0, loss='l2', class_weight='auto')
    t_svm_start = time.time()
    self.svm_cls.fit(self.result_data, self.result_classes)
    t_svm_end = time.time() - t_svm_start
    self.params_output['time_classificator_fit'] = t_svm_end

    pool.close()
    pool.terminate()
    dig = hashlib.md5(hash).hexdigest()
    return str(dig)


parser = argparse.ArgumentParser(prog='python demo_opf_unsup.py',
                                 usage='%(prog)s [options]',
                                 description='',
                                 epilog="")
parser.add_argument('-dir',
                    '--directory',
                    help='images directory',
                    required=True)

args = parser.parse_args()
print "Processing: " + str(args.directory)
images_info = filesprocess.get_files(args.directory)
print str(len(images_info)) + " images found."

# all_classes = []
# for idx, image_info in enumerate(images_info):
#   img_folder = image_info[1]
#   all_classes.append(img_folder)

# all_classes = numpy.asarray(all_classes)
# all_classes = numpy.unique(all_classes)
# print str(len(all_classes)) + " classes found."

all_descriptors = []
for idx, image_info in enumerate(images_info):
    img_path = image_info[0]
    img_class = image_info[1]
  def run_opf_supervised(self):
    """Train a supervised OPF classifier on the BoW histograms and
    evaluate it on the test set.

    Fits libopf on ``self.result_data``/``self.result_classes`` (labels
    integer-encoded), builds a k-means histogram for each test image and
    reports accuracy/precision/recall/F1 via ``self.params_output``.

    Returns:
      tuple: (accuracy, precision, recall, f1)
    """
    # Encode the string class labels as integers for libopf.
    le = preprocessing.LabelEncoder()
    le.fit(self.result_classes)
    cross_result_classes = le.transform(self.result_classes)
    cross_result_classes = cross_result_classes.astype(numpy.int32)

    O = libopf_py.OPF()
    result_data_array = numpy.array(self.result_data)
    self.log("Training OPF")
    t_opf_start = time.time()
    O.fit(result_data_array, cross_result_classes, metric=self.distance_function)
    t_opf_end = time.time() - t_opf_start
    self.params_output['time_classificator_fit'] = t_opf_end
    images_info = filesprocess.get_files(self.test_path)

    bins = range(self.kmeans_k)
    labels_test = []
    labels_hist_array = []
    for idx, image_info in enumerate(images_info):
      self.log("Generating hist " + str(idx) + " of " + str(len(images_info)))
      img_path = image_info[0]
      img_folder = image_info[1]
      label_test = le.transform([img_folder])
      features, descriptors = featureextractor.extract_descriptor(
          img_path, self.thumbnail_size, self.feature_type, self.descriptor_type)
      # "is not None": "!= None" on numpy arrays is element-wise.
      if descriptors is not None:
        labels = self.kmeans_cls.predict(descriptors)
        labels_hist = numpy.histogram(labels, bins=bins, density=True)[0]
        labels_hist_array.append(labels_hist)
        # BUG FIX: append the scalar label, not the length-1 array that
        # LabelEncoder.transform returns, so labels_test stays 1-D for
        # the sklearn metric calls below.
        labels_test.append(label_test[0])
    labels_hist_array = numpy.asarray(labels_hist_array, numpy.float64)
    labels_predicted = O.predict(labels_hist_array)

    accuracy = metrics.accuracy_score(labels_test, labels_predicted)
    precision = metrics.precision_score(labels_test, labels_predicted)
    recall = metrics.recall_score(labels_test, labels_predicted)
    f1 = metrics.f1_score(labels_test, labels_predicted)

    self.params_output['accuracy'] = accuracy
    self.params_output['precision'] = precision
    self.params_output['recall'] = recall
    self.params_output['F1'] = f1

    self.log("Accuracy: " + str(accuracy))
    self.log("Precision: " + str(precision))
    self.log("Recall: " + str(recall))
    self.log("F1: " + str(f1))
    return accuracy, precision, recall, f1
  def train(self):
    """Train the k-means bag-of-words model (sequential variant).

    Fits a k-means codebook on a random subset of descriptors sampled
    from the training images, then encodes every training image as a
    cluster histogram in ``self.result_data`` / ``self.result_classes``
    (per-image tuples also kept in ``self.result_clustering``).
    """
    self.log("BoW - Starting Train")
    self.log("train_path: " + self.train_path)
    self.kmeans_cls = KMeans(init='k-means++', n_clusters=self.kmeans_k, n_init=10, n_jobs=-1)
    self.log("Listing files to process")
    total_images_info = filesprocess.get_files(self.train_path)
    # Sample (with replacement) at most n_sample_images images.
    images_info = []
    if len(total_images_info) > self.n_sample_images:
      for i in range(self.n_sample_images):
        random_index = randrange(0, len(total_images_info))
        images_info.append(total_images_info[random_index])
    else:
      images_info = total_images_info

    self.log(str(len(images_info)) + " files to process.")

    tmp_desc = []
    for idx, image_info in enumerate(images_info):
      self.log("Processing " + str(idx) + " of " + str(len(images_info)))
      img_path = image_info[0]
      img_folder = image_info[1]
      features, descriptors = featureextractor.extract_descriptor(
          img_path, self.thumbnail_size, self.feature_type, self.descriptor_type)
      # "is not None": "!= None" on numpy arrays is element-wise.
      if descriptors is not None:
        for desc in descriptors:
          tmp_desc.append(desc)

    self.log("Desc lenght original: " + str(len(tmp_desc)))
    tmp_desc = numpy.array(tmp_desc)
    if len(tmp_desc) > self.n_sample_descriptors:
      # BUG FIX: permutation(n_sample_descriptors) only shuffled the
      # first n_sample indices, so the "sample" was always the first
      # descriptors extracted.  Permute the full index range and keep
      # the first n_sample entries (matches the OPF train() variants).
      rand = numpy.random.permutation(len(tmp_desc))[0:self.n_sample_descriptors]
      tmp_desc = tmp_desc[rand]
    self.log("Desc lenght reduced: " + str(len(tmp_desc)))
    self.params_output['real_desc_size'] = len(tmp_desc)
    self.log("Kmeans fit")
    t_kmeans_start = time.time()
    self.kmeans_cls.fit(tmp_desc)
    t_kmeans_end = time.time() - t_kmeans_start
    self.params_output['time_cluster_fit'] = t_kmeans_end
    self.log("Time cluster fit: " + str(t_kmeans_end))

    self.log("Generating histograms")
    self.result_classes = []
    self.result_data = []
    self.result_clustering = []
    t_ext_desc_sum = 0
    t_cluster_consult_sum = 0
    for idx, image_info in enumerate(total_images_info):
      self.log("Generating hist " + str(idx) + " of " + str(len(total_images_info)))
      img_path = image_info[0]
      img_folder = image_info[1]
      t_ext_desc_start = time.time()
      features, descriptors = featureextractor.extract_descriptor(
          img_path, self.thumbnail_size, self.feature_type, self.descriptor_type)
      t_ext_desc_end = time.time() - t_ext_desc_start
      t_ext_desc_sum = t_ext_desc_sum + t_ext_desc_end
      self.log("Time desc extraction: " + str(t_ext_desc_end))
      t_cluster_consult_start = time.time()
      if descriptors is not None:
        labels = self.kmeans_cls.predict(descriptors)
        bins = range(self.kmeans_k)
        labels_hist = numpy.histogram(labels, bins=bins, density=True)[0]
        self.result_clustering.append((img_path, img_folder, labels_hist))
        self.result_classes.append(img_folder)
        self.result_data.append(labels_hist)
      t_cluster_consult_end = time.time() - t_cluster_consult_start
      t_cluster_consult_sum = t_cluster_consult_sum + t_cluster_consult_end
      self.log("Time cluster consult: " + str(t_cluster_consult_end))

    # Average per-image timings for reporting.
    self.params_output['time_ext_desc_med'] = t_ext_desc_sum / len(total_images_info)
    self.params_output['time_cons_cluster_med'] = t_cluster_consult_sum / len(total_images_info)
Beispiel #33
0
    def train(self):
        """Train the OPF bag-of-words model using a multiprocessing pool.

        Pipeline: sample training images -> extract descriptors in
        parallel -> OPF-cluster a random descriptor subset to form the
        codebook -> encode every training image as a cluster histogram
        (also in parallel) into ``self.result_data`` /
        ``self.result_classes``.

        Returns:
          The number of clusters found by OPF clustering.
        """
        self.log("BoW Opf-Opf - Starting Train")
        self.log("train_path: " + self.train_path)
        result_descriptors = []  # NOTE(review): unused; kept for fidelity
        self.log("Listing files to process")
        total_images_info = filesprocess.get_files(self.train_path)

        self.log("Total images: " + str(len(total_images_info)))
        # Sample (with replacement) at most n_sample_images images for
        # codebook construction.
        images_info = []
        if len(total_images_info) > self.n_sample_images:
            for i in range(self.n_sample_images):
                random_index = randrange(0, len(total_images_info))
                images_info.append(total_images_info[random_index])
        else:
            images_info = total_images_info

        self.log(str(len(images_info)) + " files to process.")

        # Extract descriptors in parallel; each worker returns
        # (descriptors, labels) for one image.
        pool = multiprocessing.Pool()
        tmp_desc = []
        labels_true = []
        descs_data = pool.map(
            func_star_extract_descriptor_one,
            itertools.izip(images_info, itertools.repeat(self)))

        for data in descs_data:
            tmp_desc.extend(data[0])
            labels_true.extend(data[1])

        tmp_desc = numpy.array(tmp_desc, numpy.float64)
        labels_true = numpy.asarray(labels_true)
        # Randomly keep at most n_sample_descriptors descriptors.
        self.log("Desc lenght original: " + str(len(tmp_desc)))
        if len(tmp_desc) > self.n_sample_descriptors:
            rand = numpy.random.permutation(
                len(tmp_desc))[0:self.n_sample_descriptors]
            tmp_desc = tmp_desc[rand]
            labels_true = labels_true[rand]
        self.log("Desc lenght reduced: " + str(len(tmp_desc)))
        self.params_output['real_desc_size'] = len(tmp_desc)
        self.log("OPF - Clustering")
        self.best_k, self.n_clusters = self.opf_cluster(tmp_desc)

        # Encode every training image as a histogram over the clusters.
        self.log("Generating predictions")
        self.result_classes = []
        self.result_data = []
        descs_data = []
        # Each worker returns (result_classes, result_data,
        # t_ext_desc_end, t_cluster_consult_end).
        descs_data = pool.map(
            func_star_extract_descriptors_and_predict,
            itertools.izip(total_images_info, itertools.repeat(self)))

        t_ext_desc_sum = 0.0
        t_cluster_consult_sum = 0.0
        for data in descs_data:
            self.result_classes.extend(data[0])
            self.result_data.extend(data[1])
            t_ext_desc_sum = t_ext_desc_sum + data[2]
            t_cluster_consult_sum = t_cluster_consult_sum + data[3]

        # Average per-image timings for reporting.
        self.params_output['time_ext_desc_med'] = t_ext_desc_sum / len(
            total_images_info)
        self.params_output[
            'time_cons_cluster_med'] = t_cluster_consult_sum / len(
                total_images_info)
        self.result_data = numpy.asarray(self.result_data, numpy.float64)

        pool.close()
        pool.terminate()

        return self.n_clusters  #, self.best_k
def desc_hash(desc):
  """Return a stable MD5 hex digest identifying a descriptor vector.

  Each component is formatted with four decimal places and the
  concatenation is hashed, so two descriptors that agree to four
  decimals map to the same digest.

  Args:
    desc: iterable of numbers (descriptor components).

  Returns:
    str: 32-character hexadecimal MD5 digest.
  """
  # join() avoids quadratic += concatenation and the local no longer
  # shadows the builtin "hash"; encode() makes md5() valid on both
  # Python 2 and 3 (identical digest for this ASCII-only text).
  text = "".join("%0.4f" % d for d in desc)
  return hashlib.md5(text.encode("utf-8")).hexdigest()


parser = argparse.ArgumentParser(prog='python demo_opf_unsup.py', usage='%(prog)s [options]',
    description='', 
    epilog="")
parser.add_argument('-dir', '--directory', help='images directory', required=True)

args = parser.parse_args()
print "Processing: " + str(args.directory)
images_info = filesprocess.get_files(args.directory)
print str(len(images_info)) +  " images found."

# all_classes = []
# for idx, image_info in enumerate(images_info):
#   img_folder = image_info[1]
#   all_classes.append(img_folder)

# all_classes = numpy.asarray(all_classes)
# all_classes = numpy.unique(all_classes)
# print str(len(all_classes)) + " classes found."

all_descriptors = []
for idx, image_info in enumerate(images_info):
  img_path = image_info[0]
  img_class = image_info[1]