def perform_learning(self, options):
    """
    Perform the learning from the dataset.

    Every google id listed in ``self.dataset`` is resolved with
    ``SketchupModel.find_google_id``, the resulting models are added to a
    new ``Identifier`` built around the classifier selected by
    ``options['classifier_type']``, and the identifier is trained with
    ``options['entropy']``.  The identifier is stored on
    ``self.identifier``.
    """
    print('Training using a {} classifier.'.format(
        options['classifier_type']))

    # Retrieving the dataset models, grouped by category.
    models = {
        category: [SketchupModel.find_google_id(google_id)
                   for google_id in google_ids]
        for category, google_ids in self.dataset.items()
    }

    # Training
    chosen = self.classifiers_available[options['classifier_type']]
    self.identifier = Identifier(classifier=chosen)
    for category, category_models in models.items():
        self.identifier.add_models(category_models, category)
    self.identifier.train(options['entropy'])
def test_identification_banana_vs_bowl_vs_food_can(self):
    """
    Smoke-test identification after training on bananas and bowls.

    Only two categories are trained here: no food_can models are loaded
    even though the test name mentions them.  Nothing is asserted on the
    identification result, so the test only checks that the calls
    complete without raising.
    """
    # Getting the dataset (bowls are fetched first, then bananas).
    bowls = [
        SketchupModel.find_google_id(google_id)
        for google_id in ('fa61e604661d4aa66658ecd96794a1cd',
                          'f74bba9a22e044dea3769fcd5f96f4',
                          'd2e1dc9ee02834c71621c7edb823fc53')
    ]
    bananas = [
        SketchupModel.find_google_id(google_id)
        for google_id in ('f6e6117261dca163713c042b393cc65b',
                          'ba0d56295321002718ddbf38fa69c501',
                          '7d78e217e0ba160fe2b248b8bb97d290')
    ]

    # Training
    identifier = Identifier()
    identifier.add_models(bananas, 'banana')
    identifier.add_models(bowls, 'bowl')
    identifier.train()

    # Identification of 20 randomly drawn examples.
    for _ in range(20):
        example = Example.get_random(['banana', 'bowl'])
        # Keep the file object bound while its name is being used.
        pcd_file = example.pcd_file()
        print("Identification of file {}".format(example))
        identifier.identify(PointCloud.load_pcd(pcd_file.name))
def test_exception_when_no_existing_category(self):
    """
    Test that an identifier without any category raises IndexError.

    After one model has been added under "test_category", identify() must
    no longer raise IndexError.  Any other ordinary exception is tolerated
    because the identification itself is allowed to fail.
    """
    pointcloud = PointCloud.load_pcd("pointcloud/fixtures/cloud.pcd")
    identifier = Identifier()
    self.assertRaises(IndexError, identifier.identify, pointcloud)

    # Adding a model is expected to register a category.
    model = SketchupModel()
    model.google_id = "test1"
    # Read the fixture through a context manager so the handle is closed.
    with open("sketchup_models/fixtures/mesh_can.tri") as mesh_file:
        model.mesh = mesh_file.read()
    identifier.add_models([model], "test_category")

    try:
        identifier.identify(pointcloud)
    except IndexError:
        self.fail("identifier.identify() raised IndexError unexpectedly!")
    except Exception:
        # Identification may fail for other reasons; that is not what this
        # test checks.  KeyboardInterrupt/SystemExit are no longer
        # swallowed, unlike with a bare ``except:``.
        pass
class Command(BaseCommand):
    """
    Django Command for classification evaluation.

    The command trains an ``Identifier`` on the models listed in a JSON
    dataset, identifies every video sequence of the ``ExampleObject`` rows
    whose category is in the dataset, and can print a success report.  The
    state (dataset, identifier, results so far) can be pickled to a file
    and restored later to resume an interrupted run.
    """

    # NOTE: these are class-level defaults.  handle() rebinds ``results``
    # and ``done_examples`` on the instance after training, and load()
    # rebinds whatever the state file contains.
    dataset = dict()
    merge_info = dict()  # Hash containing category => category_merged_with
    identifier = None
    results = OrderedDict()
    done_examples = []

    help = ('Perform learning from a set of models,'
            ' and test it vs the dataset.')

    # Classifier instances selectable with --classifier.
    classifiers_available = {
        'LinearSVC': LinearSVC(random_state=0),
        'SVC': SVC(),
        'OneVsOne': OneVsOneClassifier(LinearSVC(random_state=0)),
        'OneVsRest': OneVsRestClassifier(LinearSVC(random_state=0)),
        'KNeighbors_default': KNeighborsClassifier(),
        'OutputCode': OutputCodeClassifier(LinearSVC(random_state=0)),
        'DecisionTree': DecisionTreeClassifier(),
        'KNeighbors': KNeighborsClassifier(),
        'KNeighbors_distance_l2': KNeighborsClassifier(
            weights='distance', n_neighbors=5, leaf_size=64),
        'KNeighbors_uniforme_l2': KNeighborsClassifier(
            n_neighbors=5, leaf_size=64),
        'KNeighbors_brute_l2': KNeighborsClassifier(algorithm='brute'),
        'KNeighbors_distance_mahalanobis': KNeighborsClassifier(
            weights='distance', n_neighbors=5, leaf_size=64,
            metric='mahalanobis'),
        'KNeighbors_distance_taneja': KNeighborsClassifier(
            weights='distance', n_neighbors=5, leaf_size=64,
            metric='pyfunc', func=taneja_distance),
    }

    option_list = BaseCommand.option_list + (
        make_option('-d', '--dataset',
                    dest='dataset_file',
                    default='evaluation_dataset.json.sample',
                    help='Json file with model ids of learnign dataset.'),
        make_option('-r', '--restrict',
                    dest='categories',
                    default=None,
                    help=('Restrict the dataset to a list of category.'
                          ' Separate categories with comma.')),
        make_option('-o', '--save',
                    dest='save_file',
                    default=None,
                    help=('When interupted, the task state is store in this '
                          'file, so it can be resumed. '
                          'Default to load parameter if used.')),
        make_option('-i', '--load',
                    dest='load_file',
                    default=None,
                    help='Resume task from this file.'),
        make_option('-a', '--analyze',
                    dest='analyze',
                    action='store_true',
                    default=False,
                    help='Display results of evaluation.'),
        make_option('-l', '--learning-only',
                    dest='learning',
                    action='store_true',
                    default=False,
                    help='Performs the learning only, do not evaluate.'),
        make_option('-e', '--use-entropy',
                    dest='entropy',
                    action='store_true',
                    default=False,
                    help='Use entropy to filter partial views.'),
        make_option('-c', '--classifier',
                    dest='classifier_type',
                    default='LinearSVC',
                    help='Kind of classifier to use.'),
        make_option('-f', '--force-descriptors',
                    dest='force_descriptors',
                    action='store_true',
                    default=False,
                    help="Force recomputation of descriptors."),
    )

    @staticmethod
    def _percentage(part, whole):
        """ Return the floored integer percentage, or 0 if whole is 0. """
        return 100 * part // whole if whole else 0

    def handle(self, *_, **options):
        """
        Handle the command call.

        Restores a saved state when ``load_file`` is given, trains an
        identifier when none is available, then (unless ``learning`` is
        set) identifies every example not processed yet.  The state is
        dumped at the end, including after a KeyboardInterrupt.
        """
        if options['load_file']:
            self.load(options['load_file'])

        if not self.identifier:
            # 'force_dataset' is not declared in option_list, so it is read
            # with .get() to avoid a KeyError when a dataset is already
            # loaded.
            if not self.dataset or options.get('force_dataset'):
                self.load_dataset(options)
            self.perform_learning(options)
            # We also want to reidentify objects
            self.results = OrderedDict()
            self.done_examples = []

        if options['learning']:
            # We stop after learning
            self.dump(options)
            return

        examples = ExampleObject.objects.filter(
            category__in=self.dataset.keys()).order_by("name")
        try:
            # Query the total once instead of twice per example.
            total = examples.count()
            for index, example in enumerate(examples.iterator()):
                if example.pk in self.done_examples:
                    continue
                print("Identification of {} {}/{} ({}%)".format(
                    example.name, index + 1, total,
                    self._percentage(index + 1, total)))
                self.results[example.name] = self.process_example(
                    example, options)
                self.done_examples.append(example.pk)
            if options['analyze']:
                self.analyse_results()
        # The process can be stopped and saved for restart
        except KeyboardInterrupt:
            pass
        self.dump(options)

    def load_dataset(self, options):
        """
        Load the dataset according to the options.

        Reads the JSON file ``options['dataset_file']`` into
        ``self.dataset``, moves an optional 'merge_info' entry to
        ``self.merge_info`` and, when ``options['categories']`` is set,
        keeps only the comma-separated categories it lists.

        Raises Exception when a requested category is not in the dataset.
        """
        with open(options['dataset_file']) as dataset_file:
            self.dataset = json.load(dataset_file)
        if 'merge_info' in self.dataset:
            self.merge_info = self.dataset.pop('merge_info')
        if options['categories']:
            # We restrict the dataset to some categories
            categories = options['categories'].split(',')
            for category in categories:
                if category not in self.dataset:
                    raise Exception(
                        "{} is not included in the dataset".format(category))
            self.dataset = {k: self.dataset[k] for k in categories}

    def perform_learning(self, options):
        """
        Perform the learning from the dataset.

        Resolves every google id of ``self.dataset`` to a model, feeds the
        models to a new ``Identifier`` using the classifier named by
        ``options['classifier_type']`` and trains it with
        ``options['entropy']``.
        """
        print('Training using a {} classifier.'.format(
            options['classifier_type']))
        # Retrieving the dataset models
        models = {
            category: [SketchupModel.find_google_id(google_id)
                       for google_id in google_ids]
            for category, google_ids in self.dataset.items()
        }
        # Training
        classifier = self.classifiers_available[options['classifier_type']]
        self.identifier = Identifier(classifier=classifier)
        for category, category_models in models.items():
            self.identifier.add_models(category_models, category)
        self.identifier.train(options['entropy'])

    def process_example(self, example, options):
        """
        Identify an example object and return the result.

        Returns a list with one identification result per video sequence
        of the example.
        """
        return [self.process_videosequence(sequence, options)
                for sequence in example.sequences.all()]

    def process_videosequence(self, video_sequence, options):
        """
        Perform the identification for a sequence.

        Every frame is identified and the most frequent result is
        returned; on a tie the result met first while iterating the
        counts wins.  Returns None when the sequence has no frame.

        This should be in the Identifier class maybe...
        """
        frame_results = defaultdict(int)
        for frame in video_sequence.frames.iterator():
            if options['force_descriptors']:
                distribution = frame.get_distribution(True, True)
            else:
                distribution = frame.distribution
            frame_results[self.identifier.identify(distribution)] += 1
        if not frame_results:
            # No frame means no vote; do not crash the whole evaluation.
            return None
        return max(frame_results.items(), key=operator.itemgetter(1))[0]

    def results_by_category(self):
        """
        Group ``self.results`` by category.

        The category of an example is the leading part of its name matched
        by ``[a-z_]*[a-z]`` (lowercase letters and underscores, ending
        with a letter).  Returns an OrderedDict mapping category to an
        OrderedDict of example name => results, and prints the category
        names.
        """
        # Assumes every example name starts with such a prefix; a name
        # that does not match raises AttributeError on .group().
        category_m = re.compile(r"[a-z_]*[a-z]")
        categories = OrderedDict()
        for example, results in self.results.items():
            example_category = category_m.match(example).group(0)
            categories.setdefault(example_category,
                                  OrderedDict())[example] = results
        print(list(categories.keys()))
        return categories

    def analyse_results(self):
        """
        Analyze the results and print it in the terminal.

        Prints the success count per category with the failing sequences,
        then the overall count.  A result is a success when it equals the
        category, or the category it is merged with in ``self.merge_info``;
        the same rule is used for the per-category and overall counts.
        """
        print("Results by category")
        # Grouped once: results_by_category() also prints the categories.
        by_category = self.results_by_category()
        total_positives = 0
        total_sequences = 0
        for category, examples in by_category.items():
            expected = self.merge_info.get(category, category)
            num_sequences = 0
            failures = []
            for example, sequences in examples.items():
                for index, result in enumerate(sequences):
                    num_sequences += 1
                    if expected != result:
                        failures.append((example, index, result))
            num_positives = num_sequences - len(failures)
            total_positives += num_positives
            total_sequences += num_sequences
            print("{}: {}/{} ({}%)".format(
                category, num_positives, num_sequences,
                self._percentage(num_positives, num_sequences)))
            if failures:
                print("Failures :")
                for example, sequence, result in failures:
                    print(" {} seq {} -> {}".format(
                        example, sequence, result))

        print("Overall result: {}/{} ({}%)".format(
            total_positives, total_sequences,
            self._percentage(total_positives, total_sequences)))
        print(" {} categories, {} objects, {} video sequences".format(
            len(by_category), len(self.results), total_sequences))
        print("Classifier %s" % self.identifier.classifier)

    def dump(self, options):
        """
        Dump the process in a homemade format.

        The state is pickled to ``options['save_file']``, or to
        ``options['load_file']`` when no save file is given.  Nothing is
        written when neither is set.
        """
        state_file_path = options['save_file'] or options['load_file']
        if not state_file_path:
            return
        state = {
            'dataset': self.dataset,
            'merge_info': self.merge_info,
            'done_examples': self.done_examples,
            'identifier': self.identifier,
            'results': self.results,
        }
        with open(state_file_path, 'wb') as handle:
            pickle.dump(state, handle)
        print("State has been saved to {}.".format(state_file_path))

    def load(self, state_file_path):
        """
        Load the process state to be restarted.

        Every key of the pickled state becomes an instance attribute.
        """
        # SECURITY: unpickling executes arbitrary code; only load state
        # files produced by dump() from a trusted location.
        # Binary mode mirrors the 'wb' used by dump().
        with open(state_file_path, 'rb') as handle:
            state = pickle.load(handle)
        self.__dict__.update(state)
        print("State has been restored from {}.".format(state_file_path))
class Command(BaseCommand):
    """
    Django Command for classification evaluation.

    The command trains an ``Identifier`` on the models listed in a JSON
    dataset, identifies every video sequence of the ``ExampleObject`` rows
    whose category is in the dataset, and can print a success report.  The
    state (dataset, identifier, results so far) can be pickled to a file
    and restored later to resume an interrupted run.
    """

    # NOTE: these are class-level defaults.  handle() rebinds ``results``
    # and ``done_examples`` on the instance after training, and load()
    # rebinds whatever the state file contains.
    dataset = dict()
    merge_info = dict()  # Hash containing category => category_merged_with
    identifier = None
    results = OrderedDict()
    done_examples = []

    help = ('Perform learning from a set of models,'
            ' and test it vs the dataset.')

    # Classifier instances selectable with --classifier.
    classifiers_available = {
        'LinearSVC': LinearSVC(random_state=0),
        'SVC': SVC(),
        'OneVsOne': OneVsOneClassifier(LinearSVC(random_state=0)),
        'OneVsRest': OneVsRestClassifier(LinearSVC(random_state=0)),
        'KNeighbors_default': KNeighborsClassifier(),
        'OutputCode': OutputCodeClassifier(LinearSVC(random_state=0)),
        'DecisionTree': DecisionTreeClassifier(),
        'KNeighbors': KNeighborsClassifier(),
        'KNeighbors_distance_l2': KNeighborsClassifier(
            weights='distance', n_neighbors=5, leaf_size=64),
        'KNeighbors_uniforme_l2': KNeighborsClassifier(
            n_neighbors=5, leaf_size=64),
        'KNeighbors_brute_l2': KNeighborsClassifier(algorithm='brute'),
        'KNeighbors_distance_mahalanobis': KNeighborsClassifier(
            weights='distance', n_neighbors=5, leaf_size=64,
            metric='mahalanobis'),
        'KNeighbors_distance_taneja': KNeighborsClassifier(
            weights='distance', n_neighbors=5, leaf_size=64,
            metric='pyfunc', func=taneja_distance),
    }

    option_list = BaseCommand.option_list + (
        make_option('-d', '--dataset',
                    dest='dataset_file',
                    default='evaluation_dataset.json.sample',
                    help='Json file with model ids of learnign dataset.'),
        make_option('-r', '--restrict',
                    dest='categories',
                    default=None,
                    help=('Restrict the dataset to a list of category.'
                          ' Separate categories with comma.')),
        make_option('-o', '--save',
                    dest='save_file',
                    default=None,
                    help=('When interupted, the task state is store in this '
                          'file, so it can be resumed. '
                          'Default to load parameter if used.')),
        make_option('-i', '--load',
                    dest='load_file',
                    default=None,
                    help='Resume task from this file.'),
        make_option('-a', '--analyze',
                    dest='analyze',
                    action='store_true',
                    default=False,
                    help='Display results of evaluation.'),
        make_option('-l', '--learning-only',
                    dest='learning',
                    action='store_true',
                    default=False,
                    help='Performs the learning only, do not evaluate.'),
        make_option('-e', '--use-entropy',
                    dest='entropy',
                    action='store_true',
                    default=False,
                    help='Use entropy to filter partial views.'),
        make_option('-c', '--classifier',
                    dest='classifier_type',
                    default='LinearSVC',
                    help='Kind of classifier to use.'),
        make_option('-f', '--force-descriptors',
                    dest='force_descriptors',
                    action='store_true',
                    default=False,
                    help="Force recomputation of descriptors."),
    )

    @staticmethod
    def _percentage(part, whole):
        """ Return the floored integer percentage, or 0 if whole is 0. """
        return 100 * part // whole if whole else 0

    def handle(self, *_, **options):
        """
        Handle the command call.

        Restores a saved state when ``load_file`` is given, trains an
        identifier when none is available, then (unless ``learning`` is
        set) identifies every example not processed yet.  The state is
        dumped at the end, including after a KeyboardInterrupt.
        """
        if options['load_file']:
            self.load(options['load_file'])

        if not self.identifier:
            # 'force_dataset' is not declared in option_list, so it is read
            # with .get() to avoid a KeyError when a dataset is already
            # loaded.
            if not self.dataset or options.get('force_dataset'):
                self.load_dataset(options)
            self.perform_learning(options)
            # We also want to reidentify objects
            self.results = OrderedDict()
            self.done_examples = []

        if options['learning']:
            # We stop after learning
            self.dump(options)
            return

        examples = ExampleObject.objects.filter(
            category__in=self.dataset.keys()).order_by("name")
        try:
            # Query the total once instead of twice per example.
            total = examples.count()
            for index, example in enumerate(examples.iterator()):
                if example.pk in self.done_examples:
                    continue
                print("Identification of {} {}/{} ({}%)".format(
                    example.name, index + 1, total,
                    self._percentage(index + 1, total)))
                self.results[example.name] = self.process_example(
                    example, options)
                self.done_examples.append(example.pk)
            if options['analyze']:
                self.analyse_results()
        # The process can be stopped and saved for restart
        except KeyboardInterrupt:
            pass
        self.dump(options)

    def load_dataset(self, options):
        """
        Load the dataset according to the options.

        Reads the JSON file ``options['dataset_file']`` into
        ``self.dataset``, moves an optional 'merge_info' entry to
        ``self.merge_info`` and, when ``options['categories']`` is set,
        keeps only the comma-separated categories it lists.

        Raises Exception when a requested category is not in the dataset.
        """
        with open(options['dataset_file']) as dataset_file:
            self.dataset = json.load(dataset_file)
        if 'merge_info' in self.dataset:
            self.merge_info = self.dataset.pop('merge_info')
        if options['categories']:
            # We restrict the dataset to some categories
            categories = options['categories'].split(',')
            for category in categories:
                if category not in self.dataset:
                    raise Exception(
                        "{} is not included in the dataset".format(category))
            self.dataset = {k: self.dataset[k] for k in categories}

    def perform_learning(self, options):
        """
        Perform the learning from the dataset.

        Resolves every google id of ``self.dataset`` to a model, feeds the
        models to a new ``Identifier`` using the classifier named by
        ``options['classifier_type']`` and trains it with
        ``options['entropy']``.
        """
        print('Training using a {} classifier.'.format(
            options['classifier_type']))
        # Retrieving the dataset models
        models = {
            category: [SketchupModel.find_google_id(google_id)
                       for google_id in google_ids]
            for category, google_ids in self.dataset.items()
        }
        # Training
        classifier = self.classifiers_available[options['classifier_type']]
        self.identifier = Identifier(classifier=classifier)
        for category, category_models in models.items():
            self.identifier.add_models(category_models, category)
        self.identifier.train(options['entropy'])

    def process_example(self, example, options):
        """
        Identify an example object and return the result.

        Returns a list with one identification result per video sequence
        of the example.
        """
        return [self.process_videosequence(sequence, options)
                for sequence in example.sequences.all()]

    def process_videosequence(self, video_sequence, options):
        """
        Perform the identification for a sequence.

        Every frame is identified and the most frequent result is
        returned; on a tie the result met first while iterating the
        counts wins.  Returns None when the sequence has no frame.

        This should be in the Identifier class maybe...
        """
        frame_results = defaultdict(int)
        for frame in video_sequence.frames.iterator():
            if options['force_descriptors']:
                distribution = frame.get_distribution(True, True)
            else:
                distribution = frame.distribution
            frame_results[self.identifier.identify(distribution)] += 1
        if not frame_results:
            # No frame means no vote; do not crash the whole evaluation.
            return None
        return max(frame_results.items(), key=operator.itemgetter(1))[0]

    def results_by_category(self):
        """
        Group ``self.results`` by category.

        The category of an example is the leading part of its name matched
        by ``[a-z_]*[a-z]`` (lowercase letters and underscores, ending
        with a letter).  Returns an OrderedDict mapping category to an
        OrderedDict of example name => results, and prints the category
        names.
        """
        # Assumes every example name starts with such a prefix; a name
        # that does not match raises AttributeError on .group().
        category_m = re.compile(r"[a-z_]*[a-z]")
        categories = OrderedDict()
        for example, results in self.results.items():
            example_category = category_m.match(example).group(0)
            categories.setdefault(example_category,
                                  OrderedDict())[example] = results
        print(list(categories.keys()))
        return categories

    def analyse_results(self):
        """
        Analyze the results and print it in the terminal.

        Prints the success count per category with the failing sequences,
        then the overall count.  A result is a success when it equals the
        category, or the category it is merged with in ``self.merge_info``;
        the same rule is used for the per-category and overall counts.
        """
        print("Results by category")
        # Grouped once: results_by_category() also prints the categories.
        by_category = self.results_by_category()
        total_positives = 0
        total_sequences = 0
        for category, examples in by_category.items():
            expected = self.merge_info.get(category, category)
            num_sequences = 0
            failures = []
            for example, sequences in examples.items():
                for index, result in enumerate(sequences):
                    num_sequences += 1
                    if expected != result:
                        failures.append((example, index, result))
            num_positives = num_sequences - len(failures)
            total_positives += num_positives
            total_sequences += num_sequences
            print("{}: {}/{} ({}%)".format(
                category, num_positives, num_sequences,
                self._percentage(num_positives, num_sequences)))
            if failures:
                print("Failures :")
                for example, sequence, result in failures:
                    print(" {} seq {} -> {}".format(
                        example, sequence, result))

        print("Overall result: {}/{} ({}%)".format(
            total_positives, total_sequences,
            self._percentage(total_positives, total_sequences)))
        print(" {} categories, {} objects, {} video sequences".format(
            len(by_category), len(self.results), total_sequences))
        print("Classifier %s" % self.identifier.classifier)

    def dump(self, options):
        """
        Dump the process in a homemade format.

        The state is pickled to ``options['save_file']``, or to
        ``options['load_file']`` when no save file is given.  Nothing is
        written when neither is set.
        """
        state_file_path = options['save_file'] or options['load_file']
        if not state_file_path:
            return
        state = {
            'dataset': self.dataset,
            'merge_info': self.merge_info,
            'done_examples': self.done_examples,
            'identifier': self.identifier,
            'results': self.results,
        }
        with open(state_file_path, 'wb') as handle:
            pickle.dump(state, handle)
        print("State has been saved to {}.".format(state_file_path))

    def load(self, state_file_path):
        """
        Load the process state to be restarted.

        Every key of the pickled state becomes an instance attribute.
        """
        # SECURITY: unpickling executes arbitrary code; only load state
        # files produced by dump() from a trusted location.
        # Binary mode mirrors the 'wb' used by dump().
        with open(state_file_path, 'rb') as handle:
            state = pickle.load(handle)
        self.__dict__.update(state)
        print("State has been restored from {}.".format(state_file_path))
def __init__(self, *args, **kwargs):
    """
    Overwrite to have a default classifier.

    When the caller passes no ``identifier`` keyword, an ``Identifier``
    wrapping a fresh ``KNeighborsClassifier`` is injected before the
    arguments are handed to the parent constructor.
    """
    identifier_given = 'identifier' in kwargs
    if not identifier_given:
        # Built only when needed, so no Identifier is created otherwise.
        kwargs['identifier'] = Identifier(classifier=KNeighborsClassifier())
    super(EvaluationSession, self).__init__(*args, **kwargs)