class MakeGistCorpusTask(Condorizable):
    """Task that computes a GIST descriptor per image and writes them to a SAM corpus.

    The input is a text file listing one image path per line.  Each descriptor
    is passed through ``l2_normalize`` before being written, and an optional
    labeler (looked up in ``labelers.registry``) supplies a label per image.
    """

    binary = Condorizable.path_to_script(__file__)

    def check_args(self, argv):
        """Parse and validate the command line.

        Args:
            argv: Full argument vector; ``argv[0]`` is skipped.

        Returns:
            The parsed options namespace (``file_list``, ``dest_corpus``,
            ``labeler``, ``color``).

        Exits via ``parser.error`` if the labeler name is not registered or
        the file list does not exist.  Registers ``dest_corpus`` as an output
        file of this task.
        """
        parser = ArgumentParser()
        parser.add_argument('file_list', type=str,
                            help='File containing list of images to process')
        parser.add_argument('dest_corpus', type=str,
                            help='Path to write GIST corpus')
        parser.add_argument('--labeler', type=str, help='Labeler to apply')
        parser.add_argument('--color', action='store_true', help='Color GIST?')
        options = parser.parse_args(argv[1:])

        # A labeler is optional here; documents are simply written unlabeled.
        if options.labeler is None:
            log.warning('no labeler provided')
        elif options.labeler not in labelers.registry:
            labeler_names = ', '.join(sorted(labelers.registry.keys()))
            parser.error('Invalid labeler "%s"; available options are %s'
                         % (options.labeler, labeler_names))

        if not os.path.exists(options.file_list):
            parser.error('Input file %s does not exist!' % options.file_list)

        self.add_output_file(options.dest_corpus)
        return options

    def run(self, options):
        """Compute descriptors for every listed image and write the corpus.

        Args:
            options: Namespace returned by ``check_args``.

        Blank lines in the file list are ignored.  If the list contains no
        image paths, no corpus is written and a warning is logged.
        """
        if options.labeler is None:
            labeler = None
        else:
            labeler = labelers.registry[options.labeler]

        log.info('Writing SAM corpus to %s' % options.dest_corpus)

        # Read the whole list up front (and close the file promptly) so the
        # progress messages can report the total count.
        with open(options.file_list) as file_list:
            filenames = [line.strip() for line in file_list]
        filenames = [name for name in filenames if name]

        # Wait to instantiate the corpus writer until we know the
        # dimensionality of the descriptors we'll be writing.
        writer = None
        try:
            for i, filename in enumerate(filenames):
                log.info('Processing image %d/%d' % (i + 1, len(filenames)))
                if options.color:
                    descriptor = color_gist(filename)
                else:
                    descriptor = grayscale_gist(filename)

                if writer is None:
                    writer = CorpusWriter(options.dest_corpus,
                                          data_series='sam',
                                          dim=descriptor.size)

                normalized_descriptor = l2_normalize(descriptor)
                doc_label = labeler(filename) if labeler is not None else None
                writer.write_doc(ascolvector(normalized_descriptor),
                                 name=filename, label=doc_label)
        finally:
            # The writer only exists once at least one image was processed;
            # close it even if a later image fails.
            if writer is not None:
                writer.close()

        if writer is None:
            log.warning('No images listed in %s; no corpus written'
                        % options.file_list)
class MakeGistArffTask(Condorizable):
    """Task that computes a GIST descriptor per image and writes them to an ARFF file.

    The input is a text file listing one image path per line.  A labeler
    (looked up in ``labelers.registry``) is required; the sorted set of labels
    it produces is passed to the ARFF writer as the class list.
    """

    binary = Condorizable.path_to_script(__file__)

    def check_args(self, argv):
        """Parse and validate the command line.

        Args:
            argv: Full argument vector; ``argv[0]`` is skipped.

        Returns:
            The parsed options namespace (``file_list``, ``dest``,
            ``labeler``, ``color``, ``normalize``).

        Exits via ``parser.error`` if the file list does not exist; argparse
        itself rejects a missing or unregistered labeler.  Registers ``dest``
        as an output file of this task.
        """
        parser = ArgumentParser()
        parser.add_argument('file_list', type=str,
                            help='File containing list of images to process')
        parser.add_argument('dest', type=str, help='Destination ARFF file')
        # Sorted so the usage/error text lists labelers in a stable order.
        parser.add_argument('--labeler', type=str, required=True,
                            choices=sorted(labelers.registry.keys()),
                            help='Labeler to apply')
        parser.add_argument('--color', action='store_true', help='Color GIST?')
        parser.add_argument('--normalize', action='store_true',
                            help='L2 normalize GIST data?')
        options = parser.parse_args(argv[1:])

        if not os.path.exists(options.file_list):
            parser.error('Input file %s does not exist!' % options.file_list)

        self.add_output_file(options.dest)
        return options

    def run(self, options):
        """Compute descriptors for every listed image and write the ARFF file.

        Args:
            options: Namespace returned by ``check_args``.

        Blank lines in the file list are ignored.
        """
        labeler = labelers.registry[options.labeler]

        # Strip each entry once, before labeling, so the labeler sees the same
        # path the descriptor functions do (no trailing newline).
        with open(options.file_list) as file_list:
            filenames = [line.strip() for line in file_list]
        filenames = [name for name in filenames if name]

        # All labels are computed up front because the writer is given the
        # complete class list at construction time.
        labels = [labeler(name) for name in filenames]
        class_list = sorted(set(labels))
        writer = ArffWriter(options.dest, class_list=class_list)

        log.info('Writing GIST data to %s' % options.dest)
        try:
            for i, (filename, label) in enumerate(zip(filenames, labels)):
                log.info('Processing image %d/%d' % (i + 1, len(filenames)))
                if options.color:
                    descriptor = color_gist(filename)
                else:
                    descriptor = grayscale_gist(filename)
                if options.normalize:
                    descriptor = l2_normalize(descriptor)
                writer.write_example(descriptor, label)
        finally:
            # Close the writer even if an image fails part-way through.
            writer.close()