def classify_images(self, keywords=None, limit=25):
    """
    Worker task that retrieves a list of automatically categorized images
    from Wikimedia Commons for a given list of keywords.
    """
    if not keywords:
        raise AssertionError('at least one keyword is required')
    with app.app_context():

        def supported_extractors():
            # Feature extractors the classifier was trained with.
            extractors = []
            extractors.append(SizeFeature())
            extractors.append(ColorFeature())
            extractors.append(HistogramFeature())
            extractors.append(GradientFeature())
            extractors.append(FaceFeature(app.config['FACE_CLASSIFIER']))
            extractors.append(GeoFeature())
            extractors.append(FormatFeature())
            extractors.append(
                WordsFeature.create_from(app.config['WORDS_CONFIG']))
            return extractors

        def create_response_entry(label, sample):
            return {
                'thumbnail': sample.thumbnail,
                'image': sample.url,
                'label': label,
                'title': sample.url
            }

        def create_response(entries):
            return {'current': 100, 'total': 100, 'result': entries}

        # keep track of progress
        progress_observer = ProgressObserver(self)
        progress_observer.update(5)
        # query DBpedia for related images based on the given keywords
        limit = min(limit, app.config['QUERY_LIMIT'])
        searchterm = ' '.join(keywords)
        uris = fetch_uris_from_metadata(searchterm, limit, multiple=False)
        progress_observer.update(20)
        # download images and metadata into a temp folder named after the
        # unique task id
        temp_folder = os.path.join(app.config['DOWNLOAD_DIRECTORY'],
                                   classify_images.request.id)
        images_and_metadata(uris, temp_folder, False,
                            observer=progress_observer)
        progress_observer.update(80)
        # load dataset and extract features
        dataset = Dataset(logging=True)
        dataset.read(root=temp_folder, extractors=supported_extractors(),
                     unlabeled_data=True)
        # normalize with the means/stds the classifier was trained on
        with open(app.config['DATASET_CONFIG']) as config_file:
            dataset_config = json.load(config_file)
        dataset.means = dataset_config['means']
        dataset.stds = dataset_config['stds']
        dataset.normalize()
        progress_observer.update(90)
        # predict labels using the trained classifier
        classifier = joblib.load(app.config['WIKIMEDIA_CLASSIFIER'])
        predictions = classifier.predict(dataset.data)
        progress_observer.update(95)
        # build response
        suggestions = []
        for index, sample in enumerate(dataset.samples):
            # .item() converts the numpy scalar to a plain Python value
            # (np.asscalar is deprecated and removed in recent numpy)
            label = predictions[index].item()
            entry = create_response_entry(label, sample)
            suggestions.append(entry)
        result = create_response(suggestions)
        # cleanup temporary directory
        delete_directory(temp_folder)
        progress_observer.update(100)
        return result
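# Usage sketch (illustrative, not part of the original module): the `self`
# parameter and `classify_images.request.id` above suggest this runs as a
# bound Celery task (e.g. decorated with @celery.task(bind=True), which is
# not shown here). If so, a client could enqueue it and poll progress
# roughly as below; the keyword values and timeout are assumptions.

def _example_enqueue_and_poll():
    async_result = classify_images.delay(keywords=['lighthouse'], limit=10)
    # While the worker runs, ProgressObserver is presumably publishing
    # progress meta (e.g. {'current': 20, 'total': 100}) readable here.
    print(async_result.state, async_result.info)
    # Block until the worker returns the response dict built above.
    response = async_result.get(timeout=600)
    return response['result']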
import os
from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter


def write_chi(filename, dataset):
    # (head elided: builds the column `captions` and the per-feature
    # `data` columns from the dataset)
    assert all(len(x) == len(data[0]) for x in data)
    with open(filename, 'w') as csv_file:
        csv_file.write(','.join(captions) + '\n')
        for row in range(len(data[0])):
            csv_file.write(','.join(str(column[row])
                                    for column in data) + '\n')


if __name__ == '__main__':
    parser = ArgumentParser(
        description='Measure statistics of features within the images of '
        'the same class to evaluate features.',
        formatter_class=ArgumentDefaultsHelpFormatter)
    parser.add_argument(
        'features',
        help='Path to the JSON file containing extracted features of the '
        'dataset')
    parser.add_argument(
        '-o', '--output', default='<folder>/evaluation.csv',
        help='Filename of the CSV file where p-values will be written to; '
        '<folder> is the directory of the features file')
    args = parser.parse_args()
    # the directory of the features file replaces the <folder> placeholder
    folder = os.path.dirname(args.features)
    args.output = args.output.replace('<folder>', folder)
    dataset = Dataset()
    dataset.load(args.features)
    dataset.normalize()
    print_chi(dataset)
    print('Write CSV table to', args.output)
    write_chi(args.output, dataset)
    print('Done')
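# Example invocation (illustrative; the script filename is an assumption):
#
#   $ python evaluate.py dataset/features.json
#   $ python evaluate.py dataset/features.json -o results/pvalues.csv
#
# With the default -o, '<folder>' expands to the directory of the features
# file, so the table above would land in dataset/evaluation.csv.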