def classify_images(self, keywords=None, limit=25):
    """
    Worker task to retrieve a list of automatically categorized images from
    Wikimedia Commons for a given list of keywords.
    """
    if not keywords:
        raise ValueError('keywords must not be empty')

    with app.app_context():

        def supported_extractors():
            extractors = []
            extractors.append(SizeFeature())
            extractors.append(ColorFeature())
            extractors.append(HistogramFeature())
            extractors.append(GradientFeature())
            extractors.append(FaceFeature(app.config['FACE_CLASSIFIER']))
            extractors.append(GeoFeature())
            extractors.append(FormatFeature())
            extractors.append(
                WordsFeature.create_from(app.config['WORDS_CONFIG']))
            return extractors

        def create_response_entry(label, sample):
            return {
                'thumbnail': sample.thumbnail,
                'image': sample.url,
                'label': label,
                'title': sample.url
            }

        def create_response(entries):
            return {'current': 100, 'total': 100, 'result': entries}

        # keep track of progress
        progress_observer = ProgressObserver(self)
        progress_observer.update(5)

        # query DBpedia for related images based on the given keywords
        if limit > app.config['QUERY_LIMIT']:
            limit = app.config['QUERY_LIMIT']
        searchterm = ' '.join(keywords)
        uris = fetch_uris_from_metadata(searchterm, limit, multiple=False)
        progress_observer.update(20)

        # download images and metadata into temp folder with unique task id
        temp_folder = os.path.join(app.config['DOWNLOAD_DIRECTORY'],
                                   classify_images.request.id)
        images_and_metadata(uris,
                            temp_folder,
                            False,
                            observer=progress_observer)
        progress_observer.update(80)

        # load dataset and extract features
        dataset = Dataset(logging=True)
        dataset.read(root=temp_folder,
                     extractors=supported_extractors(),
                     unlabeled_data=True)
        with open(app.config['DATASET_CONFIG']) as config_file:
            dataset_config = json.load(config_file)
        dataset.means = dataset_config['means']
        dataset.stds = dataset_config['stds']
        dataset.normalize()
        progress_observer.update(90)

        # predict labels using the trained classifier
        classifier = joblib.load(app.config['WIKIMEDIA_CLASSIFIER'])
        predictions = classifier.predict(dataset.data)
        progress_observer.update(95)

        # build response
        suggestions = []
        for index, sample in enumerate(dataset.samples):
            # item() converts the NumPy scalar to a native Python type
            label = predictions[index].item()
            entry = create_response_entry(label, sample)
            suggestions.append(entry)
        result = create_response(suggestions)

        # cleanup temporary directory
        delete_directory(temp_folder)

        progress_observer.update(100)

        return result
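

# For illustration only: a minimal sketch of the kind of progress reporter the
# task above relies on, assuming a Celery-style bound task that provides
# update_state(). The real ProgressObserver lives elsewhere in the project;
# the class below only shows the expected interface, not the actual code.
class ExampleProgressObserver(object):

    def __init__(self, task):
        # the bound task whose state is updated as the work progresses
        self.task = task

    def update(self, percentage):
        # publish intermediate progress so clients polling the task id can
        # render a progress bar before the final result is available
        self.task.update_state(state='PROGRESS',
                               meta={'current': percentage, 'total': 100})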
    # data is expected to be a list of equally long columns, one per caption
    assert all(len(x) == len(data[0]) for x in data)
    with open(filename, 'w') as csv_file:
        csv_file.write(','.join(captions) + '\n')
        for row in range(len(data[0])):
            csv_file.write(','.join(str(column[row]) for column in data) + '\n')
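
# Illustration of the column-oriented layout the writer above expects (the
# values are made up, not taken from the repository): with
# captions = ['size', 'color'] and data = [[0.1, 0.2], [3.0, 4.0]], every
# inner list is one column, so the resulting CSV reads:
#
#   size,color
#   0.1,3.0
#   0.2,4.0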


if __name__ == '__main__':
    parser = ArgumentParser(description='Measure statistics of features '
        'across images of the same class to evaluate the features.',
        formatter_class=ArgumentDefaultsHelpFormatter)
    parser.add_argument('features',
        help='Path to the JSON file containing extracted features of the '
        'dataset')
    parser.add_argument('-o', '--output', default='<folder>/evaluation.csv',
        help='Filename of the CSV file where p-values will be written; '
        '<folder> is the directory of the features file')
    args = parser.parse_args()

    folder = os.path.dirname(args.features)
    args.output = args.output.replace('<folder>', folder)

    dataset = Dataset()
    dataset.load(args.features)
    dataset.normalize()

    print_chi(dataset)
    print('Write CSV table to', args.output)
    write_chi(args.output, dataset)
    print('Done')
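
# Example invocation (script and file names are placeholders, not paths from
# the repository):
#
#   python evaluate_features.py data/features.json -o data/evaluation.csv
#
# This loads the extracted features, normalizes them, runs print_chi on the
# dataset and writes the resulting p-values to the given CSV file.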