def _new_keyword(args):
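    """Generate a keyword from an image's themes and save it to 'args.keyword'."""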
    dataset = Dataset()
    generator = dataset.keyword_generator(args.image)
    keyword = generator.generate(themes=args.themes)
    if args.progress:
        _print(
            f'Keyword generation complete! ({keyword.shape[0]} themes found)')
    save_keyword(args.keyword, keyword, dataset.method)


def _keywords(args):
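    """List keywords for a single image, or counts for every keyword."""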
    dataset = Dataset()
    if args.image:
        for keyword in dataset.get_keywords_for_image(args.image):
            print(keyword)
    else:
        for keyword, count in dataset.get_keyword_counts().items():
            print(f'{count:3d} {keyword}')


def _keyword(args):
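    """Print every image in the dataset that matches a keyword."""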
    dataset = Dataset()
    try:
        images = dataset.get_images_from_keyword(args.keyword)
        for image in images:
            print(image)
    except ValueError as err:
        raise CommandLineError(str(err)) from err


def _set_wordlist(args):
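    """Replace the dataset's wordlist after confirming with the user."""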
    method, words = load_wordlist(args.file)
    dataset = Dataset()
    if dataset.method != method:
        raise CommandLineError(
            f"dataset has method '{feature_name(dataset.method)}' but "
            f"wordlist was built with method '{feature_name(method)}'")
    if not args.yes:
        print('This will cause bags of words and keyword matches to be '
              'deleted; you will need to re-index the dataset afterwards.')
        if input('Do you wish to proceed (yes/NO): ').lower() != 'yes':
            return
    dataset.set_wordlist(words)


def _set_keywords(args):
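    """Load keyword files and apply them to the dataset after confirmation."""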
    dataset = Dataset()
    # load keywords
    keywords = {}
    for file in files(args.file):
        method, keyword, keyword_data = load_keyword(file)
        keywords[keyword] = keyword_data
        if dataset.method != method:
            raise CommandLineError(
                f"dataset has method '{feature_name(dataset.method)}' but "
                f"keyword file was built with method "
                f"'{feature_name(method)}'")
    # get confirmation
    if not args.yes:
        print('This will cause keyword matches to be deleted; you will need '
              'to re-index the dataset afterwards.')
        if input('Do you wish to proceed (yes/NO): ').lower() != 'yes':
            return
    # set keywords
    dataset.set_keywords(keywords)


def _cluster(args):
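    """Re-index the dataset, then cluster its images with K-Means."""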
    _index(args)
    dataset = Dataset()
    if args.progress:
        _print('Loading Bags of Visual Words...')
    clusterer = dataset.create_clusterer(
        global_only=args.global_only,
        image_cohesion_factor=args.cohesion)
    if clusterer.size == 0:
        raise CommandLineError(
            "missing bags of words, re-run the 'index' command")
    if args.progress:
        _print('Using K-Means to cluster images...')
    cluster_mapping = clusterer.cluster(
        affinity=args.affinity,
        progress_printer=_print if args.progress else None)
    if args.progress:
        _print('Rearranging images into clusters...')
    dataset.cluster(cluster_mapping)
    if args.progress:
        _print('Clustering complete!')


def _new_wordlist(args):
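    """Generate a wordlist from the dataset's feature files and save it."""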
    if args.features and args.features < args.words:
        raise CommandLineError(
            "'--features' must be at least the number of 'words'")
    dataset = Dataset()
    try:
        generator = dataset.wordlist_generator(
            progress=_progress(args.progress, 'Loading feature files'))
    except FileNotFoundError:
        raise CommandLineError(
            "missing features files, re-run the 'index' command")
    if args.progress:
        cluster_method = 'K-Means' if args.kmeans else 'Mini Batch K-Means'
        _print(f'Using {cluster_method} to generate wordlist from '
               f'{generator.num_descriptors()} features...')
    words = generator.generate(args.words,
                               max_features=args.features,
                               minibatch=(not args.kmeans))
    if args.progress:
        _print('Wordlist complete!')
    save_wordlist(args.file, words, generator.method)


def _index(args):
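    """Index images, extract features, build bags of words, apply keywords."""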
    dataset = Dataset()
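    # Capture warnings raised while indexing so any ImageReadWarning can be
    # reported after the progress output rather than interleaved with it.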
    with warnings.catch_warnings(record=True) as w:
        added, removed, orphaned = dataset.index_images(
            progress=_progress(args.progress, 'Indexing images'))
        if args.progress:
            parts = []
            if added:
                parts.append(f'{len(added)} new images')
            if removed:
                parts.append(f'{len(removed)} images removed')
            if orphaned:
                parts.append(f'{len(orphaned)} orphaned feature files removed')
            if parts:
                _print(', '.join(parts))
        dataset.index_features(
            progress=_progress(args.progress, 'Extracting features'))
        for warning in w:
            if warning.category is ImageReadWarning:
                _print(warning.message)
        dataset.index_words(progress=_progress(
            args.progress, 'Generating Bags of Visual Words'))
        dataset.index_keywords(
            progress=_progress(args.progress, 'Applying keywords to images'))
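

# A minimal sketch of how the handlers above could be wired to argparse
# subcommands. The command names ('index', 'keyword') and the program name
# are illustrative assumptions, not part of this tool; only the attribute
# names the handlers read ('progress', 'keyword', ...) are taken from the
# code above.
def _example_parser():
    import argparse  # local import so the sketch stays self-contained
    parser = argparse.ArgumentParser(prog='dataset-tool')  # hypothetical name
    sub = parser.add_subparsers(dest='command', required=True)
    index = sub.add_parser('index', help='index images, features and words')
    index.add_argument('--progress', action='store_true')
    index.set_defaults(func=_index)
    keyword = sub.add_parser('keyword', help='list images matching a keyword')
    keyword.add_argument('keyword')
    keyword.set_defaults(func=_keyword)
    return parser
# Example dispatch: args = _example_parser().parse_args(); args.func(args)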