Example #1
def build_index_and_search_through_it(images_features, file_index):
    # Decide whether to do only image search or hybrid search
    if not features_from_new_model_boolean:
        # This is pure image search
        image_index = vector_search.index_features(images_features, dims=4096)
        search_key = get_index(input_image, file_index)
        results = vector_search.search_index_by_key(search_key, image_index,
                                                    file_index)
        print(results)
    else:
        word_vectors = vector_search.load_glove_vectors(glove_path)
        # If we are searching for tags for an image
        if not input_word:
            # Work on a single image instead of indexing
            search_key = get_index(input_image, file_index)
            word_index, word_mapping = vector_search.build_word_index(
                word_vectors)
            results = vector_search.search_index_by_value(
                images_features[search_key], word_index, word_mapping)
        # If we are using words to search through our images
        else:
            image_index = vector_search.index_features(images_features,
                                                       dims=300)
            results = vector_search.search_index_by_value(
                word_vectors[input_word], image_index, file_index)
        print(results)
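The function above reads several globals that the example does not define. A minimal, hypothetical setup for calling it might look like this (the paths, flag values, and load_features arguments are assumptions, not part of the example):

import vector_search  # project-local module used throughout these examples

# Hypothetical setup: load precomputed image features and their file mapping
features_path = 'features'          # assumed path of the saved features
file_mapping_path = 'file_mapping'  # assumed path of the index -> path map
images_features, file_index = vector_search.load_features(
    features_path, file_mapping_path)

# Globals read by build_index_and_search_through_it (assumed values)
features_from_new_model_boolean = False  # False -> pure image search
glove_path = 'models/glove.6B'           # assumed path to GloVe vectors
input_image = 'dataset/cat/1234.jpg'     # assumed query image path
input_word = None                        # or a query word such as 'cat'

build_index_and_search_through_it(images_features, file_index)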
Example #2
def build_index_and_search_through_it(images_features, file_index):
    # Decide whether to do only image search or hybrid search
    if not features_from_new_model_boolean:
        # This is pure image search
        image_index = vector_search.index_features(images_features, dims=4096)
        search_key = get_index(input_image, file_index)
        results = vector_search.search_index_by_key(search_key, image_index, file_index)
        print(results)
        st.write('\n'.join('- `%s`' % elt for elt in results))
        demo.show_top_n(9, results, search_by_img=False)
    else:
        word_vectors = vector_search.load_glove_vectors(glove_path)
        # If we are searching for tags for an image
        if not input_word:
            # Work on a single image instead of indexing
            search_key = get_index(input_image, file_index)
            word_index, word_mapping = vector_search.build_word_index(
                word_vectors)
            results = vector_search.search_index_by_value(
                images_features[search_key], word_index, word_mapping)
            # Build the image index once, outside the loop, then show the
            # images closest to each suggested tag
            image_index = vector_search.index_features(images_features,
                                                       dims=300)
            for result in results:
                st.write(result)
                tag_results = vector_search.search_index_by_value(
                    word_vectors[result[1]], image_index, file_index)
                st.write('\n'.join('- `%s`' % elt for elt in tag_results))
                demo.show_top_n(9, tag_results, search_by_img=False)
        # If we are using words to search through our images
        else:
            image_index = vector_search.index_features(images_features, dims=300)
            st.write(word_vectors[input_word])
            results = vector_search.search_index_by_value(word_vectors[input_word], image_index, file_index)
            st.write('\n'.join('- `%s`' % elt for elt in results))
            demo.show_top_n(9, results, search_by_img=False)
        print(results)
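Both variants rely on a get_index helper that is not shown. A minimal sketch, under the assumption that file_index is a dictionary mapping integer positions to file paths:

def get_index(input_image, file_index):
    # Find the position of the query image in the index -> path mapping
    for idx, path in file_index.items():
        if path == input_image:
            return idx
    raise ValueError('%s not found in file index' % input_image)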
Example #3
    st.image(Image.open('assets/vgg16_chopped.jpg'), width=800, caption="Our model")
    st.write("This is how we get such a model in practice")
    show_source(vector_search.load_headless_pretrained_model)

    st.write("""
    What do we mean by generating embeddings? Well, we just use our pre-trained model up to the penultimate layer, and 
    store the value of the activations.""")
    show_source(vector_search.generate_features)
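    # A minimal sketch of the idea (an assumption, not the project's exact
    # code; the real source is shown above): truncate a Keras VGG16 at its
    # penultimate 4096-d layer and keep the activations as embeddings:
    #     pretrained = keras.applications.VGG16(weights='imagenet',
    #                                           include_top=True)
    #     headless = keras.models.Model(inputs=pretrained.input,
    #                                   outputs=pretrained.layers[-2].output)
    #     images_features = headless.predict(batch_of_images)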

    st.write('Here is what the embeddings look like for the first 20 images. Each image is now represented by a '
             'sparse vector of size 4096:')
    st.write(images_features[:20])

    st.write("Now that we have the features, we will build a fast index to search through them using Annoy.")
    with st.echo():
        image_index = vector_search.index_features(images_features)
    show_source(vector_search.index_features)
    st.header("Using our embeddings to search through images")
    st.write("""
    We can now simply take in an image, get its **embedding** (saved to disk), and look in our fast index to 
    find **similar embeddings, and thus similar images**.
    
    This is especially useful since image labels are often noisy: there is more to an image than its label.
    
    In our dataset for example, we have both a class `cat`, and a class `bottle`.
    
    Which class do you think this image is labeled as?
    """)

    st.image(to_array(image_paths[search_key]), caption="Cat or bottle")
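For reference, a minimal sketch of what an Annoy-backed index_features might do, consistent with the dims and n_trees arguments seen in these examples (an assumption, not the project's exact source):

from annoy import AnnoyIndex

def index_features_sketch(features, dims=4096, n_trees=1000):
    # Store each feature vector in an angular-distance Annoy index
    feature_index = AnnoyIndex(dims, 'angular')
    for i, row in enumerate(features):
        feature_index.add_item(i, row)
    feature_index.build(n_trees)  # more trees: better recall, slower build
    return feature_index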
    
Example #4
    # Load a fine-tuned model if a path is given, else the headless VGG
    if model_path:
        loaded_model = load_model(model_path)
    else:
        loaded_model = vector_search.load_headless_pretrained_model()

    # Decide whether to index or search
    if indexing:
        features, index = index_images(index_folder, features_path,
                                       file_mapping, loaded_model)
        print("Indexed %s images" % len(features))
    else:
        images_features, file_index = vector_search.load_features(
            features_path, file_mapping)

        # Decide whether to do only image search or hybrid search
        if pure_image_embedding:
            image_index = vector_search.index_features(images_features,
                                                       dims=4096)
            search_key = get_index(input_image, file_index)
            results = vector_search.search_index_by_key(
                search_key, image_index, file_index)
            print(results)
        else:
            word_vectors = vector_search.load_glove_vectors(glove_path)
            # If we are searching for tags for an image
            if input_image:
                search_key = get_index(input_image, file_index)
                word_index, word_mapping = vector_search.build_word_index(
                    word_vectors)
                results = vector_search.search_index_by_value(
                    images_features[search_key], word_index, word_mapping)
            # If we are using words to search through our images
            else:
                image_index = vector_search.index_features(images_features,
                                                           dims=300)
                results = vector_search.search_index_by_value(
                    word_vectors[input_word], image_index, file_index)
            print(results)
Example #5
import json

import numpy as np

import vector_search  # project-local module providing the search helpers

# %% Save database reference features and file mapping
# Assumption: the features array is saved with np.save, matching the
# load_features call below that reads it back from the same path
np.save('./object_detection/feature_extraction/ref_img_features_no_aug',
        features)
with open(
        './object_detection/feature_extraction/ref_img_filemapping_no_aug.json',
        'w') as f:
    json.dump(file_mapping, f)

# %% Build database reference features
'''
Build and save an ANNOY search index for fast searching.
'''
# Load the saved features and file mappings and compile Annoy search indices
features, file_index = vector_search.load_features(
    './object_detection/feature_extraction/ref_img_features_no_aug',
    './object_detection/feature_extraction/ref_img_filemapping_no_aug')

ref_img_index = vector_search.index_features(features, n_trees=8000)

ref_img_index.save(
    './object_detection/feature_extraction/ref_img_index_no_aug.ann')

# %%
'''
Get and save a hash dictionary mapping between the file_mapping index and
each item's name, category, source website, local source file path, and
local source file name.
'''

with open(
        './object_detection/feature_extraction/ref_img_filemapping_no_aug.json',
        'r') as f:
    ref_img_filemapping = json.load(f)
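The saved index can later be reloaded and queried through Annoy's API; a short sketch, assuming 4096-d features and a hypothetical query_features vector produced by the same feature extractor:

from annoy import AnnoyIndex

ref_img_index = AnnoyIndex(4096, 'angular')  # dims must match the saved index
ref_img_index.load(
    './object_detection/feature_extraction/ref_img_index_no_aug.ann')

# Ten nearest reference images for query_features (hypothetical 4096-d vector)
neighbor_ids, distances = ref_img_index.get_nns_by_vector(
    query_features, 10, include_distances=True)
# json round-trips integer keys as strings
neighbor_paths = [ref_img_filemapping[str(i)] for i in neighbor_ids]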