Exemple #1
0
def index_images(folder, features_path, mapping_path, model, features_from_new_model_boolean, glove_path):
    """Generate features for the images under ``folder`` and save them.

    Args:
        folder: Directory handed to ``load_paired_img_wrd`` to collect paths.
        features_path: Destination passed to ``vector_search.save_features``
            for the feature data.
        mapping_path: Destination passed to ``vector_search.save_features``
            for the file index.
        model: Model forwarded to ``vector_search.generate_features``.
        features_from_new_model_boolean: When truthy, GloVe vectors are
            loaded from ``glove_path`` and handed to the path loader;
            otherwise an empty list is used.
        glove_path: Location of the GloVe vectors; only read when
            ``features_from_new_model_boolean`` is truthy.

    Returns:
        The ``(images_features, file_index)`` pair produced by
        ``vector_search.generate_features``.
    """
    print ("Now indexing images...")

    # Word vectors are only needed when the new model supplies the features.
    use_glove = features_from_new_model_boolean
    word_vectors = (
        vector_search.load_glove_vectors(glove_path) if use_glove else []
    )

    # The shared loader returns a triple; only the image paths matter here.
    _first, _second, paths = load_paired_img_wrd(
        folder=folder,
        word_vectors=word_vectors,
        use_word_vectors=use_glove,
    )

    generated = vector_search.generate_features(paths, model)
    images_features, file_index = generated
    vector_search.save_features(
        features_path, images_features, mapping_path, file_index
    )
    return images_features, file_index
Exemple #2
0
    We are now going to load a model that was **pre-trained** on a large data set (imagenet), and is freely available
     online.
    
    We use this model to generate **embeddings** for our images.
    
    As you can see below, once we've used the model to generate image features, we can then **store them to disk** 
    and re-use them without needing to do inference again! This is one of the reason that embeddings are so popular 
    in practical applications, as they allow for huge efficiency gains. 
    """)

    with st.echo():
        model = vector_search.load_headless_pretrained_model()
        if generate_image_features:
            print ("Generating image features...")
            images_features, file_index = vector_search.generate_features(image_paths, model)
            vector_search.save_features(features_path, images_features, file_mapping_path, file_index)
        else:
            images_features, file_index = vector_search.load_features(features_path, file_mapping_path)

    st.write("Our model is simply VGG16 without the last layer (softmax)")
    st.image(Image.open('assets/vgg16_architecture.jpg'), width=800, caption="Original VGG. Credit to Data Wow Blog")
    st.image(Image.open('assets/vgg16_chopped.jpg'), width=800, caption="Our model")
    st.write("This is how we get such a model in practice")
    show_source(vector_search.load_headless_pretrained_model)

    st.write("""
    What do we mean by generating embeddings? Well we just use our pre-trained model up to the penultimate layer, and 
    store the value of the activations.""")
    show_source(vector_search.generate_features)

    st.write('Here are what the embeddings look like for the first 20 images. Each image is now represented by a '
Exemple #3
0
def index_images(folder, features_path, mapping_path, model):
    """Generate features for the images under ``folder`` and save them.

    Args:
        folder: Directory handed to ``load_paired_img_wrd`` to collect paths.
        features_path: Destination passed to ``vector_search.save_features``
            for the feature data.
        mapping_path: Destination passed to ``vector_search.save_features``
            for the file index.
        model: Model forwarded to ``vector_search.generate_features``.

    Returns:
        The ``(images_features, file_index)`` pair produced by
        ``vector_search.generate_features``.
    """
    # Word vectors are disabled here, so an empty list is passed in their place.
    _first, _second, paths = load_paired_img_wrd(
        folder, [], use_word_vectors=False
    )

    generated = vector_search.generate_features(paths, model)
    images_features, file_index = generated
    vector_search.save_features(
        features_path, images_features, mapping_path, file_index
    )
    return images_features, file_index
def _reset_keras_state():
    """Clear the Keras sessions and the default TF graph before a model load."""
    K.clear_session()
    # Only the compat.v1 spelling is used: the bare tf.reset_default_graph
    # is not available on TensorFlow 2.x, and the original called both.
    tf.compat.v1.reset_default_graph()
    tf.keras.backend.clear_session()


def _ensure_dir(path):
    """Create ``path`` together with any missing parents if it is absent."""
    if not os.path.exists(path):
        os.makedirs(path)


def run(item,
        city,
        thedir,
        site,
        input_file=False,
        outdir='myflask/static/matches/',
        first=False,
        features_only=False,
        sold=False,
        model=False,
        pretrained_exists=False,
        topn=12):
    """
        Puts everything together: loads the model, loads (or generates) the
        image features, applies cosine similarity against the input image
        and copies the ``topn`` best matches to ``outdir + site``.

        Args:
            item: The item you want to search for (e.g., couch)
            city: The city where you are searching
            thedir: the primary directory (defined early and passed around
                    for easily porting all programs elsewhere, e.g., AWS)
            site: sub-directory of ``outdir`` that receives the matched files
            input_file: path of the query image; when falsy, a ``.jpg`` file
                    from ``myflask/static/uploads/`` is used instead
            outdir: base directory for the copied matches
            first: regenerate and save the image features instead of
                    loading them from disk
            features_only: regenerate and save the image features, then
                    return without doing any matching
            sold: use the ``sold`` image folder and ``sold_`` feature paths
            model: ignored; the model is always loaded inside this function
            pretrained_exists: forwarded to ``load_headless_pretrained_model``
            topn: number of most similar images to return

        Returns:
            ``None`` when ``features_only`` is set. Otherwise a tuple
            ``(barefiles, match_ids, similarities)``, ordered from most to
            least similar: the matched file names, the integer ids parsed
            from those names (the name with ``.jpg`` removed), and the
            corresponding cosine similarities.

        Raises:
            IOError: if ``input_file`` is not given and no ``.jpg`` file is
                    found in ``myflask/static/uploads/``.
    """
    _reset_keras_state()

    city_dir = thedir + city + '/'
    if not sold:
        folder = city_dir + item + '_images/'
        features_path = city_dir + 'cnn/' + item + '_features/'
        file_mapping_path = city_dir + 'cnn/' + item + '_file_mapping/'
    else:
        folder = city_dir + item + '_images/sold/'
        features_path = city_dir + 'cnn/sold_' + item + '_features/'
        file_mapping_path = city_dir + 'cnn/sold_' + item + '_file_mapping/'
    # Creating parents as well covers the shared 'cnn/' directory, which the
    # sold branch previously assumed to exist already.
    _ensure_dir(features_path)
    _ensure_dir(file_mapping_path)

    if first or features_only:
        # The model is only needed here; the matching step below resets the
        # session and loads its own copy.
        model = load_headless_pretrained_model(
            pretrained_exists=pretrained_exists)
        plural = 'es' if item == 'couch' else 's'
        print("%% You are generating the image features for all " + item +
              plural + " from " + city)
        _, image_paths = load_images(folder)

        images_features, file_index = generate_features(image_paths, model)
        vector_search.save_features(features_path, images_features,
                                    file_mapping_path, file_index)
        if features_only:
            return None
    else:
        print(
            "%% You already have the image features in hand-- loading them from disk."
        )
        images_features, file_index = vector_search.load_features(
            features_path, file_mapping_path)

    # Define the location of the file uploaded by the user
    if not input_file:
        upload_dir = 'myflask/static/uploads/'
        # As before, the last .jpg in listing order wins.
        for candidate in os.listdir(upload_dir):
            if candidate.endswith(".jpg"):
                input_file = upload_dir + candidate
        if not input_file:
            raise IOError("No .jpg file found in " + upload_dir)

    _reset_keras_state()
    model = load_headless_pretrained_model(pretrained_exists=pretrained_exists)

    # Load in the single input image from the user
    img = image.load_img(input_file, target_size=(224, 224))
    x_raw = image.img_to_array(img)
    x_expand = np.expand_dims(x_raw, axis=0)

    # Extract the image features according to the headless model
    singleinput = preprocess_input(x_expand)
    single_image_features = model.predict(singleinput)

    # Apply cosine_similarities between features of loaded image
    # and features of all directory images
    print("%% That was fast! Applying cosine similarity and finding images")
    cosine_similarities = cosine_similarity(single_image_features,
                                            images_features)[0]

    # Indices of the topn most similar images, best match first
    top_N_idx = np.argsort(cosine_similarities)[-topn:][::-1]
    topfiles = [file_index[i] for i in top_N_idx]

    # Copy them to the static folder served for this site
    match_dir = outdir + site + '/'
    _ensure_dir(match_dir)
    barefiles = []
    match_ids = []
    for path in topfiles:
        filename = path.split('/')[-1]
        copyfile(path, match_dir + filename)
        barefiles.append(filename)
        match_ids.append(int(filename.replace('.jpg', '')))

    # After prediction
    K.clear_session()

    # Report the directory actually written to rather than the default one.
    print("%% Cosine similarity complete. Matched " +
          "images are in " + outdir + site)
    return barefiles, match_ids, cosine_similarities[top_N_idx]