コード例 #1
0
    def get_sorted_k_values(self, num_similar_images, similar_images, all_image_hog_features, image_vector):
        """Rank candidate images by Euclidean distance to the query vector.

        Args:
            num_similar_images: Maximum number of results to return.
            similar_images: Names of the candidate images to rank.
            all_image_hog_features: Mapping with an ``'images'`` sequence of
                names and a ``'data_matrix'`` whose i-th row is the feature
                vector of the i-th name.
            image_vector: Query feature vector.

        Returns:
            A list of ``(image_name, distance)`` tuples sorted by ascending
            distance, truncated to ``num_similar_images`` entries. When fewer
            candidates than requested are available, all of them are returned
            (previously this case raised ``UnboundLocalError``).

        Raises:
            ValueError: If a candidate name is not present in
                ``all_image_hog_features['images']``.
        """
        image_names = all_image_hog_features['images']
        data_matrix = all_image_hog_features['data_matrix']

        # Build the name -> row lookup once instead of calling list.index()
        # per candidate. setdefault keeps the FIRST occurrence of a name,
        # which is what list.index() would have returned.
        row_of = {}
        for row, name in enumerate(image_names):
            row_of.setdefault(name, row)

        ranking = {}
        for image_name in similar_images:
            if image_name not in row_of:
                # Same exception type list.index() raised for unknown names.
                raise ValueError('%r is not in list' % (image_name,))
            candidate_vector = data_matrix[row_of[image_name]]
            ranking[image_name] = np.linalg.norm(image_vector - candidate_vector)

        sorted_k_values = sorted(ranking.items(), key=lambda kv: kv[1])
        return sorted_k_values[:num_similar_images]


if __name__ == "__main__":
    # Manual smoke run: load every stored HOG descriptor, feed them to the
    # LSH instance, then print the result of a similarity lookup for one
    # sample image.
    lsh = LSH(k=9, l=10)
    dbconnection = DatabaseConnection()

    hog_features = dbconnection.get_object_feature_matrix_from_db(tablename='histogram_of_gradients')
    # The return value is not used below; the call is kept as-is
    # (presumably it populates the LSH layers -- confirm in LSH).
    bit_map = lsh.generate_representation_for_all_layers(hog_features['data_matrix'],
                                                         hog_features['images'])

    raw_query = dbconnection.get_feature_data_for_image('histogram_of_gradients', 'Hand_0000012.jpg')
    query_vector = np.asarray(raw_query.flatten())

    neighbours = lsh.find_ksimilar_images(k=6, image_vector=query_vector,
                                          all_image_hog_features=hog_features)
    print(neighbours)
コード例 #2
0
def execute_task6(request):
    """Run an LSH similarity search for the posted query image and render it.

    Reads ``query_image``, ``most_similar_images``,
    ``query_image_folder_name`` and ``relevance_feedback`` from
    ``request.POST``. The HOG feature matrix and its 400-component SVD
    projection are loaded from pickle caches when present and otherwise
    computed and cached. The query vector is projected with the same ``vt``
    and searched through the pickled LSH model. A second, larger search is
    saved to ``test_dataset.pickle`` for the relevance-feedback step.

    Args:
        request: Request object exposing ``POST.get``.

    Returns:
        Whatever ``render`` returns for ``visualize_images.html``.
    """
    query_image = request.POST.get('query_image')
    most_similar_images = int(request.POST.get('most_similar_images'))
    query_image_folder_name = request.POST.get('query_image_folder_name')
    relevance_feedback = request.POST.get('relevance_feedback')

    lsh = read_from_pickle('lsh_model')
    db_connection = DatabaseConnection()

    # Load each cache once; the value itself tells us whether it existed.
    all_image_hog_features = read_from_pickle('all_img_features_LSH.pickle')
    if all_image_hog_features is None:
        all_image_hog_features = db_connection.get_object_feature_matrix_from_db(
            tablename='histogram_of_gradients')
        save_to_pickle(all_image_hog_features, 'all_img_features_LSH.pickle')

    # SVD on HOG features (cached together with the projection matrix).
    svd_obj = read_from_pickle('svd_hog_lsh.pickle')
    if svd_obj is not None:
        transformed_data = svd_obj['data_matrix']
        vt = svd_obj['vt']
    else:
        svd = SingularValueDecomposition()
        transformed_data, vt = svd.get_transformed_data_copy(
            all_image_hog_features['data_matrix'], 400)
        save_to_pickle(
            {
                "data_matrix": transformed_data,
                "images": all_image_hog_features['images'],
                "vt": vt
            }, 'svd_hog_lsh.pickle')

    # Decide which table holds the query image BEFORE fetching it, so an
    # image that only exists in a folder-specific table is never looked up
    # in the default table first. A missing or empty folder name selects
    # the default table.
    table_name = 'histogram_of_gradients'
    if query_image_folder_name:
        table_name = convert_folder_path_to_table_name(
            query_image_folder_name, 'histogram_of_gradients')
    image_vector = db_connection.get_feature_data_for_image(
        table_name, query_image)
    # Flatten in both cases so the LSH model always receives the same shape.
    image_vector = np.asarray(image_vector.flatten())

    # Project the query into the same latent space as the indexed data.
    image_vector = np.dot(image_vector.astype(float), np.transpose(vt))

    new_obj = {
        'data_matrix': transformed_data,
        'images': all_image_hog_features['images'],
    }
    sorted_k_values, _ = lsh.find_ksimilar_images(
        k=most_similar_images,
        image_vector=image_vector,
        all_image_hog_features=new_obj)

    # Now getting a bigger test dataset for relevance feedback.
    extra_candidates = 10 if relevance_feedback == "Probabilistic" else 200
    # NOTE(review): as in the original code, the statistics shown to the
    # user come from this larger search, not from the one that produced the
    # displayed images -- confirm this is intended.
    (test_dataset, result_stats) = lsh.find_ksimilar_images(
        k=extra_candidates + most_similar_images,
        image_vector=image_vector,
        all_image_hog_features=new_obj)

    save_to_pickle(test_dataset, 'test_dataset.pickle')
    print(sorted_k_values[:most_similar_images])
    return render(
        request, 'visualize_images.html', {
            'images': sorted_k_values[:most_similar_images],
            "from_task": "task5",
            'rel_type': relevance_feedback,
            "q": query_image,
            "t": most_similar_images,
            "num_total": result_stats['total'],
            "num_unique": result_stats['unique']
        })
コード例 #3
0
class RelevanceFeedback:
    """Re-rank image search results using user relevance feedback.

    Offers a weighted query-vector update (``compute_new_query_vector``),
    classifier-based feedback (SVM and decision tree), a PageRank-based
    variant, and a probabilistic scoring variant.

    Flow suggested by the original author's notes: call
    ``get_user_feedback`` on an initial ranking, build a new query with
    ``compute_new_query_vector`` and re-rank with
    ``get_most_m_similar_images``.
    """

    # Table holding the HOG descriptors used by every method of this class.
    _HOG_TABLE = 'histogram_of_gradients'

    def __init__(self):
        self.database_connection = DatabaseConnection()
        self.conn = self.database_connection.get_db_connection()
        print('Initiating RelevanceFeedback....')

    # ------------------------------------------------------------------
    # Small private helpers
    # ------------------------------------------------------------------
    def _hog_vector(self, image_name):
        """Fetch the stored HOG descriptor of ``image_name``."""
        return self.database_connection.get_feature_data_for_image(self._HOG_TABLE, image_name)

    def _flat_hog_vector(self, image_name):
        """Fetch the HOG descriptor and reshape it to 1-D of length ``shape[1]``."""
        fv = self._hog_vector(image_name)
        return fv.reshape(fv.shape[1])

    def _mean_hog_vector(self, image_names, shape):
        """Average the HOG descriptors of ``image_names`` (zeros when empty)."""
        total = np.zeros(shape)
        for name in image_names:
            total = total + self._hog_vector(name)
        if len(image_names) != 0:
            total = total / len(image_names)
        return total

    @staticmethod
    def _select_candidates(image_names, predictions, m):
        """Pick images predicted relevant, padded with irrelevant ones up to ``m``.

        All images labelled ``1`` are kept. If that yields fewer than ``m``
        names, the first images labelled ``-1`` are appended until ``m`` is
        reached or no irrelevant images remain.
        """
        selected = [name for name, label in zip(image_names, predictions) if label == 1]
        shortfall = m - len(selected)
        if shortfall > 0:
            irrelevant = [name for name, label in zip(image_names, predictions) if label == -1]
            selected.extend(irrelevant[:shortfall])
        return selected

    def _classifier_based_feedback(self, classifier_factory, q, rel_items, irl_items, m):
        """Shared implementation of the SVM and decision-tree feedback methods.

        Trains ``classifier_factory()`` on the user-labelled images, labels
        the images stored in ``test_dataset.pickle``, and ranks the selected
        candidates against the updated query vector.
        """
        q_new = self.compute_new_query_vector(q_old=q, relevant_items=rel_items, irrel_items=irl_items)
        X_train, Y_train = self.create_X_Y_as_np_matrix(rel_items=rel_items, irl_items=irl_items)

        classifier = classifier_factory()
        classifier.fit(X=X_train, y=Y_train)

        # Larger candidate pool saved earlier by the search step.
        test_dataset = read_from_pickle('test_dataset.pickle')
        X_test, image_names = self.create_X_test_as_np_matrix(test_dataset=test_dataset)
        predictions = classifier.predict(u=X_test)

        candidates = self._select_candidates(image_names, predictions, m)
        new_obj_feature_matrix = self.database_connection.HOG_descriptor_from_image_ids(
            image_ids=candidates)

        return get_most_m_similar_images(data_with_images=new_obj_feature_matrix,
                                         query_image_feature_vector=q_new, m=m)

    # ------------------------------------------------------------------
    # Query update and user interaction
    # ------------------------------------------------------------------
    def compute_new_query_vector(self, q_old, relevant_items, irrel_items, alpha=0.3, beta=0.65, gamma=0.05):
        """Return ``alpha*q_old + beta*mean(relevant) - gamma*mean(irrelevant)``.

        Args:
            q_old: Current query vector (NumPy array).
            relevant_items: Image names marked relevant.
            irrel_items: Image names marked irrelevant.
            alpha, beta, gamma: Weights of the three terms.

        Returns:
            The updated query vector. An empty item list contributes a zero
            vector for its term.
        """
        print('Computing new query vector.....')

        avg_rel_vec = self._mean_hog_vector(relevant_items, q_old.shape)
        avg_irl_vec = self._mean_hog_vector(irrel_items, q_old.shape)

        return alpha * q_old + beta * avg_rel_vec - gamma * avg_irl_vec

    def get_user_feedback(self, init_rank_list, q_name, caller='misc'):
        """Ask on stdin whether each ranked image is relevant.

        Args:
            init_rank_list: Sequence of items whose first element is the
                image name. With ``caller='prb'`` the ranking is expected to
                be wrapped in a one-element list.
            q_name: Name of the query image; it is skipped.
            caller: ``'prb'`` for the probabilistic caller, anything else
                otherwise.

        Returns:
            ``(relevant_names, irrelevant_names)``. Only the exact answer
            ``'y'`` counts as relevant.
        """
        print('Taking user feedback now...')
        ranked = init_rank_list[0] if caller == 'prb' else init_rank_list

        rel_items = []
        irl_items = []
        for item in ranked:
            name = item[0]
            if name == q_name:
                continue
            print(f'Is image {name} relevant ? (y/n)')
            # Compare by value; `is 'y'` only worked by interning accident.
            if input() == 'y':
                rel_items.append(name)
            else:
                irl_items.append(name)

        return rel_items, irl_items

    # ------------------------------------------------------------------
    # Feedback strategies
    # ------------------------------------------------------------------
    def get_SVM_based_feedback(self, q, rel_items, irl_items, obj_feature_matrix, m):
        """Re-rank using an SVM trained on the user's feedback.

        ``obj_feature_matrix`` is accepted for interface compatibility and
        is not used. Returns the list produced by
        ``get_most_m_similar_images``.
        """
        return self._classifier_based_feedback(
            support_vector_machine.SupportVectorMachine, q, rel_items, irl_items, m)

    def get_DTC_based_feedback(self, q, rel_items, irl_items, obj_feature_matrix, m):
        """Re-rank using a decision tree trained on the user's feedback.

        ``obj_feature_matrix`` is accepted for interface compatibility and
        is not used. Returns the list produced by
        ``get_most_m_similar_images``.
        """
        return self._classifier_based_feedback(
            decision_tree_learning.DecisionTreeLearning, q, rel_items, irl_items, m)

    def get_PPR_based_feedback(self, q, rel_items, irl_items, obj_feature_matrix, m):
        """Re-rank the cached candidate pool with a PageRank seeded by feedback.

        ``q`` and ``obj_feature_matrix`` are accepted for interface
        compatibility and are not used: the ranking depends only on the
        candidate pool in ``test_dataset.pickle`` and on the feedback lists.
        """
        topology_images = read_from_pickle('test_dataset.pickle')
        candidate_names = get_image_names_from_tuples(topology_images)

        # Reuse the instance's connection rather than opening a second one.
        data_image_dict = self.database_connection.HOG_descriptor_from_image_ids(candidate_names)
        data_matrix = data_image_dict['data_matrix']
        image_names = data_image_dict['images']

        svd_obj = SingularValueDecomposition()
        svd_image_data = svd_obj.get_transformed_data(data_matrix, 8)  # change this for 11K images

        pg_obj = PageRank()
        image_similarity_matrix = pg_obj.get_image_similarity_matrix_for_top_k_images(6, svd_image_data)
        seed_vector = pg_obj.get_seed_vector(rel_items, image_names, irl_items)
        pie = pg_obj.get_page_rank_eigen_vector(image_similarity_matrix, seed_vector)
        return pg_obj.get_top_K_images_based_on_scores(pie, image_names, m)

    def get_init_ranking(self, obj_feature_matrix,
                         q):  # For SVM, DTC, PPR.... check calculate_init_prob_similarity for Probab based
        """Return ``(initial top-5 ranking, Vt)`` for query vector ``q``."""
        Vt = self.get_Vt(obj_feature_matrix)
        init_rank_list = get_most_m_similar_images(data_with_images=obj_feature_matrix, query_image_feature_vector=q,
                                                   Vt=Vt, m=5)
        return init_rank_list, Vt

    def get_Vt(self, obj_feature_matrix):  # For SVM, DTC, PPR.... check calculate_init_prob_similarity for Probab based
        """Return ``Vt`` from a 25-component decomposition of the data matrix."""
        svd = singular_value_decomposition.SingularValueDecomposition()
        data_matrix = obj_feature_matrix['data_matrix']
        _, _, Vt = svd.get_latent_semantics(data_matrix=data_matrix, n_components=25)
        return Vt

    # ------------------------------------------------------------------
    # Probabilistic relevance feedback
    # ------------------------------------------------------------------
    def get_probabilistic_relevance_feedback(self, D_matrix, images, q_name, m):
        """Score, collect feedback on the top ``m``, re-score, return top ``m``.

        Returns a list of ``(image_name, score)`` tuples, best first.
        """
        n_i = self.calculate_n_i(D_matrix=D_matrix)
        init_scores = self.calculate_initial_prob_similarity(D_matrix=D_matrix, images=images, n_i=n_i)
        rel_items, irl_items = self.get_user_feedback(init_rank_list=[init_scores[:m]], q_name=q_name, caller='prb')
        new_rank_list = self.calculate_feedback_prob_similarity(D_matrix=D_matrix, images=images,
                                                                relevant_items=rel_items, n_i=n_i)
        return new_rank_list[:m]

    def calculate_feedback_prob_similarity(self, D_matrix, images, relevant_items, n_i):
        """Score every row of ``D_matrix`` using feedback-adjusted feature weights.

        Args:
            D_matrix: 2-D NumPy array, one row per image.
            images: Image names, aligned with the rows of ``D_matrix``.
            relevant_items: Names the user marked relevant.
            n_i: ``(1, n_features)`` array as returned by ``calculate_n_i``.

        Returns:
            List of ``(image_name, score)`` tuples sorted by descending score.
        """
        N = D_matrix.shape[0]
        R = len(relevant_items)
        n_i = n_i[0]
        r_i = self.calculate_r_i(D_matrix=D_matrix, images=images, relevant_items=relevant_items)[0]

        # The per-feature weight does not depend on the document, so compute
        # it once instead of once per (document, feature) pair.
        numerator = (r_i + 0.5) / (R + 1 - r_i)
        denominator = (n_i - r_i + 0.5) / (N - R + 1 - n_i + r_i)
        # math.log2 keeps the original ValueError on a non-positive ratio.
        weights = np.array([math.log2(ratio) for ratio in numerator / denominator])

        scores = np.dot(np.asarray(D_matrix), weights)
        feedback_scores = {images[j]: score for j, score in enumerate(scores)}

        return sorted(feedback_scores.items(), key=lambda k: k[1], reverse=True)

    def calculate_initial_prob_similarity(self, D_matrix, images, n_i):
        """Score every row of ``D_matrix`` before any feedback is available.

        Each feature is weighted by ``log2((N - n_i + 0.5) / (n_i + 0.5))``
        where ``N`` is the number of rows.

        Returns:
            List of ``(image_name, score)`` tuples sorted by descending score.
        """
        N = D_matrix.shape[0]
        n_i = n_i[0]

        # Document-independent weights, computed once.
        weights = np.array([math.log2(ratio) for ratio in (N - n_i + 0.5) / (n_i + 0.5)])

        scores = np.dot(np.asarray(D_matrix), weights)
        init_scores = {images[j]: score for j, score in enumerate(scores)}

        return sorted(init_scores.items(), key=lambda k: k[1], reverse=True)

    def calculate_r_i(self, D_matrix, images, relevant_items):
        """Count, per feature, the relevant images with a positive value.

        Returns:
            Float array of shape ``(1, n_features)``.
        """
        data = np.asarray(D_matrix)
        relevant = set(relevant_items)
        is_relevant = np.array([images[i] in relevant for i in range(data.shape[0])], dtype=bool)
        counts = (data[is_relevant] > 0).sum(axis=0)
        return counts.reshape(1, data.shape[1]).astype(float)

    def calculate_n_i(self, D_matrix):
        """Count, per feature, the images with a positive value.

        Returns:
            Float array of shape ``(1, n_features)``.
        """
        data = np.asarray(D_matrix)
        counts = (data > 0).sum(axis=0)
        return counts.reshape(1, data.shape[1]).astype(float)

    # ------------------------------------------------------------------
    # Dataset builders
    # ------------------------------------------------------------------
    def create_X_Y_as_np_matrix(self, rel_items, irl_items):
        """Build training data: relevant images get label 1, irrelevant -1.

        Returns:
            ``(X, Y)`` NumPy arrays, relevant rows first.
        """
        X = []
        Y = []

        for item in rel_items:
            X.append(self._flat_hog_vector(item))
            Y.append(1)

        for item in irl_items:
            X.append(self._flat_hog_vector(item))
            Y.append(-1)

        return np.array(X), np.array(Y)

    def create_X_test_as_np_matrix(self, test_dataset):
        """Build the unlabeled feature matrix for ``test_dataset``.

        Args:
            test_dataset: Iterable of items whose first element is an image
                name.

        Returns:
            ``(X, image_names)`` with rows of ``X`` aligned to the names.
        """
        X = []
        imageNames = []
        for item in test_dataset:
            X.append(self._flat_hog_vector(item[0]))
            imageNames.append(item[0])

        return np.array(X), imageNames