Example #1
def svm_feedback(relevant_images, irrelevant_images, images_to_display, query,
                 results):
    if not irrelevant_images:
        print("Please provide irrelevant images set for svm to work properly.")
        return relevant_images
    model = settings.SVM.CLASSIFIER.MODEL
    k = settings.SVM.CLASSIFIER.K
    frt = settings.SVM.CLASSIFIER.FRT
    images_rel, data_matrix_rel = get_all_vectors(
        model, f={'path': {
            '$in': relevant_images
        }}, master_db=True)
    images_irel, data_matrix_irel = get_all_vectors(
        model, f={'path': {
            '$in': irrelevant_images
        }}, master_db=True)
    images_test, test_vector = get_all_vectors(
        model, f={'path': {
            '$in': results + [query]
        }}, master_db=True)
    labelled_vectors, _, _, unlabelled_vectors = reducer(
        np.vstack((data_matrix_rel, data_matrix_irel)),
        k,
        frt,
        query_vector=test_vector)
    rel_class = np.array([1] * len(data_matrix_rel))
    irel_class = np.array([-1] * len(data_matrix_irel))
    # Train the SVM on the scaled labelled vectors
    x_train = np.array(labelled_vectors) * 2
    y_train = np.concatenate((rel_class, irel_class))
    svclassifier = SVM()
    svclassifier.fit(x_train, y_train)
    # Scale the unlabelled vectors the same way before predicting
    unlabelled_vectors = np.array(unlabelled_vectors) * 2
    y_pred = svclassifier.predict(unlabelled_vectors)
    # Keep only the images the classifier predicted as relevant (+1)
    dic = {}
    for i, y in enumerate(y_pred):
        if y == 1:
            dic[images_test[i]] = unlabelled_vectors[i]
    # Rank predicted-relevant images by their signed distance from the hyperplane
    length_dict = {key: np.dot(vec, svclassifier.w) for key, vec in dic.items()}
    sorted_dict = sorted(length_dict.items(), key=lambda x: x[1], reverse=True)
    # Keep the top-ranked images, leaving room for the irrelevant set at the end
    n_keep = max(images_to_display - len(irrelevant_images), 0)
    list_img = [key for key, _ in sorted_dict[:n_keep]]
    return list_img + irrelevant_images
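The ranking above scores each predicted-relevant image by its raw margin np.dot(x, svclassifier.w); the repository's own SVM class is not shown in this excerpt. As a rough sketch only, the same fit/predict/rank flow could be reproduced with scikit-learn's LinearSVC (an assumed stand-in, not the class the example actually uses):

import numpy as np
from sklearn.svm import LinearSVC

def rank_by_margin(x_train, y_train, x_test, test_names):
    # Hedged sketch: LinearSVC stands in for the project's own SVM class.
    clf = LinearSVC()
    clf.fit(x_train, y_train)
    # decision_function gives the signed distance from the separating
    # hyperplane; larger positive values mean "more confidently relevant".
    scores = clf.decision_function(x_test)
    relevant = [(name, s) for name, s in zip(test_names, scores) if s > 0]
    return [name for name, _ in sorted(relevant, key=lambda p: p[1], reverse=True)]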
Example #2
def generate_vec():
    # NOTE: `model` is expected to be defined at module level in the source file.

    # Get dorsal vectors from the labelled DB
    dorsal_paths = filter_images('dorsal')
    _, dorsal_vectors = get_all_vectors(model, f={'path': {'$in': dorsal_paths}})

    # Get palmar vectors from the labelled DB
    palmar_paths = filter_images('palmar')
    _, palmar_vectors = get_all_vectors(model, f={'path': {'$in': palmar_paths}})

    # Get test vectors and their paths from the unlabelled DB
    test_data_paths, test_data = get_all_vectors(model, f={}, unlabelled_db=True)

    # Return train vectors for both classes plus the test set
    return dorsal_vectors, palmar_vectors, test_data, test_data_paths
Example #3
def build_labelled(model):
    # Dorsal train data
    dorsal_paths = filter_images('dorsal')
    _, dorsal_vectors = get_all_vectors(model, f={'path': {'$in': dorsal_paths}})
    dorsal_class = np.array([-1] * len(dorsal_vectors))

    # Palmar train data (everything that is not dorsal)
    _, palmar_vectors = get_all_vectors(model, f={'path': {'$nin': dorsal_paths}})
    palmar_class = np.array([1] * len(palmar_vectors))

    # Stack the train data and concatenate the class labels
    train_data = np.vstack((dorsal_vectors, palmar_vectors))
    train_class = np.concatenate((dorsal_class, palmar_class))

    return train_data, train_class
Example #4
def get_data_matrix(feature, f=None):
    # Avoid the mutable-default-argument pitfall
    f = f if f is not None else {}
    # Get images and feature vectors from the master DB
    images, data = get_all_vectors(feature, f=f, master_db=True)
    # Column-stack the metadata attributes onto the feature matrix
    meta, meta_space = get_metadata_space(images)
    matrix = np.c_[data, meta_space]

    return images, meta, matrix
Example #5
    def get_data_matrix(cls,
                        feature,
                        label=None,
                        unlabelled=False,
                        ignore_metadata=False):
        min_max_scaler = MinMaxScaler()

        f = {}
        if label:
            label_images = utils.filter_images(label)
            f = {'path': {'$in': label_images}}

        # Build and scale feature matrix
        images, feature_space = utils.get_all_vectors(feature,
                                                      f=f,
                                                      unlabelled_db=unlabelled)
        feature_space = min_max_scaler.fit_transform(feature_space)
        # Not including metadata boosts accuracy of Set 2
        # Including metadata boosts accuracy of Set 1
        if ignore_metadata:
            meta = utils.get_metadata(unlabelled_db=unlabelled)
            # Mapping between image file path name and the metadata
            meta = {m['path']: m for m in meta}
            return images, meta, feature_space

        # Build and scale metadata matrix
        meta, metadata_space = cls.get_metadata_space(images,
                                                      unlabelled_db=unlabelled)
        metadata_space = min_max_scaler.fit_transform(metadata_space)

        # Column stack them
        data_matrix = np.c_[feature_space, metadata_space]

        return images, meta, data_matrix
Example #6
def build_unlabelled(model):
    # Dorsal test data
    dorsal_paths = filter_images('dorsal', unlabelled_db=True)
    _, u_dorsal_vectors = get_all_vectors(model, f={'path': {'$in': dorsal_paths}}, unlabelled_db=True)
    dorsal_class = np.array([-1] * len(u_dorsal_vectors))

    # Palmar test data
    palmar_paths = filter_images('palmar', unlabelled_db=True)
    _, u_palmar_vectors = get_all_vectors(model, f={'path': {'$in': palmar_paths}}, unlabelled_db=True)
    palmar_class = np.array([1] * len(u_palmar_vectors))

    # Stack all test data and labels
    test_data = np.vstack((u_dorsal_vectors, u_palmar_vectors))
    test_labels = np.concatenate((dorsal_class, palmar_class))

    return test_data, test_labels, np.concatenate((dorsal_paths, palmar_paths))
Example #7
def decision_tree_feedback(relevant_paths, irrelevant_paths, t, query_image, prev_results):
    # Get all previously returned images (plus the query) from the master DB
    master_images, master_vecs = get_all_vectors(
        settings.DECISION.CLASSIFIER.MODEL,
        {'path': {'$in': prev_results + [query_image]}},
        master_db=True)

    # Get indices of relevant and irrelevant images
    relevant_indices = [master_images.index(image) for image in relevant_paths]
    irrelevant_indices = [master_images.index(image) for image in irrelevant_paths]

    # Get the query image vector
    _, q_img_vector = get_all_vectors(
        settings.DECISION.CLASSIFIER.MODEL, f={'path': query_image}, master_db=True)

    # Prepare relevant and irrelevant matrices
    relevant_matrix = master_vecs[relevant_indices, :]
    irrelevant_matrix = master_vecs[irrelevant_indices, :]

    # Prepare the test matrix (everything the user has not labelled)
    relevant_indices.extend(irrelevant_indices)
    test_data = np.delete(master_vecs, relevant_indices, 0)
    test_data_images = np.delete(np.array(master_images), relevant_indices, 0)

    # Prepare train data: append class labels 1.0 (relevant) / 0.0 (irrelevant)
    relevant_matrix = np.c_[relevant_matrix, np.full((relevant_matrix.shape[0]), 1.0)]
    irrelevant_matrix = np.c_[irrelevant_matrix, np.full((irrelevant_matrix.shape[0]), 0.0)]
    train_data = np.vstack((relevant_matrix, irrelevant_matrix))

    # Make predictions
    predictions = np.array(
        decision_tree(train_data, test_data, settings.DECISION.CLASSIFIER.MAX_DEPTH,
                      settings.DECISION.CLASSIFIER.MIN_SIZE))

    # Get indices of images classified as relevant
    predicted_relevant_indices = np.where(predictions == 1.0)[0]

    # Combine the user-labelled images with the predicted-relevant ones
    test_data_images = np.append(test_data_images[predicted_relevant_indices],
                                 np.array(master_images)[relevant_indices])
    test_data = np.vstack((test_data[predicted_relevant_indices, :], train_data[:, :-1]))

    # Compute the euclidean distance from the query image for all relevant images
    euclidean_distances = euclidean(test_data,
                                    np.tile(q_img_vector[0], (test_data.shape[0], 1)))

    # Return the t images closest to the query
    ranked = sorted(zip(test_data_images, euclidean_distances), key=lambda a: a[1])
    return [image for image, _ in ranked[:t]]
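The `euclidean` helper used above is not shown in this excerpt; as a minimal sketch, a row-wise Euclidean distance between two equal-shaped matrices could look like this (an assumption about the helper's behavior, inferred from how it is called):

import numpy as np

def euclidean(a, b):
    # Row-wise Euclidean distance between two (n, d) matrices:
    # returns an (n,) vector with the distance between a[i] and b[i].
    return np.sqrt(((a - b) ** 2).sum(axis=1))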
Example #8
def get_unlabelled_data(feature):
    u_images, u_vectors = utils.get_all_vectors(feature, unlabelled_db=True)

    # Get metadata
    meta = utils.get_metadata(unlabelled_db=True)
    meta = {m['path']: m for m in meta}

    return u_images, meta, u_vectors
Example #9
def get_labelled_data(feature):
    # Get labelled images
    l_images, feature_space = utils.get_all_vectors(feature)

    # Get metadata
    meta = utils.get_metadata()
    meta = {m['path']: m for m in meta}

    return l_images, meta, feature_space
Example #10
def run_svm(evaluate, model='lbp', k=30, frt='pca'):
    train_data, train_labels = build_labelled(model)
    if evaluate:
        test_data, test_labels, test_paths = build_unlabelled(model)
    else:
        test_paths, test_data = get_all_vectors(model, f={}, unlabelled_db=True)
    labelled_vectors, _, _, unlabelled_vectors = reducer(train_data, k, frt, query_vector=test_data)
    labelled_vectors *= 2
    unlabelled_vectors *= 2
    svclassifier = SVM()
    svclassifier.fit(labelled_vectors, train_labels)
    y_pred = svclassifier.predict(unlabelled_vectors)
    if evaluate:
        print(classification_report(test_labels, y_pred))
    return test_paths, y_pred
Example #11
def get_full_matrix(feature, unlabelled=False, master=False):
    # Get images and feature vectors from the selected database (labelled, unlabelled, or master)
    images, data = get_all_vectors(feature,
                                   unlabelled_db=unlabelled,
                                   master_db=master)

    # Get metadata
    meta = get_metadata(unlabelled_db=unlabelled, master_db=master)
    meta = {m['path']: m for m in meta}
    meta_space = np.array([[
        meta[i]['age'], mapping[meta[i]['gender']],
        mapping[meta[i]['skinColor']], mapping[meta[i]["accessories"]],
        meta[i]["nailPolish"], meta[i]["irregularities"]
    ] for i in images])

    return images, meta, np.c_[data, meta_space]
Example #12
def decision_tree_driver(args, evaluate=False):
    images, data_matrix = utils.get_all_vectors(args.decision_model)
    # Fetch unlabelled data (as provided in the settings)
    u_images, u_meta, unlabelled = helper.get_unlabelled_data(
        args.decision_model)

    # Dimensionality reduction is disabled here; the raw matrices are used as-is.
    # matrix, _, _, um = reducer(data_matrix, 30, "nmf", query_vector=unlabelled)
    matrix = data_matrix
    um = unlabelled

    l_matrix = matrix[:len(images)]
    u_matrix = um[:len(u_images)]

    dm = helper.build_labelled_matrix(l_matrix, images, 'aspectOfHand')

    # prepare test data
    query = helper.prepare_matrix_for_evaluation(u_matrix)

    max_depth = args.decision_max_depth
    min_size = args.decision_min_size

    prediction = decision_tree(dm, query, max_depth, min_size)

    dorsal_symbol = 0.0
    palmar_symbol = 1.0

    if evaluate:
        master_meta = utils.get_metadata(master_db=True)
        # Mapping between image file path name and the metadata
        master_meta = {m['imageName']: m for m in master_meta}
        truth = [
            dorsal_symbol
            if master_meta[Path(image).name]['aspectOfHand'].split(' ')[0]
            == 'dorsal' else palmar_symbol for image in u_images
        ]

        print(helper.get_accuracy(truth, prediction))

    return zip(u_images, prediction)
Example #13
def feedback_probab(relevant, irrelevant, t, query, prev_results):
    if not relevant:
        print("Probabilistic model requires relevant images for re-ordering.")
        return prev_results

    img_all, img_all_vec = get_all_vectors(
        model, f={'path': {
            '$in': prev_results + [query]
        }}, master_db=True)
    # Alternative filter (unused): f={'path': {'$nin': relevant}}

    img_all_vec_red, _, __ = reducer(img_all_vec, k, frt)
    img_all_vec_red = scale(img_all_vec_red, 0, 1)

    # Map image name to its reduced (pre-binarization) vector
    dict_all_red = dict(zip(img_all, img_all_vec_red))

    img_rel_vec_red = np.array([dict_all_red[name] for name in relevant])

    img_all_vec_red = makeArrayBinary(img_all_vec_red,
                                      img_all_vec_red.shape[0],
                                      img_all_vec_red.shape[1])
    img_rel_vec_red = makeArrayBinary(img_rel_vec_red,
                                      img_rel_vec_red.shape[0],
                                      img_rel_vec_red.shape[1])

    R = img_rel_vec_red.shape[0]  # number of relevant images
    N = len(img_all)  # total number of images considered

    # Binary independence model estimates, per latent feature j:
    #   p_j = P(feature | relevant)     = (r_j + 0.5) / (R + 1)
    #   u_j = P(feature | non-relevant) = (n_j - r_j + 0.5) / (N - R + 1)
    r_list = []
    p_list = []
    for j in range(k):
        r = 0
        for i in range(R):
            if img_rel_vec_red[i][j] == 1:
                r += 1
        r_list.append(r)
        p_list.append((r + 0.5) / (R + 1))

    n_list = []
    for j in range(k):
        n = 0
        for i in range(N):
            if img_all_vec_red[i][j] == 1:
                n += 1
        n_list.append(n)

    for i in range(k):
        # Subtract the relevant-image count (not the probability p_i) here
        n_list[i] = (n_list[i] - r_list[i] + 0.5) / (N - R + 1)

    # Per-feature relevance weights; non-positive ratios contribute 0 so that
    # log_list keeps all k dimensions and the dot product below stays aligned
    log_list = []
    for i in range(k):
        num = (p_list[i] * (1 - n_list[i])) / (n_list[i] * (1 - p_list[i]))
        log_list.append(math.log(num, 2) if num > 0 else 0.0)
    log_list = np.array(log_list)

    # Score every image against the feature weights and return the top t
    new_result = []
    for name in dict_all_red.keys():
        sim = np.dot(dict_all_red[name], log_list)
        new_result.append((name, sim))

    new_result = sorted(new_result, key=lambda x: x[1], reverse=True)

    return [name for name, _ in new_result[:t]]
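The weighting loops above implement the Robertson-Sparck Jones weights of the binary independence model. As a compact, vectorized restatement of the same computation (a sketch for clarity, not code from this repository):

import numpy as np

def rsj_weights(rel, all_imgs):
    # rel: (R, k) binary matrix of relevant images;
    # all_imgs: (N, k) binary matrix of all candidate images.
    R, N = rel.shape[0], all_imgs.shape[0]
    r = rel.sum(axis=0)              # per-feature count within the relevant set
    n = all_imgs.sum(axis=0)         # per-feature count over all images
    p = (r + 0.5) / (R + 1)          # P(feature | relevant), smoothed
    u = (n - r + 0.5) / (N - R + 1)  # P(feature | non-relevant), smoothed
    ratio = (p * (1 - u)) / (u * (1 - p))
    # Non-positive ratios contribute zero weight, mirroring the guard above
    return np.where(ratio > 0, np.log2(np.maximum(ratio, 1e-12)), 0.0)

Images are then scored with np.dot(vector, weights), exactly as in the example's final loop.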
Example #14
    return 'dorsal' if dorsal_dist <= palmar_dist else 'palmar'


if __name__ == "__main__":
    parser = prepare_parser()
    args = parser.parse_args()
    n_clusters = args.n_clusters

    # Get the absolute data path and the model whose features to use
    data_path = Path(settings.path_for(settings.DATA_PATH))
    model = settings.TASK2_CONFIG.MODEL

    # Fetch training data for dorsal and palmar images from the labelled DB
    dorsal_paths = filter_images('dorsal')
    dorsal_paths, dorsal_vectors = get_all_vectors(
        model, f={'path': {
            '$in': dorsal_paths
        }})
    palmar_paths, palmar_vectors = get_all_vectors(
        model, f={'path': {
            '$nin': dorsal_paths
        }})

    # Fetch test data from the unlabelled DB
    test_data_paths, test_data = get_all_vectors(model, unlabelled_db=True)

    # Get centroids and centroid labels for dorsal and palmar vectors
    print("Clustering dorsal vectors")
    dorsal_kmeans = Kmeans(dorsal_vectors, n_clusters)
    dorsal_kmeans.cluster()
    print("Clustering Palmar vectors")
    palmar_kmeans = Kmeans(palmar_vectors, n_clusters)
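This excerpt is cut off before the palmar clustering finishes and the prediction step runs, but the `return 'dorsal' if dorsal_dist <= palmar_dist else 'palmar'` line at its top shows the decision rule. A minimal sketch of nearest-centroid classification consistent with that rule (the centroid arrays are assumed inputs, since the Kmeans class's attributes are not shown):

import numpy as np

def predict_aspect(test_vector, dorsal_centroids, palmar_centroids):
    # Distance to the closest centroid in each cluster set; the class with
    # the nearer centroid wins. Centroid arrays have shape (n_clusters, d).
    dorsal_dist = np.linalg.norm(dorsal_centroids - test_vector, axis=1).min()
    palmar_dist = np.linalg.norm(palmar_centroids - test_vector, axis=1).min()
    return 'dorsal' if dorsal_dist <= palmar_dist else 'palmar'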
Example #15
    final_sorted = sorted(final_dictionary.items(), key=lambda item: item[1])
    return final_sorted[:t], member_count, unique_member_count


if __name__ == "__main__":
    parser = prepare_parser()
    args = parser.parse_args()

    start = time.time()

    # Part a
    l = args.layers
    k = args.hashes

    # Load the object-feature matrix and data
    images, data_matrix = get_all_vectors('moment', master_db=True)

    # Dimensionality of the feature vectors
    n_dims = data_matrix.shape[1]

    layers = [{} for _ in range(l)]

    planes_per_layer = []

    for _ in range(l):
        # Generate k normally distributed random hyperplanes for this layer
        planes = np.random.randn(k, n_dims)

        # Store the layer's planes as a compressed sparse row matrix
        planes_per_layer.append(scipy.sparse.csr_matrix(planes))

    # Index all points
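The excerpt stops at the indexing step. Under the usual random-hyperplane LSH scheme, each point is hashed per layer by the signs of its projections onto that layer's k planes, and the resulting bit pattern is the bucket key. A rough sketch under that assumption, reusing the names defined above:

# Hedged sketch: indexing points into the LSH layers built above.
# Assumes planes_per_layer, layers, and data_matrix as defined in the example.
for layer_idx, planes in enumerate(planes_per_layer):
    # (k, d) planes dotted with (n, d) data -> (k, n) projections
    projections = np.asarray(planes.dot(data_matrix.T))
    for point_idx in range(data_matrix.shape[0]):
        # One bit per plane: which side of the hyperplane the point falls on
        key = tuple(projections[:, point_idx] > 0)
        layers[layer_idx].setdefault(key, []).append(point_idx)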
Example #16

def prepare_parser():
    parser = argparse.ArgumentParser()
    parser.add_argument('-k', '--k_latent_semantics', type=int, required=True)
    return parser


if __name__ == "__main__":
    parser = prepare_parser()
    args = parser.parse_args()

    meta = get_metadata()
    # Mapping between image file path name and the metadata
    meta = {m['path']: m for m in meta}
    images, data_matrix = get_all_vectors("moment_inv")

    data_matrix = np.c_[data_matrix,
                        np.array([[meta[i]['age'],
                                   mapping[meta[i]['gender']],
                                   mapping[meta[i]['skinColor']]]
                                  for i in images]) * [2, 2, 10]]

    # Image-Image similarity
    img_img = np.array([
        distance.similarity(data_matrix, img, distance.EUCLIDEAN) for img in data_matrix
    ])

    # Map subject id to the list of their image indices in `images`
    subs = {}
    # Map subject id to the subject's metadata
    sub_meta = {}
    for img in meta:
        idx = images.index(img)
        if meta[img]['id'] not in subs:
Example #17
    return predictions.tolist()


def evaluate(dataset):
    n_folds = 3
    scores = helper.evaluate_algorithm(dataset, ppr_classifier, n_folds)
    print('Scores: %s' % scores)
    print('Mean Accuracy: %.3f%%' % (sum(scores) / float(len(scores))))


if __name__ == "__main__":
    from feature_reduction.feature_reduction import reducer
    min_max_scaler = MinMaxScaler()

    images, feature_space = utils.get_all_vectors(
        settings.PPR.CLASSIFIER.FEATURE)
    feature_space = min_max_scaler.fit_transform(feature_space)

    meta = utils.get_metadata()
    meta = {m['path']: m for m in meta}
    """
    u_images, u_feature_space = utils.get_all_vectors(
        settings.PPR.CLASSIFIER.FEATURE, unlabelled_db=True)
    u_feature_space = min_max_scaler.fit_transform(u_feature_space)

    matrix = np.vstack((
        feature_space,
        u_feature_space,
    ))

    matrix, eigen_values, latent_vs_old = reducer(