import os
from pathlib import Path

import joblib
from dynaconf import settings
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler


def get_pca_vectors(model_name, vectors, k, load_pca):
    """Generates k PCA vectors.

    Arguments:
        model_name {str} -- name used to locate the saved scaler/PCA models
        vectors {np.array} -- 2D numpy array of features
        k {int} -- number of PCA components to keep
        load_pca {bool} -- load a previously fitted scaler and PCA instead of
            fitting new ones

    Returns:
        np.array -- 2D numpy array of reduced features
    """

    directory = Path(settings.path_for(settings.FILES.MODELS))
    directory = str(directory)

    k = min(k, vectors.shape[1])
    if load_pca:
        std_scaler = joblib.load(
            os.path.join(directory, model_name + '_std_scaler.pkl'))
        pca = joblib.load(os.path.join(directory, model_name + '_pca.pkl'))
        # Apply the already-fitted transforms; calling fit_transform here
        # would silently refit and discard the saved models.
        scaled_values = std_scaler.transform(vectors)
        pca_vectors = pca.transform(scaled_values)
    else:
        std_scaler = StandardScaler()
        pca = PCA(n_components=k)
        scaled_values = std_scaler.fit_transform(vectors)
        pca_vectors = pca.fit_transform(scaled_values)
        # Save next to where they are loaded from, not the working directory.
        joblib.dump(pca, os.path.join(directory, model_name + '_pca.pkl'))
        joblib.dump(std_scaler,
                    os.path.join(directory, model_name + '_std_scaler.pkl'))

    print("Total variance accounted for:", sum(pca.explained_variance_ratio_))

    return pca_vectors
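
A minimal usage sketch for get_pca_vectors, assuming random feature data and a hypothetical model name; the configured models directory must exist for the dump and load calls to succeed:

import numpy as np

# Hypothetical: fit (and save) a 10-component PCA on random features.
features = np.random.rand(100, 50)
reduced = get_pca_vectors('example_model', features, k=10, load_pca=False)
print(reduced.shape)  # (100, 10)

# Reuse the saved scaler and PCA on new data with the same feature width.
more_features = np.random.rand(20, 50)
reduced_again = get_pca_vectors('example_model', more_features, k=10,
                                load_pca=True)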
Example 2
def write_to_file(tmp, file_name, **kwargs):
    """
    Writes the top k image comparisions to a html file using Jinja templates.

    Args:
        tmp: The template file
        file_name: The filename of the output
        **kwargs: The data to render
    """
    tmpl = env.get_template(tmp)
    op_path = Path(settings.path_for(settings.OUTPUT_PATH)) / file_name

    with open(op_path, "w") as f:
        f.write(tmpl.render(**kwargs))
    print("Fin. Check {} in Outputs folder".format(file_name))
    webbrowser.open(op_path.as_uri())
Example 3
def take_images_input(image_type, meta):
    """Prompts for space-separated image ids of the given type and returns
    their resolved absolute paths; an empty entry returns an empty list."""
    image_path = Path(settings.path_for(settings.MASTER_DATA_PATH))
    while True:
        image_ids = input(
            "Enter image ids (space separated) that you think are {}: ".format(
                image_type)).split(' ')
        invalid_input = False
        for image in image_ids:
            if not image:
                return []
            if image not in meta:
                print(
                    "Image '{}' not found. Please check the spelling and enter again."
                    .format(image))
                invalid_input = True
        if not invalid_input:
            return [str((image_path / i).resolve()) for i in image_ids]
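
A hedged usage sketch; `metadata` is a hypothetical dict keyed by image id, since the function only membership-tests against `meta`:

# Hypothetical metadata mapping image ids to their attributes.
metadata = {'Hand_0000674.jpg': {'aspect': 'dorsal'}}
dorsal_paths = take_images_input('dorsal', metadata)
print(dorsal_paths)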
Example 4
def get_meal_array(padding=False):
    """
    Converts meal data from CSVs in MealDataFolder into a numpy array and a
    class-label array.
    Returns:
        tuple of the form (numpy array of all data, numpy array of class labels)
    """
    directory = Path(settings.path_for(settings.FILES.MEAL_DATA_DIRECTORY))
    directory = str(directory)

    meal_data_np = []
    class_labels_np = []

    max_len = 0
    for meal_data_file in os.listdir(directory):
        # Files with 'Nomeal' in the name are the negative class.
        class_label = 0 if 'Nomeal' in meal_data_file else 1

        meal_data = pd.read_csv(os.path.join(directory, meal_data_file),
                                na_filter=False,
                                header=None,
                                sep='\n')
        for i in range(len(meal_data)):
            t = getFloatFromObjectForMealData(meal_data.loc[i])
            if t.size != 0:
                # Reverse the reading order.
                t = t[::-1]
                if padding and t.size > max_len:
                    max_len = t.size
                meal_data_np.append(t)
                class_labels_np.append(class_label)

    if padding and max_len:
        meal_data_np = pad_array(meal_data_np, max_len)
    else:
        meal_data_np = np.array(meal_data_np)

    class_labels_np = np.array(class_labels_np)
    return meal_data_np, class_labels_np
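
`pad_array` is referenced above but not defined in these excerpts; a minimal sketch, assuming it right-pads each variable-length row with zeros up to max_len:

import numpy as np

def pad_array(rows, max_len):
    # Right-pad every 1D row with zeros so all rows share the same length.
    padded = np.zeros((len(rows), max_len))
    for i, row in enumerate(rows):
        padded[i, :row.size] = row
    return padded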
Example 5
def print_term_weight_pairs(term_weight_pairs, file_name):
    """Writes (term, weight) pairs to a CSV file in the output directory."""
    file_name = Path(settings.path_for(settings.OUTPUT_PATH)) / file_name
    with open(file_name, "w", newline="") as f:
        writer = csv.writer(f)
        writer.writerows(term_weight_pairs)
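
A hedged usage sketch with made-up (term, weight) pairs; the CSV lands in the configured OUTPUT_PATH:

pairs = [('latent_1', 0.42), ('latent_2', 0.31), ('latent_3', 0.27)]
print_term_weight_pairs(pairs, 'term_weights.csv')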
Example 6
from pathlib import Path
import csv
import webbrowser
from dynaconf import settings

from jinja2 import Environment
from jinja2.loaders import FileSystemLoader

env = Environment(
    loader=FileSystemLoader(settings.path_for(settings.TEMPLATE_PATH)))


def write_to_file(tmp, file_name, **kwargs):
    """
    Writes the top k image comparisions to a html file using Jinja templates.

    Args:
        tmp: The template file
        file_name: The filename of the output
        **kwargs: The data to render
    """
    tmpl = env.get_template(tmp)
    op_path = Path(settings.path_for(settings.OUTPUT_PATH)) / file_name

    with open(op_path, "w") as f:
        f.write(tmpl.render(**kwargs))
    print("Fin. Check {} in Outputs folder".format(file_name))
    webbrowser.open(op_path.as_uri())


def print_term_weight_pairs(term_weight_pairs, file_name):
    """Writes (term, weight) pairs to a CSV file in the output directory."""
    file_name = Path(settings.path_for(settings.OUTPUT_PATH)) / file_name
    with open(file_name, "w", newline="") as f:
        writer = csv.writer(f)
        writer.writerows(term_weight_pairs)
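
A hedged usage sketch for write_to_file, assuming a hypothetical results.html template exists under the configured TEMPLATE_PATH:

# Renders the template with the given context, saves it, and opens a browser.
write_to_file('results.html', 'task1_output.html',
              images=['Hand_0000674.jpg'], scores=[0.97])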
Example 7
import sys
from pathlib import Path

from dynaconf import settings
from scipy.special import softmax
from sklearn.metrics.pairwise import euclidean_distances
from sklearn.preprocessing import MinMaxScaler

sys.path.append('../')
from feature_reduction.feature_reduction import reducer
from metric import distance, similarity
from utils import get_metadata, get_term_weight_pairs, get_all_vectors

import output
import time
from joblib import Memory

CACHE_DIR = Path(settings.path_for(settings.PPR.TASK_3.CACHE_DIR))
CACHE = Memory(str(CACHE_DIR), verbose=1)

mapping = {
    "without_acs": 0,
    "with_acs": 1,
    "male": 0,
    "female": 1,
    "very fair": 0,
    "fair": 1,
    "medium": 2,
    "dark": 3,
    "dorsal": 0,
    "palmar": 1,
    "right": 0,
    "left": 1
Example 8
from __future__ import annotations

from dynaconf import settings
from dynaconf.loaders.toml_loader import write

# Get the potato
original = str(settings.POTATO)  # str() to force a copy
modern = "Modern Potato"
print("Original data:", original)
assert settings.POTATO == settings.get("potato") == original

# Change the Toml file
print("Change POTATO to `Modern Potato` in settings.toml file...")
write(settings.path_for("settings.toml"), {"default": {"potato": modern}})

# Print the original
print("Dynaconf knows:", settings.POTATO)
assert settings.POTATO == settings.get("potato") == original

# Get it freshly
print("Read it freshly to get changed value...")
print("Changed data:", settings.get_fresh("POTATO"))
assert settings.POTATO == settings.get_fresh("potato") == modern

# Get it again
print("Now read again without the get_fresh method")
print("Still the new value no?", settings.POTATO)
assert settings.POTATO == settings.get("potato") == modern

# Restore it
write(settings.path_for("settings.toml"), {"default": {"potato": original}})
Example 9
    dorsal_dist = dorsal_kmeans.get_closest(query,
                                            dorsal_kmeans.centroids,
                                            return_min=True)
    palmar_dist = palmar_kmeans.get_closest(query,
                                            palmar_kmeans.centroids,
                                            return_min=True)
    return 'dorsal' if dorsal_dist <= palmar_dist else 'palmar'


if __name__ == "__main__":
    parser = prepare_parser()
    args = parser.parse_args()
    n_clusters = args.n_clusters

    # Get the absolute data path and the models whose features to concatenate
    data_path = Path(settings.path_for(settings.DATA_PATH))
    model = settings.TASK2_CONFIG.MODEL

    # Fetch training data for dorsal and palmar images from the labelled DB
    dorsal_paths = filter_images('dorsal')
    dorsal_paths, dorsal_vectors = get_all_vectors(
        model, f={'path': {
            '$in': dorsal_paths
        }})
    palmar_paths, palmar_vectors = get_all_vectors(
        model, f={'path': {
            '$nin': dorsal_paths
        }})

    # Fetch test data from the unlabelled DB
    test_data_paths, test_data = get_all_vectors(model, unlabelled_db=True)
Example 10
POWER_ITR = 'power_iteration'
MATH = 'math_method'

mapping = {
    "without_acs": 0,
    "with_acs": 1,
    "male": 0,
    "female": 1,
    "very fair": 0,
    "fair": 1,
    "medium": 2,
    "dark": 3,
}

CACHE_DIR = Path(settings.path_for(settings.PPR.FEEDBACK.CACHE_DIR))
CACHE = Memory(str(CACHE_DIR), verbose=1)


def math_method(adj_matrix, alpha):
    # Closed form: multiplying this inverse by a seed vector (scaled by
    # 1 - alpha) yields the personalized PageRank scores.
    return np.linalg.inv(
        np.identity(adj_matrix.shape[0]) - (alpha * adj_matrix))


def power_iteration(adj_matrix, alpha, seed):
    n = 0
    pi_1 = None
    pi_2 = np.copy(seed)

    # Standard personalized PageRank update, iterated to a fixed point. The
    # source is truncated mid-expression here; the continuation below
    # ((1 - alpha) * seed plus a convergence check) is an assumption.
    while True:
        pi_1 = np.matmul(np.dot(alpha, adj_matrix), pi_2) + np.dot(
            1 - alpha, seed)
        n += 1
        if np.allclose(pi_1, pi_2):
            return pi_1
        pi_2 = pi_1
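
A hedged toy comparison of the two PPR computations above; the 3-node adjacency matrix, damping factor, and seed are all made up for illustration:

import numpy as np

A = np.array([[0.0, 0.5, 0.5],
              [0.5, 0.0, 0.5],
              [0.5, 0.5, 0.0]])  # toy column-stochastic adjacency matrix
seed = np.array([1.0, 0.0, 0.0])  # restart mass on node 0
alpha = 0.85

# Closed form: pi = (1 - alpha) * (I - alpha*A)^-1 * seed
pi_closed = (1 - alpha) * np.dot(math_method(A, alpha), seed)
pi_iter = power_iteration(A, alpha, seed)
print(np.allclose(pi_iter, pi_closed))  # expected: True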
Example 11
    # If the model is sift, generate and insert histogram vectors
    if model == 'sift':
        sift.generate_histogram_vectors(coll)


if __name__ == "__main__":
    parser = prepare_parser()
    args = parser.parse_args()

    # Edge case: the two flags are mutually exclusive
    if args.build_unlabeled and args.build_master:
        raise Exception("Cannot build the unlabeled and master databases at once.")

    # Setting images folder
    data_path = Path(settings.path_for(settings.DATA_PATH))
    if args.build_unlabeled:
        data_path = Path(settings.path_for(settings.UNLABELED_DATA_PATH))
    elif args.build_master:
        data_path = Path(settings.path_for(settings.MASTER_DATA_PATH))

    # Setting database
    database = settings.QUERY_DATABASE if args.build_unlabeled else (
        settings.MASTER_DATABASE if args.build_master else settings.DATABASE)

    # Setting metadata CSV
    metadata_path = Path(settings.path_for(settings.METADATA_CSV))
    if args.build_unlabeled:
        metadata_path = Path(settings.path_for(settings.UNLABELED_METADATA_CSV))
    elif args.build_master:
        metadata_path = Path(settings.path_for(settings.MASTER_METADATA_CSV))
Example 12
    predicted_relevant_indices = np.where(predictions == 1.0)[0]

    # Combine train data and predicted-relevant data indices
    test_data_images = np.append(
        test_data_images[predicted_relevant_indices],
        np.array(master_images)[relevant_indices])
    test_data = np.vstack(
        (test_data[predicted_relevant_indices, :], train_data[:, :-1]))

    # Compute the Euclidean distance from the query image for all relevant images
    euclidean_distances = euclidean(
        test_data, np.tile(q_img_vector[0], (test_data.shape[0], 1)))

    # Prepare the final result: the t images closest to the query
    result = [
        image for image, _ in sorted(
            zip(test_data_images, euclidean_distances), key=lambda a: a[1])[:t]
    ]

    return result

if __name__ == "__main__":
    path = str(Path(settings.path_for(settings.MASTER_DATA_PATH)).resolve())
    relevant_images = [
        path + '/' + 'Hand_0007166.jpg', path + '/' + 'Hand_0007168.jpg', path + '/' + 'Hand_0008622.jpg',
        path + '/' + 'Hand_0008628.jpg'
    ]
    irrelevant_images = [
        path + '/' + 'Hand_0009376.jpg', path + '/' + 'Hand_0000902.jpg', path + '/' + 'Hand_0011283.jpg',
        path + '/' + 'Hand_0008014.jpg'
    ]
    print(decision_tree_feedback(relevant_images, irrelevant_images, 8,
                                 path + '/' + 'Hand_0000674.jpg'))
Example 13
        # Generate compressed sparse row matrices
        planes_per_layer.append(scipy.sparse.csr_matrix(planes))

    # Index all points
    for i in range(data_matrix.shape[0]):
        perform_lsh(i, data_matrix[i], layers, planes_per_layer, images)

    print("\nIndex structure created.")
    print("\nTime Taken: ", (time.time() - start))

    # Part b
    query = input("\nEnter the query image id:\n")
    to_output = query.split('.')[0]
    t = int(input("Enter t:\n"))

    data_path = Path(settings.path_for(settings.MASTER_DATA_PATH)).resolve()

    query = str(data_path / query)

    index = images.index(query)
    query_vec = data_matrix[index]

    results = query_relevant_images(query_vec, t, layers, planes_per_layer,
                                    data_matrix, images)

    images = results[0]
    member_count = results[1]
    unique_member_count = results[2]

    image_paths = []
    all_images = []
    filename = args.file_name
    meal_data_np = []
    print("loading file - " + filename)
    meal_data = pd.read_csv(filename,
                            na_filter=False,
                            header=None,
                            sep='\n')
    for i, _ in enumerate(meal_data.iterrows()):
        t = helper.getFloatFromObjectForMealData(meal_data.loc[i])
        if t.size != 0:
            t = t[::-1]
            meal_data_np.append(t)
    meal_data_np = np.array(meal_data_np)

    # Read all models and their settings
    directory = Path(settings.path_for(settings.FILES.MODELS))
    directory = str(directory)
    model_dict = list(settings.CLASSIFIER.MODEL_DICT)

    # Load the saved models and run predict on the generated vectors
    classifier_predictions = pd.DataFrame()
    for classifier in model_dict:
        filename = classifier[1]
        model = joblib.load(os.path.join(directory, filename))
        meal_vectors, labels = get_meal_vectors(classifier[0],
                                                apply_pca=True,
                                                padding=False,
                                                load_pca=True)
        predictions = model.predict(meal_vectors)
        classifier_predictions[classifier[0]] = predictions
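
get_meal_vectors is called above but not defined in these excerpts; a minimal sketch, assuming it composes get_meal_array and get_pca_vectors from the earlier examples (the component count k=10 is a placeholder):

def get_meal_vectors(model_name, apply_pca=True, padding=False, load_pca=False):
    # Hypothetical composition of the helpers shown earlier: read the meal
    # CSVs, then optionally project them with the saved PCA for this model.
    vectors, labels = get_meal_array(padding=padding)
    if apply_pca:
        vectors = get_pca_vectors(model_name, vectors, k=10,
                                  load_pca=load_pca)
    return vectors, labels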