import os
from pathlib import Path

import joblib
from dynaconf import settings
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler


def get_pca_vectors(model_name, vectors, k, load_pca):
    """Generates k PCA vectors.

    Arguments:
        model_name {str} -- prefix for the persisted scaler/PCA files
        vectors {np.array} -- 2D numpy array of features
        k {int} -- number of PCA components to keep
        load_pca {bool} -- reuse a previously fitted scaler/PCA if True

    Returns:
        np.array -- 2D numpy array of reduced features
    """
    directory = str(Path(settings.path_for(settings.FILES.MODELS)))
    k = min(k, vectors.shape[1])
    if load_pca:
        # Reuse the fitted models: transform only, never refit.
        std_scaler = joblib.load(
            os.path.join(directory, model_name + '_std_scaler.pkl'))
        pca = joblib.load(os.path.join(directory, model_name + '_pca.pkl'))
        scaled_values = std_scaler.transform(vectors)
        pca_vectors = pca.transform(scaled_values)
    else:
        std_scaler = StandardScaler()
        pca = PCA(n_components=k)
        scaled_values = std_scaler.fit_transform(vectors)
        pca_vectors = pca.fit_transform(scaled_values)
        # Persist into the models directory so the load_pca path finds them.
        joblib.dump(pca, os.path.join(directory, model_name + '_pca.pkl'))
        joblib.dump(std_scaler,
                    os.path.join(directory, model_name + '_std_scaler.pkl'))
    print("Total variance accounted for: ",
          sum(pca.explained_variance_ratio_))
    return pca_vectors
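# A minimal usage sketch with random data and a hypothetical 'demo' model
# name; the pickles land in the FILES.MODELS directory on the first call.
import numpy as np

features = np.random.rand(100, 50)  # hypothetical 100 samples x 50 features

# First call fits scaler + PCA and persists them under the 'demo' prefix
reduced = get_pca_vectors('demo', features, k=10, load_pca=False)

# Later calls reuse the fitted models on new data of the same width
more = get_pca_vectors('demo', np.random.rand(5, 50), k=10, load_pca=True)
print(reduced.shape, more.shape)  # (100, 10) (5, 10)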
def write_to_file(tmp, file_name, **kwargs):
    """
    Writes the top k image comparisons to an HTML file using Jinja templates.

    Args:
        tmp: The template file
        file_name: The filename of the output
        **kwargs: The data to render
    """
    tmpl = env.get_template(tmp)
    op_path = Path(settings.path_for(settings.OUTPUT_PATH)) / file_name
    with open(op_path, "w") as f:
        f.write(tmpl.render(**kwargs))
    print("Fin. Check {} in Outputs folder".format(file_name))
    webbrowser.open(op_path.as_uri())
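# A hypothetical call; the template name and context keys are illustrative,
# not taken from this repo.
write_to_file('top_k.html', 'task1_results.html',
              images=[('Hand_0000674.jpg', 0.92)],
              title='Top-k similar images')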
def take_images_input(image_type, meta):
    image_path = Path(settings.path_for(settings.MASTER_DATA_PATH))
    while True:
        image_ids = input(
            "Enter image ids (space separated) that you think are {}: ".format(
                image_type)).split(' ')
        invalid_input = False
        for image in image_ids:
            # An empty entry means the user has no images to provide
            if not image:
                return []
            if image not in meta:
                print("Image '{}' not found. Please check the spelling "
                      "and enter again.".format(image))
                invalid_input = True
        if not invalid_input:
            return [str((image_path / i).resolve()) for i in image_ids]
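# Interactive usage sketch with a hypothetical one-entry metadata dict.
meta = {'Hand_0000674.jpg': {'aspectOfHand': 'dorsal right'}}
paths = take_images_input('relevant', meta)  # prompts on stdin; [] on empty input
print(paths)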
def get_meal_array(padding=False):
    """
    Converts meal data from the CSVs in MealDataFolder into a numpy array
    and a class-label array.

    Returns:
        tuple of the form (numpy array of all data, numpy array of class labels)
    """
    directory = str(Path(settings.path_for(settings.FILES.MEAL_DATA_DIRECTORY)))
    meal_data_np = []
    class_labels_np = []
    max_len = 0
    for meal_data_file in os.listdir(directory):
        # print("loading file - " + meal_data_file)
        # 'Nomeal' files are the negative class
        class_label = 0 if 'Nomeal' in meal_data_file else 1
        meal_data = pd.read_csv(os.path.join(directory, meal_data_file),
                                na_filter=False,
                                header=None,
                                sep='\n')
        for i, _ in enumerate(meal_data.iterrows()):
            t = getFloatFromObjectForMealData(meal_data.loc[i])
            if t.size != 0:
                t = t[::-1]
                if padding and t.size > max_len:
                    max_len = t.size
                meal_data_np.append(t)
                class_labels_np.append(class_label)
    if padding and max_len:
        meal_data_np = pad_array(meal_data_np, max_len)
    else:
        meal_data_np = np.array(meal_data_np)
    class_labels_np = np.array(class_labels_np)
    return meal_data_np, class_labels_np
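# Quick usage sketch; assumes the meal CSVs are present under
# FILES.MEAL_DATA_DIRECTORY.
X, y = get_meal_array(padding=True)
print(X.shape, y.shape)  # (n_rows, max_len) and (n_rows,) when padded
print(set(y))            # {0, 1}: 0 for 'Nomeal' files, 1 otherwise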
def print_term_weight_pairs(term_weight_pairs, file_name):
    file_name = Path(settings.path_for(settings.OUTPUT_PATH)) / file_name
    with open(file_name, "w", newline="") as f:
        writer = csv.writer(f)
        writer.writerows(term_weight_pairs)
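# A hypothetical call with a few (term, weight) rows; the CSV is written
# under OUTPUT_PATH.
pairs = [('latent_1', 0.82), ('latent_2', 0.11), ('latent_3', 0.07)]
print_term_weight_pairs(pairs, 'term_weights.csv')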
from pathlib import Path
import csv
import webbrowser

from dynaconf import settings
from jinja2 import Environment
from jinja2.loaders import FileSystemLoader

env = Environment(
    loader=FileSystemLoader(settings.path_for(settings.TEMPLATE_PATH)))
import sys
import time
from pathlib import Path

from dynaconf import settings
from joblib import Memory
from scipy.special import softmax
from sklearn.metrics.pairwise import euclidean_distances
from sklearn.preprocessing import MinMaxScaler

sys.path.append('../')
from feature_reduction.feature_reduction import reducer
from metric import distance, similarity
from utils import get_metadata, get_term_weight_pairs, get_all_vectors
import output

CACHE_DIR = Path(settings.path_for(settings.PPR.TASK_3.CACHE_DIR))
CACHE = Memory(str(CACHE_DIR), verbose=1)

# Map categorical metadata values onto integer codes
mapping = {
    "without_acs": 0,
    "with_acs": 1,
    "male": 0,
    "female": 1,
    "very fair": 0,
    "fair": 1,
    "medium": 2,
    "dark": 3,
    "dorsal": 0,
    "palmar": 1,
    "right": 0,
    "left": 1,
}
from __future__ import annotations

from dynaconf import settings
from dynaconf.loaders.toml_loader import write

# Get the potato
original = str(settings.POTATO)  # str() to force a copy
modern = "Modern Potato"
print("Original data:", original)
assert settings.POTATO == settings.get("potato") == original

# Change the Toml file
print("Change POTATO to `Modern Potato` in settings.toml file...")
write(settings.path_for("settings.toml"), {"default": {"potato": modern}})

# Print the original
print("Dynaconf knows:", settings.POTATO)
assert settings.POTATO == settings.get("potato") == original

# Get it freshly
print("Read it freshly to get changed value...")
print("Changed data:", settings.get_fresh("POTATO"))
assert settings.POTATO == settings.get_fresh("potato") == modern

# Get it again
print("Now read again without the get_fresh method")
print("Still the new value, no?", settings.POTATO)
assert settings.POTATO == settings.get("potato") == modern

# Restore it
write(settings.path_for("settings.toml"), {"default": {"potato": original}})
    dorsal_dist = dorsal_kmeans.get_closest(query, dorsal_kmeans.centroids,
                                            return_min=True)
    palmar_dist = palmar_kmeans.get_closest(query, palmar_kmeans.centroids,
                                            return_min=True)
    return 'dorsal' if dorsal_dist <= palmar_dist else 'palmar'


if __name__ == "__main__":
    parser = prepare_parser()
    args = parser.parse_args()
    n_clusters = args.n_clusters

    # Get the absolute data path and the model whose features to use
    data_path = Path(settings.path_for(settings.DATA_PATH))
    model = settings.TASK2_CONFIG.MODEL

    # Fetch training data for dorsal and palmar images from the labelled DB
    dorsal_paths = filter_images('dorsal')
    dorsal_paths, dorsal_vectors = get_all_vectors(model,
                                                   f={'path': {
                                                       '$in': dorsal_paths
                                                   }})
    palmar_paths, palmar_vectors = get_all_vectors(model,
                                                   f={'path': {
                                                       '$nin': dorsal_paths
                                                   }})

    # Fetch test data from the unlabelled DB
    test_data_paths, test_data = get_all_vectors(model, unlabelled_db=True)
import numpy as np
from pathlib import Path

from dynaconf import settings
from joblib import Memory

POWER_ITR = 'power_iteration'
MATH = 'math_method'

# Map categorical metadata values onto integer codes
mapping = {
    "without_acs": 0,
    "with_acs": 1,
    "male": 0,
    "female": 1,
    "very fair": 0,
    "fair": 1,
    "medium": 2,
    "dark": 3,
}

CACHE_DIR = Path(settings.path_for(settings.PPR.FEEDBACK.CACHE_DIR))
CACHE = Memory(str(CACHE_DIR), verbose=1)


def math_method(adj_matrix, alpha):
    # Closed-form PPR: invert (I - alpha * A), then apply the restart vector
    return np.linalg.inv(
        np.identity(adj_matrix.shape[0]) - (alpha * adj_matrix))


def power_iteration(adj_matrix, alpha, seed):
    n = 0
    pi_1 = None
    pi_2 = np.copy(seed)
    while True:
        # PPR update: pi = alpha * A * pi + (1 - alpha) * seed
        pi_1 = np.matmul(np.dot(alpha, adj_matrix), pi_2) + np.dot(
            1 - alpha, seed)
        n += 1
        if np.allclose(pi_1, pi_2):
            break
        pi_2 = pi_1
    return pi_1
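# Both solvers target the same personalized-PageRank fixed point
# pi = alpha * A * pi + (1 - alpha) * seed: math_method inverts (I - alpha*A)
# for the closed form, power_iteration approaches it iteratively. A sanity
# check on a toy, made-up column-stochastic matrix:
import numpy as np

A = np.array([[0.0, 0.5, 0.5],
              [0.5, 0.0, 0.5],
              [0.5, 0.5, 0.0]])
seed = np.array([1.0, 0.0, 0.0])
alpha = 0.85

pi_closed = math_method(A, alpha) @ ((1 - alpha) * seed)
pi_iter = power_iteration(A, alpha, seed)
print(np.allclose(pi_closed, pi_iter, atol=1e-4))  # True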
    # If the model is SIFT, generate and insert histogram vectors
    if model == 'sift':
        sift.generate_histogram_vectors(coll)


if __name__ == "__main__":
    parser = prepare_parser()
    args = parser.parse_args()

    # The two build flags are mutually exclusive
    if args.build_unlabeled and args.build_master:
        raise Exception(
            "--build_unlabeled and --build_master are mutually exclusive.")

    # Setting images folder
    data_path = Path(settings.path_for(settings.DATA_PATH))
    if args.build_unlabeled:
        data_path = Path(settings.path_for(settings.UNLABELED_DATA_PATH))
    elif args.build_master:
        data_path = Path(settings.path_for(settings.MASTER_DATA_PATH))

    # Setting database
    database = settings.QUERY_DATABASE if args.build_unlabeled else (
        settings.MASTER_DATABASE if args.build_master else settings.DATABASE)

    # Setting metadata CSV
    metadata_path = Path(settings.path_for(settings.METADATA_CSV))
    if args.build_unlabeled:
        metadata_path = Path(settings.path_for(settings.UNLABELED_METADATA_CSV))
    elif args.build_master:
        metadata_path = Path(settings.path_for(settings.MASTER_METADATA_CSV))
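# prepare_parser is defined elsewhere in the repo; this sketch only mirrors
# the two flags the block above relies on, everything else is assumed.
import argparse

def prepare_parser_sketch():
    parser = argparse.ArgumentParser(description='Build the image database.')
    parser.add_argument('--build_unlabeled', action='store_true',
                        help='index the unlabeled image set')
    parser.add_argument('--build_master', action='store_true',
                        help='index the master image set')
    return parser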
    predicted_relevant_indices = np.where(predictions == 1.0)[0]

    # Combine train data and predicted-relevant data indices
    test_data_images = np.append(
        test_data_images[predicted_relevant_indices],
        np.array(master_images)[relevant_indices])
    test_data = np.vstack(
        (test_data[predicted_relevant_indices, :], train_data[:, :-1]))

    # Compute euclidean distance from the query image for all relevant images
    euclidean_distances = euclidean(
        test_data, np.tile(q_img_vector[0], (test_data.shape[0], 1)))

    # Prepare final result: the t nearest relevant images
    result = []
    for index, res in enumerate(
            sorted(list(zip(test_data_images, euclidean_distances)),
                   key=lambda a: a[1])):
        if index < t:
            result.append(res[0])
        else:
            break
    return result


if __name__ == "__main__":
    path = str(Path(settings.path_for(settings.MASTER_DATA_PATH)).resolve())
    relevant_images = [
        path + '/' + 'Hand_0007166.jpg',
        path + '/' + 'Hand_0007168.jpg',
        path + '/' + 'Hand_0008622.jpg',
        path + '/' + 'Hand_0008628.jpg',
    ]
    irrelevant_images = [
        path + '/' + 'Hand_0009376.jpg',
        path + '/' + 'Hand_0000902.jpg',
        path + '/' + 'Hand_0011283.jpg',
        path + '/' + 'Hand_0008014.jpg',
    ]
    print(
        decision_tree_feedback(relevant_images, irrelevant_images, 8,
                               path + '/' + 'Hand_0000674.jpg'))
# Generate compressed sparse row matrices of the hyperplanes for each layer
planes_per_layer.append(scipy.sparse.csr_matrix(planes))

# Index all points
for i in range(data_matrix.shape[0]):
    perform_lsh(i, data_matrix[i], layers, planes_per_layer, images)

print("\nIndex structure created.")
print("\nTime Taken: ", (time.time() - start))

# Part b: query the index
query = input("\nEnter the query image id:\n")
to_output = query.split('.')[0]
t = int(input("Enter t:\n"))

data_path = Path(settings.path_for(settings.MASTER_DATA_PATH)).resolve()
query = str(data_path / query)
index = images.index(query)
query_vec = data_matrix[index]

results = query_relevant_images(query_vec, t, layers, planes_per_layer,
                                data_matrix, images)
images = results[0]
member_count = results[1]
unique_member_count = results[2]

image_paths = []
all_images = []
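# perform_lsh itself is not shown in this excerpt. A minimal sketch of the
# random-hyperplane bucketing it likely performs; the bucket layout and the
# function body here are assumptions, not the repo's implementation.
import numpy as np

def perform_lsh_sketch(i, vector, layers, planes_per_layer, images):
    """Assumes `layers` is a list of dicts mapping hash keys to image paths."""
    for layer, buckets in enumerate(layers):
        # One bit per hyperplane: the sign of the point's projection onto it
        projections = planes_per_layer[layer].dot(vector)
        key = ''.join('1' if p > 0 else '0' for p in np.ravel(projections))
        buckets.setdefault(key, []).append(images[i])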
filename = args.file_name
meal_data_np = []
print("loading file - " + filename)
meal_data = pd.read_csv(filename,
                        na_filter=False,
                        header=None,
                        sep='\n')
for i, _ in enumerate(meal_data.iterrows()):
    t = helper.getFloatFromObjectForMealData(meal_data.loc[i])
    if t.size != 0:
        t = t[::-1]
        meal_data_np.append(t)
meal_data_np = np.array(meal_data_np)

# Read all models and their settings
directory = str(Path(settings.path_for(settings.FILES.MODELS)))
model_dict = list(settings.CLASSIFIER.MODEL_DICT)

# Load each saved model and run predict on the generated vectors
classifier_predictions = pd.DataFrame()
for classifier in model_dict:
    filename = classifier[1]
    model = joblib.load(os.path.join(directory, filename))
    meal_vectors, labels = get_meal_vectors(classifier[0],
                                            apply_pca=True,
                                            padding=False,
                                            load_pca=True)
    predictions = model.predict(meal_vectors)
    classifier_predictions[classifier[0]] = predictions
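# One plausible follow-up (sketch only, not necessarily the original script's
# next step): combine the per-classifier columns by majority vote.
final_predictions = classifier_predictions.mode(axis=1)[0].astype(int)
print(final_predictions.value_counts())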