def Average(lst):
    """Return the arithmetic mean of the vectors/values in *lst*.

    Args:
        lst: a non-empty sequence of numeric values or numpy vectors
             (assumed feature vectors — TODO confirm with callers).

    Returns:
        The element-wise mean, ``sum(lst) / len(lst)``.

    Raises:
        ValueError: if *lst* is empty or contains a ``None`` entry.
    """
    lst = np.array(lst)
    # Guard the empty case explicitly instead of letting len() == 0
    # surface as a ZeroDivisionError below.
    if len(lst) == 0:
        raise ValueError("Cannot average an empty list")
    for item in lst:
        if item is None:
            logger.info("None found")
            # Was exit(1): raising lets callers handle the bad input
            # instead of killing the whole process.
            raise ValueError("None entry found in list")
    return sum(lst) / len(lst)
def save_test_features(data):
    """Extract ROI features for every test user's images and pickle them.

    Args:
        data: mapping of user -> list of image paths
              (presumably file paths accepted by extract_features_of_roi
              — verify against caller).

    Side effects:
        Overwrites the pickle file at TEST_PATH with
        {user: [features, paths]} and logs the elapsed time.
    """
    started = time.time()
    reduced = {}
    logger.info("Entered on reduce test features process")
    for user, image_paths in data.items():
        # First element of extract_features_of_roi's return is the feature vector.
        reduced[user] = [
            [extract_features_of_roi(p)[0] for p in image_paths],
            image_paths,
        ]
    # saving test database
    with open(TEST_PATH, "wb") as out_file:
        pickle.dump(reduced, out_file)
    logger.info("Extraction concluded in {} seconds.".format(time.time() - started))
def get_features_from_user(name):
    """Load the training catalogue and return *name*'s feature matrix.

    Args:
        name: user identifier used as a key in the pickled train catalogue.

    Returns:
        A 2-D numpy array of shape (n_samples, feature_dim) — the stored
        features are assumed to have shape (n, 1, d) given the reshape
        below (TODO confirm against save_train_features) — or ``None``
        when the user is not enrolled.
    """
    # loading the train catalogue
    with open(TRAIN_PATH, "rb") as file:
        train = pickle.load(file)
    if name in train:
        t = np.array(train[name][0])
        # Drop the singleton middle axis: (n, 1, d) -> (n, d).
        return np.reshape(t, (t.shape[0], t.shape[2]))
    logger.info("User {} not enrolled on dataset".format(name))
    # BUG FIX: the success path returns a single array, but the failure
    # path returned a 3-tuple (None, None, None); callers such as
    # cut_clusters treat the result as one object, so return one None.
    return None
def save_train_features(data):
    """Extract ROI features for every train user, compute per-user centroids,
    and pickle both the centroid table and the full feature catalogue.

    Args:
        data: mapping of user -> list of image paths.

    Side effects:
        Writes {user: centroid} to DATAKEYS_PATH and
        {user: [features, paths]} to TRAIN_PATH; logs elapsed time.

    Returns:
        0 on completion (kept for backward compatibility with callers).
    """
    start_timestamp = time.time()
    dataset = {}
    full_dataset = {}
    logger.info("Entered on reduce train features process")
    for user, paths in data.items():
        features = [extract_features_of_roi(path)[0] for path in paths]
        # Per-user centroid of the extracted feature vectors.
        avg = np.array(Average(features))
        # NOTE(review): removed dead statement
        #   arg = np.array(l2(vector - avg) for vector in features).argmin()
        # np.array over a *generator* builds a 0-d object array, so argmin
        # never computed per-vector distances, and `arg` was never used.
        # If the nearest-to-centroid index is ever needed, the correct form is
        #   np.array([l2(v - avg) for v in features]).argmin()
        dataset[user] = avg
        full_dataset[user] = [features, paths]
    logger.info("Extraction concluded in {} seconds.".format(time.time() - start_timestamp))
    # saving centroid keys and the full train database
    with open(DATAKEYS_PATH, "wb") as file:
        pickle.dump(dataset, file)
    with open(TRAIN_PATH, "wb") as file:
        pickle.dump(full_dataset, file)
    return 0
def avaliate(classes, ctrs, dataset, n):
    """Evaluate cluster assignments: quantization error and miss rate.

    Args:
        classes: sequence of clusters, each a collection of user names.
        ctrs: array of cluster centroids, indexable by cluster id.
        dataset: mapping of user -> [feature_list, paths] (same layout as
                 the pickled train/test catalogues — TODO confirm).
        n: number of winning centers requested from compute_distance.

    Side effects:
        Logs a per-user report plus the overall quantization error and
        miss-prediction rate.
    """
    total_distance = 0          # renamed from `sum`: don't shadow the builtin
    qtd = 0
    total_wrong = 0
    for user, fts in dataset.items():
        # Index (as a 1-element list) of the cluster that contains this user.
        true_centroid = [idc for idc, c in enumerate(classes) if user in c]
        report = {}
        for sample_idx in range(len(fts[0])):
            winners = compute_distance(ctrs, fts[0][sample_idx], n)
            # Distinct loop variable: the original reused `i` for the outer
            # and both inner loops, which is fragile and hard to read.
            st = "centers_" + "".join(str(w) + "_" for w in winners)
            if st not in report:
                report[st] = []
            # Distance from the true centroid to each winning centroid;
            # 0 means the true centroid itself was among the winners.
            distances = [l2(ctrs[true_centroid] - ctrs[w]) for w in winners]
            minimum = min(distances)
            if minimum > 0:
                total_wrong += 1
            report[st].append(minimum)
            total_distance += minimum
            qtd += 1
        logger.info("Report for user {}: {}".format(user, report))
    # Guard against an empty dataset instead of dividing by zero.
    if qtd == 0:
        logger.info("No samples evaluated")
        return
    logger.info("Quantization error: {}".format(total_distance / qtd))
    logger.info("Miss prediction tax: {}".format(total_wrong / qtd))
def cut_clusters(classes, centers, thrs=7):
    """Remove clusters with at most *thrs* members and reassign their users.

    Args:
        classes: sequence of clusters, each a collection of user names.
        centers: sequence of cluster centroids aligned with *classes*.
        thrs: minimum viable cluster size; clusters of size <= thrs are cut.

    Returns:
        (new_classes, new_centers) as numpy arrays with the small clusters
        removed and their users reassigned via reallocate_user.
    """
    centers = np.array(centers)
    classes = np.array(classes)
    # Clusters that fall at or below the size threshold.
    to_cut = {idc: c for idc, c in enumerate(classes) if len(c) <= thrs}
    # Set for O(1) membership tests (the original probed a list per element).
    cut_indexes = set(to_cut)
    new_classes = [classes[i] for i in range(len(classes)) if i not in cut_indexes]
    new_centers = [centers[i] for i in range(len(centers)) if i not in cut_indexes]
    logger.info("Index of wrong clusters {}".format(list(to_cut)))
    for idc, c in to_cut.items():
        logger.info("Cluster {} has minus than {} users".format(idc, thrs))
        for name in c:
            vets = get_features_from_user(name)
            new_cluster = reallocate_user(vets, new_centers)
            logger.info("New cluster for user {} is cluster number {}".format(
                name, new_cluster))
            # NOTE(review): assumes new_classes[new_cluster] supports .append
            # (i.e. clusters are Python lists, not ndarrays) — confirm the
            # element type produced upstream.
            new_classes[new_cluster].append(name)
    new_classes = np.array(new_classes)
    new_centers = np.array(new_centers)
    # Consistency fix: use the module logger like every other function here,
    # instead of a bare print().
    logger.info("New shape for classes and centers: {}, {}".format(
        new_classes.shape, new_centers.shape))
    return new_classes, new_centers