def compare():
    # Per-feature mean squared differences for matching vs. non-matching pairs.
    similars = auxilary.read_csv(fileName="../csv_files/similars_diff.csv")
    differs = auxilary.read_csv(fileName="../csv_files/different_diff.csv")
    sims = np.array(similars.mean())
    difs = np.array(differs.mean())

    # How much more non-matching pairs differ, per feature; drop the label column.
    sub = np.subtract(difs, sims)
    sub = sub[:-1]

    # Keep only features with a positive gap; the rest stay as zero rows.
    feats = np.zeros([22, 2])
    for i in range(22):
        if sub[i] > 0:
            feats[i] = [int(i), sub[i]]

    # Sort ascending by gap, then flip. np.flip on a 2-D array reverses both
    # axes, so the rows end up in descending order AND the two columns swap:
    # flipped[:, 0] holds the gaps, flipped[:, 1] the feature indices.
    sorted_array = feats[np.argsort(feats[:, 1])]
    flipped = np.flip(sorted_array)

    w_mul = copy.deepcopy(feats)
    feats = flipped[:, 0]  # gaps, descending
    idx = flipped[:, 1]    # feature indices, by descending gap

    # Normalise the gaps into weights that sum to 1.
    summ = np.sum(feats)
    w_mul = w_mul[:, 1] / summ  # weights in the original feature order
    prob = feats / summ         # weights in descending order
    return prob, idx, w_mul * 100
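
# A hedged usage sketch for compare(): the helper name and inputs below are
# hypothetical, not part of this module. It applies the per-feature weights
# to score a new pair of 22-feature vectors with the same squared-difference
# metric used above.
def weighted_pair_score(vec_a, vec_b):
    prob, idx, w_mul = compare()
    diff = np.power(np.array(vec_a) - np.array(vec_b), 2)
    return np.sum(diff * (w_mul / 100.0))  # w_mul is returned scaled by 100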
def get_diff_diff(csv_path):
    # Build the non-matching-pair dataset: for as many iterations as there are
    # matching pairs, sample two rows with different labels and store the
    # element-wise squared difference of their feature vectors.
    similars = auxilary.read_csv(fileName="../csv_files/similars_diff.csv")
    iterations = len(similars)
    data = auxilary.read_csv(fileName=csv_path)
    N = data.shape[0]  # 5050
    D = data.shape[1] - 1  # 22
    diff_diff = []
    for i in range(iterations):
        if i % 1000 == 0:
            print(f'iteration: {i}')
        random_int = np.random.randint(0, N)
        row_1 = data.iloc[random_int]
        features_1 = np.array(row_1[:D])
        random_int = np.random.randint(0, N)
        row_2 = data.iloc[random_int]
        # Resample until the second row belongs to a different person.
        while row_2['output'] == row_1['output']:
            random_int = np.random.randint(0, N)
            row_2 = data.iloc[random_int]
        features_2 = np.array(row_2[:D])
        diff_diff.append(np.power(features_1 - features_2, 2))
    df = pd.DataFrame(diff_diff)
    df["label"] = 0
    df.to_csv("../csv_files/different_diff.csv", index=False)
def NN_result_preview(second=False, image_num=None, blur=False, pred=None,
                      detc=None, pos_fals=False):
    print("Load labels")
    if second:
        data = auxilary.read_csv(fileName='../csv_files/embedded_2.csv')
        D = 22
    else:
        data = auxilary.read_csv(fileName='../csv_files/embedded.csv')
        D = 128
    N = len(data)
    inputs = np.array(data.iloc[:, :D])
    labels = np.array(data.iloc[:, D])
    if not second:
        embeddings, face_name, human_file_path = face_recognition.face_recognition(
            dataset_path="../dataset/main_data/*/*",
            preview=True,
            image_num=image_num,
            blur=blur)
    else:
        embeddings, face_name, human_file_path = facial_landmarks.test_preview(
            blur=blur,
            dataset_path="../dataset/main_data/*/*",
            pred=pred,
            detc=detc)
    # identicals, similars = NN_results(embeddings, inputs, labels)
    identicals, similars = NN.predict_input(embeddings, second=second)
    idc_paths, idc_names, sim_paths, sim_names, others_paths, others_names = \
        trim_NN_outputs(labels, face_name, identicals, similars,
                        human_file_path, pos_fals=pos_fals)
    show_tests.buttons(identicalls=idc_paths, id_titles=idc_names,
                       similars=sim_paths, sim_titles=sim_names,
                       left_overs=others_paths, left_titles=others_names,
                       orig_image_path=human_file_path, orig_title=face_name,
                       title1="MATCHING", title2="SIMILARS", title3="OTHERS")
def is_similar(img, orig):
    # Two faces count as similar when the average absolute difference between
    # their key-point distances falls below THRESHOLD.
    THRESHOLD = 2
    data = auxilary.read_csv(auxilary.path_to_csv_key_points)

    def img_info(imag):
        # Distances from the base key point to each of the other key points,
        # parsed from their stringified "[x, y]" form in the csv.
        dists = []
        name = imag.split("/")[4]
        index = data.index[data["image_name"] == name]
        base_point = [
            int(i) for i in [
                x.strip("[]")
                for x in data[data.columns[2]][index].tolist()[0].split(',')
            ]
        ]
        for f in data[data.columns[3:15]]:
            point = [
                int(i) for i in [
                    x.strip('[]')
                    for x in data.loc[index, f].tolist()[0].split(',')
                ]
            ]
            dists.append(auxilary.distance_two_points(base_point, point))
        return dists

    li = []
    for x, y in zip(img_info(img), img_info(orig)):
        li.append(abs(x - y))
    return np.average(li) < THRESHOLD
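
# A minimal usage sketch for is_similar(); the two paths are hypothetical and
# only need to resolve to image names present in the key-points csv (the
# function reads the image name from the fifth path component).
# if is_similar("../dataset/main_data/person_a/img_1.jpg",
#               "../dataset/main_data/person_b/img_2.jpg"):
#     print("faces are similar")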
def store_shape_tri(image_path, image_name):
    shape, tris = get_delaunay_points(image_path, returned=True)
    if len(shape) == 0:
        return

    # Create the csv for shapes and triangles on first use.
    if not os.path.exists(path_to_shape_tris):
        create_shape_tris(fileName=path_to_shape_tris)

    # Append the new row and store the updated csv.
    dataframe = read_csv(fileName=path_to_shape_tris)
    n_rows = len(dataframe)
    dataframe.at[n_rows, 'image_name'] = image_name
    dataframe.at[n_rows, 'shape'] = shape
    dataframe.at[n_rows, 'tris'] = tris
    store_csv(dataframe=dataframe, fileName=path_to_shape_tris)
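
# A hedged read-back sketch (assumption, not part of this module): pandas
# round-trips the `shape`/`tris` columns as strings, so if they were stored
# as plain Python lists they can be parsed back with ast.literal_eval.
# import ast
# df = read_csv(fileName=path_to_shape_tris)
# shape = ast.literal_eval(df.at[0, 'shape'])
# tris = ast.literal_eval(df.at[0, 'tris'])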
def test_euc():
    data = auxilary.read_csv(fileName='../csv_files/embedded_2.csv')
    N = data.shape[0]  # 5050
    D = data.shape[1] - 1  # 22
    inputs = np.array(data.iloc[:, :D])
    labels = np.array(data.iloc[:, D])
    true = 0
    count = 0
    percentages = []
    # Predict every embedding against the full set; track the confidence,
    # negated when the prediction is wrong.
    for emb, label in zip(inputs, labels):
        name, perc = euclidean.euc_predict(emb, inputs, labels)
        if name == label:
            true += 1
            percentages.append(perc)
        else:
            percentages.append(-perc)
        count += 1
        if count % 100 == 0:
            print(f'Working, iteration {count}')
            print(f'Accuracy = {true/count}')
    percentages = np.array(percentages)
    print(f"Accuracy: {true/count}")
    print(f'detected with prob = {np.average(percentages)}')
def prepare_data():
    data = auxilary.read_csv(fileName='../csv_files/embedded_2.csv')
    N = data.shape[0]  # 5050
    D = data.shape[1] - 1  # 22
    inputs = np.array(data.iloc[:, :D])
    labels = np.array(data.iloc[:, D])

    # Encode the string labels as integers in [0, unique_count).
    names_encode = LabelEncoder().fit(labels)
    Y = names_encode.transform(labels)
    unique_count = len(set(labels))

    # Transform Y into shape [N, unique_count]: all zeros except a one at the
    # correct label (one-hot encoding).
    y = np.zeros([N, unique_count])
    for i, row in enumerate(y):
        row[Y[i]] = 1

    return inputs, y, names_encode
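
# A minimal vectorized alternative (assumption, not used by prepare_data())
# to the one-hot loop above, using NumPy fancy indexing. class_ids holds the
# integer labels produced by LabelEncoder, in [0, n_classes).
def one_hot(class_ids, n_classes):
    out = np.zeros([len(class_ids), n_classes])
    out[np.arange(len(class_ids)), class_ids] = 1
    return out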
def multiclass_classification(path):
    dataframe = auxilary.read_csv(path)
    # Use the 22 facial-feature columns as X and the person name as target y.
    X = dataframe.iloc[:, 0:22].to_numpy()
    y = dataframe['output'].to_list()
    decision_tree(X, y)
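
# decision_tree() is defined elsewhere in this project. A minimal sketch of
# what such a helper could look like with scikit-learn; an assumption, not
# the project's actual implementation.
def decision_tree_sketch(X, y):
    from sklearn import tree
    from sklearn.model_selection import train_test_split
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, shuffle=True, random_state=42)
    clf = tree.DecisionTreeClassifier()
    clf.fit(X_train, y_train)
    print('accuracy:', clf.score(X_test, y_test))
    return clf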
def read_data():
    data = auxilary.read_csv(fileName='../csv_files/embedded.csv')
    N = data.shape[0]  # 5050
    D = data.shape[1] - 1  # 128
    inputs = np.array(data.iloc[:, :D])
    labels = np.array(data.iloc[:, D])
    return inputs, labels
def get_diff_similar(csv_path):
    # Build the matching-pair dataset: element-wise squared differences for
    # every unordered pair of George_W_Bush images.
    data = auxilary.read_csv(fileName=csv_path)
    N = data.shape[0]  # 5050
    D = data.shape[1] - 1  # 22
    george_data = data[data['output'] == 'George_W_Bush']
    sim_diff = []
    ev = 0
    for index_1, row_1 in george_data.iterrows():
        features_1 = np.array(row_1[:D])
        if ev % 50 == 0:
            print(f'image: {ev}')
        ev += 1
        for index_2, row_2 in george_data.iterrows():
            # Skip self-pairs and pairs already seen in the other order.
            if index_1 >= index_2:
                continue
            features_2 = np.array(row_2[:D])
            sim_diff.append(np.power(features_1 - features_2, 2))
    df = pd.DataFrame(sim_diff)
    df["label"] = 1
    df.to_csv("../csv_files/similars_diff.csv", index=False)
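
# A hedged end-to-end sketch of how the pair-dataset helpers and compare()
# fit together (assuming '../csv_files/embedded_2.csv' is the 22-feature set):
# get_diff_similar('../csv_files/embedded_2.csv')  # writes similars_diff.csv
# get_diff_diff('../csv_files/embedded_2.csv')     # writes different_diff.csv
# prob, idx, w_mul = compare()                     # per-feature weights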
def Euc_result_preview(image_num=None):
    print("Load labels")
    data = auxilary.read_csv(fileName='../csv_files/embedded.csv')
    N = len(data)
    D = 128
    inputs = np.array(data.iloc[:, :D])
    labels = np.array(data.iloc[:, D])
    embeddings, face_name, human_file_path = face_recognition.face_recognition(
        dataset_path="../dataset/main_data/*/*",
        preview=True,
        image_num=image_num)
    identicals, similars = results(embeddings, inputs, labels)
    idc_paths, idc_names, sim_paths, sim_names, others_paths, others_names = \
        trim_outputs(labels, face_name, identicals, similars)
    show_tests.buttons(identicalls=idc_paths, id_titles=idc_names,
                       similars=sim_paths, sim_titles=sim_names,
                       left_overs=others_paths, left_titles=others_names,
                       orig_image_path=human_file_path, orig_title=face_name,
                       title1="MATCHING", title2="SIMILARS", title3="OTHERS")
def svm_compare():
    '''
    Train an SVM that classifies a pair's difference vector as same person (1)
    or different people (0), and report its test accuracy.
    '''
    # path = '../csv_files/csv_differences.csv'
    path = '../csv_files/svm_set2.csv'
    data = auxilary.read_csv(fileName=path)

    # READ THE INPUTS: N samples, each a 22-dimensional difference vector
    # stored as a stringified list.
    N = len(data)
    D = 22
    inputs = np.zeros([N, D])
    data_input = data['inputs']
    for i, input_list in enumerate(data_input):
        inputs[i] = auxilary.strings_to_lists(input_list)

    # READ THE LABELS
    data_labels = data['label'] == 0
    labels = np.ones([N, 1])
    labels[data_labels] = 0

    # clf = tree.DecisionTreeClassifier()
    clf = svm.SVC(gamma=0.001, C=100, probability=True)
    X, y = inputs, np.ravel(labels)

    # Shuffle the data and hold out 20% for testing.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, shuffle=True, random_state=42,
    )
    clf.fit(X_train, y_train)

    # TO GET EACH SAMPLE OUTPUT
    # for sample in X_test:
    #     y_pred = clf.predict_proba(sample.reshape(1, -1))
    #     print(y_pred)

    # TO DO ALL SAMPLES AT ONCE
    y_prob = clf.predict_proba(X_test)
    y_pred = clf.predict(X_test)
    print("probabilities:\n", y_prob[:5])
    print("\npredictions:\n", y_pred[:5])
    print("correct predictions:\n", np.array(y_pred == y_test)[:5])
    print('\npercentage correct: ',
          100 * np.sum(y_pred == y_test) / len(y_test))

    # Get the support vectors themselves
    # support_vectors = clf.support_vectors_

    # X_train2 = pca.fit_transform(X_train)
    # clf.fit(X_train2, y_train)
    # plot_decision_regions(X_train2, y_train.astype(int), clf=clf, legend=2)
    # plt.xlabel(X_train[0], size=14)
    # plt.ylabel(X_train[1], size=14)
    # plt.title('SVM Decision Region Boundary', size=16)
    # plt.show()

    show_vectors(X_train, y_train)
    return clf
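
# A minimal usage sketch (assumptions: joblib is installed; the filename is
# hypothetical). The classifier returned by svm_compare() can be persisted
# and reloaded instead of retraining every run:
# from joblib import dump, load
# clf = svm_compare()
# dump(clf, '../csv_files/svm_model.joblib')
# clf = load('../csv_files/svm_model.joblib')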
def get_binary_classification(key_points_path, image_dir):
    main_data = auxilary.read_csv(key_points_path)

    # Create the diff csv on first use.
    columns = ['inputs', 'label']
    path = '../csv_files/svm_set2.csv'
    if not os.path.exists(path):
        auxilary.create_csv(path, columns)
    dataframe = auxilary.read_csv(fileName=path)
    print("rows in key-points csv:", len(main_data))
    print("label-0 rows so far:", len(dataframe[dataframe.label == 0]))

    num_sets = int(main_data[['output']].nunique())
    list_of_people = main_data['output'].unique()
    data_size = 1000

    # GET LABEL 1: differences between pairs of images of the same person.
    def get_ones():
        for person in list_of_people:
            print(person)
            dataframe = auxilary.read_csv(fileName=path)
            if len(dataframe.index) > data_size:
                break
            df_values = main_data.iloc[:, 0:22]
            df_filter = df_values[main_data['output'] == person]
            values_per_set = math.ceil(data_size / num_sets)
            pairs = list(itertools.combinations(df_filter.index, 2))
            if len(pairs) > values_per_set:
                pairs = pairs[0:int(values_per_set)]
            for pair in pairs:
                row1 = df_values.iloc[pair[0]]
                row2 = df_values.iloc[pair[1]]
                diff = mse_diff(row1, row2)
                row_dict = {'inputs': [diff], 'label': 1}
                dataframe = auxilary.read_csv(fileName=path)
                auxilary.add_row(dataframe, row_dict, fileName=path)

    # RUN ONCE ONLY
    # get_ones()

    # GET LABEL 0: differences between random images of two different people.
    curr_size = len(dataframe.index)

    def get_zeros(list_of_people, size, index):
        while size > 0:
            index2 = len(list_of_people) - (index + 1)
            print(index, index2)
            # When the two pointers meet, drop the first person and restart.
            if index2 <= index:
                list_of_people = list_of_people[1:]
                size = size - 1
                get_zeros(list_of_people, size, 0)
                break
            df = main_data.iloc[:, 0:22]
            row1 = df[main_data['output'] == list_of_people[index]].sample().squeeze()
            row2 = df[main_data['output'] == list_of_people[index2]].sample().squeeze()
            diff = mse_diff(row1, row2)
            row_dict = {'inputs': [diff], 'label': 0}
            df = auxilary.read_csv(fileName=path)
            print(row_dict['inputs'])
            auxilary.add_row(df, row_dict, fileName=path)
            size = size - 1
            index = index + 1

    # RUN ONCE ONLY
    get_zeros(list_of_people, curr_size, 0)

    dataframe = auxilary.read_csv(fileName=path)
    print("total rows:", len(dataframe))
    print("label-1 rows:", len(dataframe[dataframe.label == 1]))
    print("label-0 rows:", len(dataframe[dataframe.label == 0]))
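
# mse_diff() is defined elsewhere in this project. A hedged sketch of a helper
# consistent with how it is used above (element-wise squared differences of
# two 22-value feature rows); an assumption, not the actual implementation.
def mse_diff_sketch(row1, row2):
    a = np.array(row1, dtype=float)
    b = np.array(row2, dtype=float)
    return list(np.power(a - b, 2))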