def compare():
    similars = auxilary.read_csv(fileName="../csv_files/similars_diff.csv")
    differs = auxilary.read_csv(fileName="../csv_files/different_diff.csv")

    # mean squared difference per feature for same-person and
    # different-person pairs
    sims = np.array(similars.mean())
    difs = np.array(differs.mean())

    # how much larger the between-person difference is; drop the label column
    sub = np.subtract(difs, sims)
    sub = sub[:-1]

    # keep (feature index, separation) pairs for features that separate classes
    feats = np.zeros([22, 2])
    for i in range(22):
        if sub[i] > 0:
            feats[i] = [i, sub[i]]

    # sort ascending by separation, then flip; np.flip with no axis reverses
    # BOTH axes, so rows become descending and the two columns swap places:
    # column 0 now holds the separation values, column 1 the feature indices
    sorted_array = feats[np.argsort(feats[:, 1])]
    flipped = np.flip(sorted_array)
    w_mul = copy.deepcopy(feats)

    feats = flipped[:, 0]  # separation values, descending
    idx = flipped[:, 1]    # the matching original feature indices
    summ = np.sum(feats)

    # per-feature weights, kept in the original feature order
    w_mul = w_mul[:, 1] / summ

    # normalised separation: a probability-like weight per ranked feature
    prob = feats / summ

    return prob, idx, w_mul * 100
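
# Hedged usage sketch for compare(): `prob` is a normalised weight per
# feature sorted by descending class separation, `idx` maps each slot back
# to its original feature index, and the last value is a percentage weight
# in the original feature order. Assumes both diff CSVs above already exist.
# prob, idx, w_mul = compare()
# for p, i in zip(prob, idx):
#     print(f"feature {int(i)}: weight {p:.3f}")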
def get_diff_diff(csv_path):
    # draw as many random different-person pairs as there are same-person pairs
    similars = auxilary.read_csv(fileName="../csv_files/similars_diff.csv")
    iterations = len(similars)

    data = auxilary.read_csv(fileName=csv_path)
    N = data.shape[0]  # e.g. 5050 rows
    D = data.shape[1] - 1  # e.g. 22 features; the last column is the label
    diff_diff = []
    for i in range(iterations):
        if i % 1000 == 0:
            print(f'iteration: {i}')
        random_int = np.random.randint(0, N)
        row_1 = data.iloc[random_int]
        features_1 = np.array(row_1[:D])

        random_int = np.random.randint(0, N)
        row_2 = data.iloc[random_int]

        # resample until the two rows belong to different people
        while row_2['output'] == row_1['output']:
            random_int = np.random.randint(0, N)
            row_2 = data.iloc[random_int]

        features_2 = np.array(row_2[:D])

        diff_diff.append(np.power(features_1 - features_2, 2))

    df = pd.DataFrame(diff_diff)
    df["label"] = 0
    df.to_csv("../csv_files/different_diff.csv", index=False)
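
# Hedged usage sketch: the two diff tables are meant to be built as a pair,
# same-person pairs by get_diff_similar (below) and random different-person
# pairs here, before compare() ranks the features.
# get_diff_similar("../csv_files/embedded_2.csv")
# get_diff_diff("../csv_files/embedded_2.csv")
# prob, idx, w_mul = compare()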
def NN_result_preview(second=False,
                      image_num=None,
                      blur=False,
                      pred=None,
                      detc=None,
                      pos_fals=False):
    print("Load labels")
    if second:
        data = auxilary.read_csv(fileName='../csv_files/embedded_2.csv')
        D = 22
    else:
        data = auxilary.read_csv(fileName='../csv_files/embedded.csv')
        D = 128
    inputs = np.array(data.iloc[:, :D])
    labels = np.array(data.iloc[:, D])

    if not second:
        embeddings, face_name, human_file_path = face_recognition.face_recognition(
            dataset_path="../dataset/main_data/*/*",
            preview=True,
            image_num=image_num,
            blur=blur)
    else:
        embeddings, face_name, human_file_path = facial_landmarks.test_preview(
            blur=blur,
            dataset_path="../dataset/main_data/*/*",
            pred=pred,
            detc=detc)

    # identicals, similars = NN_results(embeddings, inputs, labels)
    identicals, similars = NN.predict_input(embeddings, second=second)

    idc_paths, idc_names, sim_paths, sim_names, others_paths, others_names = \
        trim_NN_outputs(labels, face_name, identicals, similars,
                        human_file_path, pos_fals=pos_fals)

    show_tests.buttons(identicalls=idc_paths,
                       id_titles=idc_names,
                       similars=sim_paths,
                       sim_titles=sim_names,
                       left_overs=others_paths,
                       left_titles=others_names,
                       orig_image_path=human_file_path,
                       orig_title=face_name,
                       title1="MATCHING",
                       title2="SIMILARS",
                       title3="OTHERS")
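
# Hedged usage sketch: preview the 22-feature pipeline on one image; `pred`
# and `detc` are assumed to be the landmark predictor and face detector
# handles expected by facial_landmarks.test_preview.
# NN_result_preview(second=True, image_num=3, blur=True, pred=pred, detc=detc)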
def is_similar(img, orig):
    THRESHOLD = 2
    data = auxilary.read_csv(auxilary.path_to_csv_key_points)

    def img_info(imag):
        # distances from the base key point to the 12 remaining key points;
        # points are stored as strings like "[x, y]", so parse them first
        dists = []
        name = imag.split("/")[4]  # relies on a fixed dataset path depth
        index = data.index[data["image_name"] == name]
        base_point = [
            int(i) for i in [
                x.strip("[]")
                for x in data[data.columns[2]][index].tolist()[0].split(',')
            ]
        ]
        for f in data[data.columns[3:15]]:
            point = [
                int(i) for i in [
                    x.strip('[]')
                    for x in data.loc[index, f].tolist()[0].split(',')
                ]
            ]
            dists.append(auxilary.distance_two_points(base_point, point))
        return dists

    # two faces count as similar if their key-point distance profiles agree
    # to within THRESHOLD on average
    diffs = [abs(x - y) for x, y in zip(img_info(img), img_info(orig))]
    return np.average(diffs) < THRESHOLD
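
# Self-contained illustration of the string parsing that img_info relies on:
# key points are serialised as text like "[12, 34]", so each cell must be
# stripped and split before distances can be computed. `parse_point` is a
# hypothetical helper mirroring the comprehensions above, not repo code.
def parse_point(cell):
    return [int(v.strip("[] ")) for v in cell.split(",")]

assert parse_point("[12, 34]") == [12, 34]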
def store_shape_tri(image_path, image_name):
    shape, tris = get_delaunay_points(image_path, returned=True)
    if len(shape) == 0:
        return

    # create the csv for tris and shape on first use
    if not os.path.exists(path_to_shape_tris):
        create_shape_tris(fileName=path_to_shape_tris)

    # append one row: the image name, its 7 major points, and its triangles
    dataframe = read_csv(fileName=path_to_shape_tris)
    n_rows = len(dataframe)
    dataframe.at[n_rows, 'image_name'] = image_name
    dataframe.at[n_rows, 'shape'] = shape
    dataframe.at[n_rows, 'tris'] = tris
    store_csv(dataframe=dataframe, fileName=path_to_shape_tris)
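
# Hedged usage sketch: persist Delaunay data for a whole dataset; the glob
# pattern mirrors the dataset layout used elsewhere in this file.
# import glob, os
# for p in glob.glob("../dataset/main_data/*/*"):
#     store_shape_tri(p, os.path.basename(p))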
def test_euc():
    data = auxilary.read_csv(fileName='../csv_files/embedded_2.csv')
    D = data.shape[1] - 1  # 22 features; the last column is the label

    inputs = np.array(data.iloc[:, :D])
    labels = np.array(data.iloc[:, D])

    true = 0
    count = 0
    percentages = []

    for emb, label in zip(inputs, labels):
        name, perc = euclidean.euc_predict(emb, inputs, labels)

        if name == label:
            true += 1
            percentages.append(perc)
        else:
            percentages.append(-perc)
        count += 1
        if count % 100 == 0:
            print(f'Working, iteration {count}')
            print(f'Accuracy = {true/count}')

    percentages = np.array(percentages)
    print(f"Accuracy: {true/count}")
    print(f'detected with prob = {np.average(percentages)}')
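
# Note (assumption about euclidean.euc_predict): each query embedding above
# is drawn from the same `inputs` it is matched against, so unless
# euc_predict excludes the query row itself, the nearest neighbour is the
# row's own entry and the reported accuracy is optimistic.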
def prepare_data():
    # print ("Load labels")

    data = auxilary.read_csv(fileName='../csv_files/embedded_2.csv')
    N = data.shape[0]  #5050
    D = data.shape[1] - 1  #22

    inputs = np.array(data.iloc[:, :D])
    labels = np.array(data.iloc[:, D])

    names_encode = LabelEncoder().fit(labels)
    Y = names_encode.transform(labels)  # in range of [0:252]
    unique_count = len(set(labels))
    # Transform Y into shape [N, unique_count]: all zeros except a one at
    # the correct label (one-hot encoding)
    y = np.zeros([N, unique_count])
    y[np.arange(N), Y] = 1

    # print (f'Inputs shape: {inputs.shape}')
    # print (f'Outputs shape: {y.shape}')
    return inputs, y, names_encode
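
# Self-contained toy of the encode -> one-hot -> decode round trip used in
# prepare_data (toy labels, not the real dataset):
# enc = LabelEncoder().fit(["ann", "bob", "ann"])
# Y = enc.transform(["ann", "bob", "ann"])              # [0, 1, 0]
# one_hot = np.zeros((3, len(enc.classes_)))
# one_hot[np.arange(3), Y] = 1
# print(enc.inverse_transform(one_hot.argmax(axis=1)))  # ['ann' 'bob' 'ann']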
def multiclass_classification(path):
    dataframe = auxilary.read_csv(path)

    # target y is the person's name; X is the 22 key-point features
    X = dataframe.iloc[:, 0:22].to_numpy()
    y = dataframe['output'].to_list()

    decision_tree(X, y)
def read_data():
    data = auxilary.read_csv(fileName='../csv_files/embedded.csv')
    D = data.shape[1] - 1  # 128 embedding dims; the last column is the label

    inputs = np.array(data.iloc[:, :D])
    labels = np.array(data.iloc[:, D])

    return inputs, labels
def get_diff_similar(csv_path):
    data = auxilary.read_csv(fileName=csv_path)
    D = data.shape[1] - 1  # e.g. 22 features; the last column is the label
    # the most photographed identity gives the largest set of same-person pairs
    george_data = data[data['output'] == 'George_W_Bush']
    sim_diff = []
    ev = 0
    for index_1, row_1 in george_data.iterrows():
        features_1 = np.array(row_1[:D])
        if ev % 50 == 0:
            print(f'image: {ev}')
        ev += 1
        for index_2, row_2 in george_data.iterrows():
            # visit each unordered pair exactly once
            if index_1 >= index_2:
                continue
            features_2 = np.array(row_2[:D])

            sim_diff.append(np.power(features_1 - features_2, 2))

    df = pd.DataFrame(sim_diff)
    df["label"] = 1
    df.to_csv("../csv_files/similars_diff.csv", index=False)
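
# Hedged alternative: the nested iterrows loop above is O(n^2) row access;
# with the feature block as one ndarray, the same squared pair differences
# can be built in bulk. Sketch, assuming `feats` is the (n_images, D)
# array for one person:
# iu = np.triu_indices(len(feats), k=1)          # all index_1 < index_2 pairs
# sim_diff = (feats[iu[0]] - feats[iu[1]]) ** 2  # one row per pair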
def Euc_result_preview(image_num=None):
    print("Load labels")
    data = auxilary.read_csv(fileName='../csv_files/embedded.csv')
    D = 128

    inputs = np.array(data.iloc[:, :D])
    labels = np.array(data.iloc[:, D])

    embeddings, face_name, human_file_path = face_recognition.face_recognition(
        dataset_path="../dataset/main_data/*/*",
        preview=True,
        image_num=image_num)

    identicals, similars = results(embeddings, inputs, labels)

    idc_paths, idc_names, sim_paths, sim_names, others_paths, others_names = \
        trim_outputs(labels, face_name, identicals, similars)

    show_tests.buttons(identicalls=idc_paths,
                       id_titles=idc_names,
                       similars=sim_paths,
                       sim_titles=sim_names,
                       left_overs=others_paths,
                       left_titles=others_names,
                       orig_image_path=human_file_path,
                       orig_title=face_name,
                       title1="MATCHING",
                       title2="SIMILARS",
                       title3="OTHERS")
def svm_compare():
    '''
    Train an SVM on pairwise key-point differences and report its accuracy.
    '''
    # path = '../csv_files/csv_differences.csv'
    path = '../csv_files/svm_set2.csv'
    data = auxilary.read_csv(fileName=path)

    # READ THE INPUTS
    # N difference vectors, each D = 22 dimensional
    N = len(data)
    D = 22
    inputs = np.zeros([N, D])
    data_input = (data['inputs'])
    for i, input_list in enumerate(data_input):
        inputs[i] = auxilary.strings_to_lists(input_list)

    # READ THE LABELS
    data_labels = data['label'] == 0
    labels = np.ones([N, 1])
    labels[data_labels] = 0

    # clf = tree.DecisionTreeClassifier()
    clf = svm.SVC(gamma=0.001, C=100, probability=True)
    X, y = inputs, np.ravel(labels)
    # shuffle the data and hold out 20% for testing
    X_train, X_test, y_train, y_test = train_test_split(
        X,
        y,
        test_size=0.2,
        shuffle=True,
        random_state=42,
    )
    clf.fit(X_train, y_train)

    # TO GET EACH SAMPLE OUTPUT
    # for sample in X_test:
    #     y_pred = clf.predict_proba(sample.reshape(1, -1))
    #     print(y_pred)

    # TO DO ALL SAMPLES AT ONCE
    y_prob = clf.predict_proba(X_test)
    y_pred = clf.predict(X_test)
    print("probabilities:\n", y_prob[:5])
    print("\npredictions: \n" , y_pred[:5])
    print("accuracy: \n",np.array(y_pred == y_test)[:5])
    print('\npercentage correct: ', 100 * np.sum(y_pred == y_test) / len(y_test))

    # Get the support vectors themselves
    # support_vectors = clf.support_vectors_

    # Optional: visualise the decision boundary in PCA space
    # X_train2 = pca.fit_transform(X_train)
    # clf.fit(X_train2, y_train)
    # plot_decision_regions(X_train2, y_train.astype(int), clf=clf, legend=2)
    #
    # plt.xlabel(X_train[0], size=14)
    # plt.ylabel(X_train[1], size=14)
    # plt.title('SVM Decision Region Boundary', size=16)
    #
    # plt.show()

    show_vectors(X_train, y_train)

    return clf
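
# Hedged usage sketch: score a fresh pair with the trained classifier;
# `diff_vec` is a hypothetical 22-value difference (e.g. from mse_diff).
# clf = svm_compare()
# diff_vec = np.zeros((1, 22))
# print(clf.predict(diff_vec), clf.predict_proba(diff_vec))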
def get_binary_classification(key_points_path, image_dir):
    main_data = auxilary.read_csv(key_points_path)
    # create diff csv
    columns = ['inputs', 'label']
    path = '../csv_files/svm_set2.csv'

    if not os.path.exists(path):
        auxilary.create_csv(path, columns)

    dataframe = auxilary.read_csv(fileName=path)
    print("key-point rows:", len(main_data))
    print("existing label-0 rows:", len(dataframe[dataframe.label == 0]))
    num_sets = int(main_data['output'].nunique())
    list_of_people = main_data['output'].unique()
    data_size = 1000

    # GET LABEL 1: squared differences between images of the same person
    def get_ones():
        for person in list_of_people:
            print(person)
            dataframe = auxilary.read_csv(fileName=path)
            if len(dataframe.index) > data_size:
                break

            df_values = main_data.iloc[:, 0:22]
            df_filter = df_values[main_data['output'] == person]
            values_per_set = math.ceil(data_size / num_sets)
            # cap the number of same-person pairs so no identity dominates
            pairs = list(itertools.combinations(df_filter.index, 2))
            if len(pairs) > values_per_set:
                pairs = pairs[0:int(values_per_set)]

            for pair in pairs:
                # pairs hold index labels, so .loc is the right accessor
                row1 = df_values.loc[pair[0]]
                row2 = df_values.loc[pair[1]]
                diff = mse_diff(row1, row2)
                row_dict = {'inputs': [diff], 'label': 1}
                dataframe = auxilary.read_csv(fileName=path)
                auxilary.add_row(dataframe, row_dict, fileName=path)

    # RUN ONCE ONLY
    # get_ones()

    # GET LABEL 0: squared differences between images of different people,
    # matched in count to the rows already stored
    curr_size = len(dataframe.index)

    def get_zeros(list_of_people, size, index):
        # pair people from opposite ends of the list (index vs index2);
        # once the pointers meet, drop the first person and start over
        while size > 0:
            index2 = len(list_of_people) - (index + 1)
            print(index, index2)
            if index2 <= index:
                list_of_people = list_of_people[1:]
                size = size - 1
                get_zeros(list_of_people, size, 0)
                break
            df = main_data.iloc[:, 0:22]
            row1 = df[main_data['output'] ==
                      list_of_people[index]].sample().squeeze()
            row2 = df[main_data['output'] ==
                      list_of_people[index2]].sample().squeeze()
            diff = mse_diff(row1, row2)
            row_dict = {'inputs': [diff], 'label': 0}
            df = auxilary.read_csv(fileName=path)
            print(row_dict['inputs'])
            auxilary.add_row(df, row_dict, fileName=path)
            size = size - 1
            index = index + 1
        return

    # RUN ONCE ONLY
    get_zeros(list_of_people, curr_size, 0)

    dataframe = auxilary.read_csv(fileName=path)
    print("total rows:", len(dataframe))
    print("label-1 rows:", len(dataframe[dataframe.label == 1]))
    print("label-0 rows:", len(dataframe[dataframe.label == 0]))