Code example #1
0
def initial_result_task5(vectors, t, query_gesture):
    """Return the t most dominant gestures for *query_gesture* using
    Personalized PageRank (PPR) over the gesture-gesture similarity graph.

    Args:
        vectors: path to the CSV file of gesture feature vectors; the
            gesture name is expected in column 0 of each row.
        t: number of top-ranked gestures to return.
        query_gesture: name of the query gesture (must appear in the
            vectors file).

    Returns:
        List of the t gesture file names with the highest PPR scores,
        in descending score order.
    """
    data_file_name = vectors
    # Rebuild the latent-feature vectors and the similarity matrix so the
    # PPR graph reflects the current data on disk.
    task1.call_task1("outputs/", "tf_idf", "pca", 10)
    task3.call_task3("tf_idf", "outputs/", "pca", 4, "svd", "False")
    similarity_matrix_file_name = "outputs/similarity_matrix_pca.csv"
    data_matrix = np.array(
        pd.read_csv(data_file_name, header=None, low_memory=False))
    query_gesture_row_index = np.where(data_matrix == query_gesture)[0][0]
    graph_degree = 10

    similarity_matrix = np.array(
        pd.read_csv(similarity_matrix_file_name, header=None))

    # Row/column 0 of the similarity matrix hold gesture file names;
    # the remainder is the numeric similarity grid.
    column_file_map = similarity_matrix[0][1:].tolist(
    )  # give a column number, return file name

    adjacency_graph = np.array(similarity_matrix[1:, 1:].tolist(), dtype=float)
    # Keep only each node's graph_degree strongest outgoing edges.
    adjacency_graph = adjacency_graph * (adjacency_graph >= np.sort(
        adjacency_graph, axis=1)[:, [-graph_degree]])
    # L1-normalize columns so each column is a probability distribution.
    normalized_adjacency_graph = sklearn.preprocessing.normalize(
        adjacency_graph, norm='l1', axis=0)

    # Restart only from the query gesture.
    restart_vector = np.zeros((len(adjacency_graph), 1))
    restart_vector[query_gesture_row_index][0] = 1
    ppr_vector = ppr(normalized_adjacency_graph, restart_vector)
    # Indices of the t highest-scoring gestures (descending).
    dominant_feature_indices = [
        i for _, i in sorted(zip(ppr_vector, range(len(ppr_vector))),
                             key=lambda v: v[0],
                             reverse=True)[:t]
    ]
    dominant_features = [column_file_map[i] for i in dominant_feature_indices]
    print("Dominant features ", dominant_features)
    return dominant_features
Code example #2
0
def get_updated_gestures_task5(relevant_gestures, irrelevant_gestures, t,
                               query_gesture):
    """Apply relevance feedback (Rocchio-style) and re-rank gestures.

    The query gesture's feature vector is moved toward the mean of the
    relevant gestures and away from the mean of the irrelevant ones, the
    data file is rewritten on disk, and Personalized PageRank is re-run
    on the similarity graph to produce an updated top-t ranking.

    Args:
        relevant_gestures: gesture names the user marked relevant (may be
            empty/falsy).
        irrelevant_gestures: gesture names marked irrelevant (may be
            empty/falsy).
        t: number of top-ranked gestures to return.
        query_gesture: name of the query gesture.

    Returns:
        List of the t gesture file names with the highest PPR scores.
    """
    print(query_gesture)

    data_file_name = "outputs/tf_idf_pca_vectors.csv"
    similarity_matrix_file_name = "outputs/similarity_matrix_pca.csv"
    data_matrix = np.array(pd.read_csv(data_file_name, header=None))
    query_gesture_row_index = np.where(data_matrix == query_gesture)[0][0]
    graph_degree = 10
    task3.call_task3("tf_idf", "outputs/", "pca", 4, "svd", "False")
    relevant_gesture_row_indices = []

    irrelevant_gestures_vector = np.zeros((1, len(data_matrix[0]) - 1),
                                          dtype=object)
    relevant_gestures_vector = np.zeros((1, len(data_matrix[0]) - 1),
                                        dtype=object)
    # np.float was removed in NumPy >= 1.24; the builtin float is identical.
    query_gesture_values = data_matrix[query_gesture_row_index,
                                       1:].astype(float)
    if relevant_gestures:
        for gesture in relevant_gestures:
            gesture_row_index = np.where(data_matrix == gesture)[0][0]
            relevant_gestures_vector = np.add(
                relevant_gestures_vector,
                data_matrix[gesture_row_index, 1:].astype(float))
            relevant_gesture_row_indices.append(gesture_row_index)
        # Mean of the relevant gestures (positive Rocchio term).
        relevant_gestures_vector = (
            1 / (len(relevant_gestures))) * relevant_gestures_vector

    if irrelevant_gestures:
        for gesture in irrelevant_gestures:
            gesture_row_index = np.where(data_matrix == gesture)[0][0]
            irrelevant_gestures_vector = np.add(
                irrelevant_gestures_vector,
                data_matrix[gesture_row_index, 1:].astype(float))
        # Negative mean of the irrelevant gestures (negative Rocchio term).
        irrelevant_gestures_vector = (
            -1 / len(irrelevant_gestures)) * irrelevant_gestures_vector

    # BUG FIX: the original called np.add(a, b, c) -- np.add is binary and
    # the third positional argument is the *out* parameter, so the
    # irrelevant-gesture term was silently dropped from the update.
    # Sum all three terms explicitly instead.
    data_matrix[query_gesture_row_index, 1:] = (
        query_gesture_values + relevant_gestures_vector +
        irrelevant_gestures_vector)
    pd.DataFrame(data_matrix).to_csv(data_file_name, header=None, index=None)

    similarity_matrix = np.array(
        pd.read_csv(similarity_matrix_file_name, header=None))

    # Row/column 0 of the similarity matrix hold gesture file names.
    column_file_map = similarity_matrix[0][1:].tolist(
    )  # give a column number, return file name

    adjacency_graph = np.array(similarity_matrix[1:, 1:].tolist(), dtype=float)
    # Keep only each node's graph_degree strongest outgoing edges.
    adjacency_graph = adjacency_graph * (adjacency_graph >= np.sort(
        adjacency_graph, axis=1)[:, [-graph_degree]])
    normalized_adjacency_graph = sklearn.preprocessing.normalize(
        adjacency_graph, norm='l1', axis=0)

    # Restart from the query gesture plus every gesture marked relevant.
    restart_vector = np.zeros((len(adjacency_graph), 1))
    restart_vector[query_gesture_row_index][0] = 1
    for i in relevant_gesture_row_indices:
        restart_vector[i][0] = 1
    ppr_vector = ppr(normalized_adjacency_graph, restart_vector)

    # Indices of the t highest-scoring gestures (descending).
    dominant_feature_indices = [
        i for _, i in sorted(zip(ppr_vector, range(len(ppr_vector))),
                             key=lambda v: v[0],
                             reverse=True)[:t]
    ]
    dominant_features = [column_file_map[i] for i in dominant_feature_indices]
    return dominant_features
Code example #3
0
def ppr_2(labels_train, vector_model, output_dir, user_option, custom_cost, k):
    """Classify unlabelled gestures by comparing per-class PPR scores.

    One Personalized PageRank vector is computed per class, restarting
    uniformly from that class's labelled gestures; each unlabelled gesture
    receives the label of the class whose PPR vector scores it highest.
    Results are written to ``<output_dir>/ppr_2_classification.csv``.

    Args:
        labels_train: array-like of (gesture_id, label) rows.
        vector_model: feature model name passed to task3 (e.g. "tf_idf").
        output_dir: directory holding the similarity matrix and outputs.
        user_option: dimensionality-reduction option (e.g. "pca").
        custom_cost: forwarded to task3.
        k: number of strongest edges kept per node in the PPR graph.
    """
    labels_train_dict = {}
    for each in labels_train.tolist():
        labels_train_dict[str(each[0]) + "_words.csv"] = each[1]

    task3.call_task3(
        vector_model, output_dir, user_option, 4, "svd",
        custom_cost)  # construct gesture_gesture_similarity matrix
    similarity_matrix_df = pd.read_csv(output_dir + "similarity_matrix_" +
                                       user_option + ".csv",
                                       header=None,
                                       low_memory=False)

    similarity_matrix = np.array(similarity_matrix_df)
    # Row/column 0 of the similarity matrix hold gesture file names.
    column_file_map = similarity_matrix[0][1:].tolist(
    )  # give a column number, return file name

    name_column_map = dict()  # give a filename, returns the row index
    for index, filename in enumerate(column_file_map):
        name_column_map[filename] = index

    adjacency_graph = np.array(similarity_matrix[1:, 1:].tolist(), dtype=float)
    # Keep only each node's k strongest outgoing edges.
    adjacency_graph = adjacency_graph * (adjacency_graph >= np.sort(
        adjacency_graph, axis=1)[:, [-k]])
    normalized_adjacency_graph = sklearn.preprocessing.normalize(
        adjacency_graph, norm='l1', axis=0)
    vector_size = len(adjacency_graph)

    # Group labelled gestures by class.  (The original hard-coded exactly
    # three classes; this handles any number of classes.)
    class_set = list(set(labels_train_dict.values()))
    label_map = {}  # class index -> class label
    class_members = [[] for _ in class_set]
    for i, c in enumerate(class_set):
        # NOTE: the original loop rebound ``k``, shadowing the parameter;
        # distinct names avoid that hazard.
        for filename, label in labels_train_dict.items():
            if c == label:
                label_map[i] = c
                class_members[i].append(filename)

    # One PPR vector per class, restarting uniformly from its members.
    ppr_vectors = []
    for members in class_members:
        restart_vector = np.zeros((vector_size, 1))
        for f in members:
            restart_vector[name_column_map[f]][0] = 1 / len(members)
        ppr_vectors.append(ppr(normalized_adjacency_graph, restart_vector))

    labelled_gestures = [x for x in labels_train_dict.keys()]

    unlabelled_gestures = list(set(column_file_map) - set(labelled_gestures))
    unlabelled_gesture_columns = [
        name_column_map[x] + 1 for x in unlabelled_gestures
    ]

    # ``with`` closes the output file deterministically (the original
    # leaked the handle).
    with open(output_dir + "ppr_2_classification.csv", "w") as output_file:
        csv_write = csv.writer(output_file)
        for c_index, c in enumerate(unlabelled_gesture_columns):
            user_specified_column = name_column_map[column_file_map[c - 1]]
            # Score this gesture under every class's PPR vector and pick
            # the best-scoring class.
            scores = [v[user_specified_column][0] for v in ppr_vectors]
            label = scores.index(max(scores))
            csv_write.writerow(
                (unlabelled_gestures[c_index].replace("_words.csv", ""),
                 label_map[label]))
Code example #4
0
        # Build the PPR adjacency graph: drop the header row/column, keep
        # only each node's graph_degree strongest edges, then L1-normalize
        # each column into a probability distribution.
        adjacency_graph = np.array(similarity_matrix[1:, 1:].tolist(),
                                   dtype=float)
        adjacency_graph = adjacency_graph * (adjacency_graph >= np.sort(
            adjacency_graph, axis=1)[:, [-graph_degree]])
        normalized_adjacency_graph = sklearn.preprocessing.normalize(
            adjacency_graph, norm='l1', axis=0)

        # Restart from the query gesture plus every gesture marked
        # relevant, then reset the relevance list (presumably for the next
        # feedback round -- the enclosing loop is outside this view).
        restart_vector = np.zeros((len(adjacency_graph), 1))
        restart_vector[query_gesture_row_index][0] = 1
        for i in relevant_gesture_row_indices:
            # print(i)
            restart_vector[i][0] = 1
        relevant_gesture_row_indices = []

        ppr_vector = ppr(normalized_adjacency_graph, restart_vector)

        # Rank all gestures by PPR score, descending, and keep the top
        # args.t indices.
        sorted_list = sorted(zip(ppr_vector, range(len(ppr_vector))),
                             key=lambda v: v[0],
                             reverse=True)
        dominant_feature_indices = []
        for (s, i) in sorted_list:
            dominant_feature_indices.append(i)
        dominant_feature_indices = dominant_feature_indices[:args.t]

        # Map indices back to gesture names, stripping the file suffix.
        dominant_features = [
            column_file_map[i].replace("_words.csv", "")
            for i in dominant_feature_indices
        ]
        print("Dominant features ", dominant_features)
Code example #5
0
def ppr_classifier(labels_train, vector_model, output_dir, user_option,
                   custom_cost, k):
    """Classify each unlabelled gesture with a per-gesture PPR vote.

    For every unlabelled gesture a small graph is built over the labelled
    gestures plus that gesture, PPR is run restarting from the gesture,
    and the majority label among the highest-ranked labelled neighbours
    is assigned.  Results are written to
    ``<output_dir>/ppr_classification.csv``.

    Args:
        labels_train: array-like of (gesture_id, label) rows.
        vector_model: feature model name passed to task3 (e.g. "tf_idf").
        output_dir: directory holding the similarity matrix and outputs.
        user_option: dimensionality-reduction option (e.g. "pca").
        custom_cost: forwarded to task3.
        k: number of strongest edges kept per node in the PPR graph.
    """
    labels_train_dict = {}
    for each in labels_train.tolist():
        labels_train_dict[str(each[0]) + "_words.csv"] = each[1]

    number_of_dominant_features = 10

    task3.call_task3(
        vector_model, output_dir, user_option, 4, "svd",
        custom_cost)  # construct gesture_gesture_similarity matrix

    similarity_matrix_path = (output_dir + "similarity_matrix_" +
                              user_option + ".csv")
    similarity_matrix = np.array(
        pd.read_csv(similarity_matrix_path, header=None, low_memory=False))
    # Row/column 0 of the similarity matrix hold gesture file names.
    column_file_map = similarity_matrix[0][1:].tolist(
    )  # give a column number, return file name
    name_column_map = dict()  # give a filename, returns the row index
    for index, filename in enumerate(column_file_map):
        name_column_map[filename] = index

    # ------------------- run ppr for each unlabelled gesture -------------
    labelled_gestures = [x for x in labels_train_dict.keys()]
    labelled_gesture_columns = [
        name_column_map[x] + 1 for x in labelled_gestures
    ]

    unlabelled_gestures = list(set(column_file_map) - set(labelled_gestures))
    unlabelled_gesture_columns = [
        name_column_map[x] + 1 for x in unlabelled_gestures
    ]

    # Hoisted out of the loop: the file does not change between
    # iterations, but the original re-read it on every pass.
    similarity_matrix_df = pd.read_csv(similarity_matrix_path,
                                       low_memory=False)

    # ``with`` closes the output file deterministically (the original
    # leaked the handle).
    with open(output_dir + "ppr_classification.csv", "w") as output_file:
        csv_write = csv.writer(output_file)

        for c_index, c in enumerate(unlabelled_gesture_columns):
            matrix_columns = labelled_gesture_columns + [c]
            gestures = labelled_gestures + [column_file_map[c - 1]]
            # Restrict the similarity matrix to the labelled gestures plus
            # the current unlabelled one ("Nothing" is the name column).
            adjacency_graph = similarity_matrix_df.loc[:,
                                                       ["Nothing"] + gestures]
            adjacency_graph = adjacency_graph.loc[
                adjacency_graph["Nothing"].isin(gestures)]
            adjacency_graph = np.array(adjacency_graph)[:, 1:]
            adjacency_graph = adjacency_graph.astype(dtype=float)
            # TODO: Do we have to consider only k most closest here?
            adjacency_graph = adjacency_graph * (adjacency_graph >= np.sort(
                adjacency_graph, axis=1)[:, [-k]])
            normalized_adjacency_graph = sklearn.preprocessing.normalize(
                adjacency_graph, norm='l1', axis=0)
            vector_size = len(adjacency_graph)
            # Restart from the unlabelled gesture (last node of the graph).
            restart_vector = np.zeros((vector_size, 1))
            restart_vector[vector_size - 1][0] = 1
            ppr_vector = ppr(normalized_adjacency_graph, restart_vector)
            # NOTE: renamed the comprehension variable -- the original
            # shadowed the parameter ``k`` here.
            matrix_file_names = [
                column_file_map[col - 1] for col in matrix_columns
            ]
            # Top-ranked neighbours; use the named constant instead of the
            # original's hard-coded 10.
            dominant_file_names = sorted(
                zip(ppr_vector, matrix_file_names),
                key=lambda v: v[0],
                reverse=True)[:number_of_dominant_features]
            dominant_features_class = []
            for ranking, filename in dominant_file_names:
                if filename != column_file_map[c - 1]:
                    dominant_features_class.append(labels_train_dict[filename])

            # Majority vote among the top labelled neighbours.
            class_label = max(set(dominant_features_class),
                              key=dominant_features_class.count)
            csv_write.writerow(
                (unlabelled_gestures[c_index].replace("_words.csv", ""),
                 class_label))