def preprocess_feature_data(dataset, raw_data_folder, preprocessed_data_folder, feature_raw_data_file, number_of_nodes):
    """
    Convert one raw feature file into the preprocessed tab-separated format.

    Reads  "<raw_data_folder>/<dataset>-<feature_raw_data_file>.mtx" and
    writes "<preprocessed_data_folder>/<feature_raw_data_file>.tsv".

    Inputs: - dataset: Dataset name; prefix of the raw file name.
            - raw_data_folder: Folder that holds the raw ".mtx" file.
            - preprocessed_data_folder: Folder that receives the ".tsv" file.
            - feature_raw_data_file: File name stem shared by source and target.
            - number_of_nodes: Forwarded unchanged to read_feature_raw_data_file.
    """
    # Both paths are assembled up front, before any file is touched.
    mtx_path = "".join((raw_data_folder, "/", dataset, "-", feature_raw_data_file, ".mtx"))
    tsv_path = "".join((preprocessed_data_folder, "/", feature_raw_data_file, ".tsv"))

    feature_matrix = read_feature_raw_data_file(mtx_path, number_of_nodes)

    # Written as a directed edge list with tab separator and "matlab" numbering.
    scipy_sparse_to_csv(tsv_path, feature_matrix, separator="\t", directed=True, numbering="matlab")
def make_labelling(dataset, raw_data_folder, preprocessed_data_folder):
    """
    Build the node-by-community label matrix and store it as a ".tsv" file.

    Reads "<raw_data_folder>/<dataset>.ids" to assign node indices and
    "<raw_data_folder>/<dataset>.communities" to collect community members,
    then writes "<preprocessed_data_folder>/node_label_matrix.tsv".

    File handling, as implemented below:
    - ".ids": rows are split on " "; the first field of each row is parsed as
      an integer user id. Node indices follow row order, starting at 0.
    - ".communities": rows are split on ",". The first field is stripped and
      split on " ", and its second token is the first member id; every
      remaining field is a further member id. The first token is ignored
      (presumably a community label - TODO confirm against the data files).
      Each row becomes one column of the matrix, numbered in row order.
    - In both files, reading stops at the first row whose first field is empty.

    Inputs: - dataset: Dataset name; stem of the ".ids"/".communities" files.
            - raw_data_folder: Folder that holds the raw files.
            - preprocessed_data_folder: Folder that receives the output file.

    Raises: - KeyError: A community member id is not listed in the ".ids" file.
            - ValueError: A field cannot be parsed as an integer.
    """
    node_file_path = raw_data_folder + "/" + dataset + ".ids"

    user_twitter_id_list = list()
    for file_row in get_file_row_generator(node_file_path, " "):
        if file_row[0] == "":
            break
        user_twitter_id_list.append(int(file_row[0]))

    # If an id is listed more than once, the last occurrence determines its node index.
    id_to_node = {twitter_id: node for node, twitter_id in enumerate(user_twitter_id_list)}
    number_of_nodes = len(id_to_node)

    community_file_path = raw_data_folder + "/" + dataset + ".communities"

    row = list()
    col = list()

    category_counter = 0
    for file_row in get_file_row_generator(community_file_path, ","):
        # Same end-of-data guard as for the ".ids" file; without it a blank
        # row fails with an IndexError when the first member id is extracted.
        if file_row[0] == "":
            break

        first_field_tokens = file_row[0].strip().split(" ")
        member_nodes = [id_to_node[int(first_field_tokens[1])]]
        member_nodes.extend(id_to_node[int(twitter_id)] for twitter_id in file_row[1:])

        row.extend(member_nodes)
        col.extend([category_counter] * len(member_nodes))

        category_counter += 1

    row = np.array(row, dtype=np.int32)
    col = np.array(col, dtype=np.int32)
    data = np.ones_like(row, dtype=np.int8)

    node_label_matrix = spsp.coo_matrix((data, (row, col)), shape=(number_of_nodes, category_counter))

    target_path = preprocessed_data_folder + "/" + "node_label_matrix" + ".tsv"
    scipy_sparse_to_csv(target_path, node_label_matrix, separator="\t", directed=True, numbering="matlab")
def make_labelling(dataset, raw_data_folder, preprocessed_data_folder):
    """
    Build a two-column core/non-core label matrix and store it as a ".tsv" file.

    Reads "<raw_data_folder>/<dataset>.ids" to assign node indices and
    "<raw_data_folder>/<dataset>.core" to find the core users, then writes
    "<preprocessed_data_folder>/node_label_matrix.tsv".

    Every id from the ".ids" file gets exactly one entry: column 1 if the id
    also appears in the ".core" file, column 0 otherwise. Ids that appear only
    in the ".core" file are ignored.

    Inputs: - dataset: Dataset name; stem of the ".ids"/".core" files.
            - raw_data_folder: Folder that holds the raw files.
            - preprocessed_data_folder: Folder that receives the output file.
    """
    def read_leading_ids(file_path):
        # Parse the first space-separated field of each row as an integer,
        # stopping at the first row whose first field is empty.
        collected = list()
        for fields in get_file_row_generator(file_path, " "):
            if fields[0] == "":
                break
            collected.append(int(fields[0]))
        return collected

    listed_ids = read_leading_ids(raw_data_folder + "/" + dataset + ".ids")

    # Node index = position in the ".ids" file (last occurrence wins on duplicates).
    id_to_node = {twitter_id: node for node, twitter_id in enumerate(listed_ids)}
    all_id_set = set(listed_ids)

    core_id_set = all_id_set.intersection(read_leading_ids(raw_data_folder + "/" + dataset + ".core"))
    non_core_id_set = all_id_set.difference(core_id_set)

    # Core users first, then the rest; the column list follows the same order.
    ordered_ids = list(core_id_set) + list(non_core_id_set)
    row = np.array([id_to_node[twitter_id] for twitter_id in ordered_ids], dtype=np.int32)
    col = np.array([1] * len(core_id_set) + [0] * len(non_core_id_set), dtype=np.int32)
    data = np.ones(len(all_id_set), dtype=np.int8)

    node_label_matrix = spsp.coo_matrix((data, (row, col)), shape=(len(all_id_set), 2))

    target_path = preprocessed_data_folder + "/" + "node_label_matrix" + ".tsv"
    scipy_sparse_to_csv(target_path, node_label_matrix, separator="\t", directed=True, numbering="matlab")
def make_implicit_graphs(preprocessed_data_folder, simple_undirected_implicit_graph_folder):
    """
    Derive symmetrised single-view and multi-view graphs and store them as ".tsv" files.

    Reads "followedby.tsv", "mentionedby.tsv" and "retweetedby.tsv" from
    preprocessed_data_folder and transposes each one after loading. For every
    output, each participating graph and its transpose are summed and the sum
    is divided by the number of summed terms.

    Files written to simple_undirected_implicit_graph_folder, in this order:
    follow_graph, mention_graph, retweet_graph, fol_men_graph, men_ret_graph,
    fol_ret_graph, fol_men_ret_graph (all with the ".tsv" extension).

    Inputs: - preprocessed_data_folder: Folder that holds the three source graphs.
            - simple_undirected_implicit_graph_folder: Folder that receives the outputs.
    """
    def load_transposed(file_stem):
        # Read one adjacency matrix and hand back its transpose.
        source_path = preprocessed_data_folder + "/" + file_stem + ".tsv"
        return read_adjacency_matrix(source_path, "\t", "matlab").transpose()

    def symmetric_average(graphs):
        # Left-to-right sum of g1, g1^T, g2, g2^T, ... divided by the term count.
        total = None
        for graph in graphs:
            for term in (graph, graph.transpose()):
                total = term if total is None else total + term
        return total/(2*len(graphs))

    def export(file_stem, *graphs):
        # Store the symmetrised combination of the given graphs as an undirected edge list.
        target_path = simple_undirected_implicit_graph_folder + "/" + file_stem + ".tsv"
        combined_graph = symmetric_average(graphs)
        scipy_sparse_to_csv(target_path, combined_graph, separator="\t", directed=False, numbering="matlab")

    ####################################################################################################################
    # Read graphs.
    ####################################################################################################################
    follow_graph = load_transposed("followedby")
    mention_graph = load_transposed("mentionedby")
    retweet_graph = load_transposed("retweetedby")

    ####################################################################################################################
    # Simple undirected implicit graphs.
    ####################################################################################################################
    export("follow_graph", follow_graph)
    export("mention_graph", mention_graph)
    export("retweet_graph", retweet_graph)

    gc.collect()

    ####################################################################################################################
    # Multiview graphs.
    ####################################################################################################################
    export("fol_men_graph", follow_graph, mention_graph)
    export("men_ret_graph", mention_graph, retweet_graph)
    export("fol_ret_graph", follow_graph, retweet_graph)
    export("fol_men_ret_graph", follow_graph, mention_graph, retweet_graph)