Example #1
0
def main():
    """Run the greedy algorithm on ``instance2.clq`` and print timing and result.

    The reported duration covers both the ``greedy`` call and the
    ``cliques_from_list`` conversion of its output. After the timing line,
    the number of cliques and the cliques themselves are printed.
    """
    graph = load_graph('instance2.clq')

    began = time.time()
    assignment = greedy(graph)
    found = cliques_from_list(assignment)
    elapsed = time.time() - began

    print("--- %s seconds ---" % elapsed)
    print(len(found), "cliques:", found)
Example #2
0
def main():
    """Compare the greedy, backtracking and brute-force solvers on one graph.

    Loads ``specific/Gnp10_0.2.clq`` and, for each solver in turn, prints a
    section header, the elapsed wall-clock time, the number of cliques and
    the solution itself.
    """
    # The node and edge counts returned by load_graph are not needed here.
    test_graph, _nb_nodes, _nb_edges = load_graph('specific/Gnp10_0.2.clq')
    nb_vertices = test_graph.shape[0]

    def initial_cliques():
        """Return a new starting list: first entry 1, every other entry 0."""
        # presumably entry i is the clique index of vertex i - TODO confirm
        state = [0] * nb_vertices
        state[0] = 1
        return state

    print("\n--- GREEDY ---")
    start_time = time.time()
    gd_solution = cliques_from_list(greedy(test_graph))
    print(">>> %s seconds" % (time.time() - start_time))
    print(">>>", len(gd_solution), "cliques")
    print(">>>", gd_solution)

    print("\n--- BACKTRACK ---")
    # Each exact solver gets its own list: if backtrack mutates its argument
    # in place, reusing that list would hand brute_force a dirty start state.
    cliques = initial_cliques()
    start_time = time.time()
    bt_solution = backtrack(test_graph, cliques, 1)
    print(">>> %s seconds" % (time.time() - start_time))
    print(">>>", bt_solution[0], "cliques")
    print(">>>", bt_solution[1])

    print("\n--- BRUTE FORCE ---")
    cliques = initial_cliques()
    start_time = time.time()
    bf_solution = brute_force(test_graph, cliques, 0)
    print(">>> %s seconds" % (time.time() - start_time))
    print(">>>", bf_solution[0], "cliques")
    print(">>>", bf_solution[1])
Example #3
0
def main():
    """Time the brute-force solver on ``specific/Gnp12_0.2.clq``.

    Prints the number of cliques and the solution, then the elapsed
    wall-clock time of the run.
    """
    # The node and edge counts returned by load_graph are not needed here.
    test_graph, _nb_nodes, _nb_edges = load_graph(
        'specific/Gnp12_0.2.clq')

    start_time = time.time()
    # Starting list handed to the solver: first entry 1, all others 0.
    cliques = [0] * test_graph.shape[0]
    cliques[0] = 1
    solution = brute_force(test_graph, cliques, 0)
    # Stop the clock before printing so that writing the (possibly long)
    # solution to the console is not counted in the reported duration.
    elapsed = time.time() - start_time

    print(solution[0], "cliques:", solution[1])
    print("--- %s seconds ---" % elapsed)
Example #4
0
    real_data = Amazon(root=input_path, name="Photo")
elif DATASET == "CLUSTER":
    real_data = GNNBenchmarkDataset(root=input_path,
                                    name="CLUSTER",
                                    split="test")
elif DATASET == "PATTERN":
    real_data = GNNBenchmarkDataset(root=input_path,
                                    name="PATTERN",
                                    split="test")
elif DATASET == "Flickr":
    real_data = Flickr(root=input_path)
elif DATASET == "OGB Products":
    real_data = PygNodePropPredDataset(name='ogbn-products')
    split_idx = real_data.get_idx_split()
elif DATASET == "GitHub Network":
    gitGraph = from_networkx(load_graph(input_path + '/musae_git_edges.csv'))
    gitGraph.x = torch.tensor(
        load_features(input_path + '/musae_git_features.json'))
    gitGraph.y = torch.tensor(
        load_targets(input_path + '/musae_git_target.csv'))
elif DATASET == "SBM":

    # Size of blocks
    COMMUNITY_SIZE = 400

    # Number of clusters
    NUM_BLOCKS = 3

    # In-Block prob.
    INTER_PROB = [0.7, 0.6, 0.5]
Example #5
0
def main():
    """Benchmark the greedy algorithm on every graph file and plot cost ratios.

    For each entry listed in ``DATA_DIR`` the graph is loaded, the greedy
    algorithm is timed, and one row (nodes, edges, duration, number of
    cliques) is written to a timestamped CSV in ``OUT_DIR``. Two figures are
    then saved to ``OUT_DIR``:

    * ``ratio_nodes_<timestamp>.png`` - greedy duration divided by the number
      of nodes (skipped when ``n_constant`` is true);
    * ``ratio_edges_<timestamp>.png`` - greedy duration divided by the number
      of edges.

    The ratios use the measured duration, as the plot titles state; the
    previous code divided the number of cliques found instead.
    """
    # NOTE(review): %I is a 12-hour clock and no AM/PM marker is included, so
    # two runs exactly 12 hours apart produce the same file names.
    now = datetime.now().strftime("%d_%m_%Y_%I_%M_%S")

    # if true, compares at a fixed number of nodes. If false, compares at a fixed probability of edges.
    n_constant = False

    # Graph generation is disabled here; the files listed in DATA_DIR are
    # expected to exist already.
    # generate_graphs(n=20, p=0.15, start=4, stop=20,
    #                 nb=30, n_constant=n_constant)

    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs(OUT_DIR, exist_ok=True)

    # Measurements keyed by edge count and by node count. Graphs sharing the
    # same count overwrite one another, so only the last one listed is plotted.
    results_by_edge = {}
    results_by_node = {}

    # newline='' is what the csv module requires of the file object; without
    # it, platforms that translate line endings emit blank rows.
    csv_path = os.path.join(OUT_DIR, 'results_' + now + '.csv')
    with open(csv_path, 'w', newline='') as output_file:
        fieldnames = ['nodes', 'edges', 'greedy_duration', 'greedy_solution']
        writer = csv.DictWriter(output_file, fieldnames=fieldnames)
        writer.writeheader()

        for file_name in os.listdir(DATA_DIR):
            print('\n')
            # NOTE(review): names come from DATA_DIR but files are opened from
            # this hard-coded folder - confirm both point to the same place.
            graph, nb_nodes, nb_edges = load_graph(
                os.path.join('random_graphs', 'complexity', 'clq', file_name))

            start_time = time.time()
            gd_solution = cliques_from_list(greedy(graph))
            greedy_duration = time.time() - start_time
            greedy_solution = len(gd_solution)
            print('GREEDY:', greedy_solution, 'cliques', greedy_duration, 's')

            results_by_edge[nb_edges] = {
                'greedy_duration': greedy_duration,
                'greedy_solution': greedy_solution,
            }
            results_by_node[nb_nodes] = {
                'greedy_duration': greedy_duration,
                'greedy_solution': greedy_solution,
            }

            writer.writerow({
                'nodes': nb_nodes,
                'edges': nb_edges,
                'greedy_duration': greedy_duration,
                'greedy_solution': greedy_solution,
            })

    def duration_per_unit(results):
        """Return (sorted counts, duration / count for each count)."""
        counts = sorted(results)
        # A count of zero (e.g. a graph without edges) yields NaN, which is
        # drawn as a gap, instead of raising ZeroDivisionError.
        ratios = [
            results[count]['greedy_duration'] / count if count else float('nan')
            for count in counts
        ]
        return counts, ratios

    def save_ratio_plot(column, counts, ratios, title, x_label, y_label,
                        file_prefix, legend_label=None):
        """Draw ratios against counts, save the figure to OUT_DIR, then clear it."""
        plt.title(title)
        plt_values = pd.DataFrame({column: counts, 'ratio': ratios})
        plt.xlabel(x_label)
        plt.ylabel(y_label)
        if legend_label is None:
            plt.plot(column, 'ratio', data=plt_values, lw=2)
        else:
            plt.plot(column, 'ratio', data=plt_values, lw=2,
                     label=legend_label)
            plt.legend()
        plt.savefig(os.path.join(OUT_DIR, file_prefix + now + '.png'),
                    bbox_inches="tight")
        # plt.show()
        plt.clf()

    plt.rcParams["figure.figsize"] = (18, 12)

    if not n_constant:
        nodes_numbers, node_ratios = duration_per_unit(results_by_node)
        save_ratio_plot(
            'nodes', nodes_numbers, node_ratios,
            "Ratio of the greedy algorithm duration / number of nodes",
            "Number of nodes", "Ratio greedy/n", 'ratio_nodes_',
            legend_label='greedy/n')

    edges_numbers, edge_ratios = duration_per_unit(results_by_edge)
    # The x axis of this figure is the edge count (it was mislabelled as nodes).
    save_ratio_plot(
        'edges', edges_numbers, edge_ratios,
        "Ratio of the greedy algorithm duration / number of edges",
        "Number of edges", "Ratio greedy/m", 'ratio_edges_')
Example #6
0
def main():
    """Benchmark greedy, light-backtrack and backtrack solvers and plot results.

    Random graphs are generated, then for each entry listed in ``DATA_DIR``
    the three solvers are timed and one row is written to a timestamped CSV
    in ``OUT_DIR``. For the node axis (skipped when ``n_constant`` is true)
    and for the edge axis, three figures are saved to ``OUT_DIR``: durations,
    number of cliques found, and the greedy/backtrack ratio of cliques found.
    Light-backtrack measurements go to the CSV only; they are not plotted.
    """
    # NOTE(review): %I is a 12-hour clock and no AM/PM marker is included, so
    # two runs exactly 12 hours apart produce the same file names.
    now = datetime.now().strftime("%d_%m_%Y_%I_%M_%S")

    # if true, compares at a fixed number of nodes. If false, compares at a fixed probability of edges.
    n_constant = False

    generate_graphs(n=20, p=0.15, start=4, stop=20,
                    nb=30, n_constant=n_constant)

    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs(OUT_DIR, exist_ok=True)

    def initial_cliques(graph):
        """Return a new starting list: first entry 1, every other entry 0."""
        # presumably entry i is the clique index of vertex i - TODO confirm
        state = [0] * graph.shape[0]
        state[0] = 1
        return state

    # Measurements keyed by edge count and by node count. Graphs sharing the
    # same count overwrite one another, so only the last one listed is plotted.
    results_by_edge = {}
    results_by_node = {}

    # newline='' is what the csv module requires of the file object; without
    # it, platforms that translate line endings emit blank rows.
    csv_path = os.path.join(OUT_DIR, 'results_' + now + '.csv')
    with open(csv_path, 'w', newline='') as output_file:
        fieldnames = ['nodes', 'edges', 'greedy_duration', 'greedy_solution',
                      'backtrack_duration', 'backtrack_solution',
                      'lt_backtrack_duration', 'lt_backtrack_solution']
        writer = csv.DictWriter(output_file, fieldnames=fieldnames)
        writer.writeheader()

        for file_name in os.listdir(DATA_DIR):
            print('\n')
            # NOTE(review): names come from DATA_DIR but files are opened from
            # this hard-coded folder - confirm both point to the same place.
            graph, nb_nodes, nb_edges = load_graph(
                os.path.join('random_graphs', 'clq', file_name))

            start_time = time.time()
            gd_solution = cliques_from_list(greedy(graph))
            greedy_duration = time.time() - start_time
            greedy_solution = len(gd_solution)
            print('GREEDY:', greedy_solution, 'cliques', greedy_duration, 's')

            # Each solver gets its own list: if light_backtrack mutates its
            # argument in place, reusing that list would hand backtrack a
            # dirty start state and skew its result and timing.
            cliques = initial_cliques(graph)
            start_time = time.time()
            ltbt_solution = light_backtrack(graph, cliques, 1)
            lt_backtrack_duration = time.time() - start_time
            lt_backtrack_solution = ltbt_solution[0]
            print('LIGHT BACKTRACK:', lt_backtrack_solution,
                  'cliques', lt_backtrack_duration, 's')

            cliques = initial_cliques(graph)
            start_time = time.time()
            bt_solution = backtrack(graph, cliques, 1)
            backtrack_duration = time.time() - start_time
            backtrack_solution = bt_solution[0]
            print('BACKTRACK:', backtrack_solution,
                  'cliques', backtrack_duration, 's')

            measurement = {
                'greedy_duration': greedy_duration,
                'greedy_solution': greedy_solution,
                'backtrack_duration': backtrack_duration,
                'backtrack_solution': backtrack_solution,
                'lt_backtrack_duration': lt_backtrack_duration,
                'lt_backtrack_solution': lt_backtrack_solution,
            }
            results_by_edge[nb_edges] = dict(measurement)
            results_by_node[nb_nodes] = dict(measurement)

            row = {'nodes': nb_nodes, 'edges': nb_edges}
            row.update(measurement)
            writer.writerow(row)

    def plot_comparison(results, unit):
        """Save the duration, solution and ratio figures for one x axis.

        ``results`` maps a node or edge count to one graph's measurements;
        ``unit`` is ``'nodes'`` or ``'edges'`` and selects the titles, the
        axis label and the output file names.
        """
        counts = sorted(results)
        measurements = [results[count] for count in counts]
        x_label = "Number of " + unit

        def save_and_clear(prefix):
            plt.savefig(
                os.path.join(OUT_DIR, prefix + unit + '_' + now + '.png'),
                bbox_inches="tight")
            # plt.show()
            plt.clf()

        def plot_greedy_vs_backtrack(metric, title, y_label, prefix):
            greedy_column = 'greedy_' + metric
            backtrack_column = 'backtrack_' + metric
            plt.title(title)
            plt_values = pd.DataFrame({
                unit: counts,
                greedy_column: [m[greedy_column] for m in measurements],
                backtrack_column: [m[backtrack_column] for m in measurements],
            })
            plt.xlabel(x_label)
            plt.ylabel(y_label)
            plt.plot(unit, greedy_column, data=plt_values,
                     lw=2, label='Greedy algorithm')
            plt.plot(unit, backtrack_column, data=plt_values,
                     lw=2, label='Backtracking algorithm')
            plt.legend()
            save_and_clear(prefix)

        plot_greedy_vs_backtrack(
            'duration',
            "Algorithm duration given the number of " + unit + " of the graph",
            "Seconds", 'durations_')

        plot_greedy_vs_backtrack(
            'solution',
            "Number of cliques found given the number of " + unit
            + " of the graph",
            "Number of cliques found", 'solutions_')

        # Ratio of cliques found by greedy to cliques found by backtrack.
        # A zero denominator yields NaN (drawn as a gap) rather than raising.
        ratios = [
            m['greedy_solution'] / m['backtrack_solution']
            if m['backtrack_solution'] else float('nan')
            for m in measurements
        ]
        plt.title("Ratio of number of cliques found given the number of "
                  + unit + " of the graph")
        plt_values = pd.DataFrame({unit: counts, 'ratio': ratios})
        plt.xlabel(x_label)
        plt.ylabel("Ratio greedy/backtrack")
        plt.plot(unit, 'ratio', data=plt_values, lw=2)
        save_and_clear('ratio_')

    if not n_constant:
        plt.rcParams["figure.figsize"] = (18, 12)
        plot_comparison(results_by_node, 'nodes')

    # The edge figures use a shorter canvas than the node figures.
    plt.rcParams["figure.figsize"] = (18, 8)
    plot_comparison(results_by_edge, 'edges')
        if not args.one_k:
            Ks = competitive_Ks

    if args.num_clusters < 1 and not (args.non_competitive or args.competitive
                                      or args.load_all):
        raise ValueError('You need to define the number of clusters!')

    #assert(len(graph_files) == len(Ks))
    if len(Ks) == 1 and len(graph_files) > 1:
        Ks = Ks * len(graph_files)

    for graph_file, k in zip(graph_files, Ks):

        print("Running {} with k={}".format(graph_file, k))

        graph_data, header = helpers.load_graph(graph_file)
        print("Graph data loaded.")

        adjacency_matrix = helpers.calculate_adjacency_matrix(graph_data)

        algo_pairs = []

        if (args.run_kmean or args.run_balanced):
            L_norm, dd = scipy.sparse.csgraph.laplacian(adjacency_matrix,
                                                        normed=True,
                                                        return_diag=True)
            embedding = helpers.calculate_embedding_representation(
                L_norm, dd, k)

            print("Laplacian calculated.")
            if args.run_kmean: