Exemple #1
0
def generate_graph(limit="ALL"):
    """Build an adjacency-list transfer graph from TRANSFER_EVENTS and pickle it.

    The resulting dict maps FROM_ADDRESS -> list of
    (TO_ADDRESS, ([timestamps], [amounts])) entries; repeated transfers
    between the same pair of addresses are merged by appending to the two
    parallel lists of the existing entry.

    Args:
        limit: "ALL" to load every row, otherwise a value appended to the
            query as an SQL LIMIT.
    """
    print("Graph generation started", str(datetime.today()))
    query = "SELECT FROM_ADDRESS, TO_ADDRESS, TIMESTAMP, DATA_FLOAT FROM TRANSFER_EVENTS"
    if limit == "ALL":
        records = db_operations.select_records(True, {"query": query})
    else:
        records = db_operations.select_records(
            True, {"query": query + " LIMIT " + str(limit)})

    bat_transfer_graph = dict()
    for from_addr, to_addr, timestamp, amount in records:
        # setdefault replaces the original membership test plus the repeated
        # .get(record[0]) lookups.
        adjacency = bat_transfer_graph.setdefault(from_addr, [])
        for neighbor, (timestamps, amounts) in adjacency:
            if neighbor == to_addr:
                # Edge already present: merge this transfer into it.
                timestamps.append(timestamp)
                amounts.append(amount)
                break
        else:
            # First transfer seen between this pair of addresses.
            adjacency.append((to_addr, ([timestamp], [amount])))

    print("Graph generation ended", str(datetime.today()))
    misc_operations.dump_load_pickle_object(
        "dump", "pickled_objects/transfer_events_graph", bat_transfer_graph)
Exemple #2
0
def generate_graph_nx(limit="ALL"):
    """Load all transfer events into a networkx DiGraph and pickle it.

    Each transfer becomes a directed edge FROM_ADDRESS -> TO_ADDRESS whose
    ``weight`` attribute is the (TIMESTAMP, DATA_FLOAT) pair.

    Args:
        limit: "ALL" to load every row, otherwise a value appended to the
            query as an SQL LIMIT.
    """
    print("Graph generation started", str(datetime.today()))
    query = "SELECT FROM_ADDRESS, TO_ADDRESS, TIMESTAMP, DATA_FLOAT FROM TRANSFER_EVENTS"
    if limit == "ALL":
        records = db_operations.select_records(True, {"query": query})
    else:
        records = db_operations.select_records(
            True, {"query": query + " LIMIT " + str(limit)})

    directed_bat_transfer_graph = nx.DiGraph()
    for from_addr, to_addr, timestamp, amount in records:
        directed_bat_transfer_graph.add_edge(from_addr,
                                             to_addr,
                                             weight=(timestamp, amount))

    misc_operations.dump_load_pickle_object(
        "dump", "pickled_objects/transfer_events_graph_nx",
        directed_bat_transfer_graph)
    print("Graph generation ended", str(datetime.today()))
Exemple #3
0
def create_gephi_graph(limit="ALL"):
    """Export the transfer graph to two Gephi GEXF files.

    Builds two DiGraphs over FROM_ADDRESS -> TO_ADDRESS edges: one with
    every edge weighted 1 (transfer-count view) and one weighted by the
    transferred amount (DATA_FLOAT), then writes both under output/gephi/.

    Args:
        limit: "ALL" to load every row (the previous hard-coded behavior,
            so existing zero-argument callers are unchanged), otherwise a
            value appended to the query as an SQL LIMIT.
    """
    # `limit` was a hard-coded local before, which made the else branch
    # unreachable; promoting it to a default parameter matches the sibling
    # generate_* functions without changing existing calls.
    query = "SELECT FROM_ADDRESS, TO_ADDRESS, TIMESTAMP, DATA_FLOAT FROM TRANSFER_EVENTS"
    if limit == "ALL":
        records = db_operations.select_records(True, {"query": query})
    else:
        records = db_operations.select_records(
            True, {"query": query + " LIMIT " + str(limit)})

    directed_bat_transfer_graph_count = nx.DiGraph()
    directed_bat_transfer_graph_bat = nx.DiGraph()
    for record in records:
        directed_bat_transfer_graph_count.add_edge(record[0],
                                                   record[1],
                                                   weight=1)
        directed_bat_transfer_graph_bat.add_edge(record[0],
                                                 record[1],
                                                 weight=record[3])

    nx.write_gexf(directed_bat_transfer_graph_count,
                  "output/gephi/transfer_events_graph_nx_count.gexf")
    nx.write_gexf(directed_bat_transfer_graph_bat,
                  "output/gephi/transfer_events_graph_nx_bat.gexf")
Exemple #4
0
def generate_indegree_outdegree_plots(limit="ALL"):
    """Plot the top-10 in-degree and out-degree addresses of the transfer graph.

    Produces four bar charts: in- and out-degree counting every transfer as
    an edge ("keeping Multi-edges"), and the same pair counting each
    distinct counterparty once ("merging Multi-edges"). Finally prints the
    union of every address that appeared in any chart.

    Args:
        limit: "ALL" to load every row, otherwise a value appended to the
            query as an SQL LIMIT.
    """
    print("In-Degree Out-Degree Analysis started", str(datetime.today()))
    query = "SELECT FROM_ADDRESS, TO_ADDRESS, TIMESTAMP, DATA_FLOAT FROM TRANSFER_EVENTS"
    if limit == "ALL":
        records = db_operations.select_records(True, {"query": query})
    else:
        records = db_operations.select_records(
            True, {"query": query + " LIMIT " + str(limit)})

    in_degree_multi_edge_merged_map = dict()   # TO_ADDRESS -> {FROM_ADDRESS, ...}
    out_degree_multi_edge_merged_map = dict()  # FROM_ADDRESS -> {TO_ADDRESS, ...}
    in_degree_map = dict()                     # TO_ADDRESS -> incoming edge count
    out_degree_map = dict()                    # FROM_ADDRESS -> outgoing edge count
    for record in records:
        # dict.get / setdefault replace the original membership-test branches.
        out_degree_map[record[0]] = out_degree_map.get(record[0], 0) + 1
        in_degree_map[record[1]] = in_degree_map.get(record[1], 0) + 1
        out_degree_multi_edge_merged_map.setdefault(record[0],
                                                    set()).add(record[1])
        in_degree_multi_edge_merged_map.setdefault(record[1],
                                                   set()).add(record[0])

    # Collapse the counterparty sets to their sizes (merged-multi-edge degree).
    in_degree_multi_edge_merged_count_map = {
        address: len(sources)
        for address, sources in in_degree_multi_edge_merged_map.items()
    }
    out_degree_multi_edge_merged_count_map = {
        address: len(targets)
        for address, targets in out_degree_multi_edge_merged_map.items()
    }

    in_degree_map_descending = sorted(in_degree_map.items(),
                                      key=operator.itemgetter(1),
                                      reverse=True)
    out_degree_map_descending = sorted(out_degree_map.items(),
                                       key=operator.itemgetter(1),
                                       reverse=True)

    in_degree_multi_edge_merged_count_map_descending = sorted(
        in_degree_multi_edge_merged_count_map.items(),
        key=operator.itemgetter(1),
        reverse=True)
    out_degree_multi_edge_merged_count_map_descending = sorted(
        out_degree_multi_edge_merged_count_map.items(),
        key=operator.itemgetter(1),
        reverse=True)

    x_set = set()  # union of every address shown in any of the four charts

    def _plot_top_10(degree_items_descending, title):
        # One bar chart of the 10 highest-degree addresses. Slicing (instead
        # of the original range(10)) also tolerates fewer than 10 entries.
        x_data = []
        y_data = []
        for address, degree in degree_items_descending[:10]:
            x_data.append(address[26:30])  # short 4-char label for the x-axis
            y_data.append(degree)
            x_set.add(address)
        misc_operations.plot_basic_bar_chart(x_data, y_data, title, "Address",
                                             "Degree", 12)

    _plot_top_10(in_degree_map_descending,
                 "Top In-Degree Nodes (keeping Multi-edges)")
    _plot_top_10(out_degree_map_descending,
                 "Top Out-Degree Nodes (keeping Multi-edges)")
    _plot_top_10(in_degree_multi_edge_merged_count_map_descending,
                 "Top In-Degree Nodes (merging Multi-edges)")
    _plot_top_10(out_degree_multi_edge_merged_count_map_descending,
                 "Top Out-Degree Nodes (merging Multi-edges)")

    print("Union of X-Data:")
    # Plain loop instead of a list comprehension used only for side effects.
    for address in x_set:
        print(address)

    print("In-Degree Out-Degree Analysis ended", str(datetime.today()))
Exemple #5
0
def generate_transaction_plots(limit="ALL"):
    """Plot daily transaction count, total and average BAT transferred.

    Aggregates TRANSFER_EVENTS per local-time day, keeps only days after
    2017-06-01, then draws three single-line charts plus one combined chart
    of the down-scaled sum/average series.

    Args:
        limit: "ALL" to load every row, otherwise a value appended to the
            query as an SQL LIMIT.
    """
    print("Plot generation started", str(datetime.today()))
    query = "SELECT DATE(TIMESTAMP, 'unixepoch', 'localtime'), COUNT(1), SUM(DATA_FLOAT), AVG(DATA_FLOAT) FROM TRANSFER_EVENTS WHERE TIMESTAMP != 0 GROUP BY DATE(TIMESTAMP, 'unixepoch', 'localtime') ORDER BY TIMESTAMP"
    if limit == "ALL":
        records = db_operations.select_records(True, {"query": query})
    else:
        records = db_operations.select_records(
            True, {"query": query + " LIMIT " + str(limit)})

    x_data = []
    y_data_number_transactions = []
    y_data_number_transactions_downscaled = []
    y_data_sum = []
    y_data_sum_downscaled = []
    y_data_avg = []
    y_data_avg_downscaled = []
    # Hoisted: the constant cutoff is parsed once instead of once per row,
    # and each row's date is parsed once instead of twice.
    cutoff_date = datetime.strptime('2017-06-01', '%Y-%m-%d').date()
    for record in records:
        record_date = datetime.strptime(record[0], '%Y-%m-%d').date()
        if record_date > cutoff_date:
            x_data.append(record_date)
            y_data_number_transactions.append(record[1])
            y_data_number_transactions_downscaled.append(
                float(record[1]) / 1000.0)
            y_data_sum.append(int(record[2]))
            y_data_sum_downscaled.append(float(int(record[2])) / 100000000.0)
            y_data_avg.append(int(record[3]))
            y_data_avg_downscaled.append(float(int(record[3])) / 10000000.0)

    # Series for the combined chart; dates converted to matplotlib numbers.
    data_map = dict()
    data_map["Total BAT Transferred (X $10^8$)"] = (
        matplotlib.dates.date2num(x_data), y_data_sum_downscaled)
    data_map["Avg. BAT Transferred (X $10^7$)"] = (
        matplotlib.dates.date2num(x_data), y_data_avg_downscaled)

    misc_operations.plot_basic_line_chart(x_data,
                                          y_data_number_transactions,
                                          "#Transactions v/s Date",
                                          "Date",
                                          "#Transactions",
                                          date=True)
    misc_operations.plot_basic_line_chart(x_data,
                                          y_data_sum,
                                          "Total BAT Transferred v/s Date",
                                          "Date",
                                          "Total BAT Transferred",
                                          date=True)
    misc_operations.plot_basic_line_chart(x_data,
                                          y_data_avg,
                                          "Avg BAT Transferred v/s Date",
                                          "Date",
                                          "Avg. BAT Transferred",
                                          date=True)

    misc_operations.plot_multiple_lines_chart(data_map,
                                              "BAT Transferred v/s Date",
                                              "Date",
                                              "BAT Transferred",
                                              date=True)

    print("Plot generation ended", str(datetime.today()))
Exemple #6
0
def generate_average_distance_of_scc_from_brave(seed_date="2017-05-29"):
    """Per 4-week window, count strongly connected components that contain
    a node reachable from the Brave address in fewer than 6 hops, and plot
    the counts over time.

    Each window rebuilds the transfer graph from every TRANSFER_EVENTS row
    with TIMESTAMP before the window's end, so the snapshots are cumulative.

    Args:
        seed_date: "%Y-%m-%d" date string; the first window ends 4 weeks
            after it. Windows advance until past 2018-11-09.
    """
    print("Avg. Distance Computation started", str(datetime.today()))

    current_date = datetime.strptime(seed_date, "%Y-%m-%d")
    end_date = datetime.strptime("2018-11-09", "%Y-%m-%d")
    # end_date = datetime.strptime("2017-08-09", "%Y-%m-%d")

    x_data = []  # window-end date labels for the final bar chart
    number_of_scc = []  # per-window count accumulated below

    while True:

        # Cumulative snapshot: all transfers before the current window end.
        query = "SELECT FROM_ADDRESS, TO_ADDRESS, TIMESTAMP, DATA_FLOAT FROM TRANSFER_EVENTS WHERE TIMESTAMP < " + str(
            (current_date + timedelta(weeks=4)).timestamp())
        records = db_operations.select_records(True, {"query": query})

        directed_bat_transfer_graph = nx.DiGraph()
        for record in records:
            directed_bat_transfer_graph.add_edge(record[0],
                                                 record[1],
                                                 weight=(record[2], record[3]))

        # NOTE(review): strongly_connected_component_subgraphs was removed in
        # networkx 2.4 -- this code requires an older networkx release.
        sccs = [
            component
            for component in nx.strongly_connected_component_subgraphs(
                directed_bat_transfer_graph)
        ]
        sccs.sort(key=lambda x: len(x.nodes), reverse=True)

        x_data.append(
            datetime.strftime(current_date + timedelta(weeks=4), "%Y-%m-%d"))

        sys.stdout.write("\rCurrent Execution Date: " +
                         datetime.strftime(current_date +
                                           timedelta(weeks=4), "%Y-%m-%d"))
        print()

        # data_map holds ([labels], [values]) of mean/median hop distances per
        # SCC, used only by the commented-out line chart at the bottom.
        data_map = {"mean": [[], []], "median": [[], []]}
        index = 1
        number_ssc_less_than_6_hops_away = 0
        # sccs[0] (the largest component) is deliberately skipped.
        for scc in sccs[1:]:
            sys.stdout.write(
                "\rPercent Completion: " +
                str(round(float(index - 1) / float(len(sccs)) * 100, 3)))
            nodes = list(scc.nodes)
            distance_list = []
            # Label the component "00" when it contains the Brave address.
            if "0x00000000000000000000000088e2efac3d2ef957fcd82ec201a506871ad06204" in nodes:
                data_map.get("mean")[0].append("00")
                data_map.get("median")[0].append("00")
            else:
                data_map.get("mean")[0].append(str(index))
                data_map.get("median")[0].append(str(index))

            # Shortest-path length from Brave to every node in the component;
            # unreachable nodes get a sentinel distance of 10000.
            for node in nodes:
                if nx.has_path(
                        directed_bat_transfer_graph,
                        source=
                        "0x00000000000000000000000088e2efac3d2ef957fcd82ec201a506871ad06204",
                        target=node):
                    distance_list.append(
                        nx.shortest_path_length(
                            directed_bat_transfer_graph,
                            source=
                            "0x00000000000000000000000088e2efac3d2ef957fcd82ec201a506871ad06204",
                            target=node))
                else:
                    distance_list.append(10000)

            # Count this SCC once if ANY node in it is < 6 hops from Brave.
            # NOTE(review): the chart title below says "greater than 6 Hops
            # Away", but this counter accumulates components WITHIN 6 hops --
            # confirm which one was intended.
            for node in nodes:
                if nx.has_path(
                        directed_bat_transfer_graph,
                        source=
                        "0x00000000000000000000000088e2efac3d2ef957fcd82ec201a506871ad06204",
                        target=node
                ) and nx.shortest_path_length(
                        directed_bat_transfer_graph,
                        source=
                        "0x00000000000000000000000088e2efac3d2ef957fcd82ec201a506871ad06204",
                        target=node) < 6:
                    number_ssc_less_than_6_hops_away += 1
                    break

            data_map.get("mean")[1].append(np.mean(distance_list))
            data_map.get("median")[1].append(np.median(distance_list))
            index += 1

        number_of_scc.append(number_ssc_less_than_6_hops_away)

        # print(data_map)

        # Advance by 4 weeks until the window end passes end_date.
        if current_date + timedelta(weeks=4) > end_date:
            break
        else:
            current_date += timedelta(weeks=4)

    # misc_operations.plot_multiple_lines_chart(data_map, "Average distance of SCCs from Brave till " + date, "SCC", "Hops", x_label_font_size=4)
    misc_operations.plot_basic_bar_chart(
        x_data,
        number_of_scc,
        "#SCC greater than 6 Hops Away from Brave as Time progresses",
        "Date",
        "#SCC",
        x_label_font_size=7)
    print("Avg. Distance Computation ended", str(datetime.today()))
Exemple #7
0
def generate_component_evolution_graphs_per_month(seed_date="2017-05-29"):
    """Track how the strongly connected components of the transfer graph
    evolve over cumulative 4-week windows from *seed_date* to 2018-11-09.

    Per window this reports/plots:
      * the total number of SCCs,
      * the size of the largest SCC,
      * a histogram of SCC sizes over six fixed bins,
      * the top-5 highest-"degree" nodes in the top-5 largest SCCs, where
        degree = in_degree + out_degree + in_degree * out_degree,
      * how much the node sets of the smaller SCCs overlap across windows.

    Args:
        seed_date: "%Y-%m-%d" date string; the first window ends 4 weeks
            after it.
    """
    print("Component Analysis started", str(datetime.today()))
    current_date = datetime.strptime(seed_date, "%Y-%m-%d")
    end_date = datetime.strptime("2018-11-09", "%Y-%m-%d")
    # end_date = datetime.strptime("2017-07-28", "%Y-%m-%d")
    x_data = []  # window-end date labels shared by all charts
    number_of_scc = []  # total SCC count per window
    # Histogram series: bin name -> per-window frequency list (re-shaped into
    # (x_data, frequencies) tuples before plotting).
    component_size_range = {
        "Size 1": [],
        "Size 2 - 10": [],
        "Size 11 - 100": [],
        "Size 101 - 1000": [],
        "Size 1001 - 10000": [],
        "Size >10000": []
    }
    largest_sccs = []  # size of the largest SCC per window
    node_list_as_time_progresses = []  # per-window node sets of smaller SCCs
    max_size_node_list = 0
    while True:
        # query = "SELECT FROM_ADDRESS, TO_ADDRESS, TIMESTAMP, DATA_FLOAT FROM TRANSFER_EVENTS WHERE TIMESTAMP BETWEEN " \
        #         + str(current_date.timestamp()) + " AND " + str((current_date + timedelta(weeks=4)).timestamp())
        # Cumulative snapshot: all transfers before the current window end.
        query = "SELECT FROM_ADDRESS, TO_ADDRESS, TIMESTAMP, DATA_FLOAT FROM TRANSFER_EVENTS WHERE TIMESTAMP < " + str(
            (current_date + timedelta(weeks=4)).timestamp())
        records = db_operations.select_records(True, {"query": query})

        directed_bat_transfer_graph = nx.DiGraph()
        for record in records:
            directed_bat_transfer_graph.add_edge(record[0],
                                                 record[1],
                                                 weight=(record[2], record[3]))
        # directed_bat_transfer_graph = nx.DiGraph(bat_transfer_graph)
        # NOTE(review): strongly_connected_component_subgraphs was removed in
        # networkx 2.4 -- this code requires an older networkx release.
        scc = [
            component
            for component in nx.strongly_connected_component_subgraphs(
                directed_bat_transfer_graph)
        ]

        # directed_bat_transfer_graph = nx.DiGraph()
        # directed_bat_transfer_graph.add_edge('b', 'a')
        # directed_bat_transfer_graph.add_edge('a', 'c')
        # directed_bat_transfer_graph.add_edge('c', 'd')
        # directed_bat_transfer_graph.add_edge('d', 'b')
        # directed_bat_transfer_graph.add_edge('c', 'b')
        # directed_bat_transfer_graph.add_node('e')
        # directed_bat_transfer_graph.add_node('f')
        # scc = [component for component in nx.strongly_connected_component_subgraphs(directed_bat_transfer_graph)]

        x_data.append(
            datetime.strftime(current_date + timedelta(weeks=4), "%Y-%m-%d"))
        number_of_scc.append(len(scc))

        # Binning
        bin_1 = 0
        bin_2 = 0
        bin_3 = 0
        bin_4 = 0
        bin_5 = 0
        bin_6 = 0
        max_size = 0
        for component in scc:
            if 0 < len(component.nodes) <= 1:
                bin_1 += 1
            elif 1 < len(component.nodes) <= 10:
                bin_2 += 1
            elif 10 < len(component.nodes) <= 100:
                bin_3 += 1
            elif 100 < len(component.nodes) <= 1000:
                bin_4 += 1
            elif 1000 < len(component.nodes) <= 10000:
                bin_5 += 1
            else:
                bin_6 += 1

            if len(component.nodes) > max_size:
                max_size = len(component.nodes)

        component_size_range.get("Size 1").append(bin_1)
        component_size_range.get("Size 2 - 10").append(bin_2)
        component_size_range.get("Size 11 - 100").append(bin_3)
        component_size_range.get("Size 101 - 1000").append(bin_4)
        component_size_range.get("Size 1001 - 10000").append(bin_5)
        component_size_range.get("Size >10000").append(bin_6)

        sub_graphs = [sg for sg in scc]

        # Size of the Largest SCC
        top_5_scc_size = [len(sub_graph.nodes) for sub_graph in sub_graphs]
        top_5_scc_size.sort(reverse=True)
        largest_sccs.append(top_5_scc_size[0])

        # Top-5 Largest SCCs with Top-5 highest degree nodes in them (degree = in_degree + out_degree + in_degree * out_degree)
        degree_distribution_each_sub_graph_sorted_by_degree = list()
        for sg in sub_graphs:
            # Sort both degree views by node name so the two lists line up at
            # equal indices for the recomputation below.
            in_degree_list = list(sg.in_degree)
            out_degree_list = list(sg.out_degree)
            in_degree_list = sorted(in_degree_list, key=lambda x: x[0])
            out_degree_list = sorted(out_degree_list, key=lambda x: x[0])
            # for i in range(len(in_degree_list)):
            #     if in_degree_list[i][1] != out_degree_list[i][1]:
            #         raise ValueError("Good")
            sg_degree_recomputed = list()

            for index in range(len(in_degree_list)):
                node = in_degree_list[index][0]
                in_degree = in_degree_list[index][1]
                out_degree = out_degree_list[index][1]
                sg_degree_recomputed.append(
                    (node, in_degree + out_degree + in_degree * out_degree,
                     in_degree, out_degree))

            sg_degree_recomputed = sorted(sg_degree_recomputed,
                                          key=lambda x: x[1],
                                          reverse=True)
            # print(len(sg.nodes), ":::::", sg_degree_recomputed)
            degree_distribution_each_sub_graph_sorted_by_degree.append(
                sg_degree_recomputed)

        # Largest components first.
        degree_distribution_each_sub_graph_sorted_by_degree.sort(
            key=lambda x: -len(x))

        top_5_largest_components_with_top_5_highest_degree_nodes_in_them = [
            component[0:5] for component in
            degree_distribution_each_sub_graph_sorted_by_degree[0:5]
        ]

        print(
            "Highest Degree Nodes in Top 5 Largest SCC till",
            datetime.strftime((current_date + timedelta(weeks=4)),
                              "%Y-%m-%d"), ":\n", top_5_scc_size[0:5], "\n",
            top_5_largest_components_with_top_5_highest_degree_nodes_in_them)

        # Intersection of Sets of Nodes involved in top few SCC excluding the astronomical SCC
        # Collect nodes from the largest smaller SCCs until ~node_limit nodes.
        sub_graphs_sorted = sorted(sub_graphs,
                                   key=lambda x: len(x.nodes),
                                   reverse=True)[1:]
        node_limit = 200
        total_nodes = 0
        node_list = []
        for sg in sub_graphs_sorted:
            nodes = list(sg.nodes)
            total_nodes += len(nodes)
            node_list.extend(nodes)
            if total_nodes > node_limit:
                break
        node_list_as_time_progresses.append(set(node_list))
        if max_size_node_list < len(node_list):
            max_size_node_list = len(node_list)

        # Similarity = |intersection of all windows so far| / largest list size.
        percent_similarity = len(
            set.intersection(
                *node_list_as_time_progresses)) / max_size_node_list * 100
        print("Percent Similarity In Smaller Components:", percent_similarity)
        print("Intersecting Addresses:",
              set.intersection(*node_list_as_time_progresses))

        # Calling the Average and Median Distance from Brave Calculator
        # generate_average_distance_of_scc_from_brave(directed_bat_transfer_graph, scc, datetime.strftime(current_date + timedelta(weeks=4), "%Y-%m-%d"))

        # Advance by 4 weeks until the window end passes end_date.
        if current_date + timedelta(weeks=4) > end_date:
            break
        else:
            current_date += timedelta(weeks=4)

    # print(component_size_range)

    # Re-shape each histogram series into (x_data, frequencies) for plotting.
    for bin_name in component_size_range:
        component_size_range[bin_name] = (x_data,
                                          component_size_range.get(bin_name))

    misc_operations.plot_basic_bar_chart(
        x_data,
        number_of_scc,
        "#Strongly_Connected_Components v/s Date",
        "Date",
        "#SCC",
        x_label_font_size=7)

    misc_operations.plot_basic_bar_chart(
        x_data,
        largest_sccs,
        "Largest_Strongly_Connected_Components v/s Date",
        "Date",
        "Size",
        x_label_font_size=7)

    misc_operations.plot_multiple_lines_chart(
        component_size_range,
        "Frequency_of_Bins_of_Strongly_Connected_Components v/s Date",
        "Date",
        "Frequency",
        date=False,
        x_label_font_size=7)

    # Re-plot without the dominant "Size 1" bin so the others are visible.
    component_size_range.pop("Size 1", None)

    misc_operations.plot_multiple_lines_chart(
        component_size_range,
        "Frequency_of_Bins_of_Strongly_Connected_Components (w.o. most frequent bin) v/s Date",
        "Date",
        "Frequency",
        date=False,
        x_label_font_size=7)

    node_list_as_time_progresses.sort(key=lambda x: len(x), reverse=True)
    percent_similarity = len(
        set.intersection(*node_list_as_time_progresses)) / len(
            node_list_as_time_progresses[0]) * 100
    print("Node List:", node_list_as_time_progresses)
    # print("Max(sorted):", len(node_list_as_time_progresses[0]), "|", "Max(variable):", max_size_node_list)
    print("Percent Similarity In Smaller Components:", percent_similarity)

    print("Component Analysis ended", str(datetime.today()))