# Manufacturing Entropy - Code for the article
# Original Data: Kaggle competition - Bosch Manufacturing Data: train_date.csv file
# Data used in this file: pre-processed ("data/manufacturing_paths.txt" and "data/manufacturing_edges.txt")
# Yamila Mariel Omar
# Date of original code: 21st July 2020

from graphfile import GraphFile


# Load manufacturing paths
# ========================
paths = GraphFile("data/manufacturing_paths.txt").read_paths_with_count()
# Total number of items = sum of the per-path multiplicities.
number_of_manufactured_items = sum(paths.values())

# Load edges
# ==========
edges = GraphFile("data/manufacturing_edges.txt").read_edges_from_file()

# Filter out edges that only serve a handful of items
# ===================================================
# An edge is considered noise when it carried fewer than 0.1% of all
# manufactured items.
threshold = 0.001 * number_of_manufactured_items
# Build the set directly instead of building a list and converting it.
edges_to_remove = {k for k, v in edges.items() if v < threshold}

# Clean manufacturing paths
# =========================
clean_paths = {}
for k,v in paths.items():
    e = [(i,j) for (i,j) in zip(k, k[1:])]
    e = set(e)
    if len(edges_to_remove.intersection(e)) == 0:
    from graphfile import GraphFile
    from graph import Graph
    import multiprocessing
    import datetime
    import sys

    # Entropy-calculation method selector, passed on the command line.
    method = int(sys.argv[1])

    # Read data: clean paths and clean edges
    # ======================================
    # full_path = "/mnt/irisgpfs/users/yomar/manufacturing_entropy_article/"
    filename_paths = "data/clean_manufacturing_paths.txt"
    # filename_paths = full_path + filename_paths
    filename_edges = "data/clean_manufacturing_edges.txt"
    # filename_edges = full_path + filename_edges
    edges = GraphFile(filename_edges).read_edges_from_file()
    paths = GraphFile(filename_paths).read_paths_with_count()

    # =========================================================================
    # CALCULATE ENTROPY
    # =========================================================================
    if method == 1:
        # Method 1: weighted, directed graph with self-loops on all nodes
        # self-loops that do not represent ending nodes have a value of 1
        # Due to computation issues, this is deployed in an HPC!!!
        # =============================================================
        # NOTE(review): add_self_loops is defined elsewhere in the original
        # file — not visible in this fragment.
        edges = add_self_loops(paths, edges)
        G = Graph(edges)

        # Presumably adds a unit self-loop to every node that lacks one.
        # NOTE(review): fragment is truncated here — the loop body is missing.
        for n in G.nodes:
            if G.edges.get((n,n), None) == None:
# Data: F2.txt
# Yamila Mariel Omar
# Date of original code: 9th February 2021
# Date of code last modification: 9th February 2021

from graphviz import Digraph
import sys
sys.path.append('..')

from graphfile import GraphFile
from graph import Graph

# Load edges
# ==========
filename = "data/F2.txt"
edges = GraphFile(filename).read_edges_from_file()
# Total items = sum of weights on edges incident to the artificial "source"
# node; every edge weight is then normalized to a fraction of that total.
total_items = sum([v for k, v in edges.items() if "source" in k])
edges = {k: v / total_items for k, v in edges.items()}

G = Graph(edges)

# Get node list
# =============
nodes = G.nodes

# Nodes positions in plot
# =======================
# Fixed node coordinates for Graphviz; the "!" suffix pins a node's
# position when using the neato/fdp layout engines.
# NOTE(review): this fragment is truncated — the dict literal is unclosed.
positions = {
    'source': "0,9!",
    '24': "-1,8!",
    '25': "1,8!",
# Data: full manufacturing network obtained from Kaggle competition data (after cleaning)
# Yamila Mariel Omar
# Date of original code: 19th January 2021
# Date of code last modification: 19th January 2021

from graphfile import GraphFile
from graph import Graph
import betweenness_centrality
import clustering_coefficient
import depth_first_search as dfs

if __name__ == "__main__":
    # Read data
    # =========
    filename = "data/clean_manufacturing_edges.txt"
    edges = GraphFile(filename).read_edges_from_file()
    G = Graph(edges)

    # =========================================================================
    # DEGREE
    # =========================================================================
    # G.degree yields a pair of (in-degree, out-degree) containers —
    # presumably keyed by node; confirm against graph.Graph.
    indeg, outdeg = G.degree

    # =========================================================================
    # STRENGTH
    # =========================================================================
    # Strength is presumably the weighted analogue of degree (sum of incident
    # edge weights) — confirm against graph.Graph.
    instr, outstr = G.strength

    # =========================================================================
    # BETWEENNESS CENTRALITY
    # =========================================================================
Esempio n. 5
0
# ===== End Function definitions =========

if __name__ == "__main__":
    # Import needed modules
    # =====================
    from graphfile import GraphFile
    from graph import Graph
    import multiprocessing
    import datetime

    # Read data: clean paths and clean edges
    # ======================================
    filename_paths = "data/clean_manufacturing_paths.txt"
    filename_edges = "data/clean_manufacturing_edges.txt"
    edges = GraphFile(filename_edges).read_edges_from_file()
    paths = GraphFile(filename_paths).read_paths_with_count()

    # Generate graph from clean edges
    # ===============================
    # NOTE(review): add_self_loops is defined above this main guard in the
    # original file — not visible in this fragment.
    edges = add_self_loops(paths, edges)
    G = Graph(edges)

    print("Number of nodes: ", len(G.nodes))
    print("Number of edges: ", len(G.edges.keys()))

    # Color code nodes
    # =======================
    # Assign each node a colour based on membership in hard-coded node sets.
    # NOTE(review): fragment is truncated mid set-literal below.
    node_colors = dict()
    for node in G.nodes:
        if node in {0, 1, 24, 12, 25, 13, 2, 3, 14, 15,
    from graphfile import GraphFile
    from graph import Graph
    import multiprocessing
    import datetime
    import sys

    # Entropy-calculation method selector, passed on the command line.
    method = int(sys.argv[1])

    # Read data: clean paths and clean edges
    # ======================================
    # full_path = "/mnt/irisgpfs/users/yomar/manufacturing_entropy_article/"
    filename_paths = "data/clean_manufacturing_paths.txt"
    # filename_paths = full_path + filename_paths
    filename_edges = "data/clean_manufacturing_edges.txt"
    # filename_edges = full_path + filename_edges
    edges = GraphFile(filename_edges).read_edges_from_file()
    paths = GraphFile(filename_paths).read_paths_with_count()

    # =========================================================================
    # CALCULATE ENTROPY
    # =========================================================================
    if method == 1:
        # Method 1: binary, directed graph with self-loops on all nodes
        # Due to computation issues, this is deployed in an HPC!!!
        # =============================================================
        # Binarize the graph: every existing edge gets weight 1.
        edges = {k: 1 for k, v in edges.items()}
        G = Graph(edges)

        # Add a unit self-loop on every node.
        nodes = G.nodes
        for n in G.nodes:
            G.addEdge(n, n, 1)
Esempio n. 7
0
from graphfile import GraphFile
from graph import Graph
from capacity import Capacity
from fordfulkerson import FordFulkerson


# Input data
# ==========
graph_to_study = input("Choose graph to study: F1, F2 or F3? ")


# Load graph
# ==========
filename = "data/" + graph_to_study + ".txt"
edges = GraphFile(filename).read_edges_from_file()
F = Graph(edges)

# Get edges capacity
# ==================
nodes_capacity = GraphFile("data/nodes_capacity.txt").read_nodes_capacity_from_file()
C = Capacity(nodes_capacity, 'i', 'f')
C_edges = C.get_edges_capacity(F, "weight")

# Truncate capacities to integers, except on edges touching the artificial
# source 'i' or sink 'f', whose capacities are left untouched.
# (Rewrites values for existing keys only, so iterating items() is safe.)
for k, v in C_edges.items():
    if ("i" not in k) and ("f" not in k):
        C_edges[k] = int(v)

# Drop edges left with no capacity.
C_edges = {k: v for k, v in C_edges.items() if v > 0}
Esempio n. 8
0
from capacity import Capacity
from fordfulkerson import FordFulkerson
import string
import json

import sys
sys.path.append('..')

from graphfile import GraphFile
from graph import Graph

# Load edges
# ==========
filename = "data/F2.txt"
edges = GraphFile(filename).read_edges_from_file()
# Total items = sum of weights on edges incident to the artificial "source"
# node; every edge weight is then normalized to a fraction of that total.
total_items = sum(v for k, v in edges.items() if "source" in k)
edges = {k: v / total_items for k, v in edges.items()}

G = Graph(edges)

# Get edges capacity
# ==================
filename = "results/capacity_estimation.json"
# Use a context manager so the file handle is closed deterministically (the
# original `open(...).read()` leaked the handle) and parse the stream directly.
with open(filename, "r") as f:
    nodes_capacity = json.load(f)
# Keys look like "n<id>": drop the first character and cast the rest to int.
nodes_capacity = {int(k[1:]): v for k, v in nodes_capacity.items()}
C = Capacity(nodes_capacity, source_node='source', sink_node='sink')
C_edges = C.get_edges_capacity(G, "weight")

# Flow Network
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    plt.tight_layout()
    plt.savefig(filename)
    plt.close()



# =============================================================================
# MAIN
# =============================================================================
if __name__ == "__main__":
    # Read data
    # =========
    filename = "data/clean_manufacturing_edges.txt"
    edges = GraphFile(filename).read_edges_from_file()
    G = Graph(edges)

    # Get start nodes and their fraction
    # ==================================
    # Tally how many items begin their path at each node. Each line of the
    # paths file is space-separated; the first field is parsed as the
    # starting node and the last field as the path's item count.
    total_number_of_items_manufactured = 0
    start_nodes = {n: 0 for n in G.nodes}
    with open("data/clean_manufacturing_paths.txt", "r") as f:
        for line in f:
            line = line.strip().split(" ")
            n_0 = int(line[0])
            path_count = int(line[-1])
            start_nodes[n_0] += path_count
            total_number_of_items_manufactured += path_count

    # Convert raw counts into fractions of all manufactured items.
    start_nodes = {k:v/total_number_of_items_manufactured for k,v in start_nodes.items()}
Esempio n. 10
0
# ===== END Function definitions ============

if __name__ == "__main__":
    # Import needed modules
    # =====================
    from graphfile import GraphFile
    from graph import Graph
    import datetime
    import matplotlib.pyplot as plt

    # Read data: clean paths and clean edges
    # ======================================
    filename_paths = "data/clean_manufacturing_paths.txt"
    filename_edges = "data/clean_manufacturing_edges.txt"
    edges = GraphFile(filename_edges).read_edges_from_file()
    paths = GraphFile(filename_paths).read_paths_with_count()

    # Generate graph from clean edges
    # ===============================
    # NOTE(review): add_self_loops is defined above this main guard in the
    # original file — not visible in this fragment.
    edges = add_self_loops(paths, edges)
    G = Graph(edges)

    # ==========================================================================
    # Get frequency of n in paths
    # ==========================================================================
    # For every node, count how many items passed through it: each path
    # contributes its multiplicity v to every node it visits.
    freq_n_in_paths = {n: 0 for n in G.nodes}
    for p, v in paths.items():
        for n in p:
            freq_n_in_paths[n] += v