# Manufacturing Entropy - Code for the article
# Original Data: Kaggle competition - Bosch Manufacturing Data: train_date.csv file
# Data used in this file: pre-processed ("data/manufacturing_paths.txt" and "data/manufacturing_edges.txt")
# Yamila Mariel Omar
# Date of original code: 21st July 2020

from graphfile import GraphFile

# Load manufacturing paths
# ========================
paths = GraphFile("data/manufacturing_paths.txt").read_paths_with_count()
number_of_manufactured_items = sum(paths.values())

# Load edges
# ==========
edges = GraphFile("data/manufacturing_edges.txt").read_edges_from_file()

# Filter out edges that only serve a handful of items
# ===================================================
threshold = 0.001 * number_of_manufactured_items
edges_to_remove = {k for k, v in edges.items() if v < threshold}

# Clean manufacturing paths: keep only paths that use no removed edge
# ===================================================================
clean_paths = {}
for k, v in paths.items():
    e = set(zip(k, k[1:]))
    if len(edges_to_remove.intersection(e)) == 0:
        clean_paths[k] = v
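# A minimal sketch of the filtering step above on toy data. The dict shapes
# ({path tuple: count} for paths, {(i, j): count} for edges) are assumptions
# inferred from how paths and edges are used in this script.
toy_paths = {(0, 1, 2): 9995, (0, 3, 2): 5}
toy_edges = {(0, 1): 9995, (1, 2): 9995, (0, 3): 5, (3, 2): 5}
toy_items = sum(toy_paths.values())                      # 10000 items
toy_rare = {e for e, c in toy_edges.items() if c < 0.001 * toy_items}
toy_clean = {p: c for p, c in toy_paths.items()
             if not toy_rare.intersection(zip(p, p[1:]))}
assert toy_clean == {(0, 1, 2): 9995}                    # the rare path is dropped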
from graphfile import GraphFile
from graph import Graph
import multiprocessing
import datetime
import sys

method = int(sys.argv[1])

# Read data: clean paths and clean edges
# ======================================
# full_path = "/mnt/irisgpfs/users/yomar/manufacturing_entropy_article/"
filename_paths = "data/clean_manufacturing_paths.txt"
# filename_paths = full_path + filename_paths
filename_edges = "data/clean_manufacturing_edges.txt"
# filename_edges = full_path + filename_edges
edges = GraphFile(filename_edges).read_edges_from_file()
paths = GraphFile(filename_paths).read_paths_with_count()

# =========================================================================
# CALCULATE ENTROPY
# =========================================================================
if method == 1:
    # Method 1: weighted, directed graph with self-loops on all nodes.
    # Self-loops that do not represent ending nodes have a value of 1.
    # Due to computation issues, this is deployed on an HPC!!!
    # =============================================================
    edges = add_self_loops(paths, edges)
    G = Graph(edges)
    for n in G.nodes:
        if G.edges.get((n, n)) is None:
            G.addEdge(n, n, 1)
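# add_self_loops is defined elsewhere in this repository. A minimal sketch
# that is consistent with how it is used above (name kept, behavior assumed):
# each path's ending node receives a self-loop weighted by the number of
# items that terminate there, and the loop over G.nodes above then gives
# every remaining node a placeholder self-loop of weight 1.
def add_self_loops(paths, edges):
    for path, count in paths.items():
        end = path[-1]
        edges[(end, end)] = edges.get((end, end), 0) + count
    return edges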
# Data: F2.txt
# Yamila Mariel Omar
# Date of original code: 9th February 2021
# Date of code last modification: 9th February 2021

from graphviz import Digraph
import sys
sys.path.append('..')
from graphfile import GraphFile
from graph import Graph

# Load edges
# ==========
filename = "data/F2.txt"
edges = GraphFile(filename).read_edges_from_file()
total_items = sum([v for k, v in edges.items() if "source" in k])
edges = {k: v / total_items for k, v in edges.items()}
G = Graph(edges)

# Get node list
# =============
nodes = G.nodes

# Nodes positions in plot
# =======================
positions = {'source': "0,9!",
             '24': "-1,8!",
             '25': "1,8!",
             # ... positions for the remaining nodes ...
             }
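# Sketch of how the pinned positions above can be passed to graphviz: the
# "x,y!" syntax fixes node coordinates, which requires the neato (or fdp)
# layout engine. Styling and output path here are illustrative assumptions,
# not necessarily the article's exact figure.
dot = Digraph(engine="neato")
for n in nodes:
    dot.node(str(n), pos=positions.get(str(n), ""))
for (i, j), w in G.edges.items():
    dot.edge(str(i), str(j), penwidth=str(1 + 5 * w))   # thicker edge = larger flow share
dot.render("results/F2_network", format="pdf", cleanup=True)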
# Data: full manufacturing network obtained from Kaggle competition data (after cleaning)
# Yamila Mariel Omar
# Date of original code: 19th January 2021
# Date of code last modification: 19th January 2021

from graphfile import GraphFile
from graph import Graph
import betweenness_centrality
import clustering_coefficient
import depth_first_search as dfs

if __name__ == "__main__":
    # Read data
    # =========
    filename = "data/clean_manufacturing_edges.txt"
    edges = GraphFile(filename).read_edges_from_file()
    G = Graph(edges)

    # =========================================================================
    # DEGREE
    # =========================================================================
    indeg, outdeg = G.degree

    # =========================================================================
    # STRENGTH
    # =========================================================================
    instr, outstr = G.strength

    # =========================================================================
    # BETWEENNESS CENTRALITY
    # =========================================================================
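    # The betweenness_centrality module imported above is the repository's
    # own and is not shown here. For reference, a minimal sketch of Brandes'
    # algorithm on the unweighted directed graph (an assumption: the
    # article's implementation may treat weights differently).
    from collections import deque

    def brandes_betweenness(nodes, adj):
        # adj is an adjacency dict {node: [successors]}.
        bc = dict.fromkeys(nodes, 0.0)
        for s in nodes:
            stack = []
            preds = {v: [] for v in nodes}
            sigma = dict.fromkeys(nodes, 0); sigma[s] = 1   # shortest-path counts
            dist = dict.fromkeys(nodes, -1); dist[s] = 0
            q = deque([s])
            while q:                                        # BFS from s
                v = q.popleft(); stack.append(v)
                for w in adj.get(v, []):
                    if dist[w] < 0:
                        dist[w] = dist[v] + 1; q.append(w)
                    if dist[w] == dist[v] + 1:
                        sigma[w] += sigma[v]; preds[w].append(v)
            delta = dict.fromkeys(nodes, 0.0)
            while stack:                                    # back-propagate dependencies
                w = stack.pop()
                for v in preds[w]:
                    delta[v] += sigma[v] / sigma[w] * (1 + delta[w])
                if w != s:
                    bc[w] += delta[w]
        return bc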
# ===== End Function definitions =========

if __name__ == "__main__":
    # Import needed modules
    # =====================
    from graphfile import GraphFile
    from graph import Graph
    import multiprocessing
    import datetime

    # Read data: clean paths and clean edges
    # ======================================
    filename_paths = "data/clean_manufacturing_paths.txt"
    filename_edges = "data/clean_manufacturing_edges.txt"
    edges = GraphFile(filename_edges).read_edges_from_file()
    paths = GraphFile(filename_paths).read_paths_with_count()

    # Generate graph from clean edges
    # ===============================
    edges = add_self_loops(paths, edges)
    G = Graph(edges)
    print("Number of nodes: ", len(G.nodes))
    print("Number of edges: ", len(G.edges.keys()))

    # Color code nodes
    # ================
    node_colors = dict()
    for node in G.nodes:
        if node in {0, 1, 24, 12, 25, 13, 2, 3, 14, 15,
from graphfile import GraphFile
from graph import Graph
import multiprocessing
import datetime
import sys

method = int(sys.argv[1])

# Read data: clean paths and clean edges
# ======================================
# full_path = "/mnt/irisgpfs/users/yomar/manufacturing_entropy_article/"
filename_paths = "data/clean_manufacturing_paths.txt"
# filename_paths = full_path + filename_paths
filename_edges = "data/clean_manufacturing_edges.txt"
# filename_edges = full_path + filename_edges
edges = GraphFile(filename_edges).read_edges_from_file()
paths = GraphFile(filename_paths).read_paths_with_count()

# =========================================================================
# CALCULATE ENTROPY
# =========================================================================
if method == 1:
    # Method 1: binary, directed graph with self-loops on all nodes.
    # Due to computation issues, this is deployed on an HPC!!!
    # =============================================================
    edges = {k: 1 for k in edges}
    G = Graph(edges)
    nodes = G.nodes
    for n in G.nodes:
        G.addEdge(n, n, 1)
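# The entropy computation itself lives elsewhere in this file. As a point of
# reference only, a sketch of the one-step Shannon entropy of each node's
# outgoing transition probabilities, a common building block for graph
# entropies (an assumption, not necessarily the article's measure):
import math

def one_step_entropy(G):
    # H(n) = -sum_j p(n -> j) * log2 p(n -> j), with p from edge weights.
    H = {}
    for n in G.nodes:
        out = {j: w for (i, j), w in G.edges.items() if i == n}
        total = sum(out.values())
        H[n] = -sum((w / total) * math.log2(w / total)
                    for w in out.values() if w > 0)
    return H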
from graphfile import GraphFile
from graph import Graph
from capacity import Capacity
from fordfulkerson import FordFulkerson

# Input data
# ==========
graph_to_study = input("Choose graph to study: F1, F2 or F3? ")

# Load graph
# ==========
filename = "data/" + graph_to_study + ".txt"
edges = GraphFile(filename).read_edges_from_file()
F = Graph(edges)

# Get edges capacity
# ==================
nodes_capacity = GraphFile("data/nodes_capacity.txt").read_nodes_capacity_from_file()
C = Capacity(nodes_capacity, 'i', 'f')
C_edges = C.get_edges_capacity(F, "weight")

# Cast capacities to int, except on edges incident to the source 'i' or sink 'f'
for k, v in C_edges.items():
    if ("i" not in k) and ("f" not in k):
        C_edges[k] = int(v)
C_edges = {k: v for k, v in C_edges.items() if v > 0}
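# The FordFulkerson class imported above is the repository's own solver. For
# reference, a minimal Edmonds-Karp sketch over the same capacity dict shape
# {(u, v): capacity}, with source 'i' and sink 'f' as used in this script:
from collections import deque

def max_flow(cap, s="i", t="f"):
    # Repeatedly find a shortest augmenting path by BFS in the residual
    # network and push the bottleneck capacity along it.
    flow, residual = 0, dict(cap)
    while True:
        parent = {s: None}
        q = deque([s])
        while q and t not in parent:
            u = q.popleft()
            for (a, b), c in residual.items():
                if a == u and c > 0 and b not in parent:
                    parent[b] = u
                    q.append(b)
        if t not in parent:
            return flow                      # no augmenting path left
        path, v = [], t
        while parent[v] is not None:         # walk back from sink to source
            path.append((parent[v], v))
            v = parent[v]
        push = min(residual[e] for e in path)
        for (a, b) in path:
            residual[(a, b)] -= push
            residual[(b, a)] = residual.get((b, a), 0) + push
        flow += push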
from capacity import Capacity
from fordfulkerson import FordFulkerson
import string
import json
import sys
sys.path.append('..')
from graphfile import GraphFile
from graph import Graph

# Load edges
# ==========
filename = "data/F2.txt"
edges = GraphFile(filename).read_edges_from_file()
total_items = sum([v for k, v in edges.items() if "source" in k])
edges = {k: v / total_items for k, v in edges.items()}
G = Graph(edges)

# Get edges capacity
# ==================
filename = "results/capacity_estimation.json"
with open(filename, "r") as f:
    nodes_capacity = json.loads(f.read())
nodes_capacity = {int(k[1:]): v for k, v in nodes_capacity.items()}
C = Capacity(nodes_capacity, source_node='source', sink_node='sink')
C_edges = C.get_edges_capacity(G, "weight")

# Flow Network
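# The int(k[1:]) slice above implies JSON keys with a one-character prefix
# before the node id. An illustrative (assumed, not actual) file shape:
example = json.loads('{"n24": 0.35, "n25": 0.41}')
example = {int(k[1:]): v for k, v in example.items()}   # -> {24: 0.35, 25: 0.41}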
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    plt.tight_layout()
    plt.savefig(filename)
    plt.close()

# =============================================================================
# MAIN
# =============================================================================
if __name__ == "__main__":
    # Read data
    # =========
    filename = "data/clean_manufacturing_edges.txt"
    edges = GraphFile(filename).read_edges_from_file()
    G = Graph(edges)

    # Get start nodes and their fraction
    # ==================================
    total_number_of_items_manufactured = 0
    start_nodes = {n: 0 for n in G.nodes}
    with open("data/clean_manufacturing_paths.txt", "r") as f:
        for line in f:
            line = line.strip().split(" ")
            n_0 = int(line[0])
            path_count = int(line[-1])
            start_nodes[n_0] += path_count
            total_number_of_items_manufactured += path_count
    start_nodes = {k: v / total_number_of_items_manufactured for k, v in start_nodes.items()}
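    # Given the parsing above, each line of clean_manufacturing_paths.txt is
    # assumed to look like "0 1 2 13 37": the path's node ids followed by the
    # number of items that took that path. A toy check of the parsing:
    toy_line = "0 1 2 13 37".strip().split(" ")
    assert int(toy_line[0]) == 0 and int(toy_line[-1]) == 37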
# ===== END Function definitions ============

if __name__ == "__main__":
    # Import needed modules
    # =====================
    from graphfile import GraphFile
    from graph import Graph
    import datetime
    import matplotlib.pyplot as plt

    # Read data: clean paths and clean edges
    # ======================================
    filename_paths = "data/clean_manufacturing_paths.txt"
    filename_edges = "data/clean_manufacturing_edges.txt"
    edges = GraphFile(filename_edges).read_edges_from_file()
    paths = GraphFile(filename_paths).read_paths_with_count()

    # Generate graph from clean edges
    # ===============================
    edges = add_self_loops(paths, edges)
    G = Graph(edges)

    # ==========================================================================
    # Get frequency of n in paths
    # ==========================================================================
    freq_n_in_paths = {n: 0 for n in G.nodes}
    for p, v in paths.items():
        for n in p:
            freq_n_in_paths[n] += v
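    # Toy check of the counting above: each node is credited once per path it
    # appears on, weighted by how many items followed that path.
    toy_paths = {(0, 1, 2): 7, (1, 2): 3}
    toy_freq = {n: 0 for n in (0, 1, 2)}
    for p, v in toy_paths.items():
        for n in p:
            toy_freq[n] += v
    assert toy_freq == {0: 7, 1: 10, 2: 10}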