def dataset_parallel_mapper(row, filename):
    '''
    Build one dataset entry from a CSV row and append it to ``filename``.

    The graph stored in ``row["graph_object"]`` is rebuilt, scheduled on two
    machines with unit weights, and the costs reported by the heuristic, the
    "weak strongman" baseline (``naive_2``), ``opt_schedule_given_ordering``
    and ``opt_all_orderings`` are gathered into a single record.

    :param row: mapping whose ``"graph_object"`` value is the literal string
        form of node-link graph data (parsed with ``ast.literal_eval``).
    :param filename: CSV path the record is appended to; no header row is
        written.
    :return: ``[entry_dict]`` if the first speed returned by either optimal
        solver differs from -1, otherwise ``[None]``.
    '''
    columns = [
        "graph_object", "num_tasks", "num_machines", "weights", "order",
        "features", "psize", "GD_cost", "LR_cost", "opt_cost",
        "global_opt_cost", "ETF-H_cost", "weak_strongman_cost"
    ]

    # SECURITY NOTE: literal_eval only accepts Python literals, but the CSV
    # content is still external data and should come from a trusted source.
    dict_dag = ast.literal_eval(row["graph_object"])
    G = nx.node_link_graph(dict_dag)
    num_tasks = len(G)

    _, _, h1_cost, _ = heuristic_algorithm(G, 2)

    w = [1] * num_tasks
    s = [1] * num_tasks
    p = [1] * num_tasks
    etf = Mod_ETF(G, w, s, 2, 2, plot=False)
    weak_strongman_cost = naive_2(G, 2)

    _, speeds, opt_cost = opt_schedule_given_ordering(
        True, G, w, p, etf.order, plot=False, compare=False)
    _, global_speeds, global_opt_cost = opt_all_orderings(
        True, G, 2, w, p, False)

    # NOTE(review): -1 in the first speed slot presumably marks a failed
    # solve; the entry is kept when at least one of the solvers succeeded.
    if speeds[0] == -1 and global_speeds[0] == -1:
        return [None]

    entry_dict = {
        "graph_object": nx.node_link_data(G),
        "num_tasks": num_tasks,
        "num_machines": 2,
        "weights": w,
        "order": etf.order,
        "features": get_feature_set(G),
        "psize": speed_to_psize(speeds),
        "GD_cost": np.inf,
        "LR_cost": np.inf,
        "opt_cost": opt_cost,
        "global_opt_cost": global_opt_cost,
        "ETF-H_cost": h1_cost,
        "weak_strongman_cost": weak_strongman_cost
    }

    # DataFrame.append no longer exists in pandas 2.x, so the one-row frame is
    # built directly from the record.
    entry_df = pd.DataFrame([entry_dict], columns=columns)

    # LOCK is defined elsewhere in the module; only acquire()/release() are
    # relied on. The finally clause guarantees the lock is released even when
    # the write fails, so other workers cannot be blocked forever.
    LOCK.acquire()
    try:
        entry_df.to_csv(filename, mode='a', header=False, index=False)
    finally:
        LOCK.release()

    return [entry_dict]
def naive_2(G, num_machines):
    '''
    Return the objective value of the "naive 2" schedule for G.

    Each task's pseudo-size is its number of descendants in G plus one; the
    pseudo-sizes are converted to speeds and handed to Mod_ETF together with
    unit weights and tie-breaking rule 2.

    :param G: graph whose nodes are addressed as 0 .. len(G) - 1
    :param num_machines: number of machines passed on to Mod_ETF
    :return: the ``obj_value`` attribute of the resulting Mod_ETF object
    '''
    task_count = len(G)

    descendant_sizes = []
    for node in range(task_count):
        reachable = nx.algorithms.dag.descendants(G, node)
        descendant_sizes.append(len(reachable) + 1)

    speeds = psize_to_speed(descendant_sizes)
    unit_weights = [1] * task_count

    schedule = Mod_ETF(G, unit_weights, speeds, num_machines, 2)
    return schedule.obj_value
def create_dataset(num_machines, csv_file):
    '''
    Build a dataset DataFrame from the graphs listed in ``csv_file``.

    Every row of the CSV must provide a ``"graph_object"`` column holding the
    literal string form of node-link graph data. Each graph is scheduled with
    unit weights and the resulting costs are stored as one output row. Graphs
    for which ``opt_schedule_given_ordering`` returns -1 as its first speed
    are skipped.

    :param num_machines: number of machines used for every schedule
    :param csv_file: path (or buffer) accepted by ``pd.read_csv``
    :return: DataFrame with one row per retained graph; empty, but with the
        full column set, when no graph is retained.
    '''
    columns = [
        "graph_object", "num_tasks", "num_machines", "weights", "order",
        "features", "psize", "GD_cost", "LR_cost", "RLP_cost", "ETF-H_cost",
        "weak_strongman_cost"
    ]
    tie_breaking_rule = 2

    # Rows are accumulated in a plain list and converted once at the end:
    # DataFrame.append was removed in pandas 2.0 and copied the whole frame
    # on every call.
    entries = []
    csv_df = pd.read_csv(csv_file)
    for _, row in csv_df.iterrows():
        # SECURITY NOTE: literal_eval only accepts Python literals, but the
        # CSV content is still external data and should be trusted input.
        dict_dag = ast.literal_eval(row["graph_object"])
        G = nx.node_link_graph(dict_dag)
        num_tasks = len(G)

        _, _, h1_cost, _ = heuristic_algorithm(G, num_machines)

        w = [1] * num_tasks
        s = [1] * num_tasks
        p = [1] * num_tasks
        etf = Mod_ETF(G, w, s, num_machines, tie_breaking_rule, plot=False)
        weak_strongman_cost = naive_2(G, num_machines)

        _, speeds, opt_cost = opt_schedule_given_ordering(
            True, G, w, p, etf.order, plot=False, compare=False)

        # NOTE(review): -1 presumably signals that no schedule was found.
        if speeds[0] == -1:
            continue

        entries.append({
            "graph_object": nx.node_link_data(G),
            "num_tasks": num_tasks,
            "num_machines": num_machines,
            "weights": w,
            "order": etf.order,
            "features": get_feature_set(G),
            "psize": speed_to_psize(speeds),
            "GD_cost": np.inf,
            "LR_cost": np.inf,
            "RLP_cost": opt_cost,
            "ETF-H_cost": h1_cost,
            "weak_strongman_cost": weak_strongman_cost
        })

    return pd.DataFrame(entries, columns=columns)
def heuristic_algorithm(G, num_machines):
    '''
    Run the ETF-based heuristic once on G with unit weights.

    An initial ordering is obtained from Mod_ETF with unit speeds and
    tie-breaking rule 2. Pseudo-sizes derived from that schedule are turned
    into new speeds, the same ordering is rescheduled with those speeds, and
    the resulting cost is computed.

    :param G: task graph
    :param num_machines: number of machines passed on to Mod_ETF
    :return: tuple ``(order, t, total_cost, s_new)`` - the Mod_ETF ordering,
        the output of ``native_rescheduler``, the first value returned by
        ``compute_cost`` and the speeds derived from the pseudo-sizes.
    '''
    task_count = len(G)
    unit_weights = [1] * task_count
    unit_speeds = [1] * task_count

    initial = Mod_ETF(G, unit_weights, unit_speeds, num_machines,
                      tie_breaking_rule=2, plot=False)

    # NOTE(review): heuristics() unpacks this helper's result as
    # ``p_size, _ = approx_psize_homogeneous(...)`` while the value is used
    # whole here - confirm which form matches the helper's return value.
    pseudo_sizes = approx_psize_homogeneous(G, initial.order, initial.h,
                                            initial.t)
    s_new = psize_to_speed(pseudo_sizes)

    # The rescheduler receives a deep copy, so the ordering returned below is
    # the object produced by Mod_ETF.
    order_copy = copy.deepcopy(initial.order)
    t = native_rescheduler(G, s_new, unit_weights, order_copy)

    total_cost, _, _ = compute_cost(unit_weights, t, s_new)
    return initial.order, t, total_cost, s_new
def general_heuristic(G, num_machines, w, iterations, verbose):
    '''
    Iteratively refine task speeds and report the final schedule cost.

    Each iteration schedules G with Mod_ETF using the current speeds,
    estimates pseudo-sizes from that schedule, averages them element-wise
    with the previous iteration's pseudo-sizes (from the second iteration
    on), converts them to speeds, reschedules and records the objective.

    :param G: task graph
    :param num_machines: number of machines passed on to Mod_ETF
    :param w: task weights
    :param iterations: number of refinement rounds; must be at least 1
    :param verbose: forwarded to Mod_ETF (as ``plot``) and to
        ``approx_psize_general``
    :return: tuple ``(obj_val, time, energy, order, convergence)`` where the
        first three come from the last ``compute_cost`` call, ``order`` is
        the last Mod_ETF ordering and ``convergence`` lists the objective of
        every iteration.
    :raises ValueError: if ``iterations`` is smaller than 1
    '''
    # Without at least one round there is no result to return; fail early
    # with a clear message instead of an UnboundLocalError at the return.
    if iterations < 1:
        raise ValueError("iterations must be at least 1")

    convergence = []
    s = [1 for _ in range(len(G))]
    tie_breaking_rule = 2

    # None (not an empty list) marks "no previous estimate": truth-testing a
    # multi-element numpy array raises, and list + list would concatenate
    # instead of adding element-wise.
    old_pseudosize = None
    for _ in range(iterations):
        etf = Mod_ETF(G, w, s, num_machines, tie_breaking_rule, plot=verbose)
        new_pseudosize = approx_psize_general(G, etf.order, etf.t, verbose)

        if old_pseudosize is not None:
            averaged = (np.asarray(old_pseudosize)
                        + np.asarray(new_pseudosize)) / 2
            # Keep the container type produced by approx_psize_general so
            # psize_to_speed sees the same kind of input in every round.
            if isinstance(new_pseudosize, list):
                new_pseudosize = averaged.tolist()
            else:
                new_pseudosize = averaged
        old_pseudosize = new_pseudosize

        s = psize_to_speed(new_pseudosize)
        t = native_rescheduler(G, s, w, etf.order)
        obj_val, time, energy = compute_cost(w, t, s)
        convergence.append(obj_val)

    return obj_val, time, energy, etf.order, convergence
def heuristics(G, num_machines, naive_version=0, iterations=1, verbose=False):
    '''
    Run the heuristic once and, on request, the naive baseline(s) for G.

    All schedules use unit task weights and tie-breaking rule 2. The initial
    ordering comes from Mod_ETF with unit speeds.

    :param G: task graph whose nodes are addressed as 0 .. len(G) - 1
    :param num_machines: number of machines
    :param naive_version: selects which naive baselines are evaluated
        1 - naive 1 only
        2 - naive 2 only
        3 - naive 1 and naive 2
        any other value (default 0) - none
    :param iterations: currently unused; the heuristic is run exactly once
    :param verbose: if True, the naive 2 Mod_ETF run is asked to plot
    :return: tuple ``(naive1_cost, naive2_cost, total_cost, etf)``; a naive
        cost is None when that baseline was not requested, ``total_cost`` is
        the heuristic cost and ``etf`` is the initial Mod_ETF object.
    '''
    node_count = len(G)
    w = [1] * node_count
    unit_speeds = [1] * node_count
    tie_breaking_rule = 2

    # Initial ordering from modified ETF with every task at speed 1.
    etf = Mod_ETF(G, w, unit_speeds, num_machines, tie_breaking_rule,
                  plot=False)

    # Naive 1: pseudo-sizes estimated from the initial ordering only.
    naive1_cost = None
    if naive_version in (1, 3):
        naive1_psize = approx_psize_naive1(G, etf.order)
        naive1_speeds = psize_to_speed(naive1_psize)
        naive1_t = native_rescheduler(G, naive1_speeds, w,
                                      copy.deepcopy(etf.order))
        naive1_cost, _, _ = compute_cost(w, naive1_t, naive1_speeds)

    # Naive 2: pseudo-size of a task is its descendant count plus one.
    naive2_cost = None
    if naive_version in (2, 3):
        naive2_psize = []
        for task in range(node_count):
            reachable = nx.algorithms.dag.descendants(G, task)
            naive2_psize.append(len(reachable) + 1)
        naive2_speeds = psize_to_speed(naive2_psize)
        naive2_etf = Mod_ETF(G, w, naive2_speeds, num_machines,
                             tie_breaking_rule, plot=verbose)
        naive2_cost = naive2_etf.obj_value

    # Heuristic: a single pass of pseudo-size estimation and rescheduling.
    p_size, _ = approx_psize_homogeneous(G, etf.order, etf.h, etf.t)
    heuristic_speeds = psize_to_speed(p_size)
    t = native_rescheduler(G, heuristic_speeds, w, etf.order)
    total_cost, _, _ = compute_cost(w, t, heuristic_speeds)

    return naive1_cost, naive2_cost, total_cost, etf