def buildEnvironment(explored_states, state_dict, G, G2, G_disrupted,
                     ActionList, supply_nodes):
    """Build a starting environment from a randomly chosen explored state.

    A state id is drawn uniformly at random from ``explored_states`` and the
    matching ``st.State`` is rebuilt from ``state_dict``. Every road whose
    debris value in that state is 0 is treated as already cleared: its
    ``'debris'`` attribute in ``G2`` is set to 0 and the edge is added to
    ``G_disrupted``.

    Args:
        explored_states: Non-empty sequence of state ids to sample from.
        state_dict: Mapping keyed by ``(state_id, field)`` where ``field`` is
            one of ``'debris'``, ``'supply'``, ``'demand'``, ``'resource'``.
        G: Graph handed to ``funcs2.initializeActionSpace``.
        G2: Graph whose ``'debris'`` edge attribute is zeroed for cleared
            roads. Mutated in place.
        G_disrupted: Graph that receives the cleared edges and is then
            searched from each supply node. Mutated in place.
        ActionList: Mapping from edge (indexable as ``edge[0]``, ``edge[1]``)
            to action id. If several edges share an id, the first one in
            iteration order is used.
        supply_nodes: Iterable of nodes the reachability search starts from.

    Returns:
        Tuple ``(first_state, actions, Schedule, reachable_nodes)``.
        ``Schedule`` lists the cleared road indices in index order, not in
        the order they were cleared. ``reachable_nodes`` is the concatenation
        of one DFS preorder per supply node, so a node reachable from several
        supply nodes appears more than once.

    Raises:
        IndexError: If ``explored_states`` is empty, or a cleared road index
            has no matching action id in ``ActionList``.
        KeyError: If ``state_dict`` lacks an entry for the sampled state.
    """
    first_state_id = random.choice(explored_states)
    debris = state_dict[(first_state_id, 'debris')]
    first_state = st.State(debris,
                           state_dict[(first_state_id, 'supply')],
                           state_dict[(first_state_id, 'demand')],
                           state_dict[(first_state_id, 'resource')],
                           first_state_id)

    # This is not the ordered schedule but rather the roads cleared until
    # this state.
    cleared_roads = [i for i, val in enumerate(debris) if val == 0]
    Schedule = list(cleared_roads)

    # Reverse index built once instead of rescanning ActionList for every
    # cleared road; setdefault keeps the first edge seen for each action id.
    edge_by_action = {}
    for edge, edge_id in ActionList.items():
        edge_by_action.setdefault(edge_id, edge)

    for road_id in cleared_roads:
        if road_id not in edge_by_action:
            raise IndexError(
                'no edge in ActionList has action id {}'.format(road_id))
        edge = edge_by_action[road_id]
        G2[edge[0]][edge[1]]['debris'] = 0
        G_disrupted.add_edge(edge[0], edge[1])

    # One DFS per supply node; duplicates across supply nodes are kept.
    reachable_nodes = []
    for s in supply_nodes:
        reachable_nodes.extend(nx.dfs_preorder_nodes(G_disrupted, s))

    # actions are the admissible action indices corresponding in ActionList
    actions = funcs2.initializeActionSpace(reachable_nodes, G, ActionList)

    return first_state, actions, Schedule, reachable_nodes
# Setup fragment: resets the centrality caches, registers state 0 in
# state_dict, builds the initial state and action set, then begins the
# per-episode loop. The enclosing definition is not visible here.

# Empty dicts; nothing in this fragment fills them.
betw_centrality_regular = {}
betw_centrality_debris = {}
betw_centrality_regular_sp = {}

# Ids of the states recorded so far; the initial state is appended below.
explored_states = []

# state_dict is keyed by (state_id, field); state 0 is the initial state.
state_dict[(0, 'demand')] = initial_demand
state_dict[(0, 'debris')] = initial_debris
state_dict[(0, 'supply')] = initial_supply
state_dict[(0, 'period')] = 1
state_dict[(0, 'resource')] = 0

# NOTE(review): the trailing 0 and None are presumably the resource and id
# arguments, going by the (debris, supply, demand, resource, id) order used
# in buildEnvironment - confirm against st.State.
initial_state = st.State(initial_debris, initial_supply, initial_demand, 0, None)
# getStateIndex returns the counter and id mapping, which are rebound here.
id_counter, id_dict = initial_state.getStateIndex(id_counter, id_dict)

# The initial action space is computed from the supply nodes.
actions = funcs2.initializeActionSpace(supply_nodes, G, ActionList)

explored_states.append(initial_state.ID)

for e in range(int(n_episodes)):
    # Initialize the environment
    #remaining_demand = True
    Schedule = []
    #Initialization - these roads are as if they are cleared before
    # Each episode starts with only the supply nodes marked reachable.
    reachable_nodes = set(supply_nodes)
    rem_resource = resource
    period = 1
    # Fresh graph holding nodes 0..n_nodes-1 and no edges.
    G_restored = nx.Graph()
    G_restored.add_nodes_from(range(n_nodes))
# Setup fragment: resets the feature and centrality caches, registers
# state 0 in state_dict, builds the initial state and action set, then loads
# two CSV tables. The enclosing definition is not visible here.

# Empty dicts; nothing in this fragment fills them.
phi_sa = {}
betw_centrality_service = {}
betw_centrality_regular = {}
betw_centrality_debris = {}
betw_centrality_regular_sp = {}

# state_dict is keyed by (state_id, field); state 0 is the initial state.
state_dict[(0, 'demand')] = initial_demand
state_dict[(0, 'debris')] = initial_debris
state_dict[(0, 'supply')] = initial_supply
state_dict[(0, 'period')] = 1
state_dict[(0, 'resource')] = 0

initial_state = st.State(initial_debris, initial_supply, initial_demand, 0, None)
# getStateIndex returns the counter and id mapping, which are rebound here.
id_counter, id_dict = initial_state.getStateIndex(id_counter, id_dict)

# The initial action space is computed from the supply nodes.
actions = funcs2.initializeActionSpace(supply_nodes, G, ActionList)

# explored_states is not created in this fragment; it must already exist.
explored_states.append(initial_state.ID)

step_size = 0.1

# Get the actual optimal calculated to see if GD is working
# NOTE(review): both CSV paths below are absolute and machine-specific, so
# this only runs on the original author's machine - consider a configurable
# data directory.
df_q = pd.read_csv(
    'C:/Users/ulusan.a/Desktop/RL_rep/RL/data_files/Q_optimalVI_INS8.csv',
    sep=',')
# The 'Unnamed: 0' column becomes the index.
df_q.set_index('Unnamed: 0', inplace=True)

# Presumably the basis-function table for the same instance, going by the
# file name - confirm.
df = pd.read_csv(
    'C:/Users/ulusan.a/Desktop/RL_rep/RL/data_files/basis_INS8.csv',
    sep=',')
df.set_index('Unnamed: 0', inplace=True)

# q_column = pd.DataFrame(index=df.index.copy(), columns=['q_optimal', 'q_pred'])