def buildEnvironment(explored_states, state_dict, G, G2, G_disrupted, ActionList, supply_nodes):
    """Rebuild the environment for a randomly chosen, previously explored state.

    A state id is drawn with ``random.choice`` from ``explored_states`` and its
    stored components are read from ``state_dict`` to recreate the
    ``st.State``. Every road whose stored debris equals 0 is treated as
    already cleared: its ``'debris'`` attribute in ``G2`` is set to 0 and the
    edge is added to ``G_disrupted``. Both graphs are modified in place.

    Args:
        explored_states: Non-empty sequence of state ids to sample from.
        state_dict: Mapping keyed by ``(state_id, field)``; the fields
            ``'debris'``, ``'supply'``, ``'demand'`` and ``'resource'`` are read.
        G: Graph forwarded to ``funcs2.initializeActionSpace``.
        G2: Graph whose edge attribute ``'debris'`` is zeroed for cleared roads.
        G_disrupted: Graph that receives the cleared edges and is then
            searched from every supply node.
        ActionList: Mapping from an edge (indexable as ``edge[0]``,
            ``edge[1]``) to its action id. The position of a road in the
            debris vector is matched against these ids.
        supply_nodes: Iterable of nodes the depth-first searches start from.

    Returns:
        A tuple ``(first_state, actions, Schedule, reachable_nodes)`` where
        ``Schedule`` lists the ids of the roads already cleared (not in
        clearing order) and ``reachable_nodes`` is the concatenation of the
        DFS preorders from each supply node (a node reachable from several
        supply nodes therefore appears more than once).

    Raises:
        IndexError: If ``explored_states`` is empty, or if a cleared road id
            has no matching edge in ``ActionList``.
        KeyError: If ``state_dict`` lacks one of the required entries.
    """
    first_state_id = random.choice(explored_states)
    debris = state_dict[(first_state_id, 'debris')]
    first_state = st.State(debris,
                           state_dict[(first_state_id, 'supply')],
                           state_dict[(first_state_id, 'demand')],
                           state_dict[(first_state_id, 'resource')],
                           first_state_id)

    # This is not the ordered schedule but rather the roads cleared until
    # this state.
    cleared_roads = [road_id for road_id, amount in enumerate(debris) if amount == 0]
    Schedule = list(cleared_roads)

    # Reverse lookup built once instead of scanning ActionList per road.
    # setdefault keeps the first edge seen for an id, matching the previous
    # "first match" behaviour.
    edge_by_id = {}
    for edge, edge_id in ActionList.items():
        edge_by_id.setdefault(edge_id, edge)

    for road_id in cleared_roads:
        if road_id not in edge_by_id:
            raise IndexError(
                f"no edge in ActionList is mapped to action id {road_id}")
        edge = edge_by_id[road_id]
        G2[edge[0]][edge[1]]['debris'] = 0
        G_disrupted.add_edge(edge[0], edge[1])

    # Nodes reachable from the supply nodes over the restored edges.
    reachable_nodes = []
    for supply_node in supply_nodes:
        reachable_nodes.extend(nx.dfs_preorder_nodes(G_disrupted, supply_node))

    # actions are the admissible action indices corresponding in ActionList
    actions = funcs2.initializeActionSpace(reachable_nodes, G, ActionList)

    return first_state, actions, Schedule, reachable_nodes
# Containers that start empty for this run. sample() indexes phi_sa by
# (state_id, action) and the betw_centrality_* dicts by state id; presumably
# these are the objects handed to it -- confirm against the call site.
phi_sa = {}
betw_centrality_service = {}
betw_centrality_regular = {}
betw_centrality_debris = {}
betw_centrality_regular_sp = {}
explored_states = []

# Store the initial state's components under state id 0; state_dict is keyed
# by (state_id, field).
state_dict[(0, 'demand')] = initial_demand
state_dict[(0, 'debris')] = initial_debris
state_dict[(0, 'supply')] = initial_supply
state_dict[(0, 'period')] = 1
state_dict[(0, 'resource')] = 0

# The state is created without an ID (None); getStateIndex hands back the
# updated counter and id dictionary.
# NOTE(review): assumes the ID assigned here is 0, matching the keys above --
# TODO confirm.
initial_state = st.State(initial_debris, initial_supply, initial_demand, 0, None)
id_counter, id_dict = initial_state.getStateIndex(id_counter, id_dict)
actions = funcs2.initializeActionSpace(supply_nodes, G, ActionList)
explored_states.append(initial_state.ID)

for e in range(int(n_episodes)):
    # Initialize the environment: reset the per-episode bookkeeping.
    #remaining_demand = True
    Schedule = []
    # Initialization - only the supply nodes count as reachable at the start
    # of an episode.
    reachable_nodes = set(supply_nodes)
    rem_resource = resource
    period = 1
# Bookkeeping that starts out empty for this run.
explored_states = []
resource = 1
phi_sa = {}
betw_centrality_service = {}
betw_centrality_regular = {}
betw_centrality_debris = {}
betw_centrality_regular_sp = {}

# The initial state is stored under id 0, one entry per (state_id, field).
state_dict = {
    (0, 'demand'): initial_demand,
    (0, 'debris'): initial_debris,
    (0, 'supply'): initial_supply,
    (0, 'period'): 1,
    (0, 'resource'): 0,
}

# Create the initial state without an ID, let getStateIndex index it, and
# record it as explored.
initial_state = st.State(initial_debris, initial_supply, initial_demand, 0, None)
id_counter, id_dict = initial_state.getStateIndex(id_counter, id_dict)
actions = funcs2.initializeActionSpace(supply_nodes, G, ActionList)
explored_states.append(initial_state.ID)

step_size = 0.1

# Get the actual optimal calculated to see if GD is working.
# Both tables are indexed by their 'Unnamed: 0' column.
df_q = pd.read_csv(
    'C:/Users/ulusan.a/Desktop/RL_rep/RL/data_files/Q_optimalVI_INS8.csv',
    sep=',').set_index('Unnamed: 0')
df = pd.read_csv(
    'C:/Users/ulusan.a/Desktop/RL_rep/RL/data_files/basis_INS8.csv',
    sep=',').set_index('Unnamed: 0')
def sample(first_state, actions, supply_nodes, resource, Qmatrix, Schedule,
           Q_alphaMatrix, G_restored, G2, G, EdgeList, reachable_nodes,
           ActionList, dist, phi_sa, total_debris, total_supply,
           explored_states, state_dict, id_dict, id_counter,
           betw_centrality_service, betw_centrality_regular,
           betw_centrality_debris, betw_centrality_regular_sp):
    """Take one step from ``first_state`` and return the resulting transition.

    The function picks an action with ``first_state.choose_action``, makes
    sure the betweenness-centrality entries for ``first_state.ID`` exist,
    applies the action (new reachable node, supply connection/transfer, debris
    update, demand realisation), builds the feature vector for the
    ``(state, action)`` pair, computes the reward and creates the successor
    state.

    Side effects visible in this function:
        * ``Schedule`` gets the chosen action appended.
        * ``actions`` is passed to ``funcs2.updateActions`` when a new node is
          found (presumably updated in place -- confirm in ``funcs2``).
        * ``first_state.cum_resource`` is increased by the resource usage of
          the action; ``first_state`` may also be changed by its own methods.
        * ``state_dict`` receives the five ``(new_state.ID, field)`` entries.
        * ``explored_states`` gets ``new_state.ID`` appended when it is not
          yet present and the new state still has unmet demand.

    Args:
        first_state: State the step starts from.
        actions: Currently admissible action ids.
        supply_nodes: Not used inside this function.
        resource: Forwarded to ``funcs2.getPeriod``.
        Qmatrix, Q_alphaMatrix: Forwarded to ``choose_action``.
        Schedule: List of actions taken so far.
        G_restored, G2, G: Graphs forwarded to the helper calls.
        EdgeList: Forwarded to ``SNEBC.BC_calcs``.
        reachable_nodes: Nodes reachable before the action is applied.
        ActionList: Action/edge mapping forwarded to the helper calls.
        dist: Forwarded to ``first_state.realizeDemand``.
        phi_sa: Mapping ``(state_id, action) -> feature list``.
        total_debris, total_supply: Forwarded to ``funcs2.constructfeatures``.
        explored_states: List of state ids that can be sampled later.
        state_dict: Mapping ``(state_id, field) -> value``.
        id_dict, id_counter: State-indexing bookkeeping passed to
            ``getStateIndex``.
        betw_centrality_service, betw_centrality_regular,
        betw_centrality_debris, betw_centrality_regular_sp: Per-state caches
            indexed by state id.

    Returns:
        A 12-tuple ``(phi_sa, action, id_counter, new_state, reward, period,
        actions, betw_centrality_service, betw_centrality_regular,
        betw_centrality_debris, betw_centrality_regular_sp, reachable_nodes)``.
        ``reachable_nodes`` is the node collection returned by
        ``funcs2.get_newReachableNode``.
    """
    # Settings handed to choose_action. The original author suspected they
    # are not actually used there -- TODO confirm.
    epsilon = 0.3
    rule = 'glie'
    e = 1
    T = 10

    n_nodes = len(G.nodes)
    Cost = np.zeros((n_nodes, n_nodes))

    # Choose action
    action = first_state.choose_action(epsilon, Qmatrix, actions, Schedule,
                                       rule, T, Q_alphaMatrix, e)
    Schedule.append(action)

    # Betweenness centrality is only used to create the basis for (s, a); if
    # it was already calculated for this state it is not redone. It is not
    # part of constructfeatures because it has to happen before G_restored
    # etc. are updated.
    try:
        betw_centrality_service[first_state.ID]
    except KeyError:
        # Only a cache miss triggers the calculation; any other error now
        # propagates instead of being swallowed by a bare ``except``.
        (betw_centrality_service, betw_centrality_regular,
         betw_centrality_debris, betw_centrality_regular_sp) = SNEBC.BC_calcs(
            G_restored, first_state, G2, EdgeList, betw_centrality_service,
            betw_centrality_regular, betw_centrality_debris,
            betw_centrality_regular_sp)

    ######### Realize the new state and get its information #########
    #################################################################
    # Find where that action leads - how the graph changes
    new_node, discovered_nodes = funcs2.get_newReachableNode(
        set(reachable_nodes), action, ActionList, G_restored, G2)

    # Update the action list by adding the new_node's connections
    if new_node is not None:
        funcs2.updateActions(new_node, actions, ActionList, G)

    # Find from which supply locations the new_node is accessible
    connected_supply = first_state.establishSupplyConnection(
        new_node, G_restored)

    # If the newly found node connects supplies - then supply transfer
    if len(connected_supply) > 1:
        first_state.transferSupply(connected_supply)

    # Get the resource usage and update remaining debris amounts.
    # period_before must be read before cum_resource is increased.
    new_rem_debris, resource_usage = first_state.updateDebris(action)
    period_before = funcs2.getPeriod(first_state.cum_resource, resource)
    first_state.cum_resource = first_state.cum_resource + resource_usage

    # Update the planning horizon and resource amounts
    period = funcs2.getPeriod(first_state.cum_resource, resource)

    # Construct features; demand is not yet realized and not allocated here.
    phi_sa, new_phi_check = funcs2.constructfeatures(
        first_state, action, phi_sa, ActionList, period, resource_usage,
        total_debris, betw_centrality_service[first_state.ID], period_before,
        total_supply, betw_centrality_regular[first_state.ID],
        betw_centrality_debris[first_state.ID],
        betw_centrality_regular_sp[first_state.ID])

    # First realize demand then allocate supply immediately
    new_rem_demand, new_rem_supply, satisfied_demand, dem = first_state.realizeDemand(
        new_node, dist, connected_supply, G_restored, Cost, reachable_nodes)

    ###### ---------------------- 12 --------------------------####
    # One more feature is appended only when constructfeatures reports
    # new_phi_check == 1.
    if new_phi_check == 1:
        if dem > 0:
            # For now the mean distributions are the same.
            # But if demand nodes have diff dist then mean_dist is going to
            # be the mean of each dist.
            mean_dist = 3
            phi_sa[(first_state.ID, action)].append(mean_dist)
        else:
            phi_sa[(first_state.ID, action)].append(0)
    ###### ----------------------------------------------------####

    reachable_nodes = discovered_nodes

    # Calculate the reward to switch to the next state
    reward = funcs2.getReward(period, satisfied_demand)

    # Create the new state and get its index.
    # NOTE(review): id_dict is rebound locally but not returned; this only
    # works for the caller if getStateIndex updates the dict in place --
    # TODO confirm.
    new_state = st.State(new_rem_debris, new_rem_supply, new_rem_demand,
                         first_state.cum_resource, None)
    id_counter, id_dict = new_state.getStateIndex(id_counter, id_dict)

    # Store copies of the new state's components.
    state_dict[(new_state.ID, 'demand')] = copy(new_state.rem_demand)
    state_dict[(new_state.ID, 'debris')] = copy(new_state.rem_debris)
    state_dict[(new_state.ID, 'supply')] = copy(new_state.rem_supply)
    state_dict[(new_state.ID, 'period')] = copy(period)
    state_dict[(new_state.ID, 'resource')] = copy(new_state.cum_resource)

    # Only states with demand left are recorded as explored.
    if new_state.ID not in explored_states and sum(new_state.rem_demand) > 0:
        explored_states.append(new_state.ID)

    return (phi_sa, action, id_counter, new_state, reward, period, actions,
            betw_centrality_service, betw_centrality_regular,
            betw_centrality_debris, betw_centrality_regular_sp,
            reachable_nodes)