Example #1
0
def buildEnvironment(explored_states, state_dict, G, G2, G_disrupted,
                     ActionList, supply_nodes):
    """Rebuild the environment for a randomly chosen, already explored state.

    A state id is drawn at random from ``explored_states`` and its stored
    debris / supply / demand / resource values are read from ``state_dict``
    to re-create the ``st.State``.  Every road whose stored debris is 0 is
    treated as cleared: its ``'debris'`` edge attribute in ``G2`` is set to 0
    and the edge is added to ``G_disrupted``.  Both graphs are modified in
    place.

    Args:
        explored_states: Non-empty sequence of state ids to sample from.
        state_dict: Mapping keyed by ``(state_id, field)``; the fields read
            here are ``'debris'``, ``'supply'``, ``'demand'`` and
            ``'resource'``.
        G: Graph forwarded unchanged to ``funcs2.initializeActionSpace``.
        G2: Graph whose ``'debris'`` edge attribute is zeroed for every
            cleared road.
        G_disrupted: Graph that receives the cleared edges and is then
            searched from each supply node.
        ActionList: Mapping from an edge (indexable; items 0 and 1 are used
            as the endpoints) to its road/action index.
        supply_nodes: Iterable of nodes the reachability search starts from.

    Returns:
        Tuple ``(first_state, actions, Schedule, reachable_nodes)`` where
        ``Schedule`` lists the indices of the roads already cleared (not an
        ordered clearing schedule) and ``reachable_nodes`` is the
        concatenation of the DFS preorders from every supply node, so a node
        reachable from several supplies appears more than once.

    Raises:
        IndexError: If ``explored_states`` is empty, or if a cleared road
            index has no matching edge in ``ActionList``.
    """
    first_state_id = random.choice(explored_states)
    debris = state_dict[(first_state_id, 'debris')]
    first_state = st.State(debris,
                           state_dict[(first_state_id, 'supply')],
                           state_dict[(first_state_id, 'demand')],
                           state_dict[(first_state_id, 'resource')],
                           first_state_id)

    # This is not the ordered schedule but rather the roads cleared until
    # this state (debris amount already at 0).
    cleared_roads = [i for i, val in enumerate(debris) if val == 0]
    Schedule = list(cleared_roads)

    # Invert ActionList once instead of scanning it for every cleared road.
    # setdefault keeps the FIRST edge listed for an index, which is the edge
    # the previous per-road scan (`[... if edge_id == cr][0]`) selected.
    edge_by_id = {}
    for edge, edge_id in ActionList.items():
        edge_by_id.setdefault(edge_id, edge)

    for cr in cleared_roads:
        if cr not in edge_by_id:
            # Same exception type the former `[...][0]` lookup produced.
            raise IndexError(
                'no edge in ActionList has action index {}'.format(cr))
        ed = edge_by_id[cr]
        G2[ed[0]][ed[1]]['debris'] = 0
        G_disrupted.add_edge(ed[0], ed[1])

    reachable_nodes = []  # nodes reachable from any supply node
    for s in supply_nodes:
        reachable_nodes.extend(nx.dfs_preorder_nodes(G_disrupted, s))

    # actions are the admissible action indices corresponding in ActionList
    actions = funcs2.initializeActionSpace(reachable_nodes, G, ActionList)

    return first_state, actions, Schedule, reachable_nodes
Example #2
0
File: Main3.py Project: aybikeu/RL
# Feature store and the four betweenness-centrality caches; each name gets
# its own, initially empty, dict.
phi_sa = {}
(betw_centrality_service, betw_centrality_regular,
 betw_centrality_debris, betw_centrality_regular_sp) = {}, {}, {}, {}

# Ids of the states visited so far.
explored_states = []

# Records stored under state id 0, keyed by (state_id, field).
state_dict.update({
    (0, 'demand'): initial_demand,
    (0, 'debris'): initial_debris,
    (0, 'supply'): initial_supply,
    (0, 'period'): 1,
    (0, 'resource'): 0,
})

# Build the starting state, let it obtain its index, and derive the first
# action set from the supply nodes.
initial_state = st.State(
    initial_debris, initial_supply, initial_demand, 0, None)
id_counter, id_dict = initial_state.getStateIndex(id_counter, id_dict)
actions = funcs2.initializeActionSpace(supply_nodes, G, ActionList)
explored_states.append(initial_state.ID)


# Run one pass of the loop body per episode; n_episodes is cast to int so a
# float-valued setting is accepted by range().
for e in range(int(n_episodes)):


    # Initialize the environment for this episode
    # (disabled) remaining_demand = True
    Schedule = []  # Initialization - roads in here are treated as if they were cleared before

    # Each episode starts with only the supply nodes marked reachable.
    reachable_nodes = set(supply_nodes)
    # presumably the resource budget still available - TODO confirm
    rem_resource = resource
    # Every episode starts in period 1.
    period = 1
Example #3
0
# Bookkeeping containers and settings for the run.
explored_states = []
resource = 1
phi_sa = {}
(betw_centrality_service, betw_centrality_regular,
 betw_centrality_debris, betw_centrality_regular_sp) = {}, {}, {}, {}

# Records stored under state id 0, keyed by (state_id, field).
state_dict = {
    (0, 'demand'): initial_demand,
    (0, 'debris'): initial_debris,
    (0, 'supply'): initial_supply,
    (0, 'period'): 1,
    (0, 'resource'): 0,
}

# Build the starting state, let it obtain its index, and derive the first
# action set from the supply nodes.
initial_state = st.State(
    initial_debris, initial_supply, initial_demand, 0, None)
id_counter, id_dict = initial_state.getStateIndex(id_counter, id_dict)
actions = funcs2.initializeActionSpace(supply_nodes, G, ActionList)
explored_states.append(initial_state.ID)

step_size = 0.1

# Load the previously calculated optimal values, used to see whether GD is
# working. The CSV's 'Unnamed: 0' column becomes the index.
df_q = pd.read_csv(
    'C:/Users/ulusan.a/Desktop/RL_rep/RL/data_files/Q_optimalVI_INS8.csv',
    sep=',',
).set_index('Unnamed: 0')

# presumably the basis (feature) table for the same instance - TODO confirm
df = pd.read_csv(
    'C:/Users/ulusan.a/Desktop/RL_rep/RL/data_files/basis_INS8.csv',
    sep=',',
).set_index('Unnamed: 0')
Example #4
0
def sample(first_state, actions, supply_nodes, resource, Qmatrix, Schedule,
           Q_alphaMatrix, G_restored, G2, G, EdgeList, reachable_nodes,
           ActionList, dist, phi_sa, total_debris, total_supply,
           explored_states, state_dict, id_dict, id_counter,
           betw_centrality_service, betw_centrality_regular,
           betw_centrality_debris, betw_centrality_regular_sp):
    """Take one action from ``first_state`` and realize the resulting state.

    Steps, in order: choose an action and append it to ``Schedule``; make
    sure the betweenness-centrality entries for ``first_state.ID`` exist
    (computed through ``SNEBC.BC_calcs`` on a cache miss); find the node the
    action makes reachable and, if there is one, extend ``actions`` and
    transfer supply when it links more than one supply location; update
    debris and cumulative resource; build the ``(state, action)`` feature
    vector; realize demand; compute the reward; create and index the new
    state and record it in ``state_dict`` / ``explored_states``.

    Mutated in place: ``Schedule``, ``first_state`` (``cum_resource`` and
    whatever its own methods change), ``state_dict``, ``explored_states``
    and ``phi_sa[(first_state.ID, action)]``.  ``actions`` is handed to
    ``funcs2.updateActions`` and returned.  ``supply_nodes`` is accepted but
    not used in this function.  The ``id_dict`` returned by
    ``getStateIndex`` is not passed back to the caller.

    Returns:
        Tuple ``(phi_sa, action, id_counter, new_state, reward, period,
        actions, betw_centrality_service, betw_centrality_regular,
        betw_centrality_debris, betw_centrality_regular_sp,
        reachable_nodes)`` where ``reachable_nodes`` is the set of nodes
        reported by ``funcs2.get_newReachableNode``.
    """
    # Exploration settings forwarded to choose_action. The original author
    # suspected they are not actually used there - TODO confirm.
    epsilon = 0.3
    rule = 'glie'
    e = 1
    T = 10
    n_nodes = len(G.nodes)
    Cost = np.zeros((n_nodes, n_nodes))

    # Choose action
    action = first_state.choose_action(epsilon, Qmatrix, actions, Schedule,
                                       rule, T, Q_alphaMatrix, e)
    Schedule.append(action)

    # Betweenness centrality is only used to build the basis for (s, a); if
    # it was already calculated for this state it is not redone. It is done
    # here rather than in constructfeatures because it has to happen before
    # G_restored etc. are updated.
    try:
        betw_centrality_service[first_state.ID]
    except KeyError:
        # Cache miss only; any other error now propagates instead of being
        # swallowed by a bare ``except``.
        (betw_centrality_service, betw_centrality_regular,
         betw_centrality_debris,
         betw_centrality_regular_sp) = SNEBC.BC_calcs(
             G_restored, first_state, G2, EdgeList, betw_centrality_service,
             betw_centrality_regular, betw_centrality_debris,
             betw_centrality_regular_sp)

    # ------- Realize the new state and get its information -------
    # Find where that action leads - how the graph changes
    new_node, discovered_nodes = funcs2.get_newReachableNode(
        set(reachable_nodes), action, ActionList, G_restored, G2)

    # Default for the case where the action reaches no new node. Previously
    # the name was left unbound on that path and the realizeDemand call
    # below raised UnboundLocalError.
    # NOTE(review): an empty list is assumed to be an acceptable "no supply
    # connection" value for State.realizeDemand - confirm.
    connected_supply = []

    # Update the action list by adding the new_node's connections
    if new_node is not None:
        funcs2.updateActions(new_node, actions, ActionList, G)

        # Find from which supply locations the new_node is accessible
        connected_supply = first_state.establishSupplyConnection(
            new_node, G_restored)

        # If the newly found node connects supplies - then supply transfer
        if len(connected_supply) > 1:
            first_state.transferSupply(connected_supply)

    # Get the resource usage and update remaining debris amounts
    new_rem_debris, resource_usage = first_state.updateDebris(action)

    # The period has to be read before AND after the cumulative resource is
    # increased; both values feed the feature construction below.
    period_before = funcs2.getPeriod(first_state.cum_resource, resource)
    first_state.cum_resource = first_state.cum_resource + resource_usage
    period = funcs2.getPeriod(first_state.cum_resource, resource)

    # Construct features - demand is not yet realized nor allocated here
    state_id = first_state.ID
    phi_sa, new_phi_check = funcs2.constructfeatures(
        first_state, action, phi_sa, ActionList, period, resource_usage,
        total_debris, betw_centrality_service[state_id], period_before,
        total_supply, betw_centrality_regular[state_id],
        betw_centrality_debris[state_id],
        betw_centrality_regular_sp[state_id])

    # First realize demand then allocate supply immediately
    (new_rem_demand, new_rem_supply,
     satisfied_demand, dem) = first_state.realizeDemand(
         new_node, dist, connected_supply, G_restored, Cost, reachable_nodes)

    # Feature 12: appended only when constructfeatures reported
    # new_phi_check == 1.
    if new_phi_check == 1:
        # For now the mean distributions are the same. If demand nodes get
        # different distributions, mean_dist becomes the mean of each one.
        mean_dist = 3
        phi_sa[(first_state.ID, action)].append(mean_dist if dem > 0 else 0)

    reachable_nodes = discovered_nodes

    # Calculate the reward to switch to the next state
    reward = funcs2.getReward(period, satisfied_demand)

    # Create the new state and get its index
    new_state = st.State(new_rem_debris, new_rem_supply, new_rem_demand,
                         first_state.cum_resource, None)
    id_counter, id_dict = new_state.getStateIndex(id_counter, id_dict)

    # Store copies so later changes to the state do not alter the records.
    for field, value in (('demand', new_state.rem_demand),
                         ('debris', new_state.rem_debris),
                         ('supply', new_state.rem_supply),
                         ('period', period),
                         ('resource', new_state.cum_resource)):
        state_dict[(new_state.ID, field)] = copy(value)

    # Only states that still have unmet demand are kept for later sampling.
    if new_state.ID not in explored_states and sum(new_state.rem_demand) > 0:
        explored_states.append(new_state.ID)

    return (phi_sa, action, id_counter, new_state, reward, period, actions,
            betw_centrality_service, betw_centrality_regular,
            betw_centrality_debris, betw_centrality_regular_sp,
            reachable_nodes)