Example #1
def construct_curriculum_env(curr_id, tensor_state=True):
    one_eight_config = {'agent_speed_range': default_speed_range, 'width': default_width,
                        'lanes': default_lanes,
                        'finish_position': Point(43, 8)
                        }
    quarter_config = {'agent_speed_range': default_speed_range, 'width': default_width,
                      'lanes': default_lanes,
                      'finish_position': Point(36, 6)
                      }
    half_config = {'agent_speed_range': default_speed_range, 'width': default_width,
                   'lanes': default_lanes,
                   'finish_position': Point(24, 4)
                  }
    full_config = {'agent_speed_range': default_speed_range, 'width': default_width,
                   'lanes': default_lanes,
                   'finish_position': Point(0, 0), 'agent_pos_init': Point(3, 0)
                   }
    if curr_id == 0:
        config = one_eight_config
    elif curr_id == 1:
        config = quarter_config
    elif curr_id == 2:
        config = half_config
    elif curr_id == 3:
        config = full_config
    else:
        raise ValueError("No curriculum of ID: {}".format(curr_id))
    if tensor_state:
        config['observation_type'] = 'tensor'
    return gym.make('GridDriving-v0', **config)
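For context, a rough usage sketch (not part of the original listing) of stepping through the four curriculum stages; it assumes the standard gym reset/step API on the returned environment and uses a random placeholder policy:

# Hedged usage sketch: iterate the curriculum stages defined above.
for curr_id in range(4):
    env = construct_curriculum_env(curr_id, tensor_state=True)
    state = env.reset()
    done = False
    while not done:
        action = env.action_space.sample()  # placeholder random policy
        state, reward, done, info = env.step(action)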
Example #2
def construct_training_env():
    config = {
        'observation_type': 'tensor',
        'agent_speed_range': [-3, -1],
        'finish_position': Point(0, 0),
        'random_seed': 15,
        'stochasticity': 1.,
        'lanes': [
            LaneSpec(cars=7, speed_range=[-2, -1]),
            LaneSpec(cars=8, speed_range=[-2, -1]),
            LaneSpec(cars=6, speed_range=[-1, -1]),
            LaneSpec(cars=6, speed_range=[-3, -1]),
            LaneSpec(cars=7, speed_range=[-2, -1]),
            LaneSpec(cars=8, speed_range=[-2, -1]),
            LaneSpec(cars=6, speed_range=[-3, -2]),
            LaneSpec(cars=7, speed_range=[-1, -1]),
            LaneSpec(cars=6, speed_range=[-2, -1]),
            LaneSpec(cars=8, speed_range=[-2, -2])
        ],
        'width': 50,
        'tensor_state': True,
        'flicker_rate': 0.,
        'mask': None
    }
    return gym.make('GridDriving-v0', **config)
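A quick hedged check (not from the listing) of what this fixed training environment exposes, using only attributes that the training code in Example #5 relies on:

# Hedged inspection sketch for the fixed training env.
env = construct_training_env()
obs = env.reset()
print(env.observation_space.shape)  # tensor observation shape consumed by ConvDQN
print(env.action_space.n)           # number of discrete actions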
Example #3
def construct_task2_env(tensor_state=True):
    large_config = {'agent_speed_range': [-3, -1], 'width': 50,
                    'lanes': default_lanes
                    }
    small_config = {'observation_type': 'tensor', 'agent_speed_range': [-2, -1], 'stochasticity': 0.0, 'width': 10,
                    'lanes': [
                        LaneSpec(cars=3, speed_range=[-2, -1]),
                        LaneSpec(cars=4, speed_range=[-2, -1]),
                        LaneSpec(cars=2, speed_range=[-1, -1]),
                        LaneSpec(cars=2, speed_range=[-3, -1])
                    ]}
    medium_config = {'observation_type': 'tensor', 'agent_speed_range': [-3, -1], 'width': 15,
                     'lanes': [
                         LaneSpec(cars=3, speed_range=[-2, -1]),
                         LaneSpec(cars=4, speed_range=[-2, -1]),
                         LaneSpec(cars=2, speed_range=[-1, -1]),
                         LaneSpec(cars=2, speed_range=[-3, -1]),
                         LaneSpec(cars=3, speed_range=[-2, -1]),
                         LaneSpec(cars=4, speed_range=[-2, -1])
                     ]}
    medium_large_config = {'agent_speed_range': [-3, -1], 'width': 40,
                           'lanes': [LaneSpec(cars=6, speed_range=[-2, -1]),
                                     LaneSpec(cars=7, speed_range=[-2, -1]),
                                     LaneSpec(cars=5, speed_range=[-1, -1]),
                                     LaneSpec(cars=5, speed_range=[-3, -1]),
                                     LaneSpec(cars=6, speed_range=[-2, -1]),
                                     LaneSpec(cars=7, speed_range=[-2, -1])]
                           }
    curri_large_config = {'agent_speed_range': [-3, -1], 'width': 50,
                          'lanes': default_lanes,
                          'finish_position': Point(43, 6)
                          }
    config = large_config
    if tensor_state:
        config['observation_type'] = 'tensor'
    return gym.make('GridDriving-v0', **config)
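Only large_config is actually used here; the other dicts read as alternative setups that are swapped in by editing the config = large_config line. Example #7 below calls this constructor and reads a few attributes back, roughly as in the following hedged snippet:

# Hedged sketch: the task-2 env attributes that heuristic_reward (Example #7) reads back.
env = construct_task2_env()
print(len(env.lanes), env.width, env.agent_speed_range)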
Example #4
        'seed': 25,
        'iters': 300
    }]

    test_case_number = 0  #Change the index for a different test case
    LANES = test_config[test_case_number]['lanes']
    WIDTH = test_config[test_case_number]['width']
    RANDOM_SEED = test_config[test_case_number]['seed']
    numiters = test_config[test_case_number]['iters']
    stochasticity = 1.
    env = gym.make(
        'GridDriving-v0',
        lanes=LANES,
        width=WIDTH,
        agent_speed_range=(-3, -1),
        finish_position=Point(0, 0),  # agent_pos_init=Point(4, 2),
        stochasticity=stochasticity,
        tensor_state=False,
        flicker_rate=0.,
        mask=None,
        random_seed=RANDOM_SEED)

    actions = env.actions
    env.render()
    done = False
    mcts = MonteCarloTreeSearch(env=env,
                                numiters=numiters,
                                explorationParam=1.,
                                random_seed=RANDOM_SEED)
    while not env.done:
        state = GridWorldState(env.state, is_done=done)
Example #5
def train(model, env, train_type=0, model_class=ConvDQN):
    # Initialize model and target network
    f = open('record.txt', 'a')
    if not model:
        model = model_class(env.observation_space.shape,
                            env.action_space.n).to(device)
    target = model_class(env.observation_space.shape,
                         env.action_space.n).to(device)
    target.load_state_dict(model.state_dict())
    target.eval()

    # Initialize replay buffer
    memory = ReplayBuffer(buffer_limit)

    # Initialize rewards, losses, and optimizer
    rewards = []
    losses = []
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    for episode in range(max_episodes):
        # use_epsilon, when set, fixes epsilon; otherwise it follows the annealing schedule.
        if not use_epsilon:
            epsilon = compute_epsilon(episode)
        else:
            epsilon = use_epsilon

        # train_type selects how the agent's initial position is set each episode:
        #   0 - random position anywhere inside the current curriculum region
        #   1 - random position near the far corner of the curriculum region
        #   2 - fixed position at the far corner
        #   3 - keep the environment's default initial position
        if train_type == 0:
            env.agent_pos_init = Point(
                random.randint(1, 5 * curriculum_num - 1),
                random.randint(0, curriculum_num - 1))
        elif train_type == 1:
            env.agent_pos_init = Point(
                random.randint(5 * curriculum_num - 6, 5 * curriculum_num - 1),
                random.randint(curriculum_num - 2, curriculum_num - 1))
        elif train_type == 2:
            env.agent_pos_init = Point(5 * curriculum_num - 1,
                                       curriculum_num - 1)
        elif train_type == 3:
            pass

        state = env.reset()
        episode_rewards = 0.0

        for t in range(t_max):
            # Model takes action
            action = model.act(state, epsilon)

            # Apply the action to the environment
            next_state, reward, done, info = env.step(action)

            if env.world.agent_state == AgentState.crashed:
                reward = -2

            # Save transition to replay buffer
            memory.push(
                Transition(state, [action], [reward], next_state, [done]))

            state = next_state
            episode_rewards += reward
            if done or (train_type == 4 and t > 40):
                break
        rewards.append(episode_rewards)

        # Train the model if memory is sufficient
        if len(memory) > min_buffer:
            # if np.mean(rewards[print_interval:]) < -60:
            #     print('Bad initialization. Please restart the training.')
            #     exit()
            for i in range(train_steps):
                loss = optimize(model, target, memory, optimizer)
                losses.append(loss.item())

        # Update target network every once in a while
        if episode % target_update == 0:
            target.load_state_dict(model.state_dict())

        if episode % print_interval == 0 and episode > 0:
            print(
                "[Curriculum {} Type {} Episode {}]\tavg rewards : {:.3f},\tavg loss: : {:.6f},\tbuffer size : {},\tepsilon : {:.1f}%"
                .format(curriculum_num, train_type, episode,
                        np.mean(rewards[print_interval:]),
                        np.mean(losses[print_interval * 10:]), len(memory),
                        epsilon * 100))
            f.write(
                "[Curriculum {} Type {} Episode {}]\tavg rewards : {:.3f},\tavg loss: : {:.6f},\tbuffer size : {},\tepsilon : {:.1f}% \n"
                .format(curriculum_num, train_type, episode,
                        np.mean(rewards[print_interval:]),
                        np.mean(losses[print_interval * 10:]), len(memory),
                        epsilon * 100))
            f.flush()

        if episode % 1000 == 0:
            save_model_with_path(
                model, './model_' + str(curriculum_num) + '_' +
                str(train_type) + '_' + str(episode) + '.pt')
    f.close()

    return model
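A minimal driver sketch (my assumption about how the pieces fit together, not part of the listing): training a fresh ConvDQN on the fixed environment from Example #2. train_type=3 keeps the environment's default initial position, and the module-level hyperparameters referenced inside train are assumed to be defined elsewhere.

# Hedged driver sketch, not from the original listing. Assumes the hyperparameters
# used inside train() (device, buffer_limit, learning_rate, max_episodes, t_max,
# curriculum_num, target_update, print_interval, ...) are defined at module level.
env = construct_training_env()  # fixed training env from Example #2
model = train(None, env, train_type=3, model_class=ConvDQN)
save_model_with_path(model, './model_final.pt')  # same save helper used inside train()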
Example #6
def getStateTuple(env):
    '''
    Helper function to convert an env state to a state feature vector.
    '''
    cars = env.cars
    done = env.done
    state = []
    for car in cars:
        state += [car.position.x, car.position.y]
    state.append(int(done))
    return state


if not SUBMISSION:
    ### Sample test cases.
    test_config = [{
        'lanes': [LaneSpec(0, [-2, -1])] * 5,
        'width': 9,
        'gamma': 0.9,
        'seed': 15,
        'fin_pos': Point(0, 0),
        'agent_pos': Point(8, 4),
        'stochasticity': 1.
    }, {
        'lanes': [LaneSpec(1, [-2, -1])] * 2,
        'width': 4,
        'gamma': 0.9,
        'seed': 15,
        'fin_pos': Point(0, 1),
        'agent_pos': Point(3, 1),
        'stochasticity': 1.
    }, {
        'lanes': [LaneSpec(1, [-3, -1])] * 2 + [LaneSpec(0, [0, 0])],
        'width': 4,
        'gamma': 0.9,
        'seed': 100,
Example #7
def heuristic_reward():
    # parse_sas_plan(pos_memo)
    task2_env = construct_task2_env()
    n_lanes, n_width, agent_speed_range = len(task2_env.lanes), task2_env.width, task2_env.agent_speed_range
    pos_memo = [[0 if (x == 0 and y == 0) else None for y in range(n_lanes)] for x in range(n_width)]

    lanes = [LaneSpec(0, [0, 0])] * n_lanes

    for start_x in list(range(n_width))[::-1]:
        for start_y in list(range(n_lanes))[::-1]:
            if pos_memo[start_x][start_y] is None:
                print("Start x: {} start y: {}".format(start_x, start_y))
                env = gym.make('GridDriving-v0', lanes=lanes, width=n_width,
                               random_seed=42, agent_speed_range=(-3, -1), agent_pos_init=Point(x=start_x, y=start_y))
                gen = initializeSystem(env)
                generateDomainPDDLFile(gen)
                generateProblemPDDLFile(gen)
                runPDDLSolver(gen)
                parse_sas_plan(pos_memo, start_x, start_y)
                print("pos_memo:")
                print(pos_memo)

    pos_inf_reward, neg_inf_reward = 10, -10

    for x in range(n_width):
        for y in range(n_lanes):
            if pos_memo[x][y] == 0:
                pos_memo[x][y] = pos_inf_reward
            elif pos_memo[x][y] == -1:
                pos_memo[x][y] = neg_inf_reward
            else:
                pos_memo[x][y] = 1. / pos_memo[x][y]
    print("Final reward matrix")
    pos_memo = np.array(pos_memo)
    print(pos_memo)
    save_to_pickle(pos_memo, "reward_shaping.p")
Example #8
    ### Sample test cases. 
    test_config = [{'lanes': [LaneSpec(1, [-1, -1])] * 3, 'width': 5, 'seed': 10, 'iters': 300},
                   {'lanes': [LaneSpec(2, [-2, -1])] * 3, 'width': 7, 'seed': 15, 'iters': 100},
                   {'lanes': [LaneSpec(2, [-2, -1])] * 4, 'width': 8, 'seed': 125, 'iters': 500},
                   {'lanes': [LaneSpec(2, [-3, -2])] * 4, 'width': 10, 'seed': 44, 'iters': 300},
                   {'lanes': [LaneSpec(2, [-3, -1])] * 4, 'width': 10, 'seed': 125, 'iters': 400},
                   {'lanes': [LaneSpec(2, [-3, -1])] * 4, 'width': 10, 'seed': 25, 'iters': 300}]

    test_case_number = 5  #Change the index for a different test case
    LANES = test_config[test_case_number]['lanes']
    WIDTH = test_config[test_case_number]['width']
    RANDOM_SEED = test_config[test_case_number]['seed']
    numiters = test_config[test_case_number]['iters']
    stochasticity = 1.
    env = gym.make('GridDriving-v0', lanes=LANES, width=WIDTH,
                   agent_speed_range=(-3, -1), finish_position=Point(0, 0),  # agent_pos_init=Point(4, 2),
                   stochasticity=stochasticity, tensor_state=False, flicker_rate=0., mask=None, random_seed=RANDOM_SEED)

    actions = env.actions
    env.render()
    done = False
    mcts = MonteCarloTreeSearch(env=env, numiters=numiters, explorationParam=1.,random_seed=RANDOM_SEED)
    while not env.done:
        state = GridWorldState(env.state, is_done=done)
        action = mcts.buildTreeAndReturnBestAction(initialState=state)
        print(action)
        done = env.step(state=deepcopy(state.state), action=action)[2]
        env.render()
        if done:
            break
    print("simulation done")
Example #9
def getStateTuple(env):
    '''
    Helper function to convert an env state to a state feature vector.
    '''

    cars = env.cars
    done = env.done
    state = []
    for car in cars:
        state += [car.position.x, car.position.y]
    state.append(int(done))
    return state

if not SUBMISSION:
    ### Sample test cases. 
    test_config = [{'lanes': [LaneSpec(0, [-2, -1])] * 5, 'width': 9, 'gamma': 0.9, 'seed': 15, 'fin_pos': Point(0, 0), 'agent_pos': Point(8, 4), 'stochasticity': 1.},
                   {'lanes': [LaneSpec(1, [-2, -1])] * 2, 'width': 4, 'gamma': 0.9, 'seed': 15, 'fin_pos': Point(0, 1), 'agent_pos': Point(3, 1), 'stochasticity': 1.},
                   {'lanes': [LaneSpec(1, [-3, -1])] * 2 + [LaneSpec(0, [0, 0])], 'width': 4, 'gamma': 0.9, 'seed': 100, 'fin_pos': Point(0, 0), 'agent_pos': Point(3, 2), 'stochasticity': .5},
                   {'lanes': [LaneSpec(0, [0, 0])] + [LaneSpec(1, [-3, -1])] * 2, 'width': 4, 'gamma': 0.5, 'seed': 128, 'fin_pos': Point(0, 0), 'agent_pos': Point(3, 2), 'stochasticity': 0.75},
                   {'lanes': [LaneSpec(1, [-3, -1])] * 2 + [LaneSpec(0, [0, 0])], 'width': 4, 'gamma': 0.99, 'seed': 111, 'fin_pos': Point(0, 0), 'agent_pos': Point(3, 2), 'stochasticity': .5},
                   {'lanes': [LaneSpec(1, [-3, -1]), LaneSpec(0, [0, 0]), LaneSpec(1, [-3, -1])], 'width': 4, 'gamma': 0.999, 'seed': 125, 'fin_pos': Point(0, 0), 'agent_pos': Point(3, 2), 'stochasticity': 0.9}]

    test_case_number = 0 #Change the index for a different test case
    LANES = test_config[test_case_number]['lanes']
    WIDTH = test_config[test_case_number]['width']
    RANDOM_SEED = test_config[test_case_number]['seed']
    GAMMA = test_config[test_case_number]['gamma']
    FIN_POS = test_config[test_case_number]['fin_pos']
    AGENT_POS = test_config[test_case_number]['agent_pos']
    stochasticity = test_config[test_case_number]['stochasticity']
    env = gym.make('GridDriving-v0', lanes=LANES, width=WIDTH,