Example #1
from bin_packing_environment import BinPackingActionMaskGymEnvironment


def make_env():
    # Build the bin-packing environment with the "perfect pack" item distribution.
    env_config = {
        'bag_capacity': 63,
        'item_sizes': [1, 2, 3, 4, 5, 6, 7, 8, 9],
        # 'item_probabilities': [0.14, 0.10, 0.06, 0.13, 0.11, 0.13, 0.03, 0.11, 0.19],  # bounded waste
        'item_probabilities': [0.06, 0.11, 0.11, 0.22, 0, 0.11, 0.06, 0, 0.33],  # perfect pack
        # 'item_probabilities': [0, 0, 0, 1/3, 0, 0, 0, 0, 2/3],  # linear waste
        'time_horizon': 1000,  # 10000
    }

    env = BinPackingActionMaskGymEnvironment(env_config)

    return env
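The factory above can be smoke-tested with the plain Gym API that the later examples rely on (reset() returning a state, step() returning state, reward, done, info); a minimal, hypothetical check:

env = make_env()
state = env.reset()
state, reward, done, info = env.step(0)  # action 0; its exact meaning depends on the environment's action encoding
print(reward, done)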
Example #2
from bin_packing_environment import BinPackingActionMaskGymEnvironment


def make_env(env_name, agent, seed=-1, render_mode=False):
    # Earlier CarRacingDream-based setup, kept commented out for reference:
    # env = CarRacingDream(agent)
    # if seed < 0:
    #     seed = np.random.randint(2**31 - 1)
    # env.seed(seed)
    # return env
    env_config = {
        'bag_capacity': 63,
        'item_sizes': [1, 2, 3, 4, 5, 6, 7, 8, 9],
        # 'item_probabilities': [0.14, 0.10, 0.06, 0.13, 0.11, 0.13, 0.03, 0.11, 0.19],  # bounded waste
        'item_probabilities': [0.06, 0.11, 0.11, 0.22, 0, 0.11, 0.06, 0, 0.33],  # perfect pack
        # 'item_probabilities': [0, 0, 0, 1/3, 0, 0, 0, 0, 2/3],  # linear waste
        'time_horizon': 1000,  # 10000
    }

    env = BinPackingActionMaskGymEnvironment(env_config)

    return env
Example #3
import numpy as np
from bin_packing_environment import BinPackingActionMaskGymEnvironment


def get_human_action():
    # Only the tail of this helper appears in the source; prompting for the
    # action index on stdin is an assumed completion.
    action = int(input("Enter an action index: "))
    print(action)

    return [action]


env_config = {
    'bag_capacity': 63,
    'item_sizes': [1, 2, 3, 4, 5, 6, 7, 8, 9],
    # 'item_probabilities': [0.14, 0.10, 0.06, 0.13, 0.11, 0.13, 0.03, 0.11, 0.19],  # bounded waste
    'item_probabilities': [0.06, 0.11, 0.11, 0.22, 0, 0.11, 0.06, 0, 0.33],  # perfect pack
    # 'item_probabilities': [0, 0, 0, 1/3, 0, 0, 0, 0, 2/3],  # linear waste
    'time_horizon': 1000,  # 10000
}

env = BinPackingActionMaskGymEnvironment(env_config)
state = env.reset()

done = False
total_reward = 0
while not done:
    # Debug output: first entry of each row of the state, and the state's shape.
    print([x[0] for x in state[0]])
    print(np.shape(state))
    # action = get_action(state)
    action = get_human_action()
    print(action)
    state, reward, done, info = env.step(action)
    print("reward", reward)
    total_reward += reward

print("Total reward for best fit baseline agent: ", total_reward)
Example #4
def register_env_creator(self):
    # Register the environment under a versioned string name so it can be
    # referenced by configuration (register_env here is presumably Ray's
    # ray.tune.registry.register_env).
    from bin_packing_environment import BinPackingActionMaskGymEnvironment
    register_env(
        "BinPackingActionMaskGymEnvironment-v1",
        lambda env_config: BinPackingActionMaskGymEnvironment(env_config))
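Once registered, the string name can be handed to RLlib via its config; a sketch assuming Ray's classic tune.run API, which the source does not show:

import ray
from ray import tune

# Assumes register_env_creator() has already been called in this process.
ray.init()
tune.run(
    "PPO",
    stop={"timesteps_total": 100000},
    config={
        "env": "BinPackingActionMaskGymEnvironment-v1",
        "env_config": {
            "bag_capacity": 63,
            "item_sizes": [1, 2, 3, 4, 5, 6, 7, 8, 9],
            "item_probabilities": [0.06, 0.11, 0.11, 0.22, 0, 0.11, 0.06, 0, 0.33],
            "time_horizon": 1000,
        },
    },
)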