Example 1
def create_agent_66_experiment_runner(num_dcs, num_customers, dcs_per_customer,
                                      demand_mean, demand_var, num_commodities,
                                      orders_per_day, num_steps):
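    """Wire a PhysicalNetwork, order/inventory generators and an AgentHighest
    policy into an ExperimentRunner for the "randomvalid_validation" experiment."""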
    physical_network = PhysicalNetwork(
        num_dcs,
        num_customers,
        dcs_per_customer,
        demand_mean,
        demand_var,
        num_commodities,
    )
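    # Generators for the incoming order stream and the Dirichlet-based initial inventories.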
    order_generator = ActualOrderGenerator(physical_network, orders_per_day)
    generator = DirichletInventoryGenerator(physical_network)

    environment_parameters = EnvironmentParameters(physical_network,
                                                   order_generator, generator,
                                                   num_steps)

    env = ShippingFacilityEnvironment(environment_parameters)
    agent = AgentHighest(env)

    return ExperimentRunner(order_generator,
                            generator,
                            agent,
                            env,
                            experiment_name="randomvalid_validation")
Example 2
def run_with_params(num_dcs, num_customers, dcs_per_customer, demand_mean,
                    demand_var, num_commodities, orders_per_day, num_steps):
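    """Run a single episode with a RandomAgent on the ShippingFacilityEnvironment
    and return the actions taken and the per-step rewards."""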
    physical_network = PhysicalNetwork(
        num_dcs,
        num_customers,
        dcs_per_customer,
        demand_mean,
        demand_var,
        num_commodities,
    )
    # order_generator = NaiveOrderGenerator(num_dcs, num_customers, orders_per_day)
    order_generator = ActualOrderGenerator(physical_network, orders_per_day)
    generator = DirichletInventoryGenerator(physical_network)
    environment_parameters = EnvironmentParameters(physical_network,
                                                   order_generator, generator,
                                                   num_steps)

    env = ShippingFacilityEnvironment(environment_parameters)
    agent = RandomAgent(env)

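    # Gym-style episode loop: reset once, then step with the agent's actions until the environment reports done.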
    obs = env.reset()
    reward = 0
    done = False
    print("=========== starting episode loop ===========")
    print("Initial environment: ")
    env.render()
    actions = []
    episode_rewards = []
    # demands_per_k = np.zeros((num_commodities, num_steps))
    # inventory_at_t = np.zeros((num_commodities, num_steps))  # TODO: fill these in eventually
    while not done:
        # The agent observes the current state and chooses an action.
        action = agent.act(obs, reward, done)
        # print(f"Agent is taking action: {action}")

        # The environment steps with the agent's action and returns the new state and reward.
        obs, reward, done, info = env.step(action)
        # print(f"Got reward {reward} done {done}")

        # Render the current state of the environment
        env.render()
        actions.append(action)
        episode_rewards.append(reward)

        if done:
            print("===========Environment says we are DONE ===========")

    return actions, episode_rewards
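
A minimal invocation sketch for the function above; the parameter values mirror the "env" config used in Example 3, and it assumes run_with_params and its dependencies are importable from the project (import paths are not shown in these examples):

# Hypothetical call: values taken from the Example 3 "env" config.
actions, episode_rewards = run_with_params(
    num_dcs=4,
    num_customers=100,
    dcs_per_customer=2,
    demand_mean=500,
    demand_var=150,
    num_commodities=35,
    orders_per_day=int(100 * 0.05),
    num_steps=30,
)
print(f"Episode finished after {len(actions)} steps, total reward {sum(episode_rewards)}")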
Example 3
def main():
    config_dict = {
        "env": {
            "num_dcs": 4,  #3
            "num_customers": 100,
            "num_commodities": 35,
            "orders_per_day": int(100 * 0.05),
            "dcs_per_customer": 2,
            "demand_mean": 500,
            "demand_var": 150,
            "num_steps": 30,  # steps per episode
            "big_m_factor":
            10000  # how many times the customer cost is the big m.
        },
        "hps": {
            "env": "shipping-v0",  #openai env ID.
            "episode_length": 30,
            "max_episodes":
            35,  # to do is this num episodes, is it being used?
            "lr": 1e-4,
            "discount": 0.95,
            "epsilon": 0.01,
            # "batch_size": 30,
            # "sync_rate": 2, # Rate to sync the target and learning network.
        },
        "seed": 0,
        "agent": "q_learning"
        # "agent": "random_valid"
    }

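    # Seed every random number source (torch, numpy, python's random) for reproducibility.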
    torch.manual_seed(config_dict['seed'])
    np.random.seed(config_dict['seed'])
    random.seed(config_dict['seed'])  # not sure whether this is actually used

    run = wandb.init(config=config_dict)  # TODO: why isn't the config being saved?

    config = wandb.config
    environment_config = config.env
    hparams = config.hps

    experiment_name = f"q_{config.agent}_few_warehouses"
    wandb_logger = WandbLogger(
        project="rl_warehouse_assignment",
        name=experiment_name,
        tags=[
            "debug"
            # "experiment"
        ],
        log_model=False)

    wandb_logger.log_hyperparams(dict(config))

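    # Build the environment parameters from the wandb config, using the 'biased' order generator.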
    environment_parameters = network_flow_env_builder.build_network_flow_env_parameters(
        environment_config, hparams['episode_length'], order_gen='biased')

    env = ShippingFacilityEnvironment(environment_parameters)
    agent = q_learning_agent.ShippingEnvQLearningAgent(
        environment_config['num_customers'], environment_config['num_dcs'],
        hparams['lr'], hparams['discount'], hparams['epsilon'], env)

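    # GreedyAgentRLModel is what the Lightning trainer fits below, so it presumably
    # wraps the agent/environment episode loop as a LightningModule.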
    model = GreedyAgentRLModel(agent, env, experiment_name=experiment_name)

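    # Note: early_stop_callback, log_save_interval and row_log_interval are Trainer
    # arguments from older (pre-1.0) PyTorch Lightning releases.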
    trainer = pl.Trainer(
        max_epochs=hparams['max_episodes'],
        early_stop_callback=False,
        val_check_interval=100,
        logger=wandb_logger,
        log_save_interval=1,
        row_log_interval=1,  # the default interval may drop some logged info.
        callbacks=[
            MyPrintingCallback(),
            ShippingFacilityEnvironmentStorageCallback(
                experiment_name,
                base="data/results/",
                experiment_uploader=WandbDataUploader())
        ])

    trainer.fit(model)