def create_agent_66_experiment_runner(num_dcs, num_customers, dcs_per_customer, demand_mean, demand_var, num_commodities, orders_per_day, num_steps):
    """Build an ExperimentRunner wired with the AgentHighest policy.

    Constructs the physical network, the order and inventory generators,
    the shipping environment, and the agent, then bundles them into an
    ExperimentRunner named "randomvalid_validation".
    """
    network = PhysicalNetwork(
        num_dcs,
        num_customers,
        dcs_per_customer,
        demand_mean,
        demand_var,
        num_commodities,
    )
    orders = ActualOrderGenerator(network, orders_per_day)
    inventory = DirichletInventoryGenerator(network)
    env_params = EnvironmentParameters(network, orders, inventory, num_steps)
    environment = ShippingFacilityEnvironment(env_params)
    policy = AgentHighest(environment)
    return ExperimentRunner(
        orders,
        inventory,
        policy,
        environment,
        experiment_name="randomvalid_validation",
    )
def run_with_params(num_dcs, num_customers, dcs_per_customer, demand_mean, demand_var, num_commodities, orders_per_day, num_steps):
    """Run one episode with a RandomAgent on a freshly built environment.

    Builds the network, generators, environment, and agent, then steps the
    environment until it reports done, rendering after every step.

    Returns:
        (actions, episode_rewards): the per-step actions taken and rewards
        received during the episode.
    """
    network = PhysicalNetwork(
        num_dcs,
        num_customers,
        dcs_per_customer,
        demand_mean,
        demand_var,
        num_commodities,
    )
    orders = ActualOrderGenerator(network, orders_per_day)
    inventory = DirichletInventoryGenerator(network)
    env_params = EnvironmentParameters(network, orders, inventory, num_steps)
    env = ShippingFacilityEnvironment(env_params)
    policy = RandomAgent(env)

    observation = env.reset()
    last_reward = 0
    finished = False

    print("=========== starting episode loop ===========")
    print("Initial environment: ")
    env.render()

    actions = []
    episode_rewards = []
    while not finished:
        # Agent observes the current state and picks an action.
        chosen = policy.act(observation, last_reward, finished)
        # Environment advances one step and reports the new state and reward.
        observation, last_reward, finished, info = env.step(chosen)
        env.render()

        actions.append(chosen)
        episode_rewards.append(last_reward)

        if finished:
            print("===========Environment says we are DONE ===========")

    return actions, episode_rewards
def main():
    """Entry point: configure, seed, and train a Q-learning shipping agent.

    Builds the experiment config, seeds all RNGs for reproducibility, starts
    a wandb run, constructs the environment and Q-learning agent, and trains
    via PyTorch Lightning with result-storage/upload callbacks.
    """
    config_dict = {
        "env": {
            "num_dcs": 4,  # 3
            "num_customers": 100,
            "num_commodities": 35,
            "orders_per_day": int(100 * 0.05),
            "dcs_per_customer": 2,
            "demand_mean": 500,
            "demand_var": 150,
            "num_steps": 30,  # steps per episode
            "big_m_factor": 10000,  # how many times the customer cost is the big m.
        },
        "hps": {
            "env": "shipping-v0",  # openai env ID.
            "episode_length": 30,
            "max_episodes": 35,  # to do is this num episodes, is it being used?
            "lr": 1e-4,
            "discount": 0.95,
            "epsilon": 0.01,
            # "batch_size": 30,
            # "sync_rate": 2, # Rate to sync the target and learning network.
        },
        "seed": 0,
        "agent": "q_learning"
        # "agent": "random_valid"
    }

    # Seed every RNG source once for reproducibility.
    # (The original code called np.random.seed twice; the duplicate was removed.)
    torch.manual_seed(config_dict['seed'])
    np.random.seed(config_dict['seed'])
    random.seed(config_dict['seed'])  # not sure if actually used

    run = wandb.init(config=config_dict)  # todo why not saving config???

    config = wandb.config
    environment_config = config.env
    hparams = config.hps

    experiment_name = f"q_{config.agent}_few_warehouses"

    wandb_logger = WandbLogger(
        project="rl_warehouse_assignment",
        name=experiment_name,
        tags=[
            "debug"
            # "experiment"
        ],
        log_model=False,
    )
    wandb_logger.log_hyperparams(dict(config))

    environment_parameters = network_flow_env_builder.build_network_flow_env_parameters(
        environment_config,
        hparams['episode_length'],
        order_gen='biased',
    )

    env = ShippingFacilityEnvironment(environment_parameters)
    agent = q_learning_agent.ShippingEnvQLearningAgent(
        environment_config['num_customers'],
        environment_config['num_dcs'],
        hparams['lr'],
        hparams['discount'],
        hparams['epsilon'],
        env,
    )

    model = GreedyAgentRLModel(agent, env, experiment_name=experiment_name)

    trainer = pl.Trainer(
        max_epochs=hparams['max_episodes'],
        early_stop_callback=False,
        val_check_interval=100,
        logger=wandb_logger,
        log_save_interval=1,
        row_log_interval=1,  # the default of this may leave info behind.
        callbacks=[
            MyPrintingCallback(),
            # Persist episode artifacts locally and upload them to wandb.
            ShippingFacilityEnvironmentStorageCallback(
                experiment_name,
                base="data/results/",
                experiment_uploader=WandbDataUploader(),
            ),
        ],
    )

    trainer.fit(model)