# Project-local modules (build_env_wrapper, dqn_mlp_net, agents, actions,
# logger, runner, ExperienceReplayBuffer) are assumed importable from the
# surrounding package; params and device are defined elsewhere.
import torch.optim as optim
from torch.utils.tensorboard import SummaryWriter

# INIT ENV
env, observation_space, action_space = build_env_wrapper(
    params["env_name"], env_type=params["env_type"])

# LOGGING
writer = SummaryWriter(comment="-" + params["run_name"] + "-basic")

# NETWORK: online Q-network plus a periodically synced target copy
net = dqn_mlp_net.Network(observation_space, action_space,
                          hidden_layer_size=64).to(device)
tgt_net = agents.TargetNetwork(net)

# AGENT: epsilon-greedy exploration with a scheduled epsilon decay
selector = actions.EpsilonGreedyActionSelector(
    epsilon=params["epsilon_start"])
epsilon_tracker = logger.EpsilonTracker(selector, params)
agent = agents.DQNAgent(net, selector, device=device)

# RUNNER: one-step transitions feeding the experience replay buffer
exp_source = runner.RunnerSourceFirstLast(
    env, agent, gamma=params["gamma"], steps_count=1)
buffer = ExperienceReplayBuffer(
    exp_source, buffer_size=params["replay_size"])
optimizer = optim.Adam(net.parameters(), lr=params["learning_rate"])

frame_idx = 0
done = False
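These objects are typically driven by a populate-sample-optimize loop. The sketch below is a minimal illustration, not code from this project: it assumes ptan-style APIs (buffer.populate(), epsilon_tracker.frame(), tgt_net.sync() and tgt_net.target_model), a hypothetical loss helper calc_loss_dqn, and illustrative params keys replay_initial, batch_size, and target_net_sync, none of which appear above.

while not done:
    frame_idx += 1
    buffer.populate(1)                # play one step, store the transition
    epsilon_tracker.frame(frame_idx)  # anneal epsilon as training progresses

    if len(buffer) < params["replay_initial"]:
        continue  # wait until the buffer holds enough transitions

    optimizer.zero_grad()
    batch = buffer.sample(params["batch_size"])
    # calc_loss_dqn is a hypothetical helper computing the TD error
    # between Q(s, a) and r + gamma * max_a' Q_target(s', a')
    loss = calc_loss_dqn(batch, net, tgt_net.target_model,
                         gamma=params["gamma"], device=device)
    loss.backward()
    optimizer.step()
    writer.add_scalar("loss", loss.item(), frame_idx)

    if frame_idx % params["target_net_sync"] == 0:
        tgt_net.sync()  # copy the online weights into the target network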
# Second variant: assumes the project-local modules models, actions,
# agents, experience and common, plus the RL_options dict and env.
import os

import torch
import torch.optim as optim

net = models.SimpleFFDQN(env.get_obs_len(), env.get_action_space_size())

# optionally restore weights from a saved checkpoint
if RL_options['load_net']:
    with open(os.path.join(RL_options['net_saved_path'],
                           RL_options['net_file']), "rb") as f:
        checkpoint = torch.load(f)
    net.load_state_dict(checkpoint['state_dict'])

net.to(torch.device("cuda"))  # move the network onto the GPU

# agent with epsilon-greedy exploration
selector = actions.EpsilonGreedyActionSelector(RL_options['epsilon_start'])
agent = agents.DQNAgent(net, selector)
# agent = agents.Supervised_DQNAgent(net, selector, sample_sheet,
#                                    assistance_ratio=0.2)

# create the experience source and replay buffer
exp_source = experience.ExperienceSourceFirstLast(
    env, agent, RL_options['gamma'],
    steps_count=RL_options['reward_steps'])
buffer = experience.ExperienceReplayBuffer(exp_source,
                                           RL_options['replay_size'])

# create optimizer
optimizer = optim.Adam(net.parameters(), lr=RL_options['lr'])

# create net pre-processor
net_processor = common.netPreprocessor(net, agent.target_model)
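For completeness, the inverse of the loading branch above: a short sketch of saving a checkpoint in the same format the loader expects, a dict carrying the weights under 'state_dict'. The save_checkpoint helper name is hypothetical, not part of the project.

def save_checkpoint(network, path, filename):
    # write a checkpoint in the format the loader above reads:
    # a dict with the network weights under the 'state_dict' key
    os.makedirs(path, exist_ok=True)
    torch.save({'state_dict': network.state_dict()},
               os.path.join(path, filename))

save_checkpoint(net, RL_options['net_saved_path'], RL_options['net_file'])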