env = RewardNegativeDeath(env, death_factor=2) env = ObservationReshape(env) # create agent model = CartpoleNetwork(learning_rate=LEARNING_RATE, discount_factor=DISCOUNT_FACTOR, input_shape=(env.observation_space.shape[0], ), output_shape=env.action_space.n) agent = DQNAgent(actions=env.action_space.n, expl_max=EXPLORATION_MAX, expl_min=EXPLORATION_MIN, expl_decay=EXPLORATION_DECAY, model=model, memory_size=MEMORY_SIZE, batch_size=BATCH_SIZE) # get and parse user args args = Parser.parseargs(defaultTrainIterations=10000, defaultEvalIterations=10) if args.load: agent.load(env, args.loadversion) if args.train != 0: #agent.init_fill_memory(env, 50000) agent.train(env, args.train, train_s=1, save_i=MODEL_SAVE_EVERY) if args.eval != 0: print("Evaluation results (higher scores are better):") agent.evaluate(env, args.eval) if args.save: agent.save(env, args.saveversion) if args.render: agent.render_episode(env, random_action=args.renderrandom) # close env env.close()