Example 1
import random
import time

# Persistence and BitEnvironment are project-specific classes assumed to be
# imported elsewhere in this module.
def main():
    persistence = Persistence()
    env = BitEnvironment()
    direct = [[-1, -1], [-1, 0], [-1, 1], [0, -1], [0, 1], [1, -1], [1, 0],
              [1, 1]]
    terminal, step_count, e, success = False, 0, 0, 0
    obes = []
    for i in range(0, 80):
        obes.append([random.randint(0, 99), random.randint(0, 99)])
    env.setObstacle(obes)

    while True:
        #direction = random.randint(0, 7)
        # Two parts: indices 0-1 hold the direction difference to the target, 2-3 the previous velocity
        direction = env.getNextStep()
        Position, move = env.step(direction)
        env.preDirect = direct[direction]
        print("行动否:{} 具体行动方向: {} 当前位置 {}".format(move, direct[direction],
                                                 Position))
        dist, realDirect = env.distToTarget()
        print("距离目标: {} 目标方位: {}".format(dist, realDirect))
        if env.judgeCollision() or env.done():
            terminal = True
        step_count += 1
        print("阶段 {} 完成, 总成功: {} 当轮步数: {}".format(e, success, step_count))
        env.showimage()
        if terminal:
            if env.done():
                success += 1
                print(
                    "-----------------------------success----------------------------------"
                )
            elif env.judgeCollision():
                print(
                    "-----------------------------collision----------------------------------"
                )
            e += 1
            infoShow = "time {}, episode {} finish, total success: {} step: {}".format(
                time.strftime('%Y-%m-%d %H:%M:%S',
                              time.localtime(time.time())), e, success,
                step_count)
            persistence.saveTerminalRecord("GoInBitMap", infoShow)
            # Reset the per-episode step counter only after it has been logged.
            step_count = 0

            env.reset([random.randint(0, 99), random.randint(0, 99)])
            env.setObstacle(obes)
            terminal = False
        time.sleep(0.2)
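Example 1 drives a grid-world navigation loop: each iteration asks the environment for the next move, applies it, checks for a collision or goal arrival, and logs every finished episode through Persistence before resetting to a new random start among the same obstacles. The stub below is a hypothetical sketch of the BitEnvironment interface this loop assumes; the method names are taken from the calls above, while the bodies are placeholders rather than the project's real implementation.

class BitEnvironment:
    """Hypothetical interface stub inferred from Example 1 (not the real class)."""

    def __init__(self):
        self.preDirect = [0, 0]       # direction taken on the previous step

    def setObstacle(self, cells):     # register obstacle coordinates on the grid
        ...

    def getNextStep(self):            # pick an action index in range(8)
        ...

    def step(self, direction):        # apply the move, return (position, moved_flag)
        ...

    def distToTarget(self):           # return (distance, bearing) to the goal
        ...

    def judgeCollision(self):         # True if the agent hit an obstacle
        ...

    def done(self):                   # True if the agent reached the goal
        ...

    def showimage(self):              # render the current grid state
        ...

    def reset(self, start):           # begin a new episode from `start`
        ...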
Example 2
import os
import time

import tensorflow as tf

# drone_env_heightcontrol, Persistence, DDPG_agent and EnergyAgent are
# project-specific classes; DIR, PATH, PREMODEL and choose are module-level
# settings assumed to be defined elsewhere.
def main():
    env = drone_env_heightcontrol(aim=None)
    state = env.reset()
    state_shape = 4
    action_bound = 1
    action_dim = 2
    e, success, episode_reward, step_count = 0, 0, 0, 0
    persistence = Persistence()

    if choose == "1":
        infoShow = "time {}, start new train, method{}".format(
            time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time())),
            "DDPG")
        persistence.saveTerminalRecord("DDPG", infoShow)

        with tf.device("/gpu:0"):
            config = tf.ConfigProto(
                allow_soft_placement=True
            )  # tf.ConfigProto configures the session at creation time
            config.gpu_options.allow_growth = True  # grow GPU memory on demand instead of reserving it all up front
            with tf.Session(config=config) as sess:

                globe_episode = tf.Variable(
                    0, dtype=tf.int32, trainable=False, name='globe_episode'
                )  # trainable=False keeps this counter out of GraphKeys.TRAINABLE_VARIABLES, so training never tries to update it

                agent = DDPG_agent(sess, state_shape, action_bound, action_dim)
                saver = tf.train.Saver(var_list=tf.global_variables())
                print(DIR)
                if not agent.load(saver, DIR):
                    sess.run(tf.global_variables_initializer())
                    if not os.path.exists(DIR):
                        os.mkdir(DIR)
                else:
                    print("coninnue------------------")

                if PREMODEL:
                    prepath = os.path.join(PATH, 'premodel', 'checkpoint')
                    ckpt = tf.train.get_checkpoint_state(
                        os.path.dirname(prepath))
                    if ckpt and ckpt.model_checkpoint_path:
                        saver.restore(agent.sess, ckpt.model_checkpoint_path)
                        agent.action_noise.reset()
                        print(
                            "------------pretrained model loaded-------------")
                while True:
                    print(state[1])
                    action = agent.act(state)
                    next_state, reward, terminal, info = env.step(action)
                    episode_reward += reward
                    agent.observe(state, action, reward, next_state, terminal)
                    agent.train()
                    state = next_state
                    '''
					if train%10==0:
						print("total training episode: {}".format(train))
						print("--------------------------------------------------------------------")
					if train%10000==0:
						nDir = os.path.join(PATH, "data/" + str(int(train // 10000)))
						if not os.path.exists(nDir):
							os.mkdir(nDir)
						agent.save(saver, nDir)
						print("save")
						print("--------------------------------------------------------------------")
					train+=1
					'''
                    step_count += 1
                    print("aim height: {}".format(env.aim_height).ljust(
                        20, " "),
                          "reward: {:.5f}.".format(reward).ljust(20, " "),
                          "steps: {}".format(step_count).ljust(20, " "),
                          end="\r")

                    if terminal:
                        if info == "success":
                            success += 1
                        print(" " * 80, end="\r")
                        avg_reward = episode_reward / step_count if step_count else 0.0
                        print(
                            "episode {} finish, average reward: {:.5f}, total success: {} result: {} step: {}"
                            .format(e, avg_reward, success, info,
                                    step_count).ljust(80, " "))
                        e += 1
                        total_episode = sess.run(globe_episode.assign_add(1))
                        if e % 10 == 0:
                            nDir = os.path.join(PATH,
                                                "data/" + str(int(e // 10)))
                            if not os.path.exists(nDir):
                                os.mkdir(nDir)
                            agent.save(saver, nDir, DIR)
                            print("total training episode: {}".format(
                                total_episode))

                        infoShow = "time {}, episode {} finish, total success: {} step: {}".format(
                            time.strftime('%Y-%m-%d %H:%M:%S',
                                          time.localtime(time.time())), e,
                            success, step_count)
                        persistence.saveTerminalRecord("DDPG", infoShow)

                        # Reset per-episode counters only after they were logged.
                        episode_reward = 0
                        step_count = 0
                        state = env.reset()
                return

    elif choose == "2":
        agent = EnergyAgent(state_shape, action_bound, action_dim)
        infoShow = "time {}, start new train, method{}".format(
            time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time())),
            "EnergyAgent")
        persistence.saveTerminalRecord("EnergyAgent", infoShow)

        while True:
            # state[1] holds two parts: indices 0-1 are the direction difference to the target, 2-3 the previous velocity
            print("target relative position and current movement direction: {} ".format(state[1]))
            action = agent.act(state)
            print("具体方向: {} ".format(action))
            next_state, reward, terminal, info = env.step(action)
            state = next_state
            step_count += 1
            print("阶段 {} 完成, 总成功: {} 当轮步数: {}".format(e, success, step_count))

            if terminal:
                if info == "success":
                    success += 1
                    print(
                        "-----------------------------success----------------------------------"
                    )
                e += 1

                infoShow = "time {}, episode {} finish, total success: {} step: {}".format(
                    time.strftime('%Y-%m-%d %H:%M:%S',
                                  time.localtime(time.time())), e, success,
                    step_count)
                persistence.saveTerminalRecord("EnergyAgent", infoShow)
                # Reset the per-episode step counter only after it has been logged.
                step_count = 0

                state = env.reset()

    else:
        print("仿真结束")
        return
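Example 2 runs the same episode loop against a drone height-control environment with two interchangeable controllers: a DDPG agent trained online when choose is "1", and a hand-crafted EnergyAgent when choose is "2"; both follow the act, step, observe/train, log, reset cycle shown above. The stub below is a hypothetical sketch of the agent interface the DDPG branch relies on; the method names mirror the calls in the example, and the bodies are placeholders rather than the project's actual DDPG_agent.

class DDPG_agent:
    """Hypothetical interface stub inferred from Example 2 (not the real class)."""

    def __init__(self, sess, state_shape, action_bound, action_dim):
        self.sess = sess
        self.action_noise = None      # exploration-noise process exposing reset()

    def act(self, state):             # return an action within [-action_bound, action_bound]
        ...

    def observe(self, state, action, reward, next_state, terminal):
        ...                           # store the transition in the replay buffer

    def train(self):                  # one update step on a sampled mini-batch
        ...

    def load(self, saver, directory):
        ...                           # restore weights; return False if no checkpoint exists

    def save(self, saver, directory, base_dir=None):
        ...                           # persist the current weights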