def run(count=10, **kwargs):
    # Run `count` independent DQN trials; each trial starts from a fresh
    # environment, network, and force generator.
    global env, RL
    force.init()

    for i in range(count):
        kwargs['xh'] = i
        _run(**kwargs)
        # Reset the shared environment, the DQN, and the force generator for the next trial.
        env = SingleCartPoleEnv().unwrapped
        RL = DeepQNetwork(n_actions=env.action_space.n,
                          n_features=env.observation_space.shape[0])
        force.force_generator = force.ForceGenerator(0.0, 0.0, 0.0, 1.01)
def docommand(command, params):
    cmd = command.strip().lower()
    # Show the complexity surface
    if cmd == 'fig3':
        fig3(params)
    # Show several special complexity curves
    elif cmd == 'fig4':
        fig4(params)
    # Run the NEAT complexity-reward curve computation
    elif cmd == 'runa':
        runa(params)
    # Aggregate the data from multiple runs of experiment A
    elif cmd == 'runatotal':
        runatotal(params)
    # Show the complexity-reward curves of all algorithms
    elif cmd == 'fig5':
        fig5(params)
    # Measure how long the pole takes to fall naturally from the initial state
    elif cmd == 'lean':
        env = SingleCartPoleEnv()
        env.lean(params)
    # Run the Mann-Whitney U test on the data of experiment A
    elif cmd == 'table1':
        table1(params)
    # Compute the evolvability values for experiment A
    elif cmd == 'table2':
        table2(params)
    # Show the reward-range curves
    elif cmd == 'fig6':
        fig6(params)
    # Compute the robustness in experiment B
    elif cmd == 'table3':
        table3(params)
    # Show the comparison curves of independent vs. continuous evolution
    elif cmd == 'fig7':
        fig7(params)
    # Run the Mann-Whitney U test for experiment C, and compare the evolvability
    # and robustness of independent vs. continuous evolution
    elif cmd == 'table45':
        table45(params)
    # Compute modularity
    elif cmd == 'fig8':
        fig8(params)
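# A minimal sketch of how `docommand` might be driven interactively. The prompt
# loop, the 'quit'/'exit' convention, and passing the remainder of the line as
# `params` are assumptions for illustration, not part of the original code.
if __name__ == '__main__':
    while True:
        line = input('command> ').strip()
        if not line:
            continue
        if line.lower() in ('quit', 'exit'):
            break
        parts = line.split(maxsplit=1)
        docommand(parts[0], parts[1] if len(parts) > 1 else '')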
Example #3
def fitness(ind, session):
    '''
    Use the number of consecutive steps without the pole falling as the fitness.
    :param ind: individual to evaluate
    :param session: current evolution session
    :return: the best consecutive-balance count over the evaluation episodes
    '''
    env = SingleCartPoleEnv()
    net = ind.getPhenome()
    reward_list, notdone_count_list = runner.do_evaluation(1, env, net.activate)

    return max(notdone_count_list)
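# The evaluation above uses a single episode, so the fitness is noisy. Below is
# a minimal sketch of a less noisy variant, assuming the first argument of
# `runner.do_evaluation` is the episode count (as suggested by the call above);
# the helper name `fitness_averaged` is hypothetical.
def fitness_averaged(ind, session, episodes=5):
    env = SingleCartPoleEnv()
    net = ind.getPhenome()
    reward_list, notdone_count_list = runner.do_evaluation(episodes, env, net.activate)
    # Average over episodes instead of taking the single-episode maximum.
    return sum(notdone_count_list) / len(notdone_count_list)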
def run(count=10, **kwargs):
    # Run `count` policy-gradient trials; each trial starts from a fresh
    # environment, network, and force generator.
    global env
    force.init()

    # Fill in defaults without discarding options passed in by the caller.
    kwargs.setdefault('mode', 'reset')
    kwargs.setdefault('maxepochcount', 1500)
    kwargs.setdefault('complexunit', 100.0)

    for i in range(count):
        kwargs['xh'] = i
        env = SingleCartPoleEnv().unwrapped
        net = PolicyGradients()
        while True:
            result = net.run(**kwargs)
            if kwargs['mode'] == 'noreset':
                break
            if not result:
                break
            # In 'reset' mode, rebuild the environment and the network, then continue.
            env = SingleCartPoleEnv().unwrapped
            net = PolicyGradients()
        force.force_generator = force.ForceGenerator(0.0, 0.0, 0.0, 1.01)
Example #5
# -*- coding: UTF-8 -*-

import tensorflow as tf
import numpy as np

import os
import csv
import copy
from domains.cartpoles.enviornment.cartpole import SingleCartPoleEnv
from domains.cartpoles.enviornment import force

# Adapted from: https://github.com/simoninithomas/Deep_reinforcement_learning_Course/blob/master/Policy%20Gradients/Cartpole/Cartpole%20REINFORCE%20Monte%20Carlo%20Policy%20Gradients.ipynb

env = SingleCartPoleEnv().unwrapped

## ENVIRONMENT Hyperparameters
state_size = 4
action_size = env.action_space.n

## TRAINING Hyperparameters
learning_rate = 0.01
gamma = 0.95 # Discount rate


mode = 'noreset'
maxepochcount = 1000
complexunit = 20.
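
# REINFORCE needs the discounted return for every step of an episode before it
# can update the policy. Below is a minimal sketch of that standard computation
# using the `gamma` defined above, assuming episode rewards arrive as a flat
# list; the helper name `discount_and_normalize_rewards` follows the referenced
# notebook but is an assumption here, not shown in this excerpt.
def discount_and_normalize_rewards(episode_rewards):
    discounted = np.zeros_like(episode_rewards, dtype=np.float64)
    cumulative = 0.0
    # Walk the episode backwards, accumulating gamma-discounted returns.
    for i in reversed(range(len(episode_rewards))):
        cumulative = episode_rewards[i] + gamma * cumulative
        discounted[i] = cumulative
    # Normalize to zero mean and unit variance to stabilize the gradient.
    return (discounted - np.mean(discounted)) / (np.std(discounted) + 1e-8)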

class PolicyGradients:
    def __init__(self):
        with tf.name_scope("inputs"):
            # Assumed input placeholders, following the referenced REINFORCE
            # notebook; the rest of the network definition is not shown in this excerpt.
            self.input_ = tf.placeholder(tf.float32, [None, state_size], name="input_")
            self.actions = tf.placeholder(tf.int32, [None, action_size], name="actions")
            self.discounted_episode_rewards_ = tf.placeholder(
                tf.float32, [None, ], name="discounted_episode_rewards_")
Example #6
def fitness(ind, session):
    '''
    Use the number of consecutive steps without the pole falling as the fitness.
    :param ind: individual to evaluate
    :param session: current evolution session
    :return: the best consecutive-balance count over the evaluation episodes
    '''
    env = SingleCartPoleEnv()
    net = ind.getPhenome()
    reward_list, notdone_count_list = runner.do_evaluation(1, env, net.activate)

    return max(notdone_count_list)

fitness_records = []
complex_records=[]
maxfitness_records = []
env = SingleCartPoleEnv()
mode = 'noreset'
epochcount = 0
maxepochcount = 10
complexunit = 20.
modularities = []

# Record a demo video of the best individual balancing the cart-pole.
def callback(event, monitor):
    callbacks.neat_callback(event, monitor)
    global epochcount
    global mode
    if event == 'epoch.end':
        gc.collect()
        maxfitness = monitor.evoTask.curSession.pop.inds[0]['fitness']
        maxfitness_ind = monitor.evoTask.curSession.pop.inds[0]
Example #7
def execute(xh=None, mode='noreset'):
    global env
    global RL

    complexes = []
    reward_list = []
    notdone_count_list = []
    steps = []

    episode_reward_list = []
    episode_notdone_count_list = []
    total_step = 0

    # Build the suffix used in output file names from the trial index.
    if xh is None:
        xh = ''
    else:
        xh = "_" + str(int(xh))

    while True:
        # Run one episode (until the pole falls).
        notdone_count, episode_reward, step, total_step = runner.do_until_done(
            env, RL.choose_action, total_step, _do_learn)

        # Record the reward and the number of steps the pole stayed up.
        episode_reward_list.append(episode_reward)
        episode_notdone_count_list.append(notdone_count)
        # Print progress every 100 steps.
        if total_step % 100 == 0 and total_step != 0:
            print("balance counts =", episode_notdone_count_list, ", average =",
                  np.average(episode_notdone_count_list))
            print("cumulative rewards =", episode_reward_list, ", average =",
                  np.average(episode_reward_list))

        # Check whether the environment complexity can be increased.
        if notdone_count > env.max_notdone_count or total_step >= maxepochcount:
            # Record the complexity and the reward obtained at it (maximum over the episodes).
            complexes.append(force.force_generator.currentComplex())
            reward_list.append(np.max(episode_reward_list))
            notdone_count_list.append(np.max(episode_notdone_count_list))
            steps.append(total_step)
            #filename = os.path.split(os.path.realpath(__file__))[0] + '\\datas\\dqn' + str(xh) +'.npy'
            #np.save(filename, (complexes, notdone_count_list, reward_list,steps))
            print([(f, c) for f, c in zip(complexes, notdone_count_list)])

            # Append this complexity level's episode records to the CSV log.
            filename = os.path.split(os.path.realpath(__file__))[0] + os.sep + 'datas_' + mode + os.sep + 'dqn' + \
                       os.sep + 'dqn' + str(xh) + '.csv'
            with open(filename, 'a', newline='') as out:
                csv_write = csv.writer(out, dialect='excel')
                csv_write.writerow([complexes[-1]] + episode_notdone_count_list)

            episode_notdone_count_list, episode_reward_list = [], []
            total_step = 0

            print('complexities:', complexes)
            print('rewards:', reward_list)
            print('balance counts:', notdone_count_list)

            # Increase the complexity; to speed up execution, require the increase to be at least min_up.
            changed, newcomplex, k, w, f, sigma = force.force_generator.promptComplex(
                complexunit)
            if not changed or newcomplex is None:
                break  # The complexity has reached its maximum; stop.
            print('new environment complexity=%.3f, k=%.2f, w=%.2f, f=%.2f, sigma=%.2f' %
                  (newcomplex, k, w, f, sigma))

            if mode == 'reset':
                env = SingleCartPoleEnv().unwrapped
                RL = DeepQNetwork(n_actions=env.action_space.n,
                                  n_features=env.observation_space.shape[0])

    #np.save('dqn_result.npz', complexes, notdone_count_list,reward_list)
    RL.save()
    #plt.plot(complexes, reward_list, label='reward')
    plt.plot(complexes, notdone_count_list, label='times')
    plt.xlabel('complexes')
    plt.savefig('dqn_cartpole.png')
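
# A minimal sketch of how the DQN experiment above might be launched, assuming
# `run` is the DQN variant shown earlier and that `_run` forwards the keyword
# arguments (at least `mode`, plus the trial index `xh` it sets) to `execute`.
# The argument values below are illustrative only.
if __name__ == '__main__':
    run(count=10, mode='reset')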