import os

from evolution.session import EvolutionTask
from utils.properties import Properties
from ne.neat.idgenerator import NeatIdGenerator
import brain.networks as networks
import domains.cartpoles.enviornment.force as force
import domains.cartpoles.enviornment.runner as runner
from domains.cartpoles.enviornment.cartpole import SingleCartPoleEnv
from ne.hyperneat.decode import HyperNEAT
from evolution.agent import IndividualType
from ne.factory import DefaultNeuralNetworkGenomeFactory
import evolution.agent as agent
from brain.viewer import NetworkView
import utils.files as files

hyperneatdatapath = files.get_data_path() + os.sep + 'evolvability' + os.sep + \
    'experimentA' + os.sep + 'hyperneat' + os.sep


def fitness(ind, session):
    '''
    Fitness is the number of consecutive steps the pole stays balanced.
    :param ind:     the individual to evaluate
    :param session: the current evolution session
    :return: the maximum count of consecutive non-falling steps
    '''
    env = SingleCartPoleEnv()
    net = ind.getPhenome()
    reward_list, notdone_count_list = runner.do_evaluation(1, env, net.activate)
    return max(notdone_count_list)
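
# Hedged sketch (not in the original file): fitness() only requires that an
# individual expose getPhenome() returning a network with an activate()
# method. The stubs below make that evaluation contract explicit; the class
# names and the two-element output are illustrative assumptions, not
# framework API.
class _StubNet:
    def activate(self, inputs):
        # Hypothetical phenotype output: always choose action 0.
        return [1.0, 0.0]


class _StubIndividual:
    def getPhenome(self):
        return _StubNet()

# fitness(_StubIndividual(), session=None) would then run one evaluation
# episode and return the longest streak of steps with the pole still up.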
import os

import tensorflow as tf  # TF1.x graph-mode API (tf.placeholder)

from domains.cartpoles.enviornment.cartpole import SingleCartPoleEnv
import utils.files as files

env = SingleCartPoleEnv().unwrapped

## ENVIRONMENT Hyperparameters
state_size = 4
action_size = env.action_space.n

## TRAINING Hyperparameters
learning_rate = 0.01
gamma = 0.95  # Discount rate

mode = 'noreset'
maxepochcount = 1000
complexunit = 20.

policydatapath = files.get_data_path() + os.sep + 'evolvability' + os.sep + \
    'experimentA' + os.sep + 'policy' + os.sep


class PolicyGradients:
    def __init__(self):
        with tf.name_scope("inputs"):
            self.input_ = tf.placeholder(tf.float32, [None, state_size], name="input_")
            self.actions = tf.placeholder(tf.int32, [None, action_size], name="actions")
            self.discounted_episode_rewards_ = tf.placeholder(
                tf.float32, [None, ], name="discounted_episode_rewards")

            # Add this placeholder for having this variable in tensorboard
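
# Hedged sketch (not part of the original, truncated class above): the
# discounted_episode_rewards_ placeholder is conventionally fed with
# normalized discounted returns computed as below. numpy and the helper
# name are assumptions for illustration.
import numpy as np


def discount_and_normalize_rewards(episode_rewards):
    # Accumulate G_t = r_t + gamma * G_{t+1} backwards over the episode.
    discounted = np.zeros(len(episode_rewards))
    cumulative = 0.0
    for i in reversed(range(len(episode_rewards))):
        cumulative = cumulative * gamma + episode_rewards[i]
        discounted[i] = cumulative
    # Normalizing the returns reduces gradient variance between episodes.
    return (discounted - np.mean(discounted)) / (np.std(discounted) + 1e-8)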
import os

import matplotlib.pyplot as plt

from domains.cartpoles.enviornment.cartpole import SingleCartPoleEnv
from domains.cartpoles.enviornment import force
from rl.dqn import DeepQNetwork
import utils.files as files
import domains.cartpoles.enviornment.runner as runner

env = SingleCartPoleEnv().unwrapped
RL = DeepQNetwork(n_actions=env.action_space.n,
                  n_features=env.observation_space.shape[0])

mode = 'noreset'
maxepochcount = 1500
complexunit = 20.

dqndatapath = files.get_data_path() + os.sep + 'evolvability' + os.sep + \
    'experimentA' + os.sep + 'dqn' + os.sep


def _do_learn(observation, action, reward, observation_, step, totalreward, total_step):
    # Store the transition, then start learning once a few steps have accumulated.
    RL.store_transition(observation, action, reward, observation_)
    if total_step > 10:
        RL.learn()


def execute(xh=None, mode='noreset'):
    global env
    global RL
    complexes = []
    reward_list = []
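
# Hedged sketch (illustrative, not the original execute() body, which is
# truncated above): a typical episode loop that drives _do_learn(). Only
# store_transition() and learn() appear in this file; choose_action() is an
# assumed DeepQNetwork method, and _run_episode is a hypothetical helper.
def _run_episode(total_step):
    observation = env.reset()
    totalreward, step = 0., 0
    while True:
        action = RL.choose_action(observation)  # assumed API
        observation_, reward, done, _ = env.step(action)
        totalreward += reward
        _do_learn(observation, action, reward, observation_,
                  step, totalreward, total_step)
        observation = observation_
        step += 1
        total_step += 1
        if done:
            return totalreward, total_step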
import gc
import os
import csv

import domains.ne.cartpoles.enviornment.force as force
import domains.ne.cartpoles.dqn_cartpole as dqnrunner
import domains.ne.cartpoles.ddqn_cartpole as ddqnrunner
import domains.ne.cartpoles.neat_feedforeward as neatrunner
import domains.ne.cartpoles.hyperneat_feedforeward as hyperneatrunner
import domains.ne.cartpoles.policy as policyrunner
import utils.files as files

# The following is the complete experiment for the paper
# "Evolvability Of TWEANN In Dynamic Environment".

datapath = files.get_data_path() + os.sep + 'evolvability' + os.sep


def __param_to_dict(params):
    # Turn command-line style 'key=value' strings into a dict,
    # e.g. ['mode=noreset', 'xh=5'] -> {'mode': 'noreset', 'xh': '5'}.
    if params is None:
        return {}
    r = {}
    for p in params:
        kv = p.split('=')
        r[kv[0]] = kv[1]
        # r[kv[0]] = eval(kv[1])
    return r


def create_samples(k, w, f, sigma, t_min=0., t_max=2., t_step=0.02, count=2):
    '''