Example No. 1
import os

from evolution.session import EvolutionTask
from utils.properties import Properties
from ne.neat.idgenerator import NeatIdGenerator
import brain.networks as networks

from domains.cartpoles.enviornment.cartpole import SingleCartPoleEnv
import domains.cartpoles.enviornment.force as force
import domains.cartpoles.enviornment.runner as runner

from ne.hyperneat.decode import HyperNEAT
from evolution.agent import IndividualType
from ne.factory import DefaultNeuralNetworkGenomeFactory
import evolution.agent as agent
from brain.viewer import NetworkView
import utils.files as files

hyperneatdatapath = files.get_data_path(
) + os.sep + 'evolvability' + os.sep + 'experimentA' + os.sep + 'hyperneat' + os.sep


def fitness(ind, session):
    '''
    The number of consecutive steps without the pole falling is used as the fitness.
    :param ind: the individual to evaluate
    :param session: the current evolution session
    :return: the maximum consecutive non-falling step count over the evaluation episodes
    '''
    env = SingleCartPoleEnv()
    net = ind.getPhenome()
    reward_list, notdone_count_list = runner.do_evaluation(
        1, env, net.activate)

    return max(notdone_count_list)
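
# Minimal illustration of the fitness idea above (added for clarity, not part of
# the original module): from a per-step "pole fell" flag sequence, the score is
# the length of the longest run of steps in which the pole did not fall.
def longest_not_done_run(done_flags):
    best = current = 0
    for done in done_flags:
        current = 0 if done else current + 1
        best = max(best, current)
    return best

# e.g. longest_not_done_run([False, False, True, False, False, False]) == 3
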
Example No. 2

import os

import tensorflow as tf

from domains.cartpoles.enviornment.cartpole import SingleCartPoleEnv
import utils.files as files

env = SingleCartPoleEnv().unwrapped

## ENVIRONMENT Hyperparameters
state_size = 4
action_size = env.action_space.n

## TRAINING Hyperparameters
learning_rate = 0.01
gamma = 0.95  # Discount rate

mode = 'noreset'
maxepochcount = 1000
complexunit = 20.

policydatapath = files.get_data_path(
) + os.sep + 'evolvability' + os.sep + 'experimentA' + os.sep + 'policy' + os.sep
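
# Hedged sketch (an added example, not code from the original file): how per-step
# rewards are typically discounted with the module-level `gamma` before being fed
# to the `discounted_episode_rewards` placeholder of the PolicyGradients network
# defined below. The helper name and the normalization step are illustrative choices.
import numpy as np

def discount_and_normalize_rewards(episode_rewards):
    discounted = np.zeros(len(episode_rewards), dtype=np.float32)
    cumulative = 0.0
    for i in reversed(range(len(episode_rewards))):
        cumulative = cumulative * gamma + episode_rewards[i]
        discounted[i] = cumulative
    # Normalizing reduces the variance of the policy-gradient estimate.
    return (discounted - discounted.mean()) / (discounted.std() + 1e-8)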


class PolicyGradients:
    def __init__(self):
        with tf.name_scope("inputs"):
            self.input_ = tf.placeholder(tf.float32, [None, state_size],
                                         name="input_")
            self.actions = tf.placeholder(tf.int32, [None, action_size],
                                          name="actions")
            self.discounted_episode_rewards_ = tf.placeholder(
                tf.float32, [None, ], name="discounted_episode_rewards")

            # Add this placeholder for having this variable in tensorboard
Example No. 3

import os

import matplotlib.pyplot as plt
from domains.cartpoles.enviornment.cartpole import SingleCartPoleEnv
from domains.cartpoles.enviornment import force
from rl.dqn import DeepQNetwork
import utils.files as files
import domains.cartpoles.enviornment.runner as runner

env = SingleCartPoleEnv().unwrapped
RL = DeepQNetwork(n_actions=env.action_space.n,
                  n_features=env.observation_space.shape[0])

mode = 'noreset'
maxepochcount = 1500
complexunit = 20.

dqndatapath = files.get_data_path(
) + os.sep + 'evolvability' + os.sep + 'experimentA' + os.sep + 'dqn' + os.sep


def _do_learn(observation, action, reward, observation_, step, totalreward,
              total_step):
    # Store the transition and start training the DQN once enough steps have accumulated.
    RL.store_transition(observation, action, reward, observation_)
    if total_step > 10:
        RL.learn()
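
# Hedged sketch (an assumption, not part of the original file): the kind of
# environment-interaction loop that feeds `_do_learn` above. The real training
# loop of this example lives in the (truncated) execute() function below;
# `RL.choose_action` follows the DeepQNetwork interface imported at the top.
def _run_one_episode(total_step_start=0, max_steps=1000):
    observation = env.reset()
    total_reward = 0.
    total_step = total_step_start
    for step in range(max_steps):
        action = RL.choose_action(observation)
        observation_, reward, done, _ = env.step(action)
        total_reward += reward
        _do_learn(observation, action, reward, observation_, step,
                  total_reward, total_step)
        observation = observation_
        total_step += 1
        if done:
            break
    return total_reward, total_step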


def execute(xh=None, mode='noreset'):
    global env
    global RL

    complexes = []
    reward_list = []
Example No. 4
import gc
import os
import csv

import domains.ne.cartpoles.enviornment.force as force
import domains.ne.cartpoles.dqn_cartpole as dqnrunner
import domains.ne.cartpoles.ddqn_cartpole as ddqnrunner
import domains.ne.cartpoles.neat_feedforeward as neatrunner
import domains.ne.cartpoles.hyperneat_feedforeward as hyperneatrunner
import domains.ne.cartpoles.policy as policyrunner

import utils.files as files
# The following is the complete experiment for the paper "Evolvability Of TWEANN In Dynamic Environment".

datapath = files.get_data_path() + '\\evolvability\\'


def __param_to_dict(params):
    if params is None:
        return {}
    r = {}
    for p in params:
        kv = p.split('=')
        r[kv[0]] = kv[1]
        #r[kv[0]] = eval(kv[1])
    return r
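
# Usage illustration (added for clarity, not part of the original script):
#   __param_to_dict(['mode=noreset', 'maxepochcount=1000'])
#   -> {'mode': 'noreset', 'maxepochcount': '1000'}
# Values stay strings; the eval() variant is left commented out above.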


def create_samples(k, w, f, sigma, t_min=0., t_max=2., t_step=0.02, count=2):
    '''