population_size = 128
total_tournaments = 100000
save_freq = 1000

def mutate(length, mutation_rate, mutation_sigma):
  # (not used, in case I wanted to do partial mutations)
  # create an additive mutation vector of some size
  mask = np.random.randint(int(1/mutation_rate), size=length)
  mask = 1-np.minimum(mask, 1)
  noise = np.random.normal(size=length) * mutation_sigma
  return mask * noise

# Log results
from library import util

args = util.get_args('ga_selfplay_obs', '../zoo/ga/_sp/ga.json')
LOGDIR = args.logdir
if not os.path.exists(LOGDIR):
  os.makedirs(LOGDIR)

BEST_MODEL_PATH = args.modelpath
with open(BEST_MODEL_PATH) as f:
  data = json.load(f)
  model_params = np.array(data[0])

# Create two instances of a feed forward policy we may need later.
policy_left = Model(mlp.games['slimevolleylite'])
policy_right = Model(mlp.games['slimevolleylite'])
param_count = policy_left.param_count
print("Number of parameters of the neural net policy:", param_count) # 273 for slimevolleylite
save_freq = 1000


def mutate(length, mutation_rate, mutation_sigma):
    # (not used, in case I wanted to do partial mutations)
    # create an additive mutation vector of some size
    mask = np.random.randint(int(1 / mutation_rate), size=length)
    mask = 1 - np.minimum(mask, 1)
    noise = np.random.normal(size=length) * mutation_sigma
    return mask * noise


# Log results
from library import util

args = util.get_args('ga_selfplay_actionbit', '../zoo/ga/_sp/ga.json')
LOGDIR = args.logdir
if not os.path.exists(LOGDIR):
    os.makedirs(LOGDIR)

BEST_MODEL_PATH = args.modelpath
with open(BEST_MODEL_PATH) as f:
    data = json.load(f)
    model_params = np.array(data[0])

# Create two instances of a feed forward policy we may need later.
policy_left = Model(mlp.games['slimevolleylite'])
policy_right = Model(mlp.games['slimevolleylite'])
param_count = policy_left.param_count
print("Number of parameters of the neural net policy:",
      param_count)  # 273 for slimevolleylite
# Example 3
save_freq = 1000


def mutate(length, mutation_rate, mutation_sigma):
    # (not used, in case I wanted to do partial mutations)
    # create an additive mutation vector of some size
    mask = np.random.randint(int(1 / mutation_rate), size=length)
    mask = 1 - np.minimum(mask, 1)
    noise = np.random.normal(size=length) * mutation_sigma
    return mask * noise


# Log results
from library import util

args = util.get_args('ga_selfplay_constant', '../zoo/ga/_sp/ga.json')
LOGDIR = args.logdir
if not os.path.exists(LOGDIR):
    os.makedirs(LOGDIR)

BEST_MODEL_PATH = args.modelpath
with open(BEST_MODEL_PATH) as f:
    data = json.load(f)
    model_params = np.array(data[0])

# Create two instances of a feed forward policy we may need later.
policy_left = Model(mlp.games['slimevolleylite'])
policy_right = Model(mlp.games['slimevolleylite'])
param_count = policy_left.param_count
print("Number of parameters of the neural net policy:",
      param_count)  # 273 for slimevolleylite
from mpi4py import MPI
from stable_baselines.common import set_global_seeds
from stable_baselines.common.policies import MlpPolicy
from stable_baselines import bench, logger, PPO1
from stable_baselines.common.callbacks import EvalCallback

NUM_TIMESTEPS = int(2e6)
SEED = 831
EVAL_FREQ = 200000
EVAL_EPISODES = 1000

# Log results and get trained model location
from library import util

args = util.get_args(logdir='ppo_mpi_spike',
                     modelpath='../zoo/ppo/best_model.zip')

LOGDIR = args.logdir
if not os.path.exists(LOGDIR):
    os.makedirs(LOGDIR)
PPO_PATH = args.modelpath

from library import wrapper


def make_env(seed):
    # Add spike wrapper to environment
    env = wrapper.SpikeWrapper(gym.make("SlimeVolley-v0"))
    env.seed(seed)
    return env
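
# A hedged sketch of the per-worker wiring the MPI import above is for: rank-based
# logging and seeding follow the usual stable-baselines PPO1/MPI pattern; anything not
# defined in this snippet (e.g. PPO1 hyperparameters) is left at defaults or assumed.
rank = MPI.COMM_WORLD.Get_rank()
if rank == 0:
    logger.configure(folder=LOGDIR)   # only the first worker writes logs
else:
    logger.configure(format_strs=[])  # silence the remaining workers

workerseed = SEED + 10000 * rank      # give every MPI worker its own seed
set_global_seeds(workerseed)
env = make_env(workerseed)

model = PPO1(MlpPolicy, env, verbose=2)
model.learn(total_timesteps=NUM_TIMESTEPS,
            callback=EvalCallback(env, best_model_save_path=LOGDIR, log_path=LOGDIR,
                                  eval_freq=EVAL_FREQ, n_eval_episodes=EVAL_EPISODES))
env.close()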
# Example 5
from stable_baselines.common import set_global_seeds
from stable_baselines.common.policies import MlpPolicy
from stable_baselines import bench, logger, PPO1
from stable_baselines.common.callbacks import EvalCallback

from library.Observation_Wrapper import ObservationWrapper

NUM_TIMESTEPS = int(2e6)
SEED = 831
EVAL_FREQ = 200000
EVAL_EPISODES = 1000

# Log results
from library import util

args = util.get_args('ppo_extended_obs', '../zoo/ppo/best_model.zip')
LOGDIR = args.logdir
if not os.path.exists(LOGDIR):
    os.makedirs(LOGDIR)

BEST_MODEL_PATH = args.modelpath
if not os.path.exists(BEST_MODEL_PATH):
    raise FileNotFoundError('File does not exist: ' + BEST_MODEL_PATH)


def make_env(seed):
    env = ObservationWrapper(gym.make("SlimeVolley-v0"))
    env.seed(seed)
    return env
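
# A hedged sketch of the single-process wiring these settings feed into: the Monitor
# logging, the separately seeded eval env, and PPO1 hyperparameters left at their defaults
# are assumptions here, not taken from this snippet.
logger.configure(folder=LOGDIR)
set_global_seeds(SEED)

env = bench.Monitor(make_env(SEED), os.path.join(LOGDIR, "monitor"), allow_early_resets=True)
eval_env = make_env(SEED + 1)  # evaluate on a separate copy of the wrapped env

model = PPO1(MlpPolicy, env, verbose=2)
eval_callback = EvalCallback(eval_env,
                             best_model_save_path=LOGDIR,
                             log_path=LOGDIR,
                             eval_freq=EVAL_FREQ,
                             n_eval_episodes=EVAL_EPISODES)
model.learn(total_timesteps=NUM_TIMESTEPS, callback=eval_callback)
env.close()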

from library.Observation_Wrapper import ObservationWrapperHalf

# Settings
SEED = 17
NUM_TIMESTEPS = int(2e6)
EVAL_FREQ = int(1e5)
EVAL_EPISODES = int(1e2)
BEST_THRESHOLD = 0.5  # must achieve a mean score above this to replace prev best self

RENDER_MODE = False  # set this to False if you plan on running the full 1000 trials.

# Log results
from library import util

args = util.get_args('ppo_selfplay_extended_obs_half',
                     '../zoo/ppo_sp/history_00000144.zip')
LOGDIR = args.logdir
if not os.path.exists(LOGDIR):
    os.makedirs(LOGDIR)

BEST_MODEL_PATH = args.modelpath
if not os.path.exists(BEST_MODEL_PATH):
    raise FileNotFoundError('File does not exist: ' + BEST_MODEL_PATH)


class SlimeVolleySelfPlayEnv(ObservationWrapperHalf):
    # wrapper over the normal single player env, but loads the best self play model
    def __init__(self):
        super(SlimeVolleySelfPlayEnv,
              self).__init__(slimevolleygym.SlimeVolleyEnv())
        self.policy = self
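        # (the snippet is cut off here; the attributes and method below follow the usual
        #  slimevolleygym self-play pattern and are assumptions, not taken from this snippet)
        self.best_model = None            # latest frozen copy of ourselves, used as the opponent
        self.best_model_filename = None

    def predict(self, obs):
        # policy of the frozen opponent: act randomly until a best model has been saved
        if self.best_model is None:
            return self.action_space.sample()
        action, _ = self.best_model.predict(obs)
        return action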
# Example 7
save_freq = 1000


def mutate(length, mutation_rate, mutation_sigma):
    # (not used, in case I wanted to do partial mutations)
    # create an additive mutation vector of some size
    mask = np.random.randint(int(1 / mutation_rate), size=length)
    mask = 1 - np.minimum(mask, 1)
    noise = np.random.normal(size=length) * mutation_sigma
    return mask * noise


# Log results and get trained model location
from library import util

args = util.get_args(logdir='ga_selfplay_spike',
                     modelpath='../zoo/ga_sp/ga.json')

LOGDIR = args.logdir
if not os.path.exists(LOGDIR):
    os.makedirs(LOGDIR)
GA_PATH = args.modelpath

# Create two instances of a feed forward policy we may need later.
policy_left = Model(mlp.games['slimevolleylite'])
policy_right = Model(mlp.games['slimevolleylite'])
param_count = policy_left.param_count
print("Number of parameters of the neural net policy:",
      param_count)  # 273 for slimevolleylite

# store our population here
population_size = 128  # assumed; this snippet is cut off and does not define the size itself
population = np.random.normal(size=(population_size, param_count)) * 0.5  # one row per agent
# NOTE: the shape and the 0.5 scale above are assumptions based on the usual GA self-play
# setup; the source snippet is truncated at this line.
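
# A hedged sketch of the tournament loop this population feeds into, following the
# slimevolleygym GA self-play recipe. The env, the tournament budget, the mutation scale
# and the Model.predict/set_model_params usage are assumptions, not from this snippet.
import slimevolleygym


def rollout(env, policy_right, policy_left):
    # play one full game between two policies; a positive return means the right agent won
    obs_right = env.reset()
    obs_left = obs_right  # both agents start from the same (mirrored) observation
    done, total_reward = False, 0
    while not done:
        action_right = policy_right.predict(obs_right)
        action_left = policy_left.predict(obs_left)
        obs_right, reward, done, info = env.step(action_right, action_left)
        obs_left = info['otherObs']  # SlimeVolleyEnv exposes the opponent's view here
        total_reward += reward
    return total_reward


env = slimevolleygym.SlimeVolleyEnv()  # the spike wrapper from this experiment would wrap this
for tournament in range(100000):  # illustrative budget
    m, n = np.random.choice(population_size, 2, replace=False)
    policy_left.set_model_params(population[m])
    policy_right.set_model_params(population[n])
    score = rollout(env, policy_right, policy_left)
    if score > 0:
        # the loser is replaced by a mutated copy of the winner
        population[m] = population[n] + np.random.normal(size=param_count) * 0.1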
# Example 8
from library.action_wrapper import ConstantNoiseActionWrapper

# Settings
SEED = 17
NUM_TIMESTEPS = int(2e6)
EVAL_FREQ = int(1e5)
EVAL_EPISODES = int(1e2)
BEST_THRESHOLD = 0.5  # must achieve a mean score above this to replace prev best self

RENDER_MODE = False  # set this to False if you plan on running the full 1000 trials.

# Log results
from library import util

args = util.get_args('ppo_selfplay_constant',
                     '../zoo/ppo_sp/history_00000144.zip')
LOGDIR = args.logdir
if not os.path.exists(LOGDIR):
    os.makedirs(LOGDIR)

BEST_MODEL_PATH = args.modelpath
if not os.path.exists(BEST_MODEL_PATH):
    raise FileNotFoundError('File does not exist: ' + BEST_MODEL_PATH)


class SlimeVolleySelfPlayEnv(ConstantNoiseActionWrapper):
    # wrapper over the normal single player env, but loads the best self play model
    def __init__(self):
        super(SlimeVolleySelfPlayEnv,
              self).__init__(slimevolleygym.SlimeVolleyEnv())
        self.policy = self
from shutil import copyfile  # keep track of generations

# Settings
SEED = 17
NUM_TIMESTEPS = int(2e6)
EVAL_FREQ = int(1e5)
EVAL_EPISODES = int(1e2)
BEST_THRESHOLD = 0.5  # must achieve a mean score above this to replace prev best self

RENDER_MODE = False  # set this to False if you plan on running the full 1000 trials.

# Log results
from library import util
from library import wrapper

args = util.get_args(logdir='ppo_selfplay_spike',
                     modelpath='../zoo/ppo_sp/history_00000144.zip')
LOGDIR = args.logdir
if not os.path.exists(LOGDIR):
    os.makedirs(LOGDIR)

BEST_MODEL_PATH = args.modelpath
if not os.path.exists(BEST_MODEL_PATH):
    raise FileNotFoundError('File does not exist: ' + BEST_MODEL_PATH)


class SlimeVolleySelfPlayEnv(wrapper.SpikeWrapper):
    # wrapper over the normal single player env, but loads the best self play model
    def __init__(self):
        super(SlimeVolleySelfPlayEnv,
              self).__init__(slimevolleygym.SlimeVolleyEnv())
        self.policy = self
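
# copyfile above is imported to snapshot each self-play generation; the env class is cut
# off after __init__ in this snippet. A hedged sketch of the callback that typically does
# the snapshotting, following the slimevolleygym self-play example (the filename pattern
# and threshold handling are assumptions, not taken from this snippet):
from stable_baselines.common.callbacks import EvalCallback


class SelfPlayCallback(EvalCallback):
    # only promote a new "best self" when it beats the previous one by BEST_THRESHOLD
    def __init__(self, *args, **kwargs):
        super(SelfPlayCallback, self).__init__(*args, **kwargs)
        self.best_mean_reward = BEST_THRESHOLD
        self.generation = 0

    def _on_step(self):
        result = super(SelfPlayCallback, self)._on_step()
        if result and self.best_mean_reward > BEST_THRESHOLD:
            self.generation += 1
            source_file = os.path.join(LOGDIR, "best_model.zip")
            backup_file = os.path.join(LOGDIR, "history_%08d.zip" % self.generation)
            copyfile(source_file, backup_file)       # keep a copy of this generation
            self.best_mean_reward = BEST_THRESHOLD   # reset the bar for the next generation
        return result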
# Example 10
from stable_baselines.common import set_global_seeds
from stable_baselines.common.policies import MlpPolicy
from stable_baselines import bench, logger, PPO1
from stable_baselines.common.callbacks import EvalCallback

from library.action_wrapper import ConstantNoiseActionWrapper

NUM_TIMESTEPS = int(2e6)
SEED = 831
EVAL_FREQ = 200000
EVAL_EPISODES = 1000

# Log results
from library import util

args = util.get_args('ppo_constant', '../zoo/ppo/best_model.zip')
LOGDIR = args.logdir
if not os.path.exists(LOGDIR):
    os.makedirs(LOGDIR)

BEST_MODEL_PATH = args.modelpath
if not os.path.exists(BEST_MODEL_PATH):
    raise FileNotFoundError('File does not exist: ' + BEST_MODEL_PATH)


def make_env(seed):
    env = ConstantNoiseActionWrapper(gym.make("SlimeVolley-v0"))
    env.seed(seed)
    return env
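
# Quick smoke test of the noise-wrapped env (illustrative only, not part of the training run):
if __name__ == "__main__":
    env = make_env(SEED)
    obs = env.reset()
    for _ in range(100):
        obs, reward, done, info = env.step(env.action_space.sample())
        if done:
            obs = env.reset()
    env.close()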