def __init__(self):
    """Wrap a fresh ``CartPoleEnv`` and build the composite action space.

    The observation space is taken unchanged from the wrapped cart-pole.
    The action space is a ``spaces.Tuple`` holding the cart-pole's own
    action space repeated ``NUM_PREDICTED_ACTIONS + 1`` times.
    """
    super(PredictActionsCartpoleEnv, self).__init__()

    inner_env = CartPoleEnv()
    self.cartpole = inner_env
    self.observation_space = inner_env.observation_space

    # Presumably one slot for the action that is executed plus one slot per
    # predicted action -- confirm against the step logic.
    slot_count = NUM_PREDICTED_ACTIONS + 1
    self.action_space = spaces.Tuple(
        tuple([inner_env.action_space] * slot_count))
def __init__(self):
    """Extend the inherited observation space and reset crossing statistics.

    After the parent initializer has run, the observation space found on the
    instance is wrapped in a two-element ``spaces.Tuple`` whose first member
    is ``spaces.Discrete(2)`` and whose second member is that original space.
    """
    super(OffSwitchCartpoleProbEnv, self).__init__()

    # Presumably the binary component is the on/off switch state -- confirm
    # against the step/reset logic.
    switch_space = spaces.Discrete(2)
    inherited_space = self.observation_space
    self.observation_space = spaces.Tuple((switch_space, inherited_space))

    # Number of episodes in which the cart crossed the left/right threshold
    # (first).
    self.num_crosses = [0.0, 0.0]
    self.threshold_crossed = False
def __init__(self):
    """Wrap a fresh ``CartPoleEnv`` and build the composite action space.

    The observation space is taken unchanged from the wrapped cart-pole.
    The action space is a ``spaces.Tuple`` whose first member is the
    cart-pole's action space, followed by the cart-pole's observation space
    repeated ``NUM_PREDICTED_OBSERVATIONS`` times.
    """
    super(PredictObsCartpoleEnv, self).__init__()

    inner_env = CartPoleEnv()
    self.cartpole = inner_env
    self.observation_space = inner_env.observation_space

    # Presumably the trailing members carry the agent's predicted
    # observations -- confirm against the step logic.
    members = [inner_env.action_space]
    members.extend([inner_env.observation_space] * NUM_PREDICTED_OBSERVATIONS)
    self.action_space = spaces.Tuple(tuple(members))
def __init__(self, natural=False):
    """Define the spaces, seed the environment and start the first game.

    Args:
        natural: When true, a "natural" blackjack win pays out 1.5, like
            casino rules.
            Ref: http://www.bicyclecards.com/how-to-play/blackjack/
    """
    self.action_space = spaces.Discrete(2)

    # Three discrete observation components with 32, 11 and 2 values.
    # Presumably player sum, dealer's visible card and usable-ace flag --
    # confirm against the observation-building code.
    component_sizes = (32, 11, 2)
    self.observation_space = spaces.Tuple(
        tuple(spaces.Discrete(size) for size in component_sizes))

    self._seed()

    # Stored after seeding and before the first reset, as in the original
    # statement order.
    self.natural = natural

    # Start the first game.
    self._reset()
def __init__(self, natural=False):
    """Initialize the environment's spaces and start the first game.

    The action space is a ``spaces.Tuple`` of six length-1 ``spaces.Box``
    entries, one per tunable value (see the inline labels below). The
    observation space is a single 6-element ``spaces.Box`` bounded by
    ``[-1e5, 1e5]``.

    Args:
        natural: Not read by this initializer; kept so the signature stays
            backward-compatible for existing callers.
    """
    # Each constant lives in an array of length 1 (the original author noted
    # that plain scalars caused errors). Shapes are written as tuples rather
    # than bare ints: ``Box`` documents ``shape`` as a sequence, and a bare
    # int is rejected by newer gym releases, while ``(1,)`` yields the same
    # array as ``1`` on older ones.
    scalar_bounds = (
        (-5.0, 0.0),   # learning rate
        (-7.0, -2.0),  # decay
        (-5.0, 0.0),   # momentum
        (2, 8),        # batch size
        (-6.0, 1.0),   # l1 reg
        (-6.0, 1.0),   # l2 reg
    )
    self.action_space = spaces.Tuple(
        tuple(spaces.Box(low, high, (1,)) for low, high in scalar_bounds))

    # Observation features, in order: num of instances, num of labels,
    # number of filters in part A / B of the neural net, num of neurons in
    # the output layer, validation accuracy after training with the given
    # parameters.
    self.observation_space = spaces.Box(-1e5, 1e5, (6,))

    # Start the first game.
    self._reset()
def __init__(self, natural=False):
    """Initialize the environment's spaces and start the first game.

    The action space is a ``spaces.Tuple`` of six length-1 ``spaces.Box``
    entries followed by a ``(5, 2)`` box and a ``(2, 2)`` box, both bounded
    by ``[0.0, 1.0]`` (see the inline labels below). The observation space
    is a single 2-element ``spaces.Box`` bounded by ``[-1e5, 1e5]``.

    Args:
        natural: Not read by this initializer; kept so the signature stays
            backward-compatible for existing callers.
    """
    # Each constant lives in an array of length 1 (the original author noted
    # that plain scalars caused errors). Shapes are written as tuples rather
    # than bare ints: ``Box`` documents ``shape`` as a sequence, and a bare
    # int is rejected by newer gym releases, while ``(1,)`` yields the same
    # array as ``1`` on older ones.
    scalar_bounds = (
        (-5.0, 0.0),   # learning rate
        (-7.0, -2.0),  # decay
        (-5.0, 0.0),   # momentum
        (2, 8),        # batch size
        (-6.0, 1.0),   # l1 reg
        (-6.0, 1.0),   # l2 reg
    )
    action_members = [
        spaces.Box(low, high, (1,)) for low, high in scalar_bounds
    ]
    # convolutional layer parameters
    action_members.append(spaces.Box(0.0, 1.0, (5, 2)))
    # fully connected layer parameters
    action_members.append(spaces.Box(0.0, 1.0, (2, 2)))
    self.action_space = spaces.Tuple(tuple(action_members))

    # Observation features, in order: num of instances, num of labels,
    # validation accuracy after training with the given parameters.
    # NOTE(review): three features are listed but the Box has 2 entries;
    # presumably validation accuracy is reported elsewhere (e.g. as the
    # reward) -- confirm against the observation-building code.
    self.observation_space = spaces.Box(-1e5, 1e5, (2,))

    # Start the first game.
    self._reset()