def __init__(self, env, max_length=np.inf, dense_reward=True, save_fr=10,
             save_dest="state_box", render=False):
    """Wrap ``env`` and initialise the per-episode bookkeeping.

    Args:
        env: Environment to wrap; stored as ``eval_env``. When it is not
            ``None`` its action and observation spaces are copied onto
            this instance.
        max_length: Stored as both ``_max_length`` and
            ``max_episode_steps``.
        dense_reward: Stored as ``_dense_reward``.
        save_fr: Stored as ``_save_fr``.
        save_dest: Stored as ``_save_dest``.
        render: Stored as ``_render``.
    """
    Env.__init__(self)
    DictSerializable.__init__(self, DictSerializable.get_numpy_save())

    self.eval_env = env

    # The spaces are borrowed from the wrapped environment, so they can only
    # be filled in when one was actually supplied.
    if env is not None:
        self.action_space = self.eval_env.action_space
        self.observation_space = self.eval_env.observation_space

    self._dense_reward = dense_reward

    # Running totals and the per-episode history lists start out empty.
    self.partial_reward, self.partial_length = 0.0, 0
    self.returns, self.episode_lengths, self.successes = [], [], []
    self._unused = True

    # The same limit is kept under a private and a public name.
    self._max_length = self.max_episode_steps = max_length

    self._save_fr = save_fr
    self._save_dest = save_dest
    self._render = render
def __init__(self, config, visualize=True):
    """Build the diagram, attach a PID controller and create the simulator.

    Args:
        config: Nested mapping. This constructor reads
            ``config['env']['step_dt']`` and ``config['env']['table']``
            directly and hands the whole object to the diagram wrapper.
        visualize: When true, the diagram wrapper is connected to meshcat
            and to the Drake visualizer before it is built.

    The constructor ends by calling ``self.reset()``.
    """
    Env.__init__(self)

    self._config = config
    self._step_dt = config['env']['step_dt']
    self._model_name = "mug"

    # Set up the diagram wrapper.
    # NOTE(review): the wrapper is stored as `_diagram_wrapper` but accessed
    # below through `self.diagram_wrapper`, presumably a property defined
    # elsewhere on the class -- confirm.
    self._diagram_wrapper = DrakePusherSliderDiagramWrapper(config)

    # Populate the scene: procedurally generated table, pusher, object.
    # (The original statement ended in a stray comma, which wrapped the
    # call's result in a throw-away tuple; the comma has been dropped.)
    env_utils.add_procedurally_generated_table(
        self.diagram_wrapper.mbp, config['env']['table'])
    self.diagram_wrapper.add_pusher()
    self.add_object_model()
    self.diagram_wrapper.finalize()
    self.diagram_wrapper.export_ports()

    if visualize:
        self.diagram_wrapper.connect_to_meshcat()
        self.diagram_wrapper.connect_to_drake_visualizer()

    self.diagram_wrapper.add_sensors_from_config(config)
    self.diagram_wrapper.build()

    # Records port indices by name.
    self._port_idx = {}

    # Outer diagram: the wrapped diagram plus the PID controller.
    outer_builder = DiagramBuilder()
    self._builder = outer_builder
    outer_builder.AddSystem(self.diagram_wrapper.diagram)

    # The PID controller is what drives the actuator ports; remember the
    # index of its desired-state input so it can be looked up after Build().
    pid_info = self.diagram_wrapper.add_pid_controller(builder=outer_builder)
    self._port_idx["pid_input_port_desired_state"] = pid_info[
        'pid_input_port_index']

    built_diagram = outer_builder.Build()
    self._diagram = built_diagram
    self._pid_input_port_desired_state = self._diagram.get_input_port(
        self._port_idx["pid_input_port_desired_state"])

    # Set up the simulator on a fresh default context.
    default_context = built_diagram.CreateDefaultContext()
    self._simulator = Simulator(self._diagram, default_context)
    self._sim_initialized = False
    self._context = default_context

    # Reset the environment.
    self.reset()
def __init__(self, grid: Grid):
    """Initialise both base classes, declare the gym spaces and reset.

    Args:
        grid: Passed to ``GameEnv.__init__``; its ``x``, ``y`` and ``scale``
            attributes determine the first two dimensions of the
            observation space.
    """
    Env.__init__(self)
    GameEnv.__init__(self, grid)

    self.FPS = -1
    self.clock = None

    # Gym requires both spaces to be gym.spaces objects.
    # Three discrete actions are available.
    self.action_space = spaces.Discrete(3)

    # Observations are 3-channel uint8 images whose first two dimensions are
    # the grid extents multiplied by the grid scale.
    extent_x = grid.x * grid.scale
    extent_y = grid.y * grid.scale
    self.observation_space = spaces.Box(
        low=0,
        high=255,
        shape=(extent_x, extent_y, 3),
        dtype=np.uint8,
    )

    self.reset()
def __init__(self, action_mapping):
    """Seed the environment and set its default state and gym spaces.

    Args:
        action_mapping: Stored as ``self.action_mapping``; its
            ``action_space`` attribute becomes this environment's action
            space.
    """
    self._seed()

    self.verbose = 0
    # `viewer` was assigned None twice in the original; once is enough.
    self.viewer = None
    self.batch_size = 32
    self.optimizer = None
    self.model = None
    self.current_step = 0

    self.action_mapping = action_mapping
    self.action_space = action_mapping.action_space

    # Observations are 4-element vectors with no finite bounds.
    bounds = float('inf')
    self.observation_space = spaces.Box(-bounds, bounds, (4, ))

    self.best = None
    self.evaluate_test = False

    # Kept last, as in the original, so the call order is unchanged.
    Env.__init__(self)
def __init__(self, num_healthy, num_contaminated, world_size, min_obs_rad,
             max_obs_rad, torus=False, dynamics='direct'):
    """Store the scenario parameters, build the world and actor, then reset.

    Args:
        num_healthy: Stored as ``self.num_healthy``.
        num_contaminated: Stored as ``self.num_contaminated``.
        world_size: Stored and forwarded to ``base.World``.
        min_obs_rad: Stored and forwarded to ``GlobalActor``.
        max_obs_rad: Stored and forwarded to ``GlobalActor`` and
            ``ClusterManager``.
        torus: Stored and forwarded to ``base.World``.
        dynamics: Forwarded to ``base.World`` (not stored on the instance).
    """
    Env.__init__(self)

    # Population sizes.
    self.num_healthy = num_healthy
    self.num_contaminated = num_contaminated

    # World geometry and observation radii.
    self.world_size = world_size
    self.min_obs_rad = min_obs_rad
    self.max_obs_rad = max_obs_rad
    self.torus = torus

    self.world = base.World(world_size, torus, dynamics)
    self.global_actor = GlobalActor(min_obs_rad, max_obs_rad)

    # NOTE(review): the ClusterManager instance is created and immediately
    # discarded -- presumably it is constructed for a side effect; confirm.
    ClusterManager(self.max_obs_rad)

    self.reset()
def __init__(self, sess=None):
    """Set up the regression objective graph, sample the data and reset.

    Args:
        sess: TensorFlow session to use. When ``None`` a new
            ``tf.Session()`` is created.
    """
    Env.__init__(self)

    # A dictionary specifying configurations. It will not change later.
    self.configs = {
        # the number of problems (each set of (X,Y) data is a different
        # problem, i.e. a different objective function).
        "problem_num": 120,
        # the number of data for each problem in each type.
        "num_data_each": 25,
        # the number of data types for one problem.
        "num_data_type": 4,
        # the maximum number of times the agent is allowed to optimize
        # this problem.
        "max_opt_times": 1000,
        # the dimension for input data 'x' and also the dimension of weight.
        "x_dim": 3,
        # 'c' is the constant specified in the paper.
        "c": 1,
        # In the paper's common settings
        "horizon": 25,
    }
    logger.log("Robust Linear Regression environment initializing...")

    # Data sampling operations: one distribution per data type, each
    # sampled `num_data_each` times.
    self.distributions = [
        self._rand_Gaussian_Dist()
        for _ in range(self.configs["num_data_type"])
    ]
    self.sample_data_ops = [
        dist.sample(self.configs["num_data_each"])
        for dist in self.distributions
    ]
    self.perturbation_dist = tfd.Normal(loc=0, scale=1)
    self.perturbation_ops = self.perturbation_dist.sample(
        self.configs["num_data_each"])

    # Use the caller's session if one was given. The test is an identity
    # check against None rather than `!=`, which would go through the
    # object's own comparison operator.
    self.sess = sess if sess is not None else tf.Session()

    # Set up the formula.
    self.vars = {}
    with tf.variable_scope("regression-objective"):
        # Not initialised here; these hold numpy arrays when running.
        self.vars["w_val"] = None
        self.vars["b_val"] = None

        # Equation placeholders.
        self.vars["w"] = tf.placeholder(
            tf.float32, shape=(self.configs["x_dim"], ), name="w")
        self.vars["b"] = tf.placeholder(tf.float32, shape=(1, ), name="b")
        self.vars["x"] = tf.placeholder(
            tf.float32, shape=(self.configs["x_dim"], None), name="x")
        self.vars["y"] = tf.placeholder(
            tf.float32, shape=(1, None), name="y")

        w_reshape = tf.reshape(self.vars["w"], [1, self.configs["x_dim"]])

        # Based on the equation, "dom" is the term in the parenthesis to be
        # squared.
        # NOTE(review): as written this is y - (w.x - b), so b enters the
        # residual with a plus sign -- confirm this matches the convention
        # used by the data generator.
        self.vars["dom"] = self.vars["y"] - (
            tf.matmul(w_reshape, self.vars["x"]) - self.vars["b"])
        self.vars["squared"] = tf.math.square(self.vars["dom"])
        # Per-sample term: squared / (squared + c^2).
        self.vars["each"] = tf.divide(
            self.vars["squared"],
            (self.vars["squared"] +
             self.configs["c"] * self.configs["c"]))
        self.vars["loss"] = tf.reduce_mean(self.vars["each"], axis=1)

        # Gradients for the controller information.
        self.vars["gradients"] = tf.gradients(
            self.vars["loss"], [self.vars["w"], self.vars["b"]])

    # Initialising data; storing function is not implemented.
    self.all_data = [
        self._generate_data() for _ in range(self.configs["problem_num"])
    ]
    # Index used to retrieve data from the whole collection.
    self.data_ind = 0

    # Reset the environment for starting.
    self.reset()
def __init__(self, c, worker_id, start_position, with_step_penalty,
             with_revisit_penalty, stay_inside, with_color_reward,
             total_reward, covered_steps_ratio, depth_channel_first=True,
             changing_start_positions=False, as_image=False,
             color_on_visit=True):
    """Store the options, initialise the episode state and reset.

    Args:
        c: Configuration object; stored as ``self.c``. Its ``data_files``
            attribute is logged.
        worker_id: Stored as ``self.worker_id``.
        start_position: Stored as ``self.start_position``.
        with_step_penalty: Stored as ``self.with_step_penalty``.
        with_revisit_penalty: Stored as ``self.with_revisit_penalty``.
        stay_inside: Stored as ``self.stay_inside``.
        with_color_reward: Stored as ``self.with_color_reward``.
        total_reward: Stored as ``self.total_reward``.
        covered_steps_ratio: Stored as ``self.covered_steps_ratio``.
        depth_channel_first: Stored as ``self.depth_channel_first``.
        changing_start_positions: Stored as
            ``self.changing_start_positions``.
        as_image: Stored as ``self.as_image``.
        color_on_visit: Stored as ``self.color_on_visit``.
    """
    Env.__init__(self)
    message = 'creating environment for files {}'.format(c.data_files)
    log.info(message)

    # Attributes needed in order to simulate a gym environment.
    self.reward_range = None
    self.metadata = {'render.modes': []}
    self.spec = None
    self.enabled = False
    self.observation_space = None

    self.c = c

    # Channel layout:
    #   first channel  -- 0: blank cell,   1: pattern cell
    #   second channel -- 0: not stitched, 1: stitched
    #   third channel  -- 0: no agent,     1: agent
    self.with_step_penalty = with_step_penalty
    self.with_revisit_penalty = with_revisit_penalty
    self.stay_inside = stay_inside

    self.action_encodings = {0: 'u', 1: 'd', 2: 'l', 3: 'r'}

    self.with_color_reward = with_color_reward
    self.total_reward = total_reward
    self.covered_steps_ratio = covered_steps_ratio

    # Reverse lookup: letter -> action index.
    self.inv_action_encodings = {
        letter: index for index, letter in self.action_encodings.items()
    }
    self.action_space = spaces.Discrete(len(self.action_encodings))

    # Human-readable label for each channel, in channel order.
    channel_labels = OrderedDict()
    channel_labels[ColoringEnv.channel_pattern] = 'Pattern'
    channel_labels[ColoringEnv.channel_stitch] = 'Completed pattern'
    channel_labels[ColoringEnv.channel_agent] = 'Agent position'
    self.layer_descriptions = channel_labels

    self.worker_id = worker_id
    self.start_position = start_position

    # Episode state placeholders; nothing in this constructor fills them in
    # before the final reset() call.
    self.env_reset_count = 0
    self.steps = []
    self.emb_pattern_layer = None
    self.emb_pattern_count = 0
    self.x_dim = self.y_dim = None
    self.base_observation = self.initial_observation = None
    self.max_steps = -1
    self.step_count = 0
    self.data_file = None
    self.done = False

    self.depth_channel_first = depth_channel_first
    self.changing_start_positions = changing_start_positions
    self.as_image = as_image
    self.color_on_visit = color_on_visit

    self.alice_state = None
    self.init_uncovered_count = 0

    self.reset()