def __init__(self, opts, discrete_actions):
    """Set up the bullet cartpole environment.

    Args:
      opts: parsed command-line options; this constructor reads gui, delay,
        max_episode_len, action_force, initial_force, no_random_theta,
        event_log_out, use_raw_pixels, action_repeats, steps_per_repeat,
        num_cameras, render_width, render_height and reward_calc from it.
      discrete_actions: True for a 5-way discrete action space,
        False for a continuous 2d (fx, fy) action space.

    Side effects: connects to the bullet physics server (GUI or DIRECT),
    sets gravity and loads the ground, cart and pole URDF models.
    """
    self.gui = opts.gui
    # only delay rendering when running with a GUI; headless runs full speed.
    self.delay = opts.delay if self.gui else 0.0
    self.max_episode_len = opts.max_episode_len
    # threshold for pole position.
    # if absolute x or y moves outside this we finish episode
    self.pos_threshold = 2.0  # TODO: higher?
    # threshold for angle from z-axis.
    # if x or y > this value we finish episode.
    self.angle_threshold = 0.3  # radians; ~= 12deg
    # force to apply per action simulation step.
    # in the discrete case this is the fixed force applied
    # in the continuous case each x/y is in range (-F, F)
    self.action_force = opts.action_force
    # initial push force. this should be enough that taking no action will always
    # result in pole falling after initial_force_steps but not so much that you
    # can't recover. see also initial_force_steps.
    self.initial_force = opts.initial_force
    # number of sim steps initial force is applied.
    # (see initial_force)
    self.initial_force_steps = 30
    # whether we do initial push in a random direction
    # if false we always push along x-axis (simpler problem, useful for debugging)
    self.random_theta = not opts.no_random_theta
    # true if action space is discrete; 5 values; no push, left, right, up & down
    # false if action space is continuous; fx, fy both (-action_force, action_force)
    self.discrete_actions = discrete_actions
    # 5 discrete actions: no push, left, right, up, down
    # 2 continuous action elements; fx & fy
    # NOTE(review): continuous Box is (-1, 1), presumably rescaled to
    # (-action_force, action_force) in step() — confirm against step().
    if self.discrete_actions:
        self.action_space = spaces.Discrete(5)
    else:
        self.action_space = spaces.Box(-1.0, 1.0, shape=(1, 2))
    # open event log
    # (import deferred so the dependency is only required when logging is on)
    if opts.event_log_out:
        import event_log
        self.event_log = event_log.EventLog(opts.event_log_out, opts.use_raw_pixels)
    else:
        self.event_log = None
    # how many times to repeat each action per step().
    # and how many sim steps to do per state capture
    # (total number of sim steps = action_repeats * steps_per_repeat)
    self.repeats = opts.action_repeats
    self.steps_per_repeat = opts.steps_per_repeat
    # how many cameras to render?
    # if 1 just render from front
    # if 2 render from front and 90deg side
    if opts.num_cameras not in [1, 2]:
        raise ValueError("--num-cameras must be 1 or 2")
    self.num_cameras = opts.num_cameras
    # whether we are using raw pixels for state or just pole + cart pose
    self.use_raw_pixels = opts.use_raw_pixels
    # if use_raw_pixels is set we will be rendering at this resolution
    self.render_width = opts.render_width
    self.render_height = opts.render_height
    # decide observation space
    if self.use_raw_pixels:
        # in high dimensional case each observation is an RGB image (H, W, 3)
        # we have C cameras and R repeats resulting in (H, W, 3, C, R)
        # final state fed to network is concatenated in depth => (H, W, 3*C*R)
        state_shape = (self.render_height, self.render_width, 3,
                       self.num_cameras, self.repeats)
    else:
        # in the low dimensional case obs space for problem is (R, 2, 7)
        # R = number of repeats
        # 2 = two items; cart & pole
        # 7d tuple for pos + orientation pose
        state_shape = (self.repeats, 2, 7)
    float_max = np.finfo(np.float32).max
    self.observation_space = gym.spaces.Box(-float_max, float_max, state_shape)
    # check reward type
    assert opts.reward_calc in ['fixed', 'angle', 'action', 'angle_action']
    self.reward_calc = opts.reward_calc
    # no state until reset.
    self.state = np.empty(state_shape, dtype=np.float32)
    # setup bullet
    p.connect(p.GUI if self.gui else p.DIRECT)
    p.setGravity(0, 0, -9.81)
    p.loadURDF("models/ground.urdf", 0, 0, 0, 0, 0, 0, 1)
    self.cart = p.loadURDF("models/cart.urdf", 0, 0, 0.08, 0, 0, 0, 1)
    self.pole = p.loadURDF("models/pole.urdf", 0, 0, 0.35, 0, 0, 0, 1)
# NOTE(review): this `return` is the tail of a function whose definition is
# outside the visible chunk; it cannot stand at module level as reconstructed.
return 0

# Dead code (`if 0:` never runs): hand-built fixture list of fake phone events.
# NOTE(review): `list` shadows the builtin here; harmless while dead, but
# rename (e.g. `events`) if this block is ever revived.
if 0:
    list = []
    e = {}
    e['type'] = "sms"
    e['sender'] = "123456"
    e['text'] = "Hello, world! One day, we'll have full desktop distributions running on our cellphones, and will write each application just once, not once for desktop and once for mobile."
    list += [ e ]
    e = {}
    e['type'] = "sms"
    e['sender'] = "567823"
    e['text'] = "Androids are nice, there's nothing wrong with Android. Its just... that there's too much non-free software on pretty much every Android handset. Good luck to Cyanogen, but I still prefer Linux."
    list += [ e ]
    e = {}
    e['type'] = "missed"
    e['sender'] = "234567"
    e['text'] = ""
    list += [ e ]

# Script entry point: load contacts, build the events window, show logged
# events, then hand over to the (unseen) main loop.
# NOTE(review): event_log, contactsdb, ViewEvents and main are defined
# elsewhere in this file or its imports — not visible in this chunk.
if __name__ == "__main__":
    elog = event_log.EventLog()
    db = contactsdb.ContactsDb()
    db.load_org()
    win = ViewEvents(db)
    win.basic_main_window()
    win.update(elog.events)
    main()
# can't do this without more complex caching of world state vid frames #malmo.setObservationsPolicy(MalmoPython.ObservationsPolicy.LATEST_OBSERVATION_ONLY) # load mission spec mission = MalmoPython.MissionSpec(specs.classroom(opts, overclock_tick_ms), True) mission_record = MalmoPython.MissionRecordSpec() # return all return client_pool, malmo, mission, mission_record client_pool, malmo, mission, mission_record = create_malmo_components() # init our rl_agent agent_cstr = eval("agents.%sAgent" % opts.agent) agent = agent_cstr(opts) # init event log (if logging events) event_log = event_log.EventLog(opts.event_log_out) if opts.event_log_out else None # hook up connection to trainer if opts.trainer_port == 0: trainer = None else: channel = grpc.insecure_channel("localhost:%d" % opts.trainer_port) trainer = model_pb2.ModelStub(channel) for episode_idx in itertools.count(0): print util.dts(), "EPISODE", episode_idx, "eval", opts.eval # start new mission; explicitly wait for first observation # (not just world_state.has_mission_begun) mission_start = time.time() while True:
# NOTE(review): truncated at both ends — the first two statements are the
# tail of a function defined before this chunk, and interactiveMode() below
# is cut off mid-call (`print(`).
print(res)
return res

def getHumanUtt():
    """Print the canned human utterances as a numbered menu and return the
    one the user picks by index (no bounds/format validation on the input)."""
    for i, utt in enumerate(human_utts):
        print(f'{i}) {utt.text}')
    idx = int(input('>>> '))
    return human_utts[idx]

# Module-level wiring: variable spec, scoring config and an empty event log.
# TODO: not initiating var_spec from Goal - fix.
var_spec = var_spec.VarSpec.fromFileAndUpdate(bot_module_base + '/var_spec', robot_utts + human_utts)
config = response_logic.Config(var_spec, repeated_utt_demotion=1)
scoring_params = response_logic.ScoringParams(event_log.EventLog([]), config)

def interactiveMode(state):
    """Interactive REPL: read a free-form human utterance, apply it to the
    dialogue state, then pick and log the best robot reply.

    Mutates the module-level scoring_params.event_log as a side effect.
    NOTE(review): body truncated in this chunk — the trailing print( is
    incomplete.
    """
    while True:
        # human_utt = getHumanUtt()
        human_utt = getHumanUttFreeform()
        scoring_params.event_log.add(human_utt)
        state, diff = response_logic.applyUttAndDiff(state, human_utt)
        print(colors.C(human_utt.text, colors.OKBLUE))
        pprint.pprint(diff)
        goal, robot_utt, score = response_logic.bestReplyForAllGoals(
            state, goals, robot_utts, scoring_params)
        scoring_params.event_log.add(robot_utt)
        print(