def test_palette_support():
    terms = (
        ('dumb', TermLevel.DUMB),
        ('linux', TermLevel.ANSI_BASIC),
        ('xterm-color', TermLevel.ANSI_BASIC),
        ('xterm-256color', TermLevel.ANSI_EXTENDED),
    )
    for name, result in terms:
        detection.env = Environment(environ=dict(TERM=name))
        assert detection.detect_terminal_level() == result

    detection.env = Environment(environ=dict(COLORTERM='24bit'))
    assert detection.detect_terminal_level() == TermLevel.ANSI_DIRECT

    from . import windows  # try win implementation
    terms = (
        ('dumb', TermLevel.DUMB),
        ('xterm-color', TermLevel.ANSI_BASIC),
        ('xterm-256color', TermLevel.ANSI_EXTENDED),
        ('cygwin', TermLevel.ANSI_DIRECT),  # ?
    )
    for name, result in terms:
        windows.env = Environment(environ=dict(TERM=name))
        assert windows.detect_terminal_level() == result

    windows.env = Environment(environ=dict(ANSICON='1'))
    assert windows.detect_terminal_level() == TermLevel.ANSI_EXTENDED
def play_before_step_n(p_net, p_net_op, sl_net, sl_net_op, n):
    # Play the first N moves with the SL network; after that, play with the Policy network.
    # Save the black-side board state at move N, then let the Policy nets play each other
    # to decide the winner.
    env = Environment(sl_net_op, "black")
    for i in range(n):
        probs = sl_net(torch.from_numpy(env.get_state()).double())
        act = select_legal_hand(probs, env.othello.legal_hands("black"))
        _, _, done = env.step(act)
        if done:
            return False

    state = env.get_state()  # save the state after the N-th move
    env2 = Environment(p_net_op, "black")
    env2.import_othello(env.othello)  # hand the game over to env2
    while True:
        probs = p_net(torch.from_numpy(env2.get_state()).double())
        act = select_legal_hand(probs, env2.othello.legal_hands("black"))
        _, rew, done = env2.step(act)
        if done:
            if rew == 1.0:
                color = "black"
            elif rew == -1.0:
                color = "white"
            else:
                color = "none"
            ans = train_data(state, color)
            global data_queue
            data_queue = np.append(data_queue, ans)
            return True
def test_terminal_level_detection_override():
    terms = (
        ('dumb', (TermLevel.DUMB, '@')),
        ('linux', (TermLevel.ANSI_BASIC, '@')),
        ('xterm-color', (TermLevel.ANSI_BASIC, '@')),
        ('xterm-256color', (TermLevel.ANSI_EXTENDED, '@')),
        ('xterm-direct', (TermLevel.ANSI_DIRECT, '@')),
        ('fbterm', (TermLevel.ANSI_EXTENDED, '@')),
    )
    for name, expected in terms:
        detection.env = Environment(environ=dict(TERM=name, PY_CONSOLE_COLOR_SEP='@'))
        if name == 'fbterm':  # :-/
            detection.is_fbterm = True
        assert detection.detect_terminal_level() == expected

    from . import windows  # try win implementation
    terms = (
        ('dumb', (TermLevel.DUMB, '@')),
        ('xterm-color', (TermLevel.ANSI_BASIC, '@')),
        ('xterm-256color', (TermLevel.ANSI_EXTENDED, '@')),
        ('cygwin', (TermLevel.ANSI_DIRECT, '@')),  # ?
    )
    for name, expected in terms:
        windows.env = Environment(environ=dict(TERM=name, PY_CONSOLE_COLOR_SEP='@'))
        assert windows.detect_terminal_level() == expected

    windows.env = Environment(environ=dict(ANSICON='1'))
    assert windows.detect_terminal_level() == (TermLevel.ANSI_EXTENDED, ';')
def test_color_allowed():
    detection.env = Environment(environ={})
    assert detection.color_is_allowed() is True

    detection.env = Environment(environ=dict(CLICOLOR='0'))
    assert detection.color_is_allowed() is False

    detection.env = Environment(environ=dict(NO_COLOR=''))
    assert detection.color_is_allowed() is False
def test_color_disabled_none_false():
    detection.env = Environment(environ={})
    assert detection.color_is_disabled() is None

    detection.env = Environment(environ=dict(CLICOLOR=''))
    assert detection.color_is_disabled() is None

    detection.env = Environment(environ=dict(CLICOLOR='1'))
    assert detection.color_is_disabled() is False
def test_color_forced():
    detection.env = Environment(environ={})
    assert detection.color_is_forced().value is None

    detection.env = Environment(environ=dict(CLICOLOR_FORCE='0'))
    assert detection.color_is_forced() is False

    detection.env = Environment(environ=dict(CLICOLOR_FORCE='foo'))
    assert detection.color_is_forced() is True

    detection.env = Environment(environ=dict(CLICOLOR_FORCE='1'))
    assert detection.color_is_forced() is True
def train_iteratively(args):
    # iteration 1
    team_blue = [PGAgent(idx, "blue") for idx in range(args.n_friends)]
    team_red = [Agent(args.n_friends + idx, "red") for idx in range(args.n_enemies)]
    training_agents = team_blue
    agents = team_blue + team_red

    env = Environment(agents)
    args.n_actions = 6 + args.n_enemies
    args.n_inputs = 4 + 3 * (args.n_friends - 1) + 3 * args.n_enemies
    model = ForwardModel(input_shape=args.n_inputs, n_actions=args.n_actions)
    for agent in training_agents:
        agent.set_model(model)
    training_agents = train_agents(env, training_agents, args)
    trained_model = copy.deepcopy(training_agents[0].model)

    for iteration in range(args.n_iterations):
        args.n_steps = 10000 * (iteration + 2)
        team_blue = [PGAgent(idx, "blue") for idx in range(args.n_friends)]
        team_red = [PGAgent(args.n_friends + idx, "red") for idx in range(args.n_enemies)]
        training_agents = team_blue
        agents = team_blue + team_red

        env = Environment(agents, args)

        # the red team plays with a frozen copy of the previously trained model
        model = ForwardModel(input_shape=args.n_inputs, n_actions=args.n_actions)
        model.load_state_dict(trained_model.state_dict())
        model.eval()
        for agent in team_red:
            agent.set_model(model)

        # the blue team trains a fresh model
        model = ForwardModel(input_shape=args.n_inputs, n_actions=args.n_actions)
        for agent in team_blue:
            agent.set_model(model)

        training_agents = train_agents(env, training_agents, args)
        trained_model = copy.deepcopy(training_agents[0].model)

    torch.save(trained_model.state_dict(), args.path + f'RUN_{get_run_id()}.torch')
def __init__(self):
    self.state_dim = STATE_DIM    # m x n grid
    self.action_dim = ACTION_DIM  # actions: up, down, right, left
    self.env = Environment(self.state_dim, self.action_dim)
    self.agent = Agent(self.state_dim, self.action_dim)
    self.episode = 0
    self.batch_size = BATCH_SIZE
    self.isTraining = TRAINING
    self.isPlaying = PLAYING

    # ====== OpenGL setup ======
    argv = sys.argv
    glutInit(argv)
    glutInitWindowPosition(0, 0)
    glutInitWindowSize(WIN_WIDTH, WIN_HEIGHT)
    glutInitDisplayMode(GLUT_DOUBLE | GLUT_RGB | GLUT_DEPTH)
    glutCreateWindow("DQN example")
    glutDisplayFunc(self.display)
    glutReshapeFunc(self.reshape)
    glutKeyboardFunc(self.keyCB)
    # ==========================

    if self.isTraining:
        self.training()
    elif self.isPlaying:
        self.agent.load(saved_weight)
        self.playing()

    glutMainLoop()
def main(_):
    # CPU only
    tf.config.experimental.set_visible_devices([], 'GPU')
    tf.get_logger().setLevel('INFO')
    #tf.debugging.set_log_device_placement(True)

    config = get_config(FLAGS) or FLAGS
    env = Environment(config, is_training=True)
    game = CFRRL_Game(config, env)

    model_weights_queues = []
    experience_queues = []
    if FLAGS.num_agents == 0 or FLAGS.num_agents >= mp.cpu_count():
        FLAGS.num_agents = mp.cpu_count() - 1
    print('Agent num: %d, iter num: %d\n' % (FLAGS.num_agents + 1, FLAGS.num_iter))
    for _ in range(FLAGS.num_agents):
        model_weights_queues.append(mp.Queue(1))
        experience_queues.append(mp.Queue(1))

    tm_subsets = np.array_split(game.tm_indexes, FLAGS.num_agents)

    coordinator = mp.Process(target=central_agent,
                             args=(config, game, model_weights_queues, experience_queues))
    coordinator.start()

    agents = []
    for i in range(FLAGS.num_agents):
        agents.append(mp.Process(target=agent,
                                 args=(i, config, game, tm_subsets[i],
                                       model_weights_queues[i], experience_queues[i])))
    for i in range(FLAGS.num_agents):
        agents[i].start()

    coordinator.join()
def scenario_one():
    """
    This scenario consists of a single agent type with constant c and state variables x, y, z.
    The dynamics of this system are defined as:
        x = y + z + c
        y = 2z
        z = sin(x - y)
    """
    env = Environment()
    agent = Agent(['c'], ['x', 'y', 'z'], 'agent1')
    env.register_agents(agent)
    env.compile()

    constants = pd.DataFrame({'c': np.random.rand(1000)})
    states = pd.DataFrame({var: np.random.random(1000) for var in ['x', 'y', 'z']})
    data_input = {agent: {'constants': constants, 'states': states}}

    agent_output = pd.DataFrame({
        'x': states['y'] + states['z'] + constants['c'],
        'y': 2 * states['z'],
        'z': np.sin(states['x'] - states['y'])
    })
    data_output = {agent: agent_output}

    env.solo_train(data_input, data_output)
    env.solo_test(data_input, data_output)
    # print(env.derivativeMatrix(agent, 'x', 100))
    print(env.correlation_matrix(agent))
def work(self, encoder_network):
    while not self.coord.should_stop() and self.epoch < self.num_epochs:
        step = 0
        env = Environment(self.img_path)  # start an episode
        while step < self.train_batches_per_epoch:
            images_np = env.load_image(self.img_offset)
            self.img_offset += 1
            raw_img, target_img, act_list, mse = env.take_action(images_np)
            encoder_network.update_state_predicter(raw_img, act_list, target_img, self.lr)
            encoder_network.update_action_predicter(raw_img, act_list, target_img, self.lr)
            # the autoencoder loss is still missing here
            encoder_network.pull_all_params()
            step += 1
        self.epoch += 1
def main():
    start_date = datetime.datetime(2002, 1, 1)
    end_date = datetime.datetime(2021, 1, 30)
    stocks = web.DataReader('SPY', 'yahoo', start_date, end_date)

    env = Environment(stocks, LOOK_BACK, stocks.shape[1])
    in_dim = env.observation_shape[0]
    out_dim = env.action_space.n
    agent = Agent(in_dim, out_dim)
    agent.reset()
    optimizer = optim.RMSprop(agent.parameters(), lr=0.05)

    for epi in range(EPISODES):
        state = env.set_state()
        for t in range(env.total_days - env.look_back):
            action = agent.act(state)
            state, reward, done = env.step(action)
            agent.rewards.append(reward)
            if done:
                break
        loss = agent.fit(optimizer, GAMMA)
        total_reward = sum(agent.rewards)
        reward_records = agent.rewards
        agent.reset()  # clear memory after training
        print(f'Episode {epi}, Loss: {loss}, Profit: {total_reward}')

    print("----Training Over----")
    cum_rewards = np.cumsum(reward_records)
    plt.title("Cumulative Profit on Last Episode")
    plt.xlabel("Days of Trading")
    plt.ylabel("Price ($)")
    plt.plot(cum_rewards)
    plt.show()
def __init__(self, sim_number, init=True, _seed=None, ATNE=True):
    super(EnvironmentListener, self).__init__()
    seed(_seed)
    self.ATNE = ATNE
    self.sim_number = sim_number
    self.post_process = PostGraph(self.sim_number, columns=[
        "sim_number", "sim_step", "veh_id", "edge_id",
        "speed", "capacity", "budget", "prev_poi"])

    self.t = 0
    self.break_condition = False  # becomes True once all vehicles have arrived

    file_dir = os.path.dirname(GraphSetting.sumo_config)
    map_list = glob.glob(os.path.join(file_dir, r"*.map"))
    try:
        self.sim_env = self.read(map_list[0])
        print(f"loaded map from {map_list[0]}")
    except IndexError:
        print(f".map file generating for {GraphSetting.sumo_config}")
        self.sim_env = Environment()
        self.save(GraphSetting.sumo_config, self.sim_env)

    if init:
        self.initial_reward_random(GraphSetting.reward_numbers)
        self.initial_route_random(GraphSetting.car_numbers)

    self.junction_sub()
def main():
    # Define argument requirements
    exact_count = -1  # Set to -1 to disable exact arg checking
    min_count = 1     # Set to -1 to disable min arg checking
    max_count = -1    # Set to -1 to disable max arg checking
    summary = "IBM Homework test script"
    usage = "usage: %prog [options] <test_name> <args>"

    # Set the add custom options callback if necessary
    add_custom_options_cb = 0  # custom_options_cb

    # Allocate and initialize the script's environment
    env = Environment(os.path.basename(__file__), usage, summary, exact_count,
                      min_count, max_count, add_custom_options_cb)

    # Execute script-specific functionality
    if env.error == False and env.complete == False:
        env.error = execute(env)

    # Exit the program
    env.exit()
def envConfig():
    if request.method == 'POST':
        global marseEnv
        mydict = request.get_json()
        marseEnv = Environment(temperature=mydict.get('temperature'),
                               humidity=mydict.get('humidity'),
                               solar_flare=mydict.get('solar-flare'),
                               storm=mydict.get('storm'),
                               area_map=mydict.get('area-map'))
        if marseEnv.solar_flare:
            myrover.battery = 11
        if marseEnv.storm:
            shield_index = None
            for index, item in enumerate(myrover.inventory):
                if item.type == 'storm-shield':
                    shield_index = index
            if shield_index is not None:
                if myrover.inventory[shield_index].qty == 1:
                    myrover.inventory.pop(shield_index)
                else:
                    myrover.inventory[shield_index].qty = myrover.inventory[shield_index].qty - 1
    else:
        func = request.environ.get('werkzeug.server.shutdown')
        if func is None:
            raise RuntimeError('Not running with the Werkzeug Server')
        func()
    return Response(status=200)
def main(cfg: DictConfig) -> None:
    "The entry point for parsing user-provided texts"
    assert cfg.model_path is not None, "Need to specify model_path for testing."
    assert cfg.input is not None
    assert cfg.language in ("english", "chinese")
    log.info("\n" + OmegaConf.to_yaml(cfg))

    # load the model checkpoint
    model_path = hydra.utils.to_absolute_path(cfg.model_path)
    log.info("Loading the model from %s" % model_path)
    checkpoint = load_model(model_path)
    restore_hyperparams(checkpoint["cfg"], cfg)
    vocabs = checkpoint["vocabs"]
    model = Parser(vocabs, cfg)
    model.load_state_dict(checkpoint["model_state"])
    device, _ = get_device()
    model.to(device)
    log.info("\n" + str(model))
    log.info("#parameters = %d" % sum([p.numel() for p in model.parameters()]))

    input_file = hydra.utils.to_absolute_path(cfg.input)
    ds = UserProvidedTexts(input_file, cfg.language, vocabs, cfg.encoder)
    loader = DataLoader(
        ds,
        batch_size=cfg.eval_batch_size,
        collate_fn=form_batch,
        num_workers=cfg.num_workers,
        pin_memory=torch.cuda.is_available(),
    )

    env = Environment(loader, model.encoder, subbatch_max_tokens=9999999)
    state = env.reset()
    oup = (sys.stdout if cfg.output is None
           else open(hydra.utils.to_absolute_path(cfg.output), "wt"))
    time_start = time()

    with torch.no_grad():  # type: ignore
        while True:
            with torch.cuda.amp.autocast(cfg.amp):  # type: ignore
                actions, _ = model(state)
            state, done = env.step(actions)
            if done:
                for tree in env.pred_trees:
                    assert tree is not None
                    print(tree.linearize(), file=oup)
                # pred_trees.extend(env.pred_trees)
                # load the next batch
                try:
                    with torch.cuda.amp.autocast(cfg.amp):  # type: ignore
                        state = env.reset()
                except EpochEnd:
                    # no next batch available (complete)
                    log.info("Time elapsed: %f" % (time() - time_start))
                    break

    if cfg.output is not None:
        log.info("Parse trees saved to %s" % cfg.output)
def train_multiple(summary_interval):
    models = [QNetwork, Small, Large, Dropout]
    with Environment() as env:
        summary_scores = {}
        for model in models:
            summary = train(model, env, summary_interval)
            summary_scores[model.__name__] = summary
    plot_summary(summary_scores, summary_interval)
def __init__(self):
    super(EnvironmentListener, self).__init__()
    self.sim_env = Environment()
    self.initial_reward_random(GraphSetting.reward_numbers)
    self.initial_route_random(GraphSetting.car_numbers)
    self.junction_sub()
def test_lava_is_terminal():
    env = Environment(CONFIG, add_agent_value=False)

    _, r, done = env.step('R')
    assert r == 0
    assert not done

    _, r, done = env.step('R')
    assert r == -10
    assert done
def test_env_reset_position_after_few_steps():
    env = Environment(CONFIG, add_agent_value=False)
    env.reset()
    env.step('R')
    env.step('U')
    assert env.agent_position != env.start_position

    env.reset()
    assert env.agent_position == env.start_position
def test_palette_support():
    terms = (
        ('dumb', None),
        ('linux', 'basic'),
        ('xterm-color', 'basic'),
        ('xterm-256color', 'extended'),  # ?
    )
    pal = (1,)  # dummy palette, tests true
    for name, result in terms:
        detection.env = Environment(environ=dict(TERM=name))
        assert detection.detect_palette_support(basic_palette=pal) == (result, pal)

    detection.env = Environment(environ=dict(ANSICON='1'))
    assert detection.detect_palette_support(basic_palette=pal) == ('extended', pal)

    detection.env = Environment(environ=dict(COLORTERM='24bit'))
    assert detection.detect_palette_support(basic_palette=pal) == ('truecolor', pal)
def __init__(self, alpha=0.1, gamma=0.9, epsilon=0.6, verbose=True):
    self.env = Environment()
    self.alpha = alpha
    self.gamma = gamma
    self.epsilon = epsilon
    self.verbose = verbose
    self.num_actions = 8
    self.qtable = {}
    self.current_state = self.env.detect_nearby()
def main(args):
    if args.examine:
        with Environment(no_graphics=True) as env:
            examine(env)
    if args.random:
        with Environment(no_graphics=False) as env:
            for i in range(5):
                random(env)
    if args.train:
        with Environment(no_graphics=True) as env:
            experiment = _setup_experiment(disabled=(not args.log))
            if experiment:
                with experiment.train():
                    train(env, experiment)
            else:
                train(env, experiment)
    if args.test:
        with Environment(no_graphics=False) as env:
            test(env)
def produce_env(total_time=600, punish_flag=True, valid_flag=False):
    environment = Environment(remain_dict=remain_dict,
                              edge_sequence=edge_sequence,
                              node_num=node_num,
                              path_dict=path_dict,
                              request_type_list=request_type_list,
                              total_time=total_time,
                              punish_flag=punish_flag,
                              valid_flag=valid_flag)
    return environment
def __init__(self, config):
    self.sess = tf.Session()
    self.sess.__enter__()
    env = Environment(config)
    policy_net = PolicyNet(config, env)
    super().__init__(config, env, policy_net)
    self.update_policy = PPOUpdater(policy_net, config, self.logger)
    self.sess.run(tf.global_variables_initializer())
    # warm up agent.scalar
    self._run_policy(batch_size=1000)
def test_reward_for_bridge_resets_correctly():
    env = Environment(CONFIG, add_agent_value=False)
    steps = ['R', 'U', 'R', 'R', 'R']
    for step in steps:
        _, r, _ = env.step(step)

    env.reset()
    for step in steps:
        _, r, _ = env.step(step)
    assert r == 1
def test_successful_trajectory():
    env = Environment(CONFIG, add_agent_value=False)
    steps = ['R', 'U', 'R', 'R', 'R', 'R', 'R', 'U']
    for step in steps:
        s, r, done = env.step(step)

    assert s.coordinate == (1, 6)
    assert np.all(s.observation == np.array([[2, 2, 2], [0, 0, 2], [0, 0, 2]]))
    assert r == 100
    assert done
def test_step():
    env = Environment(CONFIG, add_agent_value=False)

    env.step('R')
    assert env.agent_position == (3, 2)

    env.step('U')
    assert env.agent_position == (2, 2)

    env.step('L')
    assert env.agent_position == (2, 1)
def train(self, my_lambda, true_star_value):
    environment = Environment()
    terminated = False
    eligibility = np.zeros([36])

    # initial state: both player and dealer draw a black card
    card = round(random.uniform(1, 10))
    dealer_card = round(random.uniform(1, 10))
    states = []
    actions = []
    current_state = [dealer_card, card]
    current_action = self.epsilon_action(current_state)

    while not terminated:
        current_action_index = 1 if current_action == 'hit' else 0
        states.append(current_state)
        actions.append(current_action_index)
        old_card = card
        state_prime, reward = environment.step(current_state, current_action)
        card = state_prime[1]

        # reward is 2 while the state is non-terminal, because 0 would overlap
        # with the terminal draw reward; in the terminal state future action
        # values are taken to be 0
        current_reward = 0
        action_prime = None
        if reward != 2:
            current_reward = reward
            delta = current_reward + (0 - self.do_approximation(
                [dealer_card, old_card], current_action_index))
        else:
            action_prime = self.epsilon_action([dealer_card, card])
            action_prime_index = 1 if action_prime == 'hit' else 0
            delta = current_reward + (self.do_approximation(
                [dealer_card, card], action_prime_index) - self.do_approximation(
                [dealer_card, old_card], current_action_index))

        eligibility = my_lambda * eligibility + self.feature_vector.flatten()
        self.weights += self.step_size * delta * eligibility

        current_state = [dealer_card, card]
        current_action = action_prime
        if reward == -1 or reward == 1 or reward == 0:
            terminated = True

    mse = self.calculate_mse(true_star_value)
    return mse
def create_environment(self, seed):
    return Environment(
        rom_file=self.cfg['ENV']['game'],
        frame_skip=self.cfg['ENV']['frame_skip'],
        num_frames=self.cfg['ENV']['history_size'],
        no_op_start=self.cfg['ENV']['max_no_op_frames'],
        seed=seed,
        obs_height=self.cfg['ENV']['obs_height'],
        obs_width=self.cfg['ENV']['obs_width'],
        dead_as_end=self.cfg['ENV']['dead_as_end'],
        max_episode_steps=self.cfg['ENV']['max_episode_steps'])