def test_trace_recency(self):
    # Ensure that the config value for unseen recency is passed to unseen items;
    # the first observation will always be unseen
    env = park.make('cache')
    config.cache_unseen_recency = 1000
    obs = env.reset()
    self.assertTrue(obs[2] == 1000)
    config.cache_unseen_recency = 500
    obs = env.reset()
    self.assertTrue(obs[2] == 500)
def run_env_with_random_agent(env_name, seed):
    # suppress unittest from throwing weird warnings
    warnings.simplefilter('ignore', category=ImportWarning)
    env = park.make(env_name)
    env.seed(seed)
    obs = env.reset()
    done = False
    while not done:
        act = env.action_space.sample()
        obs, reward, done, info = env.step(act)
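# Hedged usage sketch (not part of the original source): drive the random-agent
# smoke test above over a few simulated park environments. The environment ids,
# the seed, and the surrounding loop are illustrative assumptions; the imports
# are the ones the function above relies on.
import warnings

import park

if __name__ == '__main__':
    for env_name in ['load_balance', 'abr_sim', 'simple_queue']:
        run_env_with_random_agent(env_name, seed=42)
        print('finished one random-agent episode in', env_name)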
def _thunk():
    if env_id not in PARK_ENV_LIST:
        raise ValueError(
            "Unsupported environment, expect the environment to be one of "
            + str(PARK_ENV_LIST) + " but got: " + str(env_id))
    elif env_id == 'load_balance':
        # arrange the number of stream jobs
        env = park.make(env_id,
                        num_stream_jobs=args.num_stream_jobs,
                        service_rates=args.load_balance_service_rates)
        # random actions after resetting to diversify the state;
        # only used when training
        if train:
            env = LoadBalanceRandomReset(env, args.max_random_init_steps)
        # if using load balance, clip and normalize the observation with this wrapper
        if args is not None:
            env = ProcessLoadBalanceObservation(
                env,
                args.job_size_norm_factor,
                args.server_load_norm_factor,
                args.highest_server_obs,
                args.highest_job_obs,
                args.elapsed_time_norm_factor,
                args.highest_elapsed_time)
            # normalize reward
            env = RewardNormalize(env, args.reward_norm_factor)
        if args.fix_job_sequence:
            # fix the job sequence
            env = FixJobSequence(env, seed)

    if max_episode_steps:
        env = TimeLimit(env, max_episode_steps)
        # add information to the env for computing returns
        env = TimeLimitMask(env)

    # IMPORTANT: all environments use the same random seed to repeat the input process
    if train and args.algo.startswith('mib'):
        env.seed(seed)
    else:
        env.seed(seed + rank)

    if log_dir is not None:
        env = bench.Monitor(
            env, os.path.join(log_dir, str(rank)),
            allow_early_resets=allow_early_resets)

    return env
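# Hedged usage sketch (assumption, not the original code): _thunk above is the
# kind of closure typically returned by a make_env(...) factory so that a
# vectorized wrapper can construct one environment per worker rank. The factory
# name make_env, its signature, and the constants below are hypothetical; only
# SubprocVecEnv (from OpenAI baselines) is a known API.
from baselines.common.vec_env.subproc_vec_env import SubprocVecEnv

num_workers = 4
envs = SubprocVecEnv([
    make_env(env_id='load_balance', seed=1, rank=rank,
             log_dir=None, allow_early_resets=False)
    for rank in range(num_workers)
])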
def __init__(self):
    """
    Working now:
        1. load_balance
        2. abr_sim
        3. switch_scheduling
        4. simple_queue
        5. region_assignment

    Not working:
        1. abr
        2. aqm
        3. congestion_control
        4. spark_sim
        5. query_optimizer
        6. cache
        7. tf_placement
        8. circuit_three_stage_transimpedance
        9. tf_placement_sim
        10. multi_dim_index
        11. spark
    """
    self.env = park.make('abr_sim')
def train(self, db, training_samples, use_subqueries=False, test_samples=None):
    assert isinstance(training_samples[0], dict)
    if not self.nn_type == "num_tables":
        self.num_threads = multiprocessing.cpu_count()
        print("setting num threads to: ", self.num_threads)
        torch.set_num_threads(self.num_threads)
    else:
        self.num_threads = -1

    self.db = db
    db.init_featurizer(
        num_tables_feature=self.num_tables_feature,
        max_discrete_featurizing_buckets=self.max_discrete_featurizing_buckets)

    # get one true source of X,Y feature vector pairs, which won't be reused
    start = time.time()
    self.training_samples = training_samples
    self.Xtrain, self.Ytrain, self.train_num_table_mapping = \
        self._get_feature_vectors(self.training_samples)

    # create a new park env, and close it at the end
    self.env = park.make('query_optimizer')

    self.test_samples = test_samples
    if test_samples is not None and len(test_samples) > 0:
        # random.shuffle(test_samples)
        self.Xtest, self.Ytest, self.test_num_table_mapping = \
            self._get_feature_vectors(self.test_samples)
        print("{} training, {} test subqueries".format(
            len(self.Xtrain), len(self.Xtest)))
        self.test_env = park.make('query_optimizer')

    print("feature len: {}, generation time: {}".format(
        len(self.Xtrain[0]), time.time() - start))

    # FIXME: multiple table version
    self.init_nets()
    model_size = self.num_parameters()
    print("model size: {} MB".format(model_size))

    self.num_iter = 0
    # start off with uniform sampling weights over the training subqueries
    self.subquery_sampling_weights = [1 / len(self.Xtrain)] * len(self.Xtrain)

    prev_end = time.time()
    while True:
        if self.num_iter % 100 == 0:
            # log training progress
            it_time = time.time() - prev_end
            prev_end = time.time()
            print("MB: {}, T:{}, I:{} : {}".format(
                self.mb_size, self.num_threads, self.num_iter, it_time))
            sys.stdout.flush()

        if self.num_iter % self.eval_iter == 0:
            # we will wait on these results when we reach this point in the
            # next iteration
            self._update_join_results(self.train_join_results,
                                      self.training_samples, "train",
                                      self.num_iter - self.eval_iter)
            self.train_join_results = self._periodic_eval(
                self.Xtrain, self.Ytrain, self.training_samples, "train",
                self.train_join_loss_pool, self.env)
            # TODO: handle reweighing schemes here
            if test_samples is not None:
                self._update_join_results(self.test_join_results,
                                          self.test_samples, "test",
                                          self.num_iter - self.eval_iter)
                self.test_join_results = self._periodic_eval(
                    self.Xtest, self.Ytest, self.test_samples, "test",
                    self.test_join_loss_pool, self.test_env)

        if self.num_iter % self.eval_iter == 0:
            self.save_stats()

        self.train_step(self.eval_iter)
        self.num_iter += self.eval_iter

        if self.num_iter >= self.max_iter:
            print("breaking because max iter done")
            break
for steps in range(num_steps):
    action, _ = agent.predict(state)
    new_state, reward, done, _ = env.step(action)
    rewards.append(reward)
    state = new_state

    if done or steps == num_steps - 1:
        all_rewards.append(np.sum(rewards))
        all_lengths.append(steps)
        average_lengths.append(np.mean(all_lengths[-10:]))
        if episode % 1 == 0:
            print("episode: {}, reward: {}, total length: {}, average length: {} \n"
                  .format(episode, np.sum(rewards), steps, average_lengths[-1]))
            print("count_bhr: {}, count_ohr: {}, total_size: {}, bhr_ratio: {} \n"
                  .format(env.sim.count_bhr, env.sim.count_ohr, env.sim.size_all,
                          float(env.sim.count_bhr) / float(env.sim.size_all)))
        break


env = park.make('cache')
admitAll(env)
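# Hedged sketch (assumption, not the original code): admitAll is invoked above
# but not defined in this fragment. An always-admit baseline for the park cache
# environment could look like the following, assuming the action is a binary
# admit decision where 1 means "admit", and that the simulator counters used
# above (env.sim.count_bhr, env.sim.size_all) are available.
def admit_all_sketch(env, num_episodes=1):
    for _ in range(num_episodes):
        env.reset()
        done = False
        while not done:
            # always admit the requested object into the cache
            _, _, done, _ = env.step(1)
        print("bhr_ratio: {}".format(
            float(env.sim.count_bhr) / float(env.sim.size_all)))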
def test_bounds_low(self):
    # New lower bound for the cache test traces, test trace numbers start at 0
    env = park.make('cache')
    env.reset(low=0, high=1)
def test_bounds(self):
    # New upper bound for the cache test traces, test trace numbers end at 999
    env = park.make('cache')
    env.reset(low=1000, high=1001)
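# Hedged note (assumption): the assertTrue / reset(low, high) calls above read
# as methods of a unittest.TestCase subclass for the cache environment; the
# standard unittest entry point would run them.
import unittest

if __name__ == '__main__':
    unittest.main()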
def main():
    env = park.make('congestion_control')
    env.run(RandomAgent, ())
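# Hedged sketch (assumption, not the original code): RandomAgent is referenced
# above but not defined in this snippet. park's real-system environments take
# an agent class via env.run(); a random agent following the agent interface
# shown in the park README could look like this.
class RandomAgent(object):
    def __init__(self, state_space, action_space, *args, **kwargs):
        self.state_space = state_space
        self.action_space = action_space

    def get_action(self, obs, prev_reward, prev_done, prev_info):
        # sample a random valid action at every decision point
        return self.action_space.sample()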