elif args.local_mode or config.pop('local_mode', False):
    resume = False
    local_mode = True
    max_failures = 0

if args.temp_dir is None:
    args.temp_dir = config.pop('temp_dir', None)

if args.name is not None:
    name = args.name
    config.pop('name', None)
else:
    name = config.pop('name', "{}_training".format(os.getlogin()))

exp = tune.Experiment(
    name=name,
    run=get_trainable(config.pop('train_class')),
    trial_name_creator=tune.function(trial_str_creator),
    loggers=[GIFLogger],
    resources_per_trial={"cpu": 1, "gpu": args.n_gpus},
    checkpoint_freq=config.pop('save_freq', 5000),
    upload_dir=config.pop('upload_dir', None),
    local_dir=config.pop('local_dir', None),
    config=config  # evaluate last to allow all popping above
)

ray.init(redis_address=redis_address, local_mode=local_mode,
         temp_dir=args.temp_dir)
trials = tune.run(exp,
                  queue_trials=True,
                  resume=resume,
                  checkpoint_at_end=True,
                  max_failures=max_failures)
exit(0)
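# --- Hedged sketch (not part of the original snippets) ---
# trial_str_creator is referenced above (and again further below via
# functools.partial(trial_str_creator, name=args.name)) but is never defined
# in these excerpts. A minimal implementation consistent with both call sites
# might look like the following; the exact naming scheme, and the assumption
# that Tune passes only the Trial object, are guesses:
def trial_str_creator(trial, name=None):
    """Build a short, human-readable trial name for Tune."""
    prefix = name or trial.trainable_name
    return "{}_{}".format(prefix, trial.trial_id)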
"policy_graphs": { "def_policy": (VTracePolicyGraph, Box(0.0, 255.0, shape=(84, 84, 3)), Discrete(9), { "gamma": 0.99 }) }, "policy_mapping_fn": tune.function(lambda agent_id: "def_policy"), }, "env_config": env_actor_configs, "num_workers": args.num_workers, "num_envs_per_worker": args.envs_per_worker, "sample_batch_size": args.sample_bs_per_worker, "train_batch_size": args.train_bs } experiment_spec = tune.Experiment( "multi-carla/" + args.model_arch, "IMPALA", # timesteps_total is init with None (not 0) which causes issue # stop={"timesteps_total": args.num_steps}, stop={"timesteps_since_restore": args.num_steps}, config=config, checkpoint_freq=1000, checkpoint_at_end=True, resources_per_trial={ "cpu": 4, "gpu": 1 }) tune.run_experiments(experiment_spec)
def main(**kwargs):
    print('===> Tuning hyperparameters. For normal training use `train.py`')
    print('===> TensorFlow v. {}'.format(tf.__version__))

    if args.max_time:
        print('Tuning process will terminate after {} seconds'.format(str(args.max_time)))
        os.environ['TRIALRUNNER_WALLTIME_LIMIT'] = str(args.max_time)

    n_gpus = 2
    if args.gpu_id is not None:
        print('Restricting visible GPUs.')
        print('Using: GPU {}'.format(args.gpu_id))
        os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
        os.environ["CUDA_VISIBLE_DEVICES"] = str(args.gpu_id)
        n_gpus = 1

    ray.init(num_gpus=n_gpus)

    features_id, labels_id = Data.load_data(directories.train, tune=True)
    test_features_id, test_labels_id = Data.load_data(directories.test, tune=True)

    # Adjust stopping criteria - this terminates individual trials
    stopping_criteria = {
        "time_total_s": args.max_time_s,
        "episode_reward_mean": 1.0,  # Otherwise negative loss
        "mean_accuracy": 1.0
    }

    # Hyperparameters to be optimized.
    # Important to define the search space sensibly.
    config = {
        "args": args,
        "user_config": config_train,
        "train_ids": [features_id, labels_id],
        "test_ids": [test_features_id, test_labels_id],
    }

    # Include hyperparameters to be tuned, and their permitted range
    # (an illustrative sketch follows this function).
    hp_config = {}
    config.update(hp_config)

    # Include perturbation/resample ranges for population-based training.
    hp_resamples_pbt = {}

    # Specify experiment configuration.
    # Default uses a machine with 32 CPUs / 2 GPUs.
    # Params to modify: num_samples (grid fineness), checkpoint_freq,
    # time_attr, reward_attr.
    experiment_spec = tune.Experiment(
        name='tune_opt',
        run=TrainModel,
        stop=stopping_criteria,
        config=config,
        resources_per_trial={'cpu': 8, 'gpu': 0.5},
        num_samples=16,
        local_dir='~/ray_results',
        checkpoint_freq=8,
        checkpoint_at_end=True,
        trial_name_creator=tune.function(
            functools.partial(trial_str_creator, name=args.name))
    )

    pbt = tune.schedulers.PopulationBasedTraining(
        time_attr="training_iteration",
        reward_attr="episode_reward_mean",
        perturbation_interval=8,  # Mutation interval in time_attr units
        hyperparam_mutations=hp_resamples_pbt,
        resample_probability=0.25  # Resampling resets to a value sampled from lambda
    )

    ahb = tune.schedulers.AsyncHyperBandScheduler(
        time_attr="training_iteration",
        reward_attr="episode_reward_mean",
        max_t=64,
        grace_period=8,
        reduction_factor=3,
        brackets=3
    )

    scheduler = ahb
    if args.pbt is True:
        scheduler = pbt

    trials = tune.run_experiments(
        experiments=experiment_spec,
        scheduler=scheduler,
        resume=False  # "prompt"
    )

    # Save results
    t_ids = [t.trial_id for t in trials]
    t_config = [t.config for t in trials]
    t_result = [t.last_result for t in trials]
    df = pd.DataFrame([t_ids, t_config, t_result]).transpose()
    df.columns = ['name', 'config', 'result']
    df.to_hdf('{}_results.h5'.format(args.name), key='df')
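# --- Hedged example (not part of the original snippet) ---
# The hp_config / hp_resamples_pbt placeholders above are left empty. A
# minimal sketch of what they might contain, using the same old Tune API as
# the snippet (tune.sample_from / tune.grid_search for the search space,
# plain lambdas or value lists for PBT mutations). The parameter names
# "lr" and "weight_decay" are hypothetical, not taken from the source:
import numpy as np
from ray import tune

example_hp_config = {
    # Sampled once when each trial is created.
    "lr": tune.sample_from(lambda spec: 10 ** np.random.uniform(-4, -2)),
    "weight_decay": tune.grid_search([1e-5, 1e-4, 1e-3]),
}

example_hp_resamples_pbt = {
    # PBT either perturbs the current value or resamples from these.
    "lr": lambda: 10 ** np.random.uniform(-4, -2),
    "weight_decay": [1e-5, 1e-4, 1e-3],
}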
experiment_spec = tune.Experiment(
    # Name Structure
    # Algorithm_ObservationSpace_Seed_Gamma_Alpha_Beta_LearningRate
    # _ExplorationAnnealingTimesteps_ExplorationFraction
    # _PrioritizedReplay_Hidden_Noisy_Dueling_DoubleQ_NetworkUpdateFreq_Buffer
    #
    # ie. DQN_default_s20_g0.6_a10_b1_lr5e-4_et50k_e0.1_prF_h256_nF_dF_qqF_u800
    name=NAME,
    run=OPTIONS['alg'],
    checkpoint_freq=3,
    checkpoint_at_end=True,
    config={
        # === Resources ===
        # Number of actors used for parallelism
        "num_workers": 0,
        # Number of GPUs to allocate to the driver. Note that not all
        # algorithms can take advantage of driver GPUs. This can be a
        # fraction (e.g., 0.3 GPUs).
        "num_gpus": 0,
        # Number of CPUs to allocate per worker.
        "num_cpus_per_worker": 6,
        # Number of GPUs to allocate per worker. This can be fractional.
        "num_gpus_per_worker": 0,
        # Any custom resources to allocate per worker.
        "custom_resources_per_worker": {},
        # Number of CPUs to allocate for the driver. Note: this only takes
        # effect when running in Tune.
        "num_cpus_for_driver": 1,

        # === Execution ===
        # Number of environments to evaluate vectorwise per worker.
        ## "num_envs_per_worker": 1,
        # Default sample batch size
        "sample_batch_size": 4,
        # Training batch size, if applicable. Should be >= sample_batch_size.
        # Sample batches will be concatenated together to this size for
        # training.
        "train_batch_size": 32,

        # === Model ===
        # Number of atoms for representing the distribution of return. When
        # this is greater than 1, distributional Q-learning is used.
        # The discrete supports are bounded by v_min and v_max.
        "num_atoms": 1,
        "v_min": -10.0,
        "v_max": 10.0,
        # Whether to use noisy network
        "noisy": OPTIONS['noisy'],
        # Whether to use dueling dqn
        "dueling": OPTIONS['dueling'],
        # Whether to use double dqn
        "double_q": OPTIONS['dueling'],
        # Hidden layer sizes of the state and action value networks
        "hiddens": OPTIONS['hidden'],
        # N-step Q learning
        "n_step": 1,

        # === Exploration ===
        # Max num timesteps for annealing schedules. Exploration is annealed
        # from 1.0 to exploration_final_eps over this number of timesteps
        # scaled by exploration_fraction.
        "schedule_max_timesteps": OPTIONS['epsilon_ts'],
        # Number of env steps to optimize for before returning
        # morning: 10800 / 10 = 1080 steps total
        "timesteps_per_iteration": 1080,
        # Fraction of entire training period over which the exploration rate
        # is annealed
        "exploration_fraction": 1,
        # Final value of random action probability
        "exploration_final_eps": OPTIONS['epsilon'],
        # Update the target network every `target_network_update_freq` steps.
        "target_network_update_freq": OPTIONS['update_freq'],

        # === Replay buffer ===
        # Size of the replay buffer. Note that if async_updates is set, then
        # each worker will have a replay buffer of this size.
        "buffer_size": OPTIONS['buffer'],
        # If True prioritized replay buffer will be used.
        "prioritized_replay": OPTIONS['pr'],
        # Alpha parameter for prioritized replay buffer.
        "prioritized_replay_alpha": 0.6,
        # Beta parameter for sampling from prioritized replay buffer.
        "prioritized_replay_beta": 0.4,
        # Fraction of entire training period over which the beta parameter
        # is annealed
        "beta_annealing_fraction": 0.2,
        # Final value of beta
        "final_prioritized_replay_beta": 0.4,
        # Epsilon to add to the TD errors when updating priorities.
        "prioritized_replay_eps": 1e-6,
        # Whether to LZ4 compress observations
        "compress_observations": True,

        # === Optimization ===
        # Learning rate for adam optimizer
        "lr": OPTIONS['lr'],
        # Adam epsilon hyper parameter
        "adam_epsilon": 1e-8,
        # If not None, clip gradients during optimization at this value
        "grad_norm_clipping": 40,
        # How many steps of the model to sample before learning starts.
        "learning_starts": 2160,
        # Update the replay buffer with this many samples at once. Note that
        # this setting applies per-worker if num_workers > 1.
        "sample_batch_size": 4,
        # Size of a batch sampled from the replay buffer for training. Note
        # that if async_updates is set, then each worker returns gradients
        # for a batch of this size.
        "train_batch_size": 32,

        # === Parallelism ===
        # Optimizer class to use.
        "optimizer_class": "SyncReplayOptimizer",
        # Whether to use a distribution of epsilons across workers for
        # exploration.
        "per_worker_exploration": False,
        # Whether to compute priorities on workers.
        "worker_side_prioritization": False,
        # Prevent iterations from going lower than this time span
        "min_iter_time_s": 1,

        # === Environment ===
        # Discount factor of the MDP
        "gamma": OPTIONS['gamma'],
        "env": ChulaSSSEnv,
        "env_config": {
            "observation_space": OPTIONS['obs_space'],
            "time_select": "morning",
            "great_edition": True,
            "with_gui": False,
            "with_libsumo": True,
            "no_internal_links": True,
            "time_to_teleport": -1,
            "viewport": "surasak",
            "step_length": 1,
            "seed": OPTIONS['seed'],
            "impatience_time": 300,
            "step_size": 10,
            "alpha": OPTIONS['alpha'],
            "beta": OPTIONS['beta'],
            "name": NAME  # for logging
        }
    })
def launch_local_experiment(init_algo_functions_and_log_fnames,
                            exp_variant,
                            use_gpu=False,
                            exp_prefix='test',
                            seeds=1,
                            checkpoint_freq=50,
                            max_failures=10,
                            resume=False,
                            local_ray=True,
                            from_remote=False,
                            resources_per_trial=None,
                            logging_level=logging.DEBUG):
    """Launches a ray experiment locally.

    Args:
        init_algo_functions_and_log_fnames ((function, str)[]): a list of
            tuples. The first element of each tuple is a function that
            returns an algo in ray format (i.e., has a _train() method that
            returns a log dict and will train for a single epoch). The
            second element is the filename of the logging file.
        exp_variant (dict): the experiment variant. This will be passed in
            each time to each init_algo_function in
            init_algo_functions_and_log_fnames.
        use_gpu (bool):
        exp_prefix (str):
        seeds (int):
        checkpoint_freq (int): how often to checkpoint for handling failures.
        max_failures (int): how many times to retry if a trial fails. Useful
            for remote launching.
        resume (bool): whether the trials should try to resume a failed
            trial if possible.
        local_ray (bool): whether to use local_ray mode. stdout gets printed
            and pdb is possible when local_ray=True.
        from_remote (bool): whether the experiment is being launched from a
            remote instance. The user should not set this; it is set
            automatically by remote_launch.py.
        resources_per_trial (dict): specify {'cpu': float, 'gpu': float}.
            This is the amount of resources allocated to the trial.
        logging_level:
    """
    if from_remote:
        redis_address = ray.services.get_node_ip_address() + ':6379'
        ray.init(redis_address=redis_address, logging_level=logging_level)
    else:
        ray.init(local_mode=local_ray)

    for idx, (init_func, log_fname) in enumerate(init_algo_functions_and_log_fnames):
        init_algo_functions_and_log_fnames[idx] = (
            tune.function(init_func),
            log_fname
        )

    exp = tune.Experiment(
        name=exp_prefix,
        run=SequentialRayExperiment,
        upload_dir=config.LOG_BUCKET,
        num_samples=seeds,
        stop={"global_done": True},
        config={
            'algo_variant': exp_variant,
            'init_algo_functions_and_log_fnames': init_algo_functions_and_log_fnames,
            'use_gpu': use_gpu,
            'resources_per_trial': resources_per_trial,
        },
        resources_per_trial=resources_per_trial,
        checkpoint_freq=checkpoint_freq,
        loggers=[JsonLogger, SequentialCSVLogger],
    )

    tune.run(
        exp,
        resume=resume,
        max_failures=max_failures,
        queue_trials=True,
    )
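# --- Hedged usage sketch (not part of the original snippet) ---
# Roughly how launch_local_experiment might be called. ToyAlgo and
# init_toy_algo are hypothetical placeholders that follow the "ray format"
# described in the docstring (a _train() method returning a log dict); the
# real contract with SequentialRayExperiment may differ.
class ToyAlgo:
    def __init__(self, variant):
        self.variant = variant
        self.epoch = 0

    def _train(self):
        # Train for one "epoch" and return a dict of logged metrics.
        self.epoch += 1
        return {'epoch': self.epoch, 'loss': 1.0 / self.epoch}


def init_toy_algo(variant):
    return ToyAlgo(variant)


launch_local_experiment(
    init_algo_functions_and_log_fnames=[(init_toy_algo, 'toy_algo.csv')],
    exp_variant={'lr': 1e-3},          # hypothetical variant contents
    exp_prefix='toy_experiment',
    seeds=2,
    local_ray=True,
    resources_per_trial={'cpu': 1, 'gpu': 0},
)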
def main():
    parser = argparse.ArgumentParser(description='ChulaSSSEnv DQN Runner')

    # === Flags for Name Arguments ===
    parser.add_argument('-A', '--algorithm', action='store', default='DQN',
                        type=str, help='The algorithm to train',
                        choices=['DQN', 'APEX'])
    parser.add_argument('-O', '--observation', action='store', default='default',
                        type=str, help='The observation space',
                        choices=['default', 'all3', 'all3_no_downstream',
                                 'no_downstream'])
    parser.add_argument('-s', '--seed', action='store', default=20, type=int,
                        help='Seed number')
    parser.add_argument('-g', '--gamma', action='store', default=0.9, type=float,
                        help='Discount Factor')
    parser.add_argument('-a', '--alpha', action='store', default=10.0, type=float,
                        help='Reward throughput coefficient')
    parser.add_argument('-b', '--beta', action='store', default=0.0, type=float,
                        help='Reward backlog coefficient')
    parser.add_argument('-l', '--learningRate', action='store', default=5e-4,
                        type=float,
                        help='Learning Rate (scientific notation) ie. 5e-4')
    parser.add_argument('-T', '--annealTimeStep', action='store', default='100k',
                        type=str, help='Exploration Annealing Timesteps (in k)')
    parser.add_argument('-e', '--epsilon', action='store', default=0.1, type=float,
                        help='The exploration fraction to anneal to')
    parser.add_argument('-p', '--prioritizedReplay', action='store_true',
                        help='Whether to use prioritized replay')
    parser.add_argument('-H', '--hidden', action='store', default='256', type=str,
                        help='Hidden Layers (comma separated)')
    parser.add_argument('-N', '--noisy', action='store_true', help='Noisy network')
    parser.add_argument('-D', '--dueling', action='store_true', help='Dueling DQN')
    parser.add_argument('-d', '--double', action='store_true', help='Double DQN')
    parser.add_argument('-u', '--updateFreq', action='store', default=800, type=int,
                        help='Network update frequency')
    parser.add_argument('-B', '--buffer', action='store', default='100k', type=str,
                        help='Size of replay buffer (in k)')
    parser.add_argument('-L', '--load', action='store', default=1.0, type=float,
                        help='Load factor of Great routes')

    # === Flags for running arguments ===
    parser.add_argument('-i', '--trainIter', action='store', default=100000,
                        type=int, help='Training Iteration')
    parser.add_argument('-c', '--checkFreq', action='store', default=5, type=int,
                        help='Checkpoint saving frequency')
    parser.add_argument('--learningStart', action='store', default=4320, type=int,
                        help='Steps before Learning starts')
    parser.add_argument('--trainBatch', action='store', default=32, type=int,
                        help='Training batch size')
    parser.add_argument('--sampleBatch', action='store', default=4, type=int,
                        help='Sample batch size')

    args = parser.parse_args()

    # Name Structure
    # Algorithm_ObservationSpace_Seed_Gamma_Alpha_Beta_LearningRate
    # _ExplorationAnnealingTimesteps_ExplorationFraction
    # _PrioritizedReplay_Hidden_Noisy_Dueling_DoubleQ_NetworkUpdateFreq_Buffer
    # _LoadFactor
    #
    # ie.
    # DQN_default_s20_g0.6_a10_b1_lr5e-4_et50k_e0.1_pr0_h256_n0_d0_qq0_u800_l1.0
    NAME = "{}_{}_s{}_g{}_a{}_b{}_lr{:.0e}_et{}_e{}_pr{:n}_h{}_n{:n}_d{:n}_qq{:n}_u{}_b{}_l{}".format(
        args.algorithm, args.observation, args.seed, args.gamma, args.alpha,
        args.beta, args.learningRate, args.annealTimeStep, args.epsilon,
        args.prioritizedReplay, args.hidden, args.noisy, args.dueling,
        args.double, args.updateFreq, args.buffer, args.load)
    print("Starting Experiment with name {}".format(NAME))

    OPT = NAME.split("_")
    OPTIONS = {
        "alg": OPT[0],
        "obs_space": OPT[1],
        "seed": int(OPT[2][1:]),
        "gamma": float(OPT[3][1:]),
        "alpha": float(OPT[4][1:]),
        "beta": float(OPT[5][1:]),
        "lr": float(OPT[6][2:]),
        "epsilon_ts": int(OPT[7][2:-1]) * 1000,
        "epsilon": float(OPT[8][1:]),
        "pr": bool(int(OPT[9][2:])),
        "hidden": list(map(int, OPT[10][1:].split(','))),
        "noisy": bool(int(OPT[11][1:])),
        "dueling": bool(int(OPT[12][1:])),
        "doubleQ": bool(int(OPT[13][2:])),
        "update_freq": int(OPT[14][1:]),
        "buffer": int(OPT[15][1:-1]) * 1000,
        "load": float(OPT[16][1:])
    }

    ray.init(
        # object_store_memory=int(4e9),  # 4gb
        # redis_max_memory=int(2e9)  # 2gb
    )

    experiment_spec = tune.Experiment(
        name=NAME,
        run=OPTIONS["alg"],
        checkpoint_freq=args.checkFreq,
        checkpoint_at_end=True,
        stop={"training_iteration": args.trainIter},
        upload_dir="gs://ray_results/",
        custom_loggers=[ActionLogger],
        config={
            # === Configure Callbacks ===
            "callbacks": {
                "on_episode_start": tune.function(logger_callbacks.on_episode_start),
                "on_episode_step": tune.function(logger_callbacks.on_episode_step),
                "on_episode_end": tune.function(logger_callbacks.on_episode_end),
                "on_sample_end": tune.function(logger_callbacks.on_sample_end),
                "on_train_result": tune.function(logger_callbacks.on_train_result),
            },

            # === Resources ===
            # Number of actors used for parallelism
            "num_workers": 0,
            # Number of GPUs to allocate to the driver. Note that not all
            # algorithms can take advantage of driver GPUs. This can be a
            # fraction (e.g., 0.3 GPUs).
            "num_gpus": 0,
            # Number of CPUs to allocate per worker.
            "num_cpus_per_worker": 4,
            # Number of GPUs to allocate per worker. This can be fractional.
            "num_gpus_per_worker": 0,
            # Any custom resources to allocate per worker.
            "custom_resources_per_worker": {},
            # Number of CPUs to allocate for the driver. Note: this only
            # takes effect when running in Tune.
            "num_cpus_for_driver": 1,

            # === Model ===
            # Number of atoms for representing the distribution of return.
            # When this is greater than 1, distributional Q-learning is used.
            # The discrete supports are bounded by v_min and v_max.
            "num_atoms": 1,
            "v_min": -10.0,
            "v_max": 10.0,
            # Whether to use noisy network
            "noisy": OPTIONS['noisy'],
            # Whether to use dueling dqn
            "dueling": OPTIONS['dueling'],
            # Whether to use double dqn
            "double_q": OPTIONS['doubleQ'],
            # Hidden layer sizes of the state and action value networks
            "hiddens": OPTIONS['hidden'],
            # N-step Q learning
            "n_step": 1,

            # === Exploration ===
            # Max num timesteps for annealing schedules. Exploration is
            # annealed from 1.0 to exploration_final_eps over this number of
            # timesteps scaled by exploration_fraction.
            "schedule_max_timesteps": OPTIONS['epsilon_ts'],
            # Number of env steps to optimize for before returning
            # morning: 10800 / 10 = 1080 steps total
            "timesteps_per_iteration": 1080,
            # Fraction of entire training period over which the exploration
            # rate is annealed
            "exploration_fraction": 1,
            # Final value of random action probability
            "exploration_final_eps": OPTIONS['epsilon'],
            # Update the target network every `target_network_update_freq` steps.
"target_network_update_freq": OPTIONS['update_freq'], # === Replay buffer === # Size of the replay buffer. Note that if async_updates is set, then # each worker will have a replay buffer of this size. "buffer_size": OPTIONS['buffer'], # If True prioritized replay buffer will be used. "prioritized_replay": OPTIONS['pr'], # Alpha parameter for prioritized replay buffer. "prioritized_replay_alpha": 0.6, # Beta parameter for sampling from prioritized replay buffer. "prioritized_replay_beta": 0.4, # Fraction of entire training period over which the beta parameter is # annealed "beta_annealing_fraction": 0.2, # Final value of beta "final_prioritized_replay_beta": 0.4, # Epsilon to add to the TD errors when updating priorities. "prioritized_replay_eps": 1e-6, # Whether to LZ4 compress observations "compress_observations": True, # === Optimization === # Learning rate for adam optimizer "lr": OPTIONS['lr'], # Adam epsilon hyper parameter "adam_epsilon": 1e-8, # If not None, clip gradients during optimization at this value "grad_norm_clipping": 40, # How many steps of the model to sample before learning starts. "learning_starts": args.learningStart, # Update the replay buffer with this many samples at once. Note that # this setting applies per-worker if num_workers > 1. # Default sample batch size "sample_batch_size": args.sampleBatch, # Size of a batched sampled from replay buffer for training. Note that # if async_updates is set, then each worker returns gradients for a # batch of this size. # Training batch size, if applicable. Should be >= sample_batch_size. # Samples batches will be concatenated together to this size for training. "train_batch_size": args.trainBatch, # === Parallelism === # Optimizer class to use. "optimizer_class": "SyncReplayOptimizer", # Whether to use a distribution of epsilons across workers for exploration. "per_worker_exploration": False, # Whether to compute priorities on workers. "worker_side_prioritization": False, # Prevent iterations from going lower than this time span "min_iter_time_s": 1, # === Environment === # Discount factor of the MDP "gamma": OPTIONS['gamma'], "env": ChulaSSSEnv, "env_config": { "observation_space": OPTIONS['obs_space'], "time_select": TIME_SELECT_STR, "great_edition": GREAT_EDITION, "with_gui": WITH_GUI, "with_libsumo": WITH_LIBSUMO, "no_internal_links": True, "time_to_teleport": -1, "viewport": VIEWPORT, "step_length": STEP_LENGTH, "seed": OPTIONS['seed'], "impatience_time": IMPATIENCE_TIME, "step_size": STEP_SIZE, "alpha": OPTIONS['alpha'], "beta": OPTIONS['beta'], "name": NAME, "load": OPTIONS['load'] } }) tune.run_experiments(experiment_spec, resume='prompt')
def fit(self, x_user, x_questions, y_vals, **kwargs):
    kwargs.setdefault('latent_traits', None)
    kwargs.setdefault('batch_size', 16)
    kwargs.setdefault('epochs', 64)
    kwargs.setdefault('validation_split', 0.2)
    kwargs.setdefault('params', self.params)

    self.proxy_model.l_traits = kwargs['latent_traits']
    self.proxy_model.x_train_user = x_user
    self.proxy_model.x_train_questions = x_questions
    self.proxy_model.y_ = y_vals
    self.l_traits = kwargs['latent_traits']

    # Confirm that params were given in either of the (init or fit) methods
    self.params = self.params or kwargs['params']
    if self.params is not None:
        # Validate implementation with different types of tune input
        if not isinstance(self.params, dict):
            raise TypeError("Params should be of type 'dict'")
        self.params = _parse_params(self.params, return_as='flat')
        self.proxy_model.update_params(self.params)

    # Triggers for the fourPL model
    if (self.proxy_model.name == 'tpm' and 'slip_params' in self.params
            and 'train' in self.params['slip_params'].keys()):
        if self.params['slip_params']['train']:
            self.proxy_model.name = 'fourPL'

    ray_verbose = False
    _ray_log_level = logging.INFO if ray_verbose else logging.ERROR
    ray.init(log_to_driver=False, logging_level=_ray_log_level,
             ignore_reinit_error=True,
             redis_max_memory=20 * 1000 * 1000 * 1000,
             object_store_memory=1000000000, num_cpus=4)

    def train_model(config, reporter):
        self.proxy_model.set_params(params=config, set_by='optimizer')
        print('\nInitializing fit for {} model. . .\nBatch_size: {}; epochs: {};'.format(
            self.proxy_model.name, kwargs['batch_size'], kwargs['epochs']))
        model = self.proxy_model.create_model()
        self.history = model.fit(x=[x_user, x_questions], y=y_vals,
                                 batch_size=kwargs['batch_size'],
                                 epochs=kwargs['epochs'], verbose=0,
                                 validation_split=kwargs['validation_split'])
        _, mae, accuracy = model.evaluate(x=[x_user, x_questions], y=y_vals)  # [1]
        last_checkpoint = "weights_tune_{}.h5".format(
            list(zip(np.random.choice(10, len(config), replace=False), config)))
        model.save_weights(last_checkpoint)
        reporter(mean_error=mae, mean_accuracy=accuracy,
                 checkpoint=last_checkpoint)

    t1 = time.time()
    configuration = tune.Experiment("experiment_name",
                                    run=train_model,
                                    resources_per_trial={"cpu": 4},
                                    stop={"mean_error": 0.15,
                                          "mean_accuracy": 95},
                                    config=self.proxy_model.get_params())
    trials = tune.run_experiments(configuration, verbose=0)
    self.trials = trials

    metric = "mean_error"  # "mean_accuracy"

    # Restore a model from the best trial.
    def get_sorted_trials(trial_list, metric):
        return sorted(trial_list,
                      key=lambda trial: trial.last_result.get(metric, 0),
                      reverse=True)

    sorted_trials = get_sorted_trials(trials, metric)
    for best_trial in sorted_trials:
        try:
            print("Creating model...")
            self.proxy_model.set_params(params=best_trial.config,
                                        set_by='optimizer')
            best_model = self.proxy_model.create_model()
            weights = os.path.join(best_trial.logdir,
                                   best_trial.last_result["checkpoint"])
            print("Loading from", weights)
            # TODO Validate this loaded model.
            best_model.load_weights(weights)
            break
        except Exception as e:
            print(e)
            print("Loading failed. Trying next model")

    exe_time = time.time() - t1
    self.model = best_model
    # self.model = model
    # print('\nInitializing fit for {} model. . .\nBatch_size: {}; epochs: {};'.format(
    #     self.proxy_model.name, kwargs['batch_size'], kwargs['epochs']))
    # model = self.proxy_model.create_model()
    # t1 = time.time()
    # self.history = model.fit(x=[x_user, x_questions], y=y_vals,
    #                          batch_size=kwargs['batch_size'],
    #                          epochs=kwargs['epochs'], verbose=0,
    #                          validation_split=kwargs['validation_split'])
    #                          # , callbacks=kwargs['callbacks'])  # added callbacks
    # exe_time = time.time() - t1
    # self.model = model

    # The following lets the user access each coefficient as and when required
    self.difficulty = self.coefficients()['difficulty_level']
    self.discrimination = self.coefficients()['disc_param']
    self.guessing = self.coefficients()['guessing_param']
    self.slip = self.coefficients()['slip_param']

    num_trainables = np.sum([K.count_params(layer)
                             for layer in self.model.trainable_weights])
    sample_size = y_vals.shape[0]
    log_lik, _, _ = self.model.evaluate(x=[x_user, x_questions], y=y_vals)
    self.AIC = 2 * num_trainables - 2 * np.log(log_lik)
    self.AICc = self.AIC + (2 * np.square(num_trainables) +
                            2 * num_trainables) / (sample_size - num_trainables - 1)

    print('\nTraining on : {} samples for : {} epochs has completed in : {} seconds.'.format(
        self.proxy_model.x_train_user.shape[0], kwargs['epochs'],
        np.round(exe_time, decimals=3)))
    print('\nAIC value: {} and AICc value: {}'.format(
        np.round(self.AIC, 3), np.round(self.AICc, 3)))
    print('\nUse `object.plot()` to view train/validation loss curves;'
          '\nUse `object.history` to obtain train/validation loss across all the epochs.'
          '\nUse `object.coefficients()` to obtain model parameters--'
          'Question difficulty, discrimination, guessing & slip')
    print('Use `object.AIC` & `object.AICc` to obtain Akaike Information '
          'Criterion (AIC & AICc) values.')
    return self
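# --- Hedged usage sketch (not part of the original snippet) ---
# Roughly how fit() might be invoked. `IrtEstimator`, the random toy arrays,
# and the constructor arguments are hypothetical placeholders and may not
# match the real API of the surrounding library.
import numpy as np

x_user = np.eye(20)[np.random.randint(0, 20, size=200)]        # 200 responses, 20 users
x_questions = np.eye(10)[np.random.randint(0, 10, size=200)]   # 10 questions
y_vals = np.random.randint(0, 2, size=(200,)).astype(float)    # correct / incorrect

estimator = IrtEstimator(params=None)   # hypothetical constructor
estimator.fit(x_user, x_questions, y_vals, batch_size=32, epochs=8)
print(estimator.AIC, estimator.AICc)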
import ray
from ray import tune

from ChulaSSSEnv import ChulaSSSEnv

if __name__ == "__main__":
    ray.init()
    experiment_spec = tune.Experiment(
        name="experiment_dqn",
        run="DQN",
        config={
            "num_gpus": 0,
            "num_workers": 1,
            "env": ChulaSSSEnv,
            "env_config": {
                "observation_space": "default",
                "time_select": "morning",
                "great_edition": True,
                "with_gui": False,
                "with_libsumo": True,
                "no_internal_links": True,
                "time_to_teleport": -1,
                "viewport": "surasak",
                "step_length": 1,
                "seed": 20,
                "impatience_time": 300,
                "step_size": 10,
                "alpha": 10,
                "beta": 1,
            }
        }
    )
    tune.run_experiments(experiment_spec, resume=True)