Example #1
    elif args.local_mode or config.pop('local_mode', False):
        resume = False
        local_mode = True
        max_failures = 0
    
    if args.temp_dir is None:
        args.temp_dir = config.pop('temp_dir', None)

    if args.name is not None:
        name = args.name
        config.pop('name', None)
    else:
        name = config.pop('name', "{}_training".format(os.getlogin()))

    exp = tune.Experiment(
                name=name,
                run=get_trainable(config.pop('train_class')),
                trial_name_creator=tune.function(trial_str_creator),
                loggers=[GIFLogger],
                resources_per_trial={"cpu": 1, "gpu": args.n_gpus},
                checkpoint_freq=config.pop('save_freq', 5000),
                upload_dir=config.pop('upload_dir', None),
                local_dir=config.pop('local_dir', None),
                config=config                                   # evaluate last to allow all popping above
    )
    
    ray.init(redis_address=redis_address, local_mode=local_mode, temp_dir=args.temp_dir)
    trials = tune.run(exp, queue_trials=True, resume=resume,
                      checkpoint_at_end=True, max_failures=max_failures)
    exit(0)
            "policy_graphs": {
                "def_policy":
                (VTracePolicyGraph, Box(0.0, 255.0,
                                        shape=(84, 84, 3)), Discrete(9), {
                                            "gamma": 0.99
                                        })
            },
            "policy_mapping_fn": tune.function(lambda agent_id: "def_policy"),
        },
        "env_config": env_actor_configs,
        "num_workers": args.num_workers,
        "num_envs_per_worker": args.envs_per_worker,
        "sample_batch_size": args.sample_bs_per_worker,
        "train_batch_size": args.train_bs
    }

    experiment_spec = tune.Experiment(
        "multi-carla/" + args.model_arch,
        "IMPALA",
        # timesteps_total is init with None (not 0) which causes issue
        # stop={"timesteps_total": args.num_steps},
        stop={"timesteps_since_restore": args.num_steps},
        config=config,
        checkpoint_freq=1000,
        checkpoint_at_end=True,
        resources_per_trial={
            "cpu": 4,
            "gpu": 1
        })
    tune.run_experiments(experiment_spec)
Example #3
def main(**kwargs):

    print('===> Tuning hyperparameters. For normal training use `train.py`')
    print('===> TensorFlow v. {}'.format(tf.__version__))
    
    if args.max_time:
        print('Tuning process will terminate after {} seconds'.format(str(args.max_time)))
        os.environ['TRIALRUNNER_WALLTIME_LIMIT'] = str(args.max_time)

    n_gpus = 2
    if args.gpu_id is not None:
        print('Restricting visible GPUs.')
        print('Using: GPU {}'.format(args.gpu_id))
        os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"
        os.environ["CUDA_VISIBLE_DEVICES"]=str(args.gpu_id)
        n_gpus = 1
    
    ray.init(num_gpus=n_gpus)

    features_id, labels_id = Data.load_data(directories.train, tune=True)
    test_features_id, test_labels_id = Data.load_data(directories.test, tune=True)

    """
    Adjust stopping criteria - this terminates individual trials
    """
    stopping_criteria = {  # For individual trials
        "time_total_s": args.max_time_s,
        "episode_reward_mean": 1.0,  # Otherwise negative loss
        "mean_accuracy": 1.0
    }

    # Hyperparameters to be optimized
    # Important to define search space sensibly
    config = {
        "args": args,
        "user_config": config_train,
        "train_ids": [features_id, labels_id],
        "test_ids": [test_features_id, test_labels_id],
    }

    hp_config = {
        # Include hyperparameters to be tuned, and their permitted range
    }

    config.update(hp_config)

    hp_resamples_pbt = {
        # Include perturbation/resample ranges for population-based training
    }
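
    # Illustrative sketch only (an assumption, not from the original source):
    # the two placeholder dicts above might be filled in along these lines for
    # a hypothetical learning-rate / dropout search with this Ray Tune version:
    #
    #   hp_config = {
    #       "lr": tune.grid_search([1e-4, 5e-4, 1e-3]),
    #       "dropout": tune.grid_search([0.1, 0.3, 0.5]),
    #   }
    #   hp_resamples_pbt = {
    #       "lr": lambda: random.uniform(1e-5, 1e-3),  # resampled on perturbation
    #       "dropout": [0.1, 0.3, 0.5],                # perturbed to a listed value
    #   }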

    # Specify experiment configuration
    # Default uses machine with 32 CPUs / 2 GPUs
    """
    Params to modify:
        num_samples (grid fineness)
        checkpoint_freq
        time_attr
        reward_attr
    """

    experiment_spec = tune.Experiment(
        name='tune_opt',
        run=TrainModel,
        stop=stopping_criteria,
        config=config,
        resources_per_trial={'cpu': 8, 'gpu': 0.5},
        num_samples=16,
        local_dir='~/ray_results',
        checkpoint_freq=8,
        checkpoint_at_end=True,
        trial_name_creator=tune.function(functools.partial(trial_str_creator, 
            name=args.name))
    )

    pbt = tune.schedulers.PopulationBasedTraining(
        time_attr="training_iteration",
        reward_attr="episode_reward_mean",
        perturbation_interval=8,  # Mutation interval in time_attr units
        hyperparam_mutations=hp_resamples_pbt,
        resample_probability=0.25  # Resampling resets to value sampled from lambda
    )

    ahb = tune.schedulers.AsyncHyperBandScheduler(
        time_attr="training_iteration",
        reward_attr="episode_reward_mean",
        max_t=64,
        grace_period=8,
        reduction_factor=3,
        brackets=3
    )

    scheduler = ahb
    if args.pbt is True:
        scheduler = pbt

    trials = tune.run_experiments(
        experiments=experiment_spec,
        scheduler=scheduler,
        resume=False  # "prompt"
    )

    # Save results 
    t_ids = [t.trial_id for t in trials]
    t_config = [t.config for t in trials]
    t_result = [t.last_result for t in trials]
    df = pd.DataFrame([t_ids, t_config, t_result]).transpose()
    df.columns = ['name', 'config', 'result']
    df.to_hdf('{}_results.h5'.format(args.name), key='df')
Example #4
    experiment_spec = tune.Experiment(
        # Name Structure
        # Algorithm_ObservationSpace_Seed_Gamma_Alpha_Beta_LearningRate
        #          _ExplorationAnnealingTimesteps_ExplorationFraction
        #          _PrioritizedReplay_Hidden_Noisy_Dueling_DoubleQ_NetworkUpdateFreq_Buffer
        #
        # ie. DQN_default_s20_g0.6_a10_b1_lr5e-4_et50k_e0.1_prF_h256_nF_dF_qqF_u800
        name=NAME,
        run=OPTIONS['alg'],
        checkpoint_freq=3,
        checkpoint_at_end=True,
        config={
            # === Resources ===
            # Number of actors used for parallelism
            "num_workers": 0,
            # Number of GPUs to allocate to the driver. Note that not all algorithms
            # can take advantage of driver GPUs. This can be fraction (e.g., 0.3 GPUs).
            "num_gpus": 0,
            # Number of CPUs to allocate per worker.
            "num_cpus_per_worker": 6,
            # Number of GPUs to allocate per worker. This can be fractional.
            "num_gpus_per_worker": 0,
            # Any custom resources to allocate per worker.
            "custom_resources_per_worker": {},
            # Number of CPUs to allocate for the driver. Note: this only takes effect
            # when running in Tune.
            "num_cpus_for_driver": 1,

            # === Execution ===
            # Number of environments to evaluate vectorwise per worker.
            ##            "num_envs_per_worker": 1,
            # sample_batch_size and train_batch_size are configured once under
            # === Optimization === below.

            # === Model ===
            # Number of atoms for representing the distribution of return. When
            # this is greater than 1, distributional Q-learning is used.
            # the discrete supports are bounded by v_min and v_max
            "num_atoms": 1,
            "v_min": -10.0,
            "v_max": 10.0,
            # Whether to use noisy network
            "noisy": OPTIONS['noisy'],
            # Whether to use dueling dqn
            "dueling": OPTIONS['dueling'],
            # Whether to use double dqn
            "double_q": OPTIONS['dueling'],
            # Hidden layer sizes of the state and action value networks
            "hiddens": OPTIONS['hidden'],
            # N-step Q learning
            "n_step": 1,

            # === Exploration ===
            # Max num timesteps for annealing schedules. Exploration is annealed from
            # 1.0 to exploration_fraction over this number of timesteps scaled by
            # exploration_fraction
            "schedule_max_timesteps": OPTIONS['epsilon_ts'],
            # Number of env steps to optimize for before returning
            #morning: 10800/10 = 1080 steps total
            "timesteps_per_iteration": 1080,
            # Fraction of entire training period over which the exploration rate is
            # annealed
            "exploration_fraction": 1,
            # Final value of random action probability
            "exploration_final_eps": OPTIONS['epsilon'],
            # Update the target network every `target_network_update_freq` steps.
            "target_network_update_freq": OPTIONS['update_freq'],

            # === Replay buffer ===
            # Size of the replay buffer. Note that if async_updates is set, then
            # each worker will have a replay buffer of this size.
            "buffer_size": OPTIONS['buffer'],
            # If True prioritized replay buffer will be used.
            "prioritized_replay": OPTIONS['pr'],
            # Alpha parameter for prioritized replay buffer.
            "prioritized_replay_alpha": 0.6,
            # Beta parameter for sampling from prioritized replay buffer.
            "prioritized_replay_beta": 0.4,
            # Fraction of entire training period over which the beta parameter is
            # annealed
            "beta_annealing_fraction": 0.2,
            # Final value of beta
            "final_prioritized_replay_beta": 0.4,
            # Epsilon to add to the TD errors when updating priorities.
            "prioritized_replay_eps": 1e-6,
            # Whether to LZ4 compress observations
            "compress_observations": True,

            # === Optimization ===
            # Learning rate for adam optimizer
            "lr": OPTIONS['lr'],
            # Adam epsilon hyper parameter
            "adam_epsilon": 1e-8,
            # If not None, clip gradients during optimization at this value
            "grad_norm_clipping": 40,
            # How many steps of the model to sample before learning starts.
            "learning_starts": 2160,
            # Update the replay buffer with this many samples at once. Note that
            # this setting applies per-worker if num_workers > 1.
            "sample_batch_size": 4,
            # Size of a batched sampled from replay buffer for training. Note that
            # if async_updates is set, then each worker returns gradients for a
            # batch of this size.
            "train_batch_size": 32,

            # === Parallelism ===
            # Optimizer class to use.
            "optimizer_class": "SyncReplayOptimizer",
            # Whether to use a distribution of epsilons across workers for exploration.
            "per_worker_exploration": False,
            # Whether to compute priorities on workers.
            "worker_side_prioritization": False,
            # Prevent iterations from going lower than this time span
            "min_iter_time_s": 1,

            # === Environment ===
            # Discount factor of the MDP
            "gamma": OPTIONS['gamma'],
            "env": ChulaSSSEnv,
            "env_config": {
                "observation_space": OPTIONS['obs_space'],
                "time_select": "morning",
                "great_edition": True,
                "with_gui": False,
                "with_libsumo": True,
                "no_internal_links": True,
                "time_to_teleport": -1,
                "viewport": "surasak",
                "step_length": 1,
                "seed": OPTIONS['seed'],
                "impatience_time": 300,
                "step_size": 10,
                "alpha": OPTIONS['alpha'],
                "beta": OPTIONS['beta'],
                "name": NAME  # for logging
            }
        })
Example #5
def launch_local_experiment(init_algo_functions_and_log_fnames,
                            exp_variant, use_gpu=False,
                            exp_prefix='test', seeds=1, checkpoint_freq=50,
                            max_failures=10, resume=False, local_ray=True,
                            from_remote=False, resources_per_trial=None,
                            logging_level=logging.DEBUG):
    """Launches a ray experiment locally

    Args:
        init_algo_functions_and_log_fnames ((function, str)[]): a list of tuples.
            The first element of each tuple is a function that returns an algo
            in ray format (i.e., has a _train() method that returns a log dict
            and will train for a single epoch). The second element is the
            filename of the logging file.
        exp_variant (dict): the experiment variant. This is passed to each
            init_algo_function in init_algo_functions_and_log_fnames.
        use_gpu (bool):
        exp_prefix (str):
        seeds (int):
        checkpoint_freq (int): how often to checkpoint for handling failures.
        max_failures (int): how many times to retry if a trial fails. Useful for
            remote launching.
        resume (bool): whether trials should try to resume a previously failed
            trial if possible.
        local_ray (bool): whether to run Ray in local mode. With local_ray=True,
            stdout is printed and pdb can be used.
        from_remote (bool): If the experiment is being launched from a remote
            instance. User should not set this. Automatically set by
            remote_launch.py
        resources_per_trial (dict): Specify {'cpu': float, 'gpu': float}. This
            is the amount of resources allocated to each trial.
        logging_level:
    """
    if from_remote:
        redis_address = ray.services.get_node_ip_address() + ':6379'
        ray.init(redis_address=redis_address, logging_level=logging_level)
    else:
        ray.init(local_mode=local_ray)
    for idx, (init_func, log_fname) in enumerate(init_algo_functions_and_log_fnames):
        init_algo_functions_and_log_fnames[idx] = (
            tune.function(init_func),
            log_fname
        )
    exp = tune.Experiment(
        name=exp_prefix,
        run=SequentialRayExperiment,
        upload_dir=config.LOG_BUCKET,
        num_samples=seeds,
        stop={"global_done": True},
        config={
            'algo_variant': exp_variant,
            'init_algo_functions_and_log_fnames': init_algo_functions_and_log_fnames,
            'use_gpu': use_gpu,
            'resources_per_trial': resources_per_trial,
        },
        resources_per_trial=resources_per_trial,
        checkpoint_freq=checkpoint_freq,
        loggers=[JsonLogger, SequentialCSVLogger],
    )
    tune.run(
        exp,
        resume=resume,
        max_failures=max_failures,
        queue_trials=True,
    )
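

# Hypothetical usage sketch (an assumption, not part of the original source).
# The toy algo below only illustrates the interface described in the docstring:
# each init function takes the experiment variant and returns an object whose
# _train() runs one epoch and returns a log dict. How the "global_done" key is
# surfaced ultimately depends on SequentialRayExperiment.
class _ToyAlgo(object):
    def __init__(self, variant):
        self.lr = variant.get('lr', 1e-3)
        self.epoch = 0

    def _train(self):
        # One "epoch" of fake work; report metrics plus a done flag.
        self.epoch += 1
        return {'loss': 1.0 / self.epoch, 'global_done': self.epoch >= 5}


if __name__ == '__main__':
    launch_local_experiment(
        init_algo_functions_and_log_fnames=[
            (lambda variant: _ToyAlgo(variant), 'toy_progress.csv'),
        ],
        exp_variant={'lr': 1e-3},
        exp_prefix='toy-test',
        seeds=1,
        resources_per_trial={'cpu': 1, 'gpu': 0},
    )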
Example #6
def main():
    parser = argparse.ArgumentParser(description='ChulaSSSEnv DQN Runner')
    # === Flags for Name Arguments ===
    parser.add_argument('-A',
                        '--algorithm',
                        action='store',
                        default='DQN',
                        type=str,
                        help='The algorithm to train',
                        choices=['DQN', 'APEX'])
    parser.add_argument(
        '-O',
        '--observation',
        action='store',
        default='default',
        type=str,
        help='The observation space',
        choices=['default', 'all3', "all3_no_downstream", "no_downstream"])
    parser.add_argument('-s',
                        '--seed',
                        action='store',
                        default=20,
                        type=int,
                        help='Seed number')
    parser.add_argument('-g',
                        '--gamma',
                        action='store',
                        default=0.9,
                        type=float,
                        help='Discount Factor')
    parser.add_argument('-a',
                        '--alpha',
                        action='store',
                        default=10.0,
                        type=float,
                        help='Reward throughput coefficient')
    parser.add_argument('-b',
                        '--beta',
                        action='store',
                        default=0.0,
                        type=float,
                        help='Reward backlog coefficient')
    parser.add_argument('-l',
                        '--learningRate',
                        action='store',
                        default=5e-4,
                        type=str,
                        help='Learning Rate (scientific notation) ie. 5e-4')
    parser.add_argument('-T',
                        '--annealTimeStep',
                        action='store',
                        default='100k',
                        type=str,
                        help='Exploration Annealing Timesteps (in k)')
    parser.add_argument('-e',
                        '--epsilon',
                        action='store',
                        default=0.1,
                        type=float,
                        help='The exploration fraction to anneal to')
    parser.add_argument('-p',
                        '--prioritizedReplay',
                        action='store_true',
                        help='Whether to use prioritized replay')
    parser.add_argument('-H',
                        '--hidden',
                        action='store',
                        default='256',
                        type=str,
                        help='Hidden Layers (comma separated)')
    parser.add_argument('-N',
                        '--noisy',
                        action='store_true',
                        help='Noisy network')
    parser.add_argument('-D',
                        '--dueling',
                        action='store_true',
                        help='Dueling DQN')
    parser.add_argument('-d',
                        '--double',
                        action='store_true',
                        help='Double DQN')
    parser.add_argument('-u',
                        '--updateFreq',
                        action='store',
                        default=800,
                        type=int,
                        help='Network update frequency')
    parser.add_argument('-B',
                        '--buffer',
                        action='store',
                        default='100k',
                        type=str,
                        help='Size of replay buffer (in k)')
    parser.add_argument('-L',
                        '--load',
                        action='store',
                        default=1.0,
                        type=float,
                        help='Load factor of Great routes')
    # === Flags for running arguments ===
    parser.add_argument('-i',
                        '--trainIter',
                        action='store',
                        default=100000,
                        type=int,
                        help='Training Iteration')
    parser.add_argument('-c',
                        '--checkFreq',
                        action='store',
                        default=5,
                        type=int,
                        help='Checkpoint saving frequency')
    parser.add_argument('--learningStart',
                        action='store',
                        default=4320,
                        type=int,
                        help='Steps before Learning starts')
    parser.add_argument('--trainBatch',
                        action='store',
                        default=32,
                        type=int,
                        help='Training batch size')
    parser.add_argument('--sampleBatch',
                        action='store',
                        default=4,
                        type=int,
                        help='Sample batch size')

    args = parser.parse_args()

    # Name Structure
    # Algorithm_ObservationSpace_Seed_Gamma_Alpha_Beta_LearningRate
    #          _ExplorationAnnealingTimesteps_ExplorationFraction
    #          _PrioritizedReplay_Hidden_Noisy_Dueling_DoubleQ_NetworkUpdateFreq_Buffer
    #          _LoadFactor
    #
    # i.e. DQN_default_s20_g0.6_a10_b1_lr5e-4_et50k_e0.1_pr0_h256_n0_d0_qq0_u800_b100k_l1.0

    NAME = "{}_{}_s{}_g{}_a{}_b{}_lr{}_et{}_e{}_pr{:n}_h{}_n{:n}_d{:n}_qq{:n}_u{}_b{}_l{}".format(
        args.algorithm, args.observation, args.seed, args.gamma, args.alpha,
        args.beta, args.learningRate, args.annealTimeStep, args.epsilon,
        args.prioritizedReplay, args.hidden, args.noisy, args.dueling,
        args.double, args.updateFreq, args.buffer, args.load)
    print("Starting Experiment with name {}".format(NAME))

    OPT = NAME.split("_")

    OPTIONS = {
        "alg": OPT[0],
        "obs_space": OPT[1],
        "seed": int(OPT[2][1:]),
        "gamma": float(OPT[3][1:]),
        "alpha": float(OPT[4][1:]),
        "beta": float(OPT[5][1:]),
        "lr": float(OPT[6][2:]),
        "epsilon_ts": int(OPT[7][2:-1]) * 1000,
        "epsilon": float(OPT[8][1:]),
        "pr": bool(int(OPT[9][2:])),
        "hidden": list(map(int, OPT[10][1:].split(','))),
        "noisy": bool(int(OPT[11][1:])),
        "dueling": bool(int(OPT[12][1:])),
        "doubleQ": bool(int(OPT[13][2:])),
        "update_freq": int(OPT[14][1:]),
        "buffer": int(OPT[15][1:-1]) * 1000,
        "load": float(OPT[16][1:])
    }

    ray.init(  #object_store_memory=int(4e9),  # 4gb
        #redis_max_memory=int(2e9)  #2gb
    )
    experiment_spec = tune.Experiment(
        name=NAME,
        run=OPTIONS["alg"],
        checkpoint_freq=args.checkFreq,
        checkpoint_at_end=True,
        stop={"training_iteration": args.trainIter},
        upload_dir="gs://ray_results/",
        custom_loggers=[ActionLogger],
        config={
            # === Configure Callbacks ===
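            # (an illustrative sketch of these callback hooks appears after main() below)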
            "callbacks": {
                "on_episode_start":
                tune.function(logger_callbacks.on_episode_start),
                "on_episode_step":
                tune.function(logger_callbacks.on_episode_step),
                "on_episode_end":
                tune.function(logger_callbacks.on_episode_end),
                "on_sample_end":
                tune.function(logger_callbacks.on_sample_end),
                "on_train_result":
                tune.function(logger_callbacks.on_train_result),
            },

            # === Resources ===
            # Number of actors used for parallelism
            "num_workers": 0,
            # Number of GPUs to allocate to the driver. Note that not all algorithms
            # can take advantage of driver GPUs. This can be fraction (e.g., 0.3 GPUs).
            "num_gpus": 0,
            # Number of CPUs to allocate per worker.
            "num_cpus_per_worker": 4,
            # Number of GPUs to allocate per worker. This can be fractional.
            "num_gpus_per_worker": 0,
            # Any custom resources to allocate per worker.
            "custom_resources_per_worker": {},
            # Number of CPUs to allocate for the driver. Note: this only takes effect
            # when running in Tune.
            "num_cpus_for_driver": 1,

            # === Model ===
            # Number of atoms for representing the distribution of return. When
            # this is greater than 1, distributional Q-learning is used.
            # the discrete supports are bounded by v_min and v_max
            "num_atoms": 1,
            "v_min": -10.0,
            "v_max": 10.0,
            # Whether to use noisy network
            "noisy": OPTIONS['noisy'],
            # Whether to use dueling dqn
            "dueling": OPTIONS['dueling'],
            # Whether to use double dqn
            "double_q": OPTIONS['dueling'],
            # Hidden layer sizes of the state and action value networks
            "hiddens": OPTIONS['hidden'],
            # N-step Q learning
            "n_step": 1,

            # === Exploration ===
            # Max num timesteps for annealing schedules. Exploration is annealed from
            # 1.0 to exploration_fraction over this number of timesteps scaled by
            # exploration_fraction
            "schedule_max_timesteps": OPTIONS['epsilon_ts'],
            # Number of env steps to optimize for before returning
            #morning: 10800/10 = 1080 steps total
            "timesteps_per_iteration": 1080,
            # Fraction of entire training period over which the exploration rate is
            # annealed
            "exploration_fraction": 1,
            # Final value of random action probability
            "exploration_final_eps": OPTIONS['epsilon'],
            # Update the target network every `target_network_update_freq` steps.
            "target_network_update_freq": OPTIONS['update_freq'],

            # === Replay buffer ===
            # Size of the replay buffer. Note that if async_updates is set, then
            # each worker will have a replay buffer of this size.
            "buffer_size": OPTIONS['buffer'],
            # If True prioritized replay buffer will be used.
            "prioritized_replay": OPTIONS['pr'],
            # Alpha parameter for prioritized replay buffer.
            "prioritized_replay_alpha": 0.6,
            # Beta parameter for sampling from prioritized replay buffer.
            "prioritized_replay_beta": 0.4,
            # Fraction of entire training period over which the beta parameter is
            # annealed
            "beta_annealing_fraction": 0.2,
            # Final value of beta
            "final_prioritized_replay_beta": 0.4,
            # Epsilon to add to the TD errors when updating priorities.
            "prioritized_replay_eps": 1e-6,
            # Whether to LZ4 compress observations
            "compress_observations": True,

            # === Optimization ===
            # Learning rate for adam optimizer
            "lr": OPTIONS['lr'],
            # Adam epsilon hyper parameter
            "adam_epsilon": 1e-8,
            # If not None, clip gradients during optimization at this value
            "grad_norm_clipping": 40,
            # How many steps of the model to sample before learning starts.
            "learning_starts": args.learningStart,
            # Update the replay buffer with this many samples at once. Note that
            # this setting applies per-worker if num_workers > 1.
            # Default sample batch size
            "sample_batch_size": args.sampleBatch,
            # Size of a batched sampled from replay buffer for training. Note that
            # if async_updates is set, then each worker returns gradients for a
            # batch of this size.
            # Training batch size, if applicable. Should be >= sample_batch_size.
            # Samples batches will be concatenated together to this size for training.
            "train_batch_size": args.trainBatch,

            # === Parallelism ===
            # Optimizer class to use.
            "optimizer_class": "SyncReplayOptimizer",
            # Whether to use a distribution of epsilons across workers for exploration.
            "per_worker_exploration": False,
            # Whether to compute priorities on workers.
            "worker_side_prioritization": False,
            # Prevent iterations from going lower than this time span
            "min_iter_time_s": 1,

            # === Environment ===
            # Discount factor of the MDP
            "gamma": OPTIONS['gamma'],
            "env": ChulaSSSEnv,
            "env_config": {
                "observation_space": OPTIONS['obs_space'],
                "time_select": TIME_SELECT_STR,
                "great_edition": GREAT_EDITION,
                "with_gui": WITH_GUI,
                "with_libsumo": WITH_LIBSUMO,
                "no_internal_links": True,
                "time_to_teleport": -1,
                "viewport": VIEWPORT,
                "step_length": STEP_LENGTH,
                "seed": OPTIONS['seed'],
                "impatience_time": IMPATIENCE_TIME,
                "step_size": STEP_SIZE,
                "alpha": OPTIONS['alpha'],
                "beta": OPTIONS['beta'],
                "name": NAME,
                "load": OPTIONS['load']
            }
        })
    tune.run_experiments(experiment_spec, resume='prompt')
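
# Illustrative sketch only (an assumption, not from the original source): the
# logger_callbacks module referenced in the "callbacks" section of main() would
# define RLlib callback hooks of roughly this shape for this Ray version, e.g.:
#
#   def on_episode_end(info):
#       episode = info["episode"]
#       episode.custom_metrics["episode_length"] = episode.length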
Example #7
    def fit(self, x_user, x_questions, y_vals, **kwargs):
        kwargs.setdefault('latent_traits', None)
        kwargs.setdefault('batch_size', 16)
        kwargs.setdefault('epochs', 64)
        kwargs.setdefault('validation_split', 0.2)
        kwargs.setdefault('params', self.params)

        self.proxy_model.l_traits = kwargs['latent_traits']

        self.proxy_model.x_train_user = x_user
        self.proxy_model.x_train_questions = x_questions
        self.proxy_model.y_ = y_vals

        self.l_traits = kwargs['latent_traits']
        # affirming if params are given in either of(init or fit) methods
        self.params = self.params or kwargs['params']
        if self.params is not None:  # Validate implementation with different types of tune input
            if not isinstance(self.params, dict):
                raise TypeError("Params should be of type 'dict'")
            self.params = _parse_params(self.params, return_as='flat')
            self.proxy_model.update_params(self.params)
            # triggers for fourPL model
            if (self.proxy_model.name == 'tpm' and 'slip_params' in self.params
                    and 'train' in self.params['slip_params']):
                if self.params['slip_params']['train']:
                    self.proxy_model.name = 'fourPL'
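
        # Illustrative note (an assumption, not from the original source): for the
        # fourPL trigger above, `params` would contain something like
        #     {'slip_params': {'train': True}, ...}
        # which _parse_params(..., return_as='flat') then flattens before it is
        # applied to the proxy model.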

        ray_verbose = False
        _ray_log_level = logging.INFO if ray_verbose else logging.ERROR
        ray.init(log_to_driver=False, logging_level=_ray_log_level, ignore_reinit_error=True, redis_max_memory=20*1000*1000*1000, object_store_memory=1000000000,
                 num_cpus=4)

        def train_model(config, reporter):
            self.proxy_model.set_params(params=config, set_by='optimizer')
            print('\nInitializing fit for {} model. . .\nBatch_size: {}; epochs: {};'.format(
                self.proxy_model.name, kwargs['batch_size'], kwargs['epochs']))
            model = self.proxy_model.create_model()

            self.history = model.fit(x=[x_user, x_questions], y=y_vals, batch_size=kwargs['batch_size'],
                                     epochs=kwargs['epochs'], verbose=0, validation_split=kwargs['validation_split'])

            _, mae, accuracy = model.evaluate(
                x=[x_user, x_questions], y=y_vals)  # [1]
            last_checkpoint = "weights_tune_{}.h5".format(
                list(zip(np.random.choice(10, len(config), replace=False), config)))
            model.save_weights(last_checkpoint)
            reporter(mean_error=mae, mean_accuracy=accuracy,
                     checkpoint=last_checkpoint)
        t1 = time.time()
        configuration = tune.Experiment("experiment_name",
                                        run=train_model,
                                        resources_per_trial={"cpu": 4},
                                        stop={"mean_error": 0.15,
                                              "mean_accuracy": 95},
                                        config=self.proxy_model.get_params())

        trials = tune.run_experiments(configuration, verbose=0)
        self.trials = trials
        metric = "mean_error"  # "mean_accuracy"
        # Restore a model from the best trial.

        def get_sorted_trials(trial_list, metric):
            return sorted(trial_list, key=lambda trial: trial.last_result.get(metric, 0), reverse=True)

        sorted_trials = get_sorted_trials(trials, metric)

        for best_trial in sorted_trials:
            try:
                print("Creating model...")
                self.proxy_model.set_params(
                    params=best_trial.config, set_by='optimizer')
                best_model = self.proxy_model.create_model()
                weights = os.path.join(
                    best_trial.logdir, best_trial.last_result["checkpoint"])
                print("Loading from", weights)
                # TODO Validate this loaded model.
                best_model.load_weights(weights)
                break
            except Exception as e:
                print(e)
                print("Loading failed. Trying next model")
        exe_time = time.time()-t1
        self.model = best_model

        #self.model = model
        #print('\nIntitializing fit for {} model. . .\nBatch_size: {}; epochs: {};'.format(self.proxy_model.name, kwargs['batch_size'], kwargs['epochs']))
        #model = self.proxy_model.create_model()
        #t1= time.time()
        # self.history= model.fit(x=[x_user, x_questions], y=y_vals, batch_size=kwargs['batch_size'], epochs=kwargs['epochs'], verbose=0, validation_split=kwargs['validation_split'])#, callbacks= kwargs['callbacks'])#added callbacks
        #exe_time = time.time()-t1
#
        #self.model = model

        # Following lets user access each coeffs as and when required
        self.difficulty = self.coefficients()['difficulty_level']
        self.discrimination = self.coefficients()['disc_param']
        self.guessing = self.coefficients()['guessing_param']
        self.slip = self.coefficients()['slip_param']

        num_trainables = np.sum([K.count_params(layer)
                                 for layer in self.model.trainable_weights])
        sample_size = y_vals.shape[0]
        log_lik, _, _ = self.model.evaluate(x=[x_user, x_questions], y=y_vals)

        self.AIC = 2*num_trainables - 2*np.log(log_lik)
        self.AICc = self.AIC + (2*np.square(num_trainables) +
                                2*num_trainables)/(sample_size - num_trainables - 1)

        print('\nTraining on : {} samples for : {} epochs has completed in : {} seconds.'.format(
            self.proxy_model.x_train_user.shape[0], kwargs['epochs'], np.round(exe_time, decimals=3)))
        print('\nAIC value: {} and AICc value: {}'.format(
            np.round(self.AIC, 3), np.round(self.AICc, 3)))

        print('\nUse `object.plot()` to view train/validation loss curves;\nUse `object.history` to obtain train/validation loss across all the epochs.\nUse `object.coefficients()` to obtain model parameters--Question difficulty, discrimination, guessing & slip')
        print('Use `object.AIC` & `object.AICc` to obtain Akaike Information Criterion (AIC & AICc) values.')
        return self
from ChulaSSSEnv import ChulaSSSEnv

if __name__ == "__main__":
    ray.init()
    experiment_spec = tune.Experiment(
        name="experiment_dqn",
        run="DQN",
        config={
            "num_gpus": 0,
            "num_workers": 1,
            "env": ChulaSSSEnv,
            "env_config": {
                "observation_space": "default",
                "time_select": "morning",
                "great_edition": True,
                "with_gui": False,
                "with_libsumo": True,
                "no_internal_links": True,
                "time_to_teleport": -1,
                "viewport": "surasak",
                "step_length": 1,
                "seed": 20,
                "impatience_time": 300,
                "step_size": 10,
                "alpha": 10,
                "beta": 1,
            }
        }
    )
    tune.run_experiments(experiment_spec, resume=True)