Example #1
def load_checkpoint(agent, checkpoint_exp_key, timestep=None):
    """
    Loads a checkpoint from Comet.

    Args:
        agent (Agent): the agent to be loaded
        checkpoint_exp_key (str): key of the Comet experiment to load the checkpoint from
        timestep (int, optional): checkpoint timestep to load; if None, the most
                                  recent checkpoint is used
    """
    assert checkpoint_exp_key is not None, 'Checkpoint experiment key must be set.'
    print('Loading checkpoint from ' + checkpoint_exp_key + '...')
    comet_api = comet_ml.API(api_key=LOADING_API_KEY)
    experiment = comet_api.get_experiment(project_name=PROJECT_NAME,
                                          workspace=WORKSPACE,
                                          experiment=checkpoint_exp_key)
    asset_list = experiment.get_asset_list()
    if timestep is not None:
        # get the specified checkpoint
        file_name = 'ckpt_step_' + str(timestep) + '.ckpt'
        asset = [a for a in asset_list if a['fileName'] == file_name]
        if len(asset) == 0:
            raise KeyError('Checkpoint timestep not found.')
        asset = asset[0]
    else:
        # get most recent checkpoint
        asset_times = [asset['createdAt'] for asset in asset_list if 'ckpt' in asset['fileName']]
        asset = [a for a in asset_list if a['createdAt'] == max(asset_times)][0]
    print('Checkpoint Name:', asset['fileName'])
    ckpt = experiment.get_asset(asset['assetId'])
    state_dict = torch.load(io.BytesIO(ckpt))
    agent.load_state_dict(state_dict)
    print('Done.')
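A minimal usage sketch of the helper above; the experiment key and timestep are placeholders, and the agent is assumed to come from the same create_agent setup used in the other examples:

agent = create_agent(env, agent_args=agent_args)[0]
load_checkpoint(agent, 'abc123', timestep=100000)  # load a specific checkpoint
load_checkpoint(agent, 'abc123')                   # or fall back to the most recent one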
Example #2
def analyze_agent_kl(exp_key):
    """
    Evaluates the agent KL post-hoc for a given experiment.

    Args:
        exp_key (str): the experiment ID
    """
    # load the experiment
    comet_api = comet_ml.API(api_key=LOADING_API_KEY)
    experiment = comet_api.get_experiment(project_name=PROJECT_NAME,
                                          workspace=WORKSPACE,
                                          experiment=exp_key)

    # create the environment
    param_summary = experiment.get_parameters_summary()
    env_name = [a for a in param_summary if a['name'] == 'env'][0]['valueCurrent']
    env = create_env(env_name)

    # create the agent
    asset_list = experiment.get_asset_list()
    agent_config_asset_list = [a for a in asset_list if 'agent_args' in a['fileName']]
    agent_args = None
    if len(agent_config_asset_list) > 0:
        # if we've saved the agent config dict, load it
        agent_args = experiment.get_asset(agent_config_asset_list[0]['assetId'])
        agent_args = json.loads(agent_args)
        agent_args = agent_args if 'opt_type' in agent_args['inference_optimizer_args'] else None
    agent = create_agent(env, agent_args=agent_args)[0]

    # get the list of checkpoint timesteps
    ckpt_asset_list = [a for a in asset_list if 'ckpt' in a['fileName']]
    ckpt_asset_names = [a['fileName'] for a in ckpt_asset_list]
    ckpt_timesteps = [int(s.split('ckpt_step_')[1].split('.ckpt')[0]) for s in ckpt_asset_names]
    ckpt_timesteps = list(np.sort(ckpt_timesteps)[::CKPT_SUBSAMPLE])

    agent_kls = []

    # initial episode using random init
    prev_episode, _, _ = collect_episode(env, agent)

    for ckpt_ind, ckpt_timestep in enumerate(ckpt_timesteps):
        # load the checkpoint
        print('Evaluating checkpoint ' + str(ckpt_ind + 1) + ' of ' + str(len(ckpt_timesteps)))
        load_checkpoint(agent, exp_key, ckpt_timestep)

        # evaluate agent KL
        print(' Evaluating agent KL...')
        agent_kls.append(estimate_agent_kl(env, agent, prev_episode))
        print(' Done.')

        # collect an episode
        print(' Collecting episode...')
        prev_episode, _, _ = collect_episode(env, agent)
        print(' Done.')

    return {'steps': ckpt_timesteps, 'agent_kl': np.array(agent_kls)}
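A hedged usage sketch; the experiment key is a placeholder, and CKPT_SUBSAMPLE, collect_episode, and estimate_agent_kl are assumed to be defined as in the surrounding module:

result = analyze_agent_kl('abc123')
# result['steps'] holds the evaluated checkpoint timesteps,
# result['agent_kl'] the corresponding agent KL estimates as a numpy array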
Example #3
def clear_comet_ml_project():
    import comet_ml

    # This is hardcoded in the `ray/ml/examples/upload_to_comet_ml.py` example
    comet_ml_project = "ray-air-example"

    api = comet_ml.API()
    workspace = api.get_default_workspace()
    experiments = api.get_experiments(workspace=workspace,
                                      project_name=comet_ml_project)
    api.delete_experiments([experiment.key for experiment in experiments])
Example #4
def get_experiment(run_id):
    experiment_id = hashlib.sha1(run_id.encode("utf-8")).hexdigest()
    os.environ["COMET_EXPERIMENT_KEY"] = experiment_id

    api = comet_ml.API()  # Assumes API key is set in config/env
    api_experiment = api.get_experiment_by_id(experiment_id)

    if api_experiment is None:
        return comet_ml.Experiment(project_name=PROJECT_NAME)

    else:
        return comet_ml.ExistingExperiment(project_name=PROJECT_NAME)
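The helper above maps an arbitrary run ID to a deterministic Comet experiment key, so repeated calls with the same run_id resume the same experiment instead of creating a new one. A short sketch of the derivation (the run ID value is a placeholder):

import hashlib

run_id = "my-run-42"  # placeholder run identifier
experiment_id = hashlib.sha1(run_id.encode("utf-8")).hexdigest()
# the same run_id always yields the same 40-character hex key, which
# Experiment/ExistingExperiment pick up through COMET_EXPERIMENT_KEY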
Example #5
    def init_logger(self, cfg):
        logger = None
        # Check to see if an experiment key was provided (via the config/environment):
        EXPERIMENT_KEY = cfg.experiment_key

        # First, let's see if we continue or start fresh:
        CONTINUE_RUN = cfg.resume
        if (EXPERIMENT_KEY is not None):
            # There is one, but the experiment might not exist yet:
            api = comet_ml.API()  # Assumes API key is set in config/env
            try:
                api_experiment = api.get_experiment_by_id(EXPERIMENT_KEY)
            except Exception:
                api_experiment = None
            if api_experiment is not None:
                CONTINUE_RUN = True
                # We can get the last details logged here, if logged:
                # step = int(api_experiment.get_parameters_summary("batch")["valueCurrent"])
                # epoch = int(api_experiment.get_parameters_summary("epochs")["valueCurrent"])

        if CONTINUE_RUN:
            # 1. Recreate the state of ML system before creating experiment
            # otherwise it could try to log params, graph, etc. again
            # ...
            # 2. Setup the existing experiment to carry on:
            logger = comet_ml.ExistingExperiment(
                previous_experiment=EXPERIMENT_KEY,
                log_env_details=True,  # to continue env logging
                log_env_gpu=True,  # to continue GPU logging
                log_env_cpu=True,  # to continue CPU logging
                auto_histogram_weight_logging=True,
                auto_histogram_gradient_logging=True,
                auto_histogram_activation_logging=True)
            # Retrieved from above APIExperiment
            # self.logger.set_epoch(epoch)

        else:
            # 1. Create the experiment first
            #    This will use the COMET_EXPERIMENT_KEY if defined in env.
            #    Otherwise, you could manually set it here. If you don't
            #    set COMET_EXPERIMENT_KEY, the experiment will get a
            #    random key!
            logger = comet_ml.Experiment(
                disabled=cfg.disabled,
                project_name=cfg.project,
                auto_histogram_weight_logging=True,
                auto_histogram_gradient_logging=True,
                auto_histogram_activation_logging=True)
            logger.add_tags(cfg.tags.split())
            logger.log_parameters(self.cfg)

        return logger
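A hedged usage sketch for init_logger; the cfg namespace below is hypothetical and only carries the fields the method reads (experiment_key, resume, disabled, project, tags), and `trainer` stands in for whatever object owns the method:

from types import SimpleNamespace

cfg = SimpleNamespace(experiment_key=None, resume=False, disabled=False,
                      project='my-project', tags='baseline v1')
logger = trainer.init_logger(cfg)
logger.log_metric('loss', 0.123, step=0)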
Example #6
def _download_model_weights_from_comet(
    experiment_key: str = "latest",
    download_path: str = "model/weights/srgan_generator_model_weights.npz",
) -> (int, float):
    """
    Download DeepBedMap's Generator neural network model weights from Comet.ML
    By default, the model weights from the latest experimental run are downloaded
    Passing in an alternative experiment_key hash will download that one instead.
    Also returns the model's num_residual_blocks and residual_scaling hyperparameters.

    Uses Comet.ML's Python REST API class at https://www.comet.ml/docs/python-sdk/API/
    Requires the COMET_REST_API_KEY environment variable to be set in the .env file
    """
    comet_api = comet_ml.API(rest_api_key=base64.b64decode(
        s=os.environ["COMET_REST_API_KEY"]))

    # Get pointer to a DeepBedMap experiment on Comet ML
    if experiment_key == "latest":
        # Get list of DeepBedMap experiments
        project = comet_api.get(workspace="weiji14", project="deepbedmap")
        df = pd.io.json.json_normalize(
            data=project.data["experiments"].values())
        # Get the key to the latest DeepBedMap experiment on Comet ML
        experiment_key = df.loc[
            df["start_server_timestamp"].idxmax()].experiment_key

    experiment = comet_api.get(workspace="weiji14",
                               project="deepbedmap",
                               experiment=experiment_key)

    # Use key to access url to the experiment's asset which is the npz weight file
    assets = experiment.asset_list
    for asset in assets:
        if asset["fileName"].endswith(
                ".npz"):  # make sure we pick the .npz file
            asset_id = asset["assetId"]
            break

    # Download the neural network weight file (npz format) to the right place!
    os.makedirs(name=os.path.dirname(download_path), exist_ok=True)
    with open(download_path, mode="wb") as model_weight_file:
        model_weight_file.write(experiment.get_asset(asset_id=asset_id))

    # Get hyperparameters needed to recreate DeepBedMap model architecture properly
    parameters: dict = (pd.io.json.json_normalize(
        data=experiment.parameters).set_index(
            keys="name").valueCurrent.to_dict())
    return int(parameters["num_residual_blocks"]), float(
        parameters["residual_scaling"])
Example #7
def compare_policies(exp_key1, exp_key2, write_result=True):
    """
    Compares the policies of two agents at the end of training.

    Args:
        exp_key1 (str): key of the first experiment
        exp_key2 (str): key of the second experiment
        write_result (bool): whether to pickle the resulting KL estimates
    """
    # load the experiments
    comet_api = comet_ml.API(api_key=LOADING_API_KEY)
    exp1 = comet_api.get_experiment(project_name=PROJECT_NAME,
                                    workspace=WORKSPACE,
                                    experiment=exp_key1)
    exp2 = comet_api.get_experiment(project_name=PROJECT_NAME,
                                    workspace=WORKSPACE,
                                    experiment=exp_key2)

    # create the environment
    param_summary = exp1.get_parameters_summary()
    env_name = [a for a in param_summary if a['name'] == 'env'][0]['valueCurrent']
    env1 = create_env(env_name)
    env2 = create_env(env_name)

    # create the agents
    asset_list = exp1.get_asset_list()
    agent_config_asset_list = [a for a in asset_list if 'agent_args' in a['fileName']]
    agent_args = None
    if len(agent_config_asset_list) > 0:
        # if we've saved the agent config dict, load it
        agent_args = exp1.get_asset(agent_config_asset_list[0]['assetId'])
        agent_args = json.loads(agent_args)
        agent_args = agent_args if 'opt_type' in agent_args['inference_optimizer_args'] else None
    agent1 = create_agent(env1, agent_args=agent_args)[0]
    load_checkpoint(agent1, exp_key1)

    asset_list = exp2.get_asset_list()
    agent_config_asset_list = [a for a in asset_list if 'agent_args' in a['fileName']]
    agent_args = None
    if len(agent_config_asset_list) > 0:
        # if we've saved the agent config dict, load it
        agent_args = exp2.get_asset(agent_config_asset_list[0]['assetId'])
        agent_args = json.loads(agent_args)
        agent_args = agent_args if 'opt_type' in agent_args['inference_optimizer_args'] else None
    agent2 = create_agent(env2, agent_args=agent_args)[0]
    load_checkpoint(agent2, exp_key2)

    # evaluate the KL between policies
    kl12 = []
    kl21 = []
    agent1.reset(); agent1.eval()
    agent2.reset(); agent2.eval()

    state1 = env1.reset()
    state2 = env2.reset()

    for state_ind in range(N_STATES):
        # perform policy optimization on state1
        action1 = agent1.act(state1)
        agent2.act(state1)
        kl = kl_divergence(agent1.approx_post, agent2.approx_post).sum().detach().item()
        kl12.append(kl)

        agent1.reset(); agent1.eval()
        agent2.reset(); agent2.eval()

        # perform policy optimization on state2
        agent1.act(state2)
        action2 = agent2.act(state2)
        kl = kl_divergence(agent2.approx_post, agent1.approx_post).sum().detach().item()
        kl21.append(kl)

        # step the environments
        state1, _, done1, _ = env1.step(action1)
        state2, _, done2, _ = env2.step(action2)

        if done1:
            agent1.reset(); agent1.eval()
            state1 = env1.reset()
            done1 = False
        if done2:
            agent2.reset(); agent2.eval()
            state2 = env2.reset()
            done2 = False

    kls = {'kl12': kl12,
           'kl21': kl21}

    if write_result:
        pickle.dump(kls, open('policy_kl_' + exp_key1 + '_' + exp_key2 + '.p', 'wb'))

    return kls
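A hedged usage sketch; both experiment keys are placeholders:

kls = compare_policies('abc123', 'def456', write_result=False)
print(np.mean(kls['kl12']), np.mean(kls['kl21']))  # average KL in each direction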
Example #8
    def score_bayes_trainer_harness(self):
        """
        Returns
        -------
        :return: ``None``
            None, but results in saved models suitable for scoring and trained
            on all available data.
        """

        self.__log.info("Starting generic score train loop")
        train_data = self.train_data
        val_data = self.val_data
        models = self.model
        out_path = self.out_path
        type_time_series = self.type_time_series
        param_search = self.param_search
        trainer = self.trainer
        api_key = self.api_key
        rest_api_key = self.rest_api_key
        workspace = self.workspace

        for models_ in models.get(type_time_series):

            for key, value in models_.items():

                model_name = key
                model = value[0]
                model_kwarg = value[1]

                if param_search == 'bayes':

                    search_space = GluonTSBayesEstimatorKwargs.BayesModelLookup.get(
                        model_name)

                    # comet-ml hyperparameter optimization configuration (bayes in this case)
                    config = {
                        "algorithm": "bayes",
                        "spec": {
                            "maxCombo": 5,  # no of combinations to try
                            "objective": "minimize",
                            "metric": "loss",
                            "seed": 42,
                            "gridSize": 10,
                            "minSampleSize": 100,
                            "retryLimit": 20,
                            "retryAssignLimit": 0,
                        },
                        "name": "My Bayesian Search",
                        "trials": 1,
                    }

                    config['parameters'] = search_space

                    # current time
                    timestr = time.strftime("%Y%m%d-%H%M%S")
                    # comet-ml project name for the optimization
                    project_name = f"optimizer-{model_name}-{timestr}"
                    # initialize the comet-ml optimizer
                    optimizer = Optimizer(config=config,
                                          api_key=api_key,
                                          project_name=project_name)
                    # loop through the parameter combinations that the bayes optimizer suggests
                    for experiment in optimizer.get_experiments():

                        # explicitly set the model parameters (should be generic for any model)
                        if model_name == "SimpleFeedForward":

                            hidden1 = experiment.get_parameter(
                                "hidden_layer_size")
                            hidden2 = experiment.get_parameter(
                                "hidden2_layer_size")
                            model_kwarg['num_hidden_dimensions'] = [
                                hidden1, hidden2
                            ]

                            self.__log.info(
                                f"model_kwarg['num_hidden_dimensions'] : {model_kwarg['num_hidden_dimensions']}"
                            )

                        elif model_name == "DeepAREstimate":

                            model_kwarg[
                                'num_layers'] = experiment.get_parameter(
                                    "num_layers")
                            model_kwarg[
                                'num_cells'] = experiment.get_parameter(
                                    "num_cells")
                            model_kwarg[
                                'cell_type'] = experiment.get_parameter(
                                    "cell_type")
                            model_kwarg[
                                'dropout_rate'] = experiment.get_parameter(
                                    "dropout_rate")

                        # set trainer params
                        trainer.learning_rate = experiment.get_parameter(
                            "learning_rate")
                        trainer.batch_size = experiment.get_parameter(
                            "batch_size")
                        trainer.epochs = 2

                        # initialize model from the suggested hyperparameters
                        model = model.from_hyperparameters(**model_kwarg)
                        # set the trainer
                        model.trainer = trainer

                        self.__log.info(
                            f'\n model.trainer.lr : {model.trainer.learning_rate}'
                        )
                        self.__log.info(
                            f'model.trainer.epochs : {model.trainer.epochs}\n')

                        # train the model
                        predictor = model.train(train_data)
                        # make predictions
                        forecast_it, ts_it = make_evaluation_predictions(
                            dataset=val_data,  # test dataset
                            predictor=predictor,  # predictor
                            num_eval_samples=1,  # number of sample paths we want for evaluation
                        )

                        # convert gluonts objects to lists
                        forecasts = list(forecast_it)
                        tss = list(ts_it)

                        # get prediction length
                        prediction_length = forecasts[0].mean.shape[0]

                        y_test_ = list(val_data)[0]['target']
                        y_preds_ = forecasts[0].mean
                        y_test_ = y_test_[-prediction_length:]

                        mae_ = mean_absolute_error(y_test_, y_preds_)

                        # Report the loss to comet
                        experiment.log_metric("loss", mae_)

                experiment.end()

            # initialize comet REST API to retrieve the best hyperparameters
            comet_api = comet_ml.API(rest_api_key=rest_api_key)

            project = comet_api.get(workspace=workspace,
                                    project_name=optimizer.
                                    experiment_kwargs['project_name'].lower())

            # get the experiment ids
            exp_ids = [x.id for x in project]

            scores_df = pd.DataFrame(index=exp_ids, columns=['metric'])
            # loop through the experiments within the comet project
            for exp_id in exp_ids:

                exp = comet_api.get(
                    f"{workspace}/{project_name.lower()}/{exp_id}")

                scores_df.at[exp_id,
                             'metric'] = exp.get_metrics()[0]['metricValue']

            scores_df.metric = scores_df.metric.map(float)
            # get experiment_id of the best score
            best_exp_id = scores_df.metric.idxmin()
            # get the best experiment
            exp = comet_api.get(
                f"{workspace}/{project_name.lower()}/{best_exp_id}")
            # get the best hyperparameters
            best_params = {
                x['name']: x['valueCurrent']
                for x in exp.get_parameters_summary() if x['name'] != 'f'
            }
            # save best params in model_name-keyed dictionary for later use
            self.best_params[model_name] = best_params
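The core pattern in the harness above is the comet_ml Optimizer loop: declare a Bayesian search spec, let Comet suggest parameter combinations, and report one loss per trial. A stripped-down, self-contained sketch of that loop with a dummy quadratic objective (the project name and parameter are illustrative, and an API key is assumed to be configured in the environment):

import comet_ml

config = {
    "algorithm": "bayes",
    "spec": {"maxCombo": 5, "objective": "minimize", "metric": "loss"},
    "parameters": {"x": {"type": "float", "min": -5.0, "max": 5.0}},
    "name": "Minimal Bayes Search",
}
optimizer = comet_ml.Optimizer(config=config)
for experiment in optimizer.get_experiments(project_name="optimizer-demo"):
    x = experiment.get_parameter("x")
    experiment.log_metric("loss", (x - 2.0) ** 2)  # dummy objective to minimize
    experiment.end()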
Example #9
def compare_goal_optimizers(model_exp_key,
                            opt_exp_key=None,
                            write_results=True,
                            stochastic_model=False):
    """
    Optimize random goal states using a model-based estimator.
    Train the policy optimizer online. Compare with other optimizers.
    Note: tailored to HalfCheetah-v2 environment currently.

    Args:
        model_exp_key (str): model-based experiment key
        opt_exp_key (str): optimizer experiment key. If None, trains from scratch
        write_results (bool): whether to pickle results directly
        stochastic_model (bool): whether to sample states or use mean estimate
    """

    ## MODEL
    # load the model experiment
    comet_api = comet_ml.API(api_key=LOADING_API_KEY)
    experiment = comet_api.get_experiment(project_name=PROJECT_NAME,
                                          workspace=WORKSPACE,
                                          experiment=model_exp_key)

    # create the environment
    param_summary = experiment.get_parameters_summary()
    env_name = [a for a in param_summary
                if a['name'] == 'env'][0]['valueCurrent']
    env = create_env(env_name)
    # create a synchronous env to parallelize training
    sync_env = SynchronousEnv(env, BATCH_SIZE)

    # create the agent
    asset_list = experiment.get_asset_list()
    agent_config_asset_list = [
        a for a in asset_list if 'agent_args' in a['fileName']
    ]
    agent_args = None
    if len(agent_config_asset_list) > 0:
        # if we've saved the agent config dict, load it
        agent_args = experiment.get_asset(
            agent_config_asset_list[0]['assetId'])
        agent_args = json.loads(agent_args)
        agent_args = agent_args if 'opt_type' in agent_args[
            'inference_optimizer_args'] else None
    agent = create_agent(env, agent_args=agent_args)[0]

    # also, load the most recent episode to sample goal states
    asset_times = [
        asset['createdAt'] for asset in asset_list
        if 'state' in asset['fileName']
    ]
    state_asset = [
        a for a in asset_list if a['createdAt'] == max(asset_times)
    ][0]
    episode_states = json.loads(experiment.get_asset(state_asset['assetId']))

    # load the checkpoint
    load_checkpoint(agent, model_exp_key)

    if stochastic_model:
        agent.q_value_estimator.state_variable.cond_likelihood.stochastic = True

    # swap out the value estimator for goal-based estimator
    gb_estimator = GoalBasedQEstimator()
    # copy over the dynamics model
    gb_estimator.state_likelihood_model = agent.q_value_estimator.state_likelihood_model
    gb_estimator.state_variable = agent.q_value_estimator.state_variable
    # set the estimator
    agent.q_value_estimator = gb_estimator
    agent.q_value_estimator.set_goal_std(GOAL_STD)
    # agent.alphas['pi'] = 0.

    total_results = {
        'grad_based': None,
        'cem': None,
        'it_am': None,
        'goal_cond': None
    }

    goals = []
    print('Sampling goals...')
    for step_ind in range(N_TOTAL_STEPS):
        new_goal_states = np.stack([
            episode_states[np.random.randint(0, 25)] for _ in range(BATCH_SIZE)
        ])
        # goal_state = episode_states[goal_ind]
        new_goal_states = torch.from_numpy(new_goal_states).float().view(
            BATCH_SIZE, -1)
        new_goal_states[:, 8:] *= 0.
        if step_ind == 0:
            goal_state = new_goal_states
        else:
            # randomly change the goal state with some small probability
            flips = (torch.rand(BATCH_SIZE, 1) <
                     GOAL_FLIP_PROB).float().repeat(1,
                                                    new_goal_states.shape[-1])
            goal_state = (1 - flips) * goal_state + flips * new_goal_states
        goals.append(goal_state)

    print('Evaluating gradient-based agent...')
    agent.inference_optimizer = GradientBasedInference(lr=1e-3, n_inf_iters=50)
    grad_based_results = collect_goal_optimization(agent, sync_env, goals)
    total_results['grad_based'] = grad_based_results
    print('Done.')

    # print('Evaluating CEM agent...')
    # agent.inference_optimizer = CEMInference(lr=1e-3, n_top_samples=10, n_inf_iters=50)
    # agent.n_action_samples = 100
    # cem_results = collect_goal_optimization(agent, sync_env, goals)
    # total_results['cem'] = cem_results
    # print('Done.')

    print('Evaluating iterative amortized agent...')
    # create an iterative amortized optimizer
    inputs = ['params', 'grads', 'state']
    n_input = 24
    if 'state' in inputs:
        n_input += 17
    network_args = {
        'type': 'recurrent',
        'n_layers': 2,
        'inputs': inputs,
        'n_units': 512,
        'connectivity': 'highway',
        'n_input': n_input
    }
    agent.inference_optimizer = IterativeInferenceModel(
        network_args=network_args, n_inf_iters=10)
    for m in agent.approx_post.models:
        agent.approx_post.models[m] = FullyConnectedLayer(512, 6)
        agent.approx_post.gates[m] = FullyConnectedLayer(
            512, 6, non_linearity='sigmoid')
        agent.approx_post.update = 'iterative'
    # create a parameter optimizer for the inference model
    inference_parameters = [_ for _ in agent.inference_optimizer.parameters()
                            ] + [_ for _ in agent.approx_post.parameters()]
    inf_optim = optim.Adam(inference_parameters, lr=3e-4)
    it_am_results = collect_goal_optimization(agent,
                                              sync_env,
                                              goals,
                                              inf_optim=inf_optim)
    total_results['it_am'] = it_am_results
    print('Done.')

    print('Evaluating goal-conditioned agent...')
    # create a direct, goal-conditioned network
    network_args = {
        'type': 'fully_connected',
        'n_layers': 2,
        'inputs': ['state', 'goal'],
        'n_units': 512,
        'connectivity': 'highway',
        'n_input': 17 + 17
    }
    agent.inference_optimizer = DirectGoalInferenceModel(
        network_args=network_args)
    for m in agent.approx_post.models:
        agent.approx_post.models[m] = FullyConnectedLayer(512, 6)
        agent.approx_post.update = 'direct'
    # create a parameter optimizer for the inference model
    inference_parameters = [_ for _ in agent.inference_optimizer.parameters()
                            ] + [_ for _ in agent.approx_post.parameters()]
    inf_optim = optim.Adam(inference_parameters, lr=3e-4)
    goal_cond_results = collect_goal_optimization(agent,
                                                  sync_env,
                                                  goals,
                                                  inf_optim=inf_optim)
    total_results['goal_cond'] = goal_cond_results
    print('Done.')

    if write_results:
        pickle.dump(total_results,
                    open('comp_goal_opt_' + model_exp_key + '.p', 'wb'))

    return total_results
Example #10
def goal_optimization(model_exp_key, opt_exp_key=None, write_results=True):
    """
    Optimize random goal states using a model-based estimator.
    Note: tailored to HalfCheetah-v2 environment currently.

    Args:
        model_exp_key (str): model-based experiment key
        opt_exp_key (str): optimizer experiment key
        write_results (bool): whether to pickle results directly
    """

    # load the experiment
    comet_api = comet_ml.API(api_key=LOADING_API_KEY)
    experiment = comet_api.get_experiment(project_name=PROJECT_NAME,
                                          workspace=WORKSPACE,
                                          experiment=model_exp_key)

    # create the environment
    param_summary = experiment.get_parameters_summary()
    env_name = [a for a in param_summary
                if a['name'] == 'env'][0]['valueCurrent']
    env = create_env(env_name)

    # create the agent
    asset_list = experiment.get_asset_list()
    agent_config_asset_list = [
        a for a in asset_list if 'agent_args' in a['fileName']
    ]
    agent_args = None
    if len(agent_config_asset_list) > 0:
        # if we've saved the agent config dict, load it
        agent_args = experiment.get_asset(
            agent_config_asset_list[0]['assetId'])
        agent_args = json.loads(agent_args)
        agent_args = agent_args if 'opt_type' in agent_args[
            'inference_optimizer_args'] else None
    agent = create_agent(env, agent_args=agent_args)[0]

    # also, load the most recent episode to sample goal states
    asset_times = [
        asset['createdAt'] for asset in asset_list
        if 'state' in asset['fileName']
    ]
    state_asset = [
        a for a in asset_list if a['createdAt'] == max(asset_times)
    ][0]
    episode_states = json.loads(experiment.get_asset(state_asset['assetId']))

    # load the checkpoint
    load_checkpoint(agent, model_exp_key)

    # load the optimizer
    if opt_exp_key is not None:
        # load the experiment
        comet_api = comet_ml.API(api_key=LOADING_API_KEY)
        opt_experiment = comet_api.get_experiment(project_name=PROJECT_NAME,
                                                  workspace=WORKSPACE,
                                                  experiment=opt_exp_key)

        # create the agent
        asset_list = opt_experiment.get_asset_list()
        agent_config_asset_list = [
            a for a in asset_list if 'agent_args' in a['fileName']
        ]
        agent_args = None
        if len(agent_config_asset_list) > 0:
            # if we've saved the agent config dict, load it
            agent_args = opt_experiment.get_asset(
                agent_config_asset_list[0]['assetId'])
            agent_args = json.loads(agent_args)
            agent_args = agent_args if 'opt_type' in agent_args[
                'inference_optimizer_args'] else None
        opt_agent = create_agent(env, agent_args=agent_args)[0]

        # load the checkpoint
        load_checkpoint(opt_agent, opt_exp_key)

        agent.inference_optimizer = opt_agent.inference_optimizer
        agent.inference_optimizer.n_inf_iters = 20
    else:
        # create a gradient-based optimizer
        agent.inference_optimizer = GradientBasedInference(lr=1e-3,
                                                           n_inf_iters=50)

    # swap out the value estimator for goal-based estimator
    gb_estimator = GoalBasedQEstimator()
    # copy over the dynamics model
    gb_estimator.state_likelihood_model = agent.q_value_estimator.state_likelihood_model
    gb_estimator.state_variable = agent.q_value_estimator.state_variable
    # set the estimator
    agent.q_value_estimator = gb_estimator
    agent.q_value_estimator.set_goal_std(GOAL_STD)
    # agent.alphas['pi'] = 0.

    # optimize goal states
    goal_states = []
    traj_states = []
    env_states = {'qpos': [], 'qvel': []}
    actions = []
    inf_objectives = []

    agent.reset()
    agent.eval()
    state = env.reset()
    if RENDER:
        env.render()
    goal_state = None

    # goal_ind = 0

    print('Collecting goal-optimization episode...')
    for step_ind in range(N_TOTAL_STEPS):
        print('STEP: ' + str(step_ind))
        if step_ind % GOAL_INTERVAL == 0:
            goal_state = episode_states[np.random.randint(0, 25)]
            # goal_state = episode_states[goal_ind]
            goal_state = torch.from_numpy(np.array(goal_state)).float().view(
                1, -1)
            goal_state[:, 8:] *= 0.
            if not TRAJECTORY_FOLLOW:
                agent.q_value_estimator.set_goal_state(goal_state)
            # goal_ind += 1
        if TRAJECTORY_FOLLOW:
            # define a sub-goal between current state and goal state
            delta_state = goal_state - state
            traj_state = state + 0.1 * delta_state
            agent.q_value_estimator.set_goal_state(traj_state)
            traj_states.append(traj_state)
        else:
            traj_states.append(goal_state)
        goal_states.append(goal_state)
        env_states['qpos'].append(copy.deepcopy(env.sim.data.qpos))
        env_states['qvel'].append(copy.deepcopy(env.sim.data.qvel))
        action = agent.act(state, eval=True)
        state, _, _, _ = env.step(action)
        inf_objectives.append(agent.inference_optimizer.estimated_objectives)
        # import ipdb; ipdb.set_trace()
        agent.inference_optimizer.reset(1)
        if RENDER:
            env.render()
        actions.append(action)
    print('Done.')

    # save the results
    results = {
        'goal_states': goal_states,
        'traj_states': traj_states,
        'env_states': env_states,
        'actions': actions
    }

    if write_results:
        pickle.dump(results, open('goal_opt_' + model_exp_key + '.p', 'wb'))

    return results
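A hedged usage sketch; the model experiment key is a placeholder, and the default gradient-based inference optimizer is used because no opt_exp_key is given:

results = goal_optimization('abc123', write_results=False)
print(len(results['actions']), 'steps of goal-directed behavior collected')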
Example #11
def goal_optimization_training(model_exp_key,
                               opt_exp_key=None,
                               write_results=True,
                               stochastic_model=False,
                               train_model=False):
    """
    Optimize random goal states using a model-based estimator.
    Train the policy optimizer online.
    Note: tailored to HalfCheetah-v2 environment currently.

    Args:
        model_exp_key (str): model-based experiment key
        opt_exp_key (str): optimizer experiment key. If None, trains from scratch
        write_results (bool): whether to pickle results directly
        stochastic_model (bool): whether to sample states or use mean estimate
        train_model (bool): whether to train the model online
    """

    # load the experiment
    comet_api = comet_ml.API(api_key=LOADING_API_KEY)
    experiment = comet_api.get_experiment(project_name=PROJECT_NAME,
                                          workspace=WORKSPACE,
                                          experiment=model_exp_key)

    # create the environment
    param_summary = experiment.get_parameters_summary()
    env_name = [a for a in param_summary
                if a['name'] == 'env'][0]['valueCurrent']
    env = create_env(env_name)
    # create a synchronous env to parallelize training
    sync_env = SynchronousEnv(env, BATCH_SIZE)

    # create the agent
    asset_list = experiment.get_asset_list()
    agent_config_asset_list = [
        a for a in asset_list if 'agent_args' in a['fileName']
    ]
    agent_args = None
    if len(agent_config_asset_list) > 0:
        # if we've saved the agent config dict, load it
        agent_args = experiment.get_asset(
            agent_config_asset_list[0]['assetId'])
        agent_args = json.loads(agent_args)
        agent_args = agent_args if 'opt_type' in agent_args[
            'inference_optimizer_args'] else None
    agent = create_agent(env, agent_args=agent_args)[0]

    # also, load the most recent episode to sample goal states
    asset_times = [
        asset['createdAt'] for asset in asset_list
        if 'state' in asset['fileName']
    ]
    state_asset = [
        a for a in asset_list if a['createdAt'] == max(asset_times)
    ][0]
    episode_states = json.loads(experiment.get_asset(state_asset['assetId']))

    # load the checkpoint
    load_checkpoint(agent, model_exp_key)

    if stochastic_model:
        agent.q_value_estimator.state_variable.cond_likelihood.stochastic = True

    # load the optimizer
    if opt_exp_key is not None:
        # load the experiment
        comet_api = comet_ml.API(api_key=LOADING_API_KEY)
        opt_experiment = comet_api.get_experiment(project_name=PROJECT_NAME,
                                                  workspace=WORKSPACE,
                                                  experiment=opt_exp_key)

        # create the agent
        asset_list = opt_experiment.get_asset_list()
        agent_config_asset_list = [
            a for a in asset_list if 'agent_args' in a['fileName']
        ]
        agent_args = None
        if len(agent_config_asset_list) > 0:
            # if we've saved the agent config dict, load it
            agent_args = opt_experiment.get_asset(
                agent_config_asset_list[0]['assetId'])
            agent_args = json.loads(agent_args)
            agent_args = agent_args if 'opt_type' in agent_args[
                'inference_optimizer_args'] else None
        opt_agent = create_agent(env, agent_args=agent_args)[0]

        # load the checkpoint
        load_checkpoint(opt_agent, opt_exp_key)

        agent.inference_optimizer = opt_agent.inference_optimizer
        agent.inference_optimizer.n_inf_iters = 10
    else:
        # create an iterative amortized optimizer
        n_input = 12
        if ENCODING_TYPE == 'grads':
            inputs = ['params', 'grads']
            n_input += 12
        elif ENCODING_TYPE == 'errors':
            inputs = ['params', 'errors']
            n_input += (17 + 17 + 6)
        n_units = 512
        # network_args = {'type': 'fully_connected',
        #                 'n_layers': 2,
        #                 'inputs': inputs,
        #                 'n_units': n_units,
        #                 'connectivity': 'highway',
        #                 'batch_norm': False,
        #                 'non_linearity': 'elu',
        #                 'dropout': None,
        #                 'separate_networks': False,
        #                 'n_input': n_input}
        network_args = {
            'type': 'recurrent',
            'n_layers': 2,
            'inputs': inputs,
            'n_units': n_units,
            'connectivity': 'highway',
            'batch_norm': False,
            'dropout': None,
            'separate_networks': False,
            'n_input': n_input
        }
        agent.inference_optimizer = IterativeInferenceModel(
            network_args=network_args,
            n_inf_iters=5,
            encoding_type=ENCODING_TYPE)
        for m in agent.approx_post.models:
            agent.approx_post.models[m] = FullyConnectedLayer(n_units, 6)
            agent.approx_post.gates[m] = FullyConnectedLayer(
                n_units, 6, non_linearity='sigmoid')

    # create a parameter optimizer for the inference model
    inference_parameters = [_ for _ in agent.inference_optimizer.parameters()
                            ] + [_ for _ in agent.approx_post.parameters()]
    param_opt = optim.Adam(inference_parameters, lr=3e-4)

    # swap out the value estimator for goal-based estimator
    gb_estimator = GoalBasedQEstimator()
    # copy over the dynamics model
    gb_estimator.state_likelihood_model = agent.q_value_estimator.state_likelihood_model
    gb_estimator.state_variable = agent.q_value_estimator.state_variable
    # set the estimator
    agent.q_value_estimator = gb_estimator
    agent.q_value_estimator.set_goal_std(GOAL_STD)
    # agent.alphas['pi'] = 0.

    model_param_opt = None
    if train_model:
        # create a parameter optimizer for the inference model
        model_parameters = [
            _ for _ in
            agent.q_value_estimator.state_likelihood_model.parameters()
        ] + [_ for _ in agent.q_value_estimator.state_variable.parameters()]
        model_param_opt = optim.Adam(model_parameters, lr=3e-4)

    # optimize goal states
    goal_states = []
    traj_states = []
    env_states = {'qpos': [], 'qvel': []}
    actions = []
    inf_objectives = []
    state_log_likelihoods = []
    state_squared_errors = []
    state_locs = []
    state_scales = []
    model_cll_training = []

    agent.reset(batch_size=BATCH_SIZE)
    agent.eval()
    state = sync_env.reset()
    if RENDER:
        env.render()
    goal_state = None
    state_likelihood = None

    # goal_ind = 0

    print('Collecting goal-optimization episode...')
    for step_ind in range(N_TOTAL_STEPS):
        print('STEP: ' + str(step_ind))
        # if step_ind % GOAL_INTERVAL == 0:
        if True:
            new_goal_states = np.stack([
                episode_states[np.random.randint(0, 25)]
                for _ in range(BATCH_SIZE)
            ])
            # goal_state = episode_states[goal_ind]
            new_goal_states = torch.from_numpy(new_goal_states).float().view(
                BATCH_SIZE, -1)
            new_goal_states[:, 8:] *= 0.
            if step_ind == 0:
                goal_state = new_goal_states
            else:
                # randomly change the goal state with some small probability
                flips = (torch.rand(BATCH_SIZE, 1) <
                         GOAL_FLIP_PROB).float().repeat(
                             1, new_goal_states.shape[-1])
                goal_state = (1 - flips) * goal_state + flips * new_goal_states
            if not TRAJECTORY_FOLLOW:
                agent.q_value_estimator.set_goal_state(goal_state)
            # goal_ind += 1
        if TRAJECTORY_FOLLOW:
            # define a sub-goal between current state and goal state
            delta_state = goal_state - state
            traj_state = state + 0.1 * delta_state
            agent.q_value_estimator.set_goal_state(traj_state)
            traj_states.append(traj_state)
        else:
            traj_states.append(goal_state)
        goal_states.append(goal_state)
        qpos = np.stack(
            [copy.deepcopy(e.sim.data.qpos) for e in sync_env.envs])
        qvel = np.stack(
            [copy.deepcopy(e.sim.data.qvel) for e in sync_env.envs])
        env_states['qpos'].append(qpos)
        env_states['qvel'].append(qvel)
        action = agent.act(state, eval=True)
        state, _, _, _ = sync_env.step(action)
        inf_objectives.append(agent.inference_optimizer.estimated_objectives)

        if train_model:
            agent.q_value_estimator.generate(agent)
            cll = -agent.q_value_estimator.state_variable.cond_log_likelihood(
                state).view(-1, 1).mean()
            model_cll_training.append(cll.detach().item())
            cll.backward()
            model_param_opt.step()

        if state_likelihood is not None:
            state_ll = state_likelihood.log_prob(state)
            state_log_likelihoods.append(state_ll)
            state_squared_error = (state_likelihood.loc - state).pow(2)
            state_squared_errors.append(state_squared_error)

        state_loc = agent.collector.distributions['state']['cond_like']['loc'][
            -1]
        state_scale = agent.collector.distributions['state']['cond_like'][
            'scale'][-1]
        state_locs.append(state_loc)
        state_scales.append(state_scale)
        state_likelihood = Normal(state_loc, state_scale)

        # update the inference optimizer
        grads = [param.grad for param in inference_parameters]
        divide_gradients_by_value(grads, agent.inference_optimizer.n_inf_iters)
        divide_gradients_by_value(grads, BATCH_SIZE)
        param_opt.step()
        param_opt.zero_grad()

        agent.inference_optimizer.reset(BATCH_SIZE)
        if RENDER:
            env.render()
        actions.append(action)
    print('Done.')

    # save the results
    results = {
        'goal_states': goal_states,
        'traj_states': traj_states,
        'env_states': env_states,
        'actions': actions,
        'inf_objectives': inf_objectives,
        'state_locs': state_locs,
        'state_scales': state_scales,
        'state_log_likelihoods': state_log_likelihoods,
        'state_squared_errors': state_squared_errors,
        'model_cll_training': model_cll_training
    }

    if write_results:
        pickle.dump(results, open('goal_opt_' + model_exp_key + '.p', 'wb'))

    return results
Example #12
def evaluate_estimator(exp_key, n_state_action, n_mc_samples, device_id=None):
    """
    Evaluates the value estimator of a cached experiment throughout learning.

    Args:
        exp_key (str): the string of the comet experiment key
        n_state_action (int): number of state action pairs to evaluate
        n_mc_samples (int): number of Monte Carlo samples to estimate
                            environment returns
        device_id (int, optional): device on which to create the agent

    Returns dictionary containing:
                ckpt_timesteps [n_ckpts]
                value_estimates [n_ckpts, n_state_action, 1],
                direct_value_estimates [n_ckpts, n_state_action, 1]
                mc_estimates [n_ckpts, n_state_action, n_mc_samples]
    """
    # load the experiment
    comet_api = comet_ml.API(api_key=LOADING_API_KEY)
    experiment = comet_api.get_experiment(project_name=PROJECT_NAME,
                                          workspace=WORKSPACE,
                                          experiment=exp_key)

    # create the corresponding environment
    param_summary = experiment.get_parameters_summary()
    env_name = [a for a in param_summary
                if a['name'] == 'env'][0]['valueCurrent']
    env = create_env(env_name)

    # collect state-action samples using random policy
    print('Collecting ' + str(n_state_action) + ' state-action pairs...')
    sa_pairs = {'states': [], 'env_states': [], 'actions': []}
    state = env.reset()
    env_state = (copy.deepcopy(env.sim.data.qpos),
                 copy.deepcopy(env.sim.data.qvel))
    for _ in range(n_state_action):
        action = env.action_space.sample()
        next_state, reward, done, _ = env.step(action)
        sa_pairs['states'].append(state)
        sa_pairs['env_states'].append(env_state)
        sa_pairs['actions'].append(torch.from_numpy(action).view(1, -1))
        state = env.reset() if done else next_state
        env_state = (copy.deepcopy(env.sim.data.qpos),
                     copy.deepcopy(env.sim.data.qvel))
    print('Done.')

    # enumerate state-action pairs, estimating returns at each stage of learning
    asset_list = experiment.get_asset_list()
    agent_config_asset_list = [
        a for a in asset_list if 'agent_args' in a['fileName']
    ]
    agent_args = None
    if len(agent_config_asset_list) > 0:
        # if we've saved the agent config dict, load it
        agent_args = experiment.get_asset(
            agent_config_asset_list[0]['assetId'])
        agent_args = json.loads(agent_args)
        agent_args = agent_args if 'opt_type' in agent_args[
            'inference_optimizer_args'] else None
    agent = create_agent(env, agent_args=agent_args, device_id=device_id)[0]
    # get the list of checkpoint timesteps
    ckpt_asset_list = [a for a in asset_list if 'ckpt' in a['fileName']]
    ckpt_asset_names = [a['fileName'] for a in ckpt_asset_list]
    ckpt_timesteps = [
        int(s.split('ckpt_step_')[1].split('.ckpt')[0])
        for s in ckpt_asset_names
    ]

    # convert n_mc_samples to a round number of batches
    n_batches = math.ceil(n_mc_samples / ROLLOUT_BATCH_SIZE)
    n_mc_samples = ROLLOUT_BATCH_SIZE * n_batches

    # sub-sample the checkpoint timesteps before allocating the result arrays
    ckpt_timesteps = list(np.sort(ckpt_timesteps)[::CKPT_SUBSAMPLE])
    value_estimates = np.zeros((len(ckpt_timesteps), n_state_action, 1))
    direct_value_estimates = np.zeros((len(ckpt_timesteps), n_state_action, 1))
    mc_estimates = np.zeros(
        (len(ckpt_timesteps), n_state_action, n_mc_samples))
    # iterate over the sub-sampled checkpoints, evaluating each one
    for ckpt_ind, ckpt_timestep in enumerate(ckpt_timesteps):
        # load the checkpoint
        print('Evaluating checkpoint ' + str(ckpt_ind + 1) + ' of ' +
              str(len(ckpt_timesteps)))
        load_checkpoint(agent, exp_key, ckpt_timestep)
        # get value estimate and estimate returns for the state-action pairs
        for sa_ind, (env_state, state, act) in enumerate(
                zip(sa_pairs['env_states'], sa_pairs['states'],
                    sa_pairs['actions'])):
            t_start = time.time()
            action_value_estimate = get_agent_value_estimate(agent, state, act)
            value_estimates[ckpt_ind,
                            sa_ind, :] = action_value_estimate['estimate']
            direct_value_estimates[ckpt_ind,
                                   sa_ind, :] = action_value_estimate['direct']
            returns = estimate_monte_carlo_return(env, agent, env_state, state,
                                                  act, n_batches)
            mc_estimates[ckpt_ind, sa_ind, :] = returns
            if sa_ind % 1 == 0:
                print('  Evaluated ' + str(sa_ind + 1) + ' of ' +
                      str(len(sa_pairs['states'])) + ' state-action pairs.')
                print('  Duration: ' + '{:.2f}'.format(time.time() - t_start) +
                      ' s / state-action pair.')

    # TODO: log the value estimates to comet (need to json-ify the numpy arrays)
    # prev_exp = comet_ml.ExistingExperiment(api_key=LOGGING_API_KEY,
    #                                        previous_experiment=exp_key)
    # prev_exp.log_asset_data(value_estimates, name='value_estimates')
    # prev_exp.log_asset_data(direct_value_estimates, name='direct_value_estimates')
    # prev_exp.log_asset_data(mc_estimates, name='mc_estimates')

    return {
        'ckpt_timesteps': ckpt_timesteps,
        'value_estimates': value_estimates,
        'direct_value_estimates': direct_value_estimates,
        'mc_estimates': mc_estimates
    }
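A hedged usage sketch to close out the examples; the experiment key is a placeholder and the evaluation sizes are kept small for illustration:

estimates = evaluate_estimator('abc123', n_state_action=10, n_mc_samples=100)
# compare learned value estimates against Monte Carlo returns at each checkpoint
print(estimates['value_estimates'].shape, estimates['mc_estimates'].shape)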