Esempio n. 1
0
def main():
    """
    Prototype of the thought process: wire up an agent with its goals,
    register the thoughts it can have, and link goals to thoughts.
    """

    world = mod_env.Environment('Earth')  # may implement later, but not needed now

    goal_list = [
        mod_goal.Goal('Be Happy'),
        mod_goal.Goal('Get Rich'),
        mod_goal.Goal('Learn to Juggle'),
    ]
    agent = ThinkingAgent('Jenny', goal_list)
    print(agent)

    # define the thoughts and actions that can occur
    # each spec: (label, result_success, result_fail, time_cost, cash_cost)
    thought_specs = [
        ('Hang out with Friends', actions.fun, [], 1, 0.0),
        ('Find a Job', actions.job, emotions.sadness, 3, 0.2),
        ('Go to Work', objects.cash, emotions.sadness, 5, -10),
        ('Read a book', [actions.fun, actions.training], [], 3, 0),
        ('Rob a bank', [objects.cash, actions.prison], [emotions.sadness, actions.prison], 1, 0.2),
    ]
    thoughts = Thoughts(agent)
    for label, success, fail, hours, cash in thought_specs:
        thoughts.add(Thought(label, result_success=success, result_fail=fail,
                             time_cost=hours, cash_cost=cash))

    # connect each goal to the thoughts that can serve it
    links = Links(agent)
    for goal_idx, thought_idx in ((0, 0), (1, 1), (1, 2), (0, 3)):
        links.add_link('goal', goal_list[goal_idx], 'thought', thoughts.thoughts[thought_idx])

    print(links)
Esempio n. 2
0
def run(args):
  """ Either generates a dataset from a baseline and computes its associated counts, or evaluates a baseline.

  Loads the first ``*.yaml`` config found in ``args.baseline_dir``, builds the
  environment and baseline, then either evaluates the baseline (and returns)
  or generates a dataset and computes its counts.

  Raises:
      ValueError: when no yaml configuration file is found in the directory.
  """

  # fix random seed for reproducibility
  np.random.seed(args.seed)

  # BUG FIX: 'params' was previously unbound when no .yaml file existed,
  # so the 'if not params' check raised NameError instead of the intended
  # ValueError below.
  params = None
  for fff in os.listdir(args.baseline_dir):
    if fff.endswith(".yaml"):
      yaml_file = os.path.join(args.baseline_dir, fff)
      # context manager closes the file handle deterministically
      with open(yaml_file, 'r') as cfg:
        params = yaml.safe_load(cfg)
      print('Loading config from {}'.format(yaml_file))
      break
  if not params:
    raise ValueError('We could not find the configuration file for the baseline, it should be a yaml file.')

  if args.extra_stochasticity > 0.0:
    # command-line value overrides the one from the config file
    params['extra_stochasticity'] = args.extra_stochasticity

  env = environment.Environment(params['domain'], params)

  baseline = Baseline(os.path.join(args.baseline_dir, args.baseline_name), params['network_size'], state_shape=params['state_shape'],
                      nb_actions=params['nb_actions'], seed=args.seed, temperature=args.temperature,
                      device=args.device, normalize=params['normalize'])

  if args.evaluate_baseline:
    # evaluation-only mode: no dataset is produced
    baseline.evaluate_baseline(env, params, args.eval_steps, args.eval_epochs, args.noise_factor)
    return

  print("Generating dataset with actual size {}...".format(args.dataset_size), flush=True)
  dataset = baseline.generate_dataset(
      env, os.path.join(args.baseline_dir, args.dataset_dir), params, dataset_size=args.dataset_size,
      overwrite=args.overwrite, noise_factor=args.noise_factor)

  compute_counts(dataset, overwrite=args.overwrite, param=args.param)
Esempio n. 3
0
def run(args):
    """ Either generates a dataset from a baseline and computes its associated counts, or evaluates a baseline.

    Loads the first ``*.yaml`` config in ``args.baseline_dir``, resolves the
    random seed (command line takes precedence over the config file), then
    evaluates the baseline and/or generates a dataset depending on the flags.

    Raises:
        ValueError: when no yaml configuration file is found in the directory.
    """
    # Locate and load the first .yaml config; the for/else raises when the
    # loop completes without hitting 'break' (i.e. no yaml file was found).
    for fff in os.listdir(args.baseline_dir):
        if fff.endswith(".yaml"):
            yaml_file = os.path.join(args.baseline_dir, fff)
            # context manager closes the file handle deterministically
            # (previously the handle from open() was never closed)
            with open(yaml_file, 'r') as cfg:
                params = yaml.safe_load(cfg)
            print('Loading config from {}'.format(yaml_file))
            break
    else:
        # no yaml file found
        raise ValueError('We could not find the configuration file for the baseline, it should be a yaml file.')

    if args.seed is not None:
        # if seed is given in the command line ignores seed from yaml file
        np.random.seed(args.seed)
        params['seed'] = args.seed
    else:
        if 'seed' in params:
            args.seed = params['seed']
        else:
            print("no seed found, using 123")
            args.seed = 123
            params['seed'] = 123

    if args.extra_stochasticity > 0.0:
        # command-line value overrides the one from the config file
        params['extra_stochasticity'] = args.extra_stochasticity

    env = environment.Environment(params['domain'], params)

    baseline = Baseline(network_size=params['network_size'],
                        network_path=os.path.join(args.baseline_dir, args.baseline_name),
                        state_shape=params['state_shape'], nb_actions=params['nb_actions'], device=args.device,
                        seed=args.seed, temperature=args.temperature, normalize=params['normalize'],
                        results_folder=args.baseline_dir)

    if args.evaluate_baseline:
        baseline.evaluate_baseline(env, args.eval_steps, args.eval_epochs, args.noise_factor, save_results=True)

    if args.generate_dataset:
        print("Generating dataset with actual size {}...".format(args.dataset_size), flush=True)
        dataset = baseline.generate_dataset(
            env, args.dataset_dir, params, dataset_size=args.dataset_size,
            overwrite=args.overwrite, noise_factor=args.noise_factor)

        compute_counts(dataset, overwrite=args.overwrite, count_param=args.count_param)
Esempio n. 4
0
def run(domain, config, options):
    """Run a batch or online experiment for the given domain.

    Loads a yaml configuration (defaulting to ``config_<domain>.yaml`` next
    to this file), applies command-line overrides, seeds all RNG sources,
    then runs either a BatchExperiment (from a pre-computed counted dataset)
    or an online DQNExperiment, repeated ``num_experiments`` times.

    Args:
        domain: name of the environment domain.
        config: config-file basename without extension; falls back to
            ``'config_' + domain`` when falsy.
        options: iterable of ``(key, value)`` string pairs overriding params.
    """

    dir_path = os.path.dirname(os.path.realpath(__file__))
    if not config:
        config = 'config_' + domain
    cfg_file = os.path.join(dir_path, config + '.yaml')
    # context manager closes the file handle deterministically
    # (previously the handle from open() was never closed)
    with open(cfg_file, 'r') as cfg:
        params = yaml.safe_load(cfg)

    # replacing params with command line options
    for opt in options:
        assert opt[0] in params, 'unknown parameter: {}'.format(opt[0])
        dtype = type(params[opt[0]])
        if dtype == bool:
            # bool('False') is truthy, so compare against the literal 'True'
            new_opt = (opt[1] == 'True')
        else:
            new_opt = dtype(opt[1])
        params[opt[0]] = new_opt

    print('\n')
    print('Parameters ')
    for key in params:
        print(key, params[key])
    print('\n')

    # seed every RNG source for reproducibility
    np.random.seed(params['seed'])
    torch.manual_seed(params['seed'])
    random_state = np.random.RandomState(params['seed'])
    device = torch.device(params["device"])

    DATA_DIR = os.path.join(params['folder_location'], params['folder_name'])

    env = environment.Environment(domain, params, random_state)

    if params['batch']:
        # batch mode: learn offline from a pre-generated dataset with counts
        from baseline import Baseline
        baseline_path = os.path.join(DATA_DIR, params['baseline_path'])
        baseline = Baseline(baseline_path,
                            params['network_size'],
                            state_shape=params['state_shape'],
                            nb_actions=params['nb_actions'],
                            seed=params['seed'],
                            temperature=params['baseline_temp'],
                            device=params['device'],
                            normalize=params['normalize'])

        dataset_path = os.path.join(DATA_DIR, params['dataset_path'])
        print("\nLoading dataset from file {}".format(dataset_path),
              flush=True)
        if not os.path.exists(dataset_path):
            raise ValueError("The dataset file does not exist")
        with open(dataset_path, "rb") as f:
            data = pickle.load(f)
        dataset = Dataset_Counts(data, params['count_param'])
        print("Data with counts loaded: {} samples".format(len(data['s'])),
              flush=True)
        # experiment results live next to the dataset file
        folder_name = os.path.dirname(dataset_path)
        expt = BatchExperiment(
            dataset=dataset,
            env=env,
            folder_name=folder_name,
            episode_max_len=params['episode_max_len'],
            minimum_count=params['minimum_count'],
            extra_stochasticity=params['extra_stochasticity'],
            history_len=params['history_len'],
            max_start_nullops=params['max_start_nullops'])

    else:
        # Create experiment folder
        if not os.path.exists(DATA_DIR):
            os.makedirs(DATA_DIR)

        # online mode: no behavior baseline, the agent interacts directly
        baseline = None
        expt = DQNExperiment(env=env,
                             ai=None,
                             episode_max_len=params['episode_max_len'],
                             annealing=params['annealing'],
                             history_len=params['history_len'],
                             max_start_nullops=params['max_start_nullops'],
                             replay_min_size=params['replay_min_size'],
                             test_epsilon=params['test_epsilon'],
                             folder_name=DATA_DIR,
                             network_path=params['network_path'],
                             extra_stochasticity=params['extra_stochasticity'],
                             score_window_size=100)

    for ex in range(params['num_experiments']):
        print('\n')
        print('>>>>> Experiment ',
              ex,
              ' >>>>> ',
              params['learning_type'],
              ' >>>>> Epsilon >>>>> ',
              params['epsilon_soft'],
              ' >>>>> Minimum Count >>>>> ',
              params['minimum_count'],
              ' >>>>> Kappa >>>>> ',
              params['kappa'],
              ' >>>>> ',
              flush=True)
        print('\n')
        # a fresh AI per experiment repetition
        ai = AI(baseline,
                state_shape=env.state_shape,
                nb_actions=env.nb_actions,
                action_dim=params['action_dim'],
                reward_dim=params['reward_dim'],
                history_len=params['history_len'],
                gamma=params['gamma'],
                learning_rate=params['learning_rate'],
                epsilon=params['epsilon'],
                final_epsilon=params['final_epsilon'],
                test_epsilon=params['test_epsilon'],
                annealing_steps=params['annealing_steps'],
                minibatch_size=params['minibatch_size'],
                replay_max_size=params['replay_max_size'],
                update_freq=params['update_freq'],
                learning_frequency=params['learning_frequency'],
                ddqn=params['ddqn'],
                learning_type=params['learning_type'],
                network_size=params['network_size'],
                normalize=params['normalize'],
                device=device,
                kappa=params['kappa'],
                minimum_count=params['minimum_count'],
                epsilon_soft=params['epsilon_soft'])
        expt.ai = ai

        env.reset()
        with open(expt.folder_name + '/config.yaml', 'w') as y:
            yaml.safe_dump(params, y)  # saving params for reference
        expt.do_epochs(number_of_epochs=params['num_epochs'],
                       is_learning=params['is_learning'],
                       steps_per_epoch=params['steps_per_epoch'],
                       is_testing=params['is_testing'],
                       steps_per_test=params['steps_per_test'],
                       passes_on_dataset=params['passes_on_dataset'],
                       exp_id=ex)
Esempio n. 5
0
    def __init__(self, training_steps, validation_steps, validation_size,
                 mini_batch_size, learning_rate, number_of_epochs,
                 network_size, folder_location, dataset_file,
                 cloned_network_path, sample_from_env, entropy_coefficient,
                 device, seed, experiment_name, config_file,
                 update_learning_rate):
        """Set up behavior-cloning training: config, data, models, optimizer.

        Loads the yaml config, seeds the RNGs, splits the dataset into
        train / validation, builds the cloned policy networks, and (when
        ``sample_from_env`` is set) the baseline used to sample from the
        environment.

        Raises:
            FileNotFoundError: when ``config_file`` does not exist.
        """

        self.sample_from_env = sample_from_env
        self.smaller_validation_loss = None
        self.seed = seed
        try:
            # context manager closes the config file deterministically
            # (previously the handle from open() was never closed)
            with open(config_file, 'r') as cfg:
                self.params = yaml.safe_load(cfg)
        except FileNotFoundError as e:
            print(
                "Configuration file not found; Define a config_file to be able to sample from environment"
            )
            raise e

        # initialize seeds for reproducibility
        np.random.seed(seed)
        torch.manual_seed(seed)

        # set paths for data and output path
        log_path = os.path.join('./logs/' + experiment_name)
        data_dir = folder_location
        dataset_path = dataset_file
        # outputs (cloned weights, results) go next to the dataset file
        self.output_folder = os.path.dirname(dataset_path)
        self.cloned_network_path = os.path.join(os.path.dirname(dataset_path),
                                                cloned_network_path)

        # start
        self.logger = SummaryWriter(log_path)

        # import data
        full_dataset = Dataset_Counts.load_dataset(dataset_path)
        self.dataset_train, self.dataset_validation = full_dataset.train_validation_split(
            test_size=validation_size)

        # set training parameters
        self.mini_batch_size = mini_batch_size
        self.number_of_epochs = number_of_epochs
        self.network_size = network_size
        self.entropy_coefficient = entropy_coefficient
        self.device = device
        self.learning_rate = learning_rate
        self.update_learning_rate = update_learning_rate

        # 0 means "derive from dataset size": one pass per epoch
        if training_steps != 0:
            self.training_steps = training_steps
        else:
            self.training_steps = int(self.dataset_train.size /
                                      self.mini_batch_size)
        if validation_steps != 0:
            self.validation_steps = validation_steps
        else:
            self.validation_steps = int(self.dataset_validation.size /
                                        self.mini_batch_size)
        # NOTE(review): log_frequency is 0 when training_steps < 10 — confirm
        # downstream logging guards against a zero interval.
        self.log_frequency = int(self.training_steps / 10)
        print(
            "Training with {} training steps and {} validation steps ".format(
                self.training_steps, self.validation_steps))

        # create model: the policy being trained, plus a snapshot holder
        # for the best-so-far validation policy
        self.cloned_baseline_policy = ClonedBaseline(
            network_size=network_size,
            network_path=None,
            state_shape=self.params['state_shape'],
            nb_actions=self.params['nb_actions'],
            device=device,
            seed=seed,
            temperature=0)
        self.best_policy = ClonedBaseline(
            network_size=network_size,
            network_path=None,
            state_shape=self.params['state_shape'],
            nb_actions=self.params['nb_actions'],
            device=device,
            seed=seed,
            temperature=0,
            results_folder=self.output_folder)
        # NOTE(review): this copies best_policy's own weights onto itself;
        # presumably it should copy from cloned_baseline_policy — confirm.
        self.best_policy._copy_weight_from(
            self.best_policy.network.state_dict())

        # define loss and optimizer
        self.nll_loss_function = nn.NLLLoss()
        self.optimizer = torch.optim.SGD(
            self.cloned_baseline_policy.network.parameters(), lr=learning_rate)
        # optimizer = torch.optim.RMSprop(network.parameters(), lr=learning_rate, alpha=0.95, eps=1e-07)

        # instantiate environment for policy evaluation
        self.env = environment.Environment(self.params['domain'], self.params)

        if sample_from_env:
            print("sampling from environment")
            baseline_network_path = os.path.join(data_dir,
                                                 self.params["network_path"])
            self.baseline = Baseline(self.params['network_size'],
                                     network_path=baseline_network_path,
                                     state_shape=self.params['state_shape'],
                                     nb_actions=self.params['nb_actions'],
                                     device=device,
                                     seed=seed,
                                     temperature=self.params.get(
                                         "baseline_temp", 0.1),
                                     normalize=self.params['normalize'])
        else:
            self.baseline = None
Esempio n. 6
0
def run(config_file, options):
    """Run a batch or online experiment from an explicit config file.

    Loads the yaml configuration, applies command-line overrides, seeds all
    RNG sources, then runs either a BatchExperiment (offline, from a counted
    dataset, with the baseline selected by ``learning_type``) or an online
    DQNExperiment, repeated ``num_experiments`` times.

    Args:
        config_file: path to the yaml configuration file.
        options: iterable of ``(key, value)`` string pairs overriding params.

    Raises:
        FileNotFoundError: when ``config_file`` does not exist.
        ValueError: when the configured dataset file does not exist.
    """
    try:
        # context manager closes the file handle deterministically
        # (previously the handle from open() was never closed)
        with open(config_file, 'r') as cfg:
            params = yaml.safe_load(cfg)
    except FileNotFoundError as e:
        print("Configuration file not found")
        raise e

    # replacing params with command line options
    for opt in options:
        assert opt[0] in params, 'unknown parameter: {}'.format(opt[0])
        dtype = type(params[opt[0]])
        if dtype == bool:
            # bool('False') is truthy, so compare against the literal 'True'
            new_opt = (opt[1] == 'True')
        else:
            new_opt = dtype(opt[1])
        params[opt[0]] = new_opt

    print('\n')
    print('Parameters ')
    for key in params:
        print(key, params[key])
    print('\n')

    # seed every RNG source for reproducibility
    np.random.seed(params['seed'])
    torch.manual_seed(params['seed'])
    random_state = np.random.RandomState(params['seed'])
    device = torch.device(params["device"])

    DATA_DIR = os.path.join(params['folder_location'], params['folder_name'])

    env = environment.Environment(params["domain"], params, random_state)

    if params['batch']:
        # batch mode: learn offline from a pre-generated dataset with counts
        dataset_path = params['dataset_path']
        print("\nLoading dataset from file {}".format(dataset_path), flush=True)
        if not os.path.exists(dataset_path):
            raise ValueError("The dataset file does not exist")
        dataset = Dataset_Counts.load_dataset(dataset_path)

        # the baseline policy depends on the learning type
        baseline_path = os.path.join(DATA_DIR, params['baseline_path'])
        if 'behavior_cloning' in params['learning_type']:
            baseline_path = os.path.join(os.path.dirname(dataset_path), 'cloned_network_weights.pt')
            baseline = ClonedBaseline(
                params['network_size'], network_path=baseline_path, state_shape=params['state_shape'],
                nb_actions=params['nb_actions'], device=device, seed=params['seed'],
                temperature=params['baseline_temp'], normalize=params['normalize'])
        elif params['learning_type'] in ['pi_b', 'soft_sort']:
            baseline = Baseline(params['network_size'], network_path=baseline_path, state_shape=params['state_shape'],
                                nb_actions=params['nb_actions'], device=device, seed=params['seed'],
                                temperature=params['baseline_temp'], normalize=params['normalize'])
        elif 'count_based' in params['learning_type']:
            baseline = SimilarityBaseline(dataset=dataset, seed=params['seed'], nb_actions=params['nb_actions'],
                                          results_folder=os.path.dirname(dataset_path))
            baseline.evaluate_baseline(env, number_of_steps=100000, number_of_epochs=1,
                                       verbose=True, save_results=True)
        else:
            # no baseline, should use counters to estimate policy
            baseline = None

        # experiment results live next to the dataset file
        folder_name = os.path.dirname(dataset_path)
        print("Data with counts loaded: {} samples".format(dataset.size), flush=True)
        expt = BatchExperiment(dataset=dataset, env=env, folder_name=folder_name, episode_max_len=params['episode_max_len'],
                               minimum_count=params['minimum_count'], extra_stochasticity=params['extra_stochasticity'],
                               history_len=params['history_len'], max_start_nullops=params['max_start_nullops'],
                               keep_all_logs=False)
    else:
        # Create experiment folder
        if not os.path.exists(DATA_DIR):
            os.makedirs(DATA_DIR)

        # online mode: no behavior baseline, the agent interacts directly
        folder_name = DATA_DIR
        baseline = None
        expt = DQNExperiment(env=env, ai=None, episode_max_len=params['episode_max_len'], annealing=params['annealing'],
                             history_len=params['history_len'], max_start_nullops=params['max_start_nullops'],
                             replay_min_size=params['replay_min_size'], test_epsilon=params['test_epsilon'],
                             folder_name=folder_name, network_path=params['network_path'],
                             extra_stochasticity=params['extra_stochasticity'], score_window_size=100,
                             keep_all_logs=False)

    for ex in range(params['num_experiments']):
        print('\n')
        print('>>>>> Experiment ', ex, ' >>>>> ',
              params['learning_type'], ' >>>>> Epsilon >>>>> ',
              params['epsilon_soft'], ' >>>>> Minimum Count >>>>> ',
              params['minimum_count'], ' >>>>> Kappa >>>>> ',
              params['kappa'], ' >>>>> ', flush=True)
        print('\n')
        print("\nPROGRESS: {0:02.2f}%\n".format(ex / params['num_experiments'] * 100), flush=True)
        # a fresh AI per experiment repetition
        ai = AI(baseline, state_shape=env.state_shape, nb_actions=env.nb_actions, action_dim=params['action_dim'],
                reward_dim=params['reward_dim'], history_len=params['history_len'], gamma=params['gamma'],
                learning_rate=params['learning_rate'], epsilon=params['epsilon'], final_epsilon=params['final_epsilon'],
                test_epsilon=params['test_epsilon'], annealing_steps=params['annealing_steps'], minibatch_size=params['minibatch_size'],
                replay_max_size=params['replay_max_size'], update_freq=params['update_freq'],
                learning_frequency=params['learning_frequency'], ddqn=params['ddqn'], learning_type=params['learning_type'],
                network_size=params['network_size'], normalize=params['normalize'], device=device,
                kappa=params['kappa'], minimum_count=params['minimum_count'], epsilon_soft=params['epsilon_soft'])
        expt.ai = ai
        if not params['batch']:
            # resets dataset for online experiment
            expt.dataset_counter = Dataset_Counts(count_param=params['count_param'],
                                                  state_shape=env.state_shape,
                                                  nb_actions=env.nb_actions,
                                                  replay_max_size=params['replay_max_size'],
                                                  is_counting=ai.needs_state_action_counter())

        env.reset()
        with open(expt.folder_name + '/config.yaml', 'w') as y:
            yaml.safe_dump(params, y)  # saving params for reference
        expt.do_epochs(number_of_epochs=params['num_epochs'], is_learning=params['is_learning'],
                       steps_per_epoch=params['steps_per_epoch'], is_testing=params['is_testing'],
                       steps_per_test=params['steps_per_test'],
                       passes_on_dataset=params['passes_on_dataset'], exp_id=ex)