def make_results_table(models=None,
                       tasks=None,
                       max_npe='5M',
                       name_prefix='bf_rl_paper',
                       extra_desc='',
                       models_dir='/tmp'):
    """Creates a table of results: algorithm + version by tasks.

    Results are fetched once per (algorithm, desc) column and kept in column
    order, so the same algorithm may appear several times with different
    descriptions without one column overwriting another.

    Args:
        models: The table columns. A list of (algorithm, desc) tuples. Falls
            back to DEFAULT_MODELS when None.
        tasks: The table rows. List of task names. Falls back to
            DEFAULT_TASKS when None.
        max_npe: String SI unit representation of the maximum NPE threshold
            for the experiment. For example, "5M" means 5 million. All
            entries in the table share the same max-NPE.
        name_prefix: Name prefix used in logging directory for the
            experiment.
        extra_desc: Extra description added to name of logging directory for
            the experiment.
        models_dir: Parent directory containing all experiment folders.

    Returns:
        A 2D list holding the table cells. The first row holds the max-NPE
        label followed by one "algorithm (desc)" header per column (each
        padded with two empty cells), the second row holds the
        "reps" / "success rate" / "avg NPE" sub-headers, and every following
        row holds one task name followed by three cells per column.
    """
    if models is None:
        models = DEFAULT_MODELS
    if tasks is None:
        tasks = DEFAULT_TASKS

    # One {task name: processed stats} dict per column, in the same order as
    # `models`. A positional list (rather than a dict keyed by algorithm
    # name) keeps columns that share an algorithm but differ in `desc` apart.
    column_results = [
        {
            tname: get_results_for_experiment(models_dir,
                                              tname,
                                              model_type,
                                              max_npe,
                                              desc,
                                              name_prefix=name_prefix,
                                              extra_desc=extra_desc).processed
            for tname in tasks
        }
        for model_type, desc in models
    ]

    def info(stats):
        # The three cells shown for one (column, task) pair.
        return [
            str(stats['repetitions']),
            '%.2f' % stats['success_rate'],
            str(int(stats['avg_total_npe']))
        ]

    # Header row: each column title spans three cells, hence the two blanks.
    rows = [['max NPE: ' + max_npe] +
            misc.flatten([['{0} ({1})'.format(m, d), '', '']
                          for m, d in models])]
    # Sub-header row: names of the three statistics, repeated per column.
    rows.append([''] + misc.flatten([['reps', 'success rate', 'avg NPE']
                                     for _ in models]))
    for tname in tasks:
        rows.append([tname] + misc.flatten(
            [info(results[tname]) for results in column_results]))

    return rows
def make_results_table(
    models=None,
    tasks=None,
    max_npe='5M',
    name_prefix='bf_rl_paper',
    extra_desc='',
    models_dir='/tmp'):
  """Creates a table of results: algorithm + version by tasks.

  Results are fetched once per (algorithm, desc) column and kept in column
  order, so the same algorithm may appear several times with different
  descriptions without one column overwriting another.

  Args:
    models: The table columns. A list of (algorithm, desc) tuples. Falls back
        to DEFAULT_MODELS when None.
    tasks: The table rows. List of task names. Falls back to DEFAULT_TASKS
        when None.
    max_npe: String SI unit representation of the maximum NPE threshold for the
        experiment. For example, "5M" means 5 million. All entries in the table
        share the same max-NPE.
    name_prefix: Name prefix used in logging directory for the experiment.
    extra_desc: Extra description added to name of logging directory for the
        experiment.
    models_dir: Parent directory containing all experiment folders.

  Returns:
    A 2D list holding the table cells. The first row holds the max-NPE label
    followed by one "algorithm (desc)" header per column (each padded with two
    empty cells), the second row holds the "reps" / "success rate" / "avg NPE"
    sub-headers, and every following row holds one task name followed by three
    cells per column.
  """
  if models is None:
    models = DEFAULT_MODELS
  if tasks is None:
    tasks = DEFAULT_TASKS

  # One {task name: processed stats} dict per column, in the same order as
  # `models`. A positional list (rather than a dict keyed by algorithm name)
  # keeps columns that share an algorithm but differ in `desc` apart.
  column_results = [
      {tname: get_results_for_experiment(
          models_dir, tname, model_type, max_npe, desc,
          name_prefix=name_prefix, extra_desc=extra_desc
      ).processed
       for tname in tasks}
      for model_type, desc in models]

  def info(stats):
    # The three cells shown for one (column, task) pair.
    return [str(stats['repetitions']),
            '%.2f' % stats['success_rate'],
            str(int(stats['avg_total_npe']))]

  # Header row: each column title spans three cells, hence the two blanks.
  rows = [['max NPE: ' + max_npe]
          + misc.flatten([['{0} ({1})'.format(m, d), '', '']
                          for m, d in models])]
  # Sub-header row: names of the three statistics, repeated per column.
  rows.append(
      [''] + misc.flatten([['reps', 'success rate', 'avg NPE']
                           for _ in models]))
  for tname in tasks:
    rows.append(
        [tname]
        + misc.flatten([info(results[tname])
                        for results in column_results]))

  return rows
Example #3
0
    def __init__(self):
        # A path is a sequence of sub-sequences. Each sub-sequence is a unique
        # triple of arbitrary ints; triples are used in place of single
        # entities so that episodes last longer and the agent has more to
        # remember, which makes the task harder.
        seg_a, seg_b, seg_c, seg_d = (1, 2, 3), (4, 5, 6), (7, 8, 7), (6, 5, 4)
        seg_e, seg_f, seg_g, seg_h = (3, 2, 1), (8, 5, 1), (6, 4, 2), (1, 8, 3)
        full_path = [seg_a, seg_b, seg_c, seg_d, seg_e, seg_f, seg_g, seg_h]

        # Register every path in the trie, in a fixed order.
        self.paths = Trie()
        for path in (
                [seg_a, seg_b, seg_h],
                list(full_path),
                [seg_a, seg_b, seg_c, seg_d, seg_e, seg_b, seg_a],
                [seg_a, seg_b, seg_g, seg_h],
                [seg_a, seg_e, seg_f, seg_g],
        ):
            self.paths.insert(path)

        # The target is the longest path, flattened into a single sequence.
        self.correct_sequence = misc.flatten(full_path)

        def distance_fn(a, b):
            # Positions present in only one of the two sequences.
            len_diff = abs(len(a) - len(b))
            # Accumulate the per-position cost over the overlapping prefix;
            # values are shifted down by one before being compared.
            overlap_cost = 0
            for ai, bi in zip(a, b):
                overlap_cost += reward_lib.mod_abs_diff(ai - 1, bi - 1, 8)
            # Every unmatched position is charged 4 (8 / 2 = 4).
            return overlap_cost + len_diff * 4

        self.distance_fn = distance_fn
Example #4
0
  def __init__(self):
    # A path is a sequence of sub-sequences. Each sub-sequence is a unique
    # triple of arbitrary ints; triples are used in place of single entities
    # so that episodes last longer and the agent has more to remember, which
    # makes the task harder.
    first, second, third, fourth = (1, 2, 3), (4, 5, 6), (7, 8, 7), (6, 5, 4)
    fifth, sixth, seventh, eighth = (3, 2, 1), (8, 5, 1), (6, 4, 2), (1, 8, 3)
    target_path = [first, second, third, fourth, fifth, sixth, seventh, eighth]

    # Register every path in the trie, in a fixed order.
    known_paths = (
        [first, second, eighth],
        list(target_path),
        [first, second, third, fourth, fifth, second, first],
        [first, second, seventh, eighth],
        [first, fifth, sixth, seventh],
    )
    self.paths = Trie()
    for known_path in known_paths:
      self.paths.insert(known_path)

    # The target is the longest path, flattened into a single sequence.
    self.correct_sequence = misc.flatten(target_path)

    def distance_fn(a, b):
      # Positions present in only one of the two sequences.
      len_diff = abs(len(a) - len(b))
      # Per-position cost over the overlapping prefix; values are shifted
      # down by one before being compared.
      position_costs = [reward_lib.mod_abs_diff(ai - 1, bi - 1, 8)
                        for ai, bi in zip(a, b)]
      # Every unmatched position is charged 4 (8 / 2 = 4).
      return sum(position_costs) + len_diff * 4

    self.distance_fn = distance_fn