Example #1
def policy_decoder(augment, num_policy, num_op):
    """Convert a flat hyperopt sample into a list of sub-policies.

    Each sub-policy is a list of (op_name, probability, magnitude) tuples,
    with op_name looked up by index in the default augmentation list.
    """
    op_list = augment_list(False)
    policies = []
    for i in range(num_policy):
        ops = []
        for j in range(num_op):
            op_idx = augment['policy_%d_%d' % (i, j)]
            op_prob = augment['prob_%d_%d' % (i, j)]
            op_level = augment['level_%d_%d' % (i, j)]
            ops.append((op_list[op_idx][0].__name__, op_prob, op_level))
        policies.append(ops)
    return policies
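
policy_decoder turns a flat hyperopt sample (one dictionary entry per policy/op slot) back into nested sub-policies of (op_name, probability, magnitude) tuples. A small illustrative call, assuming one sub-policy with two operations; the printed names depend entirely on what augment_list(False) returns, so the output shown is only indicative:

sample = {
    'policy_0_0': 0, 'prob_0_0': 0.4, 'level_0_0': 0.2,
    'policy_0_1': 3, 'prob_0_1': 0.9, 'level_0_1': 0.7,
}
print(policy_decoder(sample, num_policy=1, num_op=2))
# e.g. [[('ShearX', 0.4, 0.2), ('Rotate', 0.9, 0.7)]], depending on augment_list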
Example #2
                        })
                        del corrects, corrects_max  # free per-batch tensors
        except StopIteration:
            pass  # validation loader is exhausted

    del model  # release the evaluated child model
    metrics = metrics / 'cnt'  # normalize the accumulated sums by the sample count under 'cnt'

    # Report the metrics that the hyperparameter search tracks; 'minus_loss'
    # is the reward attribute being maximized.
    tune.track.log(top_1_valid=metrics['correct'],
                   minus_loss=metrics['minus_loss'],
                   plus_loss=metrics['plus_loss'])
    return metrics['minus_loss']


ops = augment_list(False)  # Get the default augmentation set.
# Define the search space: for each (policy, op) slot, an operation index,
# an application probability, and a magnitude level.
space = {}
for i in range(args.num_policy):
    for j in range(args.num_op):
        space['policy_%d_%d' % (i, j)] = hp.choice('policy_%d_%d' % (i, j),
                                                   list(range(len(ops))))
        space['prob_%d_%d' % (i, j)] = hp.uniform('prob_%d_%d' % (i, j), 0.0, 1.0)
        space['level_%d_%d' % (i, j)] = hp.uniform('level_%d_%d' % (i, j), 0.0, 1.0)

final_policy_set = []  # decoded policies collected from the best trials

reward_attr = 'minus_loss'  # metric the search maximizes
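
The snippet ends right after defining the search space and the reward attribute (reward_attr is presumably consumed by the Ray Tune searcher in the full script). As a minimal sketch of how the pieces above could be driven with plain hyperopt instead, assuming a placeholder objective and an arbitrary trial budget (a real search would train and evaluate a child model and return the quantity to minimize):

from hyperopt import Trials, fmin, space_eval, tpe

def _objective(augment):
    # Placeholder objective: decode the candidate policies and return a dummy
    # loss (sum of the sampled probabilities). A real objective would return
    # something like -minus_loss from the evaluation function above.
    policies = policy_decoder(augment, args.num_policy, args.num_op)
    return sum(prob for ops in policies for _, prob, _ in ops)

trials = Trials()
best = fmin(fn=_objective, space=space, algo=tpe.suggest,
            max_evals=50, trials=trials)  # assumed trial budget

# Decode the best sample back into sub-policies and keep it.
final_policy_set.extend(
    policy_decoder(space_eval(space, best), args.num_policy, args.num_op))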