def task_lr(seed, batch_size=None, total_epochs=40, data_set='mnist'):
    '''Train a small feed-forward softmax classifier on a data set.'''
    sequence = load_data(data_set)
    features = sequence.features
    labels = sequence.labels
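    # Default to full-batch training when no batch size is given.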
    batch_size = len(features) if batch_size is None else batch_size
    model = tf.keras.Sequential()
    model.add(
        tf.keras.layers.Dense(
            48,
            input_shape=features.shape[1:],
            kernel_initializer=tf.keras.initializers.glorot_normal(seed=seed),
            bias_initializer=tf.keras.initializers.glorot_normal(seed=seed),
            activation='relu',
            use_bias=True))
    model.add(
        tf.keras.layers.Dense(
            48,
            kernel_initializer=tf.keras.initializers.glorot_normal(seed=seed),
            bias_initializer=tf.keras.initializers.glorot_normal(seed=seed),
            activation='relu',
            use_bias=True))
    model.add(
        tf.keras.layers.Dense(
            labels.shape[-1],
            kernel_initializer=tf.keras.initializers.glorot_normal(seed=seed),
            bias_initializer=tf.keras.initializers.glorot_normal(seed=seed),
            activation='softmax'))
    # Alternative optimizer: tf.train.GradientDescentOptimizer(1e-1) with the
    # same loss and metrics.
    model.compile(tf.train.AdamOptimizer(1e-2),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    callback = CustomCallback()
    model.fit(features,
              labels,
              epochs=total_epochs,
              verbose=0,
              shuffle=True,
              batch_size=batch_size,
              callbacks=[callback])
    return callback.history
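# Example usage (a sketch; the exact keys in the returned history depend on
# CustomCallback, and 'mnist' is assumed to be available via load_data):
#
#     history = task_lr(seed=0, batch_size=128, total_epochs=40)
#     print(history['loss'][-1])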
def train_model(parameters, trial):
    '''Train a model to classify a data set.'''
    parameters = parameters.copy()
    path = Path(parameters['path'], str(trial.number))
    parameters['path'] = str(path)
    learning_rate = trial.suggest_loguniform('learning_rate', 1e-5, 1e0)
    parameters.update({'learning_rate': learning_rate})
    sequence = load_data(parameters['data_set'])
    layers = [sequence.feature_shape[0], 256, 256, sequence.target_shape[0]]
    graph = tf.Graph()
    config = tf.ConfigProto(allow_soft_placement=True)
    config.gpu_options.allow_growth = True
    config.gpu_options.per_process_gpu_memory_fraction = 0.9
    with graph.as_default():
        tensor = model_in = tf.keras.layers.Input([layers[0]])
        # Hidden ReLU stack followed by a softmax output layer.
        for units in layers[1:-1]:
            tensor = tf.keras.layers.Dense(units, activation='relu')(tensor)
        tensor = tf.keras.layers.Dense(layers[-1], activation='softmax')(tensor)
        model = tf.keras.Model(inputs=model_in, outputs=tensor)
        callbacks = [CustomCallback(trial, 'loss')]
        with tf.Session(graph=graph, config=config):
            try:
                model.compile(tf.train.AdamOptimizer(learning_rate),
                              loss='categorical_crossentropy',
                              metrics=['accuracy'])
                model.fit_generator(sequence,
                                    epochs=parameters['total_epochs'],
                                    verbose=0,
                                    callbacks=callbacks)
            finally:
                path.mkdir(parents=True, exist_ok=True)
                utils_file.save_json(parameters, path / 'parameters.json')
                dataframe = pd.DataFrame.from_dict(callbacks[0].history)
                dataframe.to_csv(path / 'results.csv')
                model.save(str(path / 'model.hdf5'))
    return dataframe['loss'].iloc[-1]
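# Example hyperparameter search (a sketch; assumes Optuna is installed and
# that `parameters` already carries 'path', 'data_set', and 'total_epochs'):
#
#     import optuna
#     params = {'path': 'results', 'data_set': 'mnist', 'total_epochs': 40}
#     study = optuna.create_study(direction='minimize')
#     study.optimize(partial(train_model, params), n_trials=50)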
def task(path, seed, batch_size=None, total_epochs=40, data_set='mnist'):
    '''
    Run a trained stable-baselines agent (PPO2, A2C, or DDPG) on a data set.
    '''
    parameters = utils_file.load_json(path / 'hyperparams.json')
    alg, *_ = path.name.split('-')
    save_path = path / 'model.pkl'
    sequence = load_data(data_set)
    num_of_samples = len(sequence.features)
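    # Full-batch runs (batch_size=None) take one optimizer step per epoch.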
    steps_per_epoch = ceil(num_of_samples / batch_size) if batch_size else 1
    kwargs = parameters.get('kwargs', {})
    kwargs['data_set'] = data_set
    kwargs['batch_size'] = batch_size
    kwargs['max_batches'] = steps_per_epoch * total_epochs
    env = partial(gym.make, parameters['env_name'], **kwargs)
    vec_env = OptVecEnv([env])
    if alg == 'PPO':
        with open(save_path, 'rb') as pkl:
            model = PPO2.load(pkl)
    elif alg == 'A2C':
        with open(save_path, 'rb') as pkl:
            model = A2C.load(pkl, env=vec_env)
    elif alg == 'DDPG':
        model = DDPG.load(save_path, env=vec_env)
    else:
        raise ValueError(f'Unknown algorithm: {alg}')
    states = vec_env.reset()
    info_list = []
    cumulative_reward = 0
    for epoch_no in trange(total_epochs, leave=False):
        for step in trange(steps_per_epoch, leave=False):
            actions = model.predict(states, deterministic=False)[0]
            states, rewards, _, infos = vec_env.step(actions)
            cumulative_reward += rewards[0]
            info = infos[0]
            info['step'] = epoch_no * steps_per_epoch + step
            info['cumulative_reward'] = cumulative_reward
            info['seed'] = seed
            info['epoch'] = epoch_no
            info_list.append(info)
    return info_list
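# Example usage (a sketch; assumes `path` names a run directory such as
# results/PPO-0 holding hyperparams.json and model.pkl):
#
#     infos = task(Path('results/PPO-0'), seed=0, batch_size=128)
#     pd.DataFrame(infos).to_csv('ppo_eval.csv')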
class Problem:
    # NOTE: the enclosing class statement was missing here; the name
    # ``Problem`` is assumed from the docstring below.
    def __init__(self, model_fn=None, data_set=None):
        '''
        Create a problem with the goal of optimizing a NN for a data set.

        :param model_fn: (callable) A function which returns a keras model.
        :param data_set: (callable) A function which returns a data set.
        '''
        self.data_set_iter = None
        self.current_batch = None
        if data_set:
            self.data_set = data_set
        else:
            self.data_set = load_data()
        inputs = keras.Input(self.data_set.feature_shape, name='feature')
        target = keras.Input(self.data_set.target_shape, name='target')
        with tf.name_scope('network'):
            if callable(model_fn):
                outputs = model_fn(inputs)
            else:
                outputs = utils_tf.create_neural_net(inputs)
            outputs = keras.layers.Dense(self.data_set.target_shape[0],
                                         activation='softmax')(outputs)
        weight_tensors = tf.trainable_variables(scope='network')
        with tf.name_scope('output'):
            loss = keras.losses.categorical_crossentropy(target, outputs)
            self.tensors_out = NetTensors(tf.gradients(loss, weight_tensors),
                                          tf.reduce_mean(loss), weight_tensors)
        shapes = tuple(tuple(w.shape.as_list()) for w in weight_tensors)
        weight_in = tuple(
            tf.placeholder(tf.float32, shape) for shape in shapes)
        weight_update = tuple(
            w.assign(p) for w, p in zip(weight_tensors, weight_in))
        self._size = sum(np.prod(shape) for shape in shapes)
        self.param_info = NetParamObjs(weight_in, weight_update, shapes)
        self.net_inputs = NetInput(inputs, target)
        self.reset_init = tf.global_variables_initializer()
        self.reset()
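    # Sketch of how the placeholders and assign ops built above could be used
    # to overwrite the network weights from a flat vector (illustrative only;
    # the attribute names on NetParamObjs are assumed):
    #
    #     def set_flat_weights(self, session, flat):
    #         offset, feed = 0, {}
    #         for holder, shape in zip(self.param_info.placeholders,
    #                                  self.param_info.shapes):
    #             size = int(np.prod(shape))
    #             feed[holder] = flat[offset:offset + size].reshape(shape)
    #             offset += size
    #         session.run(self.param_info.assign_ops, feed_dict=feed)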
def task(path, seed, batch_size=None, total_epochs=40, data_set='mnist'):
    '''
    Run a trained MADDPG agent on a data set.
    '''
    sequence = load_data(data_set)
    num_of_samples = len(sequence.features)
    steps_per_epoch = ceil(num_of_samples / batch_size) if batch_size else 1
    max_batches = steps_per_epoch * total_epochs

    multi_env = MultiOptLRs(data_set=data_set, max_batches=max_batches,
                            batch_size=batch_size, max_history=25,
                            reward_version=0, observation_version=3,
                            action_version=0, version=5)
    model = MaddpgInference(multi_env.observation_space,
                            multi_env.action_space,
                            shared_policy=True)
    model.load(str(path / 'model.ckpt'))
    states = multi_env.reset()
    info_list = []
    cumulative_reward = 0
    print_tqdm('Starting...')
    for epoch_no in trange(total_epochs, leave=False):
        for step in trange(steps_per_epoch, leave=False):
            actions = model.predict(states)
            actions = {key: np.squeeze(act) for key, act in actions.items()}
            states, rewards, _, infos = multi_env.step(actions)
            cumulative_reward += rewards
            info = infos
            info['step'] = epoch_no * steps_per_epoch + step
            info['cumulative_reward'] = cumulative_reward
            info['seed'] = seed
            info['epoch'] = epoch_no
            info_list.append(info)
            if info['accuracy']:
                print_tqdm('Accuracy:', info['accuracy'],
                           'Loss:', info['loss'])
    return info_list
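# Example usage (a sketch; assumes `path` holds a MADDPG checkpoint saved as
# model.ckpt):
#
#     infos = task(Path('results/maddpg-0'), seed=0, batch_size=128)
#     pd.DataFrame(infos).to_csv('maddpg_eval.csv')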