import time

import numpy as np

# MultiExperiment is the project-specific experiment driver; the import path
# below is an assumption and may need to match your repository layout.
from multi_experiment import MultiExperiment


def main(main_args):
	
	### Set all arguments
	
	## Target maker
	target_maker_args = {}
	target_maker_args['future_steps'] = [1,2,4,8,16,32]
	target_maker_args['meas_to_predict'] = [0,1,2]
	target_maker_args['min_num_targs'] = 3	
	target_maker_args['rwrd_schedule_type'] = 'exp'
	target_maker_args['gammas'] = []
	target_maker_args['invalid_targets_replacement'] = 'nan'
	
	## Simulator
	simulator_args = {}
	simulator_args['config'] = '../../maps/D3_battle.cfg'
	simulator_args['resolution'] = (84,84)
	simulator_args['frame_skip'] = 4
	simulator_args['color_mode'] = 'GRAY'	
	simulator_args['use_shaping_reward'] = False
	simulator_args['maps'] = ['MAP01']
	simulator_args['switch_maps'] = False
	#train
	simulator_args['num_simulators'] = 8
	
	## Experience
	# Train experience
	train_experience_args = {}
	train_experience_args['memory_capacity'] = 20000
	train_experience_args['history_length'] = 1
	train_experience_args['history_step'] = 1
	train_experience_args['action_format'] = 'enumerate'
	train_experience_args['shared'] = False
	
	# Test prediction experience
	test_prediction_experience_args = train_experience_args.copy()
	test_prediction_experience_args['memory_capacity'] = 1
	
	# Test policy experience
	test_policy_experience_args = train_experience_args.copy()
	test_policy_experience_args['memory_capacity'] = 55000
		
	## Agent	
	agent_args = {}
	
	# agent type
	agent_args['agent_type'] = 'advantage'
	
	# preprocessing
	agent_args['preprocess_input_images'] = lambda x: x / 255. - 0.5
	agent_args['preprocess_input_measurements'] = lambda x: x / 100. - 0.5
	targ_scale_coeffs = np.expand_dims((np.expand_dims(np.array([7.5,30.,1.]),1) * np.ones((1,len(target_maker_args['future_steps'])))).flatten(),0)
	agent_args['preprocess_input_targets'] = lambda x: x / targ_scale_coeffs
	agent_args['postprocess_predictions'] = lambda x: x * targ_scale_coeffs
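	# targ_scale_coeffs has shape (1, 18): each per-measurement scale (7.5, 30., 1.)
	# is repeated once for every one of the 6 future_steps offsets.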
		
	# agent properties
	agent_args['objective_coeffs_temporal'] = [0., 0., 0., 0.5, 0.5, 1.]
	agent_args['objective_coeffs_meas'] = [0.5, 0.5, 1.]
	agent_args['random_exploration_schedule'] = lambda step: (0.02 + 145000. / (float(step) + 150000.))
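	# Epsilon starts at roughly 0.99 at step 0 and decays toward the 0.02 floor
	# (e.g. about 0.15 after one million steps).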
	agent_args['new_memories_per_batch'] = 8
	
	# net parameters
	agent_args['conv_params']     = np.array([(32,8,4), (64,4,2), (64,3,1)],
									 dtype = [('out_channels',int), ('kernel',int), ('stride',int)])
	agent_args['fc_img_params']   = np.array([(512,)], dtype = [('out_dims',int)])
	agent_args['fc_meas_params']  = np.array([(128,), (128,), (128,)], dtype = [('out_dims',int)]) 
	agent_args['fc_joint_params'] = np.array([(512,), (-1,)], dtype = [('out_dims',int)]) # we put -1 here because it will be automatically replaced when creating the net
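	# The -1 is presumably filled in with an output size derived from the number of
	# actions and prediction targets when the network is built.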
	agent_args['weight_decay'] = 0.00000
	
	# optimization parameters
	agent_args['batch_size'] = 64
	agent_args['init_learning_rate'] = 0.0001
	agent_args['lr_step_size'] = 250000
	agent_args['lr_decay_factor'] = 0.3
	agent_args['adam_beta1'] = 0.95
	agent_args['adam_epsilon'] = 1e-4		
	agent_args['optimizer'] = 'Adam'
	agent_args['reset_iter_count'] = False
	
	# directories		
	agent_args['checkpoint_dir'] = 'checkpoints'
	agent_args['log_dir'] = 'logs'
	agent_args['init_model'] = ''
	agent_args['model_name'] = "predictor.model"
	agent_args['model_dir'] = time.strftime("%Y_%m_%d_%H_%M_%S")		
	
	# logging and testing
	agent_args['print_err_every'] = 50
	agent_args['detailed_summary_every'] = 1000
	agent_args['test_pred_every'] = 0
	agent_args['test_policy_every'] = 7812
	agent_args['num_batches_per_pred_test'] = 0
	# Integer division keeps the policy-test step count an int under Python 3
	agent_args['num_steps_per_policy_test'] = test_policy_experience_args['memory_capacity'] // simulator_args['num_simulators']
	agent_args['checkpoint_every'] = 10000
	agent_args['save_param_histograms_every'] = 5000
	agent_args['test_policy_in_the_beginning'] = True				
	
	# experiment arguments
	experiment_args = {}
	experiment_args['num_train_iterations'] = 820000
	experiment_args['test_objective_coeffs_temporal'] = np.array([0., 0., 0., 0.5, 0.5, 1.])
	experiment_args['test_objective_coeffs_meas'] = np.array([0.5,0.5,1.])
	experiment_args['test_random_prob'] = 0.
	experiment_args['test_checkpoint'] = 'checkpoints/2017_04_08_10_44_20'
	experiment_args['test_policy_num_steps'] = 2000
	experiment_args['show_predictions'] = False
	experiment_args['multiplayer'] = False
	
	
	# Create and run the experiment
	
	experiment = MultiExperiment(target_maker_args=target_maker_args, 
							simulator_args=simulator_args, 
							train_experience_args=train_experience_args, 
							test_policy_experience_args=test_policy_experience_args, 
							agent_args=agent_args,
							experiment_args=experiment_args)
	
	experiment.run(main_args[0])

# Example #2

def main(main_args):

    ### Set all arguments

    ## Target maker
    target_maker_args = {}
    target_maker_args['future_steps'] = [1, 2, 4, 8, 16, 32]  # Temporal offsets at which to predict the measurements
    target_maker_args['meas_to_predict'] = [0, 1, 2, 3]  # Indices of the measurements to predict
    target_maker_args['min_num_targs'] = 3  # Minimum number of available targets for a frame to be usable
    target_maker_args['rwrd_schedule_type'] = 'exp'  # Discounting schedule for future rewards (exponential)
    target_maker_args['gammas'] = []
    target_maker_args['invalid_targets_replacement'] = 'nan'  # Fill value for targets unavailable near the end of an episode

    ## Simulator
    simulator_args = {}
    simulator_args['config'] = '../../maps/D5.cfg'  # Scenario config (available buttons, game variables, etc.)
    simulator_args['resolution'] = (84, 84)  # Resolution of the input images
    simulator_args['frame_skip'] = 4  # Act and predict only every frame_skip frames
    simulator_args['color_mode'] = 'GRAY'
    simulator_args['maps'] = ['MAP01']  # Map on which to play
    simulator_args['switch_maps'] = False
    #train
    simulator_args['num_simulators'] = 8  # Number of simulators run in parallel

    ## Experience
    # Train experience
    train_experience_args = {}
    train_experience_args['memory_capacity'] = 20000  # Number of observations to retain
    train_experience_args['history_length'] = 1  # Number of frames taken as input when making a prediction
    train_experience_args['history_step'] = 1
    train_experience_args['action_format'] = 'enumerate'
    train_experience_args['shared'] = False

    # Test prediction experience
    test_prediction_experience_args = train_experience_args.copy()
    test_prediction_experience_args['memory_capacity'] = 1

    # Test policy experience
    test_policy_experience_args = train_experience_args.copy()
    test_policy_experience_args['memory_capacity'] = 55000

    ## Agent
    agent_args = {}

    # agent type
    agent_args['agent_type'] = 'advantage'  # Defines the kind of network used for the prediction

    # preprocessing
    agent_args['preprocess_input_images'] = lambda x: x / 255. - 0.5  # Preprocessing applied to the images
    agent_args['preprocess_input_measurements'] = lambda x: x / 100. - 0.5  # Preprocessing applied to the measurements
    # Per-measurement scales (30 for the first measurement, 100 for the velocity
    # measurements), tiled over all future steps: shape (1, num_meas * num_future_steps)
    targ_scale_coeffs = np.expand_dims(
        (np.expand_dims(np.array([30., 100., 100., 100.]), 1) * np.ones(
            (1, len(target_maker_args['future_steps'])))).flatten(), 0)
    agent_args['preprocess_input_targets'] = lambda x: x / targ_scale_coeffs  # Targets are normalized by these scales
    agent_args['postprocess_predictions'] = lambda x: x * targ_scale_coeffs

    # agent properties
    agent_args['objective_coeffs_temporal'] = [0., 0., 0., 0.5, 0.5, 1.]  # Weights across the predicted time steps
    agent_args['objective_coeffs_meas'] = [1., 0., 0., 0.]  # Weights across the predicted measurements
    agent_args['random_exploration_schedule'] = lambda step: (
        0.02 + 145000. / (float(step) + 150000.))  # Epsilon schedule for epsilon-greedy exploration
    agent_args['new_memories_per_batch'] = 8

    # net parameters
    agent_args['conv_params'] = np.array([(32, 8, 4), (64, 4, 2), (64, 3, 1)],
                                         dtype=[('out_channels', int),
                                                ('kernel', int),
                                                ('stride', int)])
    agent_args['fc_img_params'] = np.array([(512, )],
                                           dtype=[('out_dims', int)])
    agent_args['fc_meas_params'] = np.array([(128, ), (128, ), (128, )],
                                            dtype=[('out_dims', int)])
    agent_args['fc_joint_params'] = np.array(
        [(512, ), (-1, )], dtype=[('out_dims', int)]
    )  # we put -1 here because it will be automatically replaced when creating the net
    agent_args['weight_decay'] = 0.00000

    # optimization parameters
    agent_args['batch_size'] = 64
    agent_args['init_learning_rate'] = 0.0001
    agent_args['lr_step_size'] = 250000
    agent_args['lr_decay_factor'] = 0.3
    agent_args['adam_beta1'] = 0.95
    agent_args['adam_epsilon'] = 1e-4
    agent_args['optimizer'] = 'Adam'
    agent_args['reset_iter_count'] = False

    # directories
    agent_args['checkpoint_dir'] = 'checkpoints'
    agent_args['log_dir'] = 'logs'
    agent_args['init_model'] = ''
    agent_args['model_name'] = "predictor.model"
    agent_args['model_dir'] = time.strftime("%Y_%m_%d_%H_%M_%S")

    # logging and testing
    agent_args['print_err_every'] = 50
    agent_args['detailed_summary_every'] = 1000
    agent_args['test_pred_every'] = 0
    agent_args['test_policy_every'] = 7812
    agent_args['num_batches_per_pred_test'] = 0
    agent_args['num_steps_per_policy_test'] = (
        test_policy_experience_args['memory_capacity'] // simulator_args['num_simulators'])
    agent_args['checkpoint_every'] = 10000
    agent_args['save_param_histograms_every'] = 5000
    agent_args['test_policy_in_the_beginning'] = True

    # experiment arguments
    experiment_args = {}
    experiment_args['num_train_iterations'] = 820000
    experiment_args['test_objective_coeffs_temporal'] = np.array(
        [0., 0., 0., 0.5, 0.5, 1.])
    experiment_args['test_objective_coeffs_meas'] = np.array([1., 0., 0., 0.])
    experiment_args['test_random_prob'] = 0.
    experiment_args['test_checkpoint'] = 'checkpoints/2017_04_09_09_11_48'
    experiment_args['test_policy_num_steps'] = 2000
    experiment_args['show_predictions'] = False
    experiment_args['multiplayer'] = False

    # Create and run the experiment

    experiment = MultiExperiment(
        target_maker_args=target_maker_args,
        simulator_args=simulator_args,
        train_experience_args=train_experience_args,
        test_policy_experience_args=test_policy_experience_args,
        agent_args=agent_args,
        experiment_args=experiment_args)

    experiment.run(main_args[0])

# Example #3

def main(mode, doom_config_file):

    ### Set all arguments

    ## Target maker
    target_maker_args = {}
    target_maker_args['future_steps'] = [1, 2, 4, 8, 16, 32]
    target_maker_args['meas_to_predict'] = [0, 1, 2]
    target_maker_args['min_num_targs'] = 3
    target_maker_args['rwrd_schedule_type'] = 'exp'
    target_maker_args['gammas'] = []
    target_maker_args['invalid_targets_replacement'] = 'nan'

    ## Simulator
    simulator_args = {}
    simulator_args['config'] = '../../maps/' + doom_config_file  # e.g. 'D3_battle.cfg'
    simulator_args['resolution'] = (84, 84)
    simulator_args['frame_skip'] = 1  # TODO: change back to 4 for experiments; 1 gives nicer videos
    simulator_args['color_mode'] = 'GRAY'
    simulator_args['maps'] = ['MAP01']
    simulator_args['switch_maps'] = False
    #train
    simulator_args['num_simulators'] = 8  # Number of simulators run in parallel

    ## Experience
    # Train experience
    train_experience_args = {}
    train_experience_args['memory_capacity'] = 20000
    train_experience_args['history_length'] = 1
    train_experience_args['history_step'] = 1
    train_experience_args['action_format'] = 'enumerate'
    train_experience_args['shared'] = False

    # Test prediction experience
    test_prediction_experience_args = train_experience_args.copy()
    test_prediction_experience_args['memory_capacity'] = 1

    # Test policy experience
    test_policy_experience_args = train_experience_args.copy()
    test_policy_experience_args['memory_capacity'] = 55000

    ## Agent
    agent_args = {}

    # agent type
    agent_args['agent_type'] = 'advantage'

    # preprocessing
    agent_args['preprocess_input_images'] = lambda x: x / 255. - 0.5
    agent_args['preprocess_input_measurements'] = lambda x: x / 100. - 0.5
    targ_scale_coeffs = np.expand_dims(
        (np.expand_dims(np.array([7.5, 30., 1.]), 1) * np.ones(
            (1, len(target_maker_args['future_steps'])))).flatten(), 0)
    agent_args['preprocess_input_targets'] = lambda x: x / targ_scale_coeffs
    agent_args['postprocess_predictions'] = lambda x: x * targ_scale_coeffs

    # agent properties
    agent_args['objective_coeffs_temporal'] = [0., 0., 0., 0.5, 0.5, 1.]
    agent_args['objective_coeffs_meas'] = [0.5, 0.5, 1.]  # KOE: training-time values; the test-time coefficients are set in experiment_args below
    agent_args['random_exploration_schedule'] = lambda step: (
        0.02 + 145000. / (float(step) + 150000.))
    agent_args['new_memories_per_batch'] = 8
    agent_args['random_objective_coeffs'] = True
    agent_args['objective_coeffs_distribution'] = 'uniform_pos_neg'
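    # With random_objective_coeffs enabled, objective vectors are sampled during
    # training (here uniformly over positive and negative weights), which is
    # presumably why the fc_obj_params goal branch below is needed.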

    # net parameters
    agent_args['conv_params'] = np.array([(32, 8, 4), (64, 4, 2), (64, 3, 1)],
                                         dtype=[('out_channels', int),
                                                ('kernel', int),
                                                ('stride', int)])
    agent_args['fc_img_params'] = np.array([(512, )],
                                           dtype=[('out_dims', int)])
    agent_args['fc_meas_params'] = np.array([(128, ), (128, ), (128, )],
                                            dtype=[('out_dims', int)])
    agent_args['fc_obj_params'] = np.array([(128, ), (128, ), (128, )],
                                           dtype=[('out_dims', int)])
    agent_args['fc_joint_params'] = np.array(
        [(512, ), (-1, )], dtype=[('out_dims', int)]
    )  # we put -1 here because it will be automatically replaced when creating the net
    agent_args['weight_decay'] = 0.00000

    # optimization parameters
    agent_args['batch_size'] = 64
    agent_args['init_learning_rate'] = 0.0001
    agent_args['lr_step_size'] = 250000
    agent_args['lr_decay_factor'] = 0.3
    agent_args['adam_beta1'] = 0.95
    agent_args['adam_epsilon'] = 1e-4
    agent_args['optimizer'] = 'Adam'
    agent_args['reset_iter_count'] = False

    # directories
    agent_args['checkpoint_dir'] = 'checkpoints'
    agent_args['log_dir'] = 'logs'
    agent_args['init_model'] = ''
    agent_args['model_name'] = "predictor.model"
    agent_args['model_dir'] = time.strftime("%Y_%m_%d_%H_%M_%S")

    # logging and testing
    agent_args['print_err_every'] = 50
    agent_args['detailed_summary_every'] = 1000
    agent_args['test_pred_every'] = 0
    agent_args['test_policy_every'] = 7812
    agent_args['num_batches_per_pred_test'] = 0
    agent_args['num_steps_per_policy_test'] = (
        test_policy_experience_args['memory_capacity'] // simulator_args['num_simulators'])
    agent_args['checkpoint_every'] = 10000
    agent_args['save_param_histograms_every'] = 5000
    agent_args['test_policy_in_the_beginning'] = True

    # experiment arguments
    experiment_args = {}
    experiment_args['num_train_iterations'] = 820000
    # KOE: This defines the weights and temporal weights of objectives.
    experiment_args['test_objective_coeffs_temporal'] = np.array(
        [0., 0., 0., 0.5, 0.5, 1.])
    experiment_args['test_objective_coeffs_meas'] = np.array([0.5, 0.5, 1.])
    #KOETODO Revert to old values above.

    #Health, ammo, frags
    # experiment_args['test_objective_coeffs_meas'] = np.array([-1, -1, -1])  # KOE: opposite objectives, just for testing
    experiment_args['test_random_prob'] = 0.
    experiment_args['test_checkpoint'] = 'checkpoints/2017_04_09_09_13_17'  # KOE: This defines the weights to load
    experiment_args['test_policy_num_steps'] = 2000  # KOE: How many steps to run the test agent.
    experiment_args['show_predictions'] = False
    experiment_args['multiplayer'] = False

    # Create and run the experiment

    experiment = MultiExperiment(
        target_maker_args=target_maker_args,
        simulator_args=simulator_args,
        train_experience_args=train_experience_args,
        test_policy_experience_args=test_policy_experience_args,
        agent_args=agent_args,
        experiment_args=experiment_args)

    return experiment.run(mode)

# Example #4

def main(main_args):

    ### Set all arguments

    ## Target maker
    target_maker_args = {}
    target_maker_args['future_steps'] = [1, 2, 4, 8, 16, 32]
    target_maker_args['meas_to_predict'] = [0]
    target_maker_args['min_num_targs'] = 3
    target_maker_args['rwrd_schedule_type'] = 'exp'
    target_maker_args['gammas'] = []
    target_maker_args['invalid_targets_replacement'] = 'nan'

    ## Simulator
    simulator_args = {}
    simulator_args['config'] = '../../maps/D1_basic.cfg'
    simulator_args['resolution'] = (84, 84)
    simulator_args['frame_skip'] = 4
    simulator_args['color_mode'] = 'GRAY'
    # simulator_args['color_mode'] = 'RGB'
    simulator_args['maps'] = ['MAP01']
    simulator_args['switch_maps'] = False
    #train
    simulator_args['num_simulators'] = 8

    ## Experience
    # Train experience
    train_experience_args = {}
    train_experience_args['memory_capacity'] = 20000
    train_experience_args['history_length'] = 1
    train_experience_args['history_step'] = 1
    train_experience_args['action_format'] = 'enumerate'
    train_experience_args['shared'] = False

    # Test prediction experience
    test_prediction_experience_args = train_experience_args.copy()
    test_prediction_experience_args['memory_capacity'] = 1

    # Test policy experience
    test_policy_experience_args = train_experience_args.copy()
    test_policy_experience_args['memory_capacity'] = 55000

    ## Agent
    agent_args = {}

    # agent type
    # agent_args['agent_type'] = 'advantage'
    # agent_args['agent_type'] = 'advantage_objects'
    agent_args['agent_type'] = 'advantage_objects_depth'
    # agent_args['agent_type'] = 'advantage_objects_no_image'
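    # The commented-out lines are alternative agent variants in this fork;
    # 'advantage_objects_depth' presumably adds object and depth inputs on top of
    # the plain 'advantage' architecture (note the extra fc_objects_params below).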

    # preprocessing
    agent_args['preprocess_input_images'] = lambda x: x / 255. - 0.5
    agent_args['preprocess_input_measurements'] = lambda x: x / 100. - 0.5
    targ_scale_coeffs = np.expand_dims(
        (np.expand_dims(np.array([30.]), 1) * np.ones(
            (1, len(target_maker_args['future_steps'])))).flatten(), 0)
    agent_args['preprocess_input_targets'] = lambda x: x / targ_scale_coeffs
    agent_args['postprocess_predictions'] = lambda x: x * targ_scale_coeffs

    # agent properties
    agent_args['objective_coeffs_temporal'] = [0., 0., 0., 0.5, 0.5, 1.]
    agent_args['objective_coeffs_meas'] = [1.]
    agent_args['random_exploration_schedule'] = lambda step: (
        0.02 + 145000. / (float(step) + 150000.))
    agent_args['new_memories_per_batch'] = 8

    # net parameters
    agent_args['conv_params'] = np.array([(32, 8, 4), (64, 4, 2), (64, 3, 1)],
                                         dtype=[('out_channels', int),
                                                ('kernel', int),
                                                ('stride', int)])
    agent_args['fc_img_params'] = np.array([(512, )],
                                           dtype=[('out_dims', int)])
    agent_args['fc_meas_params'] = np.array([(128, ), (128, ), (128, )],
                                            dtype=[('out_dims', int)])
    agent_args['fc_objects_params'] = np.array([(128, ), (128, ), (128, )],
                                               dtype=[('out_dims', int)])
    agent_args['fc_joint_params'] = np.array(
        [(512, ), (-1, )], dtype=[('out_dims', int)]
    )  # we put -1 here because it will be automatically replaced when creating the net
    agent_args['weight_decay'] = 0.00000

    # optimization parameters
    agent_args['batch_size'] = 64
    agent_args['init_learning_rate'] = 0.0001
    agent_args['lr_step_size'] = 250000
    agent_args['lr_decay_factor'] = 0.3
    agent_args['adam_beta1'] = 0.95
    agent_args['adam_epsilon'] = 1e-4
    agent_args['optimizer'] = 'Adam'
    agent_args['reset_iter_count'] = False

    # directories
    agent_args['checkpoint_dir'] = 'checkpoints'
    agent_args['log_dir'] = 'logs'
    agent_args['init_model'] = ''
    agent_args['model_name'] = "predictor.model"
    agent_args['model_dir'] = time.strftime("%Y_%m_%d_%H_%M_%S")

    # logging and testing
    agent_args['print_err_every'] = 50
    agent_args['detailed_summary_every'] = 1000
    agent_args['test_pred_every'] = 0
    agent_args['test_policy_every'] = 7812
    agent_args['num_batches_per_pred_test'] = 0
    agent_args['num_steps_per_policy_test'] = (
        test_policy_experience_args['memory_capacity'] // simulator_args['num_simulators'])
    agent_args['checkpoint_every'] = 10000
    agent_args['save_param_histograms_every'] = 5000
    agent_args['test_policy_in_the_beginning'] = True

    # experiment arguments
    experiment_args = {}
    experiment_args['num_train_iterations'] = 820000
    experiment_args['test_objective_coeffs_temporal'] = np.array(
        [0., 0., 0., 0.5, 0.5, 1.])
    experiment_args['test_objective_coeffs_meas'] = np.array([1.])
    experiment_args['test_random_prob'] = 0.
    experiment_args['test_checkpoint'] = 'checkpoints/2017_04_09_09_07_45'
    experiment_args['test_policy_num_steps'] = 2000
    experiment_args['show_predictions'] = False
    experiment_args['multiplayer'] = False

    # Create and run the experiment

    experiment = MultiExperiment(
        target_maker_args=target_maker_args,
        simulator_args=simulator_args,
        train_experience_args=train_experience_args,
        test_policy_experience_args=test_policy_experience_args,
        agent_args=agent_args,
        experiment_args=experiment_args)

    experiment.run(main_args[0])

# Example #5

def main(main_args):

    ### Set all arguments

    ## Target maker
    target_maker_args = {}
    target_maker_args['future_steps'] = [1, 2, 4, 8, 16, 32]
    target_maker_args['meas_to_predict'] = [0, 2]
    target_maker_args['min_num_targs'] = 3
    target_maker_args['rwrd_schedule_type'] = 'exp'
    target_maker_args['gammas'] = []
    target_maker_args['invalid_targets_replacement'] = 'nan'

    ## Simulator
    simulator_args = {}
    simulator_args['config'] = '../../maps/cartpole.cfg'
    simulator_args['resolution'] = (84, 84)
    simulator_args['frame_skip'] = 0
    simulator_args['env_name'] = 'CartPole-v1'
    simulator_args['color_mode'] = 'RGB'
    simulator_args['environnement'] = 'gym'
    simulator_args['num_meas'] = 4
    simulator_args['gym'] = True
    #train
    simulator_args['num_simulators'] = 8

    ## Experience
    # Train experience
    train_experience_args = {}
    train_experience_args['memory_capacity'] = 20000
    train_experience_args['history_length'] = 3
    train_experience_args['history_step'] = 1
    train_experience_args['action_format'] = 'enumerate'
    train_experience_args['shared'] = False

    # Test prediction experience
    test_prediction_experience_args = train_experience_args.copy()
    test_prediction_experience_args['memory_capacity'] = 1

    # Test policy experience
    test_policy_experience_args = train_experience_args.copy()
    test_policy_experience_args['memory_capacity'] = 3000

    ## Agent
    agent_args = {}

    # agent type
    agent_args['agent_type'] = 'advantage'

    # preprocessing
    agent_args['preprocess_input_images'] = lambda x: x / 255. - 0.5
    agent_args['preprocess_input_measurements'] = lambda x: x
    targ_scale_coeffs = np.expand_dims(
        (np.expand_dims(np.array([1, 12.]), 1) * np.ones(
            (1, len(target_maker_args['future_steps'])))).flatten(), 0)
    agent_args['preprocess_input_targets'] = lambda x: x / targ_scale_coeffs
    agent_args['postprocess_predictions'] = lambda x: x * targ_scale_coeffs
    agent_args['discrete_controls_manual'] = []
    agent_args['opposite_button_pairs'] = [[0, 1]]
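    # Presumably buttons 0 and 1 (push left / push right in CartPole) are treated
    # as mutually exclusive when discrete actions are enumerated.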

    # agent properties
    agent_args['objective_coeffs_temporal'] = [0.1, 0.1, 0.1, 1, 1, 1]
    agent_args['objective_coeffs_meas'] = [-0.1, -1]  # Weights for position and angle; position range (-4.8, 4.8), angle range (-24, 24)

    def f1(x):
        return x**2

    def f2(x):
        return np.abs(x)

    agent_args['objective_function'] = [f1, f2]  # do not deviate from center
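    # Together with the negative objective_coeffs_meas above, penalizing x**2 and
    # |angle| presumably pushes the policy to keep the cart centred and the pole upright.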

    agent_args['random_exploration_schedule'] = lambda step: (0.02 + 14500. / (
        float(step) + 15000.))
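    # Note the 14500/15000 constants: exploration decays on a roughly ten-times
    # shorter step scale here than in the Doom examples above (145000/150000).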
    agent_args['new_memories_per_batch'] = 8

    agent_args['gym'] = True  # in gym only one button is allowed

    # net parameters
    agent_args['conv_params'] = np.array([(32, 8, 4), (64, 4, 2), (64, 3, 1)],
                                         dtype=[('out_channels', int),
                                                ('kernel', int),
                                                ('stride', int)])
    agent_args['fc_img_params'] = np.array([(512, )],
                                           dtype=[('out_dims', int)])
    agent_args['fc_meas_params'] = np.array([(128, ), (128, ), (128, )],
                                            dtype=[('out_dims', int)])
    agent_args['fc_joint_params'] = np.array(
        [(512, ), (-1, )], dtype=[('out_dims', int)]
    )  # we put -1 here because it will be automatically replaced when creating the net
    agent_args['weight_decay'] = 0.00001

    # optimization parameters
    agent_args['batch_size'] = 64
    agent_args['init_learning_rate'] = 0.0001
    agent_args['lr_step_size'] = 250000
    agent_args['lr_decay_factor'] = 0.3
    agent_args['adam_beta1'] = 0.95
    agent_args['adam_epsilon'] = 1e-4
    agent_args['optimizer'] = 'Adam'
    agent_args['reset_iter_count'] = False

    # directories
    agent_args['checkpoint_dir'] = 'checkpoints'
    agent_args['log_dir'] = 'logs'
    agent_args['init_model'] = ''
    agent_args['model_name'] = "predictor.model"
    agent_args['model_dir'] = time.strftime("%Y_%m_%d_%H_%M_%S")

    # logging and testing
    agent_args['print_err_every'] = 50
    agent_args['detailed_summary_every'] = 1000
    agent_args['test_pred_every'] = 0
    agent_args['test_policy_every'] = 1000
    agent_args['num_batches_per_pred_test'] = 0
    agent_args['num_steps_per_policy_test'] = (
        test_policy_experience_args['memory_capacity'] // simulator_args['num_simulators'])
    agent_args['checkpoint_every'] = 10000
    agent_args['save_param_histograms_every'] = 5000
    agent_args['test_policy_in_the_beginning'] = True

    # experiment arguments
    experiment_args = {}
    experiment_args['num_train_iterations'] = 820000
    experiment_args['test_objective_coeffs_temporal'] = np.array(
        [0.1, 0.1, 0.1, 1, 1, 1])
    experiment_args['test_objective_coeffs_meas'] = np.array([-0.1, -1])
    experiment_args['test_random_prob'] = 0.
    experiment_args['test_checkpoint'] = 'checkpoints/2020_01_11_23_06_26'
    experiment_args['test_policy_num_steps'] = 2000
    experiment_args['show_predictions'] = False
    experiment_args['multiplayer'] = False

    # Create and run the experiment

    experiment = MultiExperiment(
        target_maker_args=target_maker_args,
        simulator_args=simulator_args,
        train_experience_args=train_experience_args,
        test_policy_experience_args=test_policy_experience_args,
        agent_args=agent_args,
        experiment_args=experiment_args)

    experiment.run(main_args[0])
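
# A minimal entry-point sketch, assuming the run mode expected by
# MultiExperiment.run() (e.g. 'train' or 'show') is passed as the first
# command-line argument; the exact modes accepted depend on the project.
if __name__ == '__main__':
    import sys
    main(sys.argv[1:])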