import sys
from struct import pack
from subprocess import call
from constants import STAGE2_SIZE, constants
from os import environ, path, name as osname

REGION = "usa"
TYPE = "old"  # "new"

if len(sys.argv) > 1:
    REGION = sys.argv[1].lower()
if len(sys.argv) > 2:
    TYPE = sys.argv[2].lower()

# Pull the region-specific values out of the constants table and expose them
# as module-level globals; bail out if the selected region has no entry.
for name, regions in constants.items():
    if REGION not in regions:
        print("Error: {} does not contain a constant for {}".format(name, REGION))
        sys.exit(1)
    globals()[name] = regions[REGION]


def p(x):
    """Pack a 32-bit unsigned integer as little-endian bytes."""
    return pack("<I", x)


def pb(x):
    """Pack a 32-bit unsigned integer as big-endian bytes."""
    return pack(">I", x)


def get_arm_none_eabi_binutils_exec(name):
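    # The body of this function is truncated in this excerpt. Below is a
    # minimal sketch of what such a binutils resolver typically does, assuming
    # a DEVKITARM-style environment variable and only the names imported above
    # (environ, path, osname); the original implementation may differ.
    exe = "arm-none-eabi-" + name
    if osname == "nt":
        exe += ".exe"
    devkitarm = environ.get("DEVKITARM")
    if devkitarm:
        candidate = path.join(devkitarm, "bin", exe)
        if path.isfile(candidate):
            return candidate
    return exe  # fall back to whatever is on PATH
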
def run(args, server):
    """Build the A3C trainer for this worker, then run the distributed
    training loop under a tf.train.Supervisor."""
    env = create_env(args.env_id, client_id=str(args.task), remotes=args.remotes,
                     envWrap=args.envWrap, designHead=args.designHead,
                     noLifeReward=args.noLifeReward)
    trainer = A3C(env, args.task, args.visualise, args.unsup, args.envWrap,
                  args.designHead, args.noReward)

    # Logging: the chief worker (task 0) writes the run configuration to disk.
    if args.task == 0:
        with open(args.log_dir + '/log.txt', 'w') as fid:
            for key, val in constants.items():
                fid.write('%s: %s\n' % (str(key), str(val)))
            fid.write('designHead: %s\n' % args.designHead)
            fid.write('input observation: %s\n' % str(env.observation_space.shape))
            fid.write('env name: %s\n' % str(env.spec.id))
            fid.write('unsup method type: %s\n' % str(args.unsup))

    # Variable names that start with "local" are not saved in checkpoints.
    if use_tf12_api:
        variables_to_save = [v for v in tf.global_variables()
                             if not v.name.startswith("local")]
        init_op = tf.variables_initializer(variables_to_save)
        init_all_op = tf.global_variables_initializer()
    else:
        variables_to_save = [v for v in tf.all_variables()
                             if not v.name.startswith("local")]
        init_op = tf.initialize_variables(variables_to_save)
        init_all_op = tf.initialize_all_variables()
    saver = FastSaver(variables_to_save)
    if args.pretrain is not None:
        variables_to_restore = [v for v in tf.trainable_variables()
                                if not v.name.startswith("local")]
        pretrain_saver = FastSaver(variables_to_restore)

    var_list = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                 tf.get_variable_scope().name)
    logger.info('Trainable vars:')
    for v in var_list:
        logger.info('  %s %s', v.name, v.get_shape())

    def init_fn(ses):
        logger.info("Initializing all parameters.")
        ses.run(init_all_op)
        if args.pretrain is not None:
            pretrain = tf.train.latest_checkpoint(args.pretrain)
            logger.info("==> Restoring from given pretrained checkpoint.")
            logger.info("    Pretraining address: %s", pretrain)
            pretrain_saver.restore(ses, pretrain)
            logger.info("==> Done restoring model! Restored %d variables.",
                        len(variables_to_restore))

    config = tf.ConfigProto(device_filters=[
        "/job:ps", "/job:worker/task:{}/cpu:0".format(args.task)])
    logdir = os.path.join(args.log_dir, 'train')

    if use_tf12_api:
        summary_writer = tf.summary.FileWriter(logdir + "_%d" % args.task)
    else:
        summary_writer = tf.train.SummaryWriter(logdir + "_%d" % args.task)

    logger.info("Events directory: %s_%s", logdir, args.task)
    sv = tf.train.Supervisor(is_chief=(args.task == 0),
                             logdir=logdir,
                             saver=saver,
                             summary_op=None,
                             init_op=init_op,
                             init_fn=init_fn,
                             summary_writer=summary_writer,
                             ready_op=tf.report_uninitialized_variables(variables_to_save),
                             global_step=trainer.global_step,
                             save_model_secs=30,
                             save_summaries_secs=30)

    num_global_steps = constants['MAX_GLOBAL_STEPS']

    logger.info(
        "Starting session. If this hangs, we're most likely waiting to connect to the parameter server. " +
        "One common cause is that the parameter server DNS name isn't resolving yet, or is misspecified.")
    with sv.managed_session(server.target, config=config) as sess, sess.as_default():
        # Workaround for FailedPreconditionError
        # see: https://github.com/openai/universe-starter-agent/issues/44 and 31
        sess.run(trainer.sync)

        trainer.start(sess, summary_writer)
        global_step = sess.run(trainer.global_step)
        logger.info("Starting training at global_step=%d", global_step)
        while not sv.should_stop() and (not num_global_steps or global_step < num_global_steps):
            trainer.process(sess)
            global_step = sess.run(trainer.global_step)

    # Ask for all the services to stop.
    sv.stop()
    logger.info('reached %s steps. worker stopped.', global_step)
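
# How run() is driven is not shown in this excerpt. The sketch below is an
# illustration only, assuming the usual TF1 ps/worker layout; the function
# name, hostnames, and ports are hypothetical, and the real launcher in the
# repo may differ.
def _example_worker_main(args):
    cluster = tf.train.ClusterSpec({
        "ps": ["127.0.0.1:12222"],
        "worker": ["127.0.0.1:12223", "127.0.0.1:12224"],
    })
    server = tf.train.Server(cluster, job_name="worker", task_index=args.task,
                             config=tf.ConfigProto(intra_op_parallelism_threads=1,
                                                   inter_op_parallelism_threads=2))
    run(args, server)
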
def run(args, server):
    """Build the A3C trainer (with the imagination/consistency options) for
    this worker, then run the distributed training loop under a
    tf.train.Supervisor."""
    env = create_env(args.env_id, client_id=str(args.task), remotes=args.remotes,
                     envWrap=args.envWrap, designHead=args.designHead,
                     noLifeReward=args.noLifeReward)
    trainer = A3C(env, args.task, args.visualise, args.unsup, args.envWrap,
                  args.designHead, args.noReward,
                  imagined_weight=args.imagined_weight,
                  no_stop_grads=args.noStopGrads,
                  stop_grads_forward=args.stopGradsForward,
                  bonus_cap=args.bonus_cap,
                  activate_bug=args.activateBug,
                  consistency_bonus=args.consistency_bonus,
                  imagination4RL=args.imagination4RL,
                  add_cur_model=args.addCurModel,
                  no_policy=args.noPolicy,
                  add_con_model=args.addConModel,
                  policy_trainer=args.policyTrainer)

    # Logging: the chief worker (task 0) writes the run configuration to disk.
    if args.task == 0:
        with open(args.log_dir + '/log.txt', 'w') as fid:
            for key, val in constants.items():
                fid.write('%s: %s\n' % (str(key), str(val)))
            fid.write('designHead: %s\n' % args.designHead)
            fid.write('input observation: %s\n' % str(env.observation_space.shape))
            fid.write('env name: %s\n' % str(env.spec.id))
            fid.write('unsup method type: %s\n' % str(args.unsup))
            fid.write('imagined weight: %s\n' % str(args.imagined_weight))
            if args.noStopGrads:
                fid.write('Turning off stop gradients on the forward and embedding model\n')
            elif args.stopGradsForward:
                fid.write('Imagined gradients are stopped on the forward model and the embedding model\n')
            else:
                fid.write('Imagined gradients are stopped only on the embedding/encoding layers\n')
            fid.write('Saving a checkpoint every %s hours\n' % str(args.keepCheckpointEveryNHours))
            fid.write('Capping the curiosity reward bonus at %s\n' % str(args.bonus_cap))
            fid.write('The imagined_weight does not reduce the contribution of real samples to the inverse loss\n')
            if args.activateBug:
                fid.write('The bug is activated!!! Asking it to predict random actions from real states!\n')
            fid.write('Weight of consistency bonus given to the policy is %s\n' % str(args.consistency_bonus))
            if args.imagination4RL:
                fid.write('Using imagined actions to train the RL policy\n')
            if args.noPolicy:
                fid.write('Not using RL policy, relying on 1-step curiosity predictor\n')
            if args.addCurModel:
                fid.write('Adding a 1-step curiosity predictor to the policy encoder\n')
            if args.addConModel:
                fid.write('Adding a 1-step consistency predictor to the policy encoder\n')
            if args.policyTrainer:
                fid.write('Using a supervised policy trainer to approximate rewards and train the policy\n')

    # Variable names that start with "local" are not saved in checkpoints.
    if use_tf12_api:
        variables_to_save = [v for v in tf.global_variables()
                             if not v.name.startswith("local")]
        init_op = tf.variables_initializer(variables_to_save)
        init_all_op = tf.global_variables_initializer()
    else:
        variables_to_save = [v for v in tf.all_variables()
                             if not v.name.startswith("local")]
        init_op = tf.initialize_variables(variables_to_save)
        init_all_op = tf.initialize_all_variables()

    if args.saveMeta:
        saver = tf.train.Saver(
            variables_to_save,
            keep_checkpoint_every_n_hours=args.keepCheckpointEveryNHours)
    else:
        saver = FastSaver(
            variables_to_save,
            keep_checkpoint_every_n_hours=args.keepCheckpointEveryNHours)
    if args.pretrain is not None:
        variables_to_restore = [v for v in tf.trainable_variables()
                                if not v.name.startswith("local")]
        pretrain_saver = FastSaver(variables_to_restore)

    var_list = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                 tf.get_variable_scope().name)
    logger.info('Trainable vars:')
    for v in var_list:
        logger.info('  %s %s', v.name, v.get_shape())

    def init_fn(ses):
        logger.info("Initializing all parameters.")
        ses.run(init_all_op)
        if args.pretrain is not None:
            pretrain = tf.train.latest_checkpoint(args.pretrain)
            logger.info("==> Restoring from given pretrained checkpoint.")
            logger.info("    Pretraining address: %s", pretrain)
            pretrain_saver.restore(ses, pretrain)
            logger.info("==> Done restoring model! Restored %d variables.",
                        len(variables_to_restore))

    config = tf.ConfigProto(device_filters=[
        "/job:ps", "/job:worker/task:{}/cpu:0".format(args.task)])
    logdir = os.path.join(args.log_dir, 'train')

    if use_tf12_api:
        summary_writer = tf.summary.FileWriter(logdir + "_%d" % args.task)
    else:
        summary_writer = tf.train.SummaryWriter(logdir + "_%d" % args.task)

    logger.info("Events directory: %s_%s", logdir, args.task)
    sv = tf.train.Supervisor(is_chief=(args.task == 0),
                             logdir=logdir,
                             saver=saver,
                             summary_op=None,
                             init_op=init_op,
                             init_fn=init_fn,
                             summary_writer=summary_writer,
                             ready_op=tf.report_uninitialized_variables(variables_to_save),
                             global_step=trainer.global_step,
                             save_model_secs=30,
                             save_summaries_secs=30)

    num_global_steps = constants['MAX_GLOBAL_STEPS']

    logger.info(
        "Starting session. If this hangs, we're most likely waiting to connect to the parameter server. " +
        "One common cause is that the parameter server DNS name isn't resolving yet, or is misspecified.")
    with sv.managed_session(server.target, config=config) as sess, sess.as_default():
        # Workaround for FailedPreconditionError
        # see: https://github.com/openai/universe-starter-agent/issues/44 and 31
        sess.run(trainer.sync)

        trainer.start(sess, summary_writer)
        global_step = sess.run(trainer.global_step)
        logger.info("Starting training at global_step=%d", global_step)
        while not sv.should_stop() and (not num_global_steps or global_step < num_global_steps):
            trainer.process(sess)
            global_step = sess.run(trainer.global_step)

    # Ask for all the services to stop.
    sv.stop()
    logger.info('reached %s steps. worker stopped.', global_step)
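
# FastSaver is used above but defined elsewhere in the repo. In the
# universe-starter-agent lineage it is a thin tf.train.Saver subclass that
# forces write_meta_graph=False so the Supervisor's frequent periodic saves
# stay cheap, which is also why the args.saveMeta branch above falls back to
# a plain tf.train.Saver. A sketch of that definition, assuming TF1:
class FastSaver(tf.train.Saver):
    def save(self, sess, save_path, global_step=None, latest_filename=None,
             meta_graph_suffix="meta", write_meta_graph=True):
        # Delegate to Saver.save but always skip writing the meta graph.
        super(FastSaver, self).save(sess, save_path, global_step,
                                    latest_filename, meta_graph_suffix,
                                    write_meta_graph=False)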