Code example #1
import sys
from struct import pack
from subprocess import call
from constants import STAGE2_SIZE, constants
from os import environ, path, name as osname

REGION = "usa"
TYPE = "old"  # "new"

if len(sys.argv) > 1:
    REGION = sys.argv[1].lower()

if len(sys.argv) > 2:
    TYPE = sys.argv[2].lower()

# Pull the region-specific constants into module-level globals, failing fast
# if the requested region is missing an entry.
for name, regions in constants.items():
    if REGION not in regions:
        print("Error: no {} constant defined for region {}".format(name, REGION))
        sys.exit(1)
    globals()[name] = regions[REGION]


def p(x):
    # Pack as an unsigned 32-bit little-endian integer.
    return pack("<I", x)


def pb(x):
    # Pack as an unsigned 32-bit big-endian integer.
    return pack(">I", x)


def get_arm_none_eabi_binutils_exec(name):
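The example is truncated at this definition. A minimal sketch of what a helper with this name typically does (hypothetical, not the original body; it assumes a devkitARM-style toolchain layout and reuses the environ/path/osname imports from the top of the snippet):

# Hypothetical sketch: locate an arm-none-eabi binutils executable such as
# "objcopy" or "ld", preferring a DEVKITARM install if one is configured.
def get_arm_none_eabi_binutils_exec_sketch(name):
    exe = "arm-none-eabi-{}".format(name)
    if osname == "nt":  # Windows binutils binaries carry an .exe suffix
        exe += ".exe"
    devkitarm = environ.get("DEVKITARM")
    if devkitarm:
        return path.join(devkitarm, "bin", exe)
    return exe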
Code example #2
def run(args, server):
    env = create_env(args.env_id, client_id=str(args.task), remotes=args.remotes,
                     envWrap=args.envWrap, designHead=args.designHead,
                     noLifeReward=args.noLifeReward)
    trainer = A3C(env, args.task, args.visualise, args.unsup, args.envWrap, args.designHead, args.noReward)

    # Dump the experiment configuration to log.txt (chief worker only).
    if args.task == 0:
        with open(args.log_dir + '/log.txt', 'w') as fid:
            for key, val in constants.items():
                fid.write('%s: %s\n'%(str(key), str(val)))
            fid.write('designHead: %s\n'%args.designHead)
            fid.write('input observation: %s\n'%str(env.observation_space.shape))
            fid.write('env name: %s\n'%str(env.spec.id))
            fid.write('unsup method type: %s\n'%str(args.unsup))

    # Variable names that start with "local" are not saved in checkpoints.
    if use_tf12_api:
        variables_to_save = [v for v in tf.global_variables() if not v.name.startswith("local")]
        init_op = tf.variables_initializer(variables_to_save)
        init_all_op = tf.global_variables_initializer()
    else:
        variables_to_save = [v for v in tf.all_variables() if not v.name.startswith("local")]
        init_op = tf.initialize_variables(variables_to_save)
        init_all_op = tf.initialize_all_variables()
    saver = FastSaver(variables_to_save)
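    # If a pretrained checkpoint directory was given, build a saver over the
    # non-local trainable variables so init_fn can restore them below.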
    if args.pretrain is not None:
        variables_to_restore = [v for v in tf.trainable_variables() if not v.name.startswith("local")]
        pretrain_saver = FastSaver(variables_to_restore)

    var_list = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, tf.get_variable_scope().name)
    logger.info('Trainable vars:')
    for v in var_list:
        logger.info('  %s %s', v.name, v.get_shape())

    def init_fn(ses):
        logger.info("Initializing all parameters.")
        ses.run(init_all_op)
        if args.pretrain is not None:
            pretrain = tf.train.latest_checkpoint(args.pretrain)
            logger.info("==> Restoring from given pretrained checkpoint.")
            logger.info("    Pretraining address: %s", pretrain)
            pretrain_saver.restore(ses, pretrain)
            logger.info("==> Done restoring model! Restored %d variables.", len(variables_to_restore))

    config = tf.ConfigProto(device_filters=["/job:ps", "/job:worker/task:{}/cpu:0".format(args.task)])
    logdir = os.path.join(args.log_dir, 'train')

    if use_tf12_api:
        summary_writer = tf.summary.FileWriter(logdir + "_%d" % args.task)
    else:
        summary_writer = tf.train.SummaryWriter(logdir + "_%d" % args.task)

    logger.info("Events directory: %s_%s", logdir, args.task)
    sv = tf.train.Supervisor(is_chief=(args.task == 0),
                             logdir=logdir,
                             saver=saver,
                             summary_op=None,
                             init_op=init_op,
                             init_fn=init_fn,
                             summary_writer=summary_writer,
                             ready_op=tf.report_uninitialized_variables(variables_to_save),
                             global_step=trainer.global_step,
                             save_model_secs=30,
                             save_summaries_secs=30)

    num_global_steps = constants['MAX_GLOBAL_STEPS']

    logger.info(
        "Starting session. If this hangs, we're mostly likely waiting to connect to the parameter server. " +
        "One common cause is that the parameter server DNS name isn't resolving yet, or is misspecified.")
    with sv.managed_session(server.target, config=config) as sess, sess.as_default():
        # Workaround for FailedPreconditionError
        # see: https://github.com/openai/universe-starter-agent/issues/44 and 31
        sess.run(trainer.sync)

        trainer.start(sess, summary_writer)
        global_step = sess.run(trainer.global_step)
        logger.info("Starting training at gobal_step=%d", global_step)
        while not sv.should_stop() and (not num_global_steps or global_step < num_global_steps):
            trainer.process(sess)
            global_step = sess.run(trainer.global_step)

    # Ask for all the services to stop.
    sv.stop()
    logger.info('reached %s steps. worker stopped.', global_step)
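For context, run() expects args with the fields used above and a tf.train.Server that is already part of a ps/worker cluster. A minimal single-machine sketch of how it might be launched (the cluster addresses and ConfigProto settings here are hypothetical; the real launcher script is not shown):

# Hypothetical launcher: one parameter server and one worker on localhost.
cluster = tf.train.ClusterSpec({"ps": ["localhost:12222"],
                                "worker": ["localhost:12223"]})
server = tf.train.Server(cluster, job_name="worker", task_index=args.task,
                         config=tf.ConfigProto(intra_op_parallelism_threads=1,
                                               inter_op_parallelism_threads=2))
run(args, server)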
Code example #3
def run(args, server):
    env = create_env(args.env_id,
                     client_id=str(args.task),
                     remotes=args.remotes,
                     envWrap=args.envWrap,
                     designHead=args.designHead,
                     noLifeReward=args.noLifeReward)
    trainer = A3C(env,
                  args.task,
                  args.visualise,
                  args.unsup,
                  args.envWrap,
                  args.designHead,
                  args.noReward,
                  imagined_weight=args.imagined_weight,
                  no_stop_grads=args.noStopGrads,
                  stop_grads_forward=args.stopGradsForward,
                  bonus_cap=args.bonus_cap,
                  activate_bug=args.activateBug,
                  consistency_bonus=args.consistency_bonus,
                  imagination4RL=args.imagination4RL,
                  add_cur_model=args.addCurModel,
                  no_policy=args.noPolicy,
                  add_con_model=args.addConModel,
                  policy_trainer=args.policyTrainer)

    # Dump the experiment configuration to log.txt (chief worker only).
    if args.task == 0:
        with open(args.log_dir + '/log.txt', 'w') as fid:
            for key, val in constants.items():
                fid.write('%s: %s\n' % (str(key), str(val)))
            fid.write('designHead: %s\n' % args.designHead)
            fid.write('input observation: %s\n' %
                      str(env.observation_space.shape))
            fid.write('env name: %s\n' % str(env.spec.id))
            fid.write('unsup method type: %s\n' % str(args.unsup))
            fid.write('imagined weight: %s\n' % str(args.imagined_weight))
            if args.noStopGrads:
                fid.write(
                    'Turning off stop gradients on the forward and embedding model\n'
                )
            elif args.stopGradsForward:
                fid.write(
                    'Imagined gradients are stopped on the forward model and the embedding model\n'
                )
            else:
                fid.write(
                    'Imagined gradients are stopped only on the embedding/encoding layers\n'
                )
            fid.write('Saving a checkpoint every %s hours\n' %
                      str(args.keepCheckpointEveryNHours))
            fid.write('Capping the curiosity reward bonus at %s\n' %
                      str(args.bonus_cap))
            fid.write(
                'The imagined_weight does not reduce the contribution of real samples to the inverse loss\n'
            )
            if args.activateBug:
                fid.write(
                    'The bug is activated!!! Asking it to predict random actions from real states!\n'
                )
            fid.write(
                'Weight of consistency bonus given to the policy is %s\n' %
                str(args.consistency_bonus))
            if args.imagination4RL:
                fid.write('Using imagined actions to train the RL policy\n')
            if args.noPolicy:
                fid.write(
                    'Not using RL policy, relying on 1-step curiosity predictor\n'
                )
            if args.addCurModel:
                fid.write(
                    'Adding a 1-step curiosity predictor to the policy encoder\n'
                )
            if args.addConModel:
                fid.write(
                    'Adding a 1-step consistency predictor to the policy encoder\n'
                )
            if args.policyTrainer:
                fid.write(
                    'Using a supervised policy trainer to approximate rewards and train the policy\n'
                )

    # Variable names that start with "local" are not saved in checkpoints.
    if use_tf12_api:
        variables_to_save = [
            v for v in tf.global_variables() if not v.name.startswith("local")
        ]
        init_op = tf.variables_initializer(variables_to_save)
        init_all_op = tf.global_variables_initializer()
    else:
        variables_to_save = [
            v for v in tf.all_variables() if not v.name.startswith("local")
        ]
        init_op = tf.initialize_variables(variables_to_save)
        init_all_op = tf.initialize_all_variables()

    if args.saveMeta:
        saver = tf.train.Saver(
            variables_to_save,
            keep_checkpoint_every_n_hours=args.keepCheckpointEveryNHours)
    else:
        saver = FastSaver(
            variables_to_save,
            keep_checkpoint_every_n_hours=args.keepCheckpointEveryNHours)

    if args.pretrain is not None:
        variables_to_restore = [
            v for v in tf.trainable_variables()
            if not v.name.startswith("local")
        ]
        pretrain_saver = FastSaver(variables_to_restore)

    var_list = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                 tf.get_variable_scope().name)
    logger.info('Trainable vars:')
    for v in var_list:
        logger.info('  %s %s', v.name, v.get_shape())

    def init_fn(ses):
        logger.info("Initializing all parameters.")
        ses.run(init_all_op)
        if args.pretrain is not None:
            pretrain = tf.train.latest_checkpoint(args.pretrain)
            logger.info("==> Restoring from given pretrained checkpoint.")
            logger.info("    Pretraining address: %s", pretrain)
            pretrain_saver.restore(ses, pretrain)
            logger.info("==> Done restoring model! Restored %d variables.",
                        len(variables_to_restore))

    config = tf.ConfigProto(device_filters=[
        "/job:ps", "/job:worker/task:{}/cpu:0".format(args.task)
    ])
    logdir = os.path.join(args.log_dir, 'train')

    if use_tf12_api:
        summary_writer = tf.summary.FileWriter(logdir + "_%d" % args.task)
    else:
        summary_writer = tf.train.SummaryWriter(logdir + "_%d" % args.task)

    logger.info("Events directory: %s_%s", logdir, args.task)
    sv = tf.train.Supervisor(
        is_chief=(args.task == 0),
        logdir=logdir,
        saver=saver,
        summary_op=None,
        init_op=init_op,
        init_fn=init_fn,
        summary_writer=summary_writer,
        ready_op=tf.report_uninitialized_variables(variables_to_save),
        global_step=trainer.global_step,
        save_model_secs=30,
        save_summaries_secs=30)

    num_global_steps = constants['MAX_GLOBAL_STEPS']

    logger.info(
        "Starting session. If this hangs, we're most likely waiting to connect to the parameter server. "
        "One common cause is that the parameter server DNS name isn't resolving yet, or is misspecified."
    )
    with sv.managed_session(server.target,
                            config=config) as sess, sess.as_default():
        # Workaround for FailedPreconditionError
        # see: https://github.com/openai/universe-starter-agent/issues/44 and 31
        sess.run(trainer.sync)

        trainer.start(sess, summary_writer)
        global_step = sess.run(trainer.global_step)
        logger.info("Starting training at gobal_step=%d", global_step)
        while not sv.should_stop() and (not num_global_steps
                                        or global_step < num_global_steps):
            trainer.process(sess)
            global_step = sess.run(trainer.global_step)

    # Ask for all the services to stop.
    sv.stop()
    logger.info('reached %s steps. worker stopped.', global_step)
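FastSaver is not defined in this snippet. In the universe-starter-agent project referenced in the comments above, it is a thin tf.train.Saver subclass whose only job is to skip writing the (large) meta graph on every checkpoint, which would explain why the saveMeta flag above switches back to a plain tf.train.Saver. A sketch of that pattern:

# Sketch of a FastSaver in the universe-starter-agent style: identical to
# tf.train.Saver except that the meta graph is never written alongside the
# checkpoint, keeping periodic saves small and fast.
class FastSaver(tf.train.Saver):
    def save(self, sess, save_path, global_step=None, latest_filename=None,
             meta_graph_suffix="meta", write_meta_graph=True):
        super(FastSaver, self).save(sess, save_path, global_step,
                                    latest_filename, meta_graph_suffix,
                                    write_meta_graph=False)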