Code Example #1
def run_monitor(args, server):
    logger.info("Execute run monitor")
    env = create_icegame_env(args.logdir, args.env_id, args)
    monitor = PolicyMonitor(env, args.policy, args.task, args)

    variables_to_save = [
        v for v in tf.global_variables() if not v.name.startswith("local")
    ]
    init_op = tf.variables_initializer(variables_to_save)
    init_all_op = tf.global_variables_initializer()

    # print trainable variables
    var_list = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                 tf.get_variable_scope().name)
    for v in var_list:
        logger.info('  %s %s', v.name, v.get_shape())

    def init_fn(ses):
        logger.info("Initializing all parameters.")
        ses.run(init_all_op)

    config = tf.ConfigProto(device_filters=[
        "/job:ps", "/job:worker/task:{}/cpu:0".format(args.task)
    ])

    logdir = os.path.join(args.logdir, 'eval')
    summary_writer = tf.summary.FileWriter(logdir)

    monitor.set_writer(summary_writer)

    sv = tf.train.Supervisor(
        is_chief=(args.task == 0),
        logdir=logdir,
        summary_op=None,
        init_op=init_op,
        init_fn=init_fn,
        summary_writer=summary_writer,
        ready_op=tf.report_uninitialized_variables(variables_to_save),
        global_step=monitor.global_step)

    logger.info(
        "Starting session. If this hangs, we're most likely waiting to connect to the parameter server. "
        "One common cause is that the parameter server DNS name isn't resolving yet, or is misspecified."
    )

    with sv.managed_session(server.target,
                            config=config) as sess, sess.as_default():
        logger.info("PE Session Entered")
        sess.run(monitor.sync)
        global_step = sess.run(monitor.global_step)
        logger.info("Starting monitoring at step=%d", global_step)

        while not sv.should_stop():
            monitor.eval(sess)
            time.sleep(args.monitor_eval_secs)

    # Ask for all the services to stop.
    sv.stop()
    logger.info('monitor stopped at step %s.', global_step)
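
run_monitor expects a tf.train.Server that is already part of the cluster addressed by the device filters above. A minimal sketch of that wiring, assuming hypothetical flags ps_hosts and worker_hosts (comma-separated host:port lists; they are not defined in these excerpts):

import tensorflow as tf

def make_server(args):
    # ps_hosts/worker_hosts are assumed flags, e.g. "localhost:12222"
    cluster = tf.train.ClusterSpec({
        "ps": args.ps_hosts.split(","),
        "worker": args.worker_hosts.split(","),
    })
    # The server whose target is passed to sv.managed_session(server.target, ...)
    return tf.train.Server(cluster, job_name="worker", task_index=args.task)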
Code Example #2
def LongLoopAlgorithm(args):
    outdir = os.path.join(args.out_dir, 'loopalgo')
    if not os.path.exists(outdir):
        os.makedirs(outdir)
    # define environment
    env = create_icegame_env(outdir, args.env_id)
    env.start(create_defect=False)

    length = 0
    rewards = 0

    for ep in range(args.num_episodes):
        last_state = env.reset(create_defect=True)
        steps_rewards = []

        while True:
            state, reward, terminate, info = env.auto_step()

            rewards += reward
            length += 1

            if terminate:
                print("Episode finished. Sum of rewards: %.2f. Length: %d." %
                      (rewards, length))
                length = 0
                rewards = 0
                break
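
Note that steps_rewards above is declared but never filled. If per-step curves are wanted, a variant of the inner loop can record the running total; a sketch, assuming the same create_icegame_env/auto_step interface as above:

for ep in range(args.num_episodes):
    env.reset(create_defect=True)
    steps_rewards = []
    rewards, length = 0, 0
    while True:
        _, reward, terminate, _ = env.auto_step()
        rewards += reward
        length += 1
        steps_rewards.append(rewards)  # running total after each step
        if terminate:
            print("Episode %d: reward %.2f, length %d" % (ep, rewards, length))
            break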
Code Example #3
def run(args, server):
    env = create_icegame_env(args.log_dir, args.env_id)
    trainer = A3C(env, args.task, args.visualise)

    # Variable names that start with "local" are not saved in checkpoints.
    if use_tf12_api:
        variables_to_save = [v for v in tf.global_variables() if not v.name.startswith("local")]
        init_op = tf.variables_initializer(variables_to_save)
        init_all_op = tf.global_variables_initializer()
    else:
        variables_to_save = [v for v in tf.all_variables() if not v.name.startswith("local")]
        init_op = tf.initialize_variables(variables_to_save)
        init_all_op = tf.initialize_all_variables()
    saver = FastSaver(variables_to_save)

    var_list = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, tf.get_variable_scope().name)
    logger.info('Trainable vars:')
    for v in var_list:
        logger.info('  %s %s', v.name, v.get_shape())

    def init_fn(ses):
        logger.info("Initializing all parameters.")
        ses.run(init_all_op)

    config = tf.ConfigProto(device_filters=["/job:ps", "/job:worker/task:{}/cpu:0".format(args.task)])
    logdir = os.path.join(args.log_dir, 'train')

    if use_tf12_api:
        summary_writer = tf.summary.FileWriter(logdir + "_%d" % args.task)
    else:
        summary_writer = tf.train.SummaryWriter(logdir + "_%d" % args.task)

    logger.info("Events directory: %s_%s", logdir, args.task)
    sv = tf.train.Supervisor(is_chief=(args.task == 0),
                            logdir=logdir,
                            saver=saver,
                            summary_op=None,
                            init_op=init_op,
                            init_fn=init_fn,
                            summary_writer=summary_writer,
                            ready_op=tf.report_uninitialized_variables(variables_to_save),
                            global_step=trainer.global_step,
                            save_model_secs=300,
                            save_summaries_secs=300)

    # train for up to 1e8 steps (100M steps)
    num_global_steps = 100000000

    logger.info(
        "Starting session. If this hangs, we're most likely waiting to connect to the parameter server. "
        "One common cause is that the parameter server DNS name isn't resolving yet, or is misspecified.")
    with sv.managed_session(server.target, config=config) as sess, sess.as_default():
        sess.run(trainer.sync)
        trainer.start(sess, summary_writer)
        global_step = sess.run(trainer.global_step)
        logger.info("Starting training at step=%d", global_step)
        while not sv.should_stop() and (not num_global_steps or global_step < num_global_steps):
            trainer.process(sess)
            global_step = sess.run(trainer.global_step)

    # Ask for all the services to stop.
    sv.stop()
    logger.info('reached %s steps. worker stopped.', global_step)
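
FastSaver is referenced here and in the inference functions but is not defined in these excerpts. A common definition (the universe-starter-agent pattern) subclasses tf.train.Saver and skips rewriting the meta graph on every checkpoint; treat this as a sketch of that pattern, not necessarily the project's exact class:

import tensorflow as tf

class FastSaver(tf.train.Saver):
    def save(self, sess, save_path, global_step=None, latest_filename=None,
             meta_graph_suffix="meta", write_meta_graph=True):
        # The graph rarely changes between checkpoints, so skip the
        # (large, slow) meta graph file on every save.
        super(FastSaver, self).save(sess, save_path, global_step,
                                    latest_filename, meta_graph_suffix,
                                    write_meta_graph=False)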
Code Example #4
def inference(args):
    indir = os.path.join(args.logdir, 'train')
    outdir = os.path.join(args.logdir,
                          'player') if args.outdir is None else args.outdir
    if not os.path.exists(outdir):
        os.makedirs(outdir)

    with open(indir + "/checkpoint", "r") as f:
        first_line = f.readline().strip()
        print("first_line is : {}".format(first_line))
    ckpt = first_line.split(' ')[-1].split('/')[-1][:-1]
    ckpt = ckpt.split('-')[-1]
    ckpt = indir + '/model.ckpt-' + ckpt

    print("ckpt: {}".format(ckpt))

    # define environment
    #env = create_icegame_env(outdir, args.env_id, args)
    env = create_icegame_env(outdir, args.env_id)
    local_space = env.local_observation_space.n
    global_space = env.global_observation_space.shape
    action_space = env.action_space.n

    # resize the system and enable subregion
    #if env.L != args.system_size:
    #    print ("Enlarge the system {} --> {}".format(env.L, args.system_size))
    #    env.resize_ice_config(args.system_size, args.mcsteps)
    #    env.dump_env_setting()
    #    env.save_ice()

    # our trained CNN always uses a 32x32 input, so enable the sub-region view
    env.enable_subregion()
    print("Enable sub-region mechanism.")

    # policy recoder
    ppath = os.path.join(outdir, "episodes")
    if not os.path.exists(ppath):
        os.makedirs(ppath)
    pirec = PolicyRecorder(ppath)

    with tf.device("/cpu:0"):
        # define policy network
        with tf.variable_scope("global"):
            if args.policy == "simple":
                policy = models.SimplePolicy(global_space, local_space,
                                             action_space, args)
            elif args.policy == "cnn":
                policy = models.CNNPolicy(global_space, local_space,
                                          action_space, args)
            else:
                raise ValueError("Unknown policy: {}".format(args.policy))
            policy.global_step = tf.get_variable(
                "global_step", [],
                tf.int32,
                initializer=tf.constant_initializer(0, dtype=tf.int32),
                trainable=False)
        # Variable names that start with "local" are not saved in checkpoints.
        variables_to_restore = [
            v for v in tf.global_variables() if not v.name.startswith("local")
        ]
        init_all_op = tf.global_variables_initializer()

        saver = FastSaver(variables_to_restore)

        # print trainable variables
        var_list = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                     tf.get_variable_scope().name)
        logger.info('Trainable vars:')
        for v in var_list:
            logger.info('  {} {}'.format(v.name, v.get_shape()))
        logger.info("Restored the trained model.")

        # summary of rewards
        action_writers = []
        summary_writer = tf.summary.FileWriter(outdir)
        """NOT so useful.
        for act_idx in range(action_space):
            action_writers.append(tf.summary.FileWriter(
                os.path.join(outdir, "action_{}".format(act_idx))
            ))
        """

        logger.info("Inference events directory: %s", outdir)
        config = tf.ConfigProto(allow_soft_placement=True,
                                log_device_placement=False)

        with tf.Session(config=config) as sess:
            logger.info("Initializing all parameters.")
            sess.run(init_all_op)
            logger.info("Restoring trainable global parameters.")
            saver.restore(sess, ckpt)
            logger.info("Restored model was trained for %.2fM global steps",
                        sess.run(policy.global_step) / 1000000.)

            #last_features = policy.get_initial_features()  # reset lstm memory
            length = 0
            rewards = 0

            # For plotting
            if args.render:
                import matplotlib.pyplot as plt
                import matplotlib.gridspec as gridspec

                plt.ion()
                fig = plt.figure(num=None,
                                 figsize=(8, 8),
                                 dpi=92,
                                 facecolor='w',
                                 edgecolor='k')

                gs1 = gridspec.GridSpec(3, 3)
                gs1.update(left=0.05, right=0.85, wspace=0.15)
                ax1 = plt.subplot(gs1[:-1, :])
                ax2 = plt.subplot(gs1[-1, :-1])
                ax3 = plt.subplot(gs1[-1, -1])

                ax1.set_title("IceGame (UpTimes: {})".format(
                    env.sim.get_updated_counter()))

                ind = np.arange(action_space)
                width = 0.20
                action_legends = [
                    "head_0", "head_1", "head_2", "tail_0", "tail_1", "tail_2",
                    "Metro"
                ]


            for ep in range(args.num_tests):
                """TODO: policy sampling strategy
                    random, greedy and sampled policy.
                """

                env.start(create_defect=True)
                last_state = env.reset()
                # these for plotting
                steps_rewards = []
                steps_values = []
                step = 0

                # policy recorder
                pirec.attach_episode(ep)
                # TODO: Call save_ice here?

                # running policy
                while True:
                    fetched = policy.act_inference(last_state)
                    prob_action, action, value_ = fetched[0], fetched[1], fetched[2]
                    """TODO: Policy Recorder
                        * prob_action
                        * value_
                        * local config
                        * init_config (of course, but store in other way.)
                        * Store all cases
                        Q: Can we put these in env_hist.json?
                    """

                    stepAct = action.argmax()
                    state, reward, terminal, info = env.step(stepAct)
                    local = last_state.local_obs.tolist()
                    pi_ = prob_action.tolist()
                    value_ = value_.tolist()[0]
                    action_ = action.tolist()

                    # TODO: We need the env 'weights', p(s, s', a) = ? (definition still unclear)
                    # And we also want some physical observables.
                    pirec.push_step(step, stepAct, pi_, value_, local, reward)

                    # update stats
                    length += 1
                    step += 1
                    rewards += reward
                    last_state = state

                    if info:
                        loopsize = info["Loop Size"]
                        looparea = info["Loop Area"]
                    """Animation for State and Actions
                        Show Energy Bar On Screen.
                    """

                    if args.render:
                        # save list for plotting
                        steps_rewards.append(rewards)
                        steps_values.append(value_)

                        ax2.clear()
                        ax2.bar(ind, prob_action)
                        ax2.set_xticks(ind + width / 2)
                        ax2.set_xticklabels(action_legends)

                        canvas = state.global_obs[:, :, 0]
                        ax1.clear()
                        ax1.imshow(canvas,
                                   'Reds',
                                   interpolation="None",
                                   vmin=-1,
                                   vmax=1)
                        ax1.set_title("IceGame: (UpTimes: {})".format(
                            env.sim.get_updated_counter()))

                        ax3.clear()
                        # plot the reward and value curves collected above
                        ax3.plot(steps_rewards, linewidth=2)
                        ax3.plot(steps_values, linewidth=2)

                        plt.pause(0.05)
                    """TODO:
                        1. Need more concrete idea for playing the game when interfering.
                        2. Save these values for post processing.
                        3. We need penalty for timeout. --> Move timeout into env.
                    """
                    if terminal:
                        print("Episode finished. Sum of rewards: %.2f. Length: %d."
                              % (rewards, length))
                        pirec.dump_episode()
                        length = 0
                        rewards = 0
                        step = 0
                        break

        logger.info('Finished %d true episodes.', args.num_tests)
        if args.render:
            plt.savefig("GameScene.png")
            logger.info("Saved the last scene to GameScene.png")
        env.close()
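
The manual parsing of the checkpoint file at the top of inference() can usually be replaced by TensorFlow's own helper, which reads the same file and returns the full checkpoint prefix; a sketch, assuming the standard layout written by tf.train.Saver:

import tensorflow as tf

ckpt = tf.train.latest_checkpoint(indir)  # e.g. ".../train/model.ckpt-120000"
if ckpt is None:
    raise IOError("No checkpoint found in {}".format(indir))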
Code Example #5
def inference(args):
    indir = os.path.join(args.log_dir, 'train')
    outdir = os.path.join(args.log_dir, 'player') if args.out_dir is None else args.out_dir

    with open(indir + "/checkpoint", "r") as f:
        first_line = f.readline().strip()
        print ("first_line is : {}".format(first_line))
    ckpt = first_line.split(' ')[-1].split('/')[-1][:-1]
    ckpt = ckpt.split('-')[-1]
    ckpt = indir + '/model.ckpt-' + ckpt

    print ("ckpt: {}".format(ckpt))

    # define environment
    env = create_icegame_env(outdir, args.env_id)
    num_actions = env.action_space.n

    with tf.device("/cpu:0"):
        # define policy network
        with tf.variable_scope("global"):
            policy = LSTMPolicy(env.observation_space.shape, num_actions)
            policy.global_step = tf.get_variable("global_step", [], 
                    tf.int32, initializer=tf.constant_initializer(0, dtype=tf.int32), trainable=False)
        # Variable names that start with "local" are not saved in checkpoints.
        variables_to_restore = [v for v in tf.global_variables() if not v.name.startswith("local")]
        init_all_op = tf.global_variables_initializer()

        saver = FastSaver(variables_to_restore)

        # print trainable variables
        var_list = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, tf.get_variable_scope().name)
        logger.info('Trainable vars:')
        for v in var_list:
            logger.info('  {} {}'.format(v.name, v.get_shape()))
        logger.info("Restored the trained model.")

        # summary of rewards
        action_writers = []
        summary_writer = tf.summary.FileWriter(outdir)
        for act_idx in range(num_actions):
            action_writers.append(tf.summary.FileWriter(
                os.path.join(outdir, "action_{}".format(act_idx))
            ))

        logger.info("Inference events directory: %s", outdir)
        config = tf.ConfigProto(allow_soft_placement=True, log_device_placement=False)

        with tf.Session(config=config) as sess:
            logger.info("Initializing all parameters.")
            sess.run(init_all_op)
            logger.info("Restoring trainable global parameters.")
            saver.restore(sess, ckpt)
            logger.info("Restored model was trained for %.2fM global steps", sess.run(policy.global_step)/1000000.)

            last_features = policy.get_initial_features()  # reset lstm memory
            length = 0
            rewards = 0

            # For plotting
            plt.ion()
            fig = plt.figure(num=None, figsize=(8, 8), dpi=92, facecolor='w', edgecolor='k')

            gs1 = gridspec.GridSpec(3, 3)
            gs1.update(left=0.05, right=0.85, wspace=0.15)
            ax1 = plt.subplot(gs1[:-1, :])
            ax2 = plt.subplot(gs1[-1, :-1])
            ax3 = plt.subplot(gs1[-1, -1])

            ax1.set_title("IceGame (Agent Lives: {}, UpTimes: {})".format(env.lives, env.sim.get_updated_counter()))

            ind = np.arange(num_actions)
            width = 0.20
            #action_legends = ["Up", "Down", "Left", "Right", "NextUp", "NextDown", "Metropolis"]
            action_legends = [">", "v", "<", "^", "", "", "Metro"]

            for ep in range(args.num_episodes):
                """TODO: policy sampling strategy
                    random, greedy and sampled policy.
                """

                last_state = env.reset()
                steps_rewards = []
                steps_values = []

                # running policy
                while True:
                    fetched = policy.act_inference(last_state, *last_features)
                    prob_action, action, value_, features = fetched[0], fetched[1], fetched[2], fetched[3:]

                    #TODO: policy sampling strategy

                    # Greedy
                    #print ("Prob of actions: {}".format(prob_action))
                    stepAct = action.argmax()
                    state, reward, terminal, info = env.step(stepAct)

                    # update stats
                    length += 1
                    rewards += reward
                    last_state = state
                    last_features = features
                    steps_rewards.append(rewards)
                    steps_values.append(value_)

                    if info:
                        loopsize = info["Loop Size"]
                        looparea = info["Loop Area"]

                    """Animation for State and Actions
                    """
                    ax2.clear()
                    ax2.bar(ind, prob_action)
                    ax2.set_xticks(ind + width / 2)
                    ax2.set_xticklabels(action_legends)

                    ax1.imshow(state[:, :, 2], 'Reds', interpolation="None", vmin=-1, vmax=1)
                    # with hist
                    #ax1.imshow(state[:, :, 7], 'Reds', interpolation="None", vmin=-1, vmax=1)
                    ax1.set_title("IceGame: (Agent Lives: {}, UpTimes: {})".format(env.lives, env.sim.get_updated_counter()))

                    ax3.clear()
                    ax3.plot(steps_rewards, linewidth=2)
                    ax3.plot(steps_values, linewidth=2)
                    #plt.savefig("records/{}.png".format(length))

                    plt.pause(0.20)

                    # store summary
                    summary = tf.Summary()
                    summary.value.add(tag='ep_{}/reward'.format(ep), simple_value=reward)
                    summary.value.add(tag='ep_{}/netreward'.format(ep), simple_value=rewards)
                    summary.value.add(tag='ep_{}/value'.format(ep), simple_value=float(value_[0]))

                    if info:
                        summary.value.add(tag='ep_{}/loop_size'.format(ep), simple_value=loopsize)
                        summary.value.add(tag='ep_{}/loop_area'.format(ep), simple_value=looparea)

                    summary_writer.add_summary(summary, length)
                    summary_writer.flush()

                    summary = tf.Summary()
                    for ac_id in range(num_actions):
                        summary.value.add(tag='ep_{}/a_{}'.format(ep, ac_id), simple_value=float(prob_action[ac_id]))
                        action_writers[ac_id].add_summary(summary, length)
                        action_writers[ac_id].flush()

                    """TODO:
                        1. Need more concrete idea for playing the game when interfering.
                        2. Save these values for post processing.
                    """
                    if terminal:
                        #if length >= timestep_limit:
                        #    last_state, _, _, _ = env.reset()

                        last_features = policy.get_initial_features()  # reset lstm memory
                        print("Episode finished. Sum of rewards: %.2f. Length: %d." % (rewards, length))

                        length = 0
                        rewards = 0
                        break

        logger.info('Finished %d true episodes.', args.num_episodes)
        plt.savefig("GameScene.png")
        logger.info("Saved the last scene to GameScene.png")
        env.close()
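
The TODO about random, greedy, and sampled policies recurs in every inference loop. Below is a sketch of the three strategies over the softmax output prob_action; only the greedy branch mirrors what the loops above actually do, the other two are assumptions about the intended design:

import numpy as np

def choose_action(prob_action, strategy="greedy"):
    prob = np.asarray(prob_action, dtype=np.float64).ravel()
    prob /= prob.sum()  # guard against numerical drift in the softmax
    if strategy == "greedy":
        return int(prob.argmax())
    if strategy == "sampled":
        return int(np.random.choice(len(prob), p=prob))
    if strategy == "random":
        return int(np.random.randint(len(prob)))
    raise ValueError("unknown strategy: %s" % strategy)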
Code Example #6
def inference(args):
    indir = os.path.join(args.log_dir, 'train')
    outdir = os.path.join(
        args.log_dir, 'inference') if args.out_dir is None else args.out_dir

    with open(indir + "/checkpoint", "r") as f:
        first_line = f.readline().strip()
        print("first_line is : {}".format(first_line))
    ckpt = first_line.split(' ')[-1].split('/')[-1][:-1]
    ckpt = ckpt.split('-')[-1]
    ckpt = indir + '/model.ckpt-' + ckpt

    print("ckpt: {}".format(ckpt))

    # define environment
    env = create_icegame_env(outdir, args.env_id)
    num_actions = env.action_space.n

    with tf.device("/cpu:0"):
        # define policy network
        with tf.variable_scope("global"):
            policy = LSTMPolicy(env.observation_space.shape, num_actions)
            policy.global_step = tf.get_variable(
                "global_step", [],
                tf.int32,
                initializer=tf.constant_initializer(0, dtype=tf.int32),
                trainable=False)
        # Variable names that start with "local" are not saved in checkpoints.
        variables_to_restore = [
            v for v in tf.global_variables() if not v.name.startswith("local")
        ]
        init_all_op = tf.global_variables_initializer()

        saver = FastSaver(variables_to_restore)

        # print trainable variables
        var_list = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                     tf.get_variable_scope().name)
        logger.info('Trainable vars:')
        for v in var_list:
            logger.info('  {} {}'.format(v.name, v.get_shape()))
        logger.info("Restored the trained model.")

        # summary of rewards
        action_writers = []
        summary_writer = tf.summary.FileWriter(outdir)
        for act_idx in range(num_actions):
            action_writers.append(
                tf.summary.FileWriter(
                    os.path.join(outdir, "action_{}".format(act_idx))))

        logger.info("Inference events directory: %s", outdir)
        config = tf.ConfigProto(allow_soft_placement=True,
                                log_device_placement=False)

        with tf.Session(config=config) as sess:
            logger.info("Initializing all parameters.")
            sess.run(init_all_op)
            logger.info("Restoring trainable global parameters.")
            saver.restore(sess, ckpt)
            logger.info("Restored model was trained for %.2fM global steps",
                        sess.run(policy.global_step) / 1000000.)

            last_features = policy.get_initial_features()  # reset lstm memory
            length = 0
            rewards = 0
            loopsizes = []

            # All Episodes records
            for ep in range(args.num_episodes):
                """TODO: policy sampling strategy
                    random, greedy and sampled policy.
                """

                last_state = env.reset()

                # Episode records

                # running policy
                while True:
                    fetched = policy.act_inference(last_state, *last_features)
                    prob_action, action, value_, features = fetched[0], fetched[1], fetched[2], fetched[3:]

                    #TODO: policy sampling strategy

                    # Greedy
                    stepAct = action.argmax()
                    state, reward, terminal, info = env.step(stepAct)

                    # update stats
                    length += 1
                    rewards += reward
                    last_state = state
                    last_features = features
                    """TODO: Resonable Statistics are necessary
                    """

                    if info:
                        loopsize = info["Loop Size"]
                        looparea = info["Loop Area"]

                    # store summary
                    summary = tf.Summary()
                    summary.value.add(tag='ep_{}/reward'.format(ep),
                                      simple_value=reward)
                    summary.value.add(tag='ep_{}/netreward'.format(ep),
                                      simple_value=rewards)
                    summary.value.add(tag='ep_{}/value'.format(ep),
                                      simple_value=float(value_[0]))

                    if info:
                        summary.value.add(tag='ep_{}/loop_size'.format(ep),
                                          simple_value=loopsize)
                        summary.value.add(tag='ep_{}/loop_area'.format(ep),
                                          simple_value=looparea)
                        loopsizes.append(loopsize)

                    summary_writer.add_summary(summary, length)
                    summary_writer.flush()

                    summary = tf.Summary()
                    for ac_id in range(num_actions):
                        summary.value.add(tag='ep_{}/a_{}'.format(ep, ac_id),
                                          simple_value=float(prob_action[ac_id]))
                        action_writers[ac_id].add_summary(summary, length)
                        action_writers[ac_id].flush()
                    """TODO:
                        1. Need more concrete idea for playing the game when interfering.
                        2. Save these values for post processing.
                    """
                    if terminal:
                        #if length >= timestep_limit:
                        #    last_state, _, _, _ = env.reset()

                        last_features = policy.get_initial_features()  # reset lstm memory
                        print("Episode finished. Sum of rewards: %.2f. Length: %d."
                              % (rewards, length))

                        length = 0
                        rewards = 0
                        break

        logger.info('Finished %d true episodes.', args.num_episodes)

        # Count loop topology
        unique, counts = np.unique(loopsizes, return_counts=True)
        loopstatistics = dict(zip(unique, counts))
        print(loopstatistics)
        env.close()
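
If more than the raw count dict is wanted, the same np.unique output can be turned into a small frequency table over the recorded loop sizes; a sketch reusing the loopsizes list collected above:

import numpy as np

unique, counts = np.unique(loopsizes, return_counts=True)
total = float(counts.sum())
for size, count in zip(unique, counts):
    # relative frequency of each loop size over all recorded loops
    print("loop size %4d: %6d times (%.1f%%)" % (size, count, 100.0 * count / total))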