def evaluate():
  """Plot ring_net's predicted reward against the recorded reward.

  Builds the unrolled graph on a batch of one 15-step sequence, restores the
  weights from the latest checkpoint in FLAGS.checkpoint_dir, runs the graph
  once and saves the comparison plot to "paper_reward.png".

  The network is first driven by recorded states 0..9 and then by its own
  output for 4 further steps.  The second output of encode_compress_decode
  (reward_2) is collected after the last recorded step and after every
  self-fed step, giving 5 values that are plotted against reward[:, 10:].

  If no checkpoint is found the function prints an error and returns without
  evaluating, instead of running the graph on variables that were never
  restored.
  """
  with tf.Graph().as_default():
    # make inputs
    state, reward, action = ring_net.inputs(1, 15)

    # possible input dropout
    # NOTE: state_drop is built but never consumed below; the network reads
    # `state` directly.  The op is kept so the graph is unchanged.
    input_keep_prob = tf.placeholder("float")
    state_drop = tf.nn.dropout(state, input_keep_prob)

    # possible dropout inside
    keep_prob_encoding = tf.placeholder("float")
    keep_prob_lstm = tf.placeholder("float")

    # unwrap
    reward_2_o = []
    # first step: no previous hidden state, so None is passed
    x_2, reward_2, hidden_state = ring_net.encode_compress_decode(state[:,0,:,:,:], action[:,1,:], None, keep_prob_encoding, keep_prob_lstm)
    # every later call must share the variables created by the first call
    tf.get_variable_scope().reuse_variables()
    # unroll for 9 more steps on recorded states 1..9 (actions 2..10)
    for i in xrange(9):
      x_2, reward_2, hidden_state = ring_net.encode_compress_decode(state[:,i+1,:,:,:], action[:,i+2,:], hidden_state, keep_prob_encoding, keep_prob_lstm)
    # now collect values: the output of the last recorded step first ...
    reward_2_o.append(reward_2)
    # ... then 4 steps where the network consumes its own output x_2
    # (actions 11..14)
    for i in xrange(4):
      x_2, reward_2, hidden_state = ring_net.encode_compress_decode(x_2, action[:,i+11,:], hidden_state, keep_prob_encoding, keep_prob_lstm)
      reward_2_o.append(reward_2)
    # stack along a new leading axis, then swap it with the original first
    # axis (presumably batch -- TODO confirm against ring_net)
    reward_2_o = tf.pack(reward_2_o)
    reward_2_o = tf.transpose(reward_2_o, perm=[1,0,2])

    # restore network
    variables_to_restore = tf.all_variables()
    saver = tf.train.Saver(variables_to_restore)
    sess = tf.Session()
    ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir)
    if not (ckpt and ckpt.model_checkpoint_path):
      # No initializer is run in this function, so without a restore there is
      # nothing meaningful to evaluate: stop here.
      print("no chekcpoint file found from " + FLAGS.checkpoint_dir + ", this is an error")
      sess.close()
      return
    saver.restore(sess, ckpt.model_checkpoint_path)
    print("restored file from " + ckpt.model_checkpoint_path)

    # Start que runner
    tf.train.start_queue_runners(sess=sess)

    # every keep probability is fed as 1.0 for evaluation
    feed = {keep_prob_encoding:1.0, keep_prob_lstm:1.0, input_keep_prob:1.0}
    reward_g_o, reward_o = sess.run([reward_2_o, reward], feed_dict=feed)

    print(reward_g_o.shape)
    print(reward_o.shape)

    # first batch element, first reward component; recorded rewards are
    # taken from step 10 onwards to line up with the collected predictions
    plt.figure(0)
    plt.plot(reward_g_o[0,:,0], label= "predicted reward")
    plt.plot(reward_o[0,10:,0], label= "reward")
    plt.title("reward vs step")
    plt.xlabel("step")
    plt.ylabel("reward")
    plt.legend()
    plt.savefig("paper_reward.png")
def evaluate():
  """Roll the restored network forward on random actions and record video.

  The graph is warmed up on ten recorded states from ring_net.inputs, after
  which one extra step is built that takes its frame, hidden state and
  action through feeds.  That step is run 100 times, each time feeding back
  the previous result together with a fresh random_action, and every
  generated frame is written to the module-level `video` writer (defined
  outside this function).
  """
  with tf.Graph().as_default():
    # queue-backed inputs for a batch of one; the action width is read from
    # the static shape so a matching placeholder can be created
    state_start, reward_start, action_start, = ring_net.inputs(1, 15)
    action_size = int(action_start.get_shape()[2])
    action = tf.placeholder(tf.float32, (1, action_size))

    # warm-up on recorded states 0..9 (actions 1..10); the first call starts
    # without a hidden state and creates the variables, later calls reuse them
    hidden_state = None
    for t in xrange(10):
      warm_frame, _, hidden_state = ring_net.encode_compress_decode(
          state_start[:,t,:,:,:], action_start[:,t+1,:], hidden_state, 1.0, 1.0)
      if t == 0:
        tf.get_variable_scope().reuse_variables()

    # the warm-up outputs double as the feedable inputs of the play step
    seed_frame = warm_frame
    print(hidden_state)
    seed_hidden = hidden_state
    next_frame, _, next_hidden = ring_net.encode_compress_decode(
        seed_frame, action, seed_hidden, 1.0, 1.0)

    # restore network
    saver = tf.train.Saver(tf.all_variables())
    sess = tf.Session()
    ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir)
    if not ckpt or not ckpt.model_checkpoint_path:
      print("no chekcpoint file found from " + FLAGS.checkpoint_dir + ", this is an error")
    else:
      saver.restore(sess, ckpt.model_checkpoint_path)
      print("restored file from " + ckpt.model_checkpoint_path)

    # evaluate the warm-up once to obtain the starting frame / hidden state
    tf.train.start_queue_runners(sess=sess)
    # this first draw is replaced inside the loop before it is ever fed
    play_action = random_action(action_size)
    frame_val, hidden_val = sess.run([seed_frame, seed_hidden], feed_dict={})

    # Play!!!!
    for step in xrange(100):
      print(step)
      play_action = random_action(action_size)
      feed = {
          seed_frame: frame_val,
          seed_hidden: hidden_val,
          action: play_action,
      }
      frame_val, hidden_val = sess.run([next_frame, next_hidden], feed_dict=feed)
      # scale to 0..255, clamp, and drop the leading axis before writing
      pixels = np.uint8(np.clip(frame_val*255.0, 0, 255))
      video.write(pixels[0, :, :, :])
    video.release()
    cv2.destroyAllWindows()
def train():
  """Train ring_net for a number of steps.

  Builds the unrolled graph on batches of four 15-step sequences.  The
  network is driven by recorded states 0..9 and then by its own output for
  4 further steps; the 5 frames produced from the last recorded step onward
  are compared with state[:, 10:15] through an L2 loss.

  Weights are initialised by restoring the latest checkpoint in RESTORE_DIR.
  The loop runs 500000 steps, writes summaries every 100 steps and saves a
  checkpoint to SAVE_DIR every 1000 steps.

  Raises:
    RuntimeError: if RESTORE_DIR holds no checkpoint to initialise from.
    AssertionError: if the loss becomes NaN.
  """
  with tf.Graph().as_default():
    # make inputs
    state, reward, action = ring_net.inputs(4, 15)

    # possible input dropout
    # NOTE: state_drop is built but never consumed below; the network reads
    # `state` directly.  The op is kept so the graph is unchanged.
    input_keep_prob = tf.placeholder("float")
    state_drop = tf.nn.dropout(state, input_keep_prob)

    # possible dropout inside
    keep_prob_encoding = tf.placeholder("float")
    keep_prob_lstm = tf.placeholder("float")

    # unwrap
    x_2_o = []
    # first step: no previous hidden state, so None is passed
    x_2, reward_2, hidden_state = ring_net.encode_compress_decode(state[:,0,:,:,:], action[:,1,:], None, keep_prob_encoding, keep_prob_lstm)
    # every later call must share the variables created by the first call
    tf.get_variable_scope().reuse_variables()
    # unroll for 9 more steps on recorded states 1..9 (actions 2..10)
    for i in xrange(9):
      x_2, reward_2,  hidden_state = ring_net.encode_compress_decode(state[:,i+1,:,:,:], action[:,i+2,:], hidden_state, keep_prob_encoding, keep_prob_lstm)
    # now collect values: the output of the last recorded step first ...
    x_2_o.append(x_2)
    # ... then 4 steps where the network consumes its own output x_2
    # (actions 11..14); only these self-fed frames get an image summary
    for i in xrange(4):
      x_2, reward_2, hidden_state = ring_net.encode_compress_decode(x_2, action[:,i+11,:], hidden_state, keep_prob_encoding, keep_prob_lstm)
      x_2_o.append(x_2)
      tf.image_summary('images_gen_' + str(i), x_2)
    # stack along a new leading axis, then swap it with the original first
    # axis so the layout lines up with state[:, 10:15]
    x_2_o = tf.pack(x_2_o)
    x_2_o = tf.transpose(x_2_o, perm=[1,0,2,3,4])

    # error
    error = tf.nn.l2_loss(state[:,10:15,:,:,:] - x_2_o)
    tf.scalar_summary('loss', error)

    # train (hopefuly); 1e-5 is presumably the learning rate -- TODO confirm
    # against ring_net.train
    train_op = ring_net.train(error, 1e-5)

    # List of all Variables
    variables = tf.all_variables()

    # Build a saver
    saver = tf.train.Saver(tf.all_variables(), max_to_keep=1)

    # Summary op
    summary_op = tf.merge_all_summaries()

    # Start running operations on the Graph.
    sess = tf.Session()

    # init from seq 1 model
    print("init from " + RESTORE_DIR)
    saver_restore = tf.train.Saver(variables)
    ckpt = tf.train.get_checkpoint_state(RESTORE_DIR)
    # Restoring is the only initialisation performed in this function (no
    # initializer op is run), so a missing checkpoint is fatal: fail with a
    # clear message instead of an AttributeError on None.
    if not (ckpt and ckpt.model_checkpoint_path):
      raise RuntimeError("no checkpoint file found in " + RESTORE_DIR)
    saver_restore.restore(sess, ckpt.model_checkpoint_path)

    # Start que runner
    tf.train.start_queue_runners(sess=sess)

    # Summary writer
    graph_def = sess.graph.as_graph_def(add_shapes=True)
    summary_writer = tf.train.SummaryWriter(SAVE_DIR, graph_def=graph_def)

    # every keep probability is fed as 1.0; the dict never changes, so it is
    # built once outside the loop
    feed = {keep_prob_encoding:1.0, keep_prob_lstm:1.0, input_keep_prob:1.0}

    for step in xrange(500000):
      t = time.time()
      _ , loss_value = sess.run([train_op, error], feed_dict=feed)
      elapsed = time.time() - t

      assert not np.isnan(loss_value), 'Model diverged with loss = NaN'

      if step%100 == 0:
        print("loss value at " + str(loss_value))
        print("time per batch is " + str(elapsed))
        summary_str = sess.run(summary_op, feed_dict=feed)
        summary_writer.add_summary(summary_str, step)

      if step%1000 == 0:
        checkpoint_path = os.path.join(SAVE_DIR, 'model.ckpt')
        saver.save(sess, checkpoint_path, global_step=step)
        print("saved to " + SAVE_DIR)
# Example #4
0
def evaluate():
    """Plot ring_net's predicted reward against the recorded reward.

    Builds the unrolled graph on a batch of one 15-step sequence, restores
    the weights from the latest checkpoint in FLAGS.checkpoint_dir, runs the
    graph once and saves the comparison plot to "paper_reward.png".

    The network is first driven by recorded states 0..9 and then by its own
    output for 4 further steps.  The second output of encode_compress_decode
    (reward_2) is collected after the last recorded step and after every
    self-fed step, giving 5 values that are plotted against reward[:, 10:].

    If no checkpoint is found the function prints an error and returns
    without evaluating, instead of running the graph on variables that were
    never restored.
    """
    with tf.Graph().as_default():
        # make inputs
        state, reward, action = ring_net.inputs(1, 15)

        # possible input dropout
        # NOTE: state_drop is built but never consumed below; the network
        # reads `state` directly.  The op is kept so the graph is unchanged.
        input_keep_prob = tf.placeholder("float")
        state_drop = tf.nn.dropout(state, input_keep_prob)

        # possible dropout inside
        keep_prob_encoding = tf.placeholder("float")
        keep_prob_lstm = tf.placeholder("float")

        # unwrap
        reward_2_o = []
        # first step: no previous hidden state, so None is passed
        x_2, reward_2, hidden_state = ring_net.encode_compress_decode(
            state[:, 0, :, :, :], action[:, 1, :], None, keep_prob_encoding,
            keep_prob_lstm)
        # every later call must share the variables created by the first call
        tf.get_variable_scope().reuse_variables()
        # unroll for 9 more steps on recorded states 1..9 (actions 2..10)
        for i in xrange(9):
            x_2, reward_2, hidden_state = ring_net.encode_compress_decode(
                state[:, i + 1, :, :, :], action[:, i + 2, :], hidden_state,
                keep_prob_encoding, keep_prob_lstm)
        # now collect values: the output of the last recorded step first ...
        reward_2_o.append(reward_2)
        # ... then 4 steps where the network consumes its own output x_2
        # (actions 11..14)
        for i in xrange(4):
            x_2, reward_2, hidden_state = ring_net.encode_compress_decode(
                x_2, action[:, i + 11, :], hidden_state, keep_prob_encoding,
                keep_prob_lstm)
            reward_2_o.append(reward_2)
        # stack along a new leading axis, then swap it with the original
        # first axis (presumably batch -- TODO confirm against ring_net)
        reward_2_o = tf.pack(reward_2_o)
        reward_2_o = tf.transpose(reward_2_o, perm=[1, 0, 2])

        # restore network
        variables_to_restore = tf.all_variables()
        saver = tf.train.Saver(variables_to_restore)
        sess = tf.Session()
        ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir)
        if not (ckpt and ckpt.model_checkpoint_path):
            # No initializer is run in this function, so without a restore
            # there is nothing meaningful to evaluate: stop here.
            print("no chekcpoint file found from " + FLAGS.checkpoint_dir +
                  ", this is an error")
            sess.close()
            return
        saver.restore(sess, ckpt.model_checkpoint_path)
        print("restored file from " + ckpt.model_checkpoint_path)

        # Start que runner
        tf.train.start_queue_runners(sess=sess)

        # every keep probability is fed as 1.0 for evaluation
        feed = {
            keep_prob_encoding: 1.0,
            keep_prob_lstm: 1.0,
            input_keep_prob: 1.0
        }
        reward_g_o, reward_o = sess.run([reward_2_o, reward], feed_dict=feed)

        print(reward_g_o.shape)
        print(reward_o.shape)

        # first batch element, first reward component; recorded rewards are
        # taken from step 10 onwards to line up with the collected predictions
        plt.figure(0)
        plt.plot(reward_g_o[0, :, 0], label="predicted reward")
        plt.plot(reward_o[0, 10:, 0], label="reward")
        plt.title("reward vs step")
        plt.xlabel("step")
        plt.ylabel("reward")
        plt.legend()
        plt.savefig("paper_reward.png")
# Example #5
0
def train():
    """Train ring_net for a number of steps.

    Builds the unrolled graph on batches of four 15-step sequences.  The
    network is driven by recorded states 0..9 and then by its own output for
    4 further steps; the 5 frames produced from the last recorded step onward
    are compared with state[:, 10:15] through an L2 loss.

    Weights are initialised by restoring the latest checkpoint in
    RESTORE_DIR.  The loop runs 500000 steps, writes summaries every 100
    steps and saves a checkpoint to SAVE_DIR every 1000 steps.

    Raises:
        RuntimeError: if RESTORE_DIR holds no checkpoint to initialise from.
        AssertionError: if the loss becomes NaN.
    """
    with tf.Graph().as_default():
        # make inputs
        state, reward, action = ring_net.inputs(4, 15)

        # possible input dropout
        # NOTE: state_drop is built but never consumed below; the network
        # reads `state` directly.  The op is kept so the graph is unchanged.
        input_keep_prob = tf.placeholder("float")
        state_drop = tf.nn.dropout(state, input_keep_prob)

        # possible dropout inside
        keep_prob_encoding = tf.placeholder("float")
        keep_prob_lstm = tf.placeholder("float")

        # unwrap
        x_2_o = []
        # first step: no previous hidden state, so None is passed
        x_2, reward_2, hidden_state = ring_net.encode_compress_decode(
            state[:, 0, :, :, :], action[:, 1, :], None, keep_prob_encoding,
            keep_prob_lstm)
        # every later call must share the variables created by the first call
        tf.get_variable_scope().reuse_variables()
        # unroll for 9 more steps on recorded states 1..9 (actions 2..10)
        for i in xrange(9):
            x_2, reward_2, hidden_state = ring_net.encode_compress_decode(
                state[:, i + 1, :, :, :], action[:, i + 2, :], hidden_state,
                keep_prob_encoding, keep_prob_lstm)
        # now collect values: the output of the last recorded step first ...
        x_2_o.append(x_2)
        # ... then 4 steps where the network consumes its own output x_2
        # (actions 11..14); only these self-fed frames get an image summary
        for i in xrange(4):
            x_2, reward_2, hidden_state = ring_net.encode_compress_decode(
                x_2, action[:, i + 11, :], hidden_state, keep_prob_encoding,
                keep_prob_lstm)
            x_2_o.append(x_2)
            tf.image_summary('images_gen_' + str(i), x_2)
        # stack along a new leading axis, then swap it with the original
        # first axis so the layout lines up with state[:, 10:15]
        x_2_o = tf.pack(x_2_o)
        x_2_o = tf.transpose(x_2_o, perm=[1, 0, 2, 3, 4])

        # error
        error = tf.nn.l2_loss(state[:, 10:15, :, :, :] - x_2_o)
        tf.scalar_summary('loss', error)

        # train (hopefuly); 1e-5 is presumably the learning rate -- TODO
        # confirm against ring_net.train
        train_op = ring_net.train(error, 1e-5)

        # List of all Variables
        variables = tf.all_variables()

        # Build a saver
        saver = tf.train.Saver(tf.all_variables(), max_to_keep=1)

        # Summary op
        summary_op = tf.merge_all_summaries()

        # Start running operations on the Graph.
        sess = tf.Session()

        # init from seq 1 model
        print("init from " + RESTORE_DIR)
        saver_restore = tf.train.Saver(variables)
        ckpt = tf.train.get_checkpoint_state(RESTORE_DIR)
        # Restoring is the only initialisation performed in this function (no
        # initializer op is run), so a missing checkpoint is fatal: fail with
        # a clear message instead of an AttributeError on None.
        if not (ckpt and ckpt.model_checkpoint_path):
            raise RuntimeError("no checkpoint file found in " + RESTORE_DIR)
        saver_restore.restore(sess, ckpt.model_checkpoint_path)

        # Start que runner
        tf.train.start_queue_runners(sess=sess)

        # Summary writer
        graph_def = sess.graph.as_graph_def(add_shapes=True)
        summary_writer = tf.train.SummaryWriter(SAVE_DIR, graph_def=graph_def)

        # every keep probability is fed as 1.0; the dict never changes, so it
        # is built once outside the loop
        feed = {
            keep_prob_encoding: 1.0,
            keep_prob_lstm: 1.0,
            input_keep_prob: 1.0
        }

        for step in xrange(500000):
            t = time.time()
            _, loss_value = sess.run([train_op, error], feed_dict=feed)
            elapsed = time.time() - t

            assert not np.isnan(loss_value), 'Model diverged with loss = NaN'

            if step % 100 == 0:
                print("loss value at " + str(loss_value))
                print("time per batch is " + str(elapsed))
                summary_str = sess.run(summary_op, feed_dict=feed)
                summary_writer.add_summary(summary_str, step)

            if step % 1000 == 0:
                checkpoint_path = os.path.join(SAVE_DIR, 'model.ckpt')
                saver.save(sess, checkpoint_path, global_step=step)
                print("saved to " + SAVE_DIR)
# Example #6
0
def evaluate():
    """Roll the restored network forward on random actions and record video.

    The graph is warmed up on ten recorded states from ring_net.inputs, after
    which one extra step is built that takes its frame, hidden state and
    action through feeds.  That step is run 100 times, each time feeding back
    the previous result together with a fresh random_action, and every
    generated frame is written to the module-level `video` writer (defined
    outside this function).
    """
    with tf.Graph().as_default():
        # queue-backed inputs for a batch of one; the action width is read
        # from the static shape so a matching placeholder can be created
        state_start, reward_start, action_start, = ring_net.inputs(1, 15)
        action_size = int(action_start.get_shape()[2])
        action = tf.placeholder(tf.float32, (1, action_size))

        # warm-up on recorded states 0..9 (actions 1..10); the first call
        # starts without a hidden state and creates the variables, later
        # calls reuse them
        hidden_state = None
        for t in xrange(10):
            warm_frame, _, hidden_state = ring_net.encode_compress_decode(
                state_start[:, t, :, :, :], action_start[:, t + 1, :],
                hidden_state, 1.0, 1.0)
            if t == 0:
                tf.get_variable_scope().reuse_variables()

        # the warm-up outputs double as the feedable inputs of the play step
        seed_frame = warm_frame
        print(hidden_state)
        seed_hidden = hidden_state
        next_frame, _, next_hidden = ring_net.encode_compress_decode(
            seed_frame, action, seed_hidden, 1.0, 1.0)

        # restore network
        saver = tf.train.Saver(tf.all_variables())
        sess = tf.Session()
        ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir)
        if not ckpt or not ckpt.model_checkpoint_path:
            print("no chekcpoint file found from " + FLAGS.checkpoint_dir +
                  ", this is an error")
        else:
            saver.restore(sess, ckpt.model_checkpoint_path)
            print("restored file from " + ckpt.model_checkpoint_path)

        # evaluate the warm-up once to obtain the starting frame / hidden
        # state
        tf.train.start_queue_runners(sess=sess)
        # this first draw is replaced inside the loop before it is ever fed
        play_action = random_action(action_size)
        frame_val, hidden_val = sess.run([seed_frame, seed_hidden],
                                         feed_dict={})

        # Play!!!!
        for step in xrange(100):
            print(step)
            play_action = random_action(action_size)
            feed = {
                seed_frame: frame_val,
                seed_hidden: hidden_val,
                action: play_action,
            }
            frame_val, hidden_val = sess.run([next_frame, next_hidden],
                                             feed_dict=feed)
            # scale to 0..255, clamp, and drop the leading axis before
            # writing
            pixels = np.uint8(np.clip(frame_val * 255.0, 0, 255))
            video.write(pixels[0, :, :, :])
        video.release()
        cv2.destroyAllWindows()