# Fragment of a script that drives `sim` with random actions and then shows,
# step by step, what one randomly chosen batch element observed and did.
#
# NOTE(review): this code was recovered from a single collapsed line, so the
# layout below is reconstructed from syntax.  The statements up to
# `rewards.append(reward_t)` read `observation_image_t` and
# `observation_last_label_t` before this fragment assigns them; they are
# presumably the body of a per-step loop whose header is not visible here.
# Confirm, and re-indent them under that header if so.

# Record the observation that the upcoming action responds to.
observation_last_labels.append(observation_last_label_t)
observation_images.append(observation_image_t)

# Draw `sim.batch_size_t` random integer action ids in [0, num_actions) and
# one-hot encode them.
random_action_ids_t = tf.random_uniform(
    [sim.batch_size_t], maxval=num_actions, dtype=tf.int32)
action_t = tf.one_hot(random_action_ids_t, num_actions)
actions.append(action_t)

# Advance the simulator; it hands back the reward and the next
# (image, last label) observation pair.
step_result = sim.do_step(action_t)
reward_t, (observation_image_t, observation_last_label_t) = step_result
rewards.append(reward_t)

# Pack every per-step list into one tensor along axis 1 (the display loop
# below indexes the fetched arrays as [b, t]).
observation_images_t = tf.pack(observation_images, axis=1)
actions_t = tf.pack(actions, axis=1)
observation_last_labels_t = tf.pack(observation_last_labels, axis=1)
rewards_t = tf.pack(rewards, axis=1)

sess = tf.Session()
#sess.run(init_op)

images, labels, last_labels = data.get_batch_of_episodes(
    BATCH_SIZE, TIME_STEPS, CLASSES_PER_EPISODE)

# Evaluate all four packed tensors in a single run.  The fetched values
# rebind the names that held the Python lists of tensors above.
rollout_fetches = (
    observation_images_t,
    actions_t,
    observation_last_labels_t,
    rewards_t,
)
rollout_feed = {
    images_t: images,
    #labels_t: labels,
    last_labels_t: last_labels,
}
observation_images, actions, observation_last_labels, rewards = sess.run(
    rollout_fetches, rollout_feed)

# Pick one batch element at random and walk through its steps.
b = random.choice(range(BATCH_SIZE))
num_steps = actions.shape[1]
for t in range(num_steps):
    image = observation_images[b, t]
    # Scale to 0..255 for display -- assumes pixel values in [0, 1]; TODO confirm.
    im = Image.fromarray(np.uint8(image * 255))

    # Each print passes one pre-formatted string so the output matches the
    # Python 2 `print label, value` form (label, one space, value).
    print(t)
    print('%s %s' % ('response from last question: ',
                     observation_last_labels[b, t]))
    print('current image')
    print('%s %s' % ('action for current image: ', actions[b, t]))
    print('%s %s' % ('reward for current image + current action: ',
                     rewards[b, t]))

    im.show()
    # Wait for a line on stdin before moving on to the next step.
    raw_input()
# accuracy_1st_summary_t: accuracy_1st, # accuracy_2nd_summary_t: accuracy_2nd, # accuracy_5th_summary_t: accuracy_5th, # accuracy_10th_summary_t: accuracy_10th, # question_ave_summary_t: question_ave, # question_1st_summary_t: question_1st, # question_2nd_summary_t: question_2nd, # question_5th_summary_t: question_5th, # question_10th_summary_t: question_10th # }), e+1) # train_writer.flush() # process a test batch images, labels, last_labels = data.get_batch_of_episodes( BATCH_SIZE, TIME_STEPS, CLASSES_PER_EPISODE, NUM_LABELS, use_test_data=True) true_labels = np.argmax(labels[:, :-1], axis=2) predictions, rewards, loss = sess.run( [predictions_t, rewards_t, loss_t], { images_t: images, labels_t: labels, last_labels_t: last_labels, epsilon_t: 0.0 }) reward_max = np.mean(rewards) pred_labels = np.argmax(predictions, axis=2) accuracy_max = np.mean(true_labels == pred_labels) num_correct_predictions = np.sum(