def evaluate(eval_corpus_file, theta, start_url): docs = gen_docs.get_all_docs(eval_corpus_file) random.shuffle(docs) docs = docs[:100] driver = web.start(start_url) correct_docs = 0 correct_cmds = 0 total_cmds = 0 for doc in docs: driver.get(start_url) doc_correct = True for cmd in doc: total_cmds += 1 text_cmd, (cmd_type, wtid, arg) = cmd # do it state = web.build_state(driver, web.tokenize_command(text_cmd)) actions = state.enumerate_actions() action, best_score, probs = state.get_action_probs(actions, theta) if action and \ action.type == cmd_type and \ action.element.get_attribute('x-wtid') == wtid and \ (action.params == None or text_classification.untokenize_subcommand(action.params).lower() == arg.lower()): correct_cmds += 1 else: print "Failed: ", action, " for ", text_cmd doc_correct = False if action: action.perform(driver) if doc_correct: correct_docs += 1 driver.quit() return float(correct_docs) / len(docs), float(correct_cmds) / total_cmds
def get_reward(self):
    """Score the recorded action history against the gold annotations.

    Each step earns self.ok for matching action type + target element,
    plus a bonus scaled up to self.perfect by token overlap of the
    argument text (full bonus when both sides have no text), or self.bad
    on a type/element mismatch.

    Returns:
        The mean per-step reward over self.reward_history.
    """
    total = 0
    bonus = self.perfect - self.ok  # extra reward available for text match
    for i, (taken_type, taken_element, taken_text) in enumerate(self.reward_history):
        gold_type, gold_wtid, gold_text = self.correct[i]
        if gold_text:
            # Tokenize lazily: only needed when there is gold text to compare.
            gold_text = web.tokenize_command(gold_text)
        if taken_type != gold_type or taken_element != gold_wtid:
            total += self.bad
            continue
        total += self.ok
        if gold_text and taken_text:
            # Fraction of shared tokens, normalized by the longer side.
            overlap = len(set(gold_text) & set(taken_text))
            denom = max(len(gold_text), len(taken_text))
            total += bonus * (float(overlap) / denom)
        elif not gold_text and not taken_text:
            # Neither side has argument text: count as a perfect match.
            total += bonus
    return total * 1.0 / len(self.reward_history)
def policy_gradient(command_documents, start_url="http://localhost:8000", visualize=False, ITERATIONS=50):
    """Learn an action-scoring weight vector via REINFORCE-style updates.

    For each document, samples one action per command from the policy's
    action distribution, performs it in the browser, accumulates the
    policy-gradient estimate, and updates theta by reward * gradient.
    Stops early when theta changes little over a full iteration.

    Args:
        command_documents: list of documents; each document is a list of
            (command_text, annotation) pairs.
        start_url: page loaded before replaying each document.
        visualize: if True, plot reward and theta trajectories at the end.
        ITERATIONS: maximum number of passes over the corpus.

    Returns:
        The learned theta vector (numpy array over Action.FEATURE_NAMES).
    """
    n_features = len(web.Action.FEATURE_NAMES)
    # Initialize theta with small symmetric noise to break ties.
    theta = np.array([(random.random() * 2 - 1) / 1000 for _ in range(n_features)])
    theta_history = [copy.copy(theta)]
    reward_history = []
    driver = web.start(start_url)
    try:
        for i in range(ITERATIONS):
            avg_dist = 0
            for doc_num, document in enumerate(command_documents):
                driver.get(start_url)  # reset browser state per document
                state_actions = []
                action_choices = []
                rewarder = Rewarder([command[1] for command in document])
                # STEP 3: roll out the policy over the document's commands.
                for t in range(len(document)):
                    cmd, annotation = document[t]
                    state = web.build_state(driver, web.tokenize_command(cmd))
                    actions = state.enumerate_actions()
                    action, best_score, probs = state.get_action_probs(actions, theta)
                    # We reached a page where nothing can be done any more,
                    # so end the rollout here.
                    if action is None:
                        break
                    # Sample an action weighted by its probability; if the
                    # probabilities don't cover r, keep the argmax action.
                    r = random.random()
                    acc_prob = 0
                    for a in probs:
                        acc_prob += probs[a]
                        if acc_prob > r:
                            action = a
                            break
                    logging.debug(state.phi_dot_theta_str(action, theta))
                    rewarder.update_reward(state, action)
                    logging.debug("Performing... \n%s for %s", action, cmd)
                    action.perform(driver, dry=False)
                    state_actions.append((state, action, best_score))
                    action_choices.append(probs)
                # STEP 4: gradient = sum_t (phi(a_t) - E_pi[phi(a)]).
                gradient = np.zeros(n_features)
                for t in range(len(state_actions)):
                    phi_t = state_actions[t][1].as_numeric_vector()
                    weighted_actions = np.zeros(n_features)
                    for cand in action_choices[t]:
                        prob_cand = action_choices[t][cand]
                        weighted_actions = np.add(weighted_actions,
                                                  np.multiply(cand.as_numeric_vector(), prob_cand))
                    gradient = np.add(gradient, np.subtract(phi_t, weighted_actions))
                # STEP 5: scale the gradient by the rollout's reward.
                reward = rewarder.get_reward()
                logging.info("Reward: %f", reward)  # reward is a float mean
                reward_history.append(reward)
                theta = np.add(theta, np.multiply(reward, gradient))
                theta_history.append(copy.copy(theta))
                if len(theta_history) > 1:
                    # Contribution to this iteration's mean step size of theta.
                    avg_dist += distance.euclidean(theta, theta_history[-2]) / len(command_documents)
            logging.info("Avg_dist: %f", avg_dist)
            if avg_dist < .1:
                logging.info("Theta is not changing much in the latest iteration, breaking")
                break
    finally:
        driver.quit()  # always release the browser
    if visualize:
        from matplotlib import pyplot as plt
        plt.plot(reward_history, 'o')
        for i in range(n_features):
            plt.plot([x[i] for x in theta_history])
        plt.legend(['reward'] + web.Action.FEATURE_NAMES)
        plt.show()
    return theta
# Interactive driver: read commands from the terminal, score candidate
# actions with a pre-trained theta vector, and perform the best one in Chrome.
parser = argparse.ArgumentParser(description="Launch an interactive web talk session attached to Chrome")
# `type=file` is Python 2 only; theta may also be piped in via stdin.
parser.add_argument("theta_file", type=file, help="A file containing a theta vector in the text output format of numpy, pass as an argument or pipe in through stdin", default=sys.stdin, nargs='?')
args = parser.parse_args()
theta = np.loadtxt(args.theta_file)
# NOTE(review): the matching except/finally for this `try` lies beyond this
# chunk — presumably it quits the driver; confirm in the full file.
try:
    driver = web.start("http://localhost:8000")
    cmd = ""
    # Rebind stdin to the terminal so raw_input works even when theta was piped in.
    sys.stdin = open('/dev/tty')
    while cmd != "QUIT":
        # NOTE(review): a "QUIT" command is still fully processed (state built,
        # action performed) once before the loop condition stops it.
        cmd = raw_input('> ')
        state = web.build_state(driver, web.tokenize_command(cmd))
        actions = state.enumerate_actions()
        action, best_score, probs = state.get_action_probs(actions, theta)
        print "Performing... ", action
        print "With prob: ", probs[action]
        print state.phi_dot_theta_str(action, theta)
        # Dead debug branch: flip to True to print the hard-coded gold action
        # (click on the element with x-wtid == 'continue').
        if False:
            correct = [a for a in actions if a.type == 'click' and a.element.get_attribute('x-wtid') == 'continue' and a.params is None][0]
            print 'Correct action was:', correct
            print state.phi_dot_theta_str(correct, theta)
        # NOTE(review): if no action was found, `action` is None and this raises.
        action.perform(driver, dry=False)