def evaluate(eval_corpus_file, theta, start_url): docs = gen_docs.get_all_docs(eval_corpus_file) random.shuffle(docs) docs = docs[:100] driver = web.start(start_url) correct_docs = 0 correct_cmds = 0 total_cmds = 0 for doc in docs: driver.get(start_url) doc_correct = True for cmd in doc: total_cmds += 1 text_cmd, (cmd_type, wtid, arg) = cmd # do it state = web.build_state(driver, web.tokenize_command(text_cmd)) actions = state.enumerate_actions() action, best_score, probs = state.get_action_probs(actions, theta) if action and \ action.type == cmd_type and \ action.element.get_attribute('x-wtid') == wtid and \ (action.params == None or text_classification.untokenize_subcommand(action.params).lower() == arg.lower()): correct_cmds += 1 else: print "Failed: ", action, " for ", text_cmd doc_correct = False if action: action.perform(driver) if doc_correct: correct_docs += 1 driver.quit() return float(correct_docs) / len(docs), float(correct_cmds) / total_cmds
driver.quit() return float(correct_docs) / len(docs), float(correct_cmds) / total_cmds def eval_round(i, docs): print 'Training %d' % i theta = policy_gradient.policy_gradient(docs) print 'Theta:', list(theta) print 'Evaluating %d' % i doc_pct, cmd_pct = evaluate("data/sendacard_mturk_corpus.tsv", theta, "http://localhost:8000") print i, "Doc Pct: " , doc_pct , " Cmd Pct: " , cmd_pct return doc_pct, cmd_pct if __name__ == "__main__": docs = gen_docs.get_all_docs("data/sendacard_corpus.tsv") random.shuffle(docs) docs = docs[:40] ROUNDS = int(sys.argv[1]) print 'Doing %d rounds' % ROUNDS avg_doc_pct, avg_cmd_pct = 0,0 results = [eval_round(i, docs) for i in range(ROUNDS)] print '\a\a\a' print "Avg doc pct: ", sum([doc_pct/ROUNDS for doc_pct,cmd_pct in results]), " Avg cmd pct: ", sum([cmd_pct/ROUNDS for doc_pct,cmd_pct in results])