Code example #1
File: evaluate.py Project: sbirch/webtalk
def evaluate(eval_corpus_file, theta, start_url):
    docs = gen_docs.get_all_docs(eval_corpus_file)
    random.shuffle(docs)
    docs = docs[:100]
    driver = web.start(start_url)

    correct_docs = 0

    correct_cmds = 0
    total_cmds = 0
    for doc in docs:
        driver.get(start_url)
        doc_correct = True

        for cmd in doc:
            total_cmds += 1
            text_cmd, (cmd_type, wtid, arg) = cmd

            # do it
            state = web.build_state(driver, web.tokenize_command(text_cmd))
            actions = state.enumerate_actions()
            action, best_score, probs = state.get_action_probs(actions, theta)


            if action and \
               action.type == cmd_type and \
               action.element.get_attribute('x-wtid') == wtid and \
               (action.params is None or text_classification.untokenize_subcommand(action.params).lower() == arg.lower()):
                correct_cmds += 1
            else:
                print "Failed: ", action, " for ", text_cmd
                doc_correct = False

            if action:
                action.perform(driver)
        if doc_correct:
            correct_docs += 1

    driver.quit()
    return float(correct_docs) / len(docs), float(correct_cmds) / total_cmds
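
The function above relies on the project's own modules (gen_docs, web, text_classification) and a trained theta vector. A rough usage sketch, where the file names theta.txt and eval_corpus.txt and the driver script itself are invented for illustration:

import numpy as np
import evaluate

theta = np.loadtxt('theta.txt')  # hypothetical path; a theta vector saved earlier with np.savetxt
doc_accuracy, cmd_accuracy = evaluate.evaluate('eval_corpus.txt', theta, 'http://localhost:8000')
print "Document accuracy:", doc_accuracy
print "Command accuracy:", cmd_accuracy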
Code example #2
File: policy_gradient.py Project: sbirch/webtalk
def policy_gradient(command_documents, start_url="http://localhost:8000", visualize=False, ITERATIONS=50):
    # initialize theta with small random weights in (-0.001, 0.001)
    theta = np.zeros(len(web.Action.FEATURE_NAMES))
    for i in range(len(web.Action.FEATURE_NAMES)):
        theta[i] = (((random.random() * 2) - 1) / 1000)

    theta_history = [copy.copy(theta)]
    reward_history = []

    driver = web.start(start_url)
    try:
        for i in range(ITERATIONS):
            avg_dist = 0
            for doc_num, document in enumerate(command_documents):
                driver.get(start_url)
                state_actions = []
                action_choices = []

                rewarder = Rewarder([command[1] for command in document])

                # STEP 3
                for t in range(len(document)):
                    cmd, annotation = document[t]

                    state = web.build_state(driver, web.tokenize_command(cmd))

                    actions = state.enumerate_actions()

                    action, best_score, probs = state.get_action_probs(actions, theta)


                    # we reached a page where we can't do anything any more, so
                    # end the history here
                    if action is None:
                        break

                    # pick an action weighted by how likely it is
                    r = random.random()
                    acc_prob = 0
                    for a in probs:
                        acc_prob += probs[a]
                        if acc_prob > r:
                            action = a
                            break

                    logging.debug(state.phi_dot_theta_str(action, theta))

                    rewarder.update_reward(state, action)

                    logging.debug("Performing... %s for %s",action, cmd)
                    action.perform(driver, dry=False)

                    state_actions.append((
                        state,
                        action,
                        best_score
                    ))
                    action_choices.append(probs)

                gradient = np.zeros(len(web.Action.FEATURE_NAMES))
                for t in range(len(state_actions)):
                    phi_t = state_actions[t][1].as_numeric_vector()

                    # STEP 4
                    weighted_actions = np.zeros(len(web.Action.FEATURE_NAMES))
                    for action in action_choices[t]:
                        prob_action = action_choices[t][action]
                        weighted_actions = np.add(weighted_actions, \
                                  np.multiply(action.as_numeric_vector(), prob_action))

                    gradient = np.add(gradient, np.subtract(phi_t, weighted_actions))

                # STEP 5
                r = rewarder.get_reward() #reward_gold_standard(state_actions, document)

                logging.info("Reward: %d",r)

                reward_history.append(r)

                theta = np.add(theta, np.multiply(r, gradient))
                theta_history.append(copy.copy(theta))
                if len(theta_history) > 1:
                    avg_dist += distance.euclidean(theta, theta_history[-2]) / len(command_documents)
            logging.info("Avg_dist: %f", avg_dist)
            if avg_dist < .1:
                logging.info("Theta is not changing much in the latest iteration, breaking")
                break
    finally:
        driver.quit()

    if visualize:
        from matplotlib import pyplot as plt
        plt.plot(reward_history, 'o')
        for i in range(len(web.Action.FEATURE_NAMES)):
            plt.plot([x[i] for x in theta_history])
        plt.legend(['reward'] + web.Action.FEATURE_NAMES)
        plt.show()

    return theta
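
STEP 3 through STEP 5 above amount to a REINFORCE-style policy gradient: per time step the gradient contribution is the feature vector of the sampled action minus the probability-weighted average of all candidate action features, and theta is then moved along the summed gradient scaled by the episode reward. A minimal numeric sketch of one time step, with invented two-dimensional feature vectors standing in for web.Action features:

import numpy as np

# two hypothetical candidate actions: (feature vector, probability under the current theta)
candidates = [(np.array([1.0, 0.0]), 0.7),
              (np.array([0.0, 1.0]), 0.3)]
phi_chosen = np.array([1.0, 0.0])  # features of the action actually sampled

# STEP 4: chosen features minus the expected features under the current policy
expected_phi = sum(prob * phi for phi, prob in candidates)
gradient = phi_chosen - expected_phi          # [0.3, -0.3]

# STEP 5: scale by the episode reward and move theta toward rewarded behaviour
reward = 2.0                                  # in the real code this comes from Rewarder.get_reward()
theta = np.zeros(2)
theta = theta + reward * gradient             # [0.6, -0.6]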
Code example #3
File: interact.py Project: sbirch/webtalk
parser = argparse.ArgumentParser(description="Launch an interactive web talk session attached to Chrome")

parser.add_argument("theta_file", type=file, help="A file containing a theta vector in the text output format of numpy, pass as an argument or pipe in through stdin", default=sys.stdin, nargs='?')

args = parser.parse_args()

theta = np.loadtxt(args.theta_file)

try:
    driver = web.start("http://localhost:8000")

    cmd = ""
    sys.stdin = open('/dev/tty')
    while cmd != "QUIT":
        cmd = raw_input('> ')
        state = web.build_state(driver, web.tokenize_command(cmd))

        actions = state.enumerate_actions()

        action, best_score, probs = state.get_action_probs(actions, theta)

        print "Performing... ", action
        print "With prob: ", probs[action]
        print state.phi_dot_theta_str(action, theta)

        # debugging aid: flip to True to compare against the hard-coded correct action
        if False:
            correct = [a for a in actions if a.type == 'click' and a.element.get_attribute('x-wtid') == 'continue' and a.params is None][0]
            print 'Correct action was:', correct
            print state.phi_dot_theta_str(correct, theta)

        action.perform(driver, dry=False)
finally:
    # close the browser when the session ends (mirrors the cleanup in policy_gradient.py)
    driver.quit()
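
interact.py loads theta with np.loadtxt, so a vector produced by policy_gradient can be handed over in numpy's plain-text format. A hypothetical hand-off (the file name theta.txt and the empty placeholder corpus are assumptions, not part of the project files shown above):

import numpy as np
import policy_gradient

command_documents = []  # placeholder; the real annotated command documents come from the project's data pipeline
theta = policy_gradient.policy_gradient(command_documents)
np.savetxt('theta.txt', theta)  # plain-text format understood by np.loadtxt in interact.py

The interactive session can then be started with the theta file as an argument or piped in through stdin, as the argparse help above describes, and commands are entered at the '> ' prompt until QUIT.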