Example #1
0
def Neural(env, params, prevTransition, sess, qnA, qnB):
    """Advance one crawl step with qnA and compute a double-Q training target.

    Starts from the state stored on *prevTransition*, follows one link chosen
    by qnA, bootstraps the target value from the successor state (action
    argmax from qnA, value read from qnB), and blends it into the predicted
    Q-values. Returns (transition, reward).
    """
    newCandidates = prevTransition.nextCandidates.copy()
    newVisited = prevTransition.nextVisited.copy()

    qValues, maxQ, action, link, reward = NeuralWalk(env, params, newCandidates,
                                                     newVisited, sess, qnA)
    assert (link is not None)
    assert (qValues.shape[1] > 0)
    newCandidates.Group(newVisited)

    # Double Q-learning bootstrap: qnA picks the next action, qnB scores it.
    nextMaxQ = 0
    if newCandidates.Count() > 0:
        _, _, nextAction = qnA.PredictAll(env, sess, params.langIds,
                                          newVisited, newCandidates)
        nextQValuesB, _, _ = qnB.PredictAll(env, sess, params.langIds,
                                            newVisited, newCandidates)
        nextMaxQ = nextQValuesB[0, nextAction]

    # Blend the bootstrapped return into the predicted Q for the taken action.
    bootstrapped = reward + params.gamma * nextMaxQ
    qValues[0, action] = (1 - params.alpha) * maxQ + params.alpha * bootstrapped

    transition = Transition(env, action, link, params.langIds, qValues,
                            prevTransition.nextVisited,
                            prevTransition.nextCandidates, newVisited,
                            newCandidates)
    return transition, reward
Example #2
0
def Neural(env, params, prevTransition, sess, qn):
    """Take one eps-greedy step from the state stored on *prevTransition*.

    Follows a single link chosen by *qn*, then wraps the outcome as a new
    Transition. Returns (transition, reward).
    """
    newCandidates = prevTransition.nextCandidates.copy()
    newVisited = prevTransition.nextVisited.copy()

    action, link, reward = NeuralWalk(env, params, params.eps, newCandidates,
                                      newVisited, sess, qn)
    assert (link is not None)
    newCandidates.Group(newVisited)

    transition = Transition(env, action, reward, link, params.langIds,
                            prevTransition.nextVisited,
                            prevTransition.nextCandidates, newVisited,
                            newCandidates)
    return transition, reward
Example #3
0
def Walk(env, params, sess, qns):
    """Roll out one greedy crawl (eps=0.0) from the start node.

    Prints human-readable action / language / reward traces and returns
    (parallelDocCounts, totalReward, totalDiscountedReward).
    """
    parallelCounts = []
    seen = set()
    langsVisited = np.zeros([1, 3])  # langId -> count
    frontier = Candidates(params, env)
    node = env.nodes[sys.maxsize]  # start node of the crawl

    # Trace strings accumulated per step, printed once at the end.
    langTrace = f"lang:{node.lang}"
    rewardTrace = "rewards:"
    actionTrace = "actions:"

    steps = 0
    alignedCount = 0
    totReward = 0.0
    totDiscounted = 0.0
    discount = 1.0

    while True:
        qnA = qns.q[0]
        assert (node.urlId not in seen)
        seen.add(node.urlId)
        UpdateLangsVisited(langsVisited, node, params.langIds)

        frontier.AddLinks(node, seen, params)
        parallelCounts.append(NumParallelDocs(env, seen))

        # eps=0.0: always exploit the learned policy, never explore.
        _, _, _, _, _, _, _, _, action, link, reward = NeuralWalk(
            env, params, 0.0, frontier, seen, langsVisited, sess, qnA)
        node = link.childNode
        actionTrace += f"{action} "

        totReward += reward
        totDiscounted += discount * reward

        langTrace += f"->{node.lang}"
        rewardTrace += f"->{reward}"

        if node.alignedNode is not None:
            langTrace += "*"  # mark nodes that have an aligned counterpart
            alignedCount += 1

        discount *= params.gamma
        steps += 1

        # Stop at the terminal node or once the crawl budget is exhausted.
        if node.urlId == 0:
            break
        if len(seen) > params.maxDocs:
            break

    langTrace += f" {steps}"
    rewardTrace += f" {totReward}/{totDiscounted}"

    print(actionTrace)
    print(langTrace)
    print(rewardTrace)
    return parallelCounts, totReward, totDiscounted
Example #4
0
def Neural(env, params, candidates, visited, langsVisited, sess, qnA, qnB):
    """One eps-greedy training step with a double-Q bootstrap target.

    Walks one link chosen by qnA, builds the successor state that following
    that link produces, computes the target Q-value there (action argmax from
    qnA, value from qnB), blends it into the predicted Q-values, and packages
    everything as a Transition.
    """
    (numActions, linkLang, mask, numSiblings, numVisitedSiblings,
     numMatchedSiblings, qValues, maxQ, action, link, reward) = NeuralWalk(
        env, params, params.eps, candidates, visited, langsVisited, sess, qnA)
    assert (link is not None)

    # Build the successor state produced by following `link`.
    succVisited = visited.copy()
    succVisited.add(link.childNode.urlId)

    succCandidates = candidates.copy()
    succCandidates.AddLinks(link.childNode, succVisited, params)

    succLangsVisited = langsVisited.copy()
    UpdateLangsVisited(succLangsVisited, link.childNode, params.langIds)

    # Double Q-learning bootstrap: qnA picks the next action, qnB scores it.
    nextMaxQ = 0
    if succCandidates.Count() > 0:
        _, _, _, _, _, _, _, _, nextAction = qnA.PredictAll(
            env, sess, params.langIds, succLangsVisited, succCandidates)
        _, _, _, _, _, _, nextQValuesB, _, _ = qnB.PredictAll(
            env, sess, params.langIds, succLangsVisited, succCandidates)
        nextMaxQ = nextQValuesB[0, nextAction]

    # Blend the bootstrapped return into the predicted Q for the taken action.
    bootstrapped = reward + params.gamma * nextMaxQ
    qValues[0, action] = (1 - params.alpha) * maxQ + params.alpha * bootstrapped

    return Transition(link.parentNode.urlId, link.childNode.urlId,
                      numActions, linkLang, mask, numSiblings,
                      numVisitedSiblings, numMatchedSiblings,
                      params.langIds, langsVisited, qValues)