# One training step for the Double-Q setup: act with network A via NeuralWalk,
# evaluate the chosen next action with network B, and blend the new estimate
# into the stored Q-value before packaging the transition.
def Neural(env, params, prevTransition, sess, qnA, qnB):
    nextCandidates = prevTransition.nextCandidates.copy()
    nextVisited = prevTransition.nextVisited.copy()

    qValues, maxQ, action, link, reward = NeuralWalk(env, params, nextCandidates, nextVisited, sess, qnA)
    assert link is not None
    assert qValues.shape[1] > 0
    #print("qValues", qValues.shape, action, prevTransition.nextCandidates.Count(), nextCandidates.Count())
    nextCandidates.Group(nextVisited)

    # calc nextMaxQ
    if nextCandidates.Count() > 0:
        # links to follow NEXT: select the next action with network A ...
        _, _, nextAction = qnA.PredictAll(env, sess, params.langIds, nextVisited, nextCandidates)
        #print("nextAction", nextAction, nextLangRequested, nextCandidates.Debug())
        # ... but evaluate it with network B (Double Q-learning)
        nextQValuesB, _, _ = qnB.PredictAll(env, sess, params.langIds, nextVisited, nextCandidates)
        nextMaxQ = nextQValuesB[0, nextAction]
        #print("nextMaxQ", nextMaxQ, nextMaxQB, nextQValuesA[0, nextAction])
    else:
        nextMaxQ = 0

    newVal = reward + params.gamma * nextMaxQ
    targetQ = (1 - params.alpha) * maxQ + params.alpha * newVal
    qValues[0, action] = targetQ

    transition = Transition(env,
                            action,
                            link,
                            params.langIds,
                            qValues,
                            prevTransition.nextVisited,
                            prevTransition.nextCandidates,
                            nextVisited,
                            nextCandidates)

    return transition, reward
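# A minimal, self-contained sketch of the target computed above (toy numpy arrays only;
# double_q_target and its arguments are illustrative names, not part of this code base):
# the next action is selected with network A but evaluated with network B, and the
# resulting estimate is blended into the stored Q-value with learning rate alpha.
import numpy as np

def double_q_target(qValueTaken, reward, nextQValuesA, nextQValuesB, gamma, alpha):
    nextAction = int(np.argmax(nextQValuesA[0]))   # select with network A
    nextMaxQ = nextQValuesB[0, nextAction]         # evaluate with network B
    newVal = reward + gamma * nextMaxQ             # one-step bootstrapped return
    return (1 - alpha) * qValueTaken + alpha * newVal

# e.g. double_q_target(0.5, 1.0, np.array([[0.3, 0.7]]), np.array([[0.4, 0.6]]),
#                      gamma=0.99, alpha=0.1) -> approximately 0.6094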
def Neural(env, params, prevTransition, sess, qn):
    nextCandidates = prevTransition.nextCandidates.copy()
    nextVisited = prevTransition.nextVisited.copy()

    action, link, reward = NeuralWalk(env, params, params.eps, nextCandidates, nextVisited, sess, qn)
    assert link is not None
    #print("qValues", qValues.shape, action, prevTransition.nextCandidates.Count(), nextCandidates.Count())
    nextCandidates.Group(nextVisited)

    transition = Transition(env,
                            action,
                            reward,
                            link,
                            params.langIds,
                            prevTransition.nextVisited,
                            prevTransition.nextCandidates,
                            nextVisited,
                            nextCandidates)

    return transition, reward
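# A minimal sketch of the epsilon-greedy choice implied by the params.eps argument above
# (numpy only; eps_greedy is an illustrative name, not the project's NeuralWalk, which
# also resolves the chosen index to a link and a reward): with probability eps pick a
# random candidate index, otherwise the index with the highest predicted Q-value.
import numpy as np

def eps_greedy(qValues, eps):
    if np.random.random_sample() < eps:
        return np.random.randint(qValues.shape[1])   # explore: random action
    return int(np.argmax(qValues[0]))                # exploit: greedy action

# e.g. eps_greedy(np.array([[0.1, 0.9, 0.3]]), eps=0.2) returns 1 most of the time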
# Greedy evaluation rollout (eps = 0.0): follow the learned policy from the start node,
# tracking languages visited, the number of aligned document pairs found, and the total
# and discounted reward of the trajectory.
def Walk(env, params, sess, qns):
    ret = []
    visited = set()
    langsVisited = np.zeros([1, 3]) # langId -> count
    candidates = Candidates(params, env)

    node = env.nodes[sys.maxsize]
    #stopNode = env.nodes[0]
    #link = Link("", 0, stopNode, stopNode)
    #candidates.AddLink(link)

    mainStr = "lang:" + str(node.lang)
    rewardStr = "rewards:"
    actionStr = "actions:"

    i = 0
    numAligned = 0
    totReward = 0.0
    totDiscountedReward = 0.0
    discount = 1.0

    while True:
        qnA = qns.q[0]
        assert node.urlId not in visited
        #print("node", node.Debug())
        visited.add(node.urlId)

        #print("node.lang", node.lang, langsVisited.shape)
        UpdateLangsVisited(langsVisited, node, params.langIds)
        #print(" langsVisited", langsVisited)

        candidates.AddLinks(node, visited, params)

        numParallelDocs = NumParallelDocs(env, visited)
        ret.append(numParallelDocs)

        #print("candidates", candidates.Debug())
        _, _, _, _, _, _, _, _, action, link, reward = NeuralWalk(env, params, 0.0, candidates, visited, langsVisited, sess, qnA)
        node = link.childNode
        #print("action", action, qValues)
        actionStr += str(action) + " "

        totReward += reward
        totDiscountedReward += discount * reward

        mainStr += "->" + str(node.lang)
        rewardStr += "->" + str(reward)

        if node.alignedNode is not None:
            mainStr += "*"
            numAligned += 1

        discount *= params.gamma
        i += 1

        if node.urlId == 0:
            break
        if len(visited) > params.maxDocs:
            break

    mainStr += " " + str(i)
    rewardStr += " " + str(totReward) + "/" + str(totDiscountedReward)

    print(actionStr)
    print(mainStr)
    print(rewardStr)

    return ret, totReward, totDiscountedReward
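# A minimal sketch of the bookkeeping Walk does for its final "rewards:" line (a plain
# list of per-step rewards stands in for the crawler environment; discounted_return is
# an illustrative name): the undiscounted sum and the return discounted by gamma per step.
def discounted_return(rewards, gamma):
    tot, totDiscounted, discount = 0.0, 0.0, 1.0
    for r in rewards:
        tot += r
        totDiscounted += discount * r
        discount *= gamma
    return tot, totDiscounted

# e.g. discounted_return([0.0, 1.0, 0.0, 1.0], gamma=0.99) -> (2.0, ~1.960299)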
# Single eps-greedy training step that builds a transition with a Double-Q target:
# the next action is selected by qnA and evaluated by qnB.
def Neural(env, params, candidates, visited, langsVisited, sess, qnA, qnB):
    numActions, linkLang, mask, numSiblings, numVisitedSiblings, numMatchedSiblings, qValues, maxQ, action, link, reward = NeuralWalk(env, params, params.eps, candidates, visited, langsVisited, sess, qnA)
    assert link is not None

    # calc nextMaxQ
    nextVisited = visited.copy()
    nextVisited.add(link.childNode.urlId)

    nextCandidates = candidates.copy()
    nextCandidates.AddLinks(link.childNode, nextVisited, params)

    nextLangsVisited = langsVisited.copy()
    UpdateLangsVisited(nextLangsVisited, link.childNode, params.langIds)

    if nextCandidates.Count() > 0:
        # select the next action with network A ...
        _, _, _, _, _, _, _, _, nextAction = qnA.PredictAll(env, sess, params.langIds, nextLangsVisited, nextCandidates)
        #print("nextAction", nextAction, nextLangRequested, nextCandidates.Debug())
        # ... but evaluate it with network B
        _, _, _, _, _, _, nextQValuesB, _, _ = qnB.PredictAll(env, sess, params.langIds, nextLangsVisited, nextCandidates)
        nextMaxQ = nextQValuesB[0, nextAction]
        #print("nextMaxQ", nextMaxQ, nextMaxQB, nextQValuesA[0, nextAction])
    else:
        nextMaxQ = 0

    newVal = reward + params.gamma * nextMaxQ
    targetQ = (1 - params.alpha) * maxQ + params.alpha * newVal
    qValues[0, action] = targetQ

    transition = Transition(link.parentNode.urlId,
                            link.childNode.urlId,
                            numActions,
                            linkLang,
                            mask,
                            numSiblings,
                            numVisitedSiblings,
                            numMatchedSiblings,
                            params.langIds,
                            langsVisited,
                            qValues)

    return transition