def PredictAll(self, env, sess, langIds, visited, candidates):
    """Evaluate ``self.qValues`` for the current candidates and pick the greedy action.

    Args:
        env: Environment object, forwarded unchanged to ``GetLangsVisited``.
        sess: Session-like object; its ``run`` method evaluates ``self.qValues``.
        langIds: Language ids, fed to ``self.langIds`` and forwarded to
            ``GetLangsVisited``.
        visited: Visited collection, forwarded to ``GetLangsVisited``.
        candidates: Object whose ``GetFeatures()`` returns the triple
            ``(numActions, parentLang, mask)``.

    Returns:
        A tuple ``(qValues, maxQ, action)`` where ``qValues`` is the network
        output reshaped to a single row, ``action`` is the argmax over the
        first ``numActions`` entries of that row and ``maxQ`` is the value
        at that index.
    """
    actionCount, parentLangFeat, maskFeat = candidates.GetFeatures()
    assert actionCount > 0

    # The action count is fed to the graph as a 1x1 int32 array.
    actionCountArr = np.empty([1, 1], dtype=np.int32)
    actionCountArr[0, 0] = actionCount

    visitedLangs = GetLangsVisited(visited, langIds, env)

    feeds = {
        self.parentLang: parentLangFeat,
        self.numActions: actionCountArr,
        self.mask: maskFeat,
        self.langIds: langIds,
        self.langsVisited: visitedLangs,
    }
    [rawQ] = sess.run([self.qValues], feed_dict=feeds)

    # Lay the values out as one row; the row length is the output's first
    # dimension.
    qRow = np.reshape(rawQ, [1, rawQ.shape[0]])

    # Only the first `actionCount` entries are considered when choosing.
    bestAction = np.argmax(qRow[0, :actionCount])
    bestQ = qRow[0, bestAction]
    return qRow, bestQ, bestAction
def __init__(self, env, action, link, langIds, targetQ, visited, candidates, nextVisited, nextCandidates):
    """Record one transition together with private copies of its feature arrays.

    Args:
        env: Environment object, forwarded to ``GetLangsVisited``.
        action: Stored as ``self.action``.
        link: Stored as ``self.link``.
        langIds: Stored as ``self.langIds`` and forwarded to ``GetLangsVisited``.
        targetQ: Copied into a new NumPy array and stored as ``self.targetQ``.
        visited: If not ``None``, stored as ``self.visited`` and used to
            compute ``self.langsVisited``.
        candidates: If not ``None``, stored as ``self.candidates``; its
            ``GetFeatures()`` must return eight values, which are stored
            (the arrays as copies) on the instance.
        nextVisited: Stored as ``self.nextVisited``.
        nextCandidates: Stored as ``self.nextCandidates``.

    Note:
        When ``visited`` or ``candidates`` is ``None`` the corresponding
        attributes are simply not created on the instance.
    """
    self.action = action
    self.link = link
    self.langIds = langIds
    self.targetQ = np.array(targetQ, copy=True)

    if visited is not None:
        self.visited = visited
        self.langsVisited = GetLangsVisited(visited, langIds, env)

    if candidates is not None:
        self.candidates = candidates
        (
            actionCount,
            parentLang,
            mask,
            numSiblings,
            numVisitedSiblings,
            numMatchedSiblings,
            parentMatched,
            linkLang,
        ) = candidates.GetFeatures()
        self.numActions = actionCount

        # Copy every feature array so later changes made by `candidates`
        # cannot alter what this transition recorded.
        arrayFeatures = (
            ("parentLang", parentLang),
            ("mask", mask),
            ("numSiblings", numSiblings),
            ("numVisitedSiblings", numVisitedSiblings),
            ("numMatchedSiblings", numMatchedSiblings),
            ("parentMatched", parentMatched),
            ("linkLang", linkLang),
        )
        for attrName, featureValue in arrayFeatures:
            setattr(self, attrName, np.array(featureValue, copy=True))

    self.nextVisited = nextVisited
    self.nextCandidates = nextCandidates
def PredictAll(self, env, sess, langIds, visited, candidates):
    """Sample an action index from the distribution produced by ``self.probs``.

    Args:
        env: Environment object, forwarded to ``GetLangsVisited``.
        sess: Session-like object; its ``run`` method evaluates the fetched
            tensors.
        langIds: Language ids, forwarded to ``GetLangsVisited``.
        visited: Visited collection, forwarded to ``GetLangsVisited``.
        candidates: Object whose ``GetMask()`` returns the triple
            ``(numActions, numCandidates, linkSpecific)``.

    Returns:
        The index drawn by ``np.random.choice`` from
        ``range(self.params.NUM_ACTIONS)`` with probabilities ``probs``.

    Raises:
        Exception: Whatever the sampling step raises (e.g. ``ValueError``
            from ``np.random.choice`` for an invalid distribution). The
            intermediate values are printed first and the original exception
            is re-raised unchanged.
    """
    numActions, numCandidates, linkSpecific = candidates.GetMask()
    assert numActions > 0

    # The action count is fed to the graph as a 1x1 array.
    numActionsArr = np.empty([1, 1])
    numActionsArr[0, 0] = numActions

    langsVisited = GetLangsVisited(visited, langIds, env)

    # Everything except `probs` is fetched only for the diagnostics below;
    # `maskNum` is fetched but not used in this method.
    (probs, logit, smNumer, smNumerSum, maxLogit, maskNum, maskBigNeg) = sess.run(
        [
            self.probs,
            self.logit,
            self.smNumer,
            self.smNumerSum,
            self.maxLogit,
            self.maskNum,
            self.maskBigNeg,
        ],
        feed_dict={
            self.numCandidates: numCandidates,
            self.linkSpecificInput: linkSpecific,
            self.langsVisited: langsVisited,
            self.numActions: numActionsArr,
        })

    # Flatten to 1-D using the second dimension as the length.
    # NOTE(review): this presumes the fetched probs has shape (1, N) - confirm.
    probs = np.reshape(probs, [probs.shape[1]])

    try:
        action = np.random.choice(self.params.NUM_ACTIONS, p=probs)
    except Exception:
        # Dump the intermediate values, then let the real error propagate.
        # Previously a bare `except:` aborted by evaluating an undefined
        # name, which replaced the actual failure with a NameError.
        print("langsVisited", probs, logit, smNumer, smNumerSum, langsVisited)
        print("probs", probs)
        print("logit", logit)
        print("maxLogit", maxLogit)
        print("smNumer", smNumer)
        print("smNumerSum", smNumerSum)
        print("langsVisited", langsVisited)
        print("numCandidates", numCandidates)
        print("maskBigNeg", maskBigNeg)
        raise

    # Print a trace for roughly 0.5% of calls.
    if np.random.rand(1) < .005:
        print("action", action, probs, logit, numCandidates, linkSpecific.tolist(), langsVisited, numActions)

    return action
def PredictAll(self, env, sess, langIds, visited, candidates):
    """Sample an action index from the distribution produced by ``self.probs``.

    Args:
        env: Environment object, forwarded to ``GetLangsVisited``.
        sess: Session-like object; its ``run`` method evaluates the fetched
            tensors.
        langIds: Language ids, forwarded to ``GetLangsVisited``.
        visited: Visited collection, forwarded to ``GetLangsVisited``.
        candidates: Object whose ``GetMask()`` returns the triple
            ``(numActions, numCandidates, parentLang)``.

    Returns:
        The index drawn by ``np.random.choice`` from
        ``range(self.params.NUM_ACTIONS)`` with probabilities ``probs``.

    Raises:
        Exception: Whatever the sampling step raises (e.g. ``ValueError``
            from ``np.random.choice`` for an invalid distribution). The
            intermediate values are printed first and the original exception
            is re-raised unchanged.
    """
    numActions, numCandidates, parentLang = candidates.GetMask()
    assert numActions > 0

    langsVisited = GetLangsVisited(visited, langIds, env)

    # Everything except `probs` is fetched only for the diagnostics below.
    (probs, logit, smNumer, smNumerSum, maxLogit, maskBigNeg) = sess.run(
        [
            self.probs,
            self.logit,
            self.smNumer,
            self.smNumerSum,
            self.maxLogit,
            self.maskBigNeg,
        ],
        feed_dict={
            self.numCandidates: numCandidates,
            self.parentLang: parentLang,
            self.langsVisited: langsVisited,
        })

    # Flatten to 1-D using the second dimension as the length.
    # NOTE(review): this presumes the fetched probs has shape (1, N) - confirm.
    probs = np.reshape(probs, [probs.shape[1]])

    try:
        action = np.random.choice(self.params.NUM_ACTIONS, p=probs)
    except Exception:
        # Dump the intermediate values, then let the real error propagate.
        # Previously a bare `except:` aborted by evaluating an undefined
        # name, which replaced the actual failure with a NameError.
        print("langsVisited", probs, logit, smNumer, smNumerSum, langsVisited)
        print("probs", probs)
        print("logit", logit)
        print("maxLogit", maxLogit)
        print("smNumer", smNumer)
        print("smNumerSum", smNumerSum)
        print("langsVisited", langsVisited)
        print("numCandidates", numCandidates)
        print("maskBigNeg", maskBigNeg)
        raise

    # Print a trace for roughly 0.5% of calls.
    if np.random.rand(1) < .005:
        print("action", action, probs, logit, numCandidates, parentLang, langsVisited, numActions)

    return action
def PredictAll(self, env, sess, langIds, visited, candidates):
    """Sample an action index from the distribution produced by ``self.probs``.

    Args:
        env: Environment object, forwarded to ``GetLangsVisited``.
        sess: Session-like object; its ``run`` method evaluates the fetched
            tensors.
        langIds: Language ids, forwarded to ``GetLangsVisited``.
        visited: Visited collection, forwarded to ``GetLangsVisited``.
        candidates: Object whose ``GetFeatures()`` returns the pair
            ``(numActions, mask)``.

    Returns:
        The index drawn by ``np.random.choice`` from
        ``range(self.params.MAX_NODES)`` with probabilities ``probs``.

    Raises:
        Exception: Whatever the sampling step raises (e.g. ``ValueError``
            from ``np.random.choice`` for an invalid distribution). The
            intermediate values are printed first and the original exception
            is re-raised unchanged.
    """
    numActions, mask = candidates.GetFeatures()
    assert numActions > 0

    langsVisited = GetLangsVisited(visited, langIds, env)

    # Everything except `probs` is fetched only for the diagnostics below.
    (probs, logit, smNumer, smNumerSum, maxLogit, maskBigNeg) = sess.run(
        [
            self.probs,
            self.logit,
            self.smNumer,
            self.smNumerSum,
            self.maxLogit,
            self.maskBigNeg,
        ],
        feed_dict={
            self.mask: mask,
            self.langsVisited: langsVisited,
        })

    # Flatten to 1-D using the second dimension as the length.
    # NOTE(review): this presumes the fetched probs has shape (1, N) - confirm.
    probs = np.reshape(probs, [probs.shape[1]])

    try:
        action = np.random.choice(self.params.MAX_NODES, p=probs)
    except Exception:
        # Dump the intermediate values, then let the real error propagate.
        # Previously a bare `except:` aborted by evaluating an undefined
        # name, which replaced the actual failure with a NameError.
        print("langsVisited", probs, logit, smNumer, smNumerSum, langsVisited)
        print("probs", probs)
        print("logit", logit)
        print("maxLogit", maxLogit)
        print("smNumer", smNumer)
        print("smNumerSum", smNumerSum)
        print("langsVisited", langsVisited)
        print("mask", mask)
        print("maskBigNeg", maskBigNeg)
        raise

    # Print a trace for roughly 0.5% of calls.
    if np.random.rand(1) < .005:
        print("action", action, probs, logit, mask, langsVisited, numActions)

    return action
def PredictAll(self, env, sess, langIds, visited, candidates):
    """Sample an action index from the distribution produced by ``self.probs``.

    Args:
        env: Environment object, forwarded to ``GetLangsVisited``.
        sess: Session-like object; its ``run`` method evaluates ``self.probs``.
        langIds: Language ids, fed to ``self.langIds`` and forwarded to
            ``GetLangsVisited``.
        visited: Visited collection, forwarded to ``GetLangsVisited``.
        candidates: Object whose ``GetFeatures()`` returns the eight values
            ``(numActions, parentLang, mask, numSiblings, numVisitedSiblings,
            numMatchedSiblings, parentMatched, linkLang)``.

    Returns:
        An integer index into the flattened ``probs`` vector, drawn with
        probability ``probs[index]``.
    """
    (numActions, parentLang, mask, numSiblings, numVisitedSiblings,
     numMatchedSiblings, parentMatched, linkLang) = candidates.GetFeatures()

    # The action count is fed to the graph as a 1x1 int32 array.
    numActionsNP = np.empty([1, 1], dtype=np.int32)
    numActionsNP[0, 0] = numActions
    assert numActions > 0

    langsVisited = GetLangsVisited(visited, langIds, env)

    (probs, ) = sess.run(
        [self.probs],
        feed_dict={
            self.parentLang: parentLang,
            self.numActions: numActionsNP,
            self.mask: mask,
            self.numSiblings: numSiblings,
            self.numVisitedSiblings: numVisitedSiblings,
            self.numMatchedSiblings: numMatchedSiblings,
            self.parentMatched: parentMatched,
            self.linkLang: linkLang,
            self.langIds: langIds,
            self.langsVisited: langsVisited,
        })

    # Flatten to 1-D using the second dimension as the length.
    # NOTE(review): this presumes the fetched probs has shape (1, N) - confirm.
    probs = np.reshape(probs, [probs.shape[1]])

    # Sample the index directly. The previous code sampled a probability
    # *value* and mapped it back with argmax(probs == value), which always
    # resolved to the first index holding that value, so actions with equal
    # probability could never be chosen beyond the lowest-indexed one.
    action = np.random.choice(probs.shape[0], p=probs)
    return action
def __init__(self, env, action, reward, link, langIds, visited, candidates, nextVisited, nextCandidates):
    """Record one transition, its reward and a private copy of the action mask.

    Args:
        env: Environment object, forwarded to ``GetLangsVisited``.
        action: Stored as ``self.action``.
        reward: Stored as ``self.reward``.
        link: Stored as ``self.link``.
        langIds: Stored as ``self.langIds`` and forwarded to ``GetLangsVisited``.
        visited: If not ``None``, stored as ``self.visited`` and used to
            compute ``self.langsVisited``.
        candidates: If not ``None``, stored as ``self.candidates``; its
            ``GetFeatures()`` must return ``(numActions, mask)``.
        nextVisited: Stored as ``self.nextVisited``.
        nextCandidates: Stored as ``self.nextCandidates``.

    Note:
        ``self.discountedReward`` starts as ``None``. When ``visited`` or
        ``candidates`` is ``None`` the corresponding attributes are not
        created on the instance.
    """
    self.action = action
    self.link = link
    self.langIds = langIds
    self.reward = reward
    self.discountedReward = None

    hasVisited = visited is not None
    if hasVisited:
        self.visited = visited
        self.langsVisited = GetLangsVisited(visited, langIds, env)

    hasCandidates = candidates is not None
    if hasCandidates:
        self.candidates = candidates
        actionCount, actionMask = candidates.GetFeatures()
        self.numActions = actionCount
        # Copy the mask so later changes made by `candidates` cannot alter
        # what this transition recorded.
        self.mask = np.array(actionMask, copy=True)

    self.nextVisited = nextVisited
    self.nextCandidates = nextCandidates