def GetNextState(env, params, action, visited, candidates):
    """Resolve *action* into the link to follow and the reward for following it.

    Args:
        env: Environment object; only ``env.nodes[0]`` is read, and only when
            ``action == -1``.
        params: Object exposing ``reward`` and ``cost``.
        action: ``-1`` to stop because there are no candidates; any other value
            is handed to ``candidates.Pop`` unchanged.
        visited: Container of url ids, used only for membership tests.
        candidates: Object exposing ``HasLinks()`` and ``Pop(action)``.

    Returns:
        A ``(link, reward)`` tuple. The reward is ``0.0`` when the link's child
        node has ``urlId == 0``, ``params.reward`` when the child's aligned
        node exists and its ``urlId`` is in *visited*, and ``params.cost``
        otherwise.

    Raises:
        AssertionError: If *candidates* reports no links for a non-stop action,
            or if the popped link is ``None``.
    """
    if action != -1:
        assert candidates.HasLinks()
        link = candidates.Pop(action)
    else:
        # No explicit stop state but no candidates: build a link out of
        # env.nodes[0] so the caller still receives a link object.
        terminal = env.nodes[0]
        link = Link("", 0, terminal, terminal)
    assert link is not None

    child = link.childNode
    if child.urlId == 0:
        # Following the link lands on the stop node.
        return link, 0.0

    aligned = child.alignedNode
    if aligned is not None and aligned.urlId in visited:
        return link, params.reward
    return link, params.cost
def GetNextState(env, params, action, visited, candidates, linkLang, numSiblings, numVisitedSiblings, numMatchedSiblings):
    """Resolve *action* into the link to follow and the reward for following it.

    For a non-stop action the candidate is looked up by a 4-tuple key made of
    the entries at ``[0, action]`` of *linkLang*, *numSiblings*,
    *numVisitedSiblings* and *numMatchedSiblings*, in that order.

    Args:
        env: Environment object; only ``env.nodes[0]`` is read, and only when
            ``action == -1``.
        params: Object exposing ``reward`` and ``cost``.
        action: ``-1`` to stop because there are no candidates; otherwise the
            second index into each feature table.
        visited: Container of url ids, used only for membership tests.
        candidates: Object exposing ``Pop(key)``.
        linkLang, numSiblings, numVisitedSiblings, numMatchedSiblings:
            Feature tables indexable as ``table[0, action]``.

    Returns:
        A ``(link, reward)`` tuple. The reward is ``0.0`` when the link's child
        node has ``urlId == 0``, ``params.reward`` when the child's aligned
        node exists and its ``urlId`` is in *visited*, and ``params.cost``
        otherwise.

    Raises:
        AssertionError: If the selected link is ``None``.
    """
    if action != -1:
        feature_tables = (linkLang, numSiblings, numVisitedSiblings, numMatchedSiblings)
        key = tuple(table[0, action] for table in feature_tables)
        link = candidates.Pop(key)
    else:
        # No explicit stop state but no candidates: build a link out of
        # env.nodes[0] so the caller still receives a link object.
        terminal = env.nodes[0]
        link = Link("", 0, terminal, terminal)
    assert link is not None

    child = link.childNode
    if child.urlId == 0:
        # Following the link lands on the stop node.
        return link, 0.0

    aligned = child.alignedNode
    if aligned is not None and aligned.urlId in visited:
        return link, params.reward
    return link, params.cost
def GetNextState(env, params, action, visited, candidates):
    """Resolve *action* into the link to follow, update the crawl state and score it.

    For a non-stop action the candidate is looked up by the 1-tuple key
    ``(parentLang[0, action],)``, where ``parentLang`` is the second item
    returned by ``candidates.GetFeatures()``. The child node of the popped link
    is then passed to ``candidates.AddLinks`` and its ``urlId`` is added to
    *visited*.

    Args:
        env: Environment object; only ``env.nodes[0]`` is read, and only when
            ``action == -1``.
        params: Object exposing ``reward`` and ``cost``; also forwarded to
            ``candidates.AddLinks``.
        action: ``-1`` to stop because there are no candidates; otherwise the
            second index into ``parentLang``.
        visited: Mutable set of url ids. It is MUTATED for non-stop actions.
        candidates: Object exposing ``GetFeatures()``, ``Pop(key)`` and
            ``AddLinks(node, visited, params)``.

    Returns:
        A ``(link, reward)`` tuple. The reward is ``0.0`` when the link's child
        node has ``urlId == 0``, ``params.reward`` when the child's aligned
        node exists and its ``urlId`` is in *visited*, and ``params.cost``
        otherwise.

    Raises:
        AssertionError: If ``candidates.Pop`` returns ``None``, or if the
            child's ``urlId`` is already in *visited*.
    """
    if action == -1:
        # No explicit stop state but no candidates: build a link out of
        # env.nodes[0] so the caller still receives a link object.
        stopNode = env.nodes[0]
        link = Link("", 0, stopNode, stopNode)
    else:
        _, parentLang, _ = candidates.GetFeatures()
        key = (parentLang[0, action],)
        link = candidates.Pop(key)
        # Check the popped link BEFORE touching link.childNode. The check used
        # to run after the dereferences below, where it could never fire.
        assert link is not None

        # NOTE(review): the expansion and visited bookkeeping are assumed to
        # apply only to real (non-stop) actions - confirm against the caller.
        # AddLinks is called before the node is marked visited, so it sees the
        # visited set as it was prior to this step.
        candidates.AddLinks(link.childNode, visited, params)
        assert link.childNode.urlId not in visited
        visited.add(link.childNode.urlId)

    nextNode = link.childNode
    if nextNode.urlId == 0:
        # Following the link lands on the stop node.
        reward = 0.0
    elif nextNode.alignedNode is not None and nextNode.alignedNode.urlId in visited:
        reward = params.reward
    else:
        reward = params.cost

    return link, reward
async def load_urls(self, doc: bs4.BeautifulSoup, url: URL) -> List[Link]:
    """Collect the links of *doc* whose ``href`` starts with ``'http'``.

    Each matching ``<a>`` tag yields one ``Link`` built with ``prev_url=url``,
    ``next_url`` set to the awaited result of ``self.get_url_obj(href)`` and
    ``text`` set to the tag's text. Tags without an ``href`` attribute are
    reported on stdout and skipped; tags whose ``href`` does not start with
    ``'http'`` are skipped silently.

    Args:
        doc: Parsed document; only ``doc.find_all('a')`` is used.
        url: Value stored as ``prev_url`` on every produced link.

    Returns:
        The produced links, in the order ``find_all`` returned their tags.
    """
    result: List[Link] = []
    for tag_a in doc.find_all('a'):
        # Keep the try body down to the attribute lookup. Wrapping the link
        # construction as well would swallow any KeyError raised inside
        # get_url_obj / Link and misreport it as a missing href.
        try:
            href = tag_a.attrs['href']
        except KeyError:
            # Message: 'The "a" tag has no href attribute, skipping'.
            print('У тэга "a" не обнаружен атрибут href, пропускаю')
            continue
        if not href.startswith('http'):
            continue
        next_url = await self.get_url_obj(href)
        result.append(Link(prev_url=url, next_url=next_url, text=tag_a.text))
    return result