def run(self, event_pack: EventPackage):
    """Return the URL of one randomly chosen image from a Google image search.

    The first element of ``event_pack.body`` is removed in place (unchanged
    from the previous behaviour; presumably it is the command word - verify
    against the caller) and the remaining elements become the search terms,
    prefixed with ``sfw``.

    :param event_pack: event whose ``body`` is a mutable sequence of strings.
    :return: the ``data-src`` value of a random ``<img>`` tag on the result page.
    :raises IndexError: if ``body`` is empty, or if the result page contains
        no ``<img data-src="...">`` tags.
    :raises urllib.error.URLError: if the HTTP request fails.
    """
    from urllib.parse import quote_plus

    random.seed(time.time())

    # Prepare the search terms; each one is URL-escaped so characters such as
    # '&', '#' or spaces cannot corrupt the query string.
    search_terms = event_pack.body
    search_terms.pop(0)
    search = "+".join(["sfw"] + [quote_plus(term) for term in search_terms])
    url = "https://www.google.com/search?tbm=isch&q=" + search + "&oq=" + search + "&gs_l=img&safesearch=on"

    # Fetch the page with a browser-like User-Agent header.
    headers = {
        'User-Agent': "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36",
    }
    request = urllib.request.Request(url, headers=headers)
    with urllib.request.urlopen(request) as response:
        # Decode the payload properly: str(bytes) would hand the parser the
        # "b'...'" repr, full of backslash escapes, instead of real HTML.
        charset = response.headers.get_content_charset("utf-8")
        page = response.read().decode(charset, errors="replace")

    # Collect the data-src attribute of every <img> tag.
    image_urls = []

    def collect_image(tag, attrs):
        if tag != "img":
            return
        # Skip valueless attributes (value is None) rather than storing "None".
        image_urls.extend(str(value) for name, value in attrs if name == "data-src" and value)

    parser = HTMLParser()
    parser.handle_starttag = collect_image
    parser.feed(page)
    parser.close()

    # random.randint(0, len(seq)) includes len(seq) and so could index one past
    # the end; random.choice picks a valid element and raises IndexError when
    # the sequence is empty.
    return random.choice(image_urls)
async def source(self, ctx):
    '''Perform a reverse image search using iqdb.org.'''
    # Submit the most recent URL from the context to iqdb.org and decode the reply.
    url = await ctx.get_last_url()
    raw = await self.ona.request("http://iqdb.org", method="POST", data={"url": url})
    body = raw.decode()
    self.ona.assert_("No relevant matches" not in body, "HTTP request failed" not in body, error="No results found.")

    hrefs = []

    def collect_hrefs(tag, attrs):
        # Gather every href attribute value of the response HTML, in document order.
        hrefs.extend(value for name, value in attrs if name == "href")

    parser = HTMLParser()
    parser.handle_starttag = collect_hrefs
    parser.feed(body)

    # The link at index 2 (the third href on the page) is treated as the best
    # match. Report a page with fewer links as "no results" through the same
    # assert_ helper used above instead of failing with an IndexError.
    self.ona.assert_(len(hrefs) > 2, error="No results found.")
    href = hrefs[2]
    if href.startswith("//"):  # Protocol-relative link: make it absolute.
        href = f"https:{href}"
    await ctx.send(f"Here's the closest match:\n{href}")
def handle_starttag(self, tag, attrs):
    """Clear ``self.collect`` when a tag listed in ``self.strip_tags`` opens.

    The base ``HTMLParser`` hook runs first. The tag name is then lower-cased
    and looked up in ``self.strip_tags``; on a match ``self.collect`` is set
    to ``False``. Other tags leave the flag untouched.
    """
    HTMLParser.handle_starttag(self, tag, attrs)
    normalized = tag.lower()
    if normalized not in self.strip_tags:
        return
    self.collect = False
def handle_starttag(self, tag, attrs):
    """Run the base start-tag hook, then switch collection off for stripped tags.

    ``self.collect`` becomes ``False`` when the lower-cased ``tag`` is a member
    of ``self.strip_tags``; for any other tag the flag is left as it was.
    """
    HTMLParser.handle_starttag(self, tag, attrs)
    is_stripped = tag.lower() in self.strip_tags
    if is_stripped:
        self.collect = False
def handle_starttag(self, tag, attrs):
    """Print the opening tag as ``<tag>`` after delegating to the base parser hook.

    :param tag: name of the tag being opened.
    :param attrs: attribute pairs supplied by the parser (not printed).
    """
    # The base hook must receive this parser instance; the previous code
    # passed the builtin ``set`` type by mistake.
    HTMLParser.handle_starttag(self, tag, attrs)
    print('<' + tag + '>')
""" self.warningcolor = '\033[0;37;41m' self.tipcolor = '\033[0;31;42m' self.endcolor = '\033[0m' self._newcolor = '' @property def new(self): """ Customized Python Print Color. """ return self._newcolor @new.setter def new(self, color_str): """ New Color. """ self._newcolor = color_str def disable(self): """ Disable Color Print. """ self.warningcolor = '' self.endcolor = '' # TODO:(edony) Can not filter the needed infomation source_html = requests.get(r'https://www.python.org/events/python-events/') content = source_html.text p = HTMLParser() p.feed(content) print(p.handle_starttag('h3',['class']))
def handle_starttag(self, tag, attrs):
    '''
    Handle a start tag <xxx> and push a matching node onto the tree being built.

    The parser also reports directly closed tags here (for example meta, hr,
    br); tags listed in ``startendtag_list`` are ignored.

    :param tag: tag name
    :param attrs: list of (name, value) attribute pairs
    :return: None
    '''
    # Let the parent class see the tag and its attributes first.
    HTMLParser.handle_starttag(self, tag, attrs)

    # Directly closed tags must not take part in the tree.
    if tag in startendtag_list:
        return

    # Map each tag name to the node class in ``wn`` that represents it.
    # Class names are resolved lazily with getattr so that only the class
    # actually needed is looked up.
    # NOTE(review): h1 and h3 both map to H2Node, exactly as before - confirm
    # whether dedicated H1Node / H3Node classes were intended.
    node_class_names = {
        "p": "PNode",
        "strong": "StrongNode",
        "span": "SpanNode",
        "html": "HtmlNode",
        "head": "HeadNode",
        "body": "BodyNode",
        "style": "StyleNode",
        "h1": "H2Node",
        "h2": "H2Node",
        "h3": "H2Node",
        "table": "TableNode",
        "tr": "TrNode",
        "td": "TdNode",
        "ol": "OlNode",
        "li": "LiNode",
        "title": "TitleNode",
        "div": "DivNode",
        "form": "FormNode",
        "select": "SelectNode",
        "option": "OptionNode",
        "script": "ScriptNode",
        "input": "InputNode",
        "link": "LinkNode",
    }
    # Unknown tags fall back to the generic WebNode.
    node = getattr(wn, node_class_names.get(tag, "WebNode"))()

    # Record the tag; the node that was current until now becomes the parent.
    parent = self.now
    node.tag = tag
    node.father = parent

    if parent is not None:
        # The new node sits one level below its parent.
        node.index = parent.index + 1
        # Path segment: the tag name plus the 1-based position the node would
        # take among the parent's children.
        segment = "{0}[{1}]".format(tag, len(parent.children) + 1)
        if parent.path == "":
            # The parent has no path yet, so this segment starts the path.
            node.path = segment
        else:
            # Otherwise extend the parent's path.
            node.path = parent.path + "-" + segment
    else:
        # No parent: this node is at the top level.
        node.index = 0

    # Copy the attributes onto the node.
    for name, value in attrs:
        node.attr[name] = value

    # Descend one level: the new node becomes the current node.
    self.now = node

    # A tag that may lack an end tag cannot have children here, so close it
    # immediately: attach it to its parent and step back up.
    # NOTE(review): this still raises AttributeError when such a tag has no
    # parent (unchanged behaviour) - confirm whether that case can occur.
    if node.tag in maybenoendtag_list:
        node.father.children.append(node)
        self.now = node.father