Esempio n. 1
0
    def run(self, event_pack: EventPackage):
        """Return the URL of a random image from a Google image search.

        The search terms are every element of ``event_pack.body`` except the
        first one; they are prefixed with ``sfw`` and sent with safe search on.

        :param event_pack: event whose ``body`` is a sequence of strings.
            NOTE(review): the first element is presumably the command word --
            confirm against the caller.
        :return: the ``data-src`` value of one randomly chosen ``<img>`` tag.
        :raises IndexError: if the result page contains no usable image.
        """
        from urllib.parse import quote_plus

        # Prepare the search terms. Slice a copy instead of pop(0) so the
        # caller's event_pack.body is not mutated.
        terms = list(event_pack.body)[1:]
        # URL-encode each term so spaces, '&', '#', ... cannot break the query.
        search = "+".join(["sfw"] + [quote_plus(term) for term in terms])
        url = ("https://www.google.com/search?tbm=isch&q=" + search
               + "&oq=" + search + "&gs_l=img&safesearch=on")

        # Fetch the page with a browser-like User-Agent.
        headers = {
            'User-Agent': "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36",
        }
        req = urllib.request.Request(url, headers=headers)
        with urllib.request.urlopen(req) as response:
            # Decode the bytes properly; str(bytes) would yield the escaped
            # "b'...'" repr rather than the actual HTML text.
            charset = response.headers.get_content_charset() or "utf-8"
            text = response.read().decode(charset, errors="replace")

        # Collect the data-src attribute of every <img> tag.
        the_images = []

        class _ImageSrcParser(HTMLParser):
            """Parser that records ``data-src`` values of ``<img>`` tags."""

            def handle_starttag(self, tag, attrs):
                if tag != "img":
                    return
                for name, value in attrs:
                    # value is None for attributes written without a value.
                    if name == "data-src" and value is not None:
                        the_images.append(value)

        _ImageSrcParser().feed(text)

        if not the_images:
            raise IndexError("no images found for search: " + search)

        # random.choice picks a valid index; randint(0, len(...)) is inclusive
        # at the top and could index one past the end.
        return random.choice(the_images)
Esempio n. 2
0
    async def source(self, ctx):
        '''Reverse-image-search the last URL from the context on iqdb.org and send the match.'''
        url = await ctx.get_last_url()
        raw = await self.ona.request("http://iqdb.org", method="POST", data={"url": url})
        body = raw.decode()
        self.ona.assert_("No relevant matches" not in body, "HTTP request failed" not in body,
                         error="No results found.")
        links = []

        def collect_hrefs(tag, attrs):
            # Record the value of every href attribute found in the response html.
            for name, value in attrs:
                if name == "href":
                    links.append(value)

        parser = HTMLParser()
        parser.handle_starttag = collect_hrefs
        parser.feed(body)
        # The href at index 2 (the third one collected) is treated as the "best match".
        best = links[2]
        if best.startswith("//"):
            # Protocol-relative link: give it an explicit scheme.
            best = f"https:{best}"
        await ctx.send(f"Here's the closest match:\n{best}")
Esempio n. 3
0
 def handle_starttag(self, tag, attrs):
     """Stop collecting when the opening tag is one of ``self.strip_tags``.

     The tag name is lower-cased before the membership test.
     """
     HTMLParser.handle_starttag(self, tag, attrs)
     is_stripped = tag.lower() in self.strip_tags
     if is_stripped:
         self.collect = False
Esempio n. 4
0
 def handle_starttag(self, tag, attrs):
     """Switch ``self.collect`` off for opening tags listed in ``self.strip_tags``.

     Matching is done on the lower-cased tag name; other tags leave
     ``self.collect`` untouched.
     """
     HTMLParser.handle_starttag(self, tag, attrs)
     if tag.lower() not in self.strip_tags:
         return
     self.collect = False
Esempio n. 5
0
 def handle_starttag(self, tag, attrs):
     """Print each opening tag as ``<tag>``.

     :param tag: name of the tag as reported by the parser.
     :param attrs: list of ``(name, value)`` attribute pairs (unused here).
     """
     # Pass the parser instance itself; the original passed the builtin `set`.
     HTMLParser.handle_starttag(self, tag, attrs)
     print(f'<{tag}>')
Esempio n. 6
0
        """
        self.warningcolor = '\033[0;37;41m'
        self.tipcolor = '\033[0;31;42m'
        self.endcolor = '\033[0m'
        self._newcolor = ''
    @property
    def new(self):
        """
        Return the custom color string currently stored in ``_newcolor``.
        """
        return self._newcolor
    @new.setter
    def new(self, color_str):
        """
        Store ``color_str`` as the custom color string.

        NOTE(review): presumably an ANSI escape sequence like the other
        color attributes -- confirm against callers.
        """
        self._newcolor = color_str
    def disable(self):
        """
        Disable Color Print.

        Every color attribute is reset to the empty string. The tip color and
        the custom color are cleared together with the end sequence, because
        leaving them set while ``endcolor`` is empty would emit a color code
        with no reset after it.
        """
        self.warningcolor = ''
        self.tipcolor = ''
        self._newcolor = ''
        self.endcolor = ''

# Print the text of every <h3> heading on the python.org events page.
# The callbacks of the base HTMLParser do nothing, so calling
# p.handle_starttag(...) by hand only ever printed None; the information is
# filtered by overriding the callbacks in a subclass instead.
class _H3TextParser(HTMLParser):
    """Collect the stripped text content of each ``<h3>`` element."""

    def __init__(self):
        HTMLParser.__init__(self)
        self.headings = []      # finished heading texts, in document order
        self._in_h3 = False     # True while between <h3> and </h3>
        self._chunks = []       # text pieces of the heading being read

    def handle_starttag(self, tag, attrs):
        if tag == 'h3':
            self._in_h3 = True
            self._chunks = []

    def handle_endtag(self, tag):
        if tag == 'h3' and self._in_h3:
            self._in_h3 = False
            heading = ''.join(self._chunks).strip()
            if heading:
                self.headings.append(heading)

    def handle_data(self, data):
        # Text inside nested tags (e.g. a link in the heading) lands here too.
        if self._in_h3:
            self._chunks.append(data)


source_html = requests.get(r'https://www.python.org/events/python-events/', timeout=10)
# Fail loudly on an HTTP error instead of parsing an error page.
source_html.raise_for_status()
content = source_html.text
p = _H3TextParser()
p.feed(content)
print(p.headings)
Esempio n. 7
0
    def handle_starttag(self, tag, attrs):
        '''
        Handle an opening tag <xxx>; this is also called for directly closed
        tags such as meta, hr, br.

        A node object is created for the tag, linked to the current node as
        its father, given a depth index and a path, filled with the tag's
        attributes, and then made the current node.

        :param tag: tag name
        :param attrs: list of (name, value) attribute pairs
        :return: None
        '''

        # Let the parent method process the tag and its attributes first
        HTMLParser.handle_starttag(self, tag, attrs)
        # Skip directly closed tags so they do not disturb the tree
        if tag in startendtag_list:
            return

        # Tag name -> name of the node class in `wn`. Names are resolved
        # lazily with getattr so only the class actually needed is looked up.
        # NOTE(review): h1 and h3 map to H2Node exactly as in the original
        # code -- confirm whether dedicated H1/H3 node classes were intended.
        node_class_names = {
            "p": "PNode",
            "strong": "StrongNode",
            "span": "SpanNode",
            "html": "HtmlNode",
            "head": "HeadNode",
            "body": "BodyNode",
            "style": "StyleNode",
            "h1": "H2Node",
            "h2": "H2Node",
            "h3": "H2Node",
            "table": "TableNode",
            "tr": "TrNode",
            "td": "TdNode",
            "ol": "OlNode",
            "li": "LiNode",
            "title": "TitleNode",
            "div": "DivNode",
            "form": "FormNode",
            "select": "SelectNode",
            "option": "OptionNode",
            "script": "ScriptNode",
            "input": "InputNode",
            "link": "LinkNode",
        }
        # Create the node; unknown tags fall back to the generic WebNode
        node = getattr(wn, node_class_names.get(tag, "WebNode"))()

        # Set the tag; the previous current node becomes the new node's father
        parent = self.now
        node.tag = tag
        node.father = parent
        # A missing father means this node is at the top level
        if parent is not None:
            # The node sits one level below its father
            node.index = parent.index + 1
            # Path segment: tag name plus 1-based position among the
            # father's children recorded so far
            segment = "{0}[{1}]".format(tag, len(parent.children) + 1)
            if parent.path == "":
                # Father has no path yet: this segment starts the path
                node.path = segment
            else:
                # Otherwise extend the father's path
                node.path = parent.path + "-" + segment
        else:
            # No father: this node has the highest level
            node.index = 0
        # Copy the attributes onto the node
        for each in attrs:
            node.attr[each[0]] = each[1]

        # Make the new node the current node, i.e. move one level down
        self.now = node
        # A tag that may come without an end tag cannot have children, so it
        # is closed right away and the current node moves back to its father
        if self.now.tag in maybenoendtag_list:
            self.now.father.children.append(self.now)
            self.now = self.now.father