Exemple #1
0
 def doInBackground(self):
     #-----------------------------------------------------------------------
     # Is it possible for an exception to occur?
     #-----------------------------------------------------------------------
     try:
         #-------------------------------------------------------------------
         # Inform the user of what is occurring, and try to retrieve the data
         #-------------------------------------------------------------------
         print 'connecting...'
         doc = Jsoup.connect(self.url).get()
         #-------------------------------------------------------------------
         # Use Jsoup methods to locate the HTML links & associated text
         #-------------------------------------------------------------------
         print 'processing...'
         #-------------------------------------------------------------------
         # Traverse the HTML, looking for <H3> tags and their tables...
         #-------------------------------------------------------------------
         visitor = FormattingVisitor(self.url)
         walker = NodeTraversor(visitor)
         walker.traverse(doc)
         #           Tabs = visitor.Tabs
         #           print 'Tabs:', Tabs.getTabCount()
         #           print visitor.toString()
         self.frame.add(visitor.Tabs)
         self.frame.validate()
     except:
         Type, value = sys.exc_info()[:2]
         Type, value = str(Type), str(value)
         print '\nError:', Type
         print 'value:', value
 def doInBackground(self):
     #-----------------------------------------------------------------------
     # Is it possible for an exception to be thrown
     #-----------------------------------------------------------------------
     try:
         #-------------------------------------------------------------------
         # Inform the user of what is occurring, and try to retrieve the data
         #-------------------------------------------------------------------
         self.msg.setText('connecting...')
         self.doc = Jsoup.connect(self.url).get()
         #-------------------------------------------------------------------
         # Use Jsoup methods to locate the HTML links & associated text
         #-------------------------------------------------------------------
         self.msg.setText('processing...')
         #-------------------------------------------------------------------
         # Warning: Don't update the visible model within the loop.
         #-------------------------------------------------------------------
         model = DefaultListModel()
         for link in self.doc.getElementsByTag('a'):
             name = link.text()
             href = link.attr('abs:href')
             self.docLinks[name] = href
             model.addElement(name)
         #-------------------------------------------------------------------
         # Replace the visible model with the one containing the real data
         #-------------------------------------------------------------------
         self.List.setModel(model)
         self.msg.setText('Make a selection')
     except:
         Type, value = sys.exc_info()[:2]
         Type, value = str(Type), str(value)
         print '\nError:', Type
         print 'value:', value
         self.msg.setText(value)
 def getPS3News(self):
     """Store a one-line summary of the first listed board entry in self.PS3News.

     The page at self.PS3Url is fetched into self.Doc; the second <a> of the
     first row matching 'tr[class= ]' inside the list table supplies both the
     title text and the (relative) href of the entry.
     """
     self.Doc = Jsoup.connect(self.PS3Url.decode('utf-8')).get()
     board = self.Doc.select('table[class=tbl tbl_list_comm]')
     firstRow = board.select('tr[class= ]').get(0)
     anchor = firstRow.select('a').get(1)
     link = "http://bbs2.ruliweb.daum.net/gaia/do/ruliweb/default/ps/93/" + anchor.attr('href')
     headline = anchor.text().encode('utf-8')
     # \002 wraps the title; the same control character is used the same way
     # in the other message strings built in this file.
     self.PS3News = "\002[" + headline.decode('utf-8') + "]\002 " + link.decode('utf-8')
Exemple #4
0
    def highlight_files(self, fnames):
        """Yield one Jsoup document per file name in *fnames*.

        Each file is read with self.file_to_string, highlighted by pygments
        using self.lexer and self.formatter, and parsed as a body fragment
        whose <head> receives a fixed @page style block.  A progress event
        is posted on self.changeUpdate before each of the three stages.
        """
        from pygments import highlight
        from org.jsoup import Jsoup

        page_css = """
        <style type="text/css">
            @page { 
                size: 10in 11in;
                position: relative;
                margin: 0.2in 0.2in;
                display: inline-table;
            }
        </style>
        """
        for path in fnames:
            reading = AsyncEvent(self,
                                 ACTION.FILEREAD, "Reading file: %s - 1/5" % path)
            self.changeUpdate.post(reading)
            source_text = self.file_to_string(path)

            converting = AsyncEvent(self,
                                    ACTION.CONVERTTOHTML, "Hiliting file: %s - 2/5" % path)
            self.changeUpdate.post(converting)
            markup = highlight(source_text, self.lexer, self.formatter)

            cleaning = AsyncEvent(self,
                                  ACTION.CLEANHTML, "Cleaning the HTML 3/5")
            self.changeUpdate.post(cleaning)
            parsed = Jsoup.parseBodyFragment(markup)
            # Set the head's HTML first, then pass the first <div> to after().
            styled_head = parsed.head().html(page_css)
            styled_head.after(parsed.getElementsByTag('div').first())

            yield parsed
Exemple #5
0
    def getTodayD(self, group = ""):
        """Queue the guerrilla schedule cell of one group via self.requestAdd.

        group -- group letter "A".."E" (case-insensitive), optionally written
                 with the Korean suffix as in "A조".  Any other value,
                 including the default "", is treated as group E.

        The schedule is scraped from http://m.thisisgame.com/pad/: the cell
        is taken from the second <tr> of the "content guerilla-time" div,
        at the column index that belongs to the group.
        """
        url = "http://m.thisisgame.com/pad/"
        # (plain name, Korean name, reply prefix); the tuple position is the
        # <td> column index of that group.
        groups = (
            ("A", "A조".decode('utf-8'), "\002[A조 게릴라 일정]\002 ".decode('utf-8')),
            ("B", "B조".decode('utf-8'), "\002[B조 게릴라 일정]\002 ".decode('utf-8')),
            ("C", "C조".decode('utf-8'), "\002[C조 게릴라 일정]\002 ".decode('utf-8')),
            ("D", "D조".decode('utf-8'), "\002[D조 게릴라 일정]\002 ".decode('utf-8')),
            ("E", "E조".decode('utf-8'), "\002[E조 게릴라 일정]\002 ".decode('utf-8')),
        )
        wanted = group.upper()
        # Unknown groups fall back to the last entry (E).
        _param = len(groups) - 1
        for column, (plain, korean, _prefix) in enumerate(groups):
            if wanted == plain or wanted == korean:
                _param = column
                break
        requestStr = groups[_param][2]

        Doc = Jsoup.connect(url).get()
        requestStr = requestStr + Doc.select("div[class=content guerilla-time]").select("tr")[1].select("td")[_param].text()

        self.requestAdd(requestStr)
Exemple #6
0
    def getMonsterInfo(self, monsterName):
        """Queue summary lines scraped from a monster's info page.

        monsterName -- passed to self.getMonsterInfoUrl to obtain the page URL.

        Two lines are always sent through self.requestAdd: general data and
        stats, then leader and active skill.  A third line listing the
        awakening skill names is sent only when the page contains any.
        """
        url = self.getMonsterInfoUrl(monsterName)
        Doc = Jsoup.connect(url).get()

        def ddText(scope, css):
            # Combined text of the <dd> entries under the elements of
            # `scope` that match `css`.
            return scope.select(css).select("dd").text()

        def secondDdText(css):
            # Same, but only for the second element matching `css`.
            return Doc.select(css)[1].select("dd").text()

        leader = Doc.select("dl[class=leader grouped]")
        active = Doc.select("dl[class=normal grouped]")
        awakenings = Doc.select("dl[class=awakeskills]").select("span[class=name]")

        requestStr = (
            "\002["
            + ddText(Doc, "dl[class=monstercode]") + " / "
            + ddText(Doc, "dl[class=monsterclass1]").replace("타입 ".decode('utf-8'), "타입, ".decode('utf-8')) + " / "
            + ddText(Doc, "dl[class=monstergrade]") + " "
            + ddText(Doc, "dl[class=monstername]") + "]\002 "
            + ddText(Doc, "dl[class=maxlevel]") + ", "
            + ddText(Doc, "dl[class=teamcost]") + ", "
            + ddText(Doc, "dl[class=maxexp]") + " | "
            + "능력치 [".decode('utf-8')
            + "체력 :".decode('utf-8') + secondDdText("dl[class=hp]") + " "
            + "공격력 :".decode('utf-8') + secondDdText("dl[class=attack]") + " "
            + "회복 :".decode('utf-8') + secondDdText("dl[class=heal]") + "] "
        )

        requestStr2 = (
            "\002[".decode('utf-8') + ddText(leader, "dl[class=skillname]") + "]\002 "
            + ddText(leader, "dl[class=description]") + " / "
            + ddText(leader, "dl[class=skillextra]") + " "
            + "\002[".decode('utf-8') + ddText(active, "dl[class=skillname]") + "]\002 "
            + ddText(active, "dl[class=turn]") + " / "
            + ddText(active, "dl[class=description]")
        )

        self.requestAdd(requestStr)
        self.requestAdd(requestStr2)
        if len(awakenings) != 0:
            # Names are joined with two spaces, the trailing pad is stripped,
            # and the remaining double spaces become ", ".
            padded = "".join([skill.text() + "  " for skill in awakenings])
            requestStr3 = ("\002[각성스킬]\002 ".decode('utf-8') + padded).strip().replace("  ", ", ")
            self.requestAdd(requestStr3)
 def Migration(self):
     """Rebuild the card table from the listing at ma.inven.co.kr.

     All rows of self.TABLENAME are deleted, then one row per <tr> of the
     page's "filteredList" table body is inserted with the columns
     ID, LINK, NAME, FACE, RARITY and COST.

     The inserts share one PreparedStatement and are sent as a single
     batch; the statement is always closed and autocommit is always
     switched back on, even when the batch fails.
     """
     url_ko = "http://ma.inven.co.kr/dataninfo/card/"
     rows = Jsoup.connect(url_ko).get().select("tbody[id=filteredList]").select("tr")
     self.stat.executeUpdate("DELETE FROM " + self.TABLENAME +  ";")
     if rows.size() == 0:
         # Nothing to insert; do not run an empty batch.
         return
     prep = self.db.prepareStatement("INSERT INTO " + self.TABLENAME + " VALUES (?, ?, ?, ?, ?, ?);")
     try:
         for row in rows:
             href = row.select("a").attr("href")
             images = row.select("img")
             prep.setString(1, href[href.find("=")+1:]) #ID
             prep.setString(2, url_ko + href) #LINK
             prep.setString(3, row.select("a").text()) #NAME
             prep.setString(4, images.attr("src")) #FACE
             # One hollow star per star image; a star image at index 5
             # replaces the whole value with a single filled star.
             rarity = ''
             for j in range(0, images.size()):
                 if images[j].attr("src").find("card_grade_star.png") != -1:
                     rarity = rarity + "☆".decode('utf-8')
                     if j == 5:
                         rarity = "★".decode('utf-8')
             prep.setString(5, rarity) #RARITY
             prep.setString(6, row.select("td[class=cost]").text()) #COST
             prep.addBatch()
         self.db.setAutoCommit(False)
         try:
             prep.executeBatch()
         finally:
             # Re-enabling autocommit is what commits the pending work here,
             # as it was in the per-row version of this code.
             self.db.setAutoCommit(True)
     finally:
         prep.close()
    def doInBackground( self ) :
        """Fetch self.url and put its formatted content in the split pane.

        The downloaded page is walked by a NodeTraversor driving a
        FormattingVisitor, whose Tabs component becomes the right component
        of self.sPane.  Progress and errors go through self.showMessage;
        nothing is re-raised.
        """
        try :
            self.showMessage( 'Connecting...' )
            page = Jsoup.connect( self.url ).get()
            self.showMessage( 'Processing...' )
            formatter = FormattingVisitor( self.url )
            NodeTraversor( formatter ).traverse( page )
            self.sPane.setRightComponent( formatter.Tabs )
        except :
            info = sys.exc_info()
            errType, errValue = str( info[ 0 ] ), str( info[ 1 ] )
            self.showMessage( '\nheaderTask Error: %s\nvalue: %s' % ( errType, errValue ) )
Exemple #9
0
    def get_html(self, n=10):  #Stackoverflow Parser 시작점....!!!!!!!!
        elements = []
        so_question_list = self._stackoverflow_links(n)
        for so_question in so_question_list:
            print "Treat link", so_question["title"]
            #doc = Jsoup.connect(so_question["link"]).userAgent("Mozilla/5.0 (Windows; U; WindowsNT 5.1; en-US; rv1.8.1.6) Gecko/20070725 Firefox/2.0.0.6").get()
            doc = Jsoup.connect(so_question["link"]).userAgent(
                'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.109 Safari/537.36'
            ).get()

            #TODO 여기부터..
            if not self.template:  #template을 None으로 초기화 했기 때문에 template이 None이면 초기를 doc으로 설정
                self.template = doc
            elements.append({
                "e": doc.select(".accepted-answer .post-text"),
                "title": so_question["title"]
            })

        if self.template:
            table_checkbox = self._table()
            self.snippets = [
                table_checkbox %
                (element["title"], i, element["e"].outerHtml())
                for i, element in enumerate(elements[:n])
            ]

            self._relative_to_absolute()
            self.template.body().html("".join(self.snippets))
            self._add_style()

            return self.template.outerHtml()
        return "%s Results. Your search returned no matches." % (
            len(so_question_list))
Exemple #10
0
def remove_code_block(s):
    """Return the text of HTML string *s* with all <code> elements removed."""
    from org.jsoup import Jsoup
    parsed = Jsoup.parse(s)
    code_nodes = parsed.select("code")
    for code_node in code_nodes:
        # Detach the element, and everything inside it, from the document.
        code_node.remove()

    plain = parsed.text()
    return plain
Exemple #11
0
 def getMonsterPic(self, monsterName):
     """Queue the monster's name and picture URL via self.requestAdd.

     monsterName -- passed to self.getMonsterInfoUrl to obtain the page URL.

     The name comes from the <dd> under dl[class=monstername]; the picture
     URL is the src of the <img> inside div[class=image].
     """
     url = self.getMonsterInfoUrl(monsterName)
     Doc = Jsoup.connect(url).get()
     name = Doc.select("dl[class=monstername]").select("dd").text()
     picture = Doc.select("div[class=image]").select("img").attr("src")
     self.requestAdd("\002[" + name + "]\002 " + picture)
Exemple #12
0
 def fetch_document(scratch_project_url, timeout, user_agent):
     """Fetch *scratch_project_url* with Jsoup and return the decoded JSON.

     scratch_project_url -- address handed to Jsoup.connect
     timeout             -- value handed to the connection's timeout()
     user_agent          -- value sent as the User-Agent

     Content-type checking is disabled on the connection; the text of the
     fetched document is parsed with json.loads and returned.
     """
     # The Connection setters are chained; per the jsoup API each returns
     # the connection itself.
     response = (
         Jsoup.connect(scratch_project_url)
         .userAgent(user_agent)
         .timeout(timeout)
         .ignoreContentType(True)
         .get()
     )
     return json.loads(response.text())
Exemple #13
0
 def connect(self):
     try:
         # r = requests.get(self.url, timeout=20)
         self.doc = Jsoup.connect(self.url).userAgent(
             "Mozilla/5.0 (Windows NT 6.3; rv:36.0) Gecko/20100101 Firefox/36.0"
         ).referrer("https://www.google.com").timeout(0).get()
     except Exception as e:
         print "Download - Error: ", self.url, e
Exemple #14
0
def so_text(s):
    """Return the plain text of a Stack Overflow body.

    *s* is first passed through unescape_html, then parsed; every <code>
    element is dropped together with its content, and the remaining
    document text (without HTML tags) is returned.
    """
    from org.jsoup import Jsoup
    parsed = Jsoup.parse(unescape_html(s))
    code_nodes = parsed.select("code")
    for code_node in code_nodes:
        code_node.remove()

    plain = parsed.text()
    return plain
Exemple #15
0
 def getProfileUrl(self, username):
     """Look up the numeric user id of *username* and request the user's info.

     The profile page http://osu.ppy.sh/u/<username> is fetched and the
     fifth <script> element is searched for "var userId = <value>;".
     When a value is found self.getUserInfo(value, username) is called;
     otherwise "?" is queued through self.requestAdd.
     """
     url = "http://osu.ppy.sh/u/" + username
     Doc = Jsoup.connect(url).get()
     script = Doc.select("script")[4].html()

     marker = "var userId = "
     userId = ""
     start = script.find(marker)
     if start != -1:
         start += len(marker)
         # Look for the terminator only after the marker: an earlier ';'
         # in the script must not end the slice.
         end = script.find(";", start)
         if end == -1:
             end = len(script)
         userId = script[start:end]

     if userId != "":
         self.getUserInfo(userId, username)
     else:
         self.requestAdd("?")
 def typeCmdProc(self):
     """Queue the <title> of the first http(s) link found in the message.

     The text returned by self.getMessage("msg") is searched for the first
     "http://" or "https://"; the URL runs from there to the next space.
     When no link is present nothing happens.  Fetching is best effort:
     any failure while downloading or reading the title is ignored.
     """
     message = self.getMessage("msg")
     # Start at a real URL scheme.  Searching for plain 'http' would also
     # stop at earlier words such as "httpd".
     starts = [pos for pos in (message.find('http://'), message.find('https://')) if pos != -1]
     if not starts:
         return
     url = message[min(starts):].split(' ')[0]
     try:
         Doc = Jsoup.connect(url).get()
         title = '\002[ LINK TITLE ]\002 ' + Doc.select('title').text().replace('\n', ' ')
         self.requestAdd(title)
     except:
         # Deliberately silent: an unreachable or non-HTML link simply
         # produces no title line.
         pass
 def getCallNumber(self, number):
     """Queue the lookup result for *number* via self.requestAdd.

     The page at self.Url + str(number) is fetched into self.Doc.  From the
     cells of the "bg1" row, the second cell's link gives the number and
     its detail URL, and the third cell gives the description.

     If the page cannot be fetched or does not have the expected layout,
     the failure message is queued instead; no exception escapes.
     """
     myUrl = self.Url + str(number)
     try:
         # The fetch and the parsing are both inside the try so that the
         # failure message below covers connection errors as well.
         self.Doc = Jsoup.connect(myUrl.decode('utf-8')).get()
         cells = self.Doc.select("tr[class=bg1]").select("td").select("td")
         link = cells[1].select("a")
         myNumber = link.text().split()[0]
         numberUrl = "http://www.thecall.co.kr/" + link.attr("href")[2:]
         reply = "\002["+ myNumber+ " / " + numberUrl + "]\002 " + cells[2].text()
     except:
         reply = "해당 번호를 검색할 수 없거나, 연결에 실패했습니다".decode('utf-8')
     self.requestAdd(reply)
 def checkShortLink(self, keyword):
     """Queue the target of the short link named *keyword*, if it exists.

     The "url-stats" action of self.BASEURL is queried.  A "short URL not
     found" response queues a not-found notice; a "success" response
     queues "<shorturl> -> <url>".  Any other response queues nothing.
     """
     checkUrl = self.BASEURL + "&action=url-stats&shorturl=" + urllib.unquote(keyword)
     Doc = Jsoup.connect(checkUrl).get()
     message = Doc.select("message").text()
     if message.find("Error: short URL not found") != -1:
         self.requestAdd("해당 키워드로 정의된 숏링크를 수 없었어요.".decode('utf-8'))
     elif message.find("success") != -1:
         # These two fields are only needed for the success reply.
         shorturl = Doc.select("shorturl").text()
         url = Doc.select("url").text()
         self.requestAdd("\002[ " + shorturl + " -> ".decode('utf-8') + url + " ]\002")
Exemple #19
0
    def getUserInfo(self, id):
        """Queue a one-line statistics summary for player *id*.

        id -- player name appended to http://www.noobmeter.com/player/kr/

        The value of each statistic is the second <td> of the corresponding
        <tr> (rows 0..13) inside the page's "pageContent" div; the values
        are concatenated, each behind its label, and sent through
        self.requestAdd.
        """
        url = "http://www.noobmeter.com/player/kr/" + id
        Doc = Jsoup.connect(url).get()
        # Resolve the table rows once instead of once per statistic.
        rows = Doc.select("div[class=pageContent]").select("tbody").select("tr")

        # Labels in table-row order: label N belongs to row N.
        labels = (
            " 퍼포먼스 레이팅: ",
            " | 이피신시 레이팅: ",
            " | WN6: ",
            " | WN7:  ",
            " | 총배틀: ",
            " | 승율: ",
            " | 평균데미지: ",
            " | 평균경험치: ",
            " | 최대경험치: ",
            " | 점거율: ",
            " | 방어율: ",
            " | 격파율: ",
            " | 생존율: ",
            " | 킬뎃: ",
        )
        # Read every value before building the message, so a page with
        # missing rows fails before anything is queued.
        values = [rows[index].select("td")[1].text() for index in range(len(labels))]

        requestStr = "\002[" + id + "]\002"
        for label, value in zip(labels, values):
            requestStr = requestStr + label.decode('utf-8') + value

        self.requestAdd(requestStr)
 def doInBackground(self):
     """Show the normalised document of self.url in self.area.

     If anything fails, the exception type and value are shown in
     self.area instead; nothing is re-raised.
     """
     try:
         self.area.setText('connecting...')
         page = Jsoup.connect(self.url).get()
         normalised = page.normalise()
         self.area.setText(str(normalised))
     except:
         errType, errValue = [str(part) for part in sys.exc_info()[:2]]
         self.area.setText('\nError: %s\nValue: %s' % (errType, errValue))
    def Migration(self):
        """Rebuild the card table from the Korean and Japanese card pages.

        The Korean page is fetched first, then all rows of self.TABLENAME
        are deleted, then the cards of the Korean page ("KO") and of the
        Japanese page ("JP") are inserted.  Only every second link (odd
        index) of each page's card table is used.

        Each page's inserts share one PreparedStatement and are sent as one
        batch; the statement is always closed and autocommit is always
        switched back on.
        """
        url_ko = "http://web.ruliweb.daum.net/etcs/mia/mia_card.htm"
        url_jp = "http://web.ruliweb.daum.net/etcs/mia_jp/mia_card.htm"

        def cardLinks(pageUrl):
            # All <a> elements inside the <div>s of the card table.
            page = Jsoup.connect(pageUrl).get()
            return page.select("table[class=mia_tbl]").select("div").select("a")

        def insertCards(links, lang, linkBase):
            # Insert one row per odd-indexed link, tagged with `lang`.
            if links.size() < 2:
                # No odd-indexed link: do not run an empty batch.
                return
            prep = self.db.prepareStatement("INSERT INTO " + self.TABLENAME + " VALUES (?, ?, ?, ?, ?);")
            try:
                for i in range(1, links.size(), 2):
                    href = links[i].select("a").attr("href")
                    firstLine = links[i].html().replace("<br />", "*").split("*")[0]
                    prep.setString(1, href[href.find("=") + 1:])
                    prep.setString(2, lang)
                    prep.setString(3, linkBase + href)
                    # NOTE(review): columns 4 and 5 receive the same text,
                    # exactly as before - confirm whether column 5 was
                    # meant to hold the part after the <br />.
                    prep.setString(4, firstLine)
                    prep.setString(5, firstLine)
                    prep.addBatch()
                self.db.setAutoCommit(False)
                try:
                    prep.executeBatch()
                finally:
                    # Re-enabling autocommit is what commits the pending work.
                    self.db.setAutoCommit(True)
            finally:
                prep.close()

        # Fetch before deleting so a failed download leaves the table intact.
        koLinks = cardLinks(url_ko)
        self.stat.executeUpdate("DELETE FROM " + self.TABLENAME +  ";")
        insertCards(koLinks, "KO", "http://web.ruliweb.daum.net/etcs/mia/")
        insertCards(cardLinks(url_jp), "JP", "http://web.ruliweb.daum.net/etcs/mia_jp/")
Exemple #22
0
    def getCheckFocus(self):
        """Queue the first six "week-focus" list entries as two text lines.

        The entries are the <li> elements of the "content week-focus" div
        on http://m.thisisgame.com/pad/.  Entries 0-2 form the first line
        and entries 3-5 the second, joined by " / ".  Tags starting with
        "a" and <span> tags are stripped, <strong> becomes the \\002
        control character, and <br /> becomes a space.
        """
        url = "http://m.thisisgame.com/pad/"
        Doc = Jsoup.connect(url).get()

        # Resolve the list once; the first six entries are required.
        items = Doc.select("div[class =content week-focus]").select("li")
        entries = [items[index].html() for index in range(6)]

        requestStr = " / ".join(entries[:3]) + "\n" + " / ".join(entries[3:])
        requestStr = re.sub("</?[a][a-z0-9]*[^<>]*>", "", requestStr).replace("<span>", "").replace("</span>", "").replace(" 오후 11시 59분".decode('utf-8'), "")
        requestStr = requestStr.replace("<strong>", "\002").replace("</strong>", "\002").replace("<br />", " ")

        self.requestAdd(requestStr)
Exemple #23
0
 def getMonsterSearchResult(self, monsterName):
     """Queue the monster names matching *monsterName*, at most six rows.

     Spaces in the name are replaced by "%20" before it is used in the
     search URL and in the reply.  Each result row contributes the text of
     its <td> cells up to the first " ["; rows after the second are
     separated by " / ".  When the result table has exactly one row, only
     "<name>: " is queued.
     """
     monsterName = monsterName.replace(" ", "%20")
     url = "http://www.thisisgame.com/pad/info/monster/list.php?sf=name&sw=" + monsterName
     rows = Jsoup.connect(url).get().select("div[class=pad-table2 pad-info-light]").select("tr")
     requestStr = monsterName + ": "
     if 1 != len(rows):
         for index in range(min(len(rows), 6)):
             text = rows[index].select("td").text()
             # Cut only when the marker is present; slicing with find()'s
             # -1 would silently drop the last character.
             cut = text.find(" [")
             if cut != -1:
                 text = text[:cut]
             if index > 1:
                 requestStr = requestStr + " / "
             requestStr = requestStr + text
     self.requestAdd(requestStr)
    def doInBackground(self):
        #-----------------------------------------------------------------------
        # Disable the text (input) field, if it exists
        #-----------------------------------------------------------------------
        try:
            self.msg.setText('working...')
            self.doc = Jsoup.connect(self.url).get()
            self.msg.setText('ready')
        except:
            Type, value = sys.exc_info()[:2]
            print 'Error:', str(type)
            print 'value:', str(value)
            self.msg.setText(str(value))

        #-----------------------------------------------------------------------
        # Was the specified URL retrieved?
        #-----------------------------------------------------------------------
        if self.doc:
            self.cb.removeAllItems()
            for link in self.doc.getElementsByTag('a'):
                self.cb.addItem(str(link.text()))
 def requestShortLink(self, geturl, keyword):
     """Ask the shortener at self.BASEURL for a short link and return the result.

     geturl  -- address to shorten (URL-quoted before it is sent)
     keyword -- custom keyword for the short link; "" requests none

     Returns the short URL on "success".  On "fail" it returns the short
     URL when the link already exists and a short URL was reported, and
     the service's message otherwise.  Any other status returns None.
     """
     geturl = self.BASEURL + "&action=shorturl&url=" + urllib.quote(geturl.encode('utf-8')).replace("&", "%26").replace("#", "%23")
     if keyword != "":
         geturl = geturl + "&keyword=" + keyword.encode('utf-8')
     Doc = Jsoup.connect(geturl).get()
     status = Doc.select("status").text()
     message = Doc.select("message").text()
     shorturl = Doc.select("shorturl").text()
     if status == "success":
         return shorturl
     if status == "fail":
         if message.find("already exists in database") != -1 and shorturl != "":
             return shorturl
         return message
     return None
 def requestShortLink(self, geturl, keyword):
     """Ask the shortener at self.BASEURL for a short link and queue the outcome.

     geturl  -- address to shorten (URL-quoted before it is sent)
     keyword -- custom keyword for the short link; "" requests none

     One message is sent through self.requestAdd for the statuses
     "success" and "fail"; any other status queues nothing.
     """
     geturl = self.BASEURL + "&action=shorturl&url=" + urllib.quote(geturl.encode('utf-8')).replace("&", "%26").replace("#", "%23")
     if keyword != "":
         geturl = geturl + "&keyword=" + keyword.encode('utf-8')
     Doc = Jsoup.connect(geturl).get()
     status = Doc.select("status").text()
     message = Doc.select("message").text()
     shorturl = Doc.select("shorturl").text()
     if status == "success":
         # The second <url> element is read only here, where it is used,
         # so a failure response without it can still be reported.
         url = Doc.select("url")[1].text()
         self.requestAdd("숏링크 생성 완료! ".decode('utf-8') + "\002[ " + url + " -> " + shorturl + " ]\002")
     elif status == "fail":
         if message.find("already exists in database") == -1:
             self.requestAdd("실패! 정의되지 않은 오류. ".decode('utf-8') + " 메세지: ".decode('utf-8') + message + " " + " \002[ ".decode('utf-8') + shorturl + " ]\002")
         elif shorturl == "":
             self.requestAdd("실패! 정의되지 않은 오류. ".decode('utf-8') + " 메세지: ".decode('utf-8') + message)
         else:
             self.requestAdd("실패!".decode('utf-8') + " 이미 생성된 링크일 수 있습니다 ".decode('utf-8') + " \002[ ".decode('utf-8') + shorturl + " ]\002")
Exemple #27
0
def so_tokenizer(s, remove_html=True, as_str=True):
    """Tokenize s into a set of unique, stop-word-free terms.

    When remove_html is true the text is first HTML-unescaped and reduced
    to its plain text with jsoup.  Every distinct token contributes its
    camel-case parts plus its lower-cased form; entries found in
    java_stopwords are dropped.

    Returns the terms joined by single spaces when as_str is true,
    otherwise the set itself.
    """
    text = s
    if remove_html:
        from org.jsoup import Jsoup
        text = Jsoup.parse(unescape_html(text)).text()

    expanded = []
    for word in set(tokenize(text)):
        expanded += camel_case_split(word)
        expanded.append(word.lower())

    kept = set(term for term in expanded if term not in java_stopwords)
    if not as_str:
        return kept
    return " ".join(kept)
Exemple #28
0
 def doInBackground(self):
     #-----------------------------------------------------------------------
     # Is it possible for an exception to occur?
     #-----------------------------------------------------------------------
     try:
         #-------------------------------------------------------------------
         # Inform the user of what is occurring, and try to retrieve the data
         #-------------------------------------------------------------------
         self.result.setText('connecting...')
         doc = Jsoup.connect(self.url).get()
         #-------------------------------------------------------------------
         # Use Jsoup methods to locate the HTML links & associated text
         #-------------------------------------------------------------------
         self.result.setText('processing...')
         #-------------------------------------------------------------------
         # Traverse the HTML, looking for <H#...> tags
         #-------------------------------------------------------------------
         self.text = self.getPlainText(doc)
     except:
         Type, value = sys.exc_info()[:2]
         Type, value = str(Type), str(value)
         print '\nError:', Type
         print 'value:', value
         self.result.setText('Exception: %s' % value)
 def doInBackground( self ) :
     #-----------------------------------------------------------------------
     # Is it possible for an exception to be thrown
     #-----------------------------------------------------------------------
     try :
         #-------------------------------------------------------------------
         # Inform the user of what is occurring, and try to retrieve the data
         #-------------------------------------------------------------------
         model = self.List.getModel()
         model.set( 0, 'Connecting...' )
         self.doc  = Jsoup.connect( self.url ).get()
         #-------------------------------------------------------------------
         # Use Jsoup methods to locate the HTML links & associated text
         #-------------------------------------------------------------------
         model.set( 0, 'Processing...' )
         #-------------------------------------------------------------------
         # Warning: Don't update the visible model within the loop.
         # Note: the 'abs:href' notation causes jsoup to return an absolute
         #       (i.e., complete) URL.
         #-------------------------------------------------------------------
         self.model = DefaultListModel()
         for link in self.doc.getElementsByTag( 'a' ) :
             name = link.text()
             href = link.attr( 'abs:href' )
             self.docLinks[ name ] = href
             self.model.addElement( name )
         #-------------------------------------------------------------------
         # Replace the visible model with the one containing the real data
         #-------------------------------------------------------------------
         self.List.setModel( self.model )
     except :
         Type, value = sys.exc_info()[ :2 ]
         Type, value = str( Type ), str( value )
         print '\nsoupTask Error:', Type
         print 'value:', value
         sys.exit()
Exemple #30
0
 def fetch_document(scratch_project_url, timeout, user_agent):
     """Download scratch_project_url with jsoup and return the wrapped document.

     The request is sent with the given user agent and timeout; the fetched
     document is returned inside a _ResponseJsoupDocumentWrapper.
     """
     request = Jsoup.connect(scratch_project_url)
     request.userAgent(user_agent)
     request.timeout(timeout)
     document = request.get()
     return _ResponseJsoupDocumentWrapper(document)
    def getCardInfo(self, requestStr, cardName):
        """Scrape a card's detail page and queue the reply for a chat command.

        requestStr -- the command that was issued; it selects which message
                      is built.  An unrecognised command queues "".
        cardName   -- the card name as typed by the user.  A leading "일" or
                      "한" word restricts the database lookup to country
                      'JP' or 'KO'; the name is then re-derived with
                      self.countSplit(cardName, 1).

        The message is passed to self.requestAdd(); nothing is returned.
        Database and scraping errors are not handled here and propagate.
        """
        cardName = cardName.replace("\017", "").strip()
        firstWord = cardName.split()[0]
        country = None
        if firstWord == "일".decode('utf-8'):
            country = 'JP'
        elif firstWord == "한".decode('utf-8'):
            country = 'KO'
        if country is not None:
            cardName = self.countSplit(cardName, 1)
        cardUrl, cardNumber = self._queryCard(cardName, country)

        Doc = Jsoup.connect(cardUrl).get()

        # Cells 0-5 of content_block_1 carry the links used by the regular
        # illustration commands; they are required.
        blockCells = Doc.select("table[id=content_block_1]").select("td")
        images = [blockCells[i].select("a").attr("href") for i in range(6)]

        # The illustrator credit is looked for in cell 6, then in cell 12;
        # it stays empty when neither yields a "label: name" text.
        cardIllustrator = ""
        for index in (6, 12):
            try:
                cardIllustrator = blockCells[index].text().split(":")[1].strip()
                break
            except:
                pass

        # Cells 6-11 carry the "original" illustration links; when any of
        # them is missing every original slot gets the apology text.
        try:
            originals = [blockCells[i].select("a").attr("href") for i in range(6, 12)]
        except:
            originals = ["오리지날 일러를 찾을 수 없었어요".decode('utf-8')] * 6
        images.extend(originals)

        infoRows = Doc.select("table[class=mia_tbl]").select("tr")
        cardName = infoRows[0].select("td")[0].text()
        cardInfo = infoRows[1].text()
        skillLines = infoRows[2].select("p")
        cardSkillName = skillLines[0].text()
        cardSkillInfo = skillLines[1].text()
        cardRarelity = infoRows[3].select("td")[0].text().replace("★".decode('utf-8'), "☆".decode('utf-8')).replace('☆☆☆☆☆'.decode('utf-8'), '★'.decode('utf-8'))
        cardCost = infoRows[3].select("td")[1].text()
        cardFaction = infoRows[4].select("td")[0].text()
        cardGender = infoRows[5].select("td")[0].text()
        cardForce = infoRows[5].select("td")[1].text()
        cardGrowthRate = infoRows[6].select("td")[0].text()
        cardLimitBreakCount = infoRows[6].select("td")[1].text()
        cardGetInfo = infoRows[7].select("td")[0].text()

        statCells = Doc.select("table[class=mia_tbl mia_a003 mia_normal]").select("td")

        # Cells 1 and 2: keep the markup from offset 3 up to the first <br />.
        levels = []
        for index in (1, 2):
            markup = statCells[index].html().replace("<br />", "*")
            levels.append(markup[3:markup.find("*")])
        cardMLV, cardLLV = levels

        # Both stat blocks are read up front (as before), so a page without
        # them fails regardless of the command.  The second block sits 24
        # cells after the first.
        statIndexes = (9, 10, 11, 12, 13, 14, 18, 19, 20, 21, 22, 23)
        statsByCommand = {}
        for command, shift in (("!카드2", 0), ("!키라카드2", 24)):
            statsByCommand[command.decode('utf-8')] = [statCells[i + shift].text() for i in statIndexes]

        # Illustration command -> position in images.
        imageByCommand = dict((command.decode('utf-8'), index) for command, index in (
            ("!일러2", 0),
            ("!각성일러2", 1),
            ("!만렙일러2", 2),
            ("!키라일러2", 3),
            ("!키라각성일러2", 4),
            ("!키라만렙일러2", 5),
            ("!오리지날일러2", 6),
            ("!오리지날각성일러2", 7),
            ("!오리지날키라일러2", 9),
            ("!오리지날키라각성일러2", 10),
        ))

        shortInfo = "!좀덜카드정보2".decode('utf-8')
        infoCommands = ("!좀더카드정보2".decode('utf-8'), "!mtcdjb".decode('utf-8'), shortInfo)
        rarityHead = "\002[" + cardFaction + " " + cardRarelity + " " + cardName + "]\002 "

        requestMsg = ""
        if requestStr in statsByCommand:
            (hp, atk, mhp, matk, fhp, fatk,
             total, mtotal, ftotal, cp, mcp, fcp) = statsByCommand[requestStr]
            requestMsg = (rarityHead
                          + "코스트 : ".decode('utf-8') + cardCost + ", 각성레벨(한계레벨): ".decode('utf-8') + cardMLV + "(" + cardLLV + ") \002|\002 "
                          + "체력 (M/F): ".decode('utf-8') + hp + "(" + mhp + "/" + fhp + "), "
                          + "공격력 (M/F): ".decode('utf-8') + atk + "(" + matk + "/" + fatk + "), "
                          + "H+A(M/F): ".decode('utf-8') + total + "(" + mtotal + "/" + ftotal + ") \002|\002 "
                          + "CP (M/F): ".decode('utf-8') + cp + "(" + mcp + "/" + fcp + ") \002|\002 "
                          + cardSkillName + " : " + cardSkillInfo)
        elif requestStr in imageByCommand:
            requestMsg = "\002[" + cardFaction + " " + cardName + " / " + cardIllustrator + "]\002 "
            requestMsg = requestMsg + " " + images[imageByCommand[requestStr]]
        elif requestStr == "!카드설명2".decode('utf-8'):
            requestMsg = "\002[" + cardFaction + " " + cardName + "]\002 "
            hangul = re.compile('''[^ ㄱ-ㅣ가-힣0-9a-zA-Z.,"']+'''.decode('utf-8'))
            cardInfo = hangul.sub('', cardInfo)
            # One line break at position 140.  These used to be two separate
            # "if"s, so very long texts received a second, empty line.
            # NOTE(review): the "...(생략)" suffix is appended although
            # nothing is cut off - confirm whether truncation was intended.
            if len(cardInfo) > 320:
                cardInfo = cardInfo[:140] + "\n" + cardInfo[140:] + "...(생략)".decode('utf-8')
            elif len(cardInfo) > 150:
                cardInfo = cardInfo[:140] + "\n" + cardInfo[140:]
            requestMsg = requestMsg + " " + cardInfo
        elif requestStr in infoCommands:
            requestMsg = (rarityHead
                          + "넘버: ".decode('utf-8') + cardNumber + ", "
                          + "성별: ".decode('utf-8') + cardGender + ", "
                          + "포스: ".decode('utf-8') + cardForce + ", "
                          + "성장률: ".decode('utf-8') + cardGrowthRate + ", "
                          + "한돌횟수: ".decode('utf-8') + cardLimitBreakCount + ", "
                          + "입수방법: ".decode('utf-8') + cardGetInfo)
            if requestStr == shortInfo:
                # The "less info" command keeps only the first half.
                requestMsg = requestMsg[:len(requestMsg) // 2]

        self.requestAdd(requestMsg)

    def _queryCard(self, cardName, country=None):
        """Return (url, id) of the card matching cardName, optionally by country.

        The name is tried as an exact LIKE pattern first and as a substring
        pattern if that attempt fails for any reason.  Values are bound as
        statement parameters instead of being spliced into the SQL text, so
        quotes typed by a user cannot alter the query.
        """
        sql = "SELECT * FROM " + self.TABLENAME + " WHERE "
        bound = []
        if country is not None:
            sql = sql + self.COUNTRY + " = ? AND "
            bound.append(country)
        sql = sql + self.CARDNAME + " LIKE ?;"

        stmt = self.db.prepareStatement(sql)
        try:
            try:
                return self._fetchCardRow(stmt, bound + [cardName])
            except:
                # Bare except kept from the original code.
                # NOTE(review): presumably needed so Java exceptions are
                # caught as well - confirm before narrowing.
                return self._fetchCardRow(stmt, bound + ["%" + cardName + "%"])
        finally:
            # The statement used to be left open; release it once the
            # values have been copied out.
            stmt.close()

    def _fetchCardRow(self, stmt, values):
        """Bind values to stmt, execute it and return (url, id) from the result."""
        for position, value in enumerate(values):
            stmt.setString(position + 1, value)  # JDBC parameters are 1-based
        rs = stmt.executeQuery()
        # NOTE(review): as before, the row is read without calling rs.next();
        # this relies on the driver positioning on the first row - confirm.
        return rs.getString(self.CARDURL), rs.getString(self.CARDID)
Exemple #32
0
    def getUserInfo(self, id, username):
        """Scrape the osu! profile statistics of a user and queue a summary.

        id       -- user id appended to the profile URL (must be a string).
        username -- name shown in the summary header.

        self.requestStr selects the game mode parameter ("&m=0" .. "&m=3");
        any other value sends no mode parameter.  The summary is passed to
        self.requestAdd(); nothing is returned.
        """
        address = "http://osu.ppy.sh/pages/include/profile-general.php?u=" + id
        modes = (
            ("!오스", "&m=0"),
            ("!태고", "&m=1"),
            ("!캐치어비트", "&m=2"),
            ("!오스!매니아", "&m=3"),
        )
        for command, suffix in modes:
            if self.requestStr == command.decode('utf-8'):
                address = address + suffix
                break
        Doc = Jsoup.connect(address).get()

        statLines = Doc.select("div[class=profileStatLine]")

        def valueOf(index):
            # Text after the first ":" plus one more character.
            line = statLines[index].text()
            return line[line.find(":") + 2:]

        # When the first line is not the performance rank, the rank is shown
        # as "#-" and every other statistic sits one line earlier.
        if statLines[0].text().find("Performance Rank") != -1:
            rank = valueOf(0)
            first = 1
        else:
            rank = "#-"
            first = 0
        # Order on the page: score, accuracy, play count, total score,
        # level, hits, max combo.  The score itself is not reported.
        values = [valueOf(first + step) for step in range(7)]
        accuracy, playcount, totalscore, level, hits, maxcombo = values[1:]

        cells = Doc.select("td")
        eventepic = ""
        if cells[1].text() != "":
            eventepic = cells[0].text() + " " + cells[1].text()

        summary = ("\002[" + level + "레벨 ".decode('utf-8') + username + "]\002"
                   + " 랭크: ".decode('utf-8') + rank + " / 정확도: ".decode('utf-8') + accuracy
                   + " / 플레이카운트: ".decode('utf-8') + playcount + " / 토탈스코어: ".decode('utf-8') + totalscore
                   + " / 총 히트 수: ".decode('utf-8') + hits + " / 최대콤보: ".decode('utf-8') + maxcombo + " | "
                   + eventepic)

        self.requestAdd(summary)
 def fetch_document(scratch_project_url, timeout, user_agent):
     """Download scratch_project_url with jsoup and return the wrapped document.

     The request is sent with the given user agent and timeout; the fetched
     document is returned inside a _ResponseJsoupDocumentWrapper.
     """
     request = Jsoup.connect(scratch_project_url)
     request.userAgent(user_agent)
     request.timeout(timeout)
     document = request.get()
     return _ResponseJsoupDocumentWrapper(document)
Exemple #34
0
def remove_html_tags(s):
    """Return the plain text of s as extracted by jsoup (markup removed)."""
    from org.jsoup import Jsoup
    parsed = Jsoup.parse(s)
    return parsed.text()
Exemple #35
0
	def __init__(self, answer):
		"""Keep the raw answer, parse it with jsoup and start with empty lists."""
		self.answer = answer
		self.inline, self.block = [], []
		# Parsed form of the answer markup
		self.doc = Jsoup.parse(answer)
Exemple #36
0
import sys
import os

import test1

from org.jsoup import Jsoup
from com.pixshow.framework.utils import HttpUtility

url = "http://en.wikipedia.org/";

print test1.workDir()

html = HttpUtility.get(url);
doc = Jsoup.parse(html)

html = doc.select('#mp-itn b a').toString()

appContext.get('testService').save(html)
    def getCardInfo(self, requestStr, cardName):
        """Scrape a card's detail page and queue the reply for a chat command.

        requestStr -- the command that was issued; it selects which message
                      is built.  An unrecognised command queues "".
        cardName   -- the card name as typed by the user.  For the
                      breakthrough commands ("!돌파", "!키라돌파") it must be
                      "<count> <name>": the first word is parsed as the
                      breakthrough count and the second word is the name.

        The message is passed to self.requestAdd(); nothing is returned.
        Parsing, database and scraping errors propagate to the caller.
        """
        lvBreak = 0
        if requestStr in ("!돌파".decode('utf-8'), "!키라돌파".decode('utf-8')):
            words = cardName.split(" ")
            lvBreak = int(words[0])
            cardName = words[1].replace("\017", "").strip()
        else:
            cardName = cardName.replace("\017", "").strip()

        cardUrl, cardName, cardCost, cardRarity = self._queryCardRecord(cardName)

        Doc = Jsoup.connect(cardUrl).get()

        # Image sources of the four illustration variants.
        images = [Doc.select("div[id=cardG1A%d]" % n).select("img").attr("src") for n in (1, 2, 3, 4)]

        infoRows = Doc.select("div[class=cardInfo]").select("table").select("tr")
        illustrator = infoRows[1].select("td")[1].text()
        faction = infoRows[2].select("td")[0].text()
        # Short faction labels are expanded; anything else is shown as-is.
        factionNames = {
            "마법".decode('utf-8'): "마법의 파".decode('utf-8'),
            "검술".decode('utf-8'): "검술의 성".decode('utf-8'),
            "기교".decode('utf-8'): "기교의 장".decode('utf-8'),
        }
        faction = factionNames.get(faction, faction)

        foldedBoxes = Doc.select("div[class=foldedBox]")
        skillInfo = foldedBoxes[1].select("div[class=content]").text()
        cardStory = foldedBoxes[2].select("div[class=content]").text()

        # Both stat tables are read up front (as before), so a page without
        # them fails regardless of the command.  Table 0 feeds the plain
        # commands, table 1 the "키라" commands.
        statTables = Doc.select("div[class=statTable]")
        normalStats = self._readStatTable(statTables[0])
        kiraStats = self._readStatTable(statTables[1])

        # Illustration command -> (header qualifier, position in images).
        # " 키라 각성 " used to lack its trailing space, gluing the qualifier
        # to the card name.
        illustrations = {
            "!일러".decode('utf-8'): (" ", 0),
            "!각성일러".decode('utf-8'): (" 각성 ".decode('utf-8'), 1),
            "!키라일러".decode('utf-8'): (" 키라 ".decode('utf-8'), 2),
            "!키라각성일러".decode('utf-8'): (" 키라 각성 ".decode('utf-8'), 3),
        }

        requestMsg = ""
        if requestStr == "!카드".decode('utf-8'):
            head = "\002[" + faction + " " + cardRarity + " " + cardName + "]\002 "
            requestMsg = self._statMessage(head, cardCost, normalStats, skillInfo)
        elif requestStr in ("!키라카드".decode('utf-8'), "!키라".decode('utf-8')):
            head = "\002[" + faction + " " + cardRarity + " 키라 ".decode('utf-8') + cardName + "]\002 "
            requestMsg = self._statMessage(head, cardCost, kiraStats, skillInfo)
        elif requestStr in illustrations:
            qualifier, index = illustrations[requestStr]
            requestMsg = "\002[" + faction + qualifier + cardName + " / " + illustrator + "]\002 "
            requestMsg = requestMsg + images[index]
        elif requestStr == "!카드설명".decode('utf-8'):
            requestMsg = "\002[" + faction + " " + cardName + " / " + illustrator + "]\002 "
            # One line break at position 140.  These used to be two separate
            # "if"s, so very long texts received a second, empty line.
            # NOTE(review): the "...(생략)" suffix is appended although
            # nothing is cut off - confirm whether truncation was intended.
            if len(cardStory) > 320:
                cardStory = cardStory[:140] + "\n" + cardStory[140:] + "...(생략)".decode('utf-8')
            elif len(cardStory) > 150:
                cardStory = cardStory[:140] + "\n" + cardStory[140:]
            requestMsg = requestMsg + " " + cardStory
        elif requestStr == "!돌파".decode('utf-8'):
            requestMsg = self._breakMessage(
                statTables[0], lvBreak, ("회 풀 돌파 ", "회 돌파 "),
                faction, cardRarity, cardName, cardCost)
        elif requestStr == "!키라돌파".decode('utf-8'):
            requestMsg = self._breakMessage(
                statTables[1], lvBreak, ("회 풀 돌파 키라 ", "회 돌파 키라 "),
                faction, cardRarity, cardName, cardCost)
        self.requestAdd(requestMsg)

    def _queryCardRecord(self, cardName):
        """Return (url, name, cost, rarity) of the card matching cardName.

        The name is tried as an exact LIKE pattern first and as a substring
        pattern if that attempt fails for any reason.  The pattern is bound
        as a statement parameter instead of being spliced into the SQL text,
        so quotes typed by a user cannot alter the query.
        """
        sql = "SELECT * FROM " + self.TABLENAME + " WHERE " + self.CARDNAME + " LIKE ?;"
        stmt = self.db.prepareStatement(sql)
        try:
            try:
                return self._fetchCardRecord(stmt, cardName)
            except:
                # Bare except kept from the original code.
                # NOTE(review): presumably needed so Java exceptions are
                # caught as well - confirm before narrowing.
                return self._fetchCardRecord(stmt, "%" + cardName + "%")
        finally:
            # The statement used to be left open; release it once the
            # values have been copied out.
            stmt.close()

    def _fetchCardRecord(self, stmt, pattern):
        """Run stmt with pattern bound and return (url, name, cost, rarity)."""
        stmt.setString(1, pattern)  # JDBC parameters are 1-based
        rs = stmt.executeQuery()
        # NOTE(review): as before, the row is read without calling rs.next();
        # this relies on the driver positioning on the first row - confirm.
        return (rs.getString(self.CARDURL), rs.getString(self.CARDNAME),
                rs.getString(self.CARDCOST), rs.getString(self.CARDRARITY))

    def _readStatTable(self, table):
        """Return the eleven texts of one stat table, in message order.

        Row 0 contributes cells 1 and 2 (the two levels); rows 1-3 (HP,
        attack, CP) contribute cells 0, 1 and 2 each.
        """
        rows = table.select("tbody").select("tr")
        levelCells = rows[0].select("td")
        values = [levelCells[1].text(), levelCells[2].text()]
        for rowIndex in (1, 2, 3):
            cells = rows[rowIndex].select("td")
            values.extend([cells[0].text(), cells[1].text(), cells[2].text()])
        return values

    def _statMessage(self, head, cardCost, stats, skillInfo):
        """Build the cost / level / HP / attack / CP summary that follows head."""
        (wakeupLV, lastLV, hp, wakeupHP, lastHP,
         atk, wakeupATK, lastATK, cp, wakeupCP, lastCP) = stats
        return (head
                + "코스트 : ".decode('utf-8') + cardCost + ", 각성(한계)레벨 : ".decode('utf-8') + wakeupLV + "(" + lastLV + ")"
                + " \002|\002 "
                + "체력(M/L) : ".decode('utf-8') + hp + "(" + wakeupHP + "/" + lastHP + ")"
                + ", "
                + "공격력(M/L) : ".decode('utf-8') + atk + "(" + wakeupATK + "/" + lastATK + ")"
                + ", "
                + "CP(M/L) : ".decode('utf-8') + cp + "(" + wakeupCP + "/" + lastCP + ")"
                + " \002|\002 "
                + skillInfo)

    def _breakMessage(self, table, lvBreak, labels, faction, cardRarity, cardName, cardCost):
        """Build the breakthrough message from the table's data-wakeup attribute.

        labels is (full-breakthrough label, partial-breakthrough label) as
        UTF-8 byte strings.  A requested count at or above the maximum is
        reported as the maximum.
        """
        steps = table.select("table").attr("data-wakeup").split("|")
        # The attribute holds "|"-separated "level,hp,atk,..." entries; the
        # highest usable entry index is two less than the entry count.
        maxBreak = len(steps) - 2
        if lvBreak >= maxBreak:
            count, label = maxBreak, labels[0]
        else:
            count, label = lvBreak, labels[1]
        message = "\002[" + faction + " " + cardRarity + " " + str(count) + label.decode('utf-8') + cardName + "]\002 "
        fields = steps[count].split(",")
        message = message + "레벨: ".decode('utf-8') + fields[0] + ", "
        hp = fields[1]
        message = message + "체력: ".decode('utf-8') + hp + ", "
        atk = fields[2]
        message = message + "공격력: ".decode('utf-8') + atk + ", "
        # Integer cost efficiency, truncated exactly like the old "/" did.
        cp = str((int(hp) + int(atk)) // int(cardCost))
        return message + "CP: ".decode('utf-8') + cp
Exemple #38
0
 def getMonsterInfoUrl(self, monsterName):
     """Search the monster list by name and return the first result's link.

     Spaces in monsterName are sent as "%20".  The value returned is the
     href of the link in the first cell of the second row of the result
     table.
     """
     query = monsterName.replace(" ", "%20")
     page = Jsoup.connect("http://www.thisisgame.com/pad/info/monster/list.php?sf=name&sw=" + query).get()
     rows = page.select("div[class=pad-table2 pad-info-light]").select("tr")
     firstCell = rows[1].select("td")[0]
     return firstCell.select("a").attr("href")
Exemple #39
0
def clean_question(html):
	"""Return the plain text of html, leaving out whatever is inside <code> elements."""
	parsed = Jsoup.parse(html)
	code_elements = parsed.select("code")
	# Emptying the elements drops their content from the extracted text
	code_elements.empty()
	return parsed.text()
                    node["node"].replaceWith(new_div)
                    break


# Take input, HTML output and text output paths from the command line when
# all three are given; otherwise fall back to the built-in default paths.
if len(argv) >= 4:
    infile, outfile, textfile = argv[1], argv[2], argv[3]
else:
    infile = "/Users/mac/Downloads/im"
    outfile = "/Users/mac/Downloads/dialogues.html"
    textfile = "/Users/mac/Downloads/dialogues.txt"

with iopen(outfile, "w", encoding="utf-8", errors="ignore") as output:
    input = File(infile)
    soup = Jsoup.parse(input, "UTF-8", "")

    # First, create a new document
    new_doc = Jsoup.parse("<body></body>")
    new_doc.updateMetaCharsetElement(True)
    new_doc.charset(Charset.forName("UTF-8"))
    new_body = new_doc.select("body").first()

    for element in soup.select("*"):
        if (element.tag().toString() == "ul" and element.className()
                == "ui_clean_list im-mess-stack--mess _im_stack_messages") or (
                    element.tag().toString() == "div"
                    and element.className() == "im-mess-stack--pname"):
            new_body.appendChild(element)

    # Then remove empty tags from it and transform the labels