def doInBackground(self):
    """Download self.url, traverse it with a FormattingVisitor and show the result.

    The visitor's ``Tabs`` component is added to ``self.frame``.  Any
    exception raised along the way is caught; its type and value are printed
    instead of being propagated.
    """
    try:
        # Tell the user what is happening, then retrieve the page.
        print('connecting...')
        page = Jsoup.connect(self.url).get()

        # Let the traversor hand every node of the document to the visitor.
        print('processing...')
        formatter = FormattingVisitor(self.url)
        NodeTraversor(formatter).traverse(page)

        # Display the component the visitor built.
        self.frame.add(formatter.Tabs)
        self.frame.validate()
    except:
        # Catch-all kept deliberately: report the failure, do not re-raise.
        kind, detail = [str(part) for part in sys.exc_info()[:2]]
        print('\nError: %s' % kind)
        print('value: %s' % detail)
def doInBackground(self):
    """Download self.url and list the text of every <a> element it contains.

    Progress and errors are shown through ``self.msg``; the link text to
    absolute URL mapping is stored in ``self.docLinks``.
    """
    try:
        # Tell the user what is happening, then retrieve the page.
        self.msg.setText('connecting...')
        self.doc = Jsoup.connect(self.url).get()

        self.msg.setText('processing...')

        # Fill a detached model first: the visible model must not be updated
        # from within the loop.
        links = DefaultListModel()
        for anchor in self.doc.getElementsByTag('a'):
            label = anchor.text()
            self.docLinks[label] = anchor.attr('abs:href')
            links.addElement(label)

        # Swap the finished model in with a single call.
        self.List.setModel(links)
        self.msg.setText('Make a selection')
    except:
        # Catch-all kept deliberately: report the failure, do not re-raise.
        kind, detail = [str(part) for part in sys.exc_info()[:2]]
        print('\nError: %s' % kind)
        print('value: %s' % detail)
        self.msg.setText(detail)
def getPS3News(self):
    """Fetch the board at self.PS3Url and store a one-line headline in self.PS3News.

    The parsed page is kept in ``self.Doc``.  The headline is built from the
    second <a> element of the first row matching ``tr[class= ]``.
    """
    self.Doc = Jsoup.connect(self.PS3Url.decode('utf-8')).get()
    board = self.Doc.select('table[class=tbl tbl_list_comm]')
    first_row = board.select('tr[class= ]').get(0)
    anchor = first_row.select('a').get(1)

    link = "http://bbs2.ruliweb.daum.net/gaia/do/ruliweb/default/ps/93/" + anchor.attr('href')
    title = anchor.text().encode('utf-8')
    self.PS3News = "\002[" + title.decode('utf-8') + "]\002 " + link.decode('utf-8')
def highlight_files(self, fnames):
    """Yield one jsoup document of highlighted HTML per file name in fnames.

    For each file a progress event is posted on ``self.changeUpdate`` before
    reading, before highlighting and before the HTML is post-processed.
    """
    from pygments import highlight
    from org.jsoup import Jsoup

    # Page-size stylesheet placed into the <head> of every generated document.
    page_style = """ <style type="text/css"> @page { size: 10in 11in; position: relative; margin: 0.2in 0.2in; display: inline-table; } </style> """

    def announce(action, text):
        # Post one progress event for the given action.
        self.changeUpdate.post(AsyncEvent(self, action, text))

    for filename in fnames:
        announce(ACTION.FILEREAD, "Reading file: %s - 1/5" % filename)
        source_text = self.file_to_string(filename)

        announce(ACTION.CONVERTTOHTML, "Hiliting file: %s - 2/5" % filename)
        markup = highlight(source_text, self.lexer, self.formatter)

        announce(ACTION.CLEANHTML, "Cleaning the HTML 3/5")
        doc = Jsoup.parseBodyFragment(markup)
        doc.head().html(page_style).after(doc.getElementsByTag('div').first())
        yield doc
def getTodayD(self, group = ""):
    """Send today's schedule cell for one group (A-E) through self.requestAdd.

    ``group`` is matched case-insensitively against the letter or the letter
    followed by the Korean group suffix; anything else falls back to group E.
    """
    url = "http://m.thisisgame.com/pad/"
    page = Jsoup.connect(url).get()

    # (letter, labelled form, reply header); the position is the <td> index.
    groups = (
        ("A", "A조", "\002[A조 게릴라 일정]\002 "),
        ("B", "B조", "\002[B조 게릴라 일정]\002 "),
        ("C", "C조", "\002[C조 게릴라 일정]\002 "),
        ("D", "D조", "\002[D조 게릴라 일정]\002 "),
        ("E", "E조", "\002[E조 게릴라 일정]\002 "),
    )

    # Default: unknown or empty group names are answered with group E.
    column = 4
    header = "\002[E조 게릴라 일정]\002 ".decode('utf-8')

    wanted = group.upper()
    for index, (letter, labelled, title) in enumerate(groups):
        if wanted == letter or wanted == labelled.decode('utf-8'):
            column = index
            header = title.decode('utf-8')
            break

    cells = page.select("div[class=content guerilla-time]").select("tr")[1].select("td")
    self.requestAdd(header + cells[column].text())
def getMonsterInfo(self, monsterName):
    """Look up a monster page and send its summary lines through self.requestAdd.

    Two lines are always sent (general stats, then leader/active skill); a
    third line listing the awake skills is sent only when the page has any.

    Fix: the selector for the active-skill name was missing its closing
    bracket (``dl[class=normal grouped``), which is not a valid attribute
    selector.
    """
    url = self.getMonsterInfoUrl(monsterName)
    Doc = Jsoup.connect(url).get()

    def dd(selector):
        # Text of the <dd> elements below everything matching selector.
        return Doc.select(selector).select("dd").text()

    def second_dd(selector):
        # Same as dd(), but only for the second element matching selector.
        return Doc.select(selector)[1].select("dd").text()

    def skill_dd(group, field):
        # Text of the <dd> elements of one field inside a skill group.
        return Doc.select(group).select(field).select("dd").text()

    leader = "dl[class=leader grouped]"
    normal = "dl[class=normal grouped]"

    # Line 1: code / type / grade name, then level, cost, exp and base stats.
    requestStr = "\002["
    requestStr = requestStr + dd("dl[class=monstercode]") + " / "
    requestStr = requestStr + dd("dl[class=monsterclass1]").replace("타입 ".decode('utf-8'), "타입, ".decode('utf-8')) + " / "
    requestStr = requestStr + dd("dl[class=monstergrade]") + " "
    requestStr = requestStr + dd("dl[class=monstername]") + "]\002 "
    requestStr = requestStr + dd("dl[class=maxlevel]") + ", "
    requestStr = requestStr + dd("dl[class=teamcost]") + ", "
    requestStr = requestStr + dd("dl[class=maxexp]") + " | "
    requestStr = requestStr + "능력치 [".decode('utf-8')
    requestStr = requestStr + "체력 :".decode('utf-8') + second_dd("dl[class=hp]") + " "
    requestStr = requestStr + "공격력 :".decode('utf-8') + second_dd("dl[class=attack]") + " "
    requestStr = requestStr + "회복 :".decode('utf-8') + second_dd("dl[class=heal]") + "] "

    # Line 2: leader skill followed by the active ("normal") skill.
    requestStr2 = "\002[".decode('utf-8') + skill_dd(leader, "dl[class=skillname]") + "]\002 "
    requestStr2 = requestStr2 + skill_dd(leader, "dl[class=description]") + " / "
    requestStr2 = requestStr2 + skill_dd(leader, "dl[class=skillextra]") + " "
    requestStr2 = requestStr2 + "\002[".decode('utf-8') + skill_dd(normal, "dl[class=skillname]") + "]\002 "
    requestStr2 = requestStr2 + skill_dd(normal, "dl[class=turn]") + " / "
    requestStr2 = requestStr2 + skill_dd(normal, "dl[class=description]")

    # Line 3 (optional): awake skill names.
    awake = Doc.select("dl[class=awakeskills]").select("span[class=name]")
    requestStr3 = None
    if len(awake) != 0:
        requestStr3 = "\002[각성스킬]\002 ".decode('utf-8')
        for span in awake:
            requestStr3 = requestStr3 + span.text() + " "
        # NOTE(review): this also turns the space after the header, and any
        # space inside a skill name, into ", ".  Kept unchanged so the output
        # format stays exactly as before.
        requestStr3 = requestStr3.strip().replace(" ", ", ")

    self.requestAdd(requestStr)
    self.requestAdd(requestStr2)
    if requestStr3 is not None:
        self.requestAdd(requestStr3)
def Migration(self):
    """Rebuild the card table from the card list page.

    Every existing row of ``self.TABLENAME`` is deleted, then one row per
    <tr> of the list page is inserted (id, link, name, face image, rarity,
    cost).

    Changes from the previous version: a single PreparedStatement is reused
    for all rows and executed as one batch (previously a new statement was
    prepared for every row and never closed), the statement is closed
    afterwards, and an empty page no longer touches an unbound statement.
    """
    url_ko = "http://ma.inven.co.kr/dataninfo/card/"
    rows = Jsoup.connect(url_ko).get().select("tbody[id=filteredList]").select("tr")

    self.stat.executeUpdate("DELETE FROM " + self.TABLENAME + ";")
    if rows.size() == 0:
        return

    prep = self.db.prepareStatement("INSERT INTO " + self.TABLENAME + " VALUES (?, ?, ?, ?, ?, ?);")
    try:
        for row in rows:
            href = row.select("a").attr("href")
            images = row.select("img")

            prep.setString(1, href[href.find("=") + 1:])       # ID
            prep.setString(2, url_ko + href)                   # LINK
            prep.setString(3, row.select("a").text())          # NAME
            prep.setString(4, images.attr("src"))              # FACE

            # One hollow star per star image; a star found at image index 5
            # collapses the rarity to a single filled star.
            # NOTE(review): the original indentation was lost; the j == 5
            # check is assumed to belong to the star branch - confirm.
            rarity = ''
            for j in range(images.size()):
                if images[j].attr("src").find("card_grade_star.png") != -1:
                    rarity = rarity + "☆".decode('utf-8')
                    if j == 5:
                        rarity = "★".decode('utf-8')
            prep.setString(5, rarity)                          # RARITY
            prep.setString(6, row.select("td[class=cost]").text())  # COST
            prep.addBatch()

        # Run the whole batch inside one transaction.
        self.db.setAutoCommit(False)
        prep.executeBatch()
        self.db.setAutoCommit(True)
    finally:
        prep.close()
def doInBackground( self ) :
    """Download self.url, traverse it with a FormattingVisitor and show the result.

    The visitor's ``Tabs`` component becomes the right component of
    ``self.sPane``.  Progress and any caught exception are reported through
    ``self.showMessage``.
    """
    try :
        # Tell the user what is happening, then retrieve the page.
        self.showMessage( 'Connecting...' )
        page = Jsoup.connect( self.url ).get()

        # Let the traversor hand every node of the document to the visitor.
        self.showMessage( 'Processing...' )
        formatter = FormattingVisitor( self.url )
        NodeTraversor( formatter ).traverse( page )

        # Display the component the visitor built.
        self.sPane.setRightComponent( formatter.Tabs )
    except :
        # Catch-all kept deliberately: report the failure, do not re-raise.
        kind, detail = [ str( part ) for part in sys.exc_info()[ :2 ] ]
        self.showMessage( '\nheaderTask Error: %s\nvalue: %s' % ( kind, detail ) )
def get_html(self, n=10):
    """Return an HTML page with the accepted answers of up to n questions.

    Entry point of the Stack Overflow parser.  Each question returned by
    ``self._stackoverflow_links(n)`` is downloaded and its
    ``.accepted-answer .post-text`` elements are collected.  When no template
    document is available a plain "no matches" message is returned instead.
    """
    user_agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.109 Safari/537.36'

    answers = []
    questions = self._stackoverflow_links(n)
    for question in questions:
        title = question["title"]
        print("Treat link %s" % (title,))
        page = Jsoup.connect(question["link"]).userAgent(user_agent).get()

        # TODO: continue from here (note carried over from the original).
        # The original note says the template is initialised to None, so the
        # first page fetched becomes the template document.
        if not self.template:
            self.template = page

        answers.append((title, page.select(".accepted-answer .post-text")))

    if not self.template:
        return "%s Results. Your search returned no matches." % (
            len(questions))

    row_template = self._table()
    self.snippets = [
        row_template % (title, index, body.outerHtml())
        for index, (title, body) in enumerate(answers[:n])
    ]
    self._relative_to_absolute()
    self.template.body().html("".join(self.snippets))
    self._add_style()
    return self.template.outerHtml()
def remove_code_block(s):
    """Return the text of the HTML string s with every <code> element removed."""
    from org.jsoup import Jsoup

    parsed = Jsoup.parse(s)
    code_elements = parsed.select("code")
    for code_element in code_elements:
        # Detach the element (and its content) from the document.
        code_element.remove()
    plain_text = parsed.text()
    return plain_text
def getMonsterPic(self, monsterName):
    """Send the monster's name and image URL through self.requestAdd."""
    page = Jsoup.connect(self.getMonsterInfoUrl(monsterName)).get()

    name = page.select("dl[class=monstername]").select("dd").text()
    picture = page.select("div[class=image]").select("img").attr("src")

    self.requestAdd("\002[" + name + "]\002 " + picture)
    return
def fetch_document(scratch_project_url, timeout, user_agent):
    """Fetch scratch_project_url and return its text parsed as JSON.

    The request is sent with the given user agent and timeout, and the
    response content type is not checked.
    """
    response = (
        Jsoup.connect(scratch_project_url)
        .userAgent(user_agent)
        .timeout(timeout)
        .ignoreContentType(True)
        .get()
    )
    return json.loads(response.text())
def connect(self):
    """Download self.url into self.doc; print a message if that fails."""
    browser = "Mozilla/5.0 (Windows NT 6.3; rv:36.0) Gecko/20100101 Firefox/36.0"
    try:
        request = Jsoup.connect(self.url)
        request = request.userAgent(browser).referrer("https://www.google.com")
        # NOTE(review): jsoup documents a timeout of 0 as "no timeout" -
        # confirm this is intended for the bundled jsoup version.
        self.doc = request.timeout(0).get()
    except Exception as e:
        print("Download - Error:  %s %s" % (self.url, e))
def so_text(s):
    """Return the plain text of an SO body: no <code> elements, no HTML tags.

    The input is passed through ``unescape_html`` before it is parsed.
    """
    from org.jsoup import Jsoup

    parsed = Jsoup.parse(unescape_html(s))
    code_elements = parsed.select("code")
    for code_element in code_elements:
        # Detach the element (and its content) from the document.
        code_element.remove()
    return parsed.text()
def getProfileUrl(self, username):
    """Resolve username to a numeric user id and hand it to self.getUserInfo.

    The id is read from the fifth <script> element of the profile page, from
    the text between ``var userId = `` and the next ``;``.  When no id can be
    extracted, "?" is sent through ``self.requestAdd``.

    Fixes: the terminating ``;`` is now searched after the marker (it used to
    be the first ``;`` anywhere in the script), a missing marker no longer
    produces a garbage slice, and the builtin ``id`` is no longer shadowed.
    """
    url = "http://osu.ppy.sh/u/" + username
    Doc = Jsoup.connect(url).get()
    script = Doc.select("script")[4].html()

    marker = "var userId = "
    user_id = ""
    start = script.find(marker)
    if start != -1:
        start += len(marker)
        end = script.find(";", start)
        if end != -1:
            user_id = script[start:end]

    if user_id != "":
        self.getUserInfo(user_id, username)
    else:
        self.requestAdd("?")
def typeCmdProc(self):
    """If the current message contains a link, announce that page's <title>.

    The first ``http://`` or ``https://`` occurrence up to the next space is
    taken as the URL.  Failures while fetching or parsing are ignored.

    Fixes: the URL now starts at the scheme that was actually matched (it
    used to start at the first ``http`` substring, which may precede the real
    link), and the message is read once instead of four times.
    """
    msg = self.getMessage('msg')
    starts = [pos for pos in (msg.find('http://'), msg.find('https://')) if pos != -1]
    if not starts:
        return

    url = msg[min(starts):].split(' ')[0]
    try:
        page = Jsoup.connect(url).get()
        title = '\002[ LINK TITLE ]\002 ' + page.select('title').text().replace('\n', ' ')
        self.requestAdd(title)
    except:
        # Best effort by design: an unreachable or unparsable link is
        # silently skipped.
        pass
def getCallNumber(self, number):
    """Look up a phone number and send the first result through self.requestAdd.

    The parsed page is kept in ``self.Doc``.  If anything fails while
    fetching, parsing or sending, a "could not search / connection failed"
    message is sent instead.

    Fix: the download and the parsing used to sit outside the ``try``, so the
    failure message could never be sent for exactly the failures it
    describes.  The repeated select chain is also evaluated once.
    """
    myUrl = self.Url + str(number)
    try:
        self.Doc = Jsoup.connect(myUrl.decode('utf-8')).get()
        cells = self.Doc.select("tr[class=bg1]").select("td").select("td")
        link = cells[1].select("a")

        myNumber = link.text().split()[0]
        # The href's first two characters are dropped before prefixing the host.
        numberUrl = "http://www.thecall.co.kr/" + link.attr("href")[2:]

        self.requestAdd("\002[" + myNumber + " / " + numberUrl + "]\002 " + cells[2].text())
    except:
        self.requestAdd("해당 번호를 검색할 수 없거나, 연결에 실패했습니다".decode('utf-8'))
def checkShortLink(self, keyword):
    """Query the stats of a short link and report the result via self.requestAdd.

    A "not found" message or the ``short -> long`` mapping is sent depending
    on the reply's <message>; any other reply sends nothing.
    """
    checkUrl = self.BASEURL + "&action=url-stats&shorturl=" + urllib.unquote(keyword)
    reply = Jsoup.connect(checkUrl).get()

    message = reply.select("message").text()
    shorturl = reply.select("shorturl").text()
    url = reply.select("url").text()

    if "Error: short URL not found" in message:
        self.requestAdd("해당 키워드로 정의된 숏링크를 수 없었어요.".decode('utf-8'))
        return
    if "success" in message:
        self.requestAdd("\002[ " + shorturl + " -> ".decode('utf-8') + url + " ]\002")
def getUserInfo(self, id):
    """Send a one-line summary of a player's statistics through self.requestAdd.

    The values are the second <td> of the first fourteen <tr> rows found
    under ``div[class=pageContent]``, each prefixed with its label.
    """
    url = "http://www.noobmeter.com/player/kr/" + id
    page = Jsoup.connect(url).get()

    # One label per table row, in row order.
    labels = (
        " 퍼포먼스 레이팅: ",
        " | 이피신시 레이팅: ",
        " | WN6: ",
        " | WN7: ",
        " | 총배틀: ",
        " | 승율: ",
        " | 평균데미지: ",
        " | 평균경험치: ",
        " | 최대경험치: ",
        " | 점거율: ",
        " | 방어율: ",
        " | 격파율: ",
        " | 생존율: ",
        " | 킬뎃: ",
    )

    rows = page.select("div[class=pageContent]").select("tbody").select("tr")
    values = [rows[index].select("td")[1].text() for index in range(len(labels))]

    requestStr = "\002[" + id + "]\002"
    for label, value in zip(labels, values):
        requestStr = requestStr + label.decode('utf-8') + value
    self.requestAdd(requestStr)
def doInBackground(self):
    """Download self.url and show the normalised document in self.area.

    Any exception is caught and its type and value are shown in the same
    text area instead.
    """
    try:
        # Tell the user what is happening, then retrieve the page.
        self.area.setText('connecting...')
        page = Jsoup.connect(self.url).get()
        self.area.setText(str(page.normalise()))
    except:
        # Catch-all kept deliberately: report the failure, do not re-raise.
        kind, detail = [str(part) for part in sys.exc_info()[:2]]
        self.area.setText('\nError: %s\nValue: %s' % (kind, detail))
def Migration(self):
    """Rebuild the card table from the Korean and Japanese card list pages.

    Every existing row of ``self.TABLENAME`` is deleted, then one row is
    inserted for every odd-indexed <a> element of each list page: id,
    country code, absolute link and the first part of the link's HTML (stored
    twice).

    Changes from the previous version: both pages are downloaded before the
    table is cleared, so a failed download no longer leaves a half-filled
    table; the duplicated KO/JP code is a single loop; and one
    PreparedStatement is reused for all rows and closed afterwards
    (previously a new statement was prepared for every row and never closed).
    """
    # (country code, list page, base URL the relative hrefs are appended to)
    sources = (
        ("KO", "http://web.ruliweb.daum.net/etcs/mia/mia_card.htm", "http://web.ruliweb.daum.net/etcs/mia/"),
        ("JP", "http://web.ruliweb.daum.net/etcs/mia_jp/mia_card.htm", "http://web.ruliweb.daum.net/etcs/mia_jp/"),
    )

    records = []
    for country, list_url, base_url in sources:
        anchors = Jsoup.connect(list_url).get().select("table[class=mia_tbl]").select("div").select("a")
        for i in range(anchors.size()):
            # Only every second anchor is used.
            if i % 2 != 1:
                continue
            href = anchors[i].select("a").attr("href")
            name = anchors[i].html().replace("<br />", "*").split("*")[0]
            # NOTE(review): columns 4 and 5 receive the same value, exactly
            # as before - confirm whether column 5 should be another part.
            records.append((href[href.find("=") + 1:], country, base_url + href, name, name))

    self.stat.executeUpdate("DELETE FROM " + self.TABLENAME + ";")
    if not records:
        return

    prep = self.db.prepareStatement("INSERT INTO " + self.TABLENAME + " VALUES (?, ?, ?, ?, ?);")
    try:
        for record in records:
            for position, value in enumerate(record):
                prep.setString(position + 1, value)
            prep.addBatch()

        # Run the whole batch inside one transaction.
        self.db.setAutoCommit(False)
        prep.executeBatch()
        self.db.setAutoCommit(True)
    finally:
        prep.close()
def getCheckFocus(self):
    """Send the first six week-focus list items, on two lines, via self.requestAdd.

    Anchor and span tags are stripped, <strong> becomes the \\002 control
    character and <br /> becomes a space.
    """
    url = "http://m.thisisgame.com/pad/"
    page = Jsoup.connect(url).get()
    entries = page.select("div[class =content week-focus]").select("li")

    # Items 0-2 form the first line, items 3-5 the second.
    first_line = " / ".join([entries[k].html() for k in range(0, 3)])
    second_line = " / ".join([entries[k].html() for k in range(3, 6)])
    requestStr = first_line + "\n" + second_line

    requestStr = re.sub("</?[a][a-z0-9]*[^<>]*>", "", requestStr)
    for unwanted in ("<span>", "</span>", " 오후 11시 59분".decode('utf-8')):
        requestStr = requestStr.replace(unwanted, "")

    requestStr = requestStr.replace("<strong>", "\002").replace("</strong>", "\002").replace("<br />", " ")
    self.requestAdd(requestStr)
def getMonsterSearchResult(self, monsterName):
    """Search monsters by name and send up to six result rows via self.requestAdd.

    Each row contributes its cell text up to the first `` [``.  Rows 0 and 1
    are appended directly, later rows are separated by `` / ``.  Nothing is
    appended when the result table has exactly one row.
    """
    monsterName = monsterName.replace(" ", "%20")
    url = "http://www.thisisgame.com/pad/info/monster/list.php?sf=name&sw=" + monsterName
    rows = Jsoup.connect(url).get().select("div[class=pad-table2 pad-info-light]").select("tr")

    requestStr = monsterName + ": "
    if len(rows) != 1:
        for index, row in enumerate(rows):
            cell_text = row.select("td").text()
            entry = cell_text[0:cell_text.find(" [")]
            if index > 1:
                entry = " / " + entry
            requestStr = requestStr + entry
            # Stop after the sixth row.
            if index == 5:
                break
    self.requestAdd(requestStr)
def doInBackground(self):
    """Download self.url and fill self.cb with the text of every <a> element.

    Progress and errors are shown through ``self.msg``.  The combo box is
    refilled whenever ``self.doc`` holds a document after the download
    attempt.

    Fixes: the error report printed ``str(type)`` (the builtin) instead of
    the caught exception type, and link texts are no longer forced through
    ``str()``, which raises for non-ASCII text under Python 2.
    """
    try:
        self.msg.setText('working...')
        self.doc = Jsoup.connect(self.url).get()
        self.msg.setText('ready')
    except:
        # Catch-all kept deliberately: report the failure, do not re-raise.
        Type, value = sys.exc_info()[:2]
        print('Error: %s' % str(Type))
        print('value: %s' % str(value))
        self.msg.setText(str(value))

    # Was the specified URL retrieved?
    if self.doc:
        self.cb.removeAllItems()
        for link in self.doc.getElementsByTag('a'):
            self.cb.addItem(link.text())
def requestShortLink(self, geturl, keyword):
    """Ask the shortener for a short link and return the outcome as text.

    Returns the short URL on success, the existing short URL when the link
    already exists (or the reply's message if none was given), the reply's
    message for any other failure, and None for an unknown status.

    Fix: the unused ``Doc.select("url")[1]`` lookup was removed.  It raised
    IndexError whenever the reply held fewer than two <url> elements, which
    made the failure branches unreachable for such replies.
    """
    quoted = urllib.quote(geturl.encode('utf-8')).replace("&", "%26").replace("#", "%23")
    geturl = self.BASEURL + "&action=shorturl&url=" + quoted
    if keyword != "":
        geturl = geturl + "&keyword=" + keyword.encode('utf-8')

    Doc = Jsoup.connect(geturl).get()
    status = Doc.select("status").text()
    message = Doc.select("message").text()
    shorturl = Doc.select("shorturl").text()

    if status == "success":
        return shorturl
    if status == "fail":
        # An already-registered link is answered with its short URL when the
        # reply carries one.
        if message.find("already exists in database") != -1 and shorturl != "":
            return shorturl
        return message
    return None
def requestShortLink(self, geturl, keyword):
    """Ask the shortener for a short link and report the outcome via self.requestAdd.

    A success message with ``long -> short`` is sent on success; on failure
    one of three failure messages is sent depending on the reply's message
    and whether it carries a short URL.  An unknown status sends nothing.

    Fix: the second <url> element is now read only in the success branch,
    the only place it is used.  Reading it up front raised IndexError
    whenever the reply held fewer than two <url> elements, which made the
    failure branches unreachable for such replies.
    """
    quoted = urllib.quote(geturl.encode('utf-8')).replace("&", "%26").replace("#", "%23")
    geturl = self.BASEURL + "&action=shorturl&url=" + quoted
    if keyword != "":
        geturl = geturl + "&keyword=" + keyword.encode('utf-8')

    Doc = Jsoup.connect(geturl).get()
    status = Doc.select("status").text()
    message = Doc.select("message").text()
    shorturl = Doc.select("shorturl").text()

    if status == "success":
        url = Doc.select("url")[1].text()
        self.requestAdd("숏링크 생성 완료! ".decode('utf-8') + "\002[ " + url + " -> " + shorturl + " ]\002")
    elif status == "fail":
        if message.find("already exists in database") != -1:
            if shorturl == "":
                self.requestAdd("실패! 정의되지 않은 오류. ".decode('utf-8') + " 메세지: ".decode('utf-8') + message)
            else:
                self.requestAdd("실패!".decode('utf-8') + " 이미 생성된 링크일 수 있습니다 ".decode('utf-8') + " \002[ ".decode('utf-8') + shorturl + " ]\002")
        else:
            self.requestAdd("실패! 정의되지 않은 오류. ".decode('utf-8') + " 메세지: ".decode('utf-8') + message + " " + " \002[ ".decode('utf-8') + shorturl + " ]\002")
def so_tokenizer(s, remove_html=True, as_str=True):
    """Tokenize s into a set of words, optionally stripping HTML first.

    Every distinct token contributes its ``camel_case_split`` parts and its
    lower-cased form; entries found in ``java_stopwords`` are dropped.
    Returns the words joined by single spaces when ``as_str`` is true,
    otherwise the set itself.
    """
    if remove_html:
        from org.jsoup import Jsoup
        s = Jsoup.parse(unescape_html(s)).text()

    candidates = []
    for token in set(tokenize(s)):
        candidates.extend(camel_case_split(token))
        candidates.append(token.lower())

    words = set([word for word in candidates if word not in java_stopwords])

    if not as_str:
        return words
    return " ".join(words)
def doInBackground(self):
    """Download self.url and store its plain text in self.text.

    Progress is shown through ``self.result``.  Any exception is caught,
    printed, and shown in the same component.
    """
    try:
        # Tell the user what is happening, then retrieve the page.
        self.result.setText('connecting...')
        page = Jsoup.connect(self.url).get()

        # Convert the parsed document to plain text.
        self.result.setText('processing...')
        self.text = self.getPlainText(page)
    except:
        # Catch-all kept deliberately: report the failure, do not re-raise.
        kind, detail = [str(part) for part in sys.exc_info()[:2]]
        print('\nError: %s' % kind)
        print('value: %s' % detail)
        self.result.setText('Exception: %s' % detail)
def doInBackground( self ) :
    """Download self.url and list the text of every <a> element it contains.

    Progress is written to element 0 of the list's current model.  The link
    text to absolute URL mapping is stored in ``self.docLinks``.  On any
    exception the error is printed and ``sys.exit()`` is called.
    """
    try :
        # Tell the user what is happening, then retrieve the page.
        status = self.List.getModel()
        status.set( 0, 'Connecting...' )
        self.doc = Jsoup.connect( self.url ).get()

        status.set( 0, 'Processing...' )

        # Fill a detached model first: the visible model must not be updated
        # from within the loop.  'abs:href' asks jsoup for the absolute URL.
        self.model = DefaultListModel()
        for anchor in self.doc.getElementsByTag( 'a' ) :
            label = anchor.text()
            self.docLinks[ label ] = anchor.attr( 'abs:href' )
            self.model.addElement( label )

        # Swap the finished model in with a single call.
        self.List.setModel( self.model )
    except :
        kind, detail = [ str( part ) for part in sys.exc_info()[ :2 ] ]
        print( '\nsoupTask Error: %s' % kind )
        print( 'value: %s' % detail )
        sys.exit()
def fetch_document(scratch_project_url, timeout, user_agent):
    """Fetch scratch_project_url and return the document in a wrapper.

    The request is sent with the given user agent and timeout; the result is
    wrapped in ``_ResponseJsoupDocumentWrapper``.
    """
    document = (
        Jsoup.connect(scratch_project_url)
        .userAgent(user_agent)
        .timeout(timeout)
        .get()
    )
    return _ResponseJsoupDocumentWrapper(document)
def getCardInfo(self, requestStr, cardName):
    """Look up a card, scrape its detail page and queue the reply.

    The card row is searched in the self.TABLENAME table by name: an exact
    LIKE match first, then a substring match if that fails.  When the first
    word of cardName is "일" or "한" the lookup is restricted to rows whose
    self.COUNTRY column is 'JP' or 'KO' and the name is reduced with
    self.countSplit(cardName, 1).  The page stored in the row's URL column
    is fetched with Jsoup and the message for requestStr is passed to
    self.requestAdd(); an empty message is queued when requestStr is not
    one of the commands handled here.

    requestStr -- command text (unicode), e.g. the decoded "!카드2"
    cardName   -- user-supplied card name; treated as untrusted input

    Lookup and scraping errors (no matching row, missing table cells,
    network failures) are not handled here and propagate to the caller.
    """
    def u8(text):
        # Literals in this file are UTF-8 byte strings; work in unicode.
        return text.decode('utf-8')

    def fetchRow(pattern, country):
        # Return (url, id) of the row matching pattern.  Table and column
        # names come from class constants; every user-supplied value is
        # bound as a parameter so it cannot change the SQL statement.
        where = self.CARDNAME + " LIKE ?"
        if country is not None:
            where = self.COUNTRY + " = ? AND " + where
        stmt = self.db.prepareStatement(
            "SELECT * FROM " + self.TABLENAME + " WHERE " + where + ";")
        try:
            position = 1
            if country is not None:
                stmt.setString(position, country)
                position += 1
            stmt.setString(position, pattern)
            rs = stmt.executeQuery()
            # NOTE(review): the row is read without rs.next(), exactly as
            # the previous code did -- confirm the JDBC driver in use
            # positions the result set on the first row.
            return rs.getString(self.CARDURL), rs.getString(self.CARDID)
        finally:
            # The statement used to be leaked on every call.
            stmt.close()

    def wrapDescription(text):
        # Cut over-long descriptions at 320 characters (marking the cut),
        # then break the line once after 140 characters.
        if len(text) > 320:
            text = text[:320] + u8("...(생략)")
        if len(text) > 150:
            text = text[:140] + "\n" + text[140:]
        return text

    cardName = cardName.replace("\017", "").strip()

    # A leading "일" / "한" word selects the JP / KO edition of the card.
    country = None
    prefix = cardName.split()[0]
    if prefix == u8("일"):
        country = 'JP'
    elif prefix == u8("한"):
        country = 'KO'
    if country is not None:
        cardName = self.countSplit(cardName, 1)

    try:
        cardUrl, cardNumber = fetchRow(cardName, country)
    except:
        # Catch-all kept on purpose: a failed exact lookup surfaces as a
        # Java exception from the driver; retry as a substring match.
        cardUrl, cardNumber = fetchRow("%" + cardName + "%", country)

    Doc = Jsoup.connect(cardUrl).get()

    # --- illustration links -------------------------------------------
    imageCells = Doc.select("table[id=content_block_1]").select("td")
    cardImages = [imageCells[i].select("a").attr("href") for i in range(6)]

    # The illustrator cell is cell 6, or cell 12 when cells 6-11 hold the
    # "original" illustrations; stays empty when neither one parses.
    cardIllustrator = ""
    for cellIndex in (6, 12):
        try:
            cardIllustrator = imageCells[cellIndex].text().split(":")[1].strip()
            break
        except:
            pass

    # "Original" illustrations are all-or-nothing: with fewer than twelve
    # cells the first five slots carry a "not found" text instead.
    try:
        originals = [imageCells[i].select("a").attr("href")
                     for i in range(6, 12)]
    except:
        notFound = u8("오리지날 일러를 찾을 수 없었어요")
        originals = [notFound] * 5 + [""]
    cardImages = cardImages + originals

    # --- card description table ---------------------------------------
    infoRows = Doc.select("table[class=mia_tbl]").select("tr")
    cardName = infoRows[0].select("td")[0].text()
    cardInfo = infoRows[1].text()
    skillParts = infoRows[2].select("p")
    cardSkillName = skillParts[0].text()
    cardSkillInfo = skillParts[1].text()
    # Normalise the stars, then collapse five of them into one filled star.
    cardRarelity = infoRows[3].select("td")[0].text()
    cardRarelity = cardRarelity.replace(u8("★"), u8("☆"))
    cardRarelity = cardRarelity.replace(u8('☆☆☆☆☆'), u8('★'))
    cardCost = infoRows[3].select("td")[1].text()
    cardFaction = infoRows[4].select("td")[0].text()
    cardGender = infoRows[5].select("td")[0].text()
    cardForce = infoRows[5].select("td")[1].text()
    cardGrowthRate = infoRows[6].select("td")[0].text()
    cardLimitBreakCount = infoRows[6].select("td")[1].text()
    cardGetInfo = infoRows[7].select("td")[0].text()

    # --- stat table ----------------------------------------------------
    statCells = Doc.select(
        "table[class=mia_tbl mia_a003 mia_normal]").select("td")

    def levelFrom(cell):
        # The level is the text between the first three characters of the
        # cell HTML and its first <br />.
        html = cell.html().replace("<br />", "*")
        return html[3:html.find("*")]

    cardMLV = levelFrom(statCells[1])
    cardLLV = levelFrom(statCells[2])

    def statBlock(offset):
        # HP, ATK, MHP, MATK, FHP, FATK, Sum, MSum, FSum, CP, MCP, FCP
        wanted = (9, 10, 11, 12, 13, 14, 18, 19, 20, 21, 22, 23)
        return [statCells[offset + i].text() for i in wanted]

    # Both blocks are read eagerly so a malformed page fails for every
    # command, as it did before.  The kira block sits 24 cells further on.
    normalStats = statBlock(0)
    kiraStats = statBlock(24)

    rarityHeader = ("\002[" + cardFaction + " " + cardRarelity + " "
                    + cardName + "]\002 ")
    illustHeader = ("\002[" + cardFaction + " " + cardName + " / "
                    + cardIllustrator + "]\002 ")

    def statMessage(values):
        (hp, atk, mhp, matk, fhp, fatk,
         total, mtotal, ftotal, cp, mcp, fcp) = values
        msg = rarityHeader
        msg = msg + u8("코스트 : ") + cardCost + u8(", 각성레벨(한계레벨): ") + cardMLV + "(" + cardLLV + ") \002|\002 "
        msg = msg + u8("체력 (M/F): ") + hp + "(" + mhp + "/" + fhp + "), "
        msg = msg + u8("공격력 (M/F): ") + atk + "(" + matk + "/" + fatk + "), "
        msg = msg + u8("H+A(M/F): ") + total + "(" + mtotal + "/" + ftotal + ") \002|\002 "
        msg = msg + u8("CP (M/F): ") + cp + "(" + mcp + "/" + fcp + ") \002|\002 "
        return msg + cardSkillName + " : " + cardSkillInfo

    def detailMessage():
        msg = rarityHeader
        msg = msg + u8("넘버: ") + cardNumber + ", "
        msg = msg + u8("성별: ") + cardGender + ", "
        msg = msg + u8("포스: ") + cardForce + ", "
        msg = msg + u8("성장률: ") + cardGrowthRate + ", "
        msg = msg + u8("한돌횟수: ") + cardLimitBreakCount + ", "
        return msg + u8("입수방법: ") + cardGetInfo

    # Command -> index into cardImages.  Slots 8 and 11 have no command.
    illustCommands = (
        ("!일러2", 0), ("!각성일러2", 1), ("!만렙일러2", 2),
        ("!키라일러2", 3), ("!키라각성일러2", 4), ("!키라만렙일러2", 5),
        ("!오리지날일러2", 6), ("!오리지날각성일러2", 7),
        ("!오리지날키라일러2", 9), ("!오리지날키라각성일러2", 10),
    )

    requestMsg = ""
    if requestStr == u8("!카드2"):
        requestMsg = statMessage(normalStats)
    elif requestStr == u8("!키라카드2"):
        requestMsg = statMessage(kiraStats)
    elif requestStr == u8("!카드설명2"):
        # Strip everything except Hangul, digits, Latin letters, spaces
        # and basic punctuation before wrapping.
        hangul = re.compile('''[^ ㄱ-ㅣ가-힣0-9a-zA-Z.,"']+'''.decode('utf-8'))
        cardInfo = wrapDescription(hangul.sub('', cardInfo))
        requestMsg = "\002[" + cardFaction + " " + cardName + "]\002 "
        requestMsg = requestMsg + " " + cardInfo
    elif requestStr == u8("!좀더카드정보2") or requestStr == u8("!mtcdjb"):
        requestMsg = detailMessage()
    elif requestStr == u8("!좀덜카드정보2"):
        # This command sends only the first half of the detail message.
        requestMsg = detailMessage()
        requestMsg = requestMsg[:len(requestMsg) // 2]
    else:
        for command, slot in illustCommands:
            if requestStr == u8(command):
                requestMsg = illustHeader + " " + cardImages[slot]
                break

    self.requestAdd(requestMsg)
def getUserInfo(self, id, username):
    """Scrape a user's general osu! profile page and queue a summary line.

    The game mode appended to the profile URL depends on self.requestStr;
    no mode parameter is added for any other command.  The values are
    taken from the "profileStatLine" divs, whose first entry is the
    performance rank only when its text says so.  The finished message is
    passed to self.requestAdd().
    """
    modeByCommand = (
        ("!오스", "&m=0"),
        ("!태고", "&m=1"),
        ("!캐치어비트", "&m=2"),
        ("!오스!매니아", "&m=3"),
    )
    url = "http://osu.ppy.sh/pages/include/profile-general.php?u=" + id
    for command, suffix in modeByCommand:
        if self.requestStr == command.decode('utf-8'):
            url = url + suffix
            break

    Doc = Jsoup.connect(url).get()
    statLines = Doc.select("div[class=profileStatLine]")

    def valueOf(index):
        # Keep what follows the first ":" and the character after it.
        line = statLines[index].text()
        return line[line.find(":") + 2:]

    if statLines[0].text().find("Performance Rank") != -1:
        (rank, score, accuracy, playcount,
         totalscore, level, hits, maxcombo) = [valueOf(i) for i in range(8)]
    else:
        # No rank line: every other value sits one position earlier.
        rank = "#-"
        (score, accuracy, playcount,
         totalscore, level, hits, maxcombo) = [valueOf(i) for i in range(7)]

    # The first two table cells are appended when the second has text.
    cells = Doc.select("td")
    eventepic = ""
    if cells[1].text() != "":
        eventepic = cells[0].text() + " " + cells[1].text()

    pieces = [
        "\002[" + level + "레벨 ".decode('utf-8') + username + "]\002",
        " 랭크: ".decode('utf-8') + rank + " / 정확도: ".decode('utf-8') + accuracy,
        " / 플레이카운트: ".decode('utf-8') + playcount + " / 토탈스코어: ".decode('utf-8') + totalscore,
        " / 총 히트 수: ".decode('utf-8') + hits + " / 최대콤보: ".decode('utf-8') + maxcombo + " | ",
        eventepic,
    ]
    requestStr = pieces[0]
    for piece in pieces[1:]:
        requestStr = requestStr + piece
    self.requestAdd(requestStr)
def remove_html_tags(s):
    """Return the text Jsoup extracts from the HTML string s."""
    from org.jsoup import Jsoup
    parsed = Jsoup.parse(s)
    return parsed.text()
def __init__(self, answer):
    """Store the raw answer, two empty lists and its Jsoup-parsed document."""
    self.answer = answer
    # Both collections start out empty.
    self.inline, self.block = [], []
    self.doc = Jsoup.parse(answer)
import sys
import os
import test1
from org.jsoup import Jsoup
from com.pixshow.framework.utils import HttpUtility

# Page to download
url = "http://en.wikipedia.org/"
print(test1.workDir())

# Download the markup and parse it with Jsoup
html = HttpUtility.get(url)
doc = Jsoup.parse(html)

# Keep only the anchors inside bold text under the element with id "mp-itn"
# and hand their markup to the 'testService' bean for saving.
html = doc.select('#mp-itn b a').toString()
appContext.get('testService').save(html)
def getCardInfo(self, requestStr, cardName):
    """Look up a card, scrape its detail page and queue the reply.

    The card row is searched in the self.TABLENAME table by name: an exact
    LIKE match first, then a substring match if that fails.  The page
    stored in the row's URL column is fetched with Jsoup and the message
    for requestStr is passed to self.requestAdd(); an empty message is
    queued when requestStr is not one of the commands handled here.

    requestStr -- command text (unicode), e.g. the decoded "!카드"
    cardName   -- user-supplied card name (untrusted input); for the two
                  limit-break commands it is "<count> <name>"

    Lookup and scraping errors (no matching row, missing page elements,
    a non-numeric break count, network failures) propagate to the caller.
    """
    def u8(text):
        # Literals in this file are UTF-8 byte strings; work in unicode.
        return text.decode('utf-8')

    def fetchRow(pattern):
        # Return (url, id, name, cost, rarity) of the row matching pattern.
        # Table and column names come from class constants; the
        # user-supplied pattern is bound as a parameter so it cannot
        # change the SQL statement.
        stmt = self.db.prepareStatement(
            "SELECT * FROM " + self.TABLENAME + " WHERE "
            + self.CARDNAME + " LIKE ?;")
        try:
            stmt.setString(1, pattern)
            rs = stmt.executeQuery()
            # NOTE(review): the row is read without rs.next(), exactly as
            # the previous code did -- confirm the JDBC driver in use
            # positions the result set on the first row.
            return (rs.getString(self.CARDURL),
                    rs.getString(self.CARDID),
                    rs.getString(self.CARDNAME),
                    rs.getString(self.CARDCOST),
                    rs.getString(self.CARDRARITY))
        finally:
            # The statement used to be leaked on every call.
            stmt.close()

    def wrapStory(text):
        # Cut over-long stories at 320 characters (marking the cut), then
        # break the line once after 140 characters.
        if len(text) > 320:
            text = text[:320] + u8("...(생략)")
        if len(text) > 150:
            text = text[:140] + "\n" + text[140:]
        return text

    lvBreak = 0
    if requestStr == u8("!돌파") or requestStr == u8("!키라돌파"):
        words = cardName.split(" ")
        lvBreak = int(words[0])
        # NOTE(review): only the second word is used as the name, so a
        # card name containing spaces is cut short -- confirm intended.
        cardName = words[1].replace("\017", "").strip()
    else:
        cardName = cardName.replace("\017", "").strip()

    try:
        row = fetchRow(cardName)
    except:
        # Catch-all kept on purpose: a failed exact lookup surfaces as a
        # Java exception from the driver; retry as a substring match.
        row = fetchRow("%" + cardName + "%")
    cardUrl, cardNumber, cardName, cardCost, cardRarity = row

    Doc = Jsoup.connect(cardUrl).get()

    # Illustrations: normal, awakened, kira, kira awakened.
    cardImages = [
        Doc.select("div[id=cardG1A%d]" % n).select("img").attr("src")
        for n in (1, 2, 3, 4)
    ]

    infoRows = Doc.select("div[class=cardInfo]").select("table").select("tr")
    illustrator = infoRows[1].select("td")[1].text()
    faction = infoRows[2].select("td")[0].text()
    # Expand the short faction names; anything else is shown as scraped.
    for shortName, fullName in (("마법", "마법의 파"),
                                ("검술", "검술의 성"),
                                ("기교", "기교의 장")):
        if faction == u8(shortName):
            faction = u8(fullName)
            break

    foldedBoxes = Doc.select("div[class=foldedBox]")
    skillInfo = foldedBoxes[1].select("div[class=content]").text()
    cardStory = foldedBoxes[2].select("div[class=content]").text()

    statTables = Doc.select("div[class=statTable]")

    def readStats(table):
        # Rows are level / HP / ATK / CP.  The level row only provides the
        # awakened and final columns, the others all three.
        rows = table.select("tbody").select("tr")
        levels = [rows[0].select("td")[c].text() for c in (1, 2)]
        others = [[rows[r].select("td")[c].text() for c in (0, 1, 2)]
                  for r in (1, 2, 3)]
        return [levels] + others

    # Both tables are read eagerly so a malformed page fails for every
    # command, as it did before.  Table 0 is the normal card, 1 the kira.
    normalStats = readStats(statTables[0])
    kiraStats = readStats(statTables[1])

    def statMessage(header, stats):
        levels, hp, atk, cp = stats
        msg = header
        msg = msg + u8("코스트 : ") + cardCost + u8(", 각성(한계)레벨 : ") + levels[0] + "(" + levels[1] + ")"
        msg = msg + " \002|\002 "
        msg = msg + u8("체력(M/L) : ") + hp[0] + "(" + hp[1] + "/" + hp[2] + ")"
        msg = msg + ", "
        msg = msg + u8("공격력(M/L) : ") + atk[0] + "(" + atk[1] + "/" + atk[2] + ")"
        msg = msg + ", "
        msg = msg + u8("CP(M/L) : ") + cp[0] + "(" + cp[1] + "/" + cp[2] + ")"
        msg = msg + " \002|\002 "
        return msg + skillInfo

    def breakMessage(table, kiraLabel):
        # data-wakeup holds one "level,hp,atk" entry per break count,
        # separated by "|"; the highest usable entry is len - 2.
        steps = table.select("table").attr("data-wakeup").split("|")
        maxBreak = len(steps) - 2
        # A negative count would silently index from the end of the list.
        wanted = max(lvBreak, 0)
        if wanted >= maxBreak:
            count, label = maxBreak, u8("회 풀 돌파 ")
        else:
            count, label = wanted, u8("회 돌파 ")
        level, hp, atk = steps[count].split(",")[:3]
        # CP is (HP + ATK) per cost point, rounded down.
        cp = str((int(hp) + int(atk)) // int(cardCost))
        msg = "\002[" + faction + " " + cardRarity + " " + str(count) + label + kiraLabel + cardName + "]\002 "
        msg = msg + u8("레벨: ") + level + ", "
        msg = msg + u8("체력: ") + hp + ", "
        msg = msg + u8("공격력: ") + atk + ", "
        return msg + u8("CP: ") + cp

    # Command -> (label between faction and card name, image slot).
    illustCommands = (
        ("!일러", " ", 0),
        ("!각성일러", " 각성 ", 1),
        ("!키라일러", " 키라 ", 2),
        ("!키라각성일러", " 키라 각성 ", 3),
    )

    requestMsg = ""
    if requestStr == u8("!카드"):
        header = "\002[" + faction + " " + cardRarity + " " + cardName + "]\002 "
        requestMsg = statMessage(header, normalStats)
    elif requestStr == u8("!키라카드") or requestStr == u8("!키라"):
        header = "\002[" + faction + " " + cardRarity + u8(" 키라 ") + cardName + "]\002 "
        requestMsg = statMessage(header, kiraStats)
    elif requestStr == u8("!카드설명"):
        requestMsg = "\002[" + faction + " " + cardName + " / " + illustrator + "]\002 "
        requestMsg = requestMsg + " " + wrapStory(cardStory)
    elif requestStr == u8("!돌파"):
        requestMsg = breakMessage(statTables[0], "")
    elif requestStr == u8("!키라돌파"):
        requestMsg = breakMessage(statTables[1], u8("키라 "))
    else:
        for command, label, slot in illustCommands:
            if requestStr == u8(command):
                requestMsg = "\002[" + faction + u8(label) + cardName + " / " + illustrator + "]\002 "
                requestMsg = requestMsg + cardImages[slot]
                break

    self.requestAdd(requestMsg)
def getMonsterInfoUrl(self, monsterName):
    """Search the monster list by name and return the first hit's link.

    Spaces in monsterName are replaced by "%20" before it is appended to
    the search URL.  The value returned is the href of the anchor in the
    first cell of the second table row of the result listing.
    """
    query = monsterName.replace(" ", "%20")
    listing = Jsoup.connect(
        "http://www.thisisgame.com/pad/info/monster/list.php?sf=name&sw="
        + query).get()
    rows = listing.select("div[class=pad-table2 pad-info-light]").select("tr")
    # Row 0 is skipped; the link is taken from the first cell of row 1.
    firstCell = rows[1].select("td")[0]
    return firstCell.select("a").attr("href")
def clean_question(html):
    """Return the text of html without the content of its <code> elements.

    The markup is parsed with Jsoup, every <code> element is emptied and
    the text of what remains is returned.
    """
    parsed = Jsoup.parse(html)
    code_elements = parsed.select("code")
    code_elements.empty()
    return parsed.text()
node["node"].replaceWith(new_div) break if len(argv) < 4: infile = "/Users/mac/Downloads/im" outfile = "/Users/mac/Downloads/dialogues.html" textfile = "/Users/mac/Downloads/dialogues.txt" else: infile = argv[1] outfile = argv[2] textfile = argv[3] with iopen(outfile, "w", encoding="utf-8", errors="ignore") as output: input = File(infile) soup = Jsoup.parse(input, "UTF-8", "") # First, create a new document new_doc = Jsoup.parse("<body></body>") new_doc.updateMetaCharsetElement(True) new_doc.charset(Charset.forName("UTF-8")) new_body = new_doc.select("body").first() for element in soup.select("*"): if (element.tag().toString() == "ul" and element.className() == "ui_clean_list im-mess-stack--mess _im_stack_messages") or ( element.tag().toString() == "div" and element.className() == "im-mess-stack--pname"): new_body.appendChild(element) # Then remove empty tags from it and transform the labels