def __detectUrl(self):
    """Download self.__url and record whether it points at an m3u8 playlist.

    Approach: fetch the link's text and check that it begins with #EXTM3U.
    If it does, the presence of #EXTINF decides whether this is the inner
    playlist (its content is stored directly) or an outer/index playlist
    (the inner one is then fetched through __getInnerContent).
    """
    body = WebUtil.getText(self.__url)

    # An empty response, or one that does not start with #EXTM3U, is not an m3u8 link.
    if body == '' or not re.search(r'^#EXTM3U', body):
        self.__isM3u8 = False
        return

    # Matched: this is an m3u8 link.
    self.__isM3u8 = True

    # Decide between inner and outer playlist.
    if re.search(r'#EXTINF', body):
        # Inner link: the fetched text is already the playlist content.
        self.__innerUrl = self.__url
        self.__content = body
        self.__innerUrlPre = getPre(self.__innerUrl)
    else:
        # Outer link: keep the index text and go fetch the inner playlist.
        self.__indexContent = body
        self.__urlPre = getPre(self.__url)
        self.__getInnerContent()
def __getInnerContent(self):
    """Locate the inner playlist named by the outer (index) playlist and fetch it.

    Reads self.__indexContent and self.__urlPre. Sets self.__innerUrl, then
    either stores the fetched text in self.__content with self.__isM3u8 = True,
    or sets self.__isM3u8 = False when no entry is found or nothing is fetched.
    """
    # The entry (e.g. xxk/hls/index.m3u8) is normally on the last line, but the
    # text may end with a newline or use CRLF line endings. Taking the raw last
    # element of split('\n') would then give '' or a value with a trailing '\r',
    # so use the last line that is neither blank nor a '#' tag/comment line.
    stripped = [line.strip() for line in self.__indexContent.splitlines()]
    entries = [line for line in stripped if line and not line.startswith('#')]
    if not entries:
        # Nothing to follow, so there is no inner playlist to load.
        self.__isM3u8 = False
        return
    link = entries[-1]

    # Build the inner url; an absolute URI must not be glued onto the prefix.
    if link.startswith(('http://', 'https://')):
        self.__innerUrl = link
    else:
        self.__innerUrl = self.__urlPre + link

    # Fetch the inner playlist; an empty response means it could not be loaded.
    txt = WebUtil.getText(self.__innerUrl)
    if txt == '':
        self.__isM3u8 = False
    else:
        self.__content = txt
        self.__isM3u8 = True
def downloadStockList(self):
    """Download the raw stock list from self._url into self._rawDataFileName.

    An info message naming self._exchangeName is logged before and after
    the download.
    """
    exchange = self._exchangeName
    logging.info("Trying to download raw list of stocks from " + exchange)
    # NOTE(review): the meaning of the third argument (False) is defined by
    # WebUtil.downloadFromUrlAndSaveToFile and is not visible from here.
    WebUtil.downloadFromUrlAndSaveToFile(self._url, self._rawDataFileName, False)
    logging.info("Downloaded raw list of stocks from " + exchange)
def downloadHistoricalPrizes(self):
    """Download the data behind self._url and save it to self._fileName."""
    sourceUrl = self._url
    targetFile = self._fileName
    # NOTE(review): the meaning of the third argument (False) is defined by
    # WebUtil.downloadFromUrlAndSaveToFile and is not visible from here.
    WebUtil.downloadFromUrlAndSaveToFile(sourceUrl, targetFile, False)
def searchWolfram(self, query, podsToParse=5, cleanUpText=True, includeUrl=True):
    """Look up a query on WolframAlpha and condense the reply into one message.

    :param query: search text, handed to fetchWolframData and quoted into the result URL
    :param podsToParse: handed to fetchWolframData unchanged
    :param cleanUpText: when True, newlines in the result become Constants.GREY_SEPARATOR
        and runs of spaces are collapsed into a single space
    :param includeUrl: when True, a link to the WolframAlpha result page is appended
    :return: the result text, a 'no results' / 'did you mean' hint, or an error description
    """
    truncationMarker = '[...]'
    replystring = ""
    wolframResult = self.fetchWolframData(query, podsToParse)
    # First check if the query succeeded; on failure the second item is returned as the reply
    if not wolframResult[0]:
        return wolframResult[1]

    try:
        xml = ElementTree.fromstring(wolframResult[1])
    except ElementTree.ParseError:
        self.logError("[Wolfram] Unexpected reply, invalid XML:")
        self.logError(wolframResult[1])
        return "Wow, that's some weird data. I don't know what to do with this, sorry. Try reformulating your query, or just try again and see what happens"

    # Use .get() so a reply without these attributes takes the error / no-result path instead of raising KeyError
    if xml.attrib.get('error') != 'false':
        replystring = "Sorry, an error occurred. Tell my owner(s) to check the error log, maybe they can fix it. It could also be an error on WolframAlpha's side though"
        self.logError("[Wolfram] An error occurred for the search query '{}'. Reply: {}".format(query, wolframResult[1]))
    elif xml.attrib.get('success') != 'true':
        replystring = "No results found, sorry"
        # Most likely no results were found. See if there are suggestions for search improvements
        didyoumeansElement = xml.find('didyoumeans')
        if didyoumeansElement is not None:
            suggestions = []
            for didyoumean in didyoumeansElement.findall('didyoumean'):
                if didyoumean.attrib.get('level') == 'low':
                    continue
                # An element without text has .text set to None, so fall back to an empty string
                suggestion = (didyoumean.text or '').replace('\n', '').strip()
                if len(suggestion) > 0:
                    suggestions.append(suggestion.encode('utf-8'))
            if len(suggestions) > 0:
                replystring += ". Did you perhaps mean: {}".format(", ".join(suggestions))
    else:
        resultFound = False
        # The first pod is always skipped, and so is any pod titled "Input"
        for pod in xml.findall('pod')[1:]:
            if pod.attrib.get('title') == "Input":
                continue
            for subpod in pod.findall('subpod'):
                plaintextElement = subpod.find('plaintext')
                # A subpod without a <plaintext> child is skipped instead of raising AttributeError
                text = plaintextElement.text if plaintextElement is not None else None
                # If there's no text, or if it's a dumb result ('3 euros' returns coinweight, which is an image), skip this subpod
                if text is None or text.startswith('\n'):
                    continue
                if cleanUpText:
                    text = text.replace('\n', Constants.GREY_SEPARATOR).strip()
                # If there's no text left in this subpod (for instance if it's just an image)
                if len(text) == 0:
                    continue
                replystring += text
                resultFound = True
                break
            if resultFound:
                break
        if not resultFound:
            replystring += "Sorry, results were either images, irrelevant or non-existent"

    if cleanUpText:
        replystring = re.sub(' {2,}', ' ', replystring)

    # Make sure we don't spam the channel, keep message length limited
    #  Shortened URL will be about 25 characters, keep that in mind
    messageLengthLimit = Constants.MAX_MESSAGE_LENGTH
    if includeUrl:
        messageLengthLimit -= 30
    if len(replystring) > messageLengthLimit:
        # Reserve room for the marker, so the truncated text plus marker stays within the limit
        keepLength = max(0, messageLengthLimit - len(truncationMarker))
        replystring = replystring[:keepLength] + truncationMarker

    # Add the search url
    if includeUrl:
        searchUrl = "http://www.wolframalpha.com/input/?i={}".format(urllib.quote_plus(query))
        # If the message would get too long, shorten the result URL
        if len(replystring) + len(searchUrl) > Constants.MAX_MESSAGE_LENGTH:
            searchUrl = WebUtil.shortenUrl(searchUrl)[1]
        replystring += "{}{}".format(Constants.GREY_SEPARATOR, searchUrl)

    return replystring
def updateHistoricalPrizes(self):
    """Append prices newer than the stored ones to self._fileName.

    The start date comes from self._getFromDateForStock(). Nothing happens
    unless that date lies before self._toDate; otherwise self._url is
    rebuilt for the missing range and its data is appended to the file.
    """
    startDate = self._getFromDateForStock()
    # Only fetch when the start date is strictly before the end date.
    if not startDate < self._toDate:
        return
    builder = YahooUrlBuilder(self._stockName, startDate, self._toDate)
    self._url = builder.build()
    WebUtil.downloadFromUrlAndAppendToFile(self._url, self._fileName)
def searchWolfram(self, query, podsToParse=5, cleanUpText=True, includeUrl=True):
    """Look up a query on WolframAlpha and condense the reply into one message.

    :param query: search text, handed to fetchWolframData and quoted into the result URL
    :param podsToParse: handed to fetchWolframData unchanged
    :param cleanUpText: when True, newlines in the result become Constants.GREY_SEPARATOR
        and runs of spaces are collapsed into a single space
    :param includeUrl: when True, a link to the WolframAlpha result page is appended
    :return: the result text, a 'no results' / 'did you mean' hint, or an error description
    """
    truncationMarker = '[...]'
    replystring = ""
    wolframResult = self.fetchWolframData(query, podsToParse)
    # First check if the query succeeded; on failure the second item is returned as the reply
    if not wolframResult[0]:
        return wolframResult[1]

    try:
        xml = ElementTree.fromstring(wolframResult[1])
    except ElementTree.ParseError:
        self.logError("[Wolfram] Unexpected reply, invalid XML:")
        self.logError(wolframResult[1])
        return "Wow, that's some weird data. I don't know what to do with this, sorry. Try reformulating your query, or just try again and see what happens"

    # Use .get() so a reply without these attributes takes the error / no-result path instead of raising KeyError
    if xml.attrib.get('error') != 'false':
        replystring = "Sorry, an error occurred. Tell my owner(s) to check the error log, maybe they can fix it. It could also be an error on WolframAlpha's side though"
        self.logError(
            "[Wolfram] An error occurred for the search query '{}'. Reply: {}"
            .format(query, wolframResult[1]))
    elif xml.attrib.get('success') != 'true':
        replystring = "No results found, sorry"
        # Most likely no results were found. See if there are suggestions for search improvements
        didyoumeansElement = xml.find('didyoumeans')
        if didyoumeansElement is not None:
            suggestions = []
            for didyoumean in didyoumeansElement.findall('didyoumean'):
                if didyoumean.attrib.get('level') == 'low':
                    continue
                # An element without text has .text set to None, so fall back to an empty string
                suggestion = (didyoumean.text or '').replace('\n', '').strip()
                if len(suggestion) > 0:
                    suggestions.append(suggestion.encode('utf-8'))
            if len(suggestions) > 0:
                replystring += ". Did you perhaps mean: {}".format(
                    ", ".join(suggestions))
    else:
        resultFound = False
        # The first pod is always skipped, and so is any pod titled "Input"
        for pod in xml.findall('pod')[1:]:
            if pod.attrib.get('title') == "Input":
                continue
            for subpod in pod.findall('subpod'):
                plaintextElement = subpod.find('plaintext')
                # A subpod without a <plaintext> child is skipped instead of raising AttributeError
                text = plaintextElement.text if plaintextElement is not None else None
                # If there's no text, or if it's a dumb result ('3 euros' returns coinweight, which is an image), skip this subpod
                if text is None or text.startswith('\n'):
                    continue
                if cleanUpText:
                    text = text.replace('\n', Constants.GREY_SEPARATOR).strip()
                # If there's no text left in this subpod (for instance if it's just an image)
                if len(text) == 0:
                    continue
                replystring += text
                resultFound = True
                break
            if resultFound:
                break
        if not resultFound:
            replystring += "Sorry, results were either images, irrelevant or non-existent"

    if cleanUpText:
        replystring = re.sub(' {2,}', ' ', replystring)

    # Make sure we don't spam the channel, keep message length limited
    #  Shortened URL will be about 25 characters, keep that in mind
    messageLengthLimit = Constants.MAX_MESSAGE_LENGTH
    if includeUrl:
        messageLengthLimit -= 30
    if len(replystring) > messageLengthLimit:
        # Reserve room for the marker, so the truncated text plus marker stays within the limit
        keepLength = max(0, messageLengthLimit - len(truncationMarker))
        replystring = replystring[:keepLength] + truncationMarker

    # Add the search url
    if includeUrl:
        searchUrl = "http://www.wolframalpha.com/input/?i={}".format(
            urllib.quote_plus(query))
        # If the message would get too long, shorten the result URL
        if len(replystring) + len(searchUrl) > Constants.MAX_MESSAGE_LENGTH:
            searchUrl = WebUtil.shortenUrl(searchUrl)[1]
        replystring += "{}{}".format(Constants.GREY_SEPARATOR, searchUrl)

    return replystring