Python WebUtil Examples, util.WebUtil Python Examples

Example #1

0

Show file

File: M3u8.py Project: wangcong099878/mdown

    def __detectUrl(self):
        """
        Decide whether ``self.__url`` points at an m3u8 playlist.

        Approach: fetch the link's text with ``WebUtil.getText`` and check
        whether it starts with ``#EXTM3U``. If it does, look for ``#EXTINF``:
        text containing it is stored as the inner playlist, otherwise the
        text is kept as the outer (index) playlist and the inner one is
        fetched via ``__getInnerContent``.

        Always sets ``self.__isM3u8``; the other attributes are only written
        on the branch that applies.
        """
        fetched = WebUtil.getText(self.__url)

        # Empty response or no playlist header: not an m3u8 link.
        if fetched == '' or not re.search(r'^#EXTM3U', fetched):
            self.__isM3u8 = False
            return

        # Header matched, so this is an m3u8 link.
        self.__isM3u8 = True

        # Decide between the outer and the inner playlist.
        if not re.search(r'#EXTINF', fetched):
            # Outer link: remember the index and go fetch the inner playlist.
            self.__indexContent = fetched
            self.__urlPre = getPre(self.__url)
            self.__getInnerContent()
            return

        # Inner link: the fetched text is already the playlist we want.
        self.__innerUrl = self.__url
        self.__content = fetched
        self.__innerUrlPre = getPre(self.__innerUrl)

Example #2

0

Show file

File: M3u8.py Project: wangcong099878/mdown

    def __getInnerContent(self):
        """
        Fetch the inner playlist referenced by the outer (index) playlist.

        The inner playlist's path (something like ``xxk/hls/index.m3u8``) is
        taken from the last line of ``self.__indexContent`` that is neither
        blank nor a ``#`` line. It is appended to ``self.__urlPre`` to form
        ``self.__innerUrl``, which is then downloaded with
        ``WebUtil.getText``.

        On success the text is stored in ``self.__content`` and
        ``self.__isM3u8`` is set to True. ``self.__isM3u8`` is set to False
        when the index holds no usable line or the download comes back empty.
        """
        # Playlists normally end with a newline and may use CRLF endings, so
        # the raw last element of split('\n') would be '' or carry a trailing
        # '\r'. Strip every line and ignore blanks and '#' tag/comment lines.
        stripped = [line.strip() for line in self.__indexContent.splitlines()]
        paths = [line for line in stripped if line and not line.startswith('#')]
        if not paths:
            self.__isM3u8 = False
            return

        # Build the inner url from the last path line of the index
        self.__innerUrl = self.__urlPre + paths[-1]

        # Fetch the content behind the inner url
        txt = WebUtil.getText(self.__innerUrl)
        if txt == '':
            self.__isM3u8 = False
        else:
            self.__content = txt
            self.__isM3u8 = True

Example #3

0

Show file

File: StockListDownloader.py Project: marpiec/HedgeFund

 def downloadStockList(self):
     """Pass ``self._url`` and ``self._rawDataFileName`` to
     ``WebUtil.downloadFromUrlAndSaveToFile``, logging an info message that
     names ``self._exchangeName`` before and after the call.
     """
     exchange = self._exchangeName
     logging.info("Trying to download raw list of stocks from " + exchange)
     # NOTE(review): the meaning of the trailing False is defined by WebUtil - confirm there
     WebUtil.downloadFromUrlAndSaveToFile(
         self._url,
         self._rawDataFileName,
         False,
     )
     logging.info("Downloaded raw list of stocks from " + exchange)

Example #4

0

Show file

File: YahooOneStockDownloader.py Project: marcinpieciukiewicz/HistoricalStockRepository

 def downloadHistoricalPrizes(self):
     """Pass ``self._url`` and ``self._fileName`` to
     ``WebUtil.downloadFromUrlAndSaveToFile``.
     """
     source, target = self._url, self._fileName
     # NOTE(review): the meaning of the trailing False is defined by WebUtil - confirm there
     WebUtil.downloadFromUrlAndSaveToFile(source, target, False)

Example #5

0

Show file

File: WolframAlpha.py Project: Didero/DideRobot

	def searchWolfram(self, query, podsToParse=5, cleanUpText=True, includeUrl=True):
		"""
		Query WolframAlpha and condense the reply into a single message string.

		:param query: The search text; handed to fetchWolframData and used to build the result URL
		:param podsToParse: Handed to fetchWolframData unchanged
		:param cleanUpText: If True, newlines in the result text are replaced with Constants.GREY_SEPARATOR and runs of spaces are collapsed
		:param includeUrl: If True, a link to the WolframAlpha result page is appended, shortened through WebUtil.shortenUrl when the message would otherwise exceed Constants.MAX_MESSAGE_LENGTH
		:return: The reply text. If fetchWolframData reports failure, its second element is returned as-is
		"""
		wolframResult = self.fetchWolframData(query, podsToParse)
		#First check if the query succeeded
		if not wolframResult[0]:
			return wolframResult[1]

		try:
			xml = ElementTree.fromstring(wolframResult[1])
		except ElementTree.ParseError:
			self.logError("[Wolfram] Unexpected reply, invalid XML:")
			self.logError(wolframResult[1])
			return "Wow, that's some weird data. I don't know what to do with this, sorry. Try reformulating your query, or just try again and see what happens"

		replystring = ""
		if xml.attrib['error'] != 'false':
			replystring = "Sorry, an error occurred. Tell my owner(s) to check the error log, maybe they can fix it. It could also be an error on WolframAlpha's side though"
			self.logError("[Wolfram] An error occurred for the search query '{}'. Reply: {}".format(query, wolframResult[1]))
		elif xml.attrib['success'] != 'true':
			replystring = "No results found, sorry"
			#Most likely no results were found. See if there are suggestions for search improvements
			didyoumeansElement = xml.find('didyoumeans')
			if didyoumeansElement is not None:
				suggestions = []
				for didyoumean in didyoumeansElement.findall('didyoumean'):
					if didyoumean.attrib['level'] != 'low':
						#An element without any text has 'text' set to None, treat that as an empty suggestion
						suggestion = (didyoumean.text or '').replace('\n', '').strip()
						if len(suggestion) > 0:
							#NOTE(review): encode() yields bytes on Python 3, which str.join rejects; kept because this code uses Python 2 APIs (urllib.quote_plus)
							suggestions.append(suggestion.encode('utf-8'))
				if len(suggestions) > 0:
					replystring += ". Did you perhaps mean: {}".format(", ".join(suggestions))
		else:
			resultFound = False
			#The first pod is skipped, as is any pod titled 'Input'
			for pod in xml.findall('pod')[1:]:
				if pod.attrib['title'] == "Input":
					continue
				for subpod in pod.findall('subpod'):
					#findtext returns None when there is no <plaintext> child (instead of raising AttributeError) and '' when it is empty
					text = subpod.findtext('plaintext')
					#If there's no text, or if it's a dumb result ('3 euros' returns coinweight, which is an image), skip this subpod
					if not text or text.startswith('\n'):
						continue
					if cleanUpText:
						text = text.replace('\n', Constants.GREY_SEPARATOR).strip()
					#If there's no text left after cleanup (for instance if it was only whitespace)
					if len(text) == 0:
						continue
					replystring += text
					resultFound = True
					break
				if resultFound:
					break

			if not resultFound:
				replystring += "Sorry, results were either images, irrelevant or non-existent"

		if cleanUpText:
			replystring = re.sub(' {2,}', ' ', replystring)

		#Make sure we don't spam the channel, keep message length limited
		#  Shortened URL will be about 25 characters, keep that in mind
		messageLengthLimit = Constants.MAX_MESSAGE_LENGTH
		if includeUrl:
			messageLengthLimit -= 30

		if len(replystring) > messageLengthLimit:
			replystring = replystring[:messageLengthLimit] + '[...]'

		#Add the search url
		if includeUrl:
			searchUrl = "http://www.wolframalpha.com/input/?i={}".format(urllib.quote_plus(query))
			#If the message would get too long, shorten the result URL
			if len(replystring) + len(searchUrl) > Constants.MAX_MESSAGE_LENGTH:
				searchUrl = WebUtil.shortenUrl(searchUrl)[1]
			replystring += "{}{}".format(Constants.GREY_SEPARATOR, searchUrl)

		return replystring

Example #6

0

Show file

File: YahooOneStockUpdater.py Project: marpiec/HedgeFund

 def updateHistoricalPrizes(self):
     """Append data for the stock to ``self._fileName`` when the start date
     from ``_getFromDateForStock`` lies before ``self._toDate``.

     In that case ``self._url`` is rebuilt with ``YahooUrlBuilder`` and handed
     to ``WebUtil.downloadFromUrlAndAppendToFile``; otherwise nothing happens.
     """
     startDate = self._getFromDateForStock()
     # Nothing to fetch unless the start date is strictly before the end date
     if not (startDate < self._toDate):
         return
     urlBuilder = YahooUrlBuilder(self._stockName, startDate, self._toDate)
     self._url = urlBuilder.build()
     WebUtil.downloadFromUrlAndAppendToFile(self._url, self._fileName)

Example #7

0

Show file

    def searchWolfram(self,
                      query,
                      podsToParse=5,
                      cleanUpText=True,
                      includeUrl=True):
        """
        Query WolframAlpha and condense the reply into a single message string.

        :param query: The search text; handed to fetchWolframData and used to build the result URL
        :param podsToParse: Handed to fetchWolframData unchanged
        :param cleanUpText: If True, newlines in the result text are replaced with Constants.GREY_SEPARATOR and runs of spaces are collapsed
        :param includeUrl: If True, a link to the WolframAlpha result page is appended, shortened through WebUtil.shortenUrl when the message would otherwise exceed Constants.MAX_MESSAGE_LENGTH
        :return: The reply text. If fetchWolframData reports failure, its second element is returned as-is
        """
        wolframResult = self.fetchWolframData(query, podsToParse)
        #First check if the query succeeded
        if not wolframResult[0]:
            return wolframResult[1]

        try:
            xml = ElementTree.fromstring(wolframResult[1])
        except ElementTree.ParseError:
            self.logError("[Wolfram] Unexpected reply, invalid XML:")
            self.logError(wolframResult[1])
            return "Wow, that's some weird data. I don't know what to do with this, sorry. Try reformulating your query, or just try again and see what happens"

        replystring = ""
        if xml.attrib['error'] != 'false':
            replystring = "Sorry, an error occurred. Tell my owner(s) to check the error log, maybe they can fix it. It could also be an error on WolframAlpha's side though"
            self.logError(
                "[Wolfram] An error occurred for the search query '{}'. Reply: {}"
                .format(query, wolframResult[1]))
        elif xml.attrib['success'] != 'true':
            replystring = "No results found, sorry"
            #Most likely no results were found. See if there are suggestions for search improvements
            didyoumeansElement = xml.find('didyoumeans')
            if didyoumeansElement is not None:
                suggestions = []
                for didyoumean in didyoumeansElement.findall('didyoumean'):
                    if didyoumean.attrib['level'] != 'low':
                        #An element without any text has 'text' set to None, treat that as an empty suggestion
                        suggestion = (didyoumean.text or '').replace(
                            '\n', '').strip()
                        if len(suggestion) > 0:
                            #NOTE(review): encode() yields bytes on Python 3, which str.join rejects; kept because this code uses Python 2 APIs (urllib.quote_plus)
                            suggestions.append(suggestion.encode('utf-8'))
                if len(suggestions) > 0:
                    replystring += ". Did you perhaps mean: {}".format(
                        ", ".join(suggestions))
        else:
            resultFound = False
            #The first pod is skipped, as is any pod titled 'Input'
            for pod in xml.findall('pod')[1:]:
                if pod.attrib['title'] == "Input":
                    continue
                for subpod in pod.findall('subpod'):
                    #findtext returns None when there is no <plaintext> child (instead of raising AttributeError) and '' when it is empty
                    text = subpod.findtext('plaintext')
                    #If there's no text, or if it's a dumb result ('3 euros' returns coinweight, which is an image), skip this subpod
                    if not text or text.startswith('\n'):
                        continue
                    if cleanUpText:
                        text = text.replace('\n',
                                            Constants.GREY_SEPARATOR).strip()
                    #If there's no text left after cleanup (for instance if it was only whitespace)
                    if len(text) == 0:
                        continue
                    replystring += text
                    resultFound = True
                    break
                if resultFound:
                    break

            if not resultFound:
                replystring += "Sorry, results were either images, irrelevant or non-existent"

        if cleanUpText:
            replystring = re.sub(' {2,}', ' ', replystring)

        #Make sure we don't spam the channel, keep message length limited
        #  Shortened URL will be about 25 characters, keep that in mind
        messageLengthLimit = Constants.MAX_MESSAGE_LENGTH
        if includeUrl:
            messageLengthLimit -= 30

        if len(replystring) > messageLengthLimit:
            replystring = replystring[:messageLengthLimit] + '[...]'

        #Add the search url
        if includeUrl:
            searchUrl = "http://www.wolframalpha.com/input/?i={}".format(
                urllib.quote_plus(query))
            #If the message would get too long, shorten the result URL
            if len(replystring) + len(
                    searchUrl) > Constants.MAX_MESSAGE_LENGTH:
                searchUrl = WebUtil.shortenUrl(searchUrl)[1]
            replystring += "{}{}".format(Constants.GREY_SEPARATOR, searchUrl)

        return replystring