Python isGdocUrl Examples

Programming Language: Python

Namespace/Package Name: common.util.urlFuncs

Method/Function: isGdocUrl

Examples at hotexamples.com: 6

Python isGdocUrl - 6 examples found. These are the top rated real world Python examples of common.util.urlFuncs.isGdocUrl extracted from open source projects. You can rate examples to help us improve the quality of examples.

Example #1

Show file

File: gDocParse.py Project: fake-name/ReadableWebProxy

	def getDriveFileUrls(cls, url):
		"""
		Fetch a Google Drive folder page and list the documents it links to.

		The page at `url` is fetched with `cls.wg.getpage()`. If the fetch was
		redirected to a URL that `urlFuncs.isGdocUrl()` accepts, that single
		(trimmed) URL is returned, paired with the title of the fetched page.
		Otherwise the folder listing is pulled out of the `var data = ...`
		javascript literal embedded in the page.

		Args:
			url: URL of the drive page to fetch.

		Returns:
			A 2-tuple `(items, title)`. `items` is a list of
			`(itemTitle, itemUrl)` tuples; `title` is the page title in the
			redirect case, or the folder name otherwise.

		Raises:
			AssertionError: if the page does not contain exactly one embedded
				data literal, or the parsed literal lacks the 'viewerItems'
				or 'folderName' keys.
		"""
		ctnt, handle = cls.wg.getpage(url, returnMultiple=True)

		# Pull out the title for the disambiguation page.
		soup = common.util.webFunctions.as_soup(ctnt)
		title = soup.title.string

		# Google drive supports a `read?{google doc path}` mode. As such, we look at the actual URL,
		# which tells us if we redirected to a plain google doc, and just return that if the redirect occurred.
		handleUrl = handle.geturl()
		if handleUrl != url:
			# isGdocUrl() returns a tuple whose first element is the actual yes/no flag.
			# Testing the tuple itself is always true, so index into it explicitly.
			if urlFuncs.isGdocUrl(handleUrl)[0]:
				cls.log.info("Direct read redirect: '%s'", handleUrl)
				handleUrl = urlFuncs.trimGDocUrl(handleUrl)
				return [(title, handleUrl)], title

		# Raw string, so the escaped parentheses reach the regex engine untouched.
		jsRe = re.compile(r'var data = (.*?); _initFolderLandingPageApplication\(config, data\)', re.DOTALL)

		matches = jsRe.findall(ctnt)
		assert len(matches) == 1

		data = matches[0].strip()
		conf = jsLiteralParse.jsParse(data)

		# The keys+data in the data/conf are:
		# 'folderName'  - Title of the folder, just a string
		# 'viewerItems' - List of lists of the items in the folder, which contains the title, previewimage, and url for each item.
		#                 Other stuff (mime types) for the files, but they're all google internal mime-types and look to be the same for
		#                 every file, even if they're different docs types.
		# 'folderModel' - List of UID and the view URL. Looks to be completely redundant, as all the information is also in 'viewerItems'

		assert 'viewerItems' in conf
		assert 'folderName' in conf

		title = conf['folderName']

		pages = conf['viewerItems']

		items = []
		for page in pages:
			# Only entries with 18 or 22 fields are accepted; anything else is logged and skipped.
			if len(page) not in (18, 22):
				cls.log.error("json entry in page with an invalid length:")
				cls.log.error("%s", page)
				continue

			# Item 2 is the title, item 17 is the doc URL
			# The doc URL is unicode escaped, annoyingly
			itemTitle = page[2]
			itemUrl   = page[17].encode('ascii').decode('unicode_escape')

			itemUrl = urlFuncs.trimGDocUrl(itemUrl)

			items.append((itemTitle, itemUrl))

		return items, title

Example #2

Show file

File: gDocParse.py Project: woebbi/ReadableWebProxy

    def getDriveFileUrls(cls, url):
        """
        Fetch a Google Drive folder page and list the documents it links to.

        The page at `url` is fetched with `cls.wg.getpage()`. If the fetch was
        redirected to a URL that `urlFuncs.isGdocUrl()` accepts, that single
        (trimmed) URL is returned, paired with the title of the fetched page.
        Otherwise the folder listing is pulled out of the `var data = ...`
        javascript literal embedded in the page.

        Args:
            url: URL of the drive page to fetch.

        Returns:
            A 2-tuple `(items, title)`. `items` is a list of
            `(itemTitle, itemUrl)` tuples; `title` is the page title in the
            redirect case, or the folder name otherwise.

        Raises:
            AssertionError: if the page does not contain exactly one embedded
                data literal, or the parsed literal lacks the 'viewerItems'
                or 'folderName' keys.
        """
        ctnt, handle = cls.wg.getpage(url, returnMultiple=True)

        # Pull out the title for the disambiguation page.
        soup = WebRequest.as_soup(ctnt)
        title = soup.title.string

        # Google drive supports a `read?{google doc path}` mode. As such, we look at the actual URL,
        # which tells us if we redirected to a plain google doc, and just return that if the redirect occurred.
        handleUrl = handle.geturl()
        if handleUrl != url:
            # isGdocUrl() returns a tuple whose first element is the actual yes/no flag.
            # Testing the tuple itself is always true, so index into it explicitly.
            if urlFuncs.isGdocUrl(handleUrl)[0]:
                cls.log.info("Direct read redirect: '%s'", handleUrl)
                handleUrl = urlFuncs.trimGDocUrl(handleUrl)
                return [(title, handleUrl)], title

        # Raw string, so the escaped parentheses reach the regex engine untouched.
        jsRe = re.compile(
            r'var data = (.*?); _initFolderLandingPageApplication\(config, data\)',
            re.DOTALL)

        matches = jsRe.findall(ctnt)
        assert len(matches) == 1

        data = matches[0].strip()
        conf = jsLiteralParse.jsParse(data)

        # The keys+data in the data/conf are:
        # 'folderName'  - Title of the folder, just a string
        # 'viewerItems' - List of lists of the items in the folder, which contains the title, previewimage, and url for each item.
        #                 Other stuff (mime types) for the files, but they're all google internal mime-types and look to be the same for
        #                 every file, even if they're different docs types.
        # 'folderModel' - List of UID and the view URL. Looks to be completely redundant, as all the information is also in 'viewerItems'

        assert 'viewerItems' in conf
        assert 'folderName' in conf

        title = conf['folderName']

        pages = conf['viewerItems']

        items = []
        for page in pages:
            # Only entries with 18 or 22 fields are accepted; anything else is logged and skipped.
            if len(page) not in (18, 22):
                cls.log.error("json entry in page with an invalid length:")
                cls.log.error("%s", page)
                continue

            # Item 2 is the title, item 17 is the doc URL
            # The doc URL is unicode escaped, annoyingly
            itemTitle = page[2]
            itemUrl = page[17].encode('ascii').decode('unicode_escape')

            itemUrl = urlFuncs.trimGDocUrl(itemUrl)

            items.append((itemTitle, itemUrl))

        return items, title

Example #3

Show file

File: gDocParse.py Project: fake-name/ReadableWebProxy

	def __init__(self, targetUrl):
		"""
		Set up state for retrieving the google document at `targetUrl`.

		`targetUrl` is checked with `urlFuncs.isGdocUrl()`. The URL that check
		returns is trimmed with `urlFuncs.trimGDocUrl()` and suffixed with the
		zip-export query to form `self.url`. The URL as originally passed is
		kept in `self.refererUrl`.

		Raises:
			ValueError: if `urlFuncs.isGdocUrl()` does not flag `targetUrl`
				as a google document.
		"""
		recognised, docUrl = urlFuncs.isGdocUrl(targetUrl)
		if not recognised:
			message = "Passed URL '%s' is not a google document?" % targetUrl
			raise ValueError(message)

		trimmedUrl = urlFuncs.trimGDocUrl(docUrl)

		self.url = trimmedUrl + '/export?format=zip'
		self.refererUrl = targetUrl

		# Both text buffers start out empty.
		self.document = self.currentChunk = ''

Example #4

Show file

File: gDocParse.py Project: woebbi/ReadableWebProxy

    def __init__(self, targetUrl):
        """
        Set up state for retrieving the google document at `targetUrl`.

        `targetUrl` is checked with `urlFuncs.isGdocUrl()`. The URL that check
        returns is trimmed with `urlFuncs.trimGDocUrl()` and suffixed with the
        zip-export query to form `self.url`. The URL as originally passed is
        kept in `self.refererUrl`.

        Raises:
            ValueError: if `urlFuncs.isGdocUrl()` does not flag `targetUrl`
                as a google document.
        """
        recognised, docUrl = urlFuncs.isGdocUrl(targetUrl)
        if not recognised:
            message = "Passed URL '%s' is not a google document?" % targetUrl
            raise ValueError(message)

        trimmedUrl = urlFuncs.trimGDocUrl(docUrl)

        self.url = trimmedUrl + '/export?format=zip'
        self.refererUrl = targetUrl

        # Both text buffers start out empty.
        self.document = self.currentChunk = ''

Example #5

Show file

File: GDocProcessor.py Project: woebbi/ReadableWebProxy

 def wantsUrl(url):
     """
     Report whether `url` should be handled here.

     Returns the first element of the result of `urlFuncs.isGdocUrl(url)`.
     """
     verdict = urlFuncs.isGdocUrl(url)
     return verdict[0]

Example #6

Show file

File: GDocProcessor.py Project: fake-name/ReadableWebProxy

	def wantsUrl(url):
		"""
		Report whether `url` should be handled here.

		Returns the first element of the result of `urlFuncs.isGdocUrl(url)`.
		"""
		verdict = urlFuncs.isGdocUrl(url)
		return verdict[0]