Example #1
    # relies on module-level imports elsewhere in the source file:
    # re, warnings, bs4.BeautifulSoup, urllib.parse.unquote
    def _un_htmlify(self, text):
        def _handle_bad_html(s):
            # blank out stray angle brackets left by malformed markup
            pttn = re.compile('<|>')
            return pttn.sub(' ', s)

        try:
            with warnings.catch_warnings():
                # escalate bs4's UserWarnings (e.g. "input looks like a
                # URL, not markup") to errors so that suspect input is
                # caught below; the original "ignore" filter made the
                # except clause unreachable
                warnings.simplefilter('error', UserWarning)
                soup = BeautifulSoup(text.strip(), 'html.parser')
        except UserWarning:
            return ''

        # get all of the text and any a/@href values
        texts = [
            _handle_bad_html(t.strip('"'))
            for t in soup.find_all(string=True)  # `string=` supersedes `text=`
        ]
        if self.include_html_hrefs:
            texts += [
                unquote(a['href']) for a in soup.find_all('a')
                if 'href' in a.attrs
            ]

        return ' '.join(texts)

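A minimal runnable sketch of what the method does. Everything below is a hypothetical stand-in, not part of the original source: the real class defines include_html_hrefs and its other helpers elsewhere, so only the pieces _un_htmlify touches are reproduced here.

# Hypothetical demo class -- a stand-in for the class that owns _un_htmlify.
import re
import warnings
from urllib.parse import unquote

from bs4 import BeautifulSoup


class _UnHtmlifyDemo:
    include_html_hrefs = True

    def _un_htmlify(self, text):
        # same logic as the method above, trimmed to the happy path
        with warnings.catch_warnings():
            warnings.simplefilter('ignore')
            soup = BeautifulSoup(text.strip(), 'html.parser')
        texts = [re.sub('<|>', ' ', t.strip('"'))
                 for t in soup.find_all(string=True)]
        if self.include_html_hrefs:
            texts += [unquote(a['href'])
                      for a in soup.find_all('a') if 'href' in a.attrs]
        return ' '.join(texts)


demo = _UnHtmlifyDemo()
print(demo._un_htmlify(
    '<p>See the <a href="http%3A%2F%2Fexample.com%2Fdata">dataset</a></p>'))
# -> 'See the  dataset http://example.com/data'
#    (text nodes are joined as-is, so internal whitespace is not normalized)
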
    def _extract_url(self, text):
        # first, make sure this is actually a URL (or URN) at all
        text = self._verify_url(text)
        if not text:
            return '', '', []
        url = self._tidy_text(unquote(text))
        base_url, values = break_url(url)
        # without a base_url there is nothing worth extracting
        values = (values.split(' ') + [base_url]) if base_url else []

        # hack: a bare dx.doi.org URL carries no identifier, so drop it
        if url == 'http://dx.doi.org':
            return '', '', []

        if 'dx.doi.org' in base_url:
            t = 'doi'
        elif 'hdl.handle.net' in base_url:
            t = 'hdl'
        else:
            t = 'url'

        # return the original extracted url, its tag, and the non-empty
        # tidied values (base_url included) for further extraction;
        # materialize the filter so every return path yields a list
        return url, t, list(filter(None, [self._tidy_text(v) for v in values]))
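
break_url is an external helper that is not shown in this example. Assuming it returns a (base_url, values) pair of host plus path/query text, a hypothetical urlparse-based stand-in is enough to show how the doi/hdl/url tagging above plays out:

# Hypothetical break_url stand-in -- the real helper lives elsewhere.
from urllib.parse import urlparse


def break_url(url):
    # split a URL into its host and a space-joined path/query string,
    # matching the (base_url, values) shape unpacked above
    parsed = urlparse(url)
    return parsed.netloc, ' '.join(p for p in (parsed.path, parsed.query) if p)


for url in ('http://dx.doi.org/10.1000/182',
            'http://hdl.handle.net/2027/spo.3336451.0004.203',
            'http://example.com/data?format=json'):
    base_url, values = break_url(url)
    if 'dx.doi.org' in base_url:
        tag = 'doi'
    elif 'hdl.handle.net' in base_url:
        tag = 'hdl'
    else:
        tag = 'url'
    print(tag, base_url, values.split(' '))
# doi dx.doi.org ['/10.1000/182']
# hdl hdl.handle.net ['/2027/spo.3336451.0004.203']
# url example.com ['/data', 'format=json']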