Example #1
0
def _open(resource_url):
    """
    Helper function that returns an open file object for a resource,
    given its resource URL.  If the given resource URL uses the "nltk:"
    protocol, or uses no protocol, then use ``find`` to locate the
    resource and return the result of calling ``open()`` on it; if the
    resource URL uses the "file:" protocol, then open the named file
    in binary mode (``'rb'``); otherwise, delegate to
    ``compat.urlopen``.

    Protocol names are compared case-insensitively.

    :type resource_url: str
    :param resource_url: A URL specifying where the resource should be
        loaded from.  The default protocol is "nltk:", which searches
        for the file in the NLTK data package.
    :return: An open file-like object for the resource.  The caller
        is responsible for closing it.
    """
    # Divide the resource name into "<protocol>:<path>".  Both parts of
    # the pattern are optional, so the match itself always succeeds.
    # A raw string is used so that ``\w`` reaches the regex engine
    # instead of being treated as an (invalid) string escape.
    protocol, path = re.match(r'(?:(\w+):)?(.*)', resource_url).groups()

    # Normalise once; a missing protocol falls through to the NLTK case.
    scheme = None if protocol is None else protocol.lower()

    if scheme is None or scheme == 'nltk':
        return find(path).open()
    elif scheme == 'file':
        # urllib might not use mode='rb', so handle this one ourselves:
        return open(path, 'rb')
    else:
        # Any other protocol is handed over unchanged, as the full URL.
        return compat.urlopen(resource_url)
Example #2
0
File: data.py Project: NitHUb/nltk
def _open(resource_url):
    """
    Return an open file object for the resource named by
    ``resource_url``.

    The URL is first passed through ``normalize_resource_url`` and then
    split into a protocol and a path with ``split_resource_url``.  The
    protocol (compared case-insensitively) selects how the resource is
    opened:

    - no protocol, or "nltk": ``find`` is called with the module-level
      search ``path`` extended by ``''``, and the result is opened;
    - "file": ``find`` is called with ``['']`` as the only search
      location, and the result is opened;
    - anything else: the normalized URL is handed to
      ``compat.urlopen``.

    :type resource_url: str
    :param resource_url: A URL specifying where the resource should be
        loaded from.  The default protocol is "nltk:".
    :return: The object returned by ``find(...).open()`` or by
        ``compat.urlopen``.
    """
    resource_url = normalize_resource_url(resource_url)
    protocol, _path = split_resource_url(resource_url)

    # A missing protocol is treated exactly like an explicit "nltk:".
    scheme = 'nltk' if protocol is None else protocol.lower()

    if scheme == 'nltk':
        search_locations = path + ['']
    elif scheme == 'file':
        search_locations = ['']
    else:
        # Not a locally resolvable protocol: delegate to urlopen.
        return compat.urlopen(resource_url)

    return find(_path, search_locations).open()
Example #3
0
def _open(resource_url):
    """
    Helper function that returns an open file object for a resource,
    given its resource URL.  If the given resource URL uses the "nltk:"
    protocol, or uses no protocol, then use ``find`` to locate the
    resource and return the result of calling ``open()`` on it; if the
    resource URL uses the "file:" protocol, then open the named file
    in binary mode (``'rb'``); otherwise, delegate to
    ``compat.urlopen``.

    Protocol names are compared case-insensitively.

    :type resource_url: str
    :param resource_url: A URL specifying where the resource should be
        loaded from.  The default protocol is "nltk:", which searches
        for the file in the NLTK data package.
    :return: An open file-like object for the resource.  The caller
        is responsible for closing it.
    """
    # Divide the resource name into "<protocol>:<path>".  Both parts of
    # the pattern are optional, so the match itself always succeeds.
    # A raw string is used so that ``\w`` reaches the regex engine
    # instead of being treated as an (invalid) string escape.
    protocol, path = re.match(r'(?:(\w+):)?(.*)', resource_url).groups()

    # Normalise once; a missing protocol falls through to the NLTK case.
    scheme = None if protocol is None else protocol.lower()

    if scheme is None or scheme == 'nltk':
        return find(path).open()
    elif scheme == 'file':
        # urllib might not use mode='rb', so handle this one ourselves:
        return open(path, 'rb')
    else:
        # Any other protocol is handed over unchanged, as the full URL.
        return compat.urlopen(resource_url)
Example #4
0
def clean_url(url):
    """
    Fetch the document at ``url`` and return it after passing its
    contents through ``clean_html``.

    :param url: The URL to fetch; it is passed unchanged to
        ``compat.urlopen``.
    :return: Whatever ``clean_html`` returns for the fetched contents.
    """
    response = compat.urlopen(url)
    try:
        html = response.read()
    finally:
        # Release the underlying connection even if reading fails;
        # the original left the response object open.
        response.close()
    return clean_html(html)
Example #5
0
def clean_url(url):
    """
    Fetch the document at ``url`` and return it after passing its
    contents through ``clean_html``.

    :param url: The URL to fetch; it is passed unchanged to
        ``compat.urlopen``.
    :return: Whatever ``clean_html`` returns for the fetched contents.
    """
    response = compat.urlopen(url)
    try:
        html = response.read()
    finally:
        # Release the underlying connection even if reading fails;
        # the original left the response object open.
        response.close()
    return clean_html(html)