Python wikipedia Examples

Programming Language: Python

Namespace/Package Name: modules.wikipedia

Method/Function: wikipedia

Examples at hotexamples.com: 2

Python wikipedia - 2 examples found. These are the top-rated real-world Python examples of modules.wikipedia.wikipedia, extracted from open-source projects. You can rate examples to help us improve their quality.

Example #1

Show file

File: head.py Project: Nuruddinjr/phenny

def gettitle(phenny, uri):
    """Return the title of the page at *uri* as ``"[ title ]"``, or ``None``.

    Apertium wiki, English Wiktionary and Wikipedia article URLs are not
    fetched here; they are handed to ``awik``, ``w`` and ``wikipedia``
    respectively and that handler's result is returned.  Every other URI is
    requested over HTTP, redirects are followed (giving up after 25), and the
    title is extracted from the body with ``r_title``.

    Args:
        phenny: Bot object.  ``phenny.config.blacklisted_urls`` (regex
            strings) is read, created as an empty list if missing, and the
            compiled patterns are cached on ``phenny.bot.blacklisted_urls``.
        uri: Address to look up.  ``http://`` is prepended when it contains
            no ``:``.

    Returns:
        The cleaned-up title wrapped in ``"[ ... ]"``; the delegated
        handler's return value for wiki URLs; or ``None`` when the URI is
        local or blacklisted, the request fails, the content type is not
        HTML/XHTML, or no non-empty title is found.
    """
    if ':' not in uri:
        uri = 'http://' + uri
    uri = uri.replace('#!', '?_escaped_fragment_=')

    # The wiki handlers are given a regex match object rather than a plain
    # string; group(2) of the dummy pattern carries the page name.
    apertium_prefix = 'http://wiki.apertium.org/wiki/'
    if uri.startswith(apertium_prefix):
        item = uri[len(apertium_prefix):]
        return awik(phenny, re.match(r'(blahblah)?(.*)', item))

    wiktionary_match = re.match(r'https?://en.wiktionary.org/wiki/(.*)', uri)
    if wiktionary_match:
        item = wiktionary_match.group(1)
        return w(phenny, re.match(r'(blahblah)?(.*)', web.unquote(item)))

    wikipedia_match = re.match(
        r'https?://([a-z]{2,3}).wikipedia.org/wiki/(.*)', uri)
    if wikipedia_match:
        lang, page = wikipedia_match.group(1), wikipedia_match.group(2)
        return wikipedia(phenny, page, lang)

    # Everything after the first dot is passed through web.quote; the part
    # before it (scheme and first host label) is left untouched.
    start, _, rest = uri.partition('.')
    uri = start + "." + web.quote(rest)

    title = None

    # Refuse to fetch addresses on the local machine.
    local_prefixes = (
        'http://localhost/', 'http://localhost:80/',
        'http://localhost:8080/', 'http://127.0.0.1/',
        'http://127.0.0.1:80/', 'http://127.0.0.1:8080/',
        'https://localhost/', 'https://localhost:80/',
        'https://localhost:8080/', 'https://127.0.0.1/',
        'https://127.0.0.1:80/', 'https://127.0.0.1:8080/',
        'http://localhost:', 'https://localhost:',
    )
    if uri.startswith(local_prefixes):
        return None

    # Compile the configured blacklist once and cache it on the bot.
    if not hasattr(phenny.config, 'blacklisted_urls'):
        phenny.config.blacklisted_urls = []
    if not hasattr(phenny.bot, 'blacklisted_urls'):
        phenny.bot.blacklisted_urls = []
        for pattern in phenny.config.blacklisted_urls:
            phenny.bot.blacklisted_urls.append(re.compile(pattern))
    if any(regex.match(uri) for regex in phenny.bot.blacklisted_urls):
        return None

    try:
        redirects = 0
        while True:
            try:
                info = web.head(uri)

                # web.head may return either the headers alone or a
                # [headers, status] list.
                if isinstance(info, list):
                    status = str(info[1])
                    info = info[0]
                else:
                    status = '200'
            except web.HTTPError:
                # HEAD was rejected; retry with a GET.
                # NOTE(review): no timeout is passed here, so this call can
                # block indefinitely -- consider adding one.
                try:
                    response = requests.get(
                        uri, headers=web.default_headers, verify=True)
                    status = str(response.status_code)
                    info = response.headers
                except web.HTTPError:
                    return None

            if not status.startswith('3'):
                break

            # A redirect without a usable Location header cannot be
            # followed; give up instead of raising.
            try:
                location = info['Location']
            except (KeyError, TypeError):
                return None
            uri = urllib.parse.urljoin(uri, location)

            redirects += 1
            if redirects >= 25:
                return None

        try:
            mtype = info['content-type']
        except Exception:
            return None

        if not mtype or not ('/html' in mtype or '/xhtml' in mtype):
            return None

        try:
            page = web.get(uri)
        except Exception:
            return None

    except web.ConnectionError:
        return None

    found = r_title.search(page)
    if not found:
        return title

    title = found.group(1).strip()
    for whitespace in ('\t', '\r', '\n'):
        title = title.replace(whitespace, ' ')
    while '  ' in title:
        title = title.replace('  ', ' ')
    if len(title) > 200:
        title = title[:200] + '[...]'

    def decode_entity(match):
        """Replace one character reference, leaving unknown ones as-is."""
        entity = match.group(0)
        try:
            if entity[:3].lower() == '&#x':
                return chr(int(entity[3:-1], 16))
            if entity.startswith('&#'):
                return chr(int(entity[2:-1]))
            return chr(name2codepoint[entity[1:-1]])
        except (KeyError, ValueError, OverflowError):
            # Unknown name, malformed number or out-of-range code point:
            # keep the original text rather than failing the whole lookup.
            return entity

    title = r_entity.sub(decode_entity, title)

    if not title:
        return None

    # Decoded references such as &#10; can reintroduce line breaks.
    title = title.replace('\n', '').replace('\r', '')
    return "[ {0} ]".format(title)

Example #2

Show file

def gettitle(phenny, input, uri):
    """Return the title of the page at *uri* as ``"[ title ]"``, or ``None``.

    Apertium wiki, English Wiktionary and Wikipedia article URLs are not
    fetched here; they are handed to ``apertium_wiki.awik``,
    ``wiktionary.w`` and ``wikipedia.wikipedia`` respectively and that
    handler's result is returned.  Every other URI is requested over HTTP,
    redirects are followed (giving up after 25), and the title is extracted
    from the body with ``r_title``.

    Args:
        phenny: Bot object.  ``phenny.config.blacklisted_urls`` (regex
            strings) is read, created as an empty list if missing, and the
            compiled patterns are cached on ``phenny.blacklisted_urls``.
        input: Not used by this function; kept for the existing call
            signature.
        uri: Address to look up.  ``http://`` is prepended when it contains
            no ``:``.

    Returns:
        The cleaned-up title wrapped in ``"[ ... ]"``; the delegated
        handler's return value for wiki URLs; or ``None`` when the URI is
        local or blacklisted, the request fails, the content type is not
        HTML/XHTML, or no non-empty title is found.
    """
    if ':' not in uri:
        uri = 'http://' + uri
    uri = uri.replace('#!', '?_escaped_fragment_=')

    # The wiki handlers are given a regex match object rather than a plain
    # string; group(2) of the dummy pattern carries the page name.
    apertium_prefix = 'http://wiki.apertium.org/wiki/'
    if uri.startswith(apertium_prefix):
        item = uri[len(apertium_prefix):]
        return apertium_wiki.awik(phenny,
                                  re.match(r'(blahblah)?(.*)()', item))

    wiktionary_match = re.match(r'https?://en.wiktionary.org/wiki/(.*)', uri)
    if wiktionary_match:
        item = wiktionary_match.group(1)
        return wiktionary.w(phenny,
                            re.match(r'(blahblah)?(.*)()', web.unquote(item)))

    wikipedia_match = re.match(
        r'https?://([a-z]{2,3}).wikipedia.org/wiki/(.*)', uri)
    if wikipedia_match:
        lang, page = wikipedia_match.group(1), wikipedia_match.group(2)
        return wikipedia.wikipedia(phenny, page, lang)

    # Everything after the first dot is passed through web.quote (keeping
    # '/' and '#' unescaped); the part before it is left untouched.
    start, _, rest = uri.partition('.')
    uri = start + "." + web.quote(rest, safe='/#')

    title = None

    # Refuse to fetch addresses on the local machine.
    local_prefixes = (
        'http://localhost/',
        'http://localhost:80/',
        'http://localhost:8080/',
        'http://127.0.0.1/',
        'http://127.0.0.1:80/',
        'http://127.0.0.1:8080/',
        'https://localhost/',
        'https://localhost:80/',
        'https://localhost:8080/',
        'https://127.0.0.1/',
        'https://127.0.0.1:80/',
        'https://127.0.0.1:8080/',
        'http://localhost:',
        'https://localhost:',
    )
    if uri.startswith(local_prefixes):
        return None

    # Compile the configured blacklist once and cache it on the bot.
    if not hasattr(phenny.config, 'blacklisted_urls'):
        phenny.config.blacklisted_urls = []
    if not hasattr(phenny, 'blacklisted_urls'):
        phenny.blacklisted_urls = []
        for pattern in phenny.config.blacklisted_urls:
            phenny.blacklisted_urls.append(re.compile(pattern))
    if any(regex.match(uri) for regex in phenny.blacklisted_urls):
        return None

    try:
        redirects = 0
        while True:
            try:
                info = web.head(uri)

                # web.head may return either the headers alone or a
                # [headers, status] list.
                if isinstance(info, list):
                    status = str(info[1])
                    info = info[0]
                else:
                    status = '200'
            except web.HTTPError:
                # HEAD was rejected; retry with a GET.
                try:
                    response = requests.get(uri,
                                            headers=web.default_headers,
                                            verify=True,
                                            timeout=REQUEST_TIMEOUT)
                    status = str(response.status_code)
                    info = response.headers
                except web.HTTPError:
                    return None

            if not status.startswith('3'):
                break

            # A redirect without a usable Location header cannot be
            # followed; give up explicitly.
            try:
                location = info['Location']
            except (KeyError, TypeError):
                return None
            uri = urllib.parse.urljoin(uri, location)

            redirects += 1
            if redirects >= 25:
                return None

        try:
            mtype = info['content-type']
        except Exception:
            return None

        if not mtype or not ('/html' in mtype or '/xhtml' in mtype):
            return None

        try:
            page = web.get(uri)
        except Exception:
            return None

    except Exception:
        # Best effort: any failure while fetching means "no title".
        return None

    found = r_title.search(page)
    if not found:
        return title

    title = found.group(1).strip()
    for whitespace in ('\t', '\r', '\n'):
        title = title.replace(whitespace, ' ')
    while '  ' in title:
        title = title.replace('  ', ' ')
    if len(title) > 200:
        title = title[:200] + '[...]'

    def decode_entity(match):
        """Replace one character reference, leaving unknown ones as-is."""
        entity = match.group(0)
        try:
            if entity[:3].lower() == '&#x':
                return chr(int(entity[3:-1], 16))
            if entity.startswith('&#'):
                return chr(int(entity[2:-1]))
            return chr(name2codepoint[entity[1:-1]])
        except (KeyError, ValueError, OverflowError):
            # Unknown name, malformed number or out-of-range code point:
            # keep the original text rather than raising to the caller.
            return entity

    title = r_entity.sub(decode_entity, title)

    if not title:
        return None

    # Decoded references such as &#10; can reintroduce line breaks.
    title = title.replace('\n', '').replace('\r', '')
    return "[ {0} ]".format(title)