Example 1
def unbitly(jenni, input):
    '''.longurl <link> -- obtain the final destination URL from a short URL'''
    url = input.group(2)
    if not url:
        if hasattr(jenni.bot, 'last_seen_uri') and input.sender in jenni.bot.last_seen_uri:
            url = jenni.bot.last_seen_uri[input.sender]
        else:
            return jenni.say('No URL provided')
    if not url.startswith(('http://', 'https://')):
        url = 'http://' + url

    status, useful = proxy.get_more(url)
    try:
        # re_meta is a module-level regex defined elsewhere in this module.
        new_url = re_meta.findall(useful['read'])
    except (KeyError, TypeError):
        # useful may be an error string rather than a dict if the fetch failed.
        return jenni.say(str(useful))

    if new_url:
        new_url = new_url[0]
    else:
        url = url.replace("'", r"\'")
        try:
            status, results = proxy.get_more(url)
            new_url = results['geturl']
        except Exception:
            return jenni.say('Failed to grab URL: %s' % url)

    if new_url.startswith(('http://', 'https://')):
        jenni.say(new_url)
    else:
        jenni.say('Failed to obtain final destination.')
Example 2
def unbitly(jenni, input):
    """.longurl <link> -- obtain the final destination URL from a short URL"""
    url = input.group(2)
    if not url:
        if hasattr(jenni, "last_seen_uri") and input.sender in jenni.last_seen_uri:
            url = jenni.last_seen_uri[input.sender]
        else:
            return jenni.say("No URL provided")
    if not url.startswith(("http://", "https://")):
        url = "http://" + url

    status, useful = proxy.get_more(url)
    try:
        # re_meta is a module-level regex defined elsewhere in this module.
        new_url = re_meta.findall(useful["read"])
    except (KeyError, TypeError):
        # useful may be an error string rather than a dict if the fetch failed.
        return jenni.say(str(useful))

    if new_url:
        new_url = new_url[0]
    else:
        url = url.replace("'", r"\'")
        try:
            status, results = proxy.get_more(url)
            new_url = results["geturl"]
        except Exception:
            return jenni.say("Failed to grab URL: %s" % url)

    if new_url.startswith(("http://", "https://")):
        jenni.say(new_url)
    else:
        jenni.say("Failed to obtain final destination.")
Example 3
def lookup(typ, objid):
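    # '%ss' appends a literal 's' to typ, so ('track', <id>) maps to /tracks/<id>.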
    url = "https://%s%s/%ss/%s" % (API_URL, API_ENDPOINT, typ, objid)

    success, response = proxy.get_more(url)

    if not success:
        raise Exception("Unable to connect to proxy: {0}".format(response))

    if response['code'] == 200:
        result = json.loads(response['read'])
        return result

    try:
        # SpotifyStatusCodes (defined elsewhere) maps HTTP codes to exceptions.
        raise SpotifyStatusCodes[response['code']]
    except (KeyError, ValueError):
        raise Exception("HTTP Error {0}".format(response['code']))
Example 4
File: url.py Project: J3RN/jenni
def find_title(url):
    """
    This finds the title when provided with a string of a URL.
    """

    for item in IGNORE:
        if item in url:
            return False, 'ignored'

    if not re.search('^((https?)|(ftp))://', url):
        url = 'http://' + url

    # Rewrite hash-bang URLs per Google's old AJAX-crawling scheme.
    if '/#!' in url:
        url = url.replace('/#!', '/?_escaped_fragment_=')

    # Rewrite i.imgur.com image links to the page URL by dropping the
    # 'i' host prefix and the trailing file extension.
    if 'i.imgur' in url:
        a = url.split('.')
        url = a[0][:-1] + '.'.join(a[1:-1])

    if 'zerobin.net' in url:
        return True, 'ZeroBin'

    url = uc.decode(url)

    msg = str()
    k = 0
    status = False

    while not status:
        k += 1
        if k > 3:
            break

        msg = dict()

        try:
            status, msg = proxy.get_more(url)
        except Exception:
            # Fall back to the secondary fetcher if the proxy fails.
            try:
                status, msg = get_page_backup(url)
            except Exception:
                continue

        if isinstance(msg, dict) and 'code' in msg:
            status = msg['code']
        else:
            continue

        time.sleep(0.5)

    if not status:
        return False, msg

    useful = msg

    info = useful['headers']
    page = useful['read']

    try:
        mtype = info['content-type']
    except (KeyError, TypeError):
        print 'failed mtype:', str(info)
        return False, 'mtype failed'
    if not (('/html' in mtype) or ('/xhtml' in mtype)):
        return False, str(mtype)

    content = page
    # Normalize '<title ...>' open/close tags to bare '<title>' tags, then
    # drop quoted '<title>' literals so they are not mistaken for the real tag.
    regex = re.compile('<(/?)title( [^>]+)?>', re.IGNORECASE)
    content = regex.sub(r'<\1title>', content)
    regex = re.compile('[\'"]<title>[\'"]', re.IGNORECASE)
    content = regex.sub('', content)
    start = content.find('<title>')
    if start == -1:
        return False, 'NO <title> found'
    end = content.find('</title>', start)
    if end == -1:
        return False, 'NO </title> found'
    content = content[start + 7:end]
    content = content.strip()
    title = content

    if len(title) > 200:
        title = title[:200] + '[...]'

    def e(m):
        entity = m.group()
        if entity.startswith('&#x'):
            cp = int(entity[3:-1], 16)
            meep = unichr(cp)
        elif entity.startswith('&#'):
            cp = int(entity[2:-1])
            meep = unichr(cp)
        else:
            entity_stripped = entity[1:-1]
            try:
                char = name2codepoint[entity_stripped]
                meep = unichr(char)
            except KeyError:
                if entity_stripped in HTML_ENTITIES:
                    meep = HTML_ENTITIES[entity_stripped]
                else:
                    meep = str()
        try:
            return uc.decode(meep)
        except Exception:
            return uc.decode(uc.encode(meep))

    title = r_entity.sub(e, title)

    title = title.replace('\n', '')
    title = title.replace('\r', '')

    def remove_spaces(x):
        if '  ' in x:
            x = x.replace('  ', ' ')
            return remove_spaces(x)
        else:
            return x

    title = remove_spaces(title)

    new_title = str()
    for char in title:
        # Keep only characters whose encoded form is at most 3 bytes long.
        unichar = uc.encode(char)
        if len(unichar) <= 3:
            new_title += unichar
    title = new_title

    # Remove 'DCC SEND' sequences, which were used in an old mIRC exploit.
    title = re.sub(r'(?i)dcc\ssend', '', title)

    if not title:
        return False, 'No Title'

    # '\x0F' is the IRC control code that resets colours and formatting.
    title += '\x0F'
    return True, title
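Stripped of jenni's proxy layer, retry loop, and IRC-specific cleanup, the core of find_title is "fetch the page, cut out the first <title> element, tidy the whitespace". A minimal self-contained sketch of just that core (fetch_title is a hypothetical name, not jenni's API):

import re
import urllib2

def fetch_title(url, timeout=10):
    # Fetch the page and return the text of its first <title> element,
    # or None; no retries, entity decoding, or encoding filters here.
    try:
        page = urllib2.urlopen(url, timeout=timeout).read()
    except (urllib2.URLError, ValueError):
        return None
    match = re.search(r'(?is)<title[^>]*>(.*?)</title>', page)
    if not match:
        return None
    return ' '.join(match.group(1).split())  # collapse runs of whitespace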
Example 5
def find_title(url):
    """
    This finds the title when provided with a string of a URL.
    """
    uri = url

    for item in IGNORE:
        if item in uri:
            return False, "ignored"

    if not re.search("^((https?)|(ftp))://", uri):
        uri = "http://" + uri

    if "/#!" in uri:
        uri = uri.replace("/#!", "/?_escaped_fragment_=")

    if "i.imgur" in uri:
        a = uri.split(".")
        uri = a[0][:-1] + ".".join(a[1:-1])

    if "zerobin.net" in uri:
        return True, "ZeroBin"

    uri = uc.decode(uri)

    msg = str()
    k = 0
    status = False

    while not status:
        k += 1
        if k > 3:
            break

        msg = dict()

        try:
            status, msg = proxy.get_more(uri)
        except Exception:
            # Fall back to the secondary fetcher if the proxy fails.
            try:
                status, msg = get_page_backup(uri)
            except Exception:
                continue

        if isinstance(msg, dict) and "code" in msg:
            status = msg["code"]
        else:
            continue

        time.sleep(0.5)

    if not status:
        return False, msg

    useful = msg

    info = useful["headers"]
    page = useful["read"]

    try:
        mtype = info["content-type"]
    except (KeyError, TypeError):
        print "failed mtype:", str(info)
        return False, "mtype failed"
    if not (("/html" in mtype) or ("/xhtml" in mtype)):
        return False, str(mtype)

    content = page
    regex = re.compile("<(/?)title( [^>]+)?>", re.IGNORECASE)
    content = regex.sub(r"<\1title>", content)
    regex = re.compile("['\"]<title>['\"]", re.IGNORECASE)
    content = regex.sub("", content)
    start = content.find("<title>")
    if start == -1:
        return False, "NO <title> found"
    end = content.find("</title>", start)
    if end == -1:
        return False, "NO </title> found"
    content = content[start + 7 : end]
    content = content.strip()
    title = content

    if len(title) > 200:
        title = title[:200] + "[...]"

    def e(m):
        entity = m.group()
        if entity.startswith("&#x"):
            cp = int(entity[3:-1], 16)
            meep = unichr(cp)
        elif entity.startswith("&#"):
            cp = int(entity[2:-1])
            meep = unichr(cp)
        else:
            entity_stripped = entity[1:-1]
            try:
                char = name2codepoint[entity_stripped]
                meep = unichr(char)
            except KeyError:
                if entity_stripped in HTML_ENTITIES:
                    meep = HTML_ENTITIES[entity_stripped]
                else:
                    meep = str()
        try:
            return uc.decode(meep)
        except Exception:
            return uc.decode(uc.encode(meep))

    title = r_entity.sub(e, title)

    title = title.replace("\n", "")
    title = title.replace("\r", "")

    def remove_spaces(x):
        if "  " in x:
            x = x.replace("  ", " ")
            return remove_spaces(x)
        else:
            return x

    title = remove_spaces(title)

    new_title = str()
    for char in title:
        # Keep only characters whose encoded form is at most 3 bytes long.
        unichar = uc.encode(char)
        if len(unichar) <= 3:
            new_title += unichar
    title = new_title

    # Remove "DCC SEND" sequences, which were used in an old mIRC exploit.
    title = re.sub(r"(?i)dcc\ssend", "", title)

    if not title:
        return False, "No Title"

    # "\x0F" is the IRC control code that resets colours and formatting.
    title += "\x0F"
    return True, title