def unbitly(jenni, input):
    '''.longurl <link> -- obtain the final destination URL from a short URL'''
    url = input.group(2)
    if not url:
        # Fall back to the most recently seen URL in this channel, if any.
        if hasattr(jenni, 'last_seen_uri') and input.sender in jenni.bot.last_seen_uri:
            url = jenni.bot.last_seen_uri[input.sender]
        else:
            return jenni.say('No URL provided')
    if not url.startswith(('http://', 'https://')):
        url = 'http://' + url

    status, useful = proxy.get_more(url)
    try:
        # Look for a <meta> refresh/redirect target in the fetched page.
        new_url = re_meta.findall(useful['read'])
    except:
        return jenni.say(str(useful))

    if new_url:
        new_url = new_url[0]
    else:
        # No meta redirect found; fall back to the final URL after HTTP redirects.
        url = url.replace("'", r"\'")
        try:
            status, results = proxy.get_more(url)
            new_url = results['geturl']
        except:
            return jenni.say('Failed to grab URL: %s' % (url))

    if new_url.startswith(('http://', 'https://')):
        jenni.say(new_url)
    else:
        jenni.say('Failed to obtain final destination.')
def unbitly(jenni, input): """.longurl <link> -- obtain the final destination URL from a short URL""" url = input.group(2) if not url: if hasattr(jenni, "last_seen_uri") and input.sender in jenni.last_seen_uri: url = jenni.last_seen_uri[input.sender] else: return jenni.say("No URL provided") if not url.startswith(("http://", "https://")): url = "http://" + url status, useful = proxy.get_more(url) try: new_url = re_meta.findall(useful["read"]) except: return jenni.say(str(useful)) if new_url: new_url = new_url[0] else: url = url.replace("'", r"\'") try: status, results = proxy.get_more(url) new_url = results["geturl"] except: return jenni.say("Failed to grab URL: %s" % (url)) if new_url.startswith(("http://", "https://")): jenni.say(new_url) else: jenni.say("Failed to obtain final destination.")
def lookup(typ, objid):
    url = "https://%s%s/%ss/%s" % (API_URL, API_ENDPOINT, typ, objid)
    success, response = proxy.get_more(url)
    if not success:
        raise Exception("Unable to connect to proxy: {0}".format(response))
    if response['code'] == 200:
        result = json.loads(response['read'])
        return result
    # Map the HTTP status code to a module-specific exception if one exists;
    # otherwise fall back to a generic HTTP error.
    try:
        raise SpotifyStatusCodes[response['code']]
    except (KeyError, ValueError):
        raise Exception("HTTP Error {0}".format(response['code']))
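# Hedged usage sketch (not from the source): 'track' and the object ID below
# are illustrative placeholders, and SpotifyStatusCodes is assumed to be a
# module-level dict mapping HTTP status codes to exceptions defined elsewhere.
try:
    track = lookup('track', '6rqhFgbbKwnb9MLmUQDhG6')
except Exception as exc:
    print 'Spotify lookup failed:', exc
else:
    print 'Found track:', track.get('name')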
def find_title(url):
    """ This finds the title when provided with a string of a URL. """
    for item in IGNORE:
        if item in url:
            return False, 'ignored'
    if not re.search('^((https?)|(ftp))://', url):
        url = 'http://' + url
    if '/#!' in url:
        # Rewrite hash-bang URLs to their crawlable equivalent.
        url = url.replace('/#!', '/?_escaped_fragment_=')
    if 'i.imgur' in url:
        # Fetch the imgur page instead of the raw image so a title exists.
        a = url.split('.')
        url = a[0][:-1] + '.'.join(a[1:-1])
    if 'zerobin.net' in url:
        return True, 'ZeroBin'

    url = uc.decode(url)
    msg = str()
    k = 0
    status = False
    # Try up to three times to fetch the page, falling back to the backup
    # fetcher if the proxy fails.
    while not status:
        k += 1
        if k > 3:
            break
        msg = dict()
        try:
            status, msg = proxy.get_more(url)
        except:
            try:
                status, msg = get_page_backup(url)
            except:
                continue
        if type(msg) == type(dict()) and 'code' in msg:
            status = msg['code']
        else:
            continue
        time.sleep(0.5)
    if not status:
        return False, msg

    useful = msg
    info = useful['headers']
    page = useful['read']

    try:
        mtype = info['content-type']
    except:
        print 'failed mtype:', str(info)
        return False, 'mtype failed'
    if not (('/html' in mtype) or ('/xhtml' in mtype)):
        return False, str(mtype)

    content = page
    # Normalise <title> tags (case and attributes) before searching.
    regex = re.compile('<(/?)title( [^>]+)?>', re.IGNORECASE)
    content = regex.sub(r'<\1title>', content)
    regex = re.compile('[\'"]<title>[\'"]', re.IGNORECASE)
    content = regex.sub('', content)
    start = content.find('<title>')
    if start == -1:
        return False, 'NO <title> found'
    end = content.find('</title>', start)
    if end == -1:
        return False, 'NO </title> found'
    content = content[start + 7:end]
    content = content.strip('\n').strip()

    title = content
    if len(title) > 200:
        title = title[:200] + '[...]'

    def e(m):
        # Decode a single HTML entity match into a unicode character.
        entity = m.group()
        if entity.startswith('&#x'):
            cp = int(entity[3:-1], 16)
            meep = unichr(cp)
        elif entity.startswith('&#'):
            cp = int(entity[2:-1])
            meep = unichr(cp)
        else:
            entity_stripped = entity[1:-1]
            try:
                char = name2codepoint[entity_stripped]
                meep = unichr(char)
            except:
                if entity_stripped in HTML_ENTITIES:
                    meep = HTML_ENTITIES[entity_stripped]
                else:
                    meep = str()
        try:
            return uc.decode(meep)
        except:
            return uc.decode(uc.encode(meep))

    title = r_entity.sub(e, title)
    title = title.replace('\n', '')
    title = title.replace('\r', '')

    def remove_spaces(x):
        # Collapse runs of spaces down to single spaces.
        if '  ' in x:
            x = x.replace('  ', ' ')
            return remove_spaces(x)
        else:
            return x

    title = remove_spaces(title)

    # Drop characters whose UTF-8 encoding is longer than three bytes.
    new_title = str()
    for char in title:
        unichar = uc.encode(char)
        if len(unichar) <= 3:
            new_title += unichar
    title = new_title

    title = re.sub(r'(?i)dcc\ssend', '', title)
    # \x0F resets any IRC formatting codes that leaked into the title.
    title += '\x0F'

    if title:
        return True, title
    else:
        return False, 'No Title'
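# Hedged usage sketch (not from the source): find_title returns a
# (success, result) pair where result is either the page title or a short
# failure reason. The URL below is an illustrative placeholder.
ok, result = find_title('https://www.example.com/')
if ok:
    print 'Title:', result
else:
    print 'No title:', result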