def change_path(url, pattern, group_marker=None):
    """Rewrite *url* according to *pattern* so the request hits the
    service's preview page instead of the redirect itself.

    *pattern* is read from the database and must contain the
    placeholders ``{scheme}``, ``{netloc}`` and ``{path}``, e.g.::

        {scheme}://{netloc}/info{path}

    which turns ``http://url.com/XOon1ron`` into
    ``http://url.com/info/XOon1ron``.  Raises ``ValueError`` when a
    placeholder is missing from the pattern.
    """
    for placeholder in ("{scheme}", "{netloc}", "{path}"):
        if placeholder not in pattern:
            raise ValueError(
                "Pattern is missing crucial pattern: {0}".format(placeholder))
    parts = urlparse(url)
    substitutions = (
        ("{scheme}", parts.scheme),
        ("{netloc}", parts.netloc),
        ("{path}", parts.path),
    )
    new_url = pattern
    for placeholder, value in substitutions:
        new_url = new_url.replace(placeholder, value)
    # Collapse accidental double slashes that appear when the pattern and
    # the original path both contribute a "/".
    reparsed = urlparse(new_url)
    if "//" in reparsed.path:
        new_url = "{0}://{1}{2}".format(reparsed.scheme, reparsed.netloc,
                                        reparsed.path.replace("//", "/"))
    response = make_request(new_url, allow_redir=True)
    # Forward the group marker so get_long_url can tell multi-domain
    # services apart.
    if group_marker:
        response.group_marker = group_marker
    return get_long_url(url, response)
def surlhu(url):
    """Resolve a surl.hu short link through the service's preview page."""
    short_id = urlparse(url).path[1:]
    preview_url = "http://surl.hu/s.php/{0}".format(short_id)
    return get_long_url(url, make_request(preview_url))
def tetl(url):
    """Extracts the destination URL of the te.tl shortened link.

    te.tl serves its preview page on the "www." subdomain, so the host
    is rewritten accordingly before the request is made.
    """
    up = urlparse(url)
    # BUG FIX: the host was prefixed unconditionally, producing
    # "www.www.te.tl" when the caller already passed the preview host.
    if up.netloc.startswith("www."):
        netloc = up.netloc
    else:
        netloc = "www.{0}".format(up.netloc)
    new_url = "{0}://{1}{2}".format(up.scheme, netloc, up.path)
    response = make_request(new_url)
    return get_long_url(url, response)
def generic_extraction(url, allow_redir=False, group_marker=None):
    """Fetch the short URL's page and filter it for the long URL.

    Used whenever a shortening service needs no special treatment: the
    page (or redirect) behind *url* is requested as-is and handed to
    ``get_long_url``.  *group_marker*, when given, is attached to the
    response so multi-domain services can be told apart.
    """
    response = make_request(url, allow_redir=allow_redir)
    if group_marker:
        response.group_marker = group_marker
    result = get_long_url(url, response)
    return result
def tinypl(url):
    """Extracts the destination URL of a tiny.pl shortened URL."""
    # Drop a trailing "!" or "/" so only the bare token remains.
    if url.endswith(("!", "/")):
        url = url[:-1]
    up = urlparse(url)
    token = up.path[1:]
    preview_url = "{0}://{1}/co.php?d={2}".format(up.scheme, up.netloc, token)
    return get_long_url(url, make_request(preview_url))
def minume(url):
    """Custom routine for minu.me shortened URLs."""
    # Appending "p" as an extra path segment switches minu.me to its
    # preview page.
    separator = "" if url.endswith("/") else "/"
    preview_url = "{0}{1}p".format(url, separator)
    return get_long_url(url, make_request(preview_url))
def xavcc(url):
    """Extracts the destination URL of xav.cc shortened URLs."""
    # xav.cc answers with 403 for default user agents (unclear why), so
    # a custom one is sent with the request.
    headers = {"User-Agent": "I'm a tiny spider :]"}
    response = make_request(url, headers=headers)
    if response.status_code in (301, 302):
        # The service redirected us; fall back to the meta-refresh parser.
        return meta_redirect(url)
    return get_long_url(url, response)
def append_char(url, char, group_marker=None, allow_redir=True):
    """Force a service's preview page by appending *char* to *url*.

    *group_marker* is attached to the response for ``get_long_url``,
    which needs it when a single shortening service runs several
    domains.
    """
    preview_url = "{0}{1}".format(url, char)
    response = make_request(preview_url, allow_redir=allow_redir)
    if group_marker:
        response.group_marker = group_marker
    return get_long_url(url, response)
def decenturl(url):
    """Uses the decenturl.com API to expand the passed URL."""
    api_url = "http://decenturl.com/api-resolve?d={0}".format(url)
    response = make_request(api_url, allow_redir=True, is_json=True)
    # Point the response back at the original short URL.
    response.url = url
    if not response.success:
        return response
    if "ok" not in response.source:
        response.error_msg = "Failed to extract destination URL"
        return response
    # The API answers with a pair; the second element is the long URL.
    response.long_url = response.source[1]
    return response
def metamark(url):
    """Expands the metamark.net shortened URL."""
    api_endpoint = "http://metamark.net/api/rest/simple"
    response = make_request(api_endpoint, method="post",
                            data={"short_url": url})
    if not response.success:
        return response
    body = response.source.strip()
    if "ERROR" in response.source or not body:
        response.error_msg = "Failed to extract the destination URL"
    else:
        response.long_url = body
        response.url = url
    return response
def safe(url):
    """Expand the passed URL through the safe.mn JSON API."""
    api_url = "http://safe.mn/api/expand?short_url={0}&format=json".format(url)
    response = make_request(api_url, allow_redir=True, is_json=True)
    # Point the response back at the original short URL.
    response.url = url
    if not response.success:
        return response
    if "error" in response.source:
        response.error_msg = response.source["error"]
    else:
        response.long_url = response.source["url"]
    return response
def mtnymobi(url):
    """Resolve a mtny.mobi short URL through its JSON lookup API."""
    short_id = urlparse(url).path[1:]
    lookup_url = "http://mtny.mobi/api/lookup.php?type=json&id={0}".format(
        short_id)
    response = make_request(lookup_url, is_json=True)
    # Point the response back at the original short URL.
    response.url = url
    if not response.success:
        return response
    destination = response.source.get("url")
    if destination:
        response.long_url = destination
    else:
        response.error_msg = response.source["message"]
    return response
def smrl(url):
    """Extracts the destination of a smrl.tk shortened URL (or one of
    its domains).

    smrl.tk and its sister domains show the destination inside an
    iframe served by srv.smrls.net; the short URL plus an extra "/info"
    path has to be sent as the Referer header of a GET request to that
    host.
    """
    if url.endswith("/"):
        url = url[:-1]
    up = urlparse(url)
    info_page = "{0}://{1}{2}/info".format(up.scheme, up.netloc, up.path)
    response = make_request("http://srv.smrls.net/",
                            headers={"Referer": info_page})
    response.group_marker = "__smrl__"
    return get_long_url(url, response)
def chilp(url):
    """Uses the API of chilp.it to obtain the destination of a shortened URL.

    Returns the response object; on success ``response.long_url`` holds
    the destination and ``response.url`` the original short URL.
    """
    api_url = "http://p.chilp.it/api.php?"
    up = urlparse(url)
    new_url = "{0}{1}".format(api_url, up.path[1:])
    response = make_request(new_url)
    if not response.success:
        # BUG FIX: this path previously returned None (bare "return");
        # every other extractor hands the failed response back to the
        # caller so it can inspect error details.
        return response
    body = response.source.strip()
    if not body:
        response.error_msg = "Failed to obtain the destination URL"
    else:
        response.long_url = body
        response.url = url
    return response
def zumlink(url):
    """Extracts the destination URL or URLs from the zumlink.com
    shortened URL.

    The service shortens both single links (plain redirect) and bundles
    of multiple links (preview page); both variants are handled here.
    """
    response = make_request(url, allow_redir=False)
    if not response.success:
        return response
    if "location" in response.headers:
        # Single link: the redirect header already carries the target.
        response.long_url = response.headers["location"]
    elif response.source:
        # Link bundle: parse the preview page for the targets.
        response = get_long_url(url, response)
    else:
        response.error_msg = "Failed to extract the destination URL o_O"
        response.success = False
    return response
def read_headers(url, parameter="location", **kwargs):
    """Return the destination URL taken from the response headers.

    *parameter* names the header field expected to carry the long URL
    (by default the "location" field of a redirect).  Extra keyword
    arguments override the request defaults passed to ``make_request``.
    """
    options = {"method": "get", "allow_redir": False, "headers": None}
    options.update(kwargs)
    response = make_request(url, **options)
    if not response.success:
        return response
    if parameter in response.headers:
        # Expose the long url on the outer body of the response.
        response.long_url = response.headers[parameter]
    else:
        response.error_msg = \
            "Header is missing the '{0}' parameter!".format(parameter)
    return response
def insert_subdomain(url, subdomain):
    """Insert the passed subdomain into the passed url and filter for
    the destination URL.

    For convenience the subdomain can also be "www." if a service
    relies on it; a trailing dot is stripped automatically.
    """
    # Remove the trailing dot inside the subdomain ("www." -> "www").
    if subdomain.endswith("."):
        subdomain = subdomain[:-1]
    up = urlparse(url)
    # BUG FIX: the old substring test (subdomain in up.netloc) could
    # false-positive (e.g. "vice" in "service.com") and wrongly skip the
    # rewrite; check the actual leading host label instead.
    if up.netloc.startswith(subdomain + "."):
        new_url = url
    else:
        new_url = "{0}://{1}.{2}{3}".format(up.scheme, subdomain,
                                            up.netloc, up.path)
    response = make_request(new_url)
    # Handle the parsing and return the informations.
    return get_long_url(url, response)
def meta_redirect(url):
    """Extract the destination URL from a meta-refresh tag.

    Shorteners that redirect via ``<meta http-equiv="refresh"
    content="0;URL=...">`` are handled here: the tag's content value is
    filtered for the destination URL.
    """
    response = make_request(url)
    if not response.success:
        return response
    try:
        soup = BeautifulSoup(response.source)
        long_url = soup.meta["content"]
        if not long_url:
            raise KeyError
        # Strip the refresh prefix, leaving only the URL itself.
        for trash in ("0;URL=", "0;url="):
            long_url = long_url.replace(trash, "")
    except (KeyError, AttributeError, TypeError):
        # BUG FIX: TypeError added -- soup.meta is None when the page has
        # no <meta> tag at all, and subscripting None raised an uncaught
        # TypeError instead of reporting a clean failure.
        response.error_msg = "Failed to extract the destination URL"
    else:
        response.long_url = long_url
    return response
def budurl(url):
    """Expand a budurl.com short link through the service's JSON API.

    Returns the response object; on failure ``error_msg`` explains why
    the expansion did not succeed.
    """
    api_url = "http://budurl.com/api/v1/budurls/expand?budurl={0}".format(url)
    response = make_request(api_url, allow_redir=True, is_json=True)
    # Restore the original short URL on the response.
    response.url = url
    if not response.success:
        return response
    if not response.source["success"]:
        # The API reported an error of its own.
        response.error_msg = response.source["error_msg"]
    else:
        # Add the long url to the outer body of the response.
        response.long_url = response.source["long_url"]
    return response
def yep(url):
    """Partially uses the API of yep.it to extract the destination URL."""
    preview_url = "http://yep.it/preview.php?p={0}".format(url)
    response = make_request(preview_url, allow_redir=True)
    # BUG FIX: the return value of get_long_url was previously discarded
    # (bare "get_long_url(url, response)"); assign it like every other
    # extractor in this module does.
    response = get_long_url(url, response)
    response.url = url
    if not response.success:
        return response
    try:
        # The preview page shows the destination in the second <font> tag.
        soup = BeautifulSoup(response.source)
        long_url = soup.div.find_all("font")[1].text
        if not long_url:
            raise KeyError
    except (KeyError, IndexError, AttributeError):
        # BUG FIX: AttributeError added -- soup.div is None when the page
        # has no <div>, which previously crashed instead of reporting a
        # clean failure.
        response.error_msg = "Failed to extract destination URL"
    else:
        response.long_url = long_url
    return response
def zapitnu(url):
    """Extracts the long URL of zapit.nu shortened links."""
    # A trailing "+" switches zapit.nu to its preview page; strip an
    # already-present one first so it is not doubled below.
    if url.endswith("+"):
        url = url[:-1]
    preview_response = make_request("{0}+".format(url))
    return get_long_url(url, preview_response)