Example #1
def change_path(url, pattern, group_marker=None):
    """ A generic method to change the path of the passed url to issue the preview
        page instead of the redirection page. """
    # The passed argument 'pattern' is a string which was read from the database,
    # a pattern example:
    #     {scheme}://{netloc}/info{path}
    # Parse the url and replace the placeholder values in the pattern with the
    # actual parts of the url.
    #     http://url.com/info/XOon1ron
    for pat in ("{scheme}", "{netloc}", "{path}"):
        if pat not in pattern:
            raise ValueError("Pattern is missing crucial pattern: {0}".format(pat))

    up = urlparse(url)
    new_url = pattern.replace("{scheme}", up.scheme)\
                     .replace("{netloc}", up.netloc)\
                     .replace("{path}", up.path)
    up = urlparse(new_url)
    if "//" in up.path:
        new_url = "{0}://{1}{2}".format(up.scheme, up.netloc,
                                        up.path.replace("//", "/"))
    response = make_request(new_url, allow_redir=True)
    # Set the group marker
    if group_marker:
        response.group_marker = group_marker
    response = get_long_url(url, response)
    return response
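
A minimal usage sketch of change_path, assuming the module-level helpers (make_request, get_long_url) are in scope; the pattern mirrors the one from the comment above and the shortened link is hypothetical.

# Hypothetical values: a database-stored pattern and a shortened link.
pattern = "{scheme}://{netloc}/info{path}"
short_url = "http://url.com/XOon1ron"
# change_path rewrites the link to http://url.com/info/XOon1ron, requests that
# preview page and filters it for the destination URL.
response = change_path(short_url, pattern)
print(response.long_url if response.success else response.error_msg)
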
def surlhu(url):
    """ Filters the destination URL of the passed surl.hu shortened URL. """
    up = urlparse(url)
    preview_url = "http://surl.hu/s.php/{0}".format(up.path[1:])
    response = make_request(preview_url)
    response = get_long_url(url, response)
    return response
def tetl(url):
    """ Extracts the destination URL of the te.tl shortened link. """
    up = urlparse(url)
    new_url = "{0}://www.{1}{2}".format(up.scheme, up.netloc, up.path)
    response = make_request(new_url)
    response = get_long_url(url, response)
    return response
Example #4
def generic_extraction(url, allow_redir=False, group_marker=None):
    """ This method is used to extract the source of a shortened URL and then
        filter for the long URL, when no special modification for the passed URL
        is needed, this method is the way to go."""
    response = make_request(url, allow_redir=allow_redir)
    if group_marker:
        response.group_marker = group_marker
    return get_long_url(url, response)
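
A short usage sketch with a hypothetical shortened link; generic_extraction just fetches the page and hands it to get_long_url, so it is the fallback for services that need no URL rewriting.

# Hypothetical shortened link; no special URL modification is needed here.
response = generic_extraction("http://short.example/abc", allow_redir=True)
if response.success:
    print(response.long_url)
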
def tinypl(url):
    """ Extracts the destination URL of a tiny.pl shortened URL. """
    if url.endswith("!") or url.endswith("/"):
        url = url[:-1]
    up = urlparse(url)
    new_url = "{0}://{1}/co.php?d={2}".format(up.scheme, up.netloc, up.path[1:])
    response = get_long_url(url, make_request(new_url))
    return response
def minume(url):
    """ Custom routine for minu.me shortened URLs. """
    # add the special char which switches to the preview function
    if url.endswith("/"):
        new_url = url + "p"
    else:
        new_url = url + "/p"
    # Make the request and reset the url
    response = get_long_url(url, make_request(new_url))
    return response
def xavcc(url):
    """ Extracts the destination URL of xav.cc shortened URLs. """
    # xav.cc returns 403 status codes when you use your browser to click a
    # shortened link; not quite sure what's going on with their website, but default
    # user agents are declined by their service.
    response = make_request(url, headers={"User-Agent": "I'm a tiny spider :]"})
    if response.status_code in (301, 302):
        response = meta_redirect(url)
    else:
        response = get_long_url(url, response)
    return response
Example #8
def append_char(url, char, group_marker=None, allow_redir=True):
    """ Generic method to add a char at the end of the passed url to enforce
        the preview function of the service. """
    response = make_request("{0}{1}".format(url, char), allow_redir=allow_redir)
    # Set the group_marker value if it has been passed, this is required for the
    # get_long_url method when there are multiple domains for one single shortening
    # service.
    if group_marker:
        response.group_marker = group_marker
    response = get_long_url(url, response)
    return response
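
A usage sketch of append_char with a hypothetical shortened link, for a service (like zapit.nu further below) whose preview page is reached by appending a '+'.

# Hypothetical shortened link; appending "+" requests the service's preview page.
response = append_char("http://short.example/abc", "+")
# When one service runs several domains, a group_marker (here a made-up value)
# tells get_long_url which extraction rules to apply.
response = append_char("http://short.example/abc", "+", group_marker="__example__")
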
def decenturl(url):
    """ Uses the decenturl.com API to expand the passed URL. """
    response = make_request("http://decenturl.com/api-resolve?d={0}".format(url),
                          allow_redir=True, is_json=True)
    response.url = url
    if(not response.success):
        return response
    elif("ok" not in response.source):
        response.error_msg = "Failed to extract destination URL"
    else:
        response.long_url = response.source[1]
    return response
def metamark(url):
    """ Expands the metamark.net shortened URL. """
    new_url = "http://metamark.net/api/rest/simple"
    response = make_request(new_url, method="post", data={"short_url": url})
    if not response.success:
        return response
    elif "ERROR" in response.source or not response.source.strip():
        response.error_msg = "Failed to extract the destination URL"
    else:
        response.long_url = response.source.strip()
    response.url = url
    return response
def safe(url):
    """ Uses the safe.mn API do expand the passed url. """
    new_url = "http://safe.mn/api/expand?short_url={0}&format=json".format(url)
    response = make_request(new_url, allow_redir=True, is_json=True)
    # reset the url
    response.url = url
    if not response.success:
        return response
    elif "error" in response.source:
        response.error_msg = response.source["error"]
    else:
        response.long_url = response.source["url"]
    return response
def mtnymobi(url):
    """ Makes use of the mtny.mobi lookup API function to obtain the long URL. """
    up = urlparse(url)
    api_url = "http://mtny.mobi/api/lookup.php?type=json&id="
    new_url = "{0}{1}".format(api_url, up.path[1:])
    response = make_request(new_url, is_json=True)
    response.url = url
    if not response.success:
        return response
    elif not response.source.get("url"):
        response.error_msg = response.source["message"]
    else:
        response.long_url = response.source.get("url")
    return response
def smrl(url):
    """ Extracts the destination of a smrl.tk shortened URL (or one of it's domains). """
    # smrl.tk and it's domains make use of iframes to show where the destination
    # points to. We have to set the passed URL, with the additional /info path,
    # as the referer parameter and make a GET request to srv.smrls.net
    if url.endswith("/"):
        url = url[:-1]
    up = urlparse(url)
    referer = "{0}://{1}{2}/info".format(up.scheme, up.netloc, up.path)
    headers = {"Referer": referer}
    response = make_request("http://srv.smrls.net/", headers=headers)
    response.group_marker = "__smrl__"
    response = get_long_url(url, response)
    return response
def chilp(url):
    """ Uses the API of chilp.it to obtain the destination of a shortened URL. """
    api_url = "http://p.chilp.it/api.php?"
    up = urlparse(url)
    new_url = "{0}{1}".format(api_url, up.path[1:])
    response = make_request(new_url)
    if not response.success:
        return response
    elif not response.source.strip():
        response.error_msg = "Failed to obtain the destination URL"
    else:
        response.long_url = response.source.strip()
    response.url = url
    return response
def zumlink(url):
    """ Extracts the destination URL or URLs from the zumlink.com shortened URL. """
    # Zumlink.com's URL shortener supports shortening both a single link and
    # multiple links; this method is able to extract both types of shortened links.
    response = make_request(url, allow_redir=False)
    if not response.success:
        return response
    elif "location" in response.headers:
        response.long_url = response.headers["location"]
    elif response.source:
        response = get_long_url(url, response)
    else:
        response.error_msg = "Failed to extract the destination URL o_O"
        response.success = False
    return response
Example #16
def read_headers(url, parameter="location", **kwargs):
    """ Returns the destination URL from the passed short URL.
        The passed parameter argument is the parameter which is searched for inside
        the returned headers of the request."""
    args = {"method": "get", "allow_redir": False, "headers": None}
    args.update(kwargs)
    response = make_request(url, **args)

    if not response.success:
        return response
    elif parameter not in response.headers:
        response.error_msg = "Header is missing the '{0}' parameter!".format(parameter)
    else:
        # Add the long url to the outer body of the response
        response.long_url = response.headers[parameter]
    return response
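
A usage sketch of read_headers with a hypothetical shortened link; for most services the interesting header field is 'location', which is also the default.

# Hypothetical shortened link; the service answers with a 301/302 redirect,
# so the destination URL sits in the 'location' response header.
response = read_headers("http://short.example/abc")
if response.success:
    print(response.long_url)
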
Example #17
def insert_subdomain(url, subdomain):
    """ Insert the passed subdomain into the passed url and filter for the
        destination URL, for convenience the subdomain can also be 'www.' if
        a service relies on it. """
    # Remove a trailing dot from the subdomain
    if subdomain.endswith("."):
        subdomain = subdomain[:-1]
    # Rebuild the url with the passed subdomain
    up = urlparse(url)
    if subdomain in up.netloc:
        new_url = url
    else:
        new_url = "{0}://{1}.{2}{3}".format(up.scheme, subdomain, up.netloc, up.path)

    response = make_request(new_url)
    # Handle the parsing and return the information
    response = get_long_url(url, response)
    return response
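
A usage sketch of insert_subdomain with a hypothetical shortened link, for a service that only serves its redirect page on the 'www.' host.

# Hypothetical shortened link; the request is rebuilt as
# http://www.short.example/abc before it is fetched and filtered.
response = insert_subdomain("http://short.example/abc", "www")
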
Example #18
def meta_redirect(url):
    """ Filters out the meta tag's content value, it points to the destination
        URL where the shortend URL tries to redirect. """
    # Obtain the source of the passed URL
    response = make_request(url)
    if not response.success:
        return response
    try: # Filter the source for the destination URL
        soup = BeautifulSoup(response.source)
        long_url = soup.meta["content"]
        if not long_url:
            raise KeyError
        for trash in ("0;URL=", "0;url="):
            long_url = long_url.replace(trash, "")
    except(KeyError, AttributeError):
        response.error_msg = "Failed to extract the destination URL"
    else:
        response.long_url = long_url
    return response
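
A self-contained sketch of the meta-refresh extraction that meta_redirect performs; the page source below is made up to show the kind of markup such shorteners return.

from bs4 import BeautifulSoup

# Made-up page source containing a meta refresh tag.
source = '<html><head><meta http-equiv="refresh" content="0;URL=http://example.com/long"></head></html>'
soup = BeautifulSoup(source, "html.parser")
content = soup.meta["content"]  # "0;URL=http://example.com/long"
long_url = content.replace("0;URL=", "").replace("0;url=", "")
print(long_url)  # http://example.com/long
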
def budurl(url):
    """ Returns the expanded budurl link on success, dictionary with information
        about why the request failed on failure. """
    # Build the url
    new_url = "http://budurl.com/api/v1/budurls/expand?budurl={0}".format(url)
    response = make_request(new_url, allow_redir=True, is_json=True)
    # Restore the url parameter
    response.url = url

    # Filter out failed requests
    if not response.success:
        return response
    elif not response.source["success"]:
        # When the API returns an error
        response.error_msg = response.source["error_msg"]
    else:
        # Add the long url to the outer body of the response
        response.long_url = response.source["long_url"]
    return response
def yep(url):
    """ Partially uses the API of yep.it to extract the destination URL. """
    response = make_request("http://yep.it/preview.php?p={0}".format(url),
                          allow_redir=True)
    get_long_url(url ,response)
    response.url = url
    if(not response.success):
        return response

    try:
        soup = BeautifulSoup(response.source)
        long_url = soup.div.find_all("font")[1].text
        if not long_url:
            raise KeyError
    except(KeyError, IndexError):
        response.error_msg = "Failed to extract destination URL"
    else:
        response.long_url = long_url
    return response
def zapitnu(url):
    """ Extracts the long URL of zapit.nu shortened links. """
    if url.endswith("+"):
        url = url[:-1]
    response = get_long_url(url, make_request("{0}+".format(url)))
    return response