Beispiel #1
0
def _expand(link, timeout=2, **kwargs):
    '''
    Resolve a (possibly shortened) link to the URL it finally points at.

    A HEAD request that follows redirects is tried first. Two kinds of
    domains then get extra treatment: "appender" domains, whose name is
    stripped out of the resolved URL, and ad-redirect shorteners, which are
    handed to ``unshortenit`` instead.

    :param link: string of a link to unshorten.
    :param timeout: timeout passed to ``requests.head`` and to ``unshortenit``.
    :param kwargs: extra keyword arguments forwarded to ``requests.head``.
    :returns: a dictionary with the keys ``original_url`` (the input link),
        ``resolved_domain`` and ``resolved_url``.
    '''
    try:
        response = requests.head(
            link,
            allow_redirects=True,
            timeout=timeout,
            **kwargs
        )
        response.raise_for_status()
        url_long = response.url
        domain = get_domain(url_long)
    except requests.exceptions.RequestException as err:
        # Fall back to whatever can be recovered from the error message.
        # NOTE(review): a domain of -1 presumably means nothing could be
        # recovered -- confirm against _parse_error.
        domain, url_long = _parse_error(str(err))

    if domain in constants.url_appenders:
        # The real target is embedded in the URL; drop the wrapper domain.
        url_long = url_long.replace(domain, '')
        domain = get_domain(url_long)
    elif domain in constants.short_domain_ad_redirects or domain == -1:
        # Plain redirects were not enough; let unshortenit handle the link.
        unshortener = unshortenit.UnshortenIt()
        url_long = unshortener.unshorten(link, timeout=timeout)
        domain = get_domain(url_long)

    return {
        'original_url': link,
        'resolved_domain': domain,
        'resolved_url': url_long,
    }
Beispiel #2
0
    def processPage(self, content):
        '''
        Scan a Twitter feed page for release announcements and send them on.

        Every non-promoted tweet whose text contains a link carrying a
        ``data-expanded-url`` attribute is unshortened. Links to
        ``www.baka-tsuki.org`` go to ``self.dispatchBT`` and links to a host
        listed in ``NANO_DESU_MAP`` go to ``self.dispatchNanoDesu``. Any
        non-empty result is collected and passed to ``self.sendReleases``.

        :param content: page content.
            NOTE(review): this argument is not read; the page is parsed from
            ``self.content``. Left unchanged for compatibility -- confirm
            which of the two is intended.
        :returns: None.
        '''
        soup = WebRequest.as_soup(self.content)

        releases = []
        for tweet in soup.find_all('li', attrs={"data-item-type": "tweet"}):
            # .get() so that an <li> without a class attribute is not fatal.
            if "promoted" in str(tweet.get('class')):
                continue

            # Named tweet_text (not content) to avoid shadowing the parameter.
            tweet_text = tweet.find("p", class_='tweet-text')
            if not tweet_text or not tweet_text.a:
                continue

            # The first anchor may be a hashtag or mention, which has no
            # expanded URL. Skip the tweet instead of raising KeyError and
            # aborting the whole page.
            itemurl = tweet_text.a.get('data-expanded-url')
            if not itemurl:
                continue

            itemtxt = tweet_text.get_text()
            itemurl = unshortenit.UnshortenIt().unshorten(itemurl)

            urlnl = urllib.parse.urlsplit(itemurl).netloc.lower()
            if urlnl == 'www.baka-tsuki.org':
                msg = self.dispatchBT(itemurl, itemtxt)
                if msg:
                    releases.append(msg)
            if urlnl in NANO_DESU_MAP:
                msg = self.dispatchNanoDesu(urlnl, itemurl, itemtxt)
                if msg:
                    releases.append(msg)

        self.log.info("Found %s releases from Twitter Feed", len(releases))
        if releases:
            self.sendReleases(releases)
Beispiel #3
0
def unwrap_redirect(urlin, resolve_redirects=True):
    '''
    Unshorten a URL, returning None when it cannot be resolved.

    :param urlin: the URL to unwrap.
    :param resolve_redirects: forwarded to ``unshorten`` as ``resolve_30x``.
    :returns: the value returned by ``unshorten``, or None if the lookup
        raised ``unshortenit.NotFound``, ``unshortenit.UnshortenFailed`` or
        ``requests.exceptions.ConnectionError``.
    '''
    try:
        resolver = unshortenit.UnshortenIt(urlcache=CacheObject())
        resolved = resolver.unshorten(urlin, resolve_30x=resolve_redirects)
    except (unshortenit.NotFound,
            unshortenit.UnshortenFailed,
            requests.exceptions.ConnectionError):
        return None
    return resolved