Example #1
def fact():
    """- gets a random fact from OMGFACTS"""

    attempts = 0

    # retry loop: omg-facts is flaky, so try a few times before giving up
    while True:
        try:
            soup = http.get_soup('http://www.omg-facts.com/random')
        except (http.HTTPError, http.URLError):
            if attempts > 2:
                return "Could not find a fact!"
            else:
                attempts += 1
                continue

        response = soup.find('a', {'class': 'surprise'})
        link = response['href']
        fact_data = ''.join(response.find(text=True))

        if fact_data:
            fact_data = fact_data.strip()
            break
        else:
            if attempts > 2:
                return "Could not find a fact!"
            else:
                attempts += 1
                continue

    url = web.try_shorten(link)

    return "{} - {}".format(fact_data, url)
Example #2
def sptfy(text, sptfy=False):
    if sptfy:
        shortenurl = "http://sptfy.com/index.php"
        data = {
            "longUrl": text,
            "shortUrlDomain": 1,
            "submitted": 1,
            "shortUrlFolder": 6,
            "customUrl": "",
            "shortUrlPassword": "",
            "shortUrlExpiryDate": "",
            "shortUrlUses": 0,
            "shortUrlType": 0
        }
        try:
            soup = http.get_soup(shortenurl, data=data, cookies=True)
        except Exception:
            return text
        try:
            link = soup.find("div", {"class": "resultLink"}).text.strip()
            return link
        except Exception:
            message = "Unable to shorten URL: %s" % \
                      soup.find("div", {"class": "messagebox_text"}).find("p").text.split("<br/>")[0]
            return message
    else:
        return web.try_shorten(text)
Example #3
def get_gifs(url):
    soup = get_soup(url)
    container = soup.find('div', class_="row")
    gifs = [
        urljoin(url, elem["data-src"])
        for elem in container.find_all('img', {'data-src': True})
    ]
    return gifs
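For reference, the get_soup helper used throughout these examples is a thin wrapper that fetches a page and parses it with BeautifulSoup. A minimal stand-in sketch using requests (the real plugin helper may differ in details such as headers and cookie handling):

import requests
from bs4 import BeautifulSoup


def get_soup(url):
    # fetch the raw HTML and parse it with the stdlib parser
    return BeautifulSoup(requests.get(url).text, "html.parser")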
Example #4
def refresh_cache():
    """gets a page of random MLIAs and puts them into a dictionary """
    url = 'http://mylifeisaverage.com/{}'.format(random.randint(1, 11000))
    soup = http.get_soup(url)

    for story in soup.find_all('div', {'class': 'story '}):
        mlia_id = story.find('span', {'class': 'left'}).a.text
        mlia_text = story.find('div', {'class': 'sc'}).text.strip()
        mlia_cache.append((mlia_id, mlia_text))
Example #5
def ytplaylist_url(match):
    location = match.group(4).split("=")[-1]
    try:
        soup = http.get_soup("https://www.youtube.com/playlist?list=" + location)
    except Exception:
        return "\x034\x02Invalid response."
    title = soup.find('title').text.split('-')[0].strip()
    author = soup.find('img', {'class': 'channel-header-profile-image'})['title']
    num_videos = soup.find('ul', {'class': 'header-stats'}).find_all('li')[0].text.split(' ')[0]
    views = soup.find('ul', {'class': 'header-stats'}).find_all('li')[1].text.split(' ')[0]
    return "\x02{}\x02 - \x02{}\x02 views - \x02{}\x02 videos - \x02{}\x02".format(title, views, num_videos, author)
Example #6
from unittest.mock import patch


def test_get_soup():
    test_data = """
    <html>
        <body>
            <div class="thing"><p>foobar</p></div>
        </body>
    </html>
    """
    with patch('cloudbot.util.http.get', lambda *a, **k: test_data):
        from cloudbot.util import http
        soup = http.get_soup('http://example.com')
        assert soup.find('div', {'class': "thing"}).p.text == "foobar"
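Patching cloudbot.util.http.get keeps the test offline: get_soup presumably builds its soup from whatever get returns, so no network request is made. Under the same assumption, a companion check for the no-match case:

def test_get_soup_no_match():
    with patch('cloudbot.util.http.get', lambda *a, **k: "<html></html>"):
        from cloudbot.util import http
        soup = http.get_soup('http://example.com')
        # find() returns None when nothing matches
        assert soup.find('div', {'class': "thing"}) is None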
Example #7
def xkcd_search(term):
    search_term = http.quote_plus(term)
    soup = http.get_soup("http://www.ohnorobot.com/index.pl?s={}&Search=Search&"
                         "comic=56&e=0&n=0&b=0&m=0&d=0&t=0".format(search_term))
    result = soup.find('li')
    if result:
        url = result.find('div', {'class': 'tinylink'}).text
        xkcd_id = url[:-1].split("/")[-1]
        return xkcd_info(xkcd_id, url=True)
    else:
        return "No results found!"
Example #8
def hulu_search(text):
    """<query> - searches Hulu for <query>"""
    result = http.get_soup(
        "http://m.hulu.com/search?dp_identifier=hulu&{}&items_per_page=1&page=1".format(urlencode({'query': text})))
    data = result.find('results').find('videos').find('video')
    showname = data.find('show').find('name').text
    title = data.find('title').text
    duration = timeformat.format_time(int(float(data.find('duration').text)))
    description = data.find('description').text
    rating = data.find('content-rating').text
    return "{}: {} - {} - {} ({}) {}".format(showname, title, description, duration, rating,
                                             "http://www.hulu.com/watch/" + str(data.find('id').text))
Example #9
def gelbooru_url(match):
    soup = http.get_soup('http://gelbooru.com/index.php?page=dapi&s=post&q=index&id={}'.format(match.group(1)))
    posts = soup.find_all('post')

    post = posts[0]
    id, score, url, rating, tags = (post.get('id'), post.get('score'), post.get('file_url'),
                                    post.get('rating'), post.get('tags'))

    if rating == 'e':
        rating = "\x02\x034NSFW\x03\x02"
    elif rating == 'q':
        rating = "\x02\x037Questionable\x03\x02"
    elif rating == 's':
        rating = "\x02\x033Safe\x03\x02"

    return u'\x02[{}]\x02 Score: \x02{}\x02 - Rating: {} - {} - {}'.format(id, score, rating, url, tags[:75].strip())


# http://gelbooru.com/index.php?page=post&s=list&tags=%3D_%3D
Example #10
def hulu_search(inp):
    """hulu <search> - Search Hulu"""
    result = http.get_soup(
        "http://m.hulu.com/search?dp_identifier=hulu&{}&items_per_page=1&page=1".format(urlencode({"query": inp}))
    )
    data = result.find("results").find("videos").find("video")
    showname = data.find("show").find("name").text
    title = data.find("title").text
    duration = timeformat.timeformat(int(float(data.find("duration").text)))
    description = data.find("description").text
    rating = data.find("content-rating").text
    return "{}: {} - {} - {} ({}) {}".format(
        showname, title, description, duration, rating, "http://www.hulu.com/watch/" + str(data.find("id").text)
    )
Example #11
def lyrics(text):
    """<search> - search AZLyrics.com for song lyrics"""
    if "pastelyrics" in text:
        dopaste = True
        text = text.replace("pastelyrics", "").strip()
    else:
        dopaste = False
    soup = http.get_soup(url + text.replace(" ", "+"))
    if "Try to compose less restrictive search query" in soup.find('div', {'id': 'inn'}).text:
        return "No results. Check spelling."
    div = None
    for i in soup.findAll('div', {'class': 'sen'}):
        if "/lyrics/" in i.find('a')['href']:
            div = i
            break
    if div:
        title = div.find('a').text
        link = div.find('a')['href']
        if dopaste:
            newsoup = http.get_soup(link)
            try:
                lyrics = newsoup.find('div', {'style': 'margin-left:10px;margin-right:10px;'}).text.strip()
                pasteurl = " " + web.paste(lyrics)
            except Exception as e:
                pasteurl = " (\x02Unable to paste lyrics\x02 [{}])".format(str(e))
        else:
            pasteurl = ""
        artist = div.find('b').text.title()
        lyricsum = div.find('div').text
        if "\r\n" in lyricsum.strip():
            lyricsum = " / ".join(lyricsum.strip().split("\r\n")[0:4])  # truncate, format
        else:
            lyricsum = " / ".join(lyricsum.strip().split("\n")[0:4])  # truncate, format
        return "\x02{}\x02 by \x02{}\x02 {}{} - {}".format(title, artist, web.try_shorten(link), pasteurl,
                                                           lyricsum[:-3])
    else:
        return "No song results. " + url + text.replace(" ", "+")
Example #12
def refresh_cache(text):
    global gelbooru_cache
    gelbooru_cache = []
    search = (text.replace(' ', '+')
              .replace('explicit', 'rating:explicit')
              .replace('nsfw', 'rating:explicit')
              .replace('safe', 'rating:safe')
              .replace('sfw', 'rating:safe'))
    # score:>100
    soup = http.get_soup(u'http://gelbooru.com/index.php?page=dapi&s=post&q=index&limit=20&tags={}'.format(search))
    posts = soup.find_all('post')

    for post in posts:
        gelbooru_cache.append((post.get('id'), post.get('score'), post.get('file_url'),
                               post.get('rating'), post.get('tags')))

    random.shuffle(gelbooru_cache)
Example #13
def steam(text, message):
    """<query> - Search for specified game/trailer/DLC"""
    params = {'term': text.strip().lower()}

    try:
        data = http.get_soup("http://store.steampowered.com/search/", params=params)
    except Exception as e:
        return "Could not get game info: {}".format(e)

    result = data.find('a', {'class': 'search_result_row'})

    if not result:
        return "No game found."

    app_id = result['data-ds-appid']
    message(format_game(app_id))
Example #14
def twitch_lookup(location):
    locsplit = location.split("/")
    if len(locsplit) == 3:
        channel = locsplit[0]
        _type = locsplit[1]  # should be b or c
        _id = locsplit[2]
    else:
        channel = locsplit[0]
        _type = None
        _id = None
    fmt = "{}: {} playing {} ({})"  # Title: nickname playing Game (x views)
    if _type and _id:
        if _type == "b":  # I haven't found an API to retrieve broadcast info
            soup = http.get_soup("http://twitch.tv/" + location)
            title = soup.find('span', {'class': 'real_title js-title'}).text
            playing = soup.find('a', {'class': 'game js-game'}).text
            views = soup.find('span', {'id': 'views-count'}).text + " view"
            views = views + "s" if not views[0:2] == "1 " else views
            return html.unescape(fmt.format(title, channel, playing, views))
        elif _type == "c":
            data = http.get_json("https://api.twitch.tv/kraken/videos/" +
                                 _type + _id)
            title = data['title']
            playing = data['game']
            views = str(data['views']) + " view"
            views = views + "s" if not views[0:2] == "1 " else views
            return html.unescape(fmt.format(title, channel, playing, views))
    else:
        data = http.get_json("https://api.twitch.tv/kraken/streams?channel=" +
                             channel)
        if data["streams"]:
            title = data["streams"][0]["channel"]["status"]
            playing = data["streams"][0]["game"]
            v = data["streams"][0]["viewers"]
            viewers = "\x033\x02Online now!\x02\x0f " + str(v) + " viewer" + (
                "s" if v != 1 else "")
            return html.unescape(fmt.format(title, channel, playing, viewers))
        else:
            try:
                data = http.get_json("https://api.twitch.tv/kraken/channels/" +
                                     channel)
            except Exception:
                return "Unable to get channel data. Maybe channel is on justin.tv instead of twitch.tv?"
            title = data['status']
            playing = data['game']
            viewers = "\x034\x02Offline\x02\x0f"
            return html.unescape(fmt.format(title, channel, playing, viewers))
Example #15
def twitch_lookup(location):
    locsplit = location.split("/")
    if len(locsplit) == 3:
        channel = locsplit[0]
        type = locsplit[1]  # should be b or c
        id = locsplit[2]
    else:
        channel = locsplit[0]
        type = None
        id = None
    h = HTMLParser()
    fmt = "{}: {} playing {} ({})"  # Title: nickname playing Game (x views)
    if type and id:
        if type == "b":  # I haven't found an API to retrieve broadcast info
            soup = http.get_soup("http://twitch.tv/" + location)
            title = soup.find('span', {'class': 'real_title js-title'}).text
            playing = soup.find('a', {'class': 'game js-game'}).text
            views = soup.find('span', {'id': 'views-count'}).text + " view"
            views = views + "s" if not views[0:2] == "1 " else views
            return h.unescape(fmt.format(title, channel, playing, views))
        elif type == "c":
            data = http.get_json("https://api.twitch.tv/kraken/videos/" + type + id)
            title = data['title']
            playing = data['game']
            views = str(data['views']) + " view"
            views = views + "s" if not views[0:2] == "1 " else views
            return h.unescape(fmt.format(title, channel, playing, views))
    else:
        data = http.get_json("http://api.justin.tv/api/stream/list.json?channel=" + channel)
        if data and len(data) >= 1:
            data = data[0]
            title = data['title']
            playing = data['meta_game']
            viewers = "\x033\x02Online now!\x02\x0f " + str(data["channel_count"]) + " viewer"
            print(viewers)
            viewers = viewers + "s" if not " 1 view" in viewers else viewers
            print(viewers)
            return h.unescape(fmt.format(title, channel, playing, viewers))
        else:
            try:
                data = http.get_json("https://api.twitch.tv/kraken/channels/" + channel)
            except Exception:
                return
            title = data['status']
            playing = data['game']
            viewers = "\x034\x02Offline\x02\x0f"
            return h.unescape(fmt.format(title, channel, playing, viewers))
Example #16
def sptfy(inp, sptfy=False):
    if sptfy:
        shortenurl = "http://sptfy.com/index.php"
        data = urlencode({'longUrl': inp, 'shortUrlDomain': 1, 'submitted': 1, "shortUrlFolder": 6, "customUrl": "",
                          "shortUrlPassword": "", "shortUrlExpiryDate": "", "shortUrlUses": 0, "shortUrlType": 0})
        try:
            soup = http.get_soup(shortenurl, post_data=data, cookies=True)
        except Exception:
            return inp
        try:
            link = soup.find('div', {'class': 'resultLink'}).text.strip()
            return link
        except Exception:
            message = "Unable to shorten URL: {}".format(soup.find('div', {
                'class': 'messagebox_text'}).find('p').text.split("<br/>")[0])
            return message
    else:
        return web.try_shorten(inp)
Example #17
def twitch_lookup(location):
    locsplit = location.split("/")
    if len(locsplit) == 3:
        channel = locsplit[0]
        _type = locsplit[1]  # should be b or c
        _id = locsplit[2]
    else:
        channel = locsplit[0]
        _type = None
        _id = None
    fmt = "{}: {} playing {} ({})"  # Title: nickname playing Game (x views)
    if _type and _id:
        if _type == "b":  # I haven't found an API to retrieve broadcast info
            soup = http.get_soup("http://twitch.tv/" + location)
            title = soup.find("span", {"class": "real_title js-title"}).text
            playing = soup.find("a", {"class": "game js-game"}).text
            views = soup.find("span", {"id": "views-count"}).text + " view"
            views = views + "s" if not views[0:2] == "1 " else views
            return html.unescape(fmt.format(title, channel, playing, views))
        elif _type == "c":
            data = http.get_json("https://api.twitch.tv/kraken/videos/" + _type + _id)
            title = data["title"]
            playing = data["game"]
            views = str(data["views"]) + " view"
            views = views + "s" if not views[0:2] == "1 " else views
            return html.unescape(fmt.format(title, channel, playing, views))
    else:
        data = http.get_json("https://api.twitch.tv/kraken/streams?channel=" + channel)
        if data["streams"]:
            title = data["streams"][0]["channel"]["status"]
            playing = data["streams"][0]["game"]
            v = data["streams"][0]["viewers"]
            viewers = "\x033\x02Online now!\x02\x0f " + str(v) + " viewer" + ("s" if v != 1 else "")
            return html.unescape(fmt.format(title, channel, playing, viewers))
        else:
            try:
                data = http.get_json("https://api.twitch.tv/kraken/channels/" + channel)
            except Exception:
                return "Unable to get channel data. Maybe channel is on justin.tv instead of twitch.tv?"
            title = data["status"]
            playing = data["game"]
            viewers = "\x034\x02Offline\x02\x0f"
            return html.unescape(fmt.format(title, channel, playing, viewers))
Example #18
def osrc(text):
    """<github user> - gets an Open Source Report Card for <github user> from osrc.dfm.io"""

    user_nick = text.strip()
    url = user_url.format(user_nick)

    try:
        soup = http.get_soup(url)
    except (http.HTTPError, http.URLError):
        return "Couldn't find any stats for this user."

    report = soup.find("div", {"id": "description"}).find("p").get_text()

    # Split and join to remove all the excess whitespace, slice the
    # string to remove the trailing full stop.
    report = " ".join(report.split())[:-1]

    short_url = web.try_shorten(url)

    return "{} - {}".format(report, short_url)
Example #19
def recipe(text):
    """[term] - gets a recipe for [term], or gets a random recipe if no term is specified"""
    if text:
        # get the recipe URL by searching
        try:
            search = http.get_soup(search_url, query=text.strip())
        except (http.HTTPError, http.URLError) as e:
            return "Could not get recipe: {}".format(e)

        # find the list of results
        result_list = search.find('div', {'class': 'found_results'})

        if result_list:
            results = result_list.find_all('div', {'class': 'recipe_result'})
        else:
            return "No results"

        # pick a random front page result
        result = random.choice(results)

        # extract the URL from the result
        url = base_url + result.find('div', {'class': 'image-wrapper'}).find('a')['href']

    else:
        # get a random recipe URL
        try:
            page = http.open(random_url)
        except (http.HTTPError, http.URLError) as e:
            return "Could not get recipe: {}".format(e)
        url = page.geturl()

    # use get_data() to get the recipe info from the URL
    try:
        data = get_data(url)
    except ParseError as e:
        return "Could not parse recipe: {}".format(e)

    name = data.name.strip()
    return "Try eating \x02{}!\x02 - {}".format(name, web.try_shorten(url))
Example #20
def horoscope(text, db, notice, nick):
    """<sign> - get your horoscope"""

    # check if the user asked us not to save their details
    dontsave = text.endswith(" dontsave")
    if dontsave:
        sign = text[:-9].strip().lower()
    else:
        sign = text.strip().lower()

    db.execute("create table if not exists horoscope(nick primary key, sign)")

    if not sign:
        sign = db.execute("select sign from horoscope where "
                          "nick=lower(:nick)", {'nick': nick}).fetchone()
        if not sign:
            notice("horoscope <sign> -- Get your horoscope")
            return
        sign = sign[0]

    url = "http://my.horoscope.com/astrology/free-daily-horoscope-{}.html".format(sign)
    soup = http.get_soup(url)

    title = soup.find_all('h1', {'class': 'h1b'})[1]
    horoscope_text = soup.find('div', {'class': 'fontdef1'})

    if not title or not horoscope_text:
        return "Could not get the horoscope for {}.".format(text)

    result = "\x02{}\x02 {}".format(title, horoscope_text)
    result = formatting.strip_html(result)

    if text and not dontsave:
        db.execute("insert or replace into horoscope(nick, sign) values (:nick, :sign)",
                   {'nick': nick.lower(), 'sign': sign})
        db.commit()

    return result
Example #21
def gelbooru_list_url(match):
    soup = http.get_soup(match.group(1))
    return soup.find('title').text
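Hypothetical usage with a regex match object, mirroring how these URL hooks are invoked (the pattern below is illustrative, not the plugin's actual trigger regex):

import re

pattern = re.compile(r'(https?://gelbooru\.com/index\.php\?page=post\S*)')
match = pattern.search('http://gelbooru.com/index.php?page=post&s=list&tags=cat')
if match:
    print(gelbooru_list_url(match))  # prints the page's <title> text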