Ejemplo n.º 1
0
def get_goose(link):
    """Fetch a random Abstruse Goose strip.

    `link` points at a redirect page whose body embeds a
    ``url=https://abstrusegoose.com/...`` target; that target page holds
    the strip PNG, a blog-text blurb and a title.

    Returns:
        (image, text): image from fetch_image() (or None on failure) and a
        string ``title\\nblurb\\n[Source](...)`` — the source link is always
        present even when scraping the blurb/title fails.
    """
    r = requests.get(link)
    clean_url = re.findall('url=https?://abstrusegoose.com/.*"', r.text)
    # NOTE: lstrip/rstrip strip *character sets*, not prefixes; this works
    # here only because the URL starts with 'h' (not in 'url=').
    url = clean_url[0].lstrip('url=').rstrip('"')
    r = requests.get(url)
    logging.warning(r.url)
    try:
        urls_strips = re.findall('https?://abstrusegoose.com/strips/.*png',
                                 r.text)
        url = urls_strips[0]
        i = fetch_image(url)
    except Exception:
        i = None
    src = f'[Source]({r.url}).'
    # Parse once, up front: previously `soup` was created inside the
    # blog-text try-block, so a failure there made the title lookup below
    # raise NameError (silently swallowed by its own except).
    try:
        soup = BeautifulSoup(r.text, 'html5lib')
    except Exception:
        soup = None
    txt = src
    if soup is not None:
        try:
            txt = soup.find("div", {"id": "blog_text"})
            txt = html.unescape(
                str(txt).lstrip('<div id="blog_text"><p>\n<p>').rstrip(
                    '</p></div></p>\n'))
            txt = str(lxml_html.fromstring(txt).text_content())
            # Strip leftover tag fragments the crude lstrip/rstrip missed.
            for item in ('r/>', 'align="center">', 'a href="',
                         '" target="_blank"', 're>', 'm>', 'strong>', '/p>'):
                txt = txt.replace(item, ' ')
            # (The old extra replace('r/>', '') was a no-op: the loop above
            # already removed every 'r/>' occurrence.)
            txt = txt + "\n" + src
        except Exception:
            txt = src
    title = ''
    if soup is not None:
        try:
            title = soup.find("h1", {"class": "storytitle"}).text
        except Exception:
            title = ''
    text = title + '\n' + txt
    return i, text
Ejemplo n.º 2
0
def get_phd(link):
    """Fetch the current PHD Comics strip.

    Returns:
        (image, text): the downloaded comic image and a markdown
        source-link caption.
    """
    response = requests.get(link)
    page = BeautifulSoup(response.text, 'html5lib')
    # The comic <img> carries both id="comic" and name="comic".
    comic = page.findAll('img', {"id": "comic", "name": "comic"})[0]
    image = fetch_image(comic["src"])
    caption = f' [Source]({response.url}).'
    return image, caption
Ejemplo n.º 3
0
def get_exo(link):
    """Fetch an Extra Ordinary (exocomics.com) strip.

    Scans the page HTML for the first uploaded JPG and downloads it.

    Returns:
        (image, text): the comic image and a markdown source-link caption.
    """
    r = requests.get(link)
    logging.warning(r.url)
    # Raw string with escaped dots: the original pattern's bare '.' in the
    # domain matched any character (e.g. 'wwwXexocomics' would also match).
    urls = re.findall(r'https?://www\.exocomics\.com/wp-content/uploads/.*jpg',
                      r.text)
    i = fetch_image(urls[0])
    txt = f' [Source]({r.url}).'
    return i, txt
Ejemplo n.º 4
0
def post():
    """
    (7) Make a prediction from a URL provided via POST request.
    """
    image_url = request.json.get('url')
    image = utils.fetch_image(image_url)
    prediction = utils.predict_from_image(CLF, image)
    return jsonify(prediction)
Ejemplo n.º 5
0
def get_tom_gauld(link):
    """Fetch a Tom Gauld cartoon (hosted on tumblr).

    Returns:
        (image, text): the cartoon image and its alt-text (when it can be
        extracted) followed by a markdown source link.
    """
    response = requests.get(link)
    logging.warning(response.url)
    page = BeautifulSoup(response.text, 'html.parser')
    first_img = page.findAll('img')[0]
    matches = re.findall('https?://64.media.tumblr.com/.*jpg', str(first_img))
    src = f' [Source]({response.url}).'
    image = fetch_image(matches[0])
    try:
        # Everything before "src" in the tag is the alt-text portion.
        caption = str(first_img).split("src")[0].lstrip('<img alt=') + src
    except Exception:
        caption = src
    return image, caption
Ejemplo n.º 6
0
def form():
    """
    (4b) Make a prediction from a URL given via a GET form.
    """
    result = {}
    url = request.args.get('url')
    if url:
        img = utils.fetch_image(url)
        result = utils.predict_from_image(CLF, img)
        result['url'] = url  # If we add this back, we can display it.
    return render_template('form.html', result=result)
Ejemplo n.º 7
0
def get_from_gocomics(link):
    """Fetch a strip from GoComics.

    Returns:
        (image, text): the strip image and a markdown source link.
    """
    response = requests.get(link)
    logging.warning(response.url)
    page = BeautifulSoup(response.text, "html5lib")

    # All comic-image wrapper divs on the page; only the first is used.
    wrappers = page.findAll("div",
                            {'class': "comic__image js-comic-swipe-target"})
    urls = re.findall('src="https://assets\.amuniversal\.com.*srcset',
                      str(wrappers[0]))
    # lstrip/rstrip strip character sets; works here because the URL
    # starts with 'h' and the trailing junk is only quote/space/srcset.
    url = urls[0].lstrip('src="').rstrip('" srcset"')
    logging.warning(url)
    image = fetch_image(url)
    caption = f"[Source.]({response.url})"
    return image, caption
Ejemplo n.º 8
0
def get_xkcd(link):
    """Fetch an xkcd comic.

    Returns:
        (image, text): the comic image (None on failure) and a caption of
        the form ``alt\\ntitle-text\\n[Source](...)``.
    """
    response = requests.get(link)
    logging.warning(response.url)
    src = f'[Source]({response.url}).'
    try:
        page = BeautifulSoup(response.text, 'html5lib')
        comic = page.find_all("img")[2]  # third <img> is the strip itself
        hover_text = comic['title']
        heading = comic['alt']
        image_url = 'https:' + comic['srcset'].split(' ')[0]
        image = fetch_image(image_url)
        caption = heading + '\n' + hover_text + '\n' + src
    except Exception:
        image = None
        caption = src
    return image, caption
Ejemplo n.º 9
0
def api():
    """
    (8) Make a prediction from a base64-encoded image via POST request.

    If accessing the web API from code, you may not have a URL to pass to
    the service, and there is no form for doing a file upload. So we need
    a way to pass the image as data. There are lots of ways to do this; one
    way is to encode as base64.
    """
    data = request.json.get('image')
    # Guard: a request without an 'image' key used to crash with
    # AttributeError on data.startswith(); answer explicitly instead.
    if not data:
        return jsonify({'error': "Missing 'image' in request body."})
    if data.startswith('http'):
        # The client passed a URL rather than inline data.
        img = utils.fetch_image(data)
    else:
        img = Image.open(BytesIO(base64.b64decode(data)))
    result = utils.predict_from_image(CLF, img)
    return jsonify(result)
Ejemplo n.º 10
0
def get_dilbert(link, latest=False):
    """Fetch a Dilbert strip from the archive.

    Args:
        link: base archive URL; a YYYY-MM-DD date string is appended.
        latest: when True use today's strip, otherwise a random date
            (the archive starts 1989-04-16).

    Returns:
        (image, text): the strip image and its alt-text plus a markdown
        source link (just the link when no alt-text is present).
    """
    if latest:
        date = str(datetime.date.today())
    else:
        date = get_random_date(1989, 4, 16)
    full_link = link + date
    r = requests.get(full_link)
    soup = BeautifulSoup(r.text, 'html5lib')
    images = soup.findAll('img')
    url = str(images[2]['src'])  # third <img> is the strip itself
    i = fetch_image(url)
    src = f' [Source]({full_link}).'
    try:
        txt = str(images[2]['alt']) + src
    except Exception:
        # Fall back to the bare source link for consistency with the other
        # scrapers here (this previously returned None, unlike siblings).
        txt = src
    return i, txt
Ejemplo n.º 11
0
def get_smbc(link):
    """Fetch an SMBC comic.

    Returns:
        (image, text): the comic image and its hover-text plus a markdown
        source link, or (None, None) when no comic URL is found.
    """
    response = requests.get(link)
    comic_urls = re.findall('https?://www.smbc-comics.com/comics/.*p?n?gi?f?',
                            response.text)
    logging.warning(response.url)
    src = f' [Source]({response.url}).'
    try:
        matches = re.findall('img\stitle=.*id="cc-comic"', response.text)
        # Hover text sits between the opening quote and the next quote.
        hover = matches[0].lstrip('img title="').split('"')[0]
        caption = html.unescape(hover) + src
    except Exception:
        caption = src
    if not comic_urls:
        logging.warning(link)
        return None, None
    image = fetch_image(comic_urls[0])
    return image, caption
Ejemplo n.º 12
0
def predict():
    """
    (3) Make a prediction from a URL given via GET request.

    Using a URL means we can still just accept a string as an arg.

    There's still no human interface.
    """
    url = request.args.get('url')
    # Deal with not getting a URL: previously this fell straight through to
    # utils.fetch_image(None) and crashed (the guard existed only as
    # commented-out code).
    if url:
        img = utils.fetch_image(url)
        result = utils.predict_from_image(CLF, img)
    else:
        result = 'Please provide a URL'

    return jsonify(result)
Ejemplo n.º 13
0
def get_new_yorker_cartoon(link):
    """Fetch up to two New Yorker daily cartoons.

    Returns:
        A list of at most two (image, caption) tuples; captions are the
        HTML-unescaped alt-texts.
    """
    r = urlopen(link)
    page = BeautifulSoup(r, "html5lib")
    logging.warning(r.url)
    # Collect the <picture> wrappers directly — the original used an
    # append loop whose variable shadowed the `link` parameter.
    pictures = page.findAll(
        "picture",
        {"class": "responsive-cartoon__image responsive-image"})
    res = []
    for item in pictures:
        caption = re.findall('<img alt=.*" class="responsive-image__image', str(item))[0].\
            lstrip('<img alt="').\
            rstrip(' class="responsive-image__image')
        url = re.findall('class="responsive-image__image" sizes="100vw".* srcset=', str(item))[0].\
            lstrip('class="responsive-image__image" sizes="100vw" src="').\
            rstrip('" srcset=')
        logging.warning(url)
        i = fetch_image(url)
        txt = html.unescape(caption)
        res.append((i, txt))
    # The redundant `[item for item in res[:2]]` is now a plain slice.
    return res[:2]
Ejemplo n.º 14
0
def get_poorlydrawnlines(link):
    """Fetch a Poorly Drawn Lines comic.

    Handles both the archive page (URL ending in 'comic/') and a
    single-comic page.

    Returns:
        (image, text): the comic image and a markdown source link, or
        (None, None) when no comic URL can be extracted.
    """
    r = requests.get(link)
    logging.warning(r.url)
    try:
        if r.url.endswith('comic/'):
            soup = BeautifulSoup(r.text, 'html5lib')
            list_of_comics = []
            # Renamed loop variable: it shadowed the `link` parameter.
            for anchor in soup.find_all('a'):
                href = str(anchor.get('href'))
                # BUG FIX: `('png' or 'jpg') in href` evaluated to
                # `'png' in href`, so jpg links were never matched.
                if 'png' in href or 'jpg' in href:
                    list_of_comics.append(anchor.get('href'))
            url = list_of_comics[0]
        else:
            urls = re.findall(
                'https?://www.poorlydrawnlines.com/wp-content/uploads/.*j?p?n?g',
                r.text)
            url = urls[0].split()[0]
            if url.endswith('><img'):
                url = url.rstrip('><img')
    except Exception:
        return None, None
    i = fetch_image(url)
    txt = f' [Source]({r.url}).'
    return i, txt