def place_or_music_or_thought():
    import instabot
    bot = instabot.Bot()
    bot.login(username=USERNAME, password=PASSWORD, proxy=None)

    latitude, longitude = 0, 0

    if 0:  # TripAdvisor branch disabled for now; re-enable with: not randint(0, 3)
        pic, venue, locale, type_ = go_random_tripadvisor()
        caption = get_random_tripadvisor_caption(venue, locale, type_)
        fn = "{}.jpg".format(randint(1111, 9999))
        img = get_web_data(pic).content

    elif randint(0, 1):  # 50/50 place branch; was randint(0, 2) when TripAdvisor was enabled

        latitude, longitude, place_name, img = get_random_place()
        img = get_web_data(img).content
        caption = get_random_travel_caption(place_name)
        text = "".join([t for t in place_name if t.isalpha()])
        fn = "{}.jpg".format(text)

    elif randint(0, 1):

        song_data = get_song()
        song, artist = song_data['song'], song_data['artist']
        text = "".join([t for t in artist + song if t.isalpha()])
        fn = "{}.jpg".format(text)
        img = get_web_data(song_data['img']).content
        youtube = song_data["youtube"]
        caption = get_random_song_caption(song, artist, youtube)

    else:

        caption = get_random_thought()
        img = random_pic()
        fn = ""
    try:
        with open("previous_captions.json", "r", encoding="utf-8") as f:
            previous_captions = loads(f.read())

        for cap in previous_captions:
            if similarity(cap, caption) >= TOO_SIMILAR:
                print(cap)
                print("...and...")
                print(caption)
                print("... deemed too similar")
                # Start over with a fresh caption, and return so the
                # rejected one below is never posted.
                bot.logout()
                return place_or_music_or_thought()

        previous_captions.append(caption)

    except (FileNotFoundError, ValueError):
        # No caption history yet (or unreadable JSON): start a fresh list.
        previous_captions = [caption]

    post_image(bot, caption, img, fn, latitude, longitude)
    with open("previous_captions.json", "w", encoding="utf-8") as f:
        f.write(dumps(previous_captions))

    bot.logout()
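
The similarity() helper and the TOO_SIMILAR threshold are not shown in this
example. A minimal sketch, assuming a plain string-ratio comparison built on
the standard library's difflib:

from difflib import SequenceMatcher

TOO_SIMILAR = 0.8  # assumed threshold, not from the original


def similarity(a, b):
    # Ratio of matching characters between two captions, from 0.0 to 1.0.
    return SequenceMatcher(None, a.lower(), b.lower()).ratio()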
Example #2
def get_random_place():
    longitude = randint(-180, 180)
    latitude = randint(-90, 90)
    place = MAPS_URL.format(abs(latitude), "N" if latitude >= 0 else "S",
                            abs(longitude), "E" if longitude >= 0 else "W")

    html = get_web_data(place).content

    s = BeautifulSoup(html, features="html.parser")

    img_link = s.find_all("meta", itemprop="image")[0]["content"]
    place_name = s.find_all("meta", property="og:description")[0]["content"]
    place_name = "".join(
        [i for i in "".join(place_name.split(",")[:2]) if not i.isdigit()])

    for veto in LOCATION_VETO:

        if veto in place_name.lower():
            print("VETO", place)
            sleep(5)
            return get_random_place()

    for veto in LOCATION_PREFER_NOT:
        if veto in place_name.lower() and randint(0, 3):
            # Soft veto: skip this location three times out of four.
            print("SOFT VETO", place)
            sleep(5)
            return get_random_place()

    return latitude, longitude, place_name, img_link
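
MAPS_URL, LOCATION_VETO and LOCATION_PREFER_NOT are module-level constants
that this example does not show. A plausible sketch, assuming a Google Maps
coordinate search and simple keyword lists:

# Assumed values -- the original constants are not part of this example.
MAPS_URL = "https://www.google.com/maps/place/{}%C2%B0{}+{}%C2%B0{}"  # 52°N 13°E style
LOCATION_VETO = ["ocean", "sea"]             # never post these locations
LOCATION_PREFER_NOT = ["desert", "island"]   # usually skip these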
def random_pic():

    id_ = randint(1, 500)
    url = "https://i.picsum.photos/id/{}/400/400.jpg".format(id_)
    img = get_web_data(url).content
    print(url)
    return img
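
Not every id on i.picsum.photos resolves to an image, so a random draw can
404. A hedged variant that retries a few times before giving up:

def random_pic_retry(max_tries=5):
    # Some picsum ids are unassigned; retry rather than posting a broken image.
    for _ in range(max_tries):
        url = "https://i.picsum.photos/id/{}/400/400.jpg".format(randint(1, 500))
        response = get_web_data(url)
        if response.status_code == 200:
            return response.content
    raise RuntimeError("could not fetch a random picture")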
def search_for_text(words):
    words = words.lower()

    search = "+".join(words.split(" "))
    url = "https://www.google.com/search?&q=%22{0}%22&oq=%22{0}%22".format(
        search)
    data = get_web_data(url).content
    soup = BeautifulSoup(data, features="html.parser")
    text = soup.text.lower()
    print(text)
    occurrences = text.count(words)  # phrase frequency; currently unused

    words = words.split(" ")
    extract = text.split(" ")
    print(extract)
    print(words)
    pos = subarray_match(extract, words,
                         start_point=randint(0, len(extract) - 20))
    step = rand_choice([-1, 1])
    print("pos is", pos)
    if step == -1:
        end = 0
    else:
        pos = pos + len(words)
        end = len(extract) - 1
    out = list(words)

    i = pos
    stop = False
    while i != end and not stop:
        word = extract[i]
        print(word)
        for token in SENTENCE_END:
            if token in word or contains_html(word):
                print("Encountered illegal token", word)
                stop = True
                break
        if not stop:
            if step == 1:
                out.append(word)
            else:
                out.insert(0, word)
        i += step

    return out
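
subarray_match(), SENTENCE_END and contains_html() are helpers this example
does not include. A minimal sketch of what they might look like, assuming
subarray_match() scans forward from start_point for the token sequence:

SENTENCE_END = [".", "!", "?"]  # assumed sentence terminators


def subarray_match(haystack, needle, start_point=0):
    # Index of the first occurrence of the token list `needle` inside
    # `haystack`, searching from `start_point`; falls back to 0.
    for i in range(start_point, len(haystack) - len(needle) + 1):
        if haystack[i:i + len(needle)] == needle:
            return i
    return 0


def contains_html(word):
    # Crude check for markup that leaked into the scraped text.
    return "<" in word or ">" in word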
def album_cover_challenge(day):
    try:
        import instabot
        bot = instabot.Bot()
        bot.login(username=USERNAME, password=PASSWORD, proxy=None)

        song_data = get_song()
        song, artist = song_data['song'], song_data['artist']
        text = "".join([t for t in artist + song if t.isalpha()])
        fn = "{}.jpg".format(text)
        img = get_web_data(song_data['img']).content
        youtube = song_data["youtube"]
        caption = """DAY {}
        
        I've been given a task to choose ten albums that greatly influenced my taste in music. One album per day for ten consecutive days. No explanations, no reviews, just album covers.""".format(
            day)
        post_image(bot, caption, img, fn)
        bot.logout()
        return True
    except Exception as e:
        print("album_cover_challenge failed:", e)
        return False
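
A hedged usage sketch for the ten-day run; the retry loop and the day-long
sleep are illustrative, not from the original:

from time import sleep

def run_album_challenge():
    # One cover per day for ten days, retrying a failed post a few times.
    for day in range(1, 11):
        for attempt in range(3):
            if album_cover_challenge(day):
                break
            sleep(60)
        sleep(24 * 60 * 60)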
Example #6
def get_song(kw):
    # kw is expected as "<artist> <song title>", e.g. "Nirvana Lithium".
    artist = kw.split(" ")[0]
    song = " ".join(kw.split(" ")[1:])

    literal = "%22"

    for i in range(2):
        try:
            search_kw = literal + "+".join([
                w for w in song.split(" ")
            ]) + literal + "+" + literal + "+".join(
                [w for w in artist.split(" ")]) + literal
            youtube_url = YT_URL.format(search_kw)
            print(youtube_url)
            soup = BeautifulSoup(get_web_data(youtube_url).content,
                                 features="html.parser")
            vid_id = soup.find_all("a", class_="yt-uix-tile-link")[0]['href']
            final = "https://www.youtube.com" + vid_id
            break
        except IndexError:
            print("crash")
            literal = ""
    #
    # GOOG_URL = "https://www.google.com/search?tbm=isch&q={}"
    # url = GOOG_URL.format(search_kw)
    #
    # source = get_web_data(url).content
    # pic = [s['src'] for s in BeautifulSoup(source, features="html.parser").findAll('img') if s.get('src') is not None][2]

    return {
        "artist": artist,
        "song": song,
        "youtube": final,
        "img": pic,
        "kw": kw
    }
Example #7
def go_random_tripadvisor(url=None):
    if url is None:
        lat, long = get_new_location()
        url = TRIPADVISOR_URL.format(lat, long)

    data = get_web_data(url).content
    soup = BeautifulSoup(data, features="html.parser")

    try:
        pois = soup.find_all("a", class_="poi_name")
        poi = rand_choice(pois)

    except Exception as e:
        print(e)
        print(soup)
        print("Nothing found at latitude {}, longitude {}".format(lat, long))
        sleep(randint(2, 7))
        print("Trying again...")
        return go_random_tripadvisor()

    page_url = "http://tripadvisor.com" + poi["href"]

    if "/Tourism" in page_url:

        place = poi.find_all("span")[0].text
        #print("OK let's go to", place)

        return go_random_tripadvisor(page_url)

    elif "/Restaurant" in page_url:

        restaurant = poi.find_all("span")[0].text
        type_ = "restaurant"
        #print("Settled then. We'll eat at", restaurant)
        restaurant = True

    elif "/Attraction" in page_url:

        attraction = poi.find_all("span")[0].text
        type_ = "attraction"
        #print("I guess we weren't hungry and instead we went to", attraction)

    elif "/Hotel" in page_url:

        hotel = poi.find_all("span")[0].text
        type_ = "hotel"
        #print("I guess we weren't hungry and instead we went to", attraction)

    else:
        print(page_url)
        print("What should I do?")

    venue_photos_url = page_url + "#photos;aggregationId=&albumid=101&filter=7"

    print(venue_photos_url)

    venue, locale = page_url[page_url.index("Reviews-") +
                             8:page_url.index(".html")].replace("_",
                                                                " ").split("-")

    #print("Had a great time at", venue, "in", locale)

    photos_page_html = BeautifulSoup(get_web_data(venue_photos_url).content,
                                     features="html.parser")

    photos = [
        p for p in photos_page_html.find_all("meta")
        if p.get("content") is not None and "photo-s" in p["content"]
    ]

    if not photos:
        # Fall back to inline <img> tags when no meta photos are present.
        photos = [
            p for p in photos_page_html.find_all("img")
            if p.get("src") is not None and (
                "photo-l" in p["src"] or "photo-f" in p["src"])
        ]
        elem = "src"
    else:
        elem = "content"

    if not photos:
        # Still nothing usable on this page: try a different venue.
        return go_random_tripadvisor()

    pic = rand_choice(photos)[elem]

    for suffix in "lf":
        if "photo-{}".format(suffix) in pic:
            if get_web_data(pic.replace("photo-{}".format(suffix),
                                        "photo-s")).status_code == 200:
                pic = pic.replace("photo-{}".format(suffix), "photo-s")
                break

    print("Here's the pic: ", pic)

    return pic, venue, locale, type_
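
get_new_location() and TRIPADVISOR_URL are not shown in this example. A
minimal sketch, assuming random coordinates and a nearby-search endpoint
(the exact format string is a guess):

# Assumed endpoint -- the real constant is not part of this example.
TRIPADVISOR_URL = "https://www.tripadvisor.com/Search?latitude={}&longitude={}"


def get_new_location():
    # Random point on the globe, same idea as get_random_place() above.
    return randint(-90, 90), randint(-180, 180)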
Example #8
def get_song(kw=None):

    if kw is None:

        with open("nouns.txt") as f:
            nouns = [line.strip() for line in f]
        with open("verbs.txt") as f:
            verbs = [line.strip() for line in f]

        if randint(0, 2):
            kw = sample(nouns, randint(1, 3))
        else:
            kw = sample(nouns + verbs, randint(1, 3))

    url = DISCOGS_URL.format("+".join(kw))

    #print(url)

    response = get_web_data(url)

    s = BeautifulSoup(response.content, features="html.parser")
    try:
        x = s.find_all("div", class_="card_large")[0]
    except IndexError:
        # No result for these keywords: drop one (or reset to random) and
        # retry, returning the recursive result instead of falling through.
        if len(kw) != 1:
            kw.pop(randint(0, len(kw) - 1))
        else:
            kw = None
        return get_song(kw)

    pic = x.find_all("a")[0].find_all(
        "span", class_="thumbnail_center")[0].img['data-src']

    song = x.find_all("h4")[0].find_all("a")[0]['title']
    artist = x.find_all("h5")[0].text.strip()

    song_title = "{} - {}".format(artist, song)

    literal = "%22"

    for i in range(2):
        try:
            search_kw = literal + "+".join([
                w for w in song.split(" ")
            ]) + literal + "+" + literal + "+".join(
                [w for w in artist.split(" ")]) + literal
            youtube_url = YT_URL.format(search_kw)
            print(youtube_url)

            x = get_web_data(youtube_url).content
            soup = BeautifulSoup(x, features="html.parser")

            vid_id = soup.find_all("a", class_="yt-simple-endpoint")[0]['href']
            final = "https://www.youtube.com" + vid_id
            break
        except IndexError as e:
            print(e)
            literal = ""
    #
    # GOOG_URL = "https://www.google.com/search?tbm=isch&q={}"
    # url = GOOG_URL.format(search_kw)
    #
    # source = get_web_data(url).content
    # pic = [s['src'] for s in BeautifulSoup(source, features="html.parser").findAll('img') if s.get('src') is not None][2]

    return {
        "artist": artist,
        "song": song,
        "youtube": final,
        "img": pic,
        "kw": kw
    }
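
A quick usage sketch: called with no argument, get_song() draws random
keywords from the word lists and returns the scraped metadata.

song_data = get_song()
print("Found: {} - {}".format(song_data["artist"], song_data["song"]))
print("Listen:", song_data["youtube"])
print("Cover art:", song_data["img"])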
Example #9
from requests import get as get_web_data
from random import choice as rand_choice
from random import sample, randint
from bs4 import BeautifulSoup

# Assumed values -- the snippet uses these constants without defining them.
DISCOGS_URL = "https://www.discogs.com/search/?q={}"
YT_URL = "https://www.youtube.com/results?search_query={}"

with open("nouns.txt") as f:
    nouns = [line.strip() for line in f]

kw = sample(nouns, randint(1, 4))

print(kw)

url = DISCOGS_URL.format("+".join(kw))

#print(url)

response = get_web_data(url)

s = BeautifulSoup(response.content, features="html.parser")

x = s.find_all("div", class_="card_large")[0]
img = x.find_all("a")[0].find_all("span",
                                  class_="thumbnail_center")[0].img['data-src']
input(img)  # debugging pause: shows the cover URL and waits for Enter
song = x.find_all("h4")[0].find_all("a")[0]['title']
artist = x.find_all("h5")[0].text.strip()

song_title = "{} - {}".format(artist, song)

search_kw = "+".join([w for w in song_title.split(" ")])

youtube_url = YT_URL.format(search_kw)
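
The snippet stops after building the search URL; a hedged continuation that
mirrors the video-link scrape from Example #8:

soup = BeautifulSoup(get_web_data(youtube_url).content, features="html.parser")
links = soup.find_all("a", class_="yt-simple-endpoint")
if links:
    print("https://www.youtube.com" + links[0]["href"])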