def place_or_music_or_thought():
    """Post one random image to Instagram: a travel place, a song, or a thought.

    Randomly selects a content source, builds a caption, rejects captions too
    similar to previously posted ones (retrying with a completely fresh post),
    then uploads the image and appends the caption to previous_captions.json.
    """
    import instabot
    bot = instabot.Bot()
    bot.login(username=USERNAME, password=PASSWORD, proxy=None)
    latitude, longitude = 0, 0
    if 0:  # not randint(0, 3): -- tripadvisor branch disabled for now
        pic, venue, locale, type_ = go_random_tripadvisor()
        caption = get_random_tripadvisor_caption(venue, locale, type_)
        fn = "{}.jpg".format(randint(1111, 9999))
        img = get_web_data(pic).content
    elif randint(0, 1):  # need to fix randint(0, 2):
        # Random place on the map, posted with a geotag.
        latitude, longitude, place_name, img = get_random_place()
        img = get_web_data(img).content
        caption = get_random_travel_caption(place_name)
        text = "".join([t for t in place_name if t.isalpha()])
        fn = "{}.jpg".format(text)
    elif randint(0, 1):
        # Random song: album art plus a YouTube link in the caption.
        song_data = get_song()
        song, artist = song_data['song'], song_data['artist']
        text = "".join([t for t in artist + song if t.isalpha()])
        fn = "{}.jpg".format(text)
        img = get_web_data(song_data['img']).content
        youtube = song_data["youtube"]
        caption = get_random_song_caption(song, artist, youtube)
    else:
        # Fallback: a random thought over a random stock picture.
        caption = get_random_thought()
        img = random_pic()
        fn = ""
    try:
        with open("previous_captions.json", "r", encoding="utf-8") as f:
            previous_captions = loads(f.read())
        for cap in previous_captions:
            if similarity(cap, caption) >= TOO_SIMILAR:
                print(cap)
                print("...and...")
                print(caption)
                print("... deemed too similar")
                # BUGFIX: retry with a brand-new post and STOP here.
                # Previously execution fell through after the recursive call
                # and posted the too-similar caption anyway (a duplicate).
                place_or_music_or_thought()
                return
        previous_captions.append(caption)
    except (OSError, ValueError):
        # BUGFIX: was a bare `except:` that swallowed everything (including
        # KeyboardInterrupt). Only a missing/corrupt history file should
        # reset the caption list (json.loads raises ValueError subclasses).
        previous_captions = [caption]
    post_image(bot, caption, img, fn, latitude, longitude)
    with open("previous_captions.json", "w", encoding="utf-8") as f:
        f.write(dumps(previous_captions))
    bot.logout()
def get_random_place():
    """Sample random coordinates and scrape the matching place page.

    Keeps drawing new coordinates (after a short sleep) whenever the scraped
    place name hits the hard veto list, or -- with 3/4 probability per match --
    the soft "prefer not" list.

    Returns (latitude, longitude, place_name, image_url).
    """
    while True:
        longitude = randint(-180, 180)
        latitude = randint(-90, 90)
        page_url = MAPS_URL.format(
            abs(latitude), "N" if latitude >= 0 else "S",
            abs(longitude), "E" if longitude >= 0 else "W")
        page = BeautifulSoup(get_web_data(page_url).content,
                             features="html.parser")
        img_link = page.find_all("meta", itemprop="image")[0]["content"]
        raw_name = page.find_all("meta",
                                 property="og:description")[0]["content"]
        # Keep the first two comma-separated parts, with digits stripped out.
        place_name = "".join(
            ch for ch in "".join(raw_name.split(",")[:2]) if not ch.isdigit())
        lowered = place_name.lower()
        vetoed = False
        for veto in LOCATION_VETO:
            if veto in lowered:
                vetoed = True
                break
        if not vetoed:
            for veto in LOCATION_PREFER_NOT:
                if veto in lowered and randint(0, 3):
                    vetoed = True
                    break
        if not vetoed:
            return latitude, longitude, place_name, img_link
        print("VETO", page_url)
        sleep(5)
def random_pic():
    """Download a random 400x400 picsum placeholder image; return raw bytes."""
    pic_id = randint(1, 500)
    pic_url = "https://i.picsum.photos/id/{}/400/400.jpg".format(pic_id)
    payload = get_web_data(pic_url).content
    print(pic_url)
    return payload
def search_for_text(words):
    """Grow a phrase by walking a Google result page outward from a match.

    Searches Google for the exact phrase *words*, locates an occurrence of it
    in the page text, then extends the phrase word-by-word in one random
    direction until a sentence-ending token or HTML-looking word is hit.

    Returns the extended phrase as a list of words.

    NOTE(review): relies on module-level helpers `subarray_match` and
    `contains_html` and the constant `SENTENCE_END` -- their exact semantics
    are assumed, not visible here; verify against their definitions.
    """
    words = words.lower()
    # Build the query string: spaces become '+', phrase wrapped in %22 quotes.
    search = "+".join([w for w in words.split(" ")])
    url = "https://www.google.com/search?&q=%22{0}%22&oq=%22{0}%22".format(
        search)
    data = get_web_data(url).content
    soup = BeautifulSoup(data, features="html.parser")
    text = soup.text.lower()
    print(text)
    occurrences = text.count(words)
    # From here on, `words` is a LIST of tokens, no longer the raw string.
    words = words.split(" ")
    extract = text.split(" ")
    print(extract)
    print(words)
    # Find the phrase inside the page tokens, starting from a random offset.
    pos = subarray_match(extract, words, start_point=randint(0, len(extract) - 20))
    # Extend either backwards (-1) or forwards (+1) from the match.
    step = rand_choice([-1, 1])
    print("pos is", pos)
    if step == -1:
        end = 0
    else:
        # Skip past the matched phrase itself before appending.
        pos = pos + len(words)
        end = len(extract) - 1
    out = list(words)
    i = pos
    stop = False
    while i != end and not stop:
        word = extract[i]
        print(word)
        # Stop at sentence boundaries or anything that looks like markup.
        for token in SENTENCE_END:
            if token in word or contains_html(word):
                print("Encountered illegal token", word)
                stop = True
                break
        if not stop:
            if step == 1:
                out.append(word)
            else:
                # Walking backwards: grow the phrase at the front.
                out.insert(0, word)
        i += step
    return out
def album_cover_challenge(day):
    """Post day *day* of the ten-day album cover challenge.

    Picks a random song via get_song(), uploads its album art with the
    boilerplate challenge caption, and reports success as a boolean so the
    caller can decide whether to retry (best-effort contract).

    Returns True on success, False on any failure.
    """
    try:
        import instabot
        bot = instabot.Bot()
        bot.login(username=USERNAME, password=PASSWORD, proxy=None)
        song_data = get_song()
        song, artist = song_data['song'], song_data['artist']
        text = "".join([t for t in artist + song if t.isalpha()])
        fn = "{}.jpg".format(text)
        img = get_web_data(song_data['img']).content
        youtube = song_data["youtube"]
        caption = """DAY {} I've been given a task to choose ten albums that greatly influenced my taste in music. One album per day for ten consecutive days. No explanations, no reviews, just album covers.""".format(
            day)
        post_image(bot, caption, img, fn)
        bot.logout()
        return True
    except Exception:
        # BUGFIX: was a bare `except:`, which also swallowed
        # KeyboardInterrupt/SystemExit. Still best-effort: signal failure.
        return False
def get_song(kw=None):
    """Resolve an "artist song..." keyword string to song metadata.

    kw: string whose first word is the artist and remaining words the song
    title. Must be provided -- the previous default of None crashed with
    AttributeError on kw.split; now raises ValueError explicitly.

    Returns {"artist", "song", "youtube", "img", "kw"}.

    NOTE(review): shadowed by a later `get_song` definition in this file,
    so this version is effectively dead code -- confirm before relying on it.
    """
    if kw is None:
        raise ValueError("get_song requires an 'artist song' keyword string")
    artist = kw.split(" ")[0]
    song = " ".join(kw.split(" ")[1:])
    final = None  # BUGFIX: was unbound if both YouTube lookups failed
    literal = "%22"
    search_kw = ""
    for i in range(2):
        try:
            # Quote song and artist separately ("song" "artist"); on the
            # second attempt drop the quotes entirely.
            search_kw = (literal + "+".join(song.split(" ")) + literal + "+" +
                         literal + "+".join(artist.split(" ")) + literal)
            youtube_url = YT_URL.format(search_kw)
            print(youtube_url)
            soup = BeautifulSoup(get_web_data(youtube_url).content,
                                 features="html.parser")
            vid_id = soup.find_all("a", class_="yt-uix-tile-link")[0]['href']
            final = "https://www.youtube.com" + vid_id
            break
        except IndexError:
            print("crash")
            literal = ""
    # BUGFIX: `pic` was never assigned (the lookup below had been commented
    # out), so the return statement raised NameError. Restore the Google
    # Images scrape; fall back gracefully when fewer than 3 images appear.
    GOOG_URL = "https://www.google.com/search?tbm=isch&q={}"
    source = get_web_data(GOOG_URL.format(search_kw)).content
    imgs = [s['src']
            for s in BeautifulSoup(source, features="html.parser").findAll('img')
            if s.get('src') is not None]
    pic = imgs[2] if len(imgs) > 2 else (imgs[0] if imgs else None)
    return {
        "artist": artist,
        "song": song,
        "youtube": final,
        "img": pic,
        "kw": kw
    }
def go_random_tripadvisor(url=None):
    """Wander TripAdvisor from a random location down to one venue photo.

    Starting from a random location page (or *url* when given), recurses
    through region ("/Tourism") pages until it lands on a restaurant,
    attraction or hotel, then scrapes one photo of that venue, preferring
    the small ("photo-s") rendition when the CDN serves it.

    Returns (pic_url, venue, locale, type_).
    """
    lat = long = None  # BUGFIX: were unbound in the except path below
    if url is None:
        lat, long = get_new_location()
        url = TRIPADVISOR_URL.format(lat, long)
    data = get_web_data(url).content
    # BUGFIX: pass an explicit parser -- bare BeautifulSoup(data) warns and
    # may pick a different parser per machine; matches the rest of the file.
    soup = BeautifulSoup(data, features="html.parser")
    try:
        pois = soup.find_all("a", class_="poi_name")
        poi = rand_choice(pois)
    except Exception as e:
        print(e)
        print(soup)
        print("Nothing found at latitude {}, longitude {}".format(lat, long))
        sleep(randint(2, 7))
        print("Trying again...")
        return go_random_tripadvisor()
    page_url = "http://tripadvisor.com" + poi["href"]
    type_ = "unknown"  # BUGFIX: was unbound (NameError) when no branch matched
    if "/Tourism" in page_url:
        # A region page, not a venue: descend into it.
        return go_random_tripadvisor(page_url)
    elif "/Restaurant" in page_url:
        type_ = "restaurant"
    elif "/Attraction" in page_url:
        type_ = "attraction"
    elif "/Hotel" in page_url:
        type_ = "hotel"
    else:
        print(page_url)
        print("What should I do?")
    venue_photos_url = page_url + "#photos;aggregationId=&albumid=101&filter=7"
    print(venue_photos_url)
    # "..-Reviews-Venue_Name-Locale.html" -> ("Venue Name", "Locale")
    venue, locale = page_url[page_url.index("Reviews-") + 8:
                             page_url.index(".html")].replace("_",
                                                              " ").split("-")
    photos_page_html = BeautifulSoup(get_web_data(venue_photos_url).content,
                                     features="html.parser")
    photos = [
        p for p in photos_page_html.find_all("meta")
        if p.get("content") is not None and "photo-s" in p["content"]
    ]
    if not photos:
        # BUGFIX: the original tested `"photo-f" in ["src"]` -- membership in
        # a literal list, always False. It must look inside the src attribute.
        photos = [
            p for p in photos_page_html.find_all("img")
            if p.get("src") is not None and
            ("photo-l" in p["src"] or "photo-f" in p["src"])
        ]
        elem = "src"
    else:
        elem = "content"
    if len(photos) == 0:
        return go_random_tripadvisor()
    pic = rand_choice(photos)[elem]
    # Swap large/full renditions for the small one when it actually exists.
    for suffix in "lf":
        if "photo-{}".format(suffix) in pic:
            small = pic.replace("photo-{}".format(suffix), "photo-s")
            if get_web_data(small).status_code == 200:
                pic = small
            break
    print("Here's the pic: ", pic)
    return pic, venue, locale, type_
def get_song(kw=None):
    """Find a random (or keyword-driven) song on Discogs plus a YouTube link.

    kw: optional list of search keywords; when None, 1-3 random words are
    sampled from nouns.txt (and sometimes verbs.txt). When the Discogs
    search misses, retries with fewer keywords, eventually resampling.

    Returns {"artist", "song", "youtube", "img", "kw"}; "youtube" is None
    when neither YouTube lookup finds a result.
    """
    if kw is None:
        with open("nouns.txt") as f:
            nouns = [line.strip() for line in f]
        with open("verbs.txt") as f:
            verbs = [line.strip() for line in f]
        if randint(0, 2):
            kw = sample(nouns, randint(1, 3))
        else:
            kw = sample(nouns + verbs, randint(1, 3))
    url = DISCOGS_URL.format("+".join(kw))
    #print(url)
    response = get_web_data(url)
    s = BeautifulSoup(response.content, features="html.parser")
    try:
        x = s.find_all("div", class_="card_large")[0]
    except IndexError:
        # No Discogs hit: drop one keyword, or resample from scratch.
        if len(kw) != 1:
            kw.pop(randint(0, len(kw) - 1))
        else:
            kw = None
        # BUGFIX: the retry's result was discarded and execution fell through
        # to dereference the unbound `x` (NameError). Return the retry.
        return get_song(kw)
    pic = x.find_all("a")[0].find_all(
        "span", class_="thumbnail_center")[0].img['data-src']
    song = x.find_all("h4")[0].find_all("a")[0]['title']
    artist = x.find_all("h5")[0].text.strip()
    final = None  # BUGFIX: was unbound if both YouTube lookups failed
    literal = "%22"
    for i in range(2):
        try:
            # Quote song and artist separately ("song" "artist"); on the
            # second attempt drop the quotes entirely.
            search_kw = (literal + "+".join(song.split(" ")) + literal + "+" +
                         literal + "+".join(artist.split(" ")) + literal)
            youtube_url = YT_URL.format(search_kw)
            print(youtube_url)
            soup = BeautifulSoup(get_web_data(youtube_url).content,
                                 features="html.parser")
            vid_id = soup.find_all("a",
                                   class_="yt-simple-endpoint")[0]['href']
            final = "https://www.youtube.com" + vid_id
            break
        except IndexError as e:
            print(e)
            literal = ""
    return {
        "artist": artist,
        "song": song,
        "youtube": final,
        "img": pic,
        "kw": kw
    }
# --- scratch / manual-test script: random Discogs pick -> YouTube query ---
from requests import get as get_web_data
from random import choice as rand_choice
from random import sample, randint

# NOTE(review): DISCOGS_URL, YT_URL and BeautifulSoup are used below but not
# defined or imported in this snippet -- presumably supplied elsewhere when
# it is run; verify before executing standalone.
with open("nouns.txt") as f:
    nouns = [line.strip() for line in f]
# 1-4 random nouns as the Discogs search keywords.
kw = sample(nouns, randint(1, 4))
print(kw)
url = DISCOGS_URL.format("+".join(kw))
#print(url)
response = get_web_data(url)
s = BeautifulSoup(response.content, features="html.parser")
# First large result card on the Discogs search page.
x = s.find_all("div", class_="card_large")[0]
img = x.find_all("a")[0].find_all("span",
                                  class_="thumbnail_center")[0].img['data-src']
# Pause so the album-cover URL can be inspected by hand.
input(img)
song = x.find_all("h4")[0].find_all("a")[0]['title']
artist = x.find_all("h5")[0].text.strip()
song_title = "{} - {}".format(artist, song)
# Spaces -> '+' for the YouTube search query string.
search_kw = "+".join([w for w in song_title.split(" ")])
youtube_url = YT_URL.format(search_kw)