def get_episodes(self, show_id, season):
    r = self.session.get(f"{BASE_URL}/show/{show_id}/{season}")
    r.raise_for_status()

    sopa = bso(r.content, "lxml")
    tables = sopa.find_all("tr")
    seasons = [i.text for i in tables[1].find_all("a")]

    if not any(season == season_ for season_ in seasons):
        return

    season_subs = []
    episodes = []

    for tr in range(len(tables)):
        data = tables[tr].find_all("td")

        title = self.title_available(data)
        if title:
            episodes.append(title)

        source_var = self.source_separator(data)
        if not source_var:
            continue

        season_subs += list(self.scrape_episode_info(source_var, tables, tr))

    return list(self.get_episode_dicts(episodes, season_subs, season))
def download_lastfm_pl(self, playlist_url):
    # Apparently, last fm API doesn't have a playlist endpoint. If you
    # find out that it has, please fix this!
    try:
        r = requests.get(playlist_url, timeout=10)
    except requests.exceptions.RequestException as e:
        logger.error(f"{RED}Playlist download failed: {e}")
        return

    soup = bso(r.content, "html.parser")
    artists = [artist.text for artist in soup.select(ARTISTS_SELECTOR)]
    titles = [title.text for title in soup.select(TITLE_SELECTOR)]

    track_list = []
    if len(artists) == len(titles) and artists:
        track_list = [
            artist + " " + title for artist, title in zip(artists, titles)
        ]

    if not track_list:
        logger.info(f"{OFF}Nothing found")
        return

    pl_title = sanitize_filename(soup.select_one("h1").text)
    pl_directory = os.path.join(self.directory, pl_title)
    logger.info(f"{YELLOW}Downloading playlist: {pl_title} "
                f"({len(track_list)} tracks)")

    for i in track_list:
        track_id = get_url_info(
            self.search_by_type(i, "track", 1, lucky=True)[0])[1]
        if track_id:
            self.download_from_id(track_id, False, pl_directory)

    if not self.no_m3u_for_playlists:
        make_m3u(pl_directory)
def random_posts(html):
    soup = bso(html, "lxml")
    posts = list(soup.findAll(attrs={"role": "main"})[0].children)
    result = []
    for i in range(3):
        # randint is inclusive on both ends, so cap the index at len(posts) - 1
        # to avoid an occasional IndexError.
        result.append(str(posts[random.randint(0, len(posts) - 1)]))
    return result
def getFakeAccData(self, response):
    beautifulContent = bso(response.content, "html.parser")
    mailAdress = self.getMailAdress(beautifulContent)
    cookies = self.getCookies(response)
    return mailAdress, cookies
def parse_pdf(path):
    """Parse a PDF into its page <div> elements.

    Reads the PDF from the given path via Tika's XML output and returns the
    list of <div class="page"> tags, one per page.
    """
    raw = parser.from_file(path, xmlContent=True)
    soup = bso(raw['content'], 'lxml')
    pages = soup.find_all('div', attrs={'class': 'page'})
    print(pages)
    return pages
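# A minimal usage sketch for parse_pdf above. It assumes the tika `parser`
# imported by the surrounding module is available and that "report.pdf" (a
# placeholder path) exists locally; each returned element is a BeautifulSoup
# Tag, so get_text() flattens a page to plain text.
if __name__ == "__main__":
    for number, page in enumerate(parse_pdf("report.pdf"), start=1):
        print(f"--- page {number} ---")
        print(page.get_text().strip())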
def index_titles(self):
    r = self.session.get(f"{BASE_URL}/series.php?/")
    r.raise_for_status()

    soup = bso(r.content, "html.parser")
    for a in soup.find_all("a"):
        href_url = a.get("href")
        # Anchors without an href return None; guard before the substring test.
        if href_url and "show" in href_url:
            yield {"title": a.text, "url": href_url}
def index_titles(self):
    r = self.session.get(BASE)
    r.raise_for_status()

    soup = bso(r.content, "html.parser")
    titles = []
    for a in soup.find_all("a"):
        href_url = a.get("href")
        # Anchors without an href return None; guard before the substring test.
        if href_url and "show" in href_url:
            titles.append({"title": a.text, "url": href_url})
    return titles
def get_tickers_list_SP500():
    from urllib import request
    from bs4 import BeautifulSoup as bso

    url = "https://en.wikipedia.org/wiki/List_of_S%26P_500_companies"

    # Get page (cache it locally so Wikipedia is only hit once).
    if not os.path.exists("wiki_tickers_page.txt"):
        # Decode before writing so the cached copy is plain HTML.
        get_url = request.urlopen(url).read().decode("utf-8")
        with open("wiki_tickers_page.txt", "w") as file:
            file.write(get_url)
    else:
        with open("wiki_tickers_page.txt", "r") as file:
            get_url = file.read()

    # Get tickers from the constituents table.
    bsdoc = bso(get_url, "html.parser")
    table = bsdoc.find("table", {"id": "constituents"})
    table = table.find("tbody")

    if not os.path.exists("tickers.txt"):
        a_ = table.findAll("a", {"class": "external text"})
        tickers = []
        for a in a_:
            if a.getText() != "reports":
                tickers.append(str(a.getText()))
        with open("tickers.txt", "w") as file:
            file.write(json.dumps(tickers))
        print("tickers file created")
    else:
        with open("tickers.txt", "r") as file:
            tickers = json.loads(file.read())

    # Get company names (second column of each row).
    if not os.path.exists("tickers_names.txt"):
        names = []
        trs = table.findAll("tr")
        for tr in trs:
            tds = tr.findAll("td")
            for i, td in enumerate(tds):
                if i == 1:
                    name = td.find("a").getText()
                    names.append(name)
        with open("tickers_names.txt", "w") as file:
            file.write(json.dumps(names))
    else:
        with open("tickers_names.txt", "r") as file:
            names = json.loads(file.read())

    return tickers, names
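# Hypothetical usage of get_tickers_list_SP500 above: the function caches the
# Wikipedia page and the parsed lists on disk, so repeated calls are cheap.
if __name__ == "__main__":
    tickers, names = get_tickers_list_SP500()
    print(f"{len(tickers)} tickers loaded")
    for ticker, name in list(zip(tickers, names))[:5]:
        print(ticker, "-", name)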
def getInboxLinks(self, response):
    beautifulContent = bso(response.content, "html.parser")
    inboxElements = beautifulContent.find("table")
    trimmedLinks = []
    for mails in inboxElements.contents:
        link = mails.attrs.get('onclick')
        if link:
            extractedLink = re.findall("readmail.html.*'", link)
            trimmedLinks.append(extractedLink[0].strip("'"))
    return trimmedLinks
def getTwitterCode(self, response):
    beautifulContent = bso(response.content, "html.parser")
    codes = beautifulContent.findAll("td", attrs={
        "class": "h1 black",
        "dir": "ltr"
    })
    for code in codes:
        code = code.getText()
        if code.isnumeric():
            return code
    # No numeric code was found in any matching cell.
    return -1
def get_html_soup_object(url):
    try:
        fp = urllib.request.urlopen(url)
        mybytes = fp.read()
        fp.close()
    except HTTPError as e:
        mybytes = e.read()
    except URLError:
        print("Network error! Make sure your internet connection is working.")
        sys.exit(2)
    html = mybytes.decode("utf8")
    return bso(html, "html.parser")
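# A small usage sketch for get_html_soup_object above; the URL is a placeholder,
# not taken from the original code.
if __name__ == "__main__":
    soup = get_html_soup_object("https://example.com/")
    print(soup.title.string if soup.title else "<no title>")
    for link in soup.find_all("a", href=True):
        print(link["href"])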
def get_arg_links(moviePag):
    # Use the page passed in by the caller; previously the parameter was
    # shadowed by a fresh request to the movie endpoint.
    movieSop = bso(moviePag.content, "html.parser")
    movieJson = json.loads(movieSop.text)
    movieTitle = movieJson["title"]
    try:
        for rele in movieJson["releases"]:
            if rele["subtitles"]:
                for uri in rele["subtitles"]:
                    self.appendSubs(
                        movieTitle,
                        uri["uri"].rsplit("/", 1)[-1],
                        uri["uri"],
                        "argenteam.net",
                    )
    except KeyError:
        pass
def get_subdivx(self):
    subdivx = "{}{}{}".format(subdivx_base, self.query, subdivx_query)
    page = requests.get(subdivx)
    soup = bso(page.content, "html.parser")

    title = soup.find_all(id="menu_titulo_buscador")
    desc = soup.find_all(id="buscador_detalle_sub")
    url = soup.find_all("a", class_="titulo_menu_izq")

    for t, d, u in zip(title, desc, url):
        self.appendSubs(
            t.text.replace("Subtitulos de ", ""),
            d.text,
            u.get("href"),
            "subdivx.com",
        )

    if not title:
        print("No results on Subdivx")
def get_tickers_list_PA():
    from urllib import request
    from bs4 import BeautifulSoup as bso
    from string import ascii_uppercase

    alphabet = [letter for letter in ascii_uppercase]
    url = "https://www.borsaitaliana.it/borsa/azioni/listino-a-z.html?initial="

    for letter in alphabet:
        ticker_page = request.urlopen(url + letter).read()
        table = bso(ticker_page, "html.parser").find(
            "table", {"class": "m-table -firstlevel"})
        table_body = table.findAll("tr")
        for row in table_body:
            print(row)
        print(table)
def scrape_download_url(self, episode_dict):
    logger.debug("Scraping download URL")
    r = self.session.get(episode_dict["download_url"])
    r.raise_for_status()

    discriminator = f".{episode_dict['season']}.{episode_dict['episode']}."
    soup = bso(r.content, "lxml")

    for url, selected in zip(soup.select(CSS1), soup.select(CSS2)):
        meta = ".".join(
            selected.get("href").split(discriminator)[-1].split(".")[:-1])
        if meta in episode_dict["download_url"]:
            id_url = url.find_all("a")[0].get("href")
            sub_id = parse.parse_qs(parse.urlparse(id_url).query)["id"][0]
            lang_id = parse.parse_qs(parse.urlparse(id_url).query)["lang"][0]
            version_ = parse.parse_qs(parse.urlparse(id_url).query)["fversion"][0]
            return f"{BASE_URL}/updated/{lang_id}/{sub_id}/{version_}"
def parse(self, response):
    '''Need regex to grab the time from the date and price attributes'''
    ul_selector = response.css(
        "ul.search-main-content__events-list").extract_first()
    soup = bso(ul_selector, "lxml")
    unordered_list = soup.find("ul")
    print(unordered_list)

    list_of_tags = unordered_list.children
    all_events = list()

    for item in list_of_tags:
        event = Event()
        div_container = response.css(
            'div.eds-media-card-content__content__principal')
        event['title'] = div_container.css(
            "a.eds-media-card-content__action-link h3.eds-media-card-content__title.eds-text-color--grey-800.eds-text-bl div.card-text--truncated__three::text"
        ).extract_first()
        event['date'] = div_container.css(
            'div.eds-media-card-content__sub-content div.eds-text-bs--fixed.eds-text-color--grey-600.eds-l-mar-top-1::text'
        ).extract_first()
        event['location'] = div_container.css(
            'div.eds-media-card-content__sub-content div.eds-media-card-content__sub-content-cropped div.eds-text-bs--fixed.eds-text-color--grey-600.eds-l-mar-top-1 div.card-text--truncated__one::text'
        ).extract_first()
        event['price'] = div_container.css(
            'div.eds-media-card-content__sub-content div.eds-media-card-content__sub-content-cropped div.eds-text-bs--fixed.eds-text-color--grey-600.eds-l-mar-top-1::text'
        ).extract_first()
        # Collect every parsed event instead of keeping only the last one.
        all_events.append(event)

    return all_events
def download_lastfm_pl(self, playlist_url):
    # Apparently, last fm API doesn't have a playlist endpoint. If you
    # find out that it has, please fix this!
    r = requests.get(playlist_url)
    soup = bso(r.content, "html.parser")
    artists = [artist.text for artist in soup.select(ARTISTS_SELECTOR)]
    titles = [title.text for title in soup.select(TITLE_SELECTOR)]

    # Initialize up front so a mismatched artist/title count doesn't raise
    # UnboundLocalError below.
    track_list = []
    if len(artists) == len(titles) and artists:
        track_list = [
            artist + " " + title for artist, title in zip(artists, titles)
        ]

    if not track_list:
        print("Nothing found")
        return

    pl_title = sanitize_filename(soup.select_one("h1").text)
    print("Downloading playlist: " + pl_title)
    self.directory = os.path.join(self.directory, pl_title)

    for i in track_list:
        track_url = self.search_by_type(i, "track", 1, lucky=True)[0]
        if track_url:
            self.handle_url(track_url)
def get_argenteam(self):
    argenteam_search = "%s?q=%s" % (api_search, self.query)
    page = requests.get(argenteam_search)
    soup = bso(page.content, "html.parser")
    arg_json = json.loads(soup.text)

    def get_arg_links(moviePag):
        # Use the page passed in by the caller; previously the parameter was
        # shadowed by a fresh request to the movie endpoint, which broke the
        # episode branch below.
        movieSop = bso(moviePag.content, "html.parser")
        movieJson = json.loads(movieSop.text)
        movieTitle = movieJson["title"]
        try:
            for rele in movieJson["releases"]:
                if rele["subtitles"]:
                    for uri in rele["subtitles"]:
                        self.appendSubs(
                            movieTitle,
                            uri["uri"].rsplit("/", 1)[-1],
                            uri["uri"],
                            "argenteam.net",
                        )
        except KeyError:
            pass

    for tipo in arg_json["results"]:
        mov_o_tv = tipo["type"]
        arg_id = tipo["id"]
        try:
            if mov_o_tv == "movie":
                moviePag = requests.get("{}?id={}".format(api_movie, arg_id))
                get_arg_links(moviePag)
            else:
                moviePag = requests.get("{}?id={}".format(api_episode, arg_id))
                get_arg_links(moviePag)
        except AttributeError:
            print("No results on Argenteam")
def get_content_entries(counter):
    html = get_html()
    if html:
        soup = bso(html, "html.parser")
        bodies = soup.find_all("body")
        titles = soup.find_all("title")

        letters_body_list = []
        for idx, (body, title) in enumerate(zip(bodies, titles)):
            letter_dict = {
                "letter_number": idx,  # ex-letter_order
                "letter_title": title.text,
                "body": body
            }
            letters_body_list.append(letter_dict)

        wasted_letters_numbers = [2, 4, 8, 16]
        without_li_letters_numbers = [6, 7, 10, 11]
        strong_theme_letters = [5, 9, 12, 13, 14, 15, 17]

        for letter in letters_body_list:
            if letter["letter_number"] in wasted_letters_numbers:
                continue
            else:
                section_name = letter["letter_title"]
                modified_date = get_modified_date(section_name)
                get_all_a_in_text(letter["body"], letter["letter_title"],
                                  modified_date)
                description = section_name
                lesson_name = pretifier(section_name)
                url = "text_" + str(counter)
                counter += 1
                write_to_db(lesson_name, description, section_name, url,
                            modified_date=modified_date,
                            url_description=str(letter["body"]))

            if letter["letter_number"] == 5:  # virtual environment
                ol = letter["body"].find('ol')
                for li in ol.find_all('li'):
                    list_a = li.find_all('a')
                    for a in list_a:
                        description = li.text
                        lesson_name = pretifier(li.text)
                        section_name = letter["letter_title"]
                        url_description = a.text
                        url, type = get_url_and_url_type(a)
                        # maybe pass description instead of lesson_name here. Verify!
                        slug = get_slug_url(lesson_name, section_name)
                        write_to_db(lesson_name, description, section_name, url,
                                    modified_date=modified_date,
                                    url_description=url_description,
                                    type=type, slug=slug)

            if letter["letter_number"] in strong_theme_letters:  # weeks 4, 5, 6, 7, 8 (tests), 9
                strongs = letter["body"].find_all("strong")
                for strong in strongs:
                    next_ = strong.find_next()
                    if next_.li:
                        li_list = next_.find_all("li")
                        for li in li_list:
                            section_name = strong.text
                            lesson_name = pretifier(li.text)
                            description = li.text
                            url_description = letter["letter_title"]
                            url, type = get_url_and_url_type(li.a)
                            # maybe pass description instead of lesson_name here. Verify!
                            slug = get_slug_url(lesson_name, section_name)
                            write_to_db(lesson_name, description, section_name, url,
                                        modified_date=modified_date,
                                        url_description=url_description,
                                        type=type, slug=slug)
                    elif "проекты" in strong.text.lower():  # "projects"
                        next_next_ = next_.find_next().find_next()
                        section_name = letter["letter_title"]
                        url_description = next_next_.text
                        lesson_name = pretifier(strong.text)
                        description = strong.text
                        url, type = get_url_and_url_type(next_next_)
                        # maybe pass description instead of lesson_name here. Verify!
                        slug = get_slug_url(lesson_name, section_name)
                        write_to_db(lesson_name, description, section_name, url,
                                    modified_date=modified_date,
                                    url_description=url_description,
                                    type=type, slug=slug)

                        next_next_next_ = next_next_.find_next()
                        section_name = letter["letter_title"]
                        url_description = next_next_next_.text
                        description = strong.text
                        lesson_name = pretifier(strong.text)
                        url, type = get_url_and_url_type(next_next_next_)
                        # maybe pass description instead of lesson_name here. Verify!
                        slug = get_slug_url(lesson_name, section_name)
                        write_to_db(lesson_name, description, section_name, url,
                                    modified_date=modified_date,
                                    url_description=url_description,
                                    type=type, slug=slug)
                    elif ("трек" in strong.text.lower()  # "track"
                          or "дополнительно" in strong.text.lower()):  # "additional"
                        if "5-й недели" in letter["letter_title"].lower():  # "of week 5"
                            next_next_ = next_.find_next().find_next().find_next()
                        elif ("трек" in strong.text.lower()
                              or "9 недели" in letter["letter_title"].lower()):  # "week 9"
                            next_next_ = next_.find_next().find_next()
                        elif "дополнительно" in strong.text.lower():
                            next_next_ = next_.find_next()
                        li_list = next_next_.find_all("li")
                        for li in li_list:
                            section_name = strong.text.replace('"', '')
                            lesson_name = pretifier(li.text)
                            description = li.text
                            url_description = letter["letter_title"]
                            url, type = get_url_and_url_type(li.a)
                            # maybe pass description instead of lesson_name here. Verify!
                            slug = get_slug_url(lesson_name, section_name)
                            write_to_db(lesson_name, description, section_name, url,
                                        modified_date=modified_date,
                                        url_description=url_description,
                                        type=type, slug=slug)
            elif letter["letter_number"] in without_li_letters_numbers:  # Trello, projects, extra tasks
                lesson_name = pretifier(section_name)
                description = section_name
                regex = re.compile(
                    r".*gmail.com.*|.*subscri*.|why did I get this")
                tables = letter["body"].find_all("table")
                for td in tables:
                    list_a = td.find_all('a')
                    for a in list_a:
                        match = regex.search(a.text)
                        if not match:
                            url, type = get_url_and_url_type(a)
                            url_description = a.text
                            # maybe pass description instead of lesson_name here. Verify!
                            slug = get_slug_url(lesson_name, section_name)
                            write_to_db(lesson_name, description, section_name, url,
                                        modified_date=modified_date, type=type,
                                        url_description=url_description,
                                        slug=slug)
            else:  # weeks: 0 (welcome letter), 1, 2
                li_els = letter["body"].find_all('li')
                for li in li_els:
                    if li.a:
                        a_els = li.find_all('a')
                        for a in a_els:
                            lesson_name = pretifier(li.text)
                            description = li.text
                            url_description = a.text
                            url, type = get_url_and_url_type(a)
                            # maybe pass description instead of lesson_name here. Verify!
                            slug = get_slug_url(lesson_name, section_name)
                            write_to_db(lesson_name, description, section_name, url,
                                        modified_date=modified_date,
                                        url_description=url_description,
                                        type=type, slug=slug)
    else:
        print('\t[Error] No datafile found')
def __init__(self, data):
    self.engine = bso(data, "lxml")
def updateData(self, data):
    self.engine = bso(data, "lxml")
def convert_one(input_file):
    soup = bso(open(input_file), 'xml')
    newsoup = bso('<annotation></annotation>', 'xml')
    ann = newsoup.annotation

    filename = soup.annotation.filename.string
    folder = soup.annotation.folder.string

    folder_tag = newsoup.new_tag('folder')
    filename_tag = newsoup.new_tag('filename')
    folder_tag.string = "VOC10"
    filename_tag.string = filename
    ann.append(folder_tag)
    ann.append(filename_tag)

    source_tag = newsoup.new_tag('source')
    db_tag = newsoup.new_tag('database')
    db_tag.string = 'The VOC2007 Database'
    src_ann_tag = newsoup.new_tag('annotation')
    src_ann_tag.string = 'PASCAL VOC2007'
    img_tag = newsoup.new_tag('image')
    img_tag.string = 'flickr'
    source_tag.append(db_tag)
    source_tag.append(src_ann_tag)
    source_tag.append(img_tag)
    ann.append(source_tag)

    # Image size comes from the source annotation (nrows = height, ncols = width),
    # so look it up in the parsed input rather than the freshly built annotation.
    height = soup.find('nrows')  # height
    width = soup.find('ncols')   # width
    if height is not None and width is not None:
        size_tag = newsoup.new_tag('size')
        width_tag = newsoup.new_tag('width')
        width_tag.string = width.string
        height_tag = newsoup.new_tag('height')
        height_tag.string = height.string
        depth_tag = newsoup.new_tag('depth')
        depth_tag.string = '3'
        size_tag.append(width_tag)
        size_tag.append(height_tag)
        size_tag.append(depth_tag)
        ann.append(size_tag)

    segment_tag = newsoup.new_tag('segmented')
    segment_tag.string = '1'
    ann.append(segment_tag)

    objects = soup.find_all('object')
    for obj in objects:
        deleted = obj.find('deleted')
        if deleted is not None and deleted.string == '1':
            continue

        object_tag = newsoup.new_tag('object')
        name = obj.find('name').string
        name_tag = newsoup.new_tag('name')
        name_tag.string = name
        pose_tag = newsoup.new_tag('pose')
        pose_tag.string = 'Unspecified'
        trunc_tag = newsoup.new_tag('truncated')
        trunc_tag.string = '0'
        difficult_tag = newsoup.new_tag('difficult')
        difficult_tag.string = '0'

        # Bounding box = extremes of the polygon points.
        bndbox_tag = newsoup.new_tag('bndbox')
        x_list = []
        y_list = []
        for x in obj.find_all('x'):
            x_list.append(int(x.string))
        for y in obj.find_all('y'):
            y_list.append(int(y.string))
        x_max, x_min = max(x_list), min(x_list)
        y_max, y_min = max(y_list), min(y_list)

        xmin_tag = newsoup.new_tag('xmin')
        xmin_tag.string = str(x_min)
        ymin_tag = newsoup.new_tag('ymin')
        ymin_tag.string = str(y_min)
        xmax_tag = newsoup.new_tag('xmax')
        xmax_tag.string = str(x_max)
        ymax_tag = newsoup.new_tag('ymax')
        ymax_tag.string = str(y_max)

        bndbox_tag.append(xmin_tag)
        bndbox_tag.append(ymin_tag)
        bndbox_tag.append(xmax_tag)
        bndbox_tag.append(ymax_tag)

        object_tag.append(name_tag)
        object_tag.append(pose_tag)
        object_tag.append(trunc_tag)
        object_tag.append(difficult_tag)
        object_tag.append(bndbox_tag)
        ann.append(object_tag)

    return newsoup.encode_contents()
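# Batch-conversion sketch for convert_one above. The directory names are
# placeholder assumptions, not part of the original script; convert_one returns
# bytes (encode_contents), so the output is written in binary mode.
if __name__ == "__main__":
    import glob
    import os

    os.makedirs("annotations_voc", exist_ok=True)
    for input_file in glob.glob("annotations_src/*.xml"):
        out_path = os.path.join("annotations_voc", os.path.basename(input_file))
        with open(out_path, "wb") as out:
            out.write(convert_one(input_file))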
final_qr += val[1:i]
try:
    if len(number) != 0:
        final_qr += str('"' + str(int(number) * qr_size) + '"')
except:
    print('error in qr code generation')
    show_error = True

final_qr += new_file[-1][1:]

with open('newqrcode.svg', 'a+') as svg:
    svg.truncate(0)
    svg.write(final_qr)

s = ''
with open('newqrcode.svg') as svg:
    our_soup = bso(svg.read(), 'lxml')
    all_child = our_soup.find('svg')
    for child in all_child.findChildren():
        s += str(child)[:-8] + '/>\n'

with open('svg/part2.svg', 'a+') as write_file:
    write_file.truncate(0)
    write_file.write(s)

svg_file_name = input('Enter the svg file name you want to save : ')
with open('omr_svg/' + svg_file_name + '.svg', 'w') as svg_file:
    file = ''
    with open('svg/part1.svg', 'r') as p1:
        file += str(p1.read()) + '\n'
    with open('svg/part2.svg', 'r') as p2:
def get_episodes(self, show_id, season):
    logger.debug("https://www.tusubtitulo.com/show/{}/{}".format(
        show_id, season))
    r2 = self.session.get(
        "https://www.tusubtitulo.com/show/{}/{}".format(show_id, season),
    )
    r2.raise_for_status()

    sopa = bso(r2.content, "lxml")
    tables = sopa.find_all("tr")
    seasons = [i.text for i in tables[1].find_all("a")]

    if not self.is_season_available(seasons, season):
        logger.debug("Season not found")
        return

    season_subs = []
    episodes = []

    for tr in range(len(tables)):
        data = tables[tr].find_all("td")

        title = self.title_available(data)
        if title:
            episodes.append(title)

        source_var = self.source_separator(data)
        if source_var:
            inc = 1
            while True:
                try:
                    content = tables[tr + inc].find_all("td")

                    language = content[4].text
                    if "eng" in language.lower():
                        language = "en"
                    elif "esp" in language.lower():
                        language = "es"
                    else:
                        language = None

                    completed = "%" not in content[5].text
                    url = content[6].find_all("a")[0].get("href")

                    sub_id = parse.parse_qs(parse.urlparse(url).query)["id"][0]
                    lang_id = parse.parse_qs(parse.urlparse(url).query)["lang"][0]
                    version_ = parse.parse_qs(
                        parse.urlparse(url).query)["version"][0]
                    download_url = (
                        "https://www.tusubtitulo.com/updated/{}/{}/{}".format(
                            lang_id, sub_id, version_))

                    if language and completed:
                        season_subs.append({
                            "episode_id": sub_id,
                            "metadata": source_var,
                            "download_url": download_url,
                            "language": language,
                        })
                    inc += 1
                except IndexError:
                    break

    final_list = []
    for i in episodes:
        for t in season_subs:
            if i["episode_id"] == t["episode_id"]:
                final_list.append({
                    "episode_number": i["episode_number"],
                    "episode_url": i["episode_url"],
                    "metadata": t["metadata"],
                    "download_url": t["download_url"],
                    "language": t["language"],
                })

    return final_list
def parse(self):
    return bso(self.response.text, "html.parser")
        self.engine = bso(data, "lxml")

    def getSearchItems(self):
        main = self.engine.find("div", attrs={"id": "main"})
        searchitems = main.findChildren(recursive=False)
        refineditems = []
        for item in searchitems:
            if item.name == "div" and "class" not in item.attrs.keys():
                refineditems.append(item)
        return refineditems

    def dictifyItem(self, soupSearchTag):
        titleH3 = soupSearchTag.find("h3", attrs={"class": "zBAuLc"})
        if titleH3 is None:
            return None
        title = titleH3.find("div").string
        date = soupSearchTag.find("span", attrs={"class": "r0bn4c rQMQod"})
        if date is None:
            return None
        date = date.string
        return {"title": title, "date": date}

    def updateData(self, data):
        self.engine = bso(data, "lxml")


# google.com/search?q=covid&num=100&pws=0&start=100
if __name__ == "__main__":
    our_soup = bso(sample_content, "lxml")
    first_child = our_soup.find()
    print(first_child)
def post(self):
    json_webhook = request.form.get('payload')
    parsed_json_webhook = json.loads(json_webhook)

    event = parsed_json_webhook['event']
    if event not in ['media.play']:
        return '', 204

    media_type = parsed_json_webhook['Metadata']['type']

    if media_type == 'episode':
        season = parsed_json_webhook['Metadata']['parentIndex']
        episode = parsed_json_webhook['Metadata']['index']
    else:
        season = episode = None

    ids = []
    for item in parsed_json_webhook['Metadata']['Guid']:
        splitted_id = item['id'].split('://')
        if len(splitted_id) == 2:
            ids.append({splitted_id[0]: splitted_id[1]})
    if not ids:
        return '', 404

    if media_type == 'episode':
        try:
            episode_imdb_id = [x['imdb'] for x in ids if 'imdb' in x][0]
            r = requests.get(
                'https://imdb.com/title/{}'.format(episode_imdb_id),
                headers={"User-Agent": os.environ["SZ_USER_AGENT"]})
            soup = bso(r.content, "html.parser")
            series_imdb_id = soup.find(
                'a',
                {'class': re.compile(r'SeriesParentLink__ParentTextLink')}
            )['href'].split('/')[2]
        except:
            return '', 404
        else:
            sonarrEpisodeId = TableEpisodes.select(TableEpisodes.sonarrEpisodeId) \
                .join(TableShows, on=(TableEpisodes.sonarrSeriesId == TableShows.sonarrSeriesId)) \
                .where(TableShows.imdbId == series_imdb_id,
                       TableEpisodes.season == season,
                       TableEpisodes.episode == episode) \
                .dicts() \
                .get()

            if sonarrEpisodeId:
                episode_download_subtitles(
                    no=sonarrEpisodeId['sonarrEpisodeId'],
                    send_progress=True)
    else:
        try:
            movie_imdb_id = [x['imdb'] for x in ids if 'imdb' in x][0]
        except:
            return '', 404
        else:
            radarrId = TableMovies.select(TableMovies.radarrId) \
                .where(TableMovies.imdbId == movie_imdb_id) \
                .dicts() \
                .get()
            if radarrId:
                movies_download_subtitles(no=radarrId['radarrId'])

    return '', 200
def getSoup():
    url = 'http://www.beisbolcubano.cu/'
    html = requests.get(url)
    soup = bso(html.text, 'lxml')
    return soup