def get_search_result(self, keyword):
    """Search the site for ``keyword`` and return the matching movies.

    The keyword (spaces replaced by ``+``) and ``self.domain`` are inserted
    into ``self.search_url``. When ``scraper_lib.get_page_soup`` returns
    ``-1`` for that URL, the URL given by ``self.get_movie_url_from_google``
    is fetched instead.

    Args:
        keyword: Search text; spaces are allowed.

    Returns:
        A list with one ``self.__get_post_info`` result per matching ``div``
        container. If the Google fallback was used and its page has no such
        container, the list holds a single ``Movie`` pointing at that page,
        titled with the page's ``title`` element (or the raw keyword when
        that element cannot be read). Otherwise an empty list when nothing
        matched.
    """
    raw_keyword = keyword
    keyword = raw_keyword.replace(" ", "+")

    # Bound only by the Google fallback below. The previous code read this
    # name unconditionally and raised NameError when the direct search
    # succeeded but produced no containers.
    result_url = None

    search_result = scraper_lib.get_page_soup(
        url=self.search_url.format(self.domain, keyword),
        check_result=True)
    if search_result == -1:
        result_url = self.get_movie_url_from_google(keyword)
        search_result = scraper_lib.get_page_soup(url=result_url)

    movies = scraper_lib.Container(
        block=search_result,
        tag='div',
        container_class='col-lg-3 col-md-4 col-xs-4 mb-30').get_container()
    movies_list = [self.__get_post_info(movie) for movie in movies]

    if not movies_list and result_url is not None:
        # The fallback URL is used as the movie page itself.
        try:
            title = scraper_lib.Element(block=search_result,
                                        el_tag='title',
                                        get_text=True).get_element()
        except Exception:
            # Best effort: fall back to what the user typed.
            title = raw_keyword
        movie = Movie(title=title, page_url=result_url)
        movie.image_url = "n.d."
        movies_list.append(movie)

    return movies_list
def __get_iframe_page(self, seriehd_page):
    """Return the parsed page referenced by the ``iframeVid`` iframe.

    Args:
        seriehd_page: Parsed page in which the ``iframe`` element with id
            ``iframeVid`` is looked up.

    Returns:
        Whatever ``scraper_lib.get_page_soup`` returns for the iframe's
        ``src`` URL.
    """
    iframe_tag = scraper_lib.Element(
        seriehd_page, 'iframe', el_id='iframeVid').get_element()
    iframe_src = iframe_tag["src"]
    return scraper_lib.get_page_soup(url=iframe_src)
def get_episodes(self, episodes_url):
    """Build the episode list found on the page at ``episodes_url``.

    The first ``div`` with class ``buttons-bar episodes`` is located and
    every ``li`` inside it becomes one ``Episode``.

    Args:
        episodes_url: URL of the page holding the episodes bar.

    Returns:
        A list of ``Episode`` objects. Each one uses the stripped text of
        its ``li`` as episode number and the first href found in that
        ``li`` as ``urls``.
    """
    page = scraper_lib.get_page_soup(url=episodes_url)
    bar = scraper_lib.Container(
        page,
        tag="div",
        first=True,
        container_class="buttons-bar episodes").get_container()
    bar_soup = scraper_lib.get_soup_prettified(bar)
    items = scraper_lib.Container(bar_soup, tag="li").get_container()

    def build_episode(item):
        # The visible text of the list item is the episode number.
        number = scraper_lib.get_text(item).strip()
        return Episode(title="Episodio {0}".format(number),
                       episode_no=number,
                       urls=scraper_lib.get_hrefs(item)[0])

    return [build_episode(item) for item in items]
def get_seasons(self, tvshow_page_url):
    """Build the season list for the TV show at ``tvshow_page_url``.

    The show page is fetched, its embedded iframe page is resolved through
    ``self.__get_iframe_page``, and every ``li`` inside the first ``div``
    with class ``buttons-bar seasons`` becomes one ``Season``.

    Args:
        tvshow_page_url: URL of the TV show page.

    Returns:
        A list of ``Season`` objects. Each one uses the stripped text of
        its ``li`` as season number and the first href found in that ``li``
        as ``ref_url``.
    """
    page = scraper_lib.get_page_soup(url=tvshow_page_url)
    iframe = self.__get_iframe_page(page)
    seasons_container = scraper_lib.Container(
        iframe,
        tag="div",
        first=True,
        container_class="buttons-bar seasons").get_container()
    season_soup = scraper_lib.get_soup_prettified(seasons_container)
    seasons_block = scraper_lib.Container(season_soup,
                                          tag="li").get_container()

    seasons = []
    for season in seasons_block:
        # Extract the label once and reuse it for both title and number
        # (it used to be extracted twice per item).
        season_no = scraper_lib.get_text(season).strip()
        seasons.append(
            Season(title="Stagione {0}".format(season_no),
                   season_no=season_no,
                   ref_url=scraper_lib.get_hrefs(season)[0]))
    return seasons
def get_hdload_frame(self, title, movie_url):
    """Return the ``src`` URL of the ``iframeVid`` iframe on a movie page.

    Args:
        title: Not used by this method.
        movie_url: URL of the page to fetch.

    Returns:
        The ``src`` attribute of the ``iframe`` element whose id is
        ``iframeVid``.
    """
    movie_page = scraper_lib.get_page_soup(movie_url)
    iframe_tag = scraper_lib.Element(
        movie_page, 'iframe', el_id='iframeVid').get_element()
    return iframe_tag["src"]
def get_search_result(self, keyword):
    """Search for ``keyword`` and return the seasons of the first hit.

    A new session from ``scraper_lib.get_cf_session`` is stored on
    ``self.cf_session`` and used for both page requests.

    Args:
        keyword: Search text; spaces are replaced by ``+``.

    Returns:
        The result of ``self.get_seasons`` called with the fetched page of
        the first search result.
    """
    query = keyword.replace(" ", "+")
    self.cf_session = scraper_lib.get_cf_session()

    search_url = self.search_url.format(self.domain, query)
    results_page = scraper_lib.get_page_soup(url=search_url,
                                             scraper=self.cf_session)
    first_hit = scraper_lib.Container(
        block=results_page,
        tag='div',
        first=True,
        container_class='col-xs-6 col-sm-2-5').get_container()
    post_info = self.__get_post_info(first_hit)

    # Pause one second between the two requests.
    time.sleep(1)

    show_page = scraper_lib.get_page_soup(post_info["url"],
                                          scraper=self.cf_session)
    return self.get_seasons(show_page)
def get_first_result(search_string, destination_url):
    """Return the link of the first search result, if any.

    Args:
        search_string: Passed to ``__get_request_url`` to build the request.
        destination_url: Passed to ``__get_request_url`` to build the request.

    Returns:
        The ``href`` of the first anchor inside the first ``div`` with class
        ``r``, or ``None`` when either the ``div`` or the anchor is missing.
    """
    request_url = __get_request_url(search_string, destination_url)
    soup = scraper_lib.get_page_soup(url=request_url)

    result_block = soup.find('div', class_='r')
    if not result_block:
        return None

    anchor = result_block.find('a')
    if not anchor:
        return None

    return anchor["href"]
def get_filmpertutti_domain(self):
    """Look up the current filmpertutti domain, with a default fallback.

    The page at ``self.url_check_domain`` is fetched with a 5 second
    timeout, and the first href containing ``filmpertutti.`` inside the
    first ``div`` with class ``content`` is returned.

    Returns:
        The discovered domain URL, or ``self.default_domain`` when the
        lookup fails for any reason (request error, missing container,
        no matching link).
    """
    try:
        soup = scraper_lib.get_page_soup(self.url_check_domain, timeout=5)
        wrapper = scraper_lib.Container(
            block=soup,
            tag='div',
            first=True,
            container_class="content").get_container()
        return scraper_lib.get_hrefs(wrapper, ["filmpertutti."])[0]
    except Exception:
        # Deliberate best effort. ``Exception`` rather than a bare
        # ``except`` so KeyboardInterrupt/SystemExit still propagate.
        return self.default_domain
def get_all_players(self):
    """Return the names of the players listed on the current page.

    The page at ``self.current_url`` is fetched and stored on
    ``self.hdpass_page``.

    Returns:
        A list with the text of every ``li`` inside the first ``div`` with
        class ``hosts-bar``.
    """
    self.hdpass_page = scraper_lib.get_page_soup(self.current_url)
    hosts_bar = scraper_lib.Container(self.hdpass_page,
                                      'div',
                                      container_class='hosts-bar',
                                      first=True).get_container()
    entries = scraper_lib.Container(hosts_bar, 'li').get_container()

    names = []
    for entry in entries:
        names.append(scraper_lib.get_text(entry))
    return names
def get_seasons(self, page_url):
    """Return one ``Season`` per spoiler title found on ``page_url``.

    Args:
        page_url: URL of the page to fetch.

    Returns:
        A list of ``Season`` objects numbered from 1 in page order, one for
        each ``div`` with class ``su-spoiler-title``. Only the count of
        those elements is used; their content is not read.
    """
    page = scraper_lib.get_page_soup(page_url)
    spoiler_titles = scraper_lib.Container(
        page, tag="div",
        container_class="su-spoiler-title").get_container()

    return [
        Season(title="stagione: {0}".format(number), season_no=number)
        for number, _ in enumerate(spoiler_titles, start=1)
    ]
def get_results(search_string, destination_url):
    """Return the title and link of every search result on the page.

    Args:
        search_string: Passed to ``__get_request_url`` to build the request.
        destination_url: Passed to ``__get_request_url`` to build the request.

    Returns:
        A list of ``{"title": ..., "link": ...}`` dicts, one for each
        ``div`` with class ``r`` that has both an anchor with an ``href``
        and an ``h3`` heading. Incomplete result blocks are skipped.
    """
    soup = scraper_lib.get_page_soup(
        url=__get_request_url(search_string, destination_url))

    results = []
    for block in soup.find_all('div', class_='r'):
        anchor = block.find('a')
        heading = block.find('h3')
        # ``find`` returns None when the tag is absent. The previous code
        # dereferenced the heading unconditionally and failed with
        # AttributeError on blocks without an <h3>.
        if anchor is None or heading is None:
            continue
        link = anchor.get('href')
        if not link:
            continue
        results.append({"title": heading.text, "link": link})
    return results
def get_embed_values_by_player(self, player_name):
    """Return the embed iframe element for the player named ``player_name``.

    ``self.hdpass_page`` is reused when already set; otherwise the page at
    ``self.current_url`` is fetched and stored there.

    Args:
        player_name: Text of the ``li`` entry (inside the ``hosts-bar``
            ``div``) identifying the wanted player.

    Returns:
        The ``iframe`` element looked up with ``el_property="custom-src"``
        on the first matching player's page, or ``None`` when no player
        matches.
    """
    if self.hdpass_page is None:
        self.hdpass_page = scraper_lib.get_page_soup(self.current_url)

    hosts_bar = scraper_lib.Container(self.hdpass_page,
                                      'div',
                                      container_class='hosts-bar',
                                      first=True).get_container()
    entries = scraper_lib.Container(hosts_bar, 'li').get_container()

    for entry in entries:
        if player_name == scraper_lib.get_text(entry):
            anchor = scraper_lib.get_tag(entry, 'a')
            # Drop the "amp;" fragments from the link before requesting it.
            player_url = anchor["href"].replace("amp;", "")
            player_page = scraper_lib.get_page_soup(url=player_url)
            return scraper_lib.Element(
                player_page, 'iframe',
                el_property="custom-src").get_element()

    return None
def get_search_result(self, keyword):
    """Search the site for ``keyword`` and return the matching TV shows.

    Args:
        keyword: Search text, inserted unchanged into ``self.search_url``
            together with ``self.domain``.

    Returns:
        A list with one ``self.__get_post_info`` result for each matching
        ``div`` container on the results page.
    """
    search_url = self.search_url.format(self.domain, keyword)
    results_page = scraper_lib.get_page_soup(url=search_url)
    cards = scraper_lib.Container(
        block=results_page,
        tag='div',
        container_class='col-xl-3 col-lg-3 col-md-3 col-sm-6 col-6'
    ).get_container()

    return [self.__get_post_info(card) for card in cards]
def get_fpt_posts(self, keyword, media_type):
    """Search filmpertutti for ``keyword`` and return the found posts.

    Args:
        keyword: Search text; spaces are replaced by ``+``.
        media_type: Forwarded unchanged to ``self.get_post_info`` for every
            post.

    Returns:
        A list with one ``self.get_post_info`` result for each ``li`` inside
        the first ``ul`` with class ``posts``, or ``None`` when any step of
        the search or parsing fails.
    """
    try:
        key_search = keyword.replace(" ", "+")
        url_search = self.filmpertutti_url.format(self.domain, key_search)
        soup = scraper_lib.get_page_soup(url_search)
        container = scraper_lib.Container(
            block=soup,
            tag='ul',
            first=True,
            container_class="posts").get_container()
        posts = scraper_lib.Container(block=container,
                                      tag='li').get_container()
        return [self.get_post_info(post, media_type) for post in posts]
    except Exception:
        # Callers receive None on failure. ``Exception`` rather than a bare
        # ``except`` so KeyboardInterrupt/SystemExit still propagate.
        return None
def get_search_result(self, keyword):
    """Search the site for ``keyword`` and return the matching TV shows.

    Args:
        keyword: Search text; spaces are replaced by ``+``.

    Returns:
        A list with one ``self.__get_post_info`` result for each ``li``
        inside the first ``ul`` with class ``recent-posts``. Entries whose
        info cannot be extracted are left out.
    """
    keyword = keyword.replace(" ", "+")
    search_result = scraper_lib.get_page_soup(
        url=self.search_url.format(self.domain, keyword))
    div_posts = scraper_lib.Container(block=search_result,
                                      tag='ul',
                                      container_class='recent-posts',
                                      first=True).get_container()
    tvshows = scraper_lib.Container(block=div_posts,
                                    tag='li').get_container()

    tvshow_lst = []
    for tvshow in tvshows:
        try:
            info = self.__get_post_info(tvshow)
        except Exception:
            # Best effort: one malformed entry must not discard the rest.
            # ``Exception`` rather than a bare ``except`` so
            # KeyboardInterrupt/SystemExit still propagate.
            continue
        tvshow_lst.append(info)
    return tvshow_lst
def play(title, streaming_url):
    """Play ``streaming_url`` with the strategy matching its source.

    * ``speedvideo``: the direct stream URL is resolved through
      ``speedvideo.get_stream_url`` and played; if that yields no usable URL
      the generic resolver is used on the original URL.
    * ``vcrypt``: the page is fetched, the ``src`` of its ``iframe`` is
      wrapped in ``Mixdrop`` and the resulting final URL is played.
    * anything else: the generic resolver is used.

    Args:
        title: Not used by this function.
        streaming_url: URL of the hosting page to play.
    """
    streaming_source_name = kodiutilsitem.get_streaming_source_name(
        streaming_url)

    if streaming_source_name == "speedvideo":
        playable_url = speedvideo.get_stream_url(streaming_url)
        # Validate the *resolved* URL. The previous check tested
        # ``streaming_url`` (the input), so the resolver fallback could
        # effectively never run.
        # NOTE(review): '' and '404' are presumably the failure markers of
        # ``speedvideo.get_stream_url`` -- confirm against that helper.
        if playable_url and playable_url != '404':
            kodiplayer.play_video(playable_url)
        else:
            kodiplayer.play_video_with_resolver(streaming_url)
    elif streaming_source_name == "vcrypt":
        page = scraper_lib.get_page_soup(url=streaming_url)
        mixdrop_url = scraper_lib.Element(page, 'iframe').get_element()["src"]
        mixdrop_obj = Mixdrop(mixdrop_url)
        kodiplayer.play_video(mixdrop_obj.get_final_url())
    else:
        kodiplayer.play_video_with_resolver(streaming_url)
def get_episodes(self, page_url, season_no=1):
    """Return the episodes of one season listed on ``page_url``.

    Args:
        page_url: URL of the page to fetch.
        season_no: 1-based position of the season among the ``div``
            elements with class ``su-spoiler-content``.

    Returns:
        A list of ``Episode`` objects numbered from 1 in page order. Each
        one is titled with the text of its ``su-link-ep`` ``div`` (newlines
        removed, stripped) and carries a one-element ``urls`` list holding
        the ``href`` of that entry's anchor.
    """
    page = scraper_lib.get_page_soup(page_url)
    season_blocks = scraper_lib.Container(
        page, tag="div",
        container_class="su-spoiler-content").get_container()
    season_block = season_blocks[season_no - 1]
    entries = scraper_lib.Container(
        season_block, tag="div",
        container_class="su-link-ep").get_container()

    def build_episode(number, entry):
        label = scraper_lib.get_text(entry).replace('\n', '').strip()
        link = scraper_lib.Element(entry,
                                   el_tag="a",
                                   el_property="href").get_element()
        return Episode(title=label, urls=[link], episode_no=number)

    return [
        build_episode(number, entry)
        for number, entry in enumerate(entries, start=1)
    ]
def get_movie(self, title, fpt_movie_url):
    """Build a ``Movie`` from the page at ``fpt_movie_url``.

    The fetched page is stored on ``self.soup`` before
    ``self.__get_movies_url`` is called.

    Args:
        title: Title given to the resulting ``Movie``.
        fpt_movie_url: URL of the movie page to fetch.

    Returns:
        ``Movie(title, urls)`` where ``urls`` is the result of
        ``self.__get_movies_url()``.
    """
    page = scraper_lib.get_page_soup(fpt_movie_url)
    self.soup = page
    return Movie(title, self.__get_movies_url())
def scrape(self, fpt_tvshow_url):
    """Fetch a TV show page and store its seasons wrapper.

    Sets ``self.soup`` to the fetched page, then sets
    ``self.seasons_wrapper`` to the result of ``self.get_seasons_wrapper()``.

    Args:
        fpt_tvshow_url: URL of the TV show page to fetch.
    """
    page = scraper_lib.get_page_soup(fpt_tvshow_url)
    # Assign the page first, then compute the wrapper.
    self.soup = page
    wrapper = self.get_seasons_wrapper()
    self.seasons_wrapper = wrapper