コード例 #1
0
    def get_seasons(self, tvshow_page_url):
        """Build the list of seasons advertised on a TV-show page.

        The page at ``tvshow_page_url`` is fetched and handed to
        ``__get_iframe_page``; inside the result, the first ``div`` with
        class ``buttons-bar seasons`` is located and every ``li`` it holds
        is turned into one ``Season``.

        :param tvshow_page_url: URL of the TV-show page to scrape.
        :return: list of ``Season`` objects, one per ``li`` entry.
        """
        show_page = scraper_lib.get_page_soup(url=tvshow_page_url)
        iframe_page = self.__get_iframe_page(show_page)

        seasons_bar = scraper_lib.Container(
            iframe_page,
            tag="div",
            first=True,
            container_class="buttons-bar seasons",
        ).get_container()

        bar_soup = scraper_lib.get_soup_prettified(seasons_bar)
        entries = scraper_lib.Container(bar_soup, tag="li").get_container()

        found = []
        for entry in entries:
            # The stripped text of the entry doubles as season number and
            # as the variable part of the title.
            label = scraper_lib.get_text(entry).strip()
            title = "Stagione {0}".format(label)
            # Only the first href of the entry is kept as reference URL.
            ref_url = scraper_lib.get_hrefs(entry)[0]
            found.append(Season(title=title, season_no=label, ref_url=ref_url))

        return found
コード例 #2
0
    def get_all_seasons(self):
        """Rebuild ``self.seasons_lst`` from ``self.seasons_wrapper``.

        The list is emptied in place, then one ``Season`` is appended per
        season block found in the wrapper. Which tags are searched depends
        on ``self.is_modern_state``.

        :return: ``self.seasons_lst`` (the same list object, refilled).
        """
        # Slice deletion empties a list in place on Python 2 and Python 3.
        del self.seasons_lst[:]

        if self.is_modern_state:
            lookup = dict(tag='div', container_class='accordion-item')
            title_tag = 'ul'
        else:
            # Old page version: seasons are direct <p> children.
            lookup = dict(tag='p', text=True, recursive=False)
            title_tag = 'span'

        finder = scraper_lib.Container(block=self.seasons_wrapper, **lookup)
        # A single Element is reused; only its block changes per season.
        title_reader = scraper_lib.Element(block='',
                                           el_tag=title_tag,
                                           get_text=True)

        # season_no is the zero-based position of the block in the wrapper.
        for position, block in enumerate(finder.get_container()):
            title_reader.block = block
            self.seasons_lst.append(
                Season(title=title_reader.get_element(), season_no=position))

        return self.seasons_lst
コード例 #3
0
    def get_episodes(self, episodes_url):
        """Build the list of episodes advertised on a season page.

        The page at ``episodes_url`` is fetched, the first ``div`` with
        class ``buttons-bar episodes`` is located and every ``li`` it holds
        is turned into one ``Episode``.

        :param episodes_url: URL of the page listing the episodes.
        :return: list of ``Episode`` objects, one per ``li`` entry.
        """
        season_page = scraper_lib.get_page_soup(url=episodes_url)

        episodes_bar = scraper_lib.Container(
            season_page,
            tag="div",
            first=True,
            container_class="buttons-bar episodes",
        ).get_container()

        bar_soup = scraper_lib.get_soup_prettified(episodes_bar)
        entries = scraper_lib.Container(bar_soup, tag="li").get_container()

        found = []
        for entry in entries:
            number = scraper_lib.get_text(entry).strip()
            title = "Episodio {0}".format(number)
            # NOTE: ``urls`` receives the first href only (a single value,
            # not a list), exactly as the listing provides it.
            first_href = scraper_lib.get_hrefs(entry)[0]
            found.append(
                Episode(title=title, episode_no=number, urls=first_href))

        return found
コード例 #4
0
    def get_all_players(self):
        """Return the text of every player entry in the hosts bar.

        The page at ``self.current_url`` is (re)fetched and stored in
        ``self.hdpass_page``; the ``li`` items of the first ``div`` with
        class ``hosts-bar`` are then read.

        :return: list with the text of each ``li`` entry.
        """
        self.hdpass_page = scraper_lib.get_page_soup(self.current_url)

        hosts_bar = scraper_lib.Container(
            self.hdpass_page, 'div', container_class='hosts-bar',
            first=True).get_container()

        names = []
        for entry in scraper_lib.Container(hosts_bar, 'li').get_container():
            names.append(scraper_lib.get_text(entry))
        return names
コード例 #5
0
    def get_episodes_by_season_number(self, season_no):
        """Return all the episodes of the season at index ``season_no``.

        ``self.seasons_lst`` is refreshed through ``get_all_seasons`` first.
        ``season_no`` is used as a plain list index; when it is out of
        range the last season is used instead. The season block is then
        located again inside ``self.seasons_wrapper`` by comparing titles.

        :param season_no: index into ``self.seasons_lst``.
        :return: the value of ``get_all_episodes`` for the matching season,
            or ``None`` when there are no seasons or no title matches.
        """
        self.get_all_seasons()
        if not self.seasons_lst:
            return None

        try:
            season_obj = self.seasons_lst[season_no]
        except IndexError:
            # Out-of-range request: fall back to the last known season.
            season_obj = self.seasons_lst[-1]

        if self.is_modern_state:
            # New version config
            seasons = scraper_lib.Container(
                self.seasons_wrapper,
                tag='div',
                container_class='accordion-item').get_container()

            s_title = scraper_lib.Element(block='',
                                          el_tag='ul',
                                          get_text=True)

            # Only needed (and only defined) for the modern layout.
            season_content = scraper_lib.Container(
                block='', tag='div', first=True, container_class='content')
        else:
            # Old version config
            seasons = scraper_lib.Container(
                self.seasons_wrapper, tag='p',
                recursive=False).get_container()

            s_title = scraper_lib.Element(block='',
                                          el_tag='span',
                                          get_text=True)

        for season in seasons:
            try:
                s_title.block = season
                title = s_title.get_element()
            except Exception:
                # Best effort: a block without a readable title is given a
                # placeholder. KeyboardInterrupt/SystemExit are no longer
                # swallowed here.
                title = "nd"

            if title != season_obj.title:
                continue

            if self.is_modern_state:
                season_content.block = season
                content = season_content.get_container()
            else:
                # Old layout: the episodes live in the sibling that follows
                # the season's <p>.
                content = scraper_lib.get_next_sibling(season)

            return self.get_all_episodes(content)

        return None
コード例 #6
0
    def get_search_result(self, keyword):
        """Search the site for ``keyword`` and return the matching movies.

        Spaces in the keyword are replaced by ``+`` before it is formatted
        into ``self.search_url``. When the search request yields ``-1`` the
        URL is resolved through ``get_movie_url_from_google`` and that page
        is loaded instead. If the loaded page contains no result cards, a
        single fallback ``Movie`` is built from the page ``<title>`` (or the
        raw keyword when the title cannot be read).

        :param keyword: free-text search keyword.
        :return: list of movies; never empty on normal completion.
        """
        raw_keyword = keyword
        keyword = raw_keyword.replace(" ", "+")

        # ``result_url`` always names the page that ``search_result`` was
        # loaded from, so the fallback below can use it even when the
        # Google lookup was not needed (previously it was unbound there and
        # the fallback raised UnboundLocalError).
        result_url = self.search_url.format(self.domain, keyword)
        search_result = scraper_lib.get_page_soup(url=result_url,
                                                  check_result=True)

        if search_result == -1:
            result_url = self.get_movie_url_from_google(keyword)
            search_result = scraper_lib.get_page_soup(url=result_url)

        movies = scraper_lib.Container(
            block=search_result,
            tag='div',
            container_class='col-lg-3 col-md-4 col-xs-4 mb-30').get_container()

        movies_list = [self.__get_post_info(movie) for movie in movies]

        if not movies_list:
            try:
                title = scraper_lib.Element(block=search_result,
                                            el_tag='title',
                                            get_text=True).get_element()
            except Exception:
                # Title not readable: fall back to what the user typed.
                title = raw_keyword

            movie = Movie(title=title, page_url=result_url)
            movie.image_url = "n.d."
            movies_list.append(movie)

        return movies_list
コード例 #7
0
    def get_fpt_posts(self, keyword, media_type):
        """Search for ``keyword`` and return the info of every post found.

        Spaces in the keyword are replaced by ``+``, the result is formatted
        into ``self.filmpertutti_url`` together with ``self.domain``, and
        each ``li`` of the first ``ul`` with class ``posts`` is passed to
        ``get_post_info``.

        :param keyword: free-text search keyword.
        :param media_type: forwarded unchanged to ``get_post_info``.
        :return: list of ``get_post_info`` results, or ``None`` when any
            step of the lookup raises.
        """
        try:
            key_search = keyword.replace(" ", "+")
            url_search = self.filmpertutti_url.format(self.domain, key_search)

            soup = scraper_lib.get_page_soup(url_search)

            container = scraper_lib.Container(
                block=soup, tag='ul', first=True,
                container_class="posts").get_container()

            posts = scraper_lib.Container(block=container,
                                          tag='li').get_container()

            return [self.get_post_info(post, media_type) for post in posts]
        except Exception:
            # Deliberate best effort: callers receive None on failure.
            # ``Exception`` (not a bare except) keeps KeyboardInterrupt and
            # SystemExit propagating.
            return None
コード例 #8
0
    def get_seasons_wrapper(self):
        """Return the HTML tag which contains all the seasons.

        The first ``div`` with class ``seasons-wraper`` in ``self.soup`` is
        preferred. When it is missing, ``self.is_modern_state`` is set to
        ``False`` and the last ``div`` matching id ``info`` / class ``pad``
        is returned instead.
        """
        modern_wrapper = scraper_lib.Container(
            block=self.soup,
            tag='div',
            first=True,
            container_class='seasons-wraper').get_container()

        if modern_wrapper is None:
            # No modern wrapper on the page: switch to the old layout.
            self.is_modern_state = False

            legacy_blocks = scraper_lib.Container(
                block=self.soup,
                tag='div',
                container_id='info',
                container_class='pad').get_container()

            return legacy_blocks[-1]

        return modern_wrapper
コード例 #9
0
    def get_filmpertutti_domain(self):
        """Return the current site domain URL, or the default one.

        The page at ``self.url_check_domain`` is loaded (5 second timeout)
        and the first href matching ``"filmpertutti."`` inside the first
        ``div`` with class ``content`` is returned.

        :return: the discovered URL, or ``self.default_domain`` when the
            lookup raises (page unreachable, wrapper or link missing, ...).
        """
        try:
            soup = scraper_lib.get_page_soup(self.url_check_domain, timeout=5)

            wrapper = scraper_lib.Container(
                block=soup, tag='div', first=True,
                container_class="content").get_container()

            return scraper_lib.get_hrefs(wrapper, ["filmpertutti."])[0]
        except Exception:
            # Best effort by design; ``Exception`` instead of a bare except
            # so that KeyboardInterrupt/SystemExit are not swallowed.
            return self.default_domain
コード例 #10
0
    def get_seasons(self, page_url):
        """Return one numbered ``Season`` per spoiler title on the page.

        Only the number of ``div`` elements with class ``su-spoiler-title``
        matters: seasons are numbered from 1 in the order they are found.

        :param page_url: URL of the TV-show page.
        :return: list of ``Season`` objects.
        """
        page = scraper_lib.get_page_soup(page_url)

        spoiler_titles = scraper_lib.Container(
            page, tag="div",
            container_class="su-spoiler-title").get_container()

        # The element itself is not inspected, only its position.
        return [
            Season(title="stagione: {0}".format(number), season_no=number)
            for number, _ in enumerate(spoiler_titles, start=1)
        ]
コード例 #11
0
    def get_search_result(self, keyword):
        """Search the site for ``keyword`` and return the TV shows found.

        Spaces in the keyword are replaced by ``+`` before it is formatted
        into ``self.search_url``. Every ``li`` of the first ``ul`` with
        class ``recent-posts`` is parsed through ``__get_post_info``;
        entries that fail to parse are skipped.

        :param keyword: free-text search keyword.
        :return: list of parsed posts (possibly empty).
        """
        tvshow_lst = []
        keyword = keyword.replace(" ", "+")
        search_result = scraper_lib.get_page_soup(
            url=self.search_url.format(self.domain, keyword))

        div_posts = scraper_lib.Container(block=search_result,
                                          tag='ul',
                                          container_class='recent-posts',
                                          first=True).get_container()

        tvshows = scraper_lib.Container(block=div_posts,
                                        tag='li').get_container()

        for tvshow in tvshows:
            try:
                info = self.__get_post_info(tvshow)
            except Exception:
                # Best effort: a malformed entry must not abort the whole
                # search. ``Exception`` (not a bare except) lets
                # KeyboardInterrupt/SystemExit propagate.
                continue
            tvshow_lst.append(info)

        return tvshow_lst
コード例 #12
0
    def get_embed_values_by_player(self, player_name):
        """Return the ``custom-src`` of the iframe for the given player.

        ``self.hdpass_page`` is loaded from ``self.current_url`` only when
        it is still ``None``. The hosts bar is scanned for the first ``li``
        whose text equals ``player_name``; its link is followed and the
        ``custom-src`` property of the ``iframe`` on that page is returned.

        :param player_name: exact text of the player entry to look for.
        :return: the iframe ``custom-src`` value, or ``None`` when no
            entry matches ``player_name``.
        """
        if self.hdpass_page is None:
            self.hdpass_page = scraper_lib.get_page_soup(self.current_url)

        hosts_bar = scraper_lib.Container(
            self.hdpass_page, 'div', container_class='hosts-bar',
            first=True).get_container()

        player_url = None
        for entry in scraper_lib.Container(hosts_bar, 'li').get_container():
            if scraper_lib.get_text(entry) != player_name:
                continue
            link = scraper_lib.get_tag(entry, 'a')
            # Drop every literal "amp;" from the href (presumably left over
            # from HTML-escaped ampersands -- TODO confirm).
            player_url = link["href"].replace("amp;", "")
            break

        if player_url is None:
            return None

        player_page = scraper_lib.get_page_soup(url=player_url)
        return scraper_lib.Element(player_page, 'iframe',
                                   el_property="custom-src").get_element()
コード例 #13
0
    def get_episodes(self, page_url, season_no=1):
        """Return the episodes of season ``season_no`` found on the page.

        The season is selected by position among the ``div`` elements with
        class ``su-spoiler-content`` (``season_no`` is 1-based); each
        ``div`` with class ``su-link-ep`` inside it becomes one ``Episode``.

        :param page_url: URL of the TV-show page.
        :param season_no: 1-based season number (default ``1``).
        :return: list of ``Episode`` objects numbered from 1.
        """
        page = scraper_lib.get_page_soup(page_url)

        spoilers = scraper_lib.Container(
            page, tag="div",
            container_class="su-spoiler-content").get_container()
        season_block = spoilers[season_no - 1]

        episode_links = scraper_lib.Container(
            season_block, tag="div",
            container_class="su-link-ep").get_container()

        episodes_lst = []
        for number, link_block in enumerate(episode_links, start=1):
            ep_title = scraper_lib.get_text(link_block)
            ep_title = ep_title.replace('\n', '').strip()

            href = scraper_lib.Element(link_block, el_tag="a",
                                       el_property="href").get_element()

            # ``urls`` is a one-element list holding the episode link.
            episodes_lst.append(
                Episode(title=ep_title, urls=[href], episode_no=number))

        return episodes_lst
コード例 #14
0
    def get_search_result(self, keyword):
        """Search the site for ``keyword`` and return the parsed result cards.

        ``keyword`` is formatted as-is into ``self.search_url`` together
        with ``self.domain``; every ``div`` with the result-card class list
        is parsed through ``__get_post_info``.

        :param keyword: search keyword.
        :return: list of ``__get_post_info`` results (possibly empty).
        """
        search_url = self.search_url.format(self.domain, keyword)
        search_result = scraper_lib.get_page_soup(url=search_url)

        cards = scraper_lib.Container(
            block=search_result,
            tag='div',
            container_class='col-xl-3 col-lg-3 col-md-3 col-sm-6 col-6'
        ).get_container()

        return [self.__get_post_info(card) for card in cards]
コード例 #15
0
    def get_seasons(self, page):
        """Return one ``Season`` per season selector button on ``page``.

        Seasons are numbered from 1 following the order of the ``a``
        elements with class ``button-sel-serie``. For season ``i`` the
        ``div`` with class ``row-stagione-i`` is attached as ``html_block``.

        :param page: already parsed page to search in.
        :return: list of ``Season`` objects.
        """
        buttons = scraper_lib.Container(
            page, tag="a", container_class="button-sel-serie").get_container()

        season_lst = []
        # The button itself is not inspected; only its position is used.
        for number, _ in enumerate(buttons, start=1):
            row_class = "row-stagione-{0}".format(number)
            row = scraper_lib.Element(page, el_tag="div",
                                      el_class=row_class).get_element()

            season_lst.append(
                Season(title="stagione: {0}".format(number),
                       season_no=number,
                       html_block=row))

        return season_lst
コード例 #16
0
    def get_episodes(self, page):
        """Return the episodes found in ``page``.

        ``page`` is first converted with ``scraper_lib.get_soup``; each
        ``a`` with class ``box-link-serie`` then becomes one ``Episode``
        whose URLs come from ``get_playable_urls`` and whose title is the
        text of a ``div`` inside the link.

        :param page: markup of the season block to parse.
        :return: list of ``Episode`` objects numbered from 1.
        """
        soup = scraper_lib.get_soup(page)

        links = scraper_lib.Container(
            soup, tag="a", container_class="box-link-serie").get_container()

        episodes = []
        for number, link in enumerate(links, start=1):
            playable = self.get_playable_urls(link)
            name = scraper_lib.Element(link, el_tag="div",
                                       get_text=True).get_element()
            episodes.append(
                Episode(title=name, urls=playable, episode_no=number))

        return episodes
コード例 #17
0
    def get_search_result(self, keyword):
        """Search for ``keyword`` and return the seasons of the first hit.

        A new session from ``scraper_lib.get_cf_session`` is stored in
        ``self.cf_session`` and used for both requests. Only the first
        ``div`` with class ``col-xs-6 col-sm-2-5`` of the search page is
        considered; after a one second pause its page is loaded and passed
        to ``get_seasons``.

        :param keyword: free-text search keyword (spaces become ``+``).
        :return: whatever ``get_seasons`` returns for the first result.
        """
        query = keyword.replace(" ", "+")
        self.cf_session = scraper_lib.get_cf_session()

        search_url = self.search_url.format(self.domain, query)
        search_result = scraper_lib.get_page_soup(url=search_url,
                                                  scraper=self.cf_session)

        first_hit = scraper_lib.Container(
            block=search_result,
            tag='div',
            first=True,
            container_class='col-xs-6 col-sm-2-5').get_container()

        info = self.__get_post_info(first_hit)

        # Pause between the two requests.
        time.sleep(1)

        show_page = scraper_lib.get_page_soup(info["url"],
                                              scraper=self.cf_session)
        return self.get_seasons(show_page)
コード例 #18
0
    def get_all_episodes(self, episode_wrapper):
        """Return the info of every episode contained in ``episode_wrapper``.

        Modern layout: each ``div`` with class ``episode-wrap`` is one
        episode. Old layout: the wrapper is rendered to text and split on
        ``<br/>``; each stripped fragment is one episode. In both cases the
        zero-based position is passed to ``get_episode_info`` along with
        the episode.

        :param episode_wrapper: block holding the episodes of one season.
        :return: list of ``get_episode_info`` results.
        """
        if not self.is_modern_state:
            # Old version: episodes are separated by <br/> in the raw markup.
            fragments = "{0}".format(episode_wrapper).split("<br/>")
            return [
                self.get_episode_info(fragment.strip(), position)
                for position, fragment in enumerate(fragments)
            ]

        wraps = scraper_lib.Container(
            block=episode_wrapper,
            tag='div',
            container_class='episode-wrap').get_container()

        return [
            self.get_episode_info(wrap, position)
            for position, wrap in enumerate(wraps)
        ]
コード例 #19
0
    def __get_post_info(self, block):
        """Build a ``TvShow`` from one search-result block.

        The title is the text of the ``a`` inside the first ``h2``; the page
        URL is the ``href`` of an ``a`` in the block; the image is the
        ``src`` of the ``img`` with class ``Thumbnail``.

        :param block: markup of a single search result.
        :return: ``TvShow`` with ``image_url`` set to the thumbnail source,
            or to ``"n.d."`` when the thumbnail lookup raises.
        """
        info_block = scraper_lib.Container(block=block, tag="h2",
                                           first=True).get_container()

        post_title = scraper_lib.Element(block=info_block,
                                         el_tag="a",
                                         get_text=True).get_element()

        post_ref_url = scraper_lib.Element(block=block,
                                           el_tag="a",
                                           el_property="href").get_element()

        try:
            image = scraper_lib.Element(block=block,
                                        el_tag="img",
                                        el_class="Thumbnail",
                                        el_property="src").get_element()
        except Exception:
            # The thumbnail is optional. ``Exception`` (not a bare except)
            # keeps KeyboardInterrupt/SystemExit propagating.
            image = "n.d."

        tvshow = TvShow(title=post_title, page_url=post_ref_url)
        tvshow.image_url = image
        return tvshow