Beispiel #1
0
 def get_start_button_menu_text_url_dict(self):
     """Return the text -> URL dictionary for the start-button menu."""
     menu = {'HD': URL('http://www.pornhd4k.com/kategori/porno/hd*')}
     return menu
Beispiel #2
0
 def get_href(self, txt='', base_url=URL()):
     """Turn a link found on a page into an absolute address string.

     :param txt: link text as it appears in the page.
     :param base_url: URL of the page the link was found on.
     :return: ``txt`` unchanged when it already carries an ``http://`` or
         ``https://`` scheme; ``base_url.domain() + txt`` when it starts
         with ``/``; otherwise ``txt`` appended to ``base_url.get()`` with
         its query part (everything from the first ``?``) removed.
     """
     # Both schemes count as absolute; matching only 'http://' used to glue
     # https links onto the base address.
     if txt.startswith(('http://', 'https://')):
         return txt
     if txt.startswith('/'):
         return base_url.domain() + txt
     return base_url.get().partition('?')[0] + txt
Beispiel #3
0
 def get_start_button_menu_text_url_dict(self):
     """Return the text -> URL dictionary for the start-button menu.

     Entries are listed galleries first, then videos; the dictionary keeps
     that insertion order.
     """
     return {
         'Galleries_Recently_Updated':
             URL('http://motherless.com/galleries/updated*'),
         'Galleries_Most_Viewed':
             URL('http://motherless.com/galleries/viewed*'),
         'Galleries_Most_Favorited':
             URL('http://motherless.com/galleries/favorited*'),
         'Videos_Recent':
             URL('http://motherless.com/videos/recent*'),
         'Videos_Most_Viewed':
             URL('http://motherless.com/videos/viewed*'),
         # Key previously carried a typo ("Favoritede").
         'Videos_Most_Favorited':
             URL('http://motherless.com/videos/favorited*'),
         'Videos_Popular':
             URL('http://motherless.com/videos/popular*'),
         'Videos_Live':
             URL('http://motherless.com/live/videos*'),
         'Videos_All_Time_Most_Viewed':
             URL('http://motherless.com/videos/all/viewed*'),
         'Videos_All_Time_Most_Favorited':
             URL('http://motherless.com/videos/all/favorited*'),
         'Videos_Archived':
             URL('http://motherless.com/videos/archives*'),
     }
Beispiel #4
0
    def parse_index_file(self, fname, base_url=URL()):
        """Parse a downloaded page and describe it as a ParseResult.

        Three layouts are checked in this order and the first one that
        produced data wins: a video page (player script found), a channel
        list, and a thumbnail index.

        :param fname: name of the file with the downloaded page; passed to
            ``self.proceed_parcing`` together with the configured parser.
        :param base_url: URL the page was loaded from; used to resolve links
            and as the base address for pagination entries.
        :return: the filled ParseResult; it is returned empty when no rule
            matched or when a video page had no usable source.
        """
        parser = SiteParser()

        def absolute(href):
            # Resolve a link against the address the page came from.
            return self.get_href(href, base_url)

        startpage_rule = ParserRule()
        startpage_rule.add_activate_rule_level([('ul', 'class', 'thumbs'),
                                                ('li', 'class', 'category')])
        startpage_rule.add_process_rule_level('a', {'href'})
        startpage_rule.add_process_rule_level('img',
                                              {'src', 'alt', 'data-original'})
        startpage_rule.set_attribute_modifier_function('href', absolute)
        parser.add_rule(startpage_rule)

        startpage_pages_rule = ParserRule()
        startpage_pages_rule.add_activate_rule_level([('div', 'class',
                                                       'pager paging')])
        startpage_pages_rule.add_process_rule_level(
            'span', {'data-query-key', 'data-query-value'})
        parser.add_rule(startpage_pages_rule)

        channels_rule = ParserRule()
        channels_rule.add_activate_rule_level([('ul', 'class', 'tag-150-list')])
        channels_rule.add_process_rule_level('a', {'href'})
        channels_rule.add_process_rule_level('img', {'src'})
        channels_rule.set_attribute_filter_function(
            'href', lambda x: '/channel/' in x or '/prime/' in x)
        channels_rule.set_attribute_modifier_function('href', absolute)
        parser.add_rule(channels_rule)

        channel_categories_rule = ParserRule()
        channel_categories_rule.add_activate_rule_level([
            ('ul', 'class', 'link-tag-list long-col')
        ])
        channel_categories_rule.add_process_rule_level(
            'span', {'data-query-key', 'data-query-value'})
        parser.add_rule(channel_categories_rule)

        video_rule = ParserRule()
        video_rule.add_activate_rule_level([('div', 'class',
                                             'player-container')])
        video_rule.add_process_rule_level('script', {})
        video_rule.set_attribute_filter_function(
            'data', lambda text: 'players.push' in text)
        parser.add_rule(video_rule)

        gallery_href_rule = ParserRule()
        gallery_href_rule.add_activate_rule_level([('ul', 'class',
                                                    'video-tag-list')])
        gallery_href_rule.add_process_rule_level('a', {'href'})
        gallery_href_rule.set_attribute_modifier_function('href', absolute)
        parser.add_rule(gallery_href_rule)

        self.proceed_parcing(parser, fname)

        result = ParseResult()

        if video_rule.is_result():
            # Drop spaces and backslashes so the 'sources' object can be cut
            # out of the script with plain string operations.
            script = video_rule.get_result()[0]['data']
            script = script.replace(' ', '').replace('\\', '')
            sources = script.partition("'sources':{")[2].partition(
                '}')[0].split(',')

            urls = []
            for item in sources:
                part = item.strip("\n\t'").partition("':'")
                if part[2].startswith(('http://', 'https://')):
                    urls.append(dict(text=part[0],
                                     url=URL(part[2].strip("'") + '*')))

            if not urls:
                return result

            # The last listed source is used as the default one.
            video = MediaData(urls[-1]['url'])
            if len(urls) > 1:
                for item in urls:
                    video.add_alternate(item)

            result.set_type('video')
            result.set_video(video)

            for f in gallery_href_rule.get_result(['data', 'href']):
                result.add_control(
                    ControlInfo(f['data'].strip(), URL(f['href'])))
            return result

        def add_key(old, key, value):
            # Return `old` with query parameter `key` set to `value`,
            # replacing an existing occurrence or appending a new one.
            addr, _, query = old.partition('?')
            pairs = []
            found = False
            for pair in query.split('&'):
                # Compare the whole parameter name; a prefix test would let
                # 'page' overwrite e.g. 'pages=...'.
                if pair.partition('=')[0] == key:
                    pairs.append(key + '=' + value)
                    found = True
                else:
                    pairs.append(pair)

            if not found:
                pairs.append(key + '=' + value)
            return addr + '?' + '&'.join(pairs).strip('&')

        def add_pages_info_to_result(rule, description_key='data-query-value'):
            # Turn every (key, value) pair collected by `rule` into a page
            # entry whose address is base_url with that parameter applied.
            for item in rule.get_result(['data-query-key',
                                         'data-query-value']):
                key = item['data-query-key']
                val = item['data-query-value']
                description = item[description_key].strip('\t')
                addr = add_key(base_url.get(), key, val)
                result.add_page(ControlInfo(description, URL(addr + '*')))

        if channels_rule.is_result():
            result.set_type('hrefs')

            for item in channels_rule.get_result():
                # Popup text is the last path segment of the link.
                info = item['href'].rpartition('/')[2].strip('*')
                result.add_thumb(
                    ThumbInfo(thumb_url=URL(item['src']),
                              href=URL(item['href']),
                              popup=info))

            add_pages_info_to_result(channel_categories_rule,
                                     description_key='data')
            return result

        if startpage_rule.is_result():
            result.set_type('hrefs')

            for item in startpage_rule.get_result(['href']):
                t_url = item.get('data-original', item['src'])
                result.add_thumb(
                    ThumbInfo(thumb_url=URL(t_url),
                              href=URL(item['href']),
                              popup=item.get('alt', '')))

            add_pages_info_to_result(startpage_pages_rule)

        return result
Beispiel #5
0
 def can_accept_index_file(self, base_url=URL()):
     """Return whether base_url contains this site's address marker."""
     site_marker = 'babesandstars.com/'
     return base_url.contain(site_marker)
Beispiel #6
0
 def can_accept_index_file(self, base_url=URL()):
     """Return whether base_url contains this site's address marker."""
     site_marker = 'collectionofbestporn.com/'
     return base_url.contain(site_marker)
Beispiel #7
0
 def startpage(self):
     """Return the URL used as this site's start page."""
     start = URL("http://www.pornhd.com/?order=newest*")
     return start
Beispiel #8
0
 def startpage(self):
     """Return the URL used as this site's start page."""
     start = URL("http://fineartteens.com/")
     return start
Beispiel #9
0
 def can_accept_index_file(self, base_url=URL()):
     """Return True when base_url contains any entry of self.accepted_sites."""
     return any(base_url.contain(candidate)
                for candidate in self.accepted_sites)
Beispiel #10
0
 def can_accept_index_file(self, base_url=URL()):
     """Return whether base_url contains this site's address marker."""
     site_marker = 'sexix.net/'
     return base_url.contain(site_marker)
Beispiel #11
0
    def parse_index_file(self, fname, base_url=URL()):
        """Parse a downloaded page and describe it as a ParseResult.

        A page with a player iframe is treated as a video page; otherwise
        thumbnails, pagination links and tag links are collected.

        :param fname: name of the file with the downloaded page; passed to
            ``self.proceed_parcing`` together with the configured parser.
        :param base_url: URL the page was loaded from; used to resolve links.
        :return: the filled ParseResult (empty when no rule matched).
        """
        parser = SiteParser()

        def absolute(href):
            # Resolve a link against the address the page came from.
            return self.get_href(href, base_url)

        startpage_rule = ParserRule()
        startpage_rule.add_activate_rule_level([('div', 'class', 'thumb')])
        startpage_rule.add_process_rule_level('a', {'href', 'title'})
        startpage_rule.add_process_rule_level('img', {'src', 'alt'})
        startpage_rule.set_attribute_modifier_function('href', absolute)
        parser.add_rule(startpage_rule)

        startpage_pages_rule = ParserRule()
        startpage_pages_rule.add_activate_rule_level([('div', 'class', 'wp-pagenavi')])
        startpage_pages_rule.add_process_rule_level('a', {'href'})
        startpage_pages_rule.set_attribute_modifier_function('href', absolute)
        parser.add_rule(startpage_pages_rule)

        tags_rule = ParserRule()
        tags_rule.add_activate_rule_level([('div', 'class', 'tagcloud')])
        tags_rule.add_process_rule_level('a', {'href'})
        tags_rule.set_attribute_modifier_function('href', absolute)
        parser.add_rule(tags_rule)

        video_rule = ParserRule()
        video_rule.add_activate_rule_level([('div', 'class', 'videoContainer')])
        video_rule.add_process_rule_level('iframe', {'src'})
        video_rule.set_attribute_modifier_function('src', absolute)
        parser.add_rule(video_rule)

        gallery_href_rule = ParserRule()
        gallery_href_rule.add_activate_rule_level([('div', 'id', 'extras')])
        gallery_href_rule.add_process_rule_level('a', {'href'})
        gallery_href_rule.set_attribute_modifier_function('href', absolute)
        parser.add_rule(gallery_href_rule)

        self.proceed_parcing(parser, fname)

        result = ParseResult()

        if video_rule.is_result():
            urls = UrlList()
            for item in video_rule.get_result():
                try:
                    # Two hops: the iframe document names a playlist in its
                    # jwplayer().load('...') call, and the playlist's <item>
                    # lists the actual sources.
                    r = load(URL(item['src']))
                    r = load(URL(self.quotes(r.text, "jwplayer().load('", "'") + '*'))
                    source = self.quotes(r.text, '<item>', '</item>').strip()
                    for chunk in source.split('<jwplayer:source file="'):
                        # Skip empty pieces; tested by value, since identity
                        # comparison with '' is implementation-dependent.
                        if not chunk:
                            continue
                        url = chunk.partition('"')[0]
                        label = self.quotes(chunk, 'label="', '"')
                        urls.add(label, URL(url + '*'))

                except LoaderError as err:
                    # A failed load only drops this iframe's sources.
                    print(err)

            result.set_video(urls.get_media_data())

            for f in gallery_href_rule.get_result(['data', 'href']):
                result.add_control(ControlInfo(f['data'].strip(), URL(f['href'])))
            return result

        if startpage_rule.is_result():
            for item in startpage_rule.get_result(['href']):
                result.add_thumb(
                    ThumbInfo(thumb_url=URL(item['src']), href=URL(item['href']), popup=item.get('alt', '')))

            for item in startpage_pages_rule.get_result(['href', 'data']):
                result.add_page(ControlInfo(item['data'], URL(item['href'])))

            for item in tags_rule.get_result(['href', 'data']):
                result.add_control(ControlInfo(item['data'], URL(item['href'])))

        return result
Beispiel #12
0
 def startpage(self):
     """Return the URL used as this site's start page."""
     start = URL("http://sexix.net/?orderby=date*")
     return start
Beispiel #13
0
    def parse_index_file(self, fname, base_url=URL()):
        """Parse a downloaded page and describe it as a ParseResult.

        A page with a jwplayer script is treated as a video page; otherwise
        thumbnails, pagination links and list links are collected.

        :param fname: name of the file with the downloaded page; passed to
            ``self.proceed_parcing`` together with the configured parser.
        :param base_url: URL the page was loaded from; used to resolve links.
        :return: the filled ParseResult (empty when no rule matched).
        """
        parser = SiteParser()

        def absolute(href):
            # Resolve a link against the address the page came from.
            return self.get_href(href, base_url)

        startpage_rule = ParserRule()
        startpage_rule.add_activate_rule_level([('div', 'class', 'videos_form')])
        startpage_rule.add_process_rule_level('a', {'href'})
        startpage_rule.add_process_rule_level('img', {'data-lazy-src'})
        startpage_rule.set_attribute_modifier_function('href', absolute)
        parser.add_rule(startpage_rule)

        startpage_pages_rule = ParserRule()
        startpage_pages_rule.add_activate_rule_level([('div', 'class', 'wp-pagenavi')])
        startpage_pages_rule.add_process_rule_level('a', {'href'})
        startpage_pages_rule.set_attribute_modifier_function('href', absolute)
        parser.add_rule(startpage_pages_rule)

        startpage_hrefs_rule = ParserRule()
        startpage_hrefs_rule.add_activate_rule_level([('ul', 'class', 'list')])
        startpage_hrefs_rule.add_process_rule_level('a', {'href'})
        startpage_hrefs_rule.set_attribute_modifier_function('href', absolute)
        parser.add_rule(startpage_hrefs_rule)

        video_rule = ParserRule()
        video_rule.add_activate_rule_level([('div', 'id', 'videos_page')])
        video_rule.add_process_rule_level('script', {})
        video_rule.set_attribute_filter_function('data', lambda text: 'jwplayer(' in text)
        parser.add_rule(video_rule)

        gallery_href_rule = ParserRule()
        gallery_href_rule.add_activate_rule_level([('div', 'id', 'Categories')])
        gallery_href_rule.add_process_rule_level('a', {'href'})
        gallery_href_rule.set_attribute_modifier_function('href', absolute)
        parser.add_rule(gallery_href_rule)

        self.proceed_parcing(parser, fname)

        result = ParseResult()

        if video_rule.is_result():
            urls = UrlList()
            for item in video_rule.get_result():
                # Spaces are removed first so "file: '...'" and "file:'...'"
                # are both found by the same quote search.
                file_url = self.quotes(item['data'].replace(' ', ''), "file:'", "'")
                urls.add('default', URL(file_url + '*'))

            result.set_video(urls.get_media_data())

            for f in gallery_href_rule.get_result(['data', 'href']):
                result.add_control(ControlInfo(f['data'], URL(f['href'])))
            return result

        if startpage_rule.is_result():
            result.set_type('hrefs')

            for item in startpage_rule.get_result(['href']):
                result.add_thumb(ThumbInfo(thumb_url=URL(item['data-lazy-src']), href=URL(item['href']),
                                           popup=item.get('alt', '')))

            for item in startpage_pages_rule.get_result(['href', 'data']):
                result.add_page(ControlInfo(item['data'], URL(item['href'])))

            for item in startpage_hrefs_rule.get_result(['href']):
                href = item['href']
                # The label is the second-to-last '/'-separated piece.
                label = href.split('/')[-2]
                result.add_control(ControlInfo(label, URL(href)))

        return result
Beispiel #14
0
 def startpage(self):
     """Return the URL used as this site's start page."""
     start = URL("http://www.pornhd4k.com/")
     return start
Beispiel #15
0
    def parse_index_file(self, fname, base_url=URL()):
        """Parse a downloaded page and describe it as a ParseResult.

        Thumbnail indexes, sub-thumbnail lists and picture galleries are
        handled independently, so one page may contribute to several parts
        of the result.

        :param fname: name of the UTF-8 encoded file with the downloaded page.
        :param base_url: URL the page was loaded from (not used here).
        :return: the filled ParseResult (empty when no rule matched).
        """
        parser = SiteParser()
        startpage_rule = ParserRule()
        startpage_rule.add_activate_rule_level([('div', 'class', 'thumbs'),
                                                ('div', 'class',
                                                 'movie_thumbs')])
        startpage_rule.add_process_rule_level('a', {'href'})
        startpage_rule.add_process_rule_level('img', {'src', 'alt'})
        startpage_rule.set_attribute_modifier_function('href', get_href)
        parser.add_rule(startpage_rule)

        startpage_pages_rule = ParserRule()
        startpage_pages_rule.add_activate_rule_level([('div', 'class', 'head')
                                                      ])
        startpage_pages_rule.add_activate_rule_level([('div', 'class', 'pages')
                                                      ])
        startpage_pages_rule.add_process_rule_level('a', {'href'})
        startpage_pages_rule.set_attribute_modifier_function(
            'href', lambda x: 'http://www.tomorrowporn.com' + x)
        parser.add_rule(startpage_pages_rule)

        href_rule = ParserRule()
        href_rule.add_activate_rule_level([('ul', 'class', 'sub_thumb_list')])
        href_rule.add_process_rule_level('a', {'href'})
        href_rule.add_process_rule_level('img', {'src', 'alt'})
        href_rule.set_attribute_modifier_function('href', get_href)
        parser.add_rule(href_rule)

        picture_rule = ParserRule()
        picture_rule.add_activate_rule_level([
            ('div', 'class', 'thumb_box'),
            ('div', 'class', 'thumb_box bottom_corners'),
            ('div', 'class', 'thumb_box top_corners')
        ])
        picture_rule.add_process_rule_level('a', set())
        picture_rule.add_process_rule_level('img', {'src'})
        # NOTE(review): this strips every 't' from the src value, not just a
        # thumbnail marker; confirm that is intended.
        picture_rule.set_attribute_modifier_function(
            'src', lambda text: text.replace('t', ''))
        parser.add_rule(picture_rule)

        picture_href_rule = ParserRule()
        picture_href_rule.add_activate_rule_level([('div', 'class', 'menus')])
        picture_href_rule.add_process_rule_level('h2', set())
        picture_href_rule.add_process_rule_level('a', {'href', 'title'})
        parser.add_rule(picture_href_rule)

        # Feed the page line by line; the context manager closes the file
        # even when the parser raises.
        with open(fname, encoding='utf-8') as page:
            for line in page:
                parser.feed(line)

        result = ParseResult()

        if len(startpage_rule.get_result()) > 0:
            for item in startpage_rule.get_result():
                result.add_thumb(
                    ThumbInfo(thumb_url=URL(item['src']),
                              href=URL(item['href']),
                              popup=item.get('alt', '')))

            for item in startpage_pages_rule.get_result(['href', 'data']):
                result.add_page(ControlInfo(item['data'], URL(item['href'])))

        if len(href_rule.get_result()) > 0:
            for item in href_rule.get_result():
                # Entries without an image cannot be shown as thumbnails.
                if 'src' in item:
                    result.add_thumb(
                        ThumbInfo(thumb_url=URL(item['src']),
                                  href=URL(item['href']),
                                  popup=item.get('alt', '')))

        if len(picture_rule.get_result()) > 0:
            for f in picture_rule.get_result():
                result.add_full(FullPictureInfo(rel_name=f['src']))

            for f in picture_href_rule.get_result():
                result.add_control(ControlInfo(f['title'], URL(f['href'])))

        return result
Beispiel #16
0
    def parse_index_file(self, fname, base_url=URL()):
        """Parse a downloaded page and describe it as a ParseResult.

        Thumbnail indexes and picture galleries are handled independently;
        when both produce data the result type ends up as 'pictures'.

        :param fname: name of the file with the downloaded page; it is opened
            with the platform default text encoding.
        :param base_url: URL the page was loaded from; its domain() is
            prepended to pagination and control links.
        :return: the filled ParseResult (empty when no rule matched).
        """
        parser = SiteParser()
        startpage_rule = ParserRule()
        startpage_rule.add_activate_rule_level([('div', 'class', 'post'),
                                                ('div', 'class', 'post300')])
        startpage_rule.add_process_rule_level('a', {'href'})
        startpage_rule.add_process_rule_level('img', {'src', 'alt'})
        startpage_rule.set_attribute_modifier_function('href',
                                                       lambda x: get_href(x))
        parser.add_rule(startpage_rule)

        startpage_pages_rule = ParserRule()
        startpage_pages_rule.add_activate_rule_level([('div', 'id', 'pager')])
        startpage_pages_rule.add_activate_rule_level([('div', 'id', 'pc')])
        startpage_pages_rule.add_process_rule_level('a', {'href'})
        startpage_pages_rule.set_attribute_modifier_function(
            'href', lambda x: base_url.domain() + x)
        parser.add_rule(startpage_pages_rule)

        picture_rule = ParserRule()
        picture_rule.add_activate_rule_level([('div', 'id', 'cc')])
        picture_rule.add_process_rule_level('a', set())
        picture_rule.add_process_rule_level('img', {'src', 'title'})
        picture_rule.set_attribute_modifier_function(
            'src', lambda text: _del_thumb(text))
        parser.add_rule(picture_rule)

        picture_href_rule = ParserRule()
        picture_href_rule.add_activate_rule_level([('div', 'id', 'cc')])
        picture_href_rule.add_activate_rule_level([('div', 'class',
                                                    'shorttext')])
        picture_href_rule.add_process_rule_level('a', {'href', 'alt'})
        parser.add_rule(picture_href_rule)

        # Feed the page line by line; the context manager closes the file
        # even when the parser raises.
        with open(fname) as page:
            for line in page:
                parser.feed(line)

        result = ParseResult()

        if len(startpage_rule.get_result()) > 0:
            result.set_type('hrefs')
            for item in startpage_rule.get_result(['href', 'src']):
                result.add_thumb(
                    ThumbInfo(thumb_url=URL(item['src']),
                              href=URL(item['href'] + '*'),
                              popup=item.get('alt', '')))

            for item in startpage_pages_rule.get_result(['href', 'data']):
                result.add_page(
                    ControlInfo(item['data'], URL(item['href'] + '*')))

        if len(picture_rule.get_result()) > 0:
            result.set_type('pictures')
            # Pictures are named by their 1-based position: 001.jpg, 002.jpg, ...
            pictures = picture_rule.get_result(['src', 'title'])
            for index, f in enumerate(pictures, start=1):
                result.add_full(
                    FullPictureInfo(abs_href=URL(f['src']),
                                    rel_name='%03d.jpg' % index))

            for f in picture_href_rule.get_result():
                # Only site-relative links are turned into controls.
                if f['href'].startswith('/'):
                    result.add_control(
                        ControlInfo(text=f['alt'],
                                    url=URL(base_url.domain() + f['href'])))

        return result
Beispiel #17
0
 def startpage(self):
     """Return the URL used as this site's start page."""
     start = URL("http://collectionofbestporn.com/most-recent*")
     return start
Beispiel #18
0
 def startpage(self):
     """Return the URL used as this site's start page."""
     start = URL("http://www.pornstar.hu/galleries")
     return start
Beispiel #19
0
    def parse_index_file(self, fname, base_url=URL()):
        """Parse a downloaded page and describe it as a ParseResult.

        A page with <source> entries inside a <video> element is reported as
        a video page; otherwise thumbnails and pagination links are collected.

        :param fname: name of the file with the downloaded page; passed to
            ``self.proceed_parcing`` together with the configured parser.
        :param base_url: URL the page was loaded from; used to resolve links.
        :return: the filled ParseResult (empty when no rule matched).
        """
        def absolute(link):
            # Resolve a link against the address the page came from.
            return self.get_href(link, base_url)

        parser = SiteParser()

        thumbs = ParserRule()
        thumbs.add_activate_rule_level([('div', 'class', 'video-thumb')])
        thumbs.add_process_rule_level('a', {'href'})
        thumbs.add_process_rule_level('img', {'src', 'alt'})
        thumbs.set_attribute_modifier_function('href', absolute)
        parser.add_rule(thumbs)

        pager = ParserRule()
        pager.add_activate_rule_level([('ul', 'class', 'pagination')])
        pager.add_process_rule_level('a', {'href'})
        pager.set_attribute_modifier_function('href', absolute)
        parser.add_rule(pager)

        player = ParserRule()
        player.add_activate_rule_level([('video', '', '')])
        player.add_process_rule_level('source', {'src', 'label', 'res'})
        parser.add_rule(player)

        tags = ParserRule()
        tags.add_activate_rule_level([('div', 'class', 'tags-container')])
        tags.add_process_rule_level('a', {'href'})
        parser.add_rule(tags)

        self.proceed_parcing(parser, fname)

        result = ParseResult()

        if player.is_result():
            sources = UrlList()
            for entry in player.get_result(['src', 'res']):
                sources.add(entry['res'], URL(entry['src']))
            result.set_video(sources.get_media_data(-1))

            # Tag links are used as collected, with '*' appended.
            for entry in tags.get_result(['data', 'href']):
                tag_url = URL(entry['href'] + '*')
                result.add_control(ControlInfo(entry['data'], tag_url))
            return result

        if not thumbs.is_result():
            return result

        for entry in thumbs.get_result(['href', 'src']):
            link = entry['href']
            # Captions are switched on as soon as one category link shows up.
            if '/category/' in link:
                result.set_caption_visible(True)
            thumb = ThumbInfo(thumb_url=URL(entry['src']),
                              href=URL(link),
                              popup=entry.get('alt', ''))
            result.add_thumb(thumb)

        for entry in pager.get_result(['href', 'data']):
            result.add_page(ControlInfo(entry['data'], URL(entry['href'])))

        return result
Beispiel #20
0
 def get_start_button_menu_text_url_dict(self):
     """Return the text -> URL dictionary for the start-button menu."""
     menu = {
         'Pornstars': URL('http://toseeporn.com/Actor*'),
         'Home': URL('http://toseeporn.com/*'),
         'Search_Example': URL(
             'http://toseeporn.com/Search=asian%20sex%20diary*'),
     }
     return menu
Beispiel #21
0
 def can_accept_index_file(self, base_url=URL()):
     """Return whether base_url contains this site's address marker."""
     site_marker = 'pornhd.com/'
     return base_url.contain(site_marker)
Beispiel #22
0
 def startpage(self):
     """Return the URL used as this site's start page."""
     start = URL("http://toseeporn.com/Category/West%20Porn*")
     return start
Beispiel #23
0
 def startpage(self):
     """Return the URL used as this site's start page."""
     start = URL("http://www.babesandstars.com/galleries/")
     return start
Beispiel #24
0
    def parse_index_file(self, fname, base_url=URL()):
        """Parse a downloaded page and describe it as a ParseResult.

        A page whose body holds a script containing 'angular.' is handled as
        a video page: the address quoted after host:' in that script is
        loaded and its 'mediaSources' JSON list supplies the video links.
        Otherwise thumbnails, pagination, category and tag links are
        collected.

        :param fname: name of the file with the downloaded page; passed to
            ``self.proceed_parcing`` together with the configured parser.
        :param base_url: URL the page was loaded from; used to resolve links.
        :return: the filled ParseResult (empty when no rule matched).
        """
        parser = SiteParser()

        # Thumbnails: <a class="thumbnail"> links; the image address is cut
        # out of the url('...') part of a div's style attribute.
        startpage_rule = ParserRule()
        startpage_rule.add_activate_rule_level([('div', 'class',
                                                 'fixed-content')])
        startpage_rule.add_process_rule_level('a', {'href', 'class'})
        startpage_rule.add_process_rule_level('div', {'style'})
        startpage_rule.set_attribute_filter_function(
            'class', lambda x: x == 'thumbnail')
        startpage_rule.set_attribute_modifier_function(
            'href', lambda x: self.get_href(x, base_url))
        startpage_rule.set_attribute_modifier_function(
            'style', lambda x: x.partition("url('")[2].partition("')")[0])
        parser.add_rule(startpage_rule)

        # Pagination links.
        startpage_pages_rule = ParserRule()
        startpage_pages_rule.add_activate_rule_level([
            ('div', 'class', 'col-xs-12 content-pagination')
        ])
        startpage_pages_rule.add_process_rule_level('a', {'href'})
        startpage_pages_rule.set_attribute_modifier_function(
            'href', lambda x: self.get_href(x, base_url))
        parser.add_rule(startpage_pages_rule)

        # Tag links from the footer section.
        tags_rule = ParserRule()
        tags_rule.add_activate_rule_level([('section', 'id', 'footer-tag')])
        tags_rule.add_process_rule_level('a', {'href'})
        tags_rule.set_attribute_modifier_function(
            'href', lambda x: self.get_href(x, base_url))
        parser.add_rule(tags_rule)

        # Category links from the navigation bar; anchors containing '#'
        # are filtered out.
        categories_rule = ParserRule()
        categories_rule.add_activate_rule_level([('ul', 'class',
                                                  'nav navbar-nav')])
        categories_rule.add_process_rule_level('a', {'href'})
        categories_rule.set_attribute_filter_function(
            'href', lambda x: '/Category/' in x and "#" not in x)
        categories_rule.set_attribute_modifier_function(
            'href', lambda x: self.get_href(x, base_url))
        parser.add_rule(categories_rule)

        # Video page marker: a script under <body> whose text contains
        # 'angular.'.
        video_rule = ParserRule()
        video_rule.add_activate_rule_level([('body', '', '')])
        video_rule.add_process_rule_level('script', {})
        video_rule.set_attribute_filter_function(
            'data', lambda text: 'angular.' in text)
        parser.add_rule(video_rule)
        #
        # Links shown as controls next to a video.
        gallery_href_rule = ParserRule()
        gallery_href_rule.add_activate_rule_level([('div', 'class',
                                                    'row tag-area')])
        gallery_href_rule.add_process_rule_level('a', {'href'})
        gallery_href_rule.set_attribute_modifier_function(
            'href', lambda x: self.get_href(x, base_url))
        parser.add_rule(gallery_href_rule)

        self.proceed_parcing(parser, fname)

        result = ParseResult()

        if video_rule.is_result():  # len(video_rule.get_result()) > 0:
            # Spaces are removed so the host:'...' value can be located with
            # a plain quote search.
            script = video_rule.get_result()[0]['data'].replace(' ', '')
            json_file_url = self.get_href(self.quotes(script, "host:'", "'"),
                                          base_url)
            # print(json_file_url)

            from requests_loader import load, LoaderError

            # Path handed to load() together with the JSON address.
            json_file = Setting.base_dir + 'tsp_video.json'

            urls = list()
            result.set_type('video')

            try:
                r = load(URL(json_file_url), json_file)

                # `links` remembers sources already seen so each address is
                # listed only once.
                links = set()
                for item in r.json()['mediaSources']:
                    # print(item)
                    if item['source'] not in links:
                        data = dict(text=item['quality'],
                                    url=URL(item['source'] + '*'))
                        urls.append(data)
                        links.add(item['source'])

                # The first source is the default; with several sources all
                # of them are also registered as alternates.
                if len(urls) == 1:
                    video = MediaData(urls[0]['url'])
                elif len(urls) > 1:
                    video = MediaData(urls[0]['url'])
                    for item in urls:
                        video.add_alternate(item)
                else:
                    # No sources: return before any controls are added.
                    return result

                result.set_video(video)

            except LoaderError as err:
                # A failed load is only reported; controls are still added.
                print(err)

            for f in gallery_href_rule.get_result(['data', 'href']):
                result.add_control(
                    ControlInfo(f['data'].strip(), URL(f['href'])))
            return result

        if startpage_rule.is_result():  # len(startpage_rule.get_result()) > 0:
            result.set_type('hrefs')

            for item in startpage_rule.get_result(['href']):
                # print(item)
                result.add_thumb(
                    ThumbInfo(thumb_url=URL(item['style']),
                              href=URL(item['href']),
                              popup=item.get('alt', '')))

            for item in startpage_pages_rule.get_result(['href', 'data']):
                label = item['data'].replace(' ', '')
                # print(item)
                # Links whose text is only spaces get no page entry.
                if len(label) > 0:
                    result.add_page(ControlInfo(label, URL(item['href'])))

            if categories_rule.is_result(['href']):
                for item in categories_rule.get_result(['href', 'data']):
                    result.add_control(
                        ControlInfo(item['data'], URL(item['href'])))

            if tags_rule.is_result(['href']):
                for item in tags_rule.get_result(['href', 'data']):
                    result.add_control(
                        ControlInfo(item['data'], URL(item['href'])))

        return result
Beispiel #25
0
 def get_start_button_menu_text_url_dict(self):
     """Return a label -> URL dict for the babesandstars.com sections."""
     menu = {
         'Videos': URL('http://www.babesandstars.com/videos/'),
         'Photos': URL('http://www.babesandstars.com/galleries/'),
         'Top100models': URL('http://www.babesandstars.com/top-models/'),
     }
     return menu
Beispiel #26
0
 def startpage(self):
     """Return the URL object pointing at the tomorrowporn.com root page."""
     home = URL("http://www.tomorrowporn.com/")
     return home
Beispiel #27
0
    def parse_index_file(self, fname, base_url=URL()):
        """Parse a saved HTML page and describe its content as a ParseResult.

        The file is fed line by line through a SiteParser carrying six
        rules. The first rule group that produced results decides the page
        type, checked in this order:

        * ``'video'``    - a ``<source src=...>`` inside ``div.video``;
        * ``'pictures'`` - links inside ``div.picture``;
        * ``'hrefs'``    - thumbnails inside ``div.galleries`` /
          ``div.models`` / ``div.videos``.

        If nothing matched, the ParseResult is returned without a type.

        :param fname: path of the HTML file to parse.
        :param base_url: URL of the page; handed to ``self.get_href`` for
            every scraped ``href`` (and thumbnail ``src``) attribute.
        :return: the populated ParseResult.
        """
        parser = SiteParser()

        def resolve(link):
            # Single place for the attribute modifier used by every rule:
            # delegates to self.get_href together with the page URL.
            return self.get_href(link, base_url)

        href_rule = ParserRule()  # startpage & model's page
        href_rule.add_activate_rule_level([('div', 'class', 'galleries'),
                                           ('div', 'class', 'models'),
                                           ('div', 'class', 'videos')])
        href_rule.add_activate_rule_level([('div', 'class', 'items')])
        href_rule.add_process_rule_level('a', {'href'})
        href_rule.add_process_rule_level('img', {'src', 'alt'})
        href_rule.set_attribute_modifier_function('href', resolve)
        href_rule.set_attribute_modifier_function('src', resolve)
        parser.add_rule(href_rule)

        href_page_rule = ParserRule()  # page number in model's page
        href_page_rule.add_activate_rule_level([('ul', 'class', 'pagination')])
        href_page_rule.add_process_rule_level('a', {'href'})
        href_page_rule.set_attribute_modifier_function('href', resolve)
        parser.add_rule(href_page_rule)

        model_litera_rule = ParserRule()
        model_litera_rule.add_activate_rule_level([('span', 'class', 'chars')])
        model_litera_rule.add_process_rule_level('a', {'href'})
        model_litera_rule.set_attribute_modifier_function('href', resolve)
        parser.add_rule(model_litera_rule)

        picture_rule = ParserRule()  # gallery rule
        picture_rule.add_activate_rule_level([('div', 'class', 'picture')])
        picture_rule.add_process_rule_level('a', {'href'})
        picture_rule.add_process_rule_level('img', {'alt'})
        picture_rule.set_attribute_modifier_function('href', resolve)
        parser.add_rule(picture_rule)

        # Video pages: the 'src' value is used as scraped, no modifier.
        video_rule = ParserRule()
        video_rule.add_activate_rule_level([('div', 'class', 'video')])
        video_rule.add_process_rule_level('source', {'src'})
        parser.add_rule(video_rule)

        picture_href_rule = ParserRule()  # gallery href's rule
        picture_href_rule.add_activate_rule_level([('div', 'class', 'model')])
        picture_href_rule.add_activate_rule_level([('div', 'class', 'links')])
        picture_href_rule.add_process_rule_level('a', {'href'})
        picture_href_rule.set_attribute_modifier_function('href', resolve)
        parser.add_rule(picture_href_rule)

        # Context manager guarantees the file handle is closed even if
        # parser.feed() raises.
        with open(fname) as page:
            for line in page:
                parser.feed(line)

        result = ParseResult()

        def add_link_controls():
            # Links found in div.model / div.links become controls; shared
            # by the video and the pictures branch.
            for link in picture_href_rule.get_result(['href', 'data']):
                result.add_control(
                    ControlInfo(text=link['data'], url=URL(link['href'])))

        videos = video_rule.get_result()
        if videos:
            # NOTE(review): the trailing '*' is appended exactly as before;
            # presumably a marker understood by URL - confirm.
            result.set_video(MediaData(URL(videos[0]['src'] + '*')))
            result.set_type('video')
            add_link_controls()
            return result

        pictures = picture_rule.get_result()
        if pictures:
            result.set_type('pictures')
            for picture in pictures:
                # File name is the part of the href after the last '/'.
                full = FullPictureInfo(
                    abs_href=URL(picture['href']),
                    rel_name=picture['href'].rpartition('/')[2])
                result.add_full(full)
            add_link_controls()
            return result

        thumbs = href_rule.get_result()
        if thumbs:
            result.set_type('hrefs')
            for item in thumbs:
                result.add_thumb(
                    ThumbInfo(thumb_url=URL(item['src']),
                              href=URL(item['href']),
                              popup=item.get('alt', '')))

            for item in model_litera_rule.get_result(['href', 'data']):
                result.add_control(ControlInfo(item['data'],
                                               URL(item['href'])))
            for item in href_page_rule.get_result(['href', 'data']):
                result.add_page(ControlInfo(item['data'], URL(item['href'])))

        return result
Beispiel #28
0
 def can_accept_index_file(self, base_url=URL()):
     """Check ``base_url`` for the 'tomorrowporn.com/' fragment.

     Returns whatever ``base_url.contain`` yields for that fragment.
     """
     site_fragment = 'tomorrowporn.com/'
     return base_url.contain(site_fragment)
Beispiel #29
0
 def startpage(self):
     """Return the URL object for motherless.com recent videos, page 1."""
     recent_videos = URL("http://motherless.com/videos/recent?page=1*")
     return recent_videos
Beispiel #30
0
 def get_start_button_menu_text_url_dict(self):
     """Return a label -> URL dict for the bravonude.com sections."""
     menu = {}
     menu['Pictures'] = URL('http://www.bravonude.com/')
     menu['Movies'] = URL('http://www.bravonude.com/erotica-videos/')
     return menu