def resolve(self, item, captcha_cb=None, select_cb=None):
     streams = []
     link = util.parse_html(item['url']).find(
         'a', {'class': ['play-movie', 'play-epizode']})
     if link and link.get('data-loc'):
         url = 'http://stream-a-ams1xx2sfcdnvideo5269.cz/'
         if 'serialy.' in item['url']:
             url += 'prehravac.php?play=serail&id='
         else:
             url += 'okno.php?new_way=yes&film='
         url += link.get('data-loc')
         for container in util.parse_html(url).select(
                 '.container .free--box .center--inner'):
             for stream in container.find_all(
                 ['embed', 'object', 'iframe', 'script', 'a']):
                 for attribute in ['src', 'data', 'href']:
                     value = stream.get(attribute)
                     if value:
                         streams.append(value)
         result = self.findstreams(streams)
         if len(result) == 1:
             return result[0]
         elif len(result) > 1 and select_cb:
             return select_cb(result)
     return None
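
Nearly every example on this page calls util.parse_html to turn a URL (or raw markup) into a BeautifulSoup tree. A minimal sketch of what such a helper presumably looks like is below; the real util module in each project may differ (session handling, encoding detection, parser choice):

import requests
from bs4 import BeautifulSoup

def parse_html(url):
    # Fetch the page and parse the body into a navigable BeautifulSoup tree.
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    return BeautifulSoup(response.text, 'html5lib')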
Example No. 2
	def list(self, url):
		if url.find('#cat#') == 0:
			return self.cat(util.parse_html(self._url(url[5:])))
		elif url.find('#catl2#') == 0:
			return self.catl2(util.parse_html(self._url(url[7:])))
		elif url.find('#catl3#') == 0:
			return self.catl3(util.parse_html(self._url(url[7:])))
		elif url.find('#last#') == 0:
			return self.catl3(util.parse_html(self._url(url[6:])))
		else:
			raise Exception('Invalid URL, do not know how to list it: ' + url)
Example No. 3
 def list_series(self, url):
     result = []
     tree = util.parse_html(url)
     series_list = tree.select('.mk-search-page')
     if series_list:
         for series in tree.select('.container a'):
             item = self.dir_item()
             item['title'] = series.select('span .name-search')[0].text
             item['url'] = 'https://www.topserialy.to' + series.get('href')
             item['img'] = 'https://www.topserialy.to' + series.span.img.get('src')
             result.append(item)
     else:
         for series in tree.select('.container a.single-result'):
             item = self.dir_item()
             original_title = series.select('.original')[0].text
             czsk_title = series.select('.cz-sk')[0].text
             title = original_title
             if czsk_title not in '......' and czsk_title != original_title:
                 title += ' (' + czsk_title + ')'
             item['title'] = title
             item['url'] = 'https://www.topserialy.to' + series.get('href')
             item['img'] = 'https://www.topserialy.to' + series.img.get(
                 'data-original')
             result.append(item)
     return sorted(result)
Example No. 4
 def subs(self, subtitles_path):
     soup = util.parse_html(self.webURL + '/titulky')
     sub_out = []
     cnt = 1
     subtitles = soup.find_all(id='subtitles')
     if subtitles:
         for sub in subtitles[0].find_all('li'):
             s_tme = sub.find_all('a')[0].text
             s_txt = ''.join(sub.find_all(text=True,
                                          recursive=False)).strip()
             sub_out.append(str(cnt))
             cnt += 1
             start = datetime.strptime(s_tme, '%H:%M:%S')
             # estimate display time at ~14 characters per second; a float
             # divisor so the division isn't truncated to 0 under Python 2
             end = start + timedelta(seconds=len(s_txt) / 14.0)
             end = end.strftime('%H:%M:%S')
             sub_out.append(s_tme + ',000 --> ' + end + ',000')
             sub_out.append(s_txt)
             sub_out.append('')
     # print s_tme, s_txt.encode('utf-8')
     if cnt < 2:
         return False
     with open(subtitles_path, 'wb') as f_sub:
         f_sub.write('\n'.join(sub_out).encode('utf-8'))
     return True
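
The loop above emits SubRip (SRT) cues. The page only provides start times, so each cue's end time is estimated from text length at roughly 14 characters per second. The arithmetic in isolation:

from datetime import datetime, timedelta

start = datetime.strptime('00:01:05', '%H:%M:%S')
text = 'Hello there, how are you?'  # 25 characters
end = start + timedelta(seconds=len(text) / 14.0)  # about 1.8 s of display time
print(start.strftime('%H:%M:%S'), '-->', end.strftime('%H:%M:%S'))
# 00:01:05 --> 00:01:06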
    def list(self, page=1):
        result = []
        url = 'moje-zpravy/video?page=%s' % page

        tree = util.parse_html(self._url(url))
        for element in tree.select('.col .show-box--date'):
            title = element.text.strip()
            link = self._url(element.parent.findNextSibling().a['href'])
            img = self._url(element.parent.findNextSibling().img.get('src'))

            item = self.video_item()
            item['title'] = title
            item['url'] = link
            item['img'] = img
            result.append(item)

        item = self.dir_item()
        item['type'] = 'next'
        item['url'] = str(int(page)+1)
        result.append(item)

        if int(page) > 1:
            item = self.dir_item()
            item['type'] = 'prev'
            item['url'] = str(int(page)-1)
            result.append(item)

        return result
Example No. 6
 def submit_form(html):
   """Submits the first form on the page."""
   form = util.parse_html(html).form
   data = {input['name']: input['value'] for input in form.find_all('input')
           if input.get('name') and input.get('value')}
   return facebook.application.get_response(
     form['action'], method=form['method'].upper(), body=urllib.parse.urlencode(data))
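
A hypothetical call, assuming a page whose only content is an auto-submitting confirmation form (facebook.application and its get_response helper come from the surrounding test suite):

html = '''<form action="/confirm" method="post">
            <input name="token" value="abc123">
          </form>'''
response = submit_form(html)  # POSTs token=abc123 to /confirm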
 def list_series(self, url):
     result = []
     url += '/abecedni-seznam/'
     if self.quickparser in 'true':
         # Fast path: slice the raw markup by string offsets instead of
         # building a parse tree; quick, but brittle if the markup changes.
         for ix in xrange(1, 35):
             data = urllib2.urlopen(urllib2.Request('%s/?pg=%s' % (url, ix))).read()
             data = data.split('</iframe>')[2].split('</div>', 3)[3]
             anchors = [x for x in data.split('</a>')
                        if 'item' in x and 'personality' not in x
                        and 'creator' not in x and 'function' not in x]
             for elem in anchors[0:18]:
                 urlline, imgline = elem.splitlines()[1:3]
                 item = self.dir_item()
                 item['title'] = imgline.split('"')[3]
                 item['img'] = self.series_url(imgline.split('"')[1])
                 item['url'] = self.series_url(urlline.split('"')[1])
                 result.append(item)
         return result
     else:
         while len(url) > 0:
             tree = util.parse_html(url)
             for series in tree.select('#content .movies_list a.item'):
                 item = self.dir_item()
                 item['title'] = series.h3.text
                 item['url'] = self.series_url(series.get('href'))
                 item['img'] = self.series_url(series.img.get('src'))
                 result.append(item)
             active_page = tree.select('#content .pagination .active')
             if len(active_page) > 0:
                 next_page = active_page[0].find_next_sibling('a')
                 if next_page:
                     url = self.series_url(next_page.get('href'))
                     continue
             url = ''
         return result
    def list(self, url):
        result = []
        if url.startswith('#new#'):
            url = 'video/?stanice=1'
        
        tree = util.parse_html(self._url(url))
        for m in tree.find('h1', text=re.compile(r'Nejn.*')).parent.select('.grid div .box.box-video'):
            # skip paid content
            if 'box-video--premium' in m['class']:
                continue
            item = self.video_item()
            item['title'] = m.div.img['title'] + ' ' + m.p.text
            item['url'] = self._url(m.parent['href'])
            item['img'] = self._url(m.div.img['src'])
            result.append(item)

        pager_m = tree.find('a', attrs='next')
        if pager_m:
            item = self.dir_item()
            item['type'] = 'next'
            idx = pager_m['href']
            item['url'] = 'http://www.barrandov.tv/video/' + idx
            result.append(item)

        return result
 def list_seasons(self, url):
     result = []
     for season in util.parse_html(url).select('.accordion'):
         item = self.dir_item()
         item['title'] = season.text.strip()
         item['url'] = 'https://www.topserialy.to' + season.p['data']
         result.append(item)
     return result
 def list_seasons(self, url):
     result = []
     for season in util.parse_html(url).select('#episodes--list a.accordionTitle'):
         item = self.dir_item()
         item['title'] = season.text.split(' - ')[-1]
         item['url'] = url + '#' + item['title'].split('. ', 1)[0]
         result.append(item)
     return result
Example No. 12
 def list_years(self, url):
     result = []
     page = util.parse_html(url)
     for link in page.select('div.button-style2.chars-video')[0].select('a'):
         item = self.dir_item()
         item['title'] = link.text
         item['url'] = '#year#' + self._url(link['href'])
         result.append(item)
     return result
Example No. 14
	def categories(self):
		page = util.parse_html('http://www.kynychova-tv.cz/index.php?id=5')
		result = []
		for title,uri in [(x.h3.text,x.h3.a['href']) for x in page.select('div.entry5') if x.h3]:
			item = self.dir_item()
			item['title'] = title
			item['url'] = uri
			result.append(item)
		return result
Example No. 15
	def list(self, url):
		url = self._url(url)
		page = util.parse_html(url)
		result = []
		for title,uri in [(x.img['title'],x['href']) for x in page.select('div.entry3')[0].findAll('a')]:
			item = self.video_item()
			item['title'] = title
			item['url'] = uri
			result.append(item)
		return result
Example No. 16
	def categories(self):
		result = []
		
		item = self.dir_item()
		item['type'] = 'new'
		item['url']  = '#last#'+self._url('videos/basic/mr')
		result.append(item)
		result.extend(self.cat(util.parse_html(self._url('videos'))))
		
		return result
Example No. 17
 def list_episodes(self, url):
     result = []
     page = util.parse_html(url)
     for episodedata in page.select('ul.style1 a'):
         # keep only links marked 'active' (or links with no class at all)
         if episodedata.get('class') and episodedata.get('class')[0] not in 'active':
             continue
         item = self.video_item()
         item['title'] = episodedata.text.replace('\n', '').replace('\t', '')
         item['url'] = self._url(episodedata['href'])
         result.append(item)
     return result
Example No. 18
    def resolve(self, item, captcha_cb=None, select_cb=None):
        streams = []
        page = util.parse_html(item['url'])
        # collapse the "h"+"t"+"t"+"p"... string concatenation into plain text
        pattern = r'\+?"([^"])"\+?'
        link = re.sub(pattern, lambda n: n.group(1),
                      page.find('script', text=re.compile(r'"."')).text)
        link = re.search(r'''(http://[^'"]+)''', link).group(1)
        link = link.replace('\n', '')
        if u'mp4' in link:
            return {'url': link, 'subs': ''}
        else:
            result = resolver.findstreams([str(link)])
            return result[0]
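
What the substitution above does, in isolation: the page assembles the stream URL from a chain of one-character string literals, and the regex strips the quotes and plus signs around each character.

import re

obfuscated = '"h"+"t"+"t"+"p"+":"+"/"+"/"'
plain = re.sub(r'\+?"([^"])"\+?', lambda n: n.group(1), obfuscated)
print(plain)  # http://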
 def list_genres(self, url):
     result = []
     item = self.dir_item()
     item['title'] = 'Všetky'
     item['url'] = url + '/seznam-filmu/'
     result.append(item)
     for genre in util.parse_html(url).select('#content .genres .buts a'):
         item = self.dir_item()
         item['title'] = genre.text
         item['url'] = url + genre.get('href')
         result.append(item)
     return result
 def list_episodes(self, url):
     result = []
     url, season = url.split('#', 1)
     for episode in util.parse_html(url).select('#episodes--list dd:nth-of-type(' + season +
                                           ') ul.episodes li'):
         link = episode.find('a', 'view')
         link.extract()
         item = self.video_item()
         item['title'] = episode.text.strip()
         item['url'] = self.series_url(link.get('href'))
         item['number'] = int(item['title'].split('.', 1)[0])
         result.append(item)
     return sorted(result, key=lambda k: k['number'])
 def list_episodes(self, url):
     result = []
     for episode in util.parse_html(url).select('a'):
         item = self.video_item()
         item['url'] = 'https://www.topserialy.to/' + episode.get('href')
         season_episode = item['url'].split('-')[-1].upper()
         item['title'] = season_episode + ' ' + episode.text.strip()
         try:
             item['number'] = int(''.join(re.findall(r'[0-9]', season_episode)))
         except ValueError:
             item['number'] = 0
         result.append(item)
     return sorted(result, key=lambda k: k['number'])
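
For example, an episode link ending in '-s02e10' yields the sort key like this:

import re

url = 'https://www.topserialy.to/some-show-s02e10'
season_episode = url.split('-')[-1].upper()                  # 'S02E10'
number = int(''.join(re.findall(r'[0-9]', season_episode)))  # 210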
Example No. 27
 def list_movies(self, url):
     result = []
     page = util.parse_html(url)
     for moviedata in page.select('div.3u'):
         item = self.video_item()
         item['title'] = moviedata.section.h3.text.replace('\t', '').replace('\n', '')
         item['url'] = self._url(moviedata.section.a['href'])
         images = moviedata.section.a.div['onmouseenter'].splitlines()
         imagefile = [x for x in images if '.jpg' in x][-1]
         imagefile = imagefile.replace('\t', '').replace("'", '')
         item['img'] = self._url(imagefile)
         result.append(item)
     return sorted(result, key=lambda n:n['title'])
Example No. 28
def settings():
    # Parse and show the list of cities
    dialog = xbmcgui.Dialog()
    mestalist1 = [
        'BANSKÁ BYSTRICA',
        'BARDEJOV',
        'BRATISLAVA',
        'BREZNO',
        'DOLNÝ KUBÍN',
        'DUNAJSKÁ STREDA',
        'HURBANOVO',
        'KOMÁRNO',
        'KOŠICE',
        'KRÁĽ. CHLMEC',
        'LEVICE',
        'LIPTOVSKÝ MIKULÁŠ',
        'LUČENEC',
        'MEDZILABORCE',
        'MICHALOVCE',
        'NITRA',
        'PEZINOK',
        'PIEŠŤANY',
        'POPRAD',
        'PREŠOV',
        'PRIEVIDZA',
        'RIMAVSKÁ SOBOTA',
        'ROŽŇAVA',
        'SENICA',
        'SKALICA',
        'ŠAHY',
        'TRENČÍN',
        'TRNAVA',
        'VEĽKÝ KRTÍŠ',
        'ŽILINA',
    ]

    log("Mesta zoznam: %s" % mestalist1)
    mesto = dialog.select('Vyberte mesto', mestalist1)

    meteogrampage = util.parse_html('http://www.shmu.sk/sk/?page=1&id=meteo_num_mgram')
    mestalist2 = meteogrampage.select('select#nwp_mesto')[0].get_text(separator='|').split('|')
    mestometeogram = dialog.select('Vyberte mesto (meteogram)', mestalist2)

    # Save the settings
    __addon__.setSetting('mesto', mestalist1[mesto])
    __addon__.setSetting('mestometeogram', mestalist2[mestometeogram])
 def list_movies(self, url):
     result = []
     tree = util.parse_html(url)
     for movie in tree.select('#content .mlist--list .item'):
         if not movie.find('span', 'top'):
             item = self.video_item()
             item['title'] = movie.select('.info h3')[0].text
             item['url'] = self.movie_url(movie.select('.info .ex a')[0].get('href'))
             item['img'] = self.movie_url(movie.select('.img--container img')[0].get('src'))
             result.append(item)
     active_page = tree.select('#content .pagination .active')
     if len(active_page) > 0:
         next_page = active_page[0].find_next_sibling('a')
         if next_page:
             item = self.dir_item()
             item['type'] = 'next'
             item['url'] = self.movie_url(next_page.get('href'))
             result.append(item)
     return result
 def list_series(self, url):
     result = []
     url += '/abecedni-seznam/'
     while len(url) > 0:
         tree = util.parse_html(url)
         for series in tree.select('#content .movies_list a.item'):
             item = self.dir_item()
             item['title'] = series.h3.text
             item['url'] = self.series_url(series.get('href'))
             item['img'] = self.series_url(series.img.get('src'))
             result.append(item)
         active_page = tree.select('#content .pagination .active')
         if len(active_page) > 0:
             next_page = active_page[0].find_next_sibling('a')
             if next_page:
                 url = self.series_url(next_page.get('href'))
                 continue
         url = ''
     return result
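
The pagination idiom used here and in list_movies above (find the '.active' page marker, follow its next <a> sibling until there is none) can be factored out. A generic sketch, assuming a parse callable such as util.parse_html:

def iter_pages(start_url, parse):
    # Yield one parsed tree per listing page, following 'next' links.
    url = start_url
    while url:
        tree = parse(url)
        yield tree
        active = tree.select('#content .pagination .active')
        next_page = active[0].find_next_sibling('a') if active else None
        url = next_page.get('href') if next_page else ''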
Example No. 31
    def list_shows(self, url):
        result = []
        page = util.parse_html(url)
        for showdata in page.select('div.3u'):
            item = self.dir_item()
            item['title'] = showdata.section.text.replace('\n', '').replace('\t', '')
            image = showdata.section.div['style']
            try:
                image = re.search(r'.*url\("([^"]+)"\).*', image).group(1)
            except AttributeError:
                # no url("...") in the style attribute
                image = ''
            episodesurl = showdata.section.div['onclick'].split('=')[-1]
            episodesurl = episodesurl.replace("'", '').replace(';', '')

            item['url'] = '#episodes#' + self._url(episodesurl)
            item['img'] = self._url(image)
            result.append(item)
        return sorted(result, key=lambda n: n['title'])
Example No. 32
 def list_search(self, url):
     result = []
     html_tree = util.parse_html(url)
     for entry in html_tree.select('ul.content li'):
         item = self.video_item()
         entry.p.strong.extract()
         item['url'] = entry.h4.a.get('href')
         item['title'] = entry.h4.a.text
         item['img'] = MOVIES_BASE_URL + entry.img.get('src')
         item['plot'] = entry.p.text.strip()
         item['menu'] = {
             "[B][COLOR yellow]Add to library[/COLOR][/B]": {
                 'url': item['url'],
                 'action': 'add-to-library',
                 'name': item['title']
             }
         }
         self._filter(result, item)
     # Process next 4 pages, so we'll get 20 items per page instead of 4
     for next_page in html_tree.select('.pagination ul li.next a'):
         next_url = '%s/%ssearch%s' % (
             MOVIES_BASE_URL, self.ISO_639_1_CZECH, next_page.get('href'))
         page_number = 1
         page = re.search(r'\bpage=(\d+)', url)
         if page:
             page_number = int(page.group(1))
         next_page_number = 1
         page = re.search(r'\bpage=(\d+)', next_url)
         if page:
             next_page_number = int(page.group(1))
         if page_number > next_page_number:
             break
         if page_number % 5 != 0:
             result += self.list_search(next_url)
         else:
             item = self.dir_item()
             item['type'] = 'next'
             item['url'] = next_url
             result.append(item)
         break
     return result
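
The page_number % 5 test implements the batching promised by the comment: pages whose number is not a multiple of five are fetched recursively and inlined into the current listing, and only every fifth page emits an explicit 'next' directory item, so each visible listing aggregates five result pages (about 20 items instead of 4).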
Example No. 34
def resolve(url):
    # returns the stream url
    stream = []
    if url.endswith('.flv'):
        stream = [url]
    else:
        page = util.parse_html(url)
        stream = [
            'http://nahnoji.cz' + x['src']
            for x in page.select('source[type=video/mp4]')
        ]
    if stream:
        result = []
        for streamurl in stream:
            result.append({
                'name': __name__,
                'quality': '360p',
                'url': streamurl,
                'surl': url
            })
        return result
Example No. 35
	def resolve(self, item, captcha_cb=None, select_cb=None):
		item = item.copy()
		url = self._url(item['url'])
		page = util.parse_html(url)
		result = []
		data = str(page.select('div.entry3 > center')[0])
		resolved = resolver.findstreams(data, ['<iframe(.+?)src=[\"\'](?P<url>.+?)[\'\"]'])
		try:
			for i in resolved:
				item = self.video_item()
				item['title'] = i['name']
				item['url'] = i['url']
				item['quality'] = i['quality']
				item['surl'] = i['surl']
				result.append(item)
		except Exception:
			print('===Unknown resolver===')

		if len(result) == 1:
			return result[0]
		elif len(result) > 1 and select_cb:
			return select_cb(result)
Example No. 38
 def __init__(self):
     res = requests.get(config.url_prefix + config.fiction_url)
     self.fiction_html = parse_html(res)
     self.fiction_title = self.get_fiction_title()
     self.chapter_list = self.get_fiction_chapter_list()
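
Hypothetical usage, assuming this __init__ belongs to a class named Fiction (the name is not shown in the snippet) and that config.url_prefix + config.fiction_url points at a table-of-contents page:

book = Fiction()
print(book.fiction_title)
print('%d chapters' % len(book.chapter_list))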
Example No. 39
 def refresh(self):
     chapter_page_res = requests.get(config.url_prefix + self.chapter_url)
     chapter_html = parse_html(chapter_page_res)
     content = chapter_html.xpath('//div[@id="content"]//text()')
     content_format = filter_ads(format_chapter(content))
     self.content_list = content_format
Example No. 40
    def resolve(self, item, captcha_cb=None, select_cb=None):
        streams = []
        links = util.parse_html(item['url']).select('script')
        for link in links:
            if 'data = ' in str(link):
                break
        links = re.search(r'data = "([^"]+)".*', str(link)).group(1)
        links = base64.b64decode(links)
        soup = bs4.BeautifulSoup(links, 'html5lib')

        sources = [
            x.group(1) for x in re.finditer('iframe src="([^"]+)"', links)
        ]
        lang_regex = re.compile(r'[^(]+\(([^)]+)\)')
        sources_lang = [
            lang_regex.search(x.a.text).group(1) for x in soup.select('li')
        ]
        sources = soup.select('iframe')
        sources = [x['src'] for x in sources]
        sources = [
            x.replace('b3BlbmxvYWRmdWNrZG1jYXRyb2xscw==',
                      'https://openload.co/embed') for x in sources
        ]
        result = []
        subs = []
        for index, source in enumerate(sources):
            if 'openload' in str(source):
                provider = 'OPENLOAD'
                # openload is broken atm
                continue
                metas = util.parse_html(source).select('meta')
                fname = util.request(source)
                for meta in metas:
                    if meta['name'] in 'description':
                        fname = meta['content']
                code = source.split('/')[-2]
                url = 'http://openload.co/f/' + code + '/' + fname.replace(
                    ' ', '.')
                for track in util.parse_html(source).select('track'):
                    if track.get('src'):
                        subs.append([track['src'], track['srclang']])
            elif 'flashx' in str(source):
                provider = 'FLASHX'
                code = re.search(r'embed-([^.-]+)[\.-]', source).group(1)
                url = 'https://www.flashx.tv/embed.php?c=%s' % code
            elif 'youwatch.org' in str(source):
                provider = 'YOUWATCH'
                url = source
            else:
                # fail on any other hoster
                continue
            hmf = urlresolver.HostedMediaFile(url=url,
                                              include_disabled=False,
                                              include_universal=False)
            part = 'None'
            language = sources_lang[index]
            if hmf.valid_url() is True:
                try:
                    surl = hmf.resolve()
                except:
                    continue
                item = self.video_item()
                item['title'] = '{0} ({1})'.format(provider, language)
                item['url'] = surl
                result.append(item)
        if subs:
            _result = []
            for sub in subs:
                for item in result:
                    item = copy(item)
                    item['subs'] = sub[0]
                    item['title'] += ' {0}'.format(sub[1])
                    _result.append(item)
            result = _result
        if len(result) == 1:
            return result[0]
        elif len(result) > 1 and select_cb:
            return select_cb(result)
Example No. 42
 def list_search_results(self, url):
     page = util.parse_html(url)
Example No. 43
def parse_data():
    # Download the data
    mesto = __addon__.getSetting('mesto')
    mestometeogram = __addon__.getSetting('mestometeogram')
    key = __addon__.getSetting('key')
    try:
        pages = int(__addon__.getSetting('pages'))
    except ValueError:
        pages = 6

    if not key:
        xbmcgui.Dialog().ok('Chyba', 'Zadajte v nastaveniach kľúč k OpenWeather API!')
        return True

    data = {'mesto': mesto, 'page': '1', 'id': 'meteo_predpoved_sk'}
    url = 'http://www.shmu.sk/sk/?#tab'
    page = util.post(url, data)
    soup = bs4.BeautifulSoup(page, "html5lib")
    print('mesto: %s, den: %s' % (mesto, den))
    cnt = 1
    for x in soup.select('.w600')[0].tbody.findAll('td', 'center'):
        if x.has_attr('style'):
            if 'white-space' in x['style']:
                print('Daily.%s.LongDay' % cnt, skdays[den + cnt - 1])
                set_property('Daily.%s.LongDay' % cnt, skdays[den + cnt - 1])
                set_property('Daily.%s.ShortDay' % cnt, skdays[den + cnt - 1])
                night, day = x.get_text(separator='|').split('|')
                set_property('Daily.%s.HighTemperature' % cnt, day)
                set_property('Daily.%s.LowTemperature' % cnt, night)
            elif 'background:#00660E' in x['style']:
                set_property('Daily.%s.Outlook' % cnt, x.img['alt'])
                image_name = x.img['src'].split('/')[-1]
                set_property('Daily.%s.OutlookIcon' % cnt, WEATHER_CODES[
                             image_name.replace('.gif', '')] + '.png')
                cnt += 1

    url = 'http://api.openweathermap.org/data/2.5/find?q=%s&type=like&mode=json&APPID=%s&units=metric' \
        % (urllib2.quote(mesto), key)
    req = urllib2.urlopen(url)
    response = req.read()
    req.close()
    jsonresponse = demjson.decode(response)['list'][0]

    set_property('Current.Temperature', str(jsonresponse['main']['temp']))
    set_property('Current.Wind', str(jsonresponse['wind']['speed'] * 3.6))
    try:
        set_property('Current.WindDirection',
                     degToCompass(jsonresponse['wind']['deg']))
    except:
        pass
    set_property('Current.FeelsLike', feelslike(round(float(jsonresponse['main']['temp'])),
                                                int(round(float(jsonresponse['wind']['speed']) * 3.6) + 0.5)))
    set_property('Current.Humidity', str(jsonresponse['main']['humidity']))
    set_property('Current.DewPoint', dewpoint(round(float(jsonresponse['main']['temp'])),
                                              int(jsonresponse['main']['humidity'])))
    set_property('Current.Pressure', str(jsonresponse['main']['pressure']))
    set_property('Current.Condition', str(jsonresponse['weather'][0]['main']))
    iconfilename = en2icon[jsonresponse['weather'][0]['main'].lower()]
    if not iconfilename:
        iconfilename = 'none'
    set_property('Current.OutlookIcon', xbmc.translatePath(os.path.join(
        __cwd__, 'resources/lib/icons', '%s.png' % iconfilename)))
    meteogrampage = util.parse_html('http://www.shmu.sk/sk/?page=1&id=meteo_num_mgram')
    cityid = meteogrampage.select('select#nwp_mesto')[0].find(text=mestometeogram).parent['value']
    day, month, year, hour, text = re.split(
        '[. ]', meteogrampage.select('select[class=w150] option')[-1].text)
    meteogramdate = '%s%s%s-%s00' % (year, month, day, hour)
    query = 'http://www.shmu.sk/data/datanwp/v2/' +\
        'meteogram/al-meteogram_%s-%s-nwp-.png' \
        % (cityid, meteogramdate)
    req = urllib2.Request(query)
    response = urllib2.urlopen(req, timeout=10)
    meteogramimage = Image.open(cStringIO.StringIO(response.read()))
    response.close()

    set_property('Map.IsFetched', '')
    print('Stahujem meteogram..')
    cut_picture(pages=pages, meteogramimage=meteogramimage,
                meteogramdate=meteogramdate)
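
The snippet relies on helpers defined elsewhere in the addon (set_property, feelslike, dewpoint, degToCompass, cut_picture). As one example, degToCompass is presumably the standard 16-point compass conversion; a sketch under that assumption:

def degToCompass(degrees):
    # Map 0-360 degrees onto 16 compass points (assumed implementation).
    dirs = ['N', 'NNE', 'NE', 'ENE', 'E', 'ESE', 'SE', 'SSE',
            'S', 'SSW', 'SW', 'WSW', 'W', 'WNW', 'NW', 'NNW']
    return dirs[int(degrees / 22.5 + 0.5) % 16]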
Example No. 44
    def fetch_mf2(self, url, id=None, require_mf2=True, raise_errors=False):
        """Fetches a URL and extracts its mf2 data.

    Side effects: sets :attr:`entity`\ .html on success, calls :attr:`error()`
    on errors.

    Args:
      url: string
      id: string, optional id of specific element to extract and parse. defaults
        to the whole page.
      require_mf2: boolean, whether to return error if no mf2 are found
      raise_errors: boolean, whether to let error exceptions propagate up or
        handle them

    Returns:
      (:class:`requests.Response`, mf2 data dict) on success, None on failure
    """
        try:
            resp = util.requests_get(url)
            resp.raise_for_status()
        except werkzeug.exceptions.HTTPException:
            # raised by us, probably via self.error()
            raise
        except BaseException as e:
            if raise_errors:
                raise
            util.interpret_http_exception(e)  # log exception
            self.error(f'Could not fetch source URL {url}')

        if self.entity:
            self.entity.html = resp.text

        # parse microformats
        soup = util.parse_html(resp)
        mf2 = util.parse_mf2(soup, url=resp.url, id=id)
        if id and not mf2:
            self.error(f'Got fragment {id} but no element found with that id.')

        # special case tumblr's markup: div#content > div.post > div.copy
        # convert to mf2 and re-parse
        if not mf2.get('items'):
            contents = soup.find_all(id='content')
            if contents:
                post = contents[0].find_next(class_='post')
                if post:
                    post['class'] = 'h-entry'
                    copy = post.find_next(class_='copy')
                    if copy:
                        copy['class'] = 'e-content'
                    photo = post.find_next(class_='photo-wrapper')
                    if photo:
                        img = photo.find_next('img')
                        if img:
                            img['class'] = 'u-photo'
                    # TODO: i should be able to pass post or contents[0] to mf2py instead
                    # here, but it returns no items. mf2py bug?
                    doc = str(post)
                    mf2 = util.parse_mf2(doc, resp.url)

        logger.debug(f'Parsed microformats2: {json_dumps(mf2, indent=2)}')
        items = mf2.get('items', [])
        if require_mf2 and (not items or not items[0]):
            self.error('No microformats2 data found in ' + resp.url,
                       data=mf2,
                       html=f"""
No <a href="http://microformats.org/get-started">microformats</a> or
<a href="http://microformats.org/wiki/microformats2">microformats2</a> found in
<a href="{resp.url}">{util.pretty_link(resp.url)}</a>! See <a href="http://indiewebify.me/">indiewebify.me</a>
for details (skip to level 2, <em>Publishing on the IndieWeb</em>).
""")

        return resp, mf2
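
A hypothetical call site, inside a handler that provides self.entity and self.error:

resp, mf2 = self.fetch_mf2('https://example.com/post/123')
for entry in mf2.get('items', []):
    print(entry.get('type'), sorted(entry.get('properties', {})))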
Example No. 45
	def search(self, keyword):
		return self.catl3(util.parse_html(self._url('search/?search_id=' + keyword + '&search_type=search_videos&submit=Hledej')))