def resolve(self, item, captcha_cb=None, select_cb=None):
    streams = []
    link = util.parse_html(item['url']).find(
        'a', {'class': ['play-movie', 'play-epizode']})
    if link and link.get('data-loc'):
        url = 'http://stream-a-ams1xx2sfcdnvideo5269.cz/'
        if 'serialy.' in item['url']:
            url += 'prehravac.php?play=serail&id='
        else:
            url += 'okno.php?new_way=yes&film='
        url += link.get('data-loc')
        for container in util.parse_html(url).select(
                '.container .free--box .center--inner'):
            for stream in container.find_all(
                    ['embed', 'object', 'iframe', 'script', 'a']):
                for attribute in ['src', 'data', 'href']:
                    value = stream.get(attribute)
                    if value:
                        streams.append(value)
    result = self.findstreams(streams)
    if len(result) == 1:
        return result[0]
    elif len(result) > 1 and select_cb:
        return select_cb(result)
    return None

def list(self, url):
    if url.find('#cat#') == 0:
        return self.cat(util.parse_html(self._url(url[5:])))
    if url.find('#catl2#') == 0:
        return self.catl2(util.parse_html(self._url(url[7:])))
    if url.find('#catl3#') == 0:
        return self.catl3(util.parse_html(self._url(url[7:])))
    if url.find('#last#') == 0:
        return self.catl3(util.parse_html(self._url(url[6:])))
    raise Exception("Invalid url, I do not know how to list it: " + url)

def list_series(self, url):
    result = []
    tree = util.parse_html(url)
    series_list = tree.select('.mk-search-page')
    if series_list:
        for series in tree.select('.container a'):
            item = self.dir_item()
            item['title'] = series.select('span .name-search')[0].text
            item['url'] = 'https://www.topserialy.to' + series.get('href')
            item['img'] = 'https://www.topserialy.to' + series.span.img.get('src')
            result.append(item)
    else:
        for series in tree.select('.container a.single-result'):
            item = self.dir_item()
            original_title = series.select('.original')[0].text
            czsk_title = series.select('.cz-sk')[0].text
            title = original_title
            if czsk_title not in '......' and czsk_title != original_title:
                title += ' (' + czsk_title + ')'
            item['title'] = title
            item['url'] = 'https://www.topserialy.to' + series.get('href')
            item['img'] = 'https://www.topserialy.to' + series.img.get('data-original')
            result.append(item)
    return sorted(result)

def subs(self, subtitles_path):
    soup = util.parse_html(self.webURL + '/titulky')
    sub_out = []
    cnt = 1
    subtitles = soup.find_all(id='subtitles')
    if subtitles:
        for sub in subtitles[0].find_all('li'):
            s_tme = sub.find_all('a')[0].text
            s_txt = ''.join(sub.find_all(text=True, recursive=False)).strip()
            sub_out.append(str(cnt))
            cnt += 1
            start = datetime.strptime(s_tme, '%H:%M:%S')
            end = start + timedelta(seconds=len(s_txt) * 1 / 14)
            end = end.strftime('%H:%M:%S')
            sub_out.append(s_tme + ',000 --> ' + end + ',000')
            sub_out.append(s_txt)
            sub_out.append('')
            # print s_tme, s_txt.encode('utf-8')
    if cnt < 2:
        return False
    f_sub = open(subtitles_path, 'w')
    f_sub.write('\n'.join(sub_out).encode('utf-8'))
    f_sub.close()
    return True

def list(self, page=1):
    result = []
    url = 'moje-zpravy/video?page=%s' % page
    tree = util.parse_html(self._url(url))
    for element in tree.select('.col .show-box--date'):
        title = element.text.strip()
        link = self._url(element.parent.findNextSibling().a['href'])
        img = self._url(element.parent.findNextSibling().img.get('src'))
        item = self.video_item()
        item['title'] = title
        item['url'] = link
        item['img'] = img
        result.append(item)
    item = self.dir_item()
    item['type'] = 'next'
    item['url'] = str(int(page) + 1)
    result.append(item)
    if page > 1:
        item = self.dir_item()
        item['type'] = 'prev'
        item['url'] = str(int(page) - 1)
        result.append(item)
    return result

def submit_form(html):
    """Submits the first form on the page."""
    form = util.parse_html(html).form
    data = {input['name']: input['value'] for input in form.find_all('input')
            if input.get('name') and input.get('value')}
    return facebook.application.get_response(
        form['action'], method=form['method'].upper(),
        body=urllib.parse.urlencode(data))

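# Hedged usage sketch (not part of the source): submit_form() takes the HTML of
# a page whose first <form> should be auto-submitted and replays that form
# against facebook.application. The markup and the helper name below are
# hypothetical, for illustration only.
def _example_submit_form():
    html = ('<form action="/confirm" method="post">'
            '<input name="token" value="abc123"/></form>')
    # Returns whatever response facebook.application produces for POST /confirm.
    return submit_form(html)
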
def list_series(self, url):
    result = []
    url += '/abecedni-seznam/'
    if self.quickparser in 'true':
        for ix in xrange(1, 35):
            data = urllib2.urlopen(urllib2.Request('%s/?pg=%s' % (url, ix))).read()
            data = data.split('</iframe>')[2].split('</div>', 3)[3]
            for elem in [x for x in data.split('</a>')
                         if 'item' in x and not 'personality' in x
                         and not 'creator' in x and not 'function' in x][0:18]:
                urlline, imgline = elem.splitlines()[1:3]
                item = self.dir_item()
                item['title'] = imgline.split('"')[3]
                item['img'] = self.series_url(imgline.split('"')[1])
                item['url'] = self.series_url(urlline.split('"')[1])
                result.append(item)
        return result
    else:
        while len(url) > 0:
            tree = util.parse_html(url)
            for series in tree.select('#content .movies_list a.item'):
                item = self.dir_item()
                item['title'] = series.h3.text
                item['url'] = self.series_url(series.get('href'))
                item['img'] = self.series_url(series.img.get('src'))
                result.append(item)
            active_page = tree.select('#content .pagination .active')
            if len(active_page) > 0:
                next_page = active_page[0].find_next_sibling('a')
                if next_page:
                    url = self.series_url(next_page.get('href'))
                    continue
            url = ''
        return result

def list(self, url):
    result = []
    if url.startswith('#new#'):
        url = 'video/?stanice=1'
    tree = util.parse_html(self._url(url))
    for m in tree.find('h1', text=re.compile(r'Nejn.*')).parent.select(
            '.grid div .box.box-video'):
        # skip paid content
        if 'box-video--premium' in m['class']:
            continue
        item = self.video_item()
        item['title'] = m.div.img['title']
        item['title'] += ' ' + m.p.text
        item['url'] = self._url(m.parent['href'])
        item['img'] = self._url(m.div.img['src'])
        result.append(item)
    pager_m = tree.find('a', attrs='next')
    if pager_m:
        item = self.dir_item()
        item['type'] = 'next'
        idx = pager_m['href']
        item['url'] = 'http://www.barrandov.tv/video/' + idx
        result.append(item)
    return result

def list_seasons(self, url):
    result = []
    for season in util.parse_html(url).select('.accordion'):
        item = self.dir_item()
        item['title'] = season.text.strip()
        item['url'] = 'https://www.topserialy.to' + season.p['data']
        result.append(item)
    return result

def list_seasons(self, url):
    result = []
    for season in util.parse_html(url).select('#episodes--list a.accordionTitle'):
        item = self.dir_item()
        item['title'] = season.text.split(' - ')[-1]
        item['url'] = url + '#' + item['title'].split('. ', 1)[0]
        result.append(item)
    return result

def list_years(self, url):
    result = []
    page = util.parse_html(url)
    for link in page.select('div.button-style2.chars-video')[0].select('a'):
        item = self.dir_item()
        item['title'] = link.text
        item['url'] = '#year#' + self._url(link['href'])
        result.append(item)
    return result

def categories(self):
    page = util.parse_html('http://www.kynychova-tv.cz/index.php?id=5')
    result = []
    for title, uri in [(x.h3.text, x.h3.a['href'])
                       for x in page.select('div.entry5') if x.h3]:
        item = self.dir_item()
        item['title'] = title
        item['url'] = uri
        result.append(item)
    return result

def list(self, url):
    url = self._url(url)
    page = util.parse_html(url)
    result = []
    for title, uri in [(x.img['title'], x['href'])
                       for x in page.select('div.entry3')[0].findAll('a')]:
        item = self.video_item()
        item['title'] = title
        item['url'] = uri
        result.append(item)
    return result

def categories(self):
    result = []
    item = self.dir_item()
    item['type'] = 'new'
    item['url'] = '#last#' + self._url('videos/basic/mr')
    result.append(item)
    result.extend(self.cat(util.parse_html(self._url('videos'))))
    return result

def list_episodes(self, url):
    result = []
    page = util.parse_html(url)
    for episodedata in page.select('ul.style1 a'):
        # skip links whose class is not the active one
        if episodedata.get('class'):
            if not episodedata.get('class')[0] in 'active':
                continue
        item = self.video_item()
        item['title'] = episodedata.text.replace('\n', '').replace('\t', '')
        item['url'] = self._url(episodedata['href'])
        result.append(item)
    return result

def resolve(self, item, captcha_cb=None, select_cb=None):
    streams = []
    page = util.parse_html(item['url'])
    # join the "a"+"b"+... string-concatenation obfuscation back together
    pattern = r'\+?"([^"])"\+?'
    link = re.sub(pattern, lambda n: n.group(1),
                  page.find('script', text=re.compile(r'"."')).text)
    link = re.search(r'''(http://[^'"]+)''', link).group(1)
    link = link.replace('\n', '')
    if u'mp4' in link:
        return {'url': link, 'subs': ''}
    else:
        result = resolver.findstreams([str(link)])
        return result[0]

def list_genres(self, url):
    result = []
    item = self.dir_item()
    item['title'] = 'Všetky'
    item['url'] = url + '/seznam-filmu/'
    result.append(item)
    for genre in util.parse_html(url).select('#content .genres .buts a'):
        item = self.dir_item()
        item['title'] = genre.text
        item['url'] = url + genre.get('href')
        result.append(item)
    return result

def list_episodes(self, url):
    result = []
    url, season = url.split('#', 1)
    for episode in util.parse_html(url).select(
            '#episodes--list dd:nth-of-type(' + season + ') ul.episodes li'):
        link = episode.find('a', 'view')
        link.extract()
        item = self.video_item()
        item['title'] = episode.text.strip()
        item['url'] = self.series_url(link.get('href'))
        item['number'] = int(item['title'].split('.', 1)[0])
        result.append(item)
    return sorted(result, key=lambda k: k['number'])

def list_episodes(self, url):
    result = []
    for episode in util.parse_html(url).select('a'):
        item = self.video_item()
        item['url'] = 'https://www.topserialy.to/' + episode.get('href')
        season_episode = item['url'].split('-')[-1].upper()
        item['title'] = season_episode + ' ' + episode.text.strip()
        try:
            item['number'] = int(''.join(re.findall(r'[0-9]', season_episode)))
        except ValueError:
            item['number'] = 0
        result.append(item)
    return sorted(result, key=lambda k: k['number'])

def resolve(url):
    # returns the stream url
    stream = []
    if url.endswith('.flv'):
        stream = [url]
    else:
        page = util.parse_html(url)
        stream = ['http://nahnoji.cz' + x['src']
                  for x in page.select('source[type=video/mp4]')]
    if stream:
        result = []
        for streamurl in stream:
            result.append({'name': __name__, 'quality': '360p',
                           'url': streamurl, 'surl': url})
        return result

def list_movies(self, url):
    result = []
    page = util.parse_html(url)
    for moviedata in page.select('div.3u'):
        item = self.video_item()
        item['title'] = moviedata.section.h3.text
        item['title'] = item['title'].replace('\t', '')
        item['title'] = item['title'].replace('\n', '')
        item['url'] = self._url(moviedata.section.a['href'])
        images = moviedata.section.a.div['onmouseenter'].splitlines()
        imagefile = [x for x in images if '.jpg' in x][-1]
        imagefile = imagefile.replace("\t", '')
        imagefile = imagefile.replace("'", '')
        item['img'] = self._url(imagefile)
        result.append(item)
    return sorted(result, key=lambda n: n['title'])

def settings():
    # Parse and show the list of cities
    dialog = xbmcgui.Dialog()
    mestalist1 = [
        'BANSKÁ BYSTRICA', 'BARDEJOV', 'BRATISLAVA', 'BREZNO', 'DOLNÝ KUBÍN',
        'DUNAJSKÁ STREDA', 'HURBANOVO', 'KOMÁRNO', 'KOŠICE', 'KRÁĽ. CHLMEC',
        'LEVICE', 'LIPTOVSKÝ MIKULÁŠ', 'LUČENEC', 'MEDZILABORCE', 'MICHALOVCE',
        'NITRA', 'PEZINOK', 'PIEŠŤANY', 'POPRAD', 'PREŠOV', 'PRIEVIDZA',
        'RIMAVSKÁ SOBOTA', 'ROŽŇAVA', 'SENICA', 'SKALICA', 'ŠAHY', 'TRENČÍN',
        'TRNAVA', 'VEĽKÝ KRTÍŠ', 'ŽILINA',
    ]
    log("Mesta zoznam: %s" % mestalist1)
    mesto = dialog.select('Vyberte mesto', mestalist1)
    meteogrampage = util.parse_html(
        'http://www.shmu.sk/sk/?page=1&id=meteo_num_mgram')
    mestalist2 = meteogrampage.select(
        'select#nwp_mesto')[0].get_text(separator='|').split('|')
    mestometeogram = dialog.select('Vyberte mesto (meteogram)', mestalist2)
    # Save the settings
    __addon__.setSetting('mesto', mestalist1[mesto])
    __addon__.setSetting('mestometeogram', mestalist2[mestometeogram])

def list_movies(self, url):
    result = []
    tree = util.parse_html(url)
    for movie in tree.select('#content .mlist--list .item'):
        if not movie.find('span', 'top'):
            item = self.video_item()
            item['title'] = movie.select('.info h3')[0].text
            item['url'] = self.movie_url(movie.select('.info .ex a')[0].get('href'))
            item['img'] = self.movie_url(movie.select('.img--container img')[0].get('src'))
            result.append(item)
    active_page = tree.select('#content .pagination .active')
    if len(active_page) > 0:
        next_page = active_page[0].find_next_sibling('a')
        if next_page:
            item = self.dir_item()
            item['type'] = 'next'
            item['url'] = self.movie_url(next_page.get('href'))
            result.append(item)
    return result

def list_series(self, url):
    result = []
    url += '/abecedni-seznam/'
    while len(url) > 0:
        tree = util.parse_html(url)
        for series in tree.select('#content .movies_list a.item'):
            item = self.dir_item()
            item['title'] = series.h3.text
            item['url'] = self.series_url(series.get('href'))
            item['img'] = self.series_url(series.img.get('src'))
            result.append(item)
        active_page = tree.select('#content .pagination .active')
        if len(active_page) > 0:
            next_page = active_page[0].find_next_sibling('a')
            if next_page:
                url = self.series_url(next_page.get('href'))
                continue
        url = ''
    return result

def list_shows(self, url):
    result = []
    page = util.parse_html(url)
    for showdata in page.select('div.3u'):
        item = self.dir_item()
        item['title'] = showdata.section.text.replace('\n', '').replace('\t', '')
        image = showdata.section.div['style']
        try:
            image = re.search(r'.*url\("([^"]+)"\).*', image).group(1)
        except:
            image = ''
        episodesurl = showdata.section.div['onclick']
        episodesurl = episodesurl.split('=')[-1]
        episodesurl = episodesurl.replace("'", '')
        episodesurl = episodesurl.replace(";", '')
        item['url'] = '#episodes#' + self._url(episodesurl)
        item['img'] = self._url(image)
        result.append(item)
    return sorted(result, key=lambda n: n['title'])

def list_search(self, url):
    result = []
    html_tree = util.parse_html(url)
    for entry in html_tree.select('ul.content li'):
        item = self.video_item()
        entry.p.strong.extract()
        item['url'] = entry.h4.a.get('href')
        item['title'] = entry.h4.a.text
        item['img'] = MOVIES_BASE_URL + entry.img.get('src')
        item['plot'] = entry.p.text.strip()
        item['menu'] = {
            "[B][COLOR yellow]Add to library[/COLOR][/B]": {
                'url': item['url'],
                'action': 'add-to-library',
                'name': item['title']
            }
        }
        self._filter(result, item)
    # Process next 4 pages, so we'll get 20 items per page instead of 4
    for next_page in html_tree.select('.pagination ul li.next a'):
        next_url = '%s/%ssearch%s' % (MOVIES_BASE_URL, self.ISO_639_1_CZECH,
                                      next_page.get('href'))
        page_number = 1
        page = re.search(r'\bpage=(\d+)', url)
        if page:
            page_number = int(page.group(1))
        next_page_number = 1
        page = re.search(r'\bpage=(\d+)', next_url)
        if page:
            next_page_number = int(page.group(1))
        if page_number > next_page_number:
            break
        if page_number % 5 != 0:
            result += self.list_search(next_url)
        else:
            item = self.dir_item()
            item['type'] = 'next'
            item['url'] = next_url
            result.append(item)
        break
    return result

def resolve(self, item, captcha_cb=None, select_cb=None):
    item = item.copy()
    url = self._url(item['url'])
    page = util.parse_html(url)
    result = []
    data = str(page.select('div.entry3 > center')[0])
    resolved = resolver.findstreams(
        data, ['<iframe(.+?)src=[\"\'](?P<url>.+?)[\'\"]'])
    try:
        for i in resolved:
            item = self.video_item()
            item['title'] = i['name']
            item['url'] = i['url']
            item['quality'] = i['quality']
            item['surl'] = i['surl']
            result.append(item)
    except:
        print '===Unknown resolver==='
    if len(result) == 1:
        return result[0]
    elif len(result) > 1 and select_cb:
        return select_cb(result)

def __init__(self):
    res = requests.get(config.url_prefix + config.fiction_url)
    self.fiction_html = parse_html(res)
    self.fiction_title = self.get_fiction_title()
    self.chapter_list = self.get_fiction_chapter_list()

def refresh(self):
    chapter_page_res = requests.get(config.url_prefix + self.chapter_url)
    chapter_html = parse_html(chapter_page_res)
    content = chapter_html.xpath('//div[@id="content"]//text()')
    content_format = filter_ads(format_chapter(content))
    self.content_list = content_format

def resolve(self, item, captcha_cb=None, select_cb=None):
    streams = []
    links = util.parse_html(item['url']).select('script')
    for link in links:
        if 'data = ' in str(link):
            break
    links = re.search(r'data = "([^"]+)".*', str(link)).group(1)
    links = base64.b64decode(links)
    soup = bs4.BeautifulSoup(links, 'html5lib')
    sources = [x.group(1) for x in re.finditer('iframe src="([^"]+)"', links)]
    lang_regex = re.compile(r'[^(]+\(([^)]+)\)')
    sources_lang = [lang_regex.search(x.a.text).group(1)
                    for x in soup.select('li')]
    sources = soup.select('iframe')
    sources = [x['src'] for x in sources]
    sources = [x.replace('b3BlbmxvYWRmdWNrZG1jYXRyb2xscw==',
                         'https://openload.co/embed') for x in sources]
    result = []
    subs = []
    for index, source in enumerate(sources):
        if 'openload' in str(source):
            provider = 'OPENLOAD'
            # openload is broken atm
            continue
            metas = util.parse_html(source).select('meta')
            fname = util.request(source)
            for meta in metas:
                if meta['name'] in 'description':
                    fname = meta['content']
            code = source.split('/')[-2]
            url = 'http://openload.co/f/' + code + '/' + fname.replace(' ', '.')
            for track in util.parse_html(source).select('track'):
                if track.get('src'):
                    subs.append([track['src'], track['srclang']])
        elif 'flashx' in str(source):
            provider = 'FLASHX'
            code = re.search('embed-([^.-]+)[\.-]', source).group(1)
            url = 'https://www.flashx.tv/embed.php?c=%s' % code
        elif 'youwatch.org' in str(source):
            provider = 'YOUWATCH'
            url = source
        else:
            # fail on any other hoster
            continue
        hmf = urlresolver.HostedMediaFile(url=url, include_disabled=False,
                                          include_universal=False)
        part = 'None'
        language = sources_lang[index]
        if hmf.valid_url() is True:
            try:
                surl = hmf.resolve()
            except:
                continue
            item = self.video_item()
            item['title'] = '{0} ({1})'.format(provider, language)
            item['url'] = surl
            result.append(item)
    if subs:
        _result = []
        for sub in subs:
            for item in result:
                item = copy(item)
                item['subs'] = sub[0]
                item['title'] += ' {0}'.format(sub[1])
                _result.append(item)
        result = _result
    if len(result) == 1:
        return result[0]
    elif len(result) > 1 and select_cb:
        return select_cb(result)

def list_search_results(self, url):
    page = util.parse_html(url)

def parse_data():
    # Download the data
    mesto = __addon__.getSetting('mesto')
    mestometeogram = __addon__.getSetting('mestometeogram')
    key = __addon__.getSetting('key')
    try:
        pages = int(__addon__.getSetting('pages'))
    except ValueError:
        pages = 6
    if not key:
        xbmcgui.Dialog().ok('Chyba',
                            'Zadajte v nastaveniach kľúč k OpenWeather API!')
        return True
    data = {'mesto': mesto, 'page': '1', 'id': 'meteo_predpoved_sk'}
    url = 'http://www.shmu.sk/sk/?#tab'
    page = util.post(url, data)
    soup = bs4.BeautifulSoup(page, "html5lib")
    print('mesto: %s, den: %s' % (mesto, den))
    cnt = 1
    for x in soup.select('.w600')[0].tbody.findAll('td', 'center'):
        if x.has_attr('style'):
            if 'white-space' in x['style']:
                print('Daily.%s.LongDay' % cnt, skdays[den + cnt - 1])
                set_property('Daily.%s.LongDay' % cnt, skdays[den + cnt - 1])
                set_property('Daily.%s.ShortDay' % cnt, skdays[den + cnt - 1])
                night, day = x.get_text(separator='|').split('|')
                set_property('Daily.%s.HighTemperature' % cnt, day)
                set_property('Daily.%s.LowTemperature' % cnt, night)
            elif 'background:#00660E' in x['style']:
                imgname = x.img['src'].split('/')[-1]
                set_property('Daily.%s.Outlook' % cnt, x.img['alt'])
                image_name = x.img['src'].split('/')[-1]
                set_property('Daily.%s.OutlookIcon' % cnt,
                             WEATHER_CODES[image_name.replace('.gif', '')] + '.png')
                cnt += 1
    url = 'http://api.openweathermap.org/data/2.5/find?q=%s&type=like&mode=json&APPID=%s&units=metric' \
          % (urllib2.quote(mesto), key)
    req = urllib2.urlopen(url)
    response = req.read()
    req.close()
    jsonresponse = demjson.decode(response)['list'][0]
    set_property('Current.Temperature', str(jsonresponse['main']['temp']))
    set_property('Current.Wind', str(jsonresponse['wind']['speed'] * 3.6))
    try:
        set_property('Current.WindDirection',
                     degToCompass(jsonresponse['wind']['deg']))
    except:
        pass
    set_property('Current.FeelsLike',
                 feelslike(round(float(jsonresponse['main']['temp'])),
                           int(round(float(jsonresponse['wind']['speed']) * 3.6) + 0.5)))
    set_property('Current.Humidity', str(jsonresponse['main']['humidity']))
    set_property('Current.DewPoint',
                 dewpoint(round(float(jsonresponse['main']['temp'])),
                          int(jsonresponse['main']['humidity'])))
    set_property('Current.Pressure', str(jsonresponse['main']['pressure']))
    set_property('Current.Condition', str(jsonresponse['weather'][0]['main']))
    iconfilename = en2icon[jsonresponse['weather'][0]['main'].lower()]
    if not iconfilename:
        iconfilename = 'none'
    set_property('Current.OutlookIcon',
                 xbmc.translatePath(os.path.join(__cwd__, 'resources/lib/icons',
                                                 '%s.png' % iconfilename)))
    meteogrampage = util.parse_html('http://www.shmu.sk/sk/?page=1&id=meteo_num_mgram')
    cityid = meteogrampage.select('select#nwp_mesto')[0].find(
        text=mestometeogram).parent['value']
    day, month, year, hour, text = re.split(
        '[. ]', meteogrampage.select('select[class=w150] option')[-1].text)
    meteogramdate = '%s%s%s-%s00' % (year, month, day, hour)
    query = 'http://www.shmu.sk/data/datanwp/v2/' + \
            'meteogram/al-meteogram_%s-%s-nwp-.png' % (cityid, meteogramdate)
    req = urllib2.Request(query)
    response = urllib2.urlopen(req, timeout=10)
    meteogramimage = Image.open(cStringIO.StringIO(response.read()))
    response.close()
    set_property('Map.IsFetched', '')
    print('Stahujem meteogram..')
    cut_picture(pages=pages, meteogramimage=meteogramimage,
                meteogramdate=meteogramdate)

def fetch_mf2(self, url, id=None, require_mf2=True, raise_errors=False):
    """Fetches a URL and extracts its mf2 data.

    Side effects: sets :attr:`entity`\ .html on success, calls :attr:`error()`
    on errors.

    Args:
      url: string
      id: string, optional id of specific element to extract and parse.
        defaults to the whole page.
      require_mf2: boolean, whether to return error if no mf2 are found
      raise_errors: boolean, whether to let error exceptions propagate up or
        handle them

    Returns:
      (:class:`requests.Response`, mf2 data dict) on success, None on failure
    """
    try:
        resp = util.requests_get(url)
        resp.raise_for_status()
    except werkzeug.exceptions.HTTPException:
        # raised by us, probably via self.error()
        raise
    except BaseException as e:
        if raise_errors:
            raise
        util.interpret_http_exception(e)  # log exception
        self.error(f'Could not fetch source URL {url}')

    if self.entity:
        self.entity.html = resp.text

    # parse microformats
    soup = util.parse_html(resp)
    mf2 = util.parse_mf2(soup, url=resp.url, id=id)
    if id and not mf2:
        self.error(f'Got fragment {id} but no element found with that id.')

    # special case tumblr's markup: div#content > div.post > div.copy
    # convert to mf2 and re-parse
    if not mf2.get('items'):
        contents = soup.find_all(id='content')
        if contents:
            post = contents[0].find_next(class_='post')
            if post:
                post['class'] = 'h-entry'
                copy = post.find_next(class_='copy')
                if copy:
                    copy['class'] = 'e-content'
                photo = post.find_next(class_='photo-wrapper')
                if photo:
                    img = photo.find_next('img')
                    if img:
                        img['class'] = 'u-photo'
                # TODO: i should be able to pass post or contents[0] to mf2py
                # instead here, but it returns no items. mf2py bug?
                doc = str(post)
                mf2 = util.parse_mf2(doc, resp.url)

    logger.debug(f'Parsed microformats2: {json_dumps(mf2, indent=2)}')
    items = mf2.get('items', [])
    if require_mf2 and (not items or not items[0]):
        self.error('No microformats2 data found in ' + resp.url, data=mf2, html=f"""
No <a href="http://microformats.org/get-started">microformats</a> or
<a href="http://microformats.org/wiki/microformats2">microformats2</a> found in
<a href="{resp.url}">{util.pretty_link(resp.url)}</a>! See
<a href="http://indiewebify.me/">indiewebify.me</a> for details (skip to level 2,
<em>Publishing on the IndieWeb</em>).
""")

    return resp, mf2

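# Hedged usage sketch (not part of the source): one way fetch_mf2() might be
# called from another method of the same handler class. The URL is a
# placeholder; require_mf2=False simply skips the "no microformats found"
# error path described in the docstring above.
def _example_fetch(self):
    fetched = self.fetch_mf2('https://example.com/post', require_mf2=False)
    if fetched:
        resp, mf2 = fetched
        # mf2 is the parsed microformats2 dict; 'items' holds top-level entries.
        logger.debug('found %d mf2 items in %s', len(mf2.get('items', [])), resp.url)
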
def search(self, keyword):
    return self.catl3(util.parse_html(self._url(
        'search/?search_id=' + keyword +
        '&search_type=search_videos&submit=Hledej')))