def parse_page(self, title, bookid, path, response):
    """Write one downloaded page to disk and request every image it uses.

    The response body is prettified with BeautifulSoup, rendered through
    ``PAGE_TEMPLATE`` and written (UTF-8) to ``./output/OEBPS/<path>``.
    Afterwards one ``scrapy.Request`` is yielded per non-empty ``<img src>``
    found in the response; each request is routed to
    ``self.parse_content_img`` with the image source bound as its first
    argument.

    Args:
        title: Book title segment used when building image URLs.
        bookid: Book identifier segment used when building image URLs.
        path: File path, relative to ``./output/OEBPS/``, to write to.
        response: Response object exposing ``body`` and ``xpath``.

    Yields:
        ``scrapy.Request`` objects, one per image source.
    """
    page_template = Template(PAGE_TEMPLATE)
    with codecs.open("./output/OEBPS/" + path, "wb", "utf-8") as out_file:
        markup = BeautifulSoup(response.body).prettify()
        rendered = page_template.render(body=markup.decode('utf8'))
        out_file.write(rendered)

    image_sources = response.xpath("//img/@src").extract()
    for src in image_sources:
        if not src:
            # Skip empty src attributes; there is nothing to download.
            continue
        image_url = (self.host + '/library/view/' + title + '/' + bookid
                     + '/' + src)
        on_image = partial(self.parse_content_img, src)
        yield scrapy.Request(image_url, callback=on_image)
def _make_link_item(label, image, plot, playable):
    """Build one ``xbmcgui.ListItem`` row for ``get_links_list``.

    Args:
        label: Text shown for the row.
        image: Artwork path used for the icon, thumb and poster alike.
        plot: Text stored as the item's video ``plot`` info.
        playable: When true the ``IsPlayable`` property is set to ``'true'``,
            otherwise to ``'false'``.

    Returns:
        The configured ``xbmcgui.ListItem``.
    """
    list_item = xbmcgui.ListItem(label=label)
    list_item.setArt({
        'fanart': addonID.getAddonInfo('fanart'),
        'icon': image,
        'thumb': image,
        'poster': image,
    })
    list_item.setInfo('video', {'plot': plot})
    list_item.setProperty('IsPlayable', 'true' if playable else 'false')
    return list_item


def get_links_list(url):
    """Scrape the page at ``url`` and append one row per result or stream.

    Rows are ``(url, list_item, False)`` tuples appended to ``listing``.
    ``listing`` is not created in this function; it is resolved from the
    enclosing scope, so whatever it already holds is kept.

    Three page shapes are handled, in this order:

    * the page contains the localized ``T_ED_RESUL`` text: every
      ``<b>...</b>`` fragment after it becomes a non-playable row, the first
      one prefixed with that text and coloured;
    * the page contains ``AceStream Links``: every matched stream becomes a
      playable row whose URL targets ``plugin://program.plexus``;
    * otherwise a single non-playable row with the ``T_NO_LSTRM`` text.

    If ``listing`` is still empty afterwards, one placeholder row is added.

    Args:
        url: Address of the page to fetch.

    Returns:
        The ``listing`` object the rows were appended to.
    """
    response = requests.get(url, headers=headers)
    htm = response.text
    htm = htm.encode('iso-8859-1', 'ignore')
    htm = BeautifulSoup(htm)

    # The <h1 class="sporttitle"> text, with markup stripped, is reused as the
    # plot of every row.
    plot = htm.findAll('h1', attrs={'class': re.compile('sporttitle')})
    plot = parser.unescape(
        str(plot).replace('\t', '').replace('\n', '').replace('[', '').replace(
            ']', '').decode('utf-8').strip())
    plot = re.sub(r'<[^>]*>', r'', plot)

    htm = str(htm)
    results_label = get_localized_string(T_ED_RESUL)
    if re.search(results_label, htm):
        htm = re.split(results_label, htm, 1)[1]
        htm = htm.split('<div id="comblockabs">', 1)[0]
        htm = re.sub(r'\t', r'', htm)
        htm = re.sub(r'\n', r'', htm)
        htm = parser.unescape(htm.decode('utf-8').strip())
        query = """.+?<b>(.+?)</b>.+?"""
        elinks = re.compile(query, re.DOTALL).findall(htm)
        # Guard the header decoration: with no <b> fragment there is no first
        # element to decorate, and the empty-listing placeholder below applies.
        if elinks:
            elinks[0] = ('[COLOR lightskyblue]' + results_label + elinks[0]
                         + '[/COLOR]')
        image = media + '/33054.png'
        for el in elinks:
            listing.append(('', _make_link_item(el, image, plot, False), False))
    elif re.search('AceStream Links', htm):
        htm = htm.split('<span class="lnkt">AceStream Links</span>', 1)[-1]
        htm = htm.split('<div id="comblockabs">', 1)[0]
        htm = re.sub(r'\t', r'', htm)
        htm = re.sub(r'\n', r'', htm)
        htm = re.sub(r'<td width="16">', r'\n', htm)
        htm = parser.unescape(htm.decode('utf-8').strip())
        query = """<img title=".+?/linkflag/(.+?).png" />.+?class="bitrate".+?">(.+?)/td>.+?<a href="acestream:(.+?)">.+?"""
        elinks = re.compile(query, re.DOTALL).findall(htm)
        for flag, bitrate, stream_id in elinks:
            # The same label is shown in the list and passed on as the name.
            label = ('[B]Audio: ' + check_audio_lang(flag)
                     + ', Bitrate: AceStream ' + bitrate.replace('<', '')
                     + '[/B]')
            image = media + "/flags/" + flag + ".gif"
            k_url = ('plugin://program.plexus/?mode=1&url=acestream:'
                     + stream_id + '&name=' + label)
            listing.append(
                (k_url, _make_link_item(label, image, plot, True), False))
    else:
        label = '[I][B]' + get_localized_string(T_NO_LSTRM) + '[/B][/I]'
        image = media + '/33056.png'
        listing.append(('', _make_link_item(label, image, plot, False), False))

    if not listing:
        image = media + '/33057.png'
        placeholder = _make_link_item(
            '¡¡¡GRFTJX!!! ¡¡¡GRMBLFJ!!!', image, plot, False)
        listing.append(('', placeholder, False))
    return listing
def _make_event_item(label, image, plot):
    """Build one ``xbmcgui.ListItem`` row for ``get_event_list``.

    Args:
        label: Text shown for the row.
        image: Artwork path used for the icon, thumb and poster alike.
        plot: Text stored as the item's video ``plot`` info.

    Returns:
        The configured ``xbmcgui.ListItem``.
    """
    list_item = xbmcgui.ListItem(label)
    list_item.setArt({
        'fanart': addonID.getAddonInfo('fanart'),
        'icon': image,
        'thumb': image,
        'poster': image,
    })
    list_item.setInfo('video', {'plot': plot})
    return list_item


def get_event_list(url):
    """Scrape the events on the page at ``url`` into the Kodi directory.

    Each distinct event matched on the page is compared, through the value
    returned by ``change_date_format``, against two limits:

    * earlier than now + 30 minutes: added as a folder whose URL is built
      with ``build_url`` from ``urlbase`` plus the event link;
    * otherwise earlier than ``end_of_day``: added as an italic, non-folder
      row with an empty URL;
    * anything later is skipped.

    When no row was added, a single row carrying the ``T_NO_LSTRM`` text is
    added instead. The directory is always closed with ``endOfDirectory``.

    Args:
        url: Address of the page to fetch. Its sixth ``/``-separated piece is
            used to pick the artwork under ``media + "/sports/"``; a shorter
            URL raises ``IndexError``.

    Returns:
        None.
    """
    response = requests.get(url, headers=headers)
    htm = response.text
    htm = htm.encode('iso-8859-1', 'ignore')
    htm = BeautifulSoup(htm)

    url_parts = url.split('/')
    image = media + "/sports/" + url_parts[5] + ".sport.png"

    # The <span class="sltitle"> text, with markup stripped, is reused as the
    # plot of every row.
    plot = htm.findAll('span', attrs={'class': re.compile('sltitle')})
    plot = parser.unescape(
        str(plot).replace('\t', '').replace('\n', '').replace('[', '').replace(
            ']', '').decode('utf-8').strip())
    plot = re.sub(r'<[^>]*>', r'', plot)

    # Cut the page down to the event area, then put every '/icons/' fragment
    # on a line of its own so that one regex match corresponds to one event.
    htm = str(htm)
    htm = htm.split('<span class="sltitle">', 1)[-1]
    htm = htm.split('<a href="/es/archive/">', 1)[0]
    htm = re.sub(r'\t', r'', htm)
    htm = re.sub(r'\n', r'', htm)
    htm = re.sub(r'/icons/', r'\n/icons/', htm)
    htm = re.sub(r'</span>', r'</span>\n', htm)
    htm = '\n'.join(line for line in htm.split('\n') if '/icons/' in line)
    htm = parser.unescape(htm.decode('utf-8').strip())
    query = """/icons/(.+?)".+?<a class=.+?href="(.+?)">(.+?)</a>.+?"evdesc">(.+?)<.+?>(.+?)</span>"""
    events = re.compile(query, re.DOTALL).findall(htm)
    events = list(dict.fromkeys(events))  # drop duplicate matches

    # One clock reading for the whole listing. strftime always yields
    # 'HH:MM:SS'; slicing str(time) is unreliable because the microsecond
    # suffix is omitted when it is zero.
    now = datetime.datetime.now()
    # NOTE(review): presumably hms_to_m returns minutes since midnight, which
    # makes end_of_day the coming midnight - confirm against hms_to_m.
    mins = 1440 - hms_to_m(now.strftime('%H:%M:%S'))
    soon = now + datetime.timedelta(minutes=30)
    end_of_day = now + datetime.timedelta(minutes=mins)

    # NOTE: a disabled experiment that looked up a per-event logo through
    # google_images_download used to live in this loop; the sport artwork
    # computed above is used for every row.
    listed = False
    for _icon, href, event, start, desc in events:
        when = change_date_format(start)
        if when < soon:
            stamp = when.strftime("%d/%m/%y %H:%M")
            item_url = build_url(
                {'mode': 'folder', 'foldername': urlbase + href})
            label = ('[COLOR lightskyblue](' + stamp + ')[/COLOR] [B]' + event
                     + '[/B] [COLOR lightseagreen]' + desc + '[/COLOR]')
            is_folder = True
        elif when < end_of_day:
            stamp = when.strftime("%d/%m/%y %H:%M")
            item_url = ''
            label = ('[I][COLOR lightskyblue](' + stamp + ')[/COLOR] [B]'
                     + event + '[/B] [COLOR lightseagreen]' + desc
                     + '[/COLOR][/I]')
            is_folder = False
        else:
            continue

        li = _make_event_item(label, image, plot)
        # NOTE(review): registered once per row, as before; presumably once
        # per directory would be enough - confirm before hoisting.
        xbmcplugin.addSortMethod(handle=addon_handle,
                                 sortMethod=xbmcplugin.SORT_METHOD_LABEL)
        xbmcplugin.addDirectoryItem(handle=addon_handle,
                                    url=item_url,
                                    listitem=li,
                                    isFolder=is_folder)
        listed = True

    if not listed:
        label = '[I][B]' + get_localized_string(T_NO_LSTRM) + '[/B][/I]'
        li = _make_event_item(label, media + '/33056.png', plot)
        xbmcplugin.addDirectoryItem(handle=addon_handle,
                                    url='',
                                    listitem=li,
                                    isFolder=False)
    xbmcplugin.endOfDirectory(handle=addon_handle, succeeded=True)
li = '' response = requests.get(url, headers=headers) # response.status htm = response.text htm = htm.encode('iso-8859-1', 'ignore') htm = BeautifulSoup(htm) htm = str(htm) htm = htm.split('<div id="aul">', 1)[-1] htm = htm.split('<a href="/es/majorcompetitions/">', 1)[0] htm = [line for line in htm.split('\n') if '<a class="main" ' in line] htm = re.sub(r'\t', r'', ''.join(htm)) htm = re.sub(r'</td>', r'</td>\n', htm) htm = re.sub(r'</a><td background=', r'</a>\n<td background=', htm) htm = re.sub(r'(?m)^<td background=.*\n?', r'', htm) htm = parser.unescape(htm.decode('utf-8').strip()) query = """<a class=.+?href="(.+?)".+?<b>(.+?)</b>.+?""" sports = re.compile(query, re.DOTALL).findall(htm) #print (htm) #print (sports) for s in sports: hrefs = urlbase + s[0] image = s[0].split('/') image = media + "/sports/" + image[3] + ".cubic.gif" sport = s[1].encode('utf-8') url = build_url({'mode': 'folder', 'foldername': hrefs}) li = xbmcgui.ListItem(sport)