def download_subtitle(series_name, season, episode, output_dir):
    """Download English subtitles for one episode into its own directory.

    :param series_name: series name as known to the subtitle site
    :param season: season number (int)
    :param episode: episode number (int)
    :param output_dir: base directory; a per-episode subdirectory is created
    """
    dir_name = os.path.join(
        output_dir, "{}_s{:02}e{:02}".format(series_name, season, episode))
    # os.path.exists is the idiomatic existence check (was os.access F_OK).
    if not os.path.exists(dir_name):
        os.makedirs(dir_name)
    # NOTE: chdir is a process-wide side effect; unzip below relies on it.
    os.chdir(dir_name)
    search_url = "{}{} s{:02}e{:02}".format(
        BASE_SEARCH, series_name, season, episode).replace(" ", "+")
    logging.info("Search url: {}".format(search_url))
    search_page_data = BeautifulSOAP(urllib2.urlopen(search_url).read())
    # TODO handle zero matches
    search_results_table = search_page_data.find(
        attrs={"class": "list first_column_title"})
    results = search_results_table.tbody.findAll("tr")
    # Keep only rows whose language cell is flagged as English subtitles.
    results = [x for x in results
               if x.findAll("td")[2].a.div["alt"] == "English subtitles"]
    results = [x.findAll("td")[0].findAll("div")[1].a["href"] for x in results]
    for result in results:
        download_page_url = BASE_URL + "/he/" + result
        download_page_data = urllib2.urlopen(download_page_url).read()
        download_button = BeautifulSOAP(download_page_data).find(
            attrs={"class": "button big download"})
        download_url = BASE_URL + download_button.get("href")
        download_data = urllib2.urlopen(download_url).read()
        zip_filename = "output.zip"
        # Close the archive before unzipping (fixes leaked file handle).
        with open(zip_filename, "wb") as zip_file:
            zip_file.write(download_data)
        subprocess.call(["unzip", "-o", zip_filename])
        logging.info("Unzipped {}".format(zip_filename))
        os.unlink(zip_filename)
def panel(url):
    """Add a directory entry for every series found in the n-th list block.

    :param url: numeric string selecting which "blok-liste" div to walk
    """
    # NOTE(review): `web` is not defined in this function — presumably a
    # module-level global holding the page URL; confirm, otherwise this
    # raises NameError at call time.
    link = araclar.get_url(web)
    soup = BS(link.decode('utf-8', 'ignore'))
    div = soup.findAll("div", {"class": "blok-liste"})
    # Finds the series front pages (original comment was in Turkish).
    for li in div[int(url)].findAll('li'):
        url = li.a['href']  # NOTE: rebinds the `url` parameter inside the loop
        name = li.a.text
        name = name.encode("utf-8")
        # NOTE(review): `fileName` is also undefined here — assumed global.
        araclar.addDir(fileName, name, "kategoriler(url)", url, "YOK")
def run(self): #resp = urllib2.urlopen(self.url) #print self.url, resp.getcode() req = urllib2.Request(url=self.url,headers=headers) content = urllib2.urlopen(req) soup = BeautifulSOAP(content,fromEncoding="gb18030") #print soup.originalEncoding #print soup.prettify() songlist = soup.findAll('a',{'href':re.compile(r'/song/(\d)+')}) #print dir(songlist[0]) for song in songlist: song_url='' song_url= 'www.xiami.com' + song.get('href') print song_url ,song.string
def getSoup(url, data=None): print 'getsoup', url, data if url.startswith('http://') or url.startswith('https://'): data = makeRequest(url) if re.search("#EXTM3U", data) or 'm3u' in url: print 'found m3u data', data return data elif data == None: if xbmcvfs.exists(url): if url.startswith("smb://") or url.startswith("nfs://"): copy = xbmcvfs.copy( url, os.path.join(profile, 'temp', 'sorce_temp.txt')) if copy: data = open( os.path.join(profile, 'temp', 'sorce_temp.txt'), "r").read() xbmcvfs.delete( os.path.join(profile, 'temp', 'sorce_temp.txt')) else: addon_log("failed to copy from smb:") else: data = open(url, 'r').read() if re.match("#EXTM3U", data) or 'm3u' in url: print 'found m3u data', data return data else: addon_log("Soup Data not found!") return return BeautifulSOAP(data, convertEntities=BeautifulStoneSoup.XML_ENTITIES)
def getSoup(url): if url.startswith('http://'): data = makeRequest(url) else: if xbmcvfs.exists(url): if url.startswith("smb://"): copy = xbmcvfs.copy( url, xbmc.translatePath( os.path.join(profile, 'temp', 'sorce_temp.txt'))) if copy: data = open( xbmc.translatePath( os.path.join(profile, 'temp', 'sorce_temp.txt')), "r").read() xbmcvfs.delete( xbmc.translatePath( os.path.join(profile, 'temp', 'sorce_temp.txt'))) else: print "--- failed to copy from smb: ----" else: data = open(url, 'r').read() else: print "---- Soup Data not found! ----" return soup = BeautifulSOAP(data, convertEntities=BeautifulStoneSoup.XML_ENTITIES) return soup
def getSoup(url, data=None):
    """Fetch/read a stream source and return it as soup (or raw m3u text).

    Recognises control markers embedded in the URL ($$TSDOWNLOADER$$,
    $$HLSRETRY$$, $$LSProEncKey=...$$), strips them, and records them in
    module globals before downloading.

    :param url: http(s) URL, smb://, nfs://, local path, or bare cache name
    :param data: pre-fetched content; when None it is read from `url`
    :return: BeautifulSOAP instance, raw m3u text, or None when not found
    """
    global viewmode, tsdownloader, hlsretry
    tsdownloader = False
    hlsretry = False
    if url.startswith('http://') or url.startswith('https://'):
        enckey = False
        if '$$TSDOWNLOADER$$' in url:
            tsdownloader = True
            url = url.replace("$$TSDOWNLOADER$$", "")
        if '$$HLSRETRY$$' in url:
            hlsretry = True
            url = url.replace("$$HLSRETRY$$", "")
        if '$$LSProEncKey=' in url:
            # Extract the AES key marker and strip it from the real URL.
            enckey = url.split('$$LSProEncKey=')[1].split('$$')[0]
            rp = '$$LSProEncKey=%s$$' % enckey
            url = url.replace(rp, "")
        data = makeRequest(url)
        if enckey:
            import pyaes
            enckey = enckey.encode("ascii")
            print enckey
            # Zero-pad the key to 16 bytes (AES-128 block size).
            missingbytes = 16 - len(enckey)
            enckey = enckey + (chr(0) * (missingbytes))
            print repr(enckey)
            data = base64.b64decode(data)
            decryptor = pyaes.new(enckey, pyaes.MODE_ECB, IV=None)
            # Drop the trailing NUL padding after decryption.
            data = decryptor.decrypt(data).split('\0')[0]
        if re.search("#EXTM3U", data) or 'm3u' in url:
            return data
    elif data == None:
        # NOTE(review): with `or`, this is true for any url missing either
        # separator — presumably "no separator at all" (i.e. `and`) was
        # intended; confirm before changing.
        if not '/' in url or not '\\' in url:
            url = os.path.join(communityfiles, url)
        if xbmcvfs.exists(url):
            if url.startswith("smb://") or url.startswith("nfs://"):
                # Network shares: copy to a local temp file so open() works.
                copy = xbmcvfs.copy(url, os.path.join(profile, 'temp', 'sorce_temp.txt'))
                if copy:
                    data = open(os.path.join(profile, 'temp', 'sorce_temp.txt'), "r").read()
                    xbmcvfs.delete(os.path.join(profile, 'temp', 'sorce_temp.txt'))
                else:
                    addon_log("failed to copy from smb:")
            else:
                data = open(url, 'r').read()
            if re.match("#EXTM3U", data) or 'm3u' in url:
                return data
        else:
            addon_log("Soup Data not found!")
            return
    if '<SetViewMode>' in data:
        # Optional <SetViewMode> directive in the playlist switches the
        # Kodi container view before the soup is returned.
        try:
            viewmode = re.findall('<SetViewMode>(.*?)<', data)[0]
            xbmc.executebuiltin("Container.SetViewMode(%s)" % viewmode)
            print 'done setview', viewmode
        except:
            pass
    return BeautifulSOAP(data, convertEntities=BeautifulStoneSoup.XML_ENTITIES)
def search(keyword, page, type_='all'):
    """POST a search query to the site and return the parsed result page.

    :param keyword: search term
    :param page: result page number (accepted for interface compatibility;
        not sent by this request — same as the original)
    :param type_: search category; sent to the server as the `type` field
    :return: BeautifulSOAP of the response body
    """
    url = base + 'search.php'
    # submit value is the URL-encoded Chinese label of the search button.
    formdata = dict(submit='%E6%90%9C+%E7%B4%A2', keyword=keyword, type=type_)
    response = requests.post(url, data=formdata)
    # Bug fix: the original parsed the Response object itself and then
    # discarded the soup; parse the body text and return it.
    return BeautifulSOAP(response.text)
def addToMyChannels(cname): try: fileName=os.path.join(profile_path, 'MyChannels.xml') print fileName MyChannelList=getSoup(fileName,True) except: MyChannelList=None if not MyChannelList: MyChannelList= BeautifulSOAP('<channels></channels>') val=MyChannelList.find("channel",{"cname":cname}) #print 'val is ',val if not val: channeltag = Tag(MyChannelList, "channel") channeltag['cname']=cname MyChannelList.channels.insert(0, channeltag) #print MyChannelList.prettify() with open(fileName, "wb") as filewriter: filewriter.write(str(MyChannelList))
def geturladdress(keywords, type, number, filename):
    """Scrape search-result titles and domains and write them to a file.

    :param keywords: query string passed through to getresponse()
    :param type: search type passed through to getresponse()
    :param number: requested result count as a string; fetched in pages of 100
    :param filename: output file receiving the title -> domain mapping
    """
    urltitle = {}
    # int() replaces the long-deprecated string.atoi(); same semantics here.
    pageid = int(number) / 100
    for idpage in range(0, pageid, 1):
        entirehtml = getresponse(keywords, type, idpage * 100)
        soup = BeautifulSOAP(entirehtml)
        results = soup.findAll('li', {'class': 'g'})
        for result in results:
            title_a = result.find('a')
            if not title_a:
                continue
            title = ''.join(title_a.findAll(text=True))
            title = html_unescape(title)
            url = title_a['href']
            url = getdomain(url)
            urltitle[title] = url
    writetofile(filename, urltitle)
def geturladdress(keywords, type, number, filename):
    """Scrape search-result titles and domains and write them to a file.

    :param keywords: query string passed through to getresponse()
    :param type: search type passed through to getresponse()
    :param number: requested result count as a string; fetched in pages of 100
    :param filename: output file receiving the title -> domain mapping
    """
    urltitle = {}
    # int() replaces the long-deprecated string.atoi(); same semantics here.
    pageid = int(number) / 100
    for idpage in range(0, pageid, 1):
        entirehtml = getresponse(keywords, type, idpage * 100)
        soup = BeautifulSOAP(entirehtml)
        results = soup.findAll('li', {'class': 'g'})
        for result in results:
            title_a = result.find('a')
            if not title_a:
                continue
            title = ''.join(title_a.findAll(text=True))
            title = html_unescape(title)
            url = title_a['href']
            url = getdomain(url)
            urltitle[title] = url
    writetofile(filename, urltitle)
def getChannelItems(name):
    """Add directory entries for a channel's subchannels and links for items.

    :param name: 'name' attribute of the <channel> element to load
    """
    # with-block closes the channel file (original leaked the handle).
    with open(file, 'rb') as response:
        link = response.read()
    soup = BeautifulSOAP(link, convertEntities=BeautifulStoneSoup.XML_ENTITIES)
    channel_list = soup.find('channel', attrs={'name': name})
    items = channel_list('item')
    for channel in channel_list('subchannel'):
        name = channel('name')[0].string
        thumb = channel('thumbnail')[0].string
        addDir(name, '', 3, thumb)
    for item in items:
        try:
            name = item('title')[0].string
        except:
            pass
        try:
            # Mirror preference: link[1] when mirror_link is on, link[2]
            # when mirror_link_low is also on; fall back to earlier links.
            if __settings__.getSetting('mirror_link') == "true":
                try:
                    url = item('link')[1].string
                except:
                    url = item('link')[0].string
                if __settings__.getSetting('mirror_link_low') == "true":
                    try:
                        url = item('link')[2].string
                    except:
                        try:
                            url = item('link')[1].string
                        except:
                            url = item('link')[0].string
            else:
                url = item('link')[0].string
        except:
            pass
        try:
            thumbnail = item('thumbnail')[0].string
        except:
            thumbnail = ''
        addLink(url, name, thumbnail)
def getChannelItems(name):
    """Add directory entries for a channel's subchannels and links for items.

    :param name: 'name' attribute of the <channel> element to load
    """
    # with-block closes the channel file (original leaked the handle).
    with open(file, 'rb') as response:
        link = response.read()
    soup = BeautifulSOAP(link, convertEntities=BeautifulStoneSoup.XML_ENTITIES)
    channel_list = soup.find('channel', attrs={'name': name})
    items = channel_list('item')
    for channel in channel_list('subchannel'):
        name = channel('name')[0].string
        thumb = channel('thumbnail')[0].string
        addDir(name, '', 3, thumb)
    for item in items:
        try:
            name = item('title')[0].string
        except:
            pass
        try:
            # Mirror preference: link[1] when mirror_link is on, link[2]
            # when mirror_link_low is also on; fall back to earlier links.
            if __settings__.getSetting('mirror_link') == "true":
                try:
                    url = item('link')[1].string
                except:
                    url = item('link')[0].string
                if __settings__.getSetting('mirror_link_low') == "true":
                    try:
                        url = item('link')[2].string
                    except:
                        try:
                            url = item('link')[1].string
                        except:
                            url = item('link')[0].string
            else:
                url = item('link')[0].string
        except:
            pass
        try:
            thumbnail = item('thumbnail')[0].string
        except:
            thumbnail = ''
        addLink(url, name, thumbnail)
def check_tor_status(self):
    """Fetch the Tor check page and report whether Tor appears active.

    Returns
    -------
    status: Bool
        The tor status
    """
    page = urllib2.urlopen(self.TOR_CHECK_ENDPOINT).read()
    soup = BeautifulSOAP(page)
    heading = soup.body.find('h1', attrs={'class': 'not'})
    # The page's <h1 class="not"> text states whether Tor is in use.
    return self.text_key in heading.text.lower()
def getSoup(url):
    """Return a BeautifulSOAP for an http URL, smb://, nfs://, or local file.

    :param url: http:// URL, smb:// or nfs:// share path, or local file path
    :return: parsed BeautifulSOAP, or None when the source does not exist
    """
    if url.startswith('http://'):
        data = makeRequest(url)
    else:
        if xbmcvfs.exists(url):
            if url.startswith("smb://") or url.startswith("nfs://"):
                # Network shares: copy to a local temp file so open() works.
                temp_path = os.path.join(profile, 'temp', 'sorce_temp.txt')
                copy = xbmcvfs.copy(url, temp_path)
                if copy:
                    # with-block closes the handle (original leaked it).
                    with open(temp_path, "r") as temp_file:
                        data = temp_file.read()
                    xbmcvfs.delete(temp_path)
                else:
                    addon_log("failed to copy from smb:")
            else:
                with open(url, 'r') as source_file:
                    data = source_file.read()
        else:
            addon_log("Soup Data not found!")
            return
    return BeautifulSOAP(data, convertEntities=BeautifulStoneSoup.XML_ENTITIES)
class ExtractNewsInfo(object):
    """Wraps a BeautifulSOAP document and exposes news-page metadata."""

    def __init__(self):
        # Parsed document; None until parser() has been called.
        self.__soup = None

    def parser(self, html):
        """Parse `html` and keep the soup for the getters below.

        :param html: raw HTML text of the news page
        """
        # NOTE(review): clear() is not defined in this excerpt — presumably
        # provided elsewhere on the class (resets __soup); confirm.
        self.clear()
        self.__soup = BeautifulSOAP(html)

    def get_tilte(self):
        """Return the page <title> text; raise NoDataParser before parsing.

        NOTE: method name typo ("tilte") kept — callers use this spelling.
        """
        if self.__soup:
            return self.__soup.html.head.title.text
        # Message translates to "parse was not called".
        raise NoDataParser("没有调用解析")

    def __get_mata(self, name):
        """Return the content of <meta name=...>, or None when absent."""
        if self.__soup:
            __keywords = self.__soup.find('meta', attrs={'name': name})
            if __keywords:
                return __keywords['content']
        return None
def extractUrl(uid):
    """Resolve the playable (secure) stream URL for a TV5Monde+ Afrique video.

    :param uid: video/playlist identifier sent to the getPlaylist service
    :return: the secure stream URL, possibly wrapped as a mastertuga plugin URL
    """
    # Build and encrypt the service request body (NOTE: shadows builtin str).
    str = "operation=getPlaylist&uid=%s" % urllib.quote_plus(uid)
    str = encode(str, key)
    s = getUrl(
        "http://www.tv5mondeplusafrique.com/html/servicesV2/getPlaylist.xml?BulkLoaderNoCache=2_2&",
        post=str)
    s = decode(s, key)
    print "returned", repr(s.decode("unicode-escape"))
    from BeautifulSoup import BeautifulStoneSoup, BeautifulSoup, BeautifulSOAP
    xmlobj = BeautifulSOAP(s.decode("unicode-escape"),
                           convertEntities=BeautifulStoneSoup.XML_ENTITIES)
    vurl = xmlobj("video")[0]
    su = vurl("secureurl")[0].string
    # NOTE(review): this character class strips the individual characters
    # [ ] C D A T anywhere in the string — presumably meant to strip a
    # "[CDATA[...]]" wrapper; confirm before changing.
    su = re.sub('[\[CDATA\]]', '', su)
    if 'manifest.f4m?' in su:
        # HDS manifests are handed off to the mastertuga plugin player.
        su = 'plugin://plugin.video.mastertuga/?url=' + urllib.quote_plus(su)
    return su
class ExtractNewsInfo(object):
    """Parse a news page and expose its title and meta information."""

    def __init__(self):
        self.__soup = None  # parsed document; None until parser() runs

    def parser(self, html):
        """Parse the raw HTML and remember the resulting soup."""
        self.clear()
        self.__soup = BeautifulSOAP(html)

    def get_tilte(self):
        """Return the page title; raises NoDataParser when nothing parsed."""
        if not self.__soup:
            # Message translates to "parse was not called".
            raise NoDataParser("没有调用解析")
        return self.__soup.html.head.title.text

    def __get_mata(self, name):
        """Return the content attribute of <meta name=...>, or None."""
        if not self.__soup:
            return None
        tag = self.__soup.find('meta', attrs={'name': name})
        return tag['content'] if tag else None

    def get_key_words(self):
        return self.__get_mata('keywords')

    def get_description(self):
        return self.__get_mata('description')

    def clear(self):
        """Drop any previously parsed soup."""
        if self.__soup:
            self.__soup = None
def download_subtitle(series_name, season, episode, output_dir):
    """Download English subtitles for one episode into its own directory.

    :param series_name: series name as known to the subtitle site
    :param season: season number (int)
    :param episode: episode number (int)
    :param output_dir: base directory; a per-episode subdirectory is created
    """
    dir_name = os.path.join(
        output_dir, "{}_s{:02}e{:02}".format(series_name, season, episode))
    # os.path.exists is the idiomatic existence check (was os.access F_OK).
    if not os.path.exists(dir_name):
        os.makedirs(dir_name)
    # NOTE: chdir is a process-wide side effect; unzip below relies on it.
    os.chdir(dir_name)
    search_url = "{}{} s{:02}e{:02}".format(
        BASE_SEARCH, series_name, season, episode).replace(" ", "+")
    logging.info("Search url: {}".format(search_url))
    search_page_data = BeautifulSOAP(urllib2.urlopen(search_url).read())
    # TODO handle zero matches
    search_results_table = search_page_data.find(
        attrs={"class": "list first_column_title"})
    results = search_results_table.tbody.findAll("tr")
    # Keep only rows whose language cell is flagged as English subtitles.
    results = [
        x for x in results
        if x.findAll("td")[2].a.div["alt"] == "English subtitles"
    ]
    results = [x.findAll("td")[0].findAll("div")[1].a["href"] for x in results]
    for result in results:
        download_page_url = BASE_URL + "/he/" + result
        download_page_data = urllib2.urlopen(download_page_url).read()
        download_button = BeautifulSOAP(download_page_data).find(
            attrs={"class": "button big download"})
        download_url = BASE_URL + download_button.get("href")
        download_data = urllib2.urlopen(download_url).read()
        zip_filename = "output.zip"
        # Close the archive before unzipping (fixes leaked file handle).
        with open(zip_filename, "wb") as zip_file:
            zip_file.write(download_data)
        subprocess.call(["unzip", "-o", zip_filename])
        logging.info("Unzipped {}".format(zip_filename))
        os.unlink(zip_filename)
def parser(self, html):
    """Parse `html` with BeautifulSOAP and store the soup on the instance.

    :param html: raw HTML/XML text to parse
    """
    # Reset any previously parsed document before re-parsing.
    self.clear()
    self.__soup = BeautifulSOAP(html)
def getSoup(url):
    """Fetch `url` and return its body parsed as a BeautifulSOAP document."""
    page = makeRequest(url)
    soup = BeautifulSOAP(page, convertEntities=BeautifulStoneSoup.XML_ENTITIES)
    return soup
def PlayCommunityStream(channelId, name, mode):
    """Find stream sources for a channel and try them until one plays.

    :param channelId: community channel identifier
    :param name: display name passed to the stream processor
    :param mode: plugin mode (unused in this body)
    """
    try:
        xbmcplugin.endOfDirectory(int(sys.argv[1]))
        pDialog = xbmcgui.DialogProgress()
        ret = pDialog.create('XBMC', 'Finding available resources...')
        # Normalise the "playFirstChannel" setting to a real bool.
        playFirst = selfAddon.getSetting("playFirstChannel")
        if playFirst == None or playFirst == "" or playFirst == "false":
            playFirst = False
        else:
            playFirst = True
        playFirst = bool(playFirst)
        pDialog.update(20, 'Finding sources..')
        providers, default_source_exists = getSourceAndStreamInfo(channelId, playFirst, pDialog, sourceFilter)
        if default_source_exists:
            playFirst = True
        if len(providers) == 0:
            pDialog.close()
            time = 2000  # in miliseconds
            line1 = "No sources found"
            xbmc.executebuiltin('Notification(%s, %s, %d, %s)' % (__addonname__, line1, time, __icon__))
            return
        pDialog.update(30, 'Processing sources..')
        pDialog.close()
        enforceSourceSelection = False
        done_playing = False
        current_index = 0
        auto_skip = False
        auto_skip = True if selfAddon.getSetting("playOneByOne") == "true" else False
        # Retry loop: either ask the user to pick a source or walk the list.
        while not done_playing:
            ret = pDialog.create('XBMC', 'Trying to play the source')
            done_playing = True
            if (enforceSourceSelection or (len(providers) > 1 and not playFirst)) and not auto_skip:
                selectedprovider = selectSource(providers)
                if not selectedprovider:
                    return
            else:
                selectedprovider = providers[current_index]
                enforceSourceSelection = True
            # Each provider is a (source, stream-info, order) triple.
            (source, sInfo, order) = selectedprovider
            processor = source.findtext('processor')
            sourcename = source.findtext('sname')
            # Processor modules live in communityStreamPath; make importable.
            if communityStreamPath not in sys.path:
                sys.path.append(communityStreamPath)
            processorObject = import_module(processor.replace('.py', ''))
            pDialog.update(60, 'Trying to play..')
            pDialog.close()
            # Hand the stream info to the processor as a soup document.
            sinfoSoup = BeautifulSOAP(etree.tostring(sInfo),
                                      convertEntities=BeautifulStoneSoup.XML_ENTITIES)
            done_playing = processorObject.PlayStream(source, sinfoSoup, name, channelId)
            if not done_playing:
                time = 2000  # in miliseconds
                line1 = "Failed playing from " + sourcename
                xbmc.executebuiltin('Notification(%s, %s, %d, %s)' % (__addonname__, line1, time, __icon__))
                if auto_skip:
                    done_playing = False
                    current_index += 1
                    # NOTE(review): likely off-by-one — `>` allows
                    # current_index == len(providers), which would raise
                    # IndexError below; `>=` seems intended. Confirm.
                    if current_index > len(providers):
                        done_playing = True
                    if not done_playing:
                        (s, i, o) = providers[current_index]
                        titles = ''
                        # Build a human-readable "source: title" label.
                        if s.findtext('id') == "generic":
                            try:
                                titles = s.findtext('sname') + ': ' + i.find('item').findtext('title')
                            except:
                                titles = s.findtext('sname')
                        else:
                            try:
                                titles = s.findtext('sname') + ': ' + i.findtext('title')
                            except:
                                titles = s.findtext('sname')
                        ret = pDialog.create('XBMC', 'Trying to play the Item# %d of %d, Cancel in 3 seconds.\n Source:%s' % (current_index + 1, len(providers), titles))
                        xbmc.sleep(3000)
                        # Cancelling auto-skip falls back to manual selection.
                        if pDialog.iscanceled():
                            current_index = 0
                            done_playing = False
                            enforceSourceSelection = True
                            auto_skip = False
        return
    except:
        traceback.print_exc(file=sys.stdout)
def RefreshResources(auto=False, fNameOnly=None):
    """Download/refresh community resource files listed in Resources.xml.

    Uses ETag-based conditional requests so unchanged files are skipped.

    :param auto: True when triggered by the daily auto-load (dialog text only)
    :param fNameOnly: when set, refresh only the file with this name
    """
    pDialog = xbmcgui.DialogProgress()
    if auto:
        ret = pDialog.create('XBMC', 'Daily Auto loading Fetching resources...')
    else:
        ret = pDialog.create('XBMC', 'Fetching resources...')
    baseUrlForDownload = 'https://raw.githubusercontent.com/xamarcade/DaawoTV/master/plugin.video.DaawoTV/resources/community/'
    Fromurl = baseUrlForDownload + 'Resources.xml'
    req = urllib2.Request(Fromurl)
    req.add_header('User-Agent', 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/33.0.1750.154 Safari/537.36')
    req.add_header('Referer', 'http://shahidaddon/')
    response = urllib2.urlopen(req)
    data = response.read()
    response.close()
    pDialog.update(20, 'Importing modules...')
    # Resources.xml lists one <file> element per downloadable resource.
    soup = BeautifulSOAP(data, convertEntities=BeautifulStoneSoup.XML_ENTITIES)
    resources = soup('file')
    fileno = 1
    totalFile = len(resources)
    import hashlib
    for rfile in resources:
        if pDialog.iscanceled():
            return
        progr = (fileno * 80) / totalFile
        fname = rfile['fname']
        if fNameOnly and not fname == fNameOnly:
            continue
        # Optional per-file attributes: explicit url and base64 flag.
        remoteUrl = None
        try:
            remoteUrl = rfile['url']
        except:
            pass
        isBase64 = False
        try:
            isBase64 = rfile['base64'] == "true"
        except:
            pass
        if remoteUrl:
            fileToDownload = remoteUrl
        else:
            fileToDownload = baseUrlForDownload + fname
        # The stored ETag is keyed by a hash of URL + addon version.
        fileHash = hashlib.md5(fileToDownload + addonversion).hexdigest()
        lastFileTime = selfAddon.getSetting("Etagid" + fileHash)
        if lastFileTime == "":
            lastFileTime = None
        resCode = 200
        eTag = None
        try:
            req = urllib2.Request(fileToDownload)
            req.add_header('User-Agent', 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/33.0.1750.154 Safari/537.36')
            req.add_header('Referer', 'http://shahidaddon/')
            # Conditional GET: 304 means the cached copy is still current.
            if lastFileTime:
                req.add_header('If-None-Match', lastFileTime)
            response = urllib2.urlopen(req)
            resCode = response.getcode()
            if resCode <> 304:
                try:
                    eTag = response.info().getheader('Etag')
                except:
                    pass
                data = response.read()
        except Exception as e:
            # urllib2 raises on 304 from some servers; detect it by message.
            s = str(e)
            if 'Not Modified'.lower() in s.lower():
                resCode = 304
                data = ''
        # Discard server error pages accidentally served as file content.
        if ('Exec format error: exec' in data or 'A file permissions error has occurred' in data) and 'xbmcplugin' not in data:
            data = ''
        if len(data) > 0:
            try:
                if isBase64:
                    import base64
                    data = base64.b64decode(data)
            except:
                print 'Failed..not base64.' + fname
                pDialog.update(20 + progr, 'Failed..not base64.' + fname)
                data = ''
        if len(data) > 0:
            with open(os.path.join(communityStreamPath, fname), "wb") as filewriter:
                filewriter.write(data)
            if eTag:
                selfAddon.setSetting(id="Etagid" + fileHash, value=eTag)
            pDialog.update(20 + progr, 'imported ...' + fname)
        elif resCode == 304:
            pDialog.update(20 + progr, 'No Change.. skipping.' + fname)
        else:
            pDialog.update(20 + progr, 'Failed..zero byte.' + fname)
        fileno += 1
    pDialog.close()
    dialog = xbmcgui.Dialog()
    ok = dialog.ok('XBMC', 'Download finished. Close Addon and come back')
def PopUpNotice():
    """Fetch the remote notice feed and show pop-up messages per frequency.

    Each <notify> element carries header/freq/message/date; `freq` is one of
    'every' (always show), 'daily' (once per day) or 'once' (once per date).
    Display history is tracked in the addon cache DB.
    """
    raw_notice = OpenURL(_Edit.PopUpNotice, timeout=500, cookiejar=BYB.cookie_name_create(_Edit.PopUpNotice))
    xml_notice = BeautifulSOAP(raw_notice, convertEntities=BeautifulStoneSoup.XML_ENTITIES)
    # Ensure the tracking table exists before any checks.
    BYB.headers_create(addon_cache, table='pop_message', headers='popupdate,displaydate')
    show = False
    if isinstance(xml_notice, BeautifulSOAP):
        if len(xml_notice('notify')) > 0:
            NotifingData = xml_notice('notify')
            for NotifyData in NotifingData:
                message = ''
                date = None
                header = ''
                freq = ''
                try:
                    header = NotifyData('header')[0].string
                    freq = NotifyData('freq')[0].string
                    message = NotifyData('message')[0].string
                    date = NotifyData('date')[0].string
                except:
                    pass
                # Was something already shown today / for this notice date?
                DisplayDateCheck = BYB.check_is_in_DB_table(
                    addon_cache, table='pop_message', row_header='displaydate',
                    check_item=datetime.datetime.today().strftime('%Y-%m-%d'))
                OnceDisplayCheck = BYB.check_is_in_DB_table(addon_cache, table='pop_message', row_header='popupdate', check_item=date)
                # Record this notice as seen today.
                BYB.write_to_DB(addon_cache, table='pop_message', headers='popupdate,displaydate', items=(date, datetime.datetime.today().strftime('%Y-%m-%d')))
                Dolog(datetime.datetime.today().strftime('%Y-%m-%d'), line_info=True)
                Dolog(BYB.check_is_in_DB_table(
                    addon_cache, table='pop_message', row_header='displaydate',
                    check_item=datetime.datetime.today().strftime('%Y-%m-%d')), line_info=True)
                if freq == 'every':
                    show = True
                elif freq == 'daily' and DisplayDateCheck == False:
                    Dolog('show daily', line_info=True)
                    show = True
                elif freq == 'once' and OnceDisplayCheck == False:
                    show = True
                else:
                    Dolog('key word for freq wrong either every,daily,once')
                if show == True:
                    if len(message) > 0:
                        try:
                            from libs import customgui
                            customgui.PopUpDialog(header=header, text=message)
                        except:
                            BYB.Notify(title='Error', message='Error in displaying Pop Up Message')
                    else:
                        BYB.Notify(title='Error', message='Error in displaying Pop Up Message')
                else:
                    pass
# 下载列表 +歌曲名 # BeautifulSoap # http://www.crummy.com/software/BeautifulSoup/bs3/documentation.zh.html import re,urllib,urllib2 from BeautifulSoup import BeautifulSOAP url= 'http://www.xiami.com/artist/top/id/1234' headers = {'User-Agent':"Mozilla/5.0 (Windows NT 5.1; rv:27.0) Gecko/20100101 Firefox/27.0"} req = urllib2.Request(url=url,headers=headers) content = urllib2.urlopen(req) soup = BeautifulSOAP(content,fromEncoding="gb18030") #print soup.originalEncoding #print soup.prettify() songlist = soup.findAll('a',{'href':re.compile(r'/song/(\d)+')}) #print dir(songlist[0]) for song in songlist: song_url='' song_url= 'www.xiami.com' + song.get('href') print song_url ,song.string #songlist = re.findall(pattern,string) #songlist = re.findall(pattern,content) #for song in songlist: # print song #
def getSoup(fileName, isabsolutePath=False):
    """Read a community-stream XML file and return it as a BeautifulSOAP.

    :param fileName: file name inside communityStreamPath, or a full path
    :param isabsolutePath: when True, treat fileName as an absolute path
    :return: BeautifulSOAP parsed from the file contents
    """
    strpath = os.path.join(communityStreamPath, fileName)
    if isabsolutePath:
        strpath = fileName
    # with-block closes the file deterministically (original leaked the handle).
    with open(strpath, "r") as source_file:
        data = source_file.read()
    # convertEntities deliberately not applied here (matches original).
    return BeautifulSOAP(data)
def __init__(self, html):
    """Parse `html` immediately and keep the resulting soup.

    :param html: raw HTML text to parse with BeautifulSOAP
    """
    # NOTE: the attribute is named `parser` but holds the parsed soup object.
    self.parser = BeautifulSOAP(html)