def download_subtitle(series_name, season, episode, output_dir):
    """Download and unzip English subtitles for one episode.

    :param series_name: series name as used by the subtitle site's search.
    :param season: season number (formatted as two digits).
    :param episode: episode number (formatted as two digits).
    :param output_dir: directory under which a per-episode folder is created.

    Side effects: creates ``<output_dir>/<series>_sXXeYY`` and chdirs into
    it (the previous working directory is NOT restored), then downloads
    every matching subtitle archive and extracts it with the external
    ``unzip`` binary.
    """
    dir_name = os.path.join(
        output_dir, "{}_s{:02}e{:02}".format(series_name, season, episode))
    if not os.access(dir_name, os.F_OK):
        os.makedirs(dir_name)
    os.chdir(dir_name)

    search_url = "{}{} s{:02}e{:02}".format(
        BASE_SEARCH, series_name, season, episode).replace(" ", "+")
    logging.info("Search url: {}".format(search_url))
    search_page_data = BeautifulSOAP(urllib2.urlopen(search_url).read())

    # TODO handle zero matches
    search_results_table = search_page_data.find(
        attrs={"class": "list first_column_title"})
    results = search_results_table.tbody.findAll("tr")
    # Keep only rows flagged as English subtitles, then pull their hrefs.
    results = [x for x in results
               if x.findAll("td")[2].a.div["alt"] == "English subtitles"]
    results = [x.findAll("td")[0].findAll("div")[1].a["href"] for x in results]

    for result in results:
        download_page_url = BASE_URL + "/he/" + result
        download_page_data = urllib2.urlopen(download_page_url).read()
        download_button = BeautifulSOAP(download_page_data).find(
            attrs={"class": "button big download"})
        download_url = BASE_URL + download_button.get("href")
        download_data = urllib2.urlopen(download_url).read()
        zip_filename = "output.zip"
        # Fix: close the handle deterministically instead of leaking it
        # via the anonymous open(...).write(...) idiom.
        with open(zip_filename, "wb") as zip_file:
            zip_file.write(download_data)
        subprocess.call(["unzip", "-o", zip_filename])
        logging.info("Unzipped {}".format(zip_filename))
        os.unlink(zip_filename)
def addToMyChannels(cname):
    """Add channel *cname* to the user's MyChannels.xml (idempotent).

    Loads the existing list (creating an empty ``<channels>`` document
    when the file is missing or unparsable), inserts a ``<channel>`` tag
    for *cname* unless one is already present, then rewrites the file.
    """
    fileName = os.path.join(profile_path, 'MyChannels.xml')
    print(fileName)  # debug: show where the channel list lives
    try:
        MyChannelList = getSoup(fileName, True)
    except Exception:
        # Fix: was a bare ``except:`` (also swallowed KeyboardInterrupt /
        # SystemExit).  File missing or unparsable -> start fresh below.
        MyChannelList = None
    if not MyChannelList:
        MyChannelList = BeautifulSOAP('<channels></channels>')
    # Only append when the channel is not already recorded.
    if not MyChannelList.find("channel", {"cname": cname}):
        channeltag = Tag(MyChannelList, "channel")
        channeltag['cname'] = cname
        MyChannelList.channels.insert(0, channeltag)
    # Persist; rewrites the file even when nothing changed (as before).
    with open(fileName, "wb") as filewriter:
        filewriter.write(str(MyChannelList))
class ExtractNewsInfo(object):
    """Extract the title and <meta> information from an HTML document."""

    def __init__(self):
        # Parsed document; stays None until parser() succeeds.
        self.__soup = None

    def parser(self, html):
        """Parse *html*, discarding any previously parsed document."""
        self.clear()
        self.__soup = BeautifulSOAP(html)

    def get_tilte(self):
        """Return the page's <title> text.

        Raises NoDataParser when parser() has not been called yet.
        (Method name spelling kept as-is for existing callers.)
        """
        if self.__soup:
            return self.__soup.html.head.title.text
        raise NoDataParser("没有调用解析")

    def __get_mata(self, name):
        # Return the content attribute of <meta name=...>, or None when
        # the tag is absent or nothing has been parsed.
        if self.__soup:
            found = self.__soup.find('meta', attrs={'name': name})
            if found:
                return found['content']
        return None

    def clear(self):
        """Drop the parsed document.

        Fix: parser() calls this method, but it was missing from this
        class, so every call to parser() raised AttributeError.
        """
        self.__soup = None
def getChannelItems(name):
    """List a channel's subchannels and playable items.

    :param name: value of the channel's ``name`` attribute in the XML file.

    Reads the module-level ``file`` path, parses it with BeautifulSOAP,
    then calls ``addDir`` for every <subchannel> and ``addLink`` for
    every <item>.
    """
    # Fix: close the file handle deterministically (it was leaked).
    with open(file, 'rb') as response:
        link = response.read()
    soup = BeautifulSOAP(link, convertEntities=BeautifulStoneSoup.XML_ENTITIES)
    channel_list = soup.find('channel', attrs={'name': name})
    items = channel_list('item')

    for channel in channel_list('subchannel'):
        # Fix: use a dedicated variable instead of clobbering ``name``,
        # which previously leaked the last subchannel name into the
        # item loop below.
        sub_name = channel('name')[0].string
        thumb = channel('thumbnail')[0].string
        addDir(sub_name, '', 3, thumb)

    for item in items:
        titles = item('title')
        if titles:
            name = titles[0].string
        # else: keep the previous value of ``name`` (legacy behaviour).

        links = item('link')
        if not links:
            # Fix: the original bare ``except: pass`` could leave ``url``
            # unbound here, crashing addLink with a NameError; skip
            # link-less items instead.
            continue

        # Pick the preferred mirror index, falling back to lower indexes
        # when the item does not carry that many <link> entries —
        # equivalent to the original try/except ladder.
        index = 0
        if __settings__.getSetting('mirror_link') == "true":
            if __settings__.getSetting('mirror_link_low') == "true":
                index = 2
            else:
                index = 1
        url = links[min(index, len(links) - 1)].string

        thumbs = item('thumbnail')
        thumbnail = thumbs[0].string if thumbs else ''
        addLink(url, name, thumbnail)
def getChannelItems(name):
    """Emit directory entries for a channel's subchannels and playable
    links for its items, read from the module-level ``file`` XML path.

    The mirror settings choose which <link> element of each item is
    used, falling back to earlier links when an index is missing.
    """
    handle = open(file, 'rb')
    xml_text = handle.read()
    soup = BeautifulSOAP(xml_text,
                         convertEntities=BeautifulStoneSoup.XML_ENTITIES)
    channel_node = soup.find('channel', attrs={'name': name})
    entries = channel_node('item')

    # Subchannels become directory entries.
    for sub in channel_node('subchannel'):
        name = sub('name')[0].string
        thumb = sub('thumbnail')[0].string
        addDir(name, '', 3, thumb)

    # Items become playable links.
    for entry in entries:
        try:
            name = entry('title')[0].string
        except:
            pass
        try:
            if __settings__.getSetting('mirror_link') == "true":
                try:
                    url = entry('link')[1].string
                except:
                    url = entry('link')[0].string
                if __settings__.getSetting('mirror_link_low') == "true":
                    try:
                        url = entry('link')[2].string
                    except:
                        try:
                            url = entry('link')[1].string
                        except:
                            url = entry('link')[0].string
            else:
                url = entry('link')[0].string
        except:
            pass
        try:
            thumbnail = entry('thumbnail')[0].string
        except:
            thumbnail = ''
        addLink(url, name, thumbnail)
def download_subtitle(series_name, season, episode, output_dir):
    """Fetch English subtitle archives for one episode and unpack them.

    Creates ``<output_dir>/<series>_sXXeYY``, makes it the current
    working directory (not restored afterwards), then downloads every
    matching result page's archive and unzips it in place.
    """
    episode_tag = "{}_s{:02}e{:02}".format(series_name, season, episode)
    target_dir = os.path.join(output_dir, episode_tag)
    if not os.access(target_dir, os.F_OK):
        os.makedirs(target_dir)
    os.chdir(target_dir)

    search_url = "{}{} s{:02}e{:02}".format(
        BASE_SEARCH, series_name, season, episode).replace(" ", "+")
    logging.info("Search url: {}".format(search_url))
    search_soup = BeautifulSOAP(urllib2.urlopen(search_url).read())

    # TODO handle zero matches
    table = search_soup.find(attrs={"class": "list first_column_title"})
    # One pass: keep English rows and extract each row's detail-page href.
    hrefs = [
        row.findAll("td")[0].findAll("div")[1].a["href"]
        for row in table.tbody.findAll("tr")
        if row.findAll("td")[2].a.div["alt"] == "English subtitles"
    ]

    for href in hrefs:
        page_url = BASE_URL + "/he/" + href
        page_html = urllib2.urlopen(page_url).read()
        button = BeautifulSOAP(page_html).find(
            attrs={"class": "button big download"})
        archive_url = BASE_URL + button.get("href")
        archive_bytes = urllib2.urlopen(archive_url).read()
        zip_filename = "output.zip"
        open(zip_filename, "wb").write(archive_bytes)
        subprocess.call(["unzip", "-o", zip_filename])
        logging.info("Unzipped {}".format(zip_filename))
        os.unlink(zip_filename)
class ExtractNewsInfo(object):
    """Extract title, keywords and description from an HTML page."""

    def __init__(self):
        # Parsed document; stays None until parser() succeeds.
        self.__soup = None

    def parser(self, html):
        """Parse *html*, replacing whatever document was parsed before."""
        self.clear()
        self.__soup = BeautifulSOAP(html)

    def get_tilte(self):
        """Return the <title> text of the parsed page.

        Raises NoDataParser when no document has been parsed yet.
        (Method name spelling preserved for existing callers.)
        """
        if not self.__soup:
            raise NoDataParser("没有调用解析")
        return self.__soup.html.head.title.text

    def __get_mata(self, name):
        # Content of <meta name=...>, or None when the tag is absent or
        # nothing has been parsed yet.
        if not self.__soup:
            return None
        found = self.__soup.find('meta', attrs={'name': name})
        return found['content'] if found else None

    def get_key_words(self):
        """Return the page's meta keywords, or None."""
        return self.__get_mata('keywords')

    def get_description(self):
        """Return the page's meta description, or None."""
        return self.__get_mata('description')

    def clear(self):
        """Forget the parsed document."""
        self.__soup = None