Esempio n. 1
0
def download_subtitle(series_name, season, episode, output_dir):
    """
    Download and unpack every English subtitle archive found for an episode.

    Creates ``<output_dir>/<series_name>_sSSeEE`` when it does not exist and
    makes it the current working directory (the process stays there after the
    call). The search page built from ``BASE_SEARCH`` is then scraped; for
    each result row labelled "English subtitles" the archive is downloaded to
    a temporary ``output.zip``, extracted with ``unzip -o`` and removed.

    If the search page has no results table, or a download page has no
    download button, a warning is logged and that step is skipped.

    :param series_name: series title; used in the directory name and the query
    :param season: season number, rendered zero-padded to two digits
    :param episode: episode number, rendered zero-padded to two digits
    :param output_dir: directory under which the episode directory is created
    """
    dir_name = os.path.join(output_dir, "{}_s{:02}e{:02}".format(series_name, season, episode))
    if not os.access(dir_name, os.F_OK):
        os.makedirs(dir_name)
    # The relative zip path below (and possibly callers) rely on this chdir.
    os.chdir(dir_name)

    search_url = "{}{} s{:02}e{:02}".format(BASE_SEARCH, series_name, season, episode).replace(" ", "+")
    logging.info("Search url: {}".format(search_url))
    search_page_data = BeautifulSOAP(urllib2.urlopen(search_url).read())
    search_results_table = search_page_data.find(attrs={"class": "list first_column_title"})
    if search_results_table is None or search_results_table.tbody is None:
        # Zero matches: the results table is absent from the page.
        logging.warning("No search results for %s", search_url)
        return

    rows = search_results_table.tbody.findAll("tr")
    english_rows = [row for row in rows if row.findAll("td")[2].a.div["alt"] == "English subtitles"]
    hrefs = [row.findAll("td")[0].findAll("div")[1].a["href"] for row in english_rows]

    zip_filename = "output.zip"
    for href in hrefs:
        download_page_url = BASE_URL + "/he/" + href
        download_page_data = urllib2.urlopen(download_page_url).read()
        download_button = BeautifulSOAP(download_page_data).find(attrs={"class": "button big download"})
        button_href = download_button.get("href") if download_button is not None else None
        if not button_href:
            logging.warning("No download button found on %s", download_page_url)
            continue
        download_data = urllib2.urlopen(BASE_URL + button_href).read()

        # Close the file before unzip reads it so all bytes are flushed.
        with open(zip_filename, "wb") as zip_file:
            zip_file.write(download_data)
        try:
            status = subprocess.call(["unzip", "-o", zip_filename])
            if status == 0:
                logging.info("Unzipped {}".format(zip_filename))
            else:
                logging.error("unzip exited with status %s for %s", status, zip_filename)
        finally:
            # Always drop the temporary archive, even when extraction fails.
            os.unlink(zip_filename)
Esempio n. 2
0
def addToMyChannels(cname):
    """Add a ``<channel cname="...">`` entry to MyChannels.xml if missing.

    The file lives in ``profile_path``. When it cannot be loaded through
    ``getSoup`` (or loads as an empty document), a fresh
    ``<channels></channels>`` document is used instead. The file is rewritten
    only when a new entry was actually inserted.

    :param cname: channel name stored in the ``cname`` attribute
    """
    # Built outside the try block so the name is always bound for the write.
    fileName = os.path.join(profile_path, 'MyChannels.xml')
    print(fileName)
    try:
        MyChannelList = getSoup(fileName, True)
    except Exception:
        # Best effort: any load/parse failure falls back to an empty document,
        # without swallowing SystemExit/KeyboardInterrupt.
        MyChannelList = None
    if not MyChannelList:
        MyChannelList = BeautifulSOAP('<channels></channels>')

    val = MyChannelList.find("channel", {"cname": cname})
    if not val:
        channeltag = Tag(MyChannelList, "channel")
        channeltag['cname'] = cname
        # Newest entry goes first in the list.
        MyChannelList.channels.insert(0, channeltag)

        with open(fileName, "wb") as filewriter:
            filewriter.write(str(MyChannelList))
Esempio n. 3
0
class ExtractNewsInfo(object):
    """Expose the title and ``<meta>`` values of a parsed HTML document.

    Call :meth:`parser` with the HTML text first; the accessors read from the
    document parsed last. The misspelled method names are kept as-is because
    they are the names callers use.
    """

    def __init__(self):
        # Parsed document, or None until parser() has been called.
        self.__soup = None

    def parser(self, html):
        """Parse *html*, replacing any previously parsed document."""
        self.clear()
        self.__soup = BeautifulSOAP(html)

    def get_tilte(self):
        """Return the text of ``<html><head><title>``.

        :raises NoDataParser: when no document has been parsed yet
        """
        if self.__soup:
            return self.__soup.html.head.title.text
        # Message reads: "parse has not been called".
        raise NoDataParser("没有调用解析")

    def __get_mata(self, name):
        """Return the ``content`` of ``<meta name=NAME>``, or None if absent."""
        if self.__soup:
            meta_tag = self.__soup.find('meta', attrs={'name': name})
            if meta_tag:
                return meta_tag['content']
        return None

    def clear(self):
        """Forget the parsed document; parser() relies on this existing."""
        self.__soup = None
Esempio n. 4
0
def getChannelItems(name):
        """List the sub-channels and items of the channel called *name*.

        Reads the XML document at the module-level ``file`` path, looks up the
        ``<channel name=NAME>`` element, calls ``addDir`` for each of its
        ``<subchannel>`` elements and ``addLink`` for each ``<item>``.

        Which ``<link>`` of an item is used depends on the settings:
        ``mirror_link_low`` prefers the third link, ``mirror_link`` the
        second, otherwise the first. When the preferred link is missing the
        nearest lower-numbered one is used. Items without any ``<link>`` are
        skipped; a missing title or thumbnail becomes an empty string.
        Nothing happens when the channel is not found.

        :param name: value of the ``name`` attribute of the wanted channel
        """
        with open(file, 'rb') as response:
                link = response.read()
        soup = BeautifulSOAP(link, convertEntities=BeautifulStoneSoup.XML_ENTITIES)
        channel_list = soup.find('channel', attrs={'name': name})
        if channel_list is None:
                return

        items = channel_list('item')
        for channel in channel_list('subchannel'):
                sub_name = channel('name')[0].string
                thumb = channel('thumbnail')[0].string
                addDir(sub_name, '', 3, thumb)

        # Settings do not change while listing, so resolve the preferred link
        # index once. The low-quality mirror wins when both are enabled, and
        # the plain mirror setting is honoured on its own (it used to be
        # overwritten by the fall-through to the first link).
        if __settings__.getSetting('mirror_link_low') == "true":
                preferred = 2
        elif __settings__.getSetting('mirror_link') == "true":
                preferred = 1
        else:
                preferred = 0

        for item in items:
                links = item('link')
                if not links:
                        # No URL to play; do not reuse the previous item's.
                        continue
                # Fall back to the highest available index below the preferred.
                url = links[min(preferred, len(links) - 1)].string

                titles = item('title')
                title = titles[0].string if titles else ''

                thumbs = item('thumbnail')
                thumbnail = thumbs[0].string if thumbs else ''

                addLink(url, title, thumbnail)
Esempio n. 5
0
def getChannelItems(name):
    """List the sub-channels and items of the channel called *name*.

    Reads the XML document at the module-level ``file`` path, looks up the
    ``<channel name=NAME>`` element, calls ``addDir`` for each of its
    ``<subchannel>`` elements and ``addLink`` for each ``<item>``.

    Which ``<link>`` of an item is used depends on the settings:
    ``mirror_link_low`` prefers the third link, ``mirror_link`` the second,
    otherwise the first. When the preferred link is missing the nearest
    lower-numbered one is used. Items without any ``<link>`` are skipped; a
    missing title or thumbnail becomes an empty string. Nothing happens when
    the channel is not found.

    :param name: value of the ``name`` attribute of the wanted channel
    """
    with open(file, 'rb') as response:
        link = response.read()
    soup = BeautifulSOAP(link, convertEntities=BeautifulStoneSoup.XML_ENTITIES)
    channel_list = soup.find('channel', attrs={'name': name})
    if channel_list is None:
        return

    items = channel_list('item')
    for channel in channel_list('subchannel'):
        sub_name = channel('name')[0].string
        thumb = channel('thumbnail')[0].string
        addDir(sub_name, '', 3, thumb)

    # Settings do not change while listing, so resolve the preferred link
    # index once. The low-quality mirror wins when both are enabled, and the
    # plain mirror setting is honoured on its own (it used to be overwritten
    # by the fall-through to the first link).
    if __settings__.getSetting('mirror_link_low') == "true":
        preferred = 2
    elif __settings__.getSetting('mirror_link') == "true":
        preferred = 1
    else:
        preferred = 0

    for item in items:
        links = item('link')
        if not links:
            # No URL to play; do not reuse the previous item's.
            continue
        # Fall back to the highest available index below the preferred one.
        url = links[min(preferred, len(links) - 1)].string

        titles = item('title')
        title = titles[0].string if titles else ''

        thumbs = item('thumbnail')
        thumbnail = thumbs[0].string if thumbs else ''

        addLink(url, title, thumbnail)
Esempio n. 6
0
def download_subtitle(series_name, season, episode, output_dir):
    """
    Download and unpack every English subtitle archive found for an episode.

    Creates ``<output_dir>/<series_name>_sSSeEE`` when it does not exist and
    makes it the current working directory (the process stays there after the
    call). The search page built from ``BASE_SEARCH`` is then scraped; for
    each result row labelled "English subtitles" the archive is downloaded to
    a temporary ``output.zip``, extracted with ``unzip -o`` and removed.

    If the search page has no results table, or a download page has no
    download button, a warning is logged and that step is skipped.

    :param series_name: series title; used in the directory name and the query
    :param season: season number, rendered zero-padded to two digits
    :param episode: episode number, rendered zero-padded to two digits
    :param output_dir: directory under which the episode directory is created
    """
    dir_name = os.path.join(
        output_dir, "{}_s{:02}e{:02}".format(series_name, season, episode))
    if not os.access(dir_name, os.F_OK):
        os.makedirs(dir_name)
    # The relative zip path below (and possibly callers) rely on this chdir.
    os.chdir(dir_name)

    search_url = "{}{} s{:02}e{:02}".format(BASE_SEARCH, series_name, season,
                                            episode).replace(" ", "+")
    logging.info("Search url: {}".format(search_url))
    search_page_data = BeautifulSOAP(urllib2.urlopen(search_url).read())
    search_results_table = search_page_data.find(
        attrs={"class": "list first_column_title"})
    if search_results_table is None or search_results_table.tbody is None:
        # Zero matches: the results table is absent from the page.
        logging.warning("No search results for %s", search_url)
        return

    rows = search_results_table.tbody.findAll("tr")
    english_rows = [
        row for row in rows
        if row.findAll("td")[2].a.div["alt"] == "English subtitles"
    ]
    hrefs = [
        row.findAll("td")[0].findAll("div")[1].a["href"]
        for row in english_rows
    ]

    zip_filename = "output.zip"
    for href in hrefs:
        download_page_url = BASE_URL + "/he/" + href
        download_page_data = urllib2.urlopen(download_page_url).read()
        download_button = BeautifulSOAP(download_page_data).find(
            attrs={"class": "button big download"})
        button_href = None
        if download_button is not None:
            button_href = download_button.get("href")
        if not button_href:
            logging.warning("No download button found on %s",
                            download_page_url)
            continue
        download_data = urllib2.urlopen(BASE_URL + button_href).read()

        # Close the file before unzip reads it so all bytes are flushed.
        with open(zip_filename, "wb") as zip_file:
            zip_file.write(download_data)
        try:
            status = subprocess.call(["unzip", "-o", zip_filename])
            if status == 0:
                logging.info("Unzipped {}".format(zip_filename))
            else:
                logging.error("unzip exited with status %s for %s", status,
                              zip_filename)
        finally:
            # Always drop the temporary archive, even when extraction fails.
            os.unlink(zip_filename)
Esempio n. 7
0
class ExtractNewsInfo(object):
    """Expose the title and ``<meta>`` values of a parsed HTML document.

    Call :meth:`parser` with the HTML text first; the accessors read from the
    document parsed last. The misspelled method names are kept as-is because
    they are the names callers use.
    """

    def __init__(self):
        # Parsed document, or None until parser() has been called.
        self.__soup = None

    def parser(self, html):
        """Parse *html*, replacing any previously parsed document."""
        self.clear()
        self.__soup = BeautifulSOAP(html)

    def get_tilte(self):
        """Return the text of ``<html><head><title>``.

        :raises NoDataParser: when no document has been parsed yet
        """
        if self.__soup:
            return self.__soup.html.head.title.text
        # Message reads: "parse has not been called".
        raise NoDataParser("没有调用解析")

    def __get_mata(self, name):
        """Return the ``content`` of ``<meta name=NAME>``, or None if absent."""
        if self.__soup:
            meta_tag = self.__soup.find('meta', attrs={'name': name})
            if meta_tag:
                return meta_tag['content']
        return None

    def get_key_words(self):
        """Return the ``keywords`` meta content, or None."""
        return self.__get_mata('keywords')

    def get_description(self):
        """Return the ``description`` meta content, or None."""
        return self.__get_mata('description')

    def clear(self):
        """Forget the parsed document."""
        self.__soup = None