Ejemplo n.º 1
0
 def test_urls(self):
     """Smoke-test the page parsers against a fixed list of sample URLs.

     For each enabled URL the matching parser is looked up with
     ``parsers.getVidPageParser`` and the result of its ``info`` call is
     printed. Nothing is asserted: the test only fails if a call raises.
     """
     import parsers
     urls = [
         'http://tv.sohu.com/20120726/n349115692.shtml',
         # 'http://v.youku.com/v_show/id_XNzM3MTQwMDY4.html?f=22506977&ev=1',
         'http://www.56.com/u52/v_MTE4NjA0MDY1.html',
         'http://www.tudou.com/programs/view/ZAoQTPEqjAo/',
         # 'http://www.iqiyi.com/v_19rrhkqzgo.html'
     ]
     for url in urls:
         # Parenthesised single-argument print is valid on Python 2 and 3
         # and produces the same output as the bare print statement.
         print(parsers.getVidPageParser(url).info(url))
Ejemplo n.º 2
0
 def test_urls(self):
     """Run every enabled sample URL through its page parser and print it.

     The parser for each URL comes from ``parsers.getVidPageParser``; the
     value returned by ``info`` is printed as-is. There are no assertions,
     so a failure here means one of the calls raised.
     """
     import parsers
     urls = [
         'http://tv.sohu.com/20120726/n349115692.shtml',
         # 'http://v.youku.com/v_show/id_XNzM3MTQwMDY4.html?f=22506977&ev=1',
         'http://www.56.com/u52/v_MTE4NjA0MDY1.html',
         'http://www.tudou.com/programs/view/ZAoQTPEqjAo/',
         # 'http://www.iqiyi.com/v_19rrhkqzgo.html'
     ]
     for url in urls:
         page_parser = parsers.getVidPageParser(url)
         # print(...) with one argument works on both Python 2 and 3.
         print(page_parser.info(url))
Ejemplo n.º 3
0
 def parse_url(self, url, vidfmt, npf, outpath):
     """Query the page parser for ``url`` and derive this task's paths.

     Args:
         url: passed to the parser's ``info`` call.
         vidfmt: forwarded to ``info`` as the ``vidfmt`` keyword.
         npf: returned unchanged unless the parser reports a truthy
             ``nperfile``, which then replaces it.
         outpath: not used by this method; paths are built from
             ``self.outpath``.

     Returns:
         A ``(urls, npf, headers)`` tuple, where ``urls`` is a list with
         blank entries removed.

     Side effects:
         Sets ``self.ext``, ``self.outname``, ``self.tmpdir`` and the
         private ``__task_history`` path.
     """
     # NOTE(review): the parser is selected from self.url while info()
     # receives the ``url`` argument -- confirm the two are always equal.
     parser = parsers.getVidPageParser(self.url)
     urls, title, self.ext, nperfile, headers = parser.info(url, vidfmt=vidfmt)
     # Build a real list: on Python 3 filter() is a one-shot iterator that
     # guess_ext() could exhaust before the URLs are returned.
     urls = [u for u in urls if u.strip()]
     if not self.ext:
         self.ext = guess_ext(urls, title)
     if nperfile:
         npf = nperfile
     title = to_native_string(title)
     # NOTE(review): the title is escaped for tmpdir but not for outname --
     # confirm that is intended.
     self.outname = pjoin(self.outpath, '%s.%s' % (title, self.ext))
     self.tmpdir = pjoin(self.outpath, escape_file_path(title) + '.downloading')
     self.__task_history = pjoin(self.tmpdir, 'url.txt')
     return urls, npf, headers
Ejemplo n.º 4
0
 def parse_url(self, url, vidfmt, npf, outpath):
     """Fetch media info for ``url`` and set up the output locations.

     Args:
         url: handed to the parser's ``info`` call.
         vidfmt: forwarded to ``info`` as the ``vidfmt`` keyword.
         npf: caller-supplied value; replaced by the parser's
             ``nperfile`` when that is truthy.
         outpath: not used by this method; ``self.outpath`` is the base
             directory for every path built here.

     Returns:
         ``(urls, npf, headers)``; ``urls`` is a list containing only the
         entries that are non-empty after stripping whitespace.

     Side effects:
         Assigns ``self.ext``, ``self.outname``, ``self.tmpdir`` and the
         private ``__task_history`` path.
     """
     # NOTE(review): parser lookup uses self.url, not the ``url``
     # argument -- verify against the caller that they never differ.
     parser = parsers.getVidPageParser(self.url)
     urls, title, self.ext, nperfile, headers = parser.info(url,
                                                            vidfmt=vidfmt)
     # A list comprehension keeps ``urls`` reusable; Python 3's filter()
     # yields a lazy iterator that would be consumed by guess_ext().
     urls = [u for u in urls if u.strip()]
     if not self.ext:
         self.ext = guess_ext(urls, title)
     if nperfile:
         npf = nperfile
     title = to_native_string(title)
     # NOTE(review): outname uses the raw title whereas tmpdir uses
     # escape_file_path(title) -- confirm the difference is deliberate.
     self.outname = pjoin(self.outpath, '%s.%s' % (title, self.ext))
     self.tmpdir = pjoin(self.outpath,
                         escape_file_path(title) + '.downloading')
     self.__task_history = pjoin(self.tmpdir, 'url.txt')
     return urls, npf, headers