Beispiel #1
0
    def get_episode(self, episode, url):
        """Populate *episode* from the page at *url* and store its media file.

        The thumbnail is the ``src`` of the ``<img>`` inside the
        ``capitulo_thumb`` div; the description is the text found directly
        under the ``titCapitulo`` div with tabs and newlines removed.  The
        episode is saved first, then an ``HttpMediaFile`` (480x360) pointing
        at the first ``'file' : '...'`` value found in the raw page source is
        created and saved.

        Raises ``IndexError`` when the page contains no ``'file'`` entry; by
        then the episode itself has already been saved.
        """
        page = urllib.urlopen(url).read()
        tree = BeautifulSoup(page, from_encoding='latin-1')

        thumb_box = tree.find('div', 'capitulo_thumb')
        episode.thumbnail = thumb_box.img.get('src')

        # The exact type comparison keeps only plain NavigableString nodes;
        # nested tags and NavigableString subclasses are dropped.
        title_box = tree.find("div", "titCapitulo")
        text_nodes = [node for node in title_box.children
                      if type(node) is bs4.element.NavigableString]
        description = ''.join(text_nodes)
        for whitespace in ('\t', '\n'):
            description = description.replace(whitespace, '')
        episode.description = description
        episode.save()

        media = HttpMediaFile()
        # The stream address is taken from the raw page source, not the
        # parsed tree.
        media.url = re.findall("'file' : '(.*?)'", page)[0]
        media.width = 480
        media.height = 360
        media.episode = episode
        media.save()
Beispiel #2
0
    def scrap_serie(self, serie):
        """Create an Episode and its HttpMediaFile for every video on ``serie.url``.

        Each ``<li>`` of the ``<ol>`` inside the ``primary-pane`` div becomes
        one episode of season 1, numbered from 1 in page order.  Thumbnail,
        name and duration are read from the list item; the media URL is
        resolved through ``self.get_real_url`` from the ``v=`` parameter of
        the item's link.

        Raises ``IndexError`` when an item's link has no ``v=`` parameter and
        ``ValueError`` when the duration text is not colon-separated integers
        (or adds up to 24 hours or more).
        """
        html = urllib.urlopen(serie.url).read()
        soup = BeautifulSoup(html, from_encoding='utf8')
        articles = soup.find('div', 'primary-pane').ol('li')
        for number, article in enumerate(articles, 1):
            episode = Episode()
            episode.thumbnail = article.img.get('src')
            episode.name = article.find('span', 'video-overview').span.text
            print(episode.name.encode('utf8'))

            # Fold the colon-separated fields into seconds and normalise, so
            # "MM:SS", "H:MM:SS" and minute counts of 60 or more all give a
            # valid time.  Plain "MM:SS" values yield the same result as
            # time(0, MM, SS).
            total_seconds = 0
            for field in article.find('span', 'video-time').text.split(':'):
                total_seconds = total_seconds * 60 + int(field)
            minutes, seconds = divmod(total_seconds, 60)
            hours, minutes = divmod(minutes, 60)
            episode.duration = time(hours, minutes, seconds)

            episode.serie = serie
            episode.number = number
            episode.season = 1
            episode.save()

            video_id = re.findall('v=([^&]+)', article.a.get('href'))[0]
            media = HttpMediaFile()
            media.url = self.get_real_url(video_id)
            media.episode = episode
            media.save()
Beispiel #3
0
    def get_episode(self, serie, url):
        """Create an Episode and HttpMediaFile for every thumbnail listed at *url*.

        Episodes are numbered consecutively, continuing after the episodes
        *serie* already has.  For each ``<li>`` under
        ``#ms-player-thumb-videos`` the name and thumbnail are read from the
        item; the number inside the parentheses of the link's ``onclick``
        selects the ``#ms-player2-<n>`` element, and the second double-quoted
        string in the text of the element that follows it is used as the
        ``sig`` parameter of the stream URL.

        Raises ``IndexError`` when an expected element, the onclick number or
        the quoted signature is missing.
        """
        first_number = serie.episode_set.count() + 1
        html = urllib.urlopen(url).read()
        dom = lxml.html.document_fromstring(html)
        thumbs = dom.cssselect("#ms-player-thumb-videos ul li")
        for number, elem in enumerate(thumbs, first_number):
            episode = Episode()
            episode.serie = serie
            episode.number = number
            episode.name = elem.cssselect(".ms-thumb-titulo")[0].text_content()
            print(episode.name)
            episode.thumbnail = elem.cssselect(".ms-thumb-img img")[0].get('src')
            episode.save()

            # Accept player indexes with more than one digit; a single-digit
            # pattern finds nothing in "(10)" and the [0] lookup then fails.
            onclick = elem.cssselect("a")[0].get('onclick')
            num = re.findall(r"\(([0-9]+)\)", onclick)[0]
            elemscript = dom.cssselect("#ms-player2-%s" % num)[0].getnext()
            sig = re.findall(r'"(.*?)"', elemscript.text_content())[1]

            media = HttpMediaFile()
            media.episode = episode
            media.url = "http://api.kewego.com/video/getHTML5Stream?playerKey=%s&sig=%s&format=normal" % (self.player_key, sig)
            media.save()
Beispiel #4
0
    def scrap_episode(self, episode, url):
        """Read the air date and the embedded videos of *episode* from *url*.

        Sets ``episode.air`` from the ``fecha-hora`` table cell, creates and
        saves a ``MediaFileSet`` for the episode, and adds one saved
        ``HttpMediaFile`` per ``<embed>`` tag whose video id resolves through
        ``self.get_real_url``.  An embed whose URL resolves to ``None`` is
        reported with ``ERROR`` on stdout and skipped; its position is still
        counted, so precedences may have gaps.

        Raises ``IndexError`` when an embed ``src`` does not contain a
        ``/<id>?`` segment.
        """
        raw_page = urllib.urlopen(url).read()
        # Remove <? ... ?> sequences before the markup is parsed.
        markup = re.sub(r'<\?.*?\?>', '', raw_page)
        tree = BeautifulSoup(markup, from_encoding='utf8')

        # The "/"-separated fields are reversed to match the
        # date(year, month, day) argument order.
        date_fields = tree.find('td', 'fecha-hora').text.split('/')
        episode.air = date(*[int(field) for field in reversed(date_fields)])
        # NOTE(review): the episode is not saved in this method; presumably
        # the caller persists it -- confirm.

        fileset = MediaFileSet()
        fileset.episode = episode
        fileset.save()

        for position, embed in enumerate(tree('embed'), 1):
            video_id = re.findall(r'/([^/]*?)\?', embed.get('src'))[0]

            media = HttpMediaFile()
            media.url = self.get_real_url(video_id)
            if media.url is not None:
                media.precedence = position
                print(position)
                media.save()
                fileset.medias.add(media)
            else:
                print("ERROR")
        fileset.save()