Example #1
0
    def scrap_episode(self, serie, url, thumbnail):
        """Scrape one episode page and store the episode and its media file.

        The page at ``url`` is downloaded and parsed.  Season, episode number
        and duration come from the ``div.description`` text, which is expected
        to contain ``Temporada <n> | Ep. <n> (<duration>)``; the player release
        URL is read from the raw HTML.  Nothing is stored when an episode with
        the same ``url``, or the same serie/season/number, already exists.

        Args:
            serie: Serie the new episode is attached to.
            url: Address of the episode page to download.
            thumbnail: Value stored in the episode's ``thumbnail`` attribute.

        Raises:
            AttributeError: If the page has no ``div.description`` element (or
                it has no ``<p>`` child).
            IndexError: If the season/episode line or the player release URL
                is not found in the page.
        """
        # NOTE(review): this check filters on Episode.url, but this method
        # never assigns episode.url -- confirm it is populated elsewhere.
        if Episode.objects.filter(url=url).exists():
            print("EXISTS")
            return

        print(url)
        html = urllib.urlopen(url).read()
        soup = BeautifulSoup(html, from_encoding='utf-8')
        description = soup.find('div', 'description')

        season, number, duration = re.findall(
            r'Temporada ([0-9]+) \| Ep. ([0-9]+) \(([0-9:]+)\)',
            description.text)[0]
        season = int(season)
        number = int(number)
        if Episode.objects.filter(serie=serie, season=season, number=number).exists():
            print("EXISTS")
            return

        # Extract everything that can fail *before* saving anything: a page
        # without a release URL must not leave behind an episode with no media,
        # because the duplicate checks above would skip it on every later run.
        smil_url = re.findall(r'player.releaseUrl = "([^"]+)"', html)[0]
        smil_url += "&manifest=m3u&format=SMIL&Tracking=true&Embedd=true"

        # "mm:ss" -> time(0, mm, ss) and "h:mm:ss" -> time(h, mm, ss).  A single
        # field keeps its previous meaning of minutes: time(0, mm).
        parts = [int(part) for part in duration.split(':')]
        if len(parts) < 3:
            parts.insert(0, 0)

        episode = Episode()
        episode.serie = serie
        episode.season = season
        episode.number = number
        episode.duration = time(*parts)
        # First child node of the description's <p> element.
        episode.description = next(iter(description.p.children))
        episode.thumbnail = thumbnail
        episode.save()

        media = MundoFoxMediaFile()
        media._url = smil_url
        media.episode = episode
        media.save()
Example #2
0
 def scrap_serie(self, serie):
     """Store one Episode per entry of the serie's listing page.

     The page at ``serie.url`` is downloaded, ``<?...?>`` sequences are
     stripped from the markup, and every ``td.listado-mediateca-menu`` cell
     becomes an Episode of season 1, numbered by its position on the page
     (starting at 1).  After saving, each episode is handed to
     ``self.scrap_episode`` together with its absolute page URL.

     Args:
         serie: Serie whose ``url`` is scraped and that owns the episodes.
     """
     raw_page = urllib.urlopen(serie.url).read()
     page = re.sub(r'<\?.*?\?>', '', raw_page)
     soup = BeautifulSoup(page, from_encoding='utf8')

     cells = soup.find_all('td', 'listado-mediateca-menu')
     for position, cell in enumerate(cells, 1):
         episode = Episode()
         episode.serie = serie

         # Image and link addresses are resolved against the site's base URL.
         image = cell.find('div', 'imagen-mediateca').img
         episode.thumbnail = urllib.basejoin(self.base_url, image.get('src'))

         title_link = cell.find('h5', 'titulo-mediateca').a
         episode.name = title_link.text
         print(episode.name.encode('utf8'))

         summary = cell.find('span', 'texto-mediateca')
         episode.description = summary.text
         target = urllib.basejoin(self.base_url, title_link.get('href'))

         episode.number = position
         episode.season = 1
         episode.save()
         self.scrap_episode(episode, target)
Example #3
0
    def scrap_serie(self, serie):
        """Create an Episode and an HttpMediaFile for each video of a serie.

        The page at ``serie.url`` is downloaded and every ``<li>`` of the
        ordered list inside ``div.primary-pane`` becomes an episode of
        season 1, numbered by its position in the list (starting at 1).  The
        media URL is obtained from ``self.get_real_url`` using the ``v=``
        parameter of the entry's link.

        Args:
            serie: Serie whose ``url`` is scraped and that owns the episodes.

        Raises:
            IndexError: If an entry's link carries no ``v=`` parameter.
        """
        html = urllib.urlopen(serie.url).read()
        soup = BeautifulSoup(html, from_encoding='utf8')
        for cnt, article in enumerate(soup.find('div', 'primary-pane').ol('li')):
            thumbnail = article.img.get('src')
            name = article.find('span', 'video-overview').span.text
            print(name.encode('utf8'))

            # Right-align the fields on (hours, minutes, seconds) so that both
            # "mm:ss" and "h:mm:ss" are stored correctly; previously the first
            # two fields were always taken as minutes and seconds.
            parts = [int(field) for field in
                     article.find('span', 'video-time').text.split(':')]
            parts = [0] * (3 - len(parts)) + parts
            duration = time(*parts)

            # Resolve the media URL before saving anything, so a failure here
            # does not leave an episode without a media file behind.
            video_id = re.findall('v=([^&]+)', article.a.get('href'))[0]
            media_url = self.get_real_url(video_id)

            episode = Episode()
            episode.thumbnail = thumbnail
            episode.name = name
            episode.duration = duration
            episode.serie = serie
            episode.number = cnt + 1
            episode.season = 1
            episode.save()

            media = HttpMediaFile()
            media.url = media_url
            media.episode = episode
            media.save()
Example #4
0
 def scrap_serie(self, serie):
     """Scrape a serie page: record its genre and store its episodes.

     The page at ``serie.url`` is downloaded.  If the text of
     ``div.titSerieEncabezado`` contains ``nero:`` followed by a word, that
     word is added to ``serie.genres`` (the Genre is created when missing).
     Every ``ul.serieCap`` element is treated as one season and each of its
     ``<li>`` items as one episode, numbered by position (starting at 1).
     Each saved episode is then passed to ``self.get_episode`` with its
     absolute URL.

     Any exception raised while scraping is printed and not re-raised, so a
     failure stops the processing of the remaining episodes.

     Args:
         serie: Serie whose ``url`` is scraped and that owns the episodes.
     """
     # Example serie URL:
     # http://www.conectate.gob.ar/educar-portal-video-web/module/detalleRecurso/DetalleRecurso.do?canalId=1&modulo=menu&temaCanalId=1&tipoEmisionId=3&idRecurso=50123
     try:
         html = urllib.urlopen(serie.url).read()
         soup = BeautifulSoup(html, from_encoding='latin-1')
         header = soup.find('div', 'titSerieEncabezado').text
         genres = re.compile(r"nero:\t +(\w+)", re.DOTALL | re.UNICODE).findall(header)
         if genres:
             serie.genres.add(Genre.objects.get_or_create(name=genres[0])[0])
         for seasonelem in soup('ul', 'serieCap'):
             # The season number is the run of digits ending the element id;
             # reading every trailing digit keeps season 10 from becoming 0.
             # Ids without a trailing number (or no id at all) mean season 1.
             trailing = re.search(r'[0-9]+$', seasonelem.get('id') or '')
             season = int(trailing.group(0)) if trailing else 1
             for cnt, episodeelem in enumerate(seasonelem("li")):
                 episode = Episode()
                 episode.serie = serie
                 episode.name = episodeelem.a.text
                 # Previously the parsed season was computed but never stored.
                 episode.season = season
                 episode.number = cnt + 1
                 episode_url = urllib.basejoin(self.BASE_URL, episodeelem.a.get("href"))
                 episode.save()
                 self.get_episode(episode, episode_url)
                 print("EPISODE S%02dE%02d: %s" % (episode.season, episode.number, episode.name.encode('utf8')))
     except Exception as e:
         # Best-effort scraping: report the problem and give up on this serie.
         print(e)
Example #5
0
    def get_episode(self, serie, url):
        """Create an Episode and an HttpMediaFile for each video thumbnail.

        The page at ``url`` is downloaded and every ``<li>`` under
        ``#ms-player-thumb-videos ul`` becomes a new episode of ``serie``.
        Numbering continues after the episodes the serie already has.  For
        each entry, the number in parentheses found in the link's ``onclick``
        attribute selects the ``#ms-player2-<n>`` element; the second
        double-quoted string in the element that follows it is used as the
        ``sig`` parameter of the stream URL.

        Args:
            serie: Object providing ``episode_set`` that owns the episodes.
            url: Address of the page listing the videos.

        Raises:
            IndexError: If an entry lacks the expected title, image, link,
                player element or signature string.
        """
        first_number = serie.episode_set.count() + 1
        html = urllib.urlopen(url).read()
        dom = lxml.html.document_fromstring(html)
        entries = dom.cssselect("#ms-player-thumb-videos ul li")
        for number, elem in enumerate(entries, first_number):
            name = elem.cssselect(".ms-thumb-titulo")[0].text_content()
            # Encode before printing so non-ASCII titles cannot break the
            # output, as the other scrapers do.
            print(name.encode('utf8'))
            thumbnail = elem.cssselect(".ms-thumb-img img")[0].get('src')

            # Accept multi-digit indexes: "(10)" did not match a single [0-9].
            num = re.findall(r"\(([0-9]+)\)", elem.cssselect("a")[0].get('onclick'))[0]
            elemscript = dom.cssselect("#ms-player2-%s" % num)[0].getnext()
            sig = re.findall('"(.*?)"', elemscript.text_content())[1]

            # Save only once the signature is known, so a malformed entry does
            # not leave an episode without a media file behind.
            episode = Episode()
            episode.serie = serie
            episode.number = number
            episode.name = name
            episode.thumbnail = thumbnail
            episode.save()

            media = HttpMediaFile()
            media.episode = episode
            media.url = "http://api.kewego.com/video/getHTML5Stream?playerKey=%s&sig=%s&format=normal" % (self.player_key, sig)
            media.save()