Ejemplo n.º 1
0
    def getWantToSeeMovies(self):
        """Collect the user's "want to see" movies and their Netflix matches.

        Queues one task per catalogue page (pages 1 through
        ``self.getWantToSeePages()``) on a ``ThreadPool``. Every page task
        queues one task per movie link found on that page, and every movie
        task queues a Netflix lookup for the movie's title.

        Returns:
            A two-element list ``[moviesVec, netflixVec]``. ``moviesVec``
            holds one dict per movie with the keys ``'name'`` and
            ``'duration'``. ``netflixVec`` holds the second element of each
            ``NetflixWrapper.isTitleInNetflix`` response whose first element
            is truthy. Items are in the order the tasks appended them.
        """
        searchURL = self.baseURL + '/usuario/' + self.username + '/quero-ver/'

        # Result lists shared by (and appended to from) the queued tasks.
        moviesVec = []
        netflixVec = []

        nThreads = 8
        threadPool = ThreadPool(nThreads)
        threadPool.startWorking()

        def fetchSoup(url):
            # Parse inside the `with` so the HTTP response is closed as soon
            # as BeautifulSoup has consumed it, instead of leaking one open
            # connection per fetched page.
            request = urllib.request.Request(url, headers=self.hdr)
            with urllib.request.urlopen(request) as response:
                return BeautifulSoup(response, 'html.parser')

        # NOTE: the parameter names of the task functions below are also the
        # dict keys handed to putInQueue, so they must stay in sync.
        def parsePage(pageUrl):
            catalogueSoup = fetchSoup(pageUrl)
            print(pageUrl)
            # Loop through each movie entry on the current page.
            for movieItem in catalogueSoup.find_all(
                    'li', {'class': 'span2 movie_list_item'}):
                # Search the tag directly; serialising it and re-parsing the
                # markup just to find the first link is wasted work.
                moviehref = str(movieItem.find("a")['href'])
                print(moviehref)
                movieURL = self.baseURL + moviehref
                threadPool.putInQueue(parseMovie, {"movieURL": movieURL})

        def parseMovie(movieURL):
            moviePageSoup = fetchSoup(movieURL)
            titleTag = moviePageSoup.find(
                'h2', {'class': 'movie-original-title'})
            durationTag = moviePageSoup.find(
                'span', {'class': 'running_time'})
            movie = {
                'name': str(titleTag.string),
                'duration': str(durationTag.string),
            }
            print(movie)
            moviesVec.append(movie)
            threadPool.putInQueue(checkNetflix, {'title': movie['name']})

        def checkNetflix(title):
            resp = NetflixWrapper().isTitleInNetflix(title)
            if resp[0]:
                netflixVec.append(resp[1])

        try:
            for i in range(1, self.getWantToSeePages() + 1):
                pageUrl = searchURL + '?pagina=' + str(i)
                threadPool.putInQueue(parsePage, {'pageUrl': pageUrl})
        finally:
            # Block until all tasks are done. Running this in `finally` also
            # shuts the pool down when queuing the page tasks fails, rather
            # than leaving a started pool behind.
            threadPool.end()

        return [moviesVec, netflixVec]
Ejemplo n.º 2
0
    def getWantToSeeMovies(self):
        """Scrape the user's "want to see" list and look each title up on Netflix.

        One task per catalogue page (``?pagina=1`` up to
        ``?pagina=<self.getWantToSeePages()>``) is queued on a ``ThreadPool``.
        A page task queues one task per movie link on the page; a movie task
        records the movie and queues a Netflix lookup for its title.

        Returns:
            ``[moviesVec, netflixVec]`` where ``moviesVec`` is a list of
            dicts with the keys ``'name'`` and ``'duration'``, and
            ``netflixVec`` is a list of ``resp[1]`` values from
            ``NetflixWrapper.isTitleInNetflix`` for which ``resp[0]`` was
            truthy. Both lists are in the order the tasks appended to them.
        """
        searchURL = self.baseURL + '/usuario/' + self.username + '/quero-ver/'

        # Accumulators that the queued tasks append to.
        moviesVec = []
        netflixVec = []

        nThreads = 8
        threadPool = ThreadPool(nThreads)
        threadPool.startWorking()

        # NOTE: the task functions' parameter names double as the dict keys
        # passed to putInQueue; rename both together or not at all.
        def parsePage(pageUrl):
            pageRequest = urllib.request.Request(pageUrl, headers=self.hdr)
            # The context manager closes the HTTP response once it is parsed;
            # previously the response object was never closed.
            with urllib.request.urlopen(pageRequest) as pageResponse:
                catalogueSoup = BeautifulSoup(pageResponse, 'html.parser')
            print(pageUrl)
            # Loop through each movie entry on the current page.
            movieItems = catalogueSoup.find_all(
                'li', {'class': 'span2 movie_list_item'})
            for movieItem in movieItems:
                # Look the link up on the tag itself instead of re-parsing
                # the tag's markup into a second soup.
                moviehref = str(movieItem.find("a")['href'])
                print(moviehref)
                movieURL = self.baseURL + moviehref
                threadPool.putInQueue(parseMovie, {"movieURL": movieURL})

        def parseMovie(movieURL):
            movieRequest = urllib.request.Request(movieURL, headers=self.hdr)
            with urllib.request.urlopen(movieRequest) as movieResponse:
                moviePageSoup = BeautifulSoup(movieResponse, 'html.parser')
            movie = {}
            movie['name'] = str(
                moviePageSoup.find(
                    'h2', {'class': 'movie-original-title'}).string)
            movie['duration'] = str(
                moviePageSoup.find('span', {'class': 'running_time'}).string)
            print(movie)
            moviesVec.append(movie)
            threadPool.putInQueue(checkNetflix, {'title': movie['name']})

        def checkNetflix(title):
            netflixWrapper = NetflixWrapper()
            resp = netflixWrapper.isTitleInNetflix(title)
            if resp[0]:
                netflixVec.append(resp[1])

        try:
            for i in range(1, self.getWantToSeePages() + 1):
                pageUrl = searchURL + '?pagina=' + str(i)
                threadPool.putInQueue(parsePage, {'pageUrl': pageUrl})
        finally:
            # Block until all tasks are done; `finally` makes sure the pool
            # is also ended when queuing the page tasks raises.
            threadPool.end()

        return [moviesVec, netflixVec]