Exemple #1
0
    def _search_movie(self, title, results, genre_id, search_type, start_year, end_year):
        """Search for movies and yield one ``(movieID, title, url)`` per hit.

        Only the first page of search results is fetched.

        :param title: text to search for; UTF-8 encoded into the ``q``
            query parameter.
        :param results: accepted for interface compatibility; this method
            does not use it.
        :param genre_id: when truthy, sent as the ``genreIds`` parameter.
        :param search_type: when truthy, appended to the search URL as an
            extra path segment.
        :param start_year: when truthy, sent as the ``startYear`` parameter.
        :param end_year: when truthy, sent as the ``endYear`` parameter.
        :returns: generator of ``(movieID, title, url)`` tuples. Hits that
            lack a title link, an image link or an ``<img>`` are skipped.
        """
        params = {"q": title.encode("utf-8"), "page": 1}
        if genre_id:
            params['genreIds'] = genre_id
        if start_year:
            params['startYear'] = start_year
        if end_year:
            params['endYear'] = end_year

        search_url = "/" + search_type if search_type else ""
        page_url = filmweb_search_blank + search_url + "?" + urlencode(params)

        # TODO: search more pages, not only the first one.
        content = HTMLGrabber().retrieve(page_url)
        soup = BeautifulSoup(content)
        descriptions = soup.findAll('div', {'class': 'hitDescWrapper'})
        images = soup.findAll('div', {'class': 'hitImage'})

        # zip() pairs each description with its image block and stops at the
        # shorter list, so a page with mismatched counts cannot raise
        # IndexError halfway through the generator.
        for description, image in zip(descriptions, images):
            link = description.find('a', {'class': 'hdr hdr-medium hitTitle'})
            if link is None:
                continue
            # The image src is passed along as well because the URL alone
            # sometimes does not provide the movie ID.
            image_link = image.find('a')
            if image_link is None:
                continue
            img = image_link.find("img")
            if img is None:
                continue
            hit_url = link['href']
            yield get_real_id(hit_url, img['src']), link.text, hit_url
Exemple #2
0
    def _search_filtered_movie(self, title, results, genre_id, search_type):
        """Search for movies with optional filters and yield the hits.

        Only the first page of search results is fetched.

        :param title: optional text to search for; when truthy it is UTF-8
            encoded into the ``q`` query parameter.
        :param results: accepted for interface compatibility; this method
            does not use it.
        :param genre_id: when truthy, sent as the ``genreIds`` parameter.
        :param search_type: when truthy, appended to the search URL as an
            extra path segment.
        :returns: generator of ``(movieID, title, url)`` tuples, where
            ``title`` is the text of the hit's own link. Hits without a
            heading link or a poster image are skipped.
        """
        params = {'page': 1}
        if title:
            params['q'] = title.encode("utf-8")
        if genre_id:
            params['genreIds'] = genre_id

        search_url = "/" + search_type if search_type else ""
        page_url = filmweb_search_blank + search_url + "?" + urllib.urlencode(params)

        content = HTMLGrabber().retrieve(page_url)
        soup = BeautifulSoup(content)
        for hit in soup.findAll('li', {'id': re.compile('hit_([0-9]*)')}):
            heading = hit.find("h3")
            link = heading.find("a") if heading is not None else None
            poster = hit.find("div", {'class': 'filmPoster-1'})
            img = poster.find("img") if poster is not None else None
            if link is None or img is None:
                # Incomplete hit markup: skip it rather than abort the search.
                continue
            hit_url = link['href']
            # Yield the hit's own title. The search query may be empty for a
            # genre-only search and is never the title of a specific result.
            yield get_real_id(hit_url, img['src']), link.text, hit_url
Exemple #3
0
    def parse_filmography(self):
        """Build a ``Movie`` for every title cell found in ``self.soup``.

        Each ``<td class="filmTitleCol">`` contributes one movie, taken from
        the first link inside the cell: the link's ``href`` is used both as
        the URL and as the input to ``get_real_id``, and the link text is
        the title.

        :returns: list of ``Movie`` objects in document order.
        """
        from filmweb.Movie import Movie

        anchors = (
            cell.find("a")
            for cell in self.soup.findAll("td", {'class': "filmTitleCol"})
        )
        return [
            Movie(objID=get_real_id(anchor['href']), title=anchor.text, url=anchor['href'])
            for anchor in anchors
        ]
    def parse_filmography(self):
        """Build a ``Movie`` for every ``data-type="F"`` row in ``self.soup``.

        Each matching ``<tr>`` contributes one movie, taken from the first
        link inside the row: the link's ``href`` is used both as the URL and
        as the input to ``get_real_id``, and the link text is the title.

        :returns: list of ``Movie`` objects in document order.
        """
        from filmweb.Movie import Movie

        def to_movie(row):
            # The first anchor in the row carries both the title and the URL.
            anchor = row.find("a")
            return Movie(
                objID=get_real_id(anchor['href']),
                title=anchor.text,
                url=anchor['href'],
            )

        rows = self.soup.findAll("tr", {'data-type': "F"})
        return [to_movie(row) for row in rows]
Exemple #5
0
    def _search_movie(self, title, results):
        """Search for movies and yield one ``(movieID, title, url)`` per hit.

        Only the first page of search results is fetched.

        :param title: text to search for; passed through
            ``HTMLGrabber.encode_string`` before being placed in the URL.
        :param results: accepted for interface compatibility; this method
            does not use it.
        :returns: generator of ``(movieID, title, url)`` tuples. Hits that
            lack a title link, an image link or an ``<img>`` are skipped.
        """
        grabber = HTMLGrabber()
        p_title = grabber.encode_string(title)

        # TODO: search more pages, not only the first one.
        content = grabber.retrieve(filmweb_search % (p_title, 1))
        soup = BeautifulSoup(content)
        descriptions = soup.findAll('div', {'class': 'hitDescWrapper'})
        images = soup.findAll('div', {'class': 'hitImage'})

        # Compiled once; matches class="hdr hdr-medium hitTitle" for now.
        title_class = re.compile('hdr.*')

        # zip() pairs each description with its image block and stops at the
        # shorter list, so a page with mismatched counts cannot raise
        # IndexError halfway through the generator.
        for description, image in zip(descriptions, images):
            link = description.find('a', {'class': title_class})
            if link is None:
                continue
            # The image src is passed along as well because the URL alone
            # sometimes does not provide the movie ID.
            image_link = image.find('a')
            if image_link is None:
                continue
            img = image_link.find("img")
            if img is None:
                continue
            hit_url = link['href']
            yield get_real_id(hit_url, img['src']), link.text, hit_url
Exemple #6
0
    def _search_person(self, title, results=20):
        """Search for persons and yield one ``(personID, name, url)`` per hit.

        Example search URL: http://www.filmweb.pl/search/person?q=Tom+Cruise
        Only the first page of search results is fetched.

        :param title: text to search for; passed through
            ``HTMLGrabber.encode_string`` before being placed in the URL.
        :param results: accepted for interface compatibility; this method
            does not use it.
        :returns: generator of ``(personID, name, url)`` tuples. Hits that
            lack a name link, an image link or an ``<img>`` are skipped.
        """
        grabber = HTMLGrabber()
        p_title = grabber.encode_string(title)

        # TODO: search more pages, not only the first one.
        content = grabber.retrieve(filmweb_person_search % (p_title, 1))
        soup = BeautifulSoup(content)
        descriptions = soup.findAll('div', {'class': 'hitDescWrapper'})
        images = soup.findAll('div', {'class': 'hitImage'})

        # zip() pairs each description with its image block and stops at the
        # shorter list, so a page with mismatched counts cannot raise
        # IndexError halfway through the generator.
        for description, image in zip(descriptions, images):
            link = description.find('a', {'class': 'hdr hdr-medium hitTitle'})
            if link is None:
                continue
            # The image src is passed along as well because the URL alone
            # sometimes does not provide the ID.
            image_link = image.find('a')
            if image_link is None:
                continue
            img = image_link.find('img')
            if img is None:
                continue
            hit_url = link['href']
            yield get_real_id(hit_url, img['src']), link.text, hit_url