def _search_movie(self, title, results=20):
    """Yield ``(movieID, title, url)`` tuples for movies matching *title*.

    Only the first page of search results is fetched.
    ``results`` is accepted for interface compatibility but not yet used
    to limit or paginate the output.
    """
    grabber = HTMLGrabber()
    p_title = grabber.encode_string(title)
    # TODO: search more pages, not only page 1.
    content = grabber.retrieve(filmweb_search % (p_title, 1))
    soup = BeautifulSoup(content)
    desc_divs = soup.findAll('div', {'class': 'hitDescWrapper'})
    img_divs = soup.findAll('div', {'class': 'hitImage'})
    # zip() pairs each description with its image wrapper and avoids an
    # IndexError when the scraped lists differ in length.
    for desc, img_div in zip(desc_divs, img_divs):
        # e.g. class="hdr hdr-medium hitTitle" for now
        link = desc.find('a', {'class': re.compile('hdr.*')})
        hit_title = link.text
        url = link['href']
        # Have to do another check because sometimes the url doesn't
        # provide the movieID; fall back on the poster image's src.
        img_anchor = img_div.find('a')
        if img_anchor is not None:
            img = img_anchor.find("img")
            movieID = get_real_id(url, img['src'])
            yield movieID, hit_title, url
def _search_person(self, title, results=20):
    """Yield ``(personID, title, url)`` tuples for people matching *title*.

    Example query: http://www.filmweb.pl/search/person?q=Tom+Cruise
    Only the first page of search results is fetched.
    ``results`` is accepted for interface compatibility but not yet used
    to limit or paginate the output.
    """
    grabber = HTMLGrabber()
    p_title = grabber.encode_string(title)
    # TODO: search more pages, not only page 1.
    content = grabber.retrieve(filmweb_person_search % (p_title, 1))
    soup = BeautifulSoup(content)
    desc_divs = soup.findAll('div', {'class': 'hitDescWrapper'})
    img_divs = soup.findAll('div', {'class': 'hitImage'})
    # zip() pairs each description with its image wrapper and avoids an
    # IndexError when the scraped lists differ in length.
    for desc, img_div in zip(desc_divs, img_divs):
        link = desc.find('a', {'class': 'hdr hdr-medium hitTitle'})
        hit_title = link.text
        url = link['href']
        # Have to do another check because sometimes the url doesn't
        # provide the personID; fall back on the portrait image's src.
        img_anchor = img_div.find('a')
        if img_anchor is not None:
            img = img_anchor.find('img')
            personID = get_real_id(url, img['src'])
            yield personID, hit_title, url