def get_movie_url(movie_data):
    """Look up a movie on IMDb and return the URL of its title page.

    ``movie_data`` is a ``'*'``-separated string.  Field 1 is used as the
    search query; when it is empty, ``romanize(field 0)`` is used instead.
    Field 2 is treated as the release year (it is passed to
    ``check_imdb_movie_year`` and compared against years found in the page).

    The search result is examined in this order:

    1. If the final response URL itself matches ``/title/<id>/``, that URL
       is returned unchanged.
    2. If the page contains exactly one distinct title link, it is returned.
    3. If the first title link passes ``check_imdb_movie_year``, it is
       returned.
    4. Otherwise the table following the ``Titles (Exact Matches)`` marker
       is scanned, and the first row whose year is within two years of
       field 2 supplies the link.

    Links taken from the page are prefixed with the module-level
    ``imdb_url_str``.  Returns ``None`` when nothing matches.
    """
    fields = movie_data.split('*')
    query = fields[1] or romanize(fields[0])
    search_params = {'s': 'tt', 'q': query}
    title_link_re = re.compile(r'/title/[\d\w]+/', re.M | re.U | re.I)

    response = get_response('/find?' + urllib.urlencode(search_params))

    # Case 1: the response URL is already a title page.
    final_url = response.geturl()
    if title_link_re.search(final_url):
        return final_url

    # Collect the distinct title links, keeping first-seen order.
    page_html = response.read()
    seen = set()
    unique_links = []
    for href in title_link_re.findall(page_html):
        if href not in seen:
            seen.add(href)
            unique_links.append(href)

    # Cases 2 and 3: a lone link is accepted as-is; otherwise the first
    # link is accepted only if its year checks out.
    if unique_links:
        first_link = unique_links[0]
        if len(unique_links) == 1 or check_imdb_movie_year(first_link, fields[2]):
            return imdb_url_str + first_link

    # Case 4: scan the "exact matches" table for a row with a close year.
    marker_pos = page_html.find('Titles (Exact Matches)')
    if marker_pos == -1:
        return None

    table_html = get_between(page_html, '<table>', '</table>', marker_pos)
    row_re = re.compile(r'<tr>.*?\((?P<year>\d{4})\).*?</tr>',
                        re.I | re.M | re.U | re.S)
    for row in row_re.finditer(table_html):
        row_year = int(row.group('year'))
        if abs(row_year - int(fields[2])) <= 2:
            return imdb_url_str + get_between(row.group(0), 'href="', '"')

    return None
def imdb_rate_movie(link,rate):
    """Submit a rating for the movie at ``link`` through IMDb's AJAX endpoint.

    The function first fetches the title page at ``link`` to refresh the
    module-level ``cookie`` dict from any ``Set-Cookie`` headers and to read
    the ``data-auth`` token embedded in the page.  It then POSTs the rating
    to ``/ratings/_ajax/title``.

    Parameters:
        link: URL of the IMDb title page.  The title id is taken from the
            second-to-last ``'/'``-separated component, so a URL of the form
            ``.../title/<id>/`` is expected.
        rate: the rating to submit; it is sent as ``str(rate)``.

    Side effects:
        Mutates the module-level ``cookie`` dict.  Calls ``sys.exit(1)``
        (raising ``SystemExit``) when the JSON reply's ``"status"`` is not
        200.
    """
    global cookie
    imdb_url_str = 'http://www.imdb.com'
    path = "/ratings/_ajax/title"
    user_agent = ("Mozilla/5.0 (X11; Linux x86_64; rv:9.0.1) "
                  "Gecko/20100101 Firefox/9.0.1")

    # NOTE(review): relies on ``link`` ending in ".../<id>/" (optionally
    # followed by a query string) - confirm against callers.
    tt = link.split('/')[-2]
    data_dict = {"tconst": tt,
                 "rating": str(rate),
                 "auth": None,
                 "tracking_tag": "title-maindetails"}

    # Step 1: load the title page to refresh cookies and obtain the token.
    headers_dict = {"User-Agent": user_agent,
                    "Cookie": cookie_to_str(cookie),
                    "Pragma": "no-cache",
                    "Cache-Control": "no-cache"}
    request = urllib2.Request(link, None, headers_dict)
    response = urllib2.urlopen(request)
    try:
        for header in response.info().headers:
            # Header names are case-insensitive; the trailing colon keeps
            # "Set-Cookie2:" lines from being mistaken for "Set-Cookie:".
            if not header.lower().startswith('set-cookie:'):
                continue
            # Keep only the leading "name=value" pair, dropping attributes
            # such as path/expires that follow the first ';'.
            pair = header.split(':', 1)[1].split(';')[0].strip()
            # Split on the first '=' only, so values that themselves
            # contain '=' (e.g. base64 padding) are stored intact.
            name, sep, value = pair.partition('=')
            if sep:
                cookie[name] = value
        data_dict["auth"] = get_between(response.read(), 'data-auth="', '"')
    finally:
        response.close()

    # Step 2: post the rating as an XMLHttpRequest with the fresh cookies.
    data = urllib.urlencode(data_dict)
    headers_dict.update({"User-Agent": user_agent,
                         "X-Requested-With": "XMLHttpRequest",
                         "Referer": link,
                         "Content-Length": str(len(data)),
                         "Cookie": cookie_to_str(cookie)})
    request = urllib2.Request(imdb_url_str + path, data, headers_dict)
    response = urllib2.urlopen(request)
    try:
        status = json.loads(response.read())["status"]
    finally:
        response.close()

    if status != 200:
        sys.exit(1)