def parse_movie_detail(self):
    """Download the page at ``self.url`` and extract the movie details.

    The page is requested with ``MTIME_HTML_HEADERS`` and the response
    text is handed to ``self.extract_movie_detail``.

    :return: whatever ``self.extract_movie_detail`` produces for the
        fetched text (presumably a dict of detail fields -- confirm
        against that method).
    """
    page_text = request_and_parse(
        url=self.url,
        params=None,
        headers=MTIME_HTML_HEADERS,
    )
    return self.extract_movie_detail(page_text)
def get_movie_stills(self):
    """Fetch the stills listing for this movie.

    The request URL is built by formatting ``movie_stills_url`` with
    ``self.id`` and is sent with ``MTIME_STILLS_HEADERS``. The response
    text is then passed through ``extract_json`` together with
    ``regex_pattern_stills``.

    According to the original author's note, the result contains
    ``img_id``, ``img_220``, ``img_1000`` and ``img_235`` entries.

    :return: the value produced by ``extract_json``.
    """
    stills_url = movie_stills_url.format(id=self.id)
    response_text = request_and_parse(
        url=stills_url,
        params=None,
        headers=MTIME_STILLS_HEADERS,
    )
    return extract_json(response_text, regex_pattern_stills)
def parse_all_director_to_list(self):
    """Collect the director links found on the page at ``self.url``.

    The page is fetched with ``MTIME_HTML_HEADERS`` and parsed with
    ``etree.HTML``. Every ``<a>`` inside the first ``<dd>`` of
    ``<dl class="info_l">`` is treated as one director.

    :return: a list of dicts, one per anchor, with keys ``name`` (the
        anchor's first text node) and ``url`` (its ``href``).
    :raises IndexError: if an anchor has no text node or no ``href``.
    """
    page_text = request_and_parse(
        url=self.url,
        params=None,
        headers=MTIME_HTML_HEADERS,
    )
    tree = etree.HTML(page_text)
    anchors = tree.xpath('//dl[@class="info_l"]/dd[1]/a')
    # A fresh dict per anchor replaces the shared-dict-plus-copy pattern;
    # key order (name, then url) is kept.
    return [
        {
            'name': anchor.xpath("./text()")[0],
            'url': anchor.xpath("./@href")[0],
        }
        for anchor in anchors
    ]
def parse_all_actor_to_list(self):
    """Collect the actor entries from the movie's full-credits page.

    The page at ``self.url + FULL_CREDITS_SUFFIX`` is fetched with
    ``MTIME_HTML_HEADERS`` and parsed with ``etree.HTML``. Every ``<dd>``
    in the first ``<dl>`` under ``<div class="db_actor">`` is treated as
    one actor.

    Unlike a bare ``xpath(...)[0]``, a field whose XPath matches nothing
    is reported as ``None`` instead of raising ``IndexError``, so a single
    incomplete entry (e.g. no photo or no role) no longer discards the
    whole list.

    :return: a list of dicts, one per actor, with keys ``img``,
        ``name_cn``, ``name_en``, ``actor_url`` and ``play_role``; each
        value is the first XPath match or ``None`` when there is none.
    """
    def first_or_none(node, path):
        # lxml's xpath() returns a list for node-set queries; it is empty
        # when nothing matches.
        matches = node.xpath(path)
        return matches[0] if matches else None

    # Output key -> XPath relative to the actor's <dd>; order fixes the
    # key order of every resulting dict.
    field_paths = (
        ('img', ".//img/@src"),
        ('name_cn', ".//h3/a/text()"),
        ('name_en', ".//p/a/text()"),
        ('actor_url', ".//p/a/@href"),
        ('play_role', "./div[2]//h3/text()"),
    )

    url = self.url + FULL_CREDITS_SUFFIX
    text = request_and_parse(url=url, params=None, headers=MTIME_HTML_HEADERS)
    html = etree.HTML(text)
    main_actors = html.xpath('//div[@class="db_actor"]/dl[1]/dd')
    return [
        {key: first_or_none(actor, path) for key, path in field_paths}
        for actor in main_actors
    ]
def get_movie_box_office(self):
    """Query the Mtime service endpoint for this movie and return its JSON.

    Sends a request to ``movie_detail_API_url`` with
    ``MTIME_SERVICE_HEADERS`` and a parameter set that invokes the
    ``GetMovieOverviewRating`` callback of ``Mtime.Library.Services``,
    using ``self.url`` as the request URL and
    ``self.get_mid_from_url()`` as the first callback argument. The
    response text is run through ``extract_json`` with
    ``regex_pattern_api``.

    NOTE(review): the method name says "box office" while the callback is
    the overview-rating one -- presumably that payload includes box-office
    figures; confirm against the API response.

    :return: the value produced by ``extract_json``.
    """
    query = dict([
        ("Ajax_CallBack", "true"),
        ("Ajax_CallBackType", "Mtime.Library.Services"),
        ("Ajax_CallBackMethod", "GetMovieOverviewRating"),
        ("Ajax_CrossDomain", "1"),
        ("Ajax_RequestUrl", self.url),
        # Current-date string from get_current_date_str().
        ("t", get_current_date_str()),
        ("Ajax_CallBackArgument0", self.get_mid_from_url()),
    ])
    response_text = request_and_parse(
        url=movie_detail_API_url,
        params=query,
        headers=MTIME_SERVICE_HEADERS,
    )
    return extract_json(response_text, regex_pattern_api)
def get_photo_data(self):
    """Fetch the photo page for this person and return its embedded JSON.

    The page at ``self.person_url + PHOTO_SUFFIX`` is requested with
    ``MTIME_PEOPLE_HEADERS``; the response text is then passed to
    ``extract_json`` with ``regex_pattern_director_picture``.

    :return: the value produced by ``extract_json``.
    """
    photo_page_url = self.person_url + PHOTO_SUFFIX
    response_text = request_and_parse(
        url=photo_page_url,
        params=None,
        headers=MTIME_PEOPLE_HEADERS,
    )
    return extract_json(response_text, regex_pattern_director_picture)
def request_with_url(url):
    """Fetch ``url`` with ``DIRECTOR_HEADERS`` and parse it as HTML.

    :param url: address of the page to download.
    :return: the result of ``etree.HTML`` applied to the response text.
    """
    response_text = request_and_parse(
        url=url,
        params=None,
        headers=DIRECTOR_HEADERS,
    )
    return etree.HTML(response_text)