def analyse_movies_director(movie_director):
    """Count how many movie credits each director has.

    Args:
        movie_director: iterable of movie dicts, each carrying a
            '3_movie_director' key whose value is a list of director names.

    Returns:
        dict mapping director name -> number of credits across all movies,
        keyed in order of first appearance (same order the original code
        produced via its dict.fromkeys de-duplication step).
    """
    # Single pass replaces the original's flatten + de-dup + O(n*m) re-scan;
    # dict insertion order preserves first-appearance ordering.
    director_count = {}
    for movie in movie_director:
        for name in movie['3_movie_director']:
            director_count[name] = director_count.get(name, 0) + 1
    return director_count


if __name__ == "__main__":
    # Guarded so importing this module no longer triggers network scraping.
    top_movies = scrape_top_list()
    movie_detail_lists = get_movie_list_details(top_movies[:10])
    print(analyse_movies_director(movie_detail_lists))
# NOTE(review): fragment — the enclosing `def scrape_movie_details_with_time`
# and the start of its per-movie loop are outside this chunk (they appear only
# in the commented-out copy below). Indentation is reconstructed from that
# copy — TODO confirm against the full file.
        # Strip the remaining '/' so the IMDb id (e.g. "tt0066763") is usable
        # as a bare filename.
        cut_url = cut.replace("/", "")
        cut_url_list.append(cut_url)
    # Creation of file of name like as tt0066763,tt0345623.
    # Cache-on-disk: only fetch a movie page when its file is missing.
    for count in range(len(cut_url_list)):
        if os.path.isfile(("Task_9_file/") + cut_url_list[count]):
            print("File Exist")
        else:
            # Throttle requests to avoid hammering the server.
            time.sleep(3)
            response = requests.get(movie_url[count])
            with open(("Task_9_file/") + cut_url_list[count], "w") as file_data:
                file_data.write(response.text)
                # Redundant: the `with` block already closes the file.
                file_data.close()
            print("File Created")

# Module-level driver: scrapes the top list, then caches each movie page.
scraped_data = scrape_top_list()
scrape_movie_details_with_time(scraped_data)
# NOTE(review): dead commented-out copy of the function's opening — kept as-is.
# # #
# import os
# def scrape_movie_details_with_time(movies):
#     lis =[]
#     movie_url=[]
#     cut_url_list=[]
#     for movie in movies:
#         movie_url.append(movie['5_Url'])
#         cut=movie['5_Url'].replace("https://www.imdb.com/title/", "")
# NOTE(review): fragment — the enclosing function's `def` line (presumably
# scrape_movie_cast(movie_cast_url), per the commented call below) is outside
# this chunk; indentation is reconstructed — TODO confirm against the full file.
    time.sleep(1)
    response = requests.get(movie_cast_url)  # Requests for half links
    soup = BeautifulSoup(response.text, 'html.parser')
    # Locate the cast section, then the "see more" anchor holding the
    # relative link to the full cast page.
    main_div = soup.find('div', attrs={"class": "article", "id": "titleCast"})
    sub_div = main_div.find('div', class_="see-more")
    full_link = movie_cast_url + sub_div.a['href']  # Addition of two half links
    time.sleep(1)
    respons2 = requests.get(full_link)  # Requests for data scrape from full-link
    soup = BeautifulSoup(respons2.text, "html.parser")
    main_div2 = soup.find('table', class_="cast_list")
    td = main_div2.find_all("td", class_="primary_photo")
    # NOTE(review): actor_id / actor_name are written but never used below.
    actor_id = []
    actor_name = []
    # Scrape Actor Id and Name
    my_list = []
    my_dict = {"Imdb_Id": "", "Name": ""}
    for count in td:
        tag_a = count.find('a')
        # href looks like "/name/nmXXXXXXX/"; [6:-1] keeps the bare id —
        # presumably the "nm…" token; verify against live markup.
        my_dict["Imdb_Id"] = tag_a['href'][6:-1]
        my_dict["Name"] = tag_a.img['title']
        my_list.append(my_dict)
        # Re-bind a fresh dict so the appended entry isn't mutated next loop.
        my_dict = {"Imdb_Id": "", "Name": ""}
    return my_list

# Module-level driver over the scraped top list.
movie_lst = []
for scraped_data in scrape_top_list():
    Movie_name = scraped_data['1_Name']
    # pprint(scrape_movie_cast(scraped_data['5_Url']))