from collections import Counter


def analyse_movies_language(movies):
    """Count how many movies list each language and print the totals.

    movies: iterable of dicts, each expected to carry a 'langauge' key
    (sic — spelled this way by the upstream scraper; do not "fix" the key)
    holding an iterable of language names.

    Prints a {language: count} dict; returns None.
    """
    # Counter flattens and counts in one pass, replacing the original
    # O(n^2) dedupe-then-rescan loops.  dict(...) keeps the printed form
    # and first-seen key order identical to the original output.
    language_counts = Counter(
        lang
        for movie in movies
        for lang in movie['langauge']
    )
    print(dict(language_counts))


# Driver: scrap_top_list / scrape_movie_details are project functions
# assumed imported elsewhere in the original file (not visible here).
scrap_list = scrap_top_list()[0:30]
vishal = scrape_movie_details(scrap_list)
analyse_movies_language(vishal)
from scraping import scrap_top_list
from pprint import pprint as pp


def group_by_decade(movies):
    """Group movies by release decade and pretty-print the mapping.

    movies: iterable of dicts each carrying a 'movie_year' string
    (e.g. '1994').

    Prints {'1990': [movie, ...], ...}; returns None.

    Fix: the original populated a module-level dict, so repeated calls
    accumulated results from earlier calls.  A fresh local dict makes
    each call independent.
    """
    by_decade = {}
    for movie in movies:
        # '1994' -> '1990': drop the last digit and append '0'.
        # Hoisted once instead of the original three evaluations.
        decade = movie['movie_year'][:-1] + '0'
        by_decade.setdefault(decade, []).append(movie)
    pp(by_decade)


group_by_decade(scrap_top_list())
from collections import Counter


def analyse_movies_directors(movies):
    """Count how many movies credit each director and pretty-print totals.

    movies: iterable of dicts, each expected to carry a 'directer_name'
    key (sic — upstream scraper's spelling) holding an iterable of
    director names.

    Prints a {director: count} dict; returns None.

    NOTE(review): the original's loop nesting was ambiguous in the
    mangled source; this reconstruction accumulates all directors and
    prints a single final tally, mirroring the sibling
    analyse_movies_language function — confirm against the intended
    per-movie vs. final output.
    """
    # Counter replaces the O(n^2) dedupe-then-rescan counting and avoids
    # shadowing the builtin name `dict`.
    director_counts = Counter(
        name
        for movie in movies
        for name in movie['directer_name']
    )
    pp(dict(director_counts))


# Driver: scrap_top_list / scrape_movie_details / pp are assumed imported
# elsewhere in the original file (not visible here).
scrap_list = scrap_top_list()[0:10]
analyse_movies = scrape_movie_details(scrap_list)
analyse_movies_directors(analyse_movies)
def get_movie_list_details(movie):
    """Scrape rating, name and year for each movie URL in *movie*.

    movie: iterable of dicts each carrying a 'url_link' key.
    Returns a list of dicts with keys 'url', 'movie_rating',
    'movie_name', 'movie_year'.

    NOTE(review): the `def` header was missing from the mangled source
    and is reconstructed from the call site below; parameter name kept
    as `movie` to match the original body.  Fixes: the loop previously
    rebound `movie` (its own iterable) inside the loop body, and the
    inner title loop shadowed the outer loop variable `i`.
    """
    list_of_item = []
    for item in movie:
        dict_of_item = {}
        new_url = item['url_link']
        detail = requests.get(new_url)
        soup = BeautifulSoup(detail.text, 'html.parser')
        movie_rating = soup.find('span', itemprop='ratingValue').text
        name = soup.find('div', class_='title_wrapper')
        movie_year = name.find('span', id='titleYear').text
        heading = name.find('h1').text
        # Title runs up to the first '(' (year suffix) — same result as
        # the original char-by-char build, including any trailing space.
        movie_name = heading.split('(', 1)[0]
        dict_of_item['url'] = new_url
        dict_of_item['movie_rating'] = movie_rating
        dict_of_item['movie_name'] = movie_name
        dict_of_item['movie_year'] = movie_year
        list_of_item.append(dict_of_item)
    return list_of_item


# Driver: scrap_top_list / requests / BeautifulSoup / pp are assumed
# imported elsewhere in the original file (not visible here).
new_link = scrap_top_list()[0:10]
pp(get_movie_list_details(new_link))
import requests
import json
import os
from bs4 import BeautifulSoup
from pprint import pprint as pp
from scraping import scrap_top_list
from task4 import scrape_movie_details

url = scrap_top_list()[0:5]


def scrape_movie_details_1(movies):
    """Cache each movie dict to a JSON file keyed by a slice of its poster URL.

    movies: iterable of dicts each carrying a 'poster_image_url' key.
    If the cache file exists it is loaded (and discarded, as in the
    original); otherwise the movie dict is dumped to a new file.
    Returns None.
    """
    for movie in movies:
        movie_url = movie['poster_image_url']
        # NOTE(review): if 'poster_image_url' is a string this iterates
        # its characters and url[7:16] is always '' — looks like it was
        # meant to be a list of URLs; confirm against the scraper.
        # Last iteration wins, as in the original.
        for url in movie_url:
            movie_url_1 = url[7:16]
        file_name = movie_url_1 + ".json"
        if os.path.isfile(file_name):
            with open(file_name, "r") as file:
                data = json.load(file)
        else:
            # Fix: the original opened the write handle without closing
            # it (leaked descriptor, possibly unflushed data).
            with open(file_name, "w") as file_1:
                json.dump(movie, file_1)
            print("succesfully")


url = scrap_top_list()[0:50]
detail_movie = scrape_movie_details(url)
# pp(scrape_movie_details(scrap)) # pp(scrape_movie_cast()) # pp(scrap_top_list()) # print(scrap_top_list()) def get_movie_list_details(movies): # pp(movies) main_list = [] for movie in movies: # pp(movie) for cast in task_12: # pp(cast) movie["Cast"] = cast # pp(movie) main_list.append(movie) with open("task_13.json", "w") as file: json.dump(main_list, file) print("succesful") # return main_list scrap = scrap_top_list()[0:5] task_4 = scrape_movie_details(scrap) task_12 = scrape_movie_cast() pp(get_movie_list_details(task_4)) # get_movie_list_details(task_4) # get_movie_list_details(task_12)
from scraping import scrap_top_list
import pprint
from pprint import pprint as pp


def group_by_year(movies):
    """Group movies by release year and pretty-print the mapping.

    movies: iterable of dicts each carrying a 'movie_year' key.
    Prints {year: [movie, ...]}; returns None.

    Fix: the original rescanned the whole list for every movie (O(n^2)),
    rebuilding identical per-year lists for duplicate years.  One pass
    with setdefault produces the exact same dict (same keys in the same
    first-seen order, same list contents and order) in O(n).
    """
    by_year = {}
    for movie in movies:
        by_year.setdefault(movie['movie_year'], []).append(movie)
    pprint.pprint(by_year)


group_by_year(scrap_top_list())