def get_movie_list_details(top_movie):
    """Fetch full details for every movie entry in *top_movie*.

    Each entry is expected to be a dict carrying a "url" key;
    scrape_movie_details (defined elsewhere in the project) is called
    once per URL.

    Returns a list of the scraped detail dicts, in input order.
    """
    # Comprehension replaces the manual append loop (same result, same order).
    return [scrape_movie_details(movie["url"]) for movie in top_movie]
def get_movie_list_details(movies):
    """Return the scraped details for each movie dict in *movies*.

    Every entry must provide a "url" key; the project-level
    scrape_movie_details is invoked once per entry.
    """
    details = [scrape_movie_details(entry["url"]) for entry in movies]
    return details


# data = get_movie_list_details(scrape_top_list()[0:5])
# pprint(data)
def movies_json():
    """Scrape every movie on the top list and cache each one to its own
    JSON file named after the IMDb title id embedded in its URL.
    """
    top_movies = scrape_top_list()
    for movie in top_movies:
        movie_url = movie["url"]
        # print(movie_url)
        details = scrape_movie_details(movie_url)
        # Characters 27..35 of the URL are taken as the IMDb title id
        # (e.g. "tt0111161") -- assumes the canonical IMDb URL layout;
        # TODO confirm against actual scrape_top_list output.
        title_id = movie["url"][27:36]
        file_name = title_id + ".json"
        with open(file_name, "w+") as json_file:
            json.dump(details, json_file)
        print("success")  # fixed typo: was "succss"
def analyse_language_and_directors():
    """Pretty-print a {director: {language: movie-count}} table for the
    cached top-list data.

    Bug fixed: previously, when "all_movies_data.json" did not exist the
    function only built the cache and returned without analysing anything.
    It now always analyses whatever data it loads or builds.
    """
    movies = _load_or_build_movie_cache()

    # Collect distinct directors and languages, preserving first-seen order.
    unique_directors = []
    unique_languages = []
    for movie in movies:
        for director in movie['director']:
            if director not in unique_directors:
                unique_directors.append(director)
        for language in movie['language']:
            if language not in unique_languages:
                unique_languages.append(language)

    # For every (director, language) pair, count movies matching both;
    # only non-zero counts are recorded, as before.
    main_dic = {}
    for director in unique_directors:
        mini_dic = {}
        for language in unique_languages:
            count = sum(
                1 for movie in movies
                if language in movie["language"] and director in movie["director"]
            )
            if count > 0:
                mini_dic[language] = count
        main_dic[director] = mini_dic
    pprint.pprint(main_dic)


def _load_or_build_movie_cache():
    """Return the cached movie list, scraping and writing the cache first
    when "all_movies_data.json" is absent.
    """
    if os.path.isfile("all_movies_data.json"):
        with open("all_movies_data.json", "r+") as file_data:
            return json.load(file_data)
    movies = []
    for entry in top_scrape_list():
        # Random 1-3 s delay between requests to avoid hammering the server.
        time.sleep(random.randint(1, 3))
        movies.append(scrape_movie_details(entry['url']))
    with open("all_movies_data.json", "w+") as file_data:
        json.dump(movies, file_data)
    return movies
def kya_naam_du():
    """Print the cached movie data; build the cache file first if missing."""
    cache_path = "all_movies_data.json"
    if not os.path.isfile(cache_path):
        # Cache miss: scrape every top-list movie with a polite random
        # delay and persist the result. Nothing is printed on this path
        # (matches the original behaviour).
        scraped = []
        for entry in top_scrape_list():
            time.sleep(random.randint(1, 3))
            scraped.append(scrape_movie_details(entry['url']))
        with open(cache_path, "w+") as handle:
            json.dump(scraped, handle)
    else:
        with open(cache_path, 'r') as handle:
            print(json.load(handle))
def json_data():
    """Scrape each top-list movie, cache it as "<imdb-id>.json", append it
    to the module-level ``main_list`` and finally write the combined list
    to "all_movies_details.json".

    NOTE(review): ``main_list`` is a module-level list defined elsewhere in
    the project; repeated calls keep appending to it -- confirm intended.
    """
    top_movies = scrape_top_list()
    for movie in top_movies:
        movie_url = movie["url"]
        # print(movie_url)
        # Random 1-4 s delay so we do not hammer the server.
        time.sleep(random.randint(1, 4))
        details = scrape_movie_details(movie_url)
        main_list.append(details)
        # Characters 27..35 of the URL are assumed to hold the IMDb title
        # id -- TODO confirm against the actual URL format.
        title_id = movie["url"][27:36]
        with open(title_id + ".json", "w+") as single_file:
            json.dump(details, single_file)
    # Renamed the context variable: it was "json_data", which shadowed
    # this function's own name inside its body.
    with open("all_movies_details.json", 'w+') as combined_file:
        json.dump(main_list, combined_file)
    print("success")
def get_movie_list_details():
    """Return details for the first 10 top-list movies, using a JSON cache.

    Bug fixed: the cache-miss path previously wrote the cache file but
    returned None; it now returns the freshly scraped list as well, so
    both paths return the same data.
    """
    if os.path.isfile('cache_for_task5.json'):
        with open('cache_for_task5.json', 'r') as f:
            return json.load(f)
    # First 10 entries via a slice (replaces range(len(...)) + break at i==10).
    details = [scrape_movie_details(m['url']) for m in top_scrape_list()[:10]]
    with open('cache_for_task5.json', 'w') as f:
        json.dump(details, f)
    return details


# print(get_movie_list_details())
def analyse_movies_language(movies):
    """Print a {language: movie-count} frequency table for *movies*.

    NOTE(review): the data key is spelled 'langauge' here -- confirm that
    this matches the key scrape_movie_details actually emits.
    """
    # Flatten every movie's language list into one sequence.
    all_languages = []
    for movie in movies:
        all_languages.extend(movie['langauge'])
    # Count occurrences; insertion order matches first appearance, so the
    # printed dict is identical to the original two-pass dedupe-and-count.
    counts = {}  # renamed: the original shadowed the builtin `dict`
    for language in all_languages:
        counts[language] = counts.get(language, 0) + 1
    print(counts)


scrap_list = scrap_top_list()[0:30]
# pp(scrape_movie_details(scrap_list))
vishal = scrape_movie_details(scrap_list)
analyse_movies_language(vishal)
# print(scrape_movie_details(scrap_list))
def analyse_movies_directors(movies):
    """Pretty-print a {director: movie-count} frequency table for *movies*.

    NOTE(review): the data key is spelled 'directer_name' here -- confirm
    it matches the scraper's output.
    """
    # Flatten every movie's director list into one sequence.
    all_directors = []
    for movie in movies:
        # pp(movie)
        all_directors.extend(movie['directer_name'])
    # Count in first-seen order; replaces the dedupe-list + nested count
    # loops and avoids shadowing the builtin `dict`. Also fixes a latent
    # bug: with an empty *movies* the original would pp the builtin type.
    counts = {}
    for name in all_directors:
        counts[name] = counts.get(name, 0) + 1
    pp(counts)


scrap_list = scrap_top_list()[0:10]
analyse_movies = scrape_movie_details(scrap_list)
analyse_movies_directors(analyse_movies)
# Fixed: `os` and `json` are used below but were never imported in this
# file section. Imports grouped stdlib / third-party / local.
import json
import os
from pprint import pprint as pp

from bs4 import BeautifulSoup

from scraping import scrap_top_list
from task4 import scrape_movie_details

url = scrap_top_list()[0:5]
# pp(scrape_movie_details(url))
# pp(scrap_top_list())


def scrape_movie_details_1(movies):
    """Cache each movie dict to "<id>.json", with the id sliced out of its
    poster-image URL.

    NOTE(review): ``for url in movie_url`` iterates the *characters* of the
    poster-URL string, so ``url[7:16]`` on each single character is almost
    certainly not the intent -- confirm whether 'poster_image_url' is a
    string or a list of URLs before relying on this function.
    """
    for movie in movies:
        movie_url = movie['poster_image_url']
        for url in movie_url:
            movie_url_1 = url[7:16]
            file_name = movie_url_1 + ".json"
            # pp(file_name)
            if os.path.isfile(file_name):
                with open(file_name, "r") as file:
                    data = json.load(file)
                # pp(data)
            else:
                # Fixed resource leak: the write handle was opened with a
                # bare open() and never closed.
                with open(file_name, "w") as file_1:
                    json.dump(movie, file_1)
                print("succesfully")


url = scrap_top_list()[0:50]
detail_movie = scrape_movie_details(url)
# scrape_movie_details_1(detail_movie)
def get_movie_list_details(movies_list):
    """Append the scraped details of every movie in *movies_list* to the
    module-level ``movies_list_all_details`` accumulator and return it.

    Note: the accumulator is a global defined elsewhere, so repeated calls
    keep extending the same list.
    """
    for entry in movies_list:
        movies_list_all_details.append(scrape_movie_details(entry['name']))
    return movies_list_all_details
# pp(scrape_movie_details(scrap))
# pp(scrape_movie_cast())
# pp(scrap_top_list())
# print(scrap_top_list())


def get_movie_list_details(movies):
    """Attach a cast list to each movie dict and persist the combined
    result to "task_13.json".

    Bug fixed: the original iterated the *whole* module-level ``task_12``
    cast list inside the movie loop, so every movie was appended once per
    cast entry and each copy ended up holding the last cast. Movies and
    casts are now paired one-to-one via zip (assumes ``task_12`` is
    index-aligned with *movies* -- TODO confirm against scrape_movie_cast).
    """
    main_list = []
    for movie, cast in zip(movies, task_12):
        movie["Cast"] = cast
        main_list.append(movie)
    with open("task_13.json", "w") as file:
        json.dump(main_list, file)
    print("succesful")
    # return main_list


scrap = scrap_top_list()[0:5]
task_4 = scrape_movie_details(scrap)
task_12 = scrape_movie_cast()
pp(get_movie_list_details(task_4))
# get_movie_list_details(task_4)
# get_movie_list_details(task_12)