def analyse_language_and_directors():
    """For every director in the top-250, count the languages used across all
    top-250 movies that director worked on, then print the result.

    Prints a dict of {director_name: {language: count}}.
    """
    # Hoisted: the original called scrape_top_list() and re-loaded every
    # movie's details inside nested loops (250 * 250 scrapes). Load once.
    top_movies = scrape_top_list()
    movie_details = [json.loads(find_position(movie)) for movie in top_movies]

    directors_details = {}
    for details in movie_details:
        for director in details['Director']:
            if director in directors_details:
                # Already tallied; the original recomputed the identical
                # dict and overwrote it — skipping gives the same result.
                continue
            language_counts = {}
            for data in movie_details:
                if director in data['Director']:
                    for lang in data['Language']:
                        language_counts[lang] = language_counts.get(lang, 0) + 1
            directors_details[director] = language_counts
    print(directors_details)
def movies_json():
    """Scrape details for every top-listed movie and dump each one to its
    own <imdb_id>.json file."""
    for movie in scrape_top_list():
        url = movie["url"]
        details = scrape_movie_details(url)
        # Characters 27:36 of the URL are the IMDb title id (e.g. tt0111161).
        imdb_id = movie["url"][27:36]
        with open(imdb_id + ".json", "w+") as json_file:
            json.dump(details, json_file)
    print("success")  # fixed typo: was "succss"
def analyse_movies_genres():
    """Count how many top-250 movies carry each genre.

    Returns:
        dict mapping genre name -> number of movies with that genre.
    """
    genres_count = {}
    # Hoisted: the original re-scraped the whole top list on every one of
    # its 250 iterations.
    for movie in scrape_top_list():
        data_dict = json.loads(find_position(movie))
        for genre in data_dict['Genres']:
            genres_count[genre] = genres_count.get(genre, 0) + 1
    return genres_count
def get_movies_list_details():
    """Return cast lists for the first five top-listed movies.

    Uses movies_list.json as a cache: if the file exists its contents are
    returned directly; otherwise the data is scraped, cached, and returned.
    """
    if os.path.exists('movies_list.json'):
        with open('movies_list.json', 'r') as file:
            return json.load(file)
    get_list = []
    # Hoisted: the original called scrape_top_list() on every iteration.
    top_movies = scrape_top_list()
    for i in range(5):
        data = scrape_movies_details(top_movies[i])
        get_list.append(data['cast'])
    with open('movies_list.json', 'w+') as file:
        # json.dump returns None; the original pointlessly assigned it.
        json.dump(get_list, file)
    return get_list
def json_data():
    """Scrape details for every top-listed movie, writing each movie to its
    own <imdb_id>.json file and the accumulated list to
    all_movies_details.json.

    Side effect: appends every scraped detail dict to the module-level
    ``main_list`` (defined elsewhere in this file).
    """
    for movie in scrape_top_list():
        url = movie["url"]
        # Random 1-4s delay between requests so we do not hammer the server.
        time.sleep(random.randint(1, 4))
        details = scrape_movie_details(url)
        main_list.append(details)
        imdb_id = movie["url"][27:36]  # the tt... title id portion of the URL
        with open(imdb_id + ".json", "w+") as json_file:
            json.dump(details, json_file)
    # Renamed the file handle: the original called it ``json_data``, which
    # shadowed this function's own name.
    with open("all_movies_details.json", 'w+') as out_file:
        json.dump(main_list, out_file)
    print("success")
########################## task 7 ################################
import requests, json
from pprint import pprint
from bs4 import BeautifulSoup
from task5 import get_movie_list_details
from task1 import scrape_top_list


def analyse_movies_directors(movie_dirceter):
    """Count movies per director string.

    Each movie's "director" names are concatenated into a single key, so a
    multi-director movie is counted once under the combined name.
    """
    counts = {}
    for movie in movie_dirceter:
        combined = "".join(movie["director"])
        counts[combined] = counts.get(combined, 0) + 1
    return counts


dirceter_analyse = analyse_movies_directors(
    get_movie_list_details(scrape_top_list()[0:5]))
pprint(dirceter_analyse)
from task1 import scrape_top_list
from pprint import pprint


def group_by_decade(movies):
    """Group movies by the decade of their release year.

    Args:
        movies: iterable of dicts each carrying an integer ``year`` key.

    Returns:
        dict mapping decade start (e.g. 1990) -> list of movie dicts.
    """
    years = {}
    for movie in movies:
        # Arithmetic works for any year; the original string-slice approach
        # (str(year)[0:3] + "0") only handled 4-digit years.
        decade = (movie["year"] // 10) * 10
        years.setdefault(decade, []).append(movie)
    return years


byDecade = group_by_decade(scrape_top_list())
pprint(byDecade)
from task1 import scrape_top_list import requests import json from bs4 import BeautifulSoup from pprint import pprint movies_name = scrape_top_list() year_1 = [] year_2 = [] def group_by_year(): i = 0 while i < len(movies_name): year = movies_name[i]["year"] year_1.append(year) i = i + 1 j = 0 while j < len(year_1): if year_1[j] not in year_2: year_2.append(year_1[j]) j = j + 1 k = 0 while k < len(year_2): l = 0 while l < len(year_2): if year_2[k] < year_2[l]: a = year_2[k] year_2[k] = year_2[l] year_2[l] = a
movie_url = requests.get(movie_api) soup = BeautifulSoup(movie_url.text, "html.parser") director_name = soup.find("div", class_="credit_summary_item").a.get_text() director.append(director_name) bio = soup.find("div", class_="plot_summary") movie_bio = bio.find("div", class_="summary_text").get_text().strip() detail = soup.find("div", attrs={ "class": "article", "id": "titleDetails" }) div1 = detail.find_all("div") for i in div1: run = i.find_all("h4") for j in run: if "Language:" in j: lan = i.find_all("a") for lang_uage in lan: movie_language = lang_uage.get_text() language.append(movie_language) detail_mov["director"] = director detail_mov["language"] = language List.append(detail_mov.copy()) with open("get_movie_list_details.json", "w") as movie: json.dump(List, movie, indent=4) return List top_movie_list = analyse_language_and_directors(scrape_top_list())
from task1 import scrape_top_list
import requests
import json
from bs4 import BeautifulSoup
from pprint import pprint

# Renamed from ``list`` so the builtin is no longer shadowed.
movies = scrape_top_list()
year = []


def group_by_decade():
    """Group the scraped movies by decade and dump the mapping to
    " decade.json".

    NOTE(review): the leading space in the file name looks accidental but is
    kept byte-identical for backward compatibility — confirm before renaming.

    Side effect: fills the module-level ``year`` list with every movie's
    release year, sorted ascending (as the original did).
    """
    for movie in movies:
        year.append(movie["year"])
    year.sort()
    decades = {}
    for yr in year:
        start = (yr // 10) * 10
        # The original rebuilt the decade bucket for every duplicate year
        # and re-assigned the dict entry inside the inner loop; computing
        # each bucket once yields the identical final mapping.
        if start not in decades:
            decades[start] = [m for m in movies
                              if start <= m["year"] < start + 10]
    with open(" decade.json", "w") as saral_data3:
        json.dump(decades, saral_data3, indent=4)


group_by_decade()
poster_image_url=soup.find("div",class_="poster").a["href"] poster_image = "https://www.imdb.com" + poster_image_url sub_div = soup.find("div",class_="article",id="titleDetails") details =sub_div.find_all("div",class_="txt-block") a=[] for i in details: if "Language"in i.text: language=i.find("a").text if "Country" in i.text: country = i.find("a").text movie_details_dict = {"name": "","Director":" ","bio":"","runtime":"","gener":"","poster_image_url":"","country":" ","language":""} # movie_details_dict={} movie_details_dict["name"] = movie_name movie_details_dict["Director"] = Director movie_details_dict["bio"] = movie_bio movie_details_dict["runtime"] = movie_time movie_details_dict["gener"] = genre movie_details_dict["poster_image_url"] = poster_image movie_details_dict["country"] = country movie_details_dict["language"] = language movie_details_list.append(movie_details_dict) return (movie_details_list) movie_list=scrape_top_list() if __name__ == "__main__": pprint(scrape_movie(movie_list[0:10]))
import pprint, string
from task1 import scrape_top_list


def group_by_decades(scrape_top_list):
    """Group movies by release decade.

    Args:
        scrape_top_list: list of movie dicts with an integer ``year`` key.
            (The parameter name shadows the imported function; kept as-is
            for interface compatibility.)

    Returns:
        dict mapping decade start year -> list of movie dicts.
    """
    group = {}
    for movie in scrape_top_list:
        # (year // 10) * 10 is the decade start; the original built the same
        # value via int(str(year // 10) + '0').
        decade = (movie['year'] // 10) * 10
        # The original if/else appended in both branches; setdefault does
        # the same without the duplication.
        group.setdefault(decade, []).append(movie)
    return group


pprint.pprint(group_by_decades(scrape_top_list()))
import requests
from task1 import scrape_top_list
import json
from pprint import pprint
from bs4 import BeautifulSoup

name1 = scrape_top_list()


def scrape_movie_cast():
    """Prompt for a movie index, scrape that movie's cast table, save it to
    artist_name.json and return the list of {"artist", "imbd_id"} dicts.

    NOTE(review): the key "imbd_id" looks like a typo for "imdb_id" but is
    kept, since downstream consumers may depend on it.
    """
    cast_list = []
    movie_number = int(input("Enter the movie number: "))
    cast_url = requests.get(name1[movie_number]["url"])
    soup = BeautifulSoup(cast_url.text, "html.parser")
    cast_table = soup.find("table", "cast_list")
    for cell in cast_table.find_all("td", class_=""):
        # href looks like /name/nm0000123/...; chars 6:15 are the person id.
        artist_id = cell.a["href"][6:15]  # renamed: ``id`` shadowed a builtin
        cast_list.append({
            "artist": cell.a.get_text().strip(),
            "imbd_id": artist_id,
        })
    with open("artist_name.json", "w") as saral:
        json.dump(cast_list, saral, indent=4)
    return cast_list


scrape_movie_cast()
# print(Movie) final_data = { 'Movie': '', 'Director': '', 'Country': '', 'Language': '', 'Poster': '', 'Runtime': '', 'Genres': '' } final_data['Movie'] = Movie.strip() final_data['Director'] = dir_list final_data['Country'] = data_country final_data['Language'] = Language final_data['Poster'] = poster_url final_data['Runtime'] = new_time final_data['Genres'] = Genres # Add task 13 in task 4 final_data['cast'] = Scrape_movie_cast(url) # print(final_data) with open(url_movie['Name'] + '.json', 'w+') as file: file_new = json.dump(final_data, file) return final_data pprint(scrape_movies_details(scrape_top_list()[0])) # scrape_movies_details(value)
from tast5 import get_movies_list_details
from task1 import scrape_top_list


def Analyse_movies_language(movie_list):
    """Tally how many movies list each language.

    Returns a dict mapping language -> occurrence count.
    """
    tally = {}
    for movie in movie_list:
        for language in movie['Language']:
            tally[language] = tally.get(language, 0) + 1
    return tally


print(Analyse_movies_language(get_movies_list_details(
    scrape_top_list()[0:10])))
import json, requests from pprint import pprint from bs4 import BeautifulSoup from task1 import scrape_top_list url = scrape_top_list() def scrape_movie_cast(): for i in url: link = (i["url"]) link_list = (link + "fullcredits?ref_=tt_cl_sm#cast") page = requests.get(link_list) soup = BeautifulSoup(page.text, "html.parser") main_div = soup.find("div", class_="article listo") table = main_div.find("table", class_="cast_list") table_data = table.find_all("td", class_="") main_list = [] for data in table_data: cast_name = (data.text.strip()) link_name = (data.find("a").get("href")) imdb_id = (link_name[6:15]) dct = {"imdb_id": "", "name": ""} dct["imdb_id"] = imdb_id dct["name"] = cast_name main_list.append(dct) dum = json.dumps(main_list) # print(main_list) with open("main_list.json", "w+") as json_data:
import json, requests
from pprint import pprint
from bs4 import BeautifulSoup
from task4 import scrape_movie_details
from task1 import scrape_top_list

data = scrape_top_list()
print(data)


def movies_json():
    """Scrape every top-listed movie's details and dump each to its own
    <imdb_id>.json file."""
    for movie in scrape_top_list():
        url = movie["url"]
        details = scrape_movie_details(url)
        imdb_id = movie["url"][27:36]  # the tt... title id in the URL
        with open(imdb_id + ".json", "w+") as json_file:
            json.dump(details, json_file)
    print("success")  # fixed typo: was "succss"


movies_json()
from task8 import find_position
import os, json, requests, time, pprint
from task1 import scrape_top_list
from task4 import scrape_movies_details

# Hoisted: the original re-scraped the entire top list on every one of the
# 250 loop iterations.
top_movies = scrape_top_list()
for movie in top_movies[:250]:
    pprint.pprint(find_position(movie))
    time.sleep(5)  # pause between lookups, as in the original
div = soup.find('div', class_='article', id='titleDetails') div1 = div.find_all("div", class_="txt-block") for i in div1: try: if i.h4.text == "Country:": country = (i.a.text) # print (country) elif i.h4.text == "Language:": language_a = i.find_all('a') language_lis = [] for j in language_a: language = "" language += j.text language_lis.append(language) # print(language_lis) except AttributeError: continue dic = {} dic["movie_name"] = movie_name dic["director"] = director dic["country"] = country dic["language"] = language_lis dic["poster_url"] = poster_link dic["bio"] = movie_bio dic["rumtime"] = movie_runtime dic["gener"] = gener return dic movieData = scrape_movie_details(scrape_top_list()[0]["url"]) # pprint(movieData)
from tast5 import get_movies_list_details
from task1 import scrape_top_list


def Analyse_movies_directors(movie_list):
    """Tally how many movies credit each director.

    Returns a dict mapping director name -> number of movies.
    """
    tally = {}
    for movie in movie_list:
        for director in movie['Director']:
            tally[director] = tally.get(director, 0) + 1
    return tally


print(Analyse_movies_directors(get_movies_list_details(
    scrape_top_list()[0:5])))
import requests, pprint, os, json
from bs4 import BeautifulSoup
from task5 import get_movie_list_details
from task1 import scrape_top_list
from pprint import pprint


def analyse_movies_language(movie_list):
    """Count how many movies list each language.

    Returns:
        dict mapping language -> occurrence count.

    The original body only printed each language for debugging and returned
    None because the counting logic was commented out; the caller binds the
    result, so the tally is restored here.
    """
    languages = {}
    for movie in movie_list:
        for lang in movie["language"]:
            languages[lang] = languages.get(lang, 0) + 1
    return languages


language_analysis = analyse_movies_language(
    get_movie_list_details(scrape_top_list()[0:5]))
# pprint(language_analysis)
runtime = time.find("time").get_text().strip() hour_to_min = (int(runtime[0])) * 60 i = 0 mins = "" a = runtime[3:] while i < len(a): if a[i] == "m": break mins = mins + a[i] i = i + 1 runtime_of_movie = hour_to_min + int(mins) movie_genre = time.find_all("a") movie_genre.pop() for i in movie_genre: genre_1 = i.get_text() genre.append(genre_1) detail_mov["movie_name"] = list[movie_number - 1]["name"] detail_mov["director"] = director detail_mov["country"] = "India" detail_mov["poster_url"] = movie_poster detail_mov["language"] = language detail_mov["movie_bio"] = movie_bio detail_mov["runtime"] = runtime_of_movie detail_mov["movie_genre"] = genre with open("movie_details.json", "w") as movie_number: json.dump(detail_mov, movie_number, indent=4) return (detail_mov) scrape_movie_details(scrape_top_list())
i = 0 mins = "" a = runtime[3:] while i < len(a): if a[i] == "m": break mins = mins + a[i] i = i + 1 runtime_of_movie = hour_to_min + int(mins) movie_genre = time.find_all("a") movie_genre.pop() for i in movie_genre: genre_1 = i.get_text() genre.append(genre_1) # print(genre_1) detail_mov["movie_name"] = name_2["name"] detail_mov["director"] = director detail_mov["country"] = "India" detail_mov["poster_url"] = movie_poster detail_mov["language"] = language detail_mov["movie_bio"] = movie_bio detail_mov["runtime"] = runtime_of_movie detail_mov["movie_genre"] = genre List.append(detail_mov.copy()) with open("10_movie_details.json", "w") as movie: json.dump(List, movie, indent=4) return List top_movie_list = get_movie_details(scrape_top_list())