Esempio n. 1
0
def save_data():
    """Scrape the IMDB top list and cache each movie's details on disk.

    For every movie in the top list: if a JSON cache file named after the
    movie's id already exists under ``screpingdata/``, load it; otherwise
    scrape the details (with a 1-3 s polite delay between requests) and
    write them to that cache file.

    Returns:
        list: one detail dict per movie, cached and freshly scraped alike.

    NOTE(review): the original returned from inside the loop in both
    branches (so only the first movie was ever handled) and re-iterated
    the whole list inside the ``else`` branch, shadowing the loop
    variable.  Both defects are fixed; the return value is now the full
    list of movie details instead of the first cached dict / a byte count.
    """
    movies_data = scrape_top_list()
    all_details = []
    cwd = os.getcwd()
    for one_movie in movies_data:
        # Movie id: the slug just before the URL's trailing slash
        # (e.g. ".../tt0111161/" -> "tt0111161").
        id_movie = one_movie['urls'][-10:-1]
        path = cwd + "/screpingdata/" + str(id_movie) + ".json"
        if os.path.exists(path):
            # Cache hit: reuse previously scraped details.
            with open(path, "r") as file:
                all_details.append(json.load(file))
        else:
            # Be polite to the server: random 1-3 s pause per request.
            time.sleep(random.randint(1, 3))
            details = scrape_movie_details(one_movie["urls"])
            with open(path, "w") as file:
                json.dump(details, file, indent=4, sort_keys=True)
            all_details.append(details)
    return all_details
Esempio n. 2
0
def movie_detail():
    """Fetch detail data for every movie on the IMDB top list.

    Returns:
        list: one detail dict per movie, in top-list order.
    """
    urls = [entry["urls"] for entry in scrape_top_list()]
    return [scrape_movie_details(movie_url) for movie_url in urls]
Esempio n. 3
0
def load_movies_data():
    """Load the cached JSON details for every top-list movie that has one.

    Movies with no cache file under ``screpingdata/`` are silently
    skipped.  Returns a list of the loaded detail dicts.
    """
    cached = []
    base = os.getcwd() + "/screpingdata/"
    for entry in scrape_top_list():
        # Movie id: the slug just before the URL's trailing slash.
        movie_id = entry['urls'][-10:-1]
        if not os.path.exists("screpingdata/" + str(movie_id) + ".json"):
            continue
        with open(base + str(movie_id) + ".json", "r+") as handle:
            cached.append(json.load(handle))
    return cached
from pprint import pprint
from IMDB_task1 import scrape_top_list
import os, json
# NOTE(review): module-level network call — the top list is scraped as a
# side effect of importing this module; consider moving under
# `if __name__ == "__main__":`.
movies_list = scrape_top_list()


# This task builds a dictionary describing each director's languages:
# in how many languages they made movies, and how many times each.
def analyse_language_and_directors(movies_list):
    """Collect the distinct languages and directors across cached movies.

    Parameters:
        movies_list: top-list entries, each a dict with a 'urls' key.

    NOTE(review): this function appears truncated — ``final_dic`` is
    created but never populated or returned, and nothing is returned at
    all; confirm against the original source before relying on it.
    """
    all_movies_data = []
    final_dic = {}
    # Check whether a cached JSON file exists for each movie id.
    for movie_data in movies_list:
        # Movie id: the slug just before the URL's trailing slash.
        ids = (movie_data['urls'][-10:-1])
        exists = os.path.exists("screpingdata/" + str(ids) + ".json")
        cwd = os.getcwd()
        if exists:
            with open(cwd + "/screpingdata/" + str(ids) + ".json",
                      "r+") as file:
                data = json.load(file)
                all_movies_data.append(data)
                language_list = []
                director_list = []
                # Record each language and director name only once.
                # NOTE(review): the lists are reset and all loaded movies
                # re-scanned on every cache hit — O(n^2); looks like this
                # was meant to run once after the loop.
                for movis in all_movies_data:
                    for language in movis["Language"]:
                        if language not in language_list:
                            language_list.append(language)
                    for director in movis["Director"]:
                        if director not in director_list:
                            director_list.append(director)
Esempio n. 5
0
# This task scrapes the full cast data for each movie.

from bs4 import BeautifulSoup
import requests,json
from pprint import pprint
from IMDB_task1 import scrape_top_list
# NOTE(review): module-level network call — the top list is scraped as a
# side effect of importing this module.
all_movies_data = scrape_top_list()
for data in all_movies_data[:5] :
	url = data["urls"]
# task13
	def scrape_movie_cast(url):
		movie_url = requests.get(url)
		movie_text = movie_url.text
		soup = (BeautifulSoup(movie_text,"html.parser"))
		article = soup.find('div', attrs ={ "class" : "article" , 'id':'titleCast'})
		see_more = (article.find('div', attrs ={ "class" : "see-more"}))
		cast_url = ( url + (see_more).a["href"])
		cast = requests.get(cast_url)
		cast_soup = (BeautifulSoup(cast.text,"html.parser"))
		fulcredits = cast_soup.find('div', attrs ={ "class" : "header" , 'id':'fullcredits_content'})
		table = fulcredits.find("table",class_= "cast_list")
		tr = (table.find_all("tr"))
		tr.pop(0)
		movie_cast = []
		for one_tr in tr :
			all_movies_cast = {}
			td = one_tr.find_all("td")
			if len(td) > 1:
				table_data = (td[1])
				all_movies_cast["imdb_id"] = ((table_data.a["href"])[6:-1])
				all_movies_cast["name"] = ((table_data.text).strip())