# Code example #1
# 0
def analyse_movies_director(movie_director):
    """Count how many movies each director appears in.

    Parameters
    ----------
    movie_director : list[dict]
        Movie records, each carrying a '3_movie_director' key whose value is
        a list of director names for that movie.

    Returns
    -------
    dict
        Mapping of director name -> number of movies, in first-seen order
        (same order the original dedup-then-count produced).

    Notes
    -----
    The original implementation shadowed the builtin ``dir``, rebuilt the
    deduplicated list on every outer iteration, counted each director by
    rescanning the full list (O(n^2)), and raised NameError on empty input.
    A single dict pass fixes all of that while returning the same mapping.
    """
    director_counts = {}
    for movie in movie_director:
        for name in movie['3_movie_director']:
            # dict insertion order preserves the first-seen order the
            # original dedup (dict.fromkeys) produced.
            director_counts[name] = director_counts.get(name, 0) + 1
    return director_counts


# Driver: scrape the top-movie list, fetch details for the first 10 entries,
# then print the per-director counts.
# NOTE(review): scrape_top_list / get_movie_list_details are defined elsewhere
# in the project — presumably IMDb scrapers; confirm against their module.
top_movies = scrape_top_list()
movie_detail_lists = get_movie_list_details(top_movies[:10])
print(analyse_movies_director(movie_detail_lists))
# Code example #2
# 0
		# NOTE(review): fragment — the enclosing def and loop header are above
		# this chunk (see the commented copy below: scrape_movie_details_with_time).
		# Strip remaining "/" so the URL tail becomes a flat filename (e.g. "tt0066763").
		cut_url=cut.replace("/", "")
		cut_url_list.append(cut_url)
	
	#Creation of file of name like as tt0066763,tt0345623.
	for count in range(len(cut_url_list)):
		# Skip the download when the page is already cached on disk.
		if os.path.isfile(("Task_9_file/")+cut_url_list[count]):
			print("File Exist")
		else:
			# Throttle requests before hitting the site again.
			time.sleep(3)
			response=requests.get(movie_url[count])
			with open(("Task_9_file/")+cut_url_list[count],"w") as file_data:
				file_data.write(response.text)
				file_data.close()  # redundant: the with-block already closes the file
			print("File Created")

# Driver: scrape the top list and cache each movie page under Task_9_file/.
scraped_data=scrape_top_list()
scrape_movie_details_with_time(scraped_data)



# #

# import os

# def scrape_movie_details_with_time(movies):
# 	lis =[]
# 	movie_url=[]
# 	cut_url_list=[]
# 	for movie in movies:
# 		movie_url.append(movie['5_Url'])
# 		cut=movie['5_Url'].replace("https://www.imdb.com/title/", "")
# Code example #3
# 0
    # NOTE(review): fragment — the enclosing def line is above this chunk.
    # It scrapes a movie's full cast page and returns [{"Imdb_Id": ..., "Name": ...}, ...].
    time.sleep(1)
    response = requests.get(movie_cast_url)  # Requests for half links
    soup = BeautifulSoup(response.text, 'html.parser')
    # Locate the cast section, then its "See full cast" link.
    main_div = soup.find('div', attrs={"class": "article", "id": "titleCast"})
    sub_div = main_div.find('div', class_="see-more")
    full_link = movie_cast_url + sub_div.a['href']  #Addition of two half links
    time.sleep(1)
    respons2 = requests.get(
        full_link)  # Requests for data scrape from full-link
    soup = BeautifulSoup(respons2.text, "html.parser")
    main_div2 = soup.find('table', class_="cast_list")
    # Each primary_photo cell holds one actor's link and thumbnail.
    td = main_div2.find_all("td", class_="primary_photo")
    actor_id = []    # NOTE(review): unused in this fragment
    actor_name = []  # NOTE(review): unused in this fragment
    #Scrape Actor Id and Name
    my_list = []
    my_dict = {"Imdb_Id": "", "Name": ""}
    for count in td:
        tag_a = count.find('a')
        # href looks like "/name/nmXXXXXXX/": slice off "/name/" and trailing "/".
        my_dict["Imdb_Id"] = tag_a['href'][6:-1]
        my_dict["Name"] = tag_a.img['title']
        my_list.append(my_dict)
        # Re-create the dict so the next iteration doesn't mutate the appended one.
        my_dict = {"Imdb_Id": "", "Name": ""}

    return my_list


# Driver loop over the scraped top list.
# NOTE(review): the loop body may continue past this chunk; movie_lst and
# Movie_name appear unused within the visible lines.
movie_lst = []
for scraped_data in scrape_top_list():
    Movie_name = scraped_data['1_Name']
    # pprint(scrape_movie_cast(scraped_data['5_Url']))