Example #1
0
def analyse_movies_language(movies):
    """Print how many times each language occurs across ``movies``.

    Args:
        movies: Iterable of mappings. Each one must provide a ``'langauge'``
            key (spelled as the data spells it) whose value is an iterable
            of hashable entries; every entry is counted once per occurrence.

    Returns:
        None. The ``{language: count}`` dict is printed, with keys in the
        order in which each language was first seen.
    """
    from collections import Counter

    language_counts = Counter(
        language for movie in movies for language in movie['langauge']
    )
    # Convert back to a plain dict so the printed form stays ``{...}``
    # instead of ``Counter({...})``.
    print(dict(language_counts))


# Driver: run the language analysis on the first 30 entries of the list
# returned by scrap_top_list().
# NOTE(review): scrap_top_list and scrape_movie_details are neither defined
# nor imported in the visible part of this script — confirm where they come from.
scrap_list = scrap_top_list()[0:30]
# pp(scrape_movie_details(scrap_list))
vishal = scrape_movie_details(scrap_list)

# Prints the per-language counts; the function itself returns nothing.
analyse_movies_language(vishal)

# print(scrape_movie_details(scrap_list))
Example #2
0
from scraping import scrap_top_list
from pprint import pprint as pp

# pprint.pprint(scrap_top_list())
# Module-level accumulator mapping a decade label to the movies filed under
# it.  It is never reset, so repeated calls keep extending the same lists.
again_dict = {}


def group_by_decade(movies):
    """File every movie under its decade in ``again_dict`` and pretty-print it.

    The decade label is the movie's ``'movie_year'`` value with its last
    character replaced by ``'0'``; the slicing and concatenation require
    that value to be a string.

    Args:
        movies: Iterable of mappings that each provide a ``'movie_year'`` key.

    Returns:
        None. The result lives in the module-level ``again_dict``.
    """
    for entry in movies:
        decade = entry['movie_year'][:-1] + '0'
        again_dict.setdefault(decade, []).append(entry)
    pp(again_dict)
# Runs at import time: groups everything scrap_top_list() returns by decade
# and pretty-prints the grouping.
group_by_decade(scrap_top_list())



Example #3
0

def analyse_movies_directors(movies):
    """Pretty-print how many times each director occurs across ``movies``.

    Args:
        movies: Iterable of mappings. Each one must provide a
            ``'directer_name'`` key (spelled as the data spells it) whose
            value is an iterable of hashable entries; every entry is counted
            once per occurrence.

    Returns:
        None. The ``{director: count}`` dict is pretty-printed; an empty
        ``movies`` prints ``{}``.
    """
    from collections import Counter

    # The counter exists before any iteration happens, so an empty input
    # yields an empty result instead of hitting an unbound local name.
    director_counts = Counter(
        name for movie in movies for name in movie['directer_name']
    )
    pp(dict(director_counts))


# Driver: run the director analysis on the first 10 entries of the list
# returned by scrap_top_list().
# NOTE(review): scrap_top_list, scrape_movie_details and pp are not imported
# in the visible part of this script — confirm where they come from.
scrap_list = scrap_top_list()[0:10]
analyse_movies = scrape_movie_details(scrap_list)
analyse_movies_directors(analyse_movies)
Example #4
0
    list_of_item = []
    for i in movie:
        dict_of_item = {}
        new_url = i['url_link']
        detail = requests.get(new_url)
        soup = BeautifulSoup(detail.text, 'html.parser')
        movie_rating = soup.find('span', itemprop='ratingValue').text

        name = soup.find('div', class_='title_wrapper')
        movie_year = name.find('span', id='titleYear').text

        movie = name.find('h1').text
        movie_name = ""
        for i in movie:
            if i == "(":
                break
            else:
                movie_name += i

        dict_of_item['url'] = new_url
        dict_of_item['movie_rating'] = movie_rating
        dict_of_item['movie_name'] = movie_name
        dict_of_item['movie_year'] = movie_year
        list_of_item.append(dict_of_item)
    return list_of_item
    # pp(list_of_item)


# Driver: pretty-print whatever get_movie_list_details returns for the first
# 10 entries of the list returned by scrap_top_list().
new_link = scrap_top_list()[0:10]
pp(get_movie_list_details(new_link))
Example #5
0
import requests,json,os
from bs4 import BeautifulSoup
from pprint import pprint as pp
from scraping import scrap_top_list
from task4 import scrape_movie_details

# First five entries of the scraped list.  This name is rebound to the first
# 50 entries further down, after scrape_movie_details_1 is defined.
url = scrap_top_list()[0:5]

# pp(scrape_movie_details(url))

# pp(scrap_top_list())

def scrape_movie_details_1(movies):
    """Write each movie to ``<id>.json`` unless that file already exists.

    The id is the slice ``[7:16]`` of the last entry found when iterating
    the movie's ``'poster_image_url'`` value.
    NOTE(review): that slice matches the ``tt…`` part of a
    ``/title/tt0111161/`` style path — confirm what the key really holds.

    Args:
        movies: Iterable of JSON-serialisable mappings that each provide a
            ``'poster_image_url'`` key holding an iterable of strings.

    Returns:
        None. Files are created in the current working directory and
        ``"succesfully"`` is printed once per file written.
    """
    for movie in movies:
        movie_id = None
        for link in movie['poster_image_url']:
            movie_id = link[7:16]
        if movie_id is None:
            # Nothing to derive a file name from; skip the movie rather than
            # reuse the previous movie's id or fail on an unbound name.
            continue

        file_name = movie_id + ".json"
        if os.path.isfile(file_name):
            # The loaded value is not used further; the load is kept so that
            # an unreadable or corrupt file still raises here.
            with open(file_name, "r") as cached:
                json.load(cached)
        else:
            # The context manager guarantees the data is flushed and the
            # handle closed before moving on to the next movie.
            with open(file_name, "w") as out_file:
                json.dump(movie, out_file)
            print("succesfully")

# Rebinds url to the first 50 entries and passes them to scrape_movie_details.
# NOTE(review): scrape_movie_details_1 is not called anywhere in the visible
# code; presumably detail_movie is meant to be passed to it — confirm.
url = scrap_top_list()[0:50]
detail_movie = scrape_movie_details(url)
Example #6
0
# pp(scrape_movie_details(scrap))
# pp(scrape_movie_cast())

# pp(scrap_top_list())
# print(scrap_top_list())


def get_movie_list_details(movies):
    """Set a ``"Cast"`` entry on every movie and dump the list to ``task_13.json``.

    Reads the module-level ``task_12`` iterable. Each movie is mutated in
    place, and the JSON file is rewritten after every movie with all movies
    processed so far.

    NOTE(review): every movie ends up with the *last* item of ``task_12``
    as its ``"Cast"``; if a per-movie pairing was intended, this needs a
    closer look.

    Args:
        movies: Iterable of JSON-serialisable dicts.

    Returns:
        None.
    """
    collected = []
    for entry in movies:
        # Each assignment overwrites the previous one, so only the final
        # item of task_12 survives on the entry.
        for cast_item in task_12:
            entry["Cast"] = cast_item
        collected.append(entry)
        with open("task_13.json", "w") as out_file:
            json.dump(collected, out_file)
            print("succesful")


# Driver for get_movie_list_details, using the first five scraped entries.
scrap = scrap_top_list()[0:5]
task_4 = scrape_movie_details(scrap)
# get_movie_list_details reads task_12 as a module-level global, so it has
# to be bound before the call below.
task_12 = scrape_movie_cast()
# get_movie_list_details has no active return statement, so this
# pretty-prints None after the function has written its file.
pp(get_movie_list_details(task_4))
# get_movie_list_details(task_4)
# get_movie_list_details(task_12)
Example #7
0
from scraping import scrap_top_list
import pprint

from pprint import pprint as pp


# pp(scrap_top_list())
def group_by_year(movies):
    """Group ``movies`` by their ``'movie_year'`` value and pretty-print the result.

    Args:
        movies: Iterable of mappings that each provide a hashable
            ``'movie_year'`` value.

    Returns:
        None. The ``{year: [movies...]}`` dict is pretty-printed; inside
        each list the movies keep their input order.
    """
    # One pass over the input: every movie is appended to its year's list,
    # rather than rescanning the whole input once per movie.
    movies_by_year = {}
    for movie in movies:
        movies_by_year.setdefault(movie['movie_year'], []).append(movie)

    pprint.pprint(movies_by_year)

    # print(i['o_movie_year'])


# Runs at import time: groups everything scrap_top_list() returns by year
# and pretty-prints the grouping.
group_by_year(scrap_top_list())

# years = []
# for movie in movies:
#     if movie['year'] not in years:
#         years.append(movie['year'])