Ejemplo n.º 1
0
def get_movie_list_details(top_movie):
    """Return the scraped details for every entry of *top_movie*.

    Each entry must support ``entry["url"]``; that value is handed to
    ``scrape_movie_details`` and the results are collected in input order.
    """
    return [scrape_movie_details(entry["url"]) for entry in top_movie]
Ejemplo n.º 2
0
def get_movie_list_details(movies):
    """Scrape the details of each movie in *movies* and return them as a list.

    Every element of *movies* is indexed with ``"url"``; the URL is passed to
    ``scrape_movie_details`` and the returned values keep the input order.
    """

    def details_for(entry):
        # One lookup and one scrape per entry.
        return scrape_movie_details(entry["url"])

    return list(map(details_for, movies))


# data = get_movie_list_details(scrape_top_list()[0:5])
# pprint(data)
Ejemplo n.º 3
0
def movies_json():
    """Scrape every movie of the top list and dump each one to its own JSON file.

    For each entry returned by ``scrape_top_list()`` the details obtained from
    ``scrape_movie_details`` are written to ``<stem>.json`` in the current
    working directory, and ``"succss"`` is printed after each file.
    """
    for entry in scrape_top_list():
        details = scrape_movie_details(entry["url"])

        # Characters 27..35 of the URL are used as the file stem
        # (presumably the title id of the movie page URL -- TODO confirm).
        file_name = entry["url"][27:36] + ".json"
        with open(file_name, "w+") as json_file:
            json.dump(details, json_file)
        print("succss")
Ejemplo n.º 4
0
def _count_languages_per_director(movies):
    """Return ``{director: {language: number_of_movies}}`` for *movies*.

    Each movie must provide ``'director'`` and ``'language'`` iterables.
    A movie contributes at most once to a (director, language) pair, even if
    a name is repeated inside that movie.
    """
    counts = {}
    for movie in movies:
        directors = movie['director']
        # dict.fromkeys de-duplicates while keeping first-seen order.
        languages = dict.fromkeys(movie['language'])
        for director in dict.fromkeys(directors):
            # setdefault keeps directors whose movies list no language,
            # mapping them to an empty dict.
            per_language = counts.setdefault(director, {})
            for language in languages:
                per_language[language] = per_language.get(language, 0) + 1
    return counts


def analyse_language_and_directors():
    """Pretty-print, per director, how many movies exist in each language.

    When ``all_movies_data.json`` exists in the current directory it is loaded
    and the ``{director: {language: count}}`` mapping is pretty-printed.

    When the file does not exist, the movie details are scraped (with a random
    1-3 second pause before each request) and stored in that file. Nothing is
    analysed or printed on that run, so the function has to be called again to
    see the result.

    Returns:
        None.
    """
    cache_path = "all_movies_data.json"

    if os.path.isfile(cache_path):
        # The cache is only read here, so plain read mode is enough.
        with open(cache_path, "r") as file_data:
            movies = json.load(file_data)
        pprint.pprint(_count_languages_per_director(movies))
    else:
        scraped = []
        for entry in top_scrape_list():
            link = entry['url']
            time.sleep(random.randint(1, 3))
            scraped.append(scrape_movie_details(link))

        with open(cache_path, "w+") as file_data:
            json.dump(scraped, file_data)
Ejemplo n.º 5
0
def kya_naam_du():
    """Print the cached movie details, or scrape them and build the cache.

    When ``all_movies_data.json`` exists its parsed content is printed.
    Otherwise every entry of ``top_scrape_list()`` is scraped (with a random
    1-3 second pause before each request) and the collected details are
    written to that file.

    The cache is written once, after all movies were scraped. Writing it
    inside the loop would leave a partial file behind if the run is
    interrupted, and the next call would treat that partial file as complete.
    As before, no file is created when nothing was scraped.

    Returns:
        None.
    """
    cache_path = "all_movies_data.json"

    if os.path.isfile(cache_path):
        with open(cache_path, 'r') as file_data:
            print(json.load(file_data))
        return

    list_big = []
    for entry in top_scrape_list():
        link = entry['url']
        time.sleep(random.randint(1, 3))
        list_big.append(scrape_movie_details(link))

    if list_big:
        with open(cache_path, "w") as file_data:
            json.dump(list_big, file_data)
Ejemplo n.º 6
0
def json_data():
    """Scrape every top-list movie and persist the results as JSON.

    For each entry of ``scrape_top_list()``:
      * pause a random 1-4 seconds,
      * scrape the details and append them to the module-level ``main_list``,
      * write the details to ``<stem>.json``,
      * rewrite ``all_movies_details.json`` with the current ``main_list``,
      * print ``"success"``.
    """
    for entry in scrape_top_list():
        movie_url = entry["url"]
        time.sleep(random.randint(1, 4))
        details = scrape_movie_details(movie_url)
        main_list.append(details)

        # Characters 27..35 of the URL are used as the per-movie file stem
        # (presumably the title id -- TODO confirm).
        per_movie_file = entry["url"][27:36] + ".json"
        with open(per_movie_file, "w+") as json_file:
            json.dump(details, json_file)
        # The combined file is rewritten on every iteration with everything
        # collected so far.
        with open("all_movies_details.json", 'w+') as combined_file:
            json.dump(main_list, combined_file)
        print("success")
Ejemplo n.º 7
0
def get_movie_list_details():
    """Return the details of the first ten top-list movies, cached on disk.

    When ``cache_for_task5.json`` exists its parsed content is returned.
    Otherwise the first ten entries of ``top_scrape_list()`` are scraped with
    ``scrape_movie_details``, stored in that file and returned.

    Returns:
        list: the movie details. Previously a cache miss wrote the file but
        returned ``None``; both paths now return the list.
    """
    cache_path = 'cache_for_task5.json'

    if os.path.isfile(cache_path):
        with open(cache_path, 'r') as f:
            return json.load(f)

    details = [
        scrape_movie_details(movie['url'])
        for movie in top_scrape_list()[:10]
    ]
    with open(cache_path, 'w') as f:
        json.dump(details, f)
    return details

# print(get_movie_list_details())
Ejemplo n.º 8
0
def analyse_movies_language(movies):
    """Print how often each language occurs across *movies*.

    Args:
        movies: iterable of dicts; each dict is read through the key
            ``'langauge'`` (spelled exactly like that), whose value is
            iterated to obtain the languages of one movie.

    The result is printed as a plain dict ``{language: occurrences}`` in
    first-seen order. Nothing is returned.
    """
    from collections import Counter

    # Counter keeps first-seen order and replaces the hand-written
    # "unique list + nested count" loops with a single pass.
    language_counts = Counter(
        language for movie in movies for language in movie['langauge']
    )
    # Convert back to a plain dict so the printed form is unchanged.
    print(dict(language_counts))


# Script driver: runs at import time.
# Take the first 30 entries returned by scrap_top_list().
scrap_list = scrap_top_list()[0:30]
# pp(scrape_movie_details(scrap_list))
# NOTE(review): scrape_movie_details is given the whole slice here, so this
# variant presumably accepts a list of movies and returns a list of detail
# dicts -- confirm against its definition.
vishal = scrape_movie_details(scrap_list)

# Prints the per-language counts for the scraped movies.
analyse_movies_language(vishal)

# print(scrape_movie_details(scrap_list))
Ejemplo n.º 9
0

def analyse_movies_directors(movies):
    """Pretty-print how many movies each director appears in.

    Args:
        movies: iterable of dicts; each dict is read through the key
            ``'directer_name'`` (spelled exactly like that), whose value is
            iterated to obtain the directors of one movie.

    The result is handed to ``pp`` as a plain dict ``{director: occurrences}``.
    An empty *movies* prints ``{}``; previously that case raised
    ``UnboundLocalError`` because the result dict was only created inside the
    loop over the movies. Nothing is returned.
    """
    from collections import Counter

    director_counts = Counter(
        name for movie in movies for name in movie['directer_name']
    )
    # A plain dict keeps the printed form free of the "Counter(...)" wrapper.
    pp(dict(director_counts))


# Script driver: runs at import time.
# Take the first 10 entries returned by scrap_top_list().
scrap_list = scrap_top_list()[0:10]
# NOTE(review): scrape_movie_details is given the whole slice here, so this
# variant presumably returns a list of detail dicts -- confirm.
analyse_movies = scrape_movie_details(scrap_list)
# Pretty-prints the per-director counts for the scraped movies.
analyse_movies_directors(analyse_movies)
Ejemplo n.º 10
0
import json
import os
from pprint import pprint as pp

from bs4 import BeautifulSoup

from scraping import scrap_top_list
from task4 import scrape_movie_details

# First five entries of the top list. NOTE(review): the only visible use of
# this value is commented out, and `url` is rebound further down before it is
# read -- this call looks redundant; confirm before removing.
url = scrap_top_list()[0:5]

# pp(scrape_movie_details(url))

# pp(scrap_top_list())

def scrape_movie_details_1(movies):
    """Store each movie dict in its own JSON file unless the file exists.

    Args:
        movies: iterable of dicts. Each dict must provide
            ``'poster_image_url'``, which is iterated to derive the file name,
            and must be JSON-serialisable.

    For every movie the file ``<stem>.json`` in the current directory is
    checked. An existing file is parsed and otherwise left alone. A missing
    file is created with the movie dumped as JSON, and ``"succesfully"`` is
    printed. Nothing is returned.
    """
    for movie in movies:
        # The stem is characters 7..15 of the LAST item obtained by iterating
        # movie['poster_image_url'].
        # NOTE(review): if that value yields no items, the stem of the
        # previous movie is reused (or the name is unbound for the first
        # movie). The intended input shape is not visible here -- confirm.
        for poster_url in movie['poster_image_url']:
            movie_url_1 = poster_url[7:16]
        file_name = movie_url_1 + ".json"

        if os.path.isfile(file_name):
            # The cached content is parsed but not used further; the load is
            # kept so a corrupt cache file still raises as before.
            with open(file_name, "r") as file:
                json.load(file)
        else:
            # `with` closes the handle, so the data is flushed to disk
            # deterministically (the handle used to be left open).
            with open(file_name, "w") as file_1:
                json.dump(movie, file_1)
            print("succesfully")

# Script driver: runs at import time.
# Take the first 50 entries returned by scrap_top_list().
url = scrap_top_list()[0:50]
# NOTE(review): scrape_movie_details is given the whole slice here, so this
# variant presumably returns a list of detail dicts -- confirm.
detail_movie = scrape_movie_details(url)
# scrape_movie_details_1(detail_movie)
Ejemplo n.º 11
0
def get_movie_list_details(movies_list):
    """Scrape each movie of *movies_list* and collect the results globally.

    Every entry is indexed with ``'name'``; that value is passed to
    ``scrape_movie_details`` and the result is appended to the module-level
    list ``movies_list_all_details``, which is also the return value.
    """
    for entry in movies_list:
        details = scrape_movie_details(entry['name'])
        movies_list_all_details.append(details)
    return movies_list_all_details
Ejemplo n.º 12
0
# pp(scrape_movie_details(scrap))
# pp(scrape_movie_cast())

# pp(scrap_top_list())
# print(scrap_top_list())


def get_movie_list_details(movies):
    """Attach a "Cast" entry to every movie and dump the result to disk.

    Each movie dict of *movies* is mutated in place, appended to a running
    list, and that list is written to ``task_13.json`` after every movie
    (printing ``"succesful"`` each time). Nothing is returned.
    """
    collected = []
    for movie in movies:
        # Every item of the module-level ``task_12`` is assigned in turn, so
        # "Cast" ends up holding the last item that ``task_12`` yields.
        # NOTE(review): this gives all movies the same cast when task_12 is a
        # list -- presumably a per-movie pairing was intended; confirm.
        for cast_entry in task_12:
            movie["Cast"] = cast_entry
        collected.append(movie)
        with open("task_13.json", "w") as out_file:
            json.dump(collected, out_file)
            print("succesful")


# Script driver: runs at import time.
# Take the first five entries returned by scrap_top_list().
scrap = scrap_top_list()[0:5]
# NOTE(review): scrape_movie_details is given the whole slice here, so this
# variant presumably returns a list of detail dicts -- confirm.
task_4 = scrape_movie_details(scrap)
# task_12 is read as a module-level name inside get_movie_list_details, so it
# must be bound before the call below.
task_12 = scrape_movie_cast()
# get_movie_list_details has no return statement, so this pretty-prints None
# after the function has written task_13.json.
pp(get_movie_list_details(task_4))
# get_movie_list_details(task_4)
# get_movie_list_details(task_12)