# Pair every keyword attached to a movie with that movie's revenue.
# NOTE: the accumulator is called year_revenue but holds [keyword, revenue] rows here.
for movie in movies:
    movie_revenue = int(movie['revenue'])
    for keyword in json.loads(movie['keywords']):
        year_revenue.append([keyword['name'], movie_revenue])
print(year_revenue)
print('----#--------#--------#----')

# Sum revenue per keyword; only keywords whose total exceeds the cut-off are kept.
keyword_revenue = [['keyword', 'revenue']]  # CSV rows, header first
my_list2 = []                               # word-cloud entries: {'word', 'size'}
justwords = []                              # bare keyword names
rows_by_keyword = groupby(sorted(year_revenue), key=lambda row: row[0])
for word, rows in rows_by_keyword:
    total = sum(row[1] for row in rows)
    if total <= 5893668099:
        continue
    # The size is the total revenue scaled down by 500 million and truncated.
    my_list2.append({'word': word, 'size': int(total / 500000000)})
    keyword_revenue.append([word, total])
    justwords.append(word)

print(my_list2)
print(" ".join(justwords))
csv_write(keyword_revenue, 'i4keyword_revenue.csv')

# Sample of the my_list2 output from an earlier run, kept for reference.
'''
[{'word': '3d', 'size': 77}, {'word': 'aftercreditsstinger', 'size': 72}, {'word': 'airplane', 'size': 14}, {'word': 'alien', 'size': 29}, {'word': 'animation', 'size': 19}, {'word': 'based on comic book', 'size': 42}, {'word': 'based on novel', 'size': 55}, {'word': 'based on young adult novel', 'size': 17}, {'word': 'battle', 'size': 17}, {'word': 'best friend', 'size': 12}, {'word': 'biography', 'size': 12}, {'word': 'cia', 'size': 13}, {'word': 'conspiracy', 'size': 13}, {'word': 'daughter', 'size': 15}, {'word': 'dc comics', 'size': 14}, {'word': 'dinosaur', 'size': 13}, {'word': 'duringcreditsstinger', 'size': 113}, {'word': 'dying and death', 'size': 18}, {'word': 'dystopia', 'size': 42}, {'word': 'elves', 'size': 13}, {'word': 'escape', 'size': 15}, {'word': 'explosion', 'size': 15}, {'word': 'family', 'size': 12}, {'word': 'father son relationship', 'size': 14}, {'word': 'friendship', 'size': 22}, {'word': 'future', 'size': 17}, {'word': 'hero', 'size': 13}, {'word': 'imax', 'size': 27}, {'word': 'island', 'size': 13}, {'word': 'london england', 'size': 13}, {'word': 'los angeles', 'size': 13}, {'word': 'love', 'size': 16}, {'word': "love of one's life", 'size': 11}, {'word': 'magic', 'size': 29}, {'word': 'martial arts', 'size': 13}, {'word': 'marvel cinematic universe', 'size': 20}, {'word': 'marvel comic', 'size': 38}, {'word': 'mission', 'size': 16}, {'word': 'monster', 'size': 12}, {'word': 'murder', 'size': 17}, {'word': 'musical', 'size': 16}, {'word': 'new york', 'size': 13}, {'word': 'orcs', 'size': 12}, {'word': 'rescue', 'size': 13}, {'word': 'revenge', 'size': 24}, {'word': 'robot', 'size': 11}, {'word': 'saving the world', 'size': 23}, {'word': 'scientist', 'size': 12}, {'word': 'secret agent', 'size': 13}, {'word': 'secret identity', 'size': 18}, {'word': 'sequel', 'size': 51}, {'word': 'ship', 'size': 16}, {'word': 'soldier', 'size': 12}, {'word': 'space', 'size': 16}, {'word': 'space opera', 'size': 13}, {'word': 'spy', 'size': 15}, {'word': 'super powers', 'size': 15}, {'word': 'superhero', 'size': 52}, {'word': 'suspense', 'size': 15}, {'word': 'terrorist', 'size': 14}, {'word': 'time travel', 'size': 13}, {'word': 'undercover', 'size': 14}, {'word': 'violence', 'size': 29}, {'word': 'war', 'size': 13}, {'word': 'witch', 'size': 18}, {'word': 'woman director', 'size': 30}]
'''
import re

# Captures the handle that follows "@", stopping at whitespace or a colon.
# Example: "@galaxy5univ I like you\nRT @BestOfGalaxies: Let's sit ...\n@jonghyun"
#   => ['galaxy5univ', 'BestOfGalaxies', 'jonghyun']
p = re.compile(r'@([^\s:]+)')

# Alternative input: 'twitter_data/elonmusk_following.txt'
path_to_file = 'twitter_data/pualg_follwing.txt'

# Decode explicitly as UTF-8: without an encoding argument open() falls back
# to the platform locale, which can fail or mis-decode non-ASCII text.
with open(path_to_file, 'r', encoding='UTF-8') as file_object:
    text = file_object.read()

# All captured handles, duplicates included.
list0 = p.findall(text)
print(len(list0))

# Unique handles; the set makes the resulting order arbitrary.
list1 = list(set(list0))
print(len(list1))

# One single-column row per handle for the CSV writer.
mydata = [[item] for item in list1]

# Alternative output: 'twitter_data/elonmusk_following.csv'
csv_write(mydata, 'twitter_data/pualg_follwing.csv')
# Country / vote_count aggregation.
# Appends one [production-country name, vote_count] pair to year_revenue per
# country entry (vote_count falls back to 0 when the field is None), prints the
# pairs, sums vote_count per country with groupby over the sorted pairs, keeps
# totals greater than 30 under a ["Country", "Value"] header, prints the first
# ten rows and passes the table to csv_write with 'i2country_vote.csv'.
# NOTE(review): the leading statements read `movie`, but no enclosing loop
# header is visible in this span - presumably they belong to a
# `for movie in movies:` body that starts earlier; confirm before reformatting.
tlist = json.loads(movie['production_countries']) for t in tlist: name = t['name'] if movie['vote_count'] is not None: vote_count = float(movie['vote_count']) else: vote_count = 0 # print(year, revenue) item = [name, vote_count] year_revenue.append(item) print(year_revenue) print('----#--------#--------#----') mylist = [] mylist.append(["Country", "Value"]) for i, g in groupby(sorted(year_revenue), key=lambda x: x[0]): count_v = sum(v[1] for v in g) if count_v > 30: mylist.append([i, count_v]) print(mylist[0:10]) print('----#--------#--------#----') csv_write(mylist, 'i2country_vote.csv')
print('----#--------#--------#----')

# Total the second column of year_revenue per distinct first-column value.
# groupby only merges adjacent rows, hence the sort beforehand.
totals = [
    (label, sum(row[1] for row in rows))
    for label, rows in groupby(sorted(year_revenue), key=lambda row: row[0])
]

# Keep only the groups whose total exceeds 30; the header row goes first.
mylist = [["genres", "vote_average"]]
genres = []
for label, total in totals:
    if total > 30:
        mylist.append([label, total])
        genres.append(label)

# randint is only referenced by disabled experiments that printed
# len(genres) random integers with upper bounds 90, 1961, 2492 and 1202.
from random import randint

print(mylist[0:10])
print('----#--------#--------#----')
csv_write(mylist, 'i2genres_vote.csv')
def _float_or_zero(raw):
    """Return ``float(raw)``, or 0 when *raw* is None or an empty string.

    Both sentinels are treated as "missing", so every optional column is
    handled the same way instead of each guarding only one of them.
    """
    if raw is None or raw == '':
        return 0
    return float(raw)


# Header row for the per-movie numeric table.
mylist.append(["budget", "revenue", "popularity", "runtime", "vote_average", "vote_count"])
for movie in movies:
    # Budget and revenue are scaled down by one million.
    budget = float(movie['budget']) / 1000000
    revenue = float(movie['revenue']) / 1000000
    popularity = float(movie['popularity'])
    # Optional columns: a missing value becomes 0 rather than raising.
    runtime = _float_or_zero(movie['runtime'])
    vote_average = _float_or_zero(movie['vote_average'])
    vote_count = _float_or_zero(movie['vote_count'])
    mylist.append([budget, revenue, popularity, runtime, vote_average, vote_count])

print(mylist[0:10])
print('----#--------#--------#----')
csv_write(mylist, 'i1one_to_one.csv')
# Extract the unique matches of pattern `p` from a text file and write them
# out as a one-column CSV.
# NOTE(review): `p` is compiled outside this span - presumably the "@handle"
# pattern; verify against its definition.
#
# Other inputs used previously:
#   'twitter_data/elonmusk_following.txt'
#   "twitter_data/pualg_follwing_windows.txt"
path_to_file = "twitter_data/pualg_follwing.txt"

# Read the whole file at once, decoded as UTF-8.
with open(path_to_file, 'r', encoding='UTF-8') as file_object:
    text = file_object.read()

# Every match, duplicates included.
list0 = p.findall(text)
print(len(list0))

# De-duplicated matches; going through a set leaves the order arbitrary.
list1 = list(set(list0))
print(len(list1))

# Wrap each match in its own row for the CSV writer.
mydata = [[handle] for handle in list1]

# Previous output target: 'twitter_data/elonmusk_following.csv'
csv_write(mydata, "twitter_data/pualg_follwing.csv")
# Collect one [release year, revenue] pair per movie; the year is the first
# four characters of release_date.
for movie in movies:
    year_revenue.append([movie['release_date'][:4], float(movie['revenue'])])
print(year_revenue)
print('----#--------#--------#----')

# Total revenue per year. groupby only merges adjacent rows, so sort first.
my_list2 = [
    [year, sum(entry[1] for entry in entries)]
    for year, entries in groupby(sorted(year_revenue), key=lambda pair: pair[0])
]
print(my_list2)
csv_write(my_list2, 'year_revenue_sum.csv')


def my_mean(values):
    """Return the arithmetic mean of *values* as a float.

    Works on any iterable, including one-shot generators, because it counts
    the items while summing them. An empty iterable raises ZeroDivisionError.
    """
    total = 0.0
    count = 0
    for count, value in enumerate(values, start=1):
        total += value
    return total / count


print('----#--------#--------#----')

# Mean revenue per year, using the same sort-then-group pass as the totals.
my_list3 = []
for i, g in groupby(sorted(year_revenue), key=lambda pair: pair[0]):
    my_list3.append([i, my_mean(entry[1] for entry in g)])