"""Assemble the movie feature table.

Loads the movies table through ``get_movies()`` and then adds one column per
engineered feature, each produced by a zero-argument helper from the
``feature_engineering`` module. The resulting table is left in the
module-level name ``df``.
"""
# NOTE(review): the import order below is kept exactly as it was (project
# modules first, matplotlib later). A sibling script in this project selects a
# matplotlib backend before importing pyplot, so regrouping these imports could
# change which backend is picked -- confirm before reordering.
from load_data import get_movies
import feature_engineering as fe
from sklearn.svm import SVC
from sklearn import tree
from sklearn import mixture
from sklearn.naive_bayes import GaussianNB
from sklearn.naive_bayes import MultinomialNB
from sklearn.cluster import KMeans

# ``sklearn.cross_validation`` was deprecated in scikit-learn 0.18 and removed
# in 0.20; ``cross_val_score`` now lives in ``sklearn.model_selection``. Prefer
# the current location and fall back only for installs that predate it.
try:
    from sklearn.model_selection import cross_val_score
except ImportError:
    from sklearn.cross_validation import cross_val_score

import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

# Base table; every statement below adds a single column to it.
df = get_movies()

df['Rev_Budget'] = fe.get_rev_budget_ratio()

# Columns filled by the fe.get_female_* helpers (a plain value and a
# "_score" variant for each of directing, cast and writing).
df['Female_Dir'] = fe.get_female_directing()
df['Female_Dir_Score'] = fe.get_female_directing_score()
df['Female_Cast'] = fe.get_female_cast()
df['Female_Cast_Score'] = fe.get_female_cast_score()
df['Female_Writing'] = fe.get_female_writing()
df['Female_Writing_Score'] = fe.get_female_writing_score()

df['Rec_Bechdel'] = fe.recs_passing_avg_score()

# Director / cast aggregates (age and popularity, going by the helper names).
df['Dir_Age'] = fe.average_age_of_director()
df['Cast_Age'] = fe.average_age_of_cast()
df['Dir_Pop'] = fe.ave_pop_directors()
df['Cast_Pop'] = fe.ave_pop_cast()

df['First_Billed_Female'] = fe.first_billed_female()
# Necessary to get matplotlib to import correctly. import matplotlib as mpl mpl.use('TkAgg') import matplotlib.pyplot as plt import pandas as pd import numpy as np from load_data import get_movies, get_people movies = get_movies() # people = get_people().set_index('TMDB_ID') def passing_over_year(): map_to_decades = movies['Year'].apply(lambda year: year // 10 * 10) ind = list(map_to_decades.unique()) y_data = [[0] * len(ind) for label in range(0, 4)] for i in range(len(ind)): for label in range(0, 4): bucket = ind[i] y_data[label][i] = 100 * (movies[(map_to_decades == bucket) & ( movies['Bechdel_Rating'] == label)].shape[0]) / ( movies[map_to_decades == bucket].shape[0]) bar_width = 10 line_width = 1 edge_color = 'black' ind = ind[3:] for li in range(len(y_data)):
from load_data import get_movies, get_people import pandas as pd import numpy as np from functools import reduce from collections import defaultdict movie_data = get_movies() movie_by_id = movie_data.set_index('TMDB_ID') people_data = get_people() people_data = people_data.set_index('TMDB_ID') def person_bechdel_score(): cast_to_scores = defaultdict(int) cast_to_num_movies = defaultdict(int) for movie_tmdb_id in movie_data.index: cast = movie_data.loc[movie_tmdb_id]['Cast'] for mem in cast: person_id = int(mem['id']) cast_to_scores[person_id] += movie_data.loc[movie_tmdb_id][ 'Bechdel_Rating'] cast_to_num_movies[person_id] += 1 print("Created dictionaries.") best_score = 0 best_id = 0 for person in cast_to_num_movies: if cast_to_num_movies[person] > 2: if (cast_to_scores[person] / cast_to_num_movies[person]) > best_score: best_score = (cast_to_scores[person] /