Example #1
0
from load_data import get_movies
import feature_engineering as fe

from sklearn.svm import SVC
from sklearn import tree
from sklearn import mixture
from sklearn.naive_bayes import GaussianNB
from sklearn.naive_bayes import MultinomialNB
from sklearn.cluster import KMeans
from sklearn.cross_validation import cross_val_score

import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

# Load the movie table, then attach one engineered feature column per entry
# in the table below. Each helper from feature_engineering is called with no
# arguments and its result is stored under the paired column name, in the
# order listed.
df = get_movies()

_FEATURE_COLUMNS = (
    ('Rev_Budget', fe.get_rev_budget_ratio),
    ('Female_Dir', fe.get_female_directing),
    ('Female_Dir_Score', fe.get_female_directing_score),
    ('Female_Cast', fe.get_female_cast),
    ('Female_Cast_Score', fe.get_female_cast_score),
    ('Female_Writing', fe.get_female_writing),
    ('Female_Writing_Score', fe.get_female_writing_score),
    ('Rec_Bechdel', fe.recs_passing_avg_score),
    ('Dir_Age', fe.average_age_of_director),
    ('Cast_Age', fe.average_age_of_cast),
    ('Dir_Pop', fe.ave_pop_directors),
    ('Cast_Pop', fe.ave_pop_cast),
    ('First_Billed_Female', fe.first_billed_female),
)

for _column, _make_feature in _FEATURE_COLUMNS:
    df[_column] = _make_feature()
Example #2
0
# Select the TkAgg backend before pyplot is imported; needed for matplotlib to import correctly.
import matplotlib as mpl
mpl.use('TkAgg')
import matplotlib.pyplot as plt

import pandas as pd
import numpy as np

from load_data import get_movies, get_people

# Module-level movie table, loaded once at import time; passing_over_year()
# reads its 'Year' and 'Bechdel_Rating' columns.
movies = get_movies()
# Currently disabled: people table indexed by TMDB_ID.
# people = get_people().set_index('TMDB_ID')


def passing_over_year():
    map_to_decades = movies['Year'].apply(lambda year: year // 10 * 10)
    ind = list(map_to_decades.unique())
    y_data = [[0] * len(ind) for label in range(0, 4)]

    for i in range(len(ind)):
        for label in range(0, 4):
            bucket = ind[i]
            y_data[label][i] = 100 * (movies[(map_to_decades == bucket) & (
                movies['Bechdel_Rating'] == label)].shape[0]) / (
                    movies[map_to_decades == bucket].shape[0])

    bar_width = 10
    line_width = 1
    edge_color = 'black'
    ind = ind[3:]
    for li in range(len(y_data)):
Example #3
0
from load_data import get_movies, get_people
import pandas as pd
import numpy as np
from functools import reduce
from collections import defaultdict

# Movie table as returned by the loader, plus a variant re-indexed by TMDB_ID.
movie_data = get_movies()
movie_by_id = movie_data.set_index('TMDB_ID')

# People table, re-indexed by TMDB_ID right after loading.
people_data = get_people().set_index('TMDB_ID')


def person_bechdel_score():
    cast_to_scores = defaultdict(int)
    cast_to_num_movies = defaultdict(int)
    for movie_tmdb_id in movie_data.index:
        cast = movie_data.loc[movie_tmdb_id]['Cast']
        for mem in cast:
            person_id = int(mem['id'])
            cast_to_scores[person_id] += movie_data.loc[movie_tmdb_id][
                'Bechdel_Rating']
            cast_to_num_movies[person_id] += 1

    print("Created dictionaries.")
    best_score = 0
    best_id = 0
    for person in cast_to_num_movies:
        if cast_to_num_movies[person] > 2:
            if (cast_to_scores[person] /
                    cast_to_num_movies[person]) > best_score:
                best_score = (cast_to_scores[person] /