def partition_components(decomposed):
    """Group actors, movies and years by their dominant latent factor.

    Every row of a factor matrix is assigned to the partition whose number
    is the 1-based position of that row's largest entry.  The partitions
    are then printed and written out through ``util``.

    Args:
        decomposed: indexable holding three factor matrices, in the order
            actor, movie, year.  Row ``j`` of each matrix is labelled with
            the ``j``-th distinct actor name / movie name / year.
            NOTE(review): this presumes the rows follow the
            first-appearance order of the distinct ids -- confirm against
            the code that builds the decomposed tensor.

    Raises:
        KeyError: if a row's largest entry lies beyond the fifth column,
            because only partitions 1-5 exist.
    """
    # A single read is enough.  Keeping the first row per id keeps the name
    # list aligned with ``id.unique()`` even when an id occurs repeatedly.
    imdb_actor_info = util.read_imdb_actor_info()
    actor_names = imdb_actor_info.drop_duplicates(subset='id')['name'].tolist()

    mlmovies = util.read_mlmovies()
    movie_names = mlmovies.drop_duplicates(
        subset='movieid')['moviename'].tolist()
    years = mlmovies.year.unique()

    partitions = dict(
        (number, {'actor': [], 'movie': [], 'year': []})
        for number in range(1, 6)
    )

    labelled_factors = (
        ('actor', decomposed[0], actor_names),
        ('movie', decomposed[1], movie_names),
        ('year', decomposed[2], years),
    )
    for kind, factor_matrix, labels in labelled_factors:
        for j, factor_vec in enumerate(factor_matrix):
            # argmax is 0-based while partitions are numbered from 1.
            partition_num = np.argmax(factor_vec) + 1
            partitions[partition_num][kind].append(labels[j])

    util.print_partition(partitions)
    util.write_partition_output_file(partitions, output_file)
def main():
    """Report the four strongest PCA components of a genre's tf-idf matrix.

    Reads the genre from ``sys.argv[1]``, fits a four-component PCA on the
    matrix returned by ``util.get_tf_idf_matrix(genre)`` and, for every
    component, lists ``(actor name, weight)`` pairs ordered by decreasing
    absolute weight.  The result is printed and written via ``util``.

    The matrix columns are treated as actor ids and translated to names
    through ``imdb_actor_info``.  An id without an entry there is reported
    as the raw id instead of shifting every following name by one.
    """
    if len(sys.argv) < 2:
        print('Expected arguments are not provided.')
        return
    genre = sys.argv[1]
    no_of_components = 4

    imdb_actor_info = util.read_imdb_actor_info()
    tf_idf_matrix = util.get_tf_idf_matrix(genre)

    # Translate ids to names *in column order*: position j of every PCA
    # component refers to column j of the fitted matrix, so the names must
    # follow the columns and not the row order of the info table.
    name_by_id = dict(zip(imdb_actor_info['id'], imdb_actor_info['name']))
    actor_list = [name_by_id.get(actor_id, actor_id)
                  for actor_id in tf_idf_matrix.columns.values]

    pca = PCA(n_components=no_of_components)
    pca.fit(tf_idf_matrix)

    concepts = []
    for i in range(no_of_components):
        concept = sorted(zip(actor_list, pca.components_[i]),
                         key=lambda tup: abs(tup[1]), reverse=True)
        concepts.append(concept)

    util.print_output(genre, concepts)
    util.write_output_file(genre, concepts, output_file)
def main():
    """Rank actors who are not in a movie by their similarity to that movie.

    The movie id comes from ``sys.argv[1]``.  Similarity is the dot product
    of each row of ``util.get_actor_tf_idf_matrix()`` with the movie's row
    of ``util.get_movie_tf_idf_matrix()``.  Actors already listed for the
    movie are excluded; the best ``no_of_actors`` are printed and written
    to ``output_file`` via ``util``.
    """
    if len(sys.argv) < 2:
        print('Expected arguments are not provided.')
        return

    movieid = int(sys.argv[1])
    mlmovies = util.read_mlmovies()
    movie_actors = util.read_movie_actor()
    imdb_actor_info = util.read_imdb_actor_info()

    def lookup_name(actor_id):
        # First name registered for this actor id.
        matching = imdb_actor_info[imdb_actor_info['id'] == actor_id]
        return matching['name'].values[0]

    title_rows = mlmovies[mlmovies['movieid'] == movieid]
    input_movie = title_rows['moviename'].values[0]

    # Rows of other movies become all-NaN under where() and are dropped.
    cast_rows = movie_actors.where(movie_actors['movieid'] == movieid).dropna()
    actors_of_movie = cast_rows.loc[:, 'actorid'].unique()

    movie_matrix = util.get_movie_tf_idf_matrix()
    actor_matrix = util.get_actor_tf_idf_matrix()

    # One-column frame whose single column is labelled with the movie id.
    input_movie_vector = pd.DataFrame(movie_matrix.loc[movieid])
    scores = actor_matrix.dot(input_movie_vector)
    in_cast = scores.index.isin(actors_of_movie)
    similarity_matrix = scores[~in_cast]

    actors = sorted(
        (
            (actor_id,
             lookup_name(actor_id),
             similarity_matrix.loc[actor_id][movieid])
            for actor_id in similarity_matrix.index
        ),
        key=lambda entry: entry[2],
        reverse=True,
    )

    util.print_output(movieid, input_movie, actors[:no_of_actors])
    util.write_output_file(movieid, input_movie, actors[:no_of_actors],
                           output_file)
def main():
    """List the actors most similar to a given actor by tf-idf cosine.

    The actor id comes from ``sys.argv[1]``.  Each row of
    ``util.get_tf_idf_matrix()`` is scored as ``1 - cosine(row, input row)``;
    the input actor is removed and the best ``no_of_actors`` of the rest are
    printed and written to ``output_file`` via ``util``.
    """
    if len(sys.argv) < 2:
        print('Expected arguments are not provided.')
        return

    actorid = int(sys.argv[1])
    imdb_actor_info = util.read_imdb_actor_info()

    def name_of(actor_id):
        # First name registered for this actor id.
        matching = imdb_actor_info[imdb_actor_info['id'] == actor_id]
        return matching['name'].values[0]

    input_actor = name_of(actorid)

    tf_idf_matrix = util.get_tf_idf_matrix()
    reference_vector = tf_idf_matrix.loc[actorid]

    # scipy-style cosine() is a distance, hence the "1 -" to get similarity.
    scored = [
        (row_id, name_of(row_id), 1 - cosine(vector, reference_vector))
        for row_id, vector in tf_idf_matrix.iterrows()
    ]

    other_actors = sorted(
        (entry for entry in scored if entry[0] != actorid),
        key=lambda entry: entry[2],
        reverse=True,
    )

    util.print_output(actorid, input_actor, other_actors[:no_of_actors])
    util.write_output_file(actorid, input_actor, other_actors[:no_of_actors],
                           output_file)
def main():
    """List the actors most similar to a given actor in SVD-reduced space.

    The actor id comes from ``sys.argv[1]``.  ``util.get_tf_idf_matrix()``
    is reduced to ``no_of_components`` dimensions with ``SVD``; every other
    actor is scored as ``1 - cosine(reduced row, input actor's reduced
    row)``.  The best ``no_of_actors`` are printed and written to
    ``output_file`` via ``util``.

    Raises:
        KeyError: if the input actor, or any actor in the matrix, is
            missing from ``imdb_actor_info`` or the matrix index.
    """
    if len(sys.argv) < 2:
        print('Expected arguments are not provided.')
        return
    actorid = int(sys.argv[1])

    imdb_actor_info = util.read_imdb_actor_info()
    # Build the id -> name table once rather than scanning the frame for
    # every row; the first row per id wins.
    unique_actors = imdb_actor_info.drop_duplicates(subset='id')
    name_by_id = dict(zip(unique_actors['id'], unique_actors['name']))
    input_actor_name = name_by_id[actorid]

    tf_idf_matrix = util.get_tf_idf_matrix()

    svd = SVD(n_components=no_of_components)
    svd.fit(tf_idf_matrix)
    svd_df = pd.DataFrame(svd.transform(tf_idf_matrix),
                          index=tf_idf_matrix.index)
    input_actor_row = svd_df.loc[actorid]

    # The input actor is skipped here so it never appears in the ranking.
    other_actors = [
        (index, name_by_id[index], 1 - cosine(row, input_actor_row))
        for index, row in svd_df.iterrows()
        if index != actorid
    ]
    other_actors.sort(key=lambda tup: tup[2], reverse=True)

    util.print_output(actorid, input_actor_name, other_actors[:no_of_actors])
    util.write_output_file(actorid, input_actor_name,
                           other_actors[:no_of_actors], output_file)
def main():
    """Build the binary actor x movie x year tensor and pickle it.

    The tensor is a nested list indexed ``[actor][movie][year]`` over the
    distinct actor ids, movie ids and years.  A cell is ``1.0`` when the
    inner join of the movie-actor table with ``mlmovies`` contains a row
    with that actor, movie and year, and ``0.0`` otherwise.  The result is
    written to ``actor_movie_year_tensor.pkl``.
    """
    mlmovies = util.read_mlmovies()
    imdb_actor_info = util.read_imdb_actor_info()
    movie_actor = util.read_movie_actor()

    movies_list = mlmovies.movieid.unique()
    year_list = mlmovies.year.unique()
    actor_list = imdb_actor_info.id.unique()

    actor_movie_year_grouped = pd.merge(movie_actor, mlmovies,
                                        on='movieid', how='inner')

    # Collect the observed triples once; a set lookup per cell replaces a
    # full DataFrame filter per cell.
    observed = set(zip(actor_movie_year_grouped.actorid,
                       actor_movie_year_grouped.movieid,
                       actor_movie_year_grouped.year))

    actor_movie_year_tensor = [
        [
            [1.0 if (actor, movie, year) in observed else 0.0
             for year in year_list]
            for movie in movies_list
        ]
        for actor in actor_list
    ]

    # The context manager guarantees the file is flushed and closed.
    with open("actor_movie_year_tensor.pkl", "wb") as tensor_file:
        cPickle.dump(actor_movie_year_tensor, tensor_file)
def latent_actor_semantics(actor_matrix):
    """Print and store the actor latent semantics of a factor matrix.

    For each of the first ``no_of_components`` columns of ``actor_matrix``
    a list of ``(actor name, weight)`` pairs is produced, ordered by
    decreasing absolute weight.  Row ``j`` of the matrix is labelled with
    the ``j``-th name read from ``imdb_actor_info``.

    Args:
        actor_matrix: two-dimensional array-like with one row per actor and
            one column per latent component.
    """
    imdb_actor_info = util.read_imdb_actor_info()
    known_ids = imdb_actor_info.id.unique()
    listed = imdb_actor_info['id'].isin(known_ids)
    actor_names = imdb_actor_info[listed]['name'].tolist()

    def ranked_concept(component_index):
        # Transposing turns the requested column into an indexable row.
        weights = np.transpose(actor_matrix)[component_index]
        pairs = [(actor_names[position], weight)
                 for position, weight in enumerate(weights)]
        pairs.sort(key=lambda pair: abs(pair[1]), reverse=True)
        return pairs

    concepts = [ranked_concept(k) for k in range(no_of_components)]

    util.print_output(concepts, 'Actor')
    util.write_output_file(concepts, output_file, 'Actor')