def main():
    """Report the latent semantics of a genre in actor space using PCA.

    The genre is read from the first command-line argument. A 4-component
    PCA is fitted on the matrix returned by ``util.get_tf_idf_matrix(genre)``,
    whose columns are treated as actor ids. For every component a list of
    ``(actor name, weight)`` tuples is built, ordered by decreasing absolute
    weight, and the lists are passed to ``util.print_output`` and
    ``util.write_output_file``.

    Prints a usage message and returns when no genre argument is given.

    Raises:
        KeyError: if a matrix column id has no entry in the actor info table.
    """
    if len(sys.argv) < 2:
        print('Expected arguments are not provided.')
        return

    genre = sys.argv[1]
    no_of_components = 4

    imdb_actor_info = util.read_imdb_actor_info()
    tf_idf_matrix = util.get_tf_idf_matrix(genre)

    # Resolve names column by column. Filtering the info table with isin()
    # yields names in the info table's row order, which is not guaranteed to
    # match the matrix column order and would pair weights with wrong actors.
    name_by_id = dict(zip(imdb_actor_info['id'], imdb_actor_info['name']))
    actor_list = [name_by_id[actor_id] for actor_id in tf_idf_matrix.columns.values]

    pca = PCA(n_components=no_of_components)
    pca.fit(tf_idf_matrix)

    concepts = []
    for i in range(no_of_components):
        # One weight per matrix column; the strongest contributors come first
        # regardless of sign.
        concept = sorted(
            zip(actor_list, pca.components_[i]),
            key=lambda pair: abs(pair[1]),
            reverse=True,
        )
        concepts.append(concept)

    util.print_output(genre, concepts)
    # NOTE(review): output_file is not defined in this function; it is
    # presumably a module-level name - confirm.
    util.write_output_file(genre, concepts, output_file)
def main():
    """Rank all other actors by similarity to the actor given on the command line.

    The actor id is read from the first command-line argument. Each row of
    the matrix returned by ``util.get_tf_idf_matrix()`` is compared with the
    input actor's row, and ``1 - cosine(row, input_row)`` is used as the
    similarity score. The other actors, ordered by decreasing score and
    truncated to ``no_of_actors``, are passed to ``util.print_output`` and
    ``util.write_output_file`` as ``(id, name, score)`` tuples.

    Prints a message and returns when the argument is missing, is not an
    integer, or does not identify a known actor.

    Raises:
        KeyError: if a matrix row id has no entry in the actor info table.
    """
    if len(sys.argv) < 2:
        print('Expected arguments are not provided.')
        return
    try:
        actorid = int(sys.argv[1])
    except ValueError:
        print('Actor id must be an integer.')
        return

    imdb_actor_info = util.read_imdb_actor_info()
    # Build the id -> name lookup once instead of scanning the whole table
    # for every matrix row. setdefault keeps the first name seen for an id,
    # matching a first-match lookup.
    name_by_id = {}
    for actor_id, actor_name in zip(imdb_actor_info['id'], imdb_actor_info['name']):
        name_by_id.setdefault(actor_id, actor_name)

    if actorid not in name_by_id:
        print('Actor id {} was not found in the actor info.'.format(actorid))
        return
    input_actor = name_by_id[actorid]

    tf_idf_matrix = util.get_tf_idf_matrix()
    if actorid not in tf_idf_matrix.index:
        print('Actor id {} was not found in the tf-idf matrix.'.format(actorid))
        return
    input_actor_tf_idf = tf_idf_matrix.loc[actorid]

    # cosine() is presumably a cosine *distance* (e.g. scipy's), hence the
    # "1 -" to turn it into a similarity - confirm against the file's imports.
    other_actors = [
        (index, name_by_id[index], 1 - cosine(row, input_actor_tf_idf))
        for index, row in tf_idf_matrix.iterrows()
        if index != actorid  # the input actor is never reported
    ]
    other_actors.sort(key=lambda tup: tup[2], reverse=True)

    # NOTE(review): no_of_actors and output_file are not defined in this
    # function; they are presumably module-level names - confirm.
    util.print_output(actorid, input_actor, other_actors[:no_of_actors])
    util.write_output_file(actorid, input_actor, other_actors[:no_of_actors], output_file)
def main():
    """Report the latent semantics of a genre in tag space using truncated SVD.

    The genre is read from the first command-line argument. A 4-component
    ``TruncatedSVD`` (100 iterations) is fitted on the matrix returned by
    ``util.get_tf_idf_matrix(genre)``, whose columns are treated as tag ids.
    For every component a list of ``(tag, weight)`` tuples is built, ordered
    by decreasing absolute weight, and the lists are passed to
    ``util.print_output`` and ``util.write_output_file``.

    Prints a usage message and returns when no genre argument is given.

    Raises:
        KeyError: if a matrix column id has no entry in the genome tags table.
    """
    if len(sys.argv) < 2:
        print('Expected arguments are not provided.')
        return

    genre = sys.argv[1]
    no_of_components = 4

    genome_tags = util.read_genome_tags()
    tf_idf_matrix = util.get_tf_idf_matrix(genre)

    # Resolve tags column by column. Filtering genome_tags with isin() yields
    # tags in the tag table's row order, which is not guaranteed to match the
    # matrix column order and would pair weights with the wrong tags.
    tag_by_id = dict(zip(genome_tags['tagId'], genome_tags['tag']))
    tag_list = [tag_by_id[tag_id] for tag_id in tf_idf_matrix.columns.values]

    # random_state=None leaves the solver unseeded, so repeated runs may
    # differ slightly.
    svd = TruncatedSVD(n_components=no_of_components, n_iter=100, random_state=None)
    svd.fit(tf_idf_matrix)

    concepts = []
    for i in range(no_of_components):
        # One weight per matrix column; the strongest contributors come first
        # regardless of sign.
        concept = sorted(
            zip(tag_list, svd.components_[i]),
            key=lambda pair: abs(pair[1]),
            reverse=True,
        )
        concepts.append(concept)

    util.print_output(genre, concepts)
    # NOTE(review): output_file is not defined in this function; it is
    # presumably a module-level name - confirm.
    util.write_output_file(genre, concepts, output_file)
def main():
    """Rank all other actors by similarity to a given actor in SVD space.

    The actor id is read from the first command-line argument. The matrix
    returned by ``util.get_tf_idf_matrix()`` is reduced with
    ``SVD(n_components=no_of_components)``. Each reduced row is compared with
    the input actor's reduced row, and ``1 - cosine(row, input_row)`` is used
    as the similarity score. The other actors, ordered by decreasing score
    and truncated to ``no_of_actors``, are passed to ``util.print_output``
    and ``util.write_output_file`` as ``(id, name, score)`` tuples.

    Prints a message and returns when the argument is missing, is not an
    integer, or does not identify a known actor.

    Raises:
        KeyError: if a matrix row id has no entry in the actor info table.
    """
    if len(sys.argv) < 2:
        print('Expected arguments are not provided.')
        return
    try:
        actorid = int(sys.argv[1])
    except ValueError:
        print('Actor id must be an integer.')
        return

    imdb_actor_info = util.read_imdb_actor_info()
    # Build the id -> name lookup once instead of scanning the whole table
    # for every matrix row. setdefault keeps the first name seen for an id,
    # matching a first-match lookup.
    name_by_id = {}
    for actor_id, actor_name in zip(imdb_actor_info['id'], imdb_actor_info['name']):
        name_by_id.setdefault(actor_id, actor_name)

    if actorid not in name_by_id:
        print('Actor id {} was not found in the actor info.'.format(actorid))
        return
    input_actor_name = name_by_id[actorid]

    tf_idf_matrix = util.get_tf_idf_matrix()
    # Check up front so an unknown actor is reported before the
    # decomposition is fitted.
    if actorid not in tf_idf_matrix.index:
        print('Actor id {} was not found in the tf-idf matrix.'.format(actorid))
        return

    # NOTE(review): SVD and no_of_components are not defined in this
    # function; they are presumably module-level names - confirm.
    svd = SVD(n_components=no_of_components)
    svd.fit(tf_idf_matrix)
    # Keep the original row labels so reduced rows can be addressed by actor id.
    svd_df = pd.DataFrame(svd.transform(tf_idf_matrix), index=tf_idf_matrix.index)
    input_actor_row = svd_df.loc[actorid]

    # cosine() is presumably a cosine *distance* (e.g. scipy's), hence the
    # "1 -" to turn it into a similarity - confirm against the file's imports.
    other_actors = [
        (index, name_by_id[index], 1 - cosine(row, input_actor_row))
        for index, row in svd_df.iterrows()
        if index != actorid  # the input actor is never reported
    ]
    other_actors.sort(key=lambda tup: tup[2], reverse=True)

    # NOTE(review): no_of_actors and output_file are not defined in this
    # function; they are presumably module-level names - confirm.
    util.print_output(actorid, input_actor_name, other_actors[:no_of_actors])
    util.write_output_file(actorid, input_actor_name, other_actors[:no_of_actors], output_file)