def generate_average_distance_csv(features_csv, distance_output):
    feature_vectors = get_features_list(features_csv)
    feature_vector_count = len(feature_vectors)
    print "generating distance csv..."
    distance_count = 0
    distance_sum = 0
    for i in range(feature_vector_count):
        for j in range(i + 1, feature_vector_count):
            v1 = feature_vectors[i]
            v2 = feature_vectors[j]
            distance_sum += utility.calculate_distance(v1, v2)
            distance_count += 1
        # one progress step per outer vector, not per pair
        utility.print_progress_bar(i + 1, feature_vector_count)
    with open(distance_output, 'wb') as f:
        the_writer = csv.writer(f)
        headers = ["file", "average_distance"]
        the_writer.writerow(headers)
        vector = [features_csv, float(distance_sum) / float(distance_count)]
        the_writer.writerow(vector)

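# --- Hedged sketch (not part of the original module) ----------------------
# utility.calculate_distance and utility.print_progress_bar are imported
# from elsewhere in this project and are not shown in this section. Minimal
# versions consistent with how they are called above might look like the
# following; treat them as assumptions, not the project's implementations.
import math
import sys

def calculate_distance(v1, v2):
    # Euclidean distance between two equal-length numeric vectors.
    return math.sqrt(sum((a - b) ** 2 for a, b in zip(v1, v2)))

def print_progress_bar(iteration, total, bar_length=40):
    # Render an in-place textual progress bar on stdout.
    fraction = float(iteration) / float(total)
    filled = int(bar_length * fraction)
    bar = "#" * filled + "-" * (bar_length - filled)
    sys.stdout.write("\r[{0}] {1:.0f}%".format(bar, fraction * 100))
    sys.stdout.flush()
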
def evaluate_kbest_MeanReciprocalRank(matrix, game_set_file, output_file):
    from utility import print_progress_bar

    unfound_pair_score = matrix.get_min_association_score()
    print("Min association score: {0:.1f}".format(unfound_pair_score))
    game_set = read_game_set_tab(game_set_file)
    kbest_list = [1, 10, 25, 50, 75, 100]
    kbest_dict_abs_rank = defaultdict(int)
    kbest_dict_group_rank = defaultdict(int)
    eval_details = []
    MRR_score_abs_rank, MRR_score_group_rank = 0, 0
    for number, game_words in enumerate(game_set, 1):
        print_progress_bar(number, len(game_set))
        clues = game_words[:5]
        solution = game_words[5]
        abs_rank, group_rank, group, rank_in_group, scores, clues_matched_info = getSolutionRank(
            matrix, clues, solution, unfound_pair_score)
        spaced_clues_matched_info = ' '.join(clues_matched_info)
        spaced_scores = ' '.join(['{0:.1f}'.format(x) for x in scores])
        score_sum_str = '{0:.1f}'.format(sum(scores))
        report_fields = clues + [
            solution, abs_rank, group_rank, group, rank_in_group,
            spaced_clues_matched_info, spaced_scores, score_sum_str
        ]
        eval_report = '\t'.join([str(x) for x in report_fields])
        # print(eval_report)
        eval_details.append(eval_report)
        if abs_rank != WORST_SCORE and abs_rank <= 100:
            MRR_score_abs_rank += 1. / abs_rank
        if group_rank != WORST_SCORE and group_rank <= 100:
            MRR_score_group_rank += 1. / group_rank
        for t in kbest_list:
            if abs_rank != WORST_SCORE and abs_rank <= t:
                kbest_dict_abs_rank[t] += 1
            if group_rank != WORST_SCORE and group_rank <= t:
                kbest_dict_group_rank[t] += 1
    total = len(game_set)
    kbest_scores_abs_rank = sorted(kbest_dict_abs_rank.items())
    kbest_scores_group_rank = sorted(kbest_dict_group_rank.items())
    summary = [
        'total games: {}'.format(total),
        '\nk-best scores abs rank:',
        '\t'.join([str(k) for k, score in kbest_scores_abs_rank]),
        '\t'.join([str(score) for k, score in kbest_scores_abs_rank]),
        '\nMean Reciprocal Rank score abs rank: {0:.1f}'.format(
            MRR_score_abs_rank),
        '\nk-best scores group rank:',
        '\t'.join([str(k) for k, score in kbest_scores_group_rank]),
        '\t'.join([str(score) for k, score in kbest_scores_group_rank]),
        '\nMean Reciprocal Rank score group rank: {0:.1f}'.format(
            MRR_score_group_rank),
    ]
    if output_file:
        with open(output_file, 'w') as f_out:
            print_write(f_out, '\n'.join(summary))
            print_write(f_out, '\n\nPosition Details:\n\n')
            print_write(f_out, '\n'.join(eval_details))
            print_write(f_out, '\n\n')
    else:
        print('\n'.join(summary))
        print('\n\n')

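# Hedged sketch: print_write is not defined in this section. Judging from
# its calls above, it plausibly echoes text to stdout while also writing it
# to the open file handle (an assumption, not the confirmed helper):
def print_write(f_out, text):
    print(text)
    f_out.write(text)
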
def generate_category_mean_csv():
    dataset_by_category_result = fill_data_set("shot_features.csv")
    category_mean_list = {}
    for key, value in dataset_by_category_result.iteritems():
        mean = calculate_distance_mean(key, value)
        category_mean_list[key] = mean
    for key, value in category_mean_list.iteritems():
        print key, str(value)
    print "creating csv file..."
    with open('category_mean.csv', 'wb') as f:
        the_writer = csv.writer(f)
        headers = ["category", "distance_mean"]
        the_writer.writerow(headers)
        iteration = 1
        max_value = len(category_mean_list)
        for key, value in category_mean_list.iteritems():
            vector = [key, value]
            the_writer.writerow(vector)
            utl.print_progress_bar(iteration, max_value)
            iteration += 1
    print("")
    print("csv file has been created successfully")

def __generate_cluster_distribution_per_category_csv(self, file_name):
    print "creating csv file..."
    output_path = self.__input_directory_path.replace("clustering_results",
                                                      "evaluation_results")
    with open(output_path + "cluster_per_category/" + file_name, 'wb') as f:
        the_writer = csv.writer(f)
        headers = ["category"]
        for cluster in self.__cluster_list:
            headers.append("cluster " + str(cluster))
        the_writer.writerow(headers)
        iteration = 1
        for category, cluster_list_per_category in self.__clustering_result_set_by_category.iteritems():
            vector = [category]
            clusters_per_category_statistics_dictionary = self.__get_cluster_distribution_per_category(
                cluster_list_per_category)
            for cluster_key, stat in clusters_per_category_statistics_dictionary.iteritems():
                vector.append(stat)
            the_writer.writerow(vector)
            utl.print_progress_bar(
                iteration, self.__clustering_result_set_by_category_count)
            iteration += 1
    print("")
    print("csv file has been created successfully")

def generate_category_video_name_csv(category_codes_csv_file_path,
                                     video_with_category_csv_file_path):
    category_codes_dictionary = get_category_codes_dictionary(
        category_codes_csv_file_path)
    video_with_category_code_dictionary = get_video_with_category_code_dictionary(
        video_with_category_csv_file_path)
    with open("category_with_video.csv", 'wb') as csv_f:
        the_writer = csv.writer(csv_f)
        iteration = 1
        max_value = len(video_with_category_code_dictionary)
        the_writer.writerow(["video_name", "category"])
        for video_name, category_code in video_with_category_code_dictionary.iteritems():
            the_writer.writerow(
                [video_name, category_codes_dictionary[category_code]])
            utility.print_progress_bar(iteration, max_value)
            iteration += 1

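# Hedged sketch: get_category_codes_dictionary is defined elsewhere. From
# its use above it plausibly reads a two-column CSV and maps each category
# code to its category name (an assumption about the file layout):
def get_category_codes_dictionary(csv_file_path):
    category_codes = {}
    with open(csv_file_path, 'r') as csv_f:
        for row in csv.reader(csv_f):
            category_codes[row[0]] = row[1]
    return category_codes
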
def generate_classification_accuracy(input_file, output_file):
    well_classified_counter_dic = {}
    with open(input_file) as f:
        total_videos_count = sum(1 for line in f) - 1
    algorithms_names = [
        "kNN", "Logistic_Regression", "Neural_Network", "SVM",
        "Random_Forest", "Naive_Bayes", "AdaBoost"
    ]
    for algorithm_name in algorithms_names:
        well_classified_counter_dic[algorithm_name] = 0
    with open(input_file, 'r') as csvFile:
        reader = csv.reader(csvFile)
        iteration = 0
        print ""
        print("preparing dataset_by_category ...")
        for row in reader:
            if iteration > 0:
                category = row[1]
                algorithms_results = row[2:len(row)]
                for i, algorithm_name in enumerate(algorithms_names):
                    if category == algorithms_results[i]:
                        well_classified_counter_dic[algorithm_name] += 1
                utl.print_progress_bar(iteration, total_videos_count)
            iteration += 1
    algorithm_score = {}
    for algorithm_name, well_classified_counter in well_classified_counter_dic.iteritems():
        algorithm_score[algorithm_name] = float(
            well_classified_counter) / float(total_videos_count)
    print ""
    print "generating classification accuracy csv"
    with open(output_file, 'wb') as f:
        the_writer = csv.writer(f)
        the_writer.writerow(algorithms_names)
        # write the accuracies in the same order as the header row
        vector = [algorithm_score[name] for name in algorithms_names]
        the_writer.writerow(vector)

def __fill_data_set(self, csv_file):
    with open(csv_file, 'r') as csvFile:
        reader = csv.reader(csvFile)
        iteration = 0
        print("preparing data...")
        for row in reader:
            if iteration > 0:
                shot_metadata = [row[0], row[1], row[2], 0]
                self.__shot_metadata.append(shot_metadata)
                self.__data_set.append(
                    self.__convert_to_float(row[3:len(row)]))
                utility.print_progress_bar(iteration, self.__data_set_count)
            iteration += 1
    print("")

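# Hedged sketch: __convert_to_float is defined elsewhere in this class. A
# minimal version consistent with its usage (turning a slice of CSV string
# fields into floats) could be:
def __convert_to_float(self, values):
    return [float(value) for value in values]
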
def generate_csv(k_medoids_result, output_file):
    print "generating k-medoids clustering csv"
    with open(output_file, 'wb') as f:
        the_writer = csv.writer(f)
        headers = ["video", "shot_number", "category", "cluster"]
        the_writer.writerow(headers)
        iteration = 1
        max_value = len(k_medoids_result)
        for cluster, cluster_videos in k_medoids_result.iteritems():
            for video in cluster_videos:
                vector = [video.name, 0, video.category, cluster]
                the_writer.writerow(vector)
            # one progress step per cluster, matching max_value
            utl.print_progress_bar(iteration, max_value)
            iteration += 1

def create_csv_file(two_category_distance_mean_list):
    print "creating different category mean csv file..."
    with open('different_category_mean.csv', 'wb') as f:
        the_writer = csv.writer(f)
        headers = ["category1", "category2", "distance_mean"]
        the_writer.writerow(headers)
        iteration = 1
        max_value = len(two_category_distance_mean_list)
        for vector in two_category_distance_mean_list:
            the_writer.writerow(vector)
            utl.print_progress_bar(iteration, max_value)
            iteration += 1
    print("")
    print("csv file has been created successfully")

def __fill_clustering_result_set(self, file_name):
    with open(self.__input_directory_path + file_name) as f:
        self.__clustering_result_set_by_category_count = sum(1 for line in f) - 1
    with open(self.__input_directory_path + file_name, 'r') as csvFile:
        reader = csv.reader(csvFile)
        iteration = 0
        print("preparing dataset_by_category ...")
        dataset_by_category = {}
        clusters = []
        dataset_by_cluster = {}
        categories = []
        for row in reader:
            if iteration > 0:
                category = row[2]
                cluster = row[3]
                if category not in dataset_by_category:
                    dataset_by_category[category] = []
                if int(cluster) not in clusters:
                    clusters.append(int(cluster))
                dataset_by_category[category].append(cluster)
                if cluster not in dataset_by_cluster:
                    dataset_by_cluster[cluster] = []
                if category not in categories:
                    categories.append(category)
                dataset_by_cluster[cluster].append(category)
                utl.print_progress_bar(
                    iteration, self.__clustering_result_set_by_category_count)
            iteration += 1
    print("")
    self.__clustering_result_set_by_category = dataset_by_category
    self.__cluster_list = clusters
    self.__category_list = categories
    self.__clustering_result_set_by_cluster = dataset_by_cluster
    self.__cluster_list.sort()
    self.__category_list.sort()
    self.__clustering_result_set_by_category = collections.OrderedDict(
        sorted(self.__clustering_result_set_by_category.items()))
    self.__clustering_result_set_by_cluster = collections.OrderedDict(
        sorted(self.__clustering_result_set_by_cluster.items()))
    self.__clustering_result_set_by_category_count = len(
        self.__clustering_result_set_by_category)
    self.__clustering_result_set_by_cluster_count = len(
        self.__clustering_result_set_by_cluster)

def __generate_precision_cluster_per_category_csv(self, file_name):
    print "generating precision cluster per category csv..."
    precision_category_per_cluster_input_file = \
        self.__input_directory_path.replace("clustering_results", "evaluation_results") \
        + "cluster_per_category/" + file_name
    iteration = 0
    with open(precision_category_per_cluster_input_file) as csvFile:
        reader = csv.reader(csvFile)
        print("preparing dataset_by_category ...")
        categories = {}
        clusters = []
        for row in reader:
            if iteration > 0:
                category_label = row[0]
                category_distribution_score = EvaluationManager.__convert_to_float(
                    row[1:len(row)])
                categories[category_label] = category_distribution_score
            else:
                clusters = row[1:len(row)]
            iteration += 1
        max_value = iteration - 1
    output_file_path = precision_category_per_cluster_input_file.replace(
        "cluster_per_category", "precision_cluster_per_category")
    with open(output_file_path, 'wb') as f:
        the_writer = csv.writer(f)
        headers = ["category", "cluster", "precision"]
        the_writer.writerow(headers)
        iteration = 1
        categories = collections.OrderedDict(sorted(categories.items()))
        for category_label, category_distribution_score in categories.iteritems():
            max_feature = max(category_distribution_score)
            total = sum(category_distribution_score)
            index_of_max_feature = category_distribution_score.index(max_feature)
            score = max_feature / total
            vector = [category_label, clusters[index_of_max_feature], score]
            the_writer.writerow(vector)
            utl.print_progress_bar(iteration, max_value)
            iteration += 1
    print("")
    print("csv file has been created successfully")

def merge_features_with_clustering_results_complete(
        features_file_path, clustering_results_file_path, output_file_path):
    clustering_result_data = fill_clustering_results_data(
        clustering_results_file_path)
    features_data = fill_features_data(features_file_path)
    print "creating csv file..."
    with open(output_file_path, 'wb') as f:
        the_writer = csv.writer(f)
        headers = [
            "video", "shot_number", "category", "cluster",
            "interactions_number_speakers_2", "interactions_number_speakers_3",
            "interactions_number_speakers_4", "interactions_number_speakers_4+",
            "intervention_short", "intervention_long",
            "speakers_type_ponctuel", "speakers_type_localise",
            "speakers_type_present", "speakers_type_regulier",
            "speakers_type_important", "speaker_distribution",
            "mean_number_of_faces", "std_number_of_faces",
            "inter_intensity_variation1", "inter_intensity_variation2",
            "inter_intensity_variation3", "inter_intensity_variation4",
            "inter_intensity_variation5", "inter_intensity_variation6",
            "inter_intensity_variation7", "inter_intensity_variation8",
            "inter_intensity_variation9", "intra_intensity_variation1",
            "intra_intensity_variation2", "intra_intensity_variation3",
            "intra_intensity_variation4", "intra_intensity_variation5",
            "intra_intensity_variation6", "intra_intensity_variation7",
            "intra_intensity_variation8", "intra_intensity_variation9",
            "number_shot_transition", "number_speaker_transition",
            "speech", "music", "speech_with_music", "speech_with_non_music",
            "non_speech_with_music", "non_speech_with_non_music",
            "words", "duration"
        ]
        the_writer.writerow(headers)
        max_value = len(clustering_result_data)
        iteration = 1
        for video_name, video in clustering_result_data.iteritems():
            video_from_features = features_data[video_name]
            for shot_number, cluster in video.shot.iteritems():
                features = video_from_features.shot[shot_number]
                vector = [video_name, shot_number, video.category,
                          cluster] + features
                the_writer.writerow(vector)
            # one progress step per video, matching max_value
            utl.print_progress_bar(iteration, max_value)
            iteration += 1
    print("")
    print("csv file has been created successfully")

def calculate_distance_mean(category, category_data_set):
    print("calculating distance mean for '" + category + "' category ...")
    category_data_set_len = len(category_data_set)
    distance_list = []
    for i in range(category_data_set_len):
        for j in range(i + 1, category_data_set_len):
            vec1 = category_data_set[i]
            vec2 = category_data_set[j]
            distance_list.append(utl.calculate_distance(vec1, vec2))
        utl.print_progress_bar(i + 1, category_data_set_len)
    print("")
    print("distance mean calculation for category '" + category +
          "' has been completed!")
    return calculate_mean(distance_list)

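# Hedged sketch: calculate_mean is defined elsewhere in this module. Its
# use above (arithmetic mean of a list of pairwise distances) suggests:
def calculate_mean(values):
    if not values:
        return 0.0
    return float(sum(values)) / len(values)
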
def add_category_to_test_data():
    video_with_category_dictionary = get_video_with_category_dictionary(
        "category_with_video.csv")
    test_directory_path = "C:/code/features/input/structure_analysis/test/"
    for i in range(1, 5):
        directory = test_directory_path + str(i) + "/"
        file_name_list = utility.get_file_name_list(directory)
        iteration = 1
        max_value = len(file_name_list)
        print ""
        print "normalizing " + directory
        for file_name in file_name_list:
            input_path = directory + file_name
            output_path = test_directory_path + "normalized/" + str(i) + "/" + file_name
            add_category_to_xml(input_path, output_path, file_name,
                                video_with_category_dictionary)
            utility.print_progress_bar(iteration, max_value)
            iteration += 1

def generate_normalized_feature_csv(self):
    print "start csv"
    with open('normalized_complete_video_features.csv', 'wb') as f:
        the_writer = csv.writer(f)
        headers = [
            "video", "shot_number", "category",
            "interactions_number_speakers_2", "interactions_number_speakers_3",
            "interactions_number_speakers_4", "interactions_number_speakers_4+",
            "intervention_short", "intervention_long",
            "speakers_type_ponctuel", "speakers_type_localise",
            "speakers_type_present", "speakers_type_regulier",
            "speakers_type_important", "speaker_distribution",
            "mean_number_of_faces", "std_number_of_faces",
            "inter_intensity_variation", "intra_intensity_variation",
            "number_shot_transition", "number_speaker_transition",
            "speech", "music", "speech_with_music", "speech_with_non_music",
            "non_speech_with_music", "non_speech_with_non_music",
            "words", "duration"
        ]
        the_writer.writerow(headers)
        iteration = 1
        max_value = len(self.feature_list)
        for vector in self.feature_list:
            # the first 17 fields (metadata plus speaker features) pass through
            normalized_vector = vector[0:17]
            # collapse the nine inter/intra intensity variation bins to means
            inter_intensity_variation_vector = self.__convert_to_float(
                vector[17:26])
            inter_intensity_variation = (
                sum(inter_intensity_variation_vector) /
                len(inter_intensity_variation_vector))
            intra_intensity_variation_vector = self.__convert_to_float(
                vector[26:35])
            intra_intensity_variation = (
                sum(intra_intensity_variation_vector) /
                len(intra_intensity_variation_vector))
            other_features = self.__convert_to_float(vector[35:len(vector)])
            normalized_vector.append(inter_intensity_variation)
            normalized_vector.append(intra_intensity_variation)
            normalized_vector += other_features
            the_writer.writerow(normalized_vector)
            utility.print_progress_bar(iteration, max_value)
            iteration += 1

def convert_to_csv(txt_file_path, headers):
    print ""
    print "generating " + txt_file_path + " csv file"
    csv_rows = [headers]
    output = txt_file_path.replace(".txt", ".csv")
    with open(txt_file_path, "r") as f:
        for line in f:
            csv_rows.append(line.split())
    with open(output, 'wb') as csv_f:
        the_writer = csv.writer(csv_f)
        iteration = 1
        max_value = len(csv_rows)
        for row in csv_rows:
            the_writer.writerow(row)
            utility.print_progress_bar(iteration, max_value)
            iteration += 1

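# Example call (hypothetical path and headers, for illustration only):
#   convert_to_csv("C:/code/features/output/shot_durations.txt",
#                  ["video", "shot_number", "duration"])
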
def generate_dominant_category_csv(input_file_path, output_file_path):
    videos_dictionary, algorithms_names = get_videos_dictionary(
        input_file_path)
    print ""
    print "generating dominant category csv"
    with open(output_file_path, 'wb') as f:
        the_writer = csv.writer(f)
        headers = ["video", "category"] + algorithms_names
        the_writer.writerow(headers)
        iteration = 1
        max_value = len(videos_dictionary)
        for video_name, video_object in videos_dictionary.iteritems():
            vector = get_dominant_category_vector(video_object)
            the_writer.writerow(vector)
            utl.print_progress_bar(iteration, max_value)
            iteration += 1

def __fill_features_data(features_file_path):
    with open(features_file_path) as f:
        data_set_count = sum(1 for line in f) - 1
    with open(features_file_path, 'r') as csvFile:
        reader = csv.reader(csvFile)
        iteration = 0
        print("preparing features data ...")
        feature_list = []
        for row in reader:
            if iteration > 0:
                feature_list.append(row)
                utility.print_progress_bar(iteration, data_set_count)
            iteration += 1
    print("")
    return feature_list

def fill_features_data(features_file_path):
    with open(features_file_path) as f:
        data_set_count = sum(1 for line in f) - 1
    with open(features_file_path, 'r') as csvFile:
        reader = csv.reader(csvFile)
        iteration = 0
        video_list = []
        print("preparing features data ...")
        video = Video()
        for row in reader:
            if iteration > 0:
                video_name = row[0]
                shot_number = row[1]
                category_label = row[2]
                features = convert_to_float(row[3:len(row)])
                if video.name != "" and video.name != video_name:
                    # finalize the previous video before starting a new one:
                    # its features are the average of its per-shot features
                    video.features = calculate_vectors_average(
                        list(video.shots.itervalues()))
                    video_list.append(video)
                    video = Video()
                if video.name == "":
                    video.category = category_label
                    video.name = video_name
                    video.shots = {}
                    video.features = []
                video.shots[shot_number] = features
                utl.print_progress_bar(iteration, data_set_count)
            iteration += 1
        # the loop never finalizes the last video, so do it here
        if video.name != "":
            video.features = calculate_vectors_average(
                list(video.shots.itervalues()))
            video_list.append(video)
    print("")
    return video_list

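# Hedged sketch: calculate_vectors_average is defined elsewhere. Its use
# above (averaging a video's per-shot feature vectors into one vector)
# suggests an element-wise mean over equal-length vectors (an assumption):
def calculate_vectors_average(vectors):
    count = float(len(vectors))
    return [sum(column) / count for column in zip(*vectors)]
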
def generate_video_similarity_distance_csv(self, method):
    methods = {
        "shots_method": self.__calculate_similarity_common_shots_method,
        "common_clusters_method": self.__calculate_similarity_common_clusters_method,
        "distance_matrix_method": self.__calculate_similarity_distance_matrix_method_v2
    }
    print "creating csv file..."
    with open(self.__directory_path + self.__out_put_file_name, 'wb') as f:
        the_writer = csv.writer(f)
        headers = ["v1", "v2", "v1 category", "v2 category", "distance"]
        the_writer.writerow(headers)
        video_list_length = len(self.__video_list)
        # total number of unordered video pairs
        max_value = video_list_length * (video_list_length - 1) // 2
        iteration = 1
        for i in range(video_list_length):
            for j in range(i + 1, video_list_length):
                v1 = self.__video_list[i]
                v2 = self.__video_list[j]
                video_similarity_row = [
                    v1.name, v2.name, v1.category, v2.category
                ]
                distance = methods[method](v1, v2)
                video_similarity_row.append(distance)
                the_writer.writerow(video_similarity_row)
                utl.print_progress_bar(iteration, max_value)
                iteration += 1
    print("")
    print("csv file has been created successfully")

def generate_intra_inter_category_distribution(features_file_path,
                                               input_file_path,
                                               output_file_path):
    category_center_features = get_category_center_features_dictionary(
        input_file_path)
    category_all_features = get_category_all_features_dictionary(
        features_file_path)
    category_self_distance_dictionary = get_category_self_distance_dictionary(
        category_all_features, category_center_features)
    category_distance_dictionary = {}
    categories = []
    for category, category_feature_vector in category_center_features.iteritems():
        categories.append(category)
        for category2, category_feature_vector2 in category_center_features.iteritems():
            if category != category2:
                dist = get_two_categories_distance(
                    category_all_features[category],
                    category_center_features[category2])
            else:
                dist = category_self_distance_dictionary[category]
            category_distance_dictionary[category + "_" + category2] = dist
    print ""
    print "generating intra/inter category distribution csv"
    with open(output_file_path, 'wb') as f:
        the_writer = csv.writer(f)
        headers = ["category"] + categories
        the_writer.writerow(headers)
        iteration = 1
        # one row (and one progress step) per category
        max_value = len(categories)
        for category in categories:
            vector = [category]
            for category2 in categories:
                vector.append(
                    category_distance_dictionary[category + "_" + category2])
            the_writer.writerow(vector)
            utl.print_progress_bar(iteration, max_value)
            iteration += 1

def __fill_video_list(csv_file):
    with open(csv_file) as f:
        data_set_count = sum(1 for line in f) - 1
    with open(csv_file, 'r') as csvFile:
        reader = csv.reader(csvFile)
        iteration = 0
        video_list = []
        print("preparing video list data ...")
        video = Video()
        for row in reader:
            if iteration > 0:
                video_name = row[0]
                shot_number = row[1]
                category_label = row[2]
                features = row[3:len(row)]
                if video.name != "" and video.name != video_name:
                    # finalize the previous video before starting a new one
                    video_list.append(video)
                    video = Video()
                if video.name == "":
                    video.category = category_label
                    video.name = video_name
                    video.clusters = {}
                    video.shots = {}
                shot = Shot()
                shot.features = SimilarityCalculator.__convert_to_float(features)
                shot.number = int(shot_number)
                video.shots[shot_number] = shot
                utl.print_progress_bar(iteration, data_set_count)
            iteration += 1
        # the loop never appends the final video, so add it here
        if video.name != "":
            video_list.append(video)
    print("")
    return video_list

def fill_data_set(csv_file):
    with open(csv_file) as f:
        data_set_count = sum(1 for line in f) - 1
    with open(csv_file, 'r') as csvFile:
        reader = csv.reader(csvFile)
        iteration = 0
        print("preparing dataset_by_category ...")
        dataset_by_category = {}
        for row in reader:
            if iteration > 0:
                category = row[2]
                if category not in dataset_by_category:
                    dataset_by_category[category] = []
                dataset_by_category[category].append(
                    convert_to_float(row[3:len(row)]))
                utl.print_progress_bar(iteration, data_set_count)
            iteration += 1
    print("")
    return dataset_by_category

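# Example usage: the per-category dataset built here feeds
# calculate_distance_mean, as in generate_category_mean_csv above
# ("news" is a hypothetical category label, for illustration only):
#   dataset_by_category = fill_data_set("shot_features.csv")
#   mean = calculate_distance_mean("news", dataset_by_category["news"])
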
def get_features_list(csv_file):
    data_set = []
    shot_metadata_list = []
    with open(csv_file) as f:
        data_set_count = sum(1 for line in f) - 1
    with open(csv_file, 'r') as csvFile:
        reader = csv.reader(csvFile)
        iteration = 0
        print("")
        print("preparing data...")
        for row in reader:
            if iteration > 0:
                # shot metadata (video, shot number, category, placeholder);
                # collected separately, only the numeric features are returned
                shot_metadata = [row[0], row[1], row[2], 0]
                shot_metadata_list.append(shot_metadata)
                data_set.append(convert_to_float(row[3:len(row)]))
                utility.print_progress_bar(iteration, data_set_count)
            iteration += 1
    print("")
    return data_set

def generate_category_feature(input_file_path, output_file_path):
    category_features = get_category_all_features_dictionary(input_file_path)
    category_average_features = {}
    for category, category_feature_vector_list in category_features.iteritems():
        category_average_features[category] = calculate_vectors_average(
            category_feature_vector_list)
    print ""
    print "generating category feature csv"
    with open(output_file_path, 'wb') as f:
        the_writer = csv.writer(f)
        # each row holds a category label followed by its averaged features
        headers = [
            "category",
            "interactions_number_speakers_2", "interactions_number_speakers_3",
            "interactions_number_speakers_4", "interactions_number_speakers_4+",
            "intervention_short", "intervention_long",
            "speakers_type_ponctuel", "speakers_type_localise",
            "speakers_type_present", "speakers_type_regulier",
            "speakers_type_important", "speaker_distribution",
            "mean_number_of_faces", "std_number_of_faces",
            "inter_intensity_variation", "intra_intensity_variation",
            "number_shot_transition", "number_speaker_transition",
            "speech", "music", "speech_with_music", "speech_with_non_music",
            "non_speech_with_music", "non_speech_with_non_music",
            "words", "duration"
        ]
        the_writer.writerow(headers)
        iteration = 1
        max_value = len(category_average_features)
        for category, category_feature_vector in category_average_features.iteritems():
            vector = [category] + category_feature_vector
            the_writer.writerow(vector)
            utl.print_progress_bar(iteration, max_value)
            iteration += 1

def fill_features_data(features_file_path):
    with open(features_file_path) as f:
        data_set_count = sum(1 for line in f) - 1
    with open(features_file_path, 'r') as csvFile:
        reader = csv.reader(csvFile)
        iteration = 0
        video_list = {}
        print("preparing clustering results data ...")
        video = VideoClusterResult()
        for row in reader:
            if iteration > 0:
                video_name = row[0]
                shot_number = row[1]
                category_label = row[2]
                features = row[3:len(row)]
                if video.name != "" and video.name != video_name:
                    # store the previous video before starting a new one
                    if video.name not in video_list:
                        video_list[video.name] = video
                    video = VideoClusterResult()
                if video.name == "":
                    video.category = category_label
                    video.name = video_name
                    video.shot = {}
                video.shot[shot_number] = features
                utl.print_progress_bar(iteration, data_set_count)
            iteration += 1
        # the loop never stores the final video, so add it here
        if video.name != "" and video.name not in video_list:
            video_list[video.name] = video
    print("")
    return video_list

def get_category_center_features_dictionary(input_file_path):
    category_features = {}
    with open(input_file_path) as f:
        data_set_count = sum(1 for line in f) - 1
    with open(input_file_path, 'r') as csvFile:
        reader = csv.reader(csvFile)
        iteration = 0
        print("")
        print("preparing dataset_by_category ...")
        for row in reader:
            if iteration > 0:
                category = row[0]
                feature_vector = convert_to_float(row[1:len(row)])
                if category not in category_features:
                    category_features[category] = []
                category_features[category].append(feature_vector)
                utl.print_progress_bar(iteration, data_set_count)
            iteration += 1
    return category_features

def __generate_mean_average_precision_category_per_cluster_csv(self):
    print "generate_mean_average_precision_category_per_cluster_csv..."
    precision_category_per_cluster_directory = \
        self.__input_directory_path.replace("clustering_results", "evaluation_results") \
        + "precision_category_per_cluster/"
    file_name_list = utl.get_file_name_list(
        precision_category_per_cluster_directory)
    accuracy_list = {}
    max_value = len(file_name_list)
    for file_name in file_name_list:
        key = file_name.replace(".csv", "") \
            .replace("_k_means", "") \
            .replace("_birch", "") \
            .replace("_mean_shift", "") \
            .replace("_db_scan", "") \
            .replace("k_medoids_results_", "")
        category_per_cluster_csv = precision_category_per_cluster_directory + file_name
        accuracy_list[int(key)] = EvaluationManager.__get_mean_average_precision(
            category_per_cluster_csv)
    output_path = precision_category_per_cluster_directory.replace(
        "precision_category_per_cluster/", "")
    with open(output_path + "mean_average_precision_category_per_cluster.csv",
              'wb') as f:
        the_writer = csv.writer(f)
        headers = ["file_name", "map"]
        the_writer.writerow(headers)
        iteration = 1
        accuracy_list = collections.OrderedDict(sorted(accuracy_list.items()))
        for key, accuracy in accuracy_list.iteritems():
            vector = [key, accuracy]
            the_writer.writerow(vector)
            utl.print_progress_bar(iteration, max_value)
            iteration += 1
    print("")
    print("csv file has been created successfully")

def get_videos_dictionary(input_file_path):
    videos_dictionary = {}
    with open(input_file_path) as f:
        data_set_count = sum(1 for line in f) - 1
    algorithms_names = [
        "kNN", "Logistic_Regression", "Neural_Network", "SVM",
        "Random_Forest", "Naive_Bayes", "AdaBoost"
    ]
    with open(input_file_path, 'r') as csvFile:
        reader = csv.reader(csvFile)
        iteration = 0
        print("preparing dataset_by_category ...")
        for row in reader:
            if iteration > 0:
                category = row[0]
                video_name = row[1]
                shot_number = row[2]
                algorithms_results = row[3:len(row)]
                if video_name not in videos_dictionary:
                    videos_dictionary[video_name] = Video()
                    videos_dictionary[video_name].category = category
                    videos_dictionary[video_name].name = video_name
                    videos_dictionary[video_name].shots = {}
                if shot_number not in videos_dictionary[video_name].shots:
                    videos_dictionary[video_name].shots[shot_number] = {}
                for i, algorithms_name in enumerate(algorithms_names):
                    videos_dictionary[video_name].shots[shot_number][
                        algorithms_name] = algorithms_results[i]
                utl.print_progress_bar(iteration, data_set_count)
            iteration += 1
    return videos_dictionary, algorithms_names

import utility
import csv
from feature_manager import FeatureManager

directory_path = "C:/code/features/output/video/structure-analysis-video-dev/"
feature_files = utility.get_file_name_list(directory_path)
feature_vector_list = []
iteration = 1
max_value = len(feature_files)
for file_name in feature_files:
    fm = FeatureManager(directory_path, file_name)
    feature_vector_list = feature_vector_list + fm.get_feature_vector_list()
    utility.print_progress_bar(iteration, max_value)
    iteration += 1

print "start csv"
with open('complete_video_features.csv', 'wb') as f:
    the_writer = csv.writer(f)
    headers = [
        "video", "shot_number", "category",
        "interactions_number_speakers_2", "interactions_number_speakers_3",
        "interactions_number_speakers_4", "interactions_number_speakers_4+",
        "intervention_short", "intervention_long",
        "speakers_type_ponctuel", "speakers_type_localise",
        "speakers_type_present", "speakers_type_regulier",
        "speakers_type_important", "speaker_distribution",
        "mean_number_of_faces", "std_number_of_faces",
        "inter_intensity_variation1", "inter_intensity_variation2",
        "inter_intensity_variation3", "inter_intensity_variation4",
        "inter_intensity_variation5", "inter_intensity_variation6",
        "inter_intensity_variation7", "inter_intensity_variation8",
        "inter_intensity_variation9", "intra_intensity_variation1",
        "intra_intensity_variation2", "intra_intensity_variation3",