def generate_average_distance_csv(features_csv, distance_output):
    feature_vectors = get_features_list(features_csv)
    feature_vector_count = len(feature_vectors)

    print "generating distance csv..."
    distance_count = 0
    distance_sum = 0
    iteration = 1
    for i in range(feature_vector_count):
        if i + 1 > feature_vector_count:
            break
        for j in range(i + 1, feature_vector_count):
            v1 = feature_vectors[i]
            v2 = feature_vectors[j]
            distance_sum += utility.calculate_distance(v1, v2)
            distance_count += 1
        utility.print_progress_bar(iteration, feature_vector_count)
        iteration += 1

    with open(distance_output, 'wb') as f:
        the_writer = csv.writer(f)
        headers = ["file", "average_distance"]
        the_writer.writerow(headers)
        vector = [features_csv, float(distance_sum) / float(distance_count)]
        the_writer.writerow(vector)
        f.close()
Exemple #2
0
def evaluate_kbest_MeanReciprocalRank(matrix, game_set_file, output_file):
    """Evaluate the association *matrix* on a word-game set.

    Each game supplies five clue words plus one solution word; the
    solution's absolute rank and group rank are obtained from
    getSolutionRank(), then two metric families are accumulated: k-best hit
    counts (k in 1..100) and Mean Reciprocal Rank, each for both rank
    types. A summary plus per-game details is written to *output_file*, or
    printed when no output file is given.

    NOTE(review): the reported MRR values are sums of 1/rank, not divided
    by the number of games — confirm downstream consumers expect the
    unnormalized sum.
    """
    from utility import print_progress_bar
    # Fallback score for clue/solution pairs absent from the matrix.
    unfound_pair_score = matrix.get_min_association_score()
    print("Min association score: {0:.1f}".format(unfound_pair_score))
    game_set = read_game_set_tab(game_set_file)
    # Thresholds for the k-best hit counters.
    kbest_list = [1, 10, 25, 50, 75, 100]
    kbest_dict_abs_rank, kbest_dict_group_rank = defaultdict(int), defaultdict(
        int)
    eval_details = []
    MRR_score_abs_rank, MRR_score_group_rank = 0, 0
    for number, game_words in enumerate(game_set, 1):
        print_progress_bar(number, len(game_set))
        # Game row layout: five clue words followed by the solution word.
        clues = game_words[:5]
        solution = game_words[5]
        abs_rank, group_rank, group, rank_in_group, scores, clues_matched_info = getSolutionRank(
            matrix, clues, solution, unfound_pair_score)
        spaced_clues_matched_info = ' '.join(clues_matched_info)
        spaced_scores = ' '.join(['{0:.1f}'.format(x) for x in scores])
        score_sum_str = '{0:.1f}'.format(sum(scores))
        # One tab-separated report line per game for the details section.
        report_fields = clues + [
            solution, abs_rank, group_rank, group, rank_in_group,
            spaced_clues_matched_info, spaced_scores, score_sum_str
        ]
        eval_report = '\t'.join([str(x) for x in report_fields])
        #print(eval_report)
        eval_details.append(eval_report)
        # Reciprocal-rank contributions only count ranks within the top 100.
        if abs_rank != WORST_SCORE and abs_rank <= 100:
            MRR_score_abs_rank += 1. / abs_rank
        if group_rank != WORST_SCORE and group_rank <= 100:
            MRR_score_group_rank += 1. / group_rank
        for t in kbest_list:
            if abs_rank != WORST_SCORE and abs_rank <= t:
                kbest_dict_abs_rank[t] += 1
            if group_rank != WORST_SCORE and group_rank <= t:
                kbest_dict_group_rank[t] += 1
    total = len(game_set)
    kbest_scores_abs_rank = sorted(kbest_dict_abs_rank.items())
    kbest_scores_group_rank = sorted(kbest_dict_group_rank.items())
    # Summary: thresholds on one line, matching hit counts on the next.
    summary = [
        'total games: {}'.format(total),
        '\nk-best scores abs rank:',
        '\t'.join([str(k) for k, score in kbest_scores_abs_rank]),
        '\t'.join([str(score) for k, score in kbest_scores_abs_rank]),
        '\nMean Reciprocal Rank score abs rank: {0:.1f}'.format(
            MRR_score_abs_rank),
        '\nk-best scores group rank:',
        '\t'.join([str(k) for k, score in kbest_scores_group_rank]),
        '\t'.join([str(score) for k, score in kbest_scores_group_rank]),
        '\nMean Reciprocal Rank score group rank: {0:.1f}'.format(
            MRR_score_group_rank),
    ]
    if output_file:
        with open(output_file, 'w') as f_out:
            print_write(f_out, '\n'.join(summary))
            print_write(f_out, '\n\nPosition Details:\n\n')
            print_write(f_out, '\n'.join(eval_details))
            print_write(f_out, '\n\n')
    else:
        print('\n'.join(summary))
        print('\n\n')
def generate_category_mean_csv():
    dataset_by_category_result = fill_data_set("shot_features.csv")

    category_mean_list = {}
    for key, value in dataset_by_category_result.iteritems():
        mean = calculate_distance_mean(key, value)
        category_mean_list[key] = mean

    for key, value in category_mean_list.iteritems():
        print key, str(value)

    print "creating csv file..."
    with open('category_mean.csv', 'wb') as f:
        the_writer = csv.writer(f)
        headers = ["category", "distance_mean"]
        the_writer.writerow(headers)
        iteration = 1
        max_value = len(category_mean_list)

        for key, value in category_mean_list.iteritems():
            vector = [key, value]
            the_writer.writerow(vector)
            utl.print_progress_bar(iteration, max_value)
            iteration += 1
        f.close()
        print("")
        print("csv file has been created successfully")
Exemple #4
0
    def __generate_cluster_distribution_per_category_csv(self, file_name):

        print "creating csv file..."
        output_path = self.__input_directory_path.replace("clustering_results", "evaluation_results")

        with open(
                output_path + "cluster_per_category/" + file_name,
                'wb') as f:
            the_writer = csv.writer(f)
            headers = [
                "category"
            ]

            for cluster in self.__cluster_list:
                headers.append("cluster " + str(cluster))
                # print str(cluster)
            the_writer.writerow(headers)
            iteration = 1

            for category, cluster_list_per_category in self.__clustering_result_set_by_category.iteritems():
                vector = [category]
                clusters_per_category_statistics_dictionary = self.__get_cluster_distribution_per_category(
                    cluster_list_per_category)
                for category2, stat in clusters_per_category_statistics_dictionary.iteritems():
                    vector.append(stat)
                the_writer.writerow(vector)
                utl.print_progress_bar(iteration, self.__clustering_result_set_by_category_count)
                iteration += 1
            f.close()
            print("")
            print("csv file has been created successfully")
Exemple #5
0
def generate_category_video_name_csv(category_codes_csv_file_path, video_with_category_csv_file_path):
    """Join every video name with its human-readable category label and
    write the pairs to ``category_with_video.csv``.

    Output columns: video_name, category.
    """
    category_codes_dictionary = get_category_codes_dictionary(category_codes_csv_file_path)
    video_with_category_code_dictionary = get_video_with_category_code_dictionary(video_with_category_csv_file_path)
    with open("category_with_video.csv", 'wb') as csv_f:
        the_writer = csv.writer(csv_f)
        iteration = 1
        max_value = len(video_with_category_code_dictionary)
        the_writer.writerow(["video_name", "category"])
        for video_name, category_code in video_with_category_code_dictionary.iteritems():
            # Translate the numeric category code into its label.
            the_writer.writerow([video_name, category_codes_dictionary[category_code]])
            utility.print_progress_bar(iteration, max_value)
            iteration += 1
        # File is closed by the with-statement; explicit close() removed.
Exemple #6
0
def generate_classification_accuracy(input_file, output_file):
    well_classified_counter_dic = {}
    with open(input_file) as f:
        total_videos_count = sum(1 for line in f) - 1
    f.close()

    algorithms_names = [
        "kNN", "Logistic_Regression", "Neural_Network", "SVM", "Random_Forest",
        "Naive_Bayes", "AdaBoost"
    ]
    for algorithm_name in algorithms_names:
        well_classified_counter_dic[algorithm_name] = 0

    with open(input_file, 'r') as csvFile:
        reader = csv.reader(csvFile)
        iteration = 0
        print ""
        print("preparing dataset_by_category ...")

        for row in reader:
            if iteration > 0:
                category = row[1]
                algorithms_results = row[2:len(row)]
                i = 0
                for algorithm_name in algorithms_names:
                    if category == algorithms_results[i]:
                        well_classified_counter_dic[algorithm_name] += 1
                    i += 1
                utl.print_progress_bar(iteration + 1, total_videos_count)
            iteration += 1
    csvFile.close()
    algorithm_score = {}
    for algorithm_name, well_classified_counter in well_classified_counter_dic.iteritems(
    ):
        algorithm_score[algorithm_name] = float(
            well_classified_counter) / float(total_videos_count)

    print ""
    print "generating generate category feature csv"
    with open(output_file, 'wb') as f:
        the_writer = csv.writer(f)
        the_writer.writerow(algorithms_names)
        iteration = 1
        max_value = len(algorithm_score)
        vector = []
        for algorithm_name, accuracy in algorithm_score.iteritems():
            vector.append(accuracy)
            utl.print_progress_bar(iteration, max_value)
            iteration += 1
        the_writer.writerow(vector)
        f.close()
 def __fill_data_set(self, csv_file):
     """Load *csv_file* into the instance's data-set fields.

     For every data row (the first line is skipped as a header), columns
     0-2 plus a placeholder 0 go into self.__shot_metadata, and columns 3+
     are converted to floats and appended to self.__data_set.

     NOTE(review): assumes self.__data_set_count already holds the number
     of data rows (only used for the progress bar) — confirm against the
     caller.
     """
     with open(csv_file, 'r') as csvFile:
         reader = csv.reader(csvFile)
         iteration = 0
         print("preparing data...")
         for row in reader:
             if iteration > 0:
                 # [video, shot, category, placeholder 0] per shot.
                 shot_metadata = [row[0], row[1], row[2], 0]
                 self.__shot_metadata.append(shot_metadata)
                 self.__data_set.append(self.__convert_to_float(row[3:len(row)]))
                 utility.print_progress_bar(iteration, self.__data_set_count)
             iteration += 1
     # Redundant: the with-statement has already closed the file.
     csvFile.close()
     print("")
def generate_csv(k_medoids_result, output_file):
    print "generating generate category feature csv"
    with open(output_file, 'wb') as f:
        the_writer = csv.writer(f)
        headers = ["video", "shot_number", "category", "cluster"]
        the_writer.writerow(headers)
        iteration = 1
        max_value = len(k_medoids_result)
        for cluster, cluster_videos in k_medoids_result.iteritems():
            for video in cluster_videos:
                vector = [video.name, 0, video.category, cluster]
                the_writer.writerow(vector)
            utl.print_progress_bar(iteration, max_value)
            iteration += 1
        f.close()
def create_csv_file(two_category_distance_mean_list):
    print "creating different category mean csv file..."
    with open('different_category_mean.csv', 'wb') as f:
        the_writer = csv.writer(f)
        headers = ["category1", "category2", "distance_mean"]
        the_writer.writerow(headers)
        iteration = 1
        max_value = len(two_category_distance_mean_list)
        for vector in two_category_distance_mean_list:
            the_writer.writerow(vector)
            utl.print_progress_bar(iteration, max_value)
            iteration += 1

        print("")
        print("csv file has been created successfully")
Exemple #10
0
    def __fill_clustering_result_set(self, file_name):
        """Parse a clustering-result CSV and populate the per-category and
        per-cluster result sets on the instance.

        Each data row is expected to be [video, shot, category, cluster].
        After parsing, both result dicts are key-sorted into OrderedDicts
        and the sorted cluster/category lists plus entry counts are stored
        as instance attributes.
        """
        # First pass: number of data rows (minus header) for the progress bar.
        with open(self.__input_directory_path + file_name) as f:
            self.__clustering_result_set_by_category_count = sum(1 for line in f) - 1
        # Redundant: the with-statement has already closed the file.
        f.close()
        with open(self.__input_directory_path + file_name, 'r') as csvFile:
            reader = csv.reader(csvFile)
            iteration = 0
            print("preparing dataset_by_category ...")
            dataset_by_category = {}
            clusters = []
            dataset_by_cluster = {}
            categories = []

            for row in reader:
                if iteration > 0:  # skip the header row
                    category = row[2]
                    cluster = row[3]

                    # Cluster ids are collected as ints (for numeric sort),
                    # while the per-category lists keep the raw strings.
                    if category not in dataset_by_category:
                        dataset_by_category[category] = []
                    if int(cluster) not in clusters:
                        clusters.append(int(cluster))

                    dataset_by_category[category].append(cluster)

                    if cluster not in dataset_by_cluster:
                        dataset_by_cluster[cluster] = []
                    if category not in categories:
                        categories.append(category)

                    dataset_by_cluster[cluster].append(category)

                    utl.print_progress_bar(iteration + 1, self.__clustering_result_set_by_category_count)
                iteration += 1
        # Redundant: the with-statement has already closed the file.
        csvFile.close()
        print("")
        self.__clustering_result_set_by_category = dataset_by_category
        self.__cluster_list = clusters
        self.__category_list = categories
        self.__clustering_result_set_by_cluster = dataset_by_cluster
        self.__cluster_list.sort()
        self.__category_list.sort()
        self.__clustering_result_set_by_category = collections.OrderedDict(
            sorted(self.__clustering_result_set_by_category.items()))
        self.__clustering_result_set_by_cluster = collections.OrderedDict(
            sorted(self.__clustering_result_set_by_cluster.items()))
        # NOTE(review): the row count set during the first pass is
        # repurposed here as the number of distinct categories — later
        # progress bars depend on this final value.
        self.__clustering_result_set_by_category_count = len(self.__clustering_result_set_by_category)
        self.__clustering_result_set_by_cluster_count = len(self.__clustering_result_set_by_cluster)
Exemple #11
0
    def __generate_precision_cluster_per_category_csv(self, file_name):
        """Derive a per-category precision CSV from the previously written
        cluster-distribution CSV.

        For each category the dominant cluster (largest distribution score)
        is located; its precision is that score divided by the category's
        score total. Output columns: category, cluster, precision.
        """
        print "generate_mean_average_precision_cluster_per_category_csv..."
        precision_category_per_cluster_input_file = self.__input_directory_path.replace("clustering_results",
                                                                                        "evaluation_results") \
                                                    + "cluster_per_category/" + file_name
        iteration = 0
        with open(precision_category_per_cluster_input_file) as csvFile:
            reader = csv.reader(csvFile)
            print("preparing dataset_by_category ...")
            categories = {}
            clusters = []
            for row in reader:
                if iteration > 0:
                    # Data row: category label then one score per cluster.
                    category_label = row[0]
                    category_distribution_score = EvaluationManager.__convert_to_float(row[1:len(row)])
                    categories[category_label] = category_distribution_score
                else:
                    # Header row: cluster column names.
                    clusters = row[1: len(row)]
                iteration += 1
        # Number of category rows = total rows minus the header.
        max_value = iteration - 1
        # Redundant: the with-statement has already closed the file.
        csvFile.close()
        output_file_path = precision_category_per_cluster_input_file.replace("cluster_per_category",
                                                                             "precision_cluster_per_category")
        with open(output_file_path, 'wb') as f:
            the_writer = csv.writer(f)
            headers = [
                "category",
                "cluster",
                "precision"
            ]

            the_writer.writerow(headers)
            iteration = 1
            categories = collections.OrderedDict(
                sorted(categories.items()))

            for category_label, category_distribution_score in categories.iteritems():
                # Dominant cluster = position of the maximum score.
                max_feature = max(category_distribution_score)
                total = sum(category_distribution_score)
                index_of_max_feature = category_distribution_score.index(max_feature)
                score = max_feature / total
                vector = [category_label, clusters[index_of_max_feature], score]
                the_writer.writerow(vector)
                utl.print_progress_bar(iteration, max_value)
                iteration += 1
            # Redundant: the with-statement has already closed the file.
            f.close()
            print("")
            print("csv file has been created successfully")
Exemple #12
0
def merge_features_with_clustering_results_complete(
        features_file_path, clustering_results_file_path, out_put_file_path):
    """Join each shot's feature vector with its clustering assignment and
    write the merged rows to *out_put_file_path*.

    Output columns: video, shot_number, category, cluster, followed by the
    full per-shot feature vector (see the headers list below).

    NOTE(review): this reads video.shot, while fill_features_data in this
    file populates video.shots — confirm which attribute the
    clustering-results loader actually fills, and that features_data
    supports lookup by video name.
    """
    clustering_result_data = fill_clustering_results_data(
        clustering_results_file_path)
    features_data = fill_features_data(features_file_path)
    print "creating csv file..."
    with open(out_put_file_path, 'wb') as f:
        the_writer = csv.writer(f)
        headers = [
            "video", "shot_number", "category", "cluster",
            "interactions_number_speakers_2", "interactions_number_speakers_3",
            "interactions_number_speakers_4",
            "interactions_number_speakers_4+", "intervention_short",
            "intervention_long", "speakers_type_ponctuel",
            "speakers_type_localise", "speakers_type_present",
            "speakers_type_regulier", "speakers_type_important",
            "speaker_distribution", "mean_number_of_faces",
            "std_number_of_faces", "inter_intensity_variation1",
            "inter_intensity_variation2", "inter_intensity_variation3",
            "inter_intensity_variation4", "inter_intensity_variation5",
            "inter_intensity_variation6", "inter_intensity_variation7",
            "inter_intensity_variation8", "inter_intensity_variation9",
            "intra_intensity_variation1", "intra_intensity_variation2",
            "intra_intensity_variation3", "intra_intensity_variation4",
            "intra_intensity_variation5", "intra_intensity_variation6",
            "intra_intensity_variation7", "intra_intensity_variation8",
            "intra_intensity_variation9", "number_shot_transition",
            "number_speaker_transition", "speech", "music",
            "speech_with_music", "speech_with_non_music",
            "non_speech_with_music", "non_speech_with_non_music", "words",
            "duration"
        ]
        the_writer.writerow(headers)
        video_list_length = len(clustering_result_data)
        max_value = video_list_length
        iteration = 1
        for video_name, video in clustering_result_data.iteritems():
            # Look up the same video's per-shot feature vectors.
            video_from_features = features_data[video_name]
            for shot_number, cluster in video.shot.iteritems():
                features = video_from_features.shot[shot_number]
                vector = [video_name, shot_number, video.category, cluster
                          ] + features
                the_writer.writerow(vector)
            utl.print_progress_bar(iteration, max_value)
            iteration += 1
        # Redundant: the with-statement has already closed the file.
        f.close()
        print("")
        print("csv file has been created successfully")
def calculate_distance_mean(category, category_data_set):
    """Return the mean pairwise distance between all feature vectors of one
    category.

    Distances are computed for every unordered pair of vectors in
    *category_data_set* and averaged via calculate_mean().
    """
    print("calculating distance mean for '" + category + "' category  ...")

    vector_count = len(category_data_set)
    pairwise_distances = []
    for first in range(vector_count):
        # Only unordered pairs (first, second) with second > first.
        pairwise_distances.extend(
            utl.calculate_distance(category_data_set[first],
                                   category_data_set[second])
            for second in range(first + 1, vector_count))
        utl.print_progress_bar(first + 1, vector_count)
    print("")
    print("distance mean calculation for category '" + category +
          "' has been completed!")

    return calculate_mean(pairwise_distances)
Exemple #14
0
def add_category_to_test_data():
    video_with_category_dictionary = get_video_with_category_dictionary("category_with_video.csv")
    test_directory_path = "C:/code/features/input/structure_analysis/test/"
    for i in range(1, 5):
        directory = test_directory_path + str(i) + "/"
        file_name_list = utility.get_file_name_list(directory)
        iteration = 1
        max_value = len(file_name_list)
        print ""
        print "normalizing " + directory
        for file_name in file_name_list:
            input_path = directory + file_name
            output_path = test_directory_path + "normalized/" + str(i) + "/" + file_name
            add_category_to_xml(input_path, output_path, file_name, video_with_category_dictionary)
            utility.print_progress_bar(iteration, max_value)
            iteration += 1
Exemple #15
0
 def generate_normalized_feature_csv(self):
     """Write self.feature_list to ``normalized_complete_video_features.csv``
     with the nine inter- and nine intra-intensity-variation columns each
     collapsed into their mean.

     Input vector layout (by position): 0-16 kept verbatim, 17-25
     inter-intensity variations, 26-34 intra-intensity variations, 35+
     remaining features converted to float.
     """
     print "start csv"
     with open('normalized_complete_video_features.csv', 'wb') as f:
         the_writer = csv.writer(f)
         headers = [
             "video", "shot_number", "category",
             "interactions_number_speakers_2",
             "interactions_number_speakers_3",
             "interactions_number_speakers_4",
             "interactions_number_speakers_4+", "intervention_short",
             "intervention_long", "speakers_type_ponctuel",
             "speakers_type_localise", "speakers_type_present",
             "speakers_type_regulier", "speakers_type_important",
             "speaker_distribution", "mean_number_of_faces",
             "std_number_of_faces", "inter_intensity_variation",
             "intra_intensity_variation", "number_shot_transition",
             "number_speaker_transition", "speech", "music",
             "speech_with_music", "speech_with_non_music",
             "non_speech_with_music", "non_speech_with_non_music", "words",
             "duration"
         ]
         the_writer.writerow(headers)
         iteration = 1
         max_value = len(self.feature_list)
         for vector in self.feature_list:
             # Columns 0-16 are copied through unchanged.
             normalized_vector = vector[0:17]
             inter_intensity_variation_vector = self.__convert_to_float(
                 vector[17:26])
             # Mean of the nine inter-intensity-variation columns.
             inter_intensity_variation = sum(
                 inter_intensity_variation_vector) / len(
                     inter_intensity_variation_vector)
             intra_intensity_variation_vector = self.__convert_to_float(
                 vector[26:35])
             # Mean of the nine intra-intensity-variation columns.
             intra_intensity_variation = sum(
                 intra_intensity_variation_vector) / len(
                     intra_intensity_variation_vector)
             other_features = self.__convert_to_float(
                 vector[35:len(vector)])
             normalized_vector.append(inter_intensity_variation)
             normalized_vector.append(intra_intensity_variation)
             normalized_vector += other_features
             the_writer.writerow(normalized_vector)
             utility.print_progress_bar(iteration, max_value)
             iteration += 1
         # Redundant: the with-statement has already closed the file.
         f.close()
Exemple #16
0
def convert_to_csv(txt_file_path, headers):
    print ""
    print "generating " + txt_file_path + " csv file"
    f = open(txt_file_path, "r")
    csv_rows = [headers]
    output = txt_file_path.replace(".txt", ".csv")
    for line in f:
        csv_rows.append(line.split())
    f.close()
    with open(output, 'wb') as csv_f:
        the_writer = csv.writer(csv_f)
        iteration = 1
        max_value = len(csv_rows)
        for row in csv_rows:
            the_writer.writerow(row)
            utility.print_progress_bar(iteration, max_value)
            iteration += 1
        csv_f.close()
Exemple #17
0
def generate_dominant_category_csv(input_file_path, output_file_path):
    videos_dictionary, algorithms_names = get_videos_dictionary(
        input_file_path)

    print ""
    print "generating generate category feature csv"
    with open(output_file_path, 'wb') as f:
        the_writer = csv.writer(f)
        headers = ["video", "category"] + algorithms_names
        the_writer.writerow(headers)
        iteration = 1
        max_value = len(videos_dictionary)
        for video_name, video_object in videos_dictionary.iteritems():
            vector = get_dominant_category_vector(video_object)
            the_writer.writerow(vector)
            utl.print_progress_bar(iteration, max_value)
            iteration += 1
        f.close()
Exemple #18
0
    def __fill_features_data(features_file_path):
        """Read every data row of *features_file_path* into a list.

        The first line is skipped as a header; rows are returned as raw
        string lists from the csv reader.

        NOTE(review): no self/cls parameter — presumably decorated as a
        @staticmethod just above this view; confirm.
        """
        # First pass: count data rows (minus header) for the progress bar.
        with open(features_file_path) as f:
            data_set_count = sum(1 for line in f) - 1
        # Redundant: the with-statement has already closed the file.
        f.close()
        with open(features_file_path, 'r') as csvFile:
            reader = csv.reader(csvFile)
            iteration = 0
            print("preparing clustering results data ...")
            feature_list = []
            for row in reader:
                if iteration > 0:
                    feature_list.append(row)
                    utility.print_progress_bar(iteration + 1, data_set_count)
                iteration += 1
        # Redundant: the with-statement has already closed the file.
        csvFile.close()
        print("")
        return feature_list
Exemple #19
0
def fill_features_data(features_file_path):
    """Parse a per-shot feature CSV into a list of Video objects.

    Rows are expected as [video, shot, category, features...]. Consecutive
    rows with the same video name are folded into one Video whose .shots
    maps shot number to the float feature vector and whose .features is
    the average of those shot vectors.

    NOTE(review): the Video being accumulated when the file ends is never
    appended to video_list, so the file's last video is dropped — confirm
    whether callers compensate.
    NOTE(review): 'video.name not in video_list' compares a string against
    Video objects, so it is presumably always true — verify.
    """
    # First pass: count data rows (minus header) for the progress bar.
    with open(features_file_path) as f:
        data_set_count = sum(1 for line in f) - 1
    # Redundant: the with-statement has already closed the file.
    f.close()
    with open(features_file_path, 'r') as csvFile:
        reader = csv.reader(csvFile)
        iteration = 0
        video_list = []
        print("preparing clustering results data ...")
        video = Video()

        for row in reader:
            if iteration > 0:
                video_name = row[0]
                shot_number = row[1]
                category_label = row[2]
                features = convert_to_float(row[3:len(row)])
                # First data row: initialise the very first Video.
                if video.name == "":
                    video.category = category_label
                    video.name = video_name
                    video.shots = {}

                # NOTE(review): this assignment happens before the
                # new-video check below, so each video's first shot lands
                # on the previous Video — confirm intended.
                video.shots[shot_number] = features

                # A new video name flushes the completed Video: average its
                # shot vectors, store it, and start a fresh one.
                if video.name != video_name and video.name not in video_list:
                    shot_feature_list = []
                    for shot, shot_features in video.shots.iteritems():
                        shot_feature_list.append(shot_features)
                    video.features = calculate_vectors_average(shot_feature_list)
                    video_list.append(video)
                    # new video
                    video = Video()
                    video.category = category_label
                    video.name = video_name
                    video.shots = {}
                    video.features = []

                utl.print_progress_bar(iteration + 1, data_set_count)
            iteration += 1
    # Redundant: the with-statement has already closed the file.
    csvFile.close()
    print("")
    return video_list
    def generate_video_similarity_distance_csv(self, method):
        """Write pairwise video similarity distances to the configured
        output CSV, using the distance callable selected by *method*.

        Output columns: v1, v2, v1 category, v2 category, distance.

        NOTE(review): max_value is overwritten with 20 and the outer loop
        breaks once iteration exceeds it, so only the first ~20 videos'
        pairings are written — this looks like a debugging cap left in;
        confirm before relying on the output.
        """
        # Dispatch table: method name -> bound similarity callable.
        methods = {
            "shots_method":
            self.__calculate_similarity_common_shots_method,
            "common_clusters_method":
            self.__calculate_similarity_common_clusters_method,
            "distance_matrix_method":
            self.__calculate_similarity_distance_matrix_method_v2
        }
        print "creating csv file..."
        with open(self.__directory_path + self.__out_put_file_name, 'wb') as f:
            the_writer = csv.writer(f)
            headers = ["v1", "v2", "v1 category", "v2 category", "distance"]
            the_writer.writerow(headers)
            video_list_length = len(self.__video_list)
            max_value = video_list_length
            max_value = 20
            iteration = 1

            for i in range(video_list_length):
                # print("calculate distance for " + self.__video_list[i].name)
                # print ""
                # iteration = 1
                # Only unordered pairs (i, j) with j > i are measured.
                for j in range(i + 1, video_list_length):
                    v1 = self.__video_list[i]
                    v2 = self.__video_list[j]
                    video_similarity_row = [
                        v1.name, v2.name, v1.category, v2.category
                    ]

                    distance = methods[method](v1, v2)
                    video_similarity_row.append(distance)
                    the_writer.writerow(video_similarity_row)
                utl.print_progress_bar(iteration, max_value)
                iteration += 1
                if iteration > max_value:
                    break
                # print ""
            # Redundant: the with-statement has already closed the file.
            f.close()
            print("")
            print("csv file has been created successfully")
Exemple #21
0
def generate_intra_inter_category_distribution(features_file_path,
                                               input_file_path,
                                               output_file_path):
    category_center_features = get_category_center_features_dictionary(
        input_file_path)
    category_all_features = get_category_all_features_dictionary(
        features_file_path)
    category_self_distance_dictionary = get_category_self_distance_dictionary(
        category_all_features, category_center_features)
    category_distance_dictionary = {}
    categories = []
    for category, category_feature_vector in category_center_features.iteritems(
    ):
        categories.append(category)
        for category2, category_feature_vector2 in category_center_features.iteritems(
        ):
            if category != category2:
                dist = get_two_categories_distance(
                    category_all_features[category],
                    category_center_features[category2])
            else:
                dist = category_self_distance_dictionary[category]

            category_distance_dictionary[category + "_" + category2] = dist
    print ""
    print "generating generate category feature csv"
    with open(output_file_path, 'wb') as f:
        the_writer = csv.writer(f)
        headers = ["category"] + categories
        the_writer.writerow(headers)
        iteration = 1
        max_value = len(category_distance_dictionary)
        for category in categories:
            vector = [category]
            for category2 in categories:
                vector.append(category_distance_dictionary[category + "_" +
                                                           category2])
                utl.print_progress_bar(iteration, max_value)
                iteration += 1
            the_writer.writerow(vector)
        f.close()
    def __fill_video_list(csv_file):
        """Build a list of Video objects from a per-shot feature CSV.

        Rows: [video, shot, category, features...]; consecutive rows with
        the same video name are grouped into one Video whose .shots maps
        shot number to a Shot carrying float features.

        NOTE(review): no self/cls parameter — presumably a @staticmethod
        declared outside this view; confirm.
        NOTE(review): the Video accumulated when the file ends is never
        appended, so the file's last video is dropped — confirm.
        """
        # First pass: count data rows (minus header) for the progress bar.
        with open(csv_file) as f:
            data_set_count = sum(1 for line in f) - 1
        # Redundant: the with-statement has already closed the file.
        f.close()
        with open(csv_file, 'r') as csvFile:
            reader = csv.reader(csvFile)
            iteration = 0
            video_list = []
            print("preparing video list data ...")
            video = Video()

            for row in reader:
                if iteration > 0:
                    video_name = row[0]
                    shot_number = row[1]
                    category_label = row[2]
                    features = row[3:len(row)]
                    # First data row: initialise the very first Video.
                    if video.name == "":
                        video.category = category_label
                        video.name = video_name
                        video.clusters = {}
                        video.shots = {}
                    shot = Shot()
                    shot.features = SimilarityCalculator.__convert_to_float(
                        features)
                    shot.number = int(shot_number)
                    # NOTE(review): assigned before the new-video check, so
                    # each video's first shot lands on the previous Video —
                    # confirm intended.
                    video.shots[shot_number] = shot

                    # A new video name flushes the completed Video.
                    if video.name != video_name:
                        video_list.append(video)
                        video = Video()
                        video.category = category_label
                        video.name = video_name
                        video.clusters = {}
                        video.shots = {}

                    utl.print_progress_bar(iteration + 1, data_set_count)
                iteration += 1
        # Redundant: the with-statement has already closed the file.
        csvFile.close()
        print("")
        return video_list
def fill_data_set(csv_file):
    """Read *csv_file* and group its feature vectors by category.

    Returns a dict mapping category label (column 2) to a list of float
    feature vectors (columns 3+). The first line is treated as a header.
    """
    # First pass: count data rows (minus header) for the progress bar.
    with open(csv_file) as f:
        data_set_count = sum(1 for line in f) - 1
    with open(csv_file, 'r') as csvFile:
        reader = csv.reader(csvFile)
        iteration = 0
        print("preparing dataset_by_category ...")
        dataset_by_category = {}
        for row in reader:
            if iteration > 0:  # skip the header row
                category = row[2]
                if category not in dataset_by_category:
                    dataset_by_category[category] = []

                dataset_by_category[category].append(
                    convert_to_float(row[3:len(row)]))
                utl.print_progress_bar(iteration, data_set_count)
            iteration += 1
    # Files are closed by their with-statements; explicit closes removed.
    print("")
    return dataset_by_category
def get_features_list(csv_file):
    """Return the list of float feature vectors stored in *csv_file*.

    Columns 0-2 are metadata (video, shot, category) and are skipped;
    columns 3+ are converted to floats. The first line is a header.
    """
    data_set = []
    with open(csv_file) as f:
        # Data rows = total lines minus the header line. The original also
        # decremented this again when the header row was read, making the
        # progress-bar total off by one.
        data_set_count = sum(1 for line in f) - 1
    with open(csv_file, 'r') as csvFile:
        reader = csv.reader(csvFile)
        iteration = 0
        print("")

        print("preparing data...")
        for row in reader:
            if iteration > 0:
                # The original built a shot_metadata list and appended it to
                # itself (a self-referencing cycle) without ever using it;
                # that dead code is removed.
                data_set.append(convert_to_float(row[3:len(row)]))
                utility.print_progress_bar(iteration, data_set_count)
            iteration += 1
    # File is closed by the with-statement; explicit close() removed.
    print("")
    return data_set
Exemple #25
0
def generate_category_feature(input_file_path, output_file_path):
    category_features = get_category_all_features_dictionary(input_file_path)

    category_average_features = {}

    for category, category_feature_vector_list in category_features.iteritems(
    ):
        category_average_features[category] = calculate_vectors_average(
            category_feature_vector_list)
    print ""
    print "generate category feature csv"
    with open(output_file_path, 'wb') as f:
        the_writer = csv.writer(f)
        headers = [
            "video"
            "category", "interactions_number_speakers_2",
            "interactions_number_speakers_3", "interactions_number_speakers_4",
            "interactions_number_speakers_4+", "intervention_short",
            "intervention_long", "speakers_type_ponctuel",
            "speakers_type_localise", "speakers_type_present",
            "speakers_type_regulier", "speakers_type_important",
            "speaker_distribution", "mean_number_of_faces",
            "std_number_of_faces", "inter_intensity_variation",
            "intra_intensity_variation", "number_shot_transition",
            "number_speaker_transition", "speech", "music",
            "speech_with_music", "speech_with_non_music",
            "non_speech_with_music", "non_speech_with_non_music", "words",
            "duration"
        ]
        the_writer.writerow(headers)
        iteration = 1
        max_value = len(category_average_features)
        for category, category_feature_vector in category_average_features.iteritems(
        ):
            vector = [category] + category_feature_vector
            the_writer.writerow(vector)
            utl.print_progress_bar(iteration, max_value)
            iteration += 1
        f.close()
Exemple #26
0
def fill_features_data(features_file_path):
    """Parse a clustering-results CSV into VideoClusterResult objects.

    Rows are expected to be grouped by video name; each row is
    (video, shot_number, category, features...). The first row is a header.

    :param features_file_path: path to the clustering-results CSV
    :return: dict mapping video name -> VideoClusterResult with its
        ``shot`` dict filled (shot_number -> feature columns)
    """
    with open(features_file_path) as f:
        data_set_count = sum(1 for line in f) - 1

    video_list = {}
    with open(features_file_path, 'r') as csv_handle:
        reader = csv.reader(csv_handle)
        iteration = 0
        print("preparing clustering results data ...")
        # Sentinel object; replaced on the first data row (its name is "").
        video = VideoClusterResult()

        for row in reader:
            if iteration > data_set_count:
                break
            if iteration > 0:
                video_name = row[0]
                shot_number = row[1]
                category_label = row[2]
                features = row[3:len(row)]

                # Start a new video on the first data row or whenever the
                # video name changes, flushing the finished one first.
                # (The original stored the row into the *previous* video's
                # shot dict before switching, misfiling the first shot of
                # every video after the first, and it never flushed the
                # last video into the result.)
                if video.name != video_name:
                    if video.name != "" and video.name not in video_list:
                        video_list[video.name] = video
                    video = VideoClusterResult()
                    video.category = category_label
                    video.name = video_name
                    video.shot = {}

                video.shot[shot_number] = features
                utl.print_progress_bar(iteration + 1, data_set_count)
            iteration += 1

        # Flush the trailing video — no name change follows it.
        if video.name != "" and video.name not in video_list:
            video_list[video.name] = video
    print("")
    return video_list
Exemple #27
0
def get_category_center_features_dictionary(input_file_path):
    """Group feature vectors by category from *input_file_path*.

    The first CSV row is a header. Column 0 of each data row is the
    category label; columns 1.. are the feature values, converted with
    ``convert_to_float``.

    :param input_file_path: path to the input CSV file
    :return: dict mapping category label -> list of float feature vectors
    """
    with open(input_file_path) as f:
        # Number of data rows: total lines minus the header.
        data_set_count = sum(1 for line in f) - 1

    category_features = {}
    with open(input_file_path, 'r') as csv_handle:
        reader = csv.reader(csv_handle)
        print("")
        print("preparing dataset_by_category ...")

        for iteration, row in enumerate(reader):
            if iteration > 0:
                category = row[0]
                feature_vector = convert_to_float(row[1:len(row)])
                category_features.setdefault(category, []).append(
                    feature_vector)
            # NOTE: the bar ticks for the header row too (iteration 0),
            # matching the original behavior.
            utl.print_progress_bar(iteration, data_set_count)
    # No explicit close calls needed: both "with" blocks close their files.
    return category_features
Exemple #28
0
    def __generate_mean_average_precision_category_per_cluster_csv(self):
        print "generate_mean_average_precision_category_per_cluster_csv..."
        precision_category_per_cluster_directory = self.__input_directory_path.replace("clustering_results",
                                                                                       "evaluation_results") \
                                                   + "precision_category_per_cluster/"
        file_name_list = utl.get_file_name_list(precision_category_per_cluster_directory)
        accuracy_list = {}
        max_value = len(file_name_list)
        for file_name in file_name_list:
            key = file_name.replace(".csv", "") \
                .replace("_k_means", "") \
                .replace("_birch", "") \
                .replace("_mean_shift", "") \
                .replace("_db_scan", "").replace("k_medoids_results_", "")
            category_per_cluster_csv = precision_category_per_cluster_directory + file_name
            accuracy_list[int(key)] = EvaluationManager.__get_mean_average_precision(category_per_cluster_csv)

        output_path = precision_category_per_cluster_directory.replace("precision_category_per_cluster/", "")
        with open(output_path + "mean_average_precision_category_per_cluster.csv", 'wb') as f:
            the_writer = csv.writer(f)
            headers = [
                "file_name",
                "map",
            ]

            the_writer.writerow(headers)
            iteration = 1
            accuracy_list = collections.OrderedDict(
                sorted(accuracy_list.items()))
            for key, accuracy in accuracy_list.iteritems():
                vector = [key, accuracy]
                the_writer.writerow(vector)
                utl.print_progress_bar(iteration, max_value)
                iteration += 1
            f.close()
            print("")
            print("csv file has been created successfully")
Exemple #29
0
def get_videos_dictionary(input_file_path):
    """Build Video objects holding per-shot classifier results.

    The first CSV row is a header; each data row is
    (category, video_name, shot_number, result-per-algorithm...), with the
    result columns in the same order as ``algorithms_names`` below.

    :param input_file_path: path to the classification-results CSV
    :return: (videos_dictionary, algorithms_names) where videos_dictionary
        maps video name -> Video with shots[shot_number][algorithm] filled
    """
    with open(input_file_path) as f:
        data_set_count = sum(1 for line in f) - 1

    algorithms_names = [
        "kNN", "Logistic_Regression", "Neural_Network", "SVM", "Random_Forest",
        "Naive_Bayes", "AdaBoost"
    ]
    videos_dictionary = {}
    with open(input_file_path, 'r') as csv_handle:
        reader = csv.reader(csv_handle)
        print("preparing dataset_by_category ...")

        for iteration, row in enumerate(reader):
            if iteration > 0:
                category = row[0]
                video_name = row[1]
                shot_number = row[2]
                algorithms_results = row[3:len(row)]
                if video_name not in videos_dictionary:
                    new_video = Video()
                    new_video.category = category
                    new_video.name = video_name
                    new_video.shots = {}
                    videos_dictionary[video_name] = new_video
                shots = videos_dictionary[video_name].shots
                if shot_number not in shots:
                    shots[shot_number] = {}
                # Pair each algorithm name with its result column (replaces
                # the original manual index counter).
                for algorithms_name, result in zip(algorithms_names,
                                                   algorithms_results):
                    shots[shot_number][algorithms_name] = result
                utl.print_progress_bar(iteration + 1, data_set_count)
    # No explicit close calls needed: both "with" blocks close their files.
    return videos_dictionary, algorithms_names
Exemple #30
0
import utility
import csv
from feature_manager import FeatureManager

directory_path = "C:/code/features/output/video/structure-analysis-video-dev/"
feature_files = utility.get_file_name_list(directory_path)
feature_vector_list = []
iteration = 1
max_value = len(feature_files)
for file_name in feature_files:
    fm = FeatureManager(directory_path, file_name)
    feature_vector_list = feature_vector_list + fm.get_feature_vector_list()
    utility.print_progress_bar(iteration, max_value)
    iteration += 1
print "start csv"
with open('complete_video_features.csv', 'wb') as f:
    the_writer = csv.writer(f)
    headers = [
        "video", "shot_number", "category", "interactions_number_speakers_2",
        "interactions_number_speakers_3", "interactions_number_speakers_4",
        "interactions_number_speakers_4+", "intervention_short",
        "intervention_long", "speakers_type_ponctuel",
        "speakers_type_localise", "speakers_type_present",
        "speakers_type_regulier", "speakers_type_important",
        "speaker_distribution", "mean_number_of_faces", "std_number_of_faces",
        "inter_intensity_variation1", "inter_intensity_variation2",
        "inter_intensity_variation3", "inter_intensity_variation4",
        "inter_intensity_variation5", "inter_intensity_variation6",
        "inter_intensity_variation7", "inter_intensity_variation8",
        "inter_intensity_variation9", "intra_intensity_variation1",
        "intra_intensity_variation2", "intra_intensity_variation3",