# NOTE(review): stray module-level copy of GeneralHelpers.__init__ — appears to
# be a paste/merge artifact (the same initializer exists inside the class
# below). At module level `self.__db_manager` is NOT name-mangled, and callers
# would have to pass an instance explicitly. Consider deleting this fragment.
def __init__(self):
     # DBManager, PROJECT_ROOT_DIRECTORY, DICTIONARIES_DIR_NAME come from
     # module-level imports/constants not visible in this chunk.
     self.__db_manager = DBManager()
     # Matches Weka per-instance prediction lines, e.g. "1 3:positive 3:positive ... *1"
     self.regexp_for_predict_lines = "\d{1,}\s{1,}\d{1}:\w{1,8}.{1,}"
     self.__dictionaries_directory = PROJECT_ROOT_DIRECTORY + DICTIONARIES_DIR_NAME
 def __init__(self):
     self.__db_manager = DBManager()
     self.regexp_for_predict_lines = "\d{1,}\s{1,}\d{1}:\w{1,8}.{1,}"
     self.__dictionaries_directory = PROJECT_ROOT_DIRECTORY + DICTIONARIES_DIR_NAME
class GeneralHelpers:

    def __init__(self):
        """
        Initializes DB access and the regex/paths shared by the helper methods.
        """
        self.__db_manager = DBManager()
        # Matches Weka per-instance prediction lines, e.g.
        # "1 3:positive 3:positive          0.9     *1".
        # Raw string so `re` receives the backslashes literally (same value,
        # avoids invalid-escape deprecation warnings on Python 3).
        self.regexp_for_predict_lines = r"\d{1,}\s{1,}\d{1}:\w{1,8}.{1,}"
        self.__dictionaries_directory = PROJECT_ROOT_DIRECTORY + DICTIONARIES_DIR_NAME

    def pretty_print_list(self, list_to_print, message):
        """
        Pretty prints a given list, one element per line.
        :param list_to_print: List, list to print
        :param message: String, a message to print before printing list
        :return: void
        """
        # Truthiness instead of len(); prints nothing at all (not even the
        # message) for an empty list, exactly as before.
        if list_to_print:
            print(message)

            for element in list_to_print:
                print(element)

    def get_chunks_of_list(self, list_to_chunk, chunk_size):
        """
        Splits a list into consecutive chunks of the given size.
        :param list_to_chunk: List, list to divide into chunks
        :param chunk_size: Int, size of a chunk (values below 1 are treated as 1)
        :return: List of lists, the chunks in original order
        """
        step = chunk_size if chunk_size > 1 else 1

        chunks = []
        for start in range(0, len(list_to_chunk), step):
            chunks.append(list_to_chunk[start:start + step])
        return chunks

    def find_root_from_zemberek(self, word):
        """
        Finds the root of the given word by delegating to the zemberek
        root-finder jar.
        :param word: string, word
        :return: string, root of word
        """
        return self._make_jar_call(ZEMBEREK_ROOT_FINDER_JAR_FILE_NAME, word)

    def correct_misspelling_from_zemberek(self, word):
        """
        Corrects a misspelled word by asking zemberek for suggestions.
        Returns the word unchanged when zemberek already lists it as valid,
        otherwise the first suggestion.
        :param word: string, word to correct
        :return: string, corrected word
        """
        raw_output = self._make_jar_call(ZEMBEREK_SUGGESTION_FINDER_JAR_FILE_NAME, word)

        suggestions = raw_output.split(",")
        if not suggestions:
            return word
        return word if word in suggestions else suggestions[0]

    def save_changes_in_suggestion_cache(self, suggestions_cache):
        """
        Persists the given suggestion cache as JSON under the model's
        dictionaries directory.
        :param suggestions_cache: dict, suggestions
        :return: void
        """
        target_path = (self.__dictionaries_directory + MODEL_NAME + '/'
                       + SUGGESTION_CACHE_FILE_NAME)
        self._write_json_to_file(suggestions_cache, target_path)

    def save_changes_in_root_cache(self, roots_cache):
        """
        Persists the given roots cache as JSON under the model's
        dictionaries directory.
        :param roots_cache: dict, word -> root mappings
        :return: void
        """
        target_path = (self.__dictionaries_directory + MODEL_NAME + '/'
                       + ROOTS_CACHE_FILE_NAME)
        self._write_json_to_file(roots_cache, target_path)

    def load_suggestion_cache(self):
        """
        Loads the previously asked (to zemberek) suggestion cache from disk.
        :return: dict when the cache file exists, otherwise None
        """
        cache_path = (self.__dictionaries_directory + MODEL_NAME + '/'
                      + SUGGESTION_CACHE_FILE_NAME)
        if not os.path.isfile(cache_path):
            return None
        with open(cache_path, "r") as cache_file:
            return json.load(cache_file)

    def load_roots_cache(self):
        """
        Loads the previously asked (to zemberek) word roots cache from disk.
        :return: dict when the cache file exists, otherwise None
        """
        cache_path = (self.__dictionaries_directory + MODEL_NAME + '/'
                      + ROOTS_CACHE_FILE_NAME)
        if not os.path.isfile(cache_path):
            return None
        with open(cache_path, "r") as cache_file:
            return json.load(cache_file)

    def generate_arff_file(self, file_path, file_name, arff_data):
        """
        Generates an arff file at file_path + file_name from the given data.
        :param file_path: string, directory to write into (created if missing)
        :param file_name: string, file name for arff data
        :param arff_data: dict, arff_data
        :return: void
        """

        if not os.path.exists(file_path):
            os.makedirs(file_path)

        # `with` guarantees the handle is closed even if arff.dump raises
        # (the original leaked the handle on an exception).
        with codecs.open(file_path + file_name, 'w+', encoding='utf-8') as arff_file:
            arff.dump(arff_data, arff_file)

    def generate_random_string(self, n):
        """
        Generates a random alphanumeric string of length n.
        :param n: int, size
        :return: string, n-length random string
        """
        alphabet = 'abcdefghijklmnoprstuvyzxw1234567890'
        characters = [random.choice(alphabet) for _ in range(n)]
        return ''.join(characters)

    def generate_random_file_name(self, file_name, extension):
        """
        Generates random file name with given file name
        :param file_name: string, desired file name root
        :return: string, randomized file name
        """
        random_file_appendix = self.generate_random_string(5)
        final_file_name = file_name + '_' + random_file_appendix + extension
        return final_file_name

    def get_accuracy_scores_for_years_from_root_dir(self, root_dir):
        """
        Returns a dict of __years' classifier scores
        :param root_dir: string, path to root directory
        :return: dict, __years' classifier scores
        """
        years_scores = {}

        # Iterating over directories in root directory
        for root, dirs, files in os.walk(root_dir):

            # Iterating over files in a directory
            for file_name in files:

                # If it's a txt file we got it. E.g. TTNet_2015_SMO.txt
                if file_name.endswith('.txt'):

                    file_path = root + '/' + file_name
                    model_name, year, classifier_name = file_name.rstrip('.txt').split("_") #TTNet, 2015, SMO

                    if not year in years_scores:
                        years_scores[year] = {}

                    with open(file_path, 'r') as classifier_log_file:
                        file_content = classifier_log_file.read()
                        years_scores[year][classifier_name] = self.get_accuracy_score_from_log_file_content(file_content)

        # Calculating mean for each year and sorting
        for year, classifiers in years_scores.iteritems():
            all_classifier_scores = np.array(classifiers.values())
            years_scores[year]['MEAN'] = round(all_classifier_scores.mean(), 2)
            years_scores[year] = collections.OrderedDict(sorted(years_scores[year].items()))

        sorted_years_scores = collections.OrderedDict(sorted(years_scores.items()))

        return sorted_years_scores

    def get_accuracy_scores_for_experiment_years_from_root_dir(self, root_dir):
        """
        Returns mean accuracy per line/point, averaged over every classifier
        log found in each leaf directory (root_dir/<line>/<point>/*.txt).
        :param root_dir: string, path to root directory
        :return: OrderedDict, line name -> OrderedDict(point name -> mean score)
        """
        lines_scores = {}

        # Iterating over directories in root directory
        for root, dirs, files in os.walk(root_dir):

            experiment_scores = []
            # Collect the accuracy score of every classifier log in this directory
            for file_name in files:

                if file_name.endswith('.txt'):

                    file_path = root + '/' + file_name

                    with open(file_path, 'r') as classifier_log_file:
                        file_content = classifier_log_file.read()
                        experiment_scores.append(
                            self.get_accuracy_score_from_log_file_content(file_content))

            if experiment_scores:
                mean_of_experiment_scores = round(np.array(experiment_scores).mean(), 2)
                # The last two path components identify the experiment:
                # <...>/<line_name>/<point_name>
                root_components = root.split("/")
                line_name = root_components[-2]
                # Translate the directory name to its display name; assumes
                # every leaf dir name is a LINES_DIR_DICT key (KeyError otherwise).
                point_name = LINES_DIR_DICT[root_components[-1]]

                if line_name not in lines_scores:
                    lines_scores[line_name] = {}

                lines_scores[line_name][point_name] = mean_of_experiment_scores

        # .items() instead of the Python-2-only .iteritems()
        for line, points in lines_scores.items():
            lines_scores[line] = collections.OrderedDict(sorted(points.items()))

        return collections.OrderedDict(sorted(lines_scores.items()))

    def get_accuracy_score_from_log_file_content(self, log_file_content):
        """
        Extracts the accuracy percentage from a Weka log file's content.
        :param log_file_content: string
        :return: float, accuracy percentage, or -1.0 when no accuracy line is found
        """
        regexp_for_accuracy_lines = "Correctly Classified Instances.{1,}"

        classifier_accuracy_lines = re.findall(regexp_for_accuracy_lines, log_file_content, re.IGNORECASE)

        # Guard clause + truthiness instead of len()
        if not classifier_accuracy_lines:
            return -1.0

        # e.g. ['Correctly', 'Classified', 'Instances', '209', '41.8', '%']
        accuracy_line_components = classifier_accuracy_lines[0].split()
        return float(accuracy_line_components[4])

    def get_log_files_stats(self, root_dir):
        """
        Returns each classifier's monthly accuracy scores from log files.
        NOTE: changes the process working directory to root_dir (side effect
        kept for backward compatibility — glob("*.txt") relies on it).
        :param root_dir: string
        :return: dict, log file base name -> {year -> [monthly accuracy, ...]}
        """

        # Going to log files directory
        os.chdir(root_dir)

        all_log_files = {}

        # iterating over log files of a model (say, Turkcell)
        for txt_file in glob("*.txt"):
            # opening file
            with open(txt_file, 'r') as a_log_file:
                # BUGFIX: split(".")[0] truncated names containing extra dots;
                # splitext only removes the final extension.
                file_name = os.path.splitext(a_log_file.name)[0]

                # updating data model with log file's content
                all_log_files[file_name] = a_log_file.read()

        # we got a model and we need its months' lengths (say, 42 tweets for October)
        self.model_month_counts = self.__db_manager.get_months_lengths()

        all_accuracy_scores = {}

        # iterating log files we previously read
        # (.items() instead of the Python-2-only .iteritems())
        for log_file_name, log_file_content in all_log_files.items():
            if log_file_name not in all_accuracy_scores:
                all_accuracy_scores[log_file_name] = \
                    self.get_accuracy_scores_per_month_from_log_file(log_file_content)

        return all_accuracy_scores

    def get_accuracy_scores_per_month_from_log_file(self, log_file_content):
        """
        Returns each year's [true, total] predictions for given log file content
        :param log_file_content: string
        :return: dict
        """
        predict_lines_in_file = re.findall(self.regexp_for_predict_lines, log_file_content)

        correct_vs_total_values = {}
        start_idx = 0
        # e.g. { 2014: [42, 42, ...., 38] }
        for year, months_lengths in self.model_month_counts.iteritems():

            # e.g. 42
            for month_length in months_lengths:

                end_idx = start_idx + month_length

                # a months' stats
                a_months_stats = [0, month_length]  # (Correct, Total)

                # slicing months' lines to find lines like: "1 3:positive 3:positive          0      0     *1"
                months_lines = predict_lines_in_file[start_idx:end_idx]

                # iterating over a month's lines to find accuracy
                for month_line in months_lines:

                    # if line contains + it's an error
                    if not '+' in month_line:
                        a_months_stats[0] += 1

                if not year in correct_vs_total_values:
                    correct_vs_total_values[year] = []

                correct_vs_total_values[year].append(a_months_stats)

                start_idx += month_length

        """
        Example correct_vs_total_values:
        {
            2013: [[25, 43], [26, 42], [21, 42], [31, 44], [25, 42], [24, 40], [27, 42], [23, 42], [23, 43], [28, 41], [25, 42], [21, 39]],
            2014: [[35, 55], [19, 39], [25, 46], [32, 43], [23, 42], [25, 42], [27, 42], [28, 45], [25, 45], [27, 45], [23, 42], [15, 27]],
            2015: [[25, 43], [32, 56], [34, 57], [36, 56], [31, 54], [35, 55], [37, 68], [38, 57], [27, 51]]
        }
        """
        # Let's find accuracies now.

        accuracy_scores = {}

        for year, all_months_predictions in correct_vs_total_values.iteritems():

            if not year in accuracy_scores:
                accuracy_scores[year] = []

            for month_predictions in all_months_predictions:
                correct_predictions = month_predictions[0]
                total_predictions = month_predictions[1]

                one_acc_score = float(correct_predictions) / total_predictions
                one_acc_score *= 100
                one_acc_score = round(one_acc_score, 2)


                accuracy_scores[year].append(one_acc_score)

        return accuracy_scores

    def find_key_of_given_value_in_dict(self, dictionary, value_to_search):
        """
        Returns the first key that maps to the given value.
        :param dictionary: dict
        :param value_to_search: string
        :return: the matching key, or int -1 when the value is not found
        """
        # .items() instead of the Python-2-only .iteritems()
        for key, value in dictionary.items():
            if value == value_to_search:
                return key
        return -1

    def find_most_distinct_n_samples(self, document_one, document_two, n_samples):
        """
        Returns most distinct n_samples from document_two comparing to document_one (documents are arff data)
        :param document_one: list
        :param document_two: list
        :param n_samples: int
        :return: list
        """
        # NOTE(review): not implemented yet — currently always returns None.
        pass

    def cumulate_years_scores(self, years_scores):
        """
        Cumulates and averages all experiments' scores per line and setup, then
        converts line3 scores to absolute values by adding the matching line1
        score. Assumes 'line1' and 'line3' keys exist, and that 'line2' entries
        are score lists that must be averaged element-wise.
        :param years_scores: list of dicts, one per experiment:
                             {line_name: {setup_name: score or list of scores}}
        :return: dict, line_name -> {setup_name: cumulated score}
        """
        final_result_of_all_experiments = {}

        # Group every experiment's score under its line/setup
        # (.items() instead of the Python-2-only .iteritems())
        for one_experiments_scores in years_scores:

            for line_name, one_lines_scores in one_experiments_scores.items():

                if line_name not in final_result_of_all_experiments:
                    final_result_of_all_experiments[line_name] = {}

                for setup_name, score_or_list in one_lines_scores.items():

                    if setup_name not in final_result_of_all_experiments[line_name]:
                        final_result_of_all_experiments[line_name][setup_name] = []

                    final_result_of_all_experiments[line_name][setup_name].append(score_or_list)

        # Reduce the gathered lists: scalar mean everywhere except "line2",
        # whose entries are lists averaged element-wise (axis=0).
        for line_name, one_lines_scores in final_result_of_all_experiments.items():
            if line_name != "line2":

                for setup_name, scores_list in one_lines_scores.items():
                    final_result_of_all_experiments[line_name][setup_name] = \
                        round(np.mean(scores_list), 2)

            else:

                for setup_name, scores_list in one_lines_scores.items():
                    final_result_of_all_experiments[line_name][setup_name] = \
                        np.mean(scores_list, axis=0)

        print('Relative scores:')
        pprint.pprint(final_result_of_all_experiments, width=2)
        print(final_result_of_all_experiments)

        # line3 scores are relative to line1; add line1's score to make them absolute
        for line_name, score in final_result_of_all_experiments['line3'].items():

            target_line1_key = self.get_line1_key_from_line3_key(line_name)
            final_result_of_all_experiments['line3'][line_name] = \
                final_result_of_all_experiments['line1'][target_line1_key] + score

        print('Absolute scores:')
        pprint.pprint(final_result_of_all_experiments, width=2)

        return final_result_of_all_experiments

    def calculate_relative_scores(self, all_scores):
        """
        Calculates relative scores of line3 according to line1, in place.
        :param all_scores: dict, must contain 'line1' and 'line3' entries
        :return: dict, all_scores with each line3 entry replaced by its
                 difference from the matching line1 score
        """

        line1_scores = all_scores['line1']

        # .items() instead of the Python-2-only .iteritems()
        for setup_name, score in all_scores['line3'].items():

            target_line1_key = self.get_line1_key_from_line3_key(setup_name)
            all_scores['line3'][setup_name] = score - line1_scores[target_line1_key]

        return all_scores


    def get_line1_key_from_line3_key(self, line3_key):
        """
        Returns the relevant line1 key for a line3 key by stripping the
        "L<digit>-" prefix or the "+<year>_<ALE_PARTITION_50_KEY>" part.
        :param line3_key: string
        :return: string
        """
        # Raw strings so `re` receives the backslashes literally (same pattern,
        # avoids invalid-escape deprecation warnings on Python 3).
        regexp_for_target_keys = r"(L\d{1}-|\+(2012|2013|2014|2015)_" + ALE_PARTITION_50_KEY + r")"
        return re.sub(regexp_for_target_keys, "", line3_key)

    def _write_json_to_file(self, json_data, file_path):
        """
        Writes given data to given path
        :param data: list or dict, json_data
        :param file_path: string
        :return:
        """
        with open(file_path, 'w') as outfile:
            json.dump(json_data, outfile)

    def _make_jar_call(self, jar_file_name, word):
        """
        Runs the given zemberek jar with `word` as its argument and returns
        whatever the process printed (stderr is folded into stdout).
        :param jar_file_name: string, jar file to make call
        :param word: string, parameter word
        :return: string, connection output
        """
        jar_file_path = JAR_FILE_DIR_NAME + jar_file_name

        # Launch the JVM and capture combined stdout/stderr
        command = ['java', '-jar', jar_file_path, word]
        process_call = Popen(command, stdout=PIPE, stderr=STDOUT)

        # Wait for completion and decode the captured bytes
        raw_output = process_call.communicate()[0]
        return raw_output.decode('utf-8')
class GeneralHelpers:
    """
    Helper utilities for zemberek jar calls, suggestion/root cache files,
    arff generation and Weka log-file score extraction.

    NOTE(review): this class appears to be an auto-formatted duplicate of the
    GeneralHelpers class defined earlier in this file; because it is defined
    later, it replaces the earlier definition at import time. The two copies
    look behaviorally identical — consider deleting one of them.
    Written for Python 2 (uses dict.iteritems(), which Python 3 removed).
    """

    def __init__(self):
        self.__db_manager = DBManager()
        self.regexp_for_predict_lines = "\d{1,}\s{1,}\d{1}:\w{1,8}.{1,}"
        self.__dictionaries_directory = PROJECT_ROOT_DIRECTORY + DICTIONARIES_DIR_NAME

    def pretty_print_list(self, list_to_print, message):
        """
        Pretty prints a given list
        :param list_to_print: List, list to print
        :param message: String, a message to print before printing list
        :return: void
        """
        if len(list_to_print):
            print(message)

            for element in list_to_print:
                print(element)

    def get_chunks_of_list(self, list_to_chunk, chunk_size):
        """
        Returns chunks of new list
        :param list_to_chunk: List, List to divide into chunks
        :param chunk_size: Int, size of a chunk
        :return:
        """

        n = max(1, chunk_size)
        return [
            list_to_chunk[i:i + n] for i in range(0, len(list_to_chunk), n)
        ]

    def find_root_from_zemberek(self, word):
        """
        Finds root of given word from zemberek
        :param word: string, word
        :return: string, root of word
        """
        connection_output = self._make_jar_call(
            ZEMBEREK_ROOT_FINDER_JAR_FILE_NAME, word)
        return connection_output

    def correct_misspelling_from_zemberek(self, word):
        """
        Corrects misspelled words by asking to zemberek
        :param word: string, word to correct
        :return: string, corrected word
        """
        connection_output = self._make_jar_call(
            ZEMBEREK_SUGGESTION_FINDER_JAR_FILE_NAME, word)

        suggestions = connection_output.split(",")
        if len(suggestions):
            if word in suggestions:
                return word
            else:
                return suggestions[0]
        else:
            return word

    def save_changes_in_suggestion_cache(self, suggestions_cache):
        """
        Saves given suggestion cache to file
        :param suggestions_cache: dict, suggestions
        :return: void
        """
        suggestion_cache_file_path = self.__dictionaries_directory + MODEL_NAME + '/' + SUGGESTION_CACHE_FILE_NAME
        self._write_json_to_file(suggestions_cache, suggestion_cache_file_path)

    def save_changes_in_root_cache(self, roots_cache):
        """
        Saves given roots cache to file
        :param roots_cache: roots_cache, dict, roots
        :return: void
        """
        roots_file_path = self.__dictionaries_directory + MODEL_NAME + '/' + ROOTS_CACHE_FILE_NAME
        self._write_json_to_file(roots_cache, roots_file_path)

    def load_suggestion_cache(self):
        """
        Loads previously asked (to zemberek) suggestion cache
        :return: void
        """
        suggestion_cache_file_path = self.__dictionaries_directory + MODEL_NAME + '/' + SUGGESTION_CACHE_FILE_NAME
        if os.path.isfile(suggestion_cache_file_path):
            with open(suggestion_cache_file_path, "r") as suggestions_file:
                suggestion_cache = json.load(suggestions_file)
                return suggestion_cache

    def load_roots_cache(self):
        """
        Loads previously asked (to zemberek) word roots cache
        :return: void
        """
        roots_file_path = self.__dictionaries_directory + MODEL_NAME + '/' + ROOTS_CACHE_FILE_NAME
        if os.path.isfile(roots_file_path):
            with open(roots_file_path, "r") as roots_file:
                roots_cache = json.load(roots_file)
                return roots_cache

    def generate_arff_file(self, file_path, file_name, arff_data):
        """
        Generates arff file
        :param file_name: file_name for arff data
        :param arff_data: dict, arff_data
        :return: string, generated file path
        """

        if not os.path.exists(file_path):
            os.makedirs(file_path)

        arff_file = codecs.open(file_path + file_name, 'w+', encoding='utf-8')
        arff.dump(arff_data, arff_file)
        arff_file.close()

    def generate_random_string(self, n):
        """
        Generates random string with size of n
        :param n: int, size
        :return: string, n-length random string
        """
        random_string = ''.join(
            random.choice('abcdefghijklmnoprstuvyzxw1234567890')
            for _ in range(n))
        return random_string

    def generate_random_file_name(self, file_name, extension):
        """
        Generates random file name with given file name
        :param file_name: string, desired file name root
        :return: string, randomized file name
        """
        random_file_appendix = self.generate_random_string(5)
        final_file_name = file_name + '_' + random_file_appendix + extension
        return final_file_name

    def get_accuracy_scores_for_years_from_root_dir(self, root_dir):
        """
        Returns a dict of __years' classifier scores
        :param root_dir: string, path to root directory
        :return: dict, __years' classifier scores
        """
        years_scores = {}

        # Iterating over directories in root directory
        for root, dirs, files in os.walk(root_dir):

            # Iterating over files in a directory
            for file_name in files:

                # If it's a txt file we got it. E.g. TTNet_2015_SMO.txt
                if file_name.endswith('.txt'):

                    file_path = root + '/' + file_name
                    model_name, year, classifier_name = file_name.rstrip(
                        '.txt').split("_")  #TTNet, 2015, SMO

                    if not year in years_scores:
                        years_scores[year] = {}

                    with open(file_path, 'r') as classifier_log_file:
                        file_content = classifier_log_file.read()
                        years_scores[year][
                            classifier_name] = self.get_accuracy_score_from_log_file_content(
                                file_content)

        # Calculating mean for each year and sorting
        for year, classifiers in years_scores.iteritems():
            all_classifier_scores = np.array(classifiers.values())
            years_scores[year]['MEAN'] = round(all_classifier_scores.mean(), 2)
            years_scores[year] = collections.OrderedDict(
                sorted(years_scores[year].items()))

        sorted_years_scores = collections.OrderedDict(
            sorted(years_scores.items()))

        return sorted_years_scores

    def get_accuracy_scores_for_experiment_years_from_root_dir(self, root_dir):
        """
        Returns a dict of scores
        :param root_dir: string, path to root directory
        :return: dict, __years' classifier scores
        """
        lines_scores = {}

        # Iterating over directories in root directory
        for root, dirs, files in os.walk(root_dir):

            experiment_scores = []
            # Iterating over files in a directory
            for file_name in files:

                if file_name.endswith('.txt'):

                    file_path = root + '/' + file_name

                    with open(file_path, 'r') as classifier_log_file:
                        file_content = classifier_log_file.read()
                        accuracy_score_of_log_file = self.get_accuracy_score_from_log_file_content(
                            file_content)
                        experiment_scores.append(accuracy_score_of_log_file)

            if len(experiment_scores):
                mean_of_experiment_scores = round(
                    np.array(experiment_scores).mean(), 2)
                root_components = root.split("/")
                line_name = root_components[-2]
                point_name = root_components[-1]
                point_name = LINES_DIR_DICT[point_name]

                if not line_name in lines_scores:
                    lines_scores[line_name] = {}

                lines_scores[line_name][point_name] = mean_of_experiment_scores

        for line, points in lines_scores.iteritems():
            lines_scores[line] = collections.OrderedDict(sorted(
                points.items()))

        lines_scores = collections.OrderedDict(sorted(lines_scores.items()))

        return lines_scores

    def get_accuracy_score_from_log_file_content(self, log_file_content):
        """
        Returns accuracy score of log file
        :param log_file_content: string
        :return: float
        """
        regexp_for_accuracy_lines = "Correctly Classified Instances.{1,}"

        classifier_accuracy_lines = re.findall(regexp_for_accuracy_lines,
                                               log_file_content, re.IGNORECASE)

        if len(classifier_accuracy_lines):
            accuracy_line = classifier_accuracy_lines[0]
            accuracy_line_components = accuracy_line.split(
            )  #['Correctly', 'Classified', 'Instances', '209', '41.8', '%']
            accuracy = accuracy_line_components[4]
            return float(accuracy)
        else:
            return -1.0

    def get_log_files_stats(self, root_dir):
        """
        Returns each classifier's monthly accuracy scores from log files.
        :param root_dir: string
        :return: dict
        """

        # Going to log files directory
        os.chdir(root_dir)

        all_log_files = {}

        # iterating over log files of a model (say, Turkcell)
        for txt_file in glob("*.txt"):
            # openning file
            with open(txt_file, 'r') as a_log_file:
                # getting log file's name
                file_name = a_log_file.name.split(".")[0]

                # updating data model with log file's content
                all_log_files.update({file_name: a_log_file.read()})

        # we got a model and we need it month's lengths (Say, we got 42 tweets for October)
        self.model_month_counts = self.__db_manager.get_months_lengths()

        all_accuracy_scores = {}

        # iterating log files we previously read
        for log_file_name, log_file_content in all_log_files.iteritems():
            accuracy_scores = self.get_accuracy_scores_per_month_from_log_file(
                log_file_content)

            if not log_file_name in all_accuracy_scores:
                all_accuracy_scores[log_file_name] = accuracy_scores

        return all_accuracy_scores

    def get_accuracy_scores_per_month_from_log_file(self, log_file_content):
        """
        Returns each year's [true, total] predictions for given log file content
        :param log_file_content: string
        :return: dict
        """
        predict_lines_in_file = re.findall(self.regexp_for_predict_lines,
                                           log_file_content)

        correct_vs_total_values = {}
        start_idx = 0
        # e.g. { 2014: [42, 42, ...., 38] }
        for year, months_lengths in self.model_month_counts.iteritems():

            # e.g. 42
            for month_length in months_lengths:

                end_idx = start_idx + month_length

                # a months' stats
                a_months_stats = [0, month_length]  # (Correct, Total)

                # slicing months' lines to find lines like: "1 3:positive 3:positive          0      0     *1"
                months_lines = predict_lines_in_file[start_idx:end_idx]

                # iterating over a month's lines to find accuracy
                for month_line in months_lines:

                    # if line contains + it's an error
                    if not '+' in month_line:
                        a_months_stats[0] += 1

                if not year in correct_vs_total_values:
                    correct_vs_total_values[year] = []

                correct_vs_total_values[year].append(a_months_stats)

                start_idx += month_length
        """
        Example correct_vs_total_values:
        {
            2013: [[25, 43], [26, 42], [21, 42], [31, 44], [25, 42], [24, 40], [27, 42], [23, 42], [23, 43], [28, 41], [25, 42], [21, 39]],
            2014: [[35, 55], [19, 39], [25, 46], [32, 43], [23, 42], [25, 42], [27, 42], [28, 45], [25, 45], [27, 45], [23, 42], [15, 27]],
            2015: [[25, 43], [32, 56], [34, 57], [36, 56], [31, 54], [35, 55], [37, 68], [38, 57], [27, 51]]
        }
        """
        # Let's find accuracies now.

        accuracy_scores = {}

        for year, all_months_predictions in correct_vs_total_values.iteritems(
        ):

            if not year in accuracy_scores:
                accuracy_scores[year] = []

            for month_predictions in all_months_predictions:
                correct_predictions = month_predictions[0]
                total_predictions = month_predictions[1]

                one_acc_score = float(correct_predictions) / total_predictions
                one_acc_score *= 100
                one_acc_score = round(one_acc_score, 2)

                accuracy_scores[year].append(one_acc_score)

        return accuracy_scores

    def find_key_of_given_value_in_dict(self, dictionary, value_to_search):
        """
        Returns key of given value
        :param dictionary: dict
        :param value_to_search: string
        :return: string
        """
        for k, v in dictionary.iteritems():
            if v == value_to_search:
                return k
        return -1

    def find_most_distinct_n_samples(self, document_one, document_two,
                                     n_samples):
        """
        Returns most distinct n_samples from document_two comparing to document_one (documents are arff data)
        :param document_one: list
        :param document_two: list
        :param n_samples: int
        :return: list
        """
        pass

    def cumulate_years_scores(self, years_scores):
        """
        Cumulates and finds averages of all __years' scores
        :param years_scores: list
        :return: dict
        """
        final_result_of_all_experiments = {}
        for one_experiments_scores in years_scores:

            for line_name, one_lines_scores in one_experiments_scores.iteritems(
            ):

                if not line_name in final_result_of_all_experiments:
                    final_result_of_all_experiments[line_name] = {}

                for setup_name, score_or_list in one_lines_scores.iteritems():

                    if not setup_name in final_result_of_all_experiments[
                            line_name]:
                        final_result_of_all_experiments[line_name][
                            setup_name] = []

                    final_result_of_all_experiments[line_name][
                        setup_name].append(score_or_list)

        for line_name, one_lines_scores in final_result_of_all_experiments.iteritems(
        ):
            if line_name != "line2":

                for setup_name, scores_list in one_lines_scores.iteritems():
                    final_result_of_all_experiments[line_name][
                        setup_name] = round(np.mean(scores_list), 2)

            else:

                for setup_name, scores_list in one_lines_scores.iteritems():
                    np_array_scores_list = np.array(scores_list)
                    min_mean_max_mean = np.mean(scores_list, axis=0)
                    final_result_of_all_experiments[line_name][
                        setup_name] = min_mean_max_mean

        print('Relative scores:')
        pprint.pprint(final_result_of_all_experiments, width=2)
        print(final_result_of_all_experiments)
        for line_name, score in final_result_of_all_experiments[
                'line3'].iteritems():

            target_line1_key = self.get_line1_key_from_line3_key(line_name)
            new_line3_score = final_result_of_all_experiments['line1'][
                target_line1_key] + score
            final_result_of_all_experiments['line3'][
                line_name] = new_line3_score

        print('Absolute scores:')
        pprint.pprint(final_result_of_all_experiments, width=2)

        return final_result_of_all_experiments

    def calculate_relative_scores(self, all_scores):
        """
        Calculates relative scores of line3 according to line1
        :return: void
        """

        line1_scores = all_scores['line1']

        for setup_name, score in all_scores['line3'].iteritems():

            target_line1_key = self.get_line1_key_from_line3_key(setup_name)
            difference = score - line1_scores[target_line1_key]
            all_scores['line3'][setup_name] = difference

        return all_scores

    def get_line1_key_from_line3_key(self, line3_key):
        """
        Returns relevant line1 key from line3 key
        :param line3_key: string
        :return: string
        """
        regexp_for_target_keys = "(L\d{1}-|\+(2012|2013|2014|2015)_" + ALE_PARTITION_50_KEY + ")"
        target_line1_key = re.sub(regexp_for_target_keys, "", line3_key)
        return target_line1_key

    def _write_json_to_file(self, json_data, file_path):
        """
        Writes given data to given path
        :param data: list or dict, json_data
        :param file_path: string
        :return:
        """
        with open(file_path, 'w') as outfile:
            json.dump(json_data, outfile)

    def _make_jar_call(self, jar_file_name, word):
        """
        Makes a jar call with proper parameters
        :param jar_file_name: string, jar file to make call
        :param word: string, parameter word
        :return: string, connection output
        """
        jar_file_path = JAR_FILE_DIR_NAME + jar_file_name

        # Making the call
        process_call = Popen(['java', '-jar', jar_file_path, word],
                             stdout=PIPE,
                             stderr=STDOUT)

        # Getting output
        output = process_call.communicate()[0].decode('utf-8')
        return output