class Grammar:
    def __init__(self, hmm, rule_set=None):
        if isinstance(hmm, HMM):
            self.hmm = hmm
        else:
            self.hmm = HMM(hmm)
        segment_table = SegmentTable()
        self.segment_symbol_length = uniform_encoding.log2(len(segment_table) + 1)  # + 1 for the delimiter
        if rule_set:
            self.rule_set = rule_set
        else:
            self.rule_set = RuleSet(noise=False)

        noises = configurations.get("NOISE_RULE_SET", [])
        self.noise_rule_set = RuleSet.load_noise_rules_from_flat_list(noises)

        self._cached_hmm_transducer = None
        self._cached_rule_set_transducer = None
        self._cached_noise_rule_set_transducer = None

    def generate_word(self):
        emission = self.hmm.generate_emission()
        return choice(self.rule_set.get_outputs_of_word(emission))

    def generate_all_words(self):
        # TODO: I think this also generates noised data. This should be fixed.
        words = []
        emissions = self.hmm.generate_all_emissions()
        for emission in emissions:
            words += self.rule_set.get_outputs_of_word(emission)
        return words

    def get_transducer(self, with_noise=True):
        hmm_transducer = self.get_hmm_transducer()

        if "case_name" in configurations.configurations_dict:
            case_name = configurations.configurations_dict["case_name"]
            dot(hmm_transducer, "{}_hmm_transducer".format(case_name))
        rules_set_transducer = self.get_rule_set_transducer()
        if with_noise:
            noise_rules_transducer = self.get_noise_rule_set_transducer()
        else:
            noise_rules_transducer = None

        return self._compose_grammar_transducers(
            hmm_transducer, rules_set_transducer, noise_rules_transducer
        )

    def _compose_grammar_transducers(self, first_transducer, *other_transducers):
        composed_transducer = first_transducer
        for transducer in other_transducers:
            if transducer:
                composed_transducer.arc_sort_input()
                transducer.arc_sort_input()
                composed_transducer = composed_transducer >> transducer
        return composed_transducer

    def get_nfa(self):
        grammar_pyfst_transducer = self.get_transducer()
        # dot(grammar_pyfst_transducer, "grammar_pyfst_transducer")
        # grammar_pyfst_transducer.remove_epsilon()
        return ParsingNFA.get_from_pyfst_transducer(grammar_pyfst_transducer)

    def make_mutation(self):
        mutation_successful = False

        if ga_config.MUTATE_BOTH_HMM_AND_RULES:
            hmm_mutation_successful = False
            rule_set_mutation_successful = False

            if configurations["EVOLVE_HMM"]:
                hmm_mutation_successful = self.hmm.make_mutation()
            if configurations["EVOLVE_RULES"]:
                rule_set_mutation_successful = self.rule_set.make_mutation()

            mutation_successful = mutation_successful or rule_set_mutation_successful or hmm_mutation_successful

            if hmm_mutation_successful:
                self.invalidate_cached_hmm_transducer()
            if rule_set_mutation_successful:
                self.invalidate_cached_rule_set_transducer()

        else:
            rule_set_mutation_weight = 0 if not configurations["EVOLVE_RULES"] else configurations["MUTATE_RULE_SET"]
            hmm_mutation_weight = 0 if not configurations["EVOLVE_HMM"] else configurations["MUTATE_HMM"]

            mutation_weights = [('rule_set', rule_set_mutation_weight),
                                ('hmm', hmm_mutation_weight)]

            weighted_mutatable_object_list = get_weighted_list(mutation_weights)
            object_name_to_mutate = choice(weighted_mutatable_object_list)
            if object_name_to_mutate == 'rule_set':
                object_to_mutate = self.rule_set
            elif object_name_to_mutate == 'hmm':
                object_to_mutate = self.hmm
            mutation_successful = object_to_mutate.make_mutation()

            if mutation_successful:
                if object_name_to_mutate == 'hmm':
                    self.invalidate_cached_hmm_transducer()
                elif object_name_to_mutate == 'rule_set':
                    self.invalidate_cached_rule_set_transducer()

        return mutation_successful

    def get_encoding_length(self):
        if not configurations["UNDERSPECIFICATION_FLAG"]:
            hmm_encoding_length = self.hmm.get_encoding_length(self.segment_symbol_length,
                                                               restrictions_on_alphabet=configurations["RESTRICTIONS_ON_ALPHABET"])
        else:
            hmm_encoding_length = self.hmm.get_underspecified_encoding_length()
        rules_encoding_length = self.rule_set.get_encoding_length()
        return hmm_encoding_length, rules_encoding_length

    def generate_word_list(self, n):
        result = []
        for _ in range(n):
            result.append(self.generate_word())
        return result

    def get_all_outputs(self, with_noise=True):
        transducer = self.get_transducer(with_noise=with_noise)
        if configurations["MINIMIZE_TRANSDUCER"]:
            transducer = self.minimize_transducer(transducer)

        transducer_symbol_table = SegmentTable().transducer_symbol_table
        outputs = list()
        for path in transducer.paths():
            output = ""
            for i in path:
                symbol = transducer_symbol_table.find(i.olabel)
                if symbol != u"\u03b5" and symbol != MORPHEME_BOUNDARY and symbol != WORD_BOUNDARY:
                    output += symbol
            outputs.append(output)
        return outputs

    def get_hmm_transducer(self):
        if self._cached_hmm_transducer is None:
            self._cached_hmm_transducer = self.hmm.get_transducer()
        return self._cached_hmm_transducer

    def get_rule_set_transducer(self):
        if self._cached_rule_set_transducer is None:  # rule set transducer may be None
            self._cached_rule_set_transducer = self.rule_set.get_transducer()

        return self._cached_rule_set_transducer

    def get_noise_rule_set_transducer(self):
        if self._cached_noise_rule_set_transducer is None:
            self._cached_noise_rule_set_transducer = self.noise_rule_set.get_transducer()
        return self._cached_noise_rule_set_transducer

    def get_log_lines(self):
        return self.hmm.get_log_lines() + self.rule_set.get_log_lines()

    def invalidate_cached_hmm_transducer(self):
        self._cached_hmm_transducer = None

    def invalidate_cached_rule_set_transducer(self):
        self._cached_rule_set_transducer = None

    @staticmethod
    def minimize_transducer(transducer):
        transducer.project_output()
        transducer = transducer.determinize()
        transducer.minimize()
        return transducer

    def __getstate__(self):
        # Don't pickle cached transducers
        state = self.__dict__.copy()
        state['_cached_hmm_transducer'] = None
        state['_cached_rule_set_transducer'] = None
        state['_cached_noise_rule_set_transducer'] = None
        return state
class Grammar:
    def __init__(self, hmm, rule_set=None):
        if isinstance(hmm, HMM):
            self.hmm = hmm
        else:
            self.hmm = HMM(hmm)
        segment_table = SegmentTable()
        self.segment_symbol_length = ceil(log(len(segment_table) + 1,
                                              2))  # + 1 for the delimiter
        if rule_set:
            self.rule_set = rule_set
        else:
            self.rule_set = RuleSet()

    def generate_word(self):
        emission = self.hmm.generate_emission()
        return choice(self.rule_set.get_outputs_of_word(emission))

    def get_transducer(self):
        hmm_transducer = self.hmm.get_transducer()
        if "case_name" in configurations.configurations_dict:
            case_name = configurations.configurations_dict["case_name"]
            dot(hmm_transducer, "{}_hmm_transducer".format(case_name))
        rules_set_transducer = self.rule_set.get_transducer()
        if rules_set_transducer:
            hmm_transducer.arc_sort_input()
            rules_set_transducer.arc_sort_input()
            composed_hmm_rules_transducer = hmm_transducer >> rules_set_transducer
        else:
            composed_hmm_rules_transducer = hmm_transducer
        return composed_hmm_rules_transducer

    def get_nfa(self):
        grammar_pyfst_transducer = self.get_transducer()
        # dot(grammar_pyfst_transducer, "grammar_pyfst_transducer")
        # grammar_pyfst_transducer.remove_epsilon()
        return ParsingNFA.get_from_pyfst_transducer(grammar_pyfst_transducer)

    def make_mutation(self):
        num_mutations = randint(1, ga_config.MAX_MUTATIONS)
        mutation_result = False

        for _ in range(num_mutations):
            if ga_config.MUTATE_BOTH_HMM_AND_RULES:
                rule_set_success = False
                hmm_success = False
                if configurations["EVOLVE_RULES"]:
                    rule_set_success = self.rule_set.make_mutation()
                if configurations["EVOLVE_HMM"]:
                    hmm_success = self.hmm.make_mutation()
                mutation_result = mutation_result or rule_set_success or hmm_success

            else:
                rule_set_mutation_weight = 0 if not configurations[
                    "EVOLVE_RULES"] else configurations["MUTATE_RULE_SET"]
                hmm_mutation_weight = 0 if not configurations[
                    "EVOLVE_HMM"] else configurations["MUTATE_HMM"]

                mutation_weights = [(self.rule_set, rule_set_mutation_weight),
                                    (self.hmm, hmm_mutation_weight)]

                weighted_mutatable_object_list = get_weighted_list(
                    mutation_weights)
                object_to_mutate = choice(weighted_mutatable_object_list)
                mutation_result = object_to_mutate.make_mutation()

        return mutation_result

    def get_encoding_length(self):
        if not configurations["UNDERSPECIFICATION_FLAG"]:
            hmm_encoding_length = self.hmm.get_encoding_length(
                self.segment_symbol_length,
                restrictions_on_alphabet=configurations[
                    "RESTRICTIONS_ON_ALPHABET"])
        else:
            hmm_encoding_length = self.hmm.get_underspecified_encoding_length()
        rules_encoding_length = self.rule_set.get_encoding_length()
        return hmm_encoding_length, rules_encoding_length

    def generate_word_list(self, n):
        result = []
        for _ in range(n):
            result.append(self.generate_word())
        return result

    def get_all_outputs(self):
        transducer = self.get_transducer()
        transducer_symbol_table = SegmentTable().transducer_symbol_table
        outputs = list()
        for path in transducer.paths():
            output = ""
            for i in path:
                symbol = transducer_symbol_table.find(i.olabel)
                if symbol != u"\u03b5" and symbol != MORPHEME_BOUNDARY and symbol != WORD_BOUNDARY:
                    output += symbol
            outputs.append(output)
        return outputs