コード例 #1
0
    def __init__(self, feature_dict):
        """Validate every feature name in ``feature_dict`` and store the dict.

        Raises:
            ValueError: if the feature table reports a key of
                ``feature_dict`` as not being a valid feature.
        """
        table = get_feature_table()
        self.feature_table = table

        for feature_name in feature_dict.keys():
            if table.is_valid_feature(feature_name):
                continue
            raise ValueError("Illegal feature: {0}".format(feature_name))

        self.feature_dict = feature_dict
コード例 #2
0
 def add_emission_to_state(self):
     """Add a random segment to the emissions of a randomly chosen inner state.

     Returns True when the segment was appended, False when the chosen
     state's emissions already contain it.
     """
     target_state = choice(self.inner_states)
     new_segment = get_feature_table().get_random_segment()
     if new_segment in self.get_emissions(target_state):
         return False
     self.emissions[target_state].append(new_segment)
     return True
コード例 #3
0
    def __init__(self, constraint_set_list):
        """Build one constraint object per entry of ``constraint_set_list``.

        Each entry is a mapping with a "type" entry (the constraint name,
        resolved through ``Constraint.get_constraint_class_by_name``) and a
        "bundles" entry that is passed to the resolved class.

        Raises:
            ValueError: if an entry has no "type" key or its value is empty.
            KeyError: if an entry has no "bundles" key.
        """
        self.feature_table = get_feature_table()
        self.constraints = list()
        for constraint in constraint_set_list:
            # .get() so that a missing key reaches the ValueError below
            # instead of surfacing as a bare KeyError.
            constraint_name = constraint.get("type")
            if not constraint_name:
                raise ValueError("Missing 'type' key")

            bundles_list = constraint["bundles"]
            constraint_class = Constraint.get_constraint_class_by_name(constraint_name)
            self.constraints.append(constraint_class(bundles_list))
コード例 #4
0
    def __init__(self,
                 string_input_words,
                 max_word_length_in_data,
                 initial_hmm=None,
                 alphabet_or_words="words"):
        """Set up the HMM and the word list.

        Args:
            string_input_words: list of input words; used to build the HMM
                when ``alphabet_or_words`` is "words".
            max_word_length_in_data: stored as-is on the instance.
            initial_hmm: HMM to use directly. A falsy value (including the
                default ``None``) means "build one".
            alphabet_or_words: "alphabet" builds the HMM from the feature
                table's alphabet, "words" builds it from
                ``string_input_words``. Only consulted when no
                ``initial_hmm`` is given.

        Raises:
            ValueError: if ``string_input_words`` is not a list, or if an HMM
                has to be built and ``alphabet_or_words`` is neither
                "alphabet" nor "words".
        """
        if not isinstance(string_input_words, list):
            raise ValueError("should be list")
        self.feature_table = get_feature_table()
        self.max_word_length_in_data = max_word_length_in_data

        if initial_hmm:
            self.hmm = initial_hmm
        elif alphabet_or_words == "alphabet":
            alphabet = self.feature_table.get_alphabet()
            self.hmm = HMM.create_hmm_alphabet(alphabet)
        elif alphabet_or_words == "words":
            self.hmm = HMM.create_hmm_from_list(string_input_words)
        else:
            # Previously this case fell through and left self.hmm undefined.
            raise ValueError(
                "alphabet_or_words should be 'alphabet' or 'words', got {0!r}".format(
                    alphabet_or_words))

        self.words = []
        self._update_words()
コード例 #5
0
    def __init__(self, bundles_list, allow_multiple_bundles):
        """Collect the given bundles as FeatureBundle objects.

        Args:
            bundles_list: items are either raw dictionaries (wrapped in a
                FeatureBundle) or FeatureBundle instances (stored as given).
            allow_multiple_bundles: when falsy, ``bundles_list`` may hold at
                most one item.

        Raises:
            ValueError: if there are more bundles than allowed, or an item is
                neither a dict nor a FeatureBundle.
        """
        self.feature_table = get_feature_table()
        if len(bundles_list) > 1 and not allow_multiple_bundles:
            raise ValueError("More bundles than allowed")

        self.feature_bundles = list()  # contain FeatureBundles

        for bundle in bundles_list:
            # isinstance (not an exact type match) so subclasses such as
            # OrderedDict are accepted; FeatureBundle is tested first so an
            # existing bundle is never re-wrapped.
            if isinstance(bundle, FeatureBundle):
                self.feature_bundles.append(bundle)
            elif isinstance(bundle, dict):
                self.feature_bundles.append(FeatureBundle(bundle))
            else:
                raise ValueError("Not a dict or FeatureBundle")
コード例 #6
0
    def generate_random(cls):
        """Create a FeatureBundle of randomly chosen features and values.

        The number of features is the INITIAL_NUMBER_OF_FEATURES configuration
        value; each label is picked at most once and gets a random value from
        the feature table.

        Raises:
            ValueError: if INITIAL_NUMBER_OF_FEATURES exceeds the number of
                features the feature table reports.
        """
        feature_table = get_feature_table()
        number_of_features = get_configuration("INITIAL_NUMBER_OF_FEATURES")
        if number_of_features > feature_table.get_number_of_features():
            raise ValueError(
                "INITIAL_NUMBER_OF_FEATURES is bigger from number of available features"
            )

        feature_dict = dict()
        # Work on a copy: labels are removed as they are drawn, and the list
        # returned by get_features() may be shared with the feature table.
        available_feature_labels = list(feature_table.get_features())
        for _ in range(number_of_features):
            feature_label = choice(available_feature_labels)
            feature_dict[feature_label] = feature_table.get_random_value(
                feature_label)
            available_feature_labels.remove(feature_label)
        return FeatureBundle(feature_dict)
コード例 #7
0
    def add_segment_to_emission(self):
        """Insert a random segment at a random position of a random emission.

        A random inner state is chosen; one of its emissions is copied with a
        random segment inserted, and the result is appended to that state's
        emissions. Returns True when a new emission was appended, False when
        the state has no emissions or the result already exists.
        """
        state = choice(self.inner_states)
        if not self.get_emissions(state):
            return False

        base_emission = choice(self.get_emissions(state))
        inserted_segment = get_feature_table().get_random_segment()

        # randint is inclusive, so the segment may also land at the very end.
        split_at = randint(0, len(base_emission))
        head = base_emission[:split_at]
        tail = base_emission[split_at:]
        candidate = head + inserted_segment + tail

        if candidate in self.get_emissions(state):
            return False
        self.emissions[state].append(candidate)
        return True
コード例 #8
0
    def change_segment_in_emission(self):
        """Pick a state and one of its emissions, and change one segment in it.

        The changed emission is appended to the list returned by
        ``self.get_emissions(state)``; the emission it was derived from is
        left in that list.

        Returns:
            True when a changed emission was appended; False when the state
            has no emissions, the picked emission is empty, or the alphabet
            offers no segment other than the one being changed.
        """
        state = choice(self.inner_states)
        emissions = self.get_emissions(state)
        if not emissions:
            return False

        emission = choice(emissions)
        # create new emission
        emission_string_list = list(emission)
        if not emission_string_list:
            # Nothing to change; randint(0, -1) would raise.
            return False
        index_of_change = randint(0, len(emission_string_list) - 1)
        old_segment = emission_string_list[index_of_change]

        # Copy before removing so the list handed out by the feature table
        # is not modified.
        segments = list(get_feature_table().get_alphabet())
        segments.remove(old_segment)
        if not segments:
            # choice() on an empty list would raise.
            return False
        emission_string_list[index_of_change] = choice(segments)
        new_emission = ''.join(emission_string_list)

        # add the changed emission (the original one is kept)
        emissions.append(new_emission)
        return True
コード例 #9
0
 def get_encoding_length(self):
     """Return the length of the feature table's entry for ``self.symbol``."""
     return len(get_feature_table()[self.symbol])
コード例 #10
0
 def has_feature_bundle(self, feature_bundle):
     """Tell whether every (feature, value) pair of ``feature_bundle`` also
     appears in the feature table's entry for ``self.symbol``.

     An empty feature bundle yields True.
     """
     table = get_feature_table()
     wanted_pairs = feature_bundle.get_feature_dict().items()
     return all([pair in table[self.symbol].items() for pair in wanted_pairs])
コード例 #11
0
 def __init__(self, word_string):
     """Store ``word_string`` and build one Segment per character of it.

     ``word_string`` and ``segments`` are meant to stay in sync at all times.
     """
     self.feature_table = get_feature_table()
     self.word_string = word_string
     self.segments = list(map(Segment, self.word_string))
コード例 #12
0
 def __init__(self, transitions, emissions, inner_states):
     """Store the transitions, emissions and inner states, then build the NFA.

     ``self._get_nfa()`` is called last, once the other attributes are set.
     """
     self.inner_states: List = inner_states
     self.emissions: Dict = emissions
     self.transitions: Dict = transitions
     self.feature_table = get_feature_table()
     self.nfa = self._get_nfa()
コード例 #13
0
 def __init__(self, constraint_set, lexicon):
     """Keep references to the constraint set, the lexicon and the feature table."""
     self.lexicon = lexicon
     self.constraint_set = constraint_set
     self.feature_table = get_feature_table()