def __init__(self, feature_dict):
    """Store a feature dictionary after validating its keys.

    Each key of ``feature_dict`` is checked with the feature table's
    ``is_valid_feature``. The dictionary itself is kept by reference
    (no copy is made).

    Raises:
        ValueError: if a key is rejected by ``is_valid_feature``.
    """
    self.feature_table = get_feature_table()
    for label in feature_dict:
        if self.feature_table.is_valid_feature(label):
            continue
        raise ValueError("Illegal feature: {0}".format(label))
    # Only reached when every key passed validation.
    self.feature_dict = feature_dict
def add_emission_to_state(self):
    """Try to add a random segment as an emission of a random inner state.

    Returns:
        True if the segment was appended to ``self.emissions[state]``,
        False if it was already among ``self.get_emissions(state)``.
    """
    target_state = choice(self.inner_states)
    new_segment = get_feature_table().get_random_segment()
    if new_segment in self.get_emissions(target_state):
        return False
    self.emissions[target_state].append(new_segment)
    return True
def __init__(self, constraint_set_list):
    """Instantiate one constraint per entry of ``constraint_set_list``.

    Each entry is a mapping with a ``"type"`` key (the constraint name,
    resolved through ``Constraint.get_constraint_class_by_name``) and a
    ``"bundles"`` key (passed as the only argument to the resolved class).
    The resulting objects are collected, in order, in ``self.constraints``.

    Raises:
        ValueError: if an entry has no ``"type"`` key or its value is empty.
        KeyError: if an entry has no ``"bundles"`` key.
    """
    self.feature_table = get_feature_table()
    self.constraints = []
    for constraint in constraint_set_list:
        # .get() rather than [] so that a missing key reaches the
        # ValueError below instead of escaping as a bare KeyError.
        constraint_name = constraint.get("type")
        if not constraint_name:
            raise ValueError("Missing 'type' key")
        bundles_list = constraint["bundles"]
        constraint_class = Constraint.get_constraint_class_by_name(constraint_name)
        self.constraints.append(constraint_class(bundles_list))
def __init__(self, string_input_words, max_word_length_in_data, initial_hmm=None, alphabet_or_words="words"):
    """Set up the object around an HMM, building one if none is supplied.

    Args:
        string_input_words: list of input words; must be a ``list``.
        max_word_length_in_data: stored as-is on the instance.
        initial_hmm: HMM to use. When falsy, a new HMM is created
            according to ``alphabet_or_words``.
        alphabet_or_words: ``"alphabet"`` builds the HMM from the feature
            table's alphabet via ``HMM.create_hmm_alphabet``; ``"words"``
            builds it from ``string_input_words`` via
            ``HMM.create_hmm_from_list``. Ignored when ``initial_hmm`` is
            given.

    Raises:
        ValueError: if ``string_input_words`` is not a list, or if no
            ``initial_hmm`` is given and ``alphabet_or_words`` is neither
            ``"alphabet"`` nor ``"words"``.
    """
    if not isinstance(string_input_words, list):
        raise ValueError("should be list")
    self.feature_table = get_feature_table()
    self.max_word_length_in_data = max_word_length_in_data

    # Truthiness (not ``is None``) is kept on purpose: any falsy
    # ``initial_hmm`` is treated as "not supplied", as before.
    if initial_hmm:
        self.hmm = initial_hmm
    elif alphabet_or_words == "alphabet":
        self.hmm = HMM.create_hmm_alphabet(self.feature_table.get_alphabet())
    elif alphabet_or_words == "words":
        self.hmm = HMM.create_hmm_from_list(string_input_words)
    else:
        # Previously this case fell through and left self.hmm unset.
        raise ValueError(
            "alphabet_or_words should be 'alphabet' or 'words', got {0!r}".format(alphabet_or_words)
        )

    self.words = []
    self._update_words()
def __init__(self, bundles_list, allow_multiple_bundles):
    """Collect the feature bundles given in ``bundles_list``.

    Args:
        bundles_list: sequence whose items are either raw dictionaries
            (wrapped in ``FeatureBundle``) or ``FeatureBundle`` instances
            (stored as-is). Subclasses of either type are accepted.
        allow_multiple_bundles: when falsy, ``bundles_list`` may hold at
            most one item.

    Raises:
        ValueError: if more than one bundle is given while
            ``allow_multiple_bundles`` is falsy, or if an item is neither
            a dict nor a ``FeatureBundle``.
    """
    self.feature_table = get_feature_table()
    if len(bundles_list) > 1 and not allow_multiple_bundles:
        raise ValueError("More bundles than allowed")

    self.feature_bundles = []  # holds FeatureBundle objects only
    for bundle in bundles_list:
        # FeatureBundle is tested first so that an existing bundle is never
        # re-wrapped, whatever its base classes are.
        if isinstance(bundle, FeatureBundle):
            self.feature_bundles.append(bundle)
        elif isinstance(bundle, dict):
            self.feature_bundles.append(FeatureBundle(bundle))
        else:
            raise ValueError("Not a dict or FeatureBundle")
def generate_random(cls):
    """Create a ``FeatureBundle`` with randomly chosen features and values.

    The number of features is the ``INITIAL_NUMBER_OF_FEATURES``
    configuration value. Feature labels are drawn without repetition from
    the feature table's features, and each gets a value from
    ``get_random_value``. ``cls`` is not used; the result is always built
    with ``FeatureBundle``.

    Raises:
        ValueError: if ``INITIAL_NUMBER_OF_FEATURES`` exceeds the number of
            features reported by the feature table.
    """
    feature_table = get_feature_table()
    number_of_features = get_configuration("INITIAL_NUMBER_OF_FEATURES")
    if number_of_features > feature_table.get_number_of_features():
        raise ValueError(
            "INITIAL_NUMBER_OF_FEATURES is bigger from number of available features"
        )

    # Work on a copy: labels are removed as they are drawn, and that must
    # never alter a list owned by the feature table.
    available_feature_labels = list(feature_table.get_features())
    feature_dict = {}
    for _ in range(number_of_features):
        feature_label = choice(available_feature_labels)
        feature_dict[feature_label] = feature_table.get_random_value(feature_label)
        available_feature_labels.remove(feature_label)
    return FeatureBundle(feature_dict)
def add_segment_to_emission(self):
    """Add a lengthened copy of a random emission to a random inner state.

    A random emission of the chosen state gets a random segment inserted at
    a random position (including both ends). The resulting string is
    appended to ``self.emissions[state]``; the emission it was built from is
    left in place.

    Returns:
        True if the new emission was appended, False if the state has no
        emissions or the new emission is already present.
    """
    target_state = choice(self.inner_states)
    if not self.get_emissions(target_state):
        return False

    base_emission = choice(self.get_emissions(target_state))
    random_segment = get_feature_table().get_random_segment()
    split_at = randint(0, len(base_emission))
    candidate = base_emission[:split_at] + random_segment + base_emission[split_at:]

    if candidate in self.get_emissions(target_state):
        return False
    self.emissions[target_state].append(candidate)
    return True
def change_segment_in_emission(self):
    """Add a variant of a random emission with one segment substituted.

    Picks a random inner state and one of its emissions, replaces the
    segment at a random position with a different segment from the feature
    table's alphabet, and appends the result to the list returned by
    ``self.get_emissions(state)``.

    Returns:
        True if a new emission was appended. False if the state has no
        emissions, the chosen emission is empty, or the alphabet offers no
        alternative segment.

    Raises:
        ValueError: if the segment being replaced is not in the alphabet.
    """
    state = choice(self.inner_states)
    emissions = self.get_emissions(state)
    if not emissions:
        return False

    emission_segments = list(choice(emissions))
    if not emission_segments:
        # Nothing to substitute; randint(0, -1) would raise.
        return False
    index_of_change = randint(0, len(emission_segments) - 1)
    old_segment = emission_segments[index_of_change]

    # Copy before remove() so a list owned by the feature table is never
    # mutated.
    candidate_segments = list(get_feature_table().get_alphabet())
    candidate_segments.remove(old_segment)
    if not candidate_segments:
        # Single-segment alphabet: there is nothing to change to.
        return False
    emission_segments[index_of_change] = choice(candidate_segments)

    # NOTE(review): the new emission is appended and the original one is
    # kept, although the method name suggests a replacement - confirm
    # this is intended.
    emissions.append(''.join(emission_segments))
    return True
def get_encoding_length(self):
    """Return the length of the feature table entry for ``self.symbol``."""
    return len(get_feature_table()[self.symbol])
def has_feature_bundle(self, feature_bundle):
    """Tell whether this symbol's features include all of ``feature_bundle``.

    Returns:
        True if every (feature, value) pair of
        ``feature_bundle.get_feature_dict()`` is also an item of the feature
        table entry for ``self.symbol``; an empty bundle yields True.
    """
    table = get_feature_table()
    required_pairs = feature_bundle.get_feature_dict().items()
    return all(pair in table[self.symbol].items() for pair in required_pairs)
def __init__(self, word_string):
    """Keep ``word_string`` and build one ``Segment`` per character of it.

    ``self.word_string`` and ``self.segments`` are expected to stay in sync
    for the lifetime of the object.
    """
    self.feature_table = get_feature_table()
    self.word_string = word_string
    self.segments = list(map(Segment, word_string))
def __init__(self, transitions, emissions, inner_states):
    """Store the HMM description and derive its NFA.

    Args:
        transitions: stored as ``self.transitions``.
        emissions: stored as ``self.emissions``.
        inner_states: stored as ``self.inner_states``.
    """
    self.feature_table = get_feature_table()
    self.inner_states: List = inner_states
    self.emissions: Dict = emissions
    self.transitions: Dict = transitions
    # Built last, once all the attributes above are in place.
    self.nfa = self._get_nfa()
def __init__(self, constraint_set, lexicon):
    """Bundle a constraint set and a lexicon together.

    Both arguments are stored by reference, alongside the feature table
    returned by ``get_feature_table()``.
    """
    self.feature_table = get_feature_table()
    self.lexicon = lexicon
    self.constraint_set = constraint_set