def devoicer(self, words):
    """Devoice the final segment of every word in ``words``, in place.

    For each word, the last symbol is looked up in the segment table, a
    copy of its feature bundle gets ``voice`` set to ``'-'``, and the
    symbol the table returns for that bundle (if any) replaces the
    original final symbol.

    :param words: list of words, modified in place; the last element of
        each word is treated as a segment symbol
    :return: None
    """
    segment_table = SegmentTable()
    voice = Feature('voice', ('+', '-'))
    for i, word in enumerate(words):
        if not word:
            # Nothing to devoice; word[-1] would raise IndexError.
            continue
        segment = segment_table.get_segment_by_symbol(word[-1])
        # Work on a copy so segment.features itself is left untouched.
        devoiced_features = deepcopy(segment.features)
        devoiced_features[voice] = '-'
        devoiced_symbol = segment_table.get_segment_symbol_by_features(
            devoiced_features)
        # Only rewrite when the table returned a symbol for the bundle.
        if devoiced_symbol:
            words[i] = word[:-1] + devoiced_symbol
def devoice(self, words):
    """Replace each word's final segment with its voiceless counterpart.

    The list is updated in place. A word is rewritten only when the
    segment table returns a symbol for the final segment's feature bundle
    with ``voice`` set to ``'-'``; otherwise the word is left as is.

    :param words: list of words, modified in place; the last element of
        each word is treated as a segment symbol
    :return: None
    """
    # NOTE: every word is processed; a random-sampling guard that used to
    # skip most words is disabled.
    table = SegmentTable()
    voice_feature = Feature('voice', ('+', '-'))
    for index, word in enumerate(words):
        if not word:
            # An empty word has no final segment (word[-1] would raise).
            continue
        final_segment = table.get_segment_by_symbol(word[-1])
        # Copy before editing so the table's segment is not modified.
        features = deepcopy(final_segment.features)
        features[voice_feature] = '-'
        replacement = table.get_segment_symbol_by_features(features)
        if replacement:
            words[index] = word[:-1] + replacement
def apply_noise(self, words):
    """Devoice word-final voiced obstruents in a leading slice of ``words``.

    ``self.noise_rate`` is read as a percentage: the first
    ``int(len(words) * self.noise_rate / 100)`` words are candidates.
    A candidate is rewritten in place only if its final segment passes
    ``self._is_voiced_obstruent`` and the segment table returns a symbol
    for that segment's features with ``voice`` set to ``'-'``.

    :param words: list of words, modified in place; the last element of
        each word is treated as a segment symbol
    :return: None
    """
    total_to_noise = int(len(words) * self.noise_rate / 100)
    segment_table = SegmentTable()
    voice = Feature('voice', ('+', '-'))
    # Indices of the slice line up with indices of ``words`` because the
    # slice starts at 0.
    for i, word in enumerate(words[:total_to_noise]):
        if not word:
            # No final segment to inspect; word[-1] would raise IndexError.
            continue
        segment = segment_table.get_segment_by_symbol(word[-1])
        if not self._is_voiced_obstruent(segment):
            continue
        # Edit a copy so segment.features itself is left untouched.
        new_features = deepcopy(segment.features)
        new_features[voice] = '-'
        new_c = segment_table.get_segment_symbol_by_features(new_features)
        if new_c:
            words[i] = word[:-1] + new_c
def __init__(self, feature_string_dict, role=None):
    """
    Build a feature bundle from a dictionary of feature names to values.

    Exactly one of three cases is handled, checked in this order:
    a word-boundary entry, a morpheme-boundary entry, or a set of ordinary
    features (optionally with a kleene entry). Boundary and kleene flags
    are only set when ``self._is_context_bundle()`` is true.

    :param feature_string_dict: dictionary of form {"cons": "+", "WB": True}
    :param role: "target", "change", "left_context", or "right_context"
    :raises ValueError: if a feature name is not valid in the segment table
    """
    # role must be assigned before _is_context_bundle() is called below.
    self.role = role
    self.kleene = False
    self.word_boundary = False
    self.morpheme_boundary = False
    feature_dict = dict()

    if WORD_BOUNDARY_FEATURE_NAME in feature_string_dict:
        if (feature_string_dict[WORD_BOUNDARY_FEATURE_NAME]
                and self._is_context_bundle()):
            self.word_boundary = True
    elif MORPHEME_BOUNDARY_FEATURE_NAME in feature_string_dict:
        if (feature_string_dict[MORPHEME_BOUNDARY_FEATURE_NAME]
                and self._is_context_bundle()):
            self.morpheme_boundary = True
    else:
        for feature_name in feature_string_dict:
            # Compare by value: ``is`` tests object identity, so an equal
            # but distinct string object would be treated as an ordinary
            # feature instead of the kleene marker.
            if feature_name == KLEENE_FEATURE_NAME:
                if (self._is_context_bundle()
                        and configurations['CHANGE_KLEENE_VALUE']):
                    self.kleene = feature_string_dict[feature_name]
            else:
                feature = Feature(feature_name)
                if not SegmentTable().is_valid_feature(feature):
                    raise ValueError(
                        u"{} not in segment_table".format(feature_name))
                feature_dict[feature] = feature_string_dict[feature_name]

    self.feature_dict = feature_dict
from rule import Rule from rule_set import RuleSet from segment_table import Segment, SegmentTable, Feature simulation_number = 1 syll = Feature('syll') cons = Feature('cons') long = Feature('long') bound = Feature('bound') a = Segment('a', {syll: '+', cons: '-', long: '-', bound: '-'}) b = Segment('b', {syll: '-', cons: '+', long: '-', bound: '-'}) lengthening = Segment('Y', {syll: '-', cons: '-', long: '+', bound: '-'}) morpheme_boundary = Segment('B', {syll: '-', cons: '-', long: '-', bound: '+'}) segment_table = SegmentTable([a, b, lengthening, morpheme_boundary]) configurations_dict = \ { "MUTATE_RULE_SET": 3, "MUTATE_HMM": 1, "COMBINE_EMISSIONS": 0, "ADVANCE_EMISSION": 0, "CLONE_STATE": 0, "CLONE_EMISSION": 0, "ADD_STATE": 0, "REMOVE_STATE": 0, "ADD_TRANSITION": 0,
from rule import Rule from rule_set import RuleSet from segment_table import Segment, SegmentTable, Feature from copy import deepcopy simulation_number = 1 cons = Feature('cons') high = Feature('high') labial = Feature('labial') voiceless = Feature('voiceless') bound = Feature('bound') a = Segment('a', {cons: '-', high: '-', labial: '-', voiceless: '-', bound: '-'}) i = Segment('i', {cons: '-', high: '+', labial: '-', voiceless: '-', bound: '-'}) b = Segment('b', {cons: '+', high: '-', labial: '+', voiceless: '-', bound: '-'}) d = Segment('d', {cons: '+', high: '-', labial: '-', voiceless: '-', bound: '-'}) F = Segment('F', {cons: '-', high: '-', labial: '-', voiceless: '+', bound: '-'}) morpheme_boundary = Segment('B', {cons: '-', high: '-', labial: '-', voiceless: '-', bound: '+'}) segment_table = SegmentTable([a, i, b, d, F, morpheme_boundary]) configurations_dict = \ { "MUTATE_RULE_SET": 2, "MUTATE_HMM": 1, "COMBINE_EMISSIONS": 0,
from rule import Rule from rule_set import RuleSet from segment_table import Segment, SegmentTable, Feature simulation_number = 1 cons = Feature('cons') a = Segment('a', {cons: '-'}) b = Segment('b', {cons: '+'}) segment_table = SegmentTable([a, b]) configurations_dict = \ { "MUTATE_RULE_SET": 1, "MUTATE_HMM": 1, "COMBINE_EMISSIONS": 0, "ADVANCE_EMISSION": 0, "CLONE_STATE": 0, "CLONE_EMISSION": 0, "ADD_STATE": 0, "REMOVE_STATE": 0, "ADD_TRANSITION": 0, "REMOVE_TRANSITION": 0, "ADD_SEGMENT_TO_EMISSION": 0, "REMOVE_SEGMENT_FROM_EMISSION": 1, "CHANGE_SEGMENT_IN_EMISSION": 0, "ADD_EMISSION_TO_STATE": 0,
def _is_voiced_obstruent(self, segment):
    """Return True when ``segment`` is ``+voice`` and ``-son``.

    Both features are read from ``segment.features`` before the
    comparison, so a missing entry for either one raises.

    :param segment: object exposing a ``features`` mapping keyed by Feature
    :return: bool
    """
    features = segment.features
    voice_value = features[Feature('voice', ('+', '-'))]
    sonorant_value = features[Feature('son', ('+', '-'))]
    return voice_value == '+' and sonorant_value == '-'
from rule import Rule from rule_set import RuleSet from segment_table import Segment, SegmentTable, Feature from copy import deepcopy simulation_number = 1 cons = Feature('cons') high = Feature('high') voice = Feature('voice', values=('-', '+', '0')) bound = Feature('bound') a = Segment('a', {cons: '-', high: '-', voice: '-', bound: '-'}) i = Segment('i', {cons: '-', high: '+', voice: '-', bound: '-'}) t = Segment('t', {cons: '+', high: '-', voice: '-', bound: '-'}) d = Segment('d', {cons: '+', high: '-', voice: '+', bound: '-'}) T = Segment('T', {cons: '+', high: '-', voice: '0', bound: '-'}) B = Segment('B', {cons: '-', high: '-', voice: '-', bound: '+'}) segment_table = SegmentTable([a, i, t, d, T, B]) configurations_dict = \ { "MUTATE_RULE_SET": 1, "MUTATE_HMM": 1, "COMBINE_EMISSIONS": 0, "ADVANCE_EMISSION": 0, "CLONE_STATE": 0, "CLONE_EMISSION": 0,