def dataset_modification(config_path, output_path):
    """Add prefix subsets of ``training_set`` to the ``subset`` table of data.yaml.

    Loads ``<config_path>/data.yaml`` and, for the prefix lengths 5, 9, 13
    and 17, stores the first that-many entries of ``training_set`` in the
    document's ``'subset'`` mapping under the key ``'<first>-<last>'``.
    A prefix whose last entry equals ``'i'`` is skipped. The whole document
    is then saved to ``<output_path>/data.yaml``.

    NOTE(review): ``training_set`` is not defined in this function; it is
    presumably a module-level sequence -- confirm against the rest of the file.

    Args:
        config_path: Directory containing the input ``data.yaml``.
        output_path: Directory the updated ``data.yaml`` is written to.
    """
    source_file = '{}/data.yaml'.format(config_path)
    target_file = '{}/data.yaml'.format(output_path)

    config = Utility.yaml_load(source_file)
    subsets = config['subset']

    # Single-argument parenthesised print behaves the same on Python 2 and 3.
    print(subsets)

    for size in range(5, 20, 4):
        # Prefixes that end on the entry 'i' are not registered.
        if training_set[size - 1] == 'i':
            continue

        prefix = training_set[:size]
        label = '{}-{}'.format(prefix[0], prefix[-1])
        subsets[label] = prefix

    # 'subsets' is the mapping held inside 'config', so saving 'config'
    # persists the entries added above.
    Utility.yaml_save(target_file, config)
def change_exp(config_path, output_path, subset):
    """Point an experiment config at the data subset ``'a-<last>'``.

    Loads ``<config_path>/experiment.yaml``, sets both
    ``data_set.optim_dur`` and ``data_set.train`` to ``'a-<last>'`` where
    ``<last>`` is the final element of ``subset``, prints the resulting
    document and saves it to ``<output_path>/experiment.yaml``.

    Args:
        config_path: Directory containing the input ``experiment.yaml``.
        output_path: Directory the updated ``experiment.yaml`` is written to.
        subset: Non-empty sequence; only its last element is used.
    """
    source_file = '{}/experiment.yaml'.format(config_path)
    target_file = '{}/experiment.yaml'.format(output_path)

    experiment = Utility.yaml_load(source_file)

    subset_label = 'a-{}'.format(subset[-1])
    data_set = experiment['data_set']
    data_set['optim_dur'] = subset_label
    data_set['train'] = subset_label

    # Single-argument parenthesised print behaves the same on Python 2 and 3.
    print(experiment)

    Utility.yaml_save(target_file, experiment)
        Utility.make_directory(set_out_path)

        if Utility.is_dir_exists(set_stress_path) & Utility.is_dir_exists(set_utt_base_path):
            print ch

            for i in xrange(1, 51):

                name = 'tscsd{}{}'.format(ch, Utility.fill_zero(i, 2))

                yaml_filename = '{}/{}.utt.yaml'.format(set_utt_base_path, name )
                if not Utility.is_file_exist(yaml_filename):
                    continue

                full_file = '{}/{}.lab'.format(set_syllable_full_path, name)

                count = [0]
                yaml = Utility.yaml_load(yaml_filename)
                add_stress(yaml, count, name)

                if not (len(Utility.read_file_line_by_line(full_file)) == count[0] ):
                    print 'Not equal'
                    print name, len(Utility.read_file_line_by_line(full_file)), count[0]

                out_file = '{}/{}.utt.yaml'.format(set_out_path, name)
                Utility.yaml_save(out_file, yaml)

    print 'all no {} names '.format(coun_no)

    pass
Example #4
0
def gen_utt(file_path, out_file):
    """Convert a full-context label file into a nested utterance document.

    Every line read from ``file_path`` is matched against the context-label
    pattern below and folded into a list of ordered dicts, which is finally
    written with ``Utility.yaml_save(out_file, ...)``:

    * a ``sil`` phoneme becomes a top-level ``phone`` entry;
    * a ``pau`` phoneme is appended to the ``inners`` of the most recent
      top-level entry;
    * any other phoneme is nested as utterance > word > syllable > phone.
      A new utterance is opened when the word, syllable and phone positions
      are all ``'1'``, a new word when the syllable and phone positions are
      ``'1'``, and a new syllable when the phone position is ``'1'``.

    ``word_index``, ``syllable_index`` and ``phone_index`` are running
    counters over the whole file. Earlier revisions reused the first two
    names to look up the position of the current word/syllable, which
    overwrote the running counts and produced non-monotonic indices; the
    lookups now use ``[-1]`` so the counters are left alone.

    Args:
        file_path: Path of the label file, one context label per line.
        out_file: Path the resulting structure is saved to.

    Raises:
        UtteranceException: If a line does not match the context pattern.
    """
    # Compiled once up front: the pattern does not depend on the line.
    # NOTE(review): under re.VERBOSE the trailing '?' appears to apply to the
    # cur_stress group, making it optional, so 'stress' may be None -- confirm.
    context_pattern = re.compile(
        r"""
            ^(?P<prev_phoneme>.+)-(?P<cur_phoneme>.+)\+(?P<next_phoneme>.+)
            /A:(?P<prev_phone_position>.+)_.+-(?P<cur_phone_position>.+)_.+\+(?P<next_phone_position>.+)_.+
            /B:(?P<prev_tone>.+)-(?P<cur_tone>.+)\+(?P<next_tone>.+)
            /C:(?P<prev_syllable_position>.+)_.+-(?P<cur_syllable_position>.+)_.+\+(?P<next_syllable_position>.+)_.+
            /D:(?P<prev_number_of_phone>.+)-(?P<cur_number_of_phone>.+)\+(?P<next_number_of_phone>.+)
            /E:(?P<prev_word_position>.+)-(?P<cur_word_position>.+)\+(?P<next_word_position>.+)
            /F:.+_(?P<prev_number_of_syllable>.+)-.+_(?P<cur_number_of_syllable>.+)\+.+_(?P<next_number_of_syllable>.+)
            /G:.+_(?P<number_of_syllable_in_sentence>.+)_(?P<number_of_word_in_sentence>.+)
            /H:(?P<prev_word_partofspeech>.+)_.+-(?P<cur_word_partofspeech>.+)_.+\+(?P<next_word_partofspeech>.+)_.+
            /I:(?P<cur_stress>.+)
            ?
            """, re.VERBOSE)

    word_index, syllable_index, phone_index = 0, 0, 0
    output_array = []

    for line in Utility.read_file_line_by_line(file_path):
        match = context_pattern.match(line)
        if match is None:
            raise UtteranceException('Unmatched context: {:}'.format(line))

        cur_phoneme = match.group('cur_phoneme')

        if cur_phoneme == 'sil':
            # Silence sits at the top level, outside any utterance.
            output_array.append(collections.OrderedDict([
                ('unit', 'phone'),
                ('entity', cur_phoneme),
            ]))
            continue

        if cur_phoneme == 'pau':
            # A pause is attached directly to the latest top-level entry,
            # next to its words.
            output_array[-1]['inners'].append(collections.OrderedDict([
                ('unit', 'phone'),
                ('entity', cur_phoneme),
            ]))
            continue

        starts_syllable = match.group('cur_phone_position') == '1'
        starts_word = (starts_syllable
                       and match.group('cur_syllable_position') == '1')
        starts_utterance = (starts_word
                            and match.group('cur_word_position') == '1')

        if starts_utterance:
            output_array.append(collections.OrderedDict([
                ('unit', 'utterance'),
                ('inners', []),
            ]))

        utterance_inners = output_array[-1]['inners']

        if starts_word:
            word_index += 1
            utterance_inners.append(collections.OrderedDict([
                ('unit', 'word'),
                ('word_index', word_index),
                ('part_of_speech', match.group('cur_word_partofspeech')),
                ('inners', []),
            ]))

        # The current word is always the last entry of the utterance.
        word_inners = utterance_inners[-1]['inners']

        if starts_syllable:
            syllable_index += 1
            word_inners.append(collections.OrderedDict([
                ('unit', 'syllable'),
                ('syllable_index', syllable_index),
                ('tone_type', int(match.group('cur_tone'))),
                ('stress', match.group('cur_stress')),
                ('inners', []),
            ]))

        # The current syllable is always the last entry of the word.
        phone_index += 1
        word_inners[-1]['inners'].append(collections.OrderedDict([
            ('unit', 'phone'),
            ('phone_index', phone_index),
            ('entity', cur_phoneme),
        ]))

    Utility.yaml_save(out_file, output_array)