def make_equal_frames(self, in_file_list, ref_file_list, in_dimension_dict):
    """Make every input feature file as long (in frames) as its reference.

    ``in_file_list[i]`` is paired with ``ref_file_list[i]``.  The stream name
    of a file is the text after its last ``.`` and is used to look up the
    per-frame feature dimension in ``in_dimension_dict`` (for both the input
    and the reference file).

    When the two frame counts differ, the input file is overwritten in place
    with a ``(ref_frame_number, in_feature_dim)`` matrix: extra input frames
    are dropped, missing frames are left as zeros.  Files whose frame counts
    already match are not rewritten.

    Args:
        in_file_list: paths of the files to be resized (overwritten in place).
        ref_file_list: paths of the reference files, same order as
            ``in_file_list``.
        in_dimension_dict: maps stream name (file extension) to feature
            dimension.

    Raises:
        IndexError: if ``ref_file_list`` is shorter than ``in_file_list``.
        KeyError: if a file extension is missing from ``in_dimension_dict``.
    """
    logger = logging.getLogger("test")

    logger.info('making equal number of lines...')

    # The closing log message names the streams of the last processed pair;
    # with no files there is nothing to name, so finish early instead of
    # failing on unbound loop variables.
    if len(in_file_list) == 0:
        logger.info('Finished: no files to process')
        return

    io_funcs = BinaryIOCollection()

    for i, in_file_name in enumerate(in_file_list):
        in_data_stream_name = in_file_name.split('.')[-1]
        in_feature_dim = in_dimension_dict[in_data_stream_name]
        in_features, in_frame_number = io_funcs.load_binary_file_frame(in_file_name, in_feature_dim)

        ref_file_name = ref_file_list[i]
        ref_data_stream_name = ref_file_name.split('.')[-1]
        ref_feature_dim = in_dimension_dict[ref_data_stream_name]
        # Only the frame count of the reference is needed.
        _, ref_frame_number = io_funcs.load_binary_file_frame(ref_file_name, ref_feature_dim)

        if in_frame_number == ref_frame_number:
            continue

        # Copy as many frames as both files have; any remaining rows of the
        # target stay zero (padding), any surplus input rows are discarded.
        target_features = numpy.zeros((ref_frame_number, in_feature_dim))
        shared_frames = min(in_frame_number, ref_frame_number)
        target_features[0:shared_frames, ] = in_features[0:shared_frames, ]
        io_funcs.array_to_binary_file(target_features, in_file_name)

    logger.info('Finished: made equal rows in data stream %s with reference to data stream %s ' %(in_data_stream_name, ref_data_stream_name))
def generate_lf0(source_f0_folder, target_lf0_folder, file_lengths):
    """Convert text F0 tracks to fixed-length binary log-F0 files.

    For every ``file_id`` in ``file_lengths`` the file
    ``<source_f0_folder>/<file_id>.f0`` is read as one float per line.  The
    track is zero-padded or truncated to exactly ``file_lengths[file_id]``
    values, converted to natural-log F0, and written through
    ``BinaryIOCollection.array_to_binary_file`` to
    ``<target_lf0_folder>/<file_id>.lf0``.  Values not greater than 10.0 are
    written as ``-1e+10`` instead of their logarithm.

    Args:
        source_f0_folder: directory holding the ``.f0`` text files.
        target_lf0_folder: directory that receives the ``.lf0`` files.
        file_lengths: mapping of file id to the required number of frames.
    """
    io_funcs = BinaryIOCollection()
    # .items() works on both Python 2 and Python 3 dictionaries.
    for file_id, file_length in file_lengths.items():
        source_file_path = os.path.join(source_f0_folder,
                                        '{}.f0'.format(file_id))
        target_file_path = os.path.join(target_lf0_folder,
                                        '{}.lf0'.format(file_id))
        with open(source_file_path, 'rt') as handle:
            # Skip blank lines (e.g. a trailing newline at end of file),
            # which float() cannot parse.
            f0s = [float(line) for line in handle if line.strip()]

        # Pad with zeros up to the requested length, then cut any surplus;
        # a negative padding count yields an empty list, so one expression
        # covers both the short and the long case.
        frame_count = max(file_length, 0)
        f0s = (f0s + [0.] * (frame_count - len(f0s)))[:frame_count]

        # Frames at or below 10.0 keep the -1e+10 marker instead of log(f0).
        lf0s = [math.log(f0) if f0 > 10.0 else -1e+10 for f0 in f0s]

        print(target_file_path)
        io_funcs.array_to_binary_file(lf0s, target_file_path)

    # Emit a blank line after the list of written paths.
    print('')
Example #3
0
 def shift_for_one_utterance(self, utt, feat_dim, semi):
     """Write a pitch-shifted copy of one utterance's feature file.

     With ``semi == 0`` nothing is read or written and the base name of
     ``utt`` is returned.  Otherwise the features are loaded from ``utt``,
     ``self.shift_pitch_feat`` is applied to three column ranges of every
     frame (current, previous, next; bounds come from names defined outside
     this method), and the result is written next to the original with a
     ``_u<semi>`` (positive shift) or ``_d<-semi>`` (negative shift) suffix.

     Returns the base name of the file the caller should use.
     """
     if semi == 0:
         return os.path.basename(utt)

     binary_io = BinaryIOCollection()
     feat, _ = binary_io.load_binary_file_frame(utt, feat_dim)

     for frame in feat:
         # Slices are passed straight to shift_pitch_feat, in the order
         # current, previous, next; end indices are inclusive.
         column_ranges = (
             (curr_start_ind, curr_end_ind),
             (prev_start_ind, prev_end_ind),
             (next_start_ind, next_end_ind),
         )
         for first, last in column_ranges:
             self.shift_pitch_feat(frame[first: last + 1], semi)

     suffix = '_u' + str(semi) if semi > 0 else '_d' + str(-semi)
     shifted_path = utt + suffix
     binary_io.array_to_binary_file(feat, shifted_path)
     return os.path.basename(shifted_path)
Example #4
0
class AlignFeats(object):
    """Re-time source feature files along a DTW path."""

    def __init__(self):
        # One binary reader/writer reused by every call on this instance.
        self.io_funcs = BinaryIOCollection()

    def align_src_feats(self, src_feat_file, src_aligned_feat_file, feat_dim, dtw_path_dict):
        '''
        Write a copy of the source features warped to the target length.

        ``dtw_path_dict`` is indexed with 0 .. len(dtw_path_dict) - 1; entry
        ``t`` selects the source frame copied into output frame ``t``.  The
        output has ``len(dtw_path_dict)`` rows and ``feat_dim`` columns and
        is written to ``src_aligned_feat_file``.
        '''
        source_frames, _ = self.io_funcs.load_binary_file_frame(src_feat_file, feat_dim)

        target_len = len(dtw_path_dict)
        warped = numpy.zeros((target_len, feat_dim))

        # Each row yielded by enumerate is a view into `warped`, so writing
        # through it fills the output matrix in place.
        for tgt_idx, out_row in enumerate(warped):
            out_row[...] = source_frames[dtw_path_dict[tgt_idx]]

        self.io_funcs.array_to_binary_file(warped, src_aligned_feat_file)
    def duration_decomposition(self, in_file_list, dimension,
                               out_dimension_dict, file_extension_dict):
        """Round generated duration features to integers and write them out.

        Only a single output stream is supported: when ``out_dimension_dict``
        has more than one key a critical message is logged and the process
        exits with status 1.

        For each file in ``in_file_list`` the features are loaded with
        ``dimension`` columns, rounded to ``int32`` and floored at 1.  When
        ``dimension`` is larger than the hard-coded state count (5), only
        the column at index 5 is kept.  The result is written beside the
        input file, using the input's base name plus
        ``file_extension_dict[<stream name>]``.
        """
        logger = logging.getLogger('param_generation')

        logger.debug('duration_decomposition for %d files' % len(in_file_list))

        state_number = 5  ## hard coding, try removing in future?

        stream_names = list(out_dimension_dict.keys())
        if len(stream_names) > 1:
            logger.critical(
                "we don't support any additional features along with duration as of now."
            )
            sys.exit(1)
        feature_name = stream_names[0]

        io_funcs = BinaryIOCollection()

        total_files = len(in_file_list)
        for position, file_name in enumerate(in_file_list, 1):
            out_dir = os.path.dirname(file_name)
            base_name = os.path.splitext(os.path.basename(file_name))[0]

            features, frame_number = io_funcs.load_binary_file_frame(
                file_name, dimension)

            # Integer durations, never shorter than one frame.
            gen_features = numpy.maximum(numpy.int32(numpy.round(features)), 1)

            if dimension > state_number:
                gen_features = gen_features[:, state_number]

            logger.info('processing %4d of %4d: %s' %
                        (position, total_files, file_name))

            new_file_name = os.path.join(
                out_dir, base_name + file_extension_dict[feature_name])
            io_funcs.array_to_binary_file(gen_features, new_file_name)

            logger.debug('wrote to file %s' % new_file_name)
Example #6
0
class AlignFeats(object):
    """Warp source feature files onto a target time axis via a DTW path."""

    def __init__(self):
        # Binary file helper shared by all alignment calls.
        self.io_funcs = BinaryIOCollection()

    def align_src_feats(self, src_feat_file, src_aligned_feat_file, feat_dim,
                        dtw_path_dict):
        '''
        Re-sample source frames so the output has one row per DTW path entry.

        The source file is read with ``feat_dim`` columns.  For every index
        ``t`` in ``range(len(dtw_path_dict))`` the source frame selected by
        ``dtw_path_dict[t]`` becomes output row ``t``.  The aligned matrix
        is written to ``src_aligned_feat_file``.
        '''
        loaded = self.io_funcs.load_binary_file_frame(src_feat_file, feat_dim)
        src_frames, _ = loaded

        n_target_frames = len(dtw_path_dict)
        aligned = numpy.zeros((n_target_frames, feat_dim))

        for tgt_pos in range(n_target_frames):
            src_pos = dtw_path_dict[tgt_pos]
            aligned[tgt_pos, :] = src_frames[src_pos]

        self.io_funcs.array_to_binary_file(aligned, src_aligned_feat_file)
    def acoustic_decomposition(self, in_file_list, dimension, out_dimension_dict, file_extension_dict, var_file_dict, do_MLPG=True, cfg=None):
        """Split generated acoustic frames into one parameter file per stream.

        Column offsets of the streams are derived from the iteration order of
        ``out_dimension_dict``.  The 'vuv' stream is not decomposed itself;
        its column offset is remembered and used to mark unvoiced frames in
        the 'lf0' / 'F0' stream.

        For every input file and every stream in ``self.gen_wav_features``:

        * the stream's columns are sliced out of the loaded matrix;
        * unless ``do_MLPG`` is False, ``MLParameterGeneration.generation``
          is run on them with the stream's variance (tiled per frame when
          the module-level ``FAST_MLPG`` flag is set);
        * for 'lf0' / 'F0', frames whose vuv value is below 0.5 get
          ``self.inf_float`` in column 0 (only when a 'vuv' stream exists);
        * when ``self.enforce_silence`` is set, frame ranges whose label
          matches ``cfg.silence_pattern`` are overwritten (``self.inf_float``
          for 'lf0', 'F0', 'mag'; 0.0 for other streams);
        * the result is written beside the input file, using the input's
          base name plus ``file_extension_dict[stream]``.

        Args:
            in_file_list: paths of the generated feature files.
            dimension: number of columns per frame in those files.
            out_dimension_dict: stream name -> number of columns.
            file_extension_dict: stream name -> output file extension.
            var_file_dict: passed to ``self.load_covariance``.
            do_MLPG: set to False to write the sliced columns unchanged.
            cfg: configuration object providing ``silence_pattern`` and
                ``in_label_align_dir``; required only when
                ``self.enforce_silence`` is true.
        """
        print ('param_generation')

        print ('acoustic_decomposition for %d files' % len(in_file_list) )

        self.load_covariance(var_file_dict, out_dimension_dict)

        stream_start_index = {}
        dimension_index = 0
        vuv_dimension = None

        for feature_name in out_dimension_dict:
            if feature_name != 'vuv':
                stream_start_index[feature_name] = dimension_index
            else:
                # vuv is not in stream_start_index; keep its offset here so
                # the voicing decision can still be applied to lf0 below.
                vuv_dimension = dimension_index
            dimension_index += out_dimension_dict[feature_name]

        io_funcs = BinaryIOCollection()

        mlpg_algo = MLParameterGeneration()

        flen = len(in_file_list)
        for findex, file_name in enumerate(in_file_list, 1):

            dir_name = os.path.dirname(file_name)
            file_id = os.path.splitext(os.path.basename(file_name))[0]

            features, frame_number = io_funcs.load_binary_file_frame(file_name, dimension)

            print('processing %4d of %4d: %s' % (findex,flen,file_name) )

            for feature_name in self.gen_wav_features:

                print(' feature: %s' % feature_name)
                stream_start = stream_start_index[feature_name]
                stream_width = out_dimension_dict[feature_name]
                current_features = features[:, stream_start:stream_start + stream_width]

                var = self.var[feature_name]
                if FAST_MLPG:
                    ### fast version wants variance per frame, not single global one:
                    var = numpy.transpose(numpy.tile(var, frame_number))

                if do_MLPG == False:
                    gen_features = current_features
                else:
                    # stream_width // 3: the third argument is the stream's
                    # column count divided by three.
                    gen_features = mlpg_algo.generation(current_features, var, stream_width // 3)

                print(' feature dimensions: %d by %d' %(gen_features.shape[0], gen_features.shape[1]))

                if feature_name in ['lf0', 'F0'] and vuv_dimension is not None:
                    # Mark unvoiced frames (vuv < 0.5) in the first column.
                    unvoiced = features[:, vuv_dimension] < 0.5
                    gen_features[unvoiced, 0] = self.inf_float

                new_file_name = os.path.join(dir_name, file_id + file_extension_dict[feature_name])

                if self.enforce_silence:
                    silence_pattern = cfg.silence_pattern
                    label_align_dir = cfg.in_label_align_dir
                    if feature_name in ['lf0', 'F0', 'mag']:
                        silence_value = self.inf_float
                    else:
                        silence_value = 0.0

                    # `with` guarantees the label file is closed again.
                    with open(label_align_dir+'/'+file_id+'.lab','r') as in_f:
                        for line in in_f:
                            line = line.strip()

                            if len(line) < 1:
                                continue
                            temp_list  = re.split(r'\s+', line)
                            # Label times are scaled by 1e-4 and divided by 5
                            # to obtain frame indices.
                            start_time = int(int(temp_list[0])*(10**-4)/5)
                            end_time   = int(int(temp_list[1])*(10**-4)/5)

                            full_label = temp_list[2]

                            if self.check_silence_pattern(full_label, silence_pattern):
                                gen_features[start_time:end_time, :] = silence_value

                io_funcs.array_to_binary_file(gen_features, new_file_name)
                print(' wrote to file %s' % new_file_name)
                           'vuv' : '.vuv'}

    var_file_dict  = {'mgc':'{}/mgc.var'.format(args.var_dir),
		      'vuv':'{}/vuv.var'.format(args.var_dir),
                      'lf0':'{}/lf0.var'.format(args.var_dir)}

    generator = ParameterGeneration()

    # out_dimension_dict is the cmp structure of your nnet output cmp
    generator.acoustic_decomposition(in_file_list, 127, out_dimension_dict, file_extension_dict, var_file_dict)

    if not os.path.exists(lf0_dir):
        os.mkdir(lf0_dir)
    if not os.path.exists(mgc_dir):
        os.mkdir(mgc_dir)
    os.system('mv {}/*.lf0 {}'.format(cmp_dir, lf0_dir))
    os.system('mv {}/*.mgc {}'.format(cmp_dir, mgc_dir))

    io_funcs = BinaryIOCollection()
    inf_float = -1.0e+10
    for item in os.listdir(cmp_dir):
        vuv = numpy.reshape(numpy.fromfile(os.path.join(cmp_dir, item), dtype=numpy.float32), [-1,127])[:,123]
        name, ext = os.path.splitext(item)
        lf0 = numpy.reshape(numpy.fromfile(os.path.join(lf0_dir, "{}.lf0".format(name)),dtype=numpy.float32),[-1,1])
        mgc = numpy.reshape(numpy.fromfile(os.path.join(mgc_dir, "{}.mgc".format(name)),dtype=numpy.float32),[-1,41])
        mgc = signal.convolve2d(
            mgc, [[1.0 / 3], [1.0 / 3], [1.0 / 3]], mode="same", boundary="symm")
        lf0[vuv < 0.5] = inf_float
        io_funcs.array_to_binary_file(lf0, os.path.join(lf0_dir, "{}.lf0".format(name)))
        io_funcs.array_to_binary_file(mgc, os.path.join(mgc_dir, "{}.mgc".format(name)))
    def prepare_data(self, in_file_list_dict, out_file_list, in_dimension_dict,
                     out_dimension_dict):
        """Compose per-stream feature files into one output matrix per utterance.

        Column offsets of each stream in the output are derived from the
        iteration order of ``out_dimension_dict``.  For each of the
        ``self.file_number`` utterances, every stream in
        ``self.data_stream_list`` is loaded and copied to its offset.  The
        first stream fixes the utterance's frame count; longer streams are
        truncated to it, shorter ones are an error.

        'lf0' / 'F0' streams are passed through ``self.interpolate_f0`` first;
        when ``self.record_vuv`` is set, the returned voicing vector is
        stored in the single 'vuv' column.  Streams flagged in
        ``self.compute_dynamic`` additionally get delta and acceleration
        features (``self.delta_win`` / ``self.acc_win``) written directly
        after their static columns.

        Args:
            in_file_list_dict: stream name -> list of input files, indexed by
                utterance.
            out_file_list: output file path per utterance.
            in_dimension_dict: stream name -> static feature dimension.
            out_dimension_dict: stream name -> number of output columns.

        Raises:
            AssertionError: if a stream has fewer frames than the first
                stream of the same utterance.
        """
        logger = logging.getLogger("acoustic_comp")

        stream_start_index = {}
        stream_dim_index = 0
        for stream_name in out_dimension_dict:
            stream_start_index[stream_name] = stream_dim_index
            stream_dim_index += out_dimension_dict[stream_name]

        io_funcs = BinaryIOCollection()

        for i in range(self.file_number):
            out_file_name = out_file_list[i]

            logger.info('processing file %4d of %4d : %s' %
                        (i + 1, self.file_number, out_file_name))

            out_data_matrix = None
            out_frame_number = 0

            for k in range(self.data_stream_number):
                data_stream_name = self.data_stream_list[k]

                in_file_name = in_file_list_dict[data_stream_name][i]

                in_feature_dim = in_dimension_dict[data_stream_name]
                features, frame_number = io_funcs.load_binary_file_frame(
                    in_file_name, in_feature_dim)

                if k == 0:
                    # The first stream defines the utterance length.
                    out_frame_number = frame_number
                    out_data_matrix = numpy.zeros(
                        (out_frame_number, self.out_dimension))

                if frame_number > out_frame_number:
                    features = features[0:out_frame_number, ]
                    frame_number = out_frame_number

                # Explicit check instead of `assert` so it still runs under
                # `python -O`; AssertionError is kept for existing callers.
                if frame_number != out_frame_number:
                    message = (
                        'the frame number of data stream %s is not consistent with others: current %d others %d'
                        % (data_stream_name, frame_number, out_frame_number))
                    logger.critical(message)
                    raise AssertionError(message)

                dim_index = stream_start_index[data_stream_name]

                if data_stream_name in ['lf0', 'F0']:  ## F0 added for GlottHMM
                    features, vuv_vector = self.interpolate_f0(features)

                    ### if vuv information to be recorded, store it in corresponding column
                    if self.record_vuv:
                        vuv_column = stream_start_index['vuv']
                        out_data_matrix[0:out_frame_number,
                                        vuv_column:vuv_column + 1] = vuv_vector

                out_data_matrix[0:out_frame_number, dim_index:dim_index +
                                in_feature_dim] = features
                dim_index = dim_index + in_feature_dim

                if self.compute_dynamic[data_stream_name]:

                    delta_features = self.compute_dynamic_matrix(
                        features, self.delta_win, frame_number, in_feature_dim)
                    acc_features = self.compute_dynamic_matrix(
                        features, self.acc_win, frame_number, in_feature_dim)

                    # Layout per stream: static | delta | acceleration.
                    out_data_matrix[0:out_frame_number, dim_index:dim_index +
                                    in_feature_dim] = delta_features
                    dim_index = dim_index + in_feature_dim

                    out_data_matrix[0:out_frame_number, dim_index:dim_index +
                                    in_feature_dim] = acc_features

            ### write data to file
            io_funcs.array_to_binary_file(out_data_matrix, out_file_name)
            logger.debug(' wrote %d frames of features', out_frame_number)