Example #1
0
def extract_single_acoustic(in_path, out_path):
    """
    Run ``sbin/fea_extract`` on one audio file.

    Three scratch files are written under ``tmp_dir`` (a name this function
    expects to find in the enclosing module scope): an input description, a
    file holding ``out_path``, and an empty labels file. They are passed to
    the tool in that order and removed afterwards, even if the tool fails.

    :param in_path: the path to the audio file
    :param out_path: the path the extracted features should be saved to
    """
    # Margin subtracted from the reported file length
    # (presumably to keep the end mark inside the file -- TODO confirm).
    zero = 0.01

    input_path = tmp_dir + "tmp.input"
    features_path = tmp_dir + "tmp.features"
    labels_path = tmp_dir + "tmp.labels"

    # Query the length before creating anything so a failure here leaves no
    # scratch files behind.
    length = utils.get_wav_file_length(in_path)

    try:
        # Only text without newlines is written, so plain text mode is used.
        with open(input_path, 'w') as input_file:
            input_file.write('"%s" %.8f %s %.8f %.8f' % (
                in_path, 0, str(float(length) - zero), 0, 0))
        with open(features_path, 'w') as features_file:
            features_file.write(out_path)
        # The labels file is created empty.
        open(labels_path, 'w').close()

        command = "sbin/fea_extract %s %s %s" % (
            input_path, features_path, labels_path)
        utils.easy_call(command)
    finally:
        # remove leftovers
        for leftover in (input_path, features_path, labels_path):
            if os.path.exists(leftover):
                os.remove(leftover)
Example #2
0
def run(features_path, output_path):
    """
    Run ``classify_multi_class.lua`` (via ``th``) from inside ``lua_scripts/``.

    Both paths are made absolute before the working directory changes, so
    relative arguments keep referring to the caller's directory. The working
    directory is restored afterwards, also when the command raises.

    :param features_path: path passed to the script as ``-input_file``
    :param output_path: path passed to the script as ``-output_file``
    """
    f_abs_path = os.path.abspath(features_path)
    o_abs_path = os.path.abspath(output_path)
    cmd = 'th classify_multi_class.lua -input_file %s -output_file %s' % (
        f_abs_path, o_abs_path)

    original_cwd = os.getcwd()
    os.chdir("lua_scripts/")
    try:
        utils.easy_call(cmd)
    finally:
        # Go back to where we started rather than to "..", which would be
        # wrong if lua_scripts/ is reached through a symlink.
        os.chdir(original_cwd)
Example #3
0
def extract_features(wav_filename, output_path, start_extract, end_extract):
    """
    Run ``./sbin/VotFrontEnd2`` on a window of a single wav file.

    The file is first converted with ``sox`` (``-r 16000 -b 16``) into a
    temporary wav. Nothing is done if the path does not end with ``.wav``.
    Failures during processing are reported and swallowed (best effort);
    temporary files are removed in every case.

    :param wav_filename: path to the input wav file
    :param output_path: output path; every ``.wav`` in it is replaced by
        ``.txt`` before it is handed to the front end
    :param start_extract: start of the window (converted with ``float``)
    :param end_extract: end of the window (converted with ``float``)
    """
    # validation -- done before any temporary name is generated
    if not os.path.exists(wav_filename):
        sys.stderr.write(
            'Error: input path %s does not exists.\n' % wav_filename)
        return
    if not wav_filename.endswith('.wav'):
        return

    temp_input_filename = utils.generate_tmp_filename('input')
    temp_label_filename = utils.generate_tmp_filename('labels')
    temp_features_filename = utils.generate_tmp_filename('features')
    temp_wav16_filename = utils.generate_tmp_filename('wav')

    try:
        # convert to 16K 16bit
        cmd = 'sox %s -r 16000 -b 16 %s' % (wav_filename, temp_wav16_filename)
        utils.easy_call(cmd)

        # onset and offset are both set to the middle of the window
        onset = offset = (float(start_extract) + float(end_extract)) / 2

        # NOTE: the window description is stored in the "features" temp
        # file and the output path in the "input" temp file; the front end
        # receives them in exactly this order (description, output list,
        # labels).
        with open(temp_features_filename, 'w') as description_file:
            description_file.write('"%s" %.8f %s %.8f %.8f' % (
                temp_wav16_filename, float(start_extract),
                str(float(end_extract)), float(onset), float(offset)))
        with open(temp_input_filename, 'w') as output_list_file:
            output_list_file.write(output_path.replace('.wav', '.txt'))
        # the labels file is created empty
        open(temp_label_filename, 'w').close()

        command = "./sbin/VotFrontEnd2 %s %s %s" % (
            temp_features_filename, temp_input_filename, temp_label_filename)
        utils.easy_call(command)
    except Exception as err:
        # best effort: name the file that failed and carry on
        print(wav_filename)
        sys.stderr.write(
            'Error while processing %s: %s\n' % (wav_filename, err))
    finally:
        # remove leftovers
        for leftover in (temp_input_filename, temp_label_filename,
                         temp_features_filename, temp_wav16_filename):
            if os.path.exists(leftover):
                os.remove(leftover)
Example #4
0
def predict(input_path, output_path, model, csv_filename):
    """
    Run the full prediction pipeline on one wav file.

    Steps: feature extraction (``fe.main`` inside ``front_end/``),
    classification (``th classify.lua`` inside ``back_end/``),
    ``post_process`` and finally ``create_text_grid``. The working directory
    is restored after each step and the temporary files are removed, also
    when a step raises.

    :param input_path: path to the input wav file
    :param output_path: path of the TextGrid to write
    :param model: one of ``rnn``, ``2rnn``, ``birnn`` (case-insensitive);
        anything else falls back to the single layer RNN
    :param csv_filename: forwarded unchanged to ``create_text_grid``
    """
    if not os.path.exists(input_path):
        sys.stderr.write("%s file does not exits\n" % input_path)
        return

    # model name -> (model file, message printed when it is selected)
    known_models = {
        'RNN': ('results/1_layer_model.net',
                '==> using single layer RNN'),
        '2RNN': ('results/2_layer_model.net',
                 '==> using 2 stacked layers RNN'),
        'BIRNN': ('results/bi_model.net',
                  '==> using bi-directional RNN'),
    }
    fallback = ('results/1_layer_model.net',
                '==> unknown model, using default model: single RNN')
    model_path, banner = known_models.get(model.upper(), fallback)
    print(banner)

    try:
        length = utils.get_wav_file_length(input_path)
    except Exception:
        # Double spaces kept: they match what the comma-separated print
        # statement used to emit.
        print("The input file  %s  is probably not a valid WAV file."
              % input_path)
        sys.exit(-1)

    feature_file = generate_tmp_filename('features')
    prob_file = generate_tmp_filename('prob')
    predict_file = generate_tmp_filename('prediction')
    dur_file = generate_tmp_filename('dur')

    start_dir = os.getcwd()
    try:
        print('\n1) Extracting features and classifying ...')
        abs_path = os.path.abspath(input_path)
        os.chdir("front_end/")
        try:
            fe.main(abs_path, feature_file)
        finally:
            os.chdir(start_dir)

        print('\n2) Model predictions ...')
        cmd = 'th classify.lua -x_filename %s -class_path %s -prob_path %s -model_path %s' % (
            feature_file, predict_file, prob_file, model_path)
        os.chdir("back_end/")
        try:
            utils.easy_call(cmd)
        finally:
            os.chdir(start_dir)

        print('\n3) Extracting duration')
        post_process(os.path.abspath(predict_file), dur_file)

        print('\n4) Writing TextGrid file to %s ...' % output_path)
        create_text_grid(dur_file, input_path, output_path, length,
                         float(0.0), csv_filename)
    finally:
        # remove leftovers
        for leftover in (feature_file, prob_file, predict_file, dur_file):
            if os.path.exists(leftover):
                os.remove(leftover)
Example #5
0
def extract_single_mfcc(in_path, out_path):
    """
    Extract mfcc features from one audio file.

    The audio is converted with sox (``-r 16000 -b 16``) into a temporary
    wav, which is then passed to HTK's ``HCopy`` using ``config/htk.config``.
    The temporary wav is removed afterwards, also when a command fails.

    :param in_path: the path to the audio file
    :param out_path: the path to save the mfcc features
    """
    import platform

    plat = platform.system().lower()
    # Strings must be compared with ==; "is" tests object identity and is
    # not reliable for string values.
    if plat == 'darwin':
        sox_path = 'sbin/osx/sox'
        htk_path = 'sbin/osx'
    elif 'linux' in plat:
        sox_path = 'sox'
        htk_path = 'sbin/linux'
    else:
        # any other platform falls back to the bundled OS X binaries
        sox_path = 'sbin/osx/sox'
        htk_path = 'sbin/osx'

    tmp_file = utils.generate_tmp_filename('wav')
    try:
        cmd = "%s %s -r 16000 -b 16 %s" % (sox_path, in_path, tmp_file)
        utils.easy_call(cmd)
        cmd = "%s/HCopy -C config/htk.config %s %s" % (
            htk_path, tmp_file, out_path)
        utils.easy_call(cmd)
    finally:
        if os.path.exists(tmp_file):
            os.remove(tmp_file)
Example #6
0
def predict(input_path, output_path, model):
    """
    Run the full prediction pipeline on one wav file.

    Steps: ``predict_single_file.py`` inside ``front_end/``,
    ``th classify.lua`` inside ``back_end/``, ``post_process`` and finally
    ``create_text_grid``. Intermediate files live in a ``tmp/`` directory
    that is recreated at the start and removed at the end, also when a step
    raises. The working directory is restored after each external step.

    :param input_path: path to the input wav file
    :param output_path: path of the TextGrid to write
    :param model: one of ``rnn``, ``2rnn``, ``birnn``, ``2birnn``
        (case-insensitive); anything else falls back to the single layer RNN
    """
    tmp_dir = 'tmp/'
    tmp_features = 'tmp.features'
    prob_file = tmp_dir + 'tmp.prob'
    predict_file = tmp_dir + 'tmp.prediction'
    dur_file = tmp_dir + 'tmp.dur'

    if not os.path.exists(input_path):
        sys.stderr.write("wav file does not exits\n")
        return

    # model name -> (model file, message printed when it is selected)
    known_models = {
        'RNN': ('results/1_layer_model.net',
                '==> using single RNN layer'),
        '2RNN': ('results/2_layer_model.net',
                 '==> using 2 stacked layers of RNN'),
        'BIRNN': ('results/1_bi_model.net',
                  '==> using single bi-directional RNN layer'),
        '2BIRNN': ('results/2_bi_model.net',
                   '==> using two stacked layers of bi-directional RNN'),
    }
    fallback = ('results/1_layer_model.net',
                '==> unknown model, using default model: single layer of RNN')
    model_path, banner = known_models.get(model.upper(), fallback)
    print(banner)

    length = utils.get_wav_file_length(input_path)

    # start from an empty tmp dir
    if os.path.exists(tmp_dir):
        shutil.rmtree(tmp_dir)
    os.mkdir(tmp_dir)

    start_dir = os.getcwd()
    try:
        print('\n1) Extracting features and classifying ...')
        # Paths are made absolute while still in the start directory.
        cmd = 'python predict_single_file.py %s %s ' % (
            os.path.abspath(input_path),
            os.path.abspath(tmp_dir) + '/' + tmp_features)
        os.chdir("front_end/")
        try:
            utils.easy_call(cmd)
        finally:
            os.chdir(start_dir)

        print('\n2) Model predictions ...')
        cmd = 'th classify.lua -folder_path %s -x_filename %s -class_path %s -prob_path %s -model_path %s' % (
            os.path.abspath(tmp_dir), tmp_features,
            os.path.abspath(predict_file), os.path.abspath(prob_file),
            model_path)
        os.chdir("back_end/")
        try:
            utils.easy_call(cmd)
        finally:
            os.chdir(start_dir)

        print('\n3) Extracting duration')
        post_process(os.path.abspath(predict_file), dur_file)

        print('\n4) Writing TextGrid file to %s ...' % output_path)
        create_text_grid(dur_file, output_path, length, float(0.0))
    finally:
        # remove leftovers
        if os.path.exists(tmp_dir):
            shutil.rmtree(tmp_dir)
Example #7
0
def measurement_features(audio_path, textgrid_path, output_path):
    """
    Run ``./sbin/VotFrontEnd2`` on every ``.wav`` file in a directory.

    For each wav file the TextGrid with the same base name is read from
    ``textgrid_path``. In tier 0, interval 1 supplies onset/offset and
    interval 2's end is used as the total length. A ``.labels`` file and the
    front end's ``.txt`` output are written to ``output_path``. A file that
    fails is reported and skipped. The ``tmp/`` scratch directory is removed
    at the end, also when an exception escapes.

    :param audio_path: directory holding the wav files
    :param textgrid_path: directory holding the matching ``.TextGrid`` files
    :param output_path: output directory (created if missing)
    """
    # defines
    tmp_dir = 'tmp/'
    tmp_input = tmp_dir + 'tmp.input'
    tmp_label = tmp_dir + 'tmp.labels'
    label_suffix = '.labels'
    tmp_features = tmp_dir + 'tmp.features'
    tmp_file = tmp_dir + 'tmp.wav'
    epsilon = 0.001  # keeps the end of the window just below the length

    # validation
    if not os.path.exists(audio_path):
        sys.stderr.write('Error: input path does not exists.\n')
        return
    if not os.path.exists(output_path):
        print('output path does not exists, creating output directory.')
        os.mkdir(output_path)
    # create tmp dir
    if os.path.exists(tmp_dir):
        st.rmtree(tmp_dir)
    os.mkdir(tmp_dir)

    try:
        # loop over all the files in the input dir
        for item in os.listdir(audio_path):
            if not item.endswith('.wav'):
                continue
            stem = item[:-len('.wav')]
            try:
                # convert to 16K 16bit
                cmd = 'sbin/sox %s -r 16000 -b 16 %s' % (
                    os.path.join(audio_path, item), tmp_file)
                utils.easy_call(cmd)

                # parse the textgrid
                textgrid = TextGrid()
                textgrid.read(os.path.join(textgrid_path, stem + '.TextGrid'))
                intervals = textgrid._TextGrid__tiers[0]._IntervalTier__intervals

                length = intervals[2]._Interval__xmax
                onset = intervals[1]._Interval__xmin
                offset = intervals[1]._Interval__xmax

                start_extract = 0
                end_extract = min(offset + 0.08, length - epsilon)

                # =================== ACOUSTIC FEATURES =================== #
                # write labels
                label_file = os.path.join(output_path, stem + label_suffix)
                with open(label_file, 'w') as fid:
                    fid.write('1 2\n')
                    fid.write('%s %s %s\n' % (int(onset * 1000) + 1,
                                              int(offset * 1000) + 1,
                                              int(offset * 1000) + 4))

                # NOTE: the window description is stored in tmp.features and
                # the output path in tmp.input; the front end receives them
                # in this order (description, output list, labels).
                with open(tmp_features, 'w') as description_file:
                    description_file.write('"%s" %.8f %s %.8f %.8f' % (
                        tmp_file, float(start_extract),
                        str(float(end_extract)), float(onset), float(offset)))
                with open(tmp_input, 'w') as output_list_file:
                    output_list_file.write(
                        os.path.join(output_path, stem + '.txt'))
                # the labels file handed to the front end is created empty
                open(tmp_label, 'w').close()

                command = "./sbin/VotFrontEnd2 %s %s %s" % (
                    tmp_features, tmp_input, tmp_label)
                utils.easy_call(command)
            except Exception as err:
                # best effort: name the file that failed and carry on
                print(item)
                sys.stderr.write(
                    'Error while processing %s: %s\n' % (item, err))
            finally:
                # remove leftovers
                for leftover in (tmp_input, tmp_label, tmp_features):
                    if os.path.exists(leftover):
                        os.remove(leftover)
    finally:
        if os.path.exists(tmp_dir):
            st.rmtree(tmp_dir)
Example #8
0
def extract_features(audio_path, output_path, start_extract, end_extract):
    """
    Run ``./sbin/VotFrontEnd2`` on a window of a single wav file.

    The file is first converted with ``sbin/sox`` (``-r 16000 -b 16``) into
    ``tmp/tmp.wav``. The ``tmp/`` directory is recreated at the start and
    removed at the end, also when an exception escapes. If the path does not
    end with ``.wav`` nothing else happens. Failures during processing are
    reported and swallowed (best effort).

    :param audio_path: path to the input wav file
    :param output_path: output path; every ``.wav`` in it is replaced by
        ``.txt`` before it is handed to the front end
    :param start_extract: start of the window (converted with ``float``)
    :param end_extract: end of the window (converted with ``float``)
    """
    # defines
    tmp_dir = 'tmp/'
    tmp_input = tmp_dir + 'tmp.input'
    tmp_label = tmp_dir + 'tmp.labels'
    tmp_features = tmp_dir + 'tmp.features'
    tmp_file = tmp_dir + 'tmp.wav'

    # validation
    if not os.path.exists(audio_path):
        sys.stderr.write('Error: input path does not exists.\n')
        return
    # create tmp dir
    if os.path.exists(tmp_dir):
        st.rmtree(tmp_dir)
    os.mkdir(tmp_dir)

    try:
        if audio_path.endswith('.wav'):
            try:
                # convert to 16K 16bit
                cmd = 'sbin/sox %s -r 16000 -b 16 %s' % (audio_path, tmp_file)
                utils.easy_call(cmd)

                # onset and offset are both set to the middle of the window
                onset = offset = (float(start_extract) + float(end_extract)) / 2

                # NOTE: the window description is stored in tmp.features and
                # the output path in tmp.input; the front end receives them
                # in this order (description, output list, labels).
                with open(tmp_features, 'w') as description_file:
                    description_file.write('"%s" %.8f %s %.8f %.8f' % (
                        tmp_file, float(start_extract),
                        str(float(end_extract)), float(onset), float(offset)))
                with open(tmp_input, 'w') as output_list_file:
                    output_list_file.write(output_path.replace('.wav', '.txt'))
                # the labels file is created empty
                open(tmp_label, 'w').close()

                command = "./sbin/VotFrontEnd2 %s %s %s" % (
                    tmp_features, tmp_input, tmp_label)
                utils.easy_call(command)
            except Exception as err:
                # best effort: name the file that failed and carry on
                print(audio_path)
                sys.stderr.write(
                    'Error while processing %s: %s\n' % (audio_path, err))
    finally:
        # removes the scratch files together with the directory
        if os.path.exists(tmp_dir):
            st.rmtree(tmp_dir)
Example #9
0
def extract_features(wav_filename, output_path, start_extract, end_extract):
    """
    Run ``./sbin/VotFrontEnd2`` on a window of a single wav file.

    The file is first converted with ``sox`` (``-r 16000 -b 16``) into a
    temporary wav. Nothing is done if the path does not end with ``.wav``.
    Failures during processing are reported and swallowed (best effort);
    temporary files are removed in every case.

    :param wav_filename: path to the input wav file
    :param output_path: output path; every ``.wav`` in it is replaced by
        ``.txt`` before it is handed to the front end
    :param start_extract: start of the window (converted with ``float``)
    :param end_extract: end of the window (converted with ``float``)
    """
    # validation -- done before any temporary name is generated
    if not os.path.exists(wav_filename):
        sys.stderr.write(
            'Error: input path %s does not exists.\n' % wav_filename)
        return
    if not wav_filename.endswith('.wav'):
        return

    temp_input_filename = utils.generate_tmp_filename('input')
    temp_label_filename = utils.generate_tmp_filename('labels')
    temp_features_filename = utils.generate_tmp_filename('features')
    temp_wav16_filename = utils.generate_tmp_filename('wav')
    scratch_files = (temp_input_filename, temp_label_filename,
                     temp_features_filename, temp_wav16_filename)

    try:
        # convert to 16K 16bit
        utils.easy_call('sox %s -r 16000 -b 16 %s' % (wav_filename,
                                                      temp_wav16_filename))

        # onset and offset are both set to the middle of the window
        middle = (float(start_extract) + float(end_extract)) / 2
        onset = middle
        offset = middle

        # NOTE: the window description is stored in the "features" temp
        # file and the output path in the "input" temp file; the front end
        # receives them in exactly this order (description, output list,
        # labels).
        with open(temp_features_filename, 'w') as description_file:
            description_file.write('"%s" %.8f %s %.8f %.8f' % (
                temp_wav16_filename, float(start_extract),
                str(float(end_extract)), float(onset), float(offset)))
        with open(temp_input_filename, 'w') as output_list_file:
            output_list_file.write(output_path.replace('.wav', '.txt'))
        # the labels file is created empty
        open(temp_label_filename, 'w').close()

        utils.easy_call("./sbin/VotFrontEnd2 %s %s %s" % (
            temp_features_filename, temp_input_filename, temp_label_filename))
    except Exception as err:
        # best effort: name the file that failed and carry on
        print(wav_filename)
        sys.stderr.write(
            'Error while processing %s: %s\n' % (wav_filename, err))
    finally:
        # remove leftovers
        for leftover in scratch_files:
            if os.path.exists(leftover):
                os.remove(leftover)
def measurement_features(audio_path, textgrid_path, output_path):
    """
    Run ``./sbin/VotFrontEnd2`` on every ``.wav`` file in a directory.

    For each wav file the TextGrid with the same base name is read from
    ``textgrid_path``. Interval 1 of tier 2 gives the release start/end and
    interval 1 of tier 5 gives the voicing start/end. The extraction window
    is the release interval widened by ``gap_start`` / ``gap_end``. A
    ``.labels`` file and the front end's ``.txt`` output are written to
    ``output_path``. A file that fails is reported and skipped. The ``tmp/``
    scratch directory is removed at the end, also when an exception escapes.

    :param audio_path: directory holding the wav files
    :param textgrid_path: directory holding the matching ``.TextGrid`` files
    :param output_path: output directory (created if missing)
    """
    # defines
    tmp_dir = 'tmp/'
    tmp_input = tmp_dir + 'tmp.input'
    tmp_label = tmp_dir + 'tmp.labels'
    label_suffix = '.labels'
    tmp_features = tmp_dir + 'tmp.features'
    tmp_file = tmp_dir + 'tmp.wav'
    gap_start = 0.05
    gap_end = 0.05

    # validation
    if not os.path.exists(audio_path):
        sys.stderr.write('Error: input path does not exists.\n')
        return
    if not os.path.exists(output_path):
        print('output path does not exists, creating output directory.')
        os.mkdir(output_path)
    # create tmp dir
    if os.path.exists(tmp_dir):
        st.rmtree(tmp_dir)
    os.mkdir(tmp_dir)

    try:
        # loop over all the files in the input dir
        for item in os.listdir(audio_path):
            if not item.endswith('.wav'):
                continue
            stem = item[:-len('.wav')]
            try:
                # convert to 16K 16bit
                cmd = 'sbin/sox %s -r 16000 -b 16 %s' % (
                    os.path.join(audio_path, item), tmp_file)
                utils.easy_call(cmd)

                # parse the textgrid
                textgrid = TextGrid()
                textgrid.read(os.path.join(textgrid_path, stem + '.TextGrid'))
                release = textgrid._TextGrid__tiers[2]._IntervalTier__intervals[1]
                release_start = release._Interval__xmin
                release_end = release._Interval__xmax

                voicing = textgrid._TextGrid__tiers[5]._IntervalTier__intervals[1]
                voicing_start = voicing._Interval__xmin
                voicing_end = voicing._Interval__xmax

                onset = release_start
                offset = release_end

                start_extract = onset - gap_start
                end_extract = offset + gap_end

                # =================== ACOUSTIC FEATURES =================== #
                # write labels: the four marks relative to the window start,
                # scaled by 1000 and shifted by one
                label_file = os.path.join(output_path, stem + label_suffix)
                with open(label_file, 'w') as fid:
                    fid.write('1 2\n')
                    fid.write('%s %s %s %s\n' % (
                        int((release_start - start_extract) * 1000 + 1),
                        int((release_end - start_extract) * 1000 + 1),
                        int((voicing_start - start_extract) * 1000 + 1),
                        int((voicing_end - start_extract) * 1000 + 1)))

                # NOTE: the window description is stored in tmp.features and
                # the output path in tmp.input; the front end receives them
                # in this order (description, output list, labels).
                with open(tmp_features, 'w') as description_file:
                    description_file.write('"%s" %.8f %s %.8f %.8f' % (
                        tmp_file, float(start_extract),
                        str(float(end_extract)), float(onset), float(offset)))
                with open(tmp_input, 'w') as output_list_file:
                    output_list_file.write(
                        os.path.join(output_path, stem + '.txt'))
                # the labels file handed to the front end is created empty
                open(tmp_label, 'w').close()

                command = "./sbin/VotFrontEnd2 %s %s %s" % (
                    tmp_features, tmp_input, tmp_label)
                utils.easy_call(command)
            except Exception as err:
                # best effort: name the file that failed and carry on
                print(item)
                sys.stderr.write(
                    'Error while processing %s: %s\n' % (item, err))
            finally:
                # remove leftovers
                for leftover in (tmp_input, tmp_label, tmp_features):
                    if os.path.exists(leftover):
                        os.remove(leftover)
    finally:
        if os.path.exists(tmp_dir):
            st.rmtree(tmp_dir)
def neg_vot_creator(audio_path, textgrid_path, output_path, l):
    """
    Run ``./sbin/VowelDurationFrontEnd`` on the stretch before the release.

    For every ``.wav`` file in ``audio_path`` the matching TextGrid is read
    and the start of interval 1 in tier 2 is taken as the end of the window.
    The window starts 0.1 earlier, clipped at 0. A ``.labels`` file holding
    ``l`` and the front end's ``.txt`` output are written to
    ``output_path``. A file that fails is reported and skipped. The ``tmp/``
    scratch directory is removed at the end, also when an exception escapes.

    :param audio_path: directory holding the wav files
    :param textgrid_path: directory holding the matching ``.TextGrid`` files
    :param output_path: output directory (created if missing)
    :param l: label written (via ``str``) to every ``.labels`` file
    """
    # defines
    tmp_dir = 'tmp/'
    tmp_input = tmp_dir + 'tmp.input'
    tmp_label = tmp_dir + 'tmp.labels'
    label_suffix = '.labels'
    tmp_features = tmp_dir + 'tmp.features'
    tmp_file = tmp_dir + 'tmp.wav'

    # validation
    if not os.path.exists(audio_path):
        sys.stderr.write('Error: input path does not exists.\n')
        return
    if not os.path.exists(output_path):
        print('output path does not exists, creating output directory.')
        os.mkdir(output_path)
    # create tmp dir
    if os.path.exists(tmp_dir):
        st.rmtree(tmp_dir)
    os.mkdir(tmp_dir)

    try:
        # loop over all the files in the input dir
        for item in os.listdir(audio_path):
            if not item.endswith('.wav'):
                continue
            stem = item[:-len('.wav')]
            try:
                # convert to 16K 16bit
                cmd = 'sbin/sox %s -r 16000 -b 16 %s' % (
                    os.path.join(audio_path, item), tmp_file)
                utils.easy_call(cmd)

                # parse the textgrid
                textgrid = TextGrid()
                textgrid.read(os.path.join(textgrid_path, stem + '.TextGrid'))
                release_start = textgrid._TextGrid__tiers[2]._IntervalTier__intervals[1]._Interval__xmin

                end_time = release_start
                start_time = max(0, end_time - 0.1)

                # =================== ACOUSTIC FEATURES =================== #
                # write labels
                label_file = os.path.join(output_path, stem + label_suffix)
                with open(label_file, 'w') as fid:
                    fid.write('%s\n' % str(l))

                # NOTE: the window description is stored in tmp.features and
                # the output path in tmp.input; the front end receives them
                # in this order (description, output list, labels).
                with open(tmp_features, 'w') as description_file:
                    description_file.write('"%s" %.8f %s %.8f %.8f' % (
                        tmp_file, float(start_time), str(float(end_time)),
                        float(start_time), float(end_time)))
                with open(tmp_input, 'w') as output_list_file:
                    output_list_file.write(
                        os.path.join(output_path, stem + '.txt'))
                # the labels file handed to the front end is created empty
                open(tmp_label, 'w').close()

                command = "./sbin/VowelDurationFrontEnd %s %s %s" % (
                    tmp_features, tmp_input, tmp_label)
                utils.easy_call(command)
            except Exception as err:
                # best effort: name the file that failed and carry on
                print(item)
                sys.stderr.write(
                    'Error while processing %s: %s\n' % (item, err))
            finally:
                # remove leftovers
                for leftover in (tmp_input, tmp_label, tmp_features):
                    if os.path.exists(leftover):
                        os.remove(leftover)
    finally:
        if os.path.exists(tmp_dir):
            st.rmtree(tmp_dir)
Example #12
0
def neg_vot_creator(audio_path, textgrid_path, output_path, l):
    """
    Run ``./sbin/VowelDurationFrontEnd`` on the stretch before the release.

    For every ``.wav`` file in ``audio_path`` the matching TextGrid is read
    and the start of interval 1 in tier 2 is taken as the end of the window.
    The window starts 0.1 earlier, clipped at 0. A ``.labels`` file holding
    ``l`` and the front end's ``.txt`` output are written to
    ``output_path``. A file that fails is reported and skipped. The ``tmp/``
    scratch directory is removed at the end, also when an exception escapes.

    :param audio_path: directory holding the wav files
    :param textgrid_path: directory holding the matching ``.TextGrid`` files
    :param output_path: output directory (created if missing)
    :param l: label written (via ``str``) to every ``.labels`` file
    """
    # defines
    tmp_dir = 'tmp/'
    tmp_input = tmp_dir + 'tmp.input'
    tmp_label = tmp_dir + 'tmp.labels'
    label_suffix = '.labels'
    tmp_features = tmp_dir + 'tmp.features'
    tmp_file = tmp_dir + 'tmp.wav'
    scratch_files = (tmp_input, tmp_label, tmp_features)

    # validation
    if not os.path.exists(audio_path):
        sys.stderr.write('Error: input path does not exists.\n')
        return
    if not os.path.exists(output_path):
        print('output path does not exists, creating output directory.')
        os.mkdir(output_path)
    # create tmp dir
    if os.path.exists(tmp_dir):
        st.rmtree(tmp_dir)
    os.mkdir(tmp_dir)

    try:
        # loop over all the files in the input dir
        for item in os.listdir(audio_path):
            if not item.endswith('.wav'):
                continue
            stem = item[:-len('.wav')]
            try:
                # convert to 16K 16bit
                utils.easy_call('sbin/sox %s -r 16000 -b 16 %s' % (
                    os.path.join(audio_path, item), tmp_file))

                # parse the textgrid
                textgrid = TextGrid()
                textgrid.read(os.path.join(textgrid_path, stem + '.TextGrid'))
                release_start = textgrid._TextGrid__tiers[
                    2]._IntervalTier__intervals[1]._Interval__xmin

                end_time = release_start
                start_time = max(0, end_time - 0.1)

                # =================== ACOUSTIC FEATURES =================== #
                # write labels
                with open(os.path.join(output_path, stem + label_suffix),
                          'w') as fid:
                    fid.write('%s\n' % str(l))

                # NOTE: the window description is stored in tmp.features and
                # the output path in tmp.input; the front end receives them
                # in this order (description, output list, labels).
                with open(tmp_features, 'w') as description_file:
                    description_file.write('"%s" %.8f %s %.8f %.8f' % (
                        tmp_file, float(start_time), str(float(end_time)),
                        float(start_time), float(end_time)))
                with open(tmp_input, 'w') as output_list_file:
                    output_list_file.write(
                        os.path.join(output_path, stem + '.txt'))
                # the labels file handed to the front end is created empty
                open(tmp_label, 'w').close()

                utils.easy_call("./sbin/VowelDurationFrontEnd %s %s %s" % (
                    tmp_features, tmp_input, tmp_label))
            except Exception as err:
                # best effort: name the file that failed and carry on
                print(item)
                sys.stderr.write(
                    'Error while processing %s: %s\n' % (item, err))
            finally:
                # remove leftovers
                for leftover in scratch_files:
                    if os.path.exists(leftover):
                        os.remove(leftover)
    finally:
        if os.path.exists(tmp_dir):
            st.rmtree(tmp_dir)