def link_cluster_caller(name, base_path, db_file, name_out_path):

    global log

    x = Utility.load_obj('{}/x.pkl'.format(base_path))
    inverselengthscale = Utility.load_obj('{}/input_sensitivity.pkl'.format(base_path))

    for n_clusters in xrange(2, 6):
        for mul in [0.025, 0.05, 0.075, 0.1]:

            n_neighbors = int( len(x)*mul )

            title = 'param_n_cluster_{}_n_neighbors_{}x'.format(n_clusters, mul)
            name_out_file = '{}/{}.eps'.format(name_out_path, title)

            log.append(title)
            log.append('n_cluster : {}'.format(n_clusters))
            log.append('n_neighbors for kernel : {}'.format(n_neighbors))

            labels = link_clustering(x, inverselengthscale, n_clusters, n_neighbors)
            plot(x, inverselengthscale, labels, name_out_file, title)

            Utility.save_obj(labels, '{}/{}.pkl'.format(name_out_path, title) )


    Utility.write_to_file_line_by_line('{}/{}_log.txt'.format(name_out_path, name), log)

    pass
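
# A commented usage sketch of the sweep above (all paths are hypothetical
# placeholders; `log` is the module-level list the function appends to and is
# assumed to be initialised beforehand, e.g. log = []).
# link_cluster_caller('tone_0_sweep',
#                     '/path/to/gplvm_output',   # must contain x.pkl and input_sensitivity.pkl
#                     '/path/to/db.pkl',
#                     '/path/to/plot_output')
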
    def find_data_point_from_coordinate(filepath, input_sen_path, labels,
                                        syllable_data_tag, area):

        # Read data file
        data_point = Utility.load_obj(filepath)
        #         print data_point

        # Get input sensitivity
        input_sen_obj = Utility.load_obj(input_sen_path)
        input_sensitivity = Utility.get_input_sensitivity(input_sen_obj, 3)
        #         print input_sensitivity

        x_coordinate = data_point[:,
                                  [input_sensitivity[0], input_sensitivity[1]]]
        #         print x_coordinate
        x_cor = np.array(x_coordinate)
        index = DataReader.filter_data(x_cor, area)

        print index

        lab = Utility.load_obj(labels)
        print len(lab)
        print lab[index]

        syllable_tag = Utility.load_obj(syllable_data_tag)
        print len(syllable_tag)
        print syllable_tag

        # Return

        pass
def run_for_voice_data():

    dropbox_path = '/home/h1/decha/Dropbox/'

    output_name,delta_bool,delta2_bool = '02_delta_delta-delta', True, True
    # output_name,delta_bool,delta2_bool = '03_delta',  True, False
    # output_name,delta_bool,delta2_bool = '04_no_delta',  False, False
    input_dims = 3

    for tone in ['0','1','2','3','4', '01234']:
    # for tone in ['01234']:

        print 'Running Tone : {}'.format(tone)

        if tone == '01234':
            data_object_path = '{}/Inter_speech_2016/Syllable_object/01_manual_labeling_object/syllable_all.pickle'.format(dropbox_path)
            syllable_management = Utility.load_obj(data_object_path)
        else :
            data_object_path = '{}/Inter_speech_2016/Syllable_object/01_manual_labeling_object/syllable_{}.pickle'.format(dropbox_path,tone)
            syllable_management = Utility.load_obj(data_object_path)

        print 'Delta : {}, Delta-Delta : {}'.format(delta_bool, delta2_bool)
        output_path = '{}/Inter_speech_2016/Syllable_object/{}/BGP_LVM/{}_dimentionality/Tone_{}/'.format(dropbox_path,output_name,input_dims,tone)

        print output_path

        Latent_variable_model_Training.execute_Bayesian_GPLVM_training(
            syllable_management, 
            Syllable.TRAINING_FEATURE_POLYNOMIAL_2_DEGREE_VOICE, 
            input_dims, 
            output_path,
            delta_bool=delta_bool,
            delta2_bool=delta2_bool)

    pass
def analysis(main_path):
    # main_path = '/work/w13/decha/Inter_speech_2016_workplace/Data/07c-5dims_missing_data_delta_deltadelta/BayesianGPLVMMiniBatch_Missing/Tone_4/'
    gpmodel = Utility.load_obj('{}/GP2dRegression.npy'.format(main_path))

    model_path = '{}/GP_model.npy'.format(main_path)
    model = Utility.load_obj(model_path)
    data = model.X.mean

    x = []

    input_sensitivity = model.input_sensitivity()
    print input_sensitivity

    index = Utility.get_input_sensitivity(input_sensitivity, 2)
    print index

    for i in range(len(data)):
        x.append([data[i, index[0]], data[i, index[1]]])

    x = np.array(x)

    y = np.array(gpmodel.predict(x)[0])
    print y.shape

    plt.clf()
    plt.scatter(x[:, 0], x[:, 1], c=y, cmap='gray')
    plt.savefig('{}/gpregression.pdf'.format(main_path))
    pass
def run_plot_and_latex():

    # output_name = '02_delta_delta-delta'
    # output_name = '03_delta'
    # output_name = '04_no_delta'
    system_names = [
        '02_delta_delta-delta', '03_delta', '04_no_delta',
        '05_missing_data_no_delta', '06_02_with_3-dimentionality'
    ]

    for output_name in system_names:

        base_path = '/home/h1/decha/Dropbox/Inter_speech_2016/Syllable_object/{}/BGP_LVM/'.format(
            output_name)
        object_path = '/home/h1/decha/Dropbox/Inter_speech_2016/Syllable_object/01_manual_labeling_object/'

        for tone in ['0', '1', '2', '3', '4', '01234']:

            model_path = '{}/Tone_{}/GP_model.npy'.format(base_path, tone)
            data_object = '{}/syllable_{}.pickle'.format(object_path, tone)

            if tone == '01234':
                data_object = '{}/syllable_all.pickle'.format(object_path)

            outpath = '{}/Tone_{}/stress_unstress_plot.eps'.format(
                base_path, tone)

            GP_LVM_Scatter.plot_scatter(Utility.load_obj(model_path),
                                        Utility.load_obj(data_object), outpath)

    pass
def add_data_object():

    obj = Utility.load_obj(
        '/home/h1/decha/Dropbox/Inter_speech_2016/Syllable_object/mix_object/current_version/all_vowel_type/syllable_object_01234.pickle'
    )

    name_index = Utility.load_obj(
        '/work/w13/decha/Inter_speech_2016_workplace/Tonal_projection/11_missing_data/all_vowel_type/input_dims_10/delta-True_delta-delta-True/BayesianGPLVMMiniBatch_Missing_Tone_01234/name_index.npy'
    )
    name_index = np.array(name_index)

    model = Utility.load_obj(
        '/work/w13/decha/Inter_speech_2016_workplace/Tonal_projection/11_missing_data/all_vowel_type/input_dims_10/delta-True_delta-delta-True/BayesianGPLVMMiniBatch_Missing_Tone_01234/GP_model.npy'
    )

    data = np.array(model.X.mean)
    print data.shape

    for syl in obj.syllables_list:
        name = syl.name_index
        if 'gpr' not in name: continue

        name_position = np.where(name_index == name)
        # print name_position
        latent_data = data[name_position][0]
        # print latent_data
        syl.set_latent_for_single_space(latent_data)
        # print syl.single_space_latent
        # sys.exit()
    Utility.save_obj(
        obj,
        '/home/h1/decha/Dropbox/Inter_speech_2016/Syllable_object/mix_object/current_version/all_vowel_type/syllable_object_01234.pickle'
    )
def find_group(tone, v, name):

    # if 'n' in v:
    #     v = 'vvvn'
    # elif 'sg' in v:
    #     v = 'vvvsg'
    # else:
    #     v = 'vvv'

    group_path = '/work/w13/decha/Inter_speech_2016_workplace/Tonal_projection/06_Tonal_part_projection_noise_reduction-250-iters-opt/{}/input_dims_10/delta-True_delta-delta-True/BGP_LVM_Tone_{}/'.format(
        v, tone)

    name_index = np.array(
        Utility.load_obj('{}/name_index.npy'.format(group_path)))
    label = np.array(
        Utility.load_obj('{}/clustered_label.npy'.format(group_path)))

    # print name
    # print name_index

    if '.' in name:
        print name

    if len(label[name_index == name]) == 0:
        print name
        return 3
    return label[name_index == name][0]

    pass
    def get_latent_data(base_path, names, label_feature, use_input_sensitivity=False, normalize=False):

        model = Utility.load_obj('{}/GP_model.npy'.format(base_path))
        input_sensitivity = model.input_sensitivity()

        latent_data = np.array(Utility.load_obj('{}/GP_model.npy'.format(base_path)).X.mean)
        name_index = np.array(Utility.load_obj('{}/name_index.npy'.format(base_path)))

        latent_Y = []
        for n in names:
            ind = np.where(name_index==n)
            latent_Y.append(latent_data[ind][0])

        if len(latent_Y) != len(names):
            print 'Unequal data : {}'.format(base_path)
            sys.exit()

        latent_Y = np.array(latent_Y)

        print 'Get input sent {}'.format(input_sensitivity)

        if not use_input_sensitivity: 
            input_sensitivity = None

        data = ANN_Executioner_Helper.get_ClassificationDataSet(latent_Y, label_feature, normalize=normalize, input_sensitivity=input_sensitivity)
        return data
def plot_type(plot_type, out_file_path, base_path_list, data_object_path):

    model_path = '{}/GP_model.npy'.format(base_path_list)
    name_index_list = '{}/name_index.npy'.format(base_path_list)

    import os.path
    if not os.path.isfile(model_path): return

    data_object = data_object_path

    # model = Utility.load_obj(model_path)
    # data = model.X.mean
    # means = np.array(data)

    GP_LVM_Scatter.plot_scatter(
        Utility.load_obj(model_path),
        Utility.load_obj(data_object),
        out_file_path,
        name_index_list=Utility.load_obj(name_index_list),
        label_type=plot_type,
        no_short_duration=True,
        perform_unsupervised=False,
        non_unlabelled_stress=False,
        get_only_gpr_data=False,
        get_only_manual_data=True,
        return_after_dbscan=False)

    # sys.exit()

    pass
def normalize_data(db_file, name_out_path, target_type, missing_db_file,
                   missing_type):

    db = Utility.load_obj(db_file)

    missing_db = Utility.load_obj(missing_db_file)

    new_data = []

    for syl in db:
        d = syl['TF'][target_type]['data']

        # print syl['dur']

        dur = 0
        for du in syl['dur']:
            dur = dur + du
        consonant_ratio = float(syl['dur'][0]) / dur  # cast to float to avoid integer truncation under Python 2
        # print consonant_ratio

        missing = None
        for m in missing_db:
            if syl['id'] == m['id']:
                missing = m
                break

        unvoice_frames = np.argwhere(
            np.isnan(missing['TF'][missing_type]['data']))
        # print unvoice_frames
        unvoice_frames_ratio = float(len(unvoice_frames)) / float(len(d) - 1)

        d = np.append(d, consonant_ratio)
        d = np.append(d, unvoice_frames_ratio)
        # print d
        new_data.append(d)

        # if not len(unvoice_frames) == 0:
        #     sys.exit()

    new_data = np.array(new_data)

    print new_data.shape

    new_db = []

    for idx, syl in enumerate(db):
        syl['TF']['intepolate151_with_consonant_unvoice_ratio'] = dict()
        syl['TF']['intepolate151_with_consonant_unvoice_ratio'][
            'data'] = new_data[idx]
        syl['TF']['intepolate151_with_consonant_unvoice_ratio'][
            'description'] = 'intepolate151 with consonant-duration ratio and unvoiced-frame ratio appended'

        new_db.append(syl)

    Utility.save_obj(new_db, name_out_path)

    pass
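
# A commented usage sketch of the ratio-augmentation step above. Paths are
# hypothetical; 'intepolate151' and 'missing151' are feature keys that appear
# elsewhere in this code and are assumed to be present in both databases.
# normalize_data('/path/to/db_interpolated.pkl',
#                '/path/to/db_with_ratios.pkl',
#                'intepolate151',              # target_type read per syllable
#                '/path/to/db_missing.pkl',
#                'missing151')                 # missing_type used to count unvoiced (NaN) frames
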
def run_training(base_path, db_file, name_out_path):

    names_file = '{}/names.pkl'.format(base_path)
    out_data = '{}/x.pkl'.format(base_path)
    input_sensitivity = '{}/input_sensitivity.pkl'.format(base_path)

    names = Utility.load_obj(names_file)
    db = Utility.load_obj(db_file)

    name_list = []
    for d in db:
        name_list.append(d['id'])

    label = []
    for nn in names:
        idx = name_list.index(nn)

        if nn in potential_list:
            label.append('3')
        else:
            label.append(db[idx]['stress'])

    out = Utility.load_obj(out_data)
    input_sent = Utility.load_obj(input_sensitivity)

    print 'Input sensitivity', input_sent

    most_dominants = Utility.get_input_sensitivity(input_sent, 2)

    label = map(int, label)
    label = np.array(label)

    train = np.append(out[label == 2], out[label == 3], axis=0)

    train = np.c_[train[:, most_dominants[0]], train[:, most_dominants[1]]]

    print train.shape

    global kern
    lengthscale = 1 / np.array(input_sent, dtype=float)
    kern = GPy.kern.RBF(len(train[0]),
                        ARD=True,
                        lengthscale=[
                            lengthscale[most_dominants[0]],
                            lengthscale[most_dominants[1]]
                        ])

    print most_dominants

    xx, yy = np.meshgrid(np.linspace(-5, 5, 500), np.linspace(-5, 5, 500))
    plane = np.c_[xx.ravel(), yy.ravel()]
    svm_classifier(train, '', '', '', '', plane, xx, yy)

    pass
def call_accuracy(db_file, x_base_file, setting, name):

    db = Utility.load_obj(db_file)

    real = load_real_label(db)
    n_cluster, n_neighbor = find_config(name)

    unstress_list = setting[0]
    stress_list = setting[1]

    x_file = '{}/param_n_cluster_{}_n_neighbors_{}x.pkl'.format(
        x_base_file, n_cluster, n_neighbor)
    pred = Utility.load_obj(x_file)
    print pred.shape

    print set(pred), setting

    for un in unstress_list:
        pred[pred == un] = 555  # Unstress

    for st in stress_list:
        pred[pred == st] = 999  # Stress

    pred[pred == 999] = 1
    pred[pred == 555] = 0

    if name == '1_non-nasal':
        print set(pred)

    acc = accuracy_score(real, pred)

    f1 = f1_score(real, pred, average=None)

    print 'acc : ', acc
    print 'f1 : ', f1

    global acc_scores
    global f1_scores

    # spl = name.split('_')
    acc_scores[name] = acc
    f1_scores[name] = f1

    result_file = dict()
    result_file['pred'] = pred
    result_file['real'] = real
    result_file['acc'] = acc
    result_file['f1'] = f1
    result_file['name'] = name
    result_file['n_cluster'] = n_cluster
    result_file['n_neighbors'] = n_neighbor

    Utility.save_obj(result_file, '{}/result_file.pkl'.format(x_base_file))

    pass
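
# A commented usage sketch of the evaluation step above. Paths are hypothetical;
# `setting` is [unstress_cluster_ids, stress_cluster_ids], and the module-level
# dicts acc_scores / f1_scores are assumed to be initialised before the call.
# call_accuracy('/path/to/db.pkl',
#               '/path/to/cluster_labels',   # directory holding param_n_cluster_*.pkl files
#               [[0, 2], [1]],               # clusters 0 and 2 -> unstress, cluster 1 -> stress
#               '1_non-nasal')
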
def load_name_and_label(name_file, label_file):

    global d

    names = Utility.load_obj(name_file)
    labels = Utility.load_obj(label_file)

    for n, lab in zip(names, labels):
        print n, lab
        d[n] = {'stress': lab}

    pass
def call_run_dbscan(data_path, inverselengthscale_path, out_base_path):

    x = Utility.load_obj(data_path)

    print x.shape

    inverselengthscale = Utility.load_obj(inverselengthscale_path)

    eps, m = find_distance_stat(x, inverselengthscale, out_base_path)

    run_dbscan(x, inverselengthscale, eps, m, out_base_path)

    pass
def perform_unsupervised(out_file_path, base_path_list, data_object_path):

    model_path = '{}/GP_model.npy'.format(base_path_list)

    import os.path
    if not os.path.isfile(model_path) : return

    data_object = data_object_path

    plot_result(
        Utility.load_obj(model_path), 
        Utility.load_obj(data_object), 
        out_file_path)
    def get_train_and_test_fold(fold_object_path, number_of_fold, tst_fold):
        syls_trn, syls_tst = [], []
        
        test_fold_path = '{}{}.pickle'.format(fold_object_path, tst_fold)
        syls_tst = Utility.load_obj(test_fold_path).syllables_list

        train_fold_path = []
        for j in range(number_of_fold):
            if j==tst_fold : continue
            fold_path = '{}{}.pickle'.format(fold_object_path, j)
            syls_trn+= Utility.load_obj(fold_path).syllables_list

        return ( SyllableDatabaseManagement(syllable_list=syls_trn), SyllableDatabaseManagement(syllable_list=syls_tst) )
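
    # A commented cross-validation sketch using the helper above (hypothetical
    # fold prefix; fold files are assumed to be named 0.pickle .. 4.pickle).
    # for tst_fold in range(5):
    #     trn_db, tst_db = get_train_and_test_fold('/path/to/folds/', 5, tst_fold)
    #     print len(trn_db.syllables_list), len(tst_db.syllables_list)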
def run_command(feature_type, missing_data, data_object_base_path_name, base_out_path, input_dims, tone_list, dur_position, num_sampling, d1, d2):
    
    deltas = [
        [d1, d2]
    ]

    output_name_paths = []

    for i, d in enumerate(deltas):
        outp = '{}/input_dims_{}/delta-{}_delta-delta-{}/'.format(base_out_path, input_dims, d[0], d[1])
        output_name_paths.append(outp)

    print 'Missing Data : {}'.format(missing_data)
    print 'Inducing points : 10 percent'

    for idx, output_name in enumerate(output_name_paths):

        delta_bool=deltas[idx][0]
        delta2_bool=deltas[idx][1]

        if missing_data:
            method_name = 'BayesianGPLVMMiniBatch_Missing'
        else :
            method_name = 'BGP_LVM'

        for tone in tone_list:

            print 'Delta : {}, Delta-Delta : {}'.format(delta_bool, delta2_bool)

            data_object_path = '{}{}.pickle'.format(data_object_base_path_name, tone)

            print 'data path ',data_object_path

            syllable_management = Utility.load_obj(data_object_path)

            if len(syllable_management.syllables_list) == 0:
                print 'No syllable in this object database : {}'.format(tone)
                print '-----------------------------------------------------------------'
                continue

            output_path = '{}/{}_Tone_{}/'.format(output_name, method_name, tone)

            Utility.make_directory(output_path)

            print output_path

            Latent_variable_model_Training.execute_Bayesian_GPLVM_training(
                syllable_management, 
                feature_type, 
                input_dims, 
                output_path,
                num_sampling=num_sampling,
                dur_position=dur_position,
                delta_bool=delta_bool,
                delta2_bool=delta2_bool,
                missing_data=missing_data,
                num_inducing=int(len(syllable_management.syllables_list)*0.1),
                max_iters=500)

    pass
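
# A commented usage sketch of the training driver above; every path is a
# hypothetical placeholder, and the feature constant is the one used in
# run_for_voice_data() earlier in this file.
# run_command(Syllable.TRAINING_FEATURE_POLYNOMIAL_2_DEGREE_VOICE,   # feature_type
#             True,                                                  # missing_data
#             '/path/to/syllable_objects/syllable_',                 # tone id is appended to this prefix
#             '/path/to/output',                                     # base_out_path
#             10,                                                    # input_dims
#             ['0', '1', '2', '3', '4', '01234'],                    # tone_list
#             None,                                                  # dur_position (assumed optional here)
#             50,                                                    # num_sampling
#             True, True)                                            # d1 = delta, d2 = delta-delta
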
    def __init__(self, load_data_object=None, syllable_list=None):
        '''
        Constructor
        '''
        self.syllables_list = syllable_list
        if load_data_object is not None:
            self.syllables_list = Utility.load_obj(load_data_object)
    def get_data_with_missing_values(self, 
        num_sampling, subtract_typical_contour, feature_name=None, 
        delta=False, deltadelta=False):

        x = np.linspace(0, len(self.raw_data), num=num_sampling)
        Y = np.interp(x, np.arange(len(self.raw_data)), self.raw_data)

        data = Y

        if feature_name is not None:
            training_data = np.interp(x, np.arange(len(self.training_feature[feature_name])), self.training_feature[feature_name])
            # print training_data, len(training_data)
            data = training_data

        data[ Y<0 ] = np.nan

        if subtract_typical_contour: 
            typical_tone_path = '/home/h1/decha/Dropbox/Inter_speech_2016/Syllable_object/Typical_contour/50dims/tone_{}.pickle'.format(self.tone)
            typical_tone_obj = Utility.load_obj(typical_tone_path)
            data = data - typical_tone_obj

        # print data, len(data)

        if delta:
            y_delta = np.gradient(data)
            # print y_delta
            if deltadelta:
                y_delta_delta = np.gradient(y_delta)
                # print y_delta_delta
                y_delta = np.append(y_delta, y_delta_delta)
            data = np.append(data, y_delta)

        # print np.array(data), len(data)

        return np.array(data)
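
    # Shape note for the method above: with num_sampling=50, delta=True and
    # deltadelta=True the returned vector stacks [data, delta, delta-delta],
    # i.e. 150 values, with unvoiced (Y < 0) frames already set to np.nan.
    # Commented sketch (syl is a hypothetical instance of this class):
    # vec = syl.get_data_with_missing_values(50, False, delta=True, deltadelta=True)
    # assert vec.shape == (150,)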
def gen_dct_data(syllable_management_path):

    syl_object = Utility.load_obj(syllable_management_path)

    for syl in syl_object.syllables_list:
        data = syl.get_Y_features(
            Syllable.
            Training_feature_tonal_part_raw_remove_head_tail_interpolated,
            50,
            False,
            False,
            exp=True,
            subtract_means=False,
            output=None,
            missing_data=False)
        data_dct = dct(data, 2, norm='ortho')
        idct = dct(data_dct, 3, norm='ortho')

        print syl.name_index
        # print data
        # print data_dct
        # print idct

        syl.training_feature[
            Syllable.Training_feature_tonal_part_dct_coeff] = data_dct

    Utility.save_obj(syl_object, syllable_management_path)

    pass
def run_training(db_file, name_out_path, n_components, data_type):

    db = Utility.load_obj(db_file)

    Y = []

    names = []

    for syl in db:
        feat = syl['TF'][data_type]['data']
        Y.append(feat)
        names.append(syl['id'])
        # sys.exit()

    Y = np.array(Y)

    print Y.shape
    # print Y[0]

    config = {'n_components': n_components, 'data': Y}

    print config

    m, Y_r = GPy_Interface.pca(config)

    # print Y_r.shape

    Utility.save_obj(m, '{}/model.pkl'.format(name_out_path))
    Utility.save_obj(Y_r, '{}/pca_reduction_output.pkl'.format(name_out_path))

    Utility.save_obj(names, '{}/names.pkl'.format(name_out_path))
    Utility.save_obj(Y, '{}/training_data.pkl'.format(name_out_path))

    pass
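
# A commented usage sketch of the PCA step above (hypothetical paths;
# 'intepolate151' is a feature key used elsewhere in this code).
# run_training('/path/to/db.pkl',
#              '/path/to/pca_output',
#              2,                 # n_components
#              'intepolate151')   # data_type key inside syl['TF']
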
def fix_database(db_file, change_list_file, out_file):

    global db 
    db = None
    db = Utility.load_obj(db_file)

    change_list = []

    less_than = None

    for line in Utility.read_file_line_by_line(change_list_file):
        if 'tsc' in line:
            n = Utility.trim(line).replace(' ', '_')
            change_list.append(n)
        elif '<' in line:
            # print line
            less_than = line.split(' ')[1]
            pass

    # print change_list
    # print less_than

    if (len(change_list) == 0) or (less_than is None):
        raise Exception('Invalid change list file')

    new_list = change_stress(change_list, less_than)

    Utility.save_obj(new_list, out_file)

    pass
def gen_data(db_file, name_out_path):

    out = []

    for syl in Utility.load_obj(db_file):
        y = Syllable.get_normailze_with_missing_data(syl['raw_lf0'], 50,
                                                     syl['dur'])
        # print len(y)

        syl['TF'] = dict()

        missing_data = dict()
        missing_data['data'] = y
        missing_data[
            'description'] = 'Raw lf0 (first 50 + delta + delta-delta) + duration in frame units (the last value). Unvoiced frames are marked as missing data.'

        syl['TF']['missing151'] = missing_data

        # print syl

        out.append(syl)

        # sys.exit(0)

    Utility.save_obj(out, name_out_path)

    pass
def remove_duration_data(db_file, name_out_path):

    db = Utility.load_obj(db_file)

    new_data = []

    for syl in db:
        d = syl['TF']['intepolate151normailize']['data']
        new_data.append(d)

    new_data = np.array(new_data)

    print new_data

    new_data = np.delete(new_data, [150, 151], axis=1)

    print new_data

    print new_data.shape

    new_db = []
    for idx, syl in enumerate(db):
        syl['TF']['intepolate150_normailize_no_duration'] = dict()
        syl['TF']['intepolate150_normailize_no_duration']['data'] = new_data[
            idx]
        syl['TF']['intepolate150_normailize_no_duration'][
            'description'] = 'Normalized version of intepolate151 with the duration entries removed'

        new_db.append(syl)

    Utility.save_obj(new_db, name_out_path)

    pass
def set_pre_suc():
    tones = ['01234']

    name_list_path = '/home/h1/decha/Dropbox/python_workspace/Inter_speech_2016/playground/list_file_for_preceeding_suceeding/list_gpr_file/'

    for t in tones:
        path = '/home/h1/decha/Dropbox/Inter_speech_2016/Syllable_object/mix_object/current_version/all_vowel_type/syllable_object_{}.pickle'.format(
            t)
        print path

        syl_management = Utility.load_obj(path)
        for syl in syl_management.syllables_list:
            if 'manual' in syl.name_index: continue

            name = syl.name_index.split('_')
            file_tar = '{}/{}/{}.lab'.format(name_list_path, name[2][0],
                                             name[2])
            list_file = Utility.read_file_line_by_line(file_tar)
            for idx, l in enumerate(list_file):
                f = Utility.trim(l)
                if f == syl.name_index:
                    # print '--------------------'
                    preceeding = Utility.trim(list_file[idx - 1])
                    # print f
                    succeeding = Utility.trim(list_file[idx + 1])
                    # print '--------------------'
                    syl.set_preceeding_succeeding_name_index(
                        preceeding, succeeding)

            # sys.exit()

        Utility.save_obj(syl_management, path)
def normalize_data(db_file, name_out_path):

    db = Utility.load_obj(db_file)

    new_data = []

    for syl in db:
        d = syl['TF']['intepolate151']['data']
        new_data.append(d)

    new_data = np.array(new_data)

    print new_data

    X_normalized = preprocessing.normalize(new_data, norm='l2')

    print X_normalized

    print X_normalized.shape

    new_db = []
    for idx, syl in enumerate(db):
        syl['TF']['intepolate151_normalize_by_preprocessing.normalize'] = dict(
        )
        syl['TF']['intepolate151_normalize_by_preprocessing.normalize'][
            'data'] = X_normalized[idx]
        syl['TF']['intepolate151_normalize_by_preprocessing.normalize'][
            'description'] = 'preprocessing.normalize version of intepolate151'

        new_db.append(syl)

    Utility.save_obj(new_db, name_out_path)

    pass
def run_data_processor(db_file):

    db = Utility.load_obj(db_file)

    real = []
    # Y = []

    for syl in db:

        if syl['stress'] == '2':
            real.append(1)
        elif syl['stress'] in ['0', '1']:
            real.append(int(syl['stress']))
        else:
            print syl['stress']
            real.append(int(syl['stress']))

        # Y.append(syl['TF']['intepolate151_normalize_by_preprocessing.normalize']['data'])

        # sys.exit()

    real = np.array(real)
    # Y = np.array(Y)

    return real

    pass
def find_min_y(db_all):

    min_y = 600  # start above any plausible minimum F0 (Hz)

    for syl in Utility.load_obj(db_all):

        if len(syl['raw_lf0']) == 0: continue

        r = syl['raw_lf0']
        r = np.array(r)
        r[r<0] = np.nan

        # print r

        # sys.exit()

        if min( np.exp( r ) ) < 150:
            # print min( np.exp( r ) )
            continue

        if min( np.exp( r ) ) < min_y:
            min_y = min( np.exp( r ) )

    print 'min y = ', min_y

    pass
def fix():

    base_path = '/home/h1/decha/Dropbox/Inter_speech_2016/Syllable_object/Tonal_object/remove_all_silence_file/'

    fixed_list_path = '/work/w13/decha/Inter_speech_2016_workplace/Fix_stress_label/fix_list/'

    fixed_list = load_fix_list(fixed_list_path)

    fixed_list = np.array(fixed_list)

    for v in Utility.list_file(base_path):
        if v.startswith('.'): continue
        vowel_path = '{}/{}/'.format(base_path, v)
        for tone in Utility.list_file(vowel_path):
            if tone.startswith('.'): continue
            tone_file_path = '{}/{}'.format(vowel_path, tone)
            print tone_file_path
            syl_obj = Utility.load_obj(tone_file_path)

            for syl in syl_obj.syllables_list:
                # print syl.stress_manual
                if syl.name_index in fixed_list:
                    print syl.name_index, syl.stress_manual
                    if syl.stress_manual == 0:
                        syl.stress_manual = 1
                    else:
                        syl.stress_manual = 0
                    # print syl.name_index
            Utility.save_obj(syl_obj, tone_file_path)

    pass
def normalize_data(db_file, name_out_path):

    db = Utility.load_obj(db_file)

    new_data = []

    for syl in db:
        d = syl['TF']['missing151']['data']
        dd = np.array(d)
        dd[np.argwhere(np.isnan(d))] = un_voice
        new_data.append(dd)

    new_data = np.array(new_data)

    print new_data

    robust_scaler = RobustScaler()
    Xtr_r = robust_scaler.fit_transform(new_data)

    print Xtr_r

    print Xtr_r.shape

    new_db = []
    for idx, syl in enumerate(db):
        syl['TF']['missing151_robust_scale'] = dict()
        syl['TF']['missing151_robust_scale']['data'] = Xtr_r[idx]
        syl['TF']['missing151_robust_scale'][
            'description'] = 'robust_scale version of missing151'

        new_db.append(syl)

    Utility.save_obj(new_db, name_out_path)

    pass