コード例 #1
0
ファイル: cross_eval.py プロジェクト: shiyuzh2007/Audio2Vec-1
def main():
    """Plot (or MAP-score) database features projected by a saved model.

    The model is loaded from ``FLAG.model_fn`` with ``joblib`` and applied to
    the target-word features extracted from ``FLAG.db_fn``.  What happens
    next depends on ``FLAG.pca_dim``:

    * ``2`` -- plot the per-word averages and the sampled utterances on
      2-D axes;
    * ``3`` -- plot the same data on 3-D axes;
    * anything else -- no plotting; print the width of the transformed
      features and the value returned by ``MAP.MAP`` on a train/test split,
      then return.
    """
    # NOTE(review): the query file is read but nothing below uses it.
    q_feats, q_labs = reader.read_csv_file(FLAG.q_fn)
    db_feats, db_labs = reader.read_csv_file(FLAG.db_fn)
    dic, rev_dic = reader.build_dic(FLAG.word_dic)
    # The word dictionary is passed along, as every other caller of
    # build_targets in this project does.
    targets = reader.build_targets(FLAG.target_words, dic)
    word_color_dict = reader.build_label_color_list(targets, color_list)
    model = joblib.load(FLAG.model_fn)

    proj_feat_dic = PCA.extract_targets(db_feats, db_labs, targets)

    ### get the target words average ###
    ave_target_feat_dic = PCA.average_over_words(proj_feat_dic)
    anno_list = list(ave_target_feat_dic)
    ave_target_feat_list = [ave_target_feat_dic[i] for i in anno_list]
    # NOTE(review): result is unused; the call is kept as in the original.
    ave_num_target_feat_list = PCA.average_over_words_num(
        proj_feat_dic, targets)

    ave_target_trans = model.transform(ave_target_feat_list)

    all_feats, delta_lab_list = PCA.target_dic2list(proj_feat_dic)
    all_feats_trans = model.transform(all_feats)

    sampled_feats, sampled_delta_lab = MAP.sampling(all_feats_trans,
                                                    delta_lab_list)

    fig = plt.figure()
    if FLAG.pca_dim == 2:
        ### start plotting the average results ###
        ax = fig.add_subplot(111)
        ax = PCA.plot_with_anno(ave_target_trans, anno_list, rev_dic, ax)
        ### plotting all word utterance ###
        ax = PCA.plot_all_color(sampled_feats, sampled_delta_lab, rev_dic, ax,
                                word_color_dict)

    elif FLAG.pca_dim == 3:
        #### start plotting the 3D projections ####
        ax = fig.add_subplot(111, projection='3d')
        ax = PCA.plot_with_anno_3d(ave_target_trans, anno_list, rev_dic, ax)
        ax = PCA.plot_all_color_3d(sampled_feats, sampled_delta_lab, rev_dic,
                                   ax, word_color_dict)

    else:
        print("no plotting but testing through MAP")

        # NOTE(review): the database features are scored here because they
        # are the set the rest of this function works on -- confirm that the
        # query set was not the intended input.
        feat_trans = model.transform(db_feats)
        print(len(feat_trans[0]))
        all_list = list(zip(feat_trans, db_labs))

        train_list, test_list = MAP.split_train_test(all_list)
        print(MAP.MAP(test_list[:100], train_list, feat_dim=FLAG.pca_dim))

        return

    ax.legend(loc='upper right')
    plt.show()
    return
コード例 #2
0
ファイル: norm.py プロジェクト: shiyuzh2007/Audio2Vec-1
def main():
    """Normalize the query and corpus feature files with corpus statistics.

    Both files named by ``FLAG`` are read and the lengths of their first
    rows are compared.  The mean/variance pair returned by ``get_mean_var``
    for the corpus features is then handed to ``normalize`` for both sets,
    and each result is written next to its input with a ``_normed`` suffix.

    Raises:
        NameError: if the first rows of the two files differ in length.
    """
    query_feats, query_labs = dp.read_csv_file(FLAG.query_fn)
    corpus_feats, corpus_labs = dp.read_csv_file(FLAG.corpus_fn)

    # Refuse to continue when the two files do not share one feature width.
    if len(query_feats[0]) != len(corpus_feats[0]):
        print(len(query_feats[0]), len(corpus_feats[0]))
        raise NameError('The dimension between two files are not the same')

    # Statistics come from the corpus only and are reused for the queries.
    mean, var = get_mean_var(corpus_feats)
    corpus_normed = normalize(corpus_feats, mean, var)
    query_normed = normalize(query_feats, mean, var)

    dp.write_feat_lab(FLAG.query_fn + '_normed', query_normed, query_labs)
    dp.write_feat_lab(FLAG.corpus_fn + '_normed', corpus_normed, corpus_labs)
コード例 #3
0
ファイル: tsne_eval.py プロジェクト: shiyuzh2007/Audio2Vec-1
def main():
    """Show a t-SNE scatter plot of averaged target-word features.

    Features are read from ``FLAG.trans_file`` and restricted to the target
    words.  The output of ``PCA.average_over_words_num`` is passed through
    ``PCA.PCA_transform`` and then ``PCA.TSNE_transform`` (with
    ``FLAG.tsne_dim``), and the result is drawn with ``PCA.plot_all_color``.
    """
    ### load inputs ###
    feats, labs = reader.read_csv_file(FLAG.trans_file)
    dic, rev_dic = reader.build_dic(FLAG.word_dic)
    targets = reader.build_targets(FLAG.target_words, dic)
    target_feats = PCA.extract_targets(feats, labs, targets)
    word_color_dict = reader.build_label_color_list(targets, color_list)

    # These intermediate results are not consumed by the plot below; the
    # calls are kept so the function does exactly what it did before.
    word_means = PCA.average_over_words(target_feats)
    mean_vectors = [word_means[word] for word in word_means]
    utter_feats, utter_labels = PCA.target_dic2list(target_feats)

    ### reduce, embed and plot ###
    grouped_feats, grouped_labels = PCA.average_over_words_num(
        target_feats, targets)
    reduced_feats, _ = PCA.PCA_transform(grouped_feats)
    embedded = PCA.TSNE_transform(reduced_feats, FLAG.tsne_dim)

    axes = plt.figure().add_subplot(111)
    PCA.plot_all_color(embedded, grouped_labels, rev_dic, axes,
                       word_color_dict)
    plt.show()
コード例 #4
0
def main():
    """Transform a feature file with a saved model and write the result.

    Reads features and labels from ``FLAG.src_file``, loads the model stored
    at ``FLAG.model`` with ``joblib``, and writes the transformed features
    together with the unchanged labels to ``FLAG.target_file``.
    """
    features, labels = DP.read_csv_file(FLAG.src_file)
    model = joblib.load(FLAG.model)

    transformed = model.transform(features)
    DP.write_feat_lab(FLAG.target_file, transformed, labels)
コード例 #5
0
def apply_NE(fn):
    """Read a comma-delimited feature file and run ``naive_encoder`` on it.

    Args:
        fn: file name handed to ``dp.read_csv_file``.

    Returns:
        A ``(feats, labs)`` pair: the output of ``naive_encoder`` and the
        label list read from the file.
    """
    raw_feats, labels = dp.read_csv_file(fn, delimiter=',')
    # naive_encoder takes the features plus the lengths from gl.gen_len.
    seq_lens = gl.gen_len(raw_feats)
    return naive_encoder(raw_feats, seq_lens), labels
コード例 #6
0
def transform(fn, dic):
    """Write every entry of a feature file to its own DTW feature file.

    Output names are ``<FLAG.out_dir>/<label>_<count>``, where ``count`` is
    the number of entries with that label written so far according to
    ``dic``.

    Args:
        fn: comma-delimited feature file to read.
        dic: mapping from label to running count; updated in place, so one
            mapping can be shared across several calls.
    """
    feats, labs = dp.read_csv_file(fn, ',')
    out_dir = FLAG.out_dir
    seq_lens = gl.gen_len(feats)

    for idx, label in enumerate(labs):
        # First occurrence of a label starts its counter at zero.
        dic.setdefault(label, 0)
        out_path = out_dir + '/' + '_'.join((str(label), str(dic[label])))
        write_DTW_feat(out_path, feats[idx], seq_lens[idx])
        dic[label] += 1
コード例 #7
0
def main():
    """Print the buckets produced by ``gen_similarity_average``.

    Query features come from ``FLAG.test_fn`` and data features from
    ``FLAG.train_fn``; the lexicon is built from ``FLAG.lexicon`` and the
    word dictionary read from ``FLAG.word_dic``.
    """
    word_dic, _ = dp.build_dic(FLAG.word_dic)
    query_feats, query_labs = dp.read_csv_file(FLAG.test_fn)
    data_feats, data_labs = dp.read_csv_file(FLAG.train_fn)

    lexicon = dp.build_lexicon(FLAG.lexicon, word_dic)
    buckets, bucket_counts = gen_similarity_average(
        query_feats, query_labs, data_feats, data_labs, lexicon)
    print(buckets, bucket_counts)
コード例 #8
0
def main():
    """Align text and audio embeddings in two ICP training passes.

    Both embedding files are read (space-delimited) and passed through
    ``PCA_transform``; ``ICP_train`` on the results yields an audio-to-text
    and a text-to-audio matrix.  Each matrix is applied to the transposed
    reduced embeddings of its source side, ``generate_pair`` turns the
    result into an index map into the other side, and those maps select the
    rows of the original embeddings that are handed to ``ICP_train_full``.
    """
    text_emb, text_labs = dp.read_csv_file(FLAG.text_embeds, ' ')
    audio_emb, audio_labs = dp.read_csv_file(FLAG.audio_embeds, ' ')
    text_pca, audio_pca = PCA_transform(text_emb, audio_emb)

    a2t_mat, t2a_mat = ICP_train(text_pca, audio_pca)

    # The index maps are needed below; the generate_pair calls that define
    # them were commented out, so the function failed with NameError.
    t2a_text = np.transpose(text_pca)
    tmp_text = np.matmul(t2a_mat, t2a_text)
    t2a_audio_map = generate_pair(tmp_text, audio_pca)

    a2t_audio = np.transpose(audio_pca)
    tmp_audio = np.matmul(a2t_mat, a2t_audio)
    a2t_text_map = generate_pair(tmp_audio, text_pca)

    # Use the maps to index the original (unreduced) embeddings.
    np_audio = np.array(audio_emb)
    t2a_audio_emb = np_audio[t2a_audio_map]
    np_text = np.array(text_emb)
    a2t_text_emb = np_text[a2t_text_map]

    a2t_mat, t2a_mat = ICP_train_full(text_emb, t2a_audio_emb, audio_emb,
                                      a2t_text_emb)
コード例 #9
0
ファイル: PCA_eval.py プロジェクト: shiyuzh2007/Audio2Vec-1
def main():
    """Fit a model on target-word features, then plot or MAP-score them.

    The model returned by ``PCA_transform`` is fitted on the output of
    ``average_over_words_num`` and optionally saved to ``FLAG.model_fn``.
    With ``FLAG.pca_dim`` equal to 2 or 3, the per-word averages and the
    sampled utterances are plotted on 2-D or 3-D axes, followed by the words
    in ``FLAG.other_words`` unless that flag is the string ``'None'``.  For
    any other ``FLAG.pca_dim`` nothing is plotted; the value returned by
    ``MAP.MAP`` on a train/test split of all transformed features is printed
    instead.
    """
    ### load data and pick out the target words ###
    feats, labs = reader.read_csv_file(FLAG.train_file)
    dic, rev_dic = reader.build_dic(FLAG.word_dic)
    targets = reader.build_targets(FLAG.target_words, dic)
    target_feats = extract_targets(feats, labs, targets)
    word_color_dict = reader.build_label_color_list(targets, color_list)

    ### per-word averages, in the iteration order of the mapping ###
    word_means = average_over_words(target_feats)
    anno_list = list(word_means)
    mean_vectors = [word_means[word] for word in anno_list]

    ### fit the model, then project averages and single utterances ###
    fit_feats, _ = average_over_words_num(target_feats, targets)
    _, model = PCA_transform(fit_feats)
    mean_trans = model.transform(mean_vectors)

    utter_feats, utter_labels = target_dic2list(target_feats)
    utter_trans = model.transform(utter_feats)
    sampled_feats, sampled_labels = sampling(utter_trans, utter_labels)

    if FLAG.save_model:
        joblib.dump(model, FLAG.model_fn)

    ### anything other than 2 or 3 dimensions: score, do not plot ###
    if FLAG.pca_dim not in (2, 3):
        print ("no plotting but testing through MAP")

        feat_trans = model.transform(feats)
        all_list = [(feat_trans[i], labs[i]) for i in range(len(feats))]

        train_list, test_list = MAP.split_train_test(all_list)
        print (MAP.MAP(test_list[:100], train_list, feat_dim=FLAG.pca_dim))
        return

    three_d = FLAG.pca_dim == 3
    fig = plt.figure()
    if three_d:
        ax = fig.add_subplot(111, projection='3d')
        ax = plot_with_anno_3d(mean_trans, anno_list, rev_dic, ax)
        ax = plot_all_color_3d(sampled_feats, sampled_labels, rev_dic, ax,
                               word_color_dict)
    else:
        ax = fig.add_subplot(111)
        ax = plot_with_anno(mean_trans, anno_list, rev_dic, ax)
        ax = plot_all_color(sampled_feats, sampled_labels, rev_dic, ax,
                            word_color_dict)

    ### words that were not used to fit the model ###
    if FLAG.other_words != 'None':
        other_target = reader.build_targets(FLAG.other_words, dic)
        other_feats = extract_targets(feats, labs, other_target)
        ave_feat, other_lb_list = extract_additional_words(other_feats,
                                                           other_target)
        ave_ftrans = model.transform(ave_feat)

        if three_d:
            plot_additional_words_3d(ave_ftrans, other_lb_list, rev_dic, ax)
        else:
            plot_additional_words(ave_ftrans, other_lb_list, rev_dic, ax)

    ax.legend(loc='upper right')
    plt.show()
コード例 #10
0
def main():
    """Project two data sets with one model and plot or MAP-score them.

    The model returned by ``PCA_transform`` is fitted on all features read
    from ``FLAG.train_file``.  Target-word features from that file and from
    ``FLAG.apply_file`` are then transformed with it.  Depending on
    ``FLAG.pca_dim``:

    * ``2`` -- plot the per-word averages of both sets on shared 2-D axes,
      labelled ``'German'`` (marker ``'o'``) and ``'French'`` (marker
      ``'x'``);
    * ``3`` -- plot every target-word utterance of the first set on 3-D
      axes, coloured per word;
    * anything else -- no plotting; print the value returned by ``MAP.MAP``
      on a train/test split of the transformed first set, then return.
    """
    ### preprocessing ###
    feats, labs = reader.read_csv_file(FLAG.train_file)
    dic, rev_dic = reader.build_dic(FLAG.word_dic)
    targets = reader.build_targets(FLAG.target_words, dic)
    test_feat_dic = extract_targets(feats, labs, targets)
    ave_dic = average_over_words(test_feat_dic)
    anno_list = list(ave_dic)
    ave_feat_list = [ave_dic[i] for i in anno_list]

    # Report how many utterances were collected for each target word.
    for word, word_feats in test_feat_dic.items():
        print(word)
        print(len(word_feats))

    another_feats, another_labs = reader.read_csv_file(FLAG.apply_file)
    another_dic, another_rev_dic = reader.build_dic(FLAG.apply_dic)
    another_tar = reader.build_targets(FLAG.another_target_words, another_dic)
    another_test_dic = extract_targets(another_feats, another_labs,
                                       another_tar)
    # NOTE(review): only needed by a per-utterance plot of the second set
    # that is currently disabled; the call is kept as in the original.
    another_color_dict = reader.build_label_color_list(another_tar,
                                                       another_color_list)
    test_feats2, test_delta_labs2 = target_dic2list(another_test_dic)
    ave_dic2 = average_over_words(another_test_dic)
    anno_list2 = list(ave_dic2)
    ave_feat_list2 = [ave_dic2[i] for i in anno_list2]

    ### fit on every feature of the first set ###
    feats_trans, model = PCA_transform(feats)
    test_feats, test_delta_labs = target_dic2list(test_feat_dic)
    print(len(test_feats))
    test_feats_trans = model.transform(test_feats)
    ### project the averaged target words of both sets ###
    ave_feat_trans_list = model.transform(ave_feat_list)
    test_feats2_trans = model.transform(test_feats2)
    ave_feat_trans_list2 = model.transform(ave_feat_list2)

    if FLAG.pca_dim == 2:
        fig = plt.figure()
        ### start plotting the average results ###
        ax = fig.add_subplot(111)
        ax = plot_with_anno(ave_feat_trans_list, anno_list, rev_dic, ax, 'o',
                            'German')
        ax = plot_with_anno(ave_feat_trans_list2, anno_list2, another_rev_dic,
                            ax, 'x', 'French')

    elif FLAG.pca_dim == 3:
        # The colour table is built here, where it is used: its only
        # definition had been commented out, so this branch raised NameError.
        word_color_dict = reader.build_label_color_list(targets, color_list)
        fig = plt.figure()
        #### start plotting the 3D projections ####
        ax = fig.add_subplot(111, projection='3d')
        ax = plot_all_color_3d(test_feats_trans, test_delta_labs, rev_dic, ax,
                               word_color_dict)

    else:
        print("no plotting but testing through MAP")

        feat_trans = model.transform(feats)
        all_list = list(zip(feat_trans, labs))

        train_list, test_list = MAP.split_train_test(all_list)
        print(MAP.MAP(test_list[:100], train_list, feat_dim=FLAG.pca_dim))

        return

    ax.legend(loc='upper right')
    plt.show()
    return