def main():
    q_feats, q_labs = reader.read_csv_file(FLAG.q_fn)
    db_feats, db_labs = reader.read_csv_file(FLAG.db_fn)
    dic, rev_dic = reader.build_dic(FLAG.word_dic)
    targets = reader.build_targets(FLAG.target_words, dic)
    word_color_dict = reader.build_label_color_list(targets, color_list)
    model = joblib.load(FLAG.model_fn)
    proj_feat_dic = PCA.extract_targets(db_feats, db_labs, targets)

    ### get the average feature of each target word ###
    ave_target_feat_dic = PCA.average_over_words(proj_feat_dic)
    ave_target_feat_list = [ave_target_feat_dic[i] for i in ave_target_feat_dic]
    ave_num_target_feat_list = PCA.average_over_words_num(proj_feat_dic, targets)
    anno_list = [i for i in ave_target_feat_dic]
    ave_target_trans = model.transform(ave_target_feat_list)
    all_feats, delta_lab_list = PCA.target_dic2list(proj_feat_dic)
    all_feats_trans = model.transform(all_feats)
    sampled_feats, sampled_delta_lab = MAP.sampling(all_feats_trans, delta_lab_list)

    fig = plt.figure()
    if FLAG.pca_dim == 2:
        ### start plotting the average results ###
        ax = fig.add_subplot(111)
        ax = PCA.plot_with_anno(ave_target_trans, anno_list, rev_dic, ax)
        ### plot every word utterance ###
        ax = PCA.plot_all_color(sampled_feats, sampled_delta_lab, rev_dic, ax,
                                word_color_dict)
    elif FLAG.pca_dim == 3:
        #### start plotting the 3D projections ####
        ax = fig.add_subplot(111, projection='3d')
        ax = PCA.plot_with_anno_3d(ave_target_trans, anno_list, rev_dic, ax)
        ax = PCA.plot_all_color_3d(sampled_feats, sampled_delta_lab, rev_dic, ax,
                                   word_color_dict)
    else:
        print("no plotting; evaluating through MAP instead")
        all_list = []
        feat_trans = model.transform(db_feats)
        print(len(feat_trans[0]))
        for i in range(len(db_feats)):
            all_list.append((feat_trans[i], db_labs[i]))
        train_list, test_list = MAP.split_train_test(all_list)
        print(MAP.MAP(test_list[:100], train_list, feat_dim=FLAG.pca_dim))
        return

    ax.legend(loc='upper right')
    plt.show()
    return
def main():
    test_feats, test_labs = dp.read_csv_file(FLAG.query_fn)
    train_feats, train_labs = dp.read_csv_file(FLAG.corpus_fn)
    if len(test_feats[0]) != len(train_feats[0]):
        print(len(test_feats[0]), len(train_feats[0]))
        raise NameError('The feature dimensions of the two files are not the same')
    feat_dim = len(test_feats[0])

    ### normalize both files with the training-set statistics ###
    mean, var = get_mean_var(train_feats)
    train_normed_feats = normalize(train_feats, mean, var)
    test_normed_feats = normalize(test_feats, mean, var)
    dp.write_feat_lab(FLAG.query_fn + '_normed', test_normed_feats, test_labs)
    dp.write_feat_lab(FLAG.corpus_fn + '_normed', train_normed_feats, train_labs)
    #print (MAP(test_list[:FLAG.test_num], train_list, feat_dim=feat_dim))
    return
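# get_mean_var and normalize are referenced above but not defined in this file.
# A minimal sketch of what they might look like, assuming plain per-dimension
# z-score normalization with the training statistics (the repo's actual helpers
# may differ):
import numpy as np

def get_mean_var(feats):
    '''Return the per-dimension mean and variance of a list of feature vectors.'''
    arr = np.asarray(feats, dtype=np.float64)
    return arr.mean(axis=0), arr.var(axis=0)

def normalize(feats, mean, var, eps=1e-8):
    '''Z-score normalize every feature vector with the given mean/variance.'''
    arr = np.asarray(feats, dtype=np.float64)
    return ((arr - mean) / np.sqrt(var + eps)).tolist()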
def main():
    ### preprocessing ###
    feats, labs = reader.read_csv_file(FLAG.trans_file)
    dic, rev_dic = reader.build_dic(FLAG.word_dic)
    targets = reader.build_targets(FLAG.target_words, dic)
    test_feat_dic = PCA.extract_targets(feats, labs, targets)
    word_color_dict = reader.build_label_color_list(targets, color_list)

    ave_test_feat_dic = PCA.average_over_words(test_feat_dic)
    ave_test_feat_list = [ave_test_feat_dic[i] for i in ave_test_feat_dic]
    test_all_feats, test_all_delta_labs = PCA.target_dic2list(test_feat_dic)
    ave_num_feats, ave_num_lab = PCA.average_over_words_num(test_feat_dic, targets)
    #ave_num_feats, ave_num_lab = PCA.target_dic2list(ave_num_feat_lists)
    #sampled_feats, sampled_delta_lab = PCA.sampling(test_all_feats,
    #                                                test_all_delta_labs)

    ### PCA first, then t-SNE down to the plotting dimension ###
    sampled_feats, _ = PCA.PCA_transform(ave_num_feats)
    ave_test_feat_trans = PCA.TSNE_transform(sampled_feats, FLAG.tsne_dim)

    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax = PCA.plot_all_color(ave_test_feat_trans, ave_num_lab, rev_dic, ax,
                            word_color_dict)
    #ax = PCA.plot_with_anno(ave_test_feat_trans, anno_list, rev_dic, ax)
    #ax.legend(loc='upper right')
    plt.show()
    return
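# PCA.TSNE_transform above belongs to this repo's PCA helper module. A minimal
# sketch of the idea, assuming it simply wraps scikit-learn's t-SNE (the real
# helper may pick different hyperparameters):
from sklearn.manifold import TSNE

def TSNE_transform(feats, dim):
    '''Project the (already PCA-reduced) features down to `dim` dimensions with t-SNE.'''
    return TSNE(n_components=dim, init='pca', random_state=0).fit_transform(feats)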
def main():
    ### preprocessing ###
    src_feats, src_labs = DP.read_csv_file(FLAG.src_file)
    pca_model = joblib.load(FLAG.model)

    ### output the transformed features ###
    src_trans_feats = pca_model.transform(src_feats)
    DP.write_feat_lab(FLAG.target_file, src_trans_feats, src_labs)
    return
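# FLAG.model is expected to hold a PCA model serialized with joblib (matching the
# `joblib.dump(model, FLAG.model_fn)` call in the PCA training script). A minimal
# sketch of producing such a file, assuming a plain scikit-learn PCA fit; the
# helper name fit_and_save_pca is hypothetical:
import joblib
from sklearn.decomposition import PCA as SKPCA

def fit_and_save_pca(feats, n_components, model_fn):
    '''Fit a PCA model on the given features and serialize it for transform-only use.'''
    model = SKPCA(n_components=n_components)
    model.fit(feats)
    joblib.dump(model, model_fn)
    return model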
def apply_NE(fn):
    '''
    return:
        transed_feats: the naive-encoded feature list
        labs: the label list
    '''
    feats, labs = dp.read_csv_file(fn, delimiter=',')
    lens = gl.gen_len(feats)
    transed_feats = naive_encoder(feats, lens)
    return transed_feats, labs
def transform(fn, dic):
    feats, labs = dp.read_csv_file(fn, ',')
    dirname = FLAG.out_dir
    lens = gl.gen_len(feats)
    for i, lab in enumerate(labs):
        # dic counts how many utterances of each label were already written,
        # so every output file gets a unique "<label>_<count>" name
        if lab not in dic:
            dic[lab] = 0
        outname = dirname + '/' + str(lab) + '_' + str(dic[lab])
        write_DTW_feat(outname, feats[i], lens[i])
        dic[lab] += 1
    return
def main():
    word_dic, word_rev_dic = dp.build_dic(FLAG.word_dic)
    query_feats, query_labs = dp.read_csv_file(FLAG.test_fn)
    data_feats, data_labs = dp.read_csv_file(FLAG.train_fn)
    #tmp = np.array(query_feats + data_feats)
    #tmp_p = tmp - np.mean(np.array(tmp), 0)
    #tmp_list = tmp_p.tolist()
    #query_feats = tmp_list[:len(query_feats)]
    #data_feats = tmp_list[len(query_feats):]
    #dp.write_feat_lab(FLAG.test_fn + '_normed', query_feats, query_labs)
    #dp.write_feat_lab(FLAG.train_fn + '_normed', data_feats, data_labs)
    #query_feats = query_feats[:100]
    #query_labs = query_labs[:100]
    #data_feats = data_feats[:10000]
    #data_labs = data_labs[:10000]
    lex_dic = dp.build_lexicon(FLAG.lexicon, word_dic)
    bucks, bucks_cnt = gen_similarity_average(query_feats, query_labs,
                                              data_feats, data_labs, lex_dic)
    print(bucks, bucks_cnt)
    return
def main():
    text_emb, text_labs = dp.read_csv_file(FLAG.text_embeds, ' ')
    audio_emb, audio_labs = dp.read_csv_file(FLAG.audio_embeds, ' ')
    text_pca, audio_pca = PCA_transform(text_emb, audio_emb)
    a2t_mat, t2a_mat = ICP_train(text_pca, audio_pca)

    ### map text into the audio space and pair each mapped vector with an audio index ###
    t2a_text = np.transpose(text_pca)
    tmp_text = np.matmul(t2a_mat, t2a_text)
    t2a_audio_map = generate_pair(tmp_text, audio_pca)

    ### map audio into the text space and pair each mapped vector with a text index ###
    a2t_audio = np.transpose(audio_pca)
    tmp_audio = np.matmul(a2t_mat, a2t_audio)
    a2t_text_map = generate_pair(tmp_audio, text_pca)

    ### gather the paired embeddings and retrain ICP on the full-dimension features ###
    np_audio = np.array(audio_emb)
    t2a_audio_emb = np_audio[t2a_audio_map]
    np_text = np.array(text_emb)
    a2t_text_emb = np_text[a2t_text_map]
    a2t_mat, t2a_mat = ICP_train_full(text_emb, t2a_audio_emb,
                                      audio_emb, a2t_text_emb)
    return
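# generate_pair is used above but not defined in this file; its output is used to
# index the target embeddings, so it presumably returns, for each mapped source
# vector, the index of its nearest target vector. A minimal sketch under that
# assumption (treating both inputs as arrays of row vectors and using Euclidean
# distance; the real helper may transpose its inputs or use another metric):
import numpy as np
from scipy.spatial import cKDTree

def generate_pair(mapped_src, target):
    '''For every mapped source vector, return the index of the closest target vector.'''
    mapped_src = np.atleast_2d(mapped_src)
    target = np.atleast_2d(target)
    _, idx = cKDTree(target).query(mapped_src, k=1)
    return idx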
def main():
    ### preprocessing ###
    feats, labs = reader.read_csv_file(FLAG.train_file)
    dic, rev_dic = reader.build_dic(FLAG.word_dic)
    targets = reader.build_targets(FLAG.target_words, dic)
    test_feat_dic = extract_targets(feats, labs, targets)
    word_color_dict = reader.build_label_color_list(targets, color_list)

    ### PCA over the averaged target words ###
    ave_test_feat_dic = average_over_words(test_feat_dic)
    ave_test_feat_list = [ave_test_feat_dic[i] for i in ave_test_feat_dic]
    ave_num_feat_lists, tmp_lab = average_over_words_num(test_feat_dic, targets)
    ave_num_trans, model = PCA_transform(ave_num_feat_lists)
    anno_list = [i for i in ave_test_feat_dic]
    ave_test_trans = model.transform(ave_test_feat_list)

    ### use the PCA model to transform only the testing data ###
    all_feats, delta_lab_list = target_dic2list(test_feat_dic)
    all_feats_trans = model.transform(all_feats)

    ### sample a number of word occurrences ###
    sampled_feats, sampled_delta_lab = sampling(all_feats_trans, delta_lab_list)
    if FLAG.save_model:
        joblib.dump(model, FLAG.model_fn)

    if FLAG.pca_dim == 2:
        fig = plt.figure()
        ### start plotting the average results ###
        ax = fig.add_subplot(111)
        ax = plot_with_anno(ave_test_trans, anno_list, rev_dic, ax)
        ### plot every word utterance ###
        ax = plot_all_color(sampled_feats, sampled_delta_lab, rev_dic, ax,
                            word_color_dict)
    elif FLAG.pca_dim == 3:
        fig = plt.figure()
        #### start plotting the 3D projections ####
        ax = fig.add_subplot(111, projection='3d')
        ax = plot_with_anno_3d(ave_test_trans, anno_list, rev_dic, ax)
        ax = plot_all_color_3d(sampled_feats, sampled_delta_lab, rev_dic, ax,
                               word_color_dict)
    else:
        print("no plotting; evaluating through MAP instead")
        all_list = []
        feat_trans = model.transform(feats)
        # print(len(feat_trans[0]))
        for i in range(len(feats)):
            all_list.append((feat_trans[i], labs[i]))
        train_list, test_list = MAP.split_train_test(all_list)
        print(MAP.MAP(test_list[:100], train_list, feat_dim=FLAG.pca_dim))
        return

    ### plot the words that were not used to fit the PCA ###
    if FLAG.other_words != 'None':
        other_target = reader.build_targets(FLAG.other_words, dic)
        extract_others = extract_targets(feats, labs, other_target)
        ave_feat, other_lb_list = extract_additional_words(extract_others,
                                                           other_target)
        ave_ftrans = model.transform(ave_feat)
        if FLAG.pca_dim == 2:
            plot_additional_words(ave_ftrans, other_lb_list, rev_dic, ax)
        else:
            plot_additional_words_3d(ave_ftrans, other_lb_list, rev_dic, ax)

    ax.legend(loc='upper right')
    plt.show()
    return
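# MAP.split_train_test and MAP.MAP are repo helpers used above for retrieval
# evaluation. A minimal sketch of the metric itself: mean average precision of
# retrieving same-label items by Euclidean distance over (feature, label) pairs.
# The repo version may use a different distance or tie-breaking; feat_dim is kept
# only to mirror the call signature and is unused in this sketch.
import numpy as np

def MAP(query_list, db_list, feat_dim=None):
    '''query_list / db_list hold (feature, label) pairs; returns mean average precision.'''
    db_feats = np.array([f for f, _ in db_list])
    db_labs = [l for _, l in db_list]
    ap_list = []
    for q_feat, q_lab in query_list:
        dists = np.linalg.norm(db_feats - np.array(q_feat), axis=1)
        order = np.argsort(dists)
        hits, precisions = 0, []
        for rank, idx in enumerate(order, start=1):
            if db_labs[idx] == q_lab:
                hits += 1
                precisions.append(hits / rank)
        if precisions:
            ap_list.append(sum(precisions) / len(precisions))
    return sum(ap_list) / len(ap_list) if ap_list else 0.0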
def main():
    ### preprocessing ###
    feats, labs = reader.read_csv_file(FLAG.train_file)
    dic, rev_dic = reader.build_dic(FLAG.word_dic)
    targets = reader.build_targets(FLAG.target_words, dic)
    test_feat_dic = extract_targets(feats, labs, targets)
    ave_dic = average_over_words(test_feat_dic)
    ave_feat_list = [ave_dic[i] for i in ave_dic]
    anno_list = [i for i in ave_dic]
    for i in test_feat_dic:
        print(i)
        print(len(test_feat_dic[i]))
    word_color_dict = reader.build_label_color_list(targets, color_list)

    ### load the second language and extract its target words ###
    another_feats, another_labs = reader.read_csv_file(FLAG.apply_file)
    another_dic, another_rev_dic = reader.build_dic(FLAG.apply_dic)
    another_tar = reader.build_targets(FLAG.another_target_words, another_dic)
    another_test_dic = extract_targets(another_feats, another_labs, another_tar)
    another_color_dict = reader.build_label_color_list(another_tar,
                                                       another_color_list)
    test_feats2, test_delta_labs2 = target_dic2list(another_test_dic)
    ave_dic2 = average_over_words(another_test_dic)
    ave_feat_list2 = [ave_dic2[i] for i in ave_dic2]
    anno_list2 = [i for i in ave_dic2]

    ### PCA over all words (should less frequent words be eliminated?) ###
    feats_trans, model = PCA_transform(feats)
    test_feats, test_delta_labs = target_dic2list(test_feat_dic)
    print(len(test_feats))
    test_feats_trans = model.transform(test_feats)

    ### transform the averaged target words with the PCA model ###
    ave_feat_trans_list = model.transform(ave_feat_list)
    #ave_feat_trans_list = tsne.fit_transform(ave_feat_trans_list)
    test_feats2_trans = model.transform(test_feats2)
    ave_feat_trans_list2 = model.transform(ave_feat_list2)

    if FLAG.pca_dim == 2:
        fig = plt.figure()
        ### start plotting the average results ###
        ax = fig.add_subplot(111)
        ax = plot_with_anno(ave_feat_trans_list, anno_list, rev_dic, ax,
                            'o', 'German')
        ax = plot_with_anno(ave_feat_trans_list2, anno_list2, another_rev_dic, ax,
                            'x', 'French')
        ### plotting all word utterances ###
        #ax = plot_all_color(test_feats_trans, test_delta_labs, rev_dic, ax,
        #                    word_color_dict)
        #ax = plot_all_color(test_feats2_trans, test_delta_labs2,
        #                    another_rev_dic, ax, another_color_dict)
    elif FLAG.pca_dim == 3:
        fig = plt.figure()
        #### start plotting the 3D projections ####
        ax = fig.add_subplot(111, projection='3d')
        #ax = plot_with_anno_3d(ave_test_trans, anno_list, rev_dic, ax)
        ax = plot_all_color_3d(test_feats_trans, test_delta_labs, rev_dic, ax,
                               word_color_dict)
    else:
        print("no plotting; evaluating through MAP instead")
        all_list = []
        feat_trans = model.transform(feats)
        # print(len(feat_trans[0]))
        for i in range(len(feats)):
            all_list.append((feat_trans[i], labs[i]))
        train_list, test_list = MAP.split_train_test(all_list)
        print(MAP.MAP(test_list[:100], train_list, feat_dim=FLAG.pca_dim))
        return

    ### plot the words that were not used to fit the PCA ###
    #if FLAG.other_words != 'None':
    #    other_target = reader.build_targets(FLAG.other_words, dic)
    #    extract_others = extract_targets(feats, labs, other_target)
    #    ave_feat, other_lb_list = extract_additional_words(extract_others,
    #                                                       other_target)
    #    ave_ftrans = model.transform(ave_feat)
    #
    #    if FLAG.pca_dim == 2:
    #        plot_additional_words(ave_ftrans, other_lb_list, rev_dic, ax)
    #    else:
    #        plot_additional_words_3d(ave_ftrans, other_lb_list, rev_dic, ax)

    ax.legend(loc='upper right')
    plt.show()
    return