def main():
    """Embed count-averaged target-word features and show them in a plot.

    Reads features/labels from ``FLAG.trans_file``, keeps the words named by
    ``FLAG.target_words``, averages them with ``PCA.average_over_words_num``,
    passes the result through ``PCA.PCA_transform`` and then
    ``PCA.TSNE_transform`` (with ``FLAG.tsne_dim``), and finally draws the
    points with ``PCA.plot_all_color`` before calling ``plt.show()``.
    """
    # --- load inputs and select the target words ---
    features, labels = reader.read_csv_file(FLAG.trans_file)
    dic, rev_dic = reader.build_dic(FLAG.word_dic)
    targets = reader.build_targets(FLAG.target_words, dic)
    target_feats = PCA.extract_targets(features, labels, targets)
    color_by_word = reader.build_label_color_list(targets, color_list)

    # --- aggregate features ---
    # The first three results are not consumed further down; the calls are
    # kept so this function performs the same work as before.
    word_means = PCA.average_over_words(target_feats)
    word_mean_list = [word_means[key] for key in word_means]
    flat_feats, flat_labels = PCA.target_dic2list(target_feats)
    num_avg_feats, num_avg_labels = PCA.average_over_words_num(
        target_feats, targets)

    # --- reduce, then embed ---
    reduced_feats, _ = PCA.PCA_transform(num_avg_feats)
    embedded = PCA.TSNE_transform(reduced_feats, FLAG.tsne_dim)

    # --- plot ---
    axes = plt.figure().add_subplot(111)
    axes = PCA.plot_all_color(embedded, num_avg_labels, rev_dic, axes,
                              color_by_word)
    plt.show()
def main():
    """Project database features with a saved model, then plot or run MAP.

    The model is loaded from ``FLAG.model_fn`` and applied with
    ``model.transform`` to the target-word features extracted from
    ``FLAG.db_fn``.  Depending on ``FLAG.pca_dim``:

    * ``2``  -- annotated per-word averages plus sampled utterances in 2D.
    * ``3``  -- the same, on a 3D axis.
    * other -- no plot; the transformed database features are split with
      ``MAP.split_train_test`` and the ``MAP.MAP`` result is printed.
    """
    # NOTE(review): the query file is loaded but nothing below consumes it.
    # Kept so a missing/invalid FLAG.q_fn still fails the way it did before;
    # confirm whether the MAP branch was meant to use it.
    q_feats, q_labs = reader.read_csv_file(FLAG.q_fn)
    db_feats, db_labs = reader.read_csv_file(FLAG.db_fn)
    dic, rev_dic = reader.build_dic(FLAG.word_dic)
    # Pass ``dic`` like the other build_targets call sites do.
    targets = reader.build_targets(FLAG.target_words, dic)
    word_color_dict = reader.build_label_color_list(targets, color_list)

    # SECURITY: joblib.load unpickles the file, which can execute arbitrary
    # code. Only point FLAG.model_fn at model files you trust.
    model = joblib.load(FLAG.model_fn)

    proj_feat_dic = PCA.extract_targets(db_feats, db_labs, targets)

    ### get the target words average ###
    ave_target_feat_dic = PCA.average_over_words(proj_feat_dic)
    anno_list = list(ave_target_feat_dic)
    ave_target_feat_list = [ave_target_feat_dic[word] for word in anno_list]
    ave_target_trans = model.transform(ave_target_feat_list)

    ### transform and sample every utterance of the target words ###
    all_feats, delta_lab_list = PCA.target_dic2list(proj_feat_dic)
    all_feats_trans = model.transform(all_feats)
    sampled_feats, sampled_delta_lab = MAP.sampling(all_feats_trans,
                                                    delta_lab_list)

    if FLAG.pca_dim == 2:
        fig = plt.figure()
        ### start plotting the average results ###
        ax = fig.add_subplot(111)
        ax = PCA.plot_with_anno(ave_target_trans, anno_list, rev_dic, ax)
        ### plotting all word utterance ###
        ax = PCA.plot_all_color(sampled_feats, sampled_delta_lab, rev_dic, ax,
                                word_color_dict)
    elif FLAG.pca_dim == 3:
        fig = plt.figure()
        #### start plotting the 3D projections ####
        ax = fig.add_subplot(111, projection='3d')
        ax = PCA.plot_with_anno_3d(ave_target_trans, anno_list, rev_dic, ax)
        ax = PCA.plot_all_color_3d(sampled_feats, sampled_delta_lab, rev_dic,
                                   ax, word_color_dict)
    else:
        print("no plotting but testing through MAP")
        # NOTE(review): the original referenced undefined ``feats``/``labs``
        # here. The database set is used because the rest of this function
        # operates on it -- confirm this is the intended evaluation set.
        feat_trans = model.transform(db_feats)
        print(len(feat_trans[0]))
        all_list = list(zip(feat_trans, db_labs))
        train_list, test_list = MAP.split_train_test(all_list)
        print(MAP.MAP(test_list[:100], train_list, feat_dim=FLAG.pca_dim))
        return

    ax.legend(loc='upper right')
    plt.show()
    return
def main():
    """Fit a model with ``PCA_transform`` and plot or evaluate its projection.

    Features from ``FLAG.train_file`` are restricted to ``FLAG.target_words``;
    the model returned by ``PCA_transform`` is fitted on the output of
    ``average_over_words_num`` and dumped to ``FLAG.model_fn`` when
    ``FLAG.save_model`` is truthy.  ``FLAG.pca_dim`` then selects a 2D plot,
    a 3D plot, or (any other value) a ``MAP.MAP`` evaluation that prints its
    result and returns without plotting.  When plotting and
    ``FLAG.other_words`` is not the string ``'None'``, those extra words are
    projected with the same model and drawn on the same axis.
    """
    # --- load inputs and select the target words ---
    feats, labs = reader.read_csv_file(FLAG.train_file)
    dic, rev_dic = reader.build_dic(FLAG.word_dic)
    targets = reader.build_targets(FLAG.target_words, dic)
    target_feats = extract_targets(feats, labs, targets)
    color_by_word = reader.build_label_color_list(targets, color_list)

    # --- fit the model on the count-averaged features ---
    word_means = average_over_words(target_feats)
    num_avg_feats, _ = average_over_words_num(target_feats, targets)
    _, model = PCA_transform(num_avg_feats)

    # --- project the per-word means; the words double as annotations ---
    annotations = list(word_means)
    mean_proj = model.transform([word_means[word] for word in annotations])

    # --- project every utterance, then sample a subset for plotting ---
    every_feat, every_label = target_dic2list(target_feats)
    sampled_feats, sampled_labels = sampling(model.transform(every_feat),
                                             every_label)

    if FLAG.save_model:
        joblib.dump(model, FLAG.model_fn)

    dim = FLAG.pca_dim
    if dim not in (2, 3):
        # Nothing to draw for this dimensionality: evaluate and leave.
        print("no plotting but testing through MAP")
        projected = model.transform(feats)
        pairs = [(projected[idx], labs[idx]) for idx in range(len(feats))]
        train_list, test_list = MAP.split_train_test(pairs)
        print(MAP.MAP(test_list[:100], train_list, feat_dim=FLAG.pca_dim))
        return

    figure = plt.figure()
    if dim == 2:
        ax = figure.add_subplot(111)
        ax = plot_with_anno(mean_proj, annotations, rev_dic, ax)
        ax = plot_all_color(sampled_feats, sampled_labels, rev_dic, ax,
                            color_by_word)
    else:
        ax = figure.add_subplot(111, projection='3d')
        ax = plot_with_anno_3d(mean_proj, annotations, rev_dic, ax)
        ax = plot_all_color_3d(sampled_feats, sampled_labels, rev_dic, ax,
                               color_by_word)

    # --- words that were not used to fit the model ---
    if FLAG.other_words != 'None':
        extra_targets = reader.build_targets(FLAG.other_words, dic)
        extra_feats = extract_targets(feats, labs, extra_targets)
        extra_means, extra_labels = extract_additional_words(extra_feats,
                                                             extra_targets)
        extra_proj = model.transform(extra_means)
        draw_extra = (plot_additional_words if dim == 2
                      else plot_additional_words_3d)
        draw_extra(extra_proj, extra_labels, rev_dic, ax)

    ax.legend(loc='upper right')
    plt.show()
def main():
    """Fit a model on one dataset and compare it with a second dataset.

    The model returned by ``PCA_transform`` is fitted on every feature row of
    ``FLAG.train_file``.  Per-word averages of the target words from both
    ``FLAG.train_file`` and ``FLAG.apply_file`` are projected with it.
    Depending on ``FLAG.pca_dim``:

    * ``2``  -- both sets of averages are drawn on one axis (markers ``'o'``
      and ``'x'``, labels ``'German'`` and ``'French'``).
    * ``3``  -- every target-word utterance of the first dataset is drawn
      on a 3D axis, colored per word.
    * other -- no plot; the transformed first dataset is split with
      ``MAP.split_train_test`` and the ``MAP.MAP`` result is printed.
    """
    ### preprocessing: first dataset ###
    feats, labs = reader.read_csv_file(FLAG.train_file)
    dic, rev_dic = reader.build_dic(FLAG.word_dic)
    targets = reader.build_targets(FLAG.target_words, dic)
    test_feat_dic = extract_targets(feats, labs, targets)
    ave_dic = average_over_words(test_feat_dic)
    anno_list = list(ave_dic)
    ave_feat_list = [ave_dic[word] for word in anno_list]

    # Report each target word and how many utterances it has.
    for word in test_feat_dic:
        print(word)
        print(len(test_feat_dic[word]))

    ### preprocessing: second dataset ###
    another_feats, another_labs = reader.read_csv_file(FLAG.apply_file)
    another_dic, another_rev_dic = reader.build_dic(FLAG.apply_dic)
    another_tar = reader.build_targets(FLAG.another_target_words, another_dic)
    another_test_dic = extract_targets(another_feats, another_labs,
                                       another_tar)
    ave_dic2 = average_over_words(another_test_dic)
    anno_list2 = list(ave_dic2)
    ave_feat_list2 = [ave_dic2[word] for word in anno_list2]

    ### fit the model on all features of the first dataset ###
    _, model = PCA_transform(feats)

    test_feats, test_delta_labs = target_dic2list(test_feat_dic)
    print(len(test_feats))
    test_feats_trans = model.transform(test_feats)

    ### project the per-word averages of both datasets ###
    ave_feat_trans_list = model.transform(ave_feat_list)
    ave_feat_trans_list2 = model.transform(ave_feat_list2)

    if FLAG.pca_dim == 2:
        fig = plt.figure()
        ### start plotting the average results ###
        ax = fig.add_subplot(111)
        ax = plot_with_anno(ave_feat_trans_list, anno_list, rev_dic, ax,
                            'o', 'German')
        ax = plot_with_anno(ave_feat_trans_list2, anno_list2,
                            another_rev_dic, ax, 'x', 'French')
    elif FLAG.pca_dim == 3:
        # The color mapping was referenced here but never built, which made
        # this branch fail with NameError.
        # NOTE(review): assumes a module-level ``color_list`` exists, as the
        # previously commented-out assignment did -- confirm.
        word_color_dict = reader.build_label_color_list(targets, color_list)
        fig = plt.figure()
        #### start plotting the 3D projections ####
        ax = fig.add_subplot(111, projection='3d')
        ax = plot_all_color_3d(test_feats_trans, test_delta_labs, rev_dic,
                               ax, word_color_dict)
    else:
        print("no plotting but testing through MAP")
        feat_trans = model.transform(feats)
        all_list = list(zip(feat_trans, labs))
        train_list, test_list = MAP.split_train_test(all_list)
        print(MAP.MAP(test_list[:100], train_list, feat_dim=FLAG.pca_dim))
        return

    ax.legend(loc='upper right')
    plt.show()
    return