def _membership_mask(names, members):
    """Return a boolean mask marking entries of *names* equal to any of *members*."""
    mask = np.zeros(len(names), dtype=bool)
    for member in members:
        mask |= (names == member)
    return mask


def _label_manual_phrase_groups(name_index, name_index_list):
    """Relabel syllable names as 'Single ', 'Followed', 'Poly' or 'Other'.

    Three name lists are loaded with ``Utility.load_obj`` from
    ``name_index_list['single']``, ``name_index_list['followed_by_sil']`` and
    ``name_index_list['poly']``.  Every entry of *name_index* found in one of
    them is replaced by the group label; everything else becomes 'Other'.
    Labels are assigned in the order single, followed, poly, so a name present
    in several lists ends up with the last matching label.

    Boolean masks are used instead of accumulated index arrays so that an
    empty loaded list is simply a no-op.

    NOTE(review): plot_scatter passes *name_index_list* after converting it
    with ``np.array(...)``; string-key lookup on it only works if that array
    supports field access -- confirm how callers supply it for these label
    types.
    NOTE(review): if the resulting array has a fixed-width string dtype,
    labels longer than that width would be truncated -- confirm the syllable
    names are at least as long as 'Followed'.

    Returns:
        A NumPy array, the same length as *name_index*, holding the labels.
    """
    labels = np.array(name_index)

    single_list = np.array(Utility.load_obj(name_index_list['single']))
    followed_by_sil_list = np.array(Utility.load_obj(name_index_list['followed_by_sil']))
    poly_list = np.array(Utility.load_obj(name_index_list['poly']))

    # All masks are computed against the untouched names before any label is
    # written, otherwise an already-written label could never match again.
    single_mask = _membership_mask(labels, single_list)
    followed_mask = _membership_mask(labels, followed_by_sil_list)
    poly_mask = _membership_mask(labels, poly_list)

    labels[single_mask] = 'Single '
    labels[followed_mask] = 'Followed'
    labels[poly_mask] = 'Poly'
    labels[~(single_mask | followed_mask | poly_mask)] = 'Other'
    return labels


def plot_scatter(model, data_object, outpath, label_type=None, target_tone=None,
                 name_index_list=None, phoneme_list=None, plotted_tone=None,
                 bivariate=False, followed_list_file=None,
                 perform_unsupervised=False, get_only_stress=False,
                 non_unlabelled_stress=False, get_only_gpr_data=False,
                 return_after_dbscan=False, get_only_manual_data=False,
                 no_short_duration=False):
    """Draw scatter plots of the model's latent points, labelled per *label_type*.

    The latent points are read from ``model.X.mean``.  Only the rows whose
    name (looked up in *name_index_list*) appears in the training data
    returned by ``data_object.get_GP_LVM_training_data`` are kept, in the
    order of that training data.  The plot uses column 1 as x and column 0 as
    y; a third column, chosen by ``Utility.get_input_sensitivity``, is used
    only for the 3D-colouring label type.

    Args:
        model: object exposing ``X.mean`` and ``input_sensitivity()``
            (presumably a trained GP-LVM -- confirm).
        data_object: object exposing ``get_GP_LVM_training_data``.
        outpath: output file path.  Several label types derive additional
            file names from it (text before the first '.', or its base
            directory).
        label_type: one of the ``GP_LVM_Scatter.LABEL_TYPE_*`` constants, or
            None for an unlabelled plot.  Values with no matching branch
            produce no plot.
        target_tone: iterable of tone strings kept by
            LABEL_TYPE_ONE_TONE_STRESS_UNSTRESS.
        name_index_list: names aligned with the rows of ``model.X.mean``.
            Every training name must be present in it.
        phoneme_list: phonemes plotted one file each by LABEL_TYPE_PHONEME.
        plotted_tone: for LABEL_TYPE_PHONEME, only phonemes containing this
            string are plotted unless it equals '01234'.
        bivariate: forwarded to ``Scatter.plot`` by the stress label types.
        followed_list_file: file loaded by LABEL_TYPE_FOLLOWED_BY_SIL.
        perform_unsupervised: run ``DBSCAN_executioner.run`` before plotting.
        get_only_stress, non_unlabelled_stress, get_only_gpr_data,
        get_only_manual_data, no_short_duration: forwarded unchanged to
            ``get_GP_LVM_training_data``.
        return_after_dbscan: return right after the optional DBSCAN step,
            without plotting.

    Returns:
        None.  Returns early (after printing a message) when the number of
        selected latent rows differs from the number of stress labels.
    """
    data = model.X.mean

    # The first returned element is not used by this function.
    (_, name_index, tone, stress, syllable_short_long_type,
     syllable_positions, phonemes, syllable_type) = data_object.get_GP_LVM_training_data(
        Syllable.Training_feature_tonal_part_raw_remove_head_tail_interpolated,
        dur_position=[1, 2],
        no_short_duration=no_short_duration,
        num_sampling=50,
        get_only_stress=get_only_stress,
        non_unlabelled_stress=non_unlabelled_stress,
        get_only_gpr_data=get_only_gpr_data,
        get_only_manual_data=get_only_manual_data)

    input_sensitivity = model.input_sensitivity()
    print(input_sensitivity)

    index = Utility.get_input_sensitivity(input_sensitivity, 3)
    print(index)

    data = np.array(data)

    # Keep, for every training name, the first row of `data` with that name.
    name_index_list = np.array(name_index_list)
    index_filter = [np.where(name_index_list == n)[0][0] for n in name_index]
    data = data[index_filter]

    stress = np.array(stress)

    # 1 = 'Stress', 0 = 'Unstress'; any other entry keeps its arange position.
    labels_true = np.arange(len(stress), dtype=int)
    labels_true[stress == 'Stress'] = 1
    labels_true[stress == 'Unstress'] = 0

    if len(data) != len(stress):
        print('Error data is not equal')
        return

    plt.clf()

    if perform_unsupervised:
        # Best effort: a clustering failure is reported but does not stop the
        # plotting below.
        try:
            DBSCAN_executioner.run(data, labels_true, os.path.dirname(outpath),
                                   [index[0], index[1]], input_sensitivity)
        except Exception:
            print('Error at path : {}'.format(outpath))
            traceback.print_exc()

    # NOTE(review): the nesting of this check was reconstructed from
    # whitespace-collapsed source; it is placed at function level, so it
    # applies whether or not the DBSCAN step ran -- confirm.
    if return_after_dbscan:
        return

    plt.clf()

    print('Data : {}'.format(len(data)))
    print('Stress : {}'.format(len(stress)))

    # The plot axes are fixed to latent columns 1 and 0 rather than the
    # sensitivity-ranked index[0] / index[1]; only z uses the ranking.
    # NOTE(review): confirm that the fixed columns are intended.
    x = data[:, 1]
    y = data[:, 0]
    z = data[:, index[2]]

    print('syllable_positions {}'.format(len(syllable_positions)))

    # Axis limits shared by the per-phoneme and per-syllable-type plots.
    shared_xlim = (-4.4657748693986417, 8.1238328278216105)
    shared_ylim = (-7.2366812187855185, 6.1187134324317736)

    # NOTE(review): label types are compared by identity (`is`), so callers
    # must pass the GP_LVM_Scatter constants themselves, not equal values.
    if label_type is GP_LVM_Scatter.LABEL_TYPE_STRESS:
        stress_index = np.where(stress == 'Stress')
        Scatter.plot(x, y, outpath, label_list=stress, color=['r', 'b', 'g'],
                     bivariate=bivariate, X_bi=x[stress_index], Y_bi=y[stress_index])

    elif label_type is GP_LVM_Scatter.LABEL_TYPE_STRESS_3D_COLORING:
        # Map z onto 0..100 for the grey colour map; a constant z would make
        # the span zero, so every point then gets colour 0.
        z_low = min(z)
        z_span = max(z) - z_low
        if z_span == 0:
            normalized = np.zeros(len(z))
        else:
            normalized = (z - z_low) / z_span * 100
        Scatter.plot(x, y, outpath, label_list=None,
                     color=normalized.astype(int).tolist(), cmap='gray')

    elif label_type is GP_LVM_Scatter.LABEL_TYPE_STRESS_SEP_GPR:
        # dtype=int keeps the index array usable when no name contains 'gpr'
        # (an empty array would otherwise default to float).
        gpr_file_list = np.array(
            [idx for idx, n in enumerate(name_index) if 'gpr' in n], dtype=int)
        # NOTE(review): if `stress` has a fixed-width string dtype, a label
        # longer than that width is truncated -- confirm 'GPR_Stress' fits.
        stress[gpr_file_list] = 'GPR_Stress'
        stress_index = np.where(stress == 'Stress')
        Scatter.plot(x, y, outpath, label_list=stress, color=['r', 'b', 'g'],
                     bivariate=bivariate, X_bi=x[stress_index], Y_bi=y[stress_index])

    elif label_type is GP_LVM_Scatter.LABEL_TYPE_STRESS_AND_SPLIT_TONE:
        stress_index = np.where(stress == 'Stress')
        tone = np.array(tone)

        outpath = Utility.get_base_path(outpath)

        # Cluster labels are optional: they are plotted only when the file
        # loads and its length matches the stress labels.
        canplot = True
        try:
            labels_object = Utility.load_obj('{}/clustered_label.npy'.format(outpath))
            if len(labels_object) != len(stress):
                canplot = False
        except Exception:
            canplot = False

        for t in set(tone):
            Utility.make_directory('{}/tone_stress_label/'.format(outpath))
            Utility.make_directory('{}/clustering_label/'.format(outpath))
            print('{} {} {} {}'.format(len(x), len(y), len(tone), len(stress)))
            tone_mask = tone == t
            Scatter.plot(x[tone_mask], y[tone_mask],
                         '{}/tone_stress_label/tone_{}.eps'.format(outpath, t),
                         label_list=stress[tone_mask], bivariate=bivariate,
                         X_bi=x[stress_index], Y_bi=y[stress_index])
            if canplot:
                print('Plot label tone {}'.format(t))
                Scatter.plot(x[tone_mask], y[tone_mask],
                             '{}/clustering_label//tone_{}.eps'.format(outpath, t),
                             label_list=labels_object[tone_mask], bivariate=bivariate,
                             X_bi=x[stress_index], Y_bi=y[stress_index])

    elif label_type is GP_LVM_Scatter.LABEL_TYPE_SYLLABLE_SHORT_LONG:
        Scatter.plot(x, y, outpath, label_list=syllable_short_long_type)

    elif label_type is GP_LVM_Scatter.LABEL_TYPE_SYLLABLE_POSITIONS:
        # Only syllables whose second '-'-separated phoneme field is not in
        # Syllable.short_vowel are plotted.
        long_list = [idx for idx, p in enumerate(phonemes)
                     if p.split('-')[1] not in Syllable.short_vowel]
        print('{} {}'.format(len(long_list), len(x)))
        syllable_positions = np.array(syllable_positions)
        Scatter.plot(x[long_list], y[long_list], outpath,
                     label_list=syllable_positions[long_list])

    elif label_type is GP_LVM_Scatter.LABEL_TYPE_TONES:
        Scatter.plot(x, y, outpath, label_list=tone,
                     color=['r', 'g', 'b', 'black', 'yellow'])

    elif label_type is GP_LVM_Scatter.LABEL_TYPE_ONE_TONE_STRESS_UNSTRESS:
        # Tones are compared as strings here, so target_tone must hold strings.
        tone = np.array([str(t) for t in tone])
        stress_tone = np.char.add(np.char.add(stress, '_'), tone)

        target_list = np.array([])
        print(target_tone)
        for t in target_tone:
            print('{} {} {}'.format(t, target_list, np.where(tone == t)))
            target_list = np.union1d(target_list, np.where(tone == t)[0])

        # union1d on the initially empty (float) array yields floats.
        selected = target_list.astype(int)
        Scatter.plot(x[selected], y[selected], outpath,
                     label_list=stress_tone[selected])

    elif label_type is None:
        Scatter.plot(x, y, outpath, label_list=None)

    elif label_type is GP_LVM_Scatter.LABEL_TYPE_SYLLABLE_IN_MANUAL_PHRASE:
        name_index = _label_manual_phrase_groups(name_index, name_index_list)
        Scatter.plot(x, y, outpath, label_list=name_index,
                     color=['r', 'g', 'b', 'y'])

    elif label_type is GP_LVM_Scatter.LABEL_TYPE_SYLLABLE_IN_MANUAL_PHRASE_PLUS_SHORT_LONG_SYLLABLE:
        name_index = _label_manual_phrase_groups(name_index, name_index_list)

        outpath = outpath.split('.')[0]

        syllable_short_long_type = np.array(syllable_short_long_type)
        short_list = np.where(syllable_short_long_type == 'short')[0]
        long_list = np.where(syllable_short_long_type == 'long')[0]

        Scatter.plot(x[short_list], y[short_list], '{}_short.pdf'.format(outpath),
                     label_list=name_index[short_list], color=['r', 'g', 'b', 'y'])
        Scatter.plot(x[long_list], y[long_list], '{}_long.pdf'.format(outpath),
                     label_list=name_index[long_list], color=['r', 'g', 'b', 'y'])

    elif label_type is GP_LVM_Scatter.LABEL_TYPE_PHONEME:
        phonemes = np.array(phonemes)
        stress_index = np.where(stress == 'Stress')
        base_path = outpath.split('.')[0]

        for phoneme in phoneme_list:
            # '01234' means "all tones": no filtering by plotted_tone.
            if plotted_tone != '01234' and plotted_tone not in phoneme:
                continue
            target_index = np.where(phonemes == phoneme)
            Scatter.plot(x[target_index], y[target_index],
                         '{}_{}.pdf'.format(base_path, phoneme),
                         label_list=stress[target_index], bivariate=True,
                         X_bi=x[stress_index], Y_bi=y[stress_index],
                         title=phoneme, xlim=shared_xlim, ylim=shared_ylim)

    elif label_type is GP_LVM_Scatter.LABEL_TYPE_SYLLABLE_TYPE:
        syllable_type = np.array(syllable_type)
        base_path = outpath.split('.')[0]

        for typ in set(syllable_type):
            print(typ)
            typ_index = np.where(syllable_type == typ)
            sub_stress = stress[typ_index]
            sub_x = x[typ_index]
            sub_y = y[typ_index]
            stress_index = np.where(sub_stress == 'Stress')
            Scatter.plot(sub_x, sub_y, '{}_{}.pdf'.format(base_path, typ),
                         label_list=sub_stress, color=['r', 'b', 'g'],
                         bivariate=False,
                         X_bi=sub_x[stress_index], Y_bi=sub_y[stress_index],
                         title=typ, xlim=shared_xlim, ylim=shared_ylim)

    elif label_type is GP_LVM_Scatter.LABEL_TYPE_FOLLOWED_BY_SIL:
        followed_list = Utility.load_obj(followed_list_file)
        name_index = np.array(name_index)

        fow_index = []
        for f in followed_list:
            fow_index.extend(np.where(name_index == f)[0].astype(int))

        # Relabel: every former 'Stress' becomes 'Unstress', then the
        # syllables named in followed_list are marked 'Stress'.
        # NOTE(review): with a fixed-width string dtype, 'Unstress' would be
        # truncated if no label that long existed beforehand -- confirm.
        stress[np.where(stress == 'Stress')] = 'Unstress'
        stress[fow_index] = 'Stress'

        Scatter.plot(x, y, outpath, label_list=stress, color=['r', 'b', 'g'],
                     bivariate=True, X_bi=x[fow_index], Y_bi=y[fow_index])

        # One additional plot per tone; tones are compared as integers here.
        tone = np.array(tone)
        for t in [0, 1, 2, 3, 4]:
            tone_index = np.where(tone == t)
            tone_path = '{}_{}.pdf'.format(outpath.split('.')[0], t)
            Scatter.plot(x[tone_index], y[tone_index], tone_path,
                         label_list=stress[tone_index], color=['r', 'b', 'g'],
                         bivariate=True, X_bi=x[fow_index], Y_bi=y[fow_index],
                         title='Tone {}'.format(t),
                         xlim=(-3.7420549236630576, 3.7939531202951904),
                         ylim=(-4.2426927228030289, 6.3913714950885101))

        # Persist the plotted data next to the figures.
        base_path = outpath.split('.')[0]
        Utility.save_obj(x, '{}_{}.pickle'.format(base_path, 'x'))
        Utility.save_obj(y, '{}_{}.pickle'.format(base_path, 'y'))
        Utility.save_obj(stress, '{}_{}.pickle'.format(base_path, 'stress_followed'))
        Utility.save_obj(tone, '{}_{}.pickle'.format(base_path, 'tone'))
def gen_fold_with_balance_stress_unstress(syl_object, fold):
    """Split a pickled syllable database into *fold* stress-balanced parts.

    The object stored at *syl_object* is loaded with ``Utility.load_obj``.
    Syllables whose ``stress_manual`` is '1' (stressed) and '0' (unstressed)
    are each dealt round-robin over *fold* buckets, both deals starting at
    bucket 0; syllables with any other ``stress_manual`` value are dropped.
    Every bucket is wrapped in a ``SyllableDatabaseManagement`` and saved as
    ``<base_path>/<base_name>_fold/<base_name>_<fold>-fold_<i>.pickle``.

    Progress information is printed along the way.  The figures printed under
    'Unstress length: ' are the bucket sizes after both deals, i.e. stressed
    plus unstressed syllables.

    Args:
        syl_object: path of the pickled syllable database.
        fold: number of buckets to create.
    """
    base_path = Utility.get_base_path(syl_object)
    print(base_path)

    base_name = Utility.get_basefilename(syl_object)
    print(base_name)

    outpath = '{}/{}_fold'.format(base_path, base_name)
    Utility.make_directory(outpath)

    syl_manage_object = Utility.load_obj(syl_object)
    all_syllables = syl_manage_object.syllables_list
    print(len(all_syllables))

    stress_list = [s for s in all_syllables if s.stress_manual == '1']
    unstress_list = [s for s in all_syllables if s.stress_manual == '0']
    print('{} {}'.format(len(stress_list), len(unstress_list)))

    buckets = [[] for _ in range(fold)]
    print(buckets)

    def deal_round_robin(items):
        # Hand the items out one by one, wrapping back to bucket 0 after the
        # last bucket.
        slot = 0
        for item in items:
            buckets[slot].append(item)
            slot += 1
            if slot == fold:
                slot = 0

    deal_round_robin(stress_list)
    print('Stress length: ')
    for bucket in buckets:
        print(len(bucket))

    deal_round_robin(unstress_list)
    print('Unstress length: ')
    for bucket in buckets:
        print(len(bucket))

    for fold_number, members in enumerate(buckets):
        syl_o = SyllableDatabaseManagement(syllable_list=members)
        target = '{}/{}_{}-fold_{}.pickle'.format(outpath, base_name, fold, fold_number)
        Utility.save_obj(syl_o, target)