def compare_training():
    """
    Compare the class repartition of the training draws.

    Each ``learning*`` file found in ``../lib/Piton`` is read with
    ``read_binary_file``; its content is split into a training part (first
    60%), a CV part (next 20%) and a test part (everything that remains).
    For every draw, three pie charts show how many 'VT' and 'EB' labels
    (column ``Type`` of ``class_train_set.csv``) fall into each part.

    Draws are laid out three per row on a grid of 4 rows, each draw using
    three adjacent cells.  The figure is saved to
    ``../results/Piton/figures/tirages.png`` and then displayed.
    """
    from matplotlib.gridspec import GridSpec
    from options import read_binary_file

    libpath = '../lib/Piton'
    list_files = sorted(glob.glob(os.path.join(libpath, 'learning*')))

    df = pd.read_csv(os.path.join(libpath, 'class_train_set.csv'))
    labels = np.array(df.Type.values)

    m = len(labels)
    mtraining = int(0.6 * m)
    mcv = int(0.2 * m)
    mtest = int(0.2 * m)

    nbc, nbl = 3, 4
    grid = GridSpec(nbl, nbc * 3)

    fig = plt.figure(figsize=(18, 12))
    fig.set_facecolor('white')

    types = ['VT', 'EB']
    for idraw, filename in enumerate(list_files):
        # Alternate the palette so that neighbouring draws are easy to
        # tell apart.
        if idraw % 2:
            colors = ['lightskyblue', 'lightcoral']
        else:
            colors = ['powderblue', 'plum']

        dic = read_binary_file(filename)
        train = labels[dic[:mtraining]]
        cv = labels[dic[mtraining:mtraining + mcv]]
        test = labels[dic[mtraining + mcv:]]

        # Floor division is required: the results are used as grid indices
        # and must stay integers whatever the division semantics in force.
        row = idraw // nbc
        col = idraw % nbc * 3

        # (subset, title, size label, x position of the texts)
        panels = [
            (train, 'Training set', r'$m_{training}=%d$' % mtraining, -0.5),
            (cv, 'CV set', r'$m_{CV}=%d$' % mcv, -0.3),
            (test, 'Test set', r'$m_{test}=%d$' % mtest, -0.3),
        ]
        for ipanel, (subset, title, size_label, xtext) in enumerate(panels):
            plt.subplot(grid[row, col + ipanel], aspect='equal')
            counts = [len(subset[subset == t]) for t in types]
            plt.pie(counts, autopct='%1.1f%%', labels=types, colors=colors)
            plt.text(xtext, 1.4, title)
            plt.text(xtext, -1.4, size_label)
            if ipanel == 1:
                # The draw number is written above the middle (CV) pie.
                plt.text(-.5, 2., 'Tirage %d' % idraw)

    plt.savefig('../results/Piton/figures/tirages.png')
    plt.show()
def __init__(self):
    """
    Initialise the options, load the result file and run the analysis.

    The file designated by ``self.opdict['result_path']`` is read with
    ``read_binary_file``.  Its 'features' entry is moved into
    ``self.opdict['feat_list']`` (and therefore removed from
    ``self.results``) before ``self.do_analysis()`` is called.
    """
    MultiOptions.__init__(self)

    result_path = self.opdict['result_path']
    print("ANALYSIS OF %s" % result_path)

    self.results = read_binary_file(result_path)
    # The feature list is stored next to the results; take it out so that
    # self.results no longer contains the 'features' key.
    self.opdict['feat_list'] = self.results.pop('features')

    self.do_analysis()
def read_extraction_results(filename):
    """
    Read the extraction results stored in the binary file ``filename``.

    NOTE(review): the statements visible here only instantiate
    ``AnalyseResultsExtraction`` and load the file; neither ``res`` nor
    ``DIC`` is used or returned afterwards.  The definition looks
    truncated - confirm against the complete function before relying on it.
    """
    from results import AnalyseResultsExtraction
    res = AnalyseResultsExtraction()
    from obspy.core import utcdatetime, read
    # Content of the result file, as returned by read_binary_file.
    DIC = read_binary_file(filename)
def read_extraction_results(filename):
    """
    Read the extraction results stored in the binary file ``filename``.

    NOTE(review): the statements visible here only instantiate
    ``AnalyseResultsExtraction`` and load the file; neither ``res`` nor
    ``DIC`` is used or returned afterwards.  The definition looks
    truncated - confirm against the complete function before relying on it.
    """
    from results import AnalyseResultsExtraction
    res = AnalyseResultsExtraction()
    from obspy.core import utcdatetime,read
    # Content of the result file, as returned by read_binary_file.
    DIC = read_binary_file(filename)
def plot_best_worst():
    """
    Plot the pdfs of the best and worst training draws, feature by feature.

    For each feature one figure is produced: the pdfs computed on the whole
    training set are drawn in black ('VT' solid, 'EB' dashed), those of the
    training part (first 60%) of the best draw in red and of the worst draw
    in green.  Each figure is saved as ``best_worst_<feature>.png`` in
    ``opt.opdict['fig_path']`` and then displayed.
    """
    from options import MultiOptions, read_binary_file

    # (feature name, index of the best draw, index of the worst draw);
    # the indices refer to the sorted list of 'learning*' files.
    draws_per_feature = [
        ('AsDec', 0, 1), ('Bandwidth', 5, 0), ('CentralF', 1, 0),
        ('Centroid_time', 4, 0), ('Dur', 4, 1), ('Ene0-5', 1, 4),
        ('Ene5-10', 0, 4), ('Ene', 0, 3), ('F_low', 4, 2), ('F_up', 0, 7),
        ('IFslope', 7, 8), ('Kurto', 2, 0), ('MeanPredF', 1, 4),
        ('PredF', 1, 4), ('RappMaxMean', 0, 1), ('RappMaxMeanTF', 4, 0),
        ('Skewness', 2, 5), ('TimeMaxSpec', 4, 0), ('Rectilinearity', 8, 3),
        ('Planarity', 1, 2),
    ]

    opt = MultiOptions()
    opt.opdict['feat_list'] = opt.opdict['feat_all']
    opt.opdict['feat_log'] = ['AsDec', 'Ene', 'Kurto', 'RappMaxMean']
    opt.opdict['feat_filename'] = '../results/Piton/features/Piton_trainset.csv'
    opt.opdict['label_filename'] = '../lib/Piton/class_train_set.csv'
    x_all, y_all = opt.features_onesta('BOR', 'Z')

    draw_files = sorted(glob.glob(os.path.join('../lib/Piton', 'learning*')))

    # Size of the training part of a draw (60% of the events).
    n_train = int(0.6 * len(y_all))

    for feat, best, worst in draws_per_feature:
        print("%s %s %s" % (feat, best, worst))

        fig = plt.figure()
        fig.set_facecolor('white')

        # Reference curves: the whole data set.
        opt.x = x_all.reindex(columns=[feat])
        opt.y = y_all.reindex(index=opt.x.index)
        opt.opdict['feat_list'] = [feat]
        opt.compute_pdfs()
        pdf_all = opt.gaussians
        plt.plot(pdf_all[feat]['vec'], pdf_all[feat]['VT'], 'k', lw=2., label='VT')
        plt.plot(pdf_all[feat]['vec'], pdf_all[feat]['EB'], 'k--', lw=2., label='EB')

        selected = (draw_files[best], draw_files[worst])
        for path, colour, tag in zip(selected, ('r', 'g'), ('best', 'worst')):
            # Training part of the draw.
            train_index = read_binary_file(path)[:n_train]
            opt.x = x_all.reindex(columns=[feat], index=train_index)
            opt.y = y_all.reindex(index=train_index)
            opt.compute_pdfs()
            pdf_train = opt.gaussians
            plt.plot(pdf_train[feat]['vec'], pdf_train[feat]['VT'], '-',
                     c=colour, label=tag)
            plt.plot(pdf_train[feat]['vec'], pdf_train[feat]['EB'], '--',
                     c=colour)

        plt.legend()
        plt.title(feat)
        plt.savefig('%s/best_worst_%s.png' % (opt.opdict['fig_path'], feat))
        plt.show()
def plot_waveforms():
    """
    Plot example waveforms of the unsupervised classes.

    The result file ``opt.opdict['result_path']`` is read; for every
    (station, component) key and every draw it contains, one figure is
    built.  For each class number found in 'NumClass', up to three events
    are picked at random among the events of that class, their waveform is
    read, band-pass filtered between 1 and 10 Hz and plotted: the first two
    on the upper row of the class (columns 1 and 2), the third one on the
    full-width lower row.  The class number is written in column 0.

    Each figure is saved as ``WF_K<K>class.png`` in
    ``opt.opdict['fig_path']`` (K = number of classes) and displayed.
    """
    from matplotlib.gridspec import GridSpec
    from options import read_binary_file, Options
    from obspy.core import read

    opt = Options()
    DIC = read_binary_file(opt.opdict['result_path'])

    # The 'header' entry is metadata; drop it before sorting so that the
    # string key is never compared with the (station, component) tuples.
    for stac in sorted(key for key in DIC if key != 'header'):
        station = stac[0]
        comp = stac[1]
        datapath = glob.glob(
            os.path.join(opt.opdict['datadir'], station, '*%s*' % comp))[0]

        for tir in sorted(DIC[stac]):
            list_ev = DIC[stac][tir]['list_ev']
            nclass = DIC[stac][tir]['NumClass']
            classes = np.unique(nclass)
            K = len(classes)

            fig = plt.figure()
            fig.set_facecolor('white')
            grid = GridSpec(2 * K, 3)

            for j, N in enumerate(classes):
                index = list(np.where(nclass == N)[0])
                ev = list_ev[index]
                permut = np.random.permutation(ev)

                # A class may hold fewer than three events: plot what is
                # available instead of indexing past the end.
                for i, E in enumerate(permut[:3]):
                    pattern = '*%s_%s*' % (str(E)[:8], str(E)[8:])
                    wf_file = glob.glob(os.path.join(datapath, pattern))[0]
                    st = read(wf_file)
                    st.filter('bandpass', freqmin=1, freqmax=10)

                    if i in [0, 1]:
                        ax = fig.add_subplot(grid[2 * j, i + 1])
                    else:
                        ax = fig.add_subplot(grid[2 * j + 1, :])
                    ax.plot(st[0], 'k')
                    ax.set_axis_off()

                # Class number, in the first column of the class rows.
                ax = fig.add_subplot(grid[2 * j, 0])
                ax.text(.2, .5, N, transform=ax.transAxes)
                ax.set_axis_off()

            savename = '%s/WF_K%dclass.png' % (opt.opdict['fig_path'], K)
            print("Saved in %s" % savename)
            plt.savefig(savename)
            plt.show()
def plot_waveforms():
    """
    Plot example waveforms of the unsupervised classes.

    The result file ``opt.opdict['result_path']`` is read; for every
    (station, component) key and every draw it contains, one figure is
    built.  For each class number found in 'NumClass', up to three events
    are picked at random among the events of that class, their waveform is
    read, band-pass filtered between 1 and 10 Hz and plotted: the first two
    on the upper row of the class (columns 1 and 2), the third one on the
    full-width lower row.  The class number is written in column 0.

    Each figure is saved as ``WF_K<K>class.png`` in
    ``opt.opdict['fig_path']`` (K = number of classes) and displayed.
    """
    from matplotlib.gridspec import GridSpec
    from options import read_binary_file, Options
    from obspy.core import read

    opt = Options()
    DIC = read_binary_file(opt.opdict['result_path'])

    # The 'header' entry is metadata; drop it before sorting so that the
    # string key is never compared with the (station, component) tuples.
    for stac in sorted(key for key in DIC if key != 'header'):
        station = stac[0]
        comp = stac[1]
        datapath = glob.glob(
            os.path.join(opt.opdict['datadir'], station, '*%s*' % comp))[0]

        for tir in sorted(DIC[stac]):
            list_ev = DIC[stac][tir]['list_ev']
            nclass = DIC[stac][tir]['NumClass']
            classes = np.unique(nclass)
            K = len(classes)

            fig = plt.figure()
            fig.set_facecolor('white')
            grid = GridSpec(2 * K, 3)

            for j, N in enumerate(classes):
                index = list(np.where(nclass == N)[0])
                ev = list_ev[index]
                permut = np.random.permutation(ev)

                # A class may hold fewer than three events: plot what is
                # available instead of indexing past the end.
                for i, E in enumerate(permut[:3]):
                    pattern = '*%s_%s*' % (str(E)[:8], str(E)[8:])
                    wf_file = glob.glob(os.path.join(datapath, pattern))[0]
                    st = read(wf_file)
                    st.filter('bandpass', freqmin=1, freqmax=10)

                    if i in [0, 1]:
                        ax = fig.add_subplot(grid[2 * j, i + 1])
                    else:
                        ax = fig.add_subplot(grid[2 * j + 1, :])
                    ax.plot(st[0], 'k')
                    ax.set_axis_off()

                # Class number, in the first column of the class rows.
                ax = fig.add_subplot(grid[2 * j, 0])
                ax.text(.2, .5, N, transform=ax.transAxes)
                ax.set_axis_off()

            savename = '%s/WF_K%dclass.png' % (opt.opdict['fig_path'], K)
            print("Saved in %s" % savename)
            plt.savefig(savename)
            plt.show()
def read_result_file(self):
    """
    Read the file containing the results and update the options.

    The file ``self.opdict['result_path']`` is read with
    ``read_binary_file``.  Its 'header' entry provides the feature list,
    the catalog name (joined to ``self.opdict['libdir']`` to build
    'label_filename') and the event types.  The remaining entries are
    stored in ``self.results``; element 0 and element 1 of each of their
    (sorted) keys fill ``self.opdict['stations']`` and
    ``self.opdict['channels']`` respectively.
    """
    dic = read_binary_file(self.opdict['result_path'])

    # Take the header out *before* listing the keys: it is metadata, and
    # leaving it in would add the characters 'h' and 'e' of the string
    # 'header' to the station and channel lists.
    header = dic.pop('header')

    self.opdict['feat_list'] = header['features']
    self.opdict['label_filename'] = '%s/%s' % (self.opdict['libdir'],
                                               header['catalog'])
    print("Nb features : %s" % len(self.opdict['feat_list']))
    print("Types : %s" % (header['types'],))

    self.results = dic
    keys = sorted(dic)
    self.opdict['stations'] = [key[0] for key in keys]
    self.opdict['channels'] = [key[1] for key in keys]
    self.opdict['Types'] = header['types']
def compare_pdfs_train():
    """
    Display and compare the pdfs of the different training sets.

    The training draws are read from ``<libdir>/train_10``.  For every
    feature of ``opt.opdict['feat_list']`` one figure is shown in which the
    pdf of each event type (one colour per type) is drawn for each draw
    (one line style per draw, cycling through the four standard styles).
    """
    from options import MultiOptions

    opt = MultiOptions()
    opt.opdict['stations'] = ['IJEN']
    opt.opdict['channels'] = ['Z']
    opt.opdict['Types'] = ['Tremor', 'VulkanikB', '?']
    opt.opdict['train_file'] = '%s/train_10' % (opt.opdict['libdir'])
    opt.opdict['label_filename'] = '%s/Ijen_reclass_all.csv' % opt.opdict['libdir']

    train = read_binary_file(opt.opdict['train_file'])
    nb_tir = len(train)

    for sta in opt.opdict['stations']:
        for comp in opt.opdict['channels']:
            opt.x, opt.y = opt.features_onesta(sta, comp)

    # NOTE(review): the plots below use the features of the last
    # (station, channel) pair read above; with the single pair configured
    # here this is the only one.
    X = opt.x
    Y = opt.y

    c = ['r', 'b', 'g']
    # Only genuine line styles: '*' and 'v' are markers, not line styles,
    # and cycling avoids running out of entries when there are many draws.
    lines = ['-', '--', '-.', ':']

    features = opt.opdict['feat_list']
    for feat in features:
        print(feat)
        opt.opdict['feat_list'] = [feat]

        fig = plt.figure()
        fig.set_facecolor('white')

        for tir in range(nb_tir):
            # Explicit list so that the index can be reused twice.
            tr = [int(ev) for ev in train[tir]]
            opt.x = X.reindex(index=tr, columns=[feat])
            opt.y = Y.reindex(index=tr)
            opt.classname2number()
            opt.compute_pdfs()
            g = opt.gaussians
            for it, t in enumerate(opt.types):
                plt.plot(g[feat]['vec'], g[feat][t],
                         ls=lines[tir % len(lines)], color=c[it])

        plt.title(feat)
        plt.legend(opt.types)
        plt.show()
def plot_curves(filename):
    """
    Evolution of the size and composition of the test set over the
    successive extractions, for the "one-by-one" method.

    ``filename`` is read with ``read_binary_file``.  For every key of the
    first extraction (entry 0), one figure is created in which each column
    of the values collected over the sorted extraction numbers is drawn as
    a curve.  The figure title is the second '_'-separated field of the key.
    """
    EXT = read_binary_file(filename)
    extraction_numbers = sorted(EXT)
    abscissa = range(len(EXT))

    for key in sorted(EXT[0]):
        # One row per extraction.
        all_nb = np.array([EXT[num_ext][key] for num_ext in extraction_numbers])

        fig = plt.figure()
        fig.set_facecolor('white')
        for icol in range(all_nb.shape[1]):
            plt.plot(abscissa, all_nb[:, icol], '-')
        plt.xlabel('Extraction number')
        plt.ylabel('Number of events in the test set')
        plt.title('%s' % key.split('_')[1])
    plt.show()
def compare_unsup_indet():
    """
    Try to relate the events of each manual class, as classified by the
    supervised method (LR or SVM), to the unsupervised (K-means) classes.

    For every manual type ``cl`` one figure is produced:
      (a) pie chart of the automatic classification of all the events;
      (b) pie chart of the automatic classification of the events manually
          labelled ``cl``;
      (c) for each automatic type, pie chart of the K-means classes of the
          corresponding events.
    The figure is saved as
    ``../results/Ijen/KMEANS/figures/unsup_compSVM_<cl>.png`` and displayed.
    """
    from matplotlib.gridspec import GridSpec
    print("### COMPARE UNSUP AND SUP ###")
    from results import AnalyseResults

    opt = AnalyseResults()

    unsup = read_binary_file(
        '../results/Ijen/KMEANS/results_kmeans_3c_11f_ini')
    nb_auto = [len(opt.auto[opt.auto.Type == t]) for t in opt.opdict['types']]
    NB_class = len(opt.opdict['types'])

    for cl in opt.opdict['types']:
        # Events manually labelled `cl` and their automatic classification.
        m = opt.man[opt.man.Type == cl]
        a = opt.auto.reindex(index=m.index)

        colors = ['yellowgreen', 'gold', 'lightskyblue', 'lightcoral']

        opt.data_for_LR()
        # A plain string: iterating over it yields the single channel 'Z'.
        opt.opdict['channels'] = 'Z'
        opt.opdict['stations'] = ['IJEN']

        for sta in opt.opdict['stations']:
            for comp in opt.opdict['channels']:
                draw = unsup[(sta, comp)][0]
                u = pd.DataFrame(index=draw['list_ev'],
                                 columns=['Type', 'NumType'])
                u['Type'] = draw['StrClass']
                u['NumType'] = draw['NumClass']
                u = u.reindex(index=m.index)
                trad = draw['Equivalence']

                fig = plt.figure(figsize=(12, 8))
                fig.set_facecolor('white')
                nb_l, nb_c = 2, NB_class * 2
                grid = GridSpec(nb_l, nb_c)
                # Floor division: the value is a slice bound of the grid
                # and has to remain an integer.
                half = nb_c // 2

                # (a) automatic classification of all the events
                ax = fig.add_subplot(grid[0, :half])
                ax.pie(nb_auto, labels=opt.opdict['types'],
                       autopct='%1.1f%%', colors=colors)
                ax.text(.4, -.1, r'# events = %d' % np.sum(nb_auto),
                        transform=ax.transAxes)
                ax.axis("equal")

                # (b) automatic classification of the events of type `cl`
                nbs = [len(a[a.Type == t]) for t in opt.opdict['types']]
                ax = fig.add_subplot(grid[0, half:])
                ax.pie(nbs, labels=opt.opdict['types'],
                       autopct='%1.1f%%', colors=colors)
                ax.text(.4, -.1, r'# events = %d' % np.sum(nbs),
                        transform=ax.transAxes)
                ax.axis("equal")

                # Pie labels of row (c): position of each type in `trad`.
                lab_c = np.array(trad).copy()
                for it, t in enumerate(opt.opdict['types']):
                    i = np.where(np.array(trad) == t)[0][0]
                    lab_c[it] = i

                # (c) K-means classes of the events of each automatic type
                for it, t in enumerate(opt.opdict['types']):
                    ared = a[a.Type == t]
                    ured = u.reindex(index=ared.index)
                    nbs = [len(ured[ured.Type == ty])
                           for ty in opt.opdict['types']]
                    ax = fig.add_subplot(grid[1, 2 * it:2 * it + 2])
                    ax.pie(nbs, labels=lab_c, autopct='%1.1f%%',
                           colors=colors)
                    ax.text(.3, -.1, r'# %s = %d' % (t, np.sum(nbs)),
                            transform=ax.transAxes)

                plt.figtext(.1, .92,
                            '(a) %s' % opt.opdict['method'].upper(),
                            fontsize=16)
                plt.figtext(.55, .92,
                            '(b) Manual repartition of %s' % cl,
                            fontsize=16)
                plt.figtext(.1, .45, r'(c) $K$-means', fontsize=16)
                for it, t in enumerate(trad):
                    plt.figtext(.3 + it * .15, .45,
                                r'%s $\approx$ %s' % (it, t))

                plt.savefig(
                    '../results/Ijen/KMEANS/figures/unsup_compSVM_%s.png' % cl)
                plt.show()
def plot_best_worst():
    """
    Plot the pdfs of the best and worst training draws, feature by feature.

    For each feature one figure is produced: the pdfs computed on the whole
    training set are drawn in black ('VT' solid, 'EB' dashed), those of the
    training part (first 60%) of the best draw in red and of the worst draw
    in green.  Each figure is saved as ``best_worst_<feature>.png`` in
    ``opt.opdict['fig_path']`` and then displayed.
    """
    from options import MultiOptions, read_binary_file

    # (feature name, index of the best draw, index of the worst draw);
    # the indices refer to the sorted list of 'learning*' files.
    draws_per_feature = [
        ('AsDec', 0, 1), ('Bandwidth', 5, 0), ('CentralF', 1, 0),
        ('Centroid_time', 4, 0), ('Dur', 4, 1), ('Ene0-5', 1, 4),
        ('Ene5-10', 0, 4), ('Ene', 0, 3), ('F_low', 4, 2), ('F_up', 0, 7),
        ('IFslope', 7, 8), ('Kurto', 2, 0), ('MeanPredF', 1, 4),
        ('PredF', 1, 4), ('RappMaxMean', 0, 1), ('RappMaxMeanTF', 4, 0),
        ('Skewness', 2, 5), ('TimeMaxSpec', 4, 0), ('Rectilinearity', 8, 3),
        ('Planarity', 1, 2),
    ]

    opt = MultiOptions()
    opt.opdict['feat_list'] = opt.opdict['feat_all']
    opt.opdict['feat_log'] = ['AsDec', 'Ene', 'Kurto', 'RappMaxMean']
    opt.opdict['feat_filename'] = '../results/Piton/features/Piton_trainset.csv'
    opt.opdict['label_filename'] = '../lib/Piton/class_train_set.csv'
    x_all, y_all = opt.features_onesta('BOR', 'Z')

    draw_files = sorted(glob.glob(os.path.join('../lib/Piton', 'learning*')))

    # Size of the training part of a draw (60% of the events).
    n_train = int(0.6 * len(y_all))

    for feat, best, worst in draws_per_feature:
        print("%s %s %s" % (feat, best, worst))

        fig = plt.figure()
        fig.set_facecolor('white')

        # Reference curves: the whole data set.
        opt.x = x_all.reindex(columns=[feat])
        opt.y = y_all.reindex(index=opt.x.index)
        opt.opdict['feat_list'] = [feat]
        opt.compute_pdfs()
        pdf_all = opt.gaussians
        plt.plot(pdf_all[feat]['vec'], pdf_all[feat]['VT'], 'k', lw=2., label='VT')
        plt.plot(pdf_all[feat]['vec'], pdf_all[feat]['EB'], 'k--', lw=2., label='EB')

        selected = (draw_files[best], draw_files[worst])
        for path, colour, tag in zip(selected, ('r', 'g'), ('best', 'worst')):
            # Training part of the draw.
            train_index = read_binary_file(path)[:n_train]
            opt.x = x_all.reindex(columns=[feat], index=train_index)
            opt.y = y_all.reindex(index=train_index)
            opt.compute_pdfs()
            pdf_train = opt.gaussians
            plt.plot(pdf_train[feat]['vec'], pdf_train[feat]['VT'], '-',
                     c=colour, label=tag)
            plt.plot(pdf_train[feat]['vec'], pdf_train[feat]['EB'], '--',
                     c=colour)

        plt.legend()
        plt.title(feat)
        plt.savefig('%s/best_worst_%s.png' % (opt.opdict['fig_path'], feat))
        plt.show()
def plot_pdf_subsets():
    """
    Plot the pdfs of the training, CV and test sets on the same figure,
    with one subfigure per event type ('VT' on the left, 'EB' on the right).

    For each feature, the pdfs of the whole data set are drawn in black;
    those of the best draw (red) and of the worst draw (green) are drawn
    with a solid line for the training part, a dashed line for the CV part
    and a dotted line for the test part.  Each figure is saved as
    ``subsets_<feature>.png`` in ``opt.opdict['fig_path']`` and displayed.
    """
    from options import MultiOptions, read_binary_file

    # (feature name, index of the best draw, index of the worst draw);
    # the indices refer to the sorted list of 'learning*' files.
    draws_per_feature = [
        ('AsDec', 0, 1), ('Bandwidth', 5, 0), ('CentralF', 1, 0),
        ('Centroid_time', 4, 0), ('Dur', 4, 1), ('Ene0-5', 1, 4),
        ('Ene5-10', 0, 4), ('Ene', 0, 3), ('F_low', 4, 2), ('F_up', 0, 7),
        ('IFslope', 7, 8), ('Kurto', 2, 0), ('MeanPredF', 1, 4),
        ('PredF', 1, 4), ('RappMaxMean', 0, 1), ('RappMaxMeanTF', 4, 0),
        ('Skewness', 2, 5), ('TimeMaxSpec', 4, 0), ('Rectilinearity', 8, 3),
        ('Planarity', 1, 2),
    ]

    opt = MultiOptions()
    opt.opdict['feat_list'] = opt.opdict['feat_all']
    opt.opdict['feat_filename'] = '../results/Piton/features/Piton_trainset.csv'
    opt.opdict['label_filename'] = '../lib/Piton/class_train_set.csv'
    x_all, y_all = opt.features_onesta('BOR', 'Z')
    print(len(y_all))

    draw_files = sorted(glob.glob(os.path.join('../lib/Piton', 'learning*')))

    n_events = len(y_all)
    n_train = int(0.6 * n_events)
    n_cv = int(0.2 * n_events)

    # (part of the draw, line style, whether the curve gets a legend entry)
    subsets = (
        (slice(None, n_train), '-', True),              # training set
        (slice(n_train, n_train + n_cv), '--', False),  # CV set
        (slice(n_train + n_cv, None), ':', False),      # test set
    )

    for feat, best, worst in draws_per_feature:
        print("%s %s %s" % (feat, best, worst))

        fig = plt.figure(figsize=(10, 4))
        fig.set_facecolor('white')
        ax1 = fig.add_subplot(121)
        ax2 = fig.add_subplot(122)

        # Reference curves: the whole data set.
        opt.x = x_all.reindex(columns=[feat])
        opt.y = y_all.reindex(index=opt.x.index)
        opt.opdict['feat_list'] = [feat]
        opt.compute_pdfs()
        pdf_all = opt.gaussians
        ax1.plot(pdf_all[feat]['vec'], pdf_all[feat]['VT'], 'k', lw=2.)
        ax2.plot(pdf_all[feat]['vec'], pdf_all[feat]['EB'], 'k', lw=2.)

        selected = (draw_files[best], draw_files[worst])
        for path, colour, tag in zip(selected, ('r', 'g'), ('best', 'worst')):
            dic = read_binary_file(path)
            for part, linestyle, labelled in subsets:
                rows = dic[part]
                opt.x = x_all.reindex(columns=[feat], index=rows)
                opt.y = y_all.reindex(index=rows)
                opt.compute_pdfs()
                pdf = opt.gaussians

                style = {'c': colour}
                if labelled:
                    style['label'] = tag
                ax1.plot(pdf[feat]['vec'], pdf[feat]['VT'], linestyle, **style)
                ax2.plot(pdf[feat]['vec'], pdf[feat]['EB'], linestyle, **style)

        ax1.set_title('VT')
        ax2.set_title('EB')
        ax1.legend()
        ax2.legend()
        plt.suptitle(feat)
        plt.savefig('%s/subsets_%s.png' % (opt.opdict['fig_path'], feat))
        plt.show()
def compare_training():
    """
    Compare the class repartition of the training draws.

    Each ``learning*`` file found in ``../lib/Piton`` is read with
    ``read_binary_file``; its content is split into a training part (first
    60%), a CV part (next 20%) and a test part (everything that remains).
    For every draw, three pie charts show how many 'VT' and 'EB' labels
    (column ``Type`` of ``class_train_set.csv``) fall into each part.

    Draws are laid out three per row on a grid of 4 rows, each draw using
    three adjacent cells.  The figure is saved to
    ``../results/Piton/figures/tirages.png`` and then displayed.
    """
    from matplotlib.gridspec import GridSpec
    from options import read_binary_file

    libpath = '../lib/Piton'
    list_files = sorted(glob.glob(os.path.join(libpath, 'learning*')))

    df = pd.read_csv(os.path.join(libpath, 'class_train_set.csv'))
    labels = np.array(df.Type.values)

    m = len(labels)
    mtraining = int(0.6 * m)
    mcv = int(0.2 * m)
    mtest = int(0.2 * m)

    nbc, nbl = 3, 4
    grid = GridSpec(nbl, nbc * 3)

    fig = plt.figure(figsize=(18, 12))
    fig.set_facecolor('white')

    types = ['VT', 'EB']
    for idraw, filename in enumerate(list_files):
        # Alternate the palette so that neighbouring draws are easy to
        # tell apart.
        if idraw % 2:
            colors = ['lightskyblue', 'lightcoral']
        else:
            colors = ['powderblue', 'plum']

        dic = read_binary_file(filename)
        train = labels[dic[:mtraining]]
        cv = labels[dic[mtraining:mtraining + mcv]]
        test = labels[dic[mtraining + mcv:]]

        # Floor division is required: the results are used as grid indices
        # and must stay integers whatever the division semantics in force.
        row = idraw // nbc
        col = idraw % nbc * 3

        # (subset, title, size label, x position of the texts)
        panels = [
            (train, 'Training set', r'$m_{training}=%d$' % mtraining, -0.5),
            (cv, 'CV set', r'$m_{CV}=%d$' % mcv, -0.3),
            (test, 'Test set', r'$m_{test}=%d$' % mtest, -0.3),
        ]
        for ipanel, (subset, title, size_label, xtext) in enumerate(panels):
            plt.subplot(grid[row, col + ipanel], aspect='equal')
            counts = [len(subset[subset == t]) for t in types]
            plt.pie(counts, autopct='%1.1f%%', labels=types, colors=colors)
            plt.text(xtext, 1.4, title)
            plt.text(xtext, -1.4, size_label)
            if ipanel == 1:
                # The draw number is written above the middle (CV) pie.
                plt.text(-.5, 2., 'Tirage %d' % idraw)

    plt.savefig('../results/Piton/figures/tirages.png')
    plt.show()
def plot_envelopes():
    """
    Plot one EB and one VT event with envelopes computed with several
    smoothing parameters.

    The traces are read from ``../data/Piton/envelope/trace_EB`` and
    ``trace_VT``.  For each of them the envelope is computed with
    ``process_envelope`` for window lengths of 51, 101, 501 and 1001
    samples and drawn in its own panel (EB on top, VT below), together with
    an inset zooming on a portion of the curves; the zoomed interval is
    shaded in grey on the main panel.  The figure is displayed, not saved.
    """
    from options import read_binary_file
    from features_extraction_piton import process_envelope
    from mpl_toolkits.axes_grid1.inset_locator import inset_axes

    datadir = '../data/Piton/envelope'

    fig = plt.figure()
    fig.set_facecolor('white')

    colors = ['r', 'b', 'g', 'y']
    windows = [51, 101, 501, 1001]
    # Line width per window; None keeps matplotlib's default width.
    widths = [None, None, 2., 2.]

    def draw_panel(trace, position, curve_labels, inset_height, zoom, title):
        """Draw the four envelopes of `trace` and the zoomed inset."""
        envelopes = [process_envelope(trace, w=w) for w in windows]
        ax = fig.add_subplot(position)

        styles = []
        for k in range(len(windows)):
            style = {'c': colors[k]}
            if widths[k] is not None:
                style['lw'] = widths[k]
            styles.append(style)

        for k, env in enumerate(envelopes):
            main_style = dict(styles[k])
            if curve_labels is not None:
                main_style['label'] = curve_labels[k]
            ax.plot(time[:-1], env, **main_style)

        axins = inset_axes(ax, width="30%", height=inset_height, loc=1)
        i1, i2 = zoom
        ax.axvspan(time[i1], time[i2], color='gray', alpha=.3)
        for k, env in enumerate(envelopes):
            axins.plot(time[i1:i2], env[i1:i2], **styles[k])
        axins.xaxis.set_ticks_position('bottom')
        axins.yaxis.set_ticklabels('')
        axins.yaxis.set_visible(False)

        ax.set_title(title)
        ax.set_xlim([0, time[-1]])
        return ax

    ### EB ###
    tr_eb = read_binary_file('%s/trace_EB' % datadir)
    # Time axis built from the EB trace (0.01 per sample); it is reused
    # as is for the VT panel.
    time = np.linspace(0, len(tr_eb) * 0.01, len(tr_eb))
    ax1 = draw_panel(tr_eb, 211, ['0.5 s', '1 s', '5 s', '10 s'], "60%",
                     (6000, 8000), 'Eboulement')
    ax1.set_xticklabels('')
    ax1.legend(loc=2, prop={'size': 10})

    ### VT ###
    tr_vt = read_binary_file('%s/trace_VT' % datadir)
    ax2 = draw_panel(tr_vt, 212, None, "70%", (3000, 5000),
                     'Volcano-tectonique')
    ax2.set_xlabel('Time (s)')

    plt.figtext(0.03, 0.89, '(a)')
    plt.figtext(0.03, 0.46, '(b)')
    plt.show()
def classifier(opt):
    """
    Classification of the different types of events.

    For every data set of ``opt.xs`` and every draw (``opt.opdict['boot']``
    of them), training / CV / test sets are built, the method selected by
    ``opt.opdict['method']`` ('1b1', 'ova', 'svm', 'svm_nl', 'lrsk' or 'lr')
    is run, the confusion matrices are computed and, depending on the
    plotting options, several figures are drawn.  The results are gathered
    in a dictionary keyed by ``opt.trad[isc]`` plus a 'header' entry, which
    is written to ``opt.opdict['result_path']`` for the lr/lrsk/svm/svm_nl
    methods.  The function returns nothing; ``opt`` is modified in place.

    :param opt: object of the class Options()

    NOTE(review): the nesting of this function was reconstructed from a
    flattened source; the levels marked below should be double-checked.
    """
    list_attr = opt.__dict__.keys()
    if not 'x' in list_attr:
        opt.do_tri()

    # NOTE(review): X, Y, X_TRAIN and Y_TRAIN are not used further down.
    X = opt.x
    Y = opt.y

    list_attr = opt.__dict__.keys()
    if 'train_x' in list_attr:
        X_TRAIN = opt.train_x
        Y_TRAIN = opt.train_y

    dic_results = {}
    for isc in sorted(opt.xs):

        print "==========",opt.trad[isc],"=========="
        subdic = {}

        # marker_sta is 1 when this data set has the same first element
        # (opt.trad[isc][0]) as the previous one, 0 otherwise.
        # NOTE(review): sta_prev is only bound once a key <= 0 or a new
        # station has been seen - confirm that the keys start at 0.
        if isc > 0:
            if opt.trad[isc][0] == sta_prev:
                marker_sta = 1
            else:
                marker_sta = 0
                sta_prev = opt.trad[isc][0]
        else:
            marker_sta = 0
            sta_prev = opt.trad[isc][0]

        if len(opt.xs[isc]) == 0:
            continue

        # About the training set
        if len(opt.opdict['stations']) == 1 and opt.opdict['boot'] > 1 and 'train_x' not in list_attr:
            # Re-use the draws stored in the training file when it exists,
            # otherwise prepare one empty entry per draw.
            if os.path.exists(opt.opdict['train_file']):
                print opt.opdict['train_file']
                TRAIN_Y = read_binary_file(opt.opdict['train_file'])
            else:
                TRAIN_Y = {}
                for tir in range(opt.opdict['boot']):
                    TRAIN_Y[tir] = {}

        elif 'train_x' in list_attr:
            opt.x = opt.xs_train[isc]
            opt.y = opt.ys_train[isc]
            if opt.opdict['plot_pdf']:
                opt.compute_pdfs()
                g_train = opt.gaussians
                del opt.gaussians
            opt.classname2number()
            x_ref_train = opt.x
            y_ref_train = opt.y

        # About the test set
        opt.x = opt.xs[isc]
        opt.y = opt.ys[isc]
        if opt.opdict['plot_pdf']:
            opt.compute_pdfs()

        # NOTE(review): `set` shadows the builtin of the same name.
        set = pd.DataFrame(index=opt.ys[isc].index,columns=['Otime'])
        set['Otime'] = opt.xs[isc].index

        opt.classname2number()
        x_test = opt.x
        y_ref = opt.y
        x_ref = opt.x

        if opt.opdict['plot_dataset']:
            opt.composition_dataset()

        #K = len(opt.types)

        ### ITERATE OVER TRAINING SET DRAWS ###
        for b in range(opt.opdict['boot']):
            print "\n-------------------- # iter: %d --------------------\n"%(b+1)

            subsubdic = {}
            print "WHOLE SET", x_ref.shape, y_ref.shape

            ### if there is no pre-defined training set ###
            if 'train_x' not in list_attr:
                x_train = x_test.copy()
                if len(opt.opdict['stations']) == 1 and opt.opdict['boot'] > 1:
                    if len(TRAIN_Y[b]) > 0:
                        # The draw is already known: rebuild the three sets
                        # from the stored event lists.
                        y_train = y_ref.reindex(index=TRAIN_Y[b]['training_set'])
                        y_train = y_train.dropna(how='any')
                        y_cv = y_ref.reindex(index=TRAIN_Y[b]['cv_set'])
                        y_cv = y_cv.dropna(how='any')
                        y_test = y_ref.reindex(index=TRAIN_Y[b]['test_set'])
                        y_test = y_test.dropna(how='any')
                    else:
                        # New draw: generate the sets and remember them.
                        y_train, y_cv, y_test = generate_datasets(opt.opdict['proportions'],opt.numt,y_ref)
                        TRAIN_Y[b]['training_set'] = map(int,list(y_train.index))
                        TRAIN_Y[b]['cv_set'] = map(int,list(y_cv.index))
                        TRAIN_Y[b]['test_set'] = map(int,list(y_test.index))

                ### multi-stations case ###
                else:
                    if marker_sta == 0:
                        y_train, y_cv, y_test = generate_datasets(opt.opdict['proportions'],opt.numt,y_ref)
                        list_ev_train = y_train.index
                        list_ev_cv = y_cv.index
                        list_ev_test = y_test.index
                    else:
                        # Same first element as the previous data set:
                        # re-use the event lists of the previous draw.
                        y_train = y_ref.reindex(index=list_ev_train)
                        y_train = y_train.dropna(how='any')
                        y_cv = y_ref.reindex(index=list_ev_cv)
                        y_cv = y_cv.dropna(how='any')
                        y_test = y_ref.reindex(index=list_ev_test)
                        y_test = y_test.dropna(how='any')

                x_train = x_ref.reindex(index=y_train.index)

            ### if a training set was pre-defined ###
            else:
                x_train = x_ref_train.copy()
                y_train = y_ref_train.copy()
                y_train, y_cv, y_test = generate_datasets(opt.opdict['proportions'],opt.numt,y_ref,y_train=y_train)

            x_cv = x_ref.reindex(index=y_cv.index)
            x_test = x_ref.reindex(index=y_test.index)

            # The original indexes are kept aside (i_train, i_cv, i_test)
            # before the sets are renumbered from 0.
            i_train = y_train.index
            x_train.index = range(x_train.shape[0])
            y_train.index = range(y_train.shape[0])
            print "TRAINING SET", x_train.shape, y_train.shape
            if x_train.shape[0] != y_train.shape[0]:
                print "Training set: Incoherence in x and y dimensions"
                sys.exit()

            i_cv = y_cv.index
            x_cv.index = range(x_cv.shape[0])
            y_cv.index = range(y_cv.shape[0])
            print "CROSS-VALIDATION SET", x_cv.shape, y_cv.shape
            if x_cv.shape[0] != y_cv.shape[0]:
                print "Cross-validation set: Incoherence in x and y dimensions"
                sys.exit()

            subsubdic['list_ev'] = np.array(y_test.index)

            i_test = y_test.index
            x_test.index = range(x_test.shape[0])
            y_test.index = range(y_test.shape[0])
            print "TEST SET", x_test.shape, y_test.shape
            if x_test.shape[0] != y_test.shape[0]:
                print "Test set: Incoherence in x and y dimensions"
                sys.exit()

            opt.train_x = x_train
            opt.x = x_test
            opt.train_y = y_train
            opt.y = y_test

            # NOTE(review): as reconstructed, plot_all_pdfs is called twice
            # when no training set was pre-defined - confirm the nesting.
            if opt.opdict['plot_pdf']:
                opt.plot_all_pdfs(save=opt.opdict['save_pdf'])
                if 'train_x' in list_attr:
                    opt.plot_superposed_pdfs(g_train,save=opt.opdict['save_pdf'])
                else:
                    opt.plot_all_pdfs(save=opt.opdict['save_pdf'])

            if opt.opdict['method'] == '1b1':
                # EXTRACTORS
                print "********** EXTRACTION 1-BY-1 **********"
                # The range of the enclosing loop was evaluated before this
                # assignment, so it does not shorten the current loop.
                opt.opdict['boot'] = 1
                one_by_one(opt,x_ref,y_ref,set['Otime'],boot=10,method='svm')
                continue

            elif opt.opdict['method'] == 'ova':
                print "********** EXTRACTION 1-VS-ALL **********"
                opt.opdict['boot'] = 1
                one_vs_all(opt,x_ref,y_ref,set['Otime'],boot=10,method='svm')
                continue

            elif opt.opdict['method'] in ['svm','svm_nl']:
                # SVM
                print "********** SVM **********"
                if opt.opdict['method'] == 'svm':
                    kern = 'Lin'
                else:
                    kern = 'NonLin'

                out = implement_svm(x_train,x_test,y_train,y_test,opt.types,opt.opdict,kern=kern,proba=opt.opdict['probas'])

                if 'map' in sorted(out):
                    opt.map = out['map']

                # Each theta vector is reordered so that its last element
                # comes first; keys start at 1.
                # NOTE(review): theta and threshold stay unbound when `out`
                # has no 'thetas' entry, although they are read further down.
                if 'thetas' in sorted(out):
                    theta_vec = out['thetas']
                    theta,threshold = {},{}
                    for it in range(len(theta_vec)):
                        theta[it+1] = np.append(theta_vec[it][-1],theta_vec[it][:-1])
                        threshold[it+1] = 0.5
                    out['thetas'] = theta
                    out['threshold'] = threshold

            elif opt.opdict['method'] == 'lrsk':
                # LOGISTIC REGRESSION (scikit learn)
                print "********* Logistic regression (sklearn) **********"
                out = implement_lr_sklearn(x_train,x_test,y_train,y_test)
                threshold, theta = {},{}
                for it in range(len(out['thetas'])):
                    threshold[it+1] = 0.5
                    theta[it+1] = np.append(out['thetas'][it][-1],out['thetas'][it][:-1])
                out['threshold'] = threshold
                out['thetas'] = theta

            elif opt.opdict['method'] == 'lr':
                # LOGISTIC REGRESSION
                print "********* Logistic regression **********"
                from LR_functions import do_all_logistic_regression
                out = do_all_logistic_regression(x_train,x_test,x_cv,y_train,y_test,y_cv)
                theta = out['thetas']
                threshold = out['threshold']
                # The training index is stored once, if a learning file is
                # configured and does not exist yet.
                if 'learn_file' in sorted(opt.opdict):
                    learn_filename = opt.opdict['learn_file']
                    if not os.path.exists(learn_filename):
                        wtr = write_binary_file(learn_filename,i_train)

            CLASS_test = out['label_test']
            CLASS_train = out['label_train']

            # TRAINING SET
            print "\t *TRAINING SET"
            y_train_np = y_train.NumType.values.ravel()
            from sklearn.metrics import confusion_matrix
            cmat_train = confusion_matrix(y_train_np,CLASS_train)
            p_tr = dic_percent(cmat_train,opt.types,verbose=True)
            out['rate_train'] = p_tr
            print " Global : %.2f%%"%p_tr['global']
            if opt.opdict['plot_confusion'] or opt.opdict['save_confusion']:
                plot_confusion_mat(cmat_train,opt.types,'Training',opt.opdict['method'].upper())
                if opt.opdict['save_confusion']:
                    savefig = '%s/training_%s.png'%(opt.opdict['fig_path'],opt.opdict['result_file'])
                    print "Confusion matrix saved in %s"%savefig
                    plt.savefig(savefig)

            # TEST SET
            print "\t *TEST SET"
            y_test_np = y_test.NumType.values.ravel()
            cmat_test = confusion_matrix(y_test_np,CLASS_test)
            p_test = dic_percent(cmat_test,opt.types,verbose=True)
            out['rate_test'] = p_test
            print " Global : %.2f%%"%p_test['global']
            if opt.opdict['plot_confusion'] or opt.opdict['save_confusion']:
                plot_confusion_mat(cmat_test,opt.types,'Test',opt.opdict['method'].upper())
                if opt.opdict['save_confusion']:
                    savefig = '%s/test_%s.png'%(opt.opdict['fig_path'],opt.opdict['result_file'])
                    print "Confusion matrix saved in %s"%savefig
                    plt.savefig(savefig)
                if opt.opdict['plot_confusion']:
                    plt.show()
                else:
                    plt.close()

            # PLOT PRECISION AND RECALL
            if opt.opdict['plot_prec_rec']:
                from LR_functions import normalize,plot_precision_recall
                x_train, x_test = normalize(x_train,x_test)
                plot_precision_recall(x_train,y_train.NumType,x_test,y_test.NumType,theta)

            pourcentages = (p_tr['global'],p_test['global'])
            out['method'] = opt.opdict['method']
            out['types'] = opt.types
            opt.out = out

            # PLOT DECISION BOUNDARIES
            n_feat = x_train.shape[1] # number of features
            if n_feat < 4:
                if opt.opdict['plot_sep'] or opt.opdict['save_sep']:
                    print "\nPLOTTING"
                    print "Theta values:",theta
                    print "Threshold:", threshold

                    # COMPARE AND PLOT LR AND SVM RESULTS
                    out_svm, out_nl = {},{}
                    # NOTE(review): `dir` shadows the builtin of the same name.
                    dir = '%s_SEP'%opt.opdict['method'].upper()
                    if opt.opdict['method']=='lr' and opt.opdict['compare']:
                        dir = 'LR_SVM_SEP'
                        out_svm = implement_svm(x_train,x_test,y_train,y_test,opt.types,opt.opdict,kern='Lin')
                        cmat_svm_tr = confusion_matrix(y_train_np,out_svm['label_train'])
                        cmat_svm_test = confusion_matrix(y_test_np,out_svm['label_test'])
                        svm_ptr = dic_percent(cmat_svm_tr,opt.types)
                        svm_pt = dic_percent(cmat_svm_test,opt.types)
                        theta_svm,t_svm = {},{}
                        for it in range(len(out_svm['thetas'])):
                            theta_svm[it+1] = np.append(out_svm['thetas'][it][-1],out_svm['thetas'][it][:-1])
                            t_svm[it+1] = 0.5
                        out_svm['thetas'] = theta_svm
                        out_svm['threshold'] = t_svm
                        out_svm['rate_test'] = svm_pt
                        out_svm['rate_train'] = svm_ptr
                        out_svm['method'] = 'SVM'

                    if opt.opdict['method'] in ['lr','svm'] and opt.opdict['compare_nl']:
                        dir = '%s_NL_SEP'%opt.opdict['method'].upper()
                        out_nl = implement_svm(x_train,x_test,y_train,y_test,opt.types,opt.opdict,kern='NonLin')
                        cmat_svm_tr = confusion_matrix(y_train_np,out_nl['label_train'])
                        cmat_svm_test = confusion_matrix(y_test_np,out_nl['label_test'])
                        svm_ptr = dic_percent(cmat_svm_tr,opt.types)
                        svm_pt = dic_percent(cmat_svm_test,opt.types)
                        out_nl['rate_test'] = svm_pt
                        out_nl['rate_train'] = svm_ptr
                        out_nl['method'] = 'SVM_NL'

                    save_dir = os.path.join(opt.opdict['fig_path'],dir)
                    opt.verify_and_create(save_dir)

                    from LR_functions import normalize
                    x_train, x_test = normalize(x_train,x_test)

                    # Split the events into well and badly classified ones.
                    x_train_good = x_train.reindex(index=y_train[y_train.NumType.values==CLASS_train].index)
                    x_train_bad = x_train.reindex(index=y_train[y_train.NumType.values!=CLASS_train].index)
                    good_train = y_train.reindex(index=x_train_good.index)

                    x_test_good = x_test.reindex(index=y_test[y_test.NumType.values==CLASS_test].index)
                    x_test_bad = x_test.reindex(index=y_test[y_test.NumType.values!=CLASS_test].index)

                    # PLOT FOR 1 ATTRIBUTE AND 2 CLASSES
                    # NOTE(review): `name` stays unbound when n_feat == 1 with
                    # a number of types other than 2, yet it is used below
                    # when 'save_sep' is set.
                    if n_feat == 1 and len(opt.opdict['types']) == 2:
                        name = opt.opdict['feat_list'][0]
                        from plot_functions import plot_hyp_func_1f, histo_pdfs
                        if opt.opdict['method']=='lr' and opt.opdict['compare']:
                            plot_hyp_func_1f(x_train,y_train,theta,opt.opdict['method'],threshold=threshold,x_ok=x_test_good,x_bad=x_test_bad,th_comp=theta_svm,cmat_test=cmat_test,cmat_svm=cmat_svm_test,cmat_train=cmat_train)
                        else:
                            #histo_pdfs(x_test,y_test,x_train=x_train,y_train=y_train)
                            plot_hyp_func_1f(x_train,y_train,theta,opt.opdict['method'],threshold=threshold,x_ok=x_test_good,x_bad=x_test_bad,cmat_test=cmat_test,cmat_train=cmat_train)

                    # PLOT FOR 2 ATTRIBUTES AND 2 to 3 CLASSES
                    elif n_feat == 2:
                        name = '%s_%s'%(opt.opdict['feat_list'][0],opt.opdict['feat_list'][1])
                        if opt.opdict['method'] in ['lr','svm']:
                            from plot_2features import plot_2f_all
                            plot_2f_all(out,x_train,y_train,x_test,y_test,x_test_bad)
                        # NOTE(review): this branch cannot be reached, the
                        # 'lr' method is already caught by the test above.
                        elif opt.opdict['method']=='lr' and opt.opdict['compare']:
                            from plot_2features import plot_2f_all
                            plot_2f_all(out,x_train,y_train,x_test,y_test,x_test_bad,out_comp=out_svm,map_nl=out_nl)
                        elif opt.opdict['method'] == 'svm_nl':
                            from plot_2features import plot_2f_nonlinear
                            # NOTE(review): y_train is passed both by position
                            # and by keyword - check against the signature of
                            # plot_2f_nonlinear.
                            plot_2f_nonlinear(out,x_train,y_train,x_test,y_test,y_train=y_train)

                    # PLOT FOR 3 ATTRIBUTES
                    elif n_feat == 3:
                        from plot_functions import plot_db_3d
                        plot_db_3d(x_train,y_train.NumType,theta[1],title='Training set')
                        plot_db_3d(x_test,y_test.NumType,theta[1],title='Test set')
                        name = '%s_%s_%s'%(opt.opdict['feat_list'][0],opt.opdict['feat_list'][1],opt.opdict['feat_list'][2])

                    if opt.opdict['save_sep']:
                        savename = '%s/CL_sep_%s.png'%(save_dir,name)
                        print "Figure saved in %s"%savename
                        plt.savefig(savename)
                    if opt.opdict['plot_sep']:
                        plt.show()
                    else:
                        plt.close()

            # WRITE RESULTS INTO A DICTIONARY
            subsubdic['%'] = pourcentages
            # Translate the predicted class numbers back into type names.
            trad_CLASS_test = []
            for i in CLASS_test:
                i = int(i)
                trad_CLASS_test.append(opt.types[i])
            subsubdic['classification'] = trad_CLASS_test
            if opt.opdict['probas']:
                subsubdic['proba'] = out['probas']
            if opt.opdict['plot_var']:
                subsubdic['out'] = out
            subdic[b] = subsubdic

        if opt.opdict['plot_var'] and opt.opdict['method'] in ['lr','svm','lrsk'] and n_feat==2 and len(opt.opdict['types'])==2:
            from plot_2features import plot_2f_variability
            plot_2f_variability(subdic,x_train,y_train,x_test,y_test)
            plt.savefig('%s/%s_variability_pas.png'%(opt.opdict['fig_path'],opt.opdict['method'].upper()))
            plt.show()

        dic_results[opt.trad[isc]] = subdic

    dic_results['header'] = {}
    dic_results['header']['features'] = opt.opdict['feat_list']
    dic_results['header']['types'] = opt.opdict['types']
    dic_results['header']['catalog'] = opt.opdict['label_test']

    if opt.opdict['method'] in ['lr','lrsk','svm','svm_nl']:
        print "Save results in file %s"%opt.opdict['result_path']
        write_binary_file(opt.opdict['result_path'],dic_results)

    # Store the draws so that they can be re-used by a later run.
    # NOTE(review): TRAIN_Y is only bound in the single-station, multi-draw
    # case without pre-defined training set - confirm it always exists here.
    if 'train_file' in sorted(opt.opdict):
        if not os.path.exists(opt.opdict['train_file']) and opt.opdict['boot'] > 1:
            write_binary_file(opt.opdict['train_file'],TRAIN_Y)