def compare_training():
  """
  Compare the repartition of the training sets :
  decomposition in training (60%), CV (20%) and test (20%) sets.

  Every ``learning*`` file found in ``../lib/Piton`` is read as one draw.
  For each draw, three pie charts show the number of 'VT' and 'EB' events
  in the training, CV and test subsets. The figure is saved as
  ``../results/Piton/figures/tirages.png`` and then displayed.
  """
  from matplotlib.gridspec import GridSpec
  from options import read_binary_file
  libpath = '../lib/Piton'
  list_files = sorted(glob.glob(os.path.join(libpath,'learning*')))

  df = pd.read_csv(os.path.join(libpath,'class_train_set.csv'))
  labels = np.array(df.Type.values)

  m = len(labels)
  mtraining = int(0.6*m)
  mcv = int(0.2*m)

  nbc = 3
  # The historical layout is 4 rows of 3 draws; add rows instead of raising
  # an IndexError when more than 12 draws are available.
  nbl = max(4,(len(list_files)+nbc-1)//nbc)
  grid = GridSpec(nbl,nbc*3)
  # Colour pairs alternate between even and odd draws.
  palettes = [['powderblue', 'plum'],['lightskyblue', 'lightcoral']]
  fig = plt.figure(figsize=(18,12))
  fig.set_facecolor('white')

  for idraw,fname in enumerate(list_files):
    colors = palettes[idraw%2]

    dic = read_binary_file(fname)
    train = labels[dic[:mtraining]]
    cv = labels[dic[mtraining:mtraining+mcv]]
    # The test set is the remainder of the draw, so its size is not
    # necessarily int(0.2*m): the real subset sizes are displayed below.
    test = labels[dic[mtraining+mcv:]]

    # Floor division keeps the grid indices integral whatever the division
    # semantics in force.
    row = idraw//nbc
    col = idraw%nbc * 3

    # (subset, title, x position of the texts, format of the size label)
    panels = [(train,'Training set',-0.5,r'$m_{training}=%d$'),
              (cv,'CV set',-0.3,r'$m_{CV}=%d$'),
              (test,'Test set',-0.3,r'$m_{test}=%d$')]
    for ipanel,(subset,title,xtext,size_fmt) in enumerate(panels):
      prop = [len(subset[subset=='VT']),len(subset[subset=='EB'])]
      plt.subplot(grid[row,col+ipanel],aspect='equal')
      plt.pie(prop,autopct='%1.1f%%',labels=['VT','EB'],colors=colors)
      plt.text(xtext,1.4,title)
      plt.text(xtext,-1.4,size_fmt%len(subset))
      if ipanel == 1:
        # The draw number is written above the central (CV) pie.
        plt.text(-.5,2.,'Tirage %d'%idraw)
  plt.savefig('../results/Piton/figures/tirages.png')
  plt.show()
Exemple #2
0
  def __init__(self):
    MultiOptions.__init__(self)
    print "ANALYSIS OF %s"%self.opdict['result_path']
    self.results = read_binary_file(self.opdict['result_path'])
    self.opdict['feat_list'] = self.results['features']
    del self.results['features']

    self.do_analysis()
Exemple #3
0
def read_extraction_results(filename):
    """
    Instantiate AnalyseResultsExtraction and load the binary file `filename`.

    NOTE(review): as visible here nothing is returned and `res`, `DIC`,
    `utcdatetime` and `read` are never used; the body looks truncated,
    confirm against the complete source.
    """

    from results import AnalyseResultsExtraction
    res = AnalyseResultsExtraction()

    from obspy.core import utcdatetime, read

    # Content of the file, as returned by read_binary_file.
    DIC = read_binary_file(filename)
Exemple #4
0
def read_extraction_results(filename):
  """
  Instantiate AnalyseResultsExtraction and load the binary file `filename`.

  NOTE(review): as visible here nothing is returned and `res`, `DIC`,
  `utcdatetime` and `read` are never used; the body looks truncated,
  confirm against the complete source.
  """

  from results import AnalyseResultsExtraction
  res = AnalyseResultsExtraction()

  from obspy.core import utcdatetime,read

  # Content of the file, as returned by read_binary_file.
  DIC = read_binary_file(filename)
def plot_best_worst():
  """
  Plots the pdfs of the training set for the best and worst draws 
  and compare with the whole training set.
  """
  from options import MultiOptions, read_binary_file
  opt = MultiOptions()

  feat_list = [('AsDec',0,1),('Bandwidth',5,0),('CentralF',1,0),('Centroid_time',4,0),('Dur',4,1),('Ene0-5',1,4),('Ene5-10',0,4),('Ene',0,3),('F_low',4,2),('F_up',0,7),('IFslope',7,8),('Kurto',2,0),('MeanPredF',1,4),('PredF',1,4),('RappMaxMean',0,1),('RappMaxMeanTF',4,0),('Skewness',2,5),('TimeMaxSpec',4,0),('Rectilinearity',8,3),('Planarity',1,2)]

  opt.opdict['feat_list'] = opt.opdict['feat_all']
  opt.opdict['feat_log'] = ['AsDec','Ene','Kurto','RappMaxMean']
  opt.opdict['feat_filename'] = '../results/Piton/features/Piton_trainset.csv'
  opt.opdict['label_filename'] = '../lib/Piton/class_train_set.csv'
  x_all, y_all = opt.features_onesta('BOR','Z')
  
  list_files = glob.glob(os.path.join('../lib/Piton','learning*'))
  list_files.sort()

  m = len(y_all)
  mtraining = int(0.6*m)
  mcv = int(0.2*m)
  mtest = int(0.2*m)

  for feat,best,worst in feat_list:
    print feat, best, worst
    fig = plt.figure()
    fig.set_facecolor('white')

    # ALL
    opt.x = x_all.reindex(columns=[feat])
    opt.y = y_all.reindex(index=opt.x.index)
    opt.opdict['feat_list'] = [feat]
    opt.compute_pdfs()
    g = opt.gaussians
    plt.plot(g[feat]['vec'],g[feat]['VT'],'k',lw=2.,label='VT')
    plt.plot(g[feat]['vec'],g[feat]['EB'],'k--',lw=2.,label='EB')

    labels = ['best','worst']
    colors = ['r','g']
    b_file = list_files[best]
    w_file = list_files[worst]
    for ifile,file in enumerate([b_file,w_file]):
      dic = read_binary_file(file)

      # TRAINING SET
      opt.x = x_all.reindex(columns=[feat],index=dic[:mtraining])
      opt.y = y_all.reindex(index=dic[:mtraining])
      opt.compute_pdfs()
      g_train = opt.gaussians
      plt.plot(g_train[feat]['vec'],g_train[feat]['VT'],'-',c=colors[ifile],label=labels[ifile])
      plt.plot(g_train[feat]['vec'],g_train[feat]['EB'],'--',c=colors[ifile])

    plt.legend()
    plt.title(feat)
    plt.savefig('%s/best_worst_%s.png'%(opt.opdict['fig_path'],feat))
    plt.show()
Exemple #6
0
def plot_waveforms():
    """
  Plot the waveforms of unsupervised classes.
  """

    from matplotlib.gridspec import GridSpec
    from options import read_binary_file, Options
    from obspy.core import read
    opt = Options()
    DIC = read_binary_file(opt.opdict['result_path'])

    for stac in sorted(DIC):
        if stac == 'header':
            continue
        station = stac[0]
        comp = stac[1]
        datapath = glob.glob(
            os.path.join(opt.opdict['datadir'], station, '*%s*' % comp))[0]
        for tir in sorted(DIC[stac]):
            list_ev = DIC[stac][tir]['list_ev']
            nclass = DIC[stac][tir]['NumClass']
            K = len(np.unique(nclass))
            fig = plt.figure()
            fig.set_facecolor('white')
            grid = GridSpec(2 * K, 3)
            for j, N in enumerate(np.unique(nclass)):
                index = list(np.where(nclass == N)[0])
                ev = list_ev[index]
                permut = np.random.permutation(ev)
                for i in range(3):
                    E = permut[i]
                    file = glob.glob(
                        os.path.join(datapath,
                                     '*%s_%s*' % (str(E)[:8], str(E)[8:])))[0]
                    st = read(file)
                    st.filter('bandpass', freqmin=1, freqmax=10)
                    if i in [0, 1]:
                        ax = fig.add_subplot(grid[2 * j, i + 1])
                    else:
                        ax = fig.add_subplot(grid[2 * j + 1, :])
                    ax.plot(st[0], 'k')
                    ax.set_axis_off()
                ax = fig.add_subplot(grid[2 * j, 0])
                ax.text(.2, .5, N, transform=ax.transAxes)
                ax.set_axis_off()

    save = True
    if save:
        savename = '%s/WF_K%dclass.png' % (opt.opdict['fig_path'], K)
        print "Saved in %s" % savename
        plt.savefig(savename)
    plt.show()
def plot_waveforms():

  """
  Plot the waveforms of unsupervised classes.
  """

  from matplotlib.gridspec import GridSpec
  from options import read_binary_file, Options
  from obspy.core import read
  opt = Options()
  DIC = read_binary_file(opt.opdict['result_path'])

  for stac in sorted(DIC):
    if stac == 'header':
      continue
    station = stac[0]
    comp = stac[1]
    datapath = glob.glob(os.path.join(opt.opdict['datadir'],station,'*%s*'%comp))[0]
    for tir in sorted(DIC[stac]):
      list_ev = DIC[stac][tir]['list_ev']
      nclass = DIC[stac][tir]['NumClass']
      K = len(np.unique(nclass))
      fig = plt.figure()
      fig.set_facecolor('white')
      grid = GridSpec(2*K,3)
      for j,N in enumerate(np.unique(nclass)):
        index = list(np.where(nclass==N)[0])
        ev = list_ev[index]
        permut = np.random.permutation(ev)
        for i in range(3):
          E = permut[i]
          file = glob.glob(os.path.join(datapath,'*%s_%s*'%(str(E)[:8],str(E)[8:])))[0]
          st = read(file)
          st.filter('bandpass',freqmin=1,freqmax=10)
          if i in [0,1]:
            ax = fig.add_subplot(grid[2*j,i+1])
          else:
            ax = fig.add_subplot(grid[2*j+1,:])
          ax.plot(st[0],'k')
          ax.set_axis_off()
        ax = fig.add_subplot(grid[2*j,0])
        ax.text(.2,.5,N,transform=ax.transAxes)
        ax.set_axis_off()

  save = True
  if save:
    savename = '%s/WF_K%dclass.png'%(opt.opdict['fig_path'],K)
    print "Saved in %s"%savename
    plt.savefig(savename)
  plt.show()
Exemple #8
0
 def read_result_file(self):
   """
   Reads the file containing the results
   """
   dic = read_binary_file(self.opdict['result_path'])
   self.opdict['feat_list'] = dic['header']['features']
   self.opdict['label_filename'] = '%s/%s'%(self.opdict['libdir'],dic['header']['catalog'])
   print "Nb features :", len(self.opdict['feat_list'])
   print "Types :", dic['header']['types']
   self.results = dic
   self.opdict['stations'] = [key[0] for key in sorted(dic)]
   self.opdict['channels'] = [key[1] for key in sorted(dic)]
   self.opdict['Types'] = dic['header']['types']
   del dic['header']
Exemple #9
0
def compare_pdfs_train():
    """
  Affiche et compare les pdfs des différents training sets.
  """
    from options import MultiOptions
    opt = MultiOptions()

    opt.opdict['stations'] = ['IJEN']
    opt.opdict['channels'] = ['Z']
    opt.opdict['Types'] = ['Tremor', 'VulkanikB', '?']

    opt.opdict['train_file'] = '%s/train_10' % (opt.opdict['libdir'])
    opt.opdict[
        'label_filename'] = '%s/Ijen_reclass_all.csv' % opt.opdict['libdir']

    train = read_binary_file(opt.opdict['train_file'])
    nb_tir = len(train)

    for sta in opt.opdict['stations']:
        for comp in opt.opdict['channels']:
            opt.x, opt.y = opt.features_onesta(sta, comp)

    X = opt.x
    Y = opt.y
    c = ['r', 'b', 'g']
    lines = ['-', '--', '-.', ':', '-', '--', '-.', ':', '*', 'v']
    features = opt.opdict['feat_list']
    for feat in features:
        print feat
        opt.opdict['feat_list'] = [feat]
        fig = plt.figure()
        fig.set_facecolor('white')
        for tir in range(nb_tir):
            tr = map(int, train[tir])
            opt.x = X.reindex(index=tr, columns=[feat])
            opt.y = Y.reindex(index=tr)
            opt.classname2number()
            opt.compute_pdfs()
            g = opt.gaussians

            for it, t in enumerate(opt.types):
                plt.plot(g[feat]['vec'],
                         g[feat][t],
                         ls=lines[tir],
                         color=c[it])
        plt.title(feat)
        plt.legend(opt.types)
        plt.show()
Exemple #10
0
def plot_curves(filename):
    """
    Evolution of the size and of the composition of the test set over the
    extractions for the "one-by-one" method.

    `filename` is read with read_binary_file. One figure is created for
    each key of the entry 0 of the file; it shows, against the extraction
    number, one curve per column of the values stored under that key.
    """

    EXT = read_binary_file(filename)
    keys = sorted(EXT[0])
    extraction_ids = sorted(EXT)
    xaxis = range(len(EXT))

    for key in keys:
        # One row per extraction
        counts = np.array([EXT[num_ext][key] for num_ext in extraction_ids])

        fig = plt.figure()
        fig.set_facecolor('white')
        for icol in range(counts.shape[1]):
            plt.plot(xaxis, counts[:, icol], '-')
        plt.xlabel('Extraction number')
        plt.ylabel('Number of events in the test set')
        # The title is the second '_'-separated field of the key.
        plt.title('%s' % key.split('_')[1])
    plt.show()
Exemple #11
0
def compare_unsup_indet():
    """
  Essaie de faire un lien entre les événements indéterminés mal classés par 
  LR ou SVM avec classes non-supervisées.
  """
    from matplotlib.gridspec import GridSpec

    print "### COMPARE UNSUP AND SUP ###"
    from results import AnalyseResults
    opt = AnalyseResults()

    m = opt.man
    a = opt.auto
    unsup = read_binary_file(
        '../results/Ijen/KMEANS/results_kmeans_3c_11f_ini')

    nb_auto = [len(opt.auto[opt.auto.Type == t]) for t in opt.opdict['types']]
    NB_class = len(opt.opdict['types'])

    for cl in opt.opdict['types']:
        #for cl in ['?']:
        m = opt.man[opt.man.Type == cl]
        a = opt.auto.reindex(index=m.index)

        colors = ['yellowgreen', 'gold', 'lightskyblue', 'lightcoral']

        opt.data_for_LR()
        opt.opdict['channels'] = 'Z'
        opt.opdict['stations'] = ['IJEN']
        for sta in opt.opdict['stations']:
            for comp in opt.opdict['channels']:
                u = pd.DataFrame(index=unsup[(sta, comp)][0]['list_ev'],
                                 columns=['Type', 'NumType'])
                u['Type'] = unsup[(sta, comp)][0]['StrClass']
                u['NumType'] = unsup[(sta, comp)][0]['NumClass']
                u = u.reindex(index=m.index)
                trad = unsup[(sta, comp)][0]['Equivalence']

                fig = plt.figure(figsize=(12, 8))
                fig.set_facecolor('white')
                nb_l, nb_c = 2, NB_class * 2
                grid = GridSpec(nb_l, nb_c)

                ax = fig.add_subplot(grid[0, :nb_c / 2])
                ax.pie(nb_auto,
                       labels=opt.opdict['types'],
                       autopct='%1.1f%%',
                       colors=colors)
                ax.text(.4,
                        -.1,
                        r'# events = %d' % np.sum(nb_auto),
                        transform=ax.transAxes)
                ax.axis("equal")

                nbs = [len(a[a.Type == t]) for t in opt.opdict['types']]
                ax = fig.add_subplot(grid[0, nb_c / 2:])
                ax.pie(nbs,
                       labels=opt.opdict['types'],
                       autopct='%1.1f%%',
                       colors=colors)
                ax.text(.4,
                        -.1,
                        r'# events = %d' % np.sum(nbs),
                        transform=ax.transAxes)
                ax.axis("equal")

                lab_c = np.array(trad).copy()
                for it, t in enumerate(opt.opdict['types']):
                    i = np.where(np.array(trad) == t)[0][0]
                    lab_c[it] = i

                for it, t in enumerate(opt.opdict['types']):
                    ared = a[a.Type == t]
                    ured = u.reindex(index=ared.index)
                    nbs = [
                        len(ured[ured.Type == ty])
                        for ty in opt.opdict['types']
                    ]
                    ax = fig.add_subplot(grid[1, 2 * it:2 * it + 2])
                    ax.pie(nbs, labels=lab_c, autopct='%1.1f%%', colors=colors)
                    ax.text(.3,
                            -.1,
                            r'# %s = %d' % (t, np.sum(nbs)),
                            transform=ax.transAxes)
                    #ax.set_title(t)

                plt.figtext(.1,
                            .92,
                            '(a) %s' % opt.opdict['method'].upper(),
                            fontsize=16)
                plt.figtext(.55,
                            .92,
                            '(b) Manual repartition of %s' % cl,
                            fontsize=16)
                plt.figtext(.1, .45, r'(c) $K$-means', fontsize=16)
                for it, t in enumerate(trad):
                    plt.figtext(.3 + it * .15, .45,
                                r'%s $\approx$ %s' % (it, trad[it]))
                plt.savefig(
                    '../results/Ijen/KMEANS/figures/unsup_compSVM_%s.png' % cl)

    plt.show()
Exemple #12
0
def plot_best_worst():
    """
  Plots the pdfs of the training set for the best and worst draws 
  and compare with the whole training set.
  """
    from options import MultiOptions, read_binary_file
    opt = MultiOptions()

    feat_list = [('AsDec', 0, 1), ('Bandwidth', 5, 0), ('CentralF', 1, 0),
                 ('Centroid_time', 4, 0), ('Dur', 4, 1), ('Ene0-5', 1, 4),
                 ('Ene5-10', 0, 4), ('Ene', 0, 3), ('F_low', 4, 2),
                 ('F_up', 0, 7), ('IFslope', 7, 8), ('Kurto', 2, 0),
                 ('MeanPredF', 1, 4), ('PredF', 1, 4), ('RappMaxMean', 0, 1),
                 ('RappMaxMeanTF', 4, 0), ('Skewness', 2, 5),
                 ('TimeMaxSpec', 4, 0), ('Rectilinearity', 8, 3),
                 ('Planarity', 1, 2)]

    opt.opdict['feat_list'] = opt.opdict['feat_all']
    opt.opdict['feat_log'] = ['AsDec', 'Ene', 'Kurto', 'RappMaxMean']
    opt.opdict[
        'feat_filename'] = '../results/Piton/features/Piton_trainset.csv'
    opt.opdict['label_filename'] = '../lib/Piton/class_train_set.csv'
    x_all, y_all = opt.features_onesta('BOR', 'Z')

    list_files = glob.glob(os.path.join('../lib/Piton', 'learning*'))
    list_files.sort()

    m = len(y_all)
    mtraining = int(0.6 * m)
    mcv = int(0.2 * m)
    mtest = int(0.2 * m)

    for feat, best, worst in feat_list:
        print feat, best, worst
        fig = plt.figure()
        fig.set_facecolor('white')

        # ALL
        opt.x = x_all.reindex(columns=[feat])
        opt.y = y_all.reindex(index=opt.x.index)
        opt.opdict['feat_list'] = [feat]
        opt.compute_pdfs()
        g = opt.gaussians
        plt.plot(g[feat]['vec'], g[feat]['VT'], 'k', lw=2., label='VT')
        plt.plot(g[feat]['vec'], g[feat]['EB'], 'k--', lw=2., label='EB')

        labels = ['best', 'worst']
        colors = ['r', 'g']
        b_file = list_files[best]
        w_file = list_files[worst]
        for ifile, file in enumerate([b_file, w_file]):
            dic = read_binary_file(file)

            # TRAINING SET
            opt.x = x_all.reindex(columns=[feat], index=dic[:mtraining])
            opt.y = y_all.reindex(index=dic[:mtraining])
            opt.compute_pdfs()
            g_train = opt.gaussians
            plt.plot(g_train[feat]['vec'],
                     g_train[feat]['VT'],
                     '-',
                     c=colors[ifile],
                     label=labels[ifile])
            plt.plot(g_train[feat]['vec'],
                     g_train[feat]['EB'],
                     '--',
                     c=colors[ifile])

        plt.legend()
        plt.title(feat)
        plt.savefig('%s/best_worst_%s.png' % (opt.opdict['fig_path'], feat))
        plt.show()
Exemple #13
0
def plot_pdf_subsets():
    """
  Plots the pdfs of the training set, CV set and test set on the same 
  figure. One subfigure for each event type. 
  """
    from options import MultiOptions, read_binary_file
    opt = MultiOptions()

    feat_list = [('AsDec', 0, 1), ('Bandwidth', 5, 0), ('CentralF', 1, 0),
                 ('Centroid_time', 4, 0), ('Dur', 4, 1), ('Ene0-5', 1, 4),
                 ('Ene5-10', 0, 4), ('Ene', 0, 3), ('F_low', 4, 2),
                 ('F_up', 0, 7), ('IFslope', 7, 8), ('Kurto', 2, 0),
                 ('MeanPredF', 1, 4), ('PredF', 1, 4), ('RappMaxMean', 0, 1),
                 ('RappMaxMeanTF', 4, 0), ('Skewness', 2, 5),
                 ('TimeMaxSpec', 4, 0), ('Rectilinearity', 8, 3),
                 ('Planarity', 1, 2)]

    opt.opdict['feat_list'] = opt.opdict['feat_all']
    opt.opdict[
        'feat_filename'] = '../results/Piton/features/Piton_trainset.csv'
    opt.opdict['label_filename'] = '../lib/Piton/class_train_set.csv'
    x_all, y_all = opt.features_onesta('BOR', 'Z')
    print len(y_all)

    list_files = glob.glob(os.path.join('../lib/Piton', 'learning*'))
    list_files.sort()

    m = len(y_all)
    mtraining = int(0.6 * m)
    mcv = int(0.2 * m)
    mtest = int(0.2 * m)

    for feat, best, worst in feat_list:
        print feat, best, worst
        fig = plt.figure(figsize=(10, 4))
        fig.set_facecolor('white')

        ax1 = fig.add_subplot(121)
        ax2 = fig.add_subplot(122)

        # ALL
        opt.x = x_all.reindex(columns=[feat])
        opt.y = y_all.reindex(index=opt.x.index)
        opt.opdict['feat_list'] = [feat]
        opt.compute_pdfs()
        g = opt.gaussians
        ax1.plot(g[feat]['vec'], g[feat]['VT'], 'k', lw=2.)
        ax2.plot(g[feat]['vec'], g[feat]['EB'], 'k', lw=2.)

        labels = ['best', 'worst']
        colors = ['r', 'g']
        b_file = list_files[best]
        w_file = list_files[worst]
        for ifile, file in enumerate([b_file, w_file]):
            dic = read_binary_file(file)

            # TRAINING SET
            opt.x = x_all.reindex(columns=[feat], index=dic[:mtraining])
            opt.y = y_all.reindex(index=dic[:mtraining])
            opt.compute_pdfs()
            g_train = opt.gaussians
            ax1.plot(g_train[feat]['vec'],
                     g_train[feat]['VT'],
                     '-',
                     c=colors[ifile],
                     label=labels[ifile])
            ax2.plot(g_train[feat]['vec'],
                     g_train[feat]['EB'],
                     '-',
                     c=colors[ifile],
                     label=labels[ifile])

            # CV SET
            opt.x = x_all.reindex(columns=[feat],
                                  index=dic[mtraining:mtraining + mcv])
            opt.y = y_all.reindex(index=dic[mtraining:mtraining + mcv])
            opt.compute_pdfs()
            g_cv = opt.gaussians
            ax1.plot(g_cv[feat]['vec'],
                     g_cv[feat]['VT'],
                     '--',
                     c=colors[ifile])
            ax2.plot(g_cv[feat]['vec'],
                     g_cv[feat]['EB'],
                     '--',
                     c=colors[ifile])

            # TEST SET
            opt.x = x_all.reindex(columns=[feat], index=dic[mtraining + mcv:])
            opt.y = y_all.reindex(index=dic[mtraining + mcv:])
            opt.compute_pdfs()
            g_test = opt.gaussians
            ax1.plot(g_test[feat]['vec'],
                     g_test[feat]['VT'],
                     ':',
                     c=colors[ifile])
            ax2.plot(g_test[feat]['vec'],
                     g_test[feat]['EB'],
                     ':',
                     c=colors[ifile])

        ax1.set_title('VT')
        ax2.set_title('EB')
        ax1.legend()
        ax2.legend()
        plt.suptitle(feat)
        plt.savefig('%s/subsets_%s.png' % (opt.opdict['fig_path'], feat))
        plt.show()
Exemple #14
0
def compare_training():
    """
    Compare the repartition of the training sets :
    decomposition in training (60%), CV (20%) and test (20%) sets.

    Every ``learning*`` file found in ``../lib/Piton`` is read as one draw.
    For each draw, three pie charts show the number of 'VT' and 'EB' events
    in the training, CV and test subsets. The figure is saved as
    ``../results/Piton/figures/tirages.png`` and then displayed.
    """
    from matplotlib.gridspec import GridSpec
    from options import read_binary_file
    libpath = '../lib/Piton'
    list_files = sorted(glob.glob(os.path.join(libpath, 'learning*')))

    df = pd.read_csv(os.path.join(libpath, 'class_train_set.csv'))
    labels = np.array(df.Type.values)

    m = len(labels)
    mtraining = int(0.6 * m)
    mcv = int(0.2 * m)

    nbc = 3
    # The historical layout is 4 rows of 3 draws; add rows instead of raising
    # an IndexError when more than 12 draws are available.
    nbl = max(4, (len(list_files) + nbc - 1) // nbc)
    grid = GridSpec(nbl, nbc * 3)
    # Colour pairs alternate between even and odd draws.
    palettes = [['powderblue', 'plum'], ['lightskyblue', 'lightcoral']]
    fig = plt.figure(figsize=(18, 12))
    fig.set_facecolor('white')

    for idraw, fname in enumerate(list_files):
        colors = palettes[idraw % 2]

        dic = read_binary_file(fname)
        train = labels[dic[:mtraining]]
        cv = labels[dic[mtraining:mtraining + mcv]]
        # The test set is the remainder of the draw, so its size is not
        # necessarily int(0.2*m): the real subset sizes are displayed below.
        test = labels[dic[mtraining + mcv:]]

        # Floor division keeps the grid indices integral whatever the
        # division semantics in force.
        row = idraw // nbc
        col = idraw % nbc * 3

        # (subset, title, x position of the texts, format of the size label)
        panels = [
            (train, 'Training set', -0.5, r'$m_{training}=%d$'),
            (cv, 'CV set', -0.3, r'$m_{CV}=%d$'),
            (test, 'Test set', -0.3, r'$m_{test}=%d$'),
        ]
        for ipanel, (subset, title, xtext, size_fmt) in enumerate(panels):
            prop = [len(subset[subset == 'VT']), len(subset[subset == 'EB'])]
            plt.subplot(grid[row, col + ipanel], aspect='equal')
            plt.pie(prop,
                    autopct='%1.1f%%',
                    labels=['VT', 'EB'],
                    colors=colors)
            plt.text(xtext, 1.4, title)
            plt.text(xtext, -1.4, size_fmt % len(subset))
            if ipanel == 1:
                # The draw number is written above the central (CV) pie.
                plt.text(-.5, 2., 'Tirage %d' % idraw)
    plt.savefig('../results/Piton/figures/tirages.png')
    plt.show()
Exemple #15
0
def plot_envelopes():
    """
    Plot one EB and one VT event with envelopes computed with several
    smoothing parameters.

    The traces are read from ../data/Piton/envelope (files trace_EB and
    trace_VT). Each panel shows the four envelopes (w = 51, 101, 501 and
    1001) and an inset zooming on the window shaded in grey. The figure is
    only displayed, not saved.
    """
    from options import read_binary_file
    from features_extraction_piton import process_envelope
    datadir = '../data/Piton/envelope'

    fig = plt.figure()
    fig.set_facecolor('white')

    # One entry per smoothing window: (w, plot keywords, legend label)
    curves = [
        (51, {'c': 'r'}, '0.5 s'),
        (101, {'c': 'b'}, '1 s'),
        (501, {'c': 'g', 'lw': 2.}, '5 s'),
        (1001, {'c': 'y', 'lw': 2.}, '10 s'),
    ]

    def draw_panel(position, trace, t_axis, zoom, inset_height, title,
                   with_labels):
        """Draw the envelopes of `trace` and their zoom; return the axes."""
        envelopes = [process_envelope(trace, w=w) for w, _, _ in curves]

        ax = fig.add_subplot(position)
        for env, (_, kwargs, label) in zip(envelopes, curves):
            if with_labels:
                ax.plot(t_axis[:-1], env, label=label, **kwargs)
            else:
                ax.plot(t_axis[:-1], env, **kwargs)

        from mpl_toolkits.axes_grid1.inset_locator import inset_axes
        axins = inset_axes(ax, width="30%", height=inset_height, loc=1)
        i1, i2 = zoom
        ax.axvspan(t_axis[i1], t_axis[i2], color='gray', alpha=.3)
        for env, (_, kwargs, _) in zip(envelopes, curves):
            axins.plot(t_axis[i1:i2], env[i1:i2], **kwargs)
        axins.xaxis.set_ticks_position('bottom')
        axins.yaxis.set_ticklabels('')
        axins.yaxis.set_visible(False)

        ax.set_title(title)
        ax.set_xlim([0, t_axis[-1]])
        return ax

    ### EB ###
    tr_eb = read_binary_file('%s/trace_EB' % datadir)
    # Presumably 0.01 s between samples (100 Hz) -- TODO confirm. The same
    # time axis is reused for the VT trace.
    time = np.linspace(0, len(tr_eb) * 0.01, len(tr_eb))
    ax1 = draw_panel(211, tr_eb, time, (6000, 8000), "60%", 'Eboulement',
                     True)
    ax1.set_xticklabels('')
    ax1.legend(loc=2, prop={'size': 10})

    ### VT ###
    tr_vt = read_binary_file('%s/trace_VT' % datadir)
    ax2 = draw_panel(212, tr_vt, time, (3000, 5000), "70%",
                     'Volcano-tectonique', False)
    ax2.set_xlabel('Time (s)')

    plt.figtext(0.03, 0.89, '(a)')
    plt.figtext(0.03, 0.46, '(b)')
    plt.show()
def classifier(opt):
  """
  Classification of the different types of events.
  opt is an object of the class Options()
  """

  list_attr = opt.__dict__.keys()
  if not 'x' in list_attr:
    opt.do_tri()

  X = opt.x
  Y = opt.y

  list_attr = opt.__dict__.keys()
  if 'train_x' in list_attr:
    X_TRAIN = opt.train_x
    Y_TRAIN = opt.train_y

  dic_results = {}
  for isc in sorted(opt.xs):

    print "==========",opt.trad[isc],"=========="
    subdic = {}

    if isc > 0:
      if opt.trad[isc][0] == sta_prev:
        marker_sta = 1
      else:
        marker_sta = 0
        sta_prev = opt.trad[isc][0]
    else:
      marker_sta = 0
      sta_prev = opt.trad[isc][0]

    if len(opt.xs[isc]) == 0:
      continue


    # About the training set
    if len(opt.opdict['stations']) == 1 and opt.opdict['boot'] > 1 and 'train_x' not in list_attr:
      if os.path.exists(opt.opdict['train_file']):
        print opt.opdict['train_file']
        TRAIN_Y = read_binary_file(opt.opdict['train_file'])
      else:
        TRAIN_Y = {}
        for tir in range(opt.opdict['boot']):
          TRAIN_Y[tir] = {}
    elif 'train_x' in list_attr:
      opt.x = opt.xs_train[isc]
      opt.y = opt.ys_train[isc]
      if opt.opdict['plot_pdf']:
        opt.compute_pdfs()
        g_train = opt.gaussians
        del opt.gaussians
      opt.classname2number()
      x_ref_train = opt.x
      y_ref_train = opt.y


    # About the test set
    opt.x = opt.xs[isc]
    opt.y = opt.ys[isc]
    if opt.opdict['plot_pdf']:
      opt.compute_pdfs()
 
    set = pd.DataFrame(index=opt.ys[isc].index,columns=['Otime'])
    set['Otime'] = opt.xs[isc].index

    opt.classname2number()
    x_test = opt.x
    y_ref = opt.y
    x_ref = opt.x

    if opt.opdict['plot_dataset']:
      opt.composition_dataset()

    #K = len(opt.types)

    ### ITERATE OVER TRAINING SET DRAWS ###
    for b in range(opt.opdict['boot']):
      print "\n-------------------- # iter: %d --------------------\n"%(b+1)

      subsubdic = {}
      print "WHOLE SET", x_ref.shape, y_ref.shape

      ### if there is no pre-defined training set ###
      if 'train_x' not in list_attr:
        x_train = x_test.copy()
        if len(opt.opdict['stations']) == 1 and opt.opdict['boot'] > 1:
          if len(TRAIN_Y[b]) > 0:
            y_train = y_ref.reindex(index=TRAIN_Y[b]['training_set'])
            y_train = y_train.dropna(how='any')
            y_cv = y_ref.reindex(index=TRAIN_Y[b]['cv_set'])
            y_cv = y_cv.dropna(how='any')
            y_test = y_ref.reindex(index=TRAIN_Y[b]['test_set'])
            y_test = y_test.dropna(how='any')
          else:
            y_train, y_cv, y_test = generate_datasets(opt.opdict['proportions'],opt.numt,y_ref)
            TRAIN_Y[b]['training_set'] = map(int,list(y_train.index))
            TRAIN_Y[b]['cv_set'] = map(int,list(y_cv.index))
            TRAIN_Y[b]['test_set'] = map(int,list(y_test.index))

        ### multi-stations case ###
        else:
          if marker_sta == 0:
            y_train, y_cv, y_test = generate_datasets(opt.opdict['proportions'],opt.numt,y_ref)
            list_ev_train = y_train.index
            list_ev_cv = y_cv.index
            list_ev_test = y_test.index
          else:
            y_train = y_ref.reindex(index=list_ev_train)
            y_train = y_train.dropna(how='any')
            y_cv = y_ref.reindex(index=list_ev_cv)
            y_cv = y_cv.dropna(how='any')
            y_test = y_ref.reindex(index=list_ev_test)
            y_test = y_test.dropna(how='any')

        x_train = x_ref.reindex(index=y_train.index)

      ### if a training set was pre-defined ###
      else:
        x_train = x_ref_train.copy()
        y_train = y_ref_train.copy()
        y_train, y_cv, y_test = generate_datasets(opt.opdict['proportions'],opt.numt,y_ref,y_train=y_train)

      x_cv = x_ref.reindex(index=y_cv.index)
      x_test = x_ref.reindex(index=y_test.index)

      i_train = y_train.index
      x_train.index = range(x_train.shape[0])
      y_train.index = range(y_train.shape[0])
      print "TRAINING SET", x_train.shape, y_train.shape
      if x_train.shape[0] != y_train.shape[0]:
        print "Training set: Incoherence in x and y dimensions"
        sys.exit()

      i_cv = y_cv.index
      x_cv.index = range(x_cv.shape[0])
      y_cv.index = range(y_cv.shape[0])
      print "CROSS-VALIDATION SET", x_cv.shape, y_cv.shape
      if x_cv.shape[0] != y_cv.shape[0]:
        print "Cross-validation set: Incoherence in x and y dimensions"
        sys.exit()

      subsubdic['list_ev'] = np.array(y_test.index)

      i_test = y_test.index
      x_test.index = range(x_test.shape[0])
      y_test.index = range(y_test.shape[0])
      print "TEST SET", x_test.shape, y_test.shape
      if x_test.shape[0] != y_test.shape[0]:
        print "Test set: Incoherence in x and y dimensions"
        sys.exit()

      opt.train_x = x_train
      opt.x = x_test
      opt.train_y = y_train
      opt.y = y_test

      if opt.opdict['plot_pdf']:
        opt.plot_all_pdfs(save=opt.opdict['save_pdf'])
        if 'train_x' in list_attr:
          opt.plot_superposed_pdfs(g_train,save=opt.opdict['save_pdf'])
        else:
          opt.plot_all_pdfs(save=opt.opdict['save_pdf'])

      if opt.opdict['method'] == '1b1':
        # EXTRACTEURS
        print "********** EXTRACTION 1-BY-1 **********"
        opt.opdict['boot'] = 1
        one_by_one(opt,x_ref,y_ref,set['Otime'],boot=10,method='svm')
        continue

      elif opt.opdict['method'] == 'ova':
        print "********** EXTRACTION 1-VS-ALL **********"
        opt.opdict['boot'] = 1
        one_vs_all(opt,x_ref,y_ref,set['Otime'],boot=10,method='svm')
        continue

      elif opt.opdict['method'] in ['svm','svm_nl']:
        # SVM
        print "********** SVM **********"
        if opt.opdict['method'] == 'svm':
          kern = 'Lin'
        else:
          kern = 'NonLin'

        out = implement_svm(x_train,x_test,y_train,y_test,opt.types,opt.opdict,kern=kern,proba=opt.opdict['probas'])

        if 'map' in sorted(out):
          opt.map = out['map']

        if 'thetas' in sorted(out):
          theta_vec = out['thetas']
          theta,threshold = {},{}
          for it in range(len(theta_vec)):
            theta[it+1] = np.append(theta_vec[it][-1],theta_vec[it][:-1])
            threshold[it+1] = 0.5
          out['thetas'] = theta
          out['threshold'] = threshold

      elif opt.opdict['method'] == 'lrsk':
        # LOGISTIC REGRESSION (scikit learn)
        print "********* Logistic regression (sklearn) **********"
        out = implement_lr_sklearn(x_train,x_test,y_train,y_test)
        threshold, theta = {},{}
        for it in range(len(out['thetas'])):
          threshold[it+1] = 0.5
          theta[it+1] = np.append(out['thetas'][it][-1],out['thetas'][it][:-1])
        out['threshold'] = threshold
        out['thetas'] = theta

      elif opt.opdict['method'] == 'lr':
        # LOGISTIC REGRESSION
        print "********* Logistic regression **********"
        from LR_functions import do_all_logistic_regression
        out = do_all_logistic_regression(x_train,x_test,x_cv,y_train,y_test,y_cv)
        theta = out['thetas']
        threshold = out['threshold']
        if 'learn_file' in sorted(opt.opdict):
          learn_filename = opt.opdict['learn_file']
          if not os.path.exists(learn_filename):
            wtr = write_binary_file(learn_filename,i_train)

      CLASS_test = out['label_test']
      CLASS_train = out['label_train']

      # TRAINING SET
      print "\t *TRAINING SET"
      y_train_np = y_train.NumType.values.ravel()  
      from sklearn.metrics import confusion_matrix
      cmat_train = confusion_matrix(y_train_np,CLASS_train)
      p_tr = dic_percent(cmat_train,opt.types,verbose=True)
      out['rate_train'] = p_tr
      print "   Global : %.2f%%"%p_tr['global']
      if opt.opdict['plot_confusion'] or opt.opdict['save_confusion']:
        plot_confusion_mat(cmat_train,opt.types,'Training',opt.opdict['method'].upper())
        if opt.opdict['save_confusion']:
          savefig = '%s/training_%s.png'%(opt.opdict['fig_path'],opt.opdict['result_file'])
          print "Confusion matrix saved in %s"%savefig
          plt.savefig(savefig)

      # TEST SET
      print "\t *TEST SET"
      y_test_np = y_test.NumType.values.ravel()
      cmat_test = confusion_matrix(y_test_np,CLASS_test)
      p_test = dic_percent(cmat_test,opt.types,verbose=True)
      out['rate_test'] = p_test
      print "   Global : %.2f%%"%p_test['global']
      if opt.opdict['plot_confusion'] or opt.opdict['save_confusion']:
        plot_confusion_mat(cmat_test,opt.types,'Test',opt.opdict['method'].upper())
        if opt.opdict['save_confusion']:
          savefig = '%s/test_%s.png'%(opt.opdict['fig_path'],opt.opdict['result_file'])
          print "Confusion matrix saved in %s"%savefig
          plt.savefig(savefig)
        if opt.opdict['plot_confusion']:
          plt.show()
        else:
          plt.close()

      # PLOT PRECISION AND RECALL
      if opt.opdict['plot_prec_rec']:
        from LR_functions import normalize,plot_precision_recall
        x_train, x_test = normalize(x_train,x_test)
        plot_precision_recall(x_train,y_train.NumType,x_test,y_test.NumType,theta)

      pourcentages = (p_tr['global'],p_test['global'])
      out['method'] = opt.opdict['method']
      out['types'] = opt.types
      opt.out = out

      # PLOT DECISION BOUNDARIES
      n_feat = x_train.shape[1] # number of features
      if n_feat < 4:
        if opt.opdict['plot_sep'] or opt.opdict['save_sep']:
          print "\nPLOTTING"
          print "Theta values:",theta
          print "Threshold:", threshold

          # COMPARE AND PLOT LR AND SVM RESULTS
          out_svm, out_nl = {},{}
          dir = '%s_SEP'%opt.opdict['method'].upper()
          if opt.opdict['method']=='lr' and opt.opdict['compare']:
            dir = 'LR_SVM_SEP'
            out_svm = implement_svm(x_train,x_test,y_train,y_test,opt.types,opt.opdict,kern='Lin')
            cmat_svm_tr = confusion_matrix(y_train_np,out_svm['label_train'])
            cmat_svm_test = confusion_matrix(y_test_np,out_svm['label_test'])
            svm_ptr = dic_percent(cmat_svm_tr,opt.types)
            svm_pt = dic_percent(cmat_svm_test,opt.types)
            theta_svm,t_svm = {},{}
            for it in range(len(out_svm['thetas'])):
              theta_svm[it+1] = np.append(out_svm['thetas'][it][-1],out_svm['thetas'][it][:-1])
              t_svm[it+1] = 0.5
            out_svm['thetas'] = theta_svm
            out_svm['threshold'] = t_svm
            out_svm['rate_test'] = svm_pt
            out_svm['rate_train'] = svm_ptr
            out_svm['method'] = 'SVM'

          if opt.opdict['method'] in ['lr','svm'] and opt.opdict['compare_nl']:
            dir = '%s_NL_SEP'%opt.opdict['method'].upper()
            out_nl = implement_svm(x_train,x_test,y_train,y_test,opt.types,opt.opdict,kern='NonLin')
            cmat_svm_tr = confusion_matrix(y_train_np,out_nl['label_train'])
            cmat_svm_test = confusion_matrix(y_test_np,out_nl['label_test'])
            svm_ptr = dic_percent(cmat_svm_tr,opt.types)
            svm_pt = dic_percent(cmat_svm_test,opt.types)
            out_nl['rate_test'] = svm_pt
            out_nl['rate_train'] = svm_ptr
            out_nl['method'] = 'SVM_NL'

          save_dir = os.path.join(opt.opdict['fig_path'],dir)
          opt.verify_and_create(save_dir)

          from LR_functions import normalize
          x_train, x_test = normalize(x_train,x_test)

          x_train_good = x_train.reindex(index=y_train[y_train.NumType.values==CLASS_train].index)
          x_train_bad = x_train.reindex(index=y_train[y_train.NumType.values!=CLASS_train].index)
          good_train = y_train.reindex(index=x_train_good.index)

          x_test_good = x_test.reindex(index=y_test[y_test.NumType.values==CLASS_test].index)
          x_test_bad = x_test.reindex(index=y_test[y_test.NumType.values!=CLASS_test].index)

          # PLOT FOR 1 ATTRIBUTE AND 2 CLASSES
          if n_feat == 1 and len(opt.opdict['types']) == 2:
            name = opt.opdict['feat_list'][0]
            from plot_functions import plot_hyp_func_1f, histo_pdfs
            if opt.opdict['method']=='lr' and opt.opdict['compare']:
              plot_hyp_func_1f(x_train,y_train,theta,opt.opdict['method'],threshold=threshold,x_ok=x_test_good,x_bad=x_test_bad,th_comp=theta_svm,cmat_test=cmat_test,cmat_svm=cmat_svm_test,cmat_train=cmat_train)
            else:
              #histo_pdfs(x_test,y_test,x_train=x_train,y_train=y_train)
              plot_hyp_func_1f(x_train,y_train,theta,opt.opdict['method'],threshold=threshold,x_ok=x_test_good,x_bad=x_test_bad,cmat_test=cmat_test,cmat_train=cmat_train)

          # PLOT FOR 2 ATTRIBUTES AND 2 to 3 CLASSES
          elif n_feat == 2:
            name = '%s_%s'%(opt.opdict['feat_list'][0],opt.opdict['feat_list'][1])
            if opt.opdict['method'] in ['lr','svm']:
              from plot_2features import plot_2f_all
              plot_2f_all(out,x_train,y_train,x_test,y_test,x_test_bad)
            elif opt.opdict['method']=='lr' and opt.opdict['compare']:
              from plot_2features import plot_2f_all
              plot_2f_all(out,x_train,y_train,x_test,y_test,x_test_bad,out_comp=out_svm,map_nl=out_nl)
            elif opt.opdict['method'] == 'svm_nl':
              from plot_2features import plot_2f_nonlinear
              plot_2f_nonlinear(out,x_train,y_train,x_test,y_test,y_train=y_train)

          # PLOT FOR 3 ATTRIBUTES
          elif n_feat == 3:
            from plot_functions import plot_db_3d
            plot_db_3d(x_train,y_train.NumType,theta[1],title='Training set')
            plot_db_3d(x_test,y_test.NumType,theta[1],title='Test set')
            name = '%s_%s_%s'%(opt.opdict['feat_list'][0],opt.opdict['feat_list'][1],opt.opdict['feat_list'][2])

          if opt.opdict['save_sep']:
            savename = '%s/CL_sep_%s.png'%(save_dir,name)
            print "Figure saved in %s"%savename
            plt.savefig(savename)
          if opt.opdict['plot_sep']:
            plt.show()
          else:
            plt.close()

      # WRITE RESULTS INTO A DICTIONARY
      subsubdic['%'] = pourcentages
      trad_CLASS_test = []
      for i in CLASS_test:
        i = int(i)
        trad_CLASS_test.append(opt.types[i])
      subsubdic['classification'] = trad_CLASS_test
      if opt.opdict['probas']:
        subsubdic['proba'] = out['probas']
      if opt.opdict['plot_var']:
        subsubdic['out'] = out
      subdic[b] = subsubdic

    if opt.opdict['plot_var'] and opt.opdict['method'] in ['lr','svm','lrsk'] and n_feat==2 and len(opt.opdict['types'])==2:
      from plot_2features import plot_2f_variability
      plot_2f_variability(subdic,x_train,y_train,x_test,y_test)
      plt.savefig('%s/%s_variability_pas.png'%(opt.opdict['fig_path'],opt.opdict['method'].upper()))
      plt.show()


    dic_results[opt.trad[isc]] = subdic

  dic_results['header'] = {}
  dic_results['header']['features'] = opt.opdict['feat_list']
  dic_results['header']['types'] = opt.opdict['types']
  dic_results['header']['catalog'] = opt.opdict['label_test']

  if opt.opdict['method'] in ['lr','lrsk','svm','svm_nl']:
    print "Save results in file %s"%opt.opdict['result_path']
    write_binary_file(opt.opdict['result_path'],dic_results)

  if 'train_file' in sorted(opt.opdict):
    if not os.path.exists(opt.opdict['train_file']) and opt.opdict['boot'] > 1:
      write_binary_file(opt.opdict['train_file'],TRAIN_Y)