Example #1
0
def plot_soutenance():
    """
    Plot the PDFs of the four attributes defined by Clément for the
    slides of the defence presentation.

    One subplot is drawn per entry of the computed gaussians (sorted by
    name), with the 'VT' curve in blue and the 'EB' curve in red.  The
    figure is written to a hard-coded PNG path and then shown.
    """
    from options import MultiOptions

    cfg = MultiOptions()
    cfg.opdict['channels'] = ['Z']
    cfg.opdict['feat_list'] = ['AsDec', 'Dur', 'Ene', 'KRapp']
    cfg.do_tri()
    cfg.x = cfg.xs[0]
    cfg.y = cfg.ys[0]
    cfg.compute_pdfs()
    pdfs = cfg.gaussians

    canvas = plt.figure(figsize=(12, 2.5))
    canvas.set_facecolor('white')
    panel = 0
    for name in sorted(pdfs):
        panel += 1
        axes = canvas.add_subplot(1, 4, panel)
        for event_class, colour in (('VT', 'b'), ('EB', 'r')):
            axes.plot(pdfs[name]['vec'],
                      pdfs[name][event_class],
                      ls='-',
                      c=colour,
                      lw=2.)
        axes.set_title(name)
        # Ticks stay on the bottom/left sides, but their labels are blanked.
        axes.xaxis.set_ticks_position('bottom')
        axes.xaxis.set_ticklabels('')
        axes.yaxis.set_ticks_position('left')
        axes.yaxis.set_ticklabels('')
        if panel == 1:
            # Only the first panel carries the legend.
            axes.legend(['VT', 'EB'], loc=1, prop={'size': 10})
    plt.savefig('/home/nadege/Dropbox/Soutenance/pdfs.png')
    plt.show()
Example #2
0
    def __init__(self):
        """
        Set up the options, load the stored results and analyse them.

        The results are read from the file named by
        ``opdict['result_path']``.  Their 'features' entry is copied into
        ``opdict['feat_list']`` and removed from the results before
        ``do_analysis`` is called.
        """
        MultiOptions.__init__(self)

        result_path = self.opdict['result_path']
        print("ANALYSIS OF %s" % result_path)

        loaded = self.read_binary_file(result_path)
        self.results = loaded
        self.opdict['feat_list'] = loaded['features']
        del loaded['features']

        self.do_analysis()
Example #3
0
def run_unsupervised():
    """
    Run the unsupervised classifier on a fresh ``MultiOptions`` object
    whose 'method' option is forced to 'kmean'.
    """
    from options import MultiOptions

    settings = MultiOptions()
    settings.opdict['method'] = 'kmean'

    import unsupervised
    unsupervised.classifier(settings)
def run_unsupervised():
  """
  Build the default options, force the 'method' option to 'kmean' and
  hand the options to the unsupervised classifier.
  """
  from options import MultiOptions
  options_set = MultiOptions()
  options_set.opdict['method'] = 'kmean'

  import unsupervised
  unsupervised.classifier(options_set)
Example #5
0
  def __init__(self):
    """
    Initialise the options, read the results file and run the analysis.

    The file read is the one given by ``opdict['result_path']``; the
    'features' entry of the results becomes ``opdict['feat_list']`` and is
    then deleted from the results.
    """
    MultiOptions.__init__(self)

    path = self.opdict['result_path']
    print("ANALYSIS OF %s"%path)
    self.results = self.read_binary_file(path)

    # Move the feature list out of the results and into the options.
    self.opdict['feat_list'] = self.results['features']
    del self.results['features']

    self.do_analysis()
Example #6
0
  def __init__(self):
    """
    Initialise the options, set the location of the automatic
    classification file, then concatenate and display the results.

    ``opdict['class_auto_path']`` is built as
    <outdir>/<METHOD in upper case>/<class_auto_file>.
    """
    MultiOptions.__init__(self)

    print("ANALYSIS OF %s"%self.opdict['result_path'])

    self.opdict['class_auto_file'] = 'auto_class.csv'
    path_parts = (self.opdict['outdir'],
                  self.opdict['method'].upper(),
                  self.opdict['class_auto_file'])
    self.opdict['class_auto_path'] = '%s/%s/%s'%path_parts

    self.concatenate_results()
    self.display_results()
Example #7
0
def plot_soutenance():
  """
  Draw the PDFs of the 4 attributes defined by Clément, for the slides of
  the defence presentation.

  The figure has one panel per entry of the computed gaussians, taken in
  sorted order; it is saved to a hard-coded PNG file and then displayed.
  """
  from options import MultiOptions
  mopt = MultiOptions()
  mopt.opdict['channels'] = ['Z']
  mopt.opdict['feat_list'] = ['AsDec','Dur','Ene','KRapp']
  mopt.do_tri()
  mopt.x = mopt.xs[0]
  mopt.y = mopt.ys[0]
  mopt.compute_pdfs()

  densities = mopt.gaussians

  page = plt.figure(figsize=(12,2.5))
  page.set_facecolor('white')
  for rank,attribute in enumerate(sorted(densities)):
    sub = page.add_subplot(1,4,rank+1)
    # 'VT' in blue, 'EB' in red
    sub.plot(densities[attribute]['vec'],densities[attribute]['VT'],ls='-',c='b',lw=2.)
    sub.plot(densities[attribute]['vec'],densities[attribute]['EB'],ls='-',c='r',lw=2.)
    sub.set_title(attribute)
    # Same treatment for both axes: fix the tick side, blank the labels
    for axis,side in ((sub.xaxis,'bottom'),(sub.yaxis,'left')):
      axis.set_ticks_position(side)
      axis.set_ticklabels('')
    if not rank:
      # The legend is only drawn on the first panel
      sub.legend(['VT','EB'],loc=1,prop={'size':10})
  plt.savefig('/home/nadege/Dropbox/Soutenance/pdfs.png')
  plt.show()
Example #8
0
    def __init__(self):
        """
        Initialise the options, define where the automatic classification
        file lives, then concatenate and display the results.

        ``opdict['class_auto_path']`` is set to
        <outdir>/<METHOD in upper case>/<class_auto_file>.
        """
        MultiOptions.__init__(self)

        print("ANALYSIS OF %s" % self.opdict['result_path'])

        self.opdict['class_auto_file'] = 'auto_class.csv'
        method_dir = self.opdict['method'].upper()
        self.opdict['class_auto_path'] = '%s/%s/%s' % (
            self.opdict['outdir'], method_dir, self.opdict['class_auto_file'])

        self.concatenate_results()
        self.display_results()
Example #9
0
def run_all():
  """
  Run the classification selected by ``opdict['method']``.

  * 'kmeans': unsupervised classifier only.
  * 'lr', 'svm', 'svm_nl', 'lrsk': supervised classifier, then
    ``AnalyseResults`` (with the confusion plot if 'plot_confusion' is set).
  * 'ova', '1b1': supervised classifier, then ``AnalyseResultsExtraction``.

  Any other method value does nothing.
  """
  from options import MultiOptions
  settings = MultiOptions()
  method = settings.opdict['method']

  ### UNSUPERVISED METHOD ###
  if method == 'kmeans':
    from unsupervised import classifier
    classifier(settings)
    return

  ### SUPERVISED METHODS ###
  with_confusion = method in ('lr','svm','svm_nl','lrsk')
  with_extraction = method in ('ova','1b1')
  if not (with_confusion or with_extraction):
    return

  from do_classification import classifier
  classifier(settings)

  if with_confusion:
    from results import AnalyseResults
    analysis = AnalyseResults()
    if analysis.opdict['plot_confusion']:
      analysis.plot_confusion()
  else:
    from results import AnalyseResultsExtraction
    AnalyseResultsExtraction()
Example #10
0
def run_all():
    """
    Run the classifier, then analyse its results.

    For the 'lr', 'svm' and 'lrsk' methods the results go through
    ``AnalyseResults`` (plotting the confusion matrix when the
    'plot_confusion' option is set); every other method goes through
    ``AnalyseResultsExtraction``.
    """
    from options import MultiOptions
    settings = MultiOptions()

    from do_classification import classifier
    classifier(settings)

    if settings.opdict['method'] in ('lr', 'svm', 'lrsk'):
        from results import AnalyseResults
        analysis = AnalyseResults()
        if analysis.opdict['plot_confusion']:
            analysis.plot_confusion()
        return

    from results import AnalyseResultsExtraction
    AnalyseResultsExtraction()
Example #11
0
def compare_pdfs_train():
    """
    Display and compare the PDFs of the different training sets.

    One figure is shown per feature of ``opdict['feat_list']``.  Each
    training draw read from the 'train_10' file gets its own line or
    marker style, and each event type its own colour.
    """
    from options import MultiOptions
    opt = MultiOptions()

    opt.opdict['stations'] = ['IJEN']
    opt.opdict['channels'] = ['Z']
    opt.opdict['Types'] = ['Tremor', 'VulkanikB', '?']

    opt.opdict['train_file'] = '%s/train_10' % (opt.opdict['libdir'])
    opt.opdict[
        'label_filename'] = '%s/Ijen_reclass_all.csv' % opt.opdict['libdir']

    # NOTE(review): read_binary_file is not imported inside this function;
    # presumably it is available at module level -- confirm.
    train = read_binary_file(opt.opdict['train_file'])
    nb_tir = len(train)

    for sta in opt.opdict['stations']:
        for comp in opt.opdict['channels']:
            opt.x, opt.y = opt.features_onesta(sta, comp)

    X = opt.x
    Y = opt.y
    colors = ['r', 'b', 'g']
    # One (linestyle, marker) pair per draw.  '*' and 'v' are marker codes,
    # not line styles, so they go through the ``marker`` argument instead
    # of ``ls``.
    draw_styles = [('-', 'None'), ('--', 'None'), ('-.', 'None'),
                   (':', 'None'), ('-', 'None'), ('--', 'None'),
                   ('-.', 'None'), (':', 'None'), ('None', '*'),
                   ('None', 'v')]
    features = opt.opdict['feat_list']
    for feat in features:
        print(feat)
        opt.opdict['feat_list'] = [feat]
        fig = plt.figure()
        fig.set_facecolor('white')
        for tir in range(nb_tir):
            # Materialise the indices so they can be reused for X and Y.
            tr = list(map(int, train[tir]))
            opt.x = X.reindex(index=tr, columns=[feat])
            opt.y = Y.reindex(index=tr)
            opt.classname2number()
            opt.compute_pdfs()
            g = opt.gaussians

            # Cycle through the styles so extra draws do not raise IndexError.
            linestyle, marker = draw_styles[tir % len(draw_styles)]
            for it, t in enumerate(opt.types):
                plt.plot(g[feat]['vec'],
                         g[feat][t],
                         ls=linestyle,
                         marker=marker,
                         color=colors[it % len(colors)])
        plt.title(feat)
        plt.legend(opt.types)
        plt.show()
Example #12
0
def compare_lissage():
    """
    Compare the kurtosis PDFs obtained with different smoothing windows.

    The PDFs are computed four times: once with the feature files the
    options already point to (panel titled '0.5 s'), then with three other
    pairs of train/test feature files (panels '1 s', '5 s' and '10 s').
    The four results are drawn side by side with a common x range.
    """

    plot_envelopes()

    from options import MultiOptions
    opt = MultiOptions()
    opt.opdict['channels'] = ['Z']
    opt.opdict['feat_list'] = ['Kurto']
    opt.opdict['feat_log'] = ['Kurto']

    def compute_current_pdfs(rename_columns=False):
        # Sort the data, keep the first x/y pair and compute its PDFs.
        opt.do_tri()
        opt.x = opt.xs[0]
        opt.y = opt.ys[0]
        if rename_columns:
            opt.x.columns = opt.opdict['feat_list']
        opt.compute_pdfs()
        return opt.gaussians

    # Smoothing over 0.5 s windows
    panels = [('0.5 s', compute_current_pdfs(rename_columns=True))]

    # Smoothing over 1 s, 5 s and 10 s windows
    other_runs = (
        ('1 s', '0610_Piton_trainset.csv', '0610_Piton_testset.csv'),
        ('5 s', '1809_Piton_trainset.csv', '1809_Piton_testset.csv'),
        ('10 s', '0510_Piton_trainset.csv', '0510_Piton_testset.csv'),
    )
    for window, train_csv, test_csv in other_runs:
        opt.opdict['feat_train'] = train_csv
        opt.opdict['feat_test'] = test_csv
        panels.append((window, compute_current_pdfs()))

    ### PLOT OF SUPERPOSED PDFs ###
    fig = plt.figure(figsize=(12, 2.5))
    fig.set_facecolor('white')
    for feat in sorted(opt.gaussians):
        # Common upper x limit over the four runs
        maxi = int(np.max([pdfs[feat]['vec'] for _, pdfs in panels]))

        for rank, (window, pdfs) in enumerate(panels):
            leftmost = rank == 0
            ax = fig.add_subplot(141 + rank)
            for event_class, colour in (('VT', 'b'), ('EB', 'r')):
                # Only the leftmost panel labels its curves (for the legend).
                extra = {'label': event_class} if leftmost else {}
                ax.plot(pdfs[feat]['vec'],
                        pdfs[feat][event_class],
                        ls='-',
                        c=colour,
                        lw=2.,
                        **extra)
            ax.set_xlim([0, maxi])
            ax.set_xlabel(feat)
            ax.set_title(window)
            if leftmost:
                ax.legend(prop={'size': 10})
            else:
                ax.set_yticklabels('')

        plt.show()
Example #13
0
def plot_best_worst():
    """
    Plot the PDFs of the training set for the best and worst draws and
    compare them with the whole training set.

    One figure is produced per feature.  The whole set is drawn in black
    (solid for 'VT', dashed for 'EB'), the best draw in red and the worst
    draw in green.  Each figure is saved under ``opdict['fig_path']`` as
    'best_worst_<feature>.png' and then shown.
    """
    from options import MultiOptions, read_binary_file
    opt = MultiOptions()

    # (feature name, index of the best draw file, index of the worst one)
    feat_list = [('AsDec', 0, 1), ('Bandwidth', 5, 0), ('CentralF', 1, 0),
                 ('Centroid_time', 4, 0), ('Dur', 4, 1), ('Ene0-5', 1, 4),
                 ('Ene5-10', 0, 4), ('Ene', 0, 3), ('F_low', 4, 2),
                 ('F_up', 0, 7), ('IFslope', 7, 8), ('Kurto', 2, 0),
                 ('MeanPredF', 1, 4), ('PredF', 1, 4), ('RappMaxMean', 0, 1),
                 ('RappMaxMeanTF', 4, 0), ('Skewness', 2, 5),
                 ('TimeMaxSpec', 4, 0), ('Rectilinearity', 8, 3),
                 ('Planarity', 1, 2)]

    opt.opdict['feat_list'] = opt.opdict['feat_all']
    opt.opdict['feat_log'] = ['AsDec', 'Ene', 'Kurto', 'RappMaxMean']
    opt.opdict[
        'feat_filename'] = '../results/Piton/features/Piton_trainset.csv'
    opt.opdict['label_filename'] = '../lib/Piton/class_train_set.csv'
    x_all, y_all = opt.features_onesta('BOR', 'Z')

    list_files = glob.glob(os.path.join('../lib/Piton', 'learning*'))
    list_files.sort()

    # Only the first 60 % of each draw (the training part) is plotted here.
    mtraining = int(0.6 * len(y_all))

    labels = ['best', 'worst']
    colors = ['r', 'g']

    for feat, best, worst in feat_list:
        print("%s %s %s" % (feat, best, worst))
        fig = plt.figure()
        fig.set_facecolor('white')

        # ALL
        opt.x = x_all.reindex(columns=[feat])
        opt.y = y_all.reindex(index=opt.x.index)
        opt.opdict['feat_list'] = [feat]
        opt.compute_pdfs()
        g = opt.gaussians
        plt.plot(g[feat]['vec'], g[feat]['VT'], 'k', lw=2., label='VT')
        plt.plot(g[feat]['vec'], g[feat]['EB'], 'k--', lw=2., label='EB')

        draw_files = [list_files[best], list_files[worst]]
        for idraw, draw_file in enumerate(draw_files):
            dic = read_binary_file(draw_file)

            # TRAINING SET
            opt.x = x_all.reindex(columns=[feat], index=dic[:mtraining])
            opt.y = y_all.reindex(index=dic[:mtraining])
            opt.compute_pdfs()
            g_train = opt.gaussians
            plt.plot(g_train[feat]['vec'],
                     g_train[feat]['VT'],
                     '-',
                     c=colors[idraw],
                     label=labels[idraw])
            plt.plot(g_train[feat]['vec'],
                     g_train[feat]['EB'],
                     '--',
                     c=colors[idraw])

        plt.legend()
        plt.title(feat)
        plt.savefig('%s/best_worst_%s.png' % (opt.opdict['fig_path'], feat))
        plt.show()
Example #14
0
def compare_pdfs_train():
  """
  Display and compare the PDFs of the different training sets.

  One figure is shown per feature of ``opdict['feat_list']``.  Each training
  draw read from the 'train_10' file gets its own line or marker style, and
  each event type its own colour.
  """
  from options import MultiOptions
  opt = MultiOptions()

  opt.opdict['stations'] = ['IJEN']
  opt.opdict['channels'] = ['Z']
  opt.opdict['Types'] = ['Tremor','VulkanikB','?']

  opt.opdict['train_file'] = '%s/train_10'%(opt.opdict['libdir'])
  opt.opdict['label_filename'] = '%s/Ijen_reclass_all.csv'%opt.opdict['libdir']

  train = opt.read_binary_file(opt.opdict['train_file'])
  nb_tir = len(train)

  for sta in opt.opdict['stations']:
    for comp in opt.opdict['channels']:
      opt.x, opt.y = opt.features_onesta(sta,comp)

  X = opt.x
  Y = opt.y
  colors = ['r','b','g']
  # One (linestyle, marker) pair per draw. '*' and 'v' are marker codes, not
  # line styles, so they go through the ``marker`` argument instead of ``ls``.
  draw_styles = [('-','None'),('--','None'),('-.','None'),(':','None'),
                 ('-','None'),('--','None'),('-.','None'),(':','None'),
                 ('None','*'),('None','v')]
  features = opt.opdict['feat_list']
  for feat in features:
    print(feat)
    opt.opdict['feat_list'] = [feat]
    fig = plt.figure()
    fig.set_facecolor('white')
    for tir in range(nb_tir):
      # Materialise the indices so they can be reused for X and Y.
      tr = list(map(int,train[tir]))
      opt.x = X.reindex(index=tr,columns=[feat])
      opt.y = Y.reindex(index=tr)
      opt.classname2number()
      opt.compute_pdfs()
      g = opt.gaussians

      # Cycle through the styles so extra draws do not raise IndexError.
      linestyle, marker = draw_styles[tir%len(draw_styles)]
      for it,t in enumerate(opt.types):
        plt.plot(g[feat]['vec'],g[feat][t],ls=linestyle,marker=marker,color=colors[it%len(colors)])
    plt.title(feat)
    plt.legend(opt.types)
    plt.show()
Example #15
0
def plot_pdf_subsets():
    """
    Plot the PDFs of the training set, CV set and test set on the same
    figure, with one subfigure per event type ('VT' left, 'EB' right).

    For each feature, the whole set is drawn in black and the best/worst
    draws in red/green.  Within a draw the training part is a solid line,
    the CV part dashed and the test part dotted.  Each figure is saved
    under ``opdict['fig_path']`` as 'subsets_<feature>.png' and then shown.
    """
    from options import MultiOptions, read_binary_file
    opt = MultiOptions()

    # (feature name, index of the best draw file, index of the worst one)
    feat_list = [('AsDec', 0, 1), ('Bandwidth', 5, 0), ('CentralF', 1, 0),
                 ('Centroid_time', 4, 0), ('Dur', 4, 1), ('Ene0-5', 1, 4),
                 ('Ene5-10', 0, 4), ('Ene', 0, 3), ('F_low', 4, 2),
                 ('F_up', 0, 7), ('IFslope', 7, 8), ('Kurto', 2, 0),
                 ('MeanPredF', 1, 4), ('PredF', 1, 4), ('RappMaxMean', 0, 1),
                 ('RappMaxMeanTF', 4, 0), ('Skewness', 2, 5),
                 ('TimeMaxSpec', 4, 0), ('Rectilinearity', 8, 3),
                 ('Planarity', 1, 2)]

    opt.opdict['feat_list'] = opt.opdict['feat_all']
    opt.opdict[
        'feat_filename'] = '../results/Piton/features/Piton_trainset.csv'
    opt.opdict['label_filename'] = '../lib/Piton/class_train_set.csv'
    x_all, y_all = opt.features_onesta('BOR', 'Z')
    print(len(y_all))

    list_files = sorted(glob.glob(os.path.join('../lib/Piton', 'learning*')))

    # 60 % / 20 % / 20 % split sizes (mtest is computed but not used below).
    m = len(y_all)
    mtraining = int(0.6 * m)
    mcv = int(0.2 * m)
    mtest = int(0.2 * m)

    for feat, best, worst in feat_list:
        print("%s %s %s" % (feat, best, worst))
        fig = plt.figure(figsize=(10, 4))
        fig.set_facecolor('white')

        ax1 = fig.add_subplot(121)
        ax2 = fig.add_subplot(122)
        class_axes = ((ax1, 'VT'), (ax2, 'EB'))

        # ALL
        opt.x = x_all.reindex(columns=[feat])
        opt.y = y_all.reindex(index=opt.x.index)
        opt.opdict['feat_list'] = [feat]
        opt.compute_pdfs()
        pdf_all = opt.gaussians
        for ax, event_class in class_axes:
            ax.plot(pdf_all[feat]['vec'], pdf_all[feat][event_class], 'k',
                    lw=2.)

        draws = [('best', 'r', list_files[best]),
                 ('worst', 'g', list_files[worst])]
        for label, colour, draw_file in draws:
            dic = read_binary_file(draw_file)

            # (line format, rows of the subset, extra plot keywords);
            # only the training curves are labelled for the legend.
            subsets = (
                ('-', dic[:mtraining], {'label': label}),  # TRAINING SET
                ('--', dic[mtraining:mtraining + mcv], {}),  # CV SET
                (':', dic[mtraining + mcv:], {}),  # TEST SET
            )
            for fmt, rows, extra in subsets:
                opt.x = x_all.reindex(columns=[feat], index=rows)
                opt.y = y_all.reindex(index=rows)
                opt.compute_pdfs()
                pdf_part = opt.gaussians
                for ax, event_class in class_axes:
                    ax.plot(pdf_part[feat]['vec'],
                            pdf_part[feat][event_class],
                            fmt,
                            c=colour,
                            **extra)

        ax1.set_title('VT')
        ax2.set_title('EB')
        ax1.legend()
        ax2.legend()
        plt.suptitle(feat)
        plt.savefig('%s/subsets_%s.png' % (opt.opdict['fig_path'], feat))
        plt.show()
Example #16
0
def plot_pdf_subsets():
  """
  Plot the PDFs of the training set, CV set and test set on the same
  figure. One subfigure for each event type ('VT' and 'EB').

  The whole set is drawn in black; the best and worst draws are drawn in
  red and green, with a solid line for the training part, a dashed line for
  the CV part and a dotted line for the test part.
  """
  from options import MultiOptions, read_binary_file
  opt = MultiOptions()

  # (feature name, index of the best draw file, index of the worst one)
  feat_list = [
    ('AsDec',0,1),('Bandwidth',5,0),('CentralF',1,0),('Centroid_time',4,0),
    ('Dur',4,1),('Ene0-5',1,4),('Ene5-10',0,4),('Ene',0,3),('F_low',4,2),
    ('F_up',0,7),('IFslope',7,8),('Kurto',2,0),('MeanPredF',1,4),
    ('PredF',1,4),('RappMaxMean',0,1),('RappMaxMeanTF',4,0),
    ('Skewness',2,5),('TimeMaxSpec',4,0),('Rectilinearity',8,3),
    ('Planarity',1,2),
  ]

  opt.opdict['feat_list'] = opt.opdict['feat_all']
  opt.opdict['feat_filename'] = '../results/Piton/features/Piton_trainset.csv'
  opt.opdict['label_filename'] = '../lib/Piton/class_train_set.csv'
  x_all, y_all = opt.features_onesta('BOR','Z')
  print(len(y_all))

  list_files = glob.glob(os.path.join('../lib/Piton','learning*'))
  list_files.sort()

  # 60 % / 20 % / 20 % split sizes (mtest is computed but not used below)
  m = len(y_all)
  mtraining = int(0.6*m)
  mcv = int(0.2*m)
  mtest = int(0.2*m)

  def draw_pdfs(axes_pair,rows,fmt,**plot_kwargs):
    # Compute the PDFs of the given rows and draw VT / EB on their axes.
    feature = opt.opdict['feat_list'][0]
    if rows is None:
      opt.x = x_all.reindex(columns=[feature])
      opt.y = y_all.reindex(index=opt.x.index)
    else:
      opt.x = x_all.reindex(columns=[feature],index=rows)
      opt.y = y_all.reindex(index=rows)
    opt.compute_pdfs()
    pdfs = opt.gaussians
    for axes,event_class in zip(axes_pair,('VT','EB')):
      axes.plot(pdfs[feature]['vec'],pdfs[feature][event_class],fmt,**plot_kwargs)

  labels = ['best','worst']
  colors = ['r','g']
  for feat,best,worst in feat_list:
    print("%s %s %s"%(feat,best,worst))
    fig = plt.figure(figsize=(10,4))
    fig.set_facecolor('white')

    ax1 = fig.add_subplot(121)
    ax2 = fig.add_subplot(122)
    both = (ax1,ax2)

    # ALL
    opt.opdict['feat_list'] = [feat]
    draw_pdfs(both,None,'k',lw=2.)

    draw_files = [list_files[best],list_files[worst]]
    for idraw,draw_file in enumerate(draw_files):
      dic = read_binary_file(draw_file)

      # TRAINING SET (the only curves labelled for the legend)
      draw_pdfs(both,dic[:mtraining],'-',c=colors[idraw],label=labels[idraw])

      # CV SET
      draw_pdfs(both,dic[mtraining:mtraining+mcv],'--',c=colors[idraw])

      # TEST SET
      draw_pdfs(both,dic[mtraining+mcv:],':',c=colors[idraw])

    ax1.set_title('VT')
    ax2.set_title('EB')
    ax1.legend()
    ax2.legend()
    plt.suptitle(feat)
    plt.savefig('%s/subsets_%s.png'%(opt.opdict['fig_path'],feat))
    plt.show()
def read_data_for_features_extraction(save=False):
  """
  Extract the features from all seismic files.

  The events come from ``opt.read_classification()``. For every event,
  station and channel, the first matching data file (if any) is processed
  with extract_norm_features or extract_hash_features, depending on
  ``opdict['option']``, and the resulting row is appended to the DataFrame.
  When three components were processed with the 'norm' option and at least
  one polarization attribute is requested, polarization_analysis is run and
  its results are copied to the Z, N and E rows of the event.

  :param save: if set, save the pandas DataFrame as a .csv file at
    ``opdict['feat_filepath']``. The program exits beforehand if that file
    already exists.
  """
  from options import MultiOptions
  opt = MultiOptions()

  if save and os.path.exists(opt.opdict['feat_filepath']):
    print("WARNING !! File %s already exists"%opt.opdict['feat_filepath'])
    print("Check if you really want to replace it...")
    sys.exit()

  list_features = opt.opdict['feat_list']
  df = pd.DataFrame(columns=list_features)

  # Attributes produced by the polarization analysis, in the order in which
  # polarization_analysis returns them.
  polar_features = ('Rectilinearity','Planarity','Azimuth','Incidence')

  # Classification
  tsort = opt.read_classification()
  tsort.index = tsort.Date
  tsort = tsort.reindex(columns=['Date','Type'])

  list_sta = opt.opdict['stations']
  for ifile in range(tsort.shape[0]):
    date = tsort.values[ifile,0]
    event_type = tsort.values[ifile,1]

    for sta in list_sta:
      print("##### %s"%sta)
      counter = 0
      for comp in opt.opdict['channels']:
        ind = (date,sta,comp)
        dic = pd.DataFrame(columns=list_features,index=[ind])
        dic['EventType'] = event_type
        dic['Ponset'] = 0
        list_files = glob.glob(os.path.join(opt.opdict['datadir'],sta,'*%s.D'%comp,'*%s.D*%s_%s*'%(comp,str(date)[:8],str(date)[8:])))
        list_files.sort()
        if list_files:
          datafile = list_files[0]
          print("%s %s"%(ifile,datafile))
          if opt.opdict['option'] == 'norm':
            counter = counter + 1
            dic = extract_norm_features(list_features,date,datafile,dic)
          elif opt.opdict['option'] == 'hash':
            permut_file = '%s/permut_%s'%(opt.opdict['libdir'],opt.opdict['feat_test'].split('.')[0])
            dic = extract_hash_features(list_features,date,datafile,dic,permut_file,plot=True)
          df = df.append(dic)

      if counter == 3 and any(name in list_features for name in polar_features):
        from waveform_features import polarization_analysis
        # NOTE(review): `comp` and `datafile` are the values left by the last
        # channel; the "N.D" substitutions below presumably rely on that
        # channel being 'N' -- confirm.
        d_mean = (df.Dur[(date,sta,comp)] + df.Dur[(date,sta,'E')] + df.Dur[(date,sta,'Z')])/3.
        po_mean = int((df.Ponset[(date,sta,comp)] + df.Ponset[(date,sta,'E')] + df.Ponset[(date,sta,'Z')])/3)
        list_files = [datafile,datafile.replace("N.D","E.D"),datafile.replace("N.D","Z.D")]
        rect, plan, az, iang = polarization_analysis(list_files,d_mean,po_mean,plot=False)
        # Store each requested attribute on the three component rows. The
        # original Azimuth test (`list_features or 'Azimuth'`) was always
        # true; every attribute is now only written when it was requested.
        for name,value in zip(polar_features,(rect,plan,az,iang)):
          if name in list_features:
            for component in ('Z','N','E'):
              df[name][(date,sta,component)] = value

  if save:
    print("Features written in %s"%opt.opdict['feat_filepath'])
    df.to_csv(opt.opdict['feat_filepath'])
def read_data_for_features_extraction(set='test',save=False):
  """
  Extract the features from all seismic files.

  With ``set='test'`` every 'TestSet/SigEve_<n>.mat' file of the data
  directory is processed; with ``set='train'`` the EB and VT events of
  'TrainingSetPlusSig_2.mat' are processed. Each component is turned into
  one DataFrame row by extract_norm_features or extract_hash_features,
  depending on ``opdict['option']``. Once three components were seen and at
  least one polarization attribute is requested, polarization_analysis is
  run and its results are copied to the Z, N and E rows of the event.

  NOTE(review): the training loops only keep EB events 10 and 61 and VT
  event 5, and the 'hash' option skips a hard-coded list of test file
  positions. These look like debugging or data-specific filters; they are
  kept unchanged here.

  :param set: 'test' or 'train'; any other value extracts nothing.
  :param save: if set, save the pandas DataFrame as a .csv file at
    ``opdict['feat_filename']``. The program exits beforehand if that file
    already exists.
  """
  from scipy.io.matlab import mio
  from options import MultiOptions
  opt = MultiOptions()

  if set == 'train':
    opt.opdict['feat_filename'] = '%s/features/%s'%(opt.opdict['outdir'],opt.opdict['feat_train'])
  print(opt.opdict['feat_filename'])

  if save and os.path.exists(opt.opdict['feat_filename']):
    print("WARNING !! File %s already exists"%opt.opdict['feat_filename'])
    print("Check if you really want to replace it...")
    sys.exit()

  list_features = opt.opdict['feat_list']
  df = pd.DataFrame(columns=list_features)

  # Attributes produced by the polarization analysis, in the order in which
  # polarization_analysis returns them.
  polar_features = ('Rectilinearity','Planarity','Azimuth','Incidence')

  def polarization_requested():
    # True when at least one polarization attribute is in the feature list.
    return any(name in list_features for name in polar_features)

  def mean_duration_and_onset(frame,event_id,last_comp):
    # Average 'Dur' and 'Ponset' over the rows of the last processed
    # component and of the 'E' and 'Z' components of the event.
    d_mean = (frame.Dur[(event_id,'BOR',last_comp)] + frame.Dur[(event_id,'BOR','E')] + frame.Dur[(event_id,'BOR','Z')])/3.
    po_mean = int((frame.Ponset[(event_id,'BOR',last_comp)] + frame.Ponset[(event_id,'BOR','E')] + frame.Ponset[(event_id,'BOR','Z')])/3)
    return d_mean, po_mean

  def store_polarization(frame,event_id,values):
    # Copy each requested polarization attribute to the Z, N and E rows.
    for name,value in zip(polar_features,values):
      if name in list_features:
        for component in ('Z','N','E'):
          frame[name][(event_id,'BOR',component)] = value

  if set == 'test':
    datafiles = glob.glob(os.path.join(opt.opdict['datadir'],'TestSet/SigEve_*'))
    datafiles.sort()
    # Event numbers taken from the file names, sorted numerically
    liste = sorted(int(os.path.basename(name).split('_')[1].split('.mat')[0]) for name in datafiles)

    tsort = opt.read_csvfile(opt.opdict['label_filename'])
    tsort.index = tsort.Date

    # File positions skipped with the 'hash' option (reason not visible here)
    skipped_for_hash = (409,1026,1027,1028,1993,2121,2122,2123,2424,2441,3029,3058,3735,3785,3852,3930,4200,4463,4464,4746,6150,6382,6672,6733)

    for ifile,numfile in enumerate(liste):
      datafile = os.path.join(opt.opdict['datadir'],'TestSet/SigEve_%d.mat'%numfile)
      print("%s %s"%(ifile,datafile))
      mat = mio.loadmat(datafile)

      counter = 0
      for comp in opt.opdict['channels']:
        counter = counter + 1
        ind = (numfile,'BOR',comp)
        dic = pd.DataFrame(columns=list_features,index=[ind])
        dic['EventType'] = tsort[tsort.Date==numfile].Type.values[0]
        dic['Ponset'] = 0

        s = SeismicTraces(mat,comp)

        # Only traces carrying more than two attributes are processed
        if len(s.__dict__) > 2:
          if opt.opdict['option'] == 'norm':
            dic = extract_norm_features(s,list_features,dic)
          elif opt.opdict['option'] == 'hash':
            if ifile in skipped_for_hash:
              continue
            dic = extract_hash_features(s,list_features,dic,opt.opdict['permut_file'],plot=False)
          df = df.append(dic)

        # In this branch the polarization step sits inside the component
        # loop and fires on the third component.
        if counter == 3 and polarization_requested():
          d_mean, po_mean = mean_duration_and_onset(df,numfile,comp)
          s.read_all_files(mat,False)
          rect, plan, az, iang = polarization_analysis(s,d_mean,po_mean,plot=False)
          store_polarization(df,numfile,(rect,plan,az,iang))

  elif set == 'train':
    datafile = os.path.join(opt.opdict['datadir'],'TrainingSetPlusSig_2.mat')
    mat = mio.loadmat(datafile)

    # Offset added to the VT indices so they do not collide with the EB
    # ones. It starts at 0 so that it is always defined, even when no EB
    # event passes the filter below.
    neb = 0
    for i in range(mat['KurtoEB'].shape[1]):
      print("EB %s"%i)
      if i!=10 and i!=61:
        continue
      counter = 0
      for comp in opt.opdict['channels']:
        counter = counter + 1
        dic = pd.DataFrame(columns=list_features,index=[(i,'BOR',comp)])
        dic['EventType'] = 'EB'
        dic['Ponset'] = 0

        s = SeismicTraces(mat,comp,train=[i,'EB'])
        if len(s.__dict__) > 2:
          if opt.opdict['option'] == 'norm':
            dic = extract_norm_features(s,list_features,dic)
          elif opt.opdict['option'] == 'hash':
            dic = extract_hash_features(s,list_features,dic,opt.opdict['permut_file'],plot=False)
          df = df.append(dic)

      neb = i+1
      if counter == 3 and polarization_requested():
        d_mean, po_mean = mean_duration_and_onset(df,i,comp)
        s.read_all_files(mat,train=[i,'EB'])
        rect, plan, az, iang = polarization_analysis(s,d_mean,po_mean,plot=False)
        store_polarization(df,i,(rect,plan,az,iang))

    for i in range(mat['KurtoVT'].shape[1]):
      print("VT %s"%(i+neb))
      if i != 5:
        continue
      counter = 0
      for comp in opt.opdict['channels']:
        counter = counter + 1
        dic = pd.DataFrame(columns=list_features,index=[(i+neb,'BOR',comp)])
        dic['EventType'] = 'VT'
        dic['Ponset'] = 0

        s = SeismicTraces(mat,comp,train=[i,'VT'])

        if len(s.__dict__) > 2:
          if opt.opdict['option'] == 'norm':
            dic = extract_norm_features(s,list_features,dic)
          elif opt.opdict['option'] == 'hash':
            dic = extract_hash_features(s,list_features,dic,opt.opdict['permut_file'],plot=False)
          df = df.append(dic)

      if counter == 3 and polarization_requested():
        d_mean, po_mean = mean_duration_and_onset(df,i+neb,comp)
        s.read_all_files(mat,train=[i,'VT'])
        rect, plan, az, iang = polarization_analysis(s,d_mean,po_mean,plot=False)
        store_polarization(df,i+neb,(rect,plan,az,iang))

  if save:
    print("Features written in %s"%opt.opdict['feat_filename'])
    df.to_csv(opt.opdict['feat_filename'])
Example #19
0
def compare_ponsets(set='test'):
    """
  Compare the Ponsets determined either with the kurtosis gradient, either with the 
  frequency stack of the spectrogram.
  """
    from scipy.io.matlab import mio
    from options import MultiOptions
    opt = MultiOptions()

    if set == 'test':
        datafiles = glob.glob(
            os.path.join(opt.opdict['datadir'], 'TestSet/SigEve_*'))
        datafiles.sort()
        liste = [
            os.path.basename(datafiles[i]).split('_')[1].split('.mat')[0]
            for i in range(len(datafiles))
        ]
        liste = map(int,
                    liste)  # sort the list of file following the event number
        liste.sort()

        df = pd.read_csv('%s/features/Piton_testset.csv' %
                         opt.opdict['outdir'],
                         index_col=False)
        df = df.reindex(
            columns=['Dur_freq', 'Ponset_freq', 'Dur_grad', 'Ponset_grad'])
        for ifile, numfile in enumerate(liste):
            file = os.path.join(opt.opdict['datadir'],
                                'TestSet/SigEve_%d.mat' % numfile)
            print ifile, file
            mat = mio.loadmat(file)
            for comp in opt.opdict['channels']:
                ind = (numfile, 'BOR', comp)
                df_one = df.reindex(index=[str(ind)])
                pfr = df_one.Ponset_freq
                pgr = df_one.Ponset_grad
                dfr = df_one.Dur_freq
                dgr = df_one.Dur_grad

                s = SeismicTraces(mat, comp)
                fig = plt.figure(figsize=(9, 4))
                fig.set_facecolor('white')
                plt.plot(s.tr, 'k')
                plt.plot([pfr, pfr], [np.min(s.tr), np.max(s.tr)],
                         'r',
                         lw=2.,
                         label='freq')
                plt.plot([pgr, pgr], [np.min(s.tr), np.max(s.tr)],
                         'r--',
                         lw=2.,
                         label='grad')
                plt.plot([pfr + dfr * 1. / s.dt, pfr + dfr * 1. / s.dt],
                         [np.min(s.tr), np.max(s.tr)],
                         'y',
                         lw=2.)
                plt.plot([pgr + dgr * 1. / s.dt, pgr + dgr * 1. / s.dt],
                         [np.min(s.tr), np.max(s.tr)],
                         'y--',
                         lw=2.)
                plt.legend()
                plt.show()
Example #20
0
def read_data_for_features_extraction(set='test', save=False):
    """
  Extracts the features from all seismic files
  If option 'save' is set, then save the pandas DataFrame as a .csv file
  """
    from scipy.io.matlab import mio
    from options import MultiOptions
    opt = MultiOptions()

    if set == 'train':
        opt.opdict['feat_filename'] = '%s/features/%s' % (
            opt.opdict['outdir'], opt.opdict['feat_train'])
    print opt.opdict['feat_filename']

    if save:
        if os.path.exists(opt.opdict['feat_filename']):
            print "WARNING !! File %s already exists" % opt.opdict[
                'feat_filename']
            print "Check if you really want to replace it..."
            sys.exit()

    list_features = opt.opdict['feat_list']
    df = pd.DataFrame(columns=list_features)

    if set == 'test':
        datafiles = glob.glob(
            os.path.join(opt.opdict['datadir'], 'TestSet/SigEve_*'))
        datafiles.sort()
        liste = [
            os.path.basename(datafiles[i]).split('_')[1].split('.mat')[0]
            for i in range(len(datafiles))
        ]
        liste = map(int,
                    liste)  # sort the list of file following the event number
        liste.sort()

        tsort = opt.read_csvfile(opt.opdict['label_filename'])
        tsort.index = tsort.Date

        for ifile, numfile in enumerate(liste):
            file = os.path.join(opt.opdict['datadir'],
                                'TestSet/SigEve_%d.mat' % numfile)
            print ifile, file
            mat = mio.loadmat(file)

            counter = 0
            for comp in opt.opdict['channels']:
                counter = counter + 1
                ind = (numfile, 'BOR', comp)
                dic = pd.DataFrame(columns=list_features, index=[ind])
                dic['EventType'] = tsort[tsort.Date == numfile].Type.values[0]
                dic['Ponset'] = 0

                s = SeismicTraces(mat, comp)
                list_attr = s.__dict__.keys()

                if len(list_attr) > 2:
                    if opt.opdict['option'] == 'norm':
                        dic = extract_norm_features(s, list_features, dic)
                    elif opt.opdict['option'] == 'hash':
                        if ifile in [
                                409, 1026, 1027, 1028, 1993, 2121, 2122, 2123,
                                2424, 2441, 3029, 3058, 3735, 3785, 3852, 3930,
                                4200, 4463, 4464, 4746, 6150, 6382, 6672, 6733
                        ]:
                            continue
                        dic = extract_hash_features(s,
                                                    list_features,
                                                    dic,
                                                    opt.opdict['permut_file'],
                                                    plot=False)
                    df = df.append(dic)

                if counter == 3 and ('Rectilinearity' in list_features
                                     or 'Planarity' in list_features
                                     or 'Azimuth' in list_features
                                     or 'Incidence' in list_features):
                    d_mean = (df.Dur[(numfile, 'BOR', comp)] +
                              df.Dur[(numfile, 'BOR', 'E')] +
                              df.Dur[(numfile, 'BOR', 'Z')]) / 3.
                    po_mean = int((df.Ponset[(numfile, 'BOR', comp)] +
                                   df.Ponset[(numfile, 'BOR', 'E')] +
                                   df.Ponset[(numfile, 'BOR', 'Z')]) / 3)
                    s.read_all_files(mat, False)
                    rect, plan, az, iang = polarization_analysis(s,
                                                                 d_mean,
                                                                 po_mean,
                                                                 plot=False)
                    if 'Rectilinearity' in list_features:
                        df.Rectilinearity[(numfile, 'BOR',
                                           'Z')], df.Rectilinearity[(
                                               numfile, 'BOR',
                                               'N')], df.Rectilinearity[(
                                                   numfile, 'BOR',
                                                   'E')] = rect, rect, rect
                    if 'Planarity' in list_features:
                        df.Planarity[(numfile, 'BOR', 'Z')], df.Planarity[(
                            numfile, 'BOR',
                            'N')], df.Planarity[(numfile, 'BOR',
                                                 'E')] = plan, plan, plan
                    if list_features or 'Azimuth':
                        df.Azimuth[(numfile, 'BOR', 'Z')], df.Azimuth[(
                            numfile, 'BOR',
                            'N')], df.Azimuth[(numfile, 'BOR',
                                               'E')] = az, az, az
                    if 'Incidence' in list_features:
                        df.Incidence[(numfile, 'BOR', 'Z')], df.Incidence[(
                            numfile, 'BOR',
                            'N')], df.Incidence[(numfile, 'BOR',
                                                 'E')] = iang, iang, iang

    elif set == 'train':
        datafile = os.path.join(opt.opdict['datadir'],
                                'TrainingSetPlusSig_2.mat')
        mat = mio.loadmat(datafile)
        hob_all_EB = {}
        for i in range(mat['KurtoEB'].shape[1]):
            print "EB", i
            if i != 10 and i != 61:
                continue
            counter = 0
            for comp in opt.opdict['channels']:
                counter = counter + 1
                dic = pd.DataFrame(columns=list_features,
                                   index=[(i, 'BOR', comp)])
                dic['EventType'] = 'EB'
                dic['Ponset'] = 0

                s = SeismicTraces(mat, comp, train=[i, 'EB'])
                list_attr = s.__dict__.keys()
                if len(list_attr) > 2:
                    if opt.opdict['option'] == 'norm':
                        dic = extract_norm_features(s, list_features, dic)
                    elif opt.opdict['option'] == 'hash':
                        dic = extract_hash_features(s,
                                                    list_features,
                                                    dic,
                                                    opt.opdict['permut_file'],
                                                    plot=False)
                    df = df.append(dic)

            neb = i + 1
            if counter == 3 and ('Rectilinearity' in list_features
                                 or 'Planarity' in list_features
                                 or 'Azimuth' in list_features
                                 or 'Incidence' in list_features):
                d_mean = (df.Dur[(i, 'BOR', comp)] + df.Dur[(i, 'BOR', 'E')] +
                          df.Dur[(i, 'BOR', 'Z')]) / 3.
                po_mean = int(
                    (df.Ponset[(i, 'BOR', comp)] + df.Ponset[(i, 'BOR', 'E')] +
                     df.Ponset[(i, 'BOR', 'Z')]) / 3)
                s.read_all_files(mat, train=[i, 'EB'])
                rect, plan, az, iang = polarization_analysis(s,
                                                             d_mean,
                                                             po_mean,
                                                             plot=False)
                if 'Rectilinearity' in list_features:
                    df.Rectilinearity[(i, 'BOR', 'Z')], df.Rectilinearity[(
                        i, 'BOR',
                        'N')], df.Rectilinearity[(i, 'BOR',
                                                  'E')] = rect, rect, rect
                if 'Planarity' in list_features:
                    df.Planarity[(i, 'BOR', 'Z')], df.Planarity[(
                        i, 'BOR', 'N')], df.Planarity[(i, 'BOR',
                                                       'E')] = plan, plan, plan
                if 'Azimuth' in list_features:
                    df.Azimuth[(i, 'BOR', 'Z')], df.Azimuth[(
                        i, 'BOR', 'N')], df.Azimuth[(i, 'BOR',
                                                     'E')] = az, az, az
                if 'Incidence' in list_features:
                    df.Incidence[(i, 'BOR', 'Z')], df.Incidence[(
                        i, 'BOR', 'N')], df.Incidence[(i, 'BOR',
                                                       'E')] = iang, iang, iang

        for i in range(mat['KurtoVT'].shape[1]):
            print "VT", i + neb
            if i != 5:
                continue
            counter = 0
            for comp in opt.opdict['channels']:
                counter = counter + 1
                dic = pd.DataFrame(columns=list_features,
                                   index=[(i + neb, 'BOR', comp)])
                dic['EventType'] = 'VT'
                dic['Ponset'] = 0

                s = SeismicTraces(mat, comp, train=[i, 'VT'])

                list_attr = s.__dict__.keys()
                if len(list_attr) > 2:
                    if opt.opdict['option'] == 'norm':
                        dic = extract_norm_features(s, list_features, dic)
                    elif opt.opdict['option'] == 'hash':
                        dic = extract_hash_features(s,
                                                    list_features,
                                                    dic,
                                                    opt.opdict['permut_file'],
                                                    plot=False)
                    df = df.append(dic)
            if counter == 3 and ('Rectilinearity' in list_features
                                 or 'Planarity' in list_features
                                 or 'Azimuth' in list_features
                                 or 'Incidence' in list_features):
                d_mean = (df.Dur[(i + neb, 'BOR', comp)] +
                          df.Dur[(i + neb, 'BOR', 'E')] +
                          df.Dur[(i + neb, 'BOR', 'Z')]) / 3.
                po_mean = int((df.Ponset[(i + neb, 'BOR', comp)] +
                               df.Ponset[(i + neb, 'BOR', 'E')] +
                               df.Ponset[(i + neb, 'BOR', 'Z')]) / 3)
                s.read_all_files(mat, train=[i, 'VT'])
                rect, plan, az, iang = polarization_analysis(s,
                                                             d_mean,
                                                             po_mean,
                                                             plot=False)
                if 'Rectilinearity' in list_features:
                    df.Rectilinearity[(
                        i + neb, 'BOR', 'Z')], df.Rectilinearity[(
                            i + neb, 'BOR',
                            'N')], df.Rectilinearity[(i + neb, 'BOR',
                                                      'E')] = rect, rect, rect
                if 'Planarity' in list_features:
                    df.Planarity[(i + neb, 'BOR', 'Z')], df.Planarity[(
                        i + neb, 'BOR',
                        'N')], df.Planarity[(i + neb, 'BOR',
                                             'E')] = plan, plan, plan
                if 'Azimuth' in list_features:
                    df.Azimuth[(i + neb, 'BOR', 'Z')], df.Azimuth[(
                        i + neb, 'BOR', 'N')], df.Azimuth[(i + neb, 'BOR',
                                                           'E')] = az, az, az
                if 'Incidence' in list_features:
                    df.Incidence[(i + neb, 'BOR', 'Z')], df.Incidence[(
                        i + neb, 'BOR',
                        'N')], df.Incidence[(i + neb, 'BOR',
                                             'E')] = iang, iang, iang

    if save:
        print "Features written in %s" % opt.opdict['feat_filename']
        df.to_csv(opt.opdict['feat_filename'])
Example #21
0
def compare_clement():
  """
  Compare Clément's attributes with the ones recomputed here.

  The PDFs are first computed on the recomputed attributes, then on the
  clement_train.csv / clement_test.csv files, and both sets are superposed
  with opt.plot_superposed_pdfs().
  """

  from options import MultiOptions
  opt = MultiOptions()
  opt.opdict['channels'] = ['Z']

  def select_first_draw():
    # Run the sorting step and work on the first entry of opt.xs / opt.ys
    opt.do_tri()
    opt.x = opt.xs[0]
    opt.y = opt.ys[0]

  # Recomputed attributes
  opt.opdict['feat_list'] = ['Dur','AsDec','RappMaxMean','Kurto','KRapp']
  opt.opdict['feat_log'] = ['AsDec','RappMaxMean','Kurto']
  select_first_draw()
  opt.x.columns = opt.opdict['feat_list']
  opt.compute_pdfs()
  my_gauss = opt.gaussians

  # Scatter plot of log(Kurto) against log(RappMaxMean) when both are present
  selected = opt.opdict['feat_list']
  if all(name in selected for name in ('Kurto','RappMaxMean')):
    plt.figure().set_facecolor('white')
    plt.plot(np.log(opt.x.Kurto),np.log(opt.x.RappMaxMean),'ko')
    plt.xlabel('Kurto')
    plt.ylabel('RappMaxMean')
    plt.show()

  # Clément's attributes
  opt.opdict['feat_log'] = []
  opt.opdict['feat_train'] = 'clement_train.csv'
  opt.opdict['feat_test'] = 'clement_test.csv'
  select_first_draw()
  opt.compute_pdfs()

  # Solid line --> Clément
  # Dashed line --> recomputed attributes
  opt.plot_superposed_pdfs(my_gauss,save=False)
Example #22
0
def read_data_for_features_extraction(save=False):
    """
  Extracts the features from all seismic files
  If option 'save' is set, then save the pandas DataFrame as a .csv file
  """
    from options import MultiOptions
    opt = MultiOptions()

    if save:
        if os.path.exists(opt.opdict['feat_filename']):
            print "WARNING !! File %s already exists" % opt.opdict[
                'feat_filename']
            print "Check if you really want to replace it..."
            sys.exit()

    list_features = opt.opdict['feat_list']
    df = pd.DataFrame(columns=list_features)

    hob_all = {}

    # Classification
    tsort = opt.read_classification()
    tsort.index = tsort.Date
    tsort = tsort.reindex(columns=['Date', 'Type'])

    list_sta = opt.opdict['stations']
    for ifile in range(tsort.shape[0]):
        date = tsort.values[ifile, 0]
        type = tsort.values[ifile, 1]

        for sta in list_sta:
            print "#####", sta
            counter = 0
            for comp in opt.opdict['channels']:
                ind = (date, sta, comp)
                dic = pd.DataFrame(columns=list_features, index=[ind])
                dic['EventType'] = type
                dic['Ponset'] = 0
                list_files = glob.glob(
                    os.path.join(
                        opt.opdict['datadir'], sta, '*%s.D' % comp,
                        '*%s.D*%s_%s*' % (comp, str(date)[:8], str(date)[8:])))
                list_files.sort()
                if len(list_files) > 0:
                    file = list_files[0]
                    print ifile, file
                    if opt.opdict['option'] == 'norm':
                        counter = counter + 1
                        dic = extract_norm_features(list_features, date, file,
                                                    dic)
                    elif opt.opdict['option'] == 'hash':
                        permut_file = '%s/permut_%s' % (
                            opt.opdict['libdir'],
                            opt.opdict['feat_test'].split('.')[0])
                        dic = extract_hash_features(list_features,
                                                    date,
                                                    file,
                                                    dic,
                                                    permut_file,
                                                    plot=True)
                    df = df.append(dic)

            if counter == 3 and ('Rectilinearity' in list_features
                                 or 'Planarity' in list_features
                                 or 'Azimuth' in list_features
                                 or 'Incidence' in list_features):
                from waveform_features import polarization_analysis
                d_mean = (df.Dur[(date, sta, comp)] + df.Dur[
                    (date, sta, 'E')] + df.Dur[(date, sta, 'Z')]) / 3.
                po_mean = int((df.Ponset[(date, sta, comp)] + df.Ponset[
                    (date, sta, 'E')] + df.Ponset[(date, sta, 'Z')]) / 3)
                list_files = [
                    file,
                    file.replace("N.D", "E.D"),
                    file.replace("N.D", "Z.D")
                ]
                rect, plan, az, iang = polarization_analysis(list_files,
                                                             d_mean,
                                                             po_mean,
                                                             plot=False)
                if 'Rectilinearity' in list_features:
                    df.Rectilinearity[(date, sta, 'Z')], df.Rectilinearity[(
                        date, sta,
                        'N')], df.Rectilinearity[(date, sta,
                                                  'E')] = rect, rect, rect
                if 'Planarity' in list_features:
                    df.Planarity[(date, sta, 'Z')], df.Planarity[(
                        date, sta,
                        'N')], df.Planarity[(date, sta,
                                             'E')] = plan, plan, plan
                if list_features or 'Azimuth':
                    df.Azimuth[(date, sta, 'Z')], df.Azimuth[(
                        date, sta, 'N')], df.Azimuth[(date, sta,
                                                      'E')] = az, az, az
                if 'Incidence' in list_features:
                    df.Incidence[(date, sta, 'Z')], df.Incidence[(
                        date, sta,
                        'N')], df.Incidence[(date, sta,
                                             'E')] = iang, iang, iang

    if save:
        print "Features written in %s" % opt.opdict['feat_filename']
        df.to_csv(opt.opdict['feat_filename'])
Example #23
0
def plot_best_worst():
  """
  Plots the pdfs of the training set for the best and worst draws 
  and compare with the whole training set.
  """
  from options import MultiOptions, read_binary_file
  opt = MultiOptions()

  feat_list = [('AsDec',0,1),('Bandwidth',5,0),('CentralF',1,0),('Centroid_time',4,0),('Dur',4,1),('Ene0-5',1,4),('Ene5-10',0,4),('Ene',0,3),('F_low',4,2),('F_up',0,7),('IFslope',7,8),('Kurto',2,0),('MeanPredF',1,4),('PredF',1,4),('RappMaxMean',0,1),('RappMaxMeanTF',4,0),('Skewness',2,5),('TimeMaxSpec',4,0),('Rectilinearity',8,3),('Planarity',1,2)]

  opt.opdict['feat_list'] = opt.opdict['feat_all']
  opt.opdict['feat_log'] = ['AsDec','Ene','Kurto','RappMaxMean']
  opt.opdict['feat_filename'] = '../results/Piton/features/Piton_trainset.csv'
  opt.opdict['label_filename'] = '../lib/Piton/class_train_set.csv'
  x_all, y_all = opt.features_onesta('BOR','Z')
  
  list_files = glob.glob(os.path.join('../lib/Piton','learning*'))
  list_files.sort()

  m = len(y_all)
  mtraining = int(0.6*m)
  mcv = int(0.2*m)
  mtest = int(0.2*m)

  for feat,best,worst in feat_list:
    print feat, best, worst
    fig = plt.figure()
    fig.set_facecolor('white')

    # ALL
    opt.x = x_all.reindex(columns=[feat])
    opt.y = y_all.reindex(index=opt.x.index)
    opt.opdict['feat_list'] = [feat]
    opt.compute_pdfs()
    g = opt.gaussians
    plt.plot(g[feat]['vec'],g[feat]['VT'],'k',lw=2.,label='VT')
    plt.plot(g[feat]['vec'],g[feat]['EB'],'k--',lw=2.,label='EB')

    labels = ['best','worst']
    colors = ['r','g']
    b_file = list_files[best]
    w_file = list_files[worst]
    for ifile,file in enumerate([b_file,w_file]):
      dic = read_binary_file(file)

      # TRAINING SET
      opt.x = x_all.reindex(columns=[feat],index=dic[:mtraining])
      opt.y = y_all.reindex(index=dic[:mtraining])
      opt.compute_pdfs()
      g_train = opt.gaussians
      plt.plot(g_train[feat]['vec'],g_train[feat]['VT'],'-',c=colors[ifile],label=labels[ifile])
      plt.plot(g_train[feat]['vec'],g_train[feat]['EB'],'--',c=colors[ifile])

    plt.legend()
    plt.title(feat)
    plt.savefig('%s/best_worst_%s.png'%(opt.opdict['fig_path'],feat))
    plt.show()
Example #24
0
def compare_clement():
    """
    Compare Clément's attributes with the ones recomputed here.

    The PDFs are first computed on the recomputed attributes, then on the
    clement_train.csv / clement_test.csv files, and both sets are superposed
    with opt.plot_superposed_pdfs().
    """

    from options import MultiOptions
    opt = MultiOptions()
    opt.opdict['channels'] = ['Z']

    def select_first_draw():
        # Run the sorting step and work on the first entry of opt.xs / opt.ys
        opt.do_tri()
        opt.x = opt.xs[0]
        opt.y = opt.ys[0]

    # Recomputed attributes
    opt.opdict['feat_list'] = ['Dur', 'AsDec', 'RappMaxMean', 'Kurto', 'KRapp']
    opt.opdict['feat_log'] = ['AsDec', 'RappMaxMean', 'Kurto']
    select_first_draw()
    opt.x.columns = opt.opdict['feat_list']
    opt.compute_pdfs()
    my_gauss = opt.gaussians

    # Scatter plot of log(Kurto) against log(RappMaxMean) when both are present
    selected = opt.opdict['feat_list']
    if all(name in selected for name in ('Kurto', 'RappMaxMean')):
        plt.figure().set_facecolor('white')
        plt.plot(np.log(opt.x.Kurto), np.log(opt.x.RappMaxMean), 'ko')
        plt.xlabel('Kurto')
        plt.ylabel('RappMaxMean')
        plt.show()

    # Clément's attributes
    opt.opdict['feat_log'] = []
    opt.opdict['feat_train'] = 'clement_train.csv'
    opt.opdict['feat_test'] = 'clement_test.csv'
    select_first_draw()
    opt.compute_pdfs()

    # Solid line --> Clément
    # Dashed line --> recomputed attributes
    opt.plot_superposed_pdfs(my_gauss, save=False)
Example #25
0
def compare_lissage():
  """
  Compare the kurtosis PDFs obtained with different smoothing windows.

  The default feature files (panel '0.5 s') and the 0610 ('1 s'),
  1809 ('5 s') and 0510 ('10 s') train/test files are loaded in turn; the
  VT (blue) and EB (red) PDFs of each are drawn side by side on one row of
  four panels sharing the same x-range.
  """

  plot_envelopes()

  from options import MultiOptions
  opt = MultiOptions()
  opt.opdict['channels'] = ['Z']

  def first_draw_pdfs(rename_columns=False):
    # Sort, keep the first entry of opt.xs / opt.ys and compute its PDFs
    opt.do_tri()
    opt.x = opt.xs[0]
    opt.y = opt.ys[0]
    if rename_columns:
      opt.x.columns = opt.opdict['feat_list']
    opt.compute_pdfs()
    return opt.gaussians

  # Smoothing over 0.5 s windows (default feature files)
  opt.opdict['feat_list'] = ['Kurto']
  opt.opdict['feat_log'] = ['Kurto']
  panels = [('0.5 s',first_draw_pdfs(rename_columns=True))]

  # Smoothing over 1 s, 5 s and 10 s windows
  other_windows = [
    ('1 s','0610_Piton_trainset.csv','0610_Piton_testset.csv'),
    ('5 s','1809_Piton_trainset.csv','1809_Piton_testset.csv'),
    ('10 s','0510_Piton_trainset.csv','0510_Piton_testset.csv'),
  ]
  for window,train_file,test_file in other_windows:
    opt.opdict['feat_train'] = train_file
    opt.opdict['feat_test'] = test_file
    panels.append((window,first_draw_pdfs()))

  ### PLOT OF SUPERPOSED PDFs ###
  fig = plt.figure(figsize=(12,2.5))
  fig.set_facecolor('white')
  for feat in sorted(opt.gaussians):
    # Common upper x-limit over the four panels
    maxi = int(np.max([gauss[feat]['vec'] for window,gauss in panels]))

    for ipanel,(window,gauss) in enumerate(panels):
      ax = fig.add_subplot(141+ipanel)
      is_first = ipanel == 0
      for cls,color in (('VT','b'),('EB','r')):
        # Only the curves of the first panel are labelled for the legend
        extra = {'label':cls} if is_first else {}
        ax.plot(gauss[feat]['vec'],gauss[feat][cls],ls='-',c=color,lw=2.,**extra)
      ax.set_xlim([0,maxi])
      ax.set_xlabel(feat)
      ax.set_title(window)
      if is_first:
        ax.legend(prop={'size':10})
      else:
        ax.set_yticklabels('')

    plt.show()
Example #26
0
def compare_ponsets(set='test'):
    """
  Compare the Ponsets determined with the frequency stack of the spectrogram 
  in function of the spectrogram computation parameters...
  """
    from scipy.io.matlab import mio
    from features_extraction_piton import SeismicTraces
    from options import MultiOptions
    opt = MultiOptions()

    if set == 'test':
        datafiles = glob.glob(
            os.path.join(opt.opdict['datadir'], 'TestSet/SigEve_*'))
        datafiles.sort()
        liste = [
            os.path.basename(datafiles[i]).split('_')[1].split('.mat')[0]
            for i in range(len(datafiles))
        ]
        liste = map(int,
                    liste)  # sort the list of file following the event number
        liste.sort()

        df_norm = pd.read_csv('%s/features/Piton_testset.csv' %
                              opt.opdict['outdir'],
                              index_col=False)
        df_norm = df_norm.reindex(columns=['Ponset_freq', 'Dur'])

        df_clement = pd.read_csv('%s/features/clement_test.csv' %
                                 opt.opdict['outdir'],
                                 index_col=False)
        df_clement = df_clement.reindex(columns=['Dur'])

        df_hash_64 = pd.read_csv('%s/features/HT_Piton_testset.csv' %
                                 opt.opdict['outdir'],
                                 index_col=False)
        df_hash_64 = df_hash_64.reindex(columns=['Ponset'])

        df_hash_32 = pd.read_csv('%s/features/HT32_Piton_testset.csv' %
                                 opt.opdict['outdir'],
                                 index_col=False)
        df_hash_32 = df_hash_32.reindex(columns=['Ponset'])

        for ifile, numfile in enumerate(liste):
            file = os.path.join(opt.opdict['datadir'],
                                'TestSet/SigEve_%d.mat' % numfile)
            print ifile, file
            mat = mio.loadmat(file)
            for comp in opt.opdict['channels']:
                ind = (numfile, 'BOR', comp)
                p_norm = df_norm.reindex(index=[str(ind)]).Ponset_freq
                p_hash_64 = df_hash_64.reindex(index=[str(ind)]).Ponset
                p_hash_32 = df_hash_32.reindex(index=[str(ind)]).Ponset
                dur = df_norm.reindex(index=[str(ind)]).Dur * 100
                dur_cl = df_clement.reindex(index=[str(ind)]).Dur * 100

                s = SeismicTraces(mat, comp)
                fig = plt.figure(figsize=(9, 4))
                fig.set_facecolor('white')
                plt.plot(s.tr, 'k')
                plt.plot([p_norm, p_norm],
                         [np.min(s.tr), np.max(s.tr)],
                         'r',
                         lw=2.,
                         label='norm')
                plt.plot([p_norm + dur, p_norm + dur],
                         [np.min(s.tr), np.max(s.tr)],
                         'r--',
                         lw=2.)
                plt.plot([p_norm + dur_cl, p_norm + dur_cl],
                         [np.min(s.tr), np.max(s.tr)],
                         '--',
                         c='orange',
                         lw=2.)
                plt.plot([p_hash_64, p_hash_64],
                         [np.min(s.tr), np.max(s.tr)],
                         'g',
                         lw=2.,
                         label='hash_64')
                plt.plot([p_hash_32, p_hash_32],
                         [np.min(s.tr), np.max(s.tr)],
                         'y',
                         lw=2.,
                         label='hash_32')
                plt.legend()
                plt.show()