Example #1
def run_all():

  from options import MultiOptions
  opt = MultiOptions()
  #opt.count_number_of_events()

  ### UNSUPERVISED METHOD ### 
  if opt.opdict['method'] == 'kmeans':
    from unsupervised import classifier
    classifier(opt)

  ### SUPERVISED METHODS ###
  elif opt.opdict['method'] in ['lr','svm','svm_nl','lrsk']:
    from do_classification import classifier
    classifier(opt)

    from results import AnalyseResults
    res = AnalyseResults()
    if res.opdict['plot_confusion']:
      res.plot_confusion()

  elif opt.opdict['method'] in ['ova','1b1']:
    from do_classification import classifier
    classifier(opt)

    from results import AnalyseResultsExtraction
    res = AnalyseResultsExtraction()
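
For orientation, run_all only touches a couple of configuration entries. The stub below is a hypothetical stand-in, not the real MultiOptions class (which defines many more settings); it sketches just the opdict keys that the dispatch above reads.

# Hypothetical sketch only -- not the real MultiOptions class. It shows just the
# opdict entries that run_all() reads.
class StubOptions(object):

    def __init__(self, method='lr'):
        self.opdict = {
            'method': method,         # 'kmeans', 'lr', 'svm', 'svm_nl', 'lrsk', 'ova' or '1b1'
            'plot_confusion': False,  # checked on the results object in the supervised branch
        }

opt = StubOptions('svm')
print(opt.opdict['method'])
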
Example #2
def plot_soutenance():
    """
  Plot des PDFs des 4 attributs définis par Clément pour le ppt 
  de la soutenance.
  """
    from options import MultiOptions
    opt = MultiOptions()
    opt.opdict['channels'] = ['Z']

    #opt.opdict['feat_train'] = 'clement_train.csv'
    #opt.opdict['feat_test'] = 'clement_test.csv'
    opt.opdict['feat_list'] = ['AsDec', 'Dur', 'Ene', 'KRapp']
    #opt.opdict['feat_log'] = ['AsDec','Dur','Ene','KRapp']
    opt.do_tri()
    opt.x = opt.xs[0]
    opt.y = opt.ys[0]
    opt.compute_pdfs()

    gauss = opt.gaussians

    fig = plt.figure(figsize=(12, 2.5))
    fig.set_facecolor('white')
    for ifeat, feat in enumerate(sorted(gauss)):
        ax = fig.add_subplot(1, 4, ifeat + 1)
        ax.plot(gauss[feat]['vec'], gauss[feat]['VT'], ls='-', c='b', lw=2.)
        ax.plot(gauss[feat]['vec'], gauss[feat]['EB'], ls='-', c='r', lw=2.)
        ax.set_title(feat)
        ax.xaxis.set_ticks_position('bottom')
        ax.xaxis.set_ticklabels('')
        ax.yaxis.set_ticks_position('left')
        ax.yaxis.set_ticklabels('')
        if ifeat == 0:
            ax.legend(['VT', 'EB'], loc=1, prop={'size': 10})
    plt.savefig('/home/nadege/Dropbox/Soutenance/pdfs.png')
    plt.show()
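
The plotting loop above assumes opt.gaussians maps each feature name to a dictionary with a common abscissa under 'vec' and one PDF per class ('VT', 'EB'). The snippet below is a hedged sketch that builds a compatible dictionary from synthetic data with a kernel density estimate; this is only an assumption about the layout, and the real compute_pdfs() may proceed differently.

# Hedged sketch: build a dictionary with the layout the plotting code expects,
# i.e. {feature: {'vec': x_axis, 'VT': pdf_values, 'EB': pdf_values}}.
import numpy as np
from scipy.stats import gaussian_kde

def fake_gaussians(features, n=200, seed=0):
    rng = np.random.RandomState(seed)
    gauss = {}
    for feat in features:
        vt = rng.normal(1.0, 0.3, size=n)   # synthetic 'VT' feature values
        eb = rng.normal(2.0, 0.5, size=n)   # synthetic 'EB' feature values
        vec = np.linspace(min(vt.min(), eb.min()), max(vt.max(), eb.max()), 100)
        gauss[feat] = {'vec': vec,
                       'VT': gaussian_kde(vt)(vec),
                       'EB': gaussian_kde(eb)(vec)}
    return gauss

gauss = fake_gaussians(['AsDec', 'Dur', 'Ene', 'KRapp'])
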
Example #3
def run_unsupervised():

    from options import MultiOptions
    opt = MultiOptions()
    opt.opdict['method'] = 'kmeans'

    from unsupervised import classifier
    classifier(opt)
Example #4
def compare_pdfs_train():
    """
  Affiche et compare les pdfs des différents training sets.
  """
    from options import MultiOptions
    opt = MultiOptions()

    opt.opdict['stations'] = ['IJEN']
    opt.opdict['channels'] = ['Z']
    opt.opdict['Types'] = ['Tremor', 'VulkanikB', '?']

    opt.opdict['train_file'] = '%s/train_10' % (opt.opdict['libdir'])
    opt.opdict['label_filename'] = '%s/Ijen_reclass_all.csv' % opt.opdict['libdir']

    train = read_binary_file(opt.opdict['train_file'])
    nb_tir = len(train)

    for sta in opt.opdict['stations']:
        for comp in opt.opdict['channels']:
            opt.x, opt.y = opt.features_onesta(sta, comp)

    X = opt.x
    Y = opt.y
    c = ['r', 'b', 'g']
    lines = ['-', '--', '-.', ':', '-', '--', '-.', ':', '*', 'v']
    features = opt.opdict['feat_list']
    for feat in features:
        print feat
        opt.opdict['feat_list'] = [feat]
        fig = plt.figure()
        fig.set_facecolor('white')
        for tir in range(nb_tir):
            tr = map(int, train[tir])
            opt.x = X.reindex(index=tr, columns=[feat])
            opt.y = Y.reindex(index=tr)
            opt.classname2number()
            opt.compute_pdfs()
            g = opt.gaussians

            for it, t in enumerate(opt.types):
                plt.plot(g[feat]['vec'],
                         g[feat][t],
                         ls=lines[tir],
                         color=c[it])
        plt.title(feat)
        plt.legend(opt.types)
        plt.show()
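
compare_pdfs_train assumes that read_binary_file(opt.opdict['train_file']) returns one sequence of event indices per training-set draw. The reader below is a hypothetical sketch with that behaviour; the pickle format is an assumption, and the project's actual serialisation may differ.

# Hypothetical sketch of a reader compatible with the loop above: it must return
# something like [[12, 45, 78, ...], [3, 67, ...], ...], one list of event
# indices per training-set draw.
import pickle

def read_binary_file_sketch(filename):
    with open(filename, 'rb') as f:
        return pickle.load(f)
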
Example #5
def run_all():

    from options import MultiOptions
    opt = MultiOptions()
    #opt.count_number_of_events()

    from do_classification import classifier
    classifier(opt)

    if opt.opdict['method'] in ['lr', 'svm', 'lrsk']:
        from results import AnalyseResults
        res = AnalyseResults()
        if res.opdict['plot_confusion']:
            res.plot_confusion()

    else:
        from results import AnalyseResultsExtraction
        res = AnalyseResultsExtraction()
Example #6
def compare_clement():
    """
  Comparaison des attributs de Clément avec ceux que j'ai recalculés.
  """

    from options import MultiOptions
    opt = MultiOptions()
    opt.opdict['channels'] = ['Z']

    # My computations
    opt.opdict['feat_list'] = ['Dur', 'AsDec', 'RappMaxMean', 'Kurto', 'KRapp']
    opt.opdict['feat_log'] = ['AsDec', 'RappMaxMean', 'Kurto']
    #opt.opdict['feat_list'] = ['Ene']
    #opt.opdict['feat_log'] = ['Ene']
    opt.do_tri()
    opt.x = opt.xs[0]
    opt.y = opt.ys[0]
    opt.x.columns = opt.opdict['feat_list']
    opt.compute_pdfs()
    my_gauss = opt.gaussians

    if ('Kurto' in opt.opdict['feat_list']
            and 'RappMaxMean' in opt.opdict['feat_list']):
        fig = plt.figure()
        fig.set_facecolor('white')
        plt.plot(np.log(opt.x.Kurto), np.log(opt.x.RappMaxMean), 'ko')
        plt.xlabel('Kurto')
        plt.ylabel('RappMaxMean')
        plt.show()

    # Clément's computations
    #opt.opdict['feat_list'] = ['Dur','AsDec','RappMaxMean','Kurto','Ene']
    opt.opdict['feat_log'] = []
    opt.opdict['feat_train'] = 'clement_train.csv'
    opt.opdict['feat_test'] = 'clement_test.csv'
    opt.do_tri()
    opt.x = opt.xs[0]
    opt.y = opt.ys[0]
    opt.compute_pdfs()

    # Solid line --> Clément
    # Dashed line --> me
    opt.plot_superposed_pdfs(my_gauss, save=False)
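
The last call delegates the overlay to opt.plot_superposed_pdfs. As a hedged illustration of what such an overlay can look like (not the project's actual implementation), the helper below draws one figure per feature, with solid lines for one set of PDFs and dashed lines for the other, following the solid/dashed convention noted in the comments above.

import matplotlib.pyplot as plt

def plot_superposed_pdfs_sketch(gauss_solid, gauss_dashed, classes=('VT', 'EB'),
                                colors=('b', 'r')):
    # Hypothetical overlay: solid lines for one set of PDFs, dashed for the other.
    for feat in sorted(gauss_solid):
        fig = plt.figure()
        fig.set_facecolor('white')
        for cl, col in zip(classes, colors):
            plt.plot(gauss_solid[feat]['vec'], gauss_solid[feat][cl], col + '-',
                     lw=2., label=cl)
            plt.plot(gauss_dashed[feat]['vec'], gauss_dashed[feat][cl], col + '--',
                     lw=2.)
        plt.title(feat)
        plt.legend()
        plt.show()
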
Example #7
def compare_lissage():
    """
  Comparaison des kurtosis avec deux lissages différents.
  """

    plot_envelopes()

    from options import MultiOptions
    opt = MultiOptions()
    opt.opdict['channels'] = ['Z']

    # Smoothing over 0.5 s windows
    opt.opdict['feat_list'] = ['Kurto']
    opt.opdict['feat_log'] = ['Kurto']
    opt.do_tri()
    opt.x = opt.xs[0]
    opt.y = opt.ys[0]
    opt.x.columns = opt.opdict['feat_list']
    opt.compute_pdfs()
    gauss_stand = opt.gaussians

    # Smoothing over 1 s windows
    opt.opdict['feat_train'] = '0610_Piton_trainset.csv'
    opt.opdict['feat_test'] = '0610_Piton_testset.csv'
    opt.do_tri()
    opt.x = opt.xs[0]
    opt.y = opt.ys[0]
    opt.compute_pdfs()
    gauss_1s = opt.gaussians

    # Smoothing over 5 s windows
    opt.opdict['feat_train'] = '1809_Piton_trainset.csv'
    opt.opdict['feat_test'] = '1809_Piton_testset.csv'
    opt.do_tri()
    opt.x = opt.xs[0]
    opt.y = opt.ys[0]
    opt.compute_pdfs()
    gauss_5s = opt.gaussians

    # Smoothing over 10 s windows
    opt.opdict['feat_train'] = '0510_Piton_trainset.csv'
    opt.opdict['feat_test'] = '0510_Piton_testset.csv'
    opt.do_tri()
    opt.x = opt.xs[0]
    opt.y = opt.ys[0]
    opt.compute_pdfs()
    gauss_10s = opt.gaussians

    ### PLOT OF SUPERPOSED PDFs ###
    fig = plt.figure(figsize=(12, 2.5))
    fig.set_facecolor('white')
    for feat in sorted(opt.gaussians):
        maxi = int(
            np.max([
                gauss_stand[feat]['vec'], gauss_1s[feat]['vec'],
                gauss_5s[feat]['vec'], gauss_10s[feat]['vec']
            ]))

        ax1 = fig.add_subplot(141)
        ax1.plot(gauss_stand[feat]['vec'],
                 gauss_stand[feat]['VT'],
                 ls='-',
                 c='b',
                 lw=2.,
                 label='VT')
        ax1.plot(gauss_stand[feat]['vec'],
                 gauss_stand[feat]['EB'],
                 ls='-',
                 c='r',
                 lw=2.,
                 label='EB')
        ax1.set_xlim([0, maxi])
        ax1.set_xlabel(feat)
        ax1.set_title('0.5 s')
        ax1.legend(prop={'size': 10})

        ax2 = fig.add_subplot(142)
        ax2.plot(gauss_1s[feat]['vec'],
                 gauss_1s[feat]['VT'],
                 ls='-',
                 c='b',
                 lw=2.)
        ax2.plot(gauss_1s[feat]['vec'],
                 gauss_1s[feat]['EB'],
                 ls='-',
                 c='r',
                 lw=2.)
        ax2.set_xlim([0, maxi])
        ax2.set_xlabel(feat)
        ax2.set_title('1 s')
        ax2.set_yticklabels('')

        ax3 = fig.add_subplot(143)
        ax3.plot(gauss_5s[feat]['vec'],
                 gauss_5s[feat]['VT'],
                 ls='-',
                 c='b',
                 lw=2.)
        ax3.plot(gauss_5s[feat]['vec'],
                 gauss_5s[feat]['EB'],
                 ls='-',
                 c='r',
                 lw=2.)
        ax3.set_xlim([0, maxi])
        ax3.set_xlabel(feat)
        ax3.set_title('5 s')
        ax3.set_yticklabels('')

        ax4 = fig.add_subplot(144)
        ax4.plot(gauss_10s[feat]['vec'],
                 gauss_10s[feat]['VT'],
                 ls='-',
                 c='b',
                 lw=2.)
        ax4.plot(gauss_10s[feat]['vec'],
                 gauss_10s[feat]['EB'],
                 ls='-',
                 c='r',
                 lw=2.)
        ax4.set_xlim([0, maxi])
        ax4.set_xlabel(feat)
        ax4.set_title('10 s')
        ax4.set_yticklabels('')

        #plt.savefig('%s/features/comp_%s.png'%(opt.opdict['outdir'],feat))
        plt.show()
Example #8
def plot_best_worst():
    """
  Plots the pdfs of the training set for the best and worst draws 
  and compare with the whole training set.
  """
    from options import MultiOptions, read_binary_file
    opt = MultiOptions()

    feat_list = [('AsDec', 0, 1), ('Bandwidth', 5, 0), ('CentralF', 1, 0),
                 ('Centroid_time', 4, 0), ('Dur', 4, 1), ('Ene0-5', 1, 4),
                 ('Ene5-10', 0, 4), ('Ene', 0, 3), ('F_low', 4, 2),
                 ('F_up', 0, 7), ('IFslope', 7, 8), ('Kurto', 2, 0),
                 ('MeanPredF', 1, 4), ('PredF', 1, 4), ('RappMaxMean', 0, 1),
                 ('RappMaxMeanTF', 4, 0), ('Skewness', 2, 5),
                 ('TimeMaxSpec', 4, 0), ('Rectilinearity', 8, 3),
                 ('Planarity', 1, 2)]

    opt.opdict['feat_list'] = opt.opdict['feat_all']
    opt.opdict['feat_log'] = ['AsDec', 'Ene', 'Kurto', 'RappMaxMean']
    opt.opdict['feat_filename'] = '../results/Piton/features/Piton_trainset.csv'
    opt.opdict['label_filename'] = '../lib/Piton/class_train_set.csv'
    x_all, y_all = opt.features_onesta('BOR', 'Z')

    list_files = glob.glob(os.path.join('../lib/Piton', 'learning*'))
    list_files.sort()

    m = len(y_all)
    mtraining = int(0.6 * m)
    mcv = int(0.2 * m)
    mtest = int(0.2 * m)

    for feat, best, worst in feat_list:
        print feat, best, worst
        fig = plt.figure()
        fig.set_facecolor('white')

        # ALL
        opt.x = x_all.reindex(columns=[feat])
        opt.y = y_all.reindex(index=opt.x.index)
        opt.opdict['feat_list'] = [feat]
        opt.compute_pdfs()
        g = opt.gaussians
        plt.plot(g[feat]['vec'], g[feat]['VT'], 'k', lw=2., label='VT')
        plt.plot(g[feat]['vec'], g[feat]['EB'], 'k--', lw=2., label='EB')

        labels = ['best', 'worst']
        colors = ['r', 'g']
        b_file = list_files[best]
        w_file = list_files[worst]
        for ifile, file in enumerate([b_file, w_file]):
            dic = read_binary_file(file)

            # TRAINING SET
            opt.x = x_all.reindex(columns=[feat], index=dic[:mtraining])
            opt.y = y_all.reindex(index=dic[:mtraining])
            opt.compute_pdfs()
            g_train = opt.gaussians
            plt.plot(g_train[feat]['vec'],
                     g_train[feat]['VT'],
                     '-',
                     c=colors[ifile],
                     label=labels[ifile])
            plt.plot(g_train[feat]['vec'],
                     g_train[feat]['EB'],
                     '--',
                     c=colors[ifile])

        plt.legend()
        plt.title(feat)
        plt.savefig('%s/best_worst_%s.png' % (opt.opdict['fig_path'], feat))
        plt.show()
Example #9
def plot_pdf_subsets():
    """
  Plots the pdfs of the training set, CV set and test set on the same 
  figure. One subfigure for each event type. 
  """
    from options import MultiOptions, read_binary_file
    opt = MultiOptions()

    feat_list = [('AsDec', 0, 1), ('Bandwidth', 5, 0), ('CentralF', 1, 0),
                 ('Centroid_time', 4, 0), ('Dur', 4, 1), ('Ene0-5', 1, 4),
                 ('Ene5-10', 0, 4), ('Ene', 0, 3), ('F_low', 4, 2),
                 ('F_up', 0, 7), ('IFslope', 7, 8), ('Kurto', 2, 0),
                 ('MeanPredF', 1, 4), ('PredF', 1, 4), ('RappMaxMean', 0, 1),
                 ('RappMaxMeanTF', 4, 0), ('Skewness', 2, 5),
                 ('TimeMaxSpec', 4, 0), ('Rectilinearity', 8, 3),
                 ('Planarity', 1, 2)]

    opt.opdict['feat_list'] = opt.opdict['feat_all']
    opt.opdict['feat_filename'] = '../results/Piton/features/Piton_trainset.csv'
    opt.opdict['label_filename'] = '../lib/Piton/class_train_set.csv'
    x_all, y_all = opt.features_onesta('BOR', 'Z')
    print len(y_all)

    list_files = glob.glob(os.path.join('../lib/Piton', 'learning*'))
    list_files.sort()

    m = len(y_all)
    mtraining = int(0.6 * m)
    mcv = int(0.2 * m)
    mtest = int(0.2 * m)

    for feat, best, worst in feat_list:
        print feat, best, worst
        fig = plt.figure(figsize=(10, 4))
        fig.set_facecolor('white')

        ax1 = fig.add_subplot(121)
        ax2 = fig.add_subplot(122)

        # ALL
        opt.x = x_all.reindex(columns=[feat])
        opt.y = y_all.reindex(index=opt.x.index)
        opt.opdict['feat_list'] = [feat]
        opt.compute_pdfs()
        g = opt.gaussians
        ax1.plot(g[feat]['vec'], g[feat]['VT'], 'k', lw=2.)
        ax2.plot(g[feat]['vec'], g[feat]['EB'], 'k', lw=2.)

        labels = ['best', 'worst']
        colors = ['r', 'g']
        b_file = list_files[best]
        w_file = list_files[worst]
        for ifile, file in enumerate([b_file, w_file]):
            dic = read_binary_file(file)

            # TRAINING SET
            opt.x = x_all.reindex(columns=[feat], index=dic[:mtraining])
            opt.y = y_all.reindex(index=dic[:mtraining])
            opt.compute_pdfs()
            g_train = opt.gaussians
            ax1.plot(g_train[feat]['vec'],
                     g_train[feat]['VT'],
                     '-',
                     c=colors[ifile],
                     label=labels[ifile])
            ax2.plot(g_train[feat]['vec'],
                     g_train[feat]['EB'],
                     '-',
                     c=colors[ifile],
                     label=labels[ifile])

            # CV SET
            opt.x = x_all.reindex(columns=[feat],
                                  index=dic[mtraining:mtraining + mcv])
            opt.y = y_all.reindex(index=dic[mtraining:mtraining + mcv])
            opt.compute_pdfs()
            g_cv = opt.gaussians
            ax1.plot(g_cv[feat]['vec'],
                     g_cv[feat]['VT'],
                     '--',
                     c=colors[ifile])
            ax2.plot(g_cv[feat]['vec'],
                     g_cv[feat]['EB'],
                     '--',
                     c=colors[ifile])

            # TEST SET
            opt.x = x_all.reindex(columns=[feat], index=dic[mtraining + mcv:])
            opt.y = y_all.reindex(index=dic[mtraining + mcv:])
            opt.compute_pdfs()
            g_test = opt.gaussians
            ax1.plot(g_test[feat]['vec'],
                     g_test[feat]['VT'],
                     ':',
                     c=colors[ifile])
            ax2.plot(g_test[feat]['vec'],
                     g_test[feat]['EB'],
                     ':',
                     c=colors[ifile])

        ax1.set_title('VT')
        ax2.set_title('EB')
        ax1.legend()
        ax2.legend()
        plt.suptitle(feat)
        plt.savefig('%s/subsets_%s.png' % (opt.opdict['fig_path'], feat))
        plt.show()
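
Both this example and the previous one slice each draw with the same 60/20/20 convention (mtraining, mcv, mtest). The sketch below spells out that convention, assuming a draw is an ordered sequence of event indices.

# Hedged sketch of the 60/20/20 split used above: the first 60% of a draw is the
# training set, the next 20% the cross-validation set and the last 20% the test set.
def split_draw(draw):
    m = len(draw)
    mtraining = int(0.6 * m)
    mcv = int(0.2 * m)
    return (draw[:mtraining],
            draw[mtraining:mtraining + mcv],
            draw[mtraining + mcv:])

train_idx, cv_idx, test_idx = split_draw(list(range(100)))
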
Example #10
def compare_ponsets(set='test'):
    """
  Compare the Ponsets determined with the frequency stack of the spectrogram 
  in function of the spectrogram computation parameters...
  """
    from scipy.io.matlab import mio
    from features_extraction_piton import SeismicTraces
    from options import MultiOptions
    opt = MultiOptions()

    if set == 'test':
        datafiles = glob.glob(
            os.path.join(opt.opdict['datadir'], 'TestSet/SigEve_*'))
        datafiles.sort()
        liste = [
            os.path.basename(datafiles[i]).split('_')[1].split('.mat')[0]
            for i in range(len(datafiles))
        ]
        liste = map(int, liste)  # sort the file list by event number
        liste.sort()

        df_norm = pd.read_csv('%s/features/Piton_testset.csv' %
                              opt.opdict['outdir'],
                              index_col=False)
        df_norm = df_norm.reindex(columns=['Ponset_freq', 'Dur'])

        df_clement = pd.read_csv('%s/features/clement_test.csv' %
                                 opt.opdict['outdir'],
                                 index_col=False)
        df_clement = df_clement.reindex(columns=['Dur'])

        df_hash_64 = pd.read_csv('%s/features/HT_Piton_testset.csv' %
                                 opt.opdict['outdir'],
                                 index_col=False)
        df_hash_64 = df_hash_64.reindex(columns=['Ponset'])

        df_hash_32 = pd.read_csv('%s/features/HT32_Piton_testset.csv' %
                                 opt.opdict['outdir'],
                                 index_col=False)
        df_hash_32 = df_hash_32.reindex(columns=['Ponset'])

        for ifile, numfile in enumerate(liste):
            file = os.path.join(opt.opdict['datadir'],
                                'TestSet/SigEve_%d.mat' % numfile)
            print ifile, file
            mat = mio.loadmat(file)
            for comp in opt.opdict['channels']:
                ind = (numfile, 'BOR', comp)
                p_norm = df_norm.reindex(index=[str(ind)]).Ponset_freq
                p_hash_64 = df_hash_64.reindex(index=[str(ind)]).Ponset
                p_hash_32 = df_hash_32.reindex(index=[str(ind)]).Ponset
                dur = df_norm.reindex(index=[str(ind)]).Dur * 100
                dur_cl = df_clement.reindex(index=[str(ind)]).Dur * 100

                s = SeismicTraces(mat, comp)
                fig = plt.figure(figsize=(9, 4))
                fig.set_facecolor('white')
                plt.plot(s.tr, 'k')
                plt.plot([p_norm, p_norm],
                         [np.min(s.tr), np.max(s.tr)],
                         'r',
                         lw=2.,
                         label='norm')
                plt.plot([p_norm + dur, p_norm + dur],
                         [np.min(s.tr), np.max(s.tr)],
                         'r--',
                         lw=2.)
                plt.plot([p_norm + dur_cl, p_norm + dur_cl],
                         [np.min(s.tr), np.max(s.tr)],
                         '--',
                         c='orange',
                         lw=2.)
                plt.plot([p_hash_64, p_hash_64],
                         [np.min(s.tr), np.max(s.tr)],
                         'g',
                         lw=2.,
                         label='hash_64')
                plt.plot([p_hash_32, p_hash_32],
                         [np.min(s.tr), np.max(s.tr)],
                         'y',
                         lw=2.,
                         label='hash_32')
                plt.legend()
                plt.show()
Example #11
def read_data_for_features_extraction(save=False):
    """
  Extracts the features from all seismic files
  If option 'save' is set, then save the pandas DataFrame as a .csv file
  """
    from options import MultiOptions
    opt = MultiOptions()

    if save:
        if os.path.exists(opt.opdict['feat_filename']):
            print "WARNING !! File %s already exists" % opt.opdict[
                'feat_filename']
            print "Check if you really want to replace it..."
            sys.exit()

    list_features = opt.opdict['feat_list']
    df = pd.DataFrame(columns=list_features)

    hob_all = {}

    # Classification
    tsort = opt.read_classification()
    tsort.index = tsort.Date
    tsort = tsort.reindex(columns=['Date', 'Type'])

    list_sta = opt.opdict['stations']
    for ifile in range(tsort.shape[0]):
        date = tsort.values[ifile, 0]
        type = tsort.values[ifile, 1]

        for sta in list_sta:
            print "#####", sta
            counter = 0
            for comp in opt.opdict['channels']:
                ind = (date, sta, comp)
                dic = pd.DataFrame(columns=list_features, index=[ind])
                dic['EventType'] = type
                dic['Ponset'] = 0
                list_files = glob.glob(
                    os.path.join(
                        opt.opdict['datadir'], sta, '*%s.D' % comp,
                        '*%s.D*%s_%s*' % (comp, str(date)[:8], str(date)[8:])))
                list_files.sort()
                if len(list_files) > 0:
                    file = list_files[0]
                    print ifile, file
                    if opt.opdict['option'] == 'norm':
                        counter = counter + 1
                        dic = extract_norm_features(list_features, date, file,
                                                    dic)
                    elif opt.opdict['option'] == 'hash':
                        permut_file = '%s/permut_%s' % (
                            opt.opdict['libdir'],
                            opt.opdict['feat_test'].split('.')[0])
                        dic = extract_hash_features(list_features,
                                                    date,
                                                    file,
                                                    dic,
                                                    permut_file,
                                                    plot=True)
                    df = df.append(dic)

            if counter == 3 and ('Rectilinearity' in list_features
                                 or 'Planarity' in list_features
                                 or 'Azimuth' in list_features
                                 or 'Incidence' in list_features):
                from waveform_features import polarization_analysis
                d_mean = (df.Dur[(date, sta, comp)] + df.Dur[
                    (date, sta, 'E')] + df.Dur[(date, sta, 'Z')]) / 3.
                po_mean = int((df.Ponset[(date, sta, comp)] + df.Ponset[
                    (date, sta, 'E')] + df.Ponset[(date, sta, 'Z')]) / 3)
                list_files = [
                    file,
                    file.replace("N.D", "E.D"),
                    file.replace("N.D", "Z.D")
                ]
                rect, plan, az, iang = polarization_analysis(list_files,
                                                             d_mean,
                                                             po_mean,
                                                             plot=False)
                if 'Rectilinearity' in list_features:
                    df.Rectilinearity[(date, sta, 'Z')] = rect
                    df.Rectilinearity[(date, sta, 'N')] = rect
                    df.Rectilinearity[(date, sta, 'E')] = rect
                if 'Planarity' in list_features:
                    df.Planarity[(date, sta, 'Z')] = plan
                    df.Planarity[(date, sta, 'N')] = plan
                    df.Planarity[(date, sta, 'E')] = plan
                if 'Azimuth' in list_features:
                    df.Azimuth[(date, sta, 'Z')] = az
                    df.Azimuth[(date, sta, 'N')] = az
                    df.Azimuth[(date, sta, 'E')] = az
                if 'Incidence' in list_features:
                    df.Incidence[(date, sta, 'Z')] = iang
                    df.Incidence[(date, sta, 'N')] = iang
                    df.Incidence[(date, sta, 'E')] = iang

    if save:
        print "Features written in %s" % opt.opdict['feat_filename']
        df.to_csv(opt.opdict['feat_filename'])
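
The polarization block above writes the same scalar into the Z, N and E rows of each attribute column. A hypothetical helper (not part of the original module) that factors out this repetition, using the same chained indexing style as the code above:

# Hypothetical helper: assign one value to the (date, sta, comp) rows of a column
# for all three components, mirroring the assignments in the block above.
def set_all_components(df, column, date, sta, value, components=('Z', 'N', 'E')):
    for comp in components:
        df[column][(date, sta, comp)] = value

# e.g. set_all_components(df, 'Rectilinearity', date, sta, rect)
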
Example #12
def compare_ponsets(set='test'):
    """
  Compare the Ponsets determined either with the kurtosis gradient, either with the 
  frequency stack of the spectrogram.
  """
    from scipy.io.matlab import mio
    from options import MultiOptions
    opt = MultiOptions()

    if set == 'test':
        datafiles = glob.glob(
            os.path.join(opt.opdict['datadir'], 'TestSet/SigEve_*'))
        datafiles.sort()
        liste = [
            os.path.basename(datafiles[i]).split('_')[1].split('.mat')[0]
            for i in range(len(datafiles))
        ]
        liste = map(int, liste)  # sort the file list by event number
        liste.sort()

        df = pd.read_csv('%s/features/Piton_testset.csv' %
                         opt.opdict['outdir'],
                         index_col=False)
        df = df.reindex(
            columns=['Dur_freq', 'Ponset_freq', 'Dur_grad', 'Ponset_grad'])
        for ifile, numfile in enumerate(liste):
            file = os.path.join(opt.opdict['datadir'],
                                'TestSet/SigEve_%d.mat' % numfile)
            print ifile, file
            mat = mio.loadmat(file)
            for comp in opt.opdict['channels']:
                ind = (numfile, 'BOR', comp)
                df_one = df.reindex(index=[str(ind)])
                pfr = df_one.Ponset_freq
                pgr = df_one.Ponset_grad
                dfr = df_one.Dur_freq
                dgr = df_one.Dur_grad

                s = SeismicTraces(mat, comp)
                fig = plt.figure(figsize=(9, 4))
                fig.set_facecolor('white')
                plt.plot(s.tr, 'k')
                plt.plot([pfr, pfr], [np.min(s.tr), np.max(s.tr)],
                         'r',
                         lw=2.,
                         label='freq')
                plt.plot([pgr, pgr], [np.min(s.tr), np.max(s.tr)],
                         'r--',
                         lw=2.,
                         label='grad')
                plt.plot([pfr + dfr * 1. / s.dt, pfr + dfr * 1. / s.dt],
                         [np.min(s.tr), np.max(s.tr)],
                         'y',
                         lw=2.)
                plt.plot([pgr + dgr * 1. / s.dt, pgr + dgr * 1. / s.dt],
                         [np.min(s.tr), np.max(s.tr)],
                         'y--',
                         lw=2.)
                plt.legend()
                plt.show()
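
The repeated two-point plt.plot calls above draw vertical markers spanning the full amplitude of the trace at a given sample. A hypothetical helper factoring out that idiom:

import numpy as np
import matplotlib.pyplot as plt

# Hypothetical helper: draw a vertical marker over the full amplitude range of a
# trace at the given sample index, as done repeatedly in the examples above.
def mark_onset(trace, sample, **kwargs):
    plt.plot([sample, sample], [np.min(trace), np.max(trace)], **kwargs)

# e.g. mark_onset(s.tr, pfr, color='r', lw=2., label='freq')
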
Example #13
def read_data_for_features_extraction(set='test', save=False):
    """
  Extracts the features from all seismic files
  If option 'save' is set, then save the pandas DataFrame as a .csv file
  """
    from scipy.io.matlab import mio
    from options import MultiOptions
    opt = MultiOptions()

    if set == 'train':
        opt.opdict['feat_filename'] = '%s/features/%s' % (
            opt.opdict['outdir'], opt.opdict['feat_train'])
    print opt.opdict['feat_filename']

    if save:
        if os.path.exists(opt.opdict['feat_filename']):
            print "WARNING !! File %s already exists" % opt.opdict[
                'feat_filename']
            print "Check if you really want to replace it..."
            sys.exit()

    list_features = opt.opdict['feat_list']
    df = pd.DataFrame(columns=list_features)

    if set == 'test':
        datafiles = glob.glob(
            os.path.join(opt.opdict['datadir'], 'TestSet/SigEve_*'))
        datafiles.sort()
        liste = [
            os.path.basename(datafiles[i]).split('_')[1].split('.mat')[0]
            for i in range(len(datafiles))
        ]
        liste = map(int, liste)  # sort the file list by event number
        liste.sort()

        tsort = opt.read_csvfile(opt.opdict['label_filename'])
        tsort.index = tsort.Date

        for ifile, numfile in enumerate(liste):
            file = os.path.join(opt.opdict['datadir'],
                                'TestSet/SigEve_%d.mat' % numfile)
            print ifile, file
            mat = mio.loadmat(file)

            counter = 0
            for comp in opt.opdict['channels']:
                counter = counter + 1
                ind = (numfile, 'BOR', comp)
                dic = pd.DataFrame(columns=list_features, index=[ind])
                dic['EventType'] = tsort[tsort.Date == numfile].Type.values[0]
                dic['Ponset'] = 0

                s = SeismicTraces(mat, comp)
                list_attr = s.__dict__.keys()

                if len(list_attr) > 2:
                    if opt.opdict['option'] == 'norm':
                        dic = extract_norm_features(s, list_features, dic)
                    elif opt.opdict['option'] == 'hash':
                        if ifile in [
                                409, 1026, 1027, 1028, 1993, 2121, 2122, 2123,
                                2424, 2441, 3029, 3058, 3735, 3785, 3852, 3930,
                                4200, 4463, 4464, 4746, 6150, 6382, 6672, 6733
                        ]:
                            continue
                        dic = extract_hash_features(s,
                                                    list_features,
                                                    dic,
                                                    opt.opdict['permut_file'],
                                                    plot=False)
                    df = df.append(dic)

                if counter == 3 and ('Rectilinearity' in list_features
                                     or 'Planarity' in list_features
                                     or 'Azimuth' in list_features
                                     or 'Incidence' in list_features):
                    d_mean = (df.Dur[(numfile, 'BOR', comp)] +
                              df.Dur[(numfile, 'BOR', 'E')] +
                              df.Dur[(numfile, 'BOR', 'Z')]) / 3.
                    po_mean = int((df.Ponset[(numfile, 'BOR', comp)] +
                                   df.Ponset[(numfile, 'BOR', 'E')] +
                                   df.Ponset[(numfile, 'BOR', 'Z')]) / 3)
                    s.read_all_files(mat, False)
                    rect, plan, az, iang = polarization_analysis(s,
                                                                 d_mean,
                                                                 po_mean,
                                                                 plot=False)
                    if 'Rectilinearity' in list_features:
                        df.Rectilinearity[(numfile, 'BOR', 'Z')] = rect
                        df.Rectilinearity[(numfile, 'BOR', 'N')] = rect
                        df.Rectilinearity[(numfile, 'BOR', 'E')] = rect
                    if 'Planarity' in list_features:
                        df.Planarity[(numfile, 'BOR', 'Z')] = plan
                        df.Planarity[(numfile, 'BOR', 'N')] = plan
                        df.Planarity[(numfile, 'BOR', 'E')] = plan
                    if 'Azimuth' in list_features:
                        df.Azimuth[(numfile, 'BOR', 'Z')] = az
                        df.Azimuth[(numfile, 'BOR', 'N')] = az
                        df.Azimuth[(numfile, 'BOR', 'E')] = az
                    if 'Incidence' in list_features:
                        df.Incidence[(numfile, 'BOR', 'Z')] = iang
                        df.Incidence[(numfile, 'BOR', 'N')] = iang
                        df.Incidence[(numfile, 'BOR', 'E')] = iang

    elif set == 'train':
        datafile = os.path.join(opt.opdict['datadir'],
                                'TrainingSetPlusSig_2.mat')
        mat = mio.loadmat(datafile)
        hob_all_EB = {}
        for i in range(mat['KurtoEB'].shape[1]):
            print "EB", i
            if i != 10 and i != 61:
                continue
            counter = 0
            for comp in opt.opdict['channels']:
                counter = counter + 1
                dic = pd.DataFrame(columns=list_features,
                                   index=[(i, 'BOR', comp)])
                dic['EventType'] = 'EB'
                dic['Ponset'] = 0

                s = SeismicTraces(mat, comp, train=[i, 'EB'])
                list_attr = s.__dict__.keys()
                if len(list_attr) > 2:
                    if opt.opdict['option'] == 'norm':
                        dic = extract_norm_features(s, list_features, dic)
                    elif opt.opdict['option'] == 'hash':
                        dic = extract_hash_features(s,
                                                    list_features,
                                                    dic,
                                                    opt.opdict['permut_file'],
                                                    plot=False)
                    df = df.append(dic)

            neb = i + 1
            if counter == 3 and ('Rectilinearity' in list_features
                                 or 'Planarity' in list_features
                                 or 'Azimuth' in list_features
                                 or 'Incidence' in list_features):
                d_mean = (df.Dur[(i, 'BOR', comp)] + df.Dur[(i, 'BOR', 'E')] +
                          df.Dur[(i, 'BOR', 'Z')]) / 3.
                po_mean = int(
                    (df.Ponset[(i, 'BOR', comp)] + df.Ponset[(i, 'BOR', 'E')] +
                     df.Ponset[(i, 'BOR', 'Z')]) / 3)
                s.read_all_files(mat, train=[i, 'EB'])
                rect, plan, az, iang = polarization_analysis(s,
                                                             d_mean,
                                                             po_mean,
                                                             plot=False)
                if 'Rectilinearity' in list_features:
                    df.Rectilinearity[(i, 'BOR', 'Z')] = rect
                    df.Rectilinearity[(i, 'BOR', 'N')] = rect
                    df.Rectilinearity[(i, 'BOR', 'E')] = rect
                if 'Planarity' in list_features:
                    df.Planarity[(i, 'BOR', 'Z')] = plan
                    df.Planarity[(i, 'BOR', 'N')] = plan
                    df.Planarity[(i, 'BOR', 'E')] = plan
                if 'Azimuth' in list_features:
                    df.Azimuth[(i, 'BOR', 'Z')] = az
                    df.Azimuth[(i, 'BOR', 'N')] = az
                    df.Azimuth[(i, 'BOR', 'E')] = az
                if 'Incidence' in list_features:
                    df.Incidence[(i, 'BOR', 'Z')] = iang
                    df.Incidence[(i, 'BOR', 'N')] = iang
                    df.Incidence[(i, 'BOR', 'E')] = iang

        for i in range(mat['KurtoVT'].shape[1]):
            print "VT", i + neb
            if i != 5:
                continue
            counter = 0
            for comp in opt.opdict['channels']:
                counter = counter + 1
                dic = pd.DataFrame(columns=list_features,
                                   index=[(i + neb, 'BOR', comp)])
                dic['EventType'] = 'VT'
                dic['Ponset'] = 0

                s = SeismicTraces(mat, comp, train=[i, 'VT'])

                list_attr = s.__dict__.keys()
                if len(list_attr) > 2:
                    if opt.opdict['option'] == 'norm':
                        dic = extract_norm_features(s, list_features, dic)
                    elif opt.opdict['option'] == 'hash':
                        dic = extract_hash_features(s,
                                                    list_features,
                                                    dic,
                                                    opt.opdict['permut_file'],
                                                    plot=False)
                    df = df.append(dic)
            if counter == 3 and ('Rectilinearity' in list_features
                                 or 'Planarity' in list_features
                                 or 'Azimuth' in list_features
                                 or 'Incidence' in list_features):
                d_mean = (df.Dur[(i + neb, 'BOR', comp)] +
                          df.Dur[(i + neb, 'BOR', 'E')] +
                          df.Dur[(i + neb, 'BOR', 'Z')]) / 3.
                po_mean = int((df.Ponset[(i + neb, 'BOR', comp)] +
                               df.Ponset[(i + neb, 'BOR', 'E')] +
                               df.Ponset[(i + neb, 'BOR', 'Z')]) / 3)
                s.read_all_files(mat, train=[i, 'VT'])
                rect, plan, az, iang = polarization_analysis(s,
                                                             d_mean,
                                                             po_mean,
                                                             plot=False)
                if 'Rectilinearity' in list_features:
                    df.Rectilinearity[(i + neb, 'BOR', 'Z')] = rect
                    df.Rectilinearity[(i + neb, 'BOR', 'N')] = rect
                    df.Rectilinearity[(i + neb, 'BOR', 'E')] = rect
                if 'Planarity' in list_features:
                    df.Planarity[(i + neb, 'BOR', 'Z')] = plan
                    df.Planarity[(i + neb, 'BOR', 'N')] = plan
                    df.Planarity[(i + neb, 'BOR', 'E')] = plan
                if 'Azimuth' in list_features:
                    df.Azimuth[(i + neb, 'BOR', 'Z')] = az
                    df.Azimuth[(i + neb, 'BOR', 'N')] = az
                    df.Azimuth[(i + neb, 'BOR', 'E')] = az
                if 'Incidence' in list_features:
                    df.Incidence[(i + neb, 'BOR', 'Z')] = iang
                    df.Incidence[(i + neb, 'BOR', 'N')] = iang
                    df.Incidence[(i + neb, 'BOR', 'E')] = iang

    if save:
        print "Features written in %s" % opt.opdict['feat_filename']
        df.to_csv(opt.opdict['feat_filename'])
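
Both extraction functions guard against overwriting an existing feature file with the same warning-and-exit pattern. A small sketch of that guard as a reusable helper (hypothetical, not part of the original module):

import os
import sys

# Hypothetical helper mirroring the guard used above: refuse to continue if the
# output feature file already exists.
def ensure_not_overwriting(path):
    if os.path.exists(path):
        print("WARNING !! File %s already exists" % path)
        print("Check if you really want to replace it...")
        sys.exit()
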