def read_data_for_features_extraction(save=False):
  """
  Extract the features of every classified event from the seismic files.

  For each event returned by ``MultiOptions.read_classification()``, each
  station in ``opdict['stations']`` and each channel in
  ``opdict['channels']``, the first matching data file (in sorted order) is
  passed to ``extract_norm_features`` or ``extract_hash_features``,
  depending on ``opdict['option']``. The resulting row, indexed by
  ``(date, station, channel)``, is appended to a pandas DataFrame.

  When the 'norm' option found a file for three channels of a station and
  at least one polarization feature ('Rectilinearity', 'Planarity',
  'Azimuth', 'Incidence') is requested, ``polarization_analysis`` is run
  once and each requested value is copied to the 'Z', 'N' and 'E' rows.

  :param save: if True, write the DataFrame to ``opdict['feat_filepath']``
    as a .csv file. If that file already exists, a warning is printed and
    the process exits (``sys.exit``) before any extraction is done.
  :return: None; the DataFrame is only written to disk when ``save`` is set.
  """
  from options import MultiOptions
  opt = MultiOptions()

  # Refuse to overwrite an existing feature file.
  if save and os.path.exists(opt.opdict['feat_filepath']):
    print("WARNING !! File %s already exists" % opt.opdict['feat_filepath'])
    print("Check if you really want to replace it...")
    sys.exit()

  list_features = opt.opdict['feat_list']
  polar_features = ('Rectilinearity', 'Planarity', 'Azimuth', 'Incidence')
  need_polarization = any(feat in list_features for feat in polar_features)
  df = pd.DataFrame(columns=list_features)

  # Classification
  tsort = opt.read_classification()
  tsort.index = tsort.Date
  tsort = tsort.reindex(columns=['Date','Type'])

  list_sta = opt.opdict['stations']
  for ifile in range(tsort.shape[0]):
    date = tsort.values[ifile,0]
    event_type = tsort.values[ifile,1]

    for sta in list_sta:
      print("##### %s" % (sta,))
      # Only the 'norm' branch increments the counter, so the polarization
      # step below never runs for the 'hash' option.
      counter = 0
      # File found for the 'N' channel; the E and Z names are derived from it.
      north_file = None
      for comp in opt.opdict['channels']:
        ind = (date,sta,comp)
        dic = pd.DataFrame(columns=list_features,index=[ind])
        dic['EventType'] = event_type
        dic['Ponset'] = 0
        # NOTE(review): assumes str(date) is a day stamp (first 8 chars)
        # followed by a time part -- confirm against the data file naming.
        list_files = glob.glob(os.path.join(opt.opdict['datadir'],sta,'*%s.D'%comp,'*%s.D*%s_%s*'%(comp,str(date)[:8],str(date)[8:])))
        list_files.sort()
        if list_files:
          data_file = list_files[0]
          if comp == 'N':
            north_file = data_file
          print("%s %s" % (ifile, data_file))
          if opt.opdict['option'] == 'norm':
            counter = counter + 1
            dic = extract_norm_features(list_features,date,data_file,dic)
          elif opt.opdict['option'] == 'hash':
            permut_file = '%s/permut_%s'%(opt.opdict['libdir'],opt.opdict['feat_test'].split('.')[0])
            dic = extract_hash_features(list_features,date,data_file,dic,permut_file,plot=True)
          df = df.append(dic)

      if counter == 3 and need_polarization:
        from waveform_features import polarization_analysis
        # Use the 'N' row and the 'N' file explicitly instead of whatever
        # channel happened to be processed last in the loop above.
        d_mean = (df.Dur[(date,sta,'N')] + df.Dur[(date,sta,'E')] + df.Dur[(date,sta,'Z')])/3.
        po_mean = int((df.Ponset[(date,sta,'N')] + df.Ponset[(date,sta,'E')] + df.Ponset[(date,sta,'Z')])/3)
        list_files = [north_file,north_file.replace("N.D","E.D"),north_file.replace("N.D","Z.D")]
        rect, plan, az, iang = polarization_analysis(list_files,d_mean,po_mean,plot=False)
        polar_values = (('Rectilinearity',rect),('Planarity',plan),('Azimuth',az),('Incidence',iang))
        for feat, value in polar_values:
          # Only fill columns that were actually requested; the others do
          # not exist in the DataFrame.
          if feat in list_features:
            for channel in ('Z','N','E'):
              df[feat][(date,sta,channel)] = value

  if save:
    print("Features written in %s" % opt.opdict['feat_filepath'])
    df.to_csv(opt.opdict['feat_filepath'])
Example #2
0
def read_data_for_features_extraction(save=False):
    """
    Extract the features of every classified event from the seismic files.

    For each event returned by ``MultiOptions.read_classification()``, each
    station in ``opdict['stations']`` and each channel in
    ``opdict['channels']``, the first matching data file (in sorted order)
    is passed to ``extract_norm_features`` or ``extract_hash_features``,
    depending on ``opdict['option']``. The resulting row, indexed by
    ``(date, station, channel)``, is appended to a pandas DataFrame.

    When the 'norm' option found a file for three channels of a station and
    at least one polarization feature ('Rectilinearity', 'Planarity',
    'Azimuth', 'Incidence') is requested, ``polarization_analysis`` is run
    once and each requested value is copied to the 'Z', 'N' and 'E' rows.

    :param save: if True, write the DataFrame to ``opdict['feat_filename']``
        as a .csv file. If that file already exists, a warning is printed
        and the process exits (``sys.exit``) before any extraction is done.
    :return: None; the DataFrame is only written to disk when ``save`` is
        set.
    """
    from options import MultiOptions
    opt = MultiOptions()

    # Refuse to overwrite an existing feature file.
    if save and os.path.exists(opt.opdict['feat_filename']):
        print("WARNING !! File %s already exists" %
              opt.opdict['feat_filename'])
        print("Check if you really want to replace it...")
        sys.exit()

    list_features = opt.opdict['feat_list']
    polar_features = ('Rectilinearity', 'Planarity', 'Azimuth', 'Incidence')
    need_polarization = any(feat in list_features for feat in polar_features)
    df = pd.DataFrame(columns=list_features)

    # Classification
    tsort = opt.read_classification()
    tsort.index = tsort.Date
    tsort = tsort.reindex(columns=['Date', 'Type'])

    list_sta = opt.opdict['stations']
    for ifile in range(tsort.shape[0]):
        date = tsort.values[ifile, 0]
        event_type = tsort.values[ifile, 1]

        for sta in list_sta:
            print("##### %s" % (sta,))
            # Only the 'norm' branch increments the counter, so the
            # polarization step below never runs for the 'hash' option.
            counter = 0
            # File found for the 'N' channel; the E and Z names are derived
            # from it.
            north_file = None
            for comp in opt.opdict['channels']:
                ind = (date, sta, comp)
                dic = pd.DataFrame(columns=list_features, index=[ind])
                dic['EventType'] = event_type
                dic['Ponset'] = 0
                # NOTE(review): assumes str(date) is a day stamp (first 8
                # chars) followed by a time part -- confirm against the data
                # file naming.
                list_files = glob.glob(
                    os.path.join(
                        opt.opdict['datadir'], sta, '*%s.D' % comp,
                        '*%s.D*%s_%s*' % (comp, str(date)[:8], str(date)[8:])))
                list_files.sort()
                if list_files:
                    data_file = list_files[0]
                    if comp == 'N':
                        north_file = data_file
                    print("%s %s" % (ifile, data_file))
                    if opt.opdict['option'] == 'norm':
                        counter = counter + 1
                        dic = extract_norm_features(list_features, date,
                                                    data_file, dic)
                    elif opt.opdict['option'] == 'hash':
                        permut_file = '%s/permut_%s' % (
                            opt.opdict['libdir'],
                            opt.opdict['feat_test'].split('.')[0])
                        dic = extract_hash_features(list_features,
                                                    date,
                                                    data_file,
                                                    dic,
                                                    permut_file,
                                                    plot=True)
                    df = df.append(dic)

            if counter == 3 and need_polarization:
                from waveform_features import polarization_analysis
                # Use the 'N' row and the 'N' file explicitly instead of
                # whatever channel happened to be processed last above.
                d_mean = (df.Dur[(date, sta, 'N')] + df.Dur[
                    (date, sta, 'E')] + df.Dur[(date, sta, 'Z')]) / 3.
                po_mean = int((df.Ponset[(date, sta, 'N')] + df.Ponset[
                    (date, sta, 'E')] + df.Ponset[(date, sta, 'Z')]) / 3)
                list_files = [
                    north_file,
                    north_file.replace("N.D", "E.D"),
                    north_file.replace("N.D", "Z.D")
                ]
                rect, plan, az, iang = polarization_analysis(list_files,
                                                             d_mean,
                                                             po_mean,
                                                             plot=False)
                polar_values = (
                    ('Rectilinearity', rect),
                    ('Planarity', plan),
                    ('Azimuth', az),
                    ('Incidence', iang),
                )
                for feat, value in polar_values:
                    # Only fill columns that were actually requested; the
                    # others do not exist in the DataFrame.
                    if feat in list_features:
                        for channel in ('Z', 'N', 'E'):
                            df[feat][(date, sta, channel)] = value

    if save:
        print("Features written in %s" % opt.opdict['feat_filename'])
        df.to_csv(opt.opdict['feat_filename'])