def extract_hash_features(list_features,date,file,dic,permut_file,plot=False):
  """
  Extracts hash table values.

  A SeismicTraces object is built from `file` and `date`, a spectrogram of
  its trace is computed, a window of that spectrogram is turned into a
  binary fingerprint, and the fingerprint is hashed (signature from
  vec_compute_signature, then LSH). The resulting hash table values are
  stored in `dic` under the string keys '0', '1', ...

  :param list_features: feature names; only used to fill `dic` with NaN
      when the trace object has no `tr_grad` attribute.
  :param date: converted with str() and passed to utcdatetime.UTCDateTime.
  :param file: passed as-is to SeismicTraces.
  :param dic: dictionary updated in place and returned.
  :param permut_file: prefix of the pickle file caching the permutations;
      the actual file name is '<permut_file>_<number of spectrogram rows>'.
  :param plot: forwarded to the helper functions; when true, plt.show()
      is called before returning.
  :return: `dic`.
  """
  from fingerprint_functions import FuncFingerprint, spectrogram, ponset_grad, vec_compute_signature, LSH

  s = SeismicTraces(file,utcdatetime.UTCDateTime(str(date)))
  # Without a 'tr_grad' attribute nothing can be computed: mark every
  # requested feature as missing.
  if 'tr_grad' not in s.__dict__:
    for feat in list_features:
      dic[feat] = np.nan
    return dic

  full_tr = s.tr
  grad = s.tr_grad
  q = [100.,.8,1]
  (full_spectro,f,full_time,end) = spectrogram(full_tr,param=q)
  ponset = ponset_grad(full_tr,grad,full_time,plot=plot)

  # Start the window before the onset index; the offset is twice the
  # number of spectrogram columns per unit of full_time.
  idebut = ponset-int(2*full_spectro.shape[1]/full_time[-1])
  # Single-argument print() gives the same output on Python 2 and 3.
  print("%s %s" % (ponset, idebut))
  if idebut < 0:
    idebut = 0
  # The window is as wide as the spectrogram is tall (square window).
  ifin = idebut+full_spectro.shape[0]
  time = full_time[idebut:ifin]
  spectro = full_spectro[:,idebut:ifin]
  if spectro.shape[1] < spectro.shape[0]:
    # Window ran past the end of the spectrogram: take the last columns.
    # NOTE(review): `time` is not re-sliced here, so it may no longer
    # match the columns of `spectro` - confirm FuncFingerprint accepts it.
    spectro = full_spectro[:,-spectro.shape[0]:]
  haar_bin = FuncFingerprint(np.log10(spectro),time,full_tr,f,end,plot=plot,error=plot)

  # Flatten the 2-D fingerprint row by row into a float vector
  # (same ordering as vec_bin[m*i+j] = haar_bin[i,j]).
  vec_bin = np.array(haar_bin,dtype=float).ravel()

  # HASH TABLES
  pp = 500
  p_file = '%s_%d'%(permut_file,full_spectro.shape[0])
  try:
    import cPickle as pickle  # Python 2
  except ImportError:
    import pickle
  if not os.path.isfile(p_file):
    # First run for this spectrogram height: create and cache the
    # permutations so that later runs reuse the same ones.
    print("Permutation")
    from fingerprint_functions import define_permutation
    permut = define_permutation(len(vec_bin),pp)
    with open(p_file,'wb') as fh:
      pickle.dump(permut,fh)
  else:
    # NOTE: unpickling executes arbitrary code; p_file must be a trusted,
    # locally generated cache file.
    with open(p_file,'rb') as fh:
      permut = pickle.load(fh)

  MH_sign = vec_compute_signature(vec_bin,permut)
  HashTab = LSH(MH_sign,l=50)
  for iht,ht in enumerate(HashTab):
    dic['%d'%iht] = ht

  if plot:
    plt.show()

  return dic
Esempio n. 2
0
def extract_hash_features(list_features,
                          date,
                          file,
                          dic,
                          permut_file,
                          plot=False):
    """
    Extracts hash table values.

    A SeismicTraces object is built from `file` and `date`, a spectrogram
    of its trace is computed, a window of that spectrogram is turned into
    a binary fingerprint, and the fingerprint is hashed (signature from
    vec_compute_signature, then LSH). The resulting hash table values are
    stored in `dic` under the string keys '0', '1', ...

    :param list_features: feature names; only used to fill `dic` with NaN
        when the trace object has no `tr_grad` attribute.
    :param date: converted with str() and passed to
        utcdatetime.UTCDateTime.
    :param file: passed as-is to SeismicTraces.
    :param dic: dictionary updated in place and returned.
    :param permut_file: prefix of the pickle file caching the
        permutations; the actual file name is
        '<permut_file>_<number of spectrogram rows>'.
    :param plot: forwarded to the helper functions; when true,
        plt.show() is called before returning.
    :return: `dic`.
    """
    from fingerprint_functions import FuncFingerprint, spectrogram, ponset_grad, vec_compute_signature, LSH

    s = SeismicTraces(file, utcdatetime.UTCDateTime(str(date)))
    # Without a 'tr_grad' attribute nothing can be computed: mark every
    # requested feature as missing.
    if 'tr_grad' not in s.__dict__:
        for feat in list_features:
            dic[feat] = np.nan
        return dic

    full_tr = s.tr
    grad = s.tr_grad
    q = [100., .8, 1]
    (full_spectro, f, full_time, end) = spectrogram(full_tr, param=q)
    ponset = ponset_grad(full_tr, grad, full_time, plot=plot)

    # Start the window before the onset index; the offset is twice the
    # number of spectrogram columns per unit of full_time.
    idebut = ponset - int(2 * full_spectro.shape[1] / full_time[-1])
    # Single-argument print() gives the same output on Python 2 and 3.
    print("%s %s" % (ponset, idebut))
    if idebut < 0:
        idebut = 0
    # The window is as wide as the spectrogram is tall (square window).
    ifin = idebut + full_spectro.shape[0]
    time = full_time[idebut:ifin]
    spectro = full_spectro[:, idebut:ifin]
    if spectro.shape[1] < spectro.shape[0]:
        # Window ran past the end of the spectrogram: take the last
        # columns instead.
        # NOTE(review): `time` is not re-sliced here, so it may no longer
        # match the columns of `spectro` - confirm FuncFingerprint
        # accepts it.
        spectro = full_spectro[:, -spectro.shape[0]:]
    haar_bin = FuncFingerprint(np.log10(spectro),
                               time,
                               full_tr,
                               f,
                               end,
                               plot=plot,
                               error=plot)

    # Flatten the 2-D fingerprint row by row into a float vector
    # (same ordering as vec_bin[m * i + j] = haar_bin[i, j]).
    vec_bin = np.array(haar_bin, dtype=float).ravel()

    # HASH TABLES
    pp = 500
    p_file = '%s_%d' % (permut_file, full_spectro.shape[0])
    try:
        import cPickle as pickle  # Python 2
    except ImportError:
        import pickle
    if not os.path.isfile(p_file):
        # First run for this spectrogram height: create and cache the
        # permutations so that later runs reuse the same ones.
        print("Permutation")
        from fingerprint_functions import define_permutation
        permut = define_permutation(len(vec_bin), pp)
        with open(p_file, 'wb') as fh:
            pickle.dump(permut, fh)
    else:
        # NOTE: unpickling executes arbitrary code; p_file must be a
        # trusted, locally generated cache file.
        with open(p_file, 'rb') as fh:
            permut = pickle.load(fh)

    MH_sign = vec_compute_signature(vec_bin, permut)
    HashTab = LSH(MH_sign, l=50)
    for iht, ht in enumerate(HashTab):
        dic['%d' % iht] = ht

    if plot:
        plt.show()

    return dic
def extract_norm_features(list_features,date,file,dic):
    """
    Extraction of all features given by list_features, except hash
    table values.

    A SeismicTraces object is built from `file` and `date`; each feature
    family is computed only when one of its names is present in
    `list_features`, and the values are written into `dic`.

    :param list_features: names of the features to compute.
    :param date: converted with str() and passed to utcdatetime.UTCDateTime.
    :param file: passed as-is to SeismicTraces.
    :param dic: dictionary updated in place.
    :return: `dic`. When the trace object has no `tr_grad` attribute every
        name of `list_features` is set to NaN in `dic`. When the trace
        object has 7 or fewer instance attributes right after construction
        nothing is computed and None is returned.
    """
    s = SeismicTraces(file,utcdatetime.UTCDateTime(str(date)))
    # Snapshot of the attribute names set by the constructor. It is taken
    # before duration() runs, so the count below is not affected by it.
    list_attr = list(s.__dict__.keys())
    if 'tr_grad' not in list_attr:
        for feat in list_features:
            dic[feat] = np.nan
        return dic
    # Determine P-onset
    s.duration()

    if len(list_attr) <= 7:
        # Too few attributes on the trace object: nothing is extracted.
        # None (not dic) is what callers have always received here.
        return None

    if 'Dur' in list_features:
        # Mean of the predominant frequency
        from waveform_features import spectrogram
        (s, dic['MeanPredF'], dic['TimeMaxSpec'], dic['NbPeaks'],
         dic['Width'], _, vals, dic['sPredF']) = spectrogram(s,plot=False)
        for i, val in enumerate(vals):
            dic['v%d'%i] = val
        dic['Dur'] = s.dur

    if 'Acorr' in list_features:
        from waveform_features import autocorrelation,filt_ratio
        for i, val in enumerate(autocorrelation(s,plot=False)):
            dic['acorr%d'%i] = val

        for i, val in enumerate(filt_ratio(s,plot=False)):
            dic['fratio%d'%i] = val

    # Energy in several frequency bands: (feature name, lower, upper).
    # The helper is imported in every branch so that each band can be
    # requested on its own.
    for name, f1, f2 in (('Ene20-30',20,30),('Ene5-10',5,10),('Ene0-5',.5,5)):
        if name in list_features:
            from waveform_features import energy_between_10Hz_and_30Hz
            window = s.tr[s.i1:s.i2]
            dic[name] = energy_between_10Hz_and_30Hz(window/np.max(window),s.dt,wd=f1,wf=f2,ponset=s.ponset-s.i1,tend=s.tend-s.i1)

    if 'RappMaxMean' in list_features:
        # Max over mean ratio of the envelope
        from waveform_features import max_over_mean
        dic['RappMaxMean'] = max_over_mean(s.tr[s.i1:s.i2])

    if 'AsDec' in list_features:
        # Ascendant phase duration over descendant phase duration
        from waveform_features import growth_over_decay
        p = growth_over_decay(s)
        # Non-positive values are not stored.
        if p > 0:
            dic['AsDec'] = p

    if 'Growth' in list_features:
        from waveform_features import desc_and_asc
        dic['Growth'] = desc_and_asc(s)

    if 'Skewness' in list_features:
        # Skewness
        from waveform_features import skewness
        dic['Skewness'] = skewness(s.tr_env[s.i1:s.i2])

    if 'Kurto' in list_features:
        # Kurtosis
        from waveform_features import kurtosis_envelope
        dic['Kurto'] = kurtosis_envelope(s.tr_env[s.i1:s.i2])

    if ('F_low' in list_features) or ('F_up' in list_features):
        # Lowest and highest frequency for kurtogram analysis
        from waveform_features import kurto_bandpass
        dic['F_low'],dic['F_up'] = kurto_bandpass(s,plot=False)

    if 'Centroid_time' in list_features:
        # Centroid time
        from waveform_features import centroid_time
        dic['Centroid_time'] = centroid_time(s.tr[s.i1:s.i2],s.dt,s.TF,s.ponset-s.i1,tend=s.tend-s.i1,plot=False)

    if 'RappMaxMeanTF' in list_features:
        # Max over mean ratio of the amplitude spectrum (first half of
        # s.TF only). Floor division keeps the slice index an integer.
        from waveform_features import max_over_mean
        dic['RappMaxMeanTF'] = max_over_mean(np.abs(s.TF[:len(s.TF)//2]))

    if 'IFslope' in list_features:
        # Average of the instantaneous frequency and slope of the unwrapped instantaneous phase
        from waveform_features import instant_freq
        vals, dic['IFslope'] = instant_freq(s.tr[s.i1:s.i2],s.dt,s.TF,s.ponset-s.i1,s.tend-s.i1,plot=False)
        for i, val in enumerate(vals):
            dic['if%d'%i] = val

    if ('ibw0' in list_features) or ('Norm_envelope' in list_features):
        # Average of the instantaneous frequency and normalized envelope
        from waveform_features import instant_bw
        vals, Nenv = instant_bw(s.tr[s.i1:s.i2],s.tr_env[s.i1:s.i2],s.dt,s.TF,s.ponset-s.i1,s.tend-s.i1,plot=False)
        dic['Norm_envelope'] = Nenv
        for i, val in enumerate(vals):
            dic['ibw%d'%i] = val

    if ('PredF' in list_features) or ('CentralF' in list_features) or ('Bandwidth' in list_features):
        # Spectral attributes
        from waveform_features import around_freq
        dic['PredF'], dic['Bandwidth'], dic['CentralF'] = around_freq(s.TF,s.freqs,plot=False)

    if 'Cepstrum' in list_features:
        # Cepstrum
        # NOTE(review): the result is computed but never stored in dic.
        from waveform_features import cepstrum
        cepstrum(s.TF,s.freqs,plot=False)

    dic['Ponset'] = s.ponset
    return dic
Esempio n. 4
0
def extract_norm_features(list_features, date, file, dic):
    """
    Extraction of all features given by list_features, except hash
    table values.

    A SeismicTraces object is built from `file` and `date`. The
    spectrogram-based values, the duration and the spectrum are always
    computed; every other feature family is computed only when one of its
    names is present in `list_features`. The values are written into
    `dic`.

    :param list_features: names of the features to compute.
    :param date: converted with str() and passed to
        utcdatetime.UTCDateTime.
    :param file: passed as-is to SeismicTraces.
    :param dic: dictionary updated in place.
    :return: `dic`. When the trace object has no `tr_grad` attribute every
        name of `list_features` is set to NaN in `dic`. When the trace
        object has 5 or fewer instance attributes right after construction
        nothing is computed and None is returned.
    """
    s = SeismicTraces(file, utcdatetime.UTCDateTime(str(date)))
    # Snapshot of the attribute names set by the constructor. It is taken
    # before duration() runs, so the count below is not affected by it.
    list_attr = list(s.__dict__.keys())
    if 'tr_grad' not in list_attr:
        for feat in list_features:
            dic[feat] = np.nan
        return dic
    # Determine P-onset
    s.duration()

    if len(list_attr) <= 5:
        # Too few attributes on the trace object: nothing is extracted.
        # None (not dic) is what callers have always received here.
        return None

    from waveform_features import spectrogram
    (s, dic['MeanPredF'], dic['TimeMaxSpec'], dic['NbPeaks'],
     dic['Width'], _, vals, dic['sPredF']) = spectrogram(s, plot=False)
    for i, val in enumerate(vals):
        dic['v%d' % i] = val
    dic['Dur'] = s.dur

    # Compute the spectrum
    s.spectrum(plot=False)

    if 'Acorr' in list_features:
        from waveform_features import autocorrelation, filt_ratio
        for i, val in enumerate(autocorrelation(s, plot=False)):
            dic['acorr%d' % i] = val

        for i, val in enumerate(filt_ratio(s, plot=False)):
            dic['fratio%d' % i] = val

    # Energy in several frequency bands: (feature name, lower, upper).
    # The helper is imported in every branch so that each band can be
    # requested on its own.
    energy_bands = (
        ('Ene20-30', 20, 30),
        ('Ene5-10', 5, 10),
        ('Ene0-5', .5, 5),
    )
    for name, f1, f2 in energy_bands:
        if name in list_features:
            from waveform_features import energy_between_10Hz_and_30Hz
            window = s.tr[s.i1:s.i2]
            dic[name] = energy_between_10Hz_and_30Hz(
                window / np.max(window),
                s.dt,
                wd=f1,
                wf=f2,
                ponset=s.ponset - s.i1,
                tend=s.tend - s.i1)

    if 'RappMaxMean' in list_features:
        # Max over mean ratio of the envelope
        from waveform_features import max_over_mean
        dic['RappMaxMean'] = max_over_mean(s.tr[s.ponset:s.tend])

    if 'AsDec' in list_features:
        # Ascendant phase duration over descendant phase duration
        from waveform_features import growth_over_decay
        p = growth_over_decay(s)
        # Non-positive values are not stored.
        if p > 0:
            dic['AsDec'] = p

    if 'Growth' in list_features:
        from waveform_features import desc_and_asc
        dic['Growth'] = desc_and_asc(s)

    if 'Skewness' in list_features:
        # Skewness
        from waveform_features import skewness
        dic['Skewness'] = skewness(s.tr_env[s.ponset:s.tend])

    if 'Kurto' in list_features:
        # Kurtosis
        from waveform_features import kurtosis_envelope
        dic['Kurto'] = kurtosis_envelope(s.tr_env[s.ponset:s.tend])

    if ('F_low' in list_features) or ('F_up' in list_features):
        # Lowest and highest frequency for kurtogram analysis
        from waveform_features import kurto_bandpass
        dic['F_low'], dic['F_up'] = kurto_bandpass(s, plot=False)

    if 'Centroid_time' in list_features:
        # Centroid time
        from waveform_features import centroid_time
        dic['Centroid_time'] = centroid_time(s.tr[s.i1:s.i2],
                                             s.dt,
                                             s.TF,
                                             s.ponset - s.i1,
                                             tend=s.tend - s.i1,
                                             plot=False)

    if 'RappMaxMeanTF' in list_features:
        # Max over mean ratio of the amplitude spectrum (first half of
        # s.TF only). Floor division keeps the slice index an integer.
        from waveform_features import max_over_mean
        dic['RappMaxMeanTF'] = max_over_mean(np.abs(s.TF[:len(s.TF) // 2]))

    if 'IFslope' in list_features:
        # Average of the instantaneous frequency and slope of the unwrapped instantaneous phase
        from waveform_features import instant_freq
        vals, dic['IFslope'] = instant_freq(s.tr[s.i1:s.i2],
                                            s.dt,
                                            s.TF,
                                            s.ponset - s.i1,
                                            s.tend - s.i1,
                                            plot=False)
        for i, val in enumerate(vals):
            dic['if%d' % i] = val

    if ('ibw0' in list_features) or ('Norm_envelope' in list_features):
        # Average of the instantaneous frequency and normalized envelope
        from waveform_features import instant_bw
        vals, Nenv = instant_bw(s.tr[s.i1:s.i2],
                                s.tr_env[s.i1:s.i2],
                                s.dt,
                                s.TF,
                                s.ponset - s.i1,
                                s.tend - s.i1,
                                plot=False)
        dic['Norm_envelope'] = Nenv
        for i, val in enumerate(vals):
            dic['ibw%d' % i] = val

    if ('PredF' in list_features) or ('CentralF' in list_features) or (
            'Bandwidth' in list_features):
        # Spectral attributes
        from waveform_features import around_freq
        dic['PredF'], dic['Bandwidth'], dic['CentralF'] = around_freq(
            s.TF, s.freqs, plot=False)

    if 'Cepstrum' in list_features:
        # Cepstrum
        # NOTE(review): the result is computed but never stored in dic.
        from waveform_features import cepstrum
        cepstrum(s.TF, s.freqs, plot=False)

    dic['Ponset'] = s.ponset
    return dic