# Names of the polarization columns, in the order in which
# polarization_analysis() returns the corresponding values.
_POLARIZATION_FEATURES = ('Rectilinearity', 'Planarity', 'Azimuth',
                          'Incidence')

# Ranks (position in the list of test event numbers, sorted in increasing
# order) of the test files that are skipped when opdict['option'] == 'hash'.
_HASH_SKIPPED_TEST_FILES = frozenset([
    409, 1026, 1027, 1028, 1993, 2121, 2122, 2123, 2424, 2441, 3029, 3058,
    3735, 3785, 3852, 3930, 4200, 4463, 4464, 4746, 6150, 6382, 6672, 6733,
])


def _add_polarization(df, s, mat, event_id, comp, list_features, train=None):
    """
    Runs the polarization analysis of one event and stores the results in df

    The values of the 'Dur' and 'Ponset' columns are averaged over the rows
    of the channels comp, 'E' and 'Z' and handed to polarization_analysis().
    Every polarization column listed in list_features then receives the same
    value on the 'Z', 'N' and 'E' rows of the event.

    df is modified in place; nothing is returned.
    train is None for the test set, or the [index, class] pair of a
    training event.
    """
    def key(channel):
        return (event_id, 'BOR', channel)

    d_mean = (df['Dur'][key(comp)] + df['Dur'][key('E')]
              + df['Dur'][key('Z')]) / 3.
    po_mean = int((df['Ponset'][key(comp)] + df['Ponset'][key('E')]
                   + df['Ponset'][key('Z')]) / 3)

    # Same calls as the historical code: positional False for the test set,
    # keyword 'train' for the training set.
    if train is None:
        s.read_all_files(mat, False)
    else:
        s.read_all_files(mat, train=list(train))

    rect, plan, az, iang = polarization_analysis(s, d_mean, po_mean,
                                                 plot=False)
    for name, value in zip(_POLARIZATION_FEATURES, (rect, plan, az, iang)):
        # Only the requested columns are written. The test-set code used to
        # test "list_features or 'Azimuth'", which is always true, so the
        # azimuth was written even when it had not been requested.
        if name not in list_features:
            continue
        for channel in ('Z', 'N', 'E'):
            # Chained assignment through the column, equivalent to the
            # original attribute form df.<name>[key] = value.
            # NOTE(review): relies on pandas returning the stored column
            # rather than a copy - confirm if pandas is upgraded.
            df[name][key(channel)] = value


def _extract_event_features(opt, df, mat, event_id, event_type,
                            train=None, skip_hash=False):
    """
    Appends to df the features of one event, one row per channel

    opt        : options object; opt.opdict provides 'feat_list',
                 'channels', 'option' and 'permut_file'
    df         : DataFrame in which the features are accumulated
    mat        : content of the .mat file returned by loadmat
    event_id   : first element of the (event_id, 'BOR', channel) row index
    event_type : value written in the 'EventType' column
    train      : None for the test set, or the [index, class] pair passed
                 to SeismicTraces and read_all_files for a training event
    skip_hash  : if True, nothing is extracted for this event when the
                 'hash' option is selected

    Returns the DataFrame with the new rows appended.
    """
    list_features = opt.opdict['feat_list']

    for counter, comp in enumerate(opt.opdict['channels'], 1):
        row = pd.DataFrame(columns=list_features,
                           index=[(event_id, 'BOR', comp)])
        row['EventType'] = event_type
        row['Ponset'] = 0

        if train is None:
            s = SeismicTraces(mat, comp)
        else:
            s = SeismicTraces(mat, comp, train=list(train))

        # Features are only extracted when the trace object carries more
        # than two attributes.
        # NOTE(review): presumably this means the trace was actually read -
        # confirm against SeismicTraces.
        if len(s.__dict__) > 2:
            option = opt.opdict['option']
            if option == 'norm':
                row = extract_norm_features(s, list_features, row)
            elif option == 'hash':
                if skip_hash:
                    # Skips the row AND the polarization step below.
                    continue
                row = extract_hash_features(s, list_features, row,
                                            opt.opdict['permut_file'],
                                            plot=False)
            df = df.append(row)

        # NOTE(review): the indentation of the historical source was lost;
        # the polarization step is assumed to belong to the channel loop and
        # to run once the third channel has been handled.
        if counter == 3 and any(name in list_features
                                for name in _POLARIZATION_FEATURES):
            _add_polarization(df, s, mat, event_id, comp, list_features,
                              train)
    return df


def read_data_for_features_extraction(set='test', save=False):
    """
    Extracts the features from all seismic files
    If option 'save' is set, then save the pandas DataFrame as a .csv file

    set  : 'test' reads every TestSet/SigEve_<n>.mat file found in
           opdict['datadir']; 'train' reads TrainingSetPlusSig_2.mat from
           the same directory and writes to
           <outdir>/features/<feat_train>. Any other value extracts nothing.
    save : if True, the DataFrame is written to opdict['feat_filename'].
           The program exits (sys.exit) beforehand if that file already
           exists, so that it is never overwritten.

    Nothing is returned. The print calls use a single pre-formatted string
    so that the output is the same under Python 2 and Python 3.
    """
    # Public entry point for the loader previously imported through
    # scipy.io.matlab.mio.
    from scipy.io import loadmat
    from options import MultiOptions
    opt = MultiOptions()

    if set == 'train':
        opt.opdict['feat_filename'] = '%s/features/%s' % (
            opt.opdict['outdir'], opt.opdict['feat_train'])
    # NOTE(review): assumed to be printed for both data sets (original
    # indentation lost).
    print(opt.opdict['feat_filename'])

    if save and os.path.exists(opt.opdict['feat_filename']):
        print("WARNING !! File %s already exists"
              % opt.opdict['feat_filename'])
        print("Check if you really want to replace it...")
        sys.exit()

    df = pd.DataFrame(columns=opt.opdict['feat_list'])

    if set == 'test':
        pattern = os.path.join(opt.opdict['datadir'], 'TestSet/SigEve_*')
        # Event numbers parsed from the file names, in increasing order.
        event_numbers = sorted(
            int(os.path.basename(path).split('_')[1].split('.mat')[0])
            for path in glob.glob(pattern))

        tsort = opt.read_csvfile(opt.opdict['label_filename'])
        tsort.index = tsort.Date

        for ifile, numfile in enumerate(event_numbers):
            matfile = os.path.join(opt.opdict['datadir'],
                                   'TestSet/SigEve_%d.mat' % numfile)
            print("%d %s" % (ifile, matfile))
            mat = loadmat(matfile)
            event_type = tsort[tsort.Date == numfile].Type.values[0]
            df = _extract_event_features(
                opt, df, mat, numfile, event_type,
                skip_hash=ifile in _HASH_SKIPPED_TEST_FILES)

    elif set == 'train':
        mat = loadmat(os.path.join(opt.opdict['datadir'],
                                   'TrainingSetPlusSig_2.mat'))

        # Offset added to the VT indices: index of the last processed EB
        # event + 1. Initialised so that the VT loop no longer fails with a
        # NameError when no EB event is processed.
        neb = 0
        for i in range(mat['KurtoEB'].shape[1]):
            print("EB %d" % i)
            # NOTE(review): only events 10 and 61 are processed; this looks
            # like a leftover debugging filter - confirm before removing.
            if i != 10 and i != 61:
                continue
            df = _extract_event_features(opt, df, mat, i, 'EB',
                                         train=[i, 'EB'])
            neb = i + 1

        for i in range(mat['KurtoVT'].shape[1]):
            print("VT %d" % (i + neb))
            # NOTE(review): only event 5 is processed - same remark as for
            # the EB filter above.
            if i != 5:
                continue
            df = _extract_event_features(opt, df, mat, i + neb, 'VT',
                                         train=[i, 'VT'])

    if save:
        print("Features written in %s" % opt.opdict['feat_filename'])
        df.to_csv(opt.opdict['feat_filename'])
def read_data_for_features_extraction(set='test', save=False):
    """
    Extracts the features from all seismic files
    If option 'save' is set, then save the pandas DataFrame as a .csv file

    With set='test', every TestSet/SigEve_<n>.mat file of opdict['datadir']
    is processed in increasing order of <n>; the event type is looked up in
    the label file (opdict['label_filename']).
    With set='train', the events are read from TrainingSetPlusSig_2.mat and
    the output file becomes <outdir>/features/<feat_train>.
    Any other value of 'set' extracts nothing.

    One row is produced per (event, 'BOR', channel). When save is True and
    the output file already exists, the program stops through sys.exit()
    instead of overwriting it. Nothing is returned.

    All messages are printed as one pre-formatted string, which gives the
    same output with the Python 2 print statement and the Python 3 print
    function.
    """
    # scipy.io.loadmat is the public name of the loader that used to be
    # imported through scipy.io.matlab.mio.
    from scipy.io import loadmat
    from options import MultiOptions

    opt = MultiOptions()
    params = opt.opdict

    if set == 'train':
        params['feat_filename'] = '%s/features/%s' % (params['outdir'],
                                                      params['feat_train'])
    # NOTE(review): printed for both data sets - assumed, the indentation of
    # the historical source was lost.
    print(params['feat_filename'])

    if save and os.path.exists(params['feat_filename']):
        print("WARNING !! File %s already exists" % params['feat_filename'])
        print("Check if you really want to replace it...")
        sys.exit()

    list_features = params['feat_list']

    # Polarization columns, in the order of the values returned by
    # polarization_analysis().
    polar_columns = ('Rectilinearity', 'Planarity', 'Azimuth', 'Incidence')

    # Ranks (in the sorted list of test events) of the files that are not
    # processed with the 'hash' option.
    hash_skipped = frozenset([
        409, 1026, 1027, 1028, 1993, 2121, 2122, 2123, 2424, 2441, 3029,
        3058, 3735, 3785, 3852, 3930, 4200, 4463, 4464, 4746, 6150, 6382,
        6672, 6733,
    ])

    def store_polarization(table, traces, mat, event_id, comp, train):
        """Computes the polarization attributes of one event and writes
        each requested one on the 'Z', 'N' and 'E' rows of the event."""
        last, east, vert = [(event_id, 'BOR', c) for c in (comp, 'E', 'Z')]
        d_mean = (table['Dur'][last] + table['Dur'][east]
                  + table['Dur'][vert]) / 3.
        po_mean = int((table['Ponset'][last] + table['Ponset'][east]
                       + table['Ponset'][vert]) / 3)

        # Call forms kept from the historical code: positional False for
        # the test set, keyword 'train' for the training set.
        if train is None:
            traces.read_all_files(mat, False)
        else:
            traces.read_all_files(mat, train=list(train))

        rect, plan, az, iang = polarization_analysis(traces, d_mean, po_mean,
                                                     plot=False)
        for column, value in zip(polar_columns, (rect, plan, az, iang)):
            # Fix: the test branch used "if list_features or 'Azimuth'",
            # which is always true; the azimuth is now written only when it
            # belongs to the requested features, like the other attributes.
            if column in list_features:
                for channel in ('Z', 'N', 'E'):
                    # Chained assignment, equivalent to the original
                    # table.<column>[key] = value.
                    # NOTE(review): depends on pandas handing back the
                    # stored column, not a copy - confirm on upgrade.
                    table[column][(event_id, 'BOR', channel)] = value

    def process_event(table, mat, event_id, event_type, train=None,
                      skip_hash=False):
        """Appends one row per channel for the given event and returns the
        resulting DataFrame. train is None for the test set, or the
        [index, class] pair of a training event."""
        for counter, comp in enumerate(params['channels'], 1):
            row = pd.DataFrame(columns=list_features,
                               index=[(event_id, 'BOR', comp)])
            row['EventType'] = event_type
            row['Ponset'] = 0

            if train is None:
                traces = SeismicTraces(mat, comp)
            else:
                traces = SeismicTraces(mat, comp, train=list(train))

            # Extraction only happens when the trace object holds more than
            # two attributes.
            # NOTE(review): presumably a sign that data were loaded -
            # verify in SeismicTraces.
            if len(traces.__dict__) > 2:
                if params['option'] == 'norm':
                    row = extract_norm_features(traces, list_features, row)
                elif params['option'] == 'hash':
                    if skip_hash:
                        # Neither the row nor the polarization is computed.
                        continue
                    row = extract_hash_features(traces, list_features, row,
                                                params['permut_file'],
                                                plot=False)
                table = table.append(row)

            # NOTE(review): assumed to sit inside the channel loop and to
            # fire on the third channel (original indentation lost).
            polar_wanted = any(c in list_features for c in polar_columns)
            if counter == 3 and polar_wanted:
                store_polarization(table, traces, mat, event_id, comp, train)
        return table

    df = pd.DataFrame(columns=list_features)

    if set == 'test':
        found = glob.glob(os.path.join(params['datadir'], 'TestSet/SigEve_*'))
        # Event numbers taken from the file names, sorted numerically.
        numbers = sorted(
            int(os.path.basename(name).split('_')[1].split('.mat')[0])
            for name in found)

        tsort = opt.read_csvfile(params['label_filename'])
        tsort.index = tsort.Date

        for ifile, numfile in enumerate(numbers):
            mat_path = os.path.join(params['datadir'],
                                    'TestSet/SigEve_%d.mat' % numfile)
            print("%d %s" % (ifile, mat_path))
            label = tsort[tsort.Date == numfile].Type.values[0]
            df = process_event(df, loadmat(mat_path), numfile, label,
                               skip_hash=ifile in hash_skipped)

    elif set == 'train':
        mat = loadmat(os.path.join(params['datadir'],
                                   'TrainingSetPlusSig_2.mat'))

        # Offset of the VT indices = index of the last processed EB event
        # + 1; starts at 0 so that the VT loop cannot hit an unbound name
        # when no EB event is processed.
        neb = 0
        for i in range(mat['KurtoEB'].shape[1]):
            print("EB %d" % i)
            # NOTE(review): restricted to events 10 and 61, probably a
            # debugging leftover - confirm before lifting the filter.
            if i in (10, 61):
                df = process_event(df, mat, i, 'EB', train=[i, 'EB'])
                neb = i + 1

        for i in range(mat['KurtoVT'].shape[1]):
            print("VT %d" % (i + neb))
            # NOTE(review): restricted to event 5, see the EB remark.
            if i == 5:
                df = process_event(df, mat, i + neb, 'VT', train=[i, 'VT'])

    if save:
        print("Features written in %s" % params['feat_filename'])
        df.to_csv(params['feat_filename'])