import numpy as np
import feets


def process_02(lc):
    # Skip light curves with fewer than 20 observations.
    if lc[0].shape[0] < 20:
        return np.full(63, np.nan, dtype=np.float32)
    fs = feets.FeatureSpace(data=['time', 'magnitude', 'error'])
    try:
        _, values = fs.extract(*lc)
    except Exception:
        # Extraction failed; return a NaN vector of the expected width.
        return np.full(63, np.nan, dtype=np.float32)
    return values.astype(np.float32)
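# A minimal usage sketch for process_02, assuming lc is a (time, magnitude,
# error) sequence of NumPy arrays; the input values below are illustrative,
# and the width 63 is whatever feature count this script's feets version
# produces.
rng = np.random.default_rng(0)
t = np.sort(rng.uniform(0.0, 100.0, 30))
mag = 15.0 + 0.1 * rng.standard_normal(30)
err = np.full(30, 0.01)

feats = process_02([t, mag, err])
print(feats.shape, feats.dtype)  # (63,) float32; all-NaN if extraction failed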
def setup_method(self):
    features = []
    feets_features = []
    for cls in _FeetsTest.__subclasses__():
        if cls.feets_feature is None or not hasattr(cls, "feature"):
            continue
        features.append(cls.feature)
        feets_features.append(cls.feets_feature)
    self.feature = lc.Extractor(*features)
    self.feets_extractor = feets.FeatureSpace(
        only=feets_features, data=["time", "magnitude", "error"])
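# A hypothetical subclass illustrating the shape setup_method expects: each
# _FeetsTest subclass pairs a local extractor ("feature") with the name of
# its feets counterpart ("feets_feature"). Both attribute values here are
# assumptions, not taken from the original test suite.
class TestMean(_FeetsTest):
    feets_feature = "Mean"   # feature name as feets knows it
    feature = MeanExtractor  # hypothetical lc.Extractor-compatible extractor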
def feets_parallel(i, times, mags, errs, koi_n, save_f):
    resumen = pd.read_csv(save_f)
    if koi_n in resumen["KOI Name"].values:
        print("Skipping curve %d: %s" % (i, koi_n))
    else:
        start_time = time.time()
        print("Working on curve %d: %s... " % (i, koi_n), end='')
        fs = feets.FeatureSpace(data=['time', 'magnitude', 'error'])
        features, values = fs.extract(time=times, magnitude=mags, error=errs)
        res = [koi_n] + list(values.T)
        safe_write(save_f, res)  # read and save
        print("Done! in %f seconds" % (time.time() - start_time))
def setup(self):
    # Intentional guard: this step is unfinished until vs_catalog and
    # version handling are added, so everything below is unreachable.
    raise Exception("Add vs_catalog and version")
    print("min_observation:", self.min_observation)
    print("chunk_size:", self.chunk_size)
    print("write_limit:", self.write_limit)
    print("mp_cores:", self.mp_cores)
    print("mp_split:", self.mp_split)
    self.fs = feets.FeatureSpace(
        data=["magnitude", "time", "error"],
        exclude=[
            "SlottedA_length",
            "StetsonK_AC",
            "StructureFunction_index_21",
            "StructureFunction_index_31",
            "StructureFunction_index_32",
        ])
import pandas as pd
import feets


def get_feet(data, fea):
    # Magnitude-only features; the commented names were tried and disabled:
    # 'Con', 'Autocor_length', 'Meanvariance', 'MedianAbsDev',
    # 'AndersonDarling'.
    fs = feets.FeatureSpace(only=['Amplitude', 'Gskew', 'MedianBRP',
                                  'PairSlopeTrend', 'Q31', 'Rcs',
                                  'SmallKurtosis'])
    t = []
    for i in fea:
        features, values = fs.extract(magnitude=data[i])
        a = pd.DataFrame(dict(zip(features, values)), index=[0])
        # Prefix every feature column with the series name.
        a.columns = [i + '_' + j for j in a.columns]
        t.append(a)
    rst = pd.concat(t, axis=1)
    rst.reset_index(drop=True, inplace=True)
    return rst
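# A minimal usage sketch for get_feet, assuming data maps series names to
# magnitude arrays and fea lists the names to featurize (all values below
# are illustrative).
import numpy as np

rng = np.random.default_rng(1)
data = {
    "star_a": 14.0 + 0.2 * rng.standard_normal(200),
    "star_b": 15.5 + 0.1 * rng.standard_normal(200),
}
wide = get_feet(data, ["star_a", "star_b"])
print(list(wide.columns))  # ['star_a_Amplitude', ..., 'star_b_SmallKurtosis']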
import math
import os

import numpy as np
import feets
from astropy.table import Table


def get_feats_blank(field, year, month, day, min_num, max_num):
    path = ("/fred/oz100/NOAO_archive/archive_NOAO_data/data_outputs/"
            + year + "/" + month + "/" + field
            + "/g_band/single/lightcurves/files/")

    lc_with_only_zeros = []
    used_lcs = []
    filenames = []

    # Per-feature accumulators, one entry per usable light curve.
    Autocor_length = []
    Beyond1Std = []
    CAR_sigma = []
    CAR_mean = []
    CAR_tau = []
    Con = []
    LinearTrend = []
    MaxSlope = []
    Mean = []
    Meanvariance = []
    MedianAbsDev = []
    MedianBRP = []
    PairSlopeTrend = []
    PercentAmplitude = []
    Q31 = []
    Rcs = []
    Skew = []
    SlottedA_length = []
    SmallKurtosis = []
    Std = []
    StetsonK_AC = []
    Amplitudes = []
    VariabilityIndex = []
    detection_fraction = []
    amp1_val = []
    amp_2_1_ratio = []
    amp_3_1_ratio = []
    phase_2_1_ratio = []
    phase_3_1_ratio = []
    # Declared but never filled in this version.
    Eta_e = []
    pmra = []
    pmde = []
    gaia_G_RP = []
    gaia_BP_G = []
    hl_ratio = []

    test_filename = os.listdir(path)
    for filename in test_filename[min_num:max_num]:
        if not filename.endswith(day):
            continue
        try:
            mjd, mag, emag, uplim = np.loadtxt(path + filename,
                                               unpack=True, skiprows=1)
        except Exception:
            print('FILE EMPTY')
            continue

        sum_mag = np.sum(mag)
        print(sum_mag)
        if sum_mag == 0:
            lc_with_only_zeros.append(filename)
            continue
        used_lcs.append(filename)

        # Drop non-detections (mag == 0) and poorly measured points.
        clean_mjd, clean_mag, clean_emag = [], [], []
        for l, m, n in zip(mjd, mag, emag):
            if m != 0 and m < 25 and n < 0.8:
                clean_mjd.append(l)
                clean_mag.append(m)
                clean_emag.append(n)
        print(clean_mjd)

        if len(clean_mjd) > 3:
            print('in cleaned loop')
            fs = feets.FeatureSpace(only=[
                'Autocor_length', 'Beyond1Std', 'CAR_sigma', 'CAR_mean',
                'CAR_tau', 'Con', 'LinearTrend', 'MaxSlope', 'Mean',
                'Meanvariance', 'MedianAbsDev', 'MedianBRP',
                'PairSlopeTrend', 'PercentAmplitude', 'Q31', 'Rcs', 'Skew',
                'SlottedA_length', 'SmallKurtosis', 'Std', 'StetsonK_AC'
            ])
            # Pass the arrays by keyword: extract's positional order is
            # (time, magnitude, error), so the original
            # extract(*[mag, mjd, emag]) silently swapped time and magnitude.
            features, values = fs.extract(time=np.asarray(clean_mjd),
                                          magnitude=np.asarray(clean_mag),
                                          error=np.asarray(clean_emag))
            results = dict(zip(features, values))
            print('after feets')

            filenames.append(filename)
            Autocor_length.append(results['Autocor_length'])
            Beyond1Std.append(results['Beyond1Std'])
            CAR_sigma.append(results['CAR_sigma'])
            CAR_mean.append(results['CAR_mean'])
            CAR_tau.append(results['CAR_tau'])
            Con.append(results['Con'])
            #Eta_e.append(results['Eta_e'])
            LinearTrend.append(results['LinearTrend'])
            MaxSlope.append(results['MaxSlope'])
            Mean.append(results['Mean'])
            Meanvariance.append(results['Meanvariance'])
            MedianAbsDev.append(results['MedianAbsDev'])
            MedianBRP.append(results['MedianBRP'])
            PairSlopeTrend.append(results['PairSlopeTrend'])
            PercentAmplitude.append(results['PercentAmplitude'])
            Q31.append(results['Q31'])
            Rcs.append(results['Rcs'])
            Skew.append(results['Skew'])
            SlottedA_length.append(results['SlottedA_length'])
            SmallKurtosis.append(results['SmallKurtosis'])
            Std.append(results['Std'])
            StetsonK_AC.append(results['StetsonK_AC'])

            # Amplitude: half the span between the median of the brightest
            # and faintest 5% of magnitudes.
            N = len(clean_mag)
            sorted_mag = np.sort(clean_mag)
            amp = (np.median(sorted_mag[-int(math.ceil(0.05 * N)):])
                   - np.median(sorted_mag[0:int(math.ceil(0.05 * N))])) / 2
            Amplitudes.append(amp)

            # von Neumann-style variability index from successive differences.
            clean_mag_array = np.asarray(clean_mag)
            sigma2 = np.var(clean_mag)
            VarIndex = 1 / ((N - 1) * sigma2) * np.sum(
                np.power(clean_mag_array[1:] - clean_mag_array[:-1], 2))
            VariabilityIndex.append(VarIndex)

            # Ratio of detections to non-zero upper limits.
            non_detects = [lim for lim in uplim if lim != 0]
            len_mags = len(clean_mag)
            len_uplim = len(non_detects)
            if len_uplim == 0:
                detection_fraction.append(1)
            else:
                detection_fraction.append(len_mags / len_uplim)

            # Low-order Fourier amplitude and phase ratios.
            fft = np.fft.rfft(clean_mag)
            amps = np.sqrt(fft.real**2 + fft.imag**2)
            amp1, amp2, amp3 = amps[0], amps[1], amps[2]
            amp1_val.append(amp1)
            amp_2_1_ratio.append(amp2 / amp1)
            amp_3_1_ratio.append(amp3 / amp1)
            phases = np.arctan2(fft.imag, fft.real)
            phase1, phase2, phase3 = phases[0], phases[1], phases[2]
            phase_2_1_ratio.append(phase2 / phase1)
            phase_3_1_ratio.append(phase3 / phase1)
        else:
            print('Not enough data points')

    feature_table = Table()
    feature_table['LC_name'] = filenames
    feature_table['Autocor_length'] = Autocor_length
    feature_table['Beyond1Std'] = Beyond1Std
    feature_table['CAR_sigma'] = CAR_sigma
    feature_table['CAR_mean'] = CAR_mean
    feature_table['CAR_tau'] = CAR_tau
    feature_table['Con'] = Con
    feature_table['LinearTrend'] = LinearTrend
    feature_table['MaxSlope'] = MaxSlope
    feature_table['Mean'] = Mean
    feature_table['Meanvariance'] = Meanvariance
    feature_table['MedianAbsDev'] = MedianAbsDev
    feature_table['MedianBRP'] = MedianBRP
    feature_table['PairSlopeTrend'] = PairSlopeTrend
    feature_table['PercentAmplitude'] = PercentAmplitude
    feature_table['Q31'] = Q31
    feature_table['Rcs'] = Rcs
    feature_table['Skew'] = Skew
    feature_table['SlottedA_length'] = SlottedA_length
    feature_table['SmallKurtosis'] = SmallKurtosis
    feature_table['Std'] = Std
    feature_table['StetsonK_AC'] = StetsonK_AC
    feature_table['Amplitudes'] = Amplitudes
    feature_table['VariabilityIndex'] = VariabilityIndex
    #feature_table['DetectionFraction'] = detection_fraction
    feature_table['amp1'] = amp1_val
    feature_table['amp_2_1_ratio'] = amp_2_1_ratio
    feature_table['amp_3_1_ratio'] = amp_3_1_ratio
    feature_table['phase_2_1_ratio'] = phase_2_1_ratio
    feature_table['phase_3_1_ratio'] = phase_3_1_ratio

    output = ('/home/swebb/oz100/LC_CLUSTERS/feature_lists/'
              + year + '_' + month + '_' + field + '_' + day + '/'
              + year + '_' + month + '_' + field + '_' + day
              + '_feat_' + str(max_num) + '.csv')
    print(output)
    df = feature_table.to_pandas()
    df.to_csv(output)
    #feature_table.write(output, format='ascii', overwrite=True)
    return feature_table
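# A minimal invocation sketch for get_feats_blank; all argument values are
# illustrative, and `day` doubles as the filename-suffix filter used inside
# the function.
table = get_feats_blank(field="FIELD1", year="2018", month="01",
                        day="20180115", min_num=0, max_num=100)
print(len(table), "light curves featurized")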
    else:
        start_time = time.time()
        print("Working on curve %d: %s... " % (i, koi_n), end='')
        sys.stdout.flush()
        fs = feets.FeatureSpace(data=['time', 'magnitude'])
        features, values = fs.extract(time=times, magnitude=mags)
        res = [koi_n] + list(values.T)
        safe_write(save_f, res)  # read and save
        print("Done! in %f seconds" % (time.time() - start_time))


name_saved_file = "Feets_Features/ResumenFeets_sinError_seq.csv"
if not os.path.isfile(name_saved_file):
    fs = feets.FeatureSpace(data=['time', 'magnitude'])
    resumen = pd.DataFrame(columns=['KOI Name'] + list(fs.features_as_array_))
    resumen.to_csv(name_saved_file, index=False)

start_i = pd.read_csv(name_saved_file).shape[0]  # read from file
if start_i % cores != 0:
    # Round the restart index down to a multiple of the core count.
    start_i = int(start_i / cores) * cores
if start_i == N:
    sys.stdout.write("Already done!")
    assert False
print("Starting execution at", start_i)
for i in range(N):
    feets_parallel(
        i,
        coupled_time[i],
    # Class-name-based labels.
    label_features = ['class_' + str(cl) for cl in all_classes]
    return target_map, label_features, all_classes, all_class_weights


import sys

# Q31
try:
    # Wrap the CLI argument in a list so FeatureSpace(only=...) receives
    # feature names rather than single characters of one name.
    ft = [sys.argv[1]]
except IndexError:
    print("using default feets features")
    ft = ['Beyond1Std']
    # ft = ['Eta_e', 'Amplitude', 'Autocor_length', 'Beyond1Std']

fs = feets.FeatureSpace(only=ft)
print(ft)


def lcperiod(df_main):
    df_main = df_main.sort_values('mjd')
    try:
        frequency, power = LombScargle(
            df_main['mjd'], df_main['flux'],
            dy=df_main['flux_err']).autopower(nyquist_factor=1)
        period = 1 / frequency[np.argmax(power)]
        power = power.mean()
    except ValueError:
        period = 0
        power = 0
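# A standalone sketch of the Lomb-Scargle pattern used by lcperiod, assuming
# LombScargle comes from astropy.timeseries; the synthetic light curve below
# (a 2.5-day sinusoid with noise) is illustrative.
import numpy as np
import pandas as pd
from astropy.timeseries import LombScargle

rng = np.random.default_rng(2)
mjd = np.sort(rng.uniform(0, 100, 300))
flux = (1.0 + 0.3 * np.sin(2 * np.pi * mjd / 2.5)
        + 0.05 * rng.standard_normal(300))
df = pd.DataFrame({'mjd': mjd, 'flux': flux, 'flux_err': np.full(300, 0.05)})

frequency, power = LombScargle(df['mjd'], df['flux'],
                               dy=df['flux_err']).autopower(nyquist_factor=1)
print(1 / frequency[np.argmax(power)])  # recovered period, ~2.5 days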
    'Freq1_harmonics_amplitude_3', 'Freq1_harmonics_rel_phase_1',
    'Freq1_harmonics_rel_phase_2', 'Freq1_harmonics_rel_phase_3',
    'Freq2_harmonics_amplitude_0', 'Freq2_harmonics_amplitude_1',
    'Freq2_harmonics_amplitude_2', 'Freq2_harmonics_amplitude_3',
    'Freq2_harmonics_rel_phase_1', 'Freq2_harmonics_rel_phase_2',
    'Freq2_harmonics_rel_phase_3', 'Freq3_harmonics_amplitude_0',
    'Freq3_harmonics_amplitude_1', 'Freq3_harmonics_amplitude_2',
    'Freq3_harmonics_amplitude_3', 'Freq3_harmonics_rel_phase_1',
    'Freq3_harmonics_rel_phase_2', 'Freq3_harmonics_rel_phase_3',
    'Gskew', 'LinearTrend', 'MaxSlope', 'Mean', 'Meanvariance',
    'MedianAbsDev', 'MedianBRP', 'PairSlopeTrend', 'PercentAmplitude',
    'PercentDifferenceFluxPercentile', 'PeriodLS', 'Period_fit',
    'Psi_CS', 'Psi_eta', 'Q31', 'Rcs', 'Skew', 'SmallKurtosis',
    'Std', 'StetsonK'
]

fs = feets.FeatureSpace(data=["magnitude", "time", "error"], only=only_all)


def extract(sid, obs, old_feats):
    time = obs.pwp_stack_src_hjd.values
    magnitude = obs.pwp_stack_src_mag3.values
    error = obs.pwp_stack_src_mag_err3.values

    sort = np.argsort(time)
    time, magnitude, error = time[sort], magnitude[sort], error[sort]
    time, magnitude, error = preprocess.remove_noise(
        time, magnitude, error, std_limit=3)

    new_feats = dict(
    descr[2] = (descr[2][0], '|S13')
    descr = [(str(n), t) for n, t in descr]
    dt = np.dtype(descr)
    return sources.astype(dt)


fs = feets.FeatureSpace(
    data=["magnitude", "time", "error"],
    only=[
        "PeriodLS", "Period_fit", "Psi_CS", "Psi_eta",
        "Freq1_harmonics_amplitude_0", "Freq1_harmonics_amplitude_1",
        "Freq1_harmonics_amplitude_2", "Freq1_harmonics_amplitude_3",
        "Freq2_harmonics_amplitude_0", "Freq2_harmonics_amplitude_1",
        "Freq2_harmonics_amplitude_2", "Freq2_harmonics_amplitude_3",
        "Freq3_harmonics_amplitude_0", "Freq3_harmonics_amplitude_1",
        "Freq3_harmonics_amplitude_2", "Freq3_harmonics_amplitude_3",
        "Freq1_harmonics_rel_phase_0", "Freq1_harmonics_rel_phase_1",
        "Freq1_harmonics_rel_phase_2", "Freq1_harmonics_rel_phase_3",
        "Freq2_harmonics_rel_phase_0", "Freq2_harmonics_rel_phase_1",
        "Freq2_harmonics_rel_phase_2", "Freq2_harmonics_rel_phase_3",
        "Freq3_harmonics_rel_phase_0", "Freq3_harmonics_rel_phase_1",
        "Freq3_harmonics_rel_phase_2", "Freq3_harmonics_rel_phase_3",
    ])

# import ipdb; ipdb.set_trace()  # leftover debugging breakpoint, disabled


def main():
    with db.session_scope() as ses:
        query = ses.query(LightCurves).filter(
            LightCurves.tile.has(name="b278"))
from tsfresh.feature_extraction import extract_features
from tsfresh.feature_extraction import feature_calculators
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from numba import jit

from filby import *
from tsfresh_extra import *

np.random.seed(35)

import math
import time

import feets
import feets.preprocess

fs = feets.FeatureSpace(data=['magnitude', 'time', 'error'],
                        only=['StetsonK', 'SlottedA_length', 'StetsonK_AC'])

# Register the custom FATS-style calculators with tsfresh so they can be
# requested by name alongside the built-in feature calculators.
feature_calculators.__dict__["FluxPercentileRatioMid80"] = FluxPercentileRatioMid80
feature_calculators.__dict__["FluxPercentileRatioMid20"] = FluxPercentileRatioMid20
feature_calculators.__dict__["FluxPercentileRatioMid35"] = FluxPercentileRatioMid35
feature_calculators.__dict__["FluxPercentileRatioMid50"] = FluxPercentileRatioMid50
feature_calculators.__dict__["FluxPercentileRatioMid65"] = FluxPercentileRatioMid65
feature_calculators.__dict__["SmallKurtosis"] = SmallKurtosis
def setup_method(self):
    self.feets_extractor = feets.FeatureSpace(
        only=[self.feets_feature],
        data=["time", "magnitude", "error"])
    'Freq3_harmonics_rel_phase_3', 'Gskew', 'LinearTrend', 'MaxSlope',
    'Mean', 'Meanvariance', 'MedianAbsDev', 'MedianBRP', 'PairSlopeTrend',
    'PercentAmplitude', 'PercentDifferenceFluxPercentile', 'PeriodLS',
    'Period_fit', 'Psi_CS', 'Psi_eta', 'Q31', 'Rcs', 'Skew',
    'SmallKurtosis', 'Std', 'StetsonK'
]

COLUMNS_NO_FEATURES = ['id', 'cnt', 'ra_k', 'dec_k', 'vs_type', 'vs_catalog']

COLUMNS_TO_PRESERVE = COLUMNS_NO_FEATURES + [
    'c89_jk_color', 'c89_hk_color', 'c89_jh_color',
    'n09_jk_color', 'n09_hk_color', 'n09_jh_color',
    'c89_m2', 'c89_m4', 'c89_c3',
    'n09_m2', 'n09_m4', 'n09_c3',
    'ppmb', "PeriodLS"
]

FEATURE_SPACE = feets.FeatureSpace(data=["magnitude", "time", "error"],
                                   only=FEATURES_TO_CALULATE)


# =============================================================================
# FUNCTIONS
# =============================================================================

def sigma_clip(obs):
    time = obs.pwp_stack_src_hjd.values
    magnitude = obs.pwp_stack_src_mag3.values
    error = obs.pwp_stack_src_mag_err3.values

    sort = np.argsort(time)
    time, magnitude, error = time[sort], magnitude[sort], error[sort]
    time, magnitude, error = preprocess.remove_noise(time,