def test_Con(white_noise):
    """``Con`` computed on the ``white_noise`` fixture must lie in [0.04, 0.05]."""
    space = FeatureSpace(featureList=['Con'], Con=1)
    space = space.calculateFeature(white_noise)
    con_value = space.result(method='array')
    assert 0.04 <= con_value <= 0.05
def test_Eta_e(white_noise):
    """``Eta_e`` computed on the ``white_noise`` fixture must lie in [1.9, 2.1]."""
    extractor = FeatureSpace(featureList=['Eta_e'])
    extractor = extractor.calculateFeature(white_noise)
    eta = extractor.result(method='array')
    assert 1.9 <= eta <= 2.1
def test_MedianAbsDev(white_noise):
    """``MedianAbsDev`` on the ``white_noise`` fixture must lie in [0.630, 0.700]."""
    lower, upper = 0.630, 0.700
    computed = FeatureSpace(featureList=['MedianAbsDev']).calculateFeature(white_noise)
    mad = computed.result(method='array')
    assert lower <= mad <= upper
def test_Meanvariance(uniform_lc):
    """``Meanvariance`` on the ``uniform_lc`` fixture must lie in [0.575, 0.580]."""
    extractor = FeatureSpace(featureList=['Meanvariance'])
    extractor = extractor.calculateFeature(uniform_lc)
    mean_variance = extractor.result(method='array')
    assert 0.575 <= mean_variance <= 0.580
def test_Eta_e(fake_lc):
    """``Eta_e`` on the first element of ``fake_lc`` must lie in [0.043, 0.046]."""
    extractor = FeatureSpace(featureList=['Eta_e'])
    # Only the first entry of the fixture is handed to the extractor.
    extractor = extractor.calculateFeature(fake_lc[0])
    eta = extractor.result(method='array')
    assert 0.043 <= eta <= 0.046
def test_PairSlopeTrend(white_noise):
    """``PairSlopeTrend`` on the ``white_noise`` fixture must lie in [-0.25, 0.25]."""
    computed = FeatureSpace(featureList=['PairSlopeTrend']).calculateFeature(white_noise)
    trend = computed.result(method='array')
    assert -0.25 <= trend <= 0.25
def test_Period_Psi(periodic_lc):
    """The first computed value for ``periodic_lc`` must lie in [19, 21].

    Four features are requested (``PeriodLS``, ``Period_fit``, ``Psi_CS``,
    ``Psi_eta``) but only index 0 of the result array is checked --
    presumably the ``PeriodLS`` value, assuming results follow
    ``featureList`` order (TODO confirm).
    """
    requested = ['PeriodLS', 'Period_fit', 'Psi_CS', 'Psi_eta']
    computed = FeatureSpace(featureList=requested).calculateFeature(periodic_lc)
    first_value = computed.result(method='array')[0]
    assert 19 <= first_value <= 21
def test_CAR(fake_lc):
    """Every CAR feature computed from ``fake_lc`` must lie in [0.043, 0.046].

    The original body referenced ``mjd`` and ``error`` without defining them
    (the unpacking line was commented out), so it failed with ``NameError``.
    """
    # Fixture layout taken from the original commented-out unpacking:
    #   data, mjd, error, second_data, aligned_data, aligned_second_data, aligned_mjd
    # NOTE(review): assumes fake_lc is indexable in that order -- confirm
    # against the fixture definition.
    data, mjd, error = fake_lc[0], fake_lc[1], fake_lc[2]

    a = FeatureSpace(featureList=['CAR_sigma', 'CAR_tau', 'CAR_tmean'],
                     CAR_sigma=[mjd, error])
    a = a.calculateFeature(data)
    values = a.result(method='array')

    # Three features are requested; combining element-wise comparisons of a
    # multi-element array with ``and`` is ambiguous, so check each value.
    # NOTE(review): these bounds are identical to the ones used by the
    # fake_lc Eta_e test -- confirm they are really intended for CAR.
    assert all(0.043 <= value <= 0.046 for value in values)
def test_Beyond1Std(white_noise):
    """``Beyond1Std`` on the ``white_noise`` fixture must lie in [0.30, 0.40]."""
    extractor = FeatureSpace(featureList=['Beyond1Std'])
    extractor = extractor.calculateFeature(white_noise)
    fraction = extractor.result(method='array')
    assert 0.30 <= fraction <= 0.40
def test_Period_Psi(fake_lc):
    """Every period/Psi feature computed from ``fake_lc`` must lie in [0.043, 0.046].

    The original body referenced ``mjd`` without defining it (the unpacking
    line was commented out), so it failed with ``NameError``.
    """
    # Fixture layout taken from the original commented-out unpacking:
    #   data, mjd, error, second_data, aligned_data, aligned_second_data, aligned_mjd
    # NOTE(review): assumes fake_lc is indexable in that order -- confirm
    # against the fixture definition.
    data, mjd = fake_lc[0], fake_lc[1]

    a = FeatureSpace(featureList=['PeriodLS', 'Period_fit', 'Psi_CS', 'Psi_eta'],
                     PeriodLS=mjd, Psi_CS=mjd)
    a = a.calculateFeature(data)
    values = a.result(method='array')

    # Four features are requested; combining element-wise comparisons of a
    # multi-element array with ``and`` is ambiguous, so check each value.
    # NOTE(review): these bounds are identical to the ones used by the
    # fake_lc Eta_e test -- confirm they are really intended here.
    assert all(0.043 <= value <= 0.046 for value in values)
def test_PercentDifferenceFluxPercentile(fake_lc):
    """``PercentDifferenceFluxPercentile`` on ``fake_lc[0]`` must lie in [0.043, 0.046]."""
    extractor = FeatureSpace(featureList=['PercentDifferenceFluxPercentile'])
    # Only the first entry of the fixture is handed to the extractor.
    extractor = extractor.calculateFeature(fake_lc[0])
    percent_difference = extractor.result(method='array')
    assert 0.043 <= percent_difference <= 0.046
def test_MedianBRP(fake_lc):
    """``MedianBRP`` computed from ``fake_lc`` must lie in [0.043, 0.046].

    The original body referenced ``mjd`` without defining it (the unpacking
    line was commented out), so it failed with ``NameError``.
    """
    # Fixture layout taken from the original commented-out unpacking:
    #   data, mjd, error, second_data, aligned_data, aligned_second_data, aligned_mjd
    # NOTE(review): assumes fake_lc is indexable in that order -- confirm
    # against the fixture definition.
    data, mjd = fake_lc[0], fake_lc[1]

    # NOTE(review): the time axis is supplied under the ``MaxSlope`` keyword
    # even though only ``MedianBRP`` is requested; kept as in the original.
    a = FeatureSpace(featureList=['MedianBRP'], MaxSlope=mjd)
    a = a.calculateFeature(data)
    value = a.result(method='array')
    assert 0.043 <= value <= 0.046
def test_Stetson(white_noise):
    """Range-check result entries 1-4 of the Stetson feature set on ``white_noise``.

    Index 0 of the result array is not checked.  Indices 1-4 presumably
    correspond to ``StetsonK``, ``StetsonK_AC``, ``StetsonJ`` and
    ``StetsonL`` (assuming results follow ``featureList`` order -- TODO
    confirm).
    """
    requested = ['SlottedA_length', 'StetsonK', 'StetsonK_AC', 'StetsonJ', 'StetsonL']
    computed = FeatureSpace(featureList=requested).calculateFeature(white_noise)
    values = computed.result(method='array')

    assert 0.790 <= values[1] <= 0.85
    assert 0.20 <= values[2] <= 0.45
    assert -0.1 <= values[3] <= 0.1
    assert -0.1 <= values[4] <= 0.1
def test_FluxPercentile(white_noise):
    """Range-check the five ``FluxPercentileRatioMid*`` results on ``white_noise``.

    Result entries 0-4 are compared, in order, against the bounds below.
    """
    requested = [
        'FluxPercentileRatioMid20',
        'FluxPercentileRatioMid35',
        'FluxPercentileRatioMid50',
        'FluxPercentileRatioMid65',
        'FluxPercentileRatioMid80',
    ]
    # (lower, upper) bound for result index 0, 1, 2, 3, 4 respectively.
    expected_ranges = [
        (0.145, 0.160),
        (0.260, 0.290),
        (0.350, 0.450),
        (0.540, 0.580),
        (0.760, 0.800),
    ]

    computed = FeatureSpace(featureList=requested).calculateFeature(white_noise)
    ratios = computed.result(method='array')

    for position, (low, high) in enumerate(expected_ranges):
        assert low <= ratios[position] <= high
def main(argv):
    """Compute features for every light curve in a directory and save them as CSV.

    Each file in the directory is read with ``R.ReadLC_Catalina``; the
    resulting ``[mag, time, error]`` array is passed to a ``FeatureSpace``
    built with ``featureList=None``.  One row per successfully processed
    file (indexed by the file name up to its first ``.``) is collected and
    the table is written under ``/n/regal/TSC/Catalina_features/``.

    Parameters
    ----------
    argv : str
        Directory containing the light-curve files (without trailing slash).
        The output file name is the fifth ``/``-separated component of
        ``argv + '/'`` plus ``.csv``.

    Nothing is written when no file could be processed.
    """
    path = argv + '/'
    frames = []

    for filename in os.listdir(path):
        [mag, time, error] = R.ReadLC_Catalina(path + filename)
        a = FeatureSpace(Data=['magnitude', 'time', 'error'], featureList=None)
        lc = np.array([mag, time, error])
        try:
            a = a.calculateFeature(lc)
            idx = filename.split('.')[0]
            values = np.asarray(a.result(method='array'))
            frames.append(pd.DataFrame(values.reshape((1, len(values))),
                                       columns=a.result(method='features'),
                                       index=[idx]))
        except Exception:
            # Best effort: skip light curves whose features cannot be
            # computed.  Unlike a bare ``except`` this lets
            # KeyboardInterrupt / SystemExit propagate.
            continue

    if frames:
        # Concatenate once instead of growing the frame inside the loop.
        df = pd.concat(frames)
        file_name = path.split('/')[4] + '.csv'
        df.to_csv('/n/regal/TSC/Catalina_features/' + file_name)
def calculate_features(lc_fn, feature_list):
    """Compute the requested features plus harmonic descriptors for one light curve.

    Parameters
    ----------
    lc_fn
        Passed straight to ``ReadLC_MACHO``.
    feature_list
        Forwarded to ``FeatureSpace`` as ``featureList``.

    Returns
    -------
    The ``result(method='dict')`` mapping of the computed features, extended
    with ``freq<i>_harmonics_amplitude_<j>`` and
    ``freq<i>_harmonics_rel_phase_<j>`` entries (``i`` starts at 1, ``j`` at
    0) taken from ``calculate_periodic_features``.
    """
    reader = ReadLC_MACHO(lc_fn)
    data, mjd, error = reader.ReadLC()

    # NOTE(review): the preprocessing object is constructed but never used
    # below; confirm whether ``.Preprocess()`` was meant to be applied before
    # computing features.  Kept as-is to preserve behavior.
    _preprocessed = Preprocess_LC(data, mjd, error)

    feature_space = FeatureSpace(
        featureList=feature_list,
        Automean=[0, 0],
        # Beyond1Std=[np.array(error)],
        CAR_sigma=[mjd, error],
        Eta_e=mjd,
        LinearTrend=mjd,
        MaxSlope=mjd,
        PeriodLS=mjd,
        Psi_CS=mjd,
    )
    value_dict = feature_space.calculateFeature(data).result(method='dict')

    amplitudes, _phases, scaled_phases = calculate_periodic_features(mjd, data)
    for freq_idx, freq_amplitudes in enumerate(amplitudes):
        prefix = 'freq' + str(freq_idx + 1)
        for harm_idx, amplitude in enumerate(freq_amplitudes):
            suffix = str(harm_idx)
            value_dict[prefix + '_harmonics_amplitude_' + suffix] = amplitude
            value_dict[prefix + '_harmonics_rel_phase_' + suffix] = (
                scaled_phases[freq_idx][harm_idx])
    return value_dict
import numpy as np

# "test": random inputs used to exercise the feature extractor.
data = np.random.uniform(-5, -3, 1000)
second_data = np.random.uniform(-5, -3, 1000)
error = np.random.uniform(0.000001, 1, 1000)
mjd = np.random.uniform(40000, 50000, 1000)

# Disabled alternative configuration, kept for reference:
# minper=1.
# maxper=100.
# subsample=1
# Npeaks=1
# clip=5.0
# clipiter=1
# whiten=0
# a = FeatureSpace(category='all', featureList=None, automean=[0,0],
#                  StetsonL=second_data, B_R=second_data, Beyond1Std=error,
#                  StetsonJ=second_data, MaxSlope=mjd, LinearTrend=mjd,
#                  Eta_B_R=second_data, Eta_e=mjd, Q31B_R=second_data,
#                  PeriodLS=mjd)
# PeriodLS=[mjd, error, minper, maxper, subsample, Npeaks, clip, clipiter, whiten]

# Compute the 'basic' feature category on the random data.
a = FeatureSpace(category='basic', automean=[0, 0])
# print a.featureList
a = a.calculateFeature(data)
# print a.result(method='')

# Persist the feature values and show them keyed by name.
np.savetxt('test.txt', a.result(method='array'))
print(a.result(method='dict'))
def main(argv):
    """Compute features for every B/R light-curve pair in a tar archive.

    For each archive member whose name ends in ``B.mjd`` the member with the
    same stem and suffix ``R.mjd`` is looked up.  Both curves are read with
    ``ReadLC_MACHO``, preprocessed with ``Preprocess_LC`` and, when their
    lengths differ, aligned with ``Align_LC``.  Features are then computed on
    the preprocessed B-band data.  One row per pair is collected and written
    to ``<folder>_<field>.csv`` in the current working directory, where
    ``folder`` and ``field`` are the first two ``/``-separated components of
    the last archive member's name (text before ``lc``).

    Parameters
    ----------
    argv : str
        Path of the tar archive.
        NOTE(review): the original body read ``self.path`` although this is
        a plain function (``NameError`` on every call) and never used
        ``argv``; ``argv`` is assumed to be the intended path -- confirm
        against the caller.

    Nothing is written when the path is not a tar archive or when no pair
    could be processed.
    """
    path = argv
    if not tarfile.is_tarfile(path):
        return

    frames = []
    tar = tarfile.open(path, 'r')
    try:
        members = tar.getmembers()
        for member in members:
            if not member.name.endswith("B.mjd"):
                continue
            lc_id = member.name.split('lc_')[1]
            partner_name = member.name[:-5] + 'R.mjd'

            for member2 in members:
                if member2.name != partner_name:
                    continue

                f = tar.extractfile(member)
                g = tar.extractfile(member2)
                content1 = f.read().split('\n')
                content2 = g.read().split('\n')

                lc_B = ReadLC_MACHO(content1)
                lc_R = ReadLC_MACHO(content2)
                [data, mjd, error] = lc_B.ReadLC()
                [data2, mjd2, error2] = lc_R.ReadLC()

                preproccesed_data = Preprocess_LC(data, mjd, error)
                [data, mjd, error] = preproccesed_data.Preprocess()
                preproccesed_data = Preprocess_LC(data2, mjd2, error2)
                [second_data, mjd2, error2] = preproccesed_data.Preprocess()

                # Only align when the two bands ended up with different
                # lengths; otherwise reuse the preprocessed series as-is.
                if len(data) != len(second_data):
                    [aligned_data, aligned_second_data, aligned_mjd] = Align_LC(
                        mjd, mjd2, data, second_data, error, error2)
                else:
                    aligned_data = data
                    aligned_second_data = second_data
                    aligned_mjd = mjd

                a = FeatureSpace(
                    featureList=['Bmean'],
                    automean=[0, 0],
                    StetsonL=[aligned_second_data, aligned_data],
                    Color=second_data,
                    Beyond1Std=error,
                    StetsonJ=[aligned_second_data, aligned_data],
                    MaxSlope=mjd,
                    LinearTrend=mjd,
                    Eta_color=[aligned_second_data, aligned_data, aligned_mjd],
                    Eta_e=mjd,
                    Q31_color=[aligned_second_data, aligned_data],
                    PeriodLS=mjd,
                    CAR_sigma=[mjd, error],
                    SlottedA_length=mjd)

                try:
                    a = a.calculateFeature(data)
                    idx = [lc_id[:-6]]
                    values = a.result(method='array')
                    frames.append(pd.DataFrame(
                        values.reshape((1, len(values))),
                        columns=a.result(method='features'),
                        index=[idx]))
                except Exception:
                    # Best effort: skip pairs whose features cannot be
                    # computed.  Unlike a bare ``except`` this lets
                    # KeyboardInterrupt / SystemExit propagate.
                    continue
    finally:
        # The original never closed the archive.
        tar.close()

    if frames:
        # As in the original, the output name comes from the last member of
        # the archive (the loop variable's final value).
        last_name = members[-1].name
        folder = (last_name.split('lc')[0]).split('/')[0]
        field = (last_name.split('lc')[0]).split('/')[1]
        file_name = folder + '_' + field + '.csv'
        # Concatenate once instead of growing the frame inside the loop.
        pd.concat(frames).to_csv(file_name)
def test_Mean(white_noise):
    """``Mean`` on the ``white_noise`` fixture must lie in [-0.1, 0.1]."""
    extractor = FeatureSpace(featureList=['Mean'])
    extractor = extractor.calculateFeature(white_noise)
    mean_value = extractor.result(method='array')
    assert -0.1 <= mean_value <= 0.1
# NOTE(review): this is a fragment of a larger routine; ``mag``, ``time``,
# ``error``, ``id``, ``contador`` and ``df`` are expected to be defined by the
# surrounding code, which is not visible here.

# Preprocess the second light curve.
preproccesed_mag = Preprocess_LC(mag2, time2, error2)
[mag2, time2, error2] = preproccesed_mag.Preprocess()

# Only align when the two curves have different lengths; otherwise reuse them.
if len(mag) != len(mag2):
    [aligned_mag, aligned_mag2, aligned_time] = Align_LC(time, time2, mag, mag2, error, error2)
else:
    aligned_mag = mag
    aligned_mag2 = mag2
    aligned_time = time

lc = np.array([mag, time, error, mag2, aligned_mag, aligned_mag2, aligned_time])

# NOTE(review): the original passed ``mag='all'``; together with
# ``pd.magFrame`` below this looks like damage from a data -> mag
# search/replace.  ``Data`` is the keyword used by the other FeatureSpace
# calls in this file that take a full light-curve array -- confirm.
a = FeatureSpace(Data='all', featureList=None)
try:
    a = a.calculateFeature(lc)
    idx = [id[:-6]]
    contador = contador + 1
    check = True
    if contador == 1:
        print("contador1")
        # ``pd.magFrame`` does not exist; the intended constructor is
        # ``pd.DataFrame``.
        df = pd.DataFrame(a.result(method='array').reshape((1, len(a.result(method='array')))),
                          columns=a.result(method='features'), index=[idx])
        print("hice mi primer mag frame")
    else:
        df2 = pd.DataFrame(a.result(method='array').reshape((1, len(a.result(method='array')))),
                           columns=a.result(method='features'), index=[idx])
        df = pd.concat([df, df2])
except Exception:
    # Best effort: skip light curves whose features cannot be computed.
    # Narrowed from a bare ``except`` so KeyboardInterrupt / SystemExit
    # still propagate.
    pass
def test_SlottedA(fake_lc):
    """Smoke test: ``SlottedA`` can be computed from ``fake_lc`` without raising.

    The original body referenced ``mjd`` without defining it (the unpacking
    line was commented out), so it failed with ``NameError``.
    """
    # Fixture layout taken from the original commented-out unpacking:
    #   data, mjd, error, second_data, aligned_data, aligned_second_data, aligned_mjd
    # NOTE(review): assumes fake_lc is indexable in that order -- confirm
    # against the fixture definition.
    data, mjd = fake_lc[0], fake_lc[1]

    a = FeatureSpace(featureList=['SlottedA'], SlottedA=[mjd, 1])
    a = a.calculateFeature(data)
def test_Q31B_R(fake_lc):
    """Smoke test: ``Q31B_R`` can be computed from ``fake_lc`` without raising.

    The original body referenced ``aligned_second_data`` and ``aligned_data``
    without defining them (the unpacking line was commented out), so it
    failed with ``NameError``.
    """
    # Fixture layout taken from the original commented-out unpacking:
    #   data, mjd, error, second_data, aligned_data, aligned_second_data, aligned_mjd
    # NOTE(review): assumes fake_lc is indexable in that order -- confirm
    # against the fixture definition.
    data = fake_lc[0]
    aligned_data = fake_lc[4]
    aligned_second_data = fake_lc[5]

    a = FeatureSpace(featureList=['Q31B_R'],
                     Q31B_R=[aligned_second_data, aligned_data])
    a = a.calculateFeature(data)
def test_SmallKurtosis(white_noise):
    """``SmallKurtosis`` on the ``white_noise`` fixture must lie in [-0.2, 0.2]."""
    tolerance = 0.2
    computed = FeatureSpace(featureList=['SmallKurtosis']).calculateFeature(white_noise)
    kurtosis = computed.result(method='array')
    assert -tolerance <= kurtosis <= tolerance
def test_Std(fake_lc):
    """Smoke test: ``Std`` can be computed from ``fake_lc[0]`` without raising."""
    extractor = FeatureSpace(featureList=['Std'])
    # Only the first entry of the fixture is handed to the extractor; the
    # computed value is not checked.
    extractor = extractor.calculateFeature(fake_lc[0])
StetsonK_AC=mjd, B_R=second_data, Beyond1Std=error, StetsonJ=[aligned_second_data, aligned_data], MaxSlope=mjd, LinearTrend=mjd, Eta_B_R=[ aligned_second_data, aligned_data, aligned_mjd ], Eta_e=mjd, Q31B_R=[aligned_second_data, aligned_data], PeriodLS=mjd, CAR_sigma=[mjd, error], SlottedA=mjd) try: a = a.calculateFeature(data) guardar = np.vstack( (guardar, np.hstack( (id[:-6], a.result(method='array'))))) except: pass if count == 1: folder = (member.name.split('lc')[0]).split('/')[0] field = (member.name.split('lc')[0]).split('/')[1] file_name = folder + '_' + field + '.csv' nombres = np.hstack(("MACHO_Id", a.result(method='features'))) guardar = np.vstack((nombres, guardar[1:])) np.savetxt(file_name, guardar, delimiter=",", fmt="%s") guardar = np.zeros(shape=(1, 2))
def test_Rcs(white_noise):
    """``Rcs`` on the ``white_noise`` fixture must lie in [0, 0.1]."""
    extractor = FeatureSpace(featureList=['Rcs'])
    extractor = extractor.calculateFeature(white_noise)
    rcs = extractor.result(method='array')
    assert 0 <= rcs <= 0.1
def main(argv):
    """Compute features for every B/R light-curve pair in a tar archive.

    For each archive member whose name ends in ``B.mjd`` the member with the
    same stem and suffix ``R.mjd`` is looked up.  Both curves are read with
    ``ReadLC_MACHO``, preprocessed with ``Preprocess_LC`` and, when their
    lengths differ, aligned with ``Align_LC``.  Features are then computed on
    the preprocessed B-band data.  One row per pair is collected and written
    to ``<folder>_<field>.csv`` in the current working directory, where
    ``folder`` and ``field`` are the first two ``/``-separated components of
    the last archive member's name (text before ``lc``).

    Parameters
    ----------
    argv : str
        Path of the tar archive.
        NOTE(review): the original body read ``self.path`` although this is
        a plain function (``NameError`` on every call) and never used
        ``argv``; ``argv`` is assumed to be the intended path -- confirm
        against the caller.

    Nothing is written when the path is not a tar archive or when no pair
    could be processed.
    """
    path = argv
    if not tarfile.is_tarfile(path):
        return

    frames = []
    tar = tarfile.open(path, 'r')
    try:
        members = tar.getmembers()
        for member in members:
            if not member.name.endswith("B.mjd"):
                continue
            lc_id = member.name.split('lc_')[1]
            partner_name = member.name[:-5] + 'R.mjd'

            for member2 in members:
                if member2.name != partner_name:
                    continue

                f = tar.extractfile(member)
                g = tar.extractfile(member2)
                content1 = f.read().split('\n')
                content2 = g.read().split('\n')

                lc_B = ReadLC_MACHO(content1)
                lc_R = ReadLC_MACHO(content2)
                [data, mjd, error] = lc_B.ReadLC()
                [data2, mjd2, error2] = lc_R.ReadLC()

                preproccesed_data = Preprocess_LC(data, mjd, error)
                [data, mjd, error] = preproccesed_data.Preprocess()
                preproccesed_data = Preprocess_LC(data2, mjd2, error2)
                [second_data, mjd2, error2] = preproccesed_data.Preprocess()

                # Only align when the two bands ended up with different
                # lengths; otherwise reuse the preprocessed series as-is.
                if len(data) != len(second_data):
                    [aligned_data, aligned_second_data, aligned_mjd] = Align_LC(
                        mjd, mjd2, data, second_data, error, error2)
                else:
                    aligned_data = data
                    aligned_second_data = second_data
                    aligned_mjd = mjd

                a = FeatureSpace(
                    featureList=['Bmean'],
                    automean=[0, 0],
                    StetsonL=[aligned_second_data, aligned_data],
                    Color=second_data,
                    Beyond1Std=error,
                    StetsonJ=[aligned_second_data, aligned_data],
                    MaxSlope=mjd,
                    LinearTrend=mjd,
                    Eta_color=[aligned_second_data, aligned_data, aligned_mjd],
                    Eta_e=mjd,
                    Q31_color=[aligned_second_data, aligned_data],
                    PeriodLS=mjd,
                    CAR_sigma=[mjd, error],
                    SlottedA_length=mjd)

                try:
                    a = a.calculateFeature(data)
                    idx = [lc_id[:-6]]
                    values = a.result(method='array')
                    frames.append(pd.DataFrame(
                        values.reshape((1, len(values))),
                        columns=a.result(method='features'),
                        index=[idx]))
                except Exception:
                    # Best effort: skip pairs whose features cannot be
                    # computed.  Unlike a bare ``except`` this lets
                    # KeyboardInterrupt / SystemExit propagate.
                    continue
    finally:
        # The original never closed the archive.
        tar.close()

    if frames:
        # As in the original, the output name comes from the last member of
        # the archive (the loop variable's final value).
        last_name = members[-1].name
        folder = (last_name.split('lc')[0]).split('/')[0]
        field = (last_name.split('lc')[0]).split('/')[1]
        file_name = folder + '_' + field + '.csv'
        # Concatenate once instead of growing the frame inside the loop.
        pd.concat(frames).to_csv(file_name)