connection.commit()

# ---- Flux / time measurements for some objects ----
# Select the 'debosscher' survey sources whose original_source_id equals
# their own source_id, then write their tfes and their features.
sql_cmd = "SELECT source_id FROM sources WHERE survey='debosscher' AND original_source_id=source_id"
cursor.execute(sql_cmd)
db_info = cursor.fetchall()
# Each fetched row is a one-column record; keep only the id itself.
source_id = [row[0] for row in db_info]
db_output.tfeOutput(source_id, cursor, 'tfe.txt')
db_output.outputRfile(source_id, cursor, 'features.txt')

# ---- Class of the objects we are looking at ----
# Same selection, restricted to sources with more than 200 points, this
# time fetching the classification alongside the id.
sql_cmd = "SELECT source_id,classification FROM sources WHERE survey='debosscher' AND number_points > 200 AND original_source_id=source_id"
cursor.execute(sql_cmd)
db_info = cursor.fetchall()
# One "source_id; classification" text line per fetched row.
to_output = [str(row[0]) + '; ' + str(row[1]) + '\n' for row in db_info]
# NOTE(review): the handle is opened here but its use and close are not
# visible in this fragment; make sure the following code closes it.
g = open('class_output.txt', 'w')
# Derive features for the selected sources, then commit the transaction.
derive_features.derive_features_par(
    source_ids,
    noise_dict,
    cursor,
    connection,
    number_processors=2,
    delete_existing=True)
connection.commit()

# NOTE (carried over from the original): check that we have filenames.

# ---- Output results ----

# Write every source in the database to a file for analysis in R.
sql_cmd = "SELECT source_id FROM sources"
cursor.execute(sql_cmd)
db_info = cursor.fetchall()
source_ids = tolist(db_info)
db_output.outputRfile(
    source_ids, cursor, "../data_processed/hip_ogle_plot.dat")

# Write the tfes, restricted to sources whose original_source_id equals
# their own source_id.
sql_cmd = "SELECT source_id FROM sources WHERE original_source_id = source_id"
cursor.execute(sql_cmd)
db_info = cursor.fetchall()
source_ids = tolist(db_info)
db_output.tfeOutput(
    source_ids, cursor, "../data_processed/hip_ogle_plot_tfe.dat")
connection.commit()
# Create (only if it does not exist yet) a view exposing a handful of
# columns of the features table.
sql_cmd = "CREATE VIEW IF NOT EXISTS features_short AS SELECT source_id,freq1_harmonics_freq_0,std,max,weighted_average FROM features"
cursor.execute(sql_cmd)

# Dump the view row by row for a quick visual check.
sql_cmd = "SELECT * FROM features_short"
cursor.execute(sql_cmd)
db_info = cursor.fetchall()
for row in db_info:
    # Single-argument print(...) prints the same text as "print row".
    print(row)

# ---- Output for analysis in R ----

# Write every source in the database to a file for analysis in R.
sql_cmd = "SELECT source_id FROM sources"
cursor.execute(sql_cmd)
db_info = cursor.fetchall()
source_ids = tolist(db_info)
db_output.outputRfile(
    source_ids, cursor, "../data_processed/hipparcos/sources00001.dat")

# Write the tfes, restricted to sources whose original_source_id equals
# their own source_id.
sql_cmd = "SELECT source_id FROM sources WHERE original_source_id = source_id"
cursor.execute(sql_cmd)
db_info = cursor.fetchall()
source_ids = tolist(db_info)
db_output.tfeOutput(
    source_ids, cursor, "../data_processed/hipparcos/tfe00001.dat")
# Retrieve the id of every source in the database.
sql_cmd = "SELECT source_id FROM sources"
cursor.execute(sql_cmd)
db_info = cursor.fetchall()
source_ids = tolist(db_info)

# Derive features for all of them, replacing existing ones
# (delete_existing=True) and requesting two processors.
noise_dict = noisification.get_noisification_dict()
derive_features.derive_features_par(
    source_ids,
    noise_dict,
    cursor,
    connection,
    number_processors=2,
    delete_existing=True)

# ---- Output for analysis in R ----

# Write every source in the database to a file for analysis in R.
sql_cmd = "SELECT source_id FROM sources"
cursor.execute(sql_cmd)
db_info = cursor.fetchall()
source_ids = tolist(db_info)
db_output.outputRfile(
    source_ids, cursor, "../data_processed/sharmo_outliers.dat")

# Write the tfes, restricted to sources whose original_source_id equals
# their own source_id.
sql_cmd = "SELECT source_id FROM sources WHERE original_source_id = source_id"
cursor.execute(sql_cmd)
db_info = cursor.fetchall()
source_ids = tolist(db_info)
db_output.tfeOutput(
    source_ids, cursor, "../data_processed/sharmo_outliers_tfe.dat")
connection.commit()
# Collect the rows of the query executed just before this fragment and
# turn them into a plain list of source ids.
db_info = cursor.fetchall()
source_ids = tolist(db_info)

# Derive features for those sources, then commit the transaction.
noise_dict = noisification.get_noisification_dict()
derive_features.derive_features_par(
    source_ids,
    noise_dict,
    cursor,
    connection,
    number_processors=2,
    delete_existing=True)
connection.commit()

# ---- Output results ----

# Write every source in the database to a file for analysis in R.
sql_cmd = "SELECT source_id FROM sources"
cursor.execute(sql_cmd)
db_info = cursor.fetchall()
source_ids = tolist(db_info)
db_output.outputRfile(source_ids, cursor, "../data_processed/job_talk.dat")

# Write the tfes, restricted to sources whose original_source_id equals
# their own source_id.
sql_cmd = "SELECT source_id FROM sources WHERE original_source_id = source_id"
cursor.execute(sql_cmd)
db_info = cursor.fetchall()
source_ids = tolist(db_info)
db_output.tfeOutput(source_ids, cursor, "../data_processed/job_talk_tfe.dat")
connection.commit()

# Disabled debugging aid: print number_points for every source.
# sql_cmd = """SELECT number_points FROM sources"""
# cursor.execute(sql_cmd)
# db_info = cursor.fetchall()
# print db_info
# Dump the features_short view row by row for a quick visual check.
sql_cmd = "SELECT * FROM features_short"
cursor.execute(sql_cmd)
db_info = cursor.fetchall()
for row in db_info:
    # Single-argument print(...) prints the same text as "print row".
    print(row)

# Write every source in the database to a file for analysis in R.
sql_cmd = "SELECT source_id FROM sources"
cursor.execute(sql_cmd)
db_info = cursor.fetchall()
source_ids = tolist(db_info)
db_output.outputRfile(
    source_ids, cursor, "../data_processed/visualize_feat_error.dat")

# Write the tfes, restricted to sources whose original_source_id equals
# their own source_id.
sql_cmd = "SELECT source_id FROM sources WHERE original_source_id = source_id"
cursor.execute(sql_cmd)
db_info = cursor.fetchall()
source_ids = tolist(db_info)
db_output.tfeOutput(
    source_ids, cursor, "../data_processed/visualize_feat_error_tfe.dat")
# Turn the previously fetched rows into a plain list of source ids and
# derive features for them (existing features are deleted first, as
# requested by delete_existing=True).
source_ids = tolist(db_info)
noise_dict = noisification.get_noisification_dict()
derive_features.derive_features_par(
    source_ids,
    noise_dict,
    cursor,
    connection,
    number_processors=2,
    delete_existing=True)

# ---- Output for analysis in R ----

# Write every source in the database to a file for analysis in R.
sql_cmd = "SELECT source_id FROM sources"
cursor.execute(sql_cmd)
db_info = cursor.fetchall()
source_ids = tolist(db_info)
db_output.outputRfile(
    source_ids, cursor, "../data_processed/sharmo_outliers.dat")

# Write the tfes, restricted to sources whose original_source_id equals
# their own source_id.
sql_cmd = "SELECT source_id FROM sources WHERE original_source_id = source_id"
cursor.execute(sql_cmd)
db_info = cursor.fetchall()
source_ids = tolist(db_info)
db_output.tfeOutput(
    source_ids, cursor, "../data_processed/sharmo_outliers_tfe.dat")
connection.commit()
# Cadence object built from the hip_three_class database.
hip = synthetic_data.CadenceFromSurvey(
    database_location="../db/hip_three_class.db")

# Test hip and ogle: hand both cadences to the feature derivation.
# `ogle` is defined before this fragment.
cadence_dict = {"hip": hip, "ogle": ogle}
derive_features.derive_features_par(
    source_ids,
    noise_dict,
    cursor,
    connection,
    cadence_dict,
    number_processors=2,
    delete_existing=True)
connection.commit()

# ---- Output results ----

# Write every source in the database to a file for analysis in R.
sql_cmd = "SELECT source_id FROM sources"
cursor.execute(sql_cmd)
db_info = cursor.fetchall()
source_ids = tolist(db_info)
db_output.outputRfile(
    source_ids, cursor, "../data_processed/hip_train_three_class.dat")

# Write the tfes, restricted to sources whose original_source_id equals
# their own source_id.
sql_cmd = "SELECT source_id FROM sources WHERE original_source_id = source_id"
cursor.execute(sql_cmd)
db_info = cursor.fetchall()
source_ids = tolist(db_info)
db_output.tfeOutput(
    source_ids, cursor, "../data_processed/hip_train_three_class_tfe.dat")
connection.commit()

# TODO: write a function (or modify tfeOutput) so that smoothed tfe's can
# be written out for examination.
# Collect the rows of the query executed just before this fragment and
# turn them into a plain list of source ids.
db_info = cursor.fetchall()
source_ids = tolist(db_info)

# Derive features for those sources, then commit the transaction.
noise_dict = noisification.get_noisification_dict()
derive_features.derive_features_par(
    source_ids,
    noise_dict,
    cursor,
    connection,
    number_processors=2,
    delete_existing=True)
connection.commit()

# NOTE (carried over from the original): check that we have filenames.

# ---- Output results ----

# Write every source in the database to a file for analysis in R.
sql_cmd = "SELECT source_id FROM sources"
cursor.execute(sql_cmd)
db_info = cursor.fetchall()
source_ids = tolist(db_info)
db_output.outputRfile(
    source_ids, cursor, "../data_processed/hip_ogle_plot.dat")

# Write the tfes, restricted to sources whose original_source_id equals
# their own source_id.
sql_cmd = "SELECT source_id FROM sources WHERE original_source_id = source_id"
cursor.execute(sql_cmd)
db_info = cursor.fetchall()
source_ids = tolist(db_info)
db_output.tfeOutput(
    source_ids, cursor, "../data_processed/hip_ogle_plot_tfe.dat")
connection.commit()
noise_dict, cursor, connection, number_processors=2, delete_existing=True) connection.commit() ########## ########## OUTPUT RESULTS ########## ## output all sources to R file for analysis sql_cmd = """SELECT source_id FROM sources""" cursor.execute(sql_cmd) db_info = cursor.fetchall() source_ids = tolist(db_info) db_output.outputRfile(source_ids, cursor, '../data_processed/job_talk.dat') ## output tfes sql_cmd = """SELECT source_id FROM sources WHERE original_source_id = source_id""" cursor.execute(sql_cmd) db_info = cursor.fetchall() source_ids = tolist(db_info) db_output.tfeOutput(source_ids, cursor, '../data_processed/job_talk_tfe.dat') connection.commit() # sql_cmd = """SELECT number_points FROM sources""" # cursor.execute(sql_cmd) # db_info = cursor.fetchall() # print db_info
# Collect the rows of the query executed just before this fragment and
# turn them into a plain list of source ids.
db_info = cursor.fetchall()
source_ids = tolist(db_info)

# Derive features for those sources, then commit the transaction.
noise_dict = noisification.get_noisification_dict()
derive_features.derive_features_par(
    source_ids,
    noise_dict,
    cursor,
    connection,
    number_processors=2,
    delete_existing=True)
connection.commit()

# ---- Output results ----

# Write every source in the database to a file for analysis in R.
sql_cmd = "SELECT source_id FROM sources"
cursor.execute(sql_cmd)
db_info = cursor.fetchall()
source_ids = tolist(db_info)
db_output.outputRfile(source_ids, cursor, "../data_processed/eclipse-rr.dat")

# Write the tfes, restricted to sources whose original_source_id equals
# their own source_id.
sql_cmd = "SELECT source_id FROM sources WHERE original_source_id = source_id"
cursor.execute(sql_cmd)
db_info = cursor.fetchall()
source_ids = tolist(db_info)
db_output.tfeOutput(
    source_ids, cursor, "../data_processed/eclipse-rr-tfe.dat")
connection.commit()
# Run the query prepared just before this fragment and print each row.
cursor.execute(sql_cmd)
db_info = cursor.fetchall()
for row in db_info:
    # Single-argument print(...) prints the same text as "print row".
    print(row)

# ---- Output R files for analysis ----

# Write every source in the database to a file for analysis in R.
sql_cmd = "SELECT source_id FROM sources"
cursor.execute(sql_cmd)
db_info = cursor.fetchall()
source_ids = tolist(db_info)
db_output.outputRfile(
    source_ids, cursor, "../data_processed/simulated_data.dat")

# Write the tfes, restricted to sources whose original_source_id equals
# their own source_id.
sql_cmd = "SELECT source_id FROM sources WHERE original_source_id = source_id"
cursor.execute(sql_cmd)
db_info = cursor.fetchall()
source_ids = tolist(db_info)
db_output.tfeOutput(
    source_ids, cursor, "../data_processed/simulated_data_tfe.dat")
# Derive features for the selected sources, passing the cadence
# dictionary built before this fragment.
derive_features.derive_features_par(
    source_ids,
    noise_dict,
    cursor,
    connection,
    cadence_dict,
    number_processors=2,
    delete_existing=True)

# ---- Output results ----

# Write every source in the database to a file for analysis in R.
sql_cmd = "SELECT source_id FROM sources"
cursor.execute(sql_cmd)
db_info = cursor.fetchall()
source_ids = tolist(db_info)
db_output.outputRfile(
    source_ids,
    cursor,
    "../data_processed/cadence_comparison/ogle_versus_hipparcos.dat")

# Write the tfes, restricted to sources whose original_source_id equals
# their own source_id.
sql_cmd = "SELECT source_id FROM sources WHERE original_source_id = source_id"
cursor.execute(sql_cmd)
db_info = cursor.fetchall()
source_ids = tolist(db_info)
db_output.tfeOutput(
    source_ids,
    cursor,
    "../data_processed/cadence_comparison/tfe_ogle_versus_hipparcos.dat")
connection.commit()
# Turn the previously fetched rows into a plain list of source ids and
# derive features for them.
source_ids = tolist(db_info)
noise_dict = noisification.get_noisification_dict()
derive_features.derive_features_par(
    source_ids,
    noise_dict,
    cursor,
    connection,
    number_processors=2,
    delete_existing=True)

# ---- Output for analysis in R ----

# Write every source in the database to a file for analysis in R.
sql_cmd = "SELECT source_id FROM sources"
cursor.execute(sql_cmd)
db_info = cursor.fetchall()
source_ids = tolist(db_info)
db_output.outputRfile(
    source_ids,
    cursor,
    "../data_processed/cadence_comparison/sources00001.dat")

# Write the tfes, restricted to sources whose original_source_id equals
# their own source_id.
# NOTE(review): the target file name suggests this output is a
# throw-away; confirm before relying on it.
sql_cmd = "SELECT source_id FROM sources WHERE original_source_id = source_id"
cursor.execute(sql_cmd)
db_info = cursor.fetchall()
source_ids = tolist(db_info)
db_output.tfeOutput(
    source_ids,
    cursor,
    "../data_processed/cadence_comparison/DELETE_THIS.dat")
connection.commit()
# Derive features for the selected sources; existing features are
# deleted first, as requested by delete_existing=True.
derive_features.derive_features_par(
    source_ids,
    noise_dict,
    cursor,
    connection,
    number_processors=2,
    delete_existing=True)

# ---- Output for analysis in R ----

# Write every source in the database to a file for analysis in R.
sql_cmd = "SELECT source_id FROM sources"
cursor.execute(sql_cmd)
db_info = cursor.fetchall()
source_ids = tolist(db_info)
db_output.outputRfile(
    source_ids, cursor, "../data_processed/hipparcos_dubath_sources.dat")

# Write the tfes, restricted to sources whose original_source_id equals
# their own source_id.
sql_cmd = "SELECT source_id FROM sources WHERE original_source_id = source_id"
cursor.execute(sql_cmd)
db_info = cursor.fetchall()
source_ids = tolist(db_info)
db_output.tfeOutput(
    source_ids, cursor, "../data_processed/hipparcos_dubath_tfe.dat")
connection.commit()
# Turn the previously fetched rows into a plain list of source ids and
# derive features for them.
source_ids = tolist(db_info)
noise_dict = noisification.get_noisification_dict()
derive_features.derive_features_par(
    source_ids,
    noise_dict,
    cursor,
    connection,
    number_processors=2,
    delete_existing=True)

# ---- Output for analysis in R ----

# Write every source in the database to a file for analysis in R.
sql_cmd = "SELECT source_id FROM sources"
cursor.execute(sql_cmd)
db_info = cursor.fetchall()
source_ids = tolist(db_info)
db_output.outputRfile(
    source_ids, cursor, "../data_processed/hipparcos_dubath_sources.dat")

# Write the tfes, restricted to sources whose original_source_id equals
# their own source_id.
sql_cmd = "SELECT source_id FROM sources WHERE original_source_id = source_id"
cursor.execute(sql_cmd)
db_info = cursor.fetchall()
source_ids = tolist(db_info)
db_output.tfeOutput(
    source_ids, cursor, "../data_processed/hipparcos_dubath_tfe.dat")
connection.commit()
# Create (only if it does not exist yet) a view exposing a handful of
# columns of the features table.
sql_cmd = "CREATE VIEW IF NOT EXISTS features_short AS SELECT source_id,freq1_harmonics_freq_0,std,max,weighted_average FROM features"
cursor.execute(sql_cmd)

# Dump the view row by row for a quick visual check.
sql_cmd = "SELECT * FROM features_short"
cursor.execute(sql_cmd)
db_info = cursor.fetchall()
for row in db_info:
    # Single-argument print(...) prints the same text as "print row".
    print(row)

# ---- Output for analysis in R ----

# Write every source in the database to a file for analysis in R.
sql_cmd = "SELECT source_id FROM sources"
cursor.execute(sql_cmd)
db_info = cursor.fetchall()
source_ids = tolist(db_info)
db_output.outputRfile(
    source_ids, cursor, "../data_processed/hipparcos/sources00001.dat")

# Write the tfes, restricted to sources whose original_source_id equals
# their own source_id.
sql_cmd = "SELECT source_id FROM sources WHERE original_source_id = source_id"
cursor.execute(sql_cmd)
db_info = cursor.fetchall()
source_ids = tolist(db_info)
db_output.tfeOutput(
    source_ids, cursor, "../data_processed/hipparcos/tfe00001.dat")
# NOTE (carried over from the original): get rid of missing data by
# imputing median / mode. No imputation code is visible in this fragment.

# ---- Output results ----

# Feature names handed to outputOriginalOnly below.
features_to_remove = [
    "n_points",
    "min",
    "max",
    "median",
    "weighted_average",
    "small_kurtosis",
    "fold2P_slope_90percentile",
    "fold2P_slope_10percentile",
    "medperc90_2p_p",
    "freq_signif",
    "p2p_scatter_over_mad",
]

# Write every source in the database to a file for analysis in R.
sql_cmd = "SELECT source_id FROM sources"
cursor.execute(sql_cmd)
db_info = cursor.fetchall()
source_ids = tolist(db_info)
db_output.outputRfile(
    source_ids, cursor, "../data_processed/RR2convexMeta.dat")

# Output original-source information: one column for the class, one for
# the source id and one per feature, so (#columns) = 2 + (#features).
sql_cmd = "SELECT source_id FROM sources WHERE source_id = original_source_id"
cursor.execute(sql_cmd)
db_info = cursor.fetchall()
source_ids = tolist(db_info)
db_output.outputOriginalOnly(
    source_ids,
    cursor,
    "../data_processed/RR2convexPoint.dat",
    features_to_remove)

# Output intervals for sources.
# NOTE: removing the WHERE clause would make the interval range cover the
# entire feature, which may not be such a bad idea.
sql_cmd = "SELECT source_id FROM sources WHERE source_id != original_source_id"
# ---- Derive features ----

# Fetch the id of every source in the database and derive features for
# all of them.
sql_cmd = "SELECT source_id FROM sources"
cursor.execute(sql_cmd)
db_info = cursor.fetchall()
source_ids = tolist(db_info)
noise_dict = noisification.get_noisification_dict()
derive_features.derive_features_par(
    source_ids,
    noise_dict,
    cursor,
    connection,
    number_processors=2,
    delete_existing=True)

# ---- Output results ----

# Write every source in the database to a file for analysis in R.
sql_cmd = "SELECT source_id FROM sources"
cursor.execute(sql_cmd)
db_info = cursor.fetchall()
source_ids = tolist(db_info)
db_output.outputRfile(
    source_ids, cursor, "../data_processed/ogleIIIall-fund.dat")

# Write the tfes, restricted to sources whose original_source_id equals
# their own source_id.
sql_cmd = "SELECT source_id FROM sources WHERE original_source_id = source_id"
cursor.execute(sql_cmd)
db_info = cursor.fetchall()
source_ids = tolist(db_info)
db_output.tfeOutput(
    source_ids, cursor, "../data_processed/ogleIIIall-fund-tfe.dat")
connection.commit()
delete_existing=True) ## get rid of missing data by imputing median / mode ########## ########## OUTPUT RESULTS ########## features_to_remove = ['n_points','min','max','median','weighted_average','small_kurtosis', \ 'fold2P_slope_90percentile','fold2P_slope_10percentile','medperc90_2p_p',\ 'freq_signif','p2p_scatter_over_mad',] ## output all sources to R file for analysis sql_cmd = """SELECT source_id FROM sources""" cursor.execute(sql_cmd) db_info = cursor.fetchall() source_ids = tolist(db_info) db_output.outputRfile(source_ids, cursor, '../data_processed/RR2convexMeta.dat') ## output original sources information ## column for class, column for source id, column for features, so ## (#columns) = 2 + (#features) sql_cmd = """SELECT source_id FROM sources WHERE source_id = original_source_id""" cursor.execute(sql_cmd) db_info = cursor.fetchall() source_ids = tolist(db_info) db_output.outputOriginalOnly(source_ids, cursor, '../data_processed/RR2convexPoint.dat', features_to_remove) ## output intervals for sources ## NOTE: removing the WHERE will ensure that range of interval covers ## the entire feature, this may not be such a bad idea
# Turn the previously fetched rows into a plain list of source ids,
# derive features for them and commit the transaction.
source_ids = tolist(db_info)
noise_dict = noisification.get_noisification_dict()
derive_features.derive_features_par(
    source_ids,
    noise_dict,
    cursor,
    connection,
    number_processors=2,
    delete_existing=True)
connection.commit()

# ---- Output results ----

# Features file covering every source in the database.
sql_cmd = "SELECT source_id FROM sources"
cursor.execute(sql_cmd)
db_info = cursor.fetchall()
source_ids = tolist(db_info)
db_output.outputRfile(source_ids, cursor, "../data_processed/asas.dat")
connection.commit()

# Tfes (i.e. time, fluxes (magnitudes actually), and errors) in a file,
# nice for doing visualization; restricted to sources whose
# original_source_id equals their own source_id.
sql_cmd = "SELECT source_id FROM sources WHERE original_source_id = source_id"
cursor.execute(sql_cmd)
db_info = cursor.fetchall()
source_ids = tolist(db_info)
db_output.tfeOutput(source_ids, cursor, "../data_processed/asas_tfe.dat")
connection.commit()
# Run the query prepared just before this fragment and turn its rows
# into a plain list of source ids.
cursor.execute(sql_cmd)
db_info = cursor.fetchall()
source_ids = tolist(db_info)

# Build the noisification dictionary and one cadence object per survey
# database, then derive features using both cadences.
noise_dict = noisification.get_noisification_dict()
hip = synthetic_data.CadenceFromSurvey(
    database_location="../db/hipparcos_cadences.db")
ogle = synthetic_data.CadenceFromSurvey(
    database_location="../db/ogle_cadences.db")
cadence_dict = {"hip": hip, "ogle": ogle}
derive_features.derive_features_par(
    source_ids,
    noise_dict,
    cursor,
    connection,
    cadence_dict,
    number_processors=2,
    delete_existing=True)

# ---- Output results ----

# Write every source in the database to a file for analysis in R.
sql_cmd = "SELECT source_id FROM sources"
cursor.execute(sql_cmd)
db_info = cursor.fetchall()
source_ids = tolist(db_info)
db_output.outputRfile(
    source_ids,
    cursor,
    "../data_processed/cadence_comparison/ogle_versus_hipparcos.dat")

# Write the tfes, restricted to sources whose original_source_id equals
# their own source_id.
sql_cmd = "SELECT source_id FROM sources WHERE original_source_id = source_id"
cursor.execute(sql_cmd)
db_info = cursor.fetchall()
source_ids = tolist(db_info)
db_output.tfeOutput(
    source_ids,
    cursor,
    "../data_processed/cadence_comparison/tfe_ogle_versus_hipparcos.dat")
connection.commit()
# Only the ogle cadence is used here; `ogle` is defined before this
# fragment.
cadence_dict = {"ogle": ogle}

# Derive the features and save.
derive_features.derive_features_par(
    source_ids,
    noise_dict,
    cursor,
    connection,
    cadence_dict,
    number_processors=2,
    delete_existing=True)
connection.commit()

# ---- Output results ----

# Features file covering every source in the database.
sql_cmd = "SELECT source_id FROM sources"
cursor.execute(sql_cmd)
db_info = cursor.fetchall()
source_ids = tolist(db_info)
db_output.outputRfile(source_ids, cursor, "../data_processed/test.dat")
connection.commit()

# Tfes (i.e. time, fluxes (magnitudes actually), and errors) in a file,
# nice for doing visualization; restricted to sources whose
# original_source_id equals their own source_id.
sql_cmd = "SELECT source_id FROM sources WHERE original_source_id = source_id"
cursor.execute(sql_cmd)
db_info = cursor.fetchall()
source_ids = tolist(db_info)
db_output.tfeOutput(source_ids, cursor, "../data_processed/test_tfe.dat")
connection.commit()

# Smoothed tfes: same thing as the previous lines but uses the
# measurements_smoothed table. The call that consumes these rows is
# outside this fragment.
sql_cmd = "SELECT source_id FROM sources WHERE original_source_id = source_id"
cursor.execute(sql_cmd)
db_info = cursor.fetchall()
# ---- Flux / time measurements for some objects ----
# Select the 'debosscher' survey sources whose original_source_id equals
# their own source_id, then write their tfes and their features.
sql_cmd = "SELECT source_id FROM sources WHERE survey='debosscher' AND original_source_id=source_id"
cursor.execute(sql_cmd)
db_info = cursor.fetchall()
# Each fetched row is a one-column record; keep only the id itself.
source_id = [row[0] for row in db_info]
db_output.tfeOutput(source_id, cursor, 'tfe.txt')
db_output.outputRfile(source_id, cursor, 'features.txt')

# ---- Class of the objects we are looking at ----
# Same selection, restricted to sources with more than 200 points, this
# time fetching the classification alongside the id. The rows are left
# in db_info for the code that follows this fragment.
sql_cmd = "SELECT source_id,classification FROM sources WHERE survey='debosscher' AND number_points > 200 AND original_source_id=source_id"
cursor.execute(sql_cmd)
db_info = cursor.fetchall()
# Derive features for the sources selected before this fragment.
noise_dict = noisification.get_noisification_dict()
derive_features.derive_features_par(
    source_ids,
    noise_dict,
    cursor,
    connection,
    number_processors=2,
    delete_existing=True)

# Dump the features_short view row by row for a quick visual check.
sql_cmd = "SELECT * FROM features_short"
cursor.execute(sql_cmd)
db_info = cursor.fetchall()
for row in db_info:
    # Single-argument print(...) prints the same text as "print row".
    print(row)

# Write every source in the database to a file for analysis in R.
sql_cmd = "SELECT source_id FROM sources"
cursor.execute(sql_cmd)
db_info = cursor.fetchall()
source_ids = tolist(db_info)
db_output.outputRfile(
    source_ids, cursor, "../data_processed/visualize_feat_error.dat")

# Write the tfes, restricted to sources whose original_source_id equals
# their own source_id.
sql_cmd = "SELECT source_id FROM sources WHERE original_source_id = source_id"
cursor.execute(sql_cmd)
db_info = cursor.fetchall()
source_ids = tolist(db_info)
db_output.tfeOutput(
    source_ids, cursor, "../data_processed/visualize_feat_error_tfe.dat")
# Run the query prepared just before this fragment and turn its rows
# into a plain list of source ids.
cursor.execute(sql_cmd)
db_info = cursor.fetchall()
source_ids = tolist(db_info)

# Derive features for those sources.
noise_dict = noisification.get_noisification_dict()
derive_features.derive_features_par(
    source_ids,
    noise_dict,
    cursor,
    connection,
    number_processors=2,
    delete_existing=True)

# ---- Output for analysis in R ----

# Write every source in the database to a file for analysis in R.
sql_cmd = "SELECT source_id FROM sources"
cursor.execute(sql_cmd)
db_info = cursor.fetchall()
source_ids = tolist(db_info)
db_output.outputRfile(
    source_ids,
    cursor,
    "../data_processed/cadence_comparison/sources00001.dat")

# Write the tfes, restricted to sources whose original_source_id equals
# their own source_id.
# NOTE(review): the target file name suggests this output is a
# throw-away; confirm before relying on it.
sql_cmd = "SELECT source_id FROM sources WHERE original_source_id = source_id"
cursor.execute(sql_cmd)
db_info = cursor.fetchall()
source_ids = tolist(db_info)
db_output.tfeOutput(
    source_ids,
    cursor,
    "../data_processed/cadence_comparison/DELETE_THIS.dat")
connection.commit()