def format_data(database, dataset_id, release, host, port, user, pword, lightcurves):
    """Extract the required data from the TraP database and format it for later analysis.

    If 'ds_<dataset_id>_sources.csv' does not exist yet, ``get_data`` is called
    with the connection parameters to produce it.  The sources (and the
    transients file, when present) are then read, collated, and written as
    comma-separated rows to 'ds<dataset_id>_trans_data.txt' underneath a
    single header line.

    Parameters:
        database, release, host, port, user, pword: passed unchanged to
            ``get_data``; only used when the sources CSV is missing.
        dataset_id: identifier used to build the input and output file names.
        lightcurves: passed unchanged to ``read_src_lc``.

    Returns:
        None.  The result is the output file written to the working directory.
    """
    sources_csv = 'ds_' + str(dataset_id) + '_sources.csv'
    transients_csv = 'ds_' + str(dataset_id) + '_transients.csv'

    if not os.path.isfile(sources_csv):
        # grab the data if it has not been previously extracted from the database
        get_data(database, dataset_id, release, host, port, user, pword)

    if os.path.isfile(transients_csv):
        transients = generic_tools.extract_data(transients_csv)
    else:
        # if no new sources were detected, create an empty list
        transients = []

    sources = generic_tools.extract_data(sources_csv)
    frequencies, new_source = read_src_lc(sources, lightcurves)
    trans_data = collate_trans_data(new_source, frequencies, transients)

    # NOTE: the output name has no underscore after 'ds' (unlike the input
    # CSVs); this is the name the file is written under, so keep it as is.
    # The context manager guarantees the file is closed even if a write fails.
    with open('ds' + str(dataset_id) + '_trans_data.txt', 'w') as output3:
        output3.write('#Runcat_id, eta_nu, V_nu, flux, fluxrat, freq, dpts, RA, Dec, trans_type, max_rms_sigma, min_rms_sigma, detection_threshold \n')
        for row in trans_data:
            output3.write(','.join(str(val) for val in row) + '\n')

    # Single-argument parenthesised print behaves the same under Python 2 and 3.
    print('Data extracted and saved')
# NOTE(review): this chunk appears to start part-way through a script; any
# argument validation or earlier sys.argv handling is outside this view.
anomaly = False

# Command-line switches: the literal 'T' enables an option, anything else
# disables it.
logistic = sys.argv[5] == 'T'
transSrc = sys.argv[6] == 'T'
tests = sys.argv[7] == 'T'

# sort the transient/variable datasets and the stable datasets into the format required for training.
trans_data = generic_tools.extract_data('stable_trans_data.txt')
stable_data = generic_tools.label_data(trans_data, 'stable', 0)

files = glob.glob('sim_*_trans_data.txt')
trans_data = []
for filename in files:
    # Strip the fixed prefix/suffix to recover the simulation name.  Splitting
    # on 'm_' (as was done previously) truncated any name that itself contains
    # 'm_' (e.g. 'sim_param_1_trans_data.txt' -> 'para') and then tried to read
    # a file that does not exist.
    sim_name = filename[len('sim_'):-len('_trans_data.txt')]
    # Read the file glob actually matched rather than rebuilding its name.
    trans_data_tmp = generic_tools.extract_data(filename)
    trans_data.extend(generic_tools.label_data(trans_data_tmp, sim_name, 1))

full_data = stable_data + trans_data

# NOTE(review): the meaning of column -5 (compared with '2') and of the last
# column (compared with 0) depends on what generic_tools.label_data appends;
# presumably the source-type flag and the numeric class label -- confirm.
variables = [x for x in full_data if x[-5] == '2']

# Sort data into transient and non-transient
# Rows with a non-zero last column and strictly positive columns 1 and 2;
# column 2 is rescaled and only columns 0..12 are kept.
# NOTE(review): the 1.6 divisor is not explained anywhere in view -- confirm
# its origin before changing it.
variable = [
    [x[0], x[1], float(x[2]) / 1.6] + [x[i] for i in range(3, 13)]
    for x in variables
    if float(x[-1]) != 0. and float(x[1]) > 0. and float(x[2]) > 0.
]
# Rows with a zero last column and strictly positive columns 1 and 2, unmodified.
stable = [
    x for x in variables
    if float(x[-1]) == 0. and float(x[1]) > 0. and float(x[2]) > 0.
]
# NOTE(review): this chunk starts part-way through a script.  The exit() below
# is presumably the tail of a conditional (e.g. an argument-count check) that
# begins before this view -- confirm and restore its indentation accordingly.
exit()

# Positional command-line arguments: database connection details, the dataset
# to process, two sigma values used for the thresholds below, and a
# light-curve option that is forwarded unchanged to format_data.
database = sys.argv[1]
username = sys.argv[2]
password = sys.argv[3]
dataset_id = str(sys.argv[4])
release = str(sys.argv[5])
host = str(sys.argv[6])
port = int(sys.argv[7])
sigma1 = float(sys.argv[8])   # sigma for the threshold on data column 1
sigma2 = float(sys.argv[9])   # sigma for the threshold on data column 2
lightcurves = sys.argv[10]

# get TraP data from the database and sort it into the required array which is then loaded
# (the extraction step is skipped when the formatted file already exists)
if not os.path.isfile('ds'+str(dataset_id)+'_trans_data.txt'):
    format_TraP_data.format_data(database,dataset_id,release,host,port,username,password,lightcurves)
trans_data=generic_tools.extract_data('ds'+str(dataset_id)+'_trans_data.txt')

# make first array for the scatter_hist plot: [log10(eta_nu), log10(V_nu), nu]
# Each kept row is [col 0, log10(col 1), log10(col 2), col 5, last col]; rows
# are kept only when columns 1 and 2 are strictly positive (so log10 is
# defined) and column -4 equals '2'.
# NOTE(review): assuming the file keeps the column order of the header written
# by format_data, col 0 is Runcat_id, col 5 is freq, col -4 is trans_type and
# the last column is detection_threshold -- confirm against extract_data.
data=[[trans_data[n][0],np.log10(float(trans_data[n][1])),np.log10(float(trans_data[n][2])),trans_data[n][5], trans_data[n][-1]] for n in range(len(trans_data)) if float(trans_data[n][1]) > 0 if float(trans_data[n][2]) > 0 if trans_data[n][-4]=='2']

# print out the transients that TraP automatically found
# Both lists contain the sorted, de-duplicated integer ids (column 0) of rows
# whose column -4 is not '2'.
# Candidates: column -2 reaches the last-column value but column -3 does not.
print 'Identified Transient Candidates (no margin)'
print np.sort(list(set([int(x[0]) for x in trans_data if x[-4]!='2' if float(x[-2])>=float(x[-1]) if float(x[-3])<float(x[-1])])))
# Transients: column -3 reaches the last-column value.
print 'Identified Transients (no margin)'
print np.sort(list(set([int(x[0]) for x in trans_data if x[-4]!='2' if float(x[-3])>=float(x[-1])])))

# Find the thresholds for a given sigma (in log space)
sigcutx,paramx,range_x = generic_tools.get_sigcut([a[1] for a in data],sigma1)
sigcuty,paramy,range_y = generic_tools.get_sigcut([a[2] for a in data],sigma2)
# A sigma of exactly 0 overrides the computed threshold with 0.
if sigma1 == 0:
    sigcutx=0
# NOTE(review): the body of the following conditional lies beyond the end of
# this chunk; it is left open here so the continuation attaches to it.
if sigma2 == 0: