Example #1
0
def format_data(database, dataset_id, release,host,port, user, pword, lightcurves):
    """Extract the required data from the TraP database and save it for later analysis.

    Reads 'ds_<dataset_id>_sources.csv' (calling get_data first when that file
    is absent) and, when present, 'ds_<dataset_id>_transients.csv', collates
    them and writes one comma-separated row per entry to
    'ds<dataset_id>_trans_data.txt' below a '#'-prefixed header line.

    Parameters:
        database, release, host, port, user, pword: forwarded unchanged to
            get_data (presumably the database connection details -- confirm
            against get_data).
        dataset_id: dataset identifier used to build every file name.
        lightcurves: forwarded unchanged to read_src_lc.

    Returns:
        None. The result is the file written to the current directory.
    """
    sources_csv = 'ds_' + str(dataset_id) + '_sources.csv'
    transients_csv = 'ds_' + str(dataset_id) + '_transients.csv'

    if not os.path.isfile(sources_csv):
        # grab the data if it has not been previously extracted from the database
        get_data(database, dataset_id, release, host, port, user, pword)

    if os.path.isfile(transients_csv):
        transients = generic_tools.extract_data(transients_csv)
    else:
        # if no new sources were detected, create an empty list
        transients = []

    sources = generic_tools.extract_data(sources_csv)
    frequencies, new_source = read_src_lc(sources, lightcurves)
    trans_data = collate_trans_data(new_source, frequencies, transients)

    # NOTE: unlike the CSV inputs, the output name has no underscore after 'ds'.
    # The context manager closes the file even if formatting a row raises.
    with open('ds' + str(dataset_id) + '_trans_data.txt', 'w') as output3:
        output3.write('#Runcat_id, eta_nu, V_nu, flux, fluxrat, freq, dpts, RA, Dec, trans_type, max_rms_sigma, min_rms_sigma, detection_threshold  \n')
        for row in trans_data:
            output3.write(','.join(str(val) for val in row) + '\n')

    # Single-argument parenthesised form prints identically on Python 2 and 3.
    print('Data extracted and saved')
Example #2
0
    anomaly=False
# Command-line switches taken from arguments 5, 6 and 7: each one is True
# only when the argument is exactly the string 'T', and False otherwise.
logistic = (sys.argv[5] == 'T')
transSrc = (sys.argv[6] == 'T')
tests = (sys.argv[7] == 'T')

# Sort the transient/variable datasets and the stable datasets into the format required for training.
# The stable file is passed to label_data with ('stable', 0); every simulated
# file is passed with (<simulation name>, 1).
trans_data = generic_tools.extract_data('stable_trans_data.txt')
stable_data = generic_tools.label_data(trans_data, 'stable', 0)
files = glob.glob('sim_*_trans_data.txt')
trans_data = []
for filename in files:
    # Strip the fixed prefix and suffix instead of splitting on 'm_': a split
    # truncates any simulation name that itself contains 'm_'
    # (e.g. 'sim_random_1_trans_data.txt' would yield 'rando').
    sim_name = filename[len('sim_'):-len('_trans_data.txt')]
    # Read the file that glob actually matched rather than rebuilding its name.
    trans_data_tmp = generic_tools.extract_data(filename)
    trans_data = trans_data + generic_tools.label_data(trans_data_tmp, sim_name, 1)
full_data = stable_data + trans_data
# Keep only the rows whose fifth-from-last column is the string '2'.
variables = [x for x in full_data if x[-5] == '2']


# Sort data into transient and non-transient
# Only rows with strictly positive values in columns 1 and 2 are kept.
# A kept row whose last column is non-zero goes to `variable`, reduced to its
# first 13 columns with column 2 divided by 1.6; a kept row whose last column
# is zero goes to `stable` unchanged.
# NOTE(review): the reason for the 1.6 scaling is not visible here -- confirm.
variable = []
stable = []
for src_row in variables:
    src_label = float(src_row[-1])
    if not (float(src_row[1]) > 0. and float(src_row[2]) > 0.):
        continue
    if src_label != 0.:
        scaled_row = [src_row[col] for col in range(13)]
        scaled_row[2] = float(src_row[2]) / 1.6
        variable.append(scaled_row)
    else:
        stable.append(src_row)
Example #3
0
    exit()
# Positional command-line arguments 1-10, in order: database, username,
# password, dataset id, release, host, port (int), sigma1 (float),
# sigma2 (float), lightcurves.
database, username, password = sys.argv[1], sys.argv[2], sys.argv[3]
dataset_id, release, host = str(sys.argv[4]), str(sys.argv[5]), str(sys.argv[6])
port = int(sys.argv[7])
sigma1, sigma2 = float(sys.argv[8]), float(sys.argv[9])
lightcurves = sys.argv[10]

# Get TraP data from the database and sort it into the required array, which is then loaded.
# The extraction step only runs when the output file does not exist yet.
trans_file = 'ds' + str(dataset_id) + '_trans_data.txt'
if not os.path.isfile(trans_file):
    format_TraP_data.format_data(database, dataset_id, release, host, port, username, password, lightcurves)
trans_data = generic_tools.extract_data(trans_file)

# Make the first array for the scatter_hist plot. Each entry is
# [column 0, log10(eta_nu), log10(V_nu), nu, last column], where eta_nu, V_nu
# and nu are read from columns 1, 2 and 5. Rows are kept only when eta_nu and
# V_nu are strictly positive (so the logarithms are defined) and the
# fourth-from-last column is the string '2'.
data = [
    [src[0], np.log10(float(src[1])), np.log10(float(src[2])), src[5], src[-1]]
    for src in trans_data
    if float(src[1]) > 0 and float(src[2]) > 0 and src[-4] == '2'
]

# Print out the transients that TraP automatically found.
# Both lists only consider rows whose fourth-from-last column is not '2' and
# report the unique, sorted integer ids from column 0.
# Candidates: second-from-last column >= last column, but third-from-last
# column < last column.
print('Identified Transient Candidates (no margin)')
candidate_ids = set(
    int(src[0]) for src in trans_data
    if src[-4] != '2'
    and float(src[-2]) >= float(src[-1])
    and float(src[-3]) < float(src[-1])
)
print(np.sort(list(candidate_ids)))

# Transients: third-from-last column >= last column.
print('Identified Transients (no margin)')
transient_ids = set(
    int(src[0]) for src in trans_data
    if src[-4] != '2' and float(src[-3]) >= float(src[-1])
)
print(np.sort(list(transient_ids)))

# Find the thresholds for a given sigma (in log space).
# Entries 1 and 2 of each `data` row are the log10 values computed above.
log_eta_values = [entry[1] for entry in data]
sigcutx, paramx, range_x = generic_tools.get_sigcut(log_eta_values, sigma1)
log_v_values = [entry[2] for entry in data]
sigcuty, paramy, range_y = generic_tools.get_sigcut(log_v_values, sigma2)
# When sigma1 is zero, the computed x threshold is overridden with 0.
if sigma1 == 0:
    sigcutx = 0
if sigma2 == 0: