Esempio n. 1
0
def tests(args):
    """Check which precision and recall the trained thresholds really achieve.

    ``args`` is one packed sequence holding, in order:

    xi, yi            -- 2D grids of the trialled sigma values for x and y
    zi1, zi2          -- 2D grids (same indexing as xi/yi) of the precision
                         and recall obtained for each sigma pair
    data              -- rows whose columns 0 and 1 are the x and y values
                         used to turn a sigma into a threshold
    xvals, yvals      -- x and y values of the sources counted as transient
    xstable, ystable  -- x and y values of the sources counted as stable
    precis, recall    -- requested precision and recall

    Returns ``[precis, recall, results1, results2]`` where results1 and
    results2 are the two values handed back by
    ``generic_tools.precision_and_recall``. If the test is successful these
    should meet or exceed the requested values.

    Raises ValueError when no trialled sigma pair reaches ``precis``.
    """
    xi, yi, zi1, zi2, data, xvals, yvals, xstable, ystable, precis, recall = args

    # Find the combination of x and y which is closest to the two thresholds.
    # Only grid cells meeting the requested precision are candidates. The
    # column count is read from the first row, exactly as before, and is not
    # touched at all when the grid has no rows.
    n_rows = len(zi1)
    n_cols = len(zi1[0]) if n_rows else 0
    combinations = [[xi[a][b], yi[a][b], zi1[a][b], zi2[a][b]]
                    for a in range(n_rows) for b in range(n_cols)
                    if zi1[a][b] >= precis]
    if not combinations:
        # argmin on an empty array also raises ValueError, but with a message
        # that says nothing about the actual cause.
        raise ValueError('no sigma combination reaches the requested '
                         'precision of ' + str(precis))
    distances = np.array([(c[2] - precis)**2. + (c[3] - recall)**2.
                          for c in combinations])
    above_thresh_sigma = combinations[distances.argmin()]

    # Find the thresholds for these sigmas, by fitting the observed data with
    # a Gaussian model. Only the threshold itself is needed here.
    # NOTE(review): every row of data is used for the fit, whereas trial_data
    # fits the stable sources only -- confirm this difference is intended.
    sigcutx, _, _ = generic_tools.get_sigcut(
        [float(row[0]) for row in data], above_thresh_sigma[0])
    sigcuty, _, _ = generic_tools.get_sigcut(
        [float(row[1]) for row in data], above_thresh_sigma[1])

    # Count up the different numbers of tp, fp, fn. A source is flagged when
    # both values are at or above their threshold, so that a value lying
    # exactly on a threshold is counted once instead of being dropped from
    # every class.
    fp = sum(1 for x, y in zip(xstable, ystable)
             if x >= sigcutx and y >= sigcuty)  # False Positive
    tp = sum(1 for x, y in zip(xvals, yvals)
             if x >= sigcutx and y >= sigcuty)  # True Positive
    fn = sum(1 for x, y in zip(xvals, yvals)
             if x < sigcutx or y < sigcuty)  # False Negative

    # Use these values to calculate the precision and recall values obtained
    # with the trained threshold.
    results1, results2 = generic_tools.precision_and_recall(tp, fp, fn)

    return [precis, recall, results1, results2]
Esempio n. 2
0
def tests(args):
    """Measure the precision and recall reached for a requested target pair.

    ``args`` is a single packed sequence, unpacked as:

    xi, yi            -- 2D grids of the trialled sigma values for x and y
    zi1, zi2          -- 2D grids, indexed like xi/yi, of the precision and
                         recall obtained for each sigma pair
    data              -- rows whose columns 0 and 1 hold the x and y values
                         passed to ``generic_tools.get_sigcut``
    xvals, yvals      -- x and y values of the sources counted as transient
    xstable, ystable  -- x and y values of the sources counted as stable
    precis, recall    -- requested precision and recall

    Returns ``[precis, recall, results1, results2]``; results1 and results2
    are the pair returned by ``generic_tools.precision_and_recall`` and, for
    a successful test, should meet or exceed the requested values.

    Raises ValueError when none of the trialled sigma pairs reaches
    ``precis``.
    """
    xi, yi, zi1, zi2, data, xvals, yvals, xstable, ystable, precis, recall = args

    # Find the combination of x and y which is closest to the two thresholds,
    # considering only grid cells whose precision is high enough. The row
    # width comes from the first row and is only looked up for a non-empty
    # grid.
    row_count = len(zi1)
    col_count = len(zi1[0]) if row_count else 0
    combinations = [[xi[a][b], yi[a][b], zi1[a][b], zi2[a][b]]
                    for a in range(row_count)
                    for b in range(col_count)
                    if zi1[a][b] >= precis]
    if not combinations:
        # Fail with the real reason rather than numpy's generic
        # "argmin of an empty sequence" message (still a ValueError).
        raise ValueError('no sigma combination reaches the requested '
                         'precision of ' + str(precis))
    ID = np.array([((c[2] - precis)**2. + (c[3] - recall)**2.)
                   for c in combinations]).argmin()
    above_thresh_sigma = combinations[ID]

    # Find the thresholds for these sigmas, by fitting the observed data with
    # a Gaussian model. The fit parameters and range are not needed here.
    # NOTE(review): the fit uses all rows of data, while trial_data restricts
    # it to stable sources -- confirm which one is intended.
    sigcutx, _, _ = generic_tools.get_sigcut(
        [float(row[0]) for row in data], above_thresh_sigma[0])
    sigcuty, _, _ = generic_tools.get_sigcut(
        [float(row[1]) for row in data], above_thresh_sigma[1])

    # Count up the different numbers of tp, fp, fn. "Flagged" means at or
    # above both thresholds; using >= makes it the exact counterpart of the
    # false-negative test, so boundary values are no longer lost.
    fp = sum(1 for x, y in zip(xstable, ystable)
             if x >= sigcutx and y >= sigcuty)  # False Positive
    tp = sum(1 for x, y in zip(xvals, yvals)
             if x >= sigcutx and y >= sigcuty)  # True Positive
    fn = sum(1 for x, y in zip(xvals, yvals)
             if x < sigcutx or y < sigcuty)  # False Negative

    # Use these values to calculate the precision and recall values obtained
    # with the trained threshold.
    results1, results2 = generic_tools.precision_and_recall(tp, fp, fn)

    return [precis, recall, results1, results2]
Esempio n. 3
0
def trial_data(args):
    """Find the precision and recall for a given pair of sigma thresholds.

    ``args`` is a packed ``(data, sigma1, sigma2)`` triple. Every row of
    ``data`` carries the x value in column 0, the y value in column 1 and a
    label in its last column: a label equal to 0 marks a stable
    (non-transient) source, any other label a transient one.

    The thresholds are obtained from ``generic_tools.get_sigcut`` using the
    stable sources only, sigma1 for x and sigma2 for y.

    Returns ``[sigma1, sigma2, precision, recall]``.
    """
    data, sigma1, sigma2 = args

    # Sort data into transient and non-transient, converting each value once.
    xvals, yvals, xstable, ystable = [], [], [], []
    for row in data:
        label = float(row[-1])
        if label != 0.:
            xvals.append(float(row[0]))
            yvals.append(float(row[1]))
        elif label == 0.:
            xstable.append(float(row[0]))
            ystable.append(float(row[1]))

    # Find the thresholds for a given sigma, by fitting data with a Gaussian
    # model. Copies are passed so that the x/y pairing of the stable lists
    # cannot be disturbed by whatever get_sigcut does with its input.
    sigcutx, _, _ = generic_tools.get_sigcut(list(xstable), sigma1)
    sigcuty, _, _ = generic_tools.get_sigcut(list(ystable), sigma2)

    # Count up the different numbers of tp, fp, fn. A source is flagged when
    # both values are at or above their threshold; with >= a value lying
    # exactly on a threshold is counted instead of falling between classes.
    fp = sum(1 for x, y in zip(xstable, ystable)
             if x >= sigcutx and y >= sigcuty)  # False Positive
    tp = sum(1 for x, y in zip(xvals, yvals)
             if x >= sigcutx and y >= sigcuty)  # True Positive
    fn = sum(1 for x, y in zip(xvals, yvals)
             if x < sigcutx or y < sigcuty)  # False Negative

    # Use these values to calculate the precision and recall values
    precision, recall = generic_tools.precision_and_recall(tp, fp, fn)
    return [sigma1, sigma2, precision, recall]
Esempio n. 4
0
def trial_data(args):
    """Find the precision and recall for a given pair of sigma thresholds.

    ``args`` is a packed ``(data, sigma1, sigma2)`` triple. Each row of
    ``data`` holds the x value in column 0, the y value in column 1 and a
    label in the last column; label 0 means stable (non-transient), anything
    else means transient.

    The thresholds come from ``generic_tools.get_sigcut`` applied to the
    stable sources only (sigma1 for x, sigma2 for y). The result is also
    printed as one space-separated line.

    Returns ``[sigma1, sigma2, precision, recall]``.
    """
    data, sigma1, sigma2 = args

    # Sort data into transient and non-transient, converting each value once.
    xvals, yvals, xstable, ystable = [], [], [], []
    for row in data:
        label = float(row[-1])
        if label != 0.:
            xvals.append(float(row[0]))
            yvals.append(float(row[1]))
        elif label == 0.:
            xstable.append(float(row[0]))
            ystable.append(float(row[1]))

    # Find the thresholds for a given sigma, by fitting data with a Gaussian
    # model. Copies are handed over so the stable lists keep their pairing
    # regardless of what get_sigcut does with its argument.
    sigcutx, _, _ = generic_tools.get_sigcut(list(xstable), sigma1)
    sigcuty, _, _ = generic_tools.get_sigcut(list(ystable), sigma2)

    # Count up the different numbers of tp, fp, fn. Flagged means at or above
    # both thresholds, so that values lying exactly on a threshold are
    # counted rather than dropped from every class.
    fp = sum(1 for x, y in zip(xstable, ystable)
             if x >= sigcutx and y >= sigcuty)  # False Positive
    tp = sum(1 for x, y in zip(xvals, yvals)
             if x >= sigcutx and y >= sigcuty)  # True Positive
    fn = sum(1 for x, y in zip(xvals, yvals)
             if x < sigcutx or y < sigcuty)  # False Negative

    # Use these values to calculate the precision and recall values
    precision, recall = generic_tools.precision_and_recall(tp, fp, fn)

    # Single-argument call form: prints the same space-separated line as the
    # old print statement on Python 2 and is valid syntax on Python 3.
    print('%s %s %s %s' % (sigma1, sigma2, precision, recall))
    return [sigma1, sigma2, precision, recall]
Esempio n. 5
0
def random_test(stable, variable, train, valid, precis_thresh, recall_thresh,
                path):
    """Train the thresholds on ``train`` and report training/validation error.

    Parameters
    ----------
    stable, variable : rows whose column 0 is a source ID and whose columns
        1 and 2 are the two variability parameters in linear space. Rows of
        ``stable`` that pass both thresholds count as false positives, rows
        of ``variable`` that fail either threshold count as false negatives.
    train, valid : rows with the source ID in column 0 (converted with
        ``int``); ``train`` additionally provides columns 1, 2, 5 and a label
        in the last column for the training itself.
    precis_thresh, recall_thresh : targets passed to ``find_best_sigmas``.
    path : passed through to ``find_best_sigmas``.

    Returns
    -------
    (trainErr, validErr) as computed by ``check_error``.

    NOTE(review): IDs are matched as ``str(int(id))`` against ``z[0]``
    unchanged, so ``z[0]`` must already be such a string -- confirm against
    the caller. The trial file is written to the working directory, not
    under ``path`` -- confirm that is intended.
    """
    # Only rows with positive values can be moved into log space.
    usable = [x for x in train if float(x[1]) > 0 and float(x[2]) > 0]

    multiple_trials(
        [[np.log10(float(x[1])), np.log10(float(x[2])), float(x[-1])]
         for x in usable],
        "temp_sigma_data.txt")
    data2 = np.genfromtxt('temp_sigma_data.txt', delimiter=' ')
    data = [[np.log10(float(x[1])), np.log10(float(x[2])), x[5],
             float(x[-1])] for x in usable]
    best_sigma1, best_sigma2 = find_best_sigmas(precis_thresh, recall_thresh,
                                                data2, False, data, False,
                                                path)

    # Find the thresholds for a given sigma (in log space), fitted on the
    # rows labelled 0 only.
    sigcutx, _, _ = generic_tools.get_sigcut(
        [a[0] for a in data if a[3] == 0.], best_sigma1)
    sigcuty, _, _ = generic_tools.get_sigcut(
        [a[1] for a in data if a[3] == 0.], best_sigma2)

    # The sources are compared in linear space, so undo the log once here.
    cut1 = 10.**sigcutx
    cut2 = 10.**sigcuty

    # Sets give constant-time membership tests inside the counting loops.
    trainIDs = set(str(int(x[0])) for x in train)
    validIDs = set(str(int(x[0])) for x in valid)

    def count_errors(ids):
        """Return (false positives, false negatives) among sources in ids."""
        false_pos = sum(1 for z in stable
                        if (float(z[1]) >= cut1 and float(z[2]) >= cut2)
                        if z[0] in ids)
        false_neg = sum(1 for z in variable
                        if (float(z[1]) < cut1 or float(z[2]) < cut2)
                        if z[0] in ids)
        return false_pos, false_neg

    # Calculate the training error
    fp, fn = count_errors(trainIDs)
    trainErr = check_error(fp, fn, len(train))

    # Calculate the validation error
    fp, fn = count_errors(validIDs)
    validErr = check_error(fp, fn, len(valid))

    return trainErr, validErr
Esempio n. 6
0
def learning_curve(stable, variable, train, valid, precis_thresh,
                   recall_thresh, rangeNums, path):
    """Compute training and validation error for growing training sets.

    For every positive ``num`` in ``rangeNums`` the first ``num`` rows of
    ``train`` are used to train the thresholds, and the resulting errors on
    that subset and on ``valid`` are appended to the result lists.

    Parameters
    ----------
    stable, variable : rows whose column 0 is a source ID and whose columns
        1 and 2 are the two variability parameters in linear space. Rows of
        ``stable`` passing both thresholds are false positives, rows of
        ``variable`` failing either threshold are false negatives.
    train : 2D array (it is sliced as ``train[:num, :]``) with the source ID
        in column 0, the parameters in columns 1 and 2, column 5 and a label
        in the last column.
    valid : rows with the source ID in column 0.
    precis_thresh, recall_thresh : targets passed to ``find_best_sigmas``.
    rangeNums : iterable of training-set sizes; non-positive sizes are
        skipped.
    path : prefix for ``sigma_train.txt`` and ``temp_sigma_data.txt``; also
        passed to ``find_best_sigmas``.

    Returns
    -------
    (trainErr, validErr) : two lists with one ``check_error`` result per
    processed size.
    """
    trainErr = []
    validErr = []
    # IDs are normalised with int() on both sides of every membership test,
    # for the validation set just as for the training subset.
    valid_ids = set(int(x[0]) for x in valid)
    temp_file = path + "temp_sigma_data.txt"

    def count_errors(ids, cut1, cut2):
        """Return (false positives, false negatives) among sources in ids."""
        false_pos = sum(1 for z in stable
                        if (float(z[1]) >= cut1 and float(z[2]) >= cut2)
                        if int(z[0]) in ids)
        false_neg = sum(1 for z in variable
                        if (float(z[1]) < cut1 or float(z[2]) < cut2)
                        if int(z[0]) in ids)
        return false_pos, false_neg

    # The with-block closes the log even if a trial raises part-way through.
    with open(path + 'sigma_train.txt', 'w') as output:
        for num in rangeNums:
            if num <= 0:
                continue

            # Start every trial from an empty temporary file.
            with open(temp_file, "w"):
                pass

            trainTMP = train[:num, :]
            train_ids = set(int(x[0]) for x in trainTMP)

            # Only rows with positive values can be moved into log space.
            # Iterating the slice (rather than range(num)) stays in bounds
            # when num is larger than the number of training rows.
            usable = [x for x in trainTMP
                      if float(x[1]) > 0 and float(x[2]) > 0]
            multiple_trials(
                [[np.log10(float(x[1])), np.log10(float(x[2])),
                  float(x[-1])] for x in usable],
                temp_file)
            data2 = np.genfromtxt(temp_file, delimiter=' ')
            data = [[np.log10(float(x[1])), np.log10(float(x[2])), x[5],
                     float(x[-1])] for x in usable]
            best_sigma1, best_sigma2 = find_best_sigmas(
                precis_thresh, recall_thresh, data2, False, data, False, path)

            # Find the thresholds for a given sigma (in log space), fitted
            # on the rows labelled 0 only.
            sigcutx, _, _ = generic_tools.get_sigcut(
                [a[0] for a in data if a[3] == 0.], best_sigma1)
            sigcuty, _, _ = generic_tools.get_sigcut(
                [a[1] for a in data if a[3] == 0.], best_sigma2)

            # The sources are compared in linear space.
            cut1 = 10.**sigcutx
            cut2 = 10.**sigcuty

            # Calculate the training error
            fp, fn = count_errors(train_ids, cut1, cut2)
            trainErr.append(check_error(fp, fn, len(trainTMP)))

            # Calculate the validation error
            fp, fn = count_errors(valid_ids, cut1, cut2)
            validErr.append(check_error(fp, fn, len(valid)))

            # NOTE(review): the record has no line terminator and contains
            # the full cumulative lists each time; the format is kept as it
            # was -- confirm whether one record per line was intended.
            output.write(str(num) + ',' + str(trainErr) + ',' + str(validErr))

    return trainErr, validErr
Esempio n. 7
0
if anomaly:
    ######### ANOMALY DETECTION ##########

    # Only sources whose eta_nu (column 1) and V_nu (column 2) are both
    # positive can be used, because both are moved into log space below.
    positive_rows = [v for v in variables
                     if float(v[1]) > 0 and float(v[2]) > 0]

    # train the anomaly detection algorithm by conducting multiple trials.
    # The trials are skipped when a sigma_data.txt from an earlier run exists.
    if not os.path.exists('sigma_data.txt'):
        # Create the file empty; the with-block guarantees it is closed.
        with open("sigma_data.txt", "w") as filename:
            filename.write('')
        train_anomaly_detect.multiple_trials(
            [[np.log10(float(v[1])), np.log10(float(v[2])), float(v[-1])]
             for v in positive_rows])
    data2 = np.genfromtxt('sigma_data.txt', delimiter=' ')

    # Training array: [log10(eta_nu), log10(V_nu), column 5, label]
    data = [[np.log10(float(v[1])), np.log10(float(v[2])), v[5], float(v[-1])]
            for v in positive_rows]
    best_sigma1, best_sigma2 = train_anomaly_detect.find_best_sigmas(
        precis_thresh, recall_thresh, data2, tests, data)
    # Single-argument print calls behave identically on Python 2 and 3.
    print('sigma_(eta_nu)='+str(best_sigma1)+', sigma_(V_nu)='+str(best_sigma2))

    # Find the thresholds for a given sigma (in log space), fitted on the
    # rows labelled 0 only.
    sigcutx, paramx, range_x = generic_tools.get_sigcut(
        [a[0] for a in data if a[3] == 0.], best_sigma1)
    sigcuty, paramy, range_y = generic_tools.get_sigcut(
        [a[1] for a in data if a[3] == 0.], best_sigma2)

    # Report each fit in linear space: 10**param[0] with the offsets obtained
    # by adding and subtracting param[1] in log space.
    print(r'Gaussian Fit $\eta$: '+str(round(10.**paramx[0],2))+'(+'+str(round((10.**(paramx[0]+paramx[1])-10.**paramx[0]),2))+' '+str(round((10.**(paramx[0]-paramx[1])-10.**paramx[0]),2))+')')
    print(r'Gaussian Fit $V$: '+str(round(10.**paramy[0],2))+'(+'+str(round((10.**(paramy[0]+paramy[1])-10.**paramy[0]),2))+' '+str(round((10.**(paramy[0]-paramy[1])-10.**paramy[0]),2))+')')
    print('Eta_nu threshold='+str(10.**sigcutx)+', V_nu threshold='+str(10.**sigcuty))

    # Array for the scatter_hist plot:
    # [column 0, log10(eta_nu), log10(V_nu), column 5, label]
    data = [[v[0], np.log10(float(v[1])), np.log10(float(v[2])), v[5],
             float(v[-1])] for v in positive_rows]

    # Get the different frequencies in the dataset
    frequencies = generic_tools.get_frequencies(data)

    # Create the scatter_hist plot (0 is passed for both threshold arguments)
    plotting_tools.create_scatter_hist(data, 0, 0, paramx, paramy, range_x,
                                       range_y, '', frequencies)

    # make second array for the diagnostic plot:
    # [column 0, eta_nu, V_nu, maxflx_nu, flxrat_nu, nu]
    data2 = [[v[0], float(v[1]), float(v[2]), float(v[3]), float(v[4]), v[5]]
             for v in positive_rows]
Esempio n. 8
0
# get TraP data from the database and sort it into the required array which is
# then loaded; format_data is only called when the formatted file is missing
trans_data_file = 'ds'+str(dataset_id)+'_trans_data.txt'
if not os.path.isfile(trans_data_file):
    format_TraP_data.format_data(database,dataset_id,release,host,port,username,password,lightcurves)
trans_data = generic_tools.extract_data(trans_data_file)

# make first array for the scatter_hist plot:
# [column 0, log10(eta_nu), log10(V_nu), column 5, last column]
# Only rows with positive eta_nu and V_nu (needed for the log) and whose
# fourth-from-last column equals '2' are kept.
data = [[row[0], np.log10(float(row[1])), np.log10(float(row[2])), row[5],
         row[-1]]
        for row in trans_data
        if float(row[1]) > 0 and float(row[2]) > 0 and row[-4] == '2']

# print out the transients that TraP automatically found
# (single-argument print calls behave identically on Python 2 and 3)
print('Identified Transient Candidates (no margin)')
candidate_ids = set(int(x[0]) for x in trans_data
                    if x[-4] != '2'
                    and float(x[-2]) >= float(x[-1])
                    and float(x[-3]) < float(x[-1]))
print(np.sort(list(candidate_ids)))
print('Identified Transients (no margin)')
transient_ids = set(int(x[0]) for x in trans_data
                    if x[-4] != '2' and float(x[-3]) >= float(x[-1]))
print(np.sort(list(transient_ids)))

# Find the thresholds for a given sigma (in log space)
sigcutx, paramx, range_x = generic_tools.get_sigcut([a[1] for a in data], sigma1)
sigcuty, paramy, range_y = generic_tools.get_sigcut([a[2] for a in data], sigma2)
# A sigma of 0 overrides the fitted threshold with 0 in log space, i.e. a
# linear threshold of 10**0 = 1.
if sigma1 == 0:
    sigcutx = 0
if sigma2 == 0:
    sigcuty = 0
print(r'Gaussian Fit $\eta$: '+str(round(10.**paramx[0],2))+'(+'+str(round((10.**(paramx[0]+paramx[1])-10.**paramx[0]),2))+' '+str(round((10.**(paramx[0]-paramx[1])-10.**paramx[0]),2))+')')
print(r'Gaussian Fit $V$: '+str(round(10.**paramy[0],2))+'(+'+str(round((10.**(paramy[0]+paramy[1])-10.**paramy[0]),2))+' '+str(round((10.**(paramy[0]-paramy[1])-10.**paramy[0]),2))+')')
print('Eta_nu threshold='+str(10.**sigcutx)+', V_nu threshold='+str(10.**sigcuty))

# Get the different frequencies in the dataset
frequencies = generic_tools.get_frequencies(data)

# Create the scatter_hist plot
IdTrans = plotting_tools.create_scatter_hist(data,sigcutx,sigcuty,paramx,paramy,range_x,range_y,dataset_id,frequencies)