Ejemplo n.º 1
0
def find_sigma_margin(best_data, worst_data, best_sim, worst_sim, detection_threshold):
    """Scan detection margins and report precision, recall and F-score.

    For every margin ``sigma`` in 0, 1, ..., 99 the cut
    ``detection_threshold + sigma`` is applied to the best-case and the
    worst-case populations independently.

    Args:
        best_data, worst_data: re-iterable collections of significances whose
            members count as false positives when they reach the cut.
        best_sim, worst_sim: re-iterable collections of significances whose
            members count as true positives when they reach the cut and as
            false negatives when they fall below it.
            (presumably the simulated sources -- confirm against the caller)
        detection_threshold: base significance to which each margin is added.

    Returns:
        ``(best_plot_data, worst_plot_data)``: two lists with one
        ``[sigma, precision, recall, f_score]`` row per margin.  The F-score
        is 0 whenever the precision or the recall of *that same case* is 0.
    """
    best_plot_data = []
    worst_plot_data = []
    cases = (
        (best_sim, best_data, best_plot_data),
        (worst_sim, worst_data, worst_plot_data),
    )

    for sigma in np.arange(0., 100., 1):
        threshold = detection_threshold + sigma
        for sim, data, rows in cases:
            tp = sum(1 for a in sim if a >= threshold)
            fn = sum(1 for a in sim if a < threshold)
            fp = sum(1 for a in data if a >= threshold)
            precision, recall = generic_tools.precision_and_recall(tp, fp, fn)
            # Guard on this case's own precision/recall.  The original
            # worst-case branch tested best_recall, which zeroed valid
            # worst-case F-scores whenever the best-case recall was 0.
            if precision == 0 or recall == 0:
                f_score = 0
            else:
                f_score = (2 * precision * recall) / (precision + recall)
            rows.append([sigma, precision, recall, f_score])

    return best_plot_data, worst_plot_data
Ejemplo n.º 2
0
def tests(args):
    """Check the precision and recall achieved for one requested pair.

    Args:
        args: an 11-item sequence
            ``(xi, yi, zi1, zi2, data, xvals, yvals, xstable, ystable,
            precis, recall)`` where

            * ``xi``, ``yi``, ``zi1``, ``zi2`` are 2-D grids indexed
              ``[a][b]``; ``zi1`` is compared with ``precis`` and ``zi2``
              with ``recall``, and ``xi``/``yi`` hold the sigma pair of each
              grid point,
            * ``data`` is a sequence of rows whose first two items are
              float-convertible,
            * ``xvals``/``yvals`` are the paired values counted as true
              positives / false negatives,
            * ``xstable``/``ystable`` are the paired values counted as false
              positives,
            * ``precis`` and ``recall`` are the requested precision and
              recall.

    Returns:
        ``[precis, recall, results1, results2]`` where ``results1`` and
        ``results2`` are the two values returned by
        ``generic_tools.precision_and_recall(tp, fp, fn)``.

    Raises:
        ValueError: if no grid point has ``zi1 >= precis``.
    """
    xi, yi, zi1, zi2, data, xvals, yvals, xstable, ystable, precis, recall = args

    # Grid points that reach the requested precision.
    candidates = [
        (sig_x, sig_y, grid_p, grid_r)
        for x_row, y_row, p_row, r_row in zip(xi, yi, zi1, zi2)
        for sig_x, sig_y, grid_p, grid_r in zip(x_row, y_row, p_row, r_row)
        if grid_p >= precis
    ]
    if not candidates:
        raise ValueError(
            "no sigma combination reaches the requested precision %r" % (precis,))

    # Pick the candidate closest (squared distance) to the requested pair.
    distances = [(c[2] - precis) ** 2. + (c[3] - recall) ** 2. for c in candidates]
    sigma_x, sigma_y = candidates[int(np.argmin(distances))][:2]

    # Turn the chosen sigmas into thresholds (the helper fits the observed
    # data with a Gaussian model, per the original comment); only the cut
    # value is needed here.
    sigcutx, _paramx, _range_x = generic_tools.get_sigcut(
        [float(row[0]) for row in data], sigma_x)
    sigcuty, _paramy, _range_y = generic_tools.get_sigcut(
        [float(row[1]) for row in data], sigma_y)

    # A source is detected when it reaches both thresholds.  ">=" is used so
    # that every source lands in exactly one class; the former strict ">" and
    # "<" tests dropped sources sitting exactly on a threshold.
    fp = sum(1 for x, y in zip(xstable, ystable)
             if x >= sigcutx and y >= sigcuty)  # False Positive
    tp = sum(1 for x, y in zip(xvals, yvals)
             if x >= sigcutx and y >= sigcuty)  # True Positive
    fn = sum(1 for x, y in zip(xvals, yvals)
             if not (x >= sigcutx and y >= sigcuty))  # False Negative

    # If the test is successful, the outputs should meet or exceed the inputs.
    results1, results2 = generic_tools.precision_and_recall(tp, fp, fn)

    return [precis, recall, results1, results2]
Ejemplo n.º 3
0
def tests(args):
    """Check the precision and recall achieved for one requested pair.

    Args:
        args: an 11-item sequence
            ``(xi, yi, zi1, zi2, data, xvals, yvals, xstable, ystable,
            precis, recall)`` where

            * ``xi``, ``yi``, ``zi1``, ``zi2`` are 2-D grids indexed
              ``[a][b]``; ``zi1`` is compared with ``precis`` and ``zi2``
              with ``recall``, and ``xi``/``yi`` hold the sigma pair of each
              grid point,
            * ``data`` is a sequence of rows whose first two items are
              float-convertible,
            * ``xvals``/``yvals`` are the paired values counted as true
              positives / false negatives,
            * ``xstable``/``ystable`` are the paired values counted as false
              positives,
            * ``precis`` and ``recall`` are the requested precision and
              recall.

    Returns:
        ``[precis, recall, results1, results2]`` where ``results1`` and
        ``results2`` are the two values returned by
        ``generic_tools.precision_and_recall(tp, fp, fn)``.

    Raises:
        ValueError: if no grid point has ``zi1 >= precis``.
    """
    xi, yi, zi1, zi2, data, xvals, yvals, xstable, ystable, precis, recall = args

    # Grid points that reach the requested precision.
    candidates = [
        (sig_x, sig_y, grid_p, grid_r)
        for x_row, y_row, p_row, r_row in zip(xi, yi, zi1, zi2)
        for sig_x, sig_y, grid_p, grid_r in zip(x_row, y_row, p_row, r_row)
        if grid_p >= precis
    ]
    if not candidates:
        raise ValueError(
            "no sigma combination reaches the requested precision %r" % (precis,))

    # Pick the candidate closest (squared distance) to the requested pair.
    distances = [(c[2] - precis) ** 2. + (c[3] - recall) ** 2. for c in candidates]
    sigma_x, sigma_y = candidates[int(np.argmin(distances))][:2]

    # Turn the chosen sigmas into thresholds (the helper fits the observed
    # data with a Gaussian model, per the original comment); only the cut
    # value is needed here.
    sigcutx, _paramx, _range_x = generic_tools.get_sigcut(
        [float(row[0]) for row in data], sigma_x)
    sigcuty, _paramy, _range_y = generic_tools.get_sigcut(
        [float(row[1]) for row in data], sigma_y)

    # A source is detected when it reaches both thresholds.  ">=" is used so
    # that every source lands in exactly one class; the former strict ">" and
    # "<" tests dropped sources sitting exactly on a threshold.
    fp = sum(1 for x, y in zip(xstable, ystable)
             if x >= sigcutx and y >= sigcuty)  # False Positive
    tp = sum(1 for x, y in zip(xvals, yvals)
             if x >= sigcutx and y >= sigcuty)  # True Positive
    fn = sum(1 for x, y in zip(xvals, yvals)
             if not (x >= sigcutx and y >= sigcuty))  # False Negative

    # If the test is successful, the outputs should meet or exceed the inputs.
    results1, results2 = generic_tools.precision_and_recall(tp, fp, fn)

    return [precis, recall, results1, results2]
Ejemplo n.º 4
0
def trial_data(args):
    """Find the precision and recall for a given pair of sigma thresholds.

    Args:
        args: a 3-item sequence ``(data, sigma1, sigma2)``.  Each row of
            ``data`` has float-convertible items; item 0 and item 1 are the
            two values that get thresholded and the last item is the class
            label: a label equal to 0 marks the row as stable, anything else
            as transient.

    Returns:
        ``[sigma1, sigma2, precision, recall]`` with precision and recall as
        returned by ``generic_tools.precision_and_recall(tp, fp, fn)``.
    """
    data, sigma1, sigma2 = args

    # Sort data into transient and non-transient in a single pass.
    xvals, yvals = [], []
    xstable, ystable = [], []
    for row in data:
        x, y = float(row[0]), float(row[1])
        if float(row[-1]) == 0.:
            xstable.append(x)
            ystable.append(y)
        else:
            xvals.append(x)
            yvals.append(y)

    # Find the thresholds for the given sigmas from the stable sources only
    # (the helper fits them with a Gaussian model, per the original comment).
    # Copies are passed so the helper cannot disturb the x/y pairing.
    sigcutx, _paramx, _range_x = generic_tools.get_sigcut(list(xstable), sigma1)
    sigcuty, _paramy, _range_y = generic_tools.get_sigcut(list(ystable), sigma2)

    # A source is detected when it reaches both thresholds.  ">=" is used so
    # that every source lands in exactly one class; the former strict ">" and
    # "<" tests dropped sources sitting exactly on a threshold.
    fp = sum(1 for x, y in zip(xstable, ystable)
             if x >= sigcutx and y >= sigcuty)  # False Positive
    tp = sum(1 for x, y in zip(xvals, yvals)
             if x >= sigcutx and y >= sigcuty)  # True Positive
    fn = sum(1 for x, y in zip(xvals, yvals)
             if not (x >= sigcutx and y >= sigcuty))  # False Negative

    # Use these values to calculate the precision and recall values
    precision, recall = generic_tools.precision_and_recall(tp, fp, fn)
    return [sigma1, sigma2, precision, recall]
Ejemplo n.º 5
0
def trial_data(args):
    """Find the precision and recall for a given pair of sigma thresholds.

    The result is also printed to standard output, one line per call.

    Args:
        args: a 3-item sequence ``(data, sigma1, sigma2)``.  Each row of
            ``data`` has float-convertible items; item 0 and item 1 are the
            two values that get thresholded and the last item is the class
            label: a label equal to 0 marks the row as stable, anything else
            as transient.

    Returns:
        ``[sigma1, sigma2, precision, recall]`` with precision and recall as
        returned by ``generic_tools.precision_and_recall(tp, fp, fn)``.
    """
    data, sigma1, sigma2 = args

    # Sort data into transient and non-transient in a single pass.
    xvals, yvals = [], []
    xstable, ystable = [], []
    for row in data:
        x, y = float(row[0]), float(row[1])
        if float(row[-1]) == 0.:
            xstable.append(x)
            ystable.append(y)
        else:
            xvals.append(x)
            yvals.append(y)

    # Find the thresholds for the given sigmas from the stable sources only
    # (the helper fits them with a Gaussian model, per the original comment).
    # Copies are passed so the helper cannot disturb the x/y pairing.
    sigcutx, _paramx, _range_x = generic_tools.get_sigcut(list(xstable), sigma1)
    sigcuty, _paramy, _range_y = generic_tools.get_sigcut(list(ystable), sigma2)

    # A source is detected when it reaches both thresholds.  ">=" is used so
    # that every source lands in exactly one class; the former strict ">" and
    # "<" tests dropped sources sitting exactly on a threshold.
    fp = sum(1 for x, y in zip(xstable, ystable)
             if x >= sigcutx and y >= sigcuty)  # False Positive
    tp = sum(1 for x, y in zip(xvals, yvals)
             if x >= sigcutx and y >= sigcuty)  # True Positive
    fn = sum(1 for x, y in zip(xvals, yvals)
             if not (x >= sigcutx and y >= sigcuty))  # False Negative

    # Use these values to calculate the precision and recall values
    precision, recall = generic_tools.precision_and_recall(tp, fp, fn)
    # Single pre-formatted string: emits the same space-separated line under
    # both the Python 2 print statement and the Python 3 print function.
    print("%s %s %s %s" % (sigma1, sigma2, precision, recall))
    return [sigma1, sigma2, precision, recall]
Ejemplo n.º 6
0
def _count_outcomes(data, threshold):
    """Return ``(tp, fp, fn)`` for rows of ``(significance, label)`` at a cut."""
    tp = fp = fn = 0
    for row in data:
        value, label = row[0], row[1]
        if value >= threshold:
            if label == 1:
                tp += 1
            elif label == 0:
                fp += 1
        elif value < threshold and label == 1:
            fn += 1
    return tp, fp, fn


def find_sigma_margin(best_data, worst_data, detection_threshold):
    """Scan detection margins and report precision, recall and F-score.

    For every margin ``sigma`` in 0, 1, ..., 99 the cut
    ``detection_threshold + sigma`` is applied to the best-case and the
    worst-case datasets independently.

    Args:
        best_data, worst_data: 2-D arrays (one row per source) whose column 0
            is the significance and whose column 1 is the label; rows with
            label 1 are counted as true positives / false negatives and rows
            with label 0 as false positives.
        detection_threshold: base significance to which each margin is added.

    Returns:
        ``(best_plot_data, worst_plot_data, sigBest, sigWorst)``: the first
        two are lists with one ``[sigma, precision, recall, f_score]`` row per
        margin (the F-score is 0 whenever the precision or recall of *that
        same case* is 0); the last two are the first margin at which each
        case reaches its maximum F-score.
    """
    best_plot_data = []
    worst_plot_data = []
    cases = ((best_data, best_plot_data), (worst_data, worst_plot_data))

    for sigma in np.arange(0., 100., 1):
        threshold = detection_threshold + sigma
        for data, rows in cases:
            tp, fp, fn = _count_outcomes(data, threshold)
            precision, recall = generic_tools.precision_and_recall(tp, fp, fn)
            # Guard on this case's own precision/recall.  The original
            # worst-case branch tested best_recall, which zeroed valid
            # worst-case F-scores whenever the best-case recall was 0.
            if precision == 0 or recall == 0:
                f_score = 0
            else:
                f_score = (2 * precision * recall) / (precision + recall)
            rows.append([sigma, precision, recall, f_score])

    # max() returns the first row holding the maximum, i.e. the smallest
    # margin among ties.
    sigBest = max(best_plot_data, key=lambda row: row[3])[0]
    sigWorst = max(worst_plot_data, key=lambda row: row[3])[0]
    return best_plot_data, worst_plot_data, sigBest, sigWorst
Ejemplo n.º 7
0
    # make second array for the diagnostic plot: [eta_nu, V_nu, maxflx_nu, flxrat_nu, nu]
    data2=[[variables[n][0],float(variables[n][1]),float(variables[n][2]),float(variables[n][3]),float(variables[n][4]),variables[n][5]] for n in range(len(variables)) if float(variables[n][1]) > 0 if float(variables[n][2]) > 0] 
    
    # Create the diagnostic plot
    plotting_tools.create_diagnostic(data2,0,0,frequencies,'')

    # Setup data to make TP/FP/TN/FN plots
    # Create arrays containing the data to plot
    fp=[[z[0],np.log10(float(z[1])),np.log10(float(z[2])),'FP'] for z in stable if (float(z[1])>=10.**sigcutx and float(z[2])>=10.**sigcuty)] # False Positive
    tn=[[z[0],np.log10(float(z[1])),np.log10(float(z[2])),'TN'] for z in stable if (float(z[1])<10.**sigcutx or float(z[2])<10.**sigcuty)] # True Negative
    tp=[[z[0],np.log10(float(z[1])),np.log10(float(z[2])),'TP'] for z in variable if (float(z[1])>=10.**sigcutx and float(z[2])>=10.**sigcuty)] # True Positive
    fn=[[z[0],np.log10(float(z[1])),np.log10(float(z[2])),'FN'] for z in variable if (float(z[1])<10.**sigcutx or float(z[2])<10.**sigcuty)] # False Negative
    data3=fp+tn+tp+fn

    # Print out the actual precision and recall using the training data.
    precision, recall =  generic_tools.precision_and_recall(len(tp),len(fp),len(fn))
    print "Precision: "+str(precision)+", Recall: "+str(recall)

    # Get the different frequencies in the dataset
    frequencies = generic_tools.get_frequencies(data3)

    # Create the scatter_hist plot
    plotting_tools.create_scatter_hist(data3,sigcutx,sigcuty,paramx,paramy,range_x,range_y,'_ADresults',frequencies)
    
    # Create arrays containing the data to plot
    fp=[[z[0],float(z[1]),float(z[2]),float(z[3]),float(z[4]),'FP'] for z in stable if (float(z[1])>=10.**sigcutx and float(z[2])>=10.**sigcuty)] # False Positive
    tn=[[z[0],float(z[1]),float(z[2]),float(z[3]),float(z[4]),'TN'] for z in stable if (float(z[1])<10.**sigcutx or float(z[2])<10.**sigcuty)] # True Negative
    tp=[[z[0],float(z[1]),float(z[2]),float(z[3]),float(z[4]),'TP'] for z in variable if (float(z[1])>=10.**sigcutx and float(z[2])>=10.**sigcuty)] # True Positive
    fn=[[z[0],float(z[1]),float(z[2]),float(z[3]),float(z[4]),'FN'] for z in variable if (float(z[1])<10.**sigcutx or float(z[2])<10.**sigcuty)] # False Negative
    data4=fp+tn+tp+fn