Ejemplos de energy_distance en Python, ejemplos de scipy.stats.energy_distance en Python

Ejemplo n.º 1

0

Mostrar archivo

def two_validate(a, b, c, d):
    """Return the Wasserstein and energy distances between two weighted samples.

    Args:
        a: Values observed in the first distribution.
        b: Values observed in the second distribution.
        c: Weights for the values in ``a`` (passed as ``u_weights``).
        d: Weights for the values in ``b`` (passed as ``v_weights``).

    Returns:
        A tuple ``(emd, energy)`` with the Wasserstein (earth mover's)
        distance and the energy distance.
    """
    emd = wasserstein_distance(a, b, c, d)
    # Both SciPy functions take (u_values, v_values, u_weights, v_weights);
    # the weights must be given in the same order for both metrics so that
    # each weight vector stays paired with its own sample.
    energy = energy_distance(a, b, c, d)
    return emd, energy

Ejemplo n.º 2

0

Mostrar archivo

 def calculatePathDistance(self, pathA, pathB):
     """Return the energy distance between the look-up lists of two paths.

     The course names of both paths are collected first and handed to
     ``abstract.createExtendedLookUpList``, whose two results are then
     compared with ``energy_distance``.
     """
     all_course_names = abstract.extractAllCourseNames([pathA, pathB])
     lookups = abstract.createExtendedLookUpList(pathA, pathB,
                                                 all_course_names)
     first_lookup, second_lookup = lookups
     return energy_distance(first_lookup, second_lookup)

Ejemplo n.º 3

0

Mostrar archivo

Archivo: stats.py Proyecto: yacth/scipy

 def time_energy_distance(self, n_size):
     """Run ``stats.energy_distance`` once on the stored weighted samples.

     ``n_size`` is not read in this body and the computed distance is
     discarded; nothing is returned.
     """
     samples = (self.u_values, self.v_values)
     weights = (self.u_weights, self.v_weights)
     stats.energy_distance(*samples, *weights)

Ejemplo n.º 4

0

Mostrar archivo

Archivo: energy_distance_generate_results.py Proyecto: MitoSegNet/MitoSegNet_AccuracyTesting_Manuscript

def get_energy_distance(path, save_path, seg_name):
    """Write per-image energy distances between ground truth and a segmentation.

    For every file name listed in ``<path>/Ground_Truth`` the same-named
    image is read from ``Ground_Truth``, ``<seg_name>`` and ``Original``
    (all below ``path``). Both masks are labelled, region properties are
    measured (with the ``Original`` image as intensity image), and for each
    descriptor the energy distance between the ground-truth and segmentation
    value distributions is recorded. Branch descriptors come from
    ``get_branch_meas`` called on the two mask files.

    Args:
        path: Directory containing the ``Ground_Truth``, ``Original`` and
            ``seg_name`` sub-folders.
        save_path: Directory the CSV file is written to.
        seg_name: Name of the segmentation sub-folder; also used as prefix
            of the output file name.

    Returns:
        None. The table is saved as
        ``<save_path>/<seg_name>_EnergyDistance.csv``.
    """
    gt_folder_path = path + os.sep + "Ground_Truth"
    raw_folder_path = path + os.sep + "Original"
    seg_folder_path = path + os.sep + seg_name

    gt_image_list = os.listdir(gt_folder_path)

    columns = [
        "Image", "Area", "Eccentricity", "Aspect Ratio", "Perimeter",
        "Solidity", "Number of branches", "Branch length",
        "Total branch length", "Curvature index", "Mean intensity"
    ]

    # Output column -> how to read that descriptor from one region.
    region_descriptors = {
        "Area": lambda region: region.area,
        "Eccentricity": lambda region: region.eccentricity,
        "Aspect Ratio":
        lambda region: region.major_axis_length / region.minor_axis_length,
        "Perimeter": lambda region: region.perimeter,
        "Solidity": lambda region: region.solidity,
        "Mean intensity": lambda region: region.mean_intensity,
    }

    # One list of distances per output column, filled image by image.
    distances = {name: [] for name in columns[1:]}

    for image in gt_image_list:

        print(image)

        gt = cv2.imread(gt_folder_path + os.sep + image, cv2.IMREAD_GRAYSCALE)
        seg = cv2.imread(seg_folder_path + os.sep + image,
                         cv2.IMREAD_GRAYSCALE)
        org = cv2.imread(raw_folder_path + os.sep + image,
                         cv2.IMREAD_GRAYSCALE)

        # label image mask
        gt_labelled = label(gt)
        seg_labelled = label(seg)

        # Get region props of labelled images
        # NOTE(review): the ``coordinates`` argument was deprecated in later
        # scikit-image releases -- confirm against the pinned version.
        gt_reg_props = regionprops(label_image=gt_labelled,
                                   intensity_image=org,
                                   coordinates='xy')
        seg_reg_props = regionprops(label_image=seg_labelled,
                                    intensity_image=org,
                                    coordinates='xy')

        # compare shape / intensity descriptor distributions
        for name, read_value in region_descriptors.items():
            gt_values = [read_value(region) for region in gt_reg_props]
            seg_values = [read_value(region) for region in seg_reg_props]
            distances[name].append(energy_distance(gt_values, seg_values))

        # branch descriptors
        gt_nb, gt_bl, gt_tbl, gt_ci = get_branch_meas(gt_folder_path + os.sep +
                                                      image)
        seg_nb, seg_bl, seg_tbl, seg_ci = get_branch_meas(seg_folder_path +
                                                          os.sep + image)

        distances["Number of branches"].append(energy_distance(gt_nb, seg_nb))
        distances["Branch length"].append(energy_distance(gt_bl, seg_bl))
        distances["Total branch length"].append(
            energy_distance(gt_tbl, seg_tbl))
        distances["Curvature index"].append(energy_distance(gt_ci, seg_ci))

    df = pd.DataFrame(columns=columns)
    df["Image"] = gt_image_list
    for name in columns[1:]:
        df[name] = distances[name]

    # raw data
    df.to_csv(save_path + os.sep + seg_name + "_EnergyDistance.csv")

Ejemplo n.º 5

0

Mostrar archivo

Archivo: energy_distance.py Proyecto: bio-chris/Python

def get_energy_distance(path, save_path, seg_name):
    """Write per-image energy distances of shape descriptors to a CSV file.

    For each image name, the ground-truth mask (``path + "Ground_Truth"``)
    and the segmentation mask (``path + seg_name``) are labelled, region
    properties are measured, and the energy distance between the two value
    distributions is recorded for area, eccentricity, aspect ratio,
    perimeter and solidity.

    Args:
        path: Prefix of the input folders. It is concatenated directly with
            the folder names, so it must end with a path separator.
        save_path: Prefix of the output file; concatenated directly with
            ``seg_name``, so it must end with a path separator as well.
        seg_name: Name of the segmentation folder and prefix of the output
            file name.

    Returns:
        None. The table is saved as
        ``save_path + seg_name + "_EnergyDistance.csv"``.
    """
    gt_folder_path = path + "Ground_Truth"
    seg_folder_path = path + seg_name

    gt_image_list = os.listdir(gt_folder_path)
    seg_image_list = os.listdir(seg_folder_path)

    ###################################################

    # NOTE(review): the two listings above are immediately replaced by the
    # contents of a hard-coded developer directory, so only file names found
    # there are processed. This looks like a temporary override -- confirm
    # whether it should be removed or turned into a parameter.
    path_temp = "C:/Users/Christian/Desktop/Fourth_CV/Complete_images/MitoSegNet"

    gt_image_list = os.listdir(path_temp)
    seg_image_list = gt_image_list

    ###################################################

    # Output column -> how to read that descriptor from one region.
    descriptors = {
        "Area": lambda region: region.area,
        "Eccentricity": lambda region: region.eccentricity,
        "Aspect Ratio":
        lambda region: region.major_axis_length / region.minor_axis_length,
        "Perimeter": lambda region: region.perimeter,
        "Solidity": lambda region: region.solidity,
    }
    columns = ["Image"] + list(descriptors)

    # One list of distances per descriptor, filled image by image.
    distances = {name: [] for name in descriptors}

    for gt_image, seg_image in zip(gt_image_list, seg_image_list):

        print(gt_image)

        gt = cv2.imread(gt_folder_path + "/" + gt_image, cv2.IMREAD_GRAYSCALE)
        seg = cv2.imread(seg_folder_path + "/" + seg_image,
                         cv2.IMREAD_GRAYSCALE)

        # label image mask
        gt_labelled = label(gt)
        seg_labelled = label(seg)

        # Get region props of labelled images
        gt_reg_props = regionprops(gt_labelled)
        seg_reg_props = regionprops(seg_labelled)

        # compare shape descriptor distributions
        for name, read_value in descriptors.items():
            gt_values = [read_value(region) for region in gt_reg_props]
            seg_values = [read_value(region) for region in seg_reg_props]
            distances[name].append(energy_distance(gt_values, seg_values))

    df = pd.DataFrame(columns=columns)
    df["Image"] = gt_image_list
    for name, values in distances.items():
        df[name] = values

    # raw data
    df.to_csv(save_path + seg_name + "_EnergyDistance.csv")

Ejemplo n.º 6

0

Mostrar archivo

Archivo: distance_calculator.py Proyecto: vampypandya/distance_metric

def energy_dist(data1, data2):
    """Print the energy distance between two samples and return it as text.

    The inputs are handed to ``energy_distance`` unchanged (no flattening).
    The numeric result is printed and its ``str`` form is returned.
    """
    result = energy_distance(data1, data2)
    print(result)
    as_text = str(result)
    return as_text

Ejemplo n.º 7

0

Mostrar archivo

def distance_function(data, metric="default", normalize=None):
    """Calculate the distance between two distributions.

    Parameters
    ----------
    data : sequence of length 4
        ``data[0]`` and ``data[1]`` are ``(histogram, bin_edges)`` pairs for
        the two distributions; ``data[2]`` and ``data[3]`` are arrays used by
        the sample-based metrics (presumably the raw samples the histograms
        were built from -- confirm against the caller).
    metric : str
        Name of the distance measure. ``"default"`` is the same as
        ``"euclidean"``.
    normalize : optional
        Currently not used by this function.

    Returns
    -------
    The distance value computed by the selected metric.

    Raises
    ------
    Exception
        If the two histograms, or the two sample arrays, differ in shape,
        or if ``metric`` is not a known name.
    """
    if data[0][0].shape != data[1][0].shape:
        raise Exception(" Histograms a and b not the same size a:{0} b:{1}".format(data[0][0].shape[0], data[1][0].shape[0]))
    if data[2].shape != data[3].shape:
        raise Exception(" input data a and b not the same size a:{0} b:{1}".format(data[2].shape[0], data[3].shape[0]))

    hista = data[0][0]
    histb = data[1][0]
    binsa = data[0][1]
    binsb = data[1][1]

    # Bin centres: midpoint of each pair of neighbouring bin edges. Used as
    # the value locations for the weighted wasserstein / energy metrics.
    binctr_a = 0.5 * (binsa[1:] + binsa[:-1])
    binctr_b = 0.5 * (binsb[1:] + binsb[:-1])

    if metric == "default" or metric == "euclidean":
        dist = distance.euclidean(hista, histb)
    elif metric == "manhattan":
        dist = distance.cityblock(hista, histb)
    elif metric == "nonintersection":
        dist = non_intersection(hista, histb)
    elif metric == "kullbackleiber":
        # Kullback-Leibler divergence D(P || Q) for discrete distributions
        dist = stats.entropy(hista, histb)  # switched for test
    elif metric == "bhattacharyya":
        dist = bhattacharyya(hista, histb)
    elif metric == "matusita":
        dist = matusita(hista, histb)
    elif metric == "meandistance":
        dist = np.abs(data[2].mean() - data[3].mean())
    elif metric == "averagedistance":
        dist = average_distance(data[2], data[3], metric_name="cityblock", chunksize=10**2)
    elif metric == "jensenshannon":
        dist = distance.jensenshannon(hista, histb)
    elif metric == "correlation":
        dist = distance.correlation(hista, histb)
    elif metric == "braycurtis":
        dist = distance.braycurtis(hista, histb)
    elif metric == "hellinger":
        dist = hellinger(hista, histb)
    elif metric == "wasserstein":
        dist = stats.wasserstein_distance(binctr_a, binctr_b, u_weights=hista, v_weights=histb)
    elif metric == "energy":
        dist = stats.energy_distance(binctr_a, binctr_b, u_weights=hista, v_weights=histb)
    elif metric == "kolmogorovsmirnov":
        res = stats.ks_2samp(hista, histb)
        dist = res.statistic
    elif metric == "additivechi2":
        res = metrics.pairwise.additive_chi2_kernel(np.atleast_2d(hista), np.atleast_2d(histb))
        dist = res[0][0]
    elif metric == "chi2_kernel":
        res = metrics.pairwise.chi2_kernel(np.atleast_2d(hista), np.atleast_2d(histb))
        dist = res[0][0]
    elif metric == "MPDA":
        dist = minimum_difference_pair_assignments(hista, histb)
    else:
        raise Exception("unknown distance measure {0}".format(metric))

    return dist

Ejemplo n.º 8

0

Mostrar archivo

    def match_histograms(self, cur_all_model="all"):
        """
        Compares the greyscale, RGB and HSV histograms of the query video with each of the saved average histograms
        using different distance metrics such as the Correlation, Intersection, Chi-Square Distance, Hellinger Distance,
        Earth's Mover Distance and Energy Distance metrics. Finally, prints the results for each histogram model and
        metric in a console table and writes the data to a CSV file.

        Histograms are read from ``../histogram_data/<name>/hist-*.txt``, one CSV per metric is written below
        ``../results/csv/``, and the best match of every metric is appended to ``self.results_array`` as many times
        as the matching entry of ``self.histogram_comparison_weigths`` says.

        :param cur_all_model: the current histogram model when operating with all 3 models
        :return: None
        """
        # variables used for finding the match to the recorded video
        video_match = ""
        video_match_value = 0

        # get histogram for the recorded video to match - todo: calculate the histogram on the go
        # Only the first matching branch runs: a concrete config.model wins,
        # otherwise (config.model == "all") cur_all_model selects the histogram.
        query_histogram = dict()
        if config.model == "gray" or (cur_all_model == "gray"
                                      and config.model == "all"):
            query_histogram = {
                'gray':
                np.loadtxt("../histogram_data/{}/hist-gray.txt".format(
                    self.file_name),
                           dtype=np.float32,
                           unpack=False)
            }
        elif config.model == "rgb" or (cur_all_model == "rgb"
                                       and config.model == "all"):
            query_histogram = {
                'b':
                np.loadtxt("../histogram_data/{}/hist-b.txt".format(
                    self.file_name),
                           dtype=np.float32,
                           unpack=False),
                'g':
                np.loadtxt("../histogram_data/{}/hist-g.txt".format(
                    self.file_name),
                           dtype=np.float32,
                           unpack=False),
                'r':
                np.loadtxt("../histogram_data/{}/hist-r.txt".format(
                    self.file_name),
                           dtype=np.float32,
                           unpack=False)
            }
        elif config.model == "hsv" or (cur_all_model == "hsv"
                                       and config.model == "all"):
            # The HSV histogram is stored flattened; restore its (8, 12, 3) shape.
            hsv_data = np.loadtxt("../histogram_data/{}/hist-hsv.txt".format(
                self.file_name))
            query_histogram = {'hsv': hsv_data.reshape((8, 12, 3))}

        # compare query histogram with each DB video histogram
        print("\n{} Histogram Comparison Results:\n".format(
            _get_chosen_model_string(cur_all_model)))

        method = ""
        csv_field_names = ["video", "score"]

        # use OpenCV's compareHist function for RGB and greyscale histograms (works with 2d arrays only)
        if config.model == "rgb" \
                or config.model == "gray" \
                or (cur_all_model == "gray" and config.model == "all") \
                or (cur_all_model == "rgb" and config.model == "all"):
            for m in self.histcmp_methods:
                # Map the numeric comparison method id to a display name
                # (also used in the CSV file name).
                if m == 0:
                    method = "CORRELATION"
                elif m == 1:
                    method = "CHI-SQUARE"
                elif m == 2:
                    method = "INTERSECTION"
                elif m == 3:
                    method = "HELLINGER"

                # CSV file to write data to for each method
                if config.model == "all":
                    csv_file = open(
                        "../results/csv/{}-{}-{}.csv".format(
                            config.model, cur_all_model, method), 'w')
                else:
                    csv_file = open(
                        "../results/csv/{}-{}.csv".format(
                            config.model, method), 'w')
                with csv_file:
                    writer = csv.DictWriter(csv_file,
                                            fieldnames=csv_field_names)
                    writer.writeheader()

                    table_data = list()
                    for i, file in enumerate(
                            get_video_filenames("../footage/")):
                        comparison = 0
                        if config.model == "gray" or (cur_all_model == "gray"
                                                      and config.model
                                                      == "all"):
                            dbvideo_greyscale_histogram = np.loadtxt(
                                "../histogram_data/{}/hist-gray.txt".format(
                                    file),
                                dtype=np.float32,
                                unpack=False)
                            comparison = cv2.compareHist(
                                query_histogram['gray'],
                                dbvideo_greyscale_histogram, m)
                        elif config.model == "rgb" or (cur_all_model == "rgb"
                                                       and config.model
                                                       == "all"):
                            dbvideo_b_histogram = np.loadtxt(
                                "../histogram_data/{}/hist-b.txt".format(file),
                                dtype=np.float32,
                                unpack=False)
                            dbvideo_g_histogram = np.loadtxt(
                                "../histogram_data/{}/hist-g.txt".format(file),
                                dtype=np.float32,
                                unpack=False)
                            dbvideo_r_histogram = np.loadtxt(
                                "../histogram_data/{}/hist-r.txt".format(file),
                                dtype=np.float32,
                                unpack=False)
                            comparison_b = cv2.compareHist(
                                query_histogram['b'], dbvideo_b_histogram, m)
                            comparison_g = cv2.compareHist(
                                query_histogram['g'], dbvideo_g_histogram, m)
                            comparison_r = cv2.compareHist(
                                query_histogram['r'], dbvideo_r_histogram, m)
                            # The RGB score is the mean of the three channel scores.
                            comparison = (comparison_b + comparison_g +
                                          comparison_r) / 3

                        # append data to table
                        table_data.append([file, round(comparison, 5)])

                        # write data to CSV file
                        writer.writerow({
                            "video": file,
                            "score": round(comparison, 5)
                        })

                        # The first video seeds the best match for this method;
                        # later videos replace it only when they score better.
                        if i == 0:
                            video_match = file
                            video_match_value = comparison
                        else:
                            # Higher score = better match (Correlation and Intersection)
                            if m in [0, 2] and comparison > video_match_value:
                                video_match = file
                                video_match_value = comparison
                            # Lower score = better match
                            # (Chi-square, Alternative chi-square, Hellinger and Kullback-Leibler Divergence)
                            # NOTE(review): ids 4 and 5 get no display name in the
                            # mapping above, so `method` would keep its previous value.
                            elif m in [1, 3, 4, 5
                                       ] and comparison < video_match_value:
                                video_match = file
                                video_match_value = comparison

                # append video match found to results list (using weights)
                # Nothing is appended when cur_all_model is neither "gray" nor "rgb".
                if cur_all_model == "gray":
                    for _ in range(0,
                                   self.histogram_comparison_weigths['gray'],
                                   1):
                        self.results_array.append(video_match)
                elif cur_all_model == "rgb":
                    for _ in range(0, self.histogram_comparison_weigths['rgb'],
                                   1):
                        self.results_array.append(video_match)

                print_terminal_table(table_data, method)
                # "\x1b[1;31m" ... "\x1b[0m" are ANSI escape codes (bold red, then reset).
                print("{} {} match found: ".format(
                    _get_chosen_model_string(cur_all_model), method) +
                      "\x1b[1;31m" + video_match + "\x1b[0m" + "\n\n")

        # use SciPy's statistical distances functions for HSV histograms (compareHist does not work with 3d arrays)
        elif config.model == "hsv" or config.model == "all":
            for m in self.histcmp_3d_methods:
                if m == "earths_mover_distance":
                    method = "EARTH'S MOVER DISTANCE"
                elif m == "energy_distance":
                    method = "ENERGY DISTANCE"

                # CSV file to write data to for each method
                if config.model == "all":
                    csv_file = open(
                        "../results/csv/{}-{}-{}.csv".format(
                            config.model, cur_all_model, method), 'w')
                else:
                    csv_file = open(
                        "../results/csv/{}-{}.csv".format(
                            config.model, method), 'w')
                with csv_file:

                    writer = csv.DictWriter(csv_file,
                                            fieldnames=csv_field_names)
                    writer.writeheader()

                    table_data = list()
                    for i, file in enumerate(
                            get_video_filenames("../footage/")):
                        dbvideo_hsv_histogram_data = np.loadtxt(
                            "../histogram_data/{}/hist-hsv.txt".format(file))
                        dbvideo_hsv_histogram = dbvideo_hsv_histogram_data.reshape(
                            (8, 12, 3))
                        # The score is the sum of the 1-D distances between the
                        # query and DB slices at every (h, s) position.
                        comparison = 0
                        for h in range(0,
                                       self.bins[0]):  # loop through hue bins
                            for s in range(0, self.bins[1]
                                           ):  # loop through saturation bins
                                query_histogram_slice = query_histogram['hsv'][
                                    h][s]
                                dbvideo_histogram_slice = dbvideo_hsv_histogram[
                                    h][s]
                                if method == "EARTH'S MOVER DISTANCE":
                                    comparison += wasserstein_distance(
                                        query_histogram_slice,
                                        dbvideo_histogram_slice)
                                elif method == "ENERGY DISTANCE":
                                    comparison += energy_distance(
                                        query_histogram_slice,
                                        dbvideo_histogram_slice)

                        # append data to table
                        table_data.append([file, round(comparison, 5)])

                        # write data to CSV file
                        writer.writerow({
                            "video": file,
                            "score": round(comparison, 5)
                        })

                        # Both metrics here are distances: lower score = better match.
                        if i == 0:
                            video_match = file
                            video_match_value = comparison
                        else:
                            if comparison < video_match_value:
                                video_match = file
                                video_match_value = comparison

                # append video match found to results list (using weights)
                for _ in range(0, self.histogram_comparison_weigths['hsv']):
                    self.results_array.append(video_match)

                print_terminal_table(table_data, method)
                print("{} {} Match found: ".format(
                    _get_chosen_model_string(cur_all_model), method) +
                      "\x1b[1;31m" + video_match + "\x1b[0m" + "\n\n")

Ejemplo n.º 9

0

Mostrar archivo

Archivo: prototype.py Proyecto: Jhko725/Imaris-MuscleSizeAnalysis

# %%
# %%

# %%
# Take the same column (index ``col_ind``) from the first three entries of
# ``fed_fast_dataset`` as float arrays.
col_ind = 1
sample0 = np.array(fed_fast_dataset[0].iloc[:, col_ind], dtype = float)
sample1 = np.array(fed_fast_dataset[1].iloc[:, col_ind], dtype = float)
sample2 = np.array(fed_fast_dataset[2].iloc[:, col_ind], dtype = float)

# Wasserstein distance of sample0 to each of the other two samples.
wdist1 = scstats.wasserstein_distance(sample0, sample1)
wdist2 = scstats.wasserstein_distance(sample0, sample2)


# %%
# Energy distance for the same two sample pairs.
edist1 = scstats.energy_distance(sample0, sample1)
edist2 = scstats.energy_distance(sample0, sample2)
# %%
# Bare expressions: shown as cell output when run interactively.
edist1
# %%
edist2
# %%
kolmogorov_distance(sample0, sample1)
# %%
def calculate_distance_metrics(dataframe1, dataframe2, column_index, metric_functions, metric_names):
    """Evaluate several distance metrics on one column of two dataframes.

    Args:
        dataframe1: First dataframe.
        dataframe2: Second dataframe.
        column_index: Positional index of the column to compare.
        metric_functions: Callables ``f(data1, data2)`` returning a distance.
        metric_names: Names for the metrics, in the same order as
            ``metric_functions``.

    Returns:
        A dataframe with one row per metric name holding that metric's value.
    """
    # ``iloc[:, i]`` selects column i; a plain ``iloc[i]`` would select row i.
    data1 = np.array(dataframe1.iloc[:, column_index], dtype=float)
    data2 = np.array(dataframe2.iloc[:, column_index], dtype=float)

    metrics_dict = {
        name: metric(data1, data2)
        for metric, name in zip(metric_functions, metric_names)
    }
    return pd.DataFrame.from_dict(metrics_dict, orient="index")

Ejemplo n.º 10

0

Mostrar archivo

Archivo: metrics.py Proyecto: mozilla/canary_anomaly_detection

def log_energy_dist(points_1, points_2):
    """Return ``log(1 + d)``, where ``d`` is the energy distance of the inputs."""
    shifted_distance = 1 + energy_distance(points_1, points_2)
    return np.log(shifted_distance)

Ejemplo n.º 11

0

Mostrar archivo

Archivo: distr_tools.py Proyecto: clbarnes/cleft_scripts

def energy_coefficient(x, y):
    """Return the energy coefficient of inhomogeneity of two 1-D samples.

    Computed as ``D**2 / (2 * E|X - Y|)``, where ``D`` is the energy
    distance and ``E|X - Y|`` the mean absolute difference over all pairs
    of one value from ``x`` and one from ``y``. The result is 0 for
    identical distributions and at most 1.

    Args:
        x: Values of the first sample.
        y: Values of the second sample.

    Returns:
        The coefficient as a float in ``[0, 1]``.
    """
    # scipy returns the energy distance D itself, so square it to get D^2.
    squared_distance = stats.energy_distance(x, y) ** 2
    xv, yv = np.meshgrid(x, y)
    expected_abs_diff = np.abs(xv - yv).mean()
    if expected_abs_diff == 0:
        # Every pair is equal, i.e. both samples are the same single value:
        # the distributions are identical and 0 / 0 must not be evaluated.
        return 0.0
    return squared_distance / (2 * expected_abs_diff)

Ejemplo n.º 12

0

Mostrar archivo

	def choose(self,a:LightCurve,b:LightCurve) -> float:
		"""Return the energy distance between the ``curve`` data of ``a`` and ``b``."""
		from scipy.stats import energy_distance
		return energy_distance(a.curve, b.curve)

Ejemplo n.º 13

0

Mostrar archivo

# Testing Script for Statistical Loss Function in Pytorch
# Prints the SciPy reference distances and the torch losses from
# ``stats_loss`` for the same random data so they can be compared.

BATCH = 16
DIM = 32

# Making Data for Testing
vec_1 = np.random.random((BATCH,DIM))
vec_2 = np.random.random((BATCH,DIM))
vec_list = np.arange(DIM)

# Making Scipy Numpy Results
# Each row is used as a weight vector over the positions 0..DIM-1; the
# per-row distances are summed here and averaged over the batch when printed.
result_1=0
result_2=0
for i in range(BATCH):
    vec_dist_1 = stats.wasserstein_distance(vec_list, vec_list, vec_1[i], vec_2[i])
    vec_dist_2 = stats.energy_distance(vec_list,vec_list,vec_1[i],vec_2[i])
    result_1 += vec_dist_1
    result_2 += vec_dist_2
print("Numpy-Based Scipy Results: \n",
      "Wasserstein distance",result_1/BATCH,"\n",
      "Energy distance",result_2/BATCH,"\n")

# Making Pytorch Variable Calculations
tensor_1=Variable(torch.from_numpy(vec_1))
tensor_2=Variable(torch.from_numpy(vec_2),requires_grad=True)
tensor_3=Variable(torch.rand(BATCH+1,DIM))

# Show results
# Each loss is computed once, and its own value and requires_grad flag are
# printed together so the two printed fields always refer to the same loss.
wasserstein_loss = stats_loss.torch_wasserstein_loss(tensor_1,tensor_2)
energy_loss = stats_loss.torch_energy_loss(tensor_1,tensor_2)
print("Pytorch-Based Results:")
print("Wasserstein loss",wasserstein_loss.data,wasserstein_loss.requires_grad)
print("Energy loss",energy_loss.data,energy_loss.requires_grad)

Ejemplo n.º 14

0

Mostrar archivo

def analyse_differences(training_ds, conditions, energies_inputted,
                        variables_of_interest):
    for num, var in enumerate(variables_of_interest):
        all_p_values = []
        distance = [[], []]
        varString = ''
        if (var == 's1'):
            varString = r'$S_1$'
        elif (var == 's2'):
            varString = r'$S_2$'
        else:
            varString = r'$f_{200}$'
        for i in range(len(energies_inputted)):
            #print('Observed whitney (U,P): %.6f %.6f' % mannwhitneyu(x, y))
            x = np.array(conditions[num][i])
            y = np.array(training_ds[i][var])
            test_variance = lambda x, y: np.abs(
                moment(x, moment=2) / moment(y, moment=2))
            test_man = lambda x, y: mannwhitneyu(x, y)[0]
            test_amean = lambda x, y: np.abs(np.mean(x) - np.mean(y))
            test_gmean = lambda x, y: np.abs(gmean(x) - gmean(y))
            test_ks = lambda x, y: ks_2samp(x, y)[0]
            test_chi = lambda x, y: chisquare(
                np.histogram(x, bins=50)[0],
                np.histogram(y, bins=50)[0])[0]
            p_value = permutation_test(x,
                                       y,
                                       method='approximate',
                                       num_rounds=1000,
                                       func=test_gmean,
                                       seed=0)
            x = x / max(y)
            y = normalize(y)
            distance[0].append([wasserstein_distance(x, y), i])
            distance[1].append([energy_distance(x, y), i])
            all_p_values.append([p_value, i])

        #all_p_values_mean=[[0.001, 0], [0.0, 1], [0.0, 2], [0.0, 3], [0.0, 4], [0.0, 5], [0.0, 6], [0.0, 7], [0.0, 8], [0.0, 9], [0.0, 10], [0.0, 11], [0.0, 12], [0.0, 13], [0.0, 14], [0.0, 15], [0.0, 16], [0.0, 17], [0.0, 18], [0.0, 19], [0.0, 20], [0.0, 21], [0.0, 22], [0.0, 23], [0.0, 24], [0.0, 25], [0.0, 26], [0.0, 27], [0.0, 28], [0.0, 29], [0.0, 30], [0.0, 31], [0.0, 32], [0.0, 33], [0.0, 34], [0.0, 35], [0.0, 36], [0.0, 37], [0.0, 38], [0.0, 39], [0.0, 40], [0.0, 41], [0.0, 42], [0.0, 43], [0.0, 44], [0.0, 45], [0.0, 46], [0.0, 47], [0.0, 48], [0.0, 49], [0.0, 50], [0.0, 51], [0.0, 52], [0.0, 53], [0.0, 54], [0.0, 55], [0.0, 56], [0.0, 57], [0.0, 58], [0.0, 59], [0.0, 60], [0.0, 61], [0.0, 62], [0.0, 63], [0.0, 64], [0.0, 65], [0.0, 66], [0.0, 67], [0.0, 68], [0.0, 69], [0.0, 70], [0.0, 71], [0.0, 72], [0.0, 73], [0.0, 74], [0.0, 75], [0.022, 76], [0.0, 77], [0.571, 78], [0.444, 79], [0.198, 80], [0.0, 81], [0.0, 82], [0.0, 83], [0.0, 84], [0.009, 85], [0.116, 86], [0.0, 87], [0.0, 88], [0.0, 89], [0.066, 90], [0.009, 91], [0.0, 92], [0.0, 93], [0.101, 94], [0.0, 95], [0.0, 96], [0.0, 97], [0.0, 98], [0.036, 99], [0.19, 100], [0.004, 101], [0.207, 102], [0.651, 103], [0.416, 104], [0.0, 105], [0.9, 106], [0.0, 107], [0.0, 108], [0.232, 109], [0.0, 110], [0.766, 111], [0.0, 112], [0.138, 113], [0.928, 114], [0.554, 115], [0.0, 116], [0.125, 117], [0.0, 118], [0.347, 119], [0.01, 120], [0.0, 121], [0.0, 122], [0.41, 123], [0.046, 124], [0.53, 125], [0.066, 126], [0.0, 127], [0.04, 128], [0.0, 129], [0.004, 130], [0.0, 131], [0.0, 132], [0.022, 133], [0.091, 134], [0.005, 135], [0.298, 136], [0.307, 137], [0.0, 138], [0.055, 139], [0.001, 140], [0.518, 141], [0.425, 142], [0.0, 143], [0.447, 144], [0.0, 145], [0.0, 146], [0.179, 147], [0.0, 148], [0.0, 149], [0.0, 150], [0.364, 151], [0.0, 152], [0.0, 153], [0.0, 154], [0.075, 155], [0.0, 156], [0.0, 157], [0.0, 158], [0.0, 159], [0.0, 160], [0.172, 161], [0.0, 162], [0.0, 163], [0.053, 164], [0.0, 165], 
[0.0, 166], [0.029, 167], [0.0, 168], [0.0, 169], [0.161, 170], [0.006, 171], [0.0, 172], [0.975, 173], [0.692, 174], [0.0, 175], [0.503, 176], [0.071, 177], [0.938, 178], [0.003, 179], [0.006, 180], [0.0, 181], [0.0, 182], [0.118, 183], [0.105, 184], [0.0, 185], [0.0, 186], [0.0, 187], [0.0, 188], [0.066, 189], [0.27, 190], [0.038, 191], [0.793, 192], [0.931, 193], [0.107, 194], [0.0, 195], [0.0, 196], [0.149, 197], [0.603, 198], [0.0, 199], [0.0, 200], [0.204, 201], [0.0, 202], [0.068, 203], [0.0, 204], [0.0, 205], [0.297, 206], [0.769, 207], [0.675, 208], [0.035, 209], [0.0, 210], [0.006, 211], [0.0, 212], [0.0, 213], [0.0, 214], [0.0, 215], [0.0, 216], [0.0, 217], [0.0, 218], [0.0, 219], [0.0, 220], [0.0, 221], [0.0, 222], [0.0, 223], [0.0, 224], [0.0, 225], [0.0, 226], [0.0, 227], [0.0, 228], [0.0, 229]]
        x = [el[1] for el in all_p_values]
        y = [el[0] for el in all_p_values]
        x2 = [
            all_p_values[el][1] for el in range(len(all_p_values))
            if (all_p_values[el][0] == 0)
        ]
        y2 = [
            all_p_values[el][0] for el in range(len(all_p_values))
            if (all_p_values[el][0] == 0)
        ]

        accepted = np.where(np.array(y) > 0.5)[0]
        fig = plt.figure()
        ax = fig.add_subplot()
        ax2 = ax.twinx()
        print("Percentage in which p > 0.5 for %s : %.5f" %
              (var, 100 * (accepted.size) / len(y)))
        ax.scatter(x, y, marker='o', s=1.5, color='blue')
        ax.set_yscale('log')
        ax2.get_yaxis().set_visible(False)
        ax2.margins(0.01)
        ax2.scatter(x2,
                    y2,
                    marker='o',
                    s=1.5,
                    color='red',
                    label='p=0\n(low statistics)')
        ax2.scatter([0], [1000000], color='white')
        ax2.legend(loc='center left')
        ax.set_xlabel("Nuclear Recoil Energy", size=11, labelpad=5)
        ax.set_ylabel("P value", size=11, labelpad=5)
        ax.set_title("Permutation test for \n" + varString +
                     " on two-tailed geometric mean differences.")
        plt.savefig('./final_result/analysis/pval_gmean_test_' + var + '.png')
        plt.close()

        x = [el[1] for el in distance[0]]
        y = [el[0] for el in distance[0]]
        plt.scatter(x,
                    y,
                    marker='x',
                    s=1.5,
                    color='blue',
                    label='All p values (100%)',
                    alpha=0.5)
        x3 = [
            distance[0][el][1] for el in range(len(all_p_values))
            if (all_p_values[el][0] > 0.1)
        ]
        y3 = [
            distance[0][el][0] for el in range(len(all_p_values))
            if (all_p_values[el][0] > 0.1)
        ]
        plt.scatter(x3,
                    y3,
                    marker='x',
                    s=1.5,
                    color='green',
                    label="p>0.1 (%.2f%%)" % (100 * (len(y3) / len(y))))
        x2 = [
            distance[0][el][1] for el in range(len(all_p_values))
            if (all_p_values[el][0] > 0.5)
        ]
        y2 = [
            distance[0][el][0] for el in range(len(all_p_values))
            if (all_p_values[el][0] > 0.5)
        ]
        plt.scatter(x2,
                    y2,
                    marker='x',
                    s=1.5,
                    color='red',
                    label="p>0.5 (%.2f%%)" % (100 * (len(y2) / len(y))))
        plt.xlabel("Nuclear Recoil Energy (keV)", size=11, labelpad=5)
        plt.ylabel("Wasserstein Distance", size=11, labelpad=5)
        plt.legend()
        plt.title("Wasserstein distance for \n" + varString +
                  " for varying recoil energies")
        plt.savefig('./final_result/analysis/wasserstein_distance_' + var +
                    '.png')
        plt.close()