Esempio n. 1
0
# Example script: compute a few TurbuStat statistics for two data cubes.
# NOTE(review): `dataset1`, `dataset2` and `np` are not defined in this
# snippet; they are assumed to be provided earlier (dicts with a "cube"
# entry, and numpy imported as `np`) -- confirm against the full script.
from turbustat.statistics import Cramer_Distance

# Build the Cramer statistic from both cubes with a noise value of 0.1 for
# each, then call distance_metric without normalization. The name is bound
# to whatever distance_metric returns (presumably the Cramer_Distance
# instance itself, since an attribute is read from it below -- confirm).
cramer_distance = Cramer_Distance(
    dataset1["cube"], dataset2["cube"], noise_value1=0.1,
    noise_value2=0.1).distance_metric(normalize=False)

# Keep the data matrix stored for the first cube.
cramer_val = cramer_distance.data_matrix1

# Dendrograms

from turbustat.statistics import DendroDistance, Dendrogram_Stats

# 40 logarithmically spaced values between 10**-1.5 and 10**0.5, passed as
# `min_deltas` to both dendrogram computations below.
min_deltas = np.logspace(-1.5, 0.5, 40)

# Two-cube dendrogram comparison; as above, the name is bound to the return
# value of distance_metric().
dendro_distance = DendroDistance(dataset1["cube"],
                                 dataset2["cube"],
                                 min_deltas=min_deltas).distance_metric()

# Number-of-features result stored for the first cube.
dendrogram_val = dendro_distance.dendro1.numfeatures

# With periodic boundaries: run the single-cube dendrogram statistic on the
# first cube only, using the same `min_deltas`.
dendro = Dendrogram_Stats(dataset1['cube'], min_deltas=min_deltas)
dendro.run(periodic_bounds=True)

dendrogram_periodic_val = dendro.numfeatures

# PDF

from turbustat.statistics import PDF_Distance

pdf_distance = \
def stats_wrapper(dataset1,
                  dataset2,
                  fiducial_models=None,
                  statistics=None,
                  multicore=False,
                  vca_break=None,
                  vcs_break=None,
                  vcs_regrid=[None, None],
                  dendro_params=None,
                  periodic_bounds=[True, True],
                  noise_value=[-np.inf, -np.inf],
                  dendro_saves=[None, None],
                  scf_saves=[None, None],
                  inertial_range=[[None] * 2, [None] * 2],
                  spatial_range=[[None] * 2, [None] * 2]):
    '''
    Run the requested distance statistics on two datasets.

    Each statistic is run with set inputs. This function needs to be altered
    to change the inputs. The list-valued defaults are only read, never
    mutated, so sharing them between calls is safe.

    Parameters
    ----------
    dataset1 : dict
        Contains the cube and all of its property arrays. The keys read here
        are "cube", "moment0", "moment0_error", "centroid" and
        "centroid_error" (only those needed by the requested statistics).
    dataset2 : dict
        See dataset1
    fiducial_models : dict, optional
        Models for dataset1. Avoids recomputing when comparing
        many sets to dataset1. Missing entries are treated as None.
    statistics : list, optional
        List of all of the statistics to use. If None, the module-level
        ``statistics_list`` is used. Names are matched by substring.
    multicore : bool, optional
        If the wrapper is being used in parallel, this disables
        storing and returning model values for dataset1.
    vca_break : optional
        Passed as ``breaks`` to VCA_Distance.
    vcs_break : float, optional
        Pass an initial guess for the location of the VCS break
        (passed as ``breaks`` to VCS_Distance).
    vcs_regrid : list, optional
        One entry per dataset. An entry that is not None is cast to int and
        passed to ``spectral_regrid_cube`` to regrid that cube spectrally
        before running VCS; ``dataset["cube"]`` is then read as a
        (data, header) pair. Regridding only happens when at least one entry
        is truthy.
    dendro_params : dict or list, optional
        Provides parameters to use when computing the initial dendrogram.
        If different parameters are required for each dataset, the
        input should be a list containing the two dictionaries.
    periodic_bounds : list of bools
        Set whether the boundaries should be handled as periodic (True;
        'wrap' for Delta-variance, 'continuous' for SCF) or not (False;
        'fill' for Delta-variance, 'cut' for SCF). Also passed unchanged to
        DendroDistance.
    noise_value : list, optional
        One entry per dataset. Passed as ``noise_value1``/``noise_value2`` to
        Cramer_Distance; twice the value is passed as ``min_val1``/
        ``min_val2`` to PDF_Distance.
    dendro_saves : list, optional
        One entry per dataset: None to use the dataset's cube, or the
        filename (str) of a saved dendrogram result to use instead.
    scf_saves : list, optional
        Currently unused; kept so existing callers keep working.
    inertial_range : list, optional
        ``inertial_range[0]`` is passed as ``low_cut`` and
        ``inertial_range[1]`` as ``high_cut`` to MVC, PSpec and VCA.
    spatial_range : list, optional
        ``spatial_range[0]`` is passed as ``xlow`` and ``spatial_range[1]``
        as ``xhigh`` to Wavelet and Delta-variance.

    Returns
    -------
    distances : dict
        Distance values keyed by statistic name.
    fiducial_models : dict
        Only returned when ``multicore`` is False. Models for dataset1.

    Raises
    ------
    UserWarning
        If an entry of ``dendro_saves`` is neither None nor a str.
    '''

    if statistics is None:  # Run them all
        statistics = statistics_list

    def requested(*names):
        # True when any requested statistic contains one of ``names``.
        return any(name in stat for stat in statistics for name in names)

    distances = {}

    # Calculate the fiducial case and return it for later use
    if fiducial_models is None:
        fiducial_models = {}
        for statistic in statistics:
            if "PDF" in statistic:
                statistic = "PDF"
            elif statistic == "Skewness" or statistic == "Kurtosis":
                statistic = "stat_moments"
            elif "Dendrogram" in statistic:
                statistic = "Dendrogram"
            elif "DeltaVariance_Centroid" in statistic:
                statistic = "DeltaVariance_Centroid"
            elif "DeltaVariance" in statistic and "Centroid" not in statistic:
                statistic = "DeltaVariance"
            fiducial_models[statistic] = None

    # NOTE: fiducial models are looked up with ``.get`` throughout. The
    # sections below are triggered by substring matches (e.g. "VCS_Break"
    # triggers VCS), so the exact key may not have been created above; a
    # missing model simply means "compute it".

    if requested("Wavelet"):
        wavelet_distance = \
            Wavelet_Distance(dataset1["moment0"],
                             dataset2["moment0"],
                             fiducial_model=fiducial_models.get("Wavelet"),
                             xlow=spatial_range[0],
                             xhigh=spatial_range[1])
        wavelet_distance.distance_metric()
        distances["Wavelet"] = wavelet_distance.distance
        if not multicore:
            fiducial_models["Wavelet"] = copy(wavelet_distance.wt1)

        del wavelet_distance

    if requested("MVC"):
        mvc_distance = \
            MVC_Distance(dataset1, dataset2,
                         fiducial_model=fiducial_models.get("MVC"),
                         low_cut=inertial_range[0],
                         high_cut=inertial_range[1])
        mvc_distance.distance_metric()
        distances["MVC"] = mvc_distance.distance
        if not multicore:
            fiducial_models["MVC"] = copy(mvc_distance.mvc1)

        del mvc_distance

    if requested("PSpec"):
        pspec_distance = \
            PSpec_Distance(dataset1["moment0"],
                           dataset2["moment0"],
                           fiducial_model=fiducial_models.get("PSpec"),
                           low_cut=inertial_range[0],
                           high_cut=inertial_range[1])
        pspec_distance.distance_metric()
        distances["PSpec"] = pspec_distance.distance
        if not multicore:
            fiducial_models["PSpec"] = copy(pspec_distance.pspec1)

        del pspec_distance

    if requested("Bispectrum"):
        bispec_distance = \
            BiSpectrum_Distance(dataset1["moment0"],
                                dataset2["moment0"],
                                fiducial_model=fiducial_models.get(
                                    "Bispectrum"))
        bispec_distance.distance_metric()
        distances["Bispectrum"] = bispec_distance.distance
        if not multicore:
            fiducial_models["Bispectrum"] = copy(bispec_distance.bispec1)

        del bispec_distance

    if requested("DeltaVariance_Slope", "DeltaVariance_Curve"):

        # Check for how boundaries should be handled.
        boundary1 = 'wrap' if periodic_bounds[0] else 'fill'
        boundary2 = 'wrap' if periodic_bounds[1] else 'fill'

        delvar_distance = \
            DeltaVariance_Distance(dataset1["moment0"],
                                   dataset2["moment0"],
                                   weights1=dataset1["moment0_error"][0]**-2,
                                   weights2=dataset2["moment0_error"][0]**-2,
                                   fiducial_model=fiducial_models.get(
                                       "DeltaVariance"),
                                   xlow=spatial_range[0],
                                   xhigh=spatial_range[1],
                                   boundary=[boundary1, boundary2])

        delvar_distance.distance_metric()
        distances["DeltaVariance_Curve"] = delvar_distance.curve_distance
        distances["DeltaVariance_Slope"] = delvar_distance.slope_distance
        if not multicore:
            fiducial_models["DeltaVariance"] = copy(delvar_distance.delvar1)

        del delvar_distance

    if requested("DeltaVariance_Centroid_Slope",
                 "DeltaVariance_Centroid_Curve"):

        # Check for how boundaries should be handled.
        boundary1 = 'wrap' if periodic_bounds[0] else 'fill'
        boundary2 = 'wrap' if periodic_bounds[1] else 'fill'

        delvar_distance = \
            DeltaVariance_Distance(dataset1["centroid"],
                                   dataset2["centroid"],
                                   weights1=dataset1["centroid_error"][0]**-2,
                                   weights2=dataset2["centroid_error"][0]**-2,
                                   fiducial_model=fiducial_models.get(
                                       "DeltaVariance_Centroid"),
                                   xlow=spatial_range[0],
                                   xhigh=spatial_range[1],
                                   boundary=[boundary1, boundary2])
        delvar_distance.distance_metric()
        distances["DeltaVariance_Centroid_Curve"] = \
            delvar_distance.curve_distance
        distances["DeltaVariance_Centroid_Slope"] = \
            delvar_distance.slope_distance
        if not multicore:
            fiducial_models["DeltaVariance_Centroid"] = \
                copy(delvar_distance.delvar1)

        del delvar_distance

    if requested("Genus"):
        genus_distance = \
            GenusDistance(dataset1["moment0"],
                          dataset2["moment0"],
                          fiducial_model=fiducial_models.get("Genus"))
        genus_distance.distance_metric()
        distances["Genus"] = genus_distance.distance
        if not multicore:
            fiducial_models["Genus"] = copy(genus_distance.genus1)

        del genus_distance

    if requested("VCS"):

        # These cubes are VCS-specific: they may be replaced below by
        # spectrally regridded versions.
        vcs_cube1 = dataset1["cube"]
        vcs_cube2 = dataset2["cube"]

        # Regrid the cube to lower spectral resolution
        if any(vcs_regrid):
            from spectral_cube import SpectralCube
            import astropy.io.fits as fits

            from analysis_funcs import spectral_regrid_cube

            if vcs_regrid[0] is not None:
                cube1_hdu = fits.PrimaryHDU(dataset1["cube"][0],
                                            header=dataset1["cube"][1])
                vcs_cube1 = spectral_regrid_cube(
                    SpectralCube.read(cube1_hdu), int(vcs_regrid[0]))

            if vcs_regrid[1] is not None:
                cube2_hdu = fits.PrimaryHDU(dataset2["cube"][0],
                                            header=dataset2["cube"][1])
                vcs_cube2 = spectral_regrid_cube(
                    SpectralCube.read(cube2_hdu), int(vcs_regrid[1]))

        vcs_distance = VCS_Distance(vcs_cube1,
                                    vcs_cube2,
                                    breaks=vcs_break,
                                    fiducial_model=fiducial_models.get("VCS"))
        vcs_distance.distance_metric()
        distances["VCS"] = vcs_distance.distance
        distances["VCS_Small_Scale"] = vcs_distance.small_scale_distance
        distances["VCS_Large_Scale"] = vcs_distance.large_scale_distance
        distances["VCS_Break"] = vcs_distance.break_distance
        if not multicore:
            fiducial_models["VCS"] = copy(vcs_distance.vcs1)

        del vcs_distance

    if requested("VCA"):
        vca_distance = VCA_Distance(dataset1["cube"],
                                    dataset2["cube"],
                                    breaks=vca_break,
                                    fiducial_model=fiducial_models.get("VCA"),
                                    low_cut=inertial_range[0],
                                    high_cut=inertial_range[1])
        vca_distance.distance_metric()
        distances["VCA"] = vca_distance.distance
        if not multicore:
            fiducial_models["VCA"] = copy(vca_distance.vca1)

        del vca_distance

    if requested("Tsallis"):
        tsallis_distance = \
            Tsallis_Distance(dataset1["moment0"],
                             dataset2["moment0"],
                             fiducial_model=fiducial_models.get("Tsallis"))
        tsallis_distance.distance_metric()
        distances["Tsallis"] = tsallis_distance.distance
        if not multicore:
            fiducial_models["Tsallis"] = copy(tsallis_distance.tsallis1)

        del tsallis_distance

    if requested("Skewness", "Kurtosis"):
        moment_distance = \
            StatMoments_Distance(dataset1["moment0"],
                                 dataset2["moment0"], radius=5,
                                 weights1=dataset1["moment0_error"][0]**-2,
                                 weights2=dataset2["moment0_error"][0]**-2,
                                 fiducial_model=fiducial_models.get(
                                     "stat_moments"))
        moment_distance.distance_metric()
        distances["Skewness"] = moment_distance.skewness_distance
        distances["Kurtosis"] = moment_distance.kurtosis_distance
        if not multicore:
            fiducial_models["stat_moments"] = \
                copy(moment_distance.moments1)

        del moment_distance

    if requested("PCA"):
        pca_distance = \
            PCA_Distance(dataset1["cube"],
                         dataset2["cube"],
                         fiducial_model=fiducial_models.get("PCA"))
        pca_distance.distance_metric()
        distances["PCA"] = pca_distance.distance
        if not multicore:
            fiducial_models["PCA"] = pca_distance.pca1

        del pca_distance

    if requested("SCF"):

        boundary1 = "continuous" if periodic_bounds[0] else 'cut'
        boundary2 = "continuous" if periodic_bounds[1] else 'cut'

        # Use the dataset cubes directly. The cube variables of the VCS
        # section only exist when VCS was also requested (NameError
        # otherwise) and may hold spectrally regridded data.
        scf_distance = \
            SCF_Distance(dataset1["cube"], dataset2["cube"],
                         boundary=[boundary1, boundary2],
                         fiducial_model=fiducial_models.get("SCF"))
        scf_distance.distance_metric()
        distances["SCF"] = scf_distance.distance
        if not multicore:
            fiducial_models["SCF"] = copy(scf_distance.scf1)

        del scf_distance

    if requested("Cramer"):
        cramer_distance = \
            Cramer_Distance(dataset1["cube"],
                            dataset2["cube"],
                            noise_value1=noise_value[0],
                            noise_value2=noise_value[1]).distance_metric()
        distances["Cramer"] = cramer_distance.distance

        del cramer_distance

    if requested("Dendrogram_Hist", "Dendrogram_Num"):

        # Each input is either the dataset's cube or the filename of a
        # previously saved dendrogram result.
        dendro_inputs = []
        for dataset, index in ((dataset1, 0), (dataset2, 1)):
            if dendro_saves[index] is None:
                dendro_inputs.append(dataset["cube"])
            elif isinstance(dendro_saves[index], str):
                dendro_inputs.append(dendro_saves[index])
            else:
                raise UserWarning("dendro_saves must be the filename of the"
                                  " saved file.")

        dendro_distance = \
            DendroDistance(dendro_inputs[0], dendro_inputs[1],
                           dendro_params=dendro_params,
                           fiducial_model=fiducial_models.get("Dendrogram"),
                           periodic_bounds=periodic_bounds,
                           min_features=40)
        dendro_distance.distance_metric()

        distances["Dendrogram_Hist"] = dendro_distance.histogram_distance
        distances["Dendrogram_Num"] = dendro_distance.num_distance
        if not multicore:
            fiducial_models["Dendrogram"] = copy(dendro_distance.dendro1)

        del dendro_distance

    if requested("PDF_Hellinger", "PDF_KS", "PDF_Lognormal"):
        pdf_distance = \
            PDF_Distance(dataset1["moment0"],
                         dataset2["moment0"],
                         min_val1=2 * noise_value[0],
                         min_val2=2 * noise_value[1])

        pdf_distance.distance_metric()

        distances["PDF_Hellinger"] = pdf_distance.hellinger_distance
        distances["PDF_KS"] = pdf_distance.ks_distance
        distances["PDF_Lognormal"] = pdf_distance.lognormal_distance
        if not multicore:
            fiducial_models["PDF"] = copy(pdf_distance.PDF1)

        del pdf_distance

    if multicore:
        return distances
    else:
        return distances, fiducial_models
def stats_wrapper(dataset1, dataset2, fiducial_models=None,
                  statistics=None, multicore=False, vca_break=None,
                  vcs_break=None, vcs_regrid=[None, None],
                  dendro_params=None,
                  periodic_bounds=[True, True],
                  noise_value=[-np.inf, -np.inf],
                  dendro_saves=[None, None],
                  scf_saves=[None, None],
                  inertial_range=[[None] * 2, [None] * 2],
                  spatial_range=[[None] * 2, [None] * 2]):
    '''
    Run the requested distance statistics on two datasets.

    Each statistic is run with set inputs. This function needs to be altered
    to change the inputs. The list-valued defaults are only read, never
    mutated, so sharing them between calls is safe.

    Parameters
    ----------
    dataset1 : dict
        Contains the cube and all of its property arrays. The keys read here
        are "cube", "moment0", "moment0_error", "centroid" and
        "centroid_error" (only those needed by the requested statistics).
    dataset2 : dict
        See dataset1
    fiducial_models : dict, optional
        Models for dataset1. Avoids recomputing when comparing
        many sets to dataset1. Missing entries are treated as None.
    statistics : list, optional
        List of all of the statistics to use. If None, the module-level
        ``statistics_list`` is used. Names are matched by substring.
    multicore : bool, optional
        If the wrapper is being used in parallel, this disables
        storing and returning model values for dataset1.
    vca_break : optional
        Passed as ``breaks`` to VCA_Distance.
    vcs_break : float, optional
        Pass an initial guess for the location of the VCS break
        (passed as ``breaks`` to VCS_Distance).
    vcs_regrid : list, optional
        One entry per dataset. An entry that is not None is cast to int and
        passed to ``spectral_regrid_cube`` to regrid that cube spectrally
        before running VCS; ``dataset["cube"]`` is then read as a
        (data, header) pair. Regridding only happens when at least one entry
        is truthy.
    dendro_params : dict or list, optional
        Provides parameters to use when computing the initial dendrogram.
        If different parameters are required for each dataset, the
        input should be a list containing the two dictionaries.
    periodic_bounds : list of bools
        Set whether the boundaries should be handled as periodic (True;
        'wrap' for Delta-variance, 'continuous' for SCF) or not (False;
        'fill' for Delta-variance, 'cut' for SCF). Also passed unchanged to
        DendroDistance.
    noise_value : list, optional
        One entry per dataset. Passed as ``noise_value1``/``noise_value2`` to
        Cramer_Distance; twice the value is passed as ``min_val1``/
        ``min_val2`` to PDF_Distance.
    dendro_saves : list, optional
        One entry per dataset: None to use the dataset's cube, or the
        filename (str) of a saved dendrogram result to use instead.
    scf_saves : list, optional
        Currently unused; kept so existing callers keep working.
    inertial_range : list, optional
        ``inertial_range[0]`` is passed as ``low_cut`` and
        ``inertial_range[1]`` as ``high_cut`` to MVC, PSpec and VCA.
    spatial_range : list, optional
        ``spatial_range[0]`` is passed as ``xlow`` and ``spatial_range[1]``
        as ``xhigh`` to Wavelet and Delta-variance.

    Returns
    -------
    distances : dict
        Distance values keyed by statistic name.
    fiducial_models : dict
        Only returned when ``multicore`` is False. Models for dataset1.

    Raises
    ------
    UserWarning
        If an entry of ``dendro_saves`` is neither None nor a str.
    '''

    if statistics is None:  # Run them all
        statistics = statistics_list

    def requested(*names):
        # True when any requested statistic contains one of ``names``.
        return any(name in stat for stat in statistics for name in names)

    distances = {}

    # Calculate the fiducial case and return it for later use
    if fiducial_models is None:
        fiducial_models = {}
        for statistic in statistics:
            if "PDF" in statistic:
                statistic = "PDF"
            elif statistic == "Skewness" or statistic == "Kurtosis":
                statistic = "stat_moments"
            elif "Dendrogram" in statistic:
                statistic = "Dendrogram"
            elif "DeltaVariance_Centroid" in statistic:
                statistic = "DeltaVariance_Centroid"
            elif "DeltaVariance" in statistic and "Centroid" not in statistic:
                statistic = "DeltaVariance"
            fiducial_models[statistic] = None

    # NOTE: fiducial models are looked up with ``.get`` throughout. The
    # sections below are triggered by substring matches (e.g. "VCS_Break"
    # triggers VCS), so the exact key may not have been created above; a
    # missing model simply means "compute it".

    if requested("Wavelet"):
        wavelet_distance = \
            Wavelet_Distance(dataset1["moment0"],
                             dataset2["moment0"],
                             fiducial_model=fiducial_models.get("Wavelet"),
                             xlow=spatial_range[0],
                             xhigh=spatial_range[1])
        wavelet_distance.distance_metric()
        distances["Wavelet"] = wavelet_distance.distance
        if not multicore:
            fiducial_models["Wavelet"] = copy(wavelet_distance.wt1)

        del wavelet_distance

    if requested("MVC"):
        mvc_distance = \
            MVC_Distance(dataset1, dataset2,
                         fiducial_model=fiducial_models.get("MVC"),
                         low_cut=inertial_range[0],
                         high_cut=inertial_range[1])
        mvc_distance.distance_metric()
        distances["MVC"] = mvc_distance.distance
        if not multicore:
            fiducial_models["MVC"] = copy(mvc_distance.mvc1)

        del mvc_distance

    if requested("PSpec"):
        pspec_distance = \
            PSpec_Distance(dataset1["moment0"],
                           dataset2["moment0"],
                           fiducial_model=fiducial_models.get("PSpec"),
                           low_cut=inertial_range[0],
                           high_cut=inertial_range[1])
        pspec_distance.distance_metric()
        distances["PSpec"] = pspec_distance.distance
        if not multicore:
            fiducial_models["PSpec"] = copy(pspec_distance.pspec1)

        del pspec_distance

    if requested("Bispectrum"):
        bispec_distance = \
            BiSpectrum_Distance(dataset1["moment0"],
                                dataset2["moment0"],
                                fiducial_model=fiducial_models.get(
                                    "Bispectrum"))
        bispec_distance.distance_metric()
        distances["Bispectrum"] = bispec_distance.distance
        if not multicore:
            fiducial_models["Bispectrum"] = copy(bispec_distance.bispec1)

        del bispec_distance

    if requested("DeltaVariance_Slope", "DeltaVariance_Curve"):

        # Check for how boundaries should be handled.
        boundary1 = 'wrap' if periodic_bounds[0] else 'fill'
        boundary2 = 'wrap' if periodic_bounds[1] else 'fill'

        delvar_distance = \
            DeltaVariance_Distance(dataset1["moment0"],
                                   dataset2["moment0"],
                                   weights1=dataset1["moment0_error"][0]**-2,
                                   weights2=dataset2["moment0_error"][0]**-2,
                                   fiducial_model=fiducial_models.get(
                                       "DeltaVariance"),
                                   xlow=spatial_range[0],
                                   xhigh=spatial_range[1],
                                   boundary=[boundary1, boundary2])

        delvar_distance.distance_metric()
        distances["DeltaVariance_Curve"] = delvar_distance.curve_distance
        distances["DeltaVariance_Slope"] = delvar_distance.slope_distance
        if not multicore:
            fiducial_models["DeltaVariance"] = copy(delvar_distance.delvar1)

        del delvar_distance

    if requested("DeltaVariance_Centroid_Slope",
                 "DeltaVariance_Centroid_Curve"):

        # Check for how boundaries should be handled.
        boundary1 = 'wrap' if periodic_bounds[0] else 'fill'
        boundary2 = 'wrap' if periodic_bounds[1] else 'fill'

        delvar_distance = \
            DeltaVariance_Distance(dataset1["centroid"],
                                   dataset2["centroid"],
                                   weights1=dataset1["centroid_error"][0]**-2,
                                   weights2=dataset2["centroid_error"][0]**-2,
                                   fiducial_model=fiducial_models.get(
                                       "DeltaVariance_Centroid"),
                                   xlow=spatial_range[0],
                                   xhigh=spatial_range[1],
                                   boundary=[boundary1, boundary2])
        delvar_distance.distance_metric()
        distances["DeltaVariance_Centroid_Curve"] = \
            delvar_distance.curve_distance
        distances["DeltaVariance_Centroid_Slope"] = \
            delvar_distance.slope_distance
        if not multicore:
            fiducial_models["DeltaVariance_Centroid"] = \
                copy(delvar_distance.delvar1)

        del delvar_distance

    if requested("Genus"):
        genus_distance = \
            GenusDistance(dataset1["moment0"],
                          dataset2["moment0"],
                          fiducial_model=fiducial_models.get("Genus"))
        genus_distance.distance_metric()
        distances["Genus"] = genus_distance.distance
        if not multicore:
            fiducial_models["Genus"] = copy(genus_distance.genus1)

        del genus_distance

    if requested("VCS"):

        # These cubes are VCS-specific: they may be replaced below by
        # spectrally regridded versions.
        vcs_cube1 = dataset1["cube"]
        vcs_cube2 = dataset2["cube"]

        # Regrid the cube to lower spectral resolution
        if any(vcs_regrid):
            from spectral_cube import SpectralCube
            import astropy.io.fits as fits

            from analysis_funcs import spectral_regrid_cube

            if vcs_regrid[0] is not None:
                cube1_hdu = fits.PrimaryHDU(dataset1["cube"][0],
                                            header=dataset1["cube"][1])
                vcs_cube1 = spectral_regrid_cube(
                    SpectralCube.read(cube1_hdu), int(vcs_regrid[0]))

            if vcs_regrid[1] is not None:
                cube2_hdu = fits.PrimaryHDU(dataset2["cube"][0],
                                            header=dataset2["cube"][1])
                vcs_cube2 = spectral_regrid_cube(
                    SpectralCube.read(cube2_hdu), int(vcs_regrid[1]))

        vcs_distance = VCS_Distance(vcs_cube1,
                                    vcs_cube2,
                                    breaks=vcs_break,
                                    fiducial_model=fiducial_models.get("VCS"))
        vcs_distance.distance_metric()
        distances["VCS"] = vcs_distance.distance
        distances["VCS_Small_Scale"] = vcs_distance.small_scale_distance
        distances["VCS_Large_Scale"] = vcs_distance.large_scale_distance
        distances["VCS_Break"] = vcs_distance.break_distance
        if not multicore:
            fiducial_models["VCS"] = copy(vcs_distance.vcs1)

        del vcs_distance

    if requested("VCA"):
        vca_distance = VCA_Distance(dataset1["cube"],
                                    dataset2["cube"],
                                    breaks=vca_break,
                                    fiducial_model=fiducial_models.get("VCA"),
                                    low_cut=inertial_range[0],
                                    high_cut=inertial_range[1])
        vca_distance.distance_metric()
        distances["VCA"] = vca_distance.distance
        if not multicore:
            fiducial_models["VCA"] = copy(vca_distance.vca1)

        del vca_distance

    if requested("Tsallis"):
        tsallis_distance = \
            Tsallis_Distance(dataset1["moment0"],
                             dataset2["moment0"],
                             fiducial_model=fiducial_models.get("Tsallis"))
        tsallis_distance.distance_metric()
        distances["Tsallis"] = tsallis_distance.distance
        if not multicore:
            fiducial_models["Tsallis"] = copy(tsallis_distance.tsallis1)

        del tsallis_distance

    if requested("Skewness", "Kurtosis"):
        moment_distance = \
            StatMoments_Distance(dataset1["moment0"],
                                 dataset2["moment0"], radius=5,
                                 weights1=dataset1["moment0_error"][0]**-2,
                                 weights2=dataset2["moment0_error"][0]**-2,
                                 fiducial_model=fiducial_models.get(
                                     "stat_moments"))
        moment_distance.distance_metric()
        distances["Skewness"] = moment_distance.skewness_distance
        distances["Kurtosis"] = moment_distance.kurtosis_distance
        if not multicore:
            fiducial_models["stat_moments"] = \
                copy(moment_distance.moments1)

        del moment_distance

    if requested("PCA"):
        pca_distance = \
            PCA_Distance(dataset1["cube"],
                         dataset2["cube"],
                         fiducial_model=fiducial_models.get("PCA"))
        pca_distance.distance_metric()
        distances["PCA"] = pca_distance.distance
        if not multicore:
            fiducial_models["PCA"] = pca_distance.pca1

        del pca_distance

    if requested("SCF"):

        boundary1 = "continuous" if periodic_bounds[0] else 'cut'
        boundary2 = "continuous" if periodic_bounds[1] else 'cut'

        # Use the dataset cubes directly. The cube variables of the VCS
        # section only exist when VCS was also requested (NameError
        # otherwise) and may hold spectrally regridded data.
        scf_distance = \
            SCF_Distance(dataset1["cube"], dataset2["cube"],
                         boundary=[boundary1, boundary2],
                         fiducial_model=fiducial_models.get("SCF"))
        scf_distance.distance_metric()
        distances["SCF"] = scf_distance.distance
        if not multicore:
            fiducial_models["SCF"] = copy(scf_distance.scf1)

        del scf_distance

    if requested("Cramer"):
        cramer_distance = \
            Cramer_Distance(dataset1["cube"],
                            dataset2["cube"],
                            noise_value1=noise_value[0],
                            noise_value2=noise_value[1]).distance_metric()
        distances["Cramer"] = cramer_distance.distance

        del cramer_distance

    if requested("Dendrogram_Hist", "Dendrogram_Num"):

        # Each input is either the dataset's cube or the filename of a
        # previously saved dendrogram result.
        dendro_inputs = []
        for dataset, index in ((dataset1, 0), (dataset2, 1)):
            if dendro_saves[index] is None:
                dendro_inputs.append(dataset["cube"])
            elif isinstance(dendro_saves[index], str):
                dendro_inputs.append(dendro_saves[index])
            else:
                raise UserWarning("dendro_saves must be the filename of the"
                                  " saved file.")

        dendro_distance = \
            DendroDistance(dendro_inputs[0], dendro_inputs[1],
                           dendro_params=dendro_params,
                           fiducial_model=fiducial_models.get("Dendrogram"),
                           periodic_bounds=periodic_bounds,
                           min_features=40)
        dendro_distance.distance_metric()

        distances["Dendrogram_Hist"] = dendro_distance.histogram_distance
        distances["Dendrogram_Num"] = dendro_distance.num_distance
        if not multicore:
            fiducial_models["Dendrogram"] = copy(dendro_distance.dendro1)

        del dendro_distance

    if requested("PDF_Hellinger", "PDF_KS", "PDF_Lognormal"):
        pdf_distance = \
            PDF_Distance(dataset1["moment0"],
                         dataset2["moment0"],
                         min_val1=2 * noise_value[0],
                         min_val2=2 * noise_value[1])

        pdf_distance.distance_metric()

        distances["PDF_Hellinger"] = pdf_distance.hellinger_distance
        distances["PDF_KS"] = pdf_distance.ks_distance
        distances["PDF_Lognormal"] = pdf_distance.lognormal_distance
        if not multicore:
            fiducial_models["PDF"] = copy(pdf_distance.PDF1)

        del pdf_distance

    if multicore:
        return distances
    else:
        return distances, fiducial_models
Esempio n. 4
0
def wrapper(dataset1, dataset2, fiducial_models=None,
            statistics=None, multicore=False, filenames=None):
    '''
    Run the requested distance statistics between two datasets.

    Each statistic is run with fixed inputs taken from the dataset
    dictionaries. This function needs to be altered to change the inputs.

    Parameters
    ----------
    dataset1 : dict
        Data for the first dataset. The keys read here are "cube",
        "integrated_intensity" and "integrated_intensity_error"; the whole
        dictionary is handed to the MVC and PSpec distance classes.
    dataset2 : dict
        See dataset1.
    fiducial_models : dict, optional
        Previously stored models for dataset1, keyed by "Wavelet", "MVC",
        "PSpec", "Bispectrum", "DeltaVariance", "Genus", "VCS", "VCA",
        "Tsallis", "stat_moments", "PCA", "SCF" and "Dendrogram". When
        given, the entry for each requested statistic is passed to its
        distance class as ``fiducial_model``. When None, the dataset1
        model attribute of each distance object (e.g. ``wt1``) is
        collected into a new dict instead.
    statistics : list of str, optional
        Names of the statistics to run. If None, all are run. A statistic
        runs when its name is a substring of any entry, so "VCS_Density"
        also triggers the "VCS" block.
    multicore : bool, optional
        If True, the models for dataset1 are neither collected nor
        returned.
    filenames : optional
        Not used by this function.

    Returns
    -------
    distances : dict
        Distance values keyed by statistic name.
    fiducial_models : dict
        Models for dataset1. Only returned (as the second element of a
        tuple) when `fiducial_models` was None and `multicore` is False.

    Raises
    ------
    KeyError
        If `fiducial_models` is a dict lacking the entry for a requested
        statistic.
    '''

    if statistics is None:  # Run them all
        statistics = ["Wavelet", "MVC", "PSpec", "Bispectrum", "DeltaVariance",
                      "Genus", "VCS", "VCA", "VCS_Density", "VCS_Velocity",
                      "Tsallis", "PCA", "SCF", "Cramer", "Skewness",
                      "Kurtosis", "Dendrogram_Hist", "Dendrogram_Num"]

    # Without supplied models this call is the fiducial run: the dataset1
    # models are gathered here so later calls can reuse them.
    fiducial_run = fiducial_models is None
    if fiducial_run:
        fiducial_models = {}
    # Models are only handed back from a serial fiducial run.
    keep_models = fiducial_run and not multicore

    def requested(*names):
        # Substring match against every entry of `statistics`.
        return any(name in stat for name in names for stat in statistics)

    def fiducial_kwarg(key):
        # Extra keyword for a distance class: empty on the fiducial run,
        # otherwise the stored model (KeyError if it is missing).
        if fiducial_run:
            return {}
        return {"fiducial_model": fiducial_models[key]}

    distances = {}

    if requested("Wavelet"):
        wavelet_distance = \
            Wavelet_Distance(dataset1["integrated_intensity"],
                             dataset2["integrated_intensity"],
                             **fiducial_kwarg("Wavelet")).distance_metric()
        distances["Wavelet"] = wavelet_distance.distance
        if keep_models:
            fiducial_models["Wavelet"] = wavelet_distance.wt1

    if requested("MVC"):
        mvc_distance = \
            MVC_distance(dataset1, dataset2,
                         **fiducial_kwarg("MVC")).distance_metric()
        distances["MVC"] = mvc_distance.distance
        if keep_models:
            fiducial_models["MVC"] = mvc_distance.mvc1

    if requested("PSpec"):
        pspec_distance = \
            PSpec_Distance(dataset1, dataset2,
                           **fiducial_kwarg("PSpec")).distance_metric()
        distances["PSpec"] = pspec_distance.distance
        if keep_models:
            fiducial_models["PSpec"] = pspec_distance.pspec1

    if requested("Bispectrum"):
        bispec_distance = \
            BiSpectrum_Distance(dataset1["integrated_intensity"],
                                dataset2["integrated_intensity"],
                                **fiducial_kwarg("Bispectrum")).distance_metric()
        distances["Bispectrum"] = bispec_distance.distance
        if keep_models:
            fiducial_models["Bispectrum"] = bispec_distance.bispec1

    if requested("DeltaVariance"):
        delvar_distance = \
            DeltaVariance_Distance(dataset1["integrated_intensity"],
                                   dataset1["integrated_intensity_error"][0],
                                   dataset2["integrated_intensity"],
                                   dataset2["integrated_intensity_error"][0],
                                   **fiducial_kwarg("DeltaVariance")).distance_metric()
        distances["DeltaVariance"] = delvar_distance.distance
        if keep_models:
            fiducial_models["DeltaVariance"] = delvar_distance.delvar1

    if requested("Genus"):
        genus_distance = \
            GenusDistance(dataset1["integrated_intensity"][0],
                          dataset2["integrated_intensity"][0],
                          **fiducial_kwarg("Genus")).distance_metric()
        distances["Genus"] = genus_distance.distance
        if keep_models:
            fiducial_models["Genus"] = genus_distance.genus1

    if requested("VCS"):
        vcs_distance = \
            VCS_Distance(dataset1["cube"],
                         dataset2["cube"],
                         **fiducial_kwarg("VCS")).distance_metric()
        # One VCS run provides all three VCS distances.
        distances["VCS"] = vcs_distance.distance
        distances["VCS_Density"] = vcs_distance.density_distance
        distances["VCS_Velocity"] = vcs_distance.velocity_distance
        if keep_models:
            fiducial_models["VCS"] = vcs_distance.vcs1

    if requested("VCA"):
        vca_distance = \
            VCA_Distance(dataset1["cube"],
                         dataset2["cube"],
                         **fiducial_kwarg("VCA")).distance_metric()
        distances["VCA"] = vca_distance.distance
        if keep_models:
            fiducial_models["VCA"] = vca_distance.vca1

    if requested("Tsallis"):
        tsallis_distance = \
            Tsallis_Distance(dataset1["integrated_intensity"][0],
                             dataset2["integrated_intensity"][0],
                             **fiducial_kwarg("Tsallis")).distance_metric()
        distances["Tsallis"] = tsallis_distance.distance
        if keep_models:
            fiducial_models["Tsallis"] = tsallis_distance.tsallis1

    # Skewness and kurtosis come from the same distance object, so asking
    # for either one fills in both.
    if requested("Skewness", "Kurtosis"):
        moment_distance = \
            StatMomentsDistance(dataset1["integrated_intensity"][0],
                                dataset2["integrated_intensity"][0],
                                5,
                                **fiducial_kwarg("stat_moments")).distance_metric()
        distances["Skewness"] = moment_distance.skewness_distance
        distances["Kurtosis"] = moment_distance.kurtosis_distance
        if keep_models:
            fiducial_models["stat_moments"] = moment_distance.moments1

    if requested("PCA"):
        pca_distance = \
            PCA_Distance(dataset1["cube"][0],
                         dataset2["cube"][0],
                         **fiducial_kwarg("PCA")).distance_metric()
        distances["PCA"] = pca_distance.distance
        if keep_models:
            fiducial_models["PCA"] = pca_distance.pca1

    if requested("SCF"):
        scf_distance = \
            SCF_Distance(dataset1["cube"][0],
                         dataset2["cube"][0],
                         **fiducial_kwarg("SCF")).distance_metric()
        distances["SCF"] = scf_distance.distance
        if keep_models:
            fiducial_models["SCF"] = scf_distance.scf1

    # Cramer neither takes nor stores a fiducial model here.
    if requested("Cramer"):
        cramer_distance = \
            Cramer_Distance(dataset1["cube"][0],
                            dataset2["cube"][0]).distance_metric()
        distances["Cramer"] = cramer_distance.distance

    if requested("Dendrogram_Hist", "Dendrogram_Num"):
        dendro_distance = DendroDistance(dataset1["cube"][0],
                                         dataset2["cube"][0],
                                         **fiducial_kwarg("Dendrogram"))
        dendro_distance.distance_metric()

        distances["Dendrogram_Hist"] = dendro_distance.histogram_distance
        distances["Dendrogram_Num"] = dendro_distance.num_distance
        if keep_models:
            fiducial_models["Dendrogram"] = dendro_distance.dendro1

    if keep_models:
        return distances, fiducial_models
    return distances
Esempio n. 5
0
def stats_wrapper(dataset1, dataset2, fiducial_models=None,
                  statistics=None, multicore=False, vca_break=None,
                  vcs_break=None, cleanup=True):
    '''
    Function to run all of the statistics on two datasets.
    Each statistic is run with set inputs. This function needs to be altered
    to change the inputs.

    Parameters
    ----------
    dataset1 : dict
        Contains the cube and all of its property arrays. The keys read
        here are "cube", "integrated_intensity" and
        "integrated_intensity_error"; the whole dictionary is handed to
        the MVC distance class.
    dataset2 : dict
        See dataset1
    fiducial_models : dict, optional
        Models for dataset1. Avoids recomputing when comparing
        many sets to dataset1. When given, the entry for each requested
        statistic is passed to its distance class as ``fiducial_model``.
    statistics : list, optional
        List of all of the statistics to use. If None, all are run.
        A statistic runs when its name is a substring of any entry, so
        "VCS_Density" also triggers the "VCS" block.
    multicore : bool, optional
        If the wrapper is being used in parallel, this disables
        returning model values for dataset1.
    vca_break : optional
        Passed to VCA_Distance as ``breaks``.
    vcs_break : optional
        Passed to VCS_Distance as ``breaks``.
    cleanup : bool, optional
        Delete distance classes after running.

    Returns
    -------
    distances : dict
        Distance values keyed by statistic name.
    fiducial_models : dict
        Models for dataset1. Only returned (as the second element of a
        tuple) when `fiducial_models` was None and `multicore` is False.

    Raises
    ------
    KeyError
        If `fiducial_models` is a dict lacking the entry for a requested
        statistic.
    '''

    if statistics is None:  # Run them all
        statistics = ["Wavelet", "MVC", "PSpec", "Bispectrum", "DeltaVariance",
                      "Genus", "VCS", "VCA", "VCS_Density", "VCS_Velocity",
                      "VCS_Break",
                      "Tsallis", "PCA", "SCF", "Cramer", "Skewness",
                      "Kurtosis", "Dendrogram_Hist",
                      "Dendrogram_Num", "PDF_Hellinger", "PDF_KS"]

    # Without supplied models this call is the fiducial run: the dataset1
    # models are gathered here so later calls can reuse them.
    fiducial_run = fiducial_models is None
    if fiducial_run:
        fiducial_models = {}
    # Models are only handed back from a serial fiducial run.
    keep_models = fiducial_run and not multicore

    def requested(*names):
        # Substring match against every entry of `statistics`.
        return any(name in stat for name in names for stat in statistics)

    def fiducial_kwarg(key):
        # Extra keyword for a distance class: empty on the fiducial run,
        # otherwise the stored model (KeyError if it is missing).
        if fiducial_run:
            return {}
        return {"fiducial_model": fiducial_models[key]}

    distances = {}

    if requested("Wavelet"):
        wavelet_distance = \
            Wavelet_Distance(dataset1["integrated_intensity"],
                             dataset2["integrated_intensity"],
                             **fiducial_kwarg("Wavelet")).distance_metric()
        distances["Wavelet"] = wavelet_distance.distance
        if keep_models:
            fiducial_models["Wavelet"] = wavelet_distance.wt1

        if cleanup:
            del wavelet_distance

    if requested("MVC"):
        mvc_distance = \
            MVC_distance(dataset1, dataset2,
                         **fiducial_kwarg("MVC")).distance_metric()
        distances["MVC"] = mvc_distance.distance
        if keep_models:
            fiducial_models["MVC"] = mvc_distance.mvc1

        if cleanup:
            del mvc_distance

    if requested("PSpec"):
        # The weight keywords are spelled weights1/weights2 on every run;
        # the fiducial-model path previously used weight1/weight2, which
        # did not match the other call to the same constructor.
        pspec_distance = \
            PSpec_Distance(dataset1["integrated_intensity"],
                           dataset2["integrated_intensity"],
                           weights1=dataset1["integrated_intensity_error"][0]**2.,
                           weights2=dataset2["integrated_intensity_error"][0]**2.,
                           **fiducial_kwarg("PSpec")).distance_metric()
        distances["PSpec"] = pspec_distance.distance
        if keep_models:
            fiducial_models["PSpec"] = pspec_distance.pspec1

        if cleanup:
            del pspec_distance

    if requested("Bispectrum"):
        bispec_distance = \
            BiSpectrum_Distance(dataset1["integrated_intensity"],
                                dataset2["integrated_intensity"],
                                **fiducial_kwarg("Bispectrum")).distance_metric()
        distances["Bispectrum"] = bispec_distance.distance
        if keep_models:
            fiducial_models["Bispectrum"] = bispec_distance.bispec1

        if cleanup:
            del bispec_distance

    if requested("DeltaVariance"):
        delvar_distance = \
            DeltaVariance_Distance(dataset1["integrated_intensity"],
                                   dataset2["integrated_intensity"],
                                   weights1=dataset1["integrated_intensity_error"][0],
                                   weights2=dataset2["integrated_intensity_error"][0],
                                   **fiducial_kwarg("DeltaVariance")).distance_metric()
        distances["DeltaVariance"] = delvar_distance.distance
        if keep_models:
            fiducial_models["DeltaVariance"] = delvar_distance.delvar1

        if cleanup:
            del delvar_distance

    if requested("Genus"):
        genus_distance = \
            GenusDistance(dataset1["integrated_intensity"][0],
                          dataset2["integrated_intensity"][0],
                          **fiducial_kwarg("Genus")).distance_metric()
        distances["Genus"] = genus_distance.distance
        if keep_models:
            fiducial_models["Genus"] = genus_distance.genus1

        if cleanup:
            del genus_distance

    if requested("VCS"):
        vcs_distance = \
            VCS_Distance(dataset1["cube"],
                         dataset2["cube"],
                         breaks=vcs_break,
                         **fiducial_kwarg("VCS")).distance_metric()
        # One VCS run provides all four VCS distances.
        distances["VCS"] = vcs_distance.distance
        distances["VCS_Density"] = vcs_distance.density_distance
        distances["VCS_Velocity"] = vcs_distance.velocity_distance
        distances["VCS_Break"] = vcs_distance.break_distance
        if keep_models:
            fiducial_models["VCS"] = vcs_distance.vcs1

        if cleanup:
            del vcs_distance

    if requested("VCA"):
        vca_distance = \
            VCA_Distance(dataset1["cube"],
                         dataset2["cube"],
                         breaks=vca_break,
                         **fiducial_kwarg("VCA")).distance_metric()
        distances["VCA"] = vca_distance.distance
        if keep_models:
            fiducial_models["VCA"] = vca_distance.vca1

        if cleanup:
            del vca_distance

    if requested("Tsallis"):
        tsallis_distance = \
            Tsallis_Distance(dataset1["integrated_intensity"][0],
                             dataset2["integrated_intensity"][0],
                             **fiducial_kwarg("Tsallis")).distance_metric()
        distances["Tsallis"] = tsallis_distance.distance
        if keep_models:
            fiducial_models["Tsallis"] = tsallis_distance.tsallis1

        if cleanup:
            del tsallis_distance

    # Skewness and kurtosis come from the same distance object, so asking
    # for either one fills in both.
    if requested("Skewness", "Kurtosis"):
        moment_distance = \
            StatMomentsDistance(dataset1["integrated_intensity"][0],
                                dataset2["integrated_intensity"][0],
                                5,
                                **fiducial_kwarg("stat_moments")).distance_metric()
        distances["Skewness"] = moment_distance.skewness_distance
        distances["Kurtosis"] = moment_distance.kurtosis_distance
        if keep_models:
            fiducial_models["stat_moments"] = moment_distance.moments1

        if cleanup:
            del moment_distance

    if requested("PCA"):
        pca_distance = \
            PCA_Distance(dataset1["cube"][0],
                         dataset2["cube"][0],
                         **fiducial_kwarg("PCA")).distance_metric()
        distances["PCA"] = pca_distance.distance
        if keep_models:
            fiducial_models["PCA"] = pca_distance.pca1

        if cleanup:
            del pca_distance

    if requested("SCF"):
        scf_distance = \
            SCF_Distance(dataset1["cube"][0],
                         dataset2["cube"][0],
                         **fiducial_kwarg("SCF")).distance_metric()
        distances["SCF"] = scf_distance.distance
        if keep_models:
            fiducial_models["SCF"] = scf_distance.scf1

        if cleanup:
            del scf_distance

    # Cramer neither takes nor stores a fiducial model here.
    if requested("Cramer"):
        cramer_distance = \
            Cramer_Distance(dataset1["cube"][0],
                            dataset2["cube"][0]).distance_metric()
        distances["Cramer"] = cramer_distance.distance

        if cleanup:
            del cramer_distance

    if requested("Dendrogram_Hist", "Dendrogram_Num"):
        dendro_distance = DendroDistance(dataset1["cube"][0],
                                         dataset2["cube"][0],
                                         **fiducial_kwarg("Dendrogram"))
        dendro_distance.distance_metric()

        distances["Dendrogram_Hist"] = dendro_distance.histogram_distance
        distances["Dendrogram_Num"] = dendro_distance.num_distance
        if keep_models:
            fiducial_models["Dendrogram"] = dendro_distance.dendro1

        if cleanup:
            del dendro_distance

    if requested("PDF_Hellinger", "PDF_KS", "PDF_AD"):
        # NOTE(review): a "PDF" model is stored on the fiducial run but is
        # never passed back to PDF_Distance on later runs -- confirm
        # whether PDF_Distance accepts a fiducial_model keyword.
        pdf_distance = \
            PDF_Distance(dataset1["integrated_intensity"][0],
                         dataset2["integrated_intensity"][0],
                         min_val1=0.05,
                         min_val2=0.05,
                         weights1=dataset1["integrated_intensity_error"][0] ** -2.,
                         weights2=dataset2["integrated_intensity_error"][0] ** -2.)

        pdf_distance.distance_metric()

        distances["PDF_Hellinger"] = pdf_distance.hellinger_distance
        distances["PDF_KS"] = pdf_distance.ks_distance
        distances["PDF_AD"] = pdf_distance.ad_distance
        if keep_models:
            fiducial_models["PDF"] = pdf_distance.PDF1

        if cleanup:
            del pdf_distance

    if keep_models:
        return distances, fiducial_models
    return distances
Esempio n. 6
0
# SCF
# Distance between the two cubes from SCF_Distance with size=21.
# verbose=True presumably draws the comparison figure that is saved
# below -- TODO confirm.

scf_distance = SCF_Distance(dataset1["cube"],
                            dataset2["cube"], size=21)
scf_distance.distance_metric(verbose=True, label1=label1,
                             label2=label2)

# Single-argument print(...) produces the same output on Python 2 and 3.
print("SCF Distance: %s" % (scf_distance.distance))

p.savefig(os.path.join(figure_path, "scf_example.pdf"))
p.clf()

# Dendrogram Stats

dendro_distance = \
    DendroDistance(dataset1["cube"],
                   dataset2["cube"])

# The histogram and number-of-features statistics are run separately so
# each one gets its own savename.
filename = os.path.join(figure_path, "dendrograms_histogram_example.pdf")
dendro_distance.histogram_stat(verbose=True, label1=label1,
                               label2=label2, savename=filename)
filename = os.path.join(figure_path, "dendrograms_numfeature_example.pdf")
dendro_distance.numfeature_stat(verbose=True, label1=label1,
                                label2=label2, savename=filename)

p.clf()

print(dendro_distance.num_distance)
print(dendro_distance.histogram_distance)

# PDF
Esempio n. 7
0
# Statistical moments: one distance object provides both the kurtosis and
# the skewness distances printed below.
moment_distance = StatMoments_Distance(data1,
                                       data2).distance_metric(verbose=True,
                                                              label1=fits1,
                                                              label2=fits2)

# Single-argument print(...) produces the same output on Python 2 and 3.
print("Kurtosis Distance: %s" % (moment_distance.kurtosis_distance))

print("Skewness Distance: %s" % (moment_distance.skewness_distance))

# # Dendrogram Stats

from turbustat.statistics import DendroDistance

dendro_distance = DendroDistance(data1, data2).distance_metric(verbose=True,
                                                               label1=fits1,
                                                               label2=fits2)

print("Dendrogram Number Distance: %s " % (dendro_distance.num_distance))
print("Dendrogram Histogram Distance: %s " %
      (dendro_distance.histogram_distance))

# PDF

from turbustat.statistics import PDF_Distance

pdf_distance = \
    PDF_Distance(data1,
                 data2).distance_metric(verbose=True, label1=fits1,
                                        label2=fits2)
Esempio n. 8
0
# Cramer Statistic

from turbustat.statistics import Cramer_Distance

cramer_distance = Cramer_Distance(dataset1["cube"],
                                  dataset2["cube"]).distance_metric()

# Single-argument print(...) produces the same output on Python 2 and 3.
print("Cramer Distance: %s" % (cramer_distance.distance))

# Dendrogram Stats

from turbustat.statistics import DendroDistance

# One distance object provides both the number and histogram distances.
dendro_distance = DendroDistance(
    dataset1["cube"], dataset2["cube"]).distance_metric(verbose=True,
                                                        label1=fits1,
                                                        label2=fits2)

print("Dendrogram Number Distance: %s " % (dendro_distance.num_distance))
print("Dendrogram Histogram Distance: %s " %
      (dendro_distance.histogram_distance))

# PDF

from turbustat.statistics import PDF_Distance

pdf_distance = \
    PDF_Distance(dataset1["moment0"],
                 dataset2["moment0"],
                 min_val1=scale,
                 min_val2=scale,