Ejemplo n.º 1
0
# Dendrogram statistics of the first cube, run with periodic boundaries.
dendro = Dendrogram_Stats(dataset1['cube'], min_deltas=min_deltas)
dendro.run(periodic_bounds=True)
dendrogram_periodic_val = dendro.numfeatures

# PDF distance between the two moment-0 entries, without fitting.

from turbustat.statistics import PDF_Distance

pdf_distance = PDF_Distance(
    dataset1["moment0"],
    dataset2["moment0"],
    min_val1=0.05,
    min_val2=0.05,
    # Weights: inverse square of the first element of each error entry.
    weights1=dataset1["moment0_error"][0]**-2.,
    weights2=dataset2["moment0_error"][0]**-2.,
    do_fit=False,
    normalization_type='standardize',
)
pdf_distance.distance_metric()

# Record the first dataset's PDF values and ECDF, plus the bins exposed by
# the distance object.
pdf_val = pdf_distance.PDF1.pdf
pdf_ecdf = pdf_distance.PDF1.ecdf
pdf_bins = pdf_distance.bins

# Do a fitted version of the PDF pca
pdf_fit_distance = \
    PDF_Distance(dataset1["moment0"],
                 dataset2["moment0"],
Ejemplo n.º 2
0
def stats_wrapper(dataset1, dataset2, fiducial_models=None,
                  statistics=None, multicore=False, vca_break=None,
                  vcs_break=None, vcs_regrid=[None, None],
                  dendro_params=None,
                  periodic_bounds=[True, True],
                  noise_value=[-np.inf, -np.inf],
                  dendro_saves=[None, None],
                  scf_saves=[None, None],
                  inertial_range=[[None] * 2, [None] * 2],
                  spatial_range=[[None] * 2, [None] * 2]):
    '''
    Function to run all of the statistics on two datasets.
    Each statistic is run with set inputs. This function needs to be altered
    to change the inputs.

    The list-valued defaults are only read by this function, never modified
    in place.

    Parameters
    ----------
    dataset1 : dict
        Contains the cube and all of its property arrays. Depending on the
        requested statistics, the keys "cube", "moment0", "moment0_error",
        "centroid" and "centroid_error" are read; MVC receives the whole
        dictionary.
    dataset2 : dict
        See dataset1
    fiducial_models : dict, optional
        Models for dataset1. Avoids recomputing when comparing
        many sets to dataset1. When None, a new dictionary is created with
        a None entry for each requested statistic.
    statistics : list, optional
        List of all of the statistics to use. If None, all are run
        (the module-level ``statistics_list`` is used).
    multicore : bool, optional
        If the wrapper is being used in parallel, this disables
        returning model values for dataset1.
    vca_break : optional
        Passed as ``breaks`` to ``VCA_Distance``.
    vcs_break : float, optional
        Pass an initial guess for the location of the VCS break
        (passed as ``breaks`` to ``VCS_Distance``).
    vcs_regrid : list, optional
        One entry per dataset. A non-None entry is cast to int and handed
        to ``spectral_regrid_cube`` to regrid that cube spectrally before
        running VCS. Only VCS uses the regridded cubes.
    dendro_params : dict or list, optional
        Provides parameters to use when computing the initial dendrogram.
        If different parameters are required for each dataset, the
        input should be a list containing the two dictionaries.
    periodic_bounds : list of bools
        One entry per dataset. True selects 'wrap' (Delta-variance) or
        'continuous' (SCF) boundaries; False selects 'fill' or 'cut'.
        The list is also passed unchanged to ``DendroDistance``.
    noise_value : list, optional
        One entry per dataset. Passed to ``Cramer_Distance`` as
        ``noise_value1``/``noise_value2``; twice each value is used as the
        minimum value for the PDF.
    dendro_saves : list, optional
        One entry per dataset: None to use the dataset's cube, or the
        filename (str) of a saved file to use instead.
    scf_saves : list, optional
        Currently unused.
    inertial_range : list, optional
        ``inertial_range[0]`` is passed as ``low_cut`` and
        ``inertial_range[1]`` as ``high_cut`` to MVC, PSpec and VCA.
    spatial_range : list, optional
        ``spatial_range[0]`` is passed as ``xlow`` and ``spatial_range[1]``
        as ``xhigh`` to Wavelet and both Delta-variance runs.

    Returns
    -------
    distances : dict
        Distance values keyed by statistic name.
    fiducial_models : dict
        Models for dataset1 keyed by statistic. Only returned when
        ``multicore`` is False.

    Raises
    ------
    UserWarning
        If an entry of ``dendro_saves`` is neither None nor a str.
    '''

    if statistics is None:  # Run them all
        statistics = statistics_list

    distances = {}

    # Without supplied models, start every requested statistic at None.
    # Statistics that share one underlying model are stored under a
    # common key.
    if fiducial_models is None:
        fiducial_models = {}
        for statistic in statistics:
            if "PDF" in statistic:
                statistic = "PDF"
            elif statistic == "Skewness" or statistic == "Kurtosis":
                statistic = "stat_moments"
            elif "Dendrogram" in statistic:
                statistic = "Dendrogram"
            elif "DeltaVariance_Centroid" in statistic:
                statistic = "DeltaVariance_Centroid"
            elif "DeltaVariance" in statistic and "Centroid" not in statistic:
                statistic = "DeltaVariance"
            fiducial_models[statistic] = None

    if any("Wavelet" in s for s in statistics):
        wavelet_distance = \
            Wavelet_Distance(dataset1["moment0"],
                             dataset2["moment0"],
                             fiducial_model=fiducial_models["Wavelet"],
                             xlow=spatial_range[0],
                             xhigh=spatial_range[1])
        wavelet_distance.distance_metric()
        distances["Wavelet"] = wavelet_distance.distance
        if not multicore:
            fiducial_models["Wavelet"] = copy(wavelet_distance.wt1)

        del wavelet_distance

    if any("MVC" in s for s in statistics):
        mvc_distance = \
            MVC_Distance(dataset1, dataset2,
                         fiducial_model=fiducial_models["MVC"],
                         low_cut=inertial_range[0],
                         high_cut=inertial_range[1])
        mvc_distance.distance_metric()
        distances["MVC"] = mvc_distance.distance
        if not multicore:
            fiducial_models["MVC"] = copy(mvc_distance.mvc1)

        del mvc_distance

    if any("PSpec" in s for s in statistics):
        pspec_distance = \
            PSpec_Distance(dataset1["moment0"],
                           dataset2["moment0"],
                           fiducial_model=fiducial_models['PSpec'],
                           low_cut=inertial_range[0],
                           high_cut=inertial_range[1])
        pspec_distance.distance_metric()
        distances["PSpec"] = pspec_distance.distance
        if not multicore:
            fiducial_models["PSpec"] = copy(pspec_distance.pspec1)

        del pspec_distance

    if any("Bispectrum" in s for s in statistics):
        bispec_distance = \
            BiSpectrum_Distance(dataset1["moment0"],
                                dataset2["moment0"],
                                fiducial_model=fiducial_models['Bispectrum'])
        bispec_distance.distance_metric()
        distances["Bispectrum"] = bispec_distance.distance
        if not multicore:
            fiducial_models["Bispectrum"] = copy(bispec_distance.bispec1)

        del bispec_distance

    if any("DeltaVariance_Slope" in s for s in statistics) or \
       any("DeltaVariance_Curve" in s for s in statistics):

        # Check for how boundaries should be handled.
        boundary1 = 'wrap' if periodic_bounds[0] else 'fill'
        boundary2 = 'wrap' if periodic_bounds[1] else 'fill'

        delvar_distance = \
            DeltaVariance_Distance(dataset1["moment0"],
                                   dataset2["moment0"],
                                   weights1=dataset1["moment0_error"][0]**-2,
                                   weights2=dataset2["moment0_error"][0]**-2,
                                   fiducial_model=fiducial_models["DeltaVariance"],
                                   xlow=spatial_range[0],
                                   xhigh=spatial_range[1],
                                   boundary=[boundary1, boundary2])

        delvar_distance.distance_metric()
        distances["DeltaVariance_Curve"] = delvar_distance.curve_distance
        distances["DeltaVariance_Slope"] = delvar_distance.slope_distance
        if not multicore:
            fiducial_models["DeltaVariance"] = copy(delvar_distance.delvar1)

        del delvar_distance

    if any("DeltaVariance_Centroid_Slope" in s for s in statistics) or \
       any("DeltaVariance_Centroid_Curve" in s for s in statistics):

        # Check for how boundaries should be handled.
        boundary1 = 'wrap' if periodic_bounds[0] else 'fill'
        boundary2 = 'wrap' if periodic_bounds[1] else 'fill'

        delvar_distance = \
            DeltaVariance_Distance(dataset1["centroid"],
                                   dataset2["centroid"],
                                   weights1=dataset1["centroid_error"][0]**-2,
                                   weights2=dataset2["centroid_error"][0]**-2,
                                   fiducial_model=fiducial_models["DeltaVariance_Centroid"],
                                   xlow=spatial_range[0],
                                   xhigh=spatial_range[1],
                                   boundary=[boundary1, boundary2])
        delvar_distance.distance_metric()
        distances["DeltaVariance_Centroid_Curve"] = \
            delvar_distance.curve_distance
        distances["DeltaVariance_Centroid_Slope"] = \
            delvar_distance.slope_distance
        if not multicore:
            fiducial_models["DeltaVariance_Centroid"] = \
                copy(delvar_distance.delvar1)

        del delvar_distance

    if any("Genus" in s for s in statistics):
        genus_distance = \
            GenusDistance(dataset1["moment0"],
                          dataset2["moment0"],
                          fiducial_model=fiducial_models['Genus'])
        genus_distance.distance_metric()
        distances["Genus"] = genus_distance.distance
        if not multicore:
            fiducial_models["Genus"] = copy(genus_distance.genus1)

        del genus_distance

    if any("VCS" in s for s in statistics):

        # Regrid the cube to lower spectral resolution. These regridded
        # cubes are local to the VCS run.
        if any(vcs_regrid):
            from spectral_cube import SpectralCube
            import astropy.io.fits as fits

            from analysis_funcs import spectral_regrid_cube

            if vcs_regrid[0] is not None:
                cube1_hdu = fits.PrimaryHDU(dataset1["cube"][0],
                                            header=dataset1["cube"][1])
                vcs_cube1 = SpectralCube.read(cube1_hdu)

                vcs_cube1 = spectral_regrid_cube(vcs_cube1,
                                                 int(vcs_regrid[0]))
            else:
                vcs_cube1 = dataset1["cube"]

            if vcs_regrid[1] is not None:
                cube2_hdu = fits.PrimaryHDU(dataset2["cube"][0],
                                            header=dataset2["cube"][1])
                vcs_cube2 = SpectralCube.read(cube2_hdu)

                vcs_cube2 = spectral_regrid_cube(vcs_cube2,
                                                 int(vcs_regrid[1]))
            else:
                vcs_cube2 = dataset2["cube"]

        else:
            vcs_cube1 = dataset1["cube"]
            vcs_cube2 = dataset2["cube"]

        # .get(): when only a VCS sub-statistic (e.g. "VCS_Break") was
        # requested, the initialisation above does not create a "VCS" key.
        vcs_distance = VCS_Distance(vcs_cube1,
                                    vcs_cube2,
                                    breaks=vcs_break,
                                    fiducial_model=fiducial_models.get('VCS'))
        vcs_distance.distance_metric()
        distances["VCS"] = vcs_distance.distance
        distances["VCS_Small_Scale"] = vcs_distance.small_scale_distance
        distances["VCS_Large_Scale"] = vcs_distance.large_scale_distance
        distances["VCS_Break"] = vcs_distance.break_distance
        if not multicore:
            fiducial_models["VCS"] = copy(vcs_distance.vcs1)

        del vcs_distance

    if any("VCA" in s for s in statistics):
        vca_distance = VCA_Distance(dataset1["cube"],
                                    dataset2["cube"],
                                    breaks=vca_break,
                                    fiducial_model=fiducial_models['VCA'],
                                    low_cut=inertial_range[0],
                                    high_cut=inertial_range[1])
        vca_distance.distance_metric()
        distances["VCA"] = vca_distance.distance
        if not multicore:
            fiducial_models["VCA"] = copy(vca_distance.vca1)

        del vca_distance

    if any("Tsallis" in s for s in statistics):
        tsallis_distance = \
            Tsallis_Distance(dataset1["moment0"],
                             dataset2["moment0"],
                             fiducial_model=fiducial_models['Tsallis'])
        tsallis_distance.distance_metric()
        distances["Tsallis"] = tsallis_distance.distance
        if not multicore:
            fiducial_models["Tsallis"] = copy(tsallis_distance.tsallis1)

        del tsallis_distance

    if any("Skewness" in s for s in statistics) or\
       any("Kurtosis" in s for s in statistics):
        moment_distance = \
            StatMoments_Distance(dataset1["moment0"],
                                 dataset2["moment0"], radius=5,
                                 weights1=dataset1["moment0_error"][0]**-2,
                                 weights2=dataset2["moment0_error"][0]**-2,
                                 fiducial_model=fiducial_models['stat_moments'])
        moment_distance.distance_metric()
        distances["Skewness"] = moment_distance.skewness_distance
        distances["Kurtosis"] = moment_distance.kurtosis_distance
        if not multicore:
            fiducial_models["stat_moments"] = \
                copy(moment_distance.moments1)

        del moment_distance

    if any("PCA" in s for s in statistics):
        pca_distance = \
            PCA_Distance(dataset1["cube"],
                         dataset2["cube"],
                         fiducial_model=fiducial_models['PCA'])
        pca_distance.distance_metric()
        distances["PCA"] = pca_distance.distance
        if not multicore:
            fiducial_models["PCA"] = pca_distance.pca1

        del pca_distance

    if any("SCF" in s for s in statistics):

        # Loading saved SCF results through scf_saves is not implemented.
        boundary1 = "continuous" if periodic_bounds[0] else 'cut'
        boundary2 = "continuous" if periodic_bounds[1] else 'cut'

        # Use the dataset cubes directly. Names assigned in the VCS branch
        # must not be relied on here: they do not exist unless VCS was
        # also requested.
        scf_distance = \
            SCF_Distance(dataset1["cube"], dataset2["cube"],
                         boundary=[boundary1, boundary2],
                         fiducial_model=fiducial_models["SCF"])
        scf_distance.distance_metric()
        distances["SCF"] = scf_distance.distance
        if not multicore:
            fiducial_models["SCF"] = copy(scf_distance.scf1)

        del scf_distance

    if any("Cramer" in s for s in statistics):
        # Relies on distance_metric() returning the distance object.
        cramer_distance = \
            Cramer_Distance(dataset1["cube"],
                            dataset2["cube"],
                            noise_value1=noise_value[0],
                            noise_value2=noise_value[1]).distance_metric()
        distances["Cramer"] = cramer_distance.distance

        del cramer_distance

    if any("Dendrogram_Hist" in s for s in statistics) or \
       any("Dendrogram_Num" in s for s in statistics):

        if dendro_saves[0] is None:
            input1 = dataset1["cube"]

        elif isinstance(dendro_saves[0], str):
            input1 = dendro_saves[0]
        else:
            raise UserWarning("dendro_saves must be the filename of the"
                              " saved file.")

        if dendro_saves[1] is None:
            input2 = dataset2["cube"]
        elif isinstance(dendro_saves[1], str):
            input2 = dendro_saves[1]
        else:
            raise UserWarning("dendro_saves must be the filename of the"
                              " saved file.")

        dendro_distance = \
            DendroDistance(input1, input2,
                           dendro_params=dendro_params,
                           fiducial_model=fiducial_models['Dendrogram'],
                           periodic_bounds=periodic_bounds,
                           min_features=40)
        dendro_distance.distance_metric()

        distances["Dendrogram_Hist"] = dendro_distance.histogram_distance
        distances["Dendrogram_Num"] = dendro_distance.num_distance
        if not multicore:
            fiducial_models["Dendrogram"] = copy(dendro_distance.dendro1)

        del dendro_distance

    # The PDF_AD distance is currently disabled.
    if any("PDF_Hellinger" in s for s in statistics) or \
       any("PDF_KS" in s for s in statistics) or \
       any("PDF_Lognormal" in s for s in statistics):
        pdf_distance = \
            PDF_Distance(dataset1["moment0"],
                         dataset2["moment0"],
                         min_val1=2 * noise_value[0],
                         min_val2=2 * noise_value[1])

        pdf_distance.distance_metric()

        distances["PDF_Hellinger"] = pdf_distance.hellinger_distance
        distances["PDF_KS"] = pdf_distance.ks_distance
        distances["PDF_Lognormal"] = pdf_distance.lognormal_distance
        if not multicore:
            fiducial_models["PDF"] = copy(pdf_distance.PDF1)

        del pdf_distance

    if multicore:
        return distances
    else:
        return distances, fiducial_models
Ejemplo n.º 3
0
        print(pca.distance)

    # PDF distance between the fiducial and design moment-0 maps.
    if run_pdf:

        from turbustat.statistics import PDF_Distance

        # Take the first HDU of each FITS file.
        moment0 = fits.open(osjoin(
            data_path, "Design4_flatrho_0021_00_radmc_moment0.fits"))[0]
        moment0_fid = fits.open(osjoin(
            data_path, "Fiducial0_flatrho_0021_00_radmc_moment0.fits"))[0]

        # The fiducial map is passed first; fitting is enabled and no
        # normalization type is given.
        pdf = PDF_Distance(moment0_fid, moment0,
                           min_val1=0.0, min_val2=0.0,
                           do_fit=True, normalization_type=None)
        pdf.distance_metric(
            verbose=True,
            save_name=osjoin(fig_path, "pdf_distmet.png"))

        # Report the three distances exposed by the PDF comparison.
        print(pdf.hellinger_distance)
        print(pdf.ks_distance)
        print(pdf.lognormal_distance)

    # PSpec
    if run_pspec:

        from turbustat.statistics import PSpec_Distance

        moment0 = fits.open(
Ejemplo n.º 4
0
# 40 logarithmically spaced values from 10**-1.5 to 10**0.5.
min_deltas = np.logspace(-1.5, 0.5, 40)

# Build the dendrogram comparison, then keep whatever distance_metric()
# returns under the same name.
dendro_distance = DendroDistance(
    dataset1["cube"][0],
    dataset2["cube"][0],
    min_deltas=min_deltas,
)
dendro_distance = dendro_distance.distance_metric(verbose=False)

dendrogram_val = dendro_distance.dendro1.numfeatures

## PDF distance between the two integrated-intensity entries.

from turbustat.statistics import PDF_Distance

pdf_distance = PDF_Distance(
    dataset1["integrated_intensity"][0],
    dataset2["integrated_intensity"][0],
    min_val1=0.05,
    min_val2=0.05,
    # Weights: inverse square of the first element of each error entry.
    weights1=dataset1["integrated_intensity_error"][0] ** -2.,
    weights2=dataset2["integrated_intensity_error"][0] ** -2.,
)
pdf_distance.distance_metric(verbose=False)

pdf_val = pdf_distance.PDF1.pdf

np.savez_compressed('checkVals', wavelet_val=wavelet_val,
                    mvc_val=mvc_val,
                    pspec_val=pspec_val,
                    bispec_val=bispec_val,
                    genus_val=genus_val,
                    delvar_val=delvar_val,
                    vcs_val=vcs_val,
def stats_wrapper(dataset1,
                  dataset2,
                  fiducial_models=None,
                  statistics=None,
                  multicore=False,
                  vca_break=None,
                  vcs_break=None,
                  vcs_regrid=[None, None],
                  dendro_params=None,
                  periodic_bounds=[True, True],
                  noise_value=[-np.inf, -np.inf],
                  dendro_saves=[None, None],
                  scf_saves=[None, None],
                  inertial_range=[[None] * 2, [None] * 2],
                  spatial_range=[[None] * 2, [None] * 2]):
    '''
    Function to run all of the statistics on two datasets.
    Each statistic is run with set inputs. This function needs to be altered
    to change the inputs.

    The list-valued defaults are only read by this function, never modified
    in place.

    Parameters
    ----------
    dataset1 : dict
        Contains the cube and all of its property arrays. Depending on the
        requested statistics, the keys "cube", "moment0", "moment0_error",
        "centroid" and "centroid_error" are read; MVC receives the whole
        dictionary.
    dataset2 : dict
        See dataset1
    fiducial_models : dict, optional
        Models for dataset1. Avoids recomputing when comparing
        many sets to dataset1. When None, a new dictionary is created with
        a None entry for each requested statistic.
    statistics : list, optional
        List of all of the statistics to use. If None, all are run
        (the module-level ``statistics_list`` is used).
    multicore : bool, optional
        If the wrapper is being used in parallel, this disables
        returning model values for dataset1.
    vca_break : optional
        Passed as ``breaks`` to ``VCA_Distance``.
    vcs_break : float, optional
        Pass an initial guess for the location of the VCS break
        (passed as ``breaks`` to ``VCS_Distance``).
    vcs_regrid : list, optional
        One entry per dataset. A non-None entry is cast to int and handed
        to ``spectral_regrid_cube`` to regrid that cube spectrally before
        running VCS. Only VCS uses the regridded cubes.
    dendro_params : dict or list, optional
        Provides parameters to use when computing the initial dendrogram.
        If different parameters are required for each dataset, the
        input should be a list containing the two dictionaries.
    periodic_bounds : list of bools
        One entry per dataset. True selects 'wrap' (Delta-variance) or
        'continuous' (SCF) boundaries; False selects 'fill' or 'cut'.
        The list is also passed unchanged to ``DendroDistance``.
    noise_value : list, optional
        One entry per dataset. Passed to ``Cramer_Distance`` as
        ``noise_value1``/``noise_value2``; twice each value is used as the
        minimum value for the PDF.
    dendro_saves : list, optional
        One entry per dataset: None to use the dataset's cube, or the
        filename (str) of a saved file to use instead.
    scf_saves : list, optional
        Currently unused.
    inertial_range : list, optional
        ``inertial_range[0]`` is passed as ``low_cut`` and
        ``inertial_range[1]`` as ``high_cut`` to MVC, PSpec and VCA.
    spatial_range : list, optional
        ``spatial_range[0]`` is passed as ``xlow`` and ``spatial_range[1]``
        as ``xhigh`` to Wavelet and both Delta-variance runs.

    Returns
    -------
    distances : dict
        Distance values keyed by statistic name.
    fiducial_models : dict
        Models for dataset1 keyed by statistic. Only returned when
        ``multicore`` is False.

    Raises
    ------
    UserWarning
        If an entry of ``dendro_saves`` is neither None nor a str.
    '''

    if statistics is None:  # Run them all
        statistics = statistics_list

    distances = {}

    # Without supplied models, start every requested statistic at None.
    # Statistics that share one underlying model are stored under a
    # common key.
    if fiducial_models is None:
        fiducial_models = {}
        for statistic in statistics:
            if "PDF" in statistic:
                statistic = "PDF"
            elif statistic == "Skewness" or statistic == "Kurtosis":
                statistic = "stat_moments"
            elif "Dendrogram" in statistic:
                statistic = "Dendrogram"
            elif "DeltaVariance_Centroid" in statistic:
                statistic = "DeltaVariance_Centroid"
            elif "DeltaVariance" in statistic and "Centroid" not in statistic:
                statistic = "DeltaVariance"
            fiducial_models[statistic] = None

    if any("Wavelet" in s for s in statistics):
        wavelet_distance = \
            Wavelet_Distance(dataset1["moment0"],
                             dataset2["moment0"],
                             fiducial_model=fiducial_models["Wavelet"],
                             xlow=spatial_range[0],
                             xhigh=spatial_range[1])
        wavelet_distance.distance_metric()
        distances["Wavelet"] = wavelet_distance.distance
        if not multicore:
            fiducial_models["Wavelet"] = copy(wavelet_distance.wt1)

        del wavelet_distance

    if any("MVC" in s for s in statistics):
        mvc_distance = \
            MVC_Distance(dataset1, dataset2,
                         fiducial_model=fiducial_models["MVC"],
                         low_cut=inertial_range[0],
                         high_cut=inertial_range[1])
        mvc_distance.distance_metric()
        distances["MVC"] = mvc_distance.distance
        if not multicore:
            fiducial_models["MVC"] = copy(mvc_distance.mvc1)

        del mvc_distance

    if any("PSpec" in s for s in statistics):
        pspec_distance = \
            PSpec_Distance(dataset1["moment0"],
                           dataset2["moment0"],
                           fiducial_model=fiducial_models['PSpec'],
                           low_cut=inertial_range[0],
                           high_cut=inertial_range[1])
        pspec_distance.distance_metric()
        distances["PSpec"] = pspec_distance.distance
        if not multicore:
            fiducial_models["PSpec"] = copy(pspec_distance.pspec1)

        del pspec_distance

    if any("Bispectrum" in s for s in statistics):
        bispec_distance = \
            BiSpectrum_Distance(dataset1["moment0"],
                                dataset2["moment0"],
                                fiducial_model=fiducial_models['Bispectrum'])
        bispec_distance.distance_metric()
        distances["Bispectrum"] = bispec_distance.distance
        if not multicore:
            fiducial_models["Bispectrum"] = copy(bispec_distance.bispec1)

        del bispec_distance

    if any("DeltaVariance_Slope" in s for s in statistics) or \
       any("DeltaVariance_Curve" in s for s in statistics):

        # Check for how boundaries should be handled.
        boundary1 = 'wrap' if periodic_bounds[0] else 'fill'
        boundary2 = 'wrap' if periodic_bounds[1] else 'fill'

        delvar_distance = \
            DeltaVariance_Distance(dataset1["moment0"],
                                   dataset2["moment0"],
                                   weights1=dataset1["moment0_error"][0]**-2,
                                   weights2=dataset2["moment0_error"][0]**-2,
                                   fiducial_model=fiducial_models["DeltaVariance"],
                                   xlow=spatial_range[0],
                                   xhigh=spatial_range[1],
                                   boundary=[boundary1, boundary2])

        delvar_distance.distance_metric()
        distances["DeltaVariance_Curve"] = delvar_distance.curve_distance
        distances["DeltaVariance_Slope"] = delvar_distance.slope_distance
        if not multicore:
            fiducial_models["DeltaVariance"] = copy(delvar_distance.delvar1)

        del delvar_distance

    if any("DeltaVariance_Centroid_Slope" in s for s in statistics) or \
       any("DeltaVariance_Centroid_Curve" in s for s in statistics):

        # Check for how boundaries should be handled.
        boundary1 = 'wrap' if periodic_bounds[0] else 'fill'
        boundary2 = 'wrap' if periodic_bounds[1] else 'fill'

        delvar_distance = \
            DeltaVariance_Distance(dataset1["centroid"],
                                   dataset2["centroid"],
                                   weights1=dataset1["centroid_error"][0]**-2,
                                   weights2=dataset2["centroid_error"][0]**-2,
                                   fiducial_model=fiducial_models["DeltaVariance_Centroid"],
                                   xlow=spatial_range[0],
                                   xhigh=spatial_range[1],
                                   boundary=[boundary1, boundary2])
        delvar_distance.distance_metric()
        distances["DeltaVariance_Centroid_Curve"] = \
            delvar_distance.curve_distance
        distances["DeltaVariance_Centroid_Slope"] = \
            delvar_distance.slope_distance
        if not multicore:
            fiducial_models["DeltaVariance_Centroid"] = \
                copy(delvar_distance.delvar1)

        del delvar_distance

    if any("Genus" in s for s in statistics):
        genus_distance = \
            GenusDistance(dataset1["moment0"],
                          dataset2["moment0"],
                          fiducial_model=fiducial_models['Genus'])
        genus_distance.distance_metric()
        distances["Genus"] = genus_distance.distance
        if not multicore:
            fiducial_models["Genus"] = copy(genus_distance.genus1)

        del genus_distance

    if any("VCS" in s for s in statistics):

        # Regrid the cube to lower spectral resolution. These regridded
        # cubes are local to the VCS run.
        if any(vcs_regrid):
            from spectral_cube import SpectralCube
            import astropy.io.fits as fits

            from analysis_funcs import spectral_regrid_cube

            if vcs_regrid[0] is not None:
                cube1_hdu = fits.PrimaryHDU(dataset1["cube"][0],
                                            header=dataset1["cube"][1])
                vcs_cube1 = SpectralCube.read(cube1_hdu)

                vcs_cube1 = spectral_regrid_cube(vcs_cube1,
                                                 int(vcs_regrid[0]))
            else:
                vcs_cube1 = dataset1["cube"]

            if vcs_regrid[1] is not None:
                cube2_hdu = fits.PrimaryHDU(dataset2["cube"][0],
                                            header=dataset2["cube"][1])
                vcs_cube2 = SpectralCube.read(cube2_hdu)

                vcs_cube2 = spectral_regrid_cube(vcs_cube2,
                                                 int(vcs_regrid[1]))
            else:
                vcs_cube2 = dataset2["cube"]

        else:
            vcs_cube1 = dataset1["cube"]
            vcs_cube2 = dataset2["cube"]

        # .get(): when only a VCS sub-statistic (e.g. "VCS_Break") was
        # requested, the initialisation above does not create a "VCS" key.
        vcs_distance = VCS_Distance(vcs_cube1,
                                    vcs_cube2,
                                    breaks=vcs_break,
                                    fiducial_model=fiducial_models.get('VCS'))
        vcs_distance.distance_metric()
        distances["VCS"] = vcs_distance.distance
        distances["VCS_Small_Scale"] = vcs_distance.small_scale_distance
        distances["VCS_Large_Scale"] = vcs_distance.large_scale_distance
        distances["VCS_Break"] = vcs_distance.break_distance
        if not multicore:
            fiducial_models["VCS"] = copy(vcs_distance.vcs1)

        del vcs_distance

    if any("VCA" in s for s in statistics):
        vca_distance = VCA_Distance(dataset1["cube"],
                                    dataset2["cube"],
                                    breaks=vca_break,
                                    fiducial_model=fiducial_models['VCA'],
                                    low_cut=inertial_range[0],
                                    high_cut=inertial_range[1])
        vca_distance.distance_metric()
        distances["VCA"] = vca_distance.distance
        if not multicore:
            fiducial_models["VCA"] = copy(vca_distance.vca1)

        del vca_distance

    if any("Tsallis" in s for s in statistics):
        tsallis_distance = \
            Tsallis_Distance(dataset1["moment0"],
                             dataset2["moment0"],
                             fiducial_model=fiducial_models['Tsallis'])
        tsallis_distance.distance_metric()
        distances["Tsallis"] = tsallis_distance.distance
        if not multicore:
            fiducial_models["Tsallis"] = copy(tsallis_distance.tsallis1)

        del tsallis_distance

    if any("Skewness" in s for s in statistics) or\
       any("Kurtosis" in s for s in statistics):
        moment_distance = \
            StatMoments_Distance(dataset1["moment0"],
                                 dataset2["moment0"], radius=5,
                                 weights1=dataset1["moment0_error"][0]**-2,
                                 weights2=dataset2["moment0_error"][0]**-2,
                                 fiducial_model=fiducial_models['stat_moments'])
        moment_distance.distance_metric()
        distances["Skewness"] = moment_distance.skewness_distance
        distances["Kurtosis"] = moment_distance.kurtosis_distance
        if not multicore:
            fiducial_models["stat_moments"] = \
                copy(moment_distance.moments1)

        del moment_distance

    if any("PCA" in s for s in statistics):
        pca_distance = \
            PCA_Distance(dataset1["cube"],
                         dataset2["cube"],
                         fiducial_model=fiducial_models['PCA'])
        pca_distance.distance_metric()
        distances["PCA"] = pca_distance.distance
        if not multicore:
            fiducial_models["PCA"] = pca_distance.pca1

        del pca_distance

    if any("SCF" in s for s in statistics):

        # Loading saved SCF results through scf_saves is not implemented.
        boundary1 = "continuous" if periodic_bounds[0] else 'cut'
        boundary2 = "continuous" if periodic_bounds[1] else 'cut'

        # Use the dataset cubes directly. Names assigned in the VCS branch
        # must not be relied on here: they do not exist unless VCS was
        # also requested.
        scf_distance = \
            SCF_Distance(dataset1["cube"], dataset2["cube"],
                         boundary=[boundary1, boundary2],
                         fiducial_model=fiducial_models["SCF"])
        scf_distance.distance_metric()
        distances["SCF"] = scf_distance.distance
        if not multicore:
            fiducial_models["SCF"] = copy(scf_distance.scf1)

        del scf_distance

    if any("Cramer" in s for s in statistics):
        # Relies on distance_metric() returning the distance object.
        cramer_distance = \
            Cramer_Distance(dataset1["cube"],
                            dataset2["cube"],
                            noise_value1=noise_value[0],
                            noise_value2=noise_value[1]).distance_metric()
        distances["Cramer"] = cramer_distance.distance

        del cramer_distance

    if any("Dendrogram_Hist" in s for s in statistics) or \
       any("Dendrogram_Num" in s for s in statistics):

        if dendro_saves[0] is None:
            input1 = dataset1["cube"]

        elif isinstance(dendro_saves[0], str):
            input1 = dendro_saves[0]
        else:
            raise UserWarning("dendro_saves must be the filename of the"
                              " saved file.")

        if dendro_saves[1] is None:
            input2 = dataset2["cube"]
        elif isinstance(dendro_saves[1], str):
            input2 = dendro_saves[1]
        else:
            raise UserWarning("dendro_saves must be the filename of the"
                              " saved file.")

        dendro_distance = \
            DendroDistance(input1, input2,
                           dendro_params=dendro_params,
                           fiducial_model=fiducial_models['Dendrogram'],
                           periodic_bounds=periodic_bounds,
                           min_features=40)
        dendro_distance.distance_metric()

        distances["Dendrogram_Hist"] = dendro_distance.histogram_distance
        distances["Dendrogram_Num"] = dendro_distance.num_distance
        if not multicore:
            fiducial_models["Dendrogram"] = copy(dendro_distance.dendro1)

        del dendro_distance

    # The PDF_AD distance is currently disabled.
    if any("PDF_Hellinger" in s for s in statistics) or \
       any("PDF_KS" in s for s in statistics) or \
       any("PDF_Lognormal" in s for s in statistics):
        pdf_distance = \
            PDF_Distance(dataset1["moment0"],
                         dataset2["moment0"],
                         min_val1=2 * noise_value[0],
                         min_val2=2 * noise_value[1])

        pdf_distance.distance_metric()

        distances["PDF_Hellinger"] = pdf_distance.hellinger_distance
        distances["PDF_KS"] = pdf_distance.ks_distance
        distances["PDF_Lognormal"] = pdf_distance.lognormal_distance
        if not multicore:
            fiducial_models["PDF"] = copy(pdf_distance.PDF1)

        del pdf_distance

    if multicore:
        return distances
    else:
        return distances, fiducial_models
Ejemplo n.º 6
0
# 40 logarithmically spaced values from 10**-1.5 to 10**0.5.
min_deltas = np.logspace(-1.5, 0.5, 40)

# Build the dendrogram comparison, then keep whatever distance_metric()
# returns under the same name.
dendro_distance = DendroDistance(
    dataset1["cube"],
    dataset2["cube"],
    min_deltas=min_deltas,
)
dendro_distance = dendro_distance.distance_metric()

dendrogram_val = dendro_distance.dendro1.numfeatures

# PDF distance between the two moment-0 entries.

from turbustat.statistics import PDF_Distance

pdf_distance = PDF_Distance(
    dataset1["moment0"],
    dataset2["moment0"],
    min_val1=0.05,
    min_val2=0.05,
    # Weights: inverse square of the first element of each error entry.
    weights1=dataset1["moment0_error"][0] ** -2.,
    weights2=dataset2["moment0_error"][0] ** -2.,
)
pdf_distance.distance_metric()

# Record the first dataset's PDF values and ECDF, plus the bins exposed by
# the distance object.
pdf_val = pdf_distance.PDF1.pdf
pdf_ecdf = pdf_distance.PDF1.ecdf
pdf_bins = pdf_distance.bins

np.savez_compressed('checkVals', wavelet_val=wavelet_val,
                    mvc_val=mvc_val,
                    pspec_val=pspec_val,
                    bispec_val=bispec_val,
                    genus_val=genus_val,
Ejemplo n.º 7
0
    pca = PCA_Distance(cube_fid, cube, n_eigs=50, mean_sub=True)
    pca.distance_metric(verbose=True,
                        save_name=osjoin(fig_path, 'pca_distmet.png'))

    print(pca.distance)

# PDF
# Compare the moment0 PDF of the "Design4" file against the "Fiducial0"
# file, then print the three resulting distances.
if run_pdf:

    from turbustat.statistics import PDF_Distance

    # [0] takes the first element of what fits.open returns
    # (presumably the primary HDU of an astropy HDUList -- confirm).
    # NOTE(review): the opened files are never closed explicitly here.
    moment0 = fits.open(osjoin(data_path, "Design4_flatrho_0021_00_radmc_moment0.fits"))[0]
    moment0_fid = fits.open(osjoin(data_path, "Fiducial0_flatrho_0021_00_radmc_moment0.fits"))[0]

    # The fiducial map is passed first; fitting is enabled and no
    # normalization type is set.
    pdf = PDF_Distance(moment0_fid, moment0, min_val1=0.0, min_val2=0.0,
                       do_fit=True, normalization_type=None)
    # verbose=True with save_name: the figure path is built under fig_path.
    pdf.distance_metric(verbose=True,
                        save_name=osjoin(fig_path, "pdf_distmet.png"))

    print(pdf.hellinger_distance)
    print(pdf.ks_distance)
    print(pdf.lognormal_distance)


# PSpec
if run_pspec:

    from turbustat.statistics import PSpec_Distance

    moment0 = fits.open(osjoin(data_path, "Design4_flatrho_0021_00_radmc_moment0.fits"))[0]
    moment0_fid = fits.open(osjoin(data_path, "Fiducial0_flatrho_0021_00_radmc_moment0.fits"))[0]
Ejemplo n.º 8
0
# Plot the dendrogram histogram comparison to the file name set earlier in
# the script, then the number-of-features comparison to its own file.
dendro_distance.histogram_stat(verbose=True, label1=label1,
                               label2=label2, savename=filename)
filename = os.path.join(figure_path, "dendrograms_numfeature_example.pdf")
dendro_distance.numfeature_stat(verbose=True, label1=label1,
                                label2=label2, savename=filename)

p.clf()

# Single-argument print(...) calls give the same output under the Python 2
# print statement and the Python 3 print function.
print(dendro_distance.num_distance)
print(dendro_distance.histogram_distance)

# PDF

# Weights are the first element of each moment0 error entry raised to -2.
pdf_distance = \
    PDF_Distance(dataset1["moment0"],
                 dataset2["moment0"],
                 min_val1=0.0,
                 min_val2=0.0,
                 weights1=dataset1["moment0_error"][0] ** -2.,
                 weights2=dataset2["moment0_error"][0] ** -2.)

pdf_distance.distance_metric(verbose=True, show_data=False,
                             label1=label1, label2=label2)

print("Hellinger Distance: " + str(pdf_distance.hellinger_distance))
# Build one string: passing two comma-separated values inside parentheses
# printed a tuple repr under the Python 2 print statement, and the p-value
# label had no separator.
print("KS Distance: " + str(pdf_distance.ks_distance) +
      ", KS p-value: " + str(pdf_distance.ks_pval))

p.savefig(os.path.join(figure_path, "pdf_example.pdf"))
p.clf()
Ejemplo n.º 9
0
def stats_wrapper(dataset1, dataset2, fiducial_models=None,
                  statistics=None, multicore=False, vca_break=None,
                  vcs_break=None, cleanup=True):
    '''
    Function to run all of the statistics on two datasets.
    Each statistic is run with set inputs. This function needs to be altered
    to change the inputs.

    Parameters
    ----------
    dataset1 : dict
        Contains the cube and all of its property arrays.
    dataset2 : dict
        See dataset1
    fiducial_models : dict, optional
        Models for dataset1. Avoids recomputing when comparing
        many sets to dataset1. When given, every statistic that accepts one
        is called with ``fiducial_model=fiducial_models[<key>]``.
    statistics : list, optional
        List of all of the statistics to use. If None, all are run.
        Names are matched by substring, so e.g. "VCS_Break" also selects
        the "VCS" group.
    multicore : bool, optional
        If the wrapper is being used in parallel, this disables
        returning model values for dataset1.
    vca_break : optional
        Passed as ``breaks`` to ``VCA_Distance``.
    vcs_break : optional
        Passed as ``breaks`` to ``VCS_Distance``.
    cleanup : bool, optional
        Delete distance classes after running.

    Returns
    -------
    distances : dict
        Distance values keyed by statistic name.
    fiducial_models : dict
        Only returned when ``fiducial_models`` was None on input and
        ``multicore`` is False. Holds the dataset1 models gathered
        during this call.
    '''

    if statistics is None:  # Run them all
        statistics = ["Wavelet", "MVC", "PSpec", "Bispectrum", "DeltaVariance",
                      "Genus", "VCS", "VCA", "VCS_Density", "VCS_Velocity",
                      "VCS_Break",
                      "Tsallis", "PCA", "SCF", "Cramer", "Skewness",
                      "Kurtosis", "Dendrogram_Hist",
                      "Dendrogram_Num", "PDF_Hellinger", "PDF_KS"]

    distances = {}

    # With no fiducial supplied, this call computes the fiducial case and
    # (for serial runs) collects the dataset1 models for later reuse.
    have_fiducial = fiducial_models is not None
    if not have_fiducial:
        fiducial_models = {}
    keep_models = not have_fiducial and not multicore

    def requested(*names):
        # Substring match of any of the given names against the requests.
        return any(name in s for name in names for s in statistics)

    def fid(key):
        # Extra keyword for the distance class: the stored dataset1 model
        # when one was supplied, otherwise nothing.
        if have_fiducial:
            return {"fiducial_model": fiducial_models[key]}
        return {}

    if requested("Wavelet"):
        wavelet_distance = \
            Wavelet_Distance(dataset1["integrated_intensity"],
                             dataset2["integrated_intensity"],
                             **fid("Wavelet")).distance_metric()
        distances["Wavelet"] = wavelet_distance.distance
        if keep_models:
            fiducial_models["Wavelet"] = wavelet_distance.wt1

        if cleanup:
            del wavelet_distance

    if requested("MVC"):
        mvc_distance = \
            MVC_distance(dataset1, dataset2,
                         **fid("MVC")).distance_metric()
        distances["MVC"] = mvc_distance.distance
        if keep_models:
            fiducial_models["MVC"] = mvc_distance.mvc1

        if cleanup:
            del mvc_distance

    if requested("PSpec"):
        # Both code paths now use the ``weights1``/``weights2`` keywords;
        # the fiducial path previously passed ``weight1``/``weight2``.
        pspec_distance = \
            PSpec_Distance(dataset1["integrated_intensity"],
                           dataset2["integrated_intensity"],
                           weights1=dataset1["integrated_intensity_error"][0]**2.,
                           weights2=dataset2["integrated_intensity_error"][0]**2.,
                           **fid("PSpec")).distance_metric()
        distances["PSpec"] = pspec_distance.distance
        if keep_models:
            fiducial_models["PSpec"] = pspec_distance.pspec1

        if cleanup:
            del pspec_distance

    if requested("Bispectrum"):
        bispec_distance = \
            BiSpectrum_Distance(dataset1["integrated_intensity"],
                                dataset2["integrated_intensity"],
                                **fid("Bispectrum")).distance_metric()
        distances["Bispectrum"] = bispec_distance.distance
        if keep_models:
            fiducial_models["Bispectrum"] = bispec_distance.bispec1

        if cleanup:
            del bispec_distance

    if requested("DeltaVariance"):
        delvar_distance = \
            DeltaVariance_Distance(dataset1["integrated_intensity"],
                                   dataset2["integrated_intensity"],
                                   weights1=dataset1["integrated_intensity_error"][0],
                                   weights2=dataset2["integrated_intensity_error"][0],
                                   **fid("DeltaVariance")).distance_metric()
        distances["DeltaVariance"] = delvar_distance.distance
        if keep_models:
            fiducial_models["DeltaVariance"] = delvar_distance.delvar1

        if cleanup:
            del delvar_distance

    if requested("Genus"):
        genus_distance = \
            GenusDistance(dataset1["integrated_intensity"][0],
                          dataset2["integrated_intensity"][0],
                          **fid("Genus")).distance_metric()
        distances["Genus"] = genus_distance.distance
        if keep_models:
            fiducial_models["Genus"] = genus_distance.genus1

        if cleanup:
            del genus_distance

    if requested("VCS"):
        vcs_distance = \
            VCS_Distance(dataset1["cube"],
                         dataset2["cube"],
                         breaks=vcs_break,
                         **fid("VCS")).distance_metric()
        # One VCS run fills all four VCS-related entries.
        distances["VCS"] = vcs_distance.distance
        distances["VCS_Density"] = vcs_distance.density_distance
        distances["VCS_Velocity"] = vcs_distance.velocity_distance
        distances["VCS_Break"] = vcs_distance.break_distance
        if keep_models:
            fiducial_models["VCS"] = vcs_distance.vcs1

        if cleanup:
            del vcs_distance

    if requested("VCA"):
        vca_distance = \
            VCA_Distance(dataset1["cube"],
                         dataset2["cube"],
                         breaks=vca_break,
                         **fid("VCA")).distance_metric()
        distances["VCA"] = vca_distance.distance
        if keep_models:
            fiducial_models["VCA"] = vca_distance.vca1

        if cleanup:
            del vca_distance

    if requested("Tsallis"):
        tsallis_distance = \
            Tsallis_Distance(dataset1["integrated_intensity"][0],
                             dataset2["integrated_intensity"][0],
                             **fid("Tsallis")).distance_metric()
        distances["Tsallis"] = tsallis_distance.distance
        if keep_models:
            fiducial_models["Tsallis"] = tsallis_distance.tsallis1

        if cleanup:
            del tsallis_distance

    if requested("Skewness", "Kurtosis"):
        # Either name triggers the run; both distances are always stored.
        moment_distance = \
            StatMomentsDistance(dataset1["integrated_intensity"][0],
                                dataset2["integrated_intensity"][0],
                                5,
                                **fid("stat_moments")).distance_metric()
        distances["Skewness"] = moment_distance.skewness_distance
        distances["Kurtosis"] = moment_distance.kurtosis_distance
        if keep_models:
            fiducial_models["stat_moments"] = moment_distance.moments1

        if cleanup:
            del moment_distance

    if requested("PCA"):
        pca_distance = \
            PCA_Distance(dataset1["cube"][0],
                         dataset2["cube"][0],
                         **fid("PCA")).distance_metric()
        distances["PCA"] = pca_distance.distance
        if keep_models:
            fiducial_models["PCA"] = pca_distance.pca1

        if cleanup:
            del pca_distance

    if requested("SCF"):
        scf_distance = \
            SCF_Distance(dataset1["cube"][0],
                         dataset2["cube"][0],
                         **fid("SCF")).distance_metric()
        distances["SCF"] = scf_distance.distance
        if keep_models:
            fiducial_models["SCF"] = scf_distance.scf1

        if cleanup:
            del scf_distance

    if requested("Cramer"):
        # No fiducial model is passed to or stored for this statistic.
        cramer_distance = \
            Cramer_Distance(dataset1["cube"][0],
                            dataset2["cube"][0]).distance_metric()
        distances["Cramer"] = cramer_distance.distance

        if cleanup:
            del cramer_distance

    if requested("Dendrogram_Hist", "Dendrogram_Num"):
        dendro_distance = DendroDistance(dataset1["cube"][0],
                                         dataset2["cube"][0],
                                         **fid("Dendrogram"))
        dendro_distance.distance_metric()

        distances["Dendrogram_Hist"] = dendro_distance.histogram_distance
        distances["Dendrogram_Num"] = dendro_distance.num_distance
        if keep_models:
            fiducial_models["Dendrogram"] = dendro_distance.dendro1

        if cleanup:
            del dendro_distance

    if requested("PDF_Hellinger", "PDF_KS", "PDF_AD"):
        # The PDF comparison is always recomputed from the data; a supplied
        # fiducial model is not passed to it.
        pdf_distance = \
            PDF_Distance(dataset1["integrated_intensity"][0],
                         dataset2["integrated_intensity"][0],
                         min_val1=0.05,
                         min_val2=0.05,
                         weights1=dataset1["integrated_intensity_error"][0] ** -2.,
                         weights2=dataset2["integrated_intensity_error"][0] ** -2.)

        pdf_distance.distance_metric()

        distances["PDF_Hellinger"] = pdf_distance.hellinger_distance
        distances["PDF_KS"] = pdf_distance.ks_distance
        distances["PDF_AD"] = pdf_distance.ad_distance
        if keep_models:
            fiducial_models["PDF"] = pdf_distance.PDF1

        if cleanup:
            del pdf_distance

    # Models are handed back only when they were built here on a serial run.
    if have_fiducial or multicore:
        return distances
    return distances, fiducial_models
Ejemplo n.º 10
0
def generate_unitvals():
    """
    Regenerate the reference values used by the unit tests.

    Runs each statistic on the packaged ``dataset1``/``dataset2`` test data
    and writes two compressed archives via ``np.savez_compressed``:
    'checkVals' (per-statistic outputs, mostly for dataset1) and
    'computed_distances' (the distance values between the two datasets).

    Raises
    ------
    ImportError
        If emcee cannot be imported.
    """

    import numpy as np
    import astropy.units as u

    # The machine producing these values should have emcee installed!
    try:
        import emcee
    except ImportError:
        raise ImportError("Install emcee to generate unit test data.")

    from turbustat.tests._testing_data import dataset1, dataset2

    # Wavelet Transform

    from turbustat.statistics import Wavelet_Distance, Wavelet

    wavelet_distance = \
        Wavelet_Distance(dataset1["moment0"],
                         dataset2["moment0"]).distance_metric()

    wavelet_val = wavelet_distance.wt1.values
    wavelet_slope = wavelet_distance.wt1.slope

    # Wavelet with a break
    wave_break = Wavelet(dataset1['moment0']).run(xhigh=7 * u.pix,
                                                  brk=5.5 * u.pix)

    wavelet_slope_wbrk = wave_break.slope
    wavelet_brk_wbrk = wave_break.brk.value

    # MVC

    from turbustat.statistics import MVC_Distance, MVC

    # Only the distance is used from this object (saved at the end); the
    # reference spectrum and slopes come from the separate MVC run below.
    mvc_distance = MVC_Distance(dataset1, dataset2).distance_metric()

    mvc = MVC(dataset1["centroid"], dataset1["moment0"], dataset1["linewidth"])
    mvc.run()

    mvc_val = mvc.ps1D
    mvc_slope = mvc.slope
    mvc_slope2D = mvc.slope2D

    # Spatial Power Spectrum/ Bispectrum

    from turbustat.statistics import (PSpec_Distance, Bispectrum_Distance,
                                      Bispectrum, PowerSpectrum)

    pspec_distance = \
        PSpec_Distance(dataset1["moment0"],
                       dataset2["moment0"]).distance_metric()

    pspec = PowerSpectrum(dataset1['moment0'])
    pspec.run()

    pspec_val = pspec.ps1D
    pspec_slope = pspec.slope
    pspec_slope2D = pspec.slope2D

    bispec_distance = \
        Bispectrum_Distance(dataset1["moment0"],
                            dataset2["moment0"]).distance_metric()

    bispec_val = bispec_distance.bispec1.bicoherence

    # The result is indexed by the first argument (16); elements 0, 1, 2
    # of that entry are stored as the bins, values and standard deviations.
    azimuthal_slice = bispec_distance.bispec1.azimuthal_slice(
        16, 10, value='bispectrum_logamp', bin_width=5 * u.deg)
    bispec_azim_bins = azimuthal_slice[16][0]
    bispec_azim_vals = azimuthal_slice[16][1]
    bispec_azim_stds = azimuthal_slice[16][2]

    # Bicoherence again, this time with mean subtraction enabled.
    bispec_meansub = Bispectrum(dataset1['moment0'])
    bispec_meansub.run(mean_subtract=True)

    bispec_val_meansub = bispec_meansub.bicoherence

    # Genus

    from turbustat.statistics import GenusDistance, Genus

    # Five smoothing radii from 1.0 up to a tenth of the smaller dimension
    # of the first element of dataset1's moment0 entry.
    smooth_scales = np.linspace(1.0, 0.1 * min(dataset1["moment0"][0].shape),
                                5)

    genus_distance = \
        GenusDistance(dataset1["moment0"],
                      dataset2["moment0"],
                      lowdens_percent=20,
                      genus_kwargs=dict(match_kernel=True)).distance_metric()

    # The distance method requires standardizing the data. Make a
    # separate version that isn't
    genus = Genus(dataset1['moment0'], smoothing_radii=smooth_scales)
    genus.run(match_kernel=True)

    genus_val = genus.genus_stats

    # Delta-Variance

    from turbustat.statistics import DeltaVariance_Distance, DeltaVariance

    delvar_distance = \
        DeltaVariance_Distance(dataset1["moment0"],
                               dataset2["moment0"],
                               weights1=dataset1["moment0_error"][0],
                               weights2=dataset2["moment0_error"][0],
                               delvar_kwargs=dict(xhigh=11 * u.pix))

    delvar_distance.distance_metric()

    delvar = DeltaVariance(dataset1["moment0"],
                           weights=dataset1['moment0_error'][0]).run(xhigh=11 *
                                                                     u.pix)

    delvar_val = delvar.delta_var
    delvar_slope = delvar.slope

    # Test with a break point
    delvar_wbrk = \
      DeltaVariance(dataset1["moment0"],
                    weights=dataset1['moment0_error'][0]).run(xhigh=11 * u.pix,
                                                              brk=6 * u.pix)

    delvar_slope_wbrk = delvar_wbrk.slope
    delvar_brk = delvar_wbrk.brk.value

    # Change boundary conditions

    delvar_fill = \
        DeltaVariance(dataset1["moment0"],
                      weights=dataset1['moment0_error'][0]).run(xhigh=11 * u.pix,
                                                                boundary='fill',
                                                                nan_treatment='interpolate')

    delvar_fill_val = delvar_fill.delta_var
    delvar_fill_slope = delvar_fill.slope

    # VCA/VCS

    from turbustat.statistics import VCA_Distance, VCS_Distance, VCA

    vcs_distance = VCS_Distance(dataset1["cube"],
                                dataset2["cube"],
                                fit_kwargs=dict(high_cut=0.3 / u.pix,
                                                low_cut=3e-2 / u.pix))
    vcs_distance.distance_metric()

    vcs_val = vcs_distance.vcs1.ps1D
    vcs_slopes = vcs_distance.vcs1.slope

    vca_distance = VCA_Distance(dataset1["cube"],
                                dataset2["cube"]).distance_metric()

    vca = VCA(dataset1['cube'])
    vca.run()

    vca_val = vca.ps1D
    vca_slope = vca.slope
    vca_slope2D = vca.slope2D

    # Tsallis

    from turbustat.statistics import Tsallis

    tsallis_kwargs = {"sigma_clip": 5, "num_bins": 100}

    tsallis = Tsallis(dataset1['moment0'], lags=[1, 2, 4, 8, 16] * u.pix)
    tsallis.run(periodic=True, **tsallis_kwargs)

    tsallis_val = tsallis.tsallis_params
    tsallis_stderrs = tsallis.tsallis_stderrs

    # Non-periodic variant; note sigma_clip is not passed to this run.
    tsallis_noper = Tsallis(dataset1['moment0'], lags=[1, 2, 4, 8, 16] * u.pix)
    tsallis_noper.run(periodic=False, num_bins=100)

    tsallis_val_noper = tsallis_noper.tsallis_params

    # High-order stats

    from turbustat.statistics import StatMoments_Distance, StatMoments

    moment_distance = \
        StatMoments_Distance(dataset1["moment0"],
                             dataset2["moment0"]).distance_metric()

    # Element 1 of each histogram attribute is what gets saved.
    kurtosis_val = moment_distance.moments1.kurtosis_hist[1]
    skewness_val = moment_distance.moments1.skewness_hist[1]

    # Save a few from the non-distance version
    tester = StatMoments(dataset1["moment0"])
    tester.run()

    kurtosis_nondist_val = tester.kurtosis_hist[1]
    skewness_nondist_val = tester.skewness_hist[1]

    # Non-periodic
    # ``tester`` is rebound, so the values above must be read before this.
    tester = StatMoments(dataset1["moment0"])
    tester.run(periodic=False)

    kurtosis_nonper_val = tester.kurtosis_hist[1]
    skewness_nonper_val = tester.skewness_hist[1]

    # PCA

    from turbustat.statistics import PCA_Distance, PCA
    pca_distance = PCA_Distance(dataset1["cube"],
                                dataset2["cube"]).distance_metric()

    # The same ``pca`` object is re-run several times below; each set of
    # values is read out before the next run is started.
    pca = PCA(dataset1["cube"], distance=250 * u.pc)
    pca.run(mean_sub=True,
            eigen_cut_method='proportion',
            min_eigval=0.75,
            spatial_method='contour',
            spectral_method='walk-down',
            fit_method='odr',
            brunt_beamcorrect=False,
            spectral_output_unit=u.m / u.s)

    pca_val = pca.eigvals
    pca_spectral_widths = pca.spectral_width().value
    pca_spatial_widths = pca.spatial_width().value

    pca_fit_vals = {
        "index": pca.index,
        "gamma": pca.gamma,
        "intercept": pca.intercept().value,
        "sonic_length": pca.sonic_length()[0].value
    }

    # Now get those values using mcmc
    pca.run(mean_sub=True,
            eigen_cut_method='proportion',
            min_eigval=0.75,
            spatial_method='contour',
            spectral_method='walk-down',
            fit_method='bayes',
            brunt_beamcorrect=False,
            spectral_output_unit=u.m / u.s)

    pca_fit_vals["index_bayes"] = pca.index
    pca_fit_vals["gamma_bayes"] = pca.gamma
    pca_fit_vals["intercept_bayes"] = pca.intercept().value
    pca_fit_vals["sonic_length_bayes"] = pca.sonic_length()[0].value

    # Record the number of eigenvalues kept by the auto method
    pca.run(mean_sub=True,
            n_eigs='auto',
            min_eigval=0.001,
            eigen_cut_method='value',
            decomp_only=True)

    pca_fit_vals["n_eigs_value"] = pca.n_eigs

    # Now w/ the proportion of variance cut
    pca.run(mean_sub=True,
            n_eigs='auto',
            min_eigval=0.99,
            eigen_cut_method='proportion',
            decomp_only=True)

    pca_fit_vals["n_eigs_proportion"] = pca.n_eigs

    # SCF

    from turbustat.statistics import SCF_Distance, SCF

    scf_distance = SCF_Distance(dataset1["cube"], dataset2["cube"],
                                size=11).distance_metric()

    scf = SCF(dataset1['cube'], size=11).run()

    scf_val = scf.scf_surface
    scf_spectrum = scf.scf_spectrum
    scf_slope = scf.slope
    scf_slope2D = scf.slope2D

    # Now run the SCF when the boundaries aren't continuous
    scf_distance_cut_bound = SCF_Distance(dataset1["cube"],
                                          dataset2["cube"],
                                          size=11,
                                          boundary='cut').distance_metric()
    scf_val_noncon_bound = scf_distance_cut_bound.scf1.scf_surface

    # Fit restricted to lags between 1.5 and 4.5 pixels.
    scf_fitlims = SCF(dataset1['cube'], size=11)
    scf_fitlims.run(boundary='continuous', xlow=1.5 * u.pix, xhigh=4.5 * u.pix)

    scf_slope_wlimits = scf_fitlims.slope
    scf_slope_wlimits_2D = scf_fitlims.slope2D

    # Cramer Statistic

    from turbustat.statistics import Cramer_Distance

    cramer_distance = Cramer_Distance(
        dataset1["cube"], dataset2["cube"], noise_value1=0.1,
        noise_value2=0.1).distance_metric(normalize=False)

    cramer_val = cramer_distance.data_matrix1

    # Dendrograms

    from turbustat.statistics import Dendrogram_Distance, Dendrogram_Stats

    # 40 logarithmically spaced min_delta values from 10**-1.5 to 10**0.5.
    min_deltas = np.logspace(-1.5, 0.5, 40)

    dendro_distance = Dendrogram_Distance(
        dataset1["cube"], dataset2["cube"],
        min_deltas=min_deltas).distance_metric()

    dendrogram_val = dendro_distance.dendro1.numfeatures

    # With periodic boundaries
    dendro = Dendrogram_Stats(dataset1['cube'], min_deltas=min_deltas)
    dendro.run(periodic_bounds=True)

    dendrogram_periodic_val = dendro.numfeatures

    # PDF

    from turbustat.statistics import PDF_Distance

    # Unfitted, standardized comparison; weights are the first element of
    # each moment0 error entry raised to the power -2.
    pdf_distance = \
        PDF_Distance(dataset1["moment0"],
                     dataset2["moment0"],
                     min_val1=0.05,
                     min_val2=0.05,
                     weights1=dataset1["moment0_error"][0]**-2.,
                     weights2=dataset2["moment0_error"][0]**-2.,
                     do_fit=False,
                     normalization_type='standardize')

    pdf_distance.distance_metric()

    pdf_val = pdf_distance.PDF1.pdf
    pdf_ecdf = pdf_distance.PDF1.ecdf
    pdf_bins = pdf_distance.bins

    # Do a fitted version of the PDF (no weights, no normalization); only
    # its lognormal distance is saved below.
    pdf_fit_distance = \
        PDF_Distance(dataset1["moment0"],
                     dataset2["moment0"],
                     min_val1=0.05,
                     min_val2=0.05,
                     do_fit=True,
                     normalization_type=None)

    pdf_fit_distance.distance_metric()

    # Archive 1: the per-statistic reference outputs gathered above.
    np.savez_compressed('checkVals',
                        wavelet_val=wavelet_val,
                        wavelet_slope=wavelet_slope,
                        wavelet_slope_wbrk=wavelet_slope_wbrk,
                        wavelet_brk_wbrk=wavelet_brk_wbrk,
                        mvc_val=mvc_val,
                        mvc_slope=mvc_slope,
                        mvc_slope2D=mvc_slope2D,
                        pspec_val=pspec_val,
                        pspec_slope=pspec_slope,
                        pspec_slope2D=pspec_slope2D,
                        bispec_val=bispec_val,
                        bispec_azim_bins=bispec_azim_bins,
                        bispec_azim_vals=bispec_azim_vals,
                        bispec_azim_stds=bispec_azim_stds,
                        bispec_val_meansub=bispec_val_meansub,
                        genus_val=genus_val,
                        delvar_val=delvar_val,
                        delvar_slope=delvar_slope,
                        delvar_slope_wbrk=delvar_slope_wbrk,
                        delvar_brk=delvar_brk,
                        delvar_fill_val=delvar_fill_val,
                        delvar_fill_slope=delvar_fill_slope,
                        vcs_val=vcs_val,
                        vcs_slopes=vcs_slopes,
                        vca_val=vca_val,
                        vca_slope=vca_slope,
                        vca_slope2D=vca_slope2D,
                        tsallis_val=tsallis_val,
                        tsallis_stderrs=tsallis_stderrs,
                        tsallis_val_noper=tsallis_val_noper,
                        kurtosis_val=kurtosis_val,
                        skewness_val=skewness_val,
                        kurtosis_nondist_val=kurtosis_nondist_val,
                        skewness_nondist_val=skewness_nondist_val,
                        kurtosis_nonper_val=kurtosis_nonper_val,
                        skewness_nonper_val=skewness_nonper_val,
                        pca_val=pca_val,
                        pca_fit_vals=pca_fit_vals,
                        pca_spectral_widths=pca_spectral_widths,
                        pca_spatial_widths=pca_spatial_widths,
                        scf_val=scf_val,
                        scf_slope_wlimits=scf_slope_wlimits,
                        scf_slope_wlimits_2D=scf_slope_wlimits_2D,
                        scf_val_noncon_bound=scf_val_noncon_bound,
                        scf_spectrum=scf_spectrum,
                        scf_slope=scf_slope,
                        scf_slope2D=scf_slope2D,
                        cramer_val=cramer_val,
                        dendrogram_val=dendrogram_val,
                        dendrogram_periodic_val=dendrogram_periodic_val,
                        pdf_val=pdf_val,
                        pdf_bins=pdf_bins,
                        pdf_ecdf=pdf_ecdf)

    # Archive 2: the distances between dataset1 and dataset2. No Tsallis
    # distance object is built in this function, hence the disabled entry.
    np.savez_compressed(
        'computed_distances',
        mvc_distance=mvc_distance.distance,
        pca_distance=pca_distance.distance,
        vca_distance=vca_distance.distance,
        pspec_distance=pspec_distance.distance,
        scf_distance=scf_distance.distance,
        wavelet_distance=wavelet_distance.distance,
        delvar_curve_distance=delvar_distance.curve_distance,
        delvar_slope_distance=delvar_distance.slope_distance,
        # tsallis_distance=tsallis_distance.distance,
        kurtosis_distance=moment_distance.kurtosis_distance,
        skewness_distance=moment_distance.skewness_distance,
        cramer_distance=cramer_distance.distance,
        genus_distance=genus_distance.distance,
        vcs_distance=vcs_distance.distance,
        bispec_mean_distance=bispec_distance.mean_distance,
        bispec_surface_distance=bispec_distance.surface_distance,
        dendrohist_distance=dendro_distance.histogram_distance,
        dendronum_distance=dendro_distance.num_distance,
        pdf_hellinger_distance=pdf_distance.hellinger_distance,
        pdf_ks_distance=pdf_distance.ks_distance,
        pdf_lognorm_distance=pdf_fit_distance.lognormal_distance)
Ejemplo n.º 11
0
                                                              label1=fits1,
                                                              label2=fits2)

# Single-argument print(...) calls: same output under the Python 2 print
# statement, and valid under the Python 3 print function.
print("Kurtosis Distance: %s" % (moment_distance.kurtosis_distance))

print("Skewness Distance: %s" % (moment_distance.skewness_distance))

# # Dendrogram Stats

from turbustat.statistics import DendroDistance

# distance_metric() is chained, so the name holds that call's return value;
# the attribute reads below need it to expose the two distances.
dendro_distance = DendroDistance(data1, data2).distance_metric(verbose=True,
                                                               label1=fits1,
                                                               label2=fits2)

print("Dendrogram Number Distance: %s " % (dendro_distance.num_distance))
print("Dendrogram Histogram Distance: %s " %
      (dendro_distance.histogram_distance))

# PDF

from turbustat.statistics import PDF_Distance

pdf_distance = \
    PDF_Distance(data1,
                 data2).distance_metric(verbose=True, label1=fits1,
                                        label2=fits2)

print("PDF Hellinger Distance: %s " % (pdf_distance.hellinger_distance))
print("PDF KS-Test Distance: %s " % (pdf_distance.ks_distance))
Ejemplo n.º 12
0
# Dendrogram statistics with periodic boundaries
dendro = Dendrogram_Stats(dataset1['cube'], min_deltas=min_deltas)
dendro.run(periodic_bounds=True)

dendrogram_periodic_val = dendro.numfeatures

# PDF

from turbustat.statistics import PDF_Distance

# Keyword settings for the comparison: a 0.05 minimum value for both inputs,
# weights computed as the first element of each moment0 error entry raised to
# the power -2, fitting disabled, and 'standardize' normalization.
pdf_settings = dict(
    min_val1=0.05,
    min_val2=0.05,
    weights1=dataset1["moment0_error"][0]**-2.,
    weights2=dataset2["moment0_error"][0]**-2.,
    do_fit=False,
    normalization_type='standardize',
)
pdf_distance = PDF_Distance(dataset1["moment0"], dataset2["moment0"],
                            **pdf_settings)

pdf_distance.distance_metric()

# Keep the PDF and ECDF held on PDF1, plus the bins held on the distance
# object itself.
first_pdf = pdf_distance.PDF1
pdf_val, pdf_ecdf = first_pdf.pdf, first_pdf.ecdf
pdf_bins = pdf_distance.bins

# Do a fitted version of the PDF pca
pdf_fit_distance = \
    PDF_Distance(dataset1["moment0"],
                 dataset2["moment0"],
Ejemplo n.º 13
0
# Dendrogram Stats

from turbustat.statistics import DendroDistance

# The value returned by distance_metric is kept because the distances printed
# below are read from it.
dendro_distance = DendroDistance(
    dataset1["cube"], dataset2["cube"]).distance_metric(verbose=True,
                                                        label1=fits1,
                                                        label2=fits2)

# print is called with a single pre-formatted string so the output is
# unchanged on Python 2 while the syntax is also valid on Python 3.
print("Dendrogram Number Distance: %s " % dendro_distance.num_distance)
print("Dendrogram Histogram Distance: %s " %
      dendro_distance.histogram_distance)

# PDF

from turbustat.statistics import PDF_Distance

# Both inputs share the same minimum value (scale); the weights are the first
# element of each moment0 error entry raised to the power -2.
pdf_distance = \
    PDF_Distance(dataset1["moment0"],
                 dataset2["moment0"],
                 min_val1=scale,
                 min_val2=scale,
                 weights1=dataset1["moment0_error"][0] ** -2.,
                 weights2=dataset2["moment0_error"][0] ** -2.)

# Here the metric is run as a separate statement and the distances are read
# from pdf_distance itself.
pdf_distance.distance_metric(verbose=True, label1=fits1, label2=fits2)

print("PDF Hellinger Distance: %s " % pdf_distance.hellinger_distance)
print("PDF KS-Test Distance: %s " % pdf_distance.ks_distance)
Ejemplo n.º 14
0
# Cramer statistic on the first element of each dataset's "cube" entry.
cramer_distance = Cramer_Distance(dataset1["cube"][0],
                                  dataset2["cube"][0]).distance_metric()

# print is called with a single argument throughout so the output is
# unchanged on Python 2 while the syntax is also valid on Python 3.
print("Cramer Distance: %s" % cramer_distance.distance)

# Dendrogram Stats

from turbustat.statistics import DendroDistance

dendro_distance = DendroDistance(dataset1["cube"][0],
                                 dataset2["cube"][0]).distance_metric(verbose=True)

print(dendro_distance.num_distance)
print(dendro_distance.histogram_distance)

# PDF

from turbustat.statistics import PDF_Distance

# Both inputs use a 0.05 minimum value; the weights are the first element of
# each integrated-intensity error entry raised to the power -2.
pdf_distance = \
    PDF_Distance(dataset1["integrated_intensity"][0],
                 dataset2["integrated_intensity"][0],
                 min_val1=0.05,
                 min_val2=0.05,
                 weights1=dataset1["integrated_intensity_error"][0] ** -2.,
                 weights2=dataset2["integrated_intensity_error"][0] ** -2.)

pdf_distance.distance_metric(verbose=True)

print(pdf_distance.distance)