Ejemplo n.º 1
0
    def residual(params, x, obs, mol0, ll0, ul0, line_profile0, res0, units,
                 continuum):
        """
        Residual between the observed spectrum and a simulated one.

        The source parameters ``size``, ``dV``, ``velocity``, ``Tex`` and
        ``column`` are read from ``params.valuesdict()`` and used, together
        with ``continuum``, to build a `Source`. That source is simulated
        against ``obs`` and the resulting `Simulation` is appended to
        ``return_sims``, a list that is not defined here and must exist in
        an enclosing scope.

        ``x`` is accepted but never used in the body.

        Returns
        -------
        np.ndarray
            ``obs.spectrum.Tb - sim.spectrum.int_profile`` wrapped in an
            array.
        """
        values = params.valuesdict()
        source_fields = ('size', 'dV', 'velocity', 'Tex', 'column')
        source_kwargs = {field: values[field] for field in source_fields}

        # Source described by the current trial parameters
        trial_source = Source(continuum=continuum, **source_kwargs)

        # Simulation of that source against the observation
        trial_sim = Simulation(
            mol=mol0,
            ll=ll0,
            ul=ul0,
            observation=obs,
            source=trial_source,
            line_profile=line_profile0,
            res=res0,
            use_obs=True,
            units=units,
        )

        # Side effect: keep every simulation that was evaluated
        return_sims.append(trial_sim)

        difference = obs.spectrum.Tb - trial_sim.spectrum.int_profile
        return np.array(difference)
Ejemplo n.º 2
0
 def make_simulation(self, molecule, ll, ul, obs, res: float = 0.0014):
     """
     Build a `Simulation` of ``molecule`` for ``self.source``.

     ``ll``, ``ul``, ``obs`` and ``res`` are forwarded unchanged as the
     ``ll``, ``ul``, ``observation`` and ``res`` arguments; the line
     profile is always ``"Gaussian"``.
     """
     sim_kwargs = {
         "mol": molecule,
         "ll": ll,
         "ul": ul,
         "observation": obs,
         "source": self.source,
         "line_profile": "Gaussian",
         "res": res,
     }
     return Simulation(**sim_kwargs)
Ejemplo n.º 3
0
def process_mcmc_json(json_file, molecule, observation, ll=0, ul=float('inf'),
                      line_profile='Gaussian', res=0.0014, stack_params=None,
                      stack_plot_params=None, make_plots=True,
                      return_json=False):
    """
    Rebuild sources and simulations from an MCMC summary JSON file and,
    optionally, produce velocity-stack and matched-filter plots.

    The JSON file is expected to hold the keys ``SourceSize``, ``VLSR``,
    ``NCol``, ``Tex`` and ``dV``, each with a ``mean`` sequence; one
    `Source` is created per position across those sequences.

    This function relies on ``json``, ``np`` and ``find_nearest`` being
    available at module level.

    Parameters
    ----------
    json_file
        Path to the JSON file to read.
    molecule, observation
        Passed to every `Simulation` as ``mol`` and ``observation``.
    ll, ul, line_profile, res
        Passed unchanged to every `Simulation`. ``res`` is also used as
        ``res_inp`` for the velocity stack.
    stack_params : dict, optional
        Entries that override the default `velocity_stack` parameters.
    stack_plot_params : dict, optional
        Entries that override the default `plot_stack` parameters.
    make_plots : bool, optional
        When exactly ``False``, return right after summing the simulated
        spectra, without stacking or plotting.
    return_json : bool, optional
        When exactly ``True``, append the loaded JSON dict to the result.

    Returns
    -------
    tuple
        ``(sources, sims, sum1)`` or ``(sources, sims, sum1, json_dict)``.
    """
    from molsim.classes import Source, Simulation
    from molsim.functions import sum_spectra, velocity_stack, matched_filter
    from molsim.plotting import plot_stack, plot_mf

    # JSON text is UTF-8 by specification, so do not depend on the locale.
    with open(json_file, encoding="utf-8") as json_handle:
        json_dict = json.load(json_handle)

    sources = [
        Source(size=size, velocity=vlsr, column=col, Tex=tex, dV=dv)
        for size, vlsr, col, tex, dv in zip(
            json_dict['SourceSize']['mean'],
            json_dict['VLSR']['mean'],
            json_dict['NCol']['mean'],
            json_dict['Tex']['mean'],
            json_dict['dV']['mean'],
        )
    ]

    sims = [
        Simulation(mol=molecule, ll=ll, ul=ul, observation=observation,
                   source=source, line_profile=line_profile, res=res)
        for source in sources
    ]
    sum1 = sum_spectra(sims)

    # Identity checks are kept on purpose so that only the literal
    # True/False values switch behaviour, as before.
    if return_json is True:
        results = (sources, sims, sum1, json_dict)
    else:
        results = (sources, sims, sum1)

    if make_plots is False:
        return results

    # Computed once; the defaults below use these several times.
    mean_dv = np.mean(json_dict['dV']['mean'])
    mean_vlsr = np.mean(json_dict['VLSR']['mean'])

    internal_stack_params = {
        'selection': 'lines',
        'freq_arr': observation.spectrum.frequency,
        'int_arr': observation.spectrum.Tb,
        'freq_sim': sum1.freq_profile,
        'int_sim': sum1.int_profile,
        'res_inp': res,
        'dV': mean_dv,
        'dV_ext': 40,
        'vlsr': mean_vlsr,
        'vel_width': 40,
        'v_res': 0.02,
        'blank_lines': True,
        'blank_keep_range': [-5 * mean_dv, 5 * mean_dv],
        'flag_lines': False,
        'flag_sigma': 5,
    }
    if stack_params is not None:
        internal_stack_params.update(stack_params)

    stack = velocity_stack(internal_stack_params)

    internal_stack_plot_params = {'xlimits': [-10, 10]}
    if stack_plot_params is not None:
        internal_stack_plot_params.update(stack_plot_params)

    plot_stack(stack, params=internal_stack_plot_params)

    # The filter template is the stacked simulation between the samples
    # nearest to -2 and +2 on the stack's velocity axis.
    template_start = find_nearest(stack.velocity, -2)
    template_stop = find_nearest(stack.velocity, 2)
    mf = matched_filter(stack.velocity,
                        stack.snr,
                        stack.int_sim[template_start:template_stop])
    plot_mf(mf)

    return results
					
				
				
	
	
	
	
	
	
	
	
	
	
	
	
	
	
	
	
	
	
	
	
	
	
	
	
	
	
	
	
	
	
	
	
	
	
	
	
	
	
	
	
	
	
				
			
Ejemplo n.º 4
0
    def simulate_spectrum(self,
                          parameters: np.ndarray,
                          scale: float = 3.0) -> np.ndarray:
        """
        Wraps `molsim` functionality to simulate the spectrum, given a set
        of input parameters as a NumPy 1D array. On the first pass, this generates
        a `Simulation` instance and stores it, which has some overhead associated
        with figuring out which catalog entries to simulate. After the first
        pass, the instance is re-used with the `Source` object updated with
        the new parameters.

        The nuance in this function is with `scale`: during the preprocess
        step, we assume that the observation frequency is not shifted to the
        source reference. To simulate with molsim, we identify where the catalog
        overlaps with our frequency windows, and because it is unshifted this
        causes molsim to potentially ignore a lot of lines (particularly
        high frequency ones). The `scale` parameter scales the input VLSR
        as to make sure that we cover everything as best as we can.

        The frequency window is only computed on the first call, from the
        VLSR of that call; later calls reuse the stored `Simulation`.

        Parameters
        ----------
        parameters : np.ndarray
            NumPy 1D array containing exactly five values, in the order
            source size, VLSR, column density, Tex, dV. A column density
            of 1000 or less is treated as a base-10 logarithm.
        scale : float, optional
            Modifies the window to consider catalog overlap, by default 3.

        Returns
        -------
        np.ndarray
            NumPy 1D array corresponding to the simulated spectrum
        """
        size, vlsr, ncol, Tex, dV = parameters
        # Assume that the value is in log space, if it's below 1000
        if ncol <= 1e3:
            ncol = 10**ncol
        source = Source("", vlsr, size, column=ncol, Tex=Tex, dV=dV)
        if not hasattr(self, "simulation"):
            min_freq, max_freq = find_limits(
                self.observation.spectrum.frequency)
            # there's a buffer here just to make sure we don't go out of bounds
            # and suddenly stop simulating lines
            buffer_velocity = vlsr * scale
            # Use the magnitude of the offsets so that the window is always
            # widened; a signed offset from a negative VLSR would shrink it.
            min_offsets = np.abs(
                compute.calculate_dopplerwidth_frequency(
                    min_freq, buffer_velocity))
            max_offsets = np.abs(
                compute.calculate_dopplerwidth_frequency(
                    max_freq, buffer_velocity))
            # Rebind instead of updating in place, so that whatever
            # `find_limits` returned is left untouched.
            min_freq = min_freq - min_offsets
            max_freq = max_freq + max_offsets
            self.simulation = Simulation(
                mol=self.molecule,
                ll=min_freq,
                ul=max_freq,
                observation=self.observation,
                source=source,
                line_profile="gaussian",
                use_obs=True,
            )
        else:
            # Swap in the new source and re-run the update steps of the
            # cached simulation; the call order is kept as-is.
            self.simulation.source = source
            self.simulation._apply_voffset()
            self.simulation._calc_tau()
            self.simulation._make_lines()
            self.simulation._beam_correct()
        return self.simulation.spectrum.int_profile
Ejemplo n.º 5
0
class SingleComponent(AbstractModel):
    """
    Simplest concrete implementation of an `AbstractModel`,
    corresponding to a single value for each modeling parameter.
    Each model parameter expects an `AbstractDistribution` object,
    which corresponds to the prior distribution over parameters.

    The parameter order used throughout the class is source size, VLSR,
    column density, excitation temperature, line width.

    NOTE(review): `_distributions` is filled in `__post_init__`, which is
    only called automatically for dataclasses; no decorator is visible
    on this class here, so confirm how instances are initialised.
    """
    source_size: AbstractDistribution
    vlsr: AbstractDistribution
    Ncol: AbstractDistribution
    Tex: AbstractDistribution
    dV: AbstractDistribution
    observation: Observation
    molecule: Molecule

    def __post_init__(self):
        # Order matters: it defines the layout of every parameter array.
        self._distributions = [
            self.source_size,
            self.vlsr,
            self.Ncol,
            self.Tex,
            self.dV,
        ]

    def __len__(self) -> int:
        """Return the number of model parameters."""
        return len(self._distributions)

    def _get_components(self):
        """Return the list of prior distributions, in parameter order."""
        return self._distributions

    def get_names(self) -> List[str]:
        """Return the parameter names, in parameter order."""
        return ["SourceSize", "VLSR", "NCol", "Tex", "dV"]

    def __repr__(self) -> str:
        lines = [f"Model: {type(self).__name__}\n"]
        lines.extend(f"{dist}\n" for dist in self._distributions)
        return "".join(lines)

    def sample_prior(self) -> np.ndarray:
        """
        Draw samples from each respective prior distribution to
        return an array of parameters.

        Returns
        -------
        np.ndarray
            NumPy 1D array of parameter values drawn from the
            respective prior.
        """
        return np.array([param.sample() for param in self._distributions])

    def simulate_spectrum(self,
                          parameters: np.ndarray,
                          scale: float = 3.0) -> np.ndarray:
        """
        Wraps `molsim` functionality to simulate the spectrum, given a set
        of input parameters as a NumPy 1D array. On the first pass, this generates
        a `Simulation` instance and stores it, which has some overhead associated
        with figuring out which catalog entries to simulate. After the first
        pass, the instance is re-used with the `Source` object updated with
        the new parameters.

        The nuance in this function is with `scale`: during the preprocess
        step, we assume that the observation frequency is not shifted to the
        source reference. To simulate with molsim, we identify where the catalog
        overlaps with our frequency windows, and because it is unshifted this
        causes molsim to potentially ignore a lot of lines (particularly
        high frequency ones). The `scale` parameter scales the input VLSR
        as to make sure that we cover everything as best as we can.

        The frequency window is only computed on the first call, from the
        VLSR of that call; later calls reuse the stored `Simulation`.

        Parameters
        ----------
        parameters : np.ndarray
            NumPy 1D array containing exactly five values, in the order
            source size, VLSR, column density, Tex, dV. A column density
            of 1000 or less is treated as a base-10 logarithm.
        scale : float, optional
            Modifies the window to consider catalog overlap, by default 3.

        Returns
        -------
        np.ndarray
            NumPy 1D array corresponding to the simulated spectrum
        """
        size, vlsr, ncol, Tex, dV = parameters
        # Assume that the value is in log space, if it's below 1000
        if ncol <= 1e3:
            ncol = 10**ncol
        source = Source("", vlsr, size, column=ncol, Tex=Tex, dV=dV)
        if not hasattr(self, "simulation"):
            min_freq, max_freq = find_limits(
                self.observation.spectrum.frequency)
            # there's a buffer here just to make sure we don't go out of bounds
            # and suddenly stop simulating lines
            buffer_velocity = vlsr * scale
            # Use the magnitude of the offsets so that the window is always
            # widened; a signed offset from a negative VLSR would shrink it.
            min_offsets = np.abs(
                compute.calculate_dopplerwidth_frequency(
                    min_freq, buffer_velocity))
            max_offsets = np.abs(
                compute.calculate_dopplerwidth_frequency(
                    max_freq, buffer_velocity))
            # Rebind instead of updating in place, so that whatever
            # `find_limits` returned is left untouched.
            min_freq = min_freq - min_offsets
            max_freq = max_freq + max_offsets
            self.simulation = Simulation(
                mol=self.molecule,
                ll=min_freq,
                ul=max_freq,
                observation=self.observation,
                source=source,
                line_profile="gaussian",
                use_obs=True,
            )
        else:
            # Swap in the new source and re-run the update steps of the
            # cached simulation; the call order is kept as-is.
            self.simulation.source = source
            self.simulation._apply_voffset()
            self.simulation._calc_tau()
            self.simulation._make_lines()
            self.simulation._beam_correct()
        return self.simulation.spectrum.int_profile

    def prior_constraint(self, parameters: np.ndarray) -> float:
        """
        Function that will apply a constraint on the prior. This function
        should be overwritten in child models, say for example in the
        TMC-1 four component case, where we want to constrain parameter
        space to certain regions.

        Parameters
        ----------
        parameters : np.ndarray
            NumPy 1D array containing parameter values

        Returns
        -------
        float
            Return zero if parameters pass the constraint, otherwise
            return -np.inf. This base implementation always returns zero.
        """
        return 0.0

    def compute_prior_likelihood(self, parameters: np.ndarray) -> float:
        """
        Calculate the total prior log likelihood. The calculation is handed
        off to the individual distributions, and the result of
        `prior_constraint` is added on top.

        Parameters
        ----------
        parameters : np.ndarray
            NumPy 1D array containing the model parameters

        Returns
        -------
        float
            The total prior log likelihood
        """
        lnlikelihood = self.prior_constraint(parameters)
        lnlikelihood += sum(
            dist.ln_likelihood(value)
            for dist, value in zip(self._distributions, parameters))
        return lnlikelihood

    def compute_log_likelihood(self, parameters: np.ndarray) -> float:
        """
        Calculate the Gaussian log likelihood of the observed spectrum,
        given a set of parameters.

        Each channel is treated as an independent Gaussian centred on the
        simulated intensity, with standard deviation given by the
        observation noise:

            ln L = -0.5 * sum(r**2 / sigma**2 + ln(2 * pi * sigma**2))

        where ``r`` is observed minus simulated intensity. The terms are
        summed in log space; multiplying the log-normalisation by the
        exponential kernel is not a log likelihood and prefers large
        residuals whenever the noise exceeds one.

        Parameters
        ----------
        parameters : np.ndarray
            NumPy 1D array of model parameters, passed on to
            `simulate_spectrum`.

        Returns
        -------
        float
            Log likelihood of the model
        """
        obs = self.observation.spectrum
        simulation = self.simulate_spectrum(parameters)
        # match the simulation with the spectrum
        residual = obs.Tb - simulation
        variance = obs.noise**2.0
        lnlike = -0.5 * np.sum(
            residual**2.0 / variance + np.log(2.0 * np.pi * variance))
        return lnlike

    def nll(self, parameters: np.ndarray) -> float:
        """
        Calculate the negative log likelihood. This is functionally exactly
        the same as `compute_log_likelihood`, except that the sign of the
        likelihood is flipped so that it can be minimized in maximum
        likelihood estimation.

        Parameters
        ----------
        parameters : np.ndarray
            NumPy 1D array of model parameters, passed on to
            `compute_log_likelihood`.

        Returns
        -------
        float
            Negative log likelihood of the model
        """
        return -self.compute_log_likelihood(parameters)

    def mle_optimization(
        self,
        initial: Union[None, np.ndarray] = None,
        bounds: Union[None, List[Tuple[float, float]]] = None,
        **kwargs,
    ):
        """
        Obtain a maximum likelihood estimate, given an initial starting point in
        parameter space. The optimum that is found may depend on that
        starting point and on the bounds.

        Additional kwargs are passed into `scipy.optimize.minimize`, and can be
        used to overwrite things like the optimization method.

        The `Result` object from `scipy.optimize` is returned, which holds the
        MLE parameters as the attribute `x`, and the likelihood value as `fun`.

        Parameters
        ----------
        initial : Union[None, np.ndarray], optional
            Initial parameters for optimization, by default None, in which
            case the mean of 3000 draws from the prior is used.
        bounds : Union[None, List[Tuple[float, float]]], optional
            Bounds for constrained optimization. By default None, which
            imposes no constraints (highly not recommended!). See the
            `scipy.optimize.minimize` page for how `bounds` is specified.

        Returns
        -------
        `scipy.optimize.Result`
            A fit `Result` object that contains the final state of the
            minimization
        """
        if initial is None:
            prior_draws = np.array(
                [self.sample_prior() for _ in range(3000)])
            initial = prior_draws.mean(axis=0)
        opt_kwargs = {
            "fun": self.nll,
            "x0": initial,
            "method": "Powell",
            "bounds": bounds,
        }
        # Caller-supplied settings take precedence over the defaults above.
        opt_kwargs.update(kwargs)
        return minimize(**opt_kwargs)

    @classmethod
    def from_yml(cls, yml_path: str):
        """
        Build a model from a YAML file.

        The ``observation`` and ``molecule`` entries are passed to `load`.
        ``nominal_vlsr`` is ignored with a warning. Every other entry is
        turned into a prior distribution: `GaussianLikelihood` when the
        entry has a ``mu`` field, `UniformLikelihood` otherwise, with the
        entry's contents passed to ``from_values`` as keyword arguments.

        Parameters
        ----------
        yml_path : str
            Path to the YAML file, passed to `load_yaml`.

        Returns
        -------
        An instance of this class built from the collected entries.
        """
        input_dict = load_yaml(yml_path)
        cls_dict = dict()
        for key, spec in input_dict.items():
            if key == "nominal_vlsr":
                logger.warning(
                    f"{key} is not recognized, and therefore ignored.")
            elif key in ("observation", "molecule"):
                # load in the observed data / molecule
                cls_dict[key] = load(spec)
            else:
                # `hasattr` never sees the keys of a mapping, so also test
                # membership; otherwise the Gaussian prior is never chosen.
                is_gaussian = hasattr(spec, "mu") or "mu" in spec
                dist = GaussianLikelihood if is_gaussian else UniformLikelihood
                cls_dict[key] = dist.from_values(**spec)
        return cls(**cls_dict)