def setUp(self):
    """
    Construct the Spectrum fixtures shared by the tests in this case.
    """
    n = 50
    self._size = n

    # Raster and data for a mock "observed" spectrum: values equal the raster.
    self._observed_raster = np.arange(n, dtype=np.float64)
    self._observed_values = np.arange(n, dtype=np.float64)
    self._observed_value_errors = np.ones(n, dtype=np.float64)
    self._observed = fourgp_speclib.Spectrum(
        wavelengths=self._observed_raster,
        values=self._observed_values,
        value_errors=self._observed_value_errors,
        metadata={"origin": "unit-test"})

    # A flat "absorption" spectrum sampled on the same raster.
    self._absorption_values = np.ones(n, dtype=np.float64)
    self._absorption_value_errors = np.ones(n, dtype=np.float64)
    self._absorption = fourgp_speclib.Spectrum(
        wavelengths=self._observed_raster,
        values=self._absorption_values,
        value_errors=self._absorption_value_errors,
        metadata={"origin": "unit-test"})

    # Factory which fits three-term polynomials on the observed raster.
    self._polynomial = fourgp_speclib.SpectrumSmoothFactory(
        function_family=fourgp_speclib.SpectrumPolynomial,
        wavelengths=self._observed_raster,
        terms=3)
# Example #2
    def test_search_1d_numerical_range(self):
        """
        Verify that searching on a numerical metadata range constraint returns
        exactly the spectra whose metadata value lies strictly inside the range.
        """

        # Populate the library with one random spectrum per x value 0..9.
        size = 50
        x_values = list(range(10))
        for x in x_values:
            spectrum = fourgp_speclib.Spectrum(
                wavelengths=np.arange(size),
                values=np.random.random(size),
                value_errors=np.random.random(size),
                metadata={
                    "origin": "unit-test",
                    "x_value": x
                })
            self._lib.insert(spectrum, "x_{}".format(x))

        # Query for x values inside (4.5, 8.5) and fetch their metadata back.
        x_range = [4.5, 8.5]
        x_values_expected = [x for x in x_values if x_range[0] < x < x_range[1]]
        matches = self._lib.search(x_value=x_range)
        ids = [str(match["specId"]) for match in matches]
        metadata = self._lib.get_metadata(ids=ids)
        x_values = [entry['x_value'] for entry in metadata]

        # The search must return precisely the spectra inside the range.
        self.assertEqual(x_values, x_values_expected)
# Example #3
    def test_spectrum_retrieval(self):
        """
        Store a single spectrum in the SpectrumLibrary and check that loading
        it back yields an identical Spectrum object.
        """

        # Build a random spectrum on a 50-pixel raster.
        size = 50
        input_spectrum = fourgp_speclib.Spectrum(
            wavelengths=np.arange(size),
            values=np.random.random(size),
            value_errors=np.random.random(size),
            metadata={"origin": "unit-test"})

        # Store it in the library.
        self._lib.insert(input_spectrum, "dummy_filename")

        # Fetch it back via an unconstrained search, as a SpectrumArray.
        search_results = self._lib.search()
        spectrum_array = self._lib.open(filenames=search_results[0]['filename'])

        # Pull the single spectrum out of the SpectrumArray.
        retrieved = spectrum_array.extract_item(0)

        # The round trip must preserve the spectrum exactly.
        self.assertEqual(retrieved, input_spectrum)
# Example #4
    def test_search_1d_string_value(self):
        """
        Verify that searching on an exact string metadata value returns only
        the matching spectrum.
        """

        # Insert ten spectra tagged with overlapping three-letter substrings
        # of the alphabet ("abc", "bcd", ...).
        alphabet = "abcdefghijklmnopqrstuvwxyz"
        size = 50
        x_values = list(range(10))
        for x in x_values:
            spectrum = fourgp_speclib.Spectrum(
                wavelengths=np.arange(size),
                values=np.random.random(size),
                value_errors=np.random.random(size),
                metadata={
                    "origin": "unit-test",
                    "x_value": alphabet[x:x + 3]
                })
            self._lib.insert(spectrum, "x_{}".format(x))

        # Only the spectrum tagged "def" should match this search.
        matches = self._lib.search(x_value="def")
        filenames_got = [str(match["filename"]) for match in matches]
        metadata_got = self._lib.get_metadata(filenames=filenames_got)
        x_values_got = sorted(str(entry["x_value"]) for entry in metadata_got)

        self.assertEqual(x_values_got, ["def"])
# Example #5
    def test_search_1d_numerical_value(self):
        """
        Verify that searching on an exact numerical metadata value returns
        only the matching spectrum.
        """

        # Populate the library with one random spectrum per x value 0..9.
        size = 50
        x_values = list(range(10))
        for x in x_values:
            spectrum = fourgp_speclib.Spectrum(
                wavelengths=np.arange(size),
                values=np.random.random(size),
                value_errors=np.random.random(size),
                metadata={
                    "origin": "unit-test",
                    "x_value": x
                })
            self._lib.insert(spectrum, "x_{}".format(x))

        # Only the spectrum tagged with x_value == 5 should match.
        matches = self._lib.search(x_value=5)
        ids = [str(match["specId"]) for match in matches]
        metadata = self._lib.get_metadata(ids=ids)
        x_values = [entry['x_value'] for entry in metadata]

        self.assertEqual(x_values, [5])
    def upsample_spectrum(self, input, upsampling_factor):
        """
        Up-sample a spectrum onto a finer logarithmic raster using cubic
        spline interpolation.

        :param input:
            The Spectrum object to up-sample. Assumed to be sampled on a
            logarithmically spaced wavelength raster — TODO confirm callers
            guarantee this.
        :param upsampling_factor:
            The integer factor by which to increase the sampling density.
        :return:
            A new up-sampled Spectrum object (value errors are set to zero).
        """

        # Ratio between neighbouring input wavelengths, and the smaller ratio
        # required so each input step is split into <upsampling_factor> steps.
        spacing_in = input.wavelengths[1] / input.wavelengths[0]
        spacing_out = pow(spacing_in, 1. / upsampling_factor)

        # Impose an explicit length on the output, because the arange() used
        # inside logarithmic_raster() is numerically unstable about whether it
        # includes the final point or not.
        raster_out_length = (len(input.wavelengths) - 1) * upsampling_factor

        raster_out = logarithmic_raster(
            lambda_min=input.wavelengths[0],
            lambda_max=input.wavelengths[-1],
            lambda_step=input.wavelengths[0] * (spacing_out - 1)
        )[:raster_out_length]

        # Cubic spline through the input samples, evaluated on the new raster.
        interpolator = InterpolatedUnivariateSpline(x=input.wavelengths, y=input.values)

        return fourgp_speclib.Spectrum(wavelengths=raster_out,
                                       values=interpolator(raster_out),
                                       value_errors=np.zeros_like(raster_out),
                                       metadata=input.metadata)
# Example #7
    def test_addition_multiplication(self):
        """
        Repeatedly add a spectrum to itself via __add__ and __iadd__, and
        check this agrees with multiplying the spectrum by the same integer
        via __mul__ and __imul__.
        """

        # Reusable buffer for the raster of integer multipliers.
        multiplier = np.empty(self._size)
        failures = 0
        for count in range(2, 5):
            # Fresh zero spectra to accumulate into.
            total_add = fourgp_speclib.Spectrum(wavelengths=self._raster,
                                                values=np.zeros(self._size),
                                                value_errors=self._value_errors)
            total_iadd = fourgp_speclib.Spectrum(wavelengths=self._raster,
                                                 values=np.zeros(self._size),
                                                 value_errors=self._value_errors)

            # Accumulate via __add__ (rebinding) ...
            for _ in range(count):
                total_add = total_add + self._spectrum

            # ... and via __iadd__ (in place).
            for _ in range(count):
                total_iadd += self._spectrum

            # Multiply the spectrum by <count> via __mul__ ...
            multiplier.fill(count)
            scaler = fourgp_speclib.Spectrum(wavelengths=self._raster,
                                             values=multiplier,
                                             value_errors=self._value_errors)
            product = scaler * self._spectrum

            # ... and via __imul__.
            scaler *= self._spectrum

            # All four routes must agree.
            if total_add != total_iadd:
                failures += 1
            if total_add != product:
                failures += 1
            if total_add != scaler:
                failures += 1

            del total_add, total_iadd, product, scaler

        # No disagreements should have been recorded.
        self.assertEqual(failures, 0)
 def test_data_sizes_must_match_2(self):
     """
     Fitting against a spectrum sampled on a longer raster than the factory's
     must be rejected with an AssertionError.
     """
     with self.assertRaises(AssertionError):
         # Deliberately 10 pixels longer than the fixture raster.
         mismatched_raster = np.arange(self._size + 10, dtype=np.float64)
         mismatched = fourgp_speclib.Spectrum(wavelengths=mismatched_raster,
                                              values=mismatched_raster,
                                              value_errors=mismatched_raster)
         self._polynomial.fit_to_continuum_via_template(
             other=mismatched, template=self._absorption)
# Example #9
    def test_subtraction_division(self):
        """
        Subtract a spectrum from zero N times via __sub__ and __isub__, then
        divide by -N via __truediv__ and __itruediv__, and check that every
        route recovers the original spectrum.
        """

        # Reusable buffer for the raster of divisor values.
        multiplier = np.empty(self._size)
        failures = 0
        for count in range(2, 5):
            # Fresh zero spectra to subtract from.
            minus_a = fourgp_speclib.Spectrum(wavelengths=self._raster,
                                              values=np.zeros(self._size),
                                              value_errors=self._value_errors)
            minus_b = fourgp_speclib.Spectrum(wavelengths=self._raster,
                                              values=np.zeros(self._size),
                                              value_errors=self._value_errors)

            # Subtract via __sub__ (rebinding) ...
            for _ in range(count):
                minus_a = minus_a - self._spectrum

            # ... and via __isub__ (in place).
            for _ in range(count):
                minus_b -= self._spectrum

            # Divide by -count via __truediv__ ...
            multiplier.fill(-count)
            divisor = fourgp_speclib.Spectrum(wavelengths=self._raster,
                                              values=multiplier,
                                              value_errors=self._value_errors)
            quotient_a = minus_a / divisor
            quotient_b = minus_b / divisor

            # ... and via __itruediv__.
            minus_a /= divisor
            minus_b /= divisor

            # Every route must give back the original spectrum.
            for candidate in [minus_a, minus_b, quotient_a, quotient_b]:
                if candidate != self._spectrum:
                    failures += 1

            del minus_a, minus_b, quotient_a, quotient_b, divisor

        # No disagreements should have been recorded.
        self.assertEqual(failures, 0)
# Example #10
    def setUp(self):
        """
        Build a single Spectrum fixture with random value errors.
        """
        self._size = 50
        self._raster = np.arange(self._size)
        # Values are offset by 100 so they differ from the raster itself.
        self._values = np.arange(100, self._size + 100)
        self._value_errors = np.random.random(self._size)
        self._spectrum = fourgp_speclib.Spectrum(
            wavelengths=self._raster,
            values=self._values,
            value_errors=self._value_errors,
            metadata={"origin": "unit-test"})
# Example #11
    def test_search_illegal_metadata(self):
        """
        Searching on a metadata field which no stored spectrum defines must
        raise an AssertionError.
        """

        # Store one random spectrum whose only metadata key is "origin".
        size = 50
        spectrum = fourgp_speclib.Spectrum(
            wavelengths=np.arange(size),
            values=np.random.random(size),
            value_errors=np.random.random(size),
            metadata={"origin": "unit-test"})
        self._lib.insert(spectrum, "dummy_filename")

        # "x_value" was never set on any spectrum, so this search is illegal.
        with self.assertRaises(AssertionError):
            self._lib.search(x_value=23)
    def fit_spectrum(self, spectrum):
        """
        Fit stellar labels to a spectrum which has not been continuum normalised.

        Iterates: fit a smooth continuum model through the pixels currently
        flagged as continuum (separately in each wavelength arm), divide it
        out, run the Cannon on the normalised spectrum, and use the Cannon's
        model spectrum to re-select the continuum pixels. The loop runs for a
        fixed number of iterations.

        :param spectrum:
            A Spectrum object containing the spectrum for the Cannon to fit.

        :type spectrum:
            Spectrum

        :return:
            The (labels, cov, meta) tuple from the final Cannon fit, or a
            tuple of six Nones if the continuum fit fails.
        """

        assert isinstance(spectrum, fourgp_speclib.Spectrum), \
            "Supplied spectrum for the Cannon to fit is not a Spectrum object."

        # The spectrum must be sampled identically to the training set;
        # raster_hash is compared rather than the rasters themselves.
        assert spectrum.raster_hash == self._training_set.raster_hash, \
            "Supplied spectrum for the Cannon to fit is not sampled on the same raster as the training set."

        # Number each debugging dump so successive fits do not overwrite.
        if self._debugging:
            self._debugging_output_counter += 1

        # Fitting tolerances
        max_iterations = 20  # Iterate a maximum number of times

        # Work out the raster of pixels inside each wavelength arm.
        # arm_rasters[i] is a boolean mask over the full raster; the masks
        # partition the raster at the break points in self._wavelength_arms.
        raster = spectrum.wavelengths
        lower_cut = 0
        arm_rasters = []
        for break_point in self._wavelength_arms:
            arm_rasters.append((raster >= lower_cut) * (raster < break_point))
            lower_cut = break_point
        arm_rasters.append(raster >= lower_cut)

        # Make initial continuum mask, which covers entire spectrum
        continuum_mask = np.ones_like(raster, dtype=bool)

        # Begin iterative fitting of continuum
        iteration = 0
        while True:
            iteration += 1

            # Treat each wavelength arm separately.
            continuum_models = []
            for i, arm_raster in enumerate(arm_rasters):
                # Make a mask of pixels which are both continuum and inside this wavelength arm,
                # excluding pixels with non-finite or non-positive value errors.
                pixel_mask = (arm_raster * continuum_mask *
                              np.isfinite(spectrum.value_errors) *
                              (spectrum.value_errors > 0))
                # logger.info("Continuum pixels in arm {}: {} / {}".format(i, sum(pixel_mask), len(pixel_mask)))
                continuum_raster = raster[pixel_mask]
                continuum_values = spectrum.values[pixel_mask]
                continuum_value_errors = spectrum.value_errors[pixel_mask]

                # Make a new spectrum object containing only continuum pixels inside this wavelength arm
                continuum_spectrum = fourgp_speclib.Spectrum(
                    wavelengths=continuum_raster,
                    values=continuum_values,
                    value_errors=continuum_value_errors,
                )
                # logger.info("Continuum spectrum length: {}".format(len(continuum_spectrum)))

                # Fit a smooth function through these pixels
                continuum_model_factory = fourgp_speclib.SpectrumSmoothFactory(
                    function_family=self._continuum_model_family,
                    wavelengths=continuum_raster)

                continuum_smooth = continuum_model_factory.fit_to_continuum_via_mask(
                    other=continuum_spectrum,
                    mask=np.ones_like(continuum_raster, dtype=bool))

                # A string return value signals a failed fit.
                # NOTE(review): this failure path returns six values, but the
                # success path below returns three — confirm callers handle
                # both arities.
                if isinstance(continuum_smooth, str):
                    logger.info(continuum_smooth)
                    return None, None, None, None, None, None

                # logger.info("Best-fit polynomial coefficients: {}".format(continuum_smooth.coefficients))

                # Resample smooth function onto the full raster of pixels within this wavelength arm
                resampler = SpectrumResampler(input_spectrum=continuum_smooth)
                continuum_models.append(
                    resampler.onto_raster(raster[arm_raster]))

            # Splice together the continuum in all the wavelength arms
            continuum_model = fourgp_speclib.spectrum_splice(*continuum_models)

            # Create continuum-normalised spectrum using the continuum model we've just made
            cn_spectrum = spectrum / continuum_model

            # Run the Cannon
            labels, cov, meta = super(CannonInstanceCaseyNewWithContinuumNormalisation, self). \
                fit_spectrum(spectrum=cn_spectrum)

            # Fetch the Cannon's model spectrum
            model = fourgp_speclib.Spectrum(
                wavelengths=raster,
                values=self._model.predict(labels=labels),
                value_errors=np.zeros_like(raster))

            # Make new model of which pixels are continuum (based on Cannon's template being close to one)
            continuum_mask = (model.values > 0.99) * (model.values < 1.01)
            logger.info("Continuum pixels: {} / {}".format(
                sum(continuum_mask), len(continuum_mask)))
            logger.info("Best-fit labels: {}".format(list(labels[0])))

            # Produce debugging output if requested
            if self._debugging:
                np.savetxt(
                    "/tmp/debug_{:06d}_{:03d}.txt".format(
                        self._debugging_output_counter, iteration),
                    np.transpose([
                        raster, spectrum.values, spectrum.value_errors,
                        continuum_model.values, model.values, continuum_mask
                    ]))

            # Decide whether output is good enough for us to stop iterating.
            # NOTE(review): the only stopping condition is the iteration cap —
            # there is no convergence test, so the loop always runs
            # max_iterations times; confirm this is intended.
            if iteration >= max_iterations:
                break

        return labels, cov, meta
    def normalise(self, spectrum):
        """
        Normalise a spectrum by a running mean of its values, computed
        independently within each wavelength arm.

        :param spectrum:
            The Spectrum (or SpectrumArray) to be normalised.
        :return:
            Normalised version of this spectrum. A SpectrumArray is updated
            in place and returned.
        """

        # A SpectrumArray is normalised element by element, writing each
        # result back into the array's own storage.
        if isinstance(spectrum, fourgp_speclib.SpectrumArray):
            for index in range(len(spectrum)):
                item = spectrum.extract_item(index)
                normalised = self.normalise(item)
                item.values[:] = normalised.values
                item.value_errors[:] = normalised.value_errors
            return spectrum

        assert isinstance(spectrum, fourgp_speclib.Spectrum), \
            "The CannonInstance.normalise method requires a Spectrum object as input."

        # Number each debugging dump so successive calls do not overwrite.
        if self._debugging:
            self._debugging_output_counter += 1

        def running_mean(x, n):
            # Cumulative-sum trick; returns an array of length len(x)-(n-1).
            cumulative = np.cumsum(np.insert(x, 0, 0))
            return (cumulative[n:] - cumulative[:-n]) / float(n)

        # Partition the raster into wavelength arms at the configured breaks.
        raster = spectrum.wavelengths
        arm_rasters = []
        lower_cut = 0
        for break_point in self._wavelength_arms:
            arm_rasters.append((raster >= lower_cut) * (raster < break_point))
            lower_cut = break_point
        arm_rasters.append(raster >= lower_cut)

        output_wavelengths = []
        output_values = []
        output_value_errors = []

        for arm in arm_rasters:
            output_wavelengths.append(raster[arm])
            arm_values = spectrum.values[arm]
            arm_errors = spectrum.value_errors[arm]

            # The running mean is shorter than its input, so pad it at both
            # ends with its edge values to line it up pixel for pixel.
            normalisation = running_mean(arm_values, self._window_width)
            padding_needed = len(arm_values) - len(normalisation)
            padding_left = int(padding_needed / 2)
            padding_right = padding_needed - padding_left
            normalisation_full = np.concatenate([
                np.repeat(normalisation[0], padding_left), normalisation,
                np.repeat(normalisation[-1], padding_right)
            ])

            # Divide both values and errors by the local continuum estimate.
            output_values.append(arm_values / normalisation_full)
            output_value_errors.append(arm_errors / normalisation_full)

        # Splice the arms back together into a single spectrum.
        output = fourgp_speclib.Spectrum(
            wavelengths=np.concatenate(output_wavelengths),
            values=np.concatenate(output_values),
            value_errors=np.concatenate(output_value_errors),
            metadata=spectrum.metadata)

        # Produce debugging output if requested
        if self._debugging:
            np.savetxt(
                "/tmp/debug_{:06d}.txt".format(self._debugging_output_counter),
                np.transpose([raster, spectrum.values, spectrum.value_errors]))

        return output
# Example #14
 def test_data_sizes_must_match_3(self):
     """
     Constructing a Spectrum whose value_errors array is longer than its
     wavelength raster must raise an AssertionError.
     """
     with self.assertRaises(AssertionError):
         # value_errors has one pixel more than the raster.
         fourgp_speclib.Spectrum(wavelengths=self._raster,
                                 values=self._values,
                                 value_errors=np.arange(self._size + 1))