    def species(self, element, ion, **kwargs):
        """
        Homogenise the line abundances for all stars for a given element and ion.

        :param element:
            The element name to homogenise.

        :type element:
            str

        :param ion:
            The ionisation stage of the element to homogenise (1 = neutral).

        :type ion:
            int
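
        :param scaled:
            Whether to homogenise the scaled abundances (the scaled_abundance
            column) rather than the raw abundance column. Defaults to True.

        :type scaled:
            bool

        :param default_variance:
            The variance to adopt for a line when it cannot be estimated from
            the data (defaults to 0.1**2).

        :type default_variance:
            float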
        """

        scaled = kwargs.get("scaled", True)

        # Remove any existing homogenised line or average abundances.
        self.release.execute("""DELETE FROM homogenised_line_abundances
            WHERE TRIM(element) = %s AND ion = %s""", (element, ion))
        self.release.execute("""DELETE FROM homogenised_abundances
            WHERE TRIM(element) = %s AND ion = %s""", (element, ion))

        # Drop index if it exists.
        self.release.execute(
            "DROP INDEX IF EXISTS homogenised_line_abundances_species_index")
        self.release.commit()

        # Get the unique wavelengths.
        wavelengths = sorted(set(self.release.retrieve_table(
            """SELECT DISTINCT ON (wavelength) wavelength FROM line_abundances
            WHERE TRIM(element) = %s AND ion = %s AND flags = 0
            ORDER BY wavelength ASC""", (element, ion))["wavelength"]))

        # Get the unique CNAMEs. We deal with repeat spectra (of the same CNAME)
        # later on in the code.
        cnames = self.release.retrieve_table(
            """SELECT DISTINCT ON (cname) cname FROM line_abundances
            WHERE TRIM(element) = %s AND ion = %s ORDER BY cname ASC""",
            (element, ion))["cname"]

        # For each wavelength, approximate the covariance matrix then homogenise
        # this wavelength for all cnames.
        column = "scaled_abundance" if scaled else "abundance"
        logger.debug("Homogenising {0} {1} using column {2}".format(element,
            ion, column))

        # In order to build the covariance matrix for this species, at some 
        # point we will need to know the variance for each line. We can estimate
        # this from the variance in the distribution of differential abundances.

        # This effectively tells us how well this line could have been measured
        # (after accounting for systematics between different nodes).
        Y, Y_nodes, Y_table = self.release._match_species_abundances(
            element, ion, scaled=scaled, include_flagged_lines=False)
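        # Y is presumably an (N_measurements, N_nodes) array of line abundances
        # for this species, Y_nodes the corresponding node names, and Y_table
        # the per-row metadata (wavelength, spectrum_filename_stub) used to
        # slice Y below.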

        # Homogenise the line abundances for each wavelength separately.
        for i, wavelength in enumerate(wavelengths):

            # Match all of the abundances for this given line, so that we can
            # use the array to calculate the correlation coefficients between
            # different nodes for this particular line.
            X, X_nodes = self.release._match_line_abundances(element, ion, 
                wavelength, column, ignore_gaps=True, include_limits=False,
                include_flagged_lines=False, **kwargs)

            # Get the measurement variance for this line.
            Z = utils.calculate_differential_abundances(
                Y[(Y_table["wavelength"] == wavelength)])
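            # Z holds the pairwise differences between node measurements of
            # this line; the variance of |Z| below estimates how precisely the
            # line can be measured once node-to-node systematics are removed.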

            line_variance = np.nanvar(np.abs(Z))

            if not np.isfinite(line_variance):
                # If the line variance is not finite, it means we do not have
                # differential abundances for this line. This is typically
                # because there were not enough nodes measuring this wavelength.

                # But we do know where this line sits with respect to the mean
                # abundance for this element. So we can still estimate the line
                # variance.
                assert Y.shape[1] > 1

                # For each measured wavelength, what is the mean abundance for
                # the corresponding star, and what is the variance in that
                # distribution?
                matchers = []
                wl_mask = (Y_table["wavelength"] == wavelength)

                for stub in Y_table["spectrum_filename_stub"][wl_mask]:
                    stub_mask = (Y_table["spectrum_filename_stub"] == stub)
                    
                    value = Y[stub_mask*wl_mask].flatten()
                    node_mask = np.isfinite(value)
                    value = value[node_mask]

                    matchers.extend(Y[stub_mask * ~wl_mask, node_mask] - value)

                line_variance = np.nanvar(np.abs(matchers))
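                # The scatter in these line-minus-other-lines offsets serves as
                # a proxy for the measurement variance of this line when no
                # node-to-node differential abundances are available.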

                if line_variance == 0:
                    # This line was only measured by one node in some stars, and
                    # in those stars there are no other measurements of this
                    # element, so we have no basis for the variance in this line.
                    # This is a fringe case, and we will just have to do 
                    # something reasonable:
                    line_variance = kwargs.get("default_variance", 0.1**2)
                    logger.warning("Using default variance of {0:.2f} for {1} {2}"\
                        " line at {3}".format(line_variance, element, ion, 
                            wavelength))

            assert np.isfinite(line_variance) and line_variance > 0

            # For each CNAME / FILENAME, homogenise this line.
            for j, cname in enumerate(cnames):
                
                # The line_abundances function will need the element, ion,
                # wavelength, the variance in the line measurement, and the
                # correlation coefficients between nodes (or the matrix to
                # produce them), and the cname to know where to put things.
                result = self.line_abundances(cname, element, ion, wavelength,
                    line_variance, X, X_nodes)
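                # line_abundances() presumably writes the homogenised abundance
                # for this line and star to the database; its return value is
                # not used here.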

        # Need to commit before we can do the averaged results per star.
        self.release.commit()

        # Create an index to speed things up.
        # To prevent parallel problems, first check that the index has not been
        # created by a parallel homogenisation script.
        try:    
            self.release.execute("""CREATE INDEX
                homogenised_line_abundances_species_index
                ON homogenised_line_abundances (cname, element, ion)""")
            self.release.commit()

        except:
            self.release.execute("rollback")

        # To homogenise the spectrum abundances, we will need the correlation
        # coefficients between each line.

        # Match the homogenised line abundances on a per-star basis.
        Q, Q_wavelengths = self.release._match_homogenised_line_abundances(
            element, ion, ignore_gaps=False, include_limits=False)
        Q_rho = np.atleast_2d(np.ma.corrcoef(Q))
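        # Q_rho is the correlation matrix between the homogenised abundances of
        # different lines (wavelengths), estimated across stars and passed to
        # the per-star spectrum homogenisation below.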

        for j, cname in enumerate(cnames):
            self.spectrum_abundances(element, ion, cname, rho=Q_rho,
                rho_wavelengths=Q_wavelengths, **kwargs)

        self.release.commit()

        # TODO what should we return?
        return None

    def differential(self, element, ion, scaled=False, ignore_flags=False,
        **kwargs):
        """
        Calculate the differential abundance bias for each wavelength for each 
        node.

        :param element:
            The element name to homogenise.

        :type element:
            str

        :param ion:
            The ionisation stage of the element to homogenise (1 = neutral).

        :type ion:
            int
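
        :param scaled:
            Use the scaled abundances (rather than the raw abundances) when
            matching the node measurements.

        :type scaled:
            bool

        :param ignore_flags:
            Controls whether flagged line abundances are included when matching
            the node measurements.

        :type ignore_flags:
            bool

        :returns:
            A nested dictionary keyed by node and then wavelength, containing
            the fitted abundance bias for each node at each wavelength.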
        """

        X, nodes, diff_data = self.release._match_species_abundances(
            element, ion, scaled=scaled, include_flagged_lines=not ignore_flags)


        #X, nodes, diff_data = utils.match_node_abundances(self.release._database,
        #    element, ion, scaled=scaled, ignore_flags=ignore_flags)

        # Calculate the full differential abundances.
        X_diff, indices = utils.calculate_differential_abundances(X,
            full_output=True)
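        # Each column of X_diff is the difference between a pair of nodes
        # (Node_i - Node_j), and indices gives the (i, j) node pair for each
        # column.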

        # Determine the differences to each node.
        diff_data["wavelength"] = diff_data["wavelength"].astype(float)
        wavelengths = sorted(set(diff_data["wavelength"]))

        bias = { n: { w: (0, np.nan, -1) for w in wavelengths } for n in nodes }
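        # Each entry defaults to a zero offset; the second and third tuple
        # elements appear to be placeholders (e.g., for an uncertainty and a
        # count of measurements).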
        for wavelength in wavelengths:

            X_wl = X_diff[(diff_data["wavelength"] == wavelength), :]

            finite = { node: 0 for node in nodes }
            for k, (i, j) in enumerate(indices):
                value = np.isfinite(X_wl[:, k]).sum()
                finite[nodes[i]] += value
                finite[nodes[j]] += value

            finite_nodes = [node for node in nodes if finite[node] > 0]

            def differential_sigma(biases):
                # Apply the biases on a per-column basis.
                X_offsets = np.zeros(X_wl.shape[1])
                for i, idx in enumerate(indices):

                    # These are Node_0 - Node_1
                    # We want to apply (Node_0 - offset_0) - (Node_1 - offset_1)
                    # so the total offset is offset_1 - offset_0
                    # The size of biases corresponds to finite_nodes, not nodes.
                    try:
                        jdx0 = finite_nodes.index(nodes[idx[0]])
                        jdx1 = finite_nodes.index(nodes[idx[1]])
                    except ValueError:
                        continue
                    else:
                        X_offsets[i] = biases[jdx1] - biases[jdx0]

                return np.nanstd(X_wl - X_offsets)

            result = op.fmin(differential_sigma, np.zeros(len(finite_nodes)), 
                disp=False)
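            # op.fmin is presumably scipy.optimize.fmin (Nelder-Mead simplex);
            # it minimises the scatter in the differential abundances over the
            # per-node biases.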

            
            initial = differential_sigma(np.zeros(len(finite_nodes)))
            final = differential_sigma(result)

            logger.info("Initial and final sigma: {0:.2f} {1:.2f}".format(
                initial, final))

            for node, offset in zip(finite_nodes, result):
                bias[node][wavelength] = (-offset, np.nan, -1)
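                # Note the sign flip: the stored bias is the negative of the
                # fitted offset, presumably so it can be applied directly as a
                # correction to that node's abundances downstream.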

        return bias