def species(self, element, ion, **kwargs):
    """
    Homogenise the line abundances for all stars for a given element and ion.

    :param element:
        The element name to homogenise.

    :type element:
        str

    :param ion:
        The ionisation stage of the element to homogenise (1 = neutral).

    :type ion:
        int

    :param scaled: [optional, via kwargs]
        Use the node-scaled abundance column instead of the raw abundances
        (default: True).

    :returns:
        None. All results are written to the database tables
        homogenised_line_abundances and homogenised_abundances.
    """
    scaled = kwargs.get("scaled", True)

    # Remove any existing homogenised line or average abundances so this
    # run starts from a clean slate.
    self.release.execute("""DELETE FROM homogenised_line_abundances
        WHERE TRIM(element) = %s AND ion = %s""", (element, ion))
    self.release.execute("""DELETE FROM homogenised_abundances
        WHERE TRIM(element) = %s AND ion = %s""", (element, ion))

    # Drop the species index if it exists; it is rebuilt after the line
    # homogenisation below.
    self.release.execute(
        "DROP INDEX IF EXISTS homogenised_line_abundances_species_index")
    self.release.commit()

    # Get the unique (unflagged) wavelengths, as a sorted list.
    wavelengths = sorted(set(self.release.retrieve_table(
        """SELECT DISTINCT ON (wavelength) wavelength FROM line_abundances
        WHERE TRIM(element) = %s AND ion = %s AND flags = 0
        ORDER BY wavelength ASC""", (element, ion))["wavelength"]))

    # Get the unique CNAMEs. We deal with repeat spectra (of the same CNAME)
    # later on in the code.
    cnames = self.release.retrieve_table(
        """SELECT DISTINCT ON (cname) cname FROM line_abundances
        WHERE TRIM(element) = %s AND ion = %s ORDER BY cname ASC""",
        (element, ion))["cname"]

    # For each wavelength, approximate the covariance matrix then homogenise
    # this wavelength for all cnames.
    column = "scaled_abundance" if scaled else "abundance"
    logger.debug("Homogenising {0} {1} using column {2}".format(
        element, ion, column))

    # In order to build the covariance matrix for this species, at some
    # point we will need to know the variance for each line. We can estimate
    # this from the variance in the distribution of differential abundances.
    # This effectively tells us how well this line could have been measured
    # (after accounting for systematics between different nodes).
    Y, Y_nodes, Y_table = self.release._match_species_abundances(
        element, ion, scaled=scaled, include_flagged_lines=False)

    # Homogenise the line abundances for each wavelength separately.
    # (wavelengths is already a sorted, de-duplicated list.)
    for wavelength in wavelengths:

        # Match all of the abundances for this given line, so that we can
        # use the array to calculate the correlation coefficients between
        # different nodes for this particular line.
        X, X_nodes = self.release._match_line_abundances(element, ion,
            wavelength, column, ignore_gaps=True, include_limits=False,
            include_flagged_lines=False, **kwargs)

        # Estimate the measurement variance for this line from the spread
        # of node-to-node differential abundances.
        Z = utils.calculate_differential_abundances(
            Y[(Y_table["wavelength"] == wavelength)])
        line_variance = np.nanvar(np.abs(Z))

        if not np.isfinite(line_variance):
            # If the line variance is not finite, it means we do not have
            # differential abundances for this line. This is typically
            # because there were not enough nodes measuring this wavelength.
            # But we do know where this line sits with respect to the mean
            # abundance for this element. So we can still estimate the line
            # variance.
            assert Y.shape[1] > 1

            # For each measured wavelength, what is the mean abundance for
            # the corresponding star, and what is the variance in that
            # distribution?
            matchers = []
            wl_mask = (Y_table["wavelength"] == wavelength)
            for stub in Y_table["spectrum_filename_stub"][wl_mask]:
                stub_mask = (Y_table["spectrum_filename_stub"] == stub)
                value = Y[stub_mask * wl_mask].flatten()
                node_mask = np.isfinite(value)
                value = value[node_mask]
                # Offsets of this spectrum's other lines from this line's
                # finite node measurements.
                matchers.extend(Y[stub_mask * ~wl_mask, node_mask] - value)
            line_variance = np.nanvar(np.abs(matchers))

            if line_variance == 0:
                # This line was only measured by one node in some stars, and
                # in those stars there are no other measurements of this
                # element, so we have no basis for the variance in this line
                # This is a fringe case, and we will just have to do
                # something reasonable:
                line_variance = kwargs.get("default_variance", 0.1**2)
                # NOTE: logger.warn is deprecated; use logger.warning.
                logger.warning(
                    "Using default variance of {0:.2f} for {1} {2}"
                    " line at {3}".format(line_variance, element, ion,
                        wavelength))

        assert np.isfinite(line_variance) and line_variance > 0

        # For each CNAME / FILENAME, homogenise this line.
        for cname in cnames:
            # The line_abundances function will need the element, ion,
            # wavelength, the variance in the line measurement, and the
            # correlation coefficients between nodes (or the matrix to
            # produce them), and the cname to know where to put things.
            self.line_abundances(cname, element, ion, wavelength,
                line_variance, X, X_nodes)

    # Need to commit before we can do the averaged results per star.
    self.release.commit()

    # Create an index to speed things up.
    # To prevent parallel problems, first check that the index has not been
    # created by a parallel homogenisation script.
    try:
        self.release.execute("""CREATE INDEX
            homogenised_line_abundances_species_index
            ON homogenised_line_abundances (cname, element, ion)""")
        self.release.commit()
    except Exception:
        # A parallel worker probably created the index first; roll back the
        # failed transaction and carry on. (Was a bare `except:`, which
        # would also swallow KeyboardInterrupt/SystemExit.)
        self.release.execute("rollback")

    # To homogenise the spectrum abundances, we will need the correlation
    # coefficients between each line.

    # Match the homogenised line abundances on a per-star basis.
    Q, Q_wavelengths = self.release._match_homogenised_line_abundances(
        element, ion, ignore_gaps=False, include_limits=False)
    Q_rho = np.atleast_2d(np.ma.corrcoef(Q))

    for cname in cnames:
        self.spectrum_abundances(element, ion, cname, rho=Q_rho,
            rho_wavelengths=Q_wavelengths, **kwargs)

    self.release.commit()

    # TODO what should we return?
    return None
def differential(self, element, ion, scaled=False, ignore_flags=False,
    **kwargs):
    """
    Calculate the differential abundance bias for each wavelength for each
    node.

    :param element:
        The element name to homogenise.

    :type element:
        str

    :param ion:
        The ionisation stage of the element to homogenise (1 = neutral).

    :type ion:
        int

    :param scaled: [optional]
        Use the node-scaled abundances instead of the raw abundances.

    :param ignore_flags: [optional]
        If True, include flagged line measurements in the match.

    :returns:
        A nested dict: bias[node][wavelength] = (offset, np.nan, -1).
    """
    # BUG FIX: the original passed include_flagged_lines=~ignore_flags,
    # but `~` is bitwise NOT (~False == -1, ~True == -2, both truthy), so
    # the argument was always truthy. Use logical negation instead.
    X, nodes, diff_data = self.release._match_species_abundances(
        element, ion, scaled=scaled,
        include_flagged_lines=not ignore_flags)

    # Calculate the full differential abundances (each column is the
    # difference between one pair of nodes; `indices` maps columns to the
    # node-index pairs).
    X_diff, indices = utils.calculate_differential_abundances(X,
        full_output=True)

    # Determine the differences to each node.
    diff_data["wavelength"] = diff_data["wavelength"].astype(float)
    wavelengths = sorted(set(diff_data["wavelength"]))

    # Default bias entry for every node/wavelength combination.
    bias = { n: { w: (0, np.nan, -1) for w in wavelengths } for n in nodes }
    for wavelength in wavelengths:
        X_wl = X_diff[(diff_data["wavelength"] == wavelength), :]

        # Count the finite differential measurements each node took part
        # in, so nodes with no data at this wavelength are excluded from
        # the fit.
        finite = { node: 0 for node in nodes }
        for k, (i, j) in enumerate(indices):
            value = np.isfinite(X_wl[:, k]).sum()
            finite[nodes[i]] += value
            finite[nodes[j]] += value

        finite_nodes = [node for node in nodes if finite[node] > 0]

        def differential_sigma(biases):
            # Apply the biases on a per-column basis.
            X_offsets = np.zeros(X_wl.shape[1])
            for i, idx in enumerate(indices):
                # These are Node_0 - Node_1
                # We want to apply (Node_0 - offset_0) - (Node_1 - offset_1)
                # so the total offset is offset_1 - offset_0
                # The biases.size is related to finite_nodes, not nodes.
                try:
                    jdx0 = finite_nodes.index(nodes[idx[0]])
                    jdx1 = finite_nodes.index(nodes[idx[1]])
                except ValueError:
                    # One of the nodes has no finite data here; leave the
                    # column offset at zero.
                    continue
                else:
                    X_offsets[i] = biases[jdx1] - biases[jdx0]
            return np.nanstd(X_wl - X_offsets)

        # Minimise the scatter in the differential abundances by fitting a
        # per-node offset.
        result = op.fmin(differential_sigma, np.zeros(len(finite_nodes)),
            disp=False)

        initial = differential_sigma(np.zeros(len(finite_nodes)))
        final = differential_sigma(result)
        logger.info("Initial and final sigma: {0:.2f} {1:.2f}".format(
            initial, final))

        for node, offset in zip(finite_nodes, result):
            bias[node][wavelength] = (-offset, np.nan, -1)

    return bias