def mback_norm(energy, mu=None, group=None, z=None, edge='K', e0=None,
               pre1=None, pre2=-50, norm1=100, norm2=None, nnorm=1, nvict=1,
               _larch=None):
    """
    simplified version of MBACK to Match mu(E) data for tabulated f''(E)
    for normalization

    Arguments:
      energy, mu:  arrays of energy and mu(E)
      group:       output group (and input group for e0)
      z:           Z number of absorber (guessed from e0 if None or < 2)
      edge:        absorption edge name (default 'K')
      e0:          edge energy
      pre1:        low E range (relative to E0) for pre-edge fit
      pre2:        high E range (relative to E0) for pre-edge fit
      norm1:       low E range (relative to E0) for post-edge fit
      norm2:       high E range (relative to E0) for post-edge fit
      nnorm:       degree of polynomial (ie, nnorm+1 coefficients will be
                   found) for post-edge normalization curve fit to the
                   scaled f2. Default=1 (linear)
      nvict:       passed unchanged to preedge()

    Returns:
      group.norm_poly:       normalized mu(E) from pre_edge()
      group.norm:            normalized mu(E) from this method
      group.norm_mback:      pre-edge subtracted mu(E) / mback edge step
      group.edge_step_poly:  edge step found on the group at entry
      group.edge_step_mback: edge step found from the scaled f2 model
      group.mback_mu:        tabulated f2 scaled and pre_edge added to
                             match mu(E)
      group.mback_params:    Group of parameters for the minimization

    group.norm and group.edge_step are only replaced when the new edge
    step differs from the existing one by less than 75%; otherwise a
    warning is printed and they are left alone.

    References:
      * MBACK (Weng, Waldo, Penner-Hahn): http://dx.doi.org/10.1086/303711
      * Chantler: http://dx.doi.org/10.1063/1.555974
    """
    ### implement the First Argument Group convention
    energy, mu, group = parse_group_args(energy, members=('energy', 'mu'),
                                         defaults=(mu,), group=group,
                                         fcn_name='mback')
    if len(energy.shape) > 1:
        energy = energy.squeeze()
    if len(mu.shape) > 1:
        mu = mu.squeeze()

    group = set_xafsGroup(group, _larch=_larch)
    # keep a copy of the existing normalization before group.norm is replaced
    # NOTE(review): this reads group.norm, so it looks like pre_edge() must
    # already have been run on the group -- confirm against callers.
    group.norm_poly = group.norm*1.0

    if e0 is None:
        e0 = getattr(group, 'e0', None)
        if e0 is None:
            find_e0(energy, mu, group=group)
            e0 = group.e0

    atsym = None
    if z is None or z < 2:
        # use the resolved e0: the group need not carry an e0 attribute
        # when e0 was passed in explicitly
        atsym, edge = guess_edge(e0, _larch=_larch)
        z = atomic_number(atsym)
    if atsym is None and z is not None:
        atsym = atomic_symbol(z)

    if getattr(group, 'pre_edge_details', None) is None:  # pre_edge never run
        # NOTE(review): the return value is discarded here, while the code
        # below reads group.pre_edge and group.edge_step -- verify whether
        # the group-writing pre_edge() was intended.
        preedge(energy, mu, pre1=pre1, pre2=pre2, nvict=nvict,
                norm1=norm1, norm2=norm2, e0=e0, nnorm=nnorm)

    mu_pre = mu - group.pre_edge
    f2 = f2_chantler(z, energy)

    weights = np.ones(len(energy))*1.0

    if norm2 is None:
        norm2 = max(energy) - e0
    if norm2 < 0:
        norm2 = max(energy) - e0 - norm2

    # avoid l2 and higher edges: stop the post-edge range at the next edge
    edge_name = edge.lower()
    if edge_name == 'l3':
        e_l2 = xray_edge(z, 'L2').edge
        norm2 = min(norm2, e_l2-e0)
    elif edge_name == 'l2':
        e_l1 = xray_edge(z, 'L1').edge
        norm2 = min(norm2, e_l1-e0)

    ipre2 = index_of(energy, e0+pre2)
    inor1 = index_of(energy, e0+norm1)
    inor2 = index_of(energy, e0+norm2) + 1

    # full weight below e0+pre2, zero weight from there on, then a ramp
    # from 0.1 to 1.0 across the normalization range
    weights[ipre2:] = 0.0
    weights[inor1:inor2] = np.linspace(0.1, 1.0, inor2-inor1)

    params = Parameters()
    params.add(name='slope', value=0.0, vary=True)
    params.add(name='offset', value=-f2[0], vary=True)
    params.add(name='scale', value=f2[-1], vary=True)

    out = minimize(f2norm, params, method='leastsq',
                   gtol=1.e-5, ftol=1.e-5, xtol=1.e-5, epsfcn=1.e-5,
                   kws=dict(en=energy, mu=mu_pre, f2=f2, weights=weights))

    p = out.params.valuesdict()

    model = (p['offset'] + p['slope']*energy + f2) * p['scale']

    group.mback_mu = model + group.pre_edge

    # edge step of the fitted model, found with the same ranges as the data
    pre_f2 = preedge(energy, model, nnorm=nnorm, nvict=nvict, e0=e0,
                     pre1=pre1, pre2=pre2, norm1=norm1, norm2=norm2)

    step_new = pre_f2['edge_step']

    group.edge_step_poly = group.edge_step
    group.edge_step_mback = step_new
    group.norm_mback = mu_pre / step_new

    group.mback_params = Group(e0=e0, pre1=pre1, pre2=pre2, norm1=norm1,
                               norm2=norm2, nnorm=nnorm, fit_params=p,
                               fit_weights=weights, model=model, f2=f2,
                               pre_f2=pre_f2, atsym=atsym, edge=edge)

    # 1.e-13 guards against division by a zero edge step
    if (abs(step_new - group.edge_step)/(1.e-13+group.edge_step)) > 0.75:
        print("Warning: mback edge step failed....")
    else:
        group.edge_step = step_new
        group.norm = group.norm_mback
def mback(energy, mu=None, group=None, z=None, edge='K', e0=None, pre1=None, pre2=-50,
          norm1=100, norm2=None, order=3, leexiang=False, tables='chantler', fit_erfc=False,
          return_f1=False, _larch=None):
    """
    Match mu(E) data for tabulated f''(E) using the MBACK algorithm and,
    optionally, the Lee & Xiang extension

    Arguments
    ----------
      energy:     array of x-ray energies, in eV.
      mu:         array of mu(E).
      group:      output group.
      z:          atomic number of the absorber.
      edge:       x-ray absorption edge (default 'K')
      e0:         edge energy, in eV.  If None, it will be determined here.
      pre1:       low E range (relative to e0) for pre-edge region.
      pre2:       high E range (relative to e0) for pre-edge region.
      norm1:      low E range (relative to e0) for post-edge region.
      norm2:      high E range (relative to e0) for post-edge region.
      order:      order of the legendre polynomial for normalization.
                  (default=3, min=0, max=5).
      leexiang:   boolean (default False)  to use the Lee & Xiang extension.
      tables:     tabulated scattering factors: 'chantler' (default) or 'cl' (cromer-liberman)
      fit_erfc:   boolean (default False) to fit parameters of error function.
      return_f1:  boolean (default False) to include the f1 array in the group.

    Returns
    -------
      None

    The following attributes will be written to the output group:
      group.f2:            tabulated f2(E).
      group.f1:            tabulated f1(E) (if 'return_f1' is True).
      group.e0:            edge energy used (snapped to the nearest energy point).
      group.fpp:           mback matched spectrum.
      group.edge_step:     edge step of spectrum.
      group.norm:          normalized spectrum.
      group.mback_details: group of parameters for the minimization.

    References:
      * MBACK (Weng, Waldo, Penner-Hahn): http://dx.doi.org/10.1086/303711
      * Lee and Xiang: http://dx.doi.org/10.1088/0004-637X/702/2/970
      * Cromer-Liberman: http://dx.doi.org/10.1063/1.1674266
      * Chantler: http://dx.doi.org/10.1063/1.555974
    """
    order = max(min(order, MAXORDER), 0)

    ### implement the First Argument Group convention
    energy, mu, group = parse_group_args(energy, members=('energy', 'mu'),
                                         defaults=(mu,), group=group,
                                         fcn_name='mback')
    if len(energy.shape) > 1:
        energy = energy.squeeze()
    if len(mu.shape) > 1:
        mu = mu.squeeze()

    group = set_xafsGroup(group, _larch=_larch)

    energy = remove_dups(energy)
    # an e0 outside the interior of the data is treated as missing
    if e0 is None or e0 < energy[1] or e0 > energy[-2]:
        e0 = find_e0(energy, mu, group=group)

    # snap e0 onto the energy grid
    ie0 = index_nearest(energy, e0)
    e0 = energy[ie0]

    if pre1 is None:
        pre1 = min(energy) - e0
    if norm2 is None:
        norm2 = max(energy) - e0
    if norm2 < 0:
        norm2 = max(energy) - e0 - norm2
    # clip the requested ranges to the data, and order each pair low-to-high
    pre1 = max(pre1, (min(energy) - e0))
    norm2 = min(norm2, (max(energy) - e0))

    if pre1 > pre2:
        pre1, pre2 = pre2, pre1
    if norm1 > norm2:
        norm1, norm2 = norm2, norm1

    p1 = index_of(energy, pre1+e0)
    p2 = index_nearest(energy, pre2+e0)
    n1 = index_nearest(energy, norm1+e0)
    n2 = index_of(energy, norm2+e0)
    # make sure each fit region spans at least a couple of points
    if p2 - p1 < 2:
        p2 = min(len(energy), p1 + 2)
    if n2 - n1 < 2:
        n2 = min(len(energy), n1 + 2)

    ## theta is a boolean array indicating the
    ## energy values considered for the fit.
    ## theta=1 for included values, theta=0 for excluded values.
    theta = np.zeros_like(energy, dtype='int')
    theta[p1:(p2+1)] = 1
    theta[n1:(n2+1)] = 1

    ## weights for the pre- and post-edge regions, as defined in the MBACK paper (?)
    ## each region is weighted by sqrt(number of points in that region)
    weight = np.ones_like(energy, dtype=float)
    weight[p1:(p2+1)] = np.sqrt(np.sum(weight[p1:(p2+1)]))
    weight[n1:(n2+1)] = np.sqrt(np.sum(weight[n1:(n2+1)]))

    ## get the f'' function from CL or Chantler
    if tables.lower() == 'chantler':
        f1 = f1_chantler(z, energy, _larch=_larch)
        f2 = f2_chantler(z, energy, _larch=_larch)
    else:
        (f1, f2) = f1f2(z, energy, edge=edge, _larch=_larch)
    group.f2 = f2
    if return_f1:
        group.f1 = f1

    em = find_xray_line(z, edge)[0]  # erfc centroid

    params = Parameters()
    params.add(name='s', value=1.0, vary=True)             # scale of data
    params.add(name='xi', value=50.0, vary=False, min=0)   # width of erfc
    params.add(name='a', value=0.0, vary=False)            # amplitude of erfc
    if fit_erfc:
        params['a'].vary = True
        params['a'].value = 0.5
        params['xi'].vary = True

    for i in range(order+1):  # polynomial coefficients c0 .. c<order>
        params.add(name='c%d' % i, value=0, vary=True)

    out = minimize(match_f2, params, method='leastsq',
                   gtol=1.e-5, ftol=1.e-5, xtol=1.e-5, epsfcn=1.e-5,
                   kws=dict(en=energy, mu=mu, f2=f2, e0=e0, em=em,
                            order=order, weight=weight, theta=theta,
                            leexiang=leexiang))

    opars = out.params.valuesdict()
    eoff = energy - e0

    # erfc step plus polynomial in (E - e0), rebuilt from the fitted values
    norm_function = opars['a']*erfc((energy-em)/opars['xi']) + opars['c0']
    for i in range(order):
        attr = 'c%d' % (i + 1)
        if attr in opars:
            norm_function += opars[attr] * eoff**(i + 1)

    group.e0 = e0
    group.fpp = opars['s']*mu - norm_function

    # calculate edge step and normalization from f2 + norm_function
    pre_f2 = preedge(energy, group.f2+norm_function, e0=e0, pre1=pre1,
                     pre2=pre2, norm1=norm1, norm2=norm2, nnorm=2, nvict=0)
    group.edge_step = pre_f2['edge_step'] / opars['s']
    group.norm = (opars['s']*mu - pre_f2['pre_edge']) / pre_f2['edge_step']

    group.mback_details = Group(params=opars, pre_f2=pre_f2,
                                f2_scaled=opars['s']*f2,
                                norm_function=norm_function)
def mback(energy, mu, group=None, order=3, z=None, edge='K', e0=None, emin=None, emax=None,
          whiteline=None, leexiang=False, tables='chantler', fit_erfc=False, return_f1=False,
          _larch=None):
    """
    Match mu(E) data for tabulated f''(E) using the MBACK algorithm and,
    optionally, the Lee & Xiang extension

    Arguments:
      energy, mu:    arrays of energy and mu(E)
      order:         order of polynomial [3]
      group:         output group (and input group for e0)
      z:             Z number of absorber
      edge:          absorption edge (K, L3)
      e0:            edge energy
      emin:          beginning energy for fit
      emax:          ending energy for fit
      whiteline:     exclusion zone around white lines
      leexiang:      flag to use the Lee & Xiang extension
      tables:        'chantler' (default) or 'cl'
      fit_erfc:      True to float parameters of error function
      return_f1:     True to put the f1 array in the group

    Returns:
      group.f2:      tabulated f2(E)
      group.f1:      tabulated f1(E) (if return_f1 is True)
      group.fpp:     matched data
      group.mback_params:  Group of parameters for the minimization

    References:
      * MBACK (Weng, Waldo, Penner-Hahn): http://dx.doi.org/10.1086/303711
      * Lee and Xiang: http://dx.doi.org/10.1088/0004-637X/702/2/970
      * Cromer-Liberman: http://dx.doi.org/10.1063/1.1674266
      * Chantler: http://dx.doi.org/10.1063/1.555974
    """
    # set order of polynomial, clipped to 1 .. MAXORDER
    order = int(order)
    if order < 1:
        order = 1
    if order > MAXORDER:
        order = MAXORDER

    ### implement the First Argument Group convention
    energy, mu, group = parse_group_args(energy, members=('energy', 'mu'),
                                         defaults=(mu,), group=group,
                                         fcn_name='mback')
    if len(energy.shape) > 1:
        energy = energy.squeeze()
    if len(mu.shape) > 1:
        mu = mu.squeeze()

    group = set_xafsGroup(group, _larch=_larch)

    # resolve e0: explicit argument, then the tabulated edge energy,
    # then the group's e0, and finally find_e0() on the data
    if e0 is None:
        e0 = xray_edge(z, edge, _larch=_larch)[0]
    if e0 is None:
        e0 = getattr(group, 'e0', None)
    if e0 is None:
        e0 = find_e0(energy, mu, group=group)

    ### theta is an array used to exclude the regions <emin, >emax, and
    ### around white lines, theta=0.0 in excluded regions, theta=1.0 elsewhere
    (i1, i2) = (0, len(energy)-1)
    if emin is not None:
        i1 = index_of(energy, emin)
    if emax is not None:
        i2 = index_of(energy, emax)
    theta = np.ones(len(energy))  # default: 1 throughout
    theta[0:i1] = 0
    # slice through the end of the array so the final point is excluded too
    # (presumably index_of gives the last point at or below emax -- confirm)
    theta[i2+1:] = 0
    if whiteline:
        pre = 1.0*(energy < e0)
        post = 1.0*(energy > e0+float(whiteline))
        theta = theta * (pre + post)
        if edge.lower().startswith('l'):
            l2 = xray_edge(z, 'L2', _larch=_larch)[0]
            l2_pre = 1.0*(energy < l2)
            l2_post = 1.0*(energy > l2+float(whiteline))
            theta = theta * (l2_pre + l2_post)

    ## this is used to weight the pre- and post-edge differently as
    ## defined in the MBACK paper
    weight1 = 1*(energy < e0)
    weight2 = 1*(energy > e0)
    weight = np.sqrt(sum(weight1))*weight1 + np.sqrt(sum(weight2))*weight2

    ## get the f'' function from CL or Chantler
    if tables.lower() == 'chantler':
        f1 = f1_chantler(z, energy, _larch=_larch)
        f2 = f2_chantler(z, energy, _larch=_larch)
    else:
        (f1, f2) = f1f2(z, energy, edge=edge, _larch=_larch)
    group.f2 = f2
    if return_f1:
        group.f1 = f1

    # every L edge is looked up under the line family name 'L'
    n = edge
    if edge.lower().startswith('l'):
        n = 'L'
    params = Group(s=Parameter(1, vary=True, _larch=_larch),      # scale of data
                   xi=Parameter(50, vary=fit_erfc, min=0, _larch=_larch),  # width of erfc
                   em=Parameter(xray_line(z, n, _larch=_larch)[0], vary=False, _larch=_larch),  # erfc centroid
                   e0=Parameter(e0, vary=False, _larch=_larch),   # abs. edge energy
                   ## various arrays need by the objective function
                   en=energy,
                   mu=mu,
                   f2=group.f2,
                   weight=weight,
                   theta=theta,
                   leexiang=leexiang,
                   _larch=_larch)
    if fit_erfc:
        params.a = Parameter(1, vary=True, _larch=_larch)   # amplitude of erfc
    else:
        params.a = Parameter(0, vary=False, _larch=_larch)  # amplitude of erfc

    for i in range(order):  # polynomial coefficients
        setattr(params, 'c%d' % i, Parameter(0, vary=True, _larch=_larch))

    fit = Minimizer(match_f2, params, _larch=_larch, toler=1.e-5)
    fit.leastsq()

    # rebuild the fitted background: erfc step plus polynomial in (E - e0)
    eoff = energy - params.e0.value
    normalization_function = (params.a.value
                              * erfc((energy-params.em.value)/params.xi.value)
                              + params.c0.value)
    for j in range(1, MAXORDER+1):
        attr = 'c%d' % j
        if hasattr(params, attr):
            normalization_function = (normalization_function
                                      + getattr(params, attr).value * eoff**j)

    group.fpp = params.s.value*mu - normalization_function
    group.mback_params = params
def mback(energy, mu, group=None, order=3, z=None, edge='K', e0=None,
          emin=None, emax=None, whiteline=None, leexiang=False,
          tables='chantler', fit_erfc=False, return_f1=False, _larch=None):
    """
    Match mu(E) data for tabulated f''(E) using the MBACK algorithm and,
    optionally, the Lee & Xiang extension

    Arguments:
      energy, mu:    arrays of energy and mu(E)
      order:         order of polynomial [3]
      group:         output group (and input group for e0)
      z:             Z number of absorber
      edge:          absorption edge (K, L3)
      e0:            edge energy
      emin:          beginning energy for fit
      emax:          ending energy for fit
      whiteline:     exclusion zone around white lines
      leexiang:      flag to use the Lee & Xiang extension
      tables:        'chantler' (default) or 'cl'
      fit_erfc:      True to float parameters of error function
      return_f1:     True to put the f1 array in the group

    Returns:
      group.f2:      tabulated f2(E)
      group.f1:      tabulated f1(E) (if return_f1 is True)
      group.fpp:     matched data
      group.mback_params:  Group of parameters for the minimization

    References:
      * MBACK (Weng, Waldo, Penner-Hahn): http://dx.doi.org/10.1086/303711
      * Lee and Xiang: http://dx.doi.org/10.1088/0004-637X/702/2/970
      * Cromer-Liberman: http://dx.doi.org/10.1063/1.1674266
      * Chantler: http://dx.doi.org/10.1063/1.555974
    """
    # polynomial order is forced into the range 1 .. MAXORDER
    order = min(max(int(order), 1), MAXORDER)

    ### implement the First Argument Group convention
    energy, mu, group = parse_group_args(energy, members=('energy', 'mu'),
                                         defaults=(mu, ), group=group,
                                         fcn_name='mback')
    if len(energy.shape) > 1:
        energy = energy.squeeze()
    if len(mu.shape) > 1:
        mu = mu.squeeze()

    group = set_xafsGroup(group, _larch=_larch)

    # e0 fallbacks, in order: tabulated edge energy, the group's e0,
    # then find_e0() on the data (whose return value must be kept)
    if e0 is None:
        e0 = xray_edge(z, edge, _larch=_larch)[0]
    if e0 is None:
        e0 = getattr(group, 'e0', None)
    if e0 is None:
        e0 = find_e0(energy, mu, group=group)

    ### theta is an array used to exclude the regions <emin, >emax, and
    ### around white lines, theta=0.0 in excluded regions, theta=1.0 elsewhere
    i1 = 0 if emin is None else index_of(energy, emin)
    i2 = len(energy) - 1 if emax is None else index_of(energy, emax)
    theta = np.ones(len(energy))  # default: 1 throughout
    theta[:i1] = 0
    # run the slice to the end of the array so the last point is masked too
    # (presumably index_of gives the last point at or below emax -- confirm)
    theta[i2 + 1:] = 0
    if whiteline:
        width = float(whiteline)
        theta = theta * (1.0 * (energy < e0) + 1.0 * (energy > e0 + width))
        if edge.lower().startswith('l'):
            l2 = xray_edge(z, 'L2', _larch=_larch)[0]
            theta = theta * (1.0 * (energy < l2) + 1.0 * (energy > l2 + width))

    ## this is used to weight the pre- and post-edge differently as
    ## defined in the MBACK paper
    weight1 = 1 * (energy < e0)
    weight2 = 1 * (energy > e0)
    weight = np.sqrt(sum(weight1)) * weight1 + np.sqrt(sum(weight2)) * weight2

    ## get the f'' function from CL or Chantler
    if tables.lower() == 'chantler':
        f1 = f1_chantler(z, energy, _larch=_larch)
        f2 = f2_chantler(z, energy, _larch=_larch)
    else:
        (f1, f2) = f1f2(z, energy, edge=edge, _larch=_larch)
    group.f2 = f2
    if return_f1:
        group.f1 = f1

    # every L edge is looked up under the line family name 'L'
    n = 'L' if edge.lower().startswith('l') else edge
    params = Group(
        s=Parameter(1, vary=True, _larch=_larch),  # scale of data
        xi=Parameter(50, vary=fit_erfc, min=0, _larch=_larch),  # width of erfc
        em=Parameter(xray_line(z, n, _larch=_larch)[0], vary=False,
                     _larch=_larch),  # erfc centroid
        e0=Parameter(e0, vary=False, _larch=_larch),  # abs. edge energy
        ## various arrays need by the objective function
        en=energy,
        mu=mu,
        f2=group.f2,
        weight=weight,
        theta=theta,
        leexiang=leexiang,
        _larch=_larch)
    if fit_erfc:
        params.a = Parameter(1, vary=True, _larch=_larch)  # amplitude of erfc
    else:
        params.a = Parameter(0, vary=False, _larch=_larch)  # amplitude of erfc

    for i in range(order):  # polynomial coefficients
        setattr(params, 'c%d' % i, Parameter(0, vary=True, _larch=_larch))

    fit = Minimizer(match_f2, params, _larch=_larch, toler=1.e-5)
    fit.leastsq()

    # fitted background: erfc step plus polynomial in (E - e0)
    eoff = energy - params.e0.value
    normalization_function = params.a.value * erfc(
        (energy - params.em.value) / params.xi.value) + params.c0.value
    for j in range(1, MAXORDER + 1):
        coef = getattr(params, 'c%d' % j, None)
        if coef is not None:
            normalization_function = normalization_function + coef.value * eoff**j

    group.fpp = params.s.value * mu - normalization_function
    group.mback_params = params
10067.28 120488.7 89110.0998902 0.30168329 10073.72 118833.7 88265.1000656 0.29738025 10080.18 118434.7 88372.1004302 0.29280544 10086.66 117995.7 88449.0998063 0.28822094 10093.17 118435.7 89180.0997098 0.28371228 10099.69 117303.7 88720.0998253 0.27927983 10106.22 117929.7 89581.1003571 0.27494432 10112.78 116857.7 89144.1003332 0.27070279 10119.36 115791.7 88718.1000129 0.26632896 10125.96 111467.7 85797.099695 0.26174966 10132.57 110079.7 85128.099834 0.25704747 10139.21 104190.7 80953.0999403 0.2523529 10145.86 93726.7 73074.0996945 0.24890911''' raw_data_lines = raw_data.split('\n') raw_data_table = [] for line in raw_data_lines: raw_data_table.append(list(map(lambda x: float(x), line.strip().split()))) table = np.array(raw_data_table) group.energy = table[:, 0] group.mu = table[:, 3] e0 = find_e0(group, _larch=mylarch) pre_edge(group, _larch=mylarch) autobk(group, _larch=mylarch) xftf(group, _larch=mylarch) xftr(group, _larch=mylarch)
def mback(energy, mu=None, group=None, order=3, z=None, edge='K', e0=None,
          emin=None, emax=None, whiteline=None, leexiang=False,
          tables='chantler', fit_erfc=False, return_f1=False, _larch=None):
    """
    Match mu(E) data for tabulated f''(E) using the MBACK algorithm and,
    optionally, the Lee & Xiang extension

    Arguments:
      energy, mu:    arrays of energy and mu(E)
      order:         order of polynomial [3]
      group:         output group (and input group for e0)
      z:             Z number of absorber
      edge:          absorption edge (K, L3)
      e0:            edge energy
      emin:          beginning energy for fit
      emax:          ending energy for fit
      whiteline:     exclusion zone around white lines
      leexiang:      flag to use the Lee & Xiang extension
      tables:        'chantler' (default) or 'cl'
      fit_erfc:      True to float parameters of error function
      return_f1:     True to put the f1 array in the group

    Returns:
      group.f2:      tabulated f2(E)
      group.f1:      tabulated f1(E) (if return_f1 is True)
      group.e0:      edge energy used
      group.fpp:     matched data
      group.edge_step:     edge step of f2 + fitted background, divided by
                           the fitted scale
      group.norm:    pre-edge subtracted mu(E) divided by group.edge_step
      group.mback_params:  dict of fitted parameter values

    References:
      * MBACK (Weng, Waldo, Penner-Hahn): http://dx.doi.org/10.1086/303711
      * Lee and Xiang: http://dx.doi.org/10.1088/0004-637X/702/2/970
      * Cromer-Liberman: http://dx.doi.org/10.1063/1.1674266
      * Chantler: http://dx.doi.org/10.1063/1.555974
    """
    # set order of polynomial, clipped to 1 .. MAXORDER
    order = int(order)
    if order < 1:
        order = 1
    if order > MAXORDER:
        order = MAXORDER

    ### implement the First Argument Group convention
    energy, mu, group = parse_group_args(energy, members=('energy', 'mu'),
                                         defaults=(mu, ), group=group,
                                         fcn_name='mback')
    if len(energy.shape) > 1:
        energy = energy.squeeze()
    if len(mu.shape) > 1:
        mu = mu.squeeze()

    group = set_xafsGroup(group, _larch=_larch)

    # resolve e0: explicit argument, then the tabulated edge energy,
    # then the group's e0, and finally find_e0() on the data
    if e0 is None:
        e0 = xray_edge(z, edge, _larch=_larch)[0]
    if e0 is None:
        e0 = getattr(group, 'e0', None)
    if e0 is None:
        e0 = find_e0(energy, mu, group=group)

    ### theta is an array used to exclude the regions <emin, >emax, and
    ### around white lines, theta=0.0 in excluded regions, theta=1.0 elsewhere
    (i1, i2) = (0, len(energy) - 1)
    if emin is not None:
        i1 = index_of(energy, emin)
    if emax is not None:
        i2 = index_of(energy, emax)
    theta = np.ones(len(energy))  # default: 1 throughout
    theta[0:i1] = 0
    # slice through the end of the array so the final point is excluded too
    # (presumably index_of gives the last point at or below emax -- confirm)
    theta[i2 + 1:] = 0
    if whiteline:
        pre = 1.0 * (energy < e0)
        post = 1.0 * (energy > e0 + float(whiteline))
        theta = theta * (pre + post)
        if edge.lower().startswith('l'):
            l2 = xray_edge(z, 'L2', _larch=_larch)[0]
            l2_pre = 1.0 * (energy < l2)
            l2_post = 1.0 * (energy > l2 + float(whiteline))
            theta = theta * (l2_pre + l2_post)

    ## this is used to weight the pre- and post-edge differently as
    ## defined in the MBACK paper
    weight1 = 1 * (energy < e0)
    weight2 = 1 * (energy > e0)
    weight = np.sqrt(sum(weight1)) * weight1 + np.sqrt(sum(weight2)) * weight2

    ## get the f'' function from CL or Chantler
    if tables.lower() == 'chantler':
        f1 = f1_chantler(z, energy, _larch=_larch)
        f2 = f2_chantler(z, energy, _larch=_larch)
    else:
        (f1, f2) = f1f2(z, energy, edge=edge, _larch=_larch)
    group.f2 = f2
    if return_f1:
        group.f1 = f1

    em = xray_line(z, edge.upper(), _larch=_larch)[0]  # erfc centroid

    params = Parameters()
    params.add(name='s', value=1, vary=True)  # scale of data
    params.add(name='xi', value=50, vary=fit_erfc, min=0)  # width of erfc
    params.add(name='a', value=0, vary=False)  # amplitude of erfc
    if fit_erfc:
        params['a'].value = 1
        params['a'].vary = True

    for i in range(order):  # polynomial coefficients
        params.add(name='c%d' % i, value=0, vary=True)

    out = minimize(match_f2, params, method='leastsq',
                   gtol=1.e-5, ftol=1.e-5, xtol=1.e-5, epsfcn=1.e-5,
                   kws=dict(en=energy, mu=mu, f2=f2, e0=e0, em=em,
                            order=order, weight=weight, theta=theta,
                            leexiang=leexiang))

    opars = out.params.valuesdict()
    eoff = energy - e0

    # fitted background: erfc step plus polynomial in (E - e0)
    norm_function = opars['a'] * erfc((energy - em) / opars['xi']) + opars['c0']
    for j in range(1, order + 1):
        attr = 'c%d' % j
        if attr in opars:
            norm_function += opars[attr] * eoff**j

    group.e0 = e0
    group.fpp = opars['s'] * mu - norm_function
    group.mback_params = opars

    # calculate edge step from f2 + norm_function: should be very smooth
    pre_f2 = preedge(energy, group.f2 + norm_function, e0=e0, nnorm=2, nvict=0)
    group.edge_step = pre_f2['edge_step'] / opars['s']

    pre_fpp = preedge(energy, mu, e0=e0, nnorm=2, nvict=0)
    group.norm = (mu - pre_fpp['pre_edge']) / group.edge_step
def deglitch(energy, mu, group, e_window='xas', sg_window_length=9, sg_polyorder=3,
             alpha=.025, max_glitches='Default', max_glitch_length=4, plot_res=False):
    """Routine to deglitch a XAS spectrum.

    This function deglitches points in XAS data through two-step
    fitting with Savitzky-Golay filter and outlier identification
    with generalized extreme student deviate test.

    This code requires the data group to have at least an energy
    and normalized absorption channel.

    Parameters
    ----------
    energy : array
        Array of the energies of the XAS scan
    mu : array
        Array of the absorption coefficient data
    group : Larch Group
        Larch Group to be modified by deglitching procedure
    e_window : {'xas', 'xanes', 'exafs', (float, float)}
        'xas' scans the full spectrum.
        'xanes' looks from the beginning up to the edge + 150eV.
        'exafs' looks at the edge + 150eV to the end.
        (float, float) provides start and end energies in eV for analysis
    sg_window_length : odd int, default: 9
        Window length to build Savitzky-Golay filter from normalized data
    sg_polyorder : int, default: 3
        Polynomial order to build Savitzky-Golay filter from normalized data
    alpha : float, default: .025
        Alpha value for generalized ESD test for outliers.
    max_glitches : int, default: len(data)//10
        Maximum number of outliers to remove.
    max_glitch_length : int, default: 4
        Used to size the rolling-median windows that normalize the residuals.
    plot_res : bool, default: False
        Command to plot the final normalized residuals and a histogram of their distribution.

    Returns
    -------
    None
        The group is modified in place: every array or list member with the
        same length as `energy` has the glitch points removed, and the removed
        values plus the parameters used are recorded in `group.glitches`,
        keyed by glitch energy.  The group is left untouched when no glitch
        is found.
    """
    import numpy as np
    from scipy.interpolate import interp1d
    from scipy.signal import savgol_filter
    from larch_plugins.utils import group2dict
    from larch_plugins.xafs import find_e0
    from larch import Interpreter
    from copy import deepcopy

    # computing the energy window to perform the deglitch:
    e_val = 150  # energy limit to separate xanes from exafs [eV]
    if isinstance(e_window, str) and e_window in ('xas', 'xanes', 'exafs'):
        if e_window == 'xas':
            e_window = [energy[0], energy[-1]]
        else:
            if hasattr(group, 'e0'):
                e0 = getattr(group, 'e0')
            else:
                # an interpreter session is only needed to look up e0
                session = Interpreter(with_plugins=False)
                e0 = find_e0(energy, mu=mu, group=group, _larch=session)
            if e_window == 'xanes':
                e_window = [energy[0], e0+e_val]
            else:
                e_window = [e0+e_val, energy[-1]]

    index = np.where((energy >= e_window[0]) & (energy <= e_window[1]))[0]
    if len(index) == 0:  # no data inside the requested window
        return
    offset = index[0]    # converts window-relative indices to full-array ones

    # copy of the data: interpolated values for posterior analysis go in here
    mu_copy = np.copy(mu)

    # not limited to start:end to ensure data at edges gets best possible fit
    sg_init = savgol_filter(mu, sg_window_length, sg_polyorder)

    # computing the difference between normalized spectrum and the savitsky-golay filter
    res1 = mu - sg_init
    roll_mad1 = roll_med(abs(res1), window=2*(sg_window_length+(max_glitch_length-1))+1,
                         edgemethod='calc')
    res_norm = res1 / roll_mad1

    # If the max is not set to an int, the max will be set to the default of
    # the length of the analyzed data//10
    if not isinstance(max_glitches, int):
        max_glitches = len(res1)//10

    # finds outliers in residuals between data and Savitzky-Golay filter
    out1 = genesd(res_norm[index], max_glitches, alpha)
    if len(out1) == 0:  # deglitching ends here if no outliers are found in this first round
        return
    out1 = np.asarray(out1) + offset

    # removes points that are poorly fitted by the S-G filter and replaces
    # them with values interpolated from the remaining points
    e2 = np.delete(energy, out1)
    n2 = np.delete(mu_copy, out1)
    # extrapolate so a flagged first/last point does not raise an
    # out-of-bounds error
    f = interp1d(e2, n2, kind='cubic', fill_value='extrapolate')
    mu_copy[out1] = f(energy[out1])

    # fits the absorption with the interpolated points
    sg_final = savgol_filter(mu_copy, sg_window_length, sg_polyorder)
    res2 = mu - sg_final
    roll_mad2 = roll_med(abs(res2), window=(2*max_glitch_length)+1, edgemethod='calc')
    res_norm2 = res2 / roll_mad2

    if plot_res:
        import matplotlib.pyplot as plt
        fig, axes = plt.subplots(ncols=2, figsize=(8, 2.5),
                                 gridspec_kw={'width_ratios': [2, 1]})
        # NOTE(review): the first-pass residuals (res_norm) are plotted while
        # the histogram limits come from res_norm2 -- confirm this is intended.
        axes[0].plot(res_norm, color='tab:orange')
        axes[0].set_ylabel('Residuals (μ(E))')
        axes[0].set_xlabel('Point Index')
        # plotting the normalized residuals on a point-index basis

        critval = find_critval(res_norm2, alpha)
        # at least one bin, so short spectra do not request zero bins
        axes[1].hist(res_norm, bins=max(1, len(energy)//20),
                     range=(-1*critval, critval), color='tab:orange')
        axes[1].set_ylabel('Number of Points')
        axes[1].set_xlabel('Norm. Resid. Value')
        # Will not plot large outliers, since the limits are set at the
        # initial critical values for the genesd
        plt.show()

    # by normalizing the standard deviation to the same window as our S-G calculation,
    # we can tackle the full spectrum, accounting for the noise we expect in the data;
    # as a bonus, with the S-G filter, we ideally have a near-normal distribution of residuals
    # (which makes the generalized ESD a robust method for finding the outliers)
    glitches_init = np.asarray(genesd(res_norm2[index], max_glitches, alpha)) + offset

    # keep second-round outliers only if they lie close to a first-round one
    near = (sg_window_length//2) + 1
    confirmed = [glitch for glitch in glitches_init
                 if np.any(np.abs(glitch - out1) < near)]
    if not confirmed:
        return

    # descending order, so deleting one index never shifts the ones still to come
    glitches = np.sort(np.array(confirmed, dtype=float))[::-1].astype(int)

    data_filt = deepcopy(group)           # non-destructive copy for comparison
    group_dict = group2dict(data_filt)    # transfers data copy to a dictionary (easier to work with)

    # everything that is of the same length as the energy array will have the
    # indices corresponding to glitches removed; energy itself is deleted last
    target_length = len(energy)
    per_point_keys = []
    for key in dir(group):
        member = getattr(group, key)
        if (isinstance(member, (np.ndarray, list))
                and len(member) == target_length and key != 'energy'):
            per_point_keys.append(key)

    glitch_dict = {energy[glitch]: {} for glitch in glitches}
    for number in glitches:
        record = glitch_dict[energy[number]]
        for key in per_point_keys:
            record[key] = group_dict[key][number]
            # replaces the array with one that removes the glitch point
            group_dict[key] = np.delete(group_dict[key], number)
        group_dict['energy'] = np.delete(group_dict['energy'], number)
        record['params'] = {'e_window': e_window,
                            'sg_window_length': sg_window_length,
                            'sg_polyorder': sg_polyorder,
                            'alpha': alpha,
                            'max_glitches': max_glitches,
                            'max_glitch_length': max_glitch_length}

    if hasattr(group, 'glitches'):
        group_dict['glitches'].update(glitch_dict)
    else:
        setattr(group, 'glitches', glitch_dict)

    for item in list(group_dict.keys()):
        setattr(group, item, group_dict[item])
    return