def test_gmean_gerr1(self):
    """Unweighted results must equal results obtained with uniform weights."""
    # 1d input; note that it contains a nan and a -inf entry
    samples = np.array([3, 5, 8, 10, np.nan, 3, 4, -np.inf])
    unit_weights = np.ones(len(samples))
    mean_plain = gmean(samples)
    mean_weighted = gmean(samples, weights=unit_weights)
    self.assertEqual(mean_plain, mean_weighted)
    err_plain = gerr(samples)
    err_weighted = gerr(samples, weights=unit_weights)
    self.assertEqual(err_plain, err_weighted)

    # same data arranged as 2 rows x 4 columns, reduced along axis 0
    table = samples.reshape((2, 4))
    unit_weights = np.ones(table.shape)
    # reducing over axis 0 has to leave one value per column
    self.assertEqual(len(gmean(table, axis=0)), table.shape[1])
    np.testing.assert_array_equal(
        gmean(table, axis=0),
        gmean(table, axis=0, weights=unit_weights))
    np.testing.assert_array_equal(
        gerr(table, axis=0),
        gerr(table, axis=0, weights=unit_weights))
def test_gmean_gerr2(self):
    """Results are same for repeated elements and adapted weights

    ``x1`` repeats each finite value as often as the matching entry of
    ``w`` states for ``x2``, so the weighted statistics of ``x2`` are
    compared against the unweighted statistics of ``x1``.
    """
    # NOTE: this method used to be defined twice with identical logic; the
    # first definition was shadowed by the second and never executed, so
    # only a single definition is kept.
    x1 = np.array([1, 2, 2, 5, 5, 5, 5, 5, np.inf])
    x2 = np.array([1, 2, 5, np.inf])
    # NOTE(review): the weight of the inf entry (10) does not match its
    # multiplicity in x1 (1) -- presumably non-finite entries are ignored
    # by gmean/gerr; confirm against their implementation.
    w = np.array([1, 2, 5, 10])
    self.assertEqual(gmean(x1), gmean(x2, weights=w))
    # errors can only be compared for biased std
    np.testing.assert_array_equal(
        gerr(x1, unbiased=False)[1:],
        gerr(x2, weights=w, unbiased=False)[1:])
    # weighted errors are bigger than unweighted
    np.testing.assert_array_less(gerr(x1)[1:], gerr(x2, weights=w)[1:])
    # biased errors are smaller than unbiased
    np.testing.assert_array_less(
        gerr(x2, weights=w, unbiased=False)[1:],
        gerr(x2, weights=w)[1:])
def align_site_responses(results, station=None, response=1., use_sparse=True,
                         seismic_moment_method=None,
                         seismic_moment_options=None,
                         ignore_stations=None):
    """
    Align station site responses and correct source parameters (experimental)

    Determine best factor for each event so that site response is the same
    for each station and different events. For every frequency band a linear
    least-squares system in the logarithm of the per-event factors is set
    up: each row links two events which share a station, and one final row
    pins the (mean) site response to ``response``.

    :param results: original result dictionary. ``results['events']`` maps
        event ids to event results (entries which are ``None`` are dropped);
        each event result holds ``'R'``, a mapping from station to a
        sequence of site responses indexed by frequency band.
    :param station: ``None`` to use all stations for the normalization row,
        otherwise a station or a container of stations to restrict it to.
    :param response: value the normalized site response is pinned to
        (``log(response)`` enters the right-hand side).
    :param use_sparse: build a ``scipy.sparse.csr_matrix`` and solve with
        ``lsmr``; otherwise build a dense array and solve with
        ``scipy.linalg.lstsq``. Forced to ``False`` for a single event.
    :param seismic_moment_method: passed on to
        ``calculate_source_properties``.
    :param seismic_moment_options: passed on to
        ``calculate_source_properties``.
    :param ignore_stations: passed on to ``_find_unconnected_areas``.

    For the other options see the help for the corresponding command line
    options or configuration parameters.

    :return: corrected result dictionary. This is the same object as
        ``results``: ``results['events']`` is replaced by a sorted
        ``OrderedDict`` and ``results['R']`` is updated in place.
    """
    # Ignore not existing event results, sort dict by event id
    results['events'] = OrderedDict(
        sorted([(evid, eres) for (evid, eres) in results['events'].items()
                if eres is not None], key=lambda x: x[0]))
    # Joining of unconnected areas is hard-wired off; the branch below and
    # all later ``join_unconnected`` checks are therefore never taken.
    join_unconnected = None
    if join_unconnected:
        inventory = None
        msg = 'This feature needs more work and tests'
        raise NotImplementedError(msg)
    Ne = len(results['events'])
    if Ne == 1:
        use_sparse = False
    # Determine number of freqs
    Nf = _get_number_of_freqs(results)
    # Determine number of events at stations for each freq band
    # (Nstations[i][sta] counts events with a valid response at sta)
    Nstations = [defaultdict(int) for i in range(Nf)]
    for evid, eres in results['events'].items():
        for i in range(Nf):
            for sta, Rsta in eres['R'].items():
                Rsta = Rsta[i]
                if Rsta is None or np.isnan(Rsta):
                    continue
                Nstations[i][sta] += 1

    def construct_ols(coldata, b_val):
        """Append one equation (matrix row and right-hand side value).

        ``coldata`` is an iterable of ``(column, value)`` pairs.
        """
        # b, row and Arepr are nonlocal lists; they are rebound for every
        # frequency band in the loop below and looked up at call time
        b.append(b_val)
        if use_sparse:
            # Arepr is [data, [row_indices, col_indices]]
            for col, data in coldata:
                Arepr[0].append(data)
                Arepr[1][0].append(row[0])
                Arepr[1][1].append(col)
        else:
            # Arepr is a list of dense rows of length Ne
            Arow = np.zeros(Ne)
            for col, data in coldata:
                Arow[col] = data
            Arepr.append(Arow)
        # row[0] is the index of the next row to be written
        row[0] += 1

    # calculate best factors for each freq band with OLS A*factor=b
    # (factors stay 1 for bands or events which are not solved for)
    factors = np.ones((Ne, Nf))
    std_before = []  # one for each freq
    largest_areas = []
    for i in range(Nf):
        log.debug('align sites for freq no. %d', i)
        # find unconnected areas
        areas = _find_unconnected_areas(results, i,
                                        ignore_stations=ignore_stations)
        if join_unconnected:
            areas, near_stations = _join_unconnected_areas(
                areas, join_unconnected, inventory)
        if len(areas) == 0:
            # nothing to align for this band, keep placeholders so that
            # the lists stay aligned with the frequency index
            largest_areas.append(None)
            std_before.append(np.nan)
            continue
        # only the area with the most entries is aligned
        largest_area = max(areas, key=lambda k: len(areas[k]))
        msg = 'use largest area %s with %d stations'
        log.info(msg, largest_area, len(areas[largest_area]))
        largest_area = areas[largest_area]
        largest_areas.append(largest_area)
        R = _collectR(results, freqi=i, only=largest_area)
        std_before.append(_Rstd(R))
        # fresh containers for the system of this frequency band
        row = [0]
        b = []
        if use_sparse:
            Arepr = [[], [[], []]]
        else:
            Arepr = []
        # coefficients and right-hand side of the normalization row
        norm_row_A = defaultdict(float)
        norm_row_b = 0
        first = {}
        # last[sta] = (event index, response) of the most recent event
        # with a valid response at sta
        last = {}
        stations_used_norm = set()
        # add pairs of site responses for one station and different events
        for k, item in enumerate(results['events'].items()):
            evid, eres = item
            for sta, Rsta in eres['R'].items():
                Rsta = Rsta[i]
                if sta not in largest_area:
                    continue
                if Rsta is None or np.isnan(Rsta):
                    continue
                # NOTE(review): if station is a str, ``sta in station`` is
                # a substring test -- confirm that this is intended
                if station is None or sta == station or sta in station:
                    # collect information for normalization
                    # (each station is weighted by 1 / number of events)
                    stations_used_norm.add(sta)
                    fac = 1. / Nstations[i][sta]
                    norm_row_A[k] += fac
                    norm_row_b -= np.log(Rsta) * fac
                if sta in last:
                    # add pairs of site responses for one station
                    # and two different events
                    kl, Rstal = last[sta]
                    b_val = np.log(Rstal) - np.log(Rsta)
                    construct_ols(((k, 1), (kl, -1)), b_val)
                    last[sta] = k, Rsta
                elif (join_unconnected and sta in near_stations.keys() and
                        near_stations[sta] in last):
                    # add pairs of site responses for two nearby stations
                    # (in two previously unconnected areas)
                    # and two different events
                    kl, Rstal = last[near_stations[sta]]
                    b_val = np.log(Rstal) - np.log(Rsta)
                    construct_ols(((k, 1), (kl, -1)), b_val)
                    last[sta] = k, Rsta
                else:
                    # first valid response seen at this station
                    last[sta] = first[sta] = (k, Rsta)
        # pin mean site response or site response of specific station(s)
        # NOTE(review): stations_used_norm is empty if no station passed the
        # station filter above, which makes this division raise
        # ZeroDivisionError -- confirm callers guarantee a match
        norm_row_b = norm_row_b / len(stations_used_norm) + np.log(response)
        for k in norm_row_A:
            norm_row_A[k] /= len(stations_used_norm)
        construct_ols(norm_row_A.items(), norm_row_b)
        msg = 'constructed %scoefficient matrix with shape (%d, %d)'
        # 'sparse ' * use_sparse yields '' when use_sparse is False
        log.debug(msg, 'sparse ' * use_sparse, row[0], Ne)
        # solve least squares system
        b = np.array(b)
        if use_sparse:
            # tuple(Arepr) is (data, [row_indices, col_indices])
            A = scipy.sparse.csr_matrix(tuple(Arepr), shape=(row[0], Ne))
            res = scipy.sparse.linalg.lsmr(A, b, atol=1e-9)
        else:
            A = np.array(Arepr)
            res = scipy.linalg.lstsq(A, b, overwrite_a=True, overwrite_b=True)
        # the system is formulated in log space, res[0] is the solution
        factors[:, i] = np.exp(res[0])
    # Scale W and R
    _rescale_results(results, factors, only=largest_areas)
    # Calculate sds, M0 and m again
    calculate_source_properties(results,
                                seismic_moment_method=seismic_moment_method,
                                seismic_moment_options=seismic_moment_options)
    # Calculate mean Rs again, use robust mean here
    results.setdefault('R', {})
    for st, Rst in _collectR(results, freqi=None).items():
        if not np.all(np.isnan(Rst)):
            results['R'][st] = gmean(Rst, axis=0, robust=True).tolist()
    # recompute the spread per frequency band for the log message
    std_after = []
    for i in range(Nf):
        R = _collectR(results, freqi=i)
        std_after.append(_Rstd(R))
    # one '%.2g' placeholder per band, before and after the alignment
    msg = ('aligned sites for all frequencies, reduction of mean stdev: ' +
           ', '.join(Nf * ['%.2g']) + ' -> ' + ', '.join(Nf * ['%.2g']))
    log.info(msg, *(std_before + std_after))
    return results