Example #1
    def test_gmean_gerr1(self):
        """Results are the same for no weights and equal weights"""
        x = np.array([3, 5, 8, 10, np.nan, 3, 4, -np.inf])
        w = np.ones(len(x))
        self.assertEqual(gmean(x), gmean(x, weights=w))
        self.assertEqual(gerr(x), gerr(x, weights=w))
        x = x.reshape((2, 4))
        w = np.ones(x.shape)
        self.assertEqual(len(gmean(x, axis=0)), x.shape[1])
        np.testing.assert_array_equal(gmean(x, axis=0),
                                      gmean(x, axis=0, weights=w))
        np.testing.assert_array_equal(gerr(x, axis=0),
                                      gerr(x, axis=0, weights=w))
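The equality asserted here follows from the definition of a weighted geometric mean: with equal weights, the weights cancel. A minimal sketch of that definition, assuming gmean computes the exponential of the (weighted) arithmetic mean of log values (unlike the library function tested above, this sketch makes no attempt to handle NaN or infinite entries):

import numpy as np

def weighted_gmean_sketch(x, weights=None, axis=None):
    # geometric mean = exp of the (weighted) arithmetic mean of log values;
    # with weights = np.ones_like(x) the sum of weights equals the count,
    # so the weighted and unweighted branches return the same result
    x = np.asarray(x, dtype=float)
    logx = np.log(x)
    if weights is None:
        return np.exp(np.mean(logx, axis=axis))
    w = np.asarray(weights, dtype=float)
    return np.exp(np.sum(w * logx, axis=axis) / np.sum(w, axis=axis))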
Example #2
    def test_gmean_gerr2(self):
        """Results are same for repeated elements and adapted weights"""
        x1 = np.array([1, 2, 2, 5, 5, 5, 5, 5, np.inf])
        x2 = np.array([1, 2, 5, np.inf])
        w = np.array([1, 2, 5, 10])
        self.assertEqual(gmean(x1), gmean(x2, weights=w))

        # errors can only be compared for biased std
        np.testing.assert_array_equal(gerr(x1, unbiased=False)[1:],
                                      gerr(x2, weights=w, unbiased=False)[1:])
        # weighted errors are bigger than unweighted
        np.testing.assert_array_less(gerr(x1)[1:], gerr(x2, weights=w)[1:])
        # biased errors are smaller than unbiased
        np.testing.assert_array_less(gerr(x2, weights=w, unbiased=False)[1:],
                                     gerr(x2, weights=w)[1:])
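The first assertion relies on the count-weight identity: repeating an element n times changes the log average exactly as giving it weight n. A self-contained check on finite data (assuming the usual weighted geometric mean; the np.inf entry in the test above additionally depends on how the library treats non-finite values):

import numpy as np

x_rep = np.array([1., 2., 2., 5., 5., 5., 5., 5.])
x_unique = np.array([1., 2., 5.])
w = np.array([1., 2., 5.])  # multiplicities of x_unique within x_rep
lhs = np.exp(np.mean(np.log(x_rep)))                    # plain geometric mean
rhs = np.exp(np.sum(w * np.log(x_unique)) / np.sum(w))  # weighted geometric mean
assert np.isclose(lhs, rhs)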
Example #3
def align_site_responses(results,
                         station=None,
                         response=1.,
                         use_sparse=True,
                         seismic_moment_method=None,
                         seismic_moment_options=None,
                         ignore_stations=None):
    """
    Align station site responses and correct source parameters (experimental)

    Determine the best factor for each event so that each station's site
    response is the same across events.

    :param results: original result dictionary. For the other options see
        the help for the corresponding command line options or configuration
        parameters.
    :return: corrected result dictionary
    """
    # Drop events without results and sort the dict by event id
    results['events'] = OrderedDict(
        sorted([(evid, eres) for (evid, eres) in results['events'].items()
                if eres is not None],
               key=lambda x: x[0]))
    join_unconnected = None
    if join_unconnected:
        inventory = None
        msg = 'This feature needs more work and tests'
        raise NotImplementedError(msg)
    Ne = len(results['events'])
    if Ne == 1:
        use_sparse = False
    # Determine number of freqs
    Nf = _get_number_of_freqs(results)
    # Determine number of events at stations for each freq band
    Nstations = [defaultdict(int) for i in range(Nf)]
    for evid, eres in results['events'].items():
        for i in range(Nf):
            for sta, Rsta in eres['R'].items():
                Rsta = Rsta[i]
                if Rsta is None or np.isnan(Rsta):
                    continue
                Nstations[i][sta] += 1

    def construct_ols(coldata, b_val):
        # b, row and Arepr are lists from the enclosing scope;
        # coldata is an iterable of (column, coefficient) pairs for one row
        b.append(b_val)
        if use_sparse:
            for col, data in coldata:
                Arepr[0].append(data)
                Arepr[1][0].append(row[0])
                Arepr[1][1].append(col)
        else:
            Arow = np.zeros(Ne)
            for col, data in coldata:
                Arow[col] = data
            Arepr.append(Arow)
        row[0] += 1

    # calculate best factors for each freq band with OLS A*factor=b
    factors = np.ones((Ne, Nf))
    std_before = []
    # one for each freq
    largest_areas = []
    for i in range(Nf):
        log.debug('align sites for freq no. %d', i)
        # find unconnected areas
        areas = _find_unconnected_areas(results,
                                        i,
                                        ignore_stations=ignore_stations)
        if join_unconnected:
            areas, near_stations = _join_unconnected_areas(
                areas, join_unconnected, inventory)
        if len(areas) == 0:
            largest_areas.append(None)
            std_before.append(np.nan)
            continue
        largest_area = max(areas, key=lambda k: len(areas[k]))
        msg = 'use largest area %s with %d stations'
        log.info(msg, largest_area, len(areas[largest_area]))
        largest_area = areas[largest_area]
        largest_areas.append(largest_area)
        R = _collectR(results, freqi=i, only=largest_area)
        std_before.append(_Rstd(R))
        row = [0]
        b = []
        if use_sparse:
            Arepr = [[], [[], []]]
        else:
            Arepr = []
        norm_row_A = defaultdict(float)
        norm_row_b = 0
        first = {}
        last = {}
        stations_used_norm = set()
        # add pairs of site responses for one station and different events
        for k, item in enumerate(results['events'].items()):
            evid, eres = item
            for sta, Rsta in eres['R'].items():
                Rsta = Rsta[i]
                if sta not in largest_area:
                    continue
                if Rsta is None or np.isnan(Rsta):
                    continue
                if station is None or sta == station or sta in station:
                    # collect information for normalization
                    stations_used_norm.add(sta)
                    fac = 1. / Nstations[i][sta]
                    norm_row_A[k] += fac
                    norm_row_b -= np.log(Rsta) * fac
                if sta in last:
                    # add pairs of site responses for one station
                    # and two different events
                    kl, Rstal = last[sta]
                    b_val = np.log(Rstal) - np.log(Rsta)
                    construct_ols(((k, 1), (kl, -1)), b_val)
                    last[sta] = k, Rsta
                elif (join_unconnected and sta in near_stations.keys()
                      and near_stations[sta] in last):
                    # add pairs of site responses for two nearby stations
                    # (in two previously unconnected areas)
                    # and two different events
                    kl, Rstal = last[near_stations[sta]]
                    b_val = np.log(Rstal) - np.log(Rsta)
                    construct_ols(((k, 1), (kl, -1)), b_val)
                    last[sta] = k, Rsta
                else:
                    last[sta] = first[sta] = (k, Rsta)
        # pin mean site response or site response of specific station(s)
        norm_row_b = norm_row_b / len(stations_used_norm) + np.log(response)
        for k in norm_row_A:
            norm_row_A[k] /= len(stations_used_norm)
        construct_ols(norm_row_A.items(), norm_row_b)
        msg = 'constructed %scoefficient matrix with shape (%d, %d)'
        log.debug(msg, 'sparse ' * use_sparse, row[0], Ne)
        # solve least squares system
        b = np.array(b)
        if use_sparse:
            A = scipy.sparse.csr_matrix(tuple(Arepr), shape=(row[0], Ne))
            res = scipy.sparse.linalg.lsmr(A, b, atol=1e-9)
        else:
            A = np.array(Arepr)
            res = scipy.linalg.lstsq(A, b, overwrite_a=True, overwrite_b=True)
        factors[:, i] = np.exp(res[0])

    # Scale W and R
    _rescale_results(results, factors, only=largest_areas)
    # Calculate sds, M0 and m again
    calculate_source_properties(results,
                                seismic_moment_method=seismic_moment_method,
                                seismic_moment_options=seismic_moment_options)
    # Calculate mean Rs again, use robust mean here
    results.setdefault('R', {})
    for st, Rst in _collectR(results, freqi=None).items():
        if not np.all(np.isnan(Rst)):
            results['R'][st] = gmean(Rst, axis=0, robust=True).tolist()
    std_after = []
    for i in range(Nf):
        R = _collectR(results, freqi=i)
        std_after.append(_Rstd(R))
    msg = ('aligned sites for all frequencies, reduction of mean stdev: ' +
           ', '.join(Nf * ['%.2g']) + ' -> ' + ', '.join(Nf * ['%.2g']))
    log.info(msg, *(std_before + std_after))
    return results
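For reference, the sparse branch of construct_ols accumulates a (data, (rows, cols)) triplet representation that scipy.sparse.csr_matrix accepts directly. A toy system with made-up numbers (three events, two event-pair equations plus one normalization row) shows the structure of the least-squares problem being solved:

import numpy as np
import scipy.sparse
import scipy.sparse.linalg

# two pair rows encode log differences of site responses between events
# (0, 1) and (1, 2); the last row pins the mean correction factor
# (all numbers are hypothetical)
data = [1., -1., 1., -1., 0.5, 0.5]
rows = [0, 0, 1, 1, 2, 2]
cols = [0, 1, 1, 2, 0, 1]
b = np.array([0.2, -0.1, 0.0])
A = scipy.sparse.csr_matrix((data, (rows, cols)), shape=(3, 3))
res = scipy.sparse.linalg.lsmr(A, b, atol=1e-9)
factors = np.exp(res[0])  # per-event correction factors, as in the function above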