def mantel(s_coords, t_coords, permutations=99, scon=1.0, spow=-1.0,
           tcon=1.0, tpow=-1.0):
    """
    Standardized Mantel test for spatio-temporal interaction. [2]_

    The statistic is the Pearson correlation between the transformed
    lower-triangle entries of the spatial and temporal distance matrices,
    where each distance ``d`` is transformed as ``(d + con) ** pow``.

    Parameters
    ----------
    s_coords        : array
                      nx2 spatial coordinates
    t_coords        : array
                      nx1 temporal coordinates
    permutations    : int
                      the number of permutations used to establish pseudo-
                      significance (default is 99)
    scon            : float
                      constant added to spatial distances
    spow            : float
                      value for power transformation for spatial distances
    tcon            : float
                      constant added to temporal distances
    tpow            : float
                      value for power transformation for temporal distances

    Returns
    -------
    mantel_result   : dictionary
                      contains the statistic (stat) for the test and the
                      associated p-value (pvalue)
    stat            : float
                      value of the Mantel test for the dataset
    pvalue          : float
                      pseudo p-value associated with the statistic

    Notes
    -----
    When ``permutations`` is 0 (or otherwise falsy) no inference is run and
    only the statistic itself is returned, not a dictionary.

    References
    ----------
    .. [2] N. Mantel. 1967. The detection of disease clustering and a
       generalized regression approach. Cancer Research, 27(2):209-220.

    Examples
    --------
    >>> import numpy as np
    >>> import pysal

    Read in the example data and create an instance of SpaceTimeEvents.

    >>> path = pysal.examples.get_path("burkitt")
    >>> events = SpaceTimeEvents(path,'T')

    Set the random seed generator. This is used by the permutation based
    inference to replicate the pseudo-significance of our example results -
    the end-user will normally omit this step.

    >>> np.random.seed(100)

    The standardized Mantel test is a measure of matrix correlation between
    the spatial and temporal distance matrices of the event dataset.
    Mantel (1967) [2]_ recommends adding a constant to the distances and
    applying a power transformation; both are controlled through the
    constant and power parameters. The following example adds a constant of
    1.0 and uses a power of -1.0 for both the spatial and the temporal
    distances.

    >>> result = mantel(events.space, events.t, 99, scon=1.0, spow=-1.0, tcon=1.0, tpow=-1.0)

    Next, we examine the result of the test.

    >>> print("%6.6f"%result['stat'])
    0.048368

    Finally, we look at the pseudo-significance of this value, calculated by
    permuting the timestamps and rerunning the statistic for each of the 99
    permutations. According to these parameters, the results indicate
    space-time interaction between the events.

    >>> print("%2.2f"%result['pvalue'])
    0.01

    """
    n = len(t_coords)

    # calculate the spatial and temporal distance matrices for the events
    distmat = cg.distance_matrix(s_coords)
    timemat = cg.distance_matrix(t_coords)

    # The spatial vector is never permuted, so it is transformed only once
    # and reused for every permutation below.
    distvec = (util.get_lower(distmat) + scon) ** spow

    def _correlation(tmat):
        # Pearson r between the transformed temporal vector and distvec.
        # NOTE(review): the trailing .sum() is kept from the original; it
        # presumably collapses a length-1 result to a scalar - confirm
        # before removing.
        timevec = (util.get_lower(tmat) + tcon) ** tpow
        return stats.pearsonr(timevec, distvec)[0].sum()

    stat = _correlation(timemat)

    # return the results (if no inference)
    if not permutations:
        return stat

    # Build the reference distribution by permuting the temporal matrix.
    # A fresh list of ids is passed on every pass: util.shuffle_matrix
    # presumably shuffles the ids in place, which an immutable Python 3
    # ``range`` object would not allow.
    dist = []
    for _ in range(permutations):
        trand = util.shuffle_matrix(timemat, list(range(n)))
        dist.append(_correlation(trand))

    # pseudo significance: share of permuted values at least as large as
    # the observed one, with the observed value itself counted once
    distribution = np.array(dist)
    count = np.count_nonzero(distribution >= stat)
    pvalue = (count + 1.0) / (permutations + 1.0)

    # report the results
    mantel_result = {'stat': stat, 'pvalue': pvalue}
    return mantel_result
def modified_knox(s_coords, t_coords, delta, tau, permutations=99):
    """
    Baker's modified Knox test for spatio-temporal interaction. [4]_

    Parameters
    ----------
    s_coords        : array
                      nx2 spatial coordinates
    t_coords        : array
                      nx1 temporal coordinates
    delta           : float
                      threshold for proximity in space
    tau             : float
                      threshold for proximity in time
    permutations    : int
                      the number of permutations used to establish pseudo-
                      significance (default is 99)

    Returns
    -------
    modknox_result  : dictionary
                      contains the statistic (stat) for the test and the
                      associated p-value (pvalue)
    stat            : float
                      value of the modified knox test for the dataset
    pvalue          : float
                      pseudo p-value associated with the statistic

    Notes
    -----
    When ``permutations`` is 0 (or otherwise falsy) no inference is run and
    only the statistic itself is returned, not a dictionary.

    References
    ----------
    .. [4] R.D. Baker. Identifying space-time disease clusters. Acta Tropica,
       91(3):291-299, 2004

    Examples
    --------
    >>> import numpy as np
    >>> import pysal

    Read in the example data and create an instance of SpaceTimeEvents.

    >>> path = pysal.examples.get_path("burkitt")
    >>> events = SpaceTimeEvents(path, 'T')

    Set the random seed generator. This is used by the permutation based
    inference to replicate the pseudo-significance of our example results -
    the end-user will normally omit this step.

    >>> np.random.seed(100)

    Run the modified Knox test with distance and time thresholds of 20 and 5,
    respectively. This counts the events that are closer than 20 units in
    space, and 5 units in time.

    >>> result = modified_knox(events.space, events.t, delta=20, tau=5, permutations=99)

    Next, we examine the results. First, we call the statistic from the
    results dictionary. This reports the difference between the observed
    and expected Knox statistic.

    >>> print("%2.8f" % result['stat'])
    2.81016043

    Next, we look at the pseudo-significance of this value, calculated by
    permuting the timestamps and rerunning the statistics. In this case, the
    results indicate there is likely no space-time interaction.

    >>> print("%2.2f" % result['pvalue'])
    0.11

    """
    n = len(t_coords)

    # calculate the spatial and temporal distance matrices for the events
    sdistmat = cg.distance_matrix(s_coords)
    tdistmat = cg.distance_matrix(t_coords)

    # Spatial side: only the temporal matrix is permuted, so the spatial
    # indicator matrix and its per-event counts are computed once here
    # instead of on every pass of the permutation loop.
    spacbin = sdistmat <= delta
    spacmat = np.ones((n, n)) * spacbin
    ssumvec = np.reshape((spacbin.sum(axis=0) - 1), (n, 1))

    def _modknox_stat(tmat):
        # Modified Knox statistic for one temporal distance matrix.
        timebin = tmat <= tau
        timemat = np.ones((n, n)) * timebin
        # pairs within both thresholds; n is subtracted from the total
        obsstat = (timemat * spacmat).sum() - n
        # expected value from the per-event spatial and temporal counts
        tsumvec = np.reshape((timebin.sum(axis=0) - 1), (n, 1))
        expstat = (ssumvec * tsumvec).sum()
        return (obsstat - (expstat / (n - 1.0))) / 2.0

    stat = _modknox_stat(tdistmat)

    # return results (if no inference)
    if not permutations:
        return stat

    # Build the reference distribution by permuting the temporal matrix.
    # A fresh list of ids is passed on every pass: util.shuffle_matrix
    # presumably shuffles the ids in place, which an immutable Python 3
    # ``range`` object would not allow.
    distribution = []
    for _ in range(permutations):
        rtdistmat = util.shuffle_matrix(tdistmat, list(range(n)))
        distribution.append(_modknox_stat(rtdistmat))

    # pseudo significance: share of permuted values at least as large as
    # the observed one, with the observed value itself counted once
    distribution = np.array(distribution)
    count = np.count_nonzero(distribution >= stat)
    pvalue = (count + 1.0) / (permutations + 1.0)

    # return results
    modknox_result = {'stat': stat, 'pvalue': pvalue}
    return modknox_result
def mantel(s_coords, t_coords, permutations=99, scon=1.0, spow=-1.0,
           tcon=1.0, tpow=-1.0):
    """
    Standardized Mantel test for spatio-temporal interaction. [Mantel1967]_

    The statistic is the Pearson correlation between the transformed
    lower-triangle entries of the spatial and temporal distance matrices,
    where each distance ``d`` is transformed as ``(d + con) ** pow``.

    Parameters
    ----------
    s_coords : array
        (n, 2), spatial coordinates.
    t_coords : array
        (n, 1), temporal coordinates.
    permutations : int, optional
        the number of permutations used to establish pseudo-
        significance (the default is 99).
    scon : float, optional
        constant added to spatial distances (the default is 1.0).
    spow : float, optional
        value for power transformation for spatial distances
        (the default is -1.0).
    tcon : float, optional
        constant added to temporal distances (the default is 1.0).
    tpow : float, optional
        value for power transformation for temporal distances
        (the default is -1.0).

    Returns
    -------
    mantel_result : dictionary
        contains the statistic (stat) for the test and the associated
        p-value (pvalue).
    stat : float
        value of the Mantel test for the dataset.
    pvalue : float
        pseudo p-value associated with the statistic.

    Notes
    -----
    When ``permutations`` is 0 (or otherwise falsy) no inference is run and
    only the statistic itself is returned, not a dictionary.

    Examples
    --------
    >>> import numpy as np
    >>> import pysal

    Read in the example data and create an instance of SpaceTimeEvents.

    >>> path = pysal.examples.get_path("burkitt.shp")
    >>> events = SpaceTimeEvents(path,'T')

    Set the random seed generator. This is used by the permutation based
    inference to replicate the pseudo-significance of our example results -
    the end-user will normally omit this step.

    >>> np.random.seed(100)

    The standardized Mantel test is a measure of matrix correlation between
    the spatial and temporal distance matrices of the event dataset.
    Mantel (1967) [Mantel1967]_ recommends adding a constant to the
    distances and applying a power transformation; both are controlled
    through the constant and power parameters. The following example adds a
    constant of 1.0 and uses a power of -1.0 for both the spatial and the
    temporal distances.

    >>> result = mantel(events.space, events.t, 99, scon=1.0, spow=-1.0, tcon=1.0, tpow=-1.0)

    Next, we examine the result of the test.

    >>> print("%6.6f"%result['stat'])
    0.048368

    Finally, we look at the pseudo-significance of this value, calculated by
    permuting the timestamps and rerunning the statistic for each of the 99
    permutations. According to these parameters, the results indicate
    space-time interaction between the events.

    >>> print("%2.2f"%result['pvalue'])
    0.01

    """
    n = len(t_coords)

    # calculate the spatial and temporal distance matrices for the events
    distmat = cg.distance_matrix(s_coords)
    timemat = cg.distance_matrix(t_coords)

    # The spatial vector is never permuted, so it is transformed only once
    # and reused for every permutation below.
    distvec = (util.get_lower(distmat) + scon) ** spow

    def _correlation(tmat):
        # Pearson r between the transformed temporal vector and distvec.
        # NOTE(review): the trailing .sum() is kept from the original; it
        # presumably collapses a length-1 result to a scalar - confirm
        # before removing.
        timevec = (util.get_lower(tmat) + tcon) ** tpow
        return stats.pearsonr(timevec, distvec)[0].sum()

    stat = _correlation(timemat)

    # return the results (if no inference)
    if not permutations:
        return stat

    # Build the reference distribution by permuting the temporal matrix.
    # A fresh list of ids is passed on every pass: util.shuffle_matrix
    # presumably shuffles the ids in place, which an immutable Python 3
    # ``range`` object would not allow.
    dist = []
    for _ in range(permutations):
        trand = util.shuffle_matrix(timemat, list(range(n)))
        dist.append(_correlation(trand))

    # pseudo significance: share of permuted values at least as large as
    # the observed one, with the observed value itself counted once
    distribution = np.array(dist)
    count = np.count_nonzero(distribution >= stat)
    pvalue = (count + 1.0) / (permutations + 1.0)

    # report the results
    mantel_result = {'stat': stat, 'pvalue': pvalue}
    return mantel_result
def modified_knox(s_coords, t_coords, delta, tau, permutations=99):
    """
    Baker's modified Knox test for spatio-temporal interaction. [Baker2004]_

    Parameters
    ----------
    s_coords : array
        (n, 2), spatial coordinates.
    t_coords : array
        (n, 1), temporal coordinates.
    delta : float
        threshold for proximity in space.
    tau : float
        threshold for proximity in time.
    permutations : int, optional
        the number of permutations used to establish pseudo-
        significance (the default is 99).

    Returns
    -------
    modknox_result : dictionary
        contains the statistic (stat) for the test and the associated
        p-value (pvalue).
    stat : float
        value of the modified knox test for the dataset.
    pvalue : float
        pseudo p-value associated with the statistic.

    Notes
    -----
    When ``permutations`` is 0 (or otherwise falsy) no inference is run and
    only the statistic itself is returned, not a dictionary.

    Examples
    --------
    >>> import numpy as np
    >>> import pysal

    Read in the example data and create an instance of SpaceTimeEvents.

    >>> path = pysal.examples.get_path("burkitt.shp")
    >>> events = SpaceTimeEvents(path, 'T')

    Set the random seed generator. This is used by the permutation based
    inference to replicate the pseudo-significance of our example results -
    the end-user will normally omit this step.

    >>> np.random.seed(100)

    Run the modified Knox test with distance and time thresholds of 20 and 5,
    respectively. This counts the events that are closer than 20 units in
    space, and 5 units in time.

    >>> result = modified_knox(events.space, events.t, delta=20, tau=5, permutations=99)

    Next, we examine the results. First, we call the statistic from the
    results dictionary. This reports the difference between the observed
    and expected Knox statistic.

    >>> print("%2.8f" % result['stat'])
    2.81016043

    Next, we look at the pseudo-significance of this value, calculated by
    permuting the timestamps and rerunning the statistics. In this case, the
    results indicate there is likely no space-time interaction.

    >>> print("%2.2f" % result['pvalue'])
    0.11

    """
    n = len(t_coords)

    # calculate the spatial and temporal distance matrices for the events
    sdistmat = cg.distance_matrix(s_coords)
    tdistmat = cg.distance_matrix(t_coords)

    # Spatial side: only the temporal matrix is permuted, so the spatial
    # indicator matrix and its per-event counts are computed once here
    # instead of on every pass of the permutation loop.
    spacbin = sdistmat <= delta
    spacmat = np.ones((n, n)) * spacbin
    ssumvec = np.reshape((spacbin.sum(axis=0) - 1), (n, 1))

    def _modknox_stat(tmat):
        # Modified Knox statistic for one temporal distance matrix.
        timebin = tmat <= tau
        timemat = np.ones((n, n)) * timebin
        # pairs within both thresholds; n is subtracted from the total
        obsstat = (timemat * spacmat).sum() - n
        # expected value from the per-event spatial and temporal counts
        tsumvec = np.reshape((timebin.sum(axis=0) - 1), (n, 1))
        expstat = (ssumvec * tsumvec).sum()
        return (obsstat - (expstat / (n - 1.0))) / 2.0

    stat = _modknox_stat(tdistmat)

    # return results (if no inference)
    if not permutations:
        return stat

    # Build the reference distribution by permuting the temporal matrix.
    # A fresh list of ids is passed on every pass: util.shuffle_matrix
    # presumably shuffles the ids in place, which an immutable Python 3
    # ``range`` object would not allow.
    distribution = []
    for _ in range(permutations):
        rtdistmat = util.shuffle_matrix(tdistmat, list(range(n)))
        distribution.append(_modknox_stat(rtdistmat))

    # pseudo significance: share of permuted values at least as large as
    # the observed one, with the observed value itself counted once
    distribution = np.array(distribution)
    count = np.count_nonzero(distribution >= stat)
    pvalue = (count + 1.0) / (permutations + 1.0)

    # return results
    modknox_result = {'stat': stat, 'pvalue': pvalue}
    return modknox_result
def modified_knox(s_coords, t_coords, delta, tau, permutations=99):
    """
    Modified Knox test for spatio-temporal interaction.

    The statistic is half the difference between the observed count of
    event pairs lying within both thresholds and its expected value, the
    latter computed from the per-event counts of spatial and temporal
    neighbours.

    Parameters
    ----------
    s_coords        : array
                      spatial coordinates, one row per event
    t_coords        : array
                      temporal coordinates, one row per event
    delta           : float
                      threshold for proximity in space (pairs with spatial
                      distance <= delta are counted as close)
    tau             : float
                      threshold for proximity in time (pairs with temporal
                      distance <= tau are counted as close)
    permutations    : int
                      the number of permutations used to establish pseudo-
                      significance (default is 99)

    Returns
    -------
    modknox_result  : dictionary
                      contains the statistic (stat), the associated pseudo
                      p-value (pvalue) and the expected value (eknox)
    stat            : float
                      value of the modified knox test for the dataset
    pvalue          : float
                      pseudo p-value associated with the statistic
    eknox           : float
                      expected value subtracted from the observed count,
                      computed from the observed (unpermuted) data

    Notes
    -----
    When ``permutations`` is 0 (or otherwise falsy) no inference is run and
    only the statistic itself is returned, not a dictionary.
    """
    n = len(t_coords)

    # calculate the spatial and temporal distance matrices for the events
    sdistmat = cg.distance_matrix(s_coords)
    tdistmat = cg.distance_matrix(t_coords)

    # Spatial side: only the temporal matrix is permuted, so the spatial
    # indicator matrix and its per-event counts are computed once here
    # instead of on every pass of the permutation loop.
    spacbin = sdistmat <= delta
    spacmat = np.ones((n, n)) * spacbin
    ssumvec = np.reshape((spacbin.sum(axis=0) - 1), (n, 1))

    def _observed_and_expected(tmat):
        # Observed count and expected value for one temporal matrix.
        timebin = tmat <= tau
        timemat = np.ones((n, n)) * timebin
        # pairs within both thresholds; n is subtracted from the total
        obsstat = (timemat * spacmat).sum() - n
        tsumvec = np.reshape((timebin.sum(axis=0) - 1), (n, 1))
        expstat = (ssumvec * tsumvec).sum()
        return obsstat, expstat / (n - 1.0)

    # eknox is taken from the observed data. It used to be assigned inside
    # the permutation loop, so the value returned to the caller belonged to
    # the last random permutation rather than to the dataset.
    obsstat, eknox = _observed_and_expected(tdistmat)
    stat = (obsstat - eknox) / 2.0

    # return results (if no inference)
    if not permutations:
        return stat

    # Build the reference distribution by permuting the temporal matrix.
    # A fresh list of ids is passed on every pass: util.shuffle_matrix
    # presumably shuffles the ids in place, which an immutable Python 3
    # ``range`` object would not allow.
    distribution = []
    for _ in range(permutations):
        rtdistmat = util.shuffle_matrix(tdistmat, list(range(n)))
        perm_obs, perm_exp = _observed_and_expected(rtdistmat)
        distribution.append((perm_obs - perm_exp) / 2.0)

    # pseudo significance: share of permuted values at least as large as
    # the observed one, with the observed value itself counted once
    distribution = np.array(distribution)
    count = np.count_nonzero(distribution >= stat)
    pvalue = (count + 1.0) / (permutations + 1.0)

    # return results
    modknox_result = {'stat': stat, 'pvalue': pvalue, 'eknox': eknox}
    return modknox_result
def knox(events, delta, tau, permutations=99):
    """
    Knox test for spatio-temporal interaction. [1]_

    Parameters
    ----------
    events          : space time events object
                      an output instance from the class SpaceTimeEvents
    delta           : float
                      threshold for proximity in space
    tau             : float
                      threshold for proximity in time
    permutations    : int
                      the number of permutations used to establish pseudo-
                      significance (default is 99)

    Returns
    -------
    knox_result     : dictionary
                      contains the statistic (stat) for the test and the
                      associated p-value (pvalue)
    stat            : float
                      value of the knox test for the dataset
    pvalue          : float
                      pseudo p-value associated with the statistic

    Notes
    -----
    When ``permutations`` is 0 (or otherwise falsy) no inference is run and
    only the statistic itself is returned, not a dictionary.

    References
    ----------
    .. [1] E. Knox. 1964. The detection of space-time
       interactions. Journal of the Royal Statistical Society. Series C
       (Applied Statistics), 13(1):25-30.

    Examples
    --------
    >>> import numpy as np
    >>> import pysal

    Read in the example data and create an instance of SpaceTimeEvents.

    >>> path = pysal.examples.get_path("burkitt")
    >>> events = SpaceTimeEvents(path,'T')

    Set the random seed generator. This is used by the permutation based
    inference to replicate the pseudo-significance of our example results -
    the end-user will normally omit this step.

    >>> np.random.seed(100)

    Run the Knox test with distance and time thresholds of 20 and 5,
    respectively. This counts the events that are closer than 20 units in
    space, and 5 units in time.

    >>> result = knox(events,delta=20,tau=5,permutations=99)

    Next, we examine the results. First, we call the statistic from the
    results dictionary. This reports that there are 13 events close
    in both space and time, according to our threshold definitions.

    >>> print(result['stat'])
    13.0

    Next, we look at the pseudo-significance of this value, calculated by
    permuting the timestamps and rerunning the statistics. In this case, the
    results indicate there is likely no space-time interaction between the
    events.

    >>> print("%2.2f"%result['pvalue'])
    0.18

    """
    n = events.n

    # calculate the spatial and temporal distance matrices for the events
    sdistmat = cg.distance_matrix(events.space)
    tdistmat = cg.distance_matrix(events.t)

    # The spatial indicator matrix is not affected by the permutations, so
    # it is built once and shared by every evaluation below.
    spacmat = np.ones((n, n)) * (sdistmat <= delta)

    def _knox_count(tmat):
        # Knox statistic for one temporal distance matrix: the sum of the
        # joint indicator matrix, minus n, halved (presumably dropping the
        # self-pairs and counting each pair once).
        timemat = np.ones((n, n)) * (tmat <= tau)
        return ((timemat * spacmat).sum() - n) / 2.0

    stat = _knox_count(tdistmat)

    # return results (if no inference); same falsy check as the other
    # interaction tests in this module
    if not permutations:
        return stat

    # Build the reference distribution by permuting the temporal matrix.
    # A fresh list of ids is passed on every pass: util.shuffle_matrix
    # presumably shuffles the ids in place, which an immutable Python 3
    # ``range`` object would not allow.
    distribution = []
    for _ in range(permutations):
        rtdistmat = util.shuffle_matrix(tdistmat, list(range(n)))
        distribution.append(_knox_count(rtdistmat))

    # pseudo significance: share of permuted values at least as large as
    # the observed one, with the observed value itself counted once
    distribution = np.array(distribution)
    count = np.count_nonzero(distribution >= stat)
    pvalue = (count + 1.0) / (permutations + 1.0)

    # return results
    knox_result = {'stat': stat, 'pvalue': pvalue}
    return knox_result
def modified_knox(s_coords, t_coords, delta, tau, permutations=99):
    """
    Modified Knox test for spatio-temporal interaction.

    The statistic is half the difference between the observed count of
    event pairs lying within both thresholds and its expected value, the
    latter computed from the per-event counts of spatial and temporal
    neighbours.

    Parameters
    ----------
    s_coords        : array
                      spatial coordinates, one row per event
    t_coords        : array
                      temporal coordinates, one row per event
    delta           : float
                      threshold for proximity in space (pairs with spatial
                      distance <= delta are counted as close)
    tau             : float
                      threshold for proximity in time (pairs with temporal
                      distance <= tau are counted as close)
    permutations    : int
                      the number of permutations used to establish pseudo-
                      significance (default is 99)

    Returns
    -------
    modknox_result  : dictionary
                      contains the statistic (stat), the associated pseudo
                      p-value (pvalue) and the expected value (eknox)
    stat            : float
                      value of the modified knox test for the dataset
    pvalue          : float
                      pseudo p-value associated with the statistic
    eknox           : float
                      expected value subtracted from the observed count,
                      computed from the observed (unpermuted) data

    Notes
    -----
    When ``permutations`` is 0 (or otherwise falsy) no inference is run and
    only the statistic itself is returned, not a dictionary.
    """
    n = len(t_coords)

    # calculate the spatial and temporal distance matrices for the events
    sdistmat = cg.distance_matrix(s_coords)
    tdistmat = cg.distance_matrix(t_coords)

    # Spatial side: only the temporal matrix is permuted, so the spatial
    # indicator matrix and its per-event counts are computed once here
    # instead of on every pass of the permutation loop.
    spacbin = sdistmat <= delta
    spacmat = np.ones((n, n)) * spacbin
    ssumvec = np.reshape((spacbin.sum(axis=0) - 1), (n, 1))

    def _observed_and_expected(tmat):
        # Observed count and expected value for one temporal matrix.
        timebin = tmat <= tau
        timemat = np.ones((n, n)) * timebin
        # pairs within both thresholds; n is subtracted from the total
        obsstat = (timemat * spacmat).sum() - n
        tsumvec = np.reshape((timebin.sum(axis=0) - 1), (n, 1))
        expstat = (ssumvec * tsumvec).sum()
        return obsstat, expstat / (n - 1.0)

    # eknox is taken from the observed data. It used to be assigned inside
    # the permutation loop, so the value returned to the caller belonged to
    # the last random permutation rather than to the dataset.
    obsstat, eknox = _observed_and_expected(tdistmat)
    stat = (obsstat - eknox) / 2.0

    # return results (if no inference)
    if not permutations:
        return stat

    # Build the reference distribution by permuting the temporal matrix.
    # A fresh list of ids is passed on every pass: util.shuffle_matrix
    # presumably shuffles the ids in place, which an immutable Python 3
    # ``range`` object would not allow.
    distribution = []
    for _ in range(permutations):
        rtdistmat = util.shuffle_matrix(tdistmat, list(range(n)))
        perm_obs, perm_exp = _observed_and_expected(rtdistmat)
        distribution.append((perm_obs - perm_exp) / 2.0)

    # pseudo significance: share of permuted values at least as large as
    # the observed one, with the observed value itself counted once
    distribution = np.array(distribution)
    count = np.count_nonzero(distribution >= stat)
    pvalue = (count + 1.0) / (permutations + 1.0)

    # return results
    modknox_result = {'stat': stat, 'pvalue': pvalue, 'eknox': eknox}
    return modknox_result
def test_shuffle_matrix(self):
    """Check util.shuffle_matrix against a fixed, seeded expectation."""
    # Fix the seed so the result can be compared with hard-coded values.
    np.random.seed(10)
    # The ids are passed as a list: util.shuffle_matrix presumably shuffles
    # them in place, which an immutable Python 3 ``range`` would not allow.
    obs = util.shuffle_matrix(self.X, list(range(4))).flatten().tolist()
    exp = [10, 8, 11, 9, 2, 0, 3, 1, 14, 12, 15, 13, 6, 4, 7, 5]
    # Compare entry by entry so a failure reports the first differing value.
    for idx, expected in enumerate(exp):
        self.assertEqual(expected, obs[idx])