def interregionalInequalityTestOneVariable(Y, area2region, permutations=9999):
    """Interregional inequality permutation test, one variable at a time

    For every variable position in Y, the between-groups component returned
    by ``theil`` is computed for the observed data and then for
    ``permutations`` random reassignments of the observed values to areas.

    :param Y: dictionary mapping each area to the list of its values (one entry per variable)
    :type Y: dictionary
    :param area2region: regionalization solution, handed to ``theil`` unchanged
    :param permutations: Number of random permutations. Default value permutations = 9999.
    :type permutations: integer

    :rtype: list
    :return: one pseudo p-value per variable, computed as (1 + number of permutations whose between-groups component is >= the observed one) / (permutations + 1). An empty Y yields an empty list.
    """
    def getVar(Y, possition):
        # Single-variable view of Y: every area keeps a one-element list.
        result = {}
        for k in Y:
            result[k] = [Y[k][possition]]
        return result

    def shuffleMap(Y):
        # Materialize the values first: numpy shuffles in place and needs a
        # real list, which dict.values() is not on every Python version.
        values = list(Y.values())
        numpy.random.shuffle(values)
        return dict(zip(Y.keys(), values))

    if not Y:
        return []
    # Take the number of variables from any area instead of requiring that
    # an area with id 0 exists (assumes every area lists the same variables).
    nVars = len(next(iter(Y.values())))

    results = []
    for nv1 in range(nVars):
        var = getVar(Y, nv1)
        _t1, tb1, _tw1 = theil(var, area2region)
        numerator = 1
        for _ in range(permutations):
            # Each permutation reshuffles the previous one; the observed
            # statistic tb1 was already taken from the unshuffled data.
            var = shuffleMap(var)
            _t2, tb2, _tw2 = theil(var, area2region)
            if tb1 <= tb2:
                numerator += 1
        results.append(numerator / float(permutations + 1))
    return results
def interregionalInequalityTestOneVariable(Y, area2region, permutations=9999):
    """Permutation test of the between-groups inequality for each variable

    The between-groups component reported by ``theil`` for the observed data
    is compared with the same component obtained after randomly reassigning
    the observed values to areas, ``permutations`` times per variable.

    :param Y: dictionary mapping each area to the list of its values (one entry per variable)
    :type Y: dictionary
    :param area2region: regionalization solution, passed to ``theil`` as received
    :param permutations: Number of random permutations. Default value permutations = 9999.
    :type permutations: integer

    :rtype: list
    :return: pseudo p-values, one per variable: (1 + count of permutations with a between-groups component >= the observed one) / (permutations + 1). Returns an empty list when Y is empty.
    """
    def getVar(Y, possition):
        # Keep only the requested variable, wrapped in a one-element list.
        return dict((k, [Y[k][possition]]) for k in Y)

    def shuffleMap(Y):
        # numpy.random.shuffle works in place, so it must receive a list
        # (dict.values() may be a read-only view).
        shuffled = list(Y.values())
        numpy.random.shuffle(shuffled)
        return dict(zip(Y.keys(), shuffled))

    results = []
    if not Y:
        return results
    # Any area can tell how many variables there are; this avoids depending
    # on an area with id 0 (assumes all areas list the same variables).
    firstRow = next(iter(Y.values()))

    for position in range(len(firstRow)):
        current = getVar(Y, position)
        _t, observedTb, _tw = theil(current, area2region)
        numerator = 1
        for _ in range(permutations):
            # The map is reshuffled cumulatively; the observed statistic was
            # computed above, before any shuffling.
            current = shuffleMap(current)
            _t, randomTb, _tw = theil(current, area2region)
            if observedTb <= randomTb:
                numerator += 1
        results.append(numerator / float(permutations + 1))
    return results
def inequalityMultivar(Y, area2region, index='theil'):
    """Inequality index for multiple variables

    This function calculates a given inequality index for multiple
    variables::

        Layer.inequality('inequalityMultivar',vars, cluster, <index>)

    :keyword vars: List with variables to be analyzed; e.g: ['Y1978', 'Y1979', 'Y1980', 'Y1981']
    :type vars: list
    :keyword cluster: variable in Layer containing regionalization solution; e.g.: 'BELS'
    :type cluster: string
    :keyword index: inequality index to be applied. Default value index = 'theil'.
    :type index: string

    When the function is called directly, Y is a dictionary mapping each
    area to the list of its values (one entry per variable) and area2region
    is handed to ``theil`` unchanged.

    :rtype: Tuple
    :return: index, index between groups, index within groups, index within groups over index. Each element is a list with one entry per variable.
    :raises NameError: if index is not 'theil'

    **Example 1** ::

        import clusterpy
        instance = clusterpy.importArcData("clusterpy/data_examples/china")
        result = instance.inequality('inequalityMultivar',['Y1978', 'Y1979', 'Y1980', 'Y1981'], 'BELS', index = 'theil')

    """
    # Reject an unsupported index before doing any work.
    if index != 'theil':
        raise NameError("index is not available in clusterpy")

    areas = list(Y.keys())
    # Rows are areas; transposing yields one row per variable. The values
    # are materialized because dict.values() may be a view, and ndmin=2
    # keeps the array two-dimensional even when Y is empty.
    periods = numpy.array(list(Y.values()), ndmin=2).T.tolist()

    t = []
    tb = []
    tw = []
    tw_t = []
    for period in periods:
        # theil receives every area's value wrapped in a one-element list.
        dictionary = dict(zip(areas, [[x] for x in period]))
        t2, tb2, tw2 = theil(dictionary, area2region)
        # NOTE(review): the ratio is undefined when the total index is zero;
        # confirm what theil returns in that case.
        tw_t2 = tw2 / float(t2)
        t.append(t2)
        tb.append(tb2)
        tw.append(tw2)
        tw_t.append(tw_t2)
    return t, tb, tw, tw_t
def inequalityMultivar(Y, area2region, index='theil'):
    """Inequality index for multiple variables

    This function calculates a given inequality index for multiple
    variables::

        Layer.inequality('inequalityMultivar',vars, cluster, <index>)

    :keyword vars: List with variables to be analyzed; e.g: ['Y1978', 'Y1979', 'Y1980', 'Y1981']
    :type vars: list
    :keyword cluster: variable in Layer containing regionalization solution; e.g.: 'BELS'
    :type cluster: string
    :keyword index: inequality index to be applied. Default value index = 'theil'.
    :type index: string

    Called directly, Y maps each area to the list of its values (one entry
    per variable); area2region is passed to ``theil`` as received.

    :rtype: Tuple
    :return: index, index between groups, index within groups, index within groups over index. Every element is a list holding one entry per variable.
    :raises NameError: if index is not 'theil'

    **Example 1** ::

        import clusterpy
        instance = clusterpy.importArcData("clusterpy/data_examples/china")
        result = instance.inequality('inequalityMultivar',['Y1978', 'Y1979', 'Y1980', 'Y1981'], 'BELS', index = 'theil')

    """
    # Validate the requested index up front, before touching the data.
    if index != 'theil':
        raise NameError("index is not available in clusterpy")

    areas = list(Y.keys())
    # dict.values() may be a view, so build a list before handing it to
    # numpy; ndmin=2 keeps an empty Y two-dimensional. After the transpose
    # every row holds one variable across all areas.
    byVariable = numpy.array(list(Y.values()), ndmin=2).T.tolist()

    t, tb, tw, tw_t = [], [], [], []
    for values in byVariable:
        # One-element list per area, as theil is called everywhere else.
        dictionary = dict(zip(areas, [[value] for value in values]))
        total, between, within = theil(dictionary, area2region)
        # NOTE(review): a total index of zero leaves this ratio undefined;
        # verify how theil reports perfectly equal data.
        share = within / float(total)
        t.append(total)
        tb.append(between)
        tw.append(within)
        tw_t.append(share)
    return t, tb, tw, tw_t
def globalInequalityChanges(Y, fieldNames, outFile, permutations=9999):
    """Global inequality change test

    This function tests whether global inequality has significantly changed
    for the Theil statistic over the period t to t+k. For more information on
    this function see [Rey_Sastre2010] (this function recreates Table 2 in
    that paper).

        Layer.inequality('globalInequalityChanges', var, outFile, <permutations>)

    :keyword var: List with variables to be analyzed; e.g: ['Y1978', 'Y1979', 'Y1980', 'Y1981']
    :type var: list
    :keyword outFile: Name for the output file; e.g.: "regionsDifferenceTest.csv"
    :type outFile: string
    :keyword permutations: Number of random spatial permutations. Default value permutations = 9999.
    :type permutations: integer

    When called directly, Y maps each area to the list of its values, in the
    same order as fieldNames. Nothing is written when outFile is empty.

    :rtype: dictionary
    :return: results keyed by pairs of field names: (a, a) holds the Theil index of a; for a listed before b, (a, b) holds the index of b minus the index of a and (b, a) holds the pseudo p-value of that change.

    **Example 1** ::

        import clusterpy
        china = clusterpy.importArcData("clusterpy/data_examples/china")
        result = china.inequality('globalInequalityChanges',['Y1978', 'Y1979', 'Y1980', 'Y1981'], "interregional_inequality_differences.csv")

    """
    def getVar(Y, possition):
        # Single-variable view of Y: every area keeps a one-element list.
        result = {}
        for k in Y:
            result[k] = [Y[k][possition]]
        return result

    def shufflePeriods(Y, pos1, pos2):
        # For every area pick, at random, its value from one of the two periods.
        result = {}
        for k in Y:
            possibilities = [Y[k][pos1], Y[k][pos2]]
            result[k] = [possibilities[numpy.random.randint(0, 2)]]
        return result

    def cleanLine(items):
        # Text form used in the output file: str() of the sequence with the
        # brackets and quotes stripped (items end up separated by ", ").
        text = str(items)
        for char in "[]'":
            text = text.replace(char, "")
        return text

    # Parenthesized single-string form prints identically as a Python 2
    # statement and as a Python 3 function call.
    print("Creating global Inequality Changes [Rey_Sastre2010 - Table 2]")
    results = {}
    # One region id per area, 0..n-1.
    # NOTE(review): presumably theil looks regions up by area id, so this
    # treats every area as its own region — confirm against theil.
    r2a = range(len(Y))
    for nv1, var1 in enumerate(fieldNames):
        var = getVar(Y, nv1)
        t1, _tb1, _tw1 = theil(var, r2a)
        results[(var1, var1)] = t1
        for nv2, var2 in enumerate(fieldNames[nv1 + 1:]):
            pos2 = nv1 + nv2 + 1
            var = getVar(Y, pos2)
            t2, _tb2, _tw2 = theil(var, r2a)
            results[(var1, var2)] = t2 - t1
            numerator = 1
            for _ in range(permutations):
                var = shufflePeriods(Y, nv1, pos2)
                t3, _tb3, _tw3 = theil(var, r2a)
                if abs(t2 - t1) < abs(t3 - t1):
                    numerator += 1
            results[(var2, var1)] = numerator / float(permutations + 1)
    if outFile:
        # The context manager closes the file even if a write or a results
        # lookup raises part-way through.
        with open(outFile, "w") as fout:
            fout.write("".join([",", cleanLine(fieldNames), "\n"]))
            for var1 in fieldNames:
                line = [var1]
                for var2 in fieldNames:
                    line += [results[(var1, var2)]]
                fout.write("".join([cleanLine(line), "\n"]))
    print("global Inequality Changes created!")
    return results
def interregionalInequalityDifferences(Y, fieldNames, area2regions, area2regionsNames, outFile="", permutations=9999):
    """Interregional inequality differences

    This function examines whether the differences across a set of clustering
    solutions are significant. For more information on this function see
    [Rey_Sastre2010] (this function recreates Table 6 in that paper).

        Layer.inequality('interregionalInequalityDifferences', var, clusters, outFile="", <permutations>)

    :keyword var: List with variables to be analyzed; e.g: ['Y1978', 'Y1979', 'Y1980', 'Y1981']
    :type var: list
    :keyword clusters: variables in Layer containing regionalization schemes e.g.: ["arisel1", "arisel2", "arisel3", "BELS"]
    :type clusters: list
    :keyword outFile: Name for the output file; e.g.: "regionsDifferenceTest.csv"
    :type outFile: string
    :keyword permutations: Number of random spatial permutations. Default value permutations = 9999.
    :type permutations: integer

    When called directly, Y maps each area to the list of its values in the
    same order as fieldNames, area2regions is the list of regionalization
    schemes handed to ``theil`` and area2regionsNames holds their names in
    the same order.

    For every variable and every pair of schemes (first, second) the output
    file holds, in a block per variable, the between-groups index of each
    scheme on the diagonal, the observed difference first - second in the
    cell (first, second) and the permutation-based value in the cell
    (second, first). Nothing is written when outFile is empty.

    :rtype: None
    :return: None

    **Example 1** ::

        import clusterpy
        china = clusterpy.importArcData("clusterpy/data_examples/china")
        china.inequality('interregionalInequalityDifferences',['Y1978', 'Y1979', 'Y1980', 'Y1981'], ['BELS','T78-98','T86_98'], "interregional_inequality_differences.csv")

    """
    def getVar(Y, possition):
        # Single-variable view of Y: every area keeps a one-element list.
        result = {}
        for k in Y:
            result[k] = [Y[k][possition]]
        return result

    def shuffleMap(Y):
        # numpy.random.shuffle works in place and needs a real list, which
        # dict.values() is not on every Python version.
        values = list(Y.values())
        numpy.random.shuffle(values)
        return dict(zip(Y.keys(), values))

    def cleanLine(items):
        # Text form used in the output file: str() of the sequence with the
        # brackets and quotes stripped (items end up separated by ", ").
        text = str(items)
        for char in "[]'":
            text = text.replace(char, "")
        return text

    results = {}
    for nv1, v1 in enumerate(fieldNames):
        for na2r_1, a2r_1 in enumerate(area2regions):
            observed = getVar(Y, nv1)
            _t1, tb1, _tw1 = theil(observed, a2r_1)
            name = area2regionsNames[na2r_1]
            results[(v1, name, name)] = tb1
            for na2r_2, a2r_2 in enumerate(area2regions[na2r_1 + 1:]):
                # The observed statistic must come from the unshuffled data.
                # The permuted map therefore lives in its own variable, so
                # that the permutations run for one scheme cannot leak into
                # the observed value of the next scheme.
                _t2, tb2, _tw2 = theil(observed, a2r_2)
                oDifference = tb1 - tb2
                numerator = 1
                permuted = observed
                for _ in range(permutations):
                    permuted = shuffleMap(permuted)
                    _t3, tb3, _tw3 = theil(permuted, a2r_1)
                    _t4, tb4, _tw4 = theil(permuted, a2r_2)
                    rDifference = tb3 - tb4
                    # NOTE(review): this counts permutations whose absolute
                    # difference is <= the observed one, so a large observed
                    # difference yields a value close to 1; confirm that this
                    # is the intended tail.
                    if abs(rDifference) <= abs(oDifference):
                        numerator += 1
                p = numerator / float(permutations + 1)
                name2 = area2regionsNames[na2r_2 + na2r_1 + 1]
                results[(v1, name, name2)] = oDifference
                results[(v1, name2, name)] = p
    if outFile:
        # The context manager closes the file even if a write or a results
        # lookup raises part-way through.
        with open(outFile, "w") as fout:
            header = cleanLine(area2regionsNames)
            for var1 in fieldNames:
                fout.write("".join([var1, ",", header, "\n"]))
                for a2r in area2regionsNames:
                    line = [a2r]
                    for a2r2 in area2regionsNames:
                        line += [results[(var1, a2r, a2r2)]]
                    fout.write("".join([cleanLine(line), "\n"]))
                # Blank line between the blocks of consecutive variables.
                fout.write("\n")
    return None