Python theil 예제들, theilIndex.theil Python 예제들

예제 #1

0

파일 보기

파일: regionsTest.py 프로젝트: yangyu777/clusterpy

def interregionalInequalityTestOneVariable(Y, area2region, permutations=9999):
    """Permutation test of between-region inequality, one variable at a time.

    For every variable position found in ``Y`` the between-group component
    returned by ``theil`` is computed on the observed map and then on
    ``permutations`` random reassignments of the values to the areas.

    :param Y: dictionary mapping each area to its list of variable values
    :param area2region: region assignment, handed unchanged to ``theil``
    :param permutations: number of random permutations per variable
    :return: list with one pseudo p-value per variable, computed as
        ``(1 + number of permutations whose between-group component is >=
        the observed one) / (permutations + 1)``; empty if ``Y`` is empty
    """
    def getVar(Y, position):
        # Keep the {area: [value]} shape that is handed to theil.
        return dict((k, [Y[k][position]]) for k in Y)

    def shuffleMap(Y):
        # list() yields a real list on Python 3 too, where dict.values() is
        # only a view that numpy cannot shuffle in place.
        values = list(Y.values())
        numpy.random.shuffle(values)
        return dict(zip(Y.keys(), values))

    if not Y:
        return []

    # Any area tells how many variables there are; do not assume key 0 exists.
    nVars = len(next(iter(Y.values())))

    results = []
    for nv1 in range(nVars):
        var = getVar(Y, nv1)
        t1, tb1, tw1 = theil(var, area2region)
        numerator = 1  # the observed map counts as one realization
        for _ in range(permutations):
            var = shuffleMap(var)
            t2, tb2, tw2 = theil(var, area2region)
            if tb1 <= tb2:
                numerator += 1
        results.append(numerator / float(permutations + 1))
    return results

예제 #2

0

파일 보기

파일: regionsTest.py 프로젝트: JuancaDuque/clusterpy

def interregionalInequalityTestOneVariable(Y, area2region, permutations=9999):
    """Test, per variable, whether between-region inequality is significant.

    The between-group component that ``theil`` returns for the observed data
    is compared against the same component on randomly permuted maps (the
    values are reassigned to the areas at random).

    :param Y: dictionary ``{area: [value of variable 0, variable 1, ...]}``
    :param area2region: region assignment, passed as-is to ``theil``
    :param permutations: number of random maps generated for each variable
    :return: list of pseudo p-values, one per variable; an empty list when
        ``Y`` holds no areas
    """
    def getVar(data, position):
        # Slice out one variable, keeping the {area: [value]} layout.
        selected = {}
        for area in data:
            selected[area] = [data[area][position]]
        return selected

    def shuffleMap(data):
        # Materialize keys and values as lists: on Python 3 they are views,
        # which numpy.random.shuffle cannot permute in place.
        areas = list(data.keys())
        values = list(data.values())
        numpy.random.shuffle(values)
        return dict(zip(areas, values))

    if len(Y) == 0:
        return []

    # Read the variable count from whichever area comes first rather than
    # requiring an area whose key is literally 0.
    firstArea = next(iter(Y))
    results = []
    for nv1 in range(len(Y[firstArea])):
        var = getVar(Y, nv1)
        observed = theil(var, area2region)[1]  # between-group component
        extreme = 1  # the observed map itself
        for _ in range(permutations):
            var = shuffleMap(var)
            if observed <= theil(var, area2region)[1]:
                extreme += 1
        results.append(extreme / float(permutations + 1))
    return results

예제 #3

0

파일 보기

파일: inequalityMultivar.py 프로젝트: yangyu777/clusterpy

def inequalityMultivar(Y, area2region, index='theil'):
    """Inequality index for multiple variables

    This function calculates a given inequality index for multiple variables::

        Layer.inequality('inequalityMultivar',vars, cluster, <index>)

    :keyword vars: List with variables to be analyzed; e.g: ['Y1978', 'Y1979', 'Y1980', 'Y1981']
    :type vars: list
    :keyword cluster: variable in Layer containing regionalization solution; e.g.: 'BELS'
    :type cluster: string
    :keyword index: inequality index to be applied. Default value index = 'theil'.
    :type index: string

    :rtype: Tuple
    :return: index, index between groups, index within groups, index within groups over index

    Called directly, ``Y`` is a dictionary ``{area: [value per variable]}``
    and ``area2region`` is handed unchanged to ``theil``. Each of the four
    returned items is a list with one entry per variable. The last list holds
    ``within / total``; it is ``nan`` for a variable whose total index is
    zero, because the share is undefined there.

    :raises NameError: if ``index`` is not ``'theil'``

    **Example 1** ::

        import clusterpy
        instance = clusterpy.importArcData("clusterpy/data_examples/china")
        result = instance.inequality('inequality',['Y1978', 'Y1979', 'Y1980', 'Y1981'], 'BELS', index = 'theil')

    """
    # Reject an unknown index before doing any work (and even for empty Y).
    if index != 'theil':
        raise NameError("index is not available in clusterpy")

    # list() is needed on Python 3, where keys()/values() are views that
    # numpy would treat as a single opaque object.
    areas = list(Y.keys())
    # Transpose areas x variables into one row per variable.
    periods = numpy.array(list(Y.values()), ndmin=2).transpose().tolist()

    t = []
    tb = []
    tw = []
    tw_t = []
    for var in periods:
        dictionary = dict(zip(areas, [[x] for x in var]))
        t2, tb2, tw2 = theil(dictionary, area2region)
        t.append(t2)
        tb.append(tb2)
        tw.append(tw2)
        # Guard the share against a zero total index.
        tw_t.append(tw2 / float(t2) if t2 != 0 else float('nan'))
    return t, tb, tw, tw_t

예제 #4

0

파일 보기

파일: inequalityMultivar.py 프로젝트: 1990q828j/clusterpy

def inequalityMultivar(Y, area2region, index = 'theil'):
    """Inequality index for multiple variables

    This function calculates a given inequality index for multiple variables::

        Layer.inequality('inequalityMultivar',vars, cluster, <index>)

    :keyword vars: List with variables to be analyzed; e.g: ['Y1978', 'Y1979', 'Y1980', 'Y1981']
    :type vars: list
    :keyword cluster: variable in Layer containing regionalization solution; e.g.: 'BELS'
    :type cluster: string
    :keyword index: inequality index to be applied. Default value index = 'theil'.
    :type index: string

    :rtype: Tuple
    :return: index, index between groups, index within groups, index within groups over index

    When called directly, ``Y`` maps every area to its list of values (one
    per variable) and ``area2region`` is forwarded to ``theil`` untouched.
    The result is four parallel lists with one entry per variable; the
    within-over-total entry is ``nan`` whenever the total index is zero.

    :raises NameError: when ``index`` names anything other than ``'theil'``

    **Example 1** ::

        import clusterpy
        instance = clusterpy.importArcData("clusterpy/data_examples/china")
        result = instance.inequality('inequality',['Y1978', 'Y1979', 'Y1980', 'Y1981'], 'BELS', index = 'theil')

    """
    # Validate once, up front, instead of once per variable inside the loop.
    if index != 'theil':
        raise NameError("index is not available in clusterpy")

    # Materialize the dict views so the code also works on Python 3.
    areas = list(Y.keys())
    rows = list(Y.values())
    # ndmin=2 keeps the result two-dimensional even when Y is empty.
    byVariable = numpy.array(rows, ndmin=2).T.tolist()

    t, tb, tw, tw_t = [], [], [], []
    for column in byVariable:
        dictionary = {}
        for area, value in zip(areas, column):
            dictionary[area] = [value]
        t2, tb2, tw2 = theil(dictionary, area2region)
        if t2 != 0:
            tw_t2 = tw2 / float(t2)
        else:
            # Share of within-group inequality is undefined without any
            # inequality at all.
            tw_t2 = float('nan')
        t.append(t2)
        tb.append(tb2)
        tw.append(tw2)
        tw_t.append(tw_t2)
    return t, tb, tw, tw_t

예제 #5

0

파일 보기

파일: gineqTest.py 프로젝트: clusterpy/clusterpy-hd

def globalInequalityChanges(Y, fieldNames, outFile, permutations=9999):
    """Global inequality change test

    This function tests whether global inequality has significantly changed
    for the Theil statistic over the period t to t+k. For more information on
    this function see [Rey_Sastre2010] (this function recreates Table 2 in
    that paper).

        Layer.inequality('globalInequalityChanges', var, outFile, <permutations>)

    :keyword var: List with variables to be analyzed; e.g: ['Y1978', 'Y1979', 'Y1980', 'Y1981']
    :type var: list
    :keyword outFile: Name for the output file; e.g.: "regionsDifferenceTest.csv"
    :type outFile: string
    :keyword permutations: Number of random spatial permutations. Default value permutations = 9999.
    :type permutations: integer

    When called directly, ``Y`` maps each area to its list of values, ordered
    like ``fieldNames``. The returned dictionary is keyed by pairs of field
    names: ``(v, v)`` holds the index of ``v``; ``(earlier, later)`` holds
    ``index(later) - index(earlier)``; ``(later, earlier)`` holds the pseudo
    p-value of that change. If ``outFile`` is non-empty the same table is
    also written to that file.

    **Example 1** ::

        import clusterpy
        china = clusterpy.importArcData("clusterpy/data_examples/china")
        result = china.inequality('globalInequalityChanges',['Y1978', 'Y1979', 'Y1980', 'Y1981'],  "interregional_inequality_differences.csv")

    """

    def getVar(Y, position):
        # One variable in the {area: [value]} layout handed to theil.
        return dict((k, [Y[k][position]]) for k in Y)

    def shufflePeriods(Y, pos1, pos2):
        # For every area pick, at random, its value from one of the two periods.
        result = {}
        for k in Y:
            possibilities = [Y[k][pos1], Y[k][pos2]]
            result[k] = [possibilities[numpy.random.randint(0, 2)]]
        return result

    def csvLine(items):
        # Same rendering as before: str() of the sequence with brackets and
        # quotes stripped, so the file layout is unchanged.
        text = str(items)
        for char in "[]'":
            text = text.replace(char, "")
        return text + "\n"

    # Parenthesized single-argument print behaves the same on Python 2 and 3.
    print("Creating global Inequality Changes [Rey_Sastre2010 - Table 2]")
    results = {}
    # One entry per area, passed to theil as the region assignment.
    # NOTE(review): presumably this puts every area in its own region - confirm against theil.
    r2a = list(range(len(Y)))
    for nv1, var1 in enumerate(fieldNames):
        t1, tb1, tw1 = theil(getVar(Y, nv1), r2a)
        results[(var1, var1)] = t1
        for nv2, var2 in enumerate(fieldNames[nv1 + 1:], nv1 + 1):
            t2, tb2, tw2 = theil(getVar(Y, nv2), r2a)
            results[(var1, var2)] = t2 - t1
            numerator = 1
            for _ in range(permutations):
                var = shufflePeriods(Y, nv1, nv2)
                t3, tb3, tw3 = theil(var, r2a)
                if abs(t2 - t1) < abs(t3 - t1):
                    numerator += 1
            # Stored once, after the loop, so the entry also exists when
            # permutations == 0 (the export below reads every pair).
            results[(var2, var1)] = numerator / float(permutations + 1)
    if outFile:
        # The context manager closes the file even if a write fails.
        with open(outFile, "w") as fout:
            fout.write("," + csvLine(fieldNames))
            for var1 in fieldNames:
                row = [var1] + [results[(var1, var2)] for var2 in fieldNames]
                fout.write(csvLine(row))
    print("global Inequality Changes created!")
    return results

예제 #6

0

파일 보기

파일: regionsDiffTest.py 프로젝트: clusterpy/clusterpy-hd

def interregionalInequalityDifferences(Y, fieldNames, area2regions,
            area2regionsNames, outFile="", permutations=9999):
    """Interregional inequality differences

    This function examines whether the differences across a set of clustering
    solutions are significant. For more information on this function see
    [Rey_Sastre2010] (this function recreates Table 6 in that paper).

        Layer.inequality('interregionalInequalityDifferences', var, clusters, outFile="", <permutations>)

    :keyword var: List with variables to be analyzed; e.g: ['Y1978', 'Y1979', 'Y1980', 'Y1981']
    :type var: list
    :keyword clusters: variables in Layer containing regionalization schemes e.g.: ["arisel1", "arisel2", "arisel3", "BELS"]
    :type clusters: list
    :keyword outFile: Name for the output file; e.g.: "regionsDifferenceTest.csv"
    :type outFile: string
    :keyword permutations: Number of random spatial permutations. Default value permutations = 9999.
    :type permutations: integer

    When called directly, ``Y`` maps each area to its list of values ordered
    like ``fieldNames``; ``area2regions`` is a list of region assignments
    (each one handed to ``theil``) and ``area2regionsNames`` holds their
    names in the same order. Nothing is returned; the table is only written
    when ``outFile`` is non-empty. For each variable the table holds the
    between-group component on the diagonal, the difference ``first - second``
    above it and the permutation-based proportion below it.

    **Example 1** ::

        import clusterpy
        china = clusterpy.importArcData("clusterpy/data_examples/china")
        china.inequality('regionsInequalityDifferenceTest',['Y1978', 'Y1979', 'Y1980', 'Y1981'], ['BELS','T78-98','T86_98'], "interregional_inequality_differences.csv")

    """
    def getVar(Y, position):
        # One variable in the {area: [value]} layout handed to theil.
        return dict((k, [Y[k][position]]) for k in Y)

    def shuffleMap(Y):
        # list() is required on Python 3, where dict.values() is a view that
        # numpy cannot shuffle in place.
        values = list(Y.values())
        numpy.random.shuffle(values)
        return dict(zip(Y.keys(), values))

    def csvLine(items):
        # Same rendering as before: str() of the sequence with brackets and
        # quotes stripped, so the file layout is unchanged.
        text = str(items)
        for char in "[]'":
            text = text.replace(char, "")
        return text + "\n"

    results = {}
    for nv1, v1 in enumerate(fieldNames):
        # The observed data for this variable; never overwritten below.
        var = getVar(Y, nv1)
        for na2r_1, a2r_1 in enumerate(area2regions):
            t1, tb1, tw1 = theil(var, a2r_1)
            name = area2regionsNames[na2r_1]
            results[(v1, name, name)] = tb1
            for na2r_2, a2r_2 in enumerate(area2regions[na2r_1 + 1:],
                                           na2r_1 + 1):
                t2, tb2, tw2 = theil(var, a2r_2)
                oDifference = tb1 - tb2
                numerator = 1
                # Shuffle a separate map: reusing ``var`` here used to leave
                # permuted data behind, so the "observed" difference of every
                # later partition pair was computed on a random map.
                permuted = var
                for _ in range(permutations):
                    permuted = shuffleMap(permuted)
                    t3, tb3, tw3 = theil(permuted, a2r_1)
                    t4, tb4, tw4 = theil(permuted, a2r_2)
                    rDifference = tb3 - tb4
                    # NOTE(review): this counts permutations whose difference
                    # is at most the observed one; a conventional pseudo
                    # p-value counts those at least as large - confirm intent.
                    if abs(rDifference) <= abs(oDifference):
                        numerator += 1
                p = numerator / float(permutations + 1)
                name2 = area2regionsNames[na2r_2]
                results[(v1, name, name2)] = oDifference
                results[(v1, name2, name)] = p
    if outFile:
        # The context manager closes the file even if a write fails.
        with open(outFile, "w") as fout:
            for var1 in fieldNames:
                fout.write("".join([var1, ",", csvLine(area2regionsNames)]))
                for a2r in area2regionsNames:
                    row = [a2r] + [results[(var1, a2r, a2r2)]
                                   for a2r2 in area2regionsNames]
                    fout.write(csvLine(row))
                fout.write("\n")
    return None