def dissimilarity(distribution, classes=None):
    r""" Compute the inter-class dissimilarity index

    The dissimilarity index between two categories `\alpha` and `\beta` is
    defined as

    ..math::
        D_{\alpha \beta} = \frac{1}{2} \sum_{i=1}^{T} \left|
        \frac{n_\alpha(t)}{N_\alpha} - \frac{n_\beta(t)}{N_\beta} \right|

    Its value ranges from 0 to 1.

    Parameters
    ----------

    distribution: nested dictionaries
        Number of people per class, per areal unit as given in the raw data
        (ungrouped). The dictionary must have the following formatting:
        > {areal_id: {class_id: number}}

    classes: dictionary of lists
        When the original categories need to be aggregated into different
        classes.
        > {class: [categories belonging to this class]}
        This can be arbitrarily imposed, or computed with uncover_classes
        function of this package.

    Returns
    -------

    dissimilarity: nested dictionary
        Classes matrix with dissimilarity as values
        > {alpha: {beta: D_{\alpha \beta}}}
    """
    ## Regroup into classes if specified
    if classes is not None:
        distribution = regroup_per_class(distribution, classes)
    else:
        classes = return_categories(distribution)

    ## Compute total numbers of individuals per class and areal unit
    N_unit, N_class, N_tot = compute_totals(distribution, classes)

    ## Compute the dissimilarity matrix
    # Only half of the values are computed (the matrix is symmetric)
    dissimilarity = collections.defaultdict(dict)
    for alpha, beta in itertools.combinations_with_replacement(classes, 2):
        dissimilarity[alpha][beta] = _pair_dissimilarity(distribution,
                                                         N_class,
                                                         alpha,
                                                         beta)

    # Symmetrize the output.
    # Iterate over key snapshots: `dict.iterkeys` is Python 2-only, and we
    # insert new entries into the inner dictionaries while walking them.
    for c0 in list(dissimilarity):
        for c1 in list(dissimilarity[c0]):
            if c0 not in dissimilarity[c1]:
                dissimilarity[c1][c0] = dissimilarity[c0][c1]

    return dissimilarity
def representation(distribution, classes=None):
    """ Compute the representation of the different classes in all areal units

    Parameters
    ----------

    distribution: nested dictionaries
        Number of people per class, per areal unit as given in the raw data
        (ungrouped). The dictionary must have the following formatting:
        > {areal_id: {class_id: number}}

    classes: dictionary of lists
        When the original categories need to be aggregated into different
        classes.
        > {class: [categories belonging to this class]}
        This can be arbitrarily imposed, or computed with uncover_classes
        function of this package.

    Returns
    -------

    representation: nested dictionaries
        Representation of each category in each areal unit.
        > {areal_id: {class_id: (representation_values,
                                 variance of the null model)}}
    """
    # Regroup into classes if specified. Otherwise return categories indicated
    # in the data
    if classes:
        distribution = regroup_per_class(distribution, classes)
    else:
        classes = return_categories(distribution)

    # Compute the total numbers per class and per individual
    N_unit, N_class, N_tot = compute_totals(distribution, classes)

    # Compute the representation and standard deviation for all areal units.
    # `dict.items` works on both Python 2 and 3 (`iteritems` is Py2-only).
    representation = {au: {cl: (single_representation(dist_au[cl],
                                                      N_unit[au],
                                                      N_class[cl],
                                                      N_tot),
                                single_variance(N_unit[au],
                                                N_class[cl],
                                                N_tot))
                           for cl in classes}
                      for au, dist_au in distribution.items()}

    return representation
def test_compute_totals(self):
    """ Compute totals """
    # Build a known synthetic city and derive its categories
    city = fake_city()
    categories = return_categories(city)
    totals_unit, totals_class, grand_total = compute_totals(city, categories)

    # Expected values, computed by hand
    expected_total = 226
    expected_class = {1: 0, 3: 22, 4: 37, 5: 66, 6: 9, 7: 76, 8: 16}
    expected_unit = {"A": 131, "B": 95}

    # Compare each total against its hand-computed answer
    assert grand_total == expected_total
    assert_equal(totals_class, expected_class)
    assert_equal(totals_unit, expected_unit)
def test_compute_totals(self):
    """ Compute totals """
    # Build a synthetic city fixture and extract its categories
    city = fake_city()
    cat = return_categories(city)
    # compute_totals returns (per-unit totals, per-class totals, grand total)
    N_au, N_class, N_tot = compute_totals(city, cat)

    # Answers computed by hand
    Ntot_answer = 226
    Nclass_answer = {1: 0, 3: 22, 4: 37, 5: 66, 6: 9, 7: 76, 8: 16}
    Nau_answer = {"A": 131, "B": 95}

    # Test
    assert N_tot == Ntot_answer
    assert_equal(N_class, Nclass_answer)
    assert_equal(N_au, Nau_answer)
def representation(distribution, classes=None):
    """ Compute the representation of the different classes in all areal units

    Parameters
    ----------

    distribution: nested dictionaries
        Number of people per class, per areal unit as given in the raw data
        (ungrouped). The dictionary must have the following formatting:
        > {areal_id: {class_id: number}}

    classes: dictionary of lists
        When the original categories need to be aggregated into different
        classes.
        > {class: [categories belonging to this class]}
        This can be arbitrarily imposed, or computed with uncover_classes
        function of this package.

    Returns
    -------

    representation: nested dictionaries
        Representation of each category in each areal unit.
        > {areal_id: {class_id: (representation_values,
                                 variance of the null model)}}
    """
    # Regroup into classes if specified. Otherwise return categories indicated
    # in the data
    if classes:
        distribution = regroup_per_class(distribution, classes)
    else:
        classes = return_categories(distribution)

    # Compute the total numbers per class and per individual
    N_unit, N_class, N_tot = compute_totals(distribution, classes)

    # Compute the representation and standard deviation for all areal units.
    # `dict.items` works on both Python 2 and 3 (`iteritems` is Py2-only).
    representation = {
        au: {
            cl: (single_representation(dist_au[cl], N_unit[au],
                                       N_class[cl], N_tot),
                 single_variance(N_unit[au], N_class[cl], N_tot))
            for cl in classes
        }
        for au, dist_au in distribution.items()
    }

    return representation
def dissimilarity(distribution, classes=None):
    r""" Compute the inter-class dissimilarity index

    The dissimilarity index between two categories `\alpha` and `\beta` is
    defined as

    ..math::
        D_{\alpha \beta} = \frac{1}{2} \sum_{i=1}^{T} \left|
        \frac{n_\alpha(t)}{N_\alpha} - \frac{n_\beta(t)}{N_\beta} \right|

    Its value ranges from 0 to 1.

    Parameters
    ----------

    distribution: nested dictionaries
        Number of people per class, per areal unit as given in the raw data
        (ungrouped). The dictionary must have the following formatting:
        > {areal_id: {class_id: number}}

    classes: dictionary of lists
        When the original categories need to be aggregated into different
        classes.
        > {class: [categories belonging to this class]}
        This can be arbitrarily imposed, or computed with uncover_classes
        function of this package.

    Returns
    -------

    dissimilarity: nested dictionary
        Classes matrix with dissimilarity as values
        > {alpha: {beta: D_{\alpha \beta}}}
    """
    ## Regroup into classes if specified
    if classes is not None:
        distribution = regroup_per_class(distribution, classes)
    else:
        classes = return_categories(distribution)

    ## Compute total numbers of individuals per class and areal unit
    N_unit, N_class, N_tot = compute_totals(distribution, classes)

    ## Compute the dissimilarity matrix
    # Only half of the values are computed (the matrix is symmetric)
    dissimilarity = collections.defaultdict(dict)
    for alpha, beta in itertools.combinations_with_replacement(classes, 2):
        dissimilarity[alpha][beta] = _pair_dissimilarity(distribution,
                                                         N_class,
                                                         alpha, beta)

    # Symmetrize the output.
    # Iterate over key snapshots: `dict.iterkeys` is Python 2-only, and we
    # insert new entries into the inner dictionaries while walking them.
    for c0 in list(dissimilarity):
        for c1 in list(dissimilarity[c0]):
            if c0 not in dissimilarity[c1]:
                dissimilarity[c1][c0] = dissimilarity[c0][c1]

    return dissimilarity
def cluster_categories(distribution, exposure):
    r""" Perform hierarhical clustering on the intra-tract exposure values

    At each step of the aggregation, we look for the pair `(\beta, \delta)` of
    categories that has the highest exposure (renormalised by the maximum
    possible value). We aggregate them in a new category `\gamma` whose
    exposure with the other categories `\alpha` is given by

    .. math::
        E_{\alpha, \gamma} = \frac{1}{N_\beta + N_\delta} \left( N_\beta
        E_{\alpha, \beta} + N_\delta E_{\alpha, \delta} \right)

    Parameters
    ----------

    distribution: nested dictionaries
        Number of people per class, per areal unit as given in the raw data
        (ungrouped). The dictionary must have the following formatting:
        > {areal_id: {class_id: number}}

    exposure: nested dictionaries
        Matrix of exposures between categories.
        > {class_id0: {class_id1: (exposure_01, variance null model)}}

    Returns
    -------

    linkage: list of tuples
        list L that encodes the hierarhical tree. At the ith iteration of the
        algorithm, L[i,0] and L[i,1] are aggregated to form the n+ith cluster.
        The exposure between L[i,1] and L[i,0] is given by L[i,3], the variance
        is given by L[i,4].
    """
    #
    # Data preparation
    #

    ## Linkage matrix: start from the individual categories, sorted numerically
    linkage = sorted(exposure, key=lambda x: int(x))
    N = len(linkage)

    ## Get totals
    categories = return_categories(distribution)
    N_unit, N_class, N_tot = compute_totals(distribution, categories)

    ## Use classes' position in the linkage matrix rather than names.
    # Build the name -> position map once; calling linkage.index(cl) in every
    # loop/comprehension below would make the re-keying O(n^2).
    position = {cl: i for i, cl in enumerate(linkage)}

    # Class totals
    for cl in categories:
        N_class[position[cl]] = N_class.pop(cl)

    # Exposure values and variances, re-keyed by position
    E = {position[cl0]: {position[cl1]: exposure[cl0][cl1][0]
                         for cl1 in exposure[cl0]}
         for cl0 in exposure}
    E_var = {position[cl0]: {position[cl1]: exposure[cl0][cl1][1]
                             for cl1 in exposure[cl0]}
             for cl0 in exposure}

    #
    # Clustering
    #
    # N categories require N-1 aggregations to form a full tree
    for _ in range(N - 1):
        a, b = _find_friends(E, N_class)
        linkage.append((a, b, E[a][b], E_var[a][b]))
        E, E_var, N_class = _update_matrix(E, E_var, N_class, a, b)

    return linkage
def exposure(distribution, classes=None):
    r""" Compute the exposure between classes

    The exposure between two categories `\alpha` and `\beta` is defined as

    ..math::
        E_{\alpha \beta} = \frac{1}{N} \sum_{t=1}^{T} n(t) r_\alpha(t)
        r_\beta(t)

    where `r_\alpha(t)` is the representation of the class `\alpha` in the
    areal unit `t`, `n(t)` the total population of `t`, and `N` the total
    population in the considered system.

    The exposure of a class to itself `E_{\alpha \alpha}` measures the
    **isolation** of this class.

    The variance is computed on the null model which corresponds to the
    unsegregated configuration, that is when the spatial repartition of people
    of different income classes is no different from that that would be
    obtained if they scattered at random across the city.

    Parameters
    ----------

    distribution: nested dictionaries
        Number of people per class, per areal unit as given in the raw data
        (ungrouped). The dictionary must have the following formatting:
        > {areal_id: {class_id: number}}

    classes: dictionary of lists
        When the original categories need to be aggregated into different
        classes.
        > {class: [categories belonging to this class]}
        This can be arbitrarily imposed, or computed with uncover_classes
        function of this package.

    Returns
    -------

    exposure: nested dictionaries
        Matrix of exposures between categories.
        > {class_id0: {class_id1: (exposure_01, variance null model)}}
    """
    ## Regroup into classes if specified.
    if classes:
        distribution = regroup_per_class(distribution, classes)
    else:
        classes = return_categories(distribution)

    ## Compute the total numbers per class and per areal unit
    N_unit, N_class, N_tot = compute_totals(distribution, classes)

    ## Compute representation for all areal units
    representation = mb.representation(distribution)

    ## Compute the exposure matrix
    # Only half of the values are computed (the matrix is symmetric)
    exposure = collections.defaultdict(dict)
    for alpha, beta in itertools.combinations_with_replacement(classes, 2):
        exposure[alpha][beta] = (pair_exposure(representation, N_unit, N_tot,
                                               alpha, beta),
                                 pair_variance(representation, N_unit, N_class,
                                               N_tot, alpha, beta))

    # Symmetrize the output.
    # Iterate over key snapshots: `dict.iterkeys` is Python 2-only, and we
    # insert new entries into the inner dictionaries while walking them.
    for c0 in list(exposure):
        for c1 in list(exposure[c0]):
            if c0 not in exposure[c1]:
                exposure[c1][c0] = exposure[c0][c1]

    return exposure
def cluster_categories(distribution, exposure):
    r""" Perform hierarhical clustering on the intra-tract exposure values

    At each step of the aggregation, we look for the pair `(\beta, \delta)` of
    categories that has the highest exposure (renormalised by the maximum
    possible value). We aggregate them in a new category `\gamma` whose
    exposure with the other categories `\alpha` is given by

    .. math::
        E_{\alpha, \gamma} = \frac{1}{N_\beta + N_\delta} \left( N_\beta
        E_{\alpha, \beta} + N_\delta E_{\alpha, \delta} \right)

    Parameters
    ----------

    distribution: nested dictionaries
        Number of people per class, per areal unit as given in the raw data
        (ungrouped). The dictionary must have the following formatting:
        > {areal_id: {class_id: number}}

    exposure: nested dictionaries
        Matrix of exposures between categories.
        > {class_id0: {class_id1: (exposure_01, variance null model)}}

    Returns
    -------

    linkage: list of tuples
        list L that encodes the hierarhical tree. At the ith iteration of the
        algorithm, L[i,0] and L[i,1] are aggregated to form the n+ith cluster.
        The exposure between L[i,1] and L[i,0] is given by L[i,3], the variance
        is given by L[i,4].
    """
    #
    # Data preparation
    #

    ## Linkage matrix: start from the individual categories, sorted numerically
    linkage = sorted(exposure, key=lambda x: int(x))
    N = len(linkage)

    ## Get totals
    categories = return_categories(distribution)
    N_unit, N_class, N_tot = compute_totals(distribution, categories)

    ## Use classes' position in the linkage matrix rather than names.
    # Build the name -> position map once; calling linkage.index(cl) in every
    # loop/comprehension below would make the re-keying O(n^2).
    position = {cl: i for i, cl in enumerate(linkage)}

    # Class totals
    for cl in categories:
        N_class[position[cl]] = N_class.pop(cl)

    # Exposure values and variances, re-keyed by position
    E = {position[cl0]: {position[cl1]: exposure[cl0][cl1][0]
                         for cl1 in exposure[cl0]}
         for cl0 in exposure}
    E_var = {position[cl0]: {position[cl1]: exposure[cl0][cl1][1]
                             for cl1 in exposure[cl0]}
             for cl0 in exposure}

    #
    # Clustering
    #
    # N categories require N-1 aggregations to form a full tree
    for _ in range(N - 1):
        a, b = _find_friends(E, N_class)
        linkage.append((a, b, E[a][b], E_var[a][b]))
        E, E_var, N_class = _update_matrix(E, E_var, N_class, a, b)

    return linkage