Ejemplo n.º 1
0
def dissimilarity(distribution, classes=None):
    r""" Compute the inter-class dissimilarity index

    The dissimilarity index between two categories `\alpha` and `\beta` is
    defined as

    .. math::
        D_{\alpha \beta} = \frac{1}{2} \sum_{t=1}^{T} \left|
        \frac{n_\alpha(t)}{N_\alpha} - \frac{n_\beta(t)}{N_\beta} \right|

    Its value ranges from 0 to 1.

    Parameters
    ----------

    distribution: nested dictionaries
        Number of people per class, per areal unit as given in the raw data
        (ungrouped). The dictionary must have the following formatting:
        > {areal_id: {class_id: number}}

    classes: dictionary of lists
        When the original categories need to be aggregated into different
        classes. {class: [categories belonging to this class]}
        This can be arbitrarily imposed, or computed with uncover_classes
        function of this package.
        When None (the default), the categories found in `distribution`
        (as given by `return_categories`) are used without regrouping.

    Returns
    -------

    dissimilarity: nested dictionary
        Classes matrix with dissimilarity as values
        > {alpha: {beta: D_{\alpha \beta}}}
        Both [alpha][beta] and [beta][alpha] entries are filled, including
        the diagonal [alpha][alpha].
    """
    ## Regroup into classes if specified
    if classes is not None:
        distribution = regroup_per_class(distribution, classes)
    else:
        classes = return_categories(distribution)

    ## Compute total numbers of individuals per class
    # compute_totals also returns per-unit totals and the grand total, which
    # are not needed for this index.
    _, N_class, _ = compute_totals(distribution, classes)

    ## Compute the dissimilarity matrix
    # Each unordered pair is evaluated once and written to both symmetric
    # entries, so no second pass over the (mutating) dictionary is required.
    dissimilarity = collections.defaultdict(dict)
    for alpha, beta in itertools.combinations_with_replacement(classes, 2):
        value = _pair_dissimilarity(distribution, N_class, alpha, beta)
        dissimilarity[alpha][beta] = value
        dissimilarity[beta][alpha] = value

    return dissimilarity
Ejemplo n.º 2
0
def representation(distribution, classes=None):
    """ Compute the representation of the different classes in all areal units

    Parameters
    ----------

    distribution: nested dictionaries
        Number of people per class, per areal unit as given in the raw data
        (ungrouped). The dictionary must have the following formatting:
        > {areal_id: {class_id: number}}

    classes: dictionary of lists
        When the original categories need to be aggregated into different
        classes.
        > {class: [categories belonging to this class]}
        This can be arbitrarily imposed, or computed with uncover_classes
        function of this package.
        When None or empty, the categories found in `distribution` (as given
        by `return_categories`) are used without regrouping.

    Returns
    -------

    representation: nested dictionaries
        Representation of each category in each areal unit.
        > {areal_id: {class_id: (representation_values, variance of the null
                                model)}}
    """
    # Regroup into classes if specified. Otherwise return categories indicated
    # in the data
    if classes:
        distribution = regroup_per_class(distribution, classes)
    else:
        classes = return_categories(distribution)

    # Compute the total numbers per areal unit, per class, and overall
    N_unit, N_class, N_tot = compute_totals(distribution, classes)

    # Compute the representation and null-model variance for all areal units.
    # `.items()` is used (rather than `.iteritems()`) so that the function
    # runs on both Python 2 and Python 3.
    representation = {}
    for au, dist_au in distribution.items():
        per_class = {}
        for cl in classes:
            value = single_representation(dist_au[cl], N_unit[au],
                                          N_class[cl], N_tot)
            variance = single_variance(N_unit[au], N_class[cl], N_tot)
            per_class[cl] = (value, variance)
        representation[au] = per_class

    return representation
Ejemplo n.º 3
0
def representation(distribution, classes=None):
    """ Compute the representation of the different classes in all areal units

    Parameters
    ----------

    distribution: nested dictionaries
        Number of people per class, per areal unit as given in the raw data
        (ungrouped). The dictionary must have the following formatting:
        > {areal_id: {class_id: number}}

    classes: dictionary of lists
        When the original categories need to be aggregated into different
        classes.
        > {class: [categories belonging to this class]}
        This can be arbitrarily imposed, or computed with uncover_classes
        function of this package.
        When None or empty, the categories found in `distribution` (as given
        by `return_categories`) are used without regrouping.

    Returns
    -------

    representation: nested dictionaries
        Representation of each category in each areal unit.
        > {areal_id: {class_id: (representation_values, variance of the null
                                model)}}
    """
    # Regroup into classes if specified. Otherwise return categories indicated
    # in the data
    if classes:
        distribution = regroup_per_class(distribution, classes)
    else:
        classes = return_categories(distribution)

    # Compute the total numbers per areal unit, per class, and overall
    N_unit, N_class, N_tot = compute_totals(distribution, classes)

    # Compute the (representation, null-model variance) pair for every class
    # in every areal unit. `.items()` replaces the Python 2-only
    # `.iteritems()` so the comprehension also runs on Python 3.
    representation = {
        au: {
            cl: (
                single_representation(dist_au[cl], N_unit[au], N_class[cl],
                                      N_tot),
                single_variance(N_unit[au], N_class[cl], N_tot),
            )
            for cl in classes
        }
        for au, dist_au in distribution.items()
    }

    return representation
Ejemplo n.º 4
0
def dissimilarity(distribution, classes=None):
    r""" Compute the inter-class dissimilarity index

    The dissimilarity index between two categories `\alpha` and `\beta` is
    defined as

    .. math::
        D_{\alpha \beta} = \frac{1}{2} \sum_{t=1}^{T} \left|
        \frac{n_\alpha(t)}{N_\alpha} - \frac{n_\beta(t)}{N_\beta} \right|

    Its value ranges from 0 to 1.

    Parameters
    ----------

    distribution: nested dictionaries
        Number of people per class, per areal unit as given in the raw data
        (ungrouped). The dictionary must have the following formatting:
        > {areal_id: {class_id: number}}

    classes: dictionary of lists
        When the original categories need to be aggregated into different
        classes. {class: [categories belonging to this class]}
        This can be arbitrarily imposed, or computed with uncover_classes
        function of this package.
        When None (the default), the categories found in `distribution`
        (as given by `return_categories`) are used without regrouping.

    Returns
    -------

    dissimilarity: nested dictionary
        Classes matrix with dissimilarity as values
        > {alpha: {beta: D_{\alpha \beta}}}
        The matrix is returned fully symmetrised (both halves filled).
    """
    ## Regroup into classes if specified
    if classes is not None:
        distribution = regroup_per_class(distribution, classes)
    else:
        classes = return_categories(distribution)

    ## Compute total numbers of individuals per class
    # Only the per-class totals are used below; the other two values
    # returned by compute_totals are discarded.
    N_class = compute_totals(distribution, classes)[1]

    ## Compute the dissimilarity matrix
    # The matrix is symmetric: evaluate each unordered pair once and mirror
    # it immediately. This avoids the Python 2-only `.iterkeys()` pass that
    # wrote into the dictionary while walking it.
    dissimilarity = collections.defaultdict(dict)
    pairs = itertools.combinations_with_replacement(classes, 2)
    for alpha, beta in pairs:
        d_ab = _pair_dissimilarity(distribution,
                                   N_class,
                                   alpha,
                                   beta)
        dissimilarity[alpha][beta] = d_ab
        dissimilarity[beta][alpha] = d_ab

    return dissimilarity
Ejemplo n.º 5
0
def exposure(distribution, classes=None):
    r""" Compute the exposure between classes

    The exposure between two categories `\alpha` and `\beta` is defined as

    .. math::
        E_{\alpha \beta} = \frac{1}{N} \sum_{t=1}^{T} n(t) r_\alpha(t)
        r_\beta(t)

    where `r_\alpha(t)` is the representation of the class `\alpha` in the
    areal unit `t`, `n(t)` the total population of `t`, and `N` the total
    population in the considered system.

    The exposure of a class to itself `E_{\alpha \alpha}` measures the
    **isolation** of this class.

    The variance is computed on the null model which corresponds to the
    unsegregated configuration, that is when the spatial repartition of
    people of different income classes is no different from what would be
    obtained if they scattered at random across the city.

    Parameters
    ----------

    distribution: nested dictionaries
        Number of people per class, per areal unit as given in the raw data
        (ungrouped). The dictionary must have the following formatting:
        > {areal_id: {class_id: number}}

    classes: dictionary of lists
        When the original categories need to be aggregated into different
        classes. {class: [categories belonging to this class]}
        This can be arbitrarily imposed, or computed with uncover_classes
        function of this package.
        When None or empty, the categories found in `distribution` (as given
        by `return_categories`) are used without regrouping.

    Returns
    -------

    exposure: nested dictionaries
        Matrix of exposures between categories.
        > {class_id0: {class_id1: (exposure_01, variance null model)}}
        Both [class_id0][class_id1] and [class_id1][class_id0] are filled
        and refer to the same tuple.
    """
    ## Regroup into classes if specified.
    if classes:
        distribution = regroup_per_class(distribution, classes)
    else:
        classes = return_categories(distribution)

    ## Compute the total numbers per class and per areal unit
    N_unit, N_class, N_tot = compute_totals(distribution, classes)

    ## Compute representation for all areal units
    # `distribution` has already been regrouped above (when requested), so
    # it is passed on without `classes`.
    representation = mb.representation(distribution)

    ## Compute the exposure matrix
    # The matrix is symmetric: each unordered pair is evaluated once and
    # mirrored right away, which removes the need for a second pass relying
    # on the Python 2-only `.iterkeys()`.
    exposure = collections.defaultdict(dict)
    for alpha, beta in itertools.combinations_with_replacement(classes, 2):
        entry = (
            pair_exposure(representation, N_unit, N_tot, alpha, beta),
            pair_variance(representation, N_unit, N_class, N_tot,
                          alpha, beta),
        )
        exposure[alpha][beta] = entry
        exposure[beta][alpha] = entry

    return exposure
Ejemplo n.º 6
0
def exposure(distribution, classes=None):
    r""" Compute the exposure between classes

    The exposure between two categories `\alpha` and `\beta` is defined as

    .. math::
        E_{\alpha \beta} = \frac{1}{N} \sum_{t=1}^{T} n(t) r_\alpha(t)
        r_\beta(t)

    where `r_\alpha(t)` is the representation of the class `\alpha` in the
    areal unit `t`, `n(t)` the total population of `t`, and `N` the total
    population in the considered system.

    The exposure of a class to itself `E_{\alpha \alpha}` measures the
    **isolation** of this class.

    The variance is computed on the null model which corresponds to the
    unsegregated configuration, that is when the spatial repartition of
    people of different income classes is no different from what would be
    obtained if they scattered at random across the city.

    Parameters
    ----------

    distribution: nested dictionaries
        Number of people per class, per areal unit as given in the raw data
        (ungrouped). The dictionary must have the following formatting:
        > {areal_id: {class_id: number}}

    classes: dictionary of lists
        When the original categories need to be aggregated into different
        classes. {class: [categories belonging to this class]}
        This can be arbitrarily imposed, or computed with uncover_classes
        function of this package.
        When None or empty, the categories found in `distribution` (as given
        by `return_categories`) are used without regrouping.

    Returns
    -------

    exposure: nested dictionaries
        Matrix of exposures between categories.
        > {class_id0: {class_id1: (exposure_01, variance null model)}}
        The matrix is returned fully symmetrised (both halves filled).
    """
    ## Regroup into classes if specified.
    if classes:
        distribution = regroup_per_class(distribution, classes)
    else:
        classes = return_categories(distribution)

    ## Compute the total numbers per class and per areal unit
    N_unit, N_class, N_tot = compute_totals(distribution, classes)

    ## Compute representation for all areal units
    # The (possibly regrouped) distribution is handed over as-is.
    representation = mb.representation(distribution)

    ## Compute the exposure matrix
    # Only one evaluation per unordered pair (the matrix is symmetric); the
    # mirrored entry is written in the same iteration so the code does not
    # depend on `.iterkeys()`, which does not exist on Python 3 dictionaries.
    exposure = collections.defaultdict(dict)
    for alpha, beta in itertools.combinations_with_replacement(classes, 2):
        e_ab = pair_exposure(representation, N_unit, N_tot, alpha, beta)
        var_ab = pair_variance(representation, N_unit, N_class, N_tot,
                               alpha, beta)
        exposure[alpha][beta] = exposure[beta][alpha] = (e_ab, var_ab)

    return exposure