Esempio n. 1
0
def supply_null_modell_functions():
    """Build the fixture objects used by the null-model tests.

    Loads the bundled test data set (tab-separated tij and metadata files with
    the metadata attributes ``ID``, ``Age`` and ``Sex``) into a ``Data`` object
    and derives a network from it via ``create_network_from_data``.

    Returns
    -------
    list
        ``[test_df, test_network]``: the loaded ``Data`` object followed by the
        network created from it.
    """
    test_df = Data(
        path_tij=r"face2face/data/Test/tij_test.dat",
        separator_tij="\t",
        path_meta=r"face2face/data/Test/meta_test.dat",
        separator_meta="\t",
        meta_attr_list=["ID", "Age", "Sex"],
    )
    return [test_df, create_network_from_data(test_df)]
Esempio n. 2
0
def group_list_degree(df):
    r"""Collect the node degrees of every attribute subgroup.

    For each metadata column other than ``"ID"`` the metadata rows are grouped
    by the value found in that column. The IDs of every group are then replaced
    by the degrees of the matching nodes in the network built from ``df``.

    Parameters
    ----------
    df: Data
        Object exposing the metadata table as ``df.metadata``. It is also
        passed to ``create_network_from_data`` to build the network whose
        degrees are read.

    Returns
    -------
    attr_degree_list: list
        One entry ``[attribute, attribute_value, degrees]`` per group, where
        ``degrees`` is a list holding the degree of every node in that group.

    Examples
    ---------
    >>> attr_list = ["ID", "Age", "Sex"]
    >>> test_df = Data(path_tij="face2face/data/Test/tij_test.dat", separator_tij="\t",
    ...                path_meta="face2face/data/Test/meta_test.dat", separator_meta="\t",
    ...                meta_attr_list=attr_list)
    >>> attr_degree_list = group_list_degree(test_df)
    >>> print(attr_degree_list)
    [['Age', 1.0, [1, 2, 2]], ['Age', 0.0, [3, 2]], ['Age', 2.0, [2, 2, 2]], ['Sex', 'F', [1, 2, 2, 2, 2]],
    ['Sex', 'M', [2, 2, 2]]]

    See Also
    ----------
    face2face.avg_degree_attr
    face2face.global_avg_var_std

    """
    # The column labels are read from a NaN-filled copy of the metadata;
    # filling values does not change the labels themselves.
    attribute_names = [
        name for name in df.metadata.fillna("NaN").columns if name != "ID"
    ]
    graph = create_network_from_data(df)

    groups = []
    for attribute in attribute_names:
        # NOTE(review): this filter runs on the *unfilled* metadata, so it only
        # removes rows that literally hold the string "NaN" -- confirm whether
        # the NaN-filled copy was meant to be used here.
        rows = df.metadata.loc[df.metadata[attribute] != "NaN"]
        groups.extend(
            [attribute, value, list(members["ID"])]
            for value, members in rows.groupby(attribute)
        )

    # Swap each group's ID list for the list of the corresponding node degrees.
    for group in groups:
        group[2] = [graph.degree(node_id) for node_id in group[2]]
    return groups
Esempio n. 3
0
def avg_degree_attr(df):
    r"""Compute the average degree per attribute value and per attribute.

    For every metadata column other than ``"ID"`` the metadata rows are grouped
    by the column's value and the mean degree of the nodes in each group is
    computed. A final ``'GlobalAvG'`` entry is appended per attribute; it is
    the plain (unweighted) mean of that attribute's subgroup averages.

    Parameters
    ----------
    df: Data
        Object exposing the metadata table as ``df.metadata``. It is also
        passed to ``create_network_from_data`` to build the network whose
        degrees are read.

    Returns
    -------
    attr_degree_list : list
        One entry ``[attribute, pairs]`` per attribute, where ``pairs`` is a
        list of ``[attribute_value, average_degree]`` entries followed by
        ``['GlobalAvG', mean_of_the_subgroup_averages]``.

    Raises
    ------
    ZeroDivisionError
        If grouping an attribute yields no group at all, because the global
        average then divides by zero.

    Examples
    ---------
    The first string in a list, for example "Age", names the attribute. Each
    following pair holds an attribute value (for example 0.0, 1.0 or 2.0) and
    the average degree of the nodes carrying that value.

    >>> attr_list = ["ID", "Age", "Sex"]
    >>> test_df = Data(path_tij="face2face/data/Test/tij_test.dat", separator_tij="\t",
    ...                path_meta="face2face/data/Test/meta_test.dat", separator_meta="\t",
    ...                meta_attr_list=attr_list)
    >>> avg_degree_list = avg_degree_attr(test_df)
    >>> print(avg_degree_list)
    [['Age', [[0.0, 1.6666666666666667], [1.0, 2.5], [2.0, 2.0], ['GlobalAvG', 2.055555555555556]]],
     ['Sex', [['F', 1.8], ['M', 2.0], ['GlobalAvG', 1.9]]]]

    See Also
    ---------
    face2face.group_list_degree
    face2face.global_avg_var_std

    """
    graph = create_network_from_data(df)
    # Column labels are read from a NaN-filled copy; filling leaves them as is.
    attribute_names = [
        name for name in df.metadata.fillna("NaN").columns if name != "ID"
    ]

    result = []
    for attribute in attribute_names:
        # NOTE(review): the filter is applied to the *unfilled* metadata, so it
        # only removes rows that literally hold the string "NaN".
        rows = df.metadata.loc[df.metadata[attribute] != "NaN"]

        pairs = []
        for value, members in rows.groupby(attribute):
            node_ids = members["ID"]
            degree_total = sum(graph.degree[node_id] for node_id in node_ids)
            pairs.append([value, degree_total / len(node_ids)])

        # Accumulate left to right so the float rounding matches a plain
        # running sum of the subgroup averages.
        running_total = 0
        for _value, subgroup_average in pairs:
            running_total += subgroup_average
        global_average = running_total / len(pairs)
        pairs.append(["GlobalAvG", global_average])

        result.append([attribute, pairs])

    return result
Esempio n. 4
0
def shuffle_label_z_score_mixing_matrix(Data,
                                        runs=1000,
                                        label="type",
                                        seed_label=None):
    r"""Creates a z-score contact matrix based on a null model with randomized identities.

    The attribute mixing matrix of the network built from ``Data`` is compared
    against ``runs`` mixing matrices obtained by repeatedly shuffling the
    ``label`` attribute among the nodes of that network. The z-score of a cell
    is ``(observed - mean of the shuffled values) / std of the shuffled values``.

    Parameters
    ----------
    Data: Data
        Data object that is passed to ``create_network_from_data`` and
        ``mapping_function``.
    runs: int
        Number of label shuffles. It is a heuristic approach, so more runs
        might give a better result.
    label: str
        Name of the node attribute for which the contact matrix is built.
    seed_label : list, default None
        If given, entry ``i`` seeds the shuffle of run ``i``, which makes the
        output reproducible (mainly useful for tests). It is indexed with
        every run index, so it needs at least ``runs`` entries. If ``None``,
        ``numpy.random.shuffle`` is used.

    Returns
    -------
    z_scores : numpy.ndarray
        Matrix of z-scores for the given attribute.
    matrices2 : list
        The un-normalized mixing matrix of every shuffled run.

    Notes
    -----
    The labels are shuffled in place on one working copy of the network, so
    every run shuffles the result of the previous run. Cells whose shuffled
    values have zero standard deviation are not special-cased; the division is
    left to numpy.

    References
    ----------
    .. [2] Génois, Mathieu & Zens, Maria & Lechner, Clemens & Rammstedt, Beatrice & Strohmaier, Markus. (2019).
           Building connections: How scientists meet each other during a conference.

    Examples
    ---------
    The printed values are illustrative; without ``seed_label`` they change
    from call to call.

    >>> z_scores, matrices2 = shuffle_label_z_score_mixing_matrix(test_df, runs=1000, label="Age")
    >>> print(z_scores)  # doctest: +SKIP
    [[1.2247448713915892, -0.44846105565116173, -1.5452456409610384],
    [-0.44846105565116173, 2.0, -3.143958736099446],
    [-1.5452456409610384, -3.143958736099446, 5.719237485832778]]

    See Also
    ---------
    face2face.statistics.null_modell.configuration_model_label_z_score_mixing_matrix

    """
    network = create_network_from_data(Data, replace_attr=True, label=label)
    # The labels are rewritten in place below, so operate on a copy.
    network = network.copy()
    mapping = mapping_function(Data, label)
    data_mixing_matrix = nx.assortativity.attribute_mixing_matrix(
        network, label, mapping=mapping)

    def shuffle_labels(graph, run):
        """Permute the ``label`` attribute among the nodes of ``graph`` in place."""
        shuffled = [graph.nodes[n][label] for n in graph.nodes]
        if seed_label is None:
            np.random.shuffle(shuffled)
        else:
            # The run index is passed in explicitly instead of being picked up
            # from the enclosing loop variable through the closure.
            random.Random(seed_label[run]).shuffle(shuffled)
        for n, new_label in zip(graph.nodes, shuffled):
            graph.nodes[n][label] = new_label
        return graph

    matrices = []
    matrices2 = []
    for run in range(runs):
        shuffle_labels(network, run)
        matrices.append(
            nx.assortativity.attribute_mixing_matrix(network,
                                                     label,
                                                     mapping=mapping))
        matrices2.append(
            nx.assortativity.attribute_mixing_matrix(network,
                                                     label,
                                                     mapping=mapping,
                                                     normalized=False))

    # Stack the shuffled matrices once and reuse the array for mean and std.
    null_matrices = np.array(matrices)
    z_scores = (data_mixing_matrix - null_matrices.mean(axis=0)
                ) / null_matrices.std(axis=0)
    return z_scores, matrices2
Esempio n. 5
0
def configuration_model_label_z_score_mixing_matrix(Data,
                                                    runs=1000,
                                                    label="type",
                                                    shuffle_label=False,
                                                    force_simple_graph=False,
                                                    seed_config_mat=None,
                                                    seed_label=None):
    r"""Creates a z-score contact matrix based on the configuration model.

    The attribute mixing matrix of the network built from ``Data`` is compared
    against ``runs`` mixing matrices of configuration-model graphs generated
    from the degree sequence of that network. The node labels are assigned to
    the generated graph in node order, optionally after being shuffled. The
    z-score of a cell is ``(observed - mean of the null values) / std of the
    null values``.

    Parameters
    ----------
    Data: Data
        Data object that is passed to ``create_network_from_data`` and
        ``mapping_function``.
    runs: int
        Number of null-model graphs to generate. It is a heuristic approach,
        so more runs might give a better result.
    label: str
        Name of the node attribute for which the contact matrix is built.
    shuffle_label: bool
        If True, the label sequence is additionally shuffled in every run.
    force_simple_graph: bool
        If True, every generated graph is passed through ``to_simple_graph``
        (intended to remove the parallel edges and self-loops that the networkx
        ``configuration_model`` function can produce).
    seed_config_mat : list, default None
        If given, entry ``i`` is used as the ``seed`` of the configuration
        model of run ``i`` (mainly useful for tests). It is indexed with every
        run index, so it needs at least ``runs`` entries.
    seed_label : list, default None
        If given, entry ``i`` seeds the label shuffle of run ``i`` (mainly
        useful for tests). Only read when ``shuffle_label`` is True; it then
        needs at least ``runs`` entries. If ``None``,
        ``numpy.random.shuffle`` is used.

    Returns
    -------
    z_scores : numpy.ndarray
        Matrix of z-scores for the given attribute.
    matrices : list
        The normalized mixing matrix of every null-model run.
    data_mixing_matrix : numpy.ndarray
        The normalized mixing matrix of the network built from ``Data``.
    matrices_abs : list
        The un-normalized mixing matrix of every null-model run.

    Notes
    -----
    The label sequence is shuffled in place, so with ``shuffle_label=True``
    every run shuffles the result of the previous run. Cells whose null values
    have zero standard deviation are not special-cased; the division is left
    to numpy.

    References
    ----------
    .. [1] Génois, Mathieu & Zens, Maria & Lechner, Clemens & Rammstedt, Beatrice & Strohmaier, Markus. (2019).
           Building connections: How scientists meet each other during a conference.

    Examples
    ---------
    The printed values are illustrative; without seeds they change from call
    to call.

    >>> result = configuration_model_label_z_score_mixing_matrix(test_df, runs=1000,
    ...                                                          label="Age", shuffle_label=True,
    ...                                                          force_simple_graph=True)
    >>> z_scores, matrices, data_mixing_matrix, matrices_abs = result
    >>> print(z_scores)  # doctest: +SKIP
    [[1.0320936930842797, -0.7717976357301974, -0.5],
    [-0.7717976357301974, -0.0667601413575786, -1.2927763604862383],
    [-0.5, -1.2927763604862383, 2.632147318581194]]

    See Also
    ---------
    face2face.statistics.null_modell.shuffle_label_z_score_mixing_matrix
    """

    network = create_network_from_data(Data, replace_attr=True, label=label)
    mapping = mapping_function(Data, label)
    data_mixing_matrix = nx.assortativity.attribute_mixing_matrix(
        network, label, mapping=mapping)
    degree_sequence = [degree for _node, degree in network.degree]
    type_sequence = [network.nodes[n][label] for n in network.nodes]
    matrices = []
    matrices_abs = []
    for run in range(runs):
        if seed_config_mat is None:
            null_model = nx.configuration_model(degree_sequence)
        else:
            null_model = nx.configuration_model(degree_sequence,
                                                seed=seed_config_mat[run])
        if force_simple_graph:
            null_model = to_simple_graph(null_model)
        if shuffle_label:
            # Shuffles in place: each run permutes the previous run's order.
            if seed_label is None:
                np.random.shuffle(type_sequence)
            else:
                random.Random(seed_label[run]).shuffle(type_sequence)
        # Labels are paired with the generated nodes purely by iteration order.
        for n, t in zip(null_model.nodes, type_sequence):
            null_model.nodes[n][label] = t
        matrices.append(
            nx.assortativity.attribute_mixing_matrix(null_model,
                                                     label,
                                                     mapping=mapping))
        matrices_abs.append(
            nx.assortativity.attribute_mixing_matrix(null_model,
                                                     label,
                                                     mapping=mapping,
                                                     normalized=False))

    # Stack the null-model matrices once and reuse the array for mean and std.
    null_matrices = np.array(matrices)
    z_scores = (data_mixing_matrix - null_matrices.mean(axis=0)) / null_matrices.std(axis=0)
    return z_scores, matrices, data_mixing_matrix, matrices_abs