def supply_null_modell_functions():
    """Load the test data set under ``face2face/data/Test`` and build its network.

    Returns
    -------
    list
        ``[test_df, test_network]``: the ``Data`` object created from the two
        tab-separated test files (metadata attributes ``ID``, ``Age`` and
        ``Sex``) and the result of ``create_network_from_data(test_df)``.
    """
    test_df = Data(
        path_tij=r"face2face/data/Test/tij_test.dat",
        separator_tij="\t",
        path_meta=r"face2face/data/Test/meta_test.dat",
        separator_meta="\t",
        meta_attr_list=["ID", "Age", "Sex"],
    )
    return [test_df, create_network_from_data(test_df)]
def group_list_degree(df):
    r"""Collect the node degrees of every attribute-value subgroup.

    For every metadata attribute except ``"ID"`` the rows of ``df.metadata``
    are grouped by attribute value, and the degree of each group member is
    looked up in the network built from ``df``.

    Parameters
    ----------
    df : Data
        Data object. Its ``metadata`` table (which must contain an ``"ID"``
        column) is read here, and the object itself is passed to
        ``create_network_from_data`` to obtain the network.

    Returns
    -------
    attr_degree_list : list
        One entry ``[attribute, value, degrees]`` per occurring attribute
        value, where ``attribute`` is the column name, ``value`` the attribute
        value and ``degrees`` the list of ``network.degree(node)`` for every
        ID in that subgroup. Rows whose attribute value is missing (NaN or
        the literal string ``"NaN"``) are left out.

    Examples
    --------
    Illustrative output for the test data set:

    >>> attr_list = ["ID", "Age", "Sex"]
    >>> test_df = Data(path_tij="face2face/data/Test/tij_test.dat", separator_tij="\t",
    ...                path_meta="face2face/data/Test/meta_test.dat", separator_meta="\t",
    ...                meta_attr_list=attr_list)
    >>> attr_degree_list = group_list_degree(test_df)
    >>> print(attr_degree_list)
    [['Age', 1.0, [1, 2, 2]], ['Age', 0.0, [3, 2]], ['Age', 2.0, [2, 2, 2]], ['Sex', 'F', [1, 2, 2, 2, 2]], ['Sex', 'M', [2, 2, 2]]]

    See Also
    --------
    face2face.avg_degree_attr
    face2face.global_avg_var_std
    """
    metadata = df.metadata
    network = create_network_from_data(df)

    attr_degree_list = []
    for attribute in metadata.columns:
        if attribute == "ID":
            continue
        column = metadata[attribute]
        # Drop missing values explicitly. The previous ``!= "NaN"`` test ran
        # on the unfilled frame, so real NaN rows were only excluded as a side
        # effect of groupby discarding NaN keys.
        known = metadata.loc[column.notna() & (column != "NaN")]
        for value, members in known.groupby(attribute):
            degrees = [network.degree(node) for node in members["ID"]]
            attr_degree_list.append([attribute, value, degrees])
    return attr_degree_list
def avg_degree_attr(df):
    r"""Calculate the average degree per attribute value and per attribute.

    For every metadata attribute except ``"ID"`` the rows of ``df.metadata``
    are grouped by attribute value and the mean network degree of each
    subgroup is computed. A final ``"GlobalAvG"`` entry per attribute holds
    the unweighted mean of those subgroup averages (every subgroup counts
    once, regardless of its size).

    Parameters
    ----------
    df : Data
        Data object. Its ``metadata`` table (which must contain an ``"ID"``
        column) is read here, and the object itself is passed to
        ``create_network_from_data`` to obtain the network.

    Returns
    -------
    attr_degree_list : list
        One entry ``[attribute, pairs]`` per attribute. ``pairs`` is a list of
        ``[value, average_degree]`` entries, one per occurring attribute
        value, followed by ``["GlobalAvG", mean_of_subgroup_averages]``.
        Rows whose attribute value is missing (NaN or the literal string
        ``"NaN"``) are ignored. If an attribute has no non-missing value at
        all, its ``"GlobalAvG"`` is ``nan``.

    Raises
    ------
    KeyError
        Presumably raised by ``network.degree[...]`` when a metadata ID is
        not a node of the network; verify against
        ``create_network_from_data``.

    Examples
    --------
    The first string of an entry, for example ``"Age"``, names the attribute.
    The pairs that follow hold an attribute value (``0.0``, ``1.0``, ...) and
    the average degree of the nodes carrying that value.

    >>> attr_list = ["ID", "Age", "Sex"]
    >>> test_df = Data(path_tij="face2face/data/Test/tij_test.dat", separator_tij="\t",
    ...                path_meta="face2face/data/Test/meta_test.dat", separator_meta="\t",
    ...                meta_attr_list=attr_list)
    >>> avg_degree_list = avg_degree_attr(test_df)
    >>> print(avg_degree_list)
    [['Age', [[0.0, 1.6666666666666667], [1.0, 2.5], [2.0, 2.0], ['GlobalAvG', 2.055555555555556]]], ['Sex', [['F', 1.8], ['M', 2.0], ['GlobalAvG', 1.9]]]]

    See Also
    --------
    face2face.group_list_degree
    face2face.global_avg_var_std
    """
    network = create_network_from_data(df)
    metadata = df.metadata

    attr_degree_list = []
    for attribute in metadata.columns:
        if attribute == "ID":
            continue
        column = metadata[attribute]
        # Drop missing values explicitly. The previous ``!= "NaN"`` test ran
        # on the unfilled frame, so real NaN rows were only excluded as a side
        # effect of groupby discarding NaN keys.
        known = metadata.loc[column.notna() & (column != "NaN")]

        value_averages = []
        for value, members in known.groupby(attribute):
            ids = members["ID"]
            degree_total = sum(network.degree[node] for node in ids)
            value_averages.append([value, degree_total / len(ids)])

        if value_averages:
            # Plain left-to-right accumulation keeps the float result
            # identical to the historical output.
            average_total = 0
            for _value, average in value_averages:
                average_total += average
            global_average = average_total / len(value_averages)
        else:
            # Attribute without any known value: the mean is undefined, so
            # report NaN instead of failing with ZeroDivisionError.
            global_average = float("nan")

        value_averages.append(["GlobalAvG", global_average])
        attr_degree_list.append([attribute, value_averages])
    return attr_degree_list
def shuffle_label_z_score_mixing_matrix(Data, runs=1000, label="type", seed_label=None):
    r"""Z-score mixing matrix against a null model with shuffled node labels.

    The attribute mixing matrix of the observed network is compared with the
    mixing matrices obtained after repeatedly shuffling the ``label``
    attribute among the nodes; the network structure itself is left
    untouched.

    Parameters
    ----------
    Data : Data
        Data object that contains Tij- and metadata for a data set. It is
        passed to ``create_network_from_data`` and ``mapping_function``.
    runs : int
        Number of label shuffles. The approach is heuristic, so more runs
        may give a better estimate.
    label : str
        Name of the node attribute for which the mixing matrix is built.
    seed_label : list, default None
        One seed per run (indexed by run number), each used to seed a
        ``random.Random`` instance for that run's shuffle. With ``None`` the
        labels are shuffled with ``np.random.shuffle``. Mainly intended for
        reproducible tests.

    Returns
    -------
    z_scores : numpy.ndarray
        ``(observed - mean) / std`` computed element-wise, where mean and
        standard deviation are taken over the normalized mixing matrices of
        all runs.
    matrices_abs : list
        The non-normalized mixing matrix (``normalized=False``) of every run.

    References
    ----------
    .. [2] Génois, Mathieu & Zens, Maria & Lechner, Clemens & Rammstedt, Beatrice & Strohmaier, Markus. (2019).
       Building connections: How scientists meet each other during a conference.

    Examples
    --------
    The values depend on the random shuffles; the numbers are illustrative.

    >>> z_scores, matrices_abs = shuffle_label_z_score_mixing_matrix(test_df, runs=1000, label="Age")
    >>> print(z_scores)
    [[1.2247448713915892, -0.44846105565116173, -1.5452456409610384], [-0.44846105565116173, 2.0, -3.143958736099446], [-1.5452456409610384, -3.143958736099446, 5.719237485832778]]

    See Also
    --------
    face2face.statistics.null_modell.configuration_model_label_z_score_mixing_matrix
    """
    graph = create_network_from_data(Data, replace_attr=True, label=label).copy()
    mapping = mapping_function(Data, label)
    data_mixing_matrix = nx.assortativity.attribute_mixing_matrix(graph, label, mapping=mapping)

    normalized_runs = []
    count_runs = []
    for run in range(runs):
        # Labels are read back from the graph each run, so every shuffle
        # starts from the assignment left behind by the previous run.
        labels = [graph.nodes[node][label] for node in graph.nodes]
        if seed_label is not None:
            random.Random(seed_label[run]).shuffle(labels)
        else:
            np.random.shuffle(labels)
        for node, shuffled_label in zip(graph.nodes, labels):
            graph.nodes[node][label] = shuffled_label

        normalized_runs.append(
            nx.assortativity.attribute_mixing_matrix(graph, label, mapping=mapping))
        count_runs.append(
            nx.assortativity.attribute_mixing_matrix(graph, label, mapping=mapping, normalized=False))

    stacked = np.array(normalized_runs)
    z_scores = (data_mixing_matrix - stacked.mean(axis=0)) / stacked.std(axis=0)
    return z_scores, count_runs
def configuration_model_label_z_score_mixing_matrix(Data, runs=1000, label="type", shuffle_label=False,
                                                    force_simple_graph=False, seed_config_mat=None,
                                                    seed_label=None):
    r"""Z-score mixing matrix against a configuration-model null model.

    The attribute mixing matrix of the observed network is compared with the
    mixing matrices of random graphs generated by
    ``nx.configuration_model`` from the observed degree sequence. Optionally
    the node labels are shuffled as well.

    Parameters
    ----------
    Data : Data
        Data object that contains Tij- and metadata for a data set. It is
        passed to ``create_network_from_data`` and ``mapping_function``.
    runs : int
        Number of null-model graphs to generate. The approach is heuristic,
        so more runs may give a better estimate.
    label : str
        Name of the node attribute for which the mixing matrix is built.
    shuffle_label : bool
        If True, the label sequence is additionally shuffled in every run.
        The shuffle is in place, so it carries over from run to run.
    force_simple_graph : bool
        If True, every generated graph is passed through ``to_simple_graph``
        (intended to remove the parallel and self edges that
        ``configuration_model`` can produce).
    seed_config_mat : list, default None
        One seed per run (indexed by run number) handed to
        ``nx.configuration_model``. Mainly intended for reproducible tests.
    seed_label : list, default None
        One seed per run (indexed by run number), each used to seed a
        ``random.Random`` instance for that run's label shuffle. With
        ``None`` the labels are shuffled with ``np.random.shuffle``. Only
        used when ``shuffle_label`` is True.

    Returns
    -------
    z_scores : numpy.ndarray
        ``(observed - mean) / std`` computed element-wise, where mean and
        standard deviation are taken over the normalized mixing matrices of
        all runs.
    matrices : list
        The normalized mixing matrix of every run.
    data_mixing_matrix : numpy.ndarray
        The normalized mixing matrix of the observed network.
    matrices_abs : list
        The non-normalized mixing matrix (``normalized=False``) of every run.

    References
    ----------
    .. [1] Génois, Mathieu & Zens, Maria & Lechner, Clemens & Rammstedt, Beatrice & Strohmaier, Markus. (2019).
       Building connections: How scientists meet each other during a conference.

    Examples
    --------
    The values depend on the random graphs; the numbers are illustrative.

    >>> z_scores, matrices, data_matrix, matrices_abs = configuration_model_label_z_score_mixing_matrix(
    ...     test_df, runs=1000, label="Age", shuffle_label=True, force_simple_graph=True)
    >>> print(z_scores)
    [[1.0320936930842797, -0.7717976357301974, -0.5], [-0.7717976357301974, -0.0667601413575786, -1.2927763604862383], [-0.5, -1.2927763604862383, 2.632147318581194]]

    See Also
    --------
    face2face.statistics.null_modell.shuffle_label_z_score_mixing_matrix
    """
    network = create_network_from_data(Data, replace_attr=True, label=label)
    mapping = mapping_function(Data, label)
    data_mixing_matrix = nx.assortativity.attribute_mixing_matrix(network, label, mapping=mapping)

    # Both sequences follow the node order of the observed network, so that
    # position i of the labels belongs to the node with degree i.
    degree_sequence = [degree for _node, degree in network.degree]
    type_sequence = [network.nodes[node][label] for node in network.nodes]

    matrices = []
    matrices_abs = []
    for run in range(runs):
        if seed_config_mat is not None:
            null_model = nx.configuration_model(degree_sequence, seed=seed_config_mat[run])
        else:
            null_model = nx.configuration_model(degree_sequence)

        if force_simple_graph:
            null_model = to_simple_graph(null_model)

        if shuffle_label:
            if seed_label is not None:
                random.Random(seed_label[run]).shuffle(type_sequence)
            else:
                np.random.shuffle(type_sequence)

        for node, node_label in zip(null_model.nodes, type_sequence):
            null_model.nodes[node][label] = node_label

        matrices.append(
            nx.assortativity.attribute_mixing_matrix(null_model, label, mapping=mapping))
        matrices_abs.append(
            nx.assortativity.attribute_mixing_matrix(null_model, label, mapping=mapping, normalized=False))

    stacked = np.array(matrices)
    z_scores = (data_mixing_matrix - stacked.mean(axis=0)) / stacked.std(axis=0)
    return z_scores, matrices, data_mixing_matrix, matrices_abs