예제 #1
0
    def fit(self, p, samples=100):
        """Estimate the g-formula for network data under weak interference at coverage `p`
        """
        estimates = []
        for _ in range(samples):
            # Draw a fresh binary treatment for every node at coverage p
            sim = self.df.copy()
            sim[self.exposure] = np.random.binomial(n=1, p=p, size=sim.shape[0])

            # Back-calculate the exposure summary measures under the new treatment
            a_vector = np.asarray(sim[self.exposure])
            sim[self.exposure + '_sum'] = fast_exp_map(self.adj_matrix, a_vector, measure='sum')
            for measure in ('mean', 'var', 'mean_dist', 'var_dist'):
                col = self.exposure + '_' + measure
                sim[col] = fast_exp_map(self.adj_matrix, a_vector, measure=measure)
                sim[col] = sim[col].fillna(0)  # isolates have no neighbors, so measure defaults to 0
            if self._thresholds_any_:
                create_threshold(data=sim, variables=self._thresholds_variables_,
                                 thresholds=self._thresholds_, definitions=self._thresholds_def_)

            # Predict outcomes under the treatment plan and keep the sample mean
            sim[self.outcome] = np.nan
            sim[self.outcome] = self._outcome_model.predict(sim)
            estimates.append(np.mean(sim[self.outcome]))

        self.marginals_vector = estimates
        self.marginal_outcome = np.mean(estimates)
예제 #2
0
    def __init__(self, network, exposure, outcome, verbose=False):
        """Implementation of the g-formula estimator described in Sofrygin & van der Laan 2017
        """
        # Flatten node attribute data into a pandas DataFrame for modeling
        df = network_to_df(network)

        if not df[exposure].value_counts().index.isin([0, 1]).all():
            raise ValueError("NetworkGFormula only supports binary exposures currently")

        # Outcome is treated as continuous unless every observed value is 0/1
        self._continuous_ = not df[outcome].value_counts().index.isin([0, 1]).all()

        # Relabel nodes as 0..n-1 (original labels kept under '_original_id_')
        network = nx.convert_node_labels_to_integers(network, first_label=0, label_attribute='_original_id_')

        self.network = network
        self.adj_matrix = nx.adjacency_matrix(network, weight=None)
        self.exposure = exposure
        self.outcome = outcome

        # Summary measures of every variable (except outcome/id) over immediate neighbors
        background_vars = [c for c in list(df.columns) if c not in ['_original_id_', outcome]]
        for v in background_vars:
            vec = np.asarray(df[v])
            df[v + '_sum'] = fast_exp_map(self.adj_matrix, vec, measure='sum')
            for measure in ('mean', 'var', 'mean_dist', 'var_dist'):
                col = v + '_' + measure
                df[col] = fast_exp_map(self.adj_matrix, vec, measure=measure)
                df[col] = df[col].fillna(0)  # isolates have no neighbors, so measure defaults to 0

        # Attach each node's degree as a covariate
        degrees = pd.DataFrame.from_dict(dict(network.degree), orient='index')
        degrees = degrees.rename(columns={0: 'degree'})
        self.df = pd.merge(df, degrees, how='left', left_index=True, right_index=True)

        # Output attributes
        self.marginals_vector = None
        self.marginal_outcome = None

        # Internal storage used by later fitting steps
        self._outcome_model = None
        self._q_model = None
        self._verbose_ = verbose
        self._thresholds_ = []
        self._thresholds_variables_ = []
        self._thresholds_def_ = []
        self._thresholds_any_ = False
예제 #3
0
    def test_fast_exp_map_graph3(self):
        # fast_exp_map should match the reference exp_map on a complete graph
        G = nx.complete_graph(5)
        attrs = [1, 1, 1, 0, 0]
        nx.set_node_attributes(G, dict(enumerate(attrs)), 'A')

        observed = fast_exp_map(nx.adjacency_matrix(G, weight=None),
                                np.array(attrs),
                                measure='sum')
        npt.assert_equal(observed, exp_map(G, 'A'))
예제 #4
0
    def test_fast_exp_map_directed(self):
        # fast_exp_map should match the reference exp_map on a directed star
        G = nx.DiGraph([(0, 1), (0, 2), (0, 3), (0, 4)])
        attrs = [1, 0, 1, 1, 1]
        nx.set_node_attributes(G, dict(enumerate(attrs)), 'A')

        observed = fast_exp_map(nx.adjacency_matrix(G, weight=None),
                                np.array(attrs),
                                measure='sum')
        npt.assert_equal(observed, exp_map(G, 'A'))
예제 #5
0
    def _generate_pooled_sample(self, p, samples):
        """Generate a pooled data set of Monte Carlo resamples of the network data under the
        policy that treats each node independently with probability `p`.

        Parameters
        ----------
        p : float
            Probability of treatment for each node.
        samples : int
            Number of Monte Carlo resamples to stack together.

        Returns
        -------
        pandas.DataFrame
            All resamples concatenated, with the resample index stored in `_sample_id_`.
        """
        pooled_sample = []

        for s in range(samples):
            # Draw a fresh binary treatment for every node at coverage p
            g = self.df.copy()
            g[self.exposure] = np.random.binomial(n=1, p=p, size=g.shape[0])

            # Back-calculate the exposure summary measures under the new treatment
            a = np.array(g[self.exposure])
            g[self.exposure + '_sum'] = fast_exp_map(self.adj_matrix, a, measure='sum')
            for measure in ('mean', 'var', 'mean_dist', 'var_dist'):
                col = self.exposure + '_' + measure
                g[col] = fast_exp_map(self.adj_matrix, a, measure=measure)
                # BUG FIX: the original fillna'd `_mean_dist` twice and never filled
                # `_var_dist`, leaving NaN for isolates; all measures are now filled with 0
                g[col] = g[col].fillna(0)

            if self._gs_measure_ is None:
                # Nonparametric exposure mapping needs the treatment written back to the graph
                network = self.network.copy()
                for n in network.nodes():
                    # BUG FIX: `network.node[n]` was removed in networkx 2.4; use `.nodes`
                    # (consistent with the rest of this file)
                    network.nodes[n][self.exposure] = a[n]
                df = exp_map_individual(network, measure=self.exposure, max_degree=self._max_degree_).fillna(0)
                for c in self._nonparam_cols_:
                    g[c] = df[c]

            if self._thresholds_any_:
                create_threshold(data=g, variables=self._thresholds_variables_,
                                 thresholds=self._thresholds_, definitions=self._thresholds_def_)

            # Tag rows with the resample they came from
            g['_sample_id_'] = s
            pooled_sample.append(g)

        return pd.concat(pooled_sample, axis=0, ignore_index=True)
예제 #6
0
        raise ValueError("Invalid set-up specification for " + network +
                         " network")

else:
    raise ValueError("Invalid network name in .sh script")

# Determining if shift or absolute
shift = bool(int(shift))
if shift:
    # Log-odds shifts to evaluate under the stochastic policy
    prop_treated = [-2.5, -2.0, -1.5, -1.0, -0.5, 0.5, 1.0, 1.5, 2.0, 2.5]

    # Generating probabilities (true) to assign
    data = network_to_df(G)
    adj_matrix = nx.adjacency_matrix(G, weight=None)
    for col in ('O', 'G'):
        data[col + '_mean'] = fast_exp_map(adj_matrix,
                                           np.array(data[col]),
                                           measure='mean')
    prob = logistic.cdf(-1.3 - 1.5 * data['P'] + 1.5 * data['P'] * data['G'] +
                        0.95 * data['O_mean'] + 0.95 * data['G_mean'])
    log_odds = np.log(probability_to_odds(prob))

else:
    # Absolute coverage levels 0.05, 0.10, ..., 0.95
    prop_treated = [i / 20 for i in range(1, 20)]

truth = truth_values(network=network,
예제 #7
0
        raise ValueError("Invalid set-up specification for " + network +
                         " network")

else:
    raise ValueError("Invalid network name in .sh script")

# Determining if shift or absolute
shift = bool(int(shift))
if shift:
    # Log-odds shifts to evaluate under the stochastic policy
    prop_treated = [-2.5, -2.0, -1.5, -1.0, -0.5, 0.5, 1.0, 1.5, 2.0, 2.5]

    # Generating probabilities (true) to assign
    data = network_to_df(G)
    adj_matrix = nx.adjacency_matrix(G, weight=None)
    data['E_mean'] = fast_exp_map(adj_matrix,
                                  np.array(data['E']),
                                  measure='mean')
    prob = logistic.cdf(-0.5 + 0.05 * (data['B'] - 30) +
                        0.25 * data['G'] * data['E'] + 0.05 * data['E_mean'])
    log_odds = np.log(probability_to_odds(prob))

else:
    # Absolute coverage levels 0.05, 0.10, ..., 0.95
    prop_treated = [i / 20 for i in range(1, 20)]

truth = truth_values(network=network,
                     dgm=exposure,
                     restricted_degree=restrict,
                     shift=shift)