Beispiel #1
0
    def _run_igsp(dag_num):
        """Run IGSP for the DAG with index ``dag_num``, caching the estimate on disk.

        The estimate's adjacency matrix is stored as a ``.npy`` file in
        ``<sample folder>/estimates/igsp``; the file name encodes ``nruns``,
        ``depth``, ``alpha`` and ``alpha_invariant``.  All of these settings
        (and ``overwrite``, ``nodes``, the sample-generation parameters, ...)
        are read from the enclosing scope.

        Args:
            dag_num: index into ``sample_folders``; also forwarded to
                ``get_dag_samples``.

        Returns:
            The estimated DAG: either the fresh output of ``igsp`` or, when the
            cache file exists and ``overwrite`` is falsy, a DAG rebuilt from the
            stored adjacency matrix with ``cd.DAG.from_amat``.
        """
        # === GENERATE FILENAME
        sample_folder = sample_folders[dag_num]
        alg_folder = os.path.join(sample_folder, 'estimates', 'igsp')
        os.makedirs(alg_folder, exist_ok=True)
        filename = os.path.join(
            alg_folder,
            'nruns=%d,depth=%d,alpha=%.2e,alpha_invariant=%.2e.npy' %
            (nruns, depth, alpha, alpha_invariant))

        # === REUSE CACHED ESTIMATE
        if os.path.exists(filename) and not overwrite:
            return cd.DAG.from_amat(np.load(filename))

        # === RUN ALGORITHM
        # The third return value of get_dag_samples is not needed here.
        obs_samples, setting_list, _ = get_dag_samples(
            ndags,
            nnodes,
            nneighbors,
            nsamples,
            nsettings,
            num_known,
            num_unknown,
            intervention,
            dag_num,
            nonlinear=nonlinear)

        # NOTE(review): the original code had separate `nonlinear` and linear
        # branches that were identical, so Gaussian tests are used for both.
        # If nonlinear data is meant to use a different test, add it here.
        suffstat = gauss_ci_suffstat(obs_samples)
        suffstat_inv = gauss_invariance_suffstat(
            obs_samples,
            [setting['samples'] for setting in setting_list])
        ci_tester = MemoizedCI_Tester(gauss_ci_test, suffstat, alpha=alpha)
        inv_tester = MemoizedInvarianceTester(gauss_invariance_test,
                                              suffstat_inv,
                                              alpha=alpha_invariant)

        # igsp is given only the known intervention targets of each setting.
        igsp_settings = [{
            'interventions': setting['known_interventions']
        } for setting in setting_list]
        est_dag = igsp(igsp_settings,
                       nodes,
                       ci_tester,
                       inv_tester,
                       depth=depth,
                       nruns=nruns)

        np.save(filename, est_dag.to_amat()[0])
        return est_dag
Beispiel #2
0
def prepare_igsp(obs_samples,
                 iv_samples_list,
                 targets_list,
                 alpha=1e-3,
                 alpha_inv=1e-3,
                 ci_test="gaussian"):
    """Build the memoized CI tester and invariance tester for an IGSP run.

    Args:
        obs_samples: observational samples.
        iv_samples_list: one sample collection per interventional context.
        targets_list: accepted for interface compatibility; not used here.
        alpha: significance level handed to the CI tester.
        alpha_inv: significance level handed to the invariance tester.
        ci_test: which family of tests to use: "gaussian", "hsic" or "kci".

    Returns:
        A ``(ci_tester, invariance_tester)`` tuple.

    Raises:
        ValueError: if ``ci_test`` is not one of the supported names.
    """
    if ci_test == "gaussian":
        # Gaussian tests work on precomputed sufficient statistics.
        ci_stat = gauss_ci_suffstat(obs_samples)
        inv_stat = gauss_invariance_suffstat(obs_samples, iv_samples_list)
        return (
            MemoizedCI_Tester(gauss_ci_test, ci_stat, alpha=alpha),
            MemoizedInvarianceTester(gauss_invariance_test,
                                     inv_stat,
                                     alpha=alpha_inv),
        )

    if ci_test not in ("hsic", "kci"):
        raise ValueError(
            f"CI test '{ci_test}' does not exist. Choose between: [gaussian, hsic, kci]"
        )

    # Kernel tests receive the raw samples: the observational data under
    # "obs_samples" and each context under its position in iv_samples_list.
    inv_stat = {"obs_samples": obs_samples}
    inv_stat.update(dict(enumerate(iv_samples_list)))

    if ci_test == "hsic":
        ci_fn, inv_fn = hsic_test, hsic_invariance_test
    else:
        ci_fn, inv_fn = kci_test, kci_invariance_test

    ci_tester = MemoizedCI_Tester(ci_fn, obs_samples, alpha=alpha)
    invariance_tester = MemoizedInvarianceTester(inv_fn,
                                                 inv_stat,
                                                 alpha=alpha_inv)
    return ci_tester, invariance_tester
Beispiel #3
0
    samples = pd.read_csv(os.path.join(SACHS_DATA_FOLDER, file), sep=',')
    iv_str = file.split('=')[1][:-4]
    ivs = frozenset({int(iv_str)}) if iv_str != '' else frozenset()
    sample_dict[ivs] = samples.values
# Observational data is stored under the empty intervention set.
obs_samples = sample_dict[frozenset()]
all_samples = np.concatenate(list(sample_dict.values()), axis=0)
suffstat = gauss_ci_suffstat(obs_samples)
suffstat_all = {
    'C': np.corrcoef(all_samples, rowvar=False),
    'n': all_samples.shape[0],
}

# One setting per non-empty intervention set, in sample_dict order.
setting_list = [
    {'known_interventions': iv_nodes}
    for iv_nodes in sample_dict
    if iv_nodes != frozenset()
]
# Interventional samples, aligned index-by-index with setting_list.
iv_samples_list = [
    samples
    for iv_nodes, samples in sample_dict.items()
    if iv_nodes != frozenset()
]
invariance_suffstat = gauss_invariance_suffstat(obs_samples, iv_samples_list)
# HSIC variant: contexts keyed by position, observational data by name.
hsic_invariance_suffstat = dict(enumerate(iv_samples_list))
hsic_invariance_suffstat['obs_samples'] = obs_samples

# === RUN UNKNOWN TARGET IGSP WITH GAUSS CI
for alpha in tqdm([1e-1, 1e-2, 1e-3, 2e-1, 3e-1, 4e-1, 5e-1, 5e-2]):
    alpha_i = 1e-20
    filename = os.path.join(ESTIMATED_FOLDER, 'utigsp_gauss_ci_alpha=%.2e.txt,alpha_i=%.2e.txt' % (alpha, alpha_i))
    ci_tester = MemoizedCI_Tester(gauss_ci_test, suffstat, alpha=alpha)
    invariance_tester = MemoizedInvarianceTester(gauss_invariance_test, invariance_suffstat, alpha=alpha_i)
    # invariance_tester = MemoizedInvarianceTester(hsic_invariance_test, hsic_invariance_suffstat, alpha=alpha_i)
    if OVERWRITE or not os.path.exists(filename):
        est_dag, learned_interventions = unknown_target_igsp(
            setting_list,
            set(range(nnodes)),
            ci_tester,
Beispiel #4
0
def get_bs_dags(num_bs,
                obs_samples,
                nsamples_obs,
                nnodes,
                cheat_cpdag=None,
                bic=True):
    """
    Build a weighted list of bootstrapped DAGs from observational data.

    Each round resamples ``nsamples_obs`` rows of ``obs_samples`` with
    replacement, learns a DAG with ``unknown_target_igsp`` (no interventional
    settings), converts it to a CPDAG and collects DAGs from that CPDAG until
    at least ``num_bs`` DAGs (counted with multiplicity) have been gathered.
    The weights are then replaced by normalized posterior weights, entries
    with weight below 0.001 are dropped, and the rest are renormalized.

    ``cheat_cpdag`` is for debugging and for experiments that are allowed
    access to the MEC: when it is a numpy array, it is used in place of a
    learned CPDAG while no DAG has been collected yet.

    When ``bic`` is truthy, ``np.sum(dag) * log(nsamples_obs) / 2`` is
    subtracted from each DAG's log score before normalizing.

    Returns a list of dicts with keys 'dag', 'A', 'b', 'w' and 'count'.
    """
    dag_entries = []  # one record per distinct DAG
    position_of = {}  # DAG bytes -> index of its record in dag_entries
    n_collected = 0  # DAGs gathered so far, counted with multiplicity
    node_set = set(range(nnodes))

    while n_collected < num_bs:

        if n_collected != 0 or not isinstance(cheat_cpdag, np.ndarray):
            # Resample the observational data and learn a DAG from it.
            resample_idx = np.random.choice(nsamples_obs,
                                            nsamples_obs,
                                            replace=True)
            boot_suffstat = gauss_ci_suffstat(obs_samples[resample_idx])
            # The invariance statistics use the full data and no contexts.
            inv_suffstat = gauss_invariance_suffstat(obs_samples, [])
            ci_tester = MemoizedCI_Tester(gauss_ci_test,
                                          boot_suffstat,
                                          alpha=1e-3)
            invariance_tester = MemoizedInvarianceTester(
                gauss_invariance_test, inv_suffstat, alpha=1e-3)
            learned_dag, _ = unknown_target_igsp([],
                                                 node_set,
                                                 ci_tester,
                                                 invariance_tester,
                                                 nruns=5,
                                                 depth=None)
            cpdag = main.cpdag_from_dag_observational(
                learned_dag.to_amat()[0])
        else:
            cpdag = cheat_cpdag

        if mec_size.mec_size(cpdag) > num_bs:
            # MEC too large: draw only as many DAGs as are still missing.
            members = mec_size.uniform_sample_dag_plural(
                cpdag, num_bs - len(dag_entries), exact=False)
        else:
            # MEC small enough: take every member.
            members = mec_size.enumerate_dags(cpdag)

        for member in members:
            key = member.tobytes()
            slot = position_of.get(key)
            if slot is None:
                A, b = finite.get_weights_MLE(member, obs_samples)
                position_of[key] = len(dag_entries)
                dag_entries.append({
                    'dag': member,
                    'A': A,
                    'b': b,
                    'w': (1 / num_bs),
                    'count': 1
                })
            else:
                # Seen before: bump its weight and its multiplicity.
                dag_entries[slot]['w'] += (1 / num_bs)
                dag_entries[slot]['count'] += 1
            n_collected += 1

    # So far each weight is (multiplicity / num_bs); now turn the weights
    # into posteriors using the likelihood of the observational data.
    n_dags = len(dag_entries)
    logPy = finite.llhood([obs_samples], [[]], dag_entries,
                          (np.zeros(nnodes), 0))
    log_scores = np.zeros(n_dags)
    for idx, entry in enumerate(dag_entries):
        log_scores[idx] = logPy[idx] + np.log(entry['w'])
        if bic:
            # BIC-style penalty on the sum of the DAG matrix entries.
            log_scores[idx] = log_scores[idx] - np.sum(
                entry['dag']) * np.log(nsamples_obs) / 2
    log_norm = logsumexp(log_scores)
    for idx, entry in enumerate(dag_entries):
        entry['w'] = np.exp(log_scores[idx] - log_norm)

    # Drop negligible weights, then renormalize what is left.
    kept = [entry for entry in dag_entries if entry['w'] >= 0.001]
    kept_mass = 0
    for entry in kept:
        kept_mass += entry['w']
    for entry in kept:
        entry['w'] = entry['w'] / kept_mass
    return kept
Beispiel #5
0
    for known_iv in known_iv_list
]
# Full target set of each setting: its known target plus its unknown targets.
all_ivs_list = [
    {known_iv, *unknown_ivs}
    for known_iv, unknown_ivs in zip(known_iv_list, unknown_ivs_list)
]

nsamples = 5000
obs_samples = g.sample(nsamples)
# Every target in a setting receives the same INTERVENTION.
iv_samples_list = [
    g.sample_interventional(dict.fromkeys(all_ivs, INTERVENTION), nsamples)
    for all_ivs in all_ivs_list
]

ci_suffstat = gauss_ci_suffstat(obs_samples)
inv_suffstat = gauss_invariance_suffstat(
    obs_samples, context_samples_list=iv_samples_list)
ci_tester = MemoizedCI_Tester(gauss_ci_test, ci_suffstat)
invariance_tester = MemoizedInvarianceTester(gauss_invariance_test,
                                             inv_suffstat)

# The combined test gets both sets of sufficient statistics in one dict.
combined_suffstat = {'ci': ci_suffstat, 'invariance': inv_suffstat}
combined_ci_tester = MemoizedCI_Tester(combined_gauss_ci_test,
                                       combined_suffstat,
                                       alpha_inv=1e-5)

# Only the known target of each setting is passed to jci_gsp.
known_settings = [
    {'known_interventions': {known_iv}} for known_iv in known_iv_list
]
est_dag, est_targets_list = jci_gsp(known_settings,
                                    nodes,
                                    combined_ci_tester,
                                    verbose=True)
Beispiel #6
0
    def _run_utigsp(dag_num):
        """Run unknown-target IGSP for DAG ``dag_num``, caching the results on disk.

        Two files are written to ``<sample folder>/estimates/utigsp``: the
        estimate's adjacency matrix (``.npy``) and the learned intervention
        targets (the ``.npy`` name plus
        ``_learned_intervention_targets.json``).  The file name encodes
        ``nruns``, ``depth``, ``alpha``, ``alpha_invariant`` and whether
        ``no_targets`` is set.  All settings are read from the enclosing scope.

        The cached results are used only when both files exist and
        ``overwrite`` is falsy; otherwise the algorithm is (re-)run.

        Args:
            dag_num: index into ``sample_folders``; also forwarded to
                ``get_dag_samples``.

        Returns:
            ``(est_dag, learned_intervention_targets)``.  On a cache hit the
            targets come from JSON and are therefore a list of lists; on a
            fresh run they are returned exactly as ``unknown_target_igsp``
            produced them.
        """
        # === GENERATE FILENAME
        sample_folder = sample_folders[dag_num]
        alg_folder = os.path.join(sample_folder, 'estimates', 'utigsp')
        os.makedirs(alg_folder, exist_ok=True)
        no_target_str = '_no_targets' if no_targets else ''
        # Apply %-formatting to the fixed template first and append the suffix
        # afterwards, so the suffix is never interpreted as a format string.
        filename = os.path.join(
            alg_folder,
            ('nruns=%d,depth=%d,alpha=%.2e,alpha_invariant=%.2e' %
             (nruns, depth, alpha, alpha_invariant)) + no_target_str + '.npy')
        targets_filename = filename + '_learned_intervention_targets.json'

        # === REUSE CACHED RESULTS
        # Require both files: if only the .npy was written (e.g. the run was
        # interrupted before the JSON dump), fall through and recompute.
        cache_complete = (os.path.exists(filename)
                          and os.path.exists(targets_filename))
        if cache_complete and not overwrite:
            with open(targets_filename) as targets_file:
                learned_intervention_targets = json.load(targets_file)
            return cd.DAG.from_amat(
                np.load(filename)), learned_intervention_targets

        # === RUN ALGORITHM
        obs_samples, setting_list, _ = get_dag_samples(ndags,
                                                       nnodes,
                                                       nneighbors,
                                                       nsamples,
                                                       nsettings,
                                                       num_known,
                                                       num_unknown,
                                                       intervention,
                                                       dag_num,
                                                       nonlinear=nonlinear)

        # NOTE(review): the original code had separate `nonlinear` and linear
        # branches that were identical, so Gaussian tests are used for both.
        # If nonlinear data is meant to use a different test, add it here.
        suffstat = gauss_ci_suffstat(obs_samples)
        suffstat_inv = gauss_invariance_suffstat(
            obs_samples,
            [setting['samples'] for setting in setting_list])
        ci_tester = MemoizedCI_Tester(gauss_ci_test, suffstat, alpha=alpha)
        inv_tester = MemoizedInvarianceTester(gauss_invariance_test,
                                              suffstat_inv,
                                              alpha=alpha_invariant)

        # Only the known targets of each setting are passed to the algorithm.
        known_settings = [{
            'known_interventions': setting['known_interventions']
        } for setting in setting_list]
        est_dag, learned_intervention_targets = unknown_target_igsp(
            known_settings,
            nodes,
            ci_tester,
            inv_tester,
            depth=depth,
            nruns=nruns,
            no_targets=no_targets)

        np.save(filename, est_dag.to_amat()[0])
        # Each target collection is converted to a list so it is JSON-serializable.
        with open(targets_filename, 'w') as targets_file:
            json.dump(list(map(list, learned_intervention_targets)),
                      targets_file)
        return est_dag, learned_intervention_targets