Ejemplo n.º 1
0
def test_ANM():
    """Exercise ANM on the 'tuebingen' and 'sachs' datasets and plot the result.

    First calls ``predict()`` on the 'tuebingen' data, then calls
    ``orient_graph()`` on the 'sachs' data with the loaded graph wrapped in
    an ``nx.DiGraph``, and finally draws the oriented graph.
    """
    pairs, _pair_labels = load_dataset('tuebingen')
    model = ANM()

    # Pairwise use: predict() directly on the loaded data.
    # (HEBI: this is too slow)
    output = model.predict(pairs)

    # Graph use: orient_graph() on a dataset from the cdt.data module.
    sachs_data, sachs_graph = load_dataset('sachs')
    candidate = nx.DiGraph(sachs_graph)
    output = model.orient_graph(sachs_data, candidate)

    # Display the directed graph returned by orient_graph().
    nx.draw_networkx(output, font_size=8)
    plt.show()
Ejemplo n.º 2
0
def load_cdt_dataset(folder_path=None, file_name=None, names=('tuebingen',)):
    """Load a cdt pairwise dataset as a list of stacked arrays plus labels.

    Args:
        folder_path: Unused by this function; kept for interface
            compatibility.
        file_name: Unused by this function; kept for interface
            compatibility.
        names: Dataset name(s) understood by ``load_dataset``. Only the
            first name is loaded. A bare string is accepted and treated as
            a single name.

    Returns:
        A ``(data, labels)`` tuple. ``data`` is a list with one entry per
        row of the loaded frame, each entry being ``np.vstack`` of the
        row's first two columns. ``labels`` is ``1 - values`` of the loaded
        label frame, squeezed.
    """
    # A bare string would otherwise be indexed character by character
    # (names[0] == 't'), which is what the old default ('tuebingen') did.
    if isinstance(names, str):
        names = (names,)

    df_data, df_labels = load_dataset(names[0])
    data = [
        np.vstack((first, second))
        for first, second in zip(df_data.iloc[:, 0], df_data.iloc[:, 1])
    ]
    labels = 1 - df_labels.values.squeeze()
    return data, labels
Ejemplo n.º 3
0
def test_NCC():
    """Fit NCC on half of 'tuebingen', predict, orient 'sachs', and plot.

    The 'sachs' graph is passed to ``orient_graph()`` as an undirected
    ``nx.Graph``.
    """
    pairs, pair_labels = load_dataset('tuebingen')
    train_x, test_x, train_y, _test_y = train_test_split(
        pairs, pair_labels, train_size=.5)

    model = NCC()
    model.fit(train_x, train_y)
    # Pairwise use: predict() on the held-out split.
    output = model.predict(test_x)

    # (HEBI: I'll need to compare with this)
    # Graph use: orient_graph() on a dataset from the cdt.data module.
    sachs_data, sachs_graph = load_dataset("sachs")
    skeleton = nx.Graph(sachs_graph)
    output = model.orient_graph(sachs_data, skeleton)

    # Display the directed graph returned by orient_graph().
    nx.draw_networkx(output, font_size=8)
    plt.show()
def test_tuebingen():
    """Fit Jarfo on the first 30 'tuebingen' rows and print its predictions.

    Returns:
        Always 0.
    """
    sample_count = 30
    pairs, targets = load_dataset('tuebingen')
    pairs, targets = pairs[:sample_count], targets[:sample_count]

    model = Jarfo()
    # Only the 'Target' column of the label frame is used for fitting.
    model.fit(pairs, targets[['Target']])
    predictions = model.predict(pairs)
    print(predictions)
    return 0
Ejemplo n.º 5
0
def test_RCC():
    """Fit RCC on half of 'tuebingen', predict, orient 'sachs', and plot.

    Both the oriented output and the originally loaded 'sachs' graph are
    drawn before the figure is shown.
    """
    pairs, pair_labels = load_dataset('tuebingen')
    train_x, test_x, train_y, _test_y = train_test_split(
        pairs, pair_labels, train_size=.5)

    # Open question from the author: why all training data has label 1.0?

    model = RCC()
    model.fit(train_x, train_y)
    # Pairwise use: predict() on the held-out split.
    output = model.predict(test_x)

    # (HEBI: and this as well)
    # Graph use: orient_graph() on a dataset from the cdt.data module.
    sachs_data, sachs_graph = load_dataset('sachs')
    # Author note: this is only used to orient the graph -- the graph is
    # already given.
    candidate = nx.DiGraph(sachs_graph)
    output = model.orient_graph(sachs_data, candidate)

    # Draw the oriented graph, then the loaded graph, and show the figure.
    nx.draw_networkx(output, font_size=8)
    nx.draw_networkx(sachs_graph)
    plt.show()
Ejemplo n.º 6
0
def test_GES():
    """Call GES.predict() on 'sachs' in three ways and plot the last result.

    predict() is called without a graph, with an undirected ``nx.Graph``
    built from the loaded graph, and with the loaded graph itself.
    """
    sachs_data, sachs_graph = load_dataset("sachs")
    model = GES()

    # 1) No graph provided as an argument.
    output = model.predict(sachs_data)

    # 2) With an undirected graph.
    undirected = nx.Graph(sachs_graph)
    output = model.predict(sachs_data, undirected)

    # 3) With the loaded (directed) graph.
    output = model.predict(sachs_data, sachs_graph)

    # Display the graph produced by the last call.
    nx.draw_networkx(output, font_size=8)
    plt.show()
def test_categorical():
    """Fit Jarfo on 10 'tuebingen' rows after binning both variables.

    Each cell in the first two columns of the first 10 rows is replaced by
    the ``np.digitize`` bin indices of its values, using the bin edges that
    ``np.histogram`` computes for that same cell.

    Returns:
        Always 0.
    """
    row_count = 10
    pairs, targets = load_dataset('tuebingen')
    pairs = pairs[:row_count]
    targets = targets[:row_count]

    for row in range(row_count):
        for col in (0, 1):
            values = pairs.iloc[row, col]
            bin_edges = np.histogram(values)[1]
            pairs.iloc[row, col] = np.digitize(values, bin_edges)

    model = Jarfo()
    model.fit(pairs, targets[['Target']])
    predictions = model.predict(pairs)
    print(predictions)
    return 0
def tcep_geom_net_datalosses(
        max_sample_size=(10**3), max_iter_factor=8, num_hiddens=20):
    """Run ``inner_loop_datalosses`` on every 'tuebingen' row and stack results.

    Args:
        max_sample_size: Passed to ``cut_num_pairs`` as ``num_max``.
        max_iter_factor: Not used in this function body.
        num_hiddens: Not used in this function body.

    Returns:
        The result of the first row on its own if there is a single row,
        otherwise all per-row results combined with ``np.dstack`` (result
        ``i`` is at ``[:, :, i]``). ``None`` when the dataset has no rows.
    """
    data, labels = load_dataset('tuebingen', shuffle=False)
    # Return value is discarded; presumably trims `data` in place -- confirm.
    cut_num_pairs(data, num_max=max_sample_size)

    stacked = None
    for idx, pair_row in data.iterrows():
        x_np, y_np = process(pair_row)
        x_t = torch.from_numpy(x_np).type(dtype)
        y_t = torch.from_numpy(y_np).type(dtype)

        current = inner_loop_datalosses(x_t, y_t, loss="sinkhorn", p=1)

        # Grow the stack along the third axis, one row result at a time.
        if stacked is None:
            stacked = current
        else:
            stacked = np.dstack([stacked, current])
        print(f'------- end test for sample {idx}/{len(data)} (i/N) -------')
    return stacked
Ejemplo n.º 9
0
def test_PC():
    """Call PC.predict() on 'sachs' in three ways and plot the last result.

    predict() is called without a graph, with an undirected ``nx.Graph``
    built from the loaded graph, and with the loaded graph itself.
    """
    sachs_data, sachs_graph = load_dataset("sachs")

    # (HEBI: this requires pcalg, kpcalg, and
    # https://github.com/Diviyan-Kalainathan/RCIT)
    model = PC()

    # 1) No graph provided as an argument.
    output = model.predict(sachs_data)

    # 2) With an undirected graph.
    undirected = nx.Graph(sachs_graph)
    output = model.predict(sachs_data, undirected)

    # 3) With the loaded (directed) graph.
    output = model.predict(sachs_data, sachs_graph)

    # Display the graph produced by the last call.
    nx.draw_networkx(output, font_size=8)
    plt.show()
def tcep_acc_curves_datalength(max_iter_factor=8, num_hiddens=20, ax=None):
    """Plot one accuracy curve per test on the 'tuebingen' dataset.

    Aggregated results are filtered to the requested ``max_iter_factor``
    and ``num_hiddens``. For each test, the "->" and "<-" direction values
    are paired into a two-column score array and handed to
    ``_accuracy_curve``. The band returned by ``_critical_curve`` and a
    horizontal line at 0.5 are drawn on the same target.

    Args:
        max_iter_factor: Value of the ``max_iter_factor`` column to keep.
        num_hiddens: Value of the ``num_hiddens`` column to keep.
        ax: Object to draw on (e.g. a matplotlib Axes). Defaults to the
            ``plt`` module, i.e. the current axes.

    Returns:
        None. The function only draws.
    """
    if ax is None:
        ax = plt

    data, labels = load_dataset("tuebingen", shuffle=False)
    labels = labels.values
    results = _aggregate_datasets(rescale_tests=True)
    results = results[(results["max_iter_factor"] == max_iter_factor)
                      & (results["num_hiddens"] == num_hiddens)]

    for test in ["mmd-gamma", "c2st-nn", "c2st-knn", "test_loss"]:
        t_res = results[results["test"] == test]
        # One column per direction, one row per entry.
        forward = t_res[t_res["direction"] == dir_name_map("->")]["value"].values
        backward = t_res[t_res["direction"] == dir_name_map("<-")]["value"].values
        scores = np.vstack([forward, backward]).T

        acc_curve = _accuracy_curve(scores, labels)
        # np.round replaces np.round_, which was removed in NumPy 2.0.
        auac = np.round(_area_under_acc_curve(scores, labels), 2)
        ax.plot(acc_curve[:, 0], acc_curve[:, 1],
                label=f'Test={test} (AUAC={auac})')

    crits_dr, crits_vals = _critical_curve(max_n=len(data))
    ax.fill_between(crits_dr, crits_vals, 1 - crits_vals,
                    alpha=0.5, color='lightgrey')
    ax.axhline(0.5, color='grey', linestyle="--")
    # Set limits on the same target as the curves; plt.axis would touch the
    # current axes, which need not be the `ax` passed by the caller.
    ax.axis([0, 1, 0, 1])
Ejemplo n.º 11
0
def test_tuebingen():
    """Check that the 'tuebingen' dataset loads and unpacks into two objects."""
    pairs, targets = load_dataset('tuebingen')
Ejemplo n.º 12
0
import numpy as np
import torch
import numbers
from cdt.data import load_dataset
from sklearn.metrics import mean_squared_error
import math
from numba import jit

# Module-level values shared by the functions below.
# NOTE: the dataset is loaded at import time, so importing this module
# calls load_dataset().
RESOLUTION = 1e-02
# Load 'tuebingen' with shuffle=False.
data, labels = load_dataset('tuebingen', shuffle=False)
# Replace the label container by its underlying `.values` array.
labels = labels.values
# Shared prefix for the "unsupported datatype" error message.
complain = "Datatype not supported, try Torch.tensors or np.ndarrays"

# functions


def _bin_int_as_array(int_val, num_bits, dtype=None):
    ''' returns an int as an array of bits, using max `num_bits`.
        ex: 4,7 --> 0010000
            2,5 --> 01000
            etc..

        `dtype` selects the container: None or 'numpy' gives a numpy
        array of zeros, 'torch' gives a torch tensor of zeros.

        Raises ValueError for any other `dtype`.
    '''
    if dtype is None or dtype == 'numpy':
        bin_arr = np.zeros(num_bits)
    elif dtype == 'torch':
        bin_arr = torch.zeros(num_bits)
    else:
        # The exception used to be built but never raised, so execution
        # continued with `bin_arr` undefined.
        raise ValueError(complain + "(Default value is numpy array)", dtype)

    i = num_bits
Ejemplo n.º 13
0
def test_unknown_dataset():
    """Check that load_dataset raises ValueError for an unknown dataset name.

    Raises:
        AssertionError: If load_dataset returns without raising ValueError.
    """
    try:
        # The result is deliberately not unpacked: a failed tuple unpacking
        # raises ValueError too and would make this test pass for the
        # wrong reason.
        load_dataset('asdasd')
    except ValueError:
        return
    raise AssertionError(
        "load_dataset('asdasd') did not raise ValueError")
Ejemplo n.º 14
0
def test_dream():
    """Check that the 'dream4-2' dataset loads and unpacks into two objects."""
    samples, target_graph = load_dataset('dream4-2')
Ejemplo n.º 15
0
def test_sachs():
    """Check that the 'sachs' dataset loads and unpacks into two objects."""
    samples, target_graph = load_dataset('sachs')
# generator = CausalPairGenerator('linear')
# data, labels = generator.generate(100, npoints=500)
# # generator.to_csv('generated_pairs')
# d = data.values

# from cdt.causality.pairwise import NCC
from CausalDiscuveryToolboxClone.Models.NCC import NCC
import networkx as nx
import matplotlib.pyplot as plt
from cdt.data import load_dataset
from sklearn.model_selection import train_test_split
from CausalDiscuveryToolboxClone.DataGeneration import functions

from scipy.special import expit

# Load the 'tuebingen' data and pass it through the project's
# swap_cause_effect helper.
# NOTE(review): presumably this swaps some pairs and their labels -- confirm
# against DataGeneration.functions.
data, labels = load_dataset('tuebingen')
data, labels = functions.swap_cause_effect(data, labels)

# train_size=.2: a float train_size is the proportion used for training.
X_tr, X_te, y_tr, y_te = train_test_split(data, labels, train_size=.2)

obj = NCC()
# NOTE(review): the positional 3 is presumably the number of epochs --
# confirm against the cloned NCC.fit signature.
obj.fit(X_tr, y_tr, 3, learning_rate=1e-2)
# This example uses the predict() method
logits = obj.predict(X_te)
# expit is the logistic sigmoid; applied element-wise to the predictions.
output = expit(logits.values)
Ejemplo n.º 17
0
def test_LiNGAM():
    """Run LiNGAM.predict() on the 'sachs' data (the loaded graph is unused)."""
    samples, _target_graph = load_dataset("sachs")
    model = LiNGAM()
    output = model.predict(samples)
Ejemplo n.º 18
0
def test_CAM():
    """Run CAM.predict() on the 'sachs' data and plot the predicted graph."""
    samples, _target_graph = load_dataset("sachs")
    model = CAM()
    predicted = model.predict(samples)

    # Draw the prediction and show the figure.
    nx.draw_networkx(predicted, font_size=8)
    plt.show()
        os.path.dirname(os.path.realpath(__file__)))).iloc[:, :50]

# Module-level fixtures. All paths are resolved relative to the directory
# containing this file, under ../datasets.

# Pairwise targets: first 50 columns of the CSV, indexed by "SampleID".
train_target = pd.read_csv(
    "{}/../datasets/Example_pairwise_targets.csv".format(
        os.path.dirname(
            os.path.realpath(__file__)))).iloc[:, :50].set_index("SampleID")

# A single pairwise sample: row 0 of the pairs file, limited to the first
# 50 columns.
data_pairwise = read_causal_pairs(
    "{}/../datasets/Example_pairwise_pairs.csv".format(
        os.path.dirname(os.path.realpath(__file__)))).iloc[0, :50]

# Graph data: first 50 rows and first 5 columns of the CSV.
data_graph = pd.read_csv('{}/../datasets/Example_graph_numdata.csv'.format(
    os.path.dirname(os.path.realpath(__file__)))).iloc[:50, :5]

# Output of Glasso().predict() on the graph data.
graph_skeleton = Glasso().predict(data_graph)
# First 10 entries of the first object returned for 'tuebingen'.
tueb = load_dataset('tuebingen')[0][:10]


def test_pairwise():
    """Instantiate each pairwise method, fit it if possible, and predict.

    Every class is printed, built with no arguments, fitted on the
    module-level ``train_data`` / ``train_target`` when it exposes ``fit``,
    and asked to predict ``data_pairwise``. The prediction must not be
    None and is printed.

    Returns:
        Always 0.
    """
    pairwise_methods = (ANM, IGCI, BivariateFit, CDS, RCC, NCC, RECI)  # Jarfo
    for method_cls in pairwise_methods:
        print(method_cls)
        model = method_cls()
        # Only some methods are trainable.
        if hasattr(model, "fit"):
            model.fit(train_data, train_target)
        prediction = model.predict(data_pairwise)
        assert prediction is not None
        print(prediction)
    return 0


def test_pairwise():