def test_ANM():
    """Smoke-test ANM: pairwise prediction, then graph orientation on sachs."""
    pair_data, pair_labels = load_dataset('tuebingen')
    model = ANM()
    # Pairwise cause-effect prediction on the Tuebingen pairs.
    # (HEBI: this is too slow)
    output = model.predict(pair_data)
    # Orient an existing skeleton: the sachs dataset ships with a graph,
    # loadable via the cdt.data module.
    graph_data, graph = load_dataset('sachs')
    output = model.orient_graph(graph_data, nx.DiGraph(graph))
    # Render the resulting directed graph.
    nx.draw_networkx(output, font_size=8)
    plt.show()
def load_cdt_dataset(folder_path=None, file_name=None, names=('tuebingen',)):
    """Load a CDT pairwise dataset and return (data, labels).

    Bug fix: the previous default ``names=('tuebingen')`` was a plain string
    (missing trailing comma), so ``names[0]`` evaluated to ``'t'`` and the
    dataset lookup failed. The default is now a proper 1-tuple.

    Parameters
    ----------
    folder_path, file_name : unused here; kept for interface compatibility.
    names : sequence of str
        Dataset names; only ``names[0]`` is loaded.

    Returns
    -------
    data : list of np.ndarray
        One (2, n) array per pair, stacking cause over effect.
    labels : np.ndarray
        Labels flipped as ``1 - original`` (squeezed to 1-D).
    """
    df_data, df_labels = load_dataset(names[0])
    data = [
        np.vstack((df_data.iloc[i, 0], df_data.iloc[i, 1]))
        for i in range(len(df_data))
    ]
    # Flip label polarity to match the convention used downstream.
    labels = 1 - df_labels.values.squeeze()
    return data, labels
def test_NCC():
    """Train NCC on half the Tuebingen pairs, then orient the sachs graph."""
    data, labels = load_dataset('tuebingen')
    X_tr, X_te, y_tr, y_te = train_test_split(data, labels, train_size=.5)
    model = NCC()
    model.fit(X_tr, y_tr)
    # Pairwise prediction on the held-out split.
    output = model.predict(X_te)
    # (HEBI: I'll need to compare with this)
    # Graph orientation: the sachs dataset (via cdt.data) provides the skeleton.
    graph_data, graph = load_dataset("sachs")
    output = model.orient_graph(graph_data, nx.Graph(graph))
    # Render the oriented graph.
    nx.draw_networkx(output, font_size=8)
    plt.show()
def test_tuebingen():
    """Fit Jarfo on the first 30 Tuebingen pairs and print its predictions."""
    data, labels = load_dataset('tuebingen')
    data, labels = data[:30], labels[:30]
    model = Jarfo()
    model.fit(data, labels[['Target']])
    predictions = model.predict(data)
    print(predictions)
    return 0
def test_RCC():
    """Train RCC on half the Tuebingen pairs, then orient the sachs graph."""
    data, labels = load_dataset('tuebingen')
    # why all training data has label 1.0?
    X_tr, X_te, y_tr, y_te = train_test_split(data, labels, train_size=.5)
    model = RCC()
    model.fit(X_tr, y_tr)
    # Pairwise prediction on the held-out split.
    output = model.predict(X_te)
    # (HEBI: and this as well)
    # Graph orientation on the sachs dataset (loadable via cdt.data).
    graph_data, graph = load_dataset('sachs')
    # Oh, this is only used to orient the graph? The graph is already given!
    output = model.orient_graph(graph_data, nx.DiGraph(graph))
    # Render both the oriented result and the reference graph.
    nx.draw_networkx(output, font_size=8)
    nx.draw_networkx(graph)
    plt.show()
def test_GES():
    """Run GES prediction with no graph, an undirected graph, and a directed one."""
    data, graph = load_dataset("sachs")
    model = GES()
    # predict() works without a graph, or with a directed or undirected
    # graph provided as an input.
    output = model.predict(data)                    # no graph argument
    output = model.predict(data, nx.Graph(graph))   # undirected skeleton
    output = model.predict(data, graph)             # directed graph
    # Render the last result.
    nx.draw_networkx(output, font_size=8)
    plt.show()
def test_categorical():
    """Fit Jarfo on discretized versions of the first 10 Tuebingen pairs."""
    data, labels = load_dataset('tuebingen')
    data = data[:10]
    # Bin both variables of each pair into their own histogram edges,
    # turning the continuous samples into categorical codes.
    for row in range(10):
        for col in (0, 1):
            values = data.iloc[row, col]
            edges = np.histogram(values)[1]
            data.iloc[row, col] = np.digitize(values, edges)
    labels = labels[:10]
    model = Jarfo()
    model.fit(data, labels[['Target']])
    result = model.predict(data)
    print(result)
    return 0
def tcep_geom_net_datalosses(
        max_sample_size=(10**3), max_iter_factor=8, num_hiddens=20):
    """Run the sinkhorn data-loss inner loop over every Tuebingen pair.

    Returns a 3-D array stacking one result matrix per pair along the third
    dimension (access matrix i via [:, :, i]), or None if the dataset is empty.
    """
    data, labels = load_dataset('tuebingen', shuffle=False)
    cut_num_pairs(data, num_max=max_sample_size)
    dataset_results = None
    for i, row in data.iterrows():
        X, Y = process(row)
        X = torch.from_numpy(X).type(dtype)
        Y = torch.from_numpy(Y).type(dtype)
        results = inner_loop_datalosses(X, Y, loss="sinkhorn", p=1)
        # Stack result matrices along the third dim: access mat_i via [:,:,i].
        if dataset_results is None:
            dataset_results = results
        else:
            dataset_results = np.dstack([dataset_results, results])
        print(f'------- end test for sample {i}/{len(data)} (i/N) -------')
    return dataset_results
def test_PC():
    """Run PC prediction with no graph, an undirected graph, and a directed one."""
    data, graph = load_dataset("sachs")
    # nx.draw_networkx(graph, font_size=8)
    # (HEBI: this requires pcalg, kpcalg, and
    # https://github.com/Diviyan-Kalainathan/RCIT)
    model = PC()
    # predict() works without a graph, or with a directed or undirected
    # graph provided as an input.
    output = model.predict(data)                    # no graph argument
    output = model.predict(data, nx.Graph(graph))   # undirected skeleton
    output = model.predict(data, graph)             # directed graph
    # Render the last result.
    nx.draw_networkx(output, font_size=8)
    plt.show()
def tcep_acc_curves_datalength(max_iter_factor=8, num_hiddens=20, ax=None):
    """Plot accuracy curves per test statistic for the Tuebingen benchmark.

    Draws one accuracy-vs-decision-rate curve per test, a critical-region
    band, and a 0.5 chance line onto `ax` (defaults to the pyplot module).
    """
    if ax is None:
        ax = plt
    data, labels = load_dataset("tuebingen", shuffle=False)
    labels = labels.values
    # Aggregate stored results and keep only the requested configuration.
    results = _aggregate_datasets(rescale_tests=True)
    mask = ((results["max_iter_factor"] == max_iter_factor)
            & (results["num_hiddens"] == num_hiddens))
    results = results[mask]
    for test in ["mmd-gamma", "c2st-nn", "c2st-knn", "test_loss"]:
        t_res = results[results["test"] == test]
        # Column 0: forward-direction scores; column 1: backward-direction.
        fwd = t_res[t_res["direction"] == dir_name_map("->")]["value"].values
        bwd = t_res[t_res["direction"] == dir_name_map("<-")]["value"].values
        scores = np.vstack([fwd, bwd]).T
        acc_curve = _accuracy_curve(scores, labels)
        ax.plot(
            acc_curve[:, 0], acc_curve[:, 1],
            label=f'Test={test} (AUAC={np.round_(_area_under_acc_curve(scores,labels),2)})')
    # Shade the region where accuracy is not significantly above chance.
    crits_dr, crits_vals = _critical_curve(max_n=len(data))
    ax.fill_between(crits_dr, crits_vals, 1 - crits_vals,
                    alpha=0.5, color='lightgrey')
    ax.axhline(0.5, color='grey', linestyle="--")
    plt.axis([0, 1, 0, 1])
def test_tuebingen():
    """Check that the Tuebingen pairwise dataset loads without error."""
    data, labels = load_dataset('tuebingen')
import numpy as np import torch import numbers from cdt.data import load_dataset from sklearn.metrics import mean_squared_error import math from numba import jit # have global values RESOLUTION = 1e-02 data, labels = load_dataset('tuebingen', shuffle=False) labels = labels.values complain = "Datatype not supported, try Torch.tensors or np.ndarrays" # functions def _bin_int_as_array(int_val, num_bits, dtype=None): ''' returns an int as an array of bits, using max `num_bits`. ex: 4,7 --> 0010000 2,5 --> 01000 etc.. ''' if dtype is None or dtype == 'numpy': bin_arr = np.zeros(num_bits) elif dtype == 'torch': bin_arr = torch.zeros(num_bits) else: ValueError(complain + "(Default value is numpy array)", dtype) i = num_bits
def test_unknown_dataset():
    """Requesting a nonexistent dataset name should raise ValueError."""
    try:
        data, graph = load_dataset('asdasd')
    except ValueError:
        # Expected: unknown dataset names are rejected.
        pass
def test_dream():
    """Check that the dream4-2 graph dataset loads without error."""
    data, graph = load_dataset('dream4-2')
def test_sachs():
    """Check that the sachs graph dataset loads without error."""
    data, graph = load_dataset('sachs')
# generator = CausalPairGenerator('linear')
# data, labels = generator.generate(100, npoints=500)
# # generator.to_csv('generated_pairs')
# d = data.values
#
# from cdt.causality.pairwise import NCC
from CausalDiscuveryToolboxClone.Models.NCC import NCC
import networkx as nx
import matplotlib.pyplot as plt
from cdt.data import load_dataset
from sklearn.model_selection import train_test_split
from CausalDiscuveryToolboxClone.DataGeneration import functions
from scipy.special import expit

# Load the Tuebingen pairs and swap cause/effect columns.
data, labels = load_dataset('tuebingen')
data, labels = functions.swap_cause_effect(data, labels)
X_tr, X_te, y_tr, y_te = train_test_split(data, labels, train_size=.2)

# Train NCC and convert held-out logits into probabilities via the sigmoid.
obj = NCC()
obj.fit(X_tr, y_tr, 3, learning_rate=1e-2)
logits = obj.predict(X_te)
output = expit(logits.values)
def test_LiNGAM():
    """Run LiNGAM prediction on the sachs dataset."""
    data, graph = load_dataset("sachs")
    model = LiNGAM()
    output = model.predict(data)
def test_CAM():
    """Run CAM prediction on the sachs dataset and render the result."""
    data, graph = load_dataset("sachs")
    model = CAM()
    output = model.predict(data)
    nx.draw_networkx(output, font_size=8)
    plt.show()
os.path.dirname(os.path.realpath(__file__)))).iloc[:, :50] train_target = pd.read_csv( "{}/../datasets/Example_pairwise_targets.csv".format( os.path.dirname( os.path.realpath(__file__)))).iloc[:, :50].set_index("SampleID") data_pairwise = read_causal_pairs( "{}/../datasets/Example_pairwise_pairs.csv".format( os.path.dirname(os.path.realpath(__file__)))).iloc[0, :50] data_graph = pd.read_csv('{}/../datasets/Example_graph_numdata.csv'.format( os.path.dirname(os.path.realpath(__file__)))).iloc[:50, :5] graph_skeleton = Glasso().predict(data_graph) tueb = load_dataset('tuebingen')[0][:10] def test_pairwise(): for method in [ANM, IGCI, BivariateFit, CDS, RCC, NCC, RECI]: # Jarfo print(method) m = method() if hasattr(m, "fit"): m.fit(train_data, train_target) r = m.predict(data_pairwise) assert r is not None print(r) return 0 def test_pairwise():