Beispiel #1
0
def simulate_data(method='nonlinear', sem_type='mlp',
                  n_nodes=6, n_edges=15, n=1000,
                  weight_range=(0.5, 2.0), seed=1):
    """Simulate a data set from a random weighted DAG.

    A weighted DAG is drawn with ``DAG.erdos_renyi`` and handed to
    ``IIDSimulation``; the simulated samples and the ground-truth graph are
    returned.

    Parameters
    ----------
    method: str
        Forwarded to ``IIDSimulation`` as ``method``.
    sem_type: str
        Forwarded to ``IIDSimulation`` as ``sem_type``.
    n_nodes: int
        Forwarded to ``DAG.erdos_renyi`` as ``n_nodes``.
    n_edges: int
        Forwarded to ``DAG.erdos_renyi`` as ``n_edges``.
    n: int
        Forwarded to ``IIDSimulation`` as ``n`` (presumably the number of
        samples -- confirm against the castle documentation).
    weight_range: tuple
        Forwarded to ``DAG.erdos_renyi`` as ``weight_range``. The default
        keeps the value that used to be hard-coded.
    seed: int
        Forwarded to ``DAG.erdos_renyi`` as ``seed``. The default keeps the
        value that used to be hard-coded.

    Returns
    -------
    out: tuple
        (X, true_dag), taken from ``dataset.X`` and ``dataset.B``.
    """
    weighted_random_dag = DAG.erdos_renyi(n_nodes=n_nodes, n_edges=n_edges,
                                          weight_range=weight_range,
                                          seed=seed)
    dataset = IIDSimulation(W=weighted_random_dag, n=n, method=method,
                            sem_type=sem_type)
    true_dag, X = dataset.B, dataset.X

    return X, true_dag
Beispiel #2
0
def run_simulate(config):
    """Run the data-simulation task described by ``config``.

    Parameters
    ----------
    config: dict
        Configuration info. ``config['task_params']['algorithm']`` selects
        the simulator and ``config['algorithm_params']`` supplies its
        arguments.

    Returns
    -------
    out: tuple
        (X, true_dag, topology_matrix) when the algorithm is ``'EVENT'``,
        otherwise (X, true_dag) with X wrapped in a ``pd.DataFrame``.
    """

    params = config['algorithm_params']
    is_event_task = config['task_params']['algorithm'] == 'EVENT'

    # Both simulators start from the same random weighted DAG.
    random_dag = DAG.erdos_renyi(n_nodes=params['n_nodes'],
                                 n_edges=params['n_edges'],
                                 weight_range=params['weight_range'],
                                 seed=params['seed'])

    if not is_event_task:
        iid_data = IIDSimulation(W=random_dag,
                                 n=params['n'],
                                 method=params['method'],
                                 sem_type=params['sem_type'],
                                 noise_scale=params['noise_scale'])
        return pd.DataFrame(iid_data.X), iid_data.B

    # EVENT task: additionally draw a topology and run the THP simulator.
    topology_matrix = Topology.erdos_renyi(
        n_nodes=params['Topology_n_nodes'],
        n_edges=params['Topology_n_edges'],
        seed=params['Topology_seed'])
    thp = THPSimulation(random_dag,
                        topology_matrix,
                        mu_range=params['mu_range'],
                        alpha_range=params['alpha_range'])
    events = thp.simulate(
        T=params['THPSimulation_simulate_T'],
        max_hop=params['THPSimulation_simulate_max_hop'],
        beta=params['THPSimulation_simulate_beta'])

    return events, random_dag, topology_matrix
Beispiel #3
0
`networkx` package, then like the following import method.

Warnings: This script is used only for demonstration and cannot be directly
          imported.
"""

from castle.common import GraphDAG
from castle.metrics import MetricsDAG
from castle.datasets import DAG, IIDSimulation
from castle.algorithms import DirectLiNGAM


#######################################
# DirectLiNGAM on simulated data
#######################################
# Draw a random weighted DAG and simulate samples from it
# (method='linear', sem_type='gauss').
random_dag = DAG.erdos_renyi(
    n_nodes=10,
    n_edges=20,
    weight_range=(0.5, 2.0),
    seed=1,
)
simulation = IIDSimulation(
    W=random_dag,
    n=2000,
    method='linear',
    sem_type='gauss',
)
true_dag = simulation.B
X = simulation.X

# Fit DirectLiNGAM on the simulated samples.
model = DirectLiNGAM()
model.learn(X)

# Plot the estimated DAG next to the true DAG.
GraphDAG(model.causal_matrix, true_dag)

# Compute and print the evaluation metrics.
evaluation = MetricsDAG(model.causal_matrix, true_dag)
print(evaluation.metrics)
Beispiel #4
0
# pandas is required by the DataFrame conversion near the end of the script.
import pandas as pd

from castle.common import GraphDAG
from castle.metrics import MetricsDAG
from castle.datasets import DAG, IIDSimulation
from castle.algorithms import PC
from castle.common.priori_knowledge import PrioriKnowledge


# Simulation settings.
method = 'linear'
sem_type = 'gauss'
n_nodes = 10
n_edges = 15
n = 2000

# simulation for pc
weighted_random_dag = DAG.erdos_renyi(n_nodes=n_nodes, n_edges=n_edges, weight_range=(0.5, 2.0), seed=1)
dataset = IIDSimulation(W=weighted_random_dag, n=n, method=method, sem_type=sem_type)
true_dag, X = dataset.B, dataset.X

# PC learn
# Prior knowledge is sized by the number of columns of X; edges are given as
# index pairs.
priori = PrioriKnowledge(X.shape[1])
priori.add_required_edges([(3, 9),
                           (4, 9),
                           (5, 9),
                           (8, 5),
                           (4, 3)])
priori.add_forbidden_edges([(8, 1),
                            (9, 5)])
pc = PC(variant='original', priori_knowledge=priori)
# Ten column labels are supplied here, so X must have exactly ten columns
# (presumably one per node, matching n_nodes = 10 above).
X = pd.DataFrame(X, columns=list('abcdefghij'))
pc.learn(X)
Beispiel #5
0
from castle.common import GraphDAG
from castle.metrics import MetricsDAG
from castle.datasets import DAG, IIDSimulation
from castle.algorithms import MCSL

#######################################
# MCSL on simulated data
#######################################
# Draw a random weighted DAG and simulate samples from it
# (method='nonlinear', sem_type='mlp').
random_dag = DAG.erdos_renyi(n_nodes=10, n_edges=20,
                             weight_range=(0.5, 2.0), seed=1)
simulation = IIDSimulation(W=random_dag, n=2000,
                           method='nonlinear', sem_type='mlp')
true_dag = simulation.B
X = simulation.X

# Options handed to MCSL.learn, collected in one place.
learn_options = dict(
    iter_step=1000,
    rho_thres=1e14,
    init_rho=1e-5,
    rho_multiply=10,
    graph_thres=0.5,
    l1_graph_penalty=2e-3,
    degree=2,
    use_float64=False,
)

# Fit MCSL on the simulated samples.
mc = MCSL()
mc.learn(X, **learn_options)
Beispiel #6
0
If you want to plot the causal graph, please make sure you have already installed
the `networkx` package, then import it as shown below.

Warnings: This script is used only for demonstration and cannot be directly
        imported.
"""

from castle.common import GraphDAG
from castle.metrics import MetricsDAG
from castle.datasets import DAG, IIDSimulation
from castle.algorithms import ANMNonlinear

# Draw a random weighted DAG and simulate samples from it
# (method='nonlinear', sem_type='gp-add').
random_dag = DAG.erdos_renyi(n_nodes=6, n_edges=10,
                             weight_range=(0.5, 2.0), seed=1)
simulation = IIDSimulation(W=random_dag, n=1000,
                           method='nonlinear', sem_type='gp-add')
true_dag = simulation.B
X = simulation.X

# Fit ANMNonlinear on the simulated samples.
model = ANMNonlinear(alpha=0.05)
model.learn(data=X)

# Plot the predicted DAG next to the true DAG, then print the metrics.
GraphDAG(model.causal_matrix, true_dag)
evaluation = MetricsDAG(model.causal_matrix, true_dag)
print(evaluation.metrics)
Beispiel #7
0
def simulate_data(data, alg, task_id, parameters):
    """
    Simulation Data Generation Entry.

    Generates a sample set and its ground-truth graph, writes them below
    ``data`` and reports progress and elapsed time through ``TaskApi``.

    Parameters
    ----------
    data: str
        Path for storing generated data files.
    alg: str
        Generating Operator Strings. ``"EVENT"`` selects the
        ``THPSimulation`` branch; any other value selects the
        ``IIDSimulation`` branch.
    task_id: int
        task key in the database.
    parameters: dict
        Data generation parameters. They are passed through
        ``translation_parameters`` before being used.

    Returns
    -------
    bool
        True once all files have been written; False if generating the
        data raised an exception (the task status is then set to the error
        text and any existing output files for this task are removed).

    Notes
    -----
    Exceptions raised while saving the files are not caught here.
    """
    parameters = translation_parameters(parameters)
    task_api = TaskApi()
    start_time = datetime.datetime.now()
    task_api.update_task_status(task_id, 0.1)
    task_api.update_consumed_time(task_id, start_time)
    task_api.update_update_time(task_id, start_time)

    # exist_ok avoids the check-then-create race when several tasks start
    # with the same output directory.
    os.makedirs(data, exist_ok=True)
    task_name = task_api.get_task_name(task_id)
    file_stem = str(task_id) + "_" + task_name
    sample_path = os.path.join(data, "datasets", file_stem + ".csv")
    true_dag_path = os.path.join(data, "true", file_stem + ".npz")
    node_relationship_path = os.path.join(
        data, "node_relationship_" + file_stem + ".csv")
    topo_path = os.path.join(data, "topo_" + file_stem + ".npz")
    task_api.update_task_status(task_id, 0.2)
    task_api.update_consumed_time(task_id, start_time)

    topo = None
    try:
        # Both branches start from the same random weighted DAG.
        weighted_random_dag = DAG.erdos_renyi(
            n_nodes=parameters['n_nodes'],
            n_edges=parameters['n_edges'],
            weight_range=parameters['weight_range'],
            seed=parameters['seed'])

        if alg == "EVENT":
            # In this branch the generated DAG itself is the ground truth.
            true_dag = weighted_random_dag
            topo = Topology.erdos_renyi(n_nodes=parameters['Topology_n_nodes'],
                                        n_edges=parameters['Topology_n_edges'],
                                        seed=parameters['Topology_seed'])
            simulator = THPSimulation(true_dag,
                                      topo,
                                      mu_range=parameters['mu_range'],
                                      alpha_range=parameters['alpha_range'])
            sample = simulator.simulate(
                T=parameters['THPSimulation_simulate_T'],
                max_hop=parameters['THPSimulation_simulate_max_hop'],
                beta=parameters['THPSimulation_simulate_beta'])
        else:
            dataset = IIDSimulation(W=weighted_random_dag,
                                    n=parameters['n'],
                                    method=parameters['method'],
                                    sem_type=parameters['sem_type'],
                                    noise_scale=parameters['noise_scale'])

            true_dag, sample = dataset.B, dataset.X
            sample = pd.DataFrame(sample)

        task_api.update_task_status(task_id, 0.5)
        task_api.update_consumed_time(task_id, start_time)
    except Exception as error:
        # The task status carries the error text so the failure is visible
        # wherever the status is displayed.
        task_api.update_task_status(task_id, str(error))
        task_api.update_consumed_time(task_id, start_time)
        logger.warning('Generating simulation data failed, exp=%s', error)
        # Remove any existing output files for this task.
        for stale_path in (sample_path, true_dag_path,
                           node_relationship_path, topo_path):
            if os.path.exists(stale_path):
                os.remove(stale_path)
        return False

    # Drop an existing topology file; a new one is only written below when
    # this run produced a topology.
    if os.path.exists(topo_path):
        os.remove(topo_path)

    task_api.update_task_status(task_id, 0.6)
    task_api.update_consumed_time(task_id, start_time)

    save_to_file(sample, sample_path)
    save_to_file(true_dag, true_dag_path)
    if isinstance(topo, np.ndarray):
        save_to_file(topo, topo_path)

    # Export the edges of the true graph to the node-relationship file.
    save_gragh_edges(true_dag, node_relationship_path)
    task_api.update_task_status(task_id, 1.0)
    task_api.update_consumed_time(task_id, start_time)
    return True