Example #1
def train(model_name,
          X,
          true_dag,
          model_params,
          topology_matrix=None,
          plot=True):
    """run algorithm of castle

    Parameters
    ----------
    model_name: str
        algorithm name
    X: pd.DataFrame
        train data
    true_dag: array
        true directed acyclic graph
    model_params: dict
        Parameters from configuration file
    topology_matrix: array, default None
        topology graph matrix
    plot: bool, default True
        whether to show the estimated graph.

    Returns
    -------
    model: castle.algorithm
        model of castle.algorithm
    pre_dag: array
        discovered causal matrix
    """

    # Instantiate the algorithm and learn the DAG
    if model_name == 'TTPM':
        model = INLINE_ALGORITHMS[model_name.upper()](topology_matrix,
                                                      **model_params)
        model.learn(X)
    elif model_name == 'NOTEARSLOWRANK':
        rank = model_params.get('rank')
        del model_params['rank']
        model = NotearsLowRank(**model_params)
        model.learn(X, rank=rank)
    else:
        try:
            model = INLINE_ALGORITHMS[model_name.upper()](**model_params)
            model.learn(data=X)
        except KeyError:
            raise ValueError('Invalid algorithm name: {}.'.format(model_name))

    pre_dag = model.causal_matrix
    if plot:
        if true_dag is not None:
            GraphDAG(pre_dag, true_dag, show=plot)
            m = MetricsDAG(pre_dag, true_dag)
            print(m.metrics)
        else:
            GraphDAG(pre_dag, show=plot)

    return model, pre_dag
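
A hedged usage sketch for the helper above. It assumes the surrounding module defines INLINE_ALGORITHMS with a 'PC' entry pointing to castle.algorithms.PC; the simulated dataset and the empty parameter dict are illustrative, not part of the original script.

import pandas as pd
from castle.datasets import DAG, IIDSimulation

# simulate a small linear-Gaussian dataset (illustrative settings)
W = DAG.erdos_renyi(n_nodes=6, n_edges=10, weight_range=(0.5, 2.0), seed=1)
sim = IIDSimulation(W=W, n=1000, method='linear', sem_type='gauss')
true_dag, X = sim.B, pd.DataFrame(sim.X)

# hypothetical call: 'PC' must exist as a key in INLINE_ALGORITHMS
model, pre_dag = train('PC', X, true_dag, model_params={}, plot=False)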
Example #2
from castle.common import GraphDAG
from castle.metrics import MetricsDAG


def castle_experiment(model, x, y=None, show_graph=False, **kwargs):
    """Fit a castle algorithm on x; if the true DAG y is given, return its metrics."""
    model.learn(x, **kwargs)
    if y is not None:
        metrics = MetricsDAG(model.causal_matrix, y).metrics
    else:
        metrics = None
    if show_graph:
        GraphDAG(model.causal_matrix, y)

    return metrics
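
A minimal, hedged example of calling castle_experiment; the PC algorithm and the simulated data below are illustrative additions, not part of the original snippet.

from castle.algorithms import PC
from castle.datasets import DAG, IIDSimulation

W = DAG.erdos_renyi(n_nodes=6, n_edges=10, weight_range=(0.5, 2.0), seed=1)
sim = IIDSimulation(W=W, n=1000, method='linear', sem_type='gauss')
true_dag, X = sim.B, sim.X

metrics = castle_experiment(PC(), X, y=true_dag, show_graph=False)
print(metrics)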
Example #3
    def _rl(self, X, config):
        # Reproducibility
        set_seed(config.seed)

        logging.info('Python version is {}'.format(platform.python_version()))

        # input data
        if hasattr(config, 'dag'):
            training_set = DataGenerator_read_data(
                X, config.dag, config.normalize, config.transpose)
        else:
            training_set = DataGenerator_read_data(
                X, None, config.normalize, config.transpose)

        # set penalty weights
        score_type = config.score_type
        reg_type = config.reg_type

        if config.lambda_flag_default:            
            sl, su, strue = BIC_lambdas(training_set.inputdata, None, None, None, reg_type, score_type)
            lambda1 = 0
            lambda1_upper = 5
            lambda1_update_add = 1
            lambda2 = 1/(10**(np.round(config.max_length/3)))
            lambda2_upper = 0.01
            lambda2_update_mul = 10
            lambda_iter_num = config.lambda_iter_num

            # test initialized score
            logging.info('Original sl: {}, su: {}, strue: {}'.format(sl, su, strue))
            logging.info('Transformed sl: {}, su: {}, lambda2: {}, true: {}'.format(sl, su, lambda2,
                        (strue-sl)/(su-sl)*lambda1_upper))   
        else:
            # test choices for the case with manually provided bounds
            # not fully tested
            sl = config.score_lower
            su = config.score_upper
            if config.score_bd_tight:
                lambda1 = 2
                lambda1_upper = 2
            else:
                lambda1 = 0
                lambda1_upper = 5
                lambda1_update_add = 1
            lambda2 = 1/(10**(np.round(config.max_length/3)))
            lambda2_upper = 0.01
            lambda2_update_mul = config.lambda2_update
            lambda_iter_num = config.lambda_iter_num

        # actor
        actor = Actor(config)
        callreward = get_Reward(actor.batch_size, config.max_length, 
                                actor.input_dimension, training_set.inputdata,
                                sl, su, lambda1_upper, score_type, reg_type, 
                                config.l1_graph_reg, False)
        logging.info('Finished creating training dataset, actor model and reward class')

        logging.info('Starting session...')
        sess_config = tf.ConfigProto(log_device_placement=False)
        sess_config.gpu_options.allow_growth = True

        with tf.Session(config=sess_config) as sess:
            # Run initialize op
            sess.run(tf.global_variables_initializer())

            # Test tensor shape
            logging.info('Shape of actor.input: {}'.format(sess.run(tf.shape(actor.input_))))

            # Initialize useful variables
            rewards_avg_baseline = []
            rewards_batches = []
            reward_max_per_batch = []
            
            lambda1s = []
            lambda2s = []
            
            graphss = []
            probsss = []
            max_rewards = []
            max_reward = float('-inf')
            max_reward_score_cyc = (lambda1_upper+1, 0)

            logging.info('Starting training.')
            
            for i in range(1, config.nb_epoch + 1):

                if config.verbose:
                    logging.info('Start training for {}-th epoch'.format(i))

                input_batch = training_set.train_batch(actor.batch_size, actor.max_length, actor.input_dimension)
                graphs_feed = sess.run(actor.graphs, feed_dict={actor.input_: input_batch})
                reward_feed = callreward.cal_rewards(graphs_feed, lambda1, lambda2)

                # max reward, max reward per batch
                max_reward = -callreward.update_scores([max_reward_score_cyc], lambda1, lambda2)[0]
                max_reward_batch = float('inf')
                max_reward_batch_score_cyc = (0, 0)

                for reward_, score_, cyc_ in reward_feed:
                    if reward_ < max_reward_batch:
                        max_reward_batch = reward_
                        max_reward_batch_score_cyc = (score_, cyc_)
                            
                max_reward_batch = -max_reward_batch

                if max_reward < max_reward_batch:
                    max_reward = max_reward_batch
                    max_reward_score_cyc = max_reward_batch_score_cyc

                # for average reward per batch
                reward_batch_score_cyc = np.mean(reward_feed[:,1:], axis=0)

                if config.verbose:
                    logging.info('Finished calculating rewards for the current batch of graphs')

                # Get feed dict
                feed = {actor.input_: input_batch, actor.reward_: -reward_feed[:,0], actor.graphs_:graphs_feed}

                summary, base_op, score_test, probs, graph_batch, reward_batch, \
                    reward_avg_baseline, train_step1, train_step2 = sess.run( \
                        [actor.merged, actor.base_op, actor.test_scores, \
                         actor.log_softmax, actor.graph_batch, actor.reward_batch, \
                         actor.avg_baseline, actor.train_step1, actor.train_step2], \
                        feed_dict=feed)

                if config.verbose:
                    logging.info('Finished updating actor and critic networks using the calculated rewards')
                
                lambda1s.append(lambda1)
                lambda2s.append(lambda2)

                rewards_avg_baseline.append(reward_avg_baseline)
                rewards_batches.append(reward_batch_score_cyc)
                reward_max_per_batch.append(max_reward_batch_score_cyc)

                graphss.append(graph_batch)
                probsss.append(probs)
                max_rewards.append(max_reward_score_cyc)

                # logging
                if i == 1 or i % 500 == 0:
                    logging.info('[iter {}] reward_batch: {:.4}, max_reward: {:.4}, max_reward_batch: {:.4}'.format(i,
                                reward_batch, max_reward, max_reward_batch))

                # update lambda1, lambda2
                if i == 1 or i % lambda_iter_num == 0:
                    ls_kv = callreward.update_all_scores(lambda1, lambda2)

                    graph_int, score_min, cyc_min = np.int32(ls_kv[0][0]), ls_kv[0][1][1], ls_kv[0][1][-1]

                    if cyc_min < 1e-5:
                        lambda1_upper = score_min
                    lambda1 = min(lambda1+lambda1_update_add, lambda1_upper)
                    lambda2 = min(lambda2*lambda2_update_mul, lambda2_upper)
                    logging.info('[iter {}] lambda1 {:.4}, upper {:.4}, lambda2 {:.4}, upper {:.4}, score_min {:.4}, cyc_min {:.4}'.format(i,
                                lambda1*1.0, lambda1_upper*1.0, lambda2*1.0, lambda2_upper*1.0, score_min*1.0, cyc_min*1.0))

                    graph_batch = convert_graph_int_to_adj_mat(graph_int)

                    if reg_type == 'LR':
                        graph_batch_pruned = np.array(graph_prunned_by_coef(graph_batch, training_set.inputdata))
                    elif reg_type == 'QR':
                        graph_batch_pruned = np.array(graph_prunned_by_coef_2nd(graph_batch, training_set.inputdata))
                    # elif reg_type == 'GPR':
                    #     # The R code of CAM pruning operates on graphs where (i,j)=1 indicates the i-th node -> j-th node,
                    #     # so we need to transpose the input graph and transpose the output graph again
                    #     graph_batch_pruned = np.transpose(pruning_cam(training_set.inputdata, np.array(graph_batch).T))

                    # estimate accuracy
                    if hasattr(config, 'dag'):
                        met = MetricsDAG(graph_batch.T, training_set.true_graph)
                        met2 = MetricsDAG(graph_batch_pruned.T, training_set.true_graph)
                        acc_est = met.metrics
                        acc_est2 = met2.metrics

                        fdr, tpr, fpr, shd, nnz = \
                            acc_est['fdr'], acc_est['tpr'], acc_est['fpr'], \
                            acc_est['shd'], acc_est['nnz']
                        fdr2, tpr2, fpr2, shd2, nnz2 = \
                            acc_est2['fdr'], acc_est2['tpr'], acc_est2['fpr'], \
                            acc_est2['shd'], acc_est2['nnz']
                        
                        logging.info('before pruning: fdr {}, tpr {}, fpr {}, shd {}, nnz {}'.format(fdr, tpr, fpr, shd, nnz))
                        logging.info('after  pruning: fdr {}, tpr {}, fpr {}, shd {}, nnz {}'.format(fdr2, tpr2, fpr2, shd2, nnz2))

            plt.figure(1)
            plt.plot(rewards_batches, label='reward per batch')
            plt.plot(max_rewards, label='max reward')
            plt.legend()
            plt.savefig('reward_batch_average.png')
            plt.show()
            plt.close()
            
            logging.info('Training COMPLETED!')

        return graph_batch_pruned.T
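
The penalty-weight schedule used above can be illustrated in isolation. The sketch below uses made-up values for max_length, score_min and cyc_min and does not depend on the actor or reward classes; it only mirrors the update rule (additive growth of lambda1, multiplicative growth of lambda2, both capped).

import numpy as np

lambda1, lambda1_upper, lambda1_update_add = 0.0, 5.0, 1.0
max_length = 12                                     # hypothetical graph size
lambda2 = 1 / (10 ** np.round(max_length / 3))      # tiny initial cycle penalty
lambda2_upper, lambda2_update_mul = 0.01, 10.0
lambda_iter_num = 1000

for i in range(1, 5001):
    if i == 1 or i % lambda_iter_num == 0:
        score_min, cyc_min = 2.5, 0.0               # would come from the reward class
        if cyc_min < 1e-5:                          # best graph is (almost) acyclic
            lambda1_upper = score_min               # tighten the score cap
        lambda1 = min(lambda1 + lambda1_update_add, lambda1_upper)
        lambda2 = min(lambda2 * lambda2_update_mul, lambda2_upper)
        print('iter', i, 'lambda1', lambda1, 'lambda2', lambda2)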
Example #4
`networkx` package, then import as follows.

Warnings: This script is used only for demonstration and cannot be directly
          imported.
"""

from castle.common import GraphDAG
from castle.metrics import MetricsDAG
from castle.datasets import DAG, IIDSimulation
from castle.algorithms import DirectLiNGAM


#######################################
# DirectLiNGAM on simulated data
#######################################
# simulate data for DirectLiNGAM
weighted_random_dag = DAG.erdos_renyi(n_nodes=10, n_edges=20, weight_range=(0.5, 2.0), seed=1)
dataset = IIDSimulation(W=weighted_random_dag, n=2000, method='linear', sem_type='gauss')
true_dag, X = dataset.B, dataset.X

# DirectLiNGAM learn
g = DirectLiNGAM()
g.learn(X)

# plot est_dag and true_dag
GraphDAG(g.causal_matrix, true_dag)

# calculate accuracy
met = MetricsDAG(g.causal_matrix, true_dag)
print(met.metrics)
Example #5
    def _rl(self, X, config):
        """
        Start training the CORL2 model.

        Parameters
        ----------
        X: numpy.ndarray
            Training data used to learn the causal structure.
        config: object
            Configuration object holding the CORL2 hyperparameters.
        """

        set_seed(config.seed)

        logger.info('Python version is {}'.format(platform.python_version()))

        # input data
        solution_path_mask = None
        if hasattr(config, 'dag'):
            training_set = DataGenerator_read_data(X, config.dag,
                                                   solution_path_mask,
                                                   config.normalize,
                                                   config.transpose)
        else:
            training_set = DataGenerator_read_data(X, None, solution_path_mask,
                                                   config.normalize,
                                                   config.transpose)
        input_data = training_set.inputdata[:config.data_size, :]

        # set penalty weights
        score_type = config.score_type
        reg_type = config.reg_type

        actor = Actor(config)
        callreward = get_Reward(actor.batch_size, config.max_length,
                                config.parral, actor.input_dimension,
                                input_data, score_type, reg_type,
                                config.gpr_alpha, config.med_w,
                                config.median_flag, config.l1_graph_reg, False)
        logger.info(
            'Finished creating training dataset, actor model and reward class')

        logger.info('Starting session...')
        sess_config = tf.ConfigProto(log_device_placement=False)
        sess_config.gpu_options.allow_growth = True

        with tf.Session(config=sess_config) as sess:
            sess.run(tf.global_variables_initializer())

            logger.info('Shape of actor.input: {}'.format(
                sess.run(tf.shape(actor.input_))))

            # Initialize useful variables
            rewards_batches = []
            reward_max_per_batch = []

            max_rewards = []
            max_reward = float('-inf')

            max_sum = 0

            loss1_s, loss_2s = [], []
            beg_t = time.time()
            for i in range(1, config.nb_epoch + 1):
                if (time.time() - beg_t) > (15 * 3600):
                    if i > 1001:
                        break
                input_batch = training_set.train_batch(actor.batch_size,
                                                       actor.max_length,
                                                       actor.input_dimension)
                positions, i_list, s0_list, s1_list = sess.run(
                    [
                        actor.positions, actor.i_list, actor.s0_list,
                        actor.s1_list
                    ],
                    feed_dict={actor.input_: input_batch})
                samples = []
                action_mask_s = []
                for m in range(positions.shape[0]):
                    zero_matrix = from_order_to_graph(positions[m])

                    action_mask = np.zeros(actor.max_length)
                    for po in positions[m]:
                        action_mask_s.append(action_mask.copy())
                        action_mask += np.eye(actor.max_length)[po]

                    samples.append(zero_matrix)
                    temp_sum = cover_rate(zero_matrix,
                                          training_set.true_graph.T)
                    if temp_sum > max_sum:
                        max_sum = temp_sum
                        if i == 1 or i % 500 == 0:
                            logger.info(
                                '[iter {}] [Batch {}_th] The optimal nodes order cover true graph {}/{}!'
                                .format(i, m, max_sum,
                                        actor.max_length * actor.max_length))

                graphs_feed = np.stack(samples)
                action_mask_s = np.stack(action_mask_s)
                reward_feed = callreward.cal_rewards(graphs_feed, positions)

                max_reward_batch = -float('inf')
                reward_list, normal_batch_reward = [], []
                for nu, (reward_, reward_list_) in enumerate(reward_feed):
                    reward_list.append(reward_list_)
                    normalized_reward = -reward_
                    normal_batch_reward.append(normalized_reward)
                    if normalized_reward > max_reward_batch:
                        max_reward_batch = normalized_reward
                if max_reward < max_reward_batch:
                    max_reward = max_reward_batch

                normal_batch_reward = np.stack(normal_batch_reward)

                feed = {
                    actor.input_:
                    input_batch,
                    actor.reward_:
                    normal_batch_reward,
                    actor.prev_state_0:
                    s0_list.reshape((-1, actor.input_dimension)),
                    actor.prev_state_1:
                    s1_list.reshape((-1, actor.input_dimension)),
                    actor.prev_input:
                    i_list.reshape((-1, actor.input_dimension)),
                    actor.position:
                    positions.reshape(actor.batch_size * actor.max_length),
                    actor.action_mask_:
                    action_mask_s.reshape((-1, actor.max_length))
                }

                base_op, reward_avg_baseline, log_softmax, train_step1, loss1, loss2, train_step2 = sess.run(
                    [
                        actor.base_op, actor.avg_baseline, actor.log_softmax,
                        actor.train_step1, actor.loss1, actor.loss2,
                        actor.train_step2
                    ],
                    feed_dict=feed)

                loss1_s.append(loss1)
                loss_2s.append(loss2)
                reward_max_per_batch.append(max_reward_batch)
                rewards_batches.append(np.mean(normal_batch_reward))
                max_rewards.append(max_reward)

                # logging
                if i == 1 or i % 500 == 0:
                    logger.info(
                        '[iter {}] reward_batch: {:.4}, max_reward: {:.4}, max_reward_batch: {:.4}'
                        .format(i, np.mean(normal_batch_reward), max_reward,
                                max_reward_batch))

                if i == 1 or (i + 1) % config.lambda_iter_num == 0:
                    ls_kv = callreward.update_all_scores()

                    score_min, graph_int_key = ls_kv[0][1][0], ls_kv[0][0]
                    logger.info('[iter {}] score_min {:.4}'.format(
                        i, score_min * 1.0))
                    graph_batch = from_order_to_graph(graph_int_key)

                    temp_sum = cover_rate(graph_batch,
                                          training_set.true_graph.T)

                    if reg_type == 'LR':
                        graph_batch_pruned = np.array(
                            graph_prunned_by_coef(graph_batch, input_data))
                    elif reg_type == 'QR':
                        graph_batch_pruned = np.array(
                            graph_prunned_by_coef_2nd(graph_batch, input_data))
                    # elif reg_type == 'GPR' or reg_type == 'GPR_learnable':
                    #     graph_batch_pruned = np.transpose(pruning_cam(input_data,
                    #                                                     np.array(graph_batch).T))

                    if hasattr(config, 'dag'):
                        met = MetricsDAG(graph_batch.T,
                                         training_set.true_graph)
                        met2 = MetricsDAG(graph_batch_pruned.T,
                                          training_set.true_graph)
                        acc_est = met.metrics
                        acc_est2 = met2.metrics

                        fdr, tpr, fpr, shd, nnz = \
                            acc_est['fdr'], acc_est['tpr'], acc_est['fpr'], \
                            acc_est['shd'], acc_est['nnz']
                        fdr2, tpr2, fpr2, shd2, nnz2 = \
                            acc_est2['fdr'], acc_est2['tpr'], acc_est2['fpr'], \
                            acc_est2['shd'], acc_est2['nnz']

                        logger.info(
                            'before pruning: fdr {}, tpr {}, fpr {}, shd {}, nnz {}'
                            .format(fdr, tpr, fpr, shd, nnz))
                        logger.info(
                            'after  pruning: fdr {}, tpr {}, fpr {}, shd {}, nnz {}'
                            .format(fdr2, tpr2, fpr2, shd2, nnz2))

            plt.figure(1)
            plt.plot(rewards_batches, label='reward per batch')
            plt.plot(reward_max_per_batch, label='max reward per batch')
            plt.plot(max_rewards, label='max reward')
            plt.legend()
            plt.savefig('reward_batch_average.png')
            plt.show()
            plt.close()

            logger.info('Training COMPLETED!')
        return graph_batch_pruned.T
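
The per-step action masks built inside the batch loop above can be shown on their own. This is a standalone sketch with a hypothetical ordering; it reproduces only the masking logic (before the k-th node is placed, the mask marks every node already chosen).

import numpy as np

max_length = 4
order = np.array([2, 0, 3, 1])         # one hypothetical sampled node ordering

action_mask = np.zeros(max_length)
masks = []
for po in order:
    masks.append(action_mask.copy())   # mask the decoder would see at this step
    action_mask += np.eye(max_length)[po]

print(np.stack(masks))                 # row k marks the nodes placed before step k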
Example #6
    def _rl(self, X):
        # Reproducibility
        set_seed(self.seed)

        logging.info('Python version is {}'.format(platform.python_version()))

        # input data
        if self.dag:
            training_set = DataGenerator_read_data(
                X, self.dag, self.normalize, self.transpose)
        else:
            training_set = DataGenerator_read_data(
                X, None, self.normalize, self.transpose)

        # set penalty weights
        score_type = self.score_type
        reg_type = self.reg_type

        if self.lambda_flag_default:
            sl, su, strue = BIC_lambdas(training_set.inputdata, None, None, None, reg_type, score_type)
            lambda1 = 0
            lambda1_upper = 5
            lambda1_update_add = 1
            lambda2 = 1/(10**(np.round(self.max_length/3)))
            lambda2_upper = 0.01
            lambda2_update_mul = 10
            lambda_iter_num = self.lambda_iter_num

            # test initialized score
            logging.info('Original sl: {}, su: {}, strue: {}'.format(sl, su, strue))
            logging.info('Transformed sl: {}, su: {}, lambda2: {}, true: {}'.format(sl, su, lambda2,
                        (strue-sl)/(su-sl)*lambda1_upper))
        else:
            # test choices for the case with manually provided bounds
            # not fully tested
            sl = self.score_lower
            su = self.score_upper
            if self.score_bd_tight:
                lambda1 = 2
                lambda1_upper = 2
            else:
                lambda1 = 0
                lambda1_upper = 5
                lambda1_update_add = 1
            lambda2 = 1/(10**(np.round(self.max_length/3)))
            lambda2_upper = 0.01
            lambda2_update_mul = self.lambda2_update
            lambda_iter_num = self.lambda_iter_num

        # actor
        actor = Actor(encoder_type=self.encoder_type,
                      hidden_dim=self.hidden_dim,
                      max_length=self.max_length,
                      num_heads=self.num_heads,
                      num_stacks=self.num_stacks,
                      residual=self.residual,
                      decoder_type=self.decoder_type,
                      decoder_activation=self.decoder_activation,
                      decoder_hidden_dim=self.decoder_hidden_dim,
                      use_bias=self.use_bias,
                      use_bias_constant=self.use_bias_constant,
                      bias_initial_value=self.bias_initial_value,
                      batch_size=self.batch_size,
                      input_dimension=self.input_dimension,
                      lr1_start=self.lr1_start,
                      lr1_decay_step=self.lr1_decay_step,
                      lr1_decay_rate=self.lr1_decay_rate,
                      alpha=self.alpha,
                      init_baseline=self.init_baseline,
                      device=self.device)
        callreward = get_Reward(self.batch_size, self.max_length,
                                self.input_dimension, training_set.inputdata,
                                sl, su, lambda1_upper, score_type, reg_type, 
                                self.l1_graph_reg, False)
        logging.info('Finished creating training dataset and reward class')

        # Initialize useful variables
        rewards_avg_baseline = []
        rewards_batches = []
        reward_max_per_batch = []
        
        lambda1s = []
        lambda2s = []
        
        graphss = []
        probsss = []
        max_rewards = []
        max_reward = float('-inf')
        max_reward_score_cyc = (lambda1_upper+1, 0)

        logging.info('Starting training.')
        
        for i in tqdm(range(1, self.nb_epoch + 1)):

            if self.verbose:
                logging.info('Start training for {}-th epoch'.format(i))

            input_batch = training_set.train_batch(self.batch_size, self.max_length, self.input_dimension)
            inputs = torch.from_numpy(np.array(input_batch)).to(self.device)

            # Test tensor shape
            if i == 1:
                logging.info('Shape of actor.input: {}'.format(inputs.shape))

            # actor
            actor.build_permutation(inputs)
            graphs_feed = actor.graphs_

            reward_feed = callreward.cal_rewards(graphs_feed.cpu().detach().numpy(), lambda1, lambda2)  # np.array
            actor.build_reward(reward_ = -torch.from_numpy(reward_feed)[:,0].to(self.device))


            # max reward, max reward per batch
            max_reward = -callreward.update_scores([max_reward_score_cyc], lambda1, lambda2)[0]
            max_reward_batch = float('inf')
            max_reward_batch_score_cyc = (0, 0)

            for reward_, score_, cyc_ in reward_feed:
                if reward_ < max_reward_batch:
                    max_reward_batch = reward_
                    max_reward_batch_score_cyc = (score_, cyc_)
                    
            max_reward_batch = -max_reward_batch

            if max_reward < max_reward_batch:
                max_reward = max_reward_batch
                max_reward_score_cyc = max_reward_batch_score_cyc

            # for average reward per batch
            reward_batch_score_cyc = np.mean(reward_feed[:,1:], axis=0)

            if self.verbose:
                logging.info('Finished calculating rewards for the current batch of graphs')

            score_test, probs, graph_batch, \
            reward_batch, reward_avg_baseline = \
                    actor.test_scores, actor.log_softmax, actor.graph_batch, \
                    actor.reward_batch, actor.avg_baseline

            if self.verbose:
                logging.info('Finished updating actor and critic networks using the calculated rewards')
            
            lambda1s.append(lambda1)
            lambda2s.append(lambda2)

            rewards_avg_baseline.append(reward_avg_baseline)
            rewards_batches.append(reward_batch_score_cyc)
            reward_max_per_batch.append(max_reward_batch_score_cyc)

            graphss.append(graph_batch)
            probsss.append(probs)
            max_rewards.append(max_reward_score_cyc)

            # logging
            if i == 1 or i % 500 == 0:
                logging.info('[iter {}] reward_batch: {:.4}, max_reward: {:.4}, max_reward_batch: {:.4}'.format(i,
                            reward_batch, max_reward, max_reward_batch))

            # update lambda1, lambda2
            if i == 1 or i % lambda_iter_num == 0:
                ls_kv = callreward.update_all_scores(lambda1, lambda2)

                graph_int, score_min, cyc_min = np.int64(ls_kv[0][0]), ls_kv[0][1][1], ls_kv[0][1][-1]

                if cyc_min < 1e-5:
                    lambda1_upper = score_min
                lambda1 = min(lambda1+lambda1_update_add, lambda1_upper)
                lambda2 = min(lambda2*lambda2_update_mul, lambda2_upper)
                logging.info('[iter {}] lambda1 {:.4}, upper {:.4}, lambda2 {:.4}, upper {:.4}, score_min {:.4}, cyc_min {:.4}'.format(i,
                            lambda1*1.0, lambda1_upper*1.0, lambda2*1.0, lambda2_upper*1.0, score_min*1.0, cyc_min*1.0))

                graph_batch = convert_graph_int_to_adj_mat(graph_int)

                if reg_type == 'LR':
                    graph_batch_pruned = np.array(graph_prunned_by_coef(graph_batch, training_set.inputdata))
                elif reg_type == 'QR':
                    graph_batch_pruned = np.array(graph_prunned_by_coef_2nd(graph_batch, training_set.inputdata))

                if self.dag:
                    met = MetricsDAG(graph_batch.T, training_set.true_graph)
                    met2 = MetricsDAG(graph_batch_pruned.T, training_set.true_graph)
                    acc_est = met.metrics
                    acc_est2 = met2.metrics

                    fdr, tpr, fpr, shd, nnz = \
                        acc_est['fdr'], acc_est['tpr'], acc_est['fpr'], \
                        acc_est['shd'], acc_est['nnz']
                    fdr2, tpr2, fpr2, shd2, nnz2 = \
                        acc_est2['fdr'], acc_est2['tpr'], acc_est2['fpr'], \
                        acc_est2['shd'], acc_est2['nnz']
                    
                    logging.info('before pruning: fdr {}, tpr {}, fpr {}, shd {}, nnz {}'.format(fdr, tpr, fpr, shd, nnz))
                    logging.info('after  pruning: fdr {}, tpr {}, fpr {}, shd {}, nnz {}'.format(fdr2, tpr2, fpr2, shd2, nnz2))
        
        logging.info('Training COMPLETED!')

        return graph_batch_pruned.T
Example #7
        imported.
"""

from castle.common import GraphDAG
from castle.metrics import MetricsDAG
from castle.datasets import DAG, IIDSimulation
from castle.algorithms import GraN_DAG, Parameters

# load data
weighted_random_dag = DAG.erdos_renyi(n_nodes=10,
                                      n_edges=20,
                                      weight_range=(0.5, 2.0),
                                      seed=1)
dataset = IIDSimulation(W=weighted_random_dag,
                        n=2000,
                        method='nonlinear',
                        sem_type='mlp')
dag, x = dataset.B, dataset.X

# Initialize parameters for gran_dag
parameters = Parameters(input_dim=x.shape[1])

# Instantiation algorithm
gnd = GraN_DAG(params=parameters)
gnd.learn(data=x, target=dag)

# plot predict_dag and true_dag
GraphDAG(gnd.causal_matrix, dag, 'result')
mm = MetricsDAG(gnd.causal_matrix, dag)
print(mm.metrics)
Example #8
how to use the TTPM algorithm in the `castle` package for causal inference.

Warnings: This script is used only for demonstration and cannot be directly
        imported.
"""

from castle.common import GraphDAG
from castle.metrics import MetricsDAG
from castle.datasets import DAG, Topology, THPSimulation
from castle.algorithms import TTPM

# Data Simulation for TTPM
true_causal_matrix = DAG.erdos_renyi(n_nodes=10, n_edges=10)
topology_matrix = Topology.erdos_renyi(n_nodes=20, n_edges=20)
simulator = THPSimulation(true_causal_matrix,
                          topology_matrix,
                          mu_range=(0.00005, 0.0001),
                          alpha_range=(0.005, 0.007))
X = simulator.simulate(T=3600 * 24, max_hop=2)

# TTPM modeling
ttpm = TTPM(topology_matrix, max_hop=2)
ttpm.learn(X)
print(ttpm.causal_matrix)

# plot est_dag and true_dag
GraphDAG(ttpm.causal_matrix, true_causal_matrix)
# calculate accuracy
ret_metrix = MetricsDAG(ttpm.causal_matrix, true_causal_matrix)
print(ret_metrix.metrics)
Example #9
If you want to plot the causal graph, please make sure you have already
installed the `networkx` package, then import as follows.

Warnings: This script is used only for demonstration and cannot be directly
        imported.
"""

from castle.common import GraphDAG
from castle.metrics import MetricsDAG
from castle.datasets import DAG, IIDSimulation
from castle.algorithms import ANMNonlinear

weighted_random_dag = DAG.erdos_renyi(n_nodes=6,
                                      n_edges=10,
                                      weight_range=(0.5, 2.0),
                                      seed=1)
dataset = IIDSimulation(W=weighted_random_dag,
                        n=1000,
                        method='nonlinear',
                        sem_type='gp-add')
true_dag, X = dataset.B, dataset.X

anm = ANMNonlinear(alpha=0.05)
anm.learn(data=X)

# plot predict_dag and true_dag
GraphDAG(anm.causal_matrix, true_dag)
mm = MetricsDAG(anm.causal_matrix, true_dag)
print(mm.metrics)
Example #10
how to use the TTPM algorithm in the `castle` package for causal inference.

Warnings: This script is used only for demonstration and cannot be directly
        imported.
"""

from castle.common import GraphDAG
from castle.metrics import MetricsDAG
from castle.datasets import DAG, Topology, THPSimulation
from castle.algorithms import TTPM

# Data Simulation for TTPM
true_causal_matrix = DAG.erdos_renyi(n_nodes=10, n_edges=10)
topology_matrix = Topology.erdos_renyi(n_nodes=20, n_edges=20)
simulator = THPSimulation(true_causal_matrix,
                          topology_matrix,
                          mu_range=(0.00005, 0.0001),
                          alpha_range=(0.005, 0.007))
X = simulator.simulate(T=3600 * 24, max_hop=2)

# TTPM modeling
ttpm = TTPM(topology_matrix, max_hop=2)
ttpm.learn(X)
print(ttpm.causal_matrix)

# plot est_dag and true_dag
GraphDAG(ttpm.causal_matrix.values, true_causal_matrix)
# calculate accuracy
ret_metrix = MetricsDAG(ttpm.causal_matrix.values, true_causal_matrix)
print(ret_metrix.metrics)
Example #11
def to_dag(model, train_data, test_data, params, stage_name='to_dag'):
    """
    1. If some entries of A_phi are 0, mask them as well
       (this can happen with stochastic proximal gradient descent).
    2. Remove edges (from weaker to stronger) until a DAG is obtained.

    Parameters
    ----------
    model : model class
        NonlinearGauss or NonlinearGaussANM
    train_data : NormalizationData
        training data
    test_data : NormalizationData
        test data
    params : Parameters
        Parameters class that includes all parameters
    stage_name : str
        name of folder for saving data after to_dag process

    Returns
    -------
    out : model
    """

    # Prepare path for saving results
    save_path = os.path.join(params.learning_path, stage_name)
    if not os.path.exists(save_path):
        os.makedirs(save_path)

    # Check if already computed
    if os.path.exists(os.path.join(save_path, "infer_DAG.npy")):
        return Accessor.load(save_path, "dag_model.pkl")

    model.eval()

    if params.jac_thresh:
        A = compute_jacobian_avg(model, train_data, train_data.n_samples).t()
    else:
        A = model.get_w_adj()
    A = A.detach().cpu().numpy()

    with torch.no_grad():
        # Find the smallest threshold that removes all cycle-inducing edges
        thresholds = np.unique(A)
        epsilon = 1e-8
        for step, t in enumerate(thresholds):
            to_keep = torch.Tensor(A > t + epsilon)
            new_adj = model.adjacency * to_keep
            if is_acyclic(new_adj):
                model.adjacency.copy_(new_adj)
                break

    # evaluate on validation set
    x, _ = test_data.sample(test_data.n_samples)
    weights, biases, extra_params = model.get_parameters(mode="wbx")
    nll_validation = -torch.mean(
        model.compute_log_likelihood(x, weights, biases, extra_params)).item()
    # Compute SHD and SID metrics
    pred_adj_ = model.adjacency.detach().cpu().numpy()
    train_adj_ = train_data.adjacency.detach().cpu().numpy()

    model.metrics = MetricsDAG(pred_adj_, train_adj_).metrics
    del train_adj_, pred_adj_

    # Save
    Accessor.dump_pkl(model, save_path, 'dag_model')
    Accessor.dump_pkl(params, save_path, 'params')
    Accessor.dump_pkl(nll_validation, save_path, "nll_validation", txt=True)
    np.save(os.path.join(save_path, "infer_DAG"),
            model.adjacency.detach().cpu().numpy())

    return model
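
The threshold search inside to_dag can be illustrated without the model class. A minimal sketch, assuming a hypothetical weighted adjacency and using networkx only for the acyclicity test:

import numpy as np
import networkx as nx

def smallest_dag_threshold(A, epsilon=1e-8):
    """Drop the weakest edges until the graph becomes acyclic,
    mirroring the loop in to_dag above."""
    for t in np.unique(A):
        B = (A > t + epsilon).astype(float)
        if nx.is_directed_acyclic_graph(nx.from_numpy_array(B, create_using=nx.DiGraph)):
            return B
    return np.zeros_like(A)

A = np.array([[0.0, 0.9, 0.1],
              [0.2, 0.0, 0.8],
              [0.7, 0.05, 0.0]])        # hypothetical cyclic weighted adjacency
print(smallest_dag_threshold(A))        # keeps only 0->1 and 1->2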
Example #12
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from castle.datasets import DAG, IIDSimulation
from castle.common import GraphDAG
from castle.metrics import MetricsDAG
from castle.algorithms.ges.ges import GES

for d in [6, 8, 10, 15, 20]:
    edges = d * 2
    weighted_random_dag = DAG.erdos_renyi(n_nodes=d,
                                          n_edges=edges,
                                          weight_range=(0.5, 2.0),
                                          seed=1)
    dataset = IIDSimulation(W=weighted_random_dag,
                            n=1000,
                            method='nonlinear',
                            sem_type='gp-add')
    true_dag, X = dataset.B, dataset.X

    algo = GES(criterion='bic', method='scatter')
    algo.learn(X)

    # plot predict_dag and true_dag
    GraphDAG(algo.causal_matrix, true_dag)
    m1 = MetricsDAG(algo.causal_matrix, true_dag)
    print(m1.metrics)
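    # note: this break stops the sweep after the first graph size (d=6)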
    break
Example #13
    from castle.algorithms import RL

    g = RL(**params_config['model_params'])
    g.learn(data=X, dag=true_dag)

elif args.model_name == 'ttpm':
    from castle.algorithms import TTPM

    g = TTPM(topology_matrix, **params_config['model_params'])
    g.learn(X)

else:
    raise ValueError('Invalid algorithm name: {}.'.format(args.model_name))

# plot and evaluate predict_dag and true_dag
if true_dag is not None:
    if args.model_name == 'ttpm':
        GraphDAG(g.causal_matrix.values, true_dag)
        m = MetricsDAG(g.causal_matrix.values, true_dag)
        print(m.metrics)
    else:
        GraphDAG(g.causal_matrix, true_dag)
        m = MetricsDAG(g.causal_matrix, true_dag)
        print(m.metrics)

else:
    if args.model_name == 'ttpm':
        GraphDAG(g.causal_matrix.values)
    else:
        GraphDAG(g.causal_matrix)