Code example #1 (score: 0)
File: main.py — Project: kt8506/notears-tensorflow
def main():
    """Run the NoTears training pipeline end-to-end.

    Parses CLI arguments, prepares a timestamped output directory with
    logging, generates a synthetic dataset, trains the model via the
    augmented-Lagrangian trainer, then saves and plots both the raw and
    the thresholded estimated graphs.
    """
    args = get_args()

    # Timestamped output directory (millisecond precision) + logging
    stamp = datetime.now(timezone('Canada/Central')).strftime('%Y-%m-%d_%H-%M-%S-%f')[:-3]
    output_dir = f'output/{stamp}'
    create_dir(output_dir)
    LogHelper.setup(log_path=f'{output_dir}/training.log', level_str='INFO')
    logger = logging.getLogger(__name__)

    # Keep the run configuration next to the logs
    save_yaml_config(args, path=f'{output_dir}/config.yaml')

    set_seed(args.seed)  # reproducibility

    # Synthetic data generation
    dataset = SyntheticDataset(args.n, args.d, args.graph_type, args.degree,
                               args.sem_type, args.noise_scale,
                               args.dataset_type)
    logger.info('Finished generating dataset')

    model = NoTears(args.n, args.d, args.seed, args.l1_lambda, args.use_float64)
    model.print_summary(print_func=model.logger.info)

    trainer = ALTrainer(args.init_rho, args.rho_max, args.h_factor,
                        args.rho_multiply, args.init_iter, args.learning_rate,
                        args.h_tol)
    W_est = trainer.train(model, dataset.X, dataset.W, args.graph_thres,
                          args.max_iter, args.iter_step, output_dir)
    logger.info('Finished training model')

    # Persist the raw estimate, the ground truth and the observations
    np.save(f'{output_dir}/true_graph.npy', dataset.W)
    np.save(f'{output_dir}/X.npy', dataset.X)
    np.save(f'{output_dir}/final_raw_estimated_graph.npy', W_est)

    # Plot raw estimated graph
    plot_estimated_graph(W_est, dataset.W,
                         save_name=f'{output_dir}/raw_estimated_graph.png')

    logger.info('Thresholding.')
    # Zero out edges below the threshold, then re-plot and score
    W_est[np.abs(W_est) < args.graph_thres] = 0
    plot_estimated_graph(W_est, dataset.W,
                         save_name=f'{output_dir}/thresholded_estimated_graph.png')
    results_thresholded = count_accuracy(dataset.W, W_est)
    logger.info(f'Results after thresholding by {args.graph_thres}: {results_thresholded}')
Code example #2 (score: 0)
def main():
    """Train a VAE on the real dataset, then visualize samples and reconstructions.

    Handles argument parsing, output-directory/logging setup, dataset and
    model construction, training, and final plotting.
    """
    args = get_args()

    # Timestamped output directory (millisecond precision) + logging
    stamp = datetime.now(timezone('Asia/Shanghai')).strftime('%Y-%m-%d_%H-%M-%S-%f')[:-3]
    output_dir = f'output/{stamp}'
    create_dir(output_dir)
    LogHelper.setup(log_path=f'{output_dir}/training.log', level_str='INFO')
    logger = logging.getLogger(__name__)

    # Keep the run configuration next to the logs
    save_yaml_config(args, path=f'{output_dir}/config.yaml')

    set_seed(args.seed)  # reproducibility

    dataset = RealDataset(args.batch_size)
    logger.info('Finished generating dataset')

    device = get_device()
    model = VAE(args.z_dim, args.num_hidden, args.input_dim, device)

    trainer = Trainer(args.batch_size, args.num_epochs, args.learning_rate)
    trainer.train_model(model=model,
                        dataset=dataset,
                        output_dir=output_dir,
                        device=device,
                        input_dim=args.input_dim)
    logger.info('Finished training model')

    # Visualizations: samples from the prior, then reconstructions
    plot_samples(sample_vae(model, args.z_dim, device))
    plot_reconstructions(model, dataset, device)

    logger.info('All Finished!')
Code example #3 (score: 0)
def main():
    """Entry point for the GAE causal-discovery experiment.

    Sets up logging and outputs, builds a synthetic dataset, trains a
    graph autoencoder with an augmented-Lagrangian trainer, then saves
    and plots the raw and thresholded recovered graphs.
    """
    args = get_args()

    # Timestamped output directory (millisecond precision) + logging
    stamp = datetime.now(timezone('Asia/Hong_Kong')).strftime('%Y-%m-%d_%H-%M-%S-%f')[:-3]
    output_dir = f'output/{stamp}'
    create_dir(output_dir)
    LogHelper.setup(log_path=f'{output_dir}/training.log', level_str='INFO')
    logger = logging.getLogger(__name__)

    # Keep the run configuration next to the logs
    save_yaml_config(args, path=f'{output_dir}/config.yaml')

    set_seed(args.seed)  # reproducibility

    dataset = SyntheticDataset(args.n, args.d, args.graph_type, args.degree,
                               args.sem_type, args.noise_scale,
                               args.dataset_type, args.x_dim)
    logger.info('Finished generating dataset')

    model = GAE(args.n, args.d, args.x_dim, args.seed, args.num_encoder_layers,
                args.num_decoder_layers, args.hidden_size, args.latent_dim,
                args.l1_graph_penalty, args.use_float64)
    model.print_summary(print_func=model.logger.info)

    trainer = ALTrainer(args.init_rho, args.rho_thres, args.h_thres,
                        args.rho_multiply, args.init_iter, args.learning_rate,
                        args.h_tol, args.early_stopping,
                        args.early_stopping_thres)
    W_est = trainer.train(model, dataset.X, dataset.W, args.graph_thres,
                          args.max_iter, args.iter_step, output_dir)
    logger.info('Finished training model')

    # Persist ground truth, observations and the raw recovered graph
    np.save(f'{output_dir}/true_graph.npy', dataset.W)
    np.save(f'{output_dir}/observational_data.npy', dataset.X)
    np.save(f'{output_dir}/final_raw_recovered_graph.npy', W_est)

    # Plot raw recovered graph
    plot_recovered_graph(W_est, dataset.W,
                         save_name=f'{output_dir}/raw_recovered_graph.png')

    logger.info('Filter by constant threshold')
    W_est = W_est / np.max(np.abs(W_est))  # normalize by the largest weight

    # Zero out weak edges, re-plot, and evaluate accuracy
    W_est[np.abs(W_est) < args.graph_thres] = 0
    plot_recovered_graph(W_est, dataset.W,
                         save_name=f'{output_dir}/thresholded_recovered_graph.png')
    results_thresholded = count_accuracy(dataset.W, W_est)
    logger.info(f'Results after thresholding by {args.graph_thres}: {results_thresholded}')
Code example #4 (score: 0)
def synthetic():
    """Run the TimeLatent model on synthetic time series.

    Generates a synthetic dataset with latent confounders, trains the
    model, evaluates the recovered lagged adjacency matrices against the
    ground truth (AUC and F1 at several thresholds), and saves all plots
    into a timestamped output directory.
    """
    np.set_printoptions(precision=3)

    args = get_args()

    # Timestamped output directory (millisecond precision) + logging
    stamp = datetime.now(timezone('Asia/Shanghai')).strftime('%Y-%m-%d_%H-%M-%S-%f')[:-3]
    output_dir = f'output/{stamp}'
    create_dir(output_dir)
    LogHelper.setup(log_path=f'{output_dir}/training.log', level_str='INFO')
    logger = logging.getLogger(__name__)

    # Keep the run configuration next to the logs
    save_yaml_config(args, path=f'{output_dir}/config.yaml')

    set_seed(args.seed)  # reproducibility

    # Generate and persist the dataset
    dataset = SyntheticDataset(args.num_X, args.num_Z, args.num_samples,
                               args.max_lag)
    dataset.save_dataset(output_dir=output_dir)
    logger.info('Finished generating dataset')

    # Quick visual check of the last 150 steps of each series
    logger.info(f'The shape of observed data: {dataset.X.shape}')
    plot_timeseries(dataset.X[-150:], 'X', display_mode=False,
                    save_name=output_dir + '/timeseries_X.png')
    plot_timeseries(dataset.Z[-150:], 'Z', display_mode=False,
                    save_name=output_dir + '/timeseries_Z.png')

    # Model + training
    model = TimeLatent(args.num_X, args.max_lag, args.num_samples, args.device,
                       args.prior_rho_A, args.prior_sigma_W, args.temperature,
                       args.sigma_Z, args.sigma_X)
    trainer = Trainer(args.learning_rate, args.num_iterations, args.num_output)
    X_tensor = torch.tensor(dataset.X, dtype=torch.float32, device=args.device)
    trainer.train_model(model=model, X=X_tensor, output_dir=output_dir)

    plot_losses(trainer.train_losses, display_mode=False,
                save_name=output_dir + '/loss.png')

    # Save result
    trainer.log_and_save_intermediate_outputs()
    logger.info('Finished training model')

    # Performance: model.posterior_A.probs has shape
    # (max_lag, num_X + num_Z, num_X + num_Z); keep only the observed block.
    estimate_A = (model.posterior_A.probs[:, :args.num_X, :args.num_X]
                  .cpu().data.numpy())
    # Ground truth has shape (max_lag, num_X, num_X)
    groudtruth_A = np.array(dataset.groudtruth)

    Score = AUC_score(estimate_A.T, groudtruth_A.T)
    logger.info(
        '\n        fpr:{} \n        tpr:{}\n thresholds:{}\n AUC:{}'.format(
            Score['fpr'], Score['tpr'], Score['thresholds'], Score['AUC']))

    plot_ROC_curve(estimate_A.T, groudtruth_A.T, display_mode=False,
                   save_name=output_dir + '/ROC_Curve.png')

    # F1 at thresholds 0.0, 0.1, ..., 1.0
    for t in range(11):
        logger.info(f'Under threshold:{t / 10}')
        logger.info(F1(estimate_A.T, groudtruth_A.T, threshold=t / 10))

    estimate_A_all = model.posterior_A.probs.cpu().data.numpy()

    # Visualizations. In this implementation A_ij = 1 means j -> i, while
    # plot_recovered_graph interprets A_ij = 1 as i -> j, hence the transposes.
    for k in range(args.max_lag):
        plot_recovered_graph(estimate_A[k].T, groudtruth_A[k].T,
                             title=f'Lag = {k + 1}', display_mode=False,
                             save_name=output_dir + f'/A_lag_{k}.png')
        plot_recovered_graph(estimate_A_all[k].T, dataset.A[k].T,
                             title=f'Lag = {k + 1}', display_mode=False,
                             save_name=output_dir + f'/All_lag_{k}.png')

    logger.info('All Finished!')
Code example #5 (score: 0)
def real(temperature=2.0,
         max_lag=1,
         prior_rho_A=0.7,
         prior_sigma_W=0.05,
         sigma_Z=1.0,
         sigma_X=0.05,
         num_iterations=3000):
    """Train TimeLatent on the real stock dataset and plot recovered graphs.

    The model hyper-parameters used to be hard-coded inside the body;
    they are now keyword arguments whose defaults reproduce the original
    values exactly, so existing ``real()`` calls behave identically while
    experiments can override individual settings.

    Args:
        temperature: relaxation temperature passed to TimeLatent.
        max_lag: number of time lags modeled.
        prior_rho_A: prior edge probability for the adjacency A.
        prior_sigma_W: prior scale for the weights W.
        sigma_Z: noise scale for the latent series Z.
        sigma_X: noise scale for the observed series X.
        num_iterations: number of training iterations.
    """
    np.set_printoptions(precision=3)

    # Get arguments parsed (seed, device, learning rate, num_output, ...)
    args = get_args()

    # Timestamped output directory (millisecond precision) + logging
    output_dir = 'output/real_{}'.format(
        datetime.now(
            timezone('Asia/Shanghai')).strftime('%Y-%m-%d_%H-%M-%S-%f')[:-3])
    create_dir(output_dir)
    LogHelper.setup(log_path='{}/training.log'.format(output_dir),
                    level_str='INFO')
    _logger = logging.getLogger('real')

    # Save the configuration for logging purpose
    save_yaml_config(args, path='{}/config.yaml'.format(output_dir))

    # Reproducibility
    set_seed(args.seed)

    # Get dataset
    dataset = RealDataset()

    # Look at data (last 150 time steps)
    _logger.info('The shape of observed data: {}'.format(dataset.stock.shape))
    plot_timeseries(dataset.stock[-150:],
                    'stock',
                    display_mode=False,
                    save_name=output_dir + '/timeseries_stock.png')

    # Data dimensions come from the dataset itself
    num_samples, num_X = dataset.stock.shape

    # Log the (possibly overridden) hyper-parameters
    _logger.info(
        "num_X:{},max_lag:{},num_samples:{},args.device:{},prior_rho_A:{},prior_sigma_W:{},temperature:{},sigma_Z:{},sigma_X:{},num_iterations:{}"
        .format(num_X, max_lag, num_samples, args.device, prior_rho_A,
                prior_sigma_W, temperature, sigma_Z, sigma_X, num_iterations))
    # Init model
    model = TimeLatent(num_X=num_X,
                       max_lag=max_lag,
                       num_samples=num_samples,
                       device=args.device,
                       prior_rho_A=prior_rho_A,
                       prior_sigma_W=prior_sigma_W,
                       temperature=temperature,
                       sigma_Z=sigma_Z,
                       sigma_X=sigma_X)
    trainer = Trainer(learning_rate=args.learning_rate,
                      num_iterations=num_iterations,
                      num_output=args.num_output)

    trainer.train_model(model=model,
                        X=torch.tensor(dataset.stock,
                                       dtype=torch.float32,
                                       device=args.device),
                        output_dir=output_dir)

    plot_losses(trainer.train_losses,
                display_mode=False,
                save_name=output_dir + '/loss.png')

    # Save result
    trainer.log_and_save_intermediate_outputs()

    _logger.info('Finished training model')

    estimate_A = model.posterior_A.probs.cpu().data.numpy()

    # Visualizations. In this implementation A_ij = 1 means j -> i, while
    # plot_recovered_graph interprets A_ij = 1 as i -> j, hence the transpose.
    for k in range(max_lag):
        plot_recovered_graph(estimate_A[k].T,
                             W=None,
                             title='Lag = {}'.format(k + 1),
                             display_mode=False,
                             save_name=output_dir + '/lag_{}.png'.format(k))

    _logger.info('All Finished!')