def main():
    """Entry point: generate a synthetic dataset, fit NoTears, then save and plot results."""
    # Parse command-line arguments
    args = get_args()

    # Create a timestamped output directory and configure logging
    # (timezone('Canada/Central') — presumably pytz; verify against imports)
    stamp = datetime.now(timezone('Canada/Central')).strftime('%Y-%m-%d_%H-%M-%S-%f')[:-3]
    run_dir = f'output/{stamp}'
    create_dir(run_dir)
    LogHelper.setup(log_path=f'{run_dir}/training.log', level_str='INFO')
    logger = logging.getLogger(__name__)

    # Persist the run configuration for later inspection
    save_yaml_config(args, path=f'{run_dir}/config.yaml')

    # Fix random seeds for reproducibility
    set_seed(args.seed)

    # Build the synthetic dataset
    dataset = SyntheticDataset(args.n, args.d, args.graph_type, args.degree,
                               args.sem_type, args.noise_scale,
                               args.dataset_type)
    logger.info('Finished generating dataset')

    # Instantiate the model and log its summary
    model = NoTears(args.n, args.d, args.seed, args.l1_lambda, args.use_float64)
    model.print_summary(print_func=model.logger.info)

    # Train with the augmented-Lagrangian trainer
    trainer = ALTrainer(args.init_rho, args.rho_max, args.h_factor,
                        args.rho_multiply, args.init_iter, args.learning_rate,
                        args.h_tol)
    W_est = trainer.train(model, dataset.X, dataset.W, args.graph_thres,
                          args.max_iter, args.iter_step, run_dir)
    logger.info('Finished training model')

    # Save raw estimated graph, ground truth and observational data after training
    np.save(f'{run_dir}/true_graph.npy', dataset.W)
    np.save(f'{run_dir}/X.npy', dataset.X)
    np.save(f'{run_dir}/final_raw_estimated_graph.npy', W_est)

    # Plot the raw (unthresholded) estimate
    plot_estimated_graph(W_est, dataset.W,
                         save_name=f'{run_dir}/raw_estimated_graph.png')

    logger.info('Thresholding.')
    # Zero out edges below the threshold, then plot and score the pruned graph
    W_est[np.abs(W_est) < args.graph_thres] = 0
    plot_estimated_graph(W_est, dataset.W,
                         save_name=f'{run_dir}/thresholded_estimated_graph.png')
    results_thresholded = count_accuracy(dataset.W, W_est)
    logger.info(f'Results after thresholding by {args.graph_thres}: {results_thresholded}')
def main():
    """Entry point: load the real dataset, train a VAE, and plot samples and reconstructions."""
    # Parse command-line arguments
    args = get_args()

    # Create a timestamped output directory and configure logging
    # (timezone('Asia/Shanghai') — presumably pytz; verify against imports)
    stamp = datetime.now(timezone('Asia/Shanghai')).strftime('%Y-%m-%d_%H-%M-%S-%f')[:-3]
    run_dir = f'output/{stamp}'
    create_dir(run_dir)
    LogHelper.setup(log_path=f'{run_dir}/training.log', level_str='INFO')
    logger = logging.getLogger(__name__)

    # Persist the run configuration for later inspection
    save_yaml_config(args, path=f'{run_dir}/config.yaml')

    # Fix random seeds for reproducibility
    set_seed(args.seed)

    # Load the dataset
    dataset = RealDataset(args.batch_size)
    logger.info('Finished generating dataset')

    # Build the model on the selected device and train it
    device = get_device()
    model = VAE(args.z_dim, args.num_hidden, args.input_dim, device)
    trainer = Trainer(args.batch_size, args.num_epochs, args.learning_rate)
    trainer.train_model(model=model, dataset=dataset, output_dir=run_dir,
                        device=device, input_dim=args.input_dim)
    logger.info('Finished training model')

    # Visualize: samples drawn from the prior, plus input reconstructions
    samples = sample_vae(model, args.z_dim, device)
    plot_samples(samples)
    plot_reconstructions(model, dataset, device)
    logger.info('All Finished!')
def main():
    """Entry point: generate synthetic data, train a GAE, then save and plot recovered graphs."""
    # Parse command-line arguments
    args = get_args()

    # Create a timestamped output directory and configure logging
    # (timezone('Asia/Hong_Kong') — presumably pytz; verify against imports)
    stamp = datetime.now(timezone('Asia/Hong_Kong')).strftime('%Y-%m-%d_%H-%M-%S-%f')[:-3]
    run_dir = f'output/{stamp}'
    create_dir(run_dir)
    LogHelper.setup(log_path=f'{run_dir}/training.log', level_str='INFO')
    logger = logging.getLogger(__name__)

    # Persist the run configuration for later inspection
    save_yaml_config(args, path=f'{run_dir}/config.yaml')

    # Fix random seeds for reproducibility
    set_seed(args.seed)

    # Build the synthetic dataset
    dataset = SyntheticDataset(args.n, args.d, args.graph_type, args.degree,
                               args.sem_type, args.noise_scale,
                               args.dataset_type, args.x_dim)
    logger.info('Finished generating dataset')

    # Instantiate the model and log its summary
    model = GAE(args.n, args.d, args.x_dim, args.seed,
                args.num_encoder_layers, args.num_decoder_layers,
                args.hidden_size, args.latent_dim, args.l1_graph_penalty,
                args.use_float64)
    model.print_summary(print_func=model.logger.info)

    # Train with the augmented-Lagrangian trainer
    trainer = ALTrainer(args.init_rho, args.rho_thres, args.h_thres,
                        args.rho_multiply, args.init_iter, args.learning_rate,
                        args.h_tol, args.early_stopping,
                        args.early_stopping_thres)
    W_est = trainer.train(model, dataset.X, dataset.W, args.graph_thres,
                          args.max_iter, args.iter_step, run_dir)
    logger.info('Finished training model')

    # Save raw recovered graph, ground truth and observational data after training
    np.save(f'{run_dir}/true_graph.npy', dataset.W)
    np.save(f'{run_dir}/observational_data.npy', dataset.X)
    np.save(f'{run_dir}/final_raw_recovered_graph.npy', W_est)

    # Plot the raw (unthresholded) estimate
    plot_recovered_graph(W_est, dataset.W,
                         save_name=f'{run_dir}/raw_recovered_graph.png')

    logger.info('Filter by constant threshold')
    # Normalize weights by the largest magnitude, then zero out small edges
    W_est = W_est / np.max(np.abs(W_est))
    W_est[np.abs(W_est) < args.graph_thres] = 0
    plot_recovered_graph(W_est, dataset.W,
                         save_name=f'{run_dir}/thresholded_recovered_graph.png')
    results_thresholded = count_accuracy(dataset.W, W_est)
    logger.info(f'Results after thresholding by {args.graph_thres}: {results_thresholded}')
def synthetic():
    """Run the TimeLatent causal-discovery experiment on synthetic time series."""
    np.set_printoptions(precision=3)

    # Parse command-line arguments
    args = get_args()

    # Create a timestamped output directory and configure logging
    # (timezone('Asia/Shanghai') — presumably pytz; verify against imports)
    stamp = datetime.now(timezone('Asia/Shanghai')).strftime('%Y-%m-%d_%H-%M-%S-%f')[:-3]
    run_dir = f'output/{stamp}'
    create_dir(run_dir)
    LogHelper.setup(log_path=f'{run_dir}/training.log', level_str='INFO')
    logger = logging.getLogger(__name__)

    # Persist the run configuration for later inspection
    save_yaml_config(args, path=f'{run_dir}/config.yaml')

    # Fix random seeds for reproducibility
    set_seed(args.seed)

    # Generate and save the synthetic dataset
    dataset = SyntheticDataset(args.num_X, args.num_Z, args.num_samples,
                               args.max_lag)
    dataset.save_dataset(output_dir=run_dir)
    logger.info('Finished generating dataset')

    # Inspect the data: log its shape and plot the last 150 steps of X and Z
    logger.info(f'The shape of observed data: {dataset.X.shape}')
    plot_timeseries(dataset.X[-150:], 'X', display_mode=False,
                    save_name=run_dir + '/timeseries_X.png')
    plot_timeseries(dataset.Z[-150:], 'Z', display_mode=False,
                    save_name=run_dir + '/timeseries_Z.png')

    # Build and train the model
    model = TimeLatent(args.num_X, args.max_lag, args.num_samples, args.device,
                       args.prior_rho_A, args.prior_sigma_W, args.temperature,
                       args.sigma_Z, args.sigma_X)
    trainer = Trainer(args.learning_rate, args.num_iterations, args.num_output)
    observed = torch.tensor(dataset.X, dtype=torch.float32, device=args.device)
    trainer.train_model(model=model, X=observed, output_dir=run_dir)
    plot_losses(trainer.train_losses, display_mode=False,
                save_name=run_dir + '/loss.png')

    # Save intermediate results
    trainer.log_and_save_intermediate_outputs()
    logger.info('Finished training model')

    # Evaluate: model.posterior_A.probs has shape (max_lag, num_X+num_Z, num_X+num_Z);
    # keep only the observed-to-observed sub-matrix for scoring.
    estimate_A = model.posterior_A.probs[:, :args.num_X, :args.num_X].cpu().data.numpy()
    # Ground truth has shape (max_lag, num_X, num_X).
    # NOTE(review): 'groudtruth' is a typo in the dataset's attribute name; kept as-is.
    groudtruth_A = np.array(dataset.groudtruth)
    Score = AUC_score(estimate_A.T, groudtruth_A.T)
    logger.info('\n fpr:{} \n tpr:{}\n thresholds:{}\n AUC:{}'.format(
        Score['fpr'], Score['tpr'], Score['thresholds'], Score['AUC']))
    plot_ROC_curve(estimate_A.T, groudtruth_A.T, display_mode=False,
                   save_name=run_dir + '/ROC_Curve.png')

    # F1 scores at thresholds 0.0, 0.1, ..., 1.0
    for t in range(0, 11):
        logger.info('Under threshold:{}'.format(t / 10))
        logger.info(F1(estimate_A.T, groudtruth_A.T, threshold=t / 10))

    estimate_A_all = model.posterior_A.probs.cpu().data.numpy()

    # Visualizations. In this implementation A_ij = 1 means j -> i, while
    # plot_recovered_graph treats A_ij = 1 as i -> j, hence the transposes.
    for k in range(args.max_lag):
        plot_recovered_graph(estimate_A[k].T, groudtruth_A[k].T,
                             title='Lag = {}'.format(k + 1),
                             display_mode=False,
                             save_name=run_dir + '/A_lag_{}.png'.format(k))
        plot_recovered_graph(estimate_A_all[k].T, dataset.A[k].T,
                             title='Lag = {}'.format(k + 1),
                             display_mode=False,
                             save_name=run_dir + '/All_lag_{}.png'.format(k))
    logger.info('All Finished!')
def real():
    """Run the TimeLatent causal-discovery experiment on the real stock data."""
    np.set_printoptions(precision=3)

    # Parse command-line arguments
    args = get_args()

    # Create a timestamped output directory and configure logging
    # (timezone('Asia/Shanghai') — presumably pytz; verify against imports)
    stamp = datetime.now(timezone('Asia/Shanghai')).strftime('%Y-%m-%d_%H-%M-%S-%f')[:-3]
    run_dir = f'output/real_{stamp}'
    create_dir(run_dir)
    LogHelper.setup(log_path=f'{run_dir}/training.log', level_str='INFO')
    logger = logging.getLogger('real')

    # Persist the run configuration for later inspection
    save_yaml_config(args, path=f'{run_dir}/config.yaml')

    # Fix random seeds for reproducibility
    set_seed(args.seed)

    # Load the dataset and plot the last 150 steps of the stock series
    dataset = RealDataset()
    logger.info(f'The shape of observed data: {dataset.stock.shape}')
    plot_timeseries(dataset.stock[-150:], 'stock', display_mode=False,
                    save_name=run_dir + '/timeseries_stock.png')

    # Hyperparameters fixed for the real-data experiment
    num_samples, num_X = dataset.stock.shape
    temperature = 2.0
    max_lag = 1
    prior_rho_A = 0.7
    prior_sigma_W = 0.05
    sigma_Z = 1.0
    sigma_X = 0.05
    num_iterations = 3000

    # Log the parameters
    logger.info(
        "num_X:{},max_lag:{},num_samples:{},args.device:{},prior_rho_A:{},prior_sigma_W:{},temperature:{},sigma_Z:{},sigma_X:{},num_iterations:{}"
        .format(num_X, max_lag, num_samples, args.device, prior_rho_A,
                prior_sigma_W, temperature, sigma_Z, sigma_X, num_iterations))

    # Build and train the model
    model = TimeLatent(num_X=num_X, max_lag=max_lag, num_samples=num_samples,
                       device=args.device, prior_rho_A=prior_rho_A,
                       prior_sigma_W=prior_sigma_W, temperature=temperature,
                       sigma_Z=sigma_Z, sigma_X=sigma_X)
    trainer = Trainer(learning_rate=args.learning_rate,
                      num_iterations=num_iterations,
                      num_output=args.num_output)
    observed = torch.tensor(dataset.stock, dtype=torch.float32,
                            device=args.device)
    trainer.train_model(model=model, X=observed, output_dir=run_dir)
    plot_losses(trainer.train_losses, display_mode=False,
                save_name=run_dir + '/loss.png')

    # Save intermediate results
    trainer.log_and_save_intermediate_outputs()
    logger.info('Finished training model')

    estimate_A = model.posterior_A.probs.cpu().data.numpy()

    # Visualizations. In this implementation A_ij = 1 means j -> i, while
    # plot_recovered_graph treats A_ij = 1 as i -> j, hence the transpose.
    for k in range(max_lag):
        plot_recovered_graph(estimate_A[k].T, W=None,
                             title='Lag = {}'.format(k + 1),
                             display_mode=False,
                             save_name=run_dir + '/lag_{}.png'.format(k))
    logger.info('All Finished!')