def main(args):
    """Load a dataset and generate synthetic sequences with TimeGAN.

    Args:
        args: object exposing the following attributes
            - data_name: sine, stock, or energy
            - seq_len: sequence length
            - module: gru, lstm, or lstmLN
            - hidden_dim: hidden dimensions
            - num_layer: number of layers
            - iteration: number of training iterations
            - batch_size: the number of samples in each batch
            - metric_iteration: number of iterations for metric computation
              (not read here, because metric computation is disabled)

    Returns:
        - ori_data: original data
        - generated_data: generated synthetic data

    Raises:
        ValueError: if ``args.data_name`` is not one of the supported names.
    """
    ## Data loading
    if args.data_name in ['stock', 'energy']:
        ori_data = real_data_loading(args.data_name, args.seq_len)
    elif args.data_name == 'sine':
        # Set number of samples and its dimensions
        no, dim = 10000, 5
        ori_data = sine_data_generation(no, args.seq_len, dim)
    else:
        # Fail early with a clear message instead of hitting an unbound
        # ``ori_data`` further down.
        raise ValueError(
            "Unsupported data_name %r; expected 'sine', 'stock' or 'energy'."
            % (args.data_name,)
        )

    print(args.data_name + ' dataset is ready.')

    ## Synthetic data generation by TimeGAN
    # Set network parameters
    parameters = {
        'module': args.module,
        'hidden_dim': args.hidden_dim,
        'num_layer': args.num_layer,
        'iterations': args.iteration,
        'batch_size': args.batch_size,
    }

    generated_data = timegan(ori_data, parameters)
    print('Finish Synthetic Data Generation')

    # NOTE: the performance-metric stage (discriminative score, predictive
    # score, PCA / t-SNE visualization) is disabled in this variant, so no
    # metric results are computed or returned.
    return ori_data, generated_data
Esempio n. 2
0
def main(args):
    """Main function for timeGAN experiments.

    Loads (or generates) the original data, trains TimeGAN to produce
    synthetic data, then computes the discriminative and predictive scores
    and draws the PCA / t-SNE visualizations.

    Args:
      args: object exposing the following attributes
        - data_name: sine, stock, energy, or hypo
        - seq_len: sequence length
        - module: gru, lstm, or lstmLN
        - hidden_dim: hidden dimensions
        - num_layer: number of layers
        - iteration: number of training iterations
        - batch_size: the number of samples in each batch
        - metric_iteration: number of iterations for metric computation
        The whole ``args`` object is also forwarded to ``visualization``.

    Returns:
      - ori_data: original data
      - generated_data: generated synthetic data (full, untruncated)
      - metric_results: discriminative and predictive scores

    Raises:
      ValueError: if ``args.data_name`` is not one of the supported names.
    """
    # Data loading
    if args.data_name in ["stock", "energy", "hypo"]:
        ori_data = real_data_loading(args.data_name, args.seq_len)
    elif args.data_name == "sine":
        # Set number of samples and its dimensions
        no, dim = 10000, 5
        ori_data = sine_data_generation(no, args.seq_len, dim)
    else:
        # Fail early with a clear message instead of hitting an unbound
        # ``ori_data`` further down.
        raise ValueError(
            "Unsupported data_name %r; expected 'sine', 'stock', 'energy' "
            "or 'hypo'." % (args.data_name,)
        )
    print(args.data_name + " dataset is ready.")

    # Synthetic data generation by TimeGAN
    # Set network parameters
    parameters = {
        "module": args.module,
        "hidden_dim": args.hidden_dim,
        "num_layer": args.num_layer,
        "iterations": args.iteration,
        "batch_size": args.batch_size,
    }
    print(len(ori_data), ori_data[0].shape)
    generated_data = timegan(ori_data, parameters)
    print("Finish Synthetic Data Generation")
    print(len(generated_data), generated_data[0].shape)

    # The metrics below are fed ``generated_data_part``. Default to the full
    # generated set so the name is always bound; previously it was only
    # assigned inside the branch below, which crashed whenever the generated
    # set was not longer than the original one.
    generated_data_part = generated_data
    if len(generated_data) > len(ori_data):
        generated_data_part = generated_data[: len(ori_data)]
        print(
            "Generated data shape mismatch with original data, "
            "calibrating part of generated data"
        )

    # Performance metrics
    # Output initialization
    metric_results = dict()

    # 1. Discriminative Score (averaged over metric_iteration runs)
    discriminative_score = [
        discriminative_score_metrics(ori_data, generated_data_part)
        for _ in range(args.metric_iteration)
    ]
    metric_results["discriminative"] = np.mean(discriminative_score)

    # 2. Predictive score (averaged over metric_iteration runs)
    predictive_score = [
        predictive_score_metrics(ori_data, generated_data_part)
        for _ in range(args.metric_iteration)
    ]
    metric_results["predictive"] = np.mean(predictive_score)

    # 3. Visualization (PCA and tSNE)
    visualization(ori_data, generated_data_part, "pca", args)
    visualization(ori_data, generated_data_part, "tsne", args)

    # Print discriminative and predictive scores
    print(metric_results)

    return ori_data, generated_data, metric_results
Esempio n. 3
0
# Experiment iteration counts
Iteration = 2
Sub_Iteration = 10
speed = 200         # 100,200,300,400,500
feed = 6           # 6,12


#%% Data Loading
seq_length = 24

# Select the loader that matches data_name (defined outside this section).
if data_name == 'google':
    dataX = google_data_loading(seq_length)
elif data_name == 'sine':
    No = 10000
    F_No = 5
    dataX = sine_data_generation(No, seq_length, F_No)
elif data_name == 'TUD':
    # NOTE(review): `min` and `max` rebind the builtins of the same name at
    # module level; kept as-is because later code may rely on these names.
    dataX,dataXs, min, max, idx, data_true = load_real_samples(seq_length, speed, feed, False)
print(data_name + ' dataset is ready.')

#%% Network Parameters
# hidden_dim and z_dim are both derived from the length of the first row of
# the first sample in dataX.
parameters = {
    'hidden_dim': len(dataX[0][0,:]) * 4,
    'num_layers': 3,
    'iterations': 2,
    'batch_size': 128,
    'module_name': 'gru',   # Other options: 'lstm' or 'lstmLN'
    'z_dim': len(dataX[0][0,:]),
}
#%% Experiments
Esempio n. 4
0
            print(X_mb, T_mb)
            train_step_embedder(X_mb, T_mb)
            print(itt)
    #train()

####TESTING####

from data_loading import real_data_loading, sine_data_generation

# Small smoke-test configuration: tiny dataset and few iterations.
data_name = 'sine'
seq_len = 5

# Build the original data set for the selected data_name.
if data_name == 'sine':
    # Set number of samples and its dimensions
    no, dim = 50, 2
    ori_data = sine_data_generation(no, seq_len, dim)
elif data_name in ['stock', 'energy']:
    ori_data = real_data_loading(data_name, seq_len)

print(data_name + ' dataset is ready.')

## Network parameters
parameters = {
    'module': 'lstm',
    'hidden_dim': 6,
    'num_layer': 3,
    'iterations': 10,
    'batch_size': 4,
}

# Run TimeGAN on the data with the parameters above; the result is discarded.
timegan(ori_data, parameters)