def get_all_responses_with_games(self, message, games):
        for_time = extract_time(message.content)
        game_and_players_strings = []

        for game in games:
            db.record_would_play(message.author, game, for_time)
            ready_would_plays_for_game = db.get_ready_would_plays_for_game(
                game)
            unready_would_plays_for_game = db.get_unready_would_plays_for_game(
                game)
            if len(unready_would_plays_for_game) == 0:
                game_and_players_strings += [
                    "%s (%s)" % (game.name, len(game.get_ready_players()))
                ]
            else:
                game_and_players_strings += [
                    "%s %s" % (game.name,
                               generate_ready_at_time_messages(
                                   ready_would_plays_for_game,
                                   unready_would_plays_for_game))
                ]
        messages = [
            "%s would play %s" %
            (message.author.display_name,
             make_sentence_from_strings(game_and_players_strings))
        ]
        for game in games:
            messages += get_any_ready_messages(game)
        return messages

Example #2
def predictive_score_metrics(ori_data, generated_data):
    """Report the performance of Post-hoc RNN one-step ahead prediction.
  
  Args:
    - ori_data: original data
    - generated_data: generated synthetic data
    
  Returns:
    - predictive_score: MAE of the predictions on the original data
  """
    # Initialization on the Graph
    tf.compat.v1.reset_default_graph()

    # Basic Parameters
    no, seq_len, dim = np.asarray(ori_data).shape

    # Set maximum sequence length and each sequence length
    ori_time, ori_max_seq_len = extract_time(ori_data)
    generated_time, generated_max_seq_len = extract_time(generated_data)
    max_seq_len = max([ori_max_seq_len, generated_max_seq_len])

    ## Build a post-hoc RNN predictive network
    # Network parameters
    hidden_dim = int(dim / 2)
    iterations = 5000
    batch_size = 128

    # Input place holders
    X = tf.compat.v1.placeholder(tf.float32, [None, max_seq_len - 1, dim - 1],
                                 name="myinput_x")
    T = tf.compat.v1.placeholder(tf.int32, [None], name="myinput_t")
    Y = tf.compat.v1.placeholder(tf.float32, [None, max_seq_len - 1, 1],
                                 name="myinput_y")

    # Predictor function
    def predictor(x, t):
        """Simple predictor function.
    
    Args:
      - x: time-series data
      - t: time information
      
    Returns:
      - y_hat: prediction
      - p_vars: predictor variables
    """
        with tf.compat.v1.variable_scope("predictor",
                                         reuse=tf.compat.v1.AUTO_REUSE) as vs:
            p_cell = tf.keras.layers.GRUCell(hidden_dim,
                                             activation='tanh',
                                             name='p_cell')
            p_outputs = tf.keras.layers.RNN(p_cell, return_sequences=True)(x)
            y_hat_logit = tf.keras.layers.Dense(1, activation=None)(p_outputs)
            y_hat = tf.nn.sigmoid(y_hat_logit)
            p_vars = [
                v for v in tf.compat.v1.global_variables()
                if v.name.startswith(vs.name)
            ]

        return y_hat, p_vars

    y_pred, p_vars = predictor(X, T)
    # Loss for the predictor
    p_loss = tf.compat.v1.losses.absolute_difference(Y, y_pred)
    # optimizer
    p_solver = tf.compat.v1.train.AdamOptimizer().minimize(p_loss,
                                                           var_list=p_vars)

    ## Training
    # Session start
    sess = tf.compat.v1.Session()
    sess.run(tf.compat.v1.global_variables_initializer())

    # Training using Synthetic dataset
    for itt in range(iterations):

        # Set mini-batch
        idx = np.random.permutation(len(generated_data))
        train_idx = idx[:batch_size]

        X_mb = list(generated_data[i][:-1, :(dim - 1)] for i in train_idx)
        T_mb = list(generated_time[i] - 1 for i in train_idx)
        Y_mb = list(
            np.reshape(generated_data[i][1:, (
                dim - 1)], [len(generated_data[i][1:, (dim - 1)]), 1])
            for i in train_idx)

        # Train predictor
        _, step_p_loss = sess.run([p_solver, p_loss],
                                  feed_dict={
                                      X: X_mb,
                                      T: T_mb,
                                      Y: Y_mb
                                  })

    ## Test the trained model on the original data
    idx = np.random.permutation(len(ori_data))
    train_idx = idx[:no]

    X_mb = list(ori_data[i][:-1, :(dim - 1)] for i in train_idx)
    T_mb = list(ori_time[i] - 1 for i in train_idx)
    Y_mb = list(
        np.reshape(ori_data[i][1:, (dim -
                                    1)], [len(ori_data[i][1:, (dim - 1)]), 1])
        for i in train_idx)

    # Prediction
    pred_Y_curr = sess.run(y_pred, feed_dict={X: X_mb, T: T_mb})

    # Compute the performance in terms of MAE
    MAE_temp = 0
    for i in range(no):
        MAE_temp = MAE_temp + mean_absolute_error(Y_mb[i],
                                                  pred_Y_curr[i, :, :])

    predictive_score = MAE_temp / no

    return predictive_score
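
#### USAGE SKETCH (predictive_score_metrics) ####
# A minimal, hedged usage example; it is not part of the original snippet. It
# assumes numpy, tensorflow and scikit-learn are installed, and that extract_time
# is provided by the TimeGAN utils module (the import path is an assumption).
#
# import numpy as np
# import tensorflow as tf
# from sklearn.metrics import mean_absolute_error
# from utils import extract_time  # assumed helper location
#
# no, seq_len, dim = 32, 24, 5
# ori_data = np.random.rand(no, seq_len, dim)        # original sequences
# generated_data = np.random.rand(no, seq_len, dim)  # synthetic sequences
# score = predictive_score_metrics(ori_data, generated_data)
# print('predictive score (MAE):', score)
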
def discriminative_score_metrics(ori_data,
                                 generated_data,
                                 rnn_iterations=2000):
    """Use post-hoc RNN to classify original data and synthetic data
  
  Args:
    - ori_data: original data
    - generated_data: generated synthetic data
    
  Returns:
    - discriminative_score: np.abs(classification accuracy - 0.5)
  """
    # Initialization on the Graph
    tf.reset_default_graph()

    # Basic Parameters
    no, seq_len, dim = np.asarray(ori_data).shape

    # Set maximum sequence length and each sequence length
    ori_time, ori_max_seq_len = extract_time(ori_data)
    generated_time, generated_max_seq_len = extract_time(generated_data)
    max_seq_len = max([ori_max_seq_len, generated_max_seq_len])

    ## Build a post-hoc RNN discriminator network
    # Network parameters
    hidden_dim = int(dim / 2)
    iterations = rnn_iterations
    batch_size = 128

    # Input place holders
    # Feature
    X = tf.placeholder(tf.float32, [None, max_seq_len, dim], name="myinput_x")
    X_hat = tf.placeholder(tf.float32, [None, max_seq_len, dim],
                           name="myinput_x_hat")

    T = tf.placeholder(tf.int32, [None], name="myinput_t")
    T_hat = tf.placeholder(tf.int32, [None], name="myinput_t_hat")

    # discriminator function
    def discriminator(x, t):
        """Simple discriminator function.
    
    Args:
      - x: time-series data
      - t: time information
      
    Returns:
      - y_hat_logit: logits of the discriminator output
      - y_hat: discriminator output
      - d_vars: discriminator variables
    """
        with tf.variable_scope("discriminator", reuse=tf.AUTO_REUSE) as vs:
            d_cell = tf.nn.rnn_cell.GRUCell(num_units=hidden_dim,
                                            activation=tf.nn.tanh,
                                            name='d_cell')
            d_outputs, d_last_states = tf.nn.dynamic_rnn(d_cell,
                                                         x,
                                                         dtype=tf.float32,
                                                         sequence_length=t)
            y_hat_logit = tf.contrib.layers.fully_connected(d_last_states,
                                                            1,
                                                            activation_fn=None)
            y_hat = tf.nn.sigmoid(y_hat_logit)
            d_vars = [
                v for v in tf.all_variables() if v.name.startswith(vs.name)
            ]

        return y_hat_logit, y_hat, d_vars

    y_logit_real, y_pred_real, d_vars = discriminator(X, T)
    y_logit_fake, y_pred_fake, _ = discriminator(X_hat, T_hat)

    # Loss for the discriminator
    d_loss_real = tf.reduce_mean(
        tf.nn.sigmoid_cross_entropy_with_logits(
            logits=y_logit_real, labels=tf.ones_like(y_logit_real)))
    d_loss_fake = tf.reduce_mean(
        tf.nn.sigmoid_cross_entropy_with_logits(
            logits=y_logit_fake, labels=tf.zeros_like(y_logit_fake)))
    d_loss = d_loss_real + d_loss_fake

    # optimizer
    d_solver = tf.train.AdamOptimizer().minimize(d_loss, var_list=d_vars)

    ## Train the discriminator
    # Start session and initialize
    sess = tf.Session()
    sess.run(tf.global_variables_initializer())

    # Train/test division for both original and generated data
    train_x, train_x_hat, test_x, test_x_hat, train_t, train_t_hat, test_t, test_t_hat = \
    train_test_divide(ori_data, generated_data, ori_time, generated_time)

    # Training step
    for itt in range(iterations):
        # Batch setting
        X_mb, T_mb = batch_generator(train_x, train_t, batch_size)
        X_hat_mb, T_hat_mb = batch_generator(train_x_hat, train_t_hat,
                                             batch_size)

        # Train discriminator
        _, step_d_loss = sess.run([d_solver, d_loss],
                                  feed_dict={
                                      X: X_mb,
                                      T: T_mb,
                                      X_hat: X_hat_mb,
                                      T_hat: T_hat_mb
                                  })

    ## Test the performance on the testing set
    y_pred_real_curr, y_pred_fake_curr = sess.run([y_pred_real, y_pred_fake],
                                                  feed_dict={
                                                      X: test_x,
                                                      T: test_t,
                                                      X_hat: test_x_hat,
                                                      T_hat: test_t_hat
                                                  })

    y_pred_final = np.squeeze(
        np.concatenate((y_pred_real_curr, y_pred_fake_curr), axis=0))
    y_label_final = np.concatenate((np.ones([
        len(y_pred_real_curr),
    ]), np.zeros([
        len(y_pred_real_curr),
    ])),
                                   axis=0)

    # Compute the accuracy
    acc = accuracy_score(y_label_final, (y_pred_final > 0.5))
    discriminative_score = np.abs(0.5 - acc)

    return discriminative_score
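
#### USAGE SKETCH (discriminative_score_metrics) ####
# A hedged usage example, not from the original snippet. It assumes extract_time,
# train_test_divide and batch_generator come from the TimeGAN utils module and
# accuracy_score from scikit-learn; this snippet runs in TF1.x-style graph mode.
#
# import numpy as np
# from sklearn.metrics import accuracy_score
# from utils import extract_time, train_test_divide, batch_generator  # assumed
#
# no, seq_len, dim = 64, 24, 5
# ori_data = np.random.rand(no, seq_len, dim)
# generated_data = np.random.rand(no, seq_len, dim)
# disc_score = discriminative_score_metrics(ori_data, generated_data,
#                                           rnn_iterations=200)
# print('discriminative score:', disc_score)
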
def timegan(ori_data, parameters):

    # Basic Parameters
    no, seq_len, dim = np.asarray(ori_data).shape
    
    # Maximum sequence length and each sequence length
    ori_time, max_seq_len = extract_time(ori_data)
  
    def MinMaxScaler(data):
        """Min-Max Normalizer.
        
        Args:
        - data: raw data
        
        Returns:
        - norm_data: normalized data
        - min_val: minimum values (for renormalization)
        - max_val: maximum values (for renormalization)
        """    
        min_val = np.nanmin(np.nanmin(data, axis = 0), axis = 0)
        data = data - min_val
        
        max_val = np.nanmax(np.nanmax(data, axis = 0), axis = 0)
        norm_data = data / (max_val + 1e-7)
        
        return norm_data, min_val, max_val
  
    # Normalization
    ori_data, min_val, max_val = MinMaxScaler(ori_data)
              
    ## Build the RNN networks
  
    # Network Parameters
    hidden_dim   = parameters['hidden_dim'] 
    num_layers   = parameters['num_layer']
    iterations   = parameters['iterations']
    batch_size   = parameters['batch_size']
    module_name  = parameters['module'] 
    loss_mode    = parameters['loss']
    z_dim        = dim
    gamma        = 1
    gp_weight    = 10

    def make_embedder ():
        """Embedding network between original feature space to latent space.
        
        Args for model:
        - X: input time-series features
        
        Model returns:
        - H: embeddings
        """
        embedder_model = tf.keras.Sequential(name='embedder')
        #embedder_model.add(tf.keras.layers.Masking(mask_value=-1, input_shape=(seq_len,dim)))
        embedder_model.add(rnn_cell(module_name, hidden_dim, return_sequences=True, input_shape=(seq_len,dim)))
        for i in range(num_layers-1):
            embedder_model.add(rnn_cell(module_name, hidden_dim, return_sequences=True, input_shape=(seq_len, hidden_dim)))
        embedder_model.add(tf.keras.layers.Dense(hidden_dim, activation='sigmoid'))

        return embedder_model

    def make_recovery ():   
        """Recovery network from latent space to original space.
        
        Args for model:
        - H: latent representation
        
        Model returns:
        - X_tilde: recovered data
        """     
        recovery_model = tf.keras.Sequential(name='recovery')
        for i in range(num_layers):
            recovery_model.add(rnn_cell(module_name, hidden_dim, return_sequences=True, input_shape=(seq_len, hidden_dim)))
        recovery_model.add(tf.keras.layers.Dense(dim, activation='sigmoid'))

        return recovery_model
  
    def make_generator ():  
        """Generator function: Generate time-series data in latent space.
        
        Args for model:
        - Z: random variables
        
        Model returns:
        - E: generated embedding
        """ 
        generator_model = tf.keras.Sequential(name='generator')
        generator_model.add(rnn_cell(module_name, hidden_dim, return_sequences=True, input_shape=(seq_len, dim)))
        for i in range(num_layers-1):
            generator_model.add(rnn_cell(module_name, hidden_dim, return_sequences=True, input_shape=(seq_len, hidden_dim)))
        generator_model.add(tf.keras.layers.Dense(hidden_dim, activation='sigmoid'))

        return generator_model

    def make_supervisor (): 
        """Generate next sequence using the previous sequence.
        
        Args for model:
        - H: latent representation
        
        Model returns:
        - S: generated sequence based on the latent representations generated by the generator
        """     
        supervisor_model = tf.keras.Sequential(name='supervisor')
        for i in range(num_layers-1):
            supervisor_model.add(rnn_cell(module_name, hidden_dim, return_sequences=True, input_shape=(seq_len, hidden_dim)))
        supervisor_model.add(tf.keras.layers.Dense(hidden_dim, activation='sigmoid'))

        return supervisor_model
    
    def make_discriminator ():   
        """Recovery network from latent space to original space.
        
        Args for model:
        - H: latent representation
        
        Model returns:
        - Y_hat: classification results between original and synthetic time-series
        """     
        discriminator_model = tf.keras.Sequential(name='discriminator')
        for i in range(num_layers):
            discriminator_model.add(rnn_cell(module_name, hidden_dim, return_sequences=True, input_shape=(seq_len, hidden_dim)))
        discriminator_model.add(tf.keras.layers.Dense(1, activation=None))

        return discriminator_model

    # make the models
    embedder_model = make_embedder()
    recovery_model = make_recovery()
    generator_model = make_generator()
    supervisor_model = make_supervisor()
    discriminator_model = make_discriminator()

    def get_embedder_T0_loss(X, X_tilde):
        mse = tf.keras.losses.MeanSquaredError() 
        E_loss_T0 = mse(X, X_tilde)
        return E_loss_T0

    def get_embedder_0_loss(X, X_tilde): 
        E_loss_T0 = get_embedder_T0_loss(X, X_tilde)
        E_loss0 = 10*tf.sqrt(E_loss_T0)
        return E_loss0
    
    def get_embedder_loss(X, X_tilde, H, H_hat_supervise):
        """
        computes embedder network loss

        Args:
        - X: input time-series features
        - X_tilde: recovered data
        - H: latent representation
        - H_hat_supervise: generated sequence based on the latent representations generated by the generator

        Returns:
        - E_loss: embedder loss

        """
        E_loss_T0 = get_embedder_T0_loss(X, X_tilde)
        E_loss0 = 10*tf.sqrt(E_loss_T0) #could use function above
        G_loss_S = get_generator_s_loss(H, H_hat_supervise)
        E_loss = E_loss0 + 0.1*G_loss_S
        return E_loss

    def get_generator_s_loss(H, H_hat_supervise):
        """
        computes supervised loss

        Args:
        - H: latent representation
        - H_hat_supervise: generated sequence based on the latent representations generated by the generator

        Returns:
        - G_loss_s: supervised loss for generator

        """
        mse = tf.keras.losses.MeanSquaredError()
        G_loss_S = mse(H[:,1:,:], H_hat_supervise[:,:-1,:])
        return G_loss_S

    def get_generator_loss(Y_fake, Y_fake_e, X_hat, X, H, H_hat_supervise):
        """
        computes generator loss

        Args:
        - Y_fake: classification results of latent synthetic time-series
        - Y_fake_e: classification results of generated sequence for latent synthetic time-series
        - X_hat: recovered data
        - X: input time-series data
        - H: latent representation
        - H_hat_supervise: generated sequence for latent representation

        Returns:
        - G_loss: generator loss
        - G_loss_U: unsupervised generator loss
        - G_loss_S: supervised generator loss
        - G_loss_V: moments loss for generator

        """

        #1. Adversarial loss
        if loss_mode == "bce":
            bce = tf.keras.losses.BinaryCrossentropy(from_logits=True)
            G_loss_U = bce(tf.ones_like(Y_fake), Y_fake)
            G_loss_U_e = bce(tf.ones_like(Y_fake_e), Y_fake_e)
        
        elif loss_mode == "wgan_gp":
            G_loss_U = -tf.reduce_mean(Y_fake)
            G_loss_U_e = -tf.reduce_mean(Y_fake_e)
        
        else:
            raise Exception("Loss method should be specified")

        #2. Two Moments
        X = tf.convert_to_tensor(X)
        G_loss_V1 = tf.reduce_mean(tf.abs(tf.sqrt(tf.nn.moments(X_hat,[0])[1] + 1e-6) - tf.sqrt(tf.nn.moments(X,[0])[1] + 1e-6)))
        G_loss_V2 = tf.reduce_mean(tf.abs((tf.nn.moments(X_hat,[0])[0]) - (tf.nn.moments(X,[0])[0])))
        G_loss_V = G_loss_V1 + G_loss_V2

        #3. Supervised loss
        G_loss_S = get_generator_s_loss(H, H_hat_supervise)

        #4. Summation
        G_loss = G_loss_U + gamma * G_loss_U_e + 100 * tf.sqrt(G_loss_S) + 100*G_loss_V
        return G_loss, G_loss_U, G_loss_S, G_loss_V
    
    def get_discriminator_loss(Y_real, Y_fake, Y_fake_e):
        """
        computes discriminator loss

        Args:
        - Y_real: classification results of latent real time-series
        - Y_fake: classification results of latent synthetic time-series
        - Y_fake_e: classification results of generated sequence for latent synthetic time-series

        Returns:
        - d_loss: discriminator loss

        """
        if loss_mode == "bce":
            bce = tf.keras.losses.BinaryCrossentropy(from_logits=True) #loss for cls of latent real data seq
            #default arg for tf.keras.losses.BinaryCrossentropy reduction=losses_utils.ReductionV2.AUTO
            D_loss_real = bce(tf.ones_like(Y_real), Y_real)
            D_loss_fake = bce(tf.zeros_like(Y_fake), Y_fake) 
            D_loss_fake_e = bce(tf.zeros_like(Y_fake_e), Y_fake_e) 

        #Wasserstein loss
        elif loss_mode == "wgan_gp":
            D_loss_real = -tf.reduce_mean(Y_real)
            D_loss_fake = tf.reduce_mean(Y_fake)
            D_loss_fake_e = tf.reduce_mean(Y_fake_e)
        
        else:
            raise Exception("Loss method should be specified")
            
        D_loss = D_loss_real + D_loss_fake + gamma * D_loss_fake_e

        return D_loss

    def gradient_penalty(f, H_mb, H_hat_mb):
        """
        Calculates the gradient penalty.
        This loss is calculated on an interpolated "patient"
        and added to the discriminator loss.
        """
        # get the interplated patient
        alpha = tf.random.uniform([H_mb.shape[0], 1, 1], minval=0.0, maxval=1.0, dtype=tf.float64)
        diff = H_hat_mb - H_mb
        interpolated = H_mb + alpha * diff

        with tf.GradientTape() as tape:
            tape.watch(interpolated)
            predict = f(interpolated)

        # Calculate the gradients w.r.t to this interpolated patient
        grad = tape.gradient(predict, interpolated)

        norm = tf.norm(tf.reshape(grad, [tf.shape(grad)[0], -1]), axis=1)
        gp = tf.reduce_mean((norm - 1.)**2)
        return gp


    # optimizer
    embedder0_optimizer = tf.keras.optimizers.Adam()
    embedder_optimizer = tf.keras.optimizers.Adam()
    gen_s_optimizer = tf.keras.optimizers.Adam()
    generator_optimizer = tf.keras.optimizers.Adam()
    discriminator_optimizer = tf.keras.optimizers.Adam()

    @tf.function
    def train_step_embedder(X_mb):
        """
        trains embedder model
        """

        with tf.GradientTape() as embedder_tape:
            # Embedder & Recovery
            H_mb = embedder_model(X_mb)
            X_tilde_mb = recovery_model(H_mb)

            embedder_0_loss = get_embedder_0_loss(X_mb, X_tilde_mb)
            emb_vars = embedder_model.trainable_variables + recovery_model.trainable_variables

        gradients_of_embedder = embedder_tape.gradient(embedder_0_loss, emb_vars)
        embedder0_optimizer.apply_gradients(zip(gradients_of_embedder, emb_vars))
        
        return embedder_0_loss

    @tf.function
    def train_step_generator_s(X_mb):
        """
        supervised training for generator model
        """
        
        with tf.GradientTape() as gen_s_tape: 
            
            H_mb = embedder_model(X_mb)
            
            # Generator
            H_hat_supervise_mb = supervisor_model(H_mb)

            gen_s_loss = get_generator_s_loss(H_mb, H_hat_supervise_mb) 
            gen_s_vars = supervisor_model.trainable_variables  
        gradients_of_gen_s = gen_s_tape.gradient(gen_s_loss, gen_s_vars)
        gen_s_optimizer.apply_gradients(zip(gradients_of_gen_s, gen_s_vars))

        return gen_s_loss 

    @tf.function
    def train_step_joint(X_mb, Z_mb):
        """
        joint training for the generator/supervisor models and the embedder model
        """
        #train generator
        with tf.GradientTape() as gen_tape:
            # Generator
            H_mb = embedder_model(X_mb)
            E_hat_mb = generator_model(Z_mb)
            H_hat_mb = supervisor_model(E_hat_mb)
            H_hat_supervise_mb = supervisor_model(H_mb)

            # Synthetic data
            X_hat_mb = recovery_model(H_hat_mb)
            
            # Discriminator
            Y_fake_mb = discriminator_model(H_hat_mb)
            Y_real_mb = discriminator_model(H_mb)
            Y_fake_e_mb = discriminator_model(E_hat_mb)

            gen_loss, g_loss_u, gen_s_loss, g_loss_v = get_generator_loss(Y_fake_mb, Y_fake_e_mb, X_hat_mb, X_mb, H_mb, H_hat_supervise_mb)
            gen_vars = generator_model.trainable_variables + supervisor_model.trainable_variables

        gradients_of_gen = gen_tape.gradient(gen_loss, gen_vars)
        generator_optimizer.apply_gradients(zip(gradients_of_gen, gen_vars))

        #train embedder
        with tf.GradientTape() as embedder_tape:

            H_mb = embedder_model(X_mb)

            X_tilde_mb = recovery_model(H_mb) 

            H_hat_supervise = supervisor_model(H_mb)
            
            # Minimize E_loss for the update; E_loss_T0 is returned separately for logging.
            emb_T0_loss = get_embedder_T0_loss(X_mb, X_tilde_mb)
            emb_loss = get_embedder_loss(X_mb, X_tilde_mb, H_mb, H_hat_supervise) 
            emb_vars = embedder_model.trainable_variables + recovery_model.trainable_variables

        gradients_of_emb = embedder_tape.gradient(emb_loss, emb_vars)
        embedder_optimizer.apply_gradients(zip(gradients_of_emb, emb_vars))
        
        return emb_T0_loss, emb_loss, g_loss_u, gen_s_loss, g_loss_v #H_hat_mb, E_hat_mb, 

    @tf.function
    def train_step_discriminator(X_mb, Z_mb):
        """
        trains discriminator model
        """
        
        with tf.GradientTape() as disc_tape:
            
            H_mb = embedder_model(X_mb)
            E_hat_mb = generator_model(Z_mb)
            H_hat_mb = supervisor_model(E_hat_mb)
            
            # Synthetic data
            X_hat_mb = recovery_model(H_hat_mb)
            
            # Discriminator
            Y_fake_mb = discriminator_model(H_hat_mb)
            Y_real_mb = discriminator_model(H_mb)
            Y_fake_e_mb = discriminator_model(E_hat_mb)

            # Check discriminator loss before updating
            disc_loss = get_discriminator_loss(Y_real_mb, Y_fake_mb, Y_fake_e_mb)
           
            if loss_mode == "wgan_gp":
                gp = gradient_penalty(discriminator_model, H_mb, H_hat_mb)
                disc_loss = gp * gp_weight + disc_loss

        # Train discriminator (only when the discriminator does not work well)
        if (disc_loss > 0.15):
            disc_vars = discriminator_model.trainable_variables
            gradients_of_disc = disc_tape.gradient(disc_loss, disc_vars)
        
            discriminator_optimizer.apply_gradients(zip(gradients_of_disc, disc_vars))
        
        return disc_loss

    #timeGAN training
    def train():
        
        #1. Embedding network training
        print('Start Embedding Network Training')

        for itt in range(iterations):
            # Set mini-batch
            X_mb, T_mb = batch_generator(ori_data, ori_time, batch_size)
            # Train embedder
            step_e_loss = train_step_embedder(X_mb)
           
            # Checkpoint
            if itt % 100 == 0:
                print('step: '+ str(itt) + '/' + str(iterations) + ', e_loss: ' + str(np.round(np.sqrt(step_e_loss),4)) )

        print('Finish Embedding Network Training')
        
        #2. Training only with supervised loss
        print('Start Training with Supervised Loss Only')

        for itt in range(iterations):
            # Set mini-batch
            X_mb, T_mb = batch_generator(ori_data, ori_time, batch_size)
            # Random vector generation 
            # Train generator
            step_gen_s_loss = train_step_generator_s(X_mb)

            # Checkpoint
            if itt % 100 == 0:
                print('step: '+ str(itt)  + '/' + str(iterations) +', s_loss: ' + str(np.round(np.sqrt(step_gen_s_loss),4)) )

        print('Finish Training with Supervised Loss Only')
        
        # 3. Joint Training
        print('Start Joint Training')

        for itt in range(iterations):
            # Generator training (twice more than discriminator training)
            for kk in range(2):
                # Set mini-batch
                X_mb, T_mb = batch_generator(ori_data, ori_time, batch_size)
                # Random vector generation 
                Z_mb = random_generator(batch_size, z_dim, T_mb, max_seq_len)
                # Train generator and embedder
                emb_T0_loss, emb_loss, g_loss_u, gen_s_loss, g_loss_v = train_step_joint(X_mb, Z_mb)

            # Discriminator training        
            # Set mini-batch
            X_mb, T_mb = batch_generator(ori_data, ori_time, batch_size)
            # Random vector generation 
            Z_mb = random_generator(batch_size, z_dim, T_mb, max_seq_len)
            #train discriminator
            d_loss = train_step_discriminator(X_mb, Z_mb)

            # Print multiple checkpoints
            if itt % 100 == 0:
                print('step: '+ str(itt) + '/' + str(iterations) + 
                    ', d_loss: ' + str(np.round(d_loss,4)) + 
                    ', g_loss_u: ' + str(np.round(g_loss_u,4)) + 
                    ', g_loss_s: ' + str(np.round(np.sqrt(gen_s_loss),4)) + 
                    ', g_loss_v: ' + str(np.round(g_loss_v,4)) + 
                    ', e_loss_t0: ' + str(np.round(np.sqrt(emb_T0_loss),4))  )
        
        print('Finish Joint Training')
        
        ## Synthetic data generation
        Z_mb = random_generator(no, z_dim, ori_time, max_seq_len)
        E_hat_generated = generator_model(Z_mb)
        H_hat_generated = supervisor_model(E_hat_generated)
        generated_data_curr = recovery_model(H_hat_generated)

        generated_data = list()

        for i in range(no):
            temp = generated_data_curr[i,:ori_time[i],:]
            generated_data.append(temp)
                
        # Renormalization
        generated_data = generated_data * max_val
        generated_data = generated_data + min_val
    
        return generated_data
        
    return train()


####TESTING####

# from data_loading import real_data_loading, sine_data_generation

# data_name = 'prism'
# seq_len = 10

# if data_name in ['stock', 'energy', 'sine_sampling', 'prism']:
#  ori_data = real_data_loading(data_name, seq_len)
# elif data_name == 'sine':
#   # Set number of samples and its dimensions
#   no, dim = 15, 5
#   ori_data = sine_data_generation(no, seq_len, dim)
    
# print(data_name + ' dataset is ready.')

# ## Network parameters
# parameters = dict()

# parameters['module'] = 'lstm' 
# parameters['hidden_dim'] = 3
# parameters['num_layer'] = 3
# parameters['iterations'] = 2
# parameters['batch_size'] = 2
# parameters['loss'] = "bce"

# generated_data = timegan(ori_data, parameters)
# print('Finish Synthetic Data Generation')
# print(generated_data)
Example #5
def timegan(ori_data, parameters):
    """TimeGAN function.

    Use original data as a training set to generate synthetic data (time-series).

    Args:
      - ori_data: original time-series data
      - parameters: TimeGAN network parameters

    Returns:
      - generated_data: generated time-series data
    """
    # Initialization on the Graph
    tf.reset_default_graph()

    # Basic Parameters
    no, seq_len, dim = np.asarray(ori_data).shape

    # Maximum sequence length and each sequence length
    ori_time, max_seq_len = extract_time(ori_data)

    def MinMaxScaler(data):
        """Min-Max Normalizer.

        Args:
          - data: raw data

        Returns:
          - norm_data: normalized data
          - min_val: minimum values (for renormalization)
          - max_val: maximum values (for renormalization)
        """
        min_val = np.min(np.min(data, axis=0), axis=0)
        data = data - min_val

        max_val = np.max(np.max(data, axis=0), axis=0)
        norm_data = data / (max_val + 1e-7)

        return norm_data, min_val, max_val

    # Normalization
    ori_data, min_val, max_val = MinMaxScaler(ori_data)

    # Build the RNN networks

    # Network Parameters
    hidden_dim = parameters["hidden_dim"]
    num_layers = parameters["num_layer"]
    iterations = parameters["iterations"]
    batch_size = parameters["batch_size"]
    module_name = parameters["module"]
    z_dim = dim
    gamma = 1

    # Input place holders
    X = tf.placeholder(tf.float32, [None, max_seq_len, dim], name="myinput_x")
    Z = tf.placeholder(tf.float32, [None, max_seq_len, z_dim],
                       name="myinput_z")
    T = tf.placeholder(tf.int32, [None], name="myinput_t")

    def embedder(X, T):
        """Embedding network between original feature space to latent space.

        Args:
          - X: input time-series features
          - T: input time information

        Returns:
          - H: embeddings
        """
        with tf.variable_scope("embedder", reuse=tf.AUTO_REUSE):
            e_cell = tf.nn.rnn_cell.MultiRNNCell(
                [rnn_cell(module_name, hidden_dim) for _ in range(num_layers)])
            e_outputs, e_last_states = tf.nn.dynamic_rnn(e_cell,
                                                         X,
                                                         dtype=tf.float32,
                                                         sequence_length=T)
            H = tf.contrib.layers.fully_connected(e_outputs,
                                                  hidden_dim,
                                                  activation_fn=tf.nn.sigmoid)
        return H

    def recovery(H, T):
        """Recovery network from latent space to original space.

        Args:
          - H: latent representation
          - T: input time information

        Returns:
          - X_tilde: recovered data
        """
        with tf.variable_scope("recovery", reuse=tf.AUTO_REUSE):
            r_cell = tf.nn.rnn_cell.MultiRNNCell(
                [rnn_cell(module_name, hidden_dim) for _ in range(num_layers)])
            r_outputs, r_last_states = tf.nn.dynamic_rnn(r_cell,
                                                         H,
                                                         dtype=tf.float32,
                                                         sequence_length=T)
            X_tilde = tf.contrib.layers.fully_connected(
                r_outputs, dim, activation_fn=tf.nn.sigmoid)
        return X_tilde

    def generator(Z, T):
        """Generator function: Generate time-series data in latent space.

        Args:
          - Z: random variables
          - T: input time information

        Returns:
          - E: generated embedding
        """
        with tf.variable_scope("generator", reuse=tf.AUTO_REUSE):
            e_cell = tf.nn.rnn_cell.MultiRNNCell(
                [rnn_cell(module_name, hidden_dim) for _ in range(num_layers)])
            e_outputs, e_last_states = tf.nn.dynamic_rnn(e_cell,
                                                         Z,
                                                         dtype=tf.float32,
                                                         sequence_length=T)
            E = tf.contrib.layers.fully_connected(e_outputs,
                                                  hidden_dim,
                                                  activation_fn=tf.nn.sigmoid)
        return E

    def supervisor(H, T):
        """Generate next sequence using the previous sequence.

        Args:
          - H: latent representation
          - T: input time information

        Returns:
          - S: generated sequence based on the latent representations generated by the generator
        """
        with tf.variable_scope("supervisor", reuse=tf.AUTO_REUSE):
            e_cell = tf.nn.rnn_cell.MultiRNNCell([
                rnn_cell(module_name, hidden_dim)
                for _ in range(num_layers - 1)
            ])
            e_outputs, e_last_states = tf.nn.dynamic_rnn(e_cell,
                                                         H,
                                                         dtype=tf.float32,
                                                         sequence_length=T)
            S = tf.contrib.layers.fully_connected(e_outputs,
                                                  hidden_dim,
                                                  activation_fn=tf.nn.sigmoid)
        return S

    def discriminator(H, T):
        """Discriminate the original and synthetic time-series data.

        Args:
          - H: latent representation
          - T: input time information

        Returns:
          - Y_hat: classification results between original and synthetic time-series
        """
        with tf.variable_scope("discriminator", reuse=tf.AUTO_REUSE):
            d_cell = tf.nn.rnn_cell.MultiRNNCell(
                [rnn_cell(module_name, hidden_dim) for _ in range(num_layers)])
            d_outputs, d_last_states = tf.nn.dynamic_rnn(d_cell,
                                                         H,
                                                         dtype=tf.float32,
                                                         sequence_length=T)
            Y_hat = tf.contrib.layers.fully_connected(d_outputs,
                                                      1,
                                                      activation_fn=None)
        return Y_hat

    # Embedder & Recovery
    H = embedder(X, T)
    X_tilde = recovery(H, T)

    # Generator
    E_hat = generator(Z, T)
    H_hat = supervisor(E_hat, T)
    H_hat_supervise = supervisor(H, T)

    # Synthetic data
    X_hat = recovery(H_hat, T)

    # Discriminator
    Y_fake = discriminator(H_hat, T)
    Y_real = discriminator(H, T)
    Y_fake_e = discriminator(E_hat, T)

    # Variables
    e_vars = [
        v for v in tf.trainable_variables() if v.name.startswith("embedder")
    ]
    r_vars = [
        v for v in tf.trainable_variables() if v.name.startswith("recovery")
    ]
    g_vars = [
        v for v in tf.trainable_variables() if v.name.startswith("generator")
    ]
    s_vars = [
        v for v in tf.trainable_variables() if v.name.startswith("supervisor")
    ]
    d_vars = [
        v for v in tf.trainable_variables()
        if v.name.startswith("discriminator")
    ]

    # Discriminator loss
    D_loss_real = tf.losses.sigmoid_cross_entropy(tf.ones_like(Y_real), Y_real)
    D_loss_fake = tf.losses.sigmoid_cross_entropy(tf.zeros_like(Y_fake),
                                                  Y_fake)
    D_loss_fake_e = tf.losses.sigmoid_cross_entropy(tf.zeros_like(Y_fake_e),
                                                    Y_fake_e)
    D_loss = D_loss_real + D_loss_fake + gamma * D_loss_fake_e

    # Generator loss
    # 1. Adversarial loss
    G_loss_U = tf.losses.sigmoid_cross_entropy(tf.ones_like(Y_fake), Y_fake)
    G_loss_U_e = tf.losses.sigmoid_cross_entropy(tf.ones_like(Y_fake_e),
                                                 Y_fake_e)

    # 2. Supervised loss
    G_loss_S = tf.losses.mean_squared_error(H[:, 1:, :],
                                            H_hat_supervise[:, :-1, :])

    # 3. Two Moments
    G_loss_V1 = tf.reduce_mean(
        tf.abs(
            tf.sqrt(tf.nn.moments(X_hat, [0])[1] + 1e-6) -
            tf.sqrt(tf.nn.moments(X, [0])[1] + 1e-6)))
    G_loss_V2 = tf.reduce_mean(
        tf.abs((tf.nn.moments(X_hat, [0])[0]) - (tf.nn.moments(X, [0])[0])))

    G_loss_V = G_loss_V1 + G_loss_V2

    # 4. Summation
    G_loss = G_loss_U + gamma * G_loss_U_e + 100 * tf.sqrt(
        G_loss_S) + 100 * G_loss_V

    # Embedder network loss
    E_loss_T0 = tf.losses.mean_squared_error(X, X_tilde)
    E_loss0 = 10 * tf.sqrt(E_loss_T0)
    E_loss = E_loss0 + 0.1 * G_loss_S

    # optimizer
    E0_solver = tf.train.AdamOptimizer().minimize(E_loss0,
                                                  var_list=e_vars + r_vars)
    E_solver = tf.train.AdamOptimizer().minimize(E_loss,
                                                 var_list=e_vars + r_vars)
    D_solver = tf.train.AdamOptimizer().minimize(D_loss, var_list=d_vars)
    G_solver = tf.train.AdamOptimizer().minimize(G_loss,
                                                 var_list=g_vars + s_vars)
    GS_solver = tf.train.AdamOptimizer().minimize(G_loss_S,
                                                  var_list=g_vars + s_vars)

    # TimeGAN training
    sess = tf.Session()
    sess.run(tf.global_variables_initializer())

    # 1. Embedding network training
    print("Start Embedding Network Training")

    for itt in range(iterations):
        # Set mini-batch
        X_mb, T_mb = batch_generator(ori_data, ori_time, batch_size)
        # Train embedder
        _, step_e_loss = sess.run([E0_solver, E_loss_T0],
                                  feed_dict={
                                      X: X_mb,
                                      T: T_mb
                                  })
        # Checkpoint
        if itt % 1000 == 0:
            print("step: " + str(itt) + "/" + str(iterations) + ", e_loss: " +
                  str(np.round(np.sqrt(step_e_loss), 4)))

    print("Finish Embedding Network Training")

    # 2. Training only with supervised loss
    print("Start Training with Supervised Loss Only")

    for itt in range(iterations):
        # Set mini-batch
        X_mb, T_mb = batch_generator(ori_data, ori_time, batch_size)
        # Random vector generation
        Z_mb = random_generator(batch_size, z_dim, T_mb, max_seq_len)
        # Train generator
        _, step_g_loss_s = sess.run([GS_solver, G_loss_S],
                                    feed_dict={
                                        Z: Z_mb,
                                        X: X_mb,
                                        T: T_mb
                                    })
        # Checkpoint
        if itt % 1000 == 0:
            print("step: " + str(itt) + "/" + str(iterations) + ", s_loss: " +
                  str(np.round(np.sqrt(step_g_loss_s), 4)))

    print("Finish Training with Supervised Loss Only")

    # 3. Joint Training
    print("Start Joint Training")

    for itt in range(iterations):
        # Generator training (twice more than discriminator training)
        for kk in range(2):
            # Set mini-batch
            X_mb, T_mb = batch_generator(ori_data, ori_time, batch_size)
            # Random vector generation
            Z_mb = random_generator(batch_size, z_dim, T_mb, max_seq_len)
            # Train generator
            _, step_g_loss_u, step_g_loss_s, step_g_loss_v = sess.run(
                [G_solver, G_loss_U, G_loss_S, G_loss_V],
                feed_dict={
                    Z: Z_mb,
                    X: X_mb,
                    T: T_mb
                },
            )
            # Train embedder
            _, step_e_loss_t0 = sess.run([E_solver, E_loss_T0],
                                         feed_dict={
                                             Z: Z_mb,
                                             X: X_mb,
                                             T: T_mb
                                         })

        # Discriminator training
        # Set mini-batch
        X_mb, T_mb = batch_generator(ori_data, ori_time, batch_size)
        # Random vector generation
        Z_mb = random_generator(batch_size, z_dim, T_mb, max_seq_len)
        # Check discriminator loss before updating
        check_d_loss = sess.run(D_loss, feed_dict={X: X_mb, T: T_mb, Z: Z_mb})
        # Train discriminator (only when the discriminator does not work well)
        if check_d_loss > 0.15:
            _, step_d_loss = sess.run([D_solver, D_loss],
                                      feed_dict={
                                          X: X_mb,
                                          T: T_mb,
                                          Z: Z_mb
                                      })

        # Print multiple checkpoints
        if itt % 1000 == 0:
            print("step: " + str(itt) + "/" + str(iterations) + ", d_loss: " +
                  str(np.round(step_d_loss, 4)) + ", g_loss_u: " +
                  str(np.round(step_g_loss_u, 4)) + ", g_loss_s: " +
                  str(np.round(np.sqrt(step_g_loss_s), 4)) + ", g_loss_v: " +
                  str(np.round(step_g_loss_v, 4)) + ", e_loss_t0: " +
                  str(np.round(np.sqrt(step_e_loss_t0), 4)))
    print("Finish Joint Training")

    # Synthetic data generation
    Z_mb = random_generator(no, z_dim, ori_time, max_seq_len)
    generated_data_curr = sess.run(X_hat,
                                   feed_dict={
                                       Z: Z_mb,
                                       X: ori_data,
                                       T: ori_time
                                   })

    generated_data = list()

    # Note: this outer loop appends each generated sequence 50 times, oversampling the output.
    for _ in range(50):
        for i in range(no):
            temp = generated_data_curr[i, :ori_time[i], :]
            generated_data.append(temp)

    # Renormalization
    generated_data = generated_data * max_val
    generated_data = generated_data + min_val

    return generated_data
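
#### USAGE SKETCH (TF1-style timegan) ####
# A hedged example of calling the timegan function above; the parameter values
# are illustrative and the helpers (rnn_cell, extract_time, random_generator,
# batch_generator) are assumed to be importable from the TimeGAN utils module.
#
# import numpy as np
# parameters = {'module': 'gru', 'hidden_dim': 12, 'num_layer': 3,
#               'iterations': 1000, 'batch_size': 32}
# ori_data = np.random.rand(200, 24, 6)   # (no, seq_len, dim) toy data
# generated_data = timegan(ori_data, parameters)
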
Example #6
def timegan_static(ori_data, ori_data_static, ori_data_stack, parameters):

    # Basic Parameters
    no, seq_len, dim = np.asarray(ori_data).shape

    # Maximum sequence length and each sequence length
    ori_time, max_seq_len = extract_time(ori_data)

    ori_data = np.array(ori_data)
    ori_data_static = np.array(ori_data_static)
    no_static, dim_static = ori_data_static.shape

    dstack = np.dstack((ori_data, ori_data_stack))

    no, seq_len, dim = np.asarray(ori_data).shape
    ori_time, max_seq_len = extract_time(ori_data)
    np.save('mix_data_no_seq_2k', dstack)

    def MinMaxScaler(data):
        """Min-Max Normalizer.
        
        Args:
        - data: raw data
        
        Returns:
        - norm_data: normalized data
        - min_val: minimum values (for renormalization)
        - max_val: maximum values (for renormalization)
        """
        min_val = np.nanmin(np.nanmin(data, axis=0), axis=0)
        data = data - min_val

        max_val = np.nanmax(np.nanmax(data, axis=0), axis=0)
        norm_data = data / (max_val + 1e-7)

        return norm_data, min_val, max_val

    def MinMaxScaler_static(data):
        """Min-Max Normalizer.
        
        Args:
        - data: raw data
        
        Returns:
        - norm_data: normalized data
        - min_val: minimum values (for renormalization)
        - max_val: maximum values (for renormalization)
        """
        min_val = np.nanmin(data, axis=0)
        data = data - min_val

        max_val = np.nanmax(data, axis=0)
        norm_data = data / (max_val + 1e-7)

        return norm_data, min_val, max_val

    # Normalization
    ori_data, min_val, max_val = MinMaxScaler(ori_data)

    ori_data_static, min_val_static, max_val_static = MinMaxScaler_static(
        ori_data_static)

    ## Build the RNN networks

    # Network Parameters
    hidden_dim = parameters['hidden_dim']
    num_layers = parameters['num_layer']
    iterations = parameters['iterations']
    batch_size = parameters['batch_size']
    module_name = parameters['module']
    z_dim = dim
    z_dim_static = dim_static
    gamma = 1

    def make_embedder():
        """Embedding network between original feature space to latent space.
        
        Args for model:
        - X: input time-series features
        
        Model returns:
        - H: embeddings
        """
        embedder_model = tf.keras.Sequential(name='embedder')
        embedder_model.add(
            rnn_cell(module_name,
                     hidden_dim,
                     return_sequences=True,
                     input_shape=(seq_len, dim)))
        for i in range(num_layers - 1):
            embedder_model.add(
                rnn_cell(module_name,
                         hidden_dim,
                         return_sequences=True,
                         input_shape=(seq_len, hidden_dim)))
        embedder_model.add(
            tf.keras.layers.Dense(hidden_dim, activation='sigmoid'))

        return embedder_model

    def make_embedder_static():
        """
        Embedder model for static values
        """

        embedder_model_static = tf.keras.Sequential(
            name="embedder_static",
            layers=[
                tf.keras.layers.Dense(hidden_dim, input_shape=(dim_static, )),
                tf.keras.layers.Dense(hidden_dim),
                tf.keras.layers.Dense(hidden_dim),
                tf.keras.layers.Dense(hidden_dim),
                tf.keras.layers.Dense(hidden_dim, activation=tf.nn.sigmoid)
            ])

        return embedder_model_static

    def make_recovery():
        """Recovery network from latent space to original space.
        
        Args for model:
        - H: latent representation
        
        Model returns:
        - X_tilde: recovered data
        """
        recovery_model = tf.keras.Sequential(name='recovery')
        for i in range(num_layers):
            recovery_model.add(
                rnn_cell(module_name,
                         hidden_dim,
                         return_sequences=True,
                         input_shape=(seq_len, hidden_dim)))
        recovery_model.add(tf.keras.layers.Dense(dim, activation='sigmoid'))

        return recovery_model

    def make_recovery_static():
        """
        recovery model for static values
        """

        recovery_model_static = tf.keras.Sequential(
            name="recovery_static",
            layers=[
                tf.keras.layers.Dense(dim_static, input_shape=(hidden_dim, )),
                tf.keras.layers.Dense(dim_static),
                tf.keras.layers.Dense(dim_static),
                tf.keras.layers.Dense(dim_static),
                tf.keras.layers.Dense(dim_static, activation=tf.nn.sigmoid)
            ])

        return recovery_model_static

    def make_generator():
        """Generator function: Generate time-series data in latent space.
        
        Args for model:
        - Z: random variables
        
        Model returns:
        - E: generated embedding
        """
        generator_model = tf.keras.Sequential(name='generator')
        generator_model.add(
            rnn_cell(module_name,
                     hidden_dim,
                     return_sequences=True,
                     input_shape=(seq_len, dim)))
        for i in range(num_layers - 1):
            generator_model.add(
                rnn_cell(module_name,
                         hidden_dim,
                         return_sequences=True,
                         input_shape=(seq_len, hidden_dim)))
        generator_model.add(
            tf.keras.layers.Dense(hidden_dim, activation='sigmoid'))

        return generator_model

    def make_generator_static():
        """
        generator model for static values
        """

        generator_model_static = tf.keras.Sequential(
            name="generator_static",
            layers=[
                tf.keras.layers.Dense(hidden_dim, input_shape=(dim_static, )),
                tf.keras.layers.LeakyReLU(),
                tf.keras.layers.Dense(hidden_dim),
                tf.keras.layers.LeakyReLU(),
                tf.keras.layers.Dense(hidden_dim),
                tf.keras.layers.LeakyReLU(),
                tf.keras.layers.Dense(hidden_dim, activation='tanh'),
            ])

        return generator_model_static

    def make_supervisor():
        """Generate next sequence using the previous sequence.
        
        Args for model:
        - H: latent representation
        
        Model returns:
        - S: generated sequence based on the latent representations generated by the generator
        """
        supervisor_model = tf.keras.Sequential(name='supervisor')
        for i in range(num_layers - 1):
            supervisor_model.add(
                rnn_cell(module_name,
                         hidden_dim,
                         return_sequences=True,
                         input_shape=(seq_len, hidden_dim * 2)))
        supervisor_model.add(
            tf.keras.layers.Dense(hidden_dim, activation='sigmoid'))

        return supervisor_model

    def make_discriminator():
        """Recovery network from latent space to original space.
        
        Args for model:
        - H: latent representation
        
        Model returns:
        - Y_hat: classification results between original and synthetic time-series
        """
        discriminator_model = tf.keras.Sequential(name='discriminator')
        for i in range(num_layers):
            discriminator_model.add(
                rnn_cell(module_name,
                         hidden_dim,
                         return_sequences=True,
                         input_shape=(seq_len, hidden_dim)))
        discriminator_model.add(tf.keras.layers.Dense(1, activation=None))

        return discriminator_model

    def make_discriminator_static():
        """
        discriminator model for static values
        """

        discriminator_model_static = tf.keras.Sequential(
            name="discriminator_static",
            layers=[
                tf.keras.layers.Dense(hidden_dim, input_shape=(hidden_dim, )),
                tf.keras.layers.LeakyReLU(),
                tf.keras.layers.Dense(hidden_dim),
                tf.keras.layers.LeakyReLU(),
                tf.keras.layers.Dense(hidden_dim),
                tf.keras.layers.LeakyReLU(),
                tf.keras.layers.Dense(hidden_dim),
                tf.keras.layers.Dense(1, activation=None),
            ])

        return discriminator_model_static

    # make the models
    embedder_model = make_embedder()
    recovery_model = make_recovery()
    generator_model = make_generator()
    supervisor_model = make_supervisor()
    discriminator_model = make_discriminator()

    embedder_model_static = make_embedder_static()
    recovery_model_static = make_recovery_static()
    generator_model_static = make_generator_static()
    discriminator_model_static = make_discriminator_static()

    def get_embedder_T0_loss(X, X_tilde):
        mse = tf.keras.losses.MeanSquaredError()
        E_loss_T0 = mse(X, X_tilde)
        return E_loss_T0

    def get_embedder_0_loss(X, X_tilde):
        E_loss_T0 = get_embedder_T0_loss(X, X_tilde)
        E_loss0 = 10 * tf.sqrt(E_loss_T0)
        return E_loss0

    def get_embedder_loss(X, X_tilde, H, H_hat_supervise):
        """
        computes embedder network loss
        
        Args:
        - X: input time-series features
        - X_tilde: recovered data
        - H: latent representation
        - H_hat_supervise: generated sequence based on the latent representations generated by the generator
        
        Returns:
        - E_loss: embedder loss
        """
        E_loss_T0 = get_embedder_T0_loss(X, X_tilde)
        E_loss0 = 10 * tf.sqrt(E_loss_T0)  #could use function above
        G_loss_S = get_generator_s_loss(H, H_hat_supervise)
        E_loss = E_loss0 + 0.1 * G_loss_S
        return E_loss

    def get_generator_s_loss(H, H_hat_supervise):
        """
        computes supervised loss

        Args:
        - H: latent representation
        - H_hat_supervise: generated sequence based on the latent representations generated by the generator
       
        Returns:
        - G_loss_s: supervised loss for generator
        """
        mse = tf.keras.losses.MeanSquaredError()
        G_loss_S = mse(H[:, 1:, :], H_hat_supervise[:, :-1, :])
        return G_loss_S

    def get_generator_loss(Y_fake, Y_fake_e, X_hat, X, H, H_hat_supervise):
        """
        computes generator loss for time series variables

        Args:
        - Y_fake: classification results of latent synthetic time-series
        - Y_fake_e: classification results of generated sequence for latent synthetic time-series
        - X_hat: recovered data
        - X: input time-series data
        - H: latent representation
        - H_hat_supervise: generated sequence for latent representation

        Returns:
        - G_loss: generator loss
        - G_loss_U: unsupervised generator loss
        - G_loss_S: supervised generator loss
        - G_loss_V: moments loss for generator
        """
        #1. Adversarial loss
        bce = tf.keras.losses.BinaryCrossentropy(from_logits=True)
        G_loss_U = bce(tf.ones_like(Y_fake), Y_fake)
        G_loss_U_e = bce(tf.ones_like(Y_fake_e), Y_fake_e)

        #2. Two Moments
        X = tf.convert_to_tensor(X)
        G_loss_V1 = tf.reduce_mean(
            tf.abs(
                tf.sqrt(tf.nn.moments(X_hat, [0])[1] + 1e-6) -
                tf.sqrt(tf.nn.moments(X, [0])[1] + 1e-6)))
        G_loss_V2 = tf.reduce_mean(
            tf.abs((tf.nn.moments(X_hat, [0])[0]) -
                   (tf.nn.moments(X, [0])[0])))
        G_loss_V = G_loss_V1 + G_loss_V2

        #3. Supervised loss
        G_loss_S = get_generator_s_loss(H, H_hat_supervise)

        #4. Summation
        G_loss = G_loss_U + gamma * G_loss_U_e + 100 * tf.sqrt(
            G_loss_S) + 100 * G_loss_V
        return G_loss, G_loss_U, G_loss_S, G_loss_V

    def get_generator_loss_static(Y_fake_e, X_hat, X):
        """
        returns generator loss for static values
        """
        #1. Adversarial loss
        bce = tf.keras.losses.BinaryCrossentropy(from_logits=True)
        G_loss_U_e = bce(tf.ones_like(Y_fake_e), Y_fake_e)

        #2. Two Moments
        X = tf.convert_to_tensor(X)
        G_loss_V1 = tf.reduce_mean(
            tf.abs(
                tf.sqrt(tf.nn.moments(X_hat, [0])[1] + 1e-6) -
                tf.sqrt(tf.nn.moments(X, [0])[1] + 1e-6)))
        G_loss_V2 = tf.reduce_mean(
            tf.abs((tf.nn.moments(X_hat, [0])[0]) -
                   (tf.nn.moments(X, [0])[0])))
        G_loss_V = G_loss_V1 + G_loss_V2

        #4. Summation
        G_loss = gamma * G_loss_U_e + 100 * G_loss_V
        return G_loss, G_loss_V

    def get_generator_loss_both(Y_fake, Y_fake_e, X_hat, X, H, H_hat_supervise,
                                Y_fake_e_static, X_hat_static, X_static):
        """
        returns generator loss for both static and time series variables
        """
        #1. Adversarial loss
        bce = tf.keras.losses.BinaryCrossentropy(from_logits=True)
        G_loss_U = bce(tf.ones_like(Y_fake), Y_fake)
        G_loss_U_e = bce(tf.ones_like(Y_fake_e), Y_fake_e)
        G_loss_U_e_static = bce(tf.ones_like(Y_fake_e_static), Y_fake_e_static)

        #2. Two Moments
        X = tf.convert_to_tensor(X)
        G_loss_V1 = tf.reduce_mean(
            tf.abs(
                tf.sqrt(tf.nn.moments(X_hat, [0])[1] + 1e-6) -
                tf.sqrt(tf.nn.moments(X, [0])[1] + 1e-6)))
        G_loss_V2 = tf.reduce_mean(
            tf.abs((tf.nn.moments(X_hat, [0])[0]) -
                   (tf.nn.moments(X, [0])[0])))
        G_loss_V = G_loss_V1 + G_loss_V2

        X_static = tf.convert_to_tensor(X_static)
        G_loss_V1_static = tf.reduce_mean(
            tf.abs(
                tf.sqrt(tf.nn.moments(X_hat_static, [0])[1] + 1e-6) -
                tf.sqrt(tf.nn.moments(X_static, [0])[1] + 1e-6)))
        G_loss_V2_static = tf.reduce_mean(
            tf.abs((tf.nn.moments(X_hat_static, [0])[0]) -
                   (tf.nn.moments(X_static, [0])[0])))
        G_loss_V_static = G_loss_V1_static + G_loss_V2_static

        #3. Supervised loss
        G_loss_S = get_generator_s_loss(H, H_hat_supervise)

        #4. Summation
        G_loss = G_loss_U + gamma * G_loss_U_e + 100 * tf.sqrt(
            G_loss_S
        ) + 100 * G_loss_V + gamma * G_loss_U_e_static + 100 * G_loss_V_static
        return G_loss, G_loss_U, G_loss_S, G_loss_V, G_loss_V_static

    def get_discriminator_loss(Y_real, Y_fake, Y_fake_e):
        """
        computes discriminator loss for time series variables
        
        Args:
        - Y_real: discriminator output on the real latent sequence
        - Y_fake: discriminator output on the supervised synthetic latent sequence
        - Y_fake_e: discriminator output on the raw generator output (before the supervisor)
        
        Returns:
        - d_loss: discriminator loss
        """
        bce = tf.keras.losses.BinaryCrossentropy(
            from_logits=True)  #loss for cls of latent real data seq
        #default arg for tf.keras.losses.BinaryCrossentropy reduction=losses_utils.ReductionV2.AUTO
        D_loss_real = bce(tf.ones_like(Y_real), Y_real)
        D_loss_fake = bce(tf.zeros_like(Y_fake),
                          Y_fake)  #loss for cls of latent synthetic data seq
        D_loss_fake_e = bce(tf.zeros_like(Y_fake_e),
                            Y_fake_e)  #loss for cls of latent synthetic data
        D_loss = D_loss_real + D_loss_fake + gamma * D_loss_fake_e
        return D_loss
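
    # Illustrative sketch (not part of the original code): the discriminator
    # loss labels real latent sequences as 1 and both synthetic variants as 0,
    # with the extra "_e" term weighted by gamma. The logits below are
    # hypothetical toy values.
    def _demo_discriminator_labels():
        y_real = tf.constant([[2.0], [1.5]])     # logits for real sequences
        y_fake = tf.constant([[-1.0], [-0.5]])   # logits for supervised fakes
        y_fake_e = tf.constant([[-2.0], [0.3]])  # logits for raw generator output
        return get_discriminator_loss(y_real, y_fake, y_fake_e)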

    def get_discriminator_loss_static(Y_real, Y_fake_e):
        """
        returns discriminator loss for static values
        """
        bce = tf.keras.losses.BinaryCrossentropy(
            from_logits=True)  #loss for cls of latent real data seq
        #default arg for tf.keras.losses.BinaryCrossentropy reduction=losses_utils.ReductionV2.AUTO
        D_loss_real = bce(tf.ones_like(Y_real), Y_real)
        D_loss_fake_e = bce(tf.zeros_like(Y_fake_e),
                            Y_fake_e)  #loss for cls of latent synthetic data
        D_loss = D_loss_real + gamma * D_loss_fake_e
        return D_loss

    def get_discriminator_loss_both(Y_real, Y_fake, Y_fake_e, Y_real_static,
                                    Y_fake_e_static):
        """
        returns discriminator loss for both static and temporal variables
        """
        bce = tf.keras.losses.BinaryCrossentropy(
            from_logits=True)  #loss for cls of latent real data seq
        #default arg for tf.keras.losses.BinaryCrossentropy reduction=losses_utils.ReductionV2.AUTO
        D_loss_real = bce(tf.ones_like(Y_real), Y_real)
        D_loss_fake = bce(tf.zeros_like(Y_fake),
                          Y_fake)  #loss for cls of latent synthetic data seq
        D_loss_fake_e = bce(tf.zeros_like(Y_fake_e),
                            Y_fake_e)  #loss for cls of latent synthetic data

        D_loss_real_static = bce(tf.ones_like(Y_real_static), Y_real_static)
        D_loss_fake_e_static = bce(tf.zeros_like(Y_fake_e_static),
                                   Y_fake_e_static)

        D_loss_temporal = D_loss_real + D_loss_fake + gamma * D_loss_fake_e
        D_loss_static = D_loss_real_static + D_loss_fake_e_static * gamma
        D_loss = D_loss_temporal + D_loss_static
        return D_loss, D_loss_static

    # optimizer
    embedder0_optimizer = tf.keras.optimizers.Adam()
    embedder_optimizer = tf.keras.optimizers.Adam()
    gen_s_optimizer = tf.keras.optimizers.Adam()
    generator_optimizer = tf.keras.optimizers.Adam()
    discriminator_optimizer = tf.keras.optimizers.Adam()

    embedder0_static_optimizer = tf.keras.optimizers.Adam()
    embedder_static_optimizer = tf.keras.optimizers.Adam()
    generator_static_optimizer = tf.keras.optimizers.Adam()
    discriminator_static_optimizer = tf.keras.optimizers.Adam()

    @tf.function
    def train_step_embedder_static(X_mb):
        """
        trains static embedder model
        """

        with tf.GradientTape() as embedder_static_tape:
            # Embedder & Recovery
            H_mb = embedder_model_static(X_mb)
            X_tilde_mb = recovery_model_static(H_mb)

            embedder_0_loss = get_embedder_0_loss(X_mb, X_tilde_mb)
            emb_vars = embedder_model_static.trainable_variables + recovery_model_static.trainable_variables
            gradients_of_embedder = embedder_static_tape.gradient(
                embedder_0_loss, emb_vars)
            embedder0_static_optimizer.apply_gradients(
                zip(gradients_of_embedder, emb_vars))

        return embedder_0_loss

    @tf.function
    def train_step_embedder(X_mb):
        """
        trains temporal embedder model
        """

        with tf.GradientTape() as embedder_tape:
            # Embedder & Recovery

            H_mb = embedder_model(X_mb)
            X_tilde_mb = recovery_model(H_mb)

            embedder_0_loss = get_embedder_0_loss(X_mb, X_tilde_mb)
            emb_vars = embedder_model.trainable_variables + recovery_model.trainable_variables
            gradients_of_embedder = embedder_tape.gradient(
                embedder_0_loss, emb_vars)
            embedder0_optimizer.apply_gradients(
                zip(gradients_of_embedder, emb_vars))

        return embedder_0_loss

    @tf.function
    def train_step_generator_s(X_mb, X_mb_static):
        """
        supervised training for generator model
        """

        with tf.GradientTape() as gen_s_tape:

            H_mb = embedder_model(X_mb)
            H_mb_static = embedder_model_static(X_mb_static)

            H_mb_static = tf.expand_dims(H_mb_static, axis=1)
            H_mb_static = tf.repeat(H_mb_static, seq_len, axis=1)

            #Embeddings of both static and temporal features
            H_mb_mix = tf.concat([H_mb, H_mb_static], axis=2)

            # Generator
            H_hat_supervise_mb = supervisor_model(H_mb_mix)

            gen_s_loss = get_generator_s_loss(H_mb, H_hat_supervise_mb)
            gen_s_vars = supervisor_model.trainable_variables
            gradients_of_gen_s = gen_s_tape.gradient(gen_s_loss, gen_s_vars)
            gen_s_optimizer.apply_gradients(zip(gradients_of_gen_s,
                                                gen_s_vars))

        return gen_s_loss
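
    # Illustrative sketch (not part of the original code): how a static
    # embedding of shape (batch, h_static) is broadcast along time and
    # concatenated with the temporal embedding before being fed to the
    # supervisor. The hidden sizes below are hypothetical placeholders.
    def _demo_static_temporal_concat():
        H_temporal = tf.random.normal([4, seq_len, 8])   # (batch, seq_len, h_temporal)
        H_static = tf.random.normal([4, 3])              # (batch, h_static)
        H_static = tf.expand_dims(H_static, axis=1)      # (batch, 1, h_static)
        H_static = tf.repeat(H_static, seq_len, axis=1)  # (batch, seq_len, h_static)
        return tf.concat([H_temporal, H_static], axis=2)  # (batch, seq_len, 8 + 3)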

    @tf.function
    def train_step_joint_static(X_mb, X_mb_static, Z_mb):
        """
        joint training for static generator and supervisor model, embedder model
        """
        #train generator ## STATIC Z
        with tf.GradientTape() as gen_tape:

            #Embedding
            H_mb_static = embedder_model_static(X_mb_static)

            #synthetic embedding
            E_hat_mb = generator_model_static(Z_mb)

            # Synthetic data
            X_hat_mb = recovery_model_static(E_hat_mb)

            # Discriminator
            Y_fake_e_mb = discriminator_model_static(E_hat_mb)

            gen_loss, g_loss_v = get_generator_loss_static(
                Y_fake_e_mb, X_hat_mb, X_mb_static)
            gen_vars = generator_model_static.trainable_variables
            gradients_of_gen = gen_tape.gradient(gen_loss, gen_vars)
            generator_static_optimizer.apply_gradients(
                zip(gradients_of_gen, gen_vars))

        #train embedder
        with tf.GradientTape() as embedder_tape:

            H_mb_static = embedder_model_static(X_mb_static)

            X_tilde_mb = recovery_model_static(H_mb_static)

            emb_T0_loss = get_embedder_T0_loss(X_mb_static, X_tilde_mb)
            emb_loss = get_embedder_0_loss(X_mb_static, X_tilde_mb)
            emb_vars = embedder_model_static.trainable_variables + recovery_model_static.trainable_variables
            gradients_of_emb = embedder_tape.gradient(emb_loss, emb_vars)
            embedder_static_optimizer.apply_gradients(
                zip(gradients_of_emb, emb_vars))

        return emb_T0_loss, emb_loss, g_loss_v

    @tf.function
    def train_step_joint_both(X_mb, X_mb_static, Z_mb, Z_mb_static):
        """
        joint training for both static and temporal generator and supervisor model, embedder model
        """
        #train generator - temporal + static
        with tf.GradientTape() as gen_tape:

            #Embeddings
            H_mb = embedder_model(X_mb)
            H_mb_static_1 = embedder_model_static(X_mb_static)

            H_mb_static = tf.expand_dims(H_mb_static_1, axis=1)
            H_mb_static = tf.repeat(H_mb_static, seq_len, axis=1)

            #Combine static and temporal features
            H_mb_mix = tf.concat([H_mb, H_mb_static], axis=2)

            E_hat_mb = generator_model(Z_mb)
            E_hat_mb_static_1 = generator_model_static(Z_mb_static)

            E_hat_mb_static = tf.expand_dims(E_hat_mb_static_1, axis=1)
            E_hat_mb_static = tf.repeat(E_hat_mb_static, seq_len, axis=1)

            #Combine static generator with temporal generator
            E_hat_mb_mix = tf.concat([E_hat_mb, E_hat_mb_static], axis=2)

            H_hat_mb = supervisor_model(E_hat_mb_mix)
            H_hat_supervise_mb = supervisor_model(H_mb_mix)

            # Synthetic data
            X_hat_mb = recovery_model(H_hat_mb)

            # Discriminator
            Y_fake_mb = discriminator_model(H_hat_mb)
            Y_fake_e_mb = discriminator_model(E_hat_mb)

            ###### STATIC
            # Synthetic data
            X_hat_mb_static = recovery_model_static(E_hat_mb_static_1)

            # Discriminator
            Y_fake_e_mb_static = discriminator_model_static(E_hat_mb_static_1)

            gen_loss, g_loss_u, gen_s_loss, g_loss_v, g_loss_v_static = get_generator_loss_both(
                Y_fake_mb, Y_fake_e_mb, X_hat_mb, X_mb, H_mb,
                H_hat_supervise_mb, Y_fake_e_mb_static, X_hat_mb_static,
                X_mb_static)
            gen_vars = generator_model.trainable_variables + supervisor_model.trainable_variables + generator_model_static.trainable_variables
            gradients_of_gen = gen_tape.gradient(gen_loss, gen_vars)
            generator_optimizer.apply_gradients(zip(gradients_of_gen,
                                                    gen_vars))

        #train embedder - temporal
        with tf.GradientTape() as embedder_tape:

            H_mb = embedder_model(X_mb)  #recall
            H_mb_static = embedder_model_static(X_mb_static)

            X_tilde_mb = recovery_model(H_mb)

            H_mb_static = tf.expand_dims(H_mb_static, axis=1)
            H_mb_static = tf.repeat(H_mb_static, seq_len, axis=1)

            H_mb_mix = tf.concat([H_mb, H_mb_static], axis=2)
            H_hat_supervise = supervisor_model(H_mb_mix)

            emb_T0_loss = get_embedder_T0_loss(X_mb, X_tilde_mb)
            emb_loss = get_embedder_loss(X_mb, X_tilde_mb, H_mb,
                                         H_hat_supervise)
            emb_vars = embedder_model.trainable_variables + recovery_model.trainable_variables
            gradients_of_emb = embedder_tape.gradient(emb_loss, emb_vars)
            embedder_optimizer.apply_gradients(zip(gradients_of_emb, emb_vars))

        #train embedder - static
        with tf.GradientTape() as embedder_tape:

            H_mb_static = embedder_model_static(X_mb_static)

            X_tilde_mb_static = recovery_model_static(H_mb_static)

            emb_T0_loss_static = get_embedder_T0_loss(X_mb_static,
                                                      X_tilde_mb_static)
            emb_loss_static = get_embedder_0_loss(
                X_mb_static,
                X_tilde_mb_static)  #Not sure which embedder loss to use
            emb_vars_static = embedder_model_static.trainable_variables + recovery_model_static.trainable_variables
            gradients_of_emb_static = embedder_tape.gradient(
                emb_loss_static, emb_vars_static)
            embedder_static_optimizer.apply_gradients(
                zip(gradients_of_emb_static, emb_vars_static))

        return emb_T0_loss, emb_loss, g_loss_u, gen_s_loss, g_loss_v, emb_T0_loss_static, g_loss_v_static

    @tf.function
    def train_step_discriminator_static(X_mb, X_mb_static, Z_mb):
        """
        trains static discriminator model
        """

        with tf.GradientTape() as disc_tape:

            H_mb_static = embedder_model_static(X_mb_static)

            E_hat_mb = generator_model_static(Z_mb)

            # Synthetic data
            X_hat_mb = recovery_model_static(E_hat_mb)

            # Discriminator
            Y_real_mb = discriminator_model_static(H_mb_static)
            Y_fake_e_mb = discriminator_model_static(E_hat_mb)

            # Check discriminator loss before updating
            disc_loss = get_discriminator_loss_static(Y_real_mb, Y_fake_e_mb)
            # Train discriminator (only when the discriminator does not work well)
            if (disc_loss > 0.15):
                #disc_loss = get_discriminator_loss(Y_real_mb, Y_fake_mb, Y_fake_e_mb)
                disc_vars = discriminator_model_static.trainable_variables
                gradients_of_disc = disc_tape.gradient(disc_loss, disc_vars)
                discriminator_static_optimizer.apply_gradients(
                    zip(gradients_of_disc, disc_vars))

        return disc_loss

    @tf.function
    def train_step_discriminator_both(X_mb, X_mb_static, Z_mb, Z_mb_static):
        """
        trains both static and temporal discriminator model
        """

        #training discriminator - temporal + static
        with tf.GradientTape() as disc_tape:

            H_mb = embedder_model(X_mb)
            H_mb_static_1 = embedder_model_static(X_mb_static)

            H_mb_static = tf.expand_dims(H_mb_static_1, axis=1)
            H_mb_static = tf.repeat(H_mb_static, seq_len, axis=1)
            H_mb_mix = tf.concat([H_mb, H_mb_static], axis=2)

            E_hat_mb = generator_model(Z_mb)
            E_hat_mb_static_1 = generator_model_static(Z_mb_static)

            E_hat_mb_static = tf.expand_dims(E_hat_mb_static_1, axis=1)
            E_hat_mb_static = tf.repeat(E_hat_mb_static, seq_len, axis=1)

            E_hat_mb_mix = tf.concat([E_hat_mb, E_hat_mb_static], axis=2)
            H_hat_mb = supervisor_model(E_hat_mb_mix)

            # Synthetic data
            X_hat_mb = recovery_model(H_hat_mb)

            # Discriminator
            Y_fake_mb = discriminator_model(H_hat_mb)
            Y_real_mb = discriminator_model(H_mb)
            Y_fake_e_mb = discriminator_model(E_hat_mb)

            ### Discriminator static
            Y_real_mb_static = discriminator_model_static(H_mb_static_1)
            Y_fake_e_mb_static = discriminator_model_static(E_hat_mb_static_1)

            # Check discriminator loss before updating
            disc_loss, disc_loss_static = get_discriminator_loss_both(
                Y_real_mb, Y_fake_mb, Y_fake_e_mb, Y_real_mb_static,
                Y_fake_e_mb_static)
            # Train discriminator (only when the discriminator does not work well)
            if (disc_loss > 0.15):
                disc_vars = discriminator_model.trainable_variables + discriminator_model_static.trainable_variables
                gradients_of_disc = disc_tape.gradient(disc_loss, disc_vars)
                discriminator_optimizer.apply_gradients(
                    zip(gradients_of_disc, disc_vars))

        return disc_loss, disc_loss_static

    #timeGAN training
    def train():
        #1. Embedding static network training
        print('Start Static Embedding Network Training')

        for itt in range(iterations):
            # Set mini-batch
            _, X_mb_static, _ = batch_generator_with_static(
                ori_data, ori_data_static, ori_time, batch_size)

            # Train embedder
            step_e_loss = train_step_embedder_static(X_mb_static)

            # Checkpoint
            if itt % 1000 == 0:
                print('step: ' + str(itt) + '/' + str(iterations) +
                      ', e_loss: ' + str(np.round(np.sqrt(step_e_loss), 4)))

        print('Finish static Embedding Network Training')

        #1. Embedding network training

        print('Start Embedding Network Training')
        for itt in range(iterations):
            # Set mini-batch
            X_mb, _, T_mb = batch_generator_with_static(
                ori_data, ori_data_static, ori_time, batch_size)
            # Train embedder
            step_e_loss = train_step_embedder(X_mb)

            # Checkpoint
            if itt % 1000 == 0:
                print('step: ' + str(itt) + '/' + str(iterations) +
                      ', e_loss: ' + str(np.round(np.sqrt(step_e_loss), 4)))

        #2. Training only with supervised loss
        print('Start Training with Supervised Loss Only')

        for itt in range(iterations):
            # Set mini-batch
            X_mb, X_mb_static, T_mb = batch_generator_with_static(
                ori_data, ori_data_static, ori_time, batch_size)
            # Train generator
            step_gen_s_loss = train_step_generator_s(X_mb, X_mb_static)

            # Checkpoint
            if itt % 1000 == 0:
                print('step: ' + str(itt) + '/' + str(iterations) +
                      ', s_loss: ' +
                      str(np.round(np.sqrt(step_gen_s_loss), 4)))

        print('Finish Training with Supervised Loss Only')

        # 3. Joint Training
        print('Start Joint Training')

        for itt in range(iterations):

            # Generator training (twice as often as discriminator training)
            for kk in range(2):
                # Set mini-batch
                X_mb, X_mb_static, T_mb = batch_generator_with_static(
                    ori_data, ori_data_static, ori_time, batch_size)
                # Random vector generation
                Z_mb = random_generator(batch_size, z_dim, T_mb, max_seq_len)
                Z_mb_static = random_generator_static(batch_size, z_dim_static,
                                                      T_mb, max_seq_len)
                # Train generator and embedder
                emb_T0_loss, emb_loss, g_loss_u, gen_s_loss, g_loss_v, emb_T0_loss_static, g_loss_v_static = train_step_joint_both(
                    X_mb, X_mb_static, Z_mb, Z_mb_static)

            # Discriminator training
            # Set mini-batch
            X_mb, X_mb_static, T_mb = batch_generator_with_static(
                ori_data, ori_data_static, ori_time, batch_size)
            # Random vector generation
            Z_mb = random_generator(batch_size, z_dim, T_mb, max_seq_len)
            Z_mb_static = random_generator_static(batch_size, z_dim_static,
                                                  T_mb, max_seq_len)
            #train discriminator
            d_loss, d_loss_static = train_step_discriminator_both(
                X_mb, X_mb_static, Z_mb, Z_mb_static)

            # Print multiple checkpoints
            if itt % 200 == 0:
                print('step: ' + str(itt) + '/' + str(iterations) +
                      ', d_loss: ' + str(np.round(d_loss, 4)) +
                      ', g_loss_u: ' + str(np.round(g_loss_u, 4)) +
                      ', g_loss_s: ' + str(np.round(np.sqrt(gen_s_loss), 4)) +
                      ', g_loss_v: ' + str(np.round(g_loss_v, 4)) +
                      ', e_loss_t0: ' +
                      str(np.round(np.sqrt(emb_T0_loss), 4)) +
                      ', d_loss_static: ' + str(np.round(d_loss_static, 4)) +
                      ', g_loss_v_static: ' +
                      str(np.round(g_loss_v_static, 4)))

        print('Finish Joint Training')

        ## Synthetic data generation
        Z_mb = random_generator(no, z_dim, ori_time, max_seq_len)
        Z_mb_static = random_generator_static(no, z_dim_static, ori_time,
                                              max_seq_len)

        # generate in latent dim
        E_hat_generated = generator_model(Z_mb)
        E_hat_generated_static = generator_model_static(Z_mb_static)

        # repeat for seq_len for static values
        E_hat_generated_static_ = tf.expand_dims(E_hat_generated_static,
                                                 axis=1)
        E_hat_generated_static_ = tf.repeat(E_hat_generated_static_,
                                            seq_len,
                                            axis=1)

        # join static and temporal together
        E_hat_generated_mix = tf.concat(
            [E_hat_generated, E_hat_generated_static_], axis=2)

        H_hat_generated = supervisor_model(E_hat_generated_mix)

        # map up to original dimension
        generated_data_curr = recovery_model(H_hat_generated)
        generated_data_curr_static = recovery_model_static(
            E_hat_generated_static)

        generated_data_static = list()

        for i in range(no):
            temp = generated_data_curr_static[i, :]
            generated_data_static.append(temp)

        # Renormalization
        generated_data_static = generated_data_static * max_val_static
        generated_data_static = generated_data_static + min_val_static

        generated_data_seq = np.array(
            [[generated_data_static[i] for _ in range(seq_len)]
             for i in range(no)])

        generated_data = list()

        for i in range(no):
            temp = generated_data_curr[i, :ori_time[i], :]
            generated_data.append(temp)

        # Renormalization

        generated_data = generated_data * max_val
        generated_data = generated_data + min_val

        generated_data = np.dstack((generated_data, generated_data_seq))

        return generated_data

    return train()
Example #7
0
    def assert_extract_time(self, message, ftime):
        self.assertEqual(extract_time(message), ftime)
def timegan(ori_data, parameters):

    # Basic Parameters
    no, seq_len, dim = np.asarray(ori_data).shape

    # Maximum sequence length and each sequence length
    ori_time, max_seq_len = extract_time(ori_data)

    def MinMaxScaler(data):
        """Min-Max Normalizer.
        
        Args:
        - data: raw data
        
        Returns:
        - norm_data: normalized data
        - min_val: minimum values (for renormalization)
        - max_val: maximum values (for renormalization)
        """
        min_val = np.nanmin(np.nanmin(data, axis=0), axis=0)
        data = data - min_val

        max_val = np.nanmax(np.nanmax(data, axis=0), axis=0)
        norm_data = data / (max_val + 1e-7)

        return norm_data, min_val, max_val
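
    # Hedged sketch (not in the original): the inverse of MinMaxScaler, as
    # applied later when generated samples are mapped back to the original
    # scale (the small 1e-7 offset used above is ignored here).
    def _demo_renormalize(norm_data, min_val, max_val):
        return norm_data * max_val + min_val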

    # Normalization
    ori_data, min_val, max_val = MinMaxScaler(ori_data)

    ## Build the RNN networks

    # Network Parameters
    hidden_dim = parameters['hidden_dim']
    num_layers = parameters['num_layer']
    iterations = parameters['iterations']
    batch_size = parameters['batch_size']
    module_name = parameters['module']
    z_dim = dim
    gamma = 1
    mask_value = 0

    def make_embedder():
        """Embedding network between original feature space to latent space.
        
        Args:
        - X: input time-series features
        - T: input time information
        
        Returns:
        - H: embeddings
        """
        embedder_model = tf.keras.Sequential(name='embedder')
        embedder_model.add(
            tf.keras.layers.Masking(mask_value=0, input_shape=(seq_len, dim)))
        embedder_model.add(
            rnn_cell(module_name,
                     hidden_dim,
                     return_sequences=True,
                     input_shape=(seq_len, dim)))
        for i in range(num_layers - 1):
            embedder_model.add(
                rnn_cell(module_name,
                         hidden_dim,
                         return_sequences=True,
                         input_shape=(seq_len, hidden_dim)))
        embedder_model.add(
            tf.keras.layers.Dense(hidden_dim, activation='sigmoid'))

        return embedder_model
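
    # Illustrative sketch (not part of the original code): the Masking layer
    # above skips timesteps whose features all equal mask_value, so zero-padded
    # rows do not update the recurrent state. The toy model below uses a plain
    # GRU and hypothetical shapes rather than the rnn_cell helper.
    def _demo_masking_layer():
        demo = tf.keras.Sequential([
            tf.keras.layers.Masking(mask_value=0.0, input_shape=(5, 3)),
            tf.keras.layers.GRU(4, return_sequences=True),
        ])
        x = tf.concat([tf.ones([2, 3, 3]), tf.zeros([2, 2, 3])], axis=1)
        return demo(x)  # the two padded timesteps do not update the GRU state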

    def make_recovery():
        """Recovery network from latent space to original space.
        
        Args:
        - H: latent representation
        - T: input time information
        
        Returns:
        - X_tilde: recovered data
        """
        recovery_model = tf.keras.Sequential(name='recovery')
        recovery_model.add(
            tf.keras.layers.Masking(mask_value=0,
                                    input_shape=(seq_len, hidden_dim)))
        for i in range(num_layers):
            recovery_model.add(
                rnn_cell(module_name,
                         hidden_dim,
                         return_sequences=True,
                         input_shape=(seq_len, hidden_dim)))
        recovery_model.add(tf.keras.layers.Dense(dim, activation='sigmoid'))

        return recovery_model

    def make_generator():
        """Generator function: Generate time-series data in latent space.
        
        Args:
        - Z: random variables
        - T: input time information
        
        Returns:
        - E: generated embedding
        """
        generator_model = tf.keras.Sequential(name='generator')
        generator_model.add(
            rnn_cell(module_name,
                     hidden_dim,
                     return_sequences=True,
                     input_shape=(seq_len, dim)))
        for i in range(num_layers - 1):
            generator_model.add(
                rnn_cell(module_name,
                         hidden_dim,
                         return_sequences=True,
                         input_shape=(seq_len, hidden_dim)))
        generator_model.add(
            tf.keras.layers.Dense(hidden_dim, activation='sigmoid'))

        return generator_model

    def make_supervisor():
        """Generate next sequence using the previous sequence.
        
        Args:
        - H: latent representation
        - T: input time information
        
        Returns:
        - S: generated sequence based on the latent representations generated by the generator
        """
        supervisor_model = tf.keras.Sequential(name='supervisor')
        for i in range(num_layers - 1):
            supervisor_model.add(
                rnn_cell(module_name,
                         hidden_dim,
                         return_sequences=True,
                         input_shape=(seq_len, hidden_dim)))
        supervisor_model.add(
            tf.keras.layers.Dense(hidden_dim, activation='sigmoid'))

        return supervisor_model

    def make_discriminator():
        """Recovery network from latent space to original space.
        
        Args:
        - H: latent representation
        - T: input time information
        
        Returns:
        - X_tilde: recovered data
        """
        discriminator_model = tf.keras.Sequential(name='discriminator')
        for i in range(num_layers):
            discriminator_model.add(
                rnn_cell(module_name,
                         hidden_dim,
                         return_sequences=True,
                         input_shape=(seq_len, hidden_dim)))
        discriminator_model.add(tf.keras.layers.Dense(1, activation=None))

        return discriminator_model

    embedder_model = make_embedder()
    recovery_model = make_recovery()
    generator_model = make_generator()
    supervisor_model = make_supervisor()
    discriminator_model = make_discriminator()

    def get_embedder_T0_loss(X, X_tilde, mask_slice):
        """
        returns embedder_T0 loss
        Args:
        - X: masked input time-series
        - X_tilde: masked reconstruction of X (embedding followed by recovery)
        - mask_slice: (batch_size, seq_len, 1) tensor; 1 for observed rows, 0 for masked rows
        Returns:
        - E_loss_T0: scalar embedder_T0 loss
        """
        #mse = tf.keras.losses.MeanSquaredError() #this automatically does reduction from array to scalar
        mse_loss = tf.keras.losses.mean_squared_error(
            X, X_tilde)  #this is still an array and not reduced
        #reduce array to scalar
        #take mean over number of non-masked rows (not seq_length)
        E_loss_T0 = tf.reduce_sum(mse_loss) / tf.reduce_sum(mask_slice)
        return E_loss_T0
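
    # Illustrative sketch (not in the original): with masked rows zeroed out,
    # tf.keras.losses.mean_squared_error returns a (batch, seq_len) array of
    # per-timestep errors, and dividing its sum by the number of unmasked rows
    # averages over real timesteps only. The toy values below are hypothetical;
    # the expected result is 0.25.
    def _demo_masked_mse():
        X_demo = tf.constant([[[1.0], [2.0], [0.0]]])     # last row is padding
        X_tilde_demo = tf.constant([[[1.5], [2.5], [0.0]]])
        mask_demo = tf.constant([[[1.0], [1.0], [0.0]]])  # two unmasked rows
        return get_embedder_T0_loss(X_demo, X_tilde_demo, mask_demo)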

    def get_embedder_0_loss(X, X_tilde, mask_slice):
        E_loss_T0 = get_embedder_T0_loss(X, X_tilde, mask_slice)
        E_loss0 = 10 * tf.sqrt(E_loss_T0)
        return E_loss0

    def get_embedder_loss(X, X_tilde, H, H_hat_supervise, mask_slice):
        """
        returns embedder network loss
        """
        E_loss_T0 = get_embedder_T0_loss(X, X_tilde, mask_slice)
        E_loss0 = 10 * tf.sqrt(E_loss_T0)  #could use function above
        G_loss_S = get_generator_s_loss(H, H_hat_supervise, mask_slice)
        E_loss = E_loss0 + 0.1 * G_loss_S
        return E_loss

    def get_generator_s_loss(H, H_hat_supervise, mask_slice):
        """
        returns supervised loss
        """
        #mse = tf.keras.losses.MeanSquaredError()
        #G_loss_S = mse(H[:,1:,:], H_hat_supervise[:,:-1,:])
        mse_loss = tf.keras.losses.mean_squared_error(
            H[:, 1:, :], H_hat_supervise[:, :-1, :])
        G_loss_S = tf.reduce_sum(mse_loss) / tf.reduce_sum(mask_slice)
        return G_loss_S

    def get_generator_loss(Y_fake, Y_fake_e, X_hat, X, H, H_hat_supervise,
                           mask_slice):
        """
        returns generator loss
        """
        #1. Adversarial loss
        # bce = tf.keras.losses.BinaryCrossentropy(from_logits=True)
        # G_loss_U = bce(tf.ones_like(Y_fake), Y_fake)
        # G_loss_U_e = bce(tf.ones_like(Y_fake_e), Y_fake_e)

        #masked adversarial loss: per-timestep BCE weighted by the mask and averaged over unmasked rows
        bce_loss_y_fake = tf.keras.losses.binary_crossentropy(
            tf.ones_like(Y_fake), Y_fake,
            from_logits=True) * tf.squeeze(mask_slice)
        G_loss_U = tf.reduce_sum(bce_loss_y_fake) / tf.reduce_sum(mask_slice)
        bce_loss_y_fake_e = tf.keras.losses.binary_crossentropy(
            tf.ones_like(Y_fake_e), Y_fake_e,
            from_logits=True) * tf.squeeze(mask_slice)
        G_loss_U_e = tf.reduce_sum(bce_loss_y_fake_e) / tf.reduce_sum(
            mask_slice)

        #2. Two Moments
        X = tf.convert_to_tensor(X)
        #G_loss_V1 = tf.reduce_mean(tf.abs(tf.sqrt(tf.nn.moments(X_hat,[0])[1] + 1e-6) - tf.sqrt(tf.nn.moments(X,[0])[1] + 1e-6)))
        #G_loss_V2 = tf.reduce_mean(tf.abs((tf.nn.moments(X_hat,[0])[0]) - (tf.nn.moments(X,[0])[0])))

        #calculate mean- equivalent to masked version of tf.nn.moments(X_hat,[0])[0]
        #calculate variance- equivalent to masked version of tf.nn.moments(X_hat,[0])[1]
        mean_X_hat = tf.reduce_sum(
            X_hat, [0]) / (tf.reduce_sum(mask_slice, [0]) + 1e-6)
        squared_X_hat = tf.square(X_hat - mean_X_hat) * mask_slice
        variance_X_hat = (tf.reduce_sum(squared_X_hat, [0]) /
                          (tf.reduce_sum(mask_slice, [0]) + 1e-6)
                          )  #sample variance (biased)

        mean_X = tf.reduce_sum(X,
                               [0]) / (tf.reduce_sum(mask_slice, [0]) + 1e-6)
        squared_X = tf.square(X - mean_X) * mask_slice
        variance_X = (tf.reduce_sum(squared_X, [0]) /
                      (tf.reduce_sum(mask_slice, [0]) + 1e-6)
                      )  #sample variance (biased)

        #get num unmasked value for reduced mean = total_num_of_values (seq_len*dim) - num_values_in_completely_masked_rows (rows that are masked in all patients in the batch * dim)
        num_unmasked_values = tf.reduce_sum(
            tf.clip_by_value(tf.reduce_sum(mask_slice, [2, 0]),
                             clip_value_min=0,
                             clip_value_max=1)) * dim
        G_loss_V1 = tf.reduce_sum(
            tf.abs(tf.sqrt(variance_X_hat) -
                   tf.sqrt(variance_X))) / (num_unmasked_values + 1e-6)
        G_loss_V2 = tf.reduce_sum(
            tf.abs(mean_X_hat - mean_X)) / (num_unmasked_values + 1e-6)

        G_loss_V = G_loss_V1 + G_loss_V2

        #3. Supervised loss
        G_loss_S = get_generator_s_loss(H, H_hat_supervise, mask_slice)

        #4. Summation
        G_loss = G_loss_U + gamma * G_loss_U_e + 100 * tf.sqrt(
            G_loss_S) + 100 * G_loss_V
        return G_loss, G_loss_U, G_loss_S, G_loss_V
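
    # Illustrative sketch (not in the original): with an all-ones mask the
    # masked mean computed above reduces to tf.nn.moments(X, [0])[0]; this toy
    # check (hypothetical shapes) illustrates the equivalence.
    def _demo_masked_mean_matches_unmasked():
        X_demo = tf.random.uniform([8, 5, 3], dtype=tf.float64)
        mask_demo = tf.ones([8, 5, 1], dtype=tf.float64)
        mean_masked = tf.reduce_sum(X_demo, [0]) / (tf.reduce_sum(mask_demo, [0]) + 1e-6)
        mean_unmasked = tf.nn.moments(X_demo, [0])[0]
        return tf.reduce_max(tf.abs(mean_masked - mean_unmasked))  # ~0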

    def get_discriminator_loss(Y_real, Y_fake, Y_fake_e, mask_slice):
        """
        returns discriminator loss
        """
        # bce = tf.keras.losses.BinaryCrossentropy(from_logits=True) #loss for cls of latent real data seq
        #default arg for tf.keras.losses.BinaryCrossentropy reduction=losses_utils.ReductionV2.AUTO
        # D_loss_real = bce(tf.ones_like(Y_real), Y_real)
        # D_loss_fake = bce(tf.zeros_like(Y_fake), Y_fake) #loss for cls of latent synthethic data seq
        # D_loss_fake_e = bce(tf.zeros_like(Y_fake_e), Y_fake_e) #loss for cls of latent synthetic data

        #same masking approach as the adversarial loss: per-timestep BCE averaged over unmasked rows
        bce_loss_y_real = tf.keras.losses.binary_crossentropy(
            tf.ones_like(Y_real), Y_real,
            from_logits=True) * tf.squeeze(mask_slice)
        D_loss_real = tf.reduce_sum(bce_loss_y_real) / tf.reduce_sum(
            mask_slice)
        bce_loss_y_fake = tf.keras.losses.binary_crossentropy(
            tf.zeros_like(Y_fake), Y_fake,
            from_logits=True) * tf.squeeze(mask_slice)  #fake samples are labelled 0
        D_loss_fake = tf.reduce_sum(bce_loss_y_fake) / tf.reduce_sum(
            mask_slice)
        bce_loss_y_fake_e = tf.keras.losses.binary_crossentropy(
            tf.zeros_like(Y_fake_e), Y_fake_e,
            from_logits=True) * tf.squeeze(mask_slice)
        D_loss_fake_e = tf.reduce_sum(bce_loss_y_fake_e) / tf.reduce_sum(
            mask_slice)
        D_loss = D_loss_real + D_loss_fake + gamma * D_loss_fake_e

        return D_loss

    # optimizer
    embedder0_optimizer = tf.keras.optimizers.Adam()
    embedder_optimizer = tf.keras.optimizers.Adam()
    gen_s_optimizer = tf.keras.optimizers.Adam()
    generator_optimizer = tf.keras.optimizers.Adam()
    discriminator_optimizer = tf.keras.optimizers.Adam()

    @tf.function
    def train_step_embedder(X_mb):

        with tf.GradientTape() as embedder_tape:

            #get a mask by looking at the first column of X_mb
            mask_slice = tf.slice(
                X_mb, [0, 0, 0],
                [batch_size, seq_len, 1])  #first column as mask slice
            mask_val = tf.ones([batch_size, seq_len, 1],
                               dtype=tf.float64) * mask_value
            mask_slice = (mask_slice != mask_val)  #False means masked
            mask_slice = tf.cast(mask_slice, tf.float64)
            X_mb = tf.multiply(X_mb, mask_slice)  #masking

            # Embedder & Recovery
            H_mb = embedder_model(X_mb)
            H_mb = tf.multiply(H_mb, mask_slice)  #masking

            X_tilde_mb = recovery_model(H_mb)
            X_tilde_mb = tf.multiply(X_tilde_mb, mask_slice)  #masking

            #minimize embedder_0_loss; embedder_T0_loss is returned for logging
            embedder_0_loss = get_embedder_0_loss(X_mb, X_tilde_mb,
                                                  mask_slice)  #minimize
            embedder_T0_loss = get_embedder_T0_loss(X_mb, X_tilde_mb,
                                                    mask_slice)  #print
            emb_vars = embedder_model.trainable_variables + recovery_model.trainable_variables
        gradients_of_embedder = embedder_tape.gradient(embedder_0_loss,
                                                       emb_vars)
        embedder0_optimizer.apply_gradients(
            zip(gradients_of_embedder, emb_vars))

        return embedder_T0_loss
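
    # Illustrative sketch (not part of the original training step): how the
    # mask slice is derived from the first feature column; a row whose first
    # feature equals mask_value is treated as padding and zeroed out. Shapes
    # are hypothetical and taken from whatever batch is passed in.
    def _demo_build_mask_slice(X_demo):
        first_col = tf.slice(X_demo, [0, 0, 0],
                             [tf.shape(X_demo)[0], tf.shape(X_demo)[1], 1])
        mask_demo = tf.cast(first_col != mask_value, X_demo.dtype)
        return tf.multiply(X_demo, mask_demo)  # zero out padded rows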

    @tf.function
    def train_step_generator_s(X_mb):

        with tf.GradientTape() as gen_s_tape:  #, tf.GradientTape() as s_tape:

            #get a mask slice for masked value in X_mb
            mask_slice = tf.slice(
                X_mb, [0, 0, 0],
                [batch_size, seq_len, 1])  #first column as mask slice
            mask_val = tf.ones([batch_size, seq_len, 1],
                               dtype=tf.float64) * mask_value
            mask_slice = (mask_slice != mask_val)  #False means masked
            mask_slice = tf.cast(mask_slice, tf.float64)
            X_mb = tf.multiply(X_mb, mask_slice)  #masking

            H_mb = embedder_model(X_mb)  #recall
            H_mb = tf.multiply(H_mb, mask_slice)  #masking

            H_hat_supervise_mb = supervisor_model(H_mb)
            H_hat_supervise_mb = tf.multiply(H_hat_supervise_mb, mask_slice)

            gen_s_loss = get_generator_s_loss(
                H_mb, H_hat_supervise_mb, mask_slice
            )  #not sure if i should do whole gen loss or only gen_s loss
            gen_s_vars = supervisor_model.trainable_variables  #generator_model.trainable_variables +
            #vars = [generator_model.trainable_variables, supervisor_model.trainable_variables]
        gradients_of_gen_s = gen_s_tape.gradient(gen_s_loss, gen_s_vars)
        gen_s_optimizer.apply_gradients(zip(gradients_of_gen_s, gen_s_vars))

        return gen_s_loss  # E_hat_mb, H_hat_mb, H_hat_supervise_mb,  #,generator_model, supervisor_model

    @tf.function
    def train_step_joint(X_mb, Z_mb):
        #train generator
        with tf.GradientTape() as gen_tape:

            #get a mask slice for masked value in X_mb
            mask_slice = tf.slice(
                X_mb, [0, 0, 0],
                [batch_size, seq_len, 1])  #first column as mask slice
            mask_val = tf.ones([batch_size, seq_len, 1],
                               dtype=tf.float64) * mask_value
            mask_slice = (mask_slice != mask_val)  #False means masked
            mask_slice = tf.cast(mask_slice, tf.float64)
            X_mb = tf.multiply(X_mb, mask_slice)  #masking
            Z_mb = tf.multiply(Z_mb, mask_slice)  #masking

            # Generator
            #not sure if i should call these generators and supervisors again
            #because returning models from train_step_generator_s and getting trainable variables does not work?
            #so called it again here
            H_mb = embedder_model(X_mb)  #recall
            H_mb = tf.multiply(H_mb, mask_slice)  #masking
            E_hat_mb = generator_model(Z_mb)  #is this a recall?
            E_hat_mb = tf.multiply(E_hat_mb, mask_slice)  #masking
            H_hat_mb = supervisor_model(E_hat_mb)  #recall
            H_hat_mb = tf.multiply(H_hat_mb, mask_slice)  #masking
            H_hat_supervise_mb = supervisor_model(H_mb)  #recall
            H_hat_supervise_mb = tf.multiply(H_hat_supervise_mb, mask_slice)

            # Synthetic data
            X_hat_mb = recovery_model(H_hat_mb)
            X_hat_mb = tf.multiply(X_hat_mb, mask_slice)

            # Discriminator
            Y_fake_mb = discriminator_model(H_hat_mb)
            Y_fake_mb = tf.multiply(Y_fake_mb, mask_slice)
            Y_real_mb = discriminator_model(H_mb)
            Y_real_mb = tf.multiply(Y_real_mb, mask_slice)
            Y_fake_e_mb = discriminator_model(E_hat_mb)
            Y_fake_e_mb = tf.multiply(Y_fake_e_mb, mask_slice)

            gen_loss, g_loss_u, gen_s_loss, g_loss_v = get_generator_loss(
                Y_fake_mb, Y_fake_e_mb, X_hat_mb, X_mb, H_mb,
                H_hat_supervise_mb, mask_slice)
            gen_vars = generator_model.trainable_variables + supervisor_model.trainable_variables
        gradients_of_gen = gen_tape.gradient(gen_loss, gen_vars)
        generator_optimizer.apply_gradients(zip(gradients_of_gen, gen_vars))

        #train embedder
        with tf.GradientTape() as embedder_tape:

            #get a mask slice for masked value in X_mb
            mask_slice = tf.slice(
                X_mb, [0, 0, 0],
                [batch_size, seq_len, 1])  #first column as mask slice
            mask_val = tf.ones([batch_size, seq_len, 1],
                               dtype=tf.float64) * mask_value
            mask_slice = (mask_slice != mask_val)  #False means masked
            mask_slice = tf.cast(mask_slice, tf.float64)
            X_mb = tf.multiply(X_mb, mask_slice)  #masking

            H_mb = embedder_model(X_mb)  #recall
            H_mb = tf.multiply(H_mb, mask_slice)  #masking

            X_tilde_mb = recovery_model(H_mb)
            X_tilde_mb = tf.multiply(X_tilde_mb, mask_slice)  #masking
            H_hat_supervise = supervisor_model(
                H_mb)  #called in order to get emb_loss
            H_hat_supervise = tf.multiply(H_hat_supervise, mask_slice)

            #minimize the full embedder loss (E_loss); E_loss_T0 is returned for logging
            emb_T0_loss = get_embedder_T0_loss(X_mb, X_tilde_mb, mask_slice)
            emb_loss = get_embedder_loss(X_mb, X_tilde_mb, H_mb,
                                         H_hat_supervise, mask_slice)
            emb_vars = embedder_model.trainable_variables + recovery_model.trainable_variables
        gradients_of_emb = embedder_tape.gradient(emb_loss, emb_vars)
        embedder_optimizer.apply_gradients(zip(gradients_of_emb, emb_vars))

        return emb_T0_loss, emb_loss, g_loss_u, gen_s_loss, g_loss_v  #H_hat_mb, E_hat_mb,

    @tf.function
    def train_step_discriminator(X_mb, Z_mb):

        with tf.GradientTape() as disc_tape:

            #get a mask slice for masked value in X_mb
            mask_slice = tf.slice(
                X_mb, [0, 0, 0],
                [batch_size, seq_len, 1])  #first column as mask slice
            mask_val = tf.ones([batch_size, seq_len, 1],
                               dtype=tf.float64) * mask_value
            mask_slice = (mask_slice != mask_val)  #False means masked
            mask_slice = tf.cast(mask_slice, tf.float64)
            X_mb = tf.multiply(X_mb, mask_slice)  #masking

            H_mb = embedder_model(X_mb)  #recall
            H_mb = tf.multiply(H_mb, mask_slice)  #masking
            E_hat_mb = generator_model(Z_mb)  #recall
            E_hat_mb = tf.multiply(E_hat_mb, mask_slice)  #masking
            H_hat_mb = supervisor_model(E_hat_mb)  #recall
            H_hat_mb = tf.multiply(H_hat_mb, mask_slice)  #masking

            # Synthetic data
            X_hat_mb = recovery_model(H_hat_mb)
            X_hat_mb = tf.multiply(X_hat_mb, mask_slice)  #masking

            # Discriminator
            Y_fake_mb = discriminator_model(H_hat_mb)
            Y_fake_mb = tf.multiply(Y_fake_mb, mask_slice)  #masking
            Y_real_mb = discriminator_model(H_mb)
            Y_real_mb = tf.multiply(Y_real_mb, mask_slice)  #masking
            Y_fake_e_mb = discriminator_model(E_hat_mb)
            Y_fake_e_mb = tf.multiply(Y_fake_e_mb, mask_slice)  #masking

            # Check discriminator loss before updating
            disc_loss = get_discriminator_loss(Y_real_mb, Y_fake_mb,
                                               Y_fake_e_mb, mask_slice)
            # Train discriminator (only when the discriminator does not work well)
        if (disc_loss > 0.15):
            #disc_loss = get_discriminator_loss(Y_real_mb, Y_fake_mb, Y_fake_e_mb)
            disc_vars = discriminator_model.trainable_variables
            gradients_of_disc = disc_tape.gradient(disc_loss, disc_vars)
            discriminator_optimizer.apply_gradients(
                zip(gradients_of_disc, disc_vars))

        return disc_loss

    #timeGAN training
    def train():
        #1. Embedding network training
        print('Start Embedding Network Training')

        for itt in range(iterations):
            # Set mini-batch
            X_mb, T_mb = batch_generator(ori_data, ori_time, batch_size)
            X_mb = np.nan_to_num(X_mb, nan=0)  #model can't take in nans

            # Train embedder
            step_e_loss = train_step_embedder(X_mb)

            # Checkpoint
            if itt % 100 == 0:
                print('step: ' + str(itt) + '/' + str(iterations) +
                      ', e_loss: ' + str(np.round(np.sqrt(step_e_loss), 4)))

        print('Finish Embedding Network Training')

        #2. Training only with supervised loss
        print('Start Training with Supervised Loss Only')

        for itt in range(iterations):
            # Set mini-batch
            X_mb, T_mb = batch_generator(ori_data, ori_time, batch_size)
            X_mb = np.nan_to_num(X_mb, nan=0)

            # Random vector generation
            Z_mb = random_generator(batch_size, z_dim, T_mb, max_seq_len)
            # Train generator
            step_gen_s_loss = train_step_generator_s(X_mb)

            # Checkpoint
            if itt % 100 == 0:
                print('step: ' + str(itt) + '/' + str(iterations) +
                      ', s_loss: ' +
                      str(np.round(np.sqrt(step_gen_s_loss), 4)))

        print('Finish Training with Supervised Loss Only')

        # 3. Joint Training
        print('Start Joint Training')

        for itt in range(iterations):
            # Generator training (twice as often as discriminator training)
            for kk in range(2):
                # Set mini-batch
                X_mb, T_mb = batch_generator(ori_data, ori_time, batch_size)
                X_mb = np.nan_to_num(X_mb, nan=0)

                # Random vector generation
                Z_mb = random_generator(batch_size, z_dim, T_mb, max_seq_len)
                # Train generator and embedder
                emb_T0_loss, emb_loss, g_loss_u, gen_s_loss, g_loss_v = train_step_joint(
                    X_mb, Z_mb)

            # Discriminator training
            # Set mini-batch
            X_mb, T_mb = batch_generator(ori_data, ori_time, batch_size)
            X_mb = np.nan_to_num(X_mb, nan=0)
            # Random vector generation
            Z_mb = random_generator(batch_size, z_dim, T_mb, max_seq_len)
            #train discriminator
            d_loss = train_step_discriminator(X_mb, Z_mb)

            # Print multiple checkpoints
            if itt % 100 == 0:
                print('step: ' + str(itt) + '/' + str(iterations) +
                      ', d_loss: ' + str(np.round(d_loss, 4)) +
                      ', g_loss_u: ' + str(np.round(g_loss_u, 4)) +
                      ', g_loss_s: ' + str(np.round(np.sqrt(gen_s_loss), 4)) +
                      ', g_loss_v: ' + str(np.round(g_loss_v, 4)) +
                      ', e_loss_t0: ' + str(np.round(np.sqrt(emb_T0_loss), 4)))

        print('Finish Joint Training')

        ## Synthetic data generation
        Z_mb = random_generator(no, z_dim, ori_time, max_seq_len)
        E_hat_generated = generator_model(Z_mb)
        H_hat_generated = supervisor_model(E_hat_generated)
        generated_data_curr = recovery_model(H_hat_generated)

        generated_data = list()

        for i in range(no):
            temp = generated_data_curr[i, :ori_time[i], :]
            generated_data.append(temp)

        # Renormalization
        generated_data = generated_data * max_val
        generated_data = generated_data + min_val

        return generated_data

    return train()
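
# Hedged usage sketch (not part of the original examples): one way the masked
# timegan variant above might be invoked. The array shape and parameter values
# are hypothetical, and 'gru' is assumed to be a module name understood by the
# external rnn_cell helper. The function is only defined here, never called.
def _demo_run_timegan():
    toy_data = np.random.rand(100, 24, 6)  # (no, seq_len, dim)
    toy_parameters = {
        'hidden_dim': 12,
        'num_layer': 3,
        'iterations': 1000,
        'batch_size': 32,
        'module': 'gru',
    }
    return timegan(toy_data, toy_parameters)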
def timegan(ori_data, parameters):
    """TimeGAN function.
  
  Use original data as a training set to generate synthetic data (time-series)
  
  Args:
    - ori_data: original time-series data
    - parameters: TimeGAN network parameters
    
  Returns:
    - generated_data: generated time-series data
  """

    # Basic Parameters
    no, seq_len, dim = np.asarray(ori_data).shape

    # Maximum sequence length and each sequence length
    ori_time, max_seq_len = extract_time(ori_data)

    def MinMaxScaler(data):
        """Min-Max Normalizer.
    
    Args:
      - data: raw data
      
    Returns:
      - norm_data: normalized data
      - min_val: minimum values (for renormalization)
      - max_val: maximum values (for renormalization)
    """
        min_val = np.min(np.min(data, axis=0), axis=0)
        data = data - min_val

        max_val = np.max(np.max(data, axis=0), axis=0)
        norm_data = data / (max_val + 1e-7)

        return norm_data, min_val, max_val

    # Normalization
    ori_data, min_val, max_val = MinMaxScaler(ori_data)

    ## Build the RNN networks

    # Network Parameters
    hidden_dim = parameters['hidden_dim']
    num_layers = parameters['num_layer']
    iterations = parameters['iterations']
    batch_size = parameters['batch_size']
    module_name = parameters['module']
    z_dim = dim
    gamma = 1

    def embedder():
        """Embedding network between original feature space to latent space.
    
    Args:
      - X: input time-series features
      - T: input time information
      
    Returns:
      - H: embeddings
    """
        e_cell = tf.keras.layers.StackedRNNCells([
            tf.keras.layers.GRUCell(hidden_dim,
                                    activation=tf.nn.tanh,
                                    input_shape=(seq_len, hidden_dim))
            for _ in range(num_layers - 1)
        ])
        model = tf.keras.Sequential([
            rnn_cell(module_name,
                     hidden_dim,
                     return_sequences=True,
                     input_shape=(seq_len, dim)),
            tf.keras.layers.RNN(e_cell, return_sequences=True),
            tf.keras.layers.Dense(hidden_dim, activation=tf.nn.sigmoid)
        ])

        return model

    def recovery():
        """Recovery network from latent space to original space.
    
    Args:
      - H: latent representation
      - T: input time information
      
    Returns:
      - X_tilde: recovered data
    """
        r_cell = tf.keras.layers.StackedRNNCells([
            tf.keras.layers.GRUCell(hidden_dim,
                                    activation=tf.nn.tanh,
                                    input_shape=(seq_len, hidden_dim))
            for _ in range(num_layers)
        ])
        model = tf.keras.Sequential([
            tf.keras.layers.RNN(r_cell, return_sequences=True),
            tf.keras.layers.Dense(dim, activation=tf.nn.sigmoid)
        ])

        return model

    def generator():
        """Generator function: Generate time-series data in latent space.
    
    Args:
      - Z: random variables
      - T: input time information
      
    Returns:
      - E: generated embedding
    """
        e_cell = tf.keras.layers.StackedRNNCells([
            tf.keras.layers.GRUCell(hidden_dim, activation=tf.nn.tanh)
            for _ in range(num_layers)
        ])
        model = tf.keras.Sequential([
            tf.keras.layers.RNN(e_cell, return_sequences=True),
            tf.keras.layers.Dense(hidden_dim, activation=tf.nn.sigmoid)
        ])

        return model

    def supervisor():
        """Generate next sequence using the previous sequence.
    
    Args:
      - H: latent representation
      - T: input time information
      
    Returns:
      - S: generated sequence based on the latent representations generated by the generator
    """
        e_cell = tf.keras.layers.StackedRNNCells([
            tf.keras.layers.GRUCell(hidden_dim,
                                    activation=tf.nn.tanh,
                                    input_shape=(seq_len, hidden_dim))
            for _ in range(num_layers - 1)
        ])
        model = tf.keras.Sequential([
            tf.keras.layers.RNN(e_cell, return_sequences=True),
            tf.keras.layers.Dense(hidden_dim, activation=tf.nn.sigmoid)
        ])

        return model

    #sequential may not work
    def discriminator():
        """Discriminate the original and synthetic time-series data.
    
    Args:
      - H: latent representation
      - T: input time information
      
    Returns:
      - Y_hat: classification results between original and synthetic time-series
    """
        d_cell = tf.keras.layers.StackedRNNCells([
            tf.keras.layers.GRUCell(hidden_dim, activation=tf.nn.tanh)
            for _ in range(num_layers)
        ])
        model = tf.keras.Sequential([
            tf.keras.layers.RNN(d_cell),
            tf.keras.layers.Dense(1, activation=None)
        ])

        return model

    # Embedder & Recovery
    embedder = embedder()
    recovery = recovery()
    # Generator
    generator = generator()
    supervisor = supervisor()

    # Discriminator
    discriminator = discriminator()

    # Embedder network loss
    def embed_obj(X, X_tilde):
        return 10 * tf.sqrt(tf.compat.v1.losses.mean_squared_error(X, X_tilde))

    def E_loss_T0(X, X_tilde):
        return tf.compat.v1.losses.mean_squared_error(X, X_tilde)

    def E_loss(E_loss0, G_loss_S):
        return E_loss0 + 0.1 * G_loss_S

    #Supervised loss
    def supervised_obj(H, H_hat_supervise):
        return tf.compat.v1.losses.mean_squared_error(
            H[:, 1:, :], H_hat_supervise[:, :-1, :])

    #Generator loss
    def generator_obj(G_loss_U, G_loss_U_e, G_loss_S, G_loss_V):
        return G_loss_U + gamma * G_loss_U_e + 100 * tf.sqrt(
            G_loss_S) + 100 * G_loss_V

    #Discriminator loss
    def discriminator_obj(Y_real, Y_fake, Y_fake_e):
        D_loss_real = tf.compat.v1.losses.sigmoid_cross_entropy(
            tf.ones_like(Y_real), Y_real)
        D_loss_fake = tf.compat.v1.losses.sigmoid_cross_entropy(
            tf.zeros_like(Y_fake), Y_fake)
        D_loss_fake_e = tf.compat.v1.losses.sigmoid_cross_entropy(
            tf.zeros_like(Y_fake_e), Y_fake_e)
        return D_loss_real + D_loss_fake + gamma * D_loss_fake_e

    # optimizer
    EO_solver = tf.keras.optimizers.Adam()
    GS_solver = tf.keras.optimizers.Adam()
    G_solver = tf.keras.optimizers.Adam()
    E_solver = tf.keras.optimizers.Adam()
    D_solver = tf.keras.optimizers.Adam()

    # Train embedder
    @tf.function
    def train_step_embedder(X_mb):
        with tf.GradientTape() as embed_tape:
            embed = embedder(X_mb)
            recover = recovery(embed)
            loss = embed_obj(X_mb, recover)
            gradients_embed = embed_tape.gradient(
                loss,
                embedder.trainable_variables + recovery.trainable_variables)
            EO_solver.apply_gradients(
                zip(
                    gradients_embed, embedder.trainable_variables +
                    recovery.trainable_variables))
            step_e_loss = E_loss_T0(X_mb, recover)
            return step_e_loss

    #The TensorFlow 1 code was confusing, so this translation is likely to be wrong.
    @tf.function
    def train_step_supervised(X_mb, Z_mb):
        with tf.GradientTape() as supervised_tape:
            embed = embedder(X_mb)

            generated = generator(Z_mb)
            supervised_g = supervisor(generated)
            supervised_e = supervisor(embed)

            loss = supervised_obj(embed, supervised_e)

            gradients_supervised = supervised_tape.gradient(
                loss,
                supervisor.trainable_variables + generator.trainable_variables)
            GS_solver.apply_gradients(
                zip(
                    gradients_supervised, supervisor.trainable_variables +
                    generator.trainable_variables))

            return loss

    #The generator is supposed to take its own previously generated data as input.
    @tf.function
    def train_step_generator(X_mb, Z_mb):
        with tf.GradientTape() as gen_tape:

            ## Not sure about feeding the generator the embedded data.
            embed = embedder(X_mb)
            generated = generator(Z_mb)
            supervised_g = supervisor(generated)
            supervised_e = supervisor(embed)

            synth_data = recovery(supervised_g)

            Y_fake_mb = discriminator(supervised_g)
            Y_real_mb = discriminator(embed)
            Y_fake_e_mb = discriminator(generated)

            G_loss_U = tf.compat.v1.losses.sigmoid_cross_entropy(
                tf.ones_like(Y_fake_mb), Y_fake_mb)
            G_loss_U_e = tf.compat.v1.losses.sigmoid_cross_entropy(
                tf.ones_like(Y_fake_e_mb), Y_fake_e_mb)
            G_loss_S = supervised_obj(embed, supervised_e)

            # Match the per-feature standard deviation and mean of the synthetic
            # and real batches.
            G_loss_V1 = tf.reduce_mean(
                tf.abs(
                    tf.sqrt(tf.nn.moments(synth_data, [0])[1] + 1e-6) -
                    tf.sqrt(tf.nn.moments(X_mb, [0])[1] + 1e-6)))
            G_loss_V2 = tf.reduce_mean(
                tf.abs((tf.nn.moments(synth_data, [0])[0]) -
                       (tf.nn.moments(X_mb, [0])[0])))
            G_loss_V = G_loss_V1 + G_loss_V2

            loss = generator_obj(G_loss_U, G_loss_U_e, G_loss_S, G_loss_V)

        gradients_generator = gen_tape.gradient(
            loss,
            generator.trainable_variables + supervisor.trainable_variables)
        G_solver.apply_gradients(
            zip(gradients_generator,
                generator.trainable_variables + supervisor.trainable_variables))

        # Return the unweighted components for the checkpoint print-out.
        return G_loss_U, G_loss_S, G_loss_V

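    # Joint-training embedder step: reconstruction objective plus a small,
    # 0.1-weighted supervised term, i.e. the E_loss helper defined above.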
    @tf.function
    def train_step_joint_embed(X_mb, Z_mb):
        with tf.GradientTape() as embed_tape:
            embed = embedder(X_mb)
            supervised = supervisor(embed)

            G_loss_S = supervised_obj(embed, supervised)

            recover = recovery(embed)
            E_loss0 = embed_obj(X_mb, recover)

            loss = E_loss(E_loss0, G_loss_S)

        gradients_embed = embed_tape.gradient(
            loss, embedder.trainable_variables + recovery.trainable_variables)
        E_solver.apply_gradients(
            zip(gradients_embed,
                embedder.trainable_variables + recovery.trainable_variables))

        # Report the plain reconstruction MSE for the checkpoint print-out.
        return E_loss_T0(X_mb, recover)

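    # Discriminator step: real scores on embedded real data, fake scores on the
    # supervised generator path and on the raw generator output. As in the
    # original TimeGAN, the discriminator is only updated when its loss exceeds
    # 0.15, so that it does not overpower the generator.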
    @tf.function
    def train_step_discriminator(X_mb, Z_mb):
        with tf.GradientTape() as discrim_tape:
            embed = embedder(X_mb)
            generated = generator(Z_mb)
            supervised_g = supervisor(generated)

            Y_fake = discriminator(supervised_g)
            Y_real = discriminator(embed)
            Y_fake_e = discriminator(generated)

            loss = discriminator_obj(Y_real, Y_fake, Y_fake_e)

        # Check the discriminator loss before updating.
        if loss > 0.15:
            gradients_discrim = discrim_tape.gradient(
                loss, discriminator.trainable_variables)
            D_solver.apply_gradients(
                zip(gradients_discrim, discriminator.trainable_variables))
        return loss

    ## TimeGAN training

    def train():
        # 1. Embedding network training
        print('Start Embedding Network Training')

        for itt in range(iterations):
            # Set mini-batch
            X_mb, T_mb = batch_generator(ori_data, ori_time, batch_size)
            X_mb = np.asarray(X_mb).reshape(batch_size, seq_len, dim)

            step_e_loss = train_step_embedder(X_mb)

            # Checkpoint
            if itt % 1000 == 0:
                print('step: ' + str(itt) + '/' + str(iterations) +
                      ', e_loss: ' + str(np.round(np.sqrt(step_e_loss), 4)))
        print('Finish Embedding Network Training')

        # 2. Training only with supervised loss
        print('Start Training with Supervised Loss Only')

        for itt in range(iterations):
            # Set mini-batch
            X_mb, T_mb = batch_generator(ori_data, ori_time, batch_size)
            # Random vector generation
            Z_mb = random_generator(batch_size, z_dim, T_mb, max_seq_len)
            # Train generator
            step_g_loss_s = train_step_supervised(X_mb, Z_mb)
            # Checkpoint
            if itt % 1000 == 0:
                print('step: ' + str(itt) + '/' + str(iterations) +
                      ', s_loss: ' + str(np.round(np.sqrt(step_g_loss_s), 4)))

        print('Finish Training with Supervised Loss Only')

        # 3. Joint Training
        print('Start Joint Training')

        for itt in range(iterations):
            # Generator training (run twice for every discriminator update)
            for kk in range(2):
                # Set mini-batch
                X_mb, T_mb = batch_generator(ori_data, ori_time, batch_size)
                # Random vector generation
                Z_mb = random_generator(batch_size, z_dim, T_mb, max_seq_len)
                # Train generator
                step_g_loss_u, step_g_loss_s, step_g_loss_v = train_step_generator(
                    X_mb, Z_mb)
                #_, step_g_loss_u, step_g_loss_s, step_g_loss_v = sess.run([G_solver, G_loss_U, G_loss_S, G_loss_V], feed_dict={Z: Z_mb, X: X_mb, T: T_mb})
                # Train embedder
                step_e_loss_t0 = train_step_joint_embed(X_mb, Z_mb)

                #_, step_e_loss_t0 = sess.run([E_solver, E_loss_T0], feed_dict={Z: Z_mb, X: X_mb, T: T_mb})

            # Discriminator training
            # Set mini-batch
            X_mb, T_mb = batch_generator(ori_data, ori_time, batch_size)
            # Random vector generation
            Z_mb = random_generator(batch_size, z_dim, T_mb, max_seq_len)

            step_d_loss = train_step_discriminator(X_mb, Z_mb)

            # Print multiple checkpoints
            if itt % 1000 == 0:
                print('step: ' + str(itt) + '/' + str(iterations) +
                      ', d_loss: ' + str(np.round(step_d_loss, 4)) +
                      ', g_loss_u: ' + str(np.round(step_g_loss_u, 4)) +
                      ', g_loss_s: ' +
                      str(np.round(np.sqrt(step_g_loss_s), 4)) +
                      ', g_loss_v: ' + str(np.round(step_g_loss_v, 4)) +
                      ', e_loss_t0: ' +
                      str(np.round(np.sqrt(step_e_loss_t0), 4)))
        print('Finish Joint Training')

        ## Synthetic data generation
        Z_mb = random_generator(no, z_dim, ori_time, max_seq_len)

        generated = generator(Z_mb)
        supervised = supervisor(generated)
        generated_data_curr = recovery(supervised).numpy()

        generated_data = list()

        for i in range(no):
            temp = generated_data_curr[i, :ori_time[i], :]
            generated_data.append(temp)

        # Renormalization
        generated_data = generated_data * max_val
        generated_data = generated_data + min_val

        return generated_data

    return train()
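
# The renormalization step above assumes `min_val` and `max_val` were produced by
# a MinMax scaling of `ori_data` earlier in the enclosing function. That scaler is
# not shown here; the sketch below only illustrates the kind of scaling the code
# appears to assume (modelled on the original TimeGAN), not code from this file.
def minmax_scale(data):
    # Illustrative only: scale each feature to roughly [0, 1] and return the
    # offsets needed to invert the transformation (renorm = norm * max_val + min_val).
    data = np.asarray(data, dtype=np.float32)
    min_val = np.min(np.min(data, axis=0), axis=0)
    data = data - min_val
    max_val = np.max(np.max(data, axis=0), axis=0)
    norm_data = data / (max_val + 1e-7)
    return norm_data, min_val, max_val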