Example #1
0
def main(args):

    trial = int(args['trial'])
    pkl_name = 'rnn_gauss_%d' % trial
    channel_name = 'valid_nll'

    data_path = args['data_path']
    save_path = args['save_path']
    flgMSE = int(args['flgMSE'])

    monitoring_freq = int(args['monitoring_freq'])
    epoch = int(args['epoch'])
    batch_size = int(args['batch_size'])
    x_dim = int(args['x_dim'])
    z_dim = int(args['z_dim'])
    y_dim = int(args['y_dim'])
    flgAgg = int(args['flgAgg'])
    rnn_dim = int(args['rnn_dim'])
    lr = float(args['lr'])
    debug = int(args['debug'])

    print "trial no. %d" % trial
    print "batch size %d" % batch_size
    print "learning rate %f" % lr
    print "saving pkl file '%s'" % pkl_name
    print "to the save path '%s'" % save_path

    x2s_dim = 340
    s2x_dim = 340
    target_dim = k  #x_dim - 1

    model = Model()
    train_data = UKdale(name='train',
                        prep='normalize',
                        cond=False,
                        path=data_path,
                        windows=windows,
                        appliances=appliances,
                        numApps=flgAgg,
                        period=period,
                        n_steps=n_steps,
                        stride_train=stride_train,
                        stride_test=stride_test)

    X_mean = train_data.X_mean
    X_std = train_data.X_std

    valid_data = UKdale(name='valid',
                        prep='normalize',
                        cond=False,
                        path=data_path,
                        X_mean=X_mean,
                        X_std=X_std,
                        windows=windows,
                        appliances=appliances,
                        numApps=flgAgg,
                        period=period,
                        n_steps=n_steps,
                        stride_train=stride_train,
                        stride_test=stride_test)

    init_W = InitCell('rand')
    init_U = InitCell('ortho')
    init_b = InitCell('zeros')
    init_b_sig = InitCell('const', mean=0.6)

    x, y = train_data.theano_vars()

    if debug:
        x.tag.test_value = np.zeros((15, batch_size, x_dim), dtype=np.float32)
        temp = np.ones((15, batch_size), dtype=np.float32)
        temp[:, -2:] = 0.
        mask.tag.test_value = temp

    x_1 = FullyConnectedLayer(name='x_1',
                              parent=['x_t'],
                              parent_dim=[x_dim],
                              nout=x2s_dim,
                              unit='relu',
                              init_W=init_W,
                              init_b=init_b)

    rnn = LSTM(name='rnn',
               parent=['x_1'],
               parent_dim=[x2s_dim],
               nout=rnn_dim,
               unit='tanh',
               init_W=init_W,
               init_U=init_U,
               init_b=init_b)

    theta_1 = FullyConnectedLayer(name='theta_1',
                                  parent=['s_tm1'],
                                  parent_dim=[rnn_dim],
                                  nout=s2x_dim,
                                  unit='relu',
                                  init_W=init_W,
                                  init_b=init_b)

    theta_mu = FullyConnectedLayer(name='theta_mu',
                                   parent=['theta_1'],
                                   parent_dim=[s2x_dim],
                                   nout=target_dim,
                                   unit='linear',
                                   init_W=init_W,
                                   init_b=init_b)

    theta_sig = FullyConnectedLayer(name='theta_sig',
                                    parent=['theta_1'],
                                    parent_dim=[s2x_dim],
                                    nout=target_dim,
                                    unit='softplus',
                                    cons=1e-4,
                                    init_W=init_W,
                                    init_b=init_b_sig)

    corr = FullyConnectedLayer(name='corr',
                               parent=['theta_1'],
                               parent_dim=[s2x_dim],
                               nout=1,
                               unit='tanh',
                               init_W=init_W,
                               init_b=init_b)

    binary = FullyConnectedLayer(name='binary',
                                 parent=['theta_1'],
                                 parent_dim=[s2x_dim],
                                 nout=1,
                                 unit='sigmoid',
                                 init_W=init_W,
                                 init_b=init_b)

    nodes = [rnn, x_1, theta_1, theta_mu, theta_sig]  #, corr, binary

    params = OrderedDict()

    for node in nodes:
        if node.initialize() is not None:
            params.update(node.initialize())

    params = init_tparams(params)

    s_0 = rnn.get_init_state(batch_size)

    x_1_temp = x_1.fprop([x], params)

    def inner_fn(x_t, s_tm1):

        s_t = rnn.fprop([[x_t], [s_tm1]], params)
        theta_1_t = theta_1.fprop([s_t], params)
        theta_mu_t = theta_mu.fprop([theta_1_t], params)
        theta_sig_t = theta_sig.fprop([theta_1_t], params)
        coeff_t = coeff.fprop([theta_1_t], params)

        pred = Gaussian_sample(theta_mu_t, theta_sig_t)
        return s_t, theta_mu_t, theta_sig_t, coeff_t, pred

    ((s_temp, theta_mu_temp, theta_sig_temp, coeff_temp, pred_temp),
     updates) = theano.scan(fn=inner_fn,
                            sequences=[x_1_temp],
                            outputs_info=[s_0, None, None, None, None])

    for k, v in updates.iteritems():
        k.default_update = v

    s_temp = concatenate([s_0[None, :, :], s_temp[:-1]], axis=0)
    '''
    theta_1_temp = theta_1.fprop([s_temp], params)
    theta_mu_temp = theta_mu.fprop([theta_1_temp], params)
    theta_sig_temp = theta_sig.fprop([theta_1_temp], params)
    corr_temp = corr.fprop([theta_1_temp], params)
    binary_temp = binary.fprop([theta_1_temp], params)
    '''
    x_shape = x.shape
    x_in = x.reshape((x_shape[0] * x_shape[1], -1))
    theta_mu_in = theta_mu_temp.reshape((x_shape[0] * x_shape[1], -1))
    theta_sig_in = theta_sig_temp.reshape((x_shape[0] * x_shape[1], -1))
    corr_in = corr_temp.reshape((x_shape[0] * x_shape[1], -1))
    binary_in = binary_temp.reshape((x_shape[0] * x_shape[1], -1))

    if (flgAgg == -1):
        prediction.name = 'x_reconstructed'
        mse = T.mean((prediction - x)**2)  # CHECK RESHAPE with an assertion
        mae = T.mean(T.abs(prediction - x))
        mse.name = 'mse'
        pred_in = x.reshape((x_shape[0] * x_shape[1], -1))
    else:
        pred_temp = pred_temp.reshape((pred_temp.shape[0], pred_temp.shape[1]))
        pred_temp.name = 'pred_' + str(flgAgg)
        #y[:,:,flgAgg].reshape((y.shape[0],y.shape[1],1))
        mse = T.mean((pred_temp - y.T)**2)  # CHECK RESHAPE with an assertion
        mae = T.mean(T.abs_(pred_temp - y.T))
        mse.name = 'mse'
        mae.name = 'mae'
        pred_in = y.reshape((x.shape[0] * x.shape[1], -1), ndim=2)

    recon = Gaussian(pred_in, theta_mu_in, theta_sig_in)
    recon = recon.reshape((x_shape[0], x_shape[1]))
    #recon = recon * mask
    recon_term = recon.sum(axis=0).mean()
    recon_term.name = 'nll'

    max_x = x.max()
    mean_x = x.mean()
    min_x = x.min()
    max_x.name = 'max_x'
    mean_x.name = 'mean_x'
    min_x.name = 'min_x'

    max_theta_mu = theta_mu_in.max()
    mean_theta_mu = theta_mu_in.mean()
    min_theta_mu = theta_mu_in.min()
    max_theta_mu.name = 'max_theta_mu'
    mean_theta_mu.name = 'mean_theta_mu'
    min_theta_mu.name = 'min_theta_mu'

    max_theta_sig = theta_sig_in.max()
    mean_theta_sig = theta_sig_in.mean()
    min_theta_sig = theta_sig_in.min()
    max_theta_sig.name = 'max_theta_sig'
    mean_theta_sig.name = 'mean_theta_sig'
    min_theta_sig.name = 'min_theta_sig'

    model.inputs = [x, y]
    model.params = params
    model.nodes = nodes

    optimizer = Adam(lr=lr)

    extension = [
        GradientClipping(batch_size=batch_size),
        EpochCount(epoch),
        Monitoring(freq=monitoring_freq,
                   ddout=[
                       recon_term, max_theta_sig, mean_theta_sig,
                       min_theta_sig, max_x, mean_x, min_x, max_theta_mu,
                       mean_theta_mu, min_theta_mu
                   ],
                   data=[Iterator(valid_data, batch_size)]),
        Picklize(freq=monitoring_freq, path=save_path),
        EarlyStopping(freq=monitoring_freq,
                      path=save_path,
                      channel=channel_name),
        WeightNorm()
    ]

    mainloop = Training(name=pkl_name,
                        data=Iterator(train_data, batch_size),
                        model=model,
                        optimizer=optimizer,
                        cost=recon_term,
                        outputs=[recon_term],
                        extension=extension)
    mainloop.run()
    fLog = open(save_path + '/output.csv', 'w')
    fLog.write("log,mse,mae\n")
    for i, item in enumerate(mainloop.trainlog.monitor['nll_upper_bound']):
        a = mainloop.trainlog.monitor['recon_term'][i]
        d = mainloop.trainlog.monitor['mse'][i]
        e = mainloop.trainlog.monitor['mae'][i]
        fLog.write("{},{},{}\n".format(a, d, e))
def main(args):

    theano.optimizer = 'fast_compile'
    #theano.config.exception_verbosity='high'

    trial = int(args['trial'])
    pkl_name = 'vrnn_gmm_%d' % trial
    channel_name = 'valid_nll_upper_bound'

    data_path = args['data_path']
    save_path = args[
        'save_path']  #+'/aggVSdisag_distrib/'+datetime.datetime.now().strftime("%y-%m-%d_%H-%M")
    period = int(args['period'])
    n_steps = int(args['n_steps'])
    stride_train = int(args['stride_train'])
    stride_test = n_steps

    flgMSE = int(args['flgMSE'])
    monitoring_freq = int(args['monitoring_freq'])
    epoch = int(args['epoch'])
    batch_size = int(args['batch_size'])
    x_dim = int(args['x_dim'])
    y_dim = int(args['y_dim'])
    z_dim = int(args['z_dim'])
    rnn_dim = int(args['rnn_dim'])
    k = int(args['num_k'])  #a mixture of K Gaussian functions
    lr = float(args['lr'])
    origLR = lr
    debug = int(args['debug'])

    print "trial no. %d" % trial
    print "batch size %d" % batch_size
    print "learning rate %f" % lr
    print "saving pkl file '%s'" % pkl_name
    print "to the save path '%s'" % save_path

    q_z_dim = 20  #150
    p_z_dim = 20  #150
    p_x_dim = 20  #250
    x2s_dim = 20  #250
    z2s_dim = 20  #150
    target_dim = k  # As different appliances are separeted in theta_mu1, theta_mu2, etc... each one is just created from k different Gaussians

    model = Model()
    Xtrain, ytrain, Xval, yval, reader = fetch_ukdale(
        data_path,
        windows,
        appliances,
        numApps=-1,
        period=period,
        n_steps=n_steps,
        stride_train=stride_train,
        stride_test=stride_test,
        flgAggSumScaled=1,
        flgFilterZeros=1)

    instancesPlot = reader.build_dict_instances_plot(listDates, batch_size,
                                                     Xval.shape[0])

    train_data = UKdale(
        name='train',
        prep='normalize',
        cond=True,  # False
        #path=data_path,
        inputX=Xtrain,
        labels=ytrain)

    X_mean = train_data.X_mean
    X_std = train_data.X_std

    valid_data = UKdale(
        name='valid',
        prep='normalize',
        cond=True,  # False
        #path=data_path,
        X_mean=X_mean,
        X_std=X_std,
        inputX=Xval,
        labels=yval)

    init_W = InitCell('rand')
    init_U = InitCell('ortho')
    init_b = InitCell('zeros')
    init_b_sig = InitCell('const', mean=0.6)

    x, mask, y, y_mask = train_data.theano_vars()
    x.name = 'x_original'

    x_input = x  #[:,:-1,:]
    y_input = y  #[:,1:,:]
    if debug:
        x.tag.test_value = np.zeros((15, batch_size, x_dim), dtype=np.float32)
        temp = np.ones((15, batch_size), dtype=np.float32)
        temp[:, -2:] = 0.
        mask.tag.test_value = temp

    x_1 = FullyConnectedLayer(name='x_1',
                              parent=['x_t'],
                              parent_dim=[x_dim],
                              nout=x2s_dim,
                              unit='relu',
                              init_W=init_W,
                              init_b=init_b)

    z_1 = FullyConnectedLayer(name='z_1',
                              parent=['z_t'],
                              parent_dim=[z_dim],
                              nout=z2s_dim,
                              unit='relu',
                              init_W=init_W,
                              init_b=init_b)

    rnn = LSTM(name='rnn',
               parent=['x_1', 'z_1'],
               parent_dim=[x2s_dim, z2s_dim],
               nout=rnn_dim,
               unit='tanh',
               init_W=init_W,
               init_U=init_U,
               init_b=init_b)

    phi_1 = FullyConnectedLayer(name='phi_1',
                                parent=['x_1', 's_tm1'],
                                parent_dim=[x2s_dim, rnn_dim],
                                nout=q_z_dim,
                                unit='relu',
                                init_W=init_W,
                                init_b=init_b)

    phi_mu = FullyConnectedLayer(name='phi_mu',
                                 parent=['phi_1'],
                                 parent_dim=[q_z_dim],
                                 nout=z_dim,
                                 unit='linear',
                                 init_W=init_W,
                                 init_b=init_b)

    phi_sig = FullyConnectedLayer(name='phi_sig',
                                  parent=['phi_1'],
                                  parent_dim=[q_z_dim],
                                  nout=z_dim,
                                  unit='softplus',
                                  cons=1e-4,
                                  init_W=init_W,
                                  init_b=init_b_sig)

    prior_1 = FullyConnectedLayer(name='prior_1',
                                  parent=['s_tm1'],
                                  parent_dim=[rnn_dim],
                                  nout=p_z_dim,
                                  unit='relu',
                                  init_W=init_W,
                                  init_b=init_b)

    prior_mu = FullyConnectedLayer(name='prior_mu',
                                   parent=['prior_1'],
                                   parent_dim=[p_z_dim],
                                   nout=z_dim,
                                   unit='linear',
                                   init_W=init_W,
                                   init_b=init_b)

    prior_sig = FullyConnectedLayer(name='prior_sig',
                                    parent=['prior_1'],
                                    parent_dim=[p_z_dim],
                                    nout=z_dim,
                                    unit='softplus',
                                    cons=1e-4,
                                    init_W=init_W,
                                    init_b=init_b_sig)

    theta_1 = FullyConnectedLayer(name='theta_1',
                                  parent=['z_1', 's_tm1'],
                                  parent_dim=[z2s_dim, rnn_dim],
                                  nout=p_x_dim,
                                  unit='relu',
                                  init_W=init_W,
                                  init_b=init_b)

    theta_mu1 = FullyConnectedLayer(name='theta_mu1',
                                    parent=['theta_1'],
                                    parent_dim=[p_x_dim],
                                    nout=target_dim,
                                    unit='linear',
                                    init_W=init_W,
                                    init_b=init_b)

    theta_mu2 = FullyConnectedLayer(name='theta_mu2',
                                    parent=['theta_1'],
                                    parent_dim=[p_x_dim],
                                    nout=target_dim,
                                    unit='linear',
                                    init_W=init_W,
                                    init_b=init_b)

    theta_mu3 = FullyConnectedLayer(name='theta_mu3',
                                    parent=['theta_1'],
                                    parent_dim=[p_x_dim],
                                    nout=target_dim,
                                    unit='linear',
                                    init_W=init_W,
                                    init_b=init_b)

    theta_mu4 = FullyConnectedLayer(name='theta_mu4',
                                    parent=['theta_1'],
                                    parent_dim=[p_x_dim],
                                    nout=target_dim,
                                    unit='linear',
                                    init_W=init_W,
                                    init_b=init_b)

    theta_mu5 = FullyConnectedLayer(name='theta_mu5',
                                    parent=['theta_1'],
                                    parent_dim=[p_x_dim],
                                    nout=target_dim,
                                    unit='linear',
                                    init_W=init_W,
                                    init_b=init_b)

    theta_sig1 = FullyConnectedLayer(name='theta_sig1',
                                     parent=['theta_1'],
                                     parent_dim=[p_x_dim],
                                     nout=target_dim,
                                     unit='softplus',
                                     cons=1e-4,
                                     init_W=init_W,
                                     init_b=init_b_sig)

    theta_sig2 = FullyConnectedLayer(name='theta_sig2',
                                     parent=['theta_1'],
                                     parent_dim=[p_x_dim],
                                     nout=target_dim,
                                     unit='softplus',
                                     cons=1e-4,
                                     init_W=init_W,
                                     init_b=init_b_sig)

    theta_sig3 = FullyConnectedLayer(name='theta_sig3',
                                     parent=['theta_1'],
                                     parent_dim=[p_x_dim],
                                     nout=target_dim,
                                     unit='softplus',
                                     cons=1e-4,
                                     init_W=init_W,
                                     init_b=init_b_sig)

    theta_sig4 = FullyConnectedLayer(name='theta_sig4',
                                     parent=['theta_1'],
                                     parent_dim=[p_x_dim],
                                     nout=target_dim,
                                     unit='softplus',
                                     cons=1e-4,
                                     init_W=init_W,
                                     init_b=init_b_sig)

    theta_sig5 = FullyConnectedLayer(name='theta_sig5',
                                     parent=['theta_1'],
                                     parent_dim=[p_x_dim],
                                     nout=target_dim,
                                     unit='softplus',
                                     cons=1e-4,
                                     init_W=init_W,
                                     init_b=init_b_sig)

    coeff1 = FullyConnectedLayer(name='coeff1',
                                 parent=['theta_1'],
                                 parent_dim=[p_x_dim],
                                 nout=k,
                                 unit='softmax',
                                 init_W=init_W,
                                 init_b=init_b)

    coeff2 = FullyConnectedLayer(name='coeff2',
                                 parent=['theta_1'],
                                 parent_dim=[p_x_dim],
                                 nout=k,
                                 unit='softmax',
                                 init_W=init_W,
                                 init_b=init_b)

    coeff3 = FullyConnectedLayer(name='coeff3',
                                 parent=['theta_1'],
                                 parent_dim=[p_x_dim],
                                 nout=k,
                                 unit='softmax',
                                 init_W=init_W,
                                 init_b=init_b)

    coeff4 = FullyConnectedLayer(name='coeff4',
                                 parent=['theta_1'],
                                 parent_dim=[p_x_dim],
                                 nout=k,
                                 unit='softmax',
                                 init_W=init_W,
                                 init_b=init_b)

    coeff5 = FullyConnectedLayer(name='coeff5',
                                 parent=['theta_1'],
                                 parent_dim=[p_x_dim],
                                 nout=k,
                                 unit='softmax',
                                 init_W=init_W,
                                 init_b=init_b)

    corr = FullyConnectedLayer(name='corr',
                               parent=['theta_1'],
                               parent_dim=[p_x_dim],
                               nout=k,
                               unit='tanh',
                               init_W=init_W,
                               init_b=init_b)

    binary = FullyConnectedLayer(name='binary',
                                 parent=['theta_1'],
                                 parent_dim=[p_x_dim],
                                 nout=1,
                                 unit='sigmoid',
                                 init_W=init_W,
                                 init_b=init_b)

    nodes = [
        rnn,
        x_1,
        z_1,  #dissag_pred,
        phi_1,
        phi_mu,
        phi_sig,
        prior_1,
        prior_mu,
        prior_sig,
        theta_1,
        theta_mu1,
        theta_mu2,
        theta_mu3,
        theta_mu4,
        theta_mu5,
        theta_sig1,
        theta_sig2,
        theta_sig3,
        theta_sig4,
        theta_sig5,
        coeff1,
        coeff2,
        coeff3,
        coeff4,
        coeff5
    ]

    params = OrderedDict()

    for node in nodes:
        if node.initialize() is not None:
            params.update(node.initialize())

    params = init_tparams(params)

    s_0 = rnn.get_init_state(batch_size)

    x_1_temp = x_1.fprop([x_input], params)

    def inner_fn(x_t, s_tm1):

        phi_1_t = phi_1.fprop([x_t, s_tm1], params)
        phi_mu_t = phi_mu.fprop([phi_1_t], params)
        phi_sig_t = phi_sig.fprop([phi_1_t], params)

        prior_1_t = prior_1.fprop([s_tm1], params)
        prior_mu_t = prior_mu.fprop([prior_1_t], params)
        prior_sig_t = prior_sig.fprop([prior_1_t], params)

        z_t = Gaussian_sample(
            phi_mu_t, phi_sig_t
        )  #in the original code it is gaussian. GMM is for the generation
        z_1_t = z_1.fprop([z_t], params)

        theta_1_t = theta_1.fprop([z_1_t, s_tm1], params)

        theta_mu1_t = theta_mu1.fprop([theta_1_t], params)
        theta_sig1_t = theta_sig1.fprop([theta_1_t], params)
        coeff1_t = coeff1.fprop([theta_1_t], params)

        theta_mu2_t = theta_mu2.fprop([theta_1_t], params)
        theta_sig2_t = theta_sig2.fprop([theta_1_t], params)
        coeff2_t = coeff2.fprop([theta_1_t], params)

        theta_mu3_t = theta_mu3.fprop([theta_1_t], params)
        theta_sig3_t = theta_sig3.fprop([theta_1_t], params)
        coeff3_t = coeff3.fprop([theta_1_t], params)

        theta_mu4_t = theta_mu4.fprop([theta_1_t], params)
        theta_sig4_t = theta_sig4.fprop([theta_1_t], params)
        coeff4_t = coeff4.fprop([theta_1_t], params)

        theta_mu5_t = theta_mu5.fprop([theta_1_t], params)
        theta_sig5_t = theta_sig5.fprop([theta_1_t], params)
        coeff5_t = coeff5.fprop([theta_1_t], params)

        y_pred1 = GMM_sampleY(
            theta_mu1_t, theta_sig1_t,
            coeff1_t)  #Gaussian_sample(theta_mu_t, theta_sig_t)
        y_pred2 = GMM_sampleY(theta_mu2_t, theta_sig2_t, coeff2_t)
        y_pred3 = GMM_sampleY(theta_mu3_t, theta_sig3_t, coeff3_t)
        y_pred4 = GMM_sampleY(theta_mu4_t, theta_sig4_t, coeff4_t)
        y_pred5 = GMM_sampleY(theta_mu5_t, theta_sig5_t, coeff5_t)

        #y_pred = [GMM_sampleY(theta_mu_t[i], theta_sig_t[i], coeff_t[i]) for i in range(y_dim)]#T.stack([y_pred1,y_pred2],axis = 0 )
        s_t = rnn.fprop([[x_t, z_1_t], [s_tm1]], params)
        #y_pred = dissag_pred.fprop([s_t], params)

        return (s_t, phi_mu_t, phi_sig_t, prior_mu_t, prior_sig_t, z_t, z_1_t,
                theta_1_t, theta_mu1_t, theta_sig1_t, coeff1_t, theta_mu2_t,
                theta_sig2_t, coeff2_t, theta_mu3_t, theta_sig3_t, coeff3_t,
                theta_mu4_t, theta_sig4_t, coeff4_t, theta_mu5_t, theta_sig5_t,
                coeff5_t, y_pred1, y_pred2, y_pred3, y_pred4, y_pred5)
        #corr_temp, binary_temp
    ((s_temp, phi_mu_temp, phi_sig_temp, prior_mu_temp, prior_sig_temp,z_t_temp, z_1_temp, theta_1_temp,
      theta_mu1_temp, theta_sig1_temp, coeff1_temp, theta_mu2_temp, theta_sig2_temp, coeff2_temp,
      theta_mu3_temp, theta_sig3_temp, coeff3_temp, theta_mu4_temp, theta_sig4_temp, coeff4_temp,
      theta_mu5_temp, theta_sig5_temp, coeff5_temp,
      y_pred1_temp, y_pred2_temp, y_pred3_temp, y_pred4_temp, y_pred5_temp), updates) =\
        theano.scan(fn=inner_fn,
                    sequences=[x_1_temp],
                    outputs_info=[s_0,  None, None, None, None, None, None, None, None,None,  None, None,
                                  None, None, None, None, None, None, None, None,
                                  None, None, None, None, None, None, None, None])

    for k, v in updates.iteritems():
        k.default_update = v

    s_temp = concatenate(
        [s_0[None, :, :], s_temp[:-1]], axis=0
    )  # seems like this is for creating an additional dimension to s_0

    s_temp.name = 'h_1'  #gisse
    z_1_temp.name = 'z_1'  #gisse
    z_t_temp.name = 'z'

    theta_mu1_temp.name = 'theta_mu1'
    theta_sig1_temp.name = 'theta_sig1'
    coeff1_temp.name = 'coeff1'

    theta_mu2_temp.name = 'theta_mu2'
    theta_sig2_temp.name = 'theta_sig2'
    coeff2_temp.name = 'coeff2'

    theta_mu3_temp.name = 'theta_mu3'
    theta_sig3_temp.name = 'theta_sig3'
    coeff3_temp.name = 'coeff3'

    theta_mu4_temp.name = 'theta_mu4'
    theta_sig4_temp.name = 'theta_sig4'
    coeff4_temp.name = 'coeff4'

    theta_mu5_temp.name = 'theta_mu5'
    theta_sig5_temp.name = 'theta_sig5'
    coeff5_temp.name = 'coeff5'
    #corr_temp.name = 'corr'
    #binary_temp.name = 'binary'
    #x_pred_temp.name = 'x_reconstructed'
    y_pred1_temp.name = 'disaggregation1'
    y_pred2_temp.name = 'disaggregation2'
    y_pred3_temp.name = 'disaggregation3'
    y_pred4_temp.name = 'disaggregation4'
    y_pred5_temp.name = 'disaggregation5'
    '''
    y_pred_temp = T.stack([y_pred1_temp, y_pred2_temp, y_pred3_temp, y_pred4_temp], axis=2) 
    y_pred_temp = y_pred_temp.flatten(3)# because of the stack, i guess, there's a 4th dimension created
    mse = T.mean((y_pred_temp - y.reshape((y.shape[0], y.shape[1],-1)))**2) # cause mse can be 26000
    '''
    #[:,:,flgAgg].reshape((y.shape[0],y.shape[1],1)
    mse1 = T.mean((y_pred1_temp - y_input[:, :, 0].reshape(
        (y_input.shape[0], y_input.shape[1],
         1)))**2)  # As axis = None is calculated for all
    mae1 = T.mean(
        T.abs_(y_pred1_temp - y_input[:, :, 0].reshape((y_input.shape[0],
                                                        y_input.shape[1], 1))))
    mse1.name = 'mse1'
    mae1.name = 'mae1'

    mse2 = T.mean((y_pred2_temp - y_input[:, :, 1].reshape(
        (y_input.shape[0], y_input.shape[1],
         1)))**2)  # As axis = None is calculated for all
    mae2 = T.mean(
        T.abs_(y_pred2_temp - y_input[:, :, 1].reshape((y_input.shape[0],
                                                        y_input.shape[1], 1))))
    mse2.name = 'mse2'
    mae2.name = 'mae2'

    mse3 = T.mean((y_pred3_temp - y_input[:, :, 2].reshape(
        (y_input.shape[0], y_input.shape[1],
         1)))**2)  # As axis = None is calculated for all
    mae3 = T.mean(
        T.abs_(y_pred3_temp - y_input[:, :, 2].reshape((y_input.shape[0],
                                                        y_input.shape[1], 1))))
    mse3.name = 'mse3'
    mae3.name = 'mae3'

    mse4 = T.mean((y_pred4_temp - y_input[:, :, 3].reshape(
        (y_input.shape[0], y_input.shape[1],
         1)))**2)  # As axis = None is calculated for all
    mae4 = T.mean(
        T.abs_(y_pred4_temp - y_input[:, :, 3].reshape((y_input.shape[0],
                                                        y_input.shape[1], 1))))
    mse4.name = 'mse4'
    mae4.name = 'mae4'

    mse5 = T.mean((y_pred5_temp - y_input[:, :, 4].reshape(
        (y_input.shape[0], y_input.shape[1],
         1)))**2)  # As axis = None is calculated for all
    mae5 = T.mean(
        T.abs_(y_pred5_temp - y_input[:, :, 4].reshape((y_input.shape[0],
                                                        y_input.shape[1], 1))))
    mse5.name = 'mse5'
    mae5.name = 'mae5'

    kl_temp = KLGaussianGaussian(phi_mu_temp, phi_sig_temp, prior_mu_temp,
                                 prior_sig_temp)

    x_shape = x_input.shape
    y_shape = y_input.shape
    x_in = x.reshape((x_shape[0] * x_shape[1], -1))
    y_in = y.reshape((y_shape[0] * y_shape[1], -1))

    theta_mu1_in = theta_mu1_temp.reshape((x_shape[0] * x_shape[1], -1))
    theta_sig1_in = theta_sig1_temp.reshape((x_shape[0] * x_shape[1], -1))
    coeff1_in = coeff1_temp.reshape((x_shape[0] * x_shape[1], -1))

    theta_mu2_in = theta_mu2_temp.reshape((x_shape[0] * x_shape[1], -1))
    theta_sig2_in = theta_sig2_temp.reshape((x_shape[0] * x_shape[1], -1))
    coeff2_in = coeff2_temp.reshape((x_shape[0] * x_shape[1], -1))

    theta_mu3_in = theta_mu3_temp.reshape((x_shape[0] * x_shape[1], -1))
    theta_sig3_in = theta_sig3_temp.reshape((x_shape[0] * x_shape[1], -1))
    coeff3_in = coeff3_temp.reshape((x_shape[0] * x_shape[1], -1))

    theta_mu4_in = theta_mu4_temp.reshape((x_shape[0] * x_shape[1], -1))
    theta_sig4_in = theta_sig4_temp.reshape((x_shape[0] * x_shape[1], -1))
    coeff4_in = coeff4_temp.reshape((x_shape[0] * x_shape[1], -1))

    theta_mu5_in = theta_mu5_temp.reshape((x_shape[0] * x_shape[1], -1))
    theta_sig5_in = theta_sig5_temp.reshape((x_shape[0] * x_shape[1], -1))
    coeff5_in = coeff5_temp.reshape((x_shape[0] * x_shape[1], -1))

    #corr_in = corr_temp.reshape((x_shape[0]*x_shape[1], -1))
    #binary_in = binary_temp.reshape((x_shape[0]*x_shape[1], -1))

    recon = GMMdisag5(
        y_in, theta_mu1_in, theta_sig1_in, coeff1_in, theta_mu2_in,
        theta_sig2_in, coeff2_in, theta_mu3_in, theta_sig3_in, coeff3_in,
        theta_mu4_in, theta_sig4_in, coeff4_in, theta_mu5_in, theta_sig5_in,
        coeff5_in
    )  # BiGMM(x_in, theta_mu_in, theta_sig_in, coeff_in, corr_in, binary_in)
    recon = recon.reshape((x_shape[0], x_shape[1]))
    recon.name = 'gmm_out'
    '''
    recon5 = GMM(y_in[:,4, None], theta_mu5_in, theta_sig5_in, coeff5_in)
    recon5 = recon.reshape((x_shape[0], x_shape[1]))    
    '''
    recon_term = recon.sum(axis=0).mean()
    recon_term = recon.sum(axis=0).mean()
    recon_term.name = 'recon_term'

    #kl_temp = kl_temp * mask

    kl_term = kl_temp.sum(axis=0).mean()
    kl_term.name = 'kl_term'

    #nll_upper_bound_0 = recon_term + kl_term
    #nll_upper_bound_0.name = 'nll_upper_bound_0'
    if (flgMSE == 1):
        nll_upper_bound = recon_term + kl_term + mse1 + mse2 + mse3 + mse4 + mse5
    else:
        nll_upper_bound = recon_term + kl_term
    nll_upper_bound.name = 'nll_upper_bound'
    '''
    max_x = x.max()
    mean_x = x.mean()
    min_x = x.min()
    max_x.name = 'max_x'
    mean_x.name = 'mean_x'
    min_x.name = 'min_x'

    max_theta_mu = theta_mu_in.max()
    mean_theta_mu = theta_mu_in.mean()
    min_theta_mu = theta_mu_in.min()
    max_theta_mu.name = 'max_theta_mu'
    mean_theta_mu.name = 'mean_theta_mu'
    min_theta_mu.name = 'min_theta_mu'

    max_theta_sig = theta_sig_in.max()
    mean_theta_sig = theta_sig_in.mean()
    min_theta_sig = theta_sig_in.min()
    max_theta_sig.name = 'max_theta_sig'
    mean_theta_sig.name = 'mean_theta_sig'
    min_theta_sig.name = 'min_theta_sig'

    coeff_max = coeff_in.max()
    coeff_min = coeff_in.min()
    coeff_mean_max = coeff_in.mean(axis=0).max()
    coeff_mean_min = coeff_in.mean(axis=0).min()
    coeff_max.name = 'coeff_max'
    coeff_min.name = 'coeff_min'
    coeff_mean_max.name = 'coeff_mean_max'
    coeff_mean_min.name = 'coeff_mean_min'

    max_phi_sig = phi_sig_temp.max()
    mean_phi_sig = phi_sig_temp.mean()
    min_phi_sig = phi_sig_temp.min()
    max_phi_sig.name = 'max_phi_sig'
    mean_phi_sig.name = 'mean_phi_sig'
    min_phi_sig.name = 'min_phi_sig'

    max_prior_sig = prior_sig_temp.max()
    mean_prior_sig = prior_sig_temp.mean()
    min_prior_sig = prior_sig_temp.min()
    max_prior_sig.name = 'max_prior_sig'
    mean_prior_sig.name = 'mean_prior_sig'
    min_prior_sig.name = 'min_prior_sig'
    '''
    model.inputs = [x, mask, y, y_mask]
    model.params = params
    model.nodes = nodes

    optimizer = Adam(lr=lr)

    extension = [
        GradientClipping(batch_size=batch_size),
        EpochCount(epoch),
        Monitoring(
            freq=monitoring_freq,
            ddout=[
                nll_upper_bound, recon_term, kl_term, mse1, mse2, mse3, mse4,
                mse5, mae1, mae2, mae3, mae4, mae5, y_pred1_temp, y_pred2_temp,
                y_pred3_temp, y_pred4_temp, y_pred5_temp
            ],
            indexSep=13,
            indexDDoutPlot=[13],  # adding indexes of ddout for the plotting
            #, (6,y_pred_temp)
            instancesPlot=instancesPlot,  #0-150
            data=[Iterator(valid_data, batch_size)],
            savedFolder=save_path),
        Picklize(freq=monitoring_freq, path=save_path),
        EarlyStopping(freq=monitoring_freq,
                      path=save_path,
                      channel=channel_name),
        WeightNorm()
    ]

    lr_iterations = {0: lr, 150: (lr / 10)}

    mainloop = Training(name=pkl_name,
                        data=Iterator(train_data, batch_size),
                        model=model,
                        optimizer=optimizer,
                        cost=nll_upper_bound,
                        outputs=[nll_upper_bound],
                        extension=extension,
                        lr_iterations=lr_iterations)
    mainloop.run()
    fLog = open(save_path + '/output.csv', 'w')
    lr_iterations = {0: origLR, 100: (origLR / 10)}
    fLog.write(str(lr_iterations) + "\n")
    fLog.write(
        "log,kl,nll_upper_bound,mse1,mae1,mse2,mae2,mse3,mae3,mse4,mae4,mse5,mae5\n"
    )
    for i, item in enumerate(mainloop.trainlog.monitor['nll_upper_bound']):
        a = mainloop.trainlog.monitor['recon_term'][i]
        b = mainloop.trainlog.monitor['kl_term'][i]
        c = mainloop.trainlog.monitor['nll_upper_bound'][i]
        d = mainloop.trainlog.monitor['mse1'][i]
        e = mainloop.trainlog.monitor['mae1'][i]
        f = mainloop.trainlog.monitor['mse2'][i]
        g = mainloop.trainlog.monitor['mae2'][i]
        h = mainloop.trainlog.monitor['mse3'][i]
        j = mainloop.trainlog.monitor['mae3'][i]
        k = mainloop.trainlog.monitor['mse4'][i]
        l = mainloop.trainlog.monitor['mae4'][i]
        m = mainloop.trainlog.monitor['mse5'][i]
        n = mainloop.trainlog.monitor['mae5'][i]
        fLog.write("{},{},{},{},{},{},{},{},{},{},{},{},{}\n".format(
            a, b, c, d, e, f, g, h, j, k, l, m, n))
def main(args):

    #theano.optimizer='fast_compile'
    #theano.config.exception_verbosity='high'

    trial = int(args['trial'])
    pkl_name = 'dp_dis1-sch_%d' % trial
    channel_name = 'mae'

    data_path = args['data_path']
    save_path = args[
        'save_path']  #+'/gmm/'+datetime.datetime.now().strftime("%y-%m-%d_%H-%M")
    flgMSE = int(args['flgMSE'])

    period = int(args['period'])
    n_steps = int(args['n_steps'])
    stride_train = int(args['stride_train'])
    stride_test = n_steps  # int(args['stride_test'])

    monitoring_freq = int(args['monitoring_freq'])
    epoch = int(args['epoch'])
    batch_size = int(args['batch_size'])
    x_dim = int(args['x_dim'])
    y_dim = int(args['y_dim'])
    flgAgg = int(args['flgAgg'])
    z_dim = int(args['z_dim'])
    rnn_dim = int(args['rnn_dim'])
    k = int(args['num_k'])  #a mixture of K Gaussian functions
    lr = float(args['lr'])
    typeLoad = int(args['typeLoad'])
    debug = int(args['debug'])
    kSchedSamp = int(args['kSchedSamp'])

    print "trial no. %d" % trial
    print "batch size %d" % batch_size
    print "learning rate %f" % lr
    print "saving pkl file '%s'" % pkl_name
    print "to the save path '%s'" % save_path

    q_z_dim = 150
    p_z_dim = 150
    p_x_dim = 150  #250
    x2s_dim = 100  #250
    y2s_dim = 100
    z2s_dim = 100  #150
    target_dim = k  #x_dim #(x_dim-1)*k

    model = Model()
    Xtrain, ytrain, Xval, yval, Xtest, ytest, reader = fetch_dataport(
        data_path,
        windows,
        appliances,
        numApps=flgAgg,
        period=period,
        n_steps=n_steps,
        stride_train=stride_train,
        stride_test=stride_test,
        trainPer=0.6,
        valPer=0.2,
        testPer=0.2,
        typeLoad=typeLoad,
        flgAggSumScaled=1,
        flgFilterZeros=1)
    print(reader.stdTrain, reader.meanTrain)
    instancesPlot = {
        0: [4],
        2: [10]
    }  #for now use hard coded instancesPlot for kelly sampling

    train_data = Dataport(
        name='train',
        prep='normalize',
        cond=True,  # False
        #path=data_path,
        inputX=Xtrain,
        labels=ytrain)

    X_mean = train_data.X_mean
    X_std = train_data.X_std

    valid_data = Dataport(
        name='valid',
        prep='normalize',
        cond=True,  # False
        #path=data_path,
        X_mean=X_mean,
        X_std=X_std,
        inputX=Xval,
        labels=yval)

    test_data = Dataport(
        name='valid',
        prep='normalize',
        cond=True,  # False
        #path=data_path,
        X_mean=X_mean,
        X_std=X_std,
        inputX=Xtest,
        labels=ytest)

    init_W = InitCell('rand')
    init_U = InitCell('ortho')
    init_b = InitCell('zeros')
    init_b_sig = InitCell('const', mean=0.6)

    x, mask, y, y_mask = train_data.theano_vars()
    scheduleSamplingMask = T.fvector('schedMask')

    x.name = 'x_original'
    if debug:
        x.tag.test_value = np.zeros((15, batch_size, x_dim), dtype=np.float32)
        temp = np.ones((15, batch_size), dtype=np.float32)
        temp[:, -2:] = 0.
        mask.tag.test_value = temp
    """ print (mainloop)
    attrs = vars(mainloop)
    print ', '.join("%s: %s" % item for item in attrs.items())

    print (type(mainloop.model.nodes[1]))
    print (type(mainloop.model.params))"""

    #for node in mainloop.model.nodes:
    #  print(node.name)
    '''
    for node in mainloop.model.nodes:
      print("Name:", node.name, "Parent:", node.parent, "Unit:",node.unit,"init_W:", node.init_W, "init_b:", node.init_b)

    for param in mainloop.model.params:
      print(type(param),param)
    '''
    """rnn = LSTM(name='rnn',
             parent=['x_1', 'z_1','y_1'],
             parent_dim=[x2s_dim, z2s_dim, y_dim],
             nout=rnn_dim,
             unit='tanh',
             init_W=mainloop.model.nodes[0].init_W,
             init_U=mainloop.model.nodes[0].init_U,
             init_b=mainloop.model.nodes[0].init_b)

    x_1 = FullyConnectedLayer(name='x_1',
                              parent=['x_t'],
                              parent_dim=[x_dim],
                              nout=x2s_dim,
                              unit='relu',
                              init_W=mainloop.model.nodes[1].init_W,
                              init_b=mainloop.model.nodes[1].init_b)

    y_1 = FullyConnectedLayer(name='y_1',
                              parent=['y_t'],
                              parent_dim=[y_dim],
                              nout=y2s_dim,
                              unit='relu',
                              init_W=mainloop.model.nodes[2].init_W,
                              init_b=mainloop.model.nodes[2].init_b)

    z_1 = FullyConnectedLayer(name='z_1',
                              parent=['z_t'],
                              parent_dim=[z_dim],
                              nout=z2s_dim,
                              unit='relu',
                              init_W=mainloop.model.nodes[3].init_W,
                              init_b=mainloop.model.nodes[3].init_b)

    phi_1 = FullyConnectedLayer(name='phi_1',
                                parent=['x_1', 's_tm1','y_1'],
                                parent_dim=[x2s_dim, rnn_dim,y2s_dim],
                                nout=q_z_dim,
                                unit='relu',
                                init_W=mainloop.model.nodes[4].init_W,
                                init_b=mainloop.model.nodes[4].init_b)

    phi_mu = FullyConnectedLayer(name='phi_mu',
                                 parent=['phi_1'],
                                 parent_dim=[q_z_dim],
                                 nout=z_dim,
                                 unit='linear',
                                 init_W=mainloop.model.nodes[5].init_W,
                                 init_b=mainloop.model.nodes[5].init_b)

    phi_sig = FullyConnectedLayer(name='phi_sig',
                                  parent=['phi_1'],
                                  parent_dim=[q_z_dim],
                                  nout=z_dim,
                                  unit='softplus',
                                  cons=1e-4,
                                  init_W=mainloop.model.nodes[6].init_W,
                                  init_b=mainloop.model.nodes[6].init_b)

    prior_1 = FullyConnectedLayer(name='prior_1',
                                  parent=['x_1','s_tm1'],
                                  parent_dim=[x2s_dim,rnn_dim],
                                  nout=p_z_dim,
                                  unit='relu',
                                  init_W=mainloop.model.nodes[7].init_W,
                                  init_b=mainloop.model.nodes[7].init_b)

    prior_mu = FullyConnectedLayer(name='prior_mu',
                                   parent=['prior_1'],
                                   parent_dim=[p_z_dim],
                                   nout=z_dim,
                                   unit='linear',
                                   init_W=mainloop.model.nodes[8].init_W,
                                   init_b=mainloop.model.nodes[8].init_b)

    prior_sig = FullyConnectedLayer(name='prior_sig',
                                    parent=['prior_1'],
                                    parent_dim=[p_z_dim],
                                    nout=z_dim,
                                    unit='softplus',
                                    cons=1e-4,
                                    init_W=mainloop.model.nodes[9].init_W,
                                    init_b=mainloop.model.nodes[9].init_b)

    theta_1 = FullyConnectedLayer(name='theta_1',
                                  parent=['z_1', 's_tm1'],
                                  parent_dim=[z2s_dim, rnn_dim],
                                  nout=p_x_dim,
                                  unit='relu',
                                  init_W=mainloop.model.nodes[10].init_W,
                                  init_b=mainloop.model.nodes[10].init_b)

    theta_mu = FullyConnectedLayer(name='theta_mu',
                                   parent=['theta_1'],
                                   parent_dim=[p_x_dim],
                                   nout=target_dim,
                                   unit='linear',
                                   init_W=mainloop.model.nodes[11].init_W,
                                   init_b=mainloop.model.nodes[11].init_b)

    theta_sig = FullyConnectedLayer(name='theta_sig',
                                    parent=['theta_1'],
                                    parent_dim=[p_x_dim],
                                    nout=target_dim,
                                    unit='softplus',
                                    cons=1e-4,
                                    init_W=mainloop.model.nodes[12].init_W,
                                    init_b=mainloop.model.nodes[12].init_b)

    coeff = FullyConnectedLayer(name='coeff',
                                parent=['theta_1'],
                                parent_dim=[p_x_dim],
                                nout=k,
                                unit='softmax',
                                init_W=mainloop.model.nodes[13].init_W,
                                init_b=mainloop.model.nodes[13].init_b)"""

    #pickle is from experiment gmmAE/18-05-30_16-07_app3
    fmodel = open('dp_dis1-sch_1.pkl', 'rb')
    mainloop = cPickle.load(fmodel)
    fmodel.close()

    rnn = mainloop.model.nodes[0]
    x_1 = mainloop.model.nodes[1]
    y_1 = mainloop.model.nodes[2]
    z_1 = mainloop.model.nodes[3]
    phi_1 = mainloop.model.nodes[4]
    phi_mu = mainloop.model.nodes[5]
    phi_sig = mainloop.model.nodes[6]
    prior_1 = mainloop.model.nodes[7]
    prior_mu = mainloop.model.nodes[8]
    prior_sig = mainloop.model.nodes[9]
    theta_1 = mainloop.model.nodes[10]
    theta_mu = mainloop.model.nodes[11]
    theta_sig = mainloop.model.nodes[12]
    coeff = mainloop.model.nodes[13]

    nodes = [
        rnn,
        x_1,
        y_1,
        z_1,  #dissag_pred,
        phi_1,
        phi_mu,
        phi_sig,
        prior_1,
        prior_mu,
        prior_sig,
        theta_1,
        theta_mu,
        theta_sig,
        coeff
    ]  #, corr, binary

    params = mainloop.model.params

    s_0 = rnn.get_init_state(batch_size)

    x_1_temp = x_1.fprop([x], params)
    y_1_temp = y_1.fprop([y], params)

    def inner_fn_val(x_t, s_tm1):

        prior_1_t = prior_1.fprop([x_t, s_tm1], params)
        prior_mu_t = prior_mu.fprop([prior_1_t], params)
        prior_sig_t = prior_sig.fprop([prior_1_t], params)

        z_t = Gaussian_sample(prior_mu_t, prior_sig_t)
        z_1_t = z_1.fprop([z_t], params)

        theta_1_t = theta_1.fprop([z_1_t, s_tm1], params)
        theta_mu_t = theta_mu.fprop([theta_1_t], params)
        theta_sig_t = theta_sig.fprop([theta_1_t], params)

        coeff_t = coeff.fprop([theta_1_t], params)

        pred_t = GMM_sample(theta_mu_t, theta_sig_t,
                            coeff_t)  #Gaussian_sample(theta_mu_t, theta_sig_t)
        pred_1_t = y_1.fprop([pred_t], params)
        s_t = rnn.fprop([[x_t, z_1_t, pred_1_t], [s_tm1]], params)
        #y_pred = dissag_pred.fprop([s_t], params)

        return s_t, prior_mu_t, prior_sig_t, theta_mu_t, theta_sig_t, coeff_t, pred_t  #, y_pred
        #corr_temp, binary_temp
    ((s_temp_val, prior_mu_temp_val, prior_sig_temp_val, theta_mu_temp_val, theta_sig_temp_val, coeff_temp_val, prediction_val), updates_val) =\
        theano.scan(fn=inner_fn_val,
                    sequences=[x_1_temp],
                    outputs_info=[s_0, None, None, None,  None, None, None])

    for k, v in updates_val.iteritems():
        k.default_update = v

    s_temp_val = concatenate([s_0[None, :, :], s_temp_val[:-1]], axis=0)

    def inner_fn_train(x_t, y_t, schedSampMask, s_tm1):

        phi_1_t = phi_1.fprop([x_t, s_tm1, y_t], params)
        phi_mu_t = phi_mu.fprop([phi_1_t], params)
        phi_sig_t = phi_sig.fprop([phi_1_t], params)

        prior_1_t = prior_1.fprop([x_t, s_tm1], params)
        prior_mu_t = prior_mu.fprop([prior_1_t], params)
        prior_sig_t = prior_sig.fprop([prior_1_t], params)

        z_t = Gaussian_sample(phi_mu_t, phi_sig_t)
        z_1_t = z_1.fprop([z_t], params)

        theta_1_t = theta_1.fprop([z_1_t, s_tm1], params)
        theta_mu_t = theta_mu.fprop([theta_1_t], params)
        theta_sig_t = theta_sig.fprop([theta_1_t], params)

        coeff_t = coeff.fprop([theta_1_t], params)
        #corr_t = corr.fprop([theta_1_t], params)
        #binary_t = binary.fprop([theta_1_t], params)

        pred = GMM_sample(theta_mu_t, theta_sig_t,
                          coeff_t)  #Gaussian_sample(theta_mu_t, theta_sig_t)

        if (schedSampMask == 1):
            s_t = rnn.fprop([[x_t, z_1_t, y_t], [s_tm1]], params)
        else:
            y_t_aux = y_1.fprop([pred], params)
            s_t = rnn.fprop([[x_t, z_1_t, y_t_aux], [s_tm1]], params)

        #y_pred = dissag_pred.fprop([s_t], params)

        return s_t, phi_mu_t, phi_sig_t, prior_mu_t, prior_sig_t, theta_mu_t, theta_sig_t, coeff_t, pred  #, y_pred
        #corr_temp, binary_temp
    ((s_temp, phi_mu_temp, phi_sig_temp, prior_mu_temp, prior_sig_temp, theta_mu_temp, theta_sig_temp, coeff_temp, prediction), updates) =\
        theano.scan(fn=inner_fn_train,
                    sequences=[x_1_temp, y_1_temp, scheduleSamplingMask],
                    outputs_info=[s_0, None, None, None, None, None, None, None, None])

    for k, v in updates.iteritems():
        k.default_update = v

    #s_temp = concatenate([s_0[None, :, :], s_temp[:-1]], axis=0)# seems like this is for creating an additional dimension to s_0

    theta_mu_temp.name = 'theta_mu_temp'
    theta_sig_temp.name = 'theta_sig_temp'
    coeff_temp.name = 'coeff'

    if (flgAgg == -1):
        prediction.name = 'x_reconstructed'
        mse = T.mean((prediction - x)**2)  # CHECK RESHAPE with an assertion
        mae = T.mean(T.abs(prediction - x))
        mse.name = 'mse'
        pred_in = x.reshape((x_shape[0] * x_shape[1], -1))
    else:
        prediction.name = 'pred_' + str(flgAgg)
        mse = T.mean(
            (prediction - y)**2)  # As axis = None is calculated for all
        mae = T.mean(T.abs_(prediction - y))
        mse.name = 'mse'
        mae.name = 'mae'
        pred_in = y.reshape((y.shape[0] * y.shape[1], -1))

    kl_temp = KLGaussianGaussian(phi_mu_temp, phi_sig_temp, prior_mu_temp,
                                 prior_sig_temp)

    x_shape = x.shape

    theta_mu_in = theta_mu_temp.reshape((x_shape[0] * x_shape[1], -1))
    theta_sig_in = theta_sig_temp.reshape((x_shape[0] * x_shape[1], -1))
    coeff_in = coeff_temp.reshape((x_shape[0] * x_shape[1], -1))
    #corr_in = corr_temp.reshape((x_shape[0]*x_shape[1], -1))
    #binary_in = binary_temp.reshape((x_shape[0]*x_shape[1], -1))

    recon = GMM(
        pred_in, theta_mu_in, theta_sig_in, coeff_in
    )  # BiGMM(x_in, theta_mu_in, theta_sig_in, coeff_in, corr_in, binary_in)
    recon = recon.reshape((x_shape[0], x_shape[1]))
    recon.name = 'gmm_out'

    recon_term = recon.sum(axis=0).mean()
    recon_term.name = 'recon_term'

    kl_term = kl_temp.sum(axis=0).mean()
    kl_term.name = 'kl_term'

    nll_upper_bound = recon_term + kl_term  #+ mse
    if (flgMSE):
        nll_upper_bound = nll_upper_bound + mse
    nll_upper_bound.name = 'nll_upper_bound'

    ######################## TEST (GENERATION) TIME
    prediction_val.name = 'generated__' + str(flgAgg)
    mse_val = T.mean(
        (prediction_val - y)**2)  # As axis = None is calculated for all
    mae_val = T.mean(T.abs_(prediction_val - y))

    #y_unNormalize = (y * reader.stdTrain) + reader.meanTrain # accessing to just an scalar when loading y_dim=1
    #prediction_valAux = (prediction_val * reader.stdTrain) + reader.meanTrain
    #mse_valUnNorm = T.mean((prediction_valAux - y_unNormalize)**2) # As axis = None is calculated for all
    #mae_valUnNorm = T.mean( T.abs_(prediction_valAux - y_unNormalize) )

    mse_val.name = 'mse_val'
    mae_val.name = 'mae_val'
    pred_in_val = y.reshape((y.shape[0] * y.shape[1], -1))

    theta_mu_in_val = theta_mu_temp_val.reshape((x_shape[0] * x_shape[1], -1))
    theta_sig_in_val = theta_sig_temp_val.reshape(
        (x_shape[0] * x_shape[1], -1))
    coeff_in_val = coeff_temp_val.reshape((x_shape[0] * x_shape[1], -1))

    recon_val = GMM(
        pred_in_val, theta_mu_in_val, theta_sig_in_val, coeff_in_val
    )  # BiGMM(x_in, theta_mu_in, theta_sig_in, coeff_in, corr_in, binary_in)
    recon_val = recon_val.reshape((x_shape[0], x_shape[1]))
    recon_val.name = 'gmm_out_val'

    recon_term_val = recon_val.sum(axis=0).mean()
    recon_term_val.name = 'recon_term_val'

    model.inputs = [x, mask, y, y_mask, scheduleSamplingMask]
    model.params = params
    model.nodes = nodes

    optimizer = Adam(lr=lr)

    header = "epoch,log,kl,nll_upper_bound,mse,mae\n"
    extension = [
        GradientClipping(batch_size=batch_size),
        EpochCount(epoch, save_path, header),
        Monitoring(
            freq=monitoring_freq,
            ddout=[nll_upper_bound, recon_term, kl_term, mse, mae, prediction],
            indexSep=5,
            instancesPlot=instancesPlot,  #{0:[4,20],2:[5,10]},#, 80,150
            data=[Iterator(valid_data, batch_size)],
            savedFolder=save_path),
        Picklize(freq=monitoring_freq, path=save_path),
        EarlyStopping(freq=monitoring_freq,
                      path=save_path,
                      channel=channel_name),
        WeightNorm()
    ]

    lr_iterations = {0: lr, 75: (lr / 10), 150: (lr / 100)}
    """mainloop = Training(
        name=pkl_name,
        data=Iterator(train_data, batch_size),
        model=model,
        optimizer=optimizer,
        cost=nll_upper_bound,
        outputs=[recon_term, kl_term, nll_upper_bound, mse, mae],
        n_steps = n_steps,
        extension=extension,
        lr_iterations=lr_iterations,
        k_speedOfconvergence=kSchedSamp
    )"""
    """mainloop.restore(
        data=Iterator(train_data, batch_size),
        cost=nll_upper_bound,
        model=model,
        optimizer=mainloop.optimizer
      )"""

    mainloop.restore(name=pkl_name,
                     data=Iterator(train_data, batch_size),
                     model=model,
                     optimizer=optimizer,
                     cost=nll_upper_bound,
                     outputs=[recon_term, kl_term, nll_upper_bound, mse, mae],
                     n_steps=n_steps,
                     extension=extension,
                     lr_iterations=lr_iterations,
                     k_speedOfconvergence=kSchedSamp)

    mainloop.run()

    data = Iterator(test_data, batch_size)

    test_fn = theano.function(
        inputs=[x, y],  #[x, y],
        #givens={x:Xtest},
        #on_unused_input='ignore',
        #z=( ,200,1)
        allow_input_downcast=True,
        outputs=[prediction_val, recon_term_val, mse_val,
                 mae_val]  #prediction_val, mse_val, mae_val
        ,
        updates=
        updates_val  #, allow_input_downcast=True, on_unused_input='ignore'
    )
    testOutput = []
    numBatchTest = 0
    for batch in data:
        outputGeneration = test_fn(batch[0], batch[2])  #(20, 220, 1)
        testOutput.append(outputGeneration[1:])
        # outputGeneration[0].shape #(20, 220, 40)
        #if (numBatchTest<5):
        '''
      plt.figure(1)
      plt.plot(np.transpose(outputGeneration[0],[1,0,2])[4])
      plt.savefig(save_path+"/vrnn_dis_generated{}_z_0-4".format(numBatchTest))
      plt.clf()

      plt.figure(2)
      plt.plot(np.transpose(outputGeneration[1],[1,0,2])[4])
      plt.savefig(save_path+"/vrnn_dis_generated{}_s_0-4".format(numBatchTest))
      plt.clf()

      plt.figure(3)
      plt.plot(np.transpose(outputGeneration[2],[1,0,2])[4])
      plt.savefig(save_path+"/vrnn_dis_generated{}_theta_0-4".format(numBatchTest))
      plt.clf()
      '''
        plt.figure(4)
        plt.plot(np.transpose(outputGeneration[0], [1, 0, 2])[4])
        plt.plot(np.transpose(batch[2], [1, 0, 2])[4])
        plt.savefig(
            save_path +
            "/vrnn_dis_generated{}_RealAndPred_0-4".format(numBatchTest))
        plt.clf()

        plt.figure(4)
        plt.plot(np.transpose(batch[0], [1, 0, 2])[4])
        plt.savefig(save_path +
                    "/vrnn_dis_generated{}_Realagg_0-4".format(numBatchTest))
        plt.clf()
        numBatchTest += 1

    testOutput = np.asarray(testOutput)
    print(testOutput.shape)
    recon_test = testOutput[:, 0].mean()
    mse_test = testOutput[:, 1].mean()
    mae_test = testOutput[:, 2].mean()
    #mseUnNorm_test = testOutput[:, 3].mean()
    #maeUnNorm_test = testOutput[:, 4].mean()

    fLog = open(save_path + '/output.csv', 'w')
    fLog.write(str(lr_iterations) + "\n")
    fLog.write(str(windows) + "\n")
    fLog.write("logTest,mseTest,maeTest, mseTestUnNorm, maeTestUnNorm\n")
    fLog.write("{},{},{}\n".format(recon_test, mse_test, mae_test))
    fLog.write("q_z_dim,p_z_dim,p_x_dim,x2s_dim,y2s_dim,z2s_dim\n")
    fLog.write("{},{},{},{},{},{}\n".format(q_z_dim, p_z_dim, p_x_dim, x2s_dim,
                                            y2s_dim, z2s_dim))
    header = "epoch,log,kl,mse,mae\n"
    fLog.write(header)
    for i, item in enumerate(mainloop.trainlog.monitor['recon_term']):
        f = mainloop.trainlog.monitor['epoch'][i]
        a = mainloop.trainlog.monitor['recon_term'][i]
        b = mainloop.trainlog.monitor['kl_term'][i]
        d = mainloop.trainlog.monitor['mse'][i]
        e = mainloop.trainlog.monitor['mae'][i]
        fLog.write("{:d},{:.2f},{:.2f},{:.3f},{:.3f}\n".format(f, a, b, d, e))
Example #4
0
def main(args):

    theano.optimizer = 'fast_compile'
    theano.config.exception_verbosity = 'high'
    trial = int(args['trial'])
    pkl_name = 'vrnn_gauss_%d' % trial
    channel_name = 'valid_nll_upper_bound'

    data_path = args['data_path']
    save_path = args['save_path']
    save_path = args['save_path']
    period = int(args['period'])
    n_steps = int(args['n_steps'])
    stride_train = int(args['stride_train'])
    stride_test = int(args['stride_test'])

    monitoring_freq = int(args['monitoring_freq'])
    epoch = int(args['epoch'])
    batch_size = int(args['batch_size'])
    x_dim = int(args['x_dim'])
    z_dim = int(args['z_dim'])
    rnn_dim = int(args['rnn_dim'])
    lr = float(args['lr'])
    debug = int(args['debug'])

    print "trial no. %d" % trial
    print "batch size %d" % batch_size
    print "learning rate %f" % lr
    print "saving pkl file '%s'" % pkl_name
    print "to the save path '%s'" % save_path

    q_z_dim = 150
    p_z_dim = 150
    p_x_dim = 250
    x2s_dim = 10  #250
    z2s_dim = 10  #150
    target_dim = x_dim  #(x_dim-1)

    model = Model()
    train_data = UKdale(
        name='train',
        prep='none',  #normalize
        cond=False,
        path=data_path,
        period=period,
        n_steps=n_steps,
        x_dim=x_dim,
        stride_train=stride_train,
        stride_test=stride_test)

    X_mean = train_data.X_mean
    X_std = train_data.X_std

    valid_data = UKdale(
        name='valid',
        prep='none',  #normalize
        cond=False,
        path=data_path,
        X_mean=X_mean,
        X_std=X_std)

    init_W = InitCell('rand')
    init_U = InitCell('ortho')
    init_b = InitCell('zeros')
    init_b_sig = InitCell('const', mean=0.6)

    x, mask = train_data.theano_vars()

    if debug:
        x.tag.test_value = np.zeros((15, batch_size, x_dim), dtype=np.float32)
        temp = np.ones((15, batch_size), dtype=np.float32)
        temp[:, -2:] = 0.
        mask.tag.test_value = temp

    x_1 = FullyConnectedLayer(
        name='x_1',
        parent=['x_t'],  #OrderDict parent['x_t'] = x_dim
        parent_dim=[x_dim],
        nout=x2s_dim,
        unit='relu',
        init_W=init_W,
        init_b=init_b)

    z_1 = FullyConnectedLayer(name='z_1',
                              parent=['z_t'],
                              parent_dim=[z_dim],
                              nout=z2s_dim,
                              unit='relu',
                              init_W=init_W,
                              init_b=init_b)

    rnn = LSTM(name='rnn',
               parent=['x_1', 'z_1'],
               parent_dim=[x2s_dim, z2s_dim],
               nout=rnn_dim,
               unit='tanh',
               init_W=init_W,
               init_U=init_U,
               init_b=init_b)

    phi_1 = FullyConnectedLayer(
        name='phi_1',  ## encoder
        parent=['x_1', 's_tm1'],
        parent_dim=[x2s_dim, rnn_dim],
        nout=q_z_dim,
        unit='relu',
        init_W=init_W,
        init_b=init_b)

    phi_mu = FullyConnectedLayer(name='phi_mu',
                                 parent=['phi_1'],
                                 parent_dim=[q_z_dim],
                                 nout=z_dim,
                                 unit='linear',
                                 init_W=init_W,
                                 init_b=init_b)

    phi_sig = FullyConnectedLayer(name='phi_sig',
                                  parent=['phi_1'],
                                  parent_dim=[q_z_dim],
                                  nout=z_dim,
                                  unit='softplus',
                                  cons=1e-4,
                                  init_W=init_W,
                                  init_b=init_b_sig)

    prior_1 = FullyConnectedLayer(name='prior_1',
                                  parent=['s_tm1'],
                                  parent_dim=[rnn_dim],
                                  nout=p_z_dim,
                                  unit='relu',
                                  init_W=init_W,
                                  init_b=init_b)

    prior_mu = FullyConnectedLayer(name='prior_mu',
                                   parent=['prior_1'],
                                   parent_dim=[p_z_dim],
                                   nout=z_dim,
                                   unit='linear',
                                   init_W=init_W,
                                   init_b=init_b)

    prior_sig = FullyConnectedLayer(name='prior_sig',
                                    parent=['prior_1'],
                                    parent_dim=[p_z_dim],
                                    nout=z_dim,
                                    unit='softplus',
                                    cons=1e-4,
                                    init_W=init_W,
                                    init_b=init_b_sig)

    theta_1 = FullyConnectedLayer(
        name='theta_1',  ### decoder
        parent=['z_1', 's_tm1'],
        parent_dim=[z2s_dim, rnn_dim],
        nout=p_x_dim,
        unit='relu',
        init_W=init_W,
        init_b=init_b)

    theta_mu = FullyConnectedLayer(name='theta_mu',
                                   parent=['theta_1'],
                                   parent_dim=[p_x_dim],
                                   nout=target_dim,
                                   unit='linear',
                                   init_W=init_W,
                                   init_b=init_b)

    theta_sig = FullyConnectedLayer(name='theta_sig',
                                    parent=['theta_1'],
                                    parent_dim=[p_x_dim],
                                    nout=target_dim,
                                    unit='softplus',
                                    cons=1e-4,
                                    init_W=init_W,
                                    init_b=init_b_sig)

    corr = FullyConnectedLayer(
        name='corr',  ## rho
        parent=['theta_1'],
        parent_dim=[p_x_dim],
        nout=1,
        unit='tanh',
        init_W=init_W,
        init_b=init_b)

    binary = FullyConnectedLayer(name='binary',
                                 parent=['theta_1'],
                                 parent_dim=[p_x_dim],
                                 nout=1,
                                 unit='sigmoid',
                                 init_W=init_W,
                                 init_b=init_b)

    nodes = [
        rnn, x_1, z_1, phi_1, phi_mu, phi_sig, prior_1, prior_mu, prior_sig,
        theta_1, theta_mu, theta_sig
    ]  #, corr, binary

    params = OrderedDict()

    for node in nodes:
        if node.initialize() is not None:
            params.update(
                node.initialize()
            )  #Initialize values of the W matrices according to dim of parents

    params = init_tparams(params)

    s_0 = rnn.get_init_state(batch_size)

    x_1_temp = x_1.fprop([x], params)

    def inner_fn(x_t, s_tm1):

        phi_1_t = phi_1.fprop([x_t, s_tm1], params)
        phi_mu_t = phi_mu.fprop([phi_1_t], params)
        phi_sig_t = phi_sig.fprop([phi_1_t], params)

        prior_1_t = prior_1.fprop([s_tm1], params)
        prior_mu_t = prior_mu.fprop([prior_1_t], params)
        prior_sig_t = prior_sig.fprop([prior_1_t], params)

        z_t = Gaussian_sample(phi_mu_t, phi_sig_t)
        z_1_t = z_1.fprop([z_t], params)

        s_t = rnn.fprop([[x_t, z_1_t], [s_tm1]], params)

        return s_t, phi_mu_t, phi_sig_t, prior_mu_t, prior_sig_t, z_1_t

    ((s_temp, phi_mu_temp, phi_sig_temp, prior_mu_temp, prior_sig_temp, z_1_temp), updates) =\
        theano.scan(fn=inner_fn,
                    sequences=[x_1_temp], #non_sequences unchanging variables
                    #The tensor(s) to be looped over should be provided to scan using the sequence keyword argument
                    outputs_info=[s_0, None, None, None, None, None])#Initialization occurs in outputs_info

    for k, v in updates.iteritems():
        print("Update")
        k.default_update = v

    s_temp = concatenate([s_0[None, :, :], s_temp[:-1]], axis=0)
    s_temp.name = 'h_1'  #gisse
    z_1_temp.name = 'z_1'  #gisse
    theta_1_temp = theta_1.fprop([z_1_temp, s_temp], params)
    theta_mu_temp = theta_mu.fprop([theta_1_temp], params)
    theta_mu_temp.name = 'theta_mu'
    theta_sig_temp = theta_sig.fprop([theta_1_temp], params)
    theta_sig_temp.name = 'theta_sig'
    #corr_temp = corr.fprop([theta_1_temp], params)
    #corr_temp.name = 'corr'
    #binary_temp = binary.fprop([theta_1_temp], params)
    #binary_temp.name = 'binary'

    kl_temp = KLGaussianGaussian(phi_mu_temp, phi_sig_temp, prior_mu_temp,
                                 prior_sig_temp)

    x_shape = x.shape
    x_in = x.reshape((x_shape[0] * x_shape[1], -1))
    theta_mu_in = theta_mu_temp.reshape((x_shape[0] * x_shape[1], -1))
    theta_sig_in = theta_sig_temp.reshape((x_shape[0] * x_shape[1], -1))
    #corr_in = corr_temp.reshape((x_shape[0]*x_shape[1], -1))
    #binary_in = binary_temp.reshape((x_shape[0]*x_shape[1], -1))

    recon = Gaussian(
        x_in, theta_mu_in, theta_sig_in
    )  # BiGauss(x_in, theta_mu_in, theta_sig_in, corr_in, binary_in) # second term for the loss function
    recon = recon.reshape((x_shape[0], x_shape[1]))
    #recon = recon * mask
    recon_term = recon.sum(axis=0).mean()
    recon_term.name = 'recon_term'

    #kl_temp = kl_temp * mask
    kl_term = kl_temp.sum(axis=0).mean()
    kl_term.name = 'kl_term'

    nll_upper_bound = recon_term + kl_term
    nll_upper_bound.name = 'nll_upper_bound'

    max_x = x.max()
    mean_x = x.mean()
    min_x = x.min()
    max_x.name = 'max_x'
    mean_x.name = 'mean_x'
    min_x.name = 'min_x'

    max_theta_mu = theta_mu_in.max()
    mean_theta_mu = theta_mu_in.mean()
    min_theta_mu = theta_mu_in.min()
    max_theta_mu.name = 'max_theta_mu'
    mean_theta_mu.name = 'mean_theta_mu'
    min_theta_mu.name = 'min_theta_mu'

    max_theta_sig = theta_sig_in.max()
    mean_theta_sig = theta_sig_in.mean()
    min_theta_sig = theta_sig_in.min()
    max_theta_sig.name = 'max_theta_sig'
    mean_theta_sig.name = 'mean_theta_sig'
    min_theta_sig.name = 'min_theta_sig'

    max_phi_sig = phi_sig_temp.max()
    mean_phi_sig = phi_sig_temp.mean()
    min_phi_sig = phi_sig_temp.min()
    max_phi_sig.name = 'max_phi_sig'
    mean_phi_sig.name = 'mean_phi_sig'
    min_phi_sig.name = 'min_phi_sig'

    max_prior_sig = prior_sig_temp.max()
    mean_prior_sig = prior_sig_temp.mean()
    min_prior_sig = prior_sig_temp.min()
    max_prior_sig.name = 'max_prior_sig'
    mean_prior_sig.name = 'mean_prior_sig'
    min_prior_sig.name = 'min_prior_sig'

    prior_sig_output = prior_sig_temp
    prior_sig_output.name = 'prior_sig_o'
    phi_sig_output = phi_sig_temp
    phi_sig_output.name = 'phi_sig_o'

    model.inputs = [x, mask]
    model.params = params
    model.nodes = nodes

    optimizer = Adam(lr=lr)

    extension = [
        GradientClipping(batch_size=batch_size),
        EpochCount(epoch),
        Monitoring(
            freq=monitoring_freq,
            ddout=[
                nll_upper_bound,
                recon_term,
                kl_term,
                max_phi_sig,
                mean_phi_sig,
                min_phi_sig,
                max_prior_sig,
                mean_prior_sig,
                min_prior_sig,
                max_theta_sig,
                mean_theta_sig,
                min_theta_sig,
                max_x,
                mean_x,
                min_x,
                max_theta_mu,
                mean_theta_mu,
                min_theta_mu,  #0-16
                #binary_temp, corr_temp,
                theta_mu_temp,
                theta_sig_temp,  #17-20
                s_temp,
                z_1_temp
                #phi_sig_output,phi_sig_output
            ],  ## added in order to explore the distributions
            indexSep=18,
            data=[Iterator(valid_data, batch_size)]),
        Picklize(freq=monitoring_freq, path=save_path),
        EarlyStopping(freq=monitoring_freq,
                      path=save_path,
                      channel=channel_name),
        WeightNorm()
    ]

    mainloop = Training(name=pkl_name,
                        data=Iterator(train_data, batch_size),
                        model=model,
                        optimizer=optimizer,
                        cost=nll_upper_bound,
                        outputs=[nll_upper_bound],
                        extension=extension)
    mainloop.run()
    results = mainloop.extension[2].lastResults
    seq = self.inference(results, n_steps)
    plot_lines_iamondb_example(
        X=seq
    )  #X, y=None, equal=True, show=False, save=False, save_name="tmp.png"):
def main(args):

    #theano.optimizer='fast_compile'
    #theano.config.exception_verbosity='high'

    trial = int(args['trial'])
    pkl_name = 'vrnn_gmm_%d' % trial
    channel_name = 'nll_upper_bound'

    data_path = args['data_path']
    save_path = args[
        'save_path']  #+'/gmm/'+datetime.datetime.now().strftime("%y-%m-%d_%H-%M")
    flgMSE = int(args['flgMSE'])

    period = int(args['period'])
    n_steps = int(args['n_steps'])
    stride_train = int(args['stride_train'])
    stride_test = n_steps  # int(args['stride_test'])

    monitoring_freq = int(args['monitoring_freq'])
    epoch = int(args['epoch'])
    batch_size = int(args['batch_size'])
    x_dim = int(args['x_dim'])
    y_dim = int(args['y_dim'])
    flgAgg = int(args['flgAgg'])
    z_dim = int(args['z_dim'])
    rnn_dim = int(args['rnn_dim'])
    k = int(args['num_k'])  #a mixture of K Gaussian functions
    lr = float(args['lr'])
    typeLoad = int(args['typeLoad'])
    debug = int(args['debug'])
    n_steps_val = n_steps

    print "trial no. %d" % trial
    print "batch size %d" % batch_size
    print "learning rate %f" % lr
    print "saving pkl file '%s'" % pkl_name
    print "to the save path '%s'" % save_path
    print str(windows)

    q_z_dim = 180
    p_z_dim = 180
    p_x_dim = 200
    x2s_dim = 100
    z2s_dim = 150
    target_dim = k  #x_dim #(x_dim-1)*k

    model = Model()
    Xtrain, ytrain, Xval, yval, Xtest, ytest, reader = fetch_dataport(
        data_path,
        windows,
        appliances,
        numApps=flgAgg,
        period=period,
        n_steps=n_steps,
        stride_train=stride_train,
        stride_test=stride_test,
        trainPer=0.6,
        valPer=0.2,
        testPer=0.2,
        typeLoad=typeLoad,
        flgAggSumScaled=1,
        flgFilterZeros=1)

    instancesPlot = {
        0: [5]
    }  #for now use hard coded instancesPlot for kelly sampling

    train_data = Dataport(
        name='train',
        prep='normalize',
        cond=True,  # False
        #path=data_path,
        inputX=ytrain,
        labels=Xtrain)

    X_mean = train_data.X_mean
    X_std = train_data.X_std

    valid_data = Dataport(
        name='valid',
        prep='normalize',
        cond=True,  # False
        #path=data_path,
        X_mean=X_mean,
        X_std=X_std,
        inputX=yval,
        labels=Xval)

    init_W = InitCell('rand')
    init_U = InitCell('ortho')
    init_b = InitCell('zeros')
    init_b_sig = InitCell('const', mean=0.6)

    x, mask, y, y_mask = train_data.theano_vars()
    scheduleSamplingMask = T.fvector('schedMask')

    x.name = 'x_original'
    if debug:
        x.tag.test_value = np.zeros((15, batch_size, x_dim), dtype=np.float32)
        temp = np.ones((15, batch_size), dtype=np.float32)
        temp[:, -2:] = 0.
        mask.tag.test_value = temp

    x_1 = FullyConnectedLayer(name='x_1',
                              parent=['x_t'],
                              parent_dim=[x_dim],
                              nout=x2s_dim,
                              unit='relu',
                              init_W=init_W,
                              init_b=init_b)

    z_1 = FullyConnectedLayer(name='z_1',
                              parent=['z_t'],
                              parent_dim=[z_dim],
                              nout=z2s_dim,
                              unit='relu',
                              init_W=init_W,
                              init_b=init_b)

    rnn = LSTM(name='rnn',
               parent=['x_1', 'z_1'],
               parent_dim=[x2s_dim, z2s_dim],
               nout=rnn_dim,
               unit='tanh',
               init_W=init_W,
               init_U=init_U,
               init_b=init_b)
    '''
    dissag_pred = FullyConnectedLayer(name='disag_1',
                                  parent=['s_tm1'],
                                  parent_dim=[rnn_dim],
                                  nout=num_apps,
                                  unit='relu',
                                  init_W=init_W,
                                  init_b=init_b)
    '''
    phi_1 = FullyConnectedLayer(name='phi_1',
                                parent=['x_1', 's_tm1'],
                                parent_dim=[x2s_dim, rnn_dim],
                                nout=q_z_dim,
                                unit='relu',
                                init_W=init_W,
                                init_b=init_b)

    phi_mu = FullyConnectedLayer(name='phi_mu',
                                 parent=['phi_1'],
                                 parent_dim=[q_z_dim],
                                 nout=z_dim,
                                 unit='linear',
                                 init_W=init_W,
                                 init_b=init_b)

    phi_sig = FullyConnectedLayer(name='phi_sig',
                                  parent=['phi_1'],
                                  parent_dim=[q_z_dim],
                                  nout=z_dim,
                                  unit='softplus',
                                  cons=1e-4,
                                  init_W=init_W,
                                  init_b=init_b_sig)

    prior_1 = FullyConnectedLayer(name='prior_1',
                                  parent=['s_tm1'],
                                  parent_dim=[rnn_dim],
                                  nout=p_z_dim,
                                  unit='relu',
                                  init_W=init_W,
                                  init_b=init_b)

    prior_mu = FullyConnectedLayer(name='prior_mu',
                                   parent=['prior_1'],
                                   parent_dim=[p_z_dim],
                                   nout=z_dim,
                                   unit='linear',
                                   init_W=init_W,
                                   init_b=init_b)

    prior_sig = FullyConnectedLayer(name='prior_sig',
                                    parent=['prior_1'],
                                    parent_dim=[p_z_dim],
                                    nout=z_dim,
                                    unit='softplus',
                                    cons=1e-4,
                                    init_W=init_W,
                                    init_b=init_b_sig)

    theta_1 = FullyConnectedLayer(name='theta_1',
                                  parent=['z_1', 's_tm1'],
                                  parent_dim=[z2s_dim, rnn_dim],
                                  nout=p_x_dim,
                                  unit='relu',
                                  init_W=init_W,
                                  init_b=init_b)

    theta_mu = FullyConnectedLayer(name='theta_mu',
                                   parent=['theta_1'],
                                   parent_dim=[p_x_dim],
                                   nout=target_dim,
                                   unit='linear',
                                   init_W=init_W,
                                   init_b=init_b)

    theta_sig = FullyConnectedLayer(name='theta_sig',
                                    parent=['theta_1'],
                                    parent_dim=[p_x_dim],
                                    nout=target_dim,
                                    unit='softplus',
                                    cons=1e-4,
                                    init_W=init_W,
                                    init_b=init_b_sig)

    coeff = FullyConnectedLayer(name='coeff',
                                parent=['theta_1'],
                                parent_dim=[p_x_dim],
                                nout=k,
                                unit='softmax',
                                init_W=init_W,
                                init_b=init_b)

    corr = FullyConnectedLayer(name='corr',
                               parent=['theta_1'],
                               parent_dim=[p_x_dim],
                               nout=k,
                               unit='tanh',
                               init_W=init_W,
                               init_b=init_b)

    binary = FullyConnectedLayer(name='binary',
                                 parent=['theta_1'],
                                 parent_dim=[p_x_dim],
                                 nout=1,
                                 unit='sigmoid',
                                 init_W=init_W,
                                 init_b=init_b)

    nodes = [
        rnn,
        x_1,
        z_1,  #dissag_pred,
        phi_1,
        phi_mu,
        phi_sig,
        prior_1,
        prior_mu,
        prior_sig,
        theta_1,
        theta_mu,
        theta_sig,
        coeff
    ]  #, corr, binary

    params = OrderedDict()

    for node in nodes:
        if node.initialize() is not None:
            params.update(node.initialize())

    params = init_tparams(params)

    s_0 = rnn.get_init_state(batch_size)

    x_1_temp = x_1.fprop([x], params)

    def inner_val_fn(s_tm1):
        '''
        phi_1_t = phi_1.fprop([x_t, s_tm1], params)
        phi_mu_t = phi_mu.fprop([phi_1_t], params)
        phi_sig_t = phi_sig.fprop([phi_1_t], params)
        '''
        prior_1_t = prior_1.fprop([s_tm1], params)
        prior_mu_t = prior_mu.fprop([prior_1_t], params)
        prior_sig_t = prior_sig.fprop([prior_1_t], params)

        z_t = Gaussian_sample(prior_mu_t, prior_sig_t)
        z_1_t = z_1.fprop([z_t], params)

        theta_1_t = theta_1.fprop([z_1_t, s_tm1], params)
        theta_mu_t = theta_mu.fprop([theta_1_t], params)
        theta_sig_t = theta_sig.fprop([theta_1_t], params)
        coeff_t = coeff.fprop([theta_1_t], params)

        pred_t = GMM_sample(theta_mu_t, theta_sig_t,
                            coeff_t)  #Gaussian_sample(theta_mu_t, theta_sig_t)
        pred_1_t = x_1.fprop([pred_t], params)

        s_t = rnn.fprop([[pred_1_t, z_1_t], [s_tm1]], params)

        return s_t, pred_t, z_t, theta_1_t, theta_mu_t, theta_sig_t, coeff_t
        # prior_mu_temp_val, prior_sig_temp_val
    ((s_temp_val, prediction_val, z_t_temp_val, theta_1_temp_val, theta_mu_temp_val, theta_sig_temp_val, coeff_temp_val), updates_val) =\
        theano.scan(fn=inner_val_fn , n_steps=n_steps_val, #already 1 subtracted if doing next step
                    outputs_info=[s_0, None, None,  None, None, None, None])

    for k, v in updates_val.iteritems():
        k.default_update = v

    def inner_fn(x_t, s_tm1):

        phi_1_t = phi_1.fprop([x_t, s_tm1], params)
        phi_mu_t = phi_mu.fprop([phi_1_t], params)
        phi_sig_t = phi_sig.fprop([phi_1_t], params)

        prior_1_t = prior_1.fprop([s_tm1], params)
        prior_mu_t = prior_mu.fprop([prior_1_t], params)
        prior_sig_t = prior_sig.fprop([prior_1_t], params)

        z_t = Gaussian_sample(phi_mu_t, phi_sig_t)
        z_1_t = z_1.fprop([z_t], params)

        theta_1_t = theta_1.fprop([z_1_t, s_tm1], params)
        theta_mu_t = theta_mu.fprop([theta_1_t], params)
        theta_sig_t = theta_sig.fprop([theta_1_t], params)

        coeff_t = coeff.fprop([theta_1_t], params)
        #corr_t = corr.fprop([theta_1_t], params)
        #binary_t = binary.fprop([theta_1_t], params)

        pred = GMM_sample(theta_mu_t, theta_sig_t,
                          coeff_t)  #Gaussian_sample(theta_mu_t, theta_sig_t)

        s_t = rnn.fprop([[x_t, z_1_t], [s_tm1]], params)

        #y_pred = dissag_pred.fprop([s_t], params)

        return s_t, phi_mu_t, phi_sig_t, prior_mu_t, prior_sig_t, z_t, z_1_t, theta_1_t, theta_mu_t, theta_sig_t, coeff_t, pred  #, y_pred
        #corr_temp, binary_temp
    ((s_temp, phi_mu_temp, phi_sig_temp, prior_mu_temp, prior_sig_temp,z_t_temp, z_1_temp, theta_1_temp, theta_mu_temp, theta_sig_temp, coeff_temp, prediction), updates) =\
        theano.scan(fn=inner_fn,
                    sequences=[x_1_temp ],
                    outputs_info=[s_0, None, None, None, None, None, None,  None, None, None, None, None])

    for k, v in updates.iteritems():
        k.default_update = v

    s_temp = concatenate(
        [s_0[None, :, :], s_temp[:-1]], axis=0
    )  # seems like this is for creating an additional dimension to s_0
    '''
    theta_1_temp = theta_1.fprop([z_1_temp, s_temp], params)
    theta_mu_temp = theta_mu.fprop([theta_1_temp], params)
    theta_sig_temp = theta_sig.fprop([theta_1_temp], params)
    coeff_temp = coeff.fprop([theta_1_temp], params)
    corr_temp = corr.fprop([theta_1_temp], params)
    binary_temp = binary.fprop([theta_1_temp], params)
    '''

    s_temp.name = 'h'  #gisse
    z_1_temp.name = 'z2'  #gisse
    z_t_temp.name = 'z'
    theta_mu_temp.name = 'mu'
    theta_sig_temp.name = 'sig'
    coeff_temp.name = 'coeff'

    prediction.name = 'Prediction-' + str(appliances[flgAgg][:-1])
    mse = T.mean((prediction - x)**2)  # As axis = None is calculated for all
    mae = T.mean(T.abs_(prediction - x))
    mse.name = 'mse'
    mae.name = 'mae'
    x_in = x.reshape((batch_size * n_steps, -1))

    kl_temp = KLGaussianGaussian(phi_mu_temp, phi_sig_temp, prior_mu_temp,
                                 prior_sig_temp)

    x_shape = x.shape

    theta_mu_in = theta_mu_temp.reshape((x_shape[0] * x_shape[1], -1))
    theta_sig_in = theta_sig_temp.reshape((x_shape[0] * x_shape[1], -1))
    coeff_in = coeff_temp.reshape((x_shape[0] * x_shape[1], -1))
    #corr_in = corr_temp.reshape((x_shape[0]*x_shape[1], -1))
    #binary_in = binary_temp.reshape((x_shape[0]*x_shape[1], -1))

    recon = GMM(
        x_in, theta_mu_in, theta_sig_in, coeff_in
    )  # BiGMM(x_in, theta_mu_in, theta_sig_in, coeff_in, corr_in, binary_in)
    recon = recon.reshape((x_shape[0], x_shape[1]))
    recon.name = 'gmm_out'

    #recon = recon * mask

    recon_term = recon.sum(axis=0).mean()
    recon_term.name = 'recon_term'

    #kl_temp = kl_temp * mask

    kl_term = kl_temp.sum(axis=0).mean()
    kl_term.name = 'kl_term'

    nll_upper_bound = recon_term + kl_term  #+ mse
    if (flgMSE):
        nll_upper_bound = nll_upper_bound + mse
    nll_upper_bound.name = 'nll_upper_bound'

    ############## TEST  ###############
    theta_mu_in_val = theta_mu_temp_val.reshape((batch_size * n_steps, -1))
    theta_sig_in_val = theta_sig_temp_val.reshape((batch_size * n_steps, -1))
    coeff_in_val = coeff_temp_val.reshape((batch_size * n_steps, -1))

    pred_in = prediction_val.reshape((batch_size * n_steps, -1))
    recon_val = GMM(
        pred_in, theta_mu_in_val, theta_sig_in_val, coeff_in_val
    )  # BiGMM(x_in, theta_mu_in, theta_sig_in, coeff_in, corr_in, binary_in)
    recon_val = recon_val.reshape((batch_size, n_steps))
    recon_val.name = 'gmm_out_val'

    model.inputs = [x, mask, y, y_mask, scheduleSamplingMask]
    model.params = params
    model.nodes = nodes

    optimizer = Adam(lr=lr)

    header = "epoch,log,kl,nll_upper_bound,mse,mae\n"
    extension = [
        GradientClipping(batch_size=batch_size),
        EpochCount(epoch, save_path, header),
        Monitoring(
            freq=monitoring_freq,
            ddout=[nll_upper_bound, recon_term, kl_term, mse, mae, prediction],
            indexSep=5,
            indexDDoutPlot=[(0, theta_mu_temp), (2, z_t_temp),
                            (3, prediction)],
            instancesPlot=instancesPlot,  #{0:[4,20],2:[5,10]},#, 80,150
            data=[Iterator(valid_data, batch_size)],
            savedFolder=save_path),
        Picklize(freq=monitoring_freq, path=save_path),
        EarlyStopping(freq=monitoring_freq,
                      path=save_path,
                      channel=channel_name),
        WeightNorm()
    ]

    lr_iterations = {
        0: lr,
        30: (lr / 10)
    }  #, 150:(lr/10), 270:(lr/100), 370:(lr/1000)

    mainloop = Training(name=pkl_name,
                        data=Iterator(train_data, batch_size),
                        model=model,
                        optimizer=optimizer,
                        cost=nll_upper_bound,
                        outputs=[nll_upper_bound],
                        n_steps=n_steps,
                        extension=extension,
                        lr_iterations=lr_iterations)
    mainloop.run()

    test_fn = theano.function(
        inputs=[],
        outputs=[prediction_val, recon_val],
        updates=
        updates_val  #, allow_input_downcast=True, on_unused_input='ignore'
    )

    outputGeneration = test_fn()
    #{0:[4,20], 2:[5,10]}
    '''
    plt.figure(1)
    plt.plot(np.transpose(outputGeneration[0],[1,0,2])[5])
    plt.savefig(save_path+"/vrnn_dis_generated_z_0-4.ps")

    plt.figure(2)
    plt.plot(np.transpose(outputGeneration[1],[1,0,2])[5])
    plt.savefig(save_path+"/vrnn_dis_generated_s_0-4.ps")

    plt.figure(3)
    plt.plot(np.transpose(outputGeneration[2],[1,0,2])[5])
    plt.savefig(save_path+"/vrnn_dis_generated_theta_0-4.ps")
    '''
    plt.figure(1)
    plt.plot(np.transpose(outputGeneration[0], [1, 0, 2])[2])
    plt.savefig(save_path + "/vrnn_dis_generated_pred_0-2.ps")

    plt.figure(2)
    plt.plot(np.transpose(outputGeneration[0], [1, 0, 2])[10])
    plt.savefig(save_path + "/vrnn_dis_generated_pred_0-10.ps")

    plt.figure(3)
    plt.plot(np.transpose(outputGeneration[0], [1, 0, 2])[15])
    plt.savefig(save_path + "/vrnn_dis_generated_pred_0-15.ps")

    testLogLike = np.asarray(outputGeneration[1]).mean()

    fLog = open(save_path + '/output.csv', 'w')
    fLog.write(str(lr_iterations) + "\n")
    fLog.write(str(windows) + "\n")
    fLog.write("Test-log-likelihood\n")
    fLog.write("{}\n".format(testLogLike))
    fLog.write("q_z_dim,p_z_dim,p_x_dim,x2s_dim,z2s_dim\n")
    fLog.write("{},{},{},{},{}\n".format(q_z_dim, p_z_dim, p_x_dim, x2s_dim,
                                         z2s_dim))
    fLog.write("epoch,log,kl,nll_upper_bound,mse,mae\n")
    for i, item in enumerate(mainloop.trainlog.monitor['nll_upper_bound']):
        f = mainloop.trainlog.monitor['epoch'][i]
        a = mainloop.trainlog.monitor['recon_term'][i]
        b = mainloop.trainlog.monitor['kl_term'][i]
        c = mainloop.trainlog.monitor['nll_upper_bound'][i]
        d = mainloop.trainlog.monitor['mse'][i]
        e = mainloop.trainlog.monitor['mae'][i]
        fLog.write("{:d},{:.2f},{:.2f},{:.2f},{:.3f},{:.3f}\n".format(
            f, a, b, c, d, e))
    fLog.close()

    f = open(save_path + '/outputRealGeneration.pkl', 'wb')
    cPickle.dump(outputGeneration, f, -1)
    f.close()
def main(args):

    #theano.optimizer='fast_compile'
    #theano.config.exception_verbosity='high'

    trial = int(args['trial'])
    pkl_name = 'vrnn_gmm_%d' % trial
    channel_name = 'valid_nll_upper_bound'

    data_path = args['data_path']
    save_path = args[
        'save_path']  #+'/gmm/'+datetime.datetime.now().strftime("%y-%m-%d_%H-%M")
    flgMSE = int(args['flgMSE'])

    period = int(args['period'])
    n_steps = int(args['n_steps'])
    stride_train = int(args['stride_train'])
    stride_test = n_steps  # int(args['stride_test'])

    monitoring_freq = int(args['monitoring_freq'])
    epoch = int(args['epoch'])
    batch_size = int(args['batch_size'])
    x_dim = int(args['x_dim'])
    y_dim = int(args['y_dim'])
    flgAgg = int(args['flgAgg'])
    z_dim = int(args['z_dim'])
    rnn_dim = int(args['rnn_dim'])
    k = int(args['num_k'])  #a mixture of K Gaussian functions
    lr = float(args['lr'])
    debug = int(args['debug'])
    num_sequences_per_batch = int(args['numSequences'])  #based on appliance
    loadParam = args['loadAsKelly']
    target_inclusion_prob = float(args['target_inclusion_prob'])
    loadAsKelly = True
    if (loadParam == 'N' or loadParam == 'n' or loadParam == 'no'
            or loadParam == 'NO' or loadParam == 'No'):
        loadAsKelly = False

    print "trial no. %d" % trial
    print "batch size %d" % batch_size
    print "learning rate %f" % lr
    print "saving pkl file '%s'" % pkl_name
    print "to the save path '%s'" % save_path

    q_z_dim = 100  #150
    p_z_dim = 60  #150
    p_x_dim = 20  #250
    x2s_dim = 40  #250
    z2s_dim = 40  #150
    target_dim = k  #x_dim #(x_dim-1)*k

    model = Model()
    Xtrain, ytrain, Xval, yval, reader = fetch_ukdale(
        data_path,
        windows,
        appliances,
        numApps=flgAgg,
        period=period,
        n_steps=n_steps,
        stride_train=stride_train,
        stride_test=stride_test,
        flgAggSumScaled=1,
        flgFilterZeros=1,
        isKelly=loadAsKelly,
        seq_per_batch=num_sequences_per_batch,
        target_inclusion_prob=target_inclusion_prob)

    instancesPlot = {
        0: [4, 20],
        2: [5, 10]
    }  #for now use hard coded instancesPlot for kelly sampling
    if (not loadAsKelly):
        instancesPlot = reader.build_dict_instances_plot(
            listDates, batch_size, Xval.shape[0])

    train_data = UKdale(
        name='train',
        prep='normalize',
        cond=True,  # False
        #path=data_path,
        inputX=Xtrain,
        labels=ytrain)

    X_mean = train_data.X_mean
    X_std = train_data.X_std

    valid_data = UKdale(
        name='valid',
        prep='normalize',
        cond=True,  # False
        #path=data_path,
        X_mean=X_mean,
        X_std=X_std,
        inputX=Xval,
        labels=yval)

    init_W = InitCell('rand')
    init_U = InitCell('ortho')
    init_b = InitCell('zeros')
    init_b_sig = InitCell('const', mean=0.6)

    x, mask, y, y_mask = train_data.theano_vars()
    x.name = 'x_original'
    if debug:
        x.tag.test_value = np.zeros((15, batch_size, x_dim), dtype=np.float32)
        temp = np.ones((15, batch_size), dtype=np.float32)
        temp[:, -2:] = 0.
        mask.tag.test_value = temp

    x_1 = FullyConnectedLayer(name='x_1',
                              parent=['x_t'],
                              parent_dim=[x_dim],
                              nout=x2s_dim,
                              unit='relu',
                              init_W=init_W,
                              init_b=init_b)

    z_1 = FullyConnectedLayer(name='z_1',
                              parent=['z_t'],
                              parent_dim=[z_dim],
                              nout=z2s_dim,
                              unit='relu',
                              init_W=init_W,
                              init_b=init_b)

    rnn = LSTM(name='rnn',
               parent=['x_1', 'z_1'],
               parent_dim=[x2s_dim, z2s_dim],
               nout=rnn_dim,
               unit='tanh',
               init_W=init_W,
               init_U=init_U,
               init_b=init_b)
    '''
    dissag_pred = FullyConnectedLayer(name='disag_1',
                                  parent=['s_tm1'],
                                  parent_dim=[rnn_dim],
                                  nout=num_apps,
                                  unit='relu',
                                  init_W=init_W,
                                  init_b=init_b)
    '''
    phi_1 = FullyConnectedLayer(name='phi_1',
                                parent=['x_1', 's_tm1'],
                                parent_dim=[x2s_dim, rnn_dim],
                                nout=q_z_dim,
                                unit='relu',
                                init_W=init_W,
                                init_b=init_b)

    phi_mu = FullyConnectedLayer(name='phi_mu',
                                 parent=['phi_1'],
                                 parent_dim=[q_z_dim],
                                 nout=z_dim,
                                 unit='linear',
                                 init_W=init_W,
                                 init_b=init_b)

    phi_sig = FullyConnectedLayer(name='phi_sig',
                                  parent=['phi_1'],
                                  parent_dim=[q_z_dim],
                                  nout=z_dim,
                                  unit='softplus',
                                  cons=1e-4,
                                  init_W=init_W,
                                  init_b=init_b_sig)

    prior_1 = FullyConnectedLayer(name='prior_1',
                                  parent=['s_tm1'],
                                  parent_dim=[rnn_dim],
                                  nout=p_z_dim,
                                  unit='relu',
                                  init_W=init_W,
                                  init_b=init_b)

    prior_mu = FullyConnectedLayer(name='prior_mu',
                                   parent=['prior_1'],
                                   parent_dim=[p_z_dim],
                                   nout=z_dim,
                                   unit='linear',
                                   init_W=init_W,
                                   init_b=init_b)

    prior_sig = FullyConnectedLayer(name='prior_sig',
                                    parent=['prior_1'],
                                    parent_dim=[p_z_dim],
                                    nout=z_dim,
                                    unit='softplus',
                                    cons=1e-4,
                                    init_W=init_W,
                                    init_b=init_b_sig)

    theta_1 = FullyConnectedLayer(name='theta_1',
                                  parent=['z_1', 's_tm1'],
                                  parent_dim=[z2s_dim, rnn_dim],
                                  nout=p_x_dim,
                                  unit='relu',
                                  init_W=init_W,
                                  init_b=init_b)

    theta_mu = FullyConnectedLayer(name='theta_mu',
                                   parent=['theta_1'],
                                   parent_dim=[p_x_dim],
                                   nout=target_dim,
                                   unit='linear',
                                   init_W=init_W,
                                   init_b=init_b)

    theta_sig = FullyConnectedLayer(name='theta_sig',
                                    parent=['theta_1'],
                                    parent_dim=[p_x_dim],
                                    nout=target_dim,
                                    unit='softplus',
                                    cons=1e-4,
                                    init_W=init_W,
                                    init_b=init_b_sig)

    coeff = FullyConnectedLayer(name='coeff',
                                parent=['theta_1'],
                                parent_dim=[p_x_dim],
                                nout=k,
                                unit='softmax',
                                init_W=init_W,
                                init_b=init_b)

    corr = FullyConnectedLayer(name='corr',
                               parent=['theta_1'],
                               parent_dim=[p_x_dim],
                               nout=k,
                               unit='tanh',
                               init_W=init_W,
                               init_b=init_b)

    binary = FullyConnectedLayer(name='binary',
                                 parent=['theta_1'],
                                 parent_dim=[p_x_dim],
                                 nout=1,
                                 unit='sigmoid',
                                 init_W=init_W,
                                 init_b=init_b)

    nodes = [
        rnn,
        x_1,
        z_1,  #dissag_pred,
        phi_1,
        phi_mu,
        phi_sig,
        prior_1,
        prior_mu,
        prior_sig,
        theta_1,
        theta_mu,
        theta_sig,
        coeff
    ]  #, corr, binary

    params = OrderedDict()

    for node in nodes:
        if node.initialize() is not None:
            params.update(node.initialize())

    params = init_tparams(params)

    s_0 = rnn.get_init_state(batch_size)

    x_1_temp = x_1.fprop([x], params)

    def inner_fn(x_t, s_tm1):

        phi_1_t = phi_1.fprop([x_t, s_tm1], params)
        phi_mu_t = phi_mu.fprop([phi_1_t], params)
        phi_sig_t = phi_sig.fprop([phi_1_t], params)

        prior_1_t = prior_1.fprop([s_tm1], params)
        prior_mu_t = prior_mu.fprop([prior_1_t], params)
        prior_sig_t = prior_sig.fprop([prior_1_t], params)

        z_t = Gaussian_sample(phi_mu_t, phi_sig_t)
        z_1_t = z_1.fprop([z_t], params)

        theta_1_t = theta_1.fprop([z_1_t, s_tm1], params)
        theta_mu_t = theta_mu.fprop([theta_1_t], params)
        theta_sig_t = theta_sig.fprop([theta_1_t], params)

        coeff_t = coeff.fprop([theta_1_t], params)
        #corr_t = corr.fprop([theta_1_t], params)
        #binary_t = binary.fprop([theta_1_t], params)

        pred = GMM_sample(theta_mu_t, theta_sig_t,
                          coeff_t)  #Gaussian_sample(theta_mu_t, theta_sig_t)
        s_t = rnn.fprop([[x_t, z_1_t], [s_tm1]], params)
        #y_pred = dissag_pred.fprop([s_t], params)

        return s_t, phi_mu_t, phi_sig_t, prior_mu_t, prior_sig_t, z_t, z_1_t, theta_1_t, theta_mu_t, theta_sig_t, coeff_t, pred  #, y_pred
        #corr_temp, binary_temp
    ((s_temp, phi_mu_temp, phi_sig_temp, prior_mu_temp, prior_sig_temp,z_t_temp, z_1_temp, theta_1_temp, theta_mu_temp, theta_sig_temp, coeff_temp, prediction), updates) =\
        theano.scan(fn=inner_fn,
                    sequences=[x_1_temp],
                    outputs_info=[s_0, None, None, None, None, None, None,  None, None, None, None, None])

    for k, v in updates.iteritems():
        k.default_update = v

    s_temp = concatenate(
        [s_0[None, :, :], s_temp[:-1]], axis=0
    )  # seems like this is for creating an additional dimension to s_0
    '''
    theta_1_temp = theta_1.fprop([z_1_temp, s_temp], params)
    theta_mu_temp = theta_mu.fprop([theta_1_temp], params)
    theta_sig_temp = theta_sig.fprop([theta_1_temp], params)
    coeff_temp = coeff.fprop([theta_1_temp], params)
    corr_temp = corr.fprop([theta_1_temp], params)
    binary_temp = binary.fprop([theta_1_temp], params)
    '''

    s_temp.name = 'h_1'  #gisse
    z_1_temp.name = 'z_1'  #gisse
    z_t_temp.name = 'z'
    theta_mu_temp.name = 'theta_mu_temp'
    theta_sig_temp.name = 'theta_sig_temp'
    coeff_temp.name = 'coeff'
    #corr_temp.name = 'corr'
    #binary_temp.name = 'binary'
    if (flgAgg == -1):
        prediction.name = 'x_reconstructed'
        mse = T.mean((prediction - x)**2)  # CHECK RESHAPE with an assertion
        mae = T.mean(T.abs(prediction - x))
        mse.name = 'mse'
        pred_in = x.reshape((x_shape[0] * x_shape[1], -1))
    else:
        prediction.name = 'pred_' + str(flgAgg)
        #[:,:,flgAgg].reshape((y.shape[0],y.shape[1],1)
        mse = T.mean(
            (prediction - y)**2)  # As axis = None is calculated for all
        mae = T.mean(T.abs_(prediction - y))
        mse.name = 'mse'
        mae.name = 'mae'
        pred_in = y.reshape((y.shape[0] * y.shape[1], -1))

    kl_temp = KLGaussianGaussian(phi_mu_temp, phi_sig_temp, prior_mu_temp,
                                 prior_sig_temp)

    x_shape = x.shape

    theta_mu_in = theta_mu_temp.reshape((x_shape[0] * x_shape[1], -1))
    theta_sig_in = theta_sig_temp.reshape((x_shape[0] * x_shape[1], -1))
    coeff_in = coeff_temp.reshape((x_shape[0] * x_shape[1], -1))
    #corr_in = corr_temp.reshape((x_shape[0]*x_shape[1], -1))
    #binary_in = binary_temp.reshape((x_shape[0]*x_shape[1], -1))

    recon = GMM(
        pred_in, theta_mu_in, theta_sig_in, coeff_in
    )  # BiGMM(x_in, theta_mu_in, theta_sig_in, coeff_in, corr_in, binary_in)
    recon = recon.reshape((x_shape[0], x_shape[1]))
    recon.name = 'gmm_out'

    #recon = recon * mask

    recon_term = recon.sum(axis=0).mean()
    recon_term.name = 'recon_term'

    #kl_temp = kl_temp * mask

    kl_term = kl_temp.sum(axis=0).mean()
    kl_term.name = 'kl_term'

    nll_upper_bound = recon_term + kl_term  #+ mse
    if (flgMSE):
        nll_upper_bound = nll_upper_bound + mse
    nll_upper_bound.name = 'nll_upper_bound'

    max_x = x.max()
    mean_x = x.mean()
    min_x = x.min()
    max_x.name = 'max_x'
    mean_x.name = 'mean_x'
    min_x.name = 'min_x'

    max_theta_mu = theta_mu_in.max()
    mean_theta_mu = theta_mu_in.mean()
    min_theta_mu = theta_mu_in.min()
    max_theta_mu.name = 'max_theta_mu'
    mean_theta_mu.name = 'mean_theta_mu'
    min_theta_mu.name = 'min_theta_mu'

    max_theta_sig = theta_sig_in.max()
    mean_theta_sig = theta_sig_in.mean()
    min_theta_sig = theta_sig_in.min()
    max_theta_sig.name = 'max_theta_sig'
    mean_theta_sig.name = 'mean_theta_sig'
    min_theta_sig.name = 'min_theta_sig'

    coeff_max = coeff_in.max()
    coeff_min = coeff_in.min()
    coeff_mean_max = coeff_in.mean(axis=0).max()
    coeff_mean_min = coeff_in.mean(axis=0).min()
    coeff_max.name = 'coeff_max'
    coeff_min.name = 'coeff_min'
    coeff_mean_max.name = 'coeff_mean_max'
    coeff_mean_min.name = 'coeff_mean_min'

    max_phi_sig = phi_sig_temp.max()
    mean_phi_sig = phi_sig_temp.mean()
    min_phi_sig = phi_sig_temp.min()
    max_phi_sig.name = 'max_phi_sig'
    mean_phi_sig.name = 'mean_phi_sig'
    min_phi_sig.name = 'min_phi_sig'

    max_prior_sig = prior_sig_temp.max()
    mean_prior_sig = prior_sig_temp.mean()
    min_prior_sig = prior_sig_temp.min()
    max_prior_sig.name = 'max_prior_sig'
    mean_prior_sig.name = 'mean_prior_sig'
    min_prior_sig.name = 'min_prior_sig'

    model.inputs = [x, mask, y, y_mask]
    model.params = params
    model.nodes = nodes

    optimizer = Adam(lr=lr)

    extension = [
        GradientClipping(batch_size=batch_size),
        EpochCount(epoch),
        Monitoring(
            freq=monitoring_freq,
            ddout=[
                nll_upper_bound,
                recon_term,
                kl_term,
                mse,
                mae,
                theta_mu_temp,
                theta_sig_temp,
                z_t_temp,
                prediction,  #corr_temp, binary_temp, 
                s_temp,
                z_1_temp
            ],
            indexSep=5,
            indexDDoutPlot=[(0, theta_mu_temp), (2, z_t_temp),
                            (3, prediction)],
            instancesPlot=instancesPlot,  #{0:[4,20],2:[5,10]},#, 80,150
            data=[Iterator(valid_data, batch_size)],
            savedFolder=save_path),
        Picklize(freq=monitoring_freq, path=save_path),
        EarlyStopping(freq=monitoring_freq,
                      path=save_path,
                      channel=channel_name),
        WeightNorm()
    ]

    lr_iterations = {0: lr, 15: (lr / 10), 70: (lr / 100)}

    mainloop = Training(name=pkl_name,
                        data=Iterator(train_data, batch_size),
                        model=model,
                        optimizer=optimizer,
                        cost=nll_upper_bound,
                        outputs=[nll_upper_bound],
                        extension=extension,
                        lr_iterations=lr_iterations)
    #mainloop.run()
    fLog = open(save_path + '/output.csv', 'w')
    fLog.write("log,kl,nll_upper_bound,mse,mae\n")
    for i, item in enumerate(mainloop.trainlog.monitor['nll_upper_bound']):
        a = mainloop.trainlog.monitor['recon_term'][i]
        b = mainloop.trainlog.monitor['kl_term'][i]
        c = mainloop.trainlog.monitor['nll_upper_bound'][i]
        d = mainloop.trainlog.monitor['mse'][i]
        e = mainloop.trainlog.monitor['mae'][i]
        fLog.write("{},{},{},{},{}\n".format(a, b, c, d, e))
Example #7
0
def main(args):

    theano.optimizer = 'fast_compile'
    #theano.config.exception_verbosity='high'

    trial = int(args['trial'])
    pkl_name = 'vrnn_gmm_%d' % trial
    channel_name = 'nll_upper_bound'

    data_path = args['data_path']
    save_path = args[
        'save_path']  #+'/aggVSdisag_distrib/'+datetime.datetime.now().strftime("%y-%m-%d_%H-%M")
    period = int(args['period'])
    n_steps = int(args['n_steps'])
    stride_train = int(args['stride_train'])
    stride_test = n_steps
    typeLoad = int(args['typeLoad'])

    flgMSE = int(args['flgMSE'])
    monitoring_freq = int(args['monitoring_freq'])
    epoch = int(args['epoch'])
    batch_size = int(args['batch_size'])
    x_dim = int(args['x_dim'])
    y_dim = int(args['y_dim'])
    z_dim = int(args['z_dim'])
    rnn_dim = int(args['rnn_dim'])
    k = int(args['num_k'])  #a mixture of K Gaussian functions
    lr = float(args['lr'])
    origLR = lr
    debug = int(args['debug'])

    print "trial no. %d" % trial
    print "batch size %d" % batch_size
    print "learning rate %f" % lr
    print "saving pkl file '%s'" % pkl_name
    print "to the save path '%s'" % save_path

    q_z_dim = 350
    p_z_dim = 400
    p_x_dim = 450
    x2s_dim = 400
    y2s_dim = 200
    z2s_dim = 350
    target_dim = k  # As different appliances are separeted in theta_mu1, theta_mu2, etc... each one is just created from k different Gaussians

    model = Model()
    Xtrain, ytrain, Xval, yval, Xtest, ytest, reader = fetch_ukdale(
        data_path,
        windows,
        appliances,
        numApps=-1,
        period=period,
        n_steps=n_steps,
        stride_train=stride_train,
        stride_test=stride_test,
        flgAggSumScaled=1,
        flgFilterZeros=1,
        typeLoad=typeLoad,
        trainPer=0.5,
        valPer=0.25,
        testPer=0.25)

    instancesPlot = {0: [5]}
    #instancesPlot = reader.build_dict_instances_plot(listDates, batch_size, Xval.shape[0])

    train_data = UKdale(
        name='train',
        prep='normalize',
        cond=True,  # False
        #path=data_path,
        inputX=Xtrain,
        labels=ytrain)

    X_mean = train_data.X_mean
    X_std = train_data.X_std

    valid_data = UKdale(
        name='valid',
        prep='normalize',
        cond=True,  # False
        #path=data_path,
        X_mean=X_mean,
        X_std=X_std,
        inputX=Xval,
        labels=yval)

    test_data = UKdale(
        name='valid',
        prep='normalize',
        cond=True,  # False
        #path=data_path,
        X_mean=X_mean,
        X_std=X_std,
        inputX=Xtest,
        labels=ytest)

    init_W = InitCell('rand')
    init_U = InitCell('ortho')
    init_b = InitCell('zeros')
    init_b_sig = InitCell('const', mean=0.6)

    x, mask, y, y_mask = train_data.theano_vars()
    scheduleSamplingMask = T.fvector('schedMask')

    x.name = 'x_original'

    if debug:
        x.tag.test_value = np.zeros((15, batch_size, x_dim), dtype=np.float32)
        temp = np.ones((15, batch_size), dtype=np.float32)
        temp[:, -2:] = 0.
        mask.tag.test_value = temp

    x_1 = FullyConnectedLayer(name='x_1',
                              parent=['x_t'],
                              parent_dim=[x_dim],
                              nout=x2s_dim,
                              unit='relu',
                              init_W=init_W,
                              init_b=init_b)

    y_1 = FullyConnectedLayer(name='y_1',
                              parent=['y_t'],
                              parent_dim=[y_dim],
                              nout=y2s_dim,
                              unit='relu',
                              init_W=init_W,
                              init_b=init_b)

    z_1 = FullyConnectedLayer(name='z_1',
                              parent=['z_t'],
                              parent_dim=[z_dim],
                              nout=z2s_dim,
                              unit='relu',
                              init_W=init_W,
                              init_b=init_b)

    rnn = LSTM(name='rnn',
               parent=['x_1', 'z_1', 'y_1'],
               parent_dim=[x2s_dim, z2s_dim, y2s_dim],
               nout=rnn_dim,
               unit='tanh',
               init_W=init_W,
               init_U=init_U,
               init_b=init_b)

    phi_1 = FullyConnectedLayer(name='phi_1',
                                parent=['x_1', 's_tm1', 'y_1'],
                                parent_dim=[x2s_dim, rnn_dim, y2s_dim],
                                nout=q_z_dim,
                                unit='relu',
                                init_W=init_W,
                                init_b=init_b)

    phi_mu = FullyConnectedLayer(name='phi_mu',
                                 parent=['phi_1'],
                                 parent_dim=[q_z_dim],
                                 nout=z_dim,
                                 unit='linear',
                                 init_W=init_W,
                                 init_b=init_b)

    phi_sig = FullyConnectedLayer(name='phi_sig',
                                  parent=['phi_1'],
                                  parent_dim=[q_z_dim],
                                  nout=z_dim,
                                  unit='softplus',
                                  cons=1e-4,
                                  init_W=init_W,
                                  init_b=init_b_sig)

    prior_1 = FullyConnectedLayer(name='prior_1',
                                  parent=['x_1', 's_tm1'],
                                  parent_dim=[x2s_dim, rnn_dim],
                                  nout=p_z_dim,
                                  unit='relu',
                                  init_W=init_W,
                                  init_b=init_b)

    prior_mu = FullyConnectedLayer(name='prior_mu',
                                   parent=['prior_1'],
                                   parent_dim=[p_z_dim],
                                   nout=z_dim,
                                   unit='linear',
                                   init_W=init_W,
                                   init_b=init_b)

    prior_sig = FullyConnectedLayer(name='prior_sig',
                                    parent=['prior_1'],
                                    parent_dim=[p_z_dim],
                                    nout=z_dim,
                                    unit='softplus',
                                    cons=1e-4,
                                    init_W=init_W,
                                    init_b=init_b_sig)

    theta_1 = FullyConnectedLayer(name='theta_1',
                                  parent=['z_1', 's_tm1'],
                                  parent_dim=[z2s_dim, rnn_dim],
                                  nout=p_x_dim,
                                  unit='relu',
                                  init_W=init_W,
                                  init_b=init_b)

    theta_mu1 = FullyConnectedLayer(name='theta_mu1',
                                    parent=['theta_1'],
                                    parent_dim=[p_x_dim],
                                    nout=target_dim,
                                    unit='linear',
                                    init_W=init_W,
                                    init_b=init_b)

    theta_mu2 = FullyConnectedLayer(name='theta_mu2',
                                    parent=['theta_1'],
                                    parent_dim=[p_x_dim],
                                    nout=target_dim,
                                    unit='linear',
                                    init_W=init_W,
                                    init_b=init_b)

    theta_mu3 = FullyConnectedLayer(name='theta_mu3',
                                    parent=['theta_1'],
                                    parent_dim=[p_x_dim],
                                    nout=target_dim,
                                    unit='linear',
                                    init_W=init_W,
                                    init_b=init_b)

    theta_mu4 = FullyConnectedLayer(name='theta_mu4',
                                    parent=['theta_1'],
                                    parent_dim=[p_x_dim],
                                    nout=target_dim,
                                    unit='linear',
                                    init_W=init_W,
                                    init_b=init_b)

    theta_mu5 = FullyConnectedLayer(name='theta_mu5',
                                    parent=['theta_1'],
                                    parent_dim=[p_x_dim],
                                    nout=target_dim,
                                    unit='linear',
                                    init_W=init_W,
                                    init_b=init_b)

    theta_sig1 = FullyConnectedLayer(name='theta_sig1',
                                     parent=['theta_1'],
                                     parent_dim=[p_x_dim],
                                     nout=target_dim,
                                     unit='softplus',
                                     cons=1e-4,
                                     init_W=init_W,
                                     init_b=init_b_sig)

    theta_sig2 = FullyConnectedLayer(name='theta_sig2',
                                     parent=['theta_1'],
                                     parent_dim=[p_x_dim],
                                     nout=target_dim,
                                     unit='softplus',
                                     cons=1e-4,
                                     init_W=init_W,
                                     init_b=init_b_sig)

    theta_sig3 = FullyConnectedLayer(name='theta_sig3',
                                     parent=['theta_1'],
                                     parent_dim=[p_x_dim],
                                     nout=target_dim,
                                     unit='softplus',
                                     cons=1e-4,
                                     init_W=init_W,
                                     init_b=init_b_sig)

    theta_sig4 = FullyConnectedLayer(name='theta_sig4',
                                     parent=['theta_1'],
                                     parent_dim=[p_x_dim],
                                     nout=target_dim,
                                     unit='softplus',
                                     cons=1e-4,
                                     init_W=init_W,
                                     init_b=init_b_sig)

    theta_sig5 = FullyConnectedLayer(name='theta_sig5',
                                     parent=['theta_1'],
                                     parent_dim=[p_x_dim],
                                     nout=target_dim,
                                     unit='softplus',
                                     cons=1e-4,
                                     init_W=init_W,
                                     init_b=init_b_sig)

    coeff1 = FullyConnectedLayer(name='coeff1',
                                 parent=['theta_1'],
                                 parent_dim=[p_x_dim],
                                 nout=k,
                                 unit='softmax',
                                 init_W=init_W,
                                 init_b=init_b)

    coeff2 = FullyConnectedLayer(name='coeff2',
                                 parent=['theta_1'],
                                 parent_dim=[p_x_dim],
                                 nout=k,
                                 unit='softmax',
                                 init_W=init_W,
                                 init_b=init_b)

    coeff3 = FullyConnectedLayer(name='coeff3',
                                 parent=['theta_1'],
                                 parent_dim=[p_x_dim],
                                 nout=k,
                                 unit='softmax',
                                 init_W=init_W,
                                 init_b=init_b)

    coeff4 = FullyConnectedLayer(name='coeff4',
                                 parent=['theta_1'],
                                 parent_dim=[p_x_dim],
                                 nout=k,
                                 unit='softmax',
                                 init_W=init_W,
                                 init_b=init_b)

    coeff5 = FullyConnectedLayer(name='coeff5',
                                 parent=['theta_1'],
                                 parent_dim=[p_x_dim],
                                 nout=k,
                                 unit='softmax',
                                 init_W=init_W,
                                 init_b=init_b)

    corr = FullyConnectedLayer(name='corr',
                               parent=['theta_1'],
                               parent_dim=[p_x_dim],
                               nout=k,
                               unit='tanh',
                               init_W=init_W,
                               init_b=init_b)

    binary = FullyConnectedLayer(name='binary',
                                 parent=['theta_1'],
                                 parent_dim=[p_x_dim],
                                 nout=1,
                                 unit='sigmoid',
                                 init_W=init_W,
                                 init_b=init_b)

    nodes = [
        rnn,
        x_1,
        y_1,
        z_1,  #dissag_pred,
        phi_1,
        phi_mu,
        phi_sig,
        prior_1,
        prior_mu,
        prior_sig,
        theta_1,
        theta_mu1,
        theta_sig1,
        coeff1,
        theta_mu2,
        theta_sig2,
        coeff2,
        theta_mu3,
        theta_sig3,
        coeff3,
        theta_mu4,
        theta_sig4,
        coeff4,
        theta_mu5,
        theta_sig5,
        coeff5
    ]

    params = OrderedDict()

    for node in nodes:
        if node.initialize() is not None:
            params.update(node.initialize())

    params = init_tparams(params)

    s_0 = rnn.get_init_state(batch_size)

    x_1_temp = x_1.fprop([x], params)
    y_1_temp = y_1.fprop([y], params)

    def inner_fn_test(x_t, s_tm1):

        prior_1_t = prior_1.fprop([x_t, s_tm1], params)
        prior_mu_t = prior_mu.fprop([prior_1_t], params)
        prior_sig_t = prior_sig.fprop([prior_1_t], params)

        z_t = Gaussian_sample(
            prior_mu_t, prior_sig_t
        )  #in the original code it is gaussian. GMM is for the generation
        z_1_t = z_1.fprop([z_t], params)

        theta_1_t = theta_1.fprop([z_1_t, s_tm1], params)
        theta_mu1_t = theta_mu1.fprop([theta_1_t], params)
        theta_sig1_t = theta_sig1.fprop([theta_1_t], params)
        coeff1_t = coeff1.fprop([theta_1_t], params)

        y_pred1 = GMM_sampleY(
            theta_mu1_t, theta_sig1_t,
            coeff1_t)  #Gaussian_sample(theta_mu_t, theta_sig_t)

        theta_mu2_t = theta_mu2.fprop([theta_1_t], params)
        theta_sig2_t = theta_sig2.fprop([theta_1_t], params)
        coeff2_t = coeff2.fprop([theta_1_t], params)
        y_pred2 = GMM_sampleY(theta_mu2_t, theta_sig2_t, coeff2_t)
        y_pred1 = T.concatenate([y_pred1, y_pred2], axis=1)

        theta_mu3_t = theta_mu3.fprop([theta_1_t], params)
        theta_sig3_t = theta_sig3.fprop([theta_1_t], params)
        coeff3_t = coeff3.fprop([theta_1_t], params)
        y_pred3 = GMM_sampleY(theta_mu3_t, theta_sig3_t, coeff3_t)
        y_pred1 = T.concatenate([y_pred1, y_pred3], axis=1)

        theta_mu4_t = theta_mu4.fprop([theta_1_t], params)
        theta_sig4_t = theta_sig4.fprop([theta_1_t], params)
        coeff4_t = coeff4.fprop([theta_1_t], params)
        y_pred4 = GMM_sampleY(theta_mu4_t, theta_sig4_t, coeff4_t)
        y_pred1 = T.concatenate([y_pred1, y_pred4], axis=1)

        theta_mu5_t = theta_mu5.fprop([theta_1_t], params)
        theta_sig5_t = theta_sig5.fprop([theta_1_t], params)
        coeff5_t = coeff5.fprop([theta_1_t], params)
        y_pred5 = GMM_sampleY(theta_mu5_t, theta_sig5_t, coeff5_t)
        y_pred1 = T.concatenate([y_pred1, y_pred5], axis=1)

        pred_1_t = y_1.fprop([y_pred1], params)
        #y_pred = [GMM_sampleY(theta_mu_t[i], theta_sig_t[i], coeff_t[i]) for i in range(y_dim)]#T.stack([y_pred1,y_pred2],axis = 0 )
        s_t = rnn.fprop([[x_t, z_1_t, pred_1_t], [s_tm1]], params)
        #y_pred = dissag_pred.fprop([s_t], params)

        return s_t, prior_mu_t, prior_sig_t, theta_mu1_t, theta_sig1_t, coeff1_t, y_pred1, theta_mu2_t, theta_sig2_t, coeff2_t, y_pred2, theta_mu3_t, theta_sig3_t, coeff3_t, y_pred3, theta_mu4_t, theta_sig4_t, coeff4_t, y_pred4, theta_mu5_t, theta_sig5_t, coeff5_t, y_pred5
        #corr_temp, binary_temp

    ((s_temp_val, prior_mu_temp_val, prior_sig_temp_val, theta_mu1_temp_val,
      theta_sig1_temp_val, coeff1_temp_val, y_pred1_temp_val,
      theta_mu2_temp_val, theta_sig2_temp_val, coeff2_temp_val,
      y_pred2_temp_val, theta_mu3_temp_val, theta_sig3_temp_val,
      coeff3_temp_val, y_pred3_temp_val, theta_mu4_temp_val,
      theta_sig4_temp_val, coeff4_temp_val, y_pred4_temp_val,
      theta_mu5_temp_val, theta_sig5_temp_val, coeff5_temp_val,
      y_pred5_temp_val),
     updates_val) = theano.scan(fn=inner_fn_test,
                                sequences=[x_1_temp],
                                outputs_info=[
                                    s_0, None, None, None, None, None, None,
                                    None, None, None, None, None, None, None,
                                    None, None, None, None, None, None, None,
                                    None, None
                                ])

    for k, v in updates_val.iteritems():
        k.default_update = v

    def inner_fn(x_t, y_t, s_tm1):

        phi_1_t = phi_1.fprop([x_t, s_tm1, y_t], params)
        phi_mu_t = phi_mu.fprop([phi_1_t], params)
        phi_sig_t = phi_sig.fprop([phi_1_t], params)

        prior_1_t = prior_1.fprop([x_t, s_tm1], params)
        prior_mu_t = prior_mu.fprop([prior_1_t], params)
        prior_sig_t = prior_sig.fprop([prior_1_t], params)

        z_t = Gaussian_sample(
            phi_mu_t, phi_sig_t
        )  #in the original code it is gaussian. GMM is for the generation
        z_1_t = z_1.fprop([z_t], params)

        theta_1_t = theta_1.fprop([z_1_t, s_tm1], params)

        theta_mu1_t = theta_mu1.fprop([theta_1_t], params)
        theta_sig1_t = theta_sig1.fprop([theta_1_t], params)
        coeff1_t = coeff1.fprop([theta_1_t], params)

        y_pred1 = GMM_sampleY(
            theta_mu1_t, theta_sig1_t,
            coeff1_t)  #Gaussian_sample(theta_mu_t, theta_sig_t)

        theta_mu2_t = theta_mu2.fprop([theta_1_t], params)
        theta_sig2_t = theta_sig2.fprop([theta_1_t], params)
        coeff2_t = coeff2.fprop([theta_1_t], params)
        y_pred2 = GMM_sampleY(theta_mu2_t, theta_sig2_t, coeff2_t)

        theta_mu3_t = theta_mu3.fprop([theta_1_t], params)
        theta_sig3_t = theta_sig3.fprop([theta_1_t], params)
        coeff3_t = coeff3.fprop([theta_1_t], params)
        y_pred3 = GMM_sampleY(theta_mu3_t, theta_sig3_t, coeff3_t)

        theta_mu4_t = theta_mu4.fprop([theta_1_t], params)
        theta_sig4_t = theta_sig4.fprop([theta_1_t], params)
        coeff4_t = coeff4.fprop([theta_1_t], params)
        y_pred4 = GMM_sampleY(theta_mu4_t, theta_sig4_t, coeff4_t)

        theta_mu5_t = theta_mu5.fprop([theta_1_t], params)
        theta_sig5_t = theta_sig5.fprop([theta_1_t], params)
        coeff5_t = coeff5.fprop([theta_1_t], params)
        y_pred5 = GMM_sampleY(theta_mu5_t, theta_sig5_t, coeff5_t)

        s_t = rnn.fprop([[x_t, z_1_t, y_t], [s_tm1]], params)

        return s_t, phi_mu_t, phi_sig_t, prior_mu_t, prior_sig_t, theta_mu1_t, theta_sig1_t, coeff1_t, y_pred1, theta_mu2_t, theta_sig2_t, coeff2_t, y_pred2, theta_mu3_t, theta_sig3_t, coeff3_t, y_pred3, theta_mu4_t, theta_sig4_t, coeff4_t, y_pred4, theta_mu5_t, theta_sig5_t, coeff5_t, y_pred5
        #corr_temp, binary_temp

    ((s_temp, phi_mu_temp, phi_sig_temp, prior_mu_temp, prior_sig_temp,
      theta_mu1_temp, theta_sig1_temp, coeff1_temp, y_pred1_temp,
      theta_mu2_temp, theta_sig2_temp, coeff2_temp, y_pred2_temp,
      theta_mu3_temp, theta_sig3_temp, coeff3_temp, y_pred3_temp,
      theta_mu4_temp, theta_sig4_temp, coeff4_temp, y_pred4_temp,
      theta_mu5_temp, theta_sig5_temp, coeff5_temp, y_pred5_temp),
     updates) = theano.scan(fn=inner_fn,
                            sequences=[x_1_temp, y_1_temp],
                            outputs_info=[
                                s_0, None, None, None, None, None, None, None,
                                None, None, None, None, None, None, None, None,
                                None, None, None, None, None, None, None, None,
                                None
                            ])

    for k, v in updates.iteritems():
        k.default_update = v

    theta_mu1_temp.name = 'theta_mu1'
    theta_sig1_temp.name = 'theta_sig1'
    coeff1_temp.name = 'coeff1'
    y_pred1_temp.name = 'disaggregation1'

    #[:,:,flgAgg].reshape((y.shape[0],y.shape[1],1)
    mse1 = T.mean((y_pred1_temp - y[:, :, 0].reshape(
        (y.shape[0], y.shape[1], 1)))**2)
    mae1 = T.mean(
        T.abs_(y_pred1_temp - y[:, :, 0].reshape((y.shape[0], y.shape[1], 1))))
    mse1.name = 'mse1'
    mae1.name = 'mae1'

    kl_temp = KLGaussianGaussian(phi_mu_temp, phi_sig_temp, prior_mu_temp,
                                 prior_sig_temp)

    x_shape = x.shape
    y_shape = y.shape

    theta_mu2_temp.name = 'theta_mu2'
    theta_sig2_temp.name = 'theta_sig2'
    coeff2_temp.name = 'coeff2'
    y_pred2_temp.name = 'disaggregation2'
    mse2 = T.mean((y_pred2_temp - y[:, :, 1].reshape(
        (y.shape[0], y.shape[1],
         1)))**2)  # As axis = None is calculated for all
    mae2 = T.mean(
        T.abs_(y_pred2_temp - y[:, :, 1].reshape((y.shape[0], y.shape[1], 1))))
    mse2.name = 'mse2'
    mae2.name = 'mae2'

    theta_mu3_temp.name = 'theta_mu3'
    theta_sig3_temp.name = 'theta_sig3'
    coeff3_temp.name = 'coeff3'
    y_pred3_temp.name = 'disaggregation3'
    mse3 = T.mean((y_pred3_temp - y[:, :, 2].reshape(
        (y.shape[0], y.shape[1],
         1)))**2)  # As axis = None is calculated for all
    mae3 = T.mean(
        T.abs_(y_pred3_temp - y[:, :, 2].reshape((y.shape[0], y.shape[1], 1))))
    mse3.name = 'mse3'
    mae3.name = 'mae3'

    theta_mu4_temp.name = 'theta_mu4'
    theta_sig4_temp.name = 'theta_sig4'
    coeff4_temp.name = 'coeff4'
    y_pred4_temp.name = 'disaggregation4'
    mse4 = T.mean((y_pred4_temp - y[:, :, 3].reshape(
        (y.shape[0], y.shape[1],
         1)))**2)  # As axis = None is calculated for all
    mae4 = T.mean(
        T.abs_(y_pred4_temp - y[:, :, 3].reshape((y.shape[0], y.shape[1], 1))))
    mse4.name = 'mse4'
    mae4.name = 'mae4'

    theta_mu5_temp.name = 'theta_mu5'
    theta_sig5_temp.name = 'theta_sig5'
    coeff5_temp.name = 'coeff5'
    y_pred5_temp.name = 'disaggregation5'
    mse5 = T.mean((y_pred5_temp - y[:, :, 4].reshape(
        (y.shape[0], y.shape[1],
         1)))**2)  # As axis = None is calculated for all
    mae5 = T.mean(
        T.abs_(y_pred5_temp - y[:, :, 4].reshape((y.shape[0], y.shape[1], 1))))
    mse5.name = 'mse5'
    mae5.name = 'mae5'

    kl_temp = KLGaussianGaussian(phi_mu_temp, phi_sig_temp, prior_mu_temp,
                                 prior_sig_temp)

    theta_mu1_in = theta_mu1_temp.reshape((x_shape[0] * x_shape[1], -1))
    theta_sig1_in = theta_sig1_temp.reshape((x_shape[0] * x_shape[1], -1))
    coeff1_in = coeff1_temp.reshape((x_shape[0] * x_shape[1], -1))

    theta_mu2_in = theta_mu2_temp.reshape((x_shape[0] * x_shape[1], -1))
    theta_sig2_in = theta_sig2_temp.reshape((x_shape[0] * x_shape[1], -1))
    coeff2_in = coeff2_temp.reshape((x_shape[0] * x_shape[1], -1))

    theta_mu3_in = theta_mu3_temp.reshape((x_shape[0] * x_shape[1], -1))
    theta_sig3_in = theta_sig3_temp.reshape((x_shape[0] * x_shape[1], -1))
    coeff3_in = coeff3_temp.reshape((x_shape[0] * x_shape[1], -1))

    theta_mu4_in = theta_mu4_temp.reshape((x_shape[0] * x_shape[1], -1))
    theta_sig4_in = theta_sig4_temp.reshape((x_shape[0] * x_shape[1], -1))
    coeff4_in = coeff4_temp.reshape((x_shape[0] * x_shape[1], -1))

    theta_mu5_in = theta_mu5_temp.reshape((x_shape[0] * x_shape[1], -1))
    theta_sig5_in = theta_sig5_temp.reshape((x_shape[0] * x_shape[1], -1))
    coeff5_in = coeff5_temp.reshape((x_shape[0] * x_shape[1], -1))

    x_shape = x.shape
    y_shape = y.shape
    #x_in = x.reshape((x_shape[0]*x_shape[1], -1))
    y_in = y.reshape((y_shape[0] * y_shape[1], -1))

    recon = GMMdisagMulti(y_dim, y_in, theta_mu1_in, theta_sig1_in, coeff1_in,
                          theta_mu2_in, theta_sig2_in, coeff2_in, theta_mu3_in,
                          theta_sig3_in, coeff3_in, theta_mu4_in,
                          theta_sig4_in, coeff4_in, theta_mu5_in,
                          theta_sig5_in, coeff5_in)
    #recon = GMMdisagMulti(y_dim, y_in, theta_mu1_in, theta_sig1_in, coeff1_in, theta_mu2_in, theta_sig2_in, coeff2_in,theta_mu3_in, theta_sig3_in, coeff3_in,theta_mu4_in, theta_sig4_in, coeff4_in,theta_mu5_in, theta_sig5_in, coeff5_in)
    recon = recon.reshape((x_shape[0], x_shape[1]))
    recon.name = 'gmm_out'
    '''
    recon5 = GMM(y_in[:,4, None], theta_mu5_in, theta_sig5_in, coeff5_in)
    recon5 = recon.reshape((x_shape[0], x_shape[1]))    
    '''
    recon_term = recon.sum(axis=0).mean()
    recon_term = recon.sum(axis=0).mean()
    recon_term.name = 'recon_term'

    kl_term = kl_temp.sum(axis=0).mean()
    kl_term.name = 'kl_term'

    nll_upper_bound = recon_term + kl_term
    nll_upper_bound.name = 'nll_upper_bound'

    ######################## TEST (GENERATION) TIME

    #s_temp_val = concatenate([s_0[None, :, :], s_temp_val[:-1]], axis=0)# seems like this is for creating an additional dimension to s_0

    theta_mu1_temp_val.name = 'theta_mu1_val'
    theta_sig1_temp_val.name = 'theta_sig1_val'
    coeff1_temp_val.name = 'coeff1_val'
    y_pred1_temp_val.name = 'disaggregation1_val'

    #[:,:,flgAgg].reshape((y.shape[0],y.shape[1],1)
    mse1_val = T.mean((y_pred1_temp_val - y[:, :, 0].reshape(
        (y.shape[0], y.shape[1],
         1)))**2)  # As axis = None is calculated for all
    mae1_val = T.mean(
        T.abs_(y_pred1_temp_val -
               y[:, :, 0].reshape((y.shape[0], y.shape[1], 1))))

    #NEURALNILM #(sum_output - sum_target) / max(sum_output, sum_target))
    totPred = T.sum(y_pred1_temp_val)
    totReal = T.sum(y[:, :, 0])
    relErr1_val = (totPred - totReal) / T.maximum(totPred, totReal)
    propAssigned1_val = 1 - T.sum(
        T.abs_(y_pred1_temp_val - y[:, :, 0].reshape(
            (y.shape[0], y.shape[1], 1)))) / (2 * T.sum(x))

    #y_unNormalize = (y[:,:,0] * reader.stdTraining[0]) + reader.meanTraining[0]
    #y_pred1_temp_val = (y_pred1_temp_val * reader.stdTraining[0]) + reader.meanTraining[0]

    #mse1_valUnNorm = T.mean((y_pred1_temp_val - y_unNormalize.reshape((y.shape[0],y.shape[1],1)))**2) # As axis = None is calculated for all
    #mae1_valUnNorm = T.mean( T.abs_(y_pred1_temp_val - y_unNormalize.reshape((y.shape[0],y.shape[1],1))))
    mse1_val.name = 'mse1_val'
    mae1_val.name = 'mae1_val'

    theta_mu1_in_val = theta_mu1_temp_val.reshape(
        (x_shape[0] * x_shape[1], -1))
    theta_sig1_in_val = theta_sig1_temp_val.reshape(
        (x_shape[0] * x_shape[1], -1))
    coeff1_in_val = coeff1_temp_val.reshape((x_shape[0] * x_shape[1], -1))

    theta_mu2_temp_val.name = 'theta_mu2_val'
    theta_sig2_temp_val.name = 'theta_sig2_val'
    coeff2_temp_val.name = 'coeff2_val'
    y_pred2_temp_val.name = 'disaggregation2_val'
    mse2_val = T.mean((y_pred2_temp_val - y[:, :, 1].reshape(
        (y.shape[0], y.shape[1],
         1)))**2)  # As axis = None is calculated for all
    mae2_val = T.mean(
        T.abs_(y_pred2_temp_val -
               y[:, :, 1].reshape((y.shape[0], y.shape[1], 1))))

    totPred = T.sum(y_pred2_temp_val)
    totReal = T.sum(y[:, :, 1])
    relErr2_val = (totPred - totReal) / T.maximum(totPred, totReal)
    propAssigned2_val = 1 - T.sum(
        T.abs_(y_pred2_temp_val - y[:, :, 1].reshape(
            (y.shape[0], y.shape[1], 1)))) / (2 * T.sum(x))

    mse2_val.name = 'mse2_val'
    mae2_val.name = 'mae2_val'

    theta_mu2_in_val = theta_mu2_temp_val.reshape(
        (x_shape[0] * x_shape[1], -1))
    theta_sig2_in_val = theta_sig2_temp_val.reshape(
        (x_shape[0] * x_shape[1], -1))
    coeff2_in_val = coeff2_temp_val.reshape((x_shape[0] * x_shape[1], -1))

    theta_mu3_temp_val.name = 'theta_mu3_val'
    theta_sig3_temp_val.name = 'theta_sig3_val'
    coeff3_temp_val.name = 'coeff3_val'
    y_pred3_temp_val.name = 'disaggregation3_val'
    mse3_val = T.mean((y_pred3_temp_val - y[:, :, 2].reshape(
        (y.shape[0], y.shape[1],
         1)))**2)  # As axis = None is calculated for all
    mae3_val = T.mean(
        T.abs_(y_pred3_temp_val -
               y[:, :, 2].reshape((y.shape[0], y.shape[1], 1))))

    totPred = T.sum(y_pred3_temp_val)
    totReal = T.sum(y[:, :, 2])
    relErr3_val = (totPred - totReal) / T.maximum(totPred, totReal)
    propAssigned3_val = 1 - T.sum(
        T.abs_(y_pred3_temp_val - y[:, :, 2].reshape(
            (y.shape[0], y.shape[1], 1)))) / (2 * T.sum(x))

    mse3_val.name = 'mse3_val'
    mae3_val.name = 'mae3_val'

    theta_mu3_in_val = theta_mu3_temp_val.reshape(
        (x_shape[0] * x_shape[1], -1))
    theta_sig3_in_val = theta_sig3_temp_val.reshape(
        (x_shape[0] * x_shape[1], -1))
    coeff3_in_val = coeff3_temp_val.reshape((x_shape[0] * x_shape[1], -1))

    theta_mu4_temp_val.name = 'theta_mu4_val'
    theta_sig4_temp_val.name = 'theta_sig4_val'
    coeff4_temp_val.name = 'coeff4_val'
    y_pred4_temp_val.name = 'disaggregation4_val'
    mse4_val = T.mean((y_pred4_temp_val - y[:, :, 3].reshape(
        (y.shape[0], y.shape[1],
         1)))**2)  # As axis = None is calculated for all
    mae4_val = T.mean(
        T.abs_(y_pred4_temp_val -
               y[:, :, 3].reshape((y.shape[0], y.shape[1], 1))))

    totPred = T.sum(y_pred4_temp_val)
    totReal = T.sum(y[:, :, 3])
    relErr4_val = (totPred - totReal) / T.maximum(totPred, totReal)
    propAssigned4_val = 1 - T.sum(
        T.abs_(y_pred4_temp_val - y[:, :, 3].reshape(
            (y.shape[0], y.shape[1], 1)))) / (2 * T.sum(x))

    mse4_val.name = 'mse4_val'
    mae4_val.name = 'mae4_val'

    theta_mu4_in_val = theta_mu4_temp_val.reshape(
        (x_shape[0] * x_shape[1], -1))
    theta_sig4_in_val = theta_sig4_temp_val.reshape(
        (x_shape[0] * x_shape[1], -1))
    coeff4_in_val = coeff4_temp_val.reshape((x_shape[0] * x_shape[1], -1))

    theta_mu5_temp_val.name = 'theta_mu5_val'
    theta_sig5_temp_val.name = 'theta_sig5_val'
    coeff5_temp_val.name = 'coeff5_val'
    y_pred5_temp_val.name = 'disaggregation5_val'
    mse5_val = T.mean((y_pred5_temp_val - y[:, :, 4].reshape(
        (y.shape[0], y.shape[1],
         1)))**2)  # As axis = None is calculated for all
    mae5_val = T.mean(
        T.abs_(y_pred5_temp_val -
               y[:, :, 4].reshape((y.shape[0], y.shape[1], 1))))

    totPred = T.sum(y_pred5_temp_val)
    totReal = T.sum(y[:, :, 4])
    relErr5_val = (totPred - totReal) / T.maximum(totPred, totReal)
    propAssigned5_val = 1 - T.sum(
        T.abs_(y_pred5_temp_val - y[:, :, 4].reshape(
            (y.shape[0], y.shape[1], 1)))) / (2 * T.sum(x))

    mse5_val.name = 'mse5_val'
    mae5_val.name = 'mae5_val'

    theta_mu5_in_val = theta_mu5_temp_val.reshape(
        (x_shape[0] * x_shape[1], -1))
    theta_sig5_in_val = theta_sig5_temp_val.reshape(
        (x_shape[0] * x_shape[1], -1))
    coeff5_in_val = coeff5_temp_val.reshape((x_shape[0] * x_shape[1], -1))

    prediction_val = T.concatenate([
        y_pred1_temp_val, y_pred2_temp_val, y_pred3_temp_val, y_pred4_temp_val,
        y_pred5_temp_val
    ],
                                   axis=2)

    recon_val = GMMdisagMulti(
        y_dim, y_in, theta_mu1_in_val, theta_sig1_in_val, coeff1_in_val,
        theta_mu2_in_val, theta_sig2_in_val, coeff2_in_val, theta_mu3_in_val,
        theta_sig3_in_val, coeff3_in_val, theta_mu4_in_val, theta_sig4_in_val,
        coeff4_in_val, theta_mu5_in_val, theta_sig5_in_val, coeff5_in_val)
    recon_val = recon_val.reshape((x_shape[0], x_shape[1]))
    recon_val.name = 'gmm_out'
    totaMSE_val = (mse1_val + mse2_val + mse3_val + mse4_val +
                   mse5_val) / y_dim
    totaMAE_val = (mae1_val + mae2_val + mae3_val + mae4_val +
                   mae5_val) / y_dim
    '''
    recon5 = GMM(y_in[:,4, None], theta_mu5_in, theta_sig5_in, coeff5_in)
    recon5 = recon.reshape((x_shape[0], x_shape[1]))    
    '''
    recon_term_val = recon_val.sum(axis=0).mean()
    recon_term_val = recon_val.sum(axis=0).mean()
    recon_term_val.name = 'recon_term'

    ######################

    model.inputs = [x, mask, y, y_mask, scheduleSamplingMask]
    model.params = params
    model.nodes = nodes

    optimizer = Adam(lr=lr)
    header = "epoch,log,kl,nll_upper_bound,mse,mae\n"
    extension = [
        GradientClipping(batch_size=batch_size),
        EpochCount(epoch, save_path, header),
        Monitoring(
            freq=monitoring_freq,
            ddout=[
                nll_upper_bound, recon_term, kl_term, mse1, mae1, mse2, mae2,
                mse3, mae3, mse4, mae4, mse5, mae5, y_pred1_temp, y_pred2_temp,
                y_pred3_temp, y_pred4_temp, y_pred5_temp
            ],
            indexSep=13,
            indexDDoutPlot=[13],  # adding indexes of ddout for the plotting
            #, (6,y_pred_temp)
            instancesPlot=instancesPlot,  #0-150
            data=[Iterator(valid_data, batch_size)],
            savedFolder=save_path),
        Picklize(freq=monitoring_freq, path=save_path),
        EarlyStopping(freq=monitoring_freq,
                      path=save_path,
                      channel=channel_name),
        WeightNorm()
    ]

    lr_iterations = {0: lr}

    mainloop = Training(name=pkl_name,
                        data=Iterator(train_data, batch_size),
                        model=model,
                        optimizer=optimizer,
                        cost=nll_upper_bound,
                        outputs=[nll_upper_bound],
                        n_steps=n_steps,
                        extension=extension,
                        lr_iterations=lr_iterations,
                        k_speedOfconvergence=30)
    mainloop.run()

    data = Iterator(test_data, batch_size)

    test_fn = theano.function(
        inputs=[x, y],  #[x, y],
        #givens={x:Xtest},
        #on_unused_input='ignore',
        #z=( ,200,1)
        allow_input_downcast=True,
        outputs=[
            prediction_val, recon_term_val, totaMSE_val, totaMAE_val, mse1_val,
            mse2_val, mse3_val, mse4_val, mse5_val, mae1_val, mae2_val,
            mae3_val, mae4_val, mae5_val, relErr1_val, relErr2_val,
            relErr3_val, relErr4_val, relErr5_val, propAssigned1_val,
            propAssigned2_val, propAssigned3_val, propAssigned4_val,
            propAssigned5_val
        ]  #prediction_val, mse_val, mae_val
        ,
        updates=
        updates_val  #, allow_input_downcast=True, on_unused_input='ignore'
    )
    testOutput = []
    testMetrics2 = []
    numBatchTest = 0
    for batch in data:
        outputGeneration = test_fn(batch[0], batch[2])
        testOutput.append(outputGeneration[1:14])
        testMetrics2.append(outputGeneration[14:])
        #{0:[4,20], 2:[5,10]}
        #if (numBatchTest==0):

        plt.figure(1)
        plt.plot(np.transpose(outputGeneration[0],
                              [1, 0, 2])[4])  #ORIGINAL 1,0,2
        plt.savefig(save_path +
                    "/vrnn_dis_generated{}_Pred_0-4".format(numBatchTest))
        plt.clf()

        plt.figure(2)
        plt.plot(np.transpose(batch[2], [1, 0, 2])[4])
        plt.savefig(save_path +
                    "/vrnn_dis_generated{}_RealDisag_0-4".format(numBatchTest))
        plt.clf()

        plt.figure(3)
        plt.plot(np.transpose(batch[0], [1, 0, 2])[4])  #ORIGINAL 1,0,2
        plt.savefig(save_path +
                    "/vrnn_dis_generated{}_Realagg_0-4".format(numBatchTest))
        plt.clf()
        numBatchTest += 1

    testOutput = np.asarray(testOutput)
    testMetrics2 = np.asarray(testMetrics2)
    print(testOutput.shape)
    print(testMetrics2.shape)
    recon_test = testOutput[:, 0].mean()
    mse_test = testOutput[:, 1].mean()
    mae_test = testOutput[:, 2].mean()
    mse1_test = testOutput[:, 3].mean()
    mae1_test = testOutput[:, 8].mean()
    mse2_test = testOutput[:, 4].mean()
    mae2_test = testOutput[:, 9].mean()
    mse3_test = testOutput[:, 5].mean()
    mae3_test = testOutput[:, 10].mean()
    mse4_test = testOutput[:, 6].mean()
    mae4_test = testOutput[:, 11].mean()
    mse5_test = testOutput[:, 7].mean()
    mae5_test = testOutput[:, 12].mean()

    relErr1_test = testMetrics2[:, 0].mean()
    relErr2_test = testMetrics2[:, 1].mean()
    relErr3_test = testMetrics2[:, 2].mean()
    relErr4_test = testMetrics2[:, 3].mean()
    relErr5_test = testMetrics2[:, 4].mean()

    propAssigned1_test = testMetrics2[:, 5].mean()
    propAssigned2_test = testMetrics2[:, 6].mean()
    propAssigned3_test = testMetrics2[:, 7].mean()
    propAssigned4_test = testMetrics2[:, 8].mean()
    propAssigned5_test = testMetrics2[:, 9].mean()

    fLog = open(save_path + '/output.csv', 'w')
    fLog.write(str(lr_iterations) + "\n")
    fLog.write(str(appliances) + "\n")
    fLog.write(str(windows) + "\n")
    fLog.write(
        "logTest,mse1_test,mse2_test,mse3_test,mse4_test,mse5_test,mae1_test,mae2_test,mae3_test,mae4_test,mae5_test,mseTest,maeTest\n"
    )
    fLog.write("{},{},{},{},{},{},{},{},{},{},{},{},{}\n\n".format(
        recon_test, mse1_test, mse2_test, mse3_test, mse4_test, mse5_test,
        mae1_test, mae2_test, mae3_test, mae4_test, mae5_test, mse_test,
        mae_test))
    fLog.write(
        "relErr1,relErr2,relErr3,relErr4,relErr5,propAssigned1,propAssigned2,propAssigned3,propAssigned4,propAssigned5\n"
    )
    fLog.write("{},{},{},{},{},{},{},{},{},{}\n".format(
        relErr1_test, relErr2_test, relErr3_test, relErr4_test, relErr5_test,
        propAssigned1_test, propAssigned2_test, propAssigned3_test,
        propAssigned4_test, propAssigned5_test))

    fLog.write("q_z_dim,p_z_dim,p_x_dim,x2s_dim,y2s_dim,z2s_dim\n")
    fLog.write("{},{},{},{},{},{}\n".format(q_z_dim, p_z_dim, p_x_dim, x2s_dim,
                                            y2s_dim, z2s_dim))
    fLog.write(
        "epoch,log,kl,mse1,mse2,mse3,mse4,mse5,mae1,mae2,mae3,mae4,mae5\n")
    for i, item in enumerate(mainloop.trainlog.monitor['nll_upper_bound']):
        d, e, f, g, j, k, l, m = 0, 0, 0, 0, 0, 0, 0, 0
        ep = mainloop.trainlog.monitor['epoch'][i]
        a = mainloop.trainlog.monitor['recon_term'][i]
        b = mainloop.trainlog.monitor['kl_term'][i]
        c = mainloop.trainlog.monitor['mse1'][i]
        h = mainloop.trainlog.monitor['mae1'][i]

        d = mainloop.trainlog.monitor['mse2'][i]
        j = mainloop.trainlog.monitor['mae2'][i]
        e = mainloop.trainlog.monitor['mse3'][i]
        k = mainloop.trainlog.monitor['mae3'][i]
        f = mainloop.trainlog.monitor['mse4'][i]
        l = mainloop.trainlog.monitor['mae4'][i]
        g = mainloop.trainlog.monitor['mse5'][i]
        m = mainloop.trainlog.monitor['mae5'][i]
        fLog.write(
            "{:d},{:.2f},{:.2f},{:.3f},{:.3f},{:.3f},{:.3f},{:.3f},{:.3f},{:.3f},{:.3f},{:.3f},{:.3f}\n"
            .format(ep, a, b, c, d, e, f, g, h, j, k, l, m))

    f = open(save_path + '/outputRealGeneration.pkl', 'wb')
    pickle.dump(outputGeneration, f, -1)
    f.close()
def main(args):

    #theano.optimizer='fast_compile'
    #theano.config.exception_verbosity='high'

    trial = int(args['trial'])
    pkl_name = 'vrnn_gmm_%d' % trial
    channel_name = 'mse'

    data_path = args['data_path']
    save_path = args[
        'save_path']  #+'/aggVSdisag_distrib/'+datetime.datetime.now().strftime("%y-%m-%d_%H-%M")
    period = int(args['period'])
    n_steps = int(args['n_steps'])
    stride_train = int(args['stride_train'])
    stride_test = n_steps
    typeLoad = int(args['typeLoad'])

    flgMSE = int(args['flgMSE'])
    monitoring_freq = int(args['monitoring_freq'])
    epoch = int(args['epoch'])
    batch_size = int(args['batch_size'])
    x_dim = int(args['x_dim'])
    y_dim = int(args['y_dim'])
    z_dim = int(args['z_dim'])
    rnn_dim = int(args['rnn_dim'])
    k = int(args['num_k'])  #a mixture of K Gaussian functions
    lr = float(args['lr'])
    origLR = lr
    debug = int(args['debug'])
    kSchedSamp = int(args['kSchedSamp'])

    print "trial no. %d" % trial
    print "batch size %d" % batch_size
    print "learning rate %f" % lr
    print "saving pkl file '%s'" % pkl_name
    print "to the save path '%s'" % save_path

    q_z_dim = 350
    p_z_dim = 400
    p_x_dim = 450
    x2s_dim = 400
    y2s_dim = 200
    z2s_dim = 350
    target_dim = k  # As different appliances are separeted in theta_mu1, theta_mu2, etc... each one is just created from k different Gaussians

    Xtrain, ytrain, Xval, yval, Xtest, ytest, reader = fetch_ukdale(
        data_path,
        windows,
        appliances,
        numApps=-1,
        period=period,
        n_steps=n_steps,
        stride_train=stride_train,
        stride_test=stride_test,
        flgAggSumScaled=1,
        flgFilterZeros=1,
        typeLoad=typeLoad)

    instancesPlot = {0: [10]}
    #instancesPlot = reader.build_dict_instances_plot(listDates, batch_size, Xval.shape[0])

    train_data = UKdale(
        name='train',
        prep='normalize',
        cond=True,  # False
        #path=data_path,
        inputX=Xtrain,
        labels=ytrain)

    X_mean = train_data.X_mean
    X_std = train_data.X_std

    valid_data = UKdale(
        name='valid',
        prep='normalize',
        cond=True,  # False
        #path=data_path,
        X_mean=X_mean,
        X_std=X_std,
        inputX=Xval,
        labels=yval)

    test_data = UKdale(
        name='valid',
        prep='normalize',
        cond=True,  # False
        #path=data_path,
        X_mean=X_mean,
        X_std=X_std,
        inputX=Xtest,
        labels=ytest)

    init_W = InitCell('rand')
    init_U = InitCell('ortho')
    init_b = InitCell('zeros')
    init_b_sig = InitCell('const', mean=0.6)

    x, mask, y, y_mask = train_data.theano_vars()
    schedMask = T.fvector('schedMask')

    x.name = 'x_original'

    if debug:
        x.tag.test_value = np.zeros((15, batch_size, x_dim), dtype=np.float32)
        temp = np.ones((15, batch_size), dtype=np.float32)
        temp[:, -2:] = 0.
        mask.tag.test_value = temp

    fmodel = open('vrnn_gmm_disall_best.pkl', 'rb')
    mainloop = cPickle.load(fmodel)
    fmodel.close()

    model = mainloop.model

    #print(mainloop.model.__dict__)

    rnn = mainloop.model.nodes[0]
    x_1 = mainloop.model.nodes[1]
    y_1 = mainloop.model.nodes[2]
    z_1 = mainloop.model.nodes[3]
    phi_1 = mainloop.model.nodes[4]
    phi_mu = mainloop.model.nodes[5]
    phi_sig = mainloop.model.nodes[6]
    prior_1 = mainloop.model.nodes[7]
    prior_mu = mainloop.model.nodes[8]
    prior_sig = mainloop.model.nodes[9]
    theta_1 = mainloop.model.nodes[10]
    theta_mu1 = mainloop.model.nodes[11]
    theta_sig1 = mainloop.model.nodes[12]
    coeff1 = mainloop.model.nodes[13]

    nodes = [
        rnn,
        x_1,
        y_1,
        z_1,  #dissag_pred,
        phi_1,
        phi_mu,
        phi_sig,
        prior_1,
        prior_mu,
        prior_sig,
        theta_1,
        theta_mu1,
        theta_sig1,
        coeff1
    ]

    params = mainloop.model.params

    dynamicOutput = [None, None, None, None, None, None, None, None]
    #dynamicOutput_val = [None, None, None, None, None, None,None,  None, None]
    if (y_dim > 1):
        theta_mu2 = mainloop.model.nodes[14]
        theta_sig2 = mainloop.model.nodes[15]
        coeff2 = mainloop.model.nodes[16]
        nodes = nodes + [theta_mu2, theta_sig2, coeff2]
        dynamicOutput = dynamicOutput + [None, None, None, None
                                         ]  #mu, sig, coef and pred
    if (y_dim > 2):
        theta_mu3 = mainloop.model.nodes[17]
        theta_sig3 = mainloop.model.nodes[18]
        coeff3 = mainloop.model.nodes[19]
        nodes = nodes + [theta_mu3, theta_sig3, coeff3]
        dynamicOutput = dynamicOutput + [None, None, None, None]
    if (y_dim > 3):
        theta_mu4 = mainloop.model.nodes[20]
        theta_sig4 = mainloop.model.nodes[21]
        coeff4 = mainloop.model.nodes[22]
        nodes = nodes + [theta_mu4, theta_sig4, coeff4]
        dynamicOutput = dynamicOutput + [None, None, None, None]
    if (y_dim > 4):
        theta_mu5 = mainloop.model.nodes[23]
        theta_sig5 = mainloop.model.nodes[24]
        coeff5 = mainloop.model.nodes[25]
        nodes = nodes + [theta_mu5, theta_sig5, coeff5]
        dynamicOutput = dynamicOutput + [None, None, None, None]

    s_0 = rnn.get_init_state(batch_size)

    x_1_temp = x_1.fprop([x], params)
    y_1_temp = y_1.fprop([y], params)

    output_fn = [s_0] + dynamicOutput
    output_fn_val = [s_0] + dynamicOutput[2:]
    print(len(output_fn), len(output_fn_val))

    def inner_fn_test(x_t, s_tm1):

        prior_1_t = prior_1.fprop([x_t, s_tm1], params)
        prior_mu_t = prior_mu.fprop([prior_1_t], params)
        prior_sig_t = prior_sig.fprop([prior_1_t], params)

        z_t = Gaussian_sample(
            prior_mu_t, prior_sig_t
        )  #in the original code it is gaussian. GMM is for the generation
        z_1_t = z_1.fprop([z_t], params)

        theta_1_t = theta_1.fprop([z_1_t, s_tm1], params)
        theta_mu1_t = theta_mu1.fprop([theta_1_t], params)
        theta_sig1_t = theta_sig1.fprop([theta_1_t], params)
        coeff1_t = coeff1.fprop([theta_1_t], params)

        y_pred1 = GMM_sampleY(
            theta_mu1_t, theta_sig1_t,
            coeff1_t)  #Gaussian_sample(theta_mu_t, theta_sig_t)

        tupleMulti = prior_mu_t, prior_sig_t, theta_mu1_t, theta_sig1_t, coeff1_t, y_pred1

        if (y_dim > 1):
            theta_mu2_t = theta_mu2.fprop([theta_1_t], params)
            theta_sig2_t = theta_sig2.fprop([theta_1_t], params)
            coeff2_t = coeff2.fprop([theta_1_t], params)
            y_pred2 = GMM_sampleY(theta_mu2_t, theta_sig2_t, coeff2_t)
            y_pred1 = T.concatenate([y_pred1, y_pred2], axis=1)
            tupleMulti = tupleMulti + (theta_mu2_t, theta_sig2_t, coeff2_t,
                                       y_pred2)

        if (y_dim > 2):
            theta_mu3_t = theta_mu3.fprop([theta_1_t], params)
            theta_sig3_t = theta_sig3.fprop([theta_1_t], params)
            coeff3_t = coeff3.fprop([theta_1_t], params)
            y_pred3 = GMM_sampleY(theta_mu3_t, theta_sig3_t, coeff3_t)
            y_pred1 = T.concatenate([y_pred1, y_pred3], axis=1)
            tupleMulti = tupleMulti + (theta_mu3_t, theta_sig3_t, coeff3_t,
                                       y_pred3)

        if (y_dim > 3):
            theta_mu4_t = theta_mu4.fprop([theta_1_t], params)
            theta_sig4_t = theta_sig4.fprop([theta_1_t], params)
            coeff4_t = coeff4.fprop([theta_1_t], params)
            y_pred4 = GMM_sampleY(theta_mu4_t, theta_sig4_t, coeff4_t)
            y_pred1 = T.concatenate([y_pred1, y_pred4], axis=1)
            tupleMulti = tupleMulti + (theta_mu4_t, theta_sig4_t, coeff4_t,
                                       y_pred4)

        if (y_dim > 4):
            theta_mu5_t = theta_mu5.fprop([theta_1_t], params)
            theta_sig5_t = theta_sig5.fprop([theta_1_t], params)
            coeff5_t = coeff5.fprop([theta_1_t], params)
            y_pred5 = GMM_sampleY(theta_mu5_t, theta_sig5_t, coeff5_t)
            y_pred1 = T.concatenate([y_pred1, y_pred5], axis=1)
            tupleMulti = tupleMulti + (theta_mu5_t, theta_sig5_t, coeff5_t,
                                       y_pred5)

        pred_1_t = y_1.fprop([y_pred1], params)
        #y_pred = [GMM_sampleY(theta_mu_t[i], theta_sig_t[i], coeff_t[i]) for i in range(y_dim)]#T.stack([y_pred1,y_pred2],axis = 0 )
        s_t = rnn.fprop([[x_t, z_1_t, pred_1_t], [s_tm1]], params)
        #y_pred = dissag_pred.fprop([s_t], params)

        return (s_t, ) + tupleMulti
        #corr_temp, binary_temp

    (restResults_val, updates_val) = theano.scan(fn=inner_fn_test,
                                                 sequences=[x_1_temp],
                                                 outputs_info=output_fn_val)

    for k, v in updates_val.iteritems():
        k.default_update = v

    def inner_fn(x_t, y_t, schedMask, s_tm1):

        phi_1_t = phi_1.fprop([x_t, s_tm1, y_t], params)
        phi_mu_t = phi_mu.fprop([phi_1_t], params)
        phi_sig_t = phi_sig.fprop([phi_1_t], params)

        prior_1_t = prior_1.fprop([x_t, s_tm1], params)
        prior_mu_t = prior_mu.fprop([prior_1_t], params)
        prior_sig_t = prior_sig.fprop([prior_1_t], params)

        z_t = Gaussian_sample(
            phi_mu_t, phi_sig_t
        )  #in the original code it is gaussian. GMM is for the generation
        z_1_t = z_1.fprop([z_t], params)

        theta_1_t = theta_1.fprop([z_1_t, s_tm1], params)
        theta_mu1_t = theta_mu1.fprop([theta_1_t], params)
        theta_sig1_t = theta_sig1.fprop([theta_1_t], params)
        coeff1_t = coeff1.fprop([theta_1_t], params)

        y_pred1 = GMM_sampleY(
            theta_mu1_t, theta_sig1_t,
            coeff1_t)  #Gaussian_sample(theta_mu_t, theta_sig_t)
        y_pred = y_pred1

        tupleMulti = phi_mu_t, phi_sig_t, prior_mu_t, prior_sig_t, theta_mu1_t, theta_sig1_t, coeff1_t, y_pred1

        if (y_dim > 1):
            theta_mu2_t = theta_mu2.fprop([theta_1_t], params)
            theta_sig2_t = theta_sig2.fprop([theta_1_t], params)
            coeff2_t = coeff2.fprop([theta_1_t], params)
            y_pred2 = GMM_sampleY(theta_mu2_t, theta_sig2_t, coeff2_t)
            y_pred = T.concatenate([y_pred, y_pred2], axis=1)
            tupleMulti = tupleMulti + (theta_mu2_t, theta_sig2_t, coeff2_t,
                                       y_pred2)

        if (y_dim > 2):
            theta_mu3_t = theta_mu3.fprop([theta_1_t], params)
            theta_sig3_t = theta_sig3.fprop([theta_1_t], params)
            coeff3_t = coeff3.fprop([theta_1_t], params)
            y_pred3 = GMM_sampleY(theta_mu3_t, theta_sig3_t, coeff3_t)
            y_pred = T.concatenate([y_pred, y_pred3], axis=1)
            tupleMulti = tupleMulti + (theta_mu3_t, theta_sig3_t, coeff3_t,
                                       y_pred3)

        if (y_dim > 3):
            theta_mu4_t = theta_mu4.fprop([theta_1_t], params)
            theta_sig4_t = theta_sig4.fprop([theta_1_t], params)
            coeff4_t = coeff4.fprop([theta_1_t], params)
            y_pred4 = GMM_sampleY(theta_mu4_t, theta_sig4_t, coeff4_t)
            y_pred = T.concatenate([y_pred, y_pred4], axis=1)
            tupleMulti = tupleMulti + (theta_mu4_t, theta_sig4_t, coeff4_t,
                                       y_pred4)

        if (y_dim > 4):
            theta_mu5_t = theta_mu5.fprop([theta_1_t], params)
            theta_sig5_t = theta_sig5.fprop([theta_1_t], params)
            coeff5_t = coeff5.fprop([theta_1_t], params)
            y_pred5 = GMM_sampleY(theta_mu5_t, theta_sig5_t, coeff5_t)
            y_pred = T.concatenate([y_pred, y_pred5], axis=1)
            tupleMulti = tupleMulti + (theta_mu5_t, theta_sig5_t, coeff5_t,
                                       y_pred5)

        if (schedMask == 1):
            s_t = rnn.fprop([[x_t, z_1_t, y_t], [s_tm1]], params)
        else:
            y_t_aux = y_1.fprop([y_pred], params)
            s_t = rnn.fprop([[x_t, z_1_t, y_t_aux], [s_tm1]], params)

        return (s_t, ) + tupleMulti
        #corr_temp, binary_temp

    (restResults,
     updates) = theano.scan(fn=inner_fn,
                            sequences=[x_1_temp, y_1_temp, schedMask],
                            outputs_info=output_fn)
    '''
    ((s_temp, phi_mu_temp, phi_sig_temp, prior_mu_temp, prior_sig_temp,z_t_temp, z_1_temp, theta_1_temp, 
      theta_mu1_temp, theta_sig1_temp, coeff1_temp, theta_mu2_temp, theta_sig2_temp, coeff2_temp, 
      theta_mu3_temp, theta_sig3_temp, coeff3_temp, theta_mu4_temp, theta_sig4_temp, coeff4_temp,
      theta_mu5_temp, theta_sig5_temp, coeff5_temp, 
      y_pred1_temp, y_pred2_temp, y_pred3_temp, y_pred4_temp, y_pred5_temp), updates) =\
        theano.scan(fn=inner_fn,
                    sequences=[x_1_temp, y_1_temp],
                    outputs_info=[s_0,  None, None, None, None, None, None, None, None,None,  None, None, 
                                  None, None, None, None, None, None, None, None,
                                  None, None, None, None, None, None, None, None])
    '''
    s_temp, phi_mu_temp, phi_sig_temp, prior_mu_temp, prior_sig_temp,\
      theta_mu1_temp, theta_sig1_temp, coeff1_temp, y_pred1_temp = restResults[:9]
    restResults = restResults[9:]

    for k, v in updates.iteritems():
        k.default_update = v

    #s_temp = concatenate([s_0[None, :, :], s_temp[:-1]], axis=0)# seems like this is for creating an additional dimension to s_0

    theta_mu1_temp.name = 'theta_mu1'
    theta_sig1_temp.name = 'theta_sig1'
    coeff1_temp.name = 'coeff1'
    y_pred1_temp.name = 'disaggregation1'

    #[:,:,flgAgg].reshape((y.shape[0],y.shape[1],1)
    mse1 = T.mean((y_pred1_temp - y[:, :, 0].reshape(
        (y.shape[0], y.shape[1], 1)))**2)
    mae1 = T.mean(
        T.abs_(y_pred1_temp - y[:, :, 0].reshape((y.shape[0], y.shape[1], 1))))
    mse1.name = 'mse1'
    mae1.name = 'mae1'

    kl_temp = KLGaussianGaussian(phi_mu_temp, phi_sig_temp, prior_mu_temp,
                                 prior_sig_temp)

    x_shape = x.shape
    y_shape = y.shape
    #x_in = x.reshape((x_shape[0]*x_shape[1], -1))
    y_in = y.reshape((y_shape[0] * y_shape[1], -1))

    theta_mu1_in = theta_mu1_temp.reshape((x_shape[0] * x_shape[1], -1))
    theta_sig1_in = theta_sig1_temp.reshape((x_shape[0] * x_shape[1], -1))
    coeff1_in = coeff1_temp.reshape((x_shape[0] * x_shape[1], -1))

    ddoutMSEA = []
    ddoutYpreds = [y_pred1_temp]
    indexSepDynamic = 6  # plus one for totaMSE

    #totaMSE = T.copy(mse1)
    mse2 = T.zeros((1, ))
    mae2 = T.zeros((1, ))
    mse3 = T.zeros((1, ))
    mae3 = T.zeros((1, ))
    mse4 = T.zeros((1, ))
    mae4 = T.zeros((1, ))
    mse5 = T.zeros((1, ))
    mae5 = T.zeros((1, ))

    if (y_dim > 1):
        theta_mu2_temp, theta_sig2_temp, coeff2_temp, y_pred2_temp = restResults[:
                                                                                 4]
        restResults = restResults[4:]
        theta_mu2_temp.name = 'theta_mu2'
        theta_sig2_temp.name = 'theta_sig2'
        coeff2_temp.name = 'coeff2'
        y_pred2_temp.name = 'disaggregation2'
        mse2 = T.mean((y_pred2_temp - y[:, :, 1].reshape(
            (y.shape[0], y.shape[1],
             1)))**2)  # As axis = None is calculated for all
        mae2 = T.mean(
            T.abs_(y_pred2_temp -
                   y[:, :, 1].reshape((y.shape[0], y.shape[1], 1))))
        mse2.name = 'mse2'
        mae2.name = 'mae2'

        theta_mu2_in = theta_mu2_temp.reshape((x_shape[0] * x_shape[1], -1))
        theta_sig2_in = theta_sig2_temp.reshape((x_shape[0] * x_shape[1], -1))
        coeff2_in = coeff2_temp.reshape((x_shape[0] * x_shape[1], -1))

        argsGMM = theta_mu2_in, theta_sig2_in, coeff2_in

        ddoutMSEA = ddoutMSEA + [mse2, mae2]
        ddoutYpreds = ddoutYpreds + [y_pred2_temp]
        #totaMSE+=mse2
        indexSepDynamic += 2

    if (y_dim > 2):
        theta_mu3_temp, theta_sig3_temp, coeff3_temp, y_pred3_temp = restResults[:
                                                                                 4]
        restResults = restResults[4:]
        theta_mu3_temp.name = 'theta_mu3'
        theta_sig3_temp.name = 'theta_sig3'
        coeff3_temp.name = 'coeff3'
        y_pred3_temp.name = 'disaggregation3'
        mse3 = T.mean((y_pred3_temp - y[:, :, 2].reshape(
            (y.shape[0], y.shape[1],
             1)))**2)  # As axis = None is calculated for all
        mae3 = T.mean(
            T.abs_(y_pred3_temp -
                   y[:, :, 2].reshape((y.shape[0], y.shape[1], 1))))
        mse3.name = 'mse3'
        mae3.name = 'mae3'

        theta_mu3_in = theta_mu3_temp.reshape((x_shape[0] * x_shape[1], -1))
        theta_sig3_in = theta_sig3_temp.reshape((x_shape[0] * x_shape[1], -1))
        coeff3_in = coeff3_temp.reshape((x_shape[0] * x_shape[1], -1))

        argsGMM = argsGMM + (theta_mu3_in, theta_sig3_in, coeff3_in)
        ddoutMSEA = ddoutMSEA + [mse3, mae3]
        ddoutYpreds = ddoutYpreds + [y_pred3_temp]
        #totaMSE+=mse3
        indexSepDynamic += 2

    if (y_dim > 3):
        theta_mu4_temp, theta_sig4_temp, coeff4_temp, y_pred4_temp = restResults[:
                                                                                 4]
        restResults = restResults[4:]
        theta_mu4_temp.name = 'theta_mu4'
        theta_sig4_temp.name = 'theta_sig4'
        coeff4_temp.name = 'coeff4'
        y_pred4_temp.name = 'disaggregation4'
        mse4 = T.mean((y_pred4_temp - y[:, :, 3].reshape(
            (y.shape[0], y.shape[1],
             1)))**2)  # As axis = None is calculated for all
        mae4 = T.mean(
            T.abs_(y_pred4_temp -
                   y[:, :, 3].reshape((y.shape[0], y.shape[1], 1))))
        mse4.name = 'mse4'
        mae4.name = 'mae4'

        theta_mu4_in = theta_mu4_temp.reshape((x_shape[0] * x_shape[1], -1))
        theta_sig4_in = theta_sig4_temp.reshape((x_shape[0] * x_shape[1], -1))
        coeff4_in = coeff4_temp.reshape((x_shape[0] * x_shape[1], -1))

        argsGMM = argsGMM + (theta_mu4_in, theta_sig4_in, coeff4_in)
        ddoutMSEA = ddoutMSEA + [mse4, mae4]
        ddoutYpreds = ddoutYpreds + [y_pred4_temp]
        #totaMSE+=mse4
        indexSepDynamic += 2

    if (y_dim > 4):
        theta_mu5_temp, theta_sig5_temp, coeff5_temp, y_pred5_temp = restResults[:
                                                                                 4]
        restResults = restResults[4:]
        theta_mu5_temp.name = 'theta_mu5'
        theta_sig5_temp.name = 'theta_sig5'
        coeff5_temp.name = 'coeff5'
        y_pred5_temp.name = 'disaggregation5'
        mse5 = T.mean((y_pred5_temp - y[:, :, 4].reshape(
            (y.shape[0], y.shape[1],
             1)))**2)  # As axis = None is calculated for all
        mae5 = T.mean(
            T.abs_(y_pred5_temp -
                   y[:, :, 4].reshape((y.shape[0], y.shape[1], 1))))
        mse5.name = 'mse5'
        mae5.name = 'mae5'

        theta_mu5_in = theta_mu5_temp.reshape((x_shape[0] * x_shape[1], -1))
        theta_sig5_in = theta_sig5_temp.reshape((x_shape[0] * x_shape[1], -1))
        coeff5_in = coeff5_temp.reshape((x_shape[0] * x_shape[1], -1))

        argsGMM = argsGMM + (theta_mu5_in, theta_sig5_in, coeff5_in)
        ddoutMSEA = ddoutMSEA + [mse5, mae5]
        ddoutYpreds = ddoutYpreds + [y_pred5_temp]
        #totaMSE+=mse5
        indexSepDynamic += 2

    totaMSE = (mse1 + mse2 + mse3 + mse4 + mse5) / y_dim
    totaMSE.name = 'mse'

    kl_temp = KLGaussianGaussian(phi_mu_temp, phi_sig_temp, prior_mu_temp,
                                 prior_sig_temp)

    x_shape = x.shape
    y_shape = y.shape
    x_in = x.reshape((x_shape[0] * x_shape[1], -1))
    y_in = y.reshape((y_shape[0] * y_shape[1], -1))

    recon = GMMdisagMulti(
        y_dim, y_in, theta_mu1_in, theta_sig1_in, coeff1_in, *argsGMM
    )  # BiGMM(x_in, theta_mu_in, theta_sig_in, coeff_in, corr_in, binary_in)
    recon = recon.reshape((x_shape[0], x_shape[1]))
    recon.name = 'gmm_out'
    '''
    recon5 = GMM(y_in[:,4, None], theta_mu5_in, theta_sig5_in, coeff5_in)
    recon5 = recon.reshape((x_shape[0], x_shape[1]))    
    '''
    recon_term = recon.sum(axis=0).mean()
    recon_term = recon.sum(axis=0).mean()
    recon_term.name = 'recon_term'

    #kl_temp = kl_temp * mask

    kl_term = kl_temp.sum(axis=0).mean()
    kl_term.name = 'kl_term'

    #nll_upper_bound_0 = recon_term + kl_term
    #nll_upper_bound_0.name = 'nll_upper_bound_0'
    if (flgMSE == 1):
        nll_upper_bound = recon_term + kl_term + totaMSE
    else:
        nll_upper_bound = recon_term + kl_term
    nll_upper_bound.name = 'nll_upper_bound'

    ######################## TEST (GENERATION) TIME
    s_temp_val, prior_mu_temp_val, prior_sig_temp_val,  \
      theta_mu1_temp_val, theta_sig1_temp_val, coeff1_temp_val, y_pred1_temp_val = restResults_val[:7]
    restResults_val = restResults_val[7:]

    #s_temp_val = concatenate([s_0[None, :, :], s_temp_val[:-1]], axis=0)# seems like this is for creating an additional dimension to s_0

    theta_mu1_temp_val.name = 'theta_mu1_val'
    theta_sig1_temp_val.name = 'theta_sig1_val'
    coeff1_temp_val.name = 'coeff1_val'
    y_pred1_temp_val.name = 'disaggregation1_val'

    #[:,:,flgAgg].reshape((y.shape[0],y.shape[1],1)
    mse1_val = T.mean((y_pred1_temp_val - y[:, :, 0].reshape(
        (y.shape[0], y.shape[1],
         1)))**2)  # As axis = None is calculated for all
    mae1_val = T.mean(
        T.abs_(y_pred1_temp_val -
               y[:, :, 0].reshape((y.shape[0], y.shape[1], 1))))

    #NEURALNILM #(sum_output - sum_target) / max(sum_output, sum_target))
    totPred = T.sum(y_pred1_temp_val)
    totReal = T.sum(y[:, :, 0])
    relErr1_val = (totPred - totReal) / T.maximum(totPred, totReal)
    propAssigned1_val = 1 - T.sum(
        T.abs_(y_pred1_temp_val - y[:, :, 0].reshape(
            (y.shape[0], y.shape[1], 1)))) / (2 * T.sum(x))

    #y_unNormalize = (y[:,:,0] * reader.stdTraining[0]) + reader.meanTraining[0]
    #y_pred1_temp_val = (y_pred1_temp_val * reader.stdTraining[0]) + reader.meanTraining[0]

    #mse1_valUnNorm = T.mean((y_pred1_temp_val - y_unNormalize.reshape((y.shape[0],y.shape[1],1)))**2) # As axis = None is calculated for all
    #mae1_valUnNorm = T.mean( T.abs_(y_pred1_temp_val - y_unNormalize.reshape((y.shape[0],y.shape[1],1))))
    mse1_val.name = 'mse1_val'
    mae1_val.name = 'mae1_val'

    theta_mu1_in_val = theta_mu1_temp_val.reshape(
        (x_shape[0] * x_shape[1], -1))
    theta_sig1_in_val = theta_sig1_temp_val.reshape(
        (x_shape[0] * x_shape[1], -1))
    coeff1_in_val = coeff1_temp_val.reshape((x_shape[0] * x_shape[1], -1))

    ddoutMSEA_val = []
    ddoutYpreds_val = [y_pred1_temp_val]
    totaMSE_val = mse1_val
    totaMAE_val = mae1_val
    indexSepDynamic_val = 5
    prediction_val = y_pred1_temp_val

    #Initializing values of mse and mae
    mse2_val = T.zeros((1, ))
    mae2_val = T.zeros((1, ))
    mse3_val = T.zeros((1, ))
    mae3_val = T.zeros((1, ))
    mse4_val = T.zeros((1, ))
    mae4_val = T.zeros((1, ))
    mse5_val = T.zeros((1, ))
    mae5_val = T.zeros((1, ))

    relErr2_val = T.zeros((1, ))
    relErr3_val = T.zeros((1, ))
    relErr4_val = T.zeros((1, ))
    relErr5_val = T.zeros((1, ))
    propAssigned2_val = T.zeros((1, ))
    propAssigned3_val = T.zeros((1, ))
    propAssigned4_val = T.zeros((1, ))
    propAssigned5_val = T.zeros((1, ))

    if (y_dim > 1):
        theta_mu2_temp_val, theta_sig2_temp_val, coeff2_temp_val, y_pred2_temp_val = restResults_val[:
                                                                                                     4]
        restResults_val = restResults_val[4:]
        theta_mu2_temp_val.name = 'theta_mu2_val'
        theta_sig2_temp_val.name = 'theta_sig2_val'
        coeff2_temp_val.name = 'coeff2_val'
        y_pred2_temp_val.name = 'disaggregation2_val'
        mse2_val = T.mean((y_pred2_temp_val - y[:, :, 1].reshape(
            (y.shape[0], y.shape[1],
             1)))**2)  # As axis = None is calculated for all
        mae2_val = T.mean(
            T.abs_(y_pred2_temp_val -
                   y[:, :, 1].reshape((y.shape[0], y.shape[1], 1))))

        totPred = T.sum(y_pred2_temp_val)
        totReal = T.sum(y[:, :, 1])
        relErr2_val = (totPred - totReal) / T.maximum(totPred, totReal)
        propAssigned2_val = 1 - T.sum(
            T.abs_(y_pred2_temp_val - y[:, :, 1].reshape(
                (y.shape[0], y.shape[1], 1)))) / (2 * T.sum(x))

        mse2_val.name = 'mse2_val'
        mae2_val.name = 'mae2_val'

        theta_mu2_in_val = theta_mu2_temp_val.reshape(
            (x_shape[0] * x_shape[1], -1))
        theta_sig2_in_val = theta_sig2_temp_val.reshape(
            (x_shape[0] * x_shape[1], -1))
        coeff2_in_val = coeff2_temp_val.reshape((x_shape[0] * x_shape[1], -1))

        argsGMM_val = theta_mu2_in_val, theta_sig2_in_val, coeff2_in_val

        ddoutMSEA_val = ddoutMSEA_val + [mse2_val, mae2_val]
        ddoutYpreds_val = ddoutYpreds_val + [y_pred2_temp_val]
        totaMSE_val += mse2_val
        totaMAE_val += mae2_val
        indexSepDynamic_val += 2

        prediction_val = T.concatenate([prediction_val, y_pred2_temp_val],
                                       axis=2)

    if (y_dim > 2):
        theta_mu3_temp_val, theta_sig3_temp_val, coeff3_temp_val, y_pred3_temp_val = restResults_val[:
                                                                                                     4]
        restResults_val = restResults_val[4:]
        theta_mu3_temp_val.name = 'theta_mu3_val'
        theta_sig3_temp_val.name = 'theta_sig3_val'
        coeff3_temp_val.name = 'coeff3_val'
        y_pred3_temp_val.name = 'disaggregation3_val'
        mse3_val = T.mean((y_pred3_temp_val - y[:, :, 2].reshape(
            (y.shape[0], y.shape[1],
             1)))**2)  # As axis = None is calculated for all
        mae3_val = T.mean(
            T.abs_(y_pred3_temp_val -
                   y[:, :, 2].reshape((y.shape[0], y.shape[1], 1))))

        totPred = T.sum(y_pred3_temp_val)
        totReal = T.sum(y[:, :, 2])
        relErr3_val = (totPred - totReal) / T.maximum(totPred, totReal)
        propAssigned3_val = 1 - T.sum(
            T.abs_(y_pred3_temp_val - y[:, :, 2].reshape(
                (y.shape[0], y.shape[1], 1)))) / (2 * T.sum(x))

        mse3_val.name = 'mse3_val'
        mae3_val.name = 'mae3_val'

        theta_mu3_in_val = theta_mu3_temp_val.reshape(
            (x_shape[0] * x_shape[1], -1))
        theta_sig3_in_val = theta_sig3_temp_val.reshape(
            (x_shape[0] * x_shape[1], -1))
        coeff3_in_val = coeff3_temp_val.reshape((x_shape[0] * x_shape[1], -1))

        argsGMM_val = argsGMM_val + (theta_mu3_in_val, theta_sig3_in_val,
                                     coeff3_in_val)
        ddoutMSEA_val = ddoutMSEA_val + [mse3_val, mae3_val]
        ddoutYpreds_val = ddoutYpreds_val + [y_pred3_temp_val]
        totaMSE_val += mse3_val
        totaMAE_val += mae3_val
        indexSepDynamic_val += 2

        prediction_val = T.concatenate([prediction_val, y_pred3_temp_val],
                                       axis=2)

    if (y_dim > 3):
        theta_mu4_temp_val, theta_sig4_temp_val, coeff4_temp_val, y_pred4_temp_val = restResults_val[:
                                                                                                     4]
        restResults_val = restResults_val[4:]
        theta_mu4_temp_val.name = 'theta_mu4_val'
        theta_sig4_temp_val.name = 'theta_sig4_val'
        coeff4_temp_val.name = 'coeff4_val'
        y_pred4_temp_val.name = 'disaggregation4_val'
        mse4_val = T.mean((y_pred4_temp_val - y[:, :, 3].reshape(
            (y.shape[0], y.shape[1],
             1)))**2)  # As axis = None is calculated for all
        mae4_val = T.mean(
            T.abs_(y_pred4_temp_val -
                   y[:, :, 3].reshape((y.shape[0], y.shape[1], 1))))

        totPred = T.sum(y_pred4_temp_val)
        totReal = T.sum(y[:, :, 3])
        relErr4_val = (totPred - totReal) / T.maximum(totPred, totReal)
        propAssigned4_val = 1 - T.sum(
            T.abs_(y_pred4_temp_val - y[:, :, 3].reshape(
                (y.shape[0], y.shape[1], 1)))) / (2 * T.sum(x))

        mse4_val.name = 'mse4_val'
        mae4_val.name = 'mae4_val'

        theta_mu4_in_val = theta_mu4_temp_val.reshape(
            (x_shape[0] * x_shape[1], -1))
        theta_sig4_in_val = theta_sig4_temp_val.reshape(
            (x_shape[0] * x_shape[1], -1))
        coeff4_in_val = coeff4_temp_val.reshape((x_shape[0] * x_shape[1], -1))

        argsGMM_val = argsGMM_val + (theta_mu4_in_val, theta_sig4_in_val,
                                     coeff4_in_val)
        ddoutMSEA_val = ddoutMSEA_val + [mse4_val, mae4_val]
        ddoutYpreds_val = ddoutYpreds_val + [y_pred4_temp_val]
        totaMSE_val += mse4_val
        totaMAE_val += mae4_val
        indexSepDynamic_val += 2
        prediction_val = T.concatenate([prediction_val, y_pred4_temp_val],
                                       axis=2)

    if (y_dim > 4):
        theta_mu5_temp_val, theta_sig5_temp_val, coeff5_temp_val, y_pred5_temp_val = restResults_val[:
                                                                                                     4]
        restResults_val = restResults_val[4:]
        theta_mu5_temp_val.name = 'theta_mu5_val'
        theta_sig5_temp_val.name = 'theta_sig5_val'
        coeff5_temp_val.name = 'coeff5_val'
        y_pred5_temp_val.name = 'disaggregation5_val'
        mse5_val = T.mean((y_pred5_temp_val - y[:, :, 4].reshape(
            (y.shape[0], y.shape[1],
             1)))**2)  # As axis = None is calculated for all
        mae5_val = T.mean(
            T.abs_(y_pred5_temp_val -
                   y[:, :, 4].reshape((y.shape[0], y.shape[1], 1))))

        totPred = T.sum(y_pred5_temp_val)
        totReal = T.sum(y[:, :, 4])
        relErr5_val = (totPred - totReal) / T.maximum(totPred, totReal)
        propAssigned5_val = 1 - T.sum(
            T.abs_(y_pred5_temp_val - y[:, :, 4].reshape(
                (y.shape[0], y.shape[1], 1)))) / (2 * T.sum(x))

        mse5_val.name = 'mse5_val'
        mae5_val.name = 'mae5_val'

        theta_mu5_in_val = theta_mu5_temp_val.reshape(
            (x_shape[0] * x_shape[1], -1))
        theta_sig5_in_val = theta_sig5_temp_val.reshape(
            (x_shape[0] * x_shape[1], -1))
        coeff5_in_val = coeff5_temp_val.reshape((x_shape[0] * x_shape[1], -1))

        argsGMM_val = argsGMM_val + (theta_mu5_in_val, theta_sig5_in_val,
                                     coeff5_in_val)
        ddoutMSEA_val = ddoutMSEA_val + [mse5_val, mae5_val]
        ddoutYpreds_val = ddoutYpreds_val + [y_pred5_temp_val]
        totaMSE_val += mse5_val
        totaMAE_val += mae5_val
        indexSepDynamic_val += 2
        prediction_val = T.concatenate([prediction_val, y_pred5_temp_val],
                                       axis=2)

    recon_val = GMMdisagMulti(
        y_dim, y_in, theta_mu1_in_val, theta_sig1_in_val, coeff1_in_val,
        *argsGMM_val
    )  # BiGMM(x_in, theta_mu_in, theta_sig_in, coeff_in, corr_in, binary_in)
    recon_val = recon_val.reshape((x_shape[0], x_shape[1]))
    recon_val.name = 'gmm_out'
    totaMSE_val = totaMSE_val / y_dim
    totaMAE_val = totaMAE_val / y_dim
    '''
    recon5 = GMM(y_in[:,4, None], theta_mu5_in, theta_sig5_in, coeff5_in)
    recon5 = recon.reshape((x_shape[0], x_shape[1]))    
    '''
    recon_term_val = recon_val.sum(axis=0).mean()
    recon_term_val = recon_val.sum(axis=0).mean()
    recon_term_val.name = 'recon_term'

    ######################

    model.inputs = [x, mask, y, y_mask, schedMask]
    model.params = params
    model.nodes = nodes

    optimizer = Adam(lr=lr)
    header = "epoch,log,kl,nll_upper_bound,mse,mae\n"
    extension = [
        GradientClipping(batch_size=batch_size),
        EpochCount(epoch, save_path, header),
        Monitoring(
            freq=monitoring_freq,
            ddout=[nll_upper_bound, recon_term, kl_term, totaMSE, mse1, mae1] +
            ddoutMSEA + ddoutYpreds,
            indexSep=indexSepDynamic,
            indexDDoutPlot=[13],  # adding indexes of ddout for the plotting
            #, (6,y_pred_temp)
            instancesPlot=instancesPlot,  #0-150
            data=[Iterator(valid_data, batch_size)],
            savedFolder=save_path),
        Picklize(freq=monitoring_freq, path=save_path),
        EarlyStopping(freq=monitoring_freq,
                      path=save_path,
                      channel=channel_name),
        WeightNorm()
    ]

    lr_iterations = {0: lr}
    """mainloop = Training(
        name=pkl_name,
        data=Iterator(train_data, batch_size),
        model=model,
        optimizer=optimizer,
        cost=nll_upper_bound,
        outputs=[nll_upper_bound],
        n_steps = n_steps,
        extension=extension,
        lr_iterations=lr_iterations,
        k_speedOfconvergence=kSchedSamp
    )"""

    mainloop.restore(data=Iterator(train_data, batch_size),
                     optimizer=optimizer,
                     cost=nll_upper_bound,
                     outputs=[nll_upper_bound],
                     n_steps=n_steps,
                     extension=extension,
                     lr_iterations=lr_iterations,
                     k_speedOfconvergence=kSchedSamp)

    mainloop.run()

    data = Iterator(test_data, batch_size)

    test_fn = theano.function(
        inputs=[x, y],  #[x, y],
        #givens={x:Xtest},
        #on_unused_input='ignore',
        #z=( ,200,1)
        allow_input_downcast=True,
        outputs=[
            prediction_val, recon_term_val, totaMSE_val, totaMAE_val, mse1_val,
            mse2_val, mse3_val, mse4_val, mse5_val, mae1_val, mae2_val,
            mae3_val, mae4_val, mae5_val, relErr1_val, relErr2_val,
            relErr3_val, relErr4_val, relErr5_val, propAssigned1_val,
            propAssigned2_val, propAssigned3_val, propAssigned4_val,
            propAssigned5_val
        ]  #prediction_val, mse_val, mae_val
        ,
        updates=
        updates_val  #, allow_input_downcast=True, on_unused_input='ignore'
    )
    testOutput = []
    testMetrics2 = []
    numBatchTest = 0
    for batch in data:
        outputGeneration = test_fn(batch[0], batch[2])
        testOutput.append(outputGeneration[1:14])
        testMetrics2.append(outputGeneration[14:])
        #{0:[4,20], 2:[5,10]}
        #if (numBatchTest==0):

        plt.figure(1)
        plt.plot(np.transpose(outputGeneration[0],
                              [1, 0, 2])[4])  #ORIGINAL 1,0,2
        plt.savefig(save_path +
                    "/vrnn_dis_generated{}_Pred_0-4".format(numBatchTest))
        plt.clf()

        plt.figure(2)
        plt.plot(np.transpose(batch[2], [1, 0, 2])[4])
        plt.savefig(save_path +
                    "/vrnn_dis_generated{}_RealDisag_0-4".format(numBatchTest))
        plt.clf()

        plt.figure(3)
        plt.plot(np.transpose(batch[0], [1, 0, 2])[4])  #ORIGINAL 1,0,2
        plt.savefig(save_path +
                    "/vrnn_dis_generated{}_Realagg_0-4".format(numBatchTest))
        plt.clf()
        numBatchTest += 1

    testOutput = np.asarray(testOutput)
    testMetrics2 = np.asarray(testMetrics2)
    print(testOutput.shape)
    print(testMetrics2.shape)
    recon_test = testOutput[:, 0].mean()
    mse_test = testOutput[:, 1].mean()
    mae_test = testOutput[:, 2].mean()
    mse1_test = testOutput[:, 3].mean()
    mae1_test = testOutput[:, 8].mean()
    mse2_test = testOutput[:, 4].mean()
    mae2_test = testOutput[:, 9].mean()
    mse3_test = testOutput[:, 5].mean()
    mae3_test = testOutput[:, 10].mean()
    mse4_test = testOutput[:, 6].mean()
    mae4_test = testOutput[:, 11].mean()
    mse5_test = testOutput[:, 7].mean()
    mae5_test = testOutput[:, 12].mean()

    relErr1_test = testMetrics2[:, 0].mean()
    relErr2_test = testMetrics2[:, 1].mean()
    relErr3_test = testMetrics2[:, 2].mean()
    relErr4_test = testMetrics2[:, 3].mean()
    relErr5_test = testMetrics2[:, 4].mean()

    propAssigned1_test = testMetrics2[:, 5].mean()
    propAssigned2_test = testMetrics2[:, 6].mean()
    propAssigned3_test = testMetrics2[:, 7].mean()
    propAssigned4_test = testMetrics2[:, 8].mean()
    propAssigned5_test = testMetrics2[:, 9].mean()

    fLog = open(save_path + '/output.csv', 'w')
    fLog.write(str(lr_iterations) + "\n")
    fLog.write(str(appliances) + "\n")
    fLog.write(str(windows) + "\n")
    fLog.write(
        "logTest,mse1_test,mse2_test,mse3_test,mse4_test,mse5_test,mae1_test,mae2_test,mae3_test,mae4_test,mae5_test,mseTest,maeTest\n"
    )
    fLog.write("{},{},{},{},{},{},{},{},{},{},{},{},{}\n\n".format(
        recon_test, mse1_test, mse2_test, mse3_test, mse4_test, mse5_test,
        mae1_test, mae2_test, mae3_test, mae4_test, mae5_test, mse_test,
        mae_test))
    fLog.write(
        "relErr1,relErr2,relErr3,relErr4,relErr5,propAssigned1,propAssigned2,propAssigned3,propAssigned4,propAssigned5\n"
    )
    fLog.write("{},{},{},{},{},{},{},{},{},{}\n".format(
        relErr1_test, relErr2_test, relErr3_test, relErr4_test, relErr5_test,
        propAssigned1_test, propAssigned2_test, propAssigned3_test,
        propAssigned4_test, propAssigned5_test))

    fLog.write("q_z_dim,p_z_dim,p_x_dim,x2s_dim,y2s_dim,z2s_dim\n")
    fLog.write("{},{},{},{},{},{}\n".format(q_z_dim, p_z_dim, p_x_dim, x2s_dim,
                                            y2s_dim, z2s_dim))
    fLog.write(
        "epoch,log,kl,mse1,mse2,mse3,mse4,mse5,mae1,mae2,mae3,mae4,mae5\n")
    for i, item in enumerate(mainloop.trainlog.monitor['nll_upper_bound']):
        d, e, f, g, j, k, l, m = 0, 0, 0, 0, 0, 0, 0, 0
        ep = mainloop.trainlog.monitor['epoch'][i]
        a = mainloop.trainlog.monitor['recon_term'][i]
        b = mainloop.trainlog.monitor['kl_term'][i]
        c = mainloop.trainlog.monitor['mse1'][i]
        h = mainloop.trainlog.monitor['mae1'][i]

        if (y_dim > 1):
            d = mainloop.trainlog.monitor['mse2'][i]
            j = mainloop.trainlog.monitor['mae2'][i]
        if (y_dim > 2):
            e = mainloop.trainlog.monitor['mse3'][i]
            k = mainloop.trainlog.monitor['mae3'][i]
        if (y_dim > 3):
            f = mainloop.trainlog.monitor['mse4'][i]
            l = mainloop.trainlog.monitor['mae4'][i]
        if (y_dim > 4):
            g = mainloop.trainlog.monitor['mse5'][i]
            m = mainloop.trainlog.monitor['mae5'][i]
        fLog.write(
            "{:d},{:.2f},{:.2f},{:.3f},{:.3f},{:.3f},{:.3f},{:.3f},{:.3f},{:.3f},{:.3f},{:.3f},{:.3f}\n"
            .format(ep, a, b, c, d, e, f, g, h, j, k, l, m))

    f = open(save_path + '/outputRealGeneration.pkl', 'wb')
    pickle.dump(outputGeneration, f, -1)
    f.close()
Example #9
0
def main(args):
    
    #theano.optimizer='fast_compile'
    #theano.config.exception_verbosity='high'

    trial = int(args['trial'])
    pkl_name = 'vrnn_gmm_%d' % trial
    channel_name = 'nll_upper_bound'

    data_path = args['data_path']
    save_path = args['save_path'] #+'/gmm/'+datetime.datetime.now().strftime("%y-%m-%d_%H-%M")
    flgMSE = int(args['flgMSE'])

    genCase = int(args['genCase'])
    period = int(args['period'])
    n_steps = int(args['n_steps'])
    stride_train = int(args['stride_train'])
    stride_test = n_steps#int(args['stride_test'])

    monitoring_freq = int(args['monitoring_freq'])
    epoch = int(args['epoch'])
    batch_size = int(args['batch_size'])
    x_dim = int(args['x_dim'])
    y_dim = int(args['y_dim'])
    flgAgg = int(args['flgAgg'])
    z_dim = int(args['z_dim'])
    rnn_dim = int(args['rnn_dim'])
    k = int(args['num_k']) #a mixture of K Gaussian functions
    lr = float(args['lr'])
    debug = int(args['debug'])
    num_sequences_per_batch = int(args['numSequences']) #based on appliance
    typeLoad = int(args['typeLoad'])
    target_inclusion_prob = float(args['target_inclusion_prob'])


    print "trial no. %d" % trial
    print "batch size %d" % batch_size
    print "learning rate %f" % lr
    print "saving pkl file '%s'" % pkl_name
    print "to the save path '%s'" % save_path

    q_z_dim = 150
    p_z_dim = 150
    p_x_dim = 200
    x2s_dim = 100
    y2s_dim = 100
    z2s_dim = 100
    target_dim = k#x_dim #(x_dim-1)*k

    model = Model()
    Xtrain, ytrain, Xval, yval, Xtest, ytest, reader = fetch_ukdale(data_path, windows, appliances,numApps=flgAgg, period=period,
                                              n_steps= n_steps, stride_train = stride_train, stride_test = stride_test,
                                              typeLoad= typeLoad, flgAggSumScaled = 1, flgFilterZeros = 1,
                                              seq_per_batch=num_sequences_per_batch, target_inclusion_prob=target_inclusion_prob)
    
    instancesPlot = {0:[4,20], 2:[5,10]} #for now use hard coded instancesPlot for kelly sampling
    if(typeLoad==0):
      instancesPlot = reader.build_dict_instances_plot(listDates, batch_size, Xval.shape[0])

    train_data = UKdale(name='train',
                         prep='normalize',
                         cond=True,# False
                         #path=data_path,
                         inputX=Xtrain,
                         labels=ytrain)

    X_mean = train_data.X_mean
    X_std = train_data.X_std

    valid_data = UKdale(name='valid',
                         prep='normalize',
                         cond=True,# False
                         #path=data_path,
                         X_mean=X_mean,
                         X_std=X_std,
                         inputX=Xval,
                         labels = yval)

    test_data = UKdale(name='valid',
                         prep='normalize',
                         cond=True,# False
                         #path=data_path,
                         X_mean=X_mean,
                         X_std=X_std,
                         inputX=Xtest,
                         labels = ytest)

    init_W = InitCell('rand')
    init_U = InitCell('ortho')
    init_b = InitCell('zeros')
    init_b_sig = InitCell('const', mean=0.6)

    x, mask, y , y_mask = train_data.theano_vars()

    if (genCase ==1):
      inputX = x[:-1,:]
      targetX = x[1:,:]
      n_steps = n_steps-1
    else:
      inputX = x
      targetX = x  

    x.name = 'x_original'
    if debug:
        x.tag.test_value = np.zeros((15, batch_size, x_dim), dtype=np.float32)
        temp = np.ones((15, batch_size), dtype=np.float32)
        temp[:, -2:] = 0.
        mask.tag.test_value = temp

    x_1 = FullyConnectedLayer(name='x_1',
                              parent=['x_t'],
                              parent_dim=[x_dim],
                              nout=x2s_dim,
                              unit='relu',
                              init_W=init_W,
                              init_b=init_b)

    y_1 = FullyConnectedLayer(name='y_1',
                              parent=['y_t'],
                              parent_dim=[y_dim],
                              nout=y2s_dim,
                              unit='relu',
                              init_W=init_W,
                              init_b=init_b)

    z_1 = FullyConnectedLayer(name='z_1',
                              parent=['z_t'],
                              parent_dim=[z_dim],
                              nout=z2s_dim,
                              unit='relu',
                              init_W=init_W,
                              init_b=init_b)

    rnn = LSTM(name='rnn',
               parent=['x_1', 'z_1','y_1'],
               parent_dim=[x2s_dim, z2s_dim, y_dim],
               nout=rnn_dim,
               unit='tanh',
               init_W=init_W,
               init_U=init_U,
               init_b=init_b)

    phi_1 = FullyConnectedLayer(name='phi_1',
                                parent=['x_1', 's_tm1','y_1'],
                                parent_dim=[x2s_dim, rnn_dim,y2s_dim],
                                nout=q_z_dim,
                                unit='relu',
                                init_W=init_W,
                                init_b=init_b)

    phi_mu = FullyConnectedLayer(name='phi_mu',
                                 parent=['phi_1'],
                                 parent_dim=[q_z_dim],
                                 nout=z_dim,
                                 unit='linear',
                                 init_W=init_W,
                                 init_b=init_b)

    phi_sig = FullyConnectedLayer(name='phi_sig',
                                  parent=['phi_1'],
                                  parent_dim=[q_z_dim],
                                  nout=z_dim,
                                  unit='softplus',
                                  cons=1e-4,
                                  init_W=init_W,
                                  init_b=init_b_sig)

    prior_1 = FullyConnectedLayer(name='prior_1',
                                  parent=['s_tm1'],
                                  parent_dim=[rnn_dim],
                                  nout=p_z_dim,
                                  unit='relu',
                                  init_W=init_W,
                                  init_b=init_b)

    prior_mu = FullyConnectedLayer(name='prior_mu',
                                   parent=['prior_1'],
                                   parent_dim=[p_z_dim],
                                   nout=z_dim,
                                   unit='linear',
                                   init_W=init_W,
                                   init_b=init_b)

    prior_sig = FullyConnectedLayer(name='prior_sig',
                                    parent=['prior_1'],
                                    parent_dim=[p_z_dim],
                                    nout=z_dim,
                                    unit='softplus',
                                    cons=1e-4,
                                    init_W=init_W,
                                    init_b=init_b_sig)

    theta_1 = FullyConnectedLayer(name='theta_1',
                                  parent=['z_1', 's_tm1'],
                                  parent_dim=[z2s_dim, rnn_dim],
                                  nout=p_x_dim,
                                  unit='relu',
                                  init_W=init_W,
                                  init_b=init_b)

    theta_mu = FullyConnectedLayer(name='theta_mu',
                                   parent=['theta_1'],
                                   parent_dim=[p_x_dim],
                                   nout=target_dim,
                                   unit='linear',
                                   init_W=init_W,
                                   init_b=init_b)

    theta_sig = FullyConnectedLayer(name='theta_sig',
                                    parent=['theta_1'],
                                    parent_dim=[p_x_dim],
                                    nout=target_dim,
                                    unit='softplus',
                                    cons=1e-4,
                                    init_W=init_W,
                                    init_b=init_b_sig)

    coeff = FullyConnectedLayer(name='coeff',
                                parent=['theta_1'],
                                parent_dim=[p_x_dim],
                                nout=k,
                                unit='softmax',
                                init_W=init_W,
                                init_b=init_b)

    corr = FullyConnectedLayer(name='corr',
                               parent=['theta_1'],
                               parent_dim=[p_x_dim],
                               nout=k,
                               unit='tanh',
                               init_W=init_W,
                               init_b=init_b)

    binary = FullyConnectedLayer(name='binary',
                                 parent=['theta_1'],
                                 parent_dim=[p_x_dim],
                                 nout=1,
                                 unit='sigmoid',
                                 init_W=init_W,
                                 init_b=init_b)

    nodes = [rnn,
             x_1, y_1, z_1, #dissag_pred,
             phi_1, phi_mu, phi_sig,
             prior_1, prior_mu, prior_sig,
             theta_1, theta_mu, theta_sig, coeff]#, corr, binary

    params = OrderedDict()

    for node in nodes:
        if node.initialize() is not None:
            params.update(node.initialize())

    params = init_tparams(params)

    s_0 = rnn.get_init_state(batch_size)

    x_1_temp = x_1.fprop([x], params)
    y_1_temp = y_1.fprop([y], params)

    def inner_fn_val(x_t, s_tm1):

        prior_1_t = prior_1.fprop([s_tm1], params)
        prior_mu_t = prior_mu.fprop([prior_1_t], params)
        prior_sig_t = prior_sig.fprop([prior_1_t], params)

        z_t = Gaussian_sample(prior_mu_t, prior_sig_t)
        z_1_t = z_1.fprop([z_t], params)

        theta_1_t = theta_1.fprop([z_1_t, s_tm1], params)
        theta_mu_t = theta_mu.fprop([theta_1_t], params)
        theta_sig_t = theta_sig.fprop([theta_1_t], params)

        coeff_t = coeff.fprop([theta_1_t], params)

        pred_t = GMM_sample(theta_mu_t, theta_sig_t, coeff_t) #Gaussian_sample(theta_mu_t, theta_sig_t)
        pred_1_t = y_1.fprop([pred_t], params)
        s_t = rnn.fprop([[x_t, z_1_t, pred_1_t], [s_tm1]], params)
        #y_pred = dissag_pred.fprop([s_t], params)

        return s_t, prior_mu_t, prior_sig_t, z_t,  z_1_t, theta_1_t, theta_mu_t, theta_sig_t, coeff_t, pred_t#, y_pred
        #corr_temp, binary_temp
    ((s_temp_val, prior_mu_temp_val, prior_sig_temp_val, z_t_temp_val, z_1_temp_val, theta_1_temp_val, theta_mu_temp_val, theta_sig_temp_val, coeff_temp_val, prediction_val), updates_val) =\
        theano.scan(fn=inner_fn_val,
                    sequences=[x_1_temp],
                    outputs_info=[s_0, None, None, None, None, None, None,  None, None, None])

    for k, v in updates_val.iteritems():
        k.default_update = v

    s_temp_val = concatenate([s_0[None, :, :], s_temp_val[:-1]], axis=0)

    def inner_fn_train(x_t, y_t, s_tm1):

        phi_1_t = phi_1.fprop([x_t, s_tm1,y_t], params)
        phi_mu_t = phi_mu.fprop([phi_1_t], params)
        phi_sig_t = phi_sig.fprop([phi_1_t], params)

        prior_1_t = prior_1.fprop([s_tm1], params)
        prior_mu_t = prior_mu.fprop([prior_1_t], params)
        prior_sig_t = prior_sig.fprop([prior_1_t], params)

        z_t = Gaussian_sample(phi_mu_t, phi_sig_t)
        z_1_t = z_1.fprop([z_t], params)

        theta_1_t = theta_1.fprop([z_1_t, s_tm1], params)
        theta_mu_t = theta_mu.fprop([theta_1_t], params)
        theta_sig_t = theta_sig.fprop([theta_1_t], params)

        coeff_t = coeff.fprop([theta_1_t], params)
        #corr_t = corr.fprop([theta_1_t], params)
        #binary_t = binary.fprop([theta_1_t], params)

        pred = GMM_sample(theta_mu_t, theta_sig_t, coeff_t) #Gaussian_sample(theta_mu_t, theta_sig_t)
        s_t = rnn.fprop([[x_t, z_1_t, y_t], [s_tm1]], params)
        #y_pred = dissag_pred.fprop([s_t], params)

        return s_t, phi_mu_t, phi_sig_t, prior_mu_t, prior_sig_t, z_t,  z_1_t, theta_1_t, theta_mu_t, theta_sig_t, coeff_t, pred#, y_pred
        #corr_temp, binary_temp
    ((s_temp, phi_mu_temp, phi_sig_temp, prior_mu_temp, prior_sig_temp,z_t_temp, z_1_temp, theta_1_temp, theta_mu_temp, theta_sig_temp, coeff_temp, prediction), updates) =\
        theano.scan(fn=inner_fn_train,
                    sequences=[x_1_temp, y_1_temp],
                    outputs_info=[s_0, None, None, None, None, None, None,  None, None, None, None, None])

    
    for k, v in updates.iteritems():
        k.default_update = v
    
    s_temp = concatenate([s_0[None, :, :], s_temp[:-1]], axis=0)# seems like this is for creating an additional dimension to s_0

    s_temp.name = 'h_1'#gisse
    z_1_temp.name = 'z_1'#gisse
    z_t_temp.name = 'z'
    theta_mu_temp.name = 'theta_mu_temp'
    theta_sig_temp.name = 'theta_sig_temp'
    coeff_temp.name = 'coeff'

    if (flgAgg == -1 ):
      prediction.name = 'x_reconstructed'
      mse = T.mean((prediction - x)**2) # CHECK RESHAPE with an assertion
      mae = T.mean( T.abs(prediction - x) )
      mse.name = 'mse'
      pred_in = x.reshape((x_shape[0]*x_shape[1], -1))
    else:
      prediction.name = 'pred_'+str(flgAgg)
      mse = T.mean((prediction - y)**2) # As axis = None is calculated for all
      mae = T.mean( T.abs_(prediction - y) )
      mse.name = 'mse'
      mae.name = 'mae'
      pred_in = y.reshape((y.shape[0]*y.shape[1],-1))

    kl_temp = KLGaussianGaussian(phi_mu_temp, phi_sig_temp, prior_mu_temp, prior_sig_temp)

    x_shape = x.shape
    
    theta_mu_in = theta_mu_temp.reshape((x_shape[0]*x_shape[1], -1))
    theta_sig_in = theta_sig_temp.reshape((x_shape[0]*x_shape[1], -1))
    coeff_in = coeff_temp.reshape((x_shape[0]*x_shape[1], -1))
    #corr_in = corr_temp.reshape((x_shape[0]*x_shape[1], -1))
    #binary_in = binary_temp.reshape((x_shape[0]*x_shape[1], -1))

    recon = GMM(pred_in, theta_mu_in, theta_sig_in, coeff_in)# BiGMM(x_in, theta_mu_in, theta_sig_in, coeff_in, corr_in, binary_in)
    recon = recon.reshape((x_shape[0], x_shape[1]))
    recon.name = 'gmm_out'

    recon_term = recon.sum(axis=0).mean()
    recon_term.name = 'recon_term'
    
    kl_term = kl_temp.sum(axis=0).mean()
    kl_term.name = 'kl_term'

    nll_upper_bound = recon_term + kl_term #+ mse
    if (flgMSE):
      nll_upper_bound = nll_upper_bound + mse
    nll_upper_bound.name = 'nll_upper_bound'

    ######################## TEST (GENERATION) TIME
    prediction_val.name = 'generated__'+str(flgAgg)
    mse_val = T.mean((prediction_val - y)**2) # As axis = None is calculated for all
    mae_val = T.mean( T.abs_(prediction_val - y) )
    mse_val.name = 'mse_val'
    mae_val.name = 'mae_val'
    pred_in_val = y.reshape((y.shape[0]*y.shape[1],-1))

    theta_mu_in_val = theta_mu_temp_val.reshape((x_shape[0]*x_shape[1], -1))
    theta_sig_in_val = theta_sig_temp_val.reshape((x_shape[0]*x_shape[1], -1))
    coeff_in_val = coeff_temp_val.reshape((x_shape[0]*x_shape[1], -1))

    recon_val = GMM(pred_in_val, theta_mu_in_val, theta_sig_in_val, coeff_in_val)# BiGMM(x_in, theta_mu_in, theta_sig_in, coeff_in, corr_in, binary_in)
    recon_val = recon_val.reshape((x_shape[0], x_shape[1]))
    recon_val.name = 'gmm_out_val'

    recon_term_val= recon_val.sum(axis=0).mean()
    recon_term_val.name = 'recon_term_val'

    model.inputs = [x, mask, y, y_mask]
    model.params = params
    model.nodes = nodes

    optimizer = Adam(
        lr=lr
    )

    header = "epoch,log,kl,nll_upper_bound,mse,mae\n"
    extension = [
        GradientClipping(batch_size=batch_size),
        EpochCount(epoch, save_path, header),
        Monitoring(freq=monitoring_freq,
                   ddout=[nll_upper_bound, recon_term, kl_term, mse, mae,
                          theta_mu_temp,prediction],
                   indexSep=5,
                   instancesPlot = instancesPlot, #{0:[4,20],2:[5,10]},#, 80,150
                   data=[Iterator(valid_data, batch_size)],
                   savedFolder = save_path),
        Picklize(freq=monitoring_freq, path=save_path),
        EarlyStopping(freq=monitoring_freq, path=save_path, channel=channel_name),
        WeightNorm()
    ]

    lr_iterations = {0:lr, 75:(lr/10), 150:(lr/100)}

    mainloop = Training(
        name=pkl_name,
        data=Iterator(train_data, batch_size),
        model=model,
        optimizer=optimizer,
        cost=nll_upper_bound,
        outputs=[recon_term, kl_term, nll_upper_bound, mse, mae],
        extension=extension,
        lr_iterations=lr_iterations

    )
    mainloop.run()

    z_t_temp_val.name='z_temp_val'
    s_temp_val.name='s_temp_val'
    theta_mu_temp_val.name='mu_temp'

    data=Iterator(test_data, batch_size)

    test_fn = theano.function(inputs=[x, y],#[x, y],
                              #givens={x:Xtest},
                              #on_unused_input='ignore',
                              #z=( ,200,1)
                              allow_input_downcast=True,
                              outputs=[z_t_temp_val, s_temp_val, theta_mu_temp_val, prediction_val, recon_term_val, mse_val, mae_val]#prediction_val, mse_val, mae_val
                              ,updates=updates_val#, allow_input_downcast=True, on_unused_input='ignore'
                              )
    testOutput = []
    numBatchTest = 0
    for batch in data:
      outputGeneration = test_fn(batch[0], batch[2])
      testOutput.append(outputGeneration[4:])
      #{0:[4,20], 2:[5,10]} 
      #if (numBatchTest==0):
      '''
      plt.figure(1)
      plt.plot(np.transpose(outputGeneration[0],[1,0,2])[4])
      plt.savefig(save_path+"/vrnn_dis_generated{}_z_0-4".format(numBatchTest))
      plt.clf()

      plt.figure(2)
      plt.plot(np.transpose(outputGeneration[1],[1,0,2])[4])
      plt.savefig(save_path+"/vrnn_dis_generated{}_s_0-4".format(numBatchTest))
      plt.clf()

      plt.figure(3)
      plt.plot(np.transpose(outputGeneration[2],[1,0,2])[4])
      plt.savefig(save_path+"/vrnn_dis_generated{}_theta_0-4".format(numBatchTest))
      plt.clf()
      '''
      plt.figure(4)
      plt.plot(np.transpose(outputGeneration[3],[1,0,2])[4])
      plt.plot(np.transpose(batch[2],[1,0,2])[4])
      plt.savefig(save_path+"/vrnn_dis_generated{}_RealAndPred_0-4".format(numBatchTest))
      plt.clf()

      plt.figure(4)
      plt.plot(np.transpose(batch[0],[1,0,2])[4])
      plt.savefig(save_path+"/vrnn_dis_generated{}_Realagg_0-4".format(numBatchTest))
      plt.clf()
      numBatchTest+=1

    testOutput = np.asarray(testOutput)
    print(testOutput.shape)
    recon_test = this_mean = testOutput[:, 0].mean()
    mse_test = this_mean = testOutput[:, 1].mean()
    mae_test = this_mean = testOutput[:, 2].mean()

    fLog = open(save_path+'/output.csv', 'w')
    fLog.write(str(lr_iterations)+"\n")
    fLog.write(str(windows)+"\n")
    fLog.write("logTest,mseTest,maeTest\n")
    fLog.write("{},{},{}\n".format(recon_test,mse_test,mae_test))
    fLog.write("q_z_dim,p_z_dim,p_x_dim,x2s_dim,y2s_dim,z2s_dim\n")
    fLog.write("{},{},{},{},{},{}\n".format(q_z_dim,p_z_dim,p_x_dim,x2s_dim,y2s_dim,z2s_dim))
    header = "epoch,log,kl,mse,mae\n"
    fLog.write(header)
    for i , item in enumerate(mainloop.trainlog.monitor['recon_term']):
      f = mainloop.trainlog.monitor['epoch'][i]
      a = mainloop.trainlog.monitor['recon_term'][i]
      b = mainloop.trainlog.monitor['kl_term'][i]
      d = mainloop.trainlog.monitor['mse'][i]
      e = mainloop.trainlog.monitor['mae'][i]
      fLog.write("{:d},{:.2f},{:.2f},{:.3f},{:.3f}\n".format(f,a,b,d,e))
Example #10
0
def main(args):

    #theano.optimizer='fast_compile'
    #theano.config.exception_verbosity='high'

    trial = int(args['trial'])
    pkl_name = 'dp_dis1-nosch_%d' % trial
    channel_name = 'mae'

    data_path = args['data_path']
    save_path = args[
        'save_path']  #+'/gmm/'+datetime.datetime.now().strftime("%y-%m-%d_%H-%M")
    flgMSE = int(args['flgMSE'])

    period = int(args['period'])
    n_steps = int(args['n_steps'])
    stride_train = int(args['stride_train'])
    stride_test = n_steps  # int(args['stride_test'])

    monitoring_freq = int(args['monitoring_freq'])
    epoch = int(args['epoch'])
    batch_size = int(args['batch_size'])
    x_dim = int(args['x_dim'])
    y_dim = int(args['y_dim'])
    flgAgg = int(args['flgAgg'])
    z_dim = int(args['z_dim'])
    rnn_dim = int(args['rnn_dim'])
    k = int(args['num_k'])  #a mixture of K Gaussian functions
    lr = float(args['lr'])
    typeLoad = int(args['typeLoad'])
    debug = int(args['debug'])
    kSchedSamp = int(args['kSchedSamp'])
    typeActivFunc = args['typeActivFunc']

    print "trial no. %d" % trial
    print "batch size %d" % batch_size
    print "learning rate %f" % lr
    print "saving pkl file '%s'" % pkl_name
    print "to the save path '%s'" % save_path

    q_z_dim = 150
    p_z_dim = 150
    p_x_dim = 150  #250
    x2s_dim = 100  #250
    y2s_dim = 100
    z2s_dim = 100  #150
    target_dim = k  #x_dim #(x_dim-1)*k

    model = Model()
    Xtrain, ytrain, Xval, yval, Xtest, ytest, reader = fetch_dataport(
        data_path,
        windows,
        appliances,
        numApps=flgAgg,
        period=period,
        n_steps=n_steps,
        stride_train=stride_train,
        stride_test=stride_test,
        trainPer=0.6,
        valPer=0.2,
        testPer=0.2,
        typeLoad=typeLoad,
        flgAggSumScaled=1,
        flgFilterZeros=1)
    print(reader.stdTrain, reader.meanTrain)
    instancesPlot = {
        0: [4],
        2: [5]
    }  #for now use hard coded instancesPlot for kelly sampling

    train_data = Dataport(
        name='train',
        prep='normalize',
        cond=True,  # False
        #path=data_path,
        inputX=Xtrain,
        labels=ytrain)

    X_mean = train_data.X_mean
    X_std = train_data.X_std

    valid_data = Dataport(
        name='valid',
        prep='normalize',
        cond=True,  # False
        #path=data_path,
        X_mean=X_mean,
        X_std=X_std,
        inputX=Xval,
        labels=yval)

    test_data = Dataport(
        name='valid',
        prep='normalize',
        cond=True,  # False
        #path=data_path,
        X_mean=X_mean,
        X_std=X_std,
        inputX=Xtest,
        labels=ytest)

    init_W = InitCell('rand')
    init_U = InitCell('ortho')
    init_b = InitCell('zeros')
    init_b_sig = InitCell('const', mean=0.6)

    x, mask, y, y_mask = train_data.theano_vars()
    scheduleSamplingMask = T.fvector('schedMask')
    x.name = 'x_original'
    if debug:
        x.tag.test_value = np.zeros((15, batch_size, x_dim), dtype=np.float32)
        temp = np.ones((15, batch_size), dtype=np.float32)
        temp[:, -2:] = 0.
        mask.tag.test_value = temp

    x_1 = FullyConnectedLayer(name='x_1',
                              parent=['x_t'],
                              parent_dim=[x_dim],
                              nout=x2s_dim,
                              unit='relu',
                              init_W=init_W,
                              init_b=init_b)

    y_1 = FullyConnectedLayer(name='y_1',
                              parent=['y_t'],
                              parent_dim=[y_dim],
                              nout=y2s_dim,
                              unit='relu',
                              init_W=init_W,
                              init_b=init_b)

    z_1 = FullyConnectedLayer(name='z_1',
                              parent=['z_t'],
                              parent_dim=[z_dim],
                              nout=z2s_dim,
                              unit='relu',
                              init_W=init_W,
                              init_b=init_b)

    rnn = LSTM(name='rnn',
               parent=['x_1', 'z_1', 'y_1'],
               parent_dim=[x2s_dim, z2s_dim, y_dim],
               nout=rnn_dim,
               unit='tanh',
               init_W=init_W,
               init_U=init_U,
               init_b=init_b)

    phi_1 = FullyConnectedLayer(name='phi_1',
                                parent=['x_1', 's_tm1', 'y_1'],
                                parent_dim=[x2s_dim, rnn_dim, y2s_dim],
                                nout=q_z_dim,
                                unit='relu',
                                init_W=init_W,
                                init_b=init_b)

    phi_mu = FullyConnectedLayer(name='phi_mu',
                                 parent=['phi_1'],
                                 parent_dim=[q_z_dim],
                                 nout=z_dim,
                                 unit='linear',
                                 init_W=init_W,
                                 init_b=init_b)

    phi_sig = FullyConnectedLayer(name='phi_sig',
                                  parent=['phi_1'],
                                  parent_dim=[q_z_dim],
                                  nout=z_dim,
                                  unit='softplus',
                                  cons=1e-4,
                                  init_W=init_W,
                                  init_b=init_b_sig)

    prior_1 = FullyConnectedLayer(name='prior_1',
                                  parent=['x_1', 's_tm1'],
                                  parent_dim=[x2s_dim, rnn_dim],
                                  nout=p_z_dim,
                                  unit='relu',
                                  init_W=init_W,
                                  init_b=init_b)

    prior_mu = FullyConnectedLayer(name='prior_mu',
                                   parent=['prior_1'],
                                   parent_dim=[p_z_dim],
                                   nout=z_dim,
                                   unit='linear',
                                   init_W=init_W,
                                   init_b=init_b)

    prior_sig = FullyConnectedLayer(name='prior_sig',
                                    parent=['prior_1'],
                                    parent_dim=[p_z_dim],
                                    nout=z_dim,
                                    unit='softplus',
                                    cons=1e-4,
                                    init_W=init_W,
                                    init_b=init_b_sig)

    theta_1 = FullyConnectedLayer(name='theta_1',
                                  parent=['z_1', 's_tm1'],
                                  parent_dim=[z2s_dim, rnn_dim],
                                  nout=p_x_dim,
                                  unit='relu',
                                  init_W=init_W,
                                  init_b=init_b)

    theta_mu = FullyConnectedLayer(name='theta_mu',
                                   parent=['theta_1'],
                                   parent_dim=[p_x_dim],
                                   nout=target_dim,
                                   unit=typeActivFunc,
                                   init_W=init_W,
                                   init_b=init_b)

    theta_sig = FullyConnectedLayer(name='theta_sig',
                                    parent=['theta_1'],
                                    parent_dim=[p_x_dim],
                                    nout=target_dim,
                                    unit='softplus',
                                    cons=1e-4,
                                    init_W=init_W,
                                    init_b=init_b_sig)

    coeff = FullyConnectedLayer(name='coeff',
                                parent=['theta_1'],
                                parent_dim=[p_x_dim],
                                nout=k,
                                unit='softmax',
                                init_W=init_W,
                                init_b=init_b)

    corr = FullyConnectedLayer(name='corr',
                               parent=['theta_1'],
                               parent_dim=[p_x_dim],
                               nout=k,
                               unit='tanh',
                               init_W=init_W,
                               init_b=init_b)

    binary = FullyConnectedLayer(name='binary',
                                 parent=['theta_1'],
                                 parent_dim=[p_x_dim],
                                 nout=1,
                                 unit='sigmoid',
                                 init_W=init_W,
                                 init_b=init_b)

    nodes = [
        rnn,
        x_1,
        y_1,
        z_1,  #dissag_pred,
        phi_1,
        phi_mu,
        phi_sig,
        prior_1,
        prior_mu,
        prior_sig,
        theta_1,
        theta_mu,
        theta_sig,
        coeff
    ]  #, corr, binary

    params = OrderedDict()

    for node in nodes:
        if node.initialize() is not None:
            params.update(node.initialize())

    params = init_tparams(params)

    s_0 = rnn.get_init_state(batch_size)

    x_1_temp = x_1.fprop([x], params)
    y_1_temp = y_1.fprop([y], params)

    def inner_fn_train(x_t, y_t, s_tm1):

        phi_1_t = phi_1.fprop([x_t, s_tm1, y_t], params)
        phi_mu_t = phi_mu.fprop([phi_1_t], params)
        phi_sig_t = phi_sig.fprop([phi_1_t], params)

        prior_1_t = prior_1.fprop([x_t, s_tm1], params)
        prior_mu_t = prior_mu.fprop([prior_1_t], params)
        prior_sig_t = prior_sig.fprop([prior_1_t], params)

        z_t = Gaussian_sample(phi_mu_t, phi_sig_t)
        z_1_t = z_1.fprop([z_t], params)

        theta_1_t = theta_1.fprop([z_1_t, s_tm1], params)
        theta_mu_t = theta_mu.fprop([theta_1_t], params)
        theta_sig_t = theta_sig.fprop([theta_1_t], params)

        coeff_t = coeff.fprop([theta_1_t], params)
        #corr_t = corr.fprop([theta_1_t], params)
        #binary_t = binary.fprop([theta_1_t], params)

        pred = GMM_sample(theta_mu_t, theta_sig_t,
                          coeff_t)  #Gaussian_sample(theta_mu_t, theta_sig_t)
        s_t = rnn.fprop([[x_t, z_1_t, y_t], [s_tm1]], params)
        #y_pred = dissag_pred.fprop([s_t], params)

        return s_t, phi_mu_t, phi_sig_t, prior_mu_t, prior_sig_t, theta_mu_t, theta_sig_t, coeff_t, pred  #, y_pred
        #corr_temp, binary_temp
    ((s_temp, phi_mu_temp, phi_sig_temp, prior_mu_temp, prior_sig_temp, theta_mu_temp, theta_sig_temp, coeff_temp, prediction), updates) =\
        theano.scan(fn=inner_fn_train,
                    sequences=[x_1_temp, y_1_temp],
                    outputs_info=[s_0, None, None, None, None, None, None, None, None])

    for k, v in updates.iteritems():
        k.default_update = v

    #s_temp = concatenate([s_0[None, :, :], s_temp[:-1]], axis=0)# seems like this is for creating an additional dimension to s_0

    theta_mu_temp.name = 'theta_mu_temp'
    theta_sig_temp.name = 'theta_sig_temp'
    coeff_temp.name = 'coeff'

    if (flgAgg == -1):
        prediction.name = 'x_reconstructed'
        mse = T.mean((prediction - x)**2)  # CHECK RESHAPE with an assertion
        mae = T.mean(T.abs(prediction - x))
        mse.name = 'mse'
        pred_in = x.reshape((x_shape[0] * x_shape[1], -1))
    else:
        prediction.name = 'pred_' + str(flgAgg)
        mse = T.mean(
            (prediction - y)**2)  # As axis = None is calculated for all
        mae = T.mean(T.abs_(prediction - y))
        mse.name = 'mse'
        mae.name = 'mae'
        pred_in = y.reshape((y.shape[0] * y.shape[1], -1))

    kl_temp = KLGaussianGaussian(phi_mu_temp, phi_sig_temp, prior_mu_temp,
                                 prior_sig_temp)

    x_shape = x.shape

    theta_mu_in = theta_mu_temp.reshape((x_shape[0] * x_shape[1], -1))
    theta_sig_in = theta_sig_temp.reshape((x_shape[0] * x_shape[1], -1))
    coeff_in = coeff_temp.reshape((x_shape[0] * x_shape[1], -1))
    #corr_in = corr_temp.reshape((x_shape[0]*x_shape[1], -1))
    #binary_in = binary_temp.reshape((x_shape[0]*x_shape[1], -1))

    recon = GMM(
        pred_in, theta_mu_in, theta_sig_in, coeff_in
    )  # BiGMM(x_in, theta_mu_in, theta_sig_in, coeff_in, corr_in, binary_in)
    recon = recon.reshape((x_shape[0], x_shape[1]))
    recon.name = 'gmm_out'

    recon_term = recon.sum(axis=0).mean()
    recon_term.name = 'recon_term'

    kl_term = kl_temp.sum(axis=0).mean()
    kl_term.name = 'kl_term'

    nll_upper_bound = recon_term + kl_term  #+ mse
    if (flgMSE):
        nll_upper_bound = nll_upper_bound + mse
    nll_upper_bound.name = 'nll_upper_bound'

    model.inputs = [x, mask, y, y_mask, scheduleSamplingMask]
    model.params = params
    model.nodes = nodes

    optimizer = Adam(lr=lr)

    header = "epoch,log,kl,nll_upper_bound,mse,mae\n"
    extension = [
        GradientClipping(batch_size=batch_size),
        EpochCount(epoch, save_path, header),
        Monitoring(
            freq=monitoring_freq,
            ddout=[
                nll_upper_bound, recon_term, kl_term, mse, mae, theta_mu_temp,
                prediction
            ],
            indexSep=5,
            instancesPlot=instancesPlot,  #{0:[4,20],2:[5,10]},#, 80,150
            data=[Iterator(valid_data, batch_size)],
            savedFolder=save_path),
        Picklize(freq=monitoring_freq, path=save_path),
        EarlyStopping(freq=monitoring_freq,
                      path=save_path,
                      channel=channel_name),
        WeightNorm()
    ]

    lr_iterations = {0: lr}

    mainloop = Training(
        name=pkl_name,
        data=Iterator(train_data, batch_size),
        model=model,
        optimizer=optimizer,
        cost=nll_upper_bound,
        outputs=[recon_term, kl_term, nll_upper_bound, mse, mae],
        n_steps=n_steps,
        extension=extension,
        lr_iterations=lr_iterations,
        k_speedOfconvergence=kSchedSamp)
    mainloop.run()

    fLog = open(save_path + '/output.csv', 'w')
    fLog.write(str(lr_iterations) + "\n")
    fLog.write(str(windows) + "\n")

    fLog.write("q_z_dim,p_z_dim,p_x_dim,x2s_dim,y2s_dim,z2s_dim\n")
    fLog.write("{},{},{},{},{},{}\n".format(q_z_dim, p_z_dim, p_x_dim, x2s_dim,
                                            y2s_dim, z2s_dim))
    header = "epoch,log,kl,mse,mae\n"
    fLog.write(header)
    for i, item in enumerate(mainloop.trainlog.monitor['recon_term']):
        f = mainloop.trainlog.monitor['epoch'][i]
        a = mainloop.trainlog.monitor['recon_term'][i]
        b = mainloop.trainlog.monitor['kl_term'][i]
        d = mainloop.trainlog.monitor['mse'][i]
        e = mainloop.trainlog.monitor['mae'][i]
        fLog.write("{:d},{:.2f},{:.2f},{:.3f},{:.3f}\n".format(f, a, b, d, e))
def main(args):

    #theano.optimizer='fast_compile'
    #theano.config.exception_verbosity='high'

    trial = int(args['trial'])
    pkl_name = 'dp_dis1-sch_%d' % trial
    channel_name = 'mae'

    data_path = args['data_path']
    save_path = args[
        'save_path']  #+'/gmm/'+datetime.datetime.now().strftime("%y-%m-%d_%H-%M")
    flgMSE = int(args['flgMSE'])

    period = int(args['period'])
    n_steps = int(args['n_steps'])
    stride_train = int(args['stride_train'])
    stride_test = n_steps  # int(args['stride_test'])

    monitoring_freq = int(args['monitoring_freq'])
    epoch = int(args['epoch'])
    batch_size = int(args['batch_size'])
    x_dim = int(args['x_dim'])
    y_dim = int(args['y_dim'])
    flgAgg = int(args['flgAgg'])
    z_dim = int(args['z_dim'])
    rnn_dim = int(args['rnn_dim'])
    k = int(args['num_k'])  #a mixture of K Gaussian functions
    lr = float(args['lr'])
    typeLoad = int(args['typeLoad'])
    debug = int(args['debug'])
    kSchedSamp = int(args['kSchedSamp'])

    print "trial no. %d" % trial
    print "batch size %d" % batch_size
    print "learning rate %f" % lr
    print "saving pkl file '%s'" % pkl_name
    print "to the save path '%s'" % save_path

    q_z_dim = 150
    p_z_dim = 150
    p_x_dim = 150  #250
    x2s_dim = 100  #250
    y2s_dim = 100
    z2s_dim = 100  #150
    target_dim = k  #x_dim #(x_dim-1)*k

    model = Model()
    Xtrain, ytrain, Xval, yval, Xtest, ytest, reader = fetch_dataport(
        data_path,
        windows,
        appliances,
        numApps=flgAgg,
        period=period,
        n_steps=n_steps,
        stride_train=stride_train,
        stride_test=stride_test,
        trainPer=0.6,
        valPer=0.2,
        testPer=0.2,
        typeLoad=typeLoad,
        flgAggSumScaled=1,
        flgFilterZeros=1)
    print(reader.stdTrain, reader.meanTrain)
    instancesPlot = {
        0: [4],
        2: [10]
    }  #for now use hard coded instancesPlot for kelly sampling

    train_data = Dataport(
        name='train',
        prep='normalize',
        cond=True,  # False
        #path=data_path,
        inputX=Xtrain,
        labels=ytrain)

    X_mean = train_data.X_mean
    X_std = train_data.X_std

    valid_data = Dataport(
        name='valid',
        prep='normalize',
        cond=True,  # False
        #path=data_path,
        X_mean=X_mean,
        X_std=X_std,
        inputX=Xval,
        labels=yval)

    test_data = Dataport(
        name='valid',
        prep='normalize',
        cond=True,  # False
        #path=data_path,
        X_mean=X_mean,
        X_std=X_std,
        inputX=Xtest,
        labels=ytest)

    init_W = InitCell('rand')
    init_U = InitCell('ortho')
    init_b = InitCell('zeros')
    init_b_sig = InitCell('const', mean=0.6)

    x, mask, y, y_mask = train_data.theano_vars()
    scheduleSamplingMask = T.fvector('schedMask')

    x.name = 'x_original'
    if debug:
        x.tag.test_value = np.zeros((15, batch_size, x_dim), dtype=np.float32)
        temp = np.ones((15, batch_size), dtype=np.float32)
        temp[:, -2:] = 0.
        mask.tag.test_value = temp

    fmodel = open('dp_dis1-sch_1_best.pkl', 'rb')
    mainloop = cPickle.load(fmodel)
    fmodel.close()

    #attrs = vars(mainloop)
    #print ', '.join("%s: %s" % item for item in attrs.items())
    """names = [x.name for x in mainloop.model.nodes]
    print(names)
    print(mainloop.model.nodes)"""

    #define layers
    rnn = mainloop.model.nodes[0]
    x_1 = mainloop.model.nodes[1]
    y_1 = mainloop.model.nodes[2]
    z_1 = mainloop.model.nodes[3]
    phi_1 = mainloop.model.nodes[4]
    phi_mu = mainloop.model.nodes[5]
    phi_sig = mainloop.model.nodes[6]
    prior_1 = mainloop.model.nodes[7]
    prior_mu = mainloop.model.nodes[8]
    prior_sig = mainloop.model.nodes[9]
    theta_1 = mainloop.model.nodes[10]
    theta_mu = mainloop.model.nodes[11]
    theta_sig = mainloop.model.nodes[12]
    coeff = mainloop.model.nodes[13]

    nodes = [
        rnn,
        x_1,
        y_1,
        z_1,  #dissag_pred,
        phi_1,
        phi_mu,
        phi_sig,
        prior_1,
        prior_mu,
        prior_sig,
        theta_1,
        theta_mu,
        theta_sig,
        coeff
    ]  #, corr, binary

    params = mainloop.model.params
    """params = OrderedDict()

    for node in nodes:
        if node.initialize() is not None:
            params.update(node.initialize())"""

    #params = init_tparams(params)
    """OrderedDict([('W_x_1__rnn', W_x_1__rnn), ('W_z_1__rnn', W_z_1__rnn), ('W_y_1__rnn', W_y_1__rnn), 
    ('U_rnn__rnn', U_rnn__rnn), ('b_rnn', b_rnn), ('W_x_t__x_1', W_x_t__x_1), ('b_x_1', b_x_1), 
    ('W_y_t__y_1', W_y_t__y_1), ('b_y_1', b_y_1), ('W_z_t__z_1', W_z_t__z_1), ('b_z_1', b_z_1), 
    ('W_x_1__phi_1', W_x_1__phi_1), ('W_s_tm1__phi_1', W_s_tm1__phi_1), ('W_y_1__phi_1', W_y_1__phi_1), 
    ('b_phi_1', b_phi_1), ('W_phi_1__phi_mu', W_phi_1__phi_mu), ('b_phi_mu', b_phi_mu), 
    ('W_phi_1__phi_sig', W_phi_1__phi_sig), ('b_phi_sig', b_phi_sig), ('W_x_1__prior_1', W_x_1__prior_1), 
    ('W_s_tm1__prior_1', W_s_tm1__prior_1), ('b_prior_1', b_prior_1), ('W_prior_1__prior_mu', W_prior_1__prior_mu), 
    ('b_prior_mu', b_prior_mu), ('W_prior_1__prior_sig', W_prior_1__prior_sig), ('b_prior_sig', b_prior_sig), 
    ('W_z_1__theta_1', W_z_1__theta_1), ('W_s_tm1__theta_1', W_s_tm1__theta_1), ('b_theta_1', b_theta_1), 
    ('W_theta_1__theta_mu', W_theta_1__theta_mu), ('b_theta_mu', b_theta_mu), ('W_theta_1__theta_sig', W_theta_1__theta_sig), 
    ('b_theta_sig', b_theta_sig), ('W_theta_1__coeff', W_theta_1__coeff), ('b_coeff', b_coeff)])"""
    """w1 = params['W_x_1__rnn']
    w2 = params['W_phi_1__phi_sig']
    w3 = params['W_theta_1__coeff']

    print("Initialized W matricies:")
    print("W1:",w1.get_value())
    print("W2:",w2.get_value())
    print("W3:",w3.get_value())
    print("\n\n--------------------------------\n\n")"""

    s_0 = rnn.get_init_state(batch_size)

    x_1_temp = x_1.fprop([x], params)
    y_1_temp = y_1.fprop([y], params)

    def inner_fn_val(x_t, s_tm1):

        prior_1_t = prior_1.fprop([x_t, s_tm1], params)
        prior_mu_t = prior_mu.fprop([prior_1_t], params)
        prior_sig_t = prior_sig.fprop([prior_1_t], params)

        z_t = Gaussian_sample(prior_mu_t, prior_sig_t)
        z_1_t = z_1.fprop([z_t], params)

        theta_1_t = theta_1.fprop([z_1_t, s_tm1], params)
        theta_mu_t = theta_mu.fprop([theta_1_t], params)
        theta_sig_t = theta_sig.fprop([theta_1_t], params)

        coeff_t = coeff.fprop([theta_1_t], params)

        pred_t = GMM_sample(theta_mu_t, theta_sig_t,
                            coeff_t)  #Gaussian_sample(theta_mu_t, theta_sig_t)
        pred_1_t = y_1.fprop([pred_t], params)
        s_t = rnn.fprop([[x_t, z_1_t, pred_1_t], [s_tm1]], params)
        #y_pred = dissag_pred.fprop([s_t], params)

        return s_t, prior_mu_t, prior_sig_t, theta_mu_t, theta_sig_t, coeff_t, pred_t  #, y_pred
        #corr_temp, binary_temp
    ((s_temp_val, prior_mu_temp_val, prior_sig_temp_val, theta_mu_temp_val, theta_sig_temp_val, coeff_temp_val, prediction_val), updates_val) =\
        theano.scan(fn=inner_fn_val,
                    sequences=[x_1_temp],
                    outputs_info=[s_0, None, None, None,  None, None, None])

    for k, v in updates_val.iteritems():
        k.default_update = v

    s_temp_val = concatenate([s_0[None, :, :], s_temp_val[:-1]], axis=0)

    x_shape = x.shape

    ######################## TEST (GENERATION) TIME
    prediction_val.name = 'generated__' + str(flgAgg)
    mse_val = T.mean(
        (prediction_val - y)**2)  # As axis = None is calculated for all
    mae_val = T.mean(T.abs_(prediction_val - y))

    mse_val.name = 'mse_val'
    mae_val.name = 'mae_val'
    pred_in_val = y.reshape((y.shape[0] * y.shape[1], -1))

    theta_mu_in_val = theta_mu_temp_val.reshape((x_shape[0] * x_shape[1], -1))
    theta_sig_in_val = theta_sig_temp_val.reshape(
        (x_shape[0] * x_shape[1], -1))
    coeff_in_val = coeff_temp_val.reshape((x_shape[0] * x_shape[1], -1))

    recon_val = GMM(
        pred_in_val, theta_mu_in_val, theta_sig_in_val, coeff_in_val
    )  # BiGMM(x_in, theta_mu_in, theta_sig_in, coeff_in, corr_in, binary_in)
    recon_val = recon_val.reshape((x_shape[0], x_shape[1]))
    recon_val.name = 'gmm_out_val'

    recon_term_val = recon_val.sum(axis=0).mean()
    recon_term_val.name = 'recon_term_val'

    model.inputs = [x, mask, y, y_mask, scheduleSamplingMask]
    model.params = params
    model.nodes = nodes

    optimizer = Adam(lr=lr)

    header = "epoch,log,kl,nll_upper_bound,mse,mae\n"
    extension = [
        GradientClipping(batch_size=batch_size),
        EpochCount(epoch, save_path, header),
        Monitoring(
            freq=monitoring_freq,
            #ddout=[nll_upper_bound, recon_term, kl_term, mse, mae, prediction],
            indexSep=5,
            instancesPlot=instancesPlot,  #{0:[4,20],2:[5,10]},#, 80,150
            data=[Iterator(valid_data, batch_size)],
            savedFolder=save_path),
        Picklize(freq=monitoring_freq, path=save_path),
        EarlyStopping(freq=monitoring_freq,
                      path=save_path,
                      channel=channel_name),
        WeightNorm()
    ]
    """params = OrderedDict()

    for node in mainloop.model.nodes:
        if node.initialize() is not None:
            params.update(node.initialize())

    params = init_tparams(params)"""
    """ w11 = params['W_x_1__rnn']
    w21 = params['W_phi_1__phi_sig']
    w31 = params['W_theta_1__coeff']

    print("Pickle W matricies:")
    print("W1:",w11.get_value())
    print("W2:",w21.get_value())
    print("W3:",w31.get_value())"""
    """mainloop.restore(
      name=pkl_name,
      data=Iterator(train_data, batch_size),
      model=model,
      optimizer=optimizer,
      #cost=nll_upper_bound,
      #outputs=[recon_term, kl_term, nll_upper_bound, mse, mae],
      n_steps = n_steps,
      extension=extension,
      #lr_iterations=lr_iterations,
      k_speedOfconvergence=kSchedSamp
    )"""

    data = Iterator(test_data, batch_size)

    test_fn = theano.function(
        inputs=[x, y],  #[x, y],
        #givens={x:Xtest},
        #on_unused_input='ignore',
        #z=( ,200,1)
        allow_input_downcast=True,
        outputs=[prediction_val, recon_term_val, mse_val,
                 mae_val]  #prediction_val, mse_val, mae_val
        ,
        updates=
        updates_val  #, allow_input_downcast=True, on_unused_input='ignore'
    )
    testOutput = []
    numBatchTest = 0
    for batch in data:
        outputGeneration = test_fn(batch[0], batch[2])  #(20, 220, 1)
        testOutput.append(outputGeneration[1:])

        plt.figure(4)
        plt.plot(np.transpose(outputGeneration[0], [1, 0, 2])[4])
        plt.plot(np.transpose(batch[2], [1, 0, 2])[4])
        plt.savefig(
            save_path +
            "/vrnn_dis_generated{}_RealAndPred_0-4".format(numBatchTest))
        plt.clf()

        plt.figure(4)
        plt.plot(np.transpose(batch[0], [1, 0, 2])[4])
        plt.savefig(save_path +
                    "/vrnn_dis_generated{}_Realagg_0-4".format(numBatchTest))
        plt.clf()
        numBatchTest += 1

    testOutput = np.asarray(testOutput)
    print(testOutput.shape)
    recon_test = testOutput[:, 0].mean()
    mse_test = testOutput[:, 1].mean()
    mae_test = testOutput[:, 2].mean()
    #mseUnNorm_test = testOutput[:, 3].mean()
    #maeUnNorm_test = testOutput[:, 4].mean()

    fLog = open(save_path + '/output.csv', 'w')
    #fLog.write(str(lr_iterations)+"\n")
    fLog.write(str(windows) + "\n")
    fLog.write("logTest,mseTest,maeTest, mseTestUnNorm, maeTestUnNorm\n")
    fLog.write("{},{},{}\n".format(recon_test, mse_test, mae_test))
    fLog.write("q_z_dim,p_z_dim,p_x_dim,x2s_dim,y2s_dim,z2s_dim\n")
    fLog.write("{},{},{},{},{},{}\n".format(q_z_dim, p_z_dim, p_x_dim, x2s_dim,
                                            y2s_dim, z2s_dim))
    header = "epoch,log,kl,mse,mae\n"
    fLog.write(header)
    for i, item in enumerate(mainloop.trainlog.monitor['recon_term']):
        f = mainloop.trainlog.monitor['epoch'][i]
        a = mainloop.trainlog.monitor['recon_term'][i]
        b = mainloop.trainlog.monitor['kl_term'][i]
        d = mainloop.trainlog.monitor['mse'][i]
        e = mainloop.trainlog.monitor['mae'][i]
        fLog.write("{:d},{:.2f},{:.2f},{:.3f},{:.3f}\n".format(f, a, b, d, e))
Example #12
0
def main(args):

    trial = int(args['trial'])
    pkl_name = 'vrnn_gauss_%d' % trial
    channel_name = 'valid_nll_upper_bound'

    data_path = args['data_path']
    save_path = args['save_path']

    monitoring_freq = int(args['monitoring_freq'])
    force_saving_freq = int(args['force_saving_freq'])
    reset_freq = int(args['reset_freq'])
    epoch = int(args['epoch'])
    batch_size = int(args['batch_size'])
    m_batch_size = int(args['m_batch_size'])
    x_dim = int(args['x_dim'])
    z_dim = int(args['z_dim'])
    rnn_dim = int(args['rnn_dim'])
    lr = float(args['lr'])
    debug = int(args['debug'])

    print "trial no. %d" % trial
    print "batch size %d" % batch_size
    print "learning rate %f" % lr
    print "saving pkl file '%s'" % pkl_name
    print "to the save path '%s'" % save_path

    q_z_dim = 500
    p_z_dim = 500
    p_x_dim = 600
    x2s_dim = 600
    z2s_dim = 500
    target_dim = x_dim

    file_name = 'blizzard_unseg_tbptt'
    normal_params = np.load(data_path + file_name + '_normal.npz')
    X_mean = normal_params['X_mean']
    X_std = normal_params['X_std']

    model = Model()
    train_data = Blizzard_tbptt(name='train',
                                path=data_path,
                                frame_size=x_dim,
                                file_name=file_name,
                                X_mean=X_mean,
                                X_std=X_std)

    valid_data = Blizzard_tbptt(name='valid',
                                path=data_path,
                                frame_size=x_dim,
                                file_name=file_name,
                                X_mean=X_mean,
                                X_std=X_std)

    x = train_data.theano_vars()
    m_x = valid_data.theano_vars()

    if debug:
        x.tag.test_value = np.zeros((15, batch_size, x_dim),
                                    dtype=theano.config.floatX)
        m_x.tag.test_value = np.zeros((15, m_batch_size, x_dim),
                                      dtype=theano.config.floatX)

    init_W = InitCell('rand')
    init_U = InitCell('ortho')
    init_b = InitCell('zeros')
    init_b_sig = InitCell('const', mean=0.6)

    x_1 = FullyConnectedLayer(name='x_1',
                              parent=['x_t'],
                              parent_dim=[x_dim],
                              nout=x2s_dim,
                              unit='relu',
                              init_W=init_W,
                              init_b=init_b)

    x_2 = FullyConnectedLayer(name='x_2',
                              parent=['x_1'],
                              parent_dim=[x2s_dim],
                              nout=x2s_dim,
                              unit='relu',
                              init_W=init_W,
                              init_b=init_b)

    x_3 = FullyConnectedLayer(name='x_3',
                              parent=['x_2'],
                              parent_dim=[x2s_dim],
                              nout=x2s_dim,
                              unit='relu',
                              init_W=init_W,
                              init_b=init_b)

    x_4 = FullyConnectedLayer(name='x_4',
                              parent=['x_3'],
                              parent_dim=[x2s_dim],
                              nout=x2s_dim,
                              unit='relu',
                              init_W=init_W,
                              init_b=init_b)

    z_1 = FullyConnectedLayer(name='z_1',
                              parent=['z_t'],
                              parent_dim=[z_dim],
                              nout=z2s_dim,
                              unit='relu',
                              init_W=init_W,
                              init_b=init_b)

    z_2 = FullyConnectedLayer(name='z_2',
                              parent=['z_1'],
                              parent_dim=[z2s_dim],
                              nout=z2s_dim,
                              unit='relu',
                              init_W=init_W,
                              init_b=init_b)

    z_3 = FullyConnectedLayer(name='z_3',
                              parent=['z_2'],
                              parent_dim=[z2s_dim],
                              nout=z2s_dim,
                              unit='relu',
                              init_W=init_W,
                              init_b=init_b)

    z_4 = FullyConnectedLayer(name='z_4',
                              parent=['z_3'],
                              parent_dim=[z2s_dim],
                              nout=z2s_dim,
                              unit='relu',
                              init_W=init_W,
                              init_b=init_b)

    rnn = LSTM(name='rnn',
               parent=['x_4', 'z_4'],
               parent_dim=[x2s_dim, z2s_dim],
               nout=rnn_dim,
               unit='tanh',
               init_W=init_W,
               init_U=init_U,
               init_b=init_b)

    phi_1 = FullyConnectedLayer(name='phi_1',
                                parent=['x_4', 's_tm1'],
                                parent_dim=[x2s_dim, rnn_dim],
                                nout=q_z_dim,
                                unit='relu',
                                init_W=init_W,
                                init_b=init_b)

    phi_2 = FullyConnectedLayer(name='phi_2',
                                parent=['phi_1'],
                                parent_dim=[q_z_dim],
                                nout=q_z_dim,
                                unit='relu',
                                init_W=init_W,
                                init_b=init_b)

    phi_3 = FullyConnectedLayer(name='phi_3',
                                parent=['phi_2'],
                                parent_dim=[q_z_dim],
                                nout=q_z_dim,
                                unit='relu',
                                init_W=init_W,
                                init_b=init_b)

    phi_4 = FullyConnectedLayer(name='phi_4',
                                parent=['phi_3'],
                                parent_dim=[q_z_dim],
                                nout=q_z_dim,
                                unit='relu',
                                init_W=init_W,
                                init_b=init_b)

    phi_mu = FullyConnectedLayer(name='phi_mu',
                                 parent=['phi_4'],
                                 parent_dim=[q_z_dim],
                                 nout=z_dim,
                                 unit='linear',
                                 init_W=init_W,
                                 init_b=init_b)

    phi_sig = FullyConnectedLayer(name='phi_sig',
                                  parent=['phi_4'],
                                  parent_dim=[q_z_dim],
                                  nout=z_dim,
                                  unit='softplus',
                                  cons=1e-4,
                                  init_W=init_W,
                                  init_b=init_b_sig)

    prior_1 = FullyConnectedLayer(name='prior_1',
                                  parent=['s_tm1'],
                                  parent_dim=[rnn_dim],
                                  nout=p_z_dim,
                                  unit='relu',
                                  init_W=init_W,
                                  init_b=init_b)

    prior_2 = FullyConnectedLayer(name='prior_2',
                                  parent=['prior_1'],
                                  parent_dim=[p_z_dim],
                                  nout=p_z_dim,
                                  unit='relu',
                                  init_W=init_W,
                                  init_b=init_b)

    prior_3 = FullyConnectedLayer(name='prior_3',
                                  parent=['prior_2'],
                                  parent_dim=[p_z_dim],
                                  nout=p_z_dim,
                                  unit='relu',
                                  init_W=init_W,
                                  init_b=init_b)

    prior_4 = FullyConnectedLayer(name='prior_4',
                                  parent=['prior_3'],
                                  parent_dim=[p_z_dim],
                                  nout=p_z_dim,
                                  unit='relu',
                                  init_W=init_W,
                                  init_b=init_b)

    prior_mu = FullyConnectedLayer(name='prior_mu',
                                   parent=['prior_4'],
                                   parent_dim=[p_z_dim],
                                   nout=z_dim,
                                   unit='linear',
                                   init_W=init_W,
                                   init_b=init_b)

    prior_sig = FullyConnectedLayer(name='prior_sig',
                                    parent=['prior_4'],
                                    parent_dim=[p_z_dim],
                                    nout=z_dim,
                                    unit='softplus',
                                    cons=1e-4,
                                    init_W=init_W,
                                    init_b=init_b_sig)

    theta_1 = FullyConnectedLayer(name='theta_1',
                                  parent=['z_4', 's_tm1'],
                                  parent_dim=[z2s_dim, rnn_dim],
                                  nout=p_x_dim,
                                  unit='relu',
                                  init_W=init_W,
                                  init_b=init_b)

    theta_2 = FullyConnectedLayer(name='theta_2',
                                  parent=['theta_1'],
                                  parent_dim=[p_x_dim],
                                  nout=p_x_dim,
                                  unit='relu',
                                  init_W=init_W,
                                  init_b=init_b)

    theta_3 = FullyConnectedLayer(name='theta_3',
                                  parent=['theta_2'],
                                  parent_dim=[p_x_dim],
                                  nout=p_x_dim,
                                  unit='relu',
                                  init_W=init_W,
                                  init_b=init_b)

    theta_4 = FullyConnectedLayer(name='theta_4',
                                  parent=['theta_3'],
                                  parent_dim=[p_x_dim],
                                  nout=p_x_dim,
                                  unit='relu',
                                  init_W=init_W,
                                  init_b=init_b)

    theta_mu = FullyConnectedLayer(name='theta_mu',
                                   parent=['theta_4'],
                                   parent_dim=[p_x_dim],
                                   nout=target_dim,
                                   unit='linear',
                                   init_W=init_W,
                                   init_b=init_b)

    theta_sig = FullyConnectedLayer(name='theta_sig',
                                    parent=['theta_4'],
                                    parent_dim=[p_x_dim],
                                    nout=target_dim,
                                    unit='softplus',
                                    cons=1e-4,
                                    init_W=init_W,
                                    init_b=init_b_sig)

    nodes = [
        rnn, x_1, x_2, x_3, x_4, z_1, z_2, z_3, z_4, phi_1, phi_2, phi_3,
        phi_4, phi_mu, phi_sig, prior_1, prior_2, prior_3, prior_4, prior_mu,
        prior_sig, theta_1, theta_2, theta_3, theta_4, theta_mu, theta_sig
    ]

    params = OrderedDict()

    for node in nodes:
        if node.initialize() is not None:
            params.update(node.initialize())

    params = init_tparams(params)

    step_count = sharedX(0, name='step_count')
    last_rnn = np.zeros((batch_size, rnn_dim * 2), dtype=theano.config.floatX)
    rnn_tm1 = sharedX(last_rnn, name='rnn_tm1')
    shared_updates = OrderedDict()
    shared_updates[step_count] = step_count + 1

    # Resets / Initializes the cell-state or the memory-state of each LSTM to
    # zero.
    s_0 = T.switch(T.eq(T.mod(step_count, reset_freq), 0),
                   rnn.get_init_state(batch_size), rnn_tm1)

    # Forward Propagate the input to get more complex features for
    # every time step.
    x_1_temp = x_1.fprop([x], params)
    x_2_temp = x_2.fprop([x_1_temp], params)
    x_3_temp = x_3.fprop([x_2_temp], params)
    x_4_temp = x_4.fprop([x_3_temp], params)

    def inner_fn(x_t, s_tm1):

        # Generate the mean and standard deviation of the
        # latent variables Z_t | X_t for every time-step of the LSTM.
        # This is a function of the input and the hidden state of the previous
        # time step.
        phi_1_t = phi_1.fprop([x_t, s_tm1], params)
        phi_2_t = phi_2.fprop([phi_1_t], params)
        phi_3_t = phi_3.fprop([phi_2_t], params)
        phi_4_t = phi_4.fprop([phi_3_t], params)
        phi_mu_t = phi_mu.fprop([phi_4_t], params)
        phi_sig_t = phi_sig.fprop([phi_4_t], params)

        # Prior on the latent variables at every time-step
        # Dependent only on the hidden-step.
        prior_1_t = prior_1.fprop([s_tm1], params)
        prior_2_t = prior_2.fprop([prior_1_t], params)
        prior_3_t = prior_3.fprop([prior_2_t], params)
        prior_4_t = prior_4.fprop([prior_3_t], params)
        prior_mu_t = prior_mu.fprop([prior_4_t], params)
        prior_sig_t = prior_sig.fprop([prior_4_t], params)

        # Sample from the latent distibution with mean phi_mu_t
        # and std phi_sig_t
        z_t = Gaussian_sample(phi_mu_t, phi_sig_t)

        # h_t = f(h_(t-1)), z_t, x_t)
        z_1_t = z_1.fprop([z_t], params)
        z_2_t = z_2.fprop([z_1_t], params)
        z_3_t = z_3.fprop([z_2_t], params)
        z_4_t = z_4.fprop([z_3_t], params)

        s_t = rnn.fprop([[x_t, z_4_t], [s_tm1]], params)

        return s_t, phi_mu_t, phi_sig_t, prior_mu_t, prior_sig_t, z_4_t

    # Iterate over every time-step
    ((s_temp, phi_mu_temp, phi_sig_temp, prior_mu_temp, prior_sig_temp, z_4_temp), updates) =\
        theano.scan(fn=inner_fn,
                    sequences=[x_4_temp],
                    outputs_info=[s_0, None, None, None, None, None])

    for k, v in updates.iteritems():
        k.default_update = v

    shared_updates[rnn_tm1] = s_temp[-1]
    s_temp = concatenate([s_0[None, :, :], s_temp[:-1]], axis=0)

    # Generate the output distribution at every time-step.
    # This is as a function of the latent variables and the hidden-state at
    # every time-step.
    theta_1_temp = theta_1.fprop([z_4_temp, s_temp], params)
    theta_2_temp = theta_2.fprop([theta_1_temp], params)
    theta_3_temp = theta_3.fprop([theta_2_temp], params)
    theta_4_temp = theta_4.fprop([theta_3_temp], params)
    theta_mu_temp = theta_mu.fprop([theta_4_temp], params)
    theta_sig_temp = theta_sig.fprop([theta_4_temp], params)

    kl_temp = KLGaussianGaussian(phi_mu_temp, phi_sig_temp, prior_mu_temp,
                                 prior_sig_temp)

    recon = Gaussian(x, theta_mu_temp, theta_sig_temp)
    recon_term = recon.mean()
    kl_term = kl_temp.mean()
    nll_upper_bound = recon_term + kl_term
    nll_upper_bound.name = 'nll_upper_bound'

    # Forward-propagation of the validation data.
    m_x_1_temp = x_1.fprop([m_x], params)
    m_x_2_temp = x_2.fprop([m_x_1_temp], params)
    m_x_3_temp = x_3.fprop([m_x_2_temp], params)
    m_x_4_temp = x_4.fprop([m_x_3_temp], params)

    m_s_0 = rnn.get_init_state(m_batch_size)

    # Get the hidden-states, conditional mean, standard deviation, prior mean
    # and prior standard deviation of the latent variables at every time-step.
    ((m_s_temp, m_phi_mu_temp, m_phi_sig_temp, m_prior_mu_temp, m_prior_sig_temp, m_z_4_temp), m_updates) =\
        theano.scan(fn=inner_fn,
                    sequences=[m_x_4_temp],
                    outputs_info=[m_s_0, None, None, None, None, None])

    for k, v in m_updates.iteritems():
        k.default_update = v

    # Get the inferred mean (X_t | Z_t) at every time-step of the validation
    # data.
    m_s_temp = concatenate([m_s_0[None, :, :], m_s_temp[:-1]], axis=0)
    m_theta_1_temp = theta_1.fprop([m_z_4_temp, m_s_temp], params)
    m_theta_2_temp = theta_2.fprop([m_theta_1_temp], params)
    m_theta_3_temp = theta_3.fprop([m_theta_2_temp], params)
    m_theta_4_temp = theta_4.fprop([m_theta_3_temp], params)
    m_theta_mu_temp = theta_mu.fprop([m_theta_4_temp], params)
    m_theta_sig_temp = theta_sig.fprop([m_theta_4_temp], params)

    # Compute the data log-likelihood + KL-divergence on the validation data.
    m_kl_temp = KLGaussianGaussian(m_phi_mu_temp, m_phi_sig_temp,
                                   m_prior_mu_temp, m_prior_sig_temp)

    m_recon = Gaussian(m_x, m_theta_mu_temp, m_theta_sig_temp)
    m_recon_term = m_recon.mean()
    m_kl_term = m_kl_temp.mean()
    m_nll_upper_bound = m_recon_term + m_kl_term
    m_nll_upper_bound.name = 'nll_upper_bound'
    m_recon_term.name = 'recon_term'
    m_kl_term.name = 'kl_term'

    max_x = m_x.max()
    mean_x = m_x.mean()
    min_x = m_x.min()
    max_x.name = 'max_x'
    mean_x.name = 'mean_x'
    min_x.name = 'min_x'

    max_theta_mu = m_theta_mu_temp.max()
    mean_theta_mu = m_theta_mu_temp.mean()
    min_theta_mu = m_theta_mu_temp.min()
    max_theta_mu.name = 'max_theta_mu'
    mean_theta_mu.name = 'mean_theta_mu'
    min_theta_mu.name = 'min_theta_mu'

    max_theta_sig = m_theta_sig_temp.max()
    mean_theta_sig = m_theta_sig_temp.mean()
    min_theta_sig = m_theta_sig_temp.min()
    max_theta_sig.name = 'max_theta_sig'
    mean_theta_sig.name = 'mean_theta_sig'
    min_theta_sig.name = 'min_theta_sig'

    max_phi_sig = m_phi_sig_temp.max()
    mean_phi_sig = m_phi_sig_temp.mean()
    min_phi_sig = m_phi_sig_temp.min()
    max_phi_sig.name = 'max_phi_sig'
    mean_phi_sig.name = 'mean_phi_sig'
    min_phi_sig.name = 'min_phi_sig'

    max_prior_sig = m_prior_sig_temp.max()
    mean_prior_sig = m_prior_sig_temp.mean()
    min_prior_sig = m_prior_sig_temp.min()
    max_prior_sig.name = 'max_prior_sig'
    mean_prior_sig.name = 'mean_prior_sig'
    min_prior_sig.name = 'min_prior_sig'

    model.inputs = [x]
    model.params = params
    model.nodes = nodes
    model.set_updates(shared_updates)

    optimizer = Adam(lr=lr)

    monitor_fn = theano.function(
        inputs=[m_x],
        outputs=[
            m_nll_upper_bound, m_recon_term, m_kl_term, max_phi_sig,
            mean_phi_sig, min_phi_sig, max_prior_sig, mean_prior_sig,
            min_prior_sig, max_theta_sig, mean_theta_sig, min_theta_sig, max_x,
            mean_x, min_x, max_theta_mu, mean_theta_mu, min_theta_mu
        ],
        on_unused_input='ignore')

    extension = [
        GradientClipping(batch_size=batch_size, check_nan=1),
        EpochCount(epoch),
        Monitoring(freq=monitoring_freq,
                   monitor_fn=monitor_fn,
                   ddout=[
                       m_nll_upper_bound, m_recon_term, m_kl_term, max_phi_sig,
                       mean_phi_sig, min_phi_sig, max_prior_sig,
                       mean_prior_sig, min_prior_sig, max_theta_sig,
                       mean_theta_sig, min_theta_sig, max_x, mean_x, min_x,
                       max_theta_mu, mean_theta_mu, min_theta_mu
                   ],
                   data=[
                       Iterator(train_data, m_batch_size, start=0, end=112640),
                       Iterator(valid_data,
                                m_batch_size,
                                start=2040064,
                                end=2152704)
                   ]),
        Picklize(freq=monitoring_freq,
                 force_save_freq=force_saving_freq,
                 path=save_path),
        EarlyStopping(freq=monitoring_freq,
                      force_save_freq=force_saving_freq,
                      path=save_path,
                      channel=channel_name),
        WeightNorm()
    ]

    mainloop = Training(name=pkl_name,
                        data=Iterator(train_data,
                                      batch_size,
                                      start=0,
                                      end=2040064),
                        model=model,
                        optimizer=optimizer,
                        cost=nll_upper_bound,
                        outputs=[nll_upper_bound],
                        extension=extension)
    mainloop.run()
Example #13
0
cost.name = 'cost'
recon_term.name = 'recon_term'
kl_term.name = 'kl_term'
recon_err = ((x - T.nnet.sigmoid(canvas_out[-1]))**2).mean() / x.std()
recon_err.name = 'recon_err'

model.inputs = [x]
model._params = params
model.nodes = nodes

optimizer = Adam(lr=0.001)

extension = [
    GradientClipping(batch_size=batch_size),
    EpochCount(10000),
    Monitoring(freq=100,
               ddout=[cost, recon_term, kl_term, recon_err],
               data=[Iterator(data, batch_size)]),
    Picklize(freq=2000, path=savepath),
    EarlyStopping(freq=500, path=savepath)
]

mainloop = Training(name='draw',
                    data=Iterator(data, batch_size),
                    model=model,
                    optimizer=optimizer,
                    cost=cost,
                    outputs=[cost],
                    extension=extension)
mainloop.run()
Example #14
0
def main(args):

    theano.optimizer = 'fast_compile'
    theano.config.exception_verbosity = 'high'

    trial = int(args['trial'])
    pkl_name = 'rnn_gmm_%d' % trial
    channel_name = 'valid_nll'

    data_path = args['data_path']
    save_path = args['save_path']
    flgMSE = int(args['flgMSE'])

    period = int(args['period'])
    n_steps = int(args['n_steps'])
    stride_train = int(args['stride_train'])
    stride_test = n_steps  # int(args['stride_test'])

    monitoring_freq = int(args['monitoring_freq'])
    epoch = int(args['epoch'])
    batch_size = int(args['batch_size'])
    x_dim = int(args['x_dim'])
    z_dim = int(args['z_dim'])
    y_dim = int(args['y_dim'])
    flgAgg = int(args['flgAgg'])
    rnn_dim = int(args['rnn_dim'])
    k = int(args['num_k'])
    lr = float(args['lr'])
    debug = int(args['debug'])

    print "trial no. %d" % trial
    print "batch size %d" % batch_size
    print "learning rate %f" % lr
    print "saving pkl file '%s'" % pkl_name
    print "to the save path '%s'" % save_path

    x2s_dim = 50  #300
    s2x_dim = 50  #300
    target_dim = k  #(x_dim-1)*k

    model = Model()
    Xtrain, ytrain, Xval, yval = fetch_ukdale(data_path,
                                              windows,
                                              appliances,
                                              numApps=flgAgg,
                                              period=period,
                                              n_steps=n_steps,
                                              stride_train=stride_train,
                                              stride_test=stride_test)
    print("Inside: ", Xtrain.shape, ytrain.shape, Xval.shape, yval.shape)
    train_data = UKdale(
        name='train',
        prep='normalize',
        cond=True,  # False
        #path=data_path,
        inputX=Xtrain,
        labels=ytrain)

    X_mean = train_data.X_mean
    X_std = train_data.X_std

    valid_data = UKdale(
        name='valid',
        prep='normalize',
        cond=True,  # False
        #path=data_path,
        X_mean=X_mean,
        X_std=X_std,
        inputX=Xval,
        labels=yval)

    init_W = InitCell('rand')
    init_U = InitCell('ortho')
    init_b = InitCell('zeros')
    init_b_sig = InitCell('const', mean=0.6)

    x, mask, y, y_mask = train_data.theano_vars()  #mask, y_mask
    '''
    if debug:
        x.tag.test_value = np.zeros((15, batch_size, x_dim), dtype=np.float32)
        temp = np.ones((15, batch_size), dtype=np.float32)
        temp[:, -2:] = 0.
        mask.tag.test_value = temp
    '''
    x_1 = FullyConnectedLayer(name='x_1',
                              parent=['x_t'],
                              parent_dim=[x_dim],
                              nout=x2s_dim,
                              unit='relu',
                              init_W=init_W,
                              init_b=init_b)

    rnn = LSTM(name='rnn',
               parent=['x_1'],
               parent_dim=[x2s_dim],
               nout=rnn_dim,
               unit='tanh',
               init_W=init_W,
               init_U=init_U,
               init_b=init_b)

    theta_1 = FullyConnectedLayer(name='theta_1',
                                  parent=['s_tm1'],
                                  parent_dim=[rnn_dim],
                                  nout=s2x_dim,
                                  unit='relu',
                                  init_W=init_W,
                                  init_b=init_b)

    theta_mu = FullyConnectedLayer(name='theta_mu',
                                   parent=['theta_1'],
                                   parent_dim=[s2x_dim],
                                   nout=target_dim,
                                   unit='linear',
                                   init_W=init_W,
                                   init_b=init_b)

    theta_sig = FullyConnectedLayer(name='theta_sig',
                                    parent=['theta_1'],
                                    parent_dim=[s2x_dim],
                                    nout=target_dim,
                                    unit='softplus',
                                    cons=1e-4,
                                    init_W=init_W,
                                    init_b=init_b_sig)

    coeff = FullyConnectedLayer(name='coeff',
                                parent=['theta_1'],
                                parent_dim=[s2x_dim],
                                nout=k,
                                unit='softmax',
                                init_W=init_W,
                                init_b=init_b)
    '''
    corr = FullyConnectedLayer(name='corr',
                               parent=['theta_1'],
                               parent_dim=[s2x_dim],
                               nout=k,
                               unit='tanh',
                               init_W=init_W,
                               init_b=init_b)

    binary = FullyConnectedLayer(name='binary',
                                 parent=['theta_1'],
                                 parent_dim=[s2x_dim],
                                 nout=1,
                                 unit='sigmoid',
                                 init_W=init_W,
                                 init_b=init_b)
    '''
    nodes = [rnn, x_1, theta_1, theta_mu, theta_sig, coeff]

    params = OrderedDict()

    for node in nodes:
        if node.initialize() is not None:
            params.update(node.initialize())

    params = init_tparams(params)

    s_0 = rnn.get_init_state(batch_size)

    x_shape = x.shape
    x_1_temp = x_1.fprop([x], params)

    def inner_fn(x_t, s_tm1):

        s_t = rnn.fprop([[x_t], [s_tm1]], params)
        theta_1_t = theta_1.fprop([s_t], params)
        theta_mu_t = theta_mu.fprop([theta_1_t], params)
        theta_sig_t = theta_sig.fprop([theta_1_t], params)
        coeff_t = coeff.fprop([theta_1_t], params)

        pred = GMM_sample(theta_mu_t, theta_sig_t, coeff_t)
        return s_t, theta_mu_t, theta_sig_t, coeff_t, pred

    ((s_temp, theta_mu_temp, theta_sig_temp, coeff_temp, pred_temp),
     updates) = theano.scan(fn=inner_fn,
                            sequences=[x_1_temp],
                            outputs_info=[s_0, None, None, None, None])

    for k, v in updates.iteritems():
        k.default_update = v

    s_temp = concatenate([s_0[None, :, :], s_temp[:-1]], axis=0)
    '''
    theta_1_temp = theta_1.fprop([s_temp], params)
    theta_mu_temp = theta_mu.fprop([theta_1_temp], params)
    theta_sig_temp = theta_sig.fprop([theta_1_temp], params)
    coeff_temp = coeff.fprop([theta_1_temp], params)
    corr_temp = corr.fprop([theta_1_temp], params)
    binary_temp = binary.fprop([theta_1_temp], params)
    '''

    x_shape = x.shape
    x_in = x.reshape((x_shape[0] * x_shape[1], -1))
    theta_mu_in = theta_mu_temp.reshape((x_shape[0] * x_shape[1], -1))
    theta_sig_in = theta_sig_temp.reshape((x_shape[0] * x_shape[1], -1))
    coeff_in = coeff_temp.reshape((x_shape[0] * x_shape[1], -1))
    #corr_in = corr_temp.reshape((x_shape[0]*x_shape[1], -1))
    #binary_in = binary_temp.reshape((x_shape[0]*x_shape[1], -1))

    if (flgAgg == -1):
        pred_temp.name = 'x_reconstructed'
        mse = T.mean((pred_temp - x)**2)  # CHECK RESHAPE with an assertion
        mae = T.mean(T.abs(pred_temp - x))
        mse.name = 'mse'
        pred_in = x.reshape((x_shape[0] * x_shape[1], -1))
    else:
        #pred_temp = pred_temp.reshape((pred_temp.shape[0], pred_temp.shape[1]))
        pred_temp.name = 'pred_' + str(flgAgg)
        #y[:,:,flgAgg].reshape((y.shape[0],y.shape[1],1))
        mse = T.mean((pred_temp - y)**2)  # CHECK RESHAPE with an assertion
        mae = T.mean(T.abs_(pred_temp - y))
        mse.name = 'mse'
        mae.name = 'mae'
        pred_in = y.reshape((y.shape[0] * y.shape[1], -1))

    recon = GMM(pred_in, theta_mu_in, theta_sig_in, coeff_in)  #, binary_in
    recon.name = 'recon'
    recon = recon.reshape((y.shape[0], y.shape[1]))
    #recon = recon * y_mask #(200, 1000), (1000, 200)
    recon_term = recon.sum(axis=0).mean()
    recon_term.name = 'nll'

    max_x = x.max()
    mean_x = x.mean()
    min_x = x.min()
    max_x.name = 'max_x'
    mean_x.name = 'mean_x'
    min_x.name = 'min_x'

    max_theta_mu = theta_mu_in.max()
    mean_theta_mu = theta_mu_in.mean()
    min_theta_mu = theta_mu_in.min()
    max_theta_mu.name = 'max_theta_mu'
    mean_theta_mu.name = 'mean_theta_mu'
    min_theta_mu.name = 'min_theta_mu'
    '''
    max_theta_sig = theta_sig_in.max()
    mean_theta_sig = theta_sig_in.mean()
    min_theta_sig = theta_sig_in.min()
    max_theta_sig.name = 'max_theta_sig'
    mean_theta_sig.name = 'mean_theta_sig'
    min_theta_sig.name = 'min_theta_sig'


    coeff_max = coeff_in.max()
    coeff_min = coeff_in.min()
    coeff_mean_max = coeff_in.mean(axis=0).max()
    coeff_mean_min = coeff_in.mean(axis=0).min()
    coeff_max.name = 'coeff_max'
    coeff_min.name = 'coeff_min'
    coeff_mean_max.name = 'coeff_mean_max'
    coeff_mean_min.name = 'coeff_mean_min'
    '''
    model.inputs = [x, mask, y, y_mask]
    model.params = params
    model.nodes = nodes

    optimizer = Adam(lr=lr)

    extension = [
        GradientClipping(batch_size=batch_size),
        EpochCount(epoch),
        Monitoring(
            freq=monitoring_freq,
            ddout=[
                recon_term,
                mse,
                mae,
                #max_theta_sig, mean_theta_sig, min_theta_sig,
                max_x,
                mean_x,
                min_x,
                max_theta_mu,
                mean_theta_mu,
                min_theta_mu,
                #coeff_max, coeff_min, coeff_mean_max, coeff_mean_min,#16
                theta_mu_temp,
                theta_sig_temp,
                pred_temp,
                coeff_temp,
                s_temp
            ],
            indexSep=9,
            indexDDoutPlot=[(0, theta_mu_temp), (2, pred_temp)],
            instancesPlot=[10, 100],  #, 80,150
            savedFolder=save_path,
            data=[Iterator(valid_data, batch_size)]),
        Picklize(freq=monitoring_freq, path=save_path),
        EarlyStopping(freq=monitoring_freq,
                      path=save_path,
                      channel=channel_name),
        WeightNorm()
    ]

    lr_iterations = {0: lr, 20: (lr / 10), 150: (lr / 100), 200: (lr / 1000)}

    mainloop = Training(name=pkl_name,
                        data=Iterator(train_data, batch_size),
                        model=model,
                        optimizer=optimizer,
                        cost=recon_term,
                        outputs=[recon_term],
                        extension=extension,
                        lr_iterations=lr_iterations)
    mainloop.run()
    fLog = open(save_path + '/output.csv', 'w')
    fLog.write("log,mse,mae\n")
    for i, item in enumerate(mainloop.trainlog.monitor['nll']):
        a = mainloop.trainlog.monitor['nll'][i]
        d = mainloop.trainlog.monitor['mse'][i]
        e = mainloop.trainlog.monitor['mae'][i]
        fLog.write("{},{},{}\n".format(a, d, e))
Example #15
0
def main(args):

    trial = int(args['trial'])
    pkl_name = 'vrnn_gauss_%d' % trial
    channel_name = 'valid_nll_upper_bound'

    data_path = args['data_path']
    save_path = args['save_path']

    monitoring_freq = int(args['monitoring_freq'])
    epoch = int(args['epoch'])
    batch_size = int(args['batch_size'])
    x_dim = int(args['x_dim'])
    z_dim = int(args['z_dim'])
    rnn_dim = int(args['rnn_dim'])
    lr = float(args['lr'])
    debug = int(args['debug'])

    print "trial no. %d" % trial
    print "batch size %d" % batch_size
    print "learning rate %f" % lr
    print "saving pkl file '%s'" % pkl_name
    print "to the save path '%s'" % save_path

    q_z_dim = 150
    p_z_dim = 150
    p_x_dim = 250
    x2s_dim = 250
    z2s_dim = 150
    target_dim = (x_dim - 1)

    model = Model()
    train_data = IAMOnDB(name='train',
                         prep='normalize',
                         cond=False,
                         path=data_path)

    X_mean = train_data.X_mean
    X_std = train_data.X_std

    valid_data = IAMOnDB(name='valid',
                         prep='normalize',
                         cond=False,
                         path=data_path,
                         X_mean=X_mean,
                         X_std=X_std)

    init_W = InitCell('rand')
    init_U = InitCell('ortho')
    init_b = InitCell('zeros')
    init_b_sig = InitCell('const', mean=0.6)

    x, mask = train_data.theano_vars()

    if debug:
        x.tag.test_value = np.zeros((15, batch_size, x_dim), dtype=np.float32)
        temp = np.ones((15, batch_size), dtype=np.float32)
        temp[:, -2:] = 0.
        mask.tag.test_value = temp

    x_1 = FullyConnectedLayer(name='x_1',
                              parent=['x_t'],
                              parent_dim=[x_dim],
                              nout=x2s_dim,
                              unit='relu',
                              init_W=init_W,
                              init_b=init_b)

    z_1 = FullyConnectedLayer(name='z_1',
                              parent=['z_t'],
                              parent_dim=[z_dim],
                              nout=z2s_dim,
                              unit='relu',
                              init_W=init_W,
                              init_b=init_b)

    rnn = LSTM(name='rnn',
               parent=['x_1', 'z_1'],
               parent_dim=[x2s_dim, z2s_dim],
               nout=rnn_dim,
               unit='tanh',
               init_W=init_W,
               init_U=init_U,
               init_b=init_b)

    phi_1 = FullyConnectedLayer(name='phi_1',
                                parent=['x_1', 's_tm1'],
                                parent_dim=[x2s_dim, rnn_dim],
                                nout=q_z_dim,
                                unit='relu',
                                init_W=init_W,
                                init_b=init_b)

    phi_mu = FullyConnectedLayer(name='phi_mu',
                                 parent=['phi_1'],
                                 parent_dim=[q_z_dim],
                                 nout=z_dim,
                                 unit='linear',
                                 init_W=init_W,
                                 init_b=init_b)

    phi_sig = FullyConnectedLayer(name='phi_sig',
                                  parent=['phi_1'],
                                  parent_dim=[q_z_dim],
                                  nout=z_dim,
                                  unit='softplus',
                                  cons=1e-4,
                                  init_W=init_W,
                                  init_b=init_b_sig)

    prior_1 = FullyConnectedLayer(name='prior_1',
                                  parent=['s_tm1'],
                                  parent_dim=[rnn_dim],
                                  nout=p_z_dim,
                                  unit='relu',
                                  init_W=init_W,
                                  init_b=init_b)

    prior_mu = FullyConnectedLayer(name='prior_mu',
                                   parent=['prior_1'],
                                   parent_dim=[p_z_dim],
                                   nout=z_dim,
                                   unit='linear',
                                   init_W=init_W,
                                   init_b=init_b)

    prior_sig = FullyConnectedLayer(name='prior_sig',
                                    parent=['prior_1'],
                                    parent_dim=[p_z_dim],
                                    nout=z_dim,
                                    unit='softplus',
                                    cons=1e-4,
                                    init_W=init_W,
                                    init_b=init_b_sig)

    theta_1 = FullyConnectedLayer(name='theta_1',
                                  parent=['z_1', 's_tm1'],
                                  parent_dim=[z2s_dim, rnn_dim],
                                  nout=p_x_dim,
                                  unit='relu',
                                  init_W=init_W,
                                  init_b=init_b)

    theta_mu = FullyConnectedLayer(name='theta_mu',
                                   parent=['theta_1'],
                                   parent_dim=[p_x_dim],
                                   nout=target_dim,
                                   unit='linear',
                                   init_W=init_W,
                                   init_b=init_b)

    theta_sig = FullyConnectedLayer(name='theta_sig',
                                    parent=['theta_1'],
                                    parent_dim=[p_x_dim],
                                    nout=target_dim,
                                    unit='softplus',
                                    cons=1e-4,
                                    init_W=init_W,
                                    init_b=init_b_sig)

    corr = FullyConnectedLayer(name='corr',
                               parent=['theta_1'],
                               parent_dim=[p_x_dim],
                               nout=1,
                               unit='tanh',
                               init_W=init_W,
                               init_b=init_b)

    binary = FullyConnectedLayer(name='binary',
                                 parent=['theta_1'],
                                 parent_dim=[p_x_dim],
                                 nout=1,
                                 unit='sigmoid',
                                 init_W=init_W,
                                 init_b=init_b)

    nodes = [
        rnn, x_1, z_1, phi_1, phi_mu, phi_sig, prior_1, prior_mu, prior_sig,
        theta_1, theta_mu, theta_sig, corr, binary
    ]

    params = OrderedDict()

    for node in nodes:
        if node.initialize() is not None:
            params.update(node.initialize())

    params = init_tparams(params)

    s_0 = rnn.get_init_state(batch_size)

    x_1_temp = x_1.fprop([x], params)

    def inner_fn(x_t, s_tm1):

        phi_1_t = phi_1.fprop([x_t, s_tm1], params)
        phi_mu_t = phi_mu.fprop([phi_1_t], params)
        phi_sig_t = phi_sig.fprop([phi_1_t], params)

        prior_1_t = prior_1.fprop([s_tm1], params)
        prior_mu_t = prior_mu.fprop([prior_1_t], params)
        prior_sig_t = prior_sig.fprop([prior_1_t], params)

        z_t = Gaussian_sample(phi_mu_t, phi_sig_t)
        z_1_t = z_1.fprop([z_t], params)

        s_t = rnn.fprop([[x_t, z_1_t], [s_tm1]], params)

        return s_t, phi_mu_t, phi_sig_t, prior_mu_t, prior_sig_t, z_1_t

    ((s_temp, phi_mu_temp, phi_sig_temp, prior_mu_temp, prior_sig_temp, z_1_temp), updates) =\
        theano.scan(fn=inner_fn,
                    sequences=[x_1_temp],
                    outputs_info=[s_0, None, None, None, None, None])

    for k, v in updates.iteritems():
        k.default_update = v

    s_temp = concatenate([s_0[None, :, :], s_temp[:-1]], axis=0)
    theta_1_temp = theta_1.fprop([z_1_temp, s_temp], params)
    theta_mu_temp = theta_mu.fprop([theta_1_temp], params)
    theta_sig_temp = theta_sig.fprop([theta_1_temp], params)
    corr_temp = corr.fprop([theta_1_temp], params)
    binary_temp = binary.fprop([theta_1_temp], params)

    kl_temp = KLGaussianGaussian(phi_mu_temp, phi_sig_temp, prior_mu_temp,
                                 prior_sig_temp)

    x_shape = x.shape
    x_in = x.reshape((x_shape[0] * x_shape[1], -1))
    theta_mu_in = theta_mu_temp.reshape((x_shape[0] * x_shape[1], -1))
    theta_sig_in = theta_sig_temp.reshape((x_shape[0] * x_shape[1], -1))
    corr_in = corr_temp.reshape((x_shape[0] * x_shape[1], -1))
    binary_in = binary_temp.reshape((x_shape[0] * x_shape[1], -1))

    recon = BiGauss(x_in, theta_mu_in, theta_sig_in, corr_in, binary_in)
    recon = recon.reshape((x_shape[0], x_shape[1]))
    recon = recon * mask
    recon_term = recon.sum(axis=0).mean()
    recon_term.name = 'recon_term'

    kl_temp = kl_temp * mask
    kl_term = kl_temp.sum(axis=0).mean()
    kl_term.name = 'kl_term'

    nll_upper_bound = recon_term + kl_term
    nll_upper_bound.name = 'nll_upper_bound'

    max_x = x.max()
    mean_x = x.mean()
    min_x = x.min()
    max_x.name = 'max_x'
    mean_x.name = 'mean_x'
    min_x.name = 'min_x'

    max_theta_mu = theta_mu_in.max()
    mean_theta_mu = theta_mu_in.mean()
    min_theta_mu = theta_mu_in.min()
    max_theta_mu.name = 'max_theta_mu'
    mean_theta_mu.name = 'mean_theta_mu'
    min_theta_mu.name = 'min_theta_mu'

    max_theta_sig = theta_sig_in.max()
    mean_theta_sig = theta_sig_in.mean()
    min_theta_sig = theta_sig_in.min()
    max_theta_sig.name = 'max_theta_sig'
    mean_theta_sig.name = 'mean_theta_sig'
    min_theta_sig.name = 'min_theta_sig'

    max_phi_sig = phi_sig_temp.max()
    mean_phi_sig = phi_sig_temp.mean()
    min_phi_sig = phi_sig_temp.min()
    max_phi_sig.name = 'max_phi_sig'
    mean_phi_sig.name = 'mean_phi_sig'
    min_phi_sig.name = 'min_phi_sig'

    max_prior_sig = prior_sig_temp.max()
    mean_prior_sig = prior_sig_temp.mean()
    min_prior_sig = prior_sig_temp.min()
    max_prior_sig.name = 'max_prior_sig'
    mean_prior_sig.name = 'mean_prior_sig'
    min_prior_sig.name = 'min_prior_sig'

    model.inputs = [x, mask]
    model.params = params
    model.nodes = nodes

    optimizer = Adam(lr=lr)

    extension = [
        GradientClipping(batch_size=batch_size),
        EpochCount(epoch),
        Monitoring(freq=monitoring_freq,
                   ddout=[
                       nll_upper_bound, recon_term, kl_term, max_phi_sig,
                       mean_phi_sig, min_phi_sig, max_prior_sig,
                       mean_prior_sig, min_prior_sig, max_theta_sig,
                       mean_theta_sig, min_theta_sig, max_x, mean_x, min_x,
                       max_theta_mu, mean_theta_mu, min_theta_mu
                   ],
                   data=[Iterator(valid_data, batch_size)]),
        Picklize(freq=monitoring_freq, path=save_path),
        EarlyStopping(freq=monitoring_freq,
                      path=save_path,
                      channel=channel_name),
        WeightNorm()
    ]

    mainloop = Training(name=pkl_name,
                        data=Iterator(train_data, batch_size),
                        model=model,
                        optimizer=optimizer,
                        cost=nll_upper_bound,
                        outputs=[nll_upper_bound],
                        extension=extension)
    mainloop.run()
def main(args):
    
    theano.optimizer='fast_compile'
    theano.config.exception_verbosity='high'
    

    trial = int(args['trial'])
    pkl_name = 'dp_disall-sch_%d' % trial
    channel_name = 'mae'

    data_path = args['data_path']
    save_path = args['save_path']#+'/aggVSdisag_distrib/'+datetime.datetime.now().strftime("%y-%m-%d_%H-%M")
    period = int(args['period'])
    n_steps = int(args['n_steps'])
    stride_train = int(args['stride_train'])
    stride_test = int(args['stride_test'])
    loadType = int(args['loadType'])

    flgMSE = int(args['flgMSE'])
    monitoring_freq = int(args['monitoring_freq'])
    epoch = int(args['epoch'])
    batch_size = int(args['batch_size'])
    x_dim = int(args['x_dim'])
    y_dim = int(args['y_dim'])
    z_dim = int(args['z_dim'])
    rnn_dim = int(args['rnn_dim'])
    k = int(args['num_k']) #a mixture of K Gaussian functions
    lr = float(args['lr'])
    origLR = lr
    debug = int(args['debug'])
    kSchedSamp = int(args['kSchedSamp'])

    print "trial no. %d" % trial
    print "batch size %d" % batch_size
    print "learning rate %f" % lr
    print "saving pkl file '%s'" % pkl_name
    print "to the save path '%s'" % save_path
    print(str(windows))

    q_z_dim = 500
    p_z_dim = 500
    p_x_dim = 500
    x2s_dim = 200
    y2s_dim = 200
    z2s_dim = 200
    target_dim = k# As different appliances are separeted in theta_mu1, theta_mu2, etc... each one is just created from k different Gaussians

    
    Xtrain, ytrain, Xval, yval, Xtest,ytest, reader = fetch_dataport(data_path, windows, appliances,numApps=-1, period=period,
                                              n_steps= n_steps, stride_train = stride_train, stride_test = stride_test,
                                              trainPer=0.5, valPer=0.25, testPer=0.25, typeLoad = loadType,
                                              flgAggSumScaled = 1, flgFilterZeros = 1)

    print("Mean ",reader.meanTrain)
    print("Std", reader.stdTrain)
    instancesPlot = {0:[4]}

    train_data = Dataport(name='train',
                         prep='normalize',
                         cond=True,# False
                         #path=data_path,
                         inputX=Xtrain,
                         labels=ytrain)

    X_mean = train_data.X_mean
    X_std = train_data.X_std

    valid_data = Dataport(name='valid',
                         prep='normalize',
                         cond=True,# False
                         #path=data_path,
                         X_mean=X_mean,
                         X_std=X_std,
                         inputX=Xval,
                         labels = yval)

    test_data = Dataport(name='valid',
                         prep='normalize',
                         cond=True,# False
                         #path=data_path,
                         X_mean=X_mean,
                         X_std=X_std,
                         inputX=Xtest,
                         labels = ytest)

    init_W = InitCell('rand')
    init_U = InitCell('ortho')
    init_b = InitCell('zeros')
    init_b_sig = InitCell('const', mean=0.6)

    x, mask, y , y_mask = train_data.theano_vars()
    scheduleSamplingMask = T.fvector('schedMask')

    x.name = 'x_original'

    if debug:
        x.tag.test_value = np.zeros((15, batch_size, x_dim), dtype=np.float32)
        temp = np.ones((15, batch_size), dtype=np.float32)
        temp[:, -2:] = 0.
        mask.tag.test_value = temp

    #from experiment 18-05-31_18-48
    pickelModel = '/home/gissella/Documents/Research/Disaggregation/PecanStreet-dataport/VRNN_theano_version/output/allAtOnce/18-06-14_21-41_7951/dp_disall-sch_1_best.pkl'
    fmodel = open(pickelModel, 'rb')
    mainloop = cPickle.load(fmodel)
    fmodel.close()

    #define layers
    rnn = mainloop.model.nodes[0]
    x_1 = mainloop.model.nodes[1]
    y_1 = mainloop.model.nodes[2]
    z_1 = mainloop.model.nodes[3]
    phi_1 = mainloop.model.nodes[4]
    phi_mu = mainloop.model.nodes[5]
    phi_sig = mainloop.model.nodes[6]
    prior_1 = mainloop.model.nodes[7]
    prior_mu = mainloop.model.nodes[8]
    prior_sig = mainloop.model.nodes[9]
    theta_1 = mainloop.model.nodes[10]
    theta_mu1 = mainloop.model.nodes[11]
    theta_sig1 = mainloop.model.nodes[12]
    coeff1 = mainloop.model.nodes[13]

    nodes = [rnn,
             x_1, y_1,z_1, #dissag_pred,
             phi_1, phi_mu, phi_sig,
             prior_1, prior_mu, prior_sig,
             theta_1, theta_mu1, theta_sig1, coeff1]

    params = mainloop.model.params

    dynamicOutput = [None, None, None, None, None, None, None, None]
    #dynamicOutput_val = [None, None, None, None, None, None,None,  None, None]
    if (y_dim>1):
      theta_mu2 = mainloop.model.nodes[14]
      theta_sig2 = mainloop.model.nodes[15]
      coeff2 = mainloop.model.nodes[16]
      nodes = nodes + [theta_mu2, theta_sig2, coeff2]
      dynamicOutput = dynamicOutput+[None, None, None, None] #mu, sig, coef and pred
    if (y_dim>2):
      theta_mu3 = mainloop.model.nodes[17]
      theta_sig3 = mainloop.model.nodes[18]
      coeff3 = mainloop.model.nodes[19]
      nodes = nodes + [theta_mu3, theta_sig3, coeff3]
      dynamicOutput = dynamicOutput +[None, None, None, None]
    if (y_dim>3):
      theta_mu4 = mainloop.model.nodes[20]
      theta_sig4 = mainloop.model.nodes[21]
      coeff4 = mainloop.model.nodes[22]
      nodes = nodes + [theta_mu4, theta_sig4, coeff4]
      dynamicOutput = dynamicOutput + [None, None, None, None]
    if (y_dim>4):
      theta_mu5 = mainloop.model.nodes[23]
      theta_sig5 = mainloop.model.nodes[24]
      coeff5 = mainloop.model.nodes[25]
      nodes = nodes + [theta_mu5, theta_sig5, coeff5]
      dynamicOutput = dynamicOutput + [None, None, None, None]
    if (y_dim>5):
      theta_mu6 = mainloop.model.nodes[26]
      theta_sig6 = mainloop.model.nodes[27]
      coeff6 = mainloop.model.nodes[28]
      nodes = nodes + [theta_mu6, theta_sig6, coeff6]
      dynamicOutput = dynamicOutput + [None, None, None, None]
    if (y_dim>6):
      theta_mu7 = mainloop.model.nodes[29]
      theta_sig7 = mainloop.model.nodes[30]
      coeff7 = mainloop.model.nodes[31]
      nodes = nodes + [theta_mu7, theta_sig7, coeff7]
      dynamicOutput = dynamicOutput + [None, None, None, None]
    if (y_dim>7):
      theta_mu8 = mainloop.model.nodes[32]
      theta_sig8 = mainloop.model.nodes[33]
      coeff8 = mainloop.model.nodes[34]
      nodes = nodes + [theta_mu8, theta_sig8, coeff8]
      dynamicOutput = dynamicOutput + [None, None, None, None]

    s_0 = rnn.get_init_state(batch_size)

    x_1_temp = x_1.fprop([x], params)
    y_1_temp = y_1.fprop([y], params)

    output_fn = [s_0] + dynamicOutput
    output_fn_val = [s_0] + dynamicOutput[2:]
    print(len(output_fn), len(output_fn_val))


    def inner_fn_test(x_t, s_tm1):

        prior_1_t = prior_1.fprop([x_t,s_tm1], params)
        prior_mu_t = prior_mu.fprop([prior_1_t], params)
        prior_sig_t = prior_sig.fprop([prior_1_t], params)

        z_t = Gaussian_sample(prior_mu_t, prior_sig_t)#in the original code it is gaussian. GMM is for the generation
        z_1_t = z_1.fprop([z_t], params)

        theta_1_t = theta_1.fprop([z_1_t, s_tm1], params)
        theta_mu1_t = theta_mu1.fprop([theta_1_t], params)
        theta_sig1_t = theta_sig1.fprop([theta_1_t], params)
        coeff1_t = coeff1.fprop([theta_1_t], params)

        y_pred1 = GMM_sampleY(theta_mu1_t, theta_sig1_t, coeff1_t) #Gaussian_sample(theta_mu_t, theta_sig_t)

        tupleMulti = prior_mu_t, prior_sig_t, theta_mu1_t, theta_sig1_t, coeff1_t, y_pred1

        if (y_dim>1):
          theta_mu2_t = theta_mu2.fprop([theta_1_t], params)
          theta_sig2_t = theta_sig2.fprop([theta_1_t], params)
          coeff2_t = coeff2.fprop([theta_1_t], params)
          y_pred2 = GMM_sampleY(theta_mu2_t, theta_sig2_t, coeff2_t)
          y_pred1 = T.concatenate([y_pred1, y_pred2],axis=1)
          tupleMulti = tupleMulti + (theta_mu2_t, theta_sig2_t, coeff2_t, y_pred2)

        if (y_dim>2):
          theta_mu3_t = theta_mu3.fprop([theta_1_t], params)
          theta_sig3_t = theta_sig3.fprop([theta_1_t], params)
          coeff3_t = coeff3.fprop([theta_1_t], params)
          y_pred3 = GMM_sampleY(theta_mu3_t, theta_sig3_t, coeff3_t)
          y_pred1 = T.concatenate([y_pred1, y_pred3],axis=1)
          tupleMulti = tupleMulti + (theta_mu3_t, theta_sig3_t, coeff3_t, y_pred3)

        if (y_dim>3):
          theta_mu4_t = theta_mu4.fprop([theta_1_t], params)
          theta_sig4_t = theta_sig4.fprop([theta_1_t], params)
          coeff4_t = coeff4.fprop([theta_1_t], params)
          y_pred4 = GMM_sampleY(theta_mu4_t, theta_sig4_t, coeff4_t)
          y_pred1 = T.concatenate([y_pred1, y_pred4],axis=1)
          tupleMulti = tupleMulti + (theta_mu4_t, theta_sig4_t, coeff4_t, y_pred4)

        if (y_dim>4):
          theta_mu5_t = theta_mu5.fprop([theta_1_t], params)
          theta_sig5_t = theta_sig5.fprop([theta_1_t], params)
          coeff5_t = coeff5.fprop([theta_1_t], params)
          y_pred5 = GMM_sampleY(theta_mu5_t, theta_sig5_t, coeff5_t)
          y_pred1 = T.concatenate([y_pred1, y_pred5],axis=1)
          tupleMulti = tupleMulti + (theta_mu5_t, theta_sig5_t, coeff5_t, y_pred5)

        if (y_dim>5):
          theta_mu6_t = theta_mu6.fprop([theta_1_t], params)
          theta_sig6_t = theta_sig6.fprop([theta_1_t], params)
          coeff6_t = coeff6.fprop([theta_1_t], params)
          y_pred6 = GMM_sampleY(theta_mu6_t, theta_sig6_t, coeff6_t)
          y_pred1 = T.concatenate([y_pred1, y_pred6],axis=1)
          tupleMulti = tupleMulti + (theta_mu6_t, theta_sig6_t, coeff6_t, y_pred6)

        if (y_dim>6):
          theta_mu7_t = theta_mu7.fprop([theta_1_t], params)
          theta_sig7_t = theta_sig7.fprop([theta_1_t], params)
          coeff7_t = coeff7.fprop([theta_1_t], params)
          y_pred7 = GMM_sampleY(theta_mu7_t, theta_sig7_t, coeff7_t)
          y_pred1 = T.concatenate([y_pred1, y_pred7],axis=1)
          tupleMulti = tupleMulti + (theta_mu7_t, theta_sig7_t, coeff7_t, y_pred7)

        if (y_dim>7):
          theta_mu8_t = theta_mu8.fprop([theta_1_t], params)
          theta_sig8_t = theta_sig8.fprop([theta_1_t], params)
          coeff8_t = coeff8.fprop([theta_1_t], params)
          y_pred8 = GMM_sampleY(theta_mu8_t, theta_sig8_t, coeff8_t)
          y_pred1 = T.concatenate([y_pred1, y_pred8],axis=1)
          tupleMulti = tupleMulti + (theta_mu8_t, theta_sig8_t, coeff8_t, y_pred8)

        pred_1_t=y_1.fprop([y_pred1], params)
        #y_pred = [GMM_sampleY(theta_mu_t[i], theta_sig_t[i], coeff_t[i]) for i in range(y_dim)]#T.stack([y_pred1,y_pred2],axis = 0 )
        s_t = rnn.fprop([[x_t, z_1_t, pred_1_t], [s_tm1]], params)
        #y_pred = dissag_pred.fprop([s_t], params)

        return (s_t,)+tupleMulti
        #corr_temp, binary_temp
    (otherResults_val, updates_val) = theano.scan(fn=inner_fn_test, sequences=[x_1_temp],
                            outputs_info=output_fn_val )

    for k, v in updates_val.iteritems():
        k.default_update = v


    x_shape = x.shape
    y_shape = y.shape
    x_in = x.reshape((x_shape[0]*x_shape[1], -1))
    y_in = y.reshape((y_shape[0]*y_shape[1], -1))



    ######################## TEST (GENERATION) TIME
    s_temp_val, prior_mu_temp_val, prior_sig_temp_val, \
      theta_mu1_temp_val, theta_sig1_temp_val, coeff1_temp_val, y_pred1_temp_val = otherResults_val[:7]
    restResults_val = otherResults_val[7:]

    #s_temp_val = concatenate([s_0[None, :, :], s_temp_val[:-1]], axis=0)# seems like this is for creating an additional dimension to s_0

    theta_mu1_temp_val.name = 'theta_mu1_val'
    theta_sig1_temp_val.name = 'theta_sig1_val'
    coeff1_temp_val.name = 'coeff1_val'
    y_pred1_temp_val.name = 'disaggregation1_val'
    y_pred1_temp_val = T.clip(y_pred1_temp_val,0.0,np.inf)
    prediction_val = y_pred1_temp_val

    #[:,:,flgAgg].reshape((y.shape[0],y.shape[1],1)
    mse1_val = T.mean((y_pred1_temp_val - y[:,:,0].reshape((y.shape[0],y.shape[1],1)))**2)
    mae1_val = T.mean( T.abs_(y_pred1_temp_val - y[:,:,0].reshape((y.shape[0],y.shape[1],1))) )

    totPred = T.sum(y_pred1_temp_val)
    totReal = T.sum(y[:,:,0])
    relErr1_val =( totPred -  totReal)/ T.maximum(totPred,totReal)
    propAssigned1_val = 1 - T.sum(T.abs_(y_pred1_temp_val - y[:,:,0].reshape((y.shape[0],y.shape[1],1))))/(2*T.sum(x))

    #y_unNormalize = (y[:,:,0] * reader.stdTrain[0]) + reader.meanTrain[0]
    #y_pred1_temp_val = (y_pred1_temp_val * reader.stdTrain[0]) + reader.meanTrain[0]
    #mse1_valUnNorm = T.mean((y_pred1_temp_val - y_unNormalize.reshape((y.shape[0],y.shape[1],1)))**2) # As axis = None is calculated for all
    #mae1_valUnNorm = T.mean( T.abs_(y_pred1_temp_val - y_unNormalize.reshape((y.shape[0],y.shape[1],1))) )

    mse1_val.name = 'mse1_val'
    mae1_val.name = 'mae1_val'

    theta_mu1_in_val = theta_mu1_temp_val.reshape((x_shape[0]*x_shape[1], -1))
    theta_sig1_in_val = theta_sig1_temp_val.reshape((x_shape[0]*x_shape[1], -1))
    coeff1_in_val = coeff1_temp_val.reshape((x_shape[0]*x_shape[1], -1))

    totaMSE_val = mse1_val
    totaMAE_val =mae1_val
    indexSepDynamic_val = 5

    #Initializing values of mse and mae
    mse2_val = T.mean(T.zeros((y.shape[0],y.shape[1],1)))
    mae2_val = T.mean(T.zeros((y.shape[0],y.shape[1],1)))
    mse3_val = T.mean(T.zeros((y.shape[0],y.shape[1],1)))
    mae3_val = T.mean(T.zeros((y.shape[0],y.shape[1],1)))
    mse4_val = T.mean(T.zeros((y.shape[0],y.shape[1],1)))
    mae4_val = T.mean(T.zeros((y.shape[0],y.shape[1],1)))
    mse5_val = T.mean(T.zeros((y.shape[0],y.shape[1],1)))
    mae5_val = T.mean(T.zeros((y.shape[0],y.shape[1],1)))
    mse6_val = T.mean(T.zeros((y.shape[0],y.shape[1],1)))
    mae6_val = T.mean(T.zeros((y.shape[0],y.shape[1],1)))
    mse7_val = T.mean(T.zeros((y.shape[0],y.shape[1],1)))
    mae7_val = T.mean(T.zeros((y.shape[0],y.shape[1],1)))
    mse8_val = T.mean(T.zeros((y.shape[0],y.shape[1],1)))
    mae8_val = T.mean(T.zeros((y.shape[0],y.shape[1],1)))


    relErr2_val = T.zeros((1,))
    relErr3_val = T.zeros((1,))
    relErr4_val = T.zeros((1,))
    relErr5_val = T.zeros((1,))
    relErr6_val = T.zeros((1,))
    relErr7_val = T.zeros((1,))
    relErr8_val = T.zeros((1,))

    propAssigned2_val = T.zeros((1,))
    propAssigned3_val = T.zeros((1,))
    propAssigned4_val = T.zeros((1,))
    propAssigned5_val = T.zeros((1,))
    propAssigned6_val = T.zeros((1,))
    propAssigned7_val = T.zeros((1,))
    propAssigned8_val = T.zeros((1,))

    if (y_dim>1):
      theta_mu2_temp_val, theta_sig2_temp_val, coeff2_temp_val, y_pred2_temp_val = restResults_val[:4]
      restResults_val = restResults_val[4:]
      theta_mu2_temp_val.name = 'theta_mu2_val'
      theta_sig2_temp_val.name = 'theta_sig2_val'
      coeff2_temp_val.name = 'coeff2_val'
      y_pred2_temp_val.name = 'disaggregation2_val'
      y_pred2_temp_val = T.clip(y_pred2_temp_val,0.0,np.inf)

      prediction_val = T.concatenate([prediction_val, y_pred2_temp_val], axis=2) #before it gets unnormalized

      mse2_val = T.mean((y_pred2_temp_val - y[:,:,1].reshape((y.shape[0],y.shape[1],1)))**2)
      mae2_val = T.mean( T.abs_(y_pred2_temp_val - y[:,:,1].reshape((y.shape[0],y.shape[1],1))) )

      totPred = T.sum(y_pred2_temp_val)
      totReal = T.sum(y[:,:,1])
      relErr2_val =( totPred -  totReal)/ T.maximum(totPred,totReal)
      propAssigned2_val = 1 - T.sum(T.abs_(y_pred2_temp_val - y[:,:,1].reshape((y.shape[0],y.shape[1],1))))/(2*T.sum(x))

      #y_unNormalize = (y[:,:,1] * reader.stdTrain[1]) + reader.meanTrain[1]
      #y_pred2_temp_val = (y_pred2_temp_val * reader.stdTrain[1]) + reader.meanTrain[1]
      #mse2_valUnNorm = T.mean((y_pred2_temp_val - y_unNormalize.reshape((y.shape[0],y.shape[1],1)))**2) # As axis = None is calculated for all
      #mae2_valUnNorm = T.mean( T.abs_(y_pred2_temp_val - y_unNormalize.reshape((y.shape[0],y.shape[1],1))) )

      mse2_val.name = 'mse2_val'
      mae2_val.name = 'mae2_val'

      theta_mu2_in_val = theta_mu2_temp_val.reshape((x_shape[0]*x_shape[1], -1))
      theta_sig2_in_val = theta_sig2_temp_val.reshape((x_shape[0]*x_shape[1], -1))
      coeff2_in_val = coeff2_temp_val.reshape((x_shape[0]*x_shape[1], -1))

      argsGMM_val = theta_mu2_in_val, theta_sig2_in_val, coeff2_in_val

      totaMSE_val+=mse2_val
      totaMAE_val+=mae2_val
      indexSepDynamic_val +=2

    if (y_dim>2):
      theta_mu3_temp_val, theta_sig3_temp_val, coeff3_temp_val, y_pred3_temp_val = restResults_val[:4]
      restResults_val = restResults_val[4:]
      theta_mu3_temp_val.name = 'theta_mu3_val'
      theta_sig3_temp_val.name = 'theta_sig3_val'
      coeff3_temp_val.name = 'coeff3_val'
      y_pred3_temp_val.name = 'disaggregation3_val'
      y_pred3_temp_val = T.clip(y_pred3_temp_val,0.0,np.inf)

      prediction_val = T.concatenate([prediction_val, y_pred3_temp_val], axis=2) #before it gets unnormalized

      mse3_val = T.mean((y_pred3_temp_val - y[:,:,2].reshape((y.shape[0],y.shape[1],1)))**2)
      mae3_val = T.mean( T.abs_(y_pred3_temp_val - y[:,:,2].reshape((y.shape[0],y.shape[1],1))) )

      totPred = T.sum(y_pred3_temp_val)
      totReal = T.sum(y[:,:,2])
      relErr3_val =( totPred -  totReal)/ T.maximum(totPred,totReal)
      propAssigned3_val = 1 - T.sum(T.abs_(y_pred3_temp_val - y[:,:,2].reshape((y.shape[0],y.shape[1],1))))/(2*T.sum(x))

      #y_unNormalize = (y[:,:,2] * reader.stdTrain[2]) + reader.meanTrain[2]
      #y_pred3_temp_val = (y_pred3_temp_val * reader.stdTrain[2]) + reader.meanTrain[2]
      #mse3_valUnNorm = T.mean((y_pred3_temp_val - y_unNormalize.reshape((y.shape[0],y.shape[1],1)))**2) # As axis = None is calculated for all
      #mae3_valUnNorm = T.mean( T.abs_(y_pred3_temp_val - y_unNormalize.reshape((y.shape[0],y.shape[1],1))) )

      mse3_val.name = 'mse3_val'
      mae3_val.name = 'mae3_val'

      theta_mu3_in_val = theta_mu3_temp_val.reshape((x_shape[0]*x_shape[1], -1))
      theta_sig3_in_val = theta_sig3_temp_val.reshape((x_shape[0]*x_shape[1], -1))
      coeff3_in_val = coeff3_temp_val.reshape((x_shape[0]*x_shape[1], -1))

      argsGMM_val = argsGMM_val + (theta_mu3_in_val, theta_sig3_in_val, coeff3_in_val)
      totaMSE_val+=mse3_val
      totaMAE_val+=mae3_val
      indexSepDynamic_val +=2

      

    if (y_dim>3):
      theta_mu4_temp_val, theta_sig4_temp_val, coeff4_temp_val, y_pred4_temp_val = restResults_val[:4]
      restResults_val = restResults_val[4:]
      theta_mu4_temp_val.name = 'theta_mu4_val'
      theta_sig4_temp_val.name = 'theta_sig4_val'
      coeff4_temp_val.name = 'coeff4_val'
      y_pred4_temp_val.name = 'disaggregation4_val'
      y_pred4_temp_val = T.clip(y_pred4_temp_val,0.0,np.inf)

      prediction_val = T.concatenate([prediction_val, y_pred4_temp_val], axis=2) #before it gets unnormalized

      mse4_val = T.mean((y_pred4_temp_val - y[:,:,3].reshape((y.shape[0],y.shape[1],1)))**2)
      mae4_val = T.mean( T.abs_(y_pred4_temp_val - y[:,:,3].reshape((y.shape[0],y.shape[1],1))) )

      totPred = T.sum(y_pred4_temp_val)
      totReal = T.sum(y[:,:,3])
      relErr4_val =( totPred -  totReal)/ T.maximum(totPred,totReal)
      propAssigned4_val = 1 - T.sum(T.abs_(y_pred4_temp_val - y[:,:,3].reshape((y.shape[0],y.shape[1],1))))/(2*T.sum(x))

      #y_unNormalize = (y[:,:,3] * reader.stdTrain[3]) + reader.meanTrain[3]
      #y_pred4_temp_val = (y_pred4_temp_val * reader.stdTrain[3]) + reader.meanTrain[3]
      #mse4_valUnNorm = T.mean((y_pred4_temp_val - y_unNormalize.reshape((y.shape[0],y.shape[1],1)))**2) # As axis = None is calculated for all
      #mae4_valUnNorm = T.mean( T.abs_(y_pred4_temp_val - y_unNormalize.reshape((y.shape[0],y.shape[1],1))) )

      mse4_val.name = 'mse4_val'
      mae4_val.name = 'mae4_val'

      theta_mu4_in_val = theta_mu4_temp_val.reshape((x_shape[0]*x_shape[1], -1))
      theta_sig4_in_val = theta_sig4_temp_val.reshape((x_shape[0]*x_shape[1], -1))
      coeff4_in_val = coeff4_temp_val.reshape((x_shape[0]*x_shape[1], -1))

      argsGMM_val = argsGMM_val + (theta_mu4_in_val, theta_sig4_in_val, coeff4_in_val)
      totaMSE_val+=mse4_val
      totaMAE_val+=mae4_val
      indexSepDynamic_val +=2
      
    if (y_dim>4):
      theta_mu5_temp_val, theta_sig5_temp_val, coeff5_temp_val, y_pred5_temp_val = restResults_val[:4]
      restResults_val = restResults_val[4:]
      theta_mu5_temp_val.name = 'theta_mu5_val'
      theta_sig5_temp_val.name = 'theta_sig5_val'
      coeff5_temp_val.name = 'coeff5_val'
      y_pred5_temp_val.name = 'disaggregation5_val'
      y_pred5_temp_val = T.clip(y_pred5_temp_val,0.0,np.inf)

      prediction_val = T.concatenate([prediction_val, y_pred5_temp_val], axis=2) # before it gets unnormalized

      mse5_val = T.mean((y_pred5_temp_val - y[:,:,4].reshape((y.shape[0],y.shape[1],1)))**2)
      mae5_val = T.mean( T.abs_(y_pred5_temp_val - y[:,:,4].reshape((y.shape[0],y.shape[1],1))) )

      totPred = T.sum(y_pred5_temp_val)
      totReal = T.sum(y[:,:,4])
      relErr5_val =( totPred -  totReal)/ T.maximum(totPred,totReal)
      propAssigned5_val = 1 - T.sum(T.abs_(y_pred5_temp_val - y[:,:,4].reshape((y.shape[0],y.shape[1],1))))/(2*T.sum(x))

      #y_unNormalize = (y[:,:,4] * reader.stdTrain[4]) + reader.meanTrain[4]
      #y_pred5_temp_val = (y_pred5_temp_val * reader.stdTrain[4]) + reader.meanTrain[4]
      #mse5_valUnNorm = T.mean((y_pred5_temp_val - y_unNormalize.reshape((y.shape[0],y.shape[1],1)))**2) # As axis = None is calculated for all
      #mae5_valUnNorm = T.mean( T.abs_(y_pred5_temp_val - y_unNormalize.reshape((y.shape[0],y.shape[1],1))) )

      mse5_val.name = 'mse5_val'
      mae5_val.name = 'mae5_val'

      theta_mu5_in_val = theta_mu5_temp_val.reshape((x_shape[0]*x_shape[1], -1))
      theta_sig5_in_val = theta_sig5_temp_val.reshape((x_shape[0]*x_shape[1], -1))
      coeff5_in_val = coeff5_temp_val.reshape((x_shape[0]*x_shape[1], -1))

      argsGMM_val = argsGMM_val + (theta_mu5_in_val, theta_sig5_in_val, coeff5_in_val)
      totaMSE_val+=mse5_val
      totaMAE_val+=mae5_val
      indexSepDynamic_val +=2
      

    if (y_dim>5):
      theta_mu6_temp_val, theta_sig6_temp_val, coeff6_temp_val, y_pred6_temp_val = restResults_val[:4]
      restResults_val = restResults_val[4:]
      theta_mu6_temp_val.name = 'theta_mu6_val'
      theta_sig6_temp_val.name = 'theta_sig6_val'
      coeff6_temp_val.name = 'coeff6_val'
      y_pred6_temp_val.name = 'disaggregation6_val'
      y_pred6_temp_val = T.clip(y_pred6_temp_val,0.0,np.inf)

      prediction_val = T.concatenate([prediction_val, y_pred6_temp_val], axis=2) #before it gets unnormalized

      mse6_val = T.mean((y_pred6_temp_val - y[:,:,5].reshape((y.shape[0],y.shape[1],1)))**2)
      mae6_val = T.mean( T.abs_(y_pred6_temp_val - y[:,:,5].reshape((y.shape[0],y.shape[1],1))) )

      totPred = T.sum(y_pred6_temp_val)
      totReal = T.sum(y[:,:,5])
      relErr6_val =( totPred -  totReal)/ T.maximum(totPred,totReal)
      propAssigned6_val = 1 - T.sum(T.abs_(y_pred6_temp_val - y[:,:,5].reshape((y.shape[0],y.shape[1],1))))/(2*T.sum(x))

      #y_unNormalize = (y[:,:,5] * reader.stdTrain[5]) + reader.meanTrain[5]
      #y_pred6_temp_val = (y_pred6_temp_val * reader.stdTrain[5]) + reader.meanTrain[5]
      #mse6_valUnNorm = T.mean((y_pred6_temp_val - y_unNormalize.reshape((y.shape[0],y.shape[1],1)))**2) # As axis = None is calculated for all
      #mae6_valUnNorm = T.mean( T.abs_(y_pred6_temp_val - y_unNormalize.reshape((y.shape[0],y.shape[1],1))) )

      mse6_val.name = 'mse6_val'
      mae6_val.name = 'mae6_val'

      theta_mu6_in_val = theta_mu6_temp_val.reshape((x_shape[0]*x_shape[1], -1))
      theta_sig6_in_val = theta_sig6_temp_val.reshape((x_shape[0]*x_shape[1], -1))
      coeff6_in_val = coeff6_temp_val.reshape((x_shape[0]*x_shape[1], -1))

      argsGMM_val = argsGMM_val + (theta_mu6_in_val, theta_sig6_in_val, coeff6_in_val)
      totaMSE_val+=mse6_val
      totaMAE_val+=mae6_val
      indexSepDynamic_val +=2

    if (y_dim>6):
      theta_mu7_temp_val, theta_sig7_temp_val, coeff7_temp_val, y_pred7_temp_val = restResults_val[:4]
      restResults_val = restResults_val[4:]
      theta_mu7_temp_val.name = 'theta_mu7_val'
      theta_sig7_temp_val.name = 'theta_sig7_val'
      coeff7_temp_val.name = 'coeff7_val'
      y_pred7_temp_val.name = 'disaggregation7_val'
      y_pred7_temp_val = T.clip(y_pred7_temp_val,0.0,np.inf)

      prediction_val = T.concatenate([prediction_val, y_pred7_temp_val], axis=2) # before it gets unnormalized

      mse7_val = T.mean((y_pred7_temp_val - y[:,:,6].reshape((y.shape[0],y.shape[1],1)))**2)
      mae7_val = T.mean( T.abs_(y_pred7_temp_val - y[:,:,6].reshape((y.shape[0],y.shape[1],1))) )

      totPred = T.sum(y_pred7_temp_val)
      totReal = T.sum(y[:,:,6])
      relErr7_val =( totPred -  totReal)/ T.maximum(totPred,totReal)
      propAssigned7_val = 1 - T.sum(T.abs_(y_pred7_temp_val - y[:,:,6].reshape((y.shape[0],y.shape[1],1))))/(2*T.sum(x))

      #y_unNormalize = (y[:,:,6] * reader.stdTrain[6]) + reader.meanTrain[6]
      #y_pred7_temp_val = (y_pred7_temp_val * reader.stdTrain[6]) + reader.meanTrain[6]
      #mse7_valUnNorm = T.mean((y_pred7_temp_val - y_unNormalize.reshape((y.shape[0],y.shape[1],1)))**2) # As axis = None is calculated for all
      #mae7_valUnNorm = T.mean( T.abs_(y_pred7_temp_val - y_unNormalize.reshape((y.shape[0],y.shape[1],1))) )

      mse7_val.name = 'mse7_val'
      mae7_val.name = 'mae7_val'

      theta_mu7_in_val = theta_mu7_temp_val.reshape((x_shape[0]*x_shape[1], -1))
      theta_sig7_in_val = theta_sig7_temp_val.reshape((x_shape[0]*x_shape[1], -1))
      coeff7_in_val = coeff7_temp_val.reshape((x_shape[0]*x_shape[1], -1))

      argsGMM_val = argsGMM_val + (theta_mu7_in_val, theta_sig7_in_val, coeff7_in_val)
      totaMSE_val+=mse7_val
      totaMAE_val+=mae7_val
      indexSepDynamic_val +=2
      

    if (y_dim>7):
      theta_mu8_temp_val, theta_sig8_temp_val, coeff8_temp_val, y_pred8_temp_val = restResults_val[:4]
      restResults_val = restResults_val[4:]
      theta_mu8_temp_val.name = 'theta_mu8_val'
      theta_sig8_temp_val.name = 'theta_sig8_val'
      coeff8_temp_val.name = 'coeff8_val'
      y_pred8_temp_val.name = 'disaggregation8_val'
      y_pred8_temp_val = T.clip(y_pred8_temp_val,0.0,np.inf)

      prediction_val = T.concatenate([prediction_val, y_pred8_temp_val], axis=2) # before it gets unnormalized

      mse8_val = T.mean((y_pred8_temp_val - y[:,:,7].reshape((y.shape[0],y.shape[1],1)))**2)
      mae8_val = T.mean( T.abs_(y_pred8_temp_val - y[:,:,7].reshape((y.shape[0],y.shape[1],1))) )

      totPred = T.sum(y_pred8_temp_val)
      totReal = T.sum(y[:,:,7])
      relErr8_val =( totPred -  totReal)/ T.maximum(totPred,totReal)
      propAssigned8_val = 1 - T.sum(T.abs_(y_pred8_temp_val - y[:,:,7].reshape((y.shape[0],y.shape[1],1))))/(2*T.sum(x))

      #y_unNormalize = (y[:,:,7] * reader.stdTrain[7]) + reader.meanTrain[7]
      #y_pred8_temp_val = (y_pred8_temp_val * reader.stdTrain[7]) + reader.meanTrain[7]
      #mse8_valUnNorm = T.mean((y_pred8_temp_val - y_unNormalize.reshape((y.shape[0],y.shape[1],1)))**2) # As axis = None is calculated for all
      #mae8_valUnNorm = T.mean( T.abs_(y_pred8_temp_val - y_unNormalize.reshape((y.shape[0],y.shape[1],1))) )
      
      mse8_val.name = 'mse8_val'
      mae8_val.name = 'mae8_val'

      theta_mu8_in_val = theta_mu8_temp_val.reshape((x_shape[0]*x_shape[1], -1))
      theta_sig8_in_val = theta_sig8_temp_val.reshape((x_shape[0]*x_shape[1], -1))
      coeff8_in_val = coeff8_temp_val.reshape((x_shape[0]*x_shape[1], -1))

      argsGMM_val = argsGMM_val + (theta_mu8_in_val, theta_sig8_in_val, coeff8_in_val)
      totaMSE_val+=mse8_val
      totaMAE_val+=mae8_val
      indexSepDynamic_val +=2
      

    recon_val = GMMdisagMulti(y_dim, y_in, theta_mu1_in_val, theta_sig1_in_val, coeff1_in_val, *argsGMM_val)# BiGMM(x_in, theta_mu_in, theta_sig_in, coeff_in, corr_in, binary_in)
    recon_val = recon_val.reshape((x_shape[0], x_shape[1]))
    recon_val.name = 'gmm_out'
    totaMSE_val = totaMSE_val/y_dim
    totaMAE_val = totaMAE_val/y_dim

    recon_term_val = recon_val.sum(axis=0).mean()
    recon_term_val = recon_val.sum(axis=0).mean()
    recon_term_val.name = 'recon_term'


    ######################

    optimizer = Adam(
        lr=lr
    )
    header = "epoch,log,kl,nll_upper_bound,mse,mae\n"
    extension = [
        GradientClipping(batch_size=batch_size),
        EpochCount(epoch, save_path, header),
        Monitoring(freq=monitoring_freq,
                   #ddout=[nll_upper_bound, recon_term, kl_term,totaMSE, totaMAE, mse1, mae1]+ddoutMSEA+ddoutYpreds ,
                   #indexSep=indexSepDynamic,
                   indexDDoutPlot = [13], # adding indexes of ddout for the plotting
                   #, (6,y_pred_temp)
                   instancesPlot = instancesPlot,#0-150
                   data=[Iterator(valid_data, batch_size)],
                   savedFolder = save_path),
        Picklize(freq=monitoring_freq, path=save_path),
        EarlyStopping(freq=monitoring_freq, path=save_path, channel=channel_name),
        WeightNorm()
    ]

    lr_iterations = {0:lr}


    data=Iterator(test_data, batch_size)

    test_fn = theano.function(inputs=[x, y],#[x, y],
                              #givens={x:Xtest},
                              #on_unused_input='ignore',
                              #z=( ,200,1)
                              allow_input_downcast=True,
                              outputs=[prediction_val, recon_term_val, totaMSE_val, totaMAE_val, 
                                        mse1_val,mse2_val,mse3_val,mse4_val,mse5_val,mse6_val,mse7_val,mse8_val,
                                        mae1_val,mae2_val,mae3_val,mae4_val,mae5_val,mae6_val,mae7_val,mae8_val, #unnormalized mae and mse 16 items#
                                        relErr1_val,relErr2_val,relErr3_val,relErr4_val,relErr5_val,relErr6_val,relErr7_val,relErr8_val,
                                        propAssigned1_val, propAssigned2_val,propAssigned3_val,propAssigned4_val,propAssigned5_val,propAssigned6_val,propAssigned7_val,propAssigned8_val],
                              updates=updates_val
                              )
    testOutput = []
    testMetrics2 = []
    perEnergyAssig = []

    bestInstsancesPred = []
    bestInstsancesDisa = []
    bestInstsancesAggr = []

    numBatchTest = 0

    for batch in data:
      outputGeneration = test_fn(batch[0], batch[2])
      testOutput.append(outputGeneration[1:20]) #before 36 including unnormalized metrics
      testMetrics2.append(outputGeneration[20:])

      ########## best mae
      predTest = np.transpose(outputGeneration[0],[1,0,2]).clip(min=0)
      realTest = np.transpose(batch[2],[1,0,2])

      batchMSE = np.mean(np.absolute(predTest-realTest),axis=(1,2))
      idxMin = np.argmin(batchMSE)

      print(np.asarray(idxMin).reshape(1,-1)[0,:])
      print(batchMSE[idxMin])
      for idx in np.asarray(idxMin).reshape(1,-1)[0,:]:

        plt.figure(1)
        plt.plot(predTest[idx])
        plt.legend(appliances)
        plt.savefig(save_path+"/vrnn_disall_test-b{}_Pred_0-{}".format(numBatchTest,idx),format='eps')
        plt.clf()

        plt.figure(2)
        plt.plot(realTest[idx])
        plt.legend(appliances)
        plt.savefig(save_path+"/vrnn_disall_test-b{}_RealDisag_0-{}".format(numBatchTest,idx),format='eps')
        plt.clf()

        plt.figure(3)
        plt.plot(np.transpose(batch[0],[1,0,2])[idx])
        plt.savefig(save_path+"/vrnn_disall_test-b{}_Realagg_0-{}".format(numBatchTest,idx),format='eps')
        plt.clf()

        bestInstsancesPred.append(predTest[idx])
        bestInstsancesDisa.append(realTest[idx])
        bestInstsancesAggr.append(np.transpose(batch[0],[1,0,2])[idx])

      numBatchTest+=1

      sumNumPred = np.sum(predTest, axis=(0,1))
      sumNumReal = np.sum(batch[2], axis=(0,1))
      perEnergy  = np.sum(batch[0], axis=(0,1))
      perEnergyAssig.append((sumNumReal/perEnergy,sumNumPred/perEnergy))

    scipy.io.savemat(save_path+'/testInstances.mat', mdict={'pred': bestInstsancesPred, 'disag':bestInstsancesDisa, 'agg':bestInstsancesAggr})

    testOutput = np.asarray(testOutput)
    testMetrics2 = np.asarray(testMetrics2)
    print(testOutput.shape)
    print(testMetrics2.shape)

    testOutput[:,19:] = 1000 * testOutput[:,19:] # kwtts a watts
    recon_test = testOutput[:, 0].mean()
    mse_test =  testOutput[:, 1].mean()
    mae_test =  testOutput[:, 2].mean()
    mse1_test =  testOutput[:, 3].mean()
    mae1_test =  testOutput[:, 11].mean()
    mse2_test =  testOutput[:, 4].mean()
    mae2_test =  testOutput[:, 12].mean()
    mse3_test =  testOutput[:, 5].mean()
    mae3_test =  testOutput[:, 13].mean()
    mse4_test =  testOutput[:, 6].mean()
    mae4_test =  testOutput[:, 14].mean()
    mse5_test =  testOutput[:, 7].mean()
    mae5_test =  testOutput[:, 15].mean()
    mse6_test =  testOutput[:, 8].mean()
    mae6_test =  testOutput[:, 16].mean()
    mse7_test =  testOutput[:, 9].mean()
    mae7_test =  testOutput[:, 17].mean()
    mse8_test =  testOutput[:, 10].mean()
    mae8_test =  testOutput[:, 18].mean()

    print(testOutput[:,3:11].mean(),testOutput[:,11:19].mean())

    relErr1_test = testMetrics2[:,0].mean()
    relErr2_test = testMetrics2[:,1].mean()
    relErr3_test = testMetrics2[:,2].mean()
    relErr4_test = testMetrics2[:,3].mean()
    relErr5_test = testMetrics2[:,4].mean()
    relErr6_test = testMetrics2[:,5].mean()
    relErr7_test = testMetrics2[:,6].mean()
    relErr8_test = testMetrics2[:,7].mean()

    propAssigned1_test = testMetrics2[:, 8].mean()
    propAssigned2_test = testMetrics2[:, 9].mean()
    propAssigned3_test = testMetrics2[:, 10].mean()
    propAssigned4_test = testMetrics2[:, 11].mean()
    propAssigned5_test = testMetrics2[:, 12].mean()
    propAssigned6_test = testMetrics2[:, 13].mean()
    propAssigned7_test = testMetrics2[:, 14].mean()
    propAssigned8_test = testMetrics2[:, 15].mean()

    fLog = open(save_path+'/output.csv', 'w')
    fLog.write(str(lr_iterations)+"\n")
    fLog.write(str(appliances)+"\n")
    fLog.write(str(windows)+"\n\n")
    fLog.write("logTest,mse1_test,mse2_test,mse3_test,mse4_test,mse5_test, mse6_test,mse7_test,mse8_test,mae1_test,mae2_test,mae3_test,mae4_test,mae5_test, mae6_test,mae7_test,mae8_test,mseTest,maeTest\n")
    #fLog.write("Unnorm,{:.3f},{:.3f},{:.3f},{:.3f},{:.3f},{:.3f},{:.3f},{:.3f},{:.3f},{:.3f},{:.3f},{:.3f},{:.3f},{:.3f},{:.3f},{:.3f},0.0,0.0\n\n".format(mse1_valUnNorm,mse2_valUnNorm,mse3_valUnNorm,mse4_valUnNorm,mse5_valUnNorm, mse6_valUnNorm,mse7_valUnNorm,mse8_valUnNorm,mae1_valUnNorm,mae2_valUnNorm,mae3_valUnNorm,mae4_valUnNorm,mae5_valUnNorm, mae6_valUnNorm,mae7_valUnNorm,mae8_valUnNorm))
    fLog.write("{:.3f},{:.3f},{:.3f},{:.3f},{:.3f},{:.3f},{:.3f},{:.3f},{:.3f},{:.3f},{:.3f},{:.3f},{:.3f},{:.3f},{:.3f},{:.3f},{:.3f},{:.3f},{:.3f}\n\n".format(recon_test,mse1_test,mse2_test,mse3_test,mse4_test,mse5_test, mse6_test,mse7_test,mse8_test,mae1_test,mae2_test,mae3_test,mae4_test,mae5_test, mae6_test,mae7_test,mae8_test,mse_test,mae_test))
    fLog.write("relErr1,relErr2,relErr3,relErr4,relErr5,relErr6,relErr7,relErr8,propAssigned1,propAssigned2,propAssigned3,propAssigned4,propAssigned5\n")
    fLog.write("{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{}\n".format(relErr1_test,relErr2_test,relErr3_test,relErr4_test, relErr5_test,relErr6_test,relErr7_test,relErr8_test,propAssigned1_test,propAssigned2_test,propAssigned3_test, propAssigned4_test,propAssigned5_test,propAssigned6_test,propAssigned7_test,propAssigned8_test))

    fLog.write("batch,perReal1,perReal2,perReal3,perReal4,perReal5,perReal6,perReal7,perReal8,perPredict1,perPredict2,perPredict3,perPredict4,perPredict5,perPredict6,perPredict7,perPredict8\n")
    for batch, item in enumerate(perEnergyAssig):
      fLog.write("{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{}\n".format(batch,item[0][0],item[0][1],item[0][2],item[0][3],item[0][4],item[0][5],item[0][6],item[0][7],item[1][0],item[1][1],item[1][2],item[1][3],item[1][4],item[1][5],item[1][6],item[1][7]))
    fLog.write(pickelModel)
    f = open(save_path+'/outputRealGeneration.pkl', 'wb')
    pickle.dump(outputGeneration, f, -1)
    f.close()
Example #17
0
def main(args):

    trial = int(args['trial'])
    pkl_name = 'rnn_gauss_%d' % trial
    channel_name = 'valid_nll'

    data_path = args['data_path']
    save_path = args['save_path']

    monitoring_freq = int(args['monitoring_freq'])
    force_saving_freq = int(args['force_saving_freq'])
    reset_freq = int(args['reset_freq'])
    epoch = int(args['epoch'])
    batch_size = int(args['batch_size'])
    m_batch_size = int(args['m_batch_size'])
    x_dim = int(args['x_dim'])
    z_dim = int(args['z_dim'])
    rnn_dim = int(args['rnn_dim'])
    lr = float(args['lr'])
    debug = int(args['debug'])

    print "trial no. %d" % trial
    print "batch size %d" % batch_size
    print "learning rate %f" % lr
    print "saving pkl file '%s'" % pkl_name
    print "to the save path '%s'" % save_path

    x2s_dim = 800
    s2x_dim = 800
    target_dim = x_dim

    file_name = 'blizzard_unseg_tbptt'
    normal_params = np.load(data_path + file_name + '_normal.npz')
    X_mean = normal_params['X_mean']
    X_std = normal_params['X_std']

    model = Model()
    train_data = Blizzard_tbptt(name='train',
                                path=data_path,
                                frame_size=x_dim,
                                file_name=file_name,
                                X_mean=X_mean,
                                X_std=X_std)

    valid_data = Blizzard_tbptt(name='valid',
                                path=data_path,
                                frame_size=x_dim,
                                file_name=file_name,
                                X_mean=X_mean,
                                X_std=X_std)

    x = train_data.theano_vars()
    m_x = valid_data.theano_vars()

    if debug:
        x.tag.test_value = np.zeros((15, batch_size, x_dim),
                                    dtype=theano.config.floatX)
        m_x.tag.test_value = np.zeros((15, m_batch_size, x_dim),
                                      dtype=theano.config.floatX)

    init_W = InitCell('rand')
    init_U = InitCell('ortho')
    init_b = InitCell('zeros')
    init_b_sig = InitCell('const', mean=0.6)

    x_1 = FullyConnectedLayer(name='x_1',
                              parent=['x_t'],
                              parent_dim=[x_dim],
                              nout=x2s_dim,
                              unit='relu',
                              init_W=init_W,
                              init_b=init_b)

    x_2 = FullyConnectedLayer(name='x_2',
                              parent=['x_1'],
                              parent_dim=[x2s_dim],
                              nout=x2s_dim,
                              unit='relu',
                              init_W=init_W,
                              init_b=init_b)

    x_3 = FullyConnectedLayer(name='x_3',
                              parent=['x_2'],
                              parent_dim=[x2s_dim],
                              nout=x2s_dim,
                              unit='relu',
                              init_W=init_W,
                              init_b=init_b)

    x_4 = FullyConnectedLayer(name='x_4',
                              parent=['x_3'],
                              parent_dim=[x2s_dim],
                              nout=x2s_dim,
                              unit='relu',
                              init_W=init_W,
                              init_b=init_b)

    rnn = LSTM(name='rnn',
               parent=['x_4'],
               parent_dim=[x2s_dim],
               nout=rnn_dim,
               unit='tanh',
               init_W=init_W,
               init_U=init_U,
               init_b=init_b)

    theta_1 = FullyConnectedLayer(name='theta_1',
                                  parent=['s_tm1'],
                                  parent_dim=[rnn_dim],
                                  nout=s2x_dim,
                                  unit='relu',
                                  init_W=init_W,
                                  init_b=init_b)

    theta_2 = FullyConnectedLayer(name='theta_2',
                                  parent=['theta_1'],
                                  parent_dim=[s2x_dim],
                                  nout=s2x_dim,
                                  unit='relu',
                                  init_W=init_W,
                                  init_b=init_b)

    theta_3 = FullyConnectedLayer(name='theta_3',
                                  parent=['theta_2'],
                                  parent_dim=[s2x_dim],
                                  nout=s2x_dim,
                                  unit='relu',
                                  init_W=init_W,
                                  init_b=init_b)

    theta_4 = FullyConnectedLayer(name='theta_4',
                                  parent=['theta_3'],
                                  parent_dim=[s2x_dim],
                                  nout=s2x_dim,
                                  unit='relu',
                                  init_W=init_W,
                                  init_b=init_b)

    theta_mu = FullyConnectedLayer(name='theta_mu',
                                   parent=['theta_4'],
                                   parent_dim=[s2x_dim],
                                   nout=target_dim,
                                   unit='linear',
                                   init_W=init_W,
                                   init_b=init_b)

    theta_sig = FullyConnectedLayer(name='theta_sig',
                                    parent=['theta_4'],
                                    parent_dim=[s2x_dim],
                                    nout=target_dim,
                                    unit='softplus',
                                    cons=1e-4,
                                    init_W=init_W,
                                    init_b=init_b_sig)

    nodes = [
        rnn, x_1, x_2, x_3, x_4, theta_1, theta_2, theta_3, theta_4, theta_mu,
        theta_sig
    ]

    params = OrderedDict()
    for node in nodes:
        if node.initialize() is not None:
            params.update(node.initialize())
    params = init_tparams(params)

    step_count = sharedX(0, name='step_count')
    last_rnn = np.zeros((batch_size, rnn_dim * 2), dtype=theano.config.floatX)
    rnn_tm1 = sharedX(last_rnn, name='rnn_tm1')
    shared_updates = OrderedDict()
    shared_updates[step_count] = step_count + 1

    s_0 = T.switch(T.eq(T.mod(step_count, reset_freq), 0),
                   rnn.get_init_state(batch_size), rnn_tm1)

    x_1_temp = x_1.fprop([x], params)
    x_2_temp = x_2.fprop([x_1_temp], params)
    x_3_temp = x_3.fprop([x_2_temp], params)
    x_4_temp = x_4.fprop([x_3_temp], params)

    def inner_fn(x_t, s_tm1):

        s_t = rnn.fprop([[x_t], [s_tm1]], params)

        return s_t

    (s_temp, updates) = theano.scan(fn=inner_fn,
                                    sequences=[x_4_temp],
                                    outputs_info=[s_0])

    for k, v in updates.iteritems():
        k.default_update = v

    shared_updates[rnn_tm1] = s_temp[-1]
    s_temp = concatenate([s_0[None, :, :], s_temp[:-1]], axis=0)
    theta_1_temp = theta_1.fprop([s_temp], params)
    theta_2_temp = theta_2.fprop([theta_1_temp], params)
    theta_3_temp = theta_3.fprop([theta_2_temp], params)
    theta_4_temp = theta_4.fprop([theta_3_temp], params)
    theta_mu_temp = theta_mu.fprop([theta_4_temp], params)
    theta_sig_temp = theta_sig.fprop([theta_4_temp], params)

    recon = Gaussian(x, theta_mu_temp, theta_sig_temp)
    recon_term = recon.mean()
    recon_term.name = 'nll'

    m_x_1_temp = x_1.fprop([m_x], params)
    m_x_2_temp = x_2.fprop([m_x_1_temp], params)
    m_x_3_temp = x_3.fprop([m_x_2_temp], params)
    m_x_4_temp = x_4.fprop([m_x_3_temp], params)

    m_s_0 = rnn.get_init_state(m_batch_size)

    (m_s_temp, m_updates) = theano.scan(fn=inner_fn,
                                        sequences=[m_x_4_temp],
                                        outputs_info=[m_s_0])

    for k, v in m_updates.iteritems():
        k.default_update = v

    m_s_temp = concatenate([m_s_0[None, :, :], m_s_temp[:-1]], axis=0)
    m_theta_1_temp = theta_1.fprop([m_s_temp], params)
    m_theta_2_temp = theta_2.fprop([m_theta_1_temp], params)
    m_theta_3_temp = theta_3.fprop([m_theta_2_temp], params)
    m_theta_4_temp = theta_4.fprop([m_theta_3_temp], params)
    m_theta_mu_temp = theta_mu.fprop([m_theta_4_temp], params)
    m_theta_sig_temp = theta_sig.fprop([m_theta_4_temp], params)

    m_recon = Gaussian(m_x, m_theta_mu_temp, m_theta_sig_temp)
    m_recon_term = m_recon.mean()
    m_recon_term.name = 'nll'

    max_x = m_x.max()
    mean_x = m_x.mean()
    min_x = m_x.min()
    max_x.name = 'max_x'
    mean_x.name = 'mean_x'
    min_x.name = 'min_x'

    max_theta_mu = m_theta_mu_temp.max()
    mean_theta_mu = m_theta_mu_temp.mean()
    min_theta_mu = m_theta_mu_temp.min()
    max_theta_mu.name = 'max_theta_mu'
    mean_theta_mu.name = 'mean_theta_mu'
    min_theta_mu.name = 'min_theta_mu'

    max_theta_sig = m_theta_sig_temp.max()
    mean_theta_sig = m_theta_sig_temp.mean()
    min_theta_sig = m_theta_sig_temp.min()
    max_theta_sig.name = 'max_theta_sig'
    mean_theta_sig.name = 'mean_theta_sig'
    min_theta_sig.name = 'min_theta_sig'

    model.inputs = [x]
    model.params = params
    model.nodes = nodes
    model.set_updates(shared_updates)

    optimizer = Adam(lr=lr)

    monitor_fn = theano.function(inputs=[m_x],
                                 outputs=[
                                     m_recon_term, max_theta_sig,
                                     mean_theta_sig, min_theta_sig, max_x,
                                     mean_x, min_x, max_theta_mu,
                                     mean_theta_mu, min_theta_mu
                                 ],
                                 on_unused_input='ignore')

    extension = [
        GradientClipping(batch_size=batch_size, check_nan=1),
        EpochCount(epoch),
        Monitoring(freq=monitoring_freq,
                   monitor_fn=monitor_fn,
                   ddout=[
                       m_recon_term, max_theta_sig, mean_theta_sig,
                       min_theta_sig, max_x, mean_x, min_x, max_theta_mu,
                       mean_theta_mu, min_theta_mu
                   ],
                   data=[
                       Iterator(train_data, m_batch_size, start=0, end=112640),
                       Iterator(valid_data,
                                m_batch_size,
                                start=2040064,
                                end=2152704)
                   ]),
        Picklize(freq=monitoring_freq,
                 force_save_freq=force_saving_freq,
                 path=save_path),
        EarlyStopping(freq=monitoring_freq,
                      force_save_freq=force_saving_freq,
                      path=save_path,
                      channel=channel_name),
        WeightNorm()
    ]

    mainloop = Training(name=pkl_name,
                        data=Iterator(train_data,
                                      batch_size,
                                      start=0,
                                      end=2040064),
                        model=model,
                        optimizer=optimizer,
                        cost=recon_term,
                        outputs=[recon_term],
                        extension=extension)
    mainloop.run()
def main(args):

    theano.optimizer = 'fast_compile'
    #theano.config.exception_verbosity='high'

    trial = int(args['trial'])
    pkl_name = 'vrnn_gmm_%d' % trial
    channel_name = 'valid_nll_upper_bound'

    data_path = args['data_path']
    save_path = args[
        'save_path']  #+'/aggVSdisag_distrib/'+datetime.datetime.now().strftime("%y-%m-%d_%H-%M")
    period = int(args['period'])
    n_steps = int(args['n_steps'])
    stride_train = int(args['stride_train'])
    stride_test = int(args['stride_test'])

    monitoring_freq = int(args['monitoring_freq'])
    epoch = int(args['epoch'])
    batch_size = int(args['batch_size'])
    x_dim = int(args['x_dim'])
    y_dim = int(args['y_dim'])
    z_dim = int(args['z_dim'])
    rnn_dim = int(args['rnn_dim'])
    k = int(args['num_k'])  #a mixture of K Gaussian functions
    lr = float(args['lr'])
    debug = int(args['debug'])

    print "trial no. %d" % trial
    print "batch size %d" % batch_size
    print "learning rate %f" % lr
    print "saving pkl file '%s'" % pkl_name
    print "to the save path '%s'" % save_path

    q_z_dim = 12  #150
    p_z_dim = 12  #150
    p_x_dim = 20  #250
    x2s_dim = 15  #250
    z2s_dim = 20  #150
    target_dim = k  # As different appliances are separeted in theta_mu1, theta_mu2, etc... each one is just created from k different Gaussians

    model = Model()
    train_data = UKdale(
        name='train',
        prep='normalize',
        cond=True,  # False
        path=data_path,
        period=period,
        n_steps=n_steps,
        x_dim=x_dim,
        stride_train=stride_train,
        stride_test=stride_test)

    X_mean = train_data.X_mean
    X_std = train_data.X_std

    valid_data = UKdale(
        name='valid',
        prep='normalize',
        cond=True,  # False
        path=data_path,
        X_mean=X_mean,
        X_std=X_std,
        period=period,
        n_steps=n_steps,
        x_dim=x_dim,
        stride_train=stride_train,
        stride_test=stride_test)

    init_W = InitCell('rand')
    init_U = InitCell('ortho')
    init_b = InitCell('zeros')
    init_b_sig = InitCell('const', mean=0.6)

    x, mask, y, y_mask = train_data.theano_vars()
    x.name = 'x_original'
    if debug:
        x.tag.test_value = np.zeros((15, batch_size, x_dim), dtype=np.float32)
        temp = np.ones((15, batch_size), dtype=np.float32)
        temp[:, -2:] = 0.
        mask.tag.test_value = temp

    x_1 = FullyConnectedLayer(name='x_1',
                              parent=['x_t'],
                              parent_dim=[x_dim],
                              nout=x2s_dim,
                              unit='relu',
                              init_W=init_W,
                              init_b=init_b)

    z_1 = FullyConnectedLayer(name='z_1',
                              parent=['z_t'],
                              parent_dim=[z_dim],
                              nout=z2s_dim,
                              unit='relu',
                              init_W=init_W,
                              init_b=init_b)

    rnn = LSTM(name='rnn',
               parent=['x_1', 'z_1'],
               parent_dim=[x2s_dim, z2s_dim],
               nout=rnn_dim,
               unit='tanh',
               init_W=init_W,
               init_U=init_U,
               init_b=init_b)
    '''
    dissag_pred = FullyConnectedLayer(name='disag_1',
                                  parent=['s_tm1'],
                                  parent_dim=[rnn_dim],
                                  nout=num_apps,
                                  unit='relu',
                                  init_W=init_W,
                                  init_b=init_b)
    '''
    phi_1 = FullyConnectedLayer(name='phi_1',
                                parent=['x_1', 's_tm1'],
                                parent_dim=[x2s_dim, rnn_dim],
                                nout=q_z_dim,
                                unit='relu',
                                init_W=init_W,
                                init_b=init_b)

    phi_mu = FullyConnectedLayer(name='phi_mu',
                                 parent=['phi_1'],
                                 parent_dim=[q_z_dim],
                                 nout=z_dim,
                                 unit='linear',
                                 init_W=init_W,
                                 init_b=init_b)

    phi_sig = FullyConnectedLayer(name='phi_sig',
                                  parent=['phi_1'],
                                  parent_dim=[q_z_dim],
                                  nout=z_dim,
                                  unit='softplus',
                                  cons=1e-4,
                                  init_W=init_W,
                                  init_b=init_b_sig)

    prior_1 = FullyConnectedLayer(name='prior_1',
                                  parent=['s_tm1'],
                                  parent_dim=[rnn_dim],
                                  nout=p_z_dim,
                                  unit='relu',
                                  init_W=init_W,
                                  init_b=init_b)

    prior_mu = FullyConnectedLayer(name='prior_mu',
                                   parent=['prior_1'],
                                   parent_dim=[p_z_dim],
                                   nout=z_dim,
                                   unit='linear',
                                   init_W=init_W,
                                   init_b=init_b)

    prior_sig = FullyConnectedLayer(name='prior_sig',
                                    parent=['prior_1'],
                                    parent_dim=[p_z_dim],
                                    nout=z_dim,
                                    unit='softplus',
                                    cons=1e-4,
                                    init_W=init_W,
                                    init_b=init_b_sig)

    theta_1 = FullyConnectedLayer(name='theta_1',
                                  parent=['z_1', 's_tm1'],
                                  parent_dim=[z2s_dim, rnn_dim],
                                  nout=p_x_dim,
                                  unit='relu',
                                  init_W=init_W,
                                  init_b=init_b)

    theta_mu = []
    theta_sig = []
    coeff = []
    for i in range(y_dim):
        theta_mu.append(
            FullyConnectedLayer(name='theta_mu' + str(i),
                                parent=['theta_1'],
                                parent_dim=[p_x_dim],
                                nout=target_dim,
                                unit='linear',
                                init_W=init_W,
                                init_b=init_b))
        theta_sig.append(
            FullyConnectedLayer(name='theta_sig' + str(i),
                                parent=['theta_1'],
                                parent_dim=[p_x_dim],
                                nout=target_dim,
                                unit='softplus',
                                cons=1e-4,
                                init_W=init_W,
                                init_b=init_b_sig))
        coeff.append(
            FullyConnectedLayer(name='coeff' + str(i),
                                parent=['theta_1'],
                                parent_dim=[p_x_dim],
                                nout=k,
                                unit='softmax',
                                init_W=init_W,
                                init_b=init_b))
    '''
    theta_mu1 = FullyConnectedLayer(name='theta_mu1',
                                   parent=['theta_1'],
                                   parent_dim=[p_x_dim],
                                   nout=target_dim,
                                   unit='linear',
                                   init_W=init_W,
                                   init_b=init_b)

    theta_mu2 = FullyConnectedLayer(name='theta_mu2',
                                   parent=['theta_1'],
                                   parent_dim=[p_x_dim],
                                   nout=target_dim,
                                   unit='linear',
                                   init_W=init_W,
                                   init_b=init_b)

    theta_mu3 = FullyConnectedLayer(name='theta_mu3',
                                   parent=['theta_1'],
                                   parent_dim=[p_x_dim],
                                   nout=target_dim,
                                   unit='linear',
                                   init_W=init_W,
                                   init_b=init_b)

    theta_sig1 = FullyConnectedLayer(name='theta_sig1',
                                    parent=['theta_1'],
                                    parent_dim=[p_x_dim],
                                    nout=target_dim,
                                    unit='softplus',
                                    cons=1e-4,
                                    init_W=init_W,
                                    init_b=init_b_sig)

    theta_sig2 = FullyConnectedLayer(name='theta_sig2',
                                    parent=['theta_1'],
                                    parent_dim=[p_x_dim],
                                    nout=target_dim,
                                    unit='softplus',
                                    cons=1e-4,
                                    init_W=init_W,
                                    init_b=init_b_sig)

    theta_sig3 = FullyConnectedLayer(name='theta_sig3',
                                    parent=['theta_1'],
                                    parent_dim=[p_x_dim],
                                    nout=target_dim,
                                    unit='softplus',
                                    cons=1e-4,
                                    init_W=init_W,
                                    init_b=init_b_sig)

    coeff1 = FullyConnectedLayer(name='coeff1',
                                parent=['theta_1'],
                                parent_dim=[p_x_dim],
                                nout=k,
                                unit='softmax',
                                init_W=init_W,
                                init_b=init_b)

    coeff2 = FullyConnectedLayer(name='coeff2',
                                parent=['theta_1'],
                                parent_dim=[p_x_dim],
                                nout=k,
                                unit='softmax',
                                init_W=init_W,
                                init_b=init_b)

    coeff3 = FullyConnectedLayer(name='coeff3',
                                parent=['theta_1'],
                                parent_dim=[p_x_dim],
                                nout=k,
                                unit='softmax',
                                init_W=init_W,
                                init_b=init_b)
    '''
    corr = FullyConnectedLayer(name='corr',
                               parent=['theta_1'],
                               parent_dim=[p_x_dim],
                               nout=k,
                               unit='tanh',
                               init_W=init_W,
                               init_b=init_b)

    binary = FullyConnectedLayer(name='binary',
                                 parent=['theta_1'],
                                 parent_dim=[p_x_dim],
                                 nout=1,
                                 unit='sigmoid',
                                 init_W=init_W,
                                 init_b=init_b)

    nodes = [
        rnn,
        x_1,
        z_1,  #dissag_pred,
        phi_1,
        phi_mu,
        phi_sig,
        prior_1,
        prior_mu,
        prior_sig,
        theta_1
    ]  #theta_mu1, theta_mu2, theta_mu3, theta_sig1, theta_sig2, theta_sig3, coeff1, coeff2 ,coeff3]
    for i in range(y_dim):
        nodes.append(theta_mu[i])  #, corr, binary
        nodes.append(theta_sig[i])
        nodes.append(coeff[i])

    params = OrderedDict()

    for node in nodes:
        if node.initialize() is not None:
            params.update(node.initialize())

    params = init_tparams(params)

    s_0 = rnn.get_init_state(batch_size)

    x_1_temp = x_1.fprop([x], params)

    def inner_fn(x_t, s_tm1):

        phi_1_t = phi_1.fprop([x_t, s_tm1], params)
        phi_mu_t = phi_mu.fprop([phi_1_t], params)
        phi_sig_t = phi_sig.fprop([phi_1_t], params)

        prior_1_t = prior_1.fprop([s_tm1], params)
        prior_mu_t = prior_mu.fprop([prior_1_t], params)
        prior_sig_t = prior_sig.fprop([prior_1_t], params)

        z_t = Gaussian_sample(
            phi_mu_t, phi_sig_t
        )  #in the original code it is gaussian. GMM is for the generation
        z_1_t = z_1.fprop([z_t], params)

        theta_1_t = theta_1.fprop([z_1_t, s_tm1], params)

        #theta_mu_t= TL.TypedListType(T.dtensor3)()
        theta_mu_t = TL.TypedListType(T.TensorType('float64', (False, ) * 2))()
        #heta_mu_t = []#T.ftensor3('theta_mu_t')
        #theta_mu_t = [theta_mu_y.fprop([theta_1_t], params) for theta_mu_y in theta_mu]
        for theta_mu_y in theta_mu:
            theta_mu_t.append(theta_mu_y.fprop([theta_1_t], params))

        theta_sig_t = TL.TypedListType(T.TensorType('float64',
                                                    (False, ) * 2))()
        for theta_sig_y in theta_sig:
            theta_sig_t.append(theta_sig_y.fprop([theta_1_t], params))

        coeff_t = TL.TypedListType(T.TensorType('float64', (False, ) * 2))()
        for theta_coef_y in coeff:
            coeff_t.append(theta_coef_y.fprop([theta_1_t], params))
        '''
        theta_sig_t = [theta_sig_y.fprop([theta_1_t], params) for theta_sig_y in theta_sig]
        coeff_t = [theta_coef_y.fprop([theta_1_t], params) for theta_coef_y in coeff]
        '''
        '''
        theta_mu1_t = theta_mu1.fprop([theta_1_t], params)
        theta_sig1_t = theta_sig1.fprop([theta_1_t], params)
        coeff1_t = coeff1.fprop([theta_1_t], params)

        theta_mu2_t = theta_mu2.fprop([theta_1_t], params)
        theta_sig2_t = theta_sig2.fprop([theta_1_t], params)
        coeff2_t = coeff2.fprop([theta_1_t], params)

        theta_mu3_t = theta_mu3.fprop([theta_1_t], params)
        theta_sig3_t = theta_sig3.fprop([theta_1_t], params)
        coeff3_t = coeff3.fprop([theta_1_t], params)
        '''
        #corr_t = corr.fprop([theta_1_t], params)
        #binary_t = binary.fprop([theta_1_t], params)

        # I was missing this reshape that is done before BiGMM in the original code
        '''
        theta_mu_in = theta_mu_t.reshape((x_t[0]*x_t[1], -1))
        theta_sig_in = theta_sig_t.reshape((x_t[0]*x_t[1], -1))
        coeff_in = coeff_t.reshape((x_t[0]*x_t[1], -1))
        
        y_pred1 = GMM_sampleY(theta_mu1_t, theta_sig1_t, coeff1_t) #Gaussian_sample(theta_mu_t, theta_sig_t)
        y_pred2 = GMM_sampleY(theta_mu2_t, theta_sig2_t, coeff2_t)
        y_pred3 = GMM_sampleY(theta_mu3_t, theta_sig3_t, coeff3_t)
        '''

        #y_pred = [GMM_sampleY(theta_mu_t[i], theta_sig_t[i], coeff_t[i]) for i in range(y_dim)]#T.stack([y_pred1,y_pred2],axis = 0 )
        s_t = rnn.fprop([[x_t, z_1_t], [s_tm1]], params)
        #y_pred = dissag_pred.fprop([s_t], params)

        #return s_t, phi_mu_t, phi_sig_t, prior_mu_t, prior_sig_t, z_t,  z_1_t, theta_1_t, theta_mu_t[0], theta_sig_t[0], coeff_t[0], theta_mu_t[1], theta_sig_t[1], coeff_t[1], theta_mu_t[2], theta_sig_t[2], coeff_t[2],y_pred1, y_pred2, y_pred3
        return s_t, phi_mu_t, phi_sig_t, prior_mu_t, prior_sig_t, z_t, z_1_t, theta_mu_t, theta_sig_t, coeff_t  #,y_pred
        #corr_temp, binary_temp


#    ((s_temp, phi_mu_temp, phi_sig_temp, prior_mu_temp, prior_sig_temp,z_t_temp, z_1_temp, theta_1_temp, theta_mu_temp, theta_sig_temp, coeff_temp, y_pred_temp), updates) =\
    [s_temp, phi_mu_temp, phi_sig_temp, prior_mu_temp, prior_sig_temp,z_t_temp, z_1_temp, theta_mu_temp, theta_sig_temp, coeff_temp], updates =\
        theano.scan(fn=inner_fn,
                    sequences=[x_1_temp],
                    outputs_info=[s_0,  None, None, None, None, None, None, None, None,None])

    for k, v in updates.iteritems():
        k.default_update = v

    s_temp = concatenate(
        [s_0[None, :, :], s_temp[:-1]], axis=0
    )  # seems like this is for creating an additional dimension to s_0
    '''
    theta_1_temp = theta_1.fprop([z_1_temp, s_temp], params)
    theta_mu_temp = theta_mu.fprop([theta_1_temp], params)
    theta_sig_temp = theta_sig.fprop([theta_1_temp], params)
    coeff_temp = coeff.fprop([theta_1_temp], params)
    corr_temp = corr.fprop([theta_1_temp], params)
    binary_temp = binary.fprop([theta_1_temp], params)
    '''

    s_temp.name = 'h_1'  #gisse
    z_1_temp.name = 'z_1'  #gisse
    z_t_temp.name = 'z'

    theta_mu_temp.name = 'theta_mu'
    theta_sig_temp.name = 'theta_sig'
    coeff_temp.name = 'coeff'
    #y_pred_temp.name = 'disaggregation'
    '''
    theta_mu1_temp.name = 'theta_mu1'
    theta_sig1_temp.name = 'theta_sig1'
    coeff1_temp.name = 'coeff1'

    theta_mu2_temp.name = 'theta_mu2'
    theta_sig2_temp.name = 'theta_sig2'
    coeff2_temp.name = 'coeff2'
    #corr_temp.name = 'corr'
    #binary_temp.name = 'binary'
    #x_pred_temp.name = 'x_reconstructed'
    y_pred1_temp.name = 'disaggregation1'
    y_pred2_temp.name = 'disaggregation2'
    y_pred3_temp.name = 'disaggregation3'
    '''

    #mse = T.mean((y_pred_temp - y)**2) # cause mse can be 26000
    #mse.name = 'mse'
    kl_temp = KLGaussianGaussian(phi_mu_temp, phi_sig_temp, prior_mu_temp,
                                 prior_sig_temp)

    x_shape = x.shape
    y_shape = y.shape
    x_in = x.reshape((x_shape[0] * x_shape[1], -1))
    y_in = y.reshape((y_shape[0] * y_shape[1], -1))

    theta_mu_in = [
        theta_mu_temp[i].reshape((y_shape[0] * y_shape[1], -1))
        for i in range(y_dim)
    ]
    theta_sig_in = [
        theta_sig_temp[i].reshape((y_shape[0] * y_shape[1], -1))
        for i in range(y_dim)
    ]
    coeff_in = [
        coeff_temp[i].reshape((y_shape[0] * y_shape[1], -1))
        for i in range(y_dim)
    ]
    '''
    theta_mu1_in = theta_mu1_temp.reshape((y_shape[0]*y_shape[1], -1))
    theta_sig1_in = theta_sig1_temp.reshape((y_shape[0]*y_shape[1], -1))
    coeff1_in = coeff1_temp.reshape((y_shape[0]*y_shape[1], -1))

    theta_mu2_in = theta_mu2_temp.reshape((y_shape[0]*y_shape[1], -1))
    theta_sig2_in = theta_sig2_temp.reshape((y_shape[0]*y_shape[1], -1))
    coeff2_in = coeff2_temp.reshape((y_shape[0]*y_shape[1], -1))

    theta_mu3_in = theta_mu3_temp.reshape((y_shape[0]*y_shape[1], -1))
    theta_sig3_in = theta_sig3_temp.reshape((y_shape[0]*y_shape[1], -1))
    coeff3_in = coeff3_temp.reshape((y_shape[0]*y_shape[1], -1))
    '''

    #corr_in = corr_temp.reshape((x_shape[0]*x_shape[1], -1))
    #binary_in = binary_temp.reshape((x_shape[0]*x_shape[1], -1))

    recon = GMMdisagMulti(
        y_in, y_dim, theta_mu_in, theta_sig_in, coeff_in
    )  # BiGMM(x_in, theta_mu_in, theta_sig_in, coeff_in, corr_in, binary_in)
    recon = recon.reshape((x_shape[0], x_shape[1]))
    recon.name = 'gmm_out'

    #recon = recon * mask

    recon_term = recon.sum(axis=0).mean()
    recon_term.name = 'recon_term'

    #kl_temp = kl_temp * mask

    kl_term = kl_temp.sum(axis=0).mean()
    kl_term.name = 'kl_term'

    #nll_upper_bound_0 = recon_term + kl_term
    #nll_upper_bound_0.name = 'nll_upper_bound_0'
    nll_upper_bound = recon_term + kl_term  #+ mse
    nll_upper_bound.name = 'nll_upper_bound'
    '''
    max_x = x.max()
    mean_x = x.mean()
    min_x = x.min()
    max_x.name = 'max_x'
    mean_x.name = 'mean_x'
    min_x.name = 'min_x'

    max_theta_mu = theta_mu_in.max()
    mean_theta_mu = theta_mu_in.mean()
    min_theta_mu = theta_mu_in.min()
    max_theta_mu.name = 'max_theta_mu'
    mean_theta_mu.name = 'mean_theta_mu'
    min_theta_mu.name = 'min_theta_mu'

    max_theta_sig = theta_sig_in.max()
    mean_theta_sig = theta_sig_in.mean()
    min_theta_sig = theta_sig_in.min()
    max_theta_sig.name = 'max_theta_sig'
    mean_theta_sig.name = 'mean_theta_sig'
    min_theta_sig.name = 'min_theta_sig'

    coeff_max = coeff_in.max()
    coeff_min = coeff_in.min()
    coeff_mean_max = coeff_in.mean(axis=0).max()
    coeff_mean_min = coeff_in.mean(axis=0).min()
    coeff_max.name = 'coeff_max'
    coeff_min.name = 'coeff_min'
    coeff_mean_max.name = 'coeff_mean_max'
    coeff_mean_min.name = 'coeff_mean_min'

    max_phi_sig = phi_sig_temp.max()
    mean_phi_sig = phi_sig_temp.mean()
    min_phi_sig = phi_sig_temp.min()
    max_phi_sig.name = 'max_phi_sig'
    mean_phi_sig.name = 'mean_phi_sig'
    min_phi_sig.name = 'min_phi_sig'

    max_prior_sig = prior_sig_temp.max()
    mean_prior_sig = prior_sig_temp.mean()
    min_prior_sig = prior_sig_temp.min()
    max_prior_sig.name = 'max_prior_sig'
    mean_prior_sig.name = 'mean_prior_sig'
    min_prior_sig.name = 'min_prior_sig'
    '''
    model.inputs = [x, mask, y, y_mask]
    model.params = params
    model.nodes = nodes

    optimizer = Adam(lr=lr)

    extension = [
        GradientClipping(batch_size=batch_size),
        EpochCount(epoch),
        Monitoring(
            freq=monitoring_freq,
            ddout=[
                nll_upper_bound,
                recon_term,
                kl_term,  #2
                theta_mu_temp,
                theta_sig_temp,
                z_t_temp,  #y_pred_temp, 
                coeff_temp,
                s_temp,
                z_1_temp
            ],
            indexSep=1,  # 0 for previous function, 1 for ploting personalized
            indexDDoutPlot=[(3, theta_mu_temp), (5, z_t_temp)
                            ],  # adding indexes of ddout for the plotting
            #, (6,y_pred_temp)
            instancesPlot=[20, 100],  #0-150
            data=[Iterator(valid_data, batch_size)],
            savedFolder=save_path),
        Picklize(freq=monitoring_freq, path=save_path),
        EarlyStopping(freq=monitoring_freq,
                      path=save_path,
                      channel=channel_name),
        WeightNorm()
    ]

    mainloop = Training(name=pkl_name,
                        data=Iterator(train_data, batch_size),
                        model=model,
                        optimizer=optimizer,
                        cost=nll_upper_bound,
                        outputs=[nll_upper_bound],
                        extension=extension)
    mainloop.run()
    fLog = open(save_path + '/output.csv', 'w')
    print('Printing')
    print(len(mainloop.trainlog.monitor['nll_upper_bound']))
    fLog.write("log,kl,nll_upper_bound\n")
    for i, item in enumerate(mainloop.trainlog.monitor['nll_upper_bound']):
        a = mainloop.trainlog.monitor['recon_term'][i]
        b = mainloop.trainlog.monitor['kl_term'][i]
        #c = mainloop.trainlog.monitor['mse'][i]
        d = mainloop.trainlog.monitor['nll_upper_bound']
        #print(a,b)
        fLog.write("{},{},{}\n".format(a, b, d))
    fLog.close()
Example #19
0
def main(args):

    theano.optimizer = 'fast_compile'
    #theano.config.exception_verbosity='high'

    trial = int(args['trial'])
    pkl_name = 'dp_disall-sch_%d' % trial
    channel_name = 'mae'

    data_path = args['data_path']
    save_path = args[
        'save_path']  #+'/aggVSdisag_distrib/'+datetime.datetime.now().strftime("%y-%m-%d_%H-%M")
    period = int(args['period'])
    n_steps = int(args['n_steps'])
    stride_train = int(args['stride_train'])
    stride_test = int(args['stride_test'])
    loadType = int(args['loadType'])

    flgMSE = int(args['flgMSE'])
    monitoring_freq = int(args['monitoring_freq'])
    epoch = int(args['epoch'])
    batch_size = int(args['batch_size'])
    x_dim = int(args['x_dim'])
    y_dim = int(args['y_dim'])
    z_dim = int(args['z_dim'])
    rnn_dim = int(args['rnn_dim'])
    k = int(args['num_k'])  #a mixture of K Gaussian functions
    lr = float(args['lr'])
    origLR = lr
    debug = int(args['debug'])
    kSchedSamp = int(args['kSchedSamp'])
    typeActivFunc = args['typeActivFunc']

    print "trial no. %d" % trial
    print "batch size %d" % batch_size
    print "learning rate %f" % lr
    print "saving pkl file '%s'" % pkl_name
    print "to the save path '%s'" % save_path
    print(str(windows))

    q_z_dim = 500
    p_z_dim = 500
    p_x_dim = 500
    x2s_dim = 200
    y2s_dim = 200
    z2s_dim = 200
    lr_iterations = {0: lr}

    target_dim = k  # As different appliances are separeted in theta_mu1, theta_mu2, etc... each one is just created from k different Gaussians

    model = Model()
    Xtrain, ytrain, Xval, yval, Xtest, ytest, reader = fetch_redd(
        data_path,
        windows,
        appliances,
        numApps=-1,
        period=period,
        n_steps=n_steps,
        stride_train=stride_train,
        stride_test=stride_test,
        trainPer=0.5,
        valPer=0.25,
        testPer=0.25,
        typeLoad=loadType,
        flgAggSumScaled=1,
        flgFilterZeros=1)

    print(Xtrain.shape, Xval.shape, Xtest.shape, ytrain.shape, yval.shape,
          ytest.shape)
    print("Mean ", reader.meanTraining)
    print("Std", reader.stdTraining)
    instancesPlot = {0: [4]}

    train_data = Redd(
        name='train',
        prep='normalize',
        cond=True,  # False
        #path=data_path,
        inputX=Xtrain,
        labels=ytrain)

    X_mean = train_data.X_mean
    X_std = train_data.X_std

    valid_data = Redd(
        name='valid',
        prep='normalize',
        cond=True,  # False
        #path=data_path,
        X_mean=X_mean,
        X_std=X_std,
        inputX=Xval,
        labels=yval)

    test_data = Redd(
        name='valid',
        prep='normalize',
        cond=True,  # False
        #path=data_path,
        X_mean=X_mean,
        X_std=X_std,
        inputX=Xtest,
        labels=ytest)

    init_W = InitCell('rand')
    init_U = InitCell('ortho')
    init_b = InitCell('zeros')
    init_b_sig = InitCell('const', mean=0.6)

    x, mask, y, y_mask = train_data.theano_vars()
    scheduleSamplingMask = T.fvector('schedMask')

    x.name = 'x_original'

    if debug:
        x.tag.test_value = np.zeros((15, batch_size, x_dim), dtype=np.float32)
        temp = np.ones((15, batch_size), dtype=np.float32)
        temp[:, -2:] = 0.
        mask.tag.test_value = temp

    x_1 = FullyConnectedLayer(name='x_1',
                              parent=['x_t'],
                              parent_dim=[x_dim],
                              nout=x2s_dim,
                              unit='relu',
                              init_W=init_W,
                              init_b=init_b)

    y_1 = FullyConnectedLayer(name='y_1',
                              parent=['y_t'],
                              parent_dim=[y_dim],
                              nout=y2s_dim,
                              unit='relu',
                              init_W=init_W,
                              init_b=init_b)

    z_1 = FullyConnectedLayer(name='z_1',
                              parent=['z_t'],
                              parent_dim=[z_dim],
                              nout=z2s_dim,
                              unit='relu',
                              init_W=init_W,
                              init_b=init_b)

    rnn = LSTM(name='rnn',
               parent=['x_1', 'z_1', 'y_1'],
               parent_dim=[x2s_dim, z2s_dim, y2s_dim],
               nout=rnn_dim,
               unit='tanh',
               init_W=init_W,
               init_U=init_U,
               init_b=init_b)

    phi_1 = FullyConnectedLayer(name='phi_1',
                                parent=['x_1', 's_tm1', 'y_1'],
                                parent_dim=[x2s_dim, rnn_dim, y2s_dim],
                                nout=q_z_dim,
                                unit='relu',
                                init_W=init_W,
                                init_b=init_b)

    phi_mu = FullyConnectedLayer(name='phi_mu',
                                 parent=['phi_1'],
                                 parent_dim=[q_z_dim],
                                 nout=z_dim,
                                 unit='linear',
                                 init_W=init_W,
                                 init_b=init_b)

    phi_sig = FullyConnectedLayer(name='phi_sig',
                                  parent=['phi_1'],
                                  parent_dim=[q_z_dim],
                                  nout=z_dim,
                                  unit='softplus',
                                  cons=1e-4,
                                  init_W=init_W,
                                  init_b=init_b_sig)

    prior_1 = FullyConnectedLayer(name='prior_1',
                                  parent=['x_1', 's_tm1'],
                                  parent_dim=[x2s_dim, rnn_dim],
                                  nout=p_z_dim,
                                  unit='relu',
                                  init_W=init_W,
                                  init_b=init_b)

    prior_mu = FullyConnectedLayer(name='prior_mu',
                                   parent=['prior_1'],
                                   parent_dim=[p_z_dim],
                                   nout=z_dim,
                                   unit='linear',
                                   init_W=init_W,
                                   init_b=init_b)

    prior_sig = FullyConnectedLayer(name='prior_sig',
                                    parent=['prior_1'],
                                    parent_dim=[p_z_dim],
                                    nout=z_dim,
                                    unit='softplus',
                                    cons=1e-4,
                                    init_W=init_W,
                                    init_b=init_b_sig)

    theta_1 = FullyConnectedLayer(name='theta_1',
                                  parent=['z_1', 's_tm1'],
                                  parent_dim=[z2s_dim, rnn_dim],
                                  nout=p_x_dim,
                                  unit='relu',
                                  init_W=init_W,
                                  init_b=init_b)

    theta_mu1 = FullyConnectedLayer(name='theta_mu1',
                                    parent=['theta_1'],
                                    parent_dim=[p_x_dim],
                                    nout=target_dim,
                                    unit=typeActivFunc,
                                    init_W=init_W,
                                    init_b=init_b)

    if (y_dim > 1):
        theta_mu2 = FullyConnectedLayer(name='theta_mu2',
                                        parent=['theta_1'],
                                        parent_dim=[p_x_dim],
                                        nout=target_dim,
                                        unit=typeActivFunc,
                                        init_W=init_W,
                                        init_b=init_b)

    if (y_dim > 2):
        theta_mu3 = FullyConnectedLayer(name='theta_mu3',
                                        parent=['theta_1'],
                                        parent_dim=[p_x_dim],
                                        nout=target_dim,
                                        unit=typeActivFunc,
                                        init_W=init_W,
                                        init_b=init_b)

    if (y_dim > 3):
        theta_mu4 = FullyConnectedLayer(name='theta_mu4',
                                        parent=['theta_1'],
                                        parent_dim=[p_x_dim],
                                        nout=target_dim,
                                        unit=typeActivFunc,
                                        init_W=init_W,
                                        init_b=init_b)

    theta_sig1 = FullyConnectedLayer(name='theta_sig1',
                                     parent=['theta_1'],
                                     parent_dim=[p_x_dim],
                                     nout=target_dim,
                                     unit='softplus',
                                     cons=1e-4,
                                     init_W=init_W,
                                     init_b=init_b_sig)

    if (y_dim > 1):
        theta_sig2 = FullyConnectedLayer(name='theta_sig2',
                                         parent=['theta_1'],
                                         parent_dim=[p_x_dim],
                                         nout=target_dim,
                                         unit='softplus',
                                         cons=1e-4,
                                         init_W=init_W,
                                         init_b=init_b_sig)

    if (y_dim > 2):
        theta_sig3 = FullyConnectedLayer(name='theta_sig3',
                                         parent=['theta_1'],
                                         parent_dim=[p_x_dim],
                                         nout=target_dim,
                                         unit='softplus',
                                         cons=1e-4,
                                         init_W=init_W,
                                         init_b=init_b_sig)

    if (y_dim > 3):
        theta_sig4 = FullyConnectedLayer(name='theta_sig4',
                                         parent=['theta_1'],
                                         parent_dim=[p_x_dim],
                                         nout=target_dim,
                                         unit='softplus',
                                         cons=1e-4,
                                         init_W=init_W,
                                         init_b=init_b_sig)

    coeff1 = FullyConnectedLayer(name='coeff1',
                                 parent=['theta_1'],
                                 parent_dim=[p_x_dim],
                                 nout=k,
                                 unit='softmax',
                                 init_W=init_W,
                                 init_b=init_b)

    if (y_dim > 1):
        coeff2 = FullyConnectedLayer(name='coeff2',
                                     parent=['theta_1'],
                                     parent_dim=[p_x_dim],
                                     nout=k,
                                     unit='softmax',
                                     init_W=init_W,
                                     init_b=init_b)

    if (y_dim > 2):
        coeff3 = FullyConnectedLayer(name='coeff3',
                                     parent=['theta_1'],
                                     parent_dim=[p_x_dim],
                                     nout=k,
                                     unit='softmax',
                                     init_W=init_W,
                                     init_b=init_b)

    if (y_dim > 3):
        coeff4 = FullyConnectedLayer(name='coeff4',
                                     parent=['theta_1'],
                                     parent_dim=[p_x_dim],
                                     nout=k,
                                     unit='softmax',
                                     init_W=init_W,
                                     init_b=init_b)

    corr = FullyConnectedLayer(name='corr',
                               parent=['theta_1'],
                               parent_dim=[p_x_dim],
                               nout=k,
                               unit='tanh',
                               init_W=init_W,
                               init_b=init_b)

    binary = FullyConnectedLayer(name='binary',
                                 parent=['theta_1'],
                                 parent_dim=[p_x_dim],
                                 nout=1,
                                 unit='sigmoid',
                                 init_W=init_W,
                                 init_b=init_b)

    nodes = [
        rnn,
        x_1,
        y_1,
        z_1,  #dissag_pred,
        phi_1,
        phi_mu,
        phi_sig,
        prior_1,
        prior_mu,
        prior_sig,
        theta_1,
        theta_mu1,
        theta_sig1,
        coeff1
    ]

    dynamicOutput = [None, None, None, None, None, None, None, None]
    if (y_dim > 1):
        nodes = nodes + [theta_mu2, theta_sig2, coeff2]
        dynamicOutput = dynamicOutput + [None, None, None, None
                                         ]  #mu, sig, coef and pred
    if (y_dim > 2):
        nodes = nodes + [theta_mu3, theta_sig3, coeff3]
        dynamicOutput = dynamicOutput + [None, None, None, None]
    if (y_dim > 3):
        nodes = nodes + [theta_mu4, theta_sig4, coeff4]
        dynamicOutput = dynamicOutput + [None, None, None, None]

    params = OrderedDict()

    for node in nodes:
        if node.initialize() is not None:
            params.update(node.initialize())

    params = init_tparams(params)

    s_0 = rnn.get_init_state(batch_size)

    x_1_temp = x_1.fprop([x], params)
    y_1_temp = y_1.fprop([y], params)

    output_fn = [s_0] + dynamicOutput
    output_fn_val = [s_0] + dynamicOutput[2:]
    print(len(output_fn), len(output_fn_val))

    def inner_fn(x_t, y_t, scheduleSamplingMask, s_tm1):

        phi_1_t = phi_1.fprop([x_t, s_tm1, y_t], params)
        phi_mu_t = phi_mu.fprop([phi_1_t], params)
        phi_sig_t = phi_sig.fprop([phi_1_t], params)

        prior_1_t = prior_1.fprop([x_t, s_tm1], params)
        prior_mu_t = prior_mu.fprop([prior_1_t], params)
        prior_sig_t = prior_sig.fprop([prior_1_t], params)

        z_t = Gaussian_sample(
            phi_mu_t, phi_sig_t
        )  #in the original code it is gaussian. GMM is for the generation
        z_1_t = z_1.fprop([z_t], params)

        theta_1_t = theta_1.fprop([z_1_t, s_tm1], params)

        theta_mu1_t = theta_mu1.fprop([theta_1_t], params)
        theta_sig1_t = theta_sig1.fprop([theta_1_t], params)
        coeff1_t = coeff1.fprop([theta_1_t], params)

        ## prediction 1
        y_pred = GMM_sampleY(
            theta_mu1_t, theta_sig1_t,
            coeff1_t)  #Gaussian_sample(theta_mu_t, theta_sig_t)

        tupleMulti = phi_mu_t, phi_sig_t, prior_mu_t, prior_sig_t, theta_mu1_t, theta_sig1_t, coeff1_t, y_pred

        if (y_dim > 1):
            theta_mu2_t = theta_mu2.fprop([theta_1_t], params)
            theta_sig2_t = theta_sig2.fprop([theta_1_t], params)
            coeff2_t = coeff2.fprop([theta_1_t], params)
            y_pred2 = GMM_sampleY(theta_mu2_t, theta_sig2_t, coeff2_t)
            y_pred = T.concatenate([y_pred, y_pred2], axis=1)
            tupleMulti = tupleMulti + (theta_mu2_t, theta_sig2_t, coeff2_t,
                                       y_pred2)

        if (y_dim > 2):
            theta_mu3_t = theta_mu3.fprop([theta_1_t], params)
            theta_sig3_t = theta_sig3.fprop([theta_1_t], params)
            coeff3_t = coeff3.fprop([theta_1_t], params)
            y_pred3 = GMM_sampleY(theta_mu3_t, theta_sig3_t, coeff3_t)
            y_pred = T.concatenate([y_pred, y_pred3], axis=1)
            tupleMulti = tupleMulti + (theta_mu3_t, theta_sig3_t, coeff3_t,
                                       y_pred3)

        if (y_dim > 3):
            theta_mu4_t = theta_mu4.fprop([theta_1_t], params)
            theta_sig4_t = theta_sig4.fprop([theta_1_t], params)
            coeff4_t = coeff4.fprop([theta_1_t], params)
            y_pred4 = GMM_sampleY(theta_mu4_t, theta_sig4_t, coeff4_t)
            y_pred = T.concatenate([y_pred, y_pred4], axis=1)
            tupleMulti = tupleMulti + (theta_mu4_t, theta_sig4_t, coeff4_t,
                                       y_pred4)

        #s_t = rnn.fprop([[x_t, z_1_t, y_t], [s_tm1]], params)

        if (scheduleSamplingMask == 1):
            s_t = rnn.fprop([[x_t, z_1_t, y_t], [s_tm1]], params)
        else:
            y_t_aux = y_1.fprop([y_pred], params)
            s_t = rnn.fprop([[x_t, z_1_t, y_t_aux], [s_tm1]], params)

        return (s_t, ) + tupleMulti

        #corr_temp, binary_temp

    (otherResults, updates) = theano.scan(
        fn=inner_fn,
        sequences=[x_1_temp, y_1_temp, scheduleSamplingMask],
        outputs_info=output_fn)  #[s_0, (None)]

    s_temp, phi_mu_temp, phi_sig_temp, prior_mu_temp, prior_sig_temp,\
      theta_mu1_temp, theta_sig1_temp, coeff1_temp, y_pred1_temp = otherResults[:9]
    restResults = otherResults[9:]

    for k, v in updates.iteritems():
        k.default_update = v

    #s_temp = concatenate([s_0[None, :, :], s_temp[:-1]], axis=0)# seems like this is for creating an additional dimension to s_0

    theta_mu1_temp.name = 'theta_mu1'
    theta_sig1_temp.name = 'theta_sig1'
    coeff1_temp.name = 'coeff1'
    y_pred1_temp.name = 'disaggregation1'

    #[:,:,flgAgg].reshape((y.shape[0],y.shape[1],1)
    mse1 = T.mean((y_pred1_temp - y[:, :, 0].reshape(
        (y.shape[0], y.shape[1],
         1)))**2)  # As axis = None is calculated for all
    mae1 = T.mean(
        T.abs_(y_pred1_temp - y[:, :, 0].reshape((y.shape[0], y.shape[1], 1))))
    mse1.name = 'mse1'
    mae1.name = 'mae1'

    kl_temp = KLGaussianGaussian(phi_mu_temp, phi_sig_temp, prior_mu_temp,
                                 prior_sig_temp)

    x_shape = x.shape
    y_shape = y.shape
    x_in = x.reshape((x_shape[0] * x_shape[1], -1))
    y_in = y.reshape((y_shape[0] * y_shape[1], -1))

    theta_mu1_in = theta_mu1_temp.reshape((x_shape[0] * x_shape[1], -1))
    theta_sig1_in = theta_sig1_temp.reshape((x_shape[0] * x_shape[1], -1))
    coeff1_in = coeff1_temp.reshape((x_shape[0] * x_shape[1], -1))

    ddoutMSEA = []
    ddoutYpreds = [y_pred1_temp]
    indexSepDynamic = 7  #plus two totalmse, totalmae

    totaMAE = T.copy(mae1)
    totaMSE = T.copy(mse1)
    mse2 = T.zeros((1, ))
    mae2 = T.zeros((1, ))
    mse3 = T.zeros((1, ))
    mae3 = T.zeros((1, ))
    mse4 = T.zeros((1, ))
    mae4 = T.zeros((1, ))

    if (y_dim > 1):
        theta_mu2_temp, theta_sig2_temp, coeff2_temp, y_pred2_temp = restResults[:
                                                                                 4]
        restResults = restResults[4:]
        theta_mu2_temp.name = 'theta_mu2'
        theta_sig2_temp.name = 'theta_sig2'
        coeff2_temp.name = 'coeff2'
        y_pred2_temp.name = 'disaggregation2'
        mse2 = T.mean((y_pred2_temp - y[:, :, 1].reshape(
            (y.shape[0], y.shape[1],
             1)))**2)  # As axis = None is calculated for all
        mae2 = T.mean(
            T.abs_(y_pred2_temp -
                   y[:, :, 1].reshape((y.shape[0], y.shape[1], 1))))
        mse2.name = 'mse2'
        mae2.name = 'mae2'

        theta_mu2_in = theta_mu2_temp.reshape((x_shape[0] * x_shape[1], -1))
        theta_sig2_in = theta_sig2_temp.reshape((x_shape[0] * x_shape[1], -1))
        coeff2_in = coeff2_temp.reshape((x_shape[0] * x_shape[1], -1))

        argsGMM = theta_mu2_in, theta_sig2_in, coeff2_in

        ddoutMSEA = ddoutMSEA + [mse2, mae2]
        ddoutYpreds = ddoutYpreds + [y_pred2_temp]
        #totaMSE+=mse2
        indexSepDynamic += 2

    if (y_dim > 2):
        theta_mu3_temp, theta_sig3_temp, coeff3_temp, y_pred3_temp = restResults[:
                                                                                 4]
        restResults = restResults[4:]
        theta_mu3_temp.name = 'theta_mu3'
        theta_sig3_temp.name = 'theta_sig3'
        coeff3_temp.name = 'coeff3'
        y_pred3_temp.name = 'disaggregation3'
        mse3 = T.mean((y_pred3_temp - y[:, :, 2].reshape(
            (y.shape[0], y.shape[1],
             1)))**2)  # As axis = None is calculated for all
        mae3 = T.mean(
            T.abs_(y_pred3_temp -
                   y[:, :, 2].reshape((y.shape[0], y.shape[1], 1))))
        mse3.name = 'mse3'
        mae3.name = 'mae3'

        theta_mu3_in = theta_mu3_temp.reshape((x_shape[0] * x_shape[1], -1))
        theta_sig3_in = theta_sig3_temp.reshape((x_shape[0] * x_shape[1], -1))
        coeff3_in = coeff3_temp.reshape((x_shape[0] * x_shape[1], -1))

        argsGMM = argsGMM + (theta_mu3_in, theta_sig3_in, coeff3_in)
        ddoutMSEA = ddoutMSEA + [mse3, mae3]
        ddoutYpreds = ddoutYpreds + [y_pred3_temp]
        #totaMSE+=mse3
        indexSepDynamic += 2

    if (y_dim > 3):
        theta_mu4_temp, theta_sig4_temp, coeff4_temp, y_pred4_temp = restResults[:
                                                                                 4]
        restResults = restResults[4:]
        theta_mu4_temp.name = 'theta_mu4'
        theta_sig4_temp.name = 'theta_sig4'
        coeff4_temp.name = 'coeff4'
        y_pred4_temp.name = 'disaggregation4'
        mse4 = T.mean((y_pred4_temp - y[:, :, 3].reshape(
            (y.shape[0], y.shape[1],
             1)))**2)  # As axis = None is calculated for all
        mae4 = T.mean(
            T.abs_(y_pred4_temp -
                   y[:, :, 3].reshape((y.shape[0], y.shape[1], 1))))
        mse4.name = 'mse4'
        mae4.name = 'mae4'

        theta_mu4_in = theta_mu4_temp.reshape((x_shape[0] * x_shape[1], -1))
        theta_sig4_in = theta_sig4_temp.reshape((x_shape[0] * x_shape[1], -1))
        coeff4_in = coeff4_temp.reshape((x_shape[0] * x_shape[1], -1))

        argsGMM = argsGMM + (theta_mu4_in, theta_sig4_in, coeff4_in)
        ddoutMSEA = ddoutMSEA + [mse4, mae4]
        ddoutYpreds = ddoutYpreds + [y_pred4_temp]
        #totaMSE+=mse4
        indexSepDynamic += 2

    totaMSE = (mse1 + mse2 + mse3 + mse4) / y_dim
    totaMSE.name = 'mse'

    totaMAE = (mae1 + mae2 + mae3 + mae4) / y_dim
    totaMAE.name = 'mae'

    recon = GMMdisagMulti(
        y_dim, y_in, theta_mu1_in, theta_sig1_in, coeff1_in, *argsGMM
    )  # BiGMM(x_in, theta_mu_in, theta_sig_in, coeff_in, corr_in, binary_in)
    recon = recon.reshape((x_shape[0], x_shape[1]))
    recon.name = 'gmm_out'

    recon_term = recon.sum(axis=0).mean()
    recon_term = recon.sum(axis=0).mean()
    recon_term.name = 'recon_term'

    #kl_temp = kl_temp * mask

    kl_term = kl_temp.sum(axis=0).mean()
    kl_term.name = 'kl_term'

    #nll_upper_bound_0 = recon_term + kl_term
    #nll_upper_bound_0.name = 'nll_upper_bound_0'
    if (flgMSE == 1):
        nll_upper_bound = recon_term + kl_term + totaMSE
    else:
        nll_upper_bound = recon_term + kl_term
    nll_upper_bound.name = 'nll_upper_bound'

    ######################

    model.inputs = [x, mask, y, y_mask, scheduleSamplingMask]
    model.params = params
    model.nodes = nodes

    optimizer = Adam(lr=lr)
    header = "epoch,log,kl,nll_upper_bound,mse,mae\n"
    extension = [
        GradientClipping(batch_size=batch_size),
        EpochCount(epoch, save_path, header),
        Monitoring(
            freq=monitoring_freq,
            ddout=[
                nll_upper_bound, recon_term, kl_term, totaMSE, totaMAE, mse1,
                mae1
            ] + ddoutMSEA + ddoutYpreds,
            indexSep=indexSepDynamic,
            indexDDoutPlot=[13],  # adding indexes of ddout for the plotting
            #, (6,y_pred_temp)
            instancesPlot=instancesPlot,  #0-150
            data=[Iterator(valid_data, batch_size)],
            savedFolder=save_path),
        Picklize(freq=monitoring_freq, path=save_path),
        EarlyStopping(freq=monitoring_freq,
                      path=save_path,
                      channel=channel_name),
        WeightNorm()
    ]

    mainloop = Training(
        name=pkl_name,
        data=Iterator(train_data, batch_size),
        model=model,
        optimizer=optimizer,
        cost=nll_upper_bound,
        outputs=[recon_term, kl_term, nll_upper_bound, totaMSE, totaMAE],
        n_steps=n_steps,
        extension=extension,
        lr_iterations=lr_iterations,
        k_speedOfconvergence=kSchedSamp)

    mainloop.run()
    '''
    data=Iterator(test_data, batch_size)

    test_fn = theano.function(inputs=[x, y],#[x, y],
                              #givens={x:Xtest},
                              #on_unused_input='ignore',
                              #z=( ,200,1)
                              allow_input_downcast=True,
                              outputs=[prediction_val, recon_term_val, totaMSE_val, totaMAE_val, 
                                        mse1_val,mse2_val,mse3_val,mse4_val,
                                        mae1_val,mae2_val,mae3_val,mae4_val, #unnormalized mae and mse 16 items#
                                        relErr1_val,relErr2_val,relErr3_val,relErr4_val,
                                        propAssigned1_val, propAssigned2_val,propAssigned3_val,propAssigned4_val],
                              updates=updates_val
                              )
    testOutput = []
    testMetrics2 = []
    numBatchTest = 0
    for batch in data:
      outputGeneration = test_fn(batch[0], batch[2])
      testOutput.append(outputGeneration[1:12]) #before 36 including unnormalized metrics
      testMetrics2.append(outputGeneration[12:])
      #{0:[4,20], 2:[5,10]} 
      #if (numBatchTest==0):

      plt.figure(1)
      plt.plot(np.transpose(outputGeneration[0],[1,0,2])[4])
      plt.savefig(save_path+"/vrnn_dis_generated{}_Pred_0-4".format(numBatchTest))
      plt.clf()

      plt.figure(2)
      plt.plot(np.transpose(batch[2],[1,0,2])[4])
      plt.savefig(save_path+"/vrnn_dis_generated{}_RealDisag_0-4".format(numBatchTest))
      plt.clf()

      plt.figure(3)
      plt.plot(np.transpose(batch[0],[1,0,2])[4])
      plt.savefig(save_path+"/vrnn_dis_generated{}_Realagg_0-4".format(numBatchTest))
      plt.clf()
      numBatchTest+=1

    testOutput = np.asarray(testOutput)
    testMetrics2 = np.asarray(testMetrics2)
    print(testOutput.shape)
    print(testMetrics2.shape)

    testOutput[:,19:] = 1000 * testOutput[:,19:] # kwtts a watts
    recon_test = testOutput[:, 0].mean()
    mse_test =  testOutput[:, 1].mean()
    mae_test =  testOutput[:, 2].mean()
    mse1_test =  testOutput[:, 3].mean()
    mae1_test =  testOutput[:, 7].mean()
    mse2_test =  testOutput[:, 4].mean()
    mae2_test =  testOutput[:, 8].mean()
    mse3_test =  testOutput[:, 5].mean()
    mae3_test =  testOutput[:, 9].mean()
    mse4_test =  testOutput[:, 6].mean()
    mae4_test =  testOutput[:, 10].mean()


    print(testOutput[:,3:11].mean(),testOutput[:,11:19].mean())

    relErr1_test = testMetrics2[:,0].mean()
    relErr2_test = testMetrics2[:,1].mean()
    relErr3_test = testMetrics2[:,2].mean()
    relErr4_test = testMetrics2[:,3].mean()

    propAssigned1_test = testMetrics2[:, 8].mean()
    propAssigned2_test = testMetrics2[:, 9].mean()
    propAssigned3_test = testMetrics2[:, 10].mean()
    propAssigned4_test = testMetrics2[:, 11].mean()
    '''

    fLog = open(save_path + '/output.csv', 'w')
    fLog.write(str(lr_iterations) + "\n")
    fLog.write(str(appliances) + "\n")
    fLog.write(str(windows) + "\n\n")

    fLog.write("q_z_dim,p_z_dim,p_x_dim,x2s_dim,y2s_dim,z2s_dim\n")
    fLog.write("{},{},{},{},{},{}\n".format(q_z_dim, p_z_dim, p_x_dim, x2s_dim,
                                            y2s_dim, z2s_dim))
    fLog.write("epoch,log,kl,mse1,mse2,mse3,mse4,mae1,mae2,mae3,mae4\n")
    for i, item in enumerate(mainloop.trainlog.monitor['nll_upper_bound']):
        e, f, g, h, j, k, l, n, p, q, r, s, t, u = 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
        ep = mainloop.trainlog.monitor['epoch'][i]
        a = mainloop.trainlog.monitor['recon_term'][i]
        b = mainloop.trainlog.monitor['kl_term'][i]
        d = mainloop.trainlog.monitor['mse1'][i]
        m = mainloop.trainlog.monitor['mae1'][i]

        if (y_dim > 1):
            e = mainloop.trainlog.monitor['mse2'][i]
            n = mainloop.trainlog.monitor['mae2'][i]
        if (y_dim > 2):
            f = mainloop.trainlog.monitor['mse3'][i]
            p = mainloop.trainlog.monitor['mae3'][i]
        if (y_dim > 3):
            g = mainloop.trainlog.monitor['mse4'][i]
            q = mainloop.trainlog.monitor['mae4'][i]

        fLog.write(
            "{:d},{:.2f},{:.2f},{:.3f},{:.3f},{:.3f},{:.3f},{:.3f},{:.3f},{:.3f},{:.3f}\n"
            .format(ep, a, b, d, e, f, g, m, n, p, q))
def main(args):

    theano.optimizer = 'fast_compile'
    #theano.config.exception_verbosity='high'

    trial = int(args['trial'])
    pkl_name = 'vrnn_gmm_%d' % trial
    channel_name = 'valid_nll_upper_bound'

    data_path = args['data_path']
    save_path = args[
        'save_path']  #+'/aggVSdisag_distribEach/'+datetime.datetime.now().strftime("%y-%m-%d_%H-%M")
    period = int(args['period'])
    n_steps = int(args['n_steps'])
    stride_train = int(args['stride_train'])
    stride_test = int(args['stride_test'])

    # monitoring is a value that represents how many batches from training have to be seen to measure the validation set
    monitoring_freq = int(args['monitoring_freq'])
    epoch = int(args['epoch'])
    batch_size = int(args['batch_size'])
    x_dim = int(args['x_dim'])
    y_dim = int(args['y_dim'])
    z_dim = int(args['z_dim'])
    rnn_dim = int(args['rnn_dim'])
    k = int(args['num_k'])  #a mixture of K Gaussian functions
    lr = float(args['lr'])
    debug = int(args['debug'])

    print "trial no. %d" % trial
    print "batch size %d" % batch_size
    print "learning rate %f" % lr
    print "saving pkl file '%s'" % pkl_name
    print "to the save path '%s'" % save_path

    q_z_dim = 8  #150
    p_z_dim = 8  #150
    p_x_dim = 7  #250
    x2s_dim = 7  #250
    z2s_dim = 8  #150
    target_dim = y_dim * k  #(x_dim-1)*k

    model = Model()
    train_data = UKdale(
        name='train',
        prep='normalize',
        cond=True,  # False
        path=data_path,
        period=period,
        n_steps=n_steps,
        x_dim=x_dim,
        stride_train=stride_train,
        stride_test=stride_test)

    X_mean = train_data.X_mean
    X_std = train_data.X_std

    valid_data = UKdale(
        name='valid',
        prep='normalize',
        cond=True,  # False
        path=data_path,
        X_mean=X_mean,
        X_std=X_std,
        period=period,
        n_steps=n_steps,
        x_dim=x_dim,
        stride_train=stride_train,
        stride_test=stride_test)

    init_W = InitCell('rand')
    init_U = InitCell('ortho')
    init_b = InitCell('zeros')
    init_b_sig = InitCell('const', mean=0.6)

    x, mask, y, y_mask = train_data.theano_vars()
    x.name = 'x_original'
    if debug:
        x.tag.test_value = np.zeros((15, batch_size, x_dim), dtype=np.float32)
        temp = np.ones((15, batch_size), dtype=np.float32)
        temp[:, -2:] = 0.
        mask.tag.test_value = temp

    x_1 = FullyConnectedLayer(name='x_1',
                              parent=['x_t'],
                              parent_dim=[x_dim],
                              nout=x2s_dim,
                              unit='relu',
                              init_W=init_W,
                              init_b=init_b)

    z_1 = FullyConnectedLayer(name='z_1',
                              parent=['z_t'],
                              parent_dim=[z_dim],
                              nout=z2s_dim,
                              unit='relu',
                              init_W=init_W,
                              init_b=init_b)

    rnn = LSTM(name='rnn',
               parent=['x_1', 'z_1'],
               parent_dim=[x2s_dim, z2s_dim],
               nout=rnn_dim,
               unit='tanh',
               init_W=init_W,
               init_U=init_U,
               init_b=init_b)

    phi_1 = FullyConnectedLayer(name='phi_1',
                                parent=['x_1', 's_tm1'],
                                parent_dim=[x2s_dim, rnn_dim],
                                nout=q_z_dim,
                                unit='relu',
                                init_W=init_W,
                                init_b=init_b)

    phi_mu = FullyConnectedLayer(name='phi_mu',
                                 parent=['phi_1'],
                                 parent_dim=[q_z_dim],
                                 nout=z_dim,
                                 unit='linear',
                                 init_W=init_W,
                                 init_b=init_b)

    phi_sig = FullyConnectedLayer(name='phi_sig',
                                  parent=['phi_1'],
                                  parent_dim=[q_z_dim],
                                  nout=z_dim,
                                  unit='softplus',
                                  cons=1e-4,
                                  init_W=init_W,
                                  init_b=init_b_sig)

    prior_1 = FullyConnectedLayer(name='prior_1',
                                  parent=['s_tm1'],
                                  parent_dim=[rnn_dim],
                                  nout=p_z_dim,
                                  unit='relu',
                                  init_W=init_W,
                                  init_b=init_b)

    prior_mu = FullyConnectedLayer(name='prior_mu',
                                   parent=['prior_1'],
                                   parent_dim=[p_z_dim],
                                   nout=z_dim,
                                   unit='linear',
                                   init_W=init_W,
                                   init_b=init_b)

    prior_sig = FullyConnectedLayer(name='prior_sig',
                                    parent=['prior_1'],
                                    parent_dim=[p_z_dim],
                                    nout=z_dim,
                                    unit='softplus',
                                    cons=1e-4,
                                    init_W=init_W,
                                    init_b=init_b_sig)

    theta_1 = FullyConnectedLayer(name='theta_1',
                                  parent=['z_1', 's_tm1'],
                                  parent_dim=[z2s_dim, rnn_dim],
                                  nout=p_x_dim,
                                  unit='relu',
                                  init_W=init_W,
                                  init_b=init_b)

    theta_mu = FullyConnectedLayer(name='theta_mu',
                                   parent=['theta_1'],
                                   parent_dim=[p_x_dim],
                                   nout=target_dim,
                                   unit='linear',
                                   init_W=init_W,
                                   init_b=init_b)

    theta_sig = FullyConnectedLayer(name='theta_sig',
                                    parent=['theta_1'],
                                    parent_dim=[p_x_dim],
                                    nout=target_dim,
                                    unit='softplus',
                                    cons=1e-4,
                                    init_W=init_W,
                                    init_b=init_b_sig)

    coeff = FullyConnectedLayer(
        name='coeff',
        parent=['theta_1'],
        parent_dim=[p_x_dim],
        nout=k,
        unit='softmax',  #to ensure that the sum adds to one
        init_W=init_W,
        init_b=init_b)

    corr = FullyConnectedLayer(name='corr',
                               parent=['theta_1'],
                               parent_dim=[p_x_dim],
                               nout=k,
                               unit='tanh',
                               init_W=init_W,
                               init_b=init_b)

    binary = FullyConnectedLayer(name='binary',
                                 parent=['theta_1'],
                                 parent_dim=[p_x_dim],
                                 nout=1,
                                 unit='sigmoid',
                                 init_W=init_W,
                                 init_b=init_b)

    nodes = [
        rnn,
        x_1,
        z_1,  #dissag_pred,
        phi_1,
        phi_mu,
        phi_sig,
        prior_1,
        prior_mu,
        prior_sig,
        theta_1,
        theta_mu,
        theta_sig,
        coeff
    ]  #, corr, binary

    params = OrderedDict()

    for node in nodes:
        if node.initialize() is not None:
            params.update(node.initialize())

    params = init_tparams(params)

    s_0 = rnn.get_init_state(batch_size)

    x_1_temp = x_1.fprop([x], params)

    def inner_fn(x_t, s_tm1):

        phi_1_t = phi_1.fprop([x_t, s_tm1], params)
        phi_mu_t = phi_mu.fprop([phi_1_t], params)
        phi_sig_t = phi_sig.fprop([phi_1_t], params)

        prior_1_t = prior_1.fprop([s_tm1], params)
        prior_mu_t = prior_mu.fprop([prior_1_t], params)
        prior_sig_t = prior_sig.fprop([prior_1_t], params)

        z_t = Gaussian_sample(
            phi_mu_t, phi_sig_t
        )  #in the original code it is gaussian. GMM is for the generation
        z_1_t = z_1.fprop([z_t], params)

        theta_1_t = theta_1.fprop([z_1_t, s_tm1], params)
        theta_mu_t = theta_mu.fprop([theta_1_t], params)
        theta_sig_t = theta_sig.fprop([theta_1_t], params)

        coeff_t = coeff.fprop([theta_1_t], params)
        #corr_t = corr.fprop([theta_1_t], params)
        #binary_t = binary.fprop([theta_1_t], params)

        # I was missing this reshape that is done before BiGMM in the original code
        '''
        theta_mu_in = theta_mu_t.reshape((x_t[0]*x_t[1], -1))
        theta_sig_in = theta_sig_t.reshape((x_t[0]*x_t[1], -1))
        coeff_in = coeff_t.reshape((x_t[0]*x_t[1], -1))
        '''
        y_pred = GMM_sampleY(
            theta_mu_t, theta_sig_t,
            coeff_t)  #Gaussian_sample(theta_mu_t, theta_sig_t)

        s_t = rnn.fprop([[x_t, z_1_t], [s_tm1]], params)
        #y_pred = dissag_pred.fprop([s_t], params)

        return s_t, phi_mu_t, phi_sig_t, prior_mu_t, prior_sig_t, z_t, z_1_t, theta_1_t, theta_mu_t, theta_sig_t, coeff_t, y_pred
        #corr_temp, binary_temp
    ((s_temp, phi_mu_temp, phi_sig_temp, prior_mu_temp, prior_sig_temp,z_t_temp, z_1_temp, theta_1_temp, theta_mu_temp, theta_sig_temp, coeff_temp, y_pred_temp), updates) =\
        theano.scan(fn=inner_fn,
                    sequences=[x_1_temp],
                    outputs_info=[s_0, None, None, None, None, None, None,  None, None, None, None, None])

    for k, v in updates.iteritems():
        k.default_update = v

    s_temp = concatenate(
        [s_0[None, :, :], s_temp[:-1]], axis=0
    )  # seems like this is for creating an additional dimension to s_0
    '''
    theta_1_temp = theta_1.fprop([z_1_temp, s_temp], params)
    theta_mu_temp = theta_mu.fprop([theta_1_temp], params)
    theta_sig_temp = theta_sig.fprop([theta_1_temp], params)
    coeff_temp = coeff.fprop([theta_1_temp], params)
    corr_temp = corr.fprop([theta_1_temp], params)
    binary_temp = binary.fprop([theta_1_temp], params)
    '''

    s_temp.name = 'h_1'  #gisse
    z_1_temp.name = 'z_1'  #gisse
    z_t_temp.name = 'z'
    theta_mu_temp.name = 'theta_mu'
    theta_sig_temp.name = 'theta_sig'
    coeff_temp.name = 'coeff'
    #corr_temp.name = 'corr'
    #binary_temp.name = 'binary'
    #x_pred_temp.name = 'x_reconstructed'
    y_pred_temp.name = 'disaggregation'

    #mse = T.mean((y_pred_temp - y)**2) # cause mse can be 26000
    #mse.name = 'mse'
    kl_temp = KLGaussianGaussian(phi_mu_temp, phi_sig_temp, prior_mu_temp,
                                 prior_sig_temp)

    x_shape = x.shape
    y_shape = y.shape
    x_in = x.reshape((x_shape[0] * x_shape[1], -1))
    y_in = y.reshape((y_shape[0] * y_shape[1], -1))

    theta_mu_in = theta_mu_temp.reshape((x_shape[0] * x_shape[1], -1))
    theta_sig_in = theta_sig_temp.reshape((x_shape[0] * x_shape[1], -1))
    coeff_in = coeff_temp.reshape((x_shape[0] * x_shape[1], -1))
    #corr_in = corr_temp.reshape((x_shape[0]*x_shape[1], -1))
    #binary_in = binary_temp.reshape((x_shape[0]*x_shape[1], -1))

    #print("Printing shapes")
    #print (y_in.shape, theta_mu_in.shape, theta_sig_in.shape, coeff_in.shape)
    recon = GMM(
        y_in, theta_mu_in, theta_sig_in, coeff_in
    )  # BiGMM(x_in, theta_mu_in, theta_sig_in, coeff_in, corr_in, binary_in)
    recon = recon.reshape((x_shape[0], x_shape[1]))
    recon.name = 'gmm_out'

    #recon = recon * mask

    recon_term = recon.sum(axis=0).mean()
    recon_term.name = 'recon_term'

    #kl_temp = kl_temp * mask

    kl_term = kl_temp.sum(axis=0).mean()
    kl_term.name = 'kl_term'

    #nll_upper_bound_0 = recon_term + kl_term
    #nll_upper_bound_0.name = 'nll_upper_bound_0'
    nll_upper_bound = recon_term + kl_term  #+ mse
    nll_upper_bound.name = 'nll_upper_bound'

    max_x = x.max()
    mean_x = x.mean()
    min_x = x.min()
    max_x.name = 'max_x'
    mean_x.name = 'mean_x'
    min_x.name = 'min_x'

    max_theta_mu = theta_mu_in.max()
    mean_theta_mu = theta_mu_in.mean()
    min_theta_mu = theta_mu_in.min()
    max_theta_mu.name = 'max_theta_mu'
    mean_theta_mu.name = 'mean_theta_mu'
    min_theta_mu.name = 'min_theta_mu'

    max_theta_sig = theta_sig_in.max()
    mean_theta_sig = theta_sig_in.mean()
    min_theta_sig = theta_sig_in.min()
    max_theta_sig.name = 'max_theta_sig'
    mean_theta_sig.name = 'mean_theta_sig'
    min_theta_sig.name = 'min_theta_sig'

    coeff_max = coeff_in.max()
    coeff_min = coeff_in.min()
    coeff_mean_max = coeff_in.mean(axis=0).max()
    coeff_mean_min = coeff_in.mean(axis=0).min()
    coeff_max.name = 'coeff_max'
    coeff_min.name = 'coeff_min'
    coeff_mean_max.name = 'coeff_mean_max'
    coeff_mean_min.name = 'coeff_mean_min'

    max_phi_sig = phi_sig_temp.max()
    mean_phi_sig = phi_sig_temp.mean()
    min_phi_sig = phi_sig_temp.min()
    max_phi_sig.name = 'max_phi_sig'
    mean_phi_sig.name = 'mean_phi_sig'
    min_phi_sig.name = 'min_phi_sig'

    max_prior_sig = prior_sig_temp.max()
    mean_prior_sig = prior_sig_temp.mean()
    min_prior_sig = prior_sig_temp.min()
    max_prior_sig.name = 'max_prior_sig'
    mean_prior_sig.name = 'mean_prior_sig'
    min_prior_sig.name = 'min_prior_sig'

    model.inputs = [x, mask, y, y_mask]
    model.params = params
    model.nodes = nodes

    optimizer = Adam(lr=lr)

    extension = [
        GradientClipping(batch_size=batch_size),
        EpochCount(epoch),
        Monitoring(
            freq=monitoring_freq,
            ddout=[
                nll_upper_bound,
                recon_term,
                kl_term,  #mse,
                max_phi_sig,
                mean_phi_sig,
                min_phi_sig,
                max_prior_sig,
                mean_prior_sig,
                min_prior_sig,
                max_theta_sig,
                mean_theta_sig,
                min_theta_sig,
                max_x,
                mean_x,
                min_x,
                max_theta_mu,
                mean_theta_mu,
                min_theta_mu,
                coeff_max,
                coeff_min,
                coeff_mean_max,
                coeff_mean_min,  #23
                theta_mu_temp,
                theta_sig_temp,
                z_t_temp,
                y_pred_temp,  #corr_temp, binary_temp, 
                coeff_temp,  #22
                s_temp,
                z_1_temp
            ],
            indexSep=22,
            indexDDoutPlot=[(0, theta_mu_temp), (2, z_t_temp), (3, y_pred_temp)
                            ],  # adding indexes of ddout for the plotting
            instancesPlot=[0, 150],
            data=[Iterator(valid_data, batch_size)],
            savedFolder=save_path),
        Picklize(freq=monitoring_freq, path=save_path),
        EarlyStopping(freq=monitoring_freq,
                      path=save_path,
                      channel=channel_name),
        WeightNorm()
    ]

    mainloop = Training(name=pkl_name,
                        data=Iterator(train_data, batch_size),
                        model=model,
                        optimizer=optimizer,
                        cost=nll_upper_bound,
                        outputs=[nll_upper_bound],
                        extension=extension)
    mainloop.run()
    fLog = open(save_path + '/output.csv', 'w')
    print('Printing')
    print(len(mainloop.trainlog.monitor['nll_upper_bound']))
    fLog.write("log,kl,mse,nll_upper_bound\n")
    for i, item in enumerate(mainloop.trainlog.monitor['nll_upper_bound']):
        a = mainloop.trainlog.monitor['recon_term'][i]
        b = mainloop.trainlog.monitor['kl_term'][i]
        #c = mainloop.trainlog.monitor['mse'][i]
        d = mainloop.trainlog.monitor['nll_upper_bound']
        #print(a,b)
        fLog.write("{},{},{}\n".format(a, b, d))
    fLog.close()
Example #21
0
def main(args):

    theano.optimizer = 'fast_compile'
    theano.config.exception_verbosity = 'high'

    trial = int(args['trial'])
    pkl_name = 'vrnn_gmm_%d' % trial
    channel_name = 'mse_val'

    data_path = args['data_path']
    save_path = args[
        'save_path']  #+'/gmm/'+datetime.datetime.now().strftime("%y-%m-%d_%H-%M")
    flgMSE = int(args['flgMSE'])

    genCase = int(args['genCase'])
    period = int(args['period'])
    n_steps = int(args['n_steps'])
    stride_train = int(args['stride_train'])
    stride_test = n_steps  # int(args['stride_test'])

    monitoring_freq = int(args['monitoring_freq'])
    epoch = int(args['epoch'])
    batch_size = int(args['batch_size'])
    x_dim = int(args['x_dim'])
    y_dim = int(args['y_dim'])
    flgAgg = int(args['flgAgg'])
    z_dim = int(args['z_dim'])
    rnn_dim = int(args['rnn_dim'])
    k = int(args['num_k'])  #a mixture of K Gaussian functions
    lr = float(args['lr'])
    debug = int(args['debug'])
    num_sequences_per_batch = int(args['numSequences'])  #based on appliance
    loadParam = args['loadAsKelly']
    target_inclusion_prob = float(args['target_inclusion_prob'])
    loadAsKelly = True
    if (loadParam == 'N' or loadParam == 'n' or loadParam == 'no'
            or loadParam == 'NO' or loadParam == 'No'):
        loadAsKelly = False

    print "trial no. %d" % trial
    print "batch size %d" % batch_size
    print "learning rate %f" % lr
    print "saving pkl file '%s'" % pkl_name
    print "to the save path '%s'" % save_path

    q_z_dim = 60  #150
    p_z_dim = 60  #150
    p_x_dim = 30  #250
    x2s_dim = 40  #250
    z2s_dim = 40  #150
    target_dim = k  #x_dim #(x_dim-1)*k

    model = Model()
    Xtrain, ytrain, Xval, yval, reader = fetch_ukdale(
        data_path,
        windows,
        appliances,
        numApps=flgAgg,
        period=period,
        n_steps=n_steps,
        stride_train=stride_train,
        stride_test=stride_test,
        flgAggSumScaled=1,
        flgFilterZeros=1,
        isKelly=loadAsKelly,
        seq_per_batch=num_sequences_per_batch,
        target_inclusion_prob=target_inclusion_prob)

    instancesPlot = {
        0: [10, 20],
        2: [20, 30]
    }  #for now use hard coded instancesPlot for kelly sampling
    if (not loadAsKelly):
        instancesPlot = reader.build_dict_instances_plot(
            listDates, batch_size, Xval.shape[0])

    ############# We switch x with y
    train_data = UKdale(
        name='train',
        prep='normalize',
        cond=False,
        #path=data_path,
        validTime=0,
        inputX=ytrain,
        labels=Xtrain)

    X_mean = train_data.X_mean
    X_std = train_data.X_std

    valid_data = UKdale(
        name='valid',
        prep='normalize',
        cond=False,
        #path=data_path,
        X_mean=X_mean,
        X_std=X_std,
        validTime=1,
        inputX=yval,
        labels=Xval)

    init_W = InitCell('rand')
    init_U = InitCell('ortho')
    init_b = InitCell('zeros')
    init_b_sig = InitCell('const', mean=0.6)

    x, mask = train_data.theano_vars()
    #valTime  = train_data.theano_valTime_vars()

    if (genCase == 1):
        inputX = x[:-1, :]
        targetX = x[1:, :]
        n_steps = n_steps - 1
    else:
        inputX = x
        targetX = x

    inputX.name = 'x_original'
    if debug:
        inputX.tag.test_value = np.zeros((15, batch_size, x_dim),
                                         dtype=np.float32)
        temp = np.ones((15, batch_size), dtype=np.float32)
        temp[:, -2:] = 0.
        mask.tag.test_value = temp

    x_1 = FullyConnectedLayer(name='x_1',
                              parent=['x_t'],
                              parent_dim=[x_dim],
                              nout=x2s_dim,
                              unit='relu',
                              init_W=init_W,
                              init_b=init_b)

    z_1 = FullyConnectedLayer(name='z_1',
                              parent=['z_t'],
                              parent_dim=[z_dim],
                              nout=z2s_dim,
                              unit='relu',
                              init_W=init_W,
                              init_b=init_b)

    rnn = LSTM(name='rnn',
               parent=['x_1', 'z_1'],
               parent_dim=[x2s_dim, z2s_dim],
               nout=rnn_dim,
               unit='tanh',
               init_W=init_W,
               init_U=init_U,
               init_b=init_b)

    phi_1 = FullyConnectedLayer(name='phi_1',
                                parent=['x_1', 's_tm1'],
                                parent_dim=[x2s_dim, rnn_dim],
                                nout=q_z_dim,
                                unit='relu',
                                init_W=init_W,
                                init_b=init_b)

    phi_mu = FullyConnectedLayer(name='phi_mu',
                                 parent=['phi_1'],
                                 parent_dim=[q_z_dim],
                                 nout=z_dim,
                                 unit='linear',
                                 init_W=init_W,
                                 init_b=init_b)

    phi_sig = FullyConnectedLayer(name='phi_sig',
                                  parent=['phi_1'],
                                  parent_dim=[q_z_dim],
                                  nout=z_dim,
                                  unit='softplus',
                                  cons=1e-4,
                                  init_W=init_W,
                                  init_b=init_b_sig)

    prior_1 = FullyConnectedLayer(name='prior_1',
                                  parent=['s_tm1'],
                                  parent_dim=[rnn_dim],
                                  nout=p_z_dim,
                                  unit='relu',
                                  init_W=init_W,
                                  init_b=init_b)

    prior_mu = FullyConnectedLayer(name='prior_mu',
                                   parent=['prior_1'],
                                   parent_dim=[p_z_dim],
                                   nout=z_dim,
                                   unit='linear',
                                   init_W=init_W,
                                   init_b=init_b)

    prior_sig = FullyConnectedLayer(name='prior_sig',
                                    parent=['prior_1'],
                                    parent_dim=[p_z_dim],
                                    nout=z_dim,
                                    unit='softplus',
                                    cons=1e-4,
                                    init_W=init_W,
                                    init_b=init_b_sig)

    theta_1 = FullyConnectedLayer(name='theta_1',
                                  parent=['z_1', 's_tm1'],
                                  parent_dim=[z2s_dim, rnn_dim],
                                  nout=p_x_dim,
                                  unit='relu',
                                  init_W=init_W,
                                  init_b=init_b)

    theta_mu = FullyConnectedLayer(name='theta_mu',
                                   parent=['theta_1'],
                                   parent_dim=[p_x_dim],
                                   nout=target_dim,
                                   unit='linear',
                                   init_W=init_W,
                                   init_b=init_b)

    theta_sig = FullyConnectedLayer(name='theta_sig',
                                    parent=['theta_1'],
                                    parent_dim=[p_x_dim],
                                    nout=target_dim,
                                    unit='softplus',
                                    cons=1e-4,
                                    init_W=init_W,
                                    init_b=init_b_sig)

    coeff = FullyConnectedLayer(name='coeff',
                                parent=['theta_1'],
                                parent_dim=[p_x_dim],
                                nout=k,
                                unit='softmax',
                                init_W=init_W,
                                init_b=init_b)

    corr = FullyConnectedLayer(name='corr',
                               parent=['theta_1'],
                               parent_dim=[p_x_dim],
                               nout=k,
                               unit='tanh',
                               init_W=init_W,
                               init_b=init_b)

    binary = FullyConnectedLayer(name='binary',
                                 parent=['theta_1'],
                                 parent_dim=[p_x_dim],
                                 nout=1,
                                 unit='sigmoid',
                                 init_W=init_W,
                                 init_b=init_b)

    nodes = [
        rnn,
        x_1,
        z_1,  #dissag_pred,
        phi_1,
        phi_mu,
        phi_sig,
        prior_1,
        prior_mu,
        prior_sig,
        theta_1,
        theta_mu,
        theta_sig,
        coeff
    ]  #, corr, binary

    params = OrderedDict()

    for node in nodes:
        if node.initialize() is not None:
            params.update(node.initialize())  #Creates matrices

    params = init_tparams(params)  #Make the parameters theano.shared

    s_0_tr = rnn.get_init_state(batch_size)
    s_0_val = T.zeros((batch_size, 2 * rnn_dim), dtype=theano.config.floatX)
    s_0_val = T.unbroadcast(
        s_0_val, *range(s_0_val.ndim)
    )  #[0,1] this is to raise an error if length of dimensions are not 1

    #x_1_temp = x_1.fprop([x], params)

    def inner_val_fn(s_tm1):
        '''
        phi_1_t = phi_1.fprop([x_t, s_tm1], params)
        phi_mu_t = phi_mu.fprop([phi_1_t], params)
        phi_sig_t = phi_sig.fprop([phi_1_t], params)
        '''
        prior_1_t = prior_1.fprop([s_tm1], params)
        prior_mu_t = prior_mu.fprop([prior_1_t], params)
        prior_sig_t = prior_sig.fprop([prior_1_t], params)

        z_t = Gaussian_sample(prior_mu_t, prior_sig_t)
        z_1_t = z_1.fprop([z_t], params)

        theta_1_t = theta_1.fprop([z_1_t, s_tm1], params)
        theta_mu_t = theta_mu.fprop([theta_1_t], params)
        theta_sig_t = theta_sig.fprop([theta_1_t], params)
        coeff_t = coeff.fprop([theta_1_t], params)

        x_t = GMM_sample(theta_mu_t, theta_sig_t,
                         coeff_t)  #Gaussian_sample(theta_mu_t, theta_sig_t)
        x_1_t = x_1.fprop([x_t], params)

        s_t = rnn.fprop([[x_1_t, z_1_t], [s_tm1]], params)

        return s_t, x_t, z_t, theta_1_t, theta_mu_t, theta_sig_t, coeff_t
        # prior_mu_temp_val, prior_sig_temp_val
    ((s_temp_val, prediction_val, z_t_temp_val, theta_1_temp_val, theta_mu_temp_val, theta_sig_temp_val, coeff_temp_val), updates_val) =\
        theano.scan(fn=inner_val_fn , n_steps=n_steps, #already 1 subtracted if doing next step
                    outputs_info=[s_0_val, None, None,  None, None, None, None])

    for k, v in updates_val.iteritems():
        k.default_update = v

    def inner_train_fn(x_t, s_tm1):
        x_1_t = x_1.fprop([x_t], params)
        phi_1_t = phi_1.fprop([x_1_t, s_tm1], params)
        phi_mu_t = phi_mu.fprop([phi_1_t], params)
        phi_sig_t = phi_sig.fprop([phi_1_t], params)

        prior_1_t = prior_1.fprop([s_tm1], params)
        prior_mu_t = prior_mu.fprop([prior_1_t], params)
        prior_sig_t = prior_sig.fprop([prior_1_t], params)

        z_t = Gaussian_sample(phi_mu_t, phi_sig_t)
        z_1_t = z_1.fprop([z_t], params)

        theta_1_t = theta_1.fprop([z_1_t, s_tm1], params)
        theta_mu_t = theta_mu.fprop([theta_1_t], params)
        theta_sig_t = theta_sig.fprop([theta_1_t], params)

        coeff_t = coeff.fprop([theta_1_t], params)
        #corr_t = corr.fprop([theta_1_t], params)
        #binary_t = binary.fprop([theta_1_t], params)

        pred = GMM_sample(theta_mu_t, theta_sig_t,
                          coeff_t)  #Gaussian_sample(theta_mu_t, theta_sig_t)
        s_t = rnn.fprop([[x_1_t, z_1_t], [s_tm1]], params)
        #y_pred = dissag_pred.fprop([s_t], params)

        return s_t, phi_mu_t, phi_sig_t, prior_mu_t, prior_sig_t, z_t, z_1_t, theta_1_t, theta_mu_t, theta_sig_t, coeff_t, pred  #, y_pred
        #corr_temp, binary_temp

    ((s_temp, phi_mu_temp, phi_sig_temp, prior_mu_temp, prior_sig_temp,z_t_temp, z_1_temp, theta_1_temp, theta_mu_temp, theta_sig_temp, coeff_temp, prediction), updates) =\
        theano.scan(fn=inner_train_fn, sequences=[inputX],#[x_1_temp],
                    outputs_info=[s_0_tr, None, None, None, None, None, None,  None, None, None, None, None])

    for k, v in updates.iteritems():
        k.default_update = v

    #########   TRAINING GRAPH  #########
    s_temp = concatenate(
        [s_0_tr[None, :, :], s_temp[:-1]], axis=0
    )  # seems like this is for creating an additional dimension to s_0

    s_temp.name = 'h_1'  #gisse
    z_1_temp.name = 'z_1'  #gisse
    z_t_temp.name = 'z'
    theta_mu_temp.name = 'theta_mu_temp'
    theta_sig_temp.name = 'theta_sig_temp'
    coeff_temp.name = 'coeff'

    prediction.name = 'pred_' + str(flgAgg)
    mse = T.mean(
        (prediction - targetX)**2)  # As axis = None is calculated for all
    mae = T.mean(T.abs_(prediction - targetX))
    mse.name = 'mse'
    mae.name = 'mae'
    x_in = inputX.reshape((batch_size * n_steps, -1))

    kl_temp = KLGaussianGaussian(phi_mu_temp, phi_sig_temp, prior_mu_temp,
                                 prior_sig_temp)

    target_shape = x[:, 1:].shape
    theta_mu_in = theta_mu_temp.reshape((batch_size * n_steps, -1))
    theta_sig_in = theta_sig_temp.reshape((batch_size * n_steps, -1))
    coeff_in = coeff_temp.reshape((batch_size * n_steps, -1))

    recon = GMM(
        x_in, theta_mu_in, theta_sig_in, coeff_in
    )  # BiGMM(x_in, theta_mu_in, theta_sig_in, coeff_in, corr_in, binary_in)
    recon = recon.reshape((batch_size, n_steps))
    recon.name = 'gmm_out'

    recon_term = recon.sum(axis=0).mean()
    recon_term.name = 'recon_term'

    kl_term = kl_temp.sum(axis=0).mean()
    kl_term.name = 'kl_term'

    nll_upper_bound = recon_term + kl_term  #+ mse
    if (flgMSE):
        nll_upper_bound = nll_upper_bound + mse
    nll_upper_bound.name = 'nll_upper_bound'

    #########   TESTING GRAPH  #########
    s_temp_val = concatenate(
        [s_0_val[None, :, :], s_temp_val[:-1]], axis=0
    )  # seems like this is for creating an additional dimension to s_0

    s_temp_val.name = 'h_1_val'  #gisse
    #z_1_temp_val.name = 'z_1_val'#gisse
    z_t_temp_val.name = 'z_val'
    theta_mu_temp_val.name = 'theta_mu_temp_val'
    theta_sig_temp_val.name = 'theta_sig_temp_val'
    coeff_temp_val.name = 'coeff_val'

    prediction_val.name = 'generated_' + str(flgAgg)
    mse_val = T.mean(
        (prediction_val - targetX)**2)  # As axis = None is calculated for all
    mae_val = T.mean(T.abs_(prediction_val - targetX))
    mse_val.name = 'mse_val'
    mae_val.name = 'mae_val'
    x_in_val = inputX.reshape((batch_size * n_steps, -1))

    # No sense in calculate distance to a distribution because we are not calculating phi
    #kl_temp_val = KLGaussianGaussian(phi_mu_temp, phi_sig_temp, prior_mu_temp, prior_sig_temp)

    theta_mu_in_val = theta_mu_temp_val.reshape((batch_size * n_steps, -1))
    theta_sig_in_val = theta_sig_temp_val.reshape((batch_size * n_steps, -1))
    coeff_in_val = coeff_temp_val.reshape((batch_size * n_steps, -1))

    recon_val = GMM(
        x_in_val, theta_mu_in_val, theta_sig_in_val, coeff_in_val
    )  # BiGMM(x_in, theta_mu_in, theta_sig_in, coeff_in, corr_in, binary_in)
    recon_val = recon_val.reshape((batch_size, n_steps))
    recon_val.name = 'gmm_out_val'

    recon_term_val = recon_val.sum(axis=0).mean()
    recon_term_val.name = 'recon_term_val'

    ######################################
    model.inputs = [x, mask]
    model.params = params
    model.nodes = nodes

    optimizer = Adam(lr=lr)

    header = "epoch,log,kl,nll,mse,mae\n"

    extension = [
        GradientClipping(batch_size=batch_size),
        EpochCount(epoch, save_path, header),
        Monitoring(
            freq=monitoring_freq,
            ddout=[
                recon_term_val, mse_val, mae_val, prediction_val,
                theta_mu_temp_val
            ],
            indexSep=3,
            indexDDoutPlot=[(0, prediction_val)],
            instancesPlot=instancesPlot,  #, 80,150
            data=[Iterator(valid_data, batch_size)],
            savedFolder=save_path),
        Picklize(freq=monitoring_freq, path=save_path),
        EarlyStopping(freq=monitoring_freq,
                      path=save_path,
                      channel=channel_name),
        WeightNorm()
    ]

    lr_iterations = {0: lr}

    mainloop = Training(
        name=pkl_name,
        data=Iterator(train_data, batch_size),
        model=model,
        optimizer=optimizer,
        cost=nll_upper_bound,
        outputs=[recon_term, kl_term, nll_upper_bound, mse, mae],
        extension=extension,
        lr_iterations=lr_iterations)
    mainloop.run()

    fLog = open(save_path + '/output.csv', 'w')
    fLog.write(str(lr_iterations) + "\n")
    fLog.write(str(windows) + "\n")
    fLog.write("q_z_dim,p_z_dim,p_x_dim,x2s_dim,z2s_dim\n")
    fLog.write("{},{},{},{},{}\n".format(q_z_dim, p_z_dim, p_x_dim, x2s_dim,
                                         z2s_dim))
    header = "epoch,log,mse,mae\n"
    fLog.write(header)
    for i, item in enumerate(mainloop.trainlog.monitor['recon_term_val']):
        f = mainloop.trainlog.monitor['epoch'][i]
        a = mainloop.trainlog.monitor['recon_term_val'][i]
        d = mainloop.trainlog.monitor['mse_val'][i]
        e = mainloop.trainlog.monitor['mae_val'][i]
        fLog.write("{},{},{},{}\n".format(f, a, d, e))
def main(args):

    theano.optimizer = 'fast_compile'
    theano.config.exception_verbosity = 'high'
    trial = int(args['trial'])
    pkl_name = 'vrnn_gauss_%d' % trial
    channel_name = 'valid_nll_upper_bound'

    data_path = args['data_path']
    save_path = args['save_path']
    save_path = args['save_path']
    period = int(args['period'])
    n_steps = int(args['n_steps'])
    stride_train = int(args['stride_train'])
    stride_test = int(args['stride_test'])

    monitoring_freq = int(args['monitoring_freq'])
    epoch = int(args['epoch'])
    batch_size = int(args['batch_size'])
    x_dim = int(args['x_dim'])
    z_dim = int(args['z_dim'])
    rnn_dim = int(args['rnn_dim'])
    lr = float(args['lr'])
    debug = int(args['debug'])

    print "trial no. %d" % trial
    print "batch size %d" % batch_size
    print "learning rate %f" % lr
    print "saving pkl file '%s'" % pkl_name
    print "to the save path '%s'" % save_path

    q_z_dim = 150
    p_z_dim = 150
    p_x_dim = 250
    x2s_dim = 10  #250
    z2s_dim = 10  #150
    target_dim = x_dim  #(x_dim-1)

    model = Model()
    Xtrain, ytrain, Xval, yval = fetch_ukdale(data_path,
                                              windows,
                                              appliances,
                                              numApps=flgAgg,
                                              period=period,
                                              n_steps=n_steps,
                                              stride_train=stride_train,
                                              stride_test=stride_test)

    train_data = UKdale(
        name='train',
        prep='normalize',
        cond=True,  # False
        #path=data_path,
        inputX=Xtrain,
        labels=ytrain)

    X_mean = train_data.X_mean
    X_std = train_data.X_std

    valid_data = UKdale(
        name='valid',
        prep='normalize',
        cond=True,  # False
        #path=data_path,
        X_mean=X_mean,
        X_std=X_std,
        inputX=Xval,
        labels=yval)

    init_W = InitCell('rand')
    init_U = InitCell('ortho')
    init_b = InitCell('zeros')
    init_b_sig = InitCell('const', mean=0.6)

    x, y = train_data.theano_vars()

    if debug:
        x.tag.test_value = np.zeros((15, batch_size, x_dim), dtype=np.float32)
        temp = np.ones((15, batch_size), dtype=np.float32)
        temp[:, -2:] = 0.
        mask.tag.test_value = temp

    x_1 = FullyConnectedLayer(
        name='x_1',
        parent=['x_t'],  #OrderDict parent['x_t'] = x_dim
        parent_dim=[x_dim],
        nout=x2s_dim,
        unit='relu',
        init_W=init_W,
        init_b=init_b)

    z_1 = FullyConnectedLayer(name='z_1',
                              parent=['z_t'],
                              parent_dim=[z_dim],
                              nout=z2s_dim,
                              unit='relu',
                              init_W=init_W,
                              init_b=init_b)

    rnn = LSTM(name='rnn',
               parent=['x_1', 'z_1'],
               parent_dim=[x2s_dim, z2s_dim],
               nout=rnn_dim,
               unit='tanh',
               init_W=init_W,
               init_U=init_U,
               init_b=init_b)

    phi_1 = FullyConnectedLayer(
        name='phi_1',  ## encoder
        parent=['x_1', 's_tm1'],
        parent_dim=[x2s_dim, rnn_dim],
        nout=q_z_dim,
        unit='relu',
        init_W=init_W,
        init_b=init_b)

    phi_mu = FullyConnectedLayer(name='phi_mu',
                                 parent=['phi_1'],
                                 parent_dim=[q_z_dim],
                                 nout=z_dim,
                                 unit='linear',
                                 init_W=init_W,
                                 init_b=init_b)

    phi_sig = FullyConnectedLayer(name='phi_sig',
                                  parent=['phi_1'],
                                  parent_dim=[q_z_dim],
                                  nout=z_dim,
                                  unit='softplus',
                                  cons=1e-4,
                                  init_W=init_W,
                                  init_b=init_b_sig)

    prior_1 = FullyConnectedLayer(name='prior_1',
                                  parent=['s_tm1'],
                                  parent_dim=[rnn_dim],
                                  nout=p_z_dim,
                                  unit='relu',
                                  init_W=init_W,
                                  init_b=init_b)

    prior_mu = FullyConnectedLayer(name='prior_mu',
                                   parent=['prior_1'],
                                   parent_dim=[p_z_dim],
                                   nout=z_dim,
                                   unit='linear',
                                   init_W=init_W,
                                   init_b=init_b)

    prior_sig = FullyConnectedLayer(name='prior_sig',
                                    parent=['prior_1'],
                                    parent_dim=[p_z_dim],
                                    nout=z_dim,
                                    unit='softplus',
                                    cons=1e-4,
                                    init_W=init_W,
                                    init_b=init_b_sig)

    theta_1 = FullyConnectedLayer(
        name='theta_1',  ### decoder
        parent=['z_1', 's_tm1'],
        parent_dim=[z2s_dim, rnn_dim],
        nout=p_x_dim,
        unit='relu',
        init_W=init_W,
        init_b=init_b)

    theta_mu = FullyConnectedLayer(name='theta_mu',
                                   parent=['theta_1'],
                                   parent_dim=[p_x_dim],
                                   nout=target_dim,
                                   unit='linear',
                                   init_W=init_W,
                                   init_b=init_b)

    theta_sig = FullyConnectedLayer(name='theta_sig',
                                    parent=['theta_1'],
                                    parent_dim=[p_x_dim],
                                    nout=target_dim,
                                    unit='softplus',
                                    cons=1e-4,
                                    init_W=init_W,
                                    init_b=init_b_sig)

    corr = FullyConnectedLayer(
        name='corr',  ## rho
        parent=['theta_1'],
        parent_dim=[p_x_dim],
        nout=1,
        unit='tanh',
        init_W=init_W,
        init_b=init_b)

    binary = FullyConnectedLayer(name='binary',
                                 parent=['theta_1'],
                                 parent_dim=[p_x_dim],
                                 nout=1,
                                 unit='sigmoid',
                                 init_W=init_W,
                                 init_b=init_b)

    nodes = [
        rnn, x_1, z_1, phi_1, phi_mu, phi_sig, prior_1, prior_mu, prior_sig,
        theta_1, theta_mu, theta_sig
    ]  #, corr, binary

    params = OrderedDict()

    for node in nodes:
        if node.initialize() is not None:
            params.update(
                node.initialize()
            )  #Initialize values of the W matrices according to dim of parents

    params = init_tparams(params)

    s_0 = rnn.get_init_state(batch_size)

    x_1_temp = x_1.fprop([x], params)

    def inner_fn(x_t, s_tm1):

        phi_1_t = phi_1.fprop([x_t, s_tm1], params)
        phi_mu_t = phi_mu.fprop([phi_1_t], params)
        phi_sig_t = phi_sig.fprop([phi_1_t], params)

        prior_1_t = prior_1.fprop([s_tm1], params)
        prior_mu_t = prior_mu.fprop([prior_1_t], params)
        prior_sig_t = prior_sig.fprop([prior_1_t], params)

        z_t = Gaussian_sample(phi_mu_t, phi_sig_t)
        z_1_t = z_1.fprop([z_t], params)

        theta_1_t = theta_1.fprop([z_1_t, s_tm1], params)
        theta_mu_t = theta_mu.fprop([theta_1_t], params)
        theta_sig_t = theta_sig.fprop([theta_1_t], params)

        pred = Gaussian_sample(theta_mu_t, theta_sig_t)

        s_t = rnn.fprop([[x_t, z_1_t], [s_tm1]], params)

        return s_t, phi_mu_t, phi_sig_t, prior_mu_t, prior_sig_t, z_t, z_1_t, theta_1_t, theta_mu_t, theta_sig_t, pred

    ((s_temp, phi_mu_temp, phi_sig_temp, prior_mu_temp, prior_sig_temp, z_temp, z_1_temp, theta_1_temp, theta_mu_temp, theta_sig_temp, pred_temp), updates) =\
        theano.scan(fn=inner_fn,
                    sequences=[x_1_temp], #non_sequences unchanging variables
                    #The tensor(s) to be looped over should be provided to scan using the sequence keyword argument
                    outputs_info=[s_0, None, None, None, None, None, None, None, None, None, None])#Initialization occurs in outputs_info
    #=None This indicates to scan that it does not need to pass the prior result to _fn
    '''
    The general order of function parameters to:
    sequences (if any), prior result(s) (if needed), non-sequences (if any)
    '''
    for k, v in updates.iteritems():
        print("Update")
        k.default_update = v

    s_temp = concatenate([s_0[None, :, :], s_temp[:-1]], axis=0)
    s_temp.name = 'h_1'  #gisse
    z_temp.name = 'z'
    z_1_temp.name = 'z_1'  #gisse
    #theta_1_temp = theta_1.fprop([z_1_temp, s_temp], params)
    #theta_mu_temp = theta_mu.fprop([theta_1_temp], params)
    theta_mu_temp.name = 'theta_mu'
    #theta_sig_temp = theta_sig.fprop([theta_1_temp], params)
    theta_sig_temp.name = 'theta_sig'
    x_pred_temp.name = 'x_reconstructed'
    #corr_temp = corr.fprop([theta_1_temp], params)
    #corr_temp.name = 'corr'
    #binary_temp = binary.fprop([theta_1_temp], params)
    #binary_temp.name = 'binary'

    if (flgAgg == -1):
        prediction.name = 'x_reconstructed'
        mse = T.mean((prediction - x)**2)  # CHECK RESHAPE with an assertion
        mae = T.mean(T.abs(prediction - x))
        mse.name = 'mse'
        pred_in = x.reshape((x_shape[0] * x_shape[1], -1))
    else:
        prediction.name = 'pred_' + str(flgAgg)
        mse = T.mean((prediction - y[:, :, flgAgg].reshape(
            (y.shape[0], y.shape[1],
             1)))**2)  # CHECK RESHAPE with an assertion
        mae = T.mean(
            T.abs_(prediction -
                   y[:, :, flgAgg].reshape((y.shape[0], y.shape[1], 1))))
        mse.name = 'mse'
        mae.name = 'mae'
        pred_in = y[:, :, flgAgg].reshape((x.shape[0] * x.shape[1], -1),
                                          ndim=2)

    kl_temp = KLGaussianGaussian(phi_mu_temp, phi_sig_temp, prior_mu_temp,
                                 prior_sig_temp)

    #x_shape = x.shape
    #x_in = x.reshape((x_shape[0]*x_shape[1], -1))
    theta_mu_in = theta_mu_temp.reshape((x_shape[0] * x_shape[1], -1))
    theta_sig_in = theta_sig_temp.reshape((x_shape[0] * x_shape[1], -1))
    #corr_in = corr_temp.reshape((x_shape[0]*x_shape[1], -1))
    #binary_in = binary_temp.reshape((x_shape[0]*x_shape[1], -1))

    recon = Gaussian(
        pred_in, theta_mu_in, theta_sig_in
    )  # BiGauss(x_in, theta_mu_in, theta_sig_in, corr_in, binary_in) # second term for the loss function
    recon = recon.reshape((x_shape[0], x_shape[1]))
    #recon = recon * mask
    recon_term = recon.sum(axis=0).mean()
    recon_term.name = 'recon_term'

    #kl_temp = kl_temp * mask
    kl_term = kl_temp.sum(axis=0).mean()
    kl_term.name = 'kl_term'

    nll_upper_bound = recon_term + kl_term
    nll_upper_bound.name = 'nll_upper_bound'

    max_x = x.max()
    mean_x = x.mean()
    min_x = x.min()
    max_x.name = 'max_x'
    mean_x.name = 'mean_x'
    min_x.name = 'min_x'

    max_theta_mu = theta_mu_in.max()
    mean_theta_mu = theta_mu_in.mean()
    min_theta_mu = theta_mu_in.min()
    max_theta_mu.name = 'max_theta_mu'
    mean_theta_mu.name = 'mean_theta_mu'
    min_theta_mu.name = 'min_theta_mu'

    max_theta_sig = theta_sig_in.max()
    mean_theta_sig = theta_sig_in.mean()
    min_theta_sig = theta_sig_in.min()
    max_theta_sig.name = 'max_theta_sig'
    mean_theta_sig.name = 'mean_theta_sig'
    min_theta_sig.name = 'min_theta_sig'

    max_phi_sig = phi_sig_temp.max()
    mean_phi_sig = phi_sig_temp.mean()
    min_phi_sig = phi_sig_temp.min()
    max_phi_sig.name = 'max_phi_sig'
    mean_phi_sig.name = 'mean_phi_sig'
    min_phi_sig.name = 'min_phi_sig'

    max_prior_sig = prior_sig_temp.max()
    mean_prior_sig = prior_sig_temp.mean()
    min_prior_sig = prior_sig_temp.min()
    max_prior_sig.name = 'max_prior_sig'
    mean_prior_sig.name = 'mean_prior_sig'
    min_prior_sig.name = 'min_prior_sig'

    prior_sig_output = prior_sig_temp
    prior_sig_output.name = 'prior_sig_o'
    phi_sig_output = phi_sig_temp
    phi_sig_output.name = 'phi_sig_o'

    model.inputs = [x, mask]
    model.params = params
    model.nodes = nodes

    optimizer = Adam(lr=lr)

    extension = [
        GradientClipping(batch_size=batch_size),
        EpochCount(epoch),
        Monitoring(
            freq=monitoring_freq,
            ddout=[
                nll_upper_bound,
                recon_term,
                kl_term,
                mse,
                mae,
                max_phi_sig,
                mean_phi_sig,
                min_phi_sig,
                max_prior_sig,
                mean_prior_sig,
                min_prior_sig,
                max_theta_sig,
                mean_theta_sig,
                min_theta_sig,
                max_x,
                mean_x,
                min_x,
                max_theta_mu,
                mean_theta_mu,
                min_theta_mu,  #0-17
                #binary_temp, corr_temp,
                theta_mu_temp,
                theta_sig_temp,  #17-20
                s_temp,
                z_temp,
                z_1_temp,
                x_pred_temp
                #phi_sig_output,phi_sig_output
            ],  ## added in order to explore the distributions
            indexSep=22,
            indexDDoutPlot=[(0, theta_mu_temp), (2, z_t_temp),
                            (3, prediction)],
            instancesPlot=[0, 150],  #, 80,150
            savedFolder=save_path,
            data=[Iterator(valid_data, batch_size)]),
        Picklize(freq=monitoring_freq, path=save_path),
        EarlyStopping(freq=monitoring_freq,
                      path=save_path,
                      channel=channel_name),
        WeightNorm()
    ]

    mainloop = Training(name=pkl_name,
                        data=Iterator(train_data, batch_size),
                        model=model,
                        optimizer=optimizer,
                        cost=nll_upper_bound,
                        outputs=[nll_upper_bound],
                        extension=extension)
    mainloop.run()
    fLog = open(save_path + '/output.csv', 'w')
    fLog.write("log,kl,nll_upper_bound,mse,mae\n")
    for i, item in enumerate(mainloop.trainlog.monitor['nll_upper_bound']):
        a = mainloop.trainlog.monitor['recon_term'][i]
        b = mainloop.trainlog.monitor['kl_term'][i]
        c = mainloop.trainlog.monitor['nll_upper_bound'][i]
        d = mainloop.trainlog.monitor['mse'][i]
        e = mainloop.trainlog.monitor['mae'][i]
        fLog.write("{},{},{},{},{}\n".format(a, b, c, d, e))