def valid(self, dropout= identity, smooth= None):
    """-> Model with new fields, teacher forcing

    output    : f32 (?, dim_tgt) prediction on logit scale
    prob      : f32 (?, dim_tgt) prediction, soft
    pred      : i32 (?,)         prediction, hard
    errt_samp : f32 (?,)         errors
    loss_samp : f32 (?,)         losses
    errt      : f32 ()           error rate
    loss      : f32 ()           mean loss

    """
    with scope('emb_src_'): w = self.position(self.max_src) + dropout(self.emb_src(self.src))
    with scope('emb_tgt_'): x = self.position(self.max_tgt) + dropout(self.emb_tgt(self.tgt))
    w = self.encode(w, self.mask_src, dropout, name= 'encode_') # bds
    x = self.decode(x, self.mask_tgt, w, self.mask_src, dropout, name= 'decode_') # bdt
    with scope('logit_'):
        y = self.emb_tgt(                    # ?n
            tf.boolean_mask(                 # ?d
                tf.transpose(x, (0,2,1))     # btd <- bdt
                , self.mask))
    with scope('prob_'): prob = tf.nn.softmax(y, axis= -1)
    with scope('pred_'): pred = tf.argmax(y, axis= -1, output_type= tf.int32)
    with scope('errt_'):
        errt_samp = tf.to_float(tf.not_equal(self.true, pred))
        errt = tf.reduce_mean(errt_samp)
    with scope('loss_'):
        loss_samp = tf.nn.softmax_cross_entropy_with_logits_v2(labels= smooth(self.true), logits= y) \
            if smooth else tf.nn.sparse_softmax_cross_entropy_with_logits(labels= self.true, logits= y)
        loss = tf.reduce_mean(loss_samp)
    return Model(self, output= y, prob= prob, pred= pred
                 , errt_samp= errt_samp, errt= errt
                 , loss_samp= loss_samp, loss= loss)

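# The call to self.emb_tgt on the decoder states in `valid` presumably reuses the target
# embedding matrix as the output projection (weight tying); `_tied_logits` below is only a
# minimal sketch of that idea with an explicit (dim_voc, dim_emb) matrix, not the repo's API.
def _tied_logits(h, embedding):
    # h: (?, dim_emb) hidden states, embedding: (dim_voc, dim_emb) -> logits: (?, dim_voc)
    return tf.matmul(h, embedding, transpose_b=True)
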
def _eval(self):
    gold, pred, output, smooth = self.gold, self.pred, self.output, self.smooth
    with tf.variable_scope('acc'):
        acc = tf.reduce_mean(tf.to_float(tf.equal(gold, pred)))
    with tf.variable_scope('loss'):
        loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits_v2(
                logits=output, labels=smooth(gold)))
    with tf.variable_scope('prob'):
        prob = tf.nn.softmax(output, name='prob')
    return Transformer(prob=prob, loss=loss, acc=acc, **self)

def train(self, warmup=4e3, beta1=0.9, beta2=0.98, epsilon=1e-9):
    """-> Transformer with new fields

    step : i64 () global update step
    lr   : f32 () learning rate for the current step
    up   :        update operation

    """
    dim, loss = self.dim, self.loss
    with tf.variable_scope('lr'):
        s = tf.train.get_or_create_global_step()
        t = tf.to_float(s + 1)
        lr = (dim**-0.5) * tf.minimum(t**-0.5, t * (warmup**-1.5))
    up = tf.train.AdamOptimizer(lr, beta1, beta2, epsilon).minimize(loss, s)
    return Transformer(step=s, lr=lr, up=up, **self)

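# The `lr` expression above is the warmup-then-decay schedule from "Attention Is All You Need":
# linear growth for the first `warmup` steps, then step**-0.5 decay. A minimal numpy sketch
# (the helper name and the concrete numbers below are illustrative, not part of the model):
def _noam_lr(step, dim=512, warmup=4e3):
    import numpy as np
    t = np.asarray(step, dtype=np.float64) + 1.0
    return dim ** -0.5 * np.minimum(t ** -0.5, t * warmup ** -1.5)
# e.g. _noam_lr(0) ~ 1.7e-7, peak _noam_lr(3999) ~ 7.0e-4, _noam_lr(40000) ~ 2.2e-4
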
def train(self, dropout= 0.1, smooth= 0.1, warmup= 4e3, beta1= 0.9, beta2= 0.98, epsilon= 1e-9):
    """-> Model with new fields, teacher forcing

    step : i64 () global update step
    lr   : f32 () learning rate for the current step
    up   :        update operation

    along with all the fields from `valid`

    """
    dropout, smooth = Dropout(dropout, (None, self.dim_emb, None)), Smooth(smooth, self.dim_voc)
    self = self.valid(dropout= dropout, smooth= smooth)
    with scope('lr'):
        s = tf.train.get_or_create_global_step()
        t = tf.to_float(s + 1)
        lr = (self.dim_emb ** -0.5) * tf.minimum(t ** -0.5, t * (warmup ** -1.5))
    up = tf.train.AdamOptimizer(lr, beta1, beta2, epsilon).minimize(self.loss, s)
    return Model(self, dropout= dropout, smooth= smooth, step= s, lr= lr, up= up)

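# `Smooth(smooth, dim_voc)` above is a repo-specific callable; judging from its use with
# softmax_cross_entropy_with_logits_v2 it presumably maps sparse ids to label-smoothed
# one-hot targets. A numpy sketch of that standard transform (helper name is hypothetical):
def _smooth_labels(ids, dim_voc, eps=0.1):
    import numpy as np
    onehot = np.eye(dim_voc, dtype=np.float32)[np.asarray(ids)]
    return (1.0 - eps) * onehot + eps / dim_voc
# _smooth_labels([2, 0], dim_voc=4) gives rows summing to 1 with 0.925 on the true class
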
def sinusoid(dim, time, freq= 1e-4, array= False):
    """returns a rank-2 tensor of shape `dim, time`, where each column
    corresponds to a time step and each row a sinusoid, with frequencies
    in a geometric progression from 1 to `freq`.

    """
    assert not dim % 2
    if array:
        a = (freq ** ((2 / dim) * np.arange(dim // 2))).reshape(-1, 1) \
            @ (1 + np.arange(time).reshape(1, -1))
        return np.concatenate((np.sin(a), np.cos(a)), -1).reshape(dim, time)
    else:
        assert False # figure out a better way to do this
        a = tf.reshape(
            freq ** ((2 / dim) * tf.range(dim // 2, dtype= tf.float32))
            , (-1, 1)) @ tf.reshape(
                1 + tf.range(tf.to_float(time), dtype= tf.float32)
                , (1, -1))
        return tf.reshape(tf.concat((tf.sin(a), tf.cos(a)), axis= -1), (dim, time))

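# A small sanity sketch for the numpy branch of `sinusoid` above (not called anywhere; the
# concrete dim/time values are arbitrary): the final reshape lays out the rows so that sin
# and cos of the same frequency are adjacent, starting with unit frequency.
def _check_sinusoid(dim=8, time=5):
    s = sinusoid(dim, time, array=True)
    assert s.shape == (dim, time)
    assert np.allclose(s[0], np.sin(1 + np.arange(time)))
    assert np.allclose(s[1], np.cos(1 + np.arange(time)))
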
def data(self, sid, tid, src= None, tgt= None):
    """-> Model with new fields

    position : Sinusoid
    src_     : i32 (b, ?)    source feed, in range `[0, dim_src)`
    tgt_     : i32 (b, ?)    target feed, in range `[0, dim_tgt)`
    src      : i32 (b, s)    source with `eos` trimmed among the batch
    tgt      : i32 (b, t)    target with `eos` trimmed among the batch, padded with `bos`
    mask     : b8  (b, t)    target sequence mask
    true     : i32 (?,)      target references
    max_tgt  : i32 ()        maximum target length
    max_src  : i32 ()        maximum source length
    mask_tgt : f32 (1, t, t) target attention mask
    mask_src : f32 (b, 1, s) source attention mask

    """
    src_ = placeholder(tf.int32, (None, None), src, 'src_')
    tgt_ = placeholder(tf.int32, (None, None), tgt, 'tgt_')
    with scope('src'):
        src, msk, max_src = trim(src_, self.eos)
        mask_src = tf.log(tf.expand_dims(tf.to_float(msk), axis= 1))
    with scope('tgt'):
        tgt, msk, max_tgt = trim(tgt_, self.eos)
        mask = tf.pad(msk, ((0,0),(1,0)), constant_values= True)
        btru = tf.pad(tgt, ((0,0),(1,0)), constant_values= self.bos)
        true = tf.pad(tgt, ((0,0),(0,1)), constant_values= self.eos)
        true, tgt = tf.boolean_mask(true, mask), btru
        max_tgt += 1
        mask_tgt = tf.log(tf.expand_dims(causal_mask(max_tgt), axis= 0))
    return Model(
        position= Sinusoid(self.dim_emb, self.cap)
        , src_= src_, mask_src= mask_src, max_src= max_src, src= src
        , tgt_= tgt_, mask_tgt= mask_tgt, max_tgt= max_tgt, tgt= tgt
        , true= true, mask= mask
        , emb_src= self.embeds[sid]
        , emb_tgt= self.embeds[tid]
        , **self)

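# `causal_mask` above is assumed to return a lower-triangular float matrix, so tf.log maps
# allowed positions to 0 and future positions to -inf; the source mask works the same way on
# padding. A numpy sketch of the resulting additive mask (helper name is hypothetical):
def _causal_log_mask(t):
    import numpy as np
    with np.errstate(divide='ignore'):  # log(0) -> -inf is intended here
        return np.log(np.tril(np.ones((t, t), dtype=np.float32)))
# _causal_log_mask(3) == [[0, -inf, -inf], [0, 0, -inf], [0, 0, 0]]
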
def data(self, src=None, tgt=None, len_cap=None):
    """-> Transformer with new fields

    src_ : i32 (b, ?) source feed, in range `[0, dim_src)`
    tgt_ : i32 (b, ?) target feed, in range `[0, dim_tgt)`
    src  : i32 (b, s) source with `end` trimmed among the batch
    tgt  : i32 (b, t) target with `end` trimmed among the batch
    mask : f32 (b, s) source mask
    gold : i32 (b, t) target one step ahead
    position : Sinusoid

    setting `len_cap` makes it more efficient for training. you won't be
    able to feed it longer sequences, but it doesn't affect any model
    parameters.

    """
    end, dim = self.end, self.dim
    count_not_all = lambda x: tf.reduce_sum(tf.to_int32(~tf.reduce_all(x, 0)))
    with tf.variable_scope('src'):
        src = src_ = placeholder(tf.int32, (None, None), src)
        len_src = count_not_all(tf.equal(src, end))
        src = src[:, :len_src]
    with tf.variable_scope('tgt'):
        tgt = tgt_ = placeholder(tf.int32, (None, None), tgt)
        len_tgt = count_not_all(tf.equal(tgt, end))
        tgt, gold = tgt[:, :len_tgt], tgt[:, 1:1 + len_tgt]
    return Transformer(position=Sinusoid(dim, len_cap),
                       src_=src_, src=src,
                       mask=tf.to_float(tf.expand_dims(tf.not_equal(src, end), 1)),
                       tgt_=tgt_, tgt=tgt, gold=gold,
                       **self)

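# `count_not_all` above counts the columns that are not entirely `end`, i.e. the length left
# after stripping the all-padding tail shared by the whole batch. A numpy sketch (hypothetical
# helper; end=1 chosen arbitrarily):
def _len_not_all_end(x, end=1):
    import numpy as np
    return int((~np.all(np.asarray(x) == end, axis=0)).sum())
# _len_not_all_end([[3, 4, 1, 1], [5, 1, 1, 1]]) == 2, so src[:, :2] drops the shared padding
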
def vAe(
        mode,
        src=None,
        tgt=None,
        # model spec
        dim_tgt=8192,
        dim_emb=512,
        dim_rep=1024,
        rnn_layers=3,
        bidirectional=True,
        bidir_stacked=True,
        attentive=False,
        logit_use_embed=True,
        # training spec
        accelerate=1e-4,
        learn_rate=1e-3,
        bos=2,
        eos=1):
    # dim_tgt : vocab size
    # dim_emb : model dimension
    # dim_rep : representation dimension
    #
    # unk=0 for word dropout

    assert mode in ('train', 'valid', 'infer')
    self = Record(bos=bos, eos=eos)

    with scope('step'):
        step = self.step = tf.train.get_or_create_global_step()
        rate = accelerate * tf.to_float(step)
        rate_keepwd = self.rate_keepwd = tf.sigmoid(rate)
        rate_anneal = self.rate_anneal = tf.tanh(rate)
        rate_update = self.rate_update = learn_rate / (tf.sqrt(rate) + 1.0)

    with scope('src'):
        src = self.src = placeholder(tf.int32, (None, None), src, 'src')
        src = tf.transpose(src)  # time major order
        src, msk_src, len_src = trim(src, eos)

    with scope('tgt'):
        tgt = self.tgt = placeholder(tf.int32, (None, None), tgt, 'tgt')
        tgt = tf.transpose(tgt)  # time major order
        tgt, msk_tgt, len_tgt = trim(tgt, eos)
        msk_tgt = tf.pad(msk_tgt, ((1, 0), (0, 0)), constant_values=True)
        # pads for decoder : lead=[bos]+tgt -> gold=tgt+[eos]
        lead, gold = tgt, tf.pad(tgt, paddings=((0, 1), (0, 0)), constant_values=eos)
        if 'train' == mode:
            lead *= tf.to_int32(tf.random_uniform(tf.shape(lead)) < rate_keepwd)
        lead = self.lead = tf.pad(lead, paddings=((1, 0), (0, 0)), constant_values=bos)

    # s : src length
    # t : tgt length plus one padding, either eos or bos
    # b : batch size
    #
    # len_src : b aka s
    # msk_src : sb without padding
    # msk_tgt : tb with eos
    #
    # lead : tb with bos
    # gold : tb with eos

    with scope('embed'):
        b = (6 / (dim_tgt / dim_emb + 1))**0.5
        embedding = tf.get_variable('embedding', (dim_tgt, dim_emb),
                                    initializer=tf.random_uniform_initializer(-b, b))
        emb_tgt = tf.gather(embedding, lead, name='emb_tgt')  # (t, b) -> (t, b, dim_emb)
        emb_src = tf.gather(embedding, src, name='emb_src')   # (s, b) -> (s, b, dim_emb)

    with scope('encode'):  # (s, b, dim_emb) -> (b, dim_emb)
        reverse = partial(tf.reverse_sequence, seq_lengths=len_src, seq_axis=0, batch_axis=1)

        if bidirectional and bidir_stacked:
            for i in range(rnn_layers):
                with scope("rnn{}".format(i + 1)):
                    emb_fwd, _ = layer_rnn(1, dim_emb, name='fwd')(emb_src)
                    emb_bwd, _ = layer_rnn(1, dim_emb, name='bwd')(reverse(emb_src))
                    hs = emb_src = tf.concat((emb_fwd, reverse(emb_bwd)), axis=-1)
        elif bidirectional:
            with scope("rnn"):
                emb_fwd, _ = layer_rnn(rnn_layers, dim_emb, name='fwd')(emb_src)
                emb_bwd, _ = layer_rnn(rnn_layers, dim_emb, name='bwd')(reverse(emb_src))
                hs = tf.concat((emb_fwd, reverse(emb_bwd)), axis=-1)
        else:
            hs, _ = layer_rnn(rnn_layers, dim_emb, name='rnn')(emb_src)

        with scope('cata'):
            # extract the final states from the outputs: bd <- sbd, b2
            h = tf.gather_nd(
                hs, tf.stack((len_src - 1, tf.range(tf.size(len_src), dtype=tf.int32)), axis=1))
            if attentive:  # todo fixme
                # the values are the outputs from all non-padding steps;
                # the queries are the final states;
                h = layer_nrm(h + tf.squeeze(        # bd <- bd1
                    attention(                       # bd1 <- bd1, bds, b1s
                        tf.expand_dims(h, axis=2),   # query: bd1 <- bd
                        tf.transpose(hs, (1, 2, 0)), # value: bds <- sbd
                        tf.log(tf.to_float(          # -inf,0 mask: b1s <- sb <- bs
                            tf.expand_dims(tf.transpose(msk_src), axis=1))),
                        int(h.shape[-1])), 2))

    with scope('latent'):  # (b, dim_emb) -> (b, dim_rep) -> (b, dim_emb)
        # h = layer_aff(h, dim_emb, name='in')
        mu = self.mu = layer_aff(h, dim_rep, name='mu')
        lv = self.lv = layer_aff(h, dim_rep, name='lv')
        with scope('z'):
            h = mu
            if 'train' == mode:
                h += tf.exp(0.5 * lv) * tf.random_normal(shape=tf.shape(lv))
            self.z = h
        h = layer_aff(h, dim_emb, name='ex')

    with scope('decode'):  # (b, dim_emb) -> (t, b, dim_emb) -> (?, dim_emb)
        h = self.state_in = tf.stack((h,) * rnn_layers)
        h, _ = _, (self.state_ex,) = layer_rnn(rnn_layers, dim_emb, name='rnn')(
            emb_tgt, initial_state=(h,))
        if 'infer' != mode:
            h = tf.boolean_mask(h, msk_tgt)
        h = layer_aff(h, dim_emb, name='out')

    with scope('logits'):  # (?, dim_emb) -> (?, dim_tgt)
        if logit_use_embed:
            logits = self.logits = tf.tensordot(h, (dim_emb**-0.5) * tf.transpose(embedding), 1)
        else:
            logits = self.logits = layer_aff(h, dim_tgt)

    with scope('prob'):
        prob = self.prob = tf.nn.softmax(logits)
    with scope('pred'):
        pred = self.pred = tf.argmax(logits, -1, output_type=tf.int32)

    if 'infer' != mode:
        labels = tf.boolean_mask(gold, msk_tgt, name='labels')
        with scope('errt'):
            errt_samp = self.errt_samp = tf.to_float(tf.not_equal(labels, pred))
            errt = self.errt = tf.reduce_mean(errt_samp)
        with scope('loss'):
            with scope('loss_gen'):
                loss_gen_samp = self.loss_gen_samp = tf.nn.sparse_softmax_cross_entropy_with_logits(
                    labels=labels, logits=logits)
                loss_gen = self.loss_gen = tf.reduce_mean(loss_gen_samp)
            with scope('loss_kld'):
                loss_kld_samp = self.loss_kld_samp = 0.5 * (tf.square(mu) + tf.exp(lv) - lv - 1.0)
                loss_kld = self.loss_kld = tf.reduce_mean(loss_kld_samp)
            loss = self.loss = rate_anneal * loss_kld + loss_gen

    if 'train' == mode:
        with scope('train'):
            train_step = self.train_step = tf.train.AdamOptimizer(rate_update).minimize(loss, step)

    return self

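# loss_kld above is the closed-form KL divergence KL(N(mu, sigma^2) || N(0, 1)) per dimension,
# with lv = log sigma^2:
#   KL = 0.5 * (mu^2 + sigma^2 - log sigma^2 - 1) = 0.5 * (square(mu) + exp(lv) - lv - 1).
# The three step-dependent rates behave as sketched below in numpy (defaults as above;
# the helper is only illustrative):
def _vae_schedules(step, accelerate=1e-4, learn_rate=1e-3):
    import numpy as np
    rate = accelerate * np.asarray(step, dtype=np.float64)
    rate_keepwd = 1.0 / (1.0 + np.exp(-rate))       # word-keep prob: 0.5 -> 1, word dropout fades out
    rate_anneal = np.tanh(rate)                     # KL weight: 0 -> 1
    rate_update = learn_rate / (np.sqrt(rate) + 1)  # Adam step size: learn_rate -> 0
    return rate_keepwd, rate_anneal, rate_update
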
def VAE(data, btlnk_dim, data_dim, dense_dim, y_dim, loss_type, accelerate):

    def encoder(x, dim_btlnk, dim_x):
        x = Normalize(dim_btlnk, "nrm")(tf.nn.elu(Linear(dim_btlnk, dim_x, name= 'lin')(x)))
        with tf.variable_scope('latent'):
            mu = Linear(dim_btlnk, dim_btlnk, name= 'mu')(x)
            lv = Linear(dim_btlnk, dim_btlnk, name= 'lv')(x)
            #lv = Linear(dim_btlnk, dim_x, name= 'lv')(x)
            #mu = Linear(dim_btlnk, dim_x, name= 'mu')(x)
        with tf.variable_scope('z'):
            z = mu + tf.exp(0.5 * lv) * tf.random_normal(shape=tf.shape(lv))
        return z, mu, lv

    def decoder(x, data_dim, btlnk_dim):
        x = Linear(data_dim, btlnk_dim)(x)
        #return tf.clip_by_value(x, 0.0, 1.0)
        return tf.nn.sigmoid(x)

    with tf.variable_scope("x"):
        x = placeholder(tf.float32, [None, data_dim], data[0], "x")
    with tf.variable_scope("y"):
        y = placeholder(tf.float32, [None], data[1], "y")

    with tf.variable_scope("encoder"):
        z, mu, lv = encoder(x, btlnk_dim, data_dim)
    with tf.variable_scope("decoder"):
        logits = decoder(z, data_dim, btlnk_dim)

    with tf.variable_scope("step"):
        step = tf.train.get_or_create_global_step()
        rate = accelerate * tf.to_float(step)
        rate_anneal = tf.tanh(rate)

    with tf.variable_scope("loss"):
        kl_loss = tf.reduce_mean(0.5 * (tf.square(mu) + tf.exp(lv) - lv - 1.0))
        if loss_type == "xtrpy":
            #loss_rec = tf.reduce_mean(tf.losses.softmax_cross_entropy(onehot_labels=x, logits=logits))
            epsilon = 1e-10
            loss_rec = tf.reduce_mean(
                -tf.reduce_sum(x * tf.log(epsilon + logits)
                               + (1 - x) * tf.log(epsilon + 1 - logits), axis=1))
        else:
            loss_rec = tf.reduce_mean(tf.abs(x - logits))
        loss = loss_rec + kl_loss * rate_anneal

    with tf.variable_scope("AUC"):
        anomaly_score = tf.reduce_mean((x - logits)**2, axis=1)
        _, auc = tf.metrics.auc(y, anomaly_score)

    with tf.variable_scope("train_step"):
        train_step = tf.train.AdamOptimizer().minimize(loss, step)

    return dict(step=step,
                x=x, y=y, z=z, mu=mu,
                logits=logits,
                auc=auc,
                train_step=train_step,
                loss=loss,
                kl_loss=kl_loss,
                loss_rec=loss_rec)

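# Hedged usage sketch (TF1 style), assuming the repo's `placeholder` helper makes the arrays
# in `data` the default feed and noting that tf.metrics.auc needs the local variables
# initializer; the dimensions and step count are illustrative only:
#
#   vae = VAE(data, btlnk_dim=32, data_dim=784, dense_dim=256, y_dim=1,
#             loss_type="xtrpy", accelerate=1e-4)
#   with tf.Session() as sess:
#       sess.run((tf.global_variables_initializer(), tf.local_variables_initializer()))
#       for _ in range(10000):
#           sess.run(vae['train_step'])
#       print(sess.run((vae['loss'], vae['auc'])))
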
def model(mode, src_dwh, tgt_dwh,
          src_idx=None, len_src=None,
          tgt_img=None, tgt_idx=None, len_tgt=None,
          num_layers=3, num_units=512,
          learn_rate=1e-3, decay_rate=1e-2,
          dropout=0.1):
    assert mode in ('train', 'valid', 'infer')
    self = Record()

    src_d, src_w, src_h = src_dwh
    tgt_d, tgt_w, tgt_h = tgt_dwh

    with scope('source'):
        # input nodes
        src_idx = self.src_idx = placeholder(tf.int32, (None, None), src_idx, 'src_idx')  # n s
        len_src = self.len_src = placeholder(tf.int32, (None,), len_src, 'len_src')       # n
        # time major order
        src_idx = tf.transpose(src_idx, (1, 0))  # s n
        emb_src = tf.one_hot(src_idx, src_d)     # s n v

        for i in range(num_layers):
            with scope("rnn{}".format(i + 1)):
                emb_fwd, _ = tf.contrib.cudnn_rnn.CudnnGRU(
                    1, num_units, dropout=dropout, name='fwd')(
                        emb_src, training='train' == mode)
                emb_bwd, _ = tf.contrib.cudnn_rnn.CudnnGRU(
                    1, num_units, dropout=dropout, name='bwd')(
                        tf.reverse_sequence(emb_src, len_src, seq_axis=0, batch_axis=1),
                        training='train' == mode)
                emb_src = tf.concat(
                    (emb_fwd, tf.reverse_sequence(emb_bwd, len_src, seq_axis=0, batch_axis=1)),
                    axis=-1)
        # emb_src = tf.layers.dense(emb_src, num_units, name= 'reduce_concat') # s n d
        emb_src = self.emb_src = tf.transpose(emb_src, (1, 2, 0))  # n d s

    with scope('target'):
        # input nodes
        tgt_img = self.tgt_img = placeholder(tf.uint8, (None, None, tgt_h, tgt_w), tgt_img, 'tgt_img')  # n t h w
        tgt_idx = self.tgt_idx = placeholder(tf.int32, (None, None), tgt_idx, 'tgt_idx')                # n t
        len_tgt = self.len_tgt = placeholder(tf.int32, (None,), len_tgt, 'len_tgt')                     # n
        # time major order
        tgt_idx = tf.transpose(tgt_idx)                # t n
        tgt_img = tf.transpose(tgt_img, (1, 0, 2, 3))  # t n h w
        tgt_img = flatten(tgt_img, 2, 3)               # t n hw
        # normalize pixels to binary
        tgt_img = tf.to_float(tgt_img) / 255.0
        # tgt_img = tf.round(tgt_img)
        # todo consider adding noise
        # causal padding
        fire = self.fire = tf.pad(tgt_img, ((1, 0), (0, 0), (0, 0)), constant_values=0.0)
        true = self.true = tf.pad(tgt_img, ((0, 1), (0, 0), (0, 0)), constant_values=1.0)
        tidx = self.tidx = tf.pad(tgt_idx, ((0, 1), (0, 0)), constant_values=1)
        mask_tgt = tf.transpose(tf.sequence_mask(len_tgt + 1))  # t n

    with scope('decode'):
        # todo: the decoder should get input from the latent space to do attention
        decoder = self.decoder = tf.contrib.cudnn_rnn.CudnnGRU(num_layers, num_units, dropout=dropout)
        state_in = self.state_in = tf.zeros((num_layers, tf.shape(fire)[1], num_units))
        x, _ = _, (self.state_ex,) = decoder(fire, initial_state=(state_in,), training='train' == mode)
        # transform the mask to -inf and 0 so it can simply be added to the attention logits
        mask = tf.log(tf.sequence_mask(len_src, dtype=tf.float32))  # n s
        mask = tf.expand_dims(mask, 1)                              # n 1 s
        # multi-head scaled dot-product attention
        x = tf.transpose(x, (1, 2, 0))  # t n d ---> n d t
        attn = Attention(num_units, num_units, 2 * num_units)(x, emb_src, mask)
        if 'train' == mode:
            attn = tf.nn.dropout(attn, 1 - dropout)
        x = Normalize(num_units)(x + attn)
        x = tf.transpose(x, (2, 0, 1))  # n d t ---> t n d

    if 'infer' != mode:
        x = tf.boolean_mask(x, mask_tgt)
        true = tf.boolean_mask(true, mask_tgt)
        tidx = tf.boolean_mask(tidx, mask_tgt)

    with scope('output'):
        y = tf.layers.dense(x, tgt_h * tgt_w, name='dense_img')
        z = tf.layers.dense(x, tgt_d, name='logit_idx')
        pred = self.pred = tf.clip_by_value(y, 0.0, 1.0)
        prob = self.prob = tf.nn.softmax(z)
        pidx = self.pidx = tf.argmax(z, axis=-1, output_type=tf.int32)

    with scope('losses'):
        diff = true - pred
        mae = self.mae = tf.reduce_mean(tf.abs(diff), axis=-1)
        mse = self.mse = tf.reduce_mean(tf.square(diff), axis=-1)
        xid = self.xid = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=z, labels=tidx)
        err = self.err = tf.not_equal(tidx, pidx)
        loss = tf.reduce_mean(xid)

    with scope('update'):
        step = self.step = tf.train.get_or_create_global_step()
        lr = self.lr = learn_rate / (1.0 + decay_rate * tf.sqrt(tf.to_float(step)))
        if 'train' == mode:
            down = self.down = tf.train.AdamOptimizer(lr).minimize(loss, step)

    return self

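# Hedged usage sketch (TF1, requires a GPU for CudnnGRU): placeholder names as defined above;
# the dwh tuples, feed arrays, and shapes below are illustrative assumptions, and attribute
# access on the returned `Record` is assumed to work as its assignments above suggest.
#
#   m = model('train', src_dwh=(60, 1, 1), tgt_dwh=(60, 12, 24))
#   with tf.Session() as sess:
#       sess.run(tf.global_variables_initializer())
#       sess.run(m.down, {m.src_idx: src_batch,   # (n, s) int32
#                         m.len_src: src_lens,    # (n,)   int32
#                         m.tgt_img: img_batch,   # (n, t, h, w) uint8
#                         m.tgt_idx: idx_batch,   # (n, t) int32
#                         m.len_tgt: tgt_lens})   # (n,)   int32
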
def build(self, x, y, z, loss_type):
    d_scale_factor = tf.constant(0.)  # tf.constant(0.25)
    g_scale_factor = tf.constant(0.)  # tf.constant(1 - 0.75/2)

    with scope("x"):
        x = placeholder(tf.float32, [None, self.dim_x], x, "x")
    with scope("y"):
        y = placeholder(tf.float32, [None], y, "y")
    with scope("z"):
        z = placeholder(tf.float32, [None, self.dim_noise], z, "z")

    zx, mu, lv, hl_e = self.enc(x)
    gzx = self.gen(zx)
    #gz = self.gen(z)
    dx, hl_dx = self.dis(x)
    dgzx, hl_dgzx = self.dis(gzx)
    #dgz, hl_dgz = self.dis(gz)

    with tf.variable_scope("step"):
        step = tf.train.get_or_create_global_step()
        rate = self.accelerate * tf.to_float(step)
        rate_anneal = tf.tanh(rate)

    with scope("loss"):
        dx_loss = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(
                labels=tf.ones_like(dx) - d_scale_factor, logits=dx))
        dgzx_loss = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(
                labels=tf.zeros_like(dgzx), logits=dgzx))
        #dgz_loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(labels=tf.zeros_like(dgz), logits=dgz))
        d_loss = dx_loss + dgzx_loss #+ dgz_loss

        kl_loss = tf.reduce_mean(0.5 * (tf.square(mu) + tf.exp(lv) - lv - 1.0))
        gzx_loss = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(
                labels=tf.ones_like(dgzx) - g_scale_factor, logits=dgzx))
        if loss_type == "xtrpy":
            epsilon = 1e-10
            ftr_loss = tf.reduce_mean(
                -tf.reduce_sum(x * tf.log(epsilon + gzx)
                               + (1 - x) * tf.log(epsilon + 1 - gzx), axis=1))
            g_loss = gzx_loss / 10 + ftr_loss / 5 + kl_loss * rate_anneal
        else:
            ftr_loss = tf.reduce_mean(tf.abs(x - gzx))
            g_loss = gzx_loss / 2 + ftr_loss * 10 + kl_loss * rate_anneal

    with scope("AUC"):
        _, auc_gzx = tf.metrics.auc(y, tf.reduce_mean((x - gzx)**2, axis=1))
        _, auc_dx = tf.metrics.auc(y, tf.nn.sigmoid(dx))
        _, auc_dgzx = tf.metrics.auc(y, tf.nn.sigmoid(dgzx))

    g_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope="generator")
    # extend, not append: append would nest the encoder variables inside a single list element
    g_vars.extend(tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope="encoder"))
    print(g_vars)
    d_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope="discriminator")
    print(d_vars)

    with scope('train_step'):
        #optimizer = tf.train.RMSPropOptimizer()
        optimizer = tf.train.AdamOptimizer()
        d_step = optimizer.minimize(d_loss, step, var_list=d_vars)
        g_step = optimizer.minimize(g_loss, step, var_list=g_vars)

    return VAEGAN(
        self,
        step=step,
        x=x, y=y, z=z,
        zx=zx, mu=mu, lv=lv,
        m=tf.reduce_mean(mu), l=tf.reduce_mean(lv)
        #, gz=gz
        , gzx=gzx,
        auc_gzx=auc_gzx, auc_dx=auc_dx, auc_dgzx=auc_dgzx,
        g_step=g_step, d_step=d_step,
        g_loss=g_loss, d_loss=d_loss
        #, gz_loss=gz_loss
        , gzx_loss=gzx_loss, ftr_loss=ftr_loss, kl_loss=kl_loss, dx_loss=dx_loss
        #, dgz_loss=dgz_loss
        , dgzx_loss=dgzx_loss)

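# Hedged usage sketch of the alternating updates returned above (assumes a built VAEGAN
# instance `net` with attribute access, a TF1 session, that the x/y/z feeds default to the
# arrays passed into `build`, and an arbitrary step count):
#
#   with tf.Session() as sess:
#       sess.run((tf.global_variables_initializer(), tf.local_variables_initializer()))
#       for _ in range(10000):
#           sess.run(net.d_step)   # discriminator update: dx_loss + dgzx_loss
#           sess.run(net.g_step)   # encoder+generator update: gzx_loss, ftr_loss, kl_loss
#       print(sess.run((net.auc_gzx, net.auc_dx, net.auc_dgzx)))
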