def build(self, x, y, weight):
    with scope("x"):
        x = placeholder(tf.float32, [None, self.dim_x], x, "x")
    with scope("y"):
        y = placeholder(tf.float32, [None], y, "y")

    gx = self.gen(x)
    dx, dgx = self.dis(x), self.dis(gx)

    with scope("loss"):
        d_loss_real = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(
                labels=tf.ones_like(dx) * 0.9, logits=dx))
        d_loss_fake = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(
                labels=tf.zeros_like(dgx), logits=dgx))
        d_loss = d_loss_real + d_loss_fake

        epsilon = 1e-10
        loss_rec = tf.reduce_mean(
            -tf.reduce_sum(x * tf.log(epsilon + gx)
                           + (1 - x) * tf.log(epsilon + 1 - gx), axis=1))
        g_loss = weight * loss_rec \
            + tf.reduce_mean(
                tf.nn.sigmoid_cross_entropy_with_logits(
                    labels=tf.ones_like(dgx), logits=dgx))

    with scope("AUC"):
        _, auc_dgx = tf.metrics.auc(y, tf.nn.sigmoid(dgx))
        _, auc_dx = tf.metrics.auc(y, tf.nn.sigmoid(dx))
        _, auc_gx = tf.metrics.auc(y, tf.reduce_mean((x - gx)**2, axis=1))

    g_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope="generator")
    d_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope="discriminator")

    with scope('train_step'):
        step = tf.train.get_or_create_global_step()
        optimizer = tf.train.AdamOptimizer()
        d_step = optimizer.minimize(d_loss, step, var_list=d_vars)
        g_step = optimizer.minimize(g_loss, step, var_list=g_vars)

    return AEGAN(self, step=step, x=x, y=y, gx=gx,
                 auc_dgx=auc_dgx, auc_gx=auc_gx, auc_dx=auc_dx,
                 g_step=g_step, d_step=d_step,
                 g_loss=g_loss, d_loss=d_loss)
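# Illustrative sketch (not part of the original code): why the real labels above
# are 0.9 rather than 1.0 (one-sided label smoothing). With target 0.9 the sigmoid
# cross-entropy is minimised at a finite logit (log(0.9/0.1) ~ 2.2), so the
# discriminator is discouraged from becoming overconfident on real samples.
# Numbers and the helper below are purely illustrative.
import numpy as np

def sigmoid_xent(label, logit):
    # numerically stable equivalent of tf.nn.sigmoid_cross_entropy_with_logits
    return np.maximum(logit, 0) - logit * label + np.log1p(np.exp(-np.abs(logit)))

logits = np.linspace(-1, 6, 8)
print(np.round(sigmoid_xent(1.0, logits), 3))  # keeps decreasing as the logit grows
print(np.round(sigmoid_xent(0.9, logits), 3))  # bottoms out near logit ~ 2.2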
def ae(data, btlnk_dim, data_dim, dense_dim, y_dim, loss_type):

    def encoder(x, btlnk_dim):
        x = normalize(
            tf.nn.relu(tf.keras.layers.Dense(btlnk_dim, use_bias=False)(x)),
            "layer_norm_1")
        return x

    def decoder(x, data_dim):
        x = tf.keras.layers.Dense(data_dim, use_bias=False)(x)
        #return tf.clip_by_value(x, 0.0, 1.0)
        return tf.sigmoid(x)

    with tf.variable_scope("x"):
        x = placeholder(tf.float32, [None, data_dim], data[0], "x")
    with tf.variable_scope("y"):
        y = placeholder(tf.float32, [None], data[1], "y")

    with tf.variable_scope("encoder"):
        z = encoder(x, btlnk_dim)
    with tf.variable_scope("decoder"):
        logits = decoder(z, data_dim)

    with tf.variable_scope("loss"):
        if loss_type == "xtrpy":
            #loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(labels=x, logits=logits))
            epsilon = 1e-10
            loss = tf.reduce_mean(
                -tf.reduce_sum(x * tf.log(epsilon + logits)
                               + (1 - x) * tf.log(epsilon + 1 - logits), axis=1))
        else:
            loss = tf.reduce_mean(tf.abs(x - logits))

    step = tf.train.get_or_create_global_step()

    with tf.variable_scope("AUC"):
        anomaly_score = tf.reduce_mean((x - logits)**2, axis=1)
        _, auc = tf.metrics.auc(y, anomaly_score)

    with tf.variable_scope("train_step"):
        train_step = tf.train.AdamOptimizer().minimize(loss, step)

    return dict(step=step, x=x, y=y, logits=logits, auc=auc,
                train_step=train_step, loss=loss)
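# Sanity-check sketch (not part of the original code) for the "xtrpy" reconstruction
# loss above: with a sigmoid output, -sum(x*log(p) + (1-x)*log(1-p)) is plain binary
# cross-entropy, i.e. tf.nn.sigmoid_cross_entropy_with_logits applied to the
# pre-sigmoid activations and summed over features; epsilon only guards log(0).
import numpy as np

rng = np.random.RandomState(0)
x = (rng.rand(4, 7) > 0.5).astype(float)     # binary "pixel" targets
l = rng.randn(4, 7)                          # pre-sigmoid decoder activations
p = 1.0 / (1.0 + np.exp(-l))                 # what the decoder returns
eps = 1e-10
loss_rec = np.mean(-np.sum(x * np.log(eps + p) + (1 - x) * np.log(eps + 1 - p), axis=1))
stable = np.mean(np.sum(np.maximum(l, 0) - l * x + np.log1p(np.exp(-np.abs(l))), axis=1))
print(loss_rec, stable)                      # agree up to the epsilon guard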
def infer(self):
    """-> Model with new fields, autoregressive

    len_tgt : i32 ()     steps to unfold aka t
    pred    : i32 (b, t) prediction, hard

    """
    dropout = identity
    with scope('infer'):
        with scope('encode'):
            w = self.position(self.max_src) + self.emb_src(self.src)
            w = self.encode(w, self.mask_src, dropout) # bds
        with scope('decode'):
            cap = placeholder(tf.int32, (), self.cap)
            msk = tf.log(tf.expand_dims(causal_mask(cap), axis= 0)) # 1tt
            pos = self.position(cap) # dt
            i,q = tf.constant(0), tf.zeros_like(self.src[:,:1]) + self.bos
            def body(i, q):
                j = i + 1
                x = pos[:,:j] + self.emb_tgt(q) # bdj <- bj
                x = self.decode(x, msk[:,:j,:j], w, self.mask_src, dropout) # bdj
                p = tf.expand_dims( # b1
                    tf.argmax( # b
                        self.emb_tgt( # bn
                            tf.squeeze( # bd
                                x[:,:,-1:] # bd1 <- bdj
                                , axis= -1))
                        , axis= -1, output_type= tf.int32)
                    , axis= -1)
                return j, tf.concat((q, p), axis= -1) # bk <- bj, b1
            cond = lambda i, q: ((i < cap) & ~ tf.reduce_all(tf.equal(q[:,-1], self.eos)))
            _, p = tf.while_loop(cond, body, (i, q), back_prop= False, swap_memory= True)
        pred = p[:,1:]
    return Model(self, len_tgt= cap, pred= pred)
def build(self, x, y, z):
    with scope("x"):
        x = placeholder(tf.float32, [None, self.dim_x], x, "x")
    with scope("y"):
        y = placeholder(tf.float32, [None], y, "y")
    with scope("z"):
        z = placeholder(tf.float32, [None, self.dim_z], z, "z")

    gz = self.gen(z)
    dx, dgz = self.dis(x), self.dis(gz)

    with scope("loss"):
        loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(labels= tf.ones_like(dx), logits= dx)) \
             + tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(labels= tf.zeros_like(dgz), logits= dgz))

    #with scope("d_loss"):
        #d_loss_real = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(labels=tf.ones_like(y_real)*0.9, logits=y_real))
        #d_loss_fake = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(labels=tf.zeros_like(y_fake), logits=y_fake))
        #d_loss = d_loss_real + d_loss_fake
    #with scope("g_loss"):
        #g_loss = tf.reduce_mean(
            #tf.nn.sigmoid_cross_entropy_with_logits(labels=tf.ones_like(y_fake), logits=y_fake))
    #with scope("g/d_loss"):
        #loss = d_loss_real + g_loss

    with scope("AUC"):
        _, auc_d = tf.metrics.auc(y, tf.nn.sigmoid(dx))

    with scope("train_step"):
        step = tf.train.get_or_create_global_step()
        optimizer = tf.train.AdamOptimizer()
        train_step = optimizer.apply_gradients(
            [((-grad if var.name.startswith("generator") else grad), var)
             for grad, var in optimizer.compute_gradients(loss)],
            step)

    return GAN(self, step=step, x=x, y=y, z=z,
               auc_d=auc_d, gz=gz, train_step=train_step)
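# Standalone sketch (not part of the original code) of the gradient-flip trick used
# in train_step above: a single optimizer updates both players, but gradients of
# variables whose names start with "generator" are negated, so the generator ascends
# the very loss the discriminator descends. The toy variables and loss are illustrative.
import tensorflow as tf

with tf.variable_scope("generator"):
    g = tf.get_variable("w", initializer=1.0)
with tf.variable_scope("discriminator"):
    d = tf.get_variable("w", initializer=1.0)
toy_loss = g * d
opt = tf.train.GradientDescentOptimizer(0.1)
flip_step = opt.apply_gradients(
    [((-grad if var.name.startswith("generator") else grad), var)
     for grad, var in opt.compute_gradients(toy_loss)])
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(flip_step)
    print(sess.run((g, d)))  # g moves up to 1.1, d moves down to 0.9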
def data(self, sid, tid, src= None, tgt= None):
    """-> Model with new fields

    position : Sinusoid
        src_ : i32 (b, ?) source feed, in range `[0, dim_src)`
        tgt_ : i32 (b, ?) target feed, in range `[0, dim_tgt)`
         src : i32 (b, s) source with `eos` trimmed among the batch
         tgt : i32 (b, t) target with `eos` trimmed among the batch, padded with `bos`
        mask : b8  (b, t) target sequence mask
        true : i32 (?,)   target references
     max_tgt : i32 ()     maximum target length
     max_src : i32 ()     maximum source length
    mask_tgt : f32 (1, t, t) target attention mask
    mask_src : f32 (b, 1, s) source attention mask

    """
    src_ = placeholder(tf.int32, (None, None), src, 'src_')
    tgt_ = placeholder(tf.int32, (None, None), tgt, 'tgt_')
    with scope('src'):
        src, msk, max_src = trim(src_, self.eos)
        mask_src = tf.log(tf.expand_dims(tf.to_float(msk), axis= 1))
    with scope('tgt'):
        tgt, msk, max_tgt = trim(tgt_, self.eos)
        mask = tf.pad(msk, ((0,0),(1,0)), constant_values= True)
        btru = tf.pad(tgt, ((0,0),(1,0)), constant_values= self.bos)
        true = tf.pad(tgt, ((0,0),(0,1)), constant_values= self.eos)
        true, tgt = tf.boolean_mask(true, mask), btru
        max_tgt += 1
        mask_tgt = tf.log(tf.expand_dims(causal_mask(max_tgt), axis= 0))
    return Model(
        position= Sinusoid(self.dim_emb, self.cap)
        , src_= src_, mask_src= mask_src, max_src= max_src, src= src
        , tgt_= tgt_, mask_tgt= mask_tgt, max_tgt= max_tgt, tgt= tgt
        , true= true, mask= mask
        , emb_src = self.embeds[sid]
        , emb_tgt = self.embeds[tid]
        , **self)
def data(self, src=None, tgt=None, len_cap=None):
    """-> Transformer with new fields

        src_ : i32 (b, ?) source feed, in range `[0, dim_src)`
        tgt_ : i32 (b, ?) target feed, in range `[0, dim_tgt)`
         src : i32 (b, s) source with `end` trimmed among the batch
         tgt : i32 (b, t) target with `end` trimmed among the batch
        mask : f32 (b, s) source mask
        gold : i32 (b, t) target one step ahead
    position : Sinusoid

    setting `len_cap` makes it more efficient for training. you won't be able to
    feed it longer sequences, but it doesn't affect any model parameters.

    """
    end, dim = self.end, self.dim
    count_not_all = lambda x: tf.reduce_sum(tf.to_int32(~tf.reduce_all(x, 0)))
    with tf.variable_scope('src'):
        src = src_ = placeholder(tf.int32, (None, None), src)
        len_src = count_not_all(tf.equal(src, end))
        src = src[:, :len_src]
    with tf.variable_scope('tgt'):
        tgt = tgt_ = placeholder(tf.int32, (None, None), tgt)
        len_tgt = count_not_all(tf.equal(tgt, end))
        tgt, gold = tgt[:, :len_tgt], tgt[:, 1:1 + len_tgt]
    return Transformer(
        position=Sinusoid(dim, len_cap),
        src_=src_, src=src,
        mask=tf.to_float(tf.expand_dims(tf.not_equal(src, end), 1)),
        tgt_=tgt_, tgt=tgt, gold=gold,
        **self)
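# Illustration (not part of the original code) of the `count_not_all` trimming above:
# columns that consist entirely of the `end` token across the batch are dropped, so
# each batch keeps only as many time steps as its longest sequence needs.
import numpy as np

end = 1
src = np.array([[5, 6, 7, 1, 1, 1],
                [8, 9, 1, 1, 1, 1]])
len_src = np.sum(~np.all(src == end, axis=0))  # -> 3
print(src[:, :len_src])                        # [[5 6 7], [8 9 1]]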
def build(self, x, y, lr_max, mult):
    with tf.variable_scope("x"):
        x = placeholder(tf.float32, [None, self.dim_x], x, "x")
    with tf.variable_scope("y"):
        y = placeholder(tf.float32, [None], y, "y")

    gx = self.gen(x)
    dx, dgx = self.dis(x), self.dis(gx)

    with tf.variable_scope("loss"):
        a = tf.reduce_mean(tf.abs(x - dx))
        b = tf.reduce_mean(tf.abs(gx - dgx))
        c = tf.reduce_mean(tf.abs(x - gx))
        d_vs_g = a - (b + c) / 2

        # for balancing the learning rates
        lr_d = sigmoid(d_vs_g, mult=mult)
        lr_g = (tf.constant(1.0) - lr_d) * lr_max
        lr_d = lr_d * lr_max

        # balance parameter: how much the discriminator cares about autoencoding
        # real inputs versus discriminating fakes
        sigma = 0.5
        w_fake = tf.clip_by_value(
            sigmoid(b * sigma - a, shift=0., mult=mult), 0., 0.9
        )  # always keep the fake proportion of the discriminator loss below half
        d_loss = a - b * w_fake

        # weights for the generator
        wg_fake = tf.clip_by_value(sigmoid(b - c, shift=0., mult=mult), 0., 1.0)
        wg_reconstruct = 1 - wg_fake
        g_loss = b * wg_fake + c * wg_reconstruct

    with tf.variable_scope("AUC"):
        _, auc_dgx = tf.metrics.auc(y, tf.reduce_mean((x - dgx)**2, axis=1))
        _, auc_dx = tf.metrics.auc(y, tf.reduce_mean((x - dx)**2, axis=1))
        _, auc_gx = tf.metrics.auc(y, tf.reduce_mean((x - gx)**2, axis=1))

    with scope('down'):
        g_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope="generator")
        d_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope="discriminator")
        step = tf.train.get_or_create_global_step()
        d_step = tf.train.AdamOptimizer(lr_d).minimize(d_loss, step, var_list=d_vars)
        g_step = tf.train.AdamOptimizer(lr_g).minimize(g_loss, step, var_list=g_vars)

    return DAE(self, step=step, x=x, y=y, gx=gx, dgx=dgx, dx=dx,
               auc_dgx=auc_dgx, auc_gx=auc_gx, auc_dx=auc_dx,
               g_loss=g_loss, d_loss=d_loss,
               d_step=d_step, g_step=g_step)
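# Sketch (not part of the original code) of the learning-rate balancing above,
# assuming the project's `sigmoid` helper is a shifted, scaled logistic:
# sigmoid(x, shift=0., mult=1.) = 1 / (1 + exp(-mult * (x - shift))). Whichever
# player is behind (measured by d_vs_g) gets the larger share of lr_max, and the
# two rates always sum to lr_max. Values are illustrative.
import numpy as np

def sigmoid(x, shift=0., mult=1.):
    return 1.0 / (1.0 + np.exp(-mult * (x - shift)))

lr_max, mult = 1e-3, 20.0
for d_vs_g in (-0.2, 0.0, 0.2):   # positive d_vs_g means the discriminator is behind
    s = sigmoid(d_vs_g, mult=mult)
    lr_d, lr_g = s * lr_max, (1.0 - s) * lr_max
    print(d_vs_g, lr_d, lr_g, lr_d + lr_g)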
def build(self, x, y, context_weight, loss, lam=0., weight_type="normal"):
    with scope("x"):
        x = placeholder(tf.float32, [None, None, None, self.channel_x], x, "x")
    with scope("y"):
        y = placeholder(tf.float32, [None], y, "y")

    gx = self.gen(x)
    dx = {k: v(x) for k, v in self.dis.items()}
    dgx = {k: v(gx) for k, v in self.dis.items()}
    #dx, dgx = self.dis(x), self.dis(gx)

    with scope("loss"):
        d_loss = [tf.reduce_mean(
                      tf.nn.sigmoid_cross_entropy_with_logits(
                          labels=tf.ones_like(dx[k]) * 0.9, logits=dx[k]))
                  + tf.reduce_mean(
                      tf.nn.sigmoid_cross_entropy_with_logits(
                          labels=tf.zeros_like(dgx[k]), logits=dgx[k]))
                  for k in dx.keys()]
        ### old d_loss
        #d_loss_real, d_loss_fake = [], []
        #for k in dx.keys():
            #d_loss_real.append(tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(labels=tf.ones_like(dx[k])*0.9, logits=dx[k])))
            #d_loss_fake.append(tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(labels=tf.zeros_like(dgx[k]), logits=dgx[k])))
        #if loss=="mean":
            #d_loss = tf.reduce_mean(d_loss_real) + tf.reduce_mean(d_loss_fake)
        #elif loss=="max":
            #d_loss = tf.reduce_mean(d_loss_real) + tf.reduce_mean(d_loss_fake)
        #elif loss=="softmax":
            #d_loss = tf.reduce_mean(d_loss_real) + tf.reduce_mean(d_loss_fake)

        epsilon = 1e-10
        loss_rec = tf.reduce_mean(
            -tf.reduce_sum(x * tf.log(epsilon + gx)
                           + (1 - x) * tf.log(epsilon + 1 - gx), axis=1))
        loss_g_fake = [
            tf.reduce_mean(
                tf.nn.sigmoid_cross_entropy_with_logits(
                    labels=tf.ones_like(dgx_), logits=dgx_))
            for dgx_ in dgx.values()]

        lam = placeholder(tf.float32, None, lam, "lam")  # only for softmax, otherwise dummy
        with scope("lambda"):
            if loss == "softmax_self_challenged":
                trained_l = tf.Variable(initial_value=-2., name='controlled_lambda')
                used_lam = tf.nn.softplus(trained_l, name='used_lambda')
            else:
                used_lam = lam

        if loss == "mean":
            gl_adv = tf.reduce_mean(loss_g_fake)
            g_loss = context_weight * loss_rec + gl_adv
        elif loss == "max":
            # max picks the biggest loss, i.e. the best discriminator's feedback is used
            gl_adv = tf.reduce_max(loss_g_fake)
            g_loss = context_weight * loss_rec + gl_adv
        elif "softmax" in loss:
            # if lambda is self_learnt
            if used_lam == 0.:
                weights = tf.ones_like(loss_g_fake)
            else:
                if weight_type == 'log':
                    weights = tf.pow(loss_g_fake, used_lam)
                else:
                    weights = tf.exp(used_lam * loss_g_fake)
            gl_adv = weighted_arithmetic(weights, loss_g_fake)
            if loss == "softmax":
                g_loss = context_weight * loss_rec + gl_adv
            else:
                g_loss = context_weight * loss_rec + gl_adv - 0.001 * used_lam
        #g_loss = weight* loss_rec + tf.reduce_mean(tf.nn.softmax(loss_g_fake)*loss_g_fake)

    with scope("AUC"):
        #_, auc_dgx = tf.metrics.auc(y, tf.nn.sigmoid(tf.reduce_mean(list(dgx.values()))))
        #_, auc_dx = tf.metrics.auc(y, tf.nn.sigmoid(tf.reduce_mean(list(dx.values()))))
        _, auc_gx = tf.metrics.auc(y, tf.reduce_mean((x - gx)**2, axis=(1, 2, 3)))

    g_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope="generator")
    if loss == "softmax_self_challenged":
        lambda_var = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope="loss/lambda")
        g_vars.extend(lambda_var)
    #d_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope="discriminator")
    d_vars = {i: tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope=f"discriminator_{i}")
              for i in dx.keys()}

    with scope('train_step'):
        step = tf.train.get_or_create_global_step()
        optimizer = tf.train.AdamOptimizer()
        #d_step = optimizer.minimize(d_loss, step, var_list=d_vars)
        d_step = [optimizer.minimize(loss, var_list=d_vars[i])
                  for i, loss in enumerate(d_loss)]
        g_step = optimizer.minimize(g_loss, step, var_list=g_vars)

    return MG_GAN(self, lam=used_lam, step=step, x=x, y=y, gx=gx
                  #, auc_dgx=auc_dgx
                  , auc_gx=auc_gx
                  #, auc_dx=auc_dx
                  , g_step=g_step, d_step=d_step,
                  gl_rec=context_weight * loss_rec,
                  gl_lam=0.001 * used_lam,
                  gl_adv=gl_adv,
                  g_loss=g_loss, d_loss=d_loss,
                  d_loss_mean=tf.reduce_mean(d_loss),
                  d_max=tf.argmax(d_loss))
def vAe(mode,
        src=None,
        tgt=None,
        # model spec
        dim_tgt=8192,
        dim_emb=512,
        dim_rep=1024,
        rnn_layers=3,
        bidirectional=True,
        bidir_stacked=True,
        attentive=False,
        logit_use_embed=True,
        # training spec
        accelerate=1e-4,
        learn_rate=1e-3,
        bos=2,
        eos=1):

    # dim_tgt : vocab size
    # dim_emb : model dimension
    # dim_rep : representation dimension
    #
    # unk=0 for word dropout

    assert mode in ('train', 'valid', 'infer')
    self = Record(bos=bos, eos=eos)

    with scope('step'):
        step = self.step = tf.train.get_or_create_global_step()
        rate = accelerate * tf.to_float(step)
        rate_keepwd = self.rate_keepwd = tf.sigmoid(rate)
        rate_anneal = self.rate_anneal = tf.tanh(rate)
        rate_update = self.rate_update = learn_rate / (tf.sqrt(rate) + 1.0)

    with scope('src'):
        src = self.src = placeholder(tf.int32, (None, None), src, 'src')
        src = tf.transpose(src)  # time major order
        src, msk_src, len_src = trim(src, eos)

    with scope('tgt'):
        tgt = self.tgt = placeholder(tf.int32, (None, None), tgt, 'tgt')
        tgt = tf.transpose(tgt)  # time major order
        tgt, msk_tgt, len_tgt = trim(tgt, eos)
        msk_tgt = tf.pad(msk_tgt, ((1, 0), (0, 0)), constant_values=True)
        # pads for decoder : lead=[bos]+tgt -> gold=tgt+[eos]
        lead, gold = tgt, tf.pad(tgt, paddings=((0, 1), (0, 0)), constant_values=eos)
        if 'train' == mode:
            lead *= tf.to_int32(tf.random_uniform(tf.shape(lead)) < rate_keepwd)
        lead = self.lead = tf.pad(lead, paddings=((1, 0), (0, 0)), constant_values=bos)

    # s : src length
    # t : tgt length plus one padding, either eos or bos
    # b : batch size
    #
    # len_src : b aka s
    # msk_src : sb without padding
    # msk_tgt : tb with eos
    #
    # lead : tb with bos
    # gold : tb with eos

    with scope('embed'):
        b = (6 / (dim_tgt / dim_emb + 1))**0.5
        embedding = tf.get_variable('embedding', (dim_tgt, dim_emb),
                                    initializer=tf.random_uniform_initializer(-b, b))
        emb_tgt = tf.gather(embedding, lead, name='emb_tgt')  # (t, b) -> (t, b, dim_emb)
        emb_src = tf.gather(embedding, src, name='emb_src')   # (s, b) -> (s, b, dim_emb)

    with scope('encode'):
        # (s, b, dim_emb) -> (b, dim_emb)
        reverse = partial(tf.reverse_sequence, seq_lengths=len_src, seq_axis=0, batch_axis=1)

        if bidirectional and bidir_stacked:
            for i in range(rnn_layers):
                with scope("rnn{}".format(i + 1)):
                    emb_fwd, _ = layer_rnn(1, dim_emb, name='fwd')(emb_src)
                    emb_bwd, _ = layer_rnn(1, dim_emb, name='bwd')(reverse(emb_src))
                    hs = emb_src = tf.concat((emb_fwd, reverse(emb_bwd)), axis=-1)
        elif bidirectional:
            with scope("rnn"):
                emb_fwd, _ = layer_rnn(rnn_layers, dim_emb, name='fwd')(emb_src)
                emb_bwd, _ = layer_rnn(rnn_layers, dim_emb, name='bwd')(reverse(emb_src))
                hs = tf.concat((emb_fwd, reverse(emb_bwd)), axis=-1)
        else:
            hs, _ = layer_rnn(rnn_layers, dim_emb, name='rnn')(emb_src)

        with scope('cata'):
            # extract the final states from the outputs: bd <- sbd, b2
            h = tf.gather_nd(
                hs,
                tf.stack((len_src - 1, tf.range(tf.size(len_src), dtype=tf.int32)), axis=1))
            if attentive:  # todo fixme
                # the values are the outputs from all non-padding steps;
                # the queries are the final states;
                h = layer_nrm(h + tf.squeeze(         # bd <- bd1
                    attention(                        # bd1 <- bd1, bds, b1s
                        tf.expand_dims(h, axis=2),    # query: bd1 <- bd
                        tf.transpose(hs, (1, 2, 0)),  # value: bds <- sbd
                        tf.log(tf.to_float(           # -inf,0 mask: b1s <- sb <- bs
                            tf.expand_dims(tf.transpose(msk_src), axis=1))),
                        int(h.shape[-1])), 2))

    with scope('latent'):
        # (b, dim_emb) -> (b, dim_rep) -> (b, dim_emb)
        # h = layer_aff(h, dim_emb, name='in')
        mu = self.mu = layer_aff(h, dim_rep, name='mu')
        lv = self.lv = layer_aff(h, dim_rep, name='lv')
        with scope('z'):
            h = mu
            if 'train' == mode:
                h += tf.exp(0.5 * lv) * tf.random_normal(shape=tf.shape(lv))
            self.z = h
        h = layer_aff(h, dim_emb, name='ex')

    with scope('decode'):
        # (b, dim_emb) -> (t, b, dim_emb) -> (?, dim_emb)
        h = self.state_in = tf.stack((h,) * rnn_layers)
        h, _ = _, (self.state_ex,) = layer_rnn(rnn_layers, dim_emb, name='rnn')(
            emb_tgt, initial_state=(h,))
        if 'infer' != mode:
            h = tf.boolean_mask(h, msk_tgt)
        h = layer_aff(h, dim_emb, name='out')

    with scope('logits'):
        # (?, dim_emb) -> (?, dim_tgt)
        if logit_use_embed:
            logits = self.logits = tf.tensordot(h, (dim_emb**-0.5) * tf.transpose(embedding), 1)
        else:
            logits = self.logits = layer_aff(h, dim_tgt)

    with scope('prob'):
        prob = self.prob = tf.nn.softmax(logits)
    with scope('pred'):
        pred = self.pred = tf.argmax(logits, -1, output_type=tf.int32)

    if 'infer' != mode:
        labels = tf.boolean_mask(gold, msk_tgt, name='labels')
        with scope('errt'):
            errt_samp = self.errt_samp = tf.to_float(tf.not_equal(labels, pred))
            errt = self.errt = tf.reduce_mean(errt_samp)
        with scope('loss'):
            with scope('loss_gen'):
                loss_gen_samp = self.loss_gen_samp = tf.nn.sparse_softmax_cross_entropy_with_logits(
                    labels=labels, logits=logits)
                loss_gen = self.loss_gen = tf.reduce_mean(loss_gen_samp)
            with scope('loss_kld'):
                loss_kld_samp = self.loss_kld_samp = 0.5 * (tf.square(mu) + tf.exp(lv) - lv - 1.0)
                loss_kld = self.loss_kld = tf.reduce_mean(loss_kld_samp)
            loss = self.loss = rate_anneal * loss_kld + loss_gen

    if 'train' == mode:
        with scope('train'):
            train_step = self.train_step = tf.train.AdamOptimizer(rate_update).minimize(loss, step)

    return self
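# Sanity-check sketch (not part of the original code) for the KL term above:
# 0.5 * (mu**2 + exp(lv) - lv - 1) is the per-dimension closed form of
# KL(N(mu, exp(lv)) || N(0, 1)), which a Monte Carlo estimate of
# E_q[log q(z) - log p(z)] should reproduce. Numbers are illustrative.
import numpy as np

rng = np.random.RandomState(0)
mu, lv = 0.7, -0.4
sd = np.exp(0.5 * lv)
z = mu + sd * rng.randn(10**6)
log_q = -0.5 * (np.log(2 * np.pi) + lv + (z - mu)**2 / np.exp(lv))
log_p = -0.5 * (np.log(2 * np.pi) + z**2)
print(np.mean(log_q - log_p))                  # Monte Carlo estimate, ~0.28
print(0.5 * (mu**2 + np.exp(lv) - lv - 1.0))   # closed form, 0.2802...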
def VAE(data, btlnk_dim, data_dim, dense_dim, y_dim, loss_type, accelerate):

    def encoder(x, dim_btlnk, dim_x):
        x = Normalize(dim_btlnk, "nrm")(tf.nn.elu(Linear(dim_btlnk, dim_x, name= 'lin')(x)))
        with tf.variable_scope('latent'):
            mu = Linear(dim_btlnk, dim_btlnk, name= 'mu')(x)
            lv = Linear(dim_btlnk, dim_btlnk, name= 'lv')(x)
            #lv = Linear(dim_btlnk, dim_x, name= 'lv')(x)
            #mu = Linear(dim_btlnk, dim_x, name= 'mu')(x)
        with tf.variable_scope('z'):
            z = mu + tf.exp(0.5 * lv) * tf.random_normal(shape=tf.shape(lv))
        return z, mu, lv

    def decoder(x, data_dim, btlnk_dim):
        x = Linear(data_dim, btlnk_dim)(x)
        #return tf.clip_by_value(x, 0.0, 1.0)
        return tf.nn.sigmoid(x)

    with tf.variable_scope("x"):
        x = placeholder(tf.float32, [None, data_dim], data[0], "x")
    with tf.variable_scope("y"):
        y = placeholder(tf.float32, [None], data[1], "y")

    with tf.variable_scope("encoder"):
        z, mu, lv = encoder(x, btlnk_dim, data_dim)
    with tf.variable_scope("decoder"):
        logits = decoder(z, data_dim, btlnk_dim)

    with tf.variable_scope("step"):
        step = tf.train.get_or_create_global_step()
        rate = accelerate * tf.to_float(step)
        rate_anneal = tf.tanh(rate)

    with tf.variable_scope("loss"):
        kl_loss = tf.reduce_mean(0.5 * (tf.square(mu) + tf.exp(lv) - lv - 1.0))
        if loss_type == "xtrpy":
            #loss_rec = tf.reduce_mean(tf.losses.softmax_cross_entropy(onehot_labels=x, logits=logits))
            epsilon = 1e-10
            loss_rec = tf.reduce_mean(
                -tf.reduce_sum(x * tf.log(epsilon + logits)
                               + (1 - x) * tf.log(epsilon + 1 - logits), axis=1))
        else:
            loss_rec = tf.reduce_mean(tf.abs(x - logits))
        loss = loss_rec + kl_loss * rate_anneal

    with tf.variable_scope("AUC"):
        anomaly_score = tf.reduce_mean((x - logits)**2, axis=1)
        _, auc = tf.metrics.auc(y, anomaly_score)

    with tf.variable_scope("train_step"):
        train_step = tf.train.AdamOptimizer().minimize(loss, step)

    return dict(step=step, x=x, y=y, z=z, mu=mu, logits=logits, auc=auc,
                train_step=train_step, loss=loss, kl_loss=kl_loss, loss_rec=loss_rec)
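# Illustration (not part of the original code) of the KL annealing factor used above:
# rate_anneal = tanh(accelerate * step) ramps from 0 toward 1, so the KL term is
# phased in gradually during training. The accelerate value is illustrative.
import numpy as np

accelerate = 1e-4
for step in (0, 1000, 5000, 10000, 50000):
    print(step, np.tanh(accelerate * step))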
def model(mode,
          src_dwh,
          tgt_dwh,
          src_idx=None,
          len_src=None,
          tgt_img=None,
          tgt_idx=None,
          len_tgt=None,
          num_layers=3,
          num_units=512,
          learn_rate=1e-3,
          decay_rate=1e-2,
          dropout=0.1):
    assert mode in ('train', 'valid', 'infer')
    self = Record()

    src_d, src_w, src_h = src_dwh
    tgt_d, tgt_w, tgt_h = tgt_dwh

    with scope('source'):
        # input nodes
        src_idx = self.src_idx = placeholder(tf.int32, (None, None), src_idx, 'src_idx')  # n s
        len_src = self.len_src = placeholder(tf.int32, (None, ), len_src, 'len_src')      # n

        # time major order
        src_idx = tf.transpose(src_idx, (1, 0))  # s n
        emb_src = tf.one_hot(src_idx, src_d)     # s n v

        for i in range(num_layers):
            with scope("rnn{}".format(i + 1)):
                emb_fwd, _ = tf.contrib.cudnn_rnn.CudnnGRU(
                    1, num_units, dropout=dropout, name='fwd')(
                        emb_src, training='train' == mode)
                emb_bwd, _ = tf.contrib.cudnn_rnn.CudnnGRU(
                    1, num_units, dropout=dropout, name='bwd')(
                        tf.reverse_sequence(emb_src, len_src, seq_axis=0, batch_axis=1),
                        training='train' == mode)
                emb_src = tf.concat(
                    (emb_fwd, tf.reverse_sequence(emb_bwd, len_src, seq_axis=0, batch_axis=1)),
                    axis=-1)
        # emb_src = tf.layers.dense(emb_src, num_units, name= 'reduce_concat') # s n d
        emb_src = self.emb_src = tf.transpose(emb_src, (1, 2, 0))  # n d s

    with scope('target'):
        # input nodes
        tgt_img = self.tgt_img = placeholder(tf.uint8, (None, None, tgt_h, tgt_w), tgt_img, 'tgt_img')  # n t h w
        tgt_idx = self.tgt_idx = placeholder(tf.int32, (None, None), tgt_idx, 'tgt_idx')  # n t
        len_tgt = self.len_tgt = placeholder(tf.int32, (None, ), len_tgt, 'len_tgt')      # n

        # time major order
        tgt_idx = tf.transpose(tgt_idx)                # t n
        tgt_img = tf.transpose(tgt_img, (1, 0, 2, 3))  # t n h w
        tgt_img = flatten(tgt_img, 2, 3)               # t n hw

        # normalize pixels to binary
        tgt_img = tf.to_float(tgt_img) / 255.0
        # tgt_img = tf.round(tgt_img)
        # todo consider adding noise

        # causal padding
        fire = self.fire = tf.pad(tgt_img, ((1, 0), (0, 0), (0, 0)), constant_values=0.0)
        true = self.true = tf.pad(tgt_img, ((0, 1), (0, 0), (0, 0)), constant_values=1.0)
        tidx = self.tidx = tf.pad(tgt_idx, ((0, 1), (0, 0)), constant_values=1)
        mask_tgt = tf.transpose(tf.sequence_mask(len_tgt + 1))  # t n

    with scope('decode'):
        # needs to get input from latent space to do attention or some shit
        decoder = self.decoder = tf.contrib.cudnn_rnn.CudnnGRU(num_layers, num_units, dropout=dropout)
        state_in = self.state_in = tf.zeros((num_layers, tf.shape(fire)[1], num_units))
        x, _ = _, (self.state_ex, ) = decoder(fire, initial_state=(state_in, ),
                                              training='train' == mode)

        # transform mask to -inf and 0 in order to simply sum for whatever the f**k happens next
        mask = tf.log(tf.sequence_mask(len_src, dtype=tf.float32))  # n s
        mask = tf.expand_dims(mask, 1)  # n 1 s

        # multi-head scaled dot-product attention
        x = tf.transpose(x, (1, 2, 0))  # t n d ---> n d t
        attn = Attention(num_units, num_units, 2 * num_units)(x, emb_src, mask)
        if 'train' == mode:
            attn = tf.nn.dropout(attn, 1 - dropout)
        x = Normalize(num_units)(x + attn)
        x = tf.transpose(x, (2, 0, 1))  # n d t ---> t n d

    if 'infer' != mode:
        x = tf.boolean_mask(x, mask_tgt)
        true = tf.boolean_mask(true, mask_tgt)
        tidx = tf.boolean_mask(tidx, mask_tgt)

    with scope('output'):
        y = tf.layers.dense(x, tgt_h * tgt_w, name='dense_img')
        z = tf.layers.dense(x, tgt_d, name='logit_idx')
        pred = self.pred = tf.clip_by_value(y, 0.0, 1.0)
        prob = self.prob = tf.nn.softmax(z)
        pidx = self.pidx = tf.argmax(z, axis=-1, output_type=tf.int32)

    with scope('losses'):
        diff = true - pred
        mae = self.mae = tf.reduce_mean(tf.abs(diff), axis=-1)
        mse = self.mse = tf.reduce_mean(tf.square(diff), axis=-1)
        xid = self.xid = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=z, labels=tidx)
        err = self.err = tf.not_equal(tidx, pidx)
        loss = tf.reduce_mean(xid)

    with scope('update'):
        step = self.step = tf.train.get_or_create_global_step()
        lr = self.lr = learn_rate / (1.0 + decay_rate * tf.sqrt(tf.to_float(step)))
        if 'train' == mode:
            down = self.down = tf.train.AdamOptimizer(lr).minimize(loss, step)

    return self
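# Illustration (not part of the original code) of the -inf/0 additive mask built
# above with tf.log(tf.sequence_mask(...)): adding it to the attention scores before
# the softmax gives exactly zero weight to padded source steps. Numbers are illustrative.
import numpy as np

np.seterr(divide='ignore')                      # log(0) -> -inf is intentional here
len_src, s = 3, 5
mask = np.log(np.arange(s) < len_src)           # [0, 0, 0, -inf, -inf]
scores = np.array([2.0, 1.0, 0.5, 3.0, 3.0]) + mask
weights = np.exp(scores) / np.sum(np.exp(scores))
print(weights)                                  # the last two entries are exactly 0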
def feed(src, tgt, cws=cws, cwt=cwt):
    src_idx, len_src = cws(src, ret_img=False, ret_idx=True)
    tgt_img, tgt_idx, len_tgt = cwt(tgt, ret_img=True, ret_idx=True)
    return src_idx, len_src, tgt_img, tgt_idx, len_tgt


def batch(src=src_train, tgt=tgt_train, size=128, seed=0):
    for bat in batch_sample(len(tgt), size, seed):
        yield feed(src[bat], tgt[bat])


src_idx, len_src, tgt_img, tgt_idx, len_tgt = pipe(
    batch, (tf.int32, tf.int32, tf.uint8, tf.int32, tf.int32))

train = model('train', cws.dwh(), cwt.dwh(), src_idx, len_src, tgt_img, tgt_idx, len_tgt)
valid = model('valid', cws.dwh(), cwt.dwh())

dummy = tuple(placeholder(tf.float32, ()) for _ in range(3))


def log(step,
        wtr=tf.summary.FileWriter("../log/{}".format(trial)),
        log=tf.summary.merge((tf.summary.scalar('step_mae', dummy[0]),
                              tf.summary.scalar('step_xid', dummy[1]),
                              tf.summary.scalar('step_err', dummy[2]))),
        fet=(valid.mae, valid.xid, valid.err),
        inp=(valid.src_idx, valid.len_src, valid.tgt_img, valid.tgt_idx, valid.len_tgt),
        src=src_valid,
        tgt=tgt_valid,
        bat=256):
    stats = [
        sess.run(fet, dict(zip(inp, feed(src[i:j], tgt[i:j]))))
        for i, j in partition(len(tgt), bat)
def build(self, x, y, z, loss_type):
    d_scale_factor = tf.constant(0.)  # tf.constant(0.25)
    g_scale_factor = tf.constant(0.)  # tf.constant(1 - 0.75/2)

    with scope("x"):
        x = placeholder(tf.float32, [None, self.dim_x], x, "x")
    with scope("y"):
        y = placeholder(tf.float32, [None], y, "y")
    with scope("z"):
        z = placeholder(tf.float32, [None, self.dim_noise], z, "z")

    zx, mu, lv, hl_e = self.enc(x)
    gzx = self.gen(zx)
    #gz = self.gen(z)
    dx, hl_dx = self.dis(x)
    dgzx, hl_dgzx = self.dis(gzx)
    #dgz, hl_dgz = self.dis(gz)

    with tf.variable_scope("step"):
        step = tf.train.get_or_create_global_step()
        rate = self.accelerate * tf.to_float(step)
        rate_anneal = tf.tanh(rate)

    with scope("loss"):
        dx_loss = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(
                labels=tf.ones_like(dx) - d_scale_factor, logits=dx))
        dgzx_loss = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(
                labels=tf.zeros_like(dgzx), logits=dgzx))
        #dgz_loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(labels=tf.zeros_like(dgz), logits=dgz))
        d_loss = dx_loss + dgzx_loss  #+ dgz_loss

        kl_loss = tf.reduce_mean(0.5 * (tf.square(mu) + tf.exp(lv) - lv - 1.0))
        gzx_loss = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(
                labels=tf.ones_like(dgzx) - g_scale_factor, logits=dgzx))
        if loss_type == "xtrpy":
            epsilon = 1e-10
            ftr_loss = tf.reduce_mean(
                -tf.reduce_sum(x * tf.log(epsilon + gzx)
                               + (1 - x) * tf.log(epsilon + 1 - gzx), axis=1))
            g_loss = gzx_loss / 10 + ftr_loss / 5 + kl_loss * rate_anneal
        else:
            ftr_loss = tf.reduce_mean(tf.abs(x - gzx))
            g_loss = gzx_loss / 2 + ftr_loss * 10 + kl_loss * rate_anneal

    with scope("AUC"):
        _, auc_gzx = tf.metrics.auc(y, tf.reduce_mean((x - gzx)**2, axis=1))
        _, auc_dx = tf.metrics.auc(y, tf.nn.sigmoid(dx))
        _, auc_dgzx = tf.metrics.auc(y, tf.nn.sigmoid(dgzx))

    g_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope="generator")
    # extend rather than append: the encoder variables join the generator's var_list
    g_vars.extend(tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope="encoder"))
    print(g_vars)
    d_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope="discriminator")
    print(d_vars)

    with scope('train_step'):
        #optimizer = tf.train.RMSPropOptimizer()
        optimizer = tf.train.AdamOptimizer()
        d_step = optimizer.minimize(d_loss, step, var_list=d_vars)
        g_step = optimizer.minimize(g_loss, step, var_list=g_vars)

    return VAEGAN(self, step=step, x=x, y=y, z=z, zx=zx,
                  mu=mu, lv=lv,
                  m=tf.reduce_mean(mu), l=tf.reduce_mean(lv)
                  #, gz=gz
                  , gzx=gzx,
                  auc_gzx=auc_gzx, auc_dx=auc_dx, auc_dgzx=auc_dgzx,
                  g_step=g_step, d_step=d_step,
                  g_loss=g_loss, d_loss=d_loss
                  #,gz_loss=gz_loss
                  , gzx_loss=gzx_loss, ftr_loss=ftr_loss, kl_loss=kl_loss,
                  dx_loss=dx_loss
                  #, dgz_loss=dgz_loss
                  , dgzx_loss=dgzx_loss)