Example #1
    def build(self, x, y, weight):
        with scope("x"):
            x = placeholder(tf.float32, [None, self.dim_x], x, "x")
        with scope("y"):
            y = placeholder(tf.float32, [None], y, "y")

        gx = self.gen(x)
        dx, dgx = self.dis(x), self.dis(gx)

        with scope("loss"):
            d_loss_real = tf.reduce_mean(
                tf.nn.sigmoid_cross_entropy_with_logits(
                    labels=tf.ones_like(dx) * 0.9, logits=dx))
            d_loss_fake = tf.reduce_mean(
                tf.nn.sigmoid_cross_entropy_with_logits(
                    labels=tf.zeros_like(dgx), logits=dgx))
            d_loss = d_loss_real + d_loss_fake

            epsilon = 1e-10
            loss_rec = tf.reduce_mean(
                -tf.reduce_sum(x * tf.log(epsilon + gx) +
                               (1 - x) * tf.log(epsilon + 1 - gx),
                               axis=1))
            g_loss = weight * loss_rec \
                + tf.reduce_mean(
                    tf.nn.sigmoid_cross_entropy_with_logits(
                        labels=tf.ones_like(dgx), logits=dgx))

        with scope("AUC"):
            _, auc_dgx = tf.metrics.auc(y, tf.nn.sigmoid(dgx))
            _, auc_dx = tf.metrics.auc(y, tf.nn.sigmoid(dx))
            _, auc_gx = tf.metrics.auc(y, tf.reduce_mean((x - gx)**2, axis=1))

        g_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                   scope="generator")
        d_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                   scope="discriminator")

        with scope('train_step'):
            step = tf.train.get_or_create_global_step()
            optimizer = tf.train.AdamOptimizer()
            d_step = optimizer.minimize(d_loss, step, var_list=d_vars)
            g_step = optimizer.minimize(g_loss, step, var_list=g_vars)

        return AEGAN(self,
                     step=step,
                     x=x,
                     y=y,
                     gx=gx,
                     auc_dgx=auc_dgx,
                     auc_gx=auc_gx,
                     auc_dx=auc_dx,
                     g_step=g_step,
                     d_step=d_step,
                     g_loss=g_loss,
                     d_loss=d_loss)
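
The discriminator loss above uses one-sided label smoothing (real targets of 0.9) and the reconstruction term is an epsilon-stabilized binary cross entropy. A minimal standalone sketch of both pieces, assuming TensorFlow 1.x as in the examples; the function names here are illustrative, not taken from the project:

import tensorflow as tf

def d_loss_smoothed(dx, dgx, smooth=0.9):
    # one-sided label smoothing: real targets are 0.9 instead of 1.0,
    # which keeps the discriminator from saturating its logits
    real = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(
        labels=tf.ones_like(dx) * smooth, logits=dx))
    fake = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(
        labels=tf.zeros_like(dgx), logits=dgx))
    return real + fake

def bce_reconstruction(x, gx, epsilon=1e-10):
    # binary cross entropy on sigmoid outputs; epsilon keeps tf.log away
    # from an exact zero argument
    return tf.reduce_mean(
        -tf.reduce_sum(x * tf.log(epsilon + gx)
                       + (1 - x) * tf.log(epsilon + 1 - gx), axis=1))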
Example #2
File: ae.py Project: jmilde/gan
def ae(data, btlnk_dim, data_dim, dense_dim, y_dim, loss_type):
    def encoder(x, btlnk_dim):
        x = normalize(
            tf.nn.relu(tf.keras.layers.Dense(btlnk_dim, use_bias=False)(x)),
            "layer_norm_1")
        return x

    def decoder(x, data_dim):
        x = tf.keras.layers.Dense(data_dim, use_bias=False)(x)
        #return tf.clip_by_value(x, 0.0, 1.0)
        return tf.sigmoid(x)

    with tf.variable_scope("x"):
        x = placeholder(tf.float32, [None, data_dim], data[0], "x")
    with tf.variable_scope("y"):
        y = placeholder(tf.float32, [None], data[1], "y")

    with tf.variable_scope("encoder"):
        z = encoder(x, btlnk_dim)

    with tf.variable_scope("decoder"):
        logits = decoder(z, data_dim)

    with tf.variable_scope("loss"):
        if loss_type == "xtrpy":
            #loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(labels=x, logits=logits))
            epsilon = 1e-10
            loss = tf.reduce_mean(
                -tf.reduce_sum(x * tf.log(epsilon + logits) +
                               (1 - x) * tf.log(epsilon + 1 - logits),
                               axis=1))
        else:
            loss = tf.reduce_mean(tf.abs(x - logits))
    step = tf.train.get_or_create_global_step()

    with tf.variable_scope("AUC"):
        anomaly_score = tf.reduce_mean((x - logits)**2, axis=1)
        _, auc = tf.metrics.auc(y, anomaly_score)

    with tf.variable_scope("train_step"):
        train_step = tf.train.AdamOptimizer().minimize(loss, step)

    return dict(step=step,
                x=x,
                y=y,
                logits=logits,
                auc=auc,
                train_step=train_step,
                loss=loss)
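
A hedged usage sketch for the `ae` graph above: it assumes TensorFlow 1.x, synthetic arrays standing in for `data`, and that the project's helpers (`placeholder`, `normalize`) are importable, with `placeholder` wrapping `tf.placeholder_with_default` so the arrays act as default feeds. `tf.metrics.auc` creates local variables, so those need initializing too:

import numpy as np
import tensorflow as tf

data = (np.random.rand(256, 64).astype(np.float32),        # x
        np.random.randint(0, 2, 256).astype(np.float32))    # y, anomaly labels
model = ae(data, btlnk_dim=16, data_dim=64, dense_dim=32, y_dim=1,
           loss_type="xtrpy")

with tf.Session() as sess:
    sess.run((tf.global_variables_initializer(),
              tf.local_variables_initializer()))            # locals for tf.metrics.auc
    for _ in range(100):
        sess.run(model["train_step"])
    print(sess.run((model["loss"], model["auc"])))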
Example #3
    def infer(self):
        """-> Model with new fields, autoregressive

        len_tgt : i32 ()      steps to unfold aka t
           pred : i32 (b, t)  prediction, hard

        """
        dropout = identity
        with scope('infer'):
            with scope('encode'):
                w = self.position(self.max_src) + self.emb_src(self.src)
                w = self.encode(w, self.mask_src, dropout) # bds
            with scope('decode'):
                cap = placeholder(tf.int32, (), self.cap)
                msk = tf.log(tf.expand_dims(causal_mask(cap), axis= 0)) # 1tt
                pos = self.position(cap) # dt
                i,q = tf.constant(0), tf.zeros_like(self.src[:,:1]) + self.bos
                def body(i, q):
                    j = i + 1
                    x = pos[:,:j] + self.emb_tgt(q) # bdj <- bj
                    x = self.decode(x, msk[:,:j,:j], w, self.mask_src, dropout) # bdj
                    p = tf.expand_dims( # b1
                        tf.argmax( # b
                            self.emb_tgt( # bn
                                tf.squeeze( # bd
                                    x[:,:,-1:] # bd1 <- bdj
                                    , axis= -1))
                            , axis= -1, output_type= tf.int32)
                        , axis= -1)
                    return j, tf.concat((q, p), axis= -1) # bk <- bj, b1
                cond = lambda i, q: ((i < cap) & ~ tf.reduce_all(tf.equal(q[:,-1], self.eos)))
                _, p = tf.while_loop(cond, body, (i, q), back_prop= False, swap_memory= True)
                pred = p[:,1:]
        return Model(self, len_tgt= cap, pred= pred)
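
The decoder loop above depends on a `causal_mask` helper that is not shown. A plausible sketch of such a lower-triangular mask in TensorFlow 1.x (an assumption about the helper, not its actual definition):

import tensorflow as tf

def causal_mask(t, dtype=tf.float32):
    # (t, t) lower-triangular ones: step i may only attend to steps <= i;
    # taking tf.log of it, as above, turns that into a 0 / -inf additive mask
    return tf.matrix_band_part(tf.ones(tf.stack((t, t)), dtype=dtype), -1, 0)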
Example #4
File: gan.py Project: jmilde/gan
    def build(self, x, y, z):
        with scope("x"):
            x = placeholder(tf.float32, [None, self.dim_x], x, "x")
        with scope("y"):
            y = placeholder(tf.float32, [None], y, "y")
        with scope("z"):
            z = placeholder(tf.float32, [None, self.dim_z], z, "z")

        gz = self.gen(z)
        dx, dgz = self.dis(x), self.dis(gz)

        with scope("loss"):
            loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(labels= tf.ones_like(dx), logits= dx)) \
                + tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(labels= tf.zeros_like(dgz), logits= dgz))

            #with scope("d_loss"):
            #d_loss_real = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(labels=tf.ones_like(y_real)*0.9, logits=y_real))
            #d_loss_fake = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(labels=tf.zeros_like(y_fake), logits=y_fake))
            #d_loss = d_loss_real + d_loss_fake
            #with scope("g_loss"):
            #g_loss = tf.reduce_mean(
            #tf.nn.sigmoid_cross_entropy_with_logits(labels=tf.ones_like(y_fake), logits=y_fake))
            #with scope("g/d_loss"):
            #loss = d_loss_real + g_loss

        with scope("AUC"):
            _, auc_d = tf.metrics.auc(y, tf.nn.sigmoid(dx))

        with scope("train_step"):
            step = tf.train.get_or_create_global_step()
            optimizer = tf.train.AdamOptimizer()
            train_step = optimizer.apply_gradients(
                [((-grad if var.name.startswith("generator") else grad), var)
                 for grad, var in optimizer.compute_gradients(loss)], step)

        return GAN(self,
                   step=step,
                   x=x,
                   y=y,
                   z=z,
                   auc_d=auc_d,
                   gz=gz,
                   train_step=train_step)
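
Example #4 trains both networks with a single `apply_gradients` call by flipping the sign of the generator's gradients, so the generator ascends the shared loss while the discriminator descends it. A standalone toy sketch of the same pattern (variable names are illustrative), assuming TensorFlow 1.x:

import tensorflow as tf

with tf.variable_scope("generator"):
    g = tf.get_variable("w", initializer=1.0)
with tf.variable_scope("discriminator"):
    d = tf.get_variable("w", initializer=1.0)

loss = g * d  # toy saddle objective: the discriminator minimizes, the generator maximizes
optimizer = tf.train.AdamOptimizer()
train_step = optimizer.apply_gradients(
    [((-grad if var.name.startswith("generator") else grad), var)
     for grad, var in optimizer.compute_gradients(loss)])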
Example #5
    def data(self, sid, tid, src= None, tgt= None):
        """-> Model with new fields

        position : Sinusoid
            src_ : i32 (b, ?)     source feed, in range `[0, dim_src)`
            tgt_ : i32 (b, ?)     target feed, in range `[0, dim_tgt)`
             src : i32 (b, s)     source with `eos` trimmed among the batch
             tgt : i32 (b, t)     target with `eos` trimmed among the batch, padded with `bos`
            mask : b8  (b, t)     target sequence mask
            true : i32 (?,)       target references
         max_tgt : i32 ()         maximum target length
         max_src : i32 ()         maximum source length
        mask_tgt : f32 (1, t, t)  target attention mask
        mask_src : f32 (b, 1, s)  source attention mask

        """
        src_ = placeholder(tf.int32, (None, None), src, 'src_')
        tgt_ = placeholder(tf.int32, (None, None), tgt, 'tgt_')
        with scope('src'):
            src, msk, max_src = trim(src_, self.eos)
            mask_src = tf.log(tf.expand_dims(tf.to_float(msk), axis= 1))
        with scope('tgt'):
            tgt, msk, max_tgt = trim(tgt_, self.eos)
            mask = tf.pad(msk, ((0,0),(1,0)), constant_values= True)
            btru = tf.pad(tgt, ((0,0),(1,0)), constant_values= self.bos)
            true = tf.pad(tgt, ((0,0),(0,1)), constant_values= self.eos)
            true, tgt = tf.boolean_mask(true, mask), btru
            max_tgt += 1
            mask_tgt = tf.log(tf.expand_dims(causal_mask(max_tgt), axis= 0))
        return Model(
            position= Sinusoid(self.dim_emb, self.cap)
            , src_= src_, mask_src= mask_src, max_src= max_src, src= src
            , tgt_= tgt_, mask_tgt= mask_tgt, max_tgt= max_tgt, tgt= tgt
            , true= true, mask= mask
            , emb_src = self.embeds[sid]
            , emb_tgt = self.embeds[tid]
            , **self)
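
`trim` is a project helper that is not shown; from how its three return values are used here (trimmed batch, boolean token mask, maximum length), a plausible TensorFlow 1.x sketch might be (an assumption, not the project's definition):

import tensorflow as tf

def trim(x, eos):
    # drop trailing columns in which every row is already eos
    not_eos = tf.not_equal(x, eos)                 # (b, t) True on real tokens
    max_len = tf.reduce_sum(                       # length of the longest sequence
        tf.to_int32(tf.reduce_any(not_eos, axis=0)))
    return x[:, :max_len], not_eos[:, :max_len], max_len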
Example #6
    def data(self, src=None, tgt=None, len_cap=None):
        """-> Transformer with new fields

            src_ : i32 (b, ?) source feed, in range `[0, dim_src)`
            tgt_ : i32 (b, ?) target feed, in range `[0, dim_tgt)`
             src : i32 (b, s) source with `end` trimmed among the batch
             tgt : i32 (b, t) target with `end` trimmed among the batch
            mask : f32 (b, s) source mask
            gold : i32 (b, t) target one step ahead
        position : Sinusoid

        setting `len_cap` makes it more efficient for training.  you
        won't be able to feed it longer sequences, but it doesn't
        affect any model parameters.

        """
        end, dim = self.end, self.dim
        count_not_all = lambda x: tf.reduce_sum(
            tf.to_int32(~tf.reduce_all(x, 0)))
        with tf.variable_scope('src'):
            src = src_ = placeholder(tf.int32, (None, None), src)
            len_src = count_not_all(tf.equal(src, end))
            src = src[:, :len_src]
        with tf.variable_scope('tgt'):
            tgt = tgt_ = placeholder(tf.int32, (None, None), tgt)
            len_tgt = count_not_all(tf.equal(tgt, end))
            tgt, gold = tgt[:, :len_tgt], tgt[:, 1:1 + len_tgt]
        return Transformer(position=Sinusoid(dim, len_cap),
                           src_=src_,
                           src=src,
                           mask=tf.to_float(
                               tf.expand_dims(tf.not_equal(src, end), 1)),
                           tgt_=tgt_,
                           tgt=tgt,
                           gold=gold,
                           **self)
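
`gold` is simply the target shifted one step ahead, so the prediction at each position is scored against the next token. A tiny concrete illustration of the trimming and the shift (NumPy, arbitrary token ids, `end` = 1):

import numpy as np

end = 1
tgt = np.array([[2, 5, 7, 1, 1]])     # one end-padded sequence
len_tgt = 3                           # columns that are not all `end`
print(tgt[:, :len_tgt])               # [[2 5 7]]  decoder input
print(tgt[:, 1:1 + len_tgt])          # [[5 7 1]]  gold, one step ahead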
Example #7
File: dae.py Project: jmilde/gan
    def build(self, x, y, lr_max, mult):
        with tf.variable_scope("x"):
            x = placeholder(tf.float32, [None, self.dim_x], x, "x")
        with tf.variable_scope("y"):
            y = placeholder(tf.float32, [None], y, "y")

        gx = self.gen(x)
        dx, dgx = self.dis(x), self.dis(gx)

        with tf.variable_scope("loss"):
            a = tf.reduce_mean(tf.abs(x - dx))
            b = tf.reduce_mean(tf.abs(gx - dgx))
            c = tf.reduce_mean(tf.abs(x - gx))
            d_vs_g = a - (b + c) / 2  # for balancing the learning rates
            lr_d = sigmoid(d_vs_g, mult=mult)
            lr_g = (tf.constant(1.0) - lr_d) * lr_max
            lr_d = lr_d * lr_max

            # balance parameter: how much the discriminator cares about
            # autoencoding real inputs vs. discriminating fakes
            sigma = 0.5
            w_fake = tf.clip_by_value(
                sigmoid(b * sigma - a, shift=0., mult=mult), 0., 0.9
            )  # hold the discriminator's fake proportion always at less than half
            d_loss = a - b * w_fake

            # weights for generator
            wg_fake = tf.clip_by_value(sigmoid(b - c, shift=0., mult=mult), 0.,
                                       1.0)
            wg_reconstruct = 1 - wg_fake
            g_loss = b * wg_fake + c * wg_reconstruct

        with tf.variable_scope("AUC"):
            _, auc_dgx = tf.metrics.auc(y, tf.reduce_mean((x - dgx)**2,
                                                          axis=1))
            _, auc_dx = tf.metrics.auc(y, tf.reduce_mean((x - dx)**2, axis=1))
            _, auc_gx = tf.metrics.auc(y, tf.reduce_mean((x - gx)**2, axis=1))

        with scope('down'):
            g_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                       scope="generator")
            d_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                       scope="discriminator")
            step = tf.train.get_or_create_global_step()
            d_step = tf.train.AdamOptimizer(lr_d).minimize(d_loss,
                                                           step,
                                                           var_list=d_vars)
            g_step = tf.train.AdamOptimizer(lr_g).minimize(g_loss,
                                                           step,
                                                           var_list=g_vars)

        return DAE(self,
                   step=step,
                   x=x,
                   y=y,
                   gx=gx,
                   dgx=dgx,
                   dx=dx,
                   auc_dgx=auc_dgx,
                   auc_gx=auc_gx,
                   auc_dx=auc_dx,
                   g_loss=g_loss,
                   d_loss=d_loss,
                   d_step=d_step,
                   g_step=g_step)
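
The learning-rate balancing above goes through a project `sigmoid` helper with `shift` and `mult` parameters; a plausible sketch of such a scaled, shifted logistic (an assumption about the helper, not its actual definition):

import tensorflow as tf

def sigmoid(x, shift=0.0, mult=1.0):
    # scaled and shifted logistic; above it turns differences between
    # reconstruction losses into a (0, 1) factor that splits lr_max
    # between the discriminator and the generator
    return tf.sigmoid((x - shift) * mult)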
Example #8
    def build(self, x, y, context_weight, loss, lam=0., weight_type="normal"):
        with scope("x"):
            x = placeholder(tf.float32, [None, None, None, self.channel_x], x,
                            "x")
        with scope("y"):
            y = placeholder(tf.float32, [None], y, "y")

        gx = self.gen(x)
        dx = {k: v(x) for k, v in self.dis.items()}
        dgx = {k: v(gx) for k, v in self.dis.items()}
        #dx, dgx = self.dis(x), self.dis(gx)

        with scope("loss"):
            d_loss = [
                tf.reduce_mean(
                    tf.nn.sigmoid_cross_entropy_with_logits(
                        labels=tf.ones_like(dx[k]) * 0.9, logits=dx[k]))
                + tf.reduce_mean(
                    tf.nn.sigmoid_cross_entropy_with_logits(
                        labels=tf.zeros_like(dgx[k]), logits=dgx[k]))
                for k in dx.keys()]

            ### old d_loss
            #d_loss_real, d_loss_fake = [], []
            #for k in dx.keys():
            #d_loss_real.append(tf.reduce_mean(
            #tf.nn.sigmoid_cross_entropy_with_logits(labels=tf.ones_like(dx[k])*0.9, logits=dx[k])))
            #d_loss_fake.append(tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(labels=tf.zeros_like(dgx[k]), logits=dgx[k])))

            #if loss=="mean":
            #d_loss = tf.reduce_mean(d_loss_real) + tf.reduce_mean(d_loss_fake)
            #elif loss=="max":
            #d_loss = tf.reduce_mean(d_loss_real) + tf.reduce_mean(d_loss_fake)
            #elif loss=="softmax":
            #d_loss = tf.reduce_mean(d_loss_real) + tf.reduce_mean(d_loss_fake)

            epsilon = 1e-10
            loss_rec = tf.reduce_mean(
                -tf.reduce_sum(x * tf.log(epsilon + gx) +
                               (1 - x) * tf.log(epsilon + 1 - gx),
                               axis=1))
            loss_g_fake = [
                tf.reduce_mean(
                    tf.nn.sigmoid_cross_entropy_with_logits(
                        labels=tf.ones_like(dgx_), logits=dgx_))
                for dgx_ in dgx.values()
            ]

            lam = placeholder(tf.float32, None, lam,
                              "lam")  # only for softmax, otherwise dummy

            with scope("lambda"):
                if loss == "softmax_self_challenged":
                    trained_l = tf.Variable(initial_value=-2.,
                                            name='controlled_lambda')
                    used_lam = tf.nn.softplus(trained_l, name='used_lambda')
                else:
                    used_lam = lam

            if loss == "mean":
                gl_adv = tf.reduce_mean(loss_g_fake)
                g_loss = context_weight * loss_rec + gl_adv
            elif loss == "max":  # max picks biggest loss = best discriminators feedback is used
                gl_adv = tf.reduce_max(loss_g_fake)
                g_loss = context_weight * loss_rec + gl_adv

            elif "softmax" in loss:
                # if lambda is self_learnt
                if used_lam == 0.:
                    weights = tf.ones_like(loss_g_fake)
                else:
                    if weight_type == 'log':
                        weights = tf.pow(loss_g_fake, used_lam)
                    else:
                        weights = tf.exp(used_lam * loss_g_fake)

                gl_adv = weighted_arithmetic(weights, loss_g_fake)

                if loss == "softmax":
                    g_loss = context_weight * loss_rec + gl_adv
                else:
                    g_loss = context_weight * loss_rec + gl_adv - 0.001 * used_lam
                #g_loss = weight* loss_rec + tf.reduce_mean(tf.nn.softmax(loss_g_fake)*loss_g_fake)

        with scope("AUC"):
            #_, auc_dgx = tf.metrics.auc(y, tf.nn.sigmoid(tf.reduce_mean(list(dgx.values()))))
            #_, auc_dx = tf.metrics.auc(y, tf.nn.sigmoid(tf.reduce_mean(list(dx.values()))))
            _, auc_gx = tf.metrics.auc(
                y, tf.reduce_mean((x - gx)**2, axis=(1, 2, 3)))

        g_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                   scope="generator")
        if loss == "softmax_self_challenged":
            lambda_var = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                           scope="loss/lambda")
            g_vars.extend(lambda_var)
        #d_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope="discriminator")
        d_vars = {
            i: tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                 scope=f"discriminator_{i}")
            for i in dx.keys()
        }

        with scope('train_step'):
            step = tf.train.get_or_create_global_step()
            optimizer = tf.train.AdamOptimizer()
            #d_step = optimizer.minimize(d_loss, step, var_list=d_vars)
            d_step = [
                optimizer.minimize(d_loss_k, var_list=d_vars[k])
                for k, d_loss_k in zip(dx.keys(), d_loss)
            ]
            g_step = optimizer.minimize(g_loss, step, var_list=g_vars)

        return MG_GAN(
            self,
            lam=used_lam,
            step=step,
            x=x,
            y=y,
            gx=gx,
            # auc_dgx=auc_dgx,
            auc_gx=auc_gx,
            # auc_dx=auc_dx,
            g_step=g_step,
            d_step=d_step,
            gl_rec=context_weight * loss_rec,
            gl_lam=0.001 * used_lam,
            gl_adv=gl_adv,
            g_loss=g_loss,
            d_loss=d_loss,
            d_loss_mean=tf.reduce_mean(d_loss),
            d_max=tf.argmax(d_loss))
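
`weighted_arithmetic` is not shown; from its use as a weighting of the per-discriminator generator losses, a plausible sketch is a weighted arithmetic mean (an assumption about the helper, not its actual definition):

import tensorflow as tf

def weighted_arithmetic(w, x):
    # weighted arithmetic mean: discriminators with larger weights
    # contribute more to the combined adversarial loss
    return tf.reduce_sum(w * x) / tf.reduce_sum(w)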
Example #9
def vAe(
        mode,
        src=None,
        tgt=None,
        # model spec
        dim_tgt=8192,
        dim_emb=512,
        dim_rep=1024,
        rnn_layers=3,
        bidirectional=True,
        bidir_stacked=True,
        attentive=False,
        logit_use_embed=True,
        # training spec
        accelerate=1e-4,
        learn_rate=1e-3,
        bos=2,
        eos=1):

    # dim_tgt : vocab size
    # dim_emb : model dimension
    # dim_rep : representation dimension
    #
    # unk=0 for word dropout

    assert mode in ('train', 'valid', 'infer')
    self = Record(bos=bos, eos=eos)

    with scope('step'):
        step = self.step = tf.train.get_or_create_global_step()
        rate = accelerate * tf.to_float(step)
        rate_keepwd = self.rate_keepwd = tf.sigmoid(rate)
        rate_anneal = self.rate_anneal = tf.tanh(rate)
        rate_update = self.rate_update = learn_rate / (tf.sqrt(rate) + 1.0)

    with scope('src'):
        src = self.src = placeholder(tf.int32, (None, None), src, 'src')
        src = tf.transpose(src)  # time major order
        src, msk_src, len_src = trim(src, eos)

    with scope('tgt'):
        tgt = self.tgt = placeholder(tf.int32, (None, None), tgt, 'tgt')
        tgt = tf.transpose(tgt)  # time major order
        tgt, msk_tgt, len_tgt = trim(tgt, eos)
        msk_tgt = tf.pad(msk_tgt, ((1, 0), (0, 0)), constant_values=True)
        # pads for decoder : lead=[bos]+tgt -> gold=tgt+[eos]
        lead, gold = tgt, tf.pad(tgt,
                                 paddings=((0, 1), (0, 0)),
                                 constant_values=eos)
        if 'train' == mode:
            lead *= tf.to_int32(
                tf.random_uniform(tf.shape(lead)) < rate_keepwd)
        lead = self.lead = tf.pad(lead,
                                  paddings=((1, 0), (0, 0)),
                                  constant_values=bos)

    # s : src length
    # t : tgt length plus one padding, either eos or bos
    # b : batch size
    #
    # len_src :  b  aka s
    # msk_src : sb  without padding
    # msk_tgt : tb  with eos
    #
    #    lead : tb  with bos
    #    gold : tb  with eos

    with scope('embed'):
        b = (6 / (dim_tgt / dim_emb + 1))**0.5
        embedding = tf.get_variable('embedding', (dim_tgt, dim_emb),
                                    initializer=tf.random_uniform_initializer(
                                        -b, b))
        emb_tgt = tf.gather(embedding, lead,
                            name='emb_tgt')  # (t, b) -> (t, b, dim_emb)
        emb_src = tf.gather(embedding, src,
                            name='emb_src')  # (s, b) -> (s, b, dim_emb)

    with scope('encode'):  # (s, b, dim_emb) -> (b, dim_emb)
        reverse = partial(tf.reverse_sequence,
                          seq_lengths=len_src,
                          seq_axis=0,
                          batch_axis=1)

        if bidirectional and bidir_stacked:
            for i in range(rnn_layers):
                with scope("rnn{}".format(i + 1)):
                    emb_fwd, _ = layer_rnn(1, dim_emb, name='fwd')(emb_src)
                    emb_bwd, _ = layer_rnn(1, dim_emb,
                                           name='bwd')(reverse(emb_src))
                    hs = emb_src = tf.concat((emb_fwd, reverse(emb_bwd)),
                                             axis=-1)

        elif bidirectional:
            with scope("rnn"):
                emb_fwd, _ = layer_rnn(rnn_layers, dim_emb,
                                       name='fwd')(emb_src)
                emb_bwd, _ = layer_rnn(rnn_layers, dim_emb,
                                       name='bwd')(reverse(emb_src))
            hs = tf.concat((emb_fwd, reverse(emb_bwd)), axis=-1)

        else:
            hs, _ = layer_rnn(rnn_layers, dim_emb, name='rnn')(emb_src)

        with scope('cata'):
            # extract the final states from the outputs: bd <- sbd, b2
            h = tf.gather_nd(
                hs,
                tf.stack(
                    (len_src - 1, tf.range(tf.size(len_src), dtype=tf.int32)),
                    axis=1))
            if attentive:  # todo fixme
                # the values are the outputs from all non-padding steps;
                # the queries are the final states;
                h = layer_nrm(h + tf.squeeze(  # bd <- bd1
                    attention(  # bd1 <- bd1, bds, b1s
                        tf.expand_dims(h, axis=2),  # query: bd1 <- bd
                        tf.transpose(hs, (1, 2, 0)),  # value: bds <- sbd
                        tf.log(
                            tf.to_float(  # -inf,0  mask: b1s <- sb <- bs
                                tf.expand_dims(tf.transpose(msk_src),
                                               axis=1))),
                        int(h.shape[-1])),
                    2))

    with scope('latent'):  # (b, dim_emb) -> (b, dim_rep) -> (b, dim_emb)
        # h = layer_aff(h, dim_emb, name='in')
        mu = self.mu = layer_aff(h, dim_rep, name='mu')
        lv = self.lv = layer_aff(h, dim_rep, name='lv')
        with scope('z'):
            h = mu
            if 'train' == mode:
                h += tf.exp(0.5 * lv) * tf.random_normal(shape=tf.shape(lv))
            self.z = h
        h = layer_aff(h, dim_emb, name='ex')

    with scope('decode'):  # (b, dim_emb) -> (t, b, dim_emb) -> (?, dim_emb)
        h = self.state_in = tf.stack((h, ) * rnn_layers)
        h, _ = _, (self.state_ex, ) = layer_rnn(rnn_layers,
                                                dim_emb,
                                                name='rnn')(
                                                    emb_tgt,
                                                    initial_state=(h, ))
        if 'infer' != mode: h = tf.boolean_mask(h, msk_tgt)
        h = layer_aff(h, dim_emb, name='out')

    with scope('logits'):  # (?, dim_emb) -> (?, dim_tgt)
        if logit_use_embed:
            logits = self.logits = tf.tensordot(h, (dim_emb**-0.5) *
                                                tf.transpose(embedding), 1)
        else:
            logits = self.logits = layer_aff(h, dim_tgt)

    with scope('prob'):
        prob = self.prob = tf.nn.softmax(logits)
    with scope('pred'):
        pred = self.pred = tf.argmax(logits, -1, output_type=tf.int32)

    if 'infer' != mode:
        labels = tf.boolean_mask(gold, msk_tgt, name='labels')
        with scope('errt'):
            errt_samp = self.errt_samp = tf.to_float(tf.not_equal(
                labels, pred))
            errt = self.errt = tf.reduce_mean(errt_samp)
        with scope('loss'):
            with scope('loss_gen'):
                loss_gen_samp = self.loss_gen_samp = tf.nn.sparse_softmax_cross_entropy_with_logits(
                    labels=labels, logits=logits)
                loss_gen = self.loss_gen = tf.reduce_mean(loss_gen_samp)
            with scope('loss_kld'):
                loss_kld_samp = self.loss_kld_samp = 0.5 * (
                    tf.square(mu) + tf.exp(lv) - lv - 1.0)
                loss_kld = self.loss_kld = tf.reduce_mean(loss_kld_samp)
            loss = self.loss = rate_anneal * loss_kld + loss_gen

    if 'train' == mode:
        with scope('train'):
            train_step = self.train_step = tf.train.AdamOptimizer(
                rate_update).minimize(loss, step)

    return self
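
All three training schedules above (word-dropout keep rate, KL annealing weight, learning-rate decay) are driven by one `rate = accelerate * step`. A small NumPy sketch of the same formulas, with illustrative step values, to show how they move:

import numpy as np

accelerate, learn_rate = 1e-4, 1e-3
step = np.array([0., 1e3, 1e4, 1e5])
rate = accelerate * step
print(1.0 / (1.0 + np.exp(-rate)))          # rate_keepwd: share of target words kept
print(np.tanh(rate))                        # rate_anneal: weight on the KL term
print(learn_rate / (np.sqrt(rate) + 1.0))   # rate_update: decaying learning rate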
Example #10
def VAE(data, btlnk_dim, data_dim, dense_dim, y_dim, loss_type, accelerate):

    def encoder(x, dim_btlnk, dim_x):
        x = Normalize(dim_btlnk, "nrm")(tf.nn.elu(Linear(dim_btlnk, dim_x, name= 'lin')(x)))
        with tf.variable_scope('latent'):
            mu = Linear(dim_btlnk, dim_btlnk, name= 'mu')(x)
            lv = Linear(dim_btlnk, dim_btlnk, name= 'lv')(x)
            #lv = Linear(dim_btlnk, dim_x, name= 'lv')(x)
            #mu = Linear(dim_btlnk, dim_x, name= 'mu')(x)
        with tf.variable_scope('z'):
            z = mu + tf.exp(0.5 * lv) * tf.random_normal(shape=tf.shape(lv))
        return z, mu, lv

    def decoder(x, data_dim, btlnk_dim):
        x = Linear(data_dim, btlnk_dim)(x)
        #return tf.clip_by_value(x, 0.0, 1.0)
        return tf.nn.sigmoid(x)

    with tf.variable_scope("x"):
        x = placeholder(tf.float32, [None, data_dim], data[0], "x")
    with tf.variable_scope("y"):
        y = placeholder(tf.float32, [None], data[1], "y")

    with tf.variable_scope("encoder"):
        z, mu, lv = encoder(x, btlnk_dim, data_dim)

    with tf.variable_scope("decoder"):
        logits = decoder(z, data_dim, btlnk_dim)

    with tf.variable_scope("step"):
        step = tf.train.get_or_create_global_step()
        rate = accelerate * tf.to_float(step)
        rate_anneal = tf.tanh(rate)

    with tf.variable_scope("loss"):
        kl_loss = tf.reduce_mean(0.5 * (tf.square(mu) + tf.exp(lv) - lv - 1.0))
        if loss_type == "xtrpy":
            #loss_rec = tf.reduce_mean(tf.losses.softmax_cross_entropy(onehot_labels=x, logits=logits))
            epsilon = 1e-10
            loss_rec = tf.reduce_mean(-tf.reduce_sum(x * tf.log(epsilon+logits) +
                                                     (1-x) * tf.log(epsilon+1-logits),  axis=1))
        else:
            loss_rec = tf.reduce_mean(tf.abs(x - logits))

        loss = loss_rec + kl_loss*rate_anneal

    with tf.variable_scope("AUC"):
        anomaly_score = tf.reduce_mean((x-logits)**2, axis=1)
        _, auc = tf.metrics.auc(y, anomaly_score)

    with tf.variable_scope("train_step"):
        train_step = tf.train.AdamOptimizer().minimize(loss, step)


    return dict(step=step,
                x=x,
                y=y,
                z=z,
                mu=mu,
                logits=logits,
                auc=auc,
                train_step=train_step,
                loss=loss,
                kl_loss=kl_loss,
                loss_rec=loss_rec)
Example #11
def model(mode,
          src_dwh,
          tgt_dwh,
          src_idx=None,
          len_src=None,
          tgt_img=None,
          tgt_idx=None,
          len_tgt=None,
          num_layers=3,
          num_units=512,
          learn_rate=1e-3,
          decay_rate=1e-2,
          dropout=0.1):
    assert mode in ('train', 'valid', 'infer')
    self = Record()

    src_d, src_w, src_h = src_dwh
    tgt_d, tgt_w, tgt_h = tgt_dwh

    with scope('source'):
        # input nodes
        src_idx = self.src_idx = placeholder(tf.int32, (None, None), src_idx,
                                             'src_idx')  # n s
        len_src = self.len_src = placeholder(tf.int32, (None, ), len_src,
                                             'len_src')  # n

        # time major order
        src_idx = tf.transpose(src_idx, (1, 0))  # s n
        emb_src = tf.one_hot(src_idx, src_d)  # s n v

        for i in range(num_layers):
            with scope("rnn{}".format(i + 1)):
                emb_fwd, _ = tf.contrib.cudnn_rnn.CudnnGRU(
                    1, num_units, dropout=dropout,
                    name='fwd')(emb_src, training='train' == mode)
                emb_bwd, _ = tf.contrib.cudnn_rnn.CudnnGRU(
                    1, num_units, dropout=dropout,
                    name='bwd')(tf.reverse_sequence(emb_src,
                                                    len_src,
                                                    seq_axis=0,
                                                    batch_axis=1),
                                training='train' == mode)
            emb_src = tf.concat(
                (emb_fwd,
                 tf.reverse_sequence(
                     emb_bwd, len_src, seq_axis=0, batch_axis=1)),
                axis=-1)
        # emb_src = tf.layers.dense(emb_src, num_units, name= 'reduce_concat') # s n d
        emb_src = self.emb_src = tf.transpose(emb_src, (1, 2, 0))  # n d s

    with scope('target'):
        # input nodes
        tgt_img = self.tgt_img = placeholder(tf.uint8,
                                             (None, None, tgt_h, tgt_w),
                                             tgt_img, 'tgt_img')  # n t h w
        tgt_idx = self.tgt_idx = placeholder(tf.int32, (None, None), tgt_idx,
                                             'tgt_idx')  # n t
        len_tgt = self.len_tgt = placeholder(tf.int32, (None, ), len_tgt,
                                             'len_tgt')  # n

        # time major order
        tgt_idx = tf.transpose(tgt_idx)  # t n
        tgt_img = tf.transpose(tgt_img, (1, 0, 2, 3))  # t n h w
        tgt_img = flatten(tgt_img, 2, 3)  # t n hw

        # normalize pixels to binary
        tgt_img = tf.to_float(tgt_img) / 255.0
        # tgt_img = tf.round(tgt_img)
        # todo consider adding noise

        # causal padding
        fire = self.fire = tf.pad(tgt_img, ((1, 0), (0, 0), (0, 0)),
                                  constant_values=0.0)
        true = self.true = tf.pad(tgt_img, ((0, 1), (0, 0), (0, 0)),
                                  constant_values=1.0)
        tidx = self.tidx = tf.pad(tgt_idx, ((0, 1), (0, 0)), constant_values=1)
        mask_tgt = tf.transpose(tf.sequence_mask(len_tgt + 1))  # t n

    with scope('decode'):
        # needs to get input from latent space to do attention or some shit
        decoder = self.decoder = tf.contrib.cudnn_rnn.CudnnGRU(num_layers,
                                                               num_units,
                                                               dropout=dropout)
        state_in = self.state_in = tf.zeros(
            (num_layers, tf.shape(fire)[1], num_units))
        x, _ = _, (self.state_ex, ) = decoder(fire,
                                              initial_state=(state_in, ),
                                              training='train' == mode)
        # transform mask to -inf and 0 in order to simply sum for whatever the f**k happens next
        mask = tf.log(tf.sequence_mask(len_src, dtype=tf.float32))  # n s
        mask = tf.expand_dims(mask, 1)  # n 1 s
        # multi-head scaled dot-product attention
        x = tf.transpose(x, (1, 2, 0))  # t n d ---> n d t
        attn = Attention(num_units, num_units, 2 * num_units)(x, emb_src, mask)
        if 'train' == mode: attn = tf.nn.dropout(attn, 1 - dropout)
        x = Normalize(num_units)(x + attn)
        x = tf.transpose(x, (2, 0, 1))  # n d t ---> t n d

    if 'infer' != mode:
        x = tf.boolean_mask(x, mask_tgt)
        true = tf.boolean_mask(true, mask_tgt)
        tidx = tf.boolean_mask(tidx, mask_tgt)

    with scope('output'):
        y = tf.layers.dense(x, tgt_h * tgt_w, name='dense_img')
        z = tf.layers.dense(x, tgt_d, name='logit_idx')
        pred = self.pred = tf.clip_by_value(y, 0.0, 1.0)
        prob = self.prob = tf.nn.softmax(z)
        pidx = self.pidx = tf.argmax(z, axis=-1, output_type=tf.int32)

    with scope('losses'):
        diff = true - pred
        mae = self.mae = tf.reduce_mean(tf.abs(diff), axis=-1)
        mse = self.mse = tf.reduce_mean(tf.square(diff), axis=-1)
        xid = self.xid = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=z, labels=tidx)
        err = self.err = tf.not_equal(tidx, pidx)
        loss = tf.reduce_mean(xid)

    with scope('update'):
        step = self.step = tf.train.get_or_create_global_step()
        lr = self.lr = learn_rate / (1.0 +
                                     decay_rate * tf.sqrt(tf.to_float(step)))
        if 'train' == mode:
            down = self.down = tf.train.AdamOptimizer(lr).minimize(loss, step)

    return self
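
The attention mask here is built by taking the log of a boolean sequence mask, so padded source steps become -inf and vanish after the softmax. A tiny standalone illustration of that trick:

import numpy as np

len_src = np.array([2, 3])
mask = np.arange(3) < len_src[:, None]        # (n, s) sequence mask
logmask = np.where(mask, 0.0, -np.inf)        # 0 on real steps, -inf on padding
scores = np.zeros((2, 3)) + logmask           # pretend attention scores
e = np.exp(scores - scores.max(axis=-1, keepdims=True))
print(e / e.sum(axis=-1, keepdims=True))      # padded steps get probability 0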
Example #12
    def feed(src, tgt, cws=cws, cwt=cwt):
        src_idx, len_src = cws(src, ret_img=False, ret_idx=True)
        tgt_img, tgt_idx, len_tgt = cwt(tgt, ret_img=True, ret_idx=True)
        return src_idx, len_src, tgt_img, tgt_idx, len_tgt

    def batch(src=src_train, tgt=tgt_train, size=128, seed=0):
        for bat in batch_sample(len(tgt), size, seed):
            yield feed(src[bat], tgt[bat])

    src_idx, len_src, tgt_img, tgt_idx, len_tgt = pipe(
        batch, (tf.int32, tf.int32, tf.uint8, tf.int32, tf.int32))
    train = model('train', cws.dwh(), cwt.dwh(), src_idx, len_src, tgt_img,
                  tgt_idx, len_tgt)
    valid = model('valid', cws.dwh(), cwt.dwh())
    dummy = tuple(placeholder(tf.float32, ()) for _ in range(3))

    def log(step,
            wtr=tf.summary.FileWriter("../log/{}".format(trial)),
            log=tf.summary.merge((tf.summary.scalar('step_mae', dummy[0]),
                                  tf.summary.scalar('step_xid', dummy[1]),
                                  tf.summary.scalar('step_err', dummy[2]))),
            fet=(valid.mae, valid.xid, valid.err),
            inp=(valid.src_idx, valid.len_src, valid.tgt_img, valid.tgt_idx,
                 valid.len_tgt),
            src=src_valid,
            tgt=tgt_valid,
            bat=256):
        stats = [
            sess.run(fet, dict(zip(inp, feed(src[i:j], tgt[i:j]))))
            for i, j in partition(len(tgt), bat)
Example #13
    def build(self, x, y, z, loss_type):
        d_scale_factor = tf.constant(0.)  #tf.constant(0.25)
        g_scale_factor = tf.constant(0.)  #tf.constant(1 - 0.75/2)
        with scope("x"):
            x = placeholder(tf.float32, [None, self.dim_x], x, "x")
        with scope("y"):
            y = placeholder(tf.float32, [None], y, "y")
        with scope("z"):
            z = placeholder(tf.float32, [None, self.dim_noise], z, "z")

        zx, mu, lv, hl_e = self.enc(x)

        gzx = self.gen(zx)
        #gz = self.gen(z)

        dx, hl_dx = self.dis(x)
        dgzx, hl_dgzx = self.dis(gzx)
        #dgz, hl_dgz = self.dis(gz)

        with tf.variable_scope("step"):
            step = tf.train.get_or_create_global_step()
            rate = self.accelerate * tf.to_float(step)
            rate_anneal = tf.tanh(rate)

        with scope("loss"):
            dx_loss = tf.reduce_mean(
                tf.nn.sigmoid_cross_entropy_with_logits(
                    labels=tf.ones_like(dx) - d_scale_factor, logits=dx))
            dgzx_loss = tf.reduce_mean(
                tf.nn.sigmoid_cross_entropy_with_logits(
                    labels=tf.zeros_like(dgzx), logits=dgzx))
            #dgz_loss =  tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(labels=tf.zeros_like(dgz), logits=dgz))
            d_loss = dx_loss + dgzx_loss  #+ dgz_loss

            kl_loss = tf.reduce_mean(0.5 *
                                     (tf.square(mu) + tf.exp(lv) - lv - 1.0))

            gzx_loss = tf.reduce_mean(
                tf.nn.sigmoid_cross_entropy_with_logits(
                    labels=tf.ones_like(dgzx) - g_scale_factor, logits=dgzx))

            if loss_type == "xtrpy":
                epsilon = 1e-10
                ftr_loss = tf.reduce_mean(
                    -tf.reduce_sum(x * tf.log(epsilon + gzx) +
                                   (1 - x) * tf.log(epsilon + 1 - gzx),
                                   axis=1))
                g_loss = gzx_loss / 10 + ftr_loss / 5 + kl_loss * rate_anneal

            else:
                ftr_loss = tf.reduce_mean(tf.abs(x - gzx))
                g_loss = gzx_loss / 2 + ftr_loss * 10 + kl_loss * rate_anneal

        with scope("AUC"):
            _, auc_gzx = tf.metrics.auc(y, tf.reduce_mean((x - gzx)**2,
                                                          axis=1))
            _, auc_dx = tf.metrics.auc(y, tf.nn.sigmoid(dx))
            _, auc_dgzx = tf.metrics.auc(y, tf.nn.sigmoid(dgzx))

        g_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                   scope="generator")
        g_vars.extend(
            tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope="encoder"))
        print(g_vars)
        d_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                   scope="discriminator")
        print(d_vars)

        with scope('train_step'):
            #optimizer = tf.train.RMSPropOptimizer()
            optimizer = tf.train.AdamOptimizer()
            d_step = optimizer.minimize(d_loss, step, var_list=d_vars)
            g_step = optimizer.minimize(g_loss, step, var_list=g_vars)

        return VAEGAN(
            self,
            step=step,
            x=x,
            y=y,
            z=z,
            zx=zx,
            mu=mu,
            lv=lv,
            m=tf.reduce_mean(mu),
            l=tf.reduce_mean(lv),
            # gz=gz,
            gzx=gzx,
            auc_gzx=auc_gzx,
            auc_dx=auc_dx,
            auc_dgzx=auc_dgzx,
            g_step=g_step,
            d_step=d_step,
            g_loss=g_loss,
            d_loss=d_loss,
            # gz_loss=gz_loss,
            gzx_loss=gzx_loss,
            ftr_loss=ftr_loss,
            kl_loss=kl_loss,
            dx_loss=dx_loss,
            # dgz_loss=dgz_loss,
            dgzx_loss=dgzx_loss)