def __init__(self, dim, dim_mid, act, name):
    with tf.variable_scope(name):
        self.name = name
        with tf.variable_scope('csl'):
            self.csl = Attention(dim, layer=Forward, mid=dim_mid, act=act)
            self.norm_csl = Normalize(dim)
        with tf.variable_scope('att'):
            self.att = Attention(dim, layer=Forward, mid=dim_mid, act=act)
            self.norm_att = Normalize(dim)
        with tf.variable_scope('fwd'):
            self.fwd = Forward(dim, dim, dim_mid, act)
            self.norm_fwd = Normalize(dim)
def __init__(self, dim, name):
    self.name = name
    with scope(name):
        self.latt = Attention(dim, name='latt')
        self.ratt = Attention(dim, name='ratt')
        self.norm = Normalize(dim)
def model(mode, src_dwh, tgt_dwh,
          src_idx=None, len_src=None,
          tgt_img=None, tgt_idx=None, len_tgt=None,
          num_layers=3, num_units=512,
          learn_rate=1e-3, decay_rate=1e-2, dropout=0.1):
    assert mode in ('train', 'valid', 'infer')
    self = Record()
    src_d, src_w, src_h = src_dwh
    tgt_d, tgt_w, tgt_h = tgt_dwh
    with scope('source'):
        # input nodes
        src_idx = self.src_idx = placeholder(tf.int32, (None, None), src_idx, 'src_idx')  # n s
        len_src = self.len_src = placeholder(tf.int32, (None,), len_src, 'len_src')  # n
        # time-major order
        src_idx = tf.transpose(src_idx, (1, 0))  # s n
        emb_src = tf.one_hot(src_idx, src_d)  # s n v
        # stack of bidirectional GRU layers; the backward pass runs on the
        # reversed sequence and is reversed back before concatenation
        for i in range(num_layers):
            with scope("rnn{}".format(i + 1)):
                emb_fwd, _ = tf.contrib.cudnn_rnn.CudnnGRU(
                    1, num_units, dropout=dropout, name='fwd')(
                        emb_src, training='train' == mode)
                emb_bwd, _ = tf.contrib.cudnn_rnn.CudnnGRU(
                    1, num_units, dropout=dropout, name='bwd')(
                        tf.reverse_sequence(emb_src, len_src, seq_axis=0, batch_axis=1),
                        training='train' == mode)
            emb_src = tf.concat(
                (emb_fwd, tf.reverse_sequence(emb_bwd, len_src, seq_axis=0, batch_axis=1)),
                axis=-1)
        # emb_src = tf.layers.dense(emb_src, num_units, name='reduce_concat')  # s n d
        emb_src = self.emb_src = tf.transpose(emb_src, (1, 2, 0))  # n d s
    with scope('target'):
        # input nodes
        tgt_img = self.tgt_img = placeholder(tf.uint8, (None, None, tgt_h, tgt_w), tgt_img, 'tgt_img')  # n t h w
        tgt_idx = self.tgt_idx = placeholder(tf.int32, (None, None), tgt_idx, 'tgt_idx')  # n t
        len_tgt = self.len_tgt = placeholder(tf.int32, (None,), len_tgt, 'len_tgt')  # n
        # time-major order
        tgt_idx = tf.transpose(tgt_idx)  # t n
        tgt_img = tf.transpose(tgt_img, (1, 0, 2, 3))  # t n h w
        tgt_img = flatten(tgt_img, 2, 3)  # t n hw
        # normalize pixels to [0, 1]
        tgt_img = tf.to_float(tgt_img) / 255.0
        # tgt_img = tf.round(tgt_img)
        # todo consider adding noise
        # causal padding: `fire` is the decoder input shifted right by one step,
        # `true` and `tidx` are the targets with an end-of-sequence step appended
        fire = self.fire = tf.pad(tgt_img, ((1, 0), (0, 0), (0, 0)), constant_values=0.0)
        true = self.true = tf.pad(tgt_img, ((0, 1), (0, 0), (0, 0)), constant_values=1.0)
        tidx = self.tidx = tf.pad(tgt_idx, ((0, 1), (0, 0)), constant_values=1)
        mask_tgt = tf.transpose(tf.sequence_mask(len_tgt + 1))  # t n
    with scope('decode'):
        # todo: the decoder needs input from the latent space to do attention
        decoder = self.decoder = tf.contrib.cudnn_rnn.CudnnGRU(num_layers, num_units, dropout=dropout)
        state_in = self.state_in = tf.zeros((num_layers, tf.shape(fire)[1], num_units))
        x, (self.state_ex,) = decoder(fire, initial_state=(state_in,), training='train' == mode)
        # turn the source-length mask into 0/-inf so it can simply be added to the attention logits
        mask = tf.log(tf.sequence_mask(len_src, dtype=tf.float32))  # n s
        mask = tf.expand_dims(mask, 1)  # n 1 s
        # multi-head scaled dot-product attention over the source encoding
        x = tf.transpose(x, (1, 2, 0))  # t n d ---> n d t
        attn = Attention(num_units, num_units, 2 * num_units)(x, emb_src, mask)
        if 'train' == mode:
            attn = tf.nn.dropout(attn, 1 - dropout)
        x = Normalize(num_units)(x + attn)
        x = tf.transpose(x, (2, 0, 1))  # n d t ---> t n d
    if 'infer' != mode:
        # keep only the valid (unpadded) time steps for the losses
        x = tf.boolean_mask(x, mask_tgt)
        true = tf.boolean_mask(true, mask_tgt)
        tidx = tf.boolean_mask(tidx, mask_tgt)
    with scope('output'):
        y = tf.layers.dense(x, tgt_h * tgt_w, name='dense_img')
        z = tf.layers.dense(x, tgt_d, name='logit_idx')
        pred = self.pred = tf.clip_by_value(y, 0.0, 1.0)
        prob = self.prob = tf.nn.softmax(z)
        pidx = self.pidx = tf.argmax(z, axis=-1, output_type=tf.int32)
    with scope('losses'):
        diff = true - pred
        mae = self.mae = tf.reduce_mean(tf.abs(diff), axis=-1)
        mse = self.mse = tf.reduce_mean(tf.square(diff), axis=-1)
        xid = self.xid = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=z, labels=tidx)
        err = self.err = tf.not_equal(tidx, pidx)
        loss = tf.reduce_mean(xid)
    with scope('update'):
        step = self.step = tf.train.get_or_create_global_step()
        lr = self.lr = learn_rate / (1.0 + decay_rate * tf.sqrt(tf.to_float(step)))
        if 'train' == mode:
            down = self.down = tf.train.AdamOptimizer(lr).minimize(loss, step)
    return self
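# Hypothetical smoke test (an illustration, not part of the original module):
# builds the training graph with tiny dimensions and runs a single Adam update
# on random data.  All shapes and values below are assumptions; only `model`
# and the placeholders it exposes are taken from this file, and CudnnGRU
# requires a CUDA-enabled GPU build of TensorFlow 1.x.
if __name__ == '__main__':
    import numpy as np
    m = model('train', src_dwh=(32, 8, 8), tgt_dwh=(32, 8, 8), num_layers=1, num_units=64)
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        feed = {m.src_idx: np.random.randint(32, size=(2, 7)),                         # n s
                m.len_src: np.full(2, 7),                                              # n
                m.tgt_img: np.random.randint(256, size=(2, 5, 8, 8), dtype=np.uint8),  # n t h w
                m.tgt_idx: np.random.randint(32, size=(2, 5)),                         # n t
                m.len_tgt: np.full(2, 5)}                                              # n
        _, xid = sess.run((m.down, m.xid), feed)
        print(xid.mean())  # mean character cross-entropy after one update step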