def add_prediction_op(self):
    fs = [5, 5]  # filter sizes
    cs = [4, 40, 80]  # cs[i] is the number of output channels from layer i (layer 0 is the input layer)

    # First conv layer
    W_conv1 = utils.weight_variable([fs[0], cs[0], cs[1]])
    b_conv1 = utils.bias_variable([cs[1]])
    h_conv1 = utils.lrelu(utils.conv1d(self.x, W_conv1) + b_conv1)

    # Second conv layer
    W_conv2 = utils.weight_variable([fs[1], cs[1], cs[2]])
    b_conv2 = utils.bias_variable([cs[2]])
    h_conv2 = utils.lrelu(utils.conv1d(h_conv1, W_conv2) + b_conv2)

    # First fully connected layer. Reshape the convolution output to a 1D vector.
    W_fc1 = utils.weight_variable([self.config.strlen * cs[2], 1024])
    b_fc1 = utils.bias_variable([1024])
    h_conv2_flat = tf.reshape(h_conv2, [-1, self.config.strlen * cs[2]])
    h_fc1 = utils.lrelu(tf.matmul(h_conv2_flat, W_fc1) + b_fc1)

    # Dropout (should be added to earlier layers too...)
    h_fc1_drop = tf.nn.dropout(h_fc1, self.keep_prob)

    # Final fully-connected layer
    W_fc2 = utils.weight_variable([1024, 1])
    b_fc2 = utils.bias_variable([1])
    y_conv = tf.matmul(h_fc1_drop, W_fc2) + b_fc2
    y_out = tf.sigmoid(y_conv)

    # is_zero is 1 iff at least one entry of x is nonzero,
    # so an all-zero input is forced to produce output 0.
    is_zero = tf.clip_by_value(tf.reduce_sum(self.x), 0, 1)
    y_out = tf.multiply(y_out, is_zero)
    return y_out

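# NOTE: A minimal sketch of the `utils` helpers assumed by the TensorFlow-style
# snippets in this section (weight_variable, bias_variable, lrelu, and the
# weights-second conv1d). Names and defaults follow the classic TF tutorial
# pattern; the real module may differ.
import tensorflow as tf

def weight_variable(shape):
    # Truncated-normal initialization for conv/FC weights.
    return tf.Variable(tf.truncated_normal(shape, stddev=0.1))

def bias_variable(shape):
    # Small positive bias keeps ReLU-family units active early in training.
    return tf.Variable(tf.constant(0.1, shape=shape))

def lrelu(x, alpha=0.01):
    # Leaky ReLU: negatives pass through with a small slope instead of zeroing.
    return tf.maximum(alpha * x, x)

def conv1d(x, W):
    # Length-preserving 1-D convolution: stride 1, zero ('SAME') padding.
    return tf.nn.conv1d(x, W, stride=1, padding='SAME')
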
def add_prediction_op(self):
    fs = [5, 5]  # filter sizes
    cs = [4, 40, 80]  # cs[i] is the number of output channels from layer i (layer 0 is the input layer)

    # First conv layer
    W_conv1 = utils.weight_variable([fs[0], cs[0], cs[1]])
    b_conv1 = utils.bias_variable([cs[1]])
    h_conv1 = utils.lrelu(utils.conv1d(self.x, W_conv1) + b_conv1)

    # Second conv layer
    W_conv2 = utils.weight_variable([fs[1], cs[1], cs[2]])
    b_conv2 = utils.bias_variable([cs[2]])
    h_conv2 = utils.lrelu(utils.conv1d(h_conv1, W_conv2) + b_conv2)

    # First fully connected layer. Reshape the convolution output to a 1D vector.
    W_fc1 = utils.weight_variable([self.config.strlen * cs[2], 1024])
    b_fc1 = utils.bias_variable([1024])
    h_conv2_flat = tf.reshape(h_conv2, [-1, self.config.strlen * cs[2]])
    h_fc1 = utils.lrelu(tf.matmul(h_conv2_flat, W_fc1) + b_fc1)

    # Dropout (should be added to earlier layers too...)
    h_fc1_drop = tf.nn.dropout(h_fc1, self.keep_prob)

    # Final fully-connected layer: 3 output units, returned as raw logits.
    W_fc2 = utils.weight_variable([1024, 3])
    b_fc2 = utils.bias_variable([3])  # one bias per output unit
    y_conv = tf.matmul(h_fc1_drop, W_fc2) + b_fc2
    return y_conv

def res_block(self, name, inputs):
    # Pre-activation residual block: two ReLU + conv stages,
    # scaled by res_rate and added back to the input.
    output = inputs
    output = tf.nn.relu(output)
    output = utils.conv1d(name + '.1', self.filter_output_dim,
                          self.filter_output_dim, 5, output)
    output = tf.nn.relu(output)
    output = utils.conv1d(name + '.2', self.filter_output_dim,
                          self.filter_output_dim, 5, output)
    return inputs + (self.res_rate * output)

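# NOTE: res_block (and the discriminators/encoders below) call a different,
# name-first conv1d(name, input_dim, output_dim, filter_size, inputs) that owns
# its weights. A plausible sketch, with the argument order inferred from the
# call sites; the real helper may differ.
import tensorflow as tf

def conv1d(name, input_dim, output_dim, filter_size, inputs):
    with tf.variable_scope(name):
        W = tf.get_variable('W', [filter_size, input_dim, output_dim],
                            initializer=tf.glorot_uniform_initializer())
        b = tf.get_variable('b', [output_dim],
                            initializer=tf.zeros_initializer())
        return tf.nn.conv1d(inputs, W, stride=1, padding='SAME') + b
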
def add_prediction_op(self):
    fs = [5, 5]  # filter sizes
    cs = [4, 40, 80]  # cs[i] is the number of output channels from layer i (layer 0 is the input layer)

    # First conv layer
    W_conv1 = utils.weight_variable([fs[0], cs[0], cs[1]])
    b_conv1 = utils.bias_variable([cs[1]])
    h_conv1 = utils.lrelu(utils.conv1d(self.x, W_conv1) + b_conv1)

    # Second conv layer
    W_conv2 = utils.weight_variable([fs[1], cs[1], cs[2]])
    b_conv2 = utils.bias_variable([cs[2]])
    h_conv2 = utils.lrelu(utils.conv1d(h_conv1, W_conv2) + b_conv2)

    # Conv layer on top of the coverage
    W_conv_coverage = utils.weight_variable([fs[0], 1, cs[2]])
    b_conv_coverage = utils.bias_variable([cs[2]])
    conv_c = tf.expand_dims(self.e, -1)
    # print(conv_c.shape, W_conv_coverage.shape, b_conv_coverage.shape)
    h_conv_coverage = utils.lrelu(utils.conv1d(conv_c, W_conv_coverage) + b_conv_coverage)
    h_concatenated = tf.concat([h_conv2, h_conv_coverage], axis=-1)

    # First fully connected layer. Reshape the convolution output to a 1D vector.
    orig_shape = h_concatenated.get_shape().as_list()
    flat_shape = np.prod(orig_shape[1:])
    new_shape = [-1, flat_shape]
    h_concatenated_flat = tf.reshape(h_concatenated, new_shape)
    h_concat_drop = tf.nn.dropout(h_concatenated_flat, self.keep_prob)
    fc1_in = h_concatenated_flat.get_shape().as_list()[-1]
    W_fc1 = utils.weight_variable([fc1_in, 1024])
    b_fc1 = utils.bias_variable([1024])
    h_fc1 = utils.lrelu(tf.matmul(h_concat_drop, W_fc1) + b_fc1)

    # Fully-connected layer on top of the coverage
    # W_fc_coverage = utils.weight_variable([self.config.strlen, cs[2]])
    # b_fc_coverage = utils.bias_variable([cs[2]])
    # h_fc_coverage = tf.nn.relu(tf.matmul(self.e, W_fc_coverage) + b_fc_coverage)
    # h_concatenated = tf.concat([h_fc1, h_fc_coverage], axis=-1)
    # h_concatenated_drop = tf.nn.dropout(h_concatenated, self.keep_prob)

    # Dropout (should be added to earlier layers too...)
    h_fc1_drop = tf.nn.dropout(h_fc1, self.keep_prob)

    # Final fully-connected layer
    W_fc2 = utils.weight_variable([1024, 1])
    b_fc2 = utils.bias_variable([1])
    y_conv = tf.matmul(h_fc1_drop, W_fc2) + b_fc2
    y_out = tf.sigmoid(y_conv)
    return y_out

def add_prediction_op(self):
    # Split the window into left and right halves around the center position.
    left_half, right_half = tf.split(
        self.x, [self.config.window, self.config.window + 1], axis=1)

    # First conv layer (separate, unshared weights for each half)
    W_convleft1 = utils.weight_variable([5, 4, 40])
    b_convleft1 = utils.bias_variable([40])
    W_convright1 = utils.weight_variable([5, 4, 40])
    b_convright1 = utils.bias_variable([40])
    h_convleft1 = utils.lrelu(utils.conv1d(left_half, W_convleft1) + b_convleft1)
    h_convright1 = utils.lrelu(utils.conv1d(right_half, W_convright1) + b_convright1)

    # Second conv layer
    W_convleft2 = utils.weight_variable([5, 40, 80])
    b_convleft2 = utils.bias_variable([80])
    W_convright2 = utils.weight_variable([5, 40, 80])
    b_convright2 = utils.bias_variable([80])
    h_convleft2 = utils.lrelu(utils.conv1d(h_convleft1, W_convleft2) + b_convleft2)
    h_convright2 = utils.lrelu(utils.conv1d(h_convright1, W_convright2) + b_convright2)
    h_convout = tf.concat([h_convleft2, h_convright2], 1)

    # First fully connected layer. Reshape the convolution output to a 1D vector.
    fc_dim_1 = int(self.config.strlen * 80 / 7.89)  # ad hoc divisor chosen for the hidden width
    W_fc1 = utils.weight_variable([self.config.strlen * 80, fc_dim_1])
    b_fc1 = utils.bias_variable([fc_dim_1])
    h_conv_flat = tf.reshape(h_convout, [-1, self.config.strlen * 80])
    # h_conv_flat = tf.nn.dropout(h_conv_flat, self.keep_prob)
    h_fc1 = utils.lrelu(tf.matmul(h_conv_flat, W_fc1) + b_fc1)
    h_fc1 = tf.nn.dropout(h_fc1, self.keep_prob)

    # Final fully-connected layer
    W_fc2 = utils.weight_variable([fc_dim_1, 1])
    b_fc2 = utils.bias_variable([1])
    y_conv = tf.matmul(h_fc1, W_fc2) + b_fc2
    y_out = tf.sigmoid(y_conv)
    # TODO: Add a separate filter with unshared weights that looks at the center?
    return y_out

def discriminator(self, inputs_logits, num_blocks=3, use_bias=False, num_classes=1):
    '''The discriminator scores the distribution of times and events.
    If a time is consistent with the history of times, give a high score;
    otherwise, give a low score. Implementation: CNN.'''
    with tf.variable_scope('Discriminator'):
        inputs = tf.transpose(inputs_logits, [0, 2, 1])
        output = utils.conv1d('D.Input', 1, self.filter_output_dim,
                              self.filter_size, inputs)
        output = self.res_block('D.1', output)
        output = self.res_block('D.2', output)
        output = self.res_block('D.3', output)
        output = self.res_block('D.4', output)
        output = self.res_block('D.5', output)
        output = tf.reshape(output, [-1, self.num_steps * self.filter_output_dim])
        output = utils.linear('D.Output',
                              self.num_steps * self.filter_output_dim, 1, output)
        return output

def discriminator(self, inputs_logits, num_blocks=3, use_bias=False, num_classes=1):
    """The discriminator scores the distribution of times and events.
    If a time is consistent with the history of times, give a high score;
    otherwise, give a low score. Implementation: CNN."""
    with tf.variable_scope('Discriminator'):
        # inputs = tf.transpose(inputs_logits, [0, 2, 1])
        inputs = inputs_logits
        output = utils.conv1d('D.Input', 1, self.filter_output_dim,
                              self.filter_size, inputs)
        output = self.res_block('D.1', output)
        output = self.res_block('D.2', output)
        output = self.res_block('D.3', output)
        output = self.res_block('D.4', output)
        output = self.res_block('D.5', output)
        output = tf.reshape(
            output, [-1, (self.length + self.num_steps) * self.filter_output_dim])
        # If the output size is 1, it is the discriminator score of D;
        # if the output size is 2, it is a bi-classification result of D.
        output = tf.nn.sigmoid(
            utils.linear('D.Output',
                         (self.length + self.num_steps) * self.filter_output_dim,
                         1, output))
        logging.info('The shape of output from D {}'.format(output.get_shape()))
        return output

def encoder_RecConv(self, cell_type, inputs, t):
    with tf.variable_scope('Generator/Event-Time'):
        # Recurrent (GRU) encoder over the event sequence.
        outputs_e = utils.build_encoder_graph_gru(
            inputs, self.hidden_size, self.num_layers, self.batch_size,
            self.num_steps, self.keep_prob, self.is_training,
            "Encoder_e" + cell_type)
        hidden_re = [tf.expand_dims(output_e, 1) for output_e in outputs_e]
        hidden_re = tf.concat(hidden_re, 1)

        # Convolutional (residual) encoder over the time sequence.
        inputs_t = tf.expand_dims(t, 2)
        output_t = utils.conv1d('G.T.Input', 1, self.filter_output_dim,
                                self.filter_size, inputs_t)
        output_t = self.res_block('G.T.1', output_t)
        output_t = self.res_block('G.T.2', output_t)
        output_t = self.res_block('G.T.3', output_t)
        output_t = self.res_block('G.T.4', output_t)
        output_t = self.res_block('G.T.5', output_t)
        hidden_rt = tf.reshape(output_t, [-1, self.num_steps, self.filter_output_dim])
        # hidden_r = tf.concat([hidden_re, hidden_rt], 2)
        # hidden_r = tf.reshape(hidden_r, [self.batch_size, -1])

        # Add a self-attention layer on each representation.
        hidden_re = self.encoder_attention(hidden_re, 'SA4E')
        hidden_rt = self.encoder_attention(hidden_rt, 'SA4T')
        return hidden_re, hidden_rt

def forward(self, x, n_state, past):
    assert len(x.shape) == 3  # Should be [batch, sequence, features]
    assert n_state % self.n_head == 0
    if past is not None:
        # Should be [batch, 2, heads, sequence, features], where 2 is [k, v]
        assert len(past.shape) == 5
    # Project to q, k, v in one shot, then split into heads.
    c = utils.conv1d(x, nf=n_state * 3)
    q, k, v = map(self.split_heads, c.split(c.shape[-1] // 3, dim=2))
    present = torch.stack([k, v], dim=1)
    if past is not None:
        # Prepend cached keys/values from earlier steps.
        pk, pv = torch.unbind(past, dim=1)
        k = torch.cat([pk, k], dim=-2)
        v = torch.cat([pv, v], dim=-2)
    a = self.multihead_attn(q, k, v)
    a = self.merge_heads(a)
    a = utils.conv1d(a, nf=n_state)
    return a, present

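# NOTE: The two PyTorch snippets (this forward and the mlp below) use a GPT-2
# style "conv1d" that, despite the name, is just a learned affine projection
# over the last dimension. A module version is sketched here for clarity; the
# snippets call it as a plain function, so the real utils.conv1d presumably
# fetches its weights differently.
import torch
import torch.nn as nn

class Conv1D(nn.Module):
    '''Affine projection over the last dimension, as in GPT-2's Conv1D.'''
    def __init__(self, nx, nf):
        super().__init__()
        self.weight = nn.Parameter(torch.randn(nx, nf) * 0.02)
        self.bias = nn.Parameter(torch.zeros(nf))

    def forward(self, x):
        # x: [batch, sequence, nx] -> [batch, sequence, nf]
        return x @ self.weight + self.bias
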
def add_prediction_op(self):
    fs = [5, 5]  # filter sizes
    cs = [4, 40, 80]  # cs[i] is the number of output channels from layer i (layer 0 is the input layer)

    # First conv layer
    W_conv1 = utils.weight_variable([fs[0], cs[0], cs[1]])
    b_conv1 = utils.bias_variable([cs[1]])
    h_conv1 = utils.lrelu(utils.conv1d(self.x, W_conv1) + b_conv1)

    # Second conv layer
    W_conv2 = utils.weight_variable([fs[1], cs[1], cs[2]])
    b_conv2 = utils.bias_variable([cs[2]])
    h_conv2 = utils.lrelu(utils.conv1d(h_conv1, W_conv2) + b_conv2)

    # First fully connected layer
    W_fc1 = utils.weight_variable([self.config.strlen * cs[2], 1024])
    b_fc1 = utils.bias_variable([1024])
    # Reshape the convolution output to a 1D vector and use it as input to the FC layer.
    h_conv2_flat = tf.reshape(h_conv2, [-1, self.config.strlen * cs[2]])
    h_fc1 = utils.lrelu(tf.matmul(h_conv2_flat, W_fc1) + b_fc1)

    # Dropout (should be added to earlier layers too...
    # TODO: further investigate performance with dropout at various points)
    h_fc1_drop = tf.nn.dropout(h_fc1, self.keep_prob)

    # Final fully-connected layer
    W_fc2 = utils.weight_variable([1024, 1])
    b_fc2 = utils.bias_variable([1])
    y_conv = tf.matmul(h_fc1_drop, W_fc2) + b_fc2
    y_out = tf.sigmoid(y_conv)  # Apply sigmoid to get a probability as the final output
    return y_out

def encoder_t(self, t):
    with tf.variable_scope('Generator'):
        inputs_t = tf.expand_dims(t, 2)
        output_t = utils.conv1d('G.T.Input', 1, self.filter_output_dim,
                                self.filter_size, inputs_t)
        output_t = self.res_block('G.T.1', output_t)
        output_t = self.res_block('G.T.2', output_t)
        output_t = self.res_block('G.T.3', output_t)
        output_t = self.res_block('G.T.4', output_t)
        output_t = self.res_block('G.T.5', output_t)
        hidden_rt = tf.reshape(output_t, [-1, self.num_steps, self.filter_output_dim])
        # hidden_r = tf.concat([hidden_re, hidden_rt], 2)
        # hidden_r = tf.reshape(hidden_r, [self.batch_size, -1])
        return hidden_rt

def encoder(self, inputs, scope=None):
    '''Encode a sentence and return a latent representation.'''
    with tf.variable_scope(scope or "Encoder"):
        if cfg.convolutional:
            out = inputs
            widths = [int(i) for i in cfg.conv_width.split(',')]
            for i, width in enumerate(widths):
                out = utils.conv1d(out, cfg.hidden_size, width, 1, 'VALID',
                                   scope='conv%d' % i)
                out = tf.contrib.layers.batch_norm(inputs=out,
                                                   is_training=self.training,
                                                   scope='bn%d' % i)
                if i < len(widths) - 1:
                    out = tf.nn.elu(out)
            z = tf.reduce_max(out, 1)
        else:
            if cfg.encoder_birnn:
                outputs, fs = tf.nn.bidirectional_dynamic_rnn(
                    self.rnn_cell(cfg.num_layers, cfg.hidden_size // 2),
                    self.rnn_cell(cfg.num_layers, cfg.hidden_size // 2),
                    inputs, sequence_length=self.lengths,
                    swap_memory=True, dtype=tf.float32)
                outputs = tf.concat(2, outputs)
                fs = tf.concat(1, fs[0] + fs[1])  # last states of fwd and bkwd
            else:
                if cfg.encoder_summary == 'laststate':
                    inputs = tf.reverse_sequence(inputs, self.lengths, 1)
                outputs, fs = tf.nn.dynamic_rnn(
                    self.rnn_cell(cfg.num_layers), inputs,
                    sequence_length=self.lengths,
                    swap_memory=True, dtype=tf.float32)
                fs = tf.concat(1, fs)
            if cfg.encoder_summary == 'laststate':
                fs = utils.highway(fs, scope='encoder_output_highway')
                z = tf.nn.tanh(utils.linear(fs, cfg.latent_size, True,
                                            scope='outputs_transform'))
            else:
                outputs = tf.reshape(outputs, [-1, cfg.hidden_size])
                outputs = utils.highway(outputs, scope='encoder_output_highway')
                if cfg.encoder_summary == 'attention':
                    flat_input = tf.reshape(inputs, [-1, inputs.get_shape()[2].value])
                    weights = utils.linear(tf.concat(1, [flat_input, outputs]),
                                           cfg.hidden_size, True,
                                           scope='outputs_attention')
                    outputs = tf.reshape(outputs, [cfg.batch_size, -1, cfg.hidden_size])
                    weights = tf.reshape(weights, [cfg.batch_size, -1, cfg.hidden_size])
                    weights = tf.nn.softmax(weights, 1)
                    z = tf.reduce_sum(outputs * weights, [1])
                    z = tf.nn.tanh(utils.linear(z, cfg.latent_size, True,
                                                scope='outputs_transform'))
                elif cfg.encoder_summary == 'mean':
                    outputs = utils.linear(outputs, cfg.latent_size, True,
                                           scope='outputs_transform')
                    outputs = tf.reshape(outputs, [cfg.batch_size, -1, cfg.latent_size])
                    z = tf.nn.tanh(tf.reduce_mean(outputs, [1]))
                else:
                    raise ValueError('Invalid encoder_summary configuration.')
        z_mean = utils.linear(z, cfg.latent_size, True, scope='encoder_z_mean')
        z_logvar = utils.linear(z, cfg.latent_size, True, scope='encoder_z_logvar')
        return z_mean, z_logvar

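# NOTE: encoder() returns (z_mean, z_logvar), the standard parameterization of
# a diagonal-Gaussian VAE posterior. The usual next step (assumed here, not
# shown in these snippets) is the reparameterization trick:
import tensorflow as tf

def sample_z(z_mean, z_logvar):
    # z = mu + sigma * eps with eps ~ N(0, I); keeps sampling differentiable.
    eps = tf.random_normal(tf.shape(z_mean))
    return z_mean + tf.exp(0.5 * z_logvar) * eps
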
def mlp(self, x, n_state):
    nx = x.shape[-1]
    h = self.gelu(utils.conv1d(x, nf=n_state))  # expand to the hidden width
    h2 = utils.conv1d(h, nf=nx)  # project back to the input width
    return h2

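# NOTE: mlp() relies on self.gelu, which is not defined in these snippets. The
# GPT-2 tanh approximation of GELU is the usual choice (an assumption here):
import math
import torch

def gelu(x):
    return 0.5 * x * (1.0 + torch.tanh(math.sqrt(2.0 / math.pi)
                                       * (x + 0.044715 * torch.pow(x, 3.0))))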