def __init__(self, img_size, glimpse_size, n_what, glimpse_encoder, scale_offset=0., masked_glimpse=False,
             debug=False):
    super(AIREncoder, self).__init__(img_size, glimpse_size, inverse=False)
    self.n_what = n_what
    self._masked_glimpse = masked_glimpse

    with self._enter_variable_scope():
        self._glimpse_encoder = glimpse_encoder
        self._what_distrib = GaussianFromParamVec(n_what, scale_offset=scale_offset,
                                                  validate_args=debug, allow_nan_stats=not debug)

        if self._masked_glimpse:
            self._mask_mlp = MLP(128, n_out=np.prod(glimpse_size), transfer=tf.nn.sigmoid,
                                 output_initializers={'b': tf.constant_initializer(1.)})
def _build(self, inpt):
    n = np.prod(self._output_size)
    mlp = MLP(self._n_hidden, n_out=n)
    reshape = snt.BatchReshape(self._output_size)
    seq = snt.Sequential([mlp, reshape])
    return seq(inpt) * tf.get_variable('output_scale', initializer=self._output_scale)
def _make_priors(self, time_step, prior_conditioning):
    """Instantiates prior distributions for discovery."""
    is_first_timestep = tf.to_float(tf.equal(time_step, 0))

    if self._disc_prior_type == 'geom':
        num_steps_prior = tfd.Geometric(probs=1. - self._init_disc_step_success_prob)

    elif self._disc_prior_type == 'cat':
        init = [0.] * (self._n_steps + 1)
        step_logits = tf.Variable(init, trainable=True, dtype=tf.float32, name='step_prior_bias')

        # increase probability of zero steps when t > 0
        init = [10.] + [0.] * self._n_steps
        timestep_bias = tf.Variable(init, trainable=True, dtype=tf.float32, name='step_prior_timestep_bias')
        step_logits += (1. - is_first_timestep) * timestep_bias

        if prior_conditioning is not None:
            step_logits = tf.expand_dims(step_logits, 0) + MLP(10, n_out=self._n_steps + 1)(prior_conditioning)

        step_logits = tf.nn.elu(step_logits)
        num_steps_prior = tfd.Categorical(logits=step_logits)

    else:
        raise ValueError('Invalid prior type: {}'.format(self._disc_prior_type))

    return self._what_prior, self._where_prior, num_steps_prior
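# Hedged sketch (not part of the original module): a plain-numpy illustration of how the
# 'cat' step prior above shifts probability mass toward zero discovered steps once
# time_step > 0. At initialisation the learned logits are zero and the timestep bias
# puts +10 on the zero-step logit, which dominates the softmax. Values are illustrative only.
import numpy as np

def softmax(x):
    e = np.exp(x - x.max())
    return e / e.sum()

n_steps = 3
step_logits = np.zeros(n_steps + 1)                 # 'step_prior_bias' at init
timestep_bias = np.array([10.] + [0.] * n_steps)    # 'step_prior_timestep_bias' at init

for t in (0, 1):
    is_first_timestep = float(t == 0)
    logits = step_logits + (1. - is_first_timestep) * timestep_bias
    print(t, softmax(logits))   # t=0: uniform over 0..3 steps; t=1: ~1 on zero steps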
def _build(self, inpt):
    flatten = snt.BatchFlatten()
    mlp = MLP(self._n_hidden, n_out=8)
    seq = snt.Sequential([flatten, mlp])
    params = seq(inpt)
    return params[..., :4], params[..., 4:] + self._scale_bias
def _build(self, inpt):
    flatten = snt.BatchFlatten()
    mlp = MLP(self._n_hidden, n_out=8)
    seq = snt.Sequential([flatten, mlp])
    params = seq(inpt)
    scale_offset = tf.get_variable('scale_offset', initializer=self._scale_offset)
    return params[..., :4], params[..., 4:] + scale_offset
def _build(self, img, what, where, presence_prob):
    batch_size = int(img.get_shape()[0])

    parts = [tf.reshape(tf.transpose(i, (1, 0, 2)), (batch_size, -1))
             for i in (what, where, presence_prob)]

    img_flat = tf.reshape(img, (batch_size, -1))
    baseline_inpts = [img_flat] + parts
    baseline_inpts = tf.concat(baseline_inpts, -1)

    mlp = MLP(self._n_hidden, n_out=1)
    baseline = mlp(baseline_inpts)
    return baseline
def __init__(self, n_steps, n_latent_code=0, relation_embedding=False):
    """Initialises the module.

    :param n_steps: Integer, number of inference steps to perform at this time-step.
    :param n_latent_code: Integer, dimensionality of summary of latent variables.
    :param relation_embedding: Boolean; computes DeepSet-like embedding of latent variables if True.
    """
    super(AbstractTimstepModule, self).__init__()
    self._n_steps = n_steps
    self._n_latent_code = n_latent_code
    self._relation_embedding = relation_embedding

    with self._enter_variable_scope():
        if n_latent_code > 0:
            self._latent_encoder = MLP([n_latent_code] * 2)
def _build(self, previous_presence, previous_logit, *features):
    init = {'b': tf.constant_initializer(self._steps_bias)}
    mlp = MLP(self._n_hidden, n_out=1, output_initializers=init)

    features = ops.maybe_concat(features)
    logit = mlp(features)
    logit = previous_presence * logit + (previous_presence - 1.) * 88.

    if previous_logit is not None:
        if self._max_rel_logit_change != np.inf:
            min_logit = (1. - self._max_rel_logit_change) * previous_logit
            max_logit = (1. + self._max_rel_logit_change) * previous_logit
            logit = tf.clip_by_value(logit, min_logit, max_logit)
        elif self._max_logit_change != np.inf:
            logit = previous_logit + self._max_logit_change * tf.nn.tanh(logit)

    return self._bernoulli(logit)
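# Hedged sketch (not part of the module above): numpy illustration of the two ways the
# presence-logit update is bounded. Relative clipping keeps the new logit within a factor
# of the previous one; the tanh variant bounds the additive change by max_logit_change.
# The numbers below are illustrative only.
import numpy as np

previous_logit = 2.0
raw_logit = 7.5                      # unbounded logit proposed by the MLP

max_rel_logit_change = 0.5
clipped = np.clip(raw_logit,
                  (1. - max_rel_logit_change) * previous_logit,
                  (1. + max_rel_logit_change) * previous_logit)    # -> 3.0

max_logit_change = 1.0
bounded = previous_logit + max_logit_change * np.tanh(raw_logit)   # -> ~3.0
print(clipped, bounded)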
def _build(self, inpt):
    mlp = MLP(self._n_hidden, n_out=1)
    logit = mlp(inpt) + self._steps_bias
    return tf.nn.sigmoid(logit)
def _build(self, inpt):
    n = np.prod(self._output_size)
    mlp = MLP(self._n_hidden, n_out=n)
    reshape = snt.BatchReshape(self._output_size)
    seq = snt.Sequential([mlp, reshape])
    return seq(inpt)
def _build(self, inpt):
    flat = snt.BatchFlatten()
    mlp = MLP(self._n_hidden)
    seq = snt.Sequential([flat, mlp])
    return seq(inpt)
def _embed(self, inpt):
    flatten = snt.BatchFlatten()
    mlp = MLP(self._n_hidden, n_out=self._n_param)
    seq = snt.Sequential([flatten, mlp])
    return seq(inpt)
def build(self):
    with tf.variable_scope("Embeddings"):
        self.embeddings = tf.get_variable("emb", [self.config.n_embed, self.config.d_embed], dtype=tf.float32,
                                          initializer=tf.contrib.layers.xavier_initializer())
        embeddings_root = tf.get_variable("emb_root", [1, 1, 2 * self.config.dim_sem], dtype=tf.float32,
                                          initializer=tf.contrib.layers.xavier_initializer())
        embeddings_root_s = tf.get_variable("emb_root_s", [1, 1, 2 * self.config.dim_sem], dtype=tf.float32,
                                            initializer=tf.contrib.layers.xavier_initializer())

    with tf.variable_scope("Model"):
        w_comb = tf.get_variable("w_comb", [4 * self.config.dim_sem, 2 * self.config.dim_sem], dtype=tf.float32,
                                 initializer=tf.contrib.layers.xavier_initializer())
        b_comb = tf.get_variable("bias_comb", [2 * self.config.dim_sem], dtype=tf.float32,
                                 initializer=tf.constant_initializer())
        w_comb_s = tf.get_variable("w_comb_s", [4 * self.config.dim_sem, 2 * self.config.dim_sem], dtype=tf.float32,
                                   initializer=tf.contrib.layers.xavier_initializer())
        b_comb_s = tf.get_variable("bias_comb_s", [2 * self.config.dim_sem], dtype=tf.float32,
                                   initializer=tf.constant_initializer())
        w_softmax = tf.get_variable("w_softmax", [2 * self.config.dim_sem, self.config.dim_output], dtype=tf.float32,
                                    initializer=tf.contrib.layers.xavier_initializer())
        b_softmax = tf.get_variable("bias_softmax", [self.config.dim_output], dtype=tf.float32,
                                    initializer=tf.contrib.layers.xavier_initializer())

    with tf.variable_scope("Structure/doc"):
        tf.get_variable("w_parser_p", [2 * self.config.dim_str, 2 * self.config.dim_str], dtype=tf.float32,
                        initializer=tf.contrib.layers.xavier_initializer())
        tf.get_variable("w_parser_c", [2 * self.config.dim_str, 2 * self.config.dim_str], dtype=tf.float32,
                        initializer=tf.contrib.layers.xavier_initializer())
        tf.get_variable("w_parser_s", [2 * self.config.dim_str, 2 * self.config.dim_str], dtype=tf.float32,
                        initializer=tf.contrib.layers.xavier_initializer())
        tf.get_variable("bias_parser_p", [2 * self.config.dim_str], dtype=tf.float32,
                        initializer=tf.contrib.layers.xavier_initializer())
        tf.get_variable("bias_parser_c", [2 * self.config.dim_str], dtype=tf.float32,
                        initializer=tf.contrib.layers.xavier_initializer())
        tf.get_variable("w_parser_root", [2 * self.config.dim_str, 1], dtype=tf.float32,
                        initializer=tf.contrib.layers.xavier_initializer())

    with tf.variable_scope("Structure/sent"):
        tf.get_variable("w_parser_p", [2 * self.config.dim_str, 2 * self.config.dim_str], dtype=tf.float32,
                        initializer=tf.contrib.layers.xavier_initializer())
        tf.get_variable("w_parser_c", [2 * self.config.dim_str, 2 * self.config.dim_str], dtype=tf.float32,
                        initializer=tf.contrib.layers.xavier_initializer())
        tf.get_variable("bias_parser_p", [2 * self.config.dim_str], dtype=tf.float32,
                        initializer=tf.contrib.layers.xavier_initializer())
        tf.get_variable("bias_parser_c", [2 * self.config.dim_str], dtype=tf.float32,
                        initializer=tf.contrib.layers.xavier_initializer())
        tf.get_variable("w_parser_s", [2 * self.config.dim_str, 2 * self.config.dim_str], dtype=tf.float32,
                        initializer=tf.contrib.layers.xavier_initializer())
        tf.get_variable("w_parser_root", [2 * self.config.dim_str, 1], dtype=tf.float32,
                        initializer=tf.contrib.layers.xavier_initializer())

    sent_l = self.t_variables['sent_l']
    doc_l = self.t_variables['doc_l']
    max_sent_l = self.t_variables['max_sent_l']
    max_doc_l = self.t_variables['max_doc_l']
    batch_l = self.t_variables['batch_l']

    tokens_input = tf.nn.embedding_lookup(self.embeddings,
                                          self.t_variables['token_idxs'][:, :max_doc_l, :max_sent_l])
    tokens_input = tf.nn.dropout(tokens_input, self.t_variables['keep_prob'])
    mask_tokens = self.t_variables['mask_tokens'][:, :max_doc_l, :max_sent_l]
    mask_sents = self.t_variables['mask_sents'][:, :max_doc_l]
    [_, _, _, rnn_size] = tokens_input.get_shape().as_list()

    tokens_input_do = tf.reshape(tokens_input, [batch_l * max_doc_l, max_sent_l, rnn_size])
    sent_l = tf.reshape(sent_l, [batch_l * max_doc_l])
    mask_tokens = tf.reshape(mask_tokens, [batch_l * max_doc_l, -1])

    tokens_output, _ = dynamicBiRNN(tokens_input_do, sent_l, n_hidden=self.config.dim_hidden,
                                    cell_type=self.config.rnn_cell, cell_name='Model/sent')
    tokens_sem = tf.concat([tokens_output[0][:, :, :self.config.dim_sem],
                            tokens_output[1][:, :, :self.config.dim_sem]], 2)
    tokens_str = tf.concat([tokens_output[0][:, :, self.config.dim_sem:],
                            tokens_output[1][:, :, self.config.dim_sem:]], 2)

    temp1 = tf.zeros([batch_l * max_doc_l, max_sent_l, 1], tf.float32)
    temp2 = tf.zeros([batch_l * max_doc_l, 1, max_sent_l], tf.float32)
    mask1 = tf.ones([batch_l * max_doc_l, max_sent_l, max_sent_l - 1], tf.float32)
    mask2 = tf.ones([batch_l * max_doc_l, max_sent_l - 1, max_sent_l], tf.float32)
    mask1 = tf.concat([temp1, mask1], 2)
    mask2 = tf.concat([temp2, mask2], 1)

    str_scores_s_ = get_structure('sent', tokens_str, max_sent_l, mask1, mask2)  # batch_l, sent_l+1, sent_l
    str_scores_s = tf.matrix_transpose(str_scores_s_)  # soft parent
    tokens_sem_root = tf.concat([tf.tile(embeddings_root_s, [batch_l * max_doc_l, 1, 1]), tokens_sem], 1)
    tokens_output_ = tf.matmul(str_scores_s, tokens_sem_root)
    tokens_output = LReLu(tf.tensordot(tf.concat([tokens_sem, tokens_output_], 2), w_comb_s, [[2], [0]]) + b_comb_s)

    if self.config.sent_attention == 'sum':
        tokens_output = tokens_output * tf.expand_dims(mask_tokens, 2)
        tokens_output = tf.reduce_sum(tokens_output, 1)
    elif self.config.sent_attention == 'mean':
        tokens_output = tokens_output * tf.expand_dims(mask_tokens, 2)
        tokens_output = tf.reduce_sum(tokens_output, 1) / tf.expand_dims(tf.cast(sent_l, tf.float32), 1)
    elif self.config.sent_attention == 'max':
        tokens_output = tokens_output + tf.expand_dims((mask_tokens - 1) * 999, 2)
        tokens_output = tf.reduce_max(tokens_output, 1)

    sents_input = tf.reshape(tokens_output, [batch_l, max_doc_l, 2 * self.config.dim_sem])
    sents_output, _ = dynamicBiRNN(sents_input, doc_l, n_hidden=self.config.dim_hidden,
                                   cell_type=self.config.rnn_cell, cell_name='Model/doc')
    sents_sem = tf.concat([sents_output[0][:, :, :self.config.dim_sem],
                           sents_output[1][:, :, :self.config.dim_sem]], 2)
    sents_str = tf.concat([sents_output[0][:, :, self.config.dim_sem:],
                           sents_output[1][:, :, self.config.dim_sem:]], 2)

    str_scores_ = get_structure('doc', sents_str, max_doc_l, self.t_variables['mask_parser_1'],
                                self.t_variables['mask_parser_2'])  # batch_l, doc_l+1, doc_l
    str_scores = tf.matrix_transpose(str_scores_)  # soft parent
    sents_sem_root = tf.concat([tf.tile(embeddings_root, [batch_l, 1, 1]), sents_sem], 1)
    sents_output_ = tf.matmul(str_scores, sents_sem_root)
    sents_output = LReLu(tf.tensordot(tf.concat([sents_sem, sents_output_], 2), w_comb, [[2], [0]]) + b_comb)

    if self.config.doc_attention == 'sum':
        sents_output = sents_output * tf.expand_dims(mask_sents, 2)
        sents_output = tf.reduce_sum(sents_output, 1)
    elif self.config.doc_attention == 'mean':
        sents_output = sents_output * tf.expand_dims(mask_sents, 2)
        sents_output = tf.reduce_sum(sents_output, 1) / tf.expand_dims(tf.cast(doc_l, tf.float32), 1)
    elif self.config.doc_attention == 'max':
        sents_output = sents_output + tf.expand_dims((mask_sents - 1) * 999, 2)
        sents_output = tf.reduce_max(sents_output, 1)

    final_output = MLP(sents_output, 'output', self.t_variables['keep_prob'])
    self.final_output = tf.matmul(final_output, w_softmax) + b_softmax
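# Hedged sketch (not from the model above): numpy illustration of the "soft parent" step
# used at both the sentence and document level. str_scores has shape [batch, n, n+1],
# i.e. for every unit a distribution over a root node plus the n units; multiplying it
# with the root-prepended semantic vectors gives each unit's expected parent embedding.
# Shapes and values are illustrative only.
import numpy as np

batch, n, d = 1, 3, 4
str_scores = np.random.rand(batch, n, n + 1)
str_scores /= str_scores.sum(-1, keepdims=True)      # rows sum to 1

sem = np.random.rand(batch, n, d)                     # per-unit semantic vectors
root = np.random.rand(batch, 1, d)                    # learned root embedding
sem_root = np.concatenate([root, sem], axis=1)        # [batch, n+1, d]

soft_parent = np.matmul(str_scores, sem_root)         # [batch, n, d]
print(soft_parent.shape)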
import numpy as np
import pandas as pd

from neural import MLP

df = pd.read_csv('data.csv')
df = df.iloc[np.random.permutation(len(df))]

y = df.iloc[0:100, 4].values
y = np.where(y == "Iris-setosa", 1, 0).reshape(-1, 1)
X = df.iloc[0:100, [0, 2]].values

inputSize = X.shape[1]  # number of inputs equals the number of features
hiddenSizes = 10  # number of neurons in the hidden (A) layer
outputSize = y.shape[1] if len(y.shape) > 1 else 1  # number of outputs equals the number of classes

iterations = 50
learning_rate = 0.1

net = MLP(inputSize, outputSize, learning_rate, hiddenSizes)

# train the network (the network is effectively its vector of weights)
for i in range(iterations):
    net.train(X, y)
    if i % 10 == 0:
        print("Iteration: " + str(i) + ' || ' +
              "Mean error: " + str(np.mean(np.square(y - net.predict(X)))))

# compute the error on the training set
pr = net.predict(X)
print(sum(abs(y - (pr > 0.5))))
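# Hedged sketch: the 'neural' module imported above is not shown, so this is only an
# assumption about the MLP interface the script relies on -- one hidden layer, sigmoid
# activations, and one full-batch gradient step per train() call. It is not the original
# implementation.
import numpy as np

def _sigmoid(x):
    return 1. / (1. + np.exp(-x))

class MLP(object):
    def __init__(self, input_size, output_size, learning_rate, hidden_size):
        self.lr = learning_rate
        self.w1 = np.random.randn(input_size, hidden_size) * 0.1
        self.w2 = np.random.randn(hidden_size, output_size) * 0.1

    def predict(self, X):
        # forward pass; the hidden activation is cached for use in train()
        self.h = _sigmoid(np.dot(X, self.w1))
        return _sigmoid(np.dot(self.h, self.w2))

    def train(self, X, y):
        # one gradient step on the mean squared error, backpropagated through both layers
        out = self.predict(X)
        d_out = (out - y) * out * (1. - out)
        d_h = np.dot(d_out, self.w2.T) * self.h * (1. - self.h)
        self.w2 -= self.lr * np.dot(self.h.T, d_out)
        self.w1 -= self.lr * np.dot(X.T, d_h)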
class PropagationCore(BaseSQAIRCore):
    """Recurrent propagation core.

    It is run iteratively to propagate several objects.
    """
    _output_names = 'what what_sample what_loc what_scale where where_sample where_loc where_scale presence_prob' \
                    ' presence presence_logit temporal_state'.split()

    _init_presence_value = 0.  # at the beginning we assume no objects
    _what_scale_bias = -3.

    def __init__(self, img_size, crop_size, n_what, transition, input_encoder, glimpse_encoder,
                 transform_estimator, steps_predictor, temporal_cell, where_update_scale=1.0, debug=False):
        """Initialises the model.

        If argument is not covered here, see BaseSQAIRCore for documentation.

        :param temporal_cell: RNNCore for the temporal rnn.
        :param where_update_scale: Float, rescales the update of the `where` variables.
        """
        super(PropagationCore, self).__init__(img_size, crop_size, n_what, transition, input_encoder,
                                              glimpse_encoder, transform_estimator, steps_predictor,
                                              debug=debug)

        self._temporal_cell = temporal_cell

        with self._enter_variable_scope():
            self._where_update_scale = tf.get_variable('where_update_scale', shape=[], dtype=tf.float32,
                                                       initializer=tf.constant_initializer(where_update_scale),
                                                       trainable=False)
            self._where_distrib = AffineDiagNormal(validate_args=self._debug, allow_nan_stats=not self._debug)

    @property
    def output_size(self):
        return [
            self._n_what,  # what code
            self._n_what,  # what sample
            self._n_what,  # what loc
            self._n_what,  # what scale
            self._n_transform_param,  # where code
            self._n_transform_param,  # where sample
            self._n_transform_param,  # where loc
            self._n_transform_param,  # where scale
            1,  # presence prob
            1,  # presence
            1,  # presence_logit,
            self._temporal_cell.state_size,
        ]

    def _build(self, (z_tm1, temporal_hidden_state), state):
        """Input is unused; it's only to force a maximum number of steps"""

        # same object, previous timestep
        what_tm1, where_tm1, presence_tm1, presence_logit_tm1 = z_tm1
        temporal_state = nest.flatten(temporal_hidden_state)[-1]

        # different object, current timestep
        img_flat, what_km1, where_km1, presence_km1, hidden_state = state
        img = tf.reshape(img_flat, (-1,) + tuple(self._img_size))

        with tf.variable_scope('rnn_inpt'):
            where_bias = MLP(128, n_out=4)(temporal_state) * .1
            what_distrib = self._glimpse_encoder(img, where_tm1 + where_bias, mask_inpt=temporal_state)[0]
            rnn_inpt = what_distrib.loc

            rnn_inpt = [
                rnn_inpt,  # img
                what_km1, where_km1, presence_km1,  # explaining away
                what_tm1, where_tm1, presence_tm1, temporal_state  # previous state
            ]
            rnn_inpt = tf.concat(rnn_inpt, -1)
            hidden_output, hidden_state = self._cell(rnn_inpt, hidden_state)

        with tf.variable_scope('where'):
            where, where_sample, where_loc, where_scale = self._compute_where(where_tm1, hidden_output,
                                                                              temporal_state)

        with tf.variable_scope('what'):
            what, what_sample, what_loc, what_scale, temporal_hidden_state \
                = self._compute_what(img, what_tm1, where, hidden_output, temporal_hidden_state, temporal_state)

        with tf.variable_scope('presence'):
            presence, presence_prob, presence_logit \
                = self._compute_presence(presence_tm1, presence_logit_tm1, hidden_output, temporal_state, what)

        output = [what, what_sample, what_loc, what_scale,
                  where, where_sample, where_loc, where_scale,
                  presence_prob, presence, presence_logit, temporal_hidden_state]
        new_state = [img_flat, what, where, presence, hidden_state]

        return output, new_state
class PropagationCore(BaseSQAIRCore):
    """Recurrent propagation core.

    It is run iteratively to propagate several objects.
    """
    _output_names = 'what what_sample what_loc what_scale where where_sample where_loc where_scale presence_prob' \
                    ' presence presence_logit temporal_state'.split()

    _init_presence_value = 0.  # at the beginning we assume no objects
    _what_scale_bias = -3.

    def __init__(self, img_size, crop_size, n_what, transition, input_encoder, glimpse_encoder,
                 transform_estimator, steps_predictor, temporal_cell, where_update_scale=1.0, debug=False):
        """Initialises the model.

        If argument is not covered here, see BaseSQAIRCore for documentation.

        :param temporal_cell: RNNCore for the temporal rnn.
        :param where_update_scale: Float, rescales the update of the `where` variables.
        """
        # inheriting from BaseSQAIRCore
        super(PropagationCore, self).__init__(img_size, crop_size, n_what, transition, input_encoder,
                                              glimpse_encoder, transform_estimator, steps_predictor, debug=debug)

        # temporal RNN, passed in as a constructor argument
        self._temporal_cell = temporal_cell

        with self._enter_variable_scope():
            # scale used to rescale the update of the 'where' latent variables
            self._where_update_scale = tf.get_variable('where_update_scale', shape=[], dtype=tf.float32,
                                                       initializer=tf.constant_initializer(where_update_scale),
                                                       trainable=False)
            # distribution for the 'where' latent variables
            self._where_distrib = AffineDiagNormal()

    @property
    def output_size(self):
        return [
            self._n_what,  # what code
            self._n_what,  # what sample
            self._n_what,  # what loc
            self._n_what,  # what scale
            self._n_transform_param,  # where code
            self._n_transform_param,  # where sample
            self._n_transform_param,  # where loc
            self._n_transform_param,  # where scale
            1,  # presence prob
            1,  # presence
            1,  # presence_logit,
            self._temporal_cell.state_size,
        ]

    def _build(self, (z_tm1, temporal_hidden_state), state):
        """Input is unused; it's only to force a maximum number of steps"""

        # split z_tm1 into the what, where and presence latents (plus the presence logits)
        # of the same object at the previous timestep
        what_tm1, where_tm1, presence_tm1, presence_logit_tm1 = z_tm1

        # take the last tensor of the flattened temporal hidden state
        temporal_state = nest.flatten(temporal_hidden_state)[-1]

        # unpack the state: the image and the latent variables of the previously
        # processed object at the current timestep
        img_flat, what_km1, where_km1, presence_km1, hidden_state = state

        # reshape the flat vector of image pixels back into an image
        img = tf.reshape(img_flat, (-1,) + tuple(self._img_size))

        with tf.variable_scope('rnn_inpt'):
            # bias added to the previous timestep's 'where' latent to obtain the
            # proposal 'where' latent for the current step
            where_bias = MLP(128, n_out=4)(temporal_state) * .1

            # extract and encode the proposal glimpse (uses a spatial transformer internally)
            what_distrib = self._glimpse_encoder(img, where_tm1 + where_bias, mask_inpt=temporal_state)[0]

            # take the mean of the 'what' distribution for this object
            rnn_inpt = what_distrib.loc

            # construct the input to the relational RNN
            rnn_inpt = [
                rnn_inpt,  # img
                what_km1, where_km1, presence_km1,  # explaining away
                what_tm1, where_tm1, presence_tm1, temporal_state  # previous state
            ]
            # concatenate the list into a single tensor
            rnn_inpt = tf.concat(rnn_inpt, -1)

            # run the relational RNN
            hidden_output, hidden_state = self._cell(rnn_inpt, hidden_state)

        # sample the 'where' latent for the current timestep from the previous timestep's
        # 'where' and the relational RNN output
        with tf.variable_scope('where'):
            where, where_sample, where_loc, where_scale = self._compute_where(where_tm1, hidden_output,
                                                                              temporal_state)

        # sample the 'what' latent for the current timestep from the previous timestep's 'what',
        # the relational and temporal RNN outputs, and the encoded glimpse of the object
        with tf.variable_scope('what'):
            what, what_sample, what_loc, what_scale, temporal_hidden_state \
                = self._compute_what(img, what_tm1, where, hidden_output, temporal_hidden_state, temporal_state)

        # compute the presence of the object at the current timestep from the previous timestep's
        # presence, the current 'what' latent, and the relational and temporal RNN outputs
        with tf.variable_scope('presence'):
            presence, presence_prob, presence_logit \
                = self._compute_presence(presence_tm1, presence_logit_tm1, hidden_output, temporal_state, what)

        output = [what, what_sample, what_loc, what_scale,
                  where, where_sample, where_loc, where_scale,
                  presence_prob, presence, presence_logit, temporal_hidden_state]
        new_state = [img_flat, what, where, presence, hidden_state]

        return output, new_state
def build(self):
    with tf.variable_scope("Embeddings"):
        self.embeddings = tf.get_variable("emb", [self.config.n_embed, self.config.d_embed], dtype=tf.float64,
                                          initializer=self.xavier_init)
        embeddings_root = tf.get_variable("emb_root", [1, 1, 2 * self.config.dim_sem], dtype=tf.float64,
                                          initializer=self.xavier_init)
        embeddings_root_s = tf.get_variable("emb_root_s", [1, 1, 2 * self.config.dim_sem], dtype=tf.float64,
                                            initializer=self.xavier_init)

    with tf.variable_scope("Model"):
        w_comb = tf.get_variable("w_comb", [4 * self.config.dim_sem, 2 * self.config.dim_sem], dtype=tf.float64,
                                 initializer=self.xavier_init)
        w_comb_both = tf.get_variable("w_comb_both", [6 * self.config.dim_sem, 2 * self.config.dim_sem],
                                      dtype=tf.float64, initializer=self.xavier_init)
        b_comb = tf.get_variable("bias_comb", [2 * self.config.dim_sem], dtype=tf.float64,
                                 initializer=tf.constant_initializer())
        w_comb_s = tf.get_variable("w_comb_s", [4 * self.config.dim_sem, 2 * self.config.dim_sem],
                                   dtype=tf.float64, initializer=self.xavier_init)
        b_comb_s = tf.get_variable("bias_comb_s", [2 * self.config.dim_sem], dtype=tf.float64,
                                   initializer=tf.constant_initializer())
        w_softmax = tf.get_variable("w_softmax", [2 * self.config.dim_sem, self.config.dim_output],
                                    dtype=tf.float64, initializer=self.xavier_init)
        b_softmax = tf.get_variable("bias_softmax", [self.config.dim_output], dtype=tf.float64,
                                    initializer=self.xavier_init)
        w_sem_doc = tf.get_variable("w_sem_doc", [2 * self.config.dim_sem, 2 * self.config.dim_sem],
                                    dtype=tf.float64, initializer=self.xavier_init)
        w_str_doc = tf.get_variable("w_str_doc", [2 * self.config.dim_sem, 2 * self.config.dim_str],
                                    dtype=tf.float64, initializer=self.xavier_init)

    with tf.variable_scope("Structure/doc"):
        tf.get_variable("w_parser_p", [2 * self.config.dim_str, 2 * self.config.dim_str], dtype=tf.float64,
                        initializer=self.xavier_init)
        tf.get_variable("w_parser_c", [2 * self.config.dim_str, 2 * self.config.dim_str], dtype=tf.float64,
                        initializer=self.xavier_init)
        tf.get_variable("w_parser_s", [2 * self.config.dim_str, 2 * self.config.dim_str], dtype=tf.float64,
                        initializer=self.xavier_init)
        tf.get_variable("bias_parser_p", [2 * self.config.dim_str], dtype=tf.float64, initializer=self.xavier_init)
        tf.get_variable("bias_parser_c", [2 * self.config.dim_str], dtype=tf.float64, initializer=self.xavier_init)
        tf.get_variable("w_parser_root", [2 * self.config.dim_str, 1], dtype=tf.float64,
                        initializer=self.xavier_init)

    with tf.variable_scope("Structure/sent"):
        tf.get_variable("w_parser_p", [2 * self.config.dim_str, 2 * self.config.dim_str], dtype=tf.float64,
                        initializer=self.xavier_init)
        tf.get_variable("w_parser_c", [2 * self.config.dim_str, 2 * self.config.dim_str], dtype=tf.float64,
                        initializer=self.xavier_init)
        tf.get_variable("bias_parser_p", [2 * self.config.dim_str], dtype=tf.float64, initializer=self.xavier_init)
        tf.get_variable("bias_parser_c", [2 * self.config.dim_str], dtype=tf.float64, initializer=self.xavier_init)
        tf.get_variable("w_parser_s", [2 * self.config.dim_str, 2 * self.config.dim_str], dtype=tf.float64,
                        initializer=self.xavier_init)
        tf.get_variable("w_parser_root", [2 * self.config.dim_str, 1], dtype=tf.float64,
                        initializer=self.xavier_init)

    sent_l = self.t_variables['sent_l']
    doc_l = self.t_variables['doc_l']
    max_sent_l = self.t_variables['max_sent_l']
    max_doc_l = self.t_variables['max_doc_l']
    batch_l = self.t_variables['batch_l']

    tokens_input = tf.nn.embedding_lookup(self.embeddings,
                                          self.t_variables['token_idxs'][:, :max_doc_l, :max_sent_l])
    tokens_input = tf.nn.dropout(tokens_input, self.t_variables['keep_prob'])  # [batch_size, doc_l, sent_l, d_embed]
    mask_tokens = self.t_variables['mask_tokens'][:, :max_doc_l, :max_sent_l]
    mask_sents = self.t_variables['mask_sents'][:, :max_doc_l]  # [batch_size, doc_l]

    tokens_input_do = tf.reshape(tokens_input, [batch_l * max_doc_l, max_sent_l, self.config.d_embed])
    sent_l = tf.reshape(sent_l, [batch_l * max_doc_l])
    mask_tokens = tf.reshape(mask_tokens, [batch_l * max_doc_l, -1])

    tokens_output, _ = dynamicBiRNN(tokens_input_do, sent_l, n_hidden=self.config.dim_hidden,
                                    xavier_init=self.xavier_init, cell_type=self.config.rnn_cell,
                                    cell_name='Model/sent')
    tokens_sem = tf.concat([tokens_output[0][:, :, :self.config.dim_sem],
                            tokens_output[1][:, :, :self.config.dim_sem]], 2)
    tokens_str = tf.concat([tokens_output[0][:, :, self.config.dim_sem:],
                            tokens_output[1][:, :, self.config.dim_sem:]], 2)

    if self.config.skip_sent_attention:
        tokens_output = LReLu(tf.tensordot(tf.concat([tokens_sem, tokens_input_do], 2), w_comb_s, [[2], [0]])
                              + b_comb_s)
    else:
        temp1 = tf.zeros([batch_l * max_doc_l, max_sent_l, 1], tf.float64)
        temp2 = tf.zeros([batch_l * max_doc_l, 1, max_sent_l], tf.float64)
        mask1 = tf.ones([batch_l * max_doc_l, max_sent_l, max_sent_l - 1], tf.float64)
        mask2 = tf.ones([batch_l * max_doc_l, max_sent_l - 1, max_sent_l], tf.float64)
        mask1 = tf.concat([temp1, mask1], 2)
        mask2 = tf.concat([temp2, mask2], 1)

        if self.config.skip_mask_bug_fix:
            str_scores_s_, _, LL_tokens = get_structure('sent', tokens_str, mask1, mask2,
                                                        None, None, None)  # batch_l, sent_l+1, sent_l
        else:
            # create mask for setting all padded cells to 0
            mask_ll_tokens = tf.expand_dims(mask_tokens, 2)
            mask_ll_tokens_trans = tf.transpose(mask_ll_tokens, perm=[0, 2, 1])
            mask_tokens_mult = mask_ll_tokens * mask_ll_tokens_trans

            # create mask for setting the padded diagonals to 1
            mask_diags = tf.matrix_diag_part(mask_tokens_mult)
            mask_diags_invert = tf.cast(tf.logical_not(tf.cast(mask_diags, tf.bool)), tf.float64)
            zero_matrix = tf.zeros([batch_l * max_doc_l, max_sent_l, max_sent_l], tf.float64)
            mask_tokens_add = tf.matrix_set_diag(zero_matrix, mask_diags_invert)

            str_scores_s_, _, LL_tokens = get_structure('sent', tokens_str, mask1, mask2, mask_tokens_mult,
                                                        mask_tokens_add,
                                                        tf.expand_dims(mask_tokens, 2))  # batch_l, sent_l+1, sent_l

        str_scores_s = tf.matrix_transpose(str_scores_s_)  # soft parent
        tokens_sem_root = tf.concat([tf.tile(embeddings_root_s, [batch_l * max_doc_l, 1, 1]), tokens_sem], 1)
        tokens_output_ = tf.matmul(str_scores_s, tokens_sem_root)
        tokens_output = LReLu(tf.tensordot(tf.concat([tokens_sem, tokens_output_], 2), w_comb_s, [[2], [0]])
                              + b_comb_s)

    if self.config.sent_attention == 'sum':
        tokens_output = tokens_output * tf.expand_dims(mask_tokens, 2)
        tokens_output = tf.reduce_sum(tokens_output, 1)
    elif self.config.sent_attention == 'mean':
        tokens_output = tokens_output * tf.expand_dims(mask_tokens, 2)
        tokens_output = tf.reduce_sum(tokens_output, 1) / tf.expand_dims(tf.cast(sent_l, tf.float64), 1)
    elif self.config.sent_attention == 'max':
        tokens_output = tokens_output + tf.expand_dims((mask_tokens - 1) * 999, 2)
        tokens_output = tf.reduce_max(tokens_output, 1)  # batch_l * max_doc_l, 200

    if self.config.skip_doc_bilstm:
        if self.config.use_positional_encoding:
            tokens_output = tf.reshape(tokens_output, [batch_l, max_doc_l, 2 * self.config.dim_sem])
            tokens_output = self.add_timing_signal(tokens_output, max_doc_l, num_timescales=self.config.dim_sem)
            tokens_output = tf.reshape(tokens_output, [batch_l * max_doc_l, 2 * self.config.dim_sem])
        sents_sem = tf.matmul(tokens_output, w_sem_doc)
        sents_sem = tf.reshape(sents_sem, [batch_l, max_doc_l, 2 * self.config.dim_sem])
        sents_str = tf.matmul(tokens_output, w_str_doc)
        sents_str = tf.reshape(sents_str, [batch_l, max_doc_l, 2 * self.config.dim_str])
    else:
        sents_input = tf.reshape(tokens_output, [batch_l, max_doc_l, 2 * self.config.dim_sem])
        sents_output, _ = dynamicBiRNN(sents_input, doc_l, n_hidden=self.config.dim_hidden,
                                       xavier_init=self.xavier_init, cell_type=self.config.rnn_cell,
                                       cell_name='Model/doc')
        sents_sem = tf.concat([sents_output[0][:, :, :self.config.dim_sem],
                               sents_output[1][:, :, :self.config.dim_sem]], 2)  # [batch_l, doc_l, dim_sem*2]
        sents_str = tf.concat([sents_output[0][:, :, self.config.dim_sem:],
                               sents_output[1][:, :, self.config.dim_sem:]], 2)  # [batch_l, doc_l, dim_str*2]

    if self.config.skip_doc_attention:
        if self.config.skip_doc_bilstm:
            sents_input = tf.reshape(tokens_output, [batch_l, max_doc_l, 2 * self.config.dim_sem])
            sents_output = LReLu(tf.tensordot(tf.concat([sents_sem, sents_input], 2), w_comb, [[2], [0]]) + b_comb)
        else:
            sents_output = LReLu(tf.tensordot(tf.concat([sents_sem, sents_input], 2), w_comb, [[2], [0]]) + b_comb)
    else:
        if self.config.skip_mask_bug_fix:
            str_scores_, str_scores_no_root, LL_sents = get_structure('doc', sents_str,
                                                                      self.t_variables['mask_parser_1'],
                                                                      self.t_variables['mask_parser_2'],
                                                                      None, None, None)  # [batch_size, doc_l+1, doc_l]
        else:
            # create mask for setting all padded cells to 0
            mask_ll_sents = tf.expand_dims(mask_sents, 2)
            mask_ll_sents_trans = tf.transpose(mask_ll_sents, perm=[0, 2, 1])
            mask_sents_mult = mask_ll_sents * mask_ll_sents_trans

            # create mask for setting the padded diagonals to 1
            mask_sents_diags = tf.matrix_diag_part(mask_sents_mult)
            mask_sents_diags_invert = tf.cast(tf.logical_not(tf.cast(mask_sents_diags, tf.bool)), tf.float64)
            zero_matrix_sents = tf.zeros([batch_l, max_doc_l, max_doc_l], tf.float64)
            mask_sents_add = tf.matrix_set_diag(zero_matrix_sents, mask_sents_diags_invert)

            str_scores_, str_scores_no_root, LL_sents = get_structure('doc', sents_str,
                                                                      self.t_variables['mask_parser_1'],
                                                                      self.t_variables['mask_parser_2'],
                                                                      mask_sents_mult, mask_sents_add,
                                                                      tf.expand_dims(mask_sents, 2))  # [batch_size, doc_l+1, doc_l]

        str_scores = tf.matrix_transpose(str_scores_)
        self.str_scores = str_scores  # shape is [batch_size, doc_l, doc_l+1]
        sents_children = tf.matmul(str_scores_no_root, sents_sem)

        if self.config.tree_percolation == "child":
            sents_output = LReLu(tf.tensordot(tf.concat([sents_sem, sents_children], 2), w_comb, [[2], [0]])
                                 + b_comb)
        else:
            sents_sem_root = tf.concat([tf.tile(embeddings_root, [batch_l, 1, 1]), sents_sem], 1)
            sents_parents = tf.matmul(str_scores, sents_sem_root)
            if self.config.tree_percolation == "parent":
                sents_output = LReLu(tf.tensordot(tf.concat([sents_sem, sents_parents], 2), w_comb, [[2], [0]])
                                     + b_comb)
            elif self.config.tree_percolation == "both":
                sents_output = LReLu(tf.tensordot(tf.concat([sents_sem, sents_parents, sents_children], 2),
                                                  w_comb_both, [[2], [0]]) + b_comb)

        # percolation is only supported for the "child" option
        if self.config.tree_percolation_levels > 0:
            count = 0
            while count < self.config.tree_percolation_levels:
                sents_children_2 = tf.matmul(str_scores_no_root, sents_output)
                sents_output = LReLu(tf.tensordot(tf.concat([sents_output, sents_children_2], 2), w_comb,
                                                  [[2], [0]]) + b_comb)
                count += 1

    if self.config.doc_attention == 'sum':
        sents_output = sents_output * tf.expand_dims(mask_sents, 2)  # mask is [batch_size, doc_l, 1]
        sents_output = tf.reduce_sum(sents_output, 1)  # [batch_size, dim_sem*2]
    elif self.config.doc_attention == 'mean':
        sents_output = sents_output * tf.expand_dims(mask_sents, 2)
        sents_output = tf.reduce_sum(sents_output, 1) / tf.expand_dims(tf.cast(doc_l, tf.float64), 1)
    elif self.config.doc_attention == 'max':
        sents_output = sents_output + tf.expand_dims((mask_sents - 1) * 999, 2)
        sents_output = tf.reduce_max(sents_output, 1)
    elif self.config.doc_attention == 'weighted_sum':
        sents_weighted = sents_output * tf.expand_dims(str_scores[:, :, 0], 2)
        sents_output = sents_weighted * tf.expand_dims(mask_sents, 2)  # apply mask
        sents_output = tf.reduce_sum(sents_output, 1)

    final_output = MLP(sents_output, 'output', self.t_variables['keep_prob'], self.config.seed, self.xavier_init)
    self.final_output = tf.matmul(final_output, w_softmax) + b_softmax
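# Hedged sketch (not from the model above): numpy illustration of the mask pair built in
# the bug-fix branches. The multiplicative mask zeroes every row/column belonging to
# padding; the additive mask puts 1 back on the padded diagonal entries, presumably so
# the structure computation stays well defined for padded positions. Values are illustrative only.
import numpy as np

mask = np.array([1., 1., 0.])              # third position is padding
mult = mask[:, None] * mask[None, :]       # multiplicative mask: padded cells -> 0
add = np.diag(1. - np.diag(mult))          # additive mask: 1 on the padded diagonal
print(mult + add)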