def output_function(self, state):
    """Draw one sample from the mixture parameterised by ``state.h3``.

    ``state.h3`` is passed through the 'gmm' dense layer (variables reused
    via ``tf.AUTO_REUSE``) and ``self._parse_parameters`` splits the result
    into five tensors, used here as: categorical probabilities, bivariate
    normal means, standard deviations, correlations and Bernoulli
    probabilities.

    Returns:
        The coordinates drawn from the selected mixture component,
        concatenated along axis 1 with the Bernoulli draw cast to float32.
    """
    gmm_params = dense_layer(state.h3, self.output_units, scope='gmm', reuse=tf.AUTO_REUSE)
    pis, mus, sigmas, rhos, es = self._parse_parameters(gmm_params)

    # Pair the two halves of axis 1 so the new last axis indexes the coordinate.
    mu_first, mu_second = tf.split(mus, 2, axis=1)
    component_means = tf.stack([mu_first, mu_second], axis=2)

    std_first, std_second = tf.split(sigmas, 2, axis=1)
    # Row-major entries of each 2x2 covariance matrix; both off-diagonal
    # entries are the same expression.
    covariance_entries = tf.stack(
        [
            tf.square(std_first),
            rhos * std_first * std_second,
            rhos * std_first * std_second,
            tf.square(std_second),
        ],
        axis=2,
    )
    covariances = tf.reshape(
        covariance_entries,
        (self.batch_size, self.num_output_mixture_components, 2, 2),
    )

    component_dist = tfd.MultivariateNormalFullCovariance(
        loc=component_means, covariance_matrix=covariances)
    bernoulli_dist = tfd.Bernoulli(probs=es)
    selector_dist = tfd.Categorical(probs=pis)

    bernoulli_draw = bernoulli_dist.sample()
    all_component_draws = component_dist.sample()
    chosen_component = selector_dist.sample()

    # For every batch row keep only the draw that belongs to its chosen component.
    row_and_component = tf.stack([tf.range(self.batch_size), chosen_component], axis=1)
    chosen_coords = tf.gather_nd(all_component_draws, row_and_component)
    return tf.concat([chosen_coords, tf.cast(bernoulli_draw, tf.float32)], axis=1)
def calculate_loss(self):
    """Create the input placeholders, build the network and return its loss.

    The features go through a ReLU dense layer ('dense1'); its output is
    concatenated with the raw features and fed to a single-unit sigmoid
    dense layer ('dense2').  The tensors needed at prediction time are
    stored in ``self.prediction_tensors``.

    Returns:
        The result of ``log_loss(self.label, y_hat)``.
    """
    # Inputs fed at run time.
    self.order_id = tf.placeholder(tf.int32, [None])
    self.product_id = tf.placeholder(tf.int32, [None])
    self.features = tf.placeholder(tf.float32, [None, self.reader.data_dim])
    self.label = tf.placeholder(tf.int32, [None])

    hidden = dense_layer(
        self.features,
        self.hidden_units,
        activation=tf.nn.relu,
        scope='dense1',
    )
    # Skip connection: the output layer sees the hidden units and the raw features.
    hidden_and_raw = tf.concat([hidden, self.features], axis=1)

    probability = dense_layer(hidden_and_raw, 1, activation=tf.nn.sigmoid, scope='dense2')
    # Drop the size-1 axis 1 produced by the single-unit layer.
    y_hat = tf.squeeze(probability, 1)

    loss = log_loss(self.label, y_hat)

    self.prediction_tensors = dict([
        ('order_ids', self.order_id),
        ('product_ids', self.product_id),
        ('predictions', y_hat),
        ('labels', self.label),
    ])
    return loss
def __call__(self, inputs, state, scope=None):
    """Run one step of the three-layer LSTM with a mixture attention window.

    Args:
        inputs: Input tensor for this step.
        state: Previous state; its ``h1``/``c1``, ``h2``/``c2``, ``h3``/``c3``,
            ``kappa`` and ``w`` fields are read.
        scope: Optional variable scope; defaults to the class name.

    Returns:
        A ``(s3_out, new_state)`` pair: the third LSTM's output and a fresh
        ``LSTMAttentionCellState``.
    """
    scope_name = scope or type(self).__name__
    with tf.variable_scope(scope_name, reuse=tf.AUTO_REUSE):

        # First LSTM: sees the previous window vector and the current input.
        first_in = tf.concat([state.w, inputs], axis=1)
        first_cell = tf.contrib.rnn.LSTMCell(self.lstm_size)
        s1_out, s1_state = first_cell(first_in, state=(state.c1, state.h1))

        # Attention parameters: one (alpha, beta, kappa) triple per mixture
        # component, all made positive by the softplus.
        attn_in = tf.concat([state.w, inputs, s1_out], axis=1)
        attn_raw = dense_layer(
            attn_in,
            3*self.num_attn_mixture_components,
            scope='attention',
        )
        alpha_flat, beta_raw, kappa_step = tf.split(tf.nn.softplus(attn_raw), 3, axis=1)
        # kappa accumulates over steps: the new value adds a positive increment.
        kappa_flat = state.kappa + kappa_step / 25.0
        # Lower bound on beta, which is used as a divisor below.
        beta_flat = tf.clip_by_value(beta_raw, .01, np.inf)

        kappa = tf.expand_dims(kappa_flat, 2)
        alpha = tf.expand_dims(alpha_flat, 2)
        beta = tf.expand_dims(beta_flat, 2)

        # u holds the positions 0..char_len-1, repeated for every batch row
        # and every mixture component.
        positions = tf.reshape(tf.range(self.char_len), (1, 1, self.char_len))
        u = tf.cast(
            tf.tile(positions, (self.batch_size, self.num_attn_mixture_components, 1)),
            tf.float32,
        )

        # Window weight per position: sum over the mixture components (axis 1).
        squared_offset = tf.square(kappa - u)
        phi_flat = tf.reduce_sum(alpha*tf.exp(-squared_offset / beta), axis=1)
        phi = tf.expand_dims(phi_flat, 2)

        # Zero out positions beyond each sequence's length.
        valid = tf.cast(
            tf.sequence_mask(self.attention_values_lengths, maxlen=self.char_len),
            tf.float32,
        )
        valid = tf.expand_dims(valid, 2)
        w = tf.reduce_sum(phi*self.attention_values*valid, axis=1)

        # Second LSTM.
        second_in = tf.concat([inputs, s1_out, w], axis=1)
        second_cell = tf.contrib.rnn.LSTMCell(self.lstm_size)
        s2_out, s2_state = second_cell(second_in, state=(state.c2, state.h2))

        # Third LSTM.
        third_in = tf.concat([inputs, s2_out, w], axis=1)
        third_cell = tf.contrib.rnn.LSTMCell(self.lstm_size)
        s3_out, s3_state = third_cell(third_in, state=(state.c3, state.h3))

        next_state = LSTMAttentionCellState(
            s1_state.h, s1_state.c,
            s2_state.h, s2_state.c,
            s3_state.h, s3_state.c,
            alpha_flat, beta_flat, kappa_flat,
            w, phi_flat,
        )
        return s3_out, next_state
def __call__(self, inputs, state, scope=None):
    """Advance the attention-augmented three-layer LSTM by one step.

    Args:
        inputs: Input tensor for this step.
        state: Previous state; its ``h1``/``c1``, ``h2``/``c2``, ``h3``/``c3``,
            ``kappa`` and ``w`` fields are read.
        scope: Optional variable scope; defaults to the class name.

    Returns:
        A ``(s3_out, new_state)`` pair: the third LSTM's output and a fresh
        ``LSTMAttentionCellState``.
    """
    scope_name = scope or type(self).__name__
    with tf.compat.v1.variable_scope(scope_name, reuse=tf.compat.v1.AUTO_REUSE):

        # Layer 1 consumes the previous window vector together with the input.
        layer1_in = tf.concat([state.w, inputs], axis=1)
        layer1_cell = tf.compat.v1.nn.rnn_cell.LSTMCell(self.lstm_size)
        s1_out, s1_state = layer1_cell(layer1_in, state=(state.c1, state.h1))

        # One (alpha, beta, kappa) triple per attention mixture component;
        # the softplus keeps all three positive.
        window_in = tf.concat([state.w, inputs, s1_out], axis=1)
        window_params = dense_layer(
            window_in,
            3*self.num_attn_mixture_components,
            scope='attention',
        )
        alpha_flat, beta_unclipped, kappa_delta = tf.split(
            tf.nn.softplus(window_params), 3, axis=1)
        # The new kappa is the previous one plus a positive increment.
        kappa_flat = state.kappa + kappa_delta / 25.0
        # beta is a divisor below, so it is bounded away from zero.
        beta_flat = tf.clip_by_value(beta_unclipped, .01, np.inf)

        kappa = tf.expand_dims(kappa_flat, 2)
        alpha = tf.expand_dims(alpha_flat, 2)
        beta = tf.expand_dims(beta_flat, 2)

        # Positions 0..char_len-1, repeated per batch row and mixture component.
        index_row = tf.reshape(tf.range(self.char_len), (1, 1, self.char_len))
        u = tf.cast(
            tf.tile(index_row, (self.batch_size, self.num_attn_mixture_components, 1)),
            tf.float32,
        )

        # Per-position window weight, summed over mixture components (axis 1).
        squared_offset = tf.square(kappa - u)
        phi_flat = tf.reduce_sum(
            input_tensor=alpha*tf.exp(-squared_offset / beta), axis=1)
        phi = tf.expand_dims(phi_flat, 2)

        # Mask out positions past each sequence's length.
        length_mask = tf.cast(
            tf.sequence_mask(self.attention_values_lengths, maxlen=self.char_len),
            tf.float32,
        )
        length_mask = tf.expand_dims(length_mask, 2)
        w = tf.reduce_sum(
            input_tensor=phi*self.attention_values*length_mask, axis=1)

        # Layer 2.
        layer2_in = tf.concat([inputs, s1_out, w], axis=1)
        layer2_cell = tf.compat.v1.nn.rnn_cell.LSTMCell(self.lstm_size)
        s2_out, s2_state = layer2_cell(layer2_in, state=(state.c2, state.h2))

        # Layer 3.
        layer3_in = tf.concat([inputs, s2_out, w], axis=1)
        layer3_cell = tf.compat.v1.nn.rnn_cell.LSTMCell(self.lstm_size)
        s3_out, s3_state = layer3_cell(layer3_in, state=(state.c3, state.h3))

        updated_state = LSTMAttentionCellState(
            s1_state.h, s1_state.c,
            s2_state.h, s2_state.c,
            s3_state.h, s3_state.c,
            alpha_flat, beta_flat, kappa_flat,
            w, phi_flat,
        )
        return s3_out, updated_state
def calculate_loss(self):
    """Define placeholders, the two-layer network and its log loss.

    A ReLU dense layer ('dense1') is applied to the features; its output is
    concatenated with the features themselves and passed to a single-unit
    sigmoid layer ('dense2').  ``self.prediction_tensors`` is populated with
    the ids, predictions and labels.

    Returns:
        The result of ``log_loss(self.label, y_hat)``.
    """
    # Run-time inputs.
    self.order_id = tf.placeholder(tf.int32, [None])
    self.product_id = tf.placeholder(tf.int32, [None])
    self.features = tf.placeholder(tf.float32, [None, self.reader.data_dim])
    self.label = tf.placeholder(tf.int32, [None])

    dense1_out = dense_layer(
        self.features, self.hidden_units, activation=tf.nn.relu, scope='dense1')
    # The output layer receives both the hidden activations and the raw features.
    dense2_in = tf.concat([dense1_out, self.features], axis=1)
    dense2_out = dense_layer(dense2_in, 1, activation=tf.nn.sigmoid, scope='dense2')
    # Remove the size-1 axis 1 of the single-unit output.
    y_hat = tf.squeeze(dense2_out, 1)

    loss = log_loss(self.label, y_hat)

    prediction_tensors = {}
    prediction_tensors['order_ids'] = self.order_id
    prediction_tensors['product_ids'] = self.product_id
    prediction_tensors['predictions'] = y_hat
    prediction_tensors['labels'] = self.label
    self.prediction_tensors = prediction_tensors

    return loss
def output_function(self, state):
    """Sample an output from the mixture whose parameters come from ``state.h3``.

    The 'gmm' dense layer (variables reused via ``tf.AUTO_REUSE``) maps
    ``state.h3`` to raw parameters, which ``self._parse_parameters`` splits
    into five tensors.  They are used here as categorical probabilities,
    bivariate normal means, standard deviations, correlations and Bernoulli
    probabilities respectively.

    Returns:
        Coordinates sampled from the chosen mixture component, concatenated
        along axis 1 with the Bernoulli sample cast to float32.
    """
    raw = dense_layer(state.h3, self.output_units, scope='gmm', reuse=tf.AUTO_REUSE)
    pis, mus, sigmas, rhos, es = self._parse_parameters(raw)

    # Split axis 1 in half and stack the halves on a new last axis.
    mu_a, mu_b = tf.split(mus, 2, axis=1)
    locs = tf.stack([mu_a, mu_b], axis=2)

    sigma_a, sigma_b = tf.split(sigmas, 2, axis=1)
    # The four entries of each 2x2 covariance matrix in row-major order; the
    # two off-diagonal entries are the same expression.
    entries = [
        tf.square(sigma_a),
        rhos*sigma_a*sigma_b,
        rhos*sigma_a*sigma_b,
        tf.square(sigma_b),
    ]
    stacked_entries = tf.stack(entries, axis=2)
    covariances = tf.reshape(
        stacked_entries,
        (self.batch_size, self.num_output_mixture_components, 2, 2),
    )

    gaussians = tfd.MultivariateNormalFullCovariance(
        loc=locs,
        covariance_matrix=covariances,
    )
    bernoulli = tfd.Bernoulli(probs=es)
    categorical = tfd.Categorical(probs=pis)

    e_sample = bernoulli.sample()
    coords_per_component = gaussians.sample()
    component_idx = categorical.sample()

    # Select, for each batch row, the coordinates of the sampled component.
    gather_idx = tf.stack([tf.range(self.batch_size), component_idx], axis=1)
    picked = tf.gather_nd(coords_per_component, gather_idx)
    e_as_float = tf.cast(e_sample, tf.float32)
    return tf.concat([picked, e_as_float], axis=1)
def get_input_sequences(self):
    """Create the model's input placeholders and assemble the input tensor.

    All placeholders are stored on ``self``.  Three feature groups are built
    and concatenated along axis 2:

    * history features: one-hot encodings of the per-step integer histories
      followed by float copies of some of them divided by a constant;
    * product features: product, aisle and department embeddings, the
      ``is_none`` flag and a dense projection of the max-pooled one-hot
      ``product_name`` encoding, repeated for each of the 100 steps;
    * user features: the user embedding, repeated for each of the 100 steps.

    Returns:
        The concatenated tensor ``x``.
    """
    def int_vector():
        # int32 placeholder with one value per example.
        return tf.placeholder(tf.int32, [None])

    def int_history():
        # int32 placeholder with 100 values per example.
        return tf.placeholder(tf.int32, [None, 100])

    def scaled(values, divisor):
        # Float copy of `values` divided by `divisor`, with a new axis 2.
        return tf.expand_dims(tf.cast(values, tf.float32) / divisor, 2)

    # ---- placeholders ----
    self.user_id = int_vector()
    self.product_id = int_vector()
    self.aisle_id = int_vector()
    self.department_id = int_vector()
    self.is_none = int_vector()
    self.history_length = int_vector()

    self.is_ordered_history = int_history()
    self.index_in_order_history = int_history()
    self.order_dow_history = int_history()
    self.order_hour_history = int_history()
    self.days_since_prior_order_history = int_history()
    self.order_size_history = int_history()
    self.reorder_size_history = int_history()
    self.order_is_weekend_history = int_history()
    self.order_part_of_day_history = int_history()
    self.order_number_history = int_history()

    self.product_name = tf.placeholder(tf.int32, [None, 30])
    self.product_name_length = int_vector()
    self.next_is_ordered = int_history()
    self.keep_prob = tf.placeholder(tf.float32)
    self.is_training = tf.placeholder(tf.bool)

    # ---- product features ----
    product_table = tf.get_variable(
        name='product_embeddings',
        shape=[50000, self.lstm_size],
        dtype=tf.float32
    )
    aisle_table = tf.get_variable(
        name='aisle_embeddings',
        shape=[250, 50],
        dtype=tf.float32
    )
    department_table = tf.get_variable(
        name='department_embeddings',
        shape=[50, 10],
        dtype=tf.float32
    )

    # One-hot every name token, take the max over axis 1, then project.
    name_one_hot = tf.one_hot(self.product_name, 2532)
    name_pooled = tf.reduce_max(name_one_hot, 1)
    name_features = dense_layer(name_pooled, 100, activation=tf.nn.relu)

    none_flag = tf.cast(tf.expand_dims(self.is_none, 1), tf.float32)

    product_vec = tf.nn.embedding_lookup(product_table, self.product_id)
    aisle_vec = tf.nn.embedding_lookup(aisle_table, self.aisle_id)
    department_vec = tf.nn.embedding_lookup(department_table, self.department_id)
    x_product = tf.concat(
        [product_vec, aisle_vec, department_vec, none_flag, name_features],
        axis=1,
    )
    # Repeat the per-example product features for each of the 100 steps.
    x_product = tf.tile(tf.expand_dims(x_product, 1), (1, 100, 1))

    # ---- user features ----
    user_table = tf.get_variable(
        name='user_embeddings',
        shape=[207000, self.lstm_size],
        dtype=tf.float32
    )
    x_user = tf.nn.embedding_lookup(user_table, self.user_id)
    x_user = tf.tile(tf.expand_dims(x_user, 1), (1, 100, 1))

    # ---- history features ----
    one_hot_features = [
        tf.one_hot(self.is_ordered_history, 2),
        tf.one_hot(self.index_in_order_history, 20),
        tf.one_hot(self.order_dow_history, 8),
        tf.one_hot(self.order_hour_history, 25),
        tf.one_hot(self.days_since_prior_order_history, 31),
        tf.one_hot(self.order_size_history, 60),
        tf.one_hot(self.reorder_size_history, 50),
        tf.one_hot(self.order_is_weekend_history, 2),
        tf.one_hot(self.order_part_of_day_history, 3),
        tf.one_hot(self.order_number_history, 101),
    ]
    scalar_features = [
        scaled(self.index_in_order_history, 20.0),
        scaled(self.order_dow_history, 8.0),
        scaled(self.order_hour_history, 25.0),
        scaled(self.days_since_prior_order_history, 31.0),
        scaled(self.order_size_history, 60.0),
        scaled(self.reorder_size_history, 50.0),
        scaled(self.order_number_history, 100.0),
    ]
    x_history = tf.concat(one_hot_features + scalar_features, axis=2)

    return tf.concat([x_history, x_product, x_user], axis=2)
def get_input_sequences(self):
    """Define the input placeholders and build the model's input tensor.

    Every placeholder is kept as an attribute of ``self``.  The returned
    tensor is the axis-2 concatenation of three groups:

    * history features: one-hot encodings of the per-step integer histories
      followed by float copies of some of them divided by a constant;
    * product features: product, aisle and department embeddings, the
      ``is_none`` flag and a dense projection of the max-pooled one-hot
      ``product_name`` encoding, repeated for each of the 100 steps;
    * user features: the user embedding, repeated for each of the 100 steps.

    Returns:
        The concatenated tensor ``x``.
    """
    def per_example_ints():
        # int32 placeholder holding one value per example.
        return tf.placeholder(tf.int32, [None])

    def per_step_ints():
        # int32 placeholder holding 100 values per example.
        return tf.placeholder(tf.int32, [None, 100])

    def as_scaled_float(values, divisor):
        # Cast to float32, divide by `divisor` and append a new axis 2.
        return tf.expand_dims(tf.cast(values, tf.float32) / divisor, 2)

    # Placeholders.
    self.user_id = per_example_ints()
    self.product_id = per_example_ints()
    self.aisle_id = per_example_ints()
    self.department_id = per_example_ints()
    self.is_none = per_example_ints()
    self.history_length = per_example_ints()

    self.is_ordered_history = per_step_ints()
    self.index_in_order_history = per_step_ints()
    self.order_dow_history = per_step_ints()
    self.order_hour_history = per_step_ints()
    self.days_since_prior_order_history = per_step_ints()
    self.order_size_history = per_step_ints()
    self.reorder_size_history = per_step_ints()
    self.order_number_history = per_step_ints()

    self.product_name = tf.placeholder(tf.int32, [None, 30])
    self.product_name_length = per_example_ints()
    self.next_is_ordered = per_step_ints()
    self.keep_prob = tf.placeholder(tf.float32)
    self.is_training = tf.placeholder(tf.bool)

    # Product data.
    product_table = tf.get_variable(
        name='product_embeddings',
        shape=[50000, self.lstm_size],
        dtype=tf.float32
    )
    aisle_table = tf.get_variable(
        name='aisle_embeddings',
        shape=[250, 50],
        dtype=tf.float32
    )
    department_table = tf.get_variable(
        name='department_embeddings',
        shape=[50, 10],
        dtype=tf.float32
    )

    # One-hot each name token, max over axis 1, then a ReLU dense projection.
    name_tokens = tf.one_hot(self.product_name, 2532)
    name_bag = tf.reduce_max(name_tokens, 1)
    name_features = dense_layer(name_bag, 100, activation=tf.nn.relu)

    none_flag = tf.cast(tf.expand_dims(self.is_none, 1), tf.float32)

    product_parts = [
        tf.nn.embedding_lookup(product_table, self.product_id),
        tf.nn.embedding_lookup(aisle_table, self.aisle_id),
        tf.nn.embedding_lookup(department_table, self.department_id),
        none_flag,
        name_features,
    ]
    x_product = tf.concat(product_parts, axis=1)
    # Repeat the per-example product features along a new axis 1 of length 100.
    x_product = tf.tile(tf.expand_dims(x_product, 1), (1, 100, 1))

    # User data.
    user_table = tf.get_variable(
        name='user_embeddings',
        shape=[207000, self.lstm_size],
        dtype=tf.float32
    )
    x_user = tf.nn.embedding_lookup(user_table, self.user_id)
    x_user = tf.tile(tf.expand_dims(x_user, 1), (1, 100, 1))

    # Sequence data: one-hot encodings first, then the scaled float copies.
    categorical_parts = [
        tf.one_hot(self.is_ordered_history, 2),
        tf.one_hot(self.index_in_order_history, 20),
        tf.one_hot(self.order_dow_history, 8),
        tf.one_hot(self.order_hour_history, 25),
        tf.one_hot(self.days_since_prior_order_history, 31),
        tf.one_hot(self.order_size_history, 60),
        tf.one_hot(self.reorder_size_history, 50),
        tf.one_hot(self.order_number_history, 101),
    ]
    scalar_parts = [
        as_scaled_float(self.index_in_order_history, 20.0),
        as_scaled_float(self.order_dow_history, 8.0),
        as_scaled_float(self.order_hour_history, 25.0),
        as_scaled_float(self.days_since_prior_order_history, 31.0),
        as_scaled_float(self.order_size_history, 60.0),
        as_scaled_float(self.reorder_size_history, 50.0),
        as_scaled_float(self.order_number_history, 100.0),
    ]
    x_history = tf.concat(categorical_parts + scalar_parts, axis=2)

    x = tf.concat([x_history, x_product, x_user], axis=2)
    return x