def upsample_2d(image: tf.Variable, size: int):
    """
    Operation which produces an image `size` times bigger.
    If the input image has shape (10, 10), the output image will have
    shape (10 * size, 10 * size).
    """
    # NHWC layout: dimension 1 is height, dimension 2 is width
    height = int(image.get_shape()[1] * size)
    width = int(image.get_shape()[2] * size)
    return tf.image.resize_nearest_neighbor(image, (height, width))
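# Minimal usage sketch for upsample_2d, assuming an NHWC image batch and a
# TF 1.x session; the placeholder shape below is illustrative only.
import numpy as np
import tensorflow as tf

images = tf.placeholder(tf.float32, shape=(1, 10, 10, 3))
bigger = upsample_2d(images, size=2)  # -> shape (1, 20, 20, 3)
with tf.Session() as sess:
    out = sess.run(bigger, {images: np.zeros((1, 10, 10, 3), np.float32)})
    assert out.shape == (1, 20, 20, 3)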
def reader(context: Tuple[tf.Variable, tf.Variable], emb0: tf.Variable,
           n_slots: int = None, weights=None, step_size=1.0,
           scale_prediction=0.0, start_from_zeros=False,
           loss_grad=loss_quadratic_grad, emb_update=multilinear_grad):
    """
    Read a series of data and update the embeddings accordingly

    Args:
        context (Tuple[tf.Variable, tf.Variable]): contextual information
        emb0 (tf.Variable): initial embeddings
        n_slots (int): number of slots to update
        weights: weights given to every observation in the inputs.
            Size: (batch_size, n_obs)
        step_size: initial value of the (trainable) update step size
        scale_prediction: scaling applied to the predictions before the loss
            gradient is computed
        start_from_zeros: if True, return only the update instead of the
            updated embeddings
        loss_grad: gradient of the loss
        emb_update: update of the embeddings (could be the gradient of the
            score with respect to the embeddings)

    Returns:
        The variable representing the updated embeddings
    """
    if context is None:  # empty contexts are not read
        return emb0
    context_inputs, context_outputs = context

    # context_inputs has shape (n_data, n_obs, order)
    n_data, n_obs, order = [d.value for d in context_inputs.get_shape()]
    step_size = tf.Variable(step_size, name='step_size', trainable=True)
    if weights is None:  # unweighted observations count equally
        weights = 1.0

    if len(emb0.get_shape()) > 2:  # different set of embeddings for every data
        n_data2, n_ent, rank = [d.value for d in emb0.get_shape()]
        if n_slots is None:
            n_slots = n_ent
        shift_indices = tf.constant(
            n_ent * np.reshape(np.outer(range(n_data), np.ones(n_obs * order)),
                               (n_data, n_obs, order)), dtype='int64')
        emb0_rsh = tf.reshape(emb0, (-1, rank))
        grad_score, preds = emb_update(emb0_rsh, context_inputs + shift_indices,
                                       score=True)
    else:
        rank = emb0.get_shape()[1].value
        grad_score, preds = emb_update(emb0, context_inputs, score=True)

    update_strength = tf.tile(tf.reshape(
        loss_grad(preds * scale_prediction, context_outputs) * weights,
        (n_data, n_obs, 1, 1)), (1, 1, 2, rank))
    grad_loss = tf.reshape(grad_score, (n_data, n_obs, 2, rank)) * update_strength
    one_hot = tf.Variable(np.eye(n_slots + 1, n_slots, dtype=np.float32),
                          trainable=False)  # last column removed
    # shape: (n_data, n_obs, order, n_slots)
    indic_mat = tf.gather(one_hot, tf.minimum(context_inputs, n_slots))
    # batched matmul with the first argument transposed (tf.batch_matmul with
    # adj_x=True in pre-1.0 TensorFlow)
    total_grad_loss = tf.reduce_sum(
        tf.matmul(indic_mat, grad_loss, transpose_a=True), 1)

    if start_from_zeros:
        return total_grad_loss * step_size  # output size: (n_data, n_slots, rank)
    else:
        if len(emb0.get_shape()) > 2:  # different set of embeddings for every data
            initial_slot_embs = emb0[:, :n_slots, :]
        else:
            initial_slot_embs = tf.reshape(
                tf.tile(emb0[:n_slots, :], (n_data, 1)), (n_data, n_slots, rank))
        return initial_slot_embs - total_grad_loss * step_size  # output size: (n_data, n_slots, rank)
def answerer(embeddings, tuples: tf.Variable, scoring=multilinear):
    """
    Evaluate the score of tuples with embeddings that are specific to every
    data sample

    Args:
        embeddings (tf.Variable): embedding tensor with shape
            (n_data, n_slots, rank)
        tuples: question tensor with int64 entries and shape
            (n_data, n_tuples, order)
        scoring: operator that is used to compute the scores

    Returns:
        scores (tf.Tensor): score tensor with shape (n_data, n_tuples)
    """
    n_data, n_slots, rank = [d.value for d in embeddings.get_shape()]
    n_data, n_tuples, order = [d.value for d in tuples.get_shape()]

    # shift every sample's entity indices by n_slots so that they address the
    # right block of the flattened embedding matrix; the shift tensor must
    # match the (n_data, n_tuples, order) shape of the tuples
    shift_indices = tf.constant(np.reshape(
        np.outer(range(n_data), np.ones(n_tuples * order)) * n_slots,
        (n_data, n_tuples, order)), dtype='int64')
    questions_shifted = tuples + shift_indices

    preds = scoring(
        tf.reshape(embeddings, (n_data * n_slots, rank)),
        tf.reshape(questions_shifted, (n_data * n_tuples, order)))
    return tf.reshape(preds, (n_data, n_tuples))
def _prediction_layer(aggregation: tf.Variable, layer_size: int, att_size: int,
                      prev_rnn_state: tf.Variable = None,
                      reuse: bool = False) -> Tuple[tf.Variable, tf.Variable]:
    batch_size = int(aggregation.get_shape()[0])
    agg_dim = int(aggregation.get_shape()[2])
    rnn_size = int(prev_rnn_state.get_shape()[1])

    # actual logic
    with tf.variable_scope('prediction_static', reuse=reuse):
        WPh = tf.get_variable('WPh', [agg_dim, att_size], tf.float32)
        Wah = tf.get_variable('Wah', [rnn_size, att_size], tf.float32)
        v = tf.get_variable('v', [att_size], tf.float32)

        # do calculations
        # s^t_j
        # [batch_size, num_words, att_size]
        att_term = tf.einsum('ijk,kl->ijl', aggregation, WPh)
        # [batch_size, att_size]
        rnn_term = tf.matmul(prev_rnn_state, Wah)
        # [batch_size, num_words, att_size]
        term = tf.add(att_term, tf.expand_dims(rnn_term, axis=1))
        # [batch_size, num_words]
        s = tf.einsum('ijl,l->ij', tf.tanh(term), v)

        # a^t_i
        a = tf.nn.softmax(s, dim=1)

        # c_t
        c = tf.reduce_sum(tf.multiply(tf.expand_dims(a, axis=2), aggregation),
                          axis=1)

    # next h^a_t
    with tf.variable_scope('prediction_static_rnn', reuse=reuse) as scope:
        rnn_cell = tf.nn.rnn_cell.GRUCell(rnn_size, reuse=reuse)
        _, next_hat = tf.nn.static_rnn(rnn_cell, [c],
                                       initial_state=prev_rnn_state,
                                       sequence_length=tf.ones([batch_size], tf.int32),
                                       scope=scope, dtype=tf.float32)
    return next_hat, tf.log(a)
def _extract_vec_values(batched_vecs: tf.Variable,
                        batched_indices: tf.Variable) -> tf.Variable:
    batch_size = int(batched_vecs.get_shape()[0])
    # pair every row index with its per-row column index: shape (batch_size, 2)
    batch_index_range = tf.range(batch_size, dtype=tf.int32)
    full_batched_indices = tf.stack([batch_index_range, batched_indices], axis=1)
    return tf.gather_nd(batched_vecs, full_batched_indices)
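# Usage sketch for _extract_vec_values: select one entry per row, e.g. the
# score at each sample's label position. The values below are illustrative;
# module-level tf import assumed.
vecs = tf.constant([[0.1, 0.9],
                    [0.8, 0.2]])               # (batch_size, n)
idx = tf.constant([1, 0], dtype=tf.int32)      # (batch_size,)
picked = _extract_vec_values(vecs, idx)        # evaluates to [0.9, 0.8]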
def apply_mask(weights: tf.Variable, mask_collections=None,
               masked_weight_collections=None):
    """ Creates a mask for the given variable, and returns a masked version
    of the variable.

    Parameters
    ----------
    weights: The variable for which to create the mask.
    mask_collections: A list of collections into which to add the mask
        variable. By default, only adds to MASK_COLLECTION.
    masked_weight_collections: A list of collections into which to add the
        masked weights. By default, only adds to MASKED_WEIGHT_COLLECTION.

    Returns
    -------
    masked_weight: The value of the masked variable.
    """
    variable_name = os.path.basename(weights.op.name)

    with tf.variable_scope(variable_name) as vs:
        mask = tf.get_variable('mask', weights.get_shape(),
                               initializer=tf.ones_initializer,
                               trainable=False, dtype=weights.dtype)
        name_scope_name = vs.name

    # Need to re-open a name scope with an absolute name, as otherwise the
    # variable_scope is set correctly but the name_scope is not.
    with tf.name_scope(name_scope_name + '/'):
        masked_weight = tf.multiply(weights, mask, 'masked_weight')

    if mask_collections is not None:
        for collection in mask_collections:
            tf.add_to_collection(collection, mask)
    else:
        tf.add_to_collection(MASK_COLLECTION, mask)

    if masked_weight_collections is not None:
        for collection in masked_weight_collections:
            tf.add_to_collection(collection, masked_weight)
    else:
        tf.add_to_collection(MASKED_WEIGHT_COLLECTION, masked_weight)

    return masked_weight
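# Minimal sketch of apply_mask in a forward pass, assuming MASK_COLLECTION and
# MASKED_WEIGHT_COLLECTION are the string constants this module defines
# elsewhere; the layer names and shapes are illustrative. A pruning schedule
# would later assign zeros into the mask variable to remove connections.
with tf.variable_scope('dense'):
    kernel = tf.get_variable('kernel', shape=[128, 64])
masked_kernel = apply_mask(kernel)
mask = tf.get_collection(MASK_COLLECTION)[-1]  # the mask just created

inputs = tf.placeholder(tf.float32, [None, 128])
logits = tf.matmul(inputs, masked_kernel)  # use the masked weights downstream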
def tf_show(var: tf.Variable, name=None, summarize=1000):
    """
    Useful function to print the value of the current variable during
    evaluation

    Args:
        var: variable to show
        name: name to display
        summarize: number of values to display

    Returns:
        the same variable, but wrapped with a Print module
    """
    name = name or var.name
    shape = tuple([d.value for d in var.get_shape()])
    return tf.Print(var, [var], message=name + str(shape), summarize=summarize)
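# Usage sketch: tf_show is an identity op that prints the tensor's value every
# time it is evaluated (tf.Print writes to stderr), so it can be dropped into
# a graph without changing any computation.
x = tf.constant([1.0, 2.0, 3.0])
x = tf_show(x, name='x')  # prints something like "x(3,)[1 2 3]" on sess.run(x)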
def loss_neg_log_prob(prediction_logits: tf.Variable,
                      labels: tf.Variable) -> tf.Variable:
    batch_size = int(prediction_logits.get_shape()[0])
    # log_softmax is numerically more stable than log(softmax(...))
    prediction_list = tf.unstack(tf.nn.log_softmax(prediction_logits, dim=2),
                                 axis=1)
    pred_start = prediction_list[0]
    pred_end = prediction_list[1]

    label_list = tf.unstack(labels, axis=1)
    label_start = label_list[0]
    label_end = label_list[1]

    negative_log_prob_start = tf.div(
        _extract_vec_values(pred_start, label_start),
        tf.constant(-batch_size, dtype=tf.float32))
    negative_log_prob_end = tf.div(
        _extract_vec_values(pred_end, label_end),
        tf.constant(-batch_size, dtype=tf.float32))

    # average of the start- and end-position losses
    return tf.add(negative_log_prob_start, negative_log_prob_end) / 2.0
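# Usage sketch for loss_neg_log_prob, assuming logits for span start/end
# prediction over num_words positions; the shapes are illustrative. Each
# per-sample term is already divided by batch_size, so summing yields the
# batch-mean negative log probability.
prediction_logits = tf.placeholder(tf.float32, [16, 2, 50])  # (batch, 2, num_words)
span_labels = tf.placeholder(tf.int32, [16, 2])              # (batch, 2) word indices
loss = tf.reduce_sum(loss_neg_log_prob(prediction_logits, span_labels))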
def _char_embedding_layer(
        embedder: EmbeddingService, chars: tf.Variable, num_words: tf.Variable,
        num_chars: tf.Variable, char_rnn_size: int,
        dropout_function: Callable[[tf.Variable], tf.Variable]) -> tf.Variable:
    batch_size = int(chars.get_shape()[0])
    embedding_size = embedder.embedding_dim

    with tf.variable_scope('char_embedding_layer'):
        char_embeddings = tf.get_variable(
            name='char_embeddings', trainable=True, dtype=tf.float32,
            initializer=tf.constant(embedder.embedding_matrix, dtype=tf.float32))
        # chars: [batch_size, dim_num_words, dim_num_chars]
        char_raw_embed = dropout_function(
            tf.nn.embedding_lookup(char_embeddings, chars))

        # we need to unstack instead of reshape as two dimensions are unknown
        # batch_size * [dim_num_words, dim_num_chars, embedding_size]
        char_raw_embed_list = tf.unstack(char_raw_embed, batch_size, axis=0)
        char_raw_embed_length_list = tf.unstack(num_chars, batch_size, axis=0)

        # batch_size * [dim_num_words, layer_size]
        char_embed_list = []
        with tf.variable_scope('encoding') as scope:
            fw_cell = GRUCell(char_rnn_size)
            bw_cell = GRUCell(char_rnn_size)
            for i in range(len(char_raw_embed_list)):
                if i > 0:
                    # the cells create their variables on the first call;
                    # later iterations must reuse them
                    scope.reuse_variables()
                batch_embed = char_raw_embed_list[i]
                batch_char_length = char_raw_embed_length_list[i]
                (_, _), (fw_final, bw_final) = bidirectional_dynamic_rnn(
                    fw_cell, bw_cell, inputs=batch_embed, dtype=tf.float32,
                    sequence_length=batch_char_length, scope=scope,
                    parallel_iterations=64, swap_memory=True)
                out = tf.concat([fw_final, bw_final], axis=1)
                char_embed_list.append(out)

    return tf.stack(char_embed_list, axis=0)
def _prediction_init_state(qu_vecs: tf.Variable, att_size: int) -> tf.Variable:
    qu_vec_dim = int(qu_vecs.get_shape()[2])

    with tf.variable_scope('prediction_init'):
        v = tf.get_variable('v', [att_size], tf.float32)
        WQu = tf.get_variable('WQu', [qu_vec_dim, att_size], tf.float32)
        WQvVQr = tf.get_variable('WQvVQr', [1, 1, att_size], tf.float32)

        # do calculations
        # s^t_j
        # [batch_size, num_words, layer_size]
        att_term = tf.add(tf.einsum('ijk,kl->ijl', qu_vecs, WQu), WQvVQr)
        # [batch_size, num_words]
        s = tf.einsum('ijl,l->ij', tf.nn.tanh(att_term), v)

        # a^t_i
        a = tf.nn.softmax(s, dim=1)

        # r^Q (the equivalent of c in _prediction_layer)
        # [batch_size, layer_size]
        r = tf.reduce_sum(tf.multiply(tf.expand_dims(a, axis=2), qu_vecs), axis=1)
    return r
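# Brief usage sketch for _prediction_init_state, assuming question vectors of
# shape (batch_size, num_words, dim); the result r^Q can then presumably serve
# as the initial prev_rnn_state passed to _prediction_layer.
qu_vecs = tf.placeholder(tf.float32, [8, 30, 128])
r_q = _prediction_init_state(qu_vecs, att_size=64)  # -> shape (8, 128)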
def print_tensor_shape(x: tf.Variable, prefix=""):
    shape = x.get_shape().as_list()
    logger.info("%s, shape: %s", prefix, shape)
def check_shape(var1_tf: tf.Variable, var2_np: np.ndarray):
    if var1_tf.get_shape().as_list() != list(var2_np.shape):
        log("Shapes do not match! Exception will follow.", color="red")
def _gate_input(input: tf.Variable) -> tf.Variable:
    agg_dim = int(input.get_shape()[2])
    W = tf.get_variable('W', [agg_dim, agg_dim])
    # element-wise sigmoid gate over the feature dimension
    g_t = tf.sigmoid(tf.einsum('ijk,kl->ijl', input, W))
    return tf.multiply(g_t, input)
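# Sketch of the input gate in context, assuming a (batch, time, features)
# tensor. _gate_input creates a variable named 'W' via tf.get_variable, so
# each call should run inside its own variable scope to avoid name clashes.
seq = tf.placeholder(tf.float32, [32, 40, 300])
with tf.variable_scope('gate'):
    gated = _gate_input(seq)  # same shape as seq, element-wise gated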
class DenseLayer(BuildableNode):
    def __init__(self, name=None, protected=False):
        super().__init__(name=name, protected=protected)
        self.hasTrainableVariables = True
        self.activationLookup = {
            "relu": relu,
            "linear": self.linear,
            "sigmoid": sigmoid,
            "tanh": tanh
        }

    def linear(self, x):
        return x

    #throws unknownActivationFunction if the activation function is not in activationLookup
    def newLayer(self, layerSize, activationFunction):
        self.size = layerSize
        self.outputShape = [layerSize]
        self.activationKey = activationFunction
        try:
            self.activation = self.activationLookup[activationFunction]
        except KeyError:
            raise unknownActivationFunction(activationFunction)

    #TODO make it throw an error if inputShape is not [None]
    def build(self, seed=None):
        if self.built:
            return
        if len(self.inputConnections) < 1:
            raise notEnoughNodeConnections(len(self.inputConnections), 1)

        #now make the variables
        inputShape = self.inputConnections[0].outputShape
        self.inputSize = inputShape[0]
        biasInit = 0.1
        weightInitSTDDEV = 1 / self.inputSize
        self.biases = Variable(constant(biasInit, shape=[self.size]))
        self.weights = Variable(
            normal([self.inputSize, self.size], stddev=weightInitSTDDEV,
                   mean=0, seed=seed))
        self.built = True

    #function that executes the layer for a list of inputs
    #inp has shape [None, inputSize]
    #returns shape [None, outputSize]
    def execute(self, inp):
        if not self.built:
            raise operationWithUnbuiltNode("execute")
        return (self.activation(matmul([inp], self.weights) + self.biases))[0]

    #function that returns a shape [2] list of trainable variables
    #because these are tf variables it is returning a list of pointers
    def getTrainableVariables(self):
        #does error checking
        super().getTrainableVariables()
        #the weights and the biases are each a single multi-dimensional variable
        return [self.biases, self.weights]

    def connect(self, connections):
        if len(connections) == 0:
            return
        if len(connections[0].outputShape) != 1:
            raise invalidNodeConnection(connections[0].outputShape, [None])
        super().connect(connections)

    #Creates a directory for the layer and exports it to a weight file and a bias file
    #files:
    #  [path]/[subdir]/mat.weights (byte format)
    #  [path]/[subdir]/mat.biases  (byte format)
    #  [path]/[subdir]/hyper.txt
    def exportLayer(self, path, subdir):
        if not self.built:
            raise operationWithUnbuiltNode("exportLayer")
        import struct
        from os import mkdir
        accessPath = path + "\\" + subdir

        #first, create a directory for the network if one does not already exist
        try:
            mkdir(accessPath)
        except FileExistsError:
            pass
        except Exception:
            raise invalidPath(accessPath)

        #save hyper.txt
        #contains: inputSize, layerSize, activation
        with open(accessPath + "\\hyper.txt", "w") as f:
            f.write(str(self.inputSize) + "\n")
            f.write(str(self.size) + "\n")
            f.write(self.activationKey + "\n")

        #save mat.weights
        weightFloats = []
        for i in range(self.weights.get_shape()[0]):
            for j in range(self.weights[i].get_shape()[0]):
                weightFloats.append(float(self.weights[i][j]))
        with open(accessPath + "\\mat.weights", "wb") as f:
            f.write(bytearray(
                struct.pack(str(len(weightFloats)) + "f", *weightFloats)))
        #free the list immediately because it can be very large
        del weightFloats

        #save mat.biases
        biasFloats = []
        with open(accessPath + "\\mat.biases", "wb") as f:
            for i in range(self.biases.get_shape()[0]):
                biasFloats.append(float(self.biases[i]))
            f.write(bytearray(
                struct.pack(str(len(biasFloats)) + "f", *biasFloats)))

    #function that loads a layer from files and stores the parameters on the instance
    #reads from [path]/[subdir]
    def importLayer(self, superdir, subdir):
        from os import path
        accessPath = superdir + "\\" + subdir

        #check if the directory exists
        if not path.exists(accessPath):
            raise missingDirectoryForImport(accessPath)

        #import from hyper.txt
        try:
            with open(accessPath + "\\hyper.txt", "r") as f:
                fileLines = f.readlines()
                #strip line breaks
                fileLines = [i[:-1] for i in fileLines]
                try:
                    self.inputSize = int(fileLines[0])
                except ValueError:
                    raise invalidDataInFile(accessPath + "\\hyper.txt",
                                            "inputSize", fileLines[0])
                try:
                    self.size = int(fileLines[1])
                    self.outputShape = [self.size]
                except ValueError:
                    raise invalidDataInFile(accessPath + "\\hyper.txt",
                                            "size", fileLines[1])
                try:
                    self.activationKey = fileLines[2]
                    self.activation = self.activationLookup[fileLines[2]]
                except KeyError:
                    raise unknownActivationFunction(fileLines[2])
        except IOError:
            raise missingFileForImport(accessPath + "\\hyper.txt")

        #import weights
        import struct
        try:
            with open(accessPath + "\\mat.weights", "rb") as f:
                raw = f.read()  #type: bytes
                try:
                    inp = struct.unpack(str(self.inputSize * self.size) + "f",
                                        raw)  #tuple of float32s
                except struct.error:
                    raise invalidByteFile(accessPath + "\\mat.weights")
                weights = []
                for i in range(self.inputSize):
                    weights.append([])
                    for j in range(self.size):
                        weights[i].append(inp[i * self.size + j])
                self.weights = Variable(weights)
        except IOError:
            raise missingFileForImport(accessPath, "mat.weights")

        #import biases
        try:
            with open(accessPath + "\\mat.biases", "rb") as f:
                raw = f.read()  #type: bytes
                try:
                    inp = struct.unpack(str(self.size) + "f", raw)  #tuple of float32s
                except struct.error:
                    raise invalidByteFile(accessPath + "\\mat.biases")
                self.biases = Variable([i for i in inp])
        except IOError:
            raise missingFileForImport(accessPath, "mat.biases")

        self.built = True
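# Hedged usage sketch for DenseLayer, assuming the surrounding BuildableNode
# framework provides an input node whose outputShape is [inputSize]; the
# input_node and batch names below are hypothetical.
layer = DenseLayer(name='dense1')
layer.newLayer(layerSize=64, activationFunction='relu')
layer.connect([input_node])   # input_node.outputShape == [784], hypothetical
layer.build(seed=42)
out = layer.execute(batch)    # batch: tensor of shape [None, 784]
layer.exportLayer("checkpoints", "dense1")  # writes hyper.txt, mat.weights, mat.biases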
def init_first_layer_weights(var: tf.Variable, rgb_weights: np.ndarray,
                             sess: tf.Session, hs_weight_init: str) -> None:
    '''Initializes the weights for filters in the first conv layer.

    'resnet/scale1/weights:0' for ResNet
    'vggf/conv1/conv1_weights:0' for VGGF

    If we are using RGB-only, then just initializes var to rgb_weights.
    Otherwise, uses hs_weight_init to determine how to initialize the weights
    for non-RGB bands.

    Args
    - var: tf.Variable, the filters in the 1st convolution layer, shape [F, F, C, 64]
        - F is the filter size (7 for ResNet, 11 for VGGF)
        - C is either 3 (RGB), 7 (lxv3), or 9 (Landsat7)
    - rgb_weights: ndarray of np.float32, shape [F, F, 3, 64]
    - sess: tf.Session
    - hs_weight_init: str, one of ['random', 'same', 'samescaled']
    '''
    var_shape = np.asarray(var.get_shape().as_list())
    rgb_weights_shape = np.asarray(rgb_weights.shape)

    # only weights in the 1st conv layer need to be adjusted for dealing with
    # hyperspectral images; check that the filter shape and num_filters match
    # up, and that the RGB weights have 3 channels
    if 'scale1/weights:0' in var.name:  # ResNet
        F = 7
    elif 'conv1/conv1_weights:0' in var.name:  # VGGF
        F = 11
    else:
        raise ValueError('var is not the weights for the first conv layer')

    assert np.all(var_shape[[0, 1]] == [F, F])
    assert np.all(var_shape[[0, 1, 3]] == rgb_weights_shape[[0, 1, 3]])
    assert rgb_weights.shape[2] == 3
    assert rgb_weights.dtype == np.float32

    # if we are using the RGB-only model, then just initialize to saved weights
    if var_shape[2] == 3:
        print('Using rgb only model')
        sess.run(var.assign(rgb_weights))
        return

    # Set up the initializer function
    print('Initializing var different from saved rgb weights:', var.name,
          ' With shape:', var_shape)
    print('Using ' + hs_weight_init + ' initialization for hyperspectral weights.')
    num_hs_channels = var_shape[2] - rgb_weights.shape[2]
    hs_weights_shape = [F, F, num_hs_channels, 64]

    if hs_weight_init == 'random':
        # initialize the weights in the hyperspectral bands to gaussian with
        # same overall mean and stddev as the RGB channels
        rgb_mean = np.mean(rgb_weights)
        rgb_std = np.std(rgb_weights)
        hs_weights = tf.truncated_normal(hs_weights_shape, mean=rgb_mean,
                                         stddev=rgb_std, dtype=tf.float32)
    elif hs_weight_init == 'same':
        # initialize the weight for each position in each filter to the average
        # of the 3 RGB weights at the same position in the same filter
        rgb_mean = rgb_weights.mean(axis=2, keepdims=True)  # shape [F, F, 1, 64]
        hs_weights = np.tile(rgb_mean, (1, 1, num_hs_channels, 1))
    elif hs_weight_init == 'samescaled':
        # similar to hs_weight_init == 'same', but we normalize the weights
        rgb_mean = rgb_weights.mean(axis=2, keepdims=True)  # shape [F, F, 1, 64]
        hs_weights = np.tile(rgb_mean, (1, 1, num_hs_channels, 1))
        rgb_weights *= 3 / (3 + num_hs_channels)
        hs_weights *= 3 / (3 + num_hs_channels)
    else:
        raise ValueError(f'Unknown hs_weight_init type: {hs_weight_init}')

    final_weight = tf.concat([rgb_weights, hs_weights], axis=2)
    print('Shape of 1st layer weights:', final_weight.shape)  # should be (F, F, C, 64)
    sess.run(var.assign(final_weight))
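# Hedged example: initialize a 9-band first-layer filter from pretrained RGB
# weights with the 'samescaled' scheme. The scope names must match one of the
# patterns the function checks for ('scale1/weights:0' here); the random rgb
# array stands in for weights restored from a checkpoint.
with tf.variable_scope('resnet'):
    with tf.variable_scope('scale1'):
        conv1 = tf.get_variable('weights', shape=[7, 7, 9, 64])
rgb = np.random.randn(7, 7, 3, 64).astype(np.float32)  # stand-in for checkpoint weights
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    init_first_layer_weights(conv1, rgb, sess, hs_weight_init='samescaled')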