Example 1
def upsample_2d(image: tf.Variable, size: int):
    """
    Operation which produces an image `size` times bigger.

    If the input image has shape (10, 10), the output image will have shape (10*size, 10*size).
    """
    height = int(image.get_shape()[1] * size)
    width = int(image.get_shape()[2] * size)
    return tf.image.resize_nearest_neighbor(image, (height, width))
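A minimal usage sketch, assuming TensorFlow 1.x (tf.image.resize_nearest_neighbor was removed in TF 2.x in favour of tf.image.resize):

import tensorflow as tf

images = tf.placeholder(tf.float32, shape=(None, 10, 10, 3))  # NHWC batch
upsampled = upsample_2d(images, size=2)
print(upsampled.get_shape())  # (?, 20, 20, 3)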
Example 2
def reader(context: Tuple[tf.Variable, tf.Variable], emb0: tf.Variable, n_slots: int = None,
           weights=None,
           step_size=1.0,
           scale_prediction=0.0,
           start_from_zeros=False,
           loss_grad=loss_quadratic_grad,
           emb_update=multilinear_grad):
    """
    Read a series of data and update the embeddings accordingly
    Args:
        context (Tuple[tf.Variable, tf.Variable]): contextual information
        emb0 (tf.Variable): initial embeddings
        n_slots (int): number of slots to update
        weights: weights given to every observation in the input. Size: (batch_size, n_obs)
        step_size (float): initial value of the (trainable) update step size
        scale_prediction (float): scaling applied to the predictions before computing the loss gradient
        start_from_zeros (bool): if True, return only the update, ignoring the initial embeddings
        loss_grad: gradient of the loss
        emb_update: update of the embeddings (could be the gradient of the score with respect to the embeddings)

    Returns:
        The variable representing updated embeddings
    """
    if context is None:  # empty contexts are not read
        return emb0

    context_inputs, context_outputs = context  # context_inputs has shape (n_data, n_obs, order)
    n_data, n_obs, order = [d.value for d in context_inputs.get_shape()]
    step_size = tf.Variable(step_size, name='step_size', trainable=True)

    if len(emb0.get_shape()) > 2:  # different set of embeddings for every data
        n_data2, n_ent, rank = [d.value for d in emb0.get_shape()]
        if n_slots is None:
            n_slots = n_ent
        shift_indices = tf.constant(
                n_ent * np.reshape(np.outer(range(n_data), np.ones(n_obs * order)), (n_data, n_obs, order)),
                dtype='int64')
        emb0_rsh = tf.reshape(emb0, (-1, rank))
        grad_score, preds = emb_update(emb0_rsh, context_inputs + shift_indices, score=True)
    else:
        rank = emb0.get_shape()[1].value
        grad_score, preds = emb_update(emb0, context_inputs, score=True)
    update_strength = tf.tile(tf.reshape(loss_grad(preds * scale_prediction, context_outputs) * weights,
                                         (n_data, n_obs, 1, 1)), (1, 1, 2, rank))
    grad_loss = tf.reshape(grad_score, (n_data, n_obs, 2, rank)) * update_strength
    one_hot = tf.Variable(np.eye(n_slots + 1, n_slots, dtype=np.float32), trainable=False)  # last column removed
    indic_mat = tf.gather(one_hot, tf.minimum(context_inputs, n_slots))  # shape: (n_data, n_obs, order, n_slots)
    total_grad_loss = tf.reduce_sum(tf.batch_matmul(indic_mat, grad_loss, adj_x=True), 1)

    if start_from_zeros:
        return total_grad_loss * step_size  # size of the output: (n_data, n_slots, rank)
    else:
        if len(emb0.get_shape()) > 2:  # different set of embeddings for every data
            initial_slot_embs = emb0[:, :n_slots, :]
        else:
            initial_slot_embs = tf.reshape(tf.tile(emb0[:n_slots, :], (n_data, 1)), (n_data, n_slots, rank))
        return initial_slot_embs - total_grad_loss * step_size  # size of the output: (n_data, n_slots, rank)
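The slot aggregation above relies on an indicator-matrix trick: gathering rows of an identity matrix that has one extra all-zero row turns integer indices into one-hot rows, and clipping with tf.minimum routes out-of-slot indices to the zero row so they contribute nothing. A numpy-only sketch of the trick, with hypothetical sizes:

import numpy as np

n_slots = 3
one_hot = np.eye(n_slots + 1, n_slots, dtype=np.float32)  # row n_slots is all zeros
indices = np.array([0, 2, 5])  # 5 is out of slot range
indic = one_hot[np.minimum(indices, n_slots)]
print(indic)
# [[1. 0. 0.]
#  [0. 0. 1.]
#  [0. 0. 0.]]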
Example 3
def answerer(embeddings, tuples: tf.Variable, scoring=multilinear):
    """
    Evaluate the score of tuples with embeddings that are specific to every data sample

    Args:
        embeddings (tf.Variable): embedding tensor with shape (n_data, n_slots, rank)
        tuples: question tensor with int64 entries and shape (n_data, n_tuples, order)
        scoring: operator that is used to compute the scores

    Returns:
        scores (tf.Tensor): scores tensor with shape (n_data, n_tuples)

    """
    n_data, n_slots, rank = [d.value for d in embeddings.get_shape()]
    n_data, n_tuples, order = [d.value for d in tuples.get_shape()]

    shift_indices = tf.constant(np.reshape(
            np.outer(range(n_data), np.ones(n_tuples * order)) * n_slots, (n_data, n_tuples, order)), dtype='int64')
    questions_shifted = tuples + shift_indices

    preds = scoring(
            tf.reshape(embeddings, (n_data * n_slots, rank)),
            tf.reshape(questions_shifted, (n_data * n_tuples, order)))

    return tf.reshape(preds, (n_data, n_tuples))
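The shift_indices term implements a per-sample lookup in a flattened embedding table: once the embeddings are reshaped to (n_data * n_slots, rank), entity e of sample i lives at row i * n_slots + e. A numpy sketch with hypothetical sizes:

import numpy as np

n_data, n_slots, rank = 2, 4, 3
emb = np.arange(n_data * n_slots * rank).reshape(n_data, n_slots, rank)
flat = emb.reshape(n_data * n_slots, rank)

sample, entity = 1, 2
assert np.array_equal(flat[sample * n_slots + entity], emb[sample, entity])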
Example 4
def _prediction_layer(aggregation: tf.Variable,
                      layer_size: int,
                      att_size: int,
                      prev_rnn_state: tf.Variable = None,
                      reuse: bool = False) -> Tuple[tf.Variable, tf.Variable]:
    batch_size = int(aggregation.get_shape()[0])
    agg_dim = int(aggregation.get_shape()[2])
    rnn_size = int(prev_rnn_state.get_shape()[1])

    # actual logic
    with tf.variable_scope('prediction_static', reuse=reuse):
        WPh = tf.get_variable('WPh', [agg_dim, att_size], tf.float32)
        Wah = tf.get_variable('Wah', [rnn_size, att_size], tf.float32)
        v = tf.get_variable('v', [att_size], tf.float32)

    # do calculations
    # s^t_j
    # [batch_size, num_words, att_size]
    att_term = tf.einsum('ijk,kl->ijl', aggregation, WPh)
    # [batch_size, att_size]
    rnn_term = tf.matmul(prev_rnn_state, Wah)
    # [batch_size, num_words, att_size]
    term = tf.add(att_term, tf.expand_dims(rnn_term, axis=1))

    # [batch_size, num_words]
    s = tf.einsum('ijl,l->ij', tf.tanh(term), v)

    # a^t_i
    a = tf.nn.softmax(s, dim=1)

    # c_t
    c = tf.reduce_sum(tf.multiply(tf.expand_dims(a, axis=2), aggregation),
                      axis=1)

    # next h^a_t
    with tf.variable_scope('prediction_static_rnn', reuse=reuse) as scope:
        rnn_cell = tf.nn.rnn_cell.GRUCell(rnn_size, reuse=reuse)
        _, next_hat = tf.nn.static_rnn(rnn_cell, [c],
                                       initial_state=prev_rnn_state,
                                       sequence_length=tf.ones([batch_size],
                                                               tf.int32),
                                       scope=scope,
                                       dtype=tf.float32)

    return next_hat, tf.log(a)
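The score / softmax / weighted-sum pattern above is standard additive attention pooling. A numpy-only sketch of that pattern (the WPh and Wah projections are omitted and all sizes are hypothetical):

import numpy as np

batch, words, dim = 2, 5, 4
agg = np.random.randn(batch, words, dim)
v = np.random.randn(dim)

s = np.einsum('ijl,l->ij', np.tanh(agg), v)           # [batch, words]
a = np.exp(s) / np.exp(s).sum(axis=1, keepdims=True)  # softmax over words
c = (a[:, :, None] * agg).sum(axis=1)                 # [batch, dim]
print(c.shape)  # (2, 4)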
Example 5
def _extract_vec_values(batched_vecs: tf.Variable,
                        batched_indices: tf.Variable) -> tf.Variable:
    batch_size = int(batched_vecs.get_shape()[0])

    batch_index_range = tf.range(batch_size, dtype=tf.int32)
    full_batched_indices = tf.stack([batch_index_range, batched_indices],
                                    axis=1)
    return tf.gather_nd(batched_vecs, full_batched_indices)
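A numpy equivalent of the gather_nd pattern above, picking one entry per batch row:

import numpy as np

batched_vecs = np.array([[0.1, 0.9], [0.7, 0.3]])
batched_indices = np.array([1, 0])
print(batched_vecs[np.arange(len(batched_vecs)), batched_indices])  # [0.9 0.7]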
Example 6
def apply_mask(weights: tf.Variable,
               mask_collections=None,
               masked_weight_collections=None):
    """ Creates a mask for the given variable, and returns a masked version
    of the variable.

    Parameters
    ----------
    weights: The variable for which to create the masks.
    mask_collections: A list of collections into which to add the mask variable.
        By default, only adds to MASK_COLLECTION.
    masked_weight_collections: A list of collections into which to add the masked
        weights. By default, only adds to MASKED_WEIGHT_COLLECTION.

    Returns
    -------
    masked_weight: The value of the masked variable.
    mask: The variable corresponding to the created mask.
    """
    variable_name = os.path.basename(weights.op.name)

    with tf.variable_scope(variable_name) as vs:
        mask = tf.get_variable('mask',
                               weights.get_shape(),
                               initializer=tf.ones_initializer,
                               trainable=False,
                               dtype=weights.dtype)

        name_scope_name = vs.name

    # Re-open a name scope with an absolute name: entering the variable
    # scope above sets the variable_scope correctly, but not the
    # name_scope.
    with tf.name_scope(name_scope_name + '/'):
        masked_weight = tf.multiply(weights, mask, 'masked_weight')

    if mask_collections is not None:
        for collection in mask_collections:
            tf.add_to_collection(collection, mask)
    else:
        tf.add_to_collection(MASK_COLLECTION, mask)

    if masked_weight_collections is not None:
        for collection in masked_weight_collections:
            tf.add_to_collection(collection, masked_weight)
    else:
        tf.add_to_collection(MASKED_WEIGHT_COLLECTION, masked_weight)

    return masked_weight, mask
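A minimal usage sketch, assuming TensorFlow 1.x graph mode (the collection name 'my_masks' is hypothetical):

import tensorflow as tf

weights = tf.get_variable('kernel', shape=[128, 64])
masked_weight, mask = apply_mask(weights, mask_collections=['my_masks'])
# Use `masked_weight` in place of `weights`; assigning zeros to entries
# of `mask` prunes the corresponding connections.
print(tf.get_collection('my_masks'))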
Example 7
def tf_show(var: tf.Variable, name=None, summarize=1000):
    """
    Useful function to print the value of a variable during evaluation
    Args:
        var: variable to show
        name: name to display
        summarize: number of values to display

    Returns:
        the same variable but wrapped with a Print module

    """
    name = name or var.name
    shape = tuple([d.value for d in var.get_shape()])
    return tf.Print(var, [var], message=name + str(shape), summarize=summarize)
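A minimal usage sketch, assuming TensorFlow 1.x (tf.Print was deprecated in favour of tf.print in TF 2.x); the message is emitted each time the wrapped tensor is evaluated:

import tensorflow as tf

x = tf.constant([[1.0, 2.0], [3.0, 4.0]])
x = tf_show(x, name='x after init')
with tf.Session() as sess:
    sess.run(x)  # logs 'x after init(2, 2)' followed by the values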
Example 8
def loss_neg_log_prob(prediction_logits: tf.Variable,
                      labels: tf.Variable) -> tf.Variable:
    print('labels', labels.get_shape())
    batch_size = int(prediction_logits.get_shape()[0])

    prediction_list = tf.unstack(tf.log(tf.nn.softmax(prediction_logits,
                                                      dim=2)),
                                 axis=1)
    pred_start = prediction_list[0]
    pred_end = prediction_list[1]

    label_list = tf.unstack(labels, axis=1)
    label_start = label_list[0]
    label_end = label_list[1]

    negative_log_prob_start = tf.div(
        _extract_vec_values(pred_start, label_start),
        tf.constant(-batch_size, dtype=tf.float32))
    negative_log_prob_end = tf.div(_extract_vec_values(pred_end, label_end),
                                   tf.constant(-batch_size, dtype=tf.float32))

    return tf.add(negative_log_prob_start,
                  negative_log_prob_end) / tf.constant(
                      2, dtype=tf.float32, shape=[])
Example 9
def _char_embedding_layer(
        embedder: EmbeddingService, chars: tf.Variable, num_words: tf.Variable,
        num_chars: tf.Variable, char_rnn_size: int,
        dropout_function: Callable[[tf.Variable], tf.Variable]) -> tf.Variable:
    batch_size = int(chars.get_shape()[0])
    embedding_size = embedder.embedding_dim

    with tf.variable_scope('char_embedding_layer'):
        # [batch_size, dim_num_words, dim_num_chars]
        char_embeddings = tf.get_variable(name='char_embeddings',
                                          trainable=True,
                                          dtype=tf.float32,
                                          initializer=tf.constant(
                                              embedder.embedding_matrix,
                                              dtype=tf.float32))
        char_raw_embed = dropout_function(
            tf.nn.embedding_lookup(char_embeddings, chars))

        # we need to unstack instead of reshape as two dimensions are unknown
        # batch_size * [dim_num_words, dim_num_chars, embedding_size]
        char_raw_embed_list = tf.unstack(char_raw_embed, batch_size, axis=0)
        char_raw_embed_length_list = tf.unstack(num_chars, batch_size, axis=0)
        # batch_size * [dim_num_words, layer_size]
        char_embed_list = []

        with tf.variable_scope('encoding') as scope:
            fw_cell = GRUCell(char_rnn_size)
            bw_cell = GRUCell(char_rnn_size)

            for i in range(len(char_raw_embed_list)):
                batch_embed = char_raw_embed_list[i]
                batch_char_length = char_raw_embed_length_list[i]

                (_, _), (fw_final, bw_final) = bidirectional_dynamic_rnn(
                    fw_cell,
                    bw_cell,
                    inputs=batch_embed,
                    dtype=tf.float32,
                    sequence_length=batch_char_length,
                    scope=scope,
                    parallel_iterations=64,
                    swap_memory=True)
                out = tf.concat([fw_final, bw_final], axis=1)
                char_embed_list.append(out)

    return tf.stack(char_embed_list, axis=0)
Example 10
def _prediction_init_state(qu_vecs: tf.Variable, att_size: int) -> tf.Variable:
    qu_vec_dim = int(qu_vecs.get_shape()[2])

    with tf.variable_scope('prediction_init'):
        v = tf.get_variable('v', [att_size], tf.float32)
        WQu = tf.get_variable('WQu', [qu_vec_dim, att_size], tf.float32)
        WQvVQr = tf.get_variable('WQvVQr', [1, 1, att_size], tf.float32)

    # do calculations
    # s^t_j
    # [batch_size, num_words, layer_size]
    att_term = tf.add(tf.einsum('ijk,kl->ijl', qu_vecs, WQu), WQvVQr)
    # [batch_size, num_words]
    s = tf.einsum('ijl,l->ij', tf.nn.tanh(att_term), v)

    # a^t_i
    a = tf.nn.softmax(s, dim=1)

    # r^Q (is the equivalent of c in prediction_layer)
    # [batch_size, layer_size]
    r = tf.reduce_sum(tf.multiply(tf.expand_dims(a, axis=2), qu_vecs), axis=1)

    return r
Example 11
def print_tensor_shape(x: tf.Variable, prefix=""):
    shape = x.get_shape().as_list()
    logger.info(prefix + ", shape: %s" % str(shape))
Example 12
def check_shape(var1_tf: tf.Variable, var2_np: np.ndarray):
    if var1_tf.get_shape().as_list() != list(var2_np.shape):
        log("Shapes do not match! Exception will follow.", color="red")
Example 13
def _gate_input(input: tf.Variable) -> tf.Variable:
    agg_dim = int(input.get_shape()[2])
    W = tf.get_variable('W', [agg_dim, agg_dim])
    g_t = tf.sigmoid(tf.einsum('ijk,kl->ijl', input, W))
    return tf.multiply(g_t, input)
Example 14
class DenseLayer(BuildableNode):
    def __init__(self, name=None, protected=False):
        super().__init__(name=name, protected=protected)
        self.hasTrainableVariables = True
        self.activationLookup = {
            "relu": relu,
            "linear": self.linear,
            "sigmoid": sigmoid,
            "tanh": tanh
        }

    def linear(self, x):
        return x

    #throws unknownActivationFunction if the activation function is not in activationLookup
    def newLayer(self, layerSize, activationFunction):
        self.size = layerSize
        self.outputShape = [layerSize]
        self.activationKey = activationFunction
        try:
            self.activation = self.activationLookup[activationFunction]
        except KeyError as e:
            raise unknownActivationFunction(activationFunction)

    #TODO make it throw an error if inputShape not [None]
    def build(self, seed=None):
        if self.built: return

        if len(self.inputConnections) < 1:
            raise (notEnoughNodeConnections(len(self.inputConnections), 1))
        #now make the variables

        inputShape = self.inputConnections[0].outputShape
        self.inputSize = inputShape[0]

        biasInit = 0.1
        weightInitSTDDEV = 1 / self.inputSize

        self.biases = Variable(constant(biasInit, shape=[self.size]))
        self.weights = Variable(
            normal([self.inputSize, self.size],
                   stddev=weightInitSTDDEV,
                   mean=0,
                   seed=seed))

        self.built = True

    #function that executes the layer for a list of inputs
    #inp has shape [None,inputSize]
    #returns shape [None,outputSize]
    def execute(self, inp):
        if not self.built:
            raise (operationWithUnbuiltNode("execute"))
        else:
            return ((
                self.activation(matmul([inp], self.weights) + self.biases))[0])

    #function that returns a shape [2] list of trainable variables
    #because these are tf variables, this returns a list of references
    def getTrainableVariables(self):
        #does error checking
        super().getTrainableVariables()
        #the set of weights and the biases are each a single multi-dimensional variable
        return ([self.biases, self.weights])

    def connect(self, connections):
        if len(connections) == 0: return
        if len(connections[0].outputShape) != 1:
            raise (invalidNodeConnection(connections[0].outputShape, [None]))
        super().connect(connections)

    #export to a weight and bias file; creates a directory for the layer
    #files:
    # [path]/[subdir]/mat.weights (byte format)
    # [path]/[subdir]/mat.biases (byte format)
    # [path]/[subdir]/hyper.txt
    def exportLayer(self, path, subdir):
        if not self.built:
            raise (operationWithUnbuiltNode("exportLayer"))

        import struct
        from os import mkdir

        accessPath = path + "\\" + subdir

        #first step is to create a directory for the network if one does not already exist
        try:
            mkdir(accessPath)
        except FileExistsError:
            pass
        except Exception as e:
            raise (invalidPath(accessPath))

        #save hyper.txt
        #contains: inputSize, layerSize, activation
        with open(accessPath + "\\hyper.txt", "w") as f:
            f.write(str(self.inputSize) + "\n")
            f.write(str(self.size) + "\n")
            f.write(self.activationKey + "\n")

        #save mat.weights
        weightFloats = []
        for i in range(self.weights.get_shape()[0]):
            for j in range(self.weights[i].get_shape()[0]):
                weightFloats.append(float(self.weights[i][j]))
        with open(accessPath + "\\mat.weights", "wb") as f:
            f.write(
                bytearray(
                    struct.pack(str(len(weightFloats)) + "f", *weightFloats)))

        del weightFloats  #this list can be very large, so free it before continuing

        #save mat.biases
        biasFloats = []
        with open(accessPath + "\\mat.biases", "wb") as f:
            for i in range(self.biases.get_shape()[0]):
                biasFloats.append(float(self.biases[i]))
            f.write(
                bytearray(struct.pack(str(len(biasFloats)) + "f",
                                      *biasFloats)))

    #function that loads a layer from files and stores the parameters on the instance
    #reads from [path]/[subdir]
    def importLayer(self, superdir, subdir):

        from os import path

        accessPath = superdir + "\\" + subdir

        #check if directory exists
        if not path.exists(accessPath):
            raise (missingDirectoryForImport(accessPath))

        #import from hyper.txt
        try:
            with open(accessPath + "\\hyper.txt", "r") as f:
                fileLines = f.readlines()
                #strip line breaks
                fileLines = [i[:-1] for i in fileLines]
                try:
                    self.inputSize = int(fileLines[0])
                except ValueError as e:
                    raise (invalidDataInFile(accessPath + "\\hyper.txt",
                                             "inputSize", fileLines[0]))
                try:
                    self.size = int(fileLines[1])
                    self.outputShape = [self.size]
                except ValueError as e:
                    raise (invalidDataInFile(accessPath + "\\hyper.txt",
                                             "size", fileLines[1]))
                try:
                    self.activationKey = fileLines[2]
                    self.activation = self.activationLookup[fileLines[2]]
                except KeyError as e:
                    raise unknownActivationFunction(fileLines[2])
        except IOError:
            raise (missingFileForImport(accessPath + "\\hyper.txt"))

        #import weights
        import struct

        try:
            with open(accessPath + "//mat.weights", "rb") as f:
                raw = f.read()  #type of bytes

                try:
                    inp = struct.unpack(
                        str(self.inputSize * self.size) + "f",
                        raw)  #list of float32s
                except struct.error as e:
                    raise (invalidByteFile(accessPath + "\\mat.weights"))

                weights = []
                for i in range(self.inputSize):
                    weights.append([])
                    for j in range(self.size):
                        weights[i].append(inp[i * self.size + j])
                self.weights = Variable(weights)

        except IOError:
            raise (missingFileForImport(accessPath, "mat.weights"))

        #import biases
        try:
            with open(accessPath + "//mat.biases", "rb") as f:
                raw = f.read()  #type of bytes
                try:
                    inp = struct.unpack(str(self.size) + "f",
                                        raw)  #list of float32s
                except struct.error as e:
                    raise (invalidByteFile(accessPath + "\\mat.biases"))
                self.biases = Variable([i for i in inp])

        except IOError:
            raise (missingFileForImport(accessPath, "mat.biases"))

        self.built = True
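A self-contained sketch of the byte format that exportLayer writes and importLayer reads back: row-major float32s packed with struct (the 2x3 matrix is hypothetical):

import struct

weights = [[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]]
flat = [w for row in weights for w in row]
blob = struct.pack(str(len(flat)) + "f", *flat)       # 24 bytes: 6 float32s
restored = struct.unpack(str(len(flat)) + "f", blob)  # tuple of 6 floats
rows = [list(restored[i * 3:(i + 1) * 3]) for i in range(2)]
assert len(rows) == 2 and len(rows[0]) == 3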
Example 15
def init_first_layer_weights(var: tf.Variable, rgb_weights: np.ndarray,
                             sess: tf.Session, hs_weight_init: str) -> None:
    '''Initializes the weights for filters in the first conv layer.

    'resnet/scale1/weights:0' for ResNet
    'vggf/conv1/conv1_weights:0' for VGGF

    If we are using RGB-only, then just initializes var to rgb_weights. Otherwise, uses
    hs_weight_init to determine how to initialize the weights for non-RGB bands.

    Args
    - var: tf.Variable, the filters in the 1st convolution layer, shape [F, F, C, 64]
        - F is the filter size (7 for ResNet, 11 for VGGF)
        - C is either 3 (RGB), 7 (lxv3), or 9 (Landsat7)
    - rgb_weights: ndarray of np.float32, shape [F, F, 3, 64]
    - sess: tf.Session
    - hs_weight_init: str, one of ['random', 'same', 'samescaled']
    '''
    var_shape = np.asarray(var.get_shape().as_list())
    rgb_weights_shape = np.asarray(rgb_weights.shape)

    # only weights in the 1st conv layer need to be adjusted for dealing with hyperspectral images
    # check that the filter shape and num_filters match up, and that RGB weights have 3 channels
    if 'scale1/weights:0' in var.name:  # ResNet
        F = 7
    elif 'conv1/conv1_weights:0' in var.name:  # VGGF
        F = 11
    else:
        raise ValueError('var is not the weights for the first conv layer')

    assert np.all(var_shape[[0, 1]] == [F, F])
    assert np.all(var_shape[[0, 1, 3]] == rgb_weights_shape[[0, 1, 3]])
    assert rgb_weights.shape[2] == 3
    assert rgb_weights.dtype == np.float32

    # if we are using the RGB-only model, then just initialize to saved weights
    if var_shape[2] == 3:
        print('Using rgb only model')
        sess.run(var.assign(rgb_weights))
        return

    # Set up the initializer function
    print('Initializing var different from saved rgb weights:', var.name,
          ' With shape:', var_shape)
    print('Using ' + hs_weight_init +
          ' initialization for hyperspectral weights.')
    num_hs_channels = var_shape[2] - rgb_weights.shape[2]
    hs_weights_shape = [F, F, num_hs_channels, 64]

    if hs_weight_init == 'random':
        # initialize the weights in the hyperspectral bands to gaussian with same overall mean and
        # stddev as the RGB channels
        rgb_mean = np.mean(rgb_weights)
        rgb_std = np.std(rgb_weights)
        hs_weights = tf.truncated_normal(hs_weights_shape,
                                         mean=rgb_mean,
                                         stddev=rgb_std,
                                         dtype=tf.float32)
    elif hs_weight_init == 'same':
        # initialize the weight for each position in each filter to the average of the 3 RGB weights
        # at the same position in the same filter
        rgb_mean = rgb_weights.mean(axis=2,
                                    keepdims=True)  # shape [F, F, 1, 64]
        hs_weights = np.tile(rgb_mean, (1, 1, num_hs_channels, 1))
    elif hs_weight_init == 'samescaled':
        # same as 'same', but rescale both the RGB and HS weights by 3 / C so
        # that the total incoming weight per filter position is preserved
        rgb_mean = rgb_weights.mean(axis=2,
                                    keepdims=True)  # shape [F, F, 1, 64]
        hs_weights = np.tile(rgb_mean, (1, 1, num_hs_channels, 1))
        rgb_weights *= 3 / (3 + num_hs_channels)
        hs_weights *= 3 / (3 + num_hs_channels)
    else:
        raise ValueError(f'Unknown hs_weight_init type: {hs_weight_init}')

    final_weight = tf.concat([rgb_weights, hs_weights], axis=2)
    print('Shape of 1st layer weights:',
          final_weight.shape)  # should be (F, F, C, 64)

    sess.run(var.assign(final_weight))
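A numpy-only sketch of the 'same' strategy: every extra band is initialized to the per-position mean of the three RGB weights (sizes are hypothetical):

import numpy as np

F, num_hs_channels = 7, 4
rgb_weights = np.random.randn(F, F, 3, 64).astype(np.float32)
rgb_mean = rgb_weights.mean(axis=2, keepdims=True)          # [F, F, 1, 64]
hs_weights = np.tile(rgb_mean, (1, 1, num_hs_channels, 1))  # [F, F, 4, 64]
full = np.concatenate([rgb_weights, hs_weights], axis=2)    # [F, F, 7, 64]
print(full.shape)  # (7, 7, 7, 64)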