Example #1
def generate_encoding_template(batch_size, *args):
    boxes_batch = []

    boxes_list = args[0]
    n_classes = args[1]
    variances = args[2]

    # Create boxes_list first.
    for boxes in boxes_list:
        boxes = tf.expand_dims(boxes, 0)
        boxes = tf.tile(boxes, (batch_size, 1, 1, 1, 1))
        
        # Reshape -> (Batch, Feature_Height * Feature_Width * n_boxes, 4)
        boxes = tf.reshape(boxes, (batch_size, -1, 4))
        boxes_batch.append(boxes)
    
    boxes_tensor = tf.concat(boxes_batch, axis=1)

    classes_tensor = tf.zeros((batch_size, boxes_tensor.shape[1], n_classes))

    variances_tensor = tf.zeros_like(boxes_tensor)
    variances_tensor += variances

    y_encoding_template = tf.concat((classes_tensor, boxes_tensor, boxes_tensor, variances_tensor), axis=2)

    return y_encoding_template
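
# A quick smoke test of the template above (the shapes and variances are made
# up): one 2x2 feature map with 3 anchor boxes per cell and 5 classes.
boxes_list = [tf.zeros((2, 2, 3, 4))]
template = generate_encoding_template(8, boxes_list, 5, [0.1, 0.1, 0.2, 0.2])
print(template.shape)  # (8, 12, 5 + 4 + 4 + 4)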
Example #2
def pool(state,
         action,
         next_state,
         reward,
         pool_size,
         state_pool=None,
         action_pool=None,
         next_state_pool=None,
         reward_pool=None):
    if state_pool is None:
        state_pool = tf.expand_dims(state, axis=0)
        action_pool = tf.expand_dims(action, axis=0)
        next_state_pool = tf.expand_dims(next_state, axis=0)
        reward_pool = tf.expand_dims(reward, axis=0)
    else:
        state_pool = tf.concat(
            [state_pool, tf.expand_dims(state, axis=0)], axis=0)
        action_pool = tf.concat(
            [action_pool, tf.expand_dims(action, axis=0)], axis=0)
        next_state_pool = tf.concat(
            [next_state_pool, tf.expand_dims(next_state, axis=0)], axis=0)
        reward_pool = tf.concat(
            [reward_pool, tf.expand_dims(reward, axis=0)], axis=0)
    if len(state_pool) > pool_size:
        state_pool = state_pool[1:]
        action_pool = action_pool[1:]
        next_state_pool = next_state_pool[1:]
        reward_pool = reward_pool[1:]
    return state_pool, action_pool, next_state_pool, reward_pool
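
# A minimal sketch of the FIFO behaviour above, with toy transition tensors
# (shapes are illustrative):
s = tf.constant([0.0, 1.0])
a = tf.constant(1)
r = tf.constant(0.5)
pools = pool(s, a, s, r, pool_size=2)  # first call creates the pools
pools = pool(s, a, s, r, 2, *pools)    # later calls append along axis 0
pools = pool(s, a, s, r, 2, *pools)    # oldest entry dropped once full
print(pools[0].shape)  # (2, 2)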
Example #3
def rpn_graph(self,
              rpn_feature_maps,
              num_anchors_per_location,
              weight_decay=0.0005):
    """
    Args:
        rpn_feature_maps: list of per-stage tensors, each (N, H, W, C), used for region proposals

    """
    rpn_probs = []
    rpn_bboxes_delta = []
    rpn_logits = []
    with slim.arg_scope([slim.conv2d],
                        padding='SAME',
                        weights_regularizer=slim.l2_regularizer(weight_decay),
                        activation_fn=None):
        for stage_i, feature_map in enumerate(rpn_feature_maps):
            # start from 2
            with tf.variable_scope('rpn' + str(stage_i + 2)):
                shared = slim.conv2d(feature_map,
                                     512,
                                     kernel_size=3,
                                     stride=1,
                                     scope='shared')

                x = slim.conv2d(shared,
                                2 * num_anchors_per_location[stage_i],
                                kernel_size=1,
                                stride=1,
                                scope='rpn_class_logit')
                rpn_logit = tf.reshape(x, (-1, 2))
                rpn_logits.append(rpn_logit)

                # BG/FG
                rpn_prob = slim.softmax(rpn_logit, scope='rpn_class_probs')
                rpn_probs.append(rpn_prob)

                # box delta
                x = slim.conv2d(shared,
                                4 * num_anchors_per_location[stage_i],
                                kernel_size=1,
                                stride=1,
                                scope='rpn_box_pred')
                rpn_bbox_delta = tf.reshape(x, (tf.shape(x)[0], -1, 4))

                rpn_bboxes_delta.append(rpn_bbox_delta)

    all_rpn_bboxes_delta = tf.concat(rpn_bboxes_delta,
                                     axis=1,
                                     name='rpn_bboxes_delta')
    all_rpn_probs = tf.concat(rpn_probs, axis=1, name='rpn_probs')
    all_rpn_logits = tf.concat(rpn_logits, axis=1, name='rpn_logits')

    # shape (N,all_num_anchors,4)
    return all_rpn_bboxes_delta, all_rpn_logits, all_rpn_probs
Example #4
            def f(pre_logits, **c):
                logits = layers.conv(pre_logits,
                                     1,
                                     class_count * 2,
                                     bias=True,
                                     name='logits/conv')
                if resize:
                    logits = tf.image.resize_bilinear(logits, c['image_shape'])

                logits_mean = logits[..., :class_count]  # NHWC
                logits_logvar = logits[..., class_count:]  # NHWC
                logits_var = tf.exp(logits_logvar)
                logits_std = tf.sqrt(logits_var)
                # TODO: try log(1+exp(logits[..., class_count:])) for logits_std

                samples_shape = tf.concat(
                    [[sample_count], tf.shape(logits_std)], axis=0)
                noise = tf.random.normal(samples_shape)  # nNHWC
                logits_samples = logits_mean + noise * logits_std  # nNHWC

                return logits_samples, {
                    'logits_mean': logits_mean,
                    'logits_logvar': logits_logvar,
                    'logits_var': logits_var,
                    'logits_std': logits_std,
                    'logits_samples': logits_samples
                }
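
# The function above draws `sample_count` Monte-Carlo samples of the logits
# under a learned Gaussian, the usual recipe for aleatoric uncertainty
# (Kendall & Gal, 2017). A hedged sketch of folding such samples into a
# classification loss; the helper name and the integer NHW `labels` are
# assumptions, not part of the original:
def mc_xent(logits_samples, labels):
    per_sample = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=tf.broadcast_to(labels, tf.shape(logits_samples)[:-1]),
        logits=logits_samples)  # nNHW
    # log-mean-exp over the sample axis, then average over pixels
    n = tf.cast(tf.shape(logits_samples)[0], tf.float32)
    return tf.reduce_mean(
        -(tf.reduce_logsumexp(-per_sample, axis=0) - tf.math.log(n)))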
Example #5
def yolo_head(feats, anchors, num_classes, input_shape, calc_loss=False):
    """Convert final layer features to bounding box parameters."""

    num_anchors = len(anchors)
    # Reshape to batch, height, width, num_anchors, box_params.
    anchors_tensor = tf.reshape(tf.constant(anchors, dtype=tf.float32),
                                [1, 1, 1, num_anchors, 2])

    grid_shape = tf.shape(feats)[1:3]  # height, width
    grid_y = tf.tile(
        tf.reshape(tf.range(0, grid_shape[0]), [-1, 1, 1, 1]),
        [1, grid_shape[1], 1, 1])
    grid_x = tf.tile(
        tf.reshape(tf.range(0, grid_shape[1]), [1, -1, 1, 1]),
        [grid_shape[0], 1, 1, 1])
    grid = tf.concat([grid_x, grid_y], axis=-1)
    grid = tf.cast(grid, feats.dtype)

    feats = tf.reshape(
        feats,
        [-1, grid_shape[0], grid_shape[1], num_anchors, num_classes + 5])

    # Adjust preditions to each spatial grid point and anchor size.
    box_xy = (tf.sigmoid(feats[..., :2]) + grid) / tf.cast(
        grid_shape[::-1], feats.dtype)
    box_wh = tf.exp(feats[..., 2:4]) * anchors_tensor / tf.cast(
        input_shape[::-1], feats.dtype)
    box_confidence = tf.sigmoid(feats[..., 4:5])
    box_class_probs = tf.sigmoid(feats[..., 5:])

    if calc_loss:
        return grid, feats, box_xy, box_wh
    return box_xy, box_wh, box_confidence, box_class_probs
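
# Hedged smoke test (the anchors and shapes are made up): a 13x13 grid with
# 3 anchors and 80 classes, i.e. 3 * (80 + 5) = 255 output channels.
feats = tf.random.uniform((1, 13, 13, 255))
anchors = [(10, 13), (16, 30), (33, 23)]
box_xy, box_wh, box_conf, box_cls = yolo_head(
    feats, anchors, num_classes=80, input_shape=tf.constant([416, 416]))
print(box_xy.shape)  # (1, 13, 13, 3, 2)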
Example #6
    def rotate(self, batch_data):
        mini_batch = batch_data.shape[0]
        rotated_batch = tf.reshape(batch_data[:1],
                                   (1, int(np.sqrt(self.input_size)),
                                    int(np.sqrt(self.input_size)), 1))
        rotated_angles = tf.zeros(shape=(1, ))
        for i in range(int(batch_data.shape[0] / mini_batch)):
            #random_angles = tf.random.uniform(shape = (mini_batch, ), minval = -np.pi / 2, maxval = np.pi / 2)
            random_angles = np.random.uniform(low=-np.pi / 2,
                                              high=np.pi / 2,
                                              size=(mini_batch, 1))
            self.rotated_images = tf.contrib.image.transform(
                self.images,
                tf.contrib.image.angles_to_projective_transforms(
                    self.angles, tf.cast(tf.shape(self.images)[1], tf.float32),
                    tf.cast(tf.shape(self.images)[2], tf.float32)))
            new_batch = self.sess.run(self.rotated_images, \
                    feed_dict = {self.original_images: batch_data[i * mini_batch: (i + 1) * mini_batch], self.angles: random_angles.flatten()})

            if mini_batch == batch_data.shape[0]:
                #return new_batch, np.reshape(random_angles.numpy(), [mini_batch, 1])
                return new_batch, random_angles
            else:
                rotated_angles = tf.concat(
                    [rotated_angles,
                     tf.cast(random_angles.flatten(), tf.float32)], axis=0)
                rotated_batch = tf.concat([rotated_batch, new_batch], axis=0)

        return rotated_batch.numpy(), np.reshape(rotated_angles.numpy(),
                                                 [rotated_angles.shape[0], 1])
Example #7
    def call(self, x, mask=None):
        assert(len(x) == 2)
        img = x[0]
        rois = x[1]
        input_shape = tf.shape(img)
        outputs = []

        for roi_idx in range(self.num_rois):
            x = rois[0, roi_idx, 0]
            y = rois[0, roi_idx, 1]
            w = rois[0, roi_idx, 2]
            h = rois[0, roi_idx, 3]
            
            row_length = w / float(self.pool_size)
            col_length = h / float(self.pool_size)

            num_pool_regions = self.pool_size
            x = tf.cast(x, 'int32')
            y = tf.cast(y, 'int32')
            w = tf.cast(w, 'int32')
            h = tf.cast(h, 'int32')

            rs = tf.image.resize_images(img[:, y:y+h, x:x+w, :], (self.pool_size, self.pool_size))
            outputs.append(rs)

        final_output = tf.concat(outputs, axis=0)
        final_output = tf.reshape(final_output, (1, self.num_rois, self.pool_size, self.pool_size, self.nb_channels))
        final_output = tf.transpose(final_output, perm=(0, 1, 2, 3, 4))

        return final_output
Example #8
    def bias_initializer(shape, *args, **kwargs):
        return tf.concat([
            self.bias_initializer((self.units,), *args, **kwargs),
            initializers.Ones()((self.units,), *args, **kwargs),
            self.bias_initializer((self.units * 2,), *args, **kwargs),
        ], axis=0)
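
# The initializer above is the usual "unit forget bias" trick: the LSTM bias
# is laid out as [input | forget | cell, output] blocks and only the
# forget-gate block starts at one, which helps gradients flow early in
# training. A standalone sketch of the same layout (`units` is illustrative):
units = 32
bias = tf.concat([
    tf.zeros((units,)),      # input gate
    tf.ones((units,)),       # forget gate starts at one
    tf.zeros((units * 2,)),  # cell and output gates
], axis=0)
print(bias.shape)  # (128,)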
Example #9
def extract_features_mfcc(input_path, class_name, train_test, class_names,
                          bands, frames):
    window_size = 512 * (frames - 1)
    log_spectograms = []
    labels = []
    class_files = os.listdir(input_path + class_name + "/" + train_test)
    n_files = len(class_files)
    for i, aud_filename in enumerate(class_files):
        audio_path = input_path + class_name + "/" + train_test + "/" + aud_filename
        print("Preprocessing: " + class_name + "_" + train_test + ": " +
              str(i) + " of " + str(n_files) + " :" + class_name + "/" +
              train_test + "/" + aud_filename)
        audio_clip, sr = librosa.load(audio_path)
        for (start, end) in windows(audio_clip, window_size):
            if (len(audio_clip[start:end]) == int(window_size)):
                audio_signal = audio_clip[start:end]
                mel_spec = librosa.feature.melspectrogram(audio_signal,
                                                          n_mels=bands)
                log_spec = librosa.logamplitude(mel_spec)
                log_spec = log_spec.T.flatten()[:, np.newaxis].T
                log_spectograms.append(log_spec)
                labels.append(encode_class(class_name, class_names))

    log_specgrams = np.asarray(log_spectograms).reshape(
        len(log_spectograms), bands, frames, 1)
    features = np.concatenate(
        (log_specgrams, np.zeros(np.shape(log_specgrams))), axis=3)
    for i in range(len(features)):
        features[i, :, :, 1] = librosa.feature.delta(features[i, :, :, 0])
    return np.array(features), np.array(labels)
Example #10
    def context_module(self, x, channels, name):
        # see Figure 4 (SSH Context Module)
        with tf.variable_scope(name):
            with argscope([tf.layers.conv2d],
                          kernel_size=3,
                          activation=tf.nn.relu,
                          padding='same'):
                c1 = tf.layers.conv2d(x, channels // 2, name='conv1')
                # upper path
                c2 = tf.layers.conv2d(c1, channels // 2, name='conv2')
                # lower path
                c3 = tf.layers.conv2d(c1, channels // 2, name='conv3a')
                c3 = tf.layers.conv2d(c3, channels // 2, name='conv3b')
                return tf.concat([c2, c3], axis=-1)
Example #11
    def __call__(self, x, support):
        out = [x]
        for a in support:
            x1 = self.nconv(x, a)
            out.append(x1)
            for k in range(2, self.order + 1):
                x2 = self.nconv(x1, a)
                out.append(x2)
                x1 = x2

        h = tf.concat(out, axis=3)
        h = self.mlp(h)
        h = tf.nn.dropout(h, self.dropout)
        return h
Example #12
def main(np_location, data_name):
    np_list = os.listdir(np_location)
    first_train = 1
    first_valid = 1
    for np_file in np_list:
        np_array = np.load(os.path.join(np_location, np_file))
        np_fname = basename(np_file)
        if np_fname[6:10] == 'Data':
            label_name = np_fname[0:4] + '_Labels' + np_fname[10:]
            np_labels = np.load(os.path.join(np_location, label_name))
        iter_dataset = tf.data.Dataset.from_tensor_slices((np_array, np_labels))
        # Check if training or validation set and assign accordingly
        if np_fname[0:5] == 'Train':
            if first_train == 1:
                main_dataset = iter_dataset
                first_train = 0
            else:
                main_dataset = main_dataset.concatenate(iter_dataset)
        elif np_fname[0:5] == 'Valid':
            if first_valid == 1:
                main_dataset = iter_dataset
                first_valid = 0
            else:
                main_dataset = main_dataset.concatenate(iter_dataset)
Example #13
def tf_roll(a, shift, axis=None):
    if axis is None:
        n = a.get_shape()[0]
        reshape = True
    else:
        try:
            n = a.get_shape()[axis]
        except IndexError:
            raise ValueError('axis must be >= 0 and < %d' % len(a.get_shape()))
        reshape = False
    if n == 0:
        return a
    shift %= n
    indexes = tf.concat([tf.range(n - shift, n), tf.range(n - shift)], axis=0)
    res = tf.gather(a, indexes, axis=0 if axis is None else axis)
    if reshape:
        res = tf.reshape(res, tf.shape(a))
    return res
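
# Quick check against NumPy semantics (values are illustrative):
t = tf.constant([1, 2, 3, 4, 5])
print(tf_roll(t, 2))  # [4 5 1 2 3], same as np.roll([1, 2, 3, 4, 5], 2)
print(tf_roll(tf.reshape(t[:4], (2, 2)), 1, axis=1))  # rolls the columns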
Example #14
def encode_ssd(gt_labels, *args):

    n_classes = args[1]

    # Column indices into the ground truth label arrays.
    class_id = 0
    xmin = 1
    ymin = 2
    xmax = 3
    ymax = 4
    background_id = 0

    batch_size = len(gt_labels)

    # The template is built with TF ops; the item assignments below need a
    # NumPy array, so convert it (assumes eager execution).
    y_encoded = generate_encoding_template(batch_size, *args).numpy()

    y_encoded[:, :, background_id] = 1
    n_boxes = y_encoded.shape[1]

    class_vectors = np.eye(n_classes)

    for i in range(batch_size):
        labels = gt_labels[i]

        classes_one_hot = class_vectors[labels[:, class_id]]
        labels_one_hot = np.concatenate(
            [classes_one_hot, labels[:, [xmin, ymin, xmax, ymax]]], axis=-1)

        similarities = iou(labels[:, [xmin, ymin, xmax, ymax]], y_encoded[i, :, -12:-8])

        bipartite_matches = match_bipartite_greedy(weight_matrix=similarities)

        y_encoded[i, bipartite_matches, :-8] = labels_one_hot

        similarities[:, bipartite_matches] = 0

        max_background_similarities = np.amax(similarities, axis=0)
        neutral_boxes = np.nonzero(max_background_similarities >= neg_iou_limit)[0]
        y_encoded[i, neutral_boxes, 0] = 0

    y_encoded[:, :, -12:-8] -= y_encoded[:, :, -8:-4]
    y_encoded[:, :, [-12, -10]] /= np.expand_dims(y_encoded[:, :, -6] - y_encoded[:, :, -8], axis=-1)  # (xmin(gt) - xmin(anchor)) / w(anchor), (xmax(gt) - xmax(anchor)) / w(anchor)
    y_encoded[:, :, [-11, -9]] /= np.expand_dims(y_encoded[:, :, -5] - y_encoded[:, :, -7], axis=-1)  # (ymin(gt) - ymin(anchor)) / h(anchor), (ymax(gt) - ymax(anchor)) / h(anchor)
    y_encoded[:, :, -12:-8] /= y_encoded[:, :, -4:]  # (gt - anchor) / size(anchor) / variance for all four coordinates, where 'size' refers to w and h respectively

    return y_encoded
Example #15
    def __init__(self, target_height, target_width, target_control_points,
                 **kwargs):
        '''
        target_control_points should have the extent from -1 to 1
        '''
        super(TPSGRidGen_Layer, self).__init__(**kwargs)
        assert tf.rank(target_control_points) == 2
        assert tf.shape(target_control_points)[1] == 2

        N = tf.shape(target_control_points)[0]
        self.num_points = N
        target_control_points = tf.cast(target_control_points, tf.float32)
        U_target_control = U_matrix(target_control_points,
                                    target_control_points)
        ones_vec = tf.ones([N, 1], tf.float32)
        P = tf.concat([ones_vec, target_control_points], axis=1)
        L_upperRows = tf.concat([U_target_control, P], axis=1)
        L_lowerRows = tf.concat([tf.transpose(P), tf.zeros([3, 3])], axis=1)
        L = tf.concat([L_upperRows, L_lowerRows], axis=0)

        self.L_inverse = tf.linalg.inv(L)

        # create target coordinate matrix
        HW = target_height * target_width
        self.HW = HW

        y = tf.range(target_height, dtype=tf.int32)
        x = tf.range(target_width, dtype=tf.int32)
        X, Y = tf.meshgrid(x, y)
        # Flatten to (HW, 1) columns and scale x,y to (-1,1)
        X = tf.cast(tf.reshape(X, [HW, 1]), tf.float32)
        Y = tf.cast(tf.reshape(Y, [HW, 1]), tf.float32)
        Y = Y * 2.0 / (target_height - 1) - 1.0
        X = X * 2.0 / (target_width - 1) - 1.0

        target_coordinate = tf.concat([X, Y], axis=1)
        U_target_coordinate2Control = U_matrix(target_coordinate,
                                               target_control_points)
        self.P_target_coordinate2Control = tf.concat([
            U_target_coordinate2Control,
            tf.ones([HW, 1], dtype=tf.float32), target_coordinate
        ],
                                                     axis=1)
Example #16
    def __call__(self, i, condition=None):

        output_dims = self.config.get("output dims", 3)
        output_act_fn = get_activation(
            self.config.get('output_activation', 'none'))

        x, end_points = self.network(i)

        x = tcl.flatten(x)
        if condition is not None:
            x = tf.concat([x, condition], axis=-1)

        with tf.variable_scope(self.name):
            if self.reuse:
                tf.get_variable_scope().reuse_variables()
            else:
                assert tf.get_variable_scope().reuse is False
                self.reuse = True

            if self.output_distribution == 'gaussian':
                mean = self.fc('fc_out_mean', x, output_dims,
                               **self.out_fc_args)
                log_var = self.fc('fc_out_log_var', x, output_dims,
                                  **self.out_fc_args)
                return mean, log_var

            elif self.output_distribution == 'mean':
                mean = self.fc('fc_out_mean', x, output_dims,
                               **self.out_fc_args)
                return mean

            elif self.output_distribution == 'none':
                out = self.fc('fc_out_mean', x, output_dims,
                              **self.out_fc_args)
                return out
            else:
                raise Exception("None output distribution named " +
                                self.output_distribution)
Example #17
    def detection_module(self, x, channels, name):
        # see Figure 3 (SSH Detection Module)
        yc = self.context_module(x, channels, 'context_%s' % name)

        with argscope([tf.layers.conv2d], padding='same'):
            y = tf.layers.conv2d(x,
                                 channels,
                                 kernel_size=3,
                                 activation=tf.nn.relu,
                                 name='conv1')
            y = tf.concat([yc, y], axis=-1)
            logits = tf.layers.conv2d(y,
                                      2,
                                      kernel_size=1,
                                      activation=tf.identity,
                                      name='conv2')
            reg = tf.layers.conv2d(y,
                                   8,
                                   kernel_size=1,
                                   activation=tf.identity,
                                   name='conv3')

        return logits, reg
Example #18
    def setup(self):
        passage = tf.placeholder(
            tf.int32, [None, passage_max_length],
            name='passage')  # shape (batch_size, passage_max_length)
        question = tf.placeholder(
            tf.int32, [None, question_max_length],
            name='question')  # shape (batch_size, question_max_length)
        desired_output = tf.placeholder(
            tf.float32, [None, passage_max_length],
            name='desired_output')  # shape (batch_size, passage_max_length)

        embedding = tf.constant(embedding_matrix,
                                name='embedding',
                                dtype=tf.float32)

        #######################
        # Preprocessing layer #
        #######################

        passage_embedded = tf.nn.embedding_lookup(
            embedding,
            passage)  # shape (batch_size, passage_max_length, embedding_size)
        question_embedded = tf.nn.embedding_lookup(
            embedding, question
        )  # shape (batch_size, question_max_length, embedding_size)

        dropout = tf.placeholder(tf.float32)

        with tf.variable_scope('passage_lstm'):
            passage_cell = tf.nn.rnn_cell.LSTMCell(hidden_size)
            passage_cell = tf.nn.rnn_cell.DropoutWrapper(
                passage_cell, output_keep_prob=dropout)
            passage_cell = tf.nn.rnn_cell.MultiRNNCell([passage_cell] * 2)
            H_p, _ = tf.nn.dynamic_rnn(
                passage_cell, passage_embedded, dtype=tf.float32
            )  # shape (batch_size, passage_max_length, hidden_size)

        with tf.variable_scope('question_lstm'):
            question_cell = tf.nn.rnn_cell.LSTMCell(hidden_size)
            question_cell = tf.nn.rnn_cell.DropoutWrapper(
                question_cell, output_keep_prob=dropout)
            question_cell = tf.nn.rnn_cell.MultiRNNCell([question_cell] * 2)
            H_q, _ = tf.nn.dynamic_rnn(
                question_cell, question_embedded, dtype=tf.float32
            )  # shape (batch_size, question_max_length, hidden_size)

        ####################
        # Match-LSTM layer #
        ####################

        # Weights and bias to compute `G`
        W_q = self.weight_variable(shape=[hidden_size, hidden_size])
        W_p = self.weight_variable(shape=[hidden_size, hidden_size])
        W_r = self.weight_variable(shape=[hidden_size, hidden_size])
        b_p = self.bias_variable(shape=[hidden_size])

        # Weight and bias to compute `a`
        w = self.weight_variable(shape=[hidden_size])
        b_alpha = self.bias_variable(shape=[])  # In the paper, this is `b`

        # Only calculate `WH_q` once
        WH_q = tf.matmul(W_q, H_q)

        # Results for forward and backward LSTMs
        H_r_forward = []
        H_r_backward = []

        with tf.variable_scope('forward_match_lstm'):
            forward_cell = tf.nn.rnn_cell.DropoutWrapper(
                tf.nn.rnn_cell.LSTMCell(hidden_size), output_keep_prob=dropout)
            forward_state = forward_cell.zero_state(batch_size,
                                                    dtype=tf.float32)
            h = forward_state.h
            for i in range(len(H_p)):
                G_forward = tf.tanh(WH_q + tf.tile(
                    (tf.matmul(W_p, H_p[i]) + tf.matmul(W_r, h) +
                     b_p), [question_max_length, 1]))
                alpha_forward = tf.nn.softmax(
                    w * G_forward + tf.tile(b_alpha, [question_max_length, 1]))

                z_forward = tf.concat([H_p[i], H_q * alpha_forward[i]], axis=0)
                h, forward_state = forward_cell(z_forward, forward_state)
                H_r_forward.append(h)

        with tf.variable_scope('backward_match_lstm'):
            backward_cell = tf.nn.rnn_cell.DropoutWrapper(
                tf.nn.rnn_cell.LSTMCell(hidden_size), output_keep_prob=dropout)
            backward_state = backward_cell.zero_state(batch_size,
                                                      dtype=tf.float32)
            h = backward_state.h
            for i in reversed(range(len(H_p))):
                G_backward = tf.tanh(WH_q + tf.tile(
                    (tf.matmul(W_p, H_p[i]) + tf.matmul(W_r, h) +
                     b_p), [question_max_length, 1]))
                alpha_backward = tf.nn.softmax(
                    w * G_backward +
                    tf.tile(b_alpha, [question_max_length, 1]))

                z_backward = tf.concat([H_p[i], H_q * alpha_backward[i]], axis=0)
                h, backward_state = backward_cell(z_backward, backward_state)
                H_r_backward.append(h)

        # After finding forward and backward `H_r[i]` for all `i`, concatenate
        # `H_r_forward` and the (reversed) `H_r_backward`
        H_r = tf.concat([tf.stack(H_r_forward),
                         tf.stack(H_r_backward[::-1])], axis=-1)

        # TODO: Assert that the shape of `H_r` is (2 * hidden_size, passage_max_length)

        ########################
        # Answer-Pointer layer #
        ########################

        # TODO: Switch this over to boundary model or add zero vector padding at end of H_r
        #       ^ Might not be necessary ??

        # Weights and bias to compute `F`
        V = self.weight_variable(shape=[hidden_size, 2 * hidden_size])
        W_a = self.weight_variable(shape=[hidden_size, hidden_size])
        b_a = self.bias_variable(shape=[hidden_size
                                        ])  # In the paper, this is `c`

        # Weight and bias to compute `beta`
        v = self.weight_variable(shape=[hidden_size])
        b_beta = self.bias_variable(shape=[])

        # Only calculate `VH` once
        VH = tf.matmul(V, H_r)  # shape (hidden_size, passage_max_length)

        H_a = []

        with tf.variable_scope('answer_pointer_lstm'):
            pointer_cell = tf.nn.rnn_cell.DropoutWrapper(
                tf.nn.rnn_cell.LSTMCell(hidden_size), output_keep_prob=dropout)
            pointer_state = pointer_cell.zero_state(batch_size,
                                                    dtype=tf.float32)
            h = pointer_state.h
            for k in range(len(H_p)):
                F = tf.tanh(VH + tf.tile((tf.matmul(W_a, h) +
                                          b_a), [passage_max_length, 1]))
                beta = tf.nn.softmax(v * F +
                                     tf.tile(b_beta, [passage_max_length, 1]))

                h, pointer_state = pointer_cell(tf.matmul(H_r, beta),
                                                pointer_state)
                H_a.append(h)

        # TODO: Replace the loss function below with the loss function from the paper
        loss = tf.reduce_mean(
            tf.reduce_sum(tf.pow(desired_output - output, 2),
                          reduction_indices=[1]))
        train_step = tf.train.AdamOptimizer(0.001).minimize(loss)

        self.passage = passage
        self.question = question
        self.output = output
        self.desired_output = desired_output
        self.train_step = train_step
        self.loss = loss
        self.dropout = dropout
Example #19
def train(with_gan=False,
          load_x=True,
          with_y=True,
          match_mask=False,
          in_frames=4):
    """Train ring_net for a number of steps."""
    with tf.Graph().as_default():
        x_all = tf.placeholder(tf.float32, [None, FLAGS.seq_length, 512, 1])
        if match_mask: with_gan = False
        # possible dropout inside
        keep_prob = tf.placeholder("float")
        #x_dropout = tf.nn.dropout(x, keep_prob)

        x_in = x_all[:, :in_frames, :, :]
        # conv network

        hidden = None
        x_unwrap = []
        for i in range(FLAGS.seq_length - 1):
            if i < FLAGS.seq_start:
                x_1, hidden = network_template(x_all[:, i:i + in_frames, :, :],
                                               hidden)
                x_unwrap.append(x_all[:, i + 1, :, :])
            else:  #conditional generation
                x_1, hidden = network_template(
                    tf.concat(x_unwrap[-in_frames:] + [x_1], axis=1),
                    hidden)
            x_unwrap.append(x_1)

        # pack them all together
        x_unwrap = tf.stack(x_unwrap)
        x_unwrap = tf.transpose(x_unwrap, [1, 0, 2, 3])

        # this part will be used for generating video
        x_unwrap_g = []
        hidden_g = None
        for i in range(30):
            if i < FLAGS.seq_start:
                x_1_g, hidden_g = network_template(
                    x_all[:, i:i + in_frames, :, :], hidden_g)
                x_unwrap_g.append(x_all[:, i + 1, :, :])
            else:  #conditional generation
                x_1_g, hidden_g = network_template(
                    tf.concat(x_unwrap_g[-in_frames:] + [x_1_g], axis=1),
                    hidden_g)
                x_unwrap_g.append(x_1_g)
                x_unwrap_g.append(x_1_g)

        # pack them generated ones
        x_unwrap_g = tf.stack(x_unwrap_g)
        x_unwrap_g = tf.transpose(x_unwrap_g, [1, 0, 2, 3])

        img = x_all[:, FLAGS.seq_start + 1:, :, :]
        img_ = x_unwrap[:, FLAGS.seq_start:, :, :]
        # calc total loss (compare x_t to x_t+1)
        loss_l2 = tf.nn.l2_loss(img - img_)
        #loss_l2 = rms_loss(img - img_) * 50
        tf.summary.scalar('loss_l2', loss_l2)

        if with_gan:
            img = x_all[:, FLAGS.seq_start:, :, :]
            img_ = y_1
            #import IPython; IPython.embed()
            D, D_logits, D3 = discriminator(img, reuse=False)
            #import IPython; IPython.embed()
            D_, D_logits_, D3_ = discriminator(
                y_1, reuse=True, fc_shape=D3.get_shape().as_list())
            d_loss_real = tf.reduce_mean(
                tf.nn.sigmoid_cross_entropy_with_logits(
                    logits=D_logits, labels=tf.ones_like(D)))
            d_loss_fake = tf.reduce_mean(
                tf.nn.sigmoid_cross_entropy_with_logits(
                    logits=D_logits_, labels=tf.zeros_like(D_)))
            d_loss = d_loss_real + d_loss_fake
            g_loss = tf.reduce_mean(
                tf.nn.sigmoid_cross_entropy_with_logits(
                    logits=D_logits_, labels=tf.ones_like(D_)))
            D3_loss = tf.nn.l2_loss(D3 - D3_)
            t_vars = tf.trainable_variables()
            d_vars = [var for var in t_vars if 'd_' in var.name]
            g_vars = [var for var in t_vars if 'd_' not in var.name]
            tf.summary.scalar('loss_g', g_loss)
            tf.summary.scalar('loss_d', d_loss)
            tf.summary.scalar('loss_feature', D3_loss)
            loss = 0.05 * (past_loss_l2 +
                           future_loss_l2) + g_loss + D3_loss * 1.e-4
            tf.summary.scalar('past_loss_l2', past_loss_l2)
            tf.summary.scalar('future_loss_l2', future_loss_l2)
            d_optim = tf.train.AdamOptimizer(FLAGS.lr).minimize(
                d_loss, var_list=d_vars)
            g_optim = tf.train.AdamOptimizer(FLAGS.lr).minimize(
                loss, var_list=g_vars)
            #import IPython; IPython.embed()
            train_op = tf.group(d_optim, g_optim)

        else:
            loss = past_loss_l2 + future_loss_l2
            tf.summary.scalar('loss', loss)

            # training
            optimizer = tf.train.AdamOptimizer(FLAGS.lr)
            gvs = optimizer.compute_gradients(loss)
            # gradient clipping
            capped_gvs = [(tf.clip_by_value(grad, -3., 3.), var)
                          for grad, var in gvs]
            train_op = optimizer.apply_gradients(capped_gvs)

        # List of all Variables
        variables = tf.global_variables()

        # Build a saver
        saver = tf.train.Saver(tf.global_variables())

        # Summary op
        summary_op = tf.summary.merge_all()

        # Build an initialization operation to run below.
        init = tf.global_variables_initializer()

        # Start running operations on the Graph.
        sess = tf.Session()

        # init if this is the very time training

        sess.run(init)
        if FLAGS.resume:
            latest = tf.train.latest_checkpoint(FLAGS.train_dir)
            if not latest:
                print("No checkpoint to continue from in", FLAGS.train_dir)
                sys.exit(1)
            print("resume", latest)
            saver.restore(sess, latest)
        else:
            print("init network from scratch")

        # Summary op
        graph_def = sess.graph.as_graph_def(add_shapes=True)
        summary_writer = tf.summary.FileWriter(FLAGS.train_dir,
                                               graph_def=graph_def)
        if not with_y:
            files = find_files(FLAGS.train_data_index)
        else:
            files = find_pairs(FLAGS.train_data_index)
        sample_dir = FLAGS.train_dir + '/samples/'
        if not os.path.exists(sample_dir):
            os.makedirs(sample_dir)
        for step in range(FLAGS.max_step):
            #<<<<<<< HEAD
            #dat = generate_bouncing_ball_sample(FLAGS.batch_size, FLAGS.seq_length, 32, FLAGS.num_balls)
            if load_x:
                dat = load_batch(FLAGS.batch_size, files, step)
            else:
                tgen = tf.range(start=0.,
                                limit=FLAGS.seq_length,
                                dtype=tf.float32)[tf.newaxis, tf.newaxis, ...,
                                                  tf.newaxis]
                fgen = tf.range(start=0., limit=512.,
                                dtype=tf.float32)[tf.newaxis, tf.newaxis,
                                                  tf.newaxis, ...]
                dat = sess.run(generate_x_batch(FLAGS.batch_size, tgen, fgen))
            fdict = {x_all: dat, keep_prob: FLAGS.keep_prob}
            #import IPython; IPython.embed()
            dat = load_batch(FLAGS.batch_size,
                             files,
                             step,
                             with_y=with_y,
                             normalize=FLAGS.norm_input)
            dat = random_flip(dat)
            #>>>>>>> gan-l
            t = time.time()
            errG, errD = sess.run([g_loss, d_loss],
                                  feed_dict={
                                      x_all: dat,
                                      keep_prob: FLAGS.keep_prob
                                  })
            if errG > 0.6 and errD > 0.6:
                _, loss_r = sess.run([train_op, loss],
                                     feed_dict={
                                         x_all: dat,
                                         keep_prob: FLAGS.keep_prob
                                     })
            else:
                i = 0
                while errG > 0.6:

                    _ = sess.run(g_optim,
                                 feed_dict={
                                     x_all: dat,
                                     keep_prob: FLAGS.keep_prob
                                 })
                    i += 1
                    if i > 2: break
                    else:
                        errG = sess.run(g_loss,
                                        feed_dict={
                                            x_all: dat,
                                            keep_prob: FLAGS.keep_prob
                                        })
                print('G', i, errG)

                i = 0
                while errD > 0.6:
                    # only update discriminator if loss are within given bounds
                    _ = sess.run(d_optim,
                                 feed_dict={
                                     x_all: dat,
                                     keep_prob: FLAGS.keep_prob
                                 })
                    i += 1
                    if i > 2: break
                    else:
                        errD = sess.run(d_loss,
                                        feed_dict={
                                            x_all: dat,
                                            keep_prob: FLAGS.keep_prob
                                        })
                print('D', i, errD)
                loss_r = sess.run(loss,
                                  feed_dict={
                                      x_all: dat,
                                      keep_prob: FLAGS.keep_prob
                                  })
            #_, loss_r = sess.run([train_op, loss],feed_dict={x:dat, keep_prob:FLAGS.keep_prob})
            elapsed = time.time() - t

            if step % 1000 == 0 and step != 0:
                summary_str = sess.run(summary_op,
                                       feed_dict={
                                           x_all: dat,
                                           keep_prob: FLAGS.keep_prob
                                       })
                summary_writer.add_summary(summary_str, step)
                print("time per batch is " + str(elapsed))
                print(step)
                print(loss_r)

            assert not np.isnan(loss_r), 'Model diverged with loss = NaN'

            if step % 4000 == 0:
                checkpoint_path = os.path.join(FLAGS.train_dir, 'model.ckpt')
                saver.save(sess, checkpoint_path, global_step=step)
                print("saved to " + FLAGS.train_dir)

                print("now saving sample!")
                im_x, im_y = sess.run([x_1, y_1],
                                      feed_dict={
                                          x_all: dat,
                                          keep_prob: FLAGS.keep_prob
                                      })
                if match_mask:
                    im_x = im_x[..., 1]
                    im_y = im_y[..., 1]
                _plot_samples(dat[:, :FLAGS.seq_start, :, :].squeeze(),
                              sample_dir + 'step_{}_past_t.png'.format(step))
                _plot_samples(im_x.squeeze(),
                              sample_dir + 'step_{}_past.png'.format(step))
                _plot_samples(dat[:, FLAGS.seq_start:, :, :].squeeze(),
                              sample_dir + 'step_{}_future_t.png'.format(step))
                _plot_samples(im_y.squeeze(),
                              sample_dir + 'step_{}_future.png'.format(step))
Example #20
tf.linalg.matmul(t4, tf.transpose(t5), transpose_a=True)

tf.math.reduce_mean(t5)
tf.math.reduce_std(t5)
tf.math.reduce_sum(t5)

tf.linalg.norm(t5, 2)

# How to split, stack, and concatenate? (what is the difference between stack and concat?)
tf.split(t3, num_or_size_splits=3, axis=0)
tf.split(t5, num_or_size_splits=[1, 2], axis=1)

tf.stack([t4, tf.transpose(t5)], axis=1)
tf.concat([t3, t4], axis=0)

# TF DATASETS API
# When can the Keras API .fit() be used, and when not?
# How to construct a tf Dataset from existing tensors, lists or arrays?
# What are some Dataset preprocessing steps: how to get different rows of a dataset? how to create batches?
# How to create a dataset from t_x (features) and t_y (labels)
# How to apply a transformation to each element of a dataset? (for instance function(x, y) -> (x_normalized, y))
# When to shuffle? batch? how to go through several epochs? --> how to iterate 3 times over a dataset that is shuffled in batches of 2?

# How to create a dataset from a file on my local storage disk?
# Take the images folder to create a dataset and display the images

# Show built-in datasets: import MNIST and show its characteristics
# Create train and test datasets and visualize 10 images
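
# A hedged sketch answering the questions above (names and shapes are made up):
t_x = tf.random.uniform((6, 3))  # features
t_y = tf.range(6)                # labels
ds = tf.data.Dataset.from_tensor_slices((t_x, t_y))
ds = ds.map(lambda x, y: (x / tf.reduce_max(x), y))  # per-element transform
ds = ds.shuffle(buffer_size=6).batch(2).repeat(3)    # 3 epochs, batches of 2
for x_batch, y_batch in ds:
    pass

# From files on local disk (the glob pattern is an assumption):
# files_ds = tf.data.Dataset.list_files('images/*.png')

# Built-in datasets, e.g. MNIST via Keras:
# (x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()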
Example #21
def periodic_pad(x, p):  # assumed signature; the scraped example starts mid-function
    top_left = x[:, -p:, -p:] # top left
    top_center = x[:, -p:, :] # top center
    top_right = x[:, -p:, :p] # top right
    middle_left = x[:, :, -p:] # middle left
    middle_center = x # middle center
    middle_right = x[:, :, :p] # middle right
    bottom_left = x[:, :p, -p:] # bottom left
    bottom_center = x[:, :p, :] # bottom center
    bottom_right = x[:, :p, :p] # bottom right
    top = tf.concat([top_left, top_center, top_right], axis=2)
    middle = tf.concat([middle_left, middle_center, middle_right], axis=2)
    bottom = tf.concat([bottom_left, bottom_center, bottom_right], axis=2)
    padded_x = tf.concat([top, middle, bottom], axis=1)
    return padded_x

import tensorflow as tf
import numpy as np
y = tf.placeholder(name='y', dtype=tf.float32, shape=[None,3,3])
z_rand = tf.placeholder(name='z_r', dtype=tf.float32, shape = [None, 3,3])
z = tf.concat([z_rand, y], axis=1)
sess = tf.InteractiveSession()
b = np.random.normal(size = [5,3,3])
#
# # print('a:')
# print(b)
# # print('padded a:')
c = np.array([[1],[2],[1],[2],[3]])
d = np.repeat(c,9).reshape(5,3,3)
print( sess.run(z, feed_dict = {z_rand:b, y:d}) )
print(z)
# sess.close()
# import tensorflow as tf
# import numpy as np
# import matplotlib.pyplot as plt
# import matplotlib.gridspec as gridspec
Example #22
    def __call__(self, x, condition=None):
        if condition is not None:
            x = tf.concat([x, condition], axis=-1)
        x, end_points = self.network(x)

        return x
Example #23
    def attention(self,
                  pre_q,
                  pre_v,
                  pre_k,
                  out_seq_len: int,
                  d_model: int,
                  training=None):
        """
        Calculates the output of the attention once the affine transformations
        of the inputs are done. Here's the shapes of the arguments:
        :param pre_q: (batch_size, q_seq_len, num_heads, d_model // num_heads)
        :param pre_v: (batch_size, v_seq_len, num_heads, d_model // num_heads)
        :param pre_k: (batch_size, k_seq_len, num_heads, d_model // num_heads)
        :param out_seq_len: the length of the output sequence
        :param d_model: dimensionality of the model (by the paper)
        :param training: Passed by Keras. Should not be defined manually.
          Optional scalar tensor indicating if we're in training
          or inference phase.
        """
        # shaping Q and V into (batch_size, num_heads, seq_len, d_model//heads)
        q = tf.transpose(pre_q, [0, 2, 1, 3])
        v = tf.transpose(pre_v, [0, 2, 1, 3])

        if self.compression_window_size is None:
            k_transposed = tf.transpose(pre_k, [0, 2, 3, 1])
        else:
            # Memory-compressed attention described in paper
            # "Generating Wikipedia by Summarizing Long Sequences"
            # (https://arxiv.org/pdf/1801.10198.pdf)
            # It compresses keys and values using 1D-convolution which reduces
            # the size of Q * K_transposed from roughly seq_len^2
            # to convoluted_seq_len^2. If we use strided convolution with
            # window size = 3 and stride = 3, memory requirements of such
            # memory-compressed attention will be 9 times smaller than
            # that of the original version.
            if self.use_masking:
                raise NotImplementedError(
                    "Masked memory-compressed attention has not "
                    "been implemented yet")
            k = tf.transpose(pre_k, [0, 2, 1, 3])
            k, v = [
                tf.reshape(
                    # Step 3: Return the result to its original dimensions
                    # (batch_size, num_heads, seq_len, d_model//heads)
                    tf.nn.bias_add(
                        # Step 3: ... and add bias
                        tf.nn.conv1d(
                            # Step 2: we "compress" K and V using strided conv
                            tf.reshape(
                                # Step 1: we reshape K and V to
                                # (batch + num_heads,  seq_len, d_model//heads)
                                item,
                                (-1, tf.shape(item)[-2],
                                 d_model // self.num_heads)),
                            kernel,
                            stride=self.compression_window_size,
                            padding='VALID',
                            data_format='NWC'),
                        bias),
                    # new shape
                    tf.concat(
                        [tf.shape(item)[:2], [-1, d_model // self.num_heads]],
                        axis=0))
                for item, kernel, bias in ((k, self.k_conv_kernel,
                                            self.k_conv_bias),
                                           (v, self.v_conv_kernel,
                                            self.v_conv_bias))
            ]
            k_transposed = tf.transpose(k, [0, 1, 3, 2])
        # shaping K into (batch_size, num_heads, d_model//heads, seq_len)
        # for further matrix multiplication
        a = tf.cast(d_model // self.num_heads, dtype=tf.float32)
        sqrt_d = tf.math.sqrt(a)
        q_shape = tf.shape(q)
        k_t_shape = tf.shape(k_transposed)
        v_shape = tf.shape(v)
        # before the batched multiplication all tensors are converted to 3D
        # shape (batch_size * num_heads, rows, cols) so that tf.matmul
        # performs a plain batched matrix multiplication
        new_q_shape = tf.concat([[-1], q_shape[-2:]], axis=0)
        new_k_shape = tf.concat([[-1], k_t_shape[-2:]], axis=0)
        new_v_shape = tf.concat([[-1], v_shape[-2:]], axis=0)
        factor1 = tf.reshape(q, new_q_shape)
        factor2 = tf.reshape(k_transposed, new_k_shape)
        factor3 = tf.reshape(v, new_v_shape)
        batch_dot_raw = tf.matmul(factor1, factor2)
        attention_heads = tf.reshape(
            tf.matmul(
                self.apply_dropout_if_needed(tf.nn.softmax(
                    self.mask_attention_if_needed(batch_dot_raw / sqrt_d)),
                                             training=training), factor3),
            (-1, self.num_heads, q_shape[-2], v_shape[-1]))
        attention_heads_merged = tf.reshape(
            tf.transpose(attention_heads, [0, 2, 1, 3]), (-1, d_model))
        attention_out = tf.reshape(
            tf.tensordot(attention_heads_merged, self.output_weights, axes=1),
            (-1, out_seq_len, d_model))
        return attention_out
Example #24
    def choose_action(self, state, goal, epsilon=None):
        return self.controller.choose_action(np.concatenate((state, goal),
                                                            axis=0),
                                             epsilon=epsilon)
Example #25
def augment(pointcloud_inp, pointcloud_indices_0_inp, heatmapBatches, augmentation, numPoints=50000,
            numInputChannels=7):
    pointcloud_indices_inp = np.zeros((FETCH_BATCH_SIZE, 6, NUM_POINTS), dtype='int32')
    newHeatmapBatches = [[] for heatmapIndex in range(len(heatmapBatches))]

    for imageIndex in range(pointcloud_inp.shape[0]):
        # pointcloud = pointcloud_inp[imageIndex]
        # pointcloud_indices_0 = pointcloud_indices_0_inp[imageIndex]
        # corner = corner_gt[imageIndex]
        # icon = icon_gt[imageIndex]
        # room = room_gt[imageIndex]
        # feature = feature_inp[imageIndex]
        # if 'w' in augmentation:
        #     pointcloud_indices_0, [corner, icon, room, feature] = augmentWarping(pointcloud_indices_0, [corner, icon, room, feature], gridStride=32., randomScale=4)
        #     pass
        # if 's' in augmentation:
        #     pointcloud_indices_0, [corner, icon, room, feature] = augmentScaling(pointcloud_indices_0, [corner, icon, room, feature], randomScale=0)
        #     pass
        # if 'f' in augmentation:
        #     pointcloud_indices_0, [corner, icon, room, feature] = augmentFlipping(pointcloud_indices_0, [corner, icon, room, feature])
        #     pass
        # if 'd' in augmentation:
        #     pointcloud, pointcloud_indices_0 = augmentDropping(pointcloud, pointcloud_indices_0, changeIndices=True)
        #     pass
        # if 'p' in augmentation:
        #     pointcloud, pointcloud_indices_0 = augmentDropping(pointcloud, pointcloud_indices_0, changeIndices=False)
        #     pass

        # pointcloud_inp[imageIndex] = pointcloud
        # pointcloud_indices_inp[imageIndex] = getCoarseIndicesMaps(pointcloud_indices_0, WIDTH, HEIGHT, 0)
        # corner_gt[imageIndex] = corner
        # icon_gt[imageIndex] = icon
        # room_gt[imageIndex] = room
        # feature_inp[imageIndex] = feature

        newHeatmaps = [heatmapBatch[imageIndex] for heatmapBatch in heatmapBatches]
        if 'w' in augmentation:
            pointcloud_indices_0_inp[imageIndex], newHeatmaps = augmentWarping(pointcloud_indices_0_inp[imageIndex],
                                                                               newHeatmaps, gridStride=32,
                                                                               randomScale=4)
            pass
        if 's' in augmentation:
            pointcloud_inp[imageIndex], pointcloud_indices_0_inp[imageIndex], newHeatmaps = augmentScaling(
                pointcloud_inp[imageIndex], pointcloud_indices_0_inp[imageIndex], newHeatmaps)
            pass
        if 'f' in augmentation:
            pointcloud_inp[imageIndex], pointcloud_indices_0_inp[imageIndex], newHeatmaps = augmentFlipping(
                pointcloud_inp[imageIndex], pointcloud_indices_0_inp[imageIndex], newHeatmaps)
            pass
        if 'd' in augmentation:
            pointcloud_inp[imageIndex], pointcloud_indices_0_inp[imageIndex] = augmentDropping(
                pointcloud_inp[imageIndex], pointcloud_indices_0_inp[imageIndex], changeIndices=True)
            pass
        if 'p' in augmentation:
            pointcloud_inp[imageIndex], pointcloud_indices_0_inp[imageIndex] = augmentDropping(
                pointcloud_inp[imageIndex], pointcloud_indices_0_inp[imageIndex], changeIndices=False)
            pass

        # print(pointcloud_indices_0_inp[imageIndex].shape, pointcloud_indices_inp[imageIndex].shape)
        pointcloud_indices_inp[imageIndex] = getCoarseIndicesMaps(pointcloud_indices_0_inp[imageIndex], WIDTH, HEIGHT,
                                                                  0)
        for heatmapIndex, newHeatmap in enumerate(newHeatmaps):
            newHeatmapBatches[heatmapIndex].append(newHeatmap)
            continue
        continue
    newHeatmapBatches = [np.array(newHeatmapBatch) for newHeatmapBatch in newHeatmapBatches]
    pointcloud_inp = np.concatenate([pointcloud_inp, np.ones((FETCH_BATCH_SIZE, NUM_POINTS, 1))], axis=2)
    # print(pointcloud_inp.shape)
    # writePointCloud('test/pointcloud.ply', pointcloud_inp[0, :, :6])
    # exit(1)

    if numPoints < pointcloud_inp.shape[1]:
        sampledInds = np.arange(pointcloud_inp.shape[1])
        np.random.shuffle(sampledInds)
        sampledInds = sampledInds[:numPoints]
        pointcloud_inp = pointcloud_inp[:, sampledInds]
        pointcloud_indices_inp = pointcloud_indices_inp[:, :, sampledInds]
        pass

    if numInputChannels == 4:
        pointcloud_inp = np.concatenate([pointcloud_inp[:, :, :3], pointcloud_inp[:, :, 6:]], axis=2)
        pass

    return pointcloud_inp, pointcloud_indices_inp, newHeatmapBatches
Example #26
def dense_layer(x, layer_configs):
    layers = []
    for i in range(2):
        if layer_configs[i]["layer_type"] == "Conv2D":
            layer = Conv2D(layer_configs[i]["filters"],
                           layer_configs[i]["kernel_size"],
                           strides=layer_configs[i]["strides"],
                           padding=layer_configs[i]["padding"],
                           activation=layer_configs[i]["activation"])(x)
        layers.append(layer)

    for n in range(2, len(layer_configs)):
        if layer_configs[n]["layer_type"] == "Conv2D":
            layer = Conv2D(layer_configs[n]["filters"],
                           layer_configs[n]["kernel_size"],
                           strides=layer_configs[n]["strides"],
                           padding=layer_configs[n]["padding"],
                           activation=layer_configs[n]["activation"])(
                               concatenate(layers, axis=3))
        layers.append(layer)
    return layer
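
# For reference, a hypothetical `layer_configs` the helper above would accept;
# every key and value is an assumption inferred from how the dicts are indexed:
layer_configs = [
    {"layer_type": "Conv2D", "filters": 16, "kernel_size": (3, 3),
     "strides": (1, 1), "padding": "same", "activation": "relu"},
    {"layer_type": "Conv2D", "filters": 16, "kernel_size": (3, 3),
     "strides": (1, 1), "padding": "same", "activation": "relu"},
    {"layer_type": "Conv2D", "filters": 32, "kernel_size": (3, 3),
     "strides": (1, 1), "padding": "same", "activation": "relu"},
]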
Example #27
def yolo4_loss(args, anchors, num_classes, ignore_thresh=.5, print_loss=False):
    '''Return yolo_loss tensor

    Parameters
    ----------
    yolo_outputs: list of tensor, the output of yolo_body or tiny_yolo_body
    y_true: list of array, the output of preprocess_true_boxes
    anchors: array, shape=(N, 2), wh
    num_classes: integer
    ignore_thresh: float, the iou threshold whether to ignore object confidence loss

    Returns
    -------
    loss: tensor, shape=(1,)

    '''
    num_layers = len(anchors) // 3  # default setting
    yolo_outputs = args[:num_layers]
    y_true = args[num_layers:]
    anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]
                   ] if num_layers == 3 else [[3, 4, 5], [1, 2, 3]]
    input_shape = tf.cast(
        tf.shape(yolo_outputs[0])[1:3] * 32, y_true[0].dtype)
    grid_shapes = [
        tf.cast(tf.shape(yolo_outputs[l])[1:3], y_true[0].dtype)
        for l in range(num_layers)
    ]
    loss = 0
    m = tf.shape(yolo_outputs[0])[0]  # batch size, tensor
    mf = tf.cast(m, yolo_outputs[0].dtype)

    for l in range(num_layers):
        object_mask = y_true[l][..., 4:5]
        true_class_probs = y_true[l][..., 5:]

        grid, raw_pred, pred_xy, pred_wh = yolo_head(yolo_outputs[l],
                                                     anchors[anchor_mask[l]],
                                                     num_classes,
                                                     input_shape,
                                                     calc_loss=True)
        pred_box = tf.concat([pred_xy, pred_wh], axis=-1)

        # Darknet raw box to calculate loss.
        raw_true_xy = y_true[l][..., :2] * grid_shapes[l][::-1] - grid
        raw_true_wh = tf.math.log(y_true[l][..., 2:4] / anchors[anchor_mask[l]] *
                                  input_shape[::-1])
        raw_true_wh = tf.where(
            tf.cast(object_mask, 'bool'), raw_true_wh,
            tf.zeros_like(raw_true_wh))  # avoid log(0)=-inf
        box_loss_scale = 2 - y_true[l][..., 2:3] * y_true[l][..., 3:4]

        # Find ignore mask, iterate over each of batch.
        ignore_mask = tf.TensorArray(y_true[0].dtype,
                                     size=1,
                                     dynamic_size=True)
        object_mask_bool = tf.cast(object_mask, 'bool')

        def loop_body(b, ignore_mask):
            true_box = tf.boolean_mask(y_true[l][b, ..., 0:4],
                                       object_mask_bool[b, ..., 0])
            iou = box_iou(pred_box[b], true_box)
            best_iou = tf.reduce_max(iou, axis=-1)
            ignore_mask = ignore_mask.write(
                b, tf.cast(best_iou < ignore_thresh, true_box.dtype))
            return b + 1, ignore_mask

        _, ignore_mask = tf.while_loop(
            lambda b, *args: b < m, loop_body, [0, ignore_mask])
        ignore_mask = ignore_mask.stack()
        ignore_mask = tf.expand_dims(ignore_mask, -1)

        # Sigmoid cross-entropy from logits is helpful to avoid exp overflow.
        xy_loss = object_mask * box_loss_scale * tf.nn.sigmoid_cross_entropy_with_logits(
            labels=raw_true_xy, logits=raw_pred[..., 0:2])
        wh_loss = object_mask * box_loss_scale * 0.5 * tf.square(
            raw_true_wh - raw_pred[..., 2:4])
        confidence_loss = object_mask * tf.nn.sigmoid_cross_entropy_with_logits(labels=object_mask, logits=raw_pred[..., 4:5]) + \
                          (1 - object_mask) * tf.nn.sigmoid_cross_entropy_with_logits(labels=object_mask,
                                                                                      logits=raw_pred[..., 4:5]) * ignore_mask
        class_loss = object_mask * tf.nn.sigmoid_cross_entropy_with_logits(
            labels=true_class_probs, logits=raw_pred[..., 5:])

        xy_loss = tf.reduce_sum(xy_loss) / mf
        wh_loss = tf.reduce_sum(wh_loss) / mf
        confidence_loss = tf.reduce_sum(confidence_loss) / mf
        class_loss = tf.reduce_sum(class_loss) / mf
        loss += xy_loss + wh_loss + confidence_loss + class_loss
        if print_loss:
            loss = tf.Print(loss, [
                loss, xy_loss, wh_loss, confidence_loss, class_loss,
                tf.reduce_sum(ignore_mask)
            ],
                            message='loss: ')
    return loss
Example #28
def filter_detections(boxes,
                      classification,
                      other=[],
                      class_specific_filter=True,
                      nms=True,
                      score_threshold=0.05,
                      max_detections=300,
                      nms_threshold=0.5):
    """ Filter detections using the boxes and classification values.

    Args
        boxes                 : Tensor of shape (num_boxes, 4) containing the boxes in (x1, y1, x2, y2) format.
        classification        : Tensor of shape (num_boxes, num_classes) containing the classification scores.
        other                 : List of tensors of shape (num_boxes, ...) to filter along with the boxes and classification scores.
        class_specific_filter : Whether to perform filtering per class, or take the best scoring class and filter those.
        nms                   : Flag to enable/disable non-maximum suppression.
        score_threshold       : Score threshold used to pre-filter the boxes.
        max_detections        : Maximum number of detections to keep.
        nms_threshold         : Threshold for the IoU value to determine when a box should be suppressed.

    Returns
        A list of [boxes, scores, labels, other[0], other[1], ...].
        boxes is shaped (max_detections, 4) and contains the (x1, y1, x2, y2) of the non-suppressed boxes.
        scores is shaped (max_detections,) and contains the scores of the predicted class.
        labels is shaped (max_detections,) and contains the predicted label.
        other[i] is shaped (max_detections, ...) and contains the filtered other[i] data.
        If there are fewer than max_detections detections, the tensors are padded with -1.
    """
    def _filter_detections(scores, labels):
        # threshold based on score
        indices = tf.where(tf.greater(scores, score_threshold))

        if nms:
            filtered_boxes = tf.gather_nd(boxes, indices)
            filtered_scores = tf.gather(scores, indices)[:, 0]

            # perform NMS
            nms_indices = tf.image.non_max_suppression(
                filtered_boxes,
                filtered_scores,
                max_output_size=max_detections,
                iou_threshold=nms_threshold)

            # filter indices based on NMS
            indices = tf.gather(indices, nms_indices)

        # pair each surviving box index with its class label
        labels = tf.gather_nd(labels, indices)
        indices = tf.stack([indices[:, 0], labels], axis=1)

        return indices

    if class_specific_filter:
        all_indices = []
        # perform per class filtering
        for c in range(int(classification.shape[1])):
            scores = classification[:, c]
            labels = c * tf.ones((tf.shape(scores)[0], ), dtype='int64')
            all_indices.append(_filter_detections(scores, labels))

        # concatenate indices to single tensor
        indices = tf.concat(all_indices, axis=0)
    else:
        scores = tf.reduce_max(classification, axis=1)
        labels = tf.argmax(classification, axis=1)
        indices = _filter_detections(scores, labels)

    # select top k
    scores = tf.gather_nd(classification, indices)
    labels = indices[:, 1]
    scores, top_indices = tf.nn.top_k(scores,
                                      k=tf.minimum(max_detections,
                                                   tf.shape(scores)[0]))

    # filter input using the final set of indices
    indices = tf.gather(indices[:, 0], top_indices)
    boxes = tf.gather(boxes, indices)
    labels = tf.gather(labels, top_indices)
    other_ = [tf.gather(o, indices) for o in other]

    # pad the outputs with -1 up to max_detections
    pad_size = tf.maximum(0, max_detections - tf.shape(scores)[0])
    boxes = tf.pad(boxes, [[0, pad_size], [0, 0]], constant_values=-1)
    scores = tf.pad(scores, [[0, pad_size]], constant_values=-1)
    labels = tf.pad(labels, [[0, pad_size]], constant_values=-1)
    labels = tf.cast(labels, 'int32')
    other_ = [
        tf.pad(o, [[0, pad_size]] + [[0, 0] for _ in range(1, len(o.shape))],
               constant_values=-1) for o in other_
    ]

    # set shapes, since we know what they are
    boxes.set_shape([max_detections, 4])
    scores.set_shape([max_detections])
    labels.set_shape([max_detections])
    for o, s in zip(other_, [o.shape.as_list() for o in other]):
        o.set_shape([max_detections] + s[1:])

    return [boxes, scores, labels] + other_
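A quick usage sketch (hypothetical shapes; the dummy-box construction below is purely illustrative):

import tensorflow as tf

# 100 candidate boxes over 3 classes, all 20x20 pixels.
x1y1 = tf.random_uniform((100, 2)) * 80.0
boxes = tf.concat([x1y1, x1y1 + 20.0], axis=1)   # valid (x1, y1, x2, y2)
classification = tf.random_uniform((100, 3))     # per-class scores

detections = filter_detections(boxes,
                               classification,
                               score_threshold=0.3,
                               max_detections=10)

with tf.Session() as sess:
    out_boxes, out_scores, out_labels = sess.run(detections)
    # Each output has exactly 10 rows; unused slots are filled with -1.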
Example #29
    def build_model(self):

        self.x_real = tf.placeholder(
            tf.float32,
            shape=[None, np.prod(self.input_shape)],
            name='x_input')
        self.y_real = tf.placeholder(tf.float32,
                                     shape=[None, self.nb_classes],
                                     name='y_input')

        self.encoder_input_shape = int(np.prod(self.input_shape))

        self.config['encoder params']['name'] = 'EncoderX'
        self.config['encoder params']["output dims"] = self.z_dim
        self.x_encoder = get_encoder(self.config['x encoder'],
                                     self.config['encoder params'],
                                     self.is_training)

        self.config['decoder params']['name'] = 'Decoder'
        self.config['decoder params']["output dims"] = self.encoder_input_shape

        self.y_encoder = get_encoder(self.config['y encoder'],
                                     self.config['y encoder params'],
                                     self.is_training)
        self.decoder = get_decoder(self.config['decoder'],
                                   self.config['decoder params'],
                                   self.is_training)

        # build encoder
        self.z_mean, self.z_log_var = self.x_encoder(
            tf.concat([self.x_real, self.y_real], axis=-1))
        self.z_mean_y = self.y_encoder(self.y_real)

        # sample z from z_mean and z_log_var
        self.z_sample = self.draw_sample(self.z_mean, self.z_log_var)

        # build decoder
        self.x_decode = self.decoder(self.z_sample)

        # build test decoder
        self.z_test = tf.placeholder(tf.float32,
                                     shape=[None, self.z_dim],
                                     name='z_test')
        self.x_test = self.decoder(self.z_test, reuse=True)

        # loss function
        self.kl_loss = (get_loss(
            'kl', self.config['kl loss'], {
                'z_mean': (self.z_mean - self.z_mean_y),
                'z_log_var': self.z_log_var
            }) * self.config.get('kl loss prod', 1.0))
        self.xent_loss = (
            get_loss('reconstruction', self.config['reconstruction loss'], {
                'x': self.x_real,
                'y': self.x_decode
            }) * self.config.get('reconstruction loss prod', 1.0))
        self.loss = self.kl_loss + self.xent_loss

        # optimizer configure
        self.global_step, self.global_step_update = get_global_step()
        if 'lr' in self.config:
            self.learning_rate = get_learning_rate(self.config['lr_scheme'],
                                                   float(self.config['lr']),
                                                   self.global_step,
                                                   self.config['lr_params'])
            self.optimizer = get_optimizer(
                self.config['optimizer'],
                {'learning_rate': self.learning_rate}, self.loss,
                self.decoder.vars + self.x_encoder.vars + self.y_encoder.vars)
        else:
            self.optimizer = get_optimizer(
                self.config['optimizer'], {}, self.loss,
                self.decoder.vars + self.x_encoder.vars + self.y_encoder.vars)

        self.train_update = tf.group([self.optimizer, self.global_step_update])

        # model saver
        self.saver = tf.train.Saver(self.x_encoder.vars + self.y_encoder.vars +
                                    self.decoder.vars + [self.global_step])
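draw_sample is not shown in this example; a standard reparameterization-trick implementation (a sketch, assuming z_mean and z_log_var are (batch, z_dim) tensors) would be:

import tensorflow as tf

def draw_sample(z_mean, z_log_var):
    # Reparameterization trick: z = mu + sigma * eps with eps ~ N(0, I),
    # which keeps the sampling step differentiable w.r.t. z_mean and z_log_var.
    eps = tf.random_normal(tf.shape(z_mean))
    return z_mean + tf.exp(0.5 * z_log_var) * eps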
Example #30
    def __call__(self, i, condition=None):

        act_fn = get_activation(self.config.get('activation', 'relu'))

        norm_fn, norm_params = get_normalization(
            self.config.get('batch_norm', 'batch_norm'),
            self.config.get('batch_norm_params', self.normalizer_params))

        winit_fn = get_weightsinit(
            self.config.get('weightsinit', 'normal 0.00 0.02'))

        nb_fc_nodes = self.config.get('nb_fc_nodes', [1024, 1024])

        output_dims = self.config.get("output dims", 3)
        output_act_fn = get_activation(
            self.config.get('output_activation', 'none'))

        x, end_points = self.network(i)

        x = tcl.flatten(x)
        if condition is not None:
            x = tf.concat([x, condition], axis=-1)

        with tf.variable_scope(self.name):
            if self.reuse:
                tf.get_variable_scope().reuse_variables()
            else:
                assert tf.get_variable_scope().reuse is False
                self.reuse = True

            for ind, nb_nodes in enumerate(nb_fc_nodes):
                x = tcl.fully_connected(x,
                                        nb_nodes,
                                        activation_fn=act_fn,
                                        normalizer_fn=norm_fn,
                                        normalizer_params=norm_params,
                                        weights_initializer=winit_fn,
                                        scope='fc%d' % ind)

            if self.output_distribution == 'gaussian':
                mean = tcl.fully_connected(x,
                                           output_dims,
                                           activation_fn=output_act_fn,
                                           weights_initializer=winit_fn,
                                           scope='fc_out_mean')
                log_var = tcl.fully_connected(x,
                                              output_dims,
                                              activation_fn=output_act_fn,
                                              weights_initializer=winit_fn,
                                              scope='fc_out_log_var')
                return mean, log_var

            elif self.output_distribution == 'mean':
                mean = tcl.fully_connected(x,
                                           output_dims,
                                           activation_fn=output_act_fn,
                                           weights_initializer=winit_fn,
                                           scope='fc_out_mean')
                return mean

            elif self.output_distribution == 'none':
                out = tcl.fully_connected(x,
                                          output_dims,
                                          activation_fn=output_act_fn,
                                          weights_initializer=winit_fn,
                                          scope='fc_out_mean')
                return out
            else:
                raise ValueError("Unknown output distribution: " +
                                 self.output_distribution)
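For reference, a hypothetical config dict covering the keys this __call__ reads (the values mirror the defaults passed to config.get above, not a documented schema):

config = {
    'activation': 'relu',               # hidden-layer activation
    'batch_norm': 'batch_norm',         # normalization function name
    'batch_norm_params': {'is_training': True},  # assumed normalizer kwargs
    'weightsinit': 'normal 0.00 0.02',  # weight initializer spec
    'nb_fc_nodes': [1024, 1024],        # one fully connected layer per entry
    'output dims': 3,                   # width of the output head(s)
    'output_activation': 'none',        # activation on the output head
}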