def t_noise3d(v, perm, grad3):
    """Build the symbolic 3-D simplex-noise value at a single point.

    The point is skewed onto the integer lattice, the simplex containing it
    is chosen by ordering the offsets inside the cell, and the contributions
    of the four simplex corners are summed.

    Relies on the module-level ``vertices_options`` table (defined outside
    this function): six entries, each giving the lattice steps to the second
    and third simplex corners as ``entry[0]`` and ``entry[1]``.

    Args:
        v: Indexable holding the x, y and z coordinates at positions 0..2.
        perm: Permutation table indexed with int32 expressions. Indices reach
            ``255 + 1`` plus a table entry, so the table presumably has 512
            entries -- TODO confirm against the caller.
        grad3: Gradient table indexed by ``perm[...] % 12``; each selected
            row is dotted with a 3-component offset.

    Returns:
        The symbolic expression ``23.0 * (n0 + n1 + n2 + n3)``.
    """
    x = v[0]
    y = v[1]
    z = v[2]

    # Skew the input space to find the lattice cell containing the point.
    skew_factor = (x + y + z) * 1.0 / 3.0
    i = T.floor(x + skew_factor)
    j = T.floor(y + skew_factor)
    k = T.floor(z + skew_factor)

    # Unskew the cell origin and take the offsets of the point from it.
    unskew_factor = (i + j + k) * 1.0 / 6.0
    x0 = x - (i - unskew_factor)
    y0 = y - (j - unskew_factor)
    z0 = z - (k - unskew_factor)

    # Pick one of the six simplices of the cell from the offset ordering.
    vertices = T.switch(
        T.ge(x0, y0),
        T.switch(
            T.ge(y0, z0), vertices_options[0],
            T.switch(T.ge(x0, z0), vertices_options[1], vertices_options[2])),
        T.switch(
            T.lt(y0, z0), vertices_options[3],
            T.switch(T.lt(x0, z0), vertices_options[4], vertices_options[5])))

    # Offsets of the point from the second, third and last simplex corners.
    x1 = x0 - vertices[0][0] + 1.0 / 6.0
    y1 = y0 - vertices[0][1] + 1.0 / 6.0
    z1 = z0 - vertices[0][2] + 1.0 / 6.0
    x2 = x0 - vertices[1][0] + 1.0 / 3.0
    y2 = y0 - vertices[1][1] + 1.0 / 3.0
    z2 = z0 - vertices[1][2] + 1.0 / 3.0
    x3 = x0 - 0.5
    y3 = y0 - 0.5
    z3 = z0 - 0.5

    # Wrap the cell coordinates to 0..255 before hashing through ``perm``.
    ii = T.bitwise_and(i.astype('int32'), 255)
    jj = T.bitwise_and(j.astype('int32'), 255)
    kk = T.bitwise_and(k.astype('int32'), 255)

    # Hashed gradient index (0..11) for each of the four corners.
    gi0 = perm[ii + perm[jj + perm[kk].astype('int32')].astype('int32')] % 12
    gi1 = perm[ii + vertices[0][0] + perm[jj + vertices[0][1] + perm[
        kk + vertices[0][2]].astype('int32')].astype('int32')] % 12
    gi2 = perm[ii + vertices[1][0] + perm[jj + vertices[1][1] + perm[
        kk + vertices[1][2]].astype('int32')].astype('int32')] % 12
    gi3 = perm[ii + 1 +
               perm[jj + 1 +
                    perm[kk + 1].astype('int32')].astype('int32')] % 12

    # Corner contributions: zero once the falloff term goes negative.
    t0 = 0.5 - x0**2 - y0**2 - z0**2
    n0 = T.switch(T.lt(t0, 0), 0.0,
                  t0**4 * T.dot(grad3[gi0.astype('int32')], [x0, y0, z0]))
    t1 = 0.5 - x1**2 - y1**2 - z1**2
    # A stray trailing comma used to turn ``n1`` into a 1-tuple here.
    n1 = T.switch(T.lt(t1, 0), 0.0,
                  t1**4 * T.dot(grad3[gi1.astype('int32')], [x1, y1, z1]))
    t2 = 0.5 - x2**2 - y2**2 - z2**2
    n2 = T.switch(T.lt(t2, 0), 0.0,
                  t2**4 * T.dot(grad3[gi2.astype('int32')], [x2, y2, z2]))
    t3 = 0.5 - x3**2 - y3**2 - z3**2
    n3 = T.switch(T.lt(t3, 0), 0.0,
                  t3**4 * T.dot(grad3[gi3.astype('int32')], [x3, y3, z3]))
    return 23.0 * (n0 + n1 + n2 + n3)
def matrix_noise3d(input_vectors, perm, grad3, vertex_table):
    """Build symbolic 3-D simplex noise for a batch of points.

    Args:
        input_vectors: Matrix whose columns 0, 1 and 2 hold the x, y and z
            coordinates; one row per point.
        perm: Permutation table indexed with int32 expressions.
        grad3: Gradient table, passed through to
            ``calculate_gradient_contribution``.
        vertex_table: Lookup indexed by the three offset comparisons
            (x >= y, y >= z, x >= z); the selected entries are reshaped to
            ``(rows, 2, 3)``, i.e. two corner steps of three components each.

    Returns:
        ``23.0`` times the sum of the four corner contributions.
    """
    one_sixth = 1.0 / 6.0
    one_third = 1.0 / 3.0

    def hashed_gradient_index(ix, iy, iz):
        # Three chained permutation lookups folded into 0..11.
        inner = perm[iz].astype('int32')
        middle = perm[iy + inner].astype('int32')
        return perm[ix + middle] % 12

    # Skew onto the lattice, then unskew the cell origin.
    coord_sum = (input_vectors[:, 0] + input_vectors[:, 1]
                 + input_vectors[:, 2])
    skew = coord_sum * 1.0 / 3.0
    cells = T.floor(input_vectors + skew[:, np.newaxis])
    cell_sum = cells[:, 0] + cells[:, 1] + cells[:, 2]
    unskew = cell_sum * 1.0 / 6.0
    offsets_0 = input_vectors - (cells - unskew[:, np.newaxis])

    # The three comparisons select the simplex inside each cell.
    x_ge_y = T.ge(offsets_0[:, 0], offsets_0[:, 1])
    y_ge_z = T.ge(offsets_0[:, 1], offsets_0[:, 2])
    x_ge_z = T.ge(offsets_0[:, 0], offsets_0[:, 2])
    corner_steps = vertex_table[x_ge_y, y_ge_z, x_ge_z]
    corner_steps = corner_steps.reshape((input_vectors.shape[0], 2, 3))

    offsets_1 = offsets_0 - corner_steps[:, 0] + one_sixth
    offsets_2 = offsets_0 - corner_steps[:, 1] + one_third
    offsets_3 = offsets_0 - 0.5

    # Wrap the cell coordinates to 0..255 before hashing.
    wrapped = T.bitwise_and(cells.astype('int32'), 255)
    wx = wrapped[:, 0]
    wy = wrapped[:, 1]
    wz = wrapped[:, 2]

    gradient_indices = [
        hashed_gradient_index(wx, wy, wz),
        hashed_gradient_index(wx + corner_steps[:, 0, 0],
                              wy + corner_steps[:, 0, 1],
                              wz + corner_steps[:, 0, 2]),
        hashed_gradient_index(wx + corner_steps[:, 1, 0],
                              wy + corner_steps[:, 1, 1],
                              wz + corner_steps[:, 1, 2]),
        hashed_gradient_index(wx + 1, wy + 1, wz + 1),
    ]
    corner_offsets = [offsets_0, offsets_1, offsets_2, offsets_3]

    n0s, n1s, n2s, n3s = [
        calculate_gradient_contribution(offset, index, grad3)
        for offset, index in zip(corner_offsets, gradient_indices)
    ]
    return 23.0 * (n0s + n1s + n2s + n3s)
Exemple #3
0
    def dtw_inner_step(x2_index, d_slice_slice, insert_cost, x1_length,
                       x2_length, x1_index, previous_cost_row):
        """Compute one cell of the accumulated-cost table.

        Uses ``_debug`` and ``debug_level`` from the enclosing scope; every
        intermediate value is passed through ``_debug`` with a label.

        Args:
            x2_index: Scalar (ndim 0) index along the second sequence.
            d_slice_slice: Local cost for this cell; ndim 0 or 1.
            insert_cost: Accumulated cost of the insert predecessor; same
                ndim as ``d_slice_slice``.
            x1_length: Length bound for ``x1_index``; same ndim as
                ``d_slice_slice``.
            x2_length: Length bound for ``x2_index``; same ndim as
                ``d_slice_slice``.
            x1_index: Scalar (ndim 0) index along the first sequence.
            previous_cost_row: Costs of the previous row, indexed by
                ``x2_index``; one more dimension than ``d_slice_slice``.

        Returns:
            The accumulated cost for this cell, or ``0.`` wherever either
            index is not below its length.
        """
        # Graph-construction-time checks on the ranks of the symbolic inputs.
        assert x2_index.ndim == 0
        assert 0 <= d_slice_slice.ndim <= 1
        assert insert_cost.ndim == d_slice_slice.ndim
        assert x1_length.ndim == d_slice_slice.ndim
        assert x2_length.ndim == d_slice_slice.ndim
        assert x1_index.ndim == 0
        assert previous_cost_row.ndim == d_slice_slice.ndim + 1

        x2_index = _debug(x2_index, 'dtw_inner_step.x2_index', debug_level)
        d_slice_slice = _debug(d_slice_slice, 'dtw_inner_step.d_slice_slice',
                               debug_level)
        insert_cost = _debug(insert_cost, 'dtw_inner_step.insert_cost',
                             debug_level)

        # Predecessor costs taken from the previous row.
        delete_cost = _debug(previous_cost_row[x2_index],
                             'dtw_inner_step.delete_cost', debug_level)
        # For x2_index == 0 this reads index -1; that value is not selected
        # below because the first-row/first-column branches bypass min_cost.
        match_cost = _debug(previous_cost_row[x2_index - 1],
                            'dtw_inner_step.match_cost', debug_level)
        assert delete_cost.ndim == d_slice_slice.ndim
        assert match_cost.ndim == d_slice_slice.ndim

        # Cheapest of the three predecessors.
        min_cost = _debug(
            tt.min(tt.stack(insert_cost, delete_cost, match_cost), axis=0),
            'dtw_inner_step.min_cost', debug_level)
        assert min_cost.ndim == d_slice_slice.ndim

        in_first_row = _debug(tt.eq(x1_index, 0),
                              'dtw_inner_step.in_first_row', debug_level)
        in_first_column = _debug(tt.eq(x2_index, 0),
                                 'dtw_inner_step.in_first_column', debug_level)
        assert in_first_row.ndim == 0
        assert in_first_column.ndim == 0

        # Border cells have a single admissible predecessor: insert in the
        # first row, delete in the first column; elsewhere use the minimum.
        cost = _debug(
            d_slice_slice +
            tt.switch(in_first_row, insert_cost,
                      tt.switch(in_first_column, delete_cost, min_cost)),
            'dtw_inner_step.cost', debug_level)
        assert cost.ndim == d_slice_slice.ndim

        # Zero the cost where either index is at or beyond its length.
        length_filtered_cost = _debug(
            tt.switch(
                tt.bitwise_and(tt.lt(x1_index, x1_length),
                               tt.lt(x2_index, x2_length)), cost, 0.),
            'dtw_inner_step.length_filtered_cost', debug_level)
        assert length_filtered_cost.ndim == d_slice_slice.ndim

        return length_filtered_cost
Exemple #4
0
    def while_search(alpha0, alpha1, phi_a0, phi_a1, derphi_a0, i_t,
                    alpha_star, phi_star, derphi_star):
        """Build one step of the step-size search loop.

        Closes over ``phi``, ``derphi``, ``phi0``, ``derphi0``, ``c1``,
        ``c2``, ``zero``, ``one``, ``nan`` and ``profile`` from the
        enclosing scope.

        Args:
            alpha0, alpha1: Previous and current trial step sizes.
            phi_a0, phi_a1: Values of ``phi`` at ``alpha0`` and ``alpha1``.
            derphi_a0: Derivative value at ``alpha0``.
            i_t: Iteration counter.
            alpha_star, phi_star, derphi_star: Incoming result slots; they
                are overwritten below without being read.

        Returns:
            A pair ``(outputs, until)``: the nine updated values in the same
            order as the parameters, and a stop condition wrapped in
            ``scan_utils.until``.
        """
        derphi_a1 = derphi(alpha1)
        # cond1: phi at alpha1 exceeds the c1 sufficient-decrease bound, or
        # phi did not decrease relative to the previous trial (after step 0).
        cond1 = TT.bitwise_or(phi_a1 > phi0 + c1 * alpha1 * derphi0,
                              TT.bitwise_and(phi_a1 >= phi_a0, i_t > zero))
        # cond2: the derivative magnitude satisfies the c2 bound.
        cond2 = abs(derphi_a1) <= -c2 * derphi0
        # cond3: the derivative at alpha1 is non-negative.
        cond3 = derphi_a1 >= zero
        # Candidate result for cond1: zoom on (alpha0, alpha1).
        alpha_star_c1, phi_star_c1, derphi_star_c1 = \
                _zoom(alpha0, alpha1, phi_a0, phi_a1, derphi_a0,
                      phi, derphi, phi0, derphi0, c1, c2,
                     profile=profile)
        # Candidate result for cond3: zoom with the end points swapped.
        alpha_star_c3, phi_star_c3, derphi_star_c3 = \
                _zoom(alpha1, alpha0, phi_a1, phi_a0, derphi_a1, phi,
                      derphi, phi0, derphi0, c1, c2,
                     profile=profile)
        # Next trial step when no condition fires: double alpha1.
        nw_alpha1 = alpha1 * numpy.asarray(2, dtype=theano.config.floatX)
        nw_phi = phi(nw_alpha1)
        # Select the result by the first condition that holds, in the order
        # cond1, cond2, cond3; otherwise carry the doubled step with a nan
        # derivative.
        alpha_star, phi_star, derphi_star = \
                ifelse(cond1,
                          (alpha_star_c1, phi_star_c1, derphi_star_c1),
                ifelse(cond2,
                          (alpha1, phi_a1, derphi_a1),
                ifelse(cond3,
                          (alpha_star_c3, phi_star_c3, derphi_star_c3),
                           (nw_alpha1, nw_phi, nan),
                      name='alphastar_c3'),
                      name='alphastar_c2'),
                      name='alphastar_c1')

        # Outputs are listed in the same order as this function's parameters.
        # The loop stops once any condition holds or the doubled step is zero
        # (presumably lazy_or combines its conditions with OR -- confirm).
        return ([alpha1,
                 nw_alpha1,
                 phi_a1,
                 ifelse(lazy_or('allconds',
                                cond1,
                                cond2,
                                cond3),
                        phi_a1,
                        nw_phi,
                        name='nwphi1'),
                 ifelse(cond1, derphi_a0, derphi_a1, name='derphi'),
                 i_t + one,
                 alpha_star,
                 phi_star,
                 derphi_star],
                theano.scan_module.scan_utils.until(
                    lazy_or('until_cond_',
                            TT.eq(nw_alpha1, zero),
                            cond1,
                            cond2,
                            cond3)))
Exemple #5
0
def mask_loss_mse(grid_idx, image):
    """Mean squared error between the binary mask and ``image``.

    Only positions whose ``grid_idx`` is neither ``MASK["IGNORE"]`` nor
    ``MASK["BACKGROUND_RING"]`` contribute.

    Returns:
        A ``DotMap`` with ``loss`` (mean of the squared differences over the
        selected positions) and ``visual`` holding ``diff`` (squared
        differences scattered into a zero tensor) and ``bw_grid`` (the mask).
    """
    not_ignored = T.neq(grid_idx, MASK["IGNORE"])
    not_background_ring = T.neq(grid_idx, MASK["BACKGROUND_RING"])
    selected = T.bitwise_and(not_ignored, not_background_ring).nonzero()

    bw = binary_mask(grid_idx, ignore=0.0)
    diff = bw - image
    squared_error = diff[selected] ** 2

    # Scatter the squared errors back into a zero tensor for visualisation.
    visual_diff = T.set_subtensor(T.zeros_like(diff)[selected], squared_error)

    visual = {'diff': visual_diff, 'bw_grid': bw}
    return DotMap({'loss': squared_error.mean(), 'visual': visual})
Exemple #6
0
def mask_loss_mse(grid_idx, image):
    """Mean squared error between the binary mask and ``image``.

    Only positions whose ``grid_idx`` is neither ``MASK["IGNORE"]`` nor
    ``MASK["BACKGROUND_RING"]`` contribute.

    Returns:
        A ``DotMap`` with ``loss`` (mean of the squared differences over the
        selected positions) and ``visual`` holding ``diff`` (squared
        differences scattered into a zero tensor) and ``bw_grid`` (the mask).
    """
    not_ignored = T.neq(grid_idx, MASK["IGNORE"])
    not_background_ring = T.neq(grid_idx, MASK["BACKGROUND_RING"])
    selected = T.bitwise_and(not_ignored, not_background_ring).nonzero()

    bw = binary_mask(grid_idx, ignore=0.0)
    diff = bw - image
    squared_error = diff[selected] ** 2

    # Scatter the squared errors back into a zero tensor for visualisation.
    visual_diff = T.set_subtensor(T.zeros_like(diff)[selected], squared_error)

    visual = {'diff': visual_diff, 'bw_grid': bw}
    return DotMap({'loss': squared_error.mean(), 'visual': visual})
Exemple #7
0
def mask_loss_adaptive_mse(grid_idx, image, impl='auto'):
    """Squared-error loss against a mask whose levels adapt to the image.

    The black and white levels come from ``segment_means``. The white level
    is raised to at least 0.40 and at least 0.20 above black; black is then
    capped at 0.20 below white. Positions marked ``MASK["IGNORE"]`` or
    ``MASK["BACKGROUND_RING"]`` are excluded from the loss.

    Returns:
        A ``DotMap`` with ``loss`` and ``visual`` (``diff``: absolute
        differences at the selected positions, zero elsewhere; ``bw_grid``:
        the adaptive mask).
    """
    black_mean, white_mean, _ = segment_means(grid_idx, image, impl)
    white_mean = T.maximum(T.maximum(white_mean, 0.40), black_mean + 0.20)
    black_mean = T.minimum(white_mean - 0.20, black_mean)

    # Keep axis 0 and append three broadcastable axes.
    pattern = (0, 'x', 'x', 'x')
    black_level = black_mean.dimshuffle(*pattern)
    white_level = white_mean.dimshuffle(*pattern)
    bw = adaptive_mask(grid_idx, ignore=0.0,
                       black=black_level, white=white_level)

    not_ignored = T.neq(grid_idx, MASK["IGNORE"])
    not_background_ring = T.neq(grid_idx, MASK["BACKGROUND_RING"])
    selected = T.bitwise_and(not_ignored, not_background_ring).nonzero()

    diff = T.set_subtensor(T.zeros_like(bw)[selected],
                           abs(bw - image)[selected])
    # Differences below 0.15 are raised to 0.15 before squaring.
    floored = T.maximum(diff, 0.15)[selected]
    loss = (floored ** 2).mean()

    visual = {'diff': diff, 'bw_grid': bw}
    return DotMap({'loss': loss, 'visual': visual})
Exemple #8
0
    def while_search(alpha0, alpha1, phi_a0, phi_a1, derphi_a0, i_t,
                    alpha_star, phi_star, derphi_star):
        """Build one step of the step-size search loop.

        Closes over ``phi``, ``derphi``, ``phi0``, ``derphi0``, ``c1``,
        ``c2``, ``zero``, ``one``, ``nan``, ``profile`` and ``mode`` from
        the enclosing scope.

        Args:
            alpha0, alpha1: Previous and current trial step sizes.
            phi_a0, phi_a1: Values of ``phi`` at ``alpha0`` and ``alpha1``.
            derphi_a0: Derivative value at ``alpha0``.
            i_t: Iteration counter.
            alpha_star, phi_star, derphi_star: Incoming result slots; they
                are overwritten below without being read.

        Returns:
            A pair ``(outputs, until)``: the nine updated values in the same
            order as the parameters, and a stop condition wrapped in
            ``scan_utils.until``.
        """
        derphi_a1 = derphi(alpha1)
        # cond1: phi at alpha1 exceeds the c1 sufficient-decrease bound, or
        # phi did not decrease relative to the previous trial (after step 0).
        cond1 = TT.bitwise_or(phi_a1 > phi0 + c1*alpha1*derphi0,
                              TT.bitwise_and(phi_a1 >= phi_a0, i_t > zero))
        # cond2: the derivative magnitude satisfies the c2 bound.
        cond2 = abs(derphi_a1) <= -c2*derphi0
        # cond3: the derivative at alpha1 is non-negative.
        cond3 = derphi_a1 >= zero
        # Candidate result for cond1: zoom on (alpha0, alpha1).
        alpha_star_c1, phi_star_c1, derphi_star_c1 = \
                _zoom(alpha0, alpha1, phi_a0, phi_a1, derphi_a0,
                      phi, derphi, phi0, derphi0, c1,c2,
                     profile = profile, mode=mode)
        # Candidate result for cond3: zoom with the end points swapped.
        alpha_star_c3, phi_star_c3, derphi_star_c3 = \
                _zoom(alpha1, alpha0, phi_a1, phi_a0, derphi_a1, phi,
                      derphi, phi0, derphi0, c1,c2,
                     profile = profile, mode=mode)
        # Next trial step when no condition fires: double alpha1.
        nw_alpha1 = alpha1 * numpy.asarray(2, dtype=theano.config.floatX)
        nw_phi = phi(nw_alpha1)
        # Select the result by the first condition that holds, in the order
        # cond1, cond2, cond3; otherwise carry the doubled step with a nan
        # derivative.
        alpha_star, phi_star, derphi_star = \
                ifelse(cond1,
                          (alpha_star_c1, phi_star_c1, derphi_star_c1),
                ifelse(cond2,
                          (alpha1, phi_a1, derphi_a1),
                ifelse(cond3,
                          (alpha_star_c3, phi_star_c3, derphi_star_c3),
                           (nw_alpha1, nw_phi, nan),
                      name = 'alphastar_c3'),
                      name = 'alphastar_c2'),
                      name ='alphastar_c1')

        # Outputs are listed in the same order as this function's parameters.
        # The loop stops once any condition holds or the doubled step is zero
        # (presumably lazy_or combines its conditions with OR -- confirm).
        return ( [alpha1,
                  nw_alpha1,
                  phi_a1,
                  ifelse(lazy_or('allconds',cond1, cond2, cond3),
                         phi_a1, nw_phi, name='nwphi1'),
                  ifelse(cond1, derphi_a0, derphi_a1, name='derphi'),
                  i_t + one,
                  alpha_star,
                  phi_star,
                  derphi_star],
                theano.scan_module.scan_utils.until(
                    lazy_or('until_cond_',TT.eq(nw_alpha1,zero), cond1, cond2, cond3)))
Exemple #9
0
def mask_loss_adaptive_mse(grid_idx, image, impl='auto'):
    """Squared-error loss against a mask whose levels adapt to the image.

    The black and white levels come from ``segment_means``. The white level
    is raised to at least 0.40 and at least 0.20 above black; black is then
    capped at 0.20 below white. Positions marked ``MASK["IGNORE"]`` or
    ``MASK["BACKGROUND_RING"]`` are excluded from the loss.

    Returns:
        A ``DotMap`` with ``loss`` and ``visual`` (``diff``: absolute
        differences at the selected positions, zero elsewhere; ``bw_grid``:
        the adaptive mask).
    """
    black_mean, white_mean, _ = segment_means(grid_idx, image, impl)
    white_mean = T.maximum(T.maximum(white_mean, 0.40), black_mean + 0.20)
    black_mean = T.minimum(white_mean - 0.20, black_mean)

    # Keep axis 0 and append three broadcastable axes.
    pattern = (0, 'x', 'x', 'x')
    black_level = black_mean.dimshuffle(*pattern)
    white_level = white_mean.dimshuffle(*pattern)
    bw = adaptive_mask(grid_idx, ignore=0.0,
                       black=black_level, white=white_level)

    not_ignored = T.neq(grid_idx, MASK["IGNORE"])
    not_background_ring = T.neq(grid_idx, MASK["BACKGROUND_RING"])
    selected = T.bitwise_and(not_ignored, not_background_ring).nonzero()

    diff = T.set_subtensor(T.zeros_like(bw)[selected],
                           abs(bw - image)[selected])
    # Differences below 0.15 are raised to 0.15 before squaring.
    floored = T.maximum(diff, 0.15)[selected]
    loss = (floored ** 2).mean()

    visual = {'diff': diff, 'bw_grid': bw}
    return DotMap({'loss': loss, 'visual': visual})
def matrix_noise3d(input_vectors, perm, grad3, vertex_table):
    """Build symbolic 3-D simplex noise for a batch of points.

    Args:
        input_vectors: Matrix whose columns 0, 1 and 2 hold the x, y and z
            coordinates; one row per point.
        perm: Permutation table indexed with int32 expressions.
        grad3: Gradient table, passed through to
            ``calculate_gradient_contribution``.
        vertex_table: Lookup indexed by the three offset comparisons
            (x >= y, y >= z, x >= z); the selected entries are reshaped to
            ``(rows, 2, 3)``, i.e. two corner steps of three components each.

    Returns:
        ``23.0`` times the sum of the four corner contributions.
    """
    one_sixth = 1.0 / 6.0
    one_third = 1.0 / 3.0

    def hashed_gradient_index(ix, iy, iz):
        # Three chained permutation lookups folded into 0..11.
        inner = perm[iz].astype('int32')
        middle = perm[iy + inner].astype('int32')
        return perm[ix + middle] % 12

    # Skew onto the lattice, then unskew the cell origin.
    coord_sum = (input_vectors[:, 0] + input_vectors[:, 1]
                 + input_vectors[:, 2])
    skew = coord_sum * 1.0 / 3.0
    cells = T.floor(input_vectors + skew[:, np.newaxis])
    cell_sum = cells[:, 0] + cells[:, 1] + cells[:, 2]
    unskew = cell_sum * 1.0 / 6.0
    offsets_0 = input_vectors - (cells - unskew[:, np.newaxis])

    # The three comparisons select the simplex inside each cell.
    x_ge_y = T.ge(offsets_0[:, 0], offsets_0[:, 1])
    y_ge_z = T.ge(offsets_0[:, 1], offsets_0[:, 2])
    x_ge_z = T.ge(offsets_0[:, 0], offsets_0[:, 2])
    corner_steps = vertex_table[x_ge_y, y_ge_z, x_ge_z]
    corner_steps = corner_steps.reshape((input_vectors.shape[0], 2, 3))

    offsets_1 = offsets_0 - corner_steps[:, 0] + one_sixth
    offsets_2 = offsets_0 - corner_steps[:, 1] + one_third
    offsets_3 = offsets_0 - 0.5

    # Wrap the cell coordinates to 0..255 before hashing.
    wrapped = T.bitwise_and(cells.astype('int32'), 255)
    wx = wrapped[:, 0]
    wy = wrapped[:, 1]
    wz = wrapped[:, 2]

    gradient_indices = [
        hashed_gradient_index(wx, wy, wz),
        hashed_gradient_index(wx + corner_steps[:, 0, 0],
                              wy + corner_steps[:, 0, 1],
                              wz + corner_steps[:, 0, 2]),
        hashed_gradient_index(wx + corner_steps[:, 1, 0],
                              wy + corner_steps[:, 1, 1],
                              wz + corner_steps[:, 1, 2]),
        hashed_gradient_index(wx + 1, wy + 1, wz + 1),
    ]
    corner_offsets = [offsets_0, offsets_1, offsets_2, offsets_3]

    n0s, n1s, n2s, n3s = [
        calculate_gradient_contribution(offset, index, grad3)
        for offset, index in zip(corner_offsets, gradient_indices)
    ]
    return 23.0 * (n0s + n1s + n2s + n3s)
Exemple #11
0
    def dtw_inner_step(x2_index, d_slice_slice, insert_cost, x1_length, x2_length, x1_index, previous_cost_row):
        """Compute one cell of the accumulated-cost table.

        Uses ``_debug`` and ``debug_level`` from the enclosing scope; every
        intermediate value is passed through ``_debug`` with a label.

        Args:
            x2_index: Scalar (ndim 0) index along the second sequence.
            d_slice_slice: Local cost for this cell; ndim 0 or 1.
            insert_cost: Accumulated cost of the insert predecessor; same
                ndim as ``d_slice_slice``.
            x1_length: Length bound for ``x1_index``; same ndim as
                ``d_slice_slice``.
            x2_length: Length bound for ``x2_index``; same ndim as
                ``d_slice_slice``.
            x1_index: Scalar (ndim 0) index along the first sequence.
            previous_cost_row: Costs of the previous row, indexed by
                ``x2_index``; one more dimension than ``d_slice_slice``.

        Returns:
            The accumulated cost for this cell, or ``0.`` wherever either
            index is not below its length.
        """
        # Graph-construction-time checks on the ranks of the symbolic inputs.
        assert x2_index.ndim == 0
        assert 0 <= d_slice_slice.ndim <= 1
        assert insert_cost.ndim == d_slice_slice.ndim
        assert x1_length.ndim == d_slice_slice.ndim
        assert x2_length.ndim == d_slice_slice.ndim
        assert x1_index.ndim == 0
        assert previous_cost_row.ndim == d_slice_slice.ndim + 1

        x2_index = _debug(x2_index, 'dtw_inner_step.x2_index', debug_level)
        d_slice_slice = _debug(d_slice_slice, 'dtw_inner_step.d_slice_slice', debug_level)
        insert_cost = _debug(insert_cost, 'dtw_inner_step.insert_cost', debug_level)

        # Predecessor costs taken from the previous row. For x2_index == 0
        # the match lookup reads index -1; that value is not selected below
        # because the first-row/first-column branches bypass min_cost.
        delete_cost = _debug(previous_cost_row[x2_index], 'dtw_inner_step.delete_cost', debug_level)
        match_cost = _debug(previous_cost_row[x2_index - 1], 'dtw_inner_step.match_cost', debug_level)
        assert delete_cost.ndim == d_slice_slice.ndim
        assert match_cost.ndim == d_slice_slice.ndim

        # Cheapest of the three predecessors.
        min_cost = _debug(tt.min(tt.stack(insert_cost, delete_cost, match_cost), axis=0), 'dtw_inner_step.min_cost',
                          debug_level)
        assert min_cost.ndim == d_slice_slice.ndim

        in_first_row = _debug(tt.eq(x1_index, 0), 'dtw_inner_step.in_first_row', debug_level)
        in_first_column = _debug(tt.eq(x2_index, 0), 'dtw_inner_step.in_first_column', debug_level)
        assert in_first_row.ndim == 0
        assert in_first_column.ndim == 0

        # Border cells have a single admissible predecessor: insert in the
        # first row, delete in the first column; elsewhere use the minimum.
        cost = _debug(
            d_slice_slice + tt.switch(in_first_row, insert_cost, tt.switch(in_first_column, delete_cost, min_cost)),
            'dtw_inner_step.cost', debug_level)
        assert cost.ndim == d_slice_slice.ndim

        # Zero the cost where either index is at or beyond its length.
        length_filtered_cost = _debug(
            tt.switch(tt.bitwise_and(tt.lt(x1_index, x1_length), tt.lt(x2_index, x2_length)), cost, 0.),
            'dtw_inner_step.length_filtered_cost', debug_level)
        assert length_filtered_cost.ndim == d_slice_slice.ndim

        return length_filtered_cost
Exemple #12
0
    def __init__(self, inputs, labels, y_mask,
                 n_dim,
                 cutoff, project_factor=4):
        '''Create the adaptive-softmax parameters and the symbolic losses.

        The vocabulary is split by ``cutoff`` into a head (the first
        ``cutoff[0]`` labels plus one slot per tail cluster) and
        ``len(cutoff) - 1`` tail clusters. Each tail cluster gets a
        projection matrix and an output matrix.

        Args:
            inputs: flattened logits with shape of [n_step*n_batch, n_dim]
            labels: flattened labels with shape of [n_step*n_batch]
            y_mask: mask the null space of sentences with shape of
                [n_step*n_batch]; entries equal to 1 are kept
            n_dim: size of the last axis of ``inputs``
            cutoff: frequency binning, i.e. [2000, vocab_size]
            project_factor: divisor for the projection size of tail
                clusters; it is multiplied by itself for every further
                cluster

        Sets ``params``, ``loss``, ``training_losses`` (tail losses followed
        by the head loss), ``head_loss`` and, for the last tail cluster
        only, ``tail_logits``, ``tail_labels`` and ``tail_loss``.
        '''
        self.input_dim = n_dim
        self.sample_num = inputs.shape[0]
        self.cluster_num = len(cutoff) - 1
        self.head_dim = cutoff[0] + self.cluster_num
        self.params = []
        self.y_mask = y_mask

        # Cast to floatX like the tail weights below; without the cast the
        # head matrix is float64 and forces upcasts when floatX is float32.
        init_head_w = np.asarray(np.random.uniform(low=-np.sqrt(1./self.input_dim),
                                              high=np.sqrt(1./self.input_dim),
                                              size=(self.input_dim,self.head_dim)),
                                 dtype=theano.config.floatX)
        self.head_w=theano.shared(value=init_head_w,name='head_w')
        self.params.append(self.head_w)

        tail_project_factor = project_factor
        tail_w_list = []
        for i in range(self.cluster_num):
            # Projection size shrinks for each further cluster, never below 1.
            project_dim = max(1, self.input_dim // tail_project_factor)
            tail_dim = cutoff[i + 1] - cutoff[i]
            _tail_proj_w = np.asarray(np.random.uniform(low=-np.sqrt(1./self.input_dim),
                                             high=np.sqrt(1./self.input_dim),
                                             size=(self.input_dim, project_dim)),dtype=theano.config.floatX)
            _tail_w = np.asarray(np.random.uniform(low=-np.sqrt(1./project_dim),
                                             high=np.sqrt(1./project_dim),
                                             size=(project_dim,tail_dim)),dtype=theano.config.floatX)
            tail_proj_w = theano.shared(value=_tail_proj_w, name="adaptive_softmax_tail{}_proj_w".format(i+1))
            tail_w = theano.shared(value=_tail_w, name="adaptive_softmax_tail{}_w".format(i+1))
            tail_w_list.append([tail_proj_w, tail_w])
            tail_project_factor *= project_factor
            self.params.append(tail_proj_w)
            self.params.append(tail_w)

        # Get tail masks and update head labels
        training_losses = []
        loss = 0.
        head_labels = labels
        for i in range(self.cluster_num):
            # Select the samples whose label falls inside cluster i.
            mask = T.bitwise_and(T.ge(labels, cutoff[i]), T.lt(labels, cutoff[i + 1]))
            # In the head, every label of cluster i maps to slot cutoff[0] + i.
            head_labels = T.switch(mask, T.constant([cutoff[0] + i]).repeat(self.sample_num), head_labels)

            # compute tail loss
            tail_inputs = inputs[mask.nonzero()]
            tail_logits = T.dot(T.dot(tail_inputs, tail_w_list[i][0]), tail_w_list[i][1])
            tail_labels = (labels - cutoff[i])[mask.nonzero()]
            # Drop the positions that y_mask marks as padding.
            tail_y_mask = self.y_mask[mask.nonzero()]
            tail_logits = tail_logits[T.eq(tail_y_mask, 1).nonzero()]
            tail_labels = tail_labels[T.eq(tail_y_mask, 1).nonzero()]
            # NOTE(review): the raw dot-product outputs are clipped into
            # (0, 1) and passed to categorical_crossentropy without a
            # softmax -- confirm this is intended.
            tail_logits = T.clip(tail_logits, 1.0e-8, 1.0 - 1.0e-8)
            tail_loss = T.mean(T.nnet.categorical_crossentropy(tail_logits, tail_labels))
            training_losses.append(tail_loss)
            loss += tail_loss
            # Overwritten on every iteration: only the last cluster is kept.
            self.tail_logits = tail_logits
            self.tail_labels = tail_labels
            self.tail_loss = tail_loss

        # compute head loss
        head_logits = T.dot(inputs, self.head_w)
        head_logits = head_logits[T.eq(self.y_mask, 1).nonzero()]
        head_logits = T.clip(head_logits, 1.0e-8, 1.0 - 1.0e-8)
        head_labels = head_labels[T.eq(self.y_mask, 1).nonzero()]
        head_loss = T.mean(T.nnet.categorical_crossentropy(head_logits, head_labels))
        loss += head_loss
        training_losses.append(head_loss)

        self.loss = loss
        self.training_losses = training_losses
        self.head_loss = head_loss
Exemple #13
0
	def _get_cost3(
			self,
			output,
			truth,
			rescore=True
		):
		"""Build the symbolic detection cost for anchor-box predictions.

		On the first call two symbolic scalars, ``lambda_obj`` and
		``lambda_noobj``, are created and cached on the instance.

		Parameters
		----------
		output : 5-d tensor
			Indexed as (image, box, feature, ., .). Features 0..3 are the box
			x, y, width and height terms, feature 4 is the objectness and the
			last ``self.num_classes`` features are the class scores.
		truth : 3-d tensor
			Indexed as (image, object, feature) with the same feature layout
			for the box and class entries. Objects whose computed grid index
			is negative are skipped.
		rescore : bool
			When true the objectness target is the IoU of the matched box,
			otherwise it is 1.

		Returns
		-------
		tuple
			``(cost, [iou])``.
		"""

		if not hasattr(self, '_lambda_obj'):
			lambda_obj, lambda_noobj = T.scalar('lambda_obj'), T.scalar('lambda_noobj')
			self._lambda_obj, self._lambda_noobj = lambda_obj, lambda_noobj
		else:
			# The former read of ``self._thresh`` was unused and could raise
			# AttributeError, so it is no longer performed.
			lambda_obj, lambda_noobj = self._lambda_obj, self._lambda_noobj

		cost = 0.
		
		# penalize everything, this will be undone if box matches ground truth
		#cost += lambda_noobj_coord * T.mean(output[:,:,:4]**2)
		cost += lambda_noobj * T.mean(output[:,:,4]**2)
		
		# get index for each truth
		row_idx = T.cast(T.floor((truth[:,:,0] + 0.5 * truth[:,:,2]) * self.output_shape[1]), 'int32')
		col_idx = T.cast(T.floor((truth[:,:,1] + 0.5 * truth[:,:,3]) * self.output_shape[0]), 'int32')
				
		# image index
		img_idx = T.repeat(T.arange(truth.shape[0]).dimshuffle(0,'x'), truth.shape[1], axis=1)
		
		# index for each object in an image; built as an (image, object)
		# matrix so that flattening it lines up with row_idx/col_idx/img_idx
		# (repeating the 1-d range element-wise misaligned the object index)
		obj_idx = T.repeat(T.arange(truth.shape[1]).dimshuffle('x',0), truth.shape[0], axis=0)
		
		# reshape to flat
		row_idx = row_idx.reshape((-1,))
		col_idx = col_idx.reshape((-1,))
		img_idx = img_idx.reshape((-1,))
		obj_idx = obj_idx.reshape((-1,))
		
		# use only valid indices (i.e. greater or equal to zero)
		valid_idx = T.bitwise_and(row_idx >= 0, col_idx >= 0).reshape((-1,))
		row_idx = row_idx[valid_idx.nonzero()]
		col_idx = col_idx[valid_idx.nonzero()]
		img_idx = img_idx[valid_idx.nonzero()]
		obj_idx = obj_idx[valid_idx.nonzero()]
				
		# reshape output and truth to a common 6-d layout
		output = output.dimshuffle(0,'x',1,2,3,4)
		truth = truth.dimshuffle(0,1,'x',2,'x','x')
		
		output = T.repeat(output, truth.shape[1], axis=1)
		truth = T.repeat(truth, len(self.boxes), axis=2)
		truth = T.repeat(T.repeat(truth, self.output_shape[0], axis=4), self.output_shape[1], axis=5)
		
		# reformat ground truth labels so that they are relative to offsets
		# and that the width/height are log scale relative to the box height.
		
		# add offset to the x,y coordinates
		x_diff, y_diff = 1./self.output_shape[0], 1./self.output_shape[1]
		y, x = meshgrid(T.arange(0 + x_diff/2,1,x_diff), T.arange(0 + y_diff/2,1,y_diff))
		x, y = x.dimshuffle('x','x',0,1), y.dimshuffle('x','x',0,1)
		
		# scaling from each anchor box
		x_scale = theano.shared(np.asarray([b[0] for b in self.boxes]), name='x_scale', borrow=True).dimshuffle('x',0,'x','x')
		y_scale = theano.shared(np.asarray([b[1] for b in self.boxes]), name='y_scale', borrow=True).dimshuffle('x',0,'x','x')

		# change predicted output to proper scale
		pred = T.set_subtensor(output[:,:,:,0], output[:,:,:,0] + x)
		pred = T.set_subtensor(pred[:,:,:,1], pred[:,:,:,1] + y)
		pred = T.set_subtensor(pred[:,:,:,2], x_scale * T.exp(pred[:,:,:,2]))
		pred = T.set_subtensor(pred[:,:,:,3], y_scale * T.exp(pred[:,:,:,3]))
		
		# determine iou of chosen boxes
		xi = T.maximum(pred[img_idx, obj_idx, :, 0, row_idx, col_idx], truth[img_idx, obj_idx, :, 0, row_idx, col_idx])
		yi = T.maximum(pred[img_idx, obj_idx, :, 1, row_idx, col_idx], truth[img_idx, obj_idx, :, 1, row_idx, col_idx])
		xf = T.minimum(
			pred[img_idx, obj_idx, :, 0, row_idx, col_idx] + pred[img_idx, obj_idx, :, 2, row_idx, col_idx],
			truth[img_idx, obj_idx, :, 0, row_idx, col_idx] + truth[img_idx, obj_idx, :, 2, row_idx, col_idx]
		)
		yf = T.minimum(
			pred[img_idx, obj_idx, :, 1, row_idx, col_idx] + pred[img_idx, obj_idx, :, 3, row_idx, col_idx],
			truth[img_idx, obj_idx, :, 1, row_idx, col_idx] + truth[img_idx, obj_idx, :, 3, row_idx, col_idx]
		)
		w, h = T.maximum(xf - xi, 0.), T.maximum(yf - yi, 0.)
		
		isec = w * h
		iou = isec / (pred[img_idx, obj_idx, :, 2, row_idx, col_idx] * pred[img_idx, obj_idx, :, 3, row_idx, col_idx] + \
					truth[img_idx, obj_idx, :, 2, row_idx, col_idx] * truth[img_idx, obj_idx, :, 3, row_idx, col_idx] - isec)
					 
		# get index for matched boxes
		# NOTE(review): the ``[:, match_idx]`` selections below pick, for
		# every row, the columns matched by all rows rather than that row's
		# own match -- confirm whether a per-row selection was intended.
		match_idx = T.argmax(iou, axis=1)
		
		# change truth to proper scale for error
		truth = T.set_subtensor(truth[:,:,:,0,:,:], truth[:,:,:,0,:,:] - x)
		truth = T.set_subtensor(truth[:,:,:,1,:,:], truth[:,:,:,1,:,:] - y)
		truth = T.set_subtensor(truth[:,:,:,2,:,:], T.log(truth[:,:,:,2,:,:] / x_scale))
		truth = T.set_subtensor(truth[:,:,:,3,:,:], T.log(truth[:,:,:,3,:,:] / y_scale))
		
		# add to cost boxes which have been matched
		
		# correct for matched boxes
		#cost -= lambda_noobj_coord * T.mean(output[img_idx, obj_idx, :, :4, row_idx, col_idx][:,match_idx]**2)
		cost -= lambda_noobj * T.mean(output[img_idx, obj_idx, :, 4, row_idx, col_idx][:,match_idx]**2)
		
		# coordinate errors
		cost += lambda_obj * T.mean(
			(output[img_idx, obj_idx, :, 0, row_idx, col_idx][:,match_idx] - truth[img_idx, obj_idx, :, 0, row_idx, col_idx][:,match_idx])**2
		)
		cost += lambda_obj * T.mean(
			(output[img_idx, obj_idx, :, 1, row_idx, col_idx][:,match_idx] - truth[img_idx, obj_idx, :, 1, row_idx, col_idx][:,match_idx])**2
		)
		cost += lambda_obj * T.mean(
			(output[img_idx, obj_idx, :, 2, row_idx, col_idx][:,match_idx] - truth[img_idx, obj_idx, :, 2, row_idx, col_idx][:,match_idx])**2
		)
		cost += lambda_obj * T.mean(
			(output[img_idx, obj_idx, :, 3, row_idx, col_idx][:,match_idx] - truth[img_idx, obj_idx, :, 3, row_idx, col_idx][:,match_idx])**2
		)
		
		# objectness error
		if rescore:
			cost += lambda_obj * T.mean(
				(output[img_idx, obj_idx, :, 4, row_idx, col_idx][:,match_idx] - iou[:,match_idx])**2
			)
		else:
			cost += lambda_obj * T.mean(
				(output[img_idx, obj_idx, :, 4, row_idx, col_idx][:,match_idx] - 1)**2
			)
		
		# class error
		cost += lambda_obj * T.mean(
			(
				-truth[img_idx, obj_idx, :, -self.num_classes:, row_idx, col_idx][:,match_idx] * \
				T.log(output[img_idx, obj_idx, :, -self.num_classes:, row_idx, col_idx][:,match_idx])
			)
		)
				
		return cost, [iou]
Exemple #14
0
    def while_zoom(phi_rec, a_rec, a_lo, a_hi, phi_hi,
                   phi_lo, derphi_lo, a_star, val_star, valprime):
        """Build one step of the zoom loop that narrows [a_lo, a_hi].

        Closes over ``phi``, ``derphi``, ``phi0``, ``derphi0``, ``c1``,
        ``c2``, ``delta1``, ``delta2``, ``zero`` and ``nan`` from the
        enclosing scope.

        Args:
            phi_rec, a_rec: Most recent extra (value, step) pair, used by the
                cubic interpolation.
            a_lo, a_hi: Current end points of the bracket.
            phi_hi, phi_lo: Values of ``phi`` at ``a_hi`` and ``a_lo``.
            derphi_lo: Derivative value at ``a_lo``.
            a_star, val_star, valprime: Incoming result slots; they are
                overwritten below without being read.

        Returns:
            A pair ``(outputs, until)``: the ten updated values in the same
            order as the parameters, and ``scan_utils.until(stop)``.
        """
        # interpolate to find a trial step length between a_lo and
        # a_hi Need to choose interpolation here.  Use cubic
        # interpolation and then if the result is within delta *
        # dalpha or outside of the interval bounded by a_lo or a_hi
        # then use quadratic interpolation, if the result is still too
        # close, then use bisection
        dalpha = a_hi - a_lo
        # a and b are the lower and upper end points regardless of the
        # order of a_lo and a_hi.
        a = TT.switch(dalpha < zero, a_hi, a_lo)
        b = TT.switch(dalpha < zero, a_lo, a_hi)

        # minimizer of cubic interpolant
        # (uses phi_lo, derphi_lo, phi_hi, and the most recent value of phi)
        #
        # if the result is too close to the end points (or out of the
        # interval) then use quadratic interpolation with phi_lo,
        # derphi_lo and phi_hi if the result is still too close to the
        # end points (or out of the interval) then use bisection

        # cubic interpolation
        cchk = delta1 * dalpha
        a_j_cubic = _cubicmin(a_lo, phi_lo, derphi_lo,
                              a_hi, phi_hi, a_rec, phi_rec)
        # quadratic interpolation
        qchk = delta2 * dalpha
        a_j_quad = _quadmin(a_lo, phi_lo, derphi_lo, a_hi, phi_hi)
        cond_q = lazy_or('condq',
                         TT.isnan(a_j_quad),
                         a_j_quad > b - qchk,
                         a_j_quad < a + qchk)
        # Rejected quadratic candidate: fall back to the bracket midpoint.
        a_j_quad = TT.switch(cond_q, a_lo +
                             numpy.asarray(0.5, dtype=theano.config.floatX) * \
                             dalpha, a_j_quad)

        # pick between the two ..
        cond_c = lazy_or('condc',
                         TT.isnan(a_j_cubic),
                         TT.bitwise_or(a_j_cubic > b - cchk,
                                       a_j_cubic < a + cchk))
        # Use the quadratic/bisection candidate when the cubic one is
        # rejected. NOTE(review): the lazy ``ifelse`` variant is commented
        # out below; with TT.switch both candidates are presumably always
        # computed -- confirm.
        a_j = TT.switch(cond_c, a_j_quad, a_j_cubic)
        #a_j = ifelse(cond_c, a_j_quad,  a_j_cubic)

        # Check new value of a_j
        phi_aj = phi(a_j)
        derphi_aj = derphi(a_j)

        # Stop when a_j meets the c1 decrease bound, improves on phi_lo and
        # meets the c2 derivative bound (presumably lazy_and combines its
        # conditions with AND -- confirm).
        stop = lazy_and('stop',
                        TT.bitwise_and(phi_aj <= phi0 + c1 * a_j * derphi0,
                                       phi_aj < phi_lo),
                        abs(derphi_aj) <= -c2 * derphi0)

        # cond1: a_j fails the decrease bound or does not improve on phi_lo,
        # so it becomes the new a_hi.
        cond1 = TT.bitwise_or(phi_aj > phi0 + c1 * a_j * derphi0,
                              phi_aj >= phi_lo)
        # cond2: the derivative at a_j has the same sign as (a_hi - a_lo),
        # so the old a_lo becomes the new a_hi.
        cond2 = derphi_aj * (a_hi - a_lo) >= zero

        # Switches just make more sense here because they have a C
        # implementation and they get composed
        phi_rec = ifelse(cond1,
                         phi_hi,
                         TT.switch(cond2, phi_hi, phi_lo),
                         name='phi_rec')
        a_rec = ifelse(cond1,
                       a_hi,
                       TT.switch(cond2, a_hi, a_lo),
                         name='a_rec')
        a_hi = ifelse(cond1, a_j,
                      TT.switch(cond2, a_lo, a_hi),
                      name='a_hi')
        phi_hi = ifelse(cond1, phi_aj,
                        TT.switch(cond2, phi_lo, phi_hi),
                        name='phi_hi')

        # Unless cond1 holds, a_j replaces the low end of the bracket.
        a_lo = TT.switch(cond1, a_lo, a_j)
        phi_lo = TT.switch(cond1, phi_lo, phi_aj)
        derphi_lo = ifelse(cond1, derphi_lo, derphi_aj, name='derphi_lo')

        # The trial point is always reported as the current result; the
        # derivative is reported only when cond1 is false and cond2 is true.
        a_star = a_j
        val_star = phi_aj
        valprime = ifelse(cond1, nan,
                          TT.switch(cond2, derphi_aj, nan), name='valprime')

        return ([phi_rec,
                 a_rec,
                 a_lo,
                 a_hi,
                 phi_hi,
                 phi_lo,
                 derphi_lo,
                 a_star,
                 val_star,
                 valprime],
                theano.scan_module.scan_utils.until(stop))
Exemple #15
0
def _zoom(a_lo, a_hi, phi_lo, phi_hi, derphi_lo,
          phi, derphi, phi0, derphi0, c1, c2,
          n_iters=10,
          profile=False):
    """
    Build the symbolic graph for the "zoom" phase of a Wolfe line search.

    Part of the optimization algorithm in `scalar_search_wolfe2`.

    One trial step is computed outside the loop (quadratic interpolation
    with a bisection fallback).  Further trial steps are produced by the
    nested `while_zoom` function, which is iterated through `scan` for at
    most `n_iters` steps and stops early once its `stop` condition holds.

    Parameters
    ----------
    a_lo : float
        Step size
    a_hi : float
        Step size
    phi_lo : float
        Value of f at a_lo
    phi_hi : float
        Value of f at a_hi
    derphi_lo : float
        Value of derivative at a_lo
    phi : callable
        Generates computational graph
    derphi : callable
        Generates computational graph
    phi0 : float
        Value of f at 0
    derphi0 : float
        Value of the derivative at 0
    c1 : float
        Wolfe parameter
    c2 : float
        Wolfe parameter
    n_iters : int
        Maximum number of loop iterations (passed to `scan` as `n_steps`)
    profile : bool
        True if you want printouts of profiling information

    Returns
    -------
    a_star, val_star, valprime
        Symbolic variables.  When the initial trial step already satisfies
        the `only_if` condition they are that step, the value of `phi` at
        that step and `vderphi_aj`; otherwise they are read from the
        outputs of the `while_zoom` scan.

    Notes
    -----
    The updates returned by `scan` are discarded.
    """
    # Function representing the computations of one step of the while loop
    def while_zoom(phi_rec, a_rec, a_lo, a_hi, phi_hi,
                   phi_lo, derphi_lo, a_star, val_star, valprime):
        # NOTE: `delta1` and `delta2` are closure variables assigned further
        # down in the enclosing `_zoom`; they exist by the time `scan` calls
        # this function.  The incoming `a_star`, `val_star` and `valprime`
        # are not read; fresh values are computed and returned.

        # interpolate to find a trial step length between a_lo and
        # a_hi Need to choose interpolation here.  Use cubic
        # interpolation and then if the result is within delta *
        # dalpha or outside of the interval bounded by a_lo or a_hi
        # then use quadratic interpolation, if the result is still too
        # close, then use bisection
        dalpha = a_hi - a_lo
        # Order the end points so that a <= b whatever the sign of dalpha
        a = TT.switch(dalpha < zero, a_hi, a_lo)
        b = TT.switch(dalpha < zero, a_lo, a_hi)

        # minimizer of cubic interpolant
        # (uses phi_lo, derphi_lo, phi_hi, and the most recent value of phi)
        #
        # if the result is too close to the end points (or out of the
        # interval) then use quadratic interpolation with phi_lo,
        # derphi_lo and phi_hi if the result is still too close to the
        # end points (or out of the interval) then use bisection

        # cubic interpolation
        cchk = delta1 * dalpha
        a_j_cubic = _cubicmin(a_lo, phi_lo, derphi_lo,
                              a_hi, phi_hi, a_rec, phi_rec)
        # quadratic interpolation
        qchk = delta2 * dalpha
        a_j_quad = _quadmin(a_lo, phi_lo, derphi_lo, a_hi, phi_hi)
        # Reject the quadratic step if it is NaN or too close to (or
        # outside) the end points; bisection is used instead.
        cond_q = lazy_or('condq',
                         TT.isnan(a_j_quad),
                         a_j_quad > b - qchk,
                         a_j_quad < a + qchk)
        a_j_quad = TT.switch(cond_q, a_lo +
                             numpy.asarray(0.5, dtype=theano.config.floatX) * \
                             dalpha, a_j_quad)

        # pick between the two ..
        cond_c = lazy_or('condc',
                         TT.isnan(a_j_cubic),
                         TT.bitwise_or(a_j_cubic > b - cchk,
                                       a_j_cubic < a + cchk))
        # The cubic step is kept unless `cond_c` rejects it, in which case
        # the (already safeguarded) quadratic step is used.  A `TT.switch`
        # is used here; the `ifelse` alternative is kept below, disabled.
        a_j = TT.switch(cond_c, a_j_quad, a_j_cubic)
        #a_j = ifelse(cond_c, a_j_quad,  a_j_cubic)

        # Check new value of a_j
        phi_aj = phi(a_j)
        derphi_aj = derphi(a_j)

        # Loop termination: sufficient decrease, improvement over phi_lo and
        # the curvature condition all hold at a_j.
        stop = lazy_and('stop',
                        TT.bitwise_and(phi_aj <= phi0 + c1 * a_j * derphi0,
                                       phi_aj < phi_lo),
                        abs(derphi_aj) <= -c2 * derphi0)

        # cond1: a_j fails sufficient decrease or does not improve on phi_lo,
        #        so a_j becomes the new "hi" end point.
        # cond2: only relevant when cond1 is false; the derivative at a_j has
        #        the same sign as (a_hi - a_lo), so the old "lo" becomes "hi".
        cond1 = TT.bitwise_or(phi_aj > phi0 + c1 * a_j * derphi0,
                              phi_aj >= phi_lo)
        cond2 = derphi_aj * (a_hi - a_lo) >= zero

        # Switches just make more sense here because they have a C
        # implementation and they get composed
        # (the outer selection uses `ifelse`, the inner one `TT.switch`)
        phi_rec = ifelse(cond1,
                         phi_hi,
                         TT.switch(cond2, phi_hi, phi_lo),
                         name='phi_rec')
        a_rec = ifelse(cond1,
                       a_hi,
                       TT.switch(cond2, a_hi, a_lo),
                         name='a_rec')
        a_hi = ifelse(cond1, a_j,
                      TT.switch(cond2, a_lo, a_hi),
                      name='a_hi')
        phi_hi = ifelse(cond1, phi_aj,
                        TT.switch(cond2, phi_lo, phi_hi),
                        name='phi_hi')

        # The "lo" end point moves to a_j only when cond1 is false.  These
        # must come after the four updates above, which still read the old
        # a_lo / phi_lo.
        a_lo = TT.switch(cond1, a_lo, a_j)
        phi_lo = TT.switch(cond1, phi_lo, phi_aj)
        derphi_lo = ifelse(cond1, derphi_lo, derphi_aj, name='derphi_lo')

        a_star = a_j
        val_star = phi_aj
        # Derivative at a_j, or NaN when cond1 holds or cond2 is false
        valprime = ifelse(cond1, nan,
                          TT.switch(cond2, derphi_aj, nan), name='valprime')

        # The output order must match the argument order of this function
        # and the order of `states` built in `_zoom`.
        return ([phi_rec,
                 a_rec,
                 a_lo,
                 a_hi,
                 phi_hi,
                 phi_lo,
                 derphi_lo,
                 a_star,
                 val_star,
                 valprime],
                theano.scan_module.scan_utils.until(stop))

    maxiter = n_iters
    # cubic interpolant check
    delta1 = TT.constant(numpy.asarray(0.2,
                                       dtype=theano.config.floatX))
    # quadratic interpolant check
    delta2 = TT.constant(numpy.asarray(0.1,
                                       dtype=theano.config.floatX))
    # NOTE(review): these two values are overwritten below before being read.
    phi_rec = phi0
    a_rec = zero

    # Initial iteration

    dalpha = a_hi - a_lo
    a = TT.switch(dalpha < zero, a_hi, a_lo)
    b = TT.switch(dalpha < zero, a_lo, a_hi)
    #a = ifelse(dalpha < 0, a_hi, a_lo)
    #b = ifelse(dalpha < 0, a_lo, a_hi)

    # minimizer of cubic interpolant
    # (uses phi_lo, derphi_lo, phi_hi, and the most recent value of phi)
    #
    # if the result is too close to the end points (or out of the
    # interval) then use quadratic interpolation with phi_lo,
    # derphi_lo and phi_hi if the result is still too close to the
    # end points (or out of the interval) then use bisection
    #
    # (no cubic step is computed in this initial iteration)

    # quadratic interpolation
    qchk = delta2 * dalpha
    a_j = _quadmin(a_lo, phi_lo, derphi_lo, a_hi, phi_hi)
    cond_q = lazy_or('mcond_q',
                     TT.isnan(a_j),
                     TT.bitwise_or(a_j > b - qchk,
                                   a_j < a + qchk))

    # Fall back to bisection when the quadratic step is rejected
    a_j = TT.switch(cond_q, a_lo +
                    numpy.asarray(0.5, dtype=theano.config.floatX) * \
                    dalpha, a_j)

    # Check new value of a_j
    phi_aj = phi(a_j)
    derphi_aj = derphi(a_j)

    # Same bracket-update conditions as in `while_zoom`
    cond1 = TT.bitwise_or(phi_aj > phi0 + c1 * a_j * derphi0,
                          phi_aj >= phi_lo)
    cond2 = derphi_aj * (a_hi - a_lo) >= zero

    # Switches just make more sense here because they have a C
    # implementation and they get composed
    phi_rec = ifelse(cond1,
                     phi_hi,
                     TT.switch(cond2, phi_hi, phi_lo),
                     name='mphirec')
    a_rec = ifelse(cond1,
                   a_hi,
                   TT.switch(cond2, a_hi, a_lo),
                   name='marec')
    a_hi = ifelse(cond1,
                  a_j,
                  TT.switch(cond2, a_lo, a_hi),
                  name='mahi')
    phi_hi = ifelse(cond1,
                    phi_aj,
                    TT.switch(cond2, phi_lo, phi_hi),
                    name='mphihi')

    # True when the initial trial step already meets the same three
    # conditions that `while_zoom` uses as its stopping test.  It reads
    # phi_lo, so it must be built before phi_lo is updated below.
    onlyif = lazy_and('only_if',
                      TT.bitwise_and(phi_aj <= phi0 + c1 * a_j * derphi0,
                                     phi_aj < phi_lo),
                      abs(derphi_aj) <= -c2 * derphi0)

    a_lo = TT.switch(cond1, a_lo, a_j)
    phi_lo = TT.switch(cond1, phi_lo, phi_aj)
    derphi_lo = ifelse(cond1, derphi_lo, derphi_aj, name='derphi_lo_main')
    # Rename the graph variables (this replaces the names given above)
    phi_rec.name = 'phi_rec'
    a_rec.name = 'a_rec'
    a_lo.name = 'a_lo'
    a_hi.name = 'a_hi'
    phi_hi.name = 'phi_hi'
    phi_lo.name = 'phi_lo'
    derphi_lo.name = 'derphi_lo'
    vderphi_aj = ifelse(cond1, nan, TT.switch(cond2, derphi_aj, nan),
                        name='vderphi_aj')
    # Initial loop states, in the argument order of `while_zoom`.  Each one
    # gets a leading axis of length one that is marked non-broadcastable.
    # The last three (a_star, val_star, valprime) start as `zero`.
    states = []
    states += [TT.unbroadcast(TT.shape_padleft(phi_rec), 0)]
    states += [TT.unbroadcast(TT.shape_padleft(a_rec), 0)]
    states += [TT.unbroadcast(TT.shape_padleft(a_lo), 0)]
    states += [TT.unbroadcast(TT.shape_padleft(a_hi), 0)]
    states += [TT.unbroadcast(TT.shape_padleft(phi_hi), 0)]
    states += [TT.unbroadcast(TT.shape_padleft(phi_lo), 0)]
    states += [TT.unbroadcast(TT.shape_padleft(derphi_lo), 0)]
    states += [TT.unbroadcast(TT.shape_padleft(zero), 0)]
    states += [TT.unbroadcast(TT.shape_padleft(zero), 0)]
    states += [TT.unbroadcast(TT.shape_padleft(zero), 0)]
    # print'while_zoom'
    # NOTE(review): `scan` is called with a `states=` keyword, so this is
    # presumably a project-specific wrapper rather than `theano.scan` --
    # confirm, including what index 0 of each output refers to.
    outs, updates = scan(while_zoom,
                         states=states,
                         n_steps=maxiter,
                         name='while_zoom',
                         mode=theano.Mode(linker='cvm_nogc'),
                         profile=profile)
    # print 'done_while'
    # Use the initial trial step if it was already acceptable, otherwise the
    # values produced by the loop (outputs 7, 8 and 9 are a_star, val_star
    # and valprime).
    a_star = ifelse(onlyif, a_j, outs[7][0], name='astar')
    val_star = ifelse(onlyif, phi_aj, outs[8][0], name='valstar')
    valprime = ifelse(onlyif, vderphi_aj, outs[9][0], name='valprime')

    ## WARNING !! I ignore updates given by scan which I should not do !!!
    return a_star, val_star, valprime
Exemple #16
0
 def compute(self, x_gold, x_pred, x_label_gold, x_label_pred):
     """Count positions where both the head and the label are correct.

     A position counts when `x_gold` is non-negative, `x_pred` equals
     `x_gold`, and `x_label_pred` equals `x_label_gold`.

     Returns the symbolic sum of those matches.
     """
     # Positions with a negative gold value never count as correct.
     has_gold_head = T.ge(x_gold, 0)
     head_matches = T.eq(x_gold, x_pred)
     head_ok = has_gold_head * head_matches

     label_ok = T.eq(x_label_gold, x_label_pred)

     both_ok = T.bitwise_and(head_ok, label_ok)
     return T.sum(both_ok)
Exemple #17
0
 def clip_around_zero(x, threshold=0.2):
     """Zero out the entries of `x` lying strictly inside (-threshold, threshold).

     Returns the symbolic result of `T.set_subtensor`; entries outside the
     open interval are left as they are.
     """
     under_upper_bound = x < threshold
     over_lower_bound = x > -threshold
     near_zero = T.bitwise_and(under_upper_bound, over_lower_bound)
     return T.set_subtensor(x[near_zero.nonzero()], 0)
Exemple #18
0
    def _get_cost(self, input, truth, alpha=1., min_iou=0.5):
        """Build the symbolic detection cost over all predictive maps.

        For each predictive map, default boxes whose best IoU with a ground
        truth box is at least `min_iou` are treated as positives and
        contribute a coordinate term and a class term.  Default boxes whose
        best IoU lies in [0.1, min_iou) are candidate negatives; a random
        sample of them contributes a "no object" term.

        Parameters
        ----------
        input :
            Not used in this method.
        truth :
            Symbolic ground truth tensor.  NOTE(review): it is dimshuffled as
            a 3-d tensor; axis 2 appears to hold box coordinates followed by
            class scores -- confirm against the caller.
        alpha : float
            Weight applied to the class and "no object" terms.
        min_iou : float
            IoU threshold separating positives from candidate negatives.

        Returns
        -------
        cost, [cost_coord, cost_class, cost_noobj]
            The total cost and its three components.
        """
        # Placeholder; the total is assigned after the loop.
        cost = 0.

        # create ground truth for non-object class
        # (one-hot vector of length num_classes + 1 with the last entry set,
        # made broadcastable over the other four axes)
        neg_example = theano.shared(
            np.zeros(self.num_classes + 1, dtype=theano.config.floatX))
        neg_example = T.set_subtensor(neg_example[-1], 1.)
        neg_example = neg_example.dimshuffle('x', 'x', 0, 'x', 'x')

        cost_coord, cost_class, cost_noobj = 0., 0., 0.

        # One pass per predictive map; `_default_maps` and
        # `network['detection']` are indexed in parallel with it.
        for i in range(self._predictive_maps.__len__()):
            dmap = self._default_maps[i]
            fmap = self._predictive_maps[i]
            # Trailing two dims of the layer's output shape
            # (presumably the spatial size of the map -- confirm).
            shape = layers.get_output_shape(self.network['detection'][i])[2:]

            # get iou between default maps and ground truth
            iou_default = self._get_iou(
                dmap.dimshuffle('x', 'x', 0, 1, 2, 3),
                truth.dimshuffle(0, 1, 'x', 2, 'x', 'x'))
            #pdb.set_trace()
            # get which object for which cell
            idx_match = T.argmax(iou_default, axis=1)

            # extend truth to cover all cell/box/examples
            truth_extended = T.repeat(T.repeat(T.repeat(truth.dimshuffle(
                0, 1, 'x', 2, 'x', 'x'),
                                                        self.ratios.__len__(),
                                                        axis=2),
                                               shape[0],
                                               axis=4),
                                      shape[1],
                                      axis=5)

            # Index grids used to gather, for every example/box/cell, the
            # ground truth selected by idx_match.
            idx1, idx2, idx3, idx4 = meshgrid(T.arange(truth.shape[0]),
                                              T.arange(self.ratios.__len__()),
                                              T.arange(shape[0]),
                                              T.arange(shape[1]))

            # copy truth for every cell/box.
            truth_extended = truth_extended[idx1, idx_match, idx2, :, idx3,
                                            idx4].dimshuffle(0, 1, 4, 2, 3)

            # Best IoU over axis 1 for every default box
            iou_default = iou_default.max(axis=1)

            # Mask of positive default boxes
            iou_gt_min = iou_default >= min_iou

            dmap_extended = dmap.dimshuffle('x', 0, 1, 2, 3)

            # penalize coordinates
            # cost_fmap = 0.

            # Squared error on the four box coordinates, positives only.
            # Entries 0 and 1 are compared with the truth offset divided by
            # entries 2 and 3 of the default box; entries 2 and 3 are
            # compared with the log of the truth/default ratio.
            cost_coord_fmap = 0.
            cost_coord_fmap += ((
                (fmap[:, :, 0] -
                 (truth_extended[:, :, 0] - dmap_extended[:, :, 0]) /
                 dmap_extended[:, :, 2])[iou_gt_min.nonzero()])**2).sum()
            cost_coord_fmap += ((
                (fmap[:, :, 1] -
                 (truth_extended[:, :, 1] - dmap_extended[:, :, 1]) /
                 dmap_extended[:, :, 3])[iou_gt_min.nonzero()])**2).sum()
            cost_coord_fmap += ((
                (fmap[:, :, 2] -
                 T.log(truth_extended[:, :, 2] / dmap_extended[:, :, 2])
                 )[iou_gt_min.nonzero()])**2).sum()
            cost_coord_fmap += ((
                (fmap[:, :, 3] -
                 T.log(truth_extended[:, :, 3] / dmap_extended[:, :, 3])
                 )[iou_gt_min.nonzero()])**2).sum()

            # Cross-entropy between the last num_classes + 1 entries of the
            # truth and of the prediction, positives only.
            cost_class_fmap = -(
                truth_extended[:, :, -(self.num_classes + 1):] *
                T.log(fmap[:, :, -(self.num_classes + 1):])).sum(axis=2)
            cost_class_fmap = cost_class_fmap[iou_gt_min.nonzero()].sum()

            # find negative examples
            iou_default = iou_default.reshape((-1, ))
            # iou_idx_sorted = T.argsort(iou_default)[::-1]

            # iou_st_min = iou_default < min_iou
            # Candidate negatives: 0.1 <= IoU < min_iou
            iou_st_min = T.bitwise_and(iou_default >= 0.1,
                                       iou_default < min_iou)

            # Choose index for top boxes whose overlap is smaller than the min overlap.
            pos_size = iou_gt_min[iou_gt_min.nonzero()].size
            neg_size = pos_size * 3  # ratio of 3 to 1
            #neg_size = 10

            # Flat indices of the candidate negatives.  Sampling is done with
            # replacement when there are no more candidates than neg_size; an
            # empty index vector is used when there are no candidates.
            idx_neg = T.arange(iou_default.shape[0])[iou_st_min.nonzero()]
            replace = T.le(idx_neg.shape[0], neg_size)
            idx_neg = theano.ifelse.ifelse(
                idx_neg.shape[0] > 0,
                self._random_stream.choice((neg_size, ),
                                           a=idx_neg,
                                           replace=replace), T.arange(0))

            # iou_idx_sorted = iou_idx_sorted[iou_st_min[iou_idx_sorted].nonzero()][:neg_size]
            # neg_size = iou_idx_sorted.size

            # Clamp both counts to at least 1 before they are used as
            # divisors below (the sampling above used the unclamped neg_size).
            neg_size, pos_size = T.maximum(1.,
                                           neg_size), T.maximum(1., pos_size)

            # Add the negative examples to the costs.
            cost_noobj_fmap = -(neg_example * T.log(
                fmap[:, :, -(self.num_classes + 1):])).sum(axis=2).reshape(
                    (-1, ))
            cost_noobj_fmap = cost_noobj_fmap[idx_neg].sum()

            #
            # NEW STUFF
            #
            # Accumulate the per-map terms, normalised by the number of
            # positives (coordinates, class) or negatives (no-object).
            cost_coord += cost_coord_fmap / pos_size
            cost_class += alpha * cost_class_fmap / pos_size
            cost_noobj += alpha * cost_noobj_fmap / neg_size
            # cost += cost_fmap

        cost = cost_coord + cost_class + cost_noobj

        return cost, [cost_coord, cost_class, cost_noobj]
def t_noise3d(v, perm, grad3):
    """Build the Theano expression for one 3D noise sample at point `v`.

    The point is skewed onto an integer lattice, one of the six entries of
    the module-level `vertices_options` is selected from the ordering of
    the unskewed offsets, and the attenuated gradient contributions of four
    corners are summed.

    NOTE(review): the structure looks like 3D simplex noise -- confirm
    against the reference the constants (0.5, 23.0) were taken from.

    Parameters
    ----------
    v :
        Indexable whose first three entries are the symbolic x, y and z
        coordinates.
    perm :
        Table indexed with integer tensors.  NOTE(review): indices go up to
        255 + 1 plus a table value, so the table is presumably longer than
        256 entries -- confirm.
    grad3 :
        Table of gradient vectors, indexed with values in [0, 12) and
        dotted with a 3-component offset.

    Returns
    -------
    The symbolic value ``23.0 * (n0 + n1 + n2 + n3)``.
    """
    x = v[0]
    y = v[1]
    z = v[2]

    # Skew the input so the lattice cell containing the point can be found
    # with a plain floor.
    skew_factor = (x + y + z) * 1.0 / 3.0
    i = T.floor(x + skew_factor)
    j = T.floor(y + skew_factor)
    k = T.floor(z + skew_factor)

    # Unskew the cell origin and take the offset of the point from it.
    unskew_factor = (i + j + k) * 1.0 / 6.0
    x0 = x - (i - unskew_factor)
    y0 = y - (j - unskew_factor)
    z0 = z - (k - unskew_factor)

    # Pick the lattice offsets of the second and third corners from the
    # relative order of x0, y0 and z0.
    vertices = T.switch(
        T.ge(x0, y0),
        T.switch(T.ge(y0, z0), vertices_options[0],
                 T.switch(T.ge(x0, z0), vertices_options[1],
                          vertices_options[2])),
        T.switch(T.lt(y0, z0), vertices_options[3],
                 T.switch(T.lt(x0, z0), vertices_options[4],
                          vertices_options[5])))

    # Offsets of the point from the remaining three corners.
    x1 = x0 - vertices[0][0] + 1.0 / 6.0
    y1 = y0 - vertices[0][1] + 1.0 / 6.0
    z1 = z0 - vertices[0][2] + 1.0 / 6.0
    x2 = x0 - vertices[1][0] + 1.0 / 3.0
    y2 = y0 - vertices[1][1] + 1.0 / 3.0
    z2 = z0 - vertices[1][2] + 1.0 / 3.0
    x3 = x0 - 0.5
    y3 = y0 - 0.5
    z3 = z0 - 0.5

    # Wrap the cell coordinates to 0..255 before the table lookups.
    ii = T.bitwise_and(i.astype('int32'), 255)
    jj = T.bitwise_and(j.astype('int32'), 255)
    kk = T.bitwise_and(k.astype('int32'), 255)

    # Gradient index of each corner: three nested lookups in `perm`,
    # reduced modulo 12.
    gi0 = perm[ii + perm[
        jj + perm[
            kk].astype('int32')].astype('int32')] % 12
    gi1 = perm[ii + vertices[0][0] + perm[
        jj + vertices[0][1] + perm[
            kk + vertices[0][2]].astype('int32')].astype('int32')] % 12
    gi2 = perm[ii + vertices[1][0] + perm[
        jj + vertices[1][1] + perm[
            kk + vertices[1][2]].astype('int32')].astype('int32')] % 12
    gi3 = perm[ii + 1 + perm[
        jj + 1 + perm[
            kk + 1].astype('int32')].astype('int32')] % 12

    # Contribution of each corner: zero when t < 0, otherwise t**4 times
    # the dot product of the corner gradient with the offset.
    t0 = 0.5 - x0 ** 2 - y0 ** 2 - z0 ** 2
    n0 = T.switch(
        T.lt(t0, 0),
        0.0,
        t0 ** 4 * T.dot(grad3[gi0.astype('int32')], [x0, y0, z0]))
    t1 = 0.5 - x1 ** 2 - y1 ** 2 - z1 ** 2
    # n1 must be a tensor like n0, n2 and n3; a trailing comma after this
    # call would turn it into a 1-tuple and break the sum below.
    n1 = T.switch(
        T.lt(t1, 0),
        0.0,
        t1 ** 4 * T.dot(grad3[gi1.astype('int32')], [x1, y1, z1]))
    t2 = 0.5 - x2 ** 2 - y2 ** 2 - z2 ** 2
    n2 = T.switch(
        T.lt(t2, 0),
        0.0,
        t2 ** 4 * T.dot(grad3[gi2.astype('int32')], [x2, y2, z2]))
    t3 = 0.5 - x3 ** 2 - y3 ** 2 - z3 ** 2
    n3 = T.switch(
        T.lt(t3, 0),
        0.0,
        t3 ** 4 * T.dot(grad3[gi3.astype('int32')], [x3, y3, z3]))

    return 23.0 * (n0 + n1 + n2 + n3)
Exemple #20
0
	def _get_cost2(
			self,
			output,
			truth,
			rescore=True
		):
		'''
		Build the symbolic detection cost for anchor-box predictions.

		Each ground truth object is assigned to the anchor box and cell where
		it has the best IoU, provided the cell overlaps the object by at
		least `thresh`.  Matched boxes are penalized on coordinates, class
		scores and objectness; boxes with no matched object are penalized on
		objectness only.

		The weights `lambda_obj`, `lambda_noobj` and the overlap threshold
		`thresh` are symbolic scalars created on first call and cached on
		`self` as `_lambda_obj`, `_lambda_noobj` and `_thresh`.

		Params:
		------

		output: symbolic network output.
				NOTE(review): used as a 5-d tensor whose axis 2 holds box
				coordinates, objectness and class scores -- confirm.
		truth: symbolic ground truth, used as a 3-d tensor.
				NOTE(review): entries 0 and 2 (and 1 and 3) of the last axis
				are summed to get the far edge, so boxes appear to be stored
				as (x, y, w, h) -- confirm.
		rescore: if True the objectness target of matched boxes is the IoU
				between the prediction and the ground truth, otherwise it is 1.

		Returns the cost and a list of variables to treat as constant when
		taking the gradient (the IoU when `rescore` is True, else empty).
		'''

		# Create the symbolic hyper-parameters once and reuse them afterwards
		if not hasattr(self, '_lambda_obj'):
			lambda_obj, lambda_noobj, thresh = T.scalar('lambda_obj'), T.scalar('lambda_noobj'), T.scalar('thresh')
			self._lambda_obj, self._lambda_noobj, self._thresh = lambda_obj, lambda_noobj, thresh
		else:
			lambda_obj, lambda_noobj, thresh = self._lambda_obj, self._lambda_noobj, self._thresh
		
		cost = 0.
		# create grid for cells
		# (cell centres in normalized [0, 1] coordinates)
		w_cell, h_cell =  1. / self.output_shape[1], 1. / self.output_shape[0]
		x, y = T.arange(w_cell / 2, 1., w_cell), T.arange(h_cell / 2, 1., h_cell)
		# NOTE(review): the result is unpacked as (y, x) from meshgrid(x, y);
		# whether this swap is intended depends on the project's `meshgrid` -- confirm.
		y, x = meshgrid(x, y)
		
		# reshape truth to match with cell
		truth_cell = truth.dimshuffle(0, 1, 2, 'x','x')
		x, y = x.dimshuffle('x','x',0,1), y.dimshuffle('x','x',0,1)
		
		# calculate overlap between cell and ground truth boxes
		xi, yi = T.maximum(truth_cell[:,:,0], x - w_cell/2), T.maximum(truth_cell[:,:,1], y - h_cell/2)
		xf = T.minimum(truth_cell[:,:,[0,2]].sum(axis=2), x + w_cell/2)
		yf = T.minimum(truth_cell[:,:,[1,3]].sum(axis=2), y + h_cell/2)
		w, h = T.maximum(xf - xi, 0), T.maximum(yf - yi, 0)
		
		# overlap between cell and ground truth box
		# (fraction of the cell area covered by the box)
		overlap = (w * h) / (w_cell * h_cell)
		
		# repeat truth boxes
		truth_boxes = truth.dimshuffle(0, 1, 'x', 2, 'x', 'x')
		
		# create grid for anchor boxes
		# (top-left corner of every cell, then unit width/height, repeated per anchor)
		anchors = T.concatenate((x.dimshuffle(0,1,'x','x',2,3) - w_cell/2, y.dimshuffle(0,1,'x','x',2,3) - h_cell/2), axis=3)
		anchors = T.concatenate((anchors, T.ones_like(anchors)), axis=3)
		anchors = T.repeat(anchors, self.boxes.__len__(), axis=2)
		
		# Anchor widths/heights taken from self.boxes, broadcastable over the other axes
		w_acr = theano.shared(np.asarray([b[0] for b in self.boxes]), name='w_acr', borrow=True).dimshuffle('x','x',0,'x','x')
		h_acr = theano.shared(np.asarray([b[1] for b in self.boxes]), name='h_acr', borrow=True).dimshuffle('x','x',0,'x','x')
		
		anchors = T.set_subtensor(anchors[:,:,:,2], anchors[:,:,:,2] * w_acr)
		anchors = T.set_subtensor(anchors[:,:,:,3], anchors[:,:,:,3] * h_acr)
		
		# find iou between anchors and ground truths
		xi, yi = T.maximum(truth_boxes[:,:,:,0], anchors[:,:,:,0]), T.maximum(truth_boxes[:,:,:,1], anchors[:,:,:,1])
		xf = T.minimum(truth_boxes[:,:,:,[0,2]].sum(axis=3), anchors[:,:,:,[0,2]].sum(axis=3))
		yf = T.minimum(truth_boxes[:,:,:,[1,3]].sum(axis=3), anchors[:,:,:,[1,3]].sum(axis=3))
		w, h = T.maximum(xf - xi, 0), T.maximum(yf - yi, 0)
		
		isec = w * h
		iou = isec / (T.prod(truth_boxes[:,:,:,[2,3]], axis=3) + T.prod(anchors[:,:,:,[2,3]], axis=3) - isec)
		
		overlap = overlap.dimshuffle(0,1,'x',2,3)
		
		# Best object per anchor (argmax over axis 1) and best anchor per object (argmax over axis 2)
		best_iou_obj_idx = T.argmax(iou, axis=1).dimshuffle(0,'x',1,2,3)
		best_iou_box_idx = T.argmax(iou, axis=2).dimshuffle(0,1,'x',2,3)
		
		# Index grids over the object and anchor axes, compared with the argmax results below
		_,obj_idx,box_idx,_,_ = meshgrid(
			T.arange(truth.shape[0]),
			T.arange(truth.shape[1]),
			T.arange(self.boxes.__len__()),
			T.arange(self.output_shape[0]),
			T.arange(self.output_shape[1])
		)
		
		# define logical matrix assigning object to correct anchor box and cell.
		best_iou_idx = T.bitwise_and(
			T.bitwise_and(
				T.eq(best_iou_box_idx, box_idx),
				T.eq(best_iou_obj_idx, obj_idx)
			),
			overlap >= thresh
		)
		
		constants = []
		if rescore: 
			# scale predictions correctly
			pred = output.dimshuffle(0,'x',1,2,3,4)
			pred = T.set_subtensor(pred[:,:,:,0], pred[:,:,:,0] + x.dimshuffle(0,1,'x',2,3))
			pred = T.set_subtensor(pred[:,:,:,1], pred[:,:,:,1] + y.dimshuffle(0,1,'x',2,3))
			pred = T.set_subtensor(pred[:,:,:,2], w_acr * T.exp(pred[:,:,:,2]))
			pred = T.set_subtensor(pred[:,:,:,3], h_acr * T.exp(pred[:,:,:,3]))
			
			# IoU between the rescaled predictions and the ground truth;
			# this replaces the anchor/truth `iou` computed above.
			xi, yi = T.maximum(pred[:,:,:,0], truth_boxes[:,:,:,0]), T.maximum(pred[:,:,:,1], truth_boxes[:,:,:,1])
			xf = T.minimum(pred[:,:,:,[0,2]].sum(axis=3), truth_boxes[:,:,:,[0,2]].sum(axis=3))
			yf = T.minimum(pred[:,:,:,[1,3]].sum(axis=3), truth_boxes[:,:,:,[1,3]].sum(axis=3))
			w, h = T.maximum(xf - xi, 0.), T.maximum(yf - yi, 0.)
			
			isec = w * h
			iou = isec / (pred[:,:,:,[2,3]].prod(axis=3) + truth_boxes[:,:,:,[2,3]].prod(axis=3) - isec)

			# make sure iou is considered constant when taking gradient
			constants.append(iou)
	
		# format ground truths correctly
		# NOTE(review): the assignment target is written twice on the next line; harmless.
		truth_boxes = truth_boxes = T.repeat(
			T.repeat(
				T.repeat(truth_boxes, self.boxes.__len__(), axis=2),
				self.output_shape[0], axis=4
			),
			self.output_shape[1], axis=5
		)
		
		# Express the targets relative to the anchors: offsets for x/y, log-ratios for w/h
		truth_boxes = T.set_subtensor(truth_boxes[:,:,:,0], truth_boxes[:,:,:,0] - anchors[:,:,:,0])
		truth_boxes = T.set_subtensor(truth_boxes[:,:,:,1], truth_boxes[:,:,:,1] - anchors[:,:,:,1])
		truth_boxes = T.set_subtensor(truth_boxes[:,:,:,2], T.log(truth_boxes[:,:,:,2] / anchors[:,:,:,2]))
		truth_boxes = T.set_subtensor(truth_boxes[:,:,:,3], T.log(truth_boxes[:,:,:,3] / anchors[:,:,:,3]))
		
		# add dimension for objects per image
		# (the cost terms below use the raw network output, not the rescaled `pred` from the rescore branch)
		pred = T.repeat(output.dimshuffle(0,'x',1,2,3,4), truth.shape[1], axis=1)
				
		# penalize coordinates
		cost += lambda_obj * T.mean(((pred[:,:,:,:4] - truth_boxes[:,:,:,:4])**2).sum(axis=3)[best_iou_idx.nonzero()])
				
		# penalize class scores
		cost += lambda_obj * T.mean((-truth_boxes[:,:,:,-self.num_classes:] * T.log(pred[:,:,:,-self.num_classes:])).sum(axis=3)[best_iou_idx.nonzero()])
		
		# penalize objectness score
		if rescore:
			cost += lambda_obj * T.mean(((pred[:,:,:,4] - iou)**2)[best_iou_idx.nonzero()])
		else:
			cost += lambda_obj * T.mean(((pred[:,:,:,4] - 1.)**2)[best_iou_idx.nonzero()])
		
		# flip all matched and penalize all un-matched objectness scores
		not_matched_idx = best_iou_idx.sum(axis=1) > 0
		# NOTE(review): `bitwise_not` is referenced without the `T.` prefix used
		# elsewhere in this method; it must be imported at module level -- confirm.
		not_matched_idx = bitwise_not(not_matched_idx)

		# penalize objectness score for non-matched boxes
		cost += lambda_noobj * T.mean((pred[:,0,:,4]**2)[not_matched_idx.nonzero()])
		
		return cost, constants
Exemple #21
0
def _zoom(a_lo, a_hi, phi_lo, phi_hi, derphi_lo,
          phi, derphi, phi0, derphi0, c1, c2,
          n_iters=10,
          profile=False,
          mode=theano.Mode(linker='cvm')):
    """
    Build the symbolic graph for the "zoom" phase of a Wolfe line search.

    Part of the optimization algorithm in `scalar_search_wolfe2`.

    One trial step is computed outside the loop (quadratic interpolation
    with a bisection fallback).  Further trial steps are produced by the
    nested `while_zoom` function, which is iterated through `scan` for at
    most `n_iters` steps and stops early once its `stop` condition holds.

    Parameters
    ----------
    a_lo : scalar
        Step size
    a_hi : scalar
        Step size
    phi_lo : scalar
        Value of f at a_lo
    phi_hi : scalar
        Value of f at a_hi
    derphi_lo : scalar
        Value of derivative at a_lo
    phi : callable
        Generates computational graph
    derphi : callable
        Generates computational graph
    phi0 : scalar
        Value of f at 0
    derphi0 : scalar
        Value of the derivative at 0
    c1 : scalar
        Wolfe parameter
    c2 : scalar
        Wolfe parameter
    n_iters : int
        Maximum number of loop iterations (passed to `scan` as `n_steps`)
    profile : bool
        If you want printouts of profiling information
    mode :
        Compilation mode forwarded to `scan`.  The default object is created
        once, when this function is defined.

    Returns
    -------
    a_star, val_star, valprime
        Symbolic variables.  When the initial trial step already satisfies
        the `only_if` condition they are that step, the value of `phi` at
        that step and `vderphi_aj`; otherwise they are read from the
        outputs of the `while_zoom` scan.

    Notes
    -----
    The updates returned by `scan` are discarded.  Two progress messages
    are printed to stdout while the graph is being built.
    """
    # Function representing the computations of one step of the while loop
    def while_zoom(phi_rec, a_rec, a_lo, a_hi, phi_hi,
                   phi_lo, derphi_lo, a_star, val_star, valprime):
        # NOTE: `delta1` and `delta2` are closure variables assigned further
        # down in the enclosing `_zoom`; they exist by the time `scan` calls
        # this function.  The incoming `a_star`, `val_star` and `valprime`
        # are not read; fresh values are computed and returned.

        # interpolate to find a trial step length between a_lo and
        # a_hi Need to choose interpolation here.  Use cubic
        # interpolation and then if the result is within delta *
        # dalpha or outside of the interval bounded by a_lo or a_hi
        # then use quadratic interpolation, if the result is still too
        # close, then use bisection
        dalpha = a_hi - a_lo
        # Order the end points so that a <= b whatever the sign of dalpha
        a = TT.switch(dalpha < zero, a_hi, a_lo)
        b = TT.switch(dalpha < zero, a_lo, a_hi)

        # minimizer of cubic interpolant
        # (uses phi_lo, derphi_lo, phi_hi, and the most recent value of phi)
        #
        # if the result is too close to the end points (or out of the
        # interval) then use quadratic interpolation with phi_lo,
        # derphi_lo and phi_hi if the result is still too close to the
        # end points (or out of the interval) then use bisection

        # cubic interpolation
        cchk = delta1 * dalpha
        a_j_cubic = _cubicmin(a_lo, phi_lo, derphi_lo,
                              a_hi, phi_hi, a_rec, phi_rec)
        # quadratic interpolation
        qchk = delta2 * dalpha
        a_j_quad = _quadmin(a_lo, phi_lo, derphi_lo, a_hi, phi_hi)
        # Reject the quadratic step if it is NaN or too close to (or
        # outside) the end points; bisection is used instead.
        cond_q = lazy_or('condq',
                         TT.isnan(a_j_quad),
                         a_j_quad > b - qchk,
                         a_j_quad < a + qchk)
        a_j_quad = TT.switch(
            cond_q,
            a_lo + numpy.asarray(0.5, dtype=theano.config.floatX) * dalpha,
            a_j_quad)

        # pick between the two ..
        cond_c = lazy_or('condc',
                         TT.isnan(a_j_cubic),
                         TT.bitwise_or(a_j_cubic > b - cchk,
                                       a_j_cubic < a + cchk))
        # The cubic step is kept unless `cond_c` rejects it, in which case
        # the (already safeguarded) quadratic step is used.
        a_j = TT.switch(cond_c, a_j_quad, a_j_cubic)

        # Check new value of a_j
        phi_aj = phi(a_j)
        derphi_aj = derphi(a_j)

        # Loop termination: sufficient decrease, improvement over phi_lo and
        # the curvature condition all hold at a_j.
        stop = lazy_and('stop',
                        TT.bitwise_and(phi_aj <= phi0 + c1 * a_j * derphi0,
                                       phi_aj < phi_lo),
                        abs(derphi_aj) <= -c2 * derphi0)

        # cond1: a_j fails sufficient decrease or does not improve on phi_lo,
        #        so a_j becomes the new "hi" end point.
        # cond2: only relevant when cond1 is false; the derivative at a_j has
        #        the same sign as (a_hi - a_lo), so the old "lo" becomes "hi".
        cond1 = TT.bitwise_or(phi_aj > phi0 + c1 * a_j * derphi0,
                              phi_aj >= phi_lo)
        cond2 = derphi_aj * (a_hi - a_lo) >= zero

        # Switches just make more sense here because they have a C
        # implementation and they get composed
        # (the outer selection uses `ifelse`, the inner one `TT.switch`)
        phi_rec = ifelse(cond1, phi_hi,
                         TT.switch(cond2, phi_hi, phi_lo),
                         name='phi_rec')
        a_rec = ifelse(cond1, a_hi,
                       TT.switch(cond2, a_hi, a_lo),
                       name='a_rec')
        a_hi = ifelse(cond1, a_j,
                      TT.switch(cond2, a_lo, a_hi),
                      name='a_hi')
        phi_hi = ifelse(cond1, phi_aj,
                        TT.switch(cond2, phi_lo, phi_hi),
                        name='phi_hi')

        # The "lo" end point moves to a_j only when cond1 is false.  These
        # must come after the four updates above, which still read the old
        # a_lo / phi_lo.
        a_lo = TT.switch(cond1, a_lo, a_j)
        phi_lo = TT.switch(cond1, phi_lo, phi_aj)
        derphi_lo = ifelse(cond1, derphi_lo, derphi_aj, name='derphi_lo')

        a_star = a_j
        val_star = phi_aj
        # Derivative at a_j, or NaN when cond1 holds or cond2 is false
        valprime = ifelse(cond1, nan,
                          TT.switch(cond2, derphi_aj, nan),
                          name='valprime')

        # The output order must match the argument order of this function
        # and the order of `states` built in `_zoom`.
        return ([phi_rec,
                 a_rec,
                 a_lo,
                 a_hi,
                 phi_hi,
                 phi_lo,
                 derphi_lo,
                 a_star,
                 val_star,
                 valprime],
                theano.scan_module.scan_utils.until(stop))

    maxiter = n_iters
    # cubic interpolant check
    delta1 = TT.constant(numpy.asarray(0.2,
                                       dtype=theano.config.floatX))
    # quadratic interpolant check
    delta2 = TT.constant(numpy.asarray(0.1,
                                       dtype=theano.config.floatX))
    # NOTE(review): these two values are overwritten below before being read.
    phi_rec = phi0
    a_rec = zero

    # Initial iteration

    dalpha = a_hi - a_lo
    a = TT.switch(dalpha < zero, a_hi, a_lo)
    b = TT.switch(dalpha < zero, a_lo, a_hi)

    # No cubic step is computed in this initial iteration: the trial step is
    # the minimizer of the quadratic interpolant through phi_lo, derphi_lo
    # and phi_hi, and if that is NaN or too close to the end points (or out
    # of the interval) bisection is used instead.

    # quadratic interpolation
    qchk = delta2 * dalpha
    a_j = _quadmin(a_lo, phi_lo, derphi_lo, a_hi, phi_hi)
    cond_q = lazy_or('mcond_q',
                     TT.isnan(a_j),
                     TT.bitwise_or(a_j > b - qchk,
                                   a_j < a + qchk))

    # Fall back to bisection when the quadratic step is rejected
    a_j = TT.switch(
        cond_q,
        a_lo + numpy.asarray(0.5, dtype=theano.config.floatX) * dalpha,
        a_j)

    # Check new value of a_j
    phi_aj = phi(a_j)
    derphi_aj = derphi(a_j)

    # Same bracket-update conditions as in `while_zoom`
    cond1 = TT.bitwise_or(phi_aj > phi0 + c1 * a_j * derphi0,
                          phi_aj >= phi_lo)
    cond2 = derphi_aj * (a_hi - a_lo) >= zero

    # Switches just make more sense here because they have a C
    # implementation and they get composed
    phi_rec = ifelse(cond1, phi_hi,
                     TT.switch(cond2, phi_hi, phi_lo),
                     name='mphirec')
    a_rec = ifelse(cond1, a_hi,
                   TT.switch(cond2, a_hi, a_lo),
                   name='marec')
    a_hi = ifelse(cond1, a_j,
                  TT.switch(cond2, a_lo, a_hi),
                  name='mahi')
    phi_hi = ifelse(cond1, phi_aj,
                    TT.switch(cond2, phi_lo, phi_hi),
                    name='mphihi')

    # True when the initial trial step already meets the same three
    # conditions that `while_zoom` uses as its stopping test.  It reads
    # phi_lo, so it must be built before phi_lo is updated below.
    onlyif = lazy_and('only_if',
                      TT.bitwise_and(phi_aj <= phi0 + c1 * a_j * derphi0,
                                     phi_aj < phi_lo),
                      abs(derphi_aj) <= -c2 * derphi0)

    a_lo = TT.switch(cond1, a_lo, a_j)
    phi_lo = TT.switch(cond1, phi_lo, phi_aj)
    derphi_lo = ifelse(cond1, derphi_lo, derphi_aj, name='derphi_lo_main')
    # Rename the graph variables (this replaces the names given above)
    phi_rec.name = 'phi_rec'
    a_rec.name = 'a_rec'
    a_lo.name = 'a_lo'
    a_hi.name = 'a_hi'
    phi_hi.name = 'phi_hi'
    phi_lo.name = 'phi_lo'
    derphi_lo.name = 'derphi_lo'
    vderphi_aj = ifelse(cond1, nan, TT.switch(cond2, derphi_aj, nan),
                        name='vderphi_aj')
    # Initial loop states, in the argument order of `while_zoom`.  Each one
    # gets a leading axis of length one that is marked non-broadcastable.
    # The last three (a_star, val_star, valprime) start as `zero`.
    states = []
    states += [TT.unbroadcast(TT.shape_padleft(phi_rec), 0)]
    states += [TT.unbroadcast(TT.shape_padleft(a_rec), 0)]
    states += [TT.unbroadcast(TT.shape_padleft(a_lo), 0)]
    states += [TT.unbroadcast(TT.shape_padleft(a_hi), 0)]
    states += [TT.unbroadcast(TT.shape_padleft(phi_hi), 0)]
    states += [TT.unbroadcast(TT.shape_padleft(phi_lo), 0)]
    states += [TT.unbroadcast(TT.shape_padleft(derphi_lo), 0)]
    states += [TT.unbroadcast(TT.shape_padleft(zero), 0)]
    states += [TT.unbroadcast(TT.shape_padleft(zero), 0)]
    states += [TT.unbroadcast(TT.shape_padleft(zero), 0)]
    # Progress trace.  Written as a call with a single argument so that it
    # parses, and prints the same text, on both Python 2 and Python 3.
    print('while_zoom')
    # NOTE(review): `scan` is called with a `states=` keyword, so this is
    # presumably a project-specific wrapper rather than `theano.scan` --
    # confirm, including what index 0 of each output refers to.
    outs, updates = scan(while_zoom,
                         states=states,
                         n_steps=maxiter,
                         name='while_zoom',
                         mode=mode,
                         profile=profile)
    print('done_while')
    # Use the initial trial step if it was already acceptable, otherwise the
    # values produced by the loop (outputs 7, 8 and 9 are a_star, val_star
    # and valprime).
    a_star = ifelse(onlyif, a_j, outs[7][0], name='astar')
    val_star = ifelse(onlyif, phi_aj, outs[8][0], name='valstar')
    valprime = ifelse(onlyif, vderphi_aj, outs[9][0], name='valprime')

    ## WARNING !! I ignore updates given by scan which I should not do !!!
    return a_star, val_star, valprime
Exemple #22
0
def logic_and(x, y):
  """Return the element-wise AND of ``x`` and ``y``, computed with ``T.bitwise_and``."""
  conjunction = T.bitwise_and(x, y)
  return conjunction
Exemple #23
0
 def clip_around_zero(x, threshold=0.2):
     """Set to zero every entry of ``x`` strictly inside ``(-threshold, threshold)``.

     Entries outside that open interval are left as they are; the result is
     the tensor produced by ``T.set_subtensor``.
     """
     below_upper = x < threshold
     above_lower = x > -threshold
     near_zero = T.bitwise_and(below_upper, above_lower)
     return T.set_subtensor(x[near_zero.nonzero()], 0)
Exemple #24
0
	def _get_cost(
		self,
		output,
		truth,
		S,
		B,
		C,
		rescore=False,
		lmbda_coord=5.,
		lmbda_noobj=0.5,
		lmbda_obj=1.,
		min_overlap=1e-5,
		use_overlap=False
		):
		'''
		Build the YOLO-style detection cost for several objects per image without
		looping over the objects (only the class-column lookup uses a small scan),
		which keeps the number of variables in the theano graph down.

		For every ground truth object, each grid cell picks one "responsible" box:
		the predicted box with the highest iou against that object or, when no box
		of the cell overlaps it, the box with the smallest squared coordinate error.
		A cell is associated with the object whose iou with the cell is higher than
		that of any other ground truth object; the object-ness scores of the cell's
		other boxes are pushed towards zero.

		Params:
		------

		output: network output, indexed here as a 4-D tensor whose axis 1 holds B groups
				of (x, y, w, h, object-ness) followed by C class scores, and whose last
				two axes are the S[0] x S[1] grid.
		truth: ground truth, indexed here as a matrix whose columns hold consecutive groups
				of (x, y, w, h) followed by C class entries, one group per object.
		S: grid shape; S[0] and S[1] give the cell height (1/S[0]) and width (1/S[1]).
		B: number of boxes predicted per cell.
		C: number of classes.
		rescore: when False the object-ness target is 1 for responsible boxes; when True
				the target is the iou between the predicted box and the object.
		lmbda_coord: weight of the x, y, sqrt(w), sqrt(h) terms.
		lmbda_noobj: weight of the term pushing object-ness towards zero.
		lmbda_obj: weight of the class term.
		min_overlap: a cell is only matched to an object if its overlap score exceeds this.
		use_overlap: Yolo, as described in the original paper, assigns a ground truth label if
				the ground truth box overlaps at all with the cell.  With images containing many
				small objects, several objects may overlap a single cell, which produces a sort
				of averaged bounding box that looks pretty bad.  With use_overlap the cell score
				is intersection / cell area instead of iou, so a cell is not assigned to a
				ground truth label unless it overlaps by some semi-significant amount.

		Returns:
		------

		(cost, [iou]): the cost divided by max(1, truth.shape[0]) and the list of variables
				that should not be backpropagated through.
		'''

		# calculate height/width of individual cell
		block_height, block_width = 1. / S[0], 1./ S[1]

		# get the offset of each cell (axis layout is whatever meshgrid2D returns)
		offset_x, offset_y = meshgrid2D(T.arange(0,1,block_width), T.arange(0,1,block_height))

		# get indices for x,y,w,h,object-ness for easy access
		x_idx, y_idx = T.arange(0,5*B,5), T.arange(1,5*B, 5)
		w_idx, h_idx = T.arange(2,5*B,5), T.arange(3,5*B,5)
		conf_idx = T.arange(4,5*B,5)

		# Get position predictions with offsets; a broadcastable axis 1 is inserted
		# so that predictions line up against the ground truth objects.
		pred_x = (output[:,x_idx] + offset_x.dimshuffle('x','x',0,1)).dimshuffle(0,'x',1,2,3)
		pred_y = (output[:,y_idx] + offset_y.dimshuffle('x','x',0,1)).dimshuffle(0,'x',1,2,3)
		pred_w, pred_h = output[:,w_idx].dimshuffle(0,'x',1,2,3), output[:,h_idx].dimshuffle(0,'x',1,2,3)
		pred_conf = output[:,conf_idx].dimshuffle(0,'x',1,2,3)
		pred_class = output[:,-C:].dimshuffle(0,'x',1,2,3)

		# Column indices of x,y,w,h and of the C class entries for every object in truth.
		x_idx, y_idx = T.arange(0,truth.shape[1],4+C), T.arange(1,truth.shape[1],4+C)
		w_idx, h_idx = T.arange(2,truth.shape[1],4+C), T.arange(3,truth.shape[1],4+C)
		class_idx,_ = theano.scan(
			lambda x: T.arange(x,x+C,1),
			sequences = T.arange(4,truth.shape[1],4+C)
		)

		truth_x, truth_y = truth[:,x_idx], truth[:,y_idx]
		truth_w, truth_h = truth[:,w_idx], truth[:,h_idx]
		truth_class = truth[:, class_idx]

		# Ground truth coordinates broadcast against the 5-D predictions.
		truth_x5 = truth_x.dimshuffle(0,1,'x','x','x')
		truth_y5 = truth_y.dimshuffle(0,1,'x','x','x')
		truth_w5 = truth_w.dimshuffle(0,1,'x','x','x')
		truth_h5 = truth_h.dimshuffle(0,1,'x','x','x')

		# Get intersection region bounding box coordinates
		xi = T.maximum(pred_x, truth_x5)
		xf = T.minimum(pred_x + pred_w, (truth_x + truth_w).dimshuffle(0,1,'x','x','x'))
		yi = T.maximum(pred_y, truth_y5)
		yf = T.minimum(pred_y + pred_h, (truth_y + truth_h).dimshuffle(0,1,'x','x','x'))
		w, h = T.maximum(xf - xi, 0.), T.maximum(yf - yi, 0.)

		# Calculate iou score for predicted boxes and truth
		isec = w * h
		union = (pred_w * pred_h) + (truth_w * truth_h).dimshuffle(0,1,'x','x','x') - isec
		iou = T.maximum(isec/union, 0.)

		# Squared coordinate error, used to pick a box when every iou is 0.
		# All four coordinates contribute (width as well as height).
		squared_error = (
			(pred_x - truth_x5)**2 + (pred_y - truth_y5)**2 +
			(pred_w - truth_w5)**2 + (pred_h - truth_h5)**2
		)

		# Get index matrix representing max along the box dimension for the iou score (reps 'responsible' box).
		maxval_idx, _ = meshgrid2D(T.arange(B), T.arange(truth.shape[0]))
		maxval_idx = maxval_idx.dimshuffle(0,'x',1,'x','x')
		maxval_idx = T.repeat(T.repeat(maxval_idx,S[0],3),S[1],4)

		# determine which box is responsible by giving box with highest iou score (if iou > 0) or smallest squared error.
		greater_iou = T.eq(maxval_idx, iou.argmax(axis=2).dimshuffle(0,1,'x',2,3))
		smaller_se = T.eq(maxval_idx, squared_error.argmin(axis=2).dimshuffle(0,1,'x',2,3))
		box_is_resp = T.switch(iou.max(axis=2, keepdims=True) > 0, greater_iou, smaller_se)

		# Get matrix for the width/height of each cell
		width, height = T.ones(S) / S[1], T.ones(S) / S[0]
		width, height = width.dimshuffle('x','x',0,1), height.dimshuffle('x','x',0,1)
		offset_x, offset_y = offset_x.dimshuffle('x','x',0,1), offset_y.dimshuffle('x','x',0,1)

		# Get bounding box for intersection between CELL and ground truth box.
		xi = T.maximum(offset_x, truth_x.dimshuffle(0,1,'x','x'))
		xf = T.minimum(offset_x + width, (truth_x + truth_w).dimshuffle(0,1,'x','x'))
		yi = T.maximum(offset_y, truth_y.dimshuffle(0,1,'x','x'))
		yf = T.minimum(offset_y + height, (truth_y + truth_h).dimshuffle(0,1,'x','x'))
		w, h = T.maximum(xf - xi, 0.), T.maximum(yf - yi, 0.)

		# Calculate overlap score for the cell: iou by default, or the fraction
		# of the cell covered by the object when use_overlap is set.
		isec = w * h
		if not use_overlap:
			union = (width * height) + (truth_w* truth_h).dimshuffle(0,1,'x','x') - isec
			iou_cell = T.maximum(isec/union, 0.).dimshuffle(0,1,'x',2,3)
		else:
			iou_cell = T.maximum(isec / (width * height), 0.).dimshuffle(0,1,'x',2,3)

		# Index matrix over the object axis, used to find the object each cell overlaps most.
		maxval_idx, _ = meshgrid2D(T.arange(iou_cell.shape[1]), T.arange(iou_cell.shape[0]))
		maxval_idx = maxval_idx.dimshuffle(0,1,'x','x','x')
		maxval_idx = T.repeat(T.repeat(T.repeat(maxval_idx, B, 2), S[0], 3), S[1], 4)

		obj_for_cell = T.eq(maxval_idx, iou_cell.argmax(axis=1).dimshuffle(0,'x',1,2,3))

		# Get logical matrix representing minimum iou score for cell to be considered overlapping ground truth.
		cell_intersects = (iou_cell > min_overlap)

		obj_in_cell_and_resp = T.bitwise_and(T.bitwise_and(cell_intersects, box_is_resp), obj_for_cell)
		conf_is_zero = T.bitwise_and(
			bitwise_not(T.bitwise_and(cell_intersects, box_is_resp)),
			obj_for_cell
		)
		conf_is_zero = conf_is_zero.sum(axis=1, keepdims=True)

		# repeat the predicted class scores once per ground truth object.
		pred_class = T.repeat(pred_class, truth.shape[1] // (4 + C), axis=1)

		# repeat the ground truth for class probabilities for each cell.
		truth_class_rep = T.repeat(T.repeat(truth_class.dimshuffle(0,1,2,'x','x'), S[0], axis=3), S[1], axis=4)
		# repeat "cell overlaps" logical matrix for the number of classes.
		cell_intersects = T.repeat(cell_intersects, C, axis=2)

		# Without rescoring the object-ness target is simply 1.
		if not rescore:
			iou = T.ones_like(iou)

		# Positions of the (object, box, cell) entries that are responsible for an object.
		resp = obj_in_cell_and_resp.nonzero()
		cost = (
			T.sum((pred_conf - iou)[resp]**2) +
			lmbda_noobj * T.sum((pred_conf[conf_is_zero.nonzero()])**2) +
			lmbda_coord * T.sum((pred_x - truth_x5)[resp]**2) +
			lmbda_coord * T.sum((pred_y - truth_y5)[resp]**2) +
			lmbda_coord * T.sum((safe_sqrt(pred_w) - safe_sqrt(truth_w5))[resp]**2) +
			lmbda_coord * T.sum((safe_sqrt(pred_h) - safe_sqrt(truth_h5))[resp]**2) +
			lmbda_obj * T.sum(((pred_class - truth_class_rep)[cell_intersects.nonzero()])**2)
		)

		cost /= T.maximum(1., truth.shape[0])
		return cost, [iou]
Exemple #25
0
    def while_zoom(phi_rec, a_rec, a_lo, a_hi, phi_hi,
                   phi_lo, derphi_lo, a_star, val_star, valprime):
        """One iteration of the zoom phase of a line search, written as a scan step.

        Takes the current bracket (a_lo, a_hi) with the associated function
        values (phi_lo, phi_hi), the derivative at a_lo (derphi_lo) and the
        most recently discarded point (a_rec, phi_rec), proposes a trial step
        a_j by interpolation, evaluates phi and derphi there and shrinks the
        bracket.

        The incoming a_star, val_star and valprime are never read; they are
        overwritten with the values for the new trial step before returning.

        Returns a pair: the list of the ten updated state variables, in the
        same order as the parameters, and an ``until(stop)`` condition.

        zero, nan, delta1, delta2, c1, c2, phi, derphi, phi0, derphi0,
        lazy_and, lazy_or, _cubicmin and _quadmin are not defined in this
        function; presumably they come from the enclosing scope.
        """
        # Interpolate to find a trial step length between a_lo and a_hi.
        # Use cubic interpolation first; if the result is within
        # delta1 * dalpha of an end point or outside the interval bounded
        # by a_lo and a_hi, use quadratic interpolation; if that result is
        # still too close (within delta2 * dalpha), use bisection.
        dalpha = a_hi-a_lo
        # a and b are the bracket ends ordered by the sign of dalpha.
        a = TT.switch( dalpha < zero, a_hi, a_lo)
        b = TT.switch( dalpha < zero, a_lo, a_hi)

        # minimizer of cubic interpolant
        # (uses phi_lo, derphi_lo, phi_hi, and the most recent value of phi)
        #
        # if the result is too close to the end points (or out of the
        # interval) then use quadratic interpolation with phi_lo,
        # derphi_lo and phi_hi; if the result is still too close to the
        # end points (or out of the interval) then use bisection

        # cubic interpolation
        cchk = delta1*dalpha
        a_j_cubic = _cubicmin(a_lo, phi_lo, derphi_lo, a_hi, phi_hi, a_rec, phi_rec)
        # quadratic interpolation
        qchk = delta2*dalpha
        a_j_quad = _quadmin(a_lo, phi_lo, derphi_lo, a_hi, phi_hi)
        # Reject the quadratic candidate if it is NaN or too close to / outside
        # the bracket, and fall back to the midpoint a_lo + 0.5*dalpha.
        cond_q = lazy_or('condq',TT.isnan(a_j_quad), a_j_quad > b-qchk, a_j_quad < a + qchk)
        a_j_quad = TT.switch(cond_q, a_lo +
                             numpy.asarray(0.5, dtype=theano.config.floatX)*dalpha, a_j_quad)


        # pick between the two: the cubic candidate is rejected under the
        # same kind of test, using the cchk margin.
        cond_c = lazy_or('condc',TT.isnan(a_j_cubic), TT.bitwise_or(a_j_cubic > b -
                                                            cchk, a_j_cubic
                                                            < a + cchk))
        # The original intent was a lazy ifelse that decides whether the
        # quadratic interpolation needs to run at all; the active line is
        # a plain (non-lazy) switch and the ifelse variant is disabled.
        a_j = TT.switch(cond_c, a_j_quad, a_j_cubic)
        #a_j = ifelse(cond_c, a_j_quad,  a_j_cubic)

        # Check new value of a_j
        phi_aj = phi(a_j)
        derphi_aj = derphi(a_j)

        # Termination test: sufficient decrease (c1), improvement over phi_lo,
        # and the curvature bound (c2) on |derphi_aj|.
        stop = lazy_and('stop', TT.bitwise_and(phi_aj <= phi0 + c1*a_j*derphi0,
                         phi_aj < phi_lo),
                        abs(derphi_aj) <= -c2*derphi0)


        # cond1: the trial step fails the decrease test, so it becomes the new a_hi.
        # cond2: the slope at a_j has the same sign as (a_hi - a_lo), so the
        #        old a_lo becomes the new a_hi.
        cond1 = TT.bitwise_or(phi_aj > phi0 + c1*a_j*derphi0,
                              phi_aj >= phi_lo)
        cond2 = derphi_aj*(a_hi - a_lo) >= zero

        # Switches just make more sense here because they have a C
        # implementation and they get composed
        #
        # NOTE: the order of the assignments below matters. Each right-hand
        # side must read the *old* a_hi / a_lo / phi_hi / phi_lo, so the
        # "rec" values are computed first, then the "hi" values, and the
        # "lo" values last.
        phi_rec = ifelse( cond1, phi_hi,
                            TT.switch( cond2, phi_hi, phi_lo), name =
                         'phi_rec')
        a_rec   = ifelse( cond1, a_hi,
                            TT.switch( cond2, a_hi, a_lo), name='a_rec')
        a_hi    = ifelse( cond1, a_j,
                            TT.switch( cond2, a_lo, a_hi), name='a_hi')
        phi_hi  = ifelse( cond1, phi_aj,
                            TT.switch( cond2, phi_lo, phi_hi), name='phi_hi')

        a_lo      = TT.switch(cond1, a_lo, a_j)
        phi_lo    = TT.switch(cond1, phi_lo, phi_aj)
        derphi_lo = ifelse(cond1, derphi_lo, derphi_aj, name='derphi_lo')

        # Candidate result for this iteration; valprime is only meaningful
        # (non-NaN) when neither cond1 nor the negation of cond2 applies.
        a_star = a_j
        val_star = phi_aj
        valprime = ifelse(cond1, nan, TT.switch(cond2, derphi_aj,
                                                  nan), name='valprime')

        # New state, in parameter order, plus the early-termination condition.
        return ( [ phi_rec,
                  a_rec,
                  a_lo,
                  a_hi,
                  phi_hi,
                  phi_lo,
                  derphi_lo,
                  a_star,
                  val_star,
                  valprime],
                theano.scan_module.scan_utils.until(stop) )
Exemple #26
0
    def __init__(self, inputs, labels, y_mask,
                 n_dim,
                 cutoff, project_factor=4):
        '''
        Build the parameters and the training loss of an adaptive softmax:
        a "head" classifier over the frequent words plus one extra label per
        tail cluster, and one projected "tail" classifier per cluster.

        Args:
            inputs: flattened logits with shape of [n_step*n_batch, n_dim]
            labels: flattened labels with shape of [n_step*n_batch]
            y_mask: mask the null space of sentences with shape of [n_step*n_batch]
            n_dim: size of the last axis of inputs
            cutoff: frequency binning, i.e. [2000, vocab_size]
            project_factor: project for low-frequency words; the input
                dimension is divided by project_factor for the first tail
                cluster, by project_factor**2 for the second, and so on
        '''
        self.input_dim = n_dim
        self.sample_num = inputs.shape[0]
        # One tail cluster per consecutive pair of cutoff values.
        self.cluster_num = len(cutoff) - 1
        # Head outputs: the cutoff[0] frequent words plus one label per tail cluster.
        self.head_dim = cutoff[0] + self.cluster_num
        self.params = []
        self.y_mask = y_mask

        # NOTE(review): unlike the tail matrices below, this array is created
        # without dtype=theano.config.floatX -- confirm whether that is intended.
        init_head_w = np.asarray(np.random.uniform(low=-np.sqrt(1./self.input_dim),
                                              high=np.sqrt(1./self.input_dim),
                                              size=(self.input_dim,self.head_dim)))
        self.head_w=theano.shared(value=init_head_w,name='head_w')
        self.params.append(self.head_w)

        # Per-cluster tail parameters: a projection (input_dim -> project_dim)
        # followed by an output matrix (project_dim -> number of words in the cluster).
        tail_project_factor = project_factor
        tail_w_list = []
        for i in range(self.cluster_num):
            project_dim = max(1, self.input_dim // tail_project_factor)
            tail_dim = cutoff[i + 1] - cutoff[i]
            _tail_proj_w = np.asarray(np.random.uniform(low=-np.sqrt(1./self.input_dim),
                                             high=np.sqrt(1./self.input_dim),
                                             size=(self.input_dim, project_dim)),dtype=theano.config.floatX)
            _tail_w = np.asarray(np.random.uniform(low=-np.sqrt(1./project_dim),
                                             high=np.sqrt(1./project_dim),
                                             size=(project_dim,tail_dim)),dtype=theano.config.floatX)
            tail_proj_w = theano.shared(value=_tail_proj_w, name="adaptive_softmax_tail{}_proj_w".format(i+1))
            tail_w = theano.shared(value=_tail_w, name="adaptive_softmax_tail{}_w".format(i+1))
            tail_w_list.append([tail_proj_w, tail_w])
            # Each further cluster gets a projection smaller by another project_factor.
            tail_project_factor *= project_factor
            self.params.append(tail_proj_w)
            self.params.append(tail_w)

        training_losses = []
        loss = 0.
        head_labels = labels
        for i in range(self.cluster_num):
            mask = T.bitwise_and(T.ge(labels, cutoff[i]), T.lt(labels, cutoff[i + 1]))  # true where the label lies in [cutoff[i], cutoff[i+1])

            # update head labels with mask (we take words with high frequency as head part)
            head_labels = T.switch(mask, T.constant([cutoff[0] + i]).repeat(self.sample_num), head_labels)
            # we take words with low frequency as a unified label, and append to tail of head labels
            # e.g. 3000 -> 2000, 2001 -> 2000
            # head labels: [0,1,2...,1999] + [2000]

            # compute tail loss
            # first remove the words not in this cluster (this range of frequency) with mask
            tail_inputs = inputs[mask.nonzero()]
            # encode on tail inputs and get logits
            tail_logits = T.dot(T.dot(tail_inputs, tail_w_list[i][0]), tail_w_list[i][1])
            # update tail labels, relabel by (- 2000)
            tail_labels = (labels - cutoff[i])[mask.nonzero()]
            # y_mask that eases the effect of null space in the tail of sentences
            tail_y_mask = self.y_mask[mask.nonzero()]
            tail_logits = tail_logits[T.eq(tail_y_mask, 1).nonzero()]
            tail_labels = tail_labels[T.eq(tail_y_mask, 1).nonzero()]
            # to solve NaN problem
            # NOTE(review): the clipped values are the raw matrix products; no
            # softmax is applied in this block before the cross-entropy -- confirm
            # that this is intended.
            tail_logits = T.clip(tail_logits, 1.0e-8, 1.0 - 1.0e-8)
            # tail_loss for words with low frequency
            tail_loss = T.mean(T.nnet.categorical_crossentropy(tail_logits, tail_labels))
            training_losses.append(tail_loss)
            loss += tail_loss
            # These attributes are overwritten on every iteration, so after the
            # loop they refer to the last cluster only.
            self.tail_logits = tail_logits
            self.tail_labels = tail_labels
            self.tail_loss = tail_loss

        # compute head loss
        # encode head_inputs
        head_logits = T.dot(inputs, self.head_w)
        # y_mask that eases the effect of null space in the tail of sentences
        head_logits = head_logits[T.eq(self.y_mask, 1).nonzero()]
        head_labels = head_labels[T.eq(self.y_mask, 1).nonzero()]
        # to solve NaN problem (same clipping as for the tail clusters)
        head_logits = T.clip(head_logits, 1.0e-8, 1.0 - 1.0e-8)
        head_loss = T.mean(T.nnet.categorical_crossentropy(head_logits, head_labels))
        loss += head_loss
        training_losses.append(head_loss)

        # Total loss, the per-part losses (tail clusters first, head last) and the head loss.
        self.loss = loss
        self.training_losses = training_losses
        self.head_loss = head_loss