Example 1
import theano.tensor as T


def _vector_clf_curve(y_true, y_predicted):
    """
    sklearn.metrics._binary_clf_curve port

    y_true: tensor (vector): y true
    y_predicted: tensor (vector): y predicted

    returns: fps, tps, threshold_values
    fps: tensor (vector): false positives
    tps: tensor (vector): true positives
    threshold_values: tensor (vector): value of y predicted at each threshold 
        along the curve

    restrictions: 
        -not numpy compatible
        -only works with two vectors (not matrix or tensor)


    """
    assert y_true.ndim == y_predicted.ndim == 1

    desc_score_indices = y_predicted.argsort()[::-1].astype('int64')
    sorted_y_predicted = y_predicted[desc_score_indices]
    sorted_y_true = y_true[desc_score_indices]

    # Keep the index of each distinct score, plus the final point so the
    # curve is always capped at the lowest threshold.
    distinct_value_indices = (1 - T.isclose(T.extra_ops.diff(sorted_y_predicted), 0)).nonzero()[0]
    curve_cap = T.extra_ops.repeat(sorted_y_predicted.size - 1, 1)
    threshold_indices = T.concatenate([distinct_value_indices, curve_cap]).astype('int64')

    tps = T.extra_ops.cumsum(sorted_y_true[threshold_indices])
    fps = 1 + threshold_indices - tps
    threshold_values = sorted_y_predicted[threshold_indices]

    return fps, tps, threshold_values
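A minimal usage sketch, compiling the curve into a callable function; the input values below are illustrative and mirror sklearn's _binary_clf_curve doctest:

import numpy as np
import theano
import theano.tensor as T

y_true = T.dvector('y_true')
y_scores = T.dvector('y_scores')
fps, tps, thresholds = _vector_clf_curve(y_true, y_scores)
curve = theano.function([y_true, y_scores], [fps, tps, thresholds])

# Expected: fps=[0, 1, 1, 2], tps=[1, 1, 2, 2], thresholds=[0.8, 0.4, 0.35, 0.1]
print(curve(np.array([0., 0., 1., 1.]), np.array([0.1, 0.4, 0.35, 0.8])))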
Example 2
    def logp(self, value):
        n = self.n
        p = self.p

        return bound(factln(n) - factln(value).sum() + (value * tt.log(p)).sum(),
                     value >= 0,
                     0 <= p, p <= 1,
                     tt.isclose(p.sum(), 1),
                     broadcast_conditions=False
        )
Example 3
    def logp(self, value):
        n = self.n
        p = self.p

        return bound(factln(n) - factln(value).sum() + (value * tt.log(p)).sum(),
                     tt.all(value >= 0),
                     tt.all(0 <= p), tt.all(p <= 1),
                     tt.isclose(p.sum(), 1),
                     broadcast_conditions=False
        )
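Both variants gate the log-probability on tt.isclose(p.sum(), 1) rather than an exact equality, so floating-point round-off in p does not silently send the density to -inf. A standalone check of that guard (values illustrative):

import numpy as np
import theano.tensor as tt

p = tt.dvector('p')
on_simplex = tt.isclose(p.sum(), 1)
print(on_simplex.eval({p: np.array([0.1, 0.2, 0.7])}))  # 1: sums to 1 within tolerance
print(on_simplex.eval({p: np.array([0.1, 0.2, 0.6])}))  # 0: off the simplex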
Example 4
import numpy as np
import theano.tensor as T


def squared_error_void(y_pred, y_true):
    # Flatten y_true
    y_true = T.flatten(y_true)
    y_pred = T.flatten(y_pred)
    # Build a mask that zeroes out entries whose target is (close to) 1,
    # the "void" label
    mask = T.ones_like(y_true, dtype=np.int32)
    mask = T.switch(T.isclose(y_true, 1), np.int32(0), mask)

    # Modify y_true temporarily
    y_true_tmp = y_true * mask

    error = mask * T.sqr(y_true_tmp - y_pred)

    return T.mean(error)
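A quick usage sketch, reusing the imports above (values illustrative): targets equal to 1 act as the void label and are excluded from the squared error.

import theano

yp = T.dvector('y_pred')
yt = T.dvector('y_true')
mse_void = theano.function([yp, yt], squared_error_void(yp, yt))
# Only the first element contributes; the 1.0 target is masked out.
print(mse_void(np.array([0.2, 0.9]), np.array([0.5, 1.0])))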
Example 5
    def errors(self, y):

        if y.ndim != self.y_pred_given_x.ndim:
            raise TypeError(
                'y should have the same shape as self.y_pred_given_x',
                ('y', y.type, 'y_pred_given_x', self.y_pred_given_x.type)
            )
            )
        # check if y is of the correct datatype
        if y.dtype.startswith('float'):
            # A row counts as correct only when every element is within tolerance.
            return 1 - T.mean(T.all(T.isclose(y, self.y_pred_given_x, rtol=0.005, atol=0.5), axis=1))
        else:
            raise NotImplementedError()
Example 6
    def logp(self, x):
        n = self.n
        p = self.p

        if x.ndim == 2:
            x_sum = x.sum(axis=0)
            n_sum = n * x.shape[0]
        else:
            x_sum = x
            n_sum = n

        return bound(
            factln(n_sum) + tt.sum(x_sum * tt.log(p) - factln(x_sum)),
            tt.all(x >= 0),
            tt.all(x <= n),
            tt.eq(tt.sum(x_sum), n_sum),
            tt.isclose(p.sum(), 1),
            n >= 0)
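For context, the bound and factln helpers used throughout these logp snippets come from pymc3's distributions.dist_math module. A rough standalone sketch of the same pattern (values illustrative):

import numpy as np
import theano.tensor as tt
from pymc3.distributions.dist_math import bound, factln

x = tt.dvector('x')
p = tt.dvector('p')
n = tt.dscalar('n')
logp = bound(factln(n) + tt.sum(x * tt.log(p) - factln(x)),
             tt.all(x >= 0), tt.isclose(p.sum(), 1), n >= 0)
# The multinomial log-likelihood when every condition holds, -inf otherwise.
print(logp.eval({x: np.array([1., 2.]), p: np.array([1. / 3, 2. / 3]), n: 3.}))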
Example 7
import theano.tensor as tt


def one_step(s, s_abs, tt_error, tt_r, tt_height, tt_current_error, tt_flag):
    """
    One-step function used by the pq_theano scan.
    :param s:
    :param s_abs:
    :param tt_error:
    :param tt_r:
    :param tt_height:
    :param tt_current_error:
    :param tt_flag:
    :return:
    """

    tt_current_error = tt.switch(
        tt.and_(tt.isclose(tt_height, 2.), tt.isclose(tt_flag, 0.)),
        2., tt_current_error)
    tt_flag += tt.switch(tt_height > 1., 1., 0.)

    tt_height = tt.switch(tt.ge(tt_height + s, 0.), tt_height + s, 0.)

    tt_current_error += s_abs

    # Multiplying by tt.isclose(...) masks emulates a logical AND of the
    # float-equality conditions below.
    tt_error += tt_current_error * tt.switch(tt.isclose(tt_current_error, 4.), 0., 1.) / 3. * tt.isclose(tt_height, 0.)
    tt_r += tt.isclose(tt_current_error, 4.) * tt.isclose(tt_height, 0.)
    tt_current_error -= tt_current_error * tt.isclose(tt_height, 0.)
    return tt_error, tt_r, tt_height, tt_current_error, tt_flag
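A hedged sketch of driving one_step with theano.scan; the input sequence and zero initial states are illustrative, and float64 is assumed throughout:

import theano
import theano.tensor as tt

s_seq = tt.dvector('s_seq')
zero = tt.constant(0., dtype='float64')
[err, r, height, cur_err, flag], _ = theano.scan(
    one_step,
    sequences=[s_seq, abs(s_seq)],
    outputs_info=[zero, zero, zero, zero, zero])
total_error, total_r = err[-1], r[-1]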
Example 8
def isclose(x, y, rtol=1e-05, atol=1e-08, equal_nan=False):
    return T.isclose(x, y, rtol=rtol, atol=atol, equal_nan=equal_nan)
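The wrapper simply re-exports Theano's T.isclose with numpy-style defaults; for instance, NaNs only compare equal when equal_nan=True (values illustrative):

import numpy as np
import theano.tensor as T

a = T.dvector('a')
b = T.dvector('b')
av = np.array([1.0, np.nan])
bv = np.array([1.0 + 1e-9, np.nan])
print(isclose(a, b).eval({a: av, b: bv}))                  # [1 0]
print(isclose(a, b, equal_nan=True).eval({a: av, b: bv}))  # [1 1]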
Example 9
# Assumed imports for this snippet (not shown in the original):
import numpy as np
import numpy.random as rng
import theano.tensor as T
from theano import shared

from layers import LSTM, FullConnected, TimeDistributed
from models import Decoder, Encoder, Seq2seq, Sequential
from utils import masking, padding

rng.seed(123)

# Preprocess data
x1 = [2, 1, 1, 1, 2, 4, 2]
x2 = [2, 1]
x3 = [2, 1, 4, 3, 1]
batch_value = np.asarray([x1, x2, x3])

vocab_size = 5
embedding_size = 4
encoder_hidden_size = 6

encoder = Encoder(vocab_size + 1, embedding_size, encoder_hidden_size)
mask_value = masking(batch_value)
padded_batch_value = padding(batch_value, 0)

mask = shared(mask_value, name='mask')
padded_batch = shared(padded_batch_value, name='padded_batch')
H, C = encoder.forward(padded_batch, mask)

(h1, c1) = encoder.forward2(x1)
(h2, c2) = encoder.forward2(x2)
(h3, c3) = encoder.forward2(x3)

# Both checks should print arrays of ones: the batched forward pass must agree
# with the per-sequence forward pass.
print(T.isclose(H, T.as_tensor_variable([h1, h2, h3])).eval())
print(T.isclose(C, T.as_tensor_variable([c1, c2, c3])).eval())
Example 10
            def _iter_fn(input_repr,
                         ref_matrix,
                         gstate,
                         correct_num_new_nodes=None,
                         correct_new_strengths=None,
                         correct_new_node_ids=None,
                         correct_edges=None,
                         dropout_masks=None):
                # If necessary, update node state
                if self.nodes_mutable:
                    gstate, dropout_masks = self.node_state_updater.process(
                        gstate, input_repr, dropout_masks)

                if len(self.word_node_mapping) > 0:
                    gstate, dropout_masks = self.direct_reference_updater.process(
                        gstate, ref_matrix, dropout_masks)

                # If necessary, propagate node state
                if self.intermediate_propagate != 0:
                    gstate, dropout_masks = self.intermediate_propagator.process_multiple(
                        gstate, self.intermediate_propagate, dropout_masks)

                node_loss = None
                node_accuracy = None
                # Propose and vote on new nodes
                if self.dynamic_nodes:
                    new_strengths, new_ids, dropout_masks = self.new_node_adder.get_candidates(
                        gstate, input_repr, self.new_nodes_per_iter,
                        dropout_masks)
                    # new_strengths and correct_new_strengths are of shape (n_batch, new_nodes_per_iter)
                    # new_ids and correct_new_node_ids are of shape (n_batch, new_nodes_per_iter, num_node_ids)
                    if with_correct_graph:
                        perm_idxs = np.array(
                            list(
                                itertools.permutations(
                                    range(self.new_nodes_per_iter))))
                        permuted_correct_str = correct_new_strengths[:, perm_idxs]
                        permuted_correct_ids = correct_new_node_ids[:, perm_idxs]
                        # due to advanced indexing, we should have shape (n_batch, permutation, new_nodes_per_iter, num_node_ids)
                        ext_new_str = T.shape_padaxis(new_strengths, 1)
                        ext_new_ids = T.shape_padaxis(new_ids, 1)
                        strength_ll = permuted_correct_str * T.log(
                            ext_new_str +
                            util.EPSILON) + (1 - permuted_correct_str) * T.log(
                                1 - ext_new_str + util.EPSILON)
                        ids_ll = permuted_correct_ids * T.log(ext_new_ids +
                                                              util.EPSILON)
                        reduced_perm_lls = T.sum(strength_ll, axis=2) + T.sum(
                            ids_ll, axis=[2, 3])
                        if self.best_node_match_only:
                            node_loss = -T.max(reduced_perm_lls, 1)
                        else:
                            full_ll = util.reduce_log_sum(reduced_perm_lls, 1)
                            # Note that some of these permutations are identical, since we likely did not add the maximum
                            # amount of nodes. Thus we will have added repeated elements here.
                            # We have log(x+x+...+x) = log(kx), where k is the repetition factor and x is the probability we want
                            # log(kx) = log(k) + log(x)
                            # Our repetition factor k is given by (new_nodes_per_iter - correct_num_new_nodes)!
                            # Recall that n! = gamma(n+1)
                            # so log(x) = log(kx) - log(gamma(k+1))
                            log_rep_factor = T.gammaln(
                                T.cast(
                                    self.new_nodes_per_iter -
                                    correct_num_new_nodes + 1, 'floatX'))
                            scaled_ll = full_ll - log_rep_factor
                            node_loss = -scaled_ll
                        if evaluate_accuracy:
                            best_match_idx = T.argmax(reduced_perm_lls, 1)
                            # should be of shape (n_batch), indexing the best permutation
                            best_correct_str = permuted_correct_str[
                                T.arange(n_batch), best_match_idx]
                            best_correct_ids = permuted_correct_ids[
                                T.arange(n_batch), best_match_idx]
                            snapped_strengths = util.independent_best(
                                new_strengths)
                            snapped_ids = util.categorical_best(
                                new_ids) * T.shape_padright(snapped_strengths)
                            close_strengths = T.all(
                                T.isclose(best_correct_str, snapped_strengths),
                                (1))
                            close_ids = T.all(
                                T.isclose(best_correct_ids, snapped_ids),
                                (1, 2))
                            node_accuracy = T.and_(close_strengths, close_ids)
                        # now substitute in the correct nodes
                        gstate = gstate.with_additional_nodes(
                            correct_new_strengths, correct_new_node_ids)
                    elif snap_to_best:
                        snapped_strengths = util.independent_best(
                            new_strengths)
                        snapped_ids = util.categorical_best(new_ids)
                        gstate = gstate.with_additional_nodes(
                            snapped_strengths, snapped_ids)
                    else:
                        gstate = gstate.with_additional_nodes(
                            new_strengths, new_ids)

                # Update edge state
                gstate, dropout_masks = self.edge_state_updater.process(
                    gstate, input_repr, dropout_masks)
                if with_correct_graph:
                    cropped_correct_edges = correct_edges[:, :gstate.n_nodes, :gstate.n_nodes, :]
                    edge_lls = cropped_correct_edges * T.log(
                        gstate.edge_strengths +
                        util.EPSILON) + (1 - cropped_correct_edges) * T.log(
                            1 - gstate.edge_strengths + util.EPSILON)
                    # edge_lls currently penalizes for edges connected to nodes that do not exist
                    # we do not want it to do this, so we mask it with node strengths
                    mask_src = util.shape_padaxes(gstate.node_strengths,
                                                  [2, 3])
                    mask_dest = util.shape_padaxes(gstate.node_strengths,
                                                   [1, 3])
                    masked_edge_lls = edge_lls * mask_src * mask_dest
                    edge_loss = -T.sum(masked_edge_lls, axis=[1, 2, 3])
                    if evaluate_accuracy:
                        snapped_edges = util.independent_best(
                            gstate.edge_strengths)
                        close_edges = T.isclose(cropped_correct_edges,
                                                snapped_edges)
                        ok_mask = 1 - T.cast(
                            mask_src * mask_dest, 'int8'
                        )  # it's OK for things not to match if node strengths are NOT both 1
                        edge_accuracy = T.all(T.or_(close_edges, ok_mask),
                                              (1, 2, 3))
                        overall_accuracy = edge_accuracy if node_accuracy is None else T.and_(
                            node_accuracy, edge_accuracy)
                    else:
                        overall_accuracy = None
                    gstate = gstate.with_updates(
                        edge_strengths=cropped_correct_edges)
                    return gstate, node_loss, edge_loss, overall_accuracy
                elif snap_to_best:
                    snapped_edges = util.independent_best(
                        gstate.edge_strengths)
                    gstate = gstate.with_updates(edge_strengths=snapped_edges)
                    return gstate
                else:
                    return gstate
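The edge-accuracy logic above only requires T.isclose to hold where both endpoint node strengths are 1. A stripped-down sketch of that masking pattern, independent of the project's util helpers (names illustrative):

import theano.tensor as T

pred = T.dtensor3('pred')      # predicted edge strengths
target = T.dtensor3('target')  # ground-truth edges
mask = T.dtensor3('mask')      # 1 where both endpoint nodes exist, else 0

close = T.isclose(target, pred)
ok = 1 - T.cast(mask, 'int8')  # mismatches are forgiven wherever mask == 0
accuracy = T.all(T.or_(close, ok), axis=[1, 2])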