Example #1
def proj_LSTM(input_dim, out_dim, init_W, init_H, init_b, init_W_0):
    '''Constants initialized from the given numpy arrays.'''
    W = C.Constant(shape=(input_dim, 4096*4),value=init_W) # (512,4096*4)
    H = C.Constant(shape=(out_dim, 4*4096), value=init_H)
    b = C.Constant(shape=(4096*4,), value=init_b)
    proj_W = C.Constant(shape=(4096,out_dim), value=init_W_0)
    stacked_dim=4096
    @C.Function
    def unit(dh, dc, x):
        ''' dh: out_dim, dc:4096, x:input_dim'''
        proj4 = b + C.times(x, W) + C.times(dh, H)
        it_proj  = proj4[0:1*stacked_dim]  # split along stack_axis
        bit_proj = proj4[1*stacked_dim: 2*stacked_dim]
        ft_proj  = proj4[2*stacked_dim: 3*stacked_dim]
        ot_proj  = proj4[3*stacked_dim: 4*stacked_dim]

        it = C.sigmoid(it_proj)        # input gate(t)
        # TODO: should both activations be replaced?
        bit = it * C.tanh(bit_proj)              # applied to tanh of input network

        ft = C.sigmoid(ft_proj)        # forget-me-not gate(t)
        bft = ft * dc                                 # applied to cell(t-1)

        ct = bft + bit                                # c(t) is sum of both

        ot = C.sigmoid(ot_proj)    # output gate(t)
        ht = ot * C.tanh(ct)                     # applied to tanh(cell(t))

        c = ct                                        # cell value
        h = ht
        proj_h = C.times(h, proj_W) # out_dim
        return (proj_h, c) 
    return unit
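The returned unit follows CNTK's custom-recurrence convention (previous states plus input in, new states out), so it can be wired into C.layers.Recurrence. A minimal, hypothetical sketch, assuming cntk as C and numpy as np are imported and that init_W, init_H, init_b, init_W_0 are numpy arrays with the shapes noted above (input_dim=512 and out_dim=256 are illustrative):

# hypothetical wiring; the dimensions and init_* arrays are assumptions for illustration
x_seq = C.sequence.input_variable(512)
cell = proj_LSTM(512, 256, init_W, init_H, init_b, init_W_0)
h0 = C.Constant(np.zeros(256, dtype=np.float32))   # initial dh (out_dim)
c0 = C.Constant(np.zeros(4096, dtype=np.float32))  # initial dc (cell dimension)
h_seq = C.layers.Recurrence(cell, initial_state=(h0, c0))(x_seq)  # projected hidden state per step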
Example #2
    def model(seq_image, decoded):
        params = dense(decoded)
        g_x, g_y, sigma2, delta, gamma = attention_parameters(params)

        i = C.Constant(np.arange(n) + 1)  # col of patch
        j = C.Constant(np.arange(n) + 1)  # row of patch
        mu_x = g_x + (i - n / 2 - 0.5) * delta
        mu_y = g_y + (j - n / 2 - 0.5) * delta
        mu_x = C.expand_dims(mu_x, axis=-1)
        mu_y = C.expand_dims(mu_y, axis=-1)
        # mu_x: [#, *] [n, 1]
        # mu_y: [#, *] [n, 1]

        image = C.sequence.unpack(seq_image,
                                  padding_value=0,
                                  no_mask_output=True)
        # image: [#] [*image_width, filters, image_height]

        width_pos = Cx.sequence.position(seq_image)
        # width_pos: [#, *] [1]

        width_pos_unpacked = C.sequence.unpack(width_pos,
                                               padding_value=999_999,
                                               no_mask_output=True)
        # width_pos: [#] [*image_width, 1]

        a = C.sequence.broadcast_as(C.swapaxes(width_pos_unpacked), mu_x)
        # a: [#, *] [1, *image_width]
        # x pos index of image (width)

        b = C.Constant(np.arange(image_height).reshape((1, -1)))
        # b: [] [1, image_height]
        # y pos index of image (height)

        # calculate which portion of the image is attended to by the gaussian filter
        f_xi = C.exp(-0.5 * C.square(a - mu_x) / sigma2)
        f_yj = C.exp(-0.5 * C.square(b - mu_y) / sigma2)
        # f_xi: [#, *] [n, *image_width]
        # f_yj: [#, *] [n, image_height]

        z_x = C.reduce_sum(f_xi, axis=1)
        z_y = C.reduce_sum(f_yj, axis=1)
        # z_x: [#, *] [n]
        # z_y: [#, *] [n]

        f_xi = f_xi / z_x
        f_yj = f_yj / z_y
        # f_xi: [#, *] [n, *image_width]
        # f_yj: [#, *] [n, image_height]

        # combine filters from x and y
        image_broadcasted = C.sequence.broadcast_as(image, f_yj)
        attended = gamma * C.times(
            f_xi, C.times_transpose(image_broadcasted, f_yj), output_rank=2)
        # attended: [#, *] [n, filters, n]
        attended = C.swapaxes(attended)
        # attended: [#, *] [filters, n (x) , n (y)]
        return attended
Example #3
def test_constant_eval():
    c = C.Constant(value=1)
    c_plus_1 = c + 1
    op = C.combine([c_plus_1, c])
    result = op.eval({})
    assert np.array_equal(result[c_plus_1.output], [2.0])
    assert np.array_equal(result[c], 1.0)
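The same combine-and-eval pattern works for array-valued constants; a small hedged sketch, assuming numpy as np and cntk as C as in the test above (values are illustrative):

# hypothetical variant of the test above with an array-valued Constant
c_vec = C.Constant(np.asarray([1.0, 2.0, 3.0], dtype=np.float32))
res = C.combine([c_vec * 2]).eval({})
print(res)  # expected to contain the values 2, 4 and 6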
Example #4
def flow_reverse(chunk):
    input_dim = chunk['input_dim']
    log_det_J = 0
    _half_dim = input_dim//2

    _ph = C.placeholder(input_dim, name='place_holder')
    _log_s_func = chunk['log_s_func']
    _t_func = chunk['t_func']

    _y1, _y2 = _ph[:_half_dim], _ph[_half_dim:]
    _log_s = _log_s_func(_y2)
    _t = _t_func(_y2)
    _s = C.exp(_log_s)
    _x1 = (_y1-_t)/_s
    _x2 = _y2
    _X = C.splice(_x1, _x2)

    log_det_J += C.reduce_sum(C.log(C.abs(_s)))

    _w = chunk['W_rot_mat']
    chunk['W_rot_mat_inv'] = _inv_w = C.Constant(np.linalg.inv(_w.value), name='inv_W')
    _out = _X@_inv_w
    log_det_J += input_dim*C.log(C.det(_inv_w))

    # if 'scale' in chunk:
    #     _out -= chunk['bias']
    #     _out /= chunk['scale']
    #     log_det_J += input_dim*C.reduce_sum(C.log(C.abs(chunk['scale'])))

    # _out -= chunk['b']
    # _out @= _inv_w

    return _out, log_det_J
Example #5
    def attention(query, key, value):
        dk = C.reduce_sum(C.ones_like(query))  # cannot use sequence.last, will conflict with recurrence
        # dk: [#, *] [1, ] and value = int(dim_of_query)

        unpacked_key = C.sequence.unpack(key, padding_value=0, no_mask_output=True)  # [#] [-3, key_dim]
        unpacked_value = C.sequence.unpack(value, padding_value=0, no_mask_output=True)  # [#] [-3, value_dim]

        broadcasted_key = C.sequence.broadcast_as(unpacked_key, query)  # [#, *] [-3, key_dim]
        scaled = C.times_transpose(query, broadcasted_key) / dk
        # [#, *] [q_dim] @ [#, *] [key_dim, -3], assert q_dim == key_dim
        # scaled: [#, *] [-3, ] => for every key seq element, there is a corresponding score

        # mask out invalid temporal connections when obey_sequence_order is set
        if obey_sequence_order and max_seq_len:
            unpacked_scaled, scaled_mask = C.sequence.unpack(scaled, padding_value=0).outputs
            # unpacked_scaled: [#] [-3, -3]  <== matrix will be top right diagonally zero-ed
            # scaled_mask: [#] [-3,]

            minus_inf = C.constant(-1e+30)
            valid_connections = C.Constant(np.tril(np.ones((max_seq_len, max_seq_len)), k=0))  # [] [max_seq, max_seq]
            valid_connections = C.reconcile_dynamic_axes(valid_connections, unpacked_scaled)  # [#] [max_seq, max_seq]
            valid_connections = C.crop_manual(valid_connections, unpacked_scaled, 0, 0)  # [#] [-3, -3]
            unpacked_scaled = C.element_select(valid_connections, unpacked_scaled, minus_inf)  # [#] [-3, -3]
            scaled = C.to_sequence_like(unpacked_scaled, query)  # [#, *] [-3]

        elif obey_sequence_order and not max_seq_len:
            raise ValueError("max_seq_len must be defined when obey_sequence_order is True")

        attended = C.times(C.softmax(scaled, axis=-1), C.sequence.broadcast_as(unpacked_value, query))  # [#, *] [value_dim,]
        return attended
Example #6
def create_model(input_sequence, label_sequence, vocab_dim, hidden_dim):
    # Create the rnn that computes the latent representation for the next token.
    rnn_with_latent_output = Sequential([
        C.layers.Embedding(hidden_dim),
        For(
            range(num_layers), lambda: Sequential([
                Stabilizer(),
                Recurrence(LSTM(hidden_dim), go_backwards=False)
            ])),
    ])

    # Apply it to the input sequence.
    latent_vector = rnn_with_latent_output(input_sequence)

    # Connect the latent output to (sampled/full) softmax.
    if use_sampled_softmax:
        weights = load_sampling_weights(token_frequencies_file_path)
        smoothed_weights = np.float32(np.power(weights, alpha))
        sampling_weights = C.reshape(C.Constant(smoothed_weights),
                                     shape=(1, vocab_dim))
        z, ce, errs = cross_entropy_with_sampled_softmax(
            latent_vector, label_sequence, vocab_dim, hidden_dim,
            softmax_sample_size, sampling_weights)
    else:
        z, ce, errs = cross_entropy_with_full_softmax(latent_vector,
                                                      label_sequence,
                                                      vocab_dim, hidden_dim)

    return z, ce, errs
Example #7
def pad(x, pattern, mode=C.CONSTANT_PAD, constant_value=0, name=''):
    """
    Pads a tensor in the sequence axis according to the specified patterns.
    Three padding modes are supported: CONSTANT / REFLECT / SYMMETRIC.

    Arguments:
        x: tensor to be padded.
        pattern (tuple with 2 integers): how many values to add before and after the contents in the sequence axis.
        mode (int): padding mode: C.ops.CONSTANT_PAD, C.ops.REFLECT_PAD and C.ops.SYMMETRIC_PAD
        constant_value: the value used to fill the padding cells, only meaningful under CONSTANT mode.
        name (str, optional): the name of the Function instance in the network

    Returns:
        :class:`~cntk.ops.functions.Function`
    """
    if not isinstance(pattern, tuple) or len(pattern) != 2 or not all(isinstance(i, int) for i in pattern):
        raise ValueError(f"pattern {pattern} must be a tuple with 2 integers")

    ndim = len(x.shape)
    null_pattern = [(0, 0)] * ndim
    final_pattern = [pattern] + null_pattern

    b, valid = C.sequence.unpack(x, padding_value=0).outputs
    c = C.pad(b, final_pattern, mode=mode, constant_value=constant_value)
    seq_length = C.reduce_sum(valid, axis=0) + C.Constant(sum(pattern))
    d = C.to_sequence(c, seq_length, name=name)
    return d
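A minimal usage sketch for the pad helper above, assuming cntk is imported as C; the input shape and padding amounts are illustrative:

# hypothetical usage of pad(): a sequence of 3-dim vectors, padded with one step before and two after
seq = C.sequence.input_variable(3)
padded = pad(seq, (1, 2))  # default CONSTANT mode fills the added steps with zeros
# a sequence of length 5 on the dynamic axis becomes a sequence of length 8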
Example #8
def create_model(model_details,
                 num_classes,
                 input_features,
                 new_prediction_node_name='prediction',
                 freeze=False):
    # Load the pretrained classification net and find nodes
    base_model = C.load_model(model_details['model_file'])
    feature_node = C.logging.find_by_name(base_model,
                                          model_details['feature_node_name'])
    last_node = C.logging.find_by_name(base_model,
                                       model_details['last_hidden_node_name'])

    # Clone the desired layers with fixed weights
    cloned_layers = C.combine([last_node.owner]).clone(
        C.CloneMethod.freeze if freeze else C.CloneMethod.clone,
        {feature_node: C.placeholder(name='features')})

    # Add new dense layer for class prediction
    feat_norm = input_features - C.Constant(114)
    cloned_out = cloned_layers(feat_norm)
    z = C.layers.Dense(num_classes,
                       activation=None,
                       name=new_prediction_node_name)(cloned_out)

    return z
Example #9
def create_model(model_details, num_classes, input_features, new_prediction_node_name="prediction", freeze=False):
    # Load the pre-trained classification net and find nodes
    base_model = cntk.load_model(model_details["model_file"])

    feature_node = cntk.logging.find_by_name(base_model, model_details["feature_node_name"])
    last_node = cntk.logging.find_by_name(base_model, model_details["last_hidden_node_name"])

    if model_details["inception"]:
        node_outputs = cntk.logging.get_node_outputs(base_model)
        last_node = node_outputs[5]
        feature_node = cntk.logging.find_all_with_name(base_model, "")[-5]
    if model_details["vgg"]:
        last_node = cntk.logging.find_by_name(base_model, "prob")
        feature_node = cntk.logging.find_by_name(base_model, "data")

    # Clone the desired layers with fixed weights
    cloned_layers = cntk.combine([last_node.owner]).clone(
        cntk.CloneMethod.freeze if freeze else cntk.CloneMethod.clone,
        {feature_node: cntk.placeholder(name="features")},
    )

    # Add new dense layer for class prediction
    feat_norm = input_features - cntk.Constant(114)
    cloned_out = cloned_layers(feat_norm)
    z = cntk.layers.Dense(num_classes, activation=None, name=new_prediction_node_name)(cloned_out)
    return z
Example #10
def create_network(cfg):
    """build the network for faster rcnn"""

    # Create input variables
    features = C.input_variable(shape=(cfg.NUM_CHANNELS, cfg.IMAGE_HEIGHT,
                                       cfg.IMAGE_WIDTH),
                                dynamic_axes=[C.Axis.default_batch_axis()],
                                name=cfg["MODEL"].FEATURE_NODE_NAME)
    # roi_input
    scaled_gt_boxes = C.input_variable(
        (cfg.INPUT_ROIS_PER_IMAGE, 5),
        dynamic_axes=[C.Axis.default_batch_axis()])
    dims_in = C.input_variable((6), dynamic_axes=[C.Axis.default_batch_axis()])
    dims_input = C.alias(dims_in, name='dims_input')

    # Load the pre-trained classification net and clone layers
    base_model = C.load_model(cfg['BASE_MODEL_PATH'])
    conv_layers = clone_conv_layers(base_model, cfg)
    fc_layers = clone_model(base_model, [cfg["MODEL"].POOL_NODE_NAME],
                            [cfg["MODEL"].LAST_HIDDEN_NODE_NAME],
                            clone_method=CloneMethod.clone)

    # Normalization and conv layers
    feat_norm = features - C.Constant([[[v]]
                                       for v in cfg["MODEL"].IMG_PAD_COLOR])
    conv_out = conv_layers(feat_norm)

    # RPN and prediction targets
    rpn_rois, rpn_losses = create_rpn(conv_out, scaled_gt_boxes, dims_input,
                                      cfg)

    rois, label_targets, bbox_targets, bbox_inside_weights = create_proposal_target_layer(
        rpn_rois, scaled_gt_boxes, cfg)

    # Fast RCNN and losses
    cls_score, bbox_pred = create_fast_rcnn_predictor(conv_out, rois,
                                                      fc_layers, cfg)
    detection_losses = create_detection_losses(cls_score, label_targets,
                                               bbox_pred, rois, bbox_targets,
                                               bbox_inside_weights, cfg)
    loss = rpn_losses + detection_losses
    pred_error = classification_error(cls_score, label_targets, axis=1)

    e2e_lr_factor = cfg["MODEL"].E2E_LR_FACTOR
    e2e_lr_per_sample_scaled = [
        x * e2e_lr_factor for x in cfg["CNTK"].E2E_LR_PER_SAMPLE
    ]
    mm_schedule = momentum_schedule(cfg["CNTK"].MOMENTUM_PER_MB)

    print("Using base model:   {}".format(cfg["MODEL"].BASE_MODEL))
    print("lr_per_sample:      {}".format(e2e_lr_per_sample_scaled))

    return {
        'features': features,
        'roi_input': scaled_gt_boxes,
        'loss': loss,
        'pred_error': pred_error,
        'dim_input': dims_in
    }
Example #11
def create_sparse_to_dense(input_vocab_dim):
    I = C.Constant(np.eye(input_vocab_dim))

    @C.Function
    def no_op(input: InputSequence[C.layers.SparseTensor[input_vocab_dim]]):
        return C.times(input, I)

    return no_op
Example #12
def cross_entropy_with_sampled_softmax(
    hidden_vector,  # Node providing the output of the recurrent layers
    target_vector,  # Node providing the expected labels (as sparse vectors)
    vocab_dim,  # Vocabulary size
    hidden_dim,  # Dimension of the hidden vector
    num_samples,  # Number of samples to use for sampled softmax
    sampling_weights,  # Node providing weights to be used for the weighted sampling
    allow_duplicates=False  # Boolean flag to control whether to use sampling with replacement (allow_duplicates == True) or without replacement.
):
    bias = C.layers.Parameter(shape=(vocab_dim, 1), init=0)
    weights = C.layers.Parameter(shape=(vocab_dim, hidden_dim),
                                 init=C.initializer.glorot_uniform())

    sample_selector_sparse = C.random_sample(
        sampling_weights, num_samples,
        allow_duplicates)  # sparse matrix [num_samples * vocab_size]
    if use_sparse:
        sample_selector = sample_selector_sparse
    else:
        # Note: Sampled softmax with dense data is only supported for debugging purposes.
        # It might easily run into memory issues as the matrix 'I' below might be quite large.
        # In case we want a dense representation for all data, we have to convert the sample selector.
        I = C.Constant(np.eye(vocab_dim, dtype=np.float32))
        sample_selector = C.times(sample_selector_sparse, I)

    inclusion_probs = C.random_sample_inclusion_frequency(
        sampling_weights, num_samples,
        allow_duplicates)  # dense row [1 * vocab_size]
    log_prior = C.log(inclusion_probs)  # dense row [1 * vocab_dim]

    print("hidden_vector: " + str(hidden_vector.shape))
    wS = C.times(sample_selector, weights,
                 name='wS')  # [num_samples * hidden_dim]
    print("ws:" + str(wS.shape))
    zS = C.times_transpose(wS, hidden_vector, name='zS1') + C.times(
        sample_selector, bias, name='zS2') - C.times_transpose(
            sample_selector, log_prior, name='zS3')  # [num_samples]

    # Getting the weight vector for the true label. Dimension hidden_dim
    wT = C.times(target_vector, weights, name='wT')  # [1 * hidden_dim]
    zT = C.times_transpose(wT, hidden_vector, name='zT1') + C.times(
        target_vector, bias, name='zT2') - C.times_transpose(
            target_vector, log_prior, name='zT3')  # [1]

    zSReduced = C.reduce_log_sum_exp(zS)

    # Compute the cross entropy that is used for training.
    # We don't check whether any of the classes in the random samples coincides with the true label, so it might happen that the true class is counted
    # twice in the normalizing denominator of sampled softmax.
    cross_entropy_on_samples = C.log_add_exp(zT, zSReduced) - zT

    # For applying the model we also output a node providing the input for the full softmax
    z = C.times_transpose(weights, hidden_vector) + bias
    z = C.reshape(z, shape=(vocab_dim))

    zSMax = C.reduce_max(zS)
    error_on_samples = C.less(zT, zSMax)
    return (z, cross_entropy_on_samples, error_on_samples)
Example #13
def create_model(input_vector, label_vector, freq_list, vocab_dim, hidden_dim):

	hidden_vector = C.layers.Embedding(hidden_dim)(input_vector)
	#hidden_vector = C.times(input_vector, weights1) + bias1

	smoothed_weights = np.float32(np.power(freq_list, alpha))
	sampling_weights = C.reshape(C.Constant(smoothed_weights), shape = (1,vocab_dim))

	return cross_entropy_with_sampled_softmax(hidden_vector, label_vector, vocab_dim, hidden_dim, num_of_samples, sampling_weights)
Example #14
def __cntk_trace__(m):
    if len(m.shape) != 2:
        raise RuntimeError(f'{m.shape} is not a 2-D tensor')
    if m.shape[0] != m.shape[1]:
        raise RuntimeError(f'{m.shape} is not a square matrix')

    _dim = m.shape[0]
    _identity_matrix = C.Constant(np.eye(_dim))
    return C.reduce_sum(m * _identity_matrix)
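A small, hypothetical usage sketch for the trace helper above, assuming cntk as C and numpy as np are in scope:

# hypothetical usage; the matrix values are illustrative
m = C.Constant(np.array([[1.0, 2.0], [3.0, 4.0]]))
print(__cntk_trace__(m).eval())  # expected to be close to 5.0, the sum of the diagonal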
Example #15
    def __init__(self, p, eps=1e-7):
        if isinstance(p, (C.Variable, C.Function)):
            self.p = C.squeeze(p)
        else:
            self.p = C.Constant(np.squeeze(p))

        self.eps = C.Constant(eps, name='eps')
        self.c = self.p.shape[0]

        self.prob = self.p / (self.eps + C.reduce_sum(self.p))
        self.logits = C.log(self.prob)
        self.accum_prob = self.prob @ C.Constant(
            (1 - np.tri(self.prob.shape[-1], k=-1)))

        p_log_p = self.logits * self.prob
        self._entropy = -C.reduce_sum(p_log_p)

        dist = C.input_variable(1, name='category index')
        # method 1
        self._log_prob = C.log(
            C.reduce_sum(self.prob * C.one_hot(dist, self.c)))
Example #16
    def f(self, input_dim):
        x = C.input_variable(input_dim, needs_gradient=True, name='input')
        z, sum_log_det_jacob = x, C.Constant(0, name='log_det_zero')

        for i in reversed(range(len(self.t))):
            z_ = self.mask[i] * z
            s = self.s[i](z_) * (1 - self.mask[i])
            t = self.t[i](z_) * (1 - self.mask[i])
            z = z_ + (1 - self.mask[i]) * (z - t) * C.exp(-s)
            sum_log_det_jacob -= C.reduce_sum(s)

        z = C.squeeze(z)
        return z, sum_log_det_jacob
Example #17
def multivariate_kl_divergence(input_layer):
    _dim = input_layer.shape[0]

    out_value = C.unpack_batch(input_layer)
    _mu1 = C.transpose(C.reduce_mean(out_value, axis=0), [1, 0])
    _sigma1 = C.cov2(input_layer)

    _mu2 = C.zeros_like(_mu1)
    _sigma2 = C.Constant(np.eye(_dim))
    _sigma2_inv = _sigma2  # identity matrix

    return 0.5 * (C.log(C.det(_sigma2) / C.det(_sigma1)) - _dim +
                  C.trace(_sigma2_inv @ _sigma1) + C.transpose(
                      (_mu2 - _mu1), [1, 0]) @ _sigma2_inv @ (_mu2 - _mu1))
Example #18
def KLF_reverse(chunk):
    input_dim = chunk['input_dim']
    _ph = C.placeholder(input_dim, name='place_holder')

    inv_act_func = chunk['inv_act_func']
    _out = inv_act_func(_ph)

    if 'scale' in chunk:
        _out -= chunk['bias']
        _out /= chunk['scale']

    _w = chunk['W']
    _inv_w = C.Constant(np.linalg.inv(_w.value), name='inv_W')

    _out -= chunk['b']
    _out @= _inv_w

    return _out
Example #19
                def decode(history, q, c, start_logits, end_logits):
                    q = encode(q)
                    c = encode_c(C.splice(c, start_logits, end_logits, axis=0))
                    r = history
                    r = stab_in(r)

                    q_last_h = C.sequence.last(q.outputs[0])
                    q_last_c = C.sequence.last(q.outputs[1])
                    c_last_h = C.sequence.last(c.outputs[0])
                    c_last_c = C.sequence.last(c.outputs[1])
                    initial_hstate = hstate_dense(C.splice(q_last_h, c_last_h))
                    initial_cstate = cstate_dense(C.splice(q_last_c, c_last_c))

                    rec_block = rec_blocks[0]   # LSTM(hidden_dim)  # :: (dh, dc, x) -> (h, c)
                    
                    @C.Function
                    def find_embed(x):
                        gx, ngx = C.slice(x, 0, 0, self.wg_dim), C.slice(x, 0, self.wg_dim, self.vocab_size)
                        return embed(gx, ngx) 

                    @C.Function
                    def lstm_with_attention(dh, dc, r, x):
                        history_embed = find_embed(x)
                        h_att = attention_model(c.outputs[0], dh)
                        q_att = attention_model(q.outputs[0], dh)
                        att = C.splice(h_att, q_att)
                        x = C.splice(x, att)
                        x, dc = rec_block(dh, dc, x).outputs
          
                        # 0*r is a hack: cntk raises an error when r is not used in the body.
                        r = U_dense(att) + W_dense(history_embed) + V_dense(x) + 0*r
                        # note: a bug occurs when the W_dense term is added first:
                        # r = W_dense(embed(gx, ngx)) + U_dense(att) + V_dense(x) + 0*r
                        return x, dc, r
                    _, _, r = C.layers.RecurrenceFrom(lstm_with_attention, return_full_state=True)(initial_hstate, initial_cstate, C.Constant(np.zeros(2 * self.hidden_dim)), r).outputs
        
                    r = maxout(r)
                    r = stab_out(r)
                    r = proj_out(r)
                    #r = C.softmax(r)
                    r = C.layers.Label('out_proj_out')(r)
                    return r
Example #20
def main():
    show_image = False
    if show_image:
        bs = 1
        ci = 3
        co = 3
        cg = co * (ci + 1)
        gd = 8
        gh = 64
        gw = 64
        h = 256
        w = 256
    else:
        bs = 1
        ci = 3
        co = 3
        cg = co * (ci + 1)
        gd = 8
        gh = 64
        gw = 64
        h = 1024
        w = 1024

    im = C.input_variable([bs, ci, h, w], needs_gradient=True, dynamic_axes=[])
    guide = C.input_variable([bs, h, w], needs_gradient=True, dynamic_axes=[])
    guide_no_grad = C.input_variable([bs, h, w],
                                     needs_gradient=False,
                                     dynamic_axes=[])
    grid = C.input_variable([bs, cg, gd, gh, gw],
                            needs_gradient=True,
                            dynamic_axes=[])
    # Create indices
    xx = np.arange(0, w).reshape(1, -1).repeat(h, 0).astype(np.float32)
    yy = np.arange(0, h).reshape(-1, 1).repeat(w, 1).astype(np.float32)
    xx = C.Constant(xx, xx.shape)
    yy = C.Constant(yy, yy.shape)
    gx = ((xx + 0.5) / w) * gw
    gy = ((yy + 0.5) / h) * gh
    gz = C.clip(guide, 0.0, 1.0) * gd
    gz_no_grad = C.clip(guide_no_grad, 0.0, 1.0) * gd
    fx = C.element_max(C.floor(gx - 0.5), 0.0)
    fy = C.element_max(C.floor(gy - 0.5), 0.0)
    fz = C.element_max(C.floor(gz - 0.5), 0.0)
    fz_no_grad = C.element_max(C.floor(gz_no_grad - 0.5), 0.0)
    wx = gx - 0.5 - fx
    wy = gy - 0.5 - fy
    wx = C.expand_dims(C.expand_dims(wx, -1 - len(wx.shape)),
                       -1 - len(wx.shape))
    wy = C.expand_dims(C.expand_dims(wy, -1 - len(wy.shape)),
                       -1 - len(wy.shape))
    wz = C.abs(gz - 0.5 - fz)
    wz = C.expand_dims(wz, 0)
    fx = C.expand_dims(C.expand_dims(fx, -1 - len(fx.shape)),
                       -1 - len(fx.shape))
    fy = C.expand_dims(C.expand_dims(fy, -1 - len(fy.shape)),
                       -1 - len(fy.shape))
    cx = C.element_min(fx + 1, gw - 1)
    cy = C.element_min(fy + 1, gh - 1)
    cz = C.element_min(fz_no_grad + 1, gd - 1)
    batch_idx = np.arange(bs).reshape(bs, 1, 1, 1).astype(np.float32)
    batch_idx = C.Constant(batch_idx, batch_idx.shape)
    out = []
    flat_grid = C.reshape(grid, [-1])
    for c_ in range(co):
        c_idx = np.arange((ci + 1) * c_,
                          (ci + 1) * (c_ + 1)).reshape(1, ci + 1, 1,
                                                       1).astype(np.float32)
        c_idx = C.Constant(c_idx, c_idx.shape)

        def flatten_and_gather(x, y, z):
            linear_idx = x + gw * y + gw * gh * z + c_idx * gw * gh * gd + batch_idx * gw * gh * gd * cg
            flat_linear_idx = C.reshape(linear_idx, [-1])
            return C.reshape(C.gather(flat_grid, flat_linear_idx),
                             linear_idx.shape)

        gather_fff = flatten_and_gather(fx, fy, fz_no_grad)
        gather_ffc = flatten_and_gather(fx, fy, cz)
        gather_fcf = flatten_and_gather(fx, cy, fz_no_grad)
        gather_fcc = flatten_and_gather(fx, cy, cz)
        gather_cff = flatten_and_gather(cx, fy, fz_no_grad)
        gather_cfc = flatten_and_gather(cx, fy, cz)
        gather_ccf = flatten_and_gather(cx, cy, fz_no_grad)
        gather_ccc = flatten_and_gather(cx, cy, cz)
        a = gather_fff*(1-wx)*(1-wy)*(1-wz) + \
            gather_ffc*(1-wx)*(1-wy)*(  wz) + \
            gather_fcf*(1-wx)*(  wy)*(1-wz) + \
            gather_fcc*(1-wx)*(  wy)*(  wz) + \
            gather_cff*(  wx)*(1-wy)*(1-wz) + \
            gather_cfc*(  wx)*(1-wy)*(  wz) + \
            gather_ccf*(  wx)*(  wy)*(1-wz) + \
            gather_ccc*(  wx)*(  wy)*(  wz)
        o = C.reduce_sum(a[:, :-1, ...] * im, 1) + a[:, -1, ...]
        print(o.shape)
        out.append(C.expand_dims(o, 0))
    out = C.splice(*out, axis=1)
    loss = C.reduce_l2(out)

    grid_val = np.random.rand(bs, cg, gd, gh, gw).astype(np.float32)
    if show_image:
        guide_val = skio.imread("/data/rgb.png").mean(2)[:h, :w].astype(
            np.float32)
        guide_val = np.expand_dims(guide_val / 255.0, 0)
        im_val = np.tile(np.expand_dims(guide_val, 1), [1, 3, 1, 1])
        out_val = out.eval({
            im: im_val,
            guide: guide_val,
            guide_no_grad: guide_val,
            grid: grid_val
        })
        out_val = np.clip(np.transpose(np.squeeze(out_val), [1, 2, 0]), 0, 1)
        skio.imsave("/output/imout.png", out_val)
    else:
        im_val = np.random.randn(bs, ci, h, w)
        guide_val = np.random.rand(bs, h, w).astype(np.float32)
        # burning iteration
        for it in range(5):
            print('burning (', it, ')')
            g = loss.grad({
                im: im_val,
                guide: guide_val,
                guide_no_grad: guide_val,
                grid: grid_val
            })
        # actual iterations
        start = time.time()
        for it in range(50):
            print('profiling (', it, ')')
            g = loss.grad({
                im: im_val,
                guide: guide_val,
                guide_no_grad: guide_val,
                grid: grid_val
            })
        end = time.time()
        runtime = (end - start) * 1000.0 / 50.0
        print('Runtime:', runtime)
Example #21
# input_variable sample #
import cntk
myFeatures = 7
features = cntk.input_variable(myFeatures)
print(features)

alternativeFeatures = cntk.input_variable(7)
print(alternativeFeatures)

# input_variable with different shapes #
import cntk
data = cntk.input_variable(shape=[3,5])

# Using Parameter #
import cntk
data = cntk.parameter(shape=(3,5), init=2)
data.value

# Using Constant #
import cntk
data = cntk.Constant(6, shape=(3, 4))
data.value

# Using Record #
import cntk.variables as var
record = var.Record(x = 23, y = 32, z = 55)
# printing the record values #
record.x
record.y
record.z
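For reference, a hedged note on what the .value and attribute accesses above should return (expected values, not captured output):

# expected results (illustrative):
# cntk.parameter(shape=(3,5), init=2).value  -> a 3x5 float32 array filled with 2.0
# cntk.Constant(6, shape=(3, 4)).value       -> a 3x4 float32 array filled with 6.0
# record.x, record.y, record.z               -> 23, 32, 55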
Example #22
def BilateralSlice(sz, i_chans, o_chans, grid_sz=64, sigma_r=8):
  gsize = [(i_chans+1)*o_chans, sigma_r, grid_sz, grid_sz]
  grid = C.Parameter(gsize, 
                     name="grid", init=np.random.uniform(size=gsize))
  guide_scale = C.Parameter((1, ), 
                     name="guide_scale", init=np.ones((1, )))
  grid_scale = C.Parameter((1, ), 
                     name="grid_scale", init=np.ones((1, )))
  im_scale = C.Parameter((1, ), 
                     name="im_scale", init=np.ones((1, )))


  yy, xx = np.meshgrid(np.arange(0, sz), np.arange(0, sz))
  xx = np.expand_dims(xx, 0)
  yy = np.expand_dims(yy, 0)
  cc = np.arange(0, i_chans+1)
  cc = np.expand_dims(cc, 1)
  cc = np.expand_dims(cc, 2)
  xx = C.Constant(xx, xx.shape)
  yy = C.Constant(yy, yy.shape)
  cc = C.Constant(cc, cc.shape)


  @C.functions.BlockFunction("BilateralSlice", "bilateral_slice")
  def bilateral_slice(im, guide, guide_no_grad):
    # Flatten data for gather op
    flat_grid = grid_scale*C.reshape(grid, [grid_sz*grid_sz*sigma_r*o_chans*(i_chans+1)])
    # flat_grid_u = C.unpack_batch(flat_grid)

    # Make sure we do something that requires the gradient w.r.t. the guide
    scaled_guide = guide_scale*guide  
    gx_d, gy_d, gz_d, fx_d, fy_d, fz_d, _, _, _ = grid_coord(
        scaled_guide, xx, yy, sz, grid_sz, sigma_r)
    wx = C.abs(gx_d - 0.5 - fx_d)
    wy = C.abs(gy_d - 0.5 - fy_d)
    wz = C.abs(gz_d - 0.5 - fz_d)

    # Enclosing cell
    gx, gy, gz, fx, fy, fz, cx, cy, cz = grid_coord(
        guide_no_grad, xx, yy, sz, grid_sz, sigma_r)

    out_chans = []
    for chan in range(o_chans):
      output_components = []
      for ix, x in enumerate([fx, cx]):
        wx_ = (1-wx) if ix == 0 else wx
        for iy, y in enumerate([fy, cy]):
          wy_ = (1-wy) if iy == 0 else wy
          for iz, z in enumerate([fz, cz]):
            wz_ = (1-wz) if iz == 0 else wz

            linear_idx = x + grid_sz*(y + grid_sz*(z + sigma_r*(cc + chan*(i_chans+1))))
            flat_linear_idx = C.reshape(linear_idx, [(i_chans+1)*sz*sz])
            # Slice
            interp = C.gather(flat_grid, flat_linear_idx)
            interp_fsz = C.reshape(interp, [i_chans+1, sz, sz])*wx_*wy_*wz_
            output_components.append(interp_fsz)

      out_coeffs = sum(output_components)
      out_chan = C.reduce_sum(out_coeffs[:i_chans]*(im_scale*im) + out_coeffs[-1], 0)
      out_chans.append(out_chan)
    out = C.splice(*out_chans, axis=0)

    return out
  
  return bilateral_slice
Example #23
def test_constant_value(value):
    c = C.Constant(value=value)
    assert np.allclose(c.value, value)
Example #24
import cntk as C
import numpy as np
from io_funcs.binary_io import BinaryIOCollection
from model_lf0_weight import SRU_MULTI_SPEAKER

gpu_descriptor = C.gpu(3)

C.try_set_default_device(gpu_descriptor)

proj = SRU_MULTI_SPEAKER(87, 187, 0.001, 0.5)

trainer = proj.trainer

trainer.restore_from_checkpoint('net/16k/trainer_' + str(41))

output = trainer.model

index = C.Constant(value=np.asarray([0, 1, 0]).astype(np.float32))
input = C.sequence.input_variable(shape=87)

out = output(input, index)

out.save('extracted_model/16k/model_emo')
Example #25
    def parameters(self):
        return self.forward.parameters


if __name__ == '__main__':
    nets = lambda: C.layers.Sequential([
        C.layers.Dense(256, activation=C.leaky_relu),
        C.layers.Dense(256, activation=C.leaky_relu),
        C.layers.Dense(2, activation=C.tanh)
    ])(C.placeholder(2))
    nett = lambda: C.layers.Sequential([
        C.layers.Dense(256, activation=C.leaky_relu),
        C.layers.Dense(256, activation=C.leaky_relu),
        C.layers.Dense(2)
    ])(C.placeholder(2))
    masks = C.Constant(np.array([[0, 1], [1, 0]] * 3).astype(np.float32),
                       name='mask')
    prior = MultivariateNormalDiag(loc=[0., 0.], scale_diag=[1., 1.])
    flow = RealNVP(nets, nett, masks, prior)

    loss = -C.reduce_mean(flow.log_prob)

    learner = C.adam(loss.parameters, C.learning_parameter_schedule(1e-1),
                     C.momentum_schedule(0.9))
    trainer = C.Trainer(flow.forward, (loss, None), learner)

    for t in range(5001):
        noisy_moons = datasets.make_moons(n_samples=1000,
                                          noise=.05)[0].astype(np.float32)
        trainer.train_minibatch({loss.arguments[0]: noisy_moons})

        if t % 500 == 0:
Example #26
def main():
    bs = 4
    c = 64
    h = 512
    w = 512

    im = C.input_variable([bs, c, h, w], needs_gradient=True, dynamic_axes=[])
    warp = C.input_variable([bs, 2, h, w],
                            needs_gradient=True,
                            dynamic_axes=[])
    warp_ng = C.input_variable([bs, 2, h, w],
                               needs_gradient=False,
                               dynamic_axes=[])
    # Create indices
    dx = 0.5 * (warp[:, 0, :, :] + 1.0)
    dy = 0.5 * (warp[:, 1, :, :] + 1.0)
    new_x = C.clip(dx * w, 0, w)
    new_y = C.clip(dy * h, 0, h)
    fx = C.clip(C.floor(new_x), 0, w - 2)
    fy = C.clip(C.floor(new_y), 0, h - 2)
    wx = new_x - fx
    wy = new_y - fy
    dx_ng = 0.5 * (warp_ng[:, 0, :, :] + 1.0)
    dy_ng = 0.5 * (warp_ng[:, 1, :, :] + 1.0)
    new_x_ng = C.clip(dx_ng * w, 0, w)
    new_y_ng = C.clip(dy_ng * h, 0, h)
    fx_ng = C.clip(C.floor(new_x_ng), 0, w - 2)
    fy_ng = C.clip(C.floor(new_y_ng), 0, h - 2)

    chan_idx = np.arange(c).reshape(1, c, 1, 1)
    chan_idx = C.Constant(chan_idx, chan_idx.shape)
    batch_idx = np.arange(bs).reshape(bs, 1, 1, 1)
    batch_idx = C.Constant(batch_idx, batch_idx.shape)
    flat_im = C.reshape(im, [-1])

    def flatten_and_gather(x, y):
        linear_idx = x + w * y + w * h * chan_idx + w * h * c * batch_idx
        flat_linear_idx = C.reshape(linear_idx, [-1])
        return C.reshape(C.gather(flat_im, flat_linear_idx), linear_idx.shape)

    gather_ff = flatten_and_gather(fx_ng, fy_ng)
    gather_fc = flatten_and_gather(fx_ng, fy_ng + 1)
    gather_cf = flatten_and_gather(fx_ng + 1, fy_ng)
    gather_cc = flatten_and_gather(fx_ng + 1, fy_ng + 1)
    out = gather_ff*(1-wx)*(1-wy) + \
          gather_fc*(1-wx)*(  wy) + \
          gather_cf*(  wx)*(1-wy) + \
          gather_cc*(  wx)*(  wy)
    loss = C.reduce_l2(out)

    im_val = np.random.randn(bs, c, h, w).astype(np.float32)
    warp_val = np.random.rand(bs, 2, h, w).astype(np.float32)
    # burning iteration
    for it in range(5):
        print('burning (', it, ')')
        g = loss.grad({im: im_val, warp: warp_val, warp_ng: warp_val})
    # actual iterations
    start = time.time()
    for it in range(50):
        print('profiling (', it, ')')
        g = loss.grad({im: im_val, warp: warp_val, warp_ng: warp_val})
    end = time.time()
    runtime = (end - start) * 1000.0 / 50.0
    print('Runtime:', runtime)
Example #27
    if not cntk.device.try_set_default_device(dev):
        print("Error: error setting device")
        sys.exit(1)
else:
    dev = None

N = float(saxpy.N)
YVAL = float(saxpy.YVAL)
XVAL = float(saxpy.XVAL)
AVAL = float(saxpy.AVAL)

print("N: {}".format(N))

a = cntk.Constant(value=AVAL,
                  shape=[N],
                  dtype=np.float32,
                  device=dev,
                  name="a")
y = cntk.Parameter(shape=[N],
                   init=YVAL,
                   dtype=np.float32,
                   device=dev,
                   name="y")
x = cntk.Parameter(shape=[N],
                   init=XVAL,
                   dtype=np.float32,
                   device=dev,
                   name="x")

t0 = time.time()
cntk.assign(y, y + a * x).eval()
Example #28
#     return C.atan(x)*5

# c_block = KLF_forward(c_dim, batch_norm=True)
c_block = []
for i in range(6):
    c_block.append(flow_forward(c_dim, batch_norm=False))


# single = np.array([[1, 2]])
# # multi = np.random.uniform(size=(100, 2))
# multi = np.random.normal(size=(100, 2))

# value = multi.astype(np.float32)

q = c_input
log_det_J = C.Constant(0)
bn = []
bn_update = []
for block in c_block:
    log_det_J += block[1](q)
    if 'muB' in block[-1]: # batch norm
        bn.append(block[-1]['muB'](q))
        bn.append(block[-1]['varB'](q))
        bn_update.append(block[-1]['mu'])
        bn_update.append(block[-1]['var'])
    q = block[0](q)

base_dist = MultivariateNormalDiag(loc=[0., 0.], scale_diag=[1., 1.])

# log_q_k = C.log(base_dist.pdf(z_0)) - sum_log_det_jacob
Example #29
def main():
    print("version", C.__version__)
    bs = 1
    n_chans = 1

    sigma_s = 16
    sigma_r = 12

    # 4x4x1024x1024
    # 4x12x64x64

    sz = 256
    # sz = 1024
    small_sz = sz // sigma_s

    yy, xx = np.meshgrid(np.arange(0, sz), np.arange(0, sz))
    cc, bb = np.meshgrid(np.arange(0, n_chans), np.arange(0, bs))

    xx = np.expand_dims(xx, 0)
    xx = np.expand_dims(xx, 0)
    yy = np.expand_dims(yy, 0)
    yy = np.expand_dims(yy, 0)

    bb = np.expand_dims(bb, 2)
    bb = np.expand_dims(bb, 3)
    cc = np.expand_dims(cc, 2)
    cc = np.expand_dims(cc, 3)

    # Compute graph
    grid = C.Parameter([bs, n_chans, sigma_r, small_sz, small_sz])
    # grid = C.input_variable(
    #     [bs, n_chans, sigma_r, small_sz, small_sz],
    #     dynamic_axes=[], needs_gradient=True)
    guide = C.input_variable([bs, sz, sz],
                             dynamic_axes=[],
                             needs_gradient=True)
    guide_non_diff = C.input_variable([bs, sz, sz], dynamic_axes=[])

    # Coordinates
    xx = C.Constant(xx, xx.shape)
    yy = C.Constant(yy, yy.shape)
    cc = C.Constant(cc, cc.shape)
    bb = C.Constant(bb, bb.shape)

    gx_d, gy_d, gz_d, fx_d, fy_d, fz_d, _, _, _ = grid_coord(
        guide, xx, yy, sz, small_sz, sigma_r, bs)

    # Trilerp weights
    wx = (gx_d - 0.5 - fx_d)
    wy = (gy_d - 0.5 - fy_d)
    wz = C.abs(gz_d - 0.5 - fz_d)

    # Enclosing cell
    gx, gy, gz, fx, fy, fz, cx, cy, cz = grid_coord(guide_non_diff, xx, yy, sz,
                                                    small_sz, sigma_r, bs)

    output_components = []
    for ix, x in enumerate([fx, cx]):
        wx_ = (1 - wx) if ix == 0 else wx
        for iy, y in enumerate([fy, cy]):
            wy_ = (1 - wy) if iy == 0 else wy
            for iz, z in enumerate([fz, cz]):
                wz_ = (1 - wz) if iz == 0 else wz
                linear_idx = x + small_sz * (y + small_sz *
                                             (z + sigma_r *
                                              (cc + n_chans * bb)))

                # Flatten data for gather op
                flat_grid = C.reshape(
                    grid, [bs * small_sz * small_sz * sigma_r * n_chans])
                flat_linear_idx = C.reshape(linear_idx,
                                            [bs * n_chans * sz * sz])

                # Slice
                interp = C.gather(flat_grid, flat_linear_idx)
                interp_fsz = C.reshape(interp, [bs, n_chans, sz, sz])
                output_components.append(interp_fsz * wz_ * wx_ * wy_)

    out = sum(output_components)
    loss = C.squared_error(out, guide)

    # svg = C.logging.graph.plot(out, "/output/graph.svg")

    grid_data = np.random.uniform(size=(bs, n_chans, sigma_r, small_sz,
                                        small_sz)).astype(np.float32)

    # guide_data = np.random.uniform(
    #     size=(bs, sz, sz)).astype(np.float32)
    guide_data = skio.imread("/data/rgb.png").mean(2)[:sz, :sz].astype(
        np.float32)
    guide_data = np.expand_dims(guide_data, 0) / 255.0

    inputs = {guide: guide_data, guide_non_diff: guide_data}
Example #30
def flow_forward(input_dim: int, act_func_pair: tuple = (None, None), batch_norm: bool = False):
    chunk = {}
    log_det_J = 0

    chunk['input_dim'] = input_dim
    _ph = C.placeholder(input_dim, name='place_holder')
    _out = _ph

    if batch_norm:
        # _bn = C.layers.BatchNormalization(name='batch_norm')(_ph)
        # chunk['scale'] = _bn.parameters[0]
        # chunk['bias'] = _bn.parameters[1]

        chunk['mu'] = C.Constant(np.zeros(shape=input_dim))
        chunk['var'] = C.Constant(np.ones(shape=input_dim))

        _eps = C.Constant(1e-7)
        _mu = C.reduce_mean(_ph, axis=C.Axis.default_batch_axis())
        _var = C.reduce_mean(C.square(_ph-_mu), axis=C.Axis.default_batch_axis())

        chunk['muB'] = _mu
        chunk['varB'] = _var

        # _bn = (_ph-chunk['mu'])/C.sqrt(chunk['var']+_eps)
        _bn = C.sqrt(chunk['var']+_eps)*_ph + chunk['mu']
        _ph = _bn

        log_det_J += -0.5*C.reduce_sum(C.log((_var+_eps)))
        # log_det_J += C.reduce_sum(C.log())

    chunk['W_rot_mat'] = _W = C.parameter((input_dim, input_dim))
    _W.value = random_rotation_matrix = special_ortho_group.rvs(input_dim)
    # _W.value = np.roll(np.eye(input_dim),input_dim//2,axis=0)
    _out = _ph@_W
    log_det_J += C.log(C.abs(C.det(_W))) # or # log_det_J += C.slogdet(_W)[1]
    
    _half_dim = input_dim//2
    _x1 = _out[:_half_dim]
    _x2 = _out[_half_dim:]

    _log_s_func, _t_func = act_func_pair
    if _log_s_func is None: # basic network
        _log_s_func = C.layers.Sequential([
            C.layers.Dense(256, C.leaky_relu),
            C.layers.Dense(256, C.leaky_relu),
            C.layers.Dense(_half_dim, C.tanh),
        ])#(C.placeholder(input_dim, name='place_holder'))
    if _t_func is None: # basic network
        _t_func = C.layers.Sequential([
            C.layers.Dense(256, C.leaky_relu),
            C.layers.Dense(256, C.leaky_relu),
            C.layers.Dense(_half_dim),
        ])#(C.placeholder(input_dim, name='place_holder'))

    chunk['log_s_func'] = _log_s_func
    chunk['t_func'] = _t_func

    _log_s, _t = _log_s_func(_x2), _t_func(_x2)

    _s = C.exp(_log_s)

    _y1 = _s*_x1 + _t
    _y2 = _x2

    _Y = C.splice(_y1, _y2)
    chunk['output'] = _Y

    log_det_J += C.reduce_sum(_log_s)

    return _Y, log_det_J, chunk