Example #1
File: converter.py Project: AllanYiin/CNTK
def convert(root_func, filter, converter):
    '''
    Clones the graph underlying root_func and in the clone substitutes
    all Functions obtained by applying 'filter', with a new Function obtained by calling the specified 'converter'

    Args:
        root_func: a root function of a graph to be cloned and converted
        filter: a lambda for filtering out the Functions to be converted
        converter: a lambda for obtaining the substitute for each of the Functions to be converted
    Returns:
        Cloned and converted Function (graph)
    '''
    # recursively convert for blocks in root_func
    blocks = C.logging.graph.depth_first_search(root_func, lambda x : type(x) == C.Function and x.root_function.is_block, depth = 0)
    for i in range(len(blocks)):
        # search for blocks again in case block input/output has been modified
        blocks1 = C.logging.graph.depth_first_search(root_func, lambda x : type(x) == C.Function and x.root_function.is_block, depth = 0)
        block = blocks1[i] # assuming depth_first_search order to be stable, so use the old index on new search results
        block_root = C.as_composite(block.block_root)
        new_block_root = convert(block_root, filter, converter)
        if new_block_root != block_root:
            block_arguments_mapping = dict(block.block_arguments_mapping)
            new_block_arguments_mapping = []
            for arg, new_arg in zip(block_root.arguments, new_block_root.arguments):
                new_block_arguments_mapping += [(new_arg, block_arguments_mapping[arg])]
            new_block = C.as_block(new_block_root, new_block_arguments_mapping, block.op_name, block.name)
            if all([x not in root_func.outputs for x in block.outputs]) or all([x in block.outputs for x in root_func.outputs]):
                root_func = root_func.clone(C.CloneMethod.share, dict(zip(block.outputs, new_block.outputs)))
            else:
                new_outputs = [new_block.outputs[block.outputs.index(x)] if x in block.outputs else None for x in root_func.outputs]
                root_func_nonreplaced = C.combine([x for x in root_func.outputs if x not in block.outputs])
                root_func_nonreplaced_clone = root_func_nonreplaced.clone(C.CloneMethod.share, dict(zip(block.outputs, new_block.outputs)))
                idx = 0
                for nonreplaced_output in root_func_nonreplaced_clone.outputs:
                    while new_outputs[idx]:
                        idx += 1
                    new_outputs[idx] = nonreplaced_output
                root_func = C.combine(new_outputs)

    # replace all Function instances under root_func that pass the specified 'filter'
    functions_to_convert = C.logging.graph.depth_first_search(root_func, filter, depth = 0)
    for function_to_convert in functions_to_convert:
        converted = converter(function_to_convert)

        if function_to_convert.output not in root_func.outputs:
            root_func = root_func.clone(C.CloneMethod.share, {function_to_convert.output : converted.output})
        else:
            # if cudnn_rnn output is the root_func output, just use converted as root_func and no clone needed
            if len(root_func.outputs) > 1:
                root_func = C.combine([converted if x == function_to_convert.output else x for x in root_func.outputs])
            else:
                root_func = converted

    return root_func
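# A minimal usage sketch (not part of the original listing): replace every tanh
# activation in a small model with a ReLU.  The primitive op name 'Tanh' and the
# way the substitute is built from the node's first input are assumptions made
# for this sketch only.
import cntk as C

x = C.input_variable(4)
model = C.layers.Dense(8, activation=C.tanh)(x)

is_tanh = lambda f: isinstance(f, C.Function) and f.op_name == 'Tanh'  # filter
to_relu = lambda f: C.relu(f.inputs[0])                                # converter
relu_model = convert(model, is_tanh, to_relu)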
Example #2
    def input_layer(self,cgw,cnw,cc,qgw,qnw,qc):
        cgw_ph = C.placeholder()
        cnw_ph = C.placeholder()
        cc_ph  = C.placeholder()
        qgw_ph = C.placeholder()
        qnw_ph = C.placeholder()
        qc_ph  = C.placeholder()

        input_chars = C.placeholder(shape=(1,self.word_size,self.c_dim))
        input_glove_words = C.placeholder(shape=(self.wg_dim,))
        input_nonglove_words = C.placeholder(shape=(self.wn_dim,))

        # we need to reshape because GlobalMaxPooling/reduce_max is retaining a trailing singleton dimension
        # todo GlobalPooling/reduce_max should have a keepdims default to False
        embedded = C.splice(
            C.reshape(self.charcnn(input_chars), self.convs),
            self.embed()(input_glove_words, input_nonglove_words), name='splice_embed')
        processed = C.layers.Sequential([For(range(2), lambda: OptimizedRnnStack(self.hidden_dim, bidirectional=True, use_cudnn=self.use_cudnn, name='input_rnn'))])(embedded)
        
        qce = C.one_hot(qc_ph, num_classes=self.c_dim, sparse_output=self.use_sparse)
        cce = C.one_hot(cc_ph, num_classes=self.c_dim, sparse_output=self.use_sparse)
        
        q_processed = processed.clone(C.CloneMethod.share, {input_chars:qce, input_glove_words:qgw_ph, input_nonglove_words:qnw_ph})
        c_processed = processed.clone(C.CloneMethod.share, {input_chars:cce, input_glove_words:cgw_ph, input_nonglove_words:cnw_ph})
        return C.as_block(
            C.combine([c_processed, q_processed]),
            [(cgw_ph, cgw),(cnw_ph, cnw),(cc_ph, cc),(qgw_ph, qgw),(qnw_ph, qnw),(qc_ph, qc)],
            'input_layer',
            'input_layer')
Example #3
File: blocks.py Project: FDecaYed/CNTK
def _Identity(name='identity_arg'):
    x = Placeholder(name=name)
    apply_x = combine([x])
    # TODO: Let's not encourage users to use combine([f]) as a workaround for identity/pass, but rather have it as a first-class operator implemented that we then use. [Willi]
    #apply_x = alias(x) # TODO: does not work. Should it?
    #_name_and_extend_Function(apply_x, 'Identity')
    return Block(apply_x, 'Identity')
Example #4
    def model(self):
        c = C.Axis.new_unique_dynamic_axis('c')
        q = C.Axis.new_unique_dynamic_axis('q')
        b = C.Axis.default_batch_axis()
        cgw = C.input_variable(self.wg_dim, dynamic_axes=[b,c], is_sparse=self.use_sparse, name='cgw')
        cnw = C.input_variable(self.wn_dim, dynamic_axes=[b,c], is_sparse=self.use_sparse, name='cnw')
        qgw = C.input_variable(self.wg_dim, dynamic_axes=[b,q], is_sparse=self.use_sparse, name='qgw')
        qnw = C.input_variable(self.wn_dim, dynamic_axes=[b,q], is_sparse=self.use_sparse, name='qnw')
        cc = C.input_variable((1,self.word_size), dynamic_axes=[b,c], name='cc')
        qc = C.input_variable((1,self.word_size), dynamic_axes=[b,q], name='qc')
        ab = C.input_variable(self.a_dim, dynamic_axes=[b,c], name='ab')
        ae = C.input_variable(self.a_dim, dynamic_axes=[b,c], name='ae')

        #input layer
        c_processed, q_processed = self.input_layer(cgw,cnw,cc,qgw,qnw,qc).outputs
        
        # attention layer
        att_context = self.gated_attention_gru_layer(c_processed, q_processed)

        # self-matching attention layer
        match_context = self.matching_attention_layer(att_context)

        # output layer
        start_logits, end_logits = self.output_layer(q_processed, match_context).outputs

        # loss
        start_loss = seq_loss(start_logits, ab)
        end_loss = seq_loss(end_logits, ae)
        #paper_loss = start_loss + end_loss
        new_loss = all_spans_loss(start_logits, ab, end_logits, ae)
        return C.combine([start_logits, end_logits]), new_loss
Example #5
def create_faster_rcnn_eval_model(model, image_input, dims_input, cfg, rpn_model=None):
    print("creating eval model")
    last_conv_node_name = cfg["MODEL"].LAST_CONV_NODE_NAME
    conv_layers = clone_model(model, [cfg["MODEL"].FEATURE_NODE_NAME], [last_conv_node_name], CloneMethod.freeze)
    conv_out = conv_layers(image_input)

    model_with_rpn = model if rpn_model is None else rpn_model
    rpn = clone_model(model_with_rpn, [last_conv_node_name], ["rpn_cls_prob_reshape", "rpn_bbox_pred"], CloneMethod.freeze)
    rpn_out = rpn(conv_out)
    # we need to add the proposal layer anew to account for changing configs when buffering proposals in 4-stage training
    rpn_rois = create_proposal_layer(rpn_out.outputs[0], rpn_out.outputs[1], dims_input, cfg)

    roi_fc_layers = clone_model(model, [last_conv_node_name, "rpn_target_rois"], ["cls_score", "bbox_regr"], CloneMethod.freeze)
    pred_net = roi_fc_layers(conv_out, rpn_rois)
    cls_score = pred_net.outputs[0]
    bbox_regr = pred_net.outputs[1]

    if cfg.BBOX_NORMALIZE_TARGETS:
        num_boxes = int(bbox_regr.shape[1] / 4)
        bbox_normalize_means = np.array(cfg.BBOX_NORMALIZE_MEANS * num_boxes)
        bbox_normalize_stds = np.array(cfg.BBOX_NORMALIZE_STDS * num_boxes)
        bbox_regr = plus(element_times(bbox_regr, bbox_normalize_stds), bbox_normalize_means, name='bbox_regr')

    cls_pred = softmax(cls_score, axis=1, name='cls_pred')
    eval_model = combine([cls_pred, rpn_rois, bbox_regr])

    return eval_model
Example #6
File: train.py Project: gzt200361/CNTK
def calculate_loss_vector(network, path, location_path, communicator):
    source = DataSource(path, opt.vocab_file, location_path,
                        opt.seqlength, opt.batchsize)
    # the curr row -> the curr col
    # the curr col -> the next row
    row_loss = C.log(C.softmax(network['model'].outputs[0]))
    col_loss = C.log(C.softmax(network['model'].outputs[1]))
    loss = C.combine([row_loss, col_loss])
    row_loss_vector = np.zeros((opt.vocabsize, vocab_sqrt))
    col_loss_vector = np.zeros((opt.vocabsize, vocab_sqrt))

    flag = True
    while flag:
        mb = source.next_minibatch(opt.seqlength * opt.batchsize * Communicator.num_workers(),
                                   Communicator.num_workers(),
                                   communicator.rank())
        result = loss.eval({
            network['row']: mb[source.input1],
            network['col']: mb[source.input2],
        })
        row_prob = result[loss.outputs[0]]
        col_prob = result[loss.outputs[1]]
        label1 = mb[source.word1].asarray()
        label2 = mb[source.word2].asarray()
        sequences = len(label1)
        for i in range(sequences):
            seqlength = len(row_prob[i])
            for j in range(seqlength):
                row_word = int(label1[i][j][0])
                col_word = int(label2[i][j][0])
                row_loss_vector[row_word] -= row_prob[i][j]
                col_loss_vector[col_word] -= col_prob[i][j]
        flag = not mb[source.input1].sweep_end
    return col_loss_vector, row_loss_vector
Example #7
File: functions.py Project: FDecaYed/CNTK
    def clone(self, method, substitutions=None):
        '''
        Clones the function. The parameters of the Function are either cloned,
        shared or frozen as specified by the method argument and any variable
        substitutions requested are applied in the cloned Function instance.

        Args:
            method (:class:`CloneMethod`): one of

             * 'clone': the returned function gets its own copy of parameters (default)
             * 'share': the returned function shares its parameters with this function
             * 'freeze': parameters are cloned and made immutable (constant).

            substitutions (dict): a dictionary mapping variables in this
             function to variables in the cloned function

        Returns:
            :class:`~cntk.ops.functions.Function`: the cloned Function
        '''
        # C++ clone() can only clone composites. If we are not a composite, make it one using combine()
        if not self.is_composite:
            from cntk import combine
            #return combine([self]).clone(method, substitutions).root_function.arguments[0].owner
            # BUGBUG: This ^^ does not give me the correct .arguments, so we leave the extra combine() in for now.
            return combine([self]).clone(method, substitutions)

        method = getattr(cntk_py,
                'ParameterCloningMethod_' + CloneMethod(method).name.capitalize())
        substitutions = substitutions or {}
        if not isinstance(substitutions, dict):
            raise TypeError("Variable substitution map must be a dictionary")
        return super(Function, self).clone(method, substitutions)
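# A short, hedged illustration (not from the source file) of the three cloning
# modes on a tiny dense layer; with 'freeze' the weights are copied into
# constants, so the clone exposes no learnable parameters.
import cntk as C

x = C.input_variable(2)
dense = C.layers.Dense(3)(x)

shared = dense.clone(C.CloneMethod.share)            # reuses the same parameter objects
copied = dense.clone(C.CloneMethod.clone)            # gets its own copies of the parameters
x2 = C.input_variable(2)
frozen = dense.clone(C.CloneMethod.freeze, {x: x2})  # parameters become constants, input swapped for x2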
Example #8
def test_block_with_unused_outputs():
    p1 = C.placeholder()
    p3 = C.placeholder()
    func1 = C.as_block(p1 + 1, [(p1, p3)], 'plus_func_1')
    p2 = C.placeholder()
    p4 = C.placeholder()
    func2 = C.as_block(p2 + 1, [(p2, p4)], 'plus_func_2')
    p5 = C.placeholder()
    func3 = C.as_block(C.combine([func2]), [(p4, p5)], 'empty_block')
    input_var1 = C.input_variable(shape=())
    input_var2 = C.input_variable(shape=())
    block = C.as_block(C.combine([func1, func3]), [(p3, input_var1), (p5, input_var2)], 'multi_output_block')

    eval_root = C.combine([block.outputs[0]])
    result = eval_root.eval({input_var1 : np.asarray([3], dtype=np.float32), input_var2 : np.asarray([-3], dtype=np.float32)})
    assert np.array_equal(result, [ 4.])
Example #9
File: blocks.py Project: BorisJineman/CNTK
def identity(keep):
    '''
    Identity function.
    There is no factory for it because there is only one identity function.
    '''
    # Note: We cannot use alias() here since parameter-shape inference cannot be done through alias().
    return combine([keep])
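# Typical use (a sketch, not taken from the source): identity serves as the
# "no activation" choice in the layers API, e.g. for a purely linear projection.
from cntk.layers import Dense, identity

linear_proj = Dense(3, activation=identity)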
Example #10
def gru_cell(shape, init=init_default_or_glorot_uniform, name=''): # (x, (h,c))
  shape = _as_tuple(shape)

  if len(shape) != 1 :
    raise ValueError("gru_cell: shape must be vectors (rank-1 tensors)")

  # determine stacking dimensions
  cell_shape_stacked = shape * 2  # patched dims with stack_axis duplicated 2 times

  # parameters
  Wz = Parameter(cell_shape_stacked, init = init, name='Wz')
  Wr = Parameter(cell_shape_stacked, init = init, name='Wr')
  Wh = Parameter(cell_shape_stacked, init = init, name='Wh')
  Uz = Parameter( _INFERRED + shape, init = init, name = 'Uz')
  Ur = Parameter( _INFERRED + shape, init = init, name = 'Ur')
  Uh = Parameter( _INFERRED + shape, init = init, name = 'Uh')

  def create_s_placeholder():
    # we pass the known dimensions here, which makes dimension inference easier
    return Placeholder(shape=shape, name='S') # (h, c)

  # parameters to model function
  x = Placeholder(name='gru_block_arg')
  prev_status = create_s_placeholder()

  # formula of model function
  Sn_1 = prev_status

  z = sigmoid(times(x, Uz, name='x*Uz') + times(Sn_1, Wz, name='Sprev*Wz'), name='z')
  r = sigmoid(times(x, Ur, name='x*Ur') + times(Sn_1, Wr, name='Sprev*Wr'), name='r')
  h = tanh(times(x, Uh, name='x*Uh') + times(element_times(Sn_1, r, name='Sprev*r'), Wh), name='h')
  s = plus(element_times((1-z), h, name='(1-z)*h'), element_times(z, Sn_1, name='z*SPrev'), name=name)
  apply_x_s = combine([s])
  apply_x_s.create_placeholder = create_s_placeholder
  return apply_x_s
Example #11
File: train.py Project: gzt200361/CNTK
def create_model(input_dim):
    row = sequence.input_variable(shape=input_dim)
    col = sequence.input_variable(shape=input_dim)
    rowh = Sequential([Embedding(opt.embed), Stabilizer(), Dropout(opt.dropout)])(row)
    colh = Sequential([Embedding(opt.embed), Stabilizer(), Dropout(opt.dropout)])(col)

    x = C.splice(rowh, colh, axis=-1)
    x = lightlstm(opt.embed, opt.nhid)(x)
    x = For(range(opt.layer-1), lambda: lightlstm(opt.nhid, opt.nhid))(x)
    rowh = C.slice(x, -1, opt.nhid * 0, opt.nhid * 1)
    colh = C.slice(x, -1, opt.nhid * 1, opt.nhid * 2)

    row_predict = Sequential([Dropout(opt.dropout), Dense(input_dim)])(rowh)
    col_predict = Sequential([Dropout(opt.dropout), Dense(input_dim)])(colh)

    # variable : row label and col label
    row_label = sequence.input_variable(shape=input_dim)
    col_label = sequence.input_variable(shape=input_dim)
    model = C.combine([row_predict, col_predict])

    return {'row':       row,
            'col':       col,
            'row_label': row_label,
            'col_label': col_label,
            'model':     model}
Example #12
def test_constant_eval():
    c = C.Constant(value=1)
    c_plus_1 = c + 1
    op = C.combine([c_plus_1, c])
    result = op.eval({})
    assert np.array_equal(result[c_plus_1.output], 2.0)
    assert np.array_equal(result[c], 1.0)
Example #13
def test_output_subset_evaluation(device_id):
    try:
        gpu_device = C.gpu(0)
    except ValueError:
        pytest.skip('Test only runs when GPU available')

    device = cntk_device(device_id)
    x1 = C.input_variable(shape=())
    op1 = C.constant(value=1, shape=(1), device=device) + (C.constant(value=1, shape=(1), device=device) + x1)

    x2 = C.input_variable(shape=(1))

    # Deliberately locate the parameter on a different device
    # instead of the actual compute target device, so that
    # if we try to use this parameter, it results in an error
    if (device.type() == 0):
        parameter_device = gpu_device
    else:
        parameter_device = C.cpu()
    p = C.parameter(shape=(1), init=C.glorot_uniform(), device=parameter_device)
    op2 = (x2 - C.constant(value=10, shape=(1), device=device)) - p

    op = C.combine([op1, op2])

    _, result = op.forward({x1 : np.asarray([1, 2, 3])}, [op1], device=device)
    assert np.array_equal(result[op1], np.asarray([[3], [4], [5]]))
Example #14
def test_eval_again_with_prev_outputs_live(device_id):
    x = C.input_variable(2)
    dev = cntk_device(device_id)
    w1 = C.parameter(init=np.asarray([1], dtype=np.float32), device=dev)
    w2 = C.parameter(init=np.asarray([-1], dtype=np.float32), device=dev)
    out1 = x + w1
    out2 = x + w2
    op = C.combine([out1, out2])

    result1 = op.eval({x : np.asarray([2, 5], dtype=np.float32)}, device=dev)
    assert np.array_equal(result1[out1.output], [[3, 6]])
    assert np.array_equal(result1[out2.output], [[1, 4]])

    result2 = op.eval({x : np.asarray([[-1, 4], [-4, 7]], dtype=np.float32)}, device=dev)
    assert np.array_equal(result2[out1.output], [[0, 5], [-3, 8]])
    assert np.array_equal(result2[out2.output], [[-2, 3], [-5, 6]])

    # result1 should still be valid
    assert np.array_equal(result1[out1.output], [[3, 6]])
    assert np.array_equal(result1[out2.output], [[1, 4]])

    result1 = op.eval({x : np.asarray([2, 5], dtype=np.float32)}, device=dev, as_numpy=False)
    assert np.array_equal(result1[out1.output].asarray(), [[3, 6]])
    assert np.array_equal(result1[out2.output].asarray(), [[1, 4]])

    result2 = op.eval({x : np.asarray([[-1, 4], [-4, 7]], dtype=np.float32)}, device=dev, as_numpy=False)
    assert np.array_equal(result2[out1.output].asarray(), [[0, 5], [-3, 8]])
    assert np.array_equal(result2[out2.output].asarray(), [[-2, 3], [-5, 6]])

    # Accessing result1 now will cause an error since it was a temporary that
    # is now erased, due to the subsequent eval call
    with pytest.raises(RuntimeError):
        assert np.array_equal(result1[out1.output].asarray(), [[3, 6]])

    grad_op = out1 + out2
    grad1 = grad_op.grad({x : np.asarray([2, 5], dtype=np.float32)}, wrt=[w1, w2], device=dev)
    assert np.array_equal(grad1[w1], [2])
    assert np.array_equal(grad1[w2], [2])

    grad2 = grad_op.grad({x : np.asarray([[-1, 4], [-4, 7]], dtype=np.float32)}, wrt=[w1, w2], device=dev)
    assert np.array_equal(grad2[w1], [4])
    assert np.array_equal(grad2[w2], [4])

    # grad1 should still be valid
    assert np.array_equal(grad1[w1], [2])
    assert np.array_equal(grad1[w2], [2])

    grad1 = grad_op.grad({x : np.asarray([2, 5], dtype=np.float32)}, wrt=[w1, w2], device=dev, as_numpy=False)
    assert np.array_equal(grad1[w1].asarray(), [2])
    assert np.array_equal(grad1[w2].asarray(), [2])

    grad2 = grad_op.grad({x : np.asarray([[-1, 4], [-4, 7]], dtype=np.float32)}, wrt=[w1, w2], device=dev, as_numpy=False)
    assert np.array_equal(grad2[w1].asarray(), [4])
    assert np.array_equal(grad2[w2].asarray(), [4])

    # Accessing grad1 now will cause an error since it was a temporary that
    # is now erased, due to the subsequent grad call
    with pytest.raises(RuntimeError):
        assert np.array_equal(grad1[w1].asarray(), [2])
Example #15
def test_grad_custimized_root():
    x = C.input_variable(shape=(1,), needs_gradient=True)
    y = C.sqrt(x)
    y2 = C.log(x)
    combine = C.combine([y.output, y2.output])
    a = np.asarray([1,4,16], dtype=np.float32).reshape(3,1)
    grads = combine.grad({x:a}, grad_root = y.output)
    expect_grad = np.asarray([[0.5],[0.25],[0.125]], dtype=np.float32)
    assert np.array_equal(grads, expect_grad)
Example #16
def test_universal():
    np.random.seed(98052)
    builtin_sgd = lambda params: sgd(params, lr=learning_rate_schedule(0.125, UnitType.minibatch))
    builtin_last_avg_error, builtin_avg_error, _ = ffnet(builtin_sgd)
    np.random.seed(98052)
    my_sgd = lambda ps, gs: C.combine([C.assign(p, p - 0.125/25 * g) for p, g in zip(ps, gs)])
    universal_sgd = lambda params: universal(my_sgd, params)
    my_last_avg_error, my_avg_error, _ = ffnet(universal_sgd)
    assert np.all(np.less_equal(my_last_avg_error, builtin_last_avg_error))
    assert np.all(np.less_equal(my_avg_error, builtin_avg_error))
Example #17
File: blocks.py Project: BorisJineman/CNTK
def _inject_name(f, name):
    '''
    Call this at the end of any layer or block that takes an optional name argument.
    '''
    if name:
        if len(f.outputs) == 1:
            f = alias(f, name=name)
        else:
            f = combine(list(f.outputs), name=name) # BUGBUG: Does this actually name things?
    return f
Example #18
def test_clone_with_unfound_previous_node():
    x = C.input_variable(())
    y = C.combine(x * x, x + x)
    y0 = y[0]
    y1 = y[1]
    y0_new = C.plus(y0,0, name="test")
    X=C.logging.find_by_name(y0_new, 'QueryReply_y')
    
    with pytest.raises(AttributeError):
        y_clone = y.clone(C.CloneMethod.share, {X:y0_new})
Example #19
File: trainer.py Project: AllanYiin/CNTK
 def _get_loss_metric(criterion): # helper to interpret criterion parameter
     if isinstance(criterion, cntk_py.Function): # input can be a tuple of Functions or a tuple-valued Function
         criterion = criterion.outputs           # break up tuple-valued Function into tuple of Functions
     # map Variable to Function
     from cntk import combine
     criterion = tuple([combine([output], name=output.name) if isinstance(output, cntk_py.Variable) else output for output in criterion])
     if len(criterion) == 1:
         criterion = criterion + (None,) # tuple of 1 value: pad with None
     elif len(criterion) != 2:
         raise ValueError("criterion parameter must be a singleton or a tuple of 2 elements")
     return criterion
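# For context, a hedged sketch of the two criterion forms this helper accepts,
# as they would typically be handed to C.Trainer (the network and learner
# settings below are placeholders invented for this sketch):
import cntk as C

features = C.input_variable(4)
labels = C.input_variable(2)
z = C.layers.Dense(2)(features)
loss = C.cross_entropy_with_softmax(z, labels)
metric = C.classification_error(z, labels)

def make_learner():
    return C.sgd(z.parameters, lr=C.learning_rate_schedule(0.1, C.UnitType.minibatch))

trainer_a = C.Trainer(z, (loss, metric), [make_learner()])             # tuple of Functions
trainer_b = C.Trainer(z, C.combine([loss, metric]), [make_learner()])  # tuple-valued Function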
Example #20
def debug_attention(model, input):
    q = C.combine([model, model.attention_model.attention_weights])
    #words, p = q(input) # Python 3
    words_p = q(input)
    words = words_p[0]
    p     = words_p[1]
    output_seq_len = words[0].shape[0]
    p_sq = np.squeeze(p[0][:output_seq_len,:,:]) # (batch, output_len, input_len, 1)
    opts = np.get_printoptions()
    np.set_printoptions(precision=5)
    print(p_sq)
    np.set_printoptions(**opts)
Example #21
def clone_model(base_model, from_node_names, to_node_names, clone_method):
    from_nodes = [find_by_name(base_model, node_name) for node_name in from_node_names]
    if None in from_nodes:
        print("Error: could not find all specified 'from_nodes' in clone. Looking for {}, found {}"
              .format(from_node_names, from_nodes))
    to_nodes = [find_by_name(base_model, node_name) for node_name in to_node_names]
    if None in to_nodes:
        print("Error: could not find all specified 'to_nodes' in clone. Looking for {}, found {}"
              .format(to_node_names, to_nodes))
    input_placeholders = dict(zip(from_nodes, [placeholder() for x in from_nodes]))
    cloned_net = combine(to_nodes).clone(clone_method, input_placeholders)
    return cloned_net
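# A hedged usage sketch (the tiny model and its node names are invented for this
# sketch): freeze the sub-graph between a named input and a named hidden layer,
# then re-apply it to a fresh input variable.
import cntk as C

feats = C.input_variable(4, name='features')
hidden = C.layers.Dense(8, activation=C.relu, name='hidden')(feats)
model = C.layers.Dense(2, name='z')(hidden)

frozen_body = clone_model(model, ['features'], ['hidden'], C.CloneMethod.freeze)
new_input = C.input_variable(4)
body_out = frozen_body(new_input)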
Example #22
def test_clone_with_wrong_type_node():
    x = C.input_variable(())
    y = C.combine(x * x, x + x)
    y0 = y[0]
    y1 = y[1]
    y0_new = C.plus(y0,0, name="test")
    X=C.logging.find_by_name(y0_new, 'QueryReply_y')

    a = 5
    
    with pytest.raises(TypeError):
        y_clone = y.clone(C.CloneMethod.share, {y0:a})
Example #23
 def output_layer(self, query, match_context):
     q_processed = C.placeholder(shape=(2*self.hidden_dim,))
     mat_context = C.placeholder(shape=(2*self.hidden_dim,))
     
     #output layer
     r_q = question_pooling(q_processed, 2*self.hidden_dim) #shape n*(2*self.hidden_dim)
     p1_logits = attention_weight(mat_context, r_q, 2*self.hidden_dim)
     attention_pool = C.sequence.reduce_sum(p1_logits * mat_context)
     state = C.layers.GRU(2*self.hidden_dim)(attention_pool, r_q)
     p2_logits = attention_weight(mat_context, state, 2*self.hidden_dim)
     
     @C.Function
     def start_ave_point(p1_logits, p2_logits, point):
         @C.Function
         def start_ave(last, now):
             now = now + last - last
             new_start = now * C.sequence.gather(p2_logits, point)
             point = C.sequence.future_value(point)
             return new_start
         start_logits_ave = C.layers.Recurrence(start_ave)(p1_logits)
         return start_logits_ave
     point = C.sequence.is_first(p1_logits)
     point = C.layers.Sequential([For(range(2), lambda: C.layers.Recurrence(C.plus))])(point)
     point = C.greater(C.constant(16), point)
     start_logits_ave = start_ave_point(p1_logits, p2_logits, point)
     
     @C.Function
     def end_ave_point(p1_logits, p2_logits, point):
         @C.Function
         def end_ave(last, now):
             now = now + last - last
             new_end = now * C.sequence.gather(p2_logits, point)
             point = C.sequence.past_value(point)
             return new_end
         end_logits_ave = C.layers.Recurrence(end_ave, go_backwards=True)(p2_logits)
         return end_logits_ave
     point = C.sequence.is_last(p1_logits)
     point = C.layers.Sequential([For(range(2), lambda: C.layers.Recurrence(C.plus, go_backwards=True))])(point)
     point = C.greater(C.constant(16),point)
     end_logits_ave = end_ave_point(p1_logits, p2_logits, point)
     
     start_logits = seq_hardmax(start_logits_ave)
     end_logits = seq_hardmax(end_logits_ave)
     '''
     start_logits = seq_hardmax(p1_logits)
     end_logits = seq_hardmax(p2_logits)
     '''
     return C.as_block(
         C.combine([start_logits, end_logits]),
         [(q_processed, query), (mat_context, match_context)],
         'output_layer',
         'output_layer')
Example #24
def debug_attention(model, input):
    q = combine([model, model.attention_model.attention_weights])
    #words, p = q(input) # Python 3
    words_p = q(input)
    words = words_p[0]
    p     = words_p[1]
    seq_len = words[0].shape[attention_axis-1]
    span = 7 #attention_span  #7 # test sentence is 7 tokens long
    p_sq = np.squeeze(p[0][:seq_len,:span,0,:]) # (batch, len, attention_span, 1, vector_dim)
    opts = np.get_printoptions()
    np.set_printoptions(precision=5)
    print(p_sq)
    np.set_printoptions(**opts)
Example #25
def test_debug_multi_output():
    input_dim = 2
    num_output_classes = 2

    f_input = input_variable(input_dim, np.float32,
                             needs_gradient=True, name='features')

    p = parameter(shape=(input_dim,), init=10, name='p')

    comb = combine([f_input, p])

    ins = InStream(['n', 'n', 'n', 'n', 'n'])
    outs = OutStream()

    z = times(comb.outputs[0], comb.outputs[1], name='z')
    z = debug_model(z, ins, outs)

    l_input = input_variable(num_output_classes, np.float32, name='labels')
    loss = cross_entropy_with_softmax(z, l_input)
    eval_error = classification_error(z, l_input)

    _train(z, loss, eval_error,
           loss.find_by_name('features'),
           loss.find_by_name('labels'),
           num_output_classes, 1)

    # outs.written contains something like
    # =================================== forward  ===================================
    # Parameter('p', [], [2]) with uid 'Parameter4'
    # Input('features', [#, *], [2]) with uid 'Input3'
    # Times: Output('UserDefinedFunction12_Output_0', [#, *], [2]), Output('UserDefinedFunction15_Output_0', [], [2]) -> Output('z', [#, *], [2 x 2]) with uid 'Times21'
    # =================================== backward ===================================
    # Times: Output('UserDefinedFunction12_Output_0', [#, *], [2]), Output('UserDefinedFunction15_Output_0', [], [2]) -> Output('z', [#, *], [2 x 2]) with uid 'Times21'
    # Input('features', [#, *], [2]) with uid 'Input3'
    # Parameter('p', [], [2]) with uid 'Parameter4'   assert outs.written == out_stuff

    assert len(outs.written) == 8

    v_p = "Parameter('p', "
    v_i = "Input('features'"
    v_t = 'Times: '

    assert outs.written[0].startswith('=') and 'forward' in outs.written[0]
    line_1, line_2, line_3 = outs.written[1:4]

    assert outs.written[4].startswith('=') and 'backward' in outs.written[4]
    line_5, line_6, line_7 = outs.written[5:8]
    assert line_5.startswith(v_t)
    assert line_6.startswith(v_p) and line_7.startswith(v_i) or \
           line_6.startswith(v_i) and line_7.startswith(v_p)
Example #26
def test_eval_not_all_outputs():
    x = C.input_variable(1)
    x_data = [AA([3], dtype=np.float32)]
    y = C.input_variable(1)
    y_data = [AA([2], dtype=np.float32)]
    plus_func = x + 1
    minus_func = y - 1
    func = combine([plus_func, minus_func])

    result = func.eval({x : x_data}, [plus_func])
    assert np.array_equal(result, np.asarray([[4.]]))

    result = func.eval({y : y_data}, [minus_func])
    assert np.array_equal(result, np.asarray([[1.]]))
Example #27
def test_assign_dependency(input_data, device_id, precision):
    dt = PRECISION_TO_TYPE[precision]
    data = AA(input_data, dtype=dt)

    value = C.parameter(init=data)
    dest = C.parameter(shape=data.shape, dtype=dt)
    assign_op = C.assign(dest, value)
    y = dest + value

    result = C.combine([y, assign_op]).eval()

    assert np.array_equal(result[y.output], data)
    assert np.array_equal(dest.asarray(), data)
    assert np.array_equal(y.eval(), data + data)
Example #28
def _simple_dict():
    d = {}

    d['i1'] = C.input_variable(shape=(2, 3), name='i1')
    d['c1'] = C.constant(shape=(2, 3), value=6, name='c1')
    d['p1'] = C.parameter(shape=(3, 2), init=7, name='p1')
    d['op1'] = C.plus(d['i1'], d['c1'], name='op1')
    d['op2'] = C.times(d['op1'], d['p1'], name='op2')
    d['root'] = d['op2']

    d['target'] = C.input_variable((), name='label')
    d['all'] = C.combine([d['root'], C.minus(
        d['target'], C.constant(1, name='c2'), name='minus')], name='all')

    return d
Example #29
def test_normal_diff_along_batch(arg0, arg1, device_id, precision):
    dt = PRECISION_TO_TYPE[precision]
    dev = cntk_device(device_id)

    N = 1000
    B = 10.0 / np.sqrt(N)
    x  = C.sequence.input_variable(1, dtype=dt)
    x0 = np.zeros((N,2,1), dtype=dt)
    z = cr.normal_like(x, arg0, arg1, seed=98052)
    diff = C.sequence.first(z)-C.sequence.last(z)
    mean = C.reduce_mean(diff, axis=C.Axis.all_axes())
    var  = C.reduce_mean(diff*diff, axis=C.Axis.all_axes())
    expr = C.combine([mean, var])
    values = expr.eval({x:x0}, device=dev)
    assert np.abs(values[mean.output]) < B
    assert np.abs(values[var.output] - 2*arg1*arg1) < np.sqrt(2)*arg1*B
Example #30
def eval_and_write(model_file, node_name, output_file, minibatch_source, num_objects):
    # load model and pick desired node as output
    loaded_model  = load_model(model_file)
    node_in_graph = loaded_model.find_by_name(node_name)
    output_nodes  = combine([node_in_graph.owner])

    # evaluate model and get desired node output
    print("Evaluating model for output node %s" % node_name)
    features_si = minibatch_source['features']
    with open(output_file, 'wb') as results_file:
        for i in range(0, num_objects):
            mb = minibatch_source.next_minibatch(1)
            output = output_nodes.eval(mb[features_si])

            # write results to file
            out_values = output[0].flatten()
            np.savetxt(results_file, out_values[np.newaxis], fmt="%.6f")
Example #31
def test_evaluating_multiple_outputs():
    input_data = AA([1], np.float32)

    a = C.input_variable(shape=input_data.shape, name='a')
    a_plus_1 = a + 1
    out1 = ((a_plus_1 + 2) - 1) + 1
    out2 = ((a_plus_1 + 4) - 1) + 2
    z = C.combine([out1, out2])

    # create batch
    input_data.shape = (1, 1) + input_data.shape

    res = z.eval({a: input_data})

    expected_forward_out1 = [[4.]]
    expected_forward_out2 = [[7.]]
    assert np.array_equal(res[out1.output], expected_forward_out1)
    assert np.array_equal(res[out2.output], expected_forward_out2)
Example #32
def init():
    """ Initialise ResNet 152 model
    """
    global trainedModel, labelLookup, mem_after_init

    start = t.default_timer()

    # Load the model and labels from disk
    with open(LABEL_FILE, 'r') as f:
        labelLookup = [l.rstrip() for l in f]

    # Load model and load the model from brainscript (3rd index)
    trainedModel = load_model(MODEL_FILE)
    trainedModel = combine([trainedModel.outputs[2].owner])
    end = t.default_timer()

    loadTimeMsg = "Model loading time: {0} ms".format(round((end - start) * 1000, 2))
    logger.info(loadTimeMsg)
Example #33
File: trainer.py Project: tinyfx/CNTK
 def _get_loss_metric(criterion):  # helper to interpret criterion parameter
     if isinstance(
             criterion, cntk_py.Function
     ):  # input can be a tuple of Functions or a tuple-valued Function
         criterion = criterion.outputs  # break up tuple-valued Function into tuple of Functions
     # map Variable to Function
     from cntk import combine
     criterion = tuple([
         combine([output], output.name) if isinstance(
             output, cntk_py.Variable) else output for output in criterion
     ])
     if len(criterion) == 1:
         criterion = criterion + (None, )  # tuple of 1 value: pad with None
     elif len(criterion) != 2:
         raise ValueError(
             "criterion parameter must be a singleton or a tuple of 2 elements"
         )
     return criterion
Example #34
def _simple_dict():
    d = {}

    d['i1'] = C.input_variable(shape=(2, 3), name='i1')
    d['c1'] = C.constant(shape=(2, 3), value=6, name='c1')
    d['p1'] = C.parameter(shape=(3, 2), init=7, name='p1')
    d['op1'] = C.plus(d['i1'], d['c1'], name='op1')
    d['op2'] = C.times(d['op1'], d['p1'], name='op2')
    d['root'] = d['op2']

    d['target'] = C.input_variable((), name='label')
    d['all'] = C.combine([
        d['root'],
        C.minus(d['target'], C.constant(1, name='c2'), name='minus')
    ],
                         name='all')

    return d
Example #35
    def build_model(self):
        phmap = self.get_inputs()
        cc = phmap['cc']
        qc = phmap['qc']
        ab = phmap['ab']
        ae = phmap['ae']
        df = phmap['df']
        qf = phmap['qf']
        #self.info['query'] = C.splice(qgw, qnw)
        #self.info['doc'] = C.splice(cgw, gnw)
        elmo_encoder = self.__elmo_fac.build()
        #input layer
        reduction_cc = C.reshape(cc, (-1, ))
        reduction_qc = C.reshape(qc, (-1, ))
        c_elmo = elmo_encoder(reduction_cc)
        q_elmo = elmo_encoder(reduction_qc)
        c_processed, q_processed = self.input_layer(phmap['cgw'], phmap['cnw'],
                                                    phmap['qgw'],
                                                    phmap['qnw']).outputs

        # attention layer
        c_enhance = C.splice(c_processed, c_elmo, df)
        q_enhance = C.splice(q_processed, q_elmo, qf)
        att_context, wei = self.attention_layer(c_enhance, q_enhance,
            dimc= 2*self.hidden_dim+1027, dimq=2*self.hidden_dim+1025,\
            common_dim=2*self.hidden_dim+1024).outputs
        self_context = self.self_attention_layer(att_context)  # 2*hidden_dim
        # modeling layer
        mod_context = self.modeling_layer(self_context)
        enhance_mod_context = C.splice(mod_context, c_elmo, df)

        # output layer
        start_logits, end_logits = self.output_layer(
            att_context, enhance_mod_context).outputs

        # loss
        start_loss = seq_loss(start_logits, ab)
        end_loss = seq_loss(end_logits, ae)
        regulizer = 0.001 * C.reduce_sum(
            elmo_encoder.scales * elmo_encoder.scales)
        new_loss = all_spans_loss(start_logits, ab, end_logits, ae) + regulizer
        self._model = C.combine([start_logits, end_logits])
        self._loss = new_loss
        return self._model, self._loss, self._input_phs
Example #36
    def attention_layer(self, context, query, dimc, dimq, common_dim):
        q_processed = C.placeholder(shape=(dimq, ))
        c_processed = C.placeholder(shape=(dimc, ))

        #convert query's sequence axis to static
        qvw, qvw_mask = C.sequence.unpack(q_processed, padding_value=0).outputs

        # so W * [h; u; h.* u] becomes w1 * h + w2 * u + w4 * (h.*u)
        ws1 = C.parameter(shape=(dimc, 1), init=C.glorot_uniform())
        ws2 = C.parameter(shape=(dimq, 1), init=C.glorot_uniform())
        ws4 = C.parameter(shape=(1, common_dim), init=C.glorot_uniform())
        att_bias = C.parameter(shape=(), init=0)

        wh = C.times(c_processed, ws1)  # [#,c][1]
        wu = C.reshape(C.times(qvw, ws2), (-1, ))  # [#][*]
        # qvw*ws4: [#][*,200], whu:[#,c][*]
        whu = C.reshape(C.reduce_sum(
            c_processed[:common_dim] *\
            C.sequence.broadcast_as(qvw[:,:common_dim] * ws4, c_processed), axis=1), (-1,))
        S1 = wh + C.sequence.broadcast_as(wu,
                                          c_processed) + att_bias  # [#,c][*]
        qvw_mask_expanded = C.sequence.broadcast_as(qvw_mask, c_processed)
        S1 = C.element_select(qvw_mask_expanded, S1, C.constant(-1e+30))
        q_attn = C.reshape(C.softmax(S1), (-1, 1))  # [#,c][*,1]
        c2q = C.reshape(
            C.reduce_sum(C.sequence.broadcast_as(qvw, q_attn) * q_attn,
                         axis=0), (-1))  # [#,c][200]

        max_col = C.reduce_max(S1)  # [#,c][1] the query word with the highest score
        c_attn = C.sequence.softmax(max_col)  # [#,c][1] softmax over every word in c

        htilde = C.sequence.reduce_sum(c_processed * c_attn)  # [#][200]
        q2c = C.sequence.broadcast_as(htilde, c_processed)  # [#,c][200]
        q2c_out = c_processed[:common_dim] * q2c[:common_dim]

        # original passage, query representation, passage-focus representation, match representation, passage-context representation
        att_context_reg = C.splice(c_processed, c2q, q2c_out,
                                   c_processed[:common_dim] * c2q[:common_dim])
        res = C.combine(att_context_reg, C.reshape(q_attn, (-1, )))
        return \
        C.as_block(res,
            [(c_processed, context), (q_processed, query)],
            'attention_layer',
            'attention_layer')
Example #37
    def dot_attention(self, inputs, memory, dim):
        '''
        @inputs: [#,c][d] the sequence to attend over
        @memory (key): [#,q][d] the sequence used to compute the similarity weights
        @value: [#,q][d] the sequence used for the weighted sum
        @output: [#,c][d] the attention vector
        '''
        input_ph = C.placeholder()
        input_mem = C.placeholder()
        with C.layers.default_options(
                bias=False,
                activation=C.relu):  # all the projections have no bias
            attn_proj_enc = C.layers.Dense(dim,
                                           init=glorot_uniform(),
                                           input_rank=1,
                                           name="Wqu")
            attn_proj_dec = C.layers.Dense(dim,
                                           init=glorot_uniform(),
                                           input_rank=1)

        inputs_ = attn_proj_enc(input_ph)  # [#,c][d]
        memory_ = attn_proj_dec(input_mem)  # [#,q][d]
        unpack_memory, mem_mask = C.sequence.unpack(
            memory_, 0).outputs  # [#][*=q, d], [#][*=q]
        unpack_memory_expand = C.sequence.broadcast_as(unpack_memory,
                                                       inputs_)  # [#,c][*=q,d]

        matrix = C.times_transpose(inputs_, unpack_memory_expand) / (
            dim**0.5)  # [#,c][*=q]
        mem_mask_expand = C.sequence.broadcast_as(mem_mask,
                                                  inputs_)  # [#,c][*=q]
        matrix = C.element_select(mem_mask_expand, matrix,
                                  C.constant(-1e+30))  # [#,c][*=q]
        logits = C.reshape(C.softmax(matrix), (-1, 1))  # [#,c][*=q,1]
        # [#,c][*=q, d]
        memory_expand = C.sequence.broadcast_as(
            C.sequence.unpack(input_mem, 0, no_mask_output=True), input_ph)
        weighted_att = C.reshape(C.reduce_sum(logits * memory_expand, axis=0),
                                 (-1, ))  # [#,c][d]

        return C.as_block(C.combine(weighted_att,
                                    logits), [(input_ph, inputs),
                                              (input_mem, memory)],
                          'dot attention', 'dot attention')
Example #38
def gru_cell(shape, init=glorot_uniform(), name=''):  # (x, (h,c))
    """ GRU cell function
  """
    shape = _as_tuple(shape)

    if len(shape) != 1:
        raise ValueError("gru_cell: shape must be vectors (rank-1 tensors)")

    # determine stacking dimensions
    cell_shape_stacked = shape * 2  # patched dims with stack_axis duplicated 2 times

    # parameters
    Wz = Parameter(cell_shape_stacked, init=init, name='Wz')
    Wr = Parameter(cell_shape_stacked, init=init, name='Wr')
    Wh = Parameter(cell_shape_stacked, init=init, name='Wh')
    Uz = Parameter(_INFERRED + shape, init=init, name='Uz')
    Ur = Parameter(_INFERRED + shape, init=init, name='Ur')
    Uh = Parameter(_INFERRED + shape, init=init, name='Uh')

    def create_s_placeholder():
        # we pass the known dimensions here, which makes dimension inference easier
        return Placeholder(shape=shape, name='S')  # (h, c)

    # parameters to model function
    x = Placeholder(name='gru_block_arg')
    prev_status = create_s_placeholder()

    # formula of model function
    Sn_1 = prev_status

    z = sigmoid(times(x, Uz, name='x*Uz') + times(Sn_1, Wz, name='Sprev*Wz'),
                name='z')
    r = sigmoid(times(x, Ur, name='x*Ur') + times(Sn_1, Wr, name='Sprev*Wr'),
                name='r')
    h = tanh(times(x, Uh, name='x*Uh') +
             times(element_times(Sn_1, r, name='Sprev*r'), Wh),
             name='h')
    s = plus(element_times((1 - z), h, name='(1-z)*h'),
             element_times(z, Sn_1, name='z*SPrev'),
             name=name)
    apply_x_s = combine([s])
    apply_x_s.create_placeholder = create_s_placeholder
    return apply_x_s
Example #39
def create_binary_convolution_model():

    # Input variables denoting the features and label data
    feature_var = C.input((num_channels, image_height, image_width))
    label_var = C.input((num_classes))

    # apply model to input
    scaled_input = C.element_times(C.constant(0.00390625), feature_var)

    # first layer is ok to be full precision
    z = C.layers.Convolution((3, 3), 32, pad=True, activation=C.relu)(scaled_input)
    z = C.layers.MaxPooling((3,3), strides=(2,2))(z)

    z = C.layers.BatchNormalization(map_rank=1)(z)
    z = BinaryConvolution(z, (3,3), 128, channels=32, pad=True)
    z = C.layers.MaxPooling((3,3), strides=(2,2))(z)

    z = C.layers.BatchNormalization(map_rank=1)(z)
    z = BinaryConvolution(z, (3,3), 128, channels=128, pad=True)
    z = C.layers.MaxPooling((3,3), strides=(2,2))(z)

    z = C.layers.BatchNormalization(map_rank=1)(z)
    z = BinaryConvolution(z, (1,1), num_classes, channels=128, pad=True)
    z = C.layers.AveragePooling((z.shape[1], z.shape[2]))(z)
    z = C.reshape(z, (num_classes,))

    # Add binary regularization (ala Gang Hua)
    weight_sum = C.constant(0)
    for p in z.parameters:
        if (p.name == "filter"):
            weight_sum = C.plus(weight_sum, C.reduce_sum(C.minus(1, C.square(p))))
    bin_reg = C.element_times(.000005, weight_sum)

    # After the last layer, we need to apply a learnable scale
    SP = C.parameter(shape=z.shape, init=0.001)
    z = C.element_times(z, SP)

    # loss and metric
    ce = C.cross_entropy_with_softmax(z, label_var)
    ce = C.plus(ce, bin_reg)
    pe = C.classification_error(z, label_var)

    return C.combine([z, ce, pe])
Example #40
def clone_model(base_model, from_node_names, to_node_names, clone_method):
    from_nodes = [
        find_by_name(base_model, node_name) for node_name in from_node_names
    ]
    if None in from_nodes:
        print(
            "Error: could not find all specified 'from_nodes' in clone. Looking for {}, found {}"
            .format(from_node_names, from_nodes))
    to_nodes = [
        find_by_name(base_model, node_name) for node_name in to_node_names
    ]
    if None in to_nodes:
        print(
            "Error: could not find all specified 'to_nodes' in clone. Looking for {}, found {}"
            .format(to_node_names, to_nodes))
    input_placeholders = dict(
        zip(from_nodes, [placeholder() for x in from_nodes]))
    cloned_net = combine(to_nodes).clone(clone_method, input_placeholders)
    return cloned_net
Example #41
def test_model_one_output_of_multi_output_function():
    input_dim = 2
    proj_dim = 11
    x = C.input_variable((input_dim, ))

    x_placeholder = C.placeholder()
    w = parameter((input_dim, proj_dim))
    b = parameter((proj_dim, ))
    proj = times(x_placeholder, w)
    proj_plus_bias = proj + b
    combined_model = as_block(C.combine([proj, proj_plus_bias]),
                              [(x_placeholder, x)], 'dense_op')

    labels = C.input_variable((proj_dim, ))
    lr_schedule = C.learning_rate_schedule(0.003, C.UnitType.sample)
    ce = cross_entropy_with_softmax(combined_model.outputs[0], labels)
    pe = classification_error(combined_model.outputs[0], labels)
    trainer_multitask = C.Trainer(combined_model.outputs[0], (ce, pe),
                                  C.sgd(ce.parameters, lr=lr_schedule))
Example #42
def resnet_model(name, scaled_input):
    '''
    Input: pretrained-model name, scaled_input
    Function:
    - We are using Transfer Learning here, since the iNaturalist Image dataset is similar to Imagenet data.
    - Load Resnet34 as the base-model
    - Finetune Resnet34 by removing the last layer and adding custom layers.
    - Custom layers:
        - Dense
        - Dropout
        - BatchNorm
    Return: Model
    '''
    print('Loading Resnet model from {}.'.format(name))
    base_model = C.load_model(os.path.join(MODELDIR, name))
    features_placeholder = C.placeholder(shape=(3, 224, 224), name='features')
    features = C.input_variable(shape=(3, 224, 224), name='features')

    feature_node = C.logging.find_by_name(base_model, 'features')
    last_node = C.logging.find_by_name(base_model, 'z.x')
    cloned_layers = C.combine([last_node.owner]).clone(C.CloneMethod.freeze,
                                                       {feature_node: features_placeholder})
    retained_layers = C.as_block(composite=cloned_layers,
                                 block_arguments_map=[(features_placeholder, features)],
                                 block_op_name='retainedlayers',
                                 block_instance_name='retainedlayers')

    z = retained_layers(scaled_input)
    z = C.layers.GlobalAveragePooling()(z)
    z = C.layers.Dropout(dropout_rate=0.25, name='d1')(z)

    z = C.layers.Dense(10000, activation=C.ops.relu, name='fc1')(z)
    z = C.layers.BatchNormalization(map_rank=1)(z)
    z = C.layers.Dropout(dropout_rate=0.4, name='d2')(z)

    z = C.layers.Dense(10000, activation=C.ops.relu, name='fc2')(z)
    z = C.layers.BatchNormalization(map_rank=1)(z)
    z = C.layers.Dropout(dropout_rate=0.5, name='d3')(z)

    z = C.layers.Dense(num_classes, activation=None, name='prediction')(z)

    return z
Example #43
def create_train_model(s2smodel, embed_layer):
    '''
    return: input placeholder map, logits, and the combined (loss, error) Function
    '''
    q = C.Axis.new_unique_dynamic_axis('q')
    a = C.Axis.new_unique_dynamic_axis('a')
    b = C.Axis.default_batch_axis()
    qwk = C.sequence.input_variable(myConfig['wg_dim'],
                                    sequence_axis=q,
                                    is_sparse=False,
                                    name='qwk')
    qwn = C.sequence.input_variable(myConfig['wn_dim'],
                                    sequence_axis=q,
                                    is_sparse=False,
                                    name='qwn')
    awk = C.sequence.input_variable(myConfig['wg_dim'],
                                    sequence_axis=a,
                                    is_sparse=False,
                                    name='awk')
    awn = C.sequence.input_variable(myConfig['wn_dim'],
                                    sequence_axis=a,
                                    is_sparse=False,
                                    name='awn')

    input_ph = {'qwk': qwk, 'qwn': qwn, 'awk': awk, 'awn': awn}

    a_processed = embed_layer(awk, awn)
    q_processed = embed_layer(qwk, qwn)
    a_onehot = C.splice(awk, awn)
    print("q_onehot shape:{}".format(a_onehot.output))

    # query generate answer
    logits = s2smodel(a_processed, q_processed)
    logits = C.sequence.slice(logits, 0, -1)
    print('logits shape:{}'.format(logits.output))

    labels = C.sequence.slice(a_onehot, 1, 0)  # <s> a b c </s> -> a b c </s>
    print('labels shape:{}'.format(labels.output))
    logits = C.reconcile_dynamic_axes(logits, labels)
    loss = C.cross_entropy_with_softmax(logits, labels)
    errs = C.classification_error(logits, labels)
    return input_ph, logits, C.combine(loss, errs)
Example #44
def decode_model(use_gpu=True, gpu_id=0):
    # use GPU or CPU according to parameters
    try_set_default_device(gpu(gpu_id) if use_gpu else cpu())

    model_dnn = load_model("./model/speech_enhancement.model")
    features_file = "./test_normed.scp"
    feature_dim = 257
    test_reader = MinibatchSource(HTKFeatureDeserializer(StreamDefs(
            amazing_features=StreamDef(
                    shape=feature_dim, context=(3, 3),
                    scp=features_file))),
                                  randomize=False, frame_mode=False)
    eval_input_map = {input: test_reader.streams.amazing_features}

    f = open(features_file)
    line = f.readline()
    while line:
        temp_input_path = line.split(']')[0]
        mb_size = temp_input_path.split(',')[-1]
        mb_size = int(mb_size) + 1
        noisy_fea = test_reader.next_minibatch(
                mb_size, input_map=eval_input_map)
        real_noisy_fea = noisy_fea[input].data

        node_in_graph = model_dnn.find_by_name('irm')
        output_nodes = combine([node_in_graph.owner])
        out_noisy_fea = output_nodes.eval(real_noisy_fea)
        # out_noisy_fea = as_composite(model_dnn.output1[0].owner).eval(
        #         real_noisy_fea)

        out_SE_noisy_fea = np.concatenate((out_noisy_fea), axis=0)

        out_file_path = line.split('=')[0]
        out_file_name = os.path.join('./enhanced_norm_fea_mat', out_file_path)
        out_file_fullpath = os.path.split(out_file_name)[0]
        # print (out_file_fullpath)
        if not os.path.exists(out_file_fullpath):
            os.makedirs(out_file_fullpath)
        sio.savemat(out_file_name, {'SE': out_SE_noisy_fea})
        line = f.readline()

    f.close()
Example #45
 def __init__(self,model_file=None,im_mean=None, model_output_layer=1):
     # model specific parameters
  
     # 0: Softmax, 1: Unnormalised output layer
     assert model_output_layer in (0,1), "model output layer must be 0 or 1"
     
     self.im_mean=im_mean
     #self.model_name='cnn_model.dnn'
     #model_file=os.path.join(self.param.model_dir,self.model_name)
     print('...loading classification model')
     # ToDo: do checks for image size and num_channel
     
     mod = load_model(model_file)
     nodes=mod.find_all_with_name('')
     
     self.pred  = combine([nodes[model_output_layer]])
     
     self.im_height=mod.arguments[0].shape[1]
     self.im_width=mod.arguments[0].shape[2]
     self.im_channels=mod.arguments[0].shape[0]
Example #46
def create_faster_rcnn_eval_model(model,
                                  image_input,
                                  dims_input,
                                  cfg,
                                  rpn_model=None):

    print("creating eval model")
    last_conv_node_name = cfg["MODEL"].LAST_CONV_NODE_NAME
    conv_layers = clone_model(model, [cfg["MODEL"].FEATURE_NODE_NAME],
                              [last_conv_node_name], CloneMethod.freeze)
    conv_out = conv_layers(image_input)

    model_with_rpn = model if rpn_model is None else rpn_model
    rpn = clone_model(model_with_rpn, [last_conv_node_name],
                      ["rpn_cls_prob_reshape", "rpn_bbox_pred"],
                      CloneMethod.freeze)
    rpn_out = rpn(conv_out)
    # we need to add the proposal layer anew to account for changing configs when buffering proposals in 4-stage training
    rpn_rois = create_proposal_layer(rpn_out.outputs[0], rpn_out.outputs[1],
                                     dims_input, cfg)

    roi_fc_layers = clone_model(model,
                                [last_conv_node_name, "rpn_target_rois"],
                                ["cls_score", "bbox_regr"], CloneMethod.freeze)
    pred_net = roi_fc_layers(conv_out, rpn_rois)
    cls_score = pred_net.outputs[0]
    bbox_regr = pred_net.outputs[1]

    if cfg.BBOX_NORMALIZE_TARGETS:

        num_boxes = int(bbox_regr.shape[1] / 4)
        bbox_normalize_means = np.array(cfg.BBOX_NORMALIZE_MEANS * num_boxes)
        bbox_normalize_stds = np.array(cfg.BBOX_NORMALIZE_STDS * num_boxes)
        bbox_regr = plus(element_times(bbox_regr, bbox_normalize_stds),
                         bbox_normalize_means,
                         name='bbox_regr')

    cls_pred = softmax(cls_score, axis=1, name='cls_pred')
    eval_model = combine([cls_pred, rpn_rois, bbox_regr])

    return eval_model
Example #47
def create_fast_rcnn_eval_model(model, image_input, roi_proposals, cfg):
    print("creating eval model")
    predictor = clone_model(model, [cfg["MODEL"].FEATURE_NODE_NAME, "roi_proposals"], ["cls_score", "bbox_regr"],
                            CloneMethod.freeze)
    pred_net = predictor(image_input, roi_proposals)
    cls_score = pred_net.outputs[0]
    bbox_regr = pred_net.outputs[1]

    if cfg.BBOX_NORMALIZE_TARGETS:
        num_boxes = int(bbox_regr.shape[1] / 4)
        bbox_normalize_means = np.array(cfg.BBOX_NORMALIZE_MEANS * num_boxes)
        bbox_normalize_stds = np.array(cfg.BBOX_NORMALIZE_STDS * num_boxes)
        bbox_regr = plus(element_times(bbox_regr, bbox_normalize_stds), bbox_normalize_means, name='bbox_regr')

    cls_pred = softmax(cls_score, axis=1, name='cls_pred')
    eval_model = combine([cls_pred, bbox_regr])

    if cfg["CNTK"].DEBUG_OUTPUT:
        plot(eval_model, os.path.join(cfg.OUTPUT_PATH, "graph_frcn_eval." + cfg["CNTK"].GRAPH_TYPE))

    return eval_model
Example #48
def test_combine_duplicated_inputs():
    input_dim = 1
    proj_dim = 2
    x = C.input_variable((input_dim,), name='x')
    b = C.parameter((proj_dim), name='b')
    w = C.parameter((input_dim, proj_dim), name='w')
    func_name = 't_plus_b'
    t = C.times(x, w)
    t_plus_b = C.plus(t, b, name=func_name)

    duplicated_t_plus_b = C.combine([t_plus_b, t_plus_b])

    def compare_var_names(vars, names):
        num_vars = len(vars)
        for i in range(num_vars):
            if (vars[i].name != names[i]):
                return False

        return True

    assert compare_var_names(duplicated_t_plus_b.outputs, [func_name, func_name])
Example #49
    def output_layer(self, attention_context, modeling_context):
        att_context = C.placeholder(shape=(8*self.hidden_dim,))
        mod_context = C.placeholder(shape=(2*self.hidden_dim,))
        #output layer
        start_logits = C.layers.Dense(1, name='out_start')(C.dropout(C.splice(mod_context, att_context), self.dropout))
        if self.two_step:
            start_hardmax = seq_hardmax(start_logits)
            att_mod_ctx = C.sequence.last(C.sequence.gather(mod_context, start_hardmax))
        else:
            start_prob = C.softmax(start_logits)
            att_mod_ctx = C.sequence.reduce_sum(mod_context * start_prob)
        att_mod_ctx_expanded = C.sequence.broadcast_as(att_mod_ctx, att_context)
        end_input = C.splice(att_context, mod_context, att_mod_ctx_expanded, mod_context * att_mod_ctx_expanded)
        m2 = OptimizedRnnStack(self.hidden_dim, bidirectional=True, use_cudnn=self.use_cudnn, name='output_rnn')(end_input)
        end_logits = C.layers.Dense(1, name='out_end')(C.dropout(C.splice(m2, att_context), self.dropout))

        return C.as_block(
            C.combine([start_logits, end_logits]),
            [(att_context, attention_context), (mod_context, modeling_context)],
            'output_layer',
            'output_layer')
Example #50
0
def create_binary_convolution_model():

    feature_var = C.input((num_channels, image_height, image_width))
    label_var = C.input((num_classes))

    scaled_input = C.element_times(C.constant(0.00390625), feature_var)

    z = C.layers.Convolution((3, 3), 32, pad=True,
                             activation=C.relu)(scaled_input)
    z = C.layers.MaxPooling((3, 3), strides=(2, 2))(z)

    z = C.layers.BatchNormalization(map_rank=1)(z)
    z = BinaryConvolution((3, 3), 128, channels=32, pad=True)(z)

    z = C.layers.MaxPooling((3, 3), strides=(2, 2))(z)

    z = C.layers.BatchNormalization(map_rank=1)(z)
    z = BinaryConvolution((3, 3), 128, channels=128, pad=True)(z)
    z = C.layers.MaxPooling((3, 3), strides=(2, 2))(z)

    z = C.layers.BatchNormalization(map_rank=1)(z)
    z = BinaryConvolution((1, 1), num_classes, channels=128, pad=True)(z)
    z = C.layers.AveragePooling((z.shape[1], z.shape[2]))(z)
    z = C.reshape(z, (num_classes, ))

    weight_sum = C.constant(0)
    for p in z.parameters:
        if (p.name == "filter"):
            weight_sum = C.plus(weight_sum,
                                C.reduce_sum(C.minus(1, C.square(p))))
    bin_reg = C.element_times(.000005, weight_sum)

    SP = C.parameter(shape=z.shape, init=0.001)
    z = C.element_times(z, SP)

    ce = C.cross_entropy_with_softmax(z, label_var)
    ce = C.plus(ce, bin_reg)
    pe = C.classification_error(z, label_var)

    return C.combine([z, ce, pe])
Example #51
0
    def build_model(self):
        phmap = self.get_inputs()
        df = phmap['df']
        qf = phmap['qf']
        ab = phmap['ab']
        ae = phmap['ae']
        #input layer
        cc = C.reshape(phmap['cc'], (1, -1))
        qc = C.reshape(phmap['qc'], (1, -1))
        c_processed, q_processed = self.input_layer(phmap['cgw'],phmap['cnw'],cc,\
            phmap['qgw'],phmap['qnw'],qc).outputs
        c_processed = C.splice(c_processed, df)
        q_processed = C.splice(q_processed, qf)
        # attention layer output:[#,c][8*hidden_dim]
        att_context, wei1 = self.attention_layer(c_processed,
                                                 q_processed,
                                                 dimc=2 * self.hidden_dim + 3,
                                                 dimq=2 * self.hidden_dim + 1,
                                                 common_dim=2 *
                                                 self.hidden_dim).outputs
        a = att_context[:4 * self.hidden_dim]
        b = att_context[4 * self.hidden_dim:]
        self_context = self.multiHead(a, a, self.hidden_dim // 2)

        # modeling layer
        mod_inp = C.splice(self_context, b)
        mod_context = self.modeling_layer(mod_inp)
        mod_context = C.splice(mod_context, df)

        # output layer
        start_logits, end_logits = self.output_layer(att_context,
                                                     mod_context).outputs

        # loss
        start_loss = seq_loss(start_logits, ab)
        end_loss = seq_loss(end_logits, ae)
        new_loss = all_spans_loss(start_logits, ab, end_logits, ae)
        self._model = C.combine([start_logits, end_logits])
        self._loss = new_loss
        return self._model, self._loss, self._input_phs
    def input_layer(self, c1w, c2w):
        c1w_ph = C.placeholder()
        c2w_ph = C.placeholder()

        input_words = C.placeholder(shape=(self.word_dim))

        embedded = self.embed()(input_words)
        processed = OptimizedRnnStack(self.hidden_dim,
                                      num_layers=1,
                                      bidirectional=True,
                                      use_cudnn=True,
                                      name='input_rnn')(embedded)

        c1_processed = processed.clone(C.CloneMethod.share,
                                       {input_words: c1w_ph})
        c2_processed = processed.clone(C.CloneMethod.share,
                                       {input_words: c2w_ph})

        return C.as_block(C.combine([c1_processed,
                                     c2_processed]), [(c1w_ph, c1w),
                                                      (c2w_ph, c2w)],
                          'input_layer', 'input_layer')
Example #53
0
def cntk_prediction(pathToImage):
    """
	Fuction has following steps:
	- open the image
	- alter the image (change color mode, resize)
	- convert image to array, subtract the mean, roll axis
	- load model
	- pass on the image to model fo classification
	- return a result of classification or an error

	Args:
		pathToImage: server path of image

	Returns:
		top_class: the result of classification (1 - target class, 0 - non-target class, -1 - error)
	"""
    top_class = -1
    size = 224, 224

    try:
        im = Image.open(pathToImage)
        im = remove_transparency(im)
        im = LPmode2RGB(im)  # black and white images
        im = im.resize(size)  # PIL.Image.NEAREST resampling

        rgb_image = np.asarray(im, dtype=np.float32) - 128
        bgr_image = rgb_image[..., [2, 1, 0]]
        pic = np.ascontiguousarray(np.rollaxis(bgr_image, 2))

        z = load_model(MODEL)
        z_out = ct.combine([z.outputs[3].owner])
        y = ct.ops.softmax(z_out)
        predictions = np.squeeze(y.eval({y.arguments[0]: [pic]}))
        top_class = np.argmax(predictions)
    except Exception:
        # Nothing will be done in the case of exception. Function will return -1 value.
        pass

    return top_class
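A hypothetical call site for cntk_prediction; the upload path is made up, and MODEL, remove_transparency and LPmode2RGB are assumed to be defined in the same module as above:

label = cntk_prediction('uploads/sample.png')  # hypothetical server path
if label == -1:
    print('classification failed')
else:
    print('predicted class:', label)  # 1 - target class, 0 - non-target class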
Example #54
0
    def load_model(self, model_filename):

        self.model_filename = model_filename

        cntk_model = cntk.load_model(model_filename)

        #  First try and find output by name
        model_output = cntk_model.find_by_name('ScaledLogLikelihood')

        #  Fall back to first defined output
        if model_output is None:
            model_output = cntk_model.outputs[0]

        #  Create an object restricted to the desired output.
        cntk_model = cntk.combine(model_output)

        #  Optimized RNN models won't run on CPU without conversion.
        if 0 == cntk.use_default_device().type():
            cntk_model = cntk.misc.convert_optimized_rnnstack(cntk_model)

        self.model = cntk_model
        return self
Example #55
0
    def input_layer(self,cgw,cc,qgw,qc,qnw,cnw):
        cgw_ph = C.placeholder()
        cnw_ph = C.placeholder()
        cc_ph  = C.placeholder()
        qgw_ph = C.placeholder()
        qnw_ph = C.placeholder()
        qc_ph  = C.placeholder()

        input_chars = C.placeholder(shape=(1,self.word_size,self.c_dim))
        input_glove_words = C.placeholder(shape=(self.wg_dim,))
        input_nonglove_words = C.placeholder(shape=(self.wn_dim,))

        embedded = C.splice(
            C.reshape(self.charcnn(input_chars), self.convs),
            self.embed()(input_glove_words, input_nonglove_words), name='splice_embed')

        highway = HighwayNetwork(dim=self.elmo_dim + self.hidden_dim + self.convs, 
                                 highway_layers=self.highway_layers)(embedded)
        highway_drop = C.layers.Dropout(self.dropout)(highway)
        processed = OptimizedRnnStack(self.hidden_dim,
             num_layers=1,
             bidirectional=True,
             use_cudnn=self.use_cudnn,
             name='input_rnn')(highway_drop)
        
        qce = C.one_hot(qc_ph, num_classes=self.c_dim, sparse_output=self.use_sparse)
        cce = C.one_hot(cc_ph, num_classes=self.c_dim, sparse_output=self.use_sparse)
                
        q_processed = processed.clone(C.CloneMethod.share, 
            {input_chars:qce, input_glove_words:qgw_ph, input_nonglove_words:qnw_ph})
        c_processed = processed.clone(C.CloneMethod.share, 
            {input_chars:cce, input_glove_words:cgw_ph, input_nonglove_words:cnw_ph})

        return C.as_block(
            C.combine([c_processed, q_processed]),
            [(cgw_ph, cgw), (cc_ph, cc), (qgw_ph, qgw), (qc_ph, qc), (qnw_ph, qnw), (cnw_ph, cnw)],
            'input_layer',
            'input_layer')
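The input layer above follows the common CNTK pattern of building a sub-network once over placeholders and then cloning it with shared weights for each concrete input. A minimal, self-contained sketch of that pattern (layer sizes and names are illustrative, not taken from the model above):

import cntk as C

ph = C.placeholder(shape=(3,))
shared = C.layers.Dense(4, name='shared_dense')(ph)  # built once over a placeholder

a = C.input_variable(3, name='a')
b = C.input_variable(3, name='b')

# both clones share the same parameters because of CloneMethod.share
branch_a = shared.clone(C.CloneMethod.share, {ph: a})
branch_b = shared.clone(C.CloneMethod.share, {ph: b})

both = C.combine([branch_a, branch_b])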
Example #56
0
    def _build_network(self, pretrained_policy):
        self.image_frame = C.input_variable((1, ) +
                                            self.observation_space_shape)
        self.next_image_frame = C.input_variable((1, ) +
                                                 self.observation_space_shape)
        self.reward = C.input_variable((1, ))
        if pretrained_policy is None:
            h = C.layers.Convolution2D(filter_shape=(7, 7),
                                       num_filters=32,
                                       strides=(4, 4),
                                       pad=True,
                                       name='conv_1',
                                       activation=C.relu)
            h = C.layers.Convolution2D(filter_shape=(5, 5),
                                       num_filters=64,
                                       strides=(2, 2),
                                       pad=True,
                                       name='conv_2',
                                       activation=C.relu)(h)
            h = C.layers.Convolution2D(filter_shape=(3, 3),
                                       num_filters=128,
                                       strides=(1, 1),
                                       pad=True,
                                       name='conv_3',
                                       activation=C.relu)(h)
            h = C.layers.Dense(64, activation=C.relu, name='dense_1')(h)
            v = C.layers.Dense(1, name='dense_2')(h)
            self.value = v(self.image_frame)
            self.next_value = v(self.next_image_frame)
            self.output = C.combine([self.value, self.next_value])
        else:
            self.output = C.Function.load(pretrained_policy)(
                self.image_frame, self.next_image_frame)
            # self.value / self.next_value do not exist yet in this branch, so
            # recover them directly from the loaded function's two outputs.
            self.value, self.next_value = self.output.outputs

        target = DISCOUNT_FACTOR * self.next_value + self.reward
        self.loss = C.squared_error(target, self.value)
Example #57
0
def score_models_fast(distance_measure,
                      unk_ivecs,
                      spk_ivecs,
                      siam_output='hl2',
                      calc_softmax=False):
    print('Score models')
    #n_spks = spk_ivecs.shape[0]
    node_in_graph = distance_measure.find_all_with_name(siam_output)
    #prov_f_dim = node_in_graph[0].shape[0]
    prov_output = C.combine(node_in_graph[0])
    #prov_spk = np.zeros(shape=(n_spks,prov_f_dim),dtype=np.float32)
    print('Transform blacklist speaker i-vectors')
    prov_spk = prov_output.eval(spk_ivecs)
    print('Transform dev. speaker i-vectors')
    prov_unk = prov_output.eval(unk_ivecs)
    print('Normalize transformed vectors')
    prov_spk = length_norm(prov_spk)
    prov_unk = length_norm(prov_unk)
    print('Calculate scores')
    scores = np.dot(prov_spk, prov_unk.transpose())
    if calc_softmax:
        scores = 1 / (1 + np.exp(-2 * scores))
    return scores
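score_models_fast taps an intermediate layer of a trained model by name and evaluates only that node. A minimal sketch of the same pattern on a toy network (the node name 'hl2' and all shapes are illustrative):

import cntk as C
import numpy as np

x = C.input_variable(10)
h = C.layers.Dense(5, name='hl2')(x)
y = C.layers.Dense(2)(h)

hidden = C.combine(y.find_all_with_name('hl2')[0])  # restrict the graph to the hidden node
emb = hidden.eval([np.random.rand(10).astype(np.float32)])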
def load_model(model_filename: str):
    """A helper function to load the acoustic model from disc.

    Args:
        model_filename (str): The file path to the acoustic model.
        """
    cntk_model = cntk.load_model(model_filename)

    #  First try and find output by name
    model_output = cntk_model.find_by_name('ScaledLogLikelihood')

    #  Fall back to first defined output
    if model_output is None:
        model_output = cntk_model.outputs[0]

    #  Create an object restricted to the desired output.
    cntk_model = cntk.combine(model_output)

    #  Optimized RNN models won't run on CPU without conversion.
    if 0 == cntk.use_default_device().type():
        cntk_model = cntk.misc.convert_optimized_rnnstack(cntk_model)

    return cntk_model
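A hedged usage sketch for the helper above; the file name is hypothetical, and the model is assumed to take a single rank-1 sequence input of acoustic feature frames:

import numpy as np

model = load_model('acoustic_model.cnmf')                 # hypothetical path
feat_dim = model.arguments[0].shape[0]
frames = np.random.rand(20, feat_dim).astype(np.float32)  # one utterance of 20 frames
log_likelihoods = model.eval({model.arguments[0]: [frames]})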
Example #59
0
def combine(operands, name=''):
    '''
     Create a new Function instance that combines the outputs of the specified list of
     'operands' Functions, such that the 'Outputs' of the new Function are the union of the
     'Outputs' of each of the specified 'operands' Functions. E.g., when creating a classification
     model, the CrossEntropy loss Function and the ClassificationError Function typically form
     the two roots of the computation graph, which can be combined into a single Function
     with two outputs, viz. the CrossEntropy loss and the ClassificationError output.

    Args:
        operands (list): list of functions or their variables to combine
        name (str): the name of the node in the network
    Returns:
        :class:`cntk.Function`
    '''
    from cntk import combine
    from cntk import Variable
    converted_operands = list()
    for o in operands:
        if isinstance(o, Variable):            
            converted_operands.append(o.owner)
        else:
            converted_operands.append(o)

    return combine(converted_operands, name)
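A minimal sketch of the loss/error use case the docstring describes (layer sizes are illustrative):

import cntk as C

features = C.input_variable(4, name='features')
labels = C.input_variable(2, name='labels')

z = C.layers.Dense(2)(features)
loss = C.cross_entropy_with_softmax(z, labels)
error = C.classification_error(z, labels)

criterion = C.combine([loss, error])  # one Function with two outputs
print(criterion.outputs)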
Example #60
0
def create_eval_model(model, image_input, dims_input, rpn_model=None):
    print("creating eval model")
    conv_layers = clone_model(model, [feature_node_name], [last_conv_node_name], CloneMethod.freeze)
    conv_out = conv_layers(image_input)

    model_with_rpn = model if rpn_model is None else rpn_model
    rpn = clone_model(model_with_rpn, [last_conv_node_name, "dims_input"], ["rpn_rois"], CloneMethod.freeze)
    rpn_rois = rpn(conv_out, dims_input)

    roi_fc_layers = clone_model(model, [last_conv_node_name, "rpn_target_rois"], ["cls_score", "bbox_regr"], CloneMethod.freeze)
    pred_net = roi_fc_layers(conv_out, rpn_rois)
    cls_score = pred_net.outputs[0]
    bbox_regr = pred_net.outputs[1]

    if cfg["TRAIN"].BBOX_NORMALIZE_TARGETS and cfg["TRAIN"].BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
        num_boxes = int(bbox_regr.shape[1] / 4)
        bbox_normalize_means = np.array(cfg["TRAIN"].BBOX_NORMALIZE_MEANS * num_boxes)
        bbox_normalize_stds = np.array(cfg["TRAIN"].BBOX_NORMALIZE_STDS * num_boxes)
        bbox_regr = plus(element_times(bbox_regr, bbox_normalize_stds), bbox_normalize_means, name='bbox_regr')

    cls_pred = softmax(cls_score, axis=1, name='cls_pred')
    eval_model = combine([cls_pred, rpn_rois, bbox_regr])

    return eval_model