Code Example #1
File: onnx_op_test.py Project: moolighty/CNTK
def test_ArgMax(tmpdir):
    shape = (4, 5)
    data = np.random.rand(*shape).astype(np.float32)
    model = C.argmax(data, 0)

    verify_no_input(model, tmpdir, 'ArgMax_0')

    x = C.input_variable(shape)
    model = C.argmax(x, 0)
    verify_one_input(model, data, tmpdir, 'ArgMax_1')
Code Example #2
File: onnx_op_test.py Project: yaochengji/CNTK
def test_ArgMax(tmpdir):
    shape = (4, 5)
    data = np.random.rand(*shape).astype(np.float32)
    model = C.argmax(data, 0)

    verify_no_input(model, tmpdir, 'ArgMax_0')

    x = C.input_variable(shape)
    model = C.argmax(x, 0)
    verify_one_input(model, data, tmpdir, 'ArgMax_1')
Code Example #3
File: onnx_op_test.py Project: PSEUDOBUBLAR/CNTK
def test_ArgMax(tmpdir, dtype):
    with C.default_options(dtype=dtype):
        shape = (4, 5)
        data = np.random.rand(*shape).astype(dtype)
        model = C.argmax(data, 0)

        verify_no_input(model, tmpdir, 'ArgMax_0')

        x = C.input_variable(shape)
        model = C.argmax(x, 0)
        verify_one_input(model, data, tmpdir, 'ArgMax_1')
Code Example #4
File: onnx_op_test.py Project: delpart/CNTK
def test_ArgMax(tmpdir, dtype):
    with C.default_options(dtype=dtype):
        shape = (4, 5)
        data = np.random.rand(*shape).astype(dtype)
        model = C.argmax(data, 0)

        verify_no_input(model, tmpdir, 'ArgMax_0')

        x = C.input_variable(shape)
        model = C.argmax(x, 0)
        verify_one_input(model, data, tmpdir, 'ArgMax_1')
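The four ArgMax tests above build the same tiny graph and hand it to verify_no_input / verify_one_input (the test helpers in onnx_op_test.py). As a quick sanity check of what C.argmax itself computes, here is a minimal sketch; the array values are made up for illustration, and it only assumes numpy and cntk are importable:

import numpy as np
import cntk as C

data = np.array([[1., 5., 2.],
                 [4., 0., 7.]], dtype=np.float32)

# argmax along axis 0 collapses that axis to size 1, so the result has shape (1, 3)
# and holds the winning row indices as floats.
print(C.argmax(data, 0).eval())   # [[1. 0. 1.]]
print(np.argmax(data, axis=0))    # [1 0 1] -- the same indices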
Code Example #5
def __init__(self, vocabulary, labels, model):
    # Enable parallel jieba tokenization and load the trained CNTK model.
    jieba.enable_parallel(multiprocessing.cpu_count())
    self.model = C.load_model(model)
    self.vocab = get_vocab(vocabulary)
    self.x_dim = len(self.vocab)
    self.y_dim = get_size(labels)
    # Sparse one-hot input sequence over the vocabulary.
    self.x = C.sequence.input_variable(self.x_dim, is_sparse=True)
    self.model = self.model(self.x)
    self.predictor = C.argmax(self.model)
Code Example #6
File: CapsLayer.py Project: southworks/CapsNet-CNTK
    def masking(input, labels):

        if not is_onehot_encoded:
            mask = ct.reshape(ct.one_hot(
                ct.reshape(ct.argmax(labels, axis=0), shape=(-1, )), 10),
                              shape=(10, 1, 1))
            mask = ct.stop_gradient(mask)
        else:
            mask = ct.reshape(labels, shape=(10, 1, 1))

        mask = ct.splice(*([mask] * 16), axis=1)
        return ct.reshape(ct.element_times(input, mask), shape=(-1, ))
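The masking closure above keeps a single capsule per sample: when the labels are not already one-hot, argmax followed by one_hot rebuilds a (10, 1, 1) indicator, which is then tiled 16 times along axis 1 and multiplied into the input. A quick check of that first branch on a dense label vector (a sketch, assuming only cntk imported as ct and numpy are available; the label values are made up):

import numpy as np
import cntk as ct

labels = np.array([0.1, 0.05, 0.6, 0.05, 0.05,
                   0.05, 0.02, 0.03, 0.03, 0.02], dtype=np.float32)

# argmax over axis 0 picks class 2; one_hot turns that index back into a 10-way indicator
mask = ct.reshape(ct.one_hot(
    ct.reshape(ct.argmax(labels, axis=0), shape=(-1, )), 10),
                  shape=(10, 1, 1))
print(mask.eval().reshape(10))   # only index 2 (the argmax) is 1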
Code Example #7
def evaluate(reader, model_func, is_body=False):
    # Compute per-class precision/recall and overall accuracy on the test reader.
    if is_body:
        test_xt = C.sequence.input_variable(title_size)
    else:
        test_xt = C.sequence.input_variable(vocab_size)

    test_xb = C.sequence.input_variable(body_size)
    test_y = C.input_variable(num_labels)
    # When is_body is set, the model is assumed to consume both title and body.
    model = model_func(test_xt, test_xb) if is_body else model_func(test_xt)

    # Create the loss and error functions
    loss, label_error = create_criterion_function_preferred(model, test_y)

    # Assign the data fields to be read from the input
    data_map = {test_xt: reader.streams.title, test_y: reader.streams.industry}

    confusion = np.zeros((num_labels, num_labels))
    count = 0
    while True:
        data = reader.next_minibatch(2048)  # fetch minibatch
        if not data:
            break

        # Pick the streams out of the minibatch by name.
        title_data = body_data = label_data = None
        for key in data.keys():
            if key.m_name == "title":
                title_data = data[key]
            if key.m_name == "industry":
                label_data = data[key]
            if key.m_name == "body":
                body_data = data[key]

        # Predicted class index for every sample in the minibatch.
        if is_body:
            output = model.eval({test_xt: title_data, test_xb: body_data}).argmax(axis=1)
        else:
            output = model.eval({test_xt: title_data}).argmax(axis=1)

        # Ground-truth class indices recovered from the one-hot labels.
        gt = C.squeeze(C.argmax(test_y)).eval({test_y: label_data}).astype(int)
        confusion += fast_hist(output, gt, num_labels)
        count += 1

    precision = np.diag(confusion) / np.sum(confusion, axis=0)
    recall = np.diag(confusion) / np.sum(confusion, axis=1)
    accuracy = np.diag(confusion).sum() / confusion.sum()
    aver_precision = np.nanmean(precision)
    aver_recall = np.nanmean(recall)

    print("Precision:{} Recall:{} Acc:{}".format(aver_precision, aver_recall, accuracy))
    return accuracy
Code Example #8
File: NMT_Model.py Project: j4ckl1u/cntk_s2s
def createPredictionNetwork(self, preSoftmax):
    nextWordProb = C.softmax(preSoftmax)
    bestTrans = C.reshape(C.argmax(nextWordProb, -1),
                          shape=(Config.BatchSize))
    return bestTrans
Code Example #9

def crossentropy(y, t):
    prob = C.squeeze(C.reduce_sum(y * t, axis=0), 0)
    return -C.reduce_mean(C.unpack_batch(C.log(prob)))


y = crossentropy(softmax(forward(x)), t)

batch_size = 20
for i in range(min(dataset_size, 100000) // batch_size):
    lr = 0.5 * (.1**(max(i - 100, 0) // 1000))
    sample = X[batch_size * i:batch_size * (i + 1)]
    target = labels[batch_size * i:batch_size * (i + 1)]
    g = y.grad({x: sample, t: target}, wrt=[theta1, bias1, theta2, bias2])
    for param, grad in g.items():
        param.value = param.value - grad * lr
    loss = y.eval({x: sample, t: target})
    print("cost {} - learning rate {}".format(loss, lr))

y = C.squeeze(C.argmax(forward(x), 0), 0)
accuracy = 0
for i in range(1000):
    sample = X[batch_size * i:batch_size * (i + 1)]
    target = labels[batch_size * i:batch_size * (i + 1)]
    tt = y.eval({x: sample})
    accuracy += np.sum(tt == np.argmax(target, axis=1))

print("Accuracy", accuracy / 1000. / batch_size)
# accuracy 99.36
Code Example #10
File: polymath.py Project: Robinysh/asc18-cntk
    def output_layer(self, embed, attention_context, model_context, aw, q_processed, c_processed,cw):
        cw_ph=C.placeholder()
        att_context = C.placeholder(shape=(8*self.hidden_dim,))
        query_processed = C.placeholder(shape=(2*self.hidden_dim,))
        context_processed = C.placeholder(shape=(2*self.hidden_dim,))
        mod_context = C.placeholder(shape=(2*self.hidden_dim))
        a_onehot = C.placeholder(shape=(self.vocab_size+1,))

        start_logits = C.layers.Dense(1, name='out_start')(C.dropout(C.splice(mod_context, att_context), self.dropout))
        start_hardmax = seq_hardmax(start_logits)
        att_mod_ctx = C.sequence.last(C.sequence.gather(mod_context, start_hardmax))
        att_mod_ctx_expanded = C.sequence.broadcast_as(att_mod_ctx, att_context)
        end_input = C.splice(att_context, mod_context, att_mod_ctx_expanded, mod_context * att_mod_ctx_expanded)
        m2 = OptimizedRnnStack(self.hidden_dim, bidirectional=True, use_cudnn=self.use_cudnn, name='output_rnn')(end_input)
        end_logits = C.layers.Dense(1, name='out_end')(C.dropout(C.splice(m2, att_context), self.dropout))

        start_flag = C.hardmax(start_logits)
        end_flag = C.hardmax(end_logits)
     
        def create_model():
            # Encoder: (input*) --> (h0, c0)
            # Create multiple layers of LSTMs by passing the output of the i-th layer
            # to the (i+1)th layer as its input
            with C.layers.default_options(enable_self_stabilization=True, go_backwards=False):
                LastRecurrence = C.layers.Recurrence
                encode = C.layers.Sequential([
                    C.layers.Stabilizer(),
                    OptimizedRnnStack(self.hidden_dim, return_full_state=True),
                ])

                encode_c = C.layers.Sequential([
                    C.layers.Stabilizer(),
                    OptimizedRnnStack(self.hidden_dim, return_full_state=True),
                ])
            
            # Decoder: (history*, input*) --> unnormalized_word_logp*
            # where history is one of these, delayed by 1 step and <s> prepended:
            #  - training: labels
            #  - testing:  its own output hardmax(z) (greedy decoder)
            with C.layers.default_options(enable_self_stabilization=True):
                # sub-layers
                stab_in = C.layers.Stabilizer()
                rec_blocks = [C.layers.LSTM(self.hidden_dim) for i in range(self.num_layers)]
                stab_out = C.layers.Stabilizer()
                proj_out = C.layers.Dense(self.vocab_size+1, name='out_proj')
                # attention model
                attention_model = C.layers.AttentionModel(self.attention_dim, 
                                                              name='attention_model') # :: (h_enc*, h_dec) -> (h_dec augmented)
                hstate_dense = C.layers.Dense(self.hidden_dim, activation=C.tanh, input_rank=1)
                cstate_dense = C.layers.Dense(self.hidden_dim, activation=C.tanh, input_rank=1)
                W_dense = C.layers.Dense(2*self.hidden_dim, input_rank=1)
                U_dense = C.layers.Dense(2*self.hidden_dim, input_rank=1)
                V_dense = C.layers.Dense(2*self.hidden_dim, input_rank=1)
                maxout  = C.layers.MaxPooling((2,), strides=2)
                # layer function
                @C.Function
                def decode(history, q, c, start_logits, end_logits):
                    q = encode(q)
                    c = encode_c(C.splice(c, start_logits, end_logits, axis=0))
                    r = history
                    r = stab_in(r)

                    q_last_h = C.sequence.last(q.outputs[0])
                    q_last_c = C.sequence.last(q.outputs[1])
                    c_last_h = C.sequence.last(c.outputs[0])
                    c_last_c = C.sequence.last(c.outputs[1])
                    initial_hstate = hstate_dense(C.splice(q_last_h, c_last_h))
                    initial_cstate = cstate_dense(C.splice(q_last_c, c_last_c))

                    rec_block = rec_blocks[0]   # LSTM(hidden_dim)  # :: (dh, dc, x) -> (h, c)
                    
                    @C.Function
                    def find_embed(x):
                        gx, ngx = C.slice(x, 0, 0, self.wg_dim), C.slice(x, 0, self.wg_dim, self.vocab_size)
                        return embed(gx, ngx) 

                    @C.Function
                    def lstm_with_attention(dh, dc, r, x):
                        history_embed = find_embed(x)
                        h_att = attention_model(c.outputs[0], dh)
                        q_att = attention_model(q.outputs[0], dh)
                        att = C.splice(h_att, q_att)
                        x = C.splice(x, att)
                        x, dc = rec_block(dh, dc, x).outputs
          
                        # 0*r is a workaround: CNTK raises an error when r is left unused.
                        r = U_dense(att) + W_dense(history_embed) + V_dense(x) + 0*r
                        # note: summing W_dense first triggers a bug, so keep this ordering
                        #r = W_dense(embed(gx, ngx)) + U_dense(att) + V_dense(x) + 0*r
                        return x, dc, r
                    _, _, r = C.layers.RecurrenceFrom(lstm_with_attention, return_full_state=True)(initial_hstate, initial_cstate, C.Constant(np.zeros(2*self.hidden_dim)),r).outputs
        
                    r = maxout(r)
                    r = stab_out(r)
                    r = proj_out(r)
                    #r = C.softmax(r)
                    r = C.layers.Label('out_proj_out')(r)
                    return r
            return decode

        def create_model_train(s2smodel):
            # model used in training (history is known from labels)
            # note: the labels must NOT contain the initial <s>
            @C.Function
            def model_train(labels, q, c, start_logits, end_logits): # (input*, labels*) --> (word_logp*)

                # The input to the decoder always starts with the special label sequence start token.
                # Then, use the previous value of the label sequence (for training) or the output (for execution).
                past_labels = C.layers.Delay(initial_state=self.sentence_start)(labels)
    
                return s2smodel(past_labels, q, c, start_logits, end_logits)
            return model_train

        def create_model_greedy(s2smodel):
            # model used in (greedy) decoding (inferencing) (history is decoder's own output)
            @C.Function
            def model_greedy(q, c, start_logits, end_logits): # (input*) --> (word_sequence*)
                # Decoding is an unfold() operation starting from sentence_start.
                # We must transform s2smodel (history*, input* -> word_logp*) into a generator (history* -> output*)
                # which holds 'input' in its closure.
                unfold = C.layers.UnfoldFrom(\
                                    lambda history: s2smodel(history, q, c, start_logits, end_logits) >> C.hardmax,
                                    # stop once sentence_end_index was max-scoring output
                                    until_predicate=lambda w: w[...,self.sentence_end_index],
                                    length_increase=self.sentence_max_length)
                return unfold(initial_state=self.sentence_start, dynamic_axes_like=c)
            return model_greedy
       
        s2smodel = create_model()
      
        model_train = create_model_train(s2smodel)(a_onehot, query_processed, context_processed, start_logits, end_logits)
        model_greed = create_model_greedy(s2smodel)(query_processed, context_processed, start_logits, end_logits)
        model_greedy = C.argmax(model_greed,0)
        context = C.argmax(cw_ph,0)

        return C.as_block(
            C.combine((model_train, model_greedy, start_logits, end_logits,context)),
            [(att_context, attention_context), (mod_context, model_context), (a_onehot, aw), (query_processed, q_processed), (context_processed, c_processed),(cw_ph,cw)],
            'attention_layer',
            'attention_layer')
Code Example #11
def sample(self, n=1):
    # Inverse-CDF sampling: for each uniform draw, take the index of the
    # first cumulative probability in self.accum_prob that exceeds it.
    samples = C.random.uniform((n, 1))
    indices = C.argmax(C.greater(self.accum_prob - samples, 0), axis=1)
    return C.squeeze(indices)
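The method above is the sampling half of a small categorical sampler built around C.argmax. A self-contained sketch of how such a class could be assembled is below; the class name, the constructor, and the probs argument are illustrative assumptions, and only sample() comes from the snippet above:

import numpy as np
import cntk as C

class CategoricalSampler:
    def __init__(self, probs):
        # probs: 1-D array of class probabilities summing to 1. The cumulative
        # sum is stored with shape (1, k) so that it broadcasts against the
        # (n, 1) uniform draws made in sample().
        self.accum_prob = C.Constant(np.cumsum(probs).reshape(1, -1).astype(np.float32))

    def sample(self, n=1):
        samples = C.random.uniform((n, 1))
        indices = C.argmax(C.greater(self.accum_prob - samples, 0), axis=1)
        return C.squeeze(indices)

sampler = CategoricalSampler(np.array([0.2, 0.5, 0.3]))
print(sampler.sample(5).eval())   # five class indices drawn from {0, 1, 2}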
Code Example #12
y = C.cross_entropy_with_softmax(z(x), t)
acc = C.classification_error(z(x), t)

batch_size = 20
from cntk.learners import sgd, learning_parameter_schedule
lr = learning_parameter_schedule([.5 * (.1**i) for i in range(10000)],
                                 minibatch_size=batch_size,
                                 epoch_size=1000 * batch_size)
learner = sgd(z.parameters, lr)
trainer = C.Trainer(z(x), (y, acc), [learner])

for i in range(min(dataset_size, 100000) // batch_size):
    sample = X[batch_size * i:batch_size * (i + 1)]
    target = labels[batch_size * i:batch_size * (i + 1)]
    trainer.train_minibatch({x: sample, t: target})
    loss = trainer.previous_minibatch_loss_average
    acc = trainer.previous_minibatch_evaluation_average
    print("cost {} - classification error {} - learning rate {}".format(
        loss, acc, learner.learning_rate()))

y = C.argmax(z(x))
accuracy = 0
for i in range(1000):
    sample = X[batch_size * i:batch_size * (i + 1)]
    target = labels[batch_size * i:batch_size * (i + 1)]
    tt = y.eval({x: sample})
    accuracy += np.sum(tt == np.argmax(target, axis=1))

print("Accuracy", accuracy / 1000. / batch_size)
# accuracy 99.36
Code Example #13
File: train_pm.py Project: Robinysh/asc18-cntk
def validate_model(i2w, test_data, model, polymath):
    print('validating')
    RL = rouge.Rouge()
    testout = model.outputs[1]  # according to model.shape
    start_logits = model.outputs[2]
    end_logits = model.outputs[3]
    context = model.outputs[4]
    loss = model.outputs[5]
    root = C.as_composite(loss.owner)
    mb_source, input_map = create_mb_and_map(root,
                                             test_data,
                                             polymath,
                                             randomize=False,
                                             repeat=False)
    begin_label = argument_by_name(root, 'ab')
    end_label = argument_by_name(root, 'ae')
    onehot = argument_by_name(root, 'aw')

    begin_prediction = C.sequence.input_variable(
        1, sequence_axis=begin_label.dynamic_axes[1], needs_gradient=True)
    end_prediction = C.sequence.input_variable(
        1, sequence_axis=end_label.dynamic_axes[1], needs_gradient=True)
    predicted_span = C.layers.Recurrence(
        C.plus)(begin_prediction - C.sequence.past_value(end_prediction))
    true_span = C.layers.Recurrence(C.plus)(begin_label -
                                            C.sequence.past_value(end_label))

    best_span_score = symbolic_best_span(begin_prediction, end_prediction)

    one2num = C.argmax(onehot, 0)

    minibatch_size = 128
    num_sequences = 0

    stat = np.array([0, 0, 0, 0, 0, 0], dtype=np.dtype('float64'))
    loss_sum = 0
    cnt = 0
    #while True:
    while cnt < 1000:
        data = mb_source.next_minibatch(minibatch_size, input_map=input_map)
        if not data or not (onehot in data) or data[onehot].num_sequences == 0:
            break

        out = model.eval(
            data,
            outputs=[testout, start_logits, end_logits, context, loss],
            as_numpy=True)
        true = one2num.eval({onehot: data[onehot]})

        g = best_span_score.grad(
            {
                begin_prediction: out[start_logits],
                end_prediction: out[end_logits]
            },
            wrt=[begin_prediction, end_prediction],
            as_numpy=False)
        #        print(g[begin_prediction], g[end_prediction])
        other_input_map = {
            begin_prediction: g[begin_prediction],
            end_prediction: g[end_prediction]
        }
        span = predicted_span.eval(other_input_map)
        #        print(span)

        span_out = np.asarray(span).reshape(-1).tolist()
        context_o = np.asarray(out[context]).reshape(-1).tolist()
        predict_answer = []
        for i in range(len(span_out)):
            if (span_out[i] == 1):
                predict_answer.append(context_o[i])

#       pred_out = np.asarray(out[context]).reshape(-1).tolist()
#       predict_answer = pred_out[span_begin:span_end+1]
        if cnt < 10:

            #print(predict_answer)
            print(format_true_sequences(predict_answer, i2w, polymath))
            print('\n')
        cnt += 1
        true_text = format_true_sequences(
            np.asarray(true).reshape(-1).tolist(), i2w, polymath)
        predout_text = format_predict_sequences(
            np.asarray(out[testout]).reshape(-1), predict_answer, i2w,
            polymath)
        testloss = out[loss]
        stat += RL.calc_score(predout_text, true_text)

        loss_sum += np.sum(np.asarray(testloss))
        num_sequences += data[onehot].num_sequences

    loss_avg = loss_sum / num_sequences
    stat_avg = stat / float(num_sequences)
    print(
        "Validated {} sequences, loss {:.4f}, RouL {:.4f}, LCS {:.4f}, LengCan {:.4f}, LenRef {:.4f}, prec {:.4f}, rec {:.4f}"
        .format(num_sequences, loss_avg, stat_avg[0], stat_avg[1], stat_avg[2],
                stat_avg[3], stat_avg[4], stat_avg[5]))

    return loss_avg