def test_ArgMax(tmpdir):
    """Check ``C.argmax`` along axis 0 for an array operand and an input variable.

    Both models are handed to the ``verify_no_input`` / ``verify_one_input``
    helpers (defined elsewhere in the file) under the names 'ArgMax_0' and
    'ArgMax_1'.
    """
    dims = (4, 5)
    values = np.random.rand(*dims).astype(np.float32)

    # Case 0: argmax applied directly to the NumPy array.
    verify_no_input(C.argmax(values, 0), tmpdir, 'ArgMax_0')

    # Case 1: argmax over an input variable; the same array is given to the helper.
    operand = C.input_variable(dims)
    verify_one_input(C.argmax(operand, 0), values, tmpdir, 'ArgMax_1')
def test_ArgMax(tmpdir):
    """Run the ArgMax checks (axis 0) with and without a model input.

    A random float32 array of shape (4, 5) is used twice: once as the direct
    operand of ``C.argmax`` and once as the data passed alongside a model
    built on an input variable.
    """
    sample_shape = (4, 5)
    sample = np.random.rand(*sample_shape).astype(np.float32)

    # Model whose operand is the array itself.
    array_model = C.argmax(sample, 0)
    verify_no_input(array_model, tmpdir, 'ArgMax_0')

    # Model whose operand is an input variable of the same shape.
    variable_model = C.argmax(C.input_variable(sample_shape), 0)
    verify_one_input(variable_model, sample, tmpdir, 'ArgMax_1')
def test_ArgMax(tmpdir, dtype):
    """Check ``C.argmax`` along axis 0 under ``C.default_options(dtype=dtype)``.

    The random (4, 5) test array is cast to ``dtype``; the resulting models go
    to the ``verify_no_input`` / ``verify_one_input`` helpers (defined
    elsewhere in the file) as 'ArgMax_0' and 'ArgMax_1'.
    """
    with C.default_options(dtype=dtype):
        dims = (4, 5)
        values = np.random.rand(*dims).astype(dtype)

        # Case 0: argmax applied directly to the NumPy array.
        verify_no_input(C.argmax(values, 0), tmpdir, 'ArgMax_0')

        # Case 1: argmax over an input variable; the same array is given to the helper.
        operand = C.input_variable(dims)
        verify_one_input(C.argmax(operand, 0), values, tmpdir, 'ArgMax_1')
def test_ArgMax(tmpdir, dtype):
    """Run the ArgMax checks (axis 0) for the given ``dtype``.

    Everything is built inside ``C.default_options(dtype=dtype)``; the random
    (4, 5) array is cast to ``dtype`` before use.
    """
    with C.default_options(dtype=dtype):
        sample_shape = (4, 5)
        sample = np.random.rand(*sample_shape).astype(dtype)

        # Model whose operand is the array itself.
        array_model = C.argmax(sample, 0)
        verify_no_input(array_model, tmpdir, 'ArgMax_0')

        # Model whose operand is an input variable of the same shape.
        variable_model = C.argmax(C.input_variable(sample_shape), 0)
        verify_one_input(variable_model, sample, tmpdir, 'ArgMax_1')
def __init__(self, vocabulary, labels, model):
    """Load a saved model and attach an argmax predictor to it.

    Args:
        vocabulary: passed to ``get_vocab``; the length of the result
            becomes ``self.x_dim``.
        labels: passed to ``get_size``; the result becomes ``self.y_dim``.
        model: passed to ``C.load_model``.
    """
    # jieba is switched to parallel mode with one worker per reported CPU.
    worker_count = multiprocessing.cpu_count()
    jieba.enable_parallel(worker_count)

    network = C.load_model(model)

    self.vocab = get_vocab(vocabulary)
    self.x_dim = len(self.vocab)
    self.y_dim = get_size(labels)

    # Re-apply the loaded model to a fresh sparse sequence input.
    self.x = C.sequence.input_variable(self.x_dim, is_sparse=True)
    self.model = network(self.x)
    self.predictor = C.argmax(self.model)
def masking(input, labels):
    """Multiply ``input`` by a label-derived mask and flatten the product.

    The mask is reshaped to (10, 1, 1) and repeated 16 times along axis 1
    before the element-wise product. ``is_onehot_encoded`` (taken from the
    enclosing scope) selects how the mask is obtained from ``labels``.
    """
    if is_onehot_encoded:
        # Labels are used as the mask directly.
        mask = ct.reshape(labels, shape=(10, 1, 1))
    else:
        # Derive a one-hot mask from the argmax of the labels and block
        # gradients from flowing through it.
        winner = ct.argmax(labels, axis=0)
        flat_winner = ct.reshape(winner, shape=(-1, ))
        one_hot_winner = ct.one_hot(flat_winner, 10)
        mask = ct.stop_gradient(ct.reshape(one_hot_winner, shape=(10, 1, 1)))

    tiled_mask = ct.splice(*([mask] * 16), axis=1)
    masked = ct.element_times(input, tiled_mask)
    return ct.reshape(masked, shape=(-1, ))
def evaluate(reader, model_func, is_body=False):
    """Accumulate a confusion matrix over ``reader`` and return the accuracy.

    Prints the NaN-ignoring mean precision, mean recall and the overall
    accuracy. ``x``, ``y``, ``z``, ``xt``, ``xb``, the ``*_size`` constants,
    ``num_labels``, ``fast_hist`` and ``create_criterion_function_preferred``
    are not defined here and are resolved from the enclosing scope.

    Args:
        reader: minibatch source; read in chunks of 2048 until it returns an
            empty result.
        model_func: called once with ``x``.
        is_body: when true, evaluation feeds both ``xt`` and ``xb``;
            otherwise only ``x``.

    Returns:
        Sum of the confusion-matrix diagonal divided by its total sum.
    """
    test_xt = C.sequence.input_variable(title_size if is_body else vocab_size)
    test_xb = C.sequence.input_variable(body_size)
    test_y = C.input_variable(num_labels)

    model = model_func(x)

    # Create the loss and error functions
    criterion_loss, criterion_error = create_criterion_function_preferred(model, y)

    # Assign the data fields to be read from the input
    data_map = {x: reader.streams.title, y: reader.streams.industry}

    confusion = np.zeros((num_labels, num_labels))
    count = 0
    while True:
        batch = reader.next_minibatch(2048)  # fetch minibatch
        if not batch:
            break

        # Pick out the streams of interest by their name.
        for stream, values in batch.items():
            stream_name = stream.m_name
            if stream_name == "title":
                test_xt = values
            elif stream_name == "industry":
                test_y = values
            elif stream_name == "body":
                test_xb = values

        if is_body:
            feed = {xt: test_xt, xb: test_xb}
        else:
            feed = {x: test_xt}
        output = z(x).eval(feed).argmax(axis=1)

        gt = C.squeeze(C.argmax(y)).eval({y: test_y}).astype(int)
        confusion += fast_hist(output, gt, num_labels)
        count += 1

    hits = np.diag(confusion)
    precision = hits / np.sum(confusion, axis=0)
    recall = hits / np.sum(confusion, axis=1)
    accuracy = hits.sum() / confusion.sum()

    # nanmean skips classes whose row/column sum was zero.
    aver_precision = np.nanmean(precision)
    aver_recall = np.nanmean(recall)
    print("Precision:{} Recall:{} Acc:{}".format(aver_precision, aver_recall, accuracy))
    return accuracy
def createPredictionNetwork(self, preSoftmax):
    """Return the argmax over the last axis of ``softmax(preSoftmax)``.

    The argmax result is reshaped to ``Config.BatchSize``.
    """
    probabilities = C.softmax(preSoftmax)
    winners = C.argmax(probabilities, -1)
    return C.reshape(winners, shape=Config.BatchSize)
def crossentropy(y, t):
    """Return the negated batch mean of ``log(sum over axis 0 of y * t)``."""
    picked = C.reduce_sum(y * t, axis=0)
    log_picked = C.log(C.squeeze(picked, 0))
    return -C.reduce_mean(C.unpack_batch(log_picked))


y = crossentropy(softmax(forward(x)), t)

batch_size = 20

# Manual gradient descent: evaluate the gradients, then update each
# parameter in place with a step-dependent learning rate.
for i in range(min(dataset_size, 100000) // batch_size):
    lr = 0.5 * (.1**(max(i - 100, 0) // 1000))

    _rows = slice(batch_size * i, batch_size * (i + 1))
    sample = X[_rows]
    target = labels[_rows]

    g = y.grad({x: sample, t: target},
               wrt=[theta1, bias1, theta2, bias2])
    for param, grad in g.items():
        param.value = param.value - grad * lr

    loss = y.eval({x: sample, t: target})
    print("cost {} - learning rate {}".format(loss, lr))

# From here on `y` is the predicted class index rather than the loss.
y = C.squeeze(C.argmax(forward(x), 0), 0)

accuracy = 0
for i in range(1000):
    _rows = slice(batch_size * i, batch_size * (i + 1))
    sample = X[_rows]
    target = labels[_rows]
    tt = y.eval({x: sample})
    accuracy += np.sum(tt == np.argmax(target, axis=1))

print("Accuracy", accuracy / 1000. / batch_size)
# accuracy 99.36
def output_layer(self, embed, attention_context, model_context, aw, q_processed, c_processed,cw):
    """Build the output block: span start/end logits plus an attention decoder.

    The whole graph is built on placeholders and wrapped with ``C.as_block``
    under the name 'attention_layer'; the arguments below are bound to those
    placeholders in the final ``as_block`` call.

    Args:
        embed: callable invoked as ``embed(gx, ngx)`` on two slices of the
            decoder history (see ``find_embed``).
        attention_context: bound to a placeholder of shape ``(8*hidden_dim,)``.
        model_context: bound to a placeholder declared with shape
            ``2*hidden_dim``.
        aw: bound to a placeholder of shape ``(vocab_size+1,)``; used as the
            labels of the training model.
        q_processed: bound to a placeholder of shape ``(2*hidden_dim,)``;
            fed to the decoder as ``q``.
        c_processed: bound to a placeholder of shape ``(2*hidden_dim,)``;
            fed to the decoder as ``c``.
        cw: bound to an unshaped placeholder; its argmax over axis 0 is
            returned as the ``context`` output.

    Returns:
        A block combining ``(model_train, model_greedy, start_logits,
        end_logits, context)``, in that order.
    """
    # Placeholders standing in for the real inputs until as_block binds them.
    cw_ph=C.placeholder()
    att_context = C.placeholder(shape=(8*self.hidden_dim,))
    query_processed = C.placeholder(shape=(2*self.hidden_dim,))
    context_processed = C.placeholder(shape=(2*self.hidden_dim,))
    # NOTE(review): `(2*self.hidden_dim)` is a plain int, not a 1-tuple like
    # the neighbouring placeholders — confirm this is intended.
    mod_context = C.placeholder(shape=(2*self.hidden_dim))
    a_onehot = C.placeholder(shape=(self.vocab_size+1,))

    # Start logits: dense layer over the (dropped-out) spliced contexts.
    start_logits = C.layers.Dense(1, name='out_start')(C.dropout(C.splice(mod_context, att_context), self.dropout))
    start_hardmax = seq_hardmax(start_logits)
    # Gather mod_context where start_hardmax selects, keep the last gathered
    # element, and broadcast it along att_context's sequence.
    att_mod_ctx = C.sequence.last(C.sequence.gather(mod_context, start_hardmax))
    att_mod_ctx_expanded = C.sequence.broadcast_as(att_mod_ctx, att_context)
    end_input = C.splice(att_context, mod_context, att_mod_ctx_expanded, mod_context * att_mod_ctx_expanded)
    m2 = OptimizedRnnStack(self.hidden_dim, bidirectional=True, use_cudnn=self.use_cudnn, name='output_rnn')(end_input)
    # End logits: dense layer over the RNN output spliced with att_context.
    end_logits = C.layers.Dense(1, name='out_end')(C.dropout(C.splice(m2, att_context), self.dropout))

    # NOTE(review): start_flag / end_flag are not referenced again in this method.
    start_flag = C.hardmax(start_logits)
    end_flag = C.hardmax(end_logits)

    def create_model():
        """Create the encoders and return the ``decode`` function."""
        # Encoder: (input*) --> (h0, c0)
        # Create multiple layers of LSTMs by passing the output of the i-th layer
        # to the (i+1)th layer as its input
        with C.layers.default_options(enable_self_stabilization=True, go_backwards=False):
            # NOTE(review): LastRecurrence is assigned but not used below.
            LastRecurrence = C.layers.Recurrence
            # `encode` is applied to q, `encode_c` to c (see decode()).
            encode = C.layers.Sequential([
                C.layers.Stabilizer(),
                OptimizedRnnStack(self.hidden_dim, return_full_state=True),
            ])
            encode_c = C.layers.Sequential([
                C.layers.Stabilizer(),
                OptimizedRnnStack(self.hidden_dim, return_full_state=True),
            ])

        # Decoder: (history*, input*) --> unnormalized_word_logp*
        # where history is one of these, delayed by 1 step and <s> prepended:
        #  - training: labels
        #  - testing:  its own output hardmax(z) (greedy decoder)
        with C.layers.default_options(enable_self_stabilization=True):
            # sub-layers
            stab_in = C.layers.Stabilizer()
            # NOTE(review): num_layers LSTM blocks are created, but only
            # rec_blocks[0] is used in decode().
            rec_blocks = [C.layers.LSTM(self.hidden_dim) for i in range(self.num_layers)]
            stab_out = C.layers.Stabilizer()
            proj_out = C.layers.Dense(self.vocab_size+1, name='out_proj')
            # attention model
            attention_model = C.layers.AttentionModel(self.attention_dim, name='attention_model') # :: (h_enc*, h_dec) -> (h_dec augmented)
            # Dense layers producing the decoder's initial h / c state.
            hstate_dense = C.layers.Dense(self.hidden_dim, activation=C.tanh, input_rank=1)
            cstate_dense = C.layers.Dense(self.hidden_dim, activation=C.tanh, input_rank=1)
            # Projections summed into the pre-maxout readout `r`.
            W_dense = C.layers.Dense(2*self.hidden_dim, input_rank=1)
            U_dense = C.layers.Dense(2*self.hidden_dim, input_rank=1)
            V_dense = C.layers.Dense(2*self.hidden_dim, input_rank=1)
            maxout = C.layers.MaxPooling((2,), strides=2)
            # layer function
            @C.Function
            def decode(history, q, c, start_logits, end_logits):
                # These start_logits / end_logits parameters shadow the outer
                # variables of the same name.
                q = encode(q)
                c = encode_c(C.splice(c, start_logits, end_logits, axis=0))
                r = history
                r = stab_in(r)

                # Last step of both encoder outputs (index 0 and index 1)
                # for the query and the context.
                q_last_h = C.sequence.last(q.outputs[0])
                q_last_c = C.sequence.last(q.outputs[1])
                c_last_h = C.sequence.last(c.outputs[0])
                c_last_c = C.sequence.last(c.outputs[1])
                initial_hstate = hstate_dense(C.splice(q_last_h, c_last_h))
                initial_cstate = cstate_dense(C.splice(q_last_c, c_last_c))

                rec_block = rec_blocks[0]  # LSTM(hidden_dim) # :: (dh, dc, x) -> (h, c)

                @C.Function
                def find_embed(x):
                    # Split x along axis 0 at wg_dim and embed both parts.
                    gx, ngx = C.slice(x, 0, 0, self.wg_dim), C.slice(x, 0, self.wg_dim, self.vocab_size)
                    return embed(gx, ngx)

                @C.Function
                def lstm_with_attention(dh, dc, r, x):
                    history_embed = find_embed(x)
                    # Attend over the context and the query encodings using
                    # the previous hidden state.
                    h_att = attention_model(c.outputs[0], dh)
                    q_att = attention_model(q.outputs[0], dh)
                    att = C.splice(h_att, q_att)
                    x = C.splice(x, att)
                    x, dc = rec_block(dh, dc, x).outputs

                    # 0*r is a hack because cntk freaks out when r is not used.
                    r = U_dense(att) + W_dense(history_embed) + V_dense(x) + 0*r
                    # NOTE: the original author observed a bug when W_dense is
                    # added first (cause not recorded); the failing form was:
                    #r = W_dense(embed(gx, ngx)) + U_dense(att) + V_dense(x) + 0*r
                    return x, dc, r
                # Run the recurrence; only the third state (r) is kept.
                _, _, r = C.layers.RecurrenceFrom(lstm_with_attention, return_full_state=True)(initial_hstate, initial_cstate, C.Constant(np.zeros(2*self.hidden_dim)),r).outputs

                r = maxout(r)
                r = stab_out(r)
                r = proj_out(r)
                #r = C.softmax(r)
                r = C.layers.Label('out_proj_out')(r)
                return r
        return decode

    def create_model_train(s2smodel):
        """Wrap ``s2smodel`` so its history is the labels delayed by one step."""
        # model used in training (history is known from labels)
        # note: the labels must NOT contain the initial <s>
        @C.Function
        def model_train(labels, q, c, start_logits, end_logits): # (input*, labels*) --> (word_logp*)
            # The input to the decoder always starts with the special label sequence start token.
            # Then, use the previous value of the label sequence (for training) or the output (for execution).
            past_labels = C.layers.Delay(initial_state=self.sentence_start)(labels)
            return s2smodel(past_labels, q, c, start_logits, end_logits)
        return model_train

    def create_model_greedy(s2smodel):
        """Wrap ``s2smodel`` so its history is its own hardmax output."""
        # model used in (greedy) decoding (inferencing) (history is decoder's own output)
        @C.Function
        def model_greedy(q, c, start_logits, end_logits): # (input*) --> (word_sequence*)
            # Decoding is an unfold() operation starting from sentence_start.
            # We must transform s2smodel (history*, input* -> word_logp*) into a generator (history* -> output*)
            # which holds 'input' in its closure.
            unfold = C.layers.UnfoldFrom(\
                lambda history: s2smodel(history, q, c, start_logits, end_logits) >> C.hardmax,
                # stop once sentence_end_index was max-scoring output
                until_predicate=lambda w: w[...,self.sentence_end_index],
                length_increase=self.sentence_max_length)
            return unfold(initial_state=self.sentence_start, dynamic_axes_like=c)
        return model_greedy

    # One decoder is shared by the training and greedy models.
    s2smodel = create_model()

    model_train = create_model_train(s2smodel)(a_onehot, query_processed, context_processed, start_logits, end_logits)
    model_greed = create_model_greedy(s2smodel)(query_processed, context_processed, start_logits, end_logits)
    # Convert the greedy decoder output and cw to indices along axis 0.
    model_greedy = C.argmax(model_greed,0)
    context = C.argmax(cw_ph,0)

    # Bind each placeholder to its real input and expose the five outputs.
    return C.as_block(
        C.combine((model_train, model_greedy, start_logits, end_logits,context)),
        [(att_context, attention_context), (mod_context, model_context), (a_onehot, aw), (query_processed, q_processed), (context_processed, c_processed),(cw_ph,cw)],
        'attention_layer',
        'attention_layer')
def sample(self, n=1):
    """Return indices picked by comparing ``self.accum_prob`` with uniform draws.

    Draws a uniform tensor of shape ``(n, 1)``, takes the argmax over axis 1
    of ``accum_prob - draw > 0`` and squeezes the result.
    """
    # presumably accum_prob is a cumulative distribution, which would make
    # this inverse-CDF sampling — confirm against where accum_prob is built.
    draws = C.random.uniform((n, 1))
    above_draw = C.greater(self.accum_prob - draws, 0)
    picked = C.argmax(above_draw, axis=1)
    return C.squeeze(picked)
# Training criterion (cross entropy) and evaluation metric (classification
# error), both computed from z(x) against the targets t.
y = C.cross_entropy_with_softmax(z(x), t)
acc = C.classification_error(z(x), t)

batch_size = 20

from cntk.learners import sgd, learning_parameter_schedule

# One learning rate per epoch of 1000 minibatches, starting at 0.5 and
# multiplied by 0.1 at each following entry.
lr = learning_parameter_schedule(
    [.5 * (.1**i) for i in range(10000)],
    minibatch_size=batch_size,
    epoch_size=1000 * batch_size)
learner = sgd(z.parameters, lr)
trainer = C.Trainer(z(x), (y, acc), [learner])

for i in range(min(dataset_size, 100000) // batch_size):
    _rows = slice(batch_size * i, batch_size * (i + 1))
    sample = X[_rows]
    target = labels[_rows]
    trainer.train_minibatch({x: sample, t: target})

    # `acc` is rebound here to the last minibatch's metric value.
    loss = trainer.previous_minibatch_loss_average
    acc = trainer.previous_minibatch_evaluation_average
    print("cost {} - classification error {} - learning rate {}".format(
        loss, acc, learner.learning_rate()))

# From here on `y` is the predicted class index rather than the loss.
y = C.argmax(z(x))

accuracy = 0
for i in range(1000):
    _rows = slice(batch_size * i, batch_size * (i + 1))
    sample = X[_rows]
    target = labels[_rows]
    tt = y.eval({x: sample})
    accuracy += np.sum(tt == np.argmax(target, axis=1))

print("Accuracy", accuracy / 1000. / batch_size)
# accuracy 99.36
def validate_model(i2w, test_data, model, polymath):
    """Run ``model`` over ``test_data`` and report ROUGE statistics and mean loss.

    At most 1000 minibatches of size 128 are read. For each one the predicted
    span is turned into a token list, formatted together with the decoder
    output, and scored against the reference with ``rouge.Rouge.calc_score``.

    Args:
        i2w: passed through to ``format_true_sequences`` and
            ``format_predict_sequences``.
        test_data: passed to ``create_mb_and_map``.
        model: network whose outputs 1..5 are read as decoder output, start
            logits, end logits, context and loss.
        polymath: passed through to ``create_mb_and_map`` and the formatting
            helpers.

    Returns:
        The summed minibatch loss divided by the number of validated
        sequences, or ``float('nan')`` when no sequence was validated (the
        original code raised ZeroDivisionError in that case).
    """
    print('validating')
    RL = rouge.Rouge()

    testout = model.outputs[1]  # according to model.shape
    start_logits = model.outputs[2]
    end_logits = model.outputs[3]
    context = model.outputs[4]
    loss = model.outputs[5]

    root = C.as_composite(loss.owner)
    mb_source, input_map = create_mb_and_map(
        root, test_data, polymath, randomize=False, repeat=False)
    begin_label = argument_by_name(root, 'ab')
    end_label = argument_by_name(root, 'ae')
    onehot = argument_by_name(root, 'aw')

    # Inputs used to differentiate the best-span score; the gradients are
    # fed to predicted_span below.
    begin_prediction = C.sequence.input_variable(
        1, sequence_axis=begin_label.dynamic_axes[1], needs_gradient=True)
    end_prediction = C.sequence.input_variable(
        1, sequence_axis=end_label.dynamic_axes[1], needs_gradient=True)
    predicted_span = C.layers.Recurrence(C.plus)(
        begin_prediction - C.sequence.past_value(end_prediction))
    # NOTE(review): true_span is built but never evaluated in this function.
    true_span = C.layers.Recurrence(C.plus)(
        begin_label - C.sequence.past_value(end_label))
    best_span_score = symbolic_best_span(begin_prediction, end_prediction)
    one2num = C.argmax(onehot, 0)

    minibatch_size = 128
    num_sequences = 0
    stat = np.array([0, 0, 0, 0, 0, 0], dtype=np.dtype('float64'))
    loss_sum = 0
    cnt = 0
    while cnt < 1000:
        data = mb_source.next_minibatch(minibatch_size, input_map=input_map)
        if not data or not (onehot in data) or data[onehot].num_sequences == 0:
            break

        out = model.eval(
            data,
            outputs=[testout, start_logits, end_logits, context, loss],
            as_numpy=True)
        true = one2num.eval({onehot: data[onehot]})

        g = best_span_score.grad(
            {
                begin_prediction: out[start_logits],
                end_prediction: out[end_logits]
            },
            wrt=[begin_prediction, end_prediction],
            as_numpy=False)
        other_input_map = {
            begin_prediction: g[begin_prediction],
            end_prediction: g[end_prediction]
        }
        span = predicted_span.eval((other_input_map))

        # Keep the context entries at the positions where the span value is 1.
        span_out = np.asarray(span).reshape(-1).tolist()
        context_o = np.asarray(out[context]).reshape(-1).tolist()
        predict_answer = [
            context_o[pos] for pos, flag in enumerate(span_out) if flag == 1
        ]

        # Show the predicted answer for the first ten minibatches only.
        if cnt < 10:
            print(format_true_sequences(predict_answer, i2w, polymath))
            print('\n')
        cnt += 1

        true_text = format_true_sequences(
            np.asarray(true).reshape(-1).tolist(), i2w, polymath)
        predout_text = format_predict_sequences(
            np.asarray(out[testout]).reshape(-1), predict_answer, i2w, polymath)
        testloss = out[loss]

        stat += RL.calc_score(predout_text, true_text)
        loss_sum += np.sum(np.asarray(testloss))
        num_sequences += data[onehot].num_sequences

    if num_sequences == 0:
        # The loop stopped before any minibatch was processed; dividing by
        # zero below would raise. NaN compares false against any threshold,
        # so callers cannot mistake it for a good loss.
        print("Validated 0 sequences")
        return float('nan')

    loss_avg = loss_sum / num_sequences
    stat_avg = stat / float(num_sequences)
    print(
        "Validated {} sequences, loss {:.4f}, RouL {:.4f}, LCS {:.4f}, LengCan {:.4f}, LenRef {:.4f}, prec {:.4f}, rec {:.4f}"
        .format(num_sequences, loss_avg, stat_avg[0], stat_avg[1],
                stat_avg[2], stat_avg[3], stat_avg[4], stat_avg[5]))
    return loss_avg