def bi_sru_layer(self, sru_1, index):
    # forward direction
    f_1_f = C.sigmoid(sru_1[0 * self.param2:1 * self.param2] + self.list_bias[0 + index * 4])
    r_1_f = C.sigmoid(sru_1[1 * self.param2:2 * self.param2] + self.list_bias[1 + index * 4])
    c_1_f_r = (1 - f_1_f) * sru_1[2 * self.param2:3 * self.param2]
    dec_c_1_f = C.layers.ForwardDeclaration('f_' + str(index))
    var_c_1_f = C.sequence.delay(dec_c_1_f, initial_state=0, time_step=1)
    nex_c_1_f = var_c_1_f * f_1_f + c_1_f_r
    dec_c_1_f.resolve_to(nex_c_1_f)
    h_1_f = r_1_f * C.tanh(nex_c_1_f) + (1 - r_1_f) * sru_1[3 * self.param2:4 * self.param2]

    # backward direction (negative time_step delays from the future)
    f_1_b = C.sigmoid(sru_1[4 * self.param2:5 * self.param2] + self.list_bias[2 + index * 4])
    r_1_b = C.sigmoid(sru_1[5 * self.param2:6 * self.param2] + self.list_bias[3 + index * 4])
    c_1_b_r = (1 - f_1_b) * sru_1[6 * self.param2:7 * self.param2]
    dec_c_1_b = C.layers.ForwardDeclaration('b_' + str(index))
    var_c_1_b = C.sequence.delay(dec_c_1_b, time_step=-1)
    nex_c_1_b = var_c_1_b * f_1_b + c_1_b_r
    dec_c_1_b.resolve_to(nex_c_1_b)
    h_1_b = r_1_b * C.tanh(nex_c_1_b) + (1 - r_1_b) * sru_1[7 * self.param2:8 * self.param2]

    x = C.splice(h_1_f, h_1_b)
    return x
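# Added for illustration (not from the source): the ForwardDeclaration /
# sequence.delay pattern used above is CNTK's way of building a recurrence
# without C.layers.Recurrence. A minimal runnable sketch computing a running
# sum over a sequence:
import numpy as np
import cntk as C

x = C.sequence.input_variable(1)
decl = C.layers.ForwardDeclaration('running_sum')
past = C.sequence.delay(decl, initial_state=0, time_step=1)  # previous total
total = past + x
decl.resolve_to(total)  # closes the recurrence loop
print(total.eval({x: [np.array([[1.], [2.], [3.]], dtype=np.float32)]}))
# expected: [[1.], [3.], [6.]]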
def GetPredictionOnEvalSet(model, testfile, submissionfile):
    global q_max_words, p_max_words, emb_dim
    f = open(testfile, 'r', encoding="utf-8")
    all_scores = {}  # key = query_id, value = array of scores for the respective passages
    for line in f:
        tokens = line.strip().split("|")
        # tokens[0] is an empty token since the line starts with |
        x1 = tokens[1].replace("qfeatures", "").strip()  # query features
        x2 = tokens[2].replace("pfeatures", "").strip()  # passage features
        query_id = tokens[3].replace("qid", "").strip()  # query id
        x1 = [float(v) for v in x1.split()]
        x2 = [float(v) for v in x2.split()]
        queryVec = np.array(x1, dtype="float32").reshape(1, q_max_words, emb_dim)
        passageVec = np.array(x2, dtype="float32").reshape(1, p_max_words, emb_dim)
        score = (C.sigmoid(model(queryVec, passageVec)).eval())[0]  # forward-prop on the model to get the score
        if query_id in all_scores:
            all_scores[query_id].append(score)
        else:
            all_scores[query_id] = [score]
    f.close()

    fw = open(submissionfile, "w", encoding="utf-8")
    for query_id in all_scores:
        scores = all_scores[query_id]
        scores_str = [str(sc) for sc in scores]  # convert all scores to strings
        scores_str = "\t".join(scores_str)       # join into one tab-delimited string
        fw.write(query_id + "\t" + scores_str + "\n")
    fw.close()
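# Illustrative check (an addition, not from the source) of the field parsing
# used above; real lines carry q_max_words*emb_dim and p_max_words*emb_dim floats:
line = "|qfeatures 0.1 0.2 |pfeatures 0.3 0.4 |qid 7"
tokens = line.strip().split("|")
assert tokens[0] == ""  # the leading '|' produces an empty first token
assert tokens[3].replace("qid", "").strip() == "7"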
def test_sigmoid():
    a = C.input_variable((), dtype=np.float16, needs_gradient=True, name='a')
    s = C.sigmoid(a)
    result = s.eval([[0]])
    grad = s.grad([[0]])
    assert np.array_equal(result, np.asarray([0.5]).astype(np.float16))
    assert np.array_equal(grad, np.asarray([0.25]).astype(np.float16))
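# Why the expected values hold (a NumPy cross-check added for illustration):
# sigmoid(0) = 0.5, and sigmoid'(z) = sigmoid(z) * (1 - sigmoid(z)), so the
# gradient at 0 is 0.5 * 0.5 = 0.25.
import numpy as np

def np_sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

assert np.isclose(np_sigmoid(0.0), 0.5)
assert np.isclose(np_sigmoid(0.0) * (1 - np_sigmoid(0.0)), 0.25)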
def model(self):
    c1_axis = C.Axis.new_unique_dynamic_axis('c1_axis')
    c2_axis = C.Axis.new_unique_dynamic_axis('c2_axis')
    b = C.Axis.default_batch_axis()

    c1 = C.input_variable(self.word_dim, dynamic_axes=[b, c1_axis], name='c1')
    c2 = C.input_variable(self.word_dim, dynamic_axes=[b, c2_axis], name='c2')
    y = C.input_variable(1, dynamic_axes=[b], name='y')

    c1_processed, c2_processed = self.input_layer(c1, c2).outputs
    att_context = self.attention_layer(c2_processed, c1_processed, 'attention')

    c2_len = C.layers.Fold(plus1)(c2_processed)
    att_len = C.layers.Fold(plus1)(att_context)

    cos = C.cosine_distance(C.sequence.reduce_sum(c2_processed) / c2_len,
                            C.sequence.reduce_sum(att_context) / att_len)
    prob = C.sigmoid(cos)
    is_context = C.greater(prob, 0.5)
    loss = C.losses.binary_cross_entropy(prob, y)
    acc = C.equal(is_context, y)
    return cos, loss, acc
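# `plus1` is used above (via C.layers.Fold) to count sequence elements but is
# not defined in this snippet; a plausible definition (an assumption) bumps the
# running state by one per element, so folding from the default initial state 0
# yields the sequence length:
def plus1(acc, x):
    return acc + 1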
def createNetwork(self, inputEmb, preHidden, preMem=None):
    # reset gate
    WrX = C.times(inputEmb, self.Wr) + self.Wrb
    UrH = C.times(preHidden, self.Ur)
    R = C.sigmoid(WrX + UrH)

    # update gate
    WzX = C.times(inputEmb, self.Wz) + self.Wzb
    UzH = C.times(preHidden, self.Uz)
    Z = C.sigmoid(WzX + UzH)

    # candidate hidden state
    UH = C.times(preHidden, self.U) + self.Ub
    UHR = C.element_times(UH, R)
    WX = C.times(inputEmb, self.W) + self.Wb
    HTilde = C.tanh(WX + UHR)

    CurH = C.element_times(HTilde, 1 - Z) + C.element_times(preHidden, Z)
    return (CurH, None)
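# For comparison (added, not from the source): CNTK ships the same gate
# structure as a prebuilt step function; the shapes here are illustrative
# assumptions.
import numpy as np
import cntk as C

seq_in = C.sequence.input_variable(4)
h = C.layers.Recurrence(C.layers.GRU(8))(seq_in)
sample = [np.random.rand(5, 4).astype(np.float32)]
print(h.eval({seq_in: sample})[0].shape)  # (5, 8): one 8-dim state per step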
def test_sigmoid_2():
    cntk_op = C.sigmoid([0.])
    cntk_ret = cntk_op.eval()

    ng_op, _ = CNTKImporter().import_model(cntk_op)
    ng_ret = ng.transformers.make_transformer().computation(ng_op)()

    assert np.isclose(cntk_ret, ng_ret).all()
def createNetwork(self, inputEmb, preHidden):
    WX = C.times(inputEmb, self.W) + self.Wb
    UH = C.times(preHidden, self.U) + self.Ub
    R = C.sigmoid(
        C.slice(WX, -1, 0, self.hiddenSize) +
        C.slice(UH, -1, 0, self.hiddenSize))
    Z = C.sigmoid(
        C.slice(WX, -1, self.hiddenSize, self.hiddenSize * 2) +
        C.slice(UH, -1, self.hiddenSize, self.hiddenSize * 2))
    UHR = C.element_times(
        C.slice(UH, -1, self.hiddenSize * 2, self.hiddenSize * 3), R)
    HTilde = C.tanh(
        C.slice(WX, -1, self.hiddenSize * 2, self.hiddenSize * 3) + UHR)
    CurH = C.element_times(HTilde, 1 - Z) + C.element_times(preHidden, Z)
    return CurH
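# Quick illustration (an addition) of how C.slice splits a stacked gate
# projection along the last axis, as done above:
import numpy as np
import cntk as C

v = C.constant(np.arange(6, dtype=np.float32))
print(C.slice(v, -1, 0, 3).eval())  # [0. 1. 2.] -> first "gate"
print(C.slice(v, -1, 3, 6).eval())  # [3. 4. 5.] -> second "gate"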
def func(x_var):
    x = C.placeholder()
    transform_gate = C.sigmoid(C.times(x, WT, name=name + '_T') + bT)
    update = C.relu(C.times(x, WU, name=name + '_U') + bU)
    return C.as_block(
        x + transform_gate * (update - x),  # t(x)*u(x) + (1-t(x))*x
        [(x, x_var)],
        'HighwayBlock',
        'HighwayBlock' + name)
def resnet_exclusive(input, num_filters):
    c1 = conv_bn_relu(input, (3, 3), num_filters)
    c2 = conv_bn(c1, (3, 3), num_filters, bn_init_scale=1)
    b1 = conv_bn_relu(input, (1, 1), num_filters)
    b2 = 1 - C.sigmoid(b1)
    input = input * b2
    p = input + c2
    return relu(p)
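# conv_bn and conv_bn_relu are not defined in this snippet; typical definitions
# (assumptions, sketched here for completeness) pair Convolution with
# BatchNormalization:
import cntk as C

def conv_bn(input, filter_size, num_filters, strides=(1, 1), bn_init_scale=1):
    c = C.layers.Convolution2D(filter_size, num_filters, activation=None,
                               pad=True, strides=strides, bias=False)(input)
    return C.layers.BatchNormalization(map_rank=1, init_scale=bn_init_scale)(c)

def conv_bn_relu(input, filter_size, num_filters, strides=(1, 1)):
    return C.relu(conv_bn(input, filter_size, num_filters, strides))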
def lstm_func(output_dim, cell_dim, x, input_dim, prev_state_h, prev_state_c):
    # input gate (t)
    it_w = C.times(x, C.parameter((input_dim, cell_dim)))
    it_b = C.parameter((1, cell_dim))
    it_h = C.times(prev_state_h, C.parameter((output_dim, cell_dim)))
    it_c = C.parameter((1, cell_dim)) * prev_state_c
    it = C.sigmoid((it_w + it_b + it_h + it_c), name='it')

    # applied to tanh of input
    bit_w = C.times(x, C.parameter((input_dim, cell_dim)))
    bit_h = C.times(prev_state_h, C.parameter((output_dim, cell_dim)))
    bit_b = C.parameter((1, cell_dim))
    bit = it * C.tanh(bit_w + (bit_h + bit_b))

    # forget-me-not gate (t)
    ft_w = C.times(x, C.parameter((input_dim, cell_dim)))
    ft_b = C.parameter((1, cell_dim))
    ft_h = C.times(prev_state_h, C.parameter((output_dim, cell_dim)))
    ft_c = C.parameter((1, cell_dim)) * prev_state_c
    ft = C.sigmoid((ft_w + ft_b + ft_h + ft_c), name='ft')

    # applied to cell(t-1)
    bft = ft * prev_state_c

    # c(t) = sum of both
    ct = bft + bit

    # output gate
    ot_w = C.times(x, C.parameter((input_dim, cell_dim)))
    ot_b = C.parameter((1, cell_dim))
    ot_h = C.times(prev_state_h, C.parameter((output_dim, cell_dim)))
    ot_c = C.parameter((1, cell_dim)) * prev_state_c
    ot = C.sigmoid((ot_w + ot_b + ot_h + ot_c), name='ot')

    # applied to tanh(cell(t))
    ht = ot * C.tanh(ct)

    # return cell value and hidden state
    return ct, ht
def grid_lstm_func(m_t_1_k, m_tk_1, c_t_1_k, c_tk_1, x_tk):
    common_11 = C.times(m_t_1_k, W_t_im) + C.times(m_tk_1, W_k_im) + \
        C.times(c_t_1_k, W_t_ic) + C.times(c_tk_1, W_k_ic)
    i_t_tk = C.sigmoid(C.times(x_tk, W_t_ix) + common_11 + b_t_i)
    i_k_tk = C.sigmoid(C.times(x_tk, W_k_ix) + common_11 + b_k_i)

    common_12 = C.times(m_t_1_k, W_t_fm) + C.times(m_tk_1, W_k_fm) + \
        C.times(c_t_1_k, W_t_fc) + C.times(c_tk_1, W_k_fc)
    f_t_tk = C.sigmoid(C.times(x_tk, W_t_fx) + common_12 + b_t_f)
    f_k_tk = C.sigmoid(C.times(x_tk, W_k_fx) + common_12 + b_k_f)

    c_t_tk = C.element_times(f_t_tk, c_t_1_k) + C.element_times(
        i_t_tk,
        C.tanh(C.times(x_tk, W_t_cx) + C.times(m_t_1_k, W_t_cm) +
               C.times(m_tk_1, W_k_cm) + b_t_c))  # (13)
    c_k_tk = C.element_times(f_k_tk, c_tk_1) + C.element_times(
        i_k_tk,
        C.tanh(C.times(x_tk, W_k_cx) + C.times(m_t_1_k, W_t_cm) +
               C.times(m_tk_1, W_k_cm) + b_k_c))  # (14)

    common_15 = C.times(m_t_1_k, W_t_om) + C.times(m_tk_1, W_k_om) + \
        C.times(c_t_tk, W_t_oc) + C.times(c_k_tk, W_k_oc)
    o_t_tk = C.sigmoid(C.times(x_tk, W_t_ox) + common_15 + b_t_o)
    o_k_tk = C.sigmoid(C.times(x_tk, W_k_ox) + common_15 + b_k_o)

    m_t_tk = C.element_times(o_t_tk, C.tanh(c_t_tk))
    m_k_tk = C.element_times(o_k_tk, C.tanh(c_k_tk))

    return (m_t_tk, m_k_tk, c_t_tk, c_k_tk)
def lstm_func(output_dim, cell_dim, x, input_dim, prev_state_h, prev_state_c):
    # input gate (t)
    it_w = C.times(C.parameter((cell_dim, input_dim)), x)
    it_b = C.parameter((cell_dim))
    it_h = C.times(C.parameter((cell_dim, output_dim)), prev_state_h)
    it_c = C.parameter((cell_dim)) * prev_state_c
    it = C.sigmoid((it_w + it_b + it_h + it_c), name='it')

    # applied to tanh of input
    bit_w = C.times(C.parameter((cell_dim, input_dim)), x)
    bit_h = C.times(C.parameter((cell_dim, output_dim)), prev_state_h)
    bit_b = C.parameter((cell_dim))
    bit = it * C.tanh(bit_w + (bit_h + bit_b))

    # forget-me-not gate (t)
    ft_w = C.times(C.parameter((cell_dim, input_dim)), x)
    ft_b = C.parameter((cell_dim))
    ft_h = C.times(C.parameter((cell_dim, output_dim)), prev_state_h)
    ft_c = C.parameter((cell_dim)) * prev_state_c
    ft = C.sigmoid((ft_w + ft_b + ft_h + ft_c), name='ft')

    # applied to cell(t-1)
    bft = ft * prev_state_c

    # c(t) = sum of both
    ct = bft + bit

    # output gate
    ot_w = C.times(C.parameter((cell_dim, input_dim)), x)
    ot_b = C.parameter((cell_dim))
    ot_h = C.times(C.parameter((cell_dim, output_dim)), prev_state_h)
    ot_c = C.parameter((cell_dim)) * prev_state_c
    ot = C.sigmoid((ot_w + ot_b + ot_h + ot_c), name='ot')

    # applied to tanh(cell(t))
    ht = ot * C.tanh(ct)

    # return cell value and hidden state
    return ct, ht
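# For reference (added, not from the source): the hand-rolled LSTM cells above
# can be compared against CNTK's built-in step function; shapes are assumptions.
import numpy as np
import cntk as C

seq_in = C.sequence.input_variable(4)
h = C.layers.Recurrence(C.layers.LSTM(8))(seq_in)
print(h.eval({seq_in: [np.random.rand(3, 4).astype(np.float32)]})[0].shape)  # (3, 8)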
def createNetwork(self, inputEmb, preHidden, preMem):
    WX = C.times(inputEmb, self.W) + self.Wb
    UH = C.times(preHidden, self.U) + self.Ub
    I = C.sigmoid(
        C.slice(WX, -1, 0, self.hiddenSize) +
        C.slice(UH, -1, 0, self.hiddenSize))
    O = C.sigmoid(
        C.slice(WX, -1, self.hiddenSize, self.hiddenSize * 2) +
        C.slice(UH, -1, self.hiddenSize, self.hiddenSize * 2))
    F = C.sigmoid(
        C.slice(WX, -1, self.hiddenSize * 2, self.hiddenSize * 3) +
        C.slice(UH, -1, self.hiddenSize * 2, self.hiddenSize * 3))
    N = C.tanh(
        C.slice(WX, -1, self.hiddenSize * 3, self.hiddenSize * 4) +
        C.slice(UH, -1, self.hiddenSize * 3, self.hiddenSize * 4))
    NI = C.element_times(N, I)
    FM = C.element_times(F, preMem)
    CurMem = NI + FM
    CurH = C.element_times(C.tanh(CurMem), O)
    return (CurH, CurMem)
def func(x_var):
    x = C.placeholder()
    WT = C.Parameter((dim, dim,), init=transform_weight_initializer, name=name + '_WT')
    bT = C.Parameter(dim, init=transform_bias_initializer, name=name + '_bT')
    WU = C.Parameter((dim, dim,), init=update_weight_initializer, name=name + '_WU')
    bU = C.Parameter(dim, init=update_bias_initializer, name=name + '_bU')
    transform_gate = C.sigmoid(C.times(x, WT, name=name + '_T') + bT)
    update = C.relu(C.times(x, WU, name=name + '_U') + bU)
    return C.as_block(
        x + transform_gate * (update - x),
        [(x, x_var)],
        'HighwayBlock',
        'HighwayBlock' + name)
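# A minimal sketch (assumptions: dim, name, and simple initializers) exercising
# a highway block like the one above end to end:
import numpy as np
import cntk as C

dim, name = 4, 'hw0'
x_var = C.input_variable(dim)
WT = C.Parameter((dim, dim), init=C.glorot_uniform(), name=name + '_WT')
bT = C.Parameter(dim, init=-1.0, name=name + '_bT')  # negative bias starts the gate near pass-through
WU = C.Parameter((dim, dim), init=C.glorot_uniform(), name=name + '_WU')
bU = C.Parameter(dim, init=0.0, name=name + '_bU')
t = C.sigmoid(C.times(x_var, WT) + bT)
u = C.relu(C.times(x_var, WU) + bU)
y = x_var + t * (u - x_var)  # same algebra as the block above
print(y.eval({x_var: np.ones((1, dim), dtype=np.float32)}))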
def unit(dh, dc, x):
    '''dh: out_dim, dc: 4096, x: input_dim'''
    proj4 = b + C.times(x, W) + C.times(dh, H)

    it_proj = proj4[0:1 * stacked_dim]                  # split along stack_axis
    bit_proj = proj4[1 * stacked_dim: 2 * stacked_dim]
    ft_proj = proj4[2 * stacked_dim: 3 * stacked_dim]
    ot_proj = proj4[3 * stacked_dim: 4 * stacked_dim]

    it = C.sigmoid(it_proj)       # input gate(t)
    # TODO: should both activations be replaced?
    bit = it * C.tanh(bit_proj)   # applied to tanh of input network
    ft = C.sigmoid(ft_proj)       # forget-me-not gate(t)
    bft = ft * dc                 # applied to cell(t-1)
    ct = bft + bit                # c(t) is sum of both
    ot = C.sigmoid(ot_proj)       # output gate(t)
    ht = ot * C.tanh(ct)          # applied to tanh(cell(t))

    c = ct                        # cell value
    h = ht
    proj_h = C.times(h, proj_W)   # out_dim
    return (proj_h, c)
def sigmoid(x, name=''):
    '''
    Computes the element-wise sigmoid of `x`:

    :math:`sigmoid(x) = {1 \over {1+\exp(-x)}}`

    The output tensor has the same shape as `x`.

    Example:
        >>> C.eval(C.sigmoid([-2, -1., 0., 1., 2.]))
        [array([[ 0.119203,  0.268941,  0.5,  0.731059,  0.880797]])]

    Args:
        x: numpy array or any :class:`cntk.Function` that outputs a tensor
        name (str): the name of the node in the network

    Returns:
        :class:`cntk.Function`
    '''
    from cntk import sigmoid
    x = sanitize_input(x)
    return sigmoid(x, name).output()
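# Note (added): the wrapper above is legacy-API glue; in CNTK 2.x, C.sigmoid
# accepts lists and NumPy arrays directly:
import cntk as C

print(C.sigmoid([-2., -1., 0., 1., 2.]).eval())
# approximately [0.1192, 0.2689, 0.5, 0.7311, 0.8808]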
def func(x_var):
    x = C.placeholder()
    WT = C.Parameter((dim, dim,), init=transform_weight_initializer, name=name + '_WT')
    bT = C.Parameter(dim, init=transform_bias_initializer, name=name + '_bT')
    WU = C.Parameter((dim, dim,), init=update_weight_initializer, name=name + '_WU')
    bU = C.Parameter(dim, init=update_bias_initializer, name=name + '_bU')
    transform_gate = C.sigmoid(C.times(x, WT, name=name + '_T') + bT)
    update = C.tanh(C.times(x, WU, name=name + '_U') + bU)
    return C.as_block(
        update * transform_gate + (1 - transform_gate) * x,
        [(x, x_var)],
        'SingleInner',
        'SingleInner' + name)
def LSTMCell(x, y, dh, dc):
    '''LightLSTM Cell'''
    b = C.parameter(shape=(4 * cell_dim), init=0)
    W = C.parameter(shape=(input_dim, 4 * cell_dim), init=glorot_uniform())
    H = C.parameter(shape=(cell_dim, 4 * cell_dim), init=glorot_uniform())

    # projected contribution from input x, hidden, and bias
    proj4 = b + C.times(x, W) + C.times(dh, H)

    it_proj = C.slice(proj4, -1, 0 * cell_dim, 1 * cell_dim)
    bit_proj = C.slice(proj4, -1, 1 * cell_dim, 2 * cell_dim)
    ft_proj = C.slice(proj4, -1, 2 * cell_dim, 3 * cell_dim)
    ot_proj = C.slice(proj4, -1, 3 * cell_dim, 4 * cell_dim)

    it = C.sigmoid(it_proj)      # input gate
    bit = it * C.tanh(bit_proj)
    ft = C.sigmoid(ft_proj)      # forget gate
    bft = ft * dc
    ct = bft + bit
    ot = C.sigmoid(ot_proj)      # output gate
    ht = ot * C.tanh(ct)

    # projected contribution from input y, hidden, and bias
    proj4_2 = b + C.times(y, W) + C.times(ht, H)

    it_proj_2 = C.slice(proj4_2, -1, 0 * cell_dim, 1 * cell_dim)
    bit_proj_2 = C.slice(proj4_2, -1, 1 * cell_dim, 2 * cell_dim)
    ft_proj_2 = C.slice(proj4_2, -1, 2 * cell_dim, 3 * cell_dim)
    ot_proj_2 = C.slice(proj4_2, -1, 3 * cell_dim, 4 * cell_dim)

    it_2 = C.sigmoid(it_proj_2)      # input gate
    bit_2 = it_2 * C.tanh(bit_proj_2)
    ft_2 = C.sigmoid(ft_proj_2)      # forget gate
    bft_2 = ft_2 * ct
    ct2 = bft_2 + bit_2
    ot_2 = C.sigmoid(ot_proj_2)      # output gate
    ht2 = ot_2 * C.tanh(ct2)

    return (ht, ct, ht2, ct2)
def TrainAndValidate(trainfile):
    # ***** Hyper-parameters ******
    global tf, l, a, r
    q_max_words = 15
    p_max_words = 120
    emb_dim = 50
    num_classes = 2
    minibatch_size = 4000
    epoch_size = 5241880   # number of samples in the training set
    total_epochs = 19      # total number of epochs to run
    query_total_dim = q_max_words * emb_dim
    label_total_dim = num_classes
    passage_total_dim = p_max_words * emb_dim

    # ****** Create placeholders for reading training data ***********
    query_input_var = C.sequence.input_variable((1, q_max_words, emb_dim), np.float32, is_sparse=False)
    passage_input_var = C.sequence.input_variable((1, p_max_words, emb_dim), np.float32, is_sparse=False)
    output_var = C.input_variable(num_classes, np.float32, is_sparse=False)
    train_reader = create_reader(trainfile, True, query_total_dim, passage_total_dim, label_total_dim)
    input_map = {
        query_input_var: train_reader.streams.queryfeatures,
        passage_input_var: train_reader.streams.passagefeatures,
        output_var: train_reader.streams.labels
    }

    # ********* Model configuration *******
    model_output = rnn_network(query_input_var, passage_input_var, num_classes)
    # model_output.restore('RNN_{}.dnn')  # uncomment to resume training from a saved model

    # NOTE: output_var[1] is a CNTK slice Function, so this comparison against
    # the string '1' is effectively always False and a is set to 0.
    if output_var[1] == '1':
        a = 1
    else:
        a = 0
    loss = C.sigmoid(create_loss(model_output, a))
    pe = None

    lr_per_sample = [0.0015625] * 20 + [0.00046875] * 20 + [0.00015625] * 20 + \
        [0.000046785] * 10 + [0.000015625]
    lr_schedule = C.learning_parameter_schedule_per_sample(lr_per_sample, epoch_size=epoch_size)
    mms = [0] * 20 + [0.9200444146293233] * 20 + [0.9591894571091382]
    mm_schedule = C.learners.momentum_schedule(mms, epoch_size=epoch_size, minibatch_size=minibatch_size)
    l2_reg_weight = 0.0002

    dssm_learner = C.learners.momentum_sgd(model_output.parameters, lr_schedule, mm_schedule)
    learner = dssm_learner
    progress_printer = C.logging.ProgressPrinter(tag='Training', num_epochs=total_epochs)

    # ************ Create Trainer with model_output object, learner and loss parameters *************
    trainer = C.Trainer(model_output, (loss, pe), learner, progress_printer)
    C.logging.log_number_of_parameters(model_output)

    # **** Train the model batch-wise *****
    for epoch in range(total_epochs):       # loop over epochs
        print("Epoch : ", epoch)
        sample_count = 0
        while sample_count < epoch_size:    # loop over minibatches in the epoch
            data = train_reader.next_minibatch(
                min(minibatch_size, epoch_size - sample_count),
                input_map=input_map)        # fetch minibatch
            trainer.train_minibatch(data)   # training step
            sample_count += data[output_var].num_samples  # count samples processed so far

        trainer.summarize_training_progress()
        model_output.save("RNN_{}.dnn".format(epoch + 1))

        '''
        # *** Find metrics on the validation set after every epoch ***
        predicted_labels = []
        for i in range(len(validation_query_vectors)):
            queryVec = np.array(validation_query_vectors[i], dtype="float32").reshape(1, q_max_words, emb_dim)
            passageVec = np.array(validation_passage_vectors[i], dtype="float32").reshape(1, p_max_words, emb_dim)
            scores = model_output(queryVec, passageVec)[0]  # forward-prop on the model to get scores
            predictLabel = 1 if scores[1] >= scores[0] else 0
            predicted_labels.append(predictLabel)
        metrics = precision_recall_fscore_support(np.array(validation_labels), np.array(predicted_labels), average='binary')
        '''
        # print("precision : " + str(metrics[0]) + " recall : " + str(metrics[1]) + " f1 : " + str(metrics[2]) + "\n")

    return model_output
def add_dnn_sigmoid_layer(in_dim, out_dim, x, param_scale):
    W = C.parameter((out_dim, in_dim)) * param_scale
    b = C.parameter((out_dim, 1)) * param_scale
    t = C.times(W, x)
    z = C.plus(t, b)
    return C.sigmoid(z)
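# Hypothetical driver (not from the source) for the layer factory above;
# dimensions and scale are illustrative. Note the column-vector input layout
# implied by C.times(W, x):
import numpy as np
import cntk as C

x = C.input_variable((3, 1))
layer = add_dnn_sigmoid_layer(3, 2, x, param_scale=0.1)
print(layer.eval({x: np.ones((1, 3, 1), dtype=np.float32)}))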
def inner(a):
    return a * C.sigmoid(1.702 * a)
def inner(a):
    return a * C.sigmoid(a)
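# Context (added): the two `inner` helpers above are sigmoid-gated activations.
# a * sigmoid(1.702 * a) is the sigmoid approximation of GELU, while
# a * sigmoid(a) is SiLU/Swish. A quick numeric comparison:
import numpy as np
import cntk as C

a = C.input_variable(1)
gelu_approx = a * C.sigmoid(1.702 * a)
silu = a * C.sigmoid(a)
pts = np.array([[-2.], [0.], [2.]], dtype=np.float32)
print(gelu_approx.eval({a: pts}).ravel())
print(silu.eval({a: pts}).ravel())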
def create_model(input_var, output_dim):
    weight = cntk.parameter(shape=(input_var.shape[0], output_dim), name='W')
    bias = cntk.parameter(shape=(output_dim), name='b')
    return cntk.sigmoid(cntk.times(input_var, weight) + bias, name='o')
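# Illustrative usage (an addition; the 3-dimensional input is an assumption)
# of the logistic-regression model above:
import numpy as np
import cntk

features = cntk.input_variable(3)
z = create_model(features, output_dim=1)
print(z.eval({features: np.array([[1., 2., 3.]], dtype=np.float32)}))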
def test_Sigmoid(tmpdir, dtype):
    with C.default_options(dtype=dtype):
        model = C.sigmoid(np.array([-2, -1., 0., 1., 2.]).astype(dtype))
        verify_no_input(model, tmpdir, 'Sigmoid_0')
def bigru_with_match(dh, x):
    c_att = matching_model(att_input, dh)
    x = C.splice(x, c_att)
    x = C.element_times(x, C.sigmoid(C.times(x, Wg)))
    return att_gru(dh, x)
def policy_gradient():
    import cntk as C
    TOTAL_EPISODES = 2000 if isFast else 10000
    H = 100  # number of hidden layer neurons

    observations = C.input_variable(STATE_COUNT, np.float32, name="obs")

    W1 = C.parameter(shape=(STATE_COUNT, H), init=C.glorot_uniform(), name="W1")
    b1 = C.parameter(shape=H, name="b1")
    layer1 = C.relu(C.times(observations, W1) + b1)

    W2 = C.parameter(shape=(H, ACTION_COUNT), init=C.glorot_uniform(), name="W2")
    b2 = C.parameter(shape=ACTION_COUNT, name="b2")
    score = C.times(layer1, W2) + b2
    # Until here it was similar to DQN
    probability = C.sigmoid(score, name="prob")

    input_y = C.input_variable(1, np.float32, name="input_y")
    advantages = C.input_variable(1, np.float32, name="advt")

    loss = -C.reduce_mean(C.log(C.square(input_y - probability) + 1e-4) * advantages,
                          axis=0, name='loss')

    lr = 1e-4
    lr_schedule = C.learning_rate_schedule(lr, C.UnitType.sample)
    sgd = C.sgd([W1, W2], lr_schedule)

    gradBuffer = dict((var.name, np.zeros(shape=var.shape))
                      for var in loss.parameters
                      if var.name in ['W1', 'W2', 'b1', 'b2'])

    xs, hs, label, drs = [], [], [], []
    running_reward = None
    reward_sum = 0
    episode_number = 1

    observation = env.reset()

    actionlist = [i for i in range(env.action_space.n)]

    while episode_number <= TOTAL_EPISODES:
        x = np.reshape(observation, [1, STATE_COUNT]).astype(np.float32)

        # Run the policy network and get an action to take.
        # prob = probability.eval(arguments={observations: x})[0][0][0]
        prob = probability.eval(arguments={observations: x})
        normalized_weights = (prob / np.sum(prob))[0][0]
        action = np.random.choice(actionlist, p=normalized_weights)
        # action = 1 if np.random.uniform() < prob else 0

        xs.append(x)  # observation
        # grad that encourages the action that was taken to be taken
        y = 1 if action == 0 else 0  # a "fake label"
        label.append(y)

        # step the environment and get new measurements
        observation, reward, done, info = env.step(action)
        reward_sum += float(reward)

        # Record reward (has to be done after we call step() to get the reward
        # for the previous action)
        drs.append(float(reward))

        if done:
            # Stack together all inputs, hidden states, action gradients,
            # and rewards for this episode
            epx = np.vstack(xs)
            epl = np.vstack(label).astype(np.float32)
            epr = np.vstack(drs).astype(np.float32)
            xs, label, drs = [], [], []  # reset array memory

            # Compute the discounted reward backwards through time.
            discounted_epr = discount_rewards(epr)
            # Size the rewards to be unit normal (helps control the gradient
            # estimator variance)
            discounted_epr -= np.mean(discounted_epr)
            discounted_epr /= (np.std(discounted_epr) + 0.000000000001)

            # Forward pass
            arguments = {observations: epx, input_y: epl, advantages: discounted_epr}
            state, outputs_map = loss.forward(arguments,
                                              outputs=loss.outputs,
                                              keep_for_backward=loss.outputs)

            # Backward pass
            root_gradients = {v: np.ones_like(o) for v, o in outputs_map.items()}
            vargrads_map = loss.backward(state, root_gradients, variables=set([W1, W2]))

            for var, grad in vargrads_map.items():
                gradBuffer[var.name] += grad

            # Wait for some batches to finish to reduce noise
            if episode_number % BATCH_SIZE_BASELINE == 0:
                grads = {W1: gradBuffer['W1'].astype(np.float32),
                         W2: gradBuffer['W2'].astype(np.float32)}
                updated = sgd.update(grads, BATCH_SIZE_BASELINE)

                # reset the gradBuffer
                gradBuffer = dict((var.name, np.zeros(shape=var.shape))
                                  for var in loss.parameters
                                  if var.name in ['W1', 'W2', 'b1', 'b2'])

                print('Episode: %d. Average reward for episode %f.'
                      % (episode_number, reward_sum / BATCH_SIZE_BASELINE))

                if reward_sum / BATCH_SIZE_BASELINE > REWARD_TARGET:
                    print('Task solved in: %d ' % episode_number)
                    break

                reward_sum = 0

            observation = env.reset()  # reset env
            episode_number += 1
    probability.save('pg.mod')
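# `discount_rewards` is called above but not defined in this snippet; the
# standard REINFORCE-style helper (an assumed implementation) computes
# gamma-discounted returns backwards through time:
import numpy as np

def discount_rewards(r, gamma=0.99):
    discounted = np.zeros_like(r)
    running = 0.0
    for t in reversed(range(len(r))):
        running = running * gamma + r[t]
        discounted[t] = running
    return discounted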
def LSTM(shape, _inf, cell_shape=None, use_peepholes=False,
         init=_default_initializer, init_bias=0,
         enable_self_stabilization=False):  # (x, (h, c))
    has_projection = cell_shape is not None
    has_aux = False

    if has_aux:
        UntestedBranchError("LSTM, has_aux option")
    if enable_self_stabilization:
        UntestedBranchError("LSTM, enable_self_stabilization option")

    shape = _as_tuple(shape)
    cell_shape = _as_tuple(cell_shape) if cell_shape is not None else shape

    #stack_axis = -1
    stack_axis = 0  # BUGBUG: should be -1, i.e. the fastest-changing one, to match BS

    # determine stacking dimensions
    cell_shape_list = list(cell_shape)
    stacked_dim = cell_shape_list[0]
    cell_shape_list[stack_axis] = stacked_dim * 4
    cell_shape_stacked = tuple(cell_shape_list)  # patched dims with stack_axis duplicated 4 times

    # parameters
    b = Parameter(cell_shape_stacked, init=init_bias, name='b')          # a bias
    W = Parameter(_inf.shape + cell_shape_stacked, init=init, name='W')  # input
    A = Parameter(_inf.shape + cell_shape_stacked, init=init, name='A') if has_aux else None  # aux input (optional)
    H = Parameter(shape + cell_shape_stacked, init=init, name='H')       # hidden-to-hidden
    Ci = Parameter(cell_shape, init=init, name='Ci') if use_peepholes else None  # cell-to-hidden {note: applied elementwise}
    Cf = Parameter(cell_shape, init=init, name='Cf') if use_peepholes else None  # cell-to-hidden {note: applied elementwise}
    Co = Parameter(cell_shape, init=init, name='Co') if use_peepholes else None  # cell-to-hidden {note: applied elementwise}

    Wmr = ParameterTensor(cell_shape + shape, init=init,
                          init_value_scale=init_value_scale) if has_projection else None  # final projection

    Sdh = Stabilizer(_inf=_inf.with_shape(shape)) if enable_self_stabilization else Identity(_inf=_inf.with_shape(shape))
    Sdc = Stabilizer(_inf=_inf.with_shape(cell_shape)) if enable_self_stabilization else Identity(_inf=_inf.with_shape(cell_shape))
    Sct = Stabilizer(_inf=_inf.with_shape(cell_shape)) if enable_self_stabilization else Identity(_inf=_inf.with_shape(cell_shape))
    Sht = Stabilizer(_inf=_inf.with_shape(shape)) if enable_self_stabilization else Identity(_inf=_inf.with_shape(shape))

    def create_hc_placeholder():
        return (Placeholder(_inf=_inf.with_shape(shape), name='hPh'),
                Placeholder(_inf=_inf.with_shape(cell_shape), name='cPh'))  # (h, c)

    # parameters to model function
    x = Placeholder(_inf=_inf, name='lstm_block_arg')
    prev_state = create_hc_placeholder()

    # formula of model function
    dh, dc = prev_state

    dhs = Sdh(dh)  # previous values, stabilized
    dcs = Sdc(dc)
    # note: input does not get a stabilizer here, user is meant to do that outside

    # projected contribution from input(s), hidden, and bias
    proj4 = b + times(x, W) + times(dhs, H) + times(aux, A) if has_aux else \
            b + times(x, W) + times(dhs, H)

    it_proj = slice(proj4, stack_axis, 0 * stacked_dim, 1 * stacked_dim)   # split along stack_axis
    bit_proj = slice(proj4, stack_axis, 1 * stacked_dim, 2 * stacked_dim)
    ft_proj = slice(proj4, stack_axis, 2 * stacked_dim, 3 * stacked_dim)
    ot_proj = slice(proj4, stack_axis, 3 * stacked_dim, 4 * stacked_dim)

    # add peephole connection if requested
    def peep(x, c, C):
        return x + C * c if use_peepholes else x

    it = sigmoid(peep(it_proj, dcs, Ci))      # input gate(t)
    bit = it * tanh(bit_proj)                 # applied to tanh of input network
    ft = sigmoid(peep(ft_proj, dcs, Cf))      # forget-me-not gate(t)
    bft = ft * dc                             # applied to cell(t-1)
    ct = bft + bit                            # c(t) is sum of both
    ot = sigmoid(peep(ot_proj, Sct(ct), Co))  # output gate(t)
    ht = ot * tanh(ct)                        # applied to tanh(cell(t))

    c = ct  # cell value
    h = times(Sht(ht), Wmr) if has_projection else \
        ht

    _name_node(h, 'h')
    if _trace_layers:
        _log_node(h)  # this looks right
    _name_node(c, 'c')

    # TODO: figure out how to do scoping, and also rename all the apply... to expression
    apply_x_h_c = combine([h, c])
    # return to caller a helper function to create placeholders for recurrence
    apply_x_h_c.create_placeholder = create_hc_placeholder
    _name_and_extend_Function(apply_x_h_c, 'LSTM')
    return apply_x_h_c
def test_Sigmoid(tmpdir, dtype):
    with C.default_options(dtype=dtype):
        model = C.sigmoid(np.array([-2, -1., 0., 1., 2.]).astype(dtype))
        verify_no_input(model, tmpdir, 'Sigmoid_0')
update_frequency = 20

# Next we will define the policy network.
# The policy network maps an observation to a probability of taking action 0 or 1.
observations = C.sequence.input_variable(state_dim, np.float32, name="obs")

W1 = C.parameter(shape=(state_dim, hidden_size), init=C.glorot_uniform(), name="W1")
b1 = C.parameter(shape=hidden_size, name="b1")
layer1 = C.relu(C.times(observations, W1) + b1)

W2 = C.parameter(shape=(hidden_size, action_count), init=C.glorot_uniform(), name="W2")
b2 = C.parameter(shape=action_count, name="b2")
layer2 = C.times(layer1, W2) + b2

output = C.sigmoid(layer2, name="output")

'''
Now you must define the loss function for training the policy network.

- Recall that the desired loss function is:
  $\frac{1}{m}\sum_1^m \nabla_\theta \log \pi_\theta(a_t|s_t) R$.
- `label` is a variable corresponding to $a_t$, the action the policy selected.
- `output` is the policy network that maps an observation to a probability of
  taking an action.
- And `return_weight` is a scalar that will contain the return $R$.

The current loss function is incorrect and will need to be modified.
'''

# Label will tell the network what action it should have taken.
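# A hedged sketch (an assumption, not the exercise's official answer) of one
# standard REINFORCE-style loss for the network above: maximize
# log pi(a_t | s_t) weighted by the return R. `output`, `state_dim`, C, and np
# come from the cell above; `label` and `return_weight` are the variables the
# text describes.
label = C.sequence.input_variable(1, np.float32, name="label")
return_weight = C.sequence.input_variable(1, np.float32, name="weight")
pi_a = label * output + (1 - label) * (1 - output)  # prob. of the action actually taken
loss = -C.reduce_mean(C.log(pi_a) * return_weight, axis=0, name='loss')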
def test_sigmoid():
    assert_cntk_ngraph_isclose(C.sigmoid([-2, -1., 0., 1., 2.]))
    assert_cntk_ngraph_isclose(C.sigmoid([0.]))
    assert_cntk_ngraph_isclose(
        C.exp([-0.9, -0.8, -0.7, -0.6, -0.5, -0.4, -0.3, -0.2, -0.1, 0.]))
def test_Sigmoid(tmpdir):
    model = C.sigmoid([-2, -1., 0., 1., 2.])
    verify_no_input(model, tmpdir, 'Sigmoid_0')
def gru_with_attention(dh, x):
    c_att = attention_model(att_input, x)
    x = C.splice(x, c_att)
    x = C.element_times(x, C.sigmoid(C.times(x, Wg)))
    return att_gru(dh, x)