Example 1
    def bi_sru_layer(self, sru_1, index):
        # forward direction: forget gate, reset gate, cell update
        f_1_f = C.sigmoid(sru_1[0 * self.param2:1 * self.param2] +
                          self.list_bias[0 + index * 4])
        r_1_f = C.sigmoid(sru_1[1 * self.param2:2 * self.param2] +
                          self.list_bias[1 + index * 4])
        c_1_f_r = (1 - f_1_f) * sru_1[2 * self.param2:3 * self.param2]
        dec_c_1_f = C.layers.ForwardDeclaration('f_' + str(index))
        var_c_1_f = C.sequence.delay(dec_c_1_f, initial_state=0, time_step=1)
        nex_c_1_f = var_c_1_f * f_1_f + c_1_f_r
        dec_c_1_f.resolve_to(nex_c_1_f)
        h_1_f = r_1_f * C.tanh(nex_c_1_f) + (
            1 - r_1_f) * sru_1[3 * self.param2:4 * self.param2]

        # backward direction: same recurrence, delayed with a negative time_step
        f_1_b = C.sigmoid(sru_1[4 * self.param2:5 * self.param2] +
                          self.list_bias[2 + index * 4])
        r_1_b = C.sigmoid(sru_1[5 * self.param2:6 * self.param2] +
                          self.list_bias[3 + index * 4])
        c_1_b_r = (1 - f_1_b) * sru_1[6 * self.param2:7 * self.param2]
        dec_c_1_b = C.layers.ForwardDeclaration('b_' + str(index))
        var_c_1_b = C.sequence.delay(dec_c_1_b, time_step=-1)
        nex_c_1_b = var_c_1_b * f_1_b + c_1_b_r
        dec_c_1_b.resolve_to(nex_c_1_b)
        h_1_b = r_1_b * C.tanh(nex_c_1_b) + (
            1 - r_1_b) * sru_1[7 * self.param2:8 * self.param2]

        x = C.splice(h_1_f, h_1_b)
        return x
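For context, the layer above implements a bidirectional Simple Recurrent Unit (SRU): c_t = f_t * c_{t-1} + (1 - f_t) * x~_t and h_t = r_t * tanh(c_t) + (1 - r_t) * x_t, run once forward and once backward. A minimal NumPy sketch of the forward recurrence (hypothetical helper names, not part of the class above):

import numpy as np

def np_sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def sru_forward(x_tilde, f_in, r_in, x_skip):
    # all inputs: shape (T, hidden); returns the hidden states, shape (T, hidden)
    T, d = x_tilde.shape
    c = np.zeros(d)
    h = np.zeros((T, d))
    for t in range(T):
        f = np_sigmoid(f_in[t])                       # forget gate
        r = np_sigmoid(r_in[t])                       # reset (highway) gate
        c = f * c + (1 - f) * x_tilde[t]              # cell recurrence
        h[t] = r * np.tanh(c) + (1 - r) * x_skip[t]   # gated highway output
    return h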
Example 2
def GetPredictionOnEvalSet(model, testfile, submissionfile):
    global q_max_words, p_max_words, emb_dim

    f = open(testfile, 'r', encoding="utf-8")
    all_scores = {}  # key = query_id, value = list of scores for the query's passages
    for line in f:
        tokens = line.strip().split("|")
        # tokens[0] is empty because each line starts with |
        x1 = tokens[1].replace("qfeatures", "").strip()  # query features
        x2 = tokens[2].replace("pfeatures", "").strip()  # passage features
        query_id = tokens[3].replace("qid", "").strip()  # query id
        x1 = [float(v) for v in x1.split()]
        x2 = [float(v) for v in x2.split()]
        queryVec = np.array(x1,
                            dtype="float32").reshape(1, q_max_words, emb_dim)
        passageVec = np.array(x2,
                              dtype="float32").reshape(1, p_max_words, emb_dim)
        score = (C.sigmoid(model(
            queryVec,
            passageVec)).eval())[0]  # do forward-prop on model to get score
        if (query_id in all_scores):
            all_scores[query_id].append(score)
        else:
            all_scores[query_id] = [score]
    f.close()
    fw = open(submissionfile, "w", encoding="utf-8")
    for query_id in all_scores:
        scores = all_scores[query_id]
        scores_str = [str(sc) for sc in scores]  # convert all scores to strings
        scores_str = "\t".join(scores_str)  # join scores into one tab-delimited string
        fw.write(query_id + "\t" + scores_str + "\n")
    fw.close()
Example 3
def test_sigmoid():
    a = C.input_variable((), dtype=np.float16, needs_gradient=True, name='a')
    s = C.sigmoid(a)
    result = s.eval([[0]])
    grad = s.grad([[0]])
    assert np.array_equal(result, np.asarray([0.5]).astype(np.float16))
    assert np.array_equal(grad, np.asarray([0.25]).astype(np.float16))
Example 4
    def model(self):
        c1_axis = C.Axis.new_unique_dynamic_axis('c1_axis')
        c2_axis = C.Axis.new_unique_dynamic_axis('c2_axis')
        b = C.Axis.default_batch_axis()

        c1 = C.input_variable(self.word_dim,
                              dynamic_axes=[b, c1_axis],
                              name='c1')
        c2 = C.input_variable(self.word_dim,
                              dynamic_axes=[b, c2_axis],
                              name='c2')

        y = C.input_variable(1, dynamic_axes=[b], name='y')

        c1_processed, c2_processed = self.input_layer(c1, c2).outputs
        att_context = self.attention_layer(c2_processed, c1_processed,
                                           'attention')

        c2_len = C.layers.Fold(plus1)(c2_processed)
        att_len = C.layers.Fold(plus1)(att_context)

        cos = C.cosine_distance(
            C.sequence.reduce_sum(c2_processed) / c2_len,
            C.sequence.reduce_sum(att_context) / att_len)

        prob = C.sigmoid(cos)
        is_context = C.greater(prob, 0.5)

        loss = C.losses.binary_cross_entropy(prob, y)
        acc = C.equal(is_context, y)

        return cos, loss, acc
Example 5
    def createNetwork(self, inputEmb, preHidden, preMem=None):
        WrX = C.times(inputEmb, self.Wr) + self.Wrb
        UrH = C.times(preHidden, self.Ur)
        R = C.sigmoid(WrX + UrH)  # reset gate

        WzX = C.times(inputEmb, self.Wz) + self.Wzb
        UzH = C.times(preHidden, self.Uz)
        Z = C.sigmoid(WzX + UzH)  # update gate

        UH = C.times(preHidden, self.U) + self.Ub
        UHR = C.element_times(UH, R)  # reset gate applied to the recurrent term

        WX = C.times(inputEmb, self.W) + self.Wb
        HTilde = C.tanh(WX + UHR)  # candidate hidden state

        CurH = C.element_times(HTilde, 1 - Z) + C.element_times(preHidden, Z)
        return (CurH, None)
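As a reference for the gating above, here is the same GRU step restated in NumPy (a sketch with hypothetical parameter names; note this variant applies the reset gate to the already-projected recurrent term, exactly as UHR does above):

import numpy as np

def np_sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def gru_step(x, h_prev, Wr, Wrb, Ur, Wz, Wzb, Uz, W, Wb, U, Ub):
    r = np_sigmoid(x @ Wr + Wrb + h_prev @ Ur)              # reset gate
    z = np_sigmoid(x @ Wz + Wzb + h_prev @ Uz)              # update gate
    h_tilde = np.tanh(x @ W + Wb + (h_prev @ U + Ub) * r)   # candidate state
    return h_tilde * (1 - z) + h_prev * z                   # interpolated new state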
Example 6
def test_sigmoid_2():
    cntk_op = C.sigmoid([0.])
    cntk_ret = cntk_op.eval()

    ng_op, _ = CNTKImporter().import_model(cntk_op)
    ng_ret = ng.transformers.make_transformer().computation(ng_op)()

    assert np.isclose(cntk_ret, ng_ret).all()
Example 7
    def createNetwork(self, inputEmb, preHidden):
        WX = C.times(inputEmb, self.W) + self.Wb
        UH = C.times(preHidden, self.U) + self.Ub

        R = C.sigmoid(
            C.slice(WX, -1, 0, self.hiddenSize) +
            C.slice(UH, -1, 0, self.hiddenSize))  # reset gate
        Z = C.sigmoid(
            C.slice(WX, -1, self.hiddenSize, self.hiddenSize * 2) +
            C.slice(UH, -1, self.hiddenSize, self.hiddenSize * 2))  # update gate

        UHR = C.element_times(
            C.slice(UH, -1, self.hiddenSize * 2, self.hiddenSize * 3), R)
        HTilde = C.tanh(
            C.slice(WX, -1, self.hiddenSize * 2, self.hiddenSize * 3) + UHR)

        CurH = C.element_times(HTilde, 1 - Z) + C.element_times(preHidden, Z)
        return CurH
Example 8
 def func(x_var):
     x = C.placeholder()
     transform_gate = C.sigmoid(C.times(x, WT, name=name + '_T') + bT)
     update = C.relu(C.times(x, WU, name=name + '_U') + bU)
     return C.as_block(
         x + transform_gate * (update - x),  # = t(x)*u(x) + (1 - t(x))*x
         [(x, x_var)],
         'HighwayBlock',
         'HighwayBlock' + name)
Example 9
def resnet_exclusive(input, num_filters):
    c1 = conv_bn_relu(input, (3, 3), num_filters)
    c2 = conv_bn(c1, (3, 3), num_filters, bn_init_scale=1)

    b1 = conv_bn_relu(input, (1, 1), num_filters)
    b2 = 1 - C.sigmoid(b1)

    input = input * b2

    p = input + c2

    return C.relu(p)
Example 10
def lstm_func(output_dim, cell_dim, x, input_dim, prev_state_h, prev_state_c):

    # input gate (t)
    it_w = C.times(x, C.parameter((input_dim, cell_dim)))
    it_b = C.parameter((1, cell_dim))
    it_h = C.times(prev_state_h, C.parameter((output_dim, cell_dim)))
    it_c = C.parameter((1, cell_dim)) * prev_state_c
    it = C.sigmoid((it_w + it_b + it_h + it_c), name='it')

    # applied to tanh of input
    bit_w = C.times(x, C.parameter((input_dim, cell_dim)))
    bit_h = C.times(prev_state_h, C.parameter((output_dim, cell_dim)))
    bit_b = C.parameter((1, cell_dim))
    bit = it * C.tanh(bit_w + (bit_h + bit_b))

    # forget-me-not gate (t)
    ft_w = C.times(x, C.parameter((input_dim, cell_dim)))
    ft_b = C.parameter((1, cell_dim))
    ft_h = C.times(prev_state_h, C.parameter((output_dim, cell_dim)))
    ft_c = C.parameter((1, cell_dim)) * prev_state_c
    ft = C.sigmoid((ft_w + ft_b + ft_h + ft_c), name='ft')

    # applied to cell(t-1)
    bft = ft * prev_state_c

    # c(t) = sum of both
    ct = bft + bit

    # output gate
    ot_w = C.times(x, C.parameter((input_dim, cell_dim)))
    ot_b = C.parameter((1, cell_dim))
    ot_h = C.times(prev_state_h, C.parameter((output_dim, cell_dim)))
    ot_c = C.parameter((1, cell_dim)) * prev_state_c
    ot = C.sigmoid((ot_w + ot_b + ot_h + ot_c), name='ot')

    # applied to tanh(cell(t))
    ht = ot * C.tanh(ct)

    # return cell value and hidden state
    return ct, ht
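For reference, the peephole-style recurrence above (diagonal cell-to-gate terms implemented as parameter * prev_state_c) condenses to a few lines of NumPy; a sketch with hypothetical parameter names:

import numpy as np

def np_sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def lstm_step(x, h_prev, c_prev, p):
    # p: dict of parameters; gates mirror lstm_func above
    it = np_sigmoid(x @ p['Wi'] + h_prev @ p['Hi'] + p['wi'] * c_prev + p['bi'])
    bit = it * np.tanh(x @ p['Wg'] + h_prev @ p['Hg'] + p['bg'])  # gated input
    ft = np_sigmoid(x @ p['Wf'] + h_prev @ p['Hf'] + p['wf'] * c_prev + p['bf'])
    ct = ft * c_prev + bit                                        # new cell state
    ot = np_sigmoid(x @ p['Wo'] + h_prev @ p['Ho'] + p['wo'] * c_prev + p['bo'])
    ht = ot * np.tanh(ct)                                         # new hidden state
    return ct, ht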
Example 11
    def grid_lstm_func(m_t_1_k, m_tk_1, c_t_1_k, c_tk_1, x_tk):
        common_11 = C.times(m_t_1_k, W_t_im) + C.times(
            m_tk_1, W_k_im) + C.times(c_t_1_k, W_t_ic) + C.times(
                c_tk_1, W_k_ic)
        i_t_tk = C.sigmoid(C.times(x_tk, W_t_ix) + common_11 + b_t_i)
        i_k_tk = C.sigmoid(C.times(x_tk, W_k_ix) + common_11 + b_k_i)

        common_12 = C.times(m_t_1_k, W_t_fm) + C.times(
            m_tk_1, W_k_fm) + C.times(c_t_1_k, W_t_fc) + C.times(
                c_tk_1, W_k_fc)
        f_t_tk = C.sigmoid(C.times(x_tk, W_t_fx) + common_12 + b_t_f)
        f_k_tk = C.sigmoid(C.times(x_tk, W_k_fx) + common_12 + b_k_f)

        c_t_tk = C.element_times(f_t_tk, c_t_1_k) + C.element_times(
            i_t_tk,
            C.tanh(
                C.times(x_tk, W_t_cx) + C.times(m_t_1_k, W_t_cm) +
                C.times(m_tk_1, W_k_cm) + b_t_c))  # (13)
        c_k_tk = C.element_times(f_k_tk, c_tk_1) + C.element_times(
            i_k_tk,
            C.tanh(
                C.times(x_tk, W_k_cx) + C.times(m_t_1_k, W_t_cm) +
                C.times(m_tk_1, W_k_cm) + b_k_c))  # (14)

        common_15 = C.times(m_t_1_k, W_t_om) + C.times(
            m_tk_1, W_k_om) + C.times(c_t_tk, W_t_oc) + C.times(
                c_k_tk, W_k_oc)
        o_t_tk = C.sigmoid(C.times(x_tk, W_t_ox) + common_15 + b_t_o)
        o_k_tk = C.sigmoid(C.times(x_tk, W_k_ox) + common_15 + b_k_o)

        m_t_tk = C.element_times(o_t_tk, C.tanh(c_t_tk))
        m_k_tk = C.element_times(o_k_tk, C.tanh(c_k_tk))

        return (m_t_tk, m_k_tk, c_t_tk, c_k_tk)
Example 12
def lstm_func(output_dim, cell_dim, x, input_dim, prev_state_h, prev_state_c):

    # input gate (t)
    it_w = C.times(C.parameter((cell_dim, input_dim)), x)
    it_b = C.parameter((cell_dim))
    it_h = C.times(C.parameter((cell_dim, output_dim)), prev_state_h)
    it_c = C.parameter((cell_dim)) * prev_state_c
    it = C.sigmoid((it_w + it_b + it_h + it_c), name='it')

    # applied to tanh of input
    bit_w = C.times(C.parameter((cell_dim, input_dim)), x)
    bit_h = C.times(C.parameter((cell_dim, output_dim)), prev_state_h)
    bit_b = C.parameter((cell_dim))
    bit = it * C.tanh(bit_w + (bit_h + bit_b))

    # forget-me-not gate (t)
    ft_w = C.times(C.parameter((cell_dim, input_dim)), x)
    ft_b = C.parameter((cell_dim))
    ft_h = C.times(C.parameter((cell_dim, output_dim)), prev_state_h)
    ft_c = C.parameter((cell_dim)) * prev_state_c
    ft = C.sigmoid((ft_w + ft_b + ft_h + ft_c), name='ft')

    # applied to cell(t-1)
    bft = ft * prev_state_c

    # c(t) = sum of both
    ct = bft + bit

    # output gate
    ot_w = C.times(C.parameter((cell_dim, input_dim)), x)
    ot_b = C.parameter((cell_dim))
    ot_h = C.times(C.parameter((cell_dim, output_dim)), prev_state_h)
    ot_c = C.parameter((cell_dim)) * prev_state_c
    ot = C.sigmoid((ot_w + ot_b + ot_h + ot_c), name='ot')

    # applied to tanh(cell(t))
    ht = ot * C.tanh(ct)

    # return cell value and hidden state
    return ct, ht
Example 13
    def createNetwork(self, inputEmb, preHidden, preMem):
        WX = C.times(inputEmb, self.W) + self.Wb
        UH = C.times(preHidden, self.U) + self.Ub

        I = C.sigmoid(
            C.slice(WX, -1, 0, self.hiddenSize) +
            C.slice(UH, -1, 0, self.hiddenSize))  # input gate
        O = C.sigmoid(
            C.slice(WX, -1, self.hiddenSize, self.hiddenSize * 2) +
            C.slice(UH, -1, self.hiddenSize, self.hiddenSize * 2))  # output gate
        F = C.sigmoid(
            C.slice(WX, -1, self.hiddenSize * 2, self.hiddenSize * 3) +
            C.slice(UH, -1, self.hiddenSize * 2, self.hiddenSize * 3))  # forget gate
        N = C.tanh(
            C.slice(WX, -1, self.hiddenSize * 3, self.hiddenSize * 4) +
            C.slice(UH, -1, self.hiddenSize * 3, self.hiddenSize * 4))  # candidate memory

        NI = C.element_times(N, I)
        FM = C.element_times(F, preMem)
        CurMem = NI + FM
        CurH = C.element_times(C.tanh(CurMem), O)
        return (CurH, CurMem)
Example 14
 def func(x_var):
     x  = C.placeholder()
     WT = C.Parameter((dim,dim,), init=transform_weight_initializer, name=name+'_WT')
     bT = C.Parameter(dim,        init=transform_bias_initializer,   name=name+'_bT')
     WU = C.Parameter((dim,dim,), init=update_weight_initializer,    name=name+'_WU')
     bU = C.Parameter(dim,        init=update_bias_initializer,      name=name+'_bU')
     transform_gate = C.sigmoid(C.times(x, WT, name=name+'_T') + bT)
     update = C.relu(C.times(x, WU, name=name+'_U') + bU)
     return C.as_block(
         x + transform_gate * (update - x),
         [(x, x_var)],
         'HighwayBlock',
         'HighwayBlock'+name)
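The returned expression x + transform_gate * (update - x) is algebraically the familiar highway combination t(x)*u(x) + (1 - t(x))*x; a quick standalone NumPy check of that identity:

import numpy as np

rng = np.random.default_rng(0)
x, u, t = rng.standard_normal(5), rng.standard_normal(5), rng.random(5)
assert np.allclose(x + t * (u - x), t * u + (1 - t) * x)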
Example 15
    def unit(dh, dc, x):
        '''dh: out_dim, dc: 4096, x: input_dim'''
        proj4 = b + C.times(x, W) + C.times(dh, H)
        it_proj  = proj4[0:1 * stacked_dim]  # split along stack_axis
        bit_proj = proj4[1 * stacked_dim:2 * stacked_dim]
        ft_proj  = proj4[2 * stacked_dim:3 * stacked_dim]
        ot_proj  = proj4[3 * stacked_dim:4 * stacked_dim]

        it = C.sigmoid(it_proj)  # input gate(t)
        # TODO: should both activations be replaced?
        bit = it * C.tanh(bit_proj)  # applied to tanh of input network

        ft = C.sigmoid(ft_proj)  # forget-me-not gate(t)
        bft = ft * dc  # applied to cell(t-1)

        ct = bft + bit  # c(t) is the sum of both

        ot = C.sigmoid(ot_proj)  # output gate(t)
        ht = ot * C.tanh(ct)  # applied to tanh(cell(t))

        c = ct  # cell value
        h = ht
        proj_h = C.times(h, proj_W)  # out_dim
        return (proj_h, c)
Example 16
def sigmoid(x, name=''):
    r'''
    Computes the element-wise sigmoid of `x`:

    :math:`sigmoid(x) = {1 \over {1+\exp(-x)}}`

    The output tensor has the same shape as `x`.

    Example:
        >>> C.eval(C.sigmoid([-2, -1., 0., 1., 2.]))
        [array([[ 0.119203,  0.268941,  0.5     ,  0.731059,  0.880797]])]

    Args:
        x: numpy array or any :class:`cntk.Function` that outputs a tensor
        name (str): the name of the node in the network
    Returns:
        :class:`cntk.Function`
    '''
    from cntk import sigmoid
    x = sanitize_input(x)
    return sigmoid(x, name).output()
Example 18
 def func(x_var):
     x = C.placeholder()
     WT = C.Parameter((dim, dim), init=transform_weight_initializer,
                      name=name + '_WT')
     bT = C.Parameter(dim, init=transform_bias_initializer,
                      name=name + '_bT')
     WU = C.Parameter((dim, dim), init=update_weight_initializer,
                      name=name + '_WU')
     bU = C.Parameter(dim, init=update_bias_initializer, name=name + '_bU')
     transform_gate = C.sigmoid(C.times(x, WT, name=name + '_T') + bT)
     update = C.tanh(C.times(x, WU, name=name + '_U') + bU)
     return C.as_block(update * transform_gate + (1 - transform_gate) * x,
                       [(x, x_var)], 'SingleInner', 'SingleInner' + name)
Example 19
    def LSTMCell(x, y, dh, dc):
        '''LightLSTM Cell'''

        b = C.parameter(shape=(4 * cell_dim), init=0)
        W = C.parameter(shape=(input_dim, 4 * cell_dim), init=C.glorot_uniform())
        H = C.parameter(shape=(cell_dim, 4 * cell_dim), init=C.glorot_uniform())

        # projected contribution from input x, hidden, and bias
        proj4 = b + C.times(x, W) + C.times(dh, H)

        it_proj = C.slice(proj4, -1, 0 * cell_dim, 1 * cell_dim)
        bit_proj = C.slice(proj4, -1, 1 * cell_dim, 2 * cell_dim)
        ft_proj = C.slice(proj4, -1, 2 * cell_dim, 3 * cell_dim)
        ot_proj = C.slice(proj4, -1, 3 * cell_dim, 4 * cell_dim)

        it = C.sigmoid(it_proj)  # input gate
        bit = it * C.tanh(bit_proj)

        ft = C.sigmoid(ft_proj)  # forget gate
        bft = ft * dc

        ct = bft + bit
        ot = C.sigmoid(ot_proj)  # output gate
        ht = ot * C.tanh(ct)

        # projected contribution from input y, hidden, and bias
        proj4_2 = b + C.times(y, W) + C.times(ht, H)

        it_proj_2 = C.slice(proj4_2, -1, 0 * cell_dim, 1 * cell_dim)
        bit_proj_2 = C.slice(proj4_2, -1, 1 * cell_dim, 2 * cell_dim)
        ft_proj_2 = C.slice(proj4_2, -1, 2 * cell_dim, 3 * cell_dim)
        ot_proj_2 = C.slice(proj4_2, -1, 3 * cell_dim, 4 * cell_dim)

        it_2 = C.sigmoid(it_proj_2)  # input gate
        bit_2 = it_2 * C.tanh(bit_proj_2)

        ft_2 = C.sigmoid(ft_proj_2)  # forget gate
        bft_2 = ft_2 * ct

        ct2 = bft_2 + bit_2
        ot_2 = C.sigmoid(ot_proj_2)  # output gate
        ht2 = ot_2 * C.tanh(ct2)
        return (ht, ct, ht2, ct2)
Example 20
def TrainAndValidate(trainfile):

    #*****Hyper-Parameters******
    global tf, l, a, r
    q_max_words = 15
    p_max_words = 120
    emb_dim = 50
    num_classes = 2
    minibatch_size = 4000
    epoch_size = 5241880  # number of samples in the training set
    total_epochs = 19  # total number of epochs to run
    query_total_dim = q_max_words * emb_dim
    label_total_dim = num_classes
    passage_total_dim = p_max_words * emb_dim

    #****** Create placeholders for reading Training Data  ***********
    query_input_var = C.sequence.input_variable((1, q_max_words, emb_dim),
                                                np.float32,
                                                is_sparse=False)
    passage_input_var = C.sequence.input_variable((1, p_max_words, emb_dim),
                                                  np.float32,
                                                  is_sparse=False)
    output_var = C.input_variable(num_classes, np.float32, is_sparse=False)
    train_reader = create_reader(trainfile, True, query_total_dim,
                                 passage_total_dim, label_total_dim)
    input_map = {
        query_input_var: train_reader.streams.queryfeatures,
        passage_input_var: train_reader.streams.passagefeatures,
        output_var: train_reader.streams.labels
    }

    # ********* Model configuration *******
    model_output = rnn_network(query_input_var, passage_input_var, num_classes)
    # model_output.restore('RNN_{}.dnn')  # uncomment to resume training from a saved model

    if (output_var[1] == '1'):
        a = 1
    else:
        a = 0
    loss = C.sigmoid(create_loss(model_output, a))
    pe = None

    lr_per_sample = ([0.0015625] * 20 + [0.00046875] * 20 +
                     [0.00015625] * 20 + [0.000046785] * 10 + [0.000015625])
    lr_schedule = C.learning_parameter_schedule_per_sample(
        lr_per_sample, epoch_size=epoch_size)
    mms = [0] * 20 + [0.9200444146293233] * 20 + [0.9591894571091382]
    mm_schedule = C.learners.momentum_schedule(mms,
                                               epoch_size=epoch_size,
                                               minibatch_size=minibatch_size)
    l2_reg_weight = 0.0002

    dssm_learner = C.learners.momentum_sgd(model_output.parameters,
                                           lr_schedule, mm_schedule)
    learner = dssm_learner
    progress_printer = C.logging.ProgressPrinter(tag='Training',
                                                 num_epochs=total_epochs)

    #************Create Trainer with model_output object, learner and loss parameters*************
    trainer = C.Trainer(model_output, (loss, pe), learner, progress_printer)
    C.logging.log_number_of_parameters(model_output)

    # **** Train the model in batchwise mode *****
    for epoch in range(total_epochs):  # loop over epochs
        print("Epoch : ", epoch)
        sample_count = 0
        while sample_count < epoch_size:  # loop over minibatches in the epoch
            data = train_reader.next_minibatch(
                min(minibatch_size, epoch_size - sample_count),
                input_map=input_map)  # fetch minibatch.
            trainer.train_minibatch(data)  # training step
            sample_count += data[output_var].num_samples  # samples processed so far

        trainer.summarize_training_progress()
        model_output.save("RNN_{}.dnn".format(epoch + 1))
        '''
        #*** Find metrics on validation set after every epoch ******# 
        predicted_labels=[]
        for i in range(len(validation_query_vectors)):
            queryVec   = np.array(validation_query_vectors[i],dtype="float32").reshape(1,q_max_words,emb_dim)
            passageVec = np.array(validation_passage_vectors[i],dtype="float32").reshape(1,p_max_words,emb_dim)
            scores = model_output(queryVec,passageVec)[0]   # do forward-prop on model to get score  
            predictLabel = 1 if scores[1]>=scores[0] else 0
            predicted_labels.append(predictLabel) 
        metrics = precision_recall_fscore_support(np.array(validation_labels), np.array(predicted_labels), average='binary')'''
        #print("precision : "+str(metrics[0])+" recall : "+str(metrics[1])+" f1 : "+str(metrics[2])+"\n")

    return model_output
Example 21
def add_dnn_sigmoid_layer(in_dim, out_dim, x, param_scale):
    W = C.parameter((out_dim, in_dim)) * param_scale
    b = C.parameter((out_dim, 1)) * param_scale
    t = C.times(W, x)
    z = C.plus(t, b)
    return C.sigmoid(z)
Example 22
 def inner(a):
     return a * C.sigmoid(1.702 * a)  # sigmoid approximation of GELU
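The expression above is the sigmoid approximation of the GELU activation, gelu(x) ≈ x·σ(1.702x). A standalone NumPy comparison against the exact erf-based definition (the worst-case gap is roughly 0.02):

import math
import numpy as np

def gelu_exact(x):
    return np.array([0.5 * v * (1.0 + math.erf(v / math.sqrt(2.0))) for v in x])

def gelu_sigmoid(x):
    return x / (1.0 + np.exp(-1.702 * x))

x = np.linspace(-4.0, 4.0, 81)
print(np.max(np.abs(gelu_exact(x) - gelu_sigmoid(x))))  # about 0.02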
Example 23
 def inner(a):
     return a * C.sigmoid(a)  # SiLU (Swish-1) activation
Example 24
def create_model(input_var, output_dim):
    weight = cntk.parameter(shape=(input_var.shape[0], output_dim), name='W')
    bias = cntk.parameter(shape=(output_dim,), name='b')

    return cntk.sigmoid(cntk.times(input_var, weight) + bias, name='o')
Example 26
def test_Sigmoid(tmpdir, dtype):
    with C.default_options(dtype=dtype):
        model = C.sigmoid(np.array([-2, -1., 0., 1., 2.]).astype(dtype))
        verify_no_input(model, tmpdir, 'Sigmoid_0')
Example 27
 def bigru_with_match(dh, x):
     c_att = matching_model(att_input, dh)
     x = C.splice(x, c_att)
     x = C.element_times(x, C.sigmoid(C.times(x, Wg)))
     return att_gru(dh, x)
Example 28
def policy_gradient():
    import cntk as C

    TOTAL_EPISODES = 2000 if isFast else 10000

    H = 100  # number of hidden layer neurons

    observations = C.input_variable(STATE_COUNT, np.float32, name="obs")
    
    W1 = C.parameter(shape=(STATE_COUNT, H), init=C.glorot_uniform(), name="W1")
    b1 = C.parameter(shape=H, name="b1")
    layer1 = C.relu(C.times(observations, W1) + b1)
    
    W2 = C.parameter(shape=(H, ACTION_COUNT), init=C.glorot_uniform(), name="W2")
    b2 = C.parameter(shape=ACTION_COUNT, name="b2")
    score = C.times(layer1, W2) + b2
    # Until here it was similar to DQN
    
    probability = C.sigmoid(score, name="prob")
    input_y = C.input_variable(1, np.float32, name="input_y")
    advantages = C.input_variable(1, np.float32, name="advt")
    
    loss = -C.reduce_mean(C.log(C.square(input_y - probability) + 1e-4) * advantages, axis=0, name='loss')
    
    lr = 1e-4
    lr_schedule = C.learning_rate_schedule(lr, C.UnitType.sample)
    sgd = C.sgd([W1, W2], lr_schedule)
    
    gradBuffer = dict((var.name, np.zeros(shape=var.shape)) for var in loss.parameters if var.name in ['W1', 'W2', 'b1', 'b2'])
    
    xs, hs, label, drs = [], [], [], []
    running_reward = None
    reward_sum = 0
    episode_number = 1
    
    observation = env.reset()
    actionlist = [i for i in range(env.action_space.n)]
#%%
    while episode_number <= TOTAL_EPISODES:
        x = np.reshape(observation, [1, STATE_COUNT]).astype(np.float32)
    
        # Run the policy network and get an action to take.
        #prob = probability.eval(arguments={observations: x})[0][0][0]
        prob = probability.eval(arguments={observations: x})        
        normalized_weights = (prob / np.sum(prob))[0][0]                
        action = np.random.choice(actionlist, p=normalized_weights)
        #action = 1 if np.random.uniform() < prob else 0
    
        xs.append(x)  # observation
        # grad that encourages the action that was taken to be taken
    
        y = 1 if action == 0 else 0  # a "fake label"
        label.append(y)
    
        # step the environment and get new measurements
        observation, reward, done, info = env.step(action)
        reward_sum += float(reward)
    
        # Record reward (has to be done after we call step() to get reward for previous action)
        drs.append(float(reward))
    
        if done:
            # Stack together all inputs, hidden states, action gradients, and rewards for this episode
            epx = np.vstack(xs)
            epl = np.vstack(label).astype(np.float32)
            epr = np.vstack(drs).astype(np.float32)
            xs, label, drs = [], [], []  # reset array memory
    
            # Compute the discounted reward backwards through time.
            discounted_epr = discount_rewards(epr)
            # Size the rewards to be unit normal (helps control the gradient estimator variance)
            discounted_epr -= np.mean(discounted_epr)
            discounted_epr /= (np.std(discounted_epr) + 0.000000000001)
    
            # Forward pass
            arguments = {observations: epx, input_y: epl, advantages: discounted_epr}
            state, outputs_map = loss.forward(arguments, outputs=loss.outputs,
                                              keep_for_backward=loss.outputs)
    
            # Backward pass
            root_gradients = {v: np.ones_like(o) for v, o in outputs_map.items()}
            vargrads_map = loss.backward(state, root_gradients, variables=set([W1, W2]))
    
            for var, grad in vargrads_map.items():
                gradBuffer[var.name] += grad
    
            # Wait for some batches to finish to reduce noise
            if episode_number % BATCH_SIZE_BASELINE == 0:
                grads = {W1: gradBuffer['W1'].astype(np.float32),
                         W2: gradBuffer['W2'].astype(np.float32)}
                updated = sgd.update(grads, BATCH_SIZE_BASELINE)
    
                # reset the gradBuffer
                gradBuffer = dict((var.name, np.zeros(shape=var.shape))
                                  for var in loss.parameters if var.name in ['W1', 'W2', 'b1', 'b2'])
    
                print('Episode: %d. Average reward for episode %f.' % (episode_number, reward_sum / BATCH_SIZE_BASELINE))
    
                if reward_sum / BATCH_SIZE_BASELINE > REWARD_TARGET:
                    print('Task solved in: %d ' % episode_number)
                    break
    
                reward_sum = 0    
            observation = env.reset()  # reset env
            episode_number += 1    
    probability.save('pg.mod')
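Note that discount_rewards is referenced above but not defined in this snippet; a common implementation (an assumption on our part, Karpathy-style, with a hypothetical discount factor gamma of 0.99) would be:

def discount_rewards(r, gamma=0.99):
    # Compute discounted returns back-to-front over one episode.
    discounted = np.zeros_like(r)
    running = 0.0
    for t in reversed(range(len(r))):
        running = running * gamma + float(r[t])
        discounted[t] = running
    return discounted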
Example 29
def LSTM(shape,
         _inf,
         cell_shape=None,
         use_peepholes=False,
         init=_default_initializer,
         init_bias=0,
         enable_self_stabilization=False):  # (x, (h, c))
    has_projection = cell_shape is not None
    has_aux = False

    if has_aux:
        UntestedBranchError("LSTM, has_aux option")
    if enable_self_stabilization:
        UntestedBranchError("LSTM, enable_self_stabilization option")

    shape = _as_tuple(shape)

    cell_shape = _as_tuple(cell_shape) if cell_shape is not None else shape

    # stack_axis = -1
    stack_axis = 0  # BUGBUG: should be -1, i.e. the fastest-changing one, to match BS
    # determine stacking dimensions
    cell_shape_list = list(cell_shape)
    stacked_dim = cell_shape_list[0]
    cell_shape_list[stack_axis] = stacked_dim * 4
    cell_shape_stacked = tuple(
        cell_shape_list)  # patched dims with stack_axis duplicated 4 times

    # parameters
    b = Parameter(cell_shape_stacked, init=init_bias, name='b')  # a bias
    W = Parameter(_inf.shape + cell_shape_stacked, init=init,
                  name='W')  # input
    A = Parameter(_inf.shape + cell_shape_stacked, init=init,
                  name='A') if has_aux else None  # aux input (optional)
    H = Parameter(shape + cell_shape_stacked, init=init,
                  name='H')  # hidden-to-hidden
    Ci = Parameter(
        cell_shape, init=init, name='Ci'
    ) if use_peepholes else None  # cell-to-hidden {note: applied elementwise}
    Cf = Parameter(
        cell_shape, init=init, name='Cf'
    ) if use_peepholes else None  # cell-to-hidden {note: applied elementwise}
    Co = Parameter(
        cell_shape, init=init, name='Co'
    ) if use_peepholes else None  # cell-to-hidden {note: applied elementwise}

    Wmr = ParameterTensor(
        cell_shape + shape, init=init, init_value_scale=init_value_scale
    ) if has_projection else None  # final projection

    Sdh = Stabilizer(_inf=_inf.with_shape(
        shape)) if enable_self_stabilization else Identity(
            _inf=_inf.with_shape(shape))
    Sdc = Stabilizer(_inf=_inf.with_shape(
        cell_shape)) if enable_self_stabilization else Identity(
            _inf=_inf.with_shape(cell_shape))
    Sct = Stabilizer(_inf=_inf.with_shape(
        cell_shape)) if enable_self_stabilization else Identity(
            _inf=_inf.with_shape(cell_shape))
    Sht = Stabilizer(_inf=_inf.with_shape(
        shape)) if enable_self_stabilization else Identity(
            _inf=_inf.with_shape(shape))

    def create_hc_placeholder():
        return (Placeholder(_inf=_inf.with_shape(shape), name='hPh'),
                Placeholder(_inf=_inf.with_shape(cell_shape),
                            name='cPh'))  # (h, c)

    # parameters to model function
    x = Placeholder(_inf=_inf, name='lstm_block_arg')
    prev_state = create_hc_placeholder()

    # formula of model function
    dh, dc = prev_state

    dhs = Sdh(dh)  # previous values, stabilized
    dcs = Sdc(dc)
    # note: input does not get a stabilizer here, user is meant to do that outside

    # projected contribution from input(s), hidden, and bias
    proj4 = b + times(x, W) + times(dhs, H) + times(aux, A) if has_aux else \
            b + times(x, W) + times(dhs, H)

    it_proj = slice(proj4, stack_axis, 0 * stacked_dim,
                    1 * stacked_dim)  # split along stack_axis
    bit_proj = slice(proj4, stack_axis, 1 * stacked_dim, 2 * stacked_dim)
    ft_proj = slice(proj4, stack_axis, 2 * stacked_dim, 3 * stacked_dim)
    ot_proj = slice(proj4, stack_axis, 3 * stacked_dim, 4 * stacked_dim)

    # add peephole connection if requested
    def peep(x, c, C):
        return x + C * c if use_peepholes else x

    it = sigmoid(peep(it_proj, dcs, Ci))  # input gate(t)
    bit = it * tanh(bit_proj)  # applied to tanh of input network

    ft = sigmoid(peep(ft_proj, dcs, Cf))  # forget-me-not gate(t)
    bft = ft * dc  # applied to cell(t-1)

    ct = bft + bit  # c(t) is sum of both

    ot = sigmoid(peep(ot_proj, Sct(ct), Co))  # output gate(t)
    ht = ot * tanh(ct)  # applied to tanh(cell(t))

    c = ct  # cell value
    h = times(Sht(ht), Wmr) if has_projection else \
        ht

    _name_node(h, 'h')
    if _trace_layers:
        _log_node(h)  # this looks right
    _name_node(c, 'c')

    # TODO: figure out how to do scoping, and also rename all the apply... to expression
    apply_x_h_c = combine([h, c])
    # return to caller a helper function to create placeholders for recurrence
    apply_x_h_c.create_placeholder = create_hc_placeholder
    _name_and_extend_Function(apply_x_h_c, 'LSTM')
    return apply_x_h_c
Example 31
update_frequency = 20

# Next we will define the policy network.
# The policy network maps an observation to a probability of taking action 0 or 1.
observations = C.sequence.input_variable(state_dim, np.float32, name="obs")
W1 = C.parameter(shape=(state_dim, hidden_size),
                 init=C.glorot_uniform(),
                 name="W1")
b1 = C.parameter(shape=hidden_size, name="b1")
layer1 = C.relu(C.times(observations, W1) + b1)
W2 = C.parameter(shape=(hidden_size, action_count),
                 init=C.glorot_uniform(),
                 name="W2")
b2 = C.parameter(shape=action_count, name="b2")
layer2 = C.times(layer1, W2) + b2
output = C.sigmoid(layer2, name="output")
'''
Now you must define the loss function for training the policy network.

- Recall that the desired loss function is: $\frac{1}{m}\sum_1^m \nabla_\theta \log \pi_\theta(a_t|s_t) R$.

- Label is a variable corresponding to $a_t$, the action the policy selected.

- output is the policy network that maps an observation to a probability of taking an action.

- return_weight is a scalar that will contain the return $R$.

The current loss function is incorrect and will need to be modified.
'''

# Label will tell the network what action it should have taken.
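One way the corrected loss could look (a hedged sketch, not the lab's official solution; it assumes label and return_weight are declared as inputs sharing output's dynamic axes):

label = C.sequence.input_variable(1, np.float32, name="label")
return_weight = C.sequence.input_variable(1, np.float32, name="weight")
# probability the network assigned to the action actually taken
pi_a = label * output + (1 - label) * (1 - output)
loss = -C.reduce_mean(C.log(pi_a) * return_weight, axis=0, name="loss")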
Example 32
def test_sigmoid():
    assert_cntk_ngraph_isclose(C.sigmoid([-2, -1., 0., 1., 2.]))
    assert_cntk_ngraph_isclose(C.sigmoid([0.]))
    assert_cntk_ngraph_isclose(
        C.exp([-0.9, -0.8, -0.7, -0.6, -0.5, -0.4, -0.3, -0.2, -0.1, 0.]))
Example 33
def test_Sigmoid(tmpdir):
    model = C.sigmoid([-2, -1., 0., 1., 2.])
    verify_no_input(model, tmpdir, 'Sigmoid_0')
Example 35
 def gru_with_attention(dh, x):
     c_att = attention_model(att_input, x)
     x = C.splice(x, c_att)
     x = C.element_times(x, C.sigmoid(C.times(x, Wg)))
     return att_gru(dh, x)