def process_history(hist, inp):
    # Split the one-hot history into its first wg_dim entries and the rest,
    # then embed both parts.
    wk = C.slice(hist, 0, 0, myConfig['wg_dim'])
    wn = hist[myConfig['wg_dim']:]
    hist_processed = embed_layer(wk, wn)
    out_logits = s2smodel(hist_processed, inp)
    # One-hot the highest-scoring entry and flatten it to a vector.
    hamax = C.reshape(C.hardmax(out_logits), (-1,))
    return hamax
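All of these examples revolve around C.hardmax, which maps a score vector to a one-hot vector at the position of the maximum (unlike softmax, which yields a distribution). A minimal sketch, assuming only cntk and numpy are installed:

import cntk as C
import numpy as np

scores = np.array([0.1, 2.5, 0.3], dtype=np.float32)
print(C.hardmax(scores).eval())   # -> one-hot at the argmax: [0., 1., 0.]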
Example #2
def model(self):
    token_axis = C.Axis.new_unique_dynamic_axis('token_axis')
    b = C.Axis.default_batch_axis()

    token = C.input_variable(self.word_dim, dynamic_axes=[b, token_axis], name='token')

    # one label per sequence: 8 emotion classes
    emotion = C.input_variable(self.num_emotions, dynamic_axes=[b], name='emotion')

    processed_token = self.input_layer(token)
    att = self.attention_layer(processed_token, processed_token, 'attention')

    # reduce the sequence to a fixed-size vector and stabilize it
    last = C.sequence.last(att)
    last = C.layers.Stabilizer()(last)

    # final projection onto the emotion classes
    out_w = C.parameter(shape=(2*self.hidden_dim, self.num_emotions), init=C.glorot_uniform())
    out_b = C.parameter(shape=(self.num_emotions,), init=C.glorot_uniform())

    out = C.softmax(C.times(last, out_w) + out_b)
    loss = C.binary_cross_entropy(out, emotion)

    # F-measure of the hard (one-hot) prediction against the one-hot label
    f1 = C.losses.fmeasure(C.hardmax(out), emotion)

    return out, loss, f1
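One way to wire the returned triple into training, sketched under the assumption that clf is an instance of the class above and that the learning-rate and momentum schedules are reasonable defaults (none of these names come from the example):

out, loss, f1 = clf.model()                       # hypothetical instance
learner = C.adam(out.parameters,
                 lr=C.learning_parameter_schedule(0.001),
                 momentum=C.momentum_schedule(0.9))
trainer = C.Trainer(out, (loss, f1), [learner])   # f1 doubles as the eval metric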
Example #3
def test_Hardmax(tmpdir):
    data = np.asarray([1., 1., 2., 3.], dtype=np.float32)
    model = C.hardmax(data)
    verify_no_input(model, tmpdir, 'Hardmax_0')
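Since the maximum of the test data is unique (3. at index 3), the exported model should emit a one-hot at that position; a quick sanity check outside the ONNX round-trip:

print(C.hardmax(np.asarray([1., 1., 2., 3.], dtype=np.float32)).eval())
# -> one-hot at index 3: [0., 0., 0., 1.]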
Example #4
    def output_layer(self, embed, attention_context, model_context, aw, q_processed, c_processed, cw):
        cw_ph = C.placeholder()
        att_context = C.placeholder(shape=(8*self.hidden_dim,))
        query_processed = C.placeholder(shape=(2*self.hidden_dim,))
        context_processed = C.placeholder(shape=(2*self.hidden_dim,))
        mod_context = C.placeholder(shape=(2*self.hidden_dim,))
        a_onehot = C.placeholder(shape=(self.vocab_size+1,))

        start_logits = C.layers.Dense(1, name='out_start')(C.dropout(C.splice(mod_context, att_context), self.dropout))
        start_hardmax = seq_hardmax(start_logits)
        att_mod_ctx = C.sequence.last(C.sequence.gather(mod_context, start_hardmax))
        att_mod_ctx_expanded = C.sequence.broadcast_as(att_mod_ctx, att_context)
        end_input = C.splice(att_context, mod_context, att_mod_ctx_expanded, mod_context * att_mod_ctx_expanded)
        m2 = OptimizedRnnStack(self.hidden_dim, bidirectional=True, use_cudnn=self.use_cudnn, name='output_rnn')(end_input)
        end_logits = C.layers.Dense(1, name='out_end')(C.dropout(C.splice(m2, att_context), self.dropout))

        start_flag = C.hardmax(start_logits)
        end_flag = C.hardmax(end_logits)
     
        def create_model():
            # Encoder: (input*) --> (h0, c0)
            # Create multiple layers of LSTMs by passing the output of the i-th layer
            # to the (i+1)th layer as its input
            with C.layers.default_options(enable_self_stabilization=True, go_backwards=False):
                encode = C.layers.Sequential([
                    C.layers.Stabilizer(),
                    OptimizedRnnStack(self.hidden_dim, return_full_state=True),
                ])

                encode_c = C.layers.Sequential([
                    C.layers.Stabilizer(),
                    OptimizedRnnStack(self.hidden_dim, return_full_state=True),
                ])
            
            # Decoder: (history*, input*) --> unnormalized_word_logp*
            # where history is one of these, delayed by 1 step and <s> prepended:
            #  - training: labels
            #  - testing:  its own output hardmax(z) (greedy decoder)
            with C.layers.default_options(enable_self_stabilization=True):
                # sub-layers
                stab_in = C.layers.Stabilizer()
                rec_blocks = [C.layers.LSTM(self.hidden_dim) for i in range(self.num_layers)]
                stab_out = C.layers.Stabilizer()
                proj_out = C.layers.Dense(self.vocab_size+1, name='out_proj')
                # attention model
                attention_model = C.layers.AttentionModel(self.attention_dim,
                                                          name='attention_model')  # :: (h_enc*, h_dec) -> (h_dec augmented)
                hstate_dense = C.layers.Dense(self.hidden_dim, activation=C.tanh, input_rank=1)
                cstate_dense = C.layers.Dense(self.hidden_dim, activation=C.tanh, input_rank=1)
                W_dense = C.layers.Dense(2*self.hidden_dim, input_rank=1)
                U_dense = C.layers.Dense(2*self.hidden_dim, input_rank=1)
                V_dense = C.layers.Dense(2*self.hidden_dim, input_rank=1)
                maxout  = C.layers.MaxPooling((2,), strides=2)
                # layer function
                @C.Function
                def decode(history, q, c, start_logits, end_logits):
                    q = encode(q)
                    c = encode_c(C.splice(c, start_logits, end_logits, axis=0))
                    r = history
                    r = stab_in(r)

                    q_last_h = C.sequence.last(q.outputs[0])
                    q_last_c = C.sequence.last(q.outputs[1])
                    c_last_h = C.sequence.last(c.outputs[0])
                    c_last_c = C.sequence.last(c.outputs[1])
                    initial_hstate = hstate_dense(C.splice(q_last_h, c_last_h))
                    initial_cstate = cstate_dense(C.splice(q_last_c, c_last_c))

                    rec_block = rec_blocks[0]   # LSTM(hidden_dim)  # :: (dh, dc, x) -> (h, c)
                    
                    @C.Function
                    def find_embed(x):
                        gx, ngx = C.slice(x, 0, 0, self.wg_dim), C.slice(x, 0, self.wg_dim, self.vocab_size)
                        return embed(gx, ngx) 

                    @C.Function
                    def lstm_with_attention(dh, dc, r, x):
                        history_embed = find_embed(x)
                        h_att = attention_model(c.outputs[0], dh)
                        q_att = attention_model(q.outputs[0], dh)
                        att = C.splice(h_att, q_att)
                        x = C.splice(x, att)
                        x, dc = rec_block(dh, dc, x).outputs
          
                        # Adding 0*r keeps r in the graph; CNTK errors out when a
                        # recurrence variable goes unused.
                        r = U_dense(att) + W_dense(history_embed) + V_dense(x) + 0*r
                        # Note: placing the W_dense term first triggered a CNTK bug:
                        # r = W_dense(history_embed) + U_dense(att) + V_dense(x) + 0*r
                        return x, dc, r
                    _, _, r = C.layers.RecurrenceFrom(lstm_with_attention, return_full_state=True)(
                        initial_hstate, initial_cstate,
                        C.Constant(np.zeros(2*self.hidden_dim)), r).outputs
        
                    r = maxout(r)
                    r = stab_out(r)
                    r = proj_out(r)
                    #r = C.softmax(r)
                    r = C.layers.Label('out_proj_out')(r)
                    return r
            return decode

        def create_model_train(s2smodel):
            # model used in training (history is known from labels)
            # note: the labels must NOT contain the initial <s>
            @C.Function
            def model_train(labels, q, c, start_logits, end_logits): # (input*, labels*) --> (word_logp*)

                # The input to the decoder always starts with the special label sequence start token.
                # Then, use the previous value of the label sequence (for training) or the output (for execution).
                past_labels = C.layers.Delay(initial_state=self.sentence_start)(labels)
    
                return s2smodel(past_labels, q, c, start_logits, end_logits)
            return model_train

        def create_model_greedy(s2smodel):
            # model used in (greedy) decoding (inferencing) (history is decoder's own output)
            @C.Function
            def model_greedy(q, c, start_logits, end_logits): # (input*) --> (word_sequence*)
                # Decoding is an unfold() operation starting from sentence_start.
                # We must transform s2smodel (history*, input* -> word_logp*) into a generator (history* -> output*)
                # which holds 'input' in its closure.
                unfold = C.layers.UnfoldFrom(
                    lambda history: s2smodel(history, q, c, start_logits, end_logits) >> C.hardmax,
                    # stop once sentence_end_index is the max-scoring output
                    until_predicate=lambda w: w[..., self.sentence_end_index],
                    length_increase=self.sentence_max_length)
                return unfold(initial_state=self.sentence_start, dynamic_axes_like=c)
            return model_greedy
       
        s2smodel = create_model()
      
        model_train = create_model_train(s2smodel)(a_onehot, query_processed, context_processed, start_logits, end_logits)
        greedy_words = create_model_greedy(s2smodel)(query_processed, context_processed, start_logits, end_logits)
        model_greedy = C.argmax(greedy_words, 0)
        context = C.argmax(cw_ph, 0)

        return C.as_block(
            C.combine((model_train, model_greedy, start_logits, end_logits, context)),
            [(att_context, attention_context), (mod_context, model_context), (a_onehot, aw),
             (query_processed, q_processed), (context_processed, c_processed), (cw_ph, cw)],
            'attention_layer',
            'attention_layer')
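The core of this example is the pair of decoders built over one shared s2smodel: the training decoder consumes the ground-truth history (delayed labels), while the greedy decoder feeds its own hardmax output back in through UnfoldFrom. The pattern in isolation, where step and end_index are illustrative placeholders rather than names from the code:

unfold = C.layers.UnfoldFrom(
    lambda history: step(history) >> C.hardmax,   # feed the one-hot output back as history
    until_predicate=lambda w: w[..., end_index],  # stop once </s> scores highest
    length_increase=1.5)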
Example #5
def test_Hardmax(tmpdir, dtype):
    with C.default_options(dtype=dtype):
        data = np.asarray([1., 1., 2., 3.], dtype=dtype)
        model = C.hardmax(data)
        verify_no_input(model, tmpdir, 'Hardmax_0')
Example #6
def test_Hardmax(tmpdir, dtype):
    with C.default_options(dtype=dtype):
        data = np.asarray([1., 1., 2., 3.], dtype=dtype)
        model = C.hardmax(data)
        verify_no_input(model, tmpdir, 'Hardmax_0')
Example #7
def create_network(input_vocab_dim, label_vocab_dim):
    # network complexity; initially low for faster testing
    hidden_dim = 256
    num_layers = 1

    # Source and target inputs to the model
    input_seq_axis = Axis('inputAxis')
    label_seq_axis = Axis('labelAxis')
    raw_input = sequence.input(shape=(input_vocab_dim,), sequence_axis=input_seq_axis, name='raw_input')
    raw_labels = sequence.input(shape=(label_vocab_dim,), sequence_axis=label_seq_axis, name='raw_labels')

    # Instantiate the sequence to sequence translation model
    input_sequence = raw_input

    # Drop the sentence start token from the label, for decoder training
    label_sequence = sequence.slice(raw_labels, 1, 0) # <s> A B C </s> --> A B C </s>
    label_sentence_start = sequence.first(raw_labels)        # <s>

    is_first_label = sequence.is_first(label_sequence)       # <s> 0 0 0 ...
    label_sentence_start_scattered = sequence.scatter(
        label_sentence_start, is_first_label)

    # Encoder
    encoder_outputH = stabilize(input_sequence)
    for i in range(0, num_layers):
        (encoder_outputH, encoder_outputC) = LSTMP_component_with_self_stabilization(
            encoder_outputH.output, hidden_dim, hidden_dim, future_value, future_value)

    thought_vectorH = sequence.first(encoder_outputH)
    thought_vectorC = sequence.first(encoder_outputC)

    thought_vector_broadcastH = sequence.broadcast_as(
        thought_vectorH, label_sequence)
    thought_vector_broadcastC = sequence.broadcast_as(
        thought_vectorC, label_sequence)

    # Decoder
    decoder_history_hook = alias(label_sequence, name='decoder_history_hook') # copy label_sequence

    decoder_input = element_select(is_first_label, label_sentence_start_scattered, past_value(
        decoder_history_hook))

    decoder_outputH = stabilize(decoder_input)
    for i in range(0, num_layers):
        if i > 0:
            recurrence_hookH = past_value
            recurrence_hookC = past_value
        else:
            isFirst = sequence.is_first(label_sequence)
            recurrence_hookH = lambda operand: element_select(
                isFirst, thought_vector_broadcastH, past_value(operand))
            recurrence_hookC = lambda operand: element_select(
                isFirst, thought_vector_broadcastC, past_value(operand))

        (decoder_outputH, decoder_outputC) = LSTMP_component_with_self_stabilization(
            decoder_outputH.output, hidden_dim, hidden_dim, recurrence_hookH, recurrence_hookC)

    decoder_output = decoder_outputH

    # Softmax output layer
    z = linear_layer(stabilize(decoder_output), label_vocab_dim)

    # Criterion nodes
    ce = cross_entropy_with_softmax(z, label_sequence)
    errs = classification_error(z, label_sequence)

    # network output for decoder history
    net_output = hardmax(z)

    # make a clone of the graph where the ground truth is replaced by the network output
    ng = z.clone(CloneMethod.share, {decoder_history_hook.output : net_output.output})

    return {
        'raw_input' : raw_input,
        'raw_labels' : raw_labels,
        'ce' : ce,
        'pe' : errs,
        'ng' : ng,
        'output': z
    }
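The hardmax trick here is the clone: CloneMethod.share re-wires the graph so that decoder_history_hook receives hardmax(z) instead of the ground-truth labels, yielding a greedy decoder that shares all weights with the training network. A hedged usage sketch; the vocabulary sizes and minibatch variables below are assumptions:

net = create_network(input_vocab_dim=69, label_vocab_dim=69)
# ... train via net['ce'] ...
# 'ng' still needs raw_labels bound (it supplies the sequence axis and <s>):
decoded = net['ng'].eval({net['raw_input']: test_input,
                          net['raw_labels']: test_labels})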
Example #8
def test_Hardmax(tmpdir):
    data = np.asarray([1., 1., 2., 3.], dtype=np.float32)
    model = C.hardmax(data)
    verify_no_input(model, tmpdir, 'Hardmax_0')