コード例 #1
0
ファイル: main.py プロジェクト: NathanRobertPart/Numpy-LSTM
def train_time_steps(x, y_true, h_prev, ct_prev, dimensionality, h_size,
                     z_size, lstm_instance):
    """Run one forward and one backward sweep over a sequence.

    Args:
        x: Input sequence. Only its length is used in this function.
        y_true: Per-step class indices; used both to build the one-hot
            inputs and to index the predicted distribution for the loss.
        h_prev: Hidden state carried in from the previous call.
        ct_prev: Cell state carried in from the previous call.
        dimensionality: Length of each one-hot input column vector.
        h_size: Hidden size, forwarded to ``lstm.lstm_layer`` and
            ``lstm.passes.backward_pass``.
        z_size: Forwarded to ``lstm.lstm_layer``.
        lstm_instance: Layer to train. A new layer is built from
            ``h_size``/``z_size`` only when ``None`` is passed, so that a
            layer supplied by the caller is no longer silently discarded.

    Returns:
        Tuple ``(loss, h_last, ct_last, lstm_instance)`` where ``h_last`` and
        ``ct_last`` are the states stored at index ``len(x) - 1``.
    """
    timeStep = models.TimeStepModel()
    if lstm_instance is None:
        lstm_instance = lstm.lstm_layer(h_size, z_size)

    n_steps = len(x)

    # Index -1 holds the states that precede the first time step.
    timeStep.ht_s[-1] = np.copy(h_prev)
    timeStep.ct_s[-1] = np.copy(ct_prev)

    loss = 0
    for i in range(n_steps):
        timeStep.x_s[i] = np.zeros((dimensionality, 1))
        # NOTE(review): the one-hot input is built from y_true[i], not x[i]
        # -- confirm this is intended.
        timeStep.x_s[i][y_true[i]] = 1
        # NOTE(review): this reads `h_s` while every other access uses
        # `ht_s` -- verify against models.TimeStepModel.
        timeStep = lstm.passes.forward_pass(timeStep, timeStep.x_s[i],
                                            timeStep.h_s[i - 1],
                                            timeStep.ct_s[i - 1],
                                            lstm_instance, True)

        # Negative log-likelihood of the target class at this step.
        loss += -np.log(timeStep.y_s[i][y_true[i], 0])
        print(loss)

    lstm_instance = lstm.passes.reset_grad(lstm_instance)

    dht_next = np.zeros_like(timeStep.ht_s[0])
    dct_next = np.zeros_like(timeStep.ct_s[0])

    # Walk the time steps from last to first. The previous code called
    # `x.reverse()`, which returns None for a list (and mutates the caller's
    # sequence), so the loop header raised before any backward pass ran.
    for i in reversed(range(n_steps)):
        dht_next, dct_next, timeStep = lstm.passes.backward_pass(
            timeStep, i, y_true, dht_next, dct_next, lstm_instance, True,
            h_size)

    lstm_instance = lstm.passes.clip_grad(lstm_instance)

    return (loss, timeStep.ht_s[n_steps - 1], timeStep.ct_s[n_steps - 1],
            lstm_instance)
コード例 #2
0
ファイル: readm.py プロジェクト: zhangxiaowbl/DeepCare
def build_model(shared_params, options, use_noise = None):
    """Build the symbolic graph for the sequence-level classifier.

    Args:
        shared_params: Mapping of shared variables; the keys 'Wemb', 'U1',
            'b1', 'U2' and 'b2' are read here.
        options: Configuration mapping. Reads 'reg', 'dim_emb' and 'embed';
            'L1_reg' and 'L2_reg' are read only when 'norm' is in 'reg'.
        use_noise: Shared variable passed to every dropout layer. When None,
            a new shared scalar initialised to 0. is created.

    Returns:
        Tuple ``(x, x_mask, x_time, method, y, adm_list, adm_mask, f_pred,
        cost, use_noise)``.
    """
    trng = RandomStreams(SEED)
    # Dropout switch shared by all dropout layers below.
    if use_noise is None:
        use_noise = theano.shared(lstm_layer.numpy_floatX(0.))

    # Symbolic inputs.
    x = tensor.matrix('x', dtype='int64')
    x_mask = tensor.tensor3('x_mask', dtype=config.floatX)
    x_time = tensor.tensor3('x_time', dtype=config.floatX)

    method = tensor.matrix('method', dtype=config.floatX)
    y = tensor.vector('y', dtype='int64')

    adm_list = tensor.tensor3('adm_list', dtype='int64')
    adm_mask = tensor.tensor3('adm_mask', dtype=config.floatX)

    n_steps, n_samples = x.shape[0], x.shape[1]
    n_words = adm_list.shape[2]

    # One pooled embedding per channel: index 0 is diagnosis, index 1 is
    # proc/medi.
    channel_embeddings = []
    for channel in range(2):
        flat_idx = x.flatten()
        codes = adm_list[channel][flat_idx]
        code_mask = adm_mask[channel][flat_idx]
        if 'drin' in options['reg']:
            code_mask = lstm_layer.dropout_layer(code_mask, use_noise, trng, 0.8)

        code_vecs = shared_params['Wemb'][codes.flatten()].reshape(
            [n_steps * n_samples, n_words, options['dim_emb']])

        masked_vecs = code_vecs * code_mask[:, :, None]
        pooled = masked_vecs.sum(axis=1)
        mode = options['embed']
        if mode == 'mean':
            pooled = pooled / code_mask.sum(axis=1)[:, None]
        elif mode == 'sum':
            pooled = pooled / tensor.sqrt(code_mask.sum(axis=1))[:, None]
        elif mode == 'max':
            pooled = masked_vecs.max(axis=1)
        elif mode == 'sqrt':
            pooled = pooled / tensor.sqrt(abs(pooled))
        # Any other mode keeps the plain masked sum.

        channel_emb = pooled.reshape([n_steps, n_samples, options['dim_emb']])

        if 'drfeat' in options['reg']:
            channel_emb = lstm_layer.dropout_layer(channel_emb, use_noise, trng, 0.8)

        channel_embeddings.append(channel_emb)

    emb_dia, emb_pm = channel_embeddings

    proj = lstm_layer.lstm_layer(shared_params, options, emb_dia, emb_pm,
                                 x_mask, x_time[0], method)

    # Weighted means of the hidden states over axis 0, using three weightings:
    # the base weight gated by x_mask[:, 3], gated by x_mask[:, 4], and ungated.
    base_weight = x_mask[:, 0] / (method + tensor.log(x_time[1] / 30.0 + 1))
    pooling_weights = [base_weight * x_mask[:, 3],
                       base_weight * x_mask[:, 4],
                       base_weight]
    pooled_states = [
        tensor.sum(proj * w[:, :, None], axis=0) / tensor.sum(w, axis=0)[:, None]
        for w in pooling_weights
    ]
    hidd = tensor.concatenate(pooled_states, axis=1)

    if 'drhid' in options['reg']:
        hidd = lstm_layer.dropout_layer(hidd, use_noise, trng, 0.9)

    # Feed the pooled state through a sigmoid layer, then a softmax layer.
    hid1 = tensor.nnet.sigmoid(
        tensor.dot(hidd, shared_params['U1']) + shared_params['b1'])

    if 'drhid' in options['reg']:
        hid1 = lstm_layer.dropout_layer(hid1, use_noise, trng, 0.5)

    logits = tensor.dot(hid1, shared_params['U2']) + shared_params['b2']
    pred = tensor.nnet.softmax(logits)
    f_pred = theano.function(
        inputs=[x, x_mask, x_time, method, adm_list, adm_mask],
        outputs=pred.argmax(axis=1),
        name='f_pred')

    # Small constant added inside the log; larger for float16.
    eps = 1e-6 if pred.dtype == 'float16' else 1e-8
    cost = -tensor.log(pred[tensor.arange(n_samples), y] + eps).mean()

    if 'norm' in options['reg']:
        penalty = (options['L1_reg'] * lstm_layer.L1_reg(shared_params) +
                   options['L2_reg'] * lstm_layer.L2_reg(shared_params))
        cost += penalty

    return x, x_mask, x_time, method, y, adm_list, adm_mask, f_pred, cost, use_noise
コード例 #3
0
def build_model(shared_params, options, use_noise=None):
    """Build the symbolic graph that ranks diagnosis codes per step.

    Args:
        shared_params: Mapping of shared variables; the keys 'Wemb', 'V1',
            'c1', 'V2' and 'c2' are read here.
        options: Configuration mapping. Reads 'reg', 'dim_emb', 'embed' and
            'n_diag'; 'L1_reg' and 'L2_reg' are read only when 'norm' is in
            'reg'.
        use_noise: Shared variable passed to every dropout layer. When None,
            a new shared scalar initialised to 0. is created.

    Returns:
        Tuple ``(x, x_mask, x_time, method, y, adm_list, adm_mask, f_pred,
        cost, use_noise)``.
    """
    trng = RandomStreams(SEED)
    # Dropout switch shared by all dropout layers below.
    if use_noise is None:
        use_noise = theano.shared(lstm_layer.numpy_floatX(0.))

    # Symbolic inputs.
    x = tensor.matrix('x', dtype='int64')
    x_mask = tensor.tensor3('x_mask', dtype=config.floatX)
    x_time = tensor.matrix('x_time', dtype=config.floatX)
    method = tensor.matrix('method', dtype=config.floatX)
    y = tensor.matrix('y', dtype='int64')

    adm_list = tensor.tensor3('adm_list', dtype='int64')
    adm_mask = tensor.tensor3('adm_mask', dtype=config.floatX)

    n_steps, n_samples = x.shape[0], x.shape[1]
    n_words = adm_list.shape[2]

    # One pooled embedding per channel: index 0 is diagnosis, index 1 is
    # proc/medi.
    channel_embeddings = []
    for channel in range(2):
        flat_idx = x.flatten()
        codes = adm_list[channel][flat_idx]
        code_mask = adm_mask[channel][flat_idx]
        if 'drin' in options['reg']:
            code_mask = lstm_layer.dropout_layer(code_mask, use_noise, trng,
                                                 0.8)

        code_vecs = shared_params['Wemb'][codes.flatten()].reshape(
            [n_steps * n_samples, n_words, options['dim_emb']])
        masked_vecs = code_vecs * code_mask[:, :, None]
        pooled = masked_vecs.sum(axis=1)
        mode = options['embed']
        if mode == 'mean':
            pooled = pooled / code_mask.sum(axis=1)[:, None]
        elif mode == 'sum':
            pooled = pooled / tensor.sqrt(code_mask.sum(axis=1))[:, None]
        elif mode == 'max':
            pooled = masked_vecs.max(axis=1)
        elif mode == 'sqrt':
            pooled = pooled / tensor.sqrt(abs(pooled))
        # Any other mode keeps the plain masked sum.

        channel_emb = pooled.reshape([n_steps, n_samples, options['dim_emb']])

        if 'drfeat' in options['reg']:
            channel_emb = lstm_layer.dropout_layer(channel_emb, use_noise,
                                                   trng, 0.8)

        channel_embeddings.append(channel_emb)

    emb_dia, emb_pm = channel_embeddings

    proj = lstm_layer.lstm_layer(shared_params, options, emb_dia, emb_pm,
                                 x_mask, x_time, method)

    # Two stacked linear projections; no nonlinearity is applied in between.
    projected = tensor.dot(proj, shared_params['V1']).flatten()
    hid1 = projected.reshape(
        [n_steps * n_samples, options['dim_emb']]) + shared_params['c1']
    hid2 = tensor.dot(hid1, shared_params['V2']) + shared_params['c2']
    prob = tensor.nnet.softmax(hid2).flatten()

    # `pred` holds negative log-probabilities, so an ascending argsort puts
    # the most probable code first.
    eps = 1e-6 if prob.dtype == 'float16' else 1e-8
    pred = -tensor.log(prob + eps)

    ranking = pred.reshape(
        [n_steps, n_samples, options['n_diag']]).argsort(axis=2)
    f_pred = theano.function(
        inputs=[x, x_mask, x_time, method, adm_list, adm_mask],
        outputs=ranking,
        name='f_pred')

    # Look up the target codes and turn them into offsets into flat `pred`.
    target_idx = y.flatten()
    next_diag = adm_list[0][target_idx]
    diag_mask = adm_mask[0][target_idx]
    n_rows, n_cols = next_diag.shape[0], next_diag.shape[1]
    row_offsets = (tensor.arange(n_rows) * options['n_diag'])[:, None]
    diag = next_diag + row_offsets

    # Masked mean over the codes of each target admission.
    per_code = pred[diag.flatten()].reshape([n_rows, n_cols])
    per_row = (per_code * diag_mask).sum(axis=1) / diag_mask.sum(axis=1)

    # Masked mean over axis 0, weighted by x_mask[:, 0].
    step_mask = x_mask[:, 0]
    per_sample = (per_row.reshape([n_steps, n_samples]) * step_mask).sum(axis=0)
    per_sample = per_sample / step_mask.sum(axis=0)

    cost = tensor.mean(per_sample)

    if 'norm' in options['reg']:
        penalty = (options['L1_reg'] * lstm_layer.L1_reg(shared_params) +
                   options['L2_reg'] * lstm_layer.L2_reg(shared_params))
        cost += penalty

    return x, x_mask, x_time, method, y, adm_list, adm_mask, f_pred, cost, use_noise
コード例 #4
0
def test_neuron_init():
    """Check that a freshly built 1x1 layer has a non-zero cell-bias sum."""
    layer = lstm_layer(1, 1)
    bias_total = sum(layer.cell_bias.values)
    assert bias_total != 0
コード例 #5
0
ファイル: readm_all.py プロジェクト: zhangxiaowbl/DeepCare
def build_model(shared_params, options, use_noise = None):
    """Build the symbolic graph for the per-step classifier.

    Args:
        shared_params: Mapping of shared variables; the keys 'Wemb', 'U1',
            'b1', 'U2' and 'b2' are read here.
        options: Configuration mapping. Reads 'reg', 'dim_emb', 'embed' and
            'dim_y'; 'L1_reg' and 'L2_reg' are read only when 'norm' is in
            'reg'.
        use_noise: Shared variable passed to every dropout layer. When None,
            a new shared scalar initialised to 0. is created.

    Returns:
        Tuple ``(x, x_mask, x_time, method, y, adm_list, adm_mask, f_pred,
        f_prob, cost, use_noise)``.
    """
    trng = RandomStreams(SEED)
    # Dropout switch shared by all dropout layers below.
    if use_noise is None:
        use_noise = theano.shared(lstm_layer.numpy_floatX(0.))

    # Symbolic inputs.
    x = tensor.matrix('x', dtype='int64')
    x_mask = tensor.tensor3('x_mask', dtype=config.floatX)
    x_time = tensor.tensor3('x_time', dtype=config.floatX)

    method = tensor.matrix('method', dtype=config.floatX)
    y = tensor.matrix('y', dtype='int64')

    adm_list = tensor.tensor3('adm_list', dtype='int64')
    adm_mask = tensor.tensor3('adm_mask', dtype=config.floatX)

    n_steps, n_samples = x.shape[0], x.shape[1]
    n_words = adm_list.shape[2]

    # One pooled embedding per channel: index 0 is diagnosis, index 1 is
    # proc/medi.
    channel_embeddings = []
    for channel in range(2):
        flat_idx = x.flatten()
        codes = adm_list[channel][flat_idx]
        code_mask = adm_mask[channel][flat_idx]
        if 'drin' in options['reg']:
            code_mask = lstm_layer.dropout_layer(code_mask, use_noise, trng, 0.8)

        code_vecs = shared_params['Wemb'][codes.flatten()].reshape(
            [n_steps * n_samples, n_words, options['dim_emb']])

        masked_vecs = code_vecs * code_mask[:, :, None]
        pooled = masked_vecs.sum(axis=1)
        mode = options['embed']
        if mode == 'mean':
            pooled = pooled / code_mask.sum(axis=1)[:, None]
        elif mode == 'sum':
            pooled = pooled / tensor.sqrt(code_mask.sum(axis=1))[:, None]
        elif mode == 'max':
            pooled = masked_vecs.max(axis=1)
        elif mode == 'sqrt':
            pooled = pooled / tensor.sqrt(abs(pooled))
        # Any other mode keeps the plain masked sum.

        channel_emb = pooled.reshape([n_steps, n_samples, options['dim_emb']])

        if 'drfeat' in options['reg']:
            channel_emb = lstm_layer.dropout_layer(channel_emb, use_noise, trng, 0.8)

        channel_embeddings.append(channel_emb)

    emb_dia, emb_pm = channel_embeddings

    proj = lstm_layer.lstm_layer(shared_params, options, emb_dia, emb_pm,
                                 x_mask, x_time[0], method)

    # Sigmoid layer on top of the LSTM output.
    hidd = tensor.nnet.sigmoid(
        tensor.dot(proj, shared_params['U1']) + shared_params['b1'])

    if 'drhid' in options['reg']:
        hidd = lstm_layer.dropout_layer(hidd, use_noise, trng, 0.5)

    hid1 = tensor.dot(hidd, shared_params['U2']) + shared_params['b2']

    # Softmax is applied on a 2-D view, then reshaped back to 3-D for output.
    n_flat = n_steps * n_samples
    prob = tensor.nnet.softmax(
        hid1.flatten().reshape([n_flat, options['dim_y']]))
    prob_3D = prob.flatten().reshape([n_steps, n_samples, options['dim_y']])

    f_pred = theano.function(
        inputs=[x, x_mask, x_time, method, adm_list, adm_mask],
        outputs=prob_3D.argmax(axis=2),
        name='f_pred')
    f_prob = theano.function(
        [x, x_mask, x_time, method, adm_list, adm_mask], prob_3D)

    # Masked mean of the per-position negative log-likelihood.
    eps = 1e-6 if prob.dtype == 'float16' else 1e-8
    target_prob = prob[tensor.arange(n_flat), y.flatten()]
    log_loss = -tensor.log(target_prob + eps)
    m_flat = x_mask[:, 0].flatten()
    cost = (log_loss * m_flat).sum() / m_flat.sum()

    if 'norm' in options['reg']:
        penalty = (options['L1_reg'] * lstm_layer.L1_reg(shared_params) +
                   options['L2_reg'] * lstm_layer.L2_reg(shared_params))
        cost += penalty

    return x, x_mask, x_time, method, y, adm_list, adm_mask, f_pred, f_prob, cost, use_noise
コード例 #6
0
ファイル: curr_pm.py プロジェクト: trangptm/DeepCare
def build_model(shared_params, options, use_noise = None):
    """Build the symbolic graph that ranks proc/medi codes per step.

    Args:
        shared_params: Mapping of shared variables; the keys 'Wemb', 'V1',
            'c1', 'V2' and 'c2' are read here.
        options: Configuration mapping. Reads 'reg', 'dim_emb', 'embed',
            'n_pm' and 'n_diag'; 'L1_reg' and 'L2_reg' are read only when
            'norm' is in 'reg'.
        use_noise: Shared variable passed to every dropout layer. When None,
            a new shared scalar initialised to 0. is created.

    Returns:
        Tuple ``(x, x_mask, x_time, method, y, adm_list, adm_mask, f_pred,
        cost, use_noise)``.
    """
    trng = RandomStreams(SEED)
    # Dropout switch shared by all dropout layers below.
    if use_noise is None:
        use_noise = theano.shared(lstm_layer.numpy_floatX(0.))

    # Symbolic inputs.
    x = tensor.matrix('x', dtype='int64')
    x_mask = tensor.tensor3('x_mask', dtype=config.floatX)
    x_time = tensor.matrix('x_time', dtype=config.floatX)
    method = tensor.matrix('method', dtype=config.floatX)
    y = tensor.matrix('y', dtype='int64')

    adm_list = tensor.tensor3('adm_list', dtype='int64')
    adm_mask = tensor.tensor3('adm_mask', dtype=config.floatX)

    n_steps, n_samples = x.shape[0], x.shape[1]
    n_words = adm_list.shape[2]

    # One pooled embedding per channel: index 0 is diagnosis, index 1 is
    # proc/medi.
    channel_embeddings = []
    for channel in range(2):
        flat_idx = x.flatten()
        codes = adm_list[channel][flat_idx]
        code_mask = adm_mask[channel][flat_idx]
        if 'drin' in options['reg']:
            code_mask = lstm_layer.dropout_layer(code_mask, use_noise, trng, 0.8)

        code_vecs = shared_params['Wemb'][codes.flatten()].reshape(
            [n_steps * n_samples, n_words, options['dim_emb']])
        masked_vecs = code_vecs * code_mask[:, :, None]
        pooled = masked_vecs.sum(axis=1)
        mode = options['embed']
        if mode == 'mean':
            pooled = pooled / code_mask.sum(axis=1)[:, None]
        elif mode == 'sum':
            pooled = pooled / tensor.sqrt(code_mask.sum(axis=1))[:, None]
        elif mode == 'max':
            pooled = masked_vecs.max(axis=1)
        elif mode == 'sqrt':
            pooled = pooled / tensor.sqrt(abs(pooled))
        # Any other mode keeps the plain masked sum.

        channel_emb = pooled.reshape([n_steps, n_samples, options['dim_emb']])

        if 'drfeat' in options['reg']:
            channel_emb = lstm_layer.dropout_layer(channel_emb, use_noise, trng, 0.8)

        channel_embeddings.append(channel_emb)

    emb_dia, emb_pm = channel_embeddings

    proj = lstm_layer.lstm_layer(shared_params, options, emb_dia, emb_pm,
                                 x_mask, x_time, method, 0)

    # Two stacked linear projections; no nonlinearity is applied in between.
    projected = tensor.dot(proj, shared_params['V1']).flatten()
    hid1 = projected.reshape(
        [n_steps * n_samples, options['dim_emb']]) + shared_params['c1']
    hid2 = tensor.dot(hid1, shared_params['V2']) + shared_params['c2']
    prob = tensor.nnet.softmax(hid2).flatten()

    # `pred` holds negative log-probabilities, so an ascending argsort puts
    # the most probable code first.
    eps = 1e-6 if prob.dtype == 'float16' else 1e-8
    pred = -tensor.log(prob + eps)

    ranking = pred.reshape([n_steps, n_samples, options['n_pm']]).argsort(axis=2)
    f_pred = theano.function(
        inputs=[x, x_mask, x_time, method, adm_list, adm_mask],
        outputs=ranking,
        name='f_pred')

    # Target codes are shifted down by n_diag, then turned into offsets into
    # the flat `pred` vector.
    target_idx = y.flatten()
    curr_pm = adm_list[1][target_idx] - options['n_diag']
    pm_mask = adm_mask[1][target_idx]
    n_rows, n_cols = curr_pm.shape[0], curr_pm.shape[1]
    row_offsets = (tensor.arange(n_rows) * options['n_pm'])[:, None]
    pm = curr_pm + row_offsets

    # Masked mean over the codes of each target admission.
    per_code = pred[pm.flatten()].reshape([n_rows, n_cols])
    per_row = (per_code * pm_mask).sum(axis=1) / pm_mask.sum(axis=1)

    # Masked mean over axis 0, weighted by x_mask[:, 0].
    step_mask = x_mask[:, 0]
    per_sample = (per_row.reshape([n_steps, n_samples]) * step_mask).sum(axis=0)
    per_sample = per_sample / step_mask.sum(axis=0)

    cost = tensor.mean(per_sample)

    if 'norm' in options['reg']:
        penalty = (options['L1_reg'] * lstm_layer.L1_reg(shared_params) +
                   options['L2_reg'] * lstm_layer.L2_reg(shared_params))
        cost += penalty

    return x, x_mask, x_time, method, y, adm_list, adm_mask, f_pred, cost, use_noise
コード例 #7
0
def build_model(tparams, options):
    """Build the decoder graph with a word loss and a cosine hinge loss.

    Args:
        tparams: Mapping of shared variables; the keys 'Wemb', 'Tv', 'bos',
            'Ts', 'Ta', 'task_W', 'task_b', 'Vhid' and 'bhid' are read here,
            and the mapping is also passed to ``lstm_layer``.
        options: Configuration mapping; reads 'SEED' and 'dropout_val'.

    Returns:
        Tuple ``(use_noise, x, mask, s, z, z_score, cost, cost1, cost2)``.
        Note that the returned ``s`` is the variable produced by ``dropout``,
        not the raw input matrix.
    """
    trng = RandomStreams(options['SEED'])

    # Shared value handed to every dropout call.
    use_noise = theano.shared(numpy_floatX(options['dropout_val']))

    # Symbolic inputs. `x` holds word indices (n_steps x n_samples) and `mask`
    # marks the non-padded positions.
    x = tensor.matrix('x', dtype='int64')
    mask = tensor.matrix('mask', dtype=config.floatX)
    z = tensor.tensor3('z', dtype=config.floatX)
    z_score = tensor.matrix('z_score', dtype=config.floatX)
    s = tensor.matrix('s', dtype=config.floatX)
    z1 = dropout(z, trng, use_noise)
    s = dropout(s, trng, use_noise)

    n_steps, n_samples = x.shape[0], x.shape[1]
    n_x = tparams['Wemb'].shape[1]  # word-embedding width

    # Word embeddings, n_steps x n_samples x n_x.
    emb = tparams['Wemb'][x.flatten()].reshape([n_steps, n_samples, n_x])
    emb = dropout(emb, trng, use_noise)

    # Inputs for the two extra leading steps: the projected first slice of z,
    # then a begin-of-sentence vector. Both get a broadcastable leading axis.
    s_proj = tensor.dot(s, tparams['Ta'])
    z0 = tensor.dot(z1[:, 0, :], tparams['Tv']).dimshuffle('x', 0, 1)
    bos = (tensor.dot(tparams['bos'], tparams['Ts']) +
           s_proj).dimshuffle('x', 0, 1)

    # Word inputs are shifted by one step: the last word is dropped.
    word_input = tensor.dot(emb, tparams['Ts']) + s_proj
    emb_input = tensor.concatenate((z0, bos, word_input[:n_steps - 1]))

    # Both prepended steps reuse the mask of the first word.
    first_mask = mask[0].dimshuffle('x', 0)
    mask_input = tensor.concatenate(
        (first_mask, first_mask, mask[:n_steps - 1]))

    h_decoder, _ = lstm_layer(tparams, emb_input, mask_input, prefix='lstm')
    h_decoder = dropout(h_decoder, trng, use_noise)

    # Discard the output of the first (z-fed) step.
    h_decoder = h_decoder[1:]

    # Predict a vector per step and compare it to every slice of z by cosine
    # similarity; the margin is z_score minus that similarity.
    pred_z = tensor.tanh(
        tensor.dot(h_decoder, tparams['task_W']) + tparams['task_b'])
    inner = (pred_z[:, :, None, :] * z[None, :, :, :]).sum(axis=3)
    pred_norm = tensor.sqrt(tensor.sqr(pred_z).sum(axis=2))[:, :, None]
    z_norm = tensor.sqrt(tensor.sqr(z).sum(axis=2))[None, :, :]
    pred_z_cos = z_score[None, :, :] - inner / pred_norm / z_norm

    shape = h_decoder.shape
    n_flat = shape[0] * shape[1]
    h_decoder = h_decoder.reshape((n_flat, shape[2]))

    # Output projection is tied to the word-embedding matrix.
    Vhid = tensor.dot(tparams['Vhid'], tparams['Wemb'].T)
    pred = tensor.nnet.softmax(tensor.dot(h_decoder, Vhid) + tparams['bhid'])

    # Probability assigned to the true word at each flattened position.
    x_vec = x.reshape((n_flat, ))
    pred_word = pred[tensor.arange(n_flat), x_vec]

    # Keep only the positions whose mask equals 1.
    mask_word = mask.reshape((n_flat, ))
    index_list = theano.tensor.eq(mask_word, 1.).nonzero()[0]
    pred_word = pred_word[index_list]

    # Cross-entropy loss.
    cost1 = -tensor.log(pred_word + 1e-6).sum() / n_samples

    # Cosine hinge loss over the same unmasked positions.
    pred_z_cos = pred_z_cos.reshape((n_flat, pred_z_cos.shape[2]))
    hinge = tensor.maximum(pred_z_cos[index_list], 0.)
    cost2 = hinge.sum() / n_samples

    cost = cost1 + cost2

    return use_noise, x, mask, s, z, z_score, cost, cost1, cost2