Beispiel #1
0
def cnn_network(queryfeatures, passagefeatures, num_classes):
    """Build a query/passage similarity model with two LSTM encoders.

    Each input sequence is encoded by a backward LSTM followed by two
    dense layers (with dropout in between); the two encodings are compared
    with cosine similarity and returned as a 2-way score via splice.

    NOTE(review): despite the name this is a recurrent (LSTM) network, not
    a convolutional one; ``num_classes`` is accepted but unused here.
    """
    with C.layers.default_options(initial_state=0.1):

        def _encode(features, prefix):
            # Backward LSTM over the sequence; keep only its final state.
            recurrent = C.layers.Recurrence(C.layers.LSTM(HIDDEN_DIM),
                                            go_backwards=True,
                                            name=prefix + '_gru')(features)
            final_state = C.sequence.last(recurrent)
            projected = C.layers.Dense(DSSM_DIM, activation=C.relu,
                                       name=prefix + '_proj')(final_state)
            dropped = C.layers.Dropout(DROPOUT_RATIO,
                                       name='dropout_' + prefix + 'do1')(projected)
            return C.layers.Dense(DSSM_DIM, activation=C.tanh,
                                  name=prefix + '_enc')(dropped)

        q_enc = _encode(queryfeatures, 'q')
        a_enc = _encode(passagefeatures, 'a')

        similarity = C.cosine_distance(q_enc, a_enc)
        # Two outputs: pseudo-probability of "dissimilar" and "similar".
        model = C.splice(1 - similarity, similarity)

    return model
def test_cos_distane_backward():
    """Check forward values and input gradients of C.cosine_distance
    against precomputed reference numbers for a tiny 2x2 example."""
    # Two sequence inputs of 2-dim vectors on a shared sequence axis "B";
    # both need gradients so backward() can return derivatives for them.
    x = C.sequence.input_variable(shape=(2, ),
                                  sequence_axis=C.Axis("B"),
                                  needs_gradient=True)
    y = C.sequence.input_variable(shape=(2, ),
                                  sequence_axis=C.Axis("B"),
                                  needs_gradient=True)
    z = C.cosine_distance(x, y)
    # One batch item, two sequence steps, two features each.
    a = np.reshape(np.float32([0.25, 0.5, 0.1, 1]), (1, 2, 2))
    b = np.reshape(np.float32([-0.5, 1.5, -0.3, -1]), (1, 2, 2))
    # Keep the forward state (bwd) so backward() can reuse it.
    bwd, fwd = z.forward({x: a, y: b}, [z.output], set([z.output]))
    value = list(fwd.values())[0]
    # Reference cosine values for the two sequence steps.
    expected = [[0.707107, -0.981665]]
    assert np.allclose(value, expected)
    # Seed the backward pass with ones to obtain raw input derivatives.
    grad = z.backward(bwd, {z.output: np.ones_like(value)}, set([x, y]))
    x_driv_expected = np.ndarray(
        (1, 2, 2),
        dtype=np.float32,
        buffer=np.float32([-1.131371, 0.565686, -0.188727, 0.018873]))
    y_driv_expected = np.ndarray(
        (1, 2, 2),
        dtype=np.float32,
        buffer=np.float32([0.424264, 0.141421, -0.174876, 0.052463]))
    assert (np.all(np.absolute(grad[x] - x_driv_expected) < 1e-6))
    assert (np.all(np.absolute(grad[y] - y_driv_expected) < 1e-6))
    # NOTE(review): the nested method below appears to be mis-pasted into
    # this test function — it references ``self`` and helpers
    # (``input_layer``, ``attention_layer``, ``plus1``) that do not exist
    # in this scope, and it is never called here. It likely belongs to a
    # model class elsewhere; verify against the original source.
    def model(self):
        c1_axis = C.Axis.new_unique_dynamic_axis('c1_axis')
        c2_axis = C.Axis.new_unique_dynamic_axis('c2_axis')
        b = C.Axis.default_batch_axis()

        c1 = C.input_variable(self.word_dim,
                              dynamic_axes=[b, c1_axis],
                              name='c1')
        c2 = C.input_variable(self.word_dim,
                              dynamic_axes=[b, c2_axis],
                              name='c2')

        y = C.input_variable(1, dynamic_axes=[b], name='y')

        c1_processed, c2_processed = self.input_layer(c1, c2).outputs
        att_context = self.attention_layer(c2_processed, c1_processed,
                                           'attention')

        # Sequence lengths via a fold (presumably counting elements with
        # ``plus1`` — TODO confirm against its definition).
        c2_len = C.layers.Fold(plus1)(c2_processed)
        att_len = C.layers.Fold(plus1)(att_context)

        # Cosine similarity of the mean-pooled sequences.
        cos = C.cosine_distance(
            C.sequence.reduce_sum(c2_processed) / c2_len,
            C.sequence.reduce_sum(att_context) / att_len)

        prob = C.sigmoid(cos)
        is_context = C.greater(prob, 0.5)

        loss = C.losses.binary_cross_entropy(prob, y)
        acc = C.equal(is_context, y)

        return cos, loss, acc
Beispiel #4
0
def rnn_network(queryfeatures, passagefeatures, num_classes):
    """Score query/passage similarity with two backward-LSTM encoders.

    Both sequences are encoded identically (LSTM -> dense -> dropout ->
    dense, all tanh activations) and the model output is the cosine
    similarity of the two encodings. ``num_classes`` is accepted for
    signature compatibility but unused here.
    """
    with C.layers.default_options(initial_state=0.1):

        def _encode(features, prefix):
            # Backward LSTM over the sequence; keep only the final state.
            rnn = C.layers.Recurrence(C.layers.LSTM(HIDDEN_DIM),
                                      go_backwards=True,
                                      name=prefix + '_gru')(features)
            final_state = C.sequence.last(rnn)
            projected = C.layers.Dense(DSSM_DIM, activation=C.tanh,
                                       name=prefix + '_proj')(final_state)
            dropped = C.layers.Dropout(DROPOUT_RATIO,
                                       name='dropout_' + prefix + 'do1')(projected)
            return C.layers.Dense(DSSM_DIM, activation=C.tanh,
                                  name=prefix + '_enc')(dropped)

        query_encoding = _encode(queryfeatures, 'q')
        passage_encoding = _encode(passagefeatures, 'a')

        model = C.cosine_distance(query_encoding, passage_encoding)

    return model
Beispiel #5
0
def test_rank0_output():
    """Smoke-test forward/backward of cosine_distance over mini-batches."""
    lhs = C.sequence.input_variable(shape=(768,), sequence_axis=C.Axis("B"),
                                    needs_gradient=True)
    rhs = C.sequence.input_variable(shape=(768,), sequence_axis=C.Axis("B"),
                                    needs_gradient=True)
    cos = C.cosine_distance(lhs, rhs)
    batch_num = 2
    batch_size = 30
    total = batch_num * batch_size
    lhs_data = np.float32(np.random.rand(total, 1500, 768))
    rhs_data = np.float32(np.random.rand(total, 1500, 768))
    for i in range(batch_num):
        lo, hi = i * batch_size, (i + 1) * batch_size
        state, outputs = cos.forward({lhs: lhs_data[lo:hi], rhs: rhs_data[lo:hi]},
                                     [cos.output], set([cos.output]))
        # Just verify backward runs; gradients themselves are not checked.
        cos.backward(state, {cos.output: np.ones_like(outputs[cos.output])},
                     set([lhs, rhs]))
Beispiel #6
0
def test_rank0_output():
    """Smoke-test that cosine_distance forward/backward run on large batches."""
    seq_a = C.sequence.input_variable(shape=(768,), sequence_axis=C.Axis("B"),
                                      needs_gradient=True)
    seq_b = C.sequence.input_variable(shape=(768,), sequence_axis=C.Axis("B"),
                                      needs_gradient=True)
    distance = C.cosine_distance(seq_a, seq_b)
    batch_num, batch_size = 2, 30
    data_a = np.float32(np.random.rand(batch_num * batch_size, 1500, 768))
    data_b = np.float32(np.random.rand(batch_num * batch_size, 1500, 768))
    for i in range(batch_num):
        chunk = slice(i * batch_size, (i + 1) * batch_size)
        state, result = distance.forward(
            {seq_a: data_a[chunk], seq_b: data_b[chunk]},
            [distance.output], set([distance.output]))
        # Exercise the backward pass only; values are not asserted.
        distance.backward(state,
                          {distance.output: np.ones_like(result[distance.output])},
                          set([seq_a, seq_b]))
def test_cosine_distance():
    """cosine_distance of a sequence against a broadcast static input keeps
    the sequence axis and matches precomputed reference values."""
    seq_data = np.arange(25.0, dtype=np.float32).reshape(5, 5)
    static_data = np.arange(0, 5, dtype=np.float32).reshape(1, 5)

    seq_in = C.sequence.input_variable(shape=(5), sequence_axis=C.Axis("Seq"))
    static_in = C.input_variable(shape=(5))
    # Replicate the static input along the sequence axis of seq_in.
    broadcast = C.sequence.broadcast_as(static_in, seq_in)
    cos_seq = C.cosine_distance(seq_in, broadcast)

    # The result must still carry both the batch and the "Seq" dynamic axis.
    assert len(cos_seq.dynamic_axes) == 2
    assert cos_seq.dynamic_axes[1].name == "Seq"

    result = cos_seq.eval({seq_in: [seq_data], static_in: [static_data]})
    reference = [[1., 0.914659, 0.878459, 0.86155, 0.851852]]
    assert np.allclose(result, reference)
Beispiel #8
0
def test_cosine_distance():
    """Cosine similarity of a 5-step sequence vs. a broadcast static vector."""
    sequence_values = np.reshape(np.arange(25.0, dtype=np.float32), (5, 5))
    static_values = np.reshape(np.arange(0, 5, dtype=np.float32), (1, 5))

    sequence_var = C.sequence.input_variable(shape=(5),
                                             sequence_axis=C.Axis("Seq"))
    static_var = C.input_variable(shape=(5))
    cos_seq = C.cosine_distance(
        sequence_var, C.sequence.broadcast_as(static_var, sequence_var))

    # Output retains batch + "Seq" dynamic axes from the sequence operand.
    assert len(cos_seq.dynamic_axes) == 2
    assert cos_seq.dynamic_axes[1].name == "Seq"

    actual = cos_seq.eval({sequence_var: [sequence_values],
                           static_var: [static_values]})
    assert np.allclose(actual,
                       [[1., 0.914659, 0.878459, 0.86155, 0.851852]])
Beispiel #9
0
def test_cos_distane_backward():
    """Forward values and input gradients of cosine_distance on a 2x2 case."""
    left = C.sequence.input_variable(shape=(2,), sequence_axis=C.Axis("B"),
                                     needs_gradient=True)
    right = C.sequence.input_variable(shape=(2,), sequence_axis=C.Axis("B"),
                                      needs_gradient=True)
    cos = C.cosine_distance(left, right)
    left_data = np.float32([0.25, 0.5, 0.1, 1]).reshape(1, 2, 2)
    right_data = np.float32([-0.5, 1.5, -0.3, -1]).reshape(1, 2, 2)
    state, outputs = cos.forward({left: left_data, right: right_data},
                                 [cos.output], set([cos.output]))
    value = list(outputs.values())[0]
    assert np.allclose(value, [[0.707107, -0.981665]])
    # Seed backward with ones to get raw derivatives w.r.t. both inputs.
    grads = cos.backward(state, {cos.output: np.ones_like(value)},
                         set([left, right]))
    expected_left = np.float32(
        [-1.131371, 0.565686, -0.188727, 0.018873]).reshape(1, 2, 2)
    expected_right = np.float32(
        [0.424264, 0.141421, -0.174876, 0.052463]).reshape(1, 2, 2)
    assert np.all(np.absolute(grads[left] - expected_left) < 1e-6)
    assert np.all(np.absolute(grads[right] - expected_right) < 1e-6)
Beispiel #10
0
def test_cos_distane_backward3():
    """cosine_distance(x, x): gradient must equal the sum of both partials
    from the NumPy reference implementation."""
    seq = C.sequence.input_variable(shape=(100,), sequence_axis=C.Axis("B"),
                                    needs_gradient=True)
    cos = C.cosine_distance(seq, seq)
    np.random.seed(0)
    data = np.float32(np.random.rand(10, 50, 100))
    state, outputs = cos.forward({seq: data}, [cos.output], set([cos.output]))
    value = list(outputs.values())[0]
    # numpy_cos is the project-local reference oracle (defined elsewhere).
    reference = numpy_cos(data, data)
    assert np.allclose(value, reference.forward())
    grads = cos.backward(state, {cos.output: np.ones_like(value)}, set([seq]))
    ref_grads = reference.backward()
    # The same variable feeds both operands, so its gradient is the sum.
    expected = ref_grads['a'] + ref_grads['b']
    assert np.all(np.absolute(grads[seq] - expected) < 1e-6)
Beispiel #11
0
def test_cos_distane_backward3():
    """Self-similarity gradient check: d cos(x, x)/dx combines both operand
    partials of the NumPy reference."""
    var = C.sequence.input_variable(shape=(100,), sequence_axis=C.Axis("B"),
                                    needs_gradient=True)
    node = C.cosine_distance(var, var)
    np.random.seed(0)
    samples = np.float32(np.random.rand(10, 50, 100))
    fwd_state, fwd_out = node.forward({var: samples}, [node.output],
                                      set([node.output]))
    actual = list(fwd_out.values())[0]
    oracle = numpy_cos(samples, samples)  # project-local reference oracle
    assert np.allclose(actual, oracle.forward())
    gradients = node.backward(fwd_state,
                              {node.output: np.ones_like(actual)},
                              set([var]))
    oracle_grads = oracle.backward()
    # Both operands are the same variable: gradients accumulate.
    assert np.all(
        np.absolute(gradients[var] - (oracle_grads['a'] + oracle_grads['b']))
        < 1e-6)
Beispiel #12
0
# Script fragment: build a venue/event similarity model trained with
# squared error against a ground-truth success label.
# NOTE(review): the two axes below are created but never used in the
# visible code — possibly leftovers; verify before removing.
i1_axis = C.Axis.new_unique_dynamic_axis('1')
i2_axis = C.Axis.new_unique_dynamic_axis('2')

# Venue branch: sequence input encoded by the shared conv_model
# (defined elsewhere; VEC_DIM is a module-level constant).
xv = C.sequence.input_variable((1, 2316, VEC_DIM))
hv_conv = conv_model(xv)

# Event branch: same encoder applied to a differently-sized input.
xe = C.sequence.input_variable((1, 2826, VEC_DIM))
he_conv = conv_model(xe)

# Ground-truth success label (one float per sequence element).
target = C.sequence.input_variable(1, np.float32)

# Predicted success: cosine similarity of the two encodings.
venue_model = C.cosine_distance(hv_conv, he_conv, name="simi")

# Squared-error training criterion.
venue_loss = C.squared_error(target, venue_model)
# Evaluation metric — identical to the loss here (both squared error).
venue_error = C.squared_error(target, venue_model)

# Per-sample learning-rate schedule, advancing every 10 samples.
lr_per_sample = [LEARNING_RATE]
lr_schedule = C.learners.learning_rate_schedule(lr_per_sample,
                                                C.learners.UnitType.sample,
                                                epoch_size=10)

momentum_as_time_constant = C.learners.momentum_as_time_constant_schedule(700)
# use adam optimizer
venue_learner = C.learners.adam(venue_model.parameters,
                                lr=lr_schedule,
Beispiel #13
0
def cosine(vector_a, vector_b):
    """Thin wrapper returning the CNTK cosine similarity of two tensors."""
    similarity = C.cosine_distance(vector_a, vector_b)
    return similarity