Example #1
def get_lstm_layer(prior_layer, layer_name):

    # OUTPUT_DIM == C_DIM
    # INPUT_DIM == C_DIM

    # [ BN x OUTPUT_DIM ]
    prior_ht = Identity([], name='prior_' + layer_name + '_ht')
    # [ BN x (OUTPUT_DIM + INPUT_DIM) ]
    with_prior_ht = Concat([prior_layer, prior_ht])

    # [ (OUTPUT_DIM + INPUT_DIM) x C_DIM ]
    ft_w = Identity([], name=layer_name + '_ft_w')
    # [ C_DIM ]
    ft_b = Identity([], name=layer_name + '_ft_b')
    # [ BN x C_DIM ]
    ft_mult = MatrixMult([with_prior_ht, ft_w])
    # [ BN x C_DIM ]
    ft = Sigmoid(MatrixAdd([ft_mult, ft_b]))

    # [ (OUTPUT_DIM + INPUT_DIM) x C_DIM ]
    it_w = Identity([], name=layer_name + '_it_w')
    # [ C_DIM ]
    it_b = Identity([], name=layer_name + '_it_b')
    # [ BN x C_DIM ]
    it_mult = MatrixMult([with_prior_ht, it_w])
    # [ BN x C_DIM ]
    it = Sigmoid(MatrixAdd([it_mult, it_b]))

    # [ (OUTPUT_DIM + INPUT_DIM) x C_DIM ]
    delta_c_w = Identity([], name=layer_name + '_delta_c_w')
    # [ C_DIM ]
    delta_c_b = Identity([], name=layer_name + '_delta_c_b')
    # [ BN x C_DIM ]
    delta_c_mult = MatrixMult([with_prior_ht, delta_c_w])
    # [ BN x C_DIM ]
    delta_c = TanH(MatrixAdd([delta_c_mult, delta_c_b]))

    # [ BN x C_DIM ]
    prior_ct = Identity([], name='prior_' + layer_name + '_ct')

    # [ BN x C_DIM ] and [ BN x C_DIM ]
    ct_after_forget = ElementwiseMult([prior_ct, ft])
    # [ BN x C_DIM ] and [ BN x C_DIM ]
    ct = MatrixAddExact([ct_after_forget, ElementwiseMult([delta_c, it])])
    ct_pass = Identity([ct], name=layer_name + '_ct')

    # [ (OUTPUT_DIM + INPUT_DIM) x OUTPUT_DIM ]
    output_c_w = Identity([], name=layer_name + '_output_c_w')
    # [ OUTPUT_DIM ]
    output_c_b = Identity([], name=layer_name + '_output_c_b')
    # [ BN x OUTPUT_DIM ]
    output_mult = MatrixMult([with_prior_ht, output_c_w])
    # [ BN x OUTPUT_DIM ]
    output_before_cond = Sigmoid(MatrixAdd([output_mult, output_c_b]))

    # [ BN x OUTPUT_DIM ] and [ BN x C_DIM ], so OUTPUT_DIM == C_DIM
    output = ElementwiseMult([output_before_cond, ct])
    output_pass = Identity([output], name=layer_name + '_ht')

    return output
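
A note on the layer above: the prior_<name>_ht / <name>_ht and prior_<name>_ct / <name>_ct name pairs appear to be the convention the unroller uses to feed each step's states back in (the same prior_h1 / h1 pairing the later examples use), and this variant multiplies the output gate by ct directly rather than the textbook tanh(ct). A minimal stacking sketch, assuming the same DAG primitives; the projection names out_w and out_b are hypothetical:

    # hypothetical wiring sketch, not from the source
    inputs = Identity([], name='input')

    # each layer registers its own prior_*_ht / prior_*_ct feedback inputs
    lstm1 = get_lstm_layer(inputs, 'lstm1')
    lstm2 = get_lstm_layer(lstm1, 'lstm2')

    # project the top hidden state to the output width
    out_w = Identity([], name='out_w')  # [ C_DIM x NUM ]
    out_b = Identity([], name='out_b')  # [ NUM ]
    output = Sigmoid(MatrixAdd([MatrixMult([lstm2, out_w]), out_b]), name='output')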
Example #2
def get_random_rnn():
    weights1 = Parameter(name="weights1")
    inputs = Input(name='input')

    prior_hidden = Prior(name='hidden')
    # feed the concatenated [input, prior hidden] through the weights
    inputs_concat = Concat([inputs, prior_hidden], name='concat')

    hidden_internal = Relu([MatrixMult([inputs_concat, weights1], name='matrixmult')],
                           name='hidden_internal')

    # the Identity named 'hidden' feeds the Prior of the same name on the next step
    hidden = Identity([hidden_internal], name='hidden')

    weights2 = Parameter(name="weights2")
    output = Relu(
        [MatrixMult([hidden_internal, weights2], name='matrixmult2')],
        name='output')
    print(list(output.dag.get_node_names()))
    return output
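
A sketch of how this graph might be trained, assuming the RNNTrainer / get_sgd_optimizer / running_rnn_loss API from the tests below also accepts the Input/Parameter/Prior node types; the sizes (H_SIZE, NUM, BN) and weight shapes are hypothetical, chosen to match the concat of a 3-wide input with the hidden state:

    H_SIZE = 6
    NUM = 3
    BN = 4
    weights = {
        # weights1 multiplies the [input, hidden] concat
        'weights1': 0.05 * np.random.rand(NUM + H_SIZE, H_SIZE),
        'weights2': 0.05 * np.random.rand(H_SIZE, NUM),
    }
    optimizer = get_sgd_optimizer(0.0025)
    trainer = RNNTrainer(
        get_random_rnn(), weights, {'hidden': np.zeros((BN, H_SIZE))},
        running_rnn_loss('input', 'output', mean_squared_loss), optimizer)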
Example #3
def test_rnn_works_simple():

    i = Identity([], name='input')

    def get_recursive_layer(prior, layer_name, weight_name, bias_name):
        fcw1 = Identity([], name=weight_name)
        fcb1 = Identity([], name=bias_name)
        ii = Identity([], name='prior_' + layer_name)
        joined = Concat([prior, ii])
        multed = MatrixMult([joined, fcw1])
        added = MatrixAdd([multed, fcb1])
        h1 = Relu(added, name=layer_name + '_internal')
        # register the hidden state under layer_name so it can be fed back next step
        h11 = Identity([h1], name=layer_name)
        return h1

    h1 = get_recursive_layer(i, 'h1', 'fc_w1', 'fc_b1')
    fcw2 = Identity([], name='fc_w2')
    fcb2 = Identity([], name='fc_b2')
    output = Relu(MatrixAdd([MatrixMult([h1, fcw2]), fcb2]), name='output')

    BN = 4
    NUM = 3
    H_SIZE = 6
    weights = {
        'fc_w1': 0.05 * np.random.rand(3 + H_SIZE, H_SIZE),
        'fc_b1': 0.05 * np.random.rand(H_SIZE),
        'fc_w2': 0.05 * np.random.rand(H_SIZE, NUM),
        'fc_b2': 0.05 * np.random.rand(NUM),
    }
    optimizer = get_sgd_optimizer(0.0025)
    trainer = RNNTrainer(
        output, weights, {'h1': np.zeros((BN, H_SIZE))},
        running_rnn_loss('input', 'output', mean_squared_loss), optimizer)

    def batch_gen():
        return {'input': stupid_fsm()}

    test_batch = batch_gen()
    initial_loss = trainer.test(test_batch)
    trainer.train_batch(300, batch_gen)

    other_loss = trainer.test(test_batch)

    assert other_loss * 3 < initial_loss
    trainer.initial_hidden = {'h1': np.zeros((1, H_SIZE))}
    num = 20
    initial = np.array([[1, 0, 0]])

    def concretizer(val):
        m = np.random.choice(np.array([0, 1, 2]),
                             p=val['output'][0] / sum(val['output'][0]))
        ret = np.array([0, 0, 0])
        ret[m] = 1
        return {**val, 'input': np.array([ret])}

    predicted = trainer.predict(num, {'input': initial}, concretizer)
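
stupid_fsm() is not shown, but from the way Example #9 indexes its batches (forward_data[:, t + 1, :]) the inputs are presumably one-hot sequences of shape [BN x T x NUM]. A hypothetical stand-in generator with that shape:

    def one_hot_cycle(bn=4, t=15, num=3):
        # stand-in for stupid_fsm() / alt_patterns(): each sequence cycles
        # 0 -> 1 -> 2 as one-hot vectors, shape [bn, t, num]
        data = np.zeros((bn, t, num))
        for b in range(bn):
            for step in range(t):
                data[b, step, (b + step) % num] = 1
        return data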
Example #4
def get_recursive_layer(prior, layer_name, weight_name, bias_name):
    fcw1 = Identity([], name=weight_name)
    fcb1 = Identity([], name=bias_name)
    ii = Identity([], name='prior_' + layer_name)
    joined = Concat([prior, ii])
    multed = MatrixMult([joined, fcw1])
    added = MatrixAdd([multed, fcb1])
    h1 = Relu(added, name=layer_name + '_internal')
    # register the hidden state under layer_name so it can be fed back next step
    h11 = Identity([h1], name=layer_name)
    return h1
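
Layers built this way chain directly; Example #5 below stacks two of them:

    h1 = get_recursive_layer(i, 'h1', 'fc_w1', 'fc_b1')
    h2 = get_recursive_layer(h1, 'h2', 'fc_w2', 'fc_b2')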
Example #5
def test_rnn_multistep():

    i = Identity([], name='input')

    def get_recursive_layer(prior, layer_name, weight_name, bias_name):
        fcw1 = Identity([], name=weight_name)
        fcb1 = Identity([], name=bias_name)
        ii = Identity([], name='prior_' + layer_name)
        joined = Concat([prior, ii])
        multed = MatrixMult([joined, fcw1])
        added = MatrixAdd([multed, fcb1])
        h1 = Relu(added, name=layer_name + '_internal')
        h11 = Identity([h1], name=layer_name)
        return h1

    h1 = get_recursive_layer(i, 'h1', 'fc_w1', 'fc_b1')
    h2 = get_recursive_layer(h1, 'h2', 'fc_w2', 'fc_b2')

    fcw3 = Identity([], name='fc_w3')
    fcb3 = Identity([], name='fc_b3')

    output = Exponent(MatrixAdd([MatrixMult([h2, fcw3]), fcb3]),
                      name='output')

    BN = 4
    T = 15
    NUM = 3
    H_SIZE = 13
    weights = {
        'fc_w1': 0.2 * (np.random.rand(3 + H_SIZE, H_SIZE) - 0.5),
        'fc_b1': 0.2 * (np.random.rand(H_SIZE) - 0.5),
        'fc_w2': 0.2 * (np.random.rand(H_SIZE + H_SIZE, H_SIZE) - 0.5),
        'fc_b2': 0.2 * (np.random.rand(H_SIZE) - 0.5),
        'fc_w3': 0.2 * (np.random.rand(H_SIZE, NUM) - 0.5),
        'fc_b3': 0.2 * (np.random.rand(NUM) - 0.5),
    }

    optimizer = get_sgd_optimizer(0.004)
    trainer = RNNTrainer(
        output, weights, {
            'h1': np.zeros((BN, H_SIZE)),
            'h2': np.zeros((BN, H_SIZE))
        }, running_rnn_loss('input', 'output', mean_squared_loss), optimizer)

    def batch_gen():
        nmn = alt_patterns()
        return {'input': nmn}

    test_batch = batch_gen()
    initial_loss = trainer.test(test_batch)
    trainer.train_batch(500, batch_gen)
    final_loss = trainer.test(test_batch)
    assert final_loss * 3 < initial_loss

    trainer.initial_hidden = {
        'h1': np.zeros((1, H_SIZE)),
        'h2': np.zeros((1, H_SIZE))
    }

    num = 20
    initial = np.array([[0, 0, 1]])

    def concretizer(val):
        # alternative: sample in proportion to the output instead of argmax
        # m = np.random.choice(np.array([0, 1, 2]), p=val['output'][0] / sum(val['output'][0]))
        print(val['output'])
        m = np.argmax(val['output'])
        ret = np.array([0, 0, 0])
        ret[m] = 1
        return {**val, 'input': np.array([ret])}

    predicted = trainer.predict(num, {'input': initial}, concretizer)

    print([x['input'] for x in predicted])
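
The two concretizers above are the obvious decoding strategies: sample from the normalized output, or take the argmax. A hypothetical middle ground (not from the source) is temperature sampling; pass it as lambda v: temperature_concretizer(v, 0.5), since predict calls the concretizer with a single argument:

    def temperature_concretizer(val, temp=0.5):
        # sharpen (temp < 1) or flatten (temp > 1) the output distribution
        # before sampling the next one-hot input
        logits = np.log(val['output'][0] + 1e-9) / temp
        p = np.exp(logits) / np.sum(np.exp(logits))
        m = np.random.choice(len(p), p=p)
        ret = np.zeros(len(p), dtype=int)
        ret[m] = 1
        return {**val, 'input': np.array([ret])}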
Example #6
def test_all_backprob_again():
    
    i = Input('input')
    iw = Parameter('fc_w1')
    ib = Parameter('fc_b1')

    h1 = Sigmoid([MatrixAdd([MatrixMult([i, iw], name='mult1'), ib], name='add1')], name='h1')
    iw2 = Parameter('fc_w2')
    ib2 = Parameter('fc_b2')
    h2 = Sigmoid([MatrixAdd([MatrixMult([h1, iw2], name='mult2'), ib2], name='add2')], name='h2')
    h3 = MatrixAddExact([h1, h2], name='added')

    iw3 = Parameter('fc_w3')
    ib3 = Parameter('fc_b3')

    h4 = Relu(MatrixAdd([MatrixMult([h3, iw3], name='mult3'), ib3], name='add3'), name='h4')

    output = Exponent(h4, name='output')

    full = output
    
    rand = np.random.rand

    def input_generator():
        return {
            'input': rand(7, 10),
            'fc_w1': rand(10, 11),
            'fc_b1': rand(11),
            'fc_w2': rand(11, 11),
            'fc_b2': rand(11),
            'fc_w3': rand(11, 10),
            'fc_b3': rand(10),
        }

    skips = 0
    for n in range(100):

        inpp = input_generator()
        desired = rand(7, 10)

        forward1 = full.forw(inpp)

        loss1, deriv1 = mean_squared_loss(
            prediction=forward1['output'],
            truth=desired)

        derivatives = full.back(
            { 'output': deriv1 },
            forward1,
            list(inpp.keys()))

        k = list(derivatives.keys())
        r = np.random.choice(k)

        indiv = inpp[r].copy()

        # pick a single random coordinate of the chosen tensor
        random_point = tuple(np.random.randint(d) for d in indiv.shape)
        this_deriv = derivatives[r][random_point]

        if np.abs(this_deriv) < 0.001:
            skips += 1
            continue

        LR = 0.001
        change_amount = LR
        # nudge that coordinate down by the step size
        indiv[random_point] = indiv[random_point] - change_amount


        inpp[r] = indiv
        forward2 = full.forw(inpp)
        loss2, deriv2 = mean_squared_loss(
            prediction=forward2['output'],
            truth=desired)

        # finite-difference check: the loss drop should match deriv * LR
        amount = loss1 - loss2

        assert (np.isclose(amount, this_deriv * LR, atol=0.01)
                or (loss1 == 0.0 and loss2 == 0.0))


    assert skips < 50
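
The test above is a randomized finite-difference check: nudge one coordinate down by LR and confirm the loss drops by roughly deriv * LR. The same identity in a self-contained numpy sketch, independent of the DAG library (f and grad_f here are illustrative):

    def finite_diff_check(f, grad_f, x, eps=1e-3, atol=1e-2):
        # perturb one random coordinate by -eps and compare the observed loss
        # change against the analytic gradient's prediction, grad * eps
        idx = tuple(np.random.randint(d) for d in x.shape)
        g = grad_f(x)[idx]
        x2 = x.copy()
        x2[idx] -= eps
        return np.isclose(f(x) - f(x2), g * eps, atol=atol)

    # f(x) = sum(x^2) has gradient 2x
    assert finite_diff_check(lambda v: np.sum(v ** 2), lambda v: 2 * v,
                             np.random.rand(7, 10))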
Example #7
def test_names_manual():
    
    i = Identity([], name='input')
    iw = Identity([], name='fc_w1')
    ib = Identity([], name='fc_b1')

    h1 = Relu([MatrixAdd([MatrixMult([i, iw], name='mult1'), ib], name='add1')], name='h1')
    iw2 = Identity([], name='fc_w2')
    ib2 = Identity([], name='fc_b2')
    h2 = Relu([MatrixAdd([MatrixMult([h1, iw2]), ib2])], name='h2')
    output = Probabilize(Exponent(h2))

    full = output

    includes = full.get_names()
    should_include = [
        'input', 'fc_w1', 'fc_b1',
        'h1', 'fc_b2', 'fc_w2', 'h2']

    for name in should_include:
        assert name in includes

    predecessors = full.get_inputs_required_for(['h1'])
    assert len(predecessors) == 3
    should_include = ['input', 'fc_w1', 'fc_b1']
    for name in should_include:
        assert name in predecessors

    requires_input = full.get_inputs()
    assert len(requires_input) == 5
    

    for _ in range(200):
        
        i = np.random.rand(10, 21)
        w1 = np.random.rand(21, 13)
        b1 = np.random.rand(13)
        
        i_dict = { 'input': i, 'fc_w1': w1, 'fc_b1': b1 }
        results = full.forw(i_dict, [ 'h1' ])
        desired_h1 = np.random.rand(*results['h1'].shape)

        old_loss, deriv = mean_squared_loss(
            prediction=results['h1'],
            truth=desired_h1)

        for __ in range(2):
            
            i_dict = { 'input': i, 'fc_w1': w1, 'fc_b1': b1 }
            results = full.forw(i_dict, [ 'h1' ])

            loss, deriv = mean_squared_loss(
                prediction=results['h1'],
                truth=desired_h1)

            back_derivs = full.back(
                { 'h1': deriv },
                results,
                [ 'fc_w1', 'fc_b1'])

            w1 = w1 - back_derivs['fc_w1'] * 0.001
            b1 = b1 - back_derivs['fc_b1'] * 0.001

        i_dict = { 'input': i, 'fc_w1': w1, 'fc_b1': b1 }
        results = full.forw(i_dict, [ 'h1' ])

        new_loss, deriv = mean_squared_loss(
            prediction=results['h1'],
            truth=desired_h1)

        assert new_loss < old_loss
Example #8
def get_layer(prev, weight_name, bias_name):
    iw = Parameter(weight_name)
    ib = Parameter(bias_name)
    mult = MatrixMult([prev, iw], name='mult_' + weight_name)
    add = MatrixAdd([mult, ib], name='add_' + bias_name)
    return Relu(add)
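
Chained, this builds the same kind of feed-forward stack Example #6 wires by hand; a minimal sketch assuming the same Input/Parameter primitives:

    i = Input('input')
    h1 = get_layer(i, 'fc_w1', 'fc_b1')
    h2 = get_layer(h1, 'fc_w2', 'fc_b2')
    output = get_layer(h2, 'fc_w3', 'fc_b3')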
Example #9
def test_basic_rnn():

    i = Identity([], name='input')
    fcw1 = Identity([], name='fc_w1')
    fcb1 = Identity([], name='fc_b1')

    ii = Identity([], name='prior_h1')
    joined = Concat([i, ii])
    h1 = LeakyRelu(MatrixAdd([MatrixMult([joined, fcw1]), fcb1]),
                   name='internal_h1')
    h11 = Identity([h1], name='h1')

    fcw2 = Identity([], name='fc_w2')
    fcb2 = Identity([], name='fc_b2')
    i2 = Identity([], name='prior_h2')
    joined2 = Concat([h1, i2])
    h2 = LeakyRelu(MatrixAdd([MatrixMult([joined2, fcw2]), fcb2]),
                   name='internal_h2')
    h22 = Identity([h2], name='h2')

    fcw3 = Identity([], name='fc_w3')
    fcb3 = Identity([], name='fc_b3')

    output = LeakyRelu(MatrixAdd([MatrixMult([h2, fcw3]), fcb3]),
                       name='output')

    BN = 4
    T = 15
    NUM = 3
    rnn = to_rnn(output)

    H_SIZE = 13
    weights = {
        'fc_w1': 0.2 * (np.random.rand(3 + H_SIZE, H_SIZE) - 0.5),
        'fc_b1': 0.2 * (np.random.rand(H_SIZE) - 0.5),
        'fc_w2': 0.2 * (np.random.rand(H_SIZE + H_SIZE, H_SIZE) - 0.5),
        'fc_b2': 0.2 * (np.random.rand(H_SIZE) - 0.5),
        'fc_w3': 0.2 * (np.random.rand(H_SIZE, NUM) - 0.5),
        'fc_b3': 0.2 * (np.random.rand(NUM) - 0.5),
    }

    first_loss = None
    last_loss = None
    for step in range(300):

        forward_data = alt_patterns()

        forward = rnn.forw({'input': forward_data}, weights, {
            'h1': np.zeros((BN, H_SIZE)),
            'h2': np.zeros((BN, H_SIZE))
        })

        losses = []
        derivs = []
        # one loss term per step; the target is the next step's input
        for t in range(T - 1):
            loss, deriv = mean_squared_loss(prediction=forward[t]['output'],
                                            truth=forward_data[:, t + 1, :])
            derivs.append({'output': deriv})
            losses.append(loss)
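        # the final timestep has no next-step target, so inject a zero gradient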
        derivs.append({'output': np.zeros(derivs[0]['output'].shape)})

        print("Loss at ", i, " is ", sum(losses))
        if (first_loss is None):
            first_loss = sum(losses)
        last_loss = sum(losses)

        backwards = rnn.back(forward, derivs, [
            'fc_w1', 'fc_w2', 'fc_w3', 'fc_b1', 'fc_b2', 'fc_b3', 'prior_h1',
            'prior_h2'
        ])

        for key in weights.keys():
            weights[key] = weights[key] - 0.005 * backwards[key]

    assert last_loss * 3 < first_loss