def get_random_rnn():
    """Assemble a small two-layer recurrent test graph and return its last node.

    ``input`` is multiplied by ``weights1`` and passed through a ReLU named
    ``hidden_internal``; that result is multiplied by ``weights2`` and passed
    through a second ReLU named ``output``.  The names reported by
    ``output.dag.get_node_names()`` are printed before returning.

    Returns:
        The ``Relu`` node named ``'output'``.
    """
    w_in = Parameter(name="weights1")
    x = Input(name='input')
    previous_hidden = Prior(name='hidden')

    # NOTE(review): this concat node is constructed but is not an input of
    # 'matrixmult' below, which consumes the raw input instead.  Kept as-is
    # to preserve behaviour -- confirm whether the matmul was meant to take it.
    concat_node = Concat([x, previous_hidden], name='concat')

    first_product = MatrixMult([x, w_in], name='matrixmult')
    hidden_state = Relu([first_product], name='hidden_internal')

    # Built for its name only; nothing below consumes it directly.
    # Presumably this is what the 'hidden' Prior pairs with -- TODO confirm.
    hidden_alias = Identity([hidden_state], name='hidden')

    w_out = Parameter(name="weights2")
    second_product = MatrixMult([hidden_state, w_out], name='matrixmult2')
    result = Relu([second_product], name='output')

    print(list(result.dag.get_node_names()))
    return result
def test_rnn_works_simple():
    """Train a single-hidden-layer RNN on ``stupid_fsm`` batches, then sample.

    Steps:
      1. Build ``input -> recursive layer 'h1' -> relu(matmul + bias) 'output'``.
      2. Wrap it in an ``RNNTrainer`` with small random weights and a zero
         initial hidden state.
      3. Measure the loss on one batch, call ``train_batch(300, batch_gen)``,
         and require the loss on that same batch to have dropped by more
         than a factor of three.
      4. Swap in a single-row initial hidden state and call ``predict`` with a
         concretizer that turns each output row into a sampled one-hot input.
    """
    batch_size = 4
    num_outputs = 3
    hidden_size = 6

    source = Identity([], name='input')

    def get_recursive_layer(prior, layer_name, weight_name, bias_name):
        # Leaf nodes for the layer's weight, bias and carried-over state.
        weight = Identity([], name=weight_name)
        bias = Identity([], name=bias_name)
        carried = Identity([], name='prior_' + layer_name)

        stacked = Concat([prior, carried])
        product = MatrixMult([stacked, weight])
        shifted = MatrixAdd([product, bias])
        activation = Relu(shifted, name=layer_name + '_internal')

        # Named alias of the activation; the '_internal' node is what is
        # returned and wired onward.
        named_alias = Identity([activation], name=layer_name)
        return activation

    hidden = get_recursive_layer(source, 'h1', 'fc_w1', 'fc_b1')

    out_weight = Identity([], name='fc_w2')
    out_bias = Identity([], name='fc_b2')
    out_product = MatrixMult([hidden, out_weight])
    out_shifted = MatrixAdd([out_product, out_bias])
    output = Relu(out_shifted, name='output')

    # Dict order matters: it fixes the order of the np.random.rand draws.
    weights = {
        'fc_w1': 0.05 * np.random.rand(3 + hidden_size, hidden_size),
        'fc_b1': 0.05 * np.random.rand(hidden_size),
        'fc_w2': 0.05 * np.random.rand(hidden_size, num_outputs),
        'fc_b2': 0.05 * np.random.rand(num_outputs),
    }

    optimizer = get_sgd_optimizer(0.0025)
    trainer = RNNTrainer(
        output,
        weights,
        {'h1': np.zeros((batch_size, hidden_size))},
        running_rnn_loss('input', 'output', mean_squared_loss),
        optimizer,
    )

    def batch_gen():
        return {'input': stupid_fsm()}

    test_batch = batch_gen()
    initial_loss = trainer.test(test_batch)
    trainer.train_batch(300, batch_gen)
    other_loss = trainer.test(test_batch)
    assert other_loss * 3 < initial_loss

    # Prediction runs with a single-row hidden state.
    trainer.initial_hidden = {'h1': np.zeros((1, hidden_size))}

    def concretizer(val):
        # Normalise the first output row and sample one index from it.
        scores = val['output'][0]
        picked = np.random.choice(np.array([0, 1, 2]), p=scores / sum(scores))
        one_hot = np.array([0, 0, 0])
        one_hot[picked] = 1
        return {**val, 'input': np.array([one_hot])}

    steps = 20
    seed_input = np.array([[1, 0, 0]])
    predicted = trainer.predict(steps, {'input': seed_input}, concretizer)
def get_recursive_layer(prior, layer_name, weight_name, bias_name):
    """Build one recurrent layer on top of ``prior`` and return its activation.

    Creates three leaf ``Identity`` nodes (weight, bias, and a carried-over
    state named ``'prior_' + layer_name``), concatenates ``prior`` with the
    carried-over state, applies ``matmul + bias`` and a ReLU named
    ``layer_name + '_internal'``.  An ``Identity`` named ``layer_name`` is
    also created on top of the ReLU, but the ReLU itself is what is returned.

    Args:
        prior: Node feeding this layer.
        layer_name: Base name for the layer's nodes.
        weight_name: Name given to the weight leaf node.
        bias_name: Name given to the bias leaf node.

    Returns:
        The ``Relu`` node named ``layer_name + '_internal'``.
    """
    weight = Identity([], name=weight_name)
    bias = Identity([], name=bias_name)
    carried = Identity([], name='prior_' + layer_name)

    activation = Relu(
        MatrixAdd([MatrixMult([Concat([prior, carried]), weight]), bias]),
        name=layer_name + '_internal',
    )

    # Named alias of the activation; deliberately not the return value.
    named_alias = Identity([activation], name=layer_name)
    return activation
def test_all_backprob_again():
    """Finite-difference check of ``back`` on a three-layer graph.

    For 100 random input/parameter sets the test:
      1. runs a forward pass and takes the mean-squared loss against a random
         target;
      2. back-propagates to every fed value and picks one scalar entry of one
         of them at random;
      3. skips the trial if that entry's derivative is below 1e-3 in
         magnitude;
      4. otherwise decreases the entry by ``LR``, re-runs the forward pass and
         checks that the observed loss change ``loss1 - loss2`` matches the
         first-order prediction ``derivative * LR``.

    Fewer than half of the trials may be skipped.
    """
    i = Input('input')
    iw = Parameter('fc_w1')
    ib = Parameter('fc_b1')
    h1 = Sigmoid(
        [MatrixAdd([MatrixMult([i, iw], name='mult1'), ib], name='add1')],
        name='h1')
    iw2 = Parameter('fc_w2')
    ib2 = Parameter('fc_b2')
    h2 = Sigmoid(
        [MatrixAdd([MatrixMult([h1, iw2], name='mult2'), ib2], name='add2')],
        name='h2')
    h3 = MatrixAddExact([h1, h2], name='added')
    iw3 = Parameter('fc_w3')
    ib3 = Parameter('fc_b3')
    h4 = Relu(
        MatrixAdd([MatrixMult([h3, iw3], name='mult3'), ib3], name='add3'),
        name='h4')
    output = Exponent(h4, name='output')
    full = output

    rand = np.random.rand

    def input_generator():
        return {
            'input': rand(7, 10),
            'fc_w1': rand(10, 11),
            'fc_b1': rand(11),
            'fc_w2': rand(11, 11),
            'fc_b2': rand(11),
            'fc_w3': rand(11, 10),
            'fc_b3': rand(10),
        }

    LR = 0.001
    skips = 0
    for _ in range(100):
        inpp = input_generator()
        desired = rand(7, 10)
        forward1 = full.forw(inpp)
        loss1, deriv1 = mean_squared_loss(
            prediction=forward1['output'], truth=desired)
        derivatives = full.back(
            {'output': deriv1}, forward1, list(inpp.keys()))

        # Pick one fed value, then one scalar coordinate inside it.
        r = np.random.choice(list(derivatives.keys()))
        indiv = inpp[r].copy()
        random_point = tuple(floor(dim * random()) for dim in indiv.shape)
        this_deriv = np.asarray(derivatives[r])[random_point]

        if np.abs(this_deriv) < 0.001:
            skips += 1
            continue

        # Tuple indexing handles any rank, so no per-rank branches are needed.
        indiv[random_point] -= LR
        inpp[r] = indiv

        forward2 = full.forw(inpp)
        loss2, _ = mean_squared_loss(
            prediction=forward2['output'], truth=desired)
        amount = loss1 - loss2

        # The entry was decreased by LR, so to first order
        # loss1 - loss2 ~= dLoss/dEntry * LR.  The previous form,
        # np.isclose(loss1, loss2, this_deriv * LR, atol=0.01), passed the
        # prediction as *rtol* and therefore checked almost nothing.
        # NOTE(review): assumes mean_squared_loss returns a derivative on the
        # same scale as its loss value -- confirm, and adjust rtol if the
        # perturbation's second-order error turns out larger than 5%.
        assert (
            np.isclose(amount, this_deriv * LR, rtol=0.05, atol=0.01)
            or (loss1 == 0.0 and loss2 == 0.0)
        )
    assert skips < 50
def test_names_manual():
    """Check graph name/input queries and that two SGD steps on ``h1`` help.

    Builds a two-layer ReLU graph followed by ``Probabilize(Exponent(...))``
    and verifies:
      * every explicitly named node appears in ``get_names()``;
      * ``get_inputs_required_for(['h1'])`` has exactly three entries and
        contains ``input``, ``fc_w1`` and ``fc_b1``;
      * ``get_inputs()`` has five entries;
      * for 200 random problems, two gradient steps of size 0.001 on
        ``fc_w1``/``fc_b1`` reduce the mean-squared loss of ``h1`` against a
        random target.
    """
    i = Identity([], name='input')
    iw = Identity([], name='fc_w1')
    ib = Identity([], name='fc_b1')
    h1 = Relu(
        [MatrixAdd([MatrixMult([i, iw], name='mult1'), ib], name='add1')],
        name='h1')
    iw2 = Identity([], name='fc_w2')
    ib2 = Identity([], name='fc_b2')
    h2 = Relu([MatrixAdd([MatrixMult([h1, iw2]), ib2])], name='h2')
    output = Probabilize(Exponent(h2))
    full = output

    includes = full.get_names()
    for name in ['input', 'fc_w1', 'fc_b1', 'h1', 'fc_b2', 'fc_w2', 'h2']:
        assert name in includes

    predecessors = full.get_inputs_required_for(['h1'])
    assert len(predecessors) == 3
    # Previously this loop re-checked `includes`, so the contents of
    # `predecessors` were never verified.
    # NOTE(review): assumes get_inputs_required_for returns node names, like
    # get_names does -- confirm.
    for name in ['input', 'fc_w1', 'fc_b1']:
        assert name in predecessors

    requires_input = full.get_inputs()
    assert len(requires_input) == 5

    def forward_h1(x, w1, b1):
        # Evaluate the graph only as far as 'h1' for the given feed values.
        return full.forw({'input': x, 'fc_w1': w1, 'fc_b1': b1}, ['h1'])

    for _ in range(200):
        # `x` rather than `i`, so the graph node `i` above is not shadowed.
        x = np.random.rand(10, 21)
        w1 = np.random.rand(21, 13)
        b1 = np.random.rand(13)

        results = forward_h1(x, w1, b1)
        desired_h1 = np.random.rand(*results['h1'].shape)
        old_loss, _ = mean_squared_loss(
            prediction=results['h1'], truth=desired_h1)

        for _step in range(2):
            results = forward_h1(x, w1, b1)
            _, deriv = mean_squared_loss(
                prediction=results['h1'], truth=desired_h1)
            back_derivs = full.back(
                {'h1': deriv}, results, ['fc_w1', 'fc_b1'])
            w1 = w1 - back_derivs['fc_w1'] * 0.001
            b1 = b1 - back_derivs['fc_b1'] * 0.001

        results = forward_h1(x, w1, b1)
        new_loss, _ = mean_squared_loss(
            prediction=results['h1'], truth=desired_h1)
        assert new_loss < old_loss
def get_layer(prev, weight_name, bias_name):
    """Stack ``relu(prev @ weight + bias)`` on top of ``prev``.

    Args:
        prev: Node feeding this layer.
        weight_name: Name of the weight ``Parameter``; the matmul node is
            named ``'mult_' + weight_name``.
        bias_name: Name of the bias ``Parameter``; the add node is named
            ``'add_' + bias_name``.

    Returns:
        An unnamed ``Relu`` node wrapping the add node.
    """
    weight = Parameter(weight_name)
    bias = Parameter(bias_name)
    product = MatrixMult([prev, weight], name='mult_' + weight_name)
    return Relu(MatrixAdd([product, bias], name='add_' + bias_name))