output = FullyConnectedLayer(name='output', parent=['h1', 'h2', 'h3'], parent_dim=[200, 200, 200], nout=frame_size, unit='sigmoid', init_W=init_W, init_b=init_b) nodes = [h1, h2, h3, output] params = OrderedDict() for node in nodes: params.update(node.initialize()) params = init_tparams(params) s1_0 = h1.get_init_state(batch_size) s2_0 = h2.get_init_state(batch_size) s3_0 = h3.get_init_state(batch_size) def inner_fn(x_t, s1_tm1, s2_tm1, s3_tm1): s1_t = h1.fprop([[x_t], [s1_tm1, s2_tm1, s3_tm1]], params) s2_t = h2.fprop([[s1_t], [s2_tm1, s1_tm1, s3_tm1]], params) s3_t = h3.fprop([[s2_t], [s3_tm1], s1_tm1, s2_tm1], params) y_hat_t = output.fprop([s1_t, s2_t, s3_t], params) return s1_t, s2_t, s3_t, y_hat_t ((h1_temp, h2_temp, h3_temp, y_hat_temp), updates) =\ theano.scan(fn=inner_fn,
# Gather every node's parameter values and pass them through flatten.
params = flatten([node.get_params().values() for node in nodes])

# State kept between calls: a step counter and, per recurrent layer, a
# float32 buffer of shape (batch_size, 400) that starts out as all zeros.
step_count = sharedX(0, name='step_count')
last_h = np.zeros((batch_size, 400), dtype=np.float32)
h1_tm1 = sharedX(last_h, name='h1_tm1')
h2_tm1 = sharedX(last_h, name='h2_tm1')
h3_tm1 = sharedX(last_h, name='h3_tm1')

# Captured here, before step_count is rebound to a symbolic expression.
update_list = [step_count, h1_tm1, h2_tm1, h3_tm1]

# Count up while the counter is <= reset_freq, otherwise wrap back to 0.
step_count = T.switch(T.le(step_count, reset_freq), step_count + 1, 0)


def _resume_or_reset(layer, carried, step):
    """Choose a layer's starting state for the next sequence.

    Yields ``layer.get_init_state()`` when ``step`` equals 0 or when the
    entries of ``carried`` sum to 0; otherwise yields ``carried`` unchanged.
    """
    counter_wrapped = T.cast(T.eq(step, 0), 'int32')
    carried_sums_to_zero = T.cast(T.eq(T.sum(carried), 0.), 'int32')
    use_fresh = T.or_(counter_wrapped, carried_sums_to_zero)
    return T.switch(use_fresh, layer.get_init_state(), carried)


s1_0 = _resume_or_reset(h1, h1_tm1, step_count)
s2_0 = _resume_or_reset(h2, h2_tm1, step_count)
s3_0 = _resume_or_reset(h3, h3_tm1, step_count)


def inner_fn(x_t, h1_tm1, h2_tm1, h3_tm1):
    """Advance the three recurrent layers by one step.

    h1 sees only ``x_t`` as input; h2 and h3 see ``x_t`` together with the
    fresh state of the layer below. Each layer also receives all three
    previous states, its own listed first.

    Returns the new states of h1, h2 and h3, in that order.
    """
    prev_1, prev_2, prev_3 = h1_tm1, h2_tm1, h3_tm1
    new_1 = h1.fprop([[x_t], [prev_1, prev_2, prev_3]])
    new_2 = h2.fprop([[x_t, new_1], [prev_2, prev_1, prev_3]])
    new_3 = h3.fprop([[x_t, new_2], [prev_3, prev_1, prev_2]])
    return new_1, new_2, new_3
# NOTE(review): this fragment is truncated at both ends in the visible source.
# The statement that owns `node.initialize()` is not shown, and `inner_fn`
# stops after computing h2_t (no h3_t, no return is visible).
#
# What the visible statements do, in order:
#   * call initialize() on `node`, then pass every node's get_params().values()
#     through flatten to build `params`;
#   * create `step_count` and three per-layer buffers via sharedX, each buffer
#     seeded from a float32 zero array of shape (batch_size, 400)
#     (presumably Theano shared variables -- confirm against sharedX);
#   * record those four objects in `update_list` BEFORE `step_count` is
#     rebound to a symbolic expression;
#   * rebind `step_count` to: step_count + 1 while it is <= reset_freq, else 0;
#   * pick each layer's starting state s1_0/s2_0/s3_0: the layer's
#     get_init_state() when step_count == 0 or the carried buffer sums to 0,
#     otherwise the carried buffer itself;
#   * begin `inner_fn`, where each layer's fprop receives
#     [[inputs], [previous states]] with its own previous state listed first.
node.initialize() params = flatten([node.get_params().values() for node in nodes]) step_count = sharedX(0, name='step_count') last_h = np.zeros((batch_size, 400), dtype=np.float32) h1_tm1 = sharedX(last_h, name='h1_tm1') h2_tm1 = sharedX(last_h, name='h2_tm1') h3_tm1 = sharedX(last_h, name='h3_tm1') update_list = [step_count, h1_tm1, h2_tm1, h3_tm1] step_count = T.switch(T.le(step_count, reset_freq), step_count + 1, 0) s1_0 = T.switch( T.or_(T.cast(T.eq(step_count, 0), 'int32'), T.cast(T.eq(T.sum(h1_tm1), 0.), 'int32')), h1.get_init_state(), h1_tm1) s2_0 = T.switch( T.or_(T.cast(T.eq(step_count, 0), 'int32'), T.cast(T.eq(T.sum(h2_tm1), 0.), 'int32')), h2.get_init_state(), h2_tm1) s3_0 = T.switch( T.or_(T.cast(T.eq(step_count, 0), 'int32'), T.cast(T.eq(T.sum(h3_tm1), 0.), 'int32')), h3.get_init_state(), h3_tm1) def inner_fn(x_t, h1_tm1, h2_tm1, h3_tm1): h1_t = h1.fprop([[x_t], [h1_tm1, h2_tm1, h3_tm1]]) h2_t = h2.fprop([[x_t, h1_t], [h2_tm1, h1_tm1, h3_tm1]])