# Imports assumed by these examples; ConvertContext and
# BiDirectionalLSTMLayerConverter are onnxmltools internals whose exact
# module path depends on the onnxmltools version in use.
import numpy
from coremltools.models import datatypes
from coremltools.models.neural_network import NeuralNetworkBuilder
from onnxmltools import convert_coreml


def test_bidirectional_lstm_converter(self):
     input_dim = (1, 8)
     output_dim = (1, 2)
     inputs = [('input', datatypes.Array(*input_dim))]
     outputs = [('output', datatypes.Array(*output_dim))]
     builder = NeuralNetworkBuilder(inputs, outputs)
     W_h = [
         numpy.random.rand(2, 2),
         numpy.random.rand(2, 2),
         numpy.random.rand(2, 2),
         numpy.random.rand(2, 2)
     ]
     W_x = [
         numpy.random.rand(2, 8),
         numpy.random.rand(2, 8),
         numpy.random.rand(2, 8),
         numpy.random.rand(2, 8)
     ]
     b = [
         numpy.random.rand(2, 1),
         numpy.random.rand(2, 1),
         numpy.random.rand(2, 1),
         numpy.random.rand(2, 1)
     ]
     p = [
         numpy.zeros(shape=(2, 1)),
         numpy.zeros(shape=(2, 1)),
         numpy.zeros(shape=(2, 1))
     ]
     builder.add_bidirlstm(name='LSTM',
                           W_h=W_h,
                           W_x=W_x,
                           W_h_back=W_h,
                           b=b,
                           W_x_back=W_x,
                           b_back=b,
                           hidden_size=2,
                           input_size=8,
                           input_names=['input'],
                           output_names=['output'],
                           inner_activation='SIGMOID',
                           cell_state_update_activation='TANH',
                           output_activation='TANH',
                           peep=p,
                           peep_back=p,
                           output_all=False,
                           forget_bias=False,
                           coupled_input_forget_gate=False,
                           cell_clip_threshold=10000)
     context = ConvertContext()
     node = BiDirectionalLSTMLayerConverter.convert(
         context, builder.spec.neuralNetwork.layers[0],
         ['input', 'h_init', 'c_init', 'h_back_init', 'c_back_init'],
         ['output', 'h', 'c', 'h_back', 'c_back'])
     self.assertTrue(node is not None)
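
A quick way to sanity-check what the builder produced is to read the layer
parameters back out of the spec. A minimal sketch; the protobuf field names
(biDirectionalLSTM, inputVectorSize, outputVectorSize) follow the Core ML
NeuralNetwork protobuf definition and are an assumption here, not part of the
original test:

layer = builder.spec.neuralNetwork.layers[0]
lstm_params = layer.biDirectionalLSTM
print(layer.name)                    # 'LSTM'
print(lstm_params.inputVectorSize)   # 8
print(lstm_params.outputVectorSize)  # 2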
Example #2
def test_bidirectional_lstm_converter(self):
     input_dim = (1, 8)
     output_dim = (1, 2)
     inputs = [('input', datatypes.Array(*input_dim))]
     outputs = [('output', datatypes.Array(*output_dim))]
     builder = NeuralNetworkBuilder(inputs, outputs)
     W_h = [
         numpy.random.rand(2, 2),
         numpy.random.rand(2, 2),
         numpy.random.rand(2, 2),
         numpy.random.rand(2, 2)
     ]
     W_x = [
         numpy.random.rand(2, 8),
         numpy.random.rand(2, 8),
         numpy.random.rand(2, 8),
         numpy.random.rand(2, 8)
     ]
     b = [
         numpy.random.rand(2, 1),
         numpy.random.rand(2, 1),
         numpy.random.rand(2, 1),
         numpy.random.rand(2, 1)
     ]
     p = [
         numpy.zeros(shape=(2, 1)),
         numpy.zeros(shape=(2, 1)),
         numpy.zeros(shape=(2, 1))
     ]
     builder.add_bidirlstm(name='LSTM',
                           W_h=W_h,
                           W_x=W_x,
                           W_h_back=W_h,
                           b=b,
                           W_x_back=W_x,
                           b_back=b,
                           hidden_size=2,
                           input_size=8,
                           input_names=['input'],
                           output_names=['output'],
                           inner_activation='SIGMOID',
                           cell_state_update_activation='TANH',
                           output_activation='TANH',
                           peep=p,
                           peep_back=p,
                           output_all=False,
                           forget_bias=False,
                           coupled_input_forget_gate=False,
                           cell_clip_threshold=10000)
     model_onnx = convert_coreml(builder.spec)
     self.assertTrue(model_onnx is not None)
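
Beyond the not-None assertion, the converted model can be checked structurally.
A minimal sketch, assuming the onnx package is installed (the output filename
is illustrative):

import onnx

# Raises if the converted graph is structurally invalid.
onnx.checker.check_model(model_onnx)
# Persist for inspection, e.g. with Netron.
onnx.save(model_onnx, 'bidirlstm_test.onnx')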
# The training example below assumes TensorFlow 1.x, the project-specific
# load_ipad_data helper, and module-level hyperparameters (num_features,
# num_hidden, num_classes, num_layers, batch_size, learning_rate, momentum,
# num_epochs, clip_thresh) defined elsewhere in the source file.
import os
import time
import logging

import numpy as np
import tensorflow as tf
from coremltools.models import MLModel


def train_model(ENV, in_file, op_file):

    graph = tf.Graph()
    with graph.as_default():
        stacked_layers = {}

        # e.g: log filter bank or MFCC features
        # Has size [batch_size, max_stepsize, num_features], but the
        # batch_size and max_stepsize can vary along each step
        inputs = tf.placeholder(tf.float32, [None, None, num_features])

        targets = tf.sparse_placeholder(tf.int32)
        # 1d array of size [batch_size]
        seq_len = tf.placeholder(tf.int32, [None])

        # Weights & biases
        weight_classes = tf.Variable(
            tf.truncated_normal([num_hidden, num_classes],
                                mean=0,
                                stddev=0.1,
                                dtype=tf.float32))
        bias_classes = tf.Variable(tf.zeros([num_classes]), dtype=tf.float32)

        # _activation = tf.nn.relu  # this was causing the model to diverge
        _activation = None

        layers = {'forward': [], 'backward': []}
        for key in layers.keys():
            for i in range(num_layers):
                cell = tf.nn.rnn_cell.LSTMCell(num_hidden,
                                               use_peepholes=True,
                                               activation=_activation,
                                               state_is_tuple=True,
                                               cell_clip=clip_thresh)
                #
                #cell = RWACell(num_units=num_hidden)
                layers[key].append(cell)
            stacked_layers[key] = tf.nn.rnn_cell.MultiRNNCell(
                layers[key], state_is_tuple=True)

        # bidirectional_dynamic_rnn returns (outputs, output_states); the
        # final states are not needed here.
        outputs, _output_states = tf.nn.bidirectional_dynamic_rnn(
            stacked_layers['forward'],
            stacked_layers['backward'],
            inputs,
            sequence_length=seq_len,
            time_major=False,  # [batch_size, max_time, num_hidden]
            dtype=tf.float32)
        """
        outputs_concate = tf.concat_v2(outputs, 2)
        outputs_concate = tf.reshape(outputs_concate, [-1, 2*num_hidden])
        # logits = tf.matmul(outputs_concate, weight_classes) + bias_classes
        """
        fw_output = tf.reshape(outputs[0], [-1, num_hidden])
        bw_output = tf.reshape(outputs[1], [-1, num_hidden])
        logits = tf.add(
            tf.add(tf.matmul(fw_output, weight_classes),
                   tf.matmul(bw_output, weight_classes)), bias_classes)

        logits = tf.reshape(logits, [batch_size, -1, num_classes])
        loss = tf.nn.ctc_loss(targets, logits, seq_len, time_major=False)
        error = tf.reduce_mean(loss)
        optimizer = tf.train.MomentumOptimizer(learning_rate,
                                               momentum).minimize(error)

        # Evaluating
        # decoded, log_prob = ctc_ops.ctc_greedy_decoder(tf.transpose(logits, perm=[1, 0, 2]), seq_len)
        decoded, log_prob = tf.nn.ctc_beam_search_decoder(
            tf.transpose(logits, perm=[1, 0, 2]), seq_len)
        label_error_rate = tf.reduce_mean(
            tf.edit_distance(tf.cast(decoded[0], tf.int32), targets))

    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.5)

    data, labels = load_ipad_data(in_file)
    # Use roughly three quarters of the data for training, rounded down to a
    # whole number of batches (integer division so bound is a valid index).
    bound = ((3 * len(data) // batch_size) // 4) * batch_size
    train_inputs = data[0:bound]
    train_labels = labels[0:bound]
    test_data = data[bound:]
    test_labels = labels[bound:]
    num_examples = len(train_inputs)
    num_batches_per_epoch = num_examples // batch_size

    with tf.Session(graph=graph,
                    config=tf.ConfigProto(gpu_options=gpu_options)) as session:
        # Initializate the weights and biases
        tf.global_variables_initializer().run()
        saver = tf.train.Saver(tf.global_variables(), max_to_keep=0)

        ckpt = tf.train.get_checkpoint_state(op_file)
        if ckpt:
            logging.info('load %s', ckpt.model_checkpoint_path)
            saver.restore(session, ckpt.model_checkpoint_path)
        else:
            logging.info("no previous session to load")

        for curr_epoch in range(num_epochs):
            train_cost = train_ler = 0
            start = time.time()

            for batch in range(num_batches_per_epoch):
                # Getting the index
                indices = [
                    i % num_examples
                    for i in range(batch * batch_size, (batch + 1) *
                                   batch_size)
                ]

                batch_train_inputs = train_inputs[indices]
                # Padding input to max_time_step of this batch
                batch_train_inputs, batch_train_seq_len = pad_sequences(
                    batch_train_inputs)

                # Convert to the sparse representation needed to feed the
                # tf.sparse_placeholder target (see the sparse_tuple_from
                # sketch after this function).
                batch_train_targets = sparse_tuple_from(train_labels[indices])

                feed = {
                    inputs: batch_train_inputs,
                    targets: batch_train_targets,
                    seq_len: batch_train_seq_len
                }
                batch_cost, _ = session.run([error, optimizer], feed)
                train_cost += batch_cost * batch_size
                train_ler += session.run(label_error_rate,
                                         feed_dict=feed) * batch_size
                log = "Epoch {}/{}, iter {}, batch_cost {}"
                logging.info(
                    log.format(curr_epoch + 1, num_epochs, batch, batch_cost))

            saver.save(session,
                       os.path.join(ENV.output, 'best.ckpt'),
                       global_step=curr_epoch)

            # Shuffle the data
            shuffled_indexes = np.random.permutation(num_examples)
            train_inputs = train_inputs[shuffled_indexes]
            train_labels = train_labels[shuffled_indexes]

            # Metrics mean
            train_cost /= num_examples
            train_ler /= num_examples

            log = "Epoch {}/{}, train_cost = {:.3f}, train_ler = {:.3f}, time = {:.3f}"
            logging.info(
                log.format(curr_epoch + 1, num_epochs, train_cost, train_ler,
                           time.time() - start))

            # Run one batch of test data through, reusing the last training
            # batch's index range (modulo the test-set size).
            indices = [
                i % len(test_data)
                for i in range(batch * batch_size, (batch + 1) * batch_size)
            ]
            test_inputs = test_data[indices]
            test_inputs, test_seq_len = pad_sequences(test_inputs)
            test_targets = sparse_tuple_from(test_labels[indices])
            feed_test = {
                inputs: test_inputs,
                targets: test_targets,
                seq_len: test_seq_len
            }
            test_cost, test_ler = session.run([error, label_error_rate],
                                              feed_dict=feed_test)
            log = "Epoch {}/{}, test_cost {}, test_ler {}"
            logging.info(
                log.format(curr_epoch + 1, num_epochs, test_cost, test_ler))

        input_features = [('strokeData', datatypes.Array(num_features))]
        output_features = [('labels', datatypes.Array(num_classes))]

        # Pull the trained LSTM variables out of the graph, keyed by
        # direction and stripped down to short names like 'kernel', 'bias',
        # 'w_i_diag', 'w_f_diag', 'w_o_diag'.
        trained_vars = tf.trainable_variables()
        weights = {'forward': {}, 'backward': {}}
        for _var in trained_vars:
            name = _var.name
            if name.startswith('bidirectional_rnn/fw'):
                key = name.replace('bidirectional_rnn/fw/', '')
                key = key.replace('multi_rnn_cell/cell_0/lstm_cell/', '')
                key = key.replace(':0', '')
                weights['forward'][key] = _var.eval()
            elif name.startswith('bidirectional_rnn/bw'):
                key = name.replace('bidirectional_rnn/bw/', '')
                key = key.replace('multi_rnn_cell/cell_0/lstm_cell/', '')
                key = key.replace(':0', '')
                weights['backward'][key] = _var.eval()

    builder = NeuralNetworkBuilder(input_features, output_features, mode=None)

    # TF's LSTMCell stores one concatenated bias vector per direction with
    # gates ordered i, j, f, o (j is the cell candidate); the four quarters
    # are passed through here in that stored order.
    fw_biases = [
        weights['forward']['bias'][0 * num_hidden:1 * num_hidden],
        weights['forward']['bias'][1 * num_hidden:2 * num_hidden],
        weights['forward']['bias'][2 * num_hidden:3 * num_hidden],
        weights['forward']['bias'][3 * num_hidden:4 * num_hidden]
    ]

    bw_biases = [
        weights['backward']['bias'][0 * num_hidden:1 * num_hidden],
        weights['backward']['bias'][1 * num_hidden:2 * num_hidden],
        weights['backward']['bias'][2 * num_hidden:3 * num_hidden],
        weights['backward']['bias'][3 * num_hidden:4 * num_hidden]
    ]

    num_LSTM_gates = 5

    # Zero-initialized (4, num_hidden, input_size) weight tensors; the trained
    # kernel matrices extracted above are never copied into these (see the
    # kernel-splitting sketch after this function).
    input_weights = {
        'forward': np.zeros((num_LSTM_gates - 1, num_hidden, num_features)),
        'backward': np.zeros((num_LSTM_gates - 1, num_hidden, num_features))
    }

    recurrent_weights = {
        'forward': np.zeros((num_LSTM_gates - 1, num_hidden, num_hidden)),
        'backward': np.zeros((num_LSTM_gates - 1, num_hidden, num_hidden))
    }

    builder.add_bidirlstm(
        name='bidirectional_1',
        W_h=recurrent_weights['forward'],
        W_x=input_weights['forward'],
        b=fw_biases,
        W_h_back=recurrent_weights['backward'],
        W_x_back=input_weights['backward'],
        b_back=bw_biases,
        hidden_size=num_hidden,
        input_size=num_features,
        input_names=[
            'strokeData', 'bidirectional_1_h_in', 'bidirectional_1_c_in',
            'bidirectional_1_h_in_rev', 'bidirectional_1_c_in_rev'
        ],
        output_names=[
            'y', 'bidirectional_1_h_out', 'bidirectional_1_c_out',
            'bidirectional_1_h_out_rev', 'bidirectional_1_c_out_rev'
        ],
        peep=[
            weights['forward']['w_i_diag'], weights['forward']['w_f_diag'],
            weights['forward']['w_o_diag']
        ],
        peep_back=[
            weights['backward']['w_i_diag'], weights['backward']['w_f_diag'],
            weights['backward']['w_o_diag']
        ],
        cell_clip_threshold=clip_thresh)

    builder.add_softmax(name='softmax', input_name='y', output_name='labels')

    optional_inputs = [('bidirectional_1_h_in', num_hidden),
                       ('bidirectional_1_c_in', num_hidden),
                       ('bidirectional_1_h_in_rev', num_hidden),
                       ('bidirectional_1_c_in_rev', num_hidden)]
    optional_outputs = [('bidirectional_1_h_out', num_hidden),
                        ('bidirectional_1_c_out', num_hidden),
                        ('bidirectional_1_h_out_rev', num_hidden),
                        ('bidirectional_1_c_out_rev', num_hidden)]

    # add_optionals (copied from the Keras converter in coremltools) marks the
    # recurrent-state inputs and outputs as optional in the model interface,
    # so callers can omit them at prediction time.
    builder.add_optionals(optional_inputs, optional_outputs)

    model = MLModel(builder.spec)

    model.short_description = ('Model for recognizing symbols and diagrams '
                               'drawn on an iPad screen with an Apple Pencil')

    model.input_description[
        'strokeData'] = 'A collection of strokes to classify'
    model.output_description[
        'labels'] = 'The "probability" of each label, in a dense array'

    outfile = 'bilstm.mlmodel'
    model.save(outfile)

    print('Saved to file: %s' % outfile)
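
The helpers load_ipad_data, pad_sequences, and sparse_tuple_from are called
above but not defined in this snippet. load_ipad_data is project-specific, but
the other two are generic CTC plumbing. A minimal sketch of both, assuming
inputs are a list of [time, num_features] float arrays and labels are lists of
integer class IDs (the real project code may differ):

def pad_sequences(sequences, value=0.0):
    # Pad a list of [time, num_features] arrays to the batch's max length;
    # return the padded batch plus the original sequence lengths.
    lengths = np.asarray([len(s) for s in sequences], dtype=np.int32)
    max_len = lengths.max()
    num_feats = sequences[0].shape[1]
    padded = np.full((len(sequences), max_len, num_feats), value,
                     dtype=np.float32)
    for i, seq in enumerate(sequences):
        padded[i, :len(seq)] = seq
    return padded, lengths


def sparse_tuple_from(sequences, dtype=np.int32):
    # Build the (indices, values, dense_shape) triple that
    # tf.sparse_placeholder expects from a list of label sequences.
    indices, values = [], []
    for i, seq in enumerate(sequences):
        indices.extend(zip([i] * len(seq), range(len(seq))))
        values.extend(seq)
    indices = np.asarray(indices, dtype=np.int64)
    values = np.asarray(values, dtype=dtype)
    dense_shape = np.asarray([len(sequences), indices[:, 1].max() + 1],
                             dtype=np.int64)
    return indices, values, dense_shape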
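
As noted above, input_weights and recurrent_weights stay all-zero: the trained
kernel matrices pulled out of the graph are never unpacked into them. A hedged
sketch of how that unpacking could look, assuming TF's i, j, f, o gate
concatenation (j is the cell candidate) and the input, forget, output, cell
ordering that coremltools' add_bidirlstm documents; split_tf_lstm_kernel is a
hypothetical helper, not part of the original code:

def split_tf_lstm_kernel(kernel, num_features, num_hidden):
    # TF's LSTMCell keeps one kernel of shape
    # [num_features + num_hidden, 4 * num_hidden], gates ordered i, j, f, o.
    W = kernel[:num_features, :]   # input-to-hidden block
    R = kernel[num_features:, :]   # hidden-to-hidden block
    i, j, f, o = np.split(W, 4, axis=1)
    r_i, r_j, r_f, r_o = np.split(R, 4, axis=1)
    # Transpose each gate to (num_hidden, input_size) and reorder to
    # i, f, o, cell as expected by add_bidirlstm.
    W_x = [m.T for m in (i, f, o, j)]
    W_h = [m.T for m in (r_i, r_f, r_o, r_j)]
    return W_x, W_h

# Example (hypothetical usage):
# W_x_fwd, W_h_fwd = split_tf_lstm_kernel(
#     weights['forward']['kernel'], num_features, num_hidden)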