def test_softmax_converter(self):
    input_dim = (3,)
    output_dim = (3,)
    input = [('input', datatypes.Array(*input_dim))]
    output = [('output', datatypes.Array(*output_dim))]
    builder = NeuralNetworkBuilder(input, output)
    builder.add_softmax(name='Softmax', input_name='input',
                        output_name='output')

    model_onnx = convert_coreml(builder.spec)
    self.assertIsNotNone(model_onnx)
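A hedged extension one might append inside the test above (it assumes the `onnx` package is installed): structurally validate the converted protobuf and check that a Softmax node actually made it into the graph.

import onnx

# Structural validation; raises if the converted model is malformed.
onnx.checker.check_model(model_onnx)

# The converted graph should contain a Softmax node.
self.assertIn('Softmax', [node.op_type for node in model_onnx.graph.node])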
def get_custom_model_spec():
    # `self`, `prob_name`, `_mxnet_utils` and `_np` come from the enclosing scope.
    from coremltools.models.neural_network import NeuralNetworkBuilder
    from coremltools.models.datatypes import Array, Dictionary, String
    import mxnet as _mx

    input_name = 'output1'
    input_length = self._feature_extractor.output_length
    builder = NeuralNetworkBuilder([(input_name, Array(input_length,))],
                                   [(prob_name, Dictionary(String))],
                                   'classifier')

    ctx = _mxnet_utils.get_mxnet_context()[0]
    input_name, output_name = input_name, 0
    for i, cur_layer in enumerate(self._custom_classifier):
        output_name = str(i)
        if type(cur_layer) == _mx.gluon.nn.basic_layers.Dense:
            W = cur_layer.weight.data(ctx).asnumpy()
            nC, nB = W.shape
            Wb = cur_layer.bias.data(ctx).asnumpy()

            builder.add_inner_product(name='inner_product_' + str(i),
                                      W=W,
                                      b=Wb,
                                      input_channels=nB,
                                      output_channels=nC,
                                      has_bias=True,
                                      input_name=input_name,
                                      output_name='inner_product_' + output_name)
            if cur_layer.act:
                builder.add_activation("activation" + str(i), 'RELU',
                                       'inner_product_' + output_name,
                                       output_name)
        elif type(cur_layer) == _mx.gluon.nn.basic_layers.BatchNorm:
            # Assumes a Dense layer came first, so nC is already defined.
            zeros = _np.zeros(nC)
            ones = _np.ones(nC)
            builder.add_batchnorm(name='bn_layer_' + str(i),
                                  channels=nC,
                                  gamma=ones,
                                  beta=zeros,
                                  mean=zeros,
                                  variance=ones,
                                  input_name=input_name,
                                  output_name=output_name)
        elif type(cur_layer) == _mx.gluon.nn.basic_layers.Dropout:
            # Dropout is a no-op at inference time; skip it.
            continue
        input_name = output_name

    last_output = builder.spec.neuralNetworkClassifier.layers[-1].output[0]
    builder.add_softmax('softmax', last_output, self.target)

    builder.set_class_labels(self.classes)
    builder.set_input([input_name], [(input_length,)])
    builder.set_output([self.target], [(self.num_classes,)])
    return builder.spec
def get_custom_model_spec():
    # `self` and `prob_name` come from the enclosing scope.
    from coremltools.models.neural_network import NeuralNetworkBuilder
    from coremltools.models.datatypes import Array

    input_name = "output1"
    input_length = self._feature_extractor.output_length
    builder = NeuralNetworkBuilder(
        [(input_name, Array(input_length,))],
        [(prob_name, Array(self.num_classes,))],
        "classifier",
    )

    layer_counter = [0]
    builder.set_input([input_name], [(input_length,)])

    def next_layer_name():
        layer_counter[0] += 1
        return "layer_%d" % layer_counter[0]

    for i, cur_layer in enumerate(self._custom_classifier.export_weights()):
        W = cur_layer["weight"]
        nC, nB = W.shape
        Wb = cur_layer["bias"]

        output_name = next_layer_name()
        builder.add_inner_product(
            name="inner_product_" + str(i),
            W=W,
            b=Wb,
            input_channels=nB,
            output_channels=nC,
            has_bias=True,
            input_name=input_name,
            output_name=output_name,
        )
        input_name = output_name

        if cur_layer["act"]:
            output_name = next_layer_name()
            builder.add_activation(
                "activation" + str(i), "RELU", input_name, output_name
            )
            input_name = output_name

    builder.add_softmax("softmax", input_name, prob_name)
    builder.set_class_labels(
        self.classes,
        predicted_feature_name=self.target,
        prediction_blob=prob_name,
    )
    return builder.spec
def test_softmax_converter(self):
    input_dim = (3,)
    output_dim = (3,)
    input = [('input', datatypes.Array(*input_dim))]
    output = [('output', datatypes.Array(*output_dim))]
    builder = NeuralNetworkBuilder(input, output)
    builder.add_softmax(name='Softmax', input_name='input',
                        output_name='output')

    context = ConvertContext()
    node = SoftmaxLayerConverter.convert(
        context, builder.spec.neuralNetwork.layers[0], ['input'], ['output'])
    self.assertIsNotNone(node)
def get_custom_model_spec():
    # `self` and `prob_name` come from the enclosing scope.
    from coremltools.models.neural_network import NeuralNetworkBuilder
    from coremltools.models.datatypes import Array, Dictionary, String

    input_name = 'output1'
    input_length = self._feature_extractor.output_length
    builder = NeuralNetworkBuilder([(input_name, Array(input_length,))],
                                   [(prob_name, Dictionary(String))],
                                   'classifier')

    input_name, output_name = input_name, 0
    for i, cur_layer in enumerate(self._custom_classifier.export_weights()):
        W = cur_layer['weight']
        nC, nB = W.shape
        Wb = cur_layer['bias']

        builder.add_inner_product(name="inner_product_" + str(i),
                                  W=W,
                                  b=Wb,
                                  input_channels=nB,
                                  output_channels=nC,
                                  has_bias=True,
                                  input_name=str(input_name),
                                  output_name='inner_product_' + str(output_name))
        if cur_layer['act']:
            builder.add_activation("activation" + str(i), 'RELU',
                                   'inner_product_' + str(output_name),
                                   str(output_name))
        input_name = i
        output_name = i + 1

    last_output = builder.spec.neuralNetworkClassifier.layers[-1].output[0]
    builder.add_softmax('softmax', last_output, self.target)

    builder.set_class_labels(self.classes, predicted_feature_name=self.target)
    # input_name is an int after the loop; set_input expects a string name.
    builder.set_input([str(input_name)], [(input_length,)])
    builder.set_output([self.target], [(self.num_classes,)])
    return builder.spec
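For context, the spec returned by any of the get_custom_model_spec variants above is a bare protobuf; a caller would typically wrap it as below. This is a minimal sketch using the standard coremltools API, and the file name is illustrative.

from coremltools.models import MLModel

spec = get_custom_model_spec()
model = MLModel(spec)                     # wrap the protobuf spec in a runnable model
model.save('custom_classifier.mlmodel')   # illustrative file name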
def train_model(ENV, in_file, op_file):
    graph = tf.Graph()
    with graph.as_default():
        stacked_layers = {}

        # e.g: log filter bank or MFCC features
        # Has size [batch_size, max_stepsize, num_features], but the
        # batch_size and max_stepsize can vary along each step
        inputs = tf.placeholder(tf.float32, [None, None, num_features])
        targets = tf.sparse_placeholder(tf.int32)
        # 1d array of size [batch_size]
        seq_len = tf.placeholder(tf.int32, [None])

        # Weights & biases
        weight_classes = tf.Variable(
            tf.truncated_normal([num_hidden, num_classes],
                                mean=0, stddev=0.1, dtype=tf.float32))
        bias_classes = tf.Variable(tf.zeros([num_classes]), dtype=tf.float32)

        # _activation = tf.nn.relu  # this was causing the model to diverge
        _activation = None

        layers = {'forward': [], 'backward': []}
        for key in layers.keys():
            for i in range(num_layers):
                cell = tf.nn.rnn_cell.LSTMCell(num_hidden,
                                               use_peepholes=True,
                                               activation=_activation,
                                               state_is_tuple=True,
                                               cell_clip=clip_thresh)
                # cell = RWACell(num_units=num_hidden)
                layers[key].append(cell)
            stacked_layers[key] = tf.nn.rnn_cell.MultiRNNCell(
                layers[key], state_is_tuple=True)

        outputs, bilstm_vars = tf.nn.bidirectional_dynamic_rnn(
            stacked_layers['forward'],
            stacked_layers['backward'],
            inputs,
            sequence_length=seq_len,
            time_major=False,  # [batch_size, max_time, num_hidden]
            dtype=tf.float32)
        """
        outputs_concate = tf.concat_v2(outputs, 2)
        outputs_concate = tf.reshape(outputs_concate, [-1, 2*num_hidden])
        # logits = tf.matmul(outputs_concate, weight_classes) + bias_classes
        """
        fw_output = tf.reshape(outputs[0], [-1, num_hidden])
        bw_output = tf.reshape(outputs[1], [-1, num_hidden])
        logits = tf.add(
            tf.add(tf.matmul(fw_output, weight_classes),
                   tf.matmul(bw_output, weight_classes)),
            bias_classes)
        logits = tf.reshape(logits, [batch_size, -1, num_classes])

        loss = tf.nn.ctc_loss(targets, logits, seq_len, time_major=False)
        error = tf.reduce_mean(loss)
        optimizer = tf.train.MomentumOptimizer(learning_rate,
                                               momentum).minimize(error)

        # Evaluating
        # decoded, log_prob = ctc_ops.ctc_greedy_decoder(tf.transpose(logits, perm=[1, 0, 2]), seq_len)
        decoded, log_prob = tf.nn.ctc_beam_search_decoder(
            tf.transpose(logits, perm=[1, 0, 2]), seq_len)
        label_error_rate = tf.reduce_mean(
            tf.edit_distance(tf.cast(decoded[0], tf.int32), targets))

    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.5)
    data, labels = load_ipad_data(in_file)
    bound = ((3 * len(data) // batch_size) // 4) * batch_size
    train_inputs = data[0:bound]
    train_labels = labels[0:bound]
    test_data = data[bound:]
    test_labels = labels[bound:]
    num_examples = len(train_inputs)
    num_batches_per_epoch = num_examples // batch_size

    with tf.Session(graph=graph,
                    config=tf.ConfigProto(gpu_options=gpu_options)) as session:
        # Initialize the weights and biases
        tf.global_variables_initializer().run()

        saver = tf.train.Saver(tf.global_variables(), max_to_keep=0)
        ckpt = tf.train.get_checkpoint_state(op_file)
        if ckpt:
            logging.info('load %s', ckpt.model_checkpoint_path)
            saver.restore(session, ckpt.model_checkpoint_path)
        else:
            logging.info("no previous session to load")

        for curr_epoch in range(num_epochs):
            train_cost = train_ler = 0
            start = time.time()

            for batch in range(num_batches_per_epoch):
                # Getting the index
                indices = [i % num_examples for i in
                           range(batch * batch_size, (batch + 1) * batch_size)]
                batch_train_inputs = train_inputs[indices]

                # Padding input to max_time_step of this batch
                batch_train_inputs, batch_train_seq_len = pad_sequences(
                    batch_train_inputs)

                # Converting to sparse representation so as to feed SparseTensor input
                batch_train_targets = sparse_tuple_from(train_labels[indices])

                feed = {inputs: batch_train_inputs,
                        targets: batch_train_targets,
                        seq_len: batch_train_seq_len}

                batch_cost, _ = session.run([error, optimizer], feed)
                train_cost += batch_cost * batch_size
                train_ler += session.run(label_error_rate,
                                         feed_dict=feed) * batch_size

                log = "Epoch {}/{}, iter {}, batch_cost {}"
                logging.info(log.format(curr_epoch + 1, num_epochs, batch,
                                        batch_cost))

            saver.save(session, os.path.join(ENV.output, 'best.ckpt'),
                       global_step=curr_epoch)

            # Shuffle the data
            shuffled_indexes = np.random.permutation(num_examples)
            train_inputs = train_inputs[shuffled_indexes]
            train_labels = train_labels[shuffled_indexes]

            # Metrics mean
            train_cost /= num_examples
            train_ler /= num_examples

            log = "Epoch {}/{}, train_cost = {:.3f}, train_ler = {:.3f}, time = {:.3f}"
            logging.info(log.format(curr_epoch + 1, num_epochs, train_cost,
                                    train_ler, time.time() - start))

            # Run the test data through
            indices = [i % len(test_data) for i in
                       range(batch * batch_size, (batch + 1) * batch_size)]
            test_inputs = test_data[indices]
            test_inputs, test_seq_len = pad_sequences(test_inputs)
            test_targets = sparse_tuple_from(test_labels[indices])
            feed_test = {inputs: test_inputs,
                         targets: test_targets,
                         seq_len: test_seq_len}
            test_cost, test_ler = session.run([error, label_error_rate],
                                              feed_dict=feed_test)
            log = "Epoch {}/{}, test_cost {}, test_ler {}"
            logging.info(log.format(curr_epoch + 1, num_epochs, test_cost,
                                    test_ler))

        # Export the trained weights to a Core ML model.
        input_features = [('strokeData', datatypes.Array(num_features))]
        output_features = [('labels', datatypes.Array(num_classes))]

        vars = tf.trainable_variables()
        weights = {'forward': {}, 'backward': {}}
        for _var in vars:
            name = _var.name
            if name.startswith('bidirectional_rnn/fw'):
                key = name.replace('bidirectional_rnn/fw/', '')
                key = key.replace('multi_rnn_cell/cell_0/lstm_cell/', '')
                key = key.replace(':0', '')
                weights['forward'][key] = _var.eval()
            else:
                key = name.replace('bidirectional_rnn/bw/', '')
                key = key.replace('multi_rnn_cell/cell_0/lstm_cell/', '')
                key = key.replace(':0', '')
                weights['backward'][key] = _var.eval()

        builder = NeuralNetworkBuilder(input_features, output_features,
                                       mode=None)

        fw_biases = [weights['forward']['bias'][0 * num_hidden:1 * num_hidden],
                     weights['forward']['bias'][1 * num_hidden:2 * num_hidden],
                     weights['forward']['bias'][2 * num_hidden:3 * num_hidden],
                     weights['forward']['bias'][3 * num_hidden:4 * num_hidden]]
        bw_biases = [weights['backward']['bias'][0 * num_hidden:1 * num_hidden],
                     weights['backward']['bias'][1 * num_hidden:2 * num_hidden],
                     weights['backward']['bias'][2 * num_hidden:3 * num_hidden],
                     weights['backward']['bias'][3 * num_hidden:4 * num_hidden]]

        num_LSTM_gates = 5
        input_weights = {
            'forward': np.zeros((num_LSTM_gates - 1, num_hidden, num_features)),
            'backward': np.zeros((num_LSTM_gates - 1, num_hidden, num_features))
        }
        recurrent_weights = {
            'forward': np.zeros((num_LSTM_gates - 1, num_hidden, num_hidden)),
            'backward': np.zeros((num_LSTM_gates - 1, num_hidden, num_hidden))
        }

        builder.add_bidirlstm(
            name='bidirectional_1',
            W_h=recurrent_weights['forward'],
            W_x=input_weights['forward'],
            b=fw_biases,
            W_h_back=recurrent_weights['backward'],
            W_x_back=input_weights['backward'],
            b_back=bw_biases,
            hidden_size=num_hidden,
            input_size=num_features,
            input_names=['strokeData',
                         'bidirectional_1_h_in', 'bidirectional_1_c_in',
                         'bidirectional_1_h_in_rev', 'bidirectional_1_c_in_rev'],
            output_names=['y',
                          'bidirectional_1_h_out', 'bidirectional_1_c_out',
                          'bidirectional_1_h_out_rev', 'bidirectional_1_c_out_rev'],
            peep=[weights['forward']['w_i_diag'],
                  weights['forward']['w_f_diag'],
                  weights['forward']['w_o_diag']],
            peep_back=[weights['backward']['w_i_diag'],
                       weights['backward']['w_f_diag'],
                       weights['backward']['w_o_diag']],
            cell_clip_threshold=clip_thresh)

        builder.add_softmax(name='softmax', input_name='y',
                            output_name='labels')

        optional_inputs = [('bidirectional_1_h_in', num_hidden),
                           ('bidirectional_1_c_in', num_hidden),
                           ('bidirectional_1_h_in_rev', num_hidden),
                           ('bidirectional_1_c_in_rev', num_hidden)]
        optional_outputs = [('bidirectional_1_h_out', num_hidden),
                            ('bidirectional_1_c_out', num_hidden),
                            ('bidirectional_1_h_out_rev', num_hidden),
                            ('bidirectional_1_c_out_rev', num_hidden)]
        # Mark the recurrent state blobs as optional inputs/outputs, as the
        # Keras converter in coremltools does for its LSTM layers.
        builder.add_optionals(optional_inputs, optional_outputs)

        model = MLModel(builder.spec)
        model.short_description = ('Model for recognizing symbols and diagrams '
                                   'drawn on an iPad screen with an Apple Pencil')
        model.input_description['strokeData'] = 'A collection of strokes to classify'
        model.output_description['labels'] = ('The "probability" of each label, '
                                              'in a dense array')

        outfile = 'bilstm.mlmodel'
        model.save(outfile)
        print('Saved to file: %s' % outfile)
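train_model calls pad_sequences and sparse_tuple_from without defining them. The sketches below follow the common TF 1.x CTC tutorial helpers; they are assumptions about what the missing code does, not the repository's actual implementations.

import numpy as np

def sparse_tuple_from(sequences, dtype=np.int32):
    # Build the (indices, values, shape) triple that tf.sparse_placeholder expects.
    indices, values = [], []
    for n, seq in enumerate(sequences):
        indices.extend(zip([n] * len(seq), range(len(seq))))
        values.extend(seq)
    indices = np.asarray(indices, dtype=np.int64)
    values = np.asarray(values, dtype=dtype)
    shape = np.asarray([len(sequences), indices.max(axis=0)[1] + 1],
                       dtype=np.int64)
    return indices, values, shape

def pad_sequences(sequences):
    # Zero-pad every sequence to the longest one in the batch and return
    # the padded batch together with the original sequence lengths.
    lengths = np.asarray([len(s) for s in sequences], dtype=np.int32)
    max_len = lengths.max()
    num_features = np.asarray(sequences[0]).shape[-1]
    padded = np.zeros((len(sequences), max_len, num_features), dtype=np.float32)
    for i, seq in enumerate(sequences):
        padded[i, :len(seq), :] = seq
    return padded, lengths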
def make_mlmodel(variables):
    # Specify the inputs and outputs (there can be multiple).
    # Each name corresponds to the input_name/output_name of a layer in the network so
    # that Core ML knows where to insert and extract data.
    input_features = [('image', datatypes.Array(1, IMAGE_HEIGHT, IMAGE_WIDTH))]
    output_features = [('labelValues', datatypes.Array(NUM_LABEL_INDEXES))]
    builder = NeuralNetworkBuilder(input_features, output_features, mode=None)

    # The "name" parameter has no effect on the function of the network. As far as I know
    # it's only used when Xcode fails to load your mlmodel and gives you an error telling
    # you what the problem is.
    # The input_names and output_name are used to link layers to each other and to the
    # inputs and outputs of the model. When adding or removing layers, or renaming their
    # outputs, always make sure you correct the input and output names of the layers
    # before and after them.
    builder.add_elementwise(name='add_layer',
                            input_names=['image'],
                            output_name='add_layer',
                            mode='ADD',
                            alpha=-0.5)

    # Although Core ML internally uses weight matrices of shape
    # (outputChannels, inputChannels, height, width) (as can be found by looking at the
    # protobuf specification comments), add_convolution takes the shape
    # (height, width, inputChannels, outputChannels) (as can be found in the coremltools
    # documentation). The latter shape matches what TensorFlow uses so we don't need to
    # reorder the matrix axes ourselves.
    builder.add_convolution(name='conv2d_1',
                            kernel_channels=1,
                            output_channels=32,
                            height=3,
                            width=3,
                            stride_height=1,
                            stride_width=1,
                            border_mode='same',
                            groups=0,
                            W=variables['W_conv1'].eval(),
                            b=variables['b_conv1'].eval(),
                            has_bias=True,
                            is_deconv=False,
                            output_shape=None,
                            input_name='add_layer',
                            output_name='conv2d_1')

    builder.add_activation(name='relu_1',
                           non_linearity='RELU',
                           input_name='conv2d_1',
                           output_name='relu_1',
                           params=None)

    builder.add_pooling(name='maxpool_1',
                        height=2,
                        width=2,
                        stride_height=2,
                        stride_width=2,
                        layer_type='MAX',
                        padding_type='SAME',
                        input_name='relu_1',
                        output_name='maxpool_1')

    # ...

    builder.add_flatten(name='maxpool_3_flat',
                        mode=1,
                        input_name='maxpool_3',
                        output_name='maxpool_3_flat')

    # We must swap the axes of the weight matrix because add_inner_product takes the shape
    # (outputChannels, inputChannels) whereas TensorFlow uses
    # (inputChannels, outputChannels). Unlike with add_convolution (see the comment
    # above), the shape add_inner_product expects matches what the protobuf specification
    # requires for inner products.
    builder.add_inner_product(name='fc1',
                              W=tf_fc_weights_order_to_mlmodel(
                                  variables['W_fc1'].eval()).flatten(),
                              b=variables['b_fc1'].eval().flatten(),
                              input_channels=6 * 6 * 64,
                              output_channels=1024,
                              has_bias=True,
                              input_name='maxpool_3_flat',
                              output_name='fc1')

    # ...

    builder.add_softmax(name='softmax',
                        input_name='fc2',
                        output_name='labelValues')

    model = MLModel(builder.spec)
    model.short_description = ("Model for recognizing a variety of images drawn "
                               "on screen with one's finger")
    model.input_description['image'] = 'A gesture image to classify'
    model.output_description['labelValues'] = ('The "probability" of each label, '
                                               'in a dense array')
    return model
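make_mlmodel relies on a helper tf_fc_weights_order_to_mlmodel that is not shown. Going only by the comment above the add_inner_product call, a minimal sketch is a plain transpose; the real helper may additionally need to reorder channels to reconcile TensorFlow's HWC flattening with Core ML's CHW layout, so treat this as an assumption.

import numpy as np

def tf_fc_weights_order_to_mlmodel(tf_weights):
    # TensorFlow stores fully connected weights as (inputChannels, outputChannels);
    # add_inner_product expects (outputChannels, inputChannels).
    return np.transpose(tf_weights)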