Example #1
def build_model(inputs, mask, units, depth, n_labels, feat_dim, init_lr, direction,
                dropout, init_filters, optim, lstm=False, vgg=False):

    outputs = Masking(mask_value=0.0)(inputs)

    if vgg is False:
        outputs = vgg2l.VGG2L(outputs, init_filters, feat_dim)
    else:
        outputs = vgg1l.VGG(outputs, init_filters, feat_dim)

    outputs = network.network(outputs, units, depth, n_labels, direction,
                              dropout, lstm)
    outputs = TimeDistributed(Dense(n_labels+1))(outputs)
    outputs = Activation('softmax')(outputs)

    model = Model([inputs, mask], outputs)
    # categorical accuracy is averaged over both the batch and temporal axes
    if optim == 'adam':
        model.compile(keras.optimizers.Adam(lr=init_lr, clipnorm=50.),
                      loss=['categorical_crossentropy'],
                      metrics=['categorical_accuracy'])
    else:
        model.compile(keras.optimizers.Adadelta(),
                      loss=['categorical_crossentropy'],
                      metrics=['categorical_accuracy'])
    return model
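A minimal call sketch (not part of the original listing). The shapes, hyper-parameters, and the 'bi' direction value are illustrative assumptions; the mask width follows the feat_dim * filters * 2 convention used by the evaluation script in Example #10.

inputs = Input(batch_shape=(8, None, 40))           # (batch, time, feat_dim)
mask = Input(batch_shape=(8, None, 40 * 16 * 2))    # feat_dim * init_filters * 2
model = build_model(inputs, mask, units=256, depth=4, n_labels=1024,
                    feat_dim=40, init_lr=1e-3, direction='bi', dropout=0.1,
                    init_filters=16, optim='adam')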
Example #2
def build_model(inputs,
                mask,
                units,
                depth,
                n_labels,
                feat_dim,
                dropout,
                init_filters,
                lstm=False,
                vgg=False):

    outputs = Masking(mask_value=0.0)(inputs)

    if vgg is False:
        outputs = vgg2l.VGG2L(outputs, init_filters, feat_dim)
    else:
        outputs = vgg1l.VGG(outputs, init_filters, feat_dim)

    outputs = Lambda(lambda x: tf.multiply(x[0], x[1]))([outputs, mask])
    outputs = Masking(mask_value=0.0)(outputs)

    outputs = network.lc_network(outputs, units, depth, n_labels, dropout,
                                 init_filters, lstm)
    outputs = TimeDistributed(Dense(n_labels + 1))(outputs)
    outputs = Activation('softmax')(outputs)

    model = Model([inputs, mask], outputs)

    return model
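The Lambda multiply in this variant compensates for the fact that Keras convolution layers do not propagate the mask created by Masking: padded frames that leak through the VGG block are re-zeroed by hand, and a fresh Masking layer is applied afterwards. Below is a sketch of a compatible binary mask; the helper is hypothetical, with the mask width taken from the feat_dim * filters * 2 convention visible in Example #10.

import numpy as np

def make_mask(frame_lengths, max_frames, feat_dim=40, init_filters=16):
    # ones over real frames, zeros over padding; the width matches the
    # flattened VGG output (feat_dim * init_filters * 2)
    mask = np.zeros((len(frame_lengths), max_frames,
                     feat_dim * init_filters * 2), dtype='float32')
    for i, n in enumerate(frame_lengths):
        mask[i, :n, :] = 1.0
    return mask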
Example #3
def build_model(inputs,
                mask,
                units,
                depth,
                n_labels,
                feat_dim,
                init_lr,
                dropout,
                init_filters,
                optim,
                lstm=False,
                vgg=False):

    masked = Masking(mask_value=0.0)(inputs)

    if vgg is False:
        outputs = vgg2l.VGG2L(masked, init_filters, feat_dim)
    else:
        outputs = vgg1l.VGG(masked, init_filters, feat_dim)

    outputs = Lambda(lambda x: tf.multiply(x[0], x[1]))([outputs, mask])
    outputs = Masking(mask_value=0.0)(outputs)

    outputs1 = network.lc_network(outputs, units, depth, n_labels, dropout,
                                  init_filters, lstm)

    # the lower-resolution branches use their own, smaller depth and filter count
    depth = 2
    init_filters = 16

    # 1/2 time-resolution branch
    outputs2 = dilation.VGG2L_Strides(masked, init_filters, feat_dim)
    outputs2 = network.lc_network(outputs2, units, depth, n_labels, dropout,
                                  init_filters, lstm)
    outputs2 = dilation.VGG2L_Transpose(outputs2, init_filters, units * 2)

    # 1/4 time-resolution branch
    outputs3 = dilation.VGG2L_QuadStrides(
        masked, init_filters, feat_dim)  # output: time/4, feat_dim*filters*2
    outputs3 = network.lc_network(outputs3, units, depth, n_labels, dropout,
                                  init_filters,
                                  lstm)  # output: time/4, units*2
    outputs3 = dilation.VGG2L_QuadTranspose(outputs3, init_filters,
                                            units * 2)  # output: time, units*2

    outputs = Add()([outputs1, outputs2, outputs3])

    outputs = TimeDistributed(Dense(n_labels + 1))(outputs)
    outputs = Activation('softmax')(outputs)

    model = Model([inputs, mask], outputs)
    if optim == 'adam':
        model.compile(keras.optimizers.Adam(lr=init_lr),
                      loss=['categorical_crossentropy'],
                      metrics=['categorical_accuracy'])
    else:
        model.compile(keras.optimizers.Adadelta(lr=init_lr),
                      loss=['categorical_crossentropy'],
                      metrics=['categorical_accuracy'])

    return model
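The Add() fusion only works because all three branches end with the same (batch, time, units * 2) shape: the strided branches shrink the time axis and the transpose blocks restore it. Below is a small sanity check one could drop in before Add(); the shape expectation is inferred from the inline comments above, not stated elsewhere in the project.

from keras import backend as K

# every branch should expose units * 2 features per frame before the sum
assert K.int_shape(outputs1)[-1] == K.int_shape(outputs2)[-1] \
    == K.int_shape(outputs3)[-1]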
Example #4
def build_model(inputs, mask, units, depth, n_labels, feat_dim, init_lr,
                dropout, init_filters, optim, lstm=False, vgg=False):

    outputs = Masking(mask_value=0.0)(inputs)

    if vgg is False:
        outputs = vgg2l.VGG2L(outputs, init_filters, feat_dim)
    else:
        outputs = vgg1l.VGG(outputs, init_filters, feat_dim)

    for n in range(depth):
        # forward pass: stateful, so states carry over between batches
        if lstm is False:
            x = GRU(units, kernel_initializer='glorot_uniform',
                    return_sequences=True,
                    stateful=True,
                    dropout=dropout,
                    unroll=False)(outputs)
        else:
            x = LSTM(units, kernel_initializer='glorot_uniform',
                     return_sequences=True,
                     unit_forget_bias=True,
                     stateful=True,
                     dropout=dropout,
                     unroll=False)(outputs)
        # backward pass: stateless, so no state values are preserved
        if lstm is False:
            y = GRU(units, kernel_initializer='glorot_uniform',
                    return_sequences=True,
                    stateful=False,
                    unroll=False,
                    dropout=dropout,
                    go_backwards=True)(outputs)
        else:
            y = LSTM(units, kernel_initializer='glorot_uniform',
                     return_sequences=True,
                     unit_forget_bias=True,
                     stateful=False,
                     unroll=False,
                     dropout=dropout,
                     go_backwards=True)(outputs)
            
        outputs = Concatenate(axis=-1)([x, y])
        outputs = layer_normalization.LayerNormalization()(outputs)

    outputs = TimeDistributed(Dense(n_labels+1))(outputs)
    outputs = Activation('softmax')(outputs)
    #outputs = Lambda(lambda x: tf.multiply(x[0], x[1]))([outputs, mask])

    model = Model([inputs, mask], outputs)
    if optim == 'adam':
        model.compile(keras.optimizers.Adam(lr=init_lr, clipnorm=50.),
                      loss=['categorical_crossentropy'],
                      metrics=['categorical_accuracy'])
    else:
        model.compile(keras.optimizers.Adadelta(lr=init_lr, clipnorm=50.),
                      loss=['categorical_crossentropy'],
                      metrics=['categorical_accuracy'])

    return model
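One caveat with hand-wired backward layers like the ones above: in Keras, go_backwards=True also returns the output sequence in reversed time order (the Bidirectional wrapper undoes this internally, but nothing here does). If strict frame alignment between x and y is intended, one way to restore it is to flip the backward output before Concatenate; this is a sketch of the general technique, not a fix taken from the original project.

from keras import backend as K
from keras.layers import Lambda

# reverse the backward output along the time axis so frame t of x and y
# refer to the same input frame
y = Lambda(lambda t: K.reverse(t, axes=1))(y)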
Example #5
def build_model(file,
                inputs,
                mask,
                units,
                depth,
                n_labels,
                feat_dim,
                init_lr,
                dropout,
                init_filters,
                optim,
                proc_frames,
                lstm=False,
                vgg=False):

    outputs = Masking(mask_value=0.0)(inputs)

    if vgg is False:
        outputs = vgg2l.VGG2L(outputs, init_filters, feat_dim)
    else:
        outputs = vgg1l.VGG(outputs, init_filters, feat_dim)

    vgg_model = Model(inputs, outputs)
    vgg_model.trainable = False

    vgg_out = vgg_model.output
    outputs = Lambda(lambda x: tf.multiply(x[0], x[1]))([vgg_out, mask])
    rnn_inputs = Masking(mask_value=0.0)(outputs)

    rnn_model, rnn_trunc_model = network.lc_network(rnn_inputs, units, depth,
                                                    n_labels, dropout,
                                                    init_filters, proc_frames,
                                                    lstm)
    outputs = rnn_model.output
    outputs = TimeDistributed(Dense(n_labels + 1))(outputs)
    outputs = Activation('softmax')(outputs)

    model = Model([inputs, mask], outputs)
    valid_model = Model([inputs, mask], rnn_trunc_model.output)

    if optim == 'adam':
        model.compile(keras.optimizers.Adam(lr=init_lr),
                      loss=['categorical_crossentropy'],
                      metrics=['categorical_accuracy'])
    else:
        model.compile(keras.optimizers.Adadelta(lr=init_lr),
                      loss=['categorical_crossentropy'],
                      metrics=['categorical_accuracy'])

    return model, valid_model
Example #6
def build_model(inputs, units, depth, n_labels, feat_dim, init_lr, dropout,
                init_filters, optim):

    outputs = vgg2l.VGG2L(inputs, init_filters, feat_dim)

    # here lstm is a module-level network builder, not the boolean flag
    # used by the other build_model variants
    outputs = lstm(outputs, units, depth, n_labels, dropout)
    outputs = TimeDistributed(Dense(n_labels + 1))(outputs)
    outputs = Activation('softmax')(outputs)

    if optim is not None:
        model = CTCModel.CTCModel([inputs], [outputs], greedy=True)
        if optim == 'adam':
            model.compile(keras.optimizers.Adam(lr=init_lr))
        elif optim == 'sgd':
            model.compile(keras.optimizers.SGD(lr=init_lr, momentum=0.9))
        else:
            model.compile(keras.optimizers.Adadelta(lr=init_lr))
    else:
        model = Model(inputs, outputs)
    return model
Example #7
def build_model(inputs,
                mask,
                units,
                depth,
                n_labels,
                feat_dim,
                init_lr,
                dropout,
                init_filters,
                optim,
                lstm=False,
                vgg=False):

    outputs = Masking(mask_value=0.0)(inputs)

    if vgg is False:
        outputs = vgg2l.VGG2L(outputs, init_filters, feat_dim)
    else:
        outputs = vgg1l.VGG(outputs, init_filters, feat_dim)

    outputs = Lambda(lambda x: tf.multiply(x[0], x[1]))([outputs, mask])
    outputs = Masking(mask_value=0.0)(outputs)

    outputs = network.lc_network(outputs, units, depth, n_labels, dropout,
                                 init_filters, lstm)
    outputs = TimeDistributed(Dense(n_labels + 1))(outputs)
    outputs = Activation('softmax')(outputs)

    model = Model([inputs, mask], outputs)
    if optim == 'adam':
        model.compile(keras.optimizers.Adam(lr=init_lr),
                      loss=[multi_utils.soft_loss],
                      metrics=['categorical_accuracy'])
        #metrics=[multi_utils.soft_acc])
    else:
        model.compile(keras.optimizers.Adadelta(lr=init_lr),
                      loss=[multi_utils.soft_loss],
                      metrics=['categorical_accuracy'])
        #metrics=[multi_utils.soft_acc])

    return model
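multi_utils.soft_loss is project code that is not shown in this listing; the name and the commented-out soft_acc metric suggest a cross-entropy against soft (non-one-hot) frame targets. The following is only a guess at its shape, not the project's actual implementation.

from keras import backend as K

def soft_loss(y_true, y_pred):
    # cross-entropy that accepts soft target distributions per frame
    y_pred = K.clip(y_pred, K.epsilon(), 1.0)
    return -K.sum(y_true * K.log(y_pred), axis=-1)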
Example #8
def build_model(inputs, units, depth, n_labels, feat_dim, init_lr, direction,
                dropout, init_filters, optim, lstm=False, vgg=False):

    if vgg is False:
        outputs = vgg2l.VGG2L(inputs, init_filters, feat_dim)
    else:
        outputs = vgg1l.VGG(inputs, init_filters, feat_dim)
    #outputs = dilation.VGG2L_Strides(inputs, init_filters, feat_dim)

    outputs = network.network(outputs, units, depth, n_labels, direction,
                              dropout, lstm)
    #outputs = dilation.VGG2L_Transpose(outputs, init_filters, units*2)
    outputs = TimeDistributed(Dense(n_labels+1))(outputs)
    outputs = Activation('softmax')(outputs)

    model = CTCModel.CTCModel([inputs], [outputs], greedy=True)
    if optim == 'adam':
        model.compile(keras.optimizers.Adam(lr=init_lr))
    elif optim == 'sgd':
        model.compile(keras.optimizers.SGD(lr=init_lr, momentum=0.9))
    else:
        model.compile(keras.optimizers.Adadelta(lr=init_lr))

    return model
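If CTCModel is the keras CTCModel wrapper, training does not pass labels through y in the usual way: features, label sequences, and both length vectors all travel through x, with a dummy target array, since the CTC loss is computed inside the model. A usage sketch under that assumption, with illustrative variable names.

import numpy as np

model.fit(x=[x_train, y_train, x_len, y_len],
          y=np.zeros(len(x_train)),  # dummy target; CTC loss is internal
          batch_size=16, epochs=10)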
Example #9
def build_model(inputs,
                units,
                depth,
                n_labels,
                feat_dim,
                direction,
                init_filters,
                lstm=False,
                vgg=False):

    if vgg is False:
        outputs = vgg2l.VGG2L(inputs, init_filters, feat_dim)
    else:
        outputs = vgg1l.VGG(inputs, init_filters, feat_dim)

    outputs = network.network(outputs, units, depth, n_labels, direction, 0.0,
                              lstm)
    outputs = TimeDistributed(Dense(n_labels + 1))(outputs)
    outputs = Activation('softmax')(outputs)

    model = Model(inputs, outputs)

    return model
Example #10
def main():

    parser = argparse.ArgumentParser()
    parser.add_argument('--data',
                        type=str,
                        required=True,
                        help='training data')
    parser.add_argument('--eval', type=str, help='evaluation data')
    parser.add_argument('--feat-dim', default=40, type=int, help='feats dim')
    parser.add_argument('--n-labels',
                        default=1024,
                        type=int,
                        required=True,
                        help='number of output labels')
    parser.add_argument('--batch-size',
                        default=1,
                        type=int,
                        help='mini-batch size')
    parser.add_argument('--snapshot',
                        type=str,
                        default='./',
                        help='snapshot directory')
    parser.add_argument('--snapshot-prefix',
                        type=str,
                        default='snapshot',
                        help='snapshot file prefix')
    parser.add_argument('--eval-output-prefix', type=str, default='eval_out')
    parser.add_argument('--units',
                        type=int,
                        default=16,
                        help='number of LSTM cells')
    parser.add_argument('--lstm-depth',
                        type=int,
                        default=2,
                        help='number of LSTM layers')
    parser.add_argument('--process-frames',
                        type=int,
                        default=10,
                        help='process frames')
    parser.add_argument('--extra-frames1',
                        type=int,
                        default=10,
                        help='1st extra frames')
    parser.add_argument('--extra-frames2',
                        type=int,
                        default=10,
                        help='2nd extra frames')
    parser.add_argument('--num-extra-frames1',
                        type=int,
                        default=1,
                        help='number of extra frames 1')
    parser.add_argument('--filters',
                        type=int,
                        default=16,
                        help='number of filters')
    parser.add_argument('--lstm', action='store_true')
    parser.add_argument('--vgg', action='store_true')
    parser.add_argument('--prior',
                        type=str,
                        default=None,
                        help='prior weights')
    parser.add_argument('--prior-scale',
                        type=float,
                        default=1.0,
                        help='prior scaler')
    parser.add_argument('--weights',
                        type=str,
                        required=True,
                        help='model weights')

    args = parser.parse_args()

    eval_in = Input(batch_shape=(args.batch_size, None, args.feat_dim))
    eval_mask = Input(batch_shape=(args.batch_size, None,
                                   args.feat_dim * args.filters * 2))

    # build the evaluation graph from the parsed arguments; dropout is set to
    # 0.0 here on the assumption that this graph is used for inference only
    outputs = Masking(mask_value=0.0)(eval_in)

    if args.vgg is False:
        outputs = vgg2l.VGG2L(outputs, args.filters, args.feat_dim)
    else:
        outputs = vgg1l.VGG(outputs, args.filters, args.feat_dim)

    outputs = Lambda(lambda x: tf.multiply(x[0], x[1]))([outputs, eval_mask])
    outputs = Masking(mask_value=0.0)(outputs)

    outputs = network.lc_network(outputs, args.units, args.lstm_depth,
                                 args.n_labels, 0.0, args.lstm)
    outputs = TimeDistributed(Dense(args.n_labels + 1))(outputs)
    outputs = Activation('softmax')(outputs)

    model = Model([eval_in, eval_mask], outputs)

    return model
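A plausible continuation, not present in the original excerpt: --weights is a required argument, so the built model would normally be restored and used for decoding before (or instead of) the return above. feats and feat_mask are hypothetical arrays here.

    # hypothetical follow-up inside main(), before the return:
    model.load_weights(args.weights)
    posteriors = model.predict([feats, feat_mask],
                               batch_size=args.batch_size)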