Example #1
def lstm_init_states(batch_size):
    """ Returns a tuple of names and zero arrays for LSTM init states"""
    hp = Hyperparams()
    init_shapes = lstm.init_states(batch_size=batch_size, num_lstm_layer=hp.num_lstm_layer, num_hidden=hp.num_hidden)
    init_names = [s[0] for s in init_shapes]
    init_arrays = [mx.nd.zeros(x[1]) for x in init_shapes]
    return init_names, init_arrays
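
This helper and the training examples below both lean on `lstm.init_states`, which the page doesn't show. Judging from the `l0_init_c`/`l0_init_h`/`l1_init_c`/`l1_init_h` data names handed to the `Module` further down, a plausible sketch is the following; the real implementation in the source repository may differ:

def init_states(batch_size, num_lstm_layer, num_hidden):
    """Sketch (assumed, not from the source): one (name, shape) pair per
    LSTM cell and hidden state, matching the 'l<i>_init_c'/'l<i>_init_h'
    data names used by the Module examples below."""
    init_c = [('l%d_init_c' % l, (batch_size, num_hidden)) for l in range(num_lstm_layer)]
    init_h = [('l%d_init_h' % l, (batch_size, num_hidden)) for l in range(num_lstm_layer)]
    return init_c + init_h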
Example #2
def lstm_init_states(batch_size):
    """ Returns a tuple of names and zero arrays for LSTM init states"""
    hp = Hyperparams()
    init_shapes = lstm.init_states(batch_size=batch_size, num_lstm_layer=hp.num_lstm_layer, num_hidden=hp.num_hidden)
    init_names = [s[0] for s in init_shapes]
    init_arrays = [mx.nd.zeros(x[1]) for x in init_shapes]
    
    return init_names, init_arrays
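
At prediction time, the zero arrays returned here would typically be fed to the network alongside the image. A hypothetical usage sketch follows; the image shape and the predictor module are assumptions, not part of these examples:

import mxnet as mx

init_names, init_arrays = lstm_init_states(batch_size=1)
# Placeholder input; the (1, 80 * 30) flattened-image shape is an assumption.
img_batch = mx.nd.zeros((1, 80 * 30))
batch = mx.io.DataBatch(data=[img_batch] + init_arrays)
# A bound predictor module (hypothetical) would then run:
# module.forward(batch, is_train=False)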
Example #3
def main():
    """Program entry point"""
    args = parse_args()
    if not any(args.loss == s for s in ['ctc', 'warpctc']):
        raise ValueError(
            "Invalid loss '{}' (must be 'ctc' or 'warpctc')".format(args.loss))

    hp = Hyperparams()

    # Start a multiprocessor captcha image generator
    mp_captcha = MPDigitCaptcha(font_paths=get_fonts(args.font_path),
                                h=hp.seq_length,
                                w=30,
                                num_digit_min=3,
                                num_digit_max=4,
                                num_processes=args.num_proc,
                                max_queue_size=hp.batch_size * 2)
    try:
        # Must call start() before any call to mxnet module (https://github.com/apache/incubator-mxnet/issues/9213)
        mp_captcha.start()

        if args.gpu:
            contexts = [mx.context.gpu(i) for i in range(args.gpu)]
        else:
            contexts = [mx.context.cpu(i) for i in range(args.cpu)]

        init_states = lstm.init_states(hp.batch_size, hp.num_lstm_layer,
                                       hp.num_hidden)

        data_train = OCRIter(hp.train_epoch_size // hp.batch_size,
                             hp.batch_size,
                             init_states,
                             captcha=mp_captcha,
                             name='train')
        data_val = OCRIter(hp.eval_epoch_size // hp.batch_size,
                           hp.batch_size,
                           init_states,
                           captcha=mp_captcha,
                           name='val')

        symbol = lstm.lstm_unroll(num_lstm_layer=hp.num_lstm_layer,
                                  seq_len=hp.seq_length,
                                  num_hidden=hp.num_hidden,
                                  num_label=hp.num_label,
                                  loss_type=args.loss)

        head = '%(asctime)-15s %(message)s'
        logging.basicConfig(level=logging.DEBUG, format=head)

        module = mx.mod.Module(symbol,
                               data_names=[
                                   'data', 'l0_init_c', 'l0_init_h',
                                   'l1_init_c', 'l1_init_h'
                               ],
                               label_names=['label'],
                               context=contexts)

        metrics = CtcMetrics(hp.seq_length)
        module.fit(
            train_data=data_train,
            eval_data=data_val,
            # use metrics.accuracy or metrics.accuracy_lcs
            eval_metric=mx.gluon.metric.np(metrics.accuracy,
                                           allow_extra_outputs=True),
            optimizer='sgd',
            optimizer_params={
                'learning_rate': hp.learning_rate,
                'momentum': hp.momentum,
                'wd': 0.00001,
            },
            initializer=mx.init.Xavier(factor_type="in", magnitude=2.34),
            num_epoch=hp.num_epoch,
            batch_end_callback=mx.callback.Speedometer(hp.batch_size, 50),
            epoch_end_callback=mx.callback.do_checkpoint(args.prefix),
        )
    except KeyboardInterrupt:
        print("W: interrupt received, stopping...")
    finally:
        # Reset multiprocessing captcha generator to stop processes
        mp_captcha.reset()
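
Every example on this page reads its configuration from a `Hyperparams` object that isn't shown. A minimal sketch carrying just the attributes the code above and below accesses; all values here are placeholder assumptions, not taken from the source:

class Hyperparams:
    """Sketch of the assumed config container; every value is a placeholder."""
    def __init__(self):
        self.batch_size = 128
        self.num_epoch = 100
        self.learning_rate = 0.001
        self.momentum = 0.9
        self.train_epoch_size = 30000
        self.eval_epoch_size = 3000
        self.num_hidden = 100
        self.num_lstm_layer = 2
        self.seq_length = 80
        self.num_label = 4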
Example #4
def main():
    args = parse_args()
    if not any(args.loss == s for s in ['ctc', 'warpctc']):
        raise ValueError("Invalid loss '{}' (must be 'ctc' or 'warpctc')".format(args.loss))

    hp = Hyperparams()

    # Start a multiprocessor captcha image generator
    mp_captcha = MPDigitCaptcha(
        font_paths=get_fonts(args.font_path), h=hp.seq_length, w=30,
        num_digit_min=3, num_digit_max=4, num_processes=args.num_proc, max_queue_size=hp.batch_size * 2)
    try:
        # Must call start() before any call to mxnet module (https://github.com/apache/incubator-mxnet/issues/9213)
        mp_captcha.start()

        if args.gpu:
            contexts = [mx.context.gpu(i) for i in range(args.gpu)]
        else:
            contexts = [mx.context.cpu(i) for i in range(args.cpu)]

        init_states = lstm.init_states(hp.batch_size, hp.num_lstm_layer, hp.num_hidden)

        data_train = OCRIter(
            hp.train_epoch_size // hp.batch_size, hp.batch_size, init_states, captcha=mp_captcha, name='train')
        data_val = OCRIter(
            hp.eval_epoch_size // hp.batch_size, hp.batch_size, init_states, captcha=mp_captcha, name='val')

        symbol = lstm.lstm_unroll(
            num_lstm_layer=hp.num_lstm_layer,
            seq_len=hp.seq_length,
            num_hidden=hp.num_hidden,
            num_label=hp.num_label,
            loss_type=args.loss)

        head = '%(asctime)-15s %(message)s'
        logging.basicConfig(level=logging.DEBUG, format=head)

        module = mx.mod.Module(
            symbol,
            data_names=['data', 'l0_init_c', 'l0_init_h', 'l1_init_c', 'l1_init_h'],
            label_names=['label'],
            context=contexts)

        metrics = CtcMetrics(hp.seq_length)
        module.fit(train_data=data_train,
                   eval_data=data_val,
                   # use metrics.accuracy or metrics.accuracy_lcs
                   eval_metric=mx.metric.np(metrics.accuracy, allow_extra_outputs=True),
                   optimizer='sgd',
                   optimizer_params={'learning_rate': hp.learning_rate,
                                     'momentum': hp.momentum,
                                     'wd': 0.00001,
                                     },
                   initializer=mx.init.Xavier(factor_type="in", magnitude=2.34),
                   num_epoch=hp.num_epoch,
                   batch_end_callback=mx.callback.Speedometer(hp.batch_size, 50),
                   epoch_end_callback=mx.callback.do_checkpoint(args.prefix),
                   )
    except KeyboardInterrupt:
        print("W: interrupt received, stopping...")
    finally:
        # Reset multiprocessing captcha generator to stop processes
        mp_captcha.reset()
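
`parse_args` is likewise left out. From the attributes the examples read (`args.loss`, `args.gpu`, `args.cpu`, `args.num_proc`, `args.font_path`, `args.prefix`), a plausible argparse sketch is shown below; the flag defaults and help strings are guesses:

import argparse

def parse_args():
    """Sketch of the assumed CLI; defaults are guesses, not from the source."""
    parser = argparse.ArgumentParser()
    parser.add_argument('--loss', default='ctc', help="'ctc' or 'warpctc'")
    parser.add_argument('--gpu', type=int, default=0, help='number of GPU contexts to use')
    parser.add_argument('--cpu', type=int, default=1, help='number of CPU contexts if no GPU')
    parser.add_argument('--num_proc', type=int, default=4, help='captcha generator processes')
    parser.add_argument('--font_path', required=True, help='path to .ttf fonts for captcha rendering')
    parser.add_argument('--prefix', default='ocr', help='checkpoint filename prefix')
    return parser.parse_args()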
Example #5
def main():
    """Program entry point"""
    args = parse_args()

    if not any(args.loss == s for s in ['ctc', 'warpctc']):
        raise ValueError("Invalid loss '{}' (must be 'ctc' or 'warpctc')".format(args.loss))

    hp = Hyperparams()

    # Start a multiprocessor captcha image generator
    mp_captcha = MPDigitCaptcha(
        font_paths=get_fonts(args.font_path), h=hp.seq_length, w=30,
        num_digit_min=3, num_digit_max=4, num_processes=args.num_proc, max_queue_size=hp.batch_size * 2)
    
    try:
        # Must call start() before any call to mxnet module (https://github.com/apache/incubator-mxnet/issues/9213)
        mp_captcha.start()
        
        if args.gpu:
            contexts = [mx.context.gpu(i) for i in range(args.gpu)]
        else:
            contexts = [mx.context.cpu(i) for i in range(args.cpu)]
            
        init_states = lstm.init_states(hp.batch_size, hp.num_lstm_layer, hp.num_hidden)
        
        data_train = OCRIter(
            hp.train_epoch_size // hp.batch_size, hp.batch_size, init_states, captcha=mp_captcha, name='train')
        data_eval = OCRIter(
            hp.eval_epoch_size // hp.batch_size, hp.batch_size, init_states, captcha=mp_captcha, name='eval')
        
        symbol = lstm.lstm_unroll(
            num_lstm_layer=hp.num_lstm_layer,
            seq_len=hp.seq_length,
            num_hidden=hp.num_hidden,
            num_label=hp.num_label,
            loss_type=args.loss)
        
        head = '%(asctime)-15s %(message)s'
        logging.basicConfig(level=logging.DEBUG, format=head)
        
        module = mx.mod.Module(
            symbol,
            data_names=['data', 'l0_init_c', 'l0_init_h', 'l1_init_c', 'l1_init_h'],
            label_names=['label'],
            context=contexts)
        
        metrics = CtcMetrics(hp.seq_length)
        module.fit(train_data=data_train,
                   eval_data=data_eval,
                   eval_metric=mx.metric.np(metrics.accuracy, allow_extra_outputs=True),
                   optimizer='sgd',
                   optimizer_params={
                       'learning_rate': hp.learning_rate,
                       'momentum': hp.momentum,
                       'wd': 0.0001,
                   },
                   initializer=mx.init.Xavier(factor_type='in', magnitude=2.34),
                   num_epoch=hp.num_epoch,
                   batch_end_callback=mx.callback.Speedometer(hp.batch_size, 50),
                   epoch_end_callback=mx.callback.do_checkpoint(args.prefix),
                  )
        
    except KeyboardInterrupt:
        print("W: interrupt received, stopping...")
    finally:
        mp_captcha.reset()