コード例 #1
0
ファイル: run.py プロジェクト: gchrupala/platalea
        # Build and train an ASR model from scratch with default settings.
        config = M1.get_default_config()
        net = M1.SpeechTranscriber(config)
        run_config = dict(max_norm=2.0, max_lr=2 * 1e-4, epochs=32, opt='adam')
        logging.info('Training ASR')
        M1.experiment(net, data, run_config)
        # Archive this run's metrics under a downsampling-factor suffix.
        # NOTE(review): the JSON name zero-pads ds_factor (zfill(lz)) but the
        # checkpoint name below uses the raw value — confirm this asymmetry
        # is intentional.
        suffix = str(ds_factor).zfill(lz)
        res_fname = 'result_asr_{}.json'.format(suffix)
        copyfile('result.json', res_fname)
        net_fname = 'asr_{}.best.pt'.format(ds_factor)
        # copy_best presumably selects the best epoch recorded in res_fname
        # and copies its checkpoint to net_fname — TODO confirm.
        copy_best(res_fname, net_fname, experiment_type='asr')
        # Reload the selected best checkpoint for use below.
        net = torch.load(net_fname)

    logging.info('Extracting ASR transcriptions')
    # Replace the gold transcriptions of the train/val splits with the ASR
    # hypotheses so downstream training consumes ASR output instead.
    for set_name in ['train', 'val']:
        ds = data[set_name].dataset
        hyp_asr, ref_asr = extract_trn(net, ds, use_beam_decoding=True)
        # Replacing original transcriptions with ASR's output
        for i in range(len(hyp_asr)):
            item = ds.split_data[i]
            # split_data entries are 3-tuples; the transcription is item[2].
            if item[2] == ref_asr[i]:
                ds.split_data[i] = (item[0], item[1], hyp_asr[i])
            else:
                # BUG FIX: the dataset transcription lives at index 2 of the
                # 3-element entry (see the comparison above); the original
                # read index 3, which raised IndexError on any mismatch.
                # Also removed the line-continuation whitespace that was
                # embedded inside the message literal.
                msg = ("Extracted reference #{} ({}) doesn't match "
                       "dataset's one ({}) for {} set.")
                msg = msg.format(i, ref_asr[i], item[2], set_name)
                logging.warning(msg)

    if args.asr_model_dir:
        # Saving config for text-image model
        pickle.dump(
コード例 #2
0
ファイル: run.py プロジェクト: gchrupala/platalea
    else:
        # No pretrained ASR model supplied: build and train one here.
        logging.info('Building ASR model')
        config = M1.get_default_config()
        net = M1.SpeechTranscriber(config)
        run_config = dict(max_norm=2.0, max_lr=2 * 1e-4, epochs=32, opt='adam')
        logging.info('Training ASR')
        M1.experiment(net, data, run_config)
        # Archive this run's metrics under a downsampling-factor suffix.
        # NOTE(review): the JSON name zero-pads ds_factor (zfill(lz)) but the
        # checkpoint name below uses the raw value — confirm this asymmetry
        # is intentional.
        suffix = str(ds_factor).zfill(lz)
        res_fname = 'result_asr_{}.json'.format(suffix)
        copyfile('result.json', res_fname)
        net_fname = 'asr_{}.best.pt'.format(ds_factor)
        # copy_best presumably selects the best epoch recorded in res_fname
        # and copies its checkpoint to net_fname — TODO confirm.
        copy_best(res_fname, net_fname, experiment_type='asr')
        # Reload the selected best checkpoint for use below.
        net = torch.load(net_fname)

    logging.info('Extracting ASR transcriptions')
    # Decode the validation set with beam search; references are discarded.
    hyp_asr, _ = extract_trn(net, data['val'].dataset, use_beam_decoding=True)

    if args.text_image_model_dir:
        # Reuse the label encoder of a previously trained text-image model
        # so token ids stay consistent between the two models.
        config_fpath = os.path.join(args.text_image_model_dir, 'config.pkl')
        # FIX: deterministically close the file instead of leaking the
        # handle returned by a bare open().
        with open(config_fpath, 'rb') as config_file:
            config = pickle.load(config_file)
        fd.le = config['label_encoder']
    elif args.asr_model_dir:
        # Saving config for text-image model
        # FIX: same leak — write through a context-managed handle so the
        # pickle is flushed and closed before anything reads it back.
        with open('config.pkl', 'wb') as config_file:
            pickle.dump(dict(feature_fname=feature_fname,
                             label_encoder=fd.get_label_encoder(),
                             language='en'),
                        config_file)

    if args.text_image_model_dir:
        # Load the pretrained text-image network for this downsampling factor.
        net_fname = 'net_{}.best.pt'.format(ds_factor)
        net = torch.load(os.path.join(args.text_image_model_dir, net_fname))