config = M1.get_default_config() net = M1.SpeechTranscriber(config) run_config = dict(max_norm=2.0, max_lr=2 * 1e-4, epochs=32, opt='adam') logging.info('Training ASR') M1.experiment(net, data, run_config) suffix = str(ds_factor).zfill(lz) res_fname = 'result_asr_{}.json'.format(suffix) copyfile('result.json', res_fname) net_fname = 'asr_{}.best.pt'.format(ds_factor) copy_best(res_fname, net_fname, experiment_type='asr') net = torch.load(net_fname) logging.info('Extracting ASR transcriptions') for set_name in ['train', 'val']: ds = data[set_name].dataset hyp_asr, ref_asr = extract_trn(net, ds, use_beam_decoding=True) # Replacing original transcriptions with ASR's output for i in range(len(hyp_asr)): item = ds.split_data[i] if item[2] == ref_asr[i]: ds.split_data[i] = (item[0], item[1], hyp_asr[i]) else: msg = 'Extracted reference #{} ({}) doesn\'t match dataset\'s \ one ({}) for {} set.' msg = msg.format(i, ref_asr[i], ds.split_data[i][3], set_name) logging.warning(msg) if args.asr_model_dir: # Saving config for text-image model pickle.dump(
else: logging.info('Building ASR model') config = M1.get_default_config() net = M1.SpeechTranscriber(config) run_config = dict(max_norm=2.0, max_lr=2 * 1e-4, epochs=32, opt='adam') logging.info('Training ASR') M1.experiment(net, data, run_config) suffix = str(ds_factor).zfill(lz) res_fname = 'result_asr_{}.json'.format(suffix) copyfile('result.json', res_fname) net_fname = 'asr_{}.best.pt'.format(ds_factor) copy_best(res_fname, net_fname, experiment_type='asr') net = torch.load(net_fname) logging.info('Extracting ASR transcriptions') hyp_asr, _ = extract_trn(net, data['val'].dataset, use_beam_decoding=True) if args.text_image_model_dir: config_fpath = os.path.join(args.text_image_model_dir, 'config.pkl') config = pickle.load(open(config_fpath, 'rb')) fd.le = config['label_encoder'] elif args.asr_model_dir: # Saving config for text-image model pickle.dump(dict(feature_fname=feature_fname, label_encoder=fd.get_label_encoder(), language='en'), open('config.pkl', 'wb')) if args.text_image_model_dir: net_fname = 'net_{}.best.pt'.format(ds_factor) net = torch.load(os.path.join(args.text_image_model_dir, net_fname))