Beispiel #1
0
def decode_dataset(logits, test_dataset, batch_size, lm_alpha, lm_beta, mesh_x,
                   mesh_y, labels, grid_index):
    """Score one language-model (alpha, beta) setting on pre-computed outputs.

    Batches drawn from ``test_dataset`` are matched by position with
    ``logits``: ``logits[i][0]`` and ``logits[i][1]`` are wrapped with
    ``torch.from_numpy`` and passed to the beam decoder as the outputs and
    their sizes. The first hypothesis of every utterance is compared with
    the reference string recovered from the batch targets.

    Beam width, cutoff and LM path are read from the module-level ``args``.

    Returns:
        ``[grid_index, mesh_x, mesh_y, lm_alpha, lm_beta, wer, cer]``; ``wer``
        and ``cer`` are the sums of the per-utterance rates divided by
        ``len(test_loader.dataset)``.
    """
    print("Beginning decode for {}, {}".format(lm_alpha, lm_beta))
    loader = AudioDataLoader(test_dataset,
                             batch_size=batch_size,
                             num_workers=0)
    blank = labels.index('_')
    reference_decoder = GreedyDecoder(labels, blank_index=blank)
    beam_decoder = BeamCTCDecoder(labels,
                                  beam_width=args.beam_width,
                                  cutoff_top_n=args.cutoff_top_n,
                                  blank_index=blank,
                                  lm_path=args.lm_path,
                                  alpha=lm_alpha,
                                  beta=lm_beta,
                                  num_processes=1)

    cer_sum, wer_sum = 0, 0
    for batch_no, batch in enumerate(loader):
        # Only the targets and their sizes are needed; the acoustic inputs
        # were already consumed when ``logits`` was produced.
        _inputs, targets, _input_percentages, target_sizes = batch

        # The loader yields all targets of the batch as one flat sequence;
        # cut it back into one slice per utterance.
        per_utterance_targets = []
        start = 0
        for length in target_sizes:
            end = start + length
            per_utterance_targets.append(targets[start:end])
            start = end

        cached_out = torch.from_numpy(logits[batch_no][0])
        cached_sizes = torch.from_numpy(logits[batch_no][1])

        hypotheses, _ = beam_decoder.decode(cached_out, cached_sizes)
        references = reference_decoder.convert_to_strings(
            per_utterance_targets)

        batch_wer, batch_cer = 0, 0
        for pos, reference_entry in enumerate(references):
            hypothesis = hypotheses[pos][0]
            reference = reference_entry[0]
            # Edit distances are normalised per utterance by the reference's
            # word count / character count before being accumulated.
            batch_wer += beam_decoder.wer(hypothesis, reference) / float(
                len(reference.split()))
            batch_cer += beam_decoder.cer(hypothesis, reference) / float(
                len(reference))
        cer_sum += batch_cer
        wer_sum += batch_wer

    utterance_count = len(loader.dataset)
    wer = wer_sum / utterance_count
    cer = cer_sum / utterance_count

    return [grid_index, mesh_x, mesh_y, lm_alpha, lm_beta, wer, cer]
def decode_dataset(logits, test_dataset, batch_size, lm_alpha, lm_beta, mesh_x, mesh_y, labels, grid_index):
    """Evaluate a single (lm_alpha, lm_beta) grid point against cached logits.

    For batch ``i`` of ``test_dataset`` the arrays ``logits[i][0]`` (outputs)
    and ``logits[i][1]`` (sizes) are converted with ``torch.from_numpy`` and
    beam-decoded; the best hypothesis of each utterance is scored against
    the reference text decoded from the batch targets.

    Decoder options other than alpha/beta come from the module-level ``args``.

    Returns:
        A list ``[grid_index, mesh_x, mesh_y, lm_alpha, lm_beta, wer, cer]``.
        ``wer`` / ``cer`` are per-utterance rates summed over the whole set
        and divided by ``len(test_loader.dataset)``.
    """

    def _unflatten(flat_targets, sizes):
        # Split the flat target sequence into one chunk per utterance.
        chunks = []
        cursor = 0
        for n in sizes:
            chunks.append(flat_targets[cursor:cursor + n])
            cursor = cursor + n
        return chunks

    print("Beginning decode for {}, {}".format(lm_alpha, lm_beta))
    test_loader = AudioDataLoader(test_dataset, batch_size=batch_size, num_workers=0)
    blank_idx = labels.index('_')
    greedy = GreedyDecoder(labels, blank_index=blank_idx)
    beam = BeamCTCDecoder(
        labels,
        beam_width=args.beam_width,
        cutoff_top_n=args.cutoff_top_n,
        blank_index=blank_idx,
        lm_path=args.lm_path,
        alpha=lm_alpha,
        beta=lm_beta,
        num_processes=1,
    )

    running_cer = 0
    running_wer = 0
    for step, batch in enumerate(test_loader):
        _, targets, _, target_sizes = batch
        truth = greedy.convert_to_strings_input = None  # placeholder removed below
        del truth, greedy.convert_to_strings_input
        split_targets = _unflatten(targets, target_sizes)

        net_out = torch.from_numpy(logits[step][0])
        net_sizes = torch.from_numpy(logits[step][1])

        best_paths, _ = beam.decode(net_out, net_sizes)
        truth_strings = greedy.convert_to_strings(split_targets)

        step_wer = 0
        step_cer = 0
        for k in range(len(truth_strings)):
            hyp = best_paths[k][0]
            ref = truth_strings[k][0]
            word_rate = beam.wer(hyp, ref) / float(len(ref.split()))
            char_rate = beam.cer(hyp, ref) / float(len(ref))
            step_wer += word_rate
            step_cer += char_rate
        running_cer += step_cer
        running_wer += step_wer

    dataset_size = len(test_loader.dataset)
    wer = running_wer / dataset_size
    cer = running_cer / dataset_size

    return [grid_index, mesh_x, mesh_y, lm_alpha, lm_beta, wer, cer]
Beispiel #3
0
            offset += size

        inputs = inputs.to(device)
        out, output_sizes = model(inputs, input_sizes)

        if decoder is None:
            # add output to data array, and continue
            output_data.append((out.numpy(), output_sizes.numpy()))
            continue

        decoded_output, _ = decoder.decode(out.data, output_sizes.data)
        target_strings = target_decoder.convert_to_strings(split_targets)
        for x in range(len(target_strings)):
            transcript, reference = decoded_output[x][0], target_strings[x][0]
            wer_inst = decoder.wer(transcript, reference)
            cer_inst = decoder.cer(transcript, reference)
            total_wer += wer_inst
            total_cer += cer_inst
            num_tokens += len(reference.split())
            num_chars += len(reference)
            if args.verbose:
                print("Ref:", reference.lower())
                print("Hyp:", transcript.lower())
                print("WER:",
                      float(wer_inst) / len(reference.split()), "CER:",
                      float(cer_inst) / len(reference), "\n")

    if decoder is not None:
        wer = float(total_wer) / num_tokens
        cer = float(total_cer) / num_chars
Beispiel #4
0
        for size in target_sizes:
            split_targets.append(targets[offset:offset + size])
            offset += size

        if args.cuda:
            inputs = inputs.cuda()

        out = model(inputs)
        out = out.transpose(0, 1)  # TxNxH
        seq_length = out.size(0)
        sizes = input_percentages.mul_(int(seq_length)).int()

        decoded_output = decoder.decode(out.data, sizes)
        target_strings = decoder.process_strings(
            decoder.convert_to_strings(split_targets))
        wer, cer = 0, 0
        for x in range(len(target_strings)):
            wer += decoder.wer(decoded_output[x], target_strings[x]) / float(
                len(target_strings[x].split()))
            cer += decoder.cer(decoded_output[x], target_strings[x]) / float(
                len(target_strings[x]))
        total_cer += cer
        total_wer += wer

    wer = total_wer / len(test_loader.dataset)
    cer = total_cer / len(test_loader.dataset)

    print('Test Summary \t'
          'Average WER {wer:.3f}\t'
          'Average CER {cer:.3f}\t'.format(wer=wer * 100, cer=cer * 100))
Beispiel #5
0
        sizes = input_percentages.mul_(int(seq_length)).int()

        if decoder is None:
            # add output to data array, and continue
            output_data.append((out.data.cpu().numpy(), sizes.numpy()))
            continue

        decoded_output, _, = decoder.decode(out.data, sizes)
        target_strings = target_decoder.convert_to_strings(split_targets)
        wer, cer = 0, 0

        for x in range(len(target_strings)):
            decoding, reference = decoded_output[x][0], target_strings[x][0]
            wer_inst = decoder.wer(decoding, reference) / float(
                len(reference.split()))
            cer_inst = decoder.cer(decoding, reference) / float(len(reference))
            wer += wer_inst
            cer += cer_inst
            if (random.uniform(0, 1) < args.transcript_prob):
                #if(random.uniform(0, 1) < 1): # debug
                print('reference = ' + reference)
                print('decoding = ' + decoding)
                print('wer = ' + str(wer_inst) + ', cer = ' + str(cer_inst))

                logger.error('decoding : ' + decoding)
                logger.error('reference : ' + reference)
                logger.error('WER = ' + str(wer_inst) + ', CER = ' +
                             str(cer_inst))
                logger.error(' ')

        total_cer += cer
Beispiel #6
0
Datei: test.py Projekt: gruly/DSA
            transcript, reference = decoded_output[x][0], target_strings[x][0]
	    if args.save_path or args.eval :
		ref_file.write(reference.encode('utf-8')+"("+audio_ids[x]+")\n")            
		trans_file.write(transcript.encode('utf-8')+"("+audio_ids[x]+")\n")            
                cp=cp+1 
    
            wer_inst=0
            if args.eval == 'concept': # Concept error rate evaluation       
                new_ref=convert_to_NE(reference)
                new_hyp=convert_to_NE(transcript)
                n_concept_err+= decoder.wer(new_hyp, new_ref)
                n_concept_ref+=len(new_ref.split())
        #       print " wer_inst  ", wer_inst
            else:
		wer_inst = decoder.wer(transcript, reference) / float(len(reference.split()))
            cer_inst = decoder.cer(transcript, reference) / float(len(reference))
            wer += wer_inst
            cer += cer_inst
            if args.verbose:
                print("Ref:", reference.lower())
                print("Hyp:", transcript.lower())
                print("WER:", wer_inst, "CER:", cer_inst, "\n")
	    if args.nbest_path != None :
#		print " saving the ",args.beam_width , "best list "
                
		nbest_file=open(args.nbest_path,'a')
                nbest_file.write(" REF: "+ " "+reference.encode('utf-8')+"\n")
		for j in range (10):
#	c	    print " audio_ids ", audio_ids[x], " x j ", x, " ",j, " score  ",scores[x][j], " utterance  ", decoded_output[x][j]
		    nbest_file.write(audio_ids[x]+" "+str(scores[x][j])+" "+decoded_output[x][j].encode('utf-8')+"\n")
	        nbest_file.close()
Beispiel #7
0
        if args.cuda:
            inputs = inputs.cuda()

        out, output_sizes = model(inputs, input_sizes)

        if decoder is None:
            # add output to data array, and continue
            output_data.append((out.numpy(), output_sizes.numpy()))
            continue

        decoded_output, _ = decoder.decode(out.data, output_sizes.data)
        target_strings = target_decoder.convert_to_strings(split_targets)
        for x in range(len(target_strings)):
            transcript, reference = decoded_output[x][0], target_strings[x][0]
            wer_inst = decoder.wer(transcript, reference)
            cer_inst = decoder.cer(transcript, reference)
            total_wer += wer_inst
            total_cer += cer_inst
            num_tokens += len(reference.split())
            num_chars += len(reference)
            if args.verbose:
                print("Ref:", reference.lower())
                print("Hyp:", transcript.lower())
                print("WER:", float(wer_inst) / len(reference.split()), "CER:", float(cer_inst) / len(reference), "\n")

    if decoder is not None:
        wer = float(total_wer) / num_tokens
        cer = float(total_cer) / num_chars

        print('Test Summary \t'
              'Average WER {wer:.3f}\t'
Beispiel #8
0
def decode_dataset(logits, test_dataset, batch_size, lm_alpha, lm_beta, mesh_x,
                   mesh_y, index, labels, eval):
    """Decode cached outputs for one (lm_alpha, lm_beta) setting and score them.

    Batch ``i`` of ``test_dataset`` is paired with ``logits[i]``:
    ``logits[i][0]`` and ``logits[i][1]`` are converted with
    ``torch.from_numpy`` and passed to the beam decoder. The first hypothesis
    of each utterance is compared with the reference decoded from the batch
    targets.

    When ``eval == 'concept'`` the references and transcriptions are also
    written to ``<dirname(args.output_path)>/<model_name>/<index>/`` and an
    external Perl script is run on that directory; its stdout is parsed as a
    percentage and returned in place of the WER.

    Args:
        logits: indexable of ``(outputs, sizes)`` NumPy pairs, one per batch.
        test_dataset: dataset handed to ``AudioDataLoader``.
        batch_size: loader batch size.
        lm_alpha, lm_beta: language-model weights forwarded to the decoder.
        mesh_x, mesh_y: grid coordinates, echoed back in the result.
        index: sub-directory name used for the concept-evaluation files.
        labels: label set; must contain ``'_'`` (used as the blank index).
        eval: evaluation mode; only ``'concept'`` is treated specially.
            (The name shadows the builtin but is part of the signature.)

    Returns:
        ``[mesh_x, mesh_y, lm_alpha, lm_beta, score, cer]`` where ``score`` is
        the script's percentage divided by 100 in concept mode, otherwise the
        mean per-utterance WER.
    """
    print("Beginning decode for {}, {}".format(lm_alpha, lm_beta))
    test_loader = AudioDataLoader(test_dataset,
                                  batch_size=batch_size,
                                  num_workers=0)
    target_decoder = GreedyDecoder(labels, blank_index=labels.index('_'))
    decoder = BeamCTCDecoder(labels,
                             beam_width=args.beam_width,
                             cutoff_top_n=args.cutoff_top_n,
                             blank_index=labels.index('_'),
                             lm_path=args.lm_path,
                             alpha=lm_alpha,
                             beta=lm_beta,
                             num_processes=1)
    # NOTE(review): the dots in this pattern are unescaped regex wildcards;
    # kept as-is so the derived directory names do not change.
    model_name = re.sub('.json.pth.tar', '', os.path.basename(args.model_path))
    concept_eval = eval == 'concept'

    # Both handles start as None so the cleanup below is safe in every mode.
    # Previously the files were closed unconditionally, which raised for any
    # non-concept run (ref_file was None, trans_file was never bound).
    ref_file = None
    trans_file = None
    eval_dir = None
    total_cer, total_wer = 0, 0
    try:
        if concept_eval:
            eval_dir = "%s/%s/%s" % (os.path.dirname(
                args.output_path), model_name, index)
            if not os.path.exists(eval_dir):
                os.makedirs(eval_dir)
            manifest_name = re.sub('.csv', '',
                                   os.path.basename(args.test_manifest))
            ref_file = open(
                "%s/%s_reference.txt" % (eval_dir, manifest_name), 'w')
            trans_file = open(
                "%s/%s_transcription.txt" % (eval_dir, manifest_name), 'w')

        for i, (data) in enumerate(test_loader):
            inputs, targets, input_percentages, target_sizes, audio_ids = data

            # unflatten targets
            split_targets = []
            offset = 0
            for size in target_sizes:
                split_targets.append(targets[offset:offset + size])
                offset += size

            out = torch.from_numpy(logits[i][0])
            sizes = torch.from_numpy(logits[i][1])

            # This decoder variant returns five values; only the decoded
            # strings are used here.
            decoded_output, _, _, _, _ = decoder.decode(out, sizes)
            target_strings = target_decoder.convert_to_strings(split_targets)
            wer, cer = 0, 0
            for x in range(len(target_strings)):
                transcript = decoded_output[x][0]
                reference = target_strings[x][0]
                if concept_eval:
                    # NOTE(review): encode() + str concatenation looks like
                    # Python 2 text handling -- confirm before running on 3.
                    ref_file.write(
                        reference.encode('utf-8') + "(" + audio_ids[x] + ")\n")
                    trans_file.write(
                        transcript.encode('utf-8') + "(" + audio_ids[x] +
                        ")\n")

                wer_inst = decoder.wer(transcript, reference) / float(
                    len(reference.split()))
                cer_inst = decoder.cer(transcript, reference) / float(
                    len(reference))
                wer += wer_inst
                cer += cer_inst
            total_cer += cer
            total_wer += wer
    finally:
        # Close (and thereby flush) the evaluation files before the external
        # script reads them, and also when decoding fails part-way.
        if ref_file is not None:
            ref_file.close()
        if trans_file is not None:
            trans_file.close()

    wer = total_wer / len(test_loader.dataset)
    cer = total_cer / len(test_loader.dataset)
    if concept_eval:  # Concept error rate evaluation
        cmd = "perl /lium/buster1/ghannay/deepSpeech2/deepspeech.pytorch/data/eval.sclit_cer.pl %s" % (
            eval_dir)
        print("cmd  ", cmd)
        p = subprocess.Popen(cmd,
                             stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE,
                             shell=True)
        coner, error = p.communicate()
        print(" coner  ", coner)
        return [mesh_x, mesh_y, lm_alpha, lm_beta, float(coner) / 100, cer]
    else:
        return [mesh_x, mesh_y, lm_alpha, lm_beta, wer, cer]
Beispiel #9
0
def decode_dataset(logits, test_dataset, batch_size, lm_alpha, lm_beta, mesh_x,
                   mesh_y, labels):
    """Beam-decode cached outputs for one (lm_alpha, lm_beta) pair.

    Batches from ``test_dataset`` (loaded through ``FeatLoader``) are paired
    by position with ``logits``; ``logits[i][0]`` / ``logits[i][1]`` are
    wrapped with ``torch.from_numpy`` and decoded. The best hypothesis of
    each utterance is scored against its reference, and a random sample of
    utterances (probability ``args.detail_log_print_prob``) is echoed to
    stdout and to the module logger.

    Returns:
        ``[mesh_x, mesh_y, lm_alpha, lm_beta, wer, cer]`` with ``wer`` and
        ``cer`` being the summed per-utterance rates divided by
        ``len(test_loader.dataset)``.
    """
    print("Beginning decode for {}, {}".format(lm_alpha, lm_beta))
    feat_loader = FeatLoader(test_dataset,
                             batch_size=batch_size,
                             num_workers=0)
    blank_position = labels.index('_')
    ref_decoder = GreedyDecoder(labels, blank_index=blank_position)
    lm_decoder = BeamCTCDecoder(labels,
                                beam_width=args.beam_width,
                                cutoff_top_n=args.cutoff_top_n,
                                blank_index=blank_position,
                                lm_path=args.lm_path,
                                alpha=lm_alpha,
                                beta=lm_beta,
                                num_processes=1)

    sum_cer, sum_wer = 0, 0
    for batch_idx, batch in enumerate(feat_loader):
        _feats, targets, _percentages, target_sizes = batch

        # Rebuild the per-utterance target slices from the flat target
        # sequence delivered by the loader.
        utterance_targets = []
        begin = 0
        for count in target_sizes:
            stop = begin + count
            utterance_targets.append(targets[begin:stop])
            begin = stop

        stored_out = torch.from_numpy(logits[batch_idx][0])
        stored_sizes = torch.from_numpy(logits[batch_idx][1])

        decoded, _ = lm_decoder.decode(stored_out, stored_sizes)
        reference_strings = ref_decoder.convert_to_strings(utterance_targets)

        batch_wer, batch_cer = 0, 0
        for n, reference_group in enumerate(reference_strings):
            transcript = decoded[n][0]
            reference = reference_group[0]
            wer_inst = lm_decoder.wer(transcript, reference) / float(
                len(reference.split()))
            cer_inst = lm_decoder.cer(transcript, reference) / float(
                len(reference))
            batch_wer += wer_inst
            batch_cer += cer_inst

            # Sampled detail logging. Per the original author's notes, writing
            # to a shared log file was dropped because of multiprocessing,
            # and logging.info produced no file output, so the module
            # logger's error level is used.
            if random.uniform(0, 1) >= float(args.detail_log_print_prob):
                continue

            print('decoding : ' + transcript)
            print('reference : ' + reference)
            print('WER = ' + str(wer_inst) + ', CER = ' + str(cer_inst))
            print(' ')

            logger.error('decoding : ' + transcript)
            logger.error('reference : ' + reference)
            logger.error('WER = ' + str(wer_inst) + ', CER = ' +
                         str(cer_inst))
            logger.error(' ')

        sum_cer += batch_cer
        sum_wer += batch_wer

    n_utterances = len(feat_loader.dataset)
    wer = sum_wer / n_utterances
    cer = sum_cer / n_utterances

    return [mesh_x, mesh_y, lm_alpha, lm_beta, wer, cer]