Esempio n. 1
0
def load_fragments_within(fname, verbose):
    """Load the "within" folds stored in `fname`.

    The file is read with ``load_split(fname, multiple=True)`` while a
    ``verb_print`` progress context (driven by `verbose`) is active.

    Returns whatever ``load_split`` produced for `fname`.
    """
    label = '  loading folds within'
    with verb_print(label, verbose, True, True, True):
        within_folds = load_split(fname, multiple=True)
    return within_folds
Esempio n. 2
0
def _print_fragment_errors(fragments, kind, show_interval, limit=100):
    """Print one line per offending fragment, capped at `limit` lines.

    Fragments are sorted by (name, interval start) *before* the cap is
    applied, so the reported subset is deterministic.

    fragments     -- iterable of objects with `.name` and `.interval`
                     (`.interval.start` / `.interval.end`).
    kind          -- word used in the "more than N ... errors" notice,
                     e.g. 'interval' or 'filename'.
    show_interval -- if true, print the interval bounds next to the name.
    limit         -- maximum number of fragments to print.
    """
    ordered = sorted(fragments, key=lambda x: (x.name, x.interval.start))
    if len(ordered) > limit:
        print('There were more than {0} {1} errors found.'.format(limit,
                                                                  kind))
        print('Printing only the first {0}.'.format(limit))
        print('')
        ordered = ordered[:limit]
    for fragment in ordered:
        if show_interval:
            print('  error: {0} [{1:.3f}, {2:.3f}]'.format(
                fragment.name, fragment.interval.start,
                fragment.interval.end))
        else:
            print('  error: {0}'.format(fragment.name))


def _abort_on_errors(fname):
    """Print the final hint about option -f and exit with a failure status."""
    print('There were errors in {0}. Use option -f to'
          ' automatically skip invalid intervals.'.format(fname))
    # A bare sys.exit() would report success (status 0) to the caller.
    sys.exit(1)


def load_disc(fname, corpus, split_file, truncate, verbose):
    """Load the discovered classes in `fname` and validate them.

    The split mapping is read from `split_file` and the classes are loaded
    with ``_load_classes``.  What happens next depends on `truncate`:

    * truthy: the classes are passed through ``truncate_intervals`` and the
      result is returned; no errors are reported.
    * falsy: errors returned by ``_load_classes`` and by
      ``check_intervals`` are printed (at most 100 per category) and the
      process exits with status 1 if any were found.

    fname      -- path of the class file to load.
    corpus     -- corpus object handed to the loaders.
    split_file -- path of the split file.
    truncate   -- truncate invalid intervals instead of aborting.
    verbose    -- forwarded to ``verb_print``.

    Returns the (possibly truncated) discovered classes.
    Raises SystemExit when errors are found and `truncate` is falsy.
    """
    with verb_print('  loading discovered classes', verbose, True, True, True):
        split_mapping = load_split(split_file)
        disc, errors = _load_classes(fname, corpus, split_mapping)
        # Reporting stays inside the context so the output order relative
        # to verb_print's own messages is unchanged.
        if not truncate and len(errors) > 0:
            _print_fragment_errors(errors, 'interval', True)
            _abort_on_errors(fname)

    if truncate:
        # Pass the same progress arguments as every other verb_print call
        # so `verbose` is honoured in this branch as well.
        with verb_print('  checking discovered classes and truncating',
                        verbose, True, True, True):
            disc, _filename_errors, _interval_errors = \
                truncate_intervals(disc, corpus, split_mapping)
        return disc

    with verb_print('  checking discovered classes', verbose, True, True,
                    True):
        filename_errors, interval_errors = \
            check_intervals(disc, split_mapping)

    interval_error = len(interval_errors) > 0
    filename_error = len(filename_errors) > 0
    if interval_error:
        print(banner('intervals found in {0} outside of valid'
                     ' splits'.format(fname)))
        _print_fragment_errors(interval_errors, 'interval', True)
    if filename_error:
        print(banner('unknown filenames found in {0}'.format(fname)))
        _print_fragment_errors(filename_errors, 'filename', False)
    if interval_error or filename_error:
        _abort_on_errors(fname)
    return disc
Esempio n. 3
0
def load_disc(fname, corpus, split_file, truncate, verbose):
    """Load the discovered classes in `fname` and check them against splits.

    The split mapping comes from ``load_split(split_file)`` and the classes
    from ``_load_classes``.  With a truthy `truncate` the classes are run
    through ``truncate_intervals`` and returned without any error report.
    Otherwise every error found by ``_load_classes`` or ``check_intervals``
    is printed (capped at 100 per category) and the process exits with
    status 1.

    fname      -- path of the class file to load.
    corpus     -- corpus object handed to the loaders.
    split_file -- path of the split file.
    truncate   -- truncate invalid intervals instead of aborting.
    verbose    -- forwarded to ``verb_print``.

    Returns the (possibly truncated) discovered classes.
    Raises SystemExit when errors are found and `truncate` is falsy.
    """
    max_shown = 100

    def report(fragments, kind, show_interval):
        # Sort first, then cap, so the printed subset is deterministic.
        ordered = sorted(fragments,
                         key=lambda x: (x.name, x.interval.start))
        if len(ordered) > max_shown:
            print('There were more than {0} {1} errors found.'.format(
                max_shown, kind))
            print('Printing only the first {0}.'.format(max_shown))
            print('')
            ordered = ordered[:max_shown]
        for fragment in ordered:
            if show_interval:
                print('  error: {0} [{1:.3f}, {2:.3f}]'.format(
                    fragment.name,
                    fragment.interval.start, fragment.interval.end))
            else:
                print('  error: {0}'.format(fragment.name))

    def abort():
        print('There were errors in {0}. Use option -f to'
              ' automatically skip invalid intervals.'.format(fname))
        # Exit non-zero: a bare sys.exit() would signal success.
        sys.exit(1)

    with verb_print('  loading discovered classes',
                    verbose, True, True, True):
        split_mapping = load_split(split_file)
        disc, errors = _load_classes(fname, corpus, split_mapping)
        # Kept inside the context so output ordering relative to
        # verb_print's own messages does not change.
        if not truncate and len(errors) > 0:
            report(errors, 'interval', True)
            abort()

    if truncate:
        # Same progress arguments as the other verb_print calls, so that
        # `verbose` is not silently ignored when truncating.
        with verb_print('  checking discovered classes and truncating',
                        verbose, True, True, True):
            disc, _filename_errors, _interval_errors = \
                truncate_intervals(disc, corpus, split_mapping)
        return disc

    with verb_print('  checking discovered classes',
                    verbose, True, True, True):
        filename_errors, interval_errors = \
            check_intervals(disc, split_mapping)

    has_interval_errors = len(interval_errors) > 0
    has_filename_errors = len(filename_errors) > 0
    if has_interval_errors:
        print(banner('intervals found in {0} outside of valid'
                     ' splits'.format(fname)))
        report(interval_errors, 'interval', True)
    if has_filename_errors:
        print(banner('unknown filenames found in {0}'.format(fname)))
        report(filename_errors, 'filename', False)
    if has_interval_errors or has_filename_errors:
        abort()
    return disc
Esempio n. 4
0
def load_fragments_within(fname, verbose):
    """Read the "within" folds from `fname` and return them.

    Loading is delegated to ``load_split`` with ``multiple=True`` and is
    wrapped in a ``verb_print`` context that receives `verbose`.
    """
    progress = verb_print('  loading folds within', verbose, True, True, True)
    with progress:
        loaded = load_split(fname, multiple=True)
    return loaded
Esempio n. 5
0
    # Load the gold word and gold phone corpora from their text files.
    with verb_print('  loading word corpus file',
                             verbose, True, True, True):
        wrd_corpus = load_corpus_txt(wrd_corpus_file)

    with verb_print('  loading phone corpus file',
                             verbose, True, True, True):
        phn_corpus = load_corpus_txt(phn_corpus_file)
    
    # Load across and within folds.
    # NOTE(review): only the VAD split is actually loaded here (wrapped in a
    # one-element list); the cross-fold load below is commented out.
    with verb_print('  loading folds cross',
                             verbose, True, True, True):
        #fragments_cross = load_split(folds_cross_file,
        #                             multiple=False)
        intervals_vad = [load_split(vad_file,
                                     multiple=False)]
    # Get the list of file names from the VAD file (currently disabled):
    #    names = load_names(vad_file)

    # Create the destination directory; any OSError (e.g. the directory
    # already exists) is deliberately ignored.
    try:
        os.makedirs(dest)
    except OSError:
        pass

    # Before loading the class file, check its consistency.
    # If intervals that overlap slightly with silence are found,
    # they are trimmed. If intervals that contain silence are found,
    # an error is raised and the evaluation stops.
    sil_tree = create_silence_tree(vad_file)
    parse_class_file(disc_clsfile, sil_tree, dest, verbose)

    # load discovered intervals and gold intervals