Ejemplo n.º 1
0
def nlp(disc_clsdict, gold_clsdict, fragments_within, fragments_cross,
        dest, verbose, n_jobs):
    """Compute the NLP scores and write them to the file 'nlp' in `dest`.

    `_nlp_sub` is run on `fragments_cross` (tagged 'cross') and then on
    `fragments_within` (tagged 'within').  Each result pair is rendered by
    `pretty_score_nlp` together with `len()` of the fragment collection, the
    summed length of its elements, the number of discovered clusters and the
    number of pairs found in those clusters.  The two reports are separated
    by a single newline.
    """
    if verbose:
        print(banner('NLP'))

    n_cross, c_cross = _nlp_sub(disc_clsdict, gold_clsdict, fragments_cross,
                                'cross', verbose, n_jobs)
    n_within, c_within = _nlp_sub(disc_clsdict, gold_clsdict,
                                  fragments_within, 'within', verbose, n_jobs)

    # Number of clusters and of pairs found by the discovery algorithm; both
    # are stored in the 'nlp' output file so that different algorithms can
    # be compared.
    entries = disc_clsdict.items()
    nclust = len(entries)
    npairs = sum(nCr(len(members), 2) for _, members in entries)

    sections = (
        (n_cross, c_cross, 'NLP total', fragments_cross),
        (n_within, c_within, 'NLP within-speaker only', fragments_within),
    )
    with open(path.join(dest, 'nlp'), 'w') as fid:
        for position, (n_val, c_val, label, frags) in enumerate(sections):
            if position:
                fid.write('\n')
            fid.write(pretty_score_nlp(n_val, c_val, label,
                                       len(frags), sum(map(len, frags)),
                                       nclust, npairs))
Ejemplo n.º 2
0
def token_type(disc_clsdict, wrd_corpus, fragments_within, fragments_cross,
               dest, verbose, n_jobs):
    """Compute token and type scores and write them to 'token_type' in `dest`.

    `_token_type_sub` is run on `fragments_cross` ('cross') and then on
    `fragments_within` ('within'); each run yields four indexable arrays
    (token and type, two each).  An f-score array is derived element-wise
    with `fscore` for every pair, and four reports are written with
    `pretty_score_f`, separated by newlines: token total, type total, token
    within-speaker only and type within-speaker only.
    """
    def _fscores(first, second, size):
        # Element-wise fscore over the first `size` entries.
        return np.fromiter((fscore(first[k], second[k])
                            for k in xrange(size)),
                           dtype=np.double)

    if verbose:
        print(banner('TOKEN/TYPE'))

    tok_p_c, tok_r_c, typ_p_c, typ_r_c = _token_type_sub(
        disc_clsdict, wrd_corpus, fragments_cross, 'cross', verbose, n_jobs)
    tok_f_c = _fscores(tok_p_c, tok_r_c, tok_p_c.shape[0])
    typ_f_c = _fscores(typ_p_c, typ_r_c, typ_p_c.shape[0])

    tok_p_w, tok_r_w, typ_p_w, typ_r_w = _token_type_sub(
        disc_clsdict, wrd_corpus, fragments_within, 'within', verbose, n_jobs)
    tok_f_w = _fscores(tok_p_w, tok_r_w, tok_p_w.shape[0])
    # The length of this last array is taken from the second array of the
    # pair, unlike the three computations above.
    typ_f_w = _fscores(typ_p_w, typ_r_w, typ_r_w.shape[0])

    sections = (
        (tok_p_c, tok_r_c, tok_f_c, 'token total', fragments_cross),
        (typ_p_c, typ_r_c, typ_f_c, 'type total', fragments_cross),
        (tok_p_w, tok_r_w, tok_f_w, 'token within-speaker only',
         fragments_within),
        (typ_p_w, typ_r_w, typ_f_w, 'type within-speaker only',
         fragments_within),
    )
    with open(path.join(dest, 'token_type'), 'w') as fid:
        for position, (p_arr, r_arr, f_arr, label, frags) in \
                enumerate(sections):
            if position:
                fid.write('\n')
            fid.write(pretty_score_f(p_arr, r_arr, f_arr, label,
                                     len(frags), sum(map(len, frags))))
Ejemplo n.º 3
0
def token_type(disc_clsdict, wrd_corpus, fragments_within, fragments_cross,
               dest, verbose, n_jobs):
    """Compute token and type scores and write them to 'token_type' in `dest`.

    `_token_type_sub` is run on `fragments_cross` ('cross') and then on
    `fragments_within` ('within'); each run yields four indexable arrays
    (token and type, two each).  An f-score array is derived element-wise
    with `fscore` for every pair, and four reports are written with
    `pretty_score_f`, separated by newlines: token total, type total, token
    within-speaker only and type within-speaker only.
    """
    def _fscores(first, second, size):
        # Element-wise fscore over the first `size` entries.
        return np.fromiter((fscore(first[k], second[k])
                            for k in xrange(size)),
                           dtype=np.double)

    if verbose:
        print(banner('TOKEN/TYPE'))

    tok_p_c, tok_r_c, typ_p_c, typ_r_c = _token_type_sub(
        disc_clsdict, wrd_corpus, fragments_cross, 'cross', verbose, n_jobs)
    tok_f_c = _fscores(tok_p_c, tok_r_c, tok_p_c.shape[0])
    typ_f_c = _fscores(typ_p_c, typ_r_c, typ_p_c.shape[0])

    tok_p_w, tok_r_w, typ_p_w, typ_r_w = _token_type_sub(
        disc_clsdict, wrd_corpus, fragments_within, 'within', verbose, n_jobs)
    tok_f_w = _fscores(tok_p_w, tok_r_w, tok_p_w.shape[0])
    # The length of this last array is taken from the second array of the
    # pair, unlike the three computations above.
    typ_f_w = _fscores(typ_p_w, typ_r_w, typ_r_w.shape[0])

    sections = (
        (tok_p_c, tok_r_c, tok_f_c, 'token total', fragments_cross),
        (typ_p_c, typ_r_c, typ_f_c, 'type total', fragments_cross),
        (tok_p_w, tok_r_w, tok_f_w, 'token within-speaker only',
         fragments_within),
        (typ_p_w, typ_r_w, typ_f_w, 'type within-speaker only',
         fragments_within),
    )
    with open(path.join(dest, 'token_type'), 'w') as fid:
        for position, (p_arr, r_arr, f_arr, label, frags) in \
                enumerate(sections):
            if position:
                fid.write('\n')
            fid.write(pretty_score_f(p_arr, r_arr, f_arr, label,
                                     len(frags), sum(map(len, frags))))
Ejemplo n.º 4
0
def make_typeset(pclus, verbose, debug):
    """Return `typeset(pclus)` materialised as a list.

    The list is built inside a `verb_print` context that receives `verbose`.
    When `debug` is true, a banner showing the list length, the
    `pformat`-ed list and an empty line are printed before returning.
    """
    with verb_print('constructing typeset', verbose, True, True):
        type_list = list(typeset(pclus))
    if not debug:
        return type_list
    print(banner('TYPESET ({0})'.format(len(type_list))))
    print(pformat(type_list))
    print('')  # empty separator line
    return type_list
Ejemplo n.º 5
0
def make_weights(pclus, verbose, debug):
    """Return the result of `weights(pclus)`.

    The computation runs inside a `verb_print` context that receives
    `verbose`.  When `debug` is true, a banner, the `pformat`-ed result and
    an empty line are printed before returning.
    """
    with verb_print('constructing weights', verbose, True, True):
        weight_map = weights(pclus)
    if not debug:
        return weight_map
    print(banner('WEIGHTS'))
    print(pformat(weight_map))
    print('')  # empty separator line
    return weight_map
Ejemplo n.º 6
0
def make_psubs_nmatch(psubs, verbose, debug):
    """Return the result of `nmatch(psubs)`.

    The computation runs inside a `verb_print` context that receives
    `verbose`.  When `debug` is true, a banner, the `pformat`-ed result and
    an empty line are printed before returning.
    """
    with verb_print('making psubs nmatch', verbose, True, True):
        match_counts = nmatch(psubs)
    if not debug:
        return match_counts
    print(banner('NMATCH(PSUBS)'))
    print(pformat(match_counts))
    print('')  # empty separator line
    return match_counts
Ejemplo n.º 7
0
def make_pgoldclus_nmatch(pgoldclus, verbose, debug):
    """Return the result of `nmatch(pgoldclus)`.

    The computation runs inside a `verb_print` context that receives
    `verbose`.  When `debug` is true, a banner, the `pformat`-ed result and
    an empty line are printed before returning.
    """
    with verb_print('constructing pgoldclus_nmatch', verbose, True, True):
        match_counts = nmatch(pgoldclus)
    if not debug:
        return match_counts
    print(banner('NMATCH(PGOLDCLUS)'))
    print(pformat(match_counts))
    print('')  # empty separator line
    return match_counts
Ejemplo n.º 8
0
def make_pdisc(disc_clsdict, verbose, debug):
    """Return `Pclus(disc_clsdict)` materialised as a list.

    The list is built inside a `verb_print` context that receives `verbose`.
    When `debug` is true, a banner showing the list length, the output of
    `pretty_pairs` for the list and an empty line are printed.
    """
    with verb_print('constructing pdisc set', verbose, True, True):
        disc_pairs = list(Pclus(disc_clsdict))
    if not debug:
        return disc_pairs
    print(banner('PDISC ({0})'.format(len(disc_pairs))))
    print(pretty_pairs(disc_pairs))
    print('')  # empty separator line
    return disc_pairs
Ejemplo n.º 9
0
def make_typeset(psubs, verbose, debug):
    """Return `typeset(psubs)` materialised as a list.

    The list is built inside a `verb_print` context that receives `verbose`.
    When `debug` is true, a banner showing the list length, the
    `pformat`-ed list and an empty line are printed before returning.
    """
    with verb_print('making typeset', verbose, True, True):
        type_list = list(typeset(psubs))
    if not debug:
        return type_list
    print(banner('TYPES(PSUBS) ({0})'.format(len(type_list))))
    print(pformat(type_list))
    print('')  # empty separator line
    return type_list
Ejemplo n.º 10
0
def make_weights(psubs, verbose, debug):
    """Return the result of `weights(psubs)`.

    The computation runs inside a `verb_print` context that receives
    `verbose`.  When `debug` is true, a banner showing `len()` of the
    result, the `pformat`-ed result and an empty line are printed.
    """
    with verb_print('making weights', verbose, True, True):
        weight_map = weights(psubs)
    if not debug:
        return weight_map
    print(banner('WEIGHTS(PSUBS) ({0})'.format(len(weight_map))))
    print(pformat(weight_map))
    print('')  # empty separator line
    return weight_map
Ejemplo n.º 11
0
def make_weights(psubs, verbose, debug):
    """Return the result of `weights(psubs)`.

    The computation runs inside a `verb_print` context that receives
    `verbose`.  When `debug` is true, a banner showing `len()` of the
    result, the `pformat`-ed result and an empty line are printed.
    """
    with verb_print('making weights', verbose, True, True):
        weight_map = weights(psubs)
    if not debug:
        return weight_map
    print(banner('WEIGHTS(PSUBS) ({0})'.format(len(weight_map))))
    print(pformat(weight_map))
    print('')  # empty separator line
    return weight_map
Ejemplo n.º 12
0
def make_psubs_nmatch(psubs, verbose, debug):
    """Return the result of `nmatch(psubs)`.

    The computation runs inside a `verb_print` context that receives
    `verbose`.  When `debug` is true, a banner, the `pformat`-ed result and
    an empty line are printed before returning.
    """
    with verb_print('making psubs nmatch', verbose, True, True):
        match_counts = nmatch(psubs)
    if not debug:
        return match_counts
    print(banner('NMATCH(PSUBS)'))
    print(pformat(match_counts))
    print('')  # empty separator line
    return match_counts
Ejemplo n.º 13
0
def make_pdisc(disc_clsdict, verbose, debug):
    """Return `Pclus(disc_clsdict)` materialised as a list.

    The list is built inside a `verb_print` context that receives `verbose`.
    When `debug` is true, a banner showing the list length, the output of
    `pretty_pairs` for the list and an empty line are printed.
    """
    with verb_print('constructing pdisc set', verbose, True, True):
        disc_pairs = list(Pclus(disc_clsdict))
    if not debug:
        return disc_pairs
    print(banner('PDISC ({0})'.format(len(disc_pairs))))
    print(pretty_pairs(disc_pairs))
    print('')  # empty separator line
    return disc_pairs
Ejemplo n.º 14
0
def make_pgold(gold_clsdict, verbose, debug):
    """Return `Pclus(gold_clsdict)` materialised as a list.

    The list is built inside a `verb_print` context that receives `verbose`.
    When `debug` is true, a banner showing the list length, the output of
    `pretty_pairs` for the list and an empty line are printed.
    """
    with verb_print('constructing pgold set', verbose, True, True):
        gold_pairs = list(Pclus(gold_clsdict))
    if not debug:
        return gold_pairs
    print(banner('PGOLD ({0})'.format(len(gold_pairs))))
    print(pretty_pairs(gold_pairs))
    print('')  # empty separator line
    return gold_pairs
Ejemplo n.º 15
0
def make_typeset(psubs, verbose, debug):
    """Return `typeset(psubs)` materialised as a list.

    The list is built inside a `verb_print` context that receives `verbose`.
    When `debug` is true, a banner showing the list length, the
    `pformat`-ed list and an empty line are printed before returning.
    """
    with verb_print('making typeset', verbose, True, True):
        type_list = list(typeset(psubs))
    if not debug:
        return type_list
    print(banner('TYPES(PSUBS) ({0})'.format(len(type_list))))
    print(pformat(type_list))
    print('')  # empty separator line
    return type_list
Ejemplo n.º 16
0
def make_pgold(gold_clsdict, verbose, debug):
    """Return `Pclus(gold_clsdict)` materialised as a list.

    The list is built inside a `verb_print` context that receives `verbose`.
    When `debug` is true, a banner showing the list length, the output of
    `pretty_pairs` for the list and an empty line are printed.
    """
    with verb_print('constructing pgold set', verbose, True, True):
        gold_pairs = list(Pclus(gold_clsdict))
    if not debug:
        return gold_pairs
    print(banner('PGOLD ({0})'.format(len(gold_pairs))))
    print(pretty_pairs(gold_pairs))
    print('')  # empty separator line
    return gold_pairs
Ejemplo n.º 17
0
def make_pgold_nmatch(pgold, verbose, debug):
    with verb_print('constructing nmatch_gold', verbose, True, True):
        nmatch_gold = nmatch(pgold)

    if debug:
        print banner('nmatch_gold')
        for k, v in nmatch_gold.iteritems():
            print k, v
    return nmatch_gold
Ejemplo n.º 18
0
def make_pgoldclus(disc_clsdict, verbose, debug):
    """Return `Pgoldclus(disc_clsdict)` materialised as a list.

    The list is built inside a `verb_print` context that receives `verbose`.
    When `debug` is true, a banner showing the list length, the output of
    `pretty_pairs` for the list and an empty line are printed.

    The value is already a list when the debug branch is reached, so it is
    printed directly instead of being copied into a second list first.
    """
    with verb_print('constructing pgoldclus', verbose, True, True):
        pgoldclus = list(Pgoldclus(disc_clsdict))
    if debug:
        print(banner('PGOLDCLUS ({0})'.format(len(pgoldclus))))
        print(pretty_pairs(pgoldclus))
        print('')  # empty separator line
    return pgoldclus
Ejemplo n.º 19
0
def make_psubs_pgold_nmatch(pgold, psubs, verbose, debug):
    """Return `nmatch` applied to `intersection(pgold, psubs)`.

    Both calls run inside a `verb_print` context that receives `verbose`.
    When `debug` is true, a banner, the `pformat`-ed result and an empty
    line are printed before returning.
    """
    with verb_print('making psubs/pgold nmatch', verbose, True, True):
        match_counts = nmatch(intersection(pgold, psubs))
    if not debug:
        return match_counts
    print(banner('NMATCH(PSUBS/PGOLD)'))
    print(pformat(match_counts))
    print('')  # empty separator line
    return match_counts
Ejemplo n.º 20
0
def make_pclus_pgoldclus_nmatch(pclus, pgoldclus, verbose, debug):
    """Return `nmatch` applied to the list of `intersection(pclus, pgoldclus)`.

    Both calls run inside a `verb_print` context that receives `verbose`;
    the intersection is materialised as a list before being passed to
    `nmatch`.  When `debug` is true, a banner, the `pformat`-ed result and
    an empty line are printed before returning.
    """
    with verb_print('making pclus/pgoldclus nmatch', verbose, True, True):
        shared_pairs = list(intersection(pclus, pgoldclus))
        match_counts = nmatch(shared_pairs)
    if not debug:
        return match_counts
    print(banner('NMATCH(PCLUS/PGOLDCLUS)'))
    print(pformat(match_counts))
    print('')  # empty separator line
    return match_counts
Ejemplo n.º 21
0
def make_pgold_nmatch(pgold, verbose, debug):
    with verb_print('constructing nmatch_gold', verbose, True, True):
        nmatch_gold = nmatch(pgold)

    if debug:
        print banner('nmatch_gold')
        for k, v in nmatch_gold.iteritems():
            print k, v
    return nmatch_gold
Ejemplo n.º 22
0
def make_psubs_pgold_nmatch(pgold, psubs, verbose, debug):
    """Return `nmatch` applied to `intersection(pgold, psubs)`.

    Both calls run inside a `verb_print` context that receives `verbose`.
    When `debug` is true, a banner, the `pformat`-ed result and an empty
    line are printed before returning.
    """
    with verb_print('making psubs/pgold nmatch', verbose, True, True):
        match_counts = nmatch(intersection(pgold, psubs))
    if not debug:
        return match_counts
    print(banner('NMATCH(PSUBS/PGOLD)'))
    print(pformat(match_counts))
    print('')  # empty separator line
    return match_counts
Ejemplo n.º 23
0
def make_psubs(disc_clsdict, corpus, minlength, maxlength,
               verbose, debug):
    """Return `Psubs(disc_clsdict, corpus, ...)` materialised as a list.

    `minlength` and `maxlength` are forwarded to `Psubs` as keyword
    arguments.  The list is built inside a `verb_print` context that
    receives `verbose`.  When `debug` is true, a banner showing the list
    length, the output of `pretty_pairs` and an empty line are printed.
    """
    length_bounds = dict(minlength=minlength, maxlength=maxlength)
    with verb_print('constructing psubs set', verbose, True, True):
        sub_pairs = list(Psubs(disc_clsdict, corpus, **length_bounds))
    if not debug:
        return sub_pairs
    print(banner('PSUBS ({0})'.format(len(sub_pairs))))
    print(pretty_pairs(sub_pairs))
    print('')  # empty separator line
    return sub_pairs
Ejemplo n.º 24
0
def make_pclus(disc_clsdict, verbose, debug):
    """Return the pairs of `Pclus_single(disc_clsdict)` as ordered tuples.

    Every pair is unpacked into its two fragments and rebuilt as a tuple
    sorted by `(name, interval.start)`.  The list is built inside a
    `verb_print` context that receives `verbose`.  When `debug` is true, a
    banner showing the list length, the output of `pretty_pairs` and an
    empty line are printed.
    """
    def _position(fragment):
        return (fragment.name, fragment.interval.start)

    with verb_print('constructing pclus', verbose, True, True):
        ordered_pairs = [tuple(sorted((first, second), key=_position))
                         for first, second in Pclus_single(disc_clsdict)]
    if not debug:
        return ordered_pairs
    print(banner('PCLUS ({0})'.format(len(ordered_pairs))))
    print(pretty_pairs(ordered_pairs))
    print('')  # empty separator line
    return ordered_pairs
Ejemplo n.º 25
0
def boundary(disc_clsdict, corpus, fragments_cross,
               dest, verbose, n_jobs):
    """Compute the boundary scores and write them to 'boundary' in `dest`.

    `_boundary_sub` is run on `fragments_cross` (tagged 'cross') and yields
    two indexable arrays; an f-score array is derived element-wise with
    `fscore`.  The three arrays are rendered by `pretty_score` together
    with the summed length of the elements of `fragments_cross`.
    """
    if verbose:
        print(banner('BOUNDARY'))

    first_arr, second_arr = _boundary_sub(disc_clsdict, corpus,
                                          fragments_cross, 'cross',
                                          verbose, n_jobs)
    f_arr = np.fromiter((fscore(first_arr[k], second_arr[k])
                         for k in xrange(first_arr.shape[0])),
                        dtype=np.double)

    with open(path.join(dest, 'boundary'), 'w') as fid:
        n_fragments = sum(map(len, fragments_cross))
        report = pretty_score(first_arr, second_arr, f_arr, 'boundary total',
                              n_fragments)
        fid.write(report)
Ejemplo n.º 26
0
def make_psubs(disc_clsdict, corpus, minlength, maxlength, verbose, debug):
    """Return `Psubs(disc_clsdict, corpus, ...)` materialised as a list.

    `minlength` and `maxlength` are forwarded to `Psubs` as keyword
    arguments.  The list is built inside a `verb_print` context that
    receives `verbose`.  When `debug` is true, a banner showing the list
    length, the output of `pretty_pairs` and an empty line are printed.
    """
    length_bounds = dict(minlength=minlength, maxlength=maxlength)
    with verb_print('constructing psubs set', verbose, True, True):
        sub_pairs = list(Psubs(disc_clsdict, corpus, **length_bounds))
    if not debug:
        return sub_pairs
    print(banner('PSUBS ({0})'.format(len(sub_pairs))))
    print(pretty_pairs(sub_pairs))
    print('')  # empty separator line
    return sub_pairs
Ejemplo n.º 27
0
def group(disc_clsdict, fragments_all, dest, verbose, n_jobs):
    """Compute the grouping scores and write them to 'group' in `dest`.

    `_group_sub` is run on `fragments_all` (tagged 'all') and yields two
    indexable arrays; an f-score array is derived element-wise with
    `fscore`.  The three arrays are rendered by `pretty_score` together
    with the summed length of the elements of `fragments_all`.
    """
    if verbose:
        print(banner('GROUP'))

    # TODO CHECK SCORE ACROSS/WITHIN!
    # Only the 'all' score is computed here; no separate within-speaker
    # score is produced by this function.
    first_arr, second_arr = _group_sub(disc_clsdict, fragments_all, 'all',
                                       verbose, n_jobs)
    f_arr = np.fromiter((fscore(first_arr[k], second_arr[k])
                         for k in xrange(first_arr.shape[0])),
                        dtype=np.double)

    with open(path.join(dest, 'group'), 'w') as fid:
        n_fragments = sum(map(len, fragments_all))
        report = pretty_score(first_arr, second_arr, f_arr, 'group total',
                              n_fragments)
        fid.write(report)
Ejemplo n.º 28
0
def group(disc_clsdict, fragments_within, fragments_cross, dest, verbose, n_jobs):
    """Compute the grouping scores and write them to 'group' in `dest`.

    `_group_sub` is run on `fragments_cross` ('cross') and then on
    `fragments_within` ('within'); each run yields two indexable arrays
    from which an f-score array is derived element-wise with `fscore`.
    Both triples are rendered by `pretty_score_f` with `len()` of the
    fragment collection and the summed length of its elements, and the two
    reports are written separated by a newline.
    """
    def _fscores(first, second):
        # Element-wise fscore over as many entries as `first` has rows.
        return np.fromiter((fscore(first[k], second[k])
                            for k in xrange(first.shape[0])),
                           dtype=np.double)

    if verbose:
        print(banner('GROUP'))

    p_cross, r_cross = _group_sub(disc_clsdict, fragments_cross, 'cross',
                                  verbose, n_jobs)
    f_cross = _fscores(p_cross, r_cross)

    p_within, r_within = _group_sub(disc_clsdict, fragments_within, 'within',
                                    verbose, n_jobs)
    f_within = _fscores(p_within, r_within)

    sections = (
        (p_cross, r_cross, f_cross, 'group total', fragments_cross),
        (p_within, r_within, f_within, 'group within-speaker only',
         fragments_within),
    )
    with open(path.join(dest, 'group'), 'w') as fid:
        for position, (p_arr, r_arr, f_arr, label, frags) in \
                enumerate(sections):
            if position:
                fid.write('\n')
            fid.write(pretty_score_f(p_arr, r_arr, f_arr, label,
                                     len(frags), sum(map(len, frags))))
Ejemplo n.º 29
0
def nlp(disc_clsdict, gold_clsdict, fragments_within, fragments_cross,
        dest, verbose, n_jobs):
    """Compute the NLP scores and write them to the file 'nlp' in `dest`.

    `_nlp_sub` is run on `fragments_cross` (tagged 'cross') and then on
    `fragments_within` (tagged 'within').  Each result pair is rendered by
    `pretty_score_nlp` together with `len()` of the fragment collection it
    was computed from and the summed length of that collection's elements.
    The two reports are separated by a single newline.

    Each report is paired with the counts of its own fragment collection:
    'NLP total' (cross scores) uses `fragments_cross` and
    'NLP within-speaker only' uses `fragments_within`.  The previous code
    had the two collections swapped.
    """
    if verbose:
        print(banner('NLP'))

    n_cross, c_cross = _nlp_sub(disc_clsdict, gold_clsdict, fragments_cross,
                                'cross', verbose, n_jobs)
    n_within, c_within = _nlp_sub(disc_clsdict, gold_clsdict,
                                  fragments_within, 'within', verbose, n_jobs)

    sections = (
        (n_cross, c_cross, 'NLP total', fragments_cross),
        (n_within, c_within, 'NLP within-speaker only', fragments_within),
    )
    with open(path.join(dest, 'nlp'), 'w') as fid:
        for position, (n_val, c_val, label, frags) in enumerate(sections):
            if position:
                fid.write('\n')
            fid.write(pretty_score_nlp(n_val, c_val, label,
                                       len(frags), sum(map(len, frags))))
Ejemplo n.º 30
0
def group(disc_clsdict, fragments_within, fragments_cross, dest, verbose, n_jobs):
    """Compute the grouping scores and write them to 'group' in `dest`.

    `_group_sub` is run on `fragments_cross` ('cross') and then on
    `fragments_within` ('within'); each run yields two indexable arrays
    from which an f-score array is derived element-wise with `fscore`.
    Both triples are rendered by `pretty_score_f` with `len()` of the
    fragment collection and the summed length of its elements, and the two
    reports are written separated by a newline.
    """
    def _fscores(first, second):
        # Element-wise fscore over as many entries as `first` has rows.
        return np.fromiter((fscore(first[k], second[k])
                            for k in xrange(first.shape[0])),
                           dtype=np.double)

    if verbose:
        print(banner('GROUP'))

    p_cross, r_cross = _group_sub(disc_clsdict, fragments_cross, 'cross',
                                  verbose, n_jobs)
    f_cross = _fscores(p_cross, r_cross)

    p_within, r_within = _group_sub(disc_clsdict, fragments_within, 'within',
                                    verbose, n_jobs)
    f_within = _fscores(p_within, r_within)

    sections = (
        (p_cross, r_cross, f_cross, 'group total', fragments_cross),
        (p_within, r_within, f_within, 'group within-speaker only',
         fragments_within),
    )
    with open(path.join(dest, 'group'), 'w') as fid:
        for position, (p_arr, r_arr, f_arr, label, frags) in \
                enumerate(sections):
            if position:
                fid.write('\n')
            fid.write(pretty_score_f(p_arr, r_arr, f_arr, label,
                                     len(frags), sum(map(len, frags))))
Ejemplo n.º 31
0
def nlp(disc_clsdict, gold_clsdict, fragments_within, fragments_cross,
        dest, verbose, n_jobs):
    """Compute the NLP scores and write them to the file 'nlp' in `dest`.

    `_nlp_sub` is run on `fragments_cross` (tagged 'cross') and then on
    `fragments_within` (tagged 'within').  Each result pair is rendered by
    `pretty_score_nlp` together with `len()` of the fragment collection it
    was computed from and the summed length of that collection's elements.
    The two reports are separated by a single newline.

    Each report is paired with the counts of its own fragment collection:
    'NLP total' (cross scores) uses `fragments_cross` and
    'NLP within-speaker only' uses `fragments_within`.  The previous code
    had the two collections swapped.
    """
    if verbose:
        print(banner('NLP'))

    n_cross, c_cross = _nlp_sub(disc_clsdict, gold_clsdict, fragments_cross,
                                'cross', verbose, n_jobs)
    n_within, c_within = _nlp_sub(disc_clsdict, gold_clsdict,
                                  fragments_within, 'within', verbose, n_jobs)

    sections = (
        (n_cross, c_cross, 'NLP total', fragments_cross),
        (n_within, c_within, 'NLP within-speaker only', fragments_within),
    )
    with open(path.join(dest, 'nlp'), 'w') as fid:
        for position, (n_val, c_val, label, frags) in enumerate(sections):
            if position:
                fid.write('\n')
            fid.write(pretty_score_nlp(n_val, c_val, label,
                                       len(frags), sum(map(len, frags))))
Ejemplo n.º 32
0
def boundary_wrd(disc_clsdict, corpus, fragments_within, fragments_cross,
                 dest, verbose, n_jobs, threshold=0.03):
    """Compute boundary scores and write them to 'boundary_wrd' in `dest`.

    `_boundary_sub` is run on `fragments_cross` ('cross') and then on
    `fragments_within` ('within'), with `threshold` forwarded as a keyword
    argument; each run yields two indexable arrays from which an f-score
    array is derived element-wise with `fscore`.  Both triples are rendered
    by `pretty_score_f` with `len()` of the fragment collection and the
    summed length of its elements, and the two reports are written
    separated by a newline.
    """
    def _fscores(first, second):
        # Element-wise fscore over as many entries as `first` has rows.
        return np.fromiter((fscore(first[k], second[k])
                            for k in xrange(first.shape[0])),
                           dtype=np.double)

    if verbose:
        print(banner('BOUNDARY (WRD)'))

    p_cross, r_cross = _boundary_sub(disc_clsdict, corpus, fragments_cross,
                                     'cross', verbose, n_jobs,
                                     threshold=threshold)
    f_cross = _fscores(p_cross, r_cross)

    p_within, r_within = _boundary_sub(disc_clsdict, corpus,
                                       fragments_within, 'within', verbose,
                                       n_jobs, threshold=threshold)
    f_within = _fscores(p_within, r_within)

    sections = (
        (p_cross, r_cross, f_cross, 'boundary total', fragments_cross),
        (p_within, r_within, f_within, 'boundary within-speaker only',
         fragments_within),
    )
    with open(path.join(dest, 'boundary_wrd'), 'w') as fid:
        for position, (p_arr, r_arr, f_arr, label, frags) in \
                enumerate(sections):
            if position:
                fid.write('\n')
            fid.write(pretty_score_f(p_arr, r_arr, f_arr, label,
                                     len(frags), sum(map(len, frags))))
Ejemplo n.º 33
0
def match(disc_clsdict, gold_clsdict, phn_corpus,
          fragments_within, fragments_cross,
          dest, verbose, n_jobs):
    """Compute the matching scores and write them to 'matching' in `dest`.

    `_match_sub` is run on `fragments_cross` ('cross') and then on
    `fragments_within` ('within'); each run yields two indexable arrays
    from which an f-score array is derived element-wise with `fscore`.
    Both triples are rendered by `pretty_score_f` with `len()` of the
    fragment collection and the summed length of its elements, and the two
    reports are written separated by a newline.
    """
    def _fscores(first, second):
        # Element-wise fscore over as many entries as `first` has rows.
        return np.fromiter((fscore(first[k], second[k])
                            for k in xrange(first.shape[0])),
                           dtype=np.double)

    if verbose:
        print(banner('MATCHING'))

    p_cross, r_cross = _match_sub(disc_clsdict, gold_clsdict, phn_corpus,
                                  fragments_cross, 'cross', verbose, n_jobs)
    f_cross = _fscores(p_cross, r_cross)

    p_within, r_within = _match_sub(disc_clsdict, gold_clsdict, phn_corpus,
                                    fragments_within, 'within', verbose,
                                    n_jobs)
    f_within = _fscores(p_within, r_within)

    sections = (
        (p_cross, r_cross, f_cross, 'match total', fragments_cross),
        (p_within, r_within, f_within, 'match within-speaker only',
         fragments_within),
    )
    with open(path.join(dest, 'matching'), 'w') as fid:
        for position, (p_arr, r_arr, f_arr, label, frags) in \
                enumerate(sections):
            if position:
                fid.write('\n')
            fid.write(pretty_score_f(p_arr, r_arr, f_arr, label,
                                     len(frags), sum(map(len, frags))))
Ejemplo n.º 34
0
def match(disc_clsdict, gold_clsdict, phn_corpus,
          fragments_within, fragments_cross,
          dest, verbose, n_jobs):
    """Compute the matching scores and write them to 'matching' in `dest`.

    `_match_sub` is run on `fragments_cross` ('cross') and then on
    `fragments_within` ('within'); each run yields two indexable arrays
    from which an f-score array is derived element-wise with `fscore`.
    Both triples are rendered by `pretty_score_f` with `len()` of the
    fragment collection and the summed length of its elements, and the two
    reports are written separated by a newline.
    """
    def _fscores(first, second):
        # Element-wise fscore over as many entries as `first` has rows.
        return np.fromiter((fscore(first[k], second[k])
                            for k in xrange(first.shape[0])),
                           dtype=np.double)

    if verbose:
        print(banner('MATCHING'))

    p_cross, r_cross = _match_sub(disc_clsdict, gold_clsdict, phn_corpus,
                                  fragments_cross, 'cross', verbose, n_jobs)
    f_cross = _fscores(p_cross, r_cross)

    p_within, r_within = _match_sub(disc_clsdict, gold_clsdict, phn_corpus,
                                    fragments_within, 'within', verbose,
                                    n_jobs)
    f_within = _fscores(p_within, r_within)

    sections = (
        (p_cross, r_cross, f_cross, 'match total', fragments_cross),
        (p_within, r_within, f_within, 'match within-speaker only',
         fragments_within),
    )
    with open(path.join(dest, 'matching'), 'w') as fid:
        for position, (p_arr, r_arr, f_arr, label, frags) in \
                enumerate(sections):
            if position:
                fid.write('\n')
            fid.write(pretty_score_f(p_arr, r_arr, f_arr, label,
                                     len(frags), sum(map(len, frags))))
Ejemplo n.º 35
0
def load_disc(fname, corpus, split_file, truncate, verbose):
    """Load the discovered classes from `fname` and validate their intervals.

    The split mapping is read with `load_split(split_file)` and the classes
    with `_load_classes(fname, corpus, split_mapping)`.  What follows
    depends on `truncate`:

    * `truncate` true: the errors reported by `_load_classes` are ignored
      and the classes are passed through `truncate_intervals`; the class
      dictionary it returns is the result.
    * `truncate` false: the errors reported by `_load_classes`, and then
      those reported by `check_intervals`, are printed (at most 100 per
      category) and the process is terminated if any were found.

    Returns the class dictionary.

    Raises SystemExit with status 1 when `truncate` is false and errors
    were found.  The status used to be 0, which made a failed validation
    indistinguishable from success for the calling process.
    """
    max_shown = 100

    def by_position(fragment):
        return (fragment.name, fragment.interval.start)

    def clip(fragments, kind):
        # Announce and drop everything past the first `max_shown` entries.
        if len(fragments) <= max_shown:
            return fragments
        print('There were more than {0} {1} errors found.'.format(
            max_shown, kind))
        print('Printing only the first {0}.'.format(max_shown))
        print('')
        return fragments[:max_shown]

    def print_intervals(fragments):
        for fragment in fragments:
            print('  error: {0} [{1:.3f}, {2:.3f}]'.format(
                fragment.name, fragment.interval.start,
                fragment.interval.end))

    def abort():
        print('There were errors in {0}. Use option -f to'
              ' automatically skip invalid intervals.'.format(fname))
        sys.exit(1)

    with verb_print('  loading discovered classes', verbose, True, True, True):
        split_mapping = load_split(split_file)
        disc, errors = _load_classes(fname, corpus, split_mapping)
        if not truncate and len(errors) > 0:
            # Here the list is clipped first and sorted afterwards.
            print_intervals(sorted(clip(errors, 'interval'), key=by_position))
            abort()

    if truncate:
        # NOTE(review): this verb_print call receives only the message,
        # unlike the other calls in this function -- confirm it is intended.
        with verb_print('  checking discovered classes and truncating'):
            disc, _, _ = truncate_intervals(disc, corpus, split_mapping)
        return disc

    with verb_print('  checking discovered classes', verbose, True, True,
                    True):
        filename_errors, interval_errors = \
            check_intervals(disc, split_mapping)

    # Sorted once; clipping afterwards keeps the order.
    filename_errors = sorted(filename_errors, key=by_position)
    interval_errors = sorted(interval_errors, key=by_position)

    if interval_errors:
        print(banner('intervals found in {0} outside of valid'
                     ' splits'.format(fname)))
        print_intervals(clip(interval_errors, 'interval'))
    if filename_errors:
        print(banner('unknown filenames found in {0}'.format(fname)))
        for fragment in clip(filename_errors, 'filename'):
            print('  error: {0}'.format(fragment.name))
    if interval_errors or filename_errors:
        abort()
    return disc
Ejemplo n.º 36
0
                                      'xitsonga.intervals.within')
    gold_clsfile = path.join(resource_dir, 'xitsonga.classes')
    phn_corpus_file = path.join(resource_dir, 'xitsonga.phn')
    wrd_corpus_file = path.join(resource_dir, 'xitsonga.wrd')
    split_file = path.join(resource_dir, 'xitsonga.split')

    if verbose:
        print 'xitsonga_eval2 version {0}'.format(VERSION)
        print '--------------------------'
        print 'dataset:     xitsonga'
        print 'inputfile:   {0}'.format(disc_clsfile)
        print 'destination: {0}'.format(dest)
        print

    if verbose:
        print banner('LOADING FILES')

    wrd_corpus = load_wrd_corpus(wrd_corpus_file, verbose)
    phn_corpus = load_phn_corpus(phn_corpus_file, verbose)

    fragments_cross = load_fragments_cross(fragments_cross_file, verbose)
    fragments_within = load_fragments_within(fragments_within_file, verbose)

    truncate = args['truncate']
    disc_clsdict = load_disc(disc_clsfile, phn_corpus, split_file, truncate,
                             verbose)
    gold_clsdict = load_gold(gold_clsfile, phn_corpus, verbose)

    try:
        os.makedirs(dest)
    except OSError:
Ejemplo n.º 37
0
def load_disc(fname, corpus, split_file, truncate, verbose):
    """Load the discovered classes from `fname` and validate their intervals.

    The split mapping is read with `load_split(split_file)` and the classes
    with `_load_classes(fname, corpus, split_mapping)`.  What follows
    depends on `truncate`:

    * `truncate` true: the errors reported by `_load_classes` are ignored
      and the classes are passed through `truncate_intervals`; the class
      dictionary it returns is the result.
    * `truncate` false: the errors reported by `_load_classes`, and then
      those reported by `check_intervals`, are printed (at most 100 per
      category) and the process is terminated if any were found.

    Returns the class dictionary.

    Raises SystemExit with status 1 when `truncate` is false and errors
    were found.  The status used to be 0, which made a failed validation
    indistinguishable from success for the calling process.
    """
    max_shown = 100

    def by_position(fragment):
        return (fragment.name, fragment.interval.start)

    def clip(fragments, kind):
        # Announce and drop everything past the first `max_shown` entries.
        if len(fragments) <= max_shown:
            return fragments
        print('There were more than {0} {1} errors found.'.format(
            max_shown, kind))
        print('Printing only the first {0}.'.format(max_shown))
        print('')
        return fragments[:max_shown]

    def print_intervals(fragments):
        for fragment in fragments:
            print('  error: {0} [{1:.3f}, {2:.3f}]'.format(
                fragment.name, fragment.interval.start,
                fragment.interval.end))

    def abort():
        print('There were errors in {0}. Use option -f to'
              ' automatically skip invalid intervals.'.format(fname))
        sys.exit(1)

    with verb_print('  loading discovered classes',
                    verbose, True, True, True):
        split_mapping = load_split(split_file)
        disc, errors = _load_classes(fname, corpus, split_mapping)
        if not truncate and len(errors) > 0:
            # Here the list is clipped first and sorted afterwards.
            print_intervals(sorted(clip(errors, 'interval'), key=by_position))
            abort()

    if truncate:
        # NOTE(review): this verb_print call receives only the message,
        # unlike the other calls in this function -- confirm it is intended.
        with verb_print('  checking discovered classes and truncating'):
            disc, _, _ = truncate_intervals(disc, corpus, split_mapping)
        return disc

    with verb_print('  checking discovered classes', verbose, True,
                    True, True):
        filename_errors, interval_errors = \
            check_intervals(disc, split_mapping)

    # Sorted once; clipping afterwards keeps the order.
    filename_errors = sorted(filename_errors, key=by_position)
    interval_errors = sorted(interval_errors, key=by_position)

    if interval_errors:
        print(banner('intervals found in {0} outside of valid'
                     ' splits'.format(fname)))
        print_intervals(clip(interval_errors, 'interval'))
    if filename_errors:
        print(banner('unknown filenames found in {0}'.format(fname)))
        for fragment in clip(filename_errors, 'filename'):
            print('  error: {0}'.format(fragment.name))
    if interval_errors or filename_errors:
        abort()
    return disc
Ejemplo n.º 38
0
        rdir = path.dirname(path.realpath(__file__))
        resource_dir = path.join(rdir, 'resources')

    prefix = 'globalphone-' + lang
    phn_corpus_file       = path.join(resource_dir, prefix + '.phn')

    if verbose:
        print 'globalphone_eval2 version {0}'.format(VERSION)
        print '----------------------------'
        print 'dataset:     globalphone-' + lang
        print 'inputfile:   {0}'.format(match_fn)
        print 'destination: {0}'.format(dest)
        print

    if verbose:
        print banner('Loading phone corpus.')
    phn_corpus = load_corpus_txt(phn_corpus_file)

    if verbose:
        print banner('Loading matches from master_match.')
    matches = load_match_file(match_fn, phn_corpus)

    ned_scores, dtw_scores = ned_sub(matches, verbose, n_jobs)

    with open(dest, 'w') as f:
        for ned_score, dtw_score in zip(ned_scores, dtw_scores):
            f.write("%.4f %.4f\n" % (ned_score, dtw_score))

    # sns.jointplot(np.array(dtw_scores), np.array(ned_scores), kind='kde')
    # plt.show()
Ejemplo n.º 39
0
    fragments_within_file = path.join(resource_dir, 'sample.intervals.within')
    gold_clsfile          = path.join(resource_dir, 'sample.classes')
    phn_corpus_file       = path.join(resource_dir, 'sample.phn')
    wrd_corpus_file       = path.join(resource_dir, 'sample.wrd')
    split_file            = path.join(resource_dir, 'sample.split')

    if verbose:
        print 'sample_eval2 version {0}'.format(VERSION)
        print '--------------------------'
        print 'dataset:     sample'
        print 'inputfile:   {0}'.format(disc_clsfile)
        print 'destination: {0}'.format(dest)
        print

    if verbose:
        print banner('LOADING FILES')

    wrd_corpus = load_wrd_corpus(wrd_corpus_file, verbose)
    phn_corpus = load_phn_corpus(phn_corpus_file, verbose)

    fragments_cross = load_fragments_cross(fragments_cross_file, verbose)
    fragments_within = load_fragments_within(fragments_within_file, verbose)

    truncate = args['truncate']
    disc_clsdict = load_disc(disc_clsfile, phn_corpus, split_file,
                             truncate, verbose)
    gold_clsdict = load_gold(gold_clsfile, phn_corpus, verbose)

    try:
        os.makedirs(dest)
    except OSError: