Beispiel #1
0
def _boundary_sub(disc_clsdict,
                  corpus,
                  names,
                  label,
                  verbose,
                  n_jobs,
                  threshold=0.03):
    """Evaluate discovered boundaries against the corpus on each name subset.

    For every entry ``ns`` of ``names``, both ``disc_clsdict`` and ``corpus``
    are restricted to ``ns`` and wrapped in ``Boundaries`` (with the given
    ``threshold``).  Each discovered/gold pair is then scored with
    ``eval_from_bounds`` through a joblib ``Parallel`` pool, and the
    per-subset results are combined by ``praggregate``.

    Parameters
    ----------
    disc_clsdict : object exposing ``restrict(ns)``
        The discovered classes.
    corpus : object exposing ``restrict(ns)``
        The reference the discovered classes are compared against.
    names : sequence of sized collections
        Subsampled name sets; iterated several times, so it must be
        re-iterable.
    label : str
        Tag interpolated into the progress messages.
    verbose : bool
        Enables progress output and joblib verbosity level 5.
    n_jobs : int
        Forwarded to ``Parallel``.
    threshold : float, optional
        Forwarded to ``Boundaries`` for both discovered and gold sets.

    Returns
    -------
    (p, r) : tuple
        The two values returned by ``praggregate``
        (presumably aggregated precision and recall -- confirm against
        ``praggregate``).
    """
    if verbose:
        n_files = sum(len(ns) for ns in names)
        print('  boundary ({2}): subsampled {0} files in {1} sets'.format(
            n_files, len(names), label))

    # Both phases report under the same message text.
    progress_msg = '  boundary ({0}): calculating scores'.format(label)

    with verb_print(progress_msg, verbose, True, True, True):
        disc_bounds = [
            Boundaries(disc_clsdict.restrict(subset), threshold=threshold)
            for subset in names
        ]
        gold_bounds = [
            Boundaries(corpus.restrict(subset), threshold=threshold)
            for subset in names
        ]

    with verb_print(progress_msg, verbose, False, True, False):
        joblib_verbosity = 5 if verbose else 0
        runner = Parallel(n_jobs=n_jobs,
                          verbose=joblib_verbosity,
                          pre_dispatch='2*n_jobs')
        pair_scores = runner(
            delayed(eval_from_bounds)(disc, gold)
            for disc, gold in zip(disc_bounds, gold_bounds)
        )
        # Transpose [(p0, r0), (p1, r1), ...] into (p0, p1, ...), (r0, r1, ...).
        p, r = zip(*pair_scores)

    p = np.fromiter(p, dtype=np.double)
    r = np.fromiter(r, dtype=np.double)
    p, r = praggregate(p, r)
    return p, r
Beispiel #2
0
def _boundary_sub(disc_clsdict, corpus, names, label, verbose, n_jobs):
    """Evaluate discovered boundaries against the corpus on each name subset.

    For every entry ``ns`` of ``names``, both ``disc_clsdict`` and ``corpus``
    are restricted to ``ns`` and wrapped in ``Boundaries``.  Each
    discovered/gold pair is then scored with ``eval_from_bounds`` through a
    joblib ``Parallel`` pool, and the per-subset results are combined by
    ``praggregate``.

    Parameters
    ----------
    disc_clsdict : object exposing ``restrict(ns)``
        The discovered classes.
    corpus : object exposing ``restrict(ns)``
        The reference the discovered classes are compared against.
    names : sequence of sized collections
        Subsampled name sets; iterated several times, so it must be
        re-iterable.
    label : str
        Tag interpolated into the progress messages.
    verbose : bool
        Enables progress output and joblib verbosity level 5.
    n_jobs : int
        Forwarded to ``Parallel``.

    Returns
    -------
    (p, r) : tuple
        The two values returned by ``praggregate``
        (presumably aggregated precision and recall -- confirm against
        ``praggregate``).
    """
    if verbose:
        print('  boundary ({2}): subsampled {0} files in {1} sets'.format(
            sum(map(len, names)), len(names), label))

    with verb_print('  boundary ({0}): calculating scores'.format(label),
                    verbose, True, True, True):
        disc_bounds = [Boundaries(disc_clsdict.restrict(ns)) for ns in names]
        gold_bounds = [Boundaries(corpus.restrict(ns)) for ns in names]

    with verb_print('  boundary ({0}): calculating scores'.format(label),
                    verbose, False, True, False):
        scores = Parallel(n_jobs=n_jobs,
                          verbose=5 if verbose else 0,
                          pre_dispatch='2*n_jobs')(
            delayed(eval_from_bounds)(disc, gold)
            for disc, gold in zip(disc_bounds, gold_bounds))
        # Transpose [(p0, r0), (p1, r1), ...] into (p0, p1, ...), (r0, r1, ...).
        p, r = zip(*scores)

    p, r = np.fromiter(p, dtype=np.double), np.fromiter(r, dtype=np.double)
    p, r = praggregate(p, r)
    return p, r