def _boundary_sub(disc_clsdict, corpus, names, label, verbose, n_jobs,
                  threshold=0.03):
    """Score discovered boundaries against corpus boundaries per name set.

    For every entry ``ns`` of ``names``, both ``disc_clsdict`` and ``corpus``
    are restricted to ``ns`` and wrapped in ``Boundaries``. Each resulting
    (discovered, gold) pair is scored by ``eval_from_bounds`` through
    ``Parallel``/``delayed``; the two score sequences are converted to
    double arrays and combined by ``praggregate``.

    Parameters
    ----------
    disc_clsdict : object exposing ``restrict(ns)``
        Discovered classes (presumably a class dictionary -- confirm
        against the caller).
    corpus : object exposing ``restrict(ns)``
        Reference the discovered classes are scored against.
    names : sized iterable of sized iterables
        The name sets; one score pair is computed per entry.
    label : str
        Tag inserted into the progress messages.
    verbose : bool
        Enables the progress messages and ``Parallel`` verbosity 5.
    n_jobs : int
        Passed through to ``Parallel``.
    threshold : float
        Passed to ``Boundaries`` as the ``threshold`` keyword.

    Returns
    -------
    tuple
        The two values unpacked from ``praggregate`` (presumably aggregated
        precision and recall -- confirm against ``praggregate``).
    """
    def _bounds_per_set(source):
        # One Boundaries object per name set, in the order of ``names``.
        return [Boundaries(source.restrict(ns), threshold=threshold)
                for ns in names]

    if verbose:
        n_files = sum(len(ns) for ns in names)
        print(' boundary ({2}): subsampled {0} files in {1} sets'
              .format(n_files, len(names), label))

    # The same message is used for both phases, as in the original.
    message = ' boundary ({0}): calculating scores'.format(label)

    with verb_print(message, verbose, True, True, True):
        # Discovered bounds are built before gold bounds.
        disc_bounds = _bounds_per_set(disc_clsdict)
        gold_bounds = _bounds_per_set(corpus)

    with verb_print(message, verbose, False, True, False):
        runner = Parallel(n_jobs=n_jobs,
                          verbose=5 if verbose else 0,
                          pre_dispatch='2*n_jobs')
        jobs = (delayed(eval_from_bounds)(disc, gold)
                for disc, gold in zip(disc_bounds, gold_bounds))
        # Transpose the per-set (p, r) pairs into one sequence of each.
        precisions, recalls = izip(*runner(jobs))

    precisions = np.fromiter(precisions, dtype=np.double)
    recalls = np.fromiter(recalls, dtype=np.double)
    return tuple(praggregate(precisions, recalls))
def _boundary_sub(disc_clsdict, corpus, names, label, verbose, n_jobs,
                  threshold=None):
    """Score discovered boundaries against corpus boundaries per name set.

    For every entry ``ns`` of ``names``, both ``disc_clsdict`` and ``corpus``
    are restricted to ``ns`` and wrapped in ``Boundaries``. Each resulting
    (discovered, gold) pair is scored by ``eval_from_bounds`` through
    ``Parallel``/``delayed``; the two score sequences are converted to
    double arrays and combined by ``praggregate``.

    Parameters
    ----------
    disc_clsdict : object exposing ``restrict(ns)``
        Discovered classes (presumably a class dictionary -- confirm
        against the caller).
    corpus : object exposing ``restrict(ns)``
        Reference the discovered classes are scored against.
    names : sized iterable of sized iterables
        The name sets; one score pair is computed per entry.
    label : str
        Tag inserted into the progress messages.
    verbose : bool
        Enables the progress messages and ``Parallel`` verbosity 5.
    n_jobs : int
        Passed through to ``Parallel``.
    threshold : float or None, optional
        When not None, forwarded to ``Boundaries`` as the ``threshold``
        keyword. When None (the default) ``Boundaries`` is constructed
        without it, so whatever default ``Boundaries`` defines applies.

    Returns
    -------
    tuple
        ``(p, r)`` as unpacked from ``praggregate`` (presumably aggregated
        precision and recall -- confirm against ``praggregate``).
    """
    # Only pass ``threshold`` when the caller supplied one, so calls that
    # omit it keep constructing Boundaries exactly as before.
    bounds_kwargs = {} if threshold is None else {'threshold': threshold}

    if verbose:
        print(' boundary ({2}): subsampled {0} files in {1} sets'
              .format(sum(map(len, names)), len(names), label))

    with verb_print(' boundary ({0}): calculating scores'.format(label),
                    verbose, True, True, True):
        disc_bounds = [Boundaries(disc_clsdict.restrict(ns), **bounds_kwargs)
                       for ns in names]
        gold_bounds = [Boundaries(corpus.restrict(ns), **bounds_kwargs)
                       for ns in names]

    with verb_print(' boundary ({0}): calculating scores'.format(label),
                    verbose, False, True, False):
        scores = Parallel(n_jobs=n_jobs,
                          verbose=5 if verbose else 0,
                          pre_dispatch='2*n_jobs')(
            delayed(eval_from_bounds)(disc, gold)
            for disc, gold in zip(disc_bounds, gold_bounds))
        # Transpose the per-set (p, r) pairs into one sequence of each.
        p, r = izip(*scores)

    p = np.fromiter(p, dtype=np.double)
    r = np.fromiter(r, dtype=np.double)
    p, r = praggregate(p, r)
    return p, r