def test_sample(self):
    """Check that sampling keeps about the requested share of "None" instances.

    For each fraction 0.1, 0.2, ..., 0.9 the "None" class fraction is set on
    ``self.st`` and ``sample`` is run. Then, for every pair taken from
    ``self.st.dev_inst_fns`` and ``self.st.dev_samp_fns``, the "None" count
    of the sample is compared against the "None" count of the instances
    multiplied by the fraction. The relative deviation must stay below 5%.
    """
    # maximum tolerated deviation, as a percentage of the target count
    max_err_pct = 5.0
    fractions = [i / 10.0 for i in range(1, 10)]

    for fract in fractions:
        self.st.class_fracts = {"None": fract}
        sample(self.st)

        for inst_fname, samp_fname in zip(self.st.dev_inst_fns,
                                          self.st.dev_samp_fns):
            inst_counts = get_class_counts(inst_fname)
            target = inst_counts["None"] * fract
            samp_counts = get_class_counts(samp_fname)
            approx = samp_counts["None"]
            diff = abs(target - approx)
            err = (diff * 100.0) / float(target)
            # check that error percentage < 5% of the target; the message
            # identifies the failing fraction/file instead of the bare
            # "False is not true" that assertTrue reports on its own
            self.assertTrue(
                err < max_err_pct,
                "fraction %s, %s: found %s 'None' instances, expected "
                "about %s (error %.2f%%, limit %.1f%%)"
                % (fract, samp_fname, approx, target, err, max_err_pct))
def test_sample(self):
    """Check that sampling keeps about the requested share of "None" instances.

    For each fraction 0.1, 0.2, ..., 0.9 the "None" class fraction is set on
    ``self.st`` and ``sample`` is run. Then, for every pair taken from
    ``self.st.dev_inst_fns`` and ``self.st.dev_samp_fns``, the "None" count
    of the sample is compared against the "None" count of the instances
    multiplied by the fraction. The relative deviation must stay below 5%.
    """
    # maximum tolerated deviation, as a percentage of the target count
    max_err_pct = 5.0
    fractions = [i / 10.0 for i in range(1, 10)]

    for fract in fractions:
        self.st.class_fracts = {"None": fract}
        sample(self.st)

        for inst_fname, samp_fname in zip(self.st.dev_inst_fns,
                                          self.st.dev_samp_fns):
            inst_counts = get_class_counts(inst_fname)
            target = inst_counts["None"] * fract
            samp_counts = get_class_counts(samp_fname)
            approx = samp_counts["None"]
            diff = abs(target - approx)
            err = (diff * 100.0) / float(target)
            # check that error percentage < 5% of the target; the message
            # identifies the failing fraction/file instead of the bare
            # "False is not true" that assertTrue reports on its own
            self.assertTrue(
                err < max_err_pct,
                "fraction %s, %s: found %s 'None' instances, expected "
                "about %s (error %.2f%%, limit %.1f%%)"
                % (fract, samp_fname, approx, target, err, max_err_pct))
Reads Timbl instances from standard input and writes a class distribution in the form of an ascii table to standard output. Example: $ tt-class-dist.py -d, < ../data/dimin.train """
# NOTE(review): the line above is the tail of the module docstring, whose
# opening lies outside this excerpt. It is left untouched because the
# docstring is also runtime text: it is passed to the parser as description.

import sys

# NOTE(review): tt.argparse looks like a copy of argparse shipped with the
# package - confirm before swapping it for the standard library module.
from tt.argparse import ArgumentParser, RawDescriptionHelpFormatter
from tt.sample import get_class_counts, print_class_dist

__author__ = 'Erwin Marsi <*****@*****.**>'
__version__ = "0.5"

# Command line interface: the module docstring serves as the description and
# the version string is built from __version__.
parser = ArgumentParser(description=__doc__,
                        version="%(prog)s version " + __version__,
                        formatter_class=RawDescriptionHelpFormatter)

# -d/--delimiter: its value (None when the option is absent) is forwarded
# unchanged as the ``sep`` argument of get_class_counts below; according to
# the help text, None stands for splitting on whitespace.
parser.add_argument("-d", "--delimiter", default=None, metavar="CHAR",
                    help="field delimiter in instances "
                    "(default is whitespace)")

args = parser.parse_args()

# Count the classes of the instances read from standard input, then hand the
# counts to print_class_dist for output.
class_counts = get_class_counts(sys.stdin, sep=args.delimiter)
print_class_dist(class_counts)