Beispiel #1
0
    def test_sample(self):
        """Check that sampling class "None" hits every requested fraction.

        For each fraction 0.1 .. 0.9, ``self.st.class_fracts`` is set to
        ``{"None": fraction}`` and ``sample(self.st)`` is run. Every entry of
        ``self.st.dev_samp_fns`` is then compared with the paired entry of
        ``self.st.dev_inst_fns``: the "None" count of the sample must deviate
        by less than 5% from the "None" count of the original multiplied by
        the fraction.
        """
        fractions = [i / 10.0 for i in range(1, 10)]
        for fract in fractions:
            self.st.class_fracts = {"None": fract}
            sample(self.st)

            for inst_fname, samp_fname in zip(self.st.dev_inst_fns,
                                              self.st.dev_samp_fns):
                inst_counts = get_class_counts(inst_fname)
                target = inst_counts["None"] * fract
                samp_counts = get_class_counts(samp_fname)
                approx = samp_counts["None"]
                diff = abs(target - approx)
                # relative deviation from the target, as a percentage
                err = (diff * 100.0) / float(target)

                # The check runs once per fraction and per file, so the
                # failure message must say which combination went wrong.
                self.assertTrue(
                    err < 5.0,
                    "%s: fraction %s, expected about %s 'None' instances "
                    "but sampled %s (error %.2f%%, limit 5%%)"
                    % (samp_fname, fract, target, approx, err))
Beispiel #2
0
 def test_sample(self):
     """Check that sampling class "None" hits every requested fraction.

     For each fraction 0.1 .. 0.9, ``self.st.class_fracts`` is set to
     ``{"None": fraction}`` and ``sample(self.st)`` is run. Every entry of
     ``self.st.dev_samp_fns`` is then compared with the paired entry of
     ``self.st.dev_inst_fns``: the "None" count of the sample must deviate
     by less than 5% from the "None" count of the original multiplied by
     the fraction.
     """
     fractions = [i / 10.0 for i in range(1, 10)]
     for fract in fractions:
         self.st.class_fracts = {"None": fract}
         sample(self.st)

         for inst_fname, samp_fname in zip(self.st.dev_inst_fns,
                                           self.st.dev_samp_fns):
             inst_counts = get_class_counts(inst_fname)
             target = inst_counts["None"] * fract
             samp_counts = get_class_counts(samp_fname)
             approx = samp_counts["None"]
             diff = abs(target - approx)
             # relative deviation from the target, as a percentage
             err = (diff * 100.0) / float(target)

             # The check runs once per fraction and per file, so the
             # failure message must say which combination went wrong.
             self.assertTrue(
                 err < 5.0,
                 "%s: fraction %s, expected about %s 'None' instances "
                 "but sampled %s (error %.2f%%, limit 5%%)"
                 % (samp_fname, fract, target, approx, err))
Beispiel #3
0
Reads Timbl instances from standard input and writes a class distribution in
the form of an ascii table to standard output.

Example:
  $ tt-class-dist.py -d, < ../data/dimin.train 
"""

import sys

from tt.argparse import ArgumentParser, RawDescriptionHelpFormatter
from tt.sample import get_class_counts, print_class_dist

__author__ = 'Erwin Marsi <*****@*****.**>'
__version__ = "0.5"

# Command-line interface: the module docstring doubles as the help text, and
# the raw formatter keeps its line breaks (e.g. the usage example) intact.
parser = ArgumentParser(
    description=__doc__,
    formatter_class=RawDescriptionHelpFormatter,
    version="%(prog)s version " + __version__)

# With no -d/--delimiter given, sep stays None when the counts are collected.
parser.add_argument(
    "-d", "--delimiter",
    metavar="CHAR",
    default=None,
    help="field delimiter in instances (default is whitespace)")

args = parser.parse_args()

# Count the classes of the instances on stdin and print the distribution.
class_counts = get_class_counts(sys.stdin, sep=args.delimiter)
print_class_dist(class_counts)
Beispiel #4
0
Reads Timbl instances from standard input and writes a class distribution in
the form of an ascii table to standard output.

Example:
  $ tt-class-dist.py -d, < ../data/dimin.train 
"""

import sys

from tt.argparse import ArgumentParser, RawDescriptionHelpFormatter
from tt.sample import get_class_counts, print_class_dist

__author__ = 'Erwin Marsi <*****@*****.**>'
__version__ = "0.5"

# Command-line interface: the module docstring doubles as the help text, and
# the raw formatter keeps its line breaks (e.g. the usage example) intact.
parser = ArgumentParser(
    description=__doc__,
    formatter_class=RawDescriptionHelpFormatter,
    version="%(prog)s version " + __version__)

# With no -d/--delimiter given, sep stays None when the counts are collected.
parser.add_argument(
    "-d", "--delimiter",
    metavar="CHAR",
    default=None,
    help="field delimiter in instances (default is whitespace)")

args = parser.parse_args()

# Count the classes of the instances on stdin and print the distribution.
class_counts = get_class_counts(sys.stdin, sep=args.delimiter)
print_class_dist(class_counts)