def classify_file(train_inst_fns, test_inst_fns, out_fns=None, log_fn=None,
                  clas_dir=None, descriptor=None, timbl=None, options="",
                  log=False):
    """
    Train and test Timbl on paired lists of instance files

    The actual work is delegated to TimblFile.train_test_multi; this
    function only prepares the output directory and makes sure a suitably
    configured TimblFile is available.

    @param train_inst_fns: a list of instance filenames for training

    @param test_inst_fns: a list of instance filenames for testing

    @keyword out_fns: list of classifier output files to be created

    @keyword log_fn: classifier log file to be created; only relevant when
    keyword log is true

    @keyword clas_dir: directory for classifier output files; it is created
    here whenever it is given and handed to Timbl as the output directory
    (documented as being ignored when out_fns is given)

    @keyword descriptor: a Descriptor instance from which the Timbl options
    are derived; must be given when no timbl is supplied and is not used
    otherwise

    @keyword timbl: a tailored TimblFile instance; its default options must
    contain at least +vo, +vdb, +vdi and -m (the -m option specifies that
    the administrative features must be ignored)

    @keyword options: additional Timbl options, excluding -f, -m, +vo,
    +vdb, +vdi

    @keyword log: log Timbl's standard output and error streams to file

    @return: the result of train_test_multi (documented as the list of
    Timbl output filenames)
    """
    if clas_dir:
        makedirs(clas_dir)

    if timbl:
        # a caller-supplied classifier wins over the descriptor, but it has
        # to be a TimblFile carrying all of the mandatory options
        assert isinstance(timbl, TimblFile)
        for required_opt in ("+vo", "+vdb", "+vdi", "-m"):
            assert required_opt in timbl.default_opts
    else:
        # no classifier given: derive the default options from the descriptor
        assert descriptor
        default_opts = timbl_options_string(descriptor)
        timbl = TimblFile(default_opts=default_opts)

    timbl_out_fns = timbl.train_test_multi(
        train_inst_fns,
        test_inst_fns,
        out_fns=out_fns,
        log_fn=log_fn,
        options=options,
        log=log,
        out_dir=clas_dir)
    return timbl_out_fns
def dump_inst_base(config):
    """
    Train Timbl on an instance file and dump the resulting instance base

    @param config: configuration object. The attributes read directly are
    "descriptor_inst" (optional; when it is absent a Descriptor is built
    from config.features) and "timbl_opts" (optional; passed on to
    timbl_options_string as other options). The filenames are looked up
    through _abspath under the names "timbl_inst_fname" (required) and
    "timbl_ib_fname" (optional; defaults to the instance filename with its
    extension replaced by ".ib").
    """
    from daeso_nl.ga.classifier import timbl_options_string
    from tt.timblfile import TimblFile

    # determine Timbl options
    # The fallback Descriptor is built lazily: passing it as the default
    # argument of getattr would construct it on every call, and would fail
    # on a config without "features" even when "descriptor_inst" is present.
    missing = object()
    descriptor = getattr(config, "descriptor_inst", missing)
    if descriptor is missing:
        descriptor = Descriptor(config.features)

    timbl_opts = getattr(config, "timbl_opts", None)
    options = timbl_options_string(descriptor, other=timbl_opts)

    # determine filenames
    # NOTE(review): _abspath presumably returns a false value when the
    # setting is missing or empty -- confirm against its definition
    inst_fname = _abspath(config, "timbl_inst_fname")
    assert inst_fname, "config must define timbl_inst_fname"

    inst_base_fname = _abspath(config, "timbl_ib_fname")
    if not inst_base_fname:
        inst_base_fname = splitext(inst_fname)[0] + ".ib"

    # dump instance base file
    timbl = TimblFile()
    timbl.train(inst_fname, inst_base_fname, options=options)
def classify_file_cv(inst_fns, test_inst_fns=None, out_fns=None, log_fns=None,
                     clas_dir=None, descriptor=None, timbl=None, options="",
                     n=None, log=False):
    """
    Run Timbl over instance files in a cross-validation procedure

    The actual work is delegated to TimblFile.cross_validate; this function
    only prepares the output directory and makes sure a suitably configured
    TimblFile is available.

    @param inst_fns: a list of instance filenames for training; when no
    test_inst_fns is supplied the same files are used for testing as well,
    otherwise they serve for training only

    @keyword test_inst_fns: a list of instance filenames for testing; this
    allows for down-sampling of the training instances without affecting
    the test instances

    @keyword out_fns: list of classifier output files to be created

    @keyword log_fns: list of classifier log files to be created; only
    relevant when keyword log is true

    @keyword clas_dir: directory for classifier output files; it is created
    here whenever it is given and handed to Timbl as the output directory
    (documented as being ignored when out_fns is given)

    @keyword descriptor: a Descriptor instance from which the Timbl options
    are derived; must be given when no timbl is supplied and is not used
    otherwise

    @keyword timbl: a tailored TimblFile instance; its default options must
    contain at least +vo, +vdb, +vdi and -m (the -m option specifies that
    the administrative features must be ignored)

    @keyword options: additional Timbl options, excluding -f, -m, +vo,
    +vdb, +vdi

    @keyword n: limit on the number of cross-validations performed (by
    default equals the number of instance filenames)

    @keyword log: log Timbl's standard output and error streams to file

    @return: the result of cross_validate (documented as the list of Timbl
    output filenames)
    """
    if clas_dir:
        makedirs(clas_dir)

    if timbl:
        # a caller-supplied classifier wins over the descriptor, but it has
        # to be a TimblFile carrying all of the mandatory options
        assert isinstance(timbl, TimblFile)
        for required_opt in ("+vo", "+vdb", "+vdi", "-m"):
            assert required_opt in timbl.default_opts
    else:
        # no classifier given: derive the default options from the descriptor
        assert descriptor
        default_opts = timbl_options_string(descriptor)
        timbl = TimblFile(default_opts=default_opts)

    timbl_out_fns = timbl.cross_validate(
        inst_fns,
        test_inst_fns=test_inst_fns,
        out_fns=out_fns,
        log_fns=log_fns,
        options=options,
        n=n,
        log=log,
        out_dir=clas_dir)
    return timbl_out_fns