def train(self, samples):
    """Run CPAR for feature selection, then train an SVM on the reduced samples.

    The CPAR trainer produces an association rule set.  Every item that
    appears in a rule's ``ls`` attribute is collected, ``samples`` is
    feature-selected down to those items, and the SVM trainer is trained on
    the resulting sample set.

    Args:
        samples: Sample set to train on.  It must provide
            ``get_current_class()`` and ``feature_select()``; the object
            returned by ``feature_select()`` must be iterable and provide
            ``set_current_class()``.

    Returns:
        The value returned by ``libSVMTrainer.train`` for the
        feature-selected sample set (not the CPAR rule set).
    """
    current_class = samples.get_current_class()

    # Purely informational: report where the trainer parameters come from.
    # The single-argument print(...) form gives the same output under the
    # Python 2 print statement and the Python 3 print function.
    if self.parameters is None:
        print("Using standard parameters. ")
    elif os.path.isfile(self.parameters):
        print("TRAINER FOUND PARAMETERS FROM %s" % (self.parameters,))
    else:
        print("Trainer DID NOT find %s" % (self.parameters,))

    # Both trainers are built from the same parameters, SVM first, as before.
    svm_trainer = libSVMTrainer(self.parameters)
    cpar_trainer = CPARTrainer(self.parameters)

    arset = cpar_trainer.train(samples)
    print("Found %d rules!" % len(arset))

    # Union of all items found in the rules' ``ls`` attribute.
    distinct_items = set()
    for rule in arset:
        distinct_items.update(rule.ls)
    print("Found %d distinct items" % len(distinct_items))

    # feature_select() previously received dict.keys() (an arbitrarily
    # ordered list); it still receives a list of the same items.
    sample_set_feature_selected = samples.feature_select(list(distinct_items))
    # Re-apply the class that was current on the input sample set.
    sample_set_feature_selected.set_current_class(current_class)

    # Gather statistics for the log line below; they do not affect training.
    non_zero_features = set()
    class_labels = set()
    for sample in sample_set_feature_selected:
        # NOTE(review): nonzero(...)[0] is presumably numpy's index array of
        # the non-zero attributes -- confirm against the file's imports.
        non_zero_features.update(
            int(item) for item in nonzero(sample.get_attribute_matrix())[0]
        )
        class_labels.add(sample.get_class_label())
    print(
        "Using %d features with SVM classifier over %s (%d) class labels."
        % (len(non_zero_features), str(list(class_labels)), len(class_labels))
    )

    return svm_trainer.train(sample_set_feature_selected)
# Driver: compress the sample set, build one test configuration per score
# combination, and run cross-validation over all of them.
# `samples` and `options` are bound before this point.
samples.set_current_class(options.target_class)
print("Parameters from %s" % (options.parameters))

# The trailing comma is deliberate: under the Python 2 print statement it
# suppresses the newline so the next message continues on the same line.
print("Compressing features..."),
samples = samples.compress_features()
print("compressed to %d distinct features." % (samples.get_number_of_features()))

# compress_features() returned a new object bound to `samples`, so the
# target class is selected again before hiding nulls for that class.
samples.set_current_class(options.target_class)
samples.hide_nulls(options.target_class)

test_configurations = []
# NOTE(review): `confounders` is not referenced again in the visible code.
confounders = ("genus", "family", "order", "class", "phylum", "superkingdom")
scores_list = (("cmi",), ("cwmi",), ("mi", "cwmi"))

# Keyword arguments shared by every feature selector built below.
selector_options = dict(
    confounders_filename=options.parameters,
    features_per_class=options.features_per_class,
    confounder=options.confounder
)

# Baseline configuration using only the "mi" score.
feature_selector = CWMIRankFeatureSelector(scores=("mi",), **selector_options)
trainer = libSVMTrainer(kernel_type="LINEAR", C=5)
classifier = libSVMClassifier()
test_configurations.append(
    TestConfiguration("mi", feature_selector, trainer, classifier)
)

# One further configuration per score combination; all of them share the
# trainer and classifier instances created above.
for score_group in scores_list:
    feature_selector = CWMIRankFeatureSelector(
        scores=score_group, **selector_options
    )
    configuration_name = "%s_%s" % ("_".join(score_group), options.confounder)
    test_configurations.append(
        TestConfiguration(configuration_name, feature_selector, trainer, classifier)
    )

# NOTE(review): `root` is built but not used in the visible code; the
# cross-validator receives options.output_filename as its root output.
root = "%s_%s_p%dn%d" % (
    options.target_class,
    options.confounder,
    options.features_per_class,
    options.features_per_class
)

crossvalidator = CrossValidation(
    samples,
    options.parameters,
    options.folds,
    options.replicates,
    test_configurations,
    root_output=options.output_filename
)
crossvalidator.crossvalidate()