# Excerpt of a top-level script: load the sample set, optionally restrict it
# to a subset of features, attach class labels, compress the features and
# build the dotted path of the trainer module.
# `pt` is a timer object created above this excerpt; the value returned by
# pt.stop() is formatted below as seconds.
pt.start()
fileio = FileIO()
samples = fileio.load_samples(options.input_samples_filename)
samples_time = pt.stop()
print "Loaded samples (%0.2fs)"%(samples_time)

# Optional feature selection, driven by rules loaded from
# options.feature_select.
if options.feature_select:
    print "Selecting top %d features from %s, ordered by %s"%(options.feature_select_top_n,options.feature_select,options.feature_select_score)
    pt.start()
    # Imported here, inside the branch, so it only runs when feature
    # selection was requested.
    from pica.AssociationRule import load_rules,AssociationRuleSet
    selected_rules = AssociationRuleSet()
    rules = load_rules(options.feature_select)
    # NOTE(review): presumably this makes the rule set order itself by the
    # chosen score, so the slice below keeps the best rules -- confirm
    # against AssociationRuleSet.
    rules.set_target_accuracy(options.feature_select_score)
    # Keep only the first feature_select_top_n rules.
    selected_rules.extend(rules[:options.feature_select_top_n])
    samples = samples.feature_select(selected_rules)
    print "Finished feature selection (%0.2fs)"%(pt.stop())

classes = fileio.load_classes(options.input_classes_filename)
samples.load_class_labels(classes)
# Bare number on stdout: the feature count before compression.
print samples.get_number_of_features()
samples.set_current_class(options.target_class)

pt.start()
# Trailing comma: Python 2 print without a newline, so the next message
# continues on the same output line.
print "Compressing features...",
samples = samples.compress_features()
compression_time = pt.stop()
# NOTE(review): the leading "\b" (backspace) presumably cancels the
# separator space Python 2 emits after a trailing-comma print -- confirm.
print "\bfinished compression.(%0.2fs)"%(compression_time)
# `samples` was rebound to the result of compress_features(); the target
# class is set again on that object before null-labelled samples are hidden.
samples.set_current_class(options.target_class)
samples.hide_nulls(options.target_class)

# Dotted module path of the trainer selected by options.algorithm.
modulepath = "pica.trainers.%s"%(options.algorithm)
# Excerpt of a top-level script: read four positional command-line
# arguments, load samples / class labels / metadata, and start building a
# tab-separated table with one row per phenotype (class).
samples_filename = sys.argv[1]
class_labels_filename = sys.argv[2]
metadata_filename = sys.argv[3]
output_filename = sys.argv[4]

from pica.Sample import SampleSet, ClassLabelSet
from pica.io.FileIO import FileIO
from pica.IntegerMapping import IntegerMapping
from pica.trainers.cwmi.CWMILibrary import CWMILibrary

fileio = FileIO()
cwmilibrary = CWMILibrary()
metadata = fileio.load_metadata(metadata_filename)
samples = fileio.load_samples(samples_filename)
classes = fileio.load_classes(class_labels_filename)
samples.load_class_labels(classes)

# Every metadata key except the first is used as a confounder column.
# NOTE(review): the first key is presumably an identifier column -- confirm.
confounders = metadata.get_key_list()[1:]

# Accumulates output lines; the first one is the header row:
# "phenotype", one column per confounder, then "total".
outlines = []
header_line = ["phenotype"]
header_line.extend(confounders)
header_line.append("total")
outlines.append("\t".join(header_line))

# One pass per class label. The loop body may continue past the end of this
# excerpt.
for class_name in classes.get_classes():
    # The bare string below is a no-op expression statement that the
    # original author used as a comment.
    "generate phenotype map"
    samples.set_current_class(class_name)
    samples.hide_nulls(class_name)
# Excerpt of a top-level script: finish option validation, load and prepare
# the sample set, then import the trainer module chosen on the command line.

# NOTE(review): this increment is the tail of a validation branch whose
# header lies above this excerpt; its original nesting is not visible here.
errorCount += 1
# Any recorded error aborts the run with a usage hint and exit status 1.
if errorCount > 0:
    error("For help on usage, try calling:\n\tpython %s -h" % os.path.basename(sys.argv[0]))
    exit(1)

fileio = FileIO()
samples = fileio.load_samples(options.input_samples_filename)

# Optional feature selection, driven by rules loaded from
# options.feature_select.
if options.feature_select:
    print "Selecting top %d features from %s, ordered by %s"%(options.feature_select_top_n,options.feature_select,options.feature_select_score)
    # Imported here, inside the branch, so it only runs when feature
    # selection was requested.
    from pica.AssociationRule import load_rules,AssociationRuleSet
    selected_rules = AssociationRuleSet()
    rules = load_rules(options.feature_select)
    # NOTE(review): presumably this makes the rule set order itself by the
    # chosen score, so the slice below keeps the best rules -- confirm
    # against AssociationRuleSet.
    rules.set_target_accuracy(options.feature_select_score)
    # Keep only the first feature_select_top_n rules.
    selected_rules.extend(rules[:options.feature_select_top_n])
    samples = samples.feature_select(selected_rules)

classes = fileio.load_classes(options.input_classes_filename)
samples.load_class_labels(classes)
print "Sample set has %d features."%(samples.get_number_of_features())
samples.set_current_class(options.target_class)
print "Parameters from %s"%(options.parameters)

# Trailing comma: Python 2 print without a newline, so the next message
# continues on the same output line.
print "Compressing features...",
samples = samples.compress_features()
print "compressed to %d distinct features."%(samples.get_number_of_features())
# `samples` was rebound to the result of compress_features(); the target
# class is set again on that object before null-labelled samples are hidden.
samples.set_current_class(options.target_class)
samples.hide_nulls(options.target_class)

# options.training_algorithm is a dotted name under pica.trainers; its last
# component is used as the name to import from that module.
modulepath = "pica.trainers.%s"%(options.training_algorithm)
classname = options.training_algorithm.split(".")[-1]
# With a non-empty fromlist, __import__ returns the module named by
# modulepath rather than the top-level package, so despite its name
# TrainerClass holds a module at this point.
# NOTE(review): the class itself is presumably looked up on it after this
# excerpt -- confirm.
TrainerClass = __import__(modulepath, fromlist=(classname,))