Esempio n. 1
0
	pt.start()
	fileio = FileIO()
	samples = fileio.load_samples(options.input_samples_filename)
	samples_time = pt.stop()
	print "Loaded samples (%0.2fs)"%(samples_time)
	if options.feature_select:
		print "Selecting top %d features from %s, ordered by %s"%(options.feature_select_top_n,options.feature_select,options.feature_select_score)
		pt.start()
		from pica.AssociationRule import load_rules,AssociationRuleSet
		selected_rules = AssociationRuleSet()
		rules = load_rules(options.feature_select)
		rules.set_target_accuracy(options.feature_select_score)
		selected_rules.extend(rules[:options.feature_select_top_n])
		samples = samples.feature_select(selected_rules)
		print "Finished feature selection (%0.2fs)"%(pt.stop())
	classes = fileio.load_classes(options.input_classes_filename)
	samples.load_class_labels(classes)
	print samples.get_number_of_features()
	samples.set_current_class(options.target_class)
	
	pt.start()
	print "Compressing features...",
	samples = samples.compress_features()
	compression_time = pt.stop()
	print "\bfinished compression.(%0.2fs)"%(compression_time)
	samples.set_current_class(options.target_class)
	
	
	samples.hide_nulls(options.target_class)
	
	modulepath = "pica.trainers.%s"%(options.algorithm)
# Positional command-line arguments, in order: samples file, class-labels
# file, metadata file, output file.
# NOTE(review): `sys` must already be imported earlier in the file; that
# import is not visible in this excerpt.
samples_filename = sys.argv[1]
class_labels_filename = sys.argv[2]
metadata_filename = sys.argv[3]
output_filename = sys.argv[4]

from pica.Sample import SampleSet, ClassLabelSet
from pica.io.FileIO import FileIO
from pica.IntegerMapping import IntegerMapping
from pica.trainers.cwmi.CWMILibrary import CWMILibrary

# Load the three inputs through FileIO and attach the class labels to the
# sample set.
fileio = FileIO()
cwmilibrary = CWMILibrary()
metadata = fileio.load_metadata(metadata_filename)
samples = fileio.load_samples(samples_filename)
classes = fileio.load_classes(class_labels_filename)
samples.load_class_labels(classes)
# Every metadata key except the first one is used as a confounder column.
# NOTE(review): presumably the first key is a sample identifier -- confirm.
confounders = metadata.get_key_list()[1:]

# Output lines are accumulated in `outlines`; the first one is the
# tab-separated header: "phenotype", one column per confounder, "total".
outlines = []
header_line = ["phenotype"]

header_line.extend(confounders)
header_line.append("total")
outlines.append("\t".join(header_line))

# Visit each class in turn: make it the current class on the sample set and
# hide nulls for that class.
for class_name in classes.get_classes():
	# The bare string below is a no-op expression used as an inline note.
	"generate phenotype map"
	
	samples.set_current_class(class_name)
	samples.hide_nulls(class_name)
Esempio n. 3
0
		errorCount += 1
	if errorCount > 0:
		error("For help on usage, try calling:\n\tpython %s -h" % os.path.basename(sys.argv[0]))
		exit(1)
		
	fileio = FileIO()
	samples = fileio.load_samples(options.input_samples_filename)
	if options.feature_select:
		print "Selecting top %d features from %s, ordered by %s"%(options.feature_select_top_n,options.feature_select,options.feature_select_score)
		from pica.AssociationRule import load_rules,AssociationRuleSet
		selected_rules = AssociationRuleSet()
		rules = load_rules(options.feature_select)
		rules.set_target_accuracy(options.feature_select_score)
		selected_rules.extend(rules[:options.feature_select_top_n])
		samples = samples.feature_select(selected_rules)
	classes = fileio.load_classes(options.input_classes_filename)
	samples.load_class_labels(classes)
	print "Sample set has %d features."%(samples.get_number_of_features())
	samples.set_current_class(options.target_class)
	print "Parameters from %s"%(options.parameters)
	print "Compressing features...",
	samples = samples.compress_features()
	print "compressed to %d distinct features."%(samples.get_number_of_features())
	
	samples.set_current_class(options.target_class)
	samples.hide_nulls(options.target_class)
	
	
	modulepath = "pica.trainers.%s"%(options.training_algorithm)
	classname = options.training_algorithm.split(".")[-1]
	TrainerClass = __import__(modulepath, fromlist=(classname,))
Esempio n. 4
0
# Positional command-line arguments, in order: samples file, class-labels
# file, metadata file, output file.
# NOTE(review): `sys` must already be imported earlier in the file; that
# import is not visible in this excerpt.
samples_filename = sys.argv[1]
class_labels_filename = sys.argv[2]
metadata_filename = sys.argv[3]
output_filename = sys.argv[4]

from pica.Sample import SampleSet, ClassLabelSet
from pica.io.FileIO import FileIO
from pica.IntegerMapping import IntegerMapping
from pica.trainers.cwmi.CWMILibrary import CWMILibrary

# Load the three inputs through FileIO and attach the class labels to the
# sample set.
fileio = FileIO()
cwmilibrary = CWMILibrary()
metadata = fileio.load_metadata(metadata_filename)
samples = fileio.load_samples(samples_filename)
classes = fileio.load_classes(class_labels_filename)
samples.load_class_labels(classes)
# Every metadata key except the first one is used as a confounder column.
# NOTE(review): presumably the first key is a sample identifier -- confirm.
confounders = metadata.get_key_list()[1:]

# Output lines are accumulated in `outlines`; the first one is the
# tab-separated header: "phenotype", one column per confounder, "total".
outlines = []
header_line = ["phenotype"]

header_line.extend(confounders)
header_line.append("total")
outlines.append("\t".join(header_line))

# Visit each class in turn: make it the current class on the sample set and
# hide nulls for that class.
for class_name in classes.get_classes():
    # The bare string below is a no-op expression used as an inline note.
    "generate phenotype map"

    samples.set_current_class(class_name)
    samples.hide_nulls(class_name)