Ejemplo n.º 1
0
			for sample in samples:
				if (sample.id == target_sample_id):
					target_samples.append(sample)
	else:
		print "You must specify a target sample"
		sys.exit(1)
	
	if len(target_samples) ==0:
		print "Could not find samples!"
		sys.exit()
	samples_time = pt.stop()
	print "Loaded samples (%0.2fs)"%(samples_time)
	
	pt.start()
	
	rules = load_rules(options.model_filename)
	rules = rules.remap_feature_to_index(samples)
	training_time = pt.stop()
	newrules = []
	
	for rule in rules:
		keep_rule = False
		for target_sample in target_samples:
			if target_sample.satisfies(rule.ls):
				keep_rule = True
		if keep_rule:
			newrules.append(rule)
	newruleset = AssociationRuleSet()
	newruleset.extend(newrules)
	newruleset = newruleset.remap_index_to_feature(samples)
	newruleset.write(filename=options.output_filename)
Ejemplo n.º 2
0
		errorCount += 1
	if errorCount > 0:
		error("For help on usage, try calling:\n\tpython %s -h" % os.path.basename(sys.argv[0]))
		exit(1)
	
	pt.start()
	fileio = FileIO()
	samples = fileio.load_samples(options.input_samples_filename)
	samples_time = pt.stop()
	print "Loaded samples (%0.2fs)"%(samples_time)
	if options.feature_select:
		print "Selecting top %d features from %s, ordered by %s"%(options.feature_select_top_n,options.feature_select,options.feature_select_score)
		pt.start()
		from pica.AssociationRule import load_rules,AssociationRuleSet
		selected_rules = AssociationRuleSet()
		rules = load_rules(options.feature_select)
		rules.set_target_accuracy(options.feature_select_score)
		selected_rules.extend(rules[:options.feature_select_top_n])
		samples = samples.feature_select(selected_rules)
		print "Finished feature selection (%0.2fs)"%(pt.stop())
	classes = fileio.load_classes(options.input_classes_filename)
	samples.load_class_labels(classes)
	print samples.get_number_of_features()
	samples.set_current_class(options.target_class)
	
	pt.start()
	print "Compressing features...",
	samples = samples.compress_features()
	compression_time = pt.stop()
	print "\bfinished compression.(%0.2fs)"%(compression_time)
	samples.set_current_class(options.target_class)
Ejemplo n.º 3
0
    (options, args) = parser.parse_args()

    pt.start()
    fileio = FileIO()
    samples = fileio.load_samples(options.samples_filename)
    classes = fileio.load_classes(options.classes_filename)
    samples.load_class_labels(classes)
    samples.set_current_class(options.target_class)
    target_samples = []
    samples_time = pt.stop()
    print "Loaded samples (%0.2fs)" % (samples_time)

    pt.start()

    rules = load_rules(options.model_filename)
    indexed_rules = rules.remap_feature_to_index(samples)
    training_time = pt.stop()
    newsamples = {}

    for sample in samples:
        keep_sample = False
        for rule in indexed_rules:
            if sample.satisfies(rule.ls):
                if not newsamples.has_key(sample.id):
                    newsamples[sample.id] = []
                newsamples[sample.id].append(rule)
    sets = get_same_rulesets(newsamples)

    finished = {}
    f = open(options.output_filename, "w")
Ejemplo n.º 4
0
		error("Please provide the phenotype target to be predicted with -t \"TRAITNAME\"")
		errorCount += 1
	if not options.output_filename:
		error("Please specify a file for the output with -o /path/to/result.file")
		errorCount += 1
	if errorCount > 0:
		error("For help on usage, try calling:\n\tpython %s -h" % os.path.basename(sys.argv[0]))
		exit(1)
		
	fileio = FileIO()
	samples = fileio.load_samples(options.input_samples_filename)
	if options.feature_select:
		print "Selecting top %d features from %s, ordered by %s"%(options.feature_select_top_n,options.feature_select,options.feature_select_score)
		from pica.AssociationRule import load_rules,AssociationRuleSet
		selected_rules = AssociationRuleSet()
		rules = load_rules(options.feature_select)
		rules.set_target_accuracy(options.feature_select_score)
		selected_rules.extend(rules[:options.feature_select_top_n])
		samples = samples.feature_select(selected_rules)
	classes = fileio.load_classes(options.input_classes_filename)
	samples.load_class_labels(classes)
	print "Sample set has %d features."%(samples.get_number_of_features())
	samples.set_current_class(options.target_class)
	print "Parameters from %s"%(options.parameters)
	print "Compressing features...",
	samples = samples.compress_features()
	print "compressed to %d distinct features."%(samples.get_number_of_features())
	
	samples.set_current_class(options.target_class)
	samples.hide_nulls(options.target_class)
	
Ejemplo n.º 5
0
	def load_model(self, model_filename):
		"""Load a rule set from ``model_filename``.

		Delegates to ``load_rules`` and returns whatever it returns.

		model_filename -- path of the model file handed to ``load_rules``.
		"""
		# The original passed the undefined name ``model`` here, which ignored
		# the argument and raised NameError (or picked up an unrelated global).
		return load_rules(model_filename)