Ejemplo n.º 1
0
    def exportWeights(self, _features, _file):
        """Export the weight entries in self.weights to _file through a CSV object.

        One row is written per entry of self.weights. Each row holds the
        weight of every name in _features, in that order; a name that is
        not present in the entry is written as 0.

        When self.classes is non-empty, the header is prefixed with the
        columns 'class0' and 'class1' and every row is prefixed with the
        matching self.classes entry.

        _features -- feature names defining the exported columns (it is
                     concatenated to a list when self.classes is non-empty)
        _file     -- target passed on to CSV.save()
        """
        withClasses = len(self.classes) > 0

        table = CSV()
        table.header = (['class0', 'class1'] + _features) if withClasses else _features

        for row in range(len(self.weights)):
            weights = self.weights[row]

            # features without a weight in this entry are exported as 0
            cells = [str(weights[name] if name in weights else 0) for name in _features]
            if withClasses:
                cells = self.classes[row] + cells

            table.data.append(','.join(cells))

        table.save(_file)
Ejemplo n.º 2
0
	def run(self, _training, _model, _batchSize, _resultFile):
		"""Run a regression experiment on growing prefixes of the training data.

		The training CSV is loaded, passed through randomize(1000) and
		removeIndices(), and then evaluated on cumulative subsets: step i
		uses the first (i + 1) * _batchSize rows. Only full batches are
		counted, so trailing rows that do not fill a batch are never used.

		Each subset is saved as "subset_<i>.csv" under self.resultFolder and
		handed to Experiment(...).regression([_model], 10); the returned
		header/data pairs are collected in a ResultMatrix that is finally
		saved to _resultFile.
		"""
		source = CSV(_training)
		source.randomize(1000)
		source.removeIndices()

		results = ResultMatrix()
		batchCount = int(len(source.data)/_batchSize)
		for step in range(batchCount):
			# every subset starts at row 0, i.e. it contains all previous ones
			rowLimit = (step + 1) * _batchSize

			part = CSV()
			part.header = source.header
			part.data = source.data[:rowLimit]

			partPath = self.resultFolder + "subset_{}.csv".format(step)
			part.save(partPath)

			resultHeader, resultData = Experiment(partPath).regression([_model], 10)
			results.add(resultHeader, resultData)

		results.save(_resultFile)
Ejemplo n.º 3
0
# Baseline: perform a 10-fold cross validation on the full training data.
# NOTE(review): `training`, `model` and `csv` are not defined in the visible
# part of this script; they are expected to be set up earlier - confirm.
e = Experiment(training, "example_feature_reduction")
e.regression([model], 10)
# keep the baseline result file under the name used for the plot (step 0)
CSV(e.path("cv_0.csv")).save(e.path("subset_0.csv"))
# one tick label per step; the baseline has no removed feature
xTicks = ["None"]

# obtain a feature ranking
M = CSV(e.path("features_0.csv")).toMatrix()
M.normalizeRows()
M.sortByMean()

# sequentially remove the least important feature from the training data and retrain the model
# (the loop runs len(M.header) - 1 times, so one header entry is always left)
subset = e.path("subset.csv")
for i in range(len(M.header) - 1):
    # the last header entry is taken as the least important feature
    # (presumably sortByMean() orders the ranking accordingly - TODO confirm)
    key = M.header[-1]
    M.header = M.header[0:-1]
    # `csv` shrinks by one column per iteration and overwrites the same subset file
    csv.removeColumnWithKey(key)
    csv.save(subset)

    # retrain on the reduced data; `e` is rebound to the new experiment
    e = Experiment(subset, "example_feature_reduction")
    e.regression([model], 10)
    # step i + 1, because subset_0.csv holds the baseline result
    CSV(e.path("cv_0.csv")).save(e.path("subset_" + str(i + 1) + ".csv"))
    xTicks.append(key)

# collect the result files written above, one per entry in xTicks
files = [e.path("subset_" + str(i) + ".csv") for i in range(len(xTicks))]
ResultVisualizer().boxplots(files,
                            "r2",
                            xTicks,
                            xlabel='Sequentially Removed Features',
                            ylabel='R2',