def execute(self, experiment: Experiment) -> ExperimentPerformance:
    """
    Execute K-fold cross-validation with a separate test set: 25 percent of
    the data is held out, and K-fold cross-validation runs on the remaining
    75 percent with the experiment's classifier and parameters.

    PARAMETERS
    ----------
    experiment : Experiment
        Experiment to be run.

    RETURNS
    -------
    ExperimentPerformance
        An ExperimentPerformance instance.
    """
    performance = ExperimentPerformance()
    seed = experiment.getParameter().getSeed()
    allInstances = experiment.getDataSet().getInstanceList()
    # Stratified split at ratio 0.25: get(1) is the larger share used for
    # cross-validation, get(0) is the held-out portion passed to runExperiment.
    split = Partition(allInstances, 0.25, seed, True)
    folds = KFoldCrossValidation(split.get(1).getInstances(), self.K, seed)
    self.runExperiment(experiment.getClassifier(), experiment.getParameter(), performance, folds, split.get(0))
    return performance
def train(self, trainSet: InstanceList, parameters: Parameter):
    """
    Training algorithm for the linear discriminant analysis classifier
    (Introduction to Machine Learning, Alpaydin, 2015).

    PARAMETERS
    ----------
    trainSet : InstanceList
        Training data given to the algorithm.
    parameters : Parameter
        Parameter of the Lda algorithm.
    """
    w0 = {}
    w = {}
    priorDistribution = trainSet.classDistribution()
    classLists = Partition(trainSet)
    covariance = Matrix(trainSet.get(0).continuousAttributeSize(), trainSet.get(0).continuousAttributeSize())
    # Accumulate the pooled within-class scatter: each class covariance is
    # weighted by its degrees of freedom (n_i - 1) before being summed.
    for i in range(classLists.size()):
        averageVector = Vector(classLists.get(i).continuousAverage())
        classCovariance = classLists.get(i).covariance(averageVector)
        classCovariance.multiplyWithConstant(classLists.get(i).size() - 1)
        covariance.add(classCovariance)
    # Unbiased pooled estimate: divide by N - K, then invert in place.
    # Order matters — inverse() must run after all classes are accumulated.
    covariance.divideByConstant(trainSet.size() - classLists.size())
    covariance.inverse()
    # Per-class linear discriminant: w_i = S^-1 m_i and
    # w0_i = -0.5 * (w_i . m_i) + log P(C_i).
    for i in range(classLists.size()):
        Ci = classLists.get(i).getClassLabel()
        averageVector = Vector(classLists.get(i).continuousAverage())
        wi = covariance.multiplyWithVectorFromRight(averageVector)
        w[Ci] = wi
        w0i = -0.5 * wi.dotProduct(averageVector) + math.log(priorDistribution.getProbability(Ci))
        w0[Ci] = w0i
    self.model = LdaModel(priorDistribution, w, w0)
def train(self, trainSet: InstanceList, parameters: KMeansParameter):
    """
    Training algorithm for the K-Means style classifier: computes the prior
    class distribution and the average instance of each class, then builds a
    KMeansModel using the distance metric given in the parameters.

    PARAMETERS
    ----------
    trainSet : InstanceList
        Training data given to the algorithm.
    parameters : KMeansParameter
        Parameters of the K-Means algorithm; supplies the distance metric.
    """
    priorDistribution = trainSet.classDistribution()
    classMeans = InstanceList()
    classLists = Partition(trainSet)
    # One mean instance per class — the model classifies by nearest class mean.
    for i in range((classLists.size())):
        classMeans.add(classLists.get(i).average())
    self.model = KMeansModel(priorDistribution, classMeans, parameters.getDistanceMetric())
def train(self, trainSet: InstanceList, parameters: DeepNetworkParameter):
    """
    Training algorithm for the deep network classifier. The training data is
    split by the given cross-validation ratio; the larger share trains the
    network and the smaller share serves as validation data.

    PARAMETERS
    ----------
    trainSet : InstanceList
        Training data given to the algorithm.
    parameters : DeepNetworkParameter
        Parameters of the deep network algorithm. crossValidationRatio and
        seed are used as parameters.
    """
    split = Partition(trainSet, parameters.getCrossValidationRatio(), parameters.getSeed(), True)
    trainingShare = split.get(1)
    validationShare = split.get(0)
    self.model = DeepNetworkModel(trainingShare, validationShare, parameters)
def execute(self, experiment: Experiment) -> ExperimentPerformance:
    """
    Execute M runs of stratified K-fold cross-validation, each time holding
    out 25 percent of the data as a separate test set. Results of all runs
    accumulate into a single ExperimentPerformance.

    PARAMETERS
    ----------
    experiment : Experiment
        Experiment to be run.

    RETURNS
    -------
    ExperimentPerformance
        An ExperimentPerformance instance.
    """
    performance = ExperimentPerformance()
    parameter = experiment.getParameter()
    for _ in range(self.M):
        wholeList = experiment.getDataSet().getInstanceList()
        # get(1) holds the cross-validation share; get(0) is the held-out test set.
        split = Partition(wholeList, 0.25, parameter.getSeed(), True)
        folds = StratifiedKFoldCrossValidation(Partition(split.get(1)).getLists(), self.K, parameter.getSeed())
        self.runExperiment(experiment.getClassifier(), parameter, performance, folds, split.get(0))
    return performance
def train(self, trainSet: InstanceList, parameters: MultiLayerPerceptronParameter):
    """
    Training algorithm for the multilayer perceptron. A share of the data,
    sized by the cross-validation ratio, is held aside for selecting the
    best weights; the remainder trains the perceptron with gradient descent.

    PARAMETERS
    ----------
    trainSet : InstanceList
        Training data given to the algorithm.
    parameters : MultiLayerPerceptronParameter
        Parameters of the multilayer perceptron.
    """
    split = Partition(trainSet, parameters.getCrossValidationRatio(), parameters.getSeed(), True)
    # get(1) is the training share, get(0) the validation share.
    self.model = MultiLayerPerceptronModel(split.get(1), split.get(0), parameters)
def train(self, trainSet: InstanceList, parameters: MultiLayerPerceptronParameter):
    """
    Training algorithm for auto encoders. An auto encoder is a neural network
    which attempts to replicate its input at its output. A fixed 20 percent
    of the data is held aside for validation.

    PARAMETERS
    ----------
    trainSet : InstanceList
        Training data given to the algorithm.
    parameters : MultiLayerPerceptronParameter
        Parameters of the auto encoder.
    """
    split = Partition(trainSet, 0.2, parameters.getSeed(), True)
    trainingShare = split.get(1)
    validationShare = split.get(0)
    self.model = AutoEncoderModel(trainingShare, validationShare, parameters)
def getClassInstances(self) -> list:
    """
    Returns instances of the items at the list of instance lists from the
    partitions.

    RETURNS
    -------
    list
        Instances of the items at the list of instance lists from the
        partitions.
    """
    partitionedByClass = Partition(self.__instances)
    return partitionedByClass.getLists()
def train(self, trainSet: InstanceList, parameters: C45Parameter):
    """
    Training algorithm for the C4.5 univariate decision tree classifier.
    When pruning is enabled, a share of the data (sized by the
    cross-validation ratio) is left aside for pruning and the rest builds
    the tree; otherwise the whole training set builds the tree.

    PARAMETERS
    ----------
    trainSet : InstanceList
        Training data given to the algorithm.
    parameters : C45Parameter
        Parameter of the C45 algorithm.
    """
    if not parameters.isPrune():
        self.model = DecisionTree(DecisionNode(trainSet))
        return
    split = Partition(trainSet, parameters.getCrossValidationRatio(), parameters.getSeed(), True)
    # Build on the larger share, prune against the held-out share.
    prunedTree = DecisionTree(DecisionNode(split.get(1)))
    prunedTree.prune(split.get(0))
    self.model = prunedTree
def train(self, trainSet: InstanceList, parameters: Parameter):
    """
    Training algorithm for the quadratic discriminant analysis classifier
    (Introduction to Machine Learning, Alpaydin, 2015).

    PARAMETERS
    ----------
    trainSet : InstanceList
        Training data given to the algorithm.
    parameters : Parameter
        Parameter of the Qda algorithm (unused here).
    """
    w0 = {}
    w = {}
    W = {}
    classLists = Partition(trainSet)
    priorDistribution = trainSet.classDistribution()
    for i in range(classLists.size()):
        Ci = classLists.get(i).getClassLabel()
        averageVector = Vector(classLists.get(i).continuousAverage())
        classCovariance = classLists.get(i).covariance(averageVector)
        # Determinant must be read BEFORE inverse() mutates the matrix in place.
        determinant = classCovariance.determinant()
        classCovariance.inverse()
        # Quadratic term: W_i = -0.5 * S_i^-1 (deepcopy so the linear term
        # below still uses the unscaled inverse).
        Wi = deepcopy(classCovariance)
        Wi.multiplyWithConstant(-0.5)
        W[Ci] = Wi
        # Linear term: w_i = S_i^-1 m_i.
        wi = classCovariance.multiplyWithVectorFromLeft(averageVector)
        w[Ci] = wi
        # Bias term: w0_i = -0.5 * (w_i . m_i + log|S_i|) + log P(C_i).
        w0i = -0.5 * (wi.dotProduct(averageVector) + math.log(determinant)) + math.log(priorDistribution.
                                                                                       getProbability(Ci))
        w0[Ci] = w0i
    self.model = QdaModel(priorDistribution, W, w, w0)