def run(self, originalDataset: Dataset, runInfo: str):
    Logger.Info('Initializing evaluators')
    filterEvaluator = MLFilterEvaluator(originalDataset)

    preRankerEvaluator = None
    if bool(Properties.usePreRanker):
        preRankerEvaluator = FilterPreRankerEvaluator(originalDataset)

    if Properties.wrapperApproach == 'AucWrapperEvaluator':
        wrapperEvaluator = AucWrapperEvaluator()
    else:
        Logger.Error('Missing wrapper approach')
        raise Exception('Missing wrapper approach')

    experimentStartDate = Date()
    Logger.Info(f"Experiment Start Date/Time: {experimentStartDate} for dataset {originalDataset.name}")

    # The first step is to evaluate the initial attributes, so we get a reference point for how well we did
    wrapperEvaluator.EvaluationAndWriteResultsToFile(originalDataset, "", 0, runInfo, True, 0, -1, -1)

    # Now we create a replica of the original dataset, to which we can add columns
    dataset = originalDataset.replicateDataset()

    # Get the training-set sub-folds, used to evaluate the various candidate attributes
    originalDatasetTrainingFolds = originalDataset.GenerateTrainingSetSubFolds()
    subFoldTrainingDatasets = dataset.GenerateTrainingSetSubFolds()

    # We now apply the wrapper on the training sub-folds in order to get the baseline score.
    # This is the score a candidate attribute needs to "beat"
    date = Date()
    currentScore = wrapperEvaluator.produceAverageScore(subFoldTrainingDatasets, None, None, None, None)
    Logger.Info(f"Initial score: {currentScore} : {date}")

    # The probabilities assigned to each instance using the ORIGINAL dataset (training folds only)
    Logger.Info(f"Producing initial classification results: {date}")
    currentClassificationProbs = wrapperEvaluator.produceClassificationResults(originalDatasetTrainingFolds)
    date = Date()
    Logger.Info(f" .....done {date}")

    # Apply the unary operators (discretizers, normalizers) to all the original features. The attributes
    # generated here differ from those generated at later stages because they are included in the dataset
    # that is used to generate attributes in the iterative search phase
    Logger.Info(f"Starting to apply unary operators: {date}")
    oam = OperatorsAssignmentsManager()
    candidateAttributes = oam.applyUnaryOperators(dataset, None, filterEvaluator, subFoldTrainingDatasets,
                                                  currentClassificationProbs)
    date = Date()
    Logger.Info(f" .....done {date}")

    # Now we add the new attributes to the dataset (they are added even though they may not be included in
    # the final dataset, because they are essential to the full generation of additional features)
    Logger.Info(f"Starting to generate and add columns to dataset: {date}")
    oam.GenerateAndAddColumnToDataset(dataset, candidateAttributes)
    date = Date()
    Logger.Info(f" .....done {date}")

    # The initial dataset has been populated with the discretized/normalized features. Time to begin the search
    iterationsCounter = 1
    columnsAddedInThePreviousIteration = None
    self.performIterativeSearch(originalDataset, runInfo, preRankerEvaluator, filterEvaluator,
                                wrapperEvaluator, dataset, originalDatasetTrainingFolds,
                                subFoldTrainingDatasets, currentClassificationProbs, oam,
                                candidateAttributes, iterationsCounter, columnsAddedInThePreviousIteration)
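# A minimal sketch of how run() might be driven, kept as a comment so it does not
# execute inside the class body. The class name ExperimentRunner, the Loader helper,
# the ARFF path, and the property values below are assumptions for illustration,
# not part of this file:
#
#     Properties.usePreRanker = True
#     Properties.wrapperApproach = 'AucWrapperEvaluator'
#     dataset = Loader().readArff('data/german_credit.arff')   # hypothetical loader
#     ExperimentRunner().run(dataset, runInfo='baseline_run')  # hypothetical class name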
def processInitialEvaluationInformation(self, dataset: Dataset, classifier: str):
    # We now need to test all fold combinations (the original train/test allocation is disregarded, which is
    # not a problem for the offline training). The caller MUST submit a dataset object containing only the
    # training folds
    for fold in dataset.getFolds():
        fold.setIsTestFold(False)

    wrapperName = 'AucWrapperEvaluator'
    if wrapperName == 'AucWrapperEvaluator':
        wrapperEvaluator = AucWrapperEvaluator()
    else:
        raise Exception('Unidentified wrapper')

    leaveOneFoldOutDatasets = dataset.GenerateTrainingSetSubFolds()
    classificationResults = wrapperEvaluator.produceClassificationResults(leaveOneFoldOutDatasets)

    # Collect the per-fold evaluation metrics
    aucVals = []
    logLossVals = []
    recallPrecisionValues = []  # list of dicts mapping recall value -> precision
    for classificationResult in classificationResults:
        aucVals.append(classificationResult.getAuc())
        logLossVals.append(classificationResult.getLogLoss())
        recallPrecisionValues.append(classificationResult.getRecallPrecisionValues())

    self.numOfFoldsInEvaluation = len(dataset.getFolds())

    # Aggregate the AUC statistics across the leave-one-fold-out evaluations
    aucVals = np.asarray(aucVals, dtype=np.float32)
    self.maxAUC = aucVals.max()
    self.minAUC = aucVals.min()
    self.avgAUC = np.average(aucVals)
    self.stdevAUC = aucVals.std()

    # Aggregate the log-loss statistics
    logLossVals = np.asarray(logLossVals, dtype=np.float32)
    self.maxLogLoss = logLossVals.max()
    self.minLogLoss = logLossVals.min()
    self.avgLogLoss = np.average(logLossVals)
    self.stdevLogLoss = logLossVals.std()

    # For each fixed recall value, compute the max/min/avg/stdev of the precision across folds
    self.maxPrecisionAtFixedRecallValues = {}
    self.minPrecisionAtFixedRecallValues = {}
    self.avgPrecisionAtFixedRecallValues = {}
    self.stdevPrecisionAtFixedRecallValues = {}
    for recallVal in recallPrecisionValues[0].keys():
        maxVal = -1  # sentinels outside the [0, 1] precision range
        minVal = 2
        valuesList = []
        for precisionRecallVals in recallPrecisionValues:
            maxVal = max(precisionRecallVals[recallVal], maxVal)
            minVal = min(precisionRecallVals[recallVal], minVal)
            valuesList.append(precisionRecallVals[recallVal])
        # now the assignments
        self.maxPrecisionAtFixedRecallValues[recallVal] = maxVal
        self.minPrecisionAtFixedRecallValues[recallVal] = minVal
        self.avgPrecisionAtFixedRecallValues[recallVal] = np.average(valuesList)
        self.stdevPrecisionAtFixedRecallValues[recallVal] = np.std(valuesList)
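# A small worked illustration (hypothetical values) of the per-recall aggregation above.
# Given recall -> precision dicts produced by three folds:
#
#     recallPrecisionValues = [{0.5: 0.90, 1.0: 0.40},
#                              {0.5: 0.80, 1.0: 0.35},
#                              {0.5: 0.85, 1.0: 0.45}]
#
# the loop yields, for recallVal == 0.5: max = 0.90, min = 0.80,
# avg = np.average([0.90, 0.80, 0.85]) = 0.85, stdev = np.std([0.90, 0.80, 0.85]).
# Note that np.std defaults to the population standard deviation (ddof=0), i.e.
# sqrt(sum((a - avg)**2) / n), which matches the commented-out Java stream
# computation this method appears to have been ported from.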