    def run(self, originalDataset: Dataset, runInfo: str):
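        """Run the full feature-generation experiment on originalDataset.

        Evaluates the original attributes to obtain a baseline (written to
        file), replicates the dataset, applies the unary operators to seed the
        candidate-attribute pool, adds the generated columns, and hands off to
        performIterativeSearch.
        """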
        Logger.Info('Initializing evaluators')
        filterEvaluator = MLFilterEvaluator(originalDataset)

        preRankerEvaluator = None
        if Properties.usePreRanker:
            preRankerEvaluator = FilterPreRankerEvaluator(originalDataset)

        if Properties.wrapperApproach == 'AucWrapperEvaluator':
            wrapperEvaluator = AucWrapperEvaluator()
        else:
            Logger.Error('Unidentified wrapper approach')
            raise Exception('Unidentified wrapper approach')

        experimentStartDate = Date()
        Logger.Info(f"Experiment Start Date/Time: {experimentStartDate} "
                    f"for dataset {originalDataset.name}")

        # The first step is to evaluate the initial attributes, so we have a reference point against which to measure improvement
        wrapperEvaluator.EvaluationAndWriteResultsToFile(
            originalDataset, "", 0, runInfo, True, 0, -1, -1)

        # now we create the replica of the original dataset, to which we can add columns
        dataset = originalDataset.replicateDataset()

        # Get the training set sub-folds, used to evaluate the various candidate attributes
        originalDatasetTrainingFolds = originalDataset.GenerateTrainingSetSubFolds()
        subFoldTrainingDatasets = dataset.GenerateTrainingSetSubFolds()

        date = Date()

        # We now apply the wrapper on the training subfolds in order to get the baseline score. This is the score a candidate attribute needs to "beat"
        currentScore = wrapperEvaluator.produceAverageScore(
            subFoldTrainingDatasets, None, None, None, None)
        Logger.Info(f"Initial score: {str(currentScore)} : {date}")

        # The probabilities assigned to each instance using the ORIGINAL dataset (training folds only)
        Logger.Info(f"Producing initial classification results: {date}")
        currentClassificationProbs = wrapperEvaluator.produceClassificationResults(
            originalDatasetTrainingFolds)
        date = Date()
        Logger.Info(f"  .....done {date}")

        # Apply the unary operators (discretizers, normalizers) on all the original features. The attributes generated
        # here differ from the ones generated at later stages because they are included in the dataset that is
        # used to generate attributes in the iterative search phase
        Logger.Info(f"Starting to apply unary operators: {date}")
        oam = OperatorsAssignmentsManager()
        candidateAttributes = oam.applyUnaryOperators(
            dataset, None, filterEvaluator, subFoldTrainingDatasets,
            currentClassificationProbs)
        date = Date()
        Logger.Info("  .....done " + str(date))

        # Now we add the new attributes to the dataset (they are added even though they may not be included in the
        # final dataset, because they are essential to the full generation of additional features)
        Logger.Info(f"Starting to generate and add columns to dataset: {date}")
        oam.GenerateAndAddColumnToDataset(dataset, candidateAttributes)
        date = Date()
        Logger.Info("  .....done " + str(date))

        # The initial dataset has been populated with the discretized/normalized features. Time to begin the search
        iterationsCounter = 1
        columnsAddedInThePreviousIteration = None

        self.performIterativeSearch(
            originalDataset, runInfo, preRankerEvaluator, filterEvaluator,
            wrapperEvaluator, dataset, originalDatasetTrainingFolds,
            subFoldTrainingDatasets, currentClassificationProbs, oam,
            candidateAttributes, iterationsCounter,
            columnsAddedInThePreviousIteration)
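
    # A minimal usage sketch (hypothetical: the owning class name and the dataset
    # loader below are assumptions, not part of this module):
    #   dataset = loadDataset('some_dataset.arff')          # hypothetical loader
    #   Properties.wrapperApproach = 'AucWrapperEvaluator'
    #   Search().run(dataset, runInfo='experiment-1')       # class name assumed
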
    def processInitialEvaluationInformation(self, dataset: Dataset,
                                            classifier: str):
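        """Compute baseline statistics over all leave-one-fold-out training splits.

        Marks every fold as a training fold, runs the AUC wrapper on each
        leave-one-fold-out subset, and stores the max/min/average/standard
        deviation of the AUC, log-loss and precision-at-fixed-recall values
        on the instance.
        """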
        # We now test all fold combinations (the original train/test allocation is disregarded, which is not a
        # problem for offline training). The caller MUST pass a dataset object containing only the training folds.
        for fold in dataset.getFolds():
            fold.setIsTestFold(False)

        # Hard-coded for now; run() selects the wrapper via Properties.wrapperApproach
        wrapperName = 'AucWrapperEvaluator'
        if wrapperName == 'AucWrapperEvaluator':
            wrapperEvaluator = AucWrapperEvaluator()
        else:
            raise Exception('Unidentified wrapper')

        leaveOneFoldOutDatasets = dataset.GenerateTrainingSetSubFolds()
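        # one classification result is produced per leave-one-fold-out split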
        classificationResults = wrapperEvaluator.produceClassificationResults(
            leaveOneFoldOutDatasets)

        aucVals = []
        logLossVals = []
        recallPrecisionValues = []  # list of dicts
        for classificationResult in classificationResults:
            aucVals.append(classificationResult.getAuc())
            logLossVals.append(classificationResult.getLogLoss())
            recallPrecisionValues.append(
                classificationResult.getRecallPrecisionValues())

        self.numOfFoldsInEvaluation = len(dataset.getFolds())

        aucVals = np.asarray(aucVals, dtype=np.float32)
        self.maxAUC = aucVals.max()
        self.minAUC = aucVals.min()
        self.avgAUC = np.average(aucVals)
        # note: ndarray.std() defaults to ddof=0, i.e. the population standard deviation
        self.stdevAUC = aucVals.std()

        logLossVals = np.asarray(logLossVals, dtype=np.float32)
        self.maxLogLoss = logLossVals.max()
        self.minLogLoss = logLossVals.min()
        self.avgLogLoss = np.average(logLossVals)
        self.stdevLogLoss = logLossVals.std()

        self.maxPrecisionAtFixedRecallValues = {}
        self.minPrecisionAtFixedRecallValues = {}
        self.avgPrecisionAtFixedRecallValues = {}
        self.stdevPrecisionAtFixedRecallValues = {}

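        # aggregate the per-fold precision values at each fixed recall point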
        for recallVal in recallPrecisionValues[0].keys():
            valuesList = [precisionRecallVals[recallVal]
                          for precisionRecallVals in recallPrecisionValues]
            maxVal = max(valuesList)
            minVal = min(valuesList)

            # now the assignments
            self.maxPrecisionAtFixedRecallValues[recallVal] = maxVal
            self.minPrecisionAtFixedRecallValues[recallVal] = minVal
            self.avgPrecisionAtFixedRecallValues[recallVal] = np.average(
                valuesList)
            self.stdevPrecisionAtFixedRecallValues[recallVal] = np.std(
                valuesList)