def LoadFromDisk(self):
    """Load the current data page into the active memory slot and
    prefetch the following page (or None past the last page)."""
    # Alternate the active slot between 0 and 2 (double buffering).
    self.CurrentPageStart += 2
    if self.CurrentPageStart > 2:
        self.CurrentPageStart = 0

    sPrimaryFile = Storage.JoinPath(
        self.DataFolder,
        MLDataIterator.FILENAME_TEMPLATE_PAGE % (self.PageNumbers[self.PageIndex]))
    oPrimaryData = Storage.DeserializeObjectFromFile(sPrimaryFile, p_bIsVerbose=False)
    self.Page[self.CurrentPageStart] = oPrimaryData
    if type(self).__verboseLevel >= 2:
        print(" [>] Load MEM%d: %d" % (self.CurrentPageStart, self.PageNumbers[self.PageIndex]))

    # Prefetch the next page into the companion slot; None when at the end.
    oNextData = None
    if self.PageIndex + 1 < len(self.PageNumbers):
        sNextFile = Storage.JoinPath(
            self.DataFolder,
            MLDataIterator.FILENAME_TEMPLATE_PAGE % (self.PageNumbers[self.PageIndex + 1]))
        oNextData = Storage.DeserializeObjectFromFile(sNextFile, p_bIsVerbose=False)
        if type(self).__verboseLevel >= 2:
            print(" [>] Load MEM%d: %d " % (self.CurrentPageStart + 1, self.PageNumbers[self.PageIndex + 1]))
    self.Page[self.CurrentPageStart + 1] = oNextData
def __determineCommonCaltechClassesForLITE(self):
    """Intersect the Caltech-101 class list with the ImageNet synset
    descriptions and record matching class codes/descriptions."""
    sClassListFile = Storage.JoinPath(self.DataSetFolder.SourceFolder, "caltech101-classes.txt")
    with open(sClassListFile, "r") as oFile:
        sCaltechClasses = [sLine.strip() for sLine in oFile]
    if type(self).__verboseLevel >= 1:
        print("Caltech classes: %d" % len(sCaltechClasses))

    self.ClassCodes = []
    for nIndex, sClassCode in enumerate(self.ImageNetClassCodes):
        sClassDescriptions = self.ImageNetSynSetDict[sClassCode]
        # Cheap substring pre-filter before the exact comma-token match below.
        if not any(sClass in sClassDescriptions for sClass in sCaltechClasses):
            continue
        sDescriptions = sClassDescriptions.split(",")
        for sClass in sCaltechClasses:
            # Exact match against a trimmed comma-separated description token.
            if any(sClass == sDescr.strip() for sDescr in sDescriptions):
                self.ClassCodes.append(sClassCode)
                self.ClassDescr.append(sClass)
                self.CaltechClassDescr.append(sClass)
                # ImageNet class IDs are 1-based in this project.
                self.ImageNetClassID.append(nIndex + 1)
                self.ImageNetClassDescr.append(sClassDescriptions)
def SetTemplateName(self, p_sName):
    """Set the config template name (default "template.cfg" when None)
    and derive its full path inside the edit folder."""
    self.TemplateName = "template.cfg" if p_sName is None else p_sName
    self.TemplateConfigFileName = Storage.JoinPath(self.EditFolder, self.TemplateName)
def GetConfig(cls, p_sFolder):
    """Load the learn config actually used by an experiment run.

    Looks for the first "learn-config-used*" file inside the run's
    "config" subfolder; returns a parsed NNLearnConfig or None.
    """
    oResult = None
    sConfigFolder = Storage.JoinPath(p_sFolder, "config")
    if Storage.IsExistingPath(sConfigFolder):
        sUsedConfigFile = None
        for sItem in Storage.GetFilesSorted(sConfigFolder):
            if sItem.startswith("learn-config-used"):
                sUsedConfigFile = Storage.JoinPath(sConfigFolder, sItem)
                break
        if sUsedConfigFile is not None:
            oResult = NNLearnConfig()
            oResult.LoadFromFile(sUsedConfigFile)
            oResult.ParseUID()
    return oResult
def ListCompressedModels(self):
    """Return the full paths of all compressed model files in the
    experiment model folder (sorted), or [] when the folder is missing."""
    if not Storage.IsExistingPath(self.ExperimentModelFolder):
        return []
    return [
        Storage.JoinPath(self.ExperimentModelFolder, sZipFile)
        for sZipFile in Storage.GetFilesSorted(self.ExperimentModelFolder)
    ]
def __init__(self, p_oConfig=None, p_sFileName=None):
    """Settings for a learning-run comparison; falls back to the default
    "learn-comparison.cfg" file when no file name is supplied."""
    super(LearningComparisonSettings, self).__init__(p_sFileName)
    #........................ | Instance Attributes | ..............................
    self.Config = p_oConfig                 # optional NNLearnConfig shared by the compared runs
    self.Metrics = []                       # metric keys to plot
    self.Titles = []                        # display titles per metric
    self.ExperimentsToCompare = []          # experiment ERL strings
    self.ExperimentDescriptions = []        # per-experiment legend descriptions
    #................................................................................
    if self.FileName is None:
        self.FileName = Storage.JoinPath(BaseFolders.EXPERIMENTS_RUN, "learn-comparison.cfg")
def GetNextConfigToEvaluate(self):
    """Return the full path of the first .cfg file waiting in the
    to-evaluate folder, or None when no config file is pending.
    """
    sFiles = Storage.GetFilesSorted(self.ToEvaluteFolder)
    sConfigFiles = []
    for sFile in sFiles:
        _, _, sExt = Storage.SplitFileName(sFile)
        if sExt == ".cfg":
            sConfigFiles.append(Storage.JoinPath(self.ToEvaluteFolder, sFile))
    # BUGFIX: guard on the filtered list, not the raw folder listing.
    # Previously `len(sFiles) > 0` allowed `sConfigFiles[0]` to raise an
    # IndexError when the folder contained only non-.cfg files.
    if len(sConfigFiles) > 0:
        sResult = sConfigFiles[0]
    else:
        sResult = None
    return sResult
def ListSavedModels(self):
    """List saved model checkpoints as [epoch_number, folder, files]
    triples; model subfolders are named by their (integer) epoch."""
    oModels = []
    bHasModels = (
        Storage.IsExistingPath(self.ExperimentModelFolder)
        and not Storage.IsFolderEmpty(self.ExperimentModelFolder)
    )
    if bHasModels:
        for sModel in Storage.GetDirectoriesSorted(self.ExperimentModelFolder):
            sFolder = Storage.JoinPath(self.ExperimentModelFolder, sModel)
            oModels.append([int(sModel), sFolder, Storage.GetFilesSorted(sFolder)])
    return oModels
def GetNextConfig(self):
    """Return the next config file to process.

    By priority, first returns a config awaiting evaluation (to free
    disk space sooner), otherwise the first pending training config;
    None when neither queue holds a .cfg file.
    """
    sResult = self.GetNextConfigToEvaluate()
    if sResult is None:
        sFiles = Storage.GetFilesSorted(self.PendingFolder)
        sConfigFiles = []
        for sFile in sFiles:
            _, _, sExt = Storage.SplitFileName(sFile)
            if sExt == ".cfg":
                sConfigFiles.append(Storage.JoinPath(self.PendingFolder, sFile))
        # BUGFIX: guard on the filtered list, not the raw folder listing.
        # Previously `len(sFiles) > 0` allowed `sConfigFiles[0]` to raise an
        # IndexError when the folder contained only non-.cfg files.
        if len(sConfigFiles) > 0:
            sResult = sConfigFiles[0]
        else:
            sResult = None
    return sResult
def Initialize(self, p_sCustomBaseFolder=None):
    # Resolves each experiment to compare, loads its stats dictionary, and
    # collects per-model epoch counts and display titles for rendering.
    #   p_sCustomBaseFolder: when given, experiment entries are treated as
    #   subfolders under this base folder instead of being opened via ERL.
    if self.Metrics is None:
        self.Metrics = self.Settings.Metrics
        self.SerieLabels = self.Settings.Titles
    if self.ExperimentsToCompare is None:
        self.ExperimentsToCompare = self.Settings.ExperimentsToCompare
    # One slot per experiment (+1 spare; last slot stays 0).
    self.Epochs = np.zeros(len(self.ExperimentsToCompare) + 1, np.int32)
    self.ModelTitles = []
    for nIndex, sExperimentERL in enumerate(self.ExperimentsToCompare):
        if p_sCustomBaseFolder is not None:
            # Here a subfolder is given and the custom base folder is prepended
            sExperimentFolder = Storage.JoinPath(p_sCustomBaseFolder, sExperimentERL)
            oExperiment = ExperimentFolder.GetExperiment(sExperimentFolder, p_sCustomBaseFolder)
            assert oExperiment is not None, "Experiment folder %s not found" % sExperimentFolder
            # Sets the config that is needed to return architecture and dataset for the learn comparison
            if self.Settings.Config is None:
                self.Settings.Config = oExperiment.LearnConfig
        else:
            # Open the experiment through its ERL string using the shared config.
            oExperiment = ExperimentFolder(p_oLearnConfig=self.Settings.Config)
            oExperiment.OpenERL(p_sERLString=sExperimentERL)
        #nFoldNumber, sUID = ExperimentFolder.SplitExperimentCode(oExperimentCode)
        #oExperiment = ExperimentFolder(p_oLearnConfig=self.Settings.Config)
        #oExperiment.Open(nFoldNumber, sUID)
        dStats = Storage.DeserializeObjectFromFile(oExperiment.RunSub.StatsFileName)
        assert dStats is not None, "File not found %s" % oExperiment.RunSub.StatsFileName
        self.Envs.append(oExperiment)
        self.Stats.append(dStats)
        # Last epoch index (EpochNumber is a 1-based count).
        self.Epochs[nIndex] = dStats["EpochNumber"] - 1
        #nFoldNumber, sUID = ExperimentFolder.SplitExperimentCode(oExperiment.Code)
        self.ModelTitles.append(
            self.Settings.ExperimentDescriptions[nIndex]
            + " (%s)" % oExperiment.ERL.ExperimentUID)
def __listFiles(self):
    """Collect the evaluation result files in self.Folder and allocate
    one float32 metric array slot per result file."""
    self.FileNames = []
    self.ResultFiles = []
    for sFile in Storage.GetFilesSorted(self.Folder):
        sFullPath = Storage.JoinPath(self.Folder, sFile)
        self.FileNames.append(sFullPath)
        self.ResultFiles.append([sFile, sFullPath])

    nFileCount = len(self.ResultFiles)
    # One value per result file for each tracked metric.
    self.EpochNumber = np.zeros((nFileCount), np.float32)
    self.Accuracy = np.zeros((nFileCount), np.float32)
    self.Recall = np.zeros((nFileCount), np.float32)
    self.Precision = np.zeros((nFileCount), np.float32)
    self.F1Score = np.zeros((nFileCount), np.float32)
    self.Points = np.zeros((nFileCount), np.float32)
    self.CrossF1Score = np.zeros((nFileCount), np.float32)
    self.ObjectiveF1Score = np.zeros((nFileCount), np.float32)
    self.PositiveF1Score = np.zeros((nFileCount), np.float32)
def __init__(self):
    """Set up the experiment-queue folder layout under the system base
    folder and make sure every folder exists on disk."""
    #....................... | Instance Attributes | ...............................
    self.BaseFolder = BaseFolders.EXPERIMENTS_SYSTEM
    self.ToEvaluteFolder = Storage.JoinPath(self.BaseFolder, "toevaluate")
    self.PendingFolder = Storage.JoinPath(self.BaseFolder, "pending")
    self.ArchiveFolder = Storage.JoinPath(self.BaseFolder, "archive")
    # Errors are kept as a subfolder of the pending queue.
    self.ErrorFolder = Storage.JoinPath(self.PendingFolder, "errors")
    self.EditFolder = Storage.JoinPath(self.BaseFolder, "edit")
    self.RecombineFolder = Storage.JoinPath(self.BaseFolder, "recombine")
    self.CountersFileName = Storage.JoinPath(self.BaseFolder, "counters")
    self.TemplateName = None
    self.TemplateConfigFileName = None
    #................................................................................
    for sFolder in (self.BaseFolder, self.PendingFolder, self.ArchiveFolder,
                    self.ErrorFolder, self.EditFolder, self.ToEvaluteFolder,
                    self.RecombineFolder):
        Storage.EnsurePathExists(sFolder)
    self.SetTemplateName(None)
def GetDataSetFolder(self, p_sSubFolder):
    """Return the path of the given dataset subfolder under the
    datasets base folder."""
    sDataSetFolder = Storage.JoinPath(BaseFolders.DATASETS, p_sSubFolder)
    return sDataSetFolder
def __init__(self, p_sDataFolder, p_nTotalSamples, p_nPageSize, p_bIsValidation=False,
             p_nFoldNumber=None, p_nFolds=10, p_nValidationPageStart=0,
             p_nValidationPageCount=0, p_sName=None, p_nBatchSize=15):
    """Iterator over paged ML data with optional k-fold validation split.

    Parameters:
        p_sDataFolder: folder holding the serialized data pages.
        p_nTotalSamples: total sample count across all pages.
        p_nPageSize: samples per page.
        p_bIsValidation: True when this iterator walks the validation split.
        p_nFoldNumber: 1-based fold number; None disables k-fold mode.
        p_nFolds: total fold count (k-fold mode only).
        p_nValidationPageStart / p_nValidationPageCount: explicit validation
            window (non-fold mode only).
        p_sName: optional display name (defaults to "TRN"/"VAL").
        p_nBatchSize: samples per batch.
    """
    #........ | Instance Attributes | ..............................................
    self.DataFolder = Storage.JoinPath(p_sDataFolder, "")  # ensure trailing separator
    self.IsStarted = False
    self.IsFinished = False
    self.IsWaiting = False
    self.Continue = False
    self.Stopped = False
    self.FinishedCondition = None
    self.MustReadNextData = None
    self.Cycles = None
    self.TotalSamples = p_nTotalSamples
    self.PageSize = p_nPageSize
    self.TotalPageCount = self.TotalSamples / self.PageSize
    # Support for last page with less samples
    # TODO: Support training with additional validation set
    if p_nFoldNumber is None:
        self.TotalPageCount = np.ceil(self.TotalPageCount)
    else:
        assert self.TotalPageCount == int(self.TotalPageCount), (
            "Count of pages must be an integer. Total Samples %d / PageSize %d = %f"
            % (self.TotalSamples, self.PageSize, self.TotalPageCount))
        assert self.TotalPageCount % 2 == 0, "Count of pages must be an even number"
    self.IsValidation = p_bIsValidation
    if p_nFoldNumber is None:
        self.FoldIndex = None
        self.Folds = None
        self.ValidationPageStart = p_nValidationPageStart
        self.ValidationPageCount = p_nValidationPageCount
    else:
        self.FoldIndex = p_nFoldNumber - 1
        self.Folds = p_nFolds
        self.ValidationPageCount = self.TotalPageCount / self.Folds
        # BUGFIX: this assertion message was a single string literal that had
        # been broken by a stray line break (a syntax error as found);
        # reconstructed here as one format string.
        assert self.ValidationPageCount == int(self.ValidationPageCount), (
            "Count of validation pages must be an integer. "
            "TotalPageCount:%d Folds:%s TotalSamples:%d self.PageSize:%d"
            % (self.TotalPageCount, self.Folds, self.TotalSamples, self.PageSize))
        self.ValidationPageStart = self.FoldIndex * self.ValidationPageCount
    self.ValidationPercentage = self.ValidationPageCount / self.TotalPageCount
    self.TotalValidationSamples = self.ValidationPageCount * self.PageSize
    self.TotalTrainSamples = self.TotalSamples - self.TotalValidationSamples
    if p_bIsValidation:
        self.TotalIteratedSamples = self.TotalValidationSamples
        self.Name = "VAL"
    else:
        self.TotalIteratedSamples = self.TotalTrainSamples
        self.Name = "TRN"
    if p_sName is not None:
        self.Name = p_sName
    self.SampleIndex = 0
    self.TotalCachedSamples = 0
    self.BatchSize = p_nBatchSize
    self.EpochSamples = 0
    self.IsEpochFinished = False
    if p_nFoldNumber is None:
        self.IsRecalling = True
    else:
        self.IsRecalling = self.IsValidation
    self.__isWarmup = None
    self.TotalBatches = None
    self.ValidationBatches = None
    self.TrainingBatches = None
    self.IsWarmup = False
    #self.__createDataPager()
    #self.__recalculateBatchCount()
    self.ValidationIterator = None
    if not self.IsValidation:
        if self.ValidationPercentage > 0:
            if type(self).__verboseLevel >= 2:
                # NOTE(review): TotalBatches/ValidationBatches/TrainingBatches are
                # still None here (batch recalculation is commented out above), so
                # this %d-format would raise a TypeError if verbosity >= 2 — confirm.
                print(
                    "=|=\t[%s:MLDataIterator] Batches - Total:%d Training:%d Validation%d"
                    % (self.Name, self.TotalBatches, self.ValidationBatches, self.TrainingBatches))
            # Companion iterator that walks only the validation pages.
            self.ValidationIterator = MLDataIterator(
                self.DataFolder, self.TotalSamples, self.PageSize, True,
                p_nValidationPageStart=self.ValidationPageStart,
                p_nValidationPageCount=self.ValidationPageCount,
                p_nBatchSize=p_nBatchSize)
        # Per-sample flags are kept only for the training iterator.
        # NOTE(review): nesting reconstructed from a whitespace-mangled source —
        # confirm Flags allocation belongs at this level, not inside the
        # ValidationPercentage branch.
        self.Flags = np.zeros([self.TotalSamples], np.float32)
    else:
        self.Flags = None
    self.__isFilteringOutSamples = False
def Render(self):
    # Renders one comparison graph per (stat column, metric) pair across all
    # loaded experiments and writes the plots to the last experiment's plot folder.
    # Prepares the series with nulls
    nMaxEpochs = np.amax(self.Epochs)
    x = np.arange(0, nMaxEpochs)
    sPlotFolder = self.Envs[-1].RunSub.ExperimentPlotFolder
    print("[>] Epochs in different models:%s Maximum: %s" % (self.Epochs, nMaxEpochs))
    print(" |__ Ploting to folder: %s" % sPlotFolder)
    nMaxColumn = StatsColumnType.VALUE
    for nColIndex in range(0, nMaxColumn + 1):
        for nMetricIndex, sMetric in enumerate(self.Metrics):
            y = []
            oLabels = []
            if True:
                print(" |___ ", sMetric)
            # Build one y-series per experiment, padded to nMaxEpochs with NaN.
            for nSerieIndex, dStats in enumerate(self.Stats):
                nMaxOfY = self.Epochs[nSerieIndex]
                if sMetric.startswith("Custom"):
                    # "Custom" metric: relative generalization gap (val - train) / val.
                    nValError = dStats["ValError"][:, 0][:nMaxOfY]
                    nTrainError = dStats["TrainAverageError"][:, 0][:nMaxOfY]
                    nYAll = (nValError - nTrainError) / nValError
                else:
                    if sMetric in dStats:
                        nYAll = dStats[sMetric]
                    else:
                        nYAll = None
                        print("Warning: Metric %s not found in stats" % sMetric)
                if nYAll is not None:
                    # Most metrics are 2-D (epoch x column); the listed ones are 1-D.
                    if (sMetric != "ValAccuracyPerError") and (sMetric != "EpochTotalTime") \
                            and (sMetric != "EpochRecallTime") and (not sMetric.startswith("Custom")):
                        nYSlice = nYAll[:, nColIndex][:nMaxOfY]
                    else:
                        nYSlice = nYAll[:][:nMaxOfY]
                    # Pad with NaN so shorter runs render as gaps, not zeros.
                    nY = np.zeros(nMaxEpochs, np.float32)
                    nY[:] = None
                    #nY[:nMaxOfY]=nYSlice[:]
                    if sMetric == "EpochTotalTime":
                        print(dStats["EpochTotalTime"])
                    nY[:nMaxOfY] = nYSlice[:]
                    y.append(nY)
                    #oLabels.append(StatsColumnType.ToString(self.SerieLabels[nMetricIndex], nColIndex) + " (%s)" % self.ModelTitles[nSerieIndex])
                    oLabels.append(self.ModelTitles[nSerieIndex])
            sTitle = "Comparison of CNN models on %s" % self.Settings.Config.DataSetName
            #sTitle = "Training of BioCNNs with GLAVP layer"
            sCaptionX = "Training Epoch"
            sCaptionY = StatsColumnType.ToString(self.SerieLabels[nMetricIndex], nColIndex)
            # Configure and render the multi-series graph for this metric/column.
            oGraph = MultiSerieGraph()
            oGraph.Setup.LegendFontSize = 10
            oGraph.Setup.Title = sTitle
            oGraph.Setup.CaptionX = sCaptionX
            oGraph.Setup.CaptionY = sCaptionY
            oGraph.Setup.CommonLineWidth = 1.5
            oGraph.Setup.DisplayFinalValue = True
            oGraph.Initialize(x, y, p_oLabels=oLabels,
                              p_oColors=type(self).DEFAULT_SERIE_COLORS)
            oGraph.Render()
            # 1-D/custom metrics only have a meaningful plot for column 0.
            bPlot = True
            if (sMetric == "ValAccuracyPerError") and (nColIndex > 0):
                bPlot = False
            if (sMetric.startswith("Custom")) and (nColIndex > 0):
                bPlot = False
            if bPlot:
                oGraph.Plot(
                    Storage.JoinPath(
                        sPlotFolder,
                        "%02i. %s-%i.png" % (nMetricIndex, self.SerieLabels[nMetricIndex], nColIndex)))
            #oGraph.Plot(ExperimentEnvironment.EXPERIMENTSPACE_FOLDER + "=Results=\\%02i. %s-%i.png" % (nMetricIndex, sSerieLabels[nMetricIndex],nColIndex))
#------------------------------------------------------------------------------------
#==================================================================================================