def fillPath(self, baseFolderName, folderName):
    """
    Return a function that expands a fileName into a filePath.

    The returned function joins its argument onto a folder named
    '<timestamp>-<folderName>' located inside the folder registered for
    baseFolderName in self.folderPathByName.  Both folders are passed
    through store.makeFolderSafely before the function is returned.

    Raises FolderError if pattern_name does not match folderName.
    """
    # Reject folder names that the name pattern does not accept
    if not pattern_name.match(folderName):
        allowedCharacters = 'Names can have letters, digits, underscores, hyphens, spaces, parentheses, commas and periods.'
        raise FolderError('Invalid name: %s\n%s' % (folderName, allowedCharacters))
    # Prepare the base folder first, then the timestamped folder inside it
    parentPath = store.makeFolderSafely(self.folderPathByName[baseFolderName])
    timestampedName = '%s-%s' % (store.makeTimestamp(), folderName)
    targetPath = store.makeFolderSafely(os.path.join(parentPath, timestampedName))

    def expandFileName(fileName):
        'Join fileName onto the timestamped folder'
        return os.path.join(targetPath, fileName)

    # Return
    return expandFileName
def fillPath(self, baseFolderName, folderName):
    """
    Return the path of the file belonging to a new stamped folder.

    The folder name is built by filling templateByFolderName[baseFolderName]
    with (timestamp, folderName); the file name comes from
    fileNameByFolderName[baseFolderName].  The base folder and the stamped
    folder are both passed through store.makeFolderSafely.

    Raises FolderError if pattern_name does not match folderName.
    """
    # Reject folder names that the name pattern does not accept
    if not pattern_name.match(folderName):
        raise FolderError('Invalid name: %s\n%s' % (
            folderName,
            'Names can have letters, digits, underscores, hyphens, spaces, parentheses, commas and periods.',
        ))
    # Prepare the base folder before looking up its template and file name
    parentPath = store.makeFolderSafely(self.folderPathByName[baseFolderName])
    folderTemplate = templateByFolderName[baseFolderName]
    targetFileName = fileNameByFolderName[baseFolderName]
    # Stamp the folder name and prepare the stamped folder
    stampedFolderName = folderTemplate % (store.makeTimestamp(), folderName)
    stampedFolderPath = store.makeFolderSafely(os.path.join(parentPath, stampedFolderName))
    # Return
    return os.path.join(stampedFolderPath, targetFileName)
def train(targetClassifierPath, trainingPath, testPath, parameterByName):
    """
    Train a boosted sequence of weak classifiers and return its evaluation.

    targetClassifierPath -- folder that receives one 'weak<index>'
                            sub-classifier per boost plus the saved alphas
    trainingPath         -- training set; also used to score each weak classifier
    testPath             -- test set handed to classifier.train and evaluate
    parameterByName      -- parameters; 'boost count' sets the number of rounds

    Returns whatever evaluate(targetClassifierPath, testPath) returns.
    """
    # Set paths
    store.makeFolderSafely(targetClassifierPath)
    # Give every training sample the same starting weight
    trainingCount = classifier.getSampleCount(trainingPath)
    uniformWeight = 1 / float(trainingCount)
    sampleWeights = numpy.array([uniformWeight] * trainingCount)
    # Initialize
    boostCount = parameterByName['boost count']
    alphas = []
    # For each boost,
    for boostIndex in xrange(boostCount):
        # Show feedback
        print('--- Boosting %d/%d ---' % (boostIndex + 1, boostCount))
        # Rescale the weights so that they sum to one
        weightTotal = float(sum(sampleWeights))
        sampleWeights = sampleWeights / weightTotal
        # Build a training set weighted by sampleWeights
        weightedTrainingPath, sampleMultipliers = weightSamples(trainingPath, sampleWeights)
        # Train the weak classifier, then discard the folder holding the weighted set
        weakClassifierPath = os.path.join(targetClassifierPath, 'weak%d' % boostIndex)
        weakResultByName = classifier.train(weakClassifierPath, weightedTrainingPath, testPath, parameterByName)
        shutil.rmtree(os.path.dirname(weightedTrainingPath))
        # Mark the training samples that the weak classifier gets wrong
        labelsPredicted = classifier.test(weakClassifierPath, trainingPath)
        labelsActual = classifier.loadLabels(trainingPath)
        errorDistribution = labelsPredicted != labelsActual
        # Compute training error weighted according to sampleWeights
        weightedError = numpy.dot(sampleWeights, errorDistribution)
        # Nudge the error away from 0 and 1 to prevent zero division or zero log
        if weightedError == 0:
            weightedError = 1e-9
        elif weightedError == 1:
            weightedError = 1 - 1e-9
        # Get alpha from training error
        alpha = 0.5 * numpy.log(float(1 - weightedError) / weightedError)
        # Misclassified samples are scaled by exp(alpha), the others by exp(-alpha)
        wrongFactor = numpy.exp(alpha)
        rightFactor = numpy.exp(-alpha)
        coefficients = [wrongFactor if isWrong else rightFactor for isWrong in errorDistribution]
        # Show weakClassifier performance
        print('weighted training error = %s' % (100 * weightedError))
        print('alpha = %s' % alpha)
        # Save weakClassifier performance
        weakResultByName['alpha'] = alpha
        weakResultByName['weighted training error'] = weightedError
        weakResultByName['sample multipliers'] = ' '.join(str(x) for x in sampleMultipliers)
        weakResultByName['error distribution'] = ''.join(str(int(x)) for x in errorDistribution)
        saveWeakClassifierInformation(weakClassifierPath, trainingPath, testPath, parameterByName, {
            'performance': weakResultByName,
        })
        # Update sampleWeights
        sampleWeights = coefficients * sampleWeights
        # Append
        alphas.append(alpha)
    # Save classifier
    save(targetClassifierPath, alphas)
    # Return evaluation
    return evaluate(targetClassifierPath, testPath)
def fillPath(self, baseFolderName, folderName):
    """
    Return a function that expands a fileName into a filePath.

    File names are expanded relative to a folder named
    '<timestamp>-<folderName>' inside the folder that
    self.folderPathByName holds for baseFolderName.  Each of the two
    folders is passed through store.makeFolderSafely.

    Raises FolderError if pattern_name does not match folderName.
    """
    # Make sure that folderName is valid
    isValid = pattern_name.match(folderName)
    if not isValid:
        reminder = 'Names can have letters, digits, underscores, hyphens, spaces, parentheses, commas and periods.'
        message = 'Invalid name: %s\n%s' % (folderName, reminder)
        raise FolderError(message)
    # Fill path
    basePath = store.makeFolderSafely(self.folderPathByName[baseFolderName])
    stampedPath = os.path.join(basePath, '%s-%s' % (store.makeTimestamp(), folderName))
    stampedPath = store.makeFolderSafely(stampedPath)

    def fill(fileName):
        'Expand fileName into a path inside the stamped folder'
        return os.path.join(stampedPath, fileName)

    # Return
    return fill
def __init__(self, basePath='.'):
    """
    Prepare the base folder and map each folder name to its numbered path.

    basePath is passed through store.makeFolderSafely and the result is
    kept as self.basePath.  Every name in folderNames is mapped to
    '<basePath>/<position>-<name>', with positions starting at one.
    """
    # Make the folder if it doesn't exist
    self.basePath = store.makeFolderSafely(basePath)
    # Set folderPathByName
    folderPathByName = {}
    for folderIndex, folderName in enumerate(folderNames):
        numberedFolderName = '%d-%s' % (folderIndex + 1, folderName)
        folderPathByName[folderName] = os.path.join(basePath, numberedFolderName)
    self.folderPathByName = folderPathByName
def saveSamples(sampleDataset, sampleIDs, featureSet):
    """
    Write the samples and labels for sampleIDs to disk and return their path.

    sampleDataset -- dataset object; must provide getDatasetPath()
    sampleIDs     -- ids of the samples to extract (must not be empty
                     because the smallest id becomes part of the target name)
    featureSet    -- feature extractor; its module and class names are
                     recorded next to the samples

    If the information already stored at the target path equals the
    information describing this request, the existing samples are reused.
    Otherwise the samples and labels are written and the information is
    saved only after both files have been written and closed, so an
    interrupted run is not mistaken for a finished one.

    Returns targetSamplePath.
    """
    # Initialize
    sampleCount = len(sampleIDs)
    sampleDatasetPath = sampleDataset.getDatasetPath()
    sampleInformation = {
        'source dataset': {
            'path': sampleDatasetPath,
            'sample ids': ' '.join(str(x) for x in sampleIDs),
        },
        'feature': {
            'module name': featureSet.__module__,
            'class name': featureSet.__class__.__name__,
        }
    }
    targetSampleName = '%s-count%s-min%s' % (folder_store.getFolderName(sampleDatasetPath), sampleCount, min(sampleIDs))
    targetSamplePath = os.path.join(store.makeFolderSafely(os.path.join(store.temporaryPath, 'cnn_datasets')), targetSampleName)
    # If matching samples were already saved at targetSamplePath, return
    if store.loadInformation(targetSamplePath) == sampleInformation:
        print('Using existing samples...\n\ttargetSamplePath = ' + targetSamplePath)
        return targetSamplePath
    # Save
    print('Saving samples...\n\ttargetSamplePath = ' + targetSamplePath)
    sampleGenerator = makeSampleLabelGeneratorFromSampleDataset(sampleDataset, sampleIDs, featureSet)
    samplePath, labelPath = makeSampleLabelPaths(targetSamplePath)
    # Close both files even if opening the second file or writing fails
    sampleFile = open(samplePath, 'wt')
    try:
        labelFile = open(labelPath, 'wt')
        try:
            for sampleIndex, (sample, label) in enumerate(sampleGenerator):
                # If we are starting, write header
                if sampleIndex == 0:
                    sampleFile.write(makeLushMatrixHeaderFromPart(sample, sampleCount))
                    labelFile.write(makeLushMatrixHeaderFromPart(label, sampleCount))
                # Write content
                sampleFile.write(makeLushMatrixContent(sample))
                labelFile.write(makeLushMatrixContent(label))
                # Show feedback every hundred samples
                if sampleIndex % 100 == 0:
                    view.printPercentUpdate(sampleIndex + 1, sampleCount)
            view.printPercentFinal(sampleCount)
        finally:
            labelFile.close()
    finally:
        sampleFile.close()
    # Record the information only after the files are complete
    store.saveInformation(targetSamplePath, sampleInformation)
    # Return
    return targetSamplePath
def __init__(self, basePath='.'):
    """
    Set self.basePath and self.folderPathByName.

    self.basePath is the result of store.makeFolderSafely(basePath).
    self.folderPathByName maps every entry of folderNames to a path
    under basePath whose last component is '<number>-<name>', where the
    numbers follow the order of folderNames and start at one.
    """
    # Make the folder if it doesn't exist
    self.basePath = store.makeFolderSafely(basePath)
    # Collect the numbered path of each folder
    pathByName = {}
    for position, name in enumerate(folderNames):
        pathByName[name] = os.path.join(basePath, '%d-%s' % (position + 1, name))
    # Set folderPathByName
    self.folderPathByName = pathByName
def getPath(self, fileType, fileName):
    """
    Return the path for fileName inside the folder of its fileType.

    The folder for fileType is looked up in folderNameByName and joined
    onto self.basePath; fileName is converted with str() and joined onto
    that.  The result is passed through store.makeFolderSafely, whose
    return value is returned.
    """
    # Locate the folder that holds files of this type
    typeFolderPath = os.path.join(self.basePath, folderNameByName[fileType])
    # Append the name and let store prepare the path
    targetPath = os.path.join(typeFolderPath, str(fileName))
    return store.makeFolderSafely(targetPath)
def __init__(self, basePath):
    """
    Prepare the base folder and map each key to its folder path.

    basePath is passed through store.makeFolderSafely and the result is
    kept as self.basePath.  Every key of folderNameByName is mapped to
    basePath joined with the folder name registered for that key.
    """
    # Make the folder if it doesn't exist
    self.basePath = store.makeFolderSafely(basePath)
    # Set folderPathByName
    folderPathByName = {}
    for key in folderNameByName:
        folderPathByName[key] = os.path.join(basePath, folderNameByName[key])
    self.folderPathByName = folderPathByName
"""
Simple framework for logging.

Importing this module configures the root logger to append timestamped
messages to log.txt inside config.logPath.
"""
# Import system modules
import logging
import os
# Import custom modules
import config
import model_notes
import store


# Prepare
logging.basicConfig(
    level=logging.DEBUG,
    format='%(asctime)s %(message)s',
    filename=os.path.join(store.makeFolderSafely(config.logPath), 'log.txt'),
    filemode='a')


def error(message):
    """
    Log message as an error, print it and send it to config.logEmail.

    The email is written through a model_notes.Model built from
    config.mailHost, config.mailPath and config.mailPassword.
    """
    # Save in log
    logging.error(message)
    # Show on screen
    print(message)
    # Email administrator
    mailbox = model_notes.Model(config.mailHost, config.mailPath, config.mailPassword)
    mailbox.write(config.logEmail, '[tco-helper] Fatal error', message)