def fillPath(self, baseFolderName, folderName):
    """Return a function that expands a fileName into a filePath.

    baseFolderName -- key into self.folderPathByName selecting the base folder
    folderName     -- name appended after the timestamp; must match pattern_name

    Raises FolderError when folderName does not match pattern_name.
    """
    # Reject names that the module-level pattern does not accept
    if not pattern_name.match(folderName):
        hint = 'Names can have letters, digits, underscores, hyphens, spaces, parentheses, commas and periods.'
        raise FolderError('Invalid name: %s\n%s' % (folderName, hint))
    # Resolve the base folder, then the timestamped folder beneath it;
    # both paths are passed through store.makeFolderSafely
    baseFolderPath = store.makeFolderSafely(self.folderPathByName[baseFolderName])
    stampedFolderPath = os.path.join(baseFolderPath, '%s-%s' % (store.makeTimestamp(), folderName))
    folderPath = store.makeFolderSafely(stampedFolderPath)

    def expand(fileName):
        'Join fileName onto the timestamped folder path'
        return os.path.join(folderPath, fileName)

    return expand
 def fillPath(self, baseFolderName, folderName):
     """Return the path of the file that belongs in a new timestamped folder.

     baseFolderName -- key into self.folderPathByName, templateByFolderName
                       and fileNameByFolderName
     folderName     -- name substituted into the folder template after the
                       timestamp; must match pattern_name

     Raises FolderError when folderName does not match pattern_name.
     """
     # Reject names that the module-level pattern does not accept
     if not pattern_name.match(folderName):
         raise FolderError('Invalid name: %s\n%s' % (
             folderName,
             'Names can have letters, digits, underscores, hyphens, spaces, parentheses, commas and periods.',
         ))
     # Resolve the base folder first, then the per-folder template and file name
     baseFolderPath = store.makeFolderSafely(self.folderPathByName[baseFolderName])
     template = templateByFolderName[baseFolderName]
     fileName = fileNameByFolderName[baseFolderName]
     # The template receives (timestamp, folderName) to produce the folder name
     stampedFolderName = template % (store.makeTimestamp(), folderName)
     folderPath = store.makeFolderSafely(os.path.join(baseFolderPath, stampedFolderName))
     return os.path.join(folderPath, fileName)
def train(targetClassifierPath, trainingPath, testPath, parameterByName):
    """Train a boosted ensemble of weak classifiers and return its evaluation.

    targetClassifierPath -- folder that receives one 'weak<index>' classifier
                            per boost plus the saved alphas
    trainingPath         -- training set used for weighting, training and
                            measuring the weighted training error
    testPath             -- test set passed to each weak classifier and to the
                            final evaluate() call
    parameterByName      -- parameters forwarded to classifier.train; must
                            contain 'boost count'

    Returns the result of evaluate(targetClassifierPath, testPath).
    """
    # Prepare the target folder
    store.makeFolderSafely(targetClassifierPath)
    # Start with the same weight for every training sample
    trainingCount = classifier.getSampleCount(trainingPath)
    uniformWeight = 1 / float(trainingCount)
    sampleWeights = numpy.array([uniformWeight] * trainingCount)
    alphas = []
    boostCount = parameterByName['boost count']
    for boostIndex in xrange(boostCount):
        # Show feedback
        print('--- Boosting %d/%d ---' % (boostIndex + 1, boostCount))
        # Renormalize so the weights sum to one before they are used
        sampleWeights = sampleWeights / float(sum(sampleWeights))
        # Build the weighted training set for this round
        weightedTrainingPath, sampleMultipliers = weightSamples(trainingPath, sampleWeights)
        # Train the weak classifier on the weighted set
        weakClassifierPath = os.path.join(targetClassifierPath, 'weak%d' % boostIndex)
        weakResultByName = classifier.train(weakClassifierPath, weightedTrainingPath, testPath, parameterByName)
        # The weighted set is no longer needed; remove its parent folder
        weightedTrainingFolder = os.path.split(weightedTrainingPath)[0]
        shutil.rmtree(weightedTrainingFolder)
        # Measure which of the original training samples are misclassified
        predictedLabels = classifier.test(weakClassifierPath, trainingPath)
        actualLabels = classifier.loadLabels(trainingPath)
        errorDistribution = predictedLabels != actualLabels
        # Training error weighted according to sampleWeights
        weightedError = numpy.dot(sampleWeights, errorDistribution)
        # Keep the error away from exactly 0 or 1 so the log ratio is finite
        if weightedError == 0:
            weightedError = 1e-9
        elif weightedError == 1:
            weightedError = 1 - 1e-9
        # Derive alpha from the weighted error
        alpha = 0.5 * numpy.log(float(1 - weightedError) / weightedError)
        # Misclassified samples are scaled by exp(alpha), the rest by exp(-alpha)
        increase, decrease = numpy.exp(alpha), numpy.exp(-alpha)
        coefficients = [increase if isWrong else decrease for isWrong in errorDistribution]
        # Report and save the weak classifier's performance
        print('weighted training error = %s' % (100 * weightedError))
        print('alpha = %s' % alpha)
        weakResultByName['alpha'] = alpha
        weakResultByName['weighted training error'] = weightedError
        weakResultByName['sample multipliers'] = ' '.join(str(x) for x in sampleMultipliers)
        weakResultByName['error distribution'] = ''.join(str(int(x)) for x in errorDistribution)
        saveWeakClassifierInformation(weakClassifierPath, trainingPath, testPath, parameterByName, {'performance': weakResultByName})
        # Reweight the samples for the next round (normalized at the loop top)
        sampleWeights = coefficients * sampleWeights
        alphas.append(alpha)
    # Save the ensemble
    save(targetClassifierPath, alphas)
    # Return evaluation
    return evaluate(targetClassifierPath, testPath)
Esempio n. 4
0
 def fillPath(self, baseFolderName, folderName):
     """Return a function that expands a fileName into a filePath.

     baseFolderName -- key into self.folderPathByName selecting the base folder
     folderName     -- name appended after the timestamp; must match pattern_name

     Raises FolderError when folderName does not match pattern_name.
     """
     # Validate folderName against the module-level pattern
     if not pattern_name.match(folderName):
         explanation = 'Names can have letters, digits, underscores, hyphens, spaces, parentheses, commas and periods.'
         raise FolderError('Invalid name: %s\n%s' % (folderName, explanation))
     # Resolve the base folder, then a '<timestamp>-<folderName>' folder in it
     baseFolderPath = store.makeFolderSafely(self.folderPathByName[baseFolderName])
     stampedFolderName = '%s-%s' % (store.makeTimestamp(), folderName)
     folderPath = store.makeFolderSafely(os.path.join(baseFolderPath, stampedFolderName))

     def expandFileName(fileName):
         'Join fileName onto the timestamped folder path'
         return os.path.join(folderPath, fileName)

     return expandFileName
Esempio n. 5
0
 def __init__(self, basePath='.'):
     """Set basePath and map each name in folderNames to a numbered folder path.

     basePath -- root folder; passed through store.makeFolderSafely
     """
     self.basePath = store.makeFolderSafely(basePath)
     # Folders are numbered from 1 in the order given by folderNames
     folderPathByName = {}
     for folderIndex, folderName in enumerate(folderNames):
         numberedName = '%d-%s' % (folderIndex + 1, folderName)
         folderPathByName[folderName] = os.path.join(basePath, numberedName)
     self.folderPathByName = folderPathByName
def saveSamples(sampleDataset, sampleIDs, featureSet):
    """Write samples and labels for sampleIDs to a temporary folder and return its path.

    sampleDataset -- dataset object providing getDatasetPath()
    sampleIDs     -- ids of the samples to save; min(sampleIDs) is part of the
                     target name, so an empty sequence raises ValueError
    featureSet    -- feature object; its module and class names are recorded
                     in the saved information

    Returns targetSamplePath. When the information already stored at that
    path equals the information for this request, nothing is rewritten.
    The output files are closed even if generating or writing a sample fails.
    """
    # Initialize
    sampleCount = len(sampleIDs)
    sampleDatasetPath = sampleDataset.getDatasetPath()
    sampleInformation = {
        'source dataset': {
            'path': sampleDatasetPath,
            'sample ids': ' '.join(str(x) for x in sampleIDs),
        },
        'feature': {
            'module name': featureSet.__module__,
            'class name': featureSet.__class__.__name__,
        }
    }
    targetSampleName = '%s-count%s-min%s' % (folder_store.getFolderName(sampleDatasetPath), sampleCount, min(sampleIDs))
    targetSamplePath = os.path.join(store.makeFolderSafely(os.path.join(store.temporaryPath, 'cnn_datasets')), targetSampleName)
    # If identical samples were already saved at targetSamplePath, reuse them
    if store.loadInformation(targetSamplePath) == sampleInformation:
        print('Using existing samples...\n\ttargetSamplePath = ' + targetSamplePath)
        return targetSamplePath
    # Save
    print('Saving samples...\n\ttargetSamplePath = ' + targetSamplePath)
    sampleGenerator = makeSampleLabelGeneratorFromSampleDataset(sampleDataset, sampleIDs, featureSet)
    samplePath, labelPath = makeSampleLabelPaths(targetSamplePath)
    sampleFile = labelFile = None
    try:
        sampleFile = open(samplePath, 'wt')
        labelFile = open(labelPath, 'wt')
        for sampleIndex, (sample, label) in enumerate(sampleGenerator):
            # The headers need the first sample and label, so write them at the start
            if sampleIndex == 0:
                sampleFile.write(makeLushMatrixHeaderFromPart(sample, sampleCount))
                labelFile.write(makeLushMatrixHeaderFromPart(label, sampleCount))
            # Write content
            sampleFile.write(makeLushMatrixContent(sample))
            labelFile.write(makeLushMatrixContent(label))
            # Report progress every hundred samples
            if sampleIndex % 100 == 0:
                view.printPercentUpdate(sampleIndex + 1, sampleCount)
        view.printPercentFinal(sampleCount)
    finally:
        # Release whichever handles were opened, even when an error interrupts the loop
        for openedFile in (labelFile, sampleFile):
            if openedFile is not None:
                openedFile.close()
    # Record the information only after both files are complete and closed
    store.saveInformation(targetSamplePath, sampleInformation)
    return targetSamplePath
 def __init__(self, basePath='.'):
     """Set basePath and build folderPathByName from folderNames.

     basePath -- root folder; passed through store.makeFolderSafely
     """
     self.basePath = store.makeFolderSafely(basePath)
     # Each name maps to '<position>-<name>' under basePath, positions start at 1
     pathByName = {}
     for folderIndex, folderName in enumerate(folderNames):
         pathByName[folderName] = os.path.join(basePath, '%d-%s' % (folderIndex + 1, folderName))
     self.folderPathByName = pathByName
 def getPath(self, fileType, fileName):
     """Return the path for fileName inside the folder assigned to fileType.

     fileType -- key into folderNameByName
     fileName -- converted with str() before being joined

     The joined path is passed through store.makeFolderSafely and that
     call's result is returned.
     """
     typeFolderName = folderNameByName[fileType]
     targetPath = os.path.join(self.basePath, typeFolderName, str(fileName))
     return store.makeFolderSafely(targetPath)
 def __init__(self, basePath):
     """Set basePath and map every key of folderNameByName to a path under it.

     basePath -- root folder; passed through store.makeFolderSafely
     """
     self.basePath = store.makeFolderSafely(basePath)
     # Join each configured folder name onto basePath
     folderPathByName = {}
     for key in folderNameByName:
         folderPathByName[key] = os.path.join(basePath, folderNameByName[key])
     self.folderPathByName = folderPathByName
Esempio n. 10
0
'Simple framework for logging'
# Import system modules
import logging
import os
# Import custom modules
import config
import model_notes
import store


# Prepare: append DEBUG-and-above records, prefixed with the time,
# to log.txt inside config.logPath
logging.basicConfig(
    level=logging.DEBUG,
    format='%(asctime)s %(message)s',
    filename=os.path.join(store.makeFolderSafely(config.logPath), 'log.txt'),
    filemode='a'
)


def error(message):
    """Record message in the log, print it, and send it to config.logEmail.

    message -- text to log, print and pass to model_notes.Model.write
    """
    # Save in log
    logging.error(message)
    # Show on screen
    print(message)
    # Notify the administrator through the configured mail account
    # (presumably sends an email; the behavior lives in model_notes.Model.write)
    mailer = model_notes.Model(config.mailHost, config.mailPath, config.mailPassword)
    mailer.write(config.logEmail, '[tco-helper] Fatal error', message)