Ejemplo n.º 1
0
def trainLogisticRegressionModel(
    featureData, labels, Cvalue, outputClassifierFile, scaleData=True, requireAllClasses=True
):
    """Fit an L1-penalised logistic regression classifier and pickle it.

    See [http://scikit-learn.org/dev/modules/generated/sklearn.linear_model.LogisticRegression.html]

    Args:
        featureData: numPixel x numFeature np array of training features.
        labels: 1d np array holding one label per row of featureData.
        Cvalue: forwarded to LogisticRegression as its C parameter.
        outputClassifierFile: destination handed to pickleObject for the
            fitted classifier.
        scaleData: when truthy, featureData is passed through
            preprocessing.scale before fitting.
        requireAllClasses: when truthy, assert that the number of distinct
            labels equals pomio.getNumClasses() or pomio.getNumLabels().

    Returns:
        The fitted LogisticRegression object.

    Raises:
        AssertionError: if labels is not 1d, if the number of feature rows
            differs from the number of labels, or if requireAllClasses is
            set and the distinct-label count matches neither pomio count.
    """
    numTrainDataPoints = np.shape(featureData)[0]
    numDataLabels = np.size(labels)

    # NOTE: these checks are asserts, so they vanish under `python -O`; they
    # are kept as asserts so callers catching AssertionError are unaffected.
    assert np.size(np.shape(labels)) == 1, (
        "Labels should be a 1d array.  Shape of labels = " + str(np.shape(labels)))
    assert numTrainDataPoints == numDataLabels, (
        "The length of the feature and label data arrays must be equal.  Num data points="
        + str(numTrainDataPoints) + ", labels=" + str(numDataLabels))

    classLabels = np.unique(labels)
    numDistinctLabels = np.size(classLabels)
    assert not requireAllClasses or (
        numDistinctLabels == pomio.getNumClasses()
        or numDistinctLabels == pomio.getNumLabels()
    ), "Training data does not contains all classes::\n\t" + str(classLabels)

    if scaleData:
        featureData = preprocessing.scale(featureData)

    # Library signature for reference (note the code below overrides penalty with 'l1'):
    # sklearn.linear_model.LogisticRegression(penalty='l2', dual=False, tol=0.0001, C=1.0, fit_intercept=True, intercept_scaling=1, class_weight=None, random_state=None)
    lrc = LogisticRegression(
        penalty='l1', dual=False, tol=0.0001, C=Cvalue,
        fit_intercept=True, intercept_scaling=1)
    lrc.fit(featureData, labels)

    pickleObject(lrc, outputClassifierFile)
    # Single-argument parenthesised print: same output on Python 2 and 3.
    print("LogisticRegression classifier saved to " + str(outputClassifierFile))

    return lrc
Ejemplo n.º 2
0
def trainLogisticRegressionModel(featureData,
                                 labels,
                                 Cvalue,
                                 outputClassifierFile,
                                 scaleData=True,
                                 requireAllClasses=True):
    """Train a logistic regression classifier (L1 penalty) and save it to disk.

    See [http://scikit-learn.org/dev/modules/generated/sklearn.linear_model.LogisticRegression.html]

    Args:
        featureData: numPixel x numFeature np array of training features.
        labels: 1d np array with one label per row of featureData.
        Cvalue: value passed to LogisticRegression as C.
        outputClassifierFile: target passed to pickleObject together with
            the fitted classifier.
        scaleData: if truthy, run preprocessing.scale over featureData
            before fitting.
        requireAllClasses: if truthy, assert that the count of distinct
            labels equals pomio.getNumClasses() or pomio.getNumLabels().

    Returns:
        The fitted LogisticRegression instance.

    Raises:
        AssertionError: when labels is not one-dimensional, when the row
            count of featureData differs from the label count, or when
            requireAllClasses is set and the distinct-label count matches
            neither pomio count.
    """
    numTrainDataPoints = np.shape(featureData)[0]
    numDataLabels = np.size(labels)

    # NOTE: asserts are stripped under `python -O`; they are retained so the
    # exception type seen by existing callers does not change.
    assert (np.size(np.shape(labels)) == 1), (
        "Labels should be a 1d array.  Shape of labels = " +
        str(np.shape(labels)))
    assert (numTrainDataPoints == numDataLabels), (
        "The length of the feature and label data arrays must be equal.  Num data points="
        + str(numTrainDataPoints) + ", labels=" + str(numDataLabels))

    classLabels = np.unique(labels)
    if requireAllClasses:
        numDistinctLabels = np.size(classLabels)
        assert (numDistinctLabels == pomio.getNumClasses()
                or numDistinctLabels == pomio.getNumLabels()), \
            "Training data does not contains all classes::\n\t" + str(classLabels)

    if scaleData:
        featureData = preprocessing.scale(featureData)

    # Library signature for reference (the call below deliberately passes penalty='l1'):
    # sklearn.linear_model.LogisticRegression(penalty='l2', dual=False, tol=0.0001, C=1.0, fit_intercept=True, intercept_scaling=1, class_weight=None, random_state=None)
    lrc = LogisticRegression(penalty='l1',
                             dual=False,
                             tol=0.0001,
                             C=Cvalue,
                             fit_intercept=True,
                             intercept_scaling=1)
    lrc.fit(featureData, labels)

    pickleObject(lrc, outputClassifierFile)
    # One parenthesised argument keeps the output identical on Python 2 and 3.
    print("LogisticRegression classifier saved to " + str(outputClassifierFile))

    return lrc
else:
    labs = pomio.readMatFromCSV(infileLabs).astype(np.int32)

# Sanity-check the loaded labels and features before training.
n = len(labs)
# There must be exactly one label per feature row.
assert n == ftrs.shape[0], 'Error: there are %d labels and %d training examples' \
    % ( n, ftrs.shape[0] )

# Refuse to train on NaN or infinite feature values.
assert np.all(np.isfinite(ftrs))

# Single-argument parenthesised print: identical output on Python 2 and 3.
print('There are %d unique labels in range [%d,%d]' % (len(
    np.unique(labs)), np.min(labs), np.max(labs)))

if args.verbose:
    print('There are %d training examples' % len(labs))
    # Show a label histogram and wait for the user before continuing.
    plt.interactive(True)
    # NOTE(review): range(getNumLabels()) is used as the bin *edges*, which
    # yields getNumLabels()-1 bins, so the two highest label values would
    # share the last bin -- confirm whether range(getNumLabels() + 1) was meant.
    plt.hist(labs, bins=range(pomio.getNumLabels()))
    plt.waitforbuttonpress()

# Train the classifier, either with CV param search or with default values
if paramSearch:
    paramSrc = 'grid search'
    # create crossValidation object
    stratCV = cross_validation.StratifiedKFold(labs, paramSearchFolds)

    print 'Training %s classifier using %d-fold cross-validation parameter search, over %s samples...' % (
        clfrType, paramSearchFolds, n)

    # empty param values
    params = {}

    if clfrType == 'logreg':
    labs = pomio.unpickleObject( infileLabs )
else:
    labs = pomio.readMatFromCSV( infileLabs ).astype(np.int32)

# Validate the loaded labels/features before any training happens.
n = len(labs)
# Each training example (feature row) needs a matching label.
assert n == ftrs.shape[0], 'Error: there are %d labels and %d training examples' \
    % ( n, ftrs.shape[0] )

# All feature values must be finite (no NaN / inf).
assert np.all( np.isfinite( ftrs ) )

# Parenthesised single-argument print works unchanged on Python 2 and 3.
print('There are %d unique labels in range [%d,%d]' % ( len(np.unique(labs)), np.min(labs), np.max(labs) ))

if args.verbose:
    print('There are %d training examples' % len(labs))
    # Display the label distribution, then wait for user input.
    plt.interactive(True)
    # NOTE(review): the range is interpreted as bin edges, giving
    # getNumLabels()-1 bins; the top two label values would land in the same
    # bin -- confirm whether range(getNumLabels() + 1) was intended.
    plt.hist( labs, bins=range(pomio.getNumLabels()) )
    plt.waitforbuttonpress()
    
# Train the classifier, either with CV param search or with default values
if paramSearch:
    paramSrc = 'grid search'
    # create crossValidation object
    stratCV = cross_validation.StratifiedKFold(labs, paramSearchFolds)

    print 'Training %s classifier using %d-fold cross-validation parameter search, over %s samples...' % (clfrType, paramSearchFolds, n)

    # empty param values
    params = {}
    
    if clfrType == 'logreg':
        # create a set of C values and regularisation types for logistic regression