def trainLogisticRegressionModel( featureData, labels, Cvalue, outputClassifierFile, scaleData=True, requireAllClasses=True ):
    """Train an L1-regularised logistic regression classifier and pickle it to disk.

    See http://scikit-learn.org/dev/modules/generated/sklearn.linear_model.LogisticRegression.html

    Parameters
    ----------
    featureData : numpy array, numPixels x numFeatures
        Per-pixel feature vectors.
    labels : numpy array, 1-d, length numPixels
        Integer class label for each pixel.
    Cvalue : float
        Inverse regularisation strength passed to LogisticRegression.
    outputClassifierFile : str
        Path the trained classifier is pickled to via pickleObject.
    scaleData : bool
        When true, zero-mean / unit-variance scale the features before fitting.
    requireAllClasses : bool
        When true, assert the label set covers every class known to pomio.

    Returns
    -------
    The fitted LogisticRegression object (also saved to outputClassifierFile).
    """
    numTrainDataPoints = np.shape(featureData)[0]
    numDataLabels = np.size(labels)
    # Sanity-check the input shapes before doing any work.
    assert np.size(np.shape(labels)) == 1, \
        "Labels should be a 1d array. Shape of labels = " + str(np.shape(labels))
    assert numTrainDataPoints == numDataLabels, \
        "The length of the feature and label data arrays must be equal. Num data points=" \
        + str(numTrainDataPoints) + ", labels=" + str(numDataLabels)
    classLabels = np.unique(labels)
    # Optionally require that the training data exercises every class.  The
    # `or` short-circuits, so pomio is only consulted when the check is on.
    assert not requireAllClasses or \
        (np.size(classLabels) == pomio.getNumClasses() or
         np.size(classLabels) == pomio.getNumLabels()), \
        "Training data does not contain all classes::\n\t" + str(classLabels)
    if scaleData:
        featureData = preprocessing.scale(featureData)
    # Defaults for reference:
    # sklearn.linear_model.LogisticRegression(penalty='l2', dual=False, tol=0.0001, C=1.0, fit_intercept=True, intercept_scaling=1, class_weight=None, random_state=None)
    lrc = LogisticRegression(penalty='l1', dual=False, tol=0.0001, C=Cvalue,
                             fit_intercept=True, intercept_scaling=1)
    lrc.fit(featureData, labels)
    # Persist the trained model so it can be reloaded by later pipeline stages.
    pickleObject(lrc, outputClassifierFile)
    print("LogisticRegression classifier saved to " + str(outputClassifierFile))
    return lrc
def trainLogisticRegressionModel(featureData, labels, Cvalue, outputClassifierFile, scaleData=True, requireAllClasses=True):
    """Fit an L1 logistic regression model on pixel features and pickle it.

    See http://scikit-learn.org/dev/modules/generated/sklearn.linear_model.LogisticRegression.html
    featureData is a numPixel x numFeature numpy array and labels is a
    1-d numPixel numpy array.  The fitted classifier is saved to
    outputClassifierFile and returned.
    """
    sampleCount = np.shape(featureData)[0]
    labelCount = np.size(labels)
    labelShape = np.shape(labels)
    assert np.size(labelShape) == 1, \
        "Labels should be a 1d array. Shape of labels = " + str(labelShape)
    assert sampleCount == labelCount, \
        "The length of the feature and label data arrays must be equal. Num data points=" \
        + str(sampleCount) + ", labels=" + str(labelCount)
    distinctLabels = np.unique(labels)
    if requireAllClasses:
        # The label set must match either the class count or the label count.
        classCount = np.size(distinctLabels)
        assert classCount == pomio.getNumClasses() or classCount == pomio.getNumLabels(), \
            "Training data does not contains all classes::\n\t" + str(distinctLabels)
    if scaleData == True:
        featureData = preprocessing.scale(featureData)
    # Default signature for reference:
    # sklearn.linear_model.LogisticRegression(penalty='l2', dual=False, tol=0.0001, C=1.0, fit_intercept=True, intercept_scaling=1, class_weight=None, random_state=None)
    classifier = LogisticRegression(
        penalty='l1', dual=False, tol=0.0001, C=Cvalue,
        fit_intercept=True, intercept_scaling=1)
    classifier.fit(featureData, labels)
    pickleObject(classifier, outputClassifierFile)
    print("LogisticRegression classifier saved to " + str(outputClassifierFile))
    return classifier
else: labs = pomio.readMatFromCSV(infileLabs).astype(np.int32) n = len(labs) assert n == ftrs.shape[0], 'Error: there are %d labels and %d training examples' \ % ( n, ftrs.shape[0] ) assert np.all(np.isfinite(ftrs)) print 'There are %d unique labels in range [%d,%d]' % (len( np.unique(labs)), np.min(labs), np.max(labs)) if args.verbose: print 'There are %d training examples' % len(labs) plt.interactive(True) plt.hist(labs, bins=range(pomio.getNumLabels())) plt.waitforbuttonpress() # Train the classifier, either with CV param search or with default values if paramSearch: paramSrc = 'grid search' # create crossValidation object stratCV = cross_validation.StratifiedKFold(labs, paramSearchFolds) print 'Training %s classifier using %d-fold cross-validation parameter search, over %s samples...' % ( clfrType, paramSearchFolds, n) # empy param values params = {} if clfrType == 'logreg':
labs = pomio.unpickleObject( infileLabs ) else: labs = pomio.readMatFromCSV( infileLabs ).astype(np.int32) n = len(labs) assert n == ftrs.shape[0], 'Error: there are %d labels and %d training examples' \ % ( n, ftrs.shape[0] ) assert np.all( np.isfinite( ftrs ) ) print 'There are %d unique labels in range [%d,%d]' % ( len(np.unique(labs)), np.min(labs), np.max(labs) ) if args.verbose: print 'There are %d training examples' % len(labs) plt.interactive(True) plt.hist( labs, bins=range(pomio.getNumLabels()) ) plt.waitforbuttonpress() # Train the classifier, either with CV param search or with default values if paramSearch: paramSrc = 'grid search' # create crossValidation object stratCV = cross_validation.StratifiedKFold(labs, paramSearchFolds) print 'Training %s classifier using %d-fold cross-validation parameter search, over %s samples...' % (clfrType, paramSearchFolds, n) # empy param values params = {} if clfrType == 'logreg': # create a set of C value and regularisation types for logisitc regression