def analysis(ds):
    """Run the sensitivity analysis for the paper figures.

    :Parameter:
      ds: Dataset

    :Returns:
      Per measure sensitivities as produced by doSensitivityAnalysis()
    """
    # Replicate the published results. RFE plus an initial feature
    # selection could do slightly better, but for the purpose of the
    # paper we stick to a plain replication.
    clf = sg.SVM(kernel_type='linear')
    C = -2.0  # our default scaling is too soft

    # Compensate class imbalance: scale C per class by the square root
    # of the samples-per-label ratio
    label_counts = ds.samplesperlabel
    scale = N.sqrt(float(label_counts[0]) / label_counts[1])
    clf.C = (C / scale, C * scale)

    # For classification alone the following snippet would be
    # sufficient, but we reuse doSensitivityAnalysis instead:
    #
    # cv2A = CrossValidatedTransferError(
    #     TransferError(clf),
    #     NFoldSplitter(),
    #     enable_states=['confusion', 'training_confusion', 'splits'])
    #
    # verbose(1, "Running cross-validation on %s" % clf.descr)
    # error2A = cv2A(ds)
    # verbose(2, "Figure 2A LOO performance:\n%s" % cv2A.confusion)

    # SMLR is told to fit just one set of weights, since our task is
    # binary
    clfs = {'SMLR': SMLR(lm=1.0, fit_all_weights=False),
            'lCSVM': clf}

    # pure sensitivities (or related measures)
    sensanas = {'ANOVA': OneWayAnova()}

    # run everything and collect the per-measure sensitivities
    senses = doSensitivityAnalysis(ds, clfs, sensanas, NFoldSplitter())

    # restore the original single C before the Figure 2B run
    clf.C = C

    # Figure 2B: resample so every label contributes the same number of
    # samples per split
    balanced_cv = CrossValidatedTransferError(
        TransferError(clf),
        NFoldSplitter(nperlabel='equal', nrunspersplit=100),
        enable_states=['confusion', 'training_confusion'])
    balanced_cv(ds)
    # print full testing confusion table
    verbose(2, "Figure 2B LOO performance:\n%s" % balanced_cv.confusion)

    return senses
def analysis(ds):
    """Performs main analysis.

    :Parameter:
      ds: Dataset

    :Returns:
      Per measure sensitivities as returned from doSensitivityAnalysis()
    """
    # Lets first replicate the obtained results. We can do slightly
    # better using RFEs and initial feature selection, but lets just
    # replicate
    # clf = sg.SVM(kernel_type='linear')
    # C=-2.0 gives 84% when properly scaled and 5% ANOVA voxels
    # C=-1.0 and RFE gives up to 85% correct
    #
    clf = sg.SVM(kernel_type='linear')
    # NOTE(review): presumably a negative C requests library-internal
    # scaling of the default value -- confirm against sg.SVM docs
    C = -2.0
    # Scale C according to the number of samples per class
    spl = ds.samplesperlabel
    ratio = N.sqrt(float(spl[0])/spl[1])
    clf.C = (C/ratio, C*ratio)

    # If we were only to do classification, following snippet is sufficient.
    # But lets reuse doSensitivityAnalysis
    #
    # cv2A = CrossValidatedTransferError(
    #     TransferError(clf),
    #     NFoldSplitter(),
    #     enable_states=['confusion', 'training_confusion', 'splits'])
    #
    # verbose(1, "Running cross-validation on %s" % clf.descr)
    # error2A = cv2A(ds)
    # verbose(2, "Figure 2A LOO performance:\n%s" % cv2A.confusion)

    # Used in RFE implementations (currently only in the commented-out
    # clfs entries below; kept so those entries can be re-enabled)
    rfesvm = sg.SVM(kernel_type='linear')
    rfesvm2 = sg.SVM(kernel_type='linear')
    rfesvm.C = clf.C
    rfesvm2.C = clf.C
    rfesvm_split = SplitClassifier(rfesvm2)

    clfs = {
        # explicitly instruct SMLR just to fit a single set of weights for our
        # binary task
        'SMLR': SMLR(lm=1.0, fit_all_weights=False),
        'lCSVM': clf,
        #'lGPR': GPR(kernel=KernelLinear()),
        #'lCSVM+RFE(farm)': SplitClassifier( # which does splitting internally
        #    FeatureSelectionClassifier(
        #        clf = clf,
        #        feature_selection = RFE( # on features selected via RFE
        #            sensitivity_analyzer=\
        #                rfesvm.getSensitivityAnalyzer(transformer=Absolute),
        #            transfer_error=TransferError(rfesvm),
        #            stopping_criterion=FixedErrorThresholdStopCrit(0.05),
        #            feature_selector=FractionTailSelector(
        #                0.2, mode='discard', tail='lower'),
        #                # remove 20% of features at each step
        #            update_sensitivity=True)),
        #            # update sensitivity at each step
        #    descr='LinSVM+RFE(farm,N-Fold)'),
        #'lCSVM+RFE(mean)': FeatureSelectionClassifier(
        #    clf = clf,
        #    feature_selection = RFE( # on features selected via RFE
        #        # based on sensitivity of a clf which does splitting internally
        #        sensitivity_analyzer=rfesvm_split.getSensitivityAnalyzer(
        #            transformer=Absolute),
        #        transfer_error=ConfusionBasedError(
        #            rfesvm_split, confusion_state="confusion"),
        #            # and whose internal error we use
        #        feature_selector=FractionTailSelector(
        #            0.2, mode='discard', tail='lower'),
        #            # remove 20% of features at each step
        #        update_sensitivity=True),
        #        # update sensitivity at each step
        #    descr='LinSVM+RFE(avg,N-Fold)' )
        }

    # define some pure sensitivities (or related measures)
    sensanas={
        'ANOVA': OneWayAnova(),
        # Crashes for Yarik -- I guess openopt issue
        #'GPR_Model': GPRWeights(GPR(kernel=KernelLinear()), combiner=None),
        #
        # no I-RELIEF for now -- takes too long
        #'I-RELIEF': IterativeReliefOnline(),
        # gimme more !!
        }

    # perform the analysis and get all sensitivities
    senses = doSensitivityAnalysis(ds, clfs, sensanas, NFoldSplitter())

    # assign original single C
    clf.C = C

    # get results from Figure2B with resampling of the samples to
    # balance number of samples per label
    cv2B = CrossValidatedTransferError(
        TransferError(clf),
        NFoldSplitter(nperlabel='equal',
                      # increase to reasonable number
                      nrunspersplit=4),
        enable_states=['confusion', 'training_confusion'])
    error2B = cv2B(ds)
    verbose(2, "Figure 2B LOO performance:\n%s" % cv2B.confusion)

    # Sure we repeat ourselves here but for the sake of clarity
    return senses
def analysis(ds):
    """Performs main analysis.

    :Parameter:
      ds: Dataset

    :Returns:
      Per measure sensitivities as returned from doSensitivityAnalysis()
    """
    # Lets first replicate the obtained results. We can do slightly
    # better using RFEs and initial feature selection, but lets just
    # replicate
    # clf = sg.SVM(kernel_type='linear')
    # C=-2.0 gives 84% when properly scaled and 5% ANOVA voxels
    # C=-1.0 and RFE gives up to 85% correct
    #
    clf = sg.SVM(kernel_type='linear')
    # NOTE(review): presumably a negative C requests library-internal
    # scaling of the default value -- confirm against sg.SVM docs
    C = -2.0
    # Scale C according to the number of samples per class
    spl = ds.samplesperlabel
    ratio = N.sqrt(float(spl[0]) / spl[1])
    clf.C = (C / ratio, C * ratio)

    # If we were only to do classification, following snippet is sufficient.
    # But lets reuse doSensitivityAnalysis
    #
    # cv2A = CrossValidatedTransferError(
    #     TransferError(clf),
    #     NFoldSplitter(),
    #     enable_states=['confusion', 'training_confusion', 'splits'])
    #
    # verbose(1, "Running cross-validation on %s" % clf.descr)
    # error2A = cv2A(ds)
    # verbose(2, "Figure 2A LOO performance:\n%s" % cv2A.confusion)

    # Used in RFE implementations (currently only in the commented-out
    # clfs entries below; kept so those entries can be re-enabled)
    rfesvm = sg.SVM(kernel_type='linear')
    rfesvm2 = sg.SVM(kernel_type='linear')
    rfesvm.C = clf.C
    rfesvm2.C = clf.C
    rfesvm_split = SplitClassifier(rfesvm2)

    clfs = {
        # explicitly instruct SMLR just to fit a single set of weights for our
        # binary task
        'SMLR': SMLR(lm=1.0, fit_all_weights=False),
        'lCSVM': clf,
        #'lGPR': GPR(kernel=KernelLinear()),
        #'lCSVM+RFE(farm)': SplitClassifier( # which does splitting internally
        #    FeatureSelectionClassifier(
        #        clf = clf,
        #        feature_selection = RFE( # on features selected via RFE
        #            sensitivity_analyzer=\
        #                rfesvm.getSensitivityAnalyzer(transformer=Absolute),
        #            transfer_error=TransferError(rfesvm),
        #            stopping_criterion=FixedErrorThresholdStopCrit(0.05),
        #            feature_selector=FractionTailSelector(
        #                0.2, mode='discard', tail='lower'),
        #                # remove 20% of features at each step
        #            update_sensitivity=True)),
        #            # update sensitivity at each step
        #    descr='LinSVM+RFE(farm,N-Fold)'),
        #'lCSVM+RFE(mean)': FeatureSelectionClassifier(
        #    clf = clf,
        #    feature_selection = RFE( # on features selected via RFE
        #        # based on sensitivity of a clf which does splitting internally
        #        sensitivity_analyzer=rfesvm_split.getSensitivityAnalyzer(
        #            transformer=Absolute),
        #        transfer_error=ConfusionBasedError(
        #            rfesvm_split, confusion_state="confusion"),
        #            # and whose internal error we use
        #        feature_selector=FractionTailSelector(
        #            0.2, mode='discard', tail='lower'),
        #            # remove 20% of features at each step
        #        update_sensitivity=True),
        #        # update sensitivity at each step
        #    descr='LinSVM+RFE(avg,N-Fold)' )
        }

    # define some pure sensitivities (or related measures)
    sensanas = {
        'ANOVA': OneWayAnova(),
        # Crashes for Yarik -- I guess openopt issue
        #'GPR_Model': GPRWeights(GPR(kernel=KernelLinear()), combiner=None),
        #
        # no I-RELIEF for now -- takes too long
        #'I-RELIEF': IterativeReliefOnline(),
        # gimme more !!
        }

    # perform the analysis and get all sensitivities
    senses = doSensitivityAnalysis(ds, clfs, sensanas, NFoldSplitter())

    # assign original single C
    clf.C = C

    # get results from Figure2B with resampling of the samples to
    # balance number of samples per label
    cv2B = CrossValidatedTransferError(
        TransferError(clf),
        NFoldSplitter(
            nperlabel='equal',
            # increase to reasonable number
            nrunspersplit=4),
        enable_states=['confusion', 'training_confusion'])
    error2B = cv2B(ds)
    verbose(2, "Figure 2B LOO performance:\n%s" % cv2B.confusion)

    # Sure we repeat ourselves here but for the sake of clarity
    return senses
def analysis(ds):
    """Performs the main analysis of the dataset.

    :Parameter:
      ds: Dataset

    :Returns:
      Sensitivities per measure, as returned from doSensitivityAnalysis()
    """
    # We merely replicate the published results here; RFE with an
    # initial feature selection can do slightly better, but the paper
    # only needs the replication.
    clf = sg.SVM(kernel_type='linear')

    base_C = -2.0  # our default scaling is too soft

    # Per-class C scaling derived from the label counts
    per_label = ds.samplesperlabel
    skew = N.sqrt(float(per_label[0]) / per_label[1])
    clf.C = (base_C / skew, base_C * skew)

    # Were classification the only goal, this snippet would suffice;
    # we go through doSensitivityAnalysis instead:
    #
    # cv2A = CrossValidatedTransferError(
    #     TransferError(clf),
    #     NFoldSplitter(),
    #     enable_states=['confusion', 'training_confusion', 'splits'])
    #
    # verbose(1, "Running cross-validation on %s" % clf.descr)
    # error2A = cv2A(ds)
    # verbose(2, "Figure 2A LOO performance:\n%s" % cv2A.confusion)

    clfs = {
        # SMLR fits only a single set of weights -- the task is binary
        'SMLR': SMLR(lm=1.0, fit_all_weights=False),
        'lCSVM': clf,
        }

    # pure sensitivity measures (or related ones)
    sensanas = {'ANOVA': OneWayAnova()}

    # carry out the analysis, collecting all sensitivities
    senses = doSensitivityAnalysis(ds, clfs, sensanas, NFoldSplitter())

    # back to the single original C for the Figure 2B estimate
    clf.C = base_C

    # Figure 2B: equalize the number of samples per label via
    # resampling, repeated 100 times per split
    cv_fig2b = CrossValidatedTransferError(
        TransferError(clf),
        NFoldSplitter(nperlabel='equal', nrunspersplit=100),
        enable_states=['confusion', 'training_confusion'])
    cv_fig2b(ds)

    # report the full testing confusion table
    verbose(2, "Figure 2B LOO performance:\n%s" % cv_fig2b.confusion)
    return senses