Python monotonicValidationCut Examples

Programming Language: Python

Namespace/Package Name: cuts

Method/Function: monotonicValidationCut

Examples at hotexamples.com: 3

Python monotonicValidationCut - 3 examples found. These are the top-rated real-world Python examples of cuts.monotonicValidationCut, extracted from open-source projects. You can rate the examples to help us improve their quality.

Example #1

Show file

File: mrmrSelectionTest.py Project: qq431169079/JupyterNotebook

def mrmrTest(cutMethod=1, method=0, runs=3):
    """Compare model results before and after a ranked feature cut.

    For every configured CSV file the function:

    1. loads ``Data/<file>`` and treats the last column as the target,
    2. evaluates all features with ``ml.clasificationJudge`` (model type 0)
       or ``ml.regresionJudge`` (any other model type),
    3. obtains a ranking from ``rankExtraction('Data2/<file>', method)``,
    4. truncates the ranking at the position returned by the selected cut,
    5. evaluates again using only the selected columns.

    Results and timings are printed; nothing is returned.

    Args:
        cutMethod: 0 selects ``cuts.greatestDiffCut`` on the feature
            importances; 1 selects ``cuts.monotonicValidationCut`` with
            ``consecutives=5``.
        method: Forwarded unchanged to ``rankExtraction``.
        runs: Forwarded to the judge functions and to
            ``cuts.monotonicValidationCut``.

    Raises:
        ValueError: If ``cutMethod`` is neither 0 nor 1.
    """
    # Reject unsupported cut methods before any dataset is processed instead
    # of failing later on an unbound ``cutpos``.
    if cutMethod not in (0, 1):
        raise ValueError(
            "cutMethod must be 0 (greatestDiffCut) or 1 "
            "(monotonicValidationCut), got %r" % (cutMethod,))

    # Artificial datasets
    files = [
        'data1000-f1.csv', 'data1000-f2.csv', 'data1000-f3.csv', 'data1000-f4.csv',
        'data5000-f1.csv', 'data5000-f2.csv', 'data5000-f3.csv', 'data5000-f4.csv',
        'data20000-f1.csv', 'data20000-f2.csv', 'data20000-f3.csv', 'data20000-f4.csv',
        'data1000-f1-r500.csv', 'data5000-f1-r500.csv', 'data20000-f1-r500.csv',
    ]
    # One entry per file, printed next to the file path for reference.
    buenos = [
        [0, 1, 2, 3, 4, 5, 6, 13, 14], [0, 1, 8, 9], [0, 1, 6, 7], [0, 1, 3, 2],
        [0, 1, 2, 3, 4, 5, 6, 13, 14], [0, 1, 8, 9], [0, 1, 6, 7], [0, 1, 3, 2],
        [0, 1, 2, 3, 4, 5, 6, 13, 14], [0, 1, 8, 9], [0, 1, 6, 7], [0, 1, 3, 2],
        [0, 1, 2, 3, 4, 5, 6, 13, 14],
        [0, 1, 2, 3, 4, 5, 6, 13, 14],
        [0, 1, 2, 3, 4, 5, 6, 13, 14],
    ]
    modelsType = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
    # Real datasets (alternative configuration)
    #files = ['real/sonar_scale.csv', 'real/splice_scale.csv', 'real/colon-cancer.csv', 'real/leu.csv', 'real/duke.csv', 'real/BH20000.csv', 'real/madelon-test.csv']
    #buenos = [['?'],['?'],['?'],['?'],['?'],['?'],['?']]
    #modelsType = [0,0,0,0,0,0,0]

    # Iterate the three parallel lists together so each file is paired with
    # its own expected features and model type (the index was previously
    # never advanced, so every file used entry 0).
    for f, expected, modelType in zip(files, buenos, modelsType):
        filepath = 'Data/' + f
        filepath2 = 'Data2/' + f
        data = read_csv(filepath)
        # Positional indexing: every column but the last is a feature,
        # the last column is the target.
        X = np.array(data.iloc[:, 0:-1])
        y = np.array(data.iloc[:, -1])
        print(filepath, expected)

        # Same evaluator for the baseline and the reduced feature set.
        if modelType == 0:
            judge = ml.clasificationJudge
        else:
            judge = ml.regresionJudge

        startTime = time.time()
        acc = judge(X=X, y=y, testPerc=0.5, runs=runs)
        endTime = time.time()
        print("original:", acc, X.shape[1], str(round(endTime - startTime, 3)) + "s")

        # Feature selection: ranking plus cut position, timed together.
        startTime = time.time()
        rank, featureImportance = rankExtraction(filepath2, method)
        if cutMethod == 0:
            cutpos = cuts.greatestDiffCut(weights=featureImportance)
        else:
            cutpos = cuts.monotonicValidationCut(X=X, y=y, modelType=modelType, rank=rank, consecutives=5, runs=runs)
        rank = rank[0:cutpos]
        endTime = time.time()
        timefs = round(endTime - startTime, 3)

        # Re-evaluate using only the selected columns, in rank order.
        X = np.array(data.iloc[:, rank])
        startTime = time.time()
        acc = judge(X=X, y=y, testPerc=0.5, runs=runs)
        endTime = time.time()
        timeml = round(endTime - startTime, 3)
        print("result: ", acc, timefs, timeml, len(rank), rank[0:5])
        print()

Example #2

Show file

File: featureSelection.py Project: VinayChaudhari1996/automatic-feature-selection

def featureSelection(X,y, modelType=0, runs=3, processes=0, measure=1, binMethod=0, cutMethod=1, minRed=0, rrThreshold=0.9, debug=False):
    """Rank the features of ``X`` against ``y`` and return the selected rank.

    The function computes per-feature weights, orders them with
    ``ut.getOrderRank``, truncates the ranking with the chosen cut method and
    optionally removes redundant features.

    Args:
        X: Feature matrix; ``X.shape[1]`` is read when ``cutMethod == 2``.
        y: Target values, forwarded to the weighting and cut functions.
        modelType: Forwarded to the ``cuts`` validation functions.
        runs: Forwarded to the ``cuts`` validation functions.
        processes: Forwarded to the ``p`` (parallel) helpers.
        measure: A value ``<= 4`` is passed as a single measure code;
            5 combines measures ``[0, 1]``; 6 combines ``[1, 3, 4]``. A list
            or tuple of measure codes is also accepted and combined with
            ``ut.sumMixedCorrelation``.
        binMethod: 0 uses ``p.binStatic``; 1 uses ``p.binarySearchBins``.
        cutMethod: -1 keeps the first 20 ranked features;
            0 uses ``cuts.greatestDiffCut``;
            1 uses ``cuts.monotonicValidationCut`` with ``consecutives=5``;
            2 uses ``cuts.searchValidationCut`` with
            ``consecutives=X.shape[1]``;
            3 uses ``cuts.searchValidationCut`` with ``consecutives=5``.
            Any other value leaves the ranking uncut.
        minRed: When 1, ``p.parallelRemoveRedundant`` is applied to the rank.
        rrThreshold: Passed as ``threshold`` to ``p.parallelRemoveRedundant``.
        debug: When true, prints the rank after the cut and after the
            redundancy step.

    Returns:
        The selected rank, as produced by the last step that ran.

    Raises:
        ValueError: If ``binMethod`` is not 0 or 1, or if ``measure`` is a
            scalar greater than 4 other than 5 or 6.
    """
    # Validate up front: an unknown binMethod used to leave ``weights``
    # undefined (or the combined weight list empty).
    if binMethod not in (0, 1):
        raise ValueError("binMethod must be 0 or 1, got %r" % (binMethod,))

    # Resolve ``measure`` into a single code or a list of codes. The type
    # check replaces the int/list ordering comparison, which only worked on
    # Python 2.
    if isinstance(measure, (list, tuple)):
        combined = True
    elif measure == 5:
        measure = [0, 1]
        combined = True
    elif measure == 6:
        measure = [1, 3, 4]
        combined = True
    elif measure <= 4:
        combined = False
    else:
        raise ValueError("unsupported measure: %r" % (measure,))

    def _weights(corrMethod):
        # Per-feature weights for one measure with the selected binning.
        if binMethod == 0:
            return p.binStatic(X=X, y=y, processes=processes, measure=corrMethod)
        return p.binarySearchBins(X=X, y=y, processes=processes, measure=corrMethod, split=0, useSteps=2, normalizeResult=False, debug=False)

    if combined:
        weights = ut.sumMixedCorrelation([_weights(corrMethod) for corrMethod in measure])
    else:
        weights = _weights(measure)

    rank = ut.getOrderRank(weights)
    if cutMethod == -1:
        rank = rank[0:20]
    elif cutMethod == 0:
        rank = rank[0:cuts.greatestDiffCut(weights=weights)]
    elif cutMethod == 1:
        rank = rank[0:cuts.monotonicValidationCut(X=X, y=y, modelType=modelType, rank=rank, consecutives=5, runs=runs)]
    elif cutMethod == 2:
        # searchValidationCut returns a pair; only the rank is used here.
        rank, _ = cuts.searchValidationCut(X=X, y=y, modelType=modelType, rank=rank, consecutives=X.shape[1], runs=runs)
    elif cutMethod == 3:
        rank, _ = cuts.searchValidationCut(X=X, y=y, modelType=modelType, rank=rank, consecutives=5, runs=runs)

    if debug:
        # Single-string form prints the same text on Python 2 and Python 3.
        print("cutted " + str(rank))
    if minRed == 1:
        # ``measure`` is the resolved value here (a list for combined measures).
        rank = p.parallelRemoveRedundant(X=X, rank=rank, processes=processes, measure=measure, threshold=rrThreshold)
    if debug:
        print("mrmr " + str(rank))
    return rank

Example #3

Show file

def artificialTest():
    """Run the static-binning feature-selection benchmark and print results.

    For each configured CSV file under ``Data/`` (last column is the target)
    the function:

    1. times ``bs.binStatic`` and ``p.binStatic`` and prints their weights,
    2. builds combined weights with ``ut.sumMixedCorrelation`` and ranks them
       with ``ut.getOrderRank``,
    3. evaluates all features with ``ml.clasificationJudge``,
    4. evaluates the subsets chosen by ``cuts.greatestDiffCut`` and by
       ``cuts.monotonicValidationCut`` (``consecutives=5`` and
       ``consecutives=X.shape[1]``),
    5. removes redundant features with ``p.parallelRemoveRedundant`` and
       evaluates the remaining ones.

    Everything is reported through ``print``; nothing is returned.
    """
    # Synthetic classification datasets
    #files = ['data1000-f1.csv', 'data1000-f2.csv','data1000-f3.csv','data1000-f4.csv','data5000-f1.csv', 'data5000-f2.csv','data5000-f3.csv','data5000-f4.csv','data20000-f1.csv', 'data20000-f2.csv','data20000-f3.csv','data20000-f4.csv','data1000-f1-r500.csv','data5000-f1-r500.csv','data20000-f1-r500.csv']
    #buenos = [[0,1,2,3,4,5,6,13,14],[0,1,8,9],[0,1,6,7],[0,1,3,2],[0,1,2,3,4,5,6,13,14],[0,1,8,9],[0,1,6,7],[0,1,3,2],[0,1,2,3,4,5,6,13,14],[0,1,8,9],[0,1,6,7],[0,1,3,2],[0,1,2,3,4,5,6,13,14],[0,1,2,3,4,5,6,13,14],[0,1,2,3,4,5,6,13,14]]
    #modelsType = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]

    # Synthetic regression datasets
    #files = ['regression/reg1000-f1.csv']
    #buenos = [[0,1,2,3,4,5]]
    #modelsType = [1]

    # Real datasets
    files = [
        'real/sonar_scale.csv', 'real/splice_scale.csv',
        'real/colon-cancer.csv', 'real/leu.csv', 'real/duke.csv',
        'real/BH20000.csv', 'real/madelon-test.csv'
    ]
    buenos = [['?'], ['?'], ['?'], ['?'], ['?'], ['?'], ['?']]
    modelsType = [0, 0, 0, 0, 0, 0, 0]

    def elapsed(start):
        # Seconds since ``start``, rounded to milliseconds, as text.
        return str(round(time.time() - start, 3))

    # NOTE(review): every evaluation below uses ml.clasificationJudge even
    # though modelType is forwarded to the cuts; confirm before enabling the
    # regression configuration above.
    for f, expected, modelType in zip(files, buenos, modelsType):
        filename = 'Data/' + f

        ########### Separate Data ###########
        print(filename, expected)
        data = read_csv(filename)
        # Positional indexing (DataFrame.ix no longer exists in pandas >= 1.0):
        # all columns but the last are features, the last is the target.
        X = np.array(data.iloc[:, 0:-1])
        y = np.array(data.iloc[:, -1])

        ########### Search ###########
        # Static search
        startTime = time.time()
        weights = bs.binStatic(X, y, 2)
        took = elapsed(startTime)
        print("Serial static " + took + " seconds.")
        print("weights:", weights[0:20])

        startTime = time.time()
        weights = p.binStatic(X, y, 0, 2)
        took = elapsed(startTime)
        print("Parallel static " + took + " seconds.")
        print("weights:", weights[0:20])

        # The combined weights are the ones used for the ranking below.
        weights = ut.sumMixedCorrelation(
            [bs.binStatic(X, y, 0),
             bs.binStatic(X, y, 1)])
        print("Combined Static:", weights)

        # Dynamic search (disabled)
        # startTime = time.time()
        # weights = bs.binarySearchBins(X,y,2,0,2)
        # endTime = time.time()
        # print "Serial dynamic " + str(round(endTime-startTime,3)) + " seconds."
        # print "weights:", weights[0:20]
        # startTime = time.time()
        # weights = p.binarySearchBins(X,y,0,2,0,2)
        # endTime = time.time()
        # print "Parallel dynamic " + str(round(endTime-startTime,3)) + " seconds."
        # print "weights:", weights[0:20]
        # weights = ut.sumMixedCorrelation([bs.binarySearchBins(X,y,0,0,2),bs.binarySearchBins(X,y,1,0,2)])
        # print "Combined Dyniamic:",weights

        ########### Cuts ###########
        print("\nCuts:")
        rank = ut.getOrderRank(weights)
        print("rank:", rank[0:20])

        # Baseline: all features.
        startTime = time.time()
        print("Full features Accurracy:",
              ml.clasificationJudge(X=X, y=y, testPerc=0.5, runs=3))
        took = elapsed(startTime)
        print("Full classification time: " + took + " seconds.")

        # Cut 1: greatestDiffCut on the weights.
        startTime = time.time()
        cutpos1 = cuts.greatestDiffCut(weights)
        print(rank[0:cutpos1])
        took = elapsed(startTime)
        print("\nCut greatestDiffCut time: " + took + " seconds.")
        startTime = time.time()
        print(
            "greatestDiffCut Accurracy:",
            ml.clasificationJudge(X=X[:, rank[0:cutpos1]],
                                  y=y,
                                  testPerc=0.5,
                                  runs=3), " #features:", cutpos1)
        took = elapsed(startTime)
        print("Classification greatestDiffCut time: " + took + " seconds.")

        # Cut 2: monotonicValidationCut with consecutives=5.
        startTime = time.time()
        cutpos2 = cuts.monotonicValidationCut(X=X,
                                              y=y,
                                              rank=rank,
                                              modelType=modelType,
                                              consecutives=5,
                                              runs=3)
        took = elapsed(startTime)
        print("\nCut MonotonicValidationCut time: " + took + " seconds.")
        startTime = time.time()
        print(
            "MonotonicValidation Accurracy:",
            ml.clasificationJudge(X=X[:, rank[0:cutpos2]],
                                  y=y,
                                  testPerc=0.5,
                                  runs=3), " #features:", cutpos2)
        took = elapsed(startTime)
        print("Classification MonotonicValidationCut time: " + took +
              " seconds.")

        # Cut 3: same cut with consecutives set to the number of features
        # (reported as "FullValidationCut").
        startTime = time.time()
        cutpos3 = cuts.monotonicValidationCut(X=X,
                                              y=y,
                                              rank=rank,
                                              modelType=modelType,
                                              consecutives=X.shape[1],
                                              runs=3)
        took = elapsed(startTime)
        print("Cut FullValidationCut time: " + took + " seconds.")
        startTime = time.time()
        print(
            "FullValidationCut Accurracy:",
            ml.clasificationJudge(X=X[:, rank[0:cutpos3]],
                                  y=y,
                                  testPerc=0.5,
                                  runs=3), " #features:", cutpos3)
        took = elapsed(startTime)
        print("Classification FullValidationCut time: " + took + " seconds.")

        # Removing redundant features
        originalRank = list(rank)
        print("\nFinding redundant features:")

        # Serial mode (disabled)
        # startTime = time.time()
        # rank =  set(bs.removeRedundant(X, rank))
        # print "Serial mode"
        # print "Original Rank:", originalRank
        # print "Not redundant:",rank
        # print "Redundant:",set(originalRank).difference(set(rank))
        # endTime = time.time()
        # print "Time finding redundant: " + str(round(endTime-startTime,3)) + " seconds."

        # Parallel mode
        startTime = time.time()
        rank = set(p.parallelRemoveRedundant(X, list(originalRank)))
        print("Parallel mode")
        print("Original Rank:", originalRank)
        print("Not redundant:", rank)
        print("Redundant:", set(originalRank).difference(rank))
        took = elapsed(startTime)
        rank = list(rank)
        print("Time finding redundant: " + took + " seconds.")
        print("Final not redundant features Accurracy:",
              ml.clasificationJudge(X=X[:, rank], y=y, testPerc=0.5, runs=3))

        print("-------------------------------------\n")