Example #1
                  loss='binary_crossentropy',
                  metrics=['accuracy'])
    return model


sizes = ['10', '50', '100', '150', '200', '250']
methods = ['MRMR', 'JMI', 'JMIM']
for method in methods:
    for size in sizes:
        global curr_size
        curr_size = int(size)
        print size
        print method
        import time
        d = DataSetLoader()
        X_train = d.LoadDataSet("A")
        y_train = d.LoadDataSetClasses("A")
        print X_train.shape
        print y_train.shape
        #chaipee will fix it later on
        y_train = numpy.transpose(y_train)
        print y_train.shape
        targets = list(y_train)
        y_train = []
        for i in targets:
            #print i
            y_train.append(int(i))
        #print len(y_train)
        #first run indices
        indices = joblib.load('datasetA_pickles/selected_indices_' + method +
                              '.joblib.pkl')
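        # Sketch, not in the original snippet: the loaded index list is typically used
        # to keep only the selected feature columns, as Example #6 below does with
        # numpy indexing on the full matrix.
        X_train_selected = numpy.array(X_train)[:, indices]
        print X_train_selected.shape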
Example #2
from sklearn.neural_network import MLPClassifier
from sklearn.pipeline import make_pipeline
from sklearn import metrics
from sklearn import preprocessing
from sklearn.ensemble import VotingClassifier
from sklearn.model_selection import LeaveOneOut

sizes = ['10', '50', '100', '150', '200', '250']
methods = ['MRMR', 'JMI', 'JMIM']
for method in methods:
    for size in sizes:
        print size
        print method
        import time
        d = DataSetLoader()
        X_train = d.LoadDataSet("B_train")
        y_train = d.LoadDataSetClasses("B_train")
        print X_train.shape
        print y_train.shape
        #chaipee will fix it later on
        y_train = numpy.transpose(y_train)
        print y_train.shape
        targets = list(y_train)
        y_train = []
        for i in targets:
            #print i
            y_train.append(int(i))
        #print len(y_train)

        indices = joblib.load('datasetB_pickles/datasetB' + size + '-' +
                              method + '.joblib.pkl')
Example #3
preprocessings = ['Standard','Imputer','Robust','Quantile']
#datasets = ["A","B"]
classifiers = ["MLP","SVM","AdaBoost","DT","RandomForest","ExtraTree"]
dataset = "B"
f=open('mcc/mccResults'+dataset+'.txt','w');
f.write("dataset, size, method, classifier, validationTechnique, mc, timeTaken, extra info");

for classifierName in classifiers:
    for method in methods:
        for size in sizes:
            for preproc in preprocessings:
                for validation in validationTechnique:
                    #print size
                    #print method
                    d = DataSetLoader();
                    X_train = d.LoadDataSet("B_train");
                    y_train = d.LoadDataSetClasses("B_train");
                    X_test = d.LoadDataSet("B_test");
                    y_test = d.LoadDataSetClasses("B_test");

                    #chaipee will fix it later on
                    y_train = numpy.transpose(y_train)
                    print y_train.shape
                    targets = list(y_train)
                    y_train = []
                    for i in targets:
                        #print i
                        y_train.append(int(i))

                    y_test = numpy.transpose(y_test)
                    print y_test.shape
Example #4
methods=['MRMR','JMI','JMIM']
sizes=['10','50','100','150','200','250']
classifiers = ["MLP","SVM","RandomForest","AdaBoost","DT","ExtraTree"]
validationTechniques = ["10FoldCV","LOOCV"]
preps=["Standard","Robust","Quantile","Imputer"]
n_iter_search = 20

#Iterating over each method
for dataset in datasets:
	f=open('mcc/mccResults'+dataset+'.txt','a');
	f.write("dataset, size, method, classifier, validationTechnique, mc, timeTaken, cv.max, cv.mean, cv.min, cv.std, preprocessing");
	print "Dataset = ",dataset
	#initiating datasetloader object
	d = DataSetLoader();
	#loading relevant data and corresponding labels of the current dataset
	X_train_full = d.LoadDataSet(dataset+"_train");	
	y_train = d.LoadDataSetClasses(dataset+"_train");
	X_validate_full = d.LoadDataSet(dataset+"_test");
	y_validate = d.LoadDataSetClasses(dataset+"_test");		
	
	print ("Dimensions of training data and labels:",X_train_full.shape,y_train.shape)
	print ("Dimensions of validation data and labels:",X_validate_full.shape,y_validate.shape)
	
	#chaipee will fix it later on
	targets=list(numpy.transpose(y_train))
	y_train=[]
	for i in targets:
		y_train.append(int(i))
	

	targets=list(numpy.transpose(y_validate))
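	# Sketch, not in the original snippet: n_iter_search above suggests a randomized
	# hyper-parameter search; 'param_distributions' is a hypothetical placeholder for
	# the real per-classifier search space.
	from sklearn.model_selection import RandomizedSearchCV
	from sklearn.neural_network import MLPClassifier
	search = RandomizedSearchCV(MLPClassifier(), param_distributions, n_iter=n_iter_search, cv=10)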
Example #5
    results = "";
    FTs = eval(Dataset + "_FTs");
    FT_Accuracies = eval(Dataset + "_FT_Accuracies")
    start_time = time.time();
    padding = 0;
    #load the dataset
    d = DataSetLoader();
    #X_train_full = d.LoadDataSet(Dataset+"_train");
    #y_train = d.LoadDataSetClasses(Dataset+"_train");
    #targets=list(numpy.transpose(y_train))
    #y_train=[]
    #for i in targets:
    #    y_train.append(int(i))


    X_validate_full = d.LoadDataSet(Dataset + "_test");
    y_validate = d.LoadDataSetClasses(Dataset + "_test");
    print ("Dimensions of validation data and labels:", X_validate_full.shape, y_validate.shape)
    targets = list(numpy.transpose(y_validate))
    y_validate = []
    if Dataset == "C":
        y_validate = numpy.array(targets)
        #y_validate[y_validate == 0] = -1
    else:
        for i in targets:
            y_validate.append(int(i))

    y_test = y_validate

    actuals = ','.join([str(elem) for elem in y_test])
    actuals = actuals.replace("\n","").replace("[","").replace("]","").replace(" ",",") #to handle dataset C targets
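    # Sketch, not in the original snippet: the 'mc' values written out in Examples #3
    # and #4 are presumably Matthews correlation coefficients; given a hypothetical
    # 'predicted' list produced by the classifier under test, they can be computed as:
    from sklearn.metrics import matthews_corrcoef
    mc = matthews_corrcoef(y_test, predicted)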
Example #6
from MachineSpecificSettings import Settings
import scipy.io
import numpy
import joblib
from DataSetLoaderLib import DataSetLoader
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import cross_val_score

from sklearn import metrics


print("")
print("")
print("")
print("")

targets=numpy.array(joblib.load('DatasetA_ValidationClasses.joblib.pkl'))
d = DataSetLoader();
G = d.LoadDataSet("A");
indices= joblib.load('selected_indicesv2.joblib.pkl')
result=numpy.array(G)[:,indices]
clf = MLPClassifier(activation='logistic',solver='sgd')
import time
start_time=time.time()
scores = cross_val_score(clf, result, targets, cv=10)
end_time=time.time()-start_time
print end_time
for i in scores:
	print i
print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))


filename='MLP_k-fold.joblib.pkl'
joblib.dump(clf,filename, compress=9) 
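# Note, not in the original snippet: cross_val_score fits clones of the estimator
# internally, so clf itself is still unfitted at this point. If a trained model is
# wanted on disk, fit it on the full reduced data first and dump that instead.
clf.fit(result, targets)
joblib.dump(clf, filename, compress=9)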
Example #7
methods = ['MRMR']  #,'JMI','JMIM'
sizes = ['10']  #,'50','100','150','200','250'
validationTechniques = ["10FoldCV"]  #"LOOCV",
preps = ["Standard", "Robust", "Quantile", "Imputer"]

#Iterating over each method
print("Dataset, prepType, validationTechnique, method, size")
for dataset in datasets:
    #	f=open('mcc/mccEnsembleResults.txt','w');
    #	f.write("dataset, size, method, classifier, validationTechnique, mc, timeTaken, cv.max, cv.mean, cv.min, cv.std, preprocessing");

    print "Dataset = ", dataset
    #initiating datasetloader object
    d = DataSetLoader()
    #loading relevant data and corresponding labels of the current dataset
    X_train = d.LoadDataSet(dataset + "_train")
    y_train = d.LoadDataSetClasses(dataset + "_train")
    X_test = d.LoadDataSet(dataset + "_test")
    y_test = d.LoadDataSetClasses(dataset + "_test")

    print("Dimensions of validation data and labels:", X_test.shape,
          y_test.shape)

    #chaipee will fix it later on
    targets = list(numpy.transpose(y_train))
    y_train = []
    for i in targets:
        y_train.append(int(i))

    targets = list(numpy.transpose(y_test))
    y_test = []
Example #8
"""
TEST PROGRAM
"""
from GlobalUtils import *
import scipy
from MachineSpecificSettings import Settings
import scipy.io
import numpy
from DataSetLoaderLib import DataSetLoader
import csv

#variables =joblib.load('DatasetA_Validation.joblib.pkl')
#targets =joblib.load('DatasetA_ValidationClasses.joblib.pkl')
d = DataSetLoader();
variables  = d.LoadDataSet("A");
targets = d.LoadDataSetClasses("A");
def test_range(start, end, G, targets, half):
    try:
        print "first"
        print start
        print end - half
        SelectSubSetmRMR(G[:, start:end - half], targets)
    except:
        return 1
    try:
        print "second"
        print start + half
        print end
        SelectSubSetmRMR(G[:, start + half:end], targets)
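# Hypothetical call, not in the original snippet: bisect the feature range to see
# which half makes SelectSubSetmRMR fail.
test_range(0, variables.shape[1], variables, targets, variables.shape[1] // 2)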
Example #9
						i+=1
					values.append(i+self.add_by)
				joblib.dump(values,'selected_features/datasetC/selected_indices'+'_'+useMethod+'.joblib.pkl', compress=9)
		except:
			type, value, traceback = sys.exc_info()
			print('Error Occurred %s: %s: %s' % (type, value, traceback))
		threadLock.release()
		print len(values)
		print "Exiting " + self.name
		return

threads=[]

d = DataSetLoader();
G = d.LoadDataSet("C_train");
targets = d.LoadDataSetClasses("C_train");
#chaipee will fix it later on
#y_train=numpy.transpose(targets)
y_train=numpy.asarray(targets)
print y_train.shape
targets=list(y_train)
y_train=[]
for i in targets:
	print i
	y_train.append(int(i))
targets = y_train
#targets =numpy.asarray(targets )
#targets = column_or_1d(targets, warn=True)

print "Dataset loaded"
Example #10
def performTreeletClustering(DatasetName):
    saveFreq = 1000
    #temp value for i
    x = -1
    if (not (os.path.isfile("objs.pickle"))):
        print "New Start"
        d = DataSetLoader()
        G = d.LoadDataSet(DatasetName)
        F = G
        M = []
        cacheTopXPerPart = d.CacheTopXPerPart(DatasetName)
        corrCalculator = PairwisePearsonCorrelationCalculator()
        print "calling corr calculator"
        corrMatrix = corrCalculator.CalculateSimilarity(
            G, d.GetPartSize(DatasetName), cacheTopXPerPart)

    else:
        print "continuing from where we left off"
        d = DataSetLoader()
        G, F, M, x, corrMatrix, cacheTopXPerPart = read()
        corrCalculator = PairwisePearsonCorrelationCalculator()
    p = F[0, :].size
    #because we have already done the previous iteration and loaded that one
    #save(G,F,M,i,corrMatrix,cacheTopXPerPart)
    i = x + 1
    print corrMatrix[0]
    while i < p:
        #for i in range (x+1, p):
        recalc = False
        if checkCorr(corrMatrix, p) == 0:
            print "ERROR IN CORRMATRIX INDEX"
            return 0
        #calculating value of p
        p = F[0, :].size
        print "Value of i is : " + str(i) + " out of " + str(p)
        theVectors = corrMatrix[0]
        #this is always the max corr so the element we want to process
        try:
            if (corrMatrix[0][3] == ''):
                recalc = False
            else:
                print corrMatrix[0]
                recalc = True
        except:
            pass
        Fa = F[:, theVectors[0]]
        Fb = F[:, theVectors[1]]
        print "calling generate metagene"
        m = generateNewMetaGene(Fa, Fb)
        print "calling scipy delete on F"
        F = scipy.delete(F, theVectors[1], 1)
        if not len(M):  #if this is the first meta gene in this matrix
            M = m
        else:
            M = numpy.column_stack((m, M))  #include in the meta genes set as well
        corrMatrix.pop(0)
        corrMatrix = corrCalculator.UpdateSimilarity(corrMatrix, F, list(m),
                                                     theVectors[0],
                                                     theVectors[1])
        F[:, theVectors[0]] = m
        if len(corrMatrix) <= 0 or recalc == True:  #everything after this is potentially incorrect so lets recalculate the matrix
            corrMatrix = corrCalculator.CalculateSimilarity(
                F, d.GetPartSize(DatasetName), cacheTopXPerPart)
        if i % saveFreq == 0:
            save(G, F, M, i, corrMatrix, cacheTopXPerPart)
        i += 1
    F = numpy.column_stack((G, M))  #scipy.append(G, M, 1) #define a new expanded featureset F = G U M
    return F
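# Hypothetical usage, not in the original snippet: dataset names elsewhere in these
# examples look like "A", "B", "C_train", so a call would be along these lines,
# returning the expanded feature set F = G U M.
F_expanded = performTreeletClustering("C_train")
print F_expanded.shape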