Code Example #1
import numpy
from DataSetLoaderLib import DataSetLoader


def loadDataset(identifier):
    """Load a dataset and its class labels; return the data and int labels."""
    d = DataSetLoader()
    x = d.LoadDataSet(identifier)
    print 'X', x.shape
    y = d.LoadDataSetClasses(identifier)
    print 'Y', y.shape
    # labels arrive as a row/column vector; cast and transpose first
    y = numpy.transpose(y.astype(numpy.int64))
    print 'Y', y.shape
    print "y before manual transform =", list(y)
    # bug fix: the original filled a separate `target` list with ints
    # but returned the untouched y; convert y itself instead
    y = [int(i) for i in y]
    print len(y)
    print y
    return x, y
Code Example #2
import numpy
import sklearn.utils.validation
from DataSetLoaderLib import DataSetLoader


def loadDataset(identifier):
    """Load a dataset and its class labels; flatten labels via sklearn."""
    d = DataSetLoader()
    x = d.LoadDataSet(identifier)
    print 'X', x.shape
    y = d.LoadDataSetClasses(identifier)
    print 'Y', y.shape
    # column_or_1d replaces the manual transpose used previously:
    #y = numpy.transpose(y.astype(numpy.int64))
    y = sklearn.utils.validation.column_or_1d(y, warn=True)
    print 'Y', y.shape
    print "y before manual transform =", list(y)
    # convert the flattened labels to plain Python ints
    y = [int(i) for i in y]
    print len(y)
    print y
    return x, y
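
Code Examples #1 and #2 differ only in how the label column is flattened: the first transposes and casts by hand, the second delegates to sklearn.utils.validation.column_or_1d. A minimal standalone sketch of the sklearn route, using a synthetic label array in place of DataSetLoader:

import numpy
from sklearn.utils.validation import column_or_1d

y = numpy.array([[0], [1], [1], [0]])   # labels as an (n, 1) column

flat = column_or_1d(y, warn=True)       # -> array([0, 1, 1, 0]), warns once
ints = [int(i) for i in flat]           # plain Python ints, as above

print flat.shape                        # (4,)
print ints                              # [0, 1, 1, 0]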
Code Example #3
# numpy, DataSetLoader and the classifiers/methods/sizes/preprocessings/
# validationTechnique lists are defined earlier in the script
dataset = "B"
f = open('mcc/B-Full-mccResults' + dataset + '.txt', 'w')
f.write(
    "dataset, size, method, classifier, validationTechnique, mc, timeTaken, extra info\n"
)

for classifierName in classifiers:
    for method in methods:
        for size in sizes:
            for preproc in preprocessings:
                for validation in validationTechnique:
                    d = DataSetLoader()
                    X_train = d.LoadDataSet("B_train")
                    y_train = d.LoadDataSetClasses("B_train")
                    X_test = d.LoadDataSet("B_test")
                    y_test = d.LoadDataSetClasses("B_test")

                    # quick hack ('chaipee'): flatten the label column
                    # vectors into lists of ints
                    y_train = numpy.transpose(y_train)
                    print y_train.shape
                    targets = list(y_train)
                    y_train = []
                    for i in targets:
                        y_train.append(int(i))

                    y_test = numpy.transpose(y_test)
                    print y_test.shape
                    targets = list(y_test)
Code Example #4
import time

import numpy
from sklearn.pipeline import make_pipeline
from sklearn import metrics
from sklearn import preprocessing
from sklearn.ensemble import VotingClassifier
from sklearn.model_selection import LeaveOneOut
from sklearn.externals import joblib

from DataSetLoaderLib import DataSetLoader

sizes = ['10', '50', '100', '150', '200', '250']
methods = ['MRMR', 'JMI', 'JMIM']
for method in methods:
    for size in sizes:
        print size
        print method
        d = DataSetLoader()
        X_train = d.LoadDataSet("A")
        y_train = d.LoadDataSetClasses("A")
        print X_train.shape
        print y_train.shape
        # flatten the label column vector into a list of ints
        y_train = numpy.transpose(y_train)
        print y_train.shape
        targets = list(y_train)
        y_train = []
        for i in targets:
            y_train.append(int(i))
        # load the feature indices selected on the first run
        indices = joblib.load('datasetA_pickles/selected_indices_' + method +
                              '.joblib.pkl')
        X_train = X_train[:, indices]
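
The indices loaded at the end of Code Example #4 come from an earlier feature-selection run pickled per method. A minimal sketch of that joblib round-trip; the file name and index values here are illustrative, not the project's real artifacts:

import numpy
from sklearn.externals import joblib

selected = numpy.array([3, 17, 42])      # e.g. columns chosen by MRMR
joblib.dump(selected, 'selected_indices_MRMR.joblib.pkl', compress=9)

indices = joblib.load('selected_indices_MRMR.joblib.pkl')
X_train = numpy.random.rand(5, 100)
X_train = X_train[:, indices]            # keep only the selected columns
print X_train.shape                      # (5, 3)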
Code Example #5
	
    # check the ranking of features (rank 1 = selected)
    print(feat_selector.ranking_)
    print(len(feat_selector.ranking_))
    selected_indices = feat_selector.ranking_

    # call transform() on X to filter it down to the selected features
    X_filtered = feat_selector.transform(X)
    return [X_filtered, selected_indices]


d = DataSetLoader()
x = d.LoadDataSet("B_train")
y = d.LoadDataSetClasses("B_train")
print y.shape
# flatten the label column vector into a list of ints
y = numpy.transpose(y)
print x.shape
print y.shape
target = []
y = list(y)
for i in y:
    target.append(int(i))
print len(y)
sizes = ['10', '50', '100', '150', '200', '250']
methods = ['MRMR', 'JMI', 'JMIM']
for method in methods:
    for size in sizes:
        print size
        print method
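
The feat_selector in Code Example #5 exposes ranking_ and transform(), an interface shared by several scikit-learn-style selectors; the sketch below assumes sklearn.feature_selection.RFE, which is only a stand-in since the original object's class is not shown. Note that ranking_ holds ranks (1 = selected), not column indices, even though the snippet stores it as selected_indices.

import numpy
from sklearn.feature_selection import RFE
from sklearn.linear_model import LogisticRegression

X = numpy.random.rand(20, 10)
y = numpy.random.randint(0, 2, 20)

feat_selector = RFE(LogisticRegression(), n_features_to_select=3)
feat_selector.fit(X, y)

print(feat_selector.ranking_)            # 1 marks the selected features
X_filtered = feat_selector.transform(X)
print X_filtered.shape                   # (20, 3)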
Code Example #6
import scipy.io
import numpy
from DataSetLoaderLib import DataSetLoader
import csv
#Used for storing and loading the trained classifier
from sklearn.externals import joblib

# blank lines to visually separate this run's console output
print("")
print("")
print("")
print("")

d = DataSetLoader()
variables = d.LoadDataSet("A")
targets = d.LoadDataSetClasses("A")
"""
convert an array to a csv string and back:
http://stackoverflow.com/questions/16482895/convert-a-numpy-array-to-a-csv-string-and-a-csv-string-back-to-a-numpy-array
targetsString = ','.join(['%d' % num for num in targets[0]])
variablesString = ','.join(['%.5f' % num for num in variables[0]])
numpy.fromstring(targetsString, sep=',')

load a csv file into an array:
http://stackoverflow.com/questions/13381815/python-csv-text-file-to-arrayi-j
"""
# SelectSubSetmRMR is defined elsewhere in this module
selected_indices = []
[subset, selected_indices] = SelectSubSetmRMR(variables, targets)
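
The docstring in Code Example #6 sketches an array-to-CSV-string round trip; a minimal runnable version of exactly those expressions:

import numpy

targets = numpy.array([[1, 0, 1, 1]])
targetsString = ','.join(['%d' % num for num in targets[0]])
print targetsString                      # 1,0,1,1

restored = numpy.fromstring(targetsString, sep=',')
print restored                           # [ 1.  0.  1.  1.]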
Code Example #7
    results = ""
    LIGs = eval(Dataset + "_LIGs")
    LIG_Accuracies = eval(Dataset + "_LIG_Accuracies")
    start_time = time.time()
    padding = 0
    #load the dataset
    d = DataSetLoader()
    #X_train_full = d.LoadDataSet(Dataset+"_train");
    #y_train = d.LoadDataSetClasses(Dataset+"_train");
    #targets=list(numpy.transpose(y_train))
    #y_train=[]
    #for i in targets:
    #    y_train.append(int(i))

    X_validate_full = d.LoadDataSet(Dataset + "_test")
    y_validate = d.LoadDataSetClasses(Dataset + "_test")
    print("Dimensions of validation data and labels:", X_validate_full.shape,
          y_validate.shape)
    targets = list(numpy.transpose(y_validate))
    y_validate = []
    if Dataset == "C":
        y_validate = numpy.array(targets)
        #y_validate[y_validate == 0] = -1
    else:
        for i in targets:
            y_validate.append(int(i))

    y_test = y_validate

    actuals = ','.join([str(elem) for elem in y_test])
    actuals = actuals.replace("\n",
Code Example #8
classifiers = ["RandomForest", "AdaBoost", "DT", "ExtraTree", "MLP", "SVM"]
validationTechniques = ["10FoldCV"]  # "LOOCV" currently disabled
preps = ["Standard", "Robust", "Quantile", "Imputer"]

basePath = ''  #needed when we want to run it locally
#Iterating over each method
for dataset in datasets:
    f = open('mcc/mccResults' + dataset + '.txt', 'a')
    f.write('\n{date:%Y-%m-%d_%H:%M:%S}'.format(date=datetime.datetime.now()))
    #f.write("dataset, size, method, classifier, validationTechnique, mc, timeTaken, cv.max, cv.mean, cv.min, cv.std, preprocessing");
    print "Dataset = ", dataset
    #instantiate the dataset loader
    d = DataSetLoader()
    #load the data and the corresponding labels for the current dataset
    X_train_full = d.LoadDataSet(dataset + "_train")
    y_train = d.LoadDataSetClasses(dataset + "_train")
    X_validate_full = d.LoadDataSet(dataset + "_test")
    y_validate = d.LoadDataSetClasses(dataset + "_test")

    print("Dimensions of training data and labels:", X_train_full.shape,
          y_train.shape)
    print("Dimensions of validation data and labels:", X_validate_full.shape,
          y_validate.shape)

    #READY with Dataset, going to perform the main loop now

    for method in methods:
        #Iterating over each size
        for size in sizes:
            print("Size and method:", size, method)
            #first run indices
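
The mc column written to the results files is presumably the Matthews correlation coefficient, as the mccResults file names suggest; a minimal sketch of producing one result row under that assumption, with hypothetical predictions and an illustrative file name:

from sklearn.metrics import matthews_corrcoef

y_true = [0, 1, 1, 0, 1]
y_pred = [0, 1, 0, 0, 1]                 # hypothetical classifier output
mc = matthews_corrcoef(y_true, y_pred)

f = open('mccResults_sketch.txt', 'a')   # illustrative path
f.write('\nA, 50, MRMR, SVM, 10FoldCV, %.4f, 0.0' % mc)
f.close()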
Code Example #9
            joblib.dump(values,
                        'selected_indices' + '_' + useMethod + '.joblib.pkl',
                        compress=9)
        except Exception:
            print "Error occurred"
        threadLock.release()
        print len(values)
        print "Exiting " + self.name
        return


threads = []

d = DataSetLoader()
G = d.LoadDataSet("B_train")
targets = d.LoadDataSetClasses("B_train")

print "Dataset loaded"

G = numpy.asarray(G)
targets = numpy.asarray(targets)
threadLock = threading.Lock()
print G.shape
vals = 649
original = 649
for i in range(0, 1547):
    print "vals= " + str(vals) + "\n"
    # Create new threads
    thread = myThread(i, "Thread-" + str(i), vals - original,
                      G[:, vals - original:vals], targets)
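
myThread is not shown in Code Example #9; a minimal sketch of the pattern it appears to follow, a thread that works on a column slice of G and serializes its shared section under threadLock. The worker body here is hypothetical.

import threading

import numpy

threadLock = threading.Lock()


class myThread(threading.Thread):
    def __init__(self, threadID, name, offset, block, targets):
        threading.Thread.__init__(self)
        self.threadID = threadID
        self.name = name
        self.block = block
        self.targets = targets

    def run(self):
        values = self.block.mean(axis=0)   # placeholder computation
        threadLock.acquire()               # protect the shared section
        try:
            print self.name + " processed " + str(len(values)) + " columns"
        finally:
            threadLock.release()
        print "Exiting " + self.name


G = numpy.random.rand(10, 6)
targets = numpy.random.randint(0, 2, 10)
thread = myThread(0, "Thread-0", 0, G[:, 0:3], targets)
thread.start()
thread.join()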
Code Example #10
methods = ['MRMR', 'JMI', 'JMIM']
validationTechnique = ['LOOCV', "10FoldCV"]
preprocessing = ['Standard', 'Imputer', 'Robust', 'Quantile']
#datasets = ["A","B"]
classifiers = ["MLP", "SVM", "AdaBoost", "DT", "RandomForest", "ExtraTree"]
dataset = "A"
f = open('mcc/mccResults' + dataset + '.txt', 'w')
f.write("dataset, size, method, classifier, validationTechnique, mc, timeTaken, extra info\n")

for classifierName in classifiers:
    for method in methods:
        for size in sizes:
            for preproc in preprocessing:
                d = DataSetLoader()
                X_train = d.LoadDataSet(dataset)
                y_train = d.LoadDataSetClasses(dataset)
                # quick hack ('chaipee'): flatten the label column vector to ints
                y_train = numpy.transpose(y_train)
                targets = list(y_train)
                y_train = []
                for i in targets:
                    y_train.append(int(i))

                # load the feature indices selected on the first run
                indices = joblib.load('dataset' + dataset + '_pickles/selected_indices_' + method + '.joblib.pkl')
                X_train = X_train[:, indices]
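
The preprocessing names looped over in Code Examples #3, #8, and #10 presumably map onto scikit-learn transformers; one plausible mapping follows, which is an assumption since the project's own construction is not shown (Imputer and QuantileTransformer match the scikit-learn versions of that era):

import numpy
from sklearn.preprocessing import (StandardScaler, RobustScaler,
                                   QuantileTransformer, Imputer)

preprocessors = {
    'Standard': StandardScaler(),
    'Robust': RobustScaler(),
    'Quantile': QuantileTransformer(),
    'Imputer': Imputer(strategy='mean'),
}

X_train = numpy.array([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]])
X_train = preprocessors['Standard'].fit_transform(X_train)
print X_train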
Code Example #11
#validationTechnique = ['LOOCV',"10FoldCV"] -- NOT USED???
#preprocessing = ['','NP']
#datasets = ["A","B"]
classifiers = ["MLP", "SVM", "AdaBoost", "DT", "RandomForest", "ExtraTree"]

f = open('mcc/mccResultsC.txt', 'w')
f.write("dataset, size, method, classifier, validationTechnique, mc, timeTaken\n")

for classifierName in classifiers:
    for method in methods:
        for size in sizes:
            d = DataSetLoader()
            X_train = d.LoadDataSet("C_train")
            y_train = d.LoadDataSetClasses("C_train")
            X_test = d.LoadDataSet("C_test")
            y_test = d.LoadDataSetClasses("C_test")

            # quick hack ('chaipee'): flatten the label column vectors to ints
            y_train = numpy.transpose(y_train)
            print y_train.shape
            targets = list(y_train)
            y_train = []
            for i in targets:
                y_train.append(int(i))

            # bug fix: the original transposed and listed y_train again here
            y_test = numpy.transpose(y_test)
            print y_test.shape
            targets = list(y_test)