def makeMnistDataSets(path):
    """Return a pair (train, test) of ClassificationDataSets built from
    the MNIST files found in `path`.

    Each sample is a flattened 28x28 image linked to a 10-element
    one-hot label vector (via flaggedArrayByIndex).
    """
    def _load(image_name, label_name):
        # Build one dataset from an image-file / label-file pair; the
        # original duplicated this code for the train and test sets.
        ds = ClassificationDataSet(28 * 28, 10)
        image_file = os.path.join(path, image_name)
        label_file = os.path.join(path, label_name)
        one_hot = (flaggedArrayByIndex(l, 10) for l in labels(label_file))
        for image, label in zip(images(image_file), one_hot):
            ds.appendLinked(image, label)
        return ds

    test = _load('t10k-images-idx3-ubyte', 't10k-labels-idx1-ubyte')
    train = _load('train-images-idx3-ubyte', 'train-labels-idx1-ubyte')
    return train, test
def mlpClassifier(X, y, train_indices, test_indices, mom=0.1, weightd=0.01, epo=5): X_train, y_train, X_test, y_test = X[train_indices], y[train_indices], X[ test_indices], y[test_indices] #Converting the data into a dataset which is easily understood by PyBrain. tstdata = ClassificationDataSet(X.shape[1], target=1, nb_classes=8) trndata = ClassificationDataSet(X.shape[1], target=1, nb_classes=8) for i in range(y_train.shape[0]): trndata.addSample(X_train[i, :], y_train[i]) for i in range(y_test.shape[0]): tstdata.addSample(X_test[i, :], y_test[i]) trndata._convertToOneOfMany() tstdata._convertToOneOfMany() mlpc = buildNetwork(trndata.indim, 100, trndata.outdim, outclass=SoftmaxLayer) trainer = BackpropTrainer(mlpc, dataset=trndata, momentum=mom, verbose=True, weightdecay=weightd) trainer.trainEpochs(epo) y_pred = trainer.testOnClassData(dataset=tstdata) print "Done. Accu: " + "%.2f" % accuracy_score(y_test, y_pred) return y_test, y_pred
def consturt_train_data(self):
    """Build the PyBrain datasets used by this classifier.

    Returns (trndata, tstdata, testdata, ds): `ds` holds every labelled
    sample, trndata/tstdata are a 3:1 random split of `ds`, and
    `testdata` wraps self.test_data with placeholder class-0 labels.
    The first three are one-hot encoded via _convertToOneOfMany().
    """
    def _rewrap(subset):
        # splitWithProportion() returns plain SupervisedDataSets; copy
        # the samples back into 2-class ClassificationDataSets.
        out = ClassificationDataSet(self.vct_len, 1, nb_classes=2)
        for n in range(subset.getLength()):
            out.appendLinked(subset.getSample(n)[0], subset.getSample(n)[1])
        return out

    ds = ClassificationDataSet(self.vct_len, 1, nb_classes=2)
    for i in range(len(self.output_train)):
        ds.appendLinked(self.eigenvector[i], self.output_train[i])
    ds.calculateStatistics()

    # Hold out 25% of the labelled samples for testing.
    tstdata_temp, trndata_temp = ds.splitWithProportion(0.25)
    tstdata = _rewrap(tstdata_temp)
    trndata = _rewrap(trndata_temp)

    # Unlabelled evaluation data gets a dummy class of 0.
    testdata = ClassificationDataSet(self.vct_len, 1, nb_classes=2)
    for sample in self.test_data:
        testdata.addSample(sample, [0])

    trndata._convertToOneOfMany()
    tstdata._convertToOneOfMany()
    testdata._convertToOneOfMany()
    return trndata, tstdata, testdata, ds
def get_datasets(inputdir, dstype="all", proportion=0.3):
    """Build (train_set, test_set) ClassificationDataSets from the image
    files in `inputdir`.

    dstype selects the files and labelling scheme: "symbol", "pitch",
    "note_duration", "rest_duration", "quarters_and_eighths", or
    anything else for the default symbol set.  `proportion` is the
    fraction of files held out for testing.  Transformed variants are
    added to the training files only, so they never leak into the test
    set.
    """
    untransformed_files = [f for f in os.listdir(inputdir) if "trans" not in f]
    if dstype == "symbol":
        files = [f for f in untransformed_files if f.endswith(".png")]
        classifier_func = classes.get_symbol_class
        num_classes = 2
    elif dstype == "pitch":
        files = [f for f in untransformed_files if "note" in f]
        classifier_func = classes.get_pitch_class
        num_classes = 18
    elif dstype == "note_duration":
        files = [
            f for f in untransformed_files if "note" in f and "other" not in f
        ]
        classifier_func = classes.get_duration_class
        num_classes = 5
    elif dstype == "rest_duration":
        files = [f for f in untransformed_files if "rest" in f]
        classifier_func = classes.get_duration_class
        num_classes = 5
    elif dstype == "quarters_and_eighths":
        files = [
            f for f in untransformed_files
            if "note-quarter" in f or "note-eighth" in f
        ]
        classifier_func = classes.quarter_or_eighth
        num_classes = 2
    else:
        files = []
        classifier_func = classes.get_symbol_class
        num_classes = 18

    random_indices = np.random.permutation(len(files))
    sep = int(len(files) * proportion)
    # PERF FIX: the original tested `i in <numpy array>` for every file,
    # an O(n) scan each time.  Sets make each membership test O(1).
    train_indices = set(random_indices[sep:])
    test_indices = set(random_indices[:sep])
    train_files = [f for i, f in enumerate(files) if i in train_indices]
    test_files = [f for i, f in enumerate(files) if i in test_indices]

    # Augment the training set with pre-computed transformed variants.
    transformed = []
    for f in train_files:
        transformed += images.get_transformed_filenames(f)
    train_files += transformed

    Xtrain, ytrain = get_data(inputdir, train_files, classifier_func)
    Xtest, ytest = get_data(inputdir, test_files, classifier_func)

    train_set = ClassificationDataSet(Xtrain.shape[1], nb_classes=num_classes)
    for i in range(len(Xtrain)):
        train_set.addSample(Xtrain[i], ytrain[i])
    test_set = ClassificationDataSet(Xtest.shape[1], nb_classes=num_classes)
    for i in range(len(Xtest)):
        test_set.addSample(Xtest[i], ytest[i])
    return train_set, test_set
def main():
    """Train a feed-forward classifier on normalizedData.csv (13
    features, 3 classes, label in column 0) and print the network's
    outputs for three fixed feature vectors."""
    rows = genfromtxt('normalizedData.csv', delimiter=',')
    alldata = ClassificationDataSet(13, 1, nb_classes=3)
    for row in rows:
        # Column 0 holds the 1-based class label; columns 1-13 are features.
        alldata.addSample(row[1:14], int(row[0]) - 1)

    # Hold out 25% of the samples for testing.
    tstdata_temp, trndata_temp = alldata.splitWithProportion(0.25)

    def rewrap(subset):
        # splitWithProportion returns SupervisedDataSets; rebuild them as
        # ClassificationDataSets so _convertToOneOfMany is available.
        wrapped = ClassificationDataSet(13, 1, nb_classes=3)
        for n in range(subset.getLength()):
            wrapped.addSample(subset.getSample(n)[0], subset.getSample(n)[1])
        return wrapped

    tstdata = rewrap(tstdata_temp)
    trndata = rewrap(trndata_temp)
    trndata._convertToOneOfMany()
    tstdata._convertToOneOfMany()

    # Three hidden layers of four units each.
    fnn = buildNetwork(trndata.indim, 4, 4, 4, trndata.outdim)
    trainer = BackpropTrainer(fnn, trndata, learningrate=0.1)

    # Train one epoch at a time until training error drops below 1%.
    while True:
        trainer.trainEpochs(1)
        trnresult = percentError(trainer.testOnClassData(), trndata['class'])
        print("Training Test Error: %5.2f%%" % trnresult)
        if trnresult < 1:
            break

    tstresult = percentError(trainer.testOnClassData(dataset=tstdata),
                             tstdata['class'])
    print("test error: %5.2f%%" % tstresult)

    out1 = fnn.activate([
        0.70789474, 0.13636364, 0.60962567, 0.31443299, 0.41304348, 0.83448276,
        0.70253165, 0.11320755, 0.51419558, 0.47098976, 0.33333333, 0.58608059,
        0.71825963
    ])
    out2 = fnn.activate([
        0.26578947, 0.70355731, 0.54545455, 0.58762887, 0.10869565, 0.3862069,
        0.29746835, 0.54716981, 0.29652997, 0.11262799, 0.25203252, 0.47619048,
        0.21540656
    ])
    out3 = fnn.activate([
        0.81578947, 0.66403162, 0.73796791, 0.71649485, 0.2826087, 0.36896552,
        0.08860759, 0.81132075, 0.29652997, 0.67576792, 0.10569106, 0.12087912,
        0.20114123
    ])
    print(out1, out2, out3)
def execute(data, learn_rate, momentum_rate, file_result, p_train):
    """Train a small 3-class network on `data` and append the run's
    result line to the file named by `file_result`.

    data: array whose first 4 columns are inputs and whose last column
    is the class label.  The first p_train fraction of rows is used for
    training, the remainder for testing.
    """
    inputs = data[:, :-1]   # every column except the last
    targets = data[:, -1]   # last column holds the class label

    train_data = ClassificationDataSet(4, 1, nb_classes=3)
    test_data = ClassificationDataSet(4, 1, nb_classes=3)
    size = int(len(inputs) * p_train)
    for n in range(0, size):
        train_data.addSample(inputs[n], [targets[n]])
    for n in range(size, len(inputs)):
        test_data.addSample(inputs[n], [targets[n]])
    train_data._convertToOneOfMany()
    test_data._convertToOneOfMany()

    fnn = buildNetwork(train_data.indim, 2, train_data.outdim)
    trainer = BackpropTrainer(fnn, train_data, learningrate=learn_rate,
                              momentum=momentum_rate, verbose=False)

    epochs = 300
    for _ in range(epochs):
        trainer.train()

    # Count correctly classified test samples (convert() maps the raw
    # activation vector to a one-hot prediction).
    correct = 0
    for test in test_data:
        prediction = convert(fnn.activate(test[0]))
        print(prediction, test[1])
        if (prediction == test[1]).all():
            correct += 1
    print(correct)
    accuracy = correct / len(test_data)  # NOTE: named `error` in the original

    line_result = str(momentum_rate) + "\t" + str(learn_rate) + "\t" + str(
        accuracy) + "\t" + str(epochs) + "\t" + str(p_train)
    # BUG FIX: the original wrote to an undefined global `f` and never
    # used the `file_result` parameter; write where the caller asked.
    with open(file_result, 'a') as f:
        f.write(line_result + "\n")
        f.flush()
def splitWithProportion(self, proportion=0.7):
    """Split this dataset into two new ClassificationDataSets.

    The first returned set holds `proportion` of the samples, chosen at
    random; the second holds the remainder.
    """
    indicies = random.permutation(len(self))
    separator = int(len(self) * proportion)
    leftIndicies = indicies[:separator]
    rightIndicies = indicies[separator:]
    leftDs = ClassificationDataSet(inp=self['input'][leftIndicies].copy(),
                                   target=self['target'][leftIndicies].copy())
    # BUG FIX: the right-hand split previously indexed its targets with
    # leftIndicies, pairing the right-hand inputs with the wrong labels
    # (compare the correct sibling implementation of this method).
    rightDs = ClassificationDataSet(inp=self['input'][rightIndicies].copy(),
                                    target=self['target'][rightIndicies].copy())
    return leftDs, rightDs
def classify(self, file_name):
    """Reload persisted state, rebuild the datasets and network, train on
    the patient data in `file_name`, and save the result.

    NOTE(review): the trainer is bound to self.ds *before*
    add_patients_data_to_train() runs — this relies on BackpropTrainer
    keeping a reference to the dataset rather than copying it; confirm.
    """
    self.load()
    # 8 input features, binary classification (2 output classes).
    self.t_ds = ClassificationDataSet(8, 1, nb_classes=2)
    self.ds = ClassificationDataSet(8, 1, nb_classes=2)
    # 8 -> 30 -> 2 network: tanh hidden layer, softmax output.
    self.classifier_neural_net = buildNetwork(8,
                                              30,
                                              2,
                                              outclass=SoftmaxLayer,
                                              hiddenclass=TanhLayer)
    #self.add_patients_data_to_train(file_name);
    self.trainer = BackpropTrainer(self.classifier_neural_net, self.ds)
    self.add_patients_data_to_train(file_name)
    self.trainer.train()
    self.save()
def splitWithProportion(self, proportion=0.7):
    """Randomly split the samples into two new datasets; the first
    receives the fraction given by `proportion`, the second the rest."""
    shuffled = random.permutation(len(self))
    cut = int(len(self) * proportion)
    first, second = shuffled[:cut], shuffled[cut:]

    def subset(idx):
        # Copy the selected rows into a fresh dataset.
        return ClassificationDataSet(inp=self['input'][idx].copy(),
                                     target=self['target'][idx].copy())

    return subset(first), subset(second)
def classif(): means = [(-1, 0), (2, 4), (3, 1)] cov = [diag([1, 1]), diag([0.5, 1.2]), diag([1.5, 0.7])] alldata = ClassificationDataSet(2, 1, nb_classes=3) for n in xrange(400): for klass in range(3): input = multivariate_normal(means[klass], cov[klass]) alldata.addSample(input, [klass]) tstdata, trndata = alldata.splitWithProportion(0.25) trndata._convertToOneOfMany() tstdata._convertToOneOfMany() print "Number of training patterns: ", len(trndata) print "Input and output dimensions: ", trndata.indim, trndata.outdim print "First sample (input, target, class):" print trndata['input'][0], trndata['target'][0], trndata['class'][0] fnn = buildNetwork(trndata.indim, 5, trndata.outdim, outclass=SoftmaxLayer) trainer = BackpropTrainer(fnn, dataset=trndata, momentum=0.1, verbose=True, weightdecay=0.01) ticks = arange(-3., 6., 0.2) X, Y = meshgrid(ticks, ticks) # need column vectors in dataset, not arrays griddata = ClassificationDataSet(2, 1, nb_classes=3) for i in xrange(X.size): griddata.addSample([X.ravel()[i], Y.ravel()[i]], [0]) griddata._convertToOneOfMany( ) # this is still needed to make the fnn feel comfy for i in range(20): trainer.trainEpochs(5) trnresult = percentError(trainer.testOnClassData(), trndata['class']) tstresult = percentError(trainer.testOnClassData(dataset=tstdata), tstdata['class']) print "epoch: %4d" % trainer.totalepochs, \ " train error: %5.2f%%" % trnresult, \ " test error: %5.2f%%" % tstresult out = fnn.activateOnDataset(griddata) out = out.argmax( axis=1) # the highest output activation gives the class out = out.reshape(X.shape) print out
def train(X, y):
    """Train (or resume from fnn.xml) a neural-network classifier on the
    DataFrame X / labels y; return (true classes, predictions) for a
    held-out 20% split."""
    ds = ClassificationDataSet(len(X.columns), 1, nb_classes=2)
    for k in xrange(len(X)):
        ds.addSample(X.iloc[k], np.array(y[k]))

    tstdata, trndata = ds.splitWithProportion(0.20)
    trndata._convertToOneOfMany()
    tstdata._convertToOneOfMany()

    hidden_size = 5
    # Resume from a previously saved network when one exists.
    if os.path.isfile('fnn.xml'):
        fnn = NetworkReader.readFrom('fnn.xml')
    else:
        fnn = buildNetwork(trndata.indim,
                           hidden_size,
                           trndata.outdim,
                           outclass=SoftmaxLayer)

    trainer = BackpropTrainer(fnn,
                              dataset=trndata,
                              momentum=0.05,
                              learningrate=0.1,
                              verbose=False,
                              weightdecay=0.01)
    trainer.trainUntilConvergence(verbose=False,
                                  validationProportion=0.15,
                                  maxEpochs=100,
                                  continueEpochs=10)
    NetworkWriter.writeToFile(fnn, 'oliv.xml')

    return tstdata['class'], trainer.testOnClassData(dataset=tstdata)
def test_trained_model(filename, training_filename): fileObject = open(filename, 'r') fann = pickle.load(fileObject) testing_dataset = np.genfromtxt(training_filename, skip_header=0, dtype="int", delimiter='\t') data = ClassificationDataSet(len(testing_dataset[0]) - 1, 2, nb_classes=2) for aSample in testing_dataset: data.addSample(aSample[0:len(aSample) - 1], [aSample[len(aSample) - 1]]) # data._convertToOneOfMany() test = BackpropTrainer(fann, dataset=data, momentum=0.1, verbose=False, weightdecay=0.01) trnresult = percentError(test.testOnClassData(), data['class']) results = "Train error on testing data : %5.2f%%" % trnresult log_file.write(results + " , The length of data " + str(len(data))) print results
def generate_Testdata(index):
    """Load the Leukemia1 test split, keep only the feature columns named
    by `index`, and wrap the samples in a 3-class ClassificationDataSet
    (B_cell=0, AML=1, T_cell=2)."""
    INPUT_FEATURES = 200
    CLASSES = 3
    label_to_class = {"B_cell": 0, "AML": 1, "T_cell": 2}

    train_text, train_classfi_number, train_classfi, train_feature_name = \
        getTargetData("Leukemia1_test.data")
    train_text = getIndexData(train_text, index)

    alldata = ClassificationDataSet(INPUT_FEATURES, 1, nb_classes=CLASSES)
    for features, label in zip(train_text, train_classfi):
        # Samples with an unrecognised label are skipped, exactly as in
        # the original if/elif chain.
        if label in label_to_class:
            alldata.addSample(features, label_to_class[label])
    return {
        'minX': 0,
        'maxX': 1,
        'minY': 0,
        'maxY': 1,
        'd': alldata,
        'index': index
    }
def build_dataset(mongo_collection, patch_size=IMG_SIZE, orig_size=IMG_SIZE,
                  nb_classes=2, edgedetect=True, transform=True):
    """Build a ClassificationDataSet from images in a Mongo collection
    (deprecated).

    Each document must provide raw image bytes under "image" and a label
    under "class".  Images are optionally edge-detected (Canny),
    centre-cropped from orig_size to patch_size, and optionally
    augmented via transform_img().
    """
    if edgedetect:
        import cv2
    from pybrain.datasets import SupervisedDataSet, ClassificationDataSet
    patch_size = min(patch_size, orig_size)
    # int() keeps the slice indices integral on Python 2, where round()
    # returns a float.
    trim = int(round((orig_size - patch_size) / 2))
    #ds = SupervisedDataSet(patch_size**2, 1)
    ds = ClassificationDataSet(patch_size**2, target=1, nb_classes=nb_classes)
    cursor = list(mongo_collection.find())
    for one_image in cursor:
        # Convert from raw bytes to a numpy array.  FIX: np.frombuffer
        # replaces the deprecated np.fromstring.
        img_array = np.frombuffer(one_image["image"], dtype='uint8')
        if edgedetect:
            img_array = cv2.Canny(img_array, 150, 200)
        img_crop = img_array.reshape(orig_size, orig_size)[trim:(trim + patch_size),
                                                           trim:(trim + patch_size)]
        classification = float(one_image["class"])
        if transform:
            transformed = transform_img(img_crop.ravel(), patch_size)
        else:
            transformed = [img_crop.ravel()]
        for one_img in transformed:
            ds.addSample(one_img.ravel(), classification)
    print('New dataset contains %d images (%d positive).' %
          (len(ds), sum(ds['target'])))
    return ds
def generate_Testdata(index):
    """Load the Breast test split, keep only the feature columns named by
    `index`, and wrap the samples in a 5-class ClassificationDataSet
    (lumina=0, ERBB2=1, basal=2, normal=3, cell_lines=4)."""
    INPUT_FEATURES = 200
    CLASSES = 5
    label_to_class = {
        "lumina": 0,
        "ERBB2": 1,
        "basal": 2,
        "normal": 3,
        "cell_lines": 4,
    }

    train_text, train_classfi_number, train_classfi, train_feature_name = \
        getTargetData("Breast_test.data")
    train_text = getIndexData(train_text, index)

    alldata = ClassificationDataSet(INPUT_FEATURES, 1, nb_classes=CLASSES)
    for features, label in zip(train_text, train_classfi):
        # Samples with an unrecognised label are skipped, exactly as in
        # the original if/elif chain.
        if label in label_to_class:
            alldata.addSample(features, label_to_class[label])
    return {
        'minX': 0,
        'maxX': 1,
        'minY': 0,
        'maxY': 1,
        'd': alldata,
        'index': index
    }
def __init__(self, indim, hiddendim, nb_classes):
    """Create an empty ClassificationDataSet and a feed-forward network
    with one hidden layer and a softmax output.

    indim      -- number of input features
    hiddendim  -- number of hidden units
    nb_classes -- number of output classes
    """
    #net = buildNetwork(size[0] * size[1], 96, ds.outdim, outclass=SoftmaxLayer)
    self.ds = ClassificationDataSet(indim, nb_classes=nb_classes)
    self.net = buildNetwork(indim,
                            hiddendim,
                            nb_classes,
                            outclass=SoftmaxLayer)
def gen_data(csv_file, db):
    """Read "filename,keyword" lines from csv_file, load each listed
    image from directory `db`, and build a ClassificationDataSet of RGB
    image features against one-hot keyword classes.

    Returns (dataset, number of classes, keyword -> class-index dict).
    """
    keywords = {}
    img_list = []
    with open(csv_file) as handle:
        lines = handle.readlines()
    for line in lines:
        fields = line.replace('\n', '').split(',')
        # First occurrence of a keyword assigns it the next class index.
        if fields[1] not in keywords:
            keywords[fields[1]] = len(keywords)
        img_list.append(fields)

    n = len(keywords)
    data = ClassificationDataSet(768, n, nb_classes=n)
    for img in img_list:
        im = Image.open(db + '/' + img[0]).convert('RGB')
        data.addSample(get_img_feats(im),
                       get_keyword_class(keywords[img[1]], n))
    return data, n, keywords
def make_data_set(beg, end): ds = ClassificationDataSet( HISTORY * 2 + 1, class_labels=['None', 'Buy', 'Sell']) #SupervisedDataSet(HISTORY*3, 1) trainQ = rawData[(rawData.tradeDate <= end) & (rawData.tradeDate >= beg)] for idx in range(1, len(trainQ) - HISTORY - 1 - HOLD - 1): cur = idx + HISTORY - 1 if (abs(trainQ.iloc[cur]['MACD']) > 0.5): continue sample = [] for i in range(HISTORY): #sample.append( trainQ.iloc[idx+i]['EMAL'] )# [['EMAL','DIFF','DEA','CDIS']] ) ) sample.append(trainQ.iloc[idx + i]['DIFF']) sample.append(trainQ.iloc[idx + i]['DEA']) sample.append(trainQ.iloc[cur]['CDIS']) if max(trainQ.iloc[cur + 1:cur + HOLD + 1]['EMAS']) / trainQ.iloc[cur]['closeIndex'] > 1.05: answer = 1 elif min(trainQ.iloc[cur + 1:cur + HOLD + 1] ['EMAS']) / trainQ.iloc[cur]['closeIndex'] < 0.95: answer = 2 else: answer = 0 # print(sample) ds.addSample(sample, answer) return ds
def toClassificationDataset(codedSampleSet):
    """Convert coded samples into a ClassificationDataSet with one-hot
    targets.

    The class of each sample comes from getClassifier(); the sorted list
    of distinct classes fixes the one-hot ordering.  Returns
    (dataset, classes).
    """
    # Distinct classifiers, sorted, define the class indices.
    classes = sorted({getClassifier(sample) for sample in codedSampleSet})

    # One-hot encode each sample's classifier.
    classifiedSampleSet = [
        one_to_n(classes.index(getClassifier(sample)), len(classes))
        for sample in codedSampleSet
    ]

    dataset = ClassificationDataSet(len(codedSampleSet[0]),
                                    len(classifiedSampleSet[0]))
    for coded, classified in zip(codedSampleSet, classifiedSampleSet):
        dataset.addSample(coded, classified)
    return dataset, classes
def generate_data():
    """Load the Lung1 training split, keep only the feature columns
    listed in `index`, and wrap the samples in a 3-class
    ClassificationDataSet (A=0, C=1, N=2)."""
    index = [2242,3833,3252,3286,1460,7054,6036,2527,538,3886,3329,6464,826,7071,3454,6544,2487,4225,4422,6447,681,2203,5715,3188,6417,3785,7095,391,3257,6581,6962,5535,6531,2389,5756,3262,1808,1860,3532,7128,7127,7126,7125,7124,7123,7122,7121,7120,7119,7118,7117,7116,7115,7114,7113,7112,7111,7110,7109,7108,7107,7106,7105,7104,7103,7102,7101,7100,7099,7098,7097,7096,7094,7093,7092,7091,7090,7089,7088,7087,7086,7085,7084,7083,7082,7081,7080,7079,7078,7077,7076,7075,7074,7073,7072,7070,7069,7068,7067,7066,7065,7064,7063,7062,7061,7060,7059,7058,7057,7056,7055,7053,7052,7051,7050,7049,7048,7047,7046,7045,7044,7043,7042,7041,7040,7039,7038,7037,7036,7035,7034,7033,7032,7031,7030,7029,7028,7027,7026,7025,7024,7023,7022,7021,7020,7019,7018,7017,7016,7015,7014,7013,7012,7011,7010,7009,7008,7007,7006,7005,7004,7003,7002,7001,7000,6999,6998,6997,6996,6995,6994,6993,6992,6991,6990,6989,6988,6987,6986,6985,6984,6983,6982,6981,6980,6979,6978,6977,6976,6975,6974,6973,6972,6971,6970,6969,6968,6967,6966,6965]
    INPUT_FEATURES = 200
    CLASSES = 3
    label_to_class = {"A": 0, "C": 1, "N": 2}

    train_text, train_classfi_number, train_classfi, train_feature_name = \
        getTargetData("Lung1_train.data")
    train_text = getIndexData(train_text, index)

    alldata = ClassificationDataSet(INPUT_FEATURES, 1, nb_classes=CLASSES)
    for features, label in zip(train_text, train_classfi):
        # Samples with an unrecognised label are skipped, exactly as in
        # the original if/elif chain.
        if label in label_to_class:
            alldata.addSample(features, label_to_class[label])
    return {'minX': 0, 'maxX': 1, 'minY': 0, 'maxY': 1, 'd': alldata,
            'index': index}
def trainModel(self):
    """Assemble the final dataset from the flattened numerical, category,
    and target arrays, split it 75/25 into train/test, and train a
    Tanh-hidden / Sigmoid-output network for 100 epochs (10 batches of
    10), timing each batch."""
    # One row per sample: [numerical features | category features | target].
    self.finalDataSet = np.c_[self.flattenNumericalData, self.flattenCategoryData, self.flattenTargetDataConverted]
    self.finalHeaderSet = self.flattenNumericalHeader + self.flattenCategoryHeader + self.flattenTargetHeader
    self.nattributes = self.flattenNumericalData.shape[1] + self.flattenCategoryData.shape[1]
    ds = ClassificationDataSet(self.nattributes, 1, nb_classes=self.nbClasses)
    for rowData in self.finalDataSet:
        target = rowData[-1]       # last column is the class label
        variables = rowData[0:-1]  # remaining columns are the inputs
        ds.addSample(variables, target)
    # 25% of the samples are held out for testing.
    self.testDataSet, self.trainDataSet = ds.splitWithProportion(0.25)
    self.testDataSet._convertToOneOfMany()
    self.trainDataSet._convertToOneOfMany()
    print self.testDataSet
    print self.trainDataSet
    self.net = buildNetwork(self.nattributes, self.nhiddenNerons, self.noutput, hiddenclass=TanhLayer, outclass=SigmoidLayer, bias=True)
    self.trainer = BackpropTrainer(self.net, self.trainDataSet, learningrate=0.001, momentum=0.99)
    begin0 = time.time()
    # self.trainer.trainUntilConvergence(verbose=True, dataset=ds, validationProportion=0.25, maxEpochs=10)
    for i in xrange(10):
        begin = time.time()
        self.trainer.trainEpochs(10)
        end = time.time()
        print 'iteration ', i, ' takes ', end-begin, 'seconds'
    end0 = time.time()
    print 'total time consumed: ', end0 - begin0
def generate_data():
    """Load the Leukemia1 training split, keep only the feature columns
    listed in `index`, and wrap the samples in a 3-class
    ClassificationDataSet (B_cell=0, AML=1, T_cell=2)."""
    index = [629,2641,1009,3280,6224,4041,2994,5553,5299,6509,2014,759,1024,6375,3820,4884,2596,2744,3935,3577,1238,2309,4445,7128,7127,7126,7125,7124,7123,7122,7121,7120,7119,7118,7117,7116,7115,7114,7113,7112,7111,7110,7109,7108,7107,7106,7105,7104,7103,7102,7101,7100,7099,7098,7097,7096,7095,7094,7093,7092,7091,7090,7089,7088,7087,7086,7085,7084,7083,7082,7081,7080,7079,7078,7077,7076,7075,7074,7073,7072,7071,7070,7069,7068,7067,7066,7065,7064,7063,7062,7061,7060,7059,7058,7057,7056,7055,7054,7053,7052,7051,7050,7049,7048,7047,7046,7045,7044,7043,7042,7041,7040,7039,7038,7037,7036,7035,7034,7033,7032,7031,7030,7029,7028,7027,7026,7025,7024,7023,7022,7021,7020,7019,7018,7017,7016,7015,7014,7013,7012,7011,7010,7009,7008,7007,7006,7005,7004,7003,7002,7001,7000,6999,6998,6997,6996,6995,6994,6993,6992,6991,6990,6989,6988,6987,6986,6985,6984,6983,6982,6981,6980,6979,6978,6977,6976,6975,6974,6973,6972,6971,6970,6969,6968,6967,6966,6965,6964,6963,6962,6961,6960,6959,6958,6957,6956,6955,6954,6953,6952]
    INPUT_FEATURES = 200
    CLASSES = 3
    label_to_class = {"B_cell": 0, "AML": 1, "T_cell": 2}

    train_text, train_classfi_number, train_classfi, train_feature_name = \
        getTargetData("Leukemia1_train.data")
    train_text = getIndexData(train_text, index)

    alldata = ClassificationDataSet(INPUT_FEATURES, 1, nb_classes=CLASSES)
    for features, label in zip(train_text, train_classfi):
        # Samples with an unrecognised label are skipped, exactly as in
        # the original if/elif chain.
        if label in label_to_class:
            alldata.addSample(features, label_to_class[label])
    return {'minX': 0, 'maxX': 1, 'minY': 0, 'maxY': 1, 'd': alldata,
            'index': index}
def batch_classify(self, samples):
    """Classify many samples in one pass and return their mapped class
    labels, in input order."""
    ds = ClassificationDataSet(len(self._fx))
    for sample in samples:
        # Extract features in the fixed order given by self._fx; the
        # target is a placeholder (prediction only).
        ds.addSample([sample[l] for l in self._fx], [0])
    predicted = self._trainer.testOnClassData(ds)
    return [self._rmap[r] for r in predicted]
def __init__(self, hidden_nodes=30):
    """Build a 12-input, 3-class classification network.

    hidden_nodes -- width of the single hidden (tanh) layer.  The output
    layer is softmax, so activations can be read as class probabilities.
    Training uses backpropagation with momentum and weight decay.
    (A regression variant would instead use a 1-unit output network
    with a SupervisedDataSet.)
    """
    self.classifier_neural_net = buildNetwork(12,
                                              hidden_nodes,
                                              3,
                                              outclass=SoftmaxLayer,
                                              hiddenclass=TanhLayer)
    self.data_sets = ClassificationDataSet(12, 1, nb_classes=3)
    # Backpropagation supervised training for the network above.
    self.classification_trainer = BackpropTrainer(
        self.classifier_neural_net,
        self.data_sets,
        momentum=0.1,
        verbose=True,
        weightdecay=0.01)
def generate_data(n=400):
    """Return the XOR truth table wrapped in a ClassificationDataSet
    (2 inputs), plus the bounding box of the input space.

    `n` is unused; it is kept for interface compatibility with the
    Gaussian-cluster generator this function replaced.
    """
    INPUT_FEATURES = 2
    CLASSES = 3
    xor_table = [([0, 0], [0]),
                 ([0, 1], [1]),
                 ([1, 0], [1]),
                 ([1, 1], [0])]
    alldata = ClassificationDataSet(INPUT_FEATURES, 1, nb_classes=CLASSES)
    for inputs, klass in xor_table:
        alldata.addSample(inputs, klass)
    return {'minX': 0, 'maxX': 1, 'minY': 0, 'maxY': 1, 'd': alldata}
def learn(self):
    """Fetch categorised requests from the local Memory service, train a
    feed-forward classifier on their token vectors, persist it to
    net.xml, and return the trained network."""
    try:
        self.data_for_training = []
        mem = Memory('http://localhost:9200')
        data = mem.get_data()
        categories = list(data.keys())
        # Pair each request's token vector with its category index.
        for category in data:
            for req in data[category]:
                self.data_for_training.append(
                    [req.token, [categories.index(category)]])
        net = buildNetwork(len(self.data_for_training[0][0]),
                           15,
                           len(categories),
                           hiddenclass=SoftmaxLayer,
                           recurrent=False)
        ds = ClassificationDataSet(len(self.data_for_training[0][0]),
                                   nb_classes=len(categories),
                                   class_labels=categories)
        # (loop variable renamed: the original shadowed `data` here)
        for sample in self.data_for_training:
            ds.addSample(sample[0], sample[1])
        ds._convertToOneOfMany()
        trainer = BackpropTrainer(net,
                                  ds,
                                  momentum=0.1,
                                  learningrate=0.01,
                                  verbose=True)
        trainer.trainUntilConvergence(maxEpochs=500)
        NetworkWriter.writeToFile(net, 'net.xml')
    except Exception:
        # BUG FIX: bare `raise` re-raises with the original traceback;
        # `raise e` re-bound the exception and (on Python 2) lost it.
        raise
    return net
def _convert_supervised_to_classification2(supervised_dataset, classes):
    """Copy the inputs of a SupervisedDataSet into a ClassificationDataSet
    of matching dimensions; every sample gets a placeholder target of 0."""
    converted = ClassificationDataSet(supervised_dataset.indim,
                                      supervised_dataset.outdim,
                                      classes)
    for n in xrange(supervised_dataset.getLength()):
        converted.addSample(supervised_dataset.getSample(n)[0], [0])
    return converted
def generate_training_set(dimensions, mult=1):
    """Build the parity training set over all binary strings of length
    `dimensions`, repeated `mult` times.

    The all-zeros vector is labelled 0; any other vector is labelled
    (popcount + 1) % 2.  Returns (ClassificationDataSet with one-hot
    targets, raw inputs, raw labels).
    """
    P_data_set = ClassificationDataSet(dimensions, 1, nb_classes=2)
    bit_vectors = [[int(c) for c in seq]
                   for seq in itertools.product("01", repeat=dimensions)]

    train_x = []
    train_y = []
    for _ in range(mult):
        for bits in bit_vectors:
            ones = sum(bits)
            train_x.append(list(bits))
            train_y.append(0 if ones == 0 else (ones + 1) % 2)

    for features, label in zip(train_x, train_y):
        P_data_set.addSample(np.ravel(features), label)

    # _convertToOneOfMany() turns each scalar label into a two-element
    # one-hot vector: class 0 -> [1, 0], class 1 -> [0, 1].
    P_data_set._convertToOneOfMany()
    return P_data_set, train_x, train_y
def conductGeneration(generation, corpus):
    '''
    Conducts a generation of learning and testing on the input data

    generation (int) --- the number of the generation
    corpus (object) --- corpus object containing info needed

    Builds a tiny hand-coded 3-class dataset, splits it in half, and
    returns the full dataset.
    '''
    alldata = ClassificationDataSet(2, 1, nb_classes=3,
                                    class_labels=['a', 'b', 'c'])
    # Hand-coded samples: input pair -> class.
    for inputs, klass in (((0, 1), 1), ((1, 0), 0), ((0, 0), 2), ((1, 1), 0)):
        alldata.addSample(inputs, klass)

    trndata, partdata = alldata.splitWithProportion(0.5)
    return alldata
def loadData(paths, classes): class_dict = generateDictOfClasses(classes) all_data = None for i in range(len(paths)): path = paths[i] print path for img in os.listdir(path): m = re.search('.*Thumbs\.(db)', img) # in windows XP, this is a problem if (m is None): img_path = path + "/" + img img_data = thresholdOp(Image(img_path)) flattened = img_data.getNumpy()[:, :, 1].flatten() # 25x20 (wxh) flattened[flattened == 255] = 1 # set every '255' to '1' if all_data is None: all_data = ClassificationDataSet(len(flattened), nb_classes=len(classes), class_labels=classes) all_data.addSample( flattened, [class_dict[classes[i]]]) # [data[1],data[2]] return all_data