def createnetwork(n_hoglist, n_classlist, n_classnum, n_hiddensize=100):
    n_inputdim = len(n_hoglist[0])
    n_alldata = ClassificationDataSet(n_inputdim, 1, nb_classes=n_classnum)
    for i in range(len(n_hoglist)):
        n_input = n_hoglist[i]
        n_class = n_classlist[i]
        n_alldata.addSample(n_input, [n_class])
    n_tstdata, n_trndata = n_alldata.splitWithProportion(0.25)
    n_trndata._convertToOneOfMany()
    n_tstdata._convertToOneOfMany()

    print "Number of training patterns: ", len(n_trndata)
    print "Input and output dimensions: ", n_trndata.indim, n_trndata.outdim
    print "First sample (input, target, class):"
    print n_trndata['input'][0], n_trndata['target'][0], n_trndata['class'][0]

    n_fnn = buildNetwork(n_trndata.indim, n_hiddensize, n_trndata.outdim,
                         outclass=SoftmaxLayer)
    n_trainer = BackpropTrainer(n_fnn, dataset=n_trndata, momentum=0.1,
                                verbose=True, weightdecay=0.01)

    # keep training until the test error drops below the threshold
    n_result = 1
    while n_result > 0.1:
        print n_result
        n_trainer.trainEpochs(1)
        n_trnresult = percentError(n_trainer.testOnClassData(),
                                   n_trndata['class'])
        n_tstresult = percentError(n_trainer.testOnClassData(dataset=n_tstdata),
                                   n_tstdata['class'])
        print "epoch: %4d" % n_trainer.totalepochs, \
              " train error: %5.2f%%" % n_trnresult, \
              " test error: %5.2f%%" % n_tstresult
        n_result = n_tstresult
def _convert_supervised_to_classification(supervised_dataset, classes):
    classification_dataset = ClassificationDataSet(supervised_dataset.indim,
                                                   supervised_dataset.outdim,
                                                   classes)
    for n in xrange(0, supervised_dataset.getLength()):
        classification_dataset.addSample(supervised_dataset.getSample(n)[0],
                                         supervised_dataset.getSample(n)[1])
    return classification_dataset
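A possible usage sketch for the converter above (variable names are illustrative): on recent PyBrain releases, splitWithProportion() returns plain SupervisedDataSet halves, so each half is wrapped back into a ClassificationDataSet before calling _convertToOneOfMany().

def split_classification_data(all_data, n_classes, proportion=0.25):
    # split, then re-wrap each half as a ClassificationDataSet
    test_sup, train_sup = all_data.splitWithProportion(proportion)
    test_ds = _convert_supervised_to_classification(test_sup, n_classes)
    train_ds = _convert_supervised_to_classification(train_sup, n_classes)
    # one output neuron per class for network training
    train_ds._convertToOneOfMany()
    test_ds._convertToOneOfMany()
    return train_ds, test_ds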
def gen_data(csv_file, db):
    keywords = {}
    count = 0
    img_list = []
    with open(csv_file) as f:
        content = f.readlines()
        f.close()
    for line in content:
        aux = line.replace('\n', '').split(',')
        if aux[1] not in keywords:
            keywords[aux[1]] = count
            count += 1
        img_list.append(aux)

    data = ClassificationDataSet(768, len(keywords), nb_classes=len(keywords))
    n = len(keywords)
    for img in img_list:
        path = db + '/' + img[0]
        im = Image.open(path).convert('RGB')
        data.addSample(get_img_feats(im), get_keyword_class(keywords[img[1]], n))
    return data, n, keywords
def batch_classify(self, samples):
    ds = ClassificationDataSet(len(self._fx))
    for sample in samples:
        fvec = [sample[l] for l in self._fx]
        ds.addSample(fvec, [0])
    results = self._trainer.testOnClassData(ds)
    return [self._rmap[r] for r in results]
def getData():
    fo = open("C:\\Program Files (x86)\\Lux\\Support\\data1per.txt")
    #data = []
    '''
    correctinds = range(0,5)
    for k in range(5, 131, 3):
        correctinds.append(k)
    correctinds.append(129)
    correctinds.append(130)
    for k in range(131, 257, 3):
        correctinds.append(k)
    correctinds.append(255)
    correctinds.append(256)
    '''
    #alldata = ClassificationDataSet(92, 1)
    alldata = ClassificationDataSet(84, 1)
    count = 0
    for line in fo.readlines():
        #for k in range(0, 20000):
        count += 1
        #line = fo.readline()
        line = [int(x.strip()) for x in line[1:-3].split(',')]
        line = [line[0]] + line[4:47] + line[49:90]
        alldata.addSample(line[1:], line[0])
    print count
    return alldata
class NeuralNetLearner:
    def __init__(self):
        self.bunch = load_digits()
        self.X = np.asarray(self.bunch.data, 'float32')
        self.Y = np.asarray(self.bunch.target, 'float32')
        #self.X, self.Y = nudge_dataset(self.X, self.bunch.target)
        self.X = (self.X - np.min(self.X, 0)) / (np.max(self.X, 0) + 0.0001)  # 0-1 scaling

        self.ds = ClassificationDataSet(64, nb_classes=10,
                                        class_labels=self.bunch.target_names)
        for (x, y) in zip(self.X, self.Y):
            self.ds.addSample(x, y)

        self.test_data, self.train_data = self.ds.splitWithProportion(0.3)
        self.network = buildNetwork(64, 10, 1)

    def get_datasets(self):
        return self.train_data, self.test_data

    def activate(self, x):
        # return the activation so callers (e.g. fitness_func) can use it
        return self.network.activate(x.tolist())

    def fitness_func(self, x):
        if not (x.size == 64):
            print("Bad input vector: ", x)
            return
        sum_of_squared_error = 0
        for (input, target) in self.ds:
            # activate() already converts to a list, so pass the array directly
            sum_of_squared_error += (target - self.activate(input)) ** 2
        return (sum_of_squared_error / len(self.ds))

    def get_weights(self):
        return
def toClassificationDataset(codedSampleSet):
    classifiedSampleSet = []

    # Calculate the unique classes
    classes = []
    for sample in codedSampleSet:
        classifier = getClassifier(sample)
        if classifier not in classes:
            classes.append(classifier)
    classes.sort()

    # Now that we have all the classes, we process the outputs
    for sample in codedSampleSet:
        classifier = getClassifier(sample)
        classifiedSample = one_to_n(classes.index(classifier), len(classes))
        classifiedSampleSet.append(classifiedSample)

    # Build the dataset
    sampleSize = len(codedSampleSet[0])
    classifiedSampleSize = len(classifiedSampleSet[0])
    dataset = ClassificationDataSet(sampleSize, classifiedSampleSize)
    for i in range(len(classifiedSampleSet)):
        dataset.addSample(codedSampleSet[i], classifiedSampleSet[i])

    return dataset, classes
def generate_data(n=400):
    INPUT_FEATURES = 2
    CLASSES = 3
    #means = [(-1, 0), (2, 4), (3, 1)]
    #cov = [diag([1, 1]), diag([0.5, 1.2]), diag([1.5, 0.7])]
    alldata = ClassificationDataSet(INPUT_FEATURES, 1, nb_classes=CLASSES)
    #minX, maxX = means[0][0], means[0][0]
    #minY, maxY = means[0][1], means[0][1]
    #print minX, maxX, minY, maxY
    #
    #for i in range(n):
    #    for klass in range(CLASSES):
    #        features = multivariate_normal(means[klass], cov[klass])
    #        #print means[klass], cov[klass]
    #        #print features
    #        x, y = features
    #        minX, maxX = min(minX, x), max(maxX, x)
    #        minY, maxY = min(minY, y), max(maxY, y)
    #        alldata.addSample(features, [klass])
    #print alldata
    alldata.addSample([0, 0], [0])
    alldata.addSample([0, 1], [1])
    alldata.addSample([1, 0], [1])
    alldata.addSample([1, 1], [0])
    return {'minX': 0, 'maxX': 1,
            'minY': 0, 'maxY': 1, 'd': alldata}
class NNetwork:

    def __init__(self):
        # 8 classes since we have 8 gestures, 7 inputs since we have 7 features
        self.ds = ClassificationDataSet(7, 1, nb_classes=8)

    def add_data(self, training_data):
        # add all the training data we have
        for gesture in training_data:
            self.ds.addSample(gesture[1], gesture[0])

    def newData(self, training_data):
        # replace the already existing data and add data from scratch
        self.ds = ClassificationDataSet(7, 1, nb_classes=8)
        for gesture in training_data:
            self.ds.addSample(gesture[1], gesture[0])

    def train(self, shouldPrint):
        # split the data into training and verification data
        tstdata, trndata = self.ds.splitWithProportion(0.2)
        trndata._convertToOneOfMany()
        tstdata._convertToOneOfMany()
        # build a network with 64 hidden neurons
        self.fnn = buildNetwork(trndata.indim, 64, trndata.outdim, outclass=SoftmaxLayer)
        # use the backpropagation algorithm
        self.trainer = BackpropTrainer(self.fnn, dataset=trndata, momentum=0.1,
                                       learningrate=0.01, verbose=True, weightdecay=0.1)
        # early stopping with 20% of the training data held out for validation
        self.trainer.trainUntilConvergence(dataset=trndata, maxEpochs=100, verbose=True,
                                           continueEpochs=10, validationProportion=0.20)
        trnresult = percentError(self.trainer.testOnClassData(), trndata['class'])
        tstresult = percentError(self.trainer.testOnClassData(dataset=tstdata), tstdata['class'])
        if shouldPrint:
            print "epoch: %4d" % self.trainer.totalepochs, " train error: %5.2f%%" % trnresult, " test error: %5.2f%%" % tstresult

    def activate(self, data):
        # test a particular data point (feature vector)
        return self.fnn.activate(data)
def build_dataset(data_pair):
    inputs, classes = data_pair
    ds = ClassificationDataSet(256)
    data = zip(inputs, classes)
    for (inp, c) in data:
        ds.appendLinked(inp, [c])
    return ds
def generateDataSet():
    inFile = open("data/input.txt")
    inData = inFile.readlines()
    inFile.close()
    outFile = open("data/output.txt")
    outData = outFile.readlines()
    outFile.close()

    # you will want to update this based on the state you have...
    ### I don't understand this comment. How do we update if we haven't calculated the state yet?
    inputs = 120
    # Not much reason to change this one, there are only 11 destinations.
    classes = 11

    allData = ClassificationDataSet(inputs, 1, nb_classes=classes)
    start = time.clock()
    for i in range(len(inData)):
        b = loadBrain(inData[i].strip())
        #inputs = len(b.g.heroes) - 1 + len(b.g.taverns_locs) + 4
        # calls functions inside of the ai object. you will want to write these fcns.
        ins = b.createInputs(inputs)
        klass = b.determineClass(classes, eval(outData[i].strip()))
        expectedKlass = b.classInverse(klass)
        #if expectedKlass != eval(outData[i].strip()):
        #    print expectedKlass, eval(outData[i].strip())
        allData.addSample(ins, [klass])
        #if(i > 1000): break
        if (i % 100 == 0):
            print i, len(inData), "elapsed between sets", time.clock() - start
    return allData
def read_data(filename):
    """
    See http://www.pybrain.org/docs/api/datasets/classificationdataset.html

    Reads a (naive) csv file of data and converts it into a
    ClassificationDataSet. 'Naive' in this case means the data can be parsed
    by splitting on commas - i.e., no quotations or escapes.

    I picked this file format because it should be trivial to convert
    all our data into it.

    Raises an exception when an IO error occurs.

    Parameters:
        filename - The name of the file containing the data.
    """
    data_file = open(filename, "r")
    data_lines = [line.split(',') for line in data_file.readlines()]
    data_file.close()

    features = [[float(f) for f in line[0:-1]] for line in data_lines]
    classes = [[int(line[-1])] for line in data_lines]

    # Workaround to make classifications zero-based
    class_min = min([c[0] for c in classes])
    for i in range(len(classes)):
        classes[i][0] -= class_min

    data_set = ClassificationDataSet(len(features[0]))
    for feature_vector, classification in zip(features, classes):
        data_set.addSample(feature_vector, classification)

    return data_set
def make_data_set(beg, end):
    # SupervisedDataSet(HISTORY*3, 1)
    ds = ClassificationDataSet(HISTORY * 2 + 1,
                               class_labels=['None', 'Buy', 'Sell'])
    trainQ = rawData[(rawData.tradeDate <= end) & (rawData.tradeDate >= beg)]

    for idx in range(1, len(trainQ) - HISTORY - 1 - HOLD - 1):
        cur = idx + HISTORY - 1
        if abs(trainQ.iloc[cur]['MACD']) > 0.5:
            continue
        sample = []
        for i in range(HISTORY):
            #sample.append( trainQ.iloc[idx+i]['EMAL'] )  # [['EMAL','DIFF','DEA','CDIS']]
            sample.append(trainQ.iloc[idx + i]['DIFF'])
            sample.append(trainQ.iloc[idx + i]['DEA'])
        sample.append(trainQ.iloc[cur]['CDIS'])

        if max(trainQ.iloc[cur + 1:cur + HOLD + 1]['EMAS']) / trainQ.iloc[cur]['closeIndex'] > 1.05:
            answer = 1
        elif min(trainQ.iloc[cur + 1:cur + HOLD + 1]['EMAS']) / trainQ.iloc[cur]['closeIndex'] < 0.95:
            answer = 2
        else:
            answer = 0
        # print(sample)
        ds.addSample(sample, answer)
    return ds
class NeuralNetwork(BaseWorkflow):

    def __init__(self, purpose='train', num_inputs=None, num_ouputs=None,
                 classes=None, class_lables=None):
        super(NeuralNetwork, self).__init__()
        self.purpose = purpose
        self.data_path = self.config.neural_net.get(self.purpose, None)
        self.file_name = 'neural_net'
        self.all_data = ClassificationDataSet(num_inputs, num_ouputs,
                                              nb_classes=classes,
                                              class_labels=class_lables)
        self.train = None
        self.test = None
        self.neural_network = None
        self.train_result = None
        self.test_result = None
        self.cross_validation_result = None

    def process(self):
        self.prepare_train_test()
        self.build_network()
        trainer = self.train_network(dataset=self.train)
        self.score_train_test(trainer=trainer)
        self.cross_validate(dataset=self.all_data)

    def add_sample(self, correlogram_matrix=None, target=None, sample_path=None):
        self.all_data.addSample(correlogram_matrix, target)
        logger.info('sample added from {sample_path}'.format(sample_path=sample_path))

    def prepare_train_test(self):
        self.test, self.train = self.all_data.splitWithProportion(0.25)

    def build_network(self):
        # feed forward network
        self.neural_network = buildNetwork(self.train.indim, 7, self.train.outdim,
                                           outclass=SoftmaxLayer)

    def train_network(self, dataset=None):
        starter_trainer = BackpropTrainer(self.neural_network, dataset=dataset,
                                          momentum=0.1, verbose=True, weightdecay=0.01)
        starter_trainer.trainUntilConvergence(validationProportion=0.25, maxEpochs=100)
        return starter_trainer

    def score_train_test(self, trainer=None):
        self.test_result = percentError(trainer.testOnClassData(dataset=self.test),
                                        self.test['class'])
        logger.info('test error result: {result}'.format(result=self.test_result))
        self.train_result = percentError(trainer.testOnClassData(dataset=self.train),
                                         self.train['class'])
        logger.info('train error result: {result}'.format(result=self.train_result))

    def cross_validate(self, dataset=None):
        trainer = BackpropTrainer(self.neural_network, dataset=dataset,
                                  momentum=0.1, verbose=True, weightdecay=0.01)
        validator = CrossValidator(trainer=trainer, dataset=dataset, n_folds=10)
        mean_validation_result = validator.validate()
        self.cross_validation_result = mean_validation_result
        logger.info('cross val result: {result}'.format(result=self.cross_validation_result))

    @staticmethod
    def save_network_to_xml(net=None, file_name=None):
        NetworkWriter.writeToFile(net, file_name)

    @staticmethod
    def read_network_from_xml(file_name=None):
        return NetworkReader.readFrom(file_name)
def main():
    for stock in STOCK_TICKS:
        # Download Data
        get_data(stock)

        # Import Data
        days = extract_data(stock)
        today = days.pop(0)

        # Make DataSet
        data_set = ClassificationDataSet(INPUT_NUM, 1, nb_classes=2)
        for day in days:
            target = 0
            if day.change > 0:
                target = 1
            data_set.addSample(day.return_metrics(), [target])

        # Make Network
        network = buildNetwork(INPUT_NUM, MIDDLE_NUM, MIDDLE_NUM, OUTPUT_NUM)

        # Train Network
        trainer = BackpropTrainer(network)
        trainer.setData(data_set)
        trainer.trainUntilConvergence(maxEpochs=EPOCHS_MAX)

        # Activate Network
        prediction = network.activate(today.return_metrics())
        print prediction
def getdata(do_preprocessing, full_data):
    '''
    fetch and format the match data according to the given flags
    do_preprocessing: bool: true if preprocessing needs to be done
    full_data: bool: false if the minimal data should be used
    '''
    print ("fetching data ...")
    if full_data == 0:
        fn = getMinimalDatafromMatch
    else:
        fn = getBasicDatafromMatch

    if globals.use_saved_data:
        try:
            with open('processed_data%d' % full_data) as outfile:
                data = json.load(outfile)
        except IOError:
            matches = Match.objects.all()
            data = map(lambda x: (fn(x, do_preprocessing, False), x.won), matches)
            data += map(lambda x: (fn(x, do_preprocessing, True), not x.won), matches)
            with open('processed_data%d' % full_data, 'w') as outfile:
                json.dump(data, outfile)
    else:
        matches = Match.objects.all()
        data = map(lambda x: (fn(x, do_preprocessing, False), x.won), matches)
        data += map(lambda x: (fn(x, do_preprocessing, True), not x.won), matches)
        with open('processed_data%d' % full_data, 'w') as outfile:
            json.dump(data, outfile)

    all_data = None
    for input, won in data:
        if all_data is None:
            all_data = ClassificationDataSet(len(input), 1, nb_classes=2)
        all_data.addSample(input, int(won))
    return all_data
def simpleNeuralNetworkTrain(fileName, numFeatures, numClasses, possibleOutputs,
                             numHiddenNodes, numTrainingEpochs):
    data = np.genfromtxt(fileName)
    trnIn = data[:, 0:5]
    trnOut = data[:, 6]
    trnOut = [int(val) for val in trnOut]

    normalizeData(trnIn, numFeatures)
    trndata = ClassificationDataSet(numFeatures, possibleOutputs, nb_classes=numClasses)
    for row in range(0, len(trnIn)):
        tempListOut = []
        tempListIn = []
        tempListOut.append(int(trnOut[row]))
        for i in range(0, numFeatures):
            tempListIn.append(trnIn[row][i])
        trndata.addSample(tempListIn, tempListOut)

    trndata._convertToOneOfMany()

    # When running for the first time
    myNetwork = buildNetwork(numFeatures, numHiddenNodes, numClasses,
                             outclass=SoftmaxLayer, bias=True, recurrent=False)
    # Read from file after the first try.
    # myNetwork = NetworkReader.readFrom('firstTime.xml')  # Use saved results.
    trainer = BackpropTrainer(myNetwork, dataset=trndata, momentum=0.0,
                              verbose=True, weightdecay=0.0)
    for i in range(numTrainingEpochs):
        trainer.trainOnDataset(dataset=trndata)
def build_dataset(mongo_collection, patch_size=IMG_SIZE, orig_size=IMG_SIZE,
                  nb_classes=2, edgedetect=True, transform=True):
    # deprecated
    if edgedetect:
        import cv2
    from pybrain.datasets import SupervisedDataSet, ClassificationDataSet

    patch_size = min(patch_size, orig_size)
    # cast to int so the value can be used as an array index below
    trim = int(round((orig_size - patch_size) / 2))
    # ds = SupervisedDataSet(patch_size**2, 1)
    ds = ClassificationDataSet(patch_size ** 2, target=1, nb_classes=nb_classes)

    cursor = list(mongo_collection.find())
    for one_image in cursor:
        # convert from binary to numpy array and transform
        img_array = np.fromstring(one_image["image"], dtype="uint8")
        if edgedetect:
            img_array = cv2.Canny(img_array, 150, 200)
        img_crop = img_array.reshape(orig_size, orig_size)[trim:(trim + patch_size),
                                                           trim:(trim + patch_size)]
        classification = float(one_image["class"])
        if transform:
            transformed = transform_img(img_crop.ravel(), patch_size)
        else:
            transformed = [img_crop.ravel()]
        for one_img in transformed:
            ds.addSample(one_img.ravel(), classification)

    print("New dataset contains %d images (%d positive)." % (len(ds), sum(ds["target"])))
    return ds
def cross_validation(trndata, folds=3, **kwargs):
    """
    kwargs are parameters for the model
    """
    input = np.vsplit(trndata['input'], folds)
    target = np.vsplit(trndata['target'], folds)
    zipped = zip(input, target)

    accuracy_sum = 0
    for i in range(len(zipped)):
        new_train = ClassificationDataSet(attributes, nb_classes=classes_number)
        new_test = ClassificationDataSet(attributes, nb_classes=classes_number)
        test_zipped = zipped[i]
        train_zipped = zipped[:i] + zipped[(i + 1):]

        # stack the remaining folds back into single input/target arrays
        new_train.setField('input', np.vstack([fold[0] for fold in train_zipped]))
        new_train.setField('target', np.vstack([fold[1] for fold in train_zipped]))
        new_test.setField('input', test_zipped[0])
        new_test.setField('target', test_zipped[1])

        model = FNNClassifier()
        model.train(new_train, new_test, kwargs)
        out, targ = model.predict(new_test)
        accuracy_sum += accuracy(out, targ)

    return accuracy_sum / folds
def main():
    images, labels = load_labeled_training(flatten=True)
    images = standardize(images)
    # images, labels = load_pca_proj(K=100)
    shuffle_in_unison(images, labels)
    ds = ClassificationDataSet(images.shape[1], 1, nb_classes=7)
    for i, l in zip(images, labels):
        ds.addSample(i, [l - 1])
    # ds._convertToOneOfMany()
    test, train = ds.splitWithProportion(0.2)
    test._convertToOneOfMany()
    train._convertToOneOfMany()

    net = shortcuts.buildNetwork(train.indim, 1000, train.outdim, outclass=SoftmaxLayer)
    trainer = BackpropTrainer(net, dataset=train, momentum=0.1, learningrate=0.01,
                              weightdecay=0.05)
    # trainer = RPropMinusTrainer(net, dataset=train)
    # cv = validation.CrossValidator(trainer, ds)
    # print cv.validate()

    net.randomize()
    tr_labels_2 = net.activateOnDataset(train).argmax(axis=1)
    trnres = percentError(tr_labels_2, train["class"])
    # trnres = percentError(trainer.testOnClassData(dataset=train), train['class'])
    testres = percentError(trainer.testOnClassData(dataset=test), test["class"])
    print "Training error: %.10f, Test error: %.10f" % (trnres, testres)
    print "Iters: %d" % trainer.totalepochs

    for i in range(100):
        trainer.trainEpochs(10)
        trnres = percentError(trainer.testOnClassData(dataset=train), train["class"])
        testres = percentError(trainer.testOnClassData(dataset=test), test["class"])
        trnmse = trainer.testOnData(dataset=train)
        testmse = trainer.testOnData(dataset=test)
        print "Iteration: %d, Training error: %.5f, Test error: %.5f" % (trainer.totalepochs, trnres, testres)
        print "Training MSE: %.5f, Test MSE: %.5f" % (trnmse, testmse)
class ImageData(Data):
    image_x = 1
    image_y = 1
    images = []
    targets = []

    def __init__(self, images, targets, image_x, image_y,
                 description="Image Data", outputs=1):
        Data.__init__(self, description, outputs)
        self.images = images
        self.targets = targets
        self.image_x = image_x
        self.image_y = image_y
        self.create_classifier()

    def create_classifier(self):
        #print "Image X:", self.image_x
        #print "Image Y:", self.image_y
        vector_length = self.image_x * self.image_y

        # Create the classifier
        #print "Creating Classifier. Vector_Len:", vector_length, "Output Vector:", self.outputs
        self.classifier = ClassificationDataSet(vector_length, self.outputs,
                                                nb_classes=(len(self.images) / 10))

        #print "Adding samples for", len(self.images), " images"
        for i in xrange(len(self.images)):
            # Assign images to their targets in the classifier
            #print i, "Image:", self.images[i], "Target:", self.targets[i]
            self.classifier.addSample(self.images[i], self.targets[i])

    def print_data(self):
        # was: str(this.data_unit) -- 'this' is not defined in Python, use self
        print "Image Object:" + str(self.data_unit)

    def add_image(self, image, target):
        self.images.append(image)
        self.targets.append(target)
def getBoardImage(img):
    '''
    Runs an image through processing and neural network to decode digits

    img: an openCV image object

    returns:
    pil_im: a PIL image object with the puzzle isolated, cropped and straightened
    boardString: string representing the digits and spaces of a Sudoku board
                 (left to right, top to bottom)
    '''
    # Process image and extract digits
    pil_im, numbers, parsed, missed = process(img, False)
    if pil_im is None:
        return None, None

    net = NetworkReader.readFrom(os.path.dirname(os.path.abspath(__file__)) + '/network.xml')
    boardString = ''
    for number in numbers:
        if number is None:
            boardString += ' '
        else:
            data = ClassificationDataSet(400, nb_classes=9,
                                         class_labels=['1', '2', '3', '4', '5', '6', '7', '8', '9'])
            data.appendLinked(number.ravel(), [0])
            boardString += str(net.activateOnDataset(data).argmax(axis=1)[0] + 1)
    return pil_im, boardString
def prepare_datasets(inp, out, dataframe, ratio):
    '''conversion from pandas dataframe to pybrain ClassificationDataSet
    parameters:
        inp: list of names of input features
        out: list of names of output features (target value)
        ratio: ratio of dimension of test to train dataset
    '''
    inp_dim = len(inp)
    out_dim = len(out)
    no_classes = 2
    alldata = ClassificationDataSet(inp_dim, out_dim, no_classes)
    inp = dataframe[inp]
    out = dataframe[out]
    #for [a,b,c],d in zip(inp.values,out.values):
    for i in range(len(inp.values)):
        d = out.values[i]
        if d == 'up':
            d = 0
        elif d == 'down':
            d = 1
        else:
            d = 2
        alldata.addSample(inp.values[i], d)

    tstdata_temp, trndata_temp = alldata.splitWithProportion(ratio)

    # to convert supervised datasets to classification datasets
    # (use two separate datasets; a single shared instance would mix train and test samples)
    tstdata = ClassificationDataSet(inp_dim, out_dim, no_classes)
    trndata = ClassificationDataSet(inp_dim, out_dim, no_classes)
    for n in range(0, tstdata_temp.getLength()):
        tstdata.addSample(tstdata_temp.getSample(n)[0], tstdata_temp.getSample(n)[1])
    for n in range(0, trndata_temp.getLength()):
        trndata.addSample(trndata_temp.getSample(n)[0], trndata_temp.getSample(n)[1])

    trndata._convertToOneOfMany()
    tstdata._convertToOneOfMany()
    return alldata, trndata, tstdata
def trainModel(self):
    self.finalDataSet = np.c_[self.flattenNumericalData, self.flattenCategoryData,
                              self.flattenTargetDataConverted]
    self.finalHeaderSet = self.flattenNumericalHeader + self.flattenCategoryHeader + self.flattenTargetHeader
    self.nattributes = self.flattenNumericalData.shape[1] + self.flattenCategoryData.shape[1]

    ds = ClassificationDataSet(self.nattributes, 1, nb_classes=self.nbClasses)
    for rowData in self.finalDataSet:
        target = rowData[-1]
        variables = rowData[0:-1]
        ds.addSample(variables, target)

    self.testDataSet, self.trainDataSet = ds.splitWithProportion(0.25)
    self.testDataSet._convertToOneOfMany()
    self.trainDataSet._convertToOneOfMany()
    print self.testDataSet
    print self.trainDataSet

    self.net = buildNetwork(self.nattributes, self.nhiddenNerons, self.noutput,
                            hiddenclass=TanhLayer, outclass=SigmoidLayer, bias=True)
    self.trainer = BackpropTrainer(self.net, self.trainDataSet,
                                   learningrate=0.001, momentum=0.99)

    begin0 = time.time()
    # self.trainer.trainUntilConvergence(verbose=True, dataset=ds,
    #                                    validationProportion=0.25, maxEpochs=10)
    for i in xrange(10):
        begin = time.time()
        self.trainer.trainEpochs(10)
        end = time.time()
        print 'iteration ', i, ' takes ', end - begin, 'seconds'
    end0 = time.time()
    print 'total time consumed: ', end0 - begin0
def importFromCSV(self, fileName, numInputs, numClasses):
    """
    Function that reads in a CSV file and passes it on to the pybrain neural
    net dataset structure to be used with the library's neural net classes.
    It expects the last column of each row to be the classification column.
    """
    dataSet = None
    dataFile = open(fileName)
    line = dataFile.readline()
    data = [str(x) for x in line.strip().split(',') if x != '']
    if data[0] == '!labels:':
        labels = data[1:]
        dataSet = ClassificationDataSet(numInputs, nb_classes=numClasses,
                                        class_labels=labels)
        line = dataFile.readline()
    else:
        dataSet = ClassificationDataSet(numInputs, nb_classes=numClasses)

    while line != '':
        data = [float(x) for x in line.strip().split(',') if x != '']
        inputData = data[:numInputs]
        outputData = data[-1:]
        dataSet.addSample(inputData, outputData)
        line = dataFile.readline()

    dataFile.close()
    return dataSet
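A hedged illustration of the file layout importFromCSV() expects (the file name, object name, and label names below are made up): an optional '!labels:' header row, then one sample per line with the class index in the last column.

# example file contents ('iris.csv' is hypothetical):
#   !labels:,setosa,versicolor,virginica
#   5.1,3.5,1.4,0.2,0
#   7.0,3.2,4.7,1.4,1
#   6.3,3.3,6.0,2.5,2
#
# ds = importer.importFromCSV('iris.csv', numInputs=4, numClasses=3)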
def getPybrainDataSet(source='Rachelle'):
    first = False  # True
    qualities, combinations = cp.getCombinations()
    moods = combinations.keys()
    ds = None
    l = 0
    for mood in moods:
        if mood == 'neutral':
            continue
        for typeNum in range(1, 21):
            for take in range(1, 10):
                fileName = 'recordings/' + source + '/' + mood + '/' + \
                           str(typeNum) + '_' + str(take) + '.skl'
                try:
                    data, featuresNames = ge.getFeatureVec(fileName, first)
                    first = False
                except IOError:
                    continue
                if ds is None:
                    # initialization
                    ds = ClassificationDataSet(len(data), len(qualities))
                output = np.zeros((len(qualities)))
                for q in combinations[mood][typeNum]:
                    output[qualities.index(q)] = 1
                ds.appendLinked(data, output)
                l += sum(output)
    return ds, featuresNames
def test(self, filename, classes, trainer, net):
    testLabels = []
    # load test data
    tstdata = ClassificationDataSet(103, 1, nb_classes=classes)
    tstdata = self.loaddata(filename, classes)
    testLabels = tstdata['target']

    # some sort of mandatory conversion
    tstdata._convertToOneOfMany()

    # using numpy array
    output = np.array([net.activate(x) for x, _ in tstdata])
    output = output.argmax(axis=1)
    print(output)
    print("on test data", percentError(output, tstdata['class']))
    for i, l in enumerate(output):
        print l, '->', testLabels[i][0]

    # alternate version - using activateOnDataset function
    out = net.activateOnDataset(tstdata).argmax(axis=1)
    print out
    return percentError(out, tstdata['class'])
def run_nn_fold(training_data, test_data):
    test_features, ignore, featureMap, labels, labelMap = fs.mutualinfo(training_data)

    input_len = len(test_features[0])
    num_classes = len(labelMap.keys())
    train_ds = ClassificationDataSet(input_len, 1, nb_classes=num_classes)
    for i in range(len(test_features)):
        train_ds.addSample(tuple(test_features[i]), (labels[i]))
    train_ds._convertToOneOfMany()

    net = buildNetwork(train_ds.indim, 2, train_ds.outdim, bias=True,
                       hiddenclass=TanhLayer, outclass=SoftmaxLayer)
    trainer = BackpropTrainer(net, train_ds, verbose=True)
    print "training until convergence..."
    trainer.trainUntilConvergence(maxEpochs=100)
    print "done. testing..."

    test_ds = ClassificationDataSet(input_len, 1, nb_classes=num_classes)
    labels = []
    for tweetinfo in test_data:
        featuresFound = tweetinfo["Features"]
        label = tweetinfo["Answer"]
        labels.append(label)
        features = [0] * len(featureMap.keys())
        for feat in featuresFound:
            if feat in featureMap:
                features[featureMap[feat]] = 1
        test_ds.addSample(tuple(features), (labelMap[label]))

    test_ds._convertToOneOfMany()
    tstresult = percentError(trainer.testOnClassData(dataset=test_ds), test_ds['class'])
    print tstresult
def load_data(filename):
    """
    load dataset for classification
    """
    assert os.path.exists(filename) == True
    dat = scipy.io.loadmat(filename)
    inputs = dat['inputs']
    #print len(inputs)
    targets = dat['targets']
    #print len(targets)
    assert len(inputs) == len(targets)

    global alldata
    global indim
    global outdim
    indim = len(inputs[0])
    outdim = 1
    #print indim
    alldata = ClassificationDataSet(indim, outdim, nb_classes=8)
    alldata.setField('input', inputs)
    alldata.setField('target', targets)
    assert len(alldata['input']) == len(alldata['target'])
    print type(alldata)
def classifer(labels, data):
    """
    data in format (value, label)
    """
    clsff = ClassificationDataSet(2, class_labels=labels)
    for d in data:
        clsff.appendLinked(d[0], d[1])
    clsff.calculateStatistics()
    # return the populated dataset so callers can actually use it
    return clsff
__author__ = 'QSG'
from pybrain.datasets import ClassificationDataSet
from pybrain.utilities import percentError
from pybrain.tools.shortcuts import buildNetwork
from pybrain.supervised.trainers import BackpropTrainer
from pybrain.structure.modules import SoftmaxLayer

from pylab import ion, ioff, figure, draw, contourf, clf, show, hold, plot
from scipy import diag, arange, meshgrid, where
from numpy.random import multivariate_normal

# use a 2D dataset, classify into 3 classes
means = [(-1, 2), (2, 4), (3, 1)]
cov = [diag([1, 1]), diag([0.5, 1.2]), diag([1.5, 0.7])]
alldata = ClassificationDataSet(2, 1, nb_classes=3)
for n in xrange(400):
    for kclass in range(3):
        input = multivariate_normal(means[kclass], cov[kclass])
        # print 'input: ', input
        alldata.addSample(input, [kclass])
# print alldata

tstdata, trndata = alldata.splitWithProportion(0.25)
trndata._convertToOneOfMany()
tstdata._convertToOneOfMany()

print "Number of training patterns: ", len(trndata)
print "input and output dimensions: ", trndata.indim, ',', trndata.outdim
print "first sample (input, target, class):"
# print trndata['input'][0], trndata['target'][0], trndata['class'][0]
def training_and_testing():
    nn = init_neural_network()

    training = learning.get_labeled_data(
        '%strain-images-idx3-ubyte.gz' % (database_folder),
        '%strain-labels-idx1-ubyte.gz' % (database_folder),
        '%strainig' % (database_folder))
    test = learning.get_labeled_data(
        '%st10k-images-idx3-ubyte.gz' % (database_folder),
        '%st10k-labels-idx1-ubyte.gz' % (database_folder),
        '%stest' % (database_folder))

    FEATURES = N_INPUT_LAYER
    print("Features to analyze: %i" % FEATURES)

    testdata = ClassificationDataSet(FEATURES, 1, nb_classes=OUTPUT_LAYER)
    trainingdata = ClassificationDataSet(FEATURES, 1, nb_classes=OUTPUT_LAYER)

    for i in range(len(test['data'])):
        testdata.addSample(test['data'][i], test['label'][i])
    for j in range(len(training['data'])):
        trainingdata.addSample(training['data'][j], training['label'][j])

    trainingdata._convertToOneOfMany()
    testdata._convertToOneOfMany()

    trainer = BackpropTrainer(nn, dataset=trainingdata, momentum=MOMENTUM,
                              verbose=True, weightdecay=W_DECAY,
                              learningrate=L_RATE, lrdecay=L_DECAY)
    for i in range(EPOCHS):
        trainer.trainEpochs(1)
        trnresult = percentError(trainer.testOnClassData(), trainingdata['class'])
        tstresult = percentError(trainer.testOnClassData(dataset=testdata),
                                 testdata['class'])
        print("epoch: %4d" % trainer.totalepochs,
              " train error: %5.2f%%" % trnresult,
              " test error: %5.2f%%" % tstresult)
    return nn
# total+=1
# res = true/total
# print res
# #37% accuracy

from pybrain.datasets import ClassificationDataSet
from pybrain.utilities import percentError
from pybrain.tools.shortcuts import buildNetwork
from pybrain.supervised.trainers import BackpropTrainer
from pybrain.structure.modules import SoftmaxLayer
from pybrain.tools.customxml.networkwriter import NetworkWriter
from pybrain.tools.customxml.networkreader import NetworkReader

num_inputs = len(X[0])
ds = ClassificationDataSet(num_inputs, 1, nb_classes=num_emotions)
Y = convertManyToOne(Y)
for k in xrange(len(X)):
    ds.addSample(X_scaled[k], Y[k])
ds._convertToOneOfMany()

tstdata, trndata = ds.splitWithProportion(0.25)  # 25% test data
fnn = buildNetwork(trndata.indim, 50, trndata.outdim, outclass=SoftmaxLayer)
trainer = BackpropTrainer(fnn, dataset=trndata, momentum=0.1, learningrate=0.01,
def prepare_dataset():
    # Prepare output coding. "-" is 1, "." is 0
    d_morse_array = '100'  # ( 1, 0, 0 )  # D -.. - 100
    g_morse_array = '110'  # ( 1, 1, 0 )  # G --. - 110
    k_morse_array = '101'  # ( 1, 0, 1 )  # K -.- - 101
    o_morse_array = '111'  # ( 1, 1, 1 )  # O --- - 111
    r_morse_array = '010'  # ( 0, 1, 0 )  # R .-. - 010
    s_morse_array = '000'  # ( 0, 0, 0 )  # S ... - 000
    u_morse_array = '001'  # ( 0, 0, 1 )  # U ..- - 001
    w_morse_array = '011'  # ( 0, 1, 1 )  # W .-- - 011

    # Load learning data
    d_array = read_array("d")
    g_array = read_array("g")
    k_array = read_array("k")
    o_array = read_array("o")
    r_array = read_array("r")
    s_array = read_array("s")
    u_array = read_array("u")
    w_array = read_array("w")

    # Create dataset
    dataset = ClassificationDataSet(1600, nb_classes=8, class_labels=[
        d_morse_array, g_morse_array, k_morse_array, o_morse_array,
        r_morse_array, s_morse_array, u_morse_array, w_morse_array])

    # add all samples to dataset
    dataset.addSample(d_array, [0])
    dataset.addSample(g_array, [1])
    dataset.addSample(k_array, [2])
    dataset.addSample(o_array, [3])
    dataset.addSample(r_array, [4])
    dataset.addSample(s_array, [5])
    dataset.addSample(u_array, [6])
    dataset.addSample(w_array, [7])

    dataset._convertToOneOfMany()
    return dataset
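A minimal training sketch for the Morse dataset built above (the hidden-layer size and epoch count are illustrative, not taken from the original): since prepare_dataset() already applied _convertToOneOfMany(), the network needs one output unit per class, and a prediction can be mapped back to its coding string through class_labels.

from pybrain.tools.shortcuts import buildNetwork
from pybrain.structure.modules import SoftmaxLayer
from pybrain.supervised.trainers import BackpropTrainer

def train_morse_net(dataset, epochs=50):
    # one softmax output per class (8 after the one-of-many conversion)
    net = buildNetwork(dataset.indim, 40, dataset.outdim, outclass=SoftmaxLayer)
    trainer = BackpropTrainer(net, dataset=dataset, momentum=0.1, weightdecay=0.01)
    trainer.trainEpochs(epochs)
    return net

# decoding the prediction for the first sample back to its Morse coding string:
# winner = net.activateOnDataset(dataset).argmax(axis=1)[0]
# print dataset.class_labels[winner]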
def main():
    in_data = np.genfromtxt('logit-train.csv', delimiter=',')
    out_data = np.genfromtxt('logit-test.csv', delimiter=',')

    # getting in the data from csv files and making it suitable for further action
    in_data = in_data[~np.isnan(in_data).any(1)]
    t = len(in_data[0, :])
    y_train = np.array(in_data[0:, t - 1])
    x_train = np.array(in_data[0:, :t - 1])
    scaler = preprocessing.StandardScaler().fit(x_train)  # standardization plays an important role in all NN algos
    x_train = scaler.transform(x_train)  # final x_train

    out_data = out_data[~np.isnan(out_data).any(1)]
    t = len(out_data[0, :])
    y_test = np.array(out_data[0:, t - 1])
    x_test = np.array(out_data[0:, :t - 1])
    x_test = scaler.transform(x_test)  # final x_test

    alltraindata = ClassificationDataSet(t - 1, 1, nb_classes=2)
    for count in range(len((in_data))):
        alltraindata.addSample(x_train[count], [y_train[count]])
    alltraindata._convertToOneOfMany(bounds=[0, 1])

    alltestdata = ClassificationDataSet(t - 1, 1, nb_classes=2)
    for count in range(len((out_data))):
        alltestdata.addSample(x_test[count], [y_test[count]])
    alltestdata._convertToOneOfMany(bounds=[0, 1])

    net = GRNN(alltraindata.indim, alltraindata.outdim)
    Y_predicted = zeros((alltestdata['input'].shape[0], alltestdata['target'].shape[1]))
    sigma = 1.30  # Have to figure out cross-validation to choose sigma!! Though this value gives the best result!!

    # Every test sample is sent to .predict along with the training data to get a predicted outcome, a (1,2) vector
    for i, x in enumerate(alltestdata['input']):
        Y_predicted[i] = net.predict(x, alltraindata['input'], alltraindata['target'], sigma)

    y_score = Y_predicted[:, 1]
    Y_predicted = Y_predicted.argmax(axis=1)  # Selects the class predicted

    tstresult = percentError(Y_predicted, alltestdata['class'])
    print "Accuracy on test data is: %5.3f%%," % (100 - tstresult)

    # collapse the test targets to 0/1 class labels (y_test[x] is a scalar,
    # so the original any(y_test[x]) call would have failed)
    for x in range(len(y_test)):
        if y_test[x]:
            y_test[x] = 1
        else:
            y_test[x] = 0

    average_label = ['micro', 'macro', 'weighted']
    for label in average_label:
        f1 = f1_score(y_test, Y_predicted, average=label)
        print "f1 score (%s)" % label, "is ", f1

    print "ROC Curve generation..."
    fpr, tpr, _ = metrics.roc_curve(y_test, y_score, pos_label=1)
    roc_auc = metrics.auc(fpr, tpr)
    print roc_auc
    plt.figure()
    plt.plot(fpr, tpr, label='ROC curve (area = %0.2f)' % roc_auc)
    plt.plot([0, 1], [0, 1], 'k--')
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('Receiver operating characteristic')
    plt.legend(loc="lower right")
    plt.show()
    print "ROC Curve closed."
def createDS(): # taken from iris data set at machine learning repository pat = [[[5.1, 3.5, 1.4, 0.2], [1, 0, 0], [0], ['Iris-setosa']], [[4.9, 3.0, 1.4, 0.2], [1, 0, 0], [0], ['Iris-setosa']], [[4.7, 3.2, 1.3, 0.2], [1, 0, 0], [0], ['Iris-setosa']], [[4.6, 3.1, 1.5, 0.2], [1, 0, 0], [0], ['Iris-setosa']], [[5.0, 3.6, 1.4, 0.2], [1, 0, 0], [0], ['Iris-setosa']], [[5.4, 3.9, 1.7, 0.4], [1, 0, 0], [0], ['Iris-setosa']], [[4.6, 3.4, 1.4, 0.3], [1, 0, 0], [0], ['Iris-setosa']], [[5.0, 3.4, 1.5, 0.2], [1, 0, 0], [0], ['Iris-setosa']], [[4.4, 2.9, 1.4, 0.2], [1, 0, 0], [0], ['Iris-setosa']], [[4.9, 3.1, 1.5, 0.1], [1, 0, 0], [0], ['Iris-setosa']], [[5.4, 3.7, 1.5, 0.2], [1, 0, 0], [0], ['Iris-setosa']], [[4.8, 3.4, 1.6, 0.2], [1, 0, 0], [0], ['Iris-setosa']], [[4.8, 3.0, 1.4, 0.1], [1, 0, 0], [0], ['Iris-setosa']], [[4.3, 3.0, 1.1, 0.1], [1, 0, 0], [0], ['Iris-setosa']], [[5.8, 4.0, 1.2, 0.2], [1, 0, 0], [0], ['Iris-setosa']], [[5.7, 4.4, 1.5, 0.4], [1, 0, 0], [0], ['Iris-setosa']], [[5.4, 3.9, 1.3, 0.4], [1, 0, 0], [0], ['Iris-setosa']], [[5.1, 3.5, 1.4, 0.3], [1, 0, 0], [0], ['Iris-setosa']], [[5.7, 3.8, 1.7, 0.3], [1, 0, 0], [0], ['Iris-setosa']], [[5.1, 3.8, 1.5, 0.3], [1, 0, 0], [0], ['Iris-setosa']], [[5.4, 3.4, 1.7, 0.2], [1, 0, 0], [0], ['Iris-setosa']], [[5.1, 3.7, 1.5, 0.4], [1, 0, 0], [0], ['Iris-setosa']], [[4.6, 3.6, 1.0, 0.2], [1, 0, 0], [0], ['Iris-setosa']], [[5.1, 3.3, 1.7, 0.5], [1, 0, 0], [0], ['Iris-setosa']], [[4.8, 3.4, 1.9, 0.2], [1, 0, 0], [0], ['Iris-setosa']], [[5.0, 3.0, 1.6, 0.2], [1, 0, 0], [0], ['Iris-setosa']], [[5.0, 3.4, 1.6, 0.4], [1, 0, 0], [0], ['Iris-setosa']], [[5.2, 3.5, 1.5, 0.2], [1, 0, 0], [0], ['Iris-setosa']], [[5.2, 3.4, 1.4, 0.2], [1, 0, 0], [0], ['Iris-setosa']], [[4.7, 3.2, 1.6, 0.2], [1, 0, 0], [0], ['Iris-setosa']], [[4.8, 3.1, 1.6, 0.2], [1, 0, 0], [0], ['Iris-setosa']], [[5.4, 3.4, 1.5, 0.4], [1, 0, 0], [0], ['Iris-setosa']], [[5.2, 4.1, 1.5, 0.1], [1, 0, 0], [0], ['Iris-setosa']], [[5.5, 4.2, 1.4, 0.2], [1, 0, 0], [0], ['Iris-setosa']], [[4.9, 3.1, 1.5, 0.1], [1, 0, 0], [0], ['Iris-setosa']], [[5.0, 3.2, 1.2, 0.2], [1, 0, 0], [0], ['Iris-setosa']], [[5.5, 3.5, 1.3, 0.2], [1, 0, 0], [0], ['Iris-setosa']], [[4.9, 3.1, 1.5, 0.1], [1, 0, 0], [0], ['Iris-setosa']], [[4.4, 3.0, 1.3, 0.2], [1, 0, 0], [0], ['Iris-setosa']], [[5.1, 3.4, 1.5, 0.2], [1, 0, 0], [0], ['Iris-setosa']], [[5.0, 3.5, 1.3, 0.3], [1, 0, 0], [0], ['Iris-setosa']], [[4.5, 2.3, 1.3, 0.3], [1, 0, 0], [0], ['Iris-setosa']], [[4.4, 3.2, 1.3, 0.2], [1, 0, 0], [0], ['Iris-setosa']], [[5.0, 3.5, 1.6, 0.6], [1, 0, 0], [0], ['Iris-setosa']], [[5.1, 3.8, 1.9, 0.4], [1, 0, 0], [0], ['Iris-setosa']], [[4.8, 3.0, 1.4, 0.3], [1, 0, 0], [0], ['Iris-setosa']], [[5.1, 3.8, 1.6, 0.2], [1, 0, 0], [0], ['Iris-setosa']], [[4.6, 3.2, 1.4, 0.2], [1, 0, 0], [0], ['Iris-setosa']], [[5.3, 3.7, 1.5, 0.2], [1, 0, 0], [0], ['Iris-setosa']], [[5.0, 3.3, 1.4, 0.2], [1, 0, 0], [0], ['Iris-setosa']], [[7.0, 3.2, 4.7, 1.4], [0, 1, 0], [1], ['Iris-versicolor']], [[6.4, 3.2, 4.5, 1.5], [0, 1, 0], [1], ['Iris-versicolor']], [[6.9, 3.1, 4.9, 1.5], [0, 1, 0], [1], ['Iris-versicolor']], [[5.5, 2.3, 4.0, 1.3], [0, 1, 0], [1], ['Iris-versicolor']], [[6.5, 2.8, 4.6, 1.5], [0, 1, 0], [1], ['Iris-versicolor']], [[5.7, 2.8, 4.5, 1.3], [0, 1, 0], [1], ['Iris-versicolor']], [[6.3, 3.3, 4.7, 1.6], [0, 1, 0], [1], ['Iris-versicolor']], [[4.9, 2.4, 3.3, 1.0], [0, 1, 0], [1], ['Iris-versicolor']], [[6.6, 2.9, 4.6, 1.3], [0, 1, 0], [1], ['Iris-versicolor']], [[5.2, 2.7, 3.9, 1.4], [0, 1, 0], [1], ['Iris-versicolor']], [[5.0, 2.0, 
3.5, 1.0], [0, 1, 0], [1], ['Iris-versicolor']], [[5.9, 3.0, 4.2, 1.5], [0, 1, 0], [1], ['Iris-versicolor']], [[6.0, 2.2, 4.0, 1.0], [0, 1, 0], [1], ['Iris-versicolor']], [[6.1, 2.9, 4.7, 1.4], [0, 1, 0], [1], ['Iris-versicolor']], [[5.6, 2.9, 3.6, 1.3], [0, 1, 0], [1], ['Iris-versicolor']], [[6.7, 3.1, 4.4, 1.4], [0, 1, 0], [1], ['Iris-versicolor']], [[5.6, 3.0, 4.5, 1.5], [0, 1, 0], [1], ['Iris-versicolor']], [[5.8, 2.7, 4.1, 1.0], [0, 1, 0], [1], ['Iris-versicolor']], [[6.2, 2.2, 4.5, 1.5], [0, 1, 0], [1], ['Iris-versicolor']], [[5.6, 2.5, 3.9, 1.1], [0, 1, 0], [1], ['Iris-versicolor']], [[5.9, 3.2, 4.8, 1.8], [0, 1, 0], [1], ['Iris-versicolor']], [[6.1, 2.8, 4.0, 1.3], [0, 1, 0], [1], ['Iris-versicolor']], [[6.3, 2.5, 4.9, 1.5], [0, 1, 0], [1], ['Iris-versicolor']], [[6.1, 2.8, 4.7, 1.2], [0, 1, 0], [1], ['Iris-versicolor']], [[6.4, 2.9, 4.3, 1.3], [0, 1, 0], [1], ['Iris-versicolor']], [[6.6, 3.0, 4.4, 1.4], [0, 1, 0], [1], ['Iris-versicolor']], [[6.8, 2.8, 4.8, 1.4], [0, 1, 0], [1], ['Iris-versicolor']], [[6.7, 3.0, 5.0, 1.7], [0, 1, 0], [1], ['Iris-versicolor']], [[6.0, 2.9, 4.5, 1.5], [0, 1, 0], [1], ['Iris-versicolor']], [[5.7, 2.6, 3.5, 1.0], [0, 1, 0], [1], ['Iris-versicolor']], [[5.5, 2.4, 3.8, 1.1], [0, 1, 0], [1], ['Iris-versicolor']], [[5.5, 2.4, 3.7, 1.0], [0, 1, 0], [1], ['Iris-versicolor']], [[5.8, 2.7, 3.9, 1.2], [0, 1, 0], [1], ['Iris-versicolor']], [[6.0, 2.7, 5.1, 1.6], [0, 1, 0], [1], ['Iris-versicolor']], [[5.4, 3.0, 4.5, 1.5], [0, 1, 0], [1], ['Iris-versicolor']], [[6.0, 3.4, 4.5, 1.6], [0, 1, 0], [1], ['Iris-versicolor']], [[6.7, 3.1, 4.7, 1.5], [0, 1, 0], [1], ['Iris-versicolor']], [[6.3, 2.3, 4.4, 1.3], [0, 1, 0], [1], ['Iris-versicolor']], [[5.6, 3.0, 4.1, 1.3], [0, 1, 0], [1], ['Iris-versicolor']], [[5.5, 2.5, 4.0, 1.3], [0, 1, 0], [1], ['Iris-versicolor']], [[5.5, 2.6, 4.4, 1.2], [0, 1, 0], [1], ['Iris-versicolor']], [[6.1, 3.0, 4.6, 1.4], [0, 1, 0], [1], ['Iris-versicolor']], [[5.8, 2.6, 4.0, 1.2], [0, 1, 0], [1], ['Iris-versicolor']], [[5.0, 2.3, 3.3, 1.0], [0, 1, 0], [1], ['Iris-versicolor']], [[5.6, 2.7, 4.2, 1.3], [0, 1, 0], [1], ['Iris-versicolor']], [[5.7, 3.0, 4.2, 1.2], [0, 1, 0], [1], ['Iris-versicolor']], [[5.7, 2.9, 4.2, 1.3], [0, 1, 0], [1], ['Iris-versicolor']], [[6.2, 2.9, 4.3, 1.3], [0, 1, 0], [1], ['Iris-versicolor']], [[5.1, 2.5, 3.0, 1.1], [0, 1, 0], [1], ['Iris-versicolor']], [[5.7, 2.8, 4.1, 1.3], [0, 1, 0], [1], ['Iris-versicolor']], [[6.3, 3.3, 6.0, 2.5], [0, 0, 1], [2], ['Iris-virginica']], [[5.8, 2.7, 5.1, 1.9], [0, 0, 1], [2], ['Iris-virginica']], [[7.1, 3.0, 5.9, 2.1], [0, 0, 1], [2], ['Iris-virginica']], [[6.3, 2.9, 5.6, 1.8], [0, 0, 1], [2], ['Iris-virginica']], [[6.5, 3.0, 5.8, 2.2], [0, 0, 1], [2], ['Iris-virginica']], [[7.6, 3.0, 6.6, 2.1], [0, 0, 1], [2], ['Iris-virginica']], [[4.9, 2.5, 4.5, 1.7], [0, 0, 1], [2], ['Iris-virginica']], [[7.3, 2.9, 6.3, 1.8], [0, 0, 1], [2], ['Iris-virginica']], [[6.7, 2.5, 5.8, 1.8], [0, 0, 1], [2], ['Iris-virginica']], [[7.2, 3.6, 6.1, 2.5], [0, 0, 1], [2], ['Iris-virginica']], [[6.5, 3.2, 5.1, 2.0], [0, 0, 1], [2], ['Iris-virginica']], [[6.4, 2.7, 5.3, 1.9], [0, 0, 1], [2], ['Iris-virginica']], [[6.8, 3.0, 5.5, 2.1], [0, 0, 1], [2], ['Iris-virginica']], [[5.7, 2.5, 5.0, 2.0], [0, 0, 1], [2], ['Iris-virginica']], [[5.8, 2.8, 5.1, 2.4], [0, 0, 1], [2], ['Iris-virginica']], [[6.4, 3.2, 5.3, 2.3], [0, 0, 1], [2], ['Iris-virginica']], [[6.5, 3.0, 5.5, 1.8], [0, 0, 1], [2], ['Iris-virginica']], [[7.7, 3.8, 6.7, 2.2], [0, 0, 1], [2], ['Iris-virginica']], [[7.7, 2.6, 6.9, 2.3], [0, 0, 1], [2], 
['Iris-virginica']], [[6.0, 2.2, 5.0, 1.5], [0, 0, 1], [2], ['Iris-virginica']], [[6.9, 3.2, 5.7, 2.3], [0, 0, 1], [2], ['Iris-virginica']], [[5.6, 2.8, 4.9, 2.0], [0, 0, 1], [2], ['Iris-virginica']], [[7.7, 2.8, 6.7, 2.0], [0, 0, 1], [2], ['Iris-virginica']], [[6.3, 2.7, 4.9, 1.8], [0, 0, 1], [2], ['Iris-virginica']], [[6.7, 3.3, 5.7, 2.1], [0, 0, 1], [2], ['Iris-virginica']], [[7.2, 3.2, 6.0, 1.8], [0, 0, 1], [2], ['Iris-virginica']], [[6.2, 2.8, 4.8, 1.8], [0, 0, 1], [2], ['Iris-virginica']], [[6.1, 3.0, 4.9, 1.8], [0, 0, 1], [2], ['Iris-virginica']], [[6.4, 2.8, 5.6, 2.1], [0, 0, 1], [2], ['Iris-virginica']], [[7.2, 3.0, 5.8, 1.6], [0, 0, 1], [2], ['Iris-virginica']], [[7.4, 2.8, 6.1, 1.9], [0, 0, 1], [2], ['Iris-virginica']], [[7.9, 3.8, 6.4, 2.0], [0, 0, 1], [2], ['Iris-virginica']], [[6.4, 2.8, 5.6, 2.2], [0, 0, 1], [2], ['Iris-virginica']], [[6.3, 2.8, 5.1, 1.5], [0, 0, 1], [2], ['Iris-virginica']], [[6.1, 2.6, 5.6, 1.4], [0, 0, 1], [2], ['Iris-virginica']], [[7.7, 3.0, 6.1, 2.3], [0, 0, 1], [2], ['Iris-virginica']], [[6.3, 3.4, 5.6, 2.4], [0, 0, 1], [2], ['Iris-virginica']], [[6.4, 3.1, 5.5, 1.8], [0, 0, 1], [2], ['Iris-virginica']], [[6.0, 3.0, 4.8, 1.8], [0, 0, 1], [2], ['Iris-virginica']], [[6.9, 3.1, 5.4, 2.1], [0, 0, 1], [2], ['Iris-virginica']], [[6.7, 3.1, 5.6, 2.4], [0, 0, 1], [2], ['Iris-virginica']], [[6.9, 3.1, 5.1, 2.3], [0, 0, 1], [2], ['Iris-virginica']], [[5.8, 2.7, 5.1, 1.9], [0, 0, 1], [2], ['Iris-virginica']], [[6.8, 3.2, 5.9, 2.3], [0, 0, 1], [2], ['Iris-virginica']], [[6.7, 3.3, 5.7, 2.5], [0, 0, 1], [2], ['Iris-virginica']], [[6.7, 3.0, 5.2, 2.3], [0, 0, 1], [2], ['Iris-virginica']], [[6.3, 2.5, 5.0, 1.9], [0, 0, 1], [2], ['Iris-virginica']], [[6.5, 3.0, 5.2, 2.0], [0, 0, 1], [2], ['Iris-virginica']], [[6.2, 3.4, 5.4, 2.3], [0, 0, 1], [2], ['Iris-virginica']], [[5.9, 3.0, 5.1, 1.8], [0, 0, 1], [2], ['Iris-virginica']]] alldata = ClassificationDataSet(4, 1, nb_classes=3, class_labels=['set', 'vers', 'virg']) for p in pat: t = p[2] alldata.addSample(p[0], t) tstdata, trndata = alldata.splitWithProportion(0.33) trndata._convertToOneOfMany() tstdata._convertToOneOfMany() return trndata, tstdata
from pybrain.datasets import ClassificationDataSet
from pybrain.utilities import percentError
from pybrain.tools.shortcuts import buildNetwork
from pybrain.supervised.trainers import BackpropTrainer
from pybrain.structure.modules import SoftmaxLayer

""" Furthermore, pylab is needed for the graphical output. """
from pylab import ion, ioff, figure, draw, contourf, clf, show, hold, plot
from scipy import diag, arange, meshgrid, where
from numpy.random import multivariate_normal

""" To have a nice dataset for visualization, we produce a set of points in 2D
belonging to three different classes. You could also read in your data from a
file, e.g. using pylab.load(). """
means = [(-1, 0), (2, 4), (3, 1)]
cov = [diag([1, 1]), diag([0.5, 1.2]), diag([1.5, 0.7])]
alldata = ClassificationDataSet(2, 1, nb_classes=3)
for n in range(400):
    for klass in range(3):
        input = multivariate_normal(means[klass], cov[klass])
        alldata.addSample(input, [klass])

""" Randomly split the dataset into 75% training and 25% test data sets. Of
course, we could also have created two different datasets to begin with. """
tstdata, trndata = alldata.splitWithProportion(0.25)

""" For neural network classification, it is highly advisable to encode classes
with one output neuron per class. Note that this operation duplicates the
original targets and stores them in an (integer) field named 'class'. """
trndata._convertToOneOfMany()
tstdata._convertToOneOfMany()

""" Test our dataset by printing a little information about it. """
print("Number of training patterns: ", len(trndata))
print("Input and output dimensions: ", trndata.indim, trndata.outdim)
from pybrain.datasets import ClassificationDataSet
from pybrain.utilities import percentError
from pybrain.tools.shortcuts import buildNetwork
from pybrain.supervised.trainers import BackpropTrainer
from pybrain.structure.modules import SoftmaxLayer
import linecache
import random

samples = linecache.getlines('svm3b.txt')
random.shuffle(samples)

alldata = ClassificationDataSet(len(samples[0].split('\t')) - 1, 1, nb_classes=2)
for sample in samples:
    sample_array_o = sample.split('\t')
    sample_array = sample_array_o[0:len(sample_array_o) - 1]
    sample_result = sample_array_o[-1]
    for element in range(0, len(sample_array)):
        sample_array[element] = float(sample_array[element])
    sample_result = int(sample_result)
    alldata.addSample(sample_array, [sample_result])

tstdata, trndata = alldata.splitWithProportion(0.25)
print type(tstdata)
trndata._convertToOneOfMany()
tstdata._convertToOneOfMany()

print "Number of training patterns: ", len(trndata)
print "Input and output dimensions: ", trndata.indim, trndata.outdim
print "First sample (input, target, class):"
print trndata['input'][0], trndata['target'][0], trndata['class'][0]
input_features = zeros([num_features, 4])
target_features = zeros([num_features, 1])

for i in range(0, num_features):
    for j in range(0, 4):
        input_features[i][j] = input[i][j + 1]
    if input[i][0] == "L":
        target_features[i] = 0
    if input[i][0] == "B":
        target_features[i] = 1
    if input[i][0] == "R":
        target_features[i] = 2

print "Dataset loaded into workspace ...."
time.sleep(3)

data = ClassificationDataSet(4, 1, nb_classes=3)
for val in range(0, num_features):
    inp = input_features[val, :]
    targ = target_features[val]
    data.addSample(inp, [targ])
print "Dataset created successfully"

## split into training and testing data
tstdata, trndata = data.splitWithProportion(0.30)
trndata._convertToOneOfMany()
tstdata._convertToOneOfMany()
print "Training data inp dimension :", trndata.indim
print "\n Training data outp dimension :", trndata.outdim

## now create the neural network
#print(Inputs)
#print(Out)

# Convert output characters to integers based on their index in Labels
Outputs = np.empty((1, 1), dtype=int)
for i in np.nditer(Out):
    Outputs = np.append(Outputs, np.array([[Labels.index(i)]]), 0)
Outputs = np.delete(Outputs, 0, 0)
print("size")
print(len(Outputs))

# Build the dataset
Dataset = ClassificationDataSet(120, 1, nb_classes=len(Labels))
assert (Inputs.shape[0] == Outputs.shape[0])
Dataset.setField('input', Inputs)
Dataset.setField('target', Outputs)
Dataset._convertToOneOfMany()

# Build and configure the networks
# RedeSoft1: hidden layer linear, output layer softmax
# RedeSoft2: hidden layer sigmoid, output layer softmax
# RedeSoft3: hidden layer hyperbolic tangent, output layer softmax
RedeSoft1 = buildNetwork(120, 61, len(Labels), bias=True,
                         hiddenclass=LinearLayer, outclass=SoftmaxLayer)
arguments = docopt(__doc__)
num_hidden_layers = map(int, arguments['<l>'])
max_iters = int(arguments['<i>'])

X1, y1 = make_blobs(n_samples=int(arguments['<s>']) / 2, centers=2, cluster_std=0.6)
X2, y2 = make_blobs(n_samples=int(arguments['<s>']) / 2, centers=2, cluster_std=0.6)
X = np.concatenate((X1, X2))
y = np.concatenate((y1, y2))
m, n = X.shape

dataset = ClassificationDataSet(n, 1, nb_classes=2)
for i in range(m):
    dataset.addSample(X[i], y[i])

tst_data, trn_data = dataset.splitWithProportion(0.25)
tst_data._convertToOneOfMany()
trn_data._convertToOneOfMany()

layers = [trn_data.indim]
layers += num_hidden_layers
layers += [trn_data.outdim]

neural_network = buildNetwork(*layers, outclass=SoftmaxLayer)
trainer = BackpropTrainer(neural_network, dataset=trn_data, verbose=False,
                          weightdecay=0.01, momentum=0.1)
def init_brain():
    net = buildNetwork(4096, 4096, 5, bias=True)
    ds = ClassificationDataSet(4096, nb_classes=5,
                               class_labels=['a', 'b', 'c', 'd', 'e'])
    # return both objects; the original dropped them, leaving the function a no-op
    return net, ds
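A hedged continuation of init_brain() (assuming it returns net and ds as above, and that samples are added to ds elsewhere), showing the usual split / one-of-many / backprop sequence; on newer PyBrain the split halves may need re-wrapping as in the converter shown earlier.

from pybrain.supervised.trainers import BackpropTrainer

def train_brain(net, ds, epochs=20):
    tstdata, trndata = ds.splitWithProportion(0.25)
    trndata._convertToOneOfMany()   # 5 target columns to match the 5-unit output layer
    tstdata._convertToOneOfMany()
    trainer = BackpropTrainer(net, dataset=trndata, momentum=0.1, weightdecay=0.01)
    trainer.trainEpochs(epochs)
    return trainer, trndata, tstdata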
def scrape_prediction(): #request.form.values() data = request.form int_features = list(data.values()) chrome_options = webdriver.ChromeOptions() prefs = {"profile.default_content_setting_values.notifications": 2} chrome_options.add_experimental_option("prefs", prefs) driver = webdriver.Chrome('C:/Users/vamsi/chromedriver.exe', chrome_options=chrome_options) #for heroku #driver = webdriver.Chrome(executable_path=os.environ.get("CHROME_DRIVER_PATH"), chrome_options=chrome_options) #open the webpage driver.get("http://www.facebook.com") #target username username = WebDriverWait(driver, 10).until( EC.element_to_be_clickable((By.CSS_SELECTOR, "input[name='email']"))) password = WebDriverWait(driver, 10).until( EC.element_to_be_clickable((By.CSS_SELECTOR, "input[name='pass']"))) #enter username and password username.clear() username.send_keys("9490461737") password.clear() password.send_keys("Facebook@62892") time.sleep(15) #target the login button and click it button = WebDriverWait(driver, 2).until( EC.element_to_be_clickable( (By.CSS_SELECTOR, "button[type='submit']"))).click() time.sleep(15) #We are logged in! url = int_features[0] driver.get(url) time.sleep(15) html = driver.page_source #['created_at','statuses_count','followers_count','favourites_count','sex_code','lang_code'] #1.scraping username section #gmql0nx0.l94mrbxd.p1ri9a11.lzcic4wl.bp9cbjyn.j83agx80 elems = driver.find_elements_by_class_name( "gmql0nx0.l94mrbxd.p1ri9a11.lzcic4wl.bp9cbjyn.j83agx80") try: username = elems[0].text except KeyError: username = '******' username = pd.Series(username) #predicting sex sex_predictor = gender.Detector(unknown_value=u"unknown", case_sensitive=False) first_name = username.str.split(' ').str.get(0) sex = first_name.apply(sex_predictor.get_gender) sex_dict = { 'female': -2, 'mostly_female': -1, 'unknown': 0, 'mostly_male': 1, 'male': 2 } sex_code = sex.map(sex_dict).astype(int) print username print sex_code[0] #2.scraping bio section #d2edcug0 hpfvmrgz qv66sw1b c1et5uql lr9zc1uh a8c37x1j keod5gw0 nxhoafnm aigsh9s9 d3f4x2em fe6kdd0r mau55g9w c8b282yb mdeji52x a5q79mjw g1cxx5fr knj5qynh m9osqain oqcyycmt elems = driver.find_elements_by_class_name( "d2edcug0.hpfvmrgz.qv66sw1b.c1et5uql.lr9zc1uh.a8c37x1j.keod5gw0.nxhoafnm.aigsh9s9.d3f4x2em.fe6kdd0r.mau55g9w.c8b282yb.mdeji52x.a5q79mjw.g1cxx5fr.knj5qynh.m9osqain.oqcyycmt" ) try: bio = elems[0].text except KeyError: bio = '' print bio #3.scraping friends count,statuses_count,followers_count,favourites_count #d2edcug0 hpfvmrgz qv66sw1b c1et5uql lr9zc1uh e9vueds3 j5wam9gi knj5qynh m9osqain #d2edcug0 hpfvmrgz qv66sw1b c1et5uql lr9zc1uh a8c37x1j keod5gw0 nxhoafnm aigsh9s9 d3f4x2em fe6kdd0r mau55g9w c8b282yb iv3no6db jq4qci2q a3bd9o3v lrazzd5p m9osqain elems = driver.find_elements_by_class_name( "d2edcug0.hpfvmrgz.qv66sw1b.c1et5uql.lr9zc1uh.a8c37x1j.keod5gw0.nxhoafnm.aigsh9s9.d3f4x2em.fe6kdd0r.mau55g9w.c8b282yb.iv3no6db.jq4qci2q.a3bd9o3v.lrazzd5p.m9osqain" ) friend_count = elems[2].text friend_count = random.choice(friends_list) print friend_count #statuses_count statuses_count = random.choice(statuses_list) print statuses_count #followers_count followers_count = random.choice(followers_list) print followers_count #favourites_count favourites_count = random.choice(favourites_list) print favourites_count #4.scraping location #oajrlxb2 g5ia77u1 qu0x051f esr5mh6w e9989ue4 r7d6kgcz rq0escxv nhd2j8a9 nc684nl6 p7hjln8o kvgmc6g5 cxmmr5t8 oygrvhab hcukyx3x jb3vyjys rz4wbd8a qt6c0cv9 a8nywdso i1ao9s8h esuyzwwr f1sip0of lzcic4wl oo9gr5id gpro0wi8 lrazzd5p elems = 
driver.find_elements_by_class_name( "oajrlxb2.g5ia77u1.qu0x051f.esr5mh6w.e9989ue4.r7d6kgcz.rq0escxv.nhd2j8a9.nc684nl6.p7hjln8o.kvgmc6g5.cxmmr5t8.oygrvhab.hcukyx3x.jb3vyjys.rz4wbd8a.qt6c0cv9.a8nywdso.i1ao9s8h.esuyzwwr.f1sip0of.lzcic4wl.oo9gr5id.gpro0wi8.lrazzd5p" ) location = 'other' if location in location_dict: location = location_dict[location] else: location_dict[location] = len(location_dict) + 1 location = location_dict[location] pickle.dump(location_dict, open('location_dict_scraper.pkl', 'wb'), protocol=2) print location #5.scraping created_at #d2edcug0 hpfvmrgz qv66sw1b c1et5uql lr9zc1uh a8c37x1j keod5gw0 nxhoafnm aigsh9s9 d3f4x2em fe6kdd0r mau55g9w c8b282yb iv3no6db jq4qci2q a3bd9o3v knj5qynh oo9gr5id hzawbc8m elems = driver.find_elements_by_class_name( "d2edcug0.hpfvmrgz.qv66sw1b.c1et5uql.lr9zc1uh.a8c37x1j.keod5gw0.nxhoafnm.aigsh9s9.d3f4x2em.fe6kdd0r.mau55g9w.c8b282yb.iv3no6db.jq4qci2q.a3bd9o3v.knj5qynh.oo9gr5id.hzawbc8m" ) created_at = '07 December 1997' created_date = datetime.datetime.strptime( datetime.datetime.strptime(created_at, '%d %B %Y').strftime('%m %d %Y'), '%m %d %Y') today = datetime.datetime.strptime( datetime.datetime.now().strftime('%m %d %Y'), '%m %d %Y') days_count = today - created_date days_count = days_count.days print days_count #6.language #lang lang_dict = { 'fr': 3, 'en': 1, 'nl': 6, 'de': 0, 'tr': 7, 'it': 5, 'gl': 4, 'es': 2, 'hi': 8, 'other': 9 } #['created_at','location','statuses_count','followers_count','favourites_count','friends_count','sex_code','lang_code'] df = pd.DataFrame( { 'bio': bio, 'statuses_count': statuses_count, 'followers_count': followers_count, 'friends_count': friend_count, 'favourites_count': favourites_count, 'created_at': days_count, 'location': location, 'sex_code': sex_code, 'lang': lang_dict['hi'] }, index=[0]) params = pd.Series([ df['created_at'], df['location'], df['statuses_count'], df['followers_count'], df['favourites_count'], df['friends_count'], sex_code, df['lang'] ]) print params #Random forest prediction rfr_prediction = random_forest.predict(params) #support vector machine prediction svm_prediction = support_vector.predict(params) #Naive Bayes prediction nvb_prediction = naive_bayes.predict(params) #Decision Tree Prediction dtc_prediction = decision_tree.predict(params) #neural network prediction ds2 = ClassificationDataSet(8, 1, nb_classes=2) lst = [ df['created_at'], df['location'], df['statuses_count'], df['followers_count'], df['favourites_count'], df['friends_count'], sex_code, df['lang'].astype(int) ] ds2.addSample(lst, 1) ds2._convertToOneOfMany() fnn_prediction = neural_network.testOnClassData(dataset=ds2) percent = (dtc_prediction[0] + nvb_prediction[0] + rfr_prediction[0] + svm_prediction[0] + fnn_prediction[0]) percent = round(percent * 20) return render_template('result.html', username=username[0], dtc_prediction=dtc_prediction[0], nvb_prediction=nvb_prediction[0], rfr_prediction=rfr_prediction[0], svm_prediction=svm_prediction[0], fnn_prediction=fnn_prediction[0], percentage=percent, features=int_features)
def create_network(X, Y, testx, testy):
    numOfFeature = X.shape[1]
    numOfExample = X.shape[0]
    alldata = ClassificationDataSet(numOfFeature, 1, nb_classes=10)  # create the classification dataset
    for i in range(0, numOfExample):
        alldata.addSample(X[i], Y[i])
    alldata._convertToOneOfMany()

    numOfFeature1 = testx.shape[1]
    numOfExample1 = testx.shape[0]
    testdata = ClassificationDataSet(numOfFeature1, 1, nb_classes=10)  # create the classification dataset
    for i in range(0, numOfExample1):
        testdata.addSample(testx[i], testy[i])
    testdata._convertToOneOfMany()

    print alldata.indim
    print alldata.outdim

    net = FeedForwardNetwork()
    inLayer = LinearLayer(alldata.indim)
    # choose the layer sizes yourself; judging from training results,
    # more layers and nodes are not necessarily better
    hiddenLayer1 = SigmoidLayer(60)
    hiddenLayer2 = SigmoidLayer(60)
    outLayer = SoftmaxLayer(alldata.outdim)
    #bias = BiasUnit('bias')
    net.addInputModule(inLayer)
    net.addModule(hiddenLayer1)
    net.addModule(hiddenLayer2)
    net.addOutputModule(outLayer)
    #net.addModule(bias)

    in_to_hidden = FullConnection(inLayer, hiddenLayer1)
    hidden_to_out = FullConnection(hiddenLayer2, outLayer)
    hidden_to_hidden = FullConnection(hiddenLayer1, hiddenLayer2)
    net.addConnection(in_to_hidden)
    net.addConnection(hidden_to_hidden)
    net.addConnection(hidden_to_out)
    net.sortModules()

    #fnn = buildNetwork( alldata.indim, 100, alldata.outdim, outclass=SoftmaxLayer )
    trainer = BackpropTrainer(net, dataset=alldata, momentum=0.1,
                              verbose=True, weightdecay=0.01)
    for i in range(0, 20):
        print i
        trainer.trainEpochs(1)  # train on the data for one epoch
    print "train finish...."

    # the highest output activation gives the class: take the most probable
    # class for each sample, e.g. out = [[1],[2],[3],[2]...]
    outtrain = net.activateOnDataset(alldata)
    outtrain = outtrain.argmax(axis=1)
    outtest = net.activateOnDataset(testdata)
    outtest = outtest.argmax(axis=1)

    trnresult = percentError(outtrain, alldata['class'])
    tstresult = percentError(outtest, testdata['class'])
    #trnresult = percentError( trainer.testOnClassData(dataset=alldata), alldata['class'] )
    #tstresult = percentError( trainer.testOnClassData(dataset=testdata), testdata['class'] )
    print "epoch: %4d" % trainer.totalepochs, " train error: %5.2f%%" % trnresult, " test error: %5.2f%%" % tstresult
    return net
def generate_Testdata(index):
    INPUT_FEATURES = 9216
    CLASSES = 5
    train_text, train_classfi_number, train_classfi, train_feature_name = getTargetData(
        "Breast_test.data")
    # train_text = getIndexData(train_text, index)
    alldata = ClassificationDataSet(INPUT_FEATURES, 1, nb_classes=CLASSES)
    # map each class label to its class index
    label_to_klass = {"lumina": 0, "ERBB2": 1, "basal": 2, "normal": 3,
                      "cell_lines": 4}
    for i in range(len(train_text)):
        features = train_text[i]
        if train_classfi[i] in label_to_klass:
            alldata.addSample(features, label_to_klass[train_classfi[i]])
    return {
        'minX': 0, 'maxX': 1,
        'minY': 0, 'maxY': 1,
        'd': alldata,
        'index': index
    }
fig, ax = plt.subplots(10, 10)
img_size = int(math.sqrt(n_features))  # cast so reshape gets integer dimensions
for i in range(10):
    for j in range(10):
        Xi = X[idxs[i * 10 + j], :].reshape(img_size, img_size).T
        ax[i, j].set_axis_off()
        ax[i, j].imshow(Xi, aspect="auto", cmap="gray")
plt.show()

# split up training data for cross validation
print "Split data into training and test sets..."
Xtrain, Xtest, ytrain, ytest = train_test_split(X, y, test_size=0.25,
                                                random_state=42)
ds_train = ClassificationDataSet(X.shape[1], 10)
load_dataset(ds_train, Xtrain, ytrain)

# build a 400 x 25 x 10 neural network
print "Building %d x %d x %d neural network..." % (n_features,
                                                   NUM_HIDDEN_UNITS, n_classes)
fnn = buildNetwork(n_features, NUM_HIDDEN_UNITS, n_classes, bias=True,
                   outclass=SoftmaxLayer)
print fnn

# train network
print "Training network..."
trainer = BackpropTrainer(fnn, ds_train)
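# --- Hedged continuation sketch (not part of the original snippet) ---
# One way the training could proceed: build a matching test set with the same
# load_dataset() helper, train a few epochs, and report accuracy by comparing
# the network's argmax outputs against ytest.  Assumes numpy is available as np.
ds_test = ClassificationDataSet(X.shape[1], 10)
load_dataset(ds_test, Xtest, ytest)
for epoch in range(5):
    trainer.trainEpochs(1)
    preds = fnn.activateOnDataset(ds_test).argmax(axis=1)
    print "epoch %d: test accuracy %.2f%%" % (epoch + 1,
                                              100.0 * np.mean(preds == ytest))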
@author: PY131
'''''

'''
preparation of data
'''
from sklearn import datasets

iris_ds = datasets.load_iris()
X, y = iris_ds.data, iris_ds.target
label = ['Iris-setosa', 'Iris-versicolor', 'Iris-virginica']

from pybrain.datasets import ClassificationDataSet

# 4 input attributes, 1 output with 3 class labels
ds = ClassificationDataSet(4, 1, nb_classes=3, class_labels=label)
for i in range(len(y)):
    ds.appendLinked(X[i], y[i])
ds.calculateStatistics()

# split training, testing, validation data set (proportion 4:1)
tstdata_temp, trndata_temp = ds.splitWithProportion(0.25)

tstdata = ClassificationDataSet(4, 1, nb_classes=3, class_labels=label)
for n in range(0, tstdata_temp.getLength()):
    tstdata.appendLinked(tstdata_temp.getSample(n)[0],
                         tstdata_temp.getSample(n)[1])

trndata = ClassificationDataSet(4, 1, nb_classes=3, class_labels=label)
for n in range(0, trndata_temp.getLength()):
    trndata.appendLinked(trndata_temp.getSample(n)[0],
                         trndata_temp.getSample(n)[1])
trndata._convertToOneOfMany()
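# --- Hedged continuation sketch (not part of the original snippet) ---
# The test set would normally get the same one-hot conversion, after which a
# small softmax network can be trained on the iris data; the hidden-layer size
# of 5 is an arbitrary choice for illustration.
tstdata._convertToOneOfMany()

from pybrain.tools.shortcuts import buildNetwork
from pybrain.structure.modules import SoftmaxLayer
from pybrain.supervised.trainers import BackpropTrainer
from pybrain.utilities import percentError

fnn = buildNetwork(trndata.indim, 5, trndata.outdim, outclass=SoftmaxLayer)
trainer = BackpropTrainer(fnn, dataset=trndata, momentum=0.1, verbose=True,
                          weightdecay=0.01)
for i in range(20):
    trainer.trainEpochs(1)
    tsterr = percentError(trainer.testOnClassData(dataset=tstdata),
                          tstdata['class'])
    print "epoch: %4d  test error: %5.2f%%" % (trainer.totalepochs, tsterr)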
def generate_data():
    index = [
        8673, 1646, 116, 2191, 4326, 6718, 7796, 8531, 8763, 5646, 3626, 5451,
        2004, 8079, 4044, 6471, 675, 3746, 6338, 3149, 4880, 4869, 6213, 5316,
        3544, 1046, 7739, 8309, 4147, 5526, 5555, 1504, 1625, 2680, 5814, 1305,
        3998, 794, 4355, 6788, 3343, 867, 343, 3706, 6902, 4250, 9014, 5478,
        788, 5323, 677,
    ] + range(9215, 9066, -1)  # the original listed 9215 down to 9067 explicitly
    INPUT_FEATURES = 9216
    CLASSES = 5
    train_text, train_classfi_number, train_classfi, train_feature_name = getTargetData(
        "Breast_train.data")
    # train_text = getIndexData(train_text, index)
    alldata = ClassificationDataSet(INPUT_FEATURES, 1, nb_classes=CLASSES)
    # map each class label to its class index
    label_to_klass = {"lumina": 0, "ERBB2": 1, "basal": 2, "normal": 3,
                      "cell_lines": 4}
    for i in range(len(train_text)):
        features = train_text[i]
        if train_classfi[i] in label_to_klass:
            alldata.addSample(features, label_to_klass[train_classfi[i]])
    return {
        'minX': 0, 'maxX': 1,
        'minY': 0, 'maxY': 1,
        'd': alldata,
        'index': index
    }
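# --- Hedged usage sketch (not part of the original) ---
# The dicts returned by generate_data() and generate_Testdata() above can feed
# the same train/evaluate pattern used by the other PyBrain snippets in this
# collection.  buildNetwork, SoftmaxLayer, BackpropTrainer and percentError are
# assumed to be imported.
train = generate_data()
test = generate_Testdata(train['index'])
trndata, tstdata = train['d'], test['d']
trndata._convertToOneOfMany()
tstdata._convertToOneOfMany()
fnn = buildNetwork(trndata.indim, 100, trndata.outdim, outclass=SoftmaxLayer)
trainer = BackpropTrainer(fnn, dataset=trndata, momentum=0.1, verbose=True,
                          weightdecay=0.01)
trainer.trainEpochs(10)
print percentError(trainer.testOnClassData(dataset=tstdata), tstdata['class'])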
def alternateTrain(self, inputData, hiddenLayers, numEpochs, logFreq=1,
                   verbose=True):
    # Set of data to classify:
    # - IMG_SIZE input dimensions per data point
    # - 1 dimensional output
    # - 4 clusters of classification
    all_faces = ClassificationDataSet(IMG_SIZE, 1, nb_classes=4)
    for entry in inputData:
        (emotion, data) = entry
        all_faces.addSample(data, [emotion])

    # Generate a test and a train set from our data
    test_faces, train_faces = all_faces.splitWithProportion(0.25)

    # Hack to convert a 1-dimensional output into 4 output neurons
    test_faces._convertToOneOfMany()
    train_faces._convertToOneOfMany()

    self.fnn = self.buildCustomNetwork(hiddenLayers, train_faces)

    # Set up the network trainer.  Also nice tunable params
    trainer = BackpropTrainer(self.fnn, dataset=train_faces, momentum=0.1,
                              verbose=False, weightdecay=0.01)

    self.errorData = {}
    self.epochData = []
    self.trainErr = []
    self.testErr = []
    self.avgTrnErr = 0
    self.avgTstErr = 0

    # Train the network.
    if verbose:
        # print "Epoch\tTrain Error\tTest Error\t%d Nodes" % hiddenLayers[0]
        # Report after every epoch if verbose
        for i in range(numEpochs):
            trainer.trainEpochs(1)
            if trainer.totalepochs % logFreq == 0:
                trnresult = percentError(trainer.testOnClassData(),
                                         train_faces['class'])
                tstresult = percentError(
                    trainer.testOnClassData(dataset=test_faces),
                    test_faces['class'])
                self.avgTrnErr += trnresult
                self.avgTstErr += tstresult
                self.epochData.append(trainer.totalepochs)
                self.trainErr.append(trnresult)
                self.testErr.append(tstresult)
                """print "%4d\t" % trainer.totalepochs, \
                    "%5.2f%%\t\t" % trnresult, \
                    "%5.2f%%" % tstresult
                """
    else:
        trainer.trainEpochs(EPOCHS)

    self.errorData['epochs'] = self.epochData
    self.errorData['training_error'] = self.trainErr
    self.errorData['testing_error'] = self.testErr
    self.errorData['avg_testing_error'] = self.avgTstErr / numEpochs
    self.errorData['avg_training_error'] = self.avgTrnErr / numEpochs

    return self.errorData
#     update=nesterov_momentum,
#     update_learning_rate=0.01,
#     update_momentum=0.9,
#     regression=True,  # flag to indicate we're dealing with a regression problem
#     max_epochs=100,   # we want to train this many epochs
#     verbose=1,
# )
# X = np.asarray(X)
# X = X.astype(np.float32)
# y = np.asarray(y)
# y = y.astype(np.float32)
# net1.fit(X, y)

alldata = ClassificationDataSet(len(final[0][1]), 1, nb_classes=2)
for i, tup in enumerate(final):
    alldata.addSample(tup[1], tup[0])

tstdata, trndata = alldata.splitWithProportion(0.60)
trndata._convertToOneOfMany()
tstdata._convertToOneOfMany()

fnn = buildNetwork(trndata.indim, 10, trndata.outdim, outclass=SoftmaxLayer)
trainer = BackpropTrainer(fnn, dataset=trndata, momentum=0.1, verbose=True,
                          weightdecay=0.01)
trainer.trainUntilConvergence(maxEpochs=10)
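# --- Hedged evaluation sketch (not part of the original) ---
# After convergence, train/test error can be reported with percentError, as the
# other snippets in this collection do (assumes percentError is imported from
# pybrain.utilities).
trnresult = percentError(trainer.testOnClassData(), trndata['class'])
tstresult = percentError(trainer.testOnClassData(dataset=tstdata),
                         tstdata['class'])
print "train error: %5.2f%%  test error: %5.2f%%" % (trnresult, tstresult)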
def generate_data():
    index = [
        9154, 5123, 2407, 680, 548, 8016, 15755, 9861, 461, 5552,
        6834, 6268, 14112, 15285, 13065, 8838, 2962, 6581, 4025, 14928,
        10521, 1413, 3587, 3537, 13462, 9809, 4128, 15806, 4884, 2084,
        7818, 8294, 12308, 8789, 5328, 5817, 7663, 6299, 15295, 3547,
        1673, 5940, 6085, 6368, 6006, 5520, 14228, 8608, 7822, 3237,
        10927, 12268, 2852, 6903, 13001, 10775, 4852, 14487, 10885, 14948,
        15239, 8787, 6886, 15720, 13436, 4102, 7832, 5071, 11062, 15004,
        14888, 12560, 4381, 14283, 6892, 14753, 10132, 6937, 2393, 465,
        11791, 8533, 2174, 6739, 4316, 251, 11438, 10288, 6658, 6439,
        6711, 5173, 11590, 1452, 524, 15677, 13742, 11881, 9299, 7499,
        7068, 11457, 11128, 4936, 1634, 14692, 13352, 11896, 11895, 11494,
        9704, 6878, 10112, 10027, 10207, 6946, 6604, 5563, 3590, 2817,
        2661, 9667, 9609, 8368, 7538, 6830, 1909, 1385, 15043, 14006,
        11050, 10743, 10306, 9574, 9546, 9267, 9232, 8546, 8452, 8027,
        7465, 5453, 1903, 1747, 1367, 15496, 14231, 13894, 12340, 11433,
        11118, 9223, 8369, 8017, 7324, 6737, 5047, 4635, 4631, 3685,
        3418, 3215, 1395, 835, 690, 15808, 15210, 13829, 13798, 13303,
        13220, 13078, 12416, 12407, 12082, 11940, 11266, 9794, 9643, 8825,
        8600, 8446, 7892, 6972, 6728, 6559, 5759, 5091, 4640, 4209,
        3214, 1994, 1599, 1447, 1082, 15881, 15810, 15586, 15564, 15150
    ]
    INPUT_FEATURES = 200
    CLASSES = 15
    # train_text, train_classfi = getTargetData("Breast_train.data")
    # Load the GCM training data
    train_text, train_classfi_number, train_classfi, train_feature_name = getTargetData(
        "GCM_train.data")
    train_text = getIndexData(train_text, index)
    alldata = ClassificationDataSet(INPUT_FEATURES, 1, nb_classes=CLASSES)
    # map each tumour type to its class index; the original if/elif chain also
    # listed "Colorectal" a second time for class 14, which was unreachable
    label_to_klass = {
        "Breast": 0, "Prostate": 1, "Lung": 2, "Colorectal": 3, "Lymphoma": 4,
        "Bladder": 5, "Melanoma": 6, "Uterus": 7, "Leukemia": 8, "Renal": 9,
        "Pancreas": 10, "Ovary": 11, "Mesothelioma": 12, "CNS": 13,
    }
    for i in range(len(train_text)):
        features = train_text[i]
        if train_classfi[i] in label_to_klass:
            alldata.addSample(features, label_to_klass[train_classfi[i]])
    return {
        'minX': 0, 'maxX': 1,
        'minY': 0, 'maxY': 1,
        'd': alldata,
        'index': index
    }
def load_training_dataSet(fileName):
    data = pd.read_csv(fileName, sep=',', header=None)
    # data.columns = ["state", "outcome"]
    return data


myclones_data = load_training_dataSet('Datasets/new_dataset_with_new_features.csv')
myclones_data = myclones_data.values

inputDim = 8
means = [(-1, 0), (2, 4), (3, 1)]
cov = [diag([1, 1]), diag([0.5, 1.2]), diag([1.5, 0.7])]
alldata = ClassificationDataSet(inputDim, 1, nb_classes=2)
# input = np.array([myclones_data[n][16], myclones_data[n][17], myclones_data[n][18],
#                   myclones_data[n][15], myclones_data[n][11], myclones_data[n][12],
#                   myclones_data[n][26], myclones_data[n][27]])
for n in xrange(len(myclones_data)):
    # for klass in range(3):
    input = np.array([myclones_data[n][16], myclones_data[n][17],
                      myclones_data[n][18], myclones_data[n][15],
                      myclones_data[n][11], myclones_data[n][12],
                      myclones_data[n][26], myclones_data[n][27]])
    # print (n, "-->", input)
    alldata.addSample(input, int(myclones_data[n][35]))

tstdata, trndata = alldata.splitWithProportion(0.85)
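# --- Hedged continuation sketch (not part of the original) ---
# One-hot encode the targets and train a small softmax classifier on the clone
# features; buildNetwork, SoftmaxLayer, BackpropTrainer and percentError are
# assumed to be imported, and the hidden-layer size of 10 is arbitrary.
trndata._convertToOneOfMany()
tstdata._convertToOneOfMany()
fnn = buildNetwork(trndata.indim, 10, trndata.outdim, outclass=SoftmaxLayer)
trainer = BackpropTrainer(fnn, dataset=trndata, momentum=0.1, verbose=True,
                          weightdecay=0.01)
trainer.trainEpochs(10)
print percentError(trainer.testOnClassData(dataset=tstdata), tstdata['class'])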
def mean_square_error(outputs, desireds):
    result = sum([((output - desired) * (output - desired)) / 2
                  for output, desired in zip(outputs, desireds)])
    return result / len(outputs)


if __name__ == "__main__":
    matrix = []
    data = []
    for x in range(10):
        for y in range(10):
            matrix.append(create_identity_matrix(x, y))

    # input size, number of classes
    train_data = ClassificationDataSet(100, 100, nb_classes=100)
    test_data = ClassificationDataSet(100, 100, nb_classes=100)

    # build the training and test sets
    train_data.addSample(matrix, matrix)
    test_data.addSample(matrix, matrix)
    '''
    print ("Number of training patterns: ", len(train_data))
    print ("Input and output dimensions: ", train_data.indim, train_data.outdim)
    print ("First sample (input, target, class):")
    print (test_data['input'], test_data['target'])
    '''
    # build the network
    network = FeedForwardNetwork()
    inLayer = SigmoidLayer(train_data.indim)
    hiddenLayer = SigmoidLayer(7)
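    # --- Hedged continuation sketch (not part of the original) ---
    # The network construction would typically continue by adding an output
    # layer, wiring the layers with FullConnection, and sorting the modules
    # (FullConnection is assumed to be imported from pybrain.structure).
    outLayer = SigmoidLayer(train_data.outdim)
    network.addInputModule(inLayer)
    network.addModule(hiddenLayer)
    network.addOutputModule(outLayer)
    network.addConnection(FullConnection(inLayer, hiddenLayer))
    network.addConnection(FullConnection(hiddenLayer, outLayer))
    network.sortModules()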
def generate_Testdata(index):
    INPUT_FEATURES = 200
    CLASSES = 15
    # train_text, train_classfi = getTargetData("Breast_train.data")
    # Load the GCM test data
    train_text, train_classfi_number, train_classfi, train_feature_name = getTargetData(
        "GCM_test.data")
    train_text = getIndexData(train_text, index)
    alldata = ClassificationDataSet(INPUT_FEATURES, 1, nb_classes=CLASSES)
    # map each tumour type to its class index; as in generate_data(), the original
    # chain listed "Colorectal" twice, so class 14 was never assigned
    label_to_klass = {
        "Breast": 0, "Prostate": 1, "Lung": 2, "Colorectal": 3, "Lymphoma": 4,
        "Bladder": 5, "Melanoma": 6, "Uterus": 7, "Leukemia": 8, "Renal": 9,
        "Pancreas": 10, "Ovary": 11, "Mesothelioma": 12, "CNS": 13,
    }
    for i in range(len(train_text)):
        features = train_text[i]
        if train_classfi[i] in label_to_klass:
            alldata.addSample(features, label_to_klass[train_classfi[i]])
    return {
        'minX': 0, 'maxX': 1,
        'minY': 0, 'maxY': 1,
        'd': alldata,
        'index': index
    }
def generate_Testdata(index):
    INPUT_FEATURES = 500
    CLASSES = 15
    # train_text, train_classfi = getTargetData("Breast_train.data")
    # Load the GCM test data
    train_text, train_classfi_number, train_classfi, train_feature_name = getTargetData(
        "GCM_test.data")
    temp = index
    # take up to 500 feature columns named in the index
    ss = []
    count = 0
    df = pd.DataFrame(train_text)
    for line in temp:
        count == 0  # no-op comparison kept from the original; it has no effect
        count += 1
        ss.append(df[line[1]].values)
        if count == 500:
            break
    train_text = np.array(ss).transpose()
    alldata = ClassificationDataSet(INPUT_FEATURES, 1, nb_classes=CLASSES)
    # map each tumour type to its class index; the original chain listed
    # "Colorectal" twice, so class 14 was never assigned
    label_to_klass = {
        "Breast": 0, "Prostate": 1, "Lung": 2, "Colorectal": 3, "Lymphoma": 4,
        "Bladder": 5, "Melanoma": 6, "Uterus": 7, "Leukemia": 8, "Renal": 9,
        "Pancreas": 10, "Ovary": 11, "Mesothelioma": 12, "CNS": 13,
    }
    for i in range(len(train_text)):
        features = train_text[i]
        if train_classfi[i] in label_to_klass:
            alldata.addSample(features, label_to_klass[train_classfi[i]])
    return {
        'minX': 0, 'maxX': 1,
        'minY': 0, 'maxY': 1,
        'd': alldata,
        'index': temp
    }
from pybrain.tools.shortcuts import buildNetwork
from pybrain.datasets import SupervisedDataSet
from pybrain.datasets import ClassificationDataSet
from pybrain.structure.modules import SoftmaxLayer
import numpy
import csv
from PIL import Image
from pybrain.supervised.trainers import BackpropTrainer

# find counts and image filenames
data = []
alldata = ClassificationDataSet(16, 1, nb_classes=3)
uncloned = ClassificationDataSet(16, 1, nb_classes=3)
expected = []
with open("D10415.csv") as f:
    for line in f:
        vals = line.split(",")
        hist = [float(x) for x in vals[:-1]]
        klass = int(vals[-1])
        alldata.addSample(numpy.array(hist), [klass])
        uncloned.addSample(numpy.array(hist), [klass])
        data.append((numpy.array(hist), klass))
        expected.append(klass)

alldata._convertToOneOfMany()

# create the net
# net = buildNetwork(alldata.indim, 16, 1, outclass=SoftmaxLayer)
net = buildNetwork(alldata.indim, 16, alldata.outdim, outclass=SoftmaxLayer)
# net = buildNetwork(alldata.indim, 16, 1)
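# --- Hedged training sketch (not part of the original) ---
# Train the net on the one-hot dataset and compare predicted classes against
# the expected labels collected above.
trainer = BackpropTrainer(net, dataset=alldata, momentum=0.1, verbose=True,
                          weightdecay=0.01)
trainer.trainEpochs(20)
predictions = net.activateOnDataset(uncloned).argmax(axis=1)
correct = sum(1 for p, e in zip(predictions, expected) if p == e)
print "accuracy: %.2f%%" % (100.0 * correct / len(expected))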
def generate_Testdata(index):
    INPUT_FEATURES = 300
    CLASSES = 9
    # train_text, train_classfi = getTargetData("Breast_train.data")
    # Load the NCI60 test data
    train_text, train_classfi_number, train_classfi, train_feature_name = getTargetData(
        "nci60_test_m_truncated.txt")
    temp = index
    # take up to 300 feature columns named in the index
    ss = []
    count = 0
    df = pd.DataFrame(train_text)
    for line in temp:
        count == 0  # no-op comparison kept from the original; it has no effect
        count += 1
        ss.append(df[line[1]].values)
        if count == 300:
            break
    train_text = np.array(ss).transpose()
    alldata = ClassificationDataSet(INPUT_FEATURES, 1, nb_classes=CLASSES)
    # class labels "1".."9" map to class indices 0..8
    label_to_klass = {str(k): k - 1 for k in range(1, 10)}
    for i in range(len(train_text)):
        features = train_text[i]
        if train_classfi[i] in label_to_klass:
            alldata.addSample(features, label_to_klass[train_classfi[i]])
    return {
        'minX': 0, 'maxX': 1,
        'minY': 0, 'maxY': 1,
        'd': alldata,
        'index': temp
    }
    days = pd.get_dummies(df.DayOfWeek)
    district = pd.get_dummies(df.PdDistrict)
    hour = pd.get_dummies(df.Dates.dt.hour)
    year = pd.get_dummies(df.Dates.dt.year)
    month = pd.get_dummies(df.Dates.dt.month)
    minute = pd.get_dummies(df.Dates.dt.minute)
    X = df.X
    Y = df.Y
    new_df = pd.concat([days, hour, year, month, district, X, Y], axis=1)
    return new_df


crimes = OHE_crime(training)

print "making dataset"
ds = ClassificationDataSet(68, 1, nb_classes=39)
for k in xrange(len(crimes)):
    print k
    ds.addSample(crimes.iloc[[k]], crime_labels[k])

tstdata, trndata = ds.splitWithProportion(0.5)
trndata._convertToOneOfMany()
tstdata._convertToOneOfMany()

print "making net"
hidden_layer = int((trndata.indim + trndata.outdim) / 2)
fnn = buildNetwork(trndata.indim, hidden_layer, trndata.outdim, bias=True,
                   outclass=SoftmaxLayer)
print fnn
trainer = BackpropTrainer(fnn, dataset=trndata, momentum=0.1, learningrate=0.01,
                          verbose=True, weightdecay=0.01)
print "WIP"
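# --- Hedged continuation sketch (not part of the original) ---
# Run a few epochs and report train/test error in the same way the other
# PyBrain snippets in this collection do (percentError import is assumed).
for i in range(5):
    trainer.trainEpochs(1)
    trnresult = percentError(trainer.testOnClassData(), trndata['class'])
    tstresult = percentError(trainer.testOnClassData(dataset=tstdata),
                             tstdata['class'])
    print "epoch: %4d  train error: %5.2f%%  test error: %5.2f%%" % (
        trainer.totalepochs, trnresult, tstresult)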
def generate_data():
    INPUT_FEATURES = 300
    CLASSES = 9
    # train_text, train_classfi = getTargetData("Breast_train.data")
    # Load the NCI60 training data
    train_text, train_classfi_number, train_classfi, train_feature_name = getTargetData(
        "nci60_train_m_truncated.txt")
    X = train_text
    Y = train_classfi_number
    names = train_feature_name
    # rank features by random forest importance
    rf = RandomForestRegressor()
    rf.fit(X, Y)
    temp = sorted(zip(map(lambda x: round(x, 4), rf.feature_importances_), names),
                  reverse=True)
    # keep the 300 most important feature columns
    ss = []
    count = 0
    df = pd.DataFrame(train_text)
    for line in temp:
        count == 0  # no-op comparison kept from the original; it has no effect
        count += 1
        ss.append(df[line[1]].values)
        if count == 300:
            break
    train_text = np.array(ss).transpose()
    alldata = ClassificationDataSet(INPUT_FEATURES, 1, nb_classes=CLASSES)
    # class labels "1".."9" map to class indices 0..8
    label_to_klass = {str(k): k - 1 for k in range(1, 10)}
    for i in range(len(train_text)):
        features = train_text[i]
        if train_classfi[i] in label_to_klass:
            alldata.addSample(features, label_to_klass[train_classfi[i]])
    return {
        'minX': 0, 'maxX': 1,
        'minY': 0, 'maxY': 1,
        'd': alldata,
        'index': temp
    }
import numpy
from pybrain.datasets import SupervisedDataSet
from pybrain.tools.shortcuts import buildNetwork
from pybrain.utilities import percentError
from pybrain.datasets import ClassificationDataSet
from pybrain.supervised.trainers import BackpropTrainer
from pybrain.structure.modules import SoftmaxLayer
from pylab import ion, ioff, figure, draw, contourf, clf, show, hold, plot
from scipy import diag, arange, meshgrid, where
from numpy.random import multivariate_normal

# ourdataset = SupervisedDataSet(4, 1)
ourdataset = ClassificationDataSet(4, 1, nb_classes=3)

with open('newdata.txt') as fp:
    for line in fp:
        splitedline = line.split(",")
        ourclass = splitedline[4].split("\n")[0]
        if "Iris-virginica" in ourclass:
            nameclass = 0
        elif "Iris-setosa" in ourclass:
            nameclass = 1
        else:
            nameclass = 2
        oursample = splitedline[0:4]
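        # --- Hedged continuation sketch (not part of the original) ---
        # Convert the string features to floats and add the labelled sample.
        oursample = [float(v) for v in oursample]
        ourdataset.addSample(oursample, [nameclass])

# After the loop, the dataset could be split and one-hot encoded like the other
# iris examples in this collection.
tstdata, trndata = ourdataset.splitWithProportion(0.25)
trndata._convertToOneOfMany()
tstdata._convertToOneOfMany()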