Example #1
def makeMnistDataSets(path):
    """Return a pair consisting of two datasets, the first being the training
    and the second being the test dataset."""
    # test = SupervisedDataSet(28 * 28, 10)
    test = ClassificationDataSet(28*28, 10)
    test_image_file = os.path.join(path, 't10k-images-idx3-ubyte')
    test_label_file = os.path.join(path, 't10k-labels-idx1-ubyte')
    test_images = images(test_image_file)
    test_labels = (flaggedArrayByIndex(l, 10) for l in labels(test_label_file))

    for image, label in zip(test_images, test_labels):
        test.appendLinked(image, label)
        # test.addSample(image, label)

    # train = SupervisedDataSet(28 * 28, 10)
    train = ClassificationDataSet(28*28, 10)
    train_image_file = os.path.join(path, 'train-images-idx3-ubyte')
    train_label_file = os.path.join(path, 'train-labels-idx1-ubyte')
    train_images = images(train_image_file)
    train_labels = (flaggedArrayByIndex(l, 10) for l in labels(train_label_file))
    for image, label in zip(train_images, train_labels):
        train.appendLinked(image, label)
        # train.addSample(image, label)

    return train, test
Example #2
def build_dataset(data_pair):
    inputs, classes = data_pair
    ds = ClassificationDataSet(256)
    data = zip(inputs, classes)
    for (inp, c) in data:
        ds.appendLinked(inp, [c])
    return ds
Example #3
 def classifier(labels, data):
     """ data in format (value, label)
     """
     clsff = ClassificationDataSet(2, class_labels=labels)
     for d in data:
         clsff.appendLinked(d[0], d[1])
     clsff.calculateStatistics()
     return clsff
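A minimal usage sketch for the helper above, assuming it is defined at module level; the label names and (value, label) pairs below are illustrative, not from the source:

from pybrain.datasets import ClassificationDataSet  # needed by the helper above

labels = ['neg', 'pos']
data = [((0.2, 0.4), [0]), ((1.1, 0.9), [1]), ((0.3, 0.1), [0])]
ds = classifier(labels, data)
print(ds.classHist)  # e.g. {0: 2, 1: 1} after calculateStatistics()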
Example #4
def getPybrainDataSet(source='Rachelle'):
    first = False  # True
    qualities, combinations = cp.getCombinations()
    moods = combinations.keys()
    ds = None
    l=0
    for mood in moods:
        if mood=='neutral':
            continue
        for typeNum in range(1,21):
            for take in range(1,10):
                fileName = 'recordings/'+source+'/'+mood+'/'+\
                str(typeNum)+'_'+str(take)+'.skl'
                try:
                    data, featuresNames = ge.getFeatureVec(fileName, first)
                    first = False
                except IOError:
                    continue
                if ds is None:#initialization
                    ds = ClassificationDataSet( len(data), len(qualities) )
                output = np.zeros((len(qualities)))
                for q in combinations[mood][typeNum]:
                    output[qualities.index(q)] = 1
                ds.appendLinked(data, output)

                l += sum(output)
    return ds, featuresNames
Example #5
def makeMnistDataSets(path):
    """Return a pair consisting of two datasets, the first being the training
    and the second being the test dataset."""
    # test = SupervisedDataSet(28 * 28, 10)
    test = ClassificationDataSet(28 * 28, 10)
    test_image_file = os.path.join(path, 't10k-images-idx3-ubyte')
    test_label_file = os.path.join(path, 't10k-labels-idx1-ubyte')
    test_images = images(test_image_file)
    test_labels = (flaggedArrayByIndex(l, 10) for l in labels(test_label_file))

    for image, label in zip(test_images, test_labels):
        test.appendLinked(image, label)
        # test.addSample(image, label)

    # train = SupervisedDataSet(28 * 28, 10)
    train = ClassificationDataSet(28 * 28, 10)
    train_image_file = os.path.join(path, 'train-images-idx3-ubyte')
    train_label_file = os.path.join(path, 'train-labels-idx1-ubyte')
    train_images = images(train_image_file)
    train_labels = (flaggedArrayByIndex(l, 10)
                    for l in labels(train_label_file))
    for image, label in zip(train_images, train_labels):
        train.appendLinked(image, label)
        # train.addSample(image, label)

    return train, test
Example #6
def getBoardImage(img):
    '''
    Runs an image through processing and neural network to decode digits

    img: an openCV image object

    returns:
        pil_im: a PIL image object with the puzzle isolated, cropped and straightened
        boardString: string representing the digits and spaces of a Sudoku board (left to right, top to bottom)
    '''

    # Process image and extract digits
    pil_im, numbers, parsed, missed = process(img, False)
    if pil_im is None:
        return None, None

    net = NetworkReader.readFrom(os.path.dirname(os.path.abspath(__file__))+'/network.xml')
    boardString = ''

    for number in numbers:
        if number is None:
            boardString += ' '
        else:
            data=ClassificationDataSet(400, nb_classes=9, class_labels=['1','2','3','4','5','6','7','8','9'])
            data.appendLinked(number.ravel(),[0])
            boardString += str(net.activateOnDataset(data).argmax(axis=1)[0]+1)
    return pil_im, boardString
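The argmax-plus-one at the end of getBoardImage maps the nine output units back to the digits 1-9; a toy sketch of that decoding step (the activation values are illustrative):

import numpy as np

# hypothetical activations for the nine classes '1'..'9'
activations = np.array([[0.01, 0.02, 0.05, 0.80, 0.04, 0.03, 0.02, 0.02, 0.01]])
digit = str(activations.argmax(axis=1)[0] + 1)
print(digit)  # -> '4'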
Example #7
def import_dataset(path, shapes, used_for, samples_nbr):
    ds = ClassificationDataSet(4, nb_classes=3)
    for shape in sorted(shapes):
        for i in range(samples_nbr):
            image = imread(path + used_for + "/" + shape + str(i + 1) + ".png", as_grey=True, plugin=None, flatten=None)
            image_inputs = image_to_inputs(image)
            ds.appendLinked(image_inputs, shapes[shape])
    return ds
Example #8
def create_data_set(file_name):
    raw_data = open(file_name).readlines()

    data_set = ClassificationDataSet(64, nb_classes=10, class_labels=['0', '1', '2', '3', '4', '5', '6', '7', '8', '9'])
    for line in raw_data:
        # Get raw line into a list of integers
        line = [int(x) for x in line.strip().split(',')]
        data_set.appendLinked(line[:-1], line[-1])
    return data_set
Example #9
def classificationDataSet(subjects=['a2','b','c1','c2'], segClass=0, db=None, seg_width=10, usePCA=True, n_components=5, isTrainingData=False):
	if not db:
		db = gyroWalkingData()

	if usePCA:
		DS = ClassificationDataSet(n_components*3, nb_classes=2)
	else:
		DS = ClassificationDataSet(21*3, nb_classes=2)
	
	for subject in subjects:
		# Initialise data
		if usePCA:
			raw = db.pca_dict(n_components=n_components, whiten=False)[subject]
		else:
			raw = db.data[subject][:,2:]
		gradients, standardDeviations = summaryStatistics(raw, std_window=seg_width)

		# Initialise segments
		if 0 <= segClass < 4:
			segs = [s for s,c in db.manual_gait_segments[subject] if c == segClass]
		else:
			segs = db.segments[subject]

		# Add data
		for i in range(0,len(raw)):

			"""
			# Look for segments in window, including those of other classes
			hasSeg = 0
			hasOtherSeg = False
			for j in range(seg_width):
				if i+j in segs:
					hasSeg = 1
				else:
					if i+j in zip(*db.manual_gait_segments[subject])[0]:
						hasOtherSeg = True
				if hasOtherSeg:
					hasSeg = 0

			# Add segments to classifier, duplicating rare classes if it is training data
			for j in range(seg_width):
				if i+j < len(raw):
					DS.appendLinked( np.concatenate( [raw[i+j],gradients[i+j],standardDeviations[i+j]] ), [hasSeg] )
					if isTrainingData and (hasSeg or hasOtherSeg):
						for i in range(0):
							DS.appendLinked( np.concatenate( [raw[i+j],gradients[i+j],standardDeviations[i+j]] ), [hasSeg] )
			"""

			hasSeg = 0
			if i in segs:
				hasSeg = 1
			DS.appendLinked( np.concatenate( [raw[i],gradients[i],standardDeviations[i]] ), [hasSeg] )
	
	DS._convertToOneOfMany()
	if isTrainingData:
		DS = balanceClassRatios(DS)
	return DS
Example #11
def conv2DS(Xv,yv = None) :
    if yv is None:
        yv =  np.asmatrix( np.ones( (Xv.shape[0],1) ) )
        for j in range(len(classNames)) : yv[j] = j
            
    C = len(unique(yv.flatten().tolist()[0]))
    DS = ClassificationDataSet(M, 1, nb_classes=C)
    for i in range(Xv.shape[0]) : DS.appendLinked(Xv[i,:].tolist()[0], [yv[i].A[0][0]])
    DS._convertToOneOfMany( )
    return DS    
Example #12
def import_dataset(path, shapes, used_for, samples_nbr):
    ds = ClassificationDataSet(4, nb_classes=3)
    for shape in sorted(shapes):
        for i in range(samples_nbr):
            image = imread(path + used_for + '/' + shape + str(i + 1) + '.png',
                           as_grey=True,
                           plugin=None,
                           flatten=None)
            image_inputs = image_to_inputs(image)
            ds.appendLinked(image_inputs, shapes[shape])
    return ds
Example #13
 def __get_classification_dataset__(self, data):
     DS = ClassificationDataSet(self.__num_features__,
                                class_labels=['neg', 'pos'])
     for curr_data_idx in range(data.shape[0]):
         curr_data = data[curr_data_idx, :-1]
         print 'curr_data', curr_data
         curr_label = data[curr_data_idx, -1]
         print 'curr_label', curr_label
         DS.appendLinked(curr_data, [curr_label])
     #
     return DS
Example #14
def getSeparateDataSets(testSize = 0.2):
    trnDs = ClassificationDataSet(len(feats), nb_classes=len(classes))
    tstDs = SupervisedDataSet(len(feats), 1)
    for c in classes:
        with codecs.open(os.path.join(data_root, c+".txt"), 'r', 'utf8') as f:
            lines = f.readlines()
            breakpoint = (1.0 - testSize) * len(lines)
            for i in range(len(lines)):
                r = Record("11", lines[i], c, "")
                if i < breakpoint:
                    trnDs.appendLinked(r.features(), [r.class_idx()])
                else:
                    tstDs.appendLinked(r.features(), [r.class_idx()])
    trnDs._convertToOneOfMany([0, 1])
    return trnDs, tstDs
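Several of the examples above call _convertToOneOfMany after filling the dataset. A toy sketch of what it does to the target field, assuming a standard pybrain install:

from pybrain.datasets import ClassificationDataSet

ds = ClassificationDataSet(2, nb_classes=3)
ds.appendLinked([0.1, 0.2], [0])
ds.appendLinked([0.3, 0.4], [2])
print(ds.getField('target'))       # e.g. [[0], [2]]
ds._convertToOneOfMany(bounds=[0, 1])
print(ds.getField('target'))       # e.g. [[1, 0, 0], [0, 0, 1]]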
Example #15
    def conv2DS(Xv, yv=None, labels=string.ascii_uppercase):
        N, M = Xv.shape
        if yv is None:
            yv = np.asmatrix(np.ones((Xv.shape[0], 1)))
            for j in range(len(classNames)):
                yv[j] = j

        le = preprocessing.LabelEncoder()
        y_asnumbers = le.fit_transform(np.ravel(yv))

        C = len(np.unique(np.ravel(yv)))
        DS = ClassificationDataSet(M, 1, nb_classes=C, class_labels=labels)
        for i in range(Xv.shape[0]):
            DS.appendLinked(Xv[i, :], y_asnumbers[i])
        return DS
Example #16
    def __prepareTrainingData(self,places,num_of_places):
        
        alldata = ClassificationDataSet(2, 1, nb_classes=self.num_of_places)
        previous_feature_vector = None
        previous_place = None
        counter = 0

        for location_event in places:
            if location_event.place is not None:
                current_timestamp = location_event.timestamp
                new_feature_vector = self.__prepare_features(location_event.place, current_timestamp)
                new_place = self.__prepare_place(location_event.place)
                #if previous_feature_vector is not None and previous_place is not None and location_event.place.name != previous_place.name:
                if previous_feature_vector is not None:
                    counter += 1

                    if location_event.place.name == "2":
                        print previous_feature_vector
                        print location_event.place.name
                        for i in range(1):
                            alldata.appendLinked(previous_feature_vector, [new_place])

                previous_feature_vector = new_feature_vector
                previous_place = location_event.place
                self.last_visit_map[location_event.place] = current_timestamp
                
        previous_feature_vector = None
        previous_place = None
        probability_of_static = float(counter) / float(len(places))
        probability_of_static = 0.5
        for location_event in places:
            if location_event.place is not None:
                current_timestamp = location_event.timestamp
                new_feature_vector = self.__prepare_features(location_event.place, current_timestamp)
                new_place = self.__prepare_place(location_event.place)
                rand = random.random()
                if previous_feature_vector is not None and rand <= probability_of_static:
                    counter += 1

                    if location_event.place.name == "1":
                        print new_feature_vector
                        print location_event.place.name
                        for i in range(1):
                            alldata.appendLinked(previous_feature_vector, [new_place])
                previous_feature_vector = new_feature_vector
                previous_place = new_place
                self.last_visit_map[location_event.place] = current_timestamp
        return alldata
Example #17
	def init_classifier(self, hidden_units = 20):
		data = ClassificationDataSet(len(self.channels), nb_classes=5)
		# Prepare the dataset
		for i in range(len(self.classification_proc)):
			data.appendLinked(self.y_proc[i], self.classification_proc[i])
		# Make global for test purposes
		self.data = data
		# Prepare training and test data, 75% - 25% proportion
		self.testdata, self.traindata = data.splitWithProportion(0.25)
		#self.traindata._convertToOneOfMany()
		#self.testdata._convertToOneOfMany()
		# CHECK the number of hidden units
		fnn = buildNetwork(self.traindata.indim, hidden_units, self.traindata.outdim)
		# CHECK meaning of the parameters
		trainer = BackpropTrainer(fnn, dataset=self.traindata, momentum=0, verbose=True, weightdecay=0.01)
		return fnn, trainer, data
Example #18
 def train(training_data):
     training_set = ClassificationDataSet(len(feats), nb_classes=len(classes))
     for inst in training_data:
         training_set.appendLinked(inst.features(), [inst.class_idx()])
     training_set._convertToOneOfMany([0, 1])
     net_placeholder[0] = buildNetwork(
         training_set.indim,
         int((training_set.indim + training_set.outdim)/2),
         training_set.outdim, bias=True,
         hiddenclass=TanhLayer,
         outclass=SoftmaxLayer
     )
     trainer = BackpropTrainer(
         net_placeholder[0], training_set, momentum=0.75, verbose=False, learningrate=0.05
     )
     trainer.trainUntilConvergence(maxEpochs=100, validationProportion=0.1)
Example #19
def fnn():
    data = orange.ExampleTable("D:\\Back-up-THICK_on_Vista\\Orange\\W1BIN.tab")#input_dict['data'])
    addMetaID(data)
    n_attrs = len(data.domain.attributes)
    classes = list(data.domain.classVar.values)
    pbdata = ClassificationDataSet(n_attrs, class_labels=classes)
    for ex in data:
        pbdata.appendLinked([x.value for x in list(ex)[:n_attrs]], [classes.index(ex.getclass().value)])
        
    tstdata, trndata = pbdata.splitWithProportion( 0.25 )
    trndata._convertToOneOfMany( )
    tstdata._convertToOneOfMany( )
    print "Number of training patterns: ", len(trndata)
    print "Input and output dimensions: ", trndata.indim, trndata.outdim
    print "First sample (input, target, class):"
    print trndata['input'][0], trndata['target'][0], trndata['class'][0]
Example #20
 def build_net(self):
     if os.path.exists(self.NET_FILE):
         return NetworkReader.readFrom(self.NET_FILE)
     ds = ClassificationDataSet(len(feats), nb_classes=len(classes))
     for c in classes:
         print c
         with codecs.open(os.path.join(self.data_root, c+".txt"), 'r', 'utf8') as f:
             for line in f:
                 r = Record("11", line, c, "")
                 ds.appendLinked(r.features(), [r.class_idx()])
     ds._convertToOneOfMany([0, 1])
     net = buildNetwork(ds.indim, int((ds.indim + ds.outdim)/2), ds.outdim, bias=True, hiddenclass=TanhLayer, outclass=SoftmaxLayer)
     trainer = BackpropTrainer(net, ds, momentum=0.75, verbose=True)
     trainer.trainUntilConvergence(maxEpochs=300)
     NetworkWriter.writeToFile(net, self.NET_FILE)
     return net
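The read-if-cached / write-after-training pattern around NetworkReader and NetworkWriter in build_net is reusable on its own; a minimal sketch, with an illustrative file name:

import os
from pybrain.tools.shortcuts import buildNetwork
from pybrain.tools.customxml.networkwriter import NetworkWriter
from pybrain.tools.customxml.networkreader import NetworkReader

NET_FILE = 'cached_net.xml'  # illustrative path, not from the source

def load_or_build():
    # reuse a previously trained network if one was saved
    if os.path.exists(NET_FILE):
        return NetworkReader.readFrom(NET_FILE)
    net = buildNetwork(4, 3, 2, bias=True)
    # ... train here ...
    NetworkWriter.writeToFile(net, NET_FILE)
    return net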
Example #21
 def bagging_classifier(self, trainInstances, testInstances, L):
     """Train and test bagging classifier for the neural network.  
         (1) generate self.m new training sets each with L instances 
         from trainInstances using replacement;
         (2) train self.m neural networks on the self.m training sets; 
         (3) majority vote
     
     Precondition: dimensions of trainInstances,testInstances must match self.fnn
     
     :param trainInstances: collection of training examples
     :type trainInstances: ClassificationDataSet
     :param testInstances: collection of test examples
     :type testInstances: ClassificationDataSet
     :param L: number of items in each training set
     :type L: int
     :returns: accuracy of predictions
     :rtype: float
     """ 
     ensemble = []
     for j in range(self.m):
         # generate random sample of indices
         tset = random.sample(range(0, len(trainInstances["input"])), L) 
         c = ClassificationDataSet(self.fnn.indim, 1, nb_classes=self.fnn.outdim)
         for index in tset:
             c.appendLinked(trainInstances['input'][index], trainInstances['target'][index])
         c._convertToOneOfMany(bounds=[0,1]) # 1 of k binary representation
         net = buildNetwork(24, 18, 16, 8, hiddenclass=TanhLayer, outclass=SoftmaxLayer) # define neural net
         trainer = BackpropTrainer(net, dataset=c, learningrate=0.01, momentum=0.1, verbose=True, weightdecay=0.01)
         trainer.trainEpochs(20) # train
         ensemble.append(net)
         print percentError(trainer.testOnClassData(
                             dataset=testInstances ), testInstances['class'])
     # key is test example, value is list of labels from each model    
     d = dict.fromkeys(np.arange(len(testInstances['input']))) 
     for model in ensemble:
         # get label with highest probability for each test example
         result = model.activateOnDataset(testInstances).argmax(axis=1)
         for k in range(len(result)):
             if d[k] is None:
                 d[k] = [result[k]]
             else:
                 d[k].append(result[k])
     predictions = []
     for ex in d.keys():
         predictions.append(max(set(d[ex]), key=d[ex].count)) # majority voting 
     actual = [int(row[0]) for row in testInstances['class']]
     return accuracy_score(actual, predictions) # traditional accuracy calc
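The dict-based majority vote above can be written more compactly; a sketch under the same assumptions (one argmax-ed prediction array per ensemble member, all of equal length):

from collections import Counter
import numpy as np

def majority_vote(results):
    """results: list of 1-D integer prediction arrays, one per model."""
    votes = np.vstack(results)                      # shape (n_models, n_examples)
    return [Counter(votes[:, k]).most_common(1)[0][0]
            for k in range(votes.shape[1])]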
Example #22
def cross_validate(comps, view=False):
	for layer_size in [5,10,25]:
		for alpha in [0.01, 0.03, 0.1]:
			fold_accuracy = []
			for i in xrange(0, 5):
				if not view:
					xs = np.load('pca_fold_'+str(i)+'_train_xs.npy')[:,0:comps]
					ys = np.load('pca_fold_'+str(i)+'_train_ys.npy')
					DS = ClassificationDataSet(comps, nb_classes=10)
					for j in xrange(0, xs.shape[0]):
						DS.appendLinked(xs[j,:], ys[j])
					DS._convertToOneOfMany(bounds=[0,1])

				net = buildNetwork(comps, layer_size, 10, outclass=SoftmaxLayer)
				#net = buildNetwork(comps, layer_size, layer_size, 10, outclass=SoftmaxLayer)
				if not view:	
					trainer = BackpropTrainer(net, DS, learningrate=alpha)
					trainer.trainUntilConvergence(maxEpochs=4)
				test_xs = np.load('pca_fold_'+str(i)+'_test_xs.npy')[:,0:comps]
				test_ys = np.load('pca_fold_'+str(i)+'_test_ys.npy')
				preds = np.zeros(test_ys.shape)
				correct = 0
				for j in xrange(0, test_xs.shape[0]):
					if view:
						break
					pred_raw = net.activate(test_xs[j,:].tolist())
					pred = np.argmax(np.array(pred_raw))
					preds[j] = pred
					if pred == test_ys[j]:
						correct += 1
				if view:
					preds = np.load('long_result_%d_%d_%f_%d.npy' % (comps, layer_size, alpha, i))
					for j in xrange(0, preds.shape[0]):
						if preds[j] == test_ys[j]:
							correct += 1
				else:
					np.save('long_result_%d_%d_%f_%d.npy' % (comps, layer_size, alpha, i), preds)
				accuracy = float(correct)/test_xs.shape[0]
				fold_accuracy.append(accuracy)
			acc = np.sum(fold_accuracy)/5
			if view:
				#print "%d & %d & %f & %f\\\\" % (comps, layer_size, alpha, acc)
				#print "\hline"
				print acc,",",
			else:	
				print "Components: %d\tHidden Nodes: %d\tLearning Rate: %f Accuracy: %f" % (comps, layer_size, alpha, acc)
Example #23
 def getdata(self):
     dataset = ClassificationDataSet(9, 1)
     with open('xtraindata.csv') as tf:
         for line in tf:
             data = [x for x in line.strip().split(',') if x]
             # indata =  tuple(data[1:10])
             # outdata = tuple(data[10:])
             """
             for i in range(4,10):
                 data[i] = str(float(data[i])*100)
             if float(data[12]) > 0:
                 data[12] = float(data[12]) * 100
             """
             for i in range(1, 4):
                 data[i] = str(float(data[i]) / 100)
             dataset.appendLinked(data[1:10], data[12])
     return dataset
Example #25
def teachNeuralNetwork(countState, testNumber):

    data = tuple(readData("DataMatrix" + str(1) + ".txt"))
    size = len(data)
    ds = ClassificationDataSet(size,
                               1,
                               nb_classes=3,
                               class_labels=['0', '1', '-1'])

    # SET TRAINING DATA
    for i in range(1, countState):
        try:
            data = []
            data = (readData("DataMatrix" + str(i) + ".txt"))
            answer = readAnswer("Answer" + str(i) + ".txt")
            idx = 2 if answer == -1 else answer
            ds.appendLinked(data, [idx])
        except BaseException:
            pass  # swallow read errors for missing files

    ds._convertToOneOfMany()
    net = buildNetwork(ds.indim, 300, ds.outdim, recurrent=True)
    #  trainer = RPropMinusTrainer(net, dataset = ds, momentum = 0.1, verbose = False,weightdecay=0.03)
    trainer = BackpropTrainer(net,
                              dataset=ds,
                              momentum=0.1,
                              verbose=False,
                              weightdecay=0.03)
    trainer.trainUntilConvergence(maxEpochs=2000)
    tstData = (readData("DataMatrixExpr" + str(testNumber) + ".txt"))

    ansArr = (net.activate(tstData))
    indx = 0
    max = ansArr[0]
    for i in range(len(ansArr)):
        if ansArr[i] > max:
            max = ansArr[i]
            indx = i
    t = [0, 1, -1]

    return t[indx]


#========MAINCODE=============
#teachNeuralNetwork()
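The manual max scan at the end of teachNeuralNetwork is equivalent to numpy's argmax; a small sketch with illustrative activations:

import numpy as np

ansArr = [0.2, 0.7, 0.1]       # illustrative network activations
indx = int(np.argmax(ansArr))  # same index the explicit max loop computes
t = [0, 1, -1]
print(t[indx])                 # -> 1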
Example #26
	def Predict(self):
		prediction=[]
		
		attributescount=len(self.testdata[0])
		nrclass = len(set(self.testlabel))
		dstraindata = ClassificationDataSet(attributescount, target=nrclass, nb_classes=nrclass, class_labels=list(set(self.testlabel)))
		for i in range(len(self.testdata)):
			dstraindata.appendLinked(self.testdata[i], self.testlabel[i])
		dstraindata._convertToOneOfMany()
		out = self.net.activateOnDataset(dstraindata)
		prediction = out.argmax(axis=1)
		'''
		for testrecord in self.testdata :
			out = self.net.activate(testrecord)[0]
			prediction.append(out)
		'''	
			
		self.result = [self.testlabel, prediction]
Example #27
def classificationDataSet(subject='a1', db=None):
	if not db:
		db = gyroWalkingData()

	raw = db.data[subject][:,2:]
	segs = db.segments[subject]
	DS = ClassificationDataSet(21, nb_classes=2)

	for i in range(0,len(raw),5):
		hasSeg = 0
		for j in range(5):
			if i+j in segs:
				hasSeg = 1
		for j in range(5):
			if i+j < len(raw):
				DS.appendLinked(raw[i+j],[hasSeg])
	DS._convertToOneOfMany()
	return DS
Example #28
def fnn():
    data = orange.ExampleTable(
        "D:\\Back-up-THICK_on_Vista\\Orange\\W1BIN.tab")  #input_dict['data'])
    addMetaID(data)
    n_attrs = len(data.domain.attributes)
    classes = list(data.domain.classVar.values)
    pbdata = ClassificationDataSet(n_attrs, class_labels=classes)
    for ex in data:
        pbdata.appendLinked([x.value for x in list(ex)[:n_attrs]],
                            [classes.index(ex.getclass().value)])

    tstdata, trndata = pbdata.splitWithProportion(0.25)
    trndata._convertToOneOfMany()
    tstdata._convertToOneOfMany()
    print "Number of training patterns: ", len(trndata)
    print "Input and output dimensions: ", trndata.indim, trndata.outdim
    print "First sample (input, target, class):"
    print trndata['input'][0], trndata['target'][0], trndata['class'][0]
Example #29
    def conv2DS(Xv, yv=None, labels=string.ascii_uppercase):
        N, M = Xv.shape
        if yv is None:
            yv = np.asmatrix(np.ones((Xv.shape[0], 1)))
            for j in range(len(classNames)):
                yv[j] = j

        le = preprocessing.LabelEncoder()
        y_asnumbers = le.fit_transform(np.ravel(yv))

        C = len(np.unique(np.ravel(yv)))
        DS = ClassificationDataSet(
            M,
            1,
            nb_classes=C,
            class_labels=labels)
        for i in range(Xv.shape[0]):
            DS.appendLinked(Xv[i, :], y_asnumbers[i])
        return DS
Example #30
def split_dataset(data_path, ratio):
    dataset = ClassificationDataSet(256, 10)
    with open(data_path) as data_file:
        for record in data_file:
            line = record[:1812]
            line = line.replace(' ', ', ')

            data = line[:2046]
            dataList = [float(x) for x in data.split(',')]

            ans = line[2048:-2]
            ansList = [int(x) for x in ans.split(',')]

            dataset.appendLinked(dataList, ansList)

    train_data, test_data = dataset.splitWithProportion(ratio)
    return train_data, test_data
Example #32
def init_brain(learn_data, epochs, hidden_count, TrainerClass=BackpropTrainer):
    global data_dir
    print("\t Epochs: ", epochs)
    if learn_data is None:
        return None
    print("Building network")
    net = buildNetwork(7 * 7, hidden_count, 4, hiddenclass=SigmoidLayer)
    # net = buildNetwork(64 * 64, 32 * 32, 8 * 8, 5)
    # net = buildNetwork(64 * 64, 5, hiddenclass=LinearLayer)
    # fill dataset with learn data
    trans = {'0': 0, '1': 1, '2': 2, '3': 3}
    ds = ClassificationDataSet(7 * 7,
                               nb_classes=4,
                               class_labels=['0', '1', '2', '3'])
    for inp, out in learn_data:
        ds.appendLinked(inp, [trans[out]])
    ds.calculateStatistics()
    print("\tNumber of classes in dataset = {0}".format(ds.nClasses))
    print("\tOutput in dataset is ", ds.getField('target').transpose())
    ds._convertToOneOfMany(bounds=[0, 1])
    print("\tBut after convert output in dataset is \n", ds.getField('target'))
    trainer = TrainerClass(net, learningrate=0.1, verbose=True)
    trainer.setData(ds)
    print(
        "\tEverything is ready for learning.\nPlease wait, training in progress..."
    )
    start = time.time()
    trainer.trainEpochs(epochs=epochs)
    end = time.time()

    f = open(data_dir + "/values.txt", "w")
    f.write("Training time: %.2f \n" % (end - start))
    f.write("Total epochs: %s \n" % (trainer.totalepochs))
    # f.write("Error: %.22f" % (trainer.trainingErrors[len(trainer.trainingErrors) - 1]))
    f.close()

    print("Percent of error: ",
          percentError(trainer.testOnClassData(), ds['class']))
    print("\tOk. We have trained our network.")
    NetworkWriter.writeToFile(net, data_dir + "/net.xml")
    return net
Example #33
 def init_classifier(self, hidden_units=20):
     data = ClassificationDataSet(len(self.channels), nb_classes=5)
     # Prepare the dataset
     for i in range(len(self.classification_proc)):
         data.appendLinked(self.y_proc[i], self.classification_proc[i])
     # Make global for test purposes
     self.data = data
     # Prepare training and test data, 75% - 25% proportion
     self.testdata, self.traindata = data.splitWithProportion(0.25)
     #self.traindata._convertToOneOfMany()
     #self.testdata._convertToOneOfMany()
     # CHECK the number of hidden units
     fnn = buildNetwork(self.traindata.indim, hidden_units,
                        self.traindata.outdim)
     # CHECK meaning of the parameters
     trainer = BackpropTrainer(fnn,
                               dataset=self.traindata,
                               momentum=0,
                               verbose=True,
                               weightdecay=0.01)
     return fnn, trainer, data
Example #34
    def construct_train_data(self):

        # print len(self.output_train)
        # print len(self.eigenvector)
        ds = ClassificationDataSet(self.vct_len, 1, nb_classes=2)
        for i in range(len(self.output_train)):
            ds.appendLinked(self.eigenvector[i], self.output_train[i])
        # print ds
        # print ds
        ds.calculateStatistics()

        # split training and testing data set (proportion 3:1)
        tstdata_temp, trndata_temp = ds.splitWithProportion(0.25)
        tstdata = ClassificationDataSet(self.vct_len, 1, nb_classes=2)
        for n in range(0, tstdata_temp.getLength()):
            tstdata.appendLinked(
                tstdata_temp.getSample(n)[0],
                tstdata_temp.getSample(n)[1])

        trndata = ClassificationDataSet(self.vct_len, 1, nb_classes=2)
        for n in range(0, trndata_temp.getLength()):
            trndata.appendLinked(
                trndata_temp.getSample(n)[0],
                trndata_temp.getSample(n)[1])
        # one hot encoding
        # print trndata
        testdata = ClassificationDataSet(self.vct_len, 1, nb_classes=2)
        test_data_temp = self.test_data
        for n in range(len(test_data_temp)):
            testdata.addSample(test_data_temp[n], [0])
        # print testdata
        trndata._convertToOneOfMany()
        tstdata._convertToOneOfMany()
        testdata._convertToOneOfMany()
        return trndata, tstdata, testdata, ds
Example #35
def teachNeuralNetwork(countState,testNumber):

    data = tuple(readData("DataMatrix" + str(1) + ".txt"))
    size  = len(data)
    ds = ClassificationDataSet(size, 1,nb_classes=3,class_labels = ['0','1','-1'])

    # SET TRAINING DATA
    for i in range (1,countState):
        try:
            data = []
            data = (readData("DataMatrix" + str(i) + ".txt"))
            answer = readAnswer("Answer" + str(i) + ".txt")
            idx = 2 if answer == -1 else answer
            ds.appendLinked (data,[idx])
        except BaseException:
            pass  # swallow read errors for missing files


    ds._convertToOneOfMany()
    net = buildNetwork(ds.indim, 300, ds.outdim ,recurrent = True)
  #  trainer = RPropMinusTrainer(net, dataset = ds, momentum = 0.1, verbose = False,weightdecay=0.03)
    trainer = BackpropTrainer(net, dataset = ds, momentum = 0.1, verbose = False,weightdecay=0.03)
    trainer.trainUntilConvergence(maxEpochs= 2000)
    tstData =  (readData("DataMatrixExpr" + str(testNumber) + ".txt"))

    ansArr =  (net.activate(tstData))
    indx = 0
    max = ansArr[0]
    for i in range(len(ansArr)):
        if ansArr[i] > max:
            max = ansArr[i]
            indx = i
    t = [0,1,-1]

    return t[indx]



#========MAINCODE=============
#teachNeuralNetwork()
Example #36
    def Predict(self):
        prediction = []

        attributescount = len(self.testdata[0])
        nrclass = len(set(self.testlabel))
        dstraindata = ClassificationDataSet(attributescount,
                                            target=nrclass,
                                            nb_classes=nrclass,
                                            class_labels=list(
                                                set(self.testlabel)))
        for i in range(len(self.testdata)):
            dstraindata.appendLinked(self.testdata[i], self.testlabel[i])
        dstraindata._convertToOneOfMany()
        out = self.net.activateOnDataset(dstraindata)
        prediction = out.argmax(axis=1)
        '''
		for testrecord in self.testdata :
			out = self.net.activate(testrecord)[0]
			prediction.append(out)
		'''

        self.result = [self.testlabel, prediction]
Example #37
def classificationTrainingSet(holdouts=['a1'], db=None):
	if not db:
		db = gyroWalkingData()

	DS = ClassificationDataSet(21, nb_classes=2)
	
	for subject in db.data:
		if subject not in holdouts:
			raw = db.data[subject][:,2:]
			segs = db.segments[subject]

			seg_width = 2
			for i in range(0,len(raw),seg_width):
				hasSeg = 0
				for j in range(seg_width):
					if i+j in segs:
						hasSeg = 1
				for j in range(seg_width):
					if i+j < len(raw):
						DS.appendLinked(raw[i+j],[hasSeg])
	
	DS._convertToOneOfMany()
	return DS
Example #38
'''
dataset = pd.get_dummies(df)
pd.set_option('display.max_columns', 1000)  # display all of the columns

X = dataset[dataset.columns[:-2]]
Y = dataset[dataset.columns[-2:]]
labels = dataset.columns._data[-2:]

# Step 3: convert the data into a SupervisedDataSet/ClassificationDataSet object
from pybrain.datasets import ClassificationDataSet
ds = ClassificationDataSet(19, 1, nb_classes=2, class_labels=labels)
for i in range(len(Y)):
    y = 0
    if Y['好瓜_是'][i] == 1:
        y = 1
    ds.appendLinked(X.ix[i], y)
ds.calculateStatistics()  # builds the per-class histogram

# Step 4: split the test set and training set
testdata = ClassificationDataSet(19, 1, nb_classes=2, class_labels=labels)
testdata_temp, traindata_temp = ds.splitWithProportion(0.25)
for n in range(testdata_temp.getLength()):
    testdata.appendLinked(
        testdata_temp.getSample(n)[0],
        testdata_temp.getSample(n)[1])
print(testdata)
testdata._convertToOneOfMany()
print(testdata)
traindata = ClassificationDataSet(19, 1, nb_classes=2, class_labels=labels)
for n in range(traindata_temp.getLength()):
    traindata.appendLinked(
Example #39
ds = ClassificationDataSet(numInput,
                           nb_classes=numTarget)  # numInput-dimensional inputs, numTarget classes

# Loading code based off of this code:
# http://stackoverflow.com/questions/8139822/how-to-load-training-data-in-pybrain
import csv
tf = open(trainingDataFile, 'r')

for line in tf.readlines():
    # Split the values on the current line, and convert to float
    tfData = [float(x) for x in line.strip().split(',') if x != '']
    inData = tuple(tfData[:numInput])  # Grab first numInput values
    outData = tuple(tfData[numInput:])  # Grab the rest

    # Add the data to the datasets
    ds.appendLinked(inData, outData)

# This converts each output to the desired activations of each neuron in the output layer
# Ex. class 1 target -> 10000000, class 2 target -> 01000000, class 3 target -> 00100000 etc.
ds._convertToOneOfMany(bounds=[0, 1])

# Some info printing code from here: http://pybrain.org/docs/tutorial/fnn.html
print("Number of training patterns: ", len(ds))
print("Input and output dimensions: ", ds.indim, ds.outdim)
print("First sample (input, target, class):")
print(ds['input'][0], ds['target'][0], ds['class'][0])
#input()

# Trainers
from pybrain.tools.shortcuts import buildNetwork
from pybrain.supervised.trainers import BackpropTrainer
#supervised learning tutorial
from pybrain.datasets import SupervisedDataSet
from pybrain.datasets import ClassificationDataSet
# DS = SupervisedDataSet(3,2)
# DS.appendLinked([1,2,3], [4,5])
# print(len(DS))
# DS['input']
# array([[1., 2., 3.]])

DS = ClassificationDataSet(2, class_labels=['Urd', 'Verdandi', 'skuld'])
DS.appendLinked([0.1, 0.5] , [0])
DS.appendLinked([1.2, 1.2] , [1])
DS.appendLinked([1.4, 1.6] , [1])
DS.appendLinked([1.6, 1.8] , [1])
DS.appendLinked([0.10, 0.80] , [2])
DS.appendLinked([0.20, 0.90] , [2])

print(DS.calculateStatistics())
print(DS.classHist)
print(DS.nClasses)
print(DS.getClass(1))
print(DS.getField('target').transpose())

Example #41
class NeuralNetworkClassification(algorithmbase):
    def ExtraParams(self, hiddenlayerscount, hiddenlayernodescount):
        self.hiddenlayerscount = hiddenlayerscount
        self.hiddenlayernodescount = hiddenlayernodescount
        return self

    def PreProcessTrainData(self):
        self.traindata = preprocess_apply(self.traindata,
                                          self.missingvaluemethod,
                                          self.preprocessingmethods)

    def PrepareModel(self, savedmodel=None):

        if savedmodel is not None:
            self.trainer = savedmodel
        else:
            attributescount = len(self.traindata[0])
            nrclass = len(set(self.trainlabel))
            self.ds = ClassificationDataSet(attributescount,
                                            target=nrclass,
                                            nb_classes=nrclass,
                                            class_labels=list(
                                                set(self.trainlabel)))

            for i in range(len(self.traindata)):
                self.ds.appendLinked(self.traindata[i], [self.trainlabel[i]])
            self.ds._convertToOneOfMany()

            self.net = FeedForwardNetwork()
            inLayer = LinearLayer(len(self.traindata[0]))
            self.net.addInputModule(inLayer)
            hiddenLayers = []
            for i in range(self.hiddenlayerscount):
                hiddenLayer = SigmoidLayer(self.hiddenlayernodescount)
                hiddenLayers.append(hiddenLayer)
                self.net.addModule(hiddenLayer)
            outLayer = SoftmaxLayer(nrclass)
            self.net.addOutputModule(outLayer)

            layers_connections = []
            layers_connections.append(FullConnection(inLayer, hiddenLayers[0]))
            for i in range(self.hiddenlayerscount - 1):
                layers_connections.append(
                    FullConnection(hiddenLayers[i], hiddenLayers[i + 1]))
            layers_connections.append(
                FullConnection(hiddenLayers[-1], outLayer))

            for layers_connection in layers_connections:
                self.net.addConnection(layers_connection)
            self.net.sortModules()

            #training the network
            self.trainer = BackpropTrainer(self.net, self.ds)
            self.trainer.train()

    def PreProcessTestDate(self):
        self.testdata = preprocess_apply(self.testdata,
                                         self.missingvaluemethod,
                                         self.preprocessingmethods)

    def Predict(self):
        prediction = []

        attributescount = len(self.testdata[0])
        nrclass = len(set(self.testlabel))
        dstraindata = ClassificationDataSet(attributescount,
                                            target=nrclass,
                                            nb_classes=nrclass,
                                            class_labels=list(
                                                set(self.testlabel)))
        for i in range(len(self.testdata)):
            dstraindata.appendLinked(self.testdata[i], self.testlabel[i])
        dstraindata._convertToOneOfMany()
        out = self.net.activateOnDataset(dstraindata)
        prediction = out.argmax(axis=1)
        '''
		for testrecord in self.testdata :
			out = self.net.activate(testrecord)[0]
			prediction.append(out)
		'''

        self.result = [self.testlabel, prediction]

    def GetModel(self):
        return self.trainer
Example #42
print("Th complete dataset shape is : ", Data.shape)
print("Th complete target shape is : ", Target.shape)
print("The training data shape is (2/3 of complete dataset): ", DataTrain.shape)
print("The training target shape is (2/3 of complete target): ", TargetTrain.shape)
print("The test data shape is (1/3 of complete dataset): ", DataTest.shape)
print("The test target shape is (1/3 of complete target): ", TargetTest.shape)
print("\n")

#prepare data for pybrain
number_of_columns = Data.shape[1]
PyBData = ClassificationDataSet(number_of_columns, 1, nb_classes=2)
PyBDataTrain = ClassificationDataSet(number_of_columns, 1, nb_classes=2)
PyBDataTest = ClassificationDataSet(number_of_columns, 1, nb_classes=2)

for i in xrange(len(Data)):
	PyBData.appendLinked(Data[i], Target[i])
	
for i in xrange(len(DataTrain)):
	PyBDataTrain.appendLinked(DataTrain[i], TargetTrain[i])
	
for i in xrange(len(DataTest)):
	PyBDataTest.appendLinked(DataTest[i], TargetTest[i])

#*******************End of Preparing Data & Target for Estimators******************

#*******************Decision Tree Classification******************
print("Entering Decision Tree Classifier with starting time", time.localtime())

clf_dt = tree.DecisionTreeClassifier(criterion="entropy")
clf_dt = clf_dt.fit(DataTrain, TargetTrain)
Example #43
    ]  # list of black and white pixels
    # Normalize the pixels to average brightness
    avgluminosity = sum(pixels) / len(pixels)
    processedpixels = map(
        lambda p: min(p + 255 / 2 - avgluminosity, 255)
        if (avgluminosity < 255 / 2) else max(p + 255 / 2 - avgluminosity, 0),
        pixels)

    # Save lowres images
    a = np.array(processedpixels)
    a = a.reshape(-1, width)
    im = toimage(a)
    im.save(os.path.join(murkafolder + '/bw', trainingpicture))

    # Populate database
    ds.appendLinked(processedpixels, [0])  # 0 = Murka; 1 = Masya

for trainingpicture in [
        f for f in os.listdir(masyafolder) if f.endswith('.png')
]:
    im = Image.open(os.path.join(masyafolder, trainingpicture))
    imlow = im.resize((width, height), Image.ANTIALIAS)
    # Convert black and white (L = luminosity; 0 = black; 255 = white)
    bw_im = imlow.convert('L')
    pixels = [
        bw_im.getpixel((i, j)) for j in range(height) for i in range(width)
    ]  # list of black and white pixels
    # Normalize the pixels to average brightness
    avgluminosity = sum(pixels) / len(pixels)
    processedpixels = map(
        lambda p: min(p + 255 / 2 - avgluminosity, 255)
Example #44
for i in range(len(data)):
    if data[i][4] == 'setosa':
        data[i][4] = 0
    elif data[i][4] == 'versicolor':
        data[i][4] = 1
    else:
        data[i][4] = 2

net = buildNetwork(4, 5, 3)

ds = ClassificationDataSet(4, nb_classes=3, class_labels=['setosa',
                                                          'versicolor',
                                                          'virginica'])
for i in data:
    ds.appendLinked(i[:4], [i[4]])

ds._convertToOneOfMany(bounds=[0, 1])

trainer = BackpropTrainer(net, verbose=True)
trainer.setData(ds)
trainer.trainUntilConvergence(maxEpochs=100)


names_dict = {
    '0': 'setosa',
    '1': 'versicolor',
    '2': 'virginica',
    0: 'setosa',
    1: 'versicolor',
    2: 'virginica'
Example #45
def getDataFromFolder(folderpath, datapath):
    '''
    Creates pybrain ClassificationDataSet from folder of iPhone images and .txt file of data
    '''
    # Setup Dataset for PyBrain
    data = ClassificationDataSet(400, nb_classes=9, class_labels=['1','2','3','4','5','6','7','8','9'])

    # Get photos
    dirs = os.listdir(folderpath)

    # Get data
    dat = []
    with open(datapath) as f:
        dat = f.read().splitlines()

    # Set variables
    parsed = 0
    missed = 0
    missednumbers = 0  # count of numbers in image that were not parsed
    falsenumbers = 0   # count of false number parsings from empty squares
    gatherednumbers = 0
    correctspaces = 0


    # Process each photo/data pairing
    for p in range(1,len(dirs)-1):

        # Create image
        img = cv2.imread((folderpath + dirs[p]))
        print(folderpath + dirs[p])
        # Get all the digits in the image
        pil_im, numbers, parsedcheck, missedcheck = process(img, False)

        # Board located successfully
        parsed += parsedcheck

        # Board not located
        missed += missedcheck

        ind = 0

        # Match digits photos with data
        for number in numbers:

            if (number is None):
                # True negative
                if dat[p-1][ind] == '0':
                    correctspaces += 1
                    ind += 1
                # False negative
                else:
                    missednumbers += 1
                    ind += 1

            else:
                # False positive
                if dat[p-1][ind] == '0':
                    falsenumbers += 1
                    ind += 1
                # True positive
                else:
                    gatherednumbers += 1
                    data.appendLinked(number.ravel(), [int(dat[p-1][ind])-1])
                    ind += 1

    # Print results
    print ("\nData processed: ")
    print("\n  Puzzles located successfully: " + str(parsed))
    print("  Puzzles not located successfully: " + str(missed)+"\n")


    print("  Number of digit samples gathered (true positives): " + str(gatherednumbers))
    print("  Number of spaces confirmed (true negatives): " + str(correctspaces)+"\n")

    print("  Number of digit samples missed in a processed image (false negatives): " + str(missednumbers))
    print("  Number of digit samples that needed to be removed (false positives): " + str(falsenumbers))

    return data
Example #46
def classDsBuild(data):
    DS = ClassificationDataSet(5,nb_classes=4)
    for ele in data:
        DS.appendLinked((ele[0],ele[1],ele[2],ele[3],ele[4]), (ele[5]))
    dsTrain,dsTest = DS.splitWithProportion(0.8)
    return dsTrain, dsTest
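Note that splitWithProportion puts the given fraction of samples into the first returned dataset, so the call above yields roughly an 80% training / 20% test split; a toy check (sizes are approximate because the split is random):

from pybrain.datasets import ClassificationDataSet

DS = ClassificationDataSet(5, nb_classes=4)
for i in range(100):
    DS.appendLinked([i] * 5, [i % 4])
dsTrain, dsTest = DS.splitWithProportion(0.8)
print(len(dsTrain), len(dsTest))  # roughly 80 and 20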
Example #47
#########################################################################################
#########################################################################################
#########################################################################################
#create a dataset for use in pybrain
from pybrain.datasets import ClassificationDataSet
alldata=ClassificationDataSet(3,nb_classes=2,class_labels=['default_Yes','default_No'])
#classes are encoded into one output unit per class, that takes on a certain value if the class is present
#alldata._convertToOneOfMany(bounds=[0, 1])

#convert back to a single column of class labels
#alldata._convertToClassNb()

#Target dimension is supposed to be 1
#The targets are class labels starting from zero
for i in range(N):
    alldata.appendLinked(Xdf.ix[i,:],Ydf['default_Yes'].ix[i,:])
#generate training and testing data sets
tstdata, trndata = alldata.splitWithProportion(0.10)
#classes are encoded into one output unit per class, that takes on a certain value if the class is present
trndata._convertToOneOfMany( )
tstdata._convertToOneOfMany( )
len(tstdata), len(trndata)
#calculate statistics and generate histograms
alldata.calculateStatistics()
print alldata.classHist
print alldata.nClasses
print alldata.getClass(1)

#########################################################################################
#########################################################################################
#########################################################################################
Example #48
camada2 = int(sys.argv[6])

k = 0
size = 70
for line in inputFile.readlines():
    data = [float(x) for x in line.strip().split() if x != '']
    indata = tuple(data[:7])
    outdata = tuple(data[7:])
    ds.addSample(indata,outdata)
    k += 1
    if (k == size):
        testdata, traindata = ds.splitWithProportion( PorcDivTest )
        ds.clear()
        k = 0
        for inp,targ in testdata:
            testSet.appendLinked(inp,targ-1)
        for inp,targ in traindata:
            trainSet.appendLinked(inp,targ-1)

trainSet._convertToOneOfMany(bounds=[0, 1])
testSet._convertToOneOfMany(bounds=[0, 1])

if(camada2==0):
    net = buildNetwork(trainSet.indim,camada1,trainSet.outdim, recurrent = True)
else :
    net = buildNetwork(trainSet.indim,camada1,camada2,trainSet.outdim, recurrent = True)
trainer = BackpropTrainer(net,dataset = trainSet,learningrate = Learning,momentum = Momentum, verbose = True)
trainer.trainOnDataset(trainSet,Ciclos)

out = net.activateOnDataset(testSet)
out = out.argmax(axis=1) 
Example #49
sns.plt.show()    
'''

# one-hot encoding
wm_df = pd.get_dummies(df)
X = wm_df[wm_df.columns[1:-2]]  # input
Y = wm_df[wm_df.columns[-2:]]  # output
label = wm_df.columns._data[-2:]  # class label

# construction of data in pybrain's formation
from pybrain.datasets import ClassificationDataSet
ds = ClassificationDataSet(19, 1, nb_classes=2, class_labels=label)
for i in range(len(Y)):
    y = 0
    if Y['好瓜_是'][i] == 1: y = 1
    ds.appendLinked(X.values[i], y)
ds.calculateStatistics()

# generation of train set and test set (3:1)
tstdata_temp, trndata_temp = ds.splitWithProportion(0.25)
tstdata = ClassificationDataSet(19, 1, nb_classes=2, class_labels=label)
for n in range(0, tstdata_temp.getLength()):
    tstdata.appendLinked(
        tstdata_temp.getSample(n)[0],
        tstdata_temp.getSample(n)[1])

trndata = ClassificationDataSet(19, 1, nb_classes=2, class_labels=label)
for n in range(0, trndata_temp.getLength()):
    trndata.appendLinked(
        trndata_temp.getSample(n)[0],
        trndata_temp.getSample(n)[1])
Example #50
print "number of inputs m: ", num_input

# initialize two classification data sets, one for training
# and cross-validation purposes, the other for the test data
# default parameter 'target' in method ClassificationDataSet
# is '1'
DS = ClassificationDataSet(len(features[1]), nb_classes=10)
test_DS = ClassificationDataSet(len(features[1]), nb_classes=10)

i = 0
# as written, the following 3 lines feed only the first
# 1000 training cases into the NN for training, for speed
# and demonstration purposes. For real training, use
# while i < num_input:
while i < 1000:
    DS.appendLinked(features[i], targets[i])
    i += 1
i = 0

# as written, the following 3 lines predict only the first
# 50 test cases, for the sake of speed and demonstration
while i < 50:
    test_DS.appendLinked(test_features[i], 0)
    i += 1

# split up the classification data set 'DS' into training
# and cross-validation sets
cvdata, trndata = DS.splitWithProportion(0.2)

# the _convertToOneOfMany method one-hot encodes the class targets
DS._convertToOneOfMany(bounds=[0, 1])
Example #51
data_set = load_breast_cancer()

X = data_set.data  # feature
feature_names = data_set.feature_names
y = data_set.target  # label
target_names = data_set.target_names

# data normalization
from sklearn import preprocessing
normalized_X = preprocessing.normalize(X)

# construction of data in pybrain's formation
from pybrain.datasets import ClassificationDataSet
ds = ClassificationDataSet(30, 1, nb_classes=2, class_labels=target_names)
for i in range(len(y)):
    ds.appendLinked(X[i], y[i])
ds.calculateStatistics()

# split of training and testing dataset
tstdata_temp, trndata_temp = ds.splitWithProportion(0.5)
tstdata = ClassificationDataSet(30, 1, nb_classes=2)
for n in range(0, tstdata_temp.getLength()):
    tstdata.appendLinked(
        tstdata_temp.getSample(n)[0],
        tstdata_temp.getSample(n)[1])

trndata = ClassificationDataSet(30, 1, nb_classes=2)
for n in range(0, trndata_temp.getLength()):
    trndata.appendLinked(
        trndata_temp.getSample(n)[0],
        trndata_temp.getSample(n)[1])
Example #52
import cv2
from pybrain.tools.shortcuts import buildNetwork
from pybrain.datasets import ClassificationDataSet
from pybrain.supervised.trainers import BackpropTrainer
from pybrain.tools.customxml.networkwriter import NetworkWriter
from pybrain.tools.customxml.networkreader import NetworkReader
from pybrain.structure import SigmoidLayer

DS = ClassificationDataSet(896, class_labels=['notFace', 'Face'])

posDir = 'pos/'
posFilenames = [f for f in listdir(posDir)]
for f in posFilenames:
    img = (cv2.imread(posDir + f, 0)).ravel()
    img = img / 127.5 - 1
    DS.appendLinked(img, [1])

negDir = 'neg/'
negFilenames = [f for f in listdir(negDir)]
for f in negFilenames:
    img = cv2.imread(negDir + f, 0).ravel()
    img = img / 127.5 - 1
    DS.appendLinked(img, [0])  # Dataset setup here

Momen = 0.0
WeiDecay = 0.003
print 'training...'
net = buildNetwork(896, 100, 10, 1, bias=True, outclass=SigmoidLayer)
trainer = BackpropTrainer(net, DS, momentum=Momen, weightdecay=WeiDecay)
proportion2Cost = trainer.trainUntilConvergence(validationProportion=0.20,
                                                maxEpochs=1000,
Example #53
    # open image
    currentArray = numpy.array([])
    fullPath = negativeImageDirectory + '/' + fi
    currentImage = Image.open(fullPath)
    imagePixels = currentImage.load()
    imageSize = currentImage.size
    # read pixel values
    for i in range(imageSize[0]):
        for j in range(imageSize[1]):
            pixelArray = [
                imagePixels[i, j][0], imagePixels[i, j][1], imagePixels[i,
                                                                        j][2]
            ]
            currentArray = numpy.append(currentArray, pixelArray)
    # append to dataset
    dataSet.appendLinked(currentArray, 0)

# do the same but for the positive images
positiveImageFiles = os.listdir(positiveImageDirectory)

for fi in positiveImageFiles:
    currentArray = numpy.array([])
    fullPath = positiveImageDirectory + '/' + fi
    currentImage = Image.open(fullPath)
    imagePixels = currentImage.load()
    imageSize = currentImage.size
    for i in range(imageSize[0]):
        for j in range(imageSize[1]):
            pixelArray = [
                imagePixels[i, j][0], imagePixels[i, j][1], imagePixels[i,
                                                                        j][2]
Example #54
    def model_net(self, fields, datas=None):
        # Normalize the data so that large values do not drown out small ones
        # https://www.jianshu.com/p/682c24aef525  Data analysis with Python 4 | basic DataFrame operations in pandas
        # On normalization: https://www.zhihu.com/question/57509028
        # What is the difference between standardization and normalization? https://www.zhihu.com/question/20467170
        # The difference between sklearn's fit_transform() and transform(): http://blog.csdn.net/quiet_girl/article/details/72517053
        # Worth understanding how this is implemented in detail
        from sklearn.preprocessing import MinMaxScaler
        from pybrain.structure import SoftmaxLayer
        from pybrain.datasets import ClassificationDataSet
        from pybrain.tools.shortcuts import buildNetwork
        from pybrain.supervised.trainers import BackpropTrainer
        from pybrain.utilities import percentError
        from pybrain.structure import TanhLayer

        scaler = MinMaxScaler()
        datas[fields] = scaler.fit_transform(datas[fields])

        tran_data = datas[fields].values
        tran_target = datas['Flag'].values
        tran_label = ['Sell', 'Hold', 'Buy']

        class_datas = ClassificationDataSet(6,
                                            1,
                                            nb_classes=3,
                                            class_labels=tran_label)
        print(type(tran_target))
        print(tran_target)
        for i in range(len(tran_data)):
            class_datas.appendLinked(tran_data[i], tran_target[i])

        tstdata_temp, trndata_temp = class_datas.splitWithProportion(0.25)

        print(len(tstdata_temp), len(trndata_temp))

        tstdata = ClassificationDataSet(6,
                                        1,
                                        nb_classes=3,
                                        class_labels=tran_label)
        trndata = ClassificationDataSet(6,
                                        1,
                                        nb_classes=3,
                                        class_labels=tran_label)

        for n in range(0, trndata_temp.getLength()):
            trndata.appendLinked(
                trndata_temp.getSample(n)[0],
                trndata_temp.getSample(n)[1])

        for n in range(0, tstdata_temp.getLength()):
            tstdata.appendLinked(
                tstdata_temp.getSample(n)[0],
                tstdata_temp.getSample(n)[1])

        tstdata._convertToOneOfMany()
        trndata._convertToOneOfMany()

        tnet = buildNetwork(trndata.indim,
                            5,
                            trndata.outdim,
                            hiddenclass=TanhLayer,
                            outclass=SoftmaxLayer)
        trainer = BackpropTrainer(tnet,
                                  dataset=trndata,
                                  batchlearning=True,
                                  momentum=0.1,
                                  verbose=True,
                                  weightdecay=0.01)

        for i in range(5000):
            trainer.trainEpochs(20)
            trnresult = percentError(trainer.testOnClassData(),
                                     trndata['class'])
            testResult = percentError(trainer.testOnClassData(dataset=tstdata),
                                      tstdata['class'])
            print("epoch: %4d" % trainer.totalepochs, \
                  "  train error: %5.2f%%" % trnresult, \
                  "  test error: %5.2f%%" % testResult)

        return trainer, class_datas
Example #55
0
        return leftDs, rightDs

    def castToRegression(self, values):
        """Converts data set into a SupervisedDataSet for regression. Classes
        are used as indices into the value array given."""
        regDs = SupervisedDataSet(self.indim, 1)
        fields = self.getFieldNames()
        fields.remove('target')
        for f in fields:
            regDs.setField(f, self[f])
        regDs.setField('target', values[self['class'].astype(int)])
        return regDs

if __name__ == "__main__":
    dataset = ClassificationDataSet(2, 1, class_labels=['Urd', 'Verdandi', 'Skuld'])
    dataset.appendLinked([ 0.1, 0.5 ]   , [0])
    dataset.appendLinked([ 1.2, 1.2 ]   , [1])
    dataset.appendLinked([ 1.4, 1.6 ]   , [1])
    dataset.appendLinked([ 1.6, 1.8 ]   , [1])
    dataset.appendLinked([ 0.10, 0.80 ] , [2])
    dataset.appendLinked([ 0.20, 0.90 ] , [2])

    dataset.calculateStatistics()
    print(("class histogram:", dataset.classHist))
    print(("# of classes:", dataset.nClasses))
    print(("class 1 is: ", dataset.getClass(1)))
    print(("targets: ", dataset.getField('target')))
    dataset._convertToOneOfMany(bounds=[0, 1])
    print("converted targets: ")
    print(dataset.getField('target'))
    dataset._convertToClassNb()
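    # Hypothetical illustration (not in the original demo): castToRegression
    # maps each sample's class index into a caller-supplied value array, e.g.
    #   regDs = dataset.castToRegression(numpy.array([0.0, 0.5, 1.0]))
    # would give class-0 samples the target 0.0, class-1 samples 0.5, etc.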
Example #56
0
    """
    return ret

vecSize = 100
subjects = [2, 5, 6, 7, 8, 12, 16, 35, 39]
ds = None
for s in subjects:
    for cycleNum in range(1, 13):
        fileName = '../inputs/Vicon from CMU/subjects/'+str(s)+'/'+str(cycleNum)+'.amc'
        try:
            data = getData(fileName, vecSize)
        except IOError:
            continue
        if ds is None:  # initialization
            ds = ClassificationDataSet(len(data), 1)
        ds.appendLinked(data, subjects.index(s))
ds.nClasses = len(subjects)
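# nClasses is set by hand, presumably because the dataset was built without
# nb_classes; _convertToOneOfMany() below relies on this count.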

decay = 0.99995
myWeightdecay = 0.8
initialLearningrate = 0.005
hidden_size = 1000
epochs = 1000
splitProportion = 0.5

print 'dataset size', len(ds)
print 'input layer size', len(ds.getSample(0)[0])
tstdata, trndata = ds.splitWithProportion(splitProportion)
trndata._convertToOneOfMany()
tstdata._convertToOneOfMany()
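# A hedged sketch, not part of the original snippet: the hyperparameters
# above would plausibly feed PyBrain's network and trainer like so:
#   net = buildNetwork(trndata.indim, hidden_size, trndata.outdim,
#                      outclass=SoftmaxLayer)
#   trainer = BackpropTrainer(net, dataset=trndata,
#                             learningrate=initialLearningrate,
#                             lrdecay=decay, weightdecay=myWeightdecay)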
Example #57
0
# Calculate and print the number of total input nodes (unique taxa) and total
# output nodes (unique categories to classify)
collection.setUniqueTaxa()
collection.setUniqueCategories()
print 'Unique Taxa (#input nodes): ' + str(len(collection.getUniqueTaxa()))
print 'Unique Categories (#output nodes): ' + str(
    len(collection.getUniqueCategories()))

# Create trainingsets and test sets
trainingset = collection.createAnnTrainingsets()
testset = collection.createAnnTestsets()

# Map trainingsets and test sets to PyBrain
DS = ClassificationDataSet(trainingset['input_dimension'],
                           trainingset['output_dimension'])
for i in range(0, len(trainingset['input_arrays'])):
    DS.appendLinked(trainingset['input_arrays'][i],
                    trainingset['output_arrays'][i])
DStest = ClassificationDataSet(trainingset['input_dimension'],
                               trainingset['output_dimension'])
for i in range(0, len(testset['input_arrays'])):
    DStest.appendLinked(testset['input_arrays'][i],
                        testset['output_arrays'][i])

# Create network
fnn = buildNetwork(DS.indim, 50, DS.outdim, outclass=SoftmaxLayer, fast=False)
#fnn = buildNetwork( DS.indim, 5, DS.outdim, outclass=SoftmaxLayer )
# Create trainer
trainer = BackpropTrainer(fnn,
                          dataset=DS,
                          momentum=0.01,
                          verbose=True,
                          weightdecay=0.0001)
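# The original snippet ends at the trainer setup; training would typically
# continue with, e.g., trainer.trainEpochs(n) or trainer.trainUntilConvergence().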
Example #58
0
numPatTest, numColsTest = patternTest.shape

# Generate the inputs
patternTrainInput = patternTrain[:, 1:numColsTrain]
patternValidInput = patternValid[:, 1:numColsValid]
patternTestInput = patternTest[:, 1:numColsTest]

# Generate the desired outputs
patternTrainTarget = np.zeros([numPatTrain, 2])
patternValidTarget = np.zeros([numPatValid, 2])
patternTestTarget = np.zeros([numPatTest, 2])

# Create the supervised datasets
trainDS = ClassificationDataSet(numColsTrain-1, nb_classes=2, class_labels=['Not_Cancer', 'Cancer'])
for i in range(numPatTrain):
	trainDS.appendLinked(patternTrainInput[i], patternTrain[i, 0])
	
validDS = ClassificationDataSet(numColsTrain-1, nb_classes=2, class_labels=['Not_Cancer', 'Cancer'])
for i in range(numPatValid):
	validDS.appendLinked(patternValidInput[i], patternValid[i, 0])
	
testDS = ClassificationDataSet(numColsTrain-1, nb_classes=2, class_labels=['Not_Cancer', 'Cancer'])
for i in range(numPatTest):
	testDS.appendLinked(patternTestInput[i], patternTest[i, 0])

# Create the SVM and the trainer
svm = SVMUnit()
trainer = SVMTrainer(svm, trainDS)

# SVM parameters
myLog2C = 0.
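# myLog2C is presumably the base-2 logarithm of the SVM cost parameter C
# (so C = 2**myLog2C); the original snippet is truncated before it is used.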
Example #59
0
def getDataFromSudokuDataset():
    '''
    Creates pybrain ClassificationDataSet from folder of images from Sudoku dataset found at
    https://github.com/wichtounet/sudoku_dataset
    '''
    data = ClassificationDataSet(400, nb_classes=9, class_labels=['1','2','3','4','5','6','7','8','9'])
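    # The 400 inputs presumably correspond to flattened 20x20-pixel digit
    # crops; see the number.ravel() call when samples are appended below.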

    path = '/Users/kdelaney/Downloads/sudoku_dataset-master/images/'
    dirs = os.listdir(path)

    parsed = 0
    missed = 0
    missednumbers = 0  # count of numbers in image that were not parsed
    falsenumbers = 0   # count of false number parsings from empty squares
    gatherednumbers = 0
    correctspaces = 0

    for p in range(0,len(dirs), 2):
        img = cv2.imread(path + dirs[p + 1])
        print(path + dirs[p + 1])
        dat = []
        with open((path + dirs[p])) as f:
            next(f)
            next(f)
            for line in f:
                dat += line.split()

        pil_im, numbers, parsedcheck, missedcheck = process(img, False)

        parsed += parsedcheck
        missed += missedcheck

        ind = 0
        if numbers is not None:
            for number in numbers:
                if number is None:
                    if dat[ind] == '0':
                        correctspaces += 1
                        ind += 1
                    else:
                        missednumbers += 1
                        ind += 1
                else:
                    if dat[ind] == '0':
                        falsenumbers += 1
                        ind += 1
                    else:
                        gatherednumbers += 1
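                        # shift digit labels 1-9 onto class indices 0-8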
                        data.appendLinked(number.ravel(), [int(dat[ind])-1])
                        ind += 1

    print ("\nprocessed: ")
    print("\n  Test images processed successfully: " + str(parsed))
    print("  Test images not processed successfully : " + str(missed)+"\n")


    print("  Number of digit samples gathered (true positives): " + str(gatherednumbers))
    print("  Number of spaces confirmed (true negatives): " + str(correctspaces)+"\n")

    print("  Number of digit samples missed in a processed image (false negatives): " + str(missednumbers))
    print("  Number of digit samples removed (false positives): " + str(falsenumbers))

    return data
Example #60
0
    tstresults.append([])
    #hits[m].append(0)
    excpectedLens.append(0)
    #for mood in couple:
    for typeNum in range(1,21):
        for take in range(1,10):
            fileName = '../inputs/Rachelle/v2/recordingsByMood/'+mood+'/'+\
            str(typeNum)+'_'+str(take)+'.skl'
            try:
                data = ge.getFeatureVec(fileName)
            except IOError:
                continue
            if ds is None:  # initialization
                ds = ClassificationDataSet(len(data), 1)
            excpectedLens[m] += 1
            ds.appendLinked(data, moods.index(mood))
splitProportion = 0.2
decay = 0.99993
myWeightdecay = 0.5
initialLearningrate = 0.01
hidden_size = 200
epochs = 1000
momentum = 0.15
ds.nClasses = len(moods)
tstdata, trndata = ds.splitWithProportion(splitProportion)
trndata._convertToOneOfMany()
tstdata._convertToOneOfMany()
inLayer = LinearLayer(len(trndata.getSample(0)[0]))
hiddenLayer = SigmoidLayer(hidden_size)
outLayer = LinearLayer(len(trndata.getSample(0)[1]))
n = FeedForwardNetwork()
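# A plausible continuation, not part of the original snippet: the layers
# would still need to be registered and wired with FullConnections, e.g.
#   n.addInputModule(inLayer)
#   n.addModule(hiddenLayer)
#   n.addOutputModule(outLayer)
#   n.addConnection(FullConnection(inLayer, hiddenLayer))
#   n.addConnection(FullConnection(hiddenLayer, outLayer))
#   n.sortModules()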