def createnetwork(n_hoglist,n_classlist,n_classnum,n_hiddensize=100):
    n_inputdim=len(n_hoglist[0])
    n_alldata = ClassificationDataSet(n_inputdim,1, nb_classes=n_classnum)
    for i in range(len(n_hoglist)):
        n_input = n_hoglist[i]
        n_class = n_classlist[i]
        n_alldata.addSample(n_input, [n_class])
    n_tstdata, n_trndata = n_alldata.splitWithProportion( 0.25 )
    n_trndata._convertToOneOfMany( )
    n_tstdata._convertToOneOfMany( )

    print "Number of training patterns: ", len(n_trndata)
    print "Input and output dimensions: ", n_trndata.indim, n_trndata.outdim
    print "First sample (input, target, class):"
    print n_trndata['input'][0], n_trndata['target'][0], n_trndata['class'][0]

    n_fnn = buildNetwork(n_trndata.indim,n_hiddensize, n_trndata.outdim, outclass=SoftmaxLayer)
    n_trainer = BackpropTrainer(n_fnn, dataset=n_trndata, momentum=0.1, verbose=True, weightdecay=0.01)

    n_result = 1
    while n_result > 0.1:
        print n_result
        n_trainer.trainEpochs(1)
        n_trnresult = percentError(n_trainer.testOnClassData(),
                                 n_trndata['class'])
        n_tstresult = percentError(n_trainer.testOnClassData(
            dataset=n_tstdata), n_tstdata['class'])

        print "epoch: %4d" % n_trainer.totalepochs, \
            "  train error: %5.2f%%" % n_trnresult, \
            "  test error: %5.2f%%" % n_tstresult
        n_result = n_tstresult
Example #2
def _convert_supervised_to_classification(supervised_dataset,classes):
    classification_dataset = ClassificationDataSet(supervised_dataset.indim,supervised_dataset.outdim,classes)
    
    for n in xrange(0, supervised_dataset.getLength()):
        classification_dataset.addSample(supervised_dataset.getSample(n)[0], supervised_dataset.getSample(n)[1])

    return classification_dataset
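# Hedged usage sketch (not part of the original snippet): newer PyBrain releases
# return plain SupervisedDataSet objects from splitWithProportion(), so the helper
# above can be used to restore ClassificationDataSet behaviour such as
# _convertToOneOfMany(). The toy feature values below are illustrative assumptions.
from pybrain.datasets import ClassificationDataSet

alldata = ClassificationDataSet(2, 1, nb_classes=2)
for inp, cls in [([0.1, 0.9], 0), ([0.8, 0.2], 1), ([0.2, 0.7], 0), ([0.9, 0.3], 1)]:
    alldata.addSample(inp, [cls])
tst_sup, trn_sup = alldata.splitWithProportion(0.25)
trndata = _convert_supervised_to_classification(trn_sup, 2)
tstdata = _convert_supervised_to_classification(tst_sup, 2)
trndata._convertToOneOfMany()
tstdata._convertToOneOfMany()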
Example #3
def gen_data(csv_file, db):
	keywords = {}
	count = 0
	img_list = []

	with open(csv_file) as f:
		content = f.readlines()

	for line in content:
		aux = line.replace('\n', '').split(',')
		if aux[1] not in keywords:
			keywords[aux[1]] = count
			count += 1
		img_list.append(aux)

	data = ClassificationDataSet(768, len(keywords), nb_classes=len(keywords))
	n = len(keywords)

	for img in img_list:
		path = db + '/' + img[0]
		im = Image.open(path).convert('RGB')
		data.addSample(get_img_feats(im), get_keyword_class(keywords[img[1]], n))

	return data, n, keywords
Example #4
 def batch_classify(self, samples):
   ds = ClassificationDataSet(len(self._fx))
   for sample in samples:
     fvec = [sample[l] for l in self._fx]
     ds.addSample(fvec, [0])
   results = self._trainer.testOnClassData(ds)
   return [self._rmap[r] for r in results]
Example #5
def getData():
    fo = open("C:\\Program Files (x86)\\Lux\\Support\\data1per.txt")
    #data = []

    '''
    correctinds = range(0,5)
    for k in range(5, 131, 3):
        correctinds.append(k)
    correctinds.append(129)
    correctinds.append(130)
    for k in range(131, 257, 3):
        correctinds.append(k)
    correctinds.append(255)
    correctinds.append(256)
    '''

    #alldata = ClassificationDataSet(92, 1)
    alldata = ClassificationDataSet(84, 1)

    count = 0
    for line in fo.readlines():
    #for k in range(0, 20000):
        count += 1

        #line = fo.readline()

        line = [int(x.strip()) for x in line[1:-3].split(',')]
        line = [line[0]]+line[4:47]+line[49:90]

        alldata.addSample(line[1:], line[0])
    print count
    return alldata
class NeuralNetLearner:
    def __init__(self):
        self.bunch = load_digits()
        self.X = np.asarray(self.bunch.data, 'float32')
        self.Y = np.asarray(self.bunch.target, 'float32')
        #self.X, self.Y = nudge_dataset(self.X, self.bunch.target)
        self.X = (self.X - np.min(self.X, 0)) / (np.max(self.X, 0) + 0.0001)  # 0-1 scaling

        self.ds = ClassificationDataSet(64, nb_classes=10, class_labels=self.bunch.target_names)
        for (x, y) in zip(self.X, self.Y):
            self.ds.addSample(x, y)

        self.test_data, self.train_data = self.ds.splitWithProportion(0.3)

        self.network = buildNetwork(64, 10, 1)

    def get_datasets(self):
        return self.train_data, self.test_data

    def activate(self, x):
        return self.network.activate(x.tolist())

    def fitness_func(self, x):
        if not (x.size == 64):
            print("Bad input vector: ", x)
            return
        sum_of_squared_error = 0
        for (input, target) in self.ds:
            error = target - self.activate(input)
            sum_of_squared_error += error ** 2
        return (sum_of_squared_error / len(self.ds))

    def get_weights(self):
        return
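# Hedged usage sketch (not part of the original class; assumes numpy, sklearn's
# load_digits and the PyBrain imports used above are available): build the learner,
# fetch its train/test split and activate the untrained network on one digit.
learner = NeuralNetLearner()
train_data, test_data = learner.get_datasets()
print("train/test sizes:", len(train_data), len(test_data))
print("raw network output for the first digit:", learner.activate(learner.X[0]))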
Example #7
def toClassificationDataset(codedSampleSet):
   
    classifiedSampleSet = []
    
    # Calculate the unique classes
    classes = []
    for sample in codedSampleSet:
    
        classifier = getClassifier(sample)
        if classifier not in classes:
            classes.append(classifier)
    classes.sort()
    
    # Now that we have all the classes, we process the outputs
    for sample in codedSampleSet:
        classifier = getClassifier(sample)
        classifiedSample = one_to_n(classes.index(classifier), len(classes))
        classifiedSampleSet.append(classifiedSample)

    # Build the dataset
    sampleSize = len(codedSampleSet[0])
    classifiedSampleSize = len(classifiedSampleSet[0])
    dataset = ClassificationDataSet(sampleSize, classifiedSampleSize)
    
    for i in range(len(classifiedSampleSet)):
        dataset.addSample(codedSampleSet[i], classifiedSampleSet[i])

    return dataset, classes
Example #8
def generate_data(n=400):
    INPUT_FEATURES = 2
    CLASSES = 3
    #means = [(-1, 0), (2, 4), (3, 1)]
    #cov = [diag([1, 1]), diag([0.5, 1.2]), diag([1.5, 0.7])]
    alldata = ClassificationDataSet(INPUT_FEATURES, 1, nb_classes=CLASSES)
    #minX, maxX = means[0][0], means[0][0]
    #minY, maxY = means[0][1], means[0][1]
    #print minX, maxX , minY, maxY
    # #for i in range(n):
    #     for klass in range(CLASSES):

    #         features = multivariate_normal(means[klass], cov[klass])
    #         #print means[klass], cov[klass]
    #         #print features
    #         x, y = features
    #         minX, maxX = min(minX, x), max(maxX, x)
    #         minY, maxY = min(minY, y), max(maxY, y)
    #         alldata.addSample(features, [klass])
    #print alldata
    alldata.addSample([0,0], [0])
    alldata.addSample([0,1], [1])
    alldata.addSample([1,0], [1])
    alldata.addSample([1,1], [0])

    return {'minX': 0, 'maxX': 1,
            'minY': 0, 'maxY': 1, 'd': alldata}
Example #9
class NNetwork:
	def __init__(self):
		self.ds = ClassificationDataSet(7, 1, nb_classes=8)  #8 since we have 8 gestures, 7 since we have 7 features
		
	def add_data(self, training_data):
		for gesture in training_data:
			self.ds.addSample(gesture[1], gesture[0])  #a method to add all the training data we have
			
	def newData(self, training_data):   #a method for replacing the data already existing and adding data from scratch
		self.ds = ClassificationDataSet(7, 1, nb_classes=8)
		for gesture in training_data:
			self.ds.addSample(gesture[1], gesture[0])
	
	def train(self, shouldPrint):
		tstdata, trndata = self.ds.splitWithProportion(0.2)  #splits the data into training and verification data
		trndata._convertToOneOfMany()
		tstdata._convertToOneOfMany()
		self.fnn = buildNetwork(trndata.indim, 64, trndata.outdim, outclass=SoftmaxLayer) #builds a network with 64 hidden neurons
		self.trainer = BackpropTrainer(self.fnn, dataset=trndata, momentum=0.1, learningrate=0.01, verbose=True, weightdecay=0.1)
		#uses the backpropagation algorithm
		self.trainer.trainUntilConvergence(dataset=trndata, maxEpochs=100, verbose=True, continueEpochs=10, validationProportion=0.20) #early stopping with 20% as testing data
		trnresult = percentError( self.trainer.testOnClassData(), trndata['class'] )
		tstresult = percentError( self.trainer.testOnClassData(dataset=tstdata ), tstdata['class'] )
		
		if shouldPrint:
			print "epoch: %4d" % self.trainer.totalepochs, "  train error: %5.2f%%" % trnresult, "  test error: %5.2f%%" % tstresult
	def activate(self, data): #tests a particular data point (feature vector)
	    return self.fnn.activate(data)
Example #10
def build_dataset(data_pair):
    inputs, classes = data_pair
    ds = ClassificationDataSet(256)
    data = zip(inputs, classes)
    for (inp, c) in data:
        ds.appendLinked(inp, [c])
    return ds
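# Hedged usage sketch (illustrative, not from the original source): build_dataset()
# expects an (inputs, classes) pair of equal-length sequences; the 256-dimensional
# placeholder vectors below are made up.
toy_inputs = [[0.0] * 256, [1.0] * 256]
toy_classes = [0, 1]
toy_ds = build_dataset((toy_inputs, toy_classes))
print len(toy_ds)   # 2 linked (input, target) rows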
Example #11
def generateDataSet():

    inFile = open("data/input.txt")
    inData = inFile.readlines()
    inFile.close()
    
    outFile = open("data/output.txt")
    outData = outFile.readlines()
    outFile.close()


    inputs = 120 #you will want to update this based on the state you have... ###I don't understand this comment. How do we update if we haven't calculated the state yet?
    classes = 11 #Not much reason to change this one, there are only 11 destinations.
    allData = ClassificationDataSet(inputs,1,nb_classes=classes)
    start = time.clock()
    for i in range(len(inData)):
        b = loadBrain(inData[i].strip())
        #inputs = len(b.g.heroes) - 1 + len(b.g.taverns_locs) + 4
        #calls functions inside of the ai object.  you will want to write these fcns. 
        ins = b.createInputs(inputs)
        klass = b.determineClass(classes,eval(outData[i].strip()))
        expectedKlass = b.classInverse(klass)
        #if expectedKlass != eval(outData[i].strip()):
        #    print expectedKlass, eval(outData[i].strip())
        allData.addSample(ins,[klass])
        #if(i > 1000): break
        if(i%100==0): print i,len(inData), "elapsed between sets", time.clock() - start
    
    return allData    
def read_data(filename):
	"""
	See http://www.pybrain.org/docs/api/datasets/classificationdataset.html

	Reads a (naive) csv file of data and converts it into
	a ClassificationDataSet. 'Naive' in this case means
	the data can be parsed by splitting on commas - i.e.,
	no quotations or escapes. I picked this file format
	because it should be trivial to convert all our data into it.

	Raises an exception when an IO error occurs.

	Parameters:
	  filename - The name of the file containing the data.
	"""
	data_file = open(filename, "r")
	data_lines = [line.split(',') for line in data_file.readlines()]
	data_file.close()

	features = [[float(f) for f in line[0:-1]] for line in data_lines]
	classes = [[int(line[-1])] for line in data_lines]
	# Workaround to make classifications zero-based
	class_min = min([c[0] for c in classes])
	for i in range(len(classes)):
		classes[i][0] -= class_min

	data_set = ClassificationDataSet(len(features[0]))
	for feature_vector, classification in zip(features, classes):
		data_set.addSample(feature_vector, classification)

	return data_set
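# Hedged usage sketch (the file name and values are made-up assumptions): each line
# of the 'naive' CSV holds comma-separated numeric features with an integer class
# label in the last column; read_data() shifts the labels so the smallest becomes 0.
with open("toy_naive.csv", "w") as sample_file:
	sample_file.write("5.1,3.5,1.4,0.2,1\n")
	sample_file.write("7.0,3.2,4.7,1.4,2\n")
toy_set = read_data("toy_naive.csv")
print len(toy_set), toy_set.indim   # 2 samples, 4 input features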
Example #13
def make_data_set(beg,end):
    ds = ClassificationDataSet(HISTORY*2+1, class_labels=['None', 'Buy' , 'Sell']) #SupervisedDataSet(HISTORY*3, 1) 
    trainQ = rawData[(rawData.tradeDate <= end) & ( rawData.tradeDate >= beg)]
    

    for idx in range(1, len(trainQ) - HISTORY - 1 - HOLD-1):
        cur = idx + HISTORY - 1  
        if( abs( trainQ.iloc[cur]['MACD'] ) > 0.5 ):
            continue        
        sample = []
        for i in range(HISTORY):
            #sample.append( trainQ.iloc[idx+i]['EMAL'] )#  [['EMAL','DIFF','DEA','CDIS']] ) )
            sample.append( trainQ.iloc[idx+i]['DIFF'] )
            sample.append( trainQ.iloc[idx+i]['DEA'] )
                   
        sample.append( trainQ.iloc[cur]['CDIS'] )
        if max( trainQ.iloc[cur+1:cur+HOLD+1]['EMAS'] ) / trainQ.iloc[cur]['closeIndex'] > 1.05 : 
            answer = 1
        elif min( trainQ.iloc[cur+1:cur+HOLD+1]['EMAS'] ) / trainQ.iloc[cur]['closeIndex'] < 0.95:
            answer = 2
        else:
            answer = 0
#        print(sample)    
        ds.addSample(sample, answer)
    return ds
Example #14
class NeuralNetwork(BaseWorkflow):

    def __init__(self, purpose='train', num_inputs=None, num_ouputs=None, classes=None, class_lables=None):
        super(NeuralNetwork, self).__init__()
        self.purpose = purpose
        self.data_path = self.config.neural_net.get(self.purpose, None)
        self.file_name = 'neural_net'
        self.all_data = ClassificationDataSet(num_inputs,
                                              num_ouputs,
                                              nb_classes=classes,
                                              class_labels=class_lables)
        self.train = None
        self.test = None
        self.neural_network = None
        self.train_result = None
        self.test_result = None
        self.cross_validation_result = None

    def process(self):
        self.prepare_train_test()
        self.build_network()
        trainer = self.train_network(dataset=self.train)
        self.score_train_test(trainer=trainer)
        self.cross_validate(dataset=self.all_data)

    def add_sample(self, correlogram_matrix=None, target=None, sample_path=None):
        self.all_data.addSample(correlogram_matrix, target)
        logger.info('sample added from {sample_path}'.format(sample_path=sample_path))

    def prepare_train_test(self):
        self.test, self.train = self.all_data.splitWithProportion(0.25)

    def build_network(self):
        self.neural_network = buildNetwork(self.train.indim, 7, self.train.outdim, outclass=SoftmaxLayer) # feed forward network

    def train_network(self, dataset=None):
        starter_trainer = BackpropTrainer(self.neural_network, dataset=dataset, momentum=0.1, verbose=True, weightdecay=0.01)
        starter_trainer.trainUntilConvergence(validationProportion=0.25,  maxEpochs=100)
        return starter_trainer

    def score_train_test(self, trainer=None):
        self.test_result = percentError(trainer.testOnClassData(dataset=self.test), self.test['class'])
        logger.info('test error result: {result}'.format(result=self.test_result))
        self.train_result = percentError(trainer.testOnClassData(dataset=self.train), self.train['class'] )
        logger.info('train error result: {result}'.format(result=self.train_result))

    def cross_validate(self, dataset=None):
        trainer = BackpropTrainer(self.neural_network, dataset=dataset, momentum=0.1, verbose=True, weightdecay=0.01)
        validator = CrossValidator(trainer=trainer, dataset=dataset, n_folds=10)
        mean_validation_result = validator.validate()
        self.cross_validation_result = mean_validation_result
        logger.info('cross val result: {result}'.format(result=self.cross_validation_result))

    @staticmethod
    def save_network_to_xml(net=None, file_name=None):
        NetworkWriter.writeToFile(net, file_name)

    @staticmethod
    def read_network_from_xml(file_name=None):
        return NetworkReader.readFrom(file_name)
def main():
    for stock in STOCK_TICKS:
        # Download Data
        get_data(stock)

        # Import Data
        days = extract_data(stock)
        today = days.pop(0)

        # Make DataSet
        data_set = ClassificationDataSet(INPUT_NUM, 1, nb_classes=2)
        for day in days:
            target = 0
            if day.change > 0:
                target = 1
            data_set.addSample(day.return_metrics(), [target])

        # Make Network
        network = buildNetwork(INPUT_NUM, MIDDLE_NUM, MIDDLE_NUM, OUTPUT_NUM)

        # Train Network
        trainer = BackpropTrainer(network)
        trainer.setData(data_set)
        trainer.trainUntilConvergence(maxEpochs=EPOCHS_MAX)

        # Activate Network
        prediction = network.activate(today.return_metrics())
        print prediction
Example #16
def getdata(do_preprocessing, full_data):
    '''
    fetch and format the match data according to the given flags
    do_preprocessing: bool: true if the match data should be preprocessed
    full_data: bool: false if the minimal data should be used
    '''
    print ("fetching data ...")
    if full_data == 0 :
        fn = getMinimalDatafromMatch
    else:
        fn = getBasicDatafromMatch
    if globals.use_saved_data:
        try:
            with open('processed_data%d' % full_data) as outfile:
                data = json.load(outfile)
        except IOError:
            matches = Match.objects.all()
            data = map(lambda x: (fn(x,do_preprocessing,False), x.won), matches)
            data += map(lambda x: (fn(x,do_preprocessing,True), not x.won), matches)
            with open('processed_data%d' % full_data, 'w') as outfile:
                json.dump(data,outfile)
    else:
        matches = Match.objects.all()
        data = map(lambda x: (fn(x,do_preprocessing,False), x.won), matches)
        data += map(lambda x: (fn(x,do_preprocessing,True), not x.won), matches)
        with open('processed_data%d' % full_data, 'w') as outfile:
            json.dump(data,outfile)

    all_data = None
    for input, won in data:        
        if all_data is None:
            all_data = ClassificationDataSet(len(input), 1, nb_classes=2)                 
        all_data.addSample(input, int(won)) 
    return all_data
def simpleNeuralNetworkTrain(fileName, numFeatures, numClasses, possibleOutputs, numHiddenNodes, numTrainingEpochs):

    data = np.genfromtxt(fileName)
    trnIn = data[:, 0:5]
    trnOut = data[:, 6]
    trnOut = [int(val) for val in trnOut]

    normalizeData(trnIn, numFeatures)
    trndata = ClassificationDataSet(numFeatures, possibleOutputs, nb_classes=numClasses)
    for row in range(0, len(trnIn)):
        tempListOut = []
        tempListIn = []
        tempListOut.append(int(trnOut[row]))
        for i in range(0, numFeatures):
            tempListIn.append(trnIn[row][i])
        trndata.addSample(tempListIn, tempListOut)

    trndata._convertToOneOfMany()

    #  When running for the first time
    myNetwork = buildNetwork(numFeatures, numHiddenNodes, numClasses, outclass=SoftmaxLayer, bias=True, recurrent=False)

    # Read from file after the first try.
    #  myNetwork = NetworkReader.readFrom('firstTime.xml')    # Use saved results.
    trainer = BackpropTrainer(myNetwork, dataset=trndata, momentum=0.0, verbose=True, weightdecay=0.0)
    for i in range(numTrainingEpochs):
        trainer.trainOnDataset(dataset=trndata)
Example #18
def build_dataset(
    mongo_collection, patch_size=IMG_SIZE, orig_size=IMG_SIZE, nb_classes=2, edgedetect=True, transform=True
):
    # deprecated
    if edgedetect:
        import cv2
    from pybrain.datasets import SupervisedDataSet, ClassificationDataSet

    patch_size = min(patch_size, orig_size)
    trim = round((orig_size - patch_size) / 2)
    # ds = SupervisedDataSet(patch_size**2, 1)
    ds = ClassificationDataSet(patch_size ** 2, target=1, nb_classes=nb_classes)
    cursor = list(mongo_collection.find())
    for one_image in cursor:
        # convert from binary to numpy array and transform
        img_array = np.fromstring(one_image["image"], dtype="uint8")
        if edgedetect:
            img_array = cv2.Canny(img_array, 150, 200)
        img_crop = img_array.reshape(orig_size, orig_size)[trim : (trim + patch_size), trim : (trim + patch_size)]
        classification = float(one_image["class"])
        if transform:
            transformed = transform_img(img_crop.ravel(), patch_size)
        else:
            transformed = [img_crop.ravel()]
        for one_img in transformed:
            ds.addSample(one_img.ravel(), classification)
    print("New dataset contains %d images (%d positive)." % (len(ds), sum(ds["target"])))
    return ds
Example #19
def cross_validation(trndata, folds=3, **kwargs):
    """
        kwargs are parameters for the model
    """
    input = np.vsplit(trndata['input'], folds)
    target = np.vsplit(trndata['target'], folds)

    zipped = zip(input, target)

    accuracy_sum = 0
    for i in range(len(zipped)):
        new_train = ClassificationDataSet(attributes, nb_classes=classes_number)
        new_test = ClassificationDataSet(attributes, nb_classes=classes_number)
        test_zipped = zipped[i]
        train_zipped = zipped[:i] + zipped[(i+1):]

        new_train.setField('input', np.vstack([fold[0] for fold in train_zipped]))
        new_train.setField('target', np.vstack([fold[1] for fold in train_zipped]))

        new_test.setField('input', test_zipped[0])
        new_test.setField('target', test_zipped[1])

        model = FNNClassifier()
        model.train(new_train, new_test, kwargs)
        out, targ = model.predict(new_test)
        accuracy_sum += accuracy(out, targ)

    return accuracy_sum / folds
def main():
    images, labels = load_labeled_training(flatten=True)
    images = standardize(images)
    # images, labels = load_pca_proj(K=100)
    shuffle_in_unison(images, labels)
    ds = ClassificationDataSet(images.shape[1], 1, nb_classes=7)
    for i, l in zip(images, labels):
        ds.addSample(i, [l - 1])
    # ds._convertToOneOfMany()
    test, train = ds.splitWithProportion(0.2)
    test._convertToOneOfMany()
    train._convertToOneOfMany()
    net = shortcuts.buildNetwork(train.indim, 1000, train.outdim, outclass=SoftmaxLayer)

    trainer = BackpropTrainer(net, dataset=train, momentum=0.1, learningrate=0.01, weightdecay=0.05)
    # trainer = RPropMinusTrainer(net, dataset=train)
    # cv = validation.CrossValidator(trainer, ds)
    # print cv.validate()
    net.randomize()
    tr_labels_2 = net.activateOnDataset(train).argmax(axis=1)
    trnres = percentError(tr_labels_2, train["class"])
    # trnres = percentError(trainer.testOnClassData(dataset=train), train['class'])
    testres = percentError(trainer.testOnClassData(dataset=test), test["class"])
    print "Training error: %.10f, Test error: %.10f" % (trnres, testres)
    print "Iters: %d" % trainer.totalepochs

    for i in range(100):
        trainer.trainEpochs(10)
        trnres = percentError(trainer.testOnClassData(dataset=train), train["class"])
        testres = percentError(trainer.testOnClassData(dataset=test), test["class"])
        trnmse = trainer.testOnData(dataset=train)
        testmse = trainer.testOnData(dataset=test)
        print "Iteration: %d, Training error: %.5f, Test error: %.5f" % (trainer.totalepochs, trnres, testres)
        print "Training MSE: %.5f, Test MSE: %.5f" % (trnmse, testmse)
Example #21
class ImageData(Data):
  
  image_x = 1
  image_y = 1
  images = []
  targets = []

  def __init__(self, images, targets, image_x, image_y, description="Image Data", outputs=1):
      Data.__init__(self, description, outputs)
      self.images = images
      self.targets = targets
      self.image_x = image_x
      self.image_y = image_y
      self.create_classifier()

  def create_classifier(self):
      #print "Image X:", self.image_x
      #print "Image Y:", self.image_y
      vector_length = self.image_x * self.image_y
      #Create the classifier
      #print "Creating Classifier. Vector_Len:", vector_length, "Output Vector:", self.outputs
      self.classifier = ClassificationDataSet(vector_length, self.outputs, nb_classes=(len(self.images) / 10))
      #print "Adding samples for", len(self.images), " images"
      for i in xrange(len(self.images)):
          #Assign images to their targets in the classifier
          #print i, "Image:", self.images[i], "Target:", self.targets[i]
          self.classifier.addSample(self.images[i], self.targets[i])

  def print_data(self):
    print "Image Object:" + str(this.data_unit)
    
  def add_image(self, image, target):
    self.images.append(image)
    self.targets.append(target)
Example #22
def getBoardImage(img):
    '''
    Runs an image through processing and neural network to decode digits

    img: an openCV image object

    returns:
        pil_im: a PIL image object with the puzzle isolated, cropped and straightened
        boardString: string representing the digits and spaces of a Sudoku board (left to right, top to bottom)
    '''

    # Process image and extract digits
    pil_im, numbers, parsed, missed = process(img, False)
    if pil_im is None:
        return None, None

    net = NetworkReader.readFrom(os.path.dirname(os.path.abspath(__file__))+'/network.xml')
    boardString = ''

    for number in numbers:
        if number is None:
            boardString += ' '
        else:
            data=ClassificationDataSet(400, nb_classes=9, class_labels=['1','2','3','4','5','6','7','8','9'])
            data.appendLinked(number.ravel(),[0])
            boardString += str(net.activateOnDataset(data).argmax(axis=1)[0]+1)
    return pil_im, boardString
def prepare_datasets(inp,out,dataframe, ratio):
    '''conversion from pandas dataframe to ClassificationDataSet of numpy
    parameters:
    inp: list of names of input features
    out: list of names of output features(target value)
    ratio: ratio of dimension of test to train dataset
    '''
    inp_dim = len(inp)
    out_dim = len(out)
    no_classes = 3  # 'up', 'down' and any other label are mapped to classes 0, 1, 2 below
    alldata = ClassificationDataSet(inp_dim,out_dim,no_classes)
    inp = dataframe[inp]
    out = dataframe[out]
    #for [a,b,c],d in zip(inp.values,out.values):
    for i in range(len(inp.values)):
        d = out.values[i]
        if d=='up': d = 0
        elif d == 'down': d = 1
        else: d =2
        alldata.addSample(inp.values[i],d)
    tstdata_temp, trndata_temp = alldata.splitWithProportion( ratio )
    # to convert supervised datasets to classification datasets
    tstdata = ClassificationDataSet(inp_dim, out_dim, no_classes)
    trndata = ClassificationDataSet(inp_dim, out_dim, no_classes)
    for n in range(0, tstdata_temp.getLength()):
        tstdata.addSample( tstdata_temp.getSample(n)[0], tstdata_temp.getSample(n)[1] )
    for n in range(0, trndata_temp.getLength()):
        trndata.addSample( trndata_temp.getSample(n)[0], trndata_temp.getSample(n)[1])
    trndata._convertToOneOfMany()
    tstdata._convertToOneOfMany()
    return alldata, trndata, tstdata
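# Hedged usage sketch (column names and values are made-up assumptions): a tiny
# pandas DataFrame with two input columns and one target column holding the
# 'up'/'down' labels that prepare_datasets() maps to class indices.
import pandas as pd

frame = pd.DataFrame({'rsi':    [0.2, 0.8, 0.5, 0.4],
                      'macd':   [0.1, -0.3, 0.0, 0.2],
                      'action': ['up', 'down', 'down', 'up']})
alldata, trndata, tstdata = prepare_datasets(['rsi', 'macd'], ['action'], frame, 0.25)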
Example #24
 def trainModel(self):
     self.finalDataSet = np.c_[self.flattenNumericalData, self.flattenCategoryData, self.flattenTargetDataConverted]
     self.finalHeaderSet = self.flattenNumericalHeader + self.flattenCategoryHeader + self.flattenTargetHeader
     self.nattributes = self.flattenNumericalData.shape[1] + self.flattenCategoryData.shape[1]
     ds = ClassificationDataSet(self.nattributes, 1, nb_classes=self.nbClasses)
     for rowData in self.finalDataSet:
         target = rowData[-1]
         variables = rowData[0:-1]
         ds.addSample(variables, target)
     self.testDataSet, self.trainDataSet = ds.splitWithProportion(0.25)
     self.testDataSet._convertToOneOfMany()
     self.trainDataSet._convertToOneOfMany()
     print self.testDataSet
     print self.trainDataSet
     self.net = buildNetwork(self.nattributes, self.nhiddenNerons, self.noutput, hiddenclass=TanhLayer, outclass=SigmoidLayer, bias=True)
     self.trainer = BackpropTrainer(self.net, self.trainDataSet, learningrate=0.001, momentum=0.99)
     begin0 = time.time()
     # self.trainer.trainUntilConvergence(verbose=True, dataset=ds, validationProportion=0.25, maxEpochs=10)
     for i in xrange(10):
         begin = time.time()
         self.trainer.trainEpochs(10)
         end = time.time()
         print 'iteration ', i, ' takes ', end-begin,  'seconds'
     end0 = time.time()
     print 'total time consumed: ', end0 - begin0
    def importFromCSV(self, fileName, numInputs, numClasses):
        """
        Function that reads in a CSV file and passes on to the pybrain
        neural net dataset structure to be used with the library's
        neural net classes.

        It expects that the last columns (determined by numOutputs) to be
        the classification columns.
        """
        dataSet = None
        dataFile = open(fileName)
        line = dataFile.readline()
        data = [str(x) for x in line.strip().split(',') if x != '']
        if(data[0] == '!labels:'):
            labels = data[1:]
            dataSet = ClassificationDataSet(numInputs, nb_classes=numClasses, class_labels=labels)
            line = dataFile.readline()
        else:
            dataSet = ClassificationDataSet(numInputs, nb_classes=numClasses)

        while line != '':
            data = [float(x) for x in line.strip().split(',') if x != '']
            inputData = data[:numInputs]
            outputData = data[-1:]
            dataSet.addSample(inputData, outputData)
            line = dataFile.readline()

        dataFile.close()
        return dataSet
Example #26
def getPybrainDataSet(source='Rachelle'):
    first = False#True
    qualities, combinations = cp.getCombinations()
    moods = combinations.keys()
    ds = None
    l=0
    for mood in moods:
        if mood=='neutral':
            continue
        for typeNum in range(1,21):
            for take in range(1,10):
                fileName = 'recordings/'+source+'/'+mood+'/'+\
                str(typeNum)+'_'+str(take)+'.skl'
                try:
                    data, featuresNames = ge.getFeatureVec(fileName, first)
                    first = False
                except IOError:
                    continue
                if ds is None:#initialization
                    ds = ClassificationDataSet( len(data), len(qualities) )
                output = np.zeros((len(qualities)))
                for q in combinations[mood][typeNum]:
                    output[qualities.index(q)] = 1
                ds.appendLinked(data ,  output)

                l+=sum(output)
    return ds, featuresNames
	def test(self,filename,classes,trainer,net):
		testLabels = []

		#load test data
		tstdata = ClassificationDataSet(103, 1, nb_classes=classes)
		tstdata = self.loaddata(filename, classes)

		testLabels = tstdata['target'];

		# convert targets to one-of-many encoding (this also fills the integer 'class' field)
		tstdata._convertToOneOfMany()
		
		# using numpy array
		output = np.array([net.activate(x) for x, _ in tstdata])
		output = output.argmax(axis=1)
		print(output)
		print("on test data",percentError( output, tstdata['class'] ))

		for i, l in enumerate(output):
			print l, '->', testLabels[i][0]

		# alternate version - using activateOnDataset function
		out = net.activateOnDataset(tstdata).argmax(axis=1)
		print out
		return percentError( out, tstdata['class'])
Example #28
def run_nn_fold(training_data, test_data):
    test_features, ignore, featureMap, labels, labelMap = fs.mutualinfo(training_data)

    input_len = len(test_features[0])
    num_classes = len(labelMap.keys())
    train_ds = ClassificationDataSet(input_len, 1,nb_classes=num_classes)
    for i in range(len(test_features)):
        train_ds.addSample(tuple(test_features[i]), (labels[i]))
    train_ds._convertToOneOfMany()
    net = buildNetwork(train_ds.indim, 2, train_ds.outdim, bias=True, hiddenclass=TanhLayer, outclass=SoftmaxLayer)
    trainer = BackpropTrainer(net, train_ds, verbose=True)
    print "training until convergence..."
    trainer.trainUntilConvergence(maxEpochs=100)
    print "done. testing..."


    test_ds = ClassificationDataSet(input_len, 1,nb_classes=num_classes)  

    labels = []
    for tweetinfo in test_data:
        featuresFound = tweetinfo["Features"]
        label = tweetinfo["Answer"]
        labels.append(label)
        features = [0]*len(featureMap.keys())
        for feat in featuresFound:
            if feat in featureMap:
                features[ featureMap[feat] ] = 1
        test_ds.addSample(tuple(features), (labelMap[label]))

    test_ds._convertToOneOfMany()
    tstresult = percentError( trainer.testOnClassData(
            dataset=test_ds ), test_ds['class'] )
    print tstresult
Example #29
def load_data(filename):
    """
    load dataset for classification
    """
    assert os.path.exists(filename)==True
    dat = scipy.io.loadmat(filename)
    inputs = dat['inputs']
    #print len(inputs)
    targets = dat['targets']
    #print len(targets)
    assert len(inputs)==len(targets)

    global alldata
    global indim 
    global outdim

    indim = len(inputs[0])
    outdim = 1
    #print indim
    alldata = ClassificationDataSet(indim, outdim, nb_classes = 8)
    alldata.setField('input',inputs)
    alldata.setField('target',targets)

    assert len(alldata['input'])==len(alldata['target'])
    print type(alldata)
Example #30
 def classifer(labels, data):
     """ data in format (value, label)
     """
     clsff = ClassificationDataSet(2,class_labels=labels)
     for d in data:
         clsff.appendLinked(d[0], d[1])
     clsff.calculateStatistics()
__author__ = 'QSG'
from pybrain.datasets import ClassificationDataSet
from pybrain.utilities import percentError
from pybrain.tools.shortcuts import buildNetwork
from pybrain.supervised.trainers import BackpropTrainer
from pybrain.structure.modules import SoftmaxLayer

from pylab import ion, ioff, figure, draw, contourf, clf, show, hold, plot
from scipy import diag, arange, meshgrid, where
from numpy.random import multivariate_normal

#use a 2D dataset, classify into 3 classes
means = [(-1, 2), (2, 4), (3, 1)]
cov = [diag([1, 1]), diag([0.5, 1.2]), diag([1.5, 0.7])]
alldata = ClassificationDataSet(2, 1, nb_classes=3)
for n in xrange(400):
    for kclass in range(3):
        input = multivariate_normal(means[kclass], cov[kclass])
        # print 'input: ', input
        alldata.addSample(input, [kclass])
# print alldata

tstdata, trndata = alldata.splitWithProportion(0.25)

trndata._convertToOneOfMany()
tstdata._convertToOneOfMany()

print "Number of training patterns: ", len(trndata)
print "input and output dimensions: ", trndata.indim, ',', trndata.outdim
print "first sample (input, target,class):"
# print trndata['input'][0],trndata['target'][0],trndata['class'][0]
Example #32
def training_and_testing():
    nn = init_neural_network()

    training = learning.get_labeled_data(
        '%strain-images-idx3-ubyte.gz' % (database_folder),
        '%strain-labels-idx1-ubyte.gz' % (database_folder),
        '%strainig' % (database_folder))
    test = learning.get_labeled_data(
        '%st10k-images-idx3-ubyte.gz' % (database_folder),
        '%st10k-labels-idx1-ubyte.gz' % (database_folder),
        '%stest' % (database_folder))

    FEATURES = N_INPUT_LAYER
    print("Caracteristicas a analizar: %i" % FEATURES)
    testdata = ClassificationDataSet(FEATURES, 1, nb_classes=OUTPUT_LAYER)
    trainingdata = ClassificationDataSet(FEATURES, 1, nb_classes=OUTPUT_LAYER)

    for i in range(len(test['data'])):
        testdata.addSample(test['data'][i], test['label'][i])
    for j in range(len(training['data'])):
        trainingdata.addSample(training['data'][j], training['label'][j])

    trainingdata._convertToOneOfMany()
    testdata._convertToOneOfMany()

    trainer = BackpropTrainer(nn,
                              dataset=trainingdata,
                              momentum=MOMENTUM,
                              verbose=True,
                              weightdecay=W_DECAY,
                              learningrate=L_RATE,
                              lrdecay=L_DECAY)

    for i in range(EPOCHS):
        trainer.trainEpochs(1)
        trnresult = percentError(trainer.testOnClassData(),
                                 trainingdata['class'])
        tstresult = percentError(trainer.testOnClassData(dataset=testdata),
                                 testdata['class'])

        print("epoch: %4d" % trainer.totalepochs,
              "  train error: %5.2f%%" % trnresult,
              "  test error: %5.2f%%" % tstresult)
    return nn
Example #33
#     total+=1
# res = true/total
# print res

# #37% accuracy

from pybrain.datasets import ClassificationDataSet
from pybrain.utilities import percentError
from pybrain.tools.shortcuts import buildNetwork
from pybrain.supervised.trainers import BackpropTrainer
from pybrain.structure.modules import SoftmaxLayer
from pybrain.tools.customxml.networkwriter import NetworkWriter
from pybrain.tools.customxml.networkreader import NetworkReader
from pybrain.utilities import percentError
num_inputs = len(X[0])
ds = ClassificationDataSet(num_inputs, 1, nb_classes=num_emotions)

Y = convertManyToOne(Y)

for k in xrange(len(X)):
    ds.addSample(X_scaled[k], Y[k])

ds._convertToOneOfMany()
tstdata, trndata = ds.splitWithProportion(0.25)  #25% test data

fnn = buildNetwork(trndata.indim, 50, trndata.outdim, outclass=SoftmaxLayer)

trainer = BackpropTrainer(fnn,
                          dataset=trndata,
                          momentum=0.1,
                          learningrate=0.01,
Example #34
def prepare_dataset():
    # Prepare output coding. "-" is 1 "." is 0
    d_morse_array = '100'  # ( 1, 0, 0 ) # D -.. - 100
    g_morse_array = '110'  # ( 1, 1, 0 ) # G --. - 110
    k_morse_array = '101'  # ( 1, 0, 1 ) # K -.- - 101
    o_morse_array = '111'  # ( 1, 1, 1 ) # O --- - 111
    r_morse_array = '010'  # ( 0, 1, 0 ) # R .-. - 010
    s_morse_array = '000'  # ( 0, 0, 0 ) # S ... - 000
    u_morse_array = '001'  # ( 0, 0, 1 ) # U ..- - 001
    w_morse_array = '011'  # ( 0, 1, 1 ) # W .-- - 011
    # Load learning data
    d_array = read_array("d")
    g_array = read_array("g")
    k_array = read_array("k")
    o_array = read_array("o")
    r_array = read_array("r")
    s_array = read_array("s")
    u_array = read_array("u")
    w_array = read_array("w")
    # Create dataset
    dataset = ClassificationDataSet(1600,
                                    nb_classes=8,
                                    class_labels=[
                                        d_morse_array, g_morse_array,
                                        k_morse_array, o_morse_array,
                                        r_morse_array, s_morse_array,
                                        u_morse_array, w_morse_array
                                    ])
    # add all samples to dataset
    dataset.addSample(d_array, [0])
    dataset.addSample(g_array, [1])
    dataset.addSample(k_array, [2])
    dataset.addSample(o_array, [3])
    dataset.addSample(r_array, [4])
    dataset.addSample(s_array, [5])
    dataset.addSample(u_array, [6])
    dataset.addSample(w_array, [7])
    dataset._convertToOneOfMany()
    return dataset
Example #35
def main():

	in_data=np.genfromtxt('logit-train.csv', delimiter = ',')
	out_data = np.genfromtxt('logit-test.csv', delimiter = ',')

	#getting in the data from csv files and making it suitable for further action.
	in_data=in_data[~np.isnan(in_data).any(1)]
	t=len(in_data[0,:])
	y_train=np.array(in_data[0:,t-1])
	x_train=np.array(in_data[0:,:t-1])

	scaler = preprocessing.StandardScaler().fit(x_train) #standardization plays an important role in all NN algos

	x_train=scaler.transform(x_train) #final x_train

	out_data=out_data[~np.isnan(out_data).any(1)]
	t=len(out_data[0,:])
	y_test=np.array(out_data[0:,t-1])
	x_test=np.array(out_data[0:,:t-1])

	x_test=scaler.transform(x_test) # final x_test

	alltraindata=ClassificationDataSet(t-1,1,nb_classes=2)
	for count in range(len((in_data))):
		alltraindata.addSample(x_train[count],[y_train[count]])

	alltraindata._convertToOneOfMany(bounds=[0,1])

	alltestdata=ClassificationDataSet(t-1,1,nb_classes=2)
	for count in range(len((out_data))):
		alltestdata.addSample(x_test[count],[y_test[count]])

	alltestdata._convertToOneOfMany(bounds=[0,1])

	net = GRNN(alltraindata.indim,alltraindata.outdim)
	Y_predicted = zeros((alltestdata['input'].shape[0],alltestdata['target'].shape[1]))
	sigma = 1.30 # Have to figure out cross-validation to choose sigma!! Though this value gives the best result!!
	# Every testing data sample is sent to .predict along with the training data to get a predicted outcome, a (1,2) vector
	for i,x in enumerate(alltestdata['input']):
		Y_predicted[i] = net.predict(x, alltraindata['input'], alltraindata['target'], sigma)
	y_score = Y_predicted[:,1]
	Y_predicted = Y_predicted.argmax(axis=1) # Selects the class predicted
	
	tstresult = percentError(Y_predicted,alltestdata['class'])
	print "Accuracy on test data is: %5.3f%%," % (100-tstresult)
	
	for x in range(len(y_test)):
		if y_test[x]:
			y_test[x] = 1
		else:
			y_test[x] = 0

	average_label = ['micro','macro','weighted']
	for label in average_label: 
		f1 = f1_score(y_test, Y_predicted, average=label)
		print "f1 score (%s)" %label, "is ", f1

	print "ROC Curve generation..."
	fpr, tpr, _ = metrics.roc_curve(y_test, y_score, pos_label=1)

	roc_auc = metrics.auc(fpr,tpr)

	print roc_auc

	plt.figure()
	plt.plot(fpr, tpr, label='ROC curve (area = %0.2f)' % roc_auc)
	plt.plot([0, 1], [0, 1], 'k--')
	plt.xlim([0.0, 1.0])
	plt.ylim([0.0, 1.05])
	plt.xlabel('False Positive Rate')
	plt.ylabel('True Positive Rate')
	plt.title('Receiver operating characteristic')
	plt.legend(loc="lower right")
	plt.show()
	print "ROC Curve closed."
Example #36
def createDS():
    # taken from iris data set at machine learning repository
    pat = [[[5.1, 3.5, 1.4, 0.2], [1, 0, 0], [0], ['Iris-setosa']],
           [[4.9, 3.0, 1.4, 0.2], [1, 0, 0], [0], ['Iris-setosa']],
           [[4.7, 3.2, 1.3, 0.2], [1, 0, 0], [0], ['Iris-setosa']],
           [[4.6, 3.1, 1.5, 0.2], [1, 0, 0], [0], ['Iris-setosa']],
           [[5.0, 3.6, 1.4, 0.2], [1, 0, 0], [0], ['Iris-setosa']],
           [[5.4, 3.9, 1.7, 0.4], [1, 0, 0], [0], ['Iris-setosa']],
           [[4.6, 3.4, 1.4, 0.3], [1, 0, 0], [0], ['Iris-setosa']],
           [[5.0, 3.4, 1.5, 0.2], [1, 0, 0], [0], ['Iris-setosa']],
           [[4.4, 2.9, 1.4, 0.2], [1, 0, 0], [0], ['Iris-setosa']],
           [[4.9, 3.1, 1.5, 0.1], [1, 0, 0], [0], ['Iris-setosa']],
           [[5.4, 3.7, 1.5, 0.2], [1, 0, 0], [0], ['Iris-setosa']],
           [[4.8, 3.4, 1.6, 0.2], [1, 0, 0], [0], ['Iris-setosa']],
           [[4.8, 3.0, 1.4, 0.1], [1, 0, 0], [0], ['Iris-setosa']],
           [[4.3, 3.0, 1.1, 0.1], [1, 0, 0], [0], ['Iris-setosa']],
           [[5.8, 4.0, 1.2, 0.2], [1, 0, 0], [0], ['Iris-setosa']],
           [[5.7, 4.4, 1.5, 0.4], [1, 0, 0], [0], ['Iris-setosa']],
           [[5.4, 3.9, 1.3, 0.4], [1, 0, 0], [0], ['Iris-setosa']],
           [[5.1, 3.5, 1.4, 0.3], [1, 0, 0], [0], ['Iris-setosa']],
           [[5.7, 3.8, 1.7, 0.3], [1, 0, 0], [0], ['Iris-setosa']],
           [[5.1, 3.8, 1.5, 0.3], [1, 0, 0], [0], ['Iris-setosa']],
           [[5.4, 3.4, 1.7, 0.2], [1, 0, 0], [0], ['Iris-setosa']],
           [[5.1, 3.7, 1.5, 0.4], [1, 0, 0], [0], ['Iris-setosa']],
           [[4.6, 3.6, 1.0, 0.2], [1, 0, 0], [0], ['Iris-setosa']],
           [[5.1, 3.3, 1.7, 0.5], [1, 0, 0], [0], ['Iris-setosa']],
           [[4.8, 3.4, 1.9, 0.2], [1, 0, 0], [0], ['Iris-setosa']],
           [[5.0, 3.0, 1.6, 0.2], [1, 0, 0], [0], ['Iris-setosa']],
           [[5.0, 3.4, 1.6, 0.4], [1, 0, 0], [0], ['Iris-setosa']],
           [[5.2, 3.5, 1.5, 0.2], [1, 0, 0], [0], ['Iris-setosa']],
           [[5.2, 3.4, 1.4, 0.2], [1, 0, 0], [0], ['Iris-setosa']],
           [[4.7, 3.2, 1.6, 0.2], [1, 0, 0], [0], ['Iris-setosa']],
           [[4.8, 3.1, 1.6, 0.2], [1, 0, 0], [0], ['Iris-setosa']],
           [[5.4, 3.4, 1.5, 0.4], [1, 0, 0], [0], ['Iris-setosa']],
           [[5.2, 4.1, 1.5, 0.1], [1, 0, 0], [0], ['Iris-setosa']],
           [[5.5, 4.2, 1.4, 0.2], [1, 0, 0], [0], ['Iris-setosa']],
           [[4.9, 3.1, 1.5, 0.1], [1, 0, 0], [0], ['Iris-setosa']],
           [[5.0, 3.2, 1.2, 0.2], [1, 0, 0], [0], ['Iris-setosa']],
           [[5.5, 3.5, 1.3, 0.2], [1, 0, 0], [0], ['Iris-setosa']],
           [[4.9, 3.1, 1.5, 0.1], [1, 0, 0], [0], ['Iris-setosa']],
           [[4.4, 3.0, 1.3, 0.2], [1, 0, 0], [0], ['Iris-setosa']],
           [[5.1, 3.4, 1.5, 0.2], [1, 0, 0], [0], ['Iris-setosa']],
           [[5.0, 3.5, 1.3, 0.3], [1, 0, 0], [0], ['Iris-setosa']],
           [[4.5, 2.3, 1.3, 0.3], [1, 0, 0], [0], ['Iris-setosa']],
           [[4.4, 3.2, 1.3, 0.2], [1, 0, 0], [0], ['Iris-setosa']],
           [[5.0, 3.5, 1.6, 0.6], [1, 0, 0], [0], ['Iris-setosa']],
           [[5.1, 3.8, 1.9, 0.4], [1, 0, 0], [0], ['Iris-setosa']],
           [[4.8, 3.0, 1.4, 0.3], [1, 0, 0], [0], ['Iris-setosa']],
           [[5.1, 3.8, 1.6, 0.2], [1, 0, 0], [0], ['Iris-setosa']],
           [[4.6, 3.2, 1.4, 0.2], [1, 0, 0], [0], ['Iris-setosa']],
           [[5.3, 3.7, 1.5, 0.2], [1, 0, 0], [0], ['Iris-setosa']],
           [[5.0, 3.3, 1.4, 0.2], [1, 0, 0], [0], ['Iris-setosa']],
           [[7.0, 3.2, 4.7, 1.4], [0, 1, 0], [1], ['Iris-versicolor']],
           [[6.4, 3.2, 4.5, 1.5], [0, 1, 0], [1], ['Iris-versicolor']],
           [[6.9, 3.1, 4.9, 1.5], [0, 1, 0], [1], ['Iris-versicolor']],
           [[5.5, 2.3, 4.0, 1.3], [0, 1, 0], [1], ['Iris-versicolor']],
           [[6.5, 2.8, 4.6, 1.5], [0, 1, 0], [1], ['Iris-versicolor']],
           [[5.7, 2.8, 4.5, 1.3], [0, 1, 0], [1], ['Iris-versicolor']],
           [[6.3, 3.3, 4.7, 1.6], [0, 1, 0], [1], ['Iris-versicolor']],
           [[4.9, 2.4, 3.3, 1.0], [0, 1, 0], [1], ['Iris-versicolor']],
           [[6.6, 2.9, 4.6, 1.3], [0, 1, 0], [1], ['Iris-versicolor']],
           [[5.2, 2.7, 3.9, 1.4], [0, 1, 0], [1], ['Iris-versicolor']],
           [[5.0, 2.0, 3.5, 1.0], [0, 1, 0], [1], ['Iris-versicolor']],
           [[5.9, 3.0, 4.2, 1.5], [0, 1, 0], [1], ['Iris-versicolor']],
           [[6.0, 2.2, 4.0, 1.0], [0, 1, 0], [1], ['Iris-versicolor']],
           [[6.1, 2.9, 4.7, 1.4], [0, 1, 0], [1], ['Iris-versicolor']],
           [[5.6, 2.9, 3.6, 1.3], [0, 1, 0], [1], ['Iris-versicolor']],
           [[6.7, 3.1, 4.4, 1.4], [0, 1, 0], [1], ['Iris-versicolor']],
           [[5.6, 3.0, 4.5, 1.5], [0, 1, 0], [1], ['Iris-versicolor']],
           [[5.8, 2.7, 4.1, 1.0], [0, 1, 0], [1], ['Iris-versicolor']],
           [[6.2, 2.2, 4.5, 1.5], [0, 1, 0], [1], ['Iris-versicolor']],
           [[5.6, 2.5, 3.9, 1.1], [0, 1, 0], [1], ['Iris-versicolor']],
           [[5.9, 3.2, 4.8, 1.8], [0, 1, 0], [1], ['Iris-versicolor']],
           [[6.1, 2.8, 4.0, 1.3], [0, 1, 0], [1], ['Iris-versicolor']],
           [[6.3, 2.5, 4.9, 1.5], [0, 1, 0], [1], ['Iris-versicolor']],
           [[6.1, 2.8, 4.7, 1.2], [0, 1, 0], [1], ['Iris-versicolor']],
           [[6.4, 2.9, 4.3, 1.3], [0, 1, 0], [1], ['Iris-versicolor']],
           [[6.6, 3.0, 4.4, 1.4], [0, 1, 0], [1], ['Iris-versicolor']],
           [[6.8, 2.8, 4.8, 1.4], [0, 1, 0], [1], ['Iris-versicolor']],
           [[6.7, 3.0, 5.0, 1.7], [0, 1, 0], [1], ['Iris-versicolor']],
           [[6.0, 2.9, 4.5, 1.5], [0, 1, 0], [1], ['Iris-versicolor']],
           [[5.7, 2.6, 3.5, 1.0], [0, 1, 0], [1], ['Iris-versicolor']],
           [[5.5, 2.4, 3.8, 1.1], [0, 1, 0], [1], ['Iris-versicolor']],
           [[5.5, 2.4, 3.7, 1.0], [0, 1, 0], [1], ['Iris-versicolor']],
           [[5.8, 2.7, 3.9, 1.2], [0, 1, 0], [1], ['Iris-versicolor']],
           [[6.0, 2.7, 5.1, 1.6], [0, 1, 0], [1], ['Iris-versicolor']],
           [[5.4, 3.0, 4.5, 1.5], [0, 1, 0], [1], ['Iris-versicolor']],
           [[6.0, 3.4, 4.5, 1.6], [0, 1, 0], [1], ['Iris-versicolor']],
           [[6.7, 3.1, 4.7, 1.5], [0, 1, 0], [1], ['Iris-versicolor']],
           [[6.3, 2.3, 4.4, 1.3], [0, 1, 0], [1], ['Iris-versicolor']],
           [[5.6, 3.0, 4.1, 1.3], [0, 1, 0], [1], ['Iris-versicolor']],
           [[5.5, 2.5, 4.0, 1.3], [0, 1, 0], [1], ['Iris-versicolor']],
           [[5.5, 2.6, 4.4, 1.2], [0, 1, 0], [1], ['Iris-versicolor']],
           [[6.1, 3.0, 4.6, 1.4], [0, 1, 0], [1], ['Iris-versicolor']],
           [[5.8, 2.6, 4.0, 1.2], [0, 1, 0], [1], ['Iris-versicolor']],
           [[5.0, 2.3, 3.3, 1.0], [0, 1, 0], [1], ['Iris-versicolor']],
           [[5.6, 2.7, 4.2, 1.3], [0, 1, 0], [1], ['Iris-versicolor']],
           [[5.7, 3.0, 4.2, 1.2], [0, 1, 0], [1], ['Iris-versicolor']],
           [[5.7, 2.9, 4.2, 1.3], [0, 1, 0], [1], ['Iris-versicolor']],
           [[6.2, 2.9, 4.3, 1.3], [0, 1, 0], [1], ['Iris-versicolor']],
           [[5.1, 2.5, 3.0, 1.1], [0, 1, 0], [1], ['Iris-versicolor']],
           [[5.7, 2.8, 4.1, 1.3], [0, 1, 0], [1], ['Iris-versicolor']],
           [[6.3, 3.3, 6.0, 2.5], [0, 0, 1], [2], ['Iris-virginica']],
           [[5.8, 2.7, 5.1, 1.9], [0, 0, 1], [2], ['Iris-virginica']],
           [[7.1, 3.0, 5.9, 2.1], [0, 0, 1], [2], ['Iris-virginica']],
           [[6.3, 2.9, 5.6, 1.8], [0, 0, 1], [2], ['Iris-virginica']],
           [[6.5, 3.0, 5.8, 2.2], [0, 0, 1], [2], ['Iris-virginica']],
           [[7.6, 3.0, 6.6, 2.1], [0, 0, 1], [2], ['Iris-virginica']],
           [[4.9, 2.5, 4.5, 1.7], [0, 0, 1], [2], ['Iris-virginica']],
           [[7.3, 2.9, 6.3, 1.8], [0, 0, 1], [2], ['Iris-virginica']],
           [[6.7, 2.5, 5.8, 1.8], [0, 0, 1], [2], ['Iris-virginica']],
           [[7.2, 3.6, 6.1, 2.5], [0, 0, 1], [2], ['Iris-virginica']],
           [[6.5, 3.2, 5.1, 2.0], [0, 0, 1], [2], ['Iris-virginica']],
           [[6.4, 2.7, 5.3, 1.9], [0, 0, 1], [2], ['Iris-virginica']],
           [[6.8, 3.0, 5.5, 2.1], [0, 0, 1], [2], ['Iris-virginica']],
           [[5.7, 2.5, 5.0, 2.0], [0, 0, 1], [2], ['Iris-virginica']],
           [[5.8, 2.8, 5.1, 2.4], [0, 0, 1], [2], ['Iris-virginica']],
           [[6.4, 3.2, 5.3, 2.3], [0, 0, 1], [2], ['Iris-virginica']],
           [[6.5, 3.0, 5.5, 1.8], [0, 0, 1], [2], ['Iris-virginica']],
           [[7.7, 3.8, 6.7, 2.2], [0, 0, 1], [2], ['Iris-virginica']],
           [[7.7, 2.6, 6.9, 2.3], [0, 0, 1], [2], ['Iris-virginica']],
           [[6.0, 2.2, 5.0, 1.5], [0, 0, 1], [2], ['Iris-virginica']],
           [[6.9, 3.2, 5.7, 2.3], [0, 0, 1], [2], ['Iris-virginica']],
           [[5.6, 2.8, 4.9, 2.0], [0, 0, 1], [2], ['Iris-virginica']],
           [[7.7, 2.8, 6.7, 2.0], [0, 0, 1], [2], ['Iris-virginica']],
           [[6.3, 2.7, 4.9, 1.8], [0, 0, 1], [2], ['Iris-virginica']],
           [[6.7, 3.3, 5.7, 2.1], [0, 0, 1], [2], ['Iris-virginica']],
           [[7.2, 3.2, 6.0, 1.8], [0, 0, 1], [2], ['Iris-virginica']],
           [[6.2, 2.8, 4.8, 1.8], [0, 0, 1], [2], ['Iris-virginica']],
           [[6.1, 3.0, 4.9, 1.8], [0, 0, 1], [2], ['Iris-virginica']],
           [[6.4, 2.8, 5.6, 2.1], [0, 0, 1], [2], ['Iris-virginica']],
           [[7.2, 3.0, 5.8, 1.6], [0, 0, 1], [2], ['Iris-virginica']],
           [[7.4, 2.8, 6.1, 1.9], [0, 0, 1], [2], ['Iris-virginica']],
           [[7.9, 3.8, 6.4, 2.0], [0, 0, 1], [2], ['Iris-virginica']],
           [[6.4, 2.8, 5.6, 2.2], [0, 0, 1], [2], ['Iris-virginica']],
           [[6.3, 2.8, 5.1, 1.5], [0, 0, 1], [2], ['Iris-virginica']],
           [[6.1, 2.6, 5.6, 1.4], [0, 0, 1], [2], ['Iris-virginica']],
           [[7.7, 3.0, 6.1, 2.3], [0, 0, 1], [2], ['Iris-virginica']],
           [[6.3, 3.4, 5.6, 2.4], [0, 0, 1], [2], ['Iris-virginica']],
           [[6.4, 3.1, 5.5, 1.8], [0, 0, 1], [2], ['Iris-virginica']],
           [[6.0, 3.0, 4.8, 1.8], [0, 0, 1], [2], ['Iris-virginica']],
           [[6.9, 3.1, 5.4, 2.1], [0, 0, 1], [2], ['Iris-virginica']],
           [[6.7, 3.1, 5.6, 2.4], [0, 0, 1], [2], ['Iris-virginica']],
           [[6.9, 3.1, 5.1, 2.3], [0, 0, 1], [2], ['Iris-virginica']],
           [[5.8, 2.7, 5.1, 1.9], [0, 0, 1], [2], ['Iris-virginica']],
           [[6.8, 3.2, 5.9, 2.3], [0, 0, 1], [2], ['Iris-virginica']],
           [[6.7, 3.3, 5.7, 2.5], [0, 0, 1], [2], ['Iris-virginica']],
           [[6.7, 3.0, 5.2, 2.3], [0, 0, 1], [2], ['Iris-virginica']],
           [[6.3, 2.5, 5.0, 1.9], [0, 0, 1], [2], ['Iris-virginica']],
           [[6.5, 3.0, 5.2, 2.0], [0, 0, 1], [2], ['Iris-virginica']],
           [[6.2, 3.4, 5.4, 2.3], [0, 0, 1], [2], ['Iris-virginica']],
           [[5.9, 3.0, 5.1, 1.8], [0, 0, 1], [2], ['Iris-virginica']]]
    alldata = ClassificationDataSet(4,
                                    1,
                                    nb_classes=3,
                                    class_labels=['set', 'vers', 'virg'])
    for p in pat:
        t = p[2]
        alldata.addSample(p[0], t)
    tstdata, trndata = alldata.splitWithProportion(0.33)
    trndata._convertToOneOfMany()
    tstdata._convertToOneOfMany()
    return trndata, tstdata
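# Hedged follow-up sketch (hyper-parameters are illustrative assumptions): train a
# small softmax network on the split returned by createDS() and report test error.
from pybrain.tools.shortcuts import buildNetwork
from pybrain.supervised.trainers import BackpropTrainer
from pybrain.structure.modules import SoftmaxLayer
from pybrain.utilities import percentError

trndata, tstdata = createDS()
net = buildNetwork(trndata.indim, 5, trndata.outdim, outclass=SoftmaxLayer)
trainer = BackpropTrainer(net, dataset=trndata, momentum=0.1, weightdecay=0.01)
trainer.trainEpochs(20)
print percentError(trainer.testOnClassData(dataset=tstdata), tstdata['class'])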
Example #37
from pybrain.datasets import ClassificationDataSet
from pybrain.utilities import percentError
from pybrain.tools.shortcuts import buildNetwork
from pybrain.supervised.trainers import BackpropTrainer
from pybrain.structure.modules import SoftmaxLayer
""" Furthermore, pylab is needed for the graphical output. """
from pylab import ion, ioff, figure, draw, contourf, clf, show, hold, plot
from scipy import diag, arange, meshgrid, where
from numpy.random import multivariate_normal
""" To have a nice dataset for visualization, we produce a set of
points in 2D belonging to three different classes. You could also
read in your data from a file, e.g. using pylab.load(). """

means = [(-1, 0), (2, 4), (3, 1)]
cov = [diag([1, 1]), diag([0.5, 1.2]), diag([1.5, 0.7])]
alldata = ClassificationDataSet(2, 1, nb_classes=3)
for n in range(400):
    for klass in range(3):
        input = multivariate_normal(means[klass], cov[klass])
        alldata.addSample(input, [klass])
""" Randomly split the dataset into 75% training and 25% test data sets. Of course, we
could also have created two different datasets to begin with."""
tstdata, trndata = alldata.splitWithProportion(0.25)
""" For neural network classification, it is highly advisable to encode classes
with one output neuron per class. Note that this operation duplicates the original
targets and stores them in an (integer) field named 'class'."""
trndata._convertToOneOfMany()
tstdata._convertToOneOfMany()
""" Test our dataset by printing a little information about it. """
print("Number of training patterns: ", len(trndata))
print("Input and output dimensions: ", trndata.indim, trndata.outdim)
from pybrain.datasets import ClassificationDataSet
from pybrain.utilities import percentError
from pybrain.tools.shortcuts import buildNetwork
from pybrain.supervised.trainers import BackpropTrainer
from pybrain.structure.modules import SoftmaxLayer
import linecache
import random

samples = linecache.getlines('svm3b.txt')
random.shuffle(samples)
alldata = ClassificationDataSet(len(samples[0].split('\t')) - 1,
                                1,
                                nb_classes=2)
for sample in samples:
    sample_array_o = sample.split('\t')
    sample_array = sample_array_o[0:len(sample_array_o) - 1]
    sample_result = sample_array_o[-1]
    for element in range(0, len(sample_array)):
        sample_array[element] = float(sample_array[element])
    sample_result = int(sample_result)
    alldata.addSample(sample_array, [sample_result])

tstdata, trndata = alldata.splitWithProportion(0.25)
print type(tstdata)
trndata._convertToOneOfMany()
tstdata._convertToOneOfMany()

print "Number of training patterns: ", len(trndata)
print "Input and output dimensions: ", trndata.indim, trndata.outdim
print "First sample (input, target, class):"
print trndata['input'][0], trndata['target'][0], trndata['class'][0]
Example #39
    input_features = zeros([num_features, 4])
    target_features = zeros([num_features, 1])
    for i in range(0, num_features):
        for j in range(0, 4):
            input_features[i][j] = input[i][j + 1]
        if input[i][0] == "L":
            target_features[i] = 0
        if input[i][0] == "B":
            target_features[i] = 1
        if input[i][0] == "R":
            target_features[i] = 2

    print "Dataset loaded into workspace ...."
    time.sleep(3)

    data = ClassificationDataSet(4, 1, nb_classes=3)
    for val in range(0, num_features):
        inp = input_features[val, :]
        targ = target_features[val]
        data.addSample(inp, [targ])

    print "Dataset created successfully"

    ##split into training and testing data
    tstdata, trndata = data.splitWithProportion(0.30)
    trndata._convertToOneOfMany()
    tstdata._convertToOneOfMany()
    print "Training data inp dimension :", trndata.indim
    print "\n Training data outp dimension :", trndata.outdim

    ##now create the neural network
Example #40
#print(Inputs)
#print(Out)

# Convert the output characters to integers based on their index in Labels
Outputs = np.empty((1, 1), dtype=int)

for i in np.nditer(Out):
    Outputs = np.append(Outputs, np.array([[Labels.index(i)]]), 0)
Outputs = np.delete(Outputs, 0, 0)

print("tamanhoooooooooo")
print(len(Outputs))

# Build the dataset
Dataset = ClassificationDataSet(120, 1, nb_classes=len(Labels))
assert (Inputs.shape[0] == Outputs.shape[0])
Dataset.setField('input', Inputs)
Dataset.setField('target', Outputs)
Dataset._convertToOneOfMany()

# Build and configure the networks
# RedeSoft1: hidden layer Linear, output layer Softmax
# RedeSoft2: hidden layer Sigmoid, output layer Softmax
# RedeSoft3: hidden layer Tanh (hyperbolic tangent), output layer Softmax
RedeSoft1 = buildNetwork(120,
                         61,
                         len(Labels),
                         bias=True,
                         hiddenclass=LinearLayer,
                         outclass=SoftmaxLayer)
Example #41
arguments = docopt(__doc__)

num_hidden_layers = map(int, arguments['<l>'])
max_iters = int(arguments['<i>'])

X1, y1 = make_blobs(n_samples=int(arguments['<s>'])/2, centers=2,
                    cluster_std=0.6)
X2, y2 = make_blobs(n_samples=int(arguments['<s>'])/2, centers=2,
                    cluster_std=0.6)

X = np.concatenate((X1, X2))
y = np.concatenate((y1, y2))

m, n = X.shape

dataset = ClassificationDataSet(n, 1, nb_classes=2)
for i in range(m):
    dataset.addSample(X[i], y[i])

tst_data, trn_data = dataset.splitWithProportion(0.25)

tst_data._convertToOneOfMany()
trn_data._convertToOneOfMany()

layers = [trn_data.indim]
layers += num_hidden_layers
layers += [trn_data.outdim]

neural_network = buildNetwork(*layers, outclass=SoftmaxLayer)
trainer = BackpropTrainer(neural_network, dataset=trn_data, verbose=False,
                          weightdecay=0.01, momentum=0.1)
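# Editor's sketch (not in the original snippet): a plausible continuation that trains for
# the requested number of iterations and reports percent error on both splits
# (percentError from pybrain.utilities is an assumed import).
for _ in range(max_iters):
    trainer.trainEpochs(1)
trn_error = percentError(trainer.testOnClassData(), trn_data['class'])
tst_error = percentError(trainer.testOnClassData(dataset=tst_data), tst_data['class'])
print "train error: %5.2f%%   test error: %5.2f%%" % (trn_error, tst_error)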
Beispiel #42
0
def init_brain():
    net = buildNetwork(4096, 4096, 5, bias=True)
    ds = ClassificationDataSet(4096, nb_classes=5, class_labels=['a', 'b', 'c', 'd', 'e'])
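    # Editor's note: the original snippet is truncated here; an obvious completion would be
    # to hand back the freshly built network and the (still empty) dataset.
    return net, ds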
Beispiel #43
0
def scrape_prediction():
    #request.form.values()
    data = request.form
    int_features = list(data.values())

    chrome_options = webdriver.ChromeOptions()
    prefs = {"profile.default_content_setting_values.notifications": 2}
    chrome_options.add_experimental_option("prefs", prefs)

    driver = webdriver.Chrome('C:/Users/vamsi/chromedriver.exe',
                              chrome_options=chrome_options)
    #for heroku
    #driver = webdriver.Chrome(executable_path=os.environ.get("CHROME_DRIVER_PATH"), chrome_options=chrome_options)

    #open the webpage
    driver.get("http://www.facebook.com")

    #target username
    username = WebDriverWait(driver, 10).until(
        EC.element_to_be_clickable((By.CSS_SELECTOR, "input[name='email']")))
    password = WebDriverWait(driver, 10).until(
        EC.element_to_be_clickable((By.CSS_SELECTOR, "input[name='pass']")))

    #enter username and password
    username.clear()
    username.send_keys("9490461737")
    password.clear()
    password.send_keys("Facebook@62892")
    time.sleep(15)
    #target the login button and click it
    button = WebDriverWait(driver, 2).until(
        EC.element_to_be_clickable(
            (By.CSS_SELECTOR, "button[type='submit']"))).click()
    time.sleep(15)
    #We are logged in!

    url = int_features[0]
    driver.get(url)
    time.sleep(15)
    html = driver.page_source

    #['created_at','statuses_count','followers_count','favourites_count','sex_code','lang_code']

    #1.scraping username section
    #gmql0nx0.l94mrbxd.p1ri9a11.lzcic4wl.bp9cbjyn.j83agx80
    elems = driver.find_elements_by_class_name(
        "gmql0nx0.l94mrbxd.p1ri9a11.lzcic4wl.bp9cbjyn.j83agx80")
    try:
        username = elems[0].text
    except IndexError:  # an empty result list raises IndexError, not KeyError
        username = '******'

    username = pd.Series(username)
    #predicting sex
    sex_predictor = gender.Detector(unknown_value=u"unknown",
                                    case_sensitive=False)
    first_name = username.str.split(' ').str.get(0)
    sex = first_name.apply(sex_predictor.get_gender)
    sex_dict = {
        'female': -2,
        'mostly_female': -1,
        'unknown': 0,
        'mostly_male': 1,
        'male': 2
    }
    sex_code = sex.map(sex_dict).astype(int)
    print username
    print sex_code[0]

    #2.scraping bio section
    #d2edcug0 hpfvmrgz qv66sw1b c1et5uql lr9zc1uh a8c37x1j keod5gw0 nxhoafnm aigsh9s9 d3f4x2em fe6kdd0r mau55g9w c8b282yb mdeji52x a5q79mjw g1cxx5fr knj5qynh m9osqain oqcyycmt
    elems = driver.find_elements_by_class_name(
        "d2edcug0.hpfvmrgz.qv66sw1b.c1et5uql.lr9zc1uh.a8c37x1j.keod5gw0.nxhoafnm.aigsh9s9.d3f4x2em.fe6kdd0r.mau55g9w.c8b282yb.mdeji52x.a5q79mjw.g1cxx5fr.knj5qynh.m9osqain.oqcyycmt"
    )
    try:
        bio = elems[0].text
    except IndexError:
        bio = ''
    print bio

    #3.scraping friends count,statuses_count,followers_count,favourites_count
    #d2edcug0 hpfvmrgz qv66sw1b c1et5uql lr9zc1uh e9vueds3 j5wam9gi knj5qynh m9osqain
    #d2edcug0 hpfvmrgz qv66sw1b c1et5uql lr9zc1uh a8c37x1j keod5gw0 nxhoafnm aigsh9s9 d3f4x2em fe6kdd0r mau55g9w c8b282yb iv3no6db jq4qci2q a3bd9o3v lrazzd5p m9osqain
    elems = driver.find_elements_by_class_name(
        "d2edcug0.hpfvmrgz.qv66sw1b.c1et5uql.lr9zc1uh.a8c37x1j.keod5gw0.nxhoafnm.aigsh9s9.d3f4x2em.fe6kdd0r.mau55g9w.c8b282yb.iv3no6db.jq4qci2q.a3bd9o3v.lrazzd5p.m9osqain"
    )
    friend_count = elems[2].text
    friend_count = random.choice(friends_list)
    print friend_count
    #statuses_count
    statuses_count = random.choice(statuses_list)
    print statuses_count

    #followers_count
    followers_count = random.choice(followers_list)
    print followers_count

    #favourites_count
    favourites_count = random.choice(favourites_list)
    print favourites_count

    #4.scraping location
    #oajrlxb2 g5ia77u1 qu0x051f esr5mh6w e9989ue4 r7d6kgcz rq0escxv nhd2j8a9 nc684nl6 p7hjln8o kvgmc6g5 cxmmr5t8 oygrvhab hcukyx3x jb3vyjys rz4wbd8a qt6c0cv9 a8nywdso i1ao9s8h esuyzwwr f1sip0of lzcic4wl oo9gr5id gpro0wi8 lrazzd5p
    elems = driver.find_elements_by_class_name(
        "oajrlxb2.g5ia77u1.qu0x051f.esr5mh6w.e9989ue4.r7d6kgcz.rq0escxv.nhd2j8a9.nc684nl6.p7hjln8o.kvgmc6g5.cxmmr5t8.oygrvhab.hcukyx3x.jb3vyjys.rz4wbd8a.qt6c0cv9.a8nywdso.i1ao9s8h.esuyzwwr.f1sip0of.lzcic4wl.oo9gr5id.gpro0wi8.lrazzd5p"
    )
    location = 'other'
    if location in location_dict:
        location = location_dict[location]
    else:
        location_dict[location] = len(location_dict) + 1
        location = location_dict[location]
        pickle.dump(location_dict,
                    open('location_dict_scraper.pkl', 'wb'),
                    protocol=2)
    print location

    #5.scraping created_at
    #d2edcug0 hpfvmrgz qv66sw1b c1et5uql lr9zc1uh a8c37x1j keod5gw0 nxhoafnm aigsh9s9 d3f4x2em fe6kdd0r mau55g9w c8b282yb iv3no6db jq4qci2q a3bd9o3v knj5qynh oo9gr5id hzawbc8m
    elems = driver.find_elements_by_class_name(
        "d2edcug0.hpfvmrgz.qv66sw1b.c1et5uql.lr9zc1uh.a8c37x1j.keod5gw0.nxhoafnm.aigsh9s9.d3f4x2em.fe6kdd0r.mau55g9w.c8b282yb.iv3no6db.jq4qci2q.a3bd9o3v.knj5qynh.oo9gr5id.hzawbc8m"
    )
    created_at = '07 December 1997'
    created_date = datetime.datetime.strptime(
        datetime.datetime.strptime(created_at,
                                   '%d %B %Y').strftime('%m %d %Y'),
        '%m %d %Y')
    today = datetime.datetime.strptime(
        datetime.datetime.now().strftime('%m %d %Y'), '%m %d %Y')
    days_count = today - created_date
    days_count = days_count.days
    print days_count

    #6.language
    #lang
    lang_dict = {
        'fr': 3,
        'en': 1,
        'nl': 6,
        'de': 0,
        'tr': 7,
        'it': 5,
        'gl': 4,
        'es': 2,
        'hi': 8,
        'other': 9
    }

    #['created_at','location','statuses_count','followers_count','favourites_count','friends_count','sex_code','lang_code']
    df = pd.DataFrame(
        {
            'bio': bio,
            'statuses_count': statuses_count,
            'followers_count': followers_count,
            'friends_count': friend_count,
            'favourites_count': favourites_count,
            'created_at': days_count,
            'location': location,
            'sex_code': sex_code,
            'lang': lang_dict['hi']
        },
        index=[0])
    params = pd.Series([
        df['created_at'], df['location'], df['statuses_count'],
        df['followers_count'], df['favourites_count'], df['friends_count'],
        sex_code, df['lang']
    ])
    print params
    #Random forest prediction
    rfr_prediction = random_forest.predict(params)

    #support vector machine prediction
    svm_prediction = support_vector.predict(params)

    #Naive Bayes prediction
    nvb_prediction = naive_bayes.predict(params)

    #Decision Tree Prediction
    dtc_prediction = decision_tree.predict(params)

    #neural network prediction
    ds2 = ClassificationDataSet(8, 1, nb_classes=2)
    lst = [
        df['created_at'], df['location'], df['statuses_count'],
        df['followers_count'], df['favourites_count'], df['friends_count'],
        sex_code, df['lang'].astype(int)
    ]
    ds2.addSample(lst, 1)
    ds2._convertToOneOfMany()
    fnn_prediction = neural_network.testOnClassData(dataset=ds2)

    percent = (dtc_prediction[0] + nvb_prediction[0] + rfr_prediction[0] +
               svm_prediction[0] + fnn_prediction[0])
    percent = round(percent * 20)

    return render_template('result.html',
                           username=username[0],
                           dtc_prediction=dtc_prediction[0],
                           nvb_prediction=nvb_prediction[0],
                           rfr_prediction=rfr_prediction[0],
                           svm_prediction=svm_prediction[0],
                           fnn_prediction=fnn_prediction[0],
                           percentage=percent,
                           features=int_features)
def create_network(X, Y, testx, testy):
    numOfFeature = X.shape[1]
    numOfExample = X.shape[0]
    alldata = ClassificationDataSet(numOfFeature, 1, nb_classes=10)  # create the classification dataset
    for i in range(0, numOfExample):
        alldata.addSample(X[i], Y[i])
    alldata._convertToOneOfMany()

    numOfFeature1 = testx.shape[1]
    numOfExample1 = testx.shape[0]
    testdata = ClassificationDataSet(numOfFeature1, 1, nb_classes=10)  # create the classification dataset for the test split
    for i in range(0, numOfExample1):
        testdata.addSample(testx[i], testy[i])
    testdata._convertToOneOfMany()

    print alldata.indim
    print alldata.outdim
    net = FeedForwardNetwork()
    inLayer = LinearLayer(alldata.indim)
    hiddenLayer1 = SigmoidLayer(60)  # layer count and size are free choices; in practice, more layers and nodes do not always train better
    hiddenLayer2 = SigmoidLayer(60)
    outLayer = SoftmaxLayer(alldata.outdim)
    #bias = BiasUnit('bias')
    net.addInputModule(inLayer)
    net.addModule(hiddenLayer1)
    net.addModule(hiddenLayer2)
    net.addOutputModule(outLayer)
    #net.addModule(bias)
    in_to_hidden = FullConnection(inLayer, hiddenLayer1)
    hidden_to_out = FullConnection(hiddenLayer2, outLayer)
    hidden_to_hidden = FullConnection(hiddenLayer1, hiddenLayer2)
    net.addConnection(in_to_hidden)
    net.addConnection(hidden_to_hidden)
    net.addConnection(hidden_to_out)
    net.sortModules()

    #fnn = buildNetwork( alldata.indim, 100, alldata.outdim, outclass=SoftmaxLayer )
    trainer = BackpropTrainer(net,
                              dataset=alldata,
                              momentum=0.1,
                              verbose=True,
                              weightdecay=0.01)
    for i in range(0, 20):
        print i
        trainer.trainEpochs(1)  # train on the data for one epoch
        print "train finish...."
        outtrain = net.activateOnDataset(alldata)
        outtrain = outtrain.argmax(axis=1)  # the highest output activation gives the class for each sample, e.g. out=[1, 2, 3, 2, ...]
        outtest = net.activateOnDataset(testdata)
        outtest = outtest.argmax(axis=1)  # the highest output activation gives the class for each sample
        trnresult = percentError(outtrain, alldata['class'])
        tstresult = percentError(outtest, testdata['class'])
        #trnresult = percentError( trainer.testOnClassData(dataset=alldata),alldata['class'] )
        #tstresult = percentError( trainer.testOnClassData(dataset=testdata),testdata['class'] )
        print "epoch: %4d" % trainer.totalepochs, "  train error: %5.2f%%" % trnresult, "  test error: %5.2f%%" % tstresult

    return net
Beispiel #45
0
def generate_Testdata(index):
    INPUT_FEATURES = 9216
    CLASSES = 5
    train_text, train_classfi_number, train_classfi, train_feature_name = getTargetData(
        "Breast_test.data")

    #train_text = getIndexData(train_text,index)

    alldata = ClassificationDataSet(INPUT_FEATURES, 1, nb_classes=CLASSES)
    for i in range(len(train_text)):
        features = train_text[i]
        if train_classfi[i] == "lumina":
            klass = 0
            alldata.addSample(features, klass)
        elif train_classfi[i] == "ERBB2":
            klass = 1
            alldata.addSample(features, klass)
        elif train_classfi[i] == "basal":
            klass = 2
            alldata.addSample(features, klass)
        elif train_classfi[i] == "normal":
            klass = 3
            alldata.addSample(features, klass)
        elif train_classfi[i] == "cell_lines":
            klass = 4
            alldata.addSample(features, klass)
    return {
        'minX': 0,
        'maxX': 1,
        'minY': 0,
        'maxY': 1,
        'd': alldata,
        'index': index
    }
Beispiel #46
0
fig, ax = plt.subplots(10, 10)
img_size = int(math.sqrt(n_features))  # reshape() needs an integer side length
for i in range(10):
    for j in range(10):
        Xi = X[idxs[i * 10 + j], :].reshape(img_size, img_size).T
        ax[i, j].set_axis_off()
        ax[i, j].imshow(Xi, aspect="auto", cmap="gray")
plt.show()

# split up training data for cross validation
print "Split data into training and test sets..."
Xtrain, Xtest, ytrain, ytest = train_test_split(X,
                                                y,
                                                test_size=0.25,
                                                random_state=42)
ds_train = ClassificationDataSet(X.shape[1], 10)
load_dataset(ds_train, Xtrain, ytrain)

# build a 400 x 25 x 10 Neural Network
print "Building %d x %d x %d neural network..." % (n_features,
                                                   NUM_HIDDEN_UNITS, n_classes)
fnn = buildNetwork(n_features,
                   NUM_HIDDEN_UNITS,
                   n_classes,
                   bias=True,
                   outclass=SoftmaxLayer)
print fnn

# train network
print "Training network..."
trainer = BackpropTrainer(fnn, ds_train)
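# Editor's sketch (not in the original snippet): a typical continuation that builds the
# matching test dataset, trains for a fixed number of epochs and reports held-out
# accuracy. The ds_test/load_dataset usage simply mirrors ds_train above.
ds_test = ClassificationDataSet(X.shape[1], 10)
load_dataset(ds_test, Xtest, ytest)
for epoch in range(50):
    trainer.train()
ypred = fnn.activateOnDataset(ds_test).argmax(axis=1)
print "Test accuracy: %.4f" % (float((ypred == ytest).sum()) / len(ytest))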
Beispiel #47
0
@author: PY131
'''

'''
preparation of data
'''
from sklearn import datasets  
iris_ds = datasets.load_iris()


X, y = iris_ds.data, iris_ds.target
label = ['Iris-setosa', 'Iris-versicolor', 'Iris-virginica']

from pybrain.datasets import ClassificationDataSet
# 4 input attributes, 1 output with 3 class labels
ds = ClassificationDataSet(4, 1, nb_classes=3, class_labels=label)  
for i in range(len(y)): 
    ds.appendLinked(X[i], y[i])
ds.calculateStatistics()

# split training, testing, validation data set (proportion 4:1)
tstdata_temp, trndata_temp = ds.splitWithProportion(0.25)  
tstdata = ClassificationDataSet(4, 1, nb_classes=3, class_labels=label)
for n in range(0, tstdata_temp.getLength()):
    tstdata.appendLinked( tstdata_temp.getSample(n)[0], tstdata_temp.getSample(n)[1] )

trndata = ClassificationDataSet(4, 1, nb_classes=3, class_labels=label)
for n in range(0, trndata_temp.getLength()):
    trndata.appendLinked( trndata_temp.getSample(n)[0], trndata_temp.getSample(n)[1] )

trndata._convertToOneOfMany()
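# Editor's sketch (not in the original snippet): the snippet stops after converting the
# training targets; a plausible continuation converts the test set the same way, builds a
# small softmax network and trains it (buildNetwork, BackpropTrainer and percentError
# imports are assumed).
tstdata._convertToOneOfMany()
fnn = buildNetwork(trndata.indim, 5, trndata.outdim, outclass=SoftmaxLayer)
trainer = BackpropTrainer(fnn, dataset=trndata, momentum=0.1, verbose=True, weightdecay=0.01)
trainer.trainEpochs(50)
print("test error: %5.2f%%" % percentError(trainer.testOnClassData(dataset=tstdata), tstdata['class']))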
Beispiel #48
0
def generate_data():
    index = [
        8673, 1646, 116, 2191, 4326, 6718, 7796, 8531, 8763, 5646, 3626, 5451,
        2004, 8079, 4044, 6471, 675, 3746, 6338, 3149, 4880, 4869, 6213, 5316,
        3544, 1046, 7739, 8309, 4147, 5526, 5555, 1504, 1625, 2680, 5814, 1305,
        3998, 794, 4355, 6788, 3343, 867, 343, 3706, 6902, 4250, 9014, 5478,
        788, 5323, 677, 9215, 9214, 9213, 9212, 9211, 9210, 9209, 9208, 9207,
        9206, 9205, 9204, 9203, 9202, 9201, 9200, 9199, 9198, 9197, 9196, 9195,
        9194, 9193, 9192, 9191, 9190, 9189, 9188, 9187, 9186, 9185, 9184, 9183,
        9182, 9181, 9180, 9179, 9178, 9177, 9176, 9175, 9174, 9173, 9172, 9171,
        9170, 9169, 9168, 9167, 9166, 9165, 9164, 9163, 9162, 9161, 9160, 9159,
        9158, 9157, 9156, 9155, 9154, 9153, 9152, 9151, 9150, 9149, 9148, 9147,
        9146, 9145, 9144, 9143, 9142, 9141, 9140, 9139, 9138, 9137, 9136, 9135,
        9134, 9133, 9132, 9131, 9130, 9129, 9128, 9127, 9126, 9125, 9124, 9123,
        9122, 9121, 9120, 9119, 9118, 9117, 9116, 9115, 9114, 9113, 9112, 9111,
        9110, 9109, 9108, 9107, 9106, 9105, 9104, 9103, 9102, 9101, 9100, 9099,
        9098, 9097, 9096, 9095, 9094, 9093, 9092, 9091, 9090, 9089, 9088, 9087,
        9086, 9085, 9084, 9083, 9082, 9081, 9080, 9079, 9078, 9077, 9076, 9075,
        9074, 9073, 9072, 9071, 9070, 9069, 9068, 9067
    ]

    INPUT_FEATURES = 9216
    CLASSES = 5
    train_text, train_classfi_number, train_classfi, train_feature_name = getTargetData(
        "Breast_train.data")

    #train_text = getIndexData(train_text,index)

    alldata = ClassificationDataSet(INPUT_FEATURES, 1, nb_classes=CLASSES)
    for i in range(len(train_text)):
        features = train_text[i]
        if train_classfi[i] == "lumina":
            klass = 0
            alldata.addSample(features, klass)
        elif train_classfi[i] == "ERBB2":
            klass = 1
            alldata.addSample(features, klass)
        elif train_classfi[i] == "basal":
            klass = 2
            alldata.addSample(features, klass)
        elif train_classfi[i] == "normal":
            klass = 3
            alldata.addSample(features, klass)
        elif train_classfi[i] == "cell_lines":
            klass = 4
            alldata.addSample(features, klass)
    return {
        'minX': 0,
        'maxX': 1,
        'minY': 0,
        'maxY': 1,
        'd': alldata,
        'index': index
    }
Beispiel #49
0
  def alternateTrain(self, inputData, hiddenLayers, numEpochs, logFreq=1, verbose=True):
    # Set of data to classify:
    # - IMG_SIZE input dimensions per data point
    # - 1 dimensional output
    # - 4 clusters of classification
    all_faces = ClassificationDataSet(IMG_SIZE, 1, nb_classes=4)

    for entry in inputData:
      (emotion, data) = entry
      all_faces.addSample(data, [emotion])
     
    # Generate a test and a train set from our data
    test_faces, train_faces = all_faces.splitWithProportion(0.25)

    # Hack to convert a 1-dimensional output into 4 output neurons
    test_faces._convertToOneOfMany()   
    train_faces._convertToOneOfMany()

    self.fnn = self.buildCustomNetwork(hiddenLayers,train_faces)
    
    # Set up the network trainer. Also nice tunable params
    trainer = BackpropTrainer(
      self.fnn, 
      dataset=train_faces, 
      momentum=0.1, 
      verbose=False,
      weightdecay=0.01
    ) 

    self.errorData = {}
    self.epochData = []
    self.trainErr = []
    self.testErr = []

    self.avgTrnErr = 0
    self.avgTstErr = 0

    # Train the network.
    if verbose:
      # print "Epoch\tTrain Error\tTest Error\t%d Nodes" % hiddenLayers[0]
      # Report after every epoch if verbose
      for i in range(numEpochs):
        trainer.trainEpochs(1)
        
        if trainer.totalepochs % logFreq == 0 :
          trnresult = percentError( trainer.testOnClassData(),
                                    train_faces['class'] )
          tstresult = percentError( trainer.testOnClassData(
                 dataset=test_faces ), test_faces['class'] )

          self.avgTrnErr += trnresult
          self.avgTstErr += tstresult
          
          self.epochData.append(trainer.totalepochs)
          self.trainErr.append(trnresult)
          self.testErr.append(tstresult)

          """print "%4d\t" % trainer.totalepochs, \
                 "%5.2f%%\t\t" % trnresult, \
                 "%5.2f%%" % tstresult
          """
    else:
      trainer.trainEpochs(EPOCHS)

    self.errorData['epochs']=self.epochData
    self.errorData['training_error']=self.trainErr
    self.errorData['testing_error']=self.testErr
    self.errorData['avg_testing_error']=self.avgTstErr / numEpochs
    self.errorData['avg_training_error']=self.avgTrnErr / numEpochs

    return self.errorData
Beispiel #50
0
#     update=nesterov_momentum,
#     update_learning_rate=0.01,
#     update_momentum=0.9,

#     regression=True,  # flag to indicate we're dealing with regression problem
#     max_epochs=100,  # we want to train this many epochs
#     verbose=1,
#     )

# X = np.asarray(X)
# X = X.astype(np.float32)
# y = np.asarray(y)
# y = y.astype(np.float32)
# net1.fit(X, y)

alldata = ClassificationDataSet(len(final[0][1]), 1, nb_classes=2)
for i, tup in enumerate(final):
    alldata.addSample(tup[1], tup[0])

tstdata, trndata = alldata.splitWithProportion(0.60)

trndata._convertToOneOfMany()
tstdata._convertToOneOfMany()

fnn = buildNetwork(trndata.indim, 10, trndata.outdim, outclass=SoftmaxLayer)
trainer = BackpropTrainer(fnn,
                          dataset=trndata,
                          momentum=0.1,
                          verbose=True,
                          weightdecay=0.01)
trainer.trainUntilConvergence(maxEpochs=10)
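# Editor's sketch (not in the original snippet): the evaluation step that would typically
# follow, reporting percent error on both splits (percentError import assumed).
trnresult = percentError(trainer.testOnClassData(), trndata['class'])
tstresult = percentError(trainer.testOnClassData(dataset=tstdata), tstdata['class'])
print "train error: %5.2f%%   test error: %5.2f%%" % (trnresult, tstresult)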
Beispiel #51
0
def generate_data():
    index = [
        9154, 5123, 2407, 680, 548, 8016, 15755, 9861, 461, 5552, 6834, 6268,
        14112, 15285, 13065, 8838, 2962, 6581, 4025, 14928, 10521, 1413, 3587,
        3537, 13462, 9809, 4128, 15806, 4884, 2084, 7818, 8294, 12308, 8789,
        5328, 5817, 7663, 6299, 15295, 3547, 1673, 5940, 6085, 6368, 6006,
        5520, 14228, 8608, 7822, 3237, 10927, 12268, 2852, 6903, 13001, 10775,
        4852, 14487, 10885, 14948, 15239, 8787, 6886, 15720, 13436, 4102, 7832,
        5071, 11062, 15004, 14888, 12560, 4381, 14283, 6892, 14753, 10132,
        6937, 2393, 465, 11791, 8533, 2174, 6739, 4316, 251, 11438, 10288,
        6658, 6439, 6711, 5173, 11590, 1452, 524, 15677, 13742, 11881, 9299,
        7499, 7068, 11457, 11128, 4936, 1634, 14692, 13352, 11896, 11895,
        11494, 9704, 6878, 10112, 10027, 10207, 6946, 6604, 5563, 3590, 2817,
        2661, 9667, 9609, 8368, 7538, 6830, 1909, 1385, 15043, 14006, 11050,
        10743, 10306, 9574, 9546, 9267, 9232, 8546, 8452, 8027, 7465, 5453,
        1903, 1747, 1367, 15496, 14231, 13894, 12340, 11433, 11118, 9223, 8369,
        8017, 7324, 6737, 5047, 4635, 4631, 3685, 3418, 3215, 1395, 835, 690,
        15808, 15210, 13829, 13798, 13303, 13220, 13078, 12416, 12407, 12082,
        11940, 11266, 9794, 9643, 8825, 8600, 8446, 7892, 6972, 6728, 6559,
        5759, 5091, 4640, 4209, 3214, 1994, 1599, 1447, 1082, 15881, 15810,
        15586, 15564, 15150
    ]

    INPUT_FEATURES = 200
    CLASSES = 15
    #train_text,train_classfi = getTargetData("Breast_train.data")

    #Load boston housing dataset as an example
    train_text, train_classfi_number, train_classfi, train_feature_name = getTargetData(
        "GCM_train.data")

    train_text = getIndexData(train_text, index)

    alldata = ClassificationDataSet(INPUT_FEATURES, 1, nb_classes=CLASSES)
    for i in range(len(train_text)):
        features = train_text[i]
        if train_classfi[i] == "Breast":
            klass = 0
            alldata.addSample(features, klass)
        elif train_classfi[i] == "Prostate":
            klass = 1
            alldata.addSample(features, klass)
        elif train_classfi[i] == "Lung":
            klass = 2
            alldata.addSample(features, klass)
        elif train_classfi[i] == "Colorectal":
            klass = 3
            alldata.addSample(features, klass)
        elif train_classfi[i] == "Lymphoma":
            klass = 4
            alldata.addSample(features, klass)
        elif train_classfi[i] == "Bladder":
            klass = 5
            alldata.addSample(features, klass)
        elif train_classfi[i] == "Melanoma":
            klass = 6
            alldata.addSample(features, klass)
        elif train_classfi[i] == "Uterus":
            klass = 7
            alldata.addSample(features, klass)
        elif train_classfi[i] == "Leukemia":
            klass = 8
            alldata.addSample(features, klass)
        elif train_classfi[i] == "Renal":
            klass = 9
            alldata.addSample(features, klass)
        elif train_classfi[i] == "Pancreas":
            klass = 10
            alldata.addSample(features, klass)
        elif train_classfi[i] == "Ovary":
            klass = 11
            alldata.addSample(features, klass)
        elif train_classfi[i] == "Mesothelioma":
            klass = 12
            alldata.addSample(features, klass)
        elif train_classfi[i] == "CNS":
            klass = 13
            alldata.addSample(features, klass)
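        # NOTE (editor): "Colorectal" is already matched by the klass = 3 branch above,
        # so this klass = 14 branch is unreachable as written.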
        elif train_classfi[i] == "Colorectal":
            klass = 14
            alldata.addSample(features, klass)
    return {
        'minX': 0,
        'maxX': 1,
        'minY': 0,
        'maxY': 1,
        'd': alldata,
        'index': index
    }
def load_training_dataSet(fileName):
    data = pd.read_csv(fileName, sep=',', header=None)
    #data.columns = ["state", "outcome"]
    return data

myclones_data = load_training_dataSet('Datasets/new_dataset_with_new_features.csv')
myclones_data = myclones_data.values


inputDim = 8


means = [(-1,0),(2,4),(3,1)]
cov = [diag([1,1]), diag([0.5,1.2]), diag([1.5,0.7])]
alldata = ClassificationDataSet(inputDim, 1, nb_classes=2)


#input = np.array([ myclones_data[n][16], myclones_data[n][17], myclones_data[n][18], myclones_data[n][15],myclones_data[n][11],myclones_data[n][12],   myclones_data[n][26], myclones_data[n][27]] )

for n in xrange(len(myclones_data)):
    #for klass in range(3):
    input = np.array(
        [myclones_data[n][16], myclones_data[n][17], myclones_data[n][18], myclones_data[n][15], myclones_data[n][11],
         myclones_data[n][12], myclones_data[n][26], myclones_data[n][27]])
    #print (n, "-->", input)
    alldata.addSample(input, int(myclones_data[n][35]))


tstdata, trndata = alldata.splitWithProportion( 0.85 )
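# Editor's sketch (not in the original snippet): the usual next steps after the split --
# one-hot encode the targets, build a softmax network over the 8 input features and train
# it. Hidden-layer size, epoch count and the PyBrain imports are assumptions.
trndata._convertToOneOfMany()
tstdata._convertToOneOfMany()
fnn = buildNetwork(trndata.indim, 10, trndata.outdim, outclass=SoftmaxLayer)
trainer = BackpropTrainer(fnn, dataset=trndata, momentum=0.1, verbose=True, weightdecay=0.01)
trainer.trainEpochs(20)
print percentError(trainer.testOnClassData(dataset=tstdata), tstdata['class'])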
Beispiel #53
0
def mean_square_error(outputs, desireds):
    result = sum([ ((output-desired)*(output-desired))/2 for output,desired in zip(outputs, desireds)])
    return result/len(outputs)
    
    
if __name__ == "__main__":
    
    matrix = []
    data = []    
    
    for x in range(10):
        for y in range(10):
            matrix.append(create_identity_matrix(x,y))


    train_data = ClassificationDataSet(100, 100, nb_classes=100)  # input size, number of classes
    test_data  = ClassificationDataSet(100, 100,nb_classes=100)

    # Create the training and test sets
    train_data.addSample(matrix,matrix)
    test_data.addSample(matrix,matrix)
    '''
    print ("Number of training patterns: ", len(train_data))
    print ("Input and output dimensions: ", train_data.indim, train_data.outdim)
    print ("First sample (input, target, class):")
    print (test_data['input'], test_data['target'])
    '''
    # Create the network
    network = FeedForwardNetwork()
    inLayer = SigmoidLayer(train_data.indim)
    hiddenLayer = SigmoidLayer(7)
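    # Editor's sketch (not in the original snippet): the construction is cut off here;
    # completing it in the usual FeedForwardNetwork style would add an output layer, wire
    # the layers with FullConnection objects (import assumed) and call sortModules()
    # before attaching a trainer.
    outLayer = SigmoidLayer(train_data.outdim)
    network.addInputModule(inLayer)
    network.addModule(hiddenLayer)
    network.addOutputModule(outLayer)
    network.addConnection(FullConnection(inLayer, hiddenLayer))
    network.addConnection(FullConnection(hiddenLayer, outLayer))
    network.sortModules()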
Beispiel #54
0
def generate_Testdata(index):
    INPUT_FEATURES = 200
    CLASSES = 15
    #train_text,train_classfi = getTargetData("Breast_train.data")

    #Load boston housing dataset as an example
    train_text, train_classfi_number, train_classfi, train_feature_name = getTargetData(
        "GCM_test.data")
    train_text = getIndexData(train_text, index)
    alldata = ClassificationDataSet(INPUT_FEATURES, 1, nb_classes=CLASSES)
    for i in range(len(train_text)):
        features = train_text[i]
        if train_classfi[i] == "Breast":
            klass = 0
            alldata.addSample(features, klass)
        elif train_classfi[i] == "Prostate":
            klass = 1
            alldata.addSample(features, klass)
        elif train_classfi[i] == "Lung":
            klass = 2
            alldata.addSample(features, klass)
        elif train_classfi[i] == "Colorectal":
            klass = 3
            alldata.addSample(features, klass)
        elif train_classfi[i] == "Lymphoma":
            klass = 4
            alldata.addSample(features, klass)
        elif train_classfi[i] == "Bladder":
            klass = 5
            alldata.addSample(features, klass)
        elif train_classfi[i] == "Melanoma":
            klass = 6
            alldata.addSample(features, klass)
        elif train_classfi[i] == "Uterus":
            klass = 7
            alldata.addSample(features, klass)
        elif train_classfi[i] == "Leukemia":
            klass = 8
            alldata.addSample(features, klass)
        elif train_classfi[i] == "Renal":
            klass = 9
            alldata.addSample(features, klass)
        elif train_classfi[i] == "Pancreas":
            klass = 10
            alldata.addSample(features, klass)
        elif train_classfi[i] == "Ovary":
            klass = 11
            alldata.addSample(features, klass)
        elif train_classfi[i] == "Mesothelioma":
            klass = 12
            alldata.addSample(features, klass)
        elif train_classfi[i] == "CNS":
            klass = 13
            alldata.addSample(features, klass)
        elif train_classfi[i] == "Colorectal":
            klass = 14
            alldata.addSample(features, klass)
    return {
        'minX': 0,
        'maxX': 1,
        'minY': 0,
        'maxY': 1,
        'd': alldata,
        'index': index
    }
Beispiel #55
0
def generate_Testdata(index):
    INPUT_FEATURES = 500
    CLASSES = 15
    #train_text,train_classfi = getTargetData("Breast_train.data")

    #Load boston housing dataset as an example
    train_text, train_classfi_number, train_classfi, train_feature_name = getTargetData(
        "GCM_test.data")
    temp = index
    ss = []
    count = 0
    df = pd.DataFrame(train_text)
    for line in temp:
        count == 0
        count += 1
        ss.append(df[line[1]].values)
        if (count == 500):
            break
    train_text = np.array(ss).transpose()
    alldata = ClassificationDataSet(INPUT_FEATURES, 1, nb_classes=CLASSES)
    for i in range(len(train_text)):
        features = train_text[i]
        if train_classfi[i] == "Breast":
            klass = 0
            alldata.addSample(features, klass)
        elif train_classfi[i] == "Prostate":
            klass = 1
            alldata.addSample(features, klass)
        elif train_classfi[i] == "Lung":
            klass = 2
            alldata.addSample(features, klass)
        elif train_classfi[i] == "Colorectal":
            klass = 3
            alldata.addSample(features, klass)
        elif train_classfi[i] == "Lymphoma":
            klass = 4
            alldata.addSample(features, klass)
        elif train_classfi[i] == "Bladder":
            klass = 5
            alldata.addSample(features, klass)
        elif train_classfi[i] == "Melanoma":
            klass = 6
            alldata.addSample(features, klass)
        elif train_classfi[i] == "Uterus":
            klass = 7
            alldata.addSample(features, klass)
        elif train_classfi[i] == "Leukemia":
            klass = 8
            alldata.addSample(features, klass)
        elif train_classfi[i] == "Renal":
            klass = 9
            alldata.addSample(features, klass)
        elif train_classfi[i] == "Pancreas":
            klass = 10
            alldata.addSample(features, klass)
        elif train_classfi[i] == "Ovary":
            klass = 11
            alldata.addSample(features, klass)
        elif train_classfi[i] == "Mesothelioma":
            klass = 12
            alldata.addSample(features, klass)
        elif train_classfi[i] == "CNS":
            klass = 13
            alldata.addSample(features, klass)
        elif train_classfi[i] == "Colorectal":
            klass = 14
            alldata.addSample(features, klass)
    return {
        'minX': 0,
        'maxX': 1,
        'minY': 0,
        'maxY': 1,
        'd': alldata,
        'index': temp
    }
Beispiel #56
0
from pybrain.tools.shortcuts import buildNetwork
from pybrain.datasets import SupervisedDataSet
from pybrain.datasets import ClassificationDataSet
from pybrain.structure.modules import SoftmaxLayer
import numpy
import csv
from PIL import Image
from pybrain.supervised.trainers import BackpropTrainer

#find counts and image filenames
data = []
alldata = ClassificationDataSet(16, 1, nb_classes=3)
uncloned = ClassificationDataSet(16, 1, nb_classes=3)
expected = []

with open("D10415.csv") as f:
    for line in f:
        vals = line.split(",")
        hist = [float(x) for x in vals[:-1]]
        klass = int(vals[-1])
        alldata.addSample(numpy.array(hist), [klass])
        uncloned.addSample(numpy.array(hist), [klass])
        data.append((numpy.array(hist), klass))
        expected.append(klass)

alldata._convertToOneOfMany()

# create the net
# net = buildNetwork(alldata.indim, 16, 1, outclass=SoftmaxLayer)
net = buildNetwork(alldata.indim, 16, alldata.outdim, outclass=SoftmaxLayer)
# net = buildNetwork(alldata.indim, 16, 1)
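# Editor's sketch (not in the original snippet): with the network built, the usual next
# step is a backprop trainer over alldata and a check of the predictions against the
# `expected` list collected above. Epoch count is an illustrative assumption.
trainer = BackpropTrainer(net, dataset=alldata, momentum=0.1, verbose=True, weightdecay=0.01)
trainer.trainEpochs(20)
predicted = [net.activate(hist).argmax() for hist, _ in data]
correct = sum(1 for p, e in zip(predicted, expected) if p == e)
print "accuracy: %.3f" % (float(correct) / len(expected))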
Beispiel #57
0
def generate_Testdata(index):
    INPUT_FEATURES = 300 
    CLASSES = 9
    #train_text,train_classfi = getTargetData("Breast_train.data")

    #Load boston housing dataset as an example
    train_text,train_classfi_number,train_classfi,train_feature_name = getTargetData("nci60_test_m_truncated.txt")
    temp=index
    ss = []
    count = 0
    df = pd.DataFrame(train_text)
    for line in temp:
        count == 0
        count += 1
        ss.append(df[line[1]].values)
        if(count==300):
            break
    train_text = np.array(ss).transpose()
    alldata = ClassificationDataSet(INPUT_FEATURES, 1, nb_classes=CLASSES)
    for i in range(len(train_text)):
        features = train_text[i]
        if train_classfi[i]=="1" :
            klass = 0
            alldata.addSample(features, klass)
        elif train_classfi[i]=="2" :
            klass = 1
            alldata.addSample(features, klass)
        elif train_classfi[i]=="3" :
            klass = 2
            alldata.addSample(features, klass)
        elif train_classfi[i]=="4" :
            klass = 3
            alldata.addSample(features, klass)
        elif train_classfi[i]=="5" :
            klass = 4
            alldata.addSample(features, klass)
        elif train_classfi[i]=="6" :
            klass = 5
            alldata.addSample(features, klass)
        elif train_classfi[i]=="7" :
            klass = 6
            alldata.addSample(features, klass)
        elif train_classfi[i]=="8" :
            klass = 7
            alldata.addSample(features, klass)
        elif train_classfi[i]=="9" :
            klass = 8
            alldata.addSample(features, klass)
    return {'minX': 0, 'maxX': 1,
            'minY': 0, 'maxY': 1, 'd': alldata,'index':temp}
Beispiel #58
0
    days = pd.get_dummies(df.DayOfWeek)
    district = pd.get_dummies(df.PdDistrict)
    hour = pd.get_dummies(df.Dates.dt.hour)
    year = pd.get_dummies(df.Dates.dt.year)
    month = pd.get_dummies(df.Dates.dt.month)
    minute = pd.get_dummies(df.Dates.dt.minute)
    X = df.X
    Y = df.Y
    new_df = pd.concat([days, hour, year, month, district, X, Y], axis = 1)

    return new_df

crimes = OHE_crime(training)

print "making dataset"
ds = ClassificationDataSet(68, 1 , nb_classes=39)
for k in xrange(len(crimes)): 
    print k
    ds.addSample(crimes.iloc[[k]], crime_labels[k])
tstdata, trndata = ds.splitWithProportion( 0.5 )
trndata._convertToOneOfMany()
tstdata._convertToOneOfMany()

print "making net"
hidden_layer = int((trndata.indim + trndata.outdim) / 2)
fnn = buildNetwork(trndata.indim, hidden_layer, trndata.outdim, bias=True, outclass=SoftmaxLayer)
print fnn

trainer = BackpropTrainer(fnn, dataset=trndata, momentum=0.1, learningrate=0.01 , verbose=True, weightdecay=0.01) 

print "WIP"
Beispiel #59
0
def generate_data():
    INPUT_FEATURES = 300 
    CLASSES = 9
    #train_text,train_classfi = getTargetData("Breast_train.data")

    #Load boston housing dataset as an example
    train_text,train_classfi_number,train_classfi,train_feature_name = getTargetData("nci60_train_m_truncated.txt")
    X = train_text
    Y = train_classfi_number
    names = train_feature_name
    rf = RandomForestRegressor()
    rf.fit(X, Y)
    temp=sorted(zip(map(lambda x: round(x, 4), rf.feature_importances_), names), 
                 reverse=True)
    ss = []
    count = 0
    df = pd.DataFrame(train_text)
    for line in temp:
        count == 0
        count += 1
        ss.append(df[line[1]].values)
        if(count==300):
            break
    train_text = np.array(ss).transpose()
    alldata = ClassificationDataSet(INPUT_FEATURES, 1, nb_classes=CLASSES)
    for i in range(len(train_text)):
        features = train_text[i]
        if train_classfi[i]=="1" :
            klass = 0
            alldata.addSample(features, klass)
        elif train_classfi[i]=="2" :
            klass = 1
            alldata.addSample(features, klass)
        elif train_classfi[i]=="3" :
            klass = 2
            alldata.addSample(features, klass)
        elif train_classfi[i]=="4" :
            klass = 3
            alldata.addSample(features, klass)
        elif train_classfi[i]=="5" :
            klass = 4
            alldata.addSample(features, klass)
        elif train_classfi[i]=="6" :
            klass = 5
            alldata.addSample(features, klass)
        elif train_classfi[i]=="7" :
            klass = 6
            alldata.addSample(features, klass)
        elif train_classfi[i]=="8" :
            klass = 7
            alldata.addSample(features, klass)
        elif train_classfi[i]=="9" :
            klass = 8
            alldata.addSample(features, klass)
    return {'minX': 0, 'maxX': 1,
            'minY': 0, 'maxY': 1, 'd': alldata,'index':temp}
Beispiel #60
0
import numpy

from pybrain.datasets import SupervisedDataSet
from pybrain.tools.shortcuts import buildNetwork
from pybrain.utilities import percentError
from pybrain.datasets import ClassificationDataSet
from pybrain.supervised.trainers import BackpropTrainer
from pybrain.structure.modules import SoftmaxLayer

from pylab import ion, ioff, figure, draw, contourf, clf, show, hold, plot
from scipy import diag, arange, meshgrid, where
from numpy.random import multivariate_normal

#ourdataset=SupervisedDataSet(4,1)
ourdataset = ClassificationDataSet(4, 1, nb_classes=3)

with open('newdata.txt') as fp:
    for line in fp:
        splitedline = line.split(",")
        ourclass = splitedline[4].split("\n")[0]
        if "Iris-virginica" in ourclass:
            nameclass = 0

        elif "Iris-setosa" in ourclass:
            nameclass = 1

        else:
            nameclass = 2

        oursample = splitedline[0:4]
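        # Editor's sketch (not in the original snippet): the file is truncated here; the
        # natural next step is to cast the four attributes to floats and add the sample.
        oursample = [float(value) for value in oursample]
        ourdataset.addSample(oursample, [nameclass])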