Code Example #1
File: BPModelTrainer.py  Project: zoulily/credit
 def trainModel(self):
     self.finalDataSet = np.c_[self.flattenNumericalData, self.flattenCategoryData, self.flattenTargetDataConverted]
     self.finalHeaderSet = self.flattenNumericalHeader + self.flattenCategoryHeader + self.flattenTargetHeader
     self.nattributes = self.flattenNumericalData.shape[1] + self.flattenCategoryData.shape[1]
     ds = ClassificationDataSet(self.nattributes, 1, nb_classes=self.nbClasses)
     for rowData in self.finalDataSet:
         target = rowData[-1]
         variables = rowData[0:-1]
         ds.addSample(variables, target)
     self.testDataSet, self.trainDataSet = ds.splitWithProportion(0.25)
     self.testDataSet._convertToOneOfMany()
     self.trainDataSet._convertToOneOfMany()
     print self.testDataSet
     print self.trainDataSet
     self.net = buildNetwork(self.nattributes, self.nhiddenNerons, self.noutput, hiddenclass=TanhLayer, outclass=SigmoidLayer, bias=True)
     self.trainer = BackpropTrainer(self.net, self.trainDataSet, learningrate=0.001, momentum=0.99)
     begin0 = time.time()
     # self.trainer.trainUntilConvergence(verbose=True, dataset=ds, validationProportion=0.25, maxEpochs=10)
     for i in xrange(10):
         begin = time.time()
         self.trainer.trainEpochs(10)
         end = time.time()
         print 'iteration ', i, ' takes ', end-begin,  'seconds'
     end0 = time.time()
     print 'total time consumed: ', end0 - begin0
Code Example #2
    def importFromCSV(self, fileName, numInputs, numClasses):
        """
        Function that reads in a CSV file and passes on to the pybrain
        neural net dataset structure to be used with the library's
        neural net classes.

        It expects the last column to hold the classification value
        for each sample.
        """
        dataSet = None
        dataFile = open(fileName)
        line = dataFile.readline()
        data = [str(x) for x in line.strip().split(',') if x != '']
        if(data[0] == '!labels:'):
            labels = data[1:]
            dataSet = ClassificationDataSet(numInputs, nb_classes=numClasses, class_labels=labels)
            line = dataFile.readline()
        else:
            dataSet = ClassificationDataSet(numInputs, nb_classes=numClasses)

        while line != '':
            data = [float(x) for x in line.strip().split(',') if x != '']
            inputData = data[:numInputs]
            outputData = data[-1:]
            dataSet.addSample(inputData, outputData)
            line = dataFile.readline()

        dataFile.close()
        return dataSet
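A short usage sketch for the function above; the loader object, file name, and column counts here are assumptions for illustration, and the one-of-many conversion plus network setup follow the same PyBrain calls used in the other examples on this page.

from pybrain.tools.shortcuts import buildNetwork
from pybrain.supervised.trainers import BackpropTrainer
from pybrain.structure.modules import SoftmaxLayer

# hypothetical: importFromCSV is assumed to live on some loader object
ds = loader.importFromCSV('iris.csv', numInputs=4, numClasses=3)
ds._convertToOneOfMany()   # expand the class index into one-of-many target columns
net = buildNetwork(ds.indim, 5, ds.outdim, outclass=SoftmaxLayer)
trainer = BackpropTrainer(net, dataset=ds, verbose=True)
trainer.trainEpochs(20)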
Code Example #3
File: breastrf.py  Project: Guosmilesmile/pythonstudy
def generate_Testdata(index):
    INPUT_FEATURES = 200 
    CLASSES = 5
    train_text,train_classfi_number,train_classfi,train_feature_name = getTargetData("Breast_test.data")
    
    train_text = getIndexData(train_text,index)   

    alldata = ClassificationDataSet(INPUT_FEATURES, 1, nb_classes=CLASSES)
    for i in range(len(train_text)):
        features = train_text[i]
        if train_classfi[i]=="lumina" :
            klass = 0
            alldata.addSample(features, klass)
        elif train_classfi[i]=="ERBB2" :
            klass = 1
            alldata.addSample(features, klass)
        elif train_classfi[i]=="basal" :
            klass = 2
            alldata.addSample(features, klass)
        elif train_classfi[i]=="normal" :
            klass = 3
            alldata.addSample(features, klass)
        elif train_classfi[i]=="cell_lines" :
            klass = 4
            alldata.addSample(features, klass)
    return {'minX': 0, 'maxX': 1,
            'minY': 0, 'maxY': 1, 'd': alldata,'index':index}
Code Example #4
def conductGeneration(generation, corpus):
        '''
        Conducts a generation of learning and testing on the input data
                generation (int) --- the number of the generation
                corpus (object) --- corpus object containing info needed
        '''
        # Set up the dataset skeleton
        alldata = ClassificationDataSet(2, 1, nb_classes=3, class_labels=['a', 'b', 'c'])

        # means = [(-1,0),(2,4),(3,1)]
        # cov = [diag([1,1]), diag([0.5,1.2]), diag([1.5,0.7])]

        # alldata = ClassificationDataSet(2, 1, nb_classes=3)
        # for n in xrange(400):
        #     for klass in range(3):
        #         input = multivariate_normal(means[klass],cov[klass])
        #         print type(input)
        #         alldata.addSample(input, [klass])

        alldata.addSample((0, 1), (1))
        alldata.addSample((1, 0), (0))
        alldata.addSample((0, 0), (2))
        alldata.addSample((1, 1), (0))

        trndata, partdata = alldata.splitWithProportion(0.5)

        return alldata
Code Example #5
File: neural_net.py  Project: AvenTu/emote-cat
def run_nn_fold(training_data, test_data):
    test_features, ignore, featureMap, labels, labelMap = fs.mutualinfo(training_data)

    input_len = len(test_features[0])
    num_classes = len(labelMap.keys())
    train_ds = ClassificationDataSet(input_len, 1,nb_classes=num_classes)
    for i in range(len(test_features)):
        train_ds.addSample(tuple(test_features[i]), (labels[i]))
    train_ds._convertToOneOfMany()
    net = buildNetwork(train_ds.indim, 2, train_ds.outdim, bias=True, hiddenclass=TanhLayer, outclass=SoftmaxLayer)
    trainer = BackpropTrainer(net, train_ds, verbose=True)
    print "training until convergence..."
    trainer.trainUntilConvergence(maxEpochs=100)
    print "done. testing..."


    test_ds = ClassificationDataSet(input_len, 1,nb_classes=num_classes)  

    labels = []
    for tweetinfo in test_data:
        featuresFound = tweetinfo["Features"]
        label = tweetinfo["Answer"]
        labels.append(label)
        features = [0]*len(featureMap.keys())
        for feat in featuresFound:
            if feat in featureMap:
                features[ featureMap[feat] ] = 1
        test_ds.addSample(tuple(features), (labelMap[label]))

    test_ds._convertToOneOfMany()
    tstresult = percentError( trainer.testOnClassData(
            dataset=test_ds ), test_ds['class'] )
    print tstresult
Code Example #6
File: breastrf.py  Project: Guosmilesmile/pythonstudy
def generate_data():
    index = [8673,1646,116,2191,4326,6718,7796,8531,8763,5646,3626,5451,2004,8079,4044,6471,675,3746,6338,3149,4880,4869,6213,5316,3544,1046,7739,8309,4147,5526,5555,1504,1625,2680,5814,1305,3998,794,4355,6788,3343,867,343,3706,6902,4250,9014,5478,788,5323,677,9215,9214,9213,9212,9211,9210,9209,9208,9207,9206,9205,9204,9203,9202,9201,9200,9199,9198,9197,9196,9195,9194,9193,9192,9191,9190,9189,9188,9187,9186,9185,9184,9183,9182,9181,9180,9179,9178,9177,9176,9175,9174,9173,9172,9171,9170,9169,9168,9167,9166,9165,9164,9163,9162,9161,9160,9159,9158,9157,9156,9155,9154,9153,9152,9151,9150,9149,9148,9147,9146,9145,9144,9143,9142,9141,9140,9139,9138,9137,9136,9135,9134,9133,9132,9131,9130,9129,9128,9127,9126,9125,9124,9123,9122,9121,9120,9119,9118,9117,9116,9115,9114,9113,9112,9111,9110,9109,9108,9107,9106,9105,9104,9103,9102,9101,9100,9099,9098,9097,9096,9095,9094,9093,9092,9091,9090,9089,9088,9087,9086,9085,9084,9083,9082,9081,9080,9079,9078,9077,9076,9075,9074,9073,9072,9071,9070,9069,9068,9067]

    INPUT_FEATURES = 200 
    CLASSES = 5
    train_text,train_classfi_number,train_classfi,train_feature_name = getTargetData("Breast_train.data")

    train_text = getIndexData(train_text,index)    

    alldata = ClassificationDataSet(INPUT_FEATURES, 1, nb_classes=CLASSES)
    for i in range(len(train_text)):
        features = train_text[i]
        if train_classfi[i]=="lumina" :
            klass = 0
            alldata.addSample(features, klass)
        elif train_classfi[i]=="ERBB2" :
            klass = 1
            alldata.addSample(features, klass)
        elif train_classfi[i]=="basal" :
            klass = 2
            alldata.addSample(features, klass)
        elif train_classfi[i]=="normal" :
            klass = 3
            alldata.addSample(features, klass)
        elif train_classfi[i]=="cell_lines" :
            klass = 4
            alldata.addSample(features, klass)
    return {'minX': 0, 'maxX': 1,
            'minY': 0, 'maxY': 1, 'd': alldata,'index':index}
Code Example #7
def createnetwork(n_hoglist,n_classlist,n_classnum,n_hiddensize=100):
    n_inputdim=len(n_hoglist[0])
    n_alldata = ClassificationDataSet(n_inputdim,1, nb_classes=n_classnum)
    for i in range(len(n_hoglist)):
        n_input = n_hoglist[i]
        n_class = n_classlist[i]
        n_alldata.addSample(n_input, [n_class])
    n_tstdata, n_trndata = n_alldata.splitWithProportion( 0.25 )
    n_trndata._convertToOneOfMany( )
    n_tstdata._convertToOneOfMany( )

    print "Number of training patterns: ", len(n_trndata)
    print "Input and output dimensions: ", n_trndata.indim, n_trndata.outdim
    print "First sample (input, target, class):"
    print n_trndata['input'][0], n_trndata['target'][0], n_trndata['class'][0]

    n_fnn = buildNetwork(n_trndata.indim,n_hiddensize, n_trndata.outdim, outclass=SoftmaxLayer)
    n_trainer = BackpropTrainer(n_fnn, dataset=n_trndata, momentum=0.1, verbose=True, weightdecay=0.01)

    n_result = 1
    while n_result > 0.1:
        print n_result
        n_trainer.trainEpochs(1)
        n_trnresult = percentError(n_trainer.testOnClassData(),
                                 n_trndata['class'])
        n_tstresult = percentError(n_trainer.testOnClassData(
            dataset=n_tstdata), n_tstdata['class'])

        print "epoch: %4d" % n_trainer.totalepochs, \
            "  train error: %5.2f%%" % n_trnresult, \
            "  test error: %5.2f%%" % n_tstresult
        n_result = n_tstresult
Code Example #8
File: ml.py  Project: aboSamoor/NLP
 def batch_classify(self, samples):
   ds = ClassificationDataSet(len(self._fx))
   for sample in samples:
     fvec = [sample[l] for l in self._fx]
     ds.addSample(fvec, [0])
   results = self._trainer.testOnClassData(ds)
   return [self._rmap[r] for r in results]
Code Example #9
File: features.py  Project: rgoomes/mini-google
def gen_data(csv_file, db):
	keywords = {}
	count = 0
	img_list = []

	with open(csv_file) as f:
		content = f.readlines()
	f.close()

	for line in content:
		aux = line.replace('\n', '').split(',')
		if aux[1] not in keywords:
			keywords[aux[1]] = count
			count += 1
		img_list.append(aux)

	data = ClassificationDataSet(768, len(keywords), nb_classes=len(keywords))
	n = len(keywords)

	for img in img_list:
		path = db + '/' + img[0]
		im = Image.open(path).convert('RGB')
		data.addSample(get_img_feats(im), get_keyword_class(keywords[img[1]], n))

	return data, n, keywords
Code Example #10
def prepare_datasets(inp,out,dataframe, ratio):
    '''Convert a pandas dataframe into PyBrain ClassificationDataSets.
    parameters:
    inp: list of names of input features
    out: list of names of output features (target values)
    dataframe: pandas dataframe holding the input and output columns
    ratio: proportion of the data to hold out as the test dataset
    '''
    inp_dim = len(inp)
    out_dim = len(out)
    no_classes = 2
    alldata = ClassificationDataSet(inp_dim,out_dim,no_classes)
    inp = dataframe[inp]
    out = dataframe[out]
    #for [a,b,c],d in zip(inp.values,out.values):
    for i in range(len(inp.values)):
        d = out.values[i]
        if d=='up': d = 0
        elif d == 'down': d = 1
        else: d =2
        alldata.addSample(inp.values[i],d)
    tstdata_temp, trndata_temp = alldata.splitWithProportion( ratio )
    # to convert supervised datasets to classification datasets
    # use two separate datasets; a single shared object would mix train and test samples
    tstdata = ClassificationDataSet(inp_dim, out_dim, no_classes)
    trndata = ClassificationDataSet(inp_dim, out_dim, no_classes)
    for n in range(0, tstdata_temp.getLength()):
        tstdata.addSample( tstdata_temp.getSample(n)[0], tstdata_temp.getSample(n)[1] )
    for n in range(0, trndata_temp.getLength()):
        trndata.addSample( trndata_temp.getSample(n)[0], trndata_temp.getSample(n)[1])
    trndata._convertToOneOfMany()
    tstdata._convertToOneOfMany()
    return alldata, trndata, tstdata
Code Example #11
File: lung1.py  Project: Guosmilesmile/pythonstudy
def _convert_supervised_to_classification(supervised_dataset,classes):
    classification_dataset = ClassificationDataSet(supervised_dataset.indim,supervised_dataset.outdim,classes)
    
    for n in xrange(0, supervised_dataset.getLength()):
        classification_dataset.addSample(supervised_dataset.getSample(n)[0], supervised_dataset.getSample(n)[1])

    return classification_dataset
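This helper exists because, in some PyBrain versions, splitWithProportion on a ClassificationDataSet returns plain SupervisedDataSet halves, which lack _convertToOneOfMany. A minimal sketch of the intended call pattern (the dimensions and sample values are assumptions):

alldata = ClassificationDataSet(4, 1, nb_classes=3)
alldata.addSample([5.1, 3.5, 1.4, 0.2], [0])
alldata.addSample([6.7, 3.0, 5.2, 2.3], [2])
tst_temp, trn_temp = alldata.splitWithProportion(0.25)
# rebuild real ClassificationDataSets from the split halves
tstdata = _convert_supervised_to_classification(tst_temp, 3)
trndata = _convert_supervised_to_classification(trn_temp, 3)
trndata._convertToOneOfMany()
tstdata._convertToOneOfMany()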
Code Example #12
File: ann.py  Project: maliilyas/metabolite_analysis
def ann(training_filename , testing_filename,itr,epoch,model_type):
    training_start_time = "The generation of data set and training started at :%s" % datetime.datetime.now()
    training_dataset            = np.genfromtxt(training_filename, skip_header=0,dtype="int", delimiter='\t' )
    data = ClassificationDataSet(len(training_dataset[0])-1, 1, nb_classes=2)  # one target column holding the class index
    for aSample in training_dataset:
        data.addSample(aSample[0:len(aSample)-1],[aSample[len(aSample)-1]] );
        
    #  
    data._convertToOneOfMany( )

    fann = buildNetwork(314,2,outclass=SoftmaxLayer);
    trainer = BackpropTrainer( fann, dataset=data, momentum=0.1, verbose=False, weightdecay=0.01)
    counter = 0;
    print training_start_time
    while(counter < itr):
        trainer.trainEpochs( epoch );
        counter = counter + 1;
    
    trnresult = percentError( trainer.testOnClassData(),data['class'] )
    trained_result_log = "epoch: %4d" % trainer.totalepochs, \
          "  train error: %5.2f%%" % trnresult;
    
    
    training_time_end = "The training and result logging ended at %s :" % datetime.datetime.now()
    
    filename = working_dir + "\models\\"+model_type + ".obj"
    save_trained_model(fann, filename)
    
    log_file.write("\n" + training_start_time+"\n")
    log_file.write(str(trained_result_log)+"\n")
    log_file.write(training_time_end+"\n")
Code Example #13
def generate_data():
    INPUT_FEATURES = 9216 
    CLASSES = 5

    train_text,train_classfi = getTargetData("Breast_train.data")

    alldata = ClassificationDataSet(INPUT_FEATURES, 1, nb_classes=CLASSES)
    for i in range(len(train_text)):
        features = train_text[i]
        if train_classfi[i]=="lumina" :
            klass = 0
            alldata.addSample(features, klass)
        elif train_classfi[i]=="ERBB2" :
            klass = 1
            alldata.addSample(features, klass)
        elif train_classfi[i]=="basal" :
            klass = 2
            alldata.addSample(features, klass)
        elif train_classfi[i]=="normal" :
            klass = 3
            alldata.addSample(features, klass)
        elif train_classfi[i]=="cell_lines" :
            klass = 4
            alldata.addSample(features, klass)
    return {'minX': 0, 'maxX': 1,
            'minY': 0, 'maxY': 1, 'd': alldata}
Code Example #14
File: ValueNet.py  Project: ericgorlin/CS159
def getData():
    fo = open("C:\\Program Files (x86)\\Lux\\Support\\data1per.txt")
    #data = []

    '''
    correctinds = range(0,5)
    for k in range(5, 131, 3):
        correctinds.append(k)
    correctinds.append(129)
    correctinds.append(130)
    for k in range(131, 257, 3):
        correctinds.append(k)
    correctinds.append(255)
    correctinds.append(256)
    '''

    #alldata = ClassificationDataSet(92, 1)
    alldata = ClassificationDataSet(84, 1)

    count = 0
    for line in fo.readlines():
    #for k in range(0, 20000):
        count += 1

        #line = fo.readline()

        line = [int(x.strip()) for x in line[1:-3].split(',')]
        line = [line[0]]+line[4:47]+line[49:90]

        alldata.addSample(line[1:], line[0])
    print count
    return alldata
Code Example #15
class NeuralNetLearner:
    def __init__(self):
        self.bunch = load_digits()
        self.X = np.asarray(self.bunch.data, 'float32')
        self.Y = np.asarray(self.bunch.target, 'float32')
        #self.X, self.Y = nudge_dataset(self.X, self.bunch.target)
        self.X = (self.X - np.min(self.X, 0)) / (np.max(self.X, 0) + 0.0001)  # 0-1 scaling

        self.ds = ClassificationDataSet(64, nb_classes=10, class_labels=self.bunch.target_names)
        for (x, y) in zip(self.X, self.Y):
            self.ds.addSample(x, y)

        self.test_data, self.train_data = self.ds.splitWithProportion(0.3)

        self.network = buildNetwork(64, 10, 1)

    def get_datasets(self):
        return self.train_data, self.test_data

    def activate(self, x):
        return self.network.activate(x.tolist())

    def fitness_func(self, x):
        if not (x.size == 64):
            print("Bad input vector: ", x)
            return
        sum_of_squared_error = 0
        for (input, target) in self.ds:
            sum_of_squared_error += (target - self.activate(input)) ** 2
        return (sum_of_squared_error / len(self.ds))

    def get_weights(self):
        return
Code Example #16
File: sparse_coding.py  Project: Aggregates/MI_HW2
def toClassificationDataset(codedSampleSet):
   
    classifiedSampleSet = []
    
    # Calculate the unique classes
    classes = []
    for sample in codedSampleSet:
    
        classifier = getClassifier(sample)
        if classifier not in classes:
            classes.append(classifier)
    classes.sort()
    
    # Now that we have all the classes, we process the outputs
    for sample in codedSampleSet:
        classifier = getClassifier(sample)
        classifiedSample = one_to_n(classes.index(classifier), len(classes))
        classifiedSampleSet.append(classifiedSample)

    # Build the dataset
    sampleSize = len(codedSampleSet[0])
    classifiedSampleSize = len(classifiedSampleSet[0])
    dataset = ClassificationDataSet(sampleSize, classifiedSampleSize)
    
    for i in range(len(classifiedSampleSet)):
        dataset.addSample(codedSampleSet[i], classifiedSampleSet[i])

    return dataset, classes
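A brief usage sketch, assuming codedSampleSet is a list of equal-length samples and getClassifier returns each sample's class label as above; since the targets are already one-of-many (via one_to_n), no further conversion is needed before training.

dataset, classes = toClassificationDataset(codedSampleSet)
print("%d samples across %d classes" % (len(dataset), len(classes)))
net = buildNetwork(dataset.indim, 10, dataset.outdim, outclass=SoftmaxLayer)
trainer = BackpropTrainer(net, dataset=dataset)
trainer.trainEpochs(10)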
Code Example #17
File: brain.py  Project: Guosmilesmile/pythonstudy
def generate_data(n=400):
    INPUT_FEATURES = 2
    CLASSES = 3
    #means = [(-1, 0), (2, 4), (3, 1)]
    #cov = [diag([1, 1]), diag([0.5, 1.2]), diag([1.5, 0.7])]
    alldata = ClassificationDataSet(INPUT_FEATURES, 1, nb_classes=CLASSES)
    #minX, maxX = means[0][0], means[0][0]
    #minY, maxY = means[0][1], means[0][1]
    #print minX, maxX , minY, maxY
    # #for i in range(n):
    #     for klass in range(CLASSES):

    #         features = multivariate_normal(means[klass], cov[klass])
    #         #print means[klass], cov[klass]
    #         #print features
    #         x, y = features
    #         minX, maxX = min(minX, x), max(maxX, x)
    #         minY, maxY = min(minY, y), max(maxY, y)
    #         alldata.addSample(features, [klass])
    #print alldata
    alldata.addSample([0,0], [0])
    alldata.addSample([0,1], [1])
    alldata.addSample([1,0], [1])
    alldata.addSample([1,1], [0])

    return {'minX': 0, 'maxX': 1,
            'minY': 0, 'maxY': 1, 'd': alldata}
Code Example #18
def read_data(filename):
	"""
	See http://www.pybrain.org/docs/api/datasets/classificationdataset.html

	Reads a (naive) csv file of data and converts it into
	a ClassificationDataSet. 'Naive' in this case means
	the data can be parsed by splitting on commas - i.e.,
	no quotations or escapes. I picked this file format
	because it should be trivial to convert all our data into it.

	Raises an exception when an IO error occurs.

	Parameters:
	  filename - The name of the file containing the data.
	"""
	data_file = open(filename, "r")
	data_lines = [line.split(',') for line in data_file.readlines()]
	data_file.close()

	features = [[float(f) for f in line[0:-1]] for line in data_lines]
	classes = [[int(line[-1])] for line in data_lines]
	# Workaround to make classifications zero-based
	class_min = min([c[0] for c in classes])
	for i in range(len(classes)):
		classes[i][0] -= class_min

	data_set = ClassificationDataSet(len(features[0]))
	for feature_vector, classification in zip(features, classes):
		data_set.addSample(feature_vector, classification)

	return data_set
Code Example #19
File: nnetwork.py  Project: sverrirth/IIS-master
class NNetwork:
	def __init__(self):
		self.ds = ClassificationDataSet(7, 1, nb_classes=8)  #8 since we have 8 gestures, 7 since we have 7 features
		
	def add_data(self, training_data):
		for gesture in training_data:
			self.ds.addSample(gesture[1], gesture[0])  #a method to add all the training data we have
			
	def newData(self, training_data):   #a method for replacing the data already existing and adding data from scratch
		self.ds = ClassificationDataSet(7, 1, nb_classes=8)
		for gesture in training_data:
			self.ds.addSample(gesture[1], gesture[0])
	
	def train(self, shouldPrint):
		tstdata, trndata = self.ds.splitWithProportion(0.2)  #splits the data into training and verification data
		trndata._convertToOneOfMany()
		tstdata._convertToOneOfMany()
		self.fnn = buildNetwork(trndata.indim, 64, trndata.outdim, outclass=SoftmaxLayer) #builds a network with 64 hidden neurons
		self.trainer = BackpropTrainer(self.fnn, dataset=trndata, momentum=0.1, learningrate=0.01, verbose=True, weightdecay=0.1)
		#uses the backpropagation algorithm
		self.trainer.trainUntilConvergence(dataset=trndata, maxEpochs=100, verbose=True, continueEpochs=10, validationProportion=0.20) #early stopping with 20% as testing data
		trnresult = percentError( self.trainer.testOnClassData(), trndata['class'] )
		tstresult = percentError( self.trainer.testOnClassData(dataset=tstdata ), tstdata['class'] )
		
		if shouldPrint:
			print "epoch: %4d" % self.trainer.totalepochs, "  train error: %5.2f%%" % trnresult, "  test error: %5.2f%%" % tstresult
	def activate(self, data): #tests a particular data point (feature vector)
	    return self.fnn.activate(data)
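A hypothetical usage sketch for the class above; each training item pairs a gesture class index with its seven-element feature vector, and the values below are made up for illustration.

training_data = [
    (0, [0.1, 0.4, 0.2, 0.9, 0.3, 0.5, 0.7]),
    (3, [0.8, 0.1, 0.6, 0.2, 0.4, 0.9, 0.1]),
    # ... one entry per recorded gesture sample ...
]
nn = NNetwork()
nn.add_data(training_data)
nn.train(True)                 # prints train/test error when True
print nn.activate([0.2, 0.3, 0.1, 0.8, 0.4, 0.5, 0.6])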
Code Example #20
File: neural_net.py  Project: marsjoy/wesandersone
class NeuralNetwork(BaseWorkflow):

    def __init__(self, purpose='train', num_inputs=None, num_ouputs=None, classes=None, class_lables=None):
        super(NeuralNetwork, self).__init__()
        self.purpose = purpose
        self.data_path = self.config.neural_net.get(self.purpose, None)
        self.file_name = 'neural_net'
        self.all_data = ClassificationDataSet(num_inputs,
                                              num_ouputs,
                                              nb_classes=classes,
                                              class_labels=class_lables)
        self.train = None
        self.test = None
        self.neural_network = None
        self.train_result = None
        self.test_result = None
        self.cross_validation_result = None

    def process(self):
        self.prepare_train_test()
        self.build_network()
        trainer = self.train_network(dataset=self.train)
        self.score_train_test(trainer=trainer)
        self.cross_validate(dataset=self.all_data)

    def add_sample(self, correlogram_matrix=None, target=None, sample_path=None):
        self.all_data.addSample(correlogram_matrix, target)
        logger.info('sample added from {sample_path}'.format(sample_path=sample_path))

    def prepare_train_test(self):
        self.test, self.train = self.all_data.splitWithProportion(0.25)

    def build_network(self):
        self.neural_network = buildNetwork(self.train.indim, 7, self.train.outdim, outclass=SoftmaxLayer) # feed forward network

    def train_network(self, dataset=None):
        starter_trainer = BackpropTrainer(self.neural_network, dataset=dataset, momentum=0.1, verbose=True, weightdecay=0.01)
        starter_trainer.trainUntilConvergence(validationProportion=0.25,  maxEpochs=100)
        return starter_trainer

    def score_train_test(self, trainer=None):
        self.test_result = percentError(trainer.testOnClassData(dataset=self.test), self.test['class'])
        logger.info('test error result: {result}'.format(result=self.test_result))
        self.train_result = percentError(trainer.testOnClassData(dataset=self.train), self.train['class'] )
        logger.info('train error result: {result}'.format(result=self.train_result))

    def cross_validate(self, dataset=None):
        trainer = BackpropTrainer(self.neural_network, dataset=dataset, momentum=0.1, verbose=True, weightdecay=0.01)
        validator = CrossValidator(trainer=trainer, dataset=dataset, n_folds=10)
        mean_validation_result = validator.validate()
        self.cross_validation_result = mean_validation_result
        logger.info('cross val result: {result}'.format(result=self.cross_validation_result))

    @staticmethod
    def save_network_to_xml(net=None, file_name=None):
        NetworkWriter.writeToFile(net, file_name)

    @staticmethod
    def read_network_from_xml(file_name=None):
        return NetworkReader.readFrom(file_name)
Code Example #21
File: Classifier.py  Project: DanSGraham/code
def generateDataSet():

    inFile = open("data/input.txt")
    inData = inFile.readlines()
    inFile.close()
    
    outFile = open("data/output.txt")
    outData = outFile.readlines()
    outFile.close()


    inputs = 120 #you will want to update this based on the state you have... ###I don't understand this comment. How do we update if we haven't calculated the state yet?
    classes= 11 #11 #Not much reason to change this one, there are only 11 destinations.
    allData = ClassificationDataSet(inputs,1,nb_classes=classes)
    start = time.clock()
    for i in range(len(inData)):
        b = loadBrain(inData[i].strip())
        #inputs = len(b.g.heroes) - 1 + len(b.g.taverns_locs) + 4
        #calls functions inside of the ai object.  you will want to write these fcns. 
        ins = b.createInputs(inputs)
        klass = b.determineClass(classes,eval(outData[i].strip()))
        expectedKlass = b.classInverse(klass)
        #if expectedKlass != eval(outData[i].strip()):
        #    print expectedKlass, eval(outData[i].strip())
        allData.addSample(ins,[klass])
        #if(i > 1000): break
        if(i%100==0): print i,len(inData), "elapsed between sets", time.clock() - start
    
    return allData    
Code Example #22
File: neural.py  Project: dverstee/MLProject
def getdata(do_preprocessing, full_data):
    '''
    fetch and format the match data according to the given flags
    do_preprocessing: bool: true if the match data should be preprocessed
    full_data: bool: false if the minimal data should be used
    '''
    print ("fetching data ...")
    if full_data == 0 :
        fn = getMinimalDatafromMatch
    else:
        fn = getBasicDatafromMatch
    if globals.use_saved_data:
        try:
            with open('processed_data%d' % full_data) as outfile:
                data = json.load(outfile)
        except IOError:
            matches = Match.objects.all()
            data = map(lambda x: (fn(x,do_preprocessing,False), x.won), matches)
            data += map(lambda x: (fn(x,do_preprocessing,True), not x.won), matches)
            with open('processed_data%d' % full_data, 'w') as outfile:
                json.dump(data,outfile)
    else:
        matches = Match.objects.all()
        data = map(lambda x: (fn(x,do_preprocessing,False), x.won), matches)
        data += map(lambda x: (fn(x,do_preprocessing,True), not x.won), matches)
        with open('processed_data%d' % full_data, 'w') as outfile:
            json.dump(data,outfile)

    all_data = None
    for input, won in data:        
        if all_data is None:
            all_data = ClassificationDataSet(len(input), 1, nb_classes=2)                 
        all_data.addSample(input, int(won)) 
    return all_data
Code Example #23
File: nnet.py  Project: divijbindlish/quantify
class neuralNetwork():

	def __init__( self, n_classes ):
		self.n_classes = n_classes

	def fit( self, X, Y ):
		n_features = X.shape[1]
		self.train_ds = ClassificationDataSet( n_features, 1, nb_classes = self.n_classes )
		for train, target in zip( X, Y ):
			self.train_ds.addSample( train, [target] )

		self.train_ds._convertToOneOfMany( )

		self.net = buildNetwork( self.train_ds.indim, 2*n_features, self.train_ds.outdim, outclass = SoftmaxLayer )
		self.trainer = BackpropTrainer( self.net, self.train_ds )

	def predict( self, X ):
		n_features = X.shape[1]
		self.test_ds = ClassificationDataSet( n_features, 1, nb_classes = self.n_classes )
		for test in X:
			self.test_ds.addSample( test, [1] )

		self.test_ds._convertToOneOfMany( )

		for i in range( 100 ):
			self.trainer.trainEpochs( 5 )
			self.labels = self.net.activateOnDataset( self.test_ds )
			self.labels = self.labels.argmax(axis=1)
		return self.labels
Code Example #24
File: 000300.py  Project: ZiqiuZang/PythonTraining
def make_data_set(beg,end):
    ds = ClassificationDataSet(HISTORY*2+1, class_labels=['None', 'Buy' , 'Sell']) #SupervisedDataSet(HISTORY*3, 1) 
    trainQ = rawData[(rawData.tradeDate <= end) & ( rawData.tradeDate >= beg)]
    

    for idx in range(1, len(trainQ) - HISTORY - 1 - HOLD-1):
        cur = idx + HISTORY - 1  
        if( abs( trainQ.iloc[cur]['MACD'] ) > 0.5 ):
            continue        
        sample = []
        for i in range(HISTORY):
            #sample.append( trainQ.iloc[idx+i]['EMAL'] )#  [['EMAL','DIFF','DEA','CDIS']] ) )
            sample.append( trainQ.iloc[idx+i]['DIFF'] )
            sample.append( trainQ.iloc[idx+i]['DEA'] )
                   
        sample.append( trainQ.iloc[cur]['CDIS'] )
        if max( trainQ.iloc[cur+1:cur+HOLD+1]['EMAS'] ) / trainQ.iloc[cur]['closeIndex'] > 1.05 : 
            answer = 1
        elif min( trainQ.iloc[cur+1:cur+HOLD+1]['EMAS'] ) / trainQ.iloc[cur]['closeIndex'] < 0.95:
            answer = 2
        else:
            answer = 0
#        print(sample)    
        ds.addSample(sample, answer)
    return ds
Code Example #25
def main():
    for stock in STOCK_TICKS:
        # Download Data
        get_data(stock)

        # Import Data
        days = extract_data(stock)
        today = days.pop(0)

        # Make DataSet
        data_set = ClassificationDataSet(INPUT_NUM, 1, nb_classes=2)
        for day in days:
            target = 0
            if day.change > 0:
                target = 1
            data_set.addSample(day.return_metrics(), [target])

        # Make Network
        network = buildNetwork(INPUT_NUM, MIDDLE_NUM, MIDDLE_NUM, OUTPUT_NUM)

        # Train Network
        trainer = BackpropTrainer(network)
        trainer.setData(data_set)
        trainer.trainUntilConvergence(maxEpochs=EPOCHS_MAX)

        # Activate Network
        prediction = network.activate(today.return_metrics())
        print prediction
Code Example #26
File: build_trainingset.py  Project: 1va/caravan
def build_dataset(
    mongo_collection, patch_size=IMG_SIZE, orig_size=IMG_SIZE, nb_classes=2, edgedetect=True, transform=True
):
    # deprecated
    if edgedetect:
        import cv2
    from pybrain.datasets import SupervisedDataSet, ClassificationDataSet

    patch_size = min(patch_size, orig_size)
    trim = round((orig_size - patch_size) / 2)
    # ds = SupervisedDataSet(patch_size**2, 1)
    ds = ClassificationDataSet(patch_size ** 2, target=1, nb_classes=nb_classes)
    cursor = list(mongo_collection.find())
    for one_image in cursor:
        # convert from binary to numpy array and transform
        img_array = np.fromstring(one_image["image"], dtype="uint8")
        if edgedetect:
            img_array = cv2.Canny(img_array, 150, 200)
        img_crop = img_array.reshape(orig_size, orig_size)[trim : (trim + patch_size), trim : (trim + patch_size)]
        classification = float(one_image["class"])
        if transform:
            transformed = transform_img(img_crop.ravel(), patch_size)
        else:
            transformed = [img_crop.ravel()]
        for one_img in transformed:
            ds.addSample(one_img.ravel(), classification)
    print("New dataset contains %d images (%d positive)." % (len(ds), sum(ds["target"])))
    return ds
Code Example #27
def simpleNeuralNetworkTrain(fileName, numFeatures, numClasses, possibleOutputs, numHiddenNodes, numTrainingEpochs):

    data = np.genfromtxt(fileName)
    trnIn = data[:, 0:5]
    trnOut = data[:, 6]
    trnOut = [int(val) for val in trnOut]

    normalizeData(trnIn, numFeatures)
    trndata = ClassificationDataSet(numFeatures, possibleOutputs, nb_classes=numClasses)
    for row in range(0, len(trnIn)):
        tempListOut = []
        tempListIn = []
        tempListOut.append(int(trnOut[row]))
        for i in range(0, numFeatures):
            tempListIn.append(trnIn[row][i])
        trndata.addSample(tempListIn, tempListOut)

    trndata._convertToOneOfMany()

    #  When running for the first time
    myNetwork = buildNetwork(numFeatures, numHiddenNodes, numClasses, outclass=SoftmaxLayer, bias=True, recurrent=False)

    # Read from file after the first try.
    #  myNetwork = NetworkReader.readFrom('firstTime.xml')    # Use saved results.
    trainer = BackpropTrainer(myNetwork, dataset=trndata, momentum=0.0, verbose=True, weightdecay=0.0)
    for i in range(numTrainingEpochs):
        trainer.trainOnDataset(dataset=trndata)
Code Example #28
File: brains.py  Project: oskanberg/pyconomy
class EightBitBrain(object):
    
    def __init__(self, dataset, inNodes, outNodes, hiddenNodes, classes):
        self.__dataset = ClassificationDataSet(inNodes, classes-1)
        for element in dataset:
            self.addDatasetSample(self._binaryList(element[0]), element[1])
        self.__dataset._convertToOneOfMany()
        self.__network = buildNetwork(inNodes, hiddenNodes, self.__dataset.outdim, recurrent=True)
        self.__trainer = BackpropTrainer(self.__network, learningrate = 0.01, momentum = 0.99, verbose = True)
        self.__trainer.setData(self.__dataset)

    def _binaryList(self, n):
        return [int(c) for c in "{0:08b}".format(n)]
    
    def addDatasetSample(self, argument, target):
        self.__dataset.addSample(argument, target)

    def train(self, epochs):
        self.__trainer.trainEpochs(epochs)
    
    def activate(self, information):
        result = self.__network.activate(self._binaryList(information))
        highest = (0,0)
        for resultClass in range(len(result)):
            if result[resultClass] > highest[0]:
                highest = (result[resultClass], resultClass)
        return highest[1]
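A hypothetical usage sketch for EightBitBrain; each dataset element pairs an 8-bit integer with its class index, and the sample values and sizes below are assumptions.

samples = [(3, 0), (12, 0), (200, 1), (255, 1)]
brain = EightBitBrain(samples, inNodes=8, outNodes=2, hiddenNodes=6, classes=2)
brain.train(50)                 # 50 backprop epochs
print brain.activate(200)       # index of the most strongly activated class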
Code Example #29
def main():
    images, labels = load_labeled_training(flatten=True)
    images = standardize(images)
    # images, labels = load_pca_proj(K=100)
    shuffle_in_unison(images, labels)
    ds = ClassificationDataSet(images.shape[1], 1, nb_classes=7)
    for i, l in zip(images, labels):
        ds.addSample(i, [l - 1])
    # ds._convertToOneOfMany()
    test, train = ds.splitWithProportion(0.2)
    test._convertToOneOfMany()
    train._convertToOneOfMany()
    net = shortcuts.buildNetwork(train.indim, 1000, train.outdim, outclass=SoftmaxLayer)

    trainer = BackpropTrainer(net, dataset=train, momentum=0.1, learningrate=0.01, weightdecay=0.05)
    # trainer = RPropMinusTrainer(net, dataset=train)
    # cv = validation.CrossValidator(trainer, ds)
    # print cv.validate()
    net.randomize()
    tr_labels_2 = net.activateOnDataset(train).argmax(axis=1)
    trnres = percentError(tr_labels_2, train["class"])
    # trnres = percentError(trainer.testOnClassData(dataset=train), train['class'])
    testres = percentError(trainer.testOnClassData(dataset=test), test["class"])
    print "Training error: %.10f, Test error: %.10f" % (trnres, testres)
    print "Iters: %d" % trainer.totalepochs

    for i in range(100):
        trainer.trainEpochs(10)
        trnres = percentError(trainer.testOnClassData(dataset=train), train["class"])
        testres = percentError(trainer.testOnClassData(dataset=test), test["class"])
        trnmse = trainer.testOnData(dataset=train)
        testmse = trainer.testOnData(dataset=test)
        print "Iteration: %d, Training error: %.5f, Test error: %.5f" % (trainer.totalepochs, trnres, testres)
        print "Training MSE: %.5f, Test MSE: %.5f" % (trnmse, testmse)
Code Example #30
File: DataClass.py  Project: Trigition/Mimir
class ImageData(Data):
  
  image_x = 1
  image_y = 1
  images = []
  targets = []

  def __init__(self, images, targets, image_x, image_y, description="Image Data", outputs=1):
      Data.__init__(self, description, outputs)
      self.images = images
      self.targets = targets
      self.image_x = image_x
      self.image_y = image_y
      self.create_classifier()

  def create_classifier(self):
      #print "Image X:", self.image_x
      #print "Image Y:", self.image_y
      vector_length = self.image_x * self.image_y
      #Create the classifier
      #print "Creating Classifier. Vector_Len:", vector_length, "Output Vector:", self.outputs
      self.classifier = ClassificationDataSet(vector_length, self.outputs, nb_classes=(len(self.images) / 10))
      #print "Adding samples for", len(self.images), " images"
      for i in xrange(len(self.images)):
          #Assign images to their targets in the classifier
          #print i, "Image:", self.images[i], "Target:", self.targets[i]
          self.classifier.addSample(self.images[i], self.targets[i])

  def print_data(self):
    print "Image Object:" + str(this.data_unit)
    
  def add_image(self, image, target):
    self.images.append(image)
    self.targets.append(target)
Code Example #31

means = [(-1,0),(2,4),(3,1)]
cov = [diag([1,1]), diag([0.5,1.2]), diag([1.5,0.7])]
alldata = ClassificationDataSet(inputDim, 1, nb_classes=2)


#input = np.array([ myclones_data[n][16], myclones_data[n][17], myclones_data[n][18], myclones_data[n][15],myclones_data[n][11],myclones_data[n][12],   myclones_data[n][26], myclones_data[n][27]] )

for n in xrange(len(myclones_data)):
    #for klass in range(3):
    input = np.array(
        [myclones_data[n][16], myclones_data[n][17], myclones_data[n][18], myclones_data[n][15], myclones_data[n][11],
         myclones_data[n][12], myclones_data[n][26], myclones_data[n][27]])
    #print (n, "-->", input)
    alldata.addSample(input, int(myclones_data[n][35]))


tstdata, trndata = alldata.splitWithProportion( 0.85 )

print("Class Label --> ", int(tstdata.getSample(1)[1]))

tmp_tst_for_validation = tstdata



tstdata_new = ClassificationDataSet(inputDim, 1, nb_classes=2)
for n in xrange(0, tstdata.getLength()):
    tstdata_new.addSample( tstdata.getSample(n)[0], tstdata.getSample(n)[1] )

trndata_new = ClassificationDataSet(inputDim, 1, nb_classes=2)
Code Example #32
from pybrain.tools.shortcuts import buildNetwork
from pybrain.supervised.trainers import BackpropTrainer
from pybrain.structure.modules import SoftmaxLayer
from pybrain.tools.xml.networkwriter import NetworkWriter
from pybrain.tools.xml.networkreader import NetworkReader
''' Function or Class '''

if __name__ == "__main__":
    X = datasets.load_iris()['data']
    y = datasets.load_iris()['target']

    Dim = X.shape[1]
    NNData = ClassificationDataSet(Dim)

    for Idx in range(len(X)):
        NNData.addSample(np.ravel(X[Idx]), y[Idx])

    TrainData, TestData = NNData.splitWithProportion(0.25)
    TrainData._convertToOneOfMany()
    TestData._convertToOneOfMany()
    print TrainData.indim
    print TrainData.outdim

    HiddenNum = int(
        len(TrainData) / float(2 * (TrainData.indim + TrainData.outdim)))
    print HiddenNum

    NNNetwork = buildNetwork(TrainData.indim,
                             HiddenNum,
                             TrainData.outdim,
                             outclass=SoftmaxLayer)
Code Example #33
#############################################################################
# [set Data]

#CSV_TRAIN = "dataset/train_na2zero.csv"
#CSV_TEST = "dataset/test_na2zero.csv"
CSV_TRAIN = "dataset/train_zero_60x60.csv"
CSV_TEST = "dataset/test_zero_60x60.csv"

df_train = pd.read_csv(CSV_TRAIN)
Y = df_train.y
Y = Y -1 # in order to make target in the range of [0, 1, 2, 3, ...., 11]
X = df_train.iloc[:, 1:].values

alldata = ClassificationDataSet(inp=X.shape[1], target=1, nb_classes=12)
for i in range(X.shape[0]):
    alldata.addSample(X[i, :], [Y[i]])
alldata._convertToOneOfMany()

df_test = pd.read_csv(CSV_TEST)
test_X = df_test.iloc[:, 1:].values

print "Number of training patterns: ", len(alldata)
print "Input and output dimensions: ", alldata.indim, alldata.outdim
print "First sample (input, target, class):"
print alldata['input'][0], alldata['target'][0], alldata['class'][0]

#############################################################################
# fnn
n = buildNetwork(alldata.indim, 1000, 1000, 1000, alldata.outdim, outclass=SoftmaxLayer, bias=True)
print("\n[ Network Structure]\n",n)
Code Example #34
def perceptron(hidden_neurons=5, weightdecay=0.01, momentum=0.1):
    INPUT_FEATURES = 2
    CLASSES = 3
    HIDDEN_NEURONS = hidden_neurons
    WEIGHTDECAY = weightdecay
    MOMENTUM = momentum

    # Generate the labeled set
    g = generate_data()
    #g = generate_data2()
    alldata = g['d']
    minX, maxX, minY, maxY = g['minX'], g['maxX'], g['minY'], g['maxY']

    # Split data into test and training dataset
    tstdata, trndata = alldata.splitWithProportion(0.25)
    trndata._convertToOneOfMany()  # This is necessary, but I don't know why
    tstdata._convertToOneOfMany()  # http://stackoverflow.com/q/8154674/562769

    print("Number of training patterns: %i" % len(trndata))
    print("Input and output dimensions: %i, %i" %
          (trndata.indim, trndata.outdim))
    print("Hidden neurons: %i" % HIDDEN_NEURONS)
    print("First sample (input, target, class):")
    print(trndata['input'][0], trndata['target'][0], trndata['class'][0])

    fnn = buildNetwork(trndata.indim,
                       HIDDEN_NEURONS,
                       trndata.outdim,
                       outclass=SoftmaxLayer)

    trainer = BackpropTrainer(fnn,
                              dataset=trndata,
                              momentum=MOMENTUM,
                              verbose=True,
                              weightdecay=WEIGHTDECAY)
    # Visualization
    ticksX = arange(minX - 1, maxX + 1, 0.2)
    ticksY = arange(minY - 1, maxY + 1, 0.2)
    X, Y = meshgrid(ticksX, ticksY)

    # need column vectors in dataset, not arrays
    griddata = ClassificationDataSet(INPUT_FEATURES, 1, nb_classes=CLASSES)
    for i in range(X.size):
        griddata.addSample([X.ravel()[i], Y.ravel()[i]], [0])

    for i in range(20):
        trainer.trainEpochs(1)
        trnresult = percentError(trainer.testOnClassData(), trndata['class'])
        tstresult = percentError(trainer.testOnClassData(dataset=tstdata),
                                 tstdata['class'])

        print("epoch: %4d" % trainer.totalepochs,
              "  train error: %5.2f%%" % trnresult,
              "  test error: %5.2f%%" % tstresult)
        out = fnn.activateOnDataset(griddata)
        # the highest output activation gives the class
        out = out.argmax(axis=1)
        out = out.reshape(X.shape)

        figure(1)  # always print on the same canvas
        ioff()  # interactive graphics off
        clf()  # clear the plot
        for c in [0, 1, 2]:
            here, _ = where(tstdata['class'] == c)
            plot(tstdata['input'][here, 0], tstdata['input'][here, 1], 'o')
        if out.max() != out.min():  # safety check against flat field
            contourf(X, Y, out)  # plot the contour
        ion()  # interactive graphics on
        draw()  # update the plot
    ioff()
    show()
Code Example #35
train_data = ClassificationDataSet(n_features, 1, nb_classes=2)
test_data = ClassificationDataSet(n_features, 1, nb_classes=2)
all_data = ClassificationDataSet(n_features, 1, nb_classes=2)

# train_data = SupervisedDataSet(n_features, 1)
# test_data = SupervisedDataSet(n_features, 1)
# all_data = SupervisedDataSet(n_features, 1)

target = (y == 1) * 1
# target = y + 1
# target = y

for i in xrange(N_train):
    if y[i] != 0:
        train_data.addSample(X_new[i, ], [target[i]])

for i in xrange(N_train + 1, N_test_end):
    if y[i] != 0:
        test_data.addSample(X_new[i, ], [target[i]])

for i in xrange(X_new.shape[0]):
    all_data.addSample(X_new[i, ], [target[i]])

train_data._convertToOneOfMany()
test_data._convertToOneOfMany()
all_data._convertToOneOfMany()

print("building")
fnn = buildNetwork(train_data.indim,
                   6,
Code Example #36
from pybrain.datasets import ClassificationDataSet
from pybrain.utilities import percentError
from pybrain.tools.shortcuts import buildNetwork
from pybrain.supervised.trainers import BackpropTrainer
from pybrain.structure.modules import SoftmaxLayer
from pybrain.tools.xml.networkwriter import NetworkWriter

ds = ClassificationDataSet(79, 1, nb_classes=2)
tf = open('./finalFeaturestraining.csv', 'rb')
for line in tf.readlines():
    data = [float(x) for x in line.strip().split(',') if x != '']
    indata = tuple(data[:79])
    # print len(indata)
    outdata = tuple(data[79:80])
    # print len(outdata)
    ds.addSample(indata, outdata)
tstdata, trndata = ds.splitWithProportion(0.0)

n = buildNetwork(trndata.indim, 30, 30, trndata.outdim, recurrent=True)
t = BackpropTrainer(n,
                    dataset=trndata,
                    learningrate=0.001,
                    momentum=0.3,
                    verbose=True)
t.trainEpochs(100)
trnresult = percentError(t.testOnClassData(), trndata['class'])
# tstresult = percentError( t.testOnClassData( dataset=tstdata ), tstdata['class'] )

print "epoch: %4d" % t.totalepochs, \
          "  train error: %5.2f%%" % trnresult
Code Example #37
def main():

    in_data = np.genfromtxt('logit-train.csv', delimiter=',')
    out_data = np.genfromtxt('logit-test.csv', delimiter=',')

    #getting in the data from csv files and making it suitable for further action.
    in_data = in_data[~np.isnan(in_data).any(1)]
    t = len(in_data[0, :])
    y_train = np.array(in_data[0:, t - 1])
    x_train = np.array(in_data[0:, :t - 1])

    scaler = preprocessing.StandardScaler().fit(
        x_train)  #standardization plays an important role in all NN algos

    x_train = scaler.transform(x_train)  #final x_train

    out_data = out_data[~np.isnan(out_data).any(1)]
    t = len(out_data[0, :])
    y_test = np.array(out_data[0:, t - 1])
    x_test = np.array(out_data[0:, :t - 1])

    x_test = scaler.transform(x_test)  # final x_test

    alltraindata = ClassificationDataSet(t - 1, 1, nb_classes=2)
    for count in range(len((in_data))):
        alltraindata.addSample(x_train[count], [y_train[count]])

    alltraindata._convertToOneOfMany(bounds=[0, 1])

    alltestdata = ClassificationDataSet(t - 1, 1, nb_classes=2)
    for count in range(len((out_data))):
        alltestdata.addSample(x_test[count], [y_test[count]])

    alltestdata._convertToOneOfMany(bounds=[0, 1])

    numRBFCenters = 50

    kmeans = KMeans(n_clusters=numRBFCenters
                    )  # KMeans to find the centroids for the RBF neurons.
    kmeans.fit(alltraindata['input'])
    centers = kmeans.cluster_centers_
    #centers.shape = (numRBFCenters,13)
    cluster_distance = kmeans.transform(alltraindata['input'])
    #cluster_distance.shape = (152,10) and kmeans.labels_.shape = (152,)

    #cluster_distance.shape = (152,50)

    # Calculating the sigma/smoothness parameter of each Radial Basis Function
    # It is the variance/standard deviation of the points of each cluster, thus giving a value for each RBFcenter
    distance_std = []
    distance_within_cluster = []
    for lab in range(numRBFCenters):
        for x, label in enumerate(kmeans.labels_):
            if label == lab:
                distance_within_cluster.append(cluster_distance[x][label])
        distance_std.append(np.std(distance_within_cluster))

    rbf = RBFNN(
        alltraindata.indim, alltraindata.outdim, numRBFCenters, centers,
        distance_std)  # Passing the centers array for RBFNN initialization

    rbf.train(alltraindata['input'], alltraindata['target'])

    testdata_target = rbf.test(
        alltestdata['input']
    )  #values obtained after testing, T is a 'n x outdim' matrix
    testdata_target = testdata_target.argmax(
        axis=1
    )  # the highest output activation gives the class. Selects the class predicted

    traindata_target = rbf.test(alltraindata['input'])
    traindata_target = traindata_target.argmax(
        axis=1
    )  # the highest output activation gives the class. Selects the class predicted

    #compare to y_test to obtain the accuracy.

    # count=0
    # for x in range(len(y_test)):
    # 	if testdata_target[x] == y_test[x]:
    # 		count+=1
    # tstresult2=float(count)/float(len(y_test)) * 100

    trnresult = percentError(traindata_target, alltraindata['class'])
    tstresult = percentError(testdata_target, alltestdata['class'])

    print "Accuracy on train data is: %5.2f%%," % (100 - trnresult)
    print "Accuracy on test data is: %5.2f%%," % (100 - tstresult)

    for x in range(len(y_test)):
        if y_test[x]:
            y_test[x] = 1
        else:
            y_test[x] = 0

    average_label = ['micro', 'macro', 'weighted']
    for label in average_label:
        f1 = f1_score(y_test, testdata_target, average=label)
        print "f1 score (%s)" % label, "is ", f1
Code Example #38
if __name__ == "__main__":
    breast_cancer = datasets.load_breast_cancer()

    X, y = breast_cancer.data, breast_cancer.target

    model = cluster.KMeans(n_clusters=2)
    labels = model.fit_predict(X)

    print X.shape
    X = np.concatenate((X, np.expand_dims(labels, axis=1)), axis=1)
    print X.shape

    ds = ClassificationDataSet(X.shape[1], 2)
    for k in xrange(len(X)):
        ds.addSample(X[k], y[k])

    tstdata, trndata = ds.splitWithProportion(0.3)

    max_epochs = 1000

    # List all the different networks we want to test
    net = buildNetwork(trndata.indim,
                       15,
                       trndata.outdim,
                       outclass=SigmoidLayer,
                       bias=True)
    print net

    # Setup a trainer that will use backpropogation for training
    trainer = BackpropTrainer(net,
Code Example #39
File: ds.py  Project: AoifeNicAntSaoir/ImagesML
net.sortModules()

print net

digits = load_digits()
X, y = digits.data, digits.target

print(X.shape)

plt.gray()
plt.matshow(digits.images[2])
plt.show()

daSet = ClassificationDataSet(X.shape[1], 1)
for k in xrange(len(X)):
    daSet.addSample(X[k], y[k])

testData, trainData = daSet.splitWithProportion(0.25)

trainData._convertToOneOfMany()
testData._convertToOneOfMany()
#for inpt, target in daSet:
# print inpt, target

trainer = BackpropTrainer(net,
                          dataset=trainData,
                          momentum=0.1,
                          learningrate=0.01,
                          verbose=True)

trainer.trainEpochs(50)
Code Example #40
print "Contents of cov", cov

# creating the Dataset to add all the data
#arguments:
# input, output, nb_classes=3

alldata = ClassificationDataSet(2, 1, nb_classes=3)
print "initial contents of the data", alldata
for n in xrange(400):
    for klass in range(3):
        # print "value of klass", klass
        #so we are choosing some value of mean and some value of variance and then
        #then adding all the data to the sample.
        input = multivariate_normal(means[klass], cov[klass])
        # print "here is the input", input
        alldata.addSample(input, [klass])

print "the length of the final dataset is ", len(alldata)
#I am finding the length of the entire dataset and that is expected to be 1200 which it is. The
# the thing that I am more concerned about is that when you print out the alldata, you get a wierd
# number like 2056 or 2048
print "here is the whole data "
print "*" * 20
print alldata
# splitting the data between the trainings_et and testing_set
tstdata_temp, trndata_temp = alldata.splitWithProportion(0.25)
# tstdata, trndata = alldata.splitWithProportion( 0.25 )

#here are are just copying the data from the temp variables to the actual vaiables that I will be using
tstdata = ClassificationDataSet(2, 1, nb_classes=3)
for n in xrange(0, tstdata_temp.getLength()):
Code Example #41
def main():
    """
    CLI Arguments allowed:
        --display_graphs       Displays graphs
        --retrain              Trains a new model
        --cross-validate       Runs cross validation to fine tune the model
        --test=validation_set  Tests the latest trained model against the validation set
        --test=test_set        Tests the latets trained model against the test set
    """

    global trainer, classifier
    inputs_train, targets_train, inputs_valid, targets_valid, inputs_test, targets_test = load_parsed_data()

    if '--display_graphs' in sys.argv:
        display_graphs = True

    print('using {} percent of all data in corpus'.format(PERCENTAGE_DATA_SET_TO_USE*100))
    print('using {} most common words as features'.format(NUM_FEATURES))

    if not trained_model_exists() or '--retrain' in sys.argv:
        train_features, valid_features, test_features = extract_features(
            inputs_train[:len(inputs_train)*PERCENTAGE_DATA_SET_TO_USE],
            targets_train[:len(targets_train)*PERCENTAGE_DATA_SET_TO_USE],
            inputs_valid[:len(inputs_valid)*PERCENTAGE_DATA_SET_TO_USE],
            targets_valid[:len(targets_valid)*PERCENTAGE_DATA_SET_TO_USE],
            inputs_test[:len(inputs_test)*PERCENTAGE_DATA_SET_TO_USE],
            targets_test[:len(targets_test)*PERCENTAGE_DATA_SET_TO_USE]
        )

        save_features(train_features, valid_features, test_features)
        pca = RandomizedPCA(n_components=N_COMPONENTS, whiten=False).fit(train_features)
        save_pca(pca)
        print ("Saved PCA")

        X_train = pca.transform(train_features)
        X_valid = pca.transform(valid_features)
        pca = None
        print ("Created PCAd features")

        valid_data = ClassificationDataSet(N_COMPONENTS, target=1, nb_classes=2)
        for i in range(len(X_valid)):
            valid_data.addSample(X_valid[i], targets_valid[i])
        valid_data._convertToOneOfMany()
        X_valid = None

        train_data = ClassificationDataSet(N_COMPONENTS, target=1, nb_classes=2)
        for i in range(len(X_train)):
            train_data.addSample( X_train[i], targets_train[i])
        train_data._convertToOneOfMany()
        X_train = None

        classifier = buildNetwork( train_data.indim, N_HIDDEN, train_data.outdim, outclass=SoftmaxLayer)
        trainer = BackpropTrainer( classifier, dataset=train_data, momentum=0.1, learningrate=0.01 , verbose=True)
        train_model(train_data, valid_data)

        save_model(classifier)
        train_data = None
        valid_data = None

    else:
        train_features, valid_features, test_features = load_features()
        pca = load_pca()
        X_train = pca.transform(train_features)

        pca = None
        print ("Created PCAd features")

        train_data = ClassificationDataSet(N_COMPONENTS, target=1, nb_classes=2)
        for i in range(len(X_train)):
            train_data.addSample( X_train[i], targets_train[i])
        train_data._convertToOneOfMany()
        X_train = None

        classifier = load_trained_model()
        trainer = BackpropTrainer( classifier, dataset=train_data, momentum=0.1, learningrate=0.01 , verbose=True)


    if '--test=validation_set' in sys.argv:
        print ("Running against validation set")
        pca = load_pca()
        X_valid = pca.transform(valid_features)
        pca = None
        valid_data = ClassificationDataSet(N_COMPONENTS, target=1, nb_classes=2)
        for i in range(len(X_valid)):
            valid_data.addSample( X_valid[i], targets_valid[i])
        valid_data._convertToOneOfMany()
        X_valid = None

        make_prediction(valid_data)


    if '--test=test_set' in sys.argv:
        print ("Running against test set")
        pca = load_pca()
        X_test = pca.transform(test_features)
        pca = None
        test_data = ClassificationDataSet(N_COMPONENTS, target=1, nb_classes=2)
        for i in range(len(X_test)):
            test_data.addSample( X_test[i], targets_test[i])
        test_data._convertToOneOfMany()
        y_pred = trainer.testOnClassData(dataset=test_data)
        plot_precision_and_recall(y_pred, targets_test[:len(targets_test) * PERCENTAGE_DATA_SET_TO_USE])
        X_test = None

        make_prediction(test_data)
Code Example #42
from pybrain.datasets import ClassificationDataSet
from pybrain.utilities import percentError
from pybrain.tools.shortcuts import buildNetwork
from pybrain.supervised.trainers import BackpropTrainer
from pybrain.structure.modules import SoftmaxLayer
from pylab import ion, ioff, figure, draw, contourf, clf, show, hold, plot
from scipy import diag, arange, meshgrid, where
from numpy.random import multivariate_normal

means = [(-1, 0), (2, 4), (3, 1)]
cov = [diag([1, 1]), diag([0.5, 1.2]), diag([1.5, 0.7])]
alldata = ClassificationDataSet(2, 1, nb_classes=3)
for n in xrange(400):
    for klass in range(3):
        input = multivariate_normal(means[klass], cov[klass])
        alldata.addSample(input, [klass])
tstdata, trndata = alldata.splitWithProportion(0.25)
trndata._convertToOneOfMany()
tstdata._convertToOneOfMany()
print tstdata
fnn = buildNetwork(trndata.indim, 3, trndata.outdim, outclass=SoftmaxLayer)
trainer = BackpropTrainer(fnn,
                          dataset=trndata,
                          momentum=0.1,
                          verbose=True,
                          weightdecay=0.01)

for i in range(10):
    trainer.trainEpochs(1)
    trnresult = percentError(trainer.testOnClassData(), trndata['class'])
    tstresult = percentError(trainer.testOnClassData(dataset=tstdata),
                             tstdata['class'])
Code example #43
	# drop columns that are not numeric floats
	crimes = crimes.drop(['Dates', 'DayOfWeek', 'Address', 'date_obj', 'Descript'], axis=1)
	crimes = crimes.drop(['Category', 'PdDistrict', 'Resolution'], axis=1)

	return crimes

print "preprocessing"
crimes = process(crimes)
X = crimes.drop(['category_ids'], axis=1)
# X = normalize(X, axis=0)
y = crimes['category_ids']

print "making net"
ds = ClassificationDataSet(35, 1 , nb_classes=39)
for k in xrange(len(X)): 
    ds.addSample(X.iloc[[k]],y.iloc[[k]])
print "cleaning data"
tstdata, trndata = ds.splitWithProportion( 0.5 )
trndata._convertToOneOfMany( )
tstdata._convertToOneOfMany( )

print "training"
hidden_layer = int((trndata.indim + trndata.outdim) / 2)

fnn = FeedForwardNetwork()
inLayer = LinearLayer(trndata.indim)
outLayer = SoftmaxLayer(trndata.outdim)

prev = None

fnn.addInputModule(inLayer)
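The listing is cut off right after the input module is added. As a sketch only, the manual construction typically continues by adding the hidden and output modules and fully connecting them (the same pattern appears in code example #46 below); the single sigmoid hidden layer is an assumption, since the original may chain several layers via the prev variable:

# Sketch only: remaining wiring for the hand-built feed-forward network.
from pybrain.structure import SigmoidLayer, FullConnection  # assumed imports

hiddenLayer = SigmoidLayer(hidden_layer)      # hidden_layer size computed above
fnn.addModule(hiddenLayer)
fnn.addOutputModule(outLayer)
fnn.addConnection(FullConnection(inLayer, hiddenLayer))
fnn.addConnection(FullConnection(hiddenLayer, outLayer))
fnn.sortModules()                             # finalize topology before training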
Code example #44
features_train = features_pd.iloc[:train_count]
# print(features_train.describe())
features_test = features_pd.iloc[train_count:]
# print(features_test.describe())
x_train, x_test, y_train, y_test = train_test_split(
    features_train, labels, test_size=0.2)  #, random_state=1)
X = (x_train, x_test, y_train, y_test)

# print(cross_val_score(svc, features_train, labels, scoring="neg_mean_squared_error", cv=10).mean())
# print(cross_val_score(linear_svc, features_train, labels, scoring="neg_mean_squared_error", cv=10).mean())

dsTrain = ClassificationDataSet(18, 1, nb_classes=2)
rows = len(x_train)
for row in range(rows):
    dsTrain.addSample(tuple(x_train.iloc[row]), y_train.iloc[row])
dsTrain._convertToOneOfMany()

dsTest = ClassificationDataSet(18, 1, nb_classes=2)
rows = len(x_test)
for row in range(rows):
    dsTest.addSample(tuple(x_test.iloc[row]), y_test.iloc[row])
dsTest._convertToOneOfMany()

svc = None
fnn = None
if False:
    svc = svm.SVC(kernel='rbf', C=10, random_state=1, gamma=0.1, max_iter=1000)
    linear_svc = svm.LinearSVC(C=10, random_state=1, max_iter=100)
    pred = train_model(svc, X).predict(features_test)
    # print(pred, len(pred), pred.mean())
Code example #45
Y_n = [labels.index(e) for e in Y]
test_Y_n = [labels.index(e) for e in test_Y]

# center the features by subtracting the training-set mean
X_mean = np.mean(X, axis=0)
norm_X = [line - X_mean for line in np.array(X)]
norm_test_X = [line - X_mean for line in np.array(test_X)]

examples = []
for i in range(0, len(norm_X)):
    examples.append((norm_X[i], Y_n[i]))
shuffle(examples)

alldata = ClassificationDataSet(5400, 1, nb_classes=29)
for i in range(0, len(examples)):
    alldata.addSample(examples[i][0], [examples[i][1]])

tstdata, trndata = alldata.splitWithProportion(0.25)
trndata._convertToOneOfMany()
tstdata._convertToOneOfMany()

print "Number of training patterns: ", len(trndata)
print "Input and output dimensions: ", trndata.indim, trndata.outdim
print "First sample (input, target, class):"
print trndata['input'][0], trndata['target'][0], trndata['class'][0]
fnn = buildNetwork(trndata.indim,
                   10,
                   trndata.outdim,
                   hiddenclass=TanhLayer,
                   outclass=SoftmaxLayer)
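The excerpt stops once the network is built. A minimal training sketch in the style of the surrounding examples, with the epoch budget and hyperparameters assumed rather than taken from the original:

# Sketch only: backprop training with per-epoch error reporting.
from pybrain.supervised.trainers import BackpropTrainer  # assumed imports
from pybrain.utilities import percentError

trainer = BackpropTrainer(fnn, dataset=trndata, momentum=0.1,
                          verbose=True, weightdecay=0.01)
for epoch in range(20):
    trainer.trainEpochs(1)
    trnresult = percentError(trainer.testOnClassData(), trndata['class'])
    tstresult = percentError(trainer.testOnClassData(dataset=tstdata),
                             tstdata['class'])
    print("epoch %4d  train error %5.2f%%  test error %5.2f%%"
          % (trainer.totalepochs, trnresult, tstresult))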
Code example #46
File: recnet2.py Project: balanev/Pdstrategy
def trainet2(data, nhide=8, nhide1=8, epo=10, wd=.1, fn=''):

    alldata = data
    tstdata_temp, trndata_temp = alldata.splitWithProportion(0.5)

    tstdata = ClassificationDataSet(alldata.indim, nb_classes=alldata.nClasses)
    for n in range(0, tstdata_temp.getLength()):
        tstdata.addSample(
            tstdata_temp.getSample(n)[0],
            tstdata_temp.getSample(n)[1])

    trndata = ClassificationDataSet(alldata.indim, nb_classes=alldata.nClasses)
    for n in range(0, trndata_temp.getLength()):
        trndata.addSample(
            trndata_temp.getSample(n)[0],
            trndata_temp.getSample(n)[1])

    tstdata._convertToOneOfMany()
    trndata._convertToOneOfMany()

    net = FeedForwardNetwork()
    inLayer = LinearLayer(trndata.indim)
    hiddenLayer = TanhLayer(nhide)
    hiddenLayer1 = TanhLayer(nhide1)
    outLayer = LinearLayer(trndata.outdim)

    net.addInputModule(inLayer)
    net.addModule(hiddenLayer)
    net.addModule(hiddenLayer1)
    net.addOutputModule(outLayer)

    in_to_hidden = FullConnection(inLayer, hiddenLayer)
    hidden_to_hidden = FullConnection(hiddenLayer, hiddenLayer1)
    hidden_to_out = FullConnection(hiddenLayer1, outLayer)

    net.addConnection(in_to_hidden)
    net.addConnection(hidden_to_hidden)
    net.addConnection(hidden_to_out)

    net.sortModules()
    net.bias = True

    trainer = BackpropTrainer(net,
                              dataset=trndata,
                              verbose=True,
                              weightdecay=wd,
                              momentum=0.1)
    edata = []
    msedata = []
    for i in range(epo):
        trainer.trainEpochs(1)
        trnresult = percentError(trainer.testOnClassData(), trndata['class'])
        tstresult = percentError(trainer.testOnClassData(dataset=tstdata),
                                 tstdata['class'])
        tod = trainer.testOnData(verbose=False)
        print("epoch: %4d" % trainer.totalepochs,
              "  train error: %5.2f%%" % trnresult,
              "  test error: %5.2f%%" % tstresult, "  layers: ", nhide1,
              "  N_tourn: ", alldata.indim / 2)
        edata.append([trnresult, tstresult])
        msedata.append([i, tod])
    with open(fn + ".dta", 'w') as fp:
        json.dump(edata, fp)
    with open(fn + ".mse", 'w') as fp:
        json.dump(msedata, fp)
    return net
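A hedged usage sketch for trainet2: it expects a ClassificationDataSet that already carries the class count, and it writes <fn>.dta and <fn>.mse as JSON side effects. The samples iterable below is a placeholder, not part of the original project:

# Sketch only: calling trainet2 on a prepared dataset.
from pybrain.datasets import ClassificationDataSet  # assumed import

data = ClassificationDataSet(16, 1, nb_classes=2)
for x, label in samples:                  # placeholder (feature vector, class index) pairs
    data.addSample(x, [label])
net = trainet2(data, nhide=8, nhide1=8, epo=10, wd=0.1, fn='run1')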
Code example #47
                                    target=self['target'][rightIndicies].copy())
    return leftDs, rightDs


# Load iris data
irisData = datasets.load_iris()
dataFeatures = irisData.data
dataTargets = irisData.target


# Create data set object
dataSet = ClassificationDataSet(4, 1, nb_classes=3) # 3 - classes of iris

# Add data to out data set
for i in range(len(dataFeatures)):
    dataSet.addSample(np.ravel(dataFeatures[i]), dataTargets[i])

# Split data in train and test sets
trainingData, testData = splitWithProportion(dataSet, 0.7)

# Convert data classes to (1,0,0), (0,1,0), (0,0,1)
trainingData._convertToOneOfMany()
testData._convertToOneOfMany()

# Build neural network
neuralNetwork = buildNetwork(trainingData.indim, 7, trainingData.outdim, outclass=SoftmaxLayer)
trainer = BackpropTrainer(neuralNetwork, dataset=trainingData, momentum=0.01, learningrate=0.05, verbose=True)

# Train for 10 000 iterations and print error
trainer.trainEpochs(10000)
print('Error (test dataset): ', percentError(trainer.testOnClassData(dataset=testData), testData['class']))
Code example #48
File: ncirf.py Project: Guosmilesmile/pythonstudy
def generate_data():
    index = [
        3471, 791, 458, 3068, 1542, 524, 278, 526, 5769, 3129, 5440, 166, 4577,
        5714, 1692, 546, 402, 2552, 4129, 1894, 4743, 1809, 630, 208, 818,
        6034, 3988, 3981, 4580, 134, 1289, 5712, 4723, 4961, 3417, 2630, 994,
        689, 5770, 3122, 4823, 4508, 2696, 5566, 2136, 4217, 1503, 1448, 3117,
        1161, 5385, 6095, 2197, 325, 2310, 4990, 2009, 5880, 3900, 1715, 1573,
        1488, 1125, 3533, 3004, 55, 4424, 3077, 499, 144, 5976, 4643, 3219,
        2328, 1770, 1510, 770, 107, 1625, 4684, 4544, 4470, 3684, 3607, 942,
        671, 5796, 3773, 2204, 2083, 345, 3942, 6113, 6112, 6111, 6110, 6109,
        6108, 6107, 6106, 6105, 6104, 6103, 6102, 6101, 6100, 6099, 6098, 6097,
        6096, 6094, 6093, 6092, 6091, 6090, 6089, 6088, 6087, 6086, 6085, 6084,
        6083, 6082, 6081, 6080, 6079, 6078, 6077, 6076, 6075, 6074, 6073, 6072,
        6071, 6070, 6069, 6068, 6067, 6066, 6065, 6064, 6063, 6062, 6061, 6060,
        6059, 6058, 6057, 6056, 6055, 6054, 6053, 6052, 6051, 6050, 6049, 6048,
        6047, 6046, 6045, 6044, 6043, 6042, 6041, 6040, 6039, 6038, 6037, 6036,
        6035, 6033, 6032, 6031, 6030, 6029, 6028, 6027, 6026, 6025, 6024, 6023,
        6022, 6021, 6020, 6019, 6018, 6017, 6016, 6015, 6014, 6013, 6012, 6011,
        6010, 6009, 6008, 6007, 6006, 6005, 6004, 6003, 6002, 6001, 6000, 5999,
        5998, 5997, 5996, 5995, 5994, 5993, 5992, 5991, 5990, 5989, 5988, 5987,
        5986, 5985, 5984, 5983, 5982, 5981, 5980, 5979, 5978, 5977, 5975, 5974,
        5973, 5972, 5971, 5970, 5969, 5968, 5967, 5966, 5965, 5964, 5963, 5962,
        5961, 5960, 5959, 5958, 5957, 5956, 5955, 5954, 5953, 5952, 5951, 5950,
        5949, 5948, 5947, 5946, 5945, 5944, 5943, 5942, 5941, 5940, 5939, 5938,
        5937, 5936, 5935, 5934, 5933, 5932, 5931, 5930, 5929, 5928, 5927, 5926,
        5925, 5924, 5923, 5922, 5921, 5920, 5919, 5918, 5917, 5916, 5915, 5914,
        5913, 5912, 5911, 5910, 5909, 5908, 5907, 5906, 5905, 5904, 5903, 5902,
        5901, 5900, 5899, 5898, 5897, 5896, 5895, 5894, 5893, 5892, 5891, 5890,
        5889, 5888, 5887, 5886, 5885, 5884, 5883, 5882, 5881, 5879, 5878, 5877,
        5876, 5875, 5874, 5873, 5872, 5871, 5870, 5869, 5868, 5867, 5866, 5865,
        5864, 5863, 5862, 5861, 5860, 5859, 5858, 5857, 5856, 5855, 5854, 5853,
        5852, 5851, 5850, 5849, 5848, 5847, 5846, 5845, 5844, 5843, 5842, 5841,
        5840, 5839, 5838, 5837, 5836, 5835, 5834, 5833, 5832, 5831, 5830, 5829,
        5828, 5827, 5826, 5825, 5824, 5823, 5822, 5821, 5820, 5819, 5818, 5817,
        5816, 5815, 5814, 5813, 5812, 5811, 5810, 5809, 5808, 5807, 5806, 5805,
        5804, 5803, 5802, 5801, 5800, 5799, 5798, 5797, 5795, 5794, 5793, 5792,
        5791, 5790, 5789, 5788, 5787, 5786, 5785, 5784, 5783, 5782, 5781, 5780,
        5779, 5778, 5777, 5776, 5775, 5774, 5773, 5772, 5771, 5768, 5767, 5766,
        5765, 5764, 5763, 5762, 5761, 5760, 5759, 5758, 5757, 5756, 5755, 5754,
        5753, 5752, 5751, 5750, 5749, 5748, 5747, 5746, 5745, 5744, 5743, 5742,
        5741, 5740, 5739, 5738, 5737, 5736, 5735, 5734, 5733, 5732, 5731, 5730,
        5729, 5728, 5727, 5726, 5725, 5724, 5723, 5722, 5721, 5720, 5719, 5718,
        5717, 5716, 5715, 5713, 5711, 5710, 5709, 5708, 5707, 5706, 5705, 5704,
        5703, 5702, 5701, 5700, 5699, 5698, 5697
    ]

    INPUT_FEATURES = 500
    CLASSES = 9
    #train_text,train_classfi = getTargetData("Breast_train.data")

    #Load boston housing dataset as an example
    train_text, train_classfi_number, train_classfi, train_feature_name = getTargetData(
        "nci60_train_m.txt")

    train_text = getIndexData(train_text, index)

    alldata = ClassificationDataSet(INPUT_FEATURES, 1, nb_classes=CLASSES)
    for i in range(len(train_text)):
        features = train_text[i]
        if train_classfi[i] == "1":
            klass = 0
            alldata.addSample(features, klass)
        elif train_classfi[i] == "2":
            klass = 1
            alldata.addSample(features, klass)
        elif train_classfi[i] == "3":
            klass = 2
            alldata.addSample(features, klass)
        elif train_classfi[i] == "4":
            klass = 3
            alldata.addSample(features, klass)
        elif train_classfi[i] == "5":
            klass = 4
            alldata.addSample(features, klass)
        elif train_classfi[i] == "6":
            klass = 5
            alldata.addSample(features, klass)
        elif train_classfi[i] == "7":
            klass = 6
            alldata.addSample(features, klass)
        elif train_classfi[i] == "8":
            klass = 7
            alldata.addSample(features, klass)
        elif train_classfi[i] == "9":
            klass = 8
            alldata.addSample(features, klass)
    return {
        'minX': 0,
        'maxX': 1,
        'minY': 0,
        'maxY': 1,
        'd': alldata,
        'index': index
    }
Code example #49
def generate_data():
    INPUT_FEATURES = 16063 
    CLASSES = 15

    train_text,train_classfi = getTargetData("GCM_train.data")

    alldata = ClassificationDataSet(INPUT_FEATURES, 1, nb_classes=CLASSES)
    for i in range(len(train_text)):
        features = train_text[i]
        if train_classfi[i]=="Breast" :
            klass = 0
            alldata.addSample(features, klass)
        elif train_classfi[i]=="Prostate" :
            klass = 1
            alldata.addSample(features, klass)
        elif train_classfi[i]=="Lung" :
            klass = 2
            alldata.addSample(features, klass)
        elif train_classfi[i]=="Colorectal" :
            klass = 3
            alldata.addSample(features, klass)
        elif train_classfi[i]=="Lymphoma" :
            klass = 4
            alldata.addSample(features, klass)
        elif train_classfi[i]=="Bladder" :
            klass = 5
            alldata.addSample(features, klass)
        elif train_classfi[i]=="Melanoma" :
            klass = 6
            alldata.addSample(features, klass)
        elif train_classfi[i]=="Uterus" :
            klass = 7
            alldata.addSample(features, klass)
        elif train_classfi[i]=="Leukemia" :
            klass = 8
            alldata.addSample(features, klass)
        elif train_classfi[i]=="Renal" :
            klass = 9
            alldata.addSample(features, klass)
        elif train_classfi[i]=="Pancreas" :
            klass = 10
            alldata.addSample(features, klass)
        elif train_classfi[i]=="Ovary" :
            klass = 11
            alldata.addSample(features, klass)
        elif train_classfi[i]=="Mesothelioma" :
            klass = 12
            alldata.addSample(features, klass)
        elif train_classfi[i]=="CNS" :
            klass = 13
            alldata.addSample(features, klass)
    return {'minX': 0, 'maxX': 1,
            'minY': 0, 'maxY': 1, 'd': alldata}
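The long elif chain above maps tumor-type labels to consecutive class indices. The same mapping can be written as a dictionary lookup; a compact, equivalent sketch using the identical labels and indices (and the same local names as generate_data() above):

# Sketch only: dictionary-based alternative to the elif chain above.
CLASS_INDEX = {"Breast": 0, "Prostate": 1, "Lung": 2, "Colorectal": 3,
               "Lymphoma": 4, "Bladder": 5, "Melanoma": 6, "Uterus": 7,
               "Leukemia": 8, "Renal": 9, "Pancreas": 10, "Ovary": 11,
               "Mesothelioma": 12, "CNS": 13}
for features, label in zip(train_text, train_classfi):
    if label in CLASS_INDEX:
        alldata.addSample(features, CLASS_INDEX[label])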
Code example #50
File: ncirf.py Project: Guosmilesmile/pythonstudy
def generate_Testdata(index):
    INPUT_FEATURES = 500
    CLASSES = 9
    #train_text,train_classfi = getTargetData("Breast_train.data")

    #Load boston housing dataset as an example
    train_text, train_classfi_number, train_classfi, train_feature_name = getTargetData(
        "nci60_test_m.txt")

    train_text = getIndexData(train_text, index)

    alldata = ClassificationDataSet(INPUT_FEATURES, 1, nb_classes=CLASSES)
    for i in range(len(train_text)):
        features = train_text[i]
        if train_classfi[i] == "1":
            klass = 0
            alldata.addSample(features, klass)
        elif train_classfi[i] == "2":
            klass = 1
            alldata.addSample(features, klass)
        elif train_classfi[i] == "3":
            klass = 2
            alldata.addSample(features, klass)
        elif train_classfi[i] == "4":
            klass = 3
            alldata.addSample(features, klass)
        elif train_classfi[i] == "5":
            klass = 4
            alldata.addSample(features, klass)
        elif train_classfi[i] == "6":
            klass = 5
            alldata.addSample(features, klass)
        elif train_classfi[i] == "7":
            klass = 6
            alldata.addSample(features, klass)
        elif train_classfi[i] == "8":
            klass = 7
            alldata.addSample(features, klass)
        elif train_classfi[i] == "9":
            klass = 8
            alldata.addSample(features, klass)
    return {
        'minX': 0,
        'maxX': 1,
        'minY': 0,
        'maxY': 1,
        'd': alldata,
        'index': index
    }
Code example #51
File: neural_net.py Project: marsjoy/wesandersone
class NeuralNetwork(BaseWorkflow):
    def __init__(self,
                 purpose='train',
                 num_inputs=None,
                 num_ouputs=None,
                 classes=None,
                 class_lables=None):
        super(NeuralNetwork, self).__init__()
        self.purpose = purpose
        self.data_path = self.config.neural_net.get(self.purpose, None)
        self.file_name = 'neural_net'
        self.all_data = ClassificationDataSet(num_inputs,
                                              num_ouputs,
                                              nb_classes=classes,
                                              class_labels=class_lables)
        self.train = None
        self.test = None
        self.neural_network = None
        self.train_result = None
        self.test_result = None
        self.cross_validation_result = None

    def process(self):
        self.prepare_train_test()
        self.build_network()
        trainer = self.train_network(dataset=self.train)
        self.score_train_test(trainer=trainer)
        self.cross_validate(dataset=self.all_data)

    def add_sample(self,
                   correlogram_matrix=None,
                   target=None,
                   sample_path=None):
        self.all_data.addSample(correlogram_matrix, target)
        logger.info(
            'sample added from {sample_path}'.format(sample_path=sample_path))

    def prepare_train_test(self):
        self.test, self.train = self.all_data.splitWithProportion(0.25)

    def build_network(self):
        self.neural_network = buildNetwork(
            self.train.indim, 7, self.train.outdim,
            outclass=SoftmaxLayer)  # feed forward network

    def train_network(self, dataset=None):
        starter_trainer = BackpropTrainer(self.neural_network,
                                          dataset=dataset,
                                          momentum=0.1,
                                          verbose=True,
                                          weightdecay=0.01)
        starter_trainer.trainUntilConvergence(validationProportion=0.25,
                                              maxEpochs=100)
        return starter_trainer

    def score_train_test(self, trainer=None):
        self.test_result = percentError(
            trainer.testOnClassData(dataset=self.test), self.test['class'])
        logger.info(
            'test error result: {result}'.format(result=self.test_result))
        self.train_result = percentError(
            trainer.testOnClassData(dataset=self.train), self.train['class'])
        logger.info(
            'train error result: {result}'.format(result=self.train_result))

    def cross_validate(self, dataset=None):
        trainer = BackpropTrainer(self.neural_network,
                                  dataset=dataset,
                                  momentum=0.1,
                                  verbose=True,
                                  weightdecay=0.01)
        validator = CrossValidator(trainer=trainer,
                                   dataset=dataset,
                                   n_folds=10)
        mean_validation_result = validator.validate()
        self.cross_validation_result = mean_validation_result
        logger.info('cross val result: {result}'.format(
            result=self.cross_validation_result))

    @staticmethod
    def save_network_to_xml(net=None, file_name=None):
        NetworkWriter.writeToFile(net, file_name)

    @staticmethod
    def read_network_from_xml(file_name=None):
        return NetworkReader.readFrom(file_name)
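A hedged usage sketch for the NeuralNetwork class above; the sample source is a placeholder, and the input/output/class counts are assumptions for illustration (the keyword names match the class signature as written):

# Sketch only: driving the NeuralNetwork workflow end to end.
nn = NeuralNetwork(purpose='train', num_inputs=64, num_ouputs=1,
                   classes=4, class_lables=['a', 'b', 'c', 'd'])
for matrix, target, path in samples:      # placeholder (features, class, file path) triples
    nn.add_sample(correlogram_matrix=matrix, target=target, sample_path=path)
nn.process()                              # split, build, train, score, cross-validate
NeuralNetwork.save_network_to_xml(net=nn.neural_network, file_name='net.xml')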
Code example #52
File: prueba2.py Project: alexing/prof
from pybrain.tools.shortcuts import buildNetwork
from pybrain.supervised.trainers import BackpropTrainer
from pybrain.structure.modules import SoftmaxLayer

from pylab import ion, ioff, figure, draw, contourf, clf, show, hold, plot
from scipy import diag, arange, meshgrid, where
from numpy.random import multivariate_normal

#To have a nice dataset for visualization, we produce a set of points in 2D belonging to three different classes.
means = [(-1, 0), (2, 4), (3, 1)]
cov = [diag([1, 1]), diag([0.5, 1.2]), diag([1.5, 0.7])]
alldata = ClassificationDataSet(2, 1, nb_classes=3)
for n in xrange(400):
    for klass in range(3):
        input = multivariate_normal(means[klass], cov[klass])
        alldata.addSample(input, [klass])

#Randomly split the dataset into 75% training and 25% test data sets. Of course, we could also have created two different datasets to begin with.
tstdata, trndata = alldata.splitWithProportion(0.25)

#For neural network classification, it is highly advisable to encode classes with one output neuron per class. Note that this operation duplicates the original targets and stores them in an (integer) field named "class".
trndata._convertToOneOfMany()
tstdata._convertToOneOfMany()

print "Number of training patterns: ", len(trndata)
print "Input and output dimensions: ", trndata.indim, trndata.outdim
print "First sample (input, target, class):"
print trndata['input'][0], trndata['target'][0], trndata['class'][0]

#Now build a feed-forward network with 5 hidden units. We use the shortcut buildNetwork() for this. The input and output layer size must match the dataset's input and target dimension.
#net = buildNetwork(2, 5, 3)
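The excerpt ends at the commented-out buildNetwork call. The continuation of this tutorial-style example (it matches code example #42 above) builds the network against the dataset dimensions, attaches a backprop trainer, and reports the test error; the epoch count here is an assumption:

from pybrain.utilities import percentError  # not among the imports shown above

fnn = buildNetwork(trndata.indim, 5, trndata.outdim, outclass=SoftmaxLayer)
trainer = BackpropTrainer(fnn, dataset=trndata, momentum=0.1,
                          verbose=True, weightdecay=0.01)
trainer.trainEpochs(5)
print("test error: %5.2f%%" % percentError(
    trainer.testOnClassData(dataset=tstdata), tstdata['class']))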
Code example #53
from sklearn import datasets
from matplotlib import pyplot as plt
import sys
from numpy import ravel
from pybrain.datasets import ClassificationDataSet

digits = datasets.load_digits()
X, y = digits.data, digits.target

print(X[0].shape)

# one scalar class label per sample; expand to one-hot later with _convertToOneOfMany()
ds = ClassificationDataSet(64, 1, nb_classes=10)
test = ClassificationDataSet(64, 1, nb_classes=10)
training = ClassificationDataSet(64, 1, nb_classes=10)

for k in xrange(len(X)):
    ds.addSample(ravel(X[k]), y[k])

test_t, training_t = ds.splitWithProportion(0.25)

for k in xrange(0, test_t.getLength()):
    test.addSample(test_t.getSample(k)[0], test_t.getSample(k)[1])

for k in xrange(0, training_t.getLength()):
    training.addSample(training_t.getSample(k)[0], training_t.getSample(k)[1])

print(training.getLength())
print(test.getLength())

print(test.indim)
print(test.outdim)
print(training.indim)
Code example #54
    NetworkWriter.writeToFile(nn, filename)


########################################################################################################################

# seed random numbers to make calculation deterministic (just a good practice)
np.random.seed(1)

olivetti = datasets.fetch_olivetti_faces()
X, y = olivetti.data, olivetti.target
print "data shape of faces:", X.shape

# Flatten the 64x64 data to one dimensional 4096 and then feed the data our NN classification dataset:
ds = ClassificationDataSet(4096, 1, nb_classes=40)
for k in xrange(len(X)):
    ds.addSample(X[k], y[k])

# Split the data into 75% training and 25% test data
tstdata, trndata = ds.splitWithProportion(0.25)

# Convert 1 output to 40 binary outputs
trndata._convertToOneOfMany()
tstdata._convertToOneOfMany()

# Check the data inside the neural network:
print trndata['input'], trndata['target'], tstdata.indim, tstdata.outdim

# Now that all data is loaded, build the network and backpropagation trainer:
#fnn = buildNetwork(trndata.indim, 64, trndata.outdim, outclass=SoftmaxLayer)
fnn = create_or_read_from_file(trndata)
trainer = BackpropTrainer(fnn,
Code example #55
def generate_data():
    index = [
        9154, 5123, 2407, 680, 548, 8016, 15755, 9861, 461, 5552, 6834, 6268,
        14112, 15285, 13065, 8838, 2962, 6581, 4025, 14928, 10521, 1413, 3587,
        3537, 13462, 9809, 4128, 15806, 4884, 2084, 7818, 8294, 12308, 8789,
        5328, 5817, 7663, 6299, 15295, 3547, 1673, 5940, 6085, 6368, 6006,
        5520, 14228, 8608, 7822, 3237, 10927, 12268, 2852, 6903, 13001, 10775,
        4852, 14487, 10885, 14948, 15239, 8787, 6886, 15720, 13436, 4102, 7832,
        5071, 11062, 15004, 14888, 12560, 4381, 14283, 6892, 14753, 10132,
        6937, 2393, 465, 11791, 8533, 2174, 6739, 4316, 251, 11438, 10288,
        6658, 6439, 6711, 5173, 11590, 1452, 524, 15677, 13742, 11881, 9299,
        7499, 7068, 11457, 11128, 4936, 1634, 14692, 13352, 11896, 11895,
        11494, 9704, 6878, 10112, 10027, 10207, 6946, 6604, 5563, 3590, 2817,
        2661, 9667, 9609, 8368, 7538, 6830, 1909, 1385, 15043, 14006, 11050,
        10743, 10306, 9574, 9546, 9267, 9232, 8546, 8452, 8027, 7465, 5453,
        1903, 1747, 1367, 15496, 14231, 13894, 12340, 11433, 11118, 9223, 8369,
        8017, 7324, 6737, 5047, 4635, 4631, 3685, 3418, 3215, 1395, 835, 690,
        15808, 15210, 13829, 13798, 13303, 13220, 13078, 12416, 12407, 12082,
        11940, 11266, 9794, 9643, 8825, 8600, 8446, 7892, 6972, 6728, 6559,
        5759, 5091, 4640, 4209, 3214, 1994, 1599, 1447, 1082, 15881, 15810,
        15586, 15564, 15150
    ]

    INPUT_FEATURES = 200
    CLASSES = 15
    #train_text,train_classfi = getTargetData("Breast_train.data")

    #Load boston housing dataset as an example
    train_text, train_classfi_number, train_classfi, train_feature_name = getTargetData(
        "GCM_train.data")

    train_text = getIndexData(train_text, index)

    alldata = ClassificationDataSet(INPUT_FEATURES, 1, nb_classes=CLASSES)
    for i in range(len(train_text)):
        features = train_text[i]
        if train_classfi[i] == "Breast":
            klass = 0
            alldata.addSample(features, klass)
        elif train_classfi[i] == "Prostate":
            klass = 1
            alldata.addSample(features, klass)
        elif train_classfi[i] == "Lung":
            klass = 2
            alldata.addSample(features, klass)
        elif train_classfi[i] == "Colorectal":
            klass = 3
            alldata.addSample(features, klass)
        elif train_classfi[i] == "Lymphoma":
            klass = 4
            alldata.addSample(features, klass)
        elif train_classfi[i] == "Bladder":
            klass = 5
            alldata.addSample(features, klass)
        elif train_classfi[i] == "Melanoma":
            klass = 6
            alldata.addSample(features, klass)
        elif train_classfi[i] == "Uterus":
            klass = 7
            alldata.addSample(features, klass)
        elif train_classfi[i] == "Leukemia":
            klass = 8
            alldata.addSample(features, klass)
        elif train_classfi[i] == "Renal":
            klass = 9
            alldata.addSample(features, klass)
        elif train_classfi[i] == "Pancreas":
            klass = 10
            alldata.addSample(features, klass)
        elif train_classfi[i] == "Ovary":
            klass = 11
            alldata.addSample(features, klass)
        elif train_classfi[i] == "Mesothelioma":
            klass = 12
            alldata.addSample(features, klass)
        elif train_classfi[i] == "CNS":
            klass = 13
            alldata.addSample(features, klass)
        elif train_classfi[i] == "Colorectal":
            klass = 14
            alldata.addSample(features, klass)
    return {
        'minX': 0,
        'maxX': 1,
        'minY': 0,
        'maxY': 1,
        'd': alldata,
        'index': index
    }
Code example #56
def run(args):
    manual_validated_file = args.manual_validated_file # 'JHotDraw54b1_clones.xml.clones2'
    save_target_name = args.save_target_name #'newTrainedModel'



    print 'Training the Model. Please wait ...'

    manual_validation_data = pd.read_csv('manual_validator/input_clone_pairs/'+manual_validated_file)
    inputDim = 6
    alldata = ClassificationDataSet(inputDim, 1, nb_classes=2)
    txlHelper = TXLHelper()
    for i in range(0, len(manual_validation_data)):
        #print manual_validation_data.iloc[i][3], manual_validation_data.iloc[i][4]
        #print manual_validation_data.iloc[i][2]
        cloneFragment_1_path, cloneFragment_1_start, cloneFragment_1_end = manual_validation_data.iloc[i][3].split()[0], \
                                                                           manual_validation_data.iloc[i][3].split()[1], \
                                                                           manual_validation_data.iloc[i][3].split()[2]
        cloneFragment_2_path, cloneFragment_2_start, cloneFragment_2_end = manual_validation_data.iloc[i][4].split()[0], \
                                                                           manual_validation_data.iloc[i][4].split()[1], \
                                                                           manual_validation_data.iloc[i][4].split()[2]
        cloneFragment_1 = read_file_in_line_range(filePath='manual_validator/input_clone_pairs/'+cloneFragment_1_path, \
                                                  startLine=cloneFragment_1_start, endLine=cloneFragment_1_end)
        cloneFragment_2 = read_file_in_line_range(filePath='manual_validator/input_clone_pairs/' + cloneFragment_2_path,
                                                  startLine=cloneFragment_2_start, endLine=cloneFragment_2_end)


        type1sim_by_line, type2sim_by_line, type3sim_by_line = txlHelper.app_code_clone_similaritiesNormalizedByLine(cloneFragment_1,
                                                     cloneFragment_2, 'java')

        type1sim_by_token, type2sim_by_token, type3sim_by_token = txlHelper.app_code_clone_similaritiesNormalizedByToken(cloneFragment_1,
                                                     cloneFragment_2, 'java')

        label = manual_validation_data.iloc[i][2]
        if label == 'true':
            label = 1
        else:
            label = 0



        # three similarities normalized by line, then three normalized by token
        input = np.array([type1sim_by_line, type2sim_by_line, type3sim_by_line,
                          type1sim_by_token, type2sim_by_token, type3sim_by_token])

        alldata.addSample(input, int(label))



    # # np.nan_to_num(alldata)
    # # alldata = alldata[~np.isnan(alldata)]
    # #alldata.fillna(0)
    # np.set_printoptions(precision=3)
    # print alldata

    #
    # def load_training_dataSet(fileName):
    #     data = pd.read_csv(fileName, sep=',', header=None)
    #     #data.columns = ["state", "outcome"]
    #     return data
    #
    # myclones_data = load_training_dataSet('Datasets/new_dataset_with_new_features.csv')
    # myclones_data = myclones_data.values
    #
    #
    # inputDim = 6
    #
    #
    # means = [(-1,0),(2,4),(3,1)]
    # cov = [diag([1,1]), diag([0.5,1.2]), diag([1.5,0.7])]
    # alldata = ClassificationDataSet(inputDim, 1, nb_classes=2)
    #
    #
    # #input = np.array([ myclones_data[n][16], myclones_data[n][17], myclones_data[n][18], myclones_data[n][15],myclones_data[n][11],myclones_data[n][12],   myclones_data[n][26], myclones_data[n][27]] )
    #
    # for n in xrange(len(myclones_data)):
    #     #for klass in range(3):
    #     input = np.array(
    #         [myclones_data[n][11], myclones_data[n][17], myclones_data[n][12], myclones_data[n][15], myclones_data[n][18],
    #          myclones_data[n][16]])
    #     #print (n, "-->", input)
    #     alldata.addSample(input, int(myclones_data[n][35]))
    #
    #
    tstdata, trndata = alldata.splitWithProportion( 0.25 )

    #print(tstdata)

    tstdata_new = ClassificationDataSet(inputDim, 1, nb_classes=2)
    for n in xrange(0, tstdata.getLength()):
        tstdata_new.addSample( tstdata.getSample(n)[0], tstdata.getSample(n)[1] )

    trndata_new = ClassificationDataSet(inputDim, 1, nb_classes=2)
    for n in xrange(0, trndata.getLength()):
        trndata_new.addSample( trndata.getSample(n)[0], trndata.getSample(n)[1])

    trndata = trndata_new
    tstdata = tstdata_new

    #print("Before --> ", trndata)

    trndata._convertToOneOfMany( )
    tstdata._convertToOneOfMany( )



    fnn = buildNetwork( trndata.indim, 107, trndata.outdim, outclass=SoftmaxLayer )
    trainer = BackpropTrainer( fnn, dataset=trndata, momentum=0.1,learningrate=0.05 , verbose=True, weightdecay=0.001)



    #print "Printing Non-Trained Network..."






    """
    ticks = arange(-3.,6.,0.2)
    X, Y = meshgrid(ticks, ticks)
    # need column vectors in dataset, not arrays
    griddata = ClassificationDataSet(7,1, nb_classes=2)
    for i in xrange(X.size):
        griddata.addSample([X.ravel()[i],Y.ravel()[i]], [0])
    griddata._convertToOneOfMany()  # this is still needed to make the fnn feel comfy
    
    """



    #trainer.trainEpochs(1)
    #trainer.testOnData(verbose=True)
    #print(np.array([fnn.activate(x) for x, _ in tstdata]))





    for i in range(1):
        trainer.trainEpochs(10)
        trnresult = percentError(trainer.testOnClassData(),
                                 trndata['class'])
        tstresult = percentError(trainer.testOnClassData(
            dataset=tstdata), tstdata['class'])




        #print "epoch: %4d" % trainer.totalepochs, \
        #    "  train error: %5.2f%%" % trnresult, \
         #   "  test error: %5.2f%%" % tstresult


    #print "Printing Trained Network..."
    #print fnn.params


    print "Saving the trined Model at : ", 'pybrain/'+save_target_name
    #saving the trained network...
    import pickle

    fileObject = open('pybrain/'+save_target_name, 'wb')  # binary mode for pickle

    pickle.dump(fnn, fileObject)
    fileObject.close()

    #
    # fileObject = open('trainedNetwork79', 'r')
    # loaded_fnn = pickle.load(fileObject)
    #
    #
    # print "Printing the result prediction..."
    #
    # print loaded_fnn.activate([0.2,0.5,0.6,0.1,0.3,0.7])
    #
    # print fnn.activate([0.2,0.5,0.6,0.1,0.3,0.7])
    #


        #out = fnn.activateOnDataset(griddata)
        #out = out.argmax(axis=1)  # the highest output activation gives the class
        #out = out.reshape(X.shape)

    """
Code example #57
# Imports
import numpy as np
from scipy import stats
from pybrain.datasets import ClassificationDataSet

# Data and outputs
datain = np.loadtxt(open("beerdata.csv", "rb"), delimiter=",", skiprows=0)
y = datain[:, 0] - 1  # 178x1 vector classifications
X = datain[:, 1:]  # 178x13 matrix of data points
X = stats.zscore(X, axis=0)  # normalize the data by feature
m = X.shape[0]  # number of data points

### Build a ClassificationDataSet data object and enter all of the data and classifications from X and y.

data = ClassificationDataSet(13)
for i in range(m):
    data.addSample(X[i, :], int(y[i]))
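The excerpt stops after the samples are loaded. As a sketch of the usual next steps, assuming the dataset above is created with nb_classes set (for example ClassificationDataSet(13, 1, nb_classes=len(set(y)))) so that one-hot conversion knows the class count; the network size and epoch limit are assumptions:

# Sketch only: split, one-hot encode, train, and report the test error.
from pybrain.tools.shortcuts import buildNetwork
from pybrain.supervised.trainers import BackpropTrainer
from pybrain.structure.modules import SoftmaxLayer
from pybrain.utilities import percentError

tstdata, trndata = data.splitWithProportion(0.25)
trndata._convertToOneOfMany()
tstdata._convertToOneOfMany()
net = buildNetwork(trndata.indim, 10, trndata.outdim, outclass=SoftmaxLayer)
trainer = BackpropTrainer(net, dataset=trndata, momentum=0.1, verbose=True,
                          weightdecay=0.01)
trainer.trainUntilConvergence(maxEpochs=50)
print("test error: %5.2f%%" % percentError(
    trainer.testOnClassData(dataset=tstdata), tstdata['class']))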
Code example #58
def generate_Testdata(index):
    INPUT_FEATURES = 200
    CLASSES = 15
    #train_text,train_classfi = getTargetData("Breast_train.data")

    #Load boston housing dataset as an example
    train_text, train_classfi_number, train_classfi, train_feature_name = getTargetData(
        "GCM_test.data")
    train_text = getIndexData(train_text, index)
    alldata = ClassificationDataSet(INPUT_FEATURES, 1, nb_classes=CLASSES)
    for i in range(len(train_text)):
        features = train_text[i]
        if train_classfi[i] == "Breast":
            klass = 0
            alldata.addSample(features, klass)
        elif train_classfi[i] == "Prostate":
            klass = 1
            alldata.addSample(features, klass)
        elif train_classfi[i] == "Lung":
            klass = 2
            alldata.addSample(features, klass)
        elif train_classfi[i] == "Colorectal":
            klass = 3
            alldata.addSample(features, klass)
        elif train_classfi[i] == "Lymphoma":
            klass = 4
            alldata.addSample(features, klass)
        elif train_classfi[i] == "Bladder":
            klass = 5
            alldata.addSample(features, klass)
        elif train_classfi[i] == "Melanoma":
            klass = 6
            alldata.addSample(features, klass)
        elif train_classfi[i] == "Uterus":
            klass = 7
            alldata.addSample(features, klass)
        elif train_classfi[i] == "Leukemia":
            klass = 8
            alldata.addSample(features, klass)
        elif train_classfi[i] == "Renal":
            klass = 9
            alldata.addSample(features, klass)
        elif train_classfi[i] == "Pancreas":
            klass = 10
            alldata.addSample(features, klass)
        elif train_classfi[i] == "Ovary":
            klass = 11
            alldata.addSample(features, klass)
        elif train_classfi[i] == "Mesothelioma":
            klass = 12
            alldata.addSample(features, klass)
        elif train_classfi[i] == "CNS":
            klass = 13
            alldata.addSample(features, klass)
        elif train_classfi[i] == "Colorectal":
            klass = 14
            alldata.addSample(features, klass)
    return {
        'minX': 0,
        'maxX': 1,
        'minY': 0,
        'maxY': 1,
        'd': alldata,
        'index': index
    }
Code example #59
images_path = sys.argv[1] if len(sys.argv) == 2 else "data2/**/*.jpg"
fitness.setData(images_path)

for input_data, output_data, image_path in fitness.getNextData(
        recalc=True, return_image_path=True,
        use_images_without_output=use_net):
    total += 1

    image = cv2.imread(image_path)

    if use_net:
        if classification:
            ds = ClassificationDataSet(len(input_data),
                                       nb_classes=2,
                                       class_labels=['aceptado', 'despunte'])
            ds.addSample(input_data, [0])
            ds._convertToOneOfMany()
            out = net.activateOnDataset(ds)
            out_class = out.argmax(
                axis=1)  # the highest output activation gives the class
        else:
            ds = SupervisedDataSet(len(input_data), net.indim)
            ds.addSample(input_data, [0] * net.indim)
            out = net.activateOnDataset(ds)[0]
            print out

    debug_image = []
    if output_data is not None:
        debug_image.extend(feature.debug_feature(output_data, image_path))

    if use_net:
Code example #60
all_data = ClassificationDataSet(9,
                                 1,
                                 nb_classes=2,
                                 class_labels=['Benign', 'Malignant'])

all_data.setField('input', raw_inputs)
all_data.setField('target', raw_target)
all_data.setField('class', raw_target)

test_data_temp, training_data_temp = all_data.splitWithProportion(0.33)

test_data = ClassificationDataSet(9,
                                  1,
                                  nb_classes=2,
                                  class_labels=['Benign', 'Malignant'])
for n in xrange(0, test_data_temp.getLength()):
    test_data.addSample(
        test_data_temp.getSample(n)[0],
        test_data_temp.getSample(n)[1])

training_data = ClassificationDataSet(9,
                                      1,
                                      nb_classes=2,
                                      class_labels=['Benign', 'Malignant'])
for n in xrange(0, training_data_temp.getLength()):
    training_data.addSample(
        training_data_temp.getSample(n)[0],
        training_data_temp.getSample(n)[1])

training_data._convertToOneOfMany()
test_data._convertToOneOfMany()

#********************End of Data Preparation***************************
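With both one-hot-encoded sets prepared, the usual next step in these examples is to build and train a softmax classifier and then use it for prediction. A rough sketch, assuming the standard PyBrain imports and an arbitrary hidden-layer size:

# Sketch only: train briefly, persist the network, and classify one test sample.
from pybrain.tools.shortcuts import buildNetwork
from pybrain.supervised.trainers import BackpropTrainer
from pybrain.structure.modules import SoftmaxLayer
from pybrain.tools.customxml.networkwriter import NetworkWriter

fnn = buildNetwork(training_data.indim, 9, training_data.outdim,
                   outclass=SoftmaxLayer)
trainer = BackpropTrainer(fnn, dataset=training_data, momentum=0.1,
                          verbose=True, weightdecay=0.01)
trainer.trainEpochs(20)
NetworkWriter.writeToFile(fnn, 'breast_cancer_net.xml')   # assumed file name
probs = fnn.activate(test_data['input'][0])               # per-class activations
print("predicted class: %d" % probs.argmax())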