Example 1
def makeMnistDataSets(path):
    """Return a pair consisting of two datasets, the first being the training
    and the second being the test dataset."""
    # test = SupervisedDataSet(28 * 28, 10)
    test = ClassificationDataSet(28 * 28, 10)
    test_image_file = os.path.join(path, 't10k-images-idx3-ubyte')
    test_label_file = os.path.join(path, 't10k-labels-idx1-ubyte')
    test_images = images(test_image_file)
    test_labels = (flaggedArrayByIndex(l, 10) for l in labels(test_label_file))

    for image, label in zip(test_images, test_labels):
        test.appendLinked(image, label)
        # test.addSample(image, label)

    # train = SupervisedDataSet(28 * 28, 10)
    train = ClassificationDataSet(28 * 28, 10)
    train_image_file = os.path.join(path, 'train-images-idx3-ubyte')
    train_label_file = os.path.join(path, 'train-labels-idx1-ubyte')
    train_images = images(train_image_file)
    train_labels = (flaggedArrayByIndex(l, 10)
                    for l in labels(train_label_file))
    for image, label in zip(train_images, train_labels):
        train.appendLinked(image, label)
        # train.addSample(image, label)

    return train, test
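
A minimal usage sketch for the loader above, assuming the MNIST idx files sit in a local "mnist" directory and that the images(), labels() and flaggedArrayByIndex() helpers are importable; the hidden-layer size is an arbitrary choice:

# Hedged usage sketch, not part of the example above.
from pybrain.tools.shortcuts import buildNetwork
from pybrain.supervised.trainers import BackpropTrainer
from pybrain.structure import SoftmaxLayer

train, test = makeMnistDataSets('mnist')
net = buildNetwork(28 * 28, 100, 10, outclass=SoftmaxLayer)
trainer = BackpropTrainer(net, dataset=train)
trainer.trainEpochs(5)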
Example 2
def mlpClassifier(X,
                  y,
                  train_indices,
                  test_indices,
                  mom=0.1,
                  weightd=0.01,
                  epo=5):
    X_train, y_train = X[train_indices], y[train_indices]
    X_test, y_test = X[test_indices], y[test_indices]

    # Converting the data into a dataset which is easily understood by PyBrain.
    tstdata = ClassificationDataSet(X.shape[1], target=1, nb_classes=8)
    trndata = ClassificationDataSet(X.shape[1], target=1, nb_classes=8)
    for i in range(y_train.shape[0]):
        trndata.addSample(X_train[i, :], y_train[i])
    for i in range(y_test.shape[0]):
        tstdata.addSample(X_test[i, :], y_test[i])
    trndata._convertToOneOfMany()
    tstdata._convertToOneOfMany()
    mlpc = buildNetwork(trndata.indim,
                        100,
                        trndata.outdim,
                        outclass=SoftmaxLayer)
    trainer = BackpropTrainer(mlpc,
                              dataset=trndata,
                              momentum=mom,
                              verbose=True,
                              weightdecay=weightd)
    trainer.trainEpochs(epo)
    y_pred = trainer.testOnClassData(dataset=tstdata)
    print "Done. Accu: " + "%.2f" % accuracy_score(y_test, y_pred)
    return y_test, y_pred
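
A hedged sketch of how mlpClassifier might be driven: scikit-learn (already used above for accuracy_score) can supply the train/test index pairs. X and y are assumed to be numpy arrays with eight classes, matching nb_classes above:

# Hedged usage sketch; KFold comes from scikit-learn, not from this example.
from sklearn.model_selection import KFold

for train_idx, test_idx in KFold(n_splits=5, shuffle=True).split(X):
    y_true, y_pred = mlpClassifier(X, y, train_idx, test_idx, epo=5)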
Example 3
    def construct_train_data(self):

        # print len(self.output_train)
        # print len(self.eigenvector)
        ds = ClassificationDataSet(self.vct_len, 1, nb_classes=2)
        for i in range(len(self.output_train)):
            ds.appendLinked(self.eigenvector[i], self.output_train[i])
        # print ds
        ds.calculateStatistics()

        # split into training and test sets (75% / 25%)
        tstdata_temp, trndata_temp = ds.splitWithProportion(0.25)
        tstdata = ClassificationDataSet(self.vct_len, 1, nb_classes=2)
        for n in range(0, tstdata_temp.getLength()):
            tstdata.appendLinked(
                tstdata_temp.getSample(n)[0],
                tstdata_temp.getSample(n)[1])

        trndata = ClassificationDataSet(self.vct_len, 1, nb_classes=2)
        for n in range(0, trndata_temp.getLength()):
            trndata.appendLinked(
                trndata_temp.getSample(n)[0],
                trndata_temp.getSample(n)[1])
        # one hot encoding
        # print trndata
        testdata = ClassificationDataSet(self.vct_len, 1, nb_classes=2)
        test_data_temp = self.test_data
        for n in range(len(test_data_temp)):
            testdata.addSample(test_data_temp[n], [0])
        # print testdata
        trndata._convertToOneOfMany()
        tstdata._convertToOneOfMany()
        testdata._convertToOneOfMany()
        return trndata, tstdata, testdata, ds
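
The repeated _convertToOneOfMany() calls above one-hot encode the targets. A small sketch of the effect, assuming a three-class dataset with scalar labels:

# Hedged sketch: scalar class targets become one-hot rows, and the original
# class indices move to the dataset's 'class' field.
from pybrain.datasets import ClassificationDataSet

ds = ClassificationDataSet(2, 1, nb_classes=3)
ds.appendLinked([0.1, 0.2], [2])
ds._convertToOneOfMany()
print(ds['target'][0])  # e.g. [0. 0. 1.]
print(ds['class'][0])   # [2]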
Example 4
def get_datasets(inputdir, dstype="all", proportion=0.3):
    untransformed_files = [f for f in os.listdir(inputdir) if "trans" not in f]

    if dstype == "symbol":
        files = [f for f in untransformed_files if f.endswith(".png")]
        classifier_func = classes.get_symbol_class
        num_classes = 2
    elif dstype == "pitch":
        files = [f for f in untransformed_files if "note" in f]
        classifier_func = classes.get_pitch_class
        num_classes = 18
    elif dstype == "note_duration":
        files = [
            f for f in untransformed_files if "note" in f and "other" not in f
        ]
        classifier_func = classes.get_duration_class
        num_classes = 5
    elif dstype == "rest_duration":
        files = [f for f in untransformed_files if "rest" in f]
        classifier_func = classes.get_duration_class
        num_classes = 5
    elif dstype == "quarters_and_eighths":
        files = [
            f for f in untransformed_files
            if "note-quarter" in f or "note-eighth" in f
        ]
        classifier_func = classes.quarter_or_eighth
        num_classes = 2
    else:
        files = []
        classifier_func = classes.get_symbol_class
        num_classes = 18

    random_indices = np.random.permutation(len(files))
    sep = int(len(files) * proportion)

    train_indices = random_indices[sep:]
    test_indices = random_indices[:sep]

    train_files = [f for i, f in enumerate(files) if i in train_indices]
    test_files = [f for i, f in enumerate(files) if i in test_indices]

    transformed = []
    for f in train_files:
        transformed += images.get_transformed_filenames(f)

    train_files += transformed

    Xtrain, ytrain = get_data(inputdir, train_files, classifier_func)
    Xtest, ytest = get_data(inputdir, test_files, classifier_func)

    train_set = ClassificationDataSet(Xtrain.shape[1], nb_classes=num_classes)
    for i in range(len(Xtrain)):
        train_set.addSample(Xtrain[i], ytrain[i])

    test_set = ClassificationDataSet(Xtest.shape[1], nb_classes=num_classes)
    for i in range(len(Xtest)):
        test_set.addSample(Xtest[i], ytest[i])

    return train_set, test_set
Example 5
def main():
    # Get Data
    dataSets = genfromtxt('normalizedData.csv', delimiter=',')
    alldata = ClassificationDataSet(13, 1, nb_classes=3)
    for dataSet in dataSets:
        alldata.addSample(dataSet[1:14], int(dataSet[0]) - 1)

    # Split the data
    tstdata_temp, trndata_temp = alldata.splitWithProportion(0.25)
    tstdata = ClassificationDataSet(13, 1, nb_classes=3)
    for n in range(0, tstdata_temp.getLength()):
        tstdata.addSample(
            tstdata_temp.getSample(n)[0],
            tstdata_temp.getSample(n)[1])
    trndata = ClassificationDataSet(13, 1, nb_classes=3)
    for n in range(0, trndata_temp.getLength()):
        trndata.addSample(
            trndata_temp.getSample(n)[0],
            trndata_temp.getSample(n)[1])
    trndata._convertToOneOfMany()
    tstdata._convertToOneOfMany()

    # Build Network
    fnn = buildNetwork(trndata.indim, 4, 4, 4, trndata.outdim)

    # Construct Trainer
    trainer = BackpropTrainer(fnn, trndata, learningrate=0.1)

    # Train
    while True:
        trainer.trainEpochs(1)
        trnresult = percentError(trainer.testOnClassData(), trndata['class'])
        print("Training Test Error: %5.2f%%" % trnresult)
        if trnresult < 1:
            break

    tstresult = percentError(trainer.testOnClassData(dataset=tstdata),
                             tstdata['class'])
    print("test error: %5.2f%%" % tstresult)
    out1 = fnn.activate([
        0.70789474, 0.13636364, 0.60962567, 0.31443299, 0.41304348, 0.83448276,
        0.70253165, 0.11320755, 0.51419558, 0.47098976, 0.33333333, 0.58608059,
        0.71825963
    ])
    out2 = fnn.activate([
        0.26578947, 0.70355731, 0.54545455, 0.58762887, 0.10869565, 0.3862069,
        0.29746835, 0.54716981, 0.29652997, 0.11262799, 0.25203252, 0.47619048,
        0.21540656
    ])
    out3 = fnn.activate([
        0.81578947, 0.66403162, 0.73796791, 0.71649485, 0.2826087, 0.36896552,
        0.08860759, 0.81132075, 0.29652997, 0.67576792, 0.10569106, 0.12087912,
        0.20114123
    ])
    print(out1, out2, out3)
Example 6
def execute(data, learn_rate, momentum_rate, file_result, p_train):

    inputs = data[:, :-1]  # copy every column except the last
    targets = data[:, -1]  # copy the last column

    train_data = ClassificationDataSet(4, 1, nb_classes=3)
    test_data = ClassificationDataSet(4, 1, nb_classes=3)

    size = int(len(inputs) * p_train)
    for n in range(0, size):
        #print(targets[n])
        train_data.addSample(inputs[n], [targets[n]])

    for n in range(size, len(inputs)):
        #print(targets[n])
        test_data.addSample(inputs[n], [targets[n]])

    train_data._convertToOneOfMany()
    test_data._convertToOneOfMany()

    fnn = buildNetwork(train_data.indim, 2, train_data.outdim)
    trainer = BackpropTrainer(fnn,
                              train_data,
                              learningrate=learn_rate,
                              momentum=momentum_rate,
                              verbose=False)

    epochs = 0
    for i in range(300):
        epochs += 1
        trainer.train()

    #print (trainer.testOnClassData())
    #print (trainer.testOnData())

    cont = 0
    for test in test_data:
        r = fnn.activate(test[0])
        cls = convert(r)
        print(cls, test[1])
        if ((cls == test[1]).all()):
            cont += 1

    print(cont)

    accuracy = cont / len(test_data)

    line_result = str(momentum_rate) + "\t" + str(learn_rate) + "\t" + str(
        accuracy) + "\t" + str(epochs) + "\t" + str(p_train)

    file_result.write(line_result + "\n")
    file_result.flush()
Example 7
def splitWithProportion(self, proportion=0.7):
    indices = random.permutation(len(self))
    separator = int(len(self) * proportion)

    leftIndices = indices[:separator]
    rightIndices = indices[separator:]

    leftDs = ClassificationDataSet(inp=self['input'][leftIndices].copy(),
                                   target=self['target'][leftIndices].copy())
    rightDs = ClassificationDataSet(inp=self['input'][rightIndices].copy(),
                                    target=self['target'][rightIndices].copy())

    return leftDs, rightDs
Example 8
 def classify(self, file_name):
     self.load()
     self.t_ds = ClassificationDataSet(8, 1, nb_classes=2)
     self.ds = ClassificationDataSet(8, 1, nb_classes=2)
     self.classifier_neural_net = buildNetwork(8,
                                               30,
                                               2,
                                               outclass=SoftmaxLayer,
                                               hiddenclass=TanhLayer)
     #self.add_patients_data_to_train(file_name);
     self.trainer = BackpropTrainer(self.classifier_neural_net, self.ds)
     self.add_patients_data_to_train(file_name)
     self.trainer.train()
     self.save()
Example 9
def splitWithProportion(self, proportion=0.7):
    """Produce two new datasets, the first one containing the fraction given
    by `proportion` of the samples."""
    indices = random.permutation(len(self))
    separator = int(len(self) * proportion)

    leftIndices = indices[:separator]
    rightIndices = indices[separator:]

    leftDs = ClassificationDataSet(inp=self['input'][leftIndices].copy(),
                                   target=self['target'][leftIndices].copy())
    rightDs = ClassificationDataSet(inp=self['input'][rightIndices].copy(),
                                    target=self['target'][rightIndices].copy())
    return leftDs, rightDs
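
Both splitWithProportion variants above exist because the stock ClassificationDataSet.splitWithProportion can hand back plain SupervisedDataSets, losing the class bookkeeping. A hedged sketch of installing the override as a monkey patch (a common workaround, not part of the PyBrain API):

# Hedged sketch: route splits through the override defined above. It assumes
# numpy's `random` module is in scope for the override, as in PyBrain's source.
from pybrain.datasets import ClassificationDataSet

ClassificationDataSet.splitWithProportion = splitWithProportion

ds = ClassificationDataSet(2, 1, nb_classes=2)
ds.appendLinked([0.0, 1.0], [1])
ds.appendLinked([1.0, 0.0], [0])
train_ds, test_ds = ds.splitWithProportion(0.5)  # left split gets the proportion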
Example 10
def classif():
    means = [(-1, 0), (2, 4), (3, 1)]
    cov = [diag([1, 1]), diag([0.5, 1.2]), diag([1.5, 0.7])]
    alldata = ClassificationDataSet(2, 1, nb_classes=3)
    for n in xrange(400):
        for klass in range(3):
            input = multivariate_normal(means[klass], cov[klass])
            alldata.addSample(input, [klass])

    tstdata, trndata = alldata.splitWithProportion(0.25)

    trndata._convertToOneOfMany()
    tstdata._convertToOneOfMany()

    print "Number of training patterns: ", len(trndata)
    print "Input and output dimensions: ", trndata.indim, trndata.outdim
    print "First sample (input, target, class):"
    print trndata['input'][0], trndata['target'][0], trndata['class'][0]
    fnn = buildNetwork(trndata.indim, 5, trndata.outdim, outclass=SoftmaxLayer)
    trainer = BackpropTrainer(fnn,
                              dataset=trndata,
                              momentum=0.1,
                              verbose=True,
                              weightdecay=0.01)
    ticks = arange(-3., 6., 0.2)
    X, Y = meshgrid(ticks, ticks)
    # need column vectors in dataset, not arrays
    griddata = ClassificationDataSet(2, 1, nb_classes=3)
    for i in xrange(X.size):
        griddata.addSample([X.ravel()[i], Y.ravel()[i]], [0])
    griddata._convertToOneOfMany()  # this is still needed to make the fnn feel comfy

    for i in range(20):
        trainer.trainEpochs(5)

        trnresult = percentError(trainer.testOnClassData(), trndata['class'])
        tstresult = percentError(trainer.testOnClassData(dataset=tstdata),
                                 tstdata['class'])

        print "epoch: %4d" % trainer.totalepochs, \
              "  train error: %5.2f%%" % trnresult, \
              "  test error: %5.2f%%" % tstresult

        out = fnn.activateOnDataset(griddata)
        out = out.argmax(axis=1)  # the highest output activation gives the class
        out = out.reshape(X.shape)
        print out
Example 11
def train(X, y):
    """ Trains and predicts dataset with a Neural Network classifier """

    ds = ClassificationDataSet(len(X.columns), 1, nb_classes=2)
    for k in xrange(len(X)):
        ds.addSample(X.iloc[k], np.array(y[k]))
    tstdata, trndata = ds.splitWithProportion(0.20)
    trndata._convertToOneOfMany()
    tstdata._convertToOneOfMany()
    input_size = len(X.columns)
    target_size = 1
    hidden_size = 5
    fnn = None
    if os.path.isfile('fnn.xml'):
        fnn = NetworkReader.readFrom('fnn.xml')
    else:
        fnn = buildNetwork(trndata.indim,
                           hidden_size,
                           trndata.outdim,
                           outclass=SoftmaxLayer)
    trainer = BackpropTrainer(fnn,
                              dataset=trndata,
                              momentum=0.05,
                              learningrate=0.1,
                              verbose=False,
                              weightdecay=0.01)

    trainer.trainUntilConvergence(verbose=False,
                                  validationProportion=0.15,
                                  maxEpochs=100,
                                  continueEpochs=10)
    NetworkWriter.writeToFile(fnn, 'oliv.xml')
    predictions = trainer.testOnClassData(dataset=tstdata)
    return tstdata['class'], predictions
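
A hedged follow-up sketch: reload the network persisted above and score a single row. Note the code reads 'fnn.xml' but writes 'oliv.xml'; the filename below just follows the write call:

# Hedged sketch, using the same NetworkReader the example above relies on.
from pybrain.tools.customxml import NetworkReader

net = NetworkReader.readFrom('oliv.xml')
probs = net.activate(X.iloc[0])  # softmax activations, one per class
predicted_class = probs.argmax()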
Example 12
def test_trained_model(filename, training_filename):
    fileObject = open(filename, 'r')
    fann = pickle.load(fileObject)
    testing_dataset = np.genfromtxt(training_filename,
                                    skip_header=0,
                                    dtype="int",
                                    delimiter='\t')

    data = ClassificationDataSet(len(testing_dataset[0]) - 1, 1, nb_classes=2)
    for aSample in testing_dataset:
        data.addSample(aSample[:-1], [aSample[-1]])

    data._convertToOneOfMany()
    test = BackpropTrainer(fann,
                           dataset=data,
                           momentum=0.1,
                           verbose=False,
                           weightdecay=0.01)

    trnresult = percentError(test.testOnClassData(), data['class'])
    results = "Train error on testing data : %5.2f%%" % trnresult
    log_file.write(results + "  , The length of data " + str(len(data)))
    print results
Example 13
def generate_Testdata(index):
    INPUT_FEATURES = 200
    CLASSES = 3

    # Load the Leukemia1 test split
    train_text, train_classfi_number, train_classfi, train_feature_name = getTargetData(
        "Leukemia1_test.data")

    train_text = getIndexData(train_text, index)

    alldata = ClassificationDataSet(INPUT_FEATURES, 1, nb_classes=CLASSES)
    for i in range(len(train_text)):
        features = train_text[i]
        if train_classfi[i] == "B_cell":
            klass = 0
            alldata.addSample(features, klass)
        elif train_classfi[i] == "AML":
            klass = 1
            alldata.addSample(features, klass)
        elif train_classfi[i] == "T_cell":
            klass = 2
            alldata.addSample(features, klass)
    return {
        'minX': 0,
        'maxX': 1,
        'minY': 0,
        'maxY': 1,
        'd': alldata,
        'index': index
    }
Example 14
def build_dataset(mongo_collection,
                  patch_size=IMG_SIZE,
                  orig_size=IMG_SIZE,
                  nb_classes=2,
                  edgedetect=True,
                  transform=True):
    # deprecated
    if edgedetect:
        import cv2
    from pybrain.datasets import SupervisedDataSet, ClassificationDataSet
    patch_size = min(patch_size, orig_size)
    trim = round((orig_size - patch_size) / 2)
    #ds = SupervisedDataSet(patch_size**2, 1)
    ds = ClassificationDataSet(patch_size**2, target=1, nb_classes=nb_classes)
    cursor = list(mongo_collection.find())
    for one_image in cursor:
        # convert from binary to numpy array and transform
        img_array = np.fromstring(one_image["image"], dtype='uint8')
        if edgedetect:
            img_array = cv2.Canny(img_array, 150, 200)
        img_crop = img_array.reshape(orig_size,
                                     orig_size)[trim:(trim + patch_size),
                                                trim:(trim + patch_size)]
        classification = float(one_image["class"])
        if transform:
            transformed = transform_img(img_crop.ravel(), patch_size)
        else:
            transformed = [img_crop.ravel()]
        for one_img in transformed:
            ds.addSample(one_img.ravel(), classification)
    print('New dataset contains %d images (%d positive).' %
          (len(ds), sum(ds['target'])))
    return ds
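
A hedged usage sketch for build_dataset, with placeholder database and collection names; each MongoDB document is assumed to carry binary 'image' bytes and a numeric 'class', as the function expects:

# Hedged sketch: pymongo's MongoClient connects to localhost by default.
# edgedetect/transform are disabled to avoid the cv2 and transform_img
# dependencies.
from pymongo import MongoClient

collection = MongoClient()['imagedb']['patches']
ds = build_dataset(collection, patch_size=28, orig_size=28,
                   edgedetect=False, transform=False)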
Example 15
def generate_Testdata(index):
    INPUT_FEATURES = 200
    CLASSES = 5
    train_text, train_classfi_number, train_classfi, train_feature_name = getTargetData(
        "Breast_test.data")

    train_text = getIndexData(train_text, index)

    alldata = ClassificationDataSet(INPUT_FEATURES, 1, nb_classes=CLASSES)
    for i in range(len(train_text)):
        features = train_text[i]
        if train_classfi[i] == "lumina":
            klass = 0
            alldata.addSample(features, klass)
        elif train_classfi[i] == "ERBB2":
            klass = 1
            alldata.addSample(features, klass)
        elif train_classfi[i] == "basal":
            klass = 2
            alldata.addSample(features, klass)
        elif train_classfi[i] == "normal":
            klass = 3
            alldata.addSample(features, klass)
        elif train_classfi[i] == "cell_lines":
            klass = 4
            alldata.addSample(features, klass)
    return {
        'minX': 0,
        'maxX': 1,
        'minY': 0,
        'maxY': 1,
        'd': alldata,
        'index': index
    }
Example 16
 def __init__(self, indim, hiddendim, nb_classes):
     #net = buildNetwork(size[0] * size[1], 96, ds.outdim, outclass=SoftmaxLayer)
     self.ds = ClassificationDataSet(indim, nb_classes=nb_classes)
     self.net = buildNetwork(indim,
                             hiddendim,
                             nb_classes,
                             outclass=SoftmaxLayer)
Example 17
def gen_data(csv_file, db):
    keywords = {}
    count = 0
    img_list = []

    with open(csv_file) as f:
        content = f.readlines()

    for line in content:
        aux = line.replace('\n', '').split(',')
        if aux[1] not in keywords:
            keywords[aux[1]] = count
            count += 1
        img_list.append(aux)

    data = ClassificationDataSet(768, len(keywords), nb_classes=len(keywords))
    n = len(keywords)

    for img in img_list:
        path = db + '/' + img[0]
        im = Image.open(path).convert('RGB')
        data.addSample(get_img_feats(im),
                       get_keyword_class(keywords[img[1]], n))

    return data, n, keywords
Example 18
def make_data_set(beg, end):
    ds = ClassificationDataSet(
        HISTORY * 2 + 1,
        class_labels=['None', 'Buy', 'Sell'])  #SupervisedDataSet(HISTORY*3, 1)
    trainQ = rawData[(rawData.tradeDate <= end) & (rawData.tradeDate >= beg)]

    for idx in range(1, len(trainQ) - HISTORY - 1 - HOLD - 1):
        cur = idx + HISTORY - 1
        if (abs(trainQ.iloc[cur]['MACD']) > 0.5):
            continue
        sample = []
        for i in range(HISTORY):
            #sample.append( trainQ.iloc[idx+i]['EMAL'] )#  [['EMAL','DIFF','DEA','CDIS']] ) )
            sample.append(trainQ.iloc[idx + i]['DIFF'])
            sample.append(trainQ.iloc[idx + i]['DEA'])

        sample.append(trainQ.iloc[cur]['CDIS'])
        if max(trainQ.iloc[cur + 1:cur + HOLD + 1]['EMAS']) / trainQ.iloc[cur]['closeIndex'] > 1.05:
            answer = 1
        elif min(trainQ.iloc[cur + 1:cur + HOLD + 1]['EMAS']) / trainQ.iloc[cur]['closeIndex'] < 0.95:
            answer = 2
        else:
            answer = 0


        # print(sample)
        ds.addSample(sample, answer)
    return ds
Example 19
def toClassificationDataset(codedSampleSet):
   
    classifiedSampleSet = []
    
    # Calculate the unique classes
    classes = []
    for sample in codedSampleSet:
    
        classifier = getClassifier(sample)
        if classifier not in classes:
            classes.append(classifier)
    classes.sort()
    
    # Now that we have all the classes, we process the outputs
    for sample in codedSampleSet:
        classifier = getClassifier(sample)
        classifiedSample = one_to_n(classes.index(classifier), len(classes))
        classifiedSampleSet.append(classifiedSample)

    # Build the dataset
    sampleSize = len(codedSampleSet[0])
    classifiedSampleSize = len(classifiedSampleSet[0])
    dataset = ClassificationDataSet(sampleSize, classifiedSampleSize)
    
    for i in range(len(classifiedSampleSet)):
        dataset.addSample(codedSampleSet[i], classifiedSampleSet[i])

    return dataset, classes
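
A hedged usage sketch for toClassificationDataset, assuming getClassifier() maps each coded sample to its label (both names come from the surrounding project, not PyBrain):

# Hedged sketch: train a softmax net on the one-hot dataset built above.
# Hidden size and epoch count are arbitrary.
from pybrain.tools.shortcuts import buildNetwork
from pybrain.supervised.trainers import BackpropTrainer
from pybrain.structure import SoftmaxLayer

dataset, classes = toClassificationDataset(codedSampleSet)
net = buildNetwork(dataset.indim, 10, dataset.outdim, outclass=SoftmaxLayer)
trainer = BackpropTrainer(net, dataset)
trainer.trainEpochs(20)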
Example 20
def generate_data():
    index = [2242,3833,3252,3286,1460,7054,6036,2527,538,3886,3329,6464,826,7071,3454,6544,2487,4225,4422,6447,681,2203,5715,3188,6417,3785,7095,391,3257,6581,6962,5535,6531,2389,5756,3262,1808,1860,3532,7128,7127,7126,7125,7124,7123,7122,7121,7120,7119,7118,7117,7116,7115,7114,7113,7112,7111,7110,7109,7108,7107,7106,7105,7104,7103,7102,7101,7100,7099,7098,7097,7096,7094,7093,7092,7091,7090,7089,7088,7087,7086,7085,7084,7083,7082,7081,7080,7079,7078,7077,7076,7075,7074,7073,7072,7070,7069,7068,7067,7066,7065,7064,7063,7062,7061,7060,7059,7058,7057,7056,7055,7053,7052,7051,7050,7049,7048,7047,7046,7045,7044,7043,7042,7041,7040,7039,7038,7037,7036,7035,7034,7033,7032,7031,7030,7029,7028,7027,7026,7025,7024,7023,7022,7021,7020,7019,7018,7017,7016,7015,7014,7013,7012,7011,7010,7009,7008,7007,7006,7005,7004,7003,7002,7001,7000,6999,6998,6997,6996,6995,6994,6993,6992,6991,6990,6989,6988,6987,6986,6985,6984,6983,6982,6981,6980,6979,6978,6977,6976,6975,6974,6973,6972,6971,6970,6969,6968,6967,6966,6965]

    INPUT_FEATURES = 200 
    CLASSES = 3

    # Load the Lung1 training split
    train_text,train_classfi_number,train_classfi,train_feature_name = getTargetData("Lung1_train.data")
    
    train_text = getIndexData(train_text,index)

    alldata = ClassificationDataSet(INPUT_FEATURES, 1, nb_classes=CLASSES)
    for i in range(len(train_text)):
        features = train_text[i]
        if train_classfi[i]=="A" :
            klass = 0
            alldata.addSample(features, klass)
        elif train_classfi[i]=="C" :
            klass = 1
            alldata.addSample(features, klass)
        elif train_classfi[i]=="N" :
            klass = 2
            alldata.addSample(features, klass)
    return {'minX': 0, 'maxX': 1,
            'minY': 0, 'maxY': 1, 'd': alldata,'index':index}
Example 21
 def trainModel(self):
     self.finalDataSet = np.c_[self.flattenNumericalData, self.flattenCategoryData, self.flattenTargetDataConverted]
     self.finalHeaderSet = self.flattenNumericalHeader + self.flattenCategoryHeader + self.flattenTargetHeader
     self.nattributes = self.flattenNumericalData.shape[1] + self.flattenCategoryData.shape[1]
     ds = ClassificationDataSet(self.nattributes, 1, nb_classes=self.nbClasses)
     for rowData in self.finalDataSet:
         target = rowData[-1]
         variables = rowData[0:-1]
         ds.addSample(variables, target)
     self.testDataSet, self.trainDataSet = ds.splitWithProportion(0.25)
     self.testDataSet._convertToOneOfMany()
     self.trainDataSet._convertToOneOfMany()
     print self.testDataSet
     print self.trainDataSet
     self.net = buildNetwork(self.nattributes, self.nhiddenNerons, self.noutput, hiddenclass=TanhLayer, outclass=SigmoidLayer, bias=True)
     self.trainer = BackpropTrainer(self.net, self.trainDataSet, learningrate=0.001, momentum=0.99)
     begin0 = time.time()
     # self.trainer.trainUntilConvergence(verbose=True, dataset=ds, validationProportion=0.25, maxEpochs=10)
     for i in xrange(10):
         begin = time.time()
         self.trainer.trainEpochs(10)
         end = time.time()
         print 'iteration ', i, ' takes ', end-begin,  'seconds'
     end0 = time.time()
     print 'total time consumed: ', end0 - begin0
Example 22
def generate_data():
    index = [629,2641,1009,3280,6224,4041,2994,5553,5299,6509,2014,759,1024,6375,3820,4884,2596,2744,3935,3577,1238,2309,4445,7128,7127,7126,7125,7124,7123,7122,7121,7120,7119,7118,7117,7116,7115,7114,7113,7112,7111,7110,7109,7108,7107,7106,7105,7104,7103,7102,7101,7100,7099,7098,7097,7096,7095,7094,7093,7092,7091,7090,7089,7088,7087,7086,7085,7084,7083,7082,7081,7080,7079,7078,7077,7076,7075,7074,7073,7072,7071,7070,7069,7068,7067,7066,7065,7064,7063,7062,7061,7060,7059,7058,7057,7056,7055,7054,7053,7052,7051,7050,7049,7048,7047,7046,7045,7044,7043,7042,7041,7040,7039,7038,7037,7036,7035,7034,7033,7032,7031,7030,7029,7028,7027,7026,7025,7024,7023,7022,7021,7020,7019,7018,7017,7016,7015,7014,7013,7012,7011,7010,7009,7008,7007,7006,7005,7004,7003,7002,7001,7000,6999,6998,6997,6996,6995,6994,6993,6992,6991,6990,6989,6988,6987,6986,6985,6984,6983,6982,6981,6980,6979,6978,6977,6976,6975,6974,6973,6972,6971,6970,6969,6968,6967,6966,6965,6964,6963,6962,6961,6960,6959,6958,6957,6956,6955,6954,6953,6952]

    INPUT_FEATURES = 200 
    CLASSES = 3

    # Load the Leukemia1 training split
    train_text,train_classfi_number,train_classfi,train_feature_name = getTargetData("Leukemia1_train.data")
    train_text = getIndexData(train_text,index)

    alldata = ClassificationDataSet(INPUT_FEATURES, 1, nb_classes=CLASSES)
    for i in range(len(train_text)):
        features = train_text[i]
        if train_classfi[i]=="B_cell" :
            klass = 0
            alldata.addSample(features, klass)
        elif train_classfi[i]=="AML" :
            klass = 1
            alldata.addSample(features, klass)
        elif train_classfi[i]=="T_cell" :
            klass = 2
            alldata.addSample(features, klass)
    return {'minX': 0, 'maxX': 1,
            'minY': 0, 'maxY': 1, 'd': alldata,'index':index}
Example 23
 def batch_classify(self, samples):
     ds = ClassificationDataSet(len(self._fx))
     for sample in samples:
         fvec = [sample[l] for l in self._fx]
         ds.addSample(fvec, [0])
     results = self._trainer.testOnClassData(ds)
     return [self._rmap[r] for r in results]
Example 24
    def __init__(self, hidden_nodes=30):
        """
        parameters to buildNetwork are inputs, hidden layers, output
        bias = true allows for a bias unit to be added in each neural net layer
        hiddenclass represents the method used by the hidden layer
        """
        # Regression

        # self.classifier_neural_net = buildNetwork(12, hidden_nodes, 1, bias=True, hiddenclass=TanhLayer)
        # # Initializing dataset for supervised regression training
        # self.data_sets = SupervisedDataSet(12, 1)
        # # classification_trainer uses backpropagation supervised training method for training the newural network
        # self.classification_trainer = BackpropTrainer(self.classifier_neural_net, self.data_sets)

        # Classification
        self.classifier_neural_net = buildNetwork(12,
                                                  hidden_nodes,
                                                  3,
                                                  outclass=SoftmaxLayer,
                                                  hiddenclass=TanhLayer)
        self.data_sets = ClassificationDataSet(12, 1, nb_classes=3)
        self.classification_trainer = BackpropTrainer(
            self.classifier_neural_net,
            self.data_sets,
            momentum=0.1,
            verbose=True,
            weightdecay=0.01)
Example 25
def generate_data(n=400):
    INPUT_FEATURES = 2
    CLASSES = 3
    #means = [(-1, 0), (2, 4), (3, 1)]
    #cov = [diag([1, 1]), diag([0.5, 1.2]), diag([1.5, 0.7])]
    alldata = ClassificationDataSet(INPUT_FEATURES, 1, nb_classes=CLASSES)
    #minX, maxX = means[0][0], means[0][0]
    #minY, maxY = means[0][1], means[0][1]
    #print minX, maxX , minY, maxY
    #for i in range(n):
    #     for klass in range(CLASSES):

    #         features = multivariate_normal(means[klass], cov[klass])
    #         #print means[klass], cov[klass]
    #         #print features
    #         x, y = features
    #         minX, maxX = min(minX, x), max(maxX, x)
    #         minY, maxY = min(minY, y), max(maxY, y)
    #         alldata.addSample(features, [klass])
    #print alldata
    alldata.addSample([0,0], [0])
    alldata.addSample([0,1], [1])
    alldata.addSample([1,0], [1])
    alldata.addSample([1,1], [0])

    return {'minX': 0, 'maxX': 1,
            'minY': 0, 'maxY': 1, 'd': alldata}
Example 26
    def learn(self):
        try:
            self.data_for_training = []
            mem = Memory('http://localhost:9200')
            data = mem.get_data()
            categories = list(data.keys())
            for category in data:
                for req in data[category]:
                    self.data_for_training.append(
                        [req.token, [categories.index(category)]])

            net = buildNetwork(len(self.data_for_training[0][0]),
                               15,
                               len(categories),
                               hiddenclass=SoftmaxLayer,
                               recurrent=False)
            ds = ClassificationDataSet(len(self.data_for_training[0][0]),
                                       nb_classes=len(categories),
                                       class_labels=categories)
            for data in self.data_for_training:
                ds.addSample(data[0], data[1])
            ds._convertToOneOfMany()
            trainer = BackpropTrainer(net,
                                      ds,
                                      momentum=0.1,
                                      learningrate=0.01,
                                      verbose=True)
            trainer.trainUntilConvergence(maxEpochs=500)
            NetworkWriter.writeToFile(net, 'net.xml')
        except Exception as e:
            raise e
        return net
Example 27
def _convert_supervised_to_classification2(supervised_dataset, classes):
    classification_dataset = ClassificationDataSet(supervised_dataset.indim, supervised_dataset.outdim, classes)
    
    for n in xrange(0,supervised_dataset.getLength()):
        classification_dataset.addSample(supervised_dataset.getSample(n)[0], [0])

    return classification_dataset
Example 28
def generate_training_set(dimensions, mult=1):
    '''Return a PyBrain ClassificationDataSet plus the raw training inputs and labels.'''
    P_data_set = ClassificationDataSet(dimensions, 1, nb_classes=2)
    #P_data_set = SupervisedDataSet(dimensions, 1)
    combinations = [
        "".join(seq) for seq in itertools.product("01", repeat=dimensions)
    ]
    train_x = []
    train_y = []
    for i in range(mult):
        for rec in combinations:
            r = list(map(int, list(rec)))
            if (sum(r)) == 0:
                label = 0
            else:
                label = (sum(r) + 1) % 2
            #print(label,r)
            train_x.append(r)
            train_y.append(label)

    for k in range(len(train_y)):
        P_data_set.addSample(np.ravel(train_x[k]), train_y[k])
    '''The classification label is a list of two values: [1, 0] means the output
       is class "0" and [0, 1] means class "1". This encoding is produced by
       _convertToOneOfMany().'''
    P_data_set._convertToOneOfMany()

    return P_data_set, train_x, train_y
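
A hedged usage sketch: fit a small softmax network to the dataset built above (three input bits and a 10x oversampled truth table are arbitrary choices):

# Hedged sketch; hidden size and epoch cap are arbitrary.
from pybrain.tools.shortcuts import buildNetwork
from pybrain.supervised.trainers import BackpropTrainer
from pybrain.structure import SoftmaxLayer

ds, train_x, train_y = generate_training_set(3, mult=10)
net = buildNetwork(ds.indim, 8, ds.outdim, outclass=SoftmaxLayer)
trainer = BackpropTrainer(net, ds)
trainer.trainUntilConvergence(maxEpochs=100)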
Example 29
def conductGeneration(generation, corpus):
    '''Conducts a generation of learning and testing on the input data.

    generation (int) --- the number of the generation
    corpus (object) --- corpus object containing info needed
    '''
    # Set up the dataset skeleton
    alldata = ClassificationDataSet(2,
                                    1,
                                    nb_classes=3,
                                    class_labels=['a', 'b', 'c'])

    # means = [(-1,0),(2,4),(3,1)]
    # cov = [diag([1,1]), diag([0.5,1.2]), diag([1.5,0.7])]

    # alldata = ClassificationDataSet(2, 1, nb_classes=3)
    # for n in xrange(400):
    #     for klass in range(3):
    #         input = multivariate_normal(means[klass],cov[klass])
    #         print type(input)
    #         alldata.addSample(input, [klass])

    alldata.addSample((0, 1), [1])
    alldata.addSample((1, 0), [0])
    alldata.addSample((0, 0), [2])
    alldata.addSample((1, 1), [0])

    trndata, partdata = alldata.splitWithProportion(0.5)

    return alldata
Example 30
def loadData(paths, classes):
    class_dict = generateDictOfClasses(classes)
    all_data = None

    for i in range(len(paths)):
        path = paths[i]
        print path

        for img in os.listdir(path):
            m = re.search(r'.*Thumbs\.(db)', img)  # in Windows XP, this is a problem

            if (m is None):
                img_path = path + "/" + img
                img_data = thresholdOp(Image(img_path))
                flattened = img_data.getNumpy()[:, :, 1].flatten()  # 25x20 (w x h)
                flattened[flattened == 255] = 1  # set every '255' to '1'

                if all_data is None:
                    all_data = ClassificationDataSet(len(flattened),
                                                     nb_classes=len(classes),
                                                     class_labels=classes)

                all_data.addSample(
                    flattened, [class_dict[classes[i]]])  # [data[1],data[2]]

    return all_data