Example #1
import DBN  # project module providing test_DBN/predict
import numpy as np
import pandas as pd
import sklearn.metrics
from sklearn import tree
from sklearn.model_selection import train_test_split, KFold

def main(project):
	DBN.test_DBN(project=project, flag='within')
	DBN.predict(project=project, flag='within')
	tr = pd.read_csv(str(project)+'_semantic_features.csv').as_matrix()
	np.random.shuffle(tr)
	labels = tr[:,-1].astype('int32')
	x = tr[:,:-2].astype('float32')  # TODO: verify this slice; the label is the last column
	X_train, X_test, labels_train, labels_test = train_test_split(x,labels, test_size=0.33, 
									random_state=42)
	MIN_K = 1
	MAX_K = 10
	K_RANGE = 1
	N_SPLITS = 10

	kfold = KFold(n_splits = N_SPLITS, shuffle = True)
	ks = np.arange(MIN_K, MAX_K, K_RANGE)

	for k in ks:
		score = 0
		for train, test in kfold.split(X_train, labels_train):
			lr = tree.DecisionTreeClassifier()  # note: the loop variable k is unused; the tree takes no k parameter
			lr.fit(X_train[train], labels_train[train])
			score = score + np.sum(np.equal(labels_train[test], lr.predict(X_train[test])))
		
		print "Decision Tree Cross-Validation Score:", (score/float(test.shape[0]*N_SPLITS))
	lr = tree.DecisionTreeClassifier()
	lr.fit(X_train, labels_train)  # fit on the training split, then evaluate on the held-out test split
	score = lr.score(X_test, labels_test)
		
	y_pred = lr.predict(X_test)
	#print(y_pred)
	print "Decision Tree Test Score:", score
	
	print "Decision Tree Precision Score: ", sklearn.metrics.precision_score(labels_test, y_pred)
	print "Decision Tree Recall Score:", sklearn.metrics.recall_score(labels_test, y_pred)
	print "Decision Tree F1 Score: ", sklearn.metrics.f1_score(labels_test, y_pred)
Example #2
def test_dbn():
    t0 = time.time()
    DBN.test_DBN(pretraining_epochs=1,
                 training_epochs=2,
                 batch_size=300,
                 output_folder='tmp_DBN_plots')
    print >> sys.stderr, "test_mlp took %.3fs expected ??s in our buildbot" % (
        time.time() - t0)
Example #3
    def DBNPretraining(self, dataDir):
        modelDir = os.path.dirname(self.modelFile)
        title = os.path.basename(self.modelFile).split('.')[0]
        layerSizes = self.AELayerSizes[:len(self.AELayerSizes) / 2 + 1]  # encoder half, up to the bottleneck
        types = ['GB'] + ['BB'] * (len(layerSizes) - 2)  # Gaussian-Bernoulli input layer, Bernoulli-Bernoulli above

        dbn = DBN(title, layerSizes, types, modelDir)
        dbn.DBNTrain(dataDir)
Example #4
def reconstruct(mdlfile='output/metallica_gtr-model.pickle', datfile='input/metallica_gtr-data.pickle', ind=None):
    # load data and model
    dbn = DBN.load_from_dump(mdlfile)
    raw_x = cPickle.load(open(datfile, 'rb')).astype(dtype=DBN.NUMPY_DTYPE)

    # choose input data
    if ind is not None:
        inp = raw_x[ind,:].reshape(1, raw_x.shape[1])

        # find latents, then sample from latents to reconstruct
        out = dbn.latents(inp)
        outs = np.tile(out, (10,1))
        out_p = dbn.sample(outs, threshold=0.0)

        # write input and its reconstruction
        outfile = 'output/test_inp.midi'
        midiparser.midiwrite(inp.reshape(88, 64).T, outfile, resolution=2, patch_nums=82)
        outfile = 'output/test_out.midi'
        midiparser.midiwrite(out_p.T, outfile, resolution=2, patch_nums=82)

    else:
        latents = []
        for ind in xrange(raw_x.shape[0]):
            inp = raw_x[ind,:].reshape(1, raw_x.shape[1])
            out = dbn.latents(inp)
            latents.append(out)
        return latents
Example #5
def createDBN(traindata, trainlabel, testdata, testlabel, parameter_file):
    f = open(parameter_file, 'r')
    n = int(f.readline())
    for k in range(n):
        s = f.readline()
        args = s.split(';')
        num_hid_layers = int(args[0])  # number of hidden layers in this configuration
        
        #hidden layer sizes
        s = args[1].split(',')
        hid_layer_sizes = []
        for i in range(num_hid_layers):
            hid_layer_sizes.append(int(s[i]))

        #momentums
        s = args[2].split(',')
        momentums = []
        for i in range(num_hid_layers):
            momentums.append(float(s[i]))

        #sparsities
        s = args[3].split(',')
        sparsities = [None] * num_hid_layers
        for i in range(num_hid_layers):
            sparsities[i] = float(s[i])

        dbn = DBN(num_hid_layers, hid_layer_sizes, momentums, sparsities, traindata, trainlabel)
        trainingOption(dbn, num_hid_layers, traindata, trainlabel, testdata, testlabel, args[4], 'regRBM_'+parameter_file+'_')        
    f.close()
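For reference, the parameter file parsed above is a header line giving the number of configurations, followed by one semicolon-separated line per configuration: hidden-layer count; comma-separated hidden sizes; momentums; sparsities; and a final field forwarded to trainingOption. A hypothetical one-configuration file (all values illustrative, not from the source):

1
2;500,200;0.5,0.9;0.1,0.05;CD1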
Example #6
def compare_dnn_init(
    X_train,
    y_train,
    X_test,
    y_test,
    layers=[784, 100, 10],
):
    print(f"Architecture : {layers}")
    # 1. initialize two identical networks;
    dnn_random = DNN(layers)
    dnn_pretrained = DNN(layers)

    # 2. pretrain one of the two networks by treating it as a stack of RBMs
    # (unsupervised learning);
    layers_dbn = layers[:-1]
    dbn = DBN(layers_dbn)
    print("Pretraining RBM...")
    dbn.train(X_train, epochs=100, batch_size=32, learning_rate=0.1)
    dnn_pretrained.init_DNN_with_DBN(dbn)

    # 3. train the pretrained network with the backpropagation algorithm;
    dnn_pretrained.train(X_train,
                         y_train,
                         epochs=20,
                         batch_size=128,
                         learning_rate=0.1)

    # 4. train the second, randomly initialized network with the backpropagation algorithm;
    dnn_random.train(X_train,
                     y_train,
                     epochs=20,
                     batch_size=128,
                     learning_rate=0.1)

    # 5. compute the misclassification rates of network 1 (pretrained + trained)
    # and network 2 (trained only) on both the 'train' and 'test' sets
    error_random = dnn_random.error_rate(X_test, y_test)
    error_pretrained = dnn_pretrained.error_rate(X_test, y_test)
    print(f"Error rate random init : {error_random:.3f}")
    print(f"Error rate RBM pretrained : {error_pretrained:.3f}")

    return error_random, error_pretrained
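init_DNN_with_DBN is not shown in this excerpt. The usual scheme is to copy each trained RBM's weights and hidden biases into the matching DNN layer and leave the output layer randomly initialized. A rough sketch under that assumption (the attribute names rbms, W, b, weights, and biases are guesses, not the actual API):

def init_dnn_with_dbn_sketch(dnn, dbn):
    # copy each pretrained RBM's parameters into the corresponding DNN layer;
    # the final classification layer keeps its random initialization
    for i, rbm in enumerate(dbn.rbms):
        dnn.weights[i] = rbm.W.copy()
        dnn.biases[i] = rbm.b.copy()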
Example #7
def test_dbn():
    DBN.test_DBN(pretraining_epochs=1, training_epochs=1, batch_size=300)
Example #8
error_random_all = []
error_pretrained_all = []
for training_size in training_sizes:
    sub_X = X_train[:training_size]
    sub_y = y_train[:training_size]
    error_random, error_pretrained = compare_dnn_init(sub_X, sub_y, X_test,
                                                      y_test, layers)
    error_random_all.append(error_random)
    error_pretrained_all.append(error_pretrained)
print("Random:", error_random_all)
print("Pretrain:", error_pretrained_all)
plot_results(
    error_random_all,
    error_pretrained_all,
    training_sizes,
    "Training size",
    "Comparing traing sizes",
)

# Finally, we look for a configuration that gives the best possible
# classification rate (don't hesitate to use all 60,000 samples and
# large networks).

dnn = DNN([784, 600, 400, 200, 10])
dbn = DBN([784, 600, 400, 200])
print("Pretraining RBM...")
dbn.train(X_train[:10000], epochs=100, batch_size=128, learning_rate=0.1)
dnn.init_DNN_with_DBN(dbn)
dnn.train(X_train, y_train, epochs=100, batch_size=128, learning_rate=0.1)
error = dnn.error_rate(X_test, y_test)
print(f"Meilleure erreur: {error}")
Example #9
filename=data_dir + "GM12878_200bp_Data_3Cl_l2normalized_TestSet.txt";
test_set_x_org=numpy.loadtxt(filename,delimiter='\t',dtype='float32')
filename=data_dir + "GM12878_200bp_Classes_3Cl_l2normalized_TestSet.txt";
test_set_y_org=numpy.loadtxt(filename,delimiter='\t',dtype=object)
prev,test_set_y_org=cl.change_class_labels(test_set_y_org)

filename=data_dir + "GM12878_Features_Unique.txt";
features=numpy.loadtxt(filename,delimiter='\t',dtype=object)  

rng=numpy.random.RandomState(1000)

# train
classifier,training_time=DBN.train_model(train_set_x_org=train_set_x_org, train_set_y_org=train_set_y_org, 
                                         valid_set_x_org=valid_set_x_org, valid_set_y_org=valid_set_y_org, 
                                         pretrain_lr=0.1,finetune_lr=0.1, alpha=0.01, 
                                         lambda_reg=0.0001, alpha_reg=0.0001, 
                                         n_hidden=[64,64,64], persistent_k=15,
                                         pretraining_epochs=5, training_epochs=1000,
                                         batch_size=100, rng=rng)
                        
# test
test_set_y_pred,test_set_y_pred_prob,test_time=DBN.test_model(classifier, test_set_x_org, batch_size=200)
print test_set_y_pred[0:20]
print test_set_y_pred_prob[0:20]
print test_time
# evaluate classification performance
perf,conf_mat=cl.perform(test_set_y_org,test_set_y_pred,numpy.unique(train_set_y_org))
print perf
print conf_mat

# collect garbage
gc_collect()
Example #10
def test_dbn():
    t0 = time.time()
    DBN.test_DBN(pretraining_epochs=1, training_epochs=2, batch_size=300,
                 output_folder='tmp_DBN_plots')
    print >> sys.stderr, "test_DBN took %.3fs expected ??s in our buildbot" % (time.time() - t0)
Example #11
filename=data_dir + "GM12878_200bp_Data_3Cl_l2normalized_TestSet.txt";
test_set_x_org=numpy.loadtxt(filename,delimiter='\t',dtype='float32')
filename=data_dir + "GM12878_200bp_Classes_3Cl_l2normalized_TestSet.txt";
test_set_y_org=numpy.loadtxt(filename,delimiter='\t',dtype=object)
prev,test_set_y_org=cl.change_class_labels(test_set_y_org)

filename=data_dir + "GM12878_Features_Unique.txt";
features=numpy.loadtxt(filename,delimiter='\t',dtype=object)  

rng=numpy.random.RandomState(1000)

# train
classifier,training_time=DBN.train_model(train_set_x_org=train_set_x_org, train_set_y_org=train_set_y_org, 
                                         valid_set_x_org=valid_set_x_org, valid_set_y_org=valid_set_y_org, 
                                         pretrain_lr=0.1,finetune_lr=0.1, alpha=0.01, 
                                         lambda_reg=0.0001, alpha_reg=0.0001, 
                                         n_hidden=[64,64,64], persistent_k=15,
                                         pretraining_epochs=5, training_epochs=1000,
                                         batch_size=100, rng=rng)
                        
# test
test_set_y_pred=DBN.test_model(classifier, test_set_x_org, batch_size=200)

# evaluate classification performance
perf,conf_mat=cl.perform(test_set_y_org,test_set_y_pred,numpy.unique(train_set_y_org))
print perf
print conf_mat

# collect garbage
gc_collect()
Example #12
def main():
    # Set parameters and print them
    args = parse_args()
    print_args(args)
    whatLayer = args['whatLayer']
    epochs = args['epochs']
    visibleUnits = args['visibleUnits']
    hiddenUnits = args['hiddenUnits']
    secondLayerUnits = args['secondLayerUnits']
    approxMethod = args['approxMethod']
    approxSteps = args['approxSteps']
    learnRate = args['learnRate']
    persistStart = args['persistStart']
    momentum = args['momentum']
    decayMagnitude = args['decayMagnitude']
    decayType = args['decayType']
    sigma = args['sigma']
    batchSize = args['batchSize']
    binarization = args['binarization']

    # 'layers' is used below but never defined in this excerpt; presumably the
    # parsed unit counts define it:
    layers = [visibleUnits, hiddenUnits, secondLayerUnits]

    trace_file = args['trace_file']  # saving results
    save_file = args['save_file']  # saving parameters
    load_file = args['load_file']  # loading parameters

    print('Loading data')
    with gzip.open('../data/mnist.pkl.gz', 'r') as f:
        # combine train and valid and leave test
        (TrainSet,
         y_train), (x_test, y_test), (ValidSet,
                                      y_Valid) = pickle.load(f,
                                                             encoding='latin1')

    TrainSet = np.concatenate((TrainSet, x_test), axis=0)
    TrainSet = sklearn.preprocessing.binarize(
        TrainSet, threshold=binarization).T  # Create binary data
    ValidSet = sklearn.preprocessing.binarize(
        ValidSet, threshold=binarization).T  # Create binary data

    if len(load_file) == 0:
        for ii in range(len(layers) - 1):
            if ii == 0:
                print('Initializing model')
                model = DBN.DBN(
                    n_vis=layers[ii],
                    n_hid=layers[ii + 1],
                    momentum=momentum,
                    sigma=sigma,
                    trainData=TrainSet,
                    wiseStart=True,
                )

            else:
                params = DBN.DBN.load("DBN")
                model = DBN.DBN(layer=ii + 1,
                                params=params,
                                n_vis=layers[ii],
                                n_hid=layers[ii + 1])

            print('Start of training')
            Training.fit(model,
                         TrainSet,
                         ValidSet,
                         n_epochs=epochs,
                         weight_decay=decayType,
                         decay_magnitude=decayMagnitude,
                         lr=learnRate,
                         batch_size=batchSize,
                         NormalizationApproxIter=approxSteps,
                         approx=approxMethod,
                         persistent_start=persistStart,
                         trace_file=trace_file,
                         save_file=save_file)

    else:
        print('Initializing model')
        params = DBN.DBN.load(load_file)
        model = DBN.DBN(layer=whatLayer, params=params)
        print('W1 shape', model.W1.shape, 'hbias', model.hbias1.shape, 'vbias',
              model.vbias1.shape)
        if model.layer >= 3:
            print('W2 shape', model.W22.shape, 'hbias', model.hbias2.shape,
                  'vbias', model.vbias2.shape)
            if model.layer == 4:
                print('W3 shape', model.W33.shape, 'hbias', model.hbias3.shape,
                      'vbias', model.vbias3.shape)
        print('Next W shape', model.W.shape, 'hbias', model.hbias.shape,
              'vbias', model.vbias.shape)
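parse_args is not included in the excerpt; the code indexes its result like a dictionary. A hedged argparse sketch that would satisfy those lookups (option names mirror the keys used above; the defaults are illustrative):

import argparse

def parse_args():
    # hypothetical reconstruction: only a few options are spelled out; the
    # remaining keys (approxMethod, approxSteps, learnRate, persistStart,
    # momentum, decayMagnitude, decayType, sigma, trace_file, save_file,
    # load_file) follow the same pattern
    parser = argparse.ArgumentParser()
    parser.add_argument('--whatLayer', type=int, default=2)
    parser.add_argument('--epochs', type=int, default=10)
    parser.add_argument('--visibleUnits', type=int, default=784)
    parser.add_argument('--hiddenUnits', type=int, default=500)
    parser.add_argument('--secondLayerUnits', type=int, default=250)
    parser.add_argument('--batchSize', type=int, default=100)
    parser.add_argument('--binarization', type=float, default=0.5)
    return vars(parser.parse_args())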
Example #13
def test_DBN(finetune_lr=0.01,
             pretraining_epochs=100,
             pretrain_lr=0.01,
             k=1,
             training_epochs=1000,
             dataset="C:\Python27\Lib\data\dex.pkl.gz",
             batch_size=10):
    """
    Demonstrates how to train and test a Deep Belief Network.

    This is demonstrated on MNIST.

    :type finetune_lr: float
    :param finetune_lr: learning rate used in the finetune stage
    :type pretraining_epochs: int
    :param pretraining_epochs: number of epoch to do pretraining
    :type pretrain_lr: float
    :param pretrain_lr: learning rate to be used during pre-training
    :type k: int
    :param k: number of Gibbs steps in CD/PCD
    :type training_epochs: int
    :param training_epochs: maximal number of iterations to run the optimizer
    :type dataset: string
    :param dataset: path to the pickled dataset
    :type batch_size: int
    :param batch_size: the size of a minibatch
    """
    datasets = load_data(dataset)

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # print train_set_y

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size

    # numpy random generator
    numpy_rng = numpy.random.RandomState(123)
    print '... building the model'
    # construct the Deep Belief Network

    H_L_table = createLayerTable(num_of_layers, lsize)
    #H_L_table = createFunnelTable(num_of_layers, lsize)

    dbn = DBN(numpy_rng=numpy_rng,
              n_ins=n_ins,
              hidden_layers_sizes=H_L_table,
              n_outs=2)

    # ########################
    # PRETRAINING THE MODEL #
    # ########################
    print '... getting the pretraining functions'
    pretraining_fns = dbn.pretraining_functions(train_set_x=train_set_x,
                                                batch_size=batch_size,
                                                k=k)

    print '... pre-training the model'
    start_time = time.clock()
    ## Pre-train layer-wise
    for i in xrange(dbn.n_layers):
        # go through pretraining epochs
        for epoch in xrange(pretraining_epochs):
            # go through the training set
            c = []
            for batch_index in xrange(n_train_batches):
                c.append(pretraining_fns[i](index=batch_index, lr=pretrain_lr))
            print 'Pre-training layer %i, epoch %d, cost ' % (i, epoch),
            print numpy.mean(c)

    end_time = time.clock()
    print >> sys.stderr, ('The pretraining code for file ' +
                          os.path.split(__file__)[1] + ' ran for %.2fm' %
                          ((end_time - start_time) / 60.))

    ########################
    # FINETUNING THE MODEL #
    ########################

    # get the training, validation and testing function for the model
    print '... getting the finetuning functions'
    train_fn, validate_model, test_model = dbn.build_finetune_functions(
        datasets=datasets, batch_size=batch_size, learning_rate=finetune_lr)

    print '... finetuning the model'
    # early-stopping parameters
    patience = 100 * n_train_batches  # look at this many examples regardless
    patience_increase = 2.  # wait this much longer when a new best is
    # found
    improvement_threshold = 0.995  # a relative improvement of this much is
    # considered significant
    validation_frequency = min(n_train_batches, patience / 2)
    # go through this many minibatches before checking the network
    # on the validation set; in this case we check every epoch

    best_params = None
    best_validation_loss = numpy.inf
    test_score = 0.
    start_time = time.clock()

    done_looping = False
    epoch = 0

    while (epoch < training_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in xrange(n_train_batches):

            minibatch_avg_cost = train_fn(minibatch_index)
            iter = (epoch - 1) * n_train_batches + minibatch_index

            if (iter + 1) % validation_frequency == 0:

                validation_losses = validate_model()
                this_validation_loss = numpy.mean(validation_losses)
                print('epoch %i, minibatch %i/%i, validation error %f %%' % \
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:

                    #improve patience if loss improvement is good enough
                    if (this_validation_loss <
                            best_validation_loss * improvement_threshold):
                        patience = max(patience, iter * patience_increase)

                    # save best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iter

                    # test it on the test set
                    test_losses = test_model()
                    test_score = numpy.mean(test_losses)
                    print(('     epoch %i, minibatch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

            if patience <= iter:
                done_looping = True
                break

    end_time = time.clock()
    print(('Optimization complete with best validation score of %f %%,'
           'with test performance %f %%') %
          (best_validation_loss * 100., test_score * 100.))
    print >> sys.stderr, ('The fine tuning code for file ' +
                          os.path.split(__file__)[1] + ' ran for %.2fm' %
                          ((end_time - start_time) / 60.))
    global res_val
    global res_test
    res_val = best_validation_loss
    res_test = test_score
Example #14
def test_dbn():
    DBN.test_DBN(pretraining_epochs=1, training_epochs=1, batch_size=300)
Example #15
label_test = np.squeeze(np.asarray(label_test)) - 1

#data_train=data_train.flatten()
print(data_train.shape)

n_train_batches = data_train.shape[0] // batch_size
print(n_train_batches)

data_train = theano.shared(data_train)
label_train = theano.shared(label_train)
data_test = theano.shared(data_test)
label_test = theano.shared(label_test)
numpy_rng = np.random.RandomState(0)

dbn = DBN.DBN(numpy_rng=numpy_rng,
              n_ins=20,
              hidden_layers_sizes=[10],
              n_outs=4)

print('... getting the pretraining functions')
pretraining_fns = dbn.pretraining_functions(train_set_x=data_train,
                                            batch_size=batch_size,
                                            k=k)

print('... pre-training the model')
start_time = timeit.default_timer()
# Pre-train layer-wise
for i in range(dbn.n_layers):
    # go through pretraining epochs
    for epoch in range(pretraining_epochs):
        # go through the training set
        c = []
Example #16
    def test_DBN(self, finetune_lr=0.1, pretraining_epochs=10,
             pretrain_lr=0.01, k=1, training_epochs=10, batch_size=10,
             hidden_layers_sizes=[400,400], sidelen=20,
             output_folder='DBN_plots', item='test'):
        """
        Demonstrates how to train and test a Deep Belief Network.
    
        This is demonstrated on MNIST.
    
        :type finetune_lr: float
        :param finetune_lr: learning rate used in the finetune stage
        :type pretraining_epochs: int
        :param pretraining_epochs: number of epoch to do pretraining
        :type pretrain_lr: float
        :param pretrain_lr: learning rate to be used during pre-training
        :type k: int
        :param k: number of Gibbs steps in CD/PCD
        :type training_epochs: int
        :param training_epochs: maximal number of iterations to run the optimizer
        :type dataset: string
        :param dataset: path to the pickled dataset
        :type batch_size: int
        :param batch_size: the size of a minibatch
        
        
        from DLRBM_2D_temporal import *
        funcs = DLRBM_2D_temporal()
        funcs.test_DBN()
        
       
        DBN.test_DBN()
    
        """
    
        traindata_path= 'allLpatches_10x10x5.pklz' #'allLpatches_subs_smaller.pklz' #'allLpatches.pklz'
        trainUdata_path= 'allUpatches_10x10x5.pklz'#'allUpatches_subs_smaller.pklz' #'allUpatches.pklz'
        labeldata_path= 'allLabels_10x10x5.pklz' #'allLabels_subs_smaller.pklz' #'allLabels.pklz'
        
        #############
        ## LOAD datasets
        #############
        datasets = self.load_wUdata(traindata_path, labeldata_path, trainUdata_path)
    
        train_set_x, train_set_y = datasets[0]
        np_train_x, np_train_y = datasets[3]
        valid_set_x, valid_set_y = datasets[1]
        np_valid_x, np_valid_y = datasets[4]        
        test_set_x, test_set_y = datasets[2]
        np_test_x, np_test_y = datasets[5]
        

        #########################
        # FORMAT THE DATA
        #########################
        ## transform to pixel intensities between 0 and 1
        tnp_train_x= list( utils.scale_to_unit_interval( np.asarray(np_train_x) ) )
        tnp_valid_x = list( utils.scale_to_unit_interval( np.asarray(np_valid_x) ) )
        tnp_test_x = list( utils.scale_to_unit_interval( np.asarray(np_test_x) ) )
        
        #################
        # Subtract patches from pre-contrast
        #################
#        subsnp_train_x = []
#        subsnp_valid_x = []
#        subsnp_test_x = []
#
#        for img in np_train_x:
#            Vol = img.reshape(5,10,10)
#            imgslicestime = [Vol[0,:,:], Vol[1,:,:], Vol[2,:,:], Vol[3,:,:], Vol[4,:,:]]
#            subslicestime = []  
#            for k in range(1,5):
#                # substract volume
#                subVol =  np.asarray(imgslicestime[k]) - np.asarray(imgslicestime[0])
#                subslicestime.append(subVol)
#            #append
#            subsnp_train_x.append( np.asarray(subslicestime).reshape(4*10*10) )          
#        for img in np_valid_x:
#            Vol = img.reshape(5,10,10)
#            imgslicestime = [Vol[0,:,:], Vol[1,:,:], Vol[2,:,:], Vol[3,:,:], Vol[4,:,:]]
#            subslicestime = []  
#            for k in range(1,5):
#                # substract volume
#                subVol =  np.asarray(imgslicestime[k]) - np.asarray(imgslicestime[0])
#                subslicestime.append(subVol)
#            #append
#            subsnp_valid_x.append( np.asarray(subslicestime).reshape(4*10*10) )
#        for img in np_test_x:
#            Vol = img.reshape(5,10,10)
#            imgslicestime = [Vol[0,:,:], Vol[1,:,:], Vol[2,:,:], Vol[3,:,:], Vol[4,:,:]]
#            subslicestime = []  
#            for k in range(1,5):
#                # substract volume
#                subVol =  np.asarray(imgslicestime[k]) - np.asarray(imgslicestime[0])
#                subslicestime.append(subVol)
#            #append
#            subsnp_test_x.append( np.asarray(subslicestime).reshape(4*10*10) )
#    
#    
#    
#        #########################
#        # FORMAT THE DATA
#        #########################
#        ## transform to pixel intensities between 0 and 1
#        tsubsnp_train_x = list( utils.scale_to_unit_interval( np.asarray(subsnp_train_x) ) )
#        tsubsnp_valid_x = list( utils.scale_to_unit_interval( np.asarray(subsnp_valid_x) ) )
#        tsubsnp_test_x = list( utils.scale_to_unit_interval( np.asarray(subsnp_test_x) ) )
#        
#        # inspect one image class 1/0
#        tVol = tsubsnp_train_x[29594].reshape(4,10,10)
#        timgslicestime = [tVol[0,:,:], tVol[1,:,:], tVol[2,:,:], tVol[3,:,:]]
#        
#        # inspect one image class 1/0
#        Vol = np_train_x[29594].reshape(5,10,10)
#        imgslicestime = [Vol[0,:,:], Vol[1,:,:], Vol[2,:,:], Vol[3,:,:], Vol[4,:,:]]
#        
#        # Display image
#        fig, ax = plt.subplots(nrows=4, ncols=6, figsize=(16, 8))
#        for k in range(1,5):
#            ax[k-1,0].imshow(imgslicestime[k], cmap=plt.cm.gray)
#            ax[k-1,0].set_axis_off()
#            ax[k-1,0].set_adjustable('box-forced')
#        
#            # Display Original histogram
#            ax[k-1,1].hist(imgslicestime[k].ravel(), bins=50, color='black')
#            ax[k-1,1].ticklabel_format(axis='y', style='scientific', scilimits=(0, 0))
#            ax[k-1,1].set_xlabel('original')
#            
#            # substract volume
#            subVol =  np.asarray(imgslicestime[k]) - np.asarray(imgslicestime[0])
#            subslicestime.append(subVol)
#        
#            # Display subtracted histogram
#            ax[k-1,2].hist(subVol.ravel(), bins=50, color='black')
#            ax[k-1,2].ticklabel_format(axis='y', style='scientific', scilimits=(0, 0))
#            ax[k-1,2].set_xlabel('substracted histogram')
#       
#            # display  subtracted  
#            ax[k-1,3].imshow(subVol, cmap=plt.cm.gray)
#            ax[k-1,3].set_axis_off()
#            ax[k-1,3].set_adjustable('box-forced')
#            
#            # display  pixels 0-1 subtracted  
#            ax[k-1,4].imshow(timgslicestime[k-1], cmap=plt.cm.gray)
#            ax[k-1,4].set_axis_off()
#            ax[k-1,4].set_adjustable('box-forced')
#            
#            # display pixels 0-1 subtracted histogram
#            ax[k-1,5].hist(timgslicestime[k-1].ravel(), bins=50, color='black')
#            ax[k-1,5].ticklabel_format(axis='y', style='scientific', scilimits=(0, 0))
#            ax[k-1,5].set_xlabel(' pixels 0-1 subtracted histogram')
#        
#        plt.show(block=False)
        
        train_set_x, train_set_y = self.shared_dataset( tnp_train_x, np_train_y )
        valid_set_x, valid_set_y = self.shared_dataset( tnp_valid_x, np_valid_y )
        test_set_x, test_set_y = self.shared_dataset( tnp_test_x, np_test_y )
        
        datasets = [    (train_set_x, train_set_y), 
                        (valid_set_x, valid_set_y), 
                        (test_set_x, test_set_y)    ]
        
    
        # compute number of minibatches for training, validation and testing
        n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size
        
        # numpy random generator
        numpy_rng = numpy.random.RandomState(123)
        print '... building the model'
        
        # construct the Deep Belief Network
        dbn = DBN(numpy_rng=numpy_rng, n_ins=5*10*10,
                  hidden_layers_sizes=hidden_layers_sizes,
                  n_outs=2)
    
        #########################
        # PRETRAINING THE MODEL #
        #########################
        DBN_avg_costs = []
        DBN_iter = []
        layer_i = []
        
        print '... getting the pretraining functions'
        pretraining_fns = dbn.pretraining_functions(train_set_x=train_set_x,
                                                    batch_size=batch_size,
                                                    k=k)
    
        print '... pre-training the model'
        start_time = timeit.default_timer()
        
        ## Pre-train layer-wise
        for i in range(dbn.n_layers):
            # go through pretraining epochs
            for epoch in range(pretraining_epochs):
                # go through the training set
                c = []
                if epoch % 50 == 0:  # decay the pretraining learning rate every 50 epochs (also fires at epoch 0)
                    pretrain_lr = pretrain_lr/10
                    
                for batch_index in range(n_train_batches):
                    c.append(pretraining_fns[i](index=batch_index,
                                                lr=pretrain_lr))
                 # append      
                DBN_avg_costs.append(  np.mean(c) )
                DBN_iter.append(epoch)
                layer_i.append(i)
                
                print 'Pre-training layer %i, epoch %d, cost ' % (i, epoch),
                print numpy.mean(c)
    
        end_time = timeit.default_timer()
    
        print 'The pretraining code ran for %.2fm' % ((end_time - start_time) / 60.)
        
        #####################################
        # Plot images in 2D
        #####################################   
        Wrbm = dbn.rbm_layers[0].W.get_value(borrow=True).T
        
        fig, axes = plt.subplots(ncols=2, nrows=2, figsize=(16, 6))
        axes = axes.flatten()
        for k in range(1,5):
            image = Image.fromarray(
                    utils.tile_raster_images(
                    X=Wrbm.reshape( Wrbm.shape[0], 5, 10, 10)[:,k,:,:],
                    img_shape=(10,10),
                    tile_shape=(sidelen,sidelen),
                    tile_spacing=(1, 1) ))
            
            im = axes[k-1].imshow(image, cmap="Greys_r")  
            axes[k-1].get_xaxis().set_visible(False)
            axes[k-1].get_yaxis().set_visible(False)
            
        cax,kw = mpl.colorbar.make_axes([ax for ax in axes.flat])
        plt.colorbar(im, cax=cax, **kw)  
        fig.savefig(output_folder+'/filters_dbn'+str(item)+'.pdf')
                               
        ##############
        # Format      
        #################           
        LLdata = [float(L) for L in DBN_avg_costs]
        LLiter = [float(it) for it in DBN_iter]
        LLilayer = [ilayer for ilayer in layer_i]
        dfpredata = pd.DataFrame( LLdata )
        dfpredata.columns = ['LL_iter']
        dfpredata['iter'] = LLiter
        dfpredata['layer'] = LLilayer
        
                           
        ########################
        # FINETUNING THE MODEL #
        ########################
        # get the training, validation and testing function for the model
        print '... getting the finetuning functions'
        train_fn, validate_model, test_model = dbn.build_finetune_functions(
            datasets=datasets,
            batch_size=batch_size,
            learning_rate=finetune_lr)
    
        ############
        ### for plotting likelihood or cost, accumulate returns of train_model
        ############
        minibatch_avg_costs = []
        minibatch_iter = []
        minibatch_loss = []
        
        print '... finetuning the model'
        # early-stopping parameters
        patience = 1000 * n_train_batches  # look at this many examples regardless
        patience_increase = 2.    # wait this much longer when a new best is
                                  # found
        improvement_threshold = 0.995  # a relative improvement of this much is
                                       # considered significant
        validation_frequency = min(n_train_batches, patience / 2)
                                      # go through this many
                                      # minibatches before checking the network
                                      # on the validation set; in this case we
                                      # check every epoch
    
        best_validation_loss = numpy.inf
        test_score = 0.
        start_time = timeit.default_timer()
    
        done_looping = False
        epoch = 0
    
        while (epoch < training_epochs) and (not done_looping):
            epoch = epoch + 1
            for minibatch_index in range(n_train_batches):
    
                minibatch_avg_cost = train_fn(minibatch_index)
                iter = (epoch - 1) * n_train_batches + minibatch_index
    
                if (iter + 1) % validation_frequency == 0:
    
                    validation_losses = validate_model()
                    this_validation_loss = numpy.mean(validation_losses)
                    print('epoch %i, minibatch %i/%i, validation error %f %%' %
                              (epoch, minibatch_index + 1, 
                               n_train_batches,
                               this_validation_loss * 100.))
                    
                    ##############
                    # append      
                    #################
                    minibatch_avg_costs.append(minibatch_avg_cost)
                    minibatch_iter.append(iter)
                    minibatch_loss.append(this_validation_loss*100)
    
                    # if we got the best validation score until now
                    if this_validation_loss < best_validation_loss:
    
                        #improve patience if loss improvement is good enough
                        if (
                            this_validation_loss < best_validation_loss *
                            improvement_threshold
                        ):
                            patience = max(patience, iter * patience_increase)
    
                        # save best validation score and iteration number
                        best_validation_loss = this_validation_loss
                        best_iter = iter
    
                        # test it on the test set
                        test_losses = test_model()
                        test_score = numpy.mean(test_losses)
                        print(('     epoch %i, minibatch %i/%i, test error of '
                               'best model %f %%') %
                              (epoch, minibatch_index + 1, n_train_batches,
                               test_score * 100.))
    
                if patience <= iter:
                    done_looping = True
                    break
    
        end_time = timeit.default_timer()
        
        print(('Optimization complete with best validation score of %f %%, '
               'obtained at iteration %i, '
               'with test performance %f %%') %
              (best_validation_loss * 100., best_iter + 1, test_score * 100.))
                
        print 'The fine tuning code ran for %.2fm' % ((end_time - start_time)/ 60.)
    
        ##############
        # Format      
        #################           
        LLdata = [float(L) for L in minibatch_avg_costs]
        LLiter = [float(it) for it in minibatch_iter]
        LLoss = [float(l) for l in minibatch_loss]        
        dfinedata = pd.DataFrame( LLdata )
        dfinedata.columns = ['LL_iter']
        dfinedata['iter'] = LLiter
        dfinedata['loss'] = LLoss
    
        ###############
        ## Predictions
        ###############
        # get training data in numpy format   
        X,y = tnp_train_x, np_train_y # datasets[3]  
        Xtrain = np.asarray(X)
        ytrain = utils.makeMultiClass(y)
        # get valid data in numpy format   
        X,y = tnp_valid_x, np_valid_y # datasets[4]
        Xvalid = np.asarray(X)
        yvalid = utils.makeMultiClass(y)
        # get test data in numpy format           
        X,y = tnp_test_x, np_test_y  # datasets[5]  
        Xtest = np.asarray(X)
        ytest = utils.makeMultiClass(y)
                 
        
        ###############
        # predicting using the DBN
        ###############
        # in train
        predtrain = dbn.predict_functions(Xtrain).argmax(1)
        # let's see how the network did
        y = ytrain.argmax(1)
        e0 = 0.0; y0 = len([0 for yi in range(len(y)) if y[yi]==0])
        e1 = 0.0; y1 = len([1 for yi in range(len(y)) if y[yi]==1])
        for i in range(len(y)):
            if(y[i] == 1):
                e1 += y[i]==predtrain[i]
            if(y[i] == 0):
                e0 += y[i]==predtrain[i]
    
        # printing the result, this structure should result in 80% accuracy
        Acutrain0=100*e0/y0
        Acutrain1=100*e1/y1
        print "Train Accuracy for class 0: %2.2f%%"%(Acutrain0)
        print "Train Accuracy for class 1: %2.2f%%"%(Acutrain1)  
        
        # in Valid
        predvalid = dbn.predict_functions(Xvalid).argmax(1)
        # let's see how the network did
        y = yvalid.argmax(1)
        e0 = 0.0; y0 = len([0 for yi in range(len(y)) if y[yi]==0])
        e1 = 0.0; y1 = len([1 for yi in range(len(y)) if y[yi]==1])
        for i in range(len(y)):
            if(y[i] == 1):
                e1 += y[i]==predvalid[i]
            if(y[i] == 0):
                e0 += y[i]==predvalid[i]
    
        # printing the result, this structure should result in 80% accuracy
        Acuvalid0=100*e0/y0
        Acuvalid1=100*e1/y1
        print "Valid Accuracy for class 0: %2.2f%%"%(Acuvalid0)
        print "Valid Accuracy for class 1: %2.2f%%"%(Acuvalid1) 
        
        # in Xtest
        predtest = dbn.predict_functions(Xtest).argmax(1)
        # let's see how the network did
        y = ytest.argmax(1)
        e0 = 0.0; y0 = len([0 for yi in range(len(y)) if y[yi]==0])
        e1 = 0.0; y1 = len([1 for yi in range(len(y)) if y[yi]==1])
        for i in range(len(y)):
            if(y[i] == 1):
                e1 += y[i]==predtest[i]
            if(y[i] == 0):
                e0 += y[i]==predtest[i]
    
        # printing the result, this structure should result in 80% accuracy
        Acutest0=100*e0/y0
        Acutest1=100*e1/y1
        print "Test Accuracy for class 0: %2.2f%%"%(Acutest0)
        print "Test Accuracy for class 1: %2.2f%%"%(Acutest1) 
            
    
        return [dfpredata, dfinedata, dbn, 
                Acutrain0, Acutrain1,
                Acuvalid0, Acuvalid1,
                Acutest0, Acutest1]
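The three per-class accuracy loops above can each be collapsed with numpy boolean indexing; a short equivalent for the test split, reusing predtest and the one-hot ytest from the code:

y_true = ytest.argmax(1)
for cls in (0, 1):
    mask = (y_true == cls)
    acc = 100.0 * np.mean(predtest[mask] == cls)
    print("Test Accuracy for class %d: %2.2f%%" % (cls, acc))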
Example #17
       pretrain_lr=0.01
       finetune_lr=0.1
       alpha=0.1
       lambda_reg=0  # 0.0001
       alpha_reg=0  # 0.5
       n_hidden=[256,64,16]
       persistent_k=15
       pretraining_epochs=5
       training_epochs=100
       batch_size=100
 
       # train
       classifier,training_time=DBN.train_model(train_set_x_org=train_set_x_org, train_set_y_org=train_set_y_org, 
                                                valid_set_x_org=valid_set_x_org, valid_set_y_org=valid_set_y_org, 
                                                pretrain_lr=pretrain_lr,finetune_lr=finetune_lr, alpha=alpha, 
                                                lambda_reg=lambda_reg, alpha_reg=alpha_reg, 
                                                n_hidden=n_hidden, persistent_k=persistent_k,
                                                pretraining_epochs=pretraining_epochs, training_epochs=training_epochs,
                                                batch_size=batch_size, rng=rng) 
                   
       # test
       test_set_y_pred,test_set_y_pred_prob,test_time=DBN.test_model(classifier, test_set_x_org, batch_size=100)
       
       # evaluate classification performance
       perf_i,conf_mat_i=cl.perform(test_set_y_org,test_set_y_pred,numpy.unique(train_set_y_org))
       print perf_i
       print conf_mat_i
       if i==0:
           perf=perf_i
           conf_mat=conf_mat_i
           training_times=training_time