예제 #1
0
def main():
    """Train an RVM on noisy synthetic 1-D data, plot the relevance
    vectors / training data / true function / RVM estimate, then fit an
    SVR on a Friedman benchmark set and show the figure.

    NOTE(review): uses Python 2 `print` statements; this block will not
    parse under Python 3.
    """

    # Noisy training set plus a dense noise-free sample of the true curve.
    xinp, yinp, yact = generateData(noise=2, N=100)
    xtest, ytestnoise, ytestact = generateData(noise=0, N=1000)
    beta = 100  # initial noise precision for the RVM, presumably -- confirm in rvmtrain
    muMat, beta, converged, idx, x_rel, y_rel = rvmtrain(xinp, yinp, beta)
    x = xinp
    y = yact
    # Predict at the training inputs with the learned weights / relevance set.
    y_rvm_est = predictRVM(x, xinp, muMat, idx)

    err_rvm = rmse(y, y_rvm_est)

    print 'rvm error'
    print err_rvm
    #    plt.plot(xtest, yact, c='k', label='True function')
    #    plot
    plt.ylim((-0.4, 1.2))
    plt.xlim((-11, 11))

    plt.scatter(x_rel,
                y_rel,
                marker='o',
                c='r',
                s=70,
                label='Relevance vectors')
    plt.plot(xinp, yinp, c='b', marker='^', label='Training data')
    plt.plot(xtest, ytestact, marker='+', c='g', label='True function')
    plt.scatter(x,
                y_rvm_est,
                marker='.',
                s=70,
                c='yellow',
                label='Estimated function')

    plt.legend()
    # NOTE(review): '\s' is an invalid escape sequence; prefer a raw string.
    title = 'RVM, gaussian noise $\sigma$ = 0.1'
    plt.title(title)

    # Friedman #1 benchmark (240 samples, 10 features) for the SVR run.
    ds = DataSets()
    # x, y = ds.genFriedman(i=2,N=240,D=4)
    x, y = ds.genFriedman(i=1, N=240, D=10)

    svr_rbf = svm.SVR(C=10, epsilon=0.03, kernel='rbf', gamma=10)
    svr_rbf.fit(x, y)
    #    svr_spline.fit(splKernel, yinp)

    yest = svr_rbf.predict(x)  # NOTE(review): computed but never used below
    #    yspest = svr_spline.predict(splKernel)

    plt.plot()
    plt.show()
예제 #2
0
def main():
    """RVM demo variant with an extra parameter R passed to rvmtrain /
    predictRVM, followed by a sweep of rvmtrain over Friedman #2 data.

    NOTE(review): uses Python 2 `print` statements; this block will not
    parse under Python 3.
    """
#    plt.figure()

    # Noisy training set and a dense noise-free set for the true curve.
    xinp, yinp, yact = generateData(noise = 2,N=100)
    xtest, ytestnoise, ytestact  = generateData(noise = 0, N = 1000)
    beta = 100
    R= 4  # kernel width/radius parameter, presumably -- confirm in rvmtrain
    muMat, beta, converged, idx, x_rel, y_rel = rvmtrain(xinp, yinp, R, beta)

    x = xinp
    y = yact
    y_rvm_est = predictRVM(x, xinp, muMat, idx, R)

    err_rvm = rmse(y, y_rvm_est)

    print 'rvm error'
    print err_rvm
#    plt.plot(xtest, yact, c='k', label='True function')
#    plot
    plt.ylim((-0.4, 1.2))
    plt.xlim((-11, 11))

    plt.scatter(x_rel, y_rel, marker = 'o', c='r', s=70, label='Relevance vectors')
#    plt.scatter(xinp, yinp,  c= 'b', marker='.', label='Training data')
    plt.plot(xinp, yinp,  c= 'b', marker='^', label='Training data')
    plt.plot(xtest, ytestact, marker='+', c='g',label='True function')
    plt.scatter(x, y_rvm_est, marker = '.', s = 70, c='yellow',label='Estimated function')

    plt.legend()
#    title = 'RVM, No noise'
#    title = 'RVM, uniform noise [-0.2,0.2]'
    # NOTE(review): '\s' is an invalid escape sequence; prefer a raw string.
    title = 'RVM, gaussian noise $\sigma$ = 0.1'
    plt.title(title)


    # Friedman #2 benchmark sweep.
    ds = DataSets()
    x, y = ds.genFriedman(i=2,N=240,D=4)
#    x, y = ds.genFriedman(i=1,N=240,D=10)
##    xtest, ytestnoise, ytestact  = generateData(noise = 0, N = 1000)
#    R=100
    for r in np.arange(0.5,1,0.5):
        beta = 100
        # NOTE(review): rvmtrain is called with 3 args here but 4 above,
        # and the loop variable `r` is never passed in -- confirm intent.
        muMat, beta, converged, idx, x_rel, y_rel = rvmtrain(x[:,0], y, beta)
        print "r: {}".format(r)
        # NOTE(review): the string has no placeholder, so sum(idx) is
        # silently dropped by .format() -- likely a bug.
        print "Relevance vectors: ".format(sum(idx))
예제 #3
0
def main():
    """Same RVM-vs-SVR demo as example #1 (compact formatting): train an
    RVM on noisy 1-D data, plot, then fit an SVR on Friedman #1 data.

    NOTE(review): uses Python 2 `print` statements; this block will not
    parse under Python 3.
    """

    # Noisy training set plus a dense noise-free sample of the true curve.
    xinp, yinp, yact = generateData(noise =2 ,N=100)
    xtest, ytestnoise, ytestact  = generateData(noise = 0, N = 1000)
    beta = 100  # initial noise precision, presumably -- confirm in rvmtrain
    muMat, beta, converged, idx, x_rel, y_rel = rvmtrain(xinp, yinp, beta)
    x = xinp
    y = yact
    y_rvm_est = predictRVM(x, xinp, muMat, idx)

    err_rvm = rmse(y, y_rvm_est)

    print 'rvm error'
    print err_rvm
#    plt.plot(xtest, yact, c='k', label='True function')
#    plot
    plt.ylim((-0.4, 1.2))
    plt.xlim((-11, 11))

    plt.scatter(x_rel, y_rel, marker = 'o', c='r', s=70, label='Relevance vectors')
    plt.plot(xinp, yinp,  c= 'b', marker='^', label='Training data')
    plt.plot(xtest, ytestact, marker='+', c='g',label='True function')
    plt.scatter(x, y_rvm_est, marker = '.', s = 70, c='yellow',label='Estimated function')

    plt.legend()
    # NOTE(review): '\s' is an invalid escape sequence; prefer a raw string.
    title = 'RVM, gaussian noise $\sigma$ = 0.1'
    plt.title(title)

    # Friedman #1 benchmark (240 samples, 10 features) for the SVR run.
    ds = DataSets()
    # x, y = ds.genFriedman(i=2,N=240,D=4)
    x, y = ds.genFriedman(i=1,N=240,D=10)

    svr_rbf  = svm.SVR(C = 10, epsilon = 0.03, kernel = 'rbf', gamma = 10)
    svr_rbf.fit(x, y)
#    svr_spline.fit(splKernel, yinp)


    yest = svr_rbf.predict(x)  # NOTE(review): computed but never used below
#    yspest = svr_spline.predict(splKernel)

    plt.plot()
    plt.show()
예제 #4
0
def GetDataSet(dense=True, sparse=True, specialdense=True):
    """Load the pickled 'FeatureSet_A' feature set, normalize it, and
    split it into train/test feature matrices and labels.

    Parameters
    ----------
    dense : bool
        Include the (normalized) dense feature columns.
    sparse : bool
        Include the sparse feature matrix.
    specialdense : bool
        Restrict the dense frame to the missing-field indicators and the
        hand-engineered text features before normalization.

    Returns
    -------
    f_train, f_test, y_train, y_test
        Feature matrices (scipy CSR when sparse features are included,
        otherwise a pandas DataFrame) and binary 'approved' labels.

    Raises
    ------
    ValueError
        If both `dense` and `sparse` are False (no features selected).
    """
    # LOAD DATA
    dense_df, train, rejected, summary, sparsefeatures, sparseheaders = ds.pickleLoad('FeatureSet_A')

    if specialdense:
        # Keep only the missing-value indicators plus engineered text stats.
        missingfieldindicators = [col + '_mv' for col in ['short_description', 'need_statement', 'essay']]
        engineeredfeatures = ['essay_len', 'maxcaps', 'totalcaps', 'dollarbool', 'dollarcount', 'email', 'urls']
        dense_df = dense_df[missingfieldindicators + engineeredfeatures]

    # NORMALIZE: binary columns pass through unchanged, the rest are
    # L2-normalized row-wise.
    binary_col_selector = summary.distinct_count == 2
    nonbinary_col_selector = summary.distinct_count > 2
    binary_cols = dense_df.loc[:, binary_col_selector]
    nonbinary_cols = dense_df.loc[:, nonbinary_col_selector]
    normalized = pd.DataFrame(normalize(nonbinary_cols, norm='l2'), columns=nonbinary_cols.columns)
    dense_normalized = pd.concat((binary_cols, normalized), axis=1, ignore_index=True)

    # COMBINE ALL FEATURES
    if dense and sparse:
        features = fg.CombineFeatures([dense_normalized], sparsefeatures)
        features = sp.sparse.csr_matrix(features)  # required for efficient slicing
    elif dense:
        features = dense_normalized
    elif sparse:
        features = fg.CombineFeatures([], sparsefeatures)
        features = sp.sparse.csr_matrix(features)  # required for efficient slicing
    else:
        # Previously fell through and raised NameError on `features` below;
        # fail fast with a clear message instead.
        raise ValueError("at least one of 'dense' or 'sparse' must be True")

    # GET NUM DENSE & SPARSE (USED LATER IN COEF)
    numdense = dense_normalized.shape[1]
    numsparse = sparsefeatures[0].shape[1]
    numfeatures = numdense + numsparse

    selector_dense = np.arange(numfeatures) < numdense
    selector_sparse = selector_dense == False

    # TRAIN/TEST SLICING via the 0/1 `train` indicator vector.
    sel_bool_train = train == 1
    sel_bool_test = train == 0
    sel_ind_train = np.where(sel_bool_train)[0]
    sel_ind_test = np.where(sel_bool_test)[0]

    f_train = features[sel_ind_train]
    f_test = features[sel_ind_test]

    # Labels: flip 'rejected' into 'approved'.
    approved = 1 - rejected
    y_train = np.array(approved[sel_bool_train]).astype(int)
    y_test = np.array(approved[sel_bool_test]).astype(int)

    return f_train, f_test, y_train, y_test
예제 #5
0
# Build the network: 2 inputs -> four hidden layers of 4 -> 1 output.
learnAdd = Network.create([2, 4, 4, 4, 4, 1])
learnAdd.update()

# network2 = network1.getReverse()
# network2.display()

#============================================#

##############################################
# Training Data #
#===============#

# Broke out DataSets into a different file to allow easy swapping of data sets

truth_table, truth_table_testing = DataSets.and_or_nand_xor()

decoder, decoder_testing = DataSets.decoder()

adder, adder_testing = DataSets.adding()

# trainer = Particle_Swarm(10, learnTruth, truth_table,truth_table_testing)
# NOTE(review): `learnTruth` is not defined in this chunk -- only
# `learnAdd` is created above. Presumably defined elsewhere, or a typo
# for `learnAdd`; verify before running.
trainer = GeneticTrainer(learnTruth, truth_table, truth_table_testing)

#=============================================#

##############################################
# Pygame Graphics :D #
#====================#

# Initialize
예제 #6
0
import numpy as np
import DataSets as ds
import Layers


def get_dict(database, IsTrainingMode):
    """Build the feed_dict for one training step.

    Pulls the next batch from *database* and maps it onto the
    module-level placeholders `x`, `y_desired` and `ITM`.
    """
    batch_inputs, batch_labels = database.NextTrainingBatch()
    feed = {x: batch_inputs, y_desired: batch_labels}
    feed[ITM] = IsTrainingMode
    return feed


# Experiment configuration.
LoadModel = False
KeepProb_Dropout = 0.9  # keep-probability for dropout layers

# Experiment name encodes the dropout keep-probability.
experiment_name = '10k_Dr%.3f' % KeepProb_Dropout
#train = ds.DataSet('../DataBases/data_1k.bin','../DataBases/gender_1k.bin',1000)
train = ds.DataSet('../DataBases/data_10k.bin', '../DataBases/gender_10k.bin',
                   10000)
#train = ds.DataSet('../DataBases/data_100k.bin','../DataBases/gender_100k.bin',100000)
test = ds.DataSet('../DataBases/data_test10k.bin',
                  '../DataBases/gender_test10k.bin', 10000)

# TF1-style graph inputs: flattened images x, one-hot labels y_desired,
# and a boolean training-mode flag.
# NOTE(review): `tf` is not imported in this chunk -- confirm it is
# imported elsewhere in the original file.
with tf.name_scope('input'):
    x = tf.placeholder(tf.float32, [None, train.dim], name='x')
    y_desired = tf.placeholder(tf.float32, [None, 2], name='y_desired')
    ITM = tf.placeholder("bool", name='Is_Training_Mode')

with tf.name_scope('CNN'):
    t = Layers.unflat(x, 48, 48, 1)
    nbfilter = 3
    for k in range(4):
        for i in range(2):
            t = Layers.conv(t, nbfilter, 3, 1, ITM,
예제 #7
0
import tensorflow as tf
import DataSets as ds
import Layers
import os
import cv2

# Silence TensorFlow's C++ info/warning log output.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

LoadModel = True
# Machine-specific dataset root -- adjust per environment.
path = '/home/yamhiroto/hiroto_yamakawa/Data Challenge IDEMIA/'
experiment_size = 10  # training-set size, in thousands of samples
train = ds.DataSet(path + '/DataBases/data_%dk.bin' % experiment_size,
                   path + '/DataBases/gender_%dk.bin' % experiment_size,
                   1000 * experiment_size)
test = ds.DataSet(path + '/DataBases/data_test10k.bin',
                  path + '/DataBases/gender_test10k.bin', 10000)


class ConvNeuralNet(tf.Module):
    """CNN over 48x48 single-channel images.

    NOTE(review): the class definition appears truncated in this chunk --
    `__init__` and the forward pass continue past the visible lines.
    """

    @tf.function(input_signature=[
        tf.TensorSpec(shape=[None, 48, 48], dtype=tf.float32)
    ])
    def eval(self, x):
        # Inference-mode forward pass with softmax over the logits.
        # NOTE(review): relies on a __call__ defined outside this view.
        return tf.nn.softmax(self(x, False))

    def __init__(self):
        # Layer objects; unflat reshapes the flat input back to 48x48x1.
        self.unflat = Layers.unflat('unflat', 48, 48, 1)
        self.cv1 = Layers.conv('conv_1', output_dim=3, filterSize=3, stride=1)
        self.mp = Layers.maxpool('pool', 2)
        self.cv2 = Layers.conv('conv_2', output_dim=6, filterSize=3, stride=1)
        self.cv3 = Layers.conv('conv_3', output_dim=12, filterSize=3, stride=1)
예제 #8
0
################################################################################
################################################################################

#define a get_dict function to extract next training batch in training mode
def get_dict(database, IsTrainingMode):
    """Assemble a feed_dict for one training step from the next batch of
    *database*, targeting the module-level placeholders x, y_desired, ITM.
    """
    inputs, labels = database.NextTrainingBatch()
    mapping = {x: inputs, y_desired: labels, ITM: IsTrainingMode}
    return mapping

# Train from scratch: do not restore a previously saved model.
LoadModel = False
# Keep-probability for dropout layers.
KeepProb_Dropout = 0.9
# Experiment name encodes the dropout keep-probability.
experiment_name = '10k_Dr%.3f'%KeepProb_Dropout
#train = ds.DataSet('../DataBases/data_1k.bin','../DataBases/gender_1k.bin',1000)
train = ds.DataSet('D:/bdr/Documents/TP 3 tensor flow/Deep_Learning_Cours/Deep_Learning_Cours/DataBases/data_10k.bin','D:/bdr/Documents/TP 3 tensor flow/Deep_Learning_Cours/Deep_Learning_Cours/DataBases/gender_10k.bin',10000)
#train = ds.DataSet('../DataBases/data_100k.bin','../DataBases/gender_100k.bin',100000)
test = ds.DataSet('D:/bdr/Documents/TP 3 tensor flow/Deep_Learning_Cours/Deep_Learning_Cours/DataBases/data_test10k.bin','D:/bdr/Documents/TP 3 tensor flow/Deep_Learning_Cours/Deep_Learning_Cours/DataBases/gender_test10k.bin',10000)

# Declare the graph inputs: images x, one-hot labels y_desired, and a
# boolean training-mode flag for batch-norm/dropout switching.
with tf.name_scope('input'):
	x = tf.placeholder(tf.float32, [None, train.dim],name='x')
	y_desired = tf.placeholder(tf.float32, [None, 2],name='y_desired')
	ITM = tf.placeholder("bool", name='Is_Training_Mode')

#we unflat our images to apply the filters "nbfilters=3" in traing mode 
with tf.name_scope('CNN'):
	t = Layers.unflat(x,48,48,1)
	nbfilter = 3
	for k in range(4):
		for i in range(2):
예제 #9
0
if __name__ == '__main__':
    # Iris classification data -- loaded here but overwritten below; only
    # the regression path is actually exercised in this chunk.
    data = datasets.load_iris()
    dataX = data.data
    dataY = data.target
    '''
    for i in range(dataY.shape[0]):
        if dataY[i] != 0:
            dataY[i] = 1
    '''


    #c = Classifier(dataX, dataY, 3, None, 1, 2, 1, 5)
    #c.fit()

    # NOTE(review): the load_regression() result is immediately overwritten
    # by the Boston data. (load_boston is removed in scikit-learn >= 1.2.)
    dataX, dataY = DataSets.load_regression()
    d = datasets.load_boston()
    dataX = d.data
    dataY = d.target


    # Fit a regression tree, then print predictions, training MSE and the
    # terminal regions each sample falls into.
    c = DecisionTreeBuilder(Constants.Regression, dataX, dataY, None, None, 1, 2, 1, None)
    c.fit()
    print(c.predict_value(dataX))
    print()

    print(metrics.mean_squared_error(dataY, c.predict_value(dataX)))

    print(c.get_terminal_regions(dataX))
    print()
예제 #10
0
### This file generates a random polynomial, adds Gaussian noise, and finds the Least Squares polynomial fit with the best cross validation performance.
import GraphData as Graph
import DataSets as Data
import Regression
import CrossValidation as CV
import pdb
import math
import random
import sys

#Generate the data from the basis function
if(len(sys.argv) == 1):
	#Generate the order of the random true polynomial function
	trueOrder = random.randint(1,10)
	D = Data.genData(trueOrder)
elif(sys.argv[1] == "nonpoly"):
	D = Data.genNonPoly()
else:
	raise Exception("Invalid command line argument")


#In the following, D is the data set which has all the x values as its first entry and the y values as its second.

# NOTE(review): cross-validation below runs on the noise-free data; the
# Gaussian noise is only added afterwards -- confirm the intended order.
error,order = CV.kFoldErrorChoose(D[0],D[1],10,5)

#Graph the points on the base polynomial
Graph.lineColor(D[0],D[1],'red')

#Add Gaussian noise to the data outputs
D[1] = Data.addGaussianNoise(D[1],1.0/2000)
예제 #11
0
import tensorflow as tf
import numpy as np
import DataSets as ds

# Train from scratch (don't restore a saved model).
LoadModel = False

experiment_name = '1k'
# 1k-sample training set; binary files hold images and gender labels.
train = ds.DataSet('../DataBases/data_1k.bin', '../DataBases/gender_1k.bin',
                   1000)


def variable_summaries(var, name):
    """Attach mean/stddev/max/min scalar summaries and a histogram for
    *var* under the 'summaries' name scope, tagged '<name>/<stat>'.
    """
    with tf.name_scope('summaries'):
        mean = tf.reduce_mean(var)
        tf.summary.scalar(name + '/mean', mean)
        with tf.name_scope('stddev'):
            stddev = tf.sqrt(tf.reduce_sum(tf.square(var - mean)))
        # Bug fix: tags were '/sttdev' (typo) and 'min/' (slash on the
        # wrong side), breaking the consistent '<name>/<stat>' scheme
        # used by '/mean' and '/max'.
        tf.summary.scalar(name + '/stddev', stddev)
        tf.summary.scalar(name + '/max', tf.reduce_max(var))
        tf.summary.scalar(name + '/min', tf.reduce_min(var))
        tf.summary.histogram(name, var)


def fc_layer(tensor, input_dim, output_dim, name):
    """Fully-connected layer: weight matrix W (truncated-normal init,
    stddev 0.1) and bias B (zeros), each with summaries attached.

    NOTE(review): the function appears truncated in this chunk -- the
    matmul/activation and return are not visible here.
    """
    with tf.name_scope(name):
        Winit = tf.truncated_normal([input_dim, output_dim], stddev=0.1)
        W = tf.Variable(Winit)
        variable_summaries(W, name + '/W')
        Binit = tf.constant(0.0, shape=[output_dim])
        B = tf.Variable(Binit)
        variable_summaries(B, name + '/B')
예제 #12
0
import tensorflow as tf
import numpy as np
import DataSets as ds

# Train from scratch (don't restore a saved model).
LoadModel = False

experiment_name = '1k'
# Machine-specific absolute paths to the 1k-sample training set.
train = ds.DataSet(
    '/Users/maelfabien/Desktop/LocalDB/MDI341/Databases/data_1k.bin',
    '/Users/maelfabien/Desktop/LocalDB/MDI341/Databases/gender_1k.bin', 1000)


def variable_summaries(var, name):
    """Attach mean/stddev/max/min scalar summaries and a histogram for
    *var* under the 'summaries' name scope, tagged '<name>/<stat>'.
    """
    with tf.name_scope('summaries'):
        mean = tf.reduce_mean(var)
        tf.summary.scalar(name + '/mean', mean)
        with tf.name_scope('stddev'):
            stddev = tf.sqrt(tf.reduce_sum(tf.square(var - mean)))
        # Bug fix: tags were '/sttdev' (typo) and 'min/' (slash on the
        # wrong side), breaking the consistent '<name>/<stat>' scheme
        # used by '/mean' and '/max'.
        tf.summary.scalar(name + '/stddev', stddev)
        tf.summary.scalar(name + '/max', tf.reduce_max(var))
        tf.summary.scalar(name + '/min', tf.reduce_min(var))
        tf.summary.histogram(name, var)


def fc_layer(tensor, input_dim, output_dim, name):
    """Fully-connected layer setup: W (truncated-normal) and B (zeros).

    NOTE(review): the tail of this function (from the commented
    ax.set_xticks lines onward) appears to be spliced in from an
    unrelated plotting script -- plt, title, filename and dl are not
    defined in this scope. Needs manual reconstruction against the
    original sources; left byte-identical here.
    """
    with tf.name_scope(name):
        Winit = tf.truncated_normal([input_dim, output_dim], stddev=0.1)
        W = tf.Variable(Winit)
        variable_summaries(W, name + '/W')
        Binit = tf.constant(0.0, shape=[output_dim])
        B = tf.Variable(Binit)
    #ax.set_xticks(ind+width)
    #ax.set_xticks((width/2.0,width*3/2.0))    
    #ax.set_xticklabels(('rejected('+str(ser_rej_T)+')','approved('+str(ser_app_T)+')'))
    plt.title(title)
    plt.ylabel('% of subset rejected/approved')
    #plt.legend(loc="lower right")
    filepath = dl.getDataFilePath('plots/fig_'+filename+'.png')
    plt.savefig(filepath)
    plt.show()

def getSparseColumn(featurename, sparsefeatures, sparseheaders):
    """Return one column of the sparse feature matrix as a dense Series.

    Bug fix: the original ignored `featurename` and always looked up the
    hard-coded 'student' column.

    Parameters
    ----------
    featurename : str
        Column name to extract; must be present in `sparseheaders`.
    sparsefeatures : scipy sparse matrix
        Feature matrix whose columns align with `sparseheaders`.
    sparseheaders : list of str
        Column names, in matrix column order.
    """
    col = sparseheaders.index(featurename)
    return pd.Series(sparsefeatures[:, col].toarray().ravel())
    

# LOAD DATA: dense features, train/test indicator, labels, column summary
# and the sparse feature matrix with its headers.
dense_df,train,rejected,summary,sparsefeatures,sparseheaders = ds.pickleLoad('FeatureSet_A')
df = dense_df



# Histogram of max consecutive capitalized letters, split by
# rejected/approved (makehist is defined elsewhere in the file).
maxcaps = df.maxcaps
makehist(
    maxcaps,
    rejected,
    mincount=4,
    bins=np.arange(4,45,2),
    title="Max consecutive capitilized letters",
    filename='maxcaps')

totalcaps = df.totalcaps
makehist(
import tensorflow as tf
import DataSets as ds
import Layers
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

# Train a new model from scratch.
LoadModel = False

experiment_size = 10  # training-set size, in thousands of samples
train = ds.DataSet('../DataBases/data_%dk.bin' % experiment_size,
                   '../DataBases/gender_%dk.bin' % experiment_size,
                   1000 * experiment_size)
test = ds.DataSet('../DataBases/data_test10k.bin',
                  '../DataBases/gender_test10k.bin', 10000)


class ConvNeuralNet(tf.Module):
    """CNN stacking 4 blocks of 2 conv layers over 48x48x1 input.

    NOTE(review): the class appears truncated in this chunk -- the rest
    of __init__ and the forward pass are outside this view.
    """

    def __init__(self):

        # NOTE(review): `list` shadows the builtin; renaming is unsafe
        # here because the remainder of __init__ (not visible) may use it.
        list = []
        list.append(Layers.unflat('unflat', 48, 48, 1))

        nbfilter = 3
        # 4 blocks x 2 conv layers each, all with dropout 0.1.
        # NOTE(review): nbfilter is never increased in the visible
        # portion, so every layer gets 3 filters -- confirm intended.
        for i in range(4):
            for j in range(2):
                list.append(
                    Layers.conv('block_%d_conv_%d' % (i, j),
                                output_dim=nbfilter,
                                filterSize=3,
                                stride=1,
                                dropout_rate=0.1))
예제 #15
0
# Top-10 words by average TF-IDF within each class.
# NOTE(review): the *_top10 frames are not used in the visible code --
# the full frames feed `topwords` below.
avg_tfidf_approved_top10 = avg_tfidf_approved.iloc[:10,:]
avg_tfidf_rejected_top10 = avg_tfidf_rejected.iloc[:10,:]

app_indices = avg_tfidf_approved.index
rej_indices = avg_tfidf_rejected.index

# Side-by-side table: word and its average TF-IDF, approved vs rejected.
topwords = pd.concat(
                (pd.DataFrame(app_indices,columns=['Approved']),
                 pd.DataFrame(np.array(avg_tfidf_approved),columns=['AVGTFIDF_Approved']),
                 pd.DataFrame(rej_indices,columns=['Rejected']),
                 pd.DataFrame(np.array(avg_tfidf_rejected),columns=['AVGTFIDF_Rejected'])),
                axis = 1,
                ignore_index = False)

# Full (unfeaturized) records, for inspecting individual predictions.
df = ds.pickleLoad('BalancedFull')
def ClosestRecord(threshold=0.01, label=1):
    """Find the test-set record(s) with true class *label* whose predicted
    positive-class probability lies closest to *threshold*.

    Returns the matching rows from the module-level `df` and `dense_df`
    (both restricted to the test split via `sel_bool_test`).
    """
    positive_probs = probs[:, 1]
    label_mask = y_test == label
    gaps = np.abs(positive_probs - threshold)
    smallest_gap = min(gaps[label_mask])
    chosen = np.logical_and(label_mask, gaps == smallest_gap)
    return df[sel_bool_test][chosen], dense_df[sel_bool_test][chosen]


#ds.pickleIt((coef_binary,coef_numerical,coef_sparse),'FeatureSetA_coef_summaries')


def GridSearch(data,params,classifier,classifier_name,paramname,probstype=1,clf_kwargs={}):
예제 #16
0
# Projection string (proj4) describing the data set's CRS.
proj4 = client.getProjection(dataSet).proj4

# Choose the whole-ice-sheet mask for "ICE", otherwise a per-prefix
# Greenland sub-mask shapefile.
mask = '/data/puma1/scratch/cryotempo/masks/icesheets.shp' if mask_prefix == "ICE" else '/data/puma1/scratch/cryotempo/sarinmasks/{}_Greenland.shp'.format(
    mask_prefix)

tmpPath = '/home/jon/data/masks/'  # local output directory for the CSVs

bbox = client.boundingBox(dataSet)

gridCells = client.gridCells(dataSet, bbox)

# For each grid cell: build an interpolation grid, clip by the mask,
# publish a count statistic, and dump the masked points to CSV.
for gc in gridCells:
    data = interpolationGrid(gc.minX, gc.minY, gridCellSize, resolution)

    point_ds = pds.PointDataSet(data, proj4)
    geoDs = point_ds.asGeoDataSet()
    geoDs.withinMask(mask, mask_prefix)

    results = geoDs.getData()
    stats = {}
    # Count of interpolated points falling inside the mask polygon.
    stats['InterpolationCount'] = float(
        results['within_{}'.format(mask_prefix)].sum())
    print(
        client.query.publishGridCellStats(
            dataSet.parentDataSet,
            "{}_GridCellInterpolationCount".format(mask_prefix), gc.minX,
            gc.minY, gridCellSize, stats))

    mask_file = "mask_{}_{}_{}.csv".format(dataSet.dataSet, gc.minX, gc.minY)
    results.to_csv(tmpPath + mask_file)