def main(): xinp, yinp, yact = generateData(noise=2, N=100) xtest, ytestnoise, ytestact = generateData(noise=0, N=1000) beta = 100 muMat, beta, converged, idx, x_rel, y_rel = rvmtrain(xinp, yinp, beta) x = xinp y = yact y_rvm_est = predictRVM(x, xinp, muMat, idx) err_rvm = rmse(y, y_rvm_est) print 'rvm error' print err_rvm # plt.plot(xtest, yact, c='k', label='True function') # plot plt.ylim((-0.4, 1.2)) plt.xlim((-11, 11)) plt.scatter(x_rel, y_rel, marker='o', c='r', s=70, label='Relevance vectors') plt.plot(xinp, yinp, c='b', marker='^', label='Training data') plt.plot(xtest, ytestact, marker='+', c='g', label='True function') plt.scatter(x, y_rvm_est, marker='.', s=70, c='yellow', label='Estimated function') plt.legend() title = 'RVM, gaussian noise $\sigma$ = 0.1' plt.title(title) ds = DataSets() # x, y = ds.genFriedman(i=2,N=240,D=4) x, y = ds.genFriedman(i=1, N=240, D=10) svr_rbf = svm.SVR(C=10, epsilon=0.03, kernel='rbf', gamma=10) svr_rbf.fit(x, y) # svr_spline.fit(splKernel, yinp) yest = svr_rbf.predict(x) # yspest = svr_spline.predict(splKernel) plt.plot() plt.show()
def main(): # plt.figure() xinp, yinp, yact = generateData(noise = 2,N=100) xtest, ytestnoise, ytestact = generateData(noise = 0, N = 1000) beta = 100 R= 4 muMat, beta, converged, idx, x_rel, y_rel = rvmtrain(xinp, yinp, R, beta) x = xinp y = yact y_rvm_est = predictRVM(x, xinp, muMat, idx, R) err_rvm = rmse(y, y_rvm_est) print 'rvm error' print err_rvm # plt.plot(xtest, yact, c='k', label='True function') # plot plt.ylim((-0.4, 1.2)) plt.xlim((-11, 11)) plt.scatter(x_rel, y_rel, marker = 'o', c='r', s=70, label='Relevance vectors') # plt.scatter(xinp, yinp, c= 'b', marker='.', label='Training data') plt.plot(xinp, yinp, c= 'b', marker='^', label='Training data') plt.plot(xtest, ytestact, marker='+', c='g',label='True function') plt.scatter(x, y_rvm_est, marker = '.', s = 70, c='yellow',label='Estimated function') plt.legend() # title = 'RVM, No noise' # title = 'RVM, uniform noise [-0.2,0.2]' title = 'RVM, gaussian noise $\sigma$ = 0.1' plt.title(title) ds = DataSets() x, y = ds.genFriedman(i=2,N=240,D=4) # x, y = ds.genFriedman(i=1,N=240,D=10) ## xtest, ytestnoise, ytestact = generateData(noise = 0, N = 1000) # R=100 for r in np.arange(0.5,1,0.5): beta = 100 muMat, beta, converged, idx, x_rel, y_rel = rvmtrain(x[:,0], y, beta) print "r: {}".format(r) print "Relevance vectors: ".format(sum(idx))
def main(): xinp, yinp, yact = generateData(noise =2 ,N=100) xtest, ytestnoise, ytestact = generateData(noise = 0, N = 1000) beta = 100 muMat, beta, converged, idx, x_rel, y_rel = rvmtrain(xinp, yinp, beta) x = xinp y = yact y_rvm_est = predictRVM(x, xinp, muMat, idx) err_rvm = rmse(y, y_rvm_est) print 'rvm error' print err_rvm # plt.plot(xtest, yact, c='k', label='True function') # plot plt.ylim((-0.4, 1.2)) plt.xlim((-11, 11)) plt.scatter(x_rel, y_rel, marker = 'o', c='r', s=70, label='Relevance vectors') plt.plot(xinp, yinp, c= 'b', marker='^', label='Training data') plt.plot(xtest, ytestact, marker='+', c='g',label='True function') plt.scatter(x, y_rvm_est, marker = '.', s = 70, c='yellow',label='Estimated function') plt.legend() title = 'RVM, gaussian noise $\sigma$ = 0.1' plt.title(title) ds = DataSets() # x, y = ds.genFriedman(i=2,N=240,D=4) x, y = ds.genFriedman(i=1,N=240,D=10) svr_rbf = svm.SVR(C = 10, epsilon = 0.03, kernel = 'rbf', gamma = 10) svr_rbf.fit(x, y) # svr_spline.fit(splKernel, yinp) yest = svr_rbf.predict(x) # yspest = svr_spline.predict(splKernel) plt.plot() plt.show()
def GetDataSet(dense=True, sparse=True, specialdense=True):
    """Load the pickled 'FeatureSet_A' data and return train/test splits.

    Parameters
    ----------
    dense : bool
        Include the (normalized) dense features.
    sparse : bool
        Include the sparse features; the combined matrix becomes CSR.
    specialdense : bool
        Restrict the dense frame to the missing-field indicators plus the
        hand-engineered text features before normalizing.

    Returns
    -------
    f_train, f_test, y_train, y_test
        Feature matrices sliced by the `train` flag, and 0/1 approval labels
        (approved = 1 - rejected).

    Raises
    ------
    ValueError
        If both `dense` and `sparse` are False. Previously this case fell
        through and crashed later with a NameError on `features`.
    """
    # LOAD DATA
    dense_df, train, rejected, summary, sparsefeatures, sparseheaders = ds.pickleLoad('FeatureSet_A')
    if specialdense:
        missingfieldindicators = [col + '_mv' for col in ['short_description', 'need_statement', 'essay']]
        engineeredfeatures = ['essay_len', 'maxcaps', 'totalcaps', 'dollarbool', 'dollarcount', 'email', 'urls']
        dense_df = dense_df[missingfieldindicators + engineeredfeatures]
    # NORMALIZE: binary columns pass through untouched, the rest are L2-normalized
    binary_col_selector = summary.distinct_count == 2
    nonbinary_col_selector = summary.distinct_count > 2
    binary_cols = dense_df.loc[:, binary_col_selector]
    nonbinary_cols = dense_df.loc[:, nonbinary_col_selector]
    normalized = pd.DataFrame(normalize(nonbinary_cols, norm='l2'), columns=nonbinary_cols.columns)
    dense_normalized = pd.concat((binary_cols, normalized), axis=1, ignore_index=True)
    # COMBINE ALL FEATURES
    if dense and sparse:
        features = fg.CombineFeatures([dense_normalized], sparsefeatures)
        features = sp.sparse.csr_matrix(features)  # required for efficient slicing
    elif dense:
        features = dense_normalized
    elif sparse:
        features = fg.CombineFeatures([], sparsefeatures)
        features = sp.sparse.csr_matrix(features)  # required for efficient slicing
    else:
        raise ValueError("GetDataSet: at least one of 'dense' or 'sparse' must be True")
    # GET NUM DENSE & SPARSE (USED LATER IN COEF)
    numdense = dense_normalized.shape[1]
    numsparse = sparsefeatures[0].shape[1]
    numfeatures = numdense + numsparse
    selector_dense = np.arange(numfeatures) < numdense
    selector_sparse = selector_dense == False
    # TRAIN/TEST SLICING: `train` is a 0/1 flag aligned with the rows
    sel_bool_train = train == 1
    sel_bool_test = train == 0
    sel_ind_train = np.where(sel_bool_train)[0]
    sel_ind_test = np.where(sel_bool_test)[0]
    f_train = features[sel_ind_train]
    f_test = features[sel_ind_test]
    # LABELS
    approved = 1 - rejected
    y_train = np.array(approved[sel_bool_train]).astype(int)
    y_test = np.array(approved[sel_bool_test]).astype(int)
    return f_train, f_test, y_train, y_test
# Build the adder network: 2 inputs, four hidden layers of 4 units, 1 output.
learnAdd = Network.create([2, 4, 4, 4, 4, 1])
learnAdd.update()
# network2 = network1.getReverse()
# network2.display()
#============================================#
##############################################
#              Training Data                 #
#===============#
# Broke out DataSets into a different file to allow easy swapping of data sets
truth_table, truth_table_testing = DataSets.and_or_nand_xor()
decoder, decoder_testing = DataSets.decoder()
adder, adder_testing = DataSets.adding()
# trainer = Particle_Swarm(10, learnTruth, truth_table,truth_table_testing)
# Train the truth-table network with the genetic trainer.
# NOTE(review): learnTruth is defined outside this chunk -- confirm it exists above.
trainer = GeneticTrainer(learnTruth, truth_table, truth_table_testing)
#=============================================#
##############################################
#           Pygame Graphics :D               #
#====================#
# Initialize
import numpy as np
import DataSets as ds
import Layers


def get_dict(database, IsTrainingMode):
    # Build the feed_dict for one training batch, wiring the batch into the
    # module-level placeholders x / y_desired / ITM declared below.
    xs, ys = database.NextTrainingBatch()
    return {x: xs, y_desired: ys, ITM: IsTrainingMode}


LoadModel = False
KeepProb_Dropout = 0.9  # dropout keep probability, also used in the run name
experiment_name = '10k_Dr%.3f' % KeepProb_Dropout
#train = ds.DataSet('../DataBases/data_1k.bin','../DataBases/gender_1k.bin',1000)
train = ds.DataSet('../DataBases/data_10k.bin', '../DataBases/gender_10k.bin', 10000)
#train = ds.DataSet('../DataBases/data_100k.bin','../DataBases/gender_100k.bin',100000)
test = ds.DataSet('../DataBases/data_test10k.bin', '../DataBases/gender_test10k.bin', 10000)

# NOTE(review): tf is used below but not imported in this chunk -- presumably
# imported elsewhere in the file; verify.
with tf.name_scope('input'):
    x = tf.placeholder(tf.float32, [None, train.dim], name='x')
    y_desired = tf.placeholder(tf.float32, [None, 2], name='y_desired')
    ITM = tf.placeholder("bool", name='Is_Training_Mode')

with tf.name_scope('CNN'):
    # Reshape flat input back to 48x48x1 images, then 4 blocks of 2 convs.
    t = Layers.unflat(x, 48, 48, 1)
    nbfilter = 3
    for k in range(4):
        for i in range(2):
            # NOTE(review): source chunk is truncated mid-call here.
            t = Layers.conv(t, nbfilter, 3, 1, ITM,
import tensorflow as tf
import DataSets as ds
import Layers
import os
import cv2
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'  # silence TF info/warning logs

LoadModel = True
path = '/home/yamhiroto/hiroto_yamakawa/Data Challenge IDEMIA/'
experiment_size = 10  # training-set size in thousands of samples
train = ds.DataSet(path + '/DataBases/data_%dk.bin' % experiment_size, path + '/DataBases/gender_%dk.bin' % experiment_size, 1000 * experiment_size)
test = ds.DataSet(path + '/DataBases/data_test10k.bin', path + '/DataBases/gender_test10k.bin', 10000)


class ConvNeuralNet(tf.Module):
    # Gender-classification CNN over 48x48 grayscale faces.

    @tf.function(input_signature=[
        tf.TensorSpec(shape=[None, 48, 48], dtype=tf.float32)
    ])
    def eval(self, x):
        # Inference entry point: forward pass with training=False, softmaxed.
        # NOTE(review): relies on self.__call__, which is defined outside this view.
        return tf.nn.softmax(self(x, False))

    def __init__(self):
        # Layer objects; widths double at each conv (3 -> 6 -> 12 filters).
        # NOTE(review): chunk is cut off here -- __init__ likely continues below.
        self.unflat = Layers.unflat('unflat', 48, 48, 1)
        self.cv1 = Layers.conv('conv_1', output_dim=3, filterSize=3, stride=1)
        self.mp = Layers.maxpool('pool', 2)
        self.cv2 = Layers.conv('conv_2', output_dim=6, filterSize=3, stride=1)
        self.cv3 = Layers.conv('conv_3', output_dim=12, filterSize=3, stride=1)
################################################################################ ################################################################################ #define a get_dict function to extract next training batch in training mode def get_dict(database,IsTrainingMode): xs,ys = database.NextTrainingBatch() return {x:xs,y_desired:ys,ITM:IsTrainingMode} #Loading model is false LoadModel = False #?? KeepProb_Dropout = 0.9 #we give a nem to the expirement KeepProb_Dropout experiment_name = '10k_Dr%.3f'%KeepProb_Dropout #train = ds.DataSet('../DataBases/data_1k.bin','../DataBases/gender_1k.bin',1000) train = ds.DataSet('D:/bdr/Documents/TP 3 tensor flow/Deep_Learning_Cours/Deep_Learning_Cours/DataBases/data_10k.bin','D:/bdr/Documents/TP 3 tensor flow/Deep_Learning_Cours/Deep_Learning_Cours/DataBases/gender_10k.bin',10000) #train = ds.DataSet('../DataBases/data_100k.bin','../DataBases/gender_100k.bin',100000) test = ds.DataSet('D:/bdr/Documents/TP 3 tensor flow/Deep_Learning_Cours/Deep_Learning_Cours/DataBases/data_test10k.bin','D:/bdr/Documents/TP 3 tensor flow/Deep_Learning_Cours/Deep_Learning_Cours/DataBases/gender_test10k.bin',10000) #we give to tf our x as input and y as output with tf.name_scope('input'): x = tf.placeholder(tf.float32, [None, train.dim],name='x') y_desired = tf.placeholder(tf.float32, [None, 2],name='y_desired') ITM = tf.placeholder("bool", name='Is_Training_Mode') #we unflat our images to apply the filters "nbfilters=3" in traing mode with tf.name_scope('CNN'): t = Layers.unflat(x,48,48,1) nbfilter = 3 for k in range(4): for i in range(2):
if __name__ == '__main__':
    # Load iris for the (commented-out) classification smoke test.
    data = datasets.load_iris()
    dataX = data.data
    dataY = data.target
    '''
    for i in range(dataY.shape[0]):
        if dataY[i] != 0:
            dataY[i] = 1
    '''
    #c = Classifier(dataX, dataY, 3, None, 1, 2, 1, 5)
    #c.fit()
    # Regression path: load project data, then immediately overwrite it with
    # the Boston housing set (so load_regression's result is unused here).
    dataX, dataY = DataSets.load_regression()
    # NOTE(review): load_boston was removed from scikit-learn in 1.2 --
    # confirm the pinned sklearn version still provides it.
    d = datasets.load_boston()
    dataX = d.data
    dataY = d.target
    # Fit a regression tree on the full data and report training-set MSE.
    c = DecisionTreeBuilder(Constants.Regression, dataX, dataY, None, None, 1, 2, 1, None)
    c.fit()
    print(c.predict_value(dataX))
    print()
    print(metrics.mean_squared_error(dataY, c.predict_value(dataX)))
    print(c.get_terminal_regions(dataX))
    print()
### This file generates a random polynomial, adds Gaussian noise, and finds the Least Squares polynomial fit with the best cross validation performance. import GraphData as Graph import DataSets as Data import Regression import CrossValidation as CV import pdb import math import random import sys #Generate the data from the basis function if(len(sys.argv) == 1): #Generate the order of the random true polynomial function trueOrder = random.randint(1,10) D = Data.genData(trueOrder) elif(sys.argv[1] == "nonpoly"): D = Data.genNonPoly() else: raise Exception("Invalid command line argument") #In the following, D is the data set which has all the x values as its first entry and the y values as its second. error,order = CV.kFoldErrorChoose(D[0],D[1],10,5) #Graph the points on the base polynomial Graph.lineColor(D[0],D[1],'red') #Add Gaussian noise to the data outputs D[1] = Data.addGaussianNoise(D[1],1.0/2000)
import tensorflow as tf
import numpy as np
import DataSets as ds

LoadModel = False
experiment_name = '1k'
train = ds.DataSet('../DataBases/data_1k.bin', '../DataBases/gender_1k.bin', 1000)


def variable_summaries(var, name):
    """Attach mean/stddev/max/min scalar summaries and a histogram for `var`,
    tagged '<name>/<stat>' for TensorBoard."""
    with tf.name_scope('summaries'):
        mean = tf.reduce_mean(var)
        tf.summary.scalar(name + '/mean', mean)
        with tf.name_scope('stddev'):
            stddev = tf.sqrt(tf.reduce_sum(tf.square(var - mean)))
        # Bug fix: tags were '/sttdev' and 'min/' -- typos that broke the
        # consistent '<name>/<stat>' tag scheme used by the other summaries.
        tf.summary.scalar(name + '/stddev', stddev)
        tf.summary.scalar(name + '/max', tf.reduce_max(var))
        tf.summary.scalar(name + '/min', tf.reduce_min(var))
        tf.summary.histogram(name, var)


def fc_layer(tensor, input_dim, output_dim, name):
    """Fully-connected layer: W ~ truncated normal (stddev 0.1), B = 0,
    both instrumented with variable_summaries.

    NOTE(review): the source chunk is truncated partway through this
    function -- its remainder (activation/return) lies outside this view.
    """
    with tf.name_scope(name):
        Winit = tf.truncated_normal([input_dim, output_dim], stddev=0.1)
        W = tf.Variable(Winit)
        variable_summaries(W, name + '/W')
        Binit = tf.constant(0.0, shape=[output_dim])
        B = tf.Variable(Binit)
        variable_summaries(B, name + '/B')
import tensorflow as tf
import numpy as np
import DataSets as ds

LoadModel = False
experiment_name = '1k'
train = ds.DataSet(
    '/Users/maelfabien/Desktop/LocalDB/MDI341/Databases/data_1k.bin',
    '/Users/maelfabien/Desktop/LocalDB/MDI341/Databases/gender_1k.bin', 1000)


def variable_summaries(var, name):
    """Attach mean/stddev/max/min scalar summaries and a histogram for `var`,
    tagged '<name>/<stat>' for TensorBoard."""
    with tf.name_scope('summaries'):
        mean = tf.reduce_mean(var)
        tf.summary.scalar(name + '/mean', mean)
        with tf.name_scope('stddev'):
            stddev = tf.sqrt(tf.reduce_sum(tf.square(var - mean)))
        # Bug fix: tags were '/sttdev' and 'min/' -- typos that broke the
        # consistent '<name>/<stat>' tag scheme used by the other summaries.
        tf.summary.scalar(name + '/stddev', stddev)
        tf.summary.scalar(name + '/max', tf.reduce_max(var))
        tf.summary.scalar(name + '/min', tf.reduce_min(var))
        tf.summary.histogram(name, var)


def fc_layer(tensor, input_dim, output_dim, name):
    """Fully-connected layer: W ~ truncated normal (stddev 0.1), B = 0.

    NOTE(review): the source chunk is truncated partway through this
    function -- its remainder lies outside this view.
    """
    with tf.name_scope(name):
        Winit = tf.truncated_normal([input_dim, output_dim], stddev=0.1)
        W = tf.Variable(Winit)
        variable_summaries(W, name + '/W')
        Binit = tf.constant(0.0, shape=[output_dim])
        B = tf.Variable(Binit)
#ax.set_xticks(ind+width) #ax.set_xticks((width/2.0,width*3/2.0)) #ax.set_xticklabels(('rejected('+str(ser_rej_T)+')','approved('+str(ser_app_T)+')')) plt.title(title) plt.ylabel('% of subset rejected/approved') #plt.legend(loc="lower right") filepath = dl.getDataFilePath('plots/fig_'+filename+'.png') plt.savefig(filepath) plt.show() def getSparseColumn(featurename,sparsefeatures,sparseheaders): return pd.Series(sparsefeatures[:,sparseheaders.index('student')].toarray().ravel()) # LOAD DATA dense_df,train,rejected,summary,sparsefeatures,sparseheaders = ds.pickleLoad('FeatureSet_A') df = dense_df maxcaps = df.maxcaps makehist( maxcaps, rejected, mincount=4, bins=np.arange(4,45,2), title="Max consecutive capitilized letters", filename='maxcaps') totalcaps = df.totalcaps makehist(
import tensorflow as tf
import DataSets as ds
import Layers
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'  # silence TF info/warning logs

LoadModel = False
experiment_size = 10  # training-set size in thousands of samples
train = ds.DataSet('../DataBases/data_%dk.bin' % experiment_size, '../DataBases/gender_%dk.bin' % experiment_size, 1000 * experiment_size)
test = ds.DataSet('../DataBases/data_test10k.bin', '../DataBases/gender_test10k.bin', 10000)


class ConvNeuralNet(tf.Module):
    # CNN over 48x48x1 images: 4 blocks of 2 dropout-regularized conv layers.

    def __init__(self):
        # NOTE(review): 'list' shadows the builtin; not renamed here because
        # the rest of __init__ (outside this view) may reference it.
        list = []
        list.append(Layers.unflat('unflat', 48, 48, 1))
        nbfilter = 3
        for i in range(4):
            for j in range(2):
                list.append(Layers.conv('block_%d_conv_%d' % (i, j), output_dim=nbfilter, filterSize=3, stride=1, dropout_rate=0.1))
        # NOTE(review): source chunk is truncated here -- __init__ likely continues.
# NOTE(review): this chunk starts mid-script; avg_tfidf_approved/_rejected,
# probs, y_test, sel_bool_test and dense_df are defined above this view.
avg_tfidf_approved_top10 = avg_tfidf_approved.iloc[:10, :]
avg_tfidf_rejected_top10 = avg_tfidf_rejected.iloc[:10, :]
app_indices = avg_tfidf_approved.index
rej_indices = avg_tfidf_rejected.index
# Side-by-side table of top words with their average TF-IDF per class.
topwords = pd.concat(
    (pd.DataFrame(app_indices, columns=['Approved']),
     pd.DataFrame(np.array(avg_tfidf_approved), columns=['AVGTFIDF_Approved']),
     pd.DataFrame(rej_indices, columns=['Rejected']),
     pd.DataFrame(np.array(avg_tfidf_rejected), columns=['AVGTFIDF_Rejected'])),
    axis=1, ignore_index=False)

df = ds.pickleLoad('BalancedFull')


def ClosestRecord(threshold=0.01, label=1):
    # grabs records with label = 1 in test set with probability
    # closest to threshold
    myprobs = probs[:, 1]
    probs_rejected = y_test == label
    diff_myprobs = np.abs(myprobs - threshold)
    minval = min(diff_myprobs[probs_rejected])
    ind_minval_rejected = np.logical_and(probs_rejected, diff_myprobs == minval)
    # Returns the matching rows from both the balanced and dense frames.
    return df[sel_bool_test][ind_minval_rejected], dense_df[sel_bool_test][ind_minval_rejected]


#ds.pickleIt((coef_binary,coef_numerical,coef_sparse),'FeatureSetA_coef_summaries')
# NOTE(review): clf_kwargs={} is a mutable default argument; left unchanged
# because GridSearch's body is truncated beyond this view.
def GridSearch(data, params, classifier, classifier_name, paramname, probstype=1, clf_kwargs={}):
# NOTE(review): this chunk appears to start mid-function -- client, dataSet,
# mask_prefix, gridCellSize and resolution are defined outside this view.
proj4 = client.getProjection(dataSet).proj4
# Choose the mask shapefile: the generic ice-sheet mask for "ICE", otherwise
# the Greenland mask matching the prefix.
mask = '/data/puma1/scratch/cryotempo/masks/icesheets.shp' if mask_prefix == "ICE" else '/data/puma1/scratch/cryotempo/sarinmasks/{}_Greenland.shp'.format(mask_prefix)
tmpPath = '/home/jon/data/masks/'
bbox = client.boundingBox(dataSet)
gridCells = client.gridCells(dataSet, bbox)
for gc in gridCells:
    # Build an interpolation grid for this cell and clip it to the mask.
    data = interpolationGrid(gc.minX, gc.minY, gridCellSize, resolution)
    point_ds = pds.PointDataSet(data, proj4)
    geoDs = point_ds.asGeoDataSet()
    geoDs.withinMask(mask, mask_prefix)
    results = geoDs.getData()
    stats = {}
    # Number of interpolation points that fall within the mask for this cell.
    stats['InterpolationCount'] = float(
        results['within_{}'.format(mask_prefix)].sum())
    print(
        client.query.publishGridCellStats(
            dataSet.parentDataSet,
            "{}_GridCellInterpolationCount".format(mask_prefix), gc.minX,
            gc.minY, gridCellSize, stats))
    # Persist the per-cell masked grid for later inspection.
    mask_file = "mask_{}_{}_{}.csv".format(dataSet.dataSet, gc.minX, gc.minY)
    results.to_csv(tmpPath + mask_file)