def data_prep(df, seed, validation_size, dep_var):
    """Split *df* into train/validation sets and standardize the training part.

    Parameters
    ----------
    df : DataFrame of raw (TMY) data, split by util.split_tmy.
    seed : RNG seed forwarded to util.train_test for the shuffled split.
    validation_size : size of the hold-out set, forwarded to util.train_test.
    dep_var : dependent-variable identifier passed to util.split_tmy.

    Returns
    -------
    (X_train_scaled, Y_train_scaled, X_val, Y_val, X_norm, Y_norm)
    X_norm / Y_norm are whatever util.standardizer fits on the training
    data, kept so the scaling can be inverted later.  Note that the
    validation arrays are returned *unscaled*.
    """
    features_df, targets_df = util.split_tmy(df, dep_var)
    features = features_df.values
    targets = targets_df.values
    # Randomized train/validation split.  Keeping the split in one call
    # avoids the past mistake of re-running the randomizer and silently
    # overwriting X_train between the two Y-values.
    x_tr, x_val, y_tr, y_val = util.train_test(validation_size, seed,
                                               features, targets)
    # Scale only the training arrays; the fitted scalers are returned so
    # predictions can be mapped back to original units.
    x_tr_s, y_tr_s, x_scaler, y_scaler = util.standardizer(x_tr, y_tr)
    return (x_tr_s, y_tr_s, x_val, y_val, x_scaler, y_scaler)
temp_data = arr_test[i[0]:i[1]] #generate an array the size of the current sub array, to be added. noise_array = np.array([[temp_data[k, 0]] + [temp_data[k, 1]] + [ temp_data[k, j] + temp_data[k, j] * np.random.uniform(-noise, noise) for j in xrange(2, 6) ] for k in xrange(len(temp_data))]) enh_array = np.concatenate((enh_array, noise_array), axis=0) #Data normalization #print noise_array # a custom built function normalizes along the column. The columns are then put back together #TIME REMOVED #now we have in order #Illumination DCW nitrate Lutein n-Flowrate enh_array = np.column_stack((normalizer(standardizer(enh_array[1:, 0])), normalizer(standardizer(enh_array[1:, 2])), normalizer(standardizer(enh_array[1:, 3])), normalizer(standardizer(enh_array[1:, 4])), normalizer(standardizer(enh_array[1:, 5])))) #this is meant to r_lengths = [[i, i + 12] for i in xrange(0, 12 * 100 * 5, 12)] #print r_lengths[-1] #print len(enh_array) #dataset without time #print enh_array for j in r_lengths: temp_data = enh_array[j[0]:j[1]] for k in xrange(len(temp_data) - 1):
#generate an array the size of the current sub array, to be added. noise_array = np.array([[temp_data[k, 0]] + [temp_data[k, 1]] + [ temp_data[k, j] + temp_data[k, j] * np.random.uniform(-noise, noise) for j in xrange(2, 6) ] for k in xrange(len(temp_data))]) enh_array = np.concatenate((enh_array, noise_array), axis=0) #Data normalization #print noise_array # a custom built function normalizes along the column. The columns are then put back together #TIME REMOVED #now we have in order #Illumination DCW nitrate Lutein n-Flowrate enh_array = np.column_stack( (standardizer(enh_array[1:, 0]), standardizer(enh_array[1:, 2]), standardizer(enh_array[1:, 3]), standardizer(enh_array[1:, 4]), standardizer(enh_array[1:, 5]))) #this is meant to r_lengths = [[i, i + 12] for i in xrange(0, 12 * 100 * 5, 12)] #print r_lengths[-1] #print len(enh_array) #dataset without time #print enh_array for j in r_lengths: temp_data = enh_array[j[0]:j[1]] for k in xrange(len(temp_data) - 1): ds2.addSample( (temp_data[k][0], temp_data[k][1], temp_data[k][2],
# NOTE(review): fragment -- opens mid-way through a pd.read_excel(...) call;
# its start and the definitions of lista_col / lista_col1 / exp_1 / exp_2
# are outside this view.
                         usecols=range(5), columns=lista_col)
exp_1.col = lista_col1
exp_2.col = lista_col1
# Nitrogen flow-rate profiles: per 12-sample run, 4 zeros then 8 constant
# readings (127.5 for experiment 1, 25.5 for experiment 2); experiment 1 has
# 3 runs, experiment 2 has 4.
flow1 = np.array([(4 * [0] + 8 * [127.5]) * 3])
flow2 = np.array([(4 * [0] + 8 * [25.5]) * 4])
# Append the flow-rate column; .values[1:] skips the first row (presumably a
# header/seed row -- TODO confirm against the spreadsheets).
arr1 = np.concatenate((exp_1.values[1:], flow1.T), axis=1)
arr2 = np.concatenate((exp_2.values[1:], flow2.T), axis=1)
# Hold out rows 12:24 of each experiment for validation; the rest is "test"
# (training) data.
arr_test = np.concatenate(
    (arr1[0:12], arr1[24:36], arr2[0:12], arr2[24:36], arr2[36:48]), axis=0)
arr_val = np.concatenate((arr1[12:24], arr2[12:24]), axis=0)
arr_temp = np.concatenate((arr_test, arr_val), axis=0)
#columns are 'Illumination','Time', 'DCW', 'nitrate', 'lutein', 'nitrogen flowrate'
# Scale each column independently with the custom standardizer/normalizer
# helpers (defined elsewhere); statistics come from the combined
# test+validation data.
arr_tempA = np.column_stack((normalizer(standardizer(arr_temp[:, 0])),
                             normalizer(standardizer(arr_temp[:, 1])),
                             normalizer(standardizer(arr_temp[:, 2])),
                             normalizer(standardizer(arr_temp[:, 3])),
                             normalizer(standardizer(arr_temp[:, 4])),
                             normalizer(standardizer(arr_temp[:, 5])))) 
# Split the scaled array back into its test and validation parts.
arr_testT, arr_valT = arr_tempA[:len(arr_test)], arr_tempA[len(arr_test):]
#save data so that it can be reversed later
# Per-column mean/std of the unscaled data, kept so scaling can be inverted
# on model outputs later.
arr_temp_means = np.mean(arr_temp, axis=0, dtype=np.float64)
#print exp_1.col
#print arr_temp
print arr_temp_means
arr_temp_stdD = np.std(arr_temp, axis=0, dtype=np.float64)
print arr_temp_stdD
# NOTE(review): fragment -- opens mid-way through a pd.read_excel(...) call;
# its start and the definitions of lista_col / lista_col1 / exp_1 / exp_2
# are outside this view.
                         columns=lista_col)
exp_1.col = lista_col1
exp_2.col = lista_col1
# Nitrogen flow-rate profiles: per 12-sample run, 4 zeros then 8 constant
# readings (127.5 for experiment 1, 25.5 for experiment 2).
flow1 = np.array([(4 * [0] + 8 * [127.5]) * 3])
flow2 = np.array([(4 * [0] + 8 * [25.5]) * 4])
# Append flow-rate column; .values[1:] skips the first row (presumably a
# header/seed row -- TODO confirm).
arr1 = np.concatenate((exp_1.values[1:], flow1.T), axis=1)
arr2 = np.concatenate((exp_2.values[1:], flow2.T), axis=1)
# Rows 12:24 of each experiment are held out for validation.
arr_test = np.concatenate(
    (arr1[0:12], arr1[24:36], arr2[0:12], arr2[24:36], arr2[36:48]), axis=0)
arr_val = np.concatenate((arr1[12:24], arr2[12:24]), axis=0)
arr_temp = np.concatenate((arr_test, arr_val), axis=0)
# Column-wise standardization only in this variant (no normalizer()).
arr_tempA = np.column_stack(
    (standardizer(arr_temp[:, 0]), standardizer(arr_temp[:, 1]),
     standardizer(arr_temp[:, 2]), standardizer(arr_temp[:, 3]),
     standardizer(arr_temp[:, 4]), standardizer(arr_temp[:, 5])))
arr_testT, arr_valT = arr_tempA[:len(arr_test)], arr_tempA[len(arr_test):]
#the four sets of experiments are given separately. These are their start and end points
# NOTE(review): these spans are 13 rows each while the runs above are sliced
# in 12-row chunks -- verify the off-by-one is intentional.
set_lengths = ((0, 13), (13, 26), (26, 39), (39, 52))
#third set is separated
#an enchanced array that will contain the noisy data
#different deltaT.Normalized. Should be 12,24,36,72
#print cross_val
#Open networks
#import as Pandas Data the excel file exp_1 = pd.read_excel('Second/First_Exp.xlsx', sheetname='Sheet1', header=None,usecols=range(5),columns=lista_col) exp_2 = pd.read_excel('Second/Second_Exp.xlsx', sheetname='Sheet1', header=None,usecols=range(5),columns=lista_col) exp_1.col=lista_col1 exp_2.col=lista_col1 flow1=np.array([(4*[0]+8*[127.5])*3]) flow2=np.array([(4*[0]+8*[25.5])*4]) arr1=np.concatenate((exp_1.values[1:],flow1.T),axis=1) arr2=np.concatenate((exp_2.values[1:],flow2.T),axis=1) arr_test=np.concatenate((arr1[0:12],arr1[24:36],arr2[0:12],arr2[24:36],arr2[36:48]),axis=0) arr_val=np.concatenate((arr1[12:24],arr2[12:24]),axis=0) arr_temp=np.concatenate((arr_test,arr_val),axis=0) arr_tempA=np.column_stack((standardizer(arr_temp[:,0]),standardizer(arr_temp[:,1]),standardizer(arr_temp[:,2]),standardizer(arr_temp[:,3]),standardizer(arr_temp[:,4]),standardizer(arr_temp[:,5]))) arr_testT,arr_valT=arr_tempA[:len(arr_test)], arr_tempA[len(arr_test):] #the four sets of experiments are given separately. These are their start and end points set_lengths=((0,13),(13,26),(26,39),(39,52)) #third set is separated #an enchanced array that will contain the noisy data #different deltaT.Normalized. Should be 12,24,36,72 #print cross_val #Open networks
#augment the enhanced array for i,m in product(set_lengths,xrange(50)): #i is the dataset #for each dataset we produce a 100 noise_arrays temp_data=arr_test[i[0]:i[1]] #generate an array the size of the current sub array, to be added. noise_array=np.array([[temp_data[k,0]]+[temp_data[k,1]]+[temp_data[k,j]+temp_data[k,j]*np.random.uniform(-noise,noise) for j in xrange(2,6)] for k in xrange(len(temp_data))]) enh_array=np.concatenate((enh_array,noise_array),axis=0) #Data normalization #print noise_array # a custom built function normalizes along the column. The columns are then put back together #TIME REMOVED #now we have in order #Illumination DCW nitrate Lutein n-Flowrate enh_array=np.column_stack((standardizer(enh_array[1:,0]),standardizer(enh_array[1:,2]),standardizer(enh_array[1:,3]),standardizer(enh_array[1:,4]),standardizer(enh_array[1:,5]))) #this is meant to r_lengths=[[i,i+12] for i in xrange(0,12*50*2*5,12)] #print r_lengths[-1] #print len(enh_array) #dataset without time #print enh_array for j in r_lengths: temp_data=enh_array[j[0]:j[1]] for k in xrange(len(temp_data)-1): ds2.addSample((temp_data[k][0],temp_data[k][1],temp_data[k][2],temp_data[k][3],temp_data[k][4]), (temp_data[k+1][1]-temp_data[k][1],temp_data[k+1][2]-temp_data[k][2],temp_data[k+1][3]-temp_data[k][3])) #in this one only 4 inputs.
n_Epochs = [15, 50, 100, 200, 300, 400, 600] # #print mb2 #print mb2.head error_storage = [] #---------- # build the datasets #---------- ds2 = SupervisedDataSet(5, 3) #Data normalization # a custom built function normalizes along the column. The columns are then put back together #Illumination DCW nitrate Lutein n-Flowrate enh_array = np.column_stack( (standardizer(beta_Val[:, 0]), standardizer(beta_Val[:, 1]), standardizer(beta_Val[:, 2]), standardizer(beta_Val[:, 3]), standardizer(beta_Val[:, 4]))) #dataset with flow for j in set_lengths: temp_data = enh_array[j[0]:j[1]] for k in xrange(len(temp_data) - 1): ds2.addSample( temp_data[k], (temp_data[k + 1][1] - temp_data[k][1], temp_data[k + 1][2] - temp_data[k][2], temp_data[k + 1][3] - temp_data[k][3])) for nH, nE in product(n_hidden, n_Epochs): #----------
# NOTE(review): fragment -- opens mid-way through a pd.read_excel(...) call;
# its start and the definitions of lista_col / lista_col1 / exp_1 / exp_2
# are outside this view.
                         usecols=range(5), columns=lista_col)
exp_1.col = lista_col1
exp_2.col = lista_col1
# Nitrogen flow-rate profiles: per 12-sample run, 4 zeros then 8 constant
# readings (127.5 for experiment 1, 25.5 for experiment 2).
flow1 = np.array([(4 * [0] + 8 * [127.5]) * 3])
flow2 = np.array([(4 * [0] + 8 * [25.5]) * 4])
# Append flow-rate column; .values[1:] skips the first row (presumably a
# header/seed row -- TODO confirm).
arr1 = np.concatenate((exp_1.values[1:], flow1.T), axis=1)
arr2 = np.concatenate((exp_2.values[1:], flow2.T), axis=1)
# Rows 12:24 of each experiment are held out for validation.
arr_test = np.concatenate(
    (arr1[0:12], arr1[24:36], arr2[0:12], arr2[24:36], arr2[36:48]), axis=0)
arr_val = np.concatenate((arr1[12:24], arr2[12:24]), axis=0)
arr_temp = np.concatenate((arr_test, arr_val), axis=0)
# Column-wise scaling via the custom standardizer/normalizer helpers
# (defined elsewhere); statistics come from the combined data.
arr_tempA = np.column_stack((normalizer(standardizer(arr_temp[:, 0])),
                             normalizer(standardizer(arr_temp[:, 1])),
                             normalizer(standardizer(arr_temp[:, 2])),
                             normalizer(standardizer(arr_temp[:, 3])),
                             normalizer(standardizer(arr_temp[:, 4])),
                             normalizer(standardizer(arr_temp[:, 5]))))
arr_testT, arr_valT = arr_tempA[:len(arr_test)], arr_tempA[len(arr_test):]
#the four sets of experiments are given separately. These are their start and end points
# NOTE(review): 13-row spans vs the 12-row run slices above -- verify the
# off-by-one is intentional.
set_lengths = ((0, 13), (13, 26), (26, 39), (39, 52))
#third set is separated
#an enchanced array that will contain the noisy data
#different deltaT.Normalized. Should be 12,24,36,72
#print cross_val
# NOTE(review): fragment -- opens mid-way through a pd.read_excel(...) call;
# its start and the definitions of lista_col / lista_col1 / exp_1 / exp_2
# are outside this view.
                         usecols=range(5), columns=lista_col)
exp_1.col = lista_col1
exp_2.col = lista_col1
# Nitrogen flow-rate profiles: per 12-sample run, 4 zeros then 8 constant
# readings (127.5 for experiment 1, 25.5 for experiment 2).
flow1 = np.array([(4 * [0] + 8 * [127.5]) * 3])
flow2 = np.array([(4 * [0] + 8 * [25.5]) * 4])
# Append flow-rate column; .values[1:] skips the first row (presumably a
# header/seed row -- TODO confirm).
arr1 = np.concatenate((exp_1.values[1:], flow1.T), axis=1)
arr2 = np.concatenate((exp_2.values[1:], flow2.T), axis=1)
# Rows 12:24 of each experiment are held out for validation.
arr_test = np.concatenate(
    (arr1[0:12], arr1[24:36], arr2[0:12], arr2[24:36], arr2[36:48]), axis=0)
arr_val = np.concatenate((arr1[12:24], arr2[12:24]), axis=0)
arr_temp = np.concatenate((arr_test, arr_val), axis=0)
# Column-wise scaling via the custom standardizer/normalizer helpers
# (defined elsewhere); statistics come from the combined data.
arr_tempA = np.column_stack((normalizer(standardizer(arr_temp[:, 0])),
                             normalizer(standardizer(arr_temp[:, 1])),
                             normalizer(standardizer(arr_temp[:, 2])),
                             normalizer(standardizer(arr_temp[:, 3])),
                             normalizer(standardizer(arr_temp[:, 4])),
                             normalizer(standardizer(arr_temp[:, 5]))))
arr_testT, arr_valT = arr_tempA[:len(arr_test)], arr_tempA[len(arr_test):]
#the four sets of experiments are given separately. These are their start and end points
# sLht = rows per experiment run used for slicing below.
sLht = 12
#third set is separated
#an enchanced array that will contain the noisy data
#debug. Functions for scaling and rescaling seem to work fine
#print arr_temp[:,2]