コード例 #1
0
def data_prep(df, seed, validation_size, dep_var):
    """Split *df* into train/validation sets and standardize the training part.

    Returns a 6-tuple:
    (X_train_scaled, Y_train_scaled, X_val, Y_val, X_norm, Y_norm) —
    scaled training arrays, *unscaled* validation arrays, and the fitted
    scalers so the standardization can be inverted later.
    """
    # Separate the feature columns from the dependent variable.
    features_df, target_df = util.split_tmy(df, dep_var)

    feature_arr = features_df.values
    target_arr = target_df.values

    # Seeded shuffle + hold-out split.
    # NOTE: taking a fresh split here (rather than reusing a module-level
    # X_train) avoids the earlier bug where training data for two different
    # dependent variables was accidentally identical.
    feat_train, feat_val, tgt_train, tgt_val = util.train_test(
        validation_size, seed, feature_arr, target_arr)

    # Standardize only the training portion; keep the fitted scalers so
    # predictions can be transformed back to original units.
    feat_train_scaled, tgt_train_scaled, feat_scaler, tgt_scaler = (
        util.standardizer(feat_train, tgt_train))

    return (feat_train_scaled, tgt_train_scaled, feat_val, tgt_val,
            feat_scaler, tgt_scaler)
コード例 #2
0
        # Slice out one experiment run; i is a (start, end) row-index pair.
        temp_data = arr_test[i[0]:i[1]]
        #generate an array the size of the current sub array, to be added.
        # Columns 0-1 (illumination, time — per the column comment elsewhere
        # in this file) are copied unchanged; columns 2-5 get multiplicative
        # uniform noise in [-noise, +noise].
        noise_array = np.array([[temp_data[k, 0]] + [temp_data[k, 1]] + [
            temp_data[k, j] +
            temp_data[k, j] * np.random.uniform(-noise, noise)
            for j in xrange(2, 6)
        ] for k in xrange(len(temp_data))])
        # Append this noisy copy to the growing augmented dataset.
        enh_array = np.concatenate((enh_array, noise_array), axis=0)

    #Data normalization
    #print noise_array
    # a custom built function normalizes along the column. The columns are then put back together
    #TIME REMOVED
    #now we have in order
    #Illumination DCW nitrate Lutein n-Flowrate
    # Row 0 is dropped ([1:]) — presumably a seed/placeholder row used to
    # initialize enh_array before the concatenations (TODO confirm).
    # Column 1 (time) is deliberately excluded.
    enh_array = np.column_stack((normalizer(standardizer(enh_array[1:, 0])),
                                 normalizer(standardizer(enh_array[1:, 2])),
                                 normalizer(standardizer(enh_array[1:, 3])),
                                 normalizer(standardizer(enh_array[1:, 4])),
                                 normalizer(standardizer(enh_array[1:, 5]))))

    # (start, end) index pairs covering consecutive 12-row runs; the total
    # (12 * 100 * 5) should match the augmented array length — TODO confirm
    # it agrees with the number of noisy copies generated above.
    r_lengths = [[i, i + 12] for i in xrange(0, 12 * 100 * 5, 12)]
    #print r_lengths[-1]
    #print len(enh_array)
    #dataset without time
    #print enh_array

    # Walk each 12-row run; inner loop stops one short so sample k can be
    # paired with its successor k+1 (body continues past this fragment).
    for j in r_lengths:
        temp_data = enh_array[j[0]:j[1]]
        for k in xrange(len(temp_data) - 1):
コード例 #3
0
        #generate an array the size of the current sub array, to be added.
        # Columns 0-1 are copied unchanged; columns 2-5 get multiplicative
        # uniform noise in [-noise, +noise].
        noise_array = np.array([[temp_data[k, 0]] + [temp_data[k, 1]] + [
            temp_data[k, j] +
            temp_data[k, j] * np.random.uniform(-noise, noise)
            for j in xrange(2, 6)
        ] for k in xrange(len(temp_data))])
        # Append the noisy copy to the growing augmented dataset.
        enh_array = np.concatenate((enh_array, noise_array), axis=0)

    #Data normalization
    #print noise_array
    # a custom built function normalizes along the column. The columns are then put back together
    #TIME REMOVED
    #now we have in order
    #Illumination DCW nitrate Lutein n-Flowrate
    # NOTE(review): unlike the sibling snippet, this variant applies only
    # standardizer() with no normalizer() wrapper — confirm which scaling is
    # intended. Row 0 is dropped ([1:]), presumably a seed row; column 1
    # (time) is excluded.
    enh_array = np.column_stack(
        (standardizer(enh_array[1:, 0]), standardizer(enh_array[1:, 2]),
         standardizer(enh_array[1:, 3]), standardizer(enh_array[1:, 4]),
         standardizer(enh_array[1:, 5])))

    # (start, end) index pairs covering consecutive 12-row runs
    # (12 * 100 * 5 rows total — TODO confirm against the augmentation size).
    r_lengths = [[i, i + 12] for i in xrange(0, 12 * 100 * 5, 12)]
    #print r_lengths[-1]
    #print len(enh_array)
    #dataset without time
    #print enh_array

    # Build supervised samples per run: input is row k, target is the delta
    # to row k+1 (call is cut off at the end of this fragment).
    for j in r_lengths:
        temp_data = enh_array[j[0]:j[1]]
        for k in xrange(len(temp_data) - 1):
            ds2.addSample(
                (temp_data[k][0], temp_data[k][1], temp_data[k][2],
コード例 #4
0
                      usecols=range(5),
                      columns=lista_col)
# NOTE(review): .col is not a pandas DataFrame attribute — these lines attach
# an ad-hoc attribute; verify this is intentional (perhaps .columns was meant).
exp_1.col = lista_col1
exp_2.col = lista_col1
# Nitrogen flow-rate profiles: 4 samples of 0 then 8 at a constant rate,
# repeated once per run (3 runs in experiment 1, 4 in experiment 2).
flow1 = np.array([(4 * [0] + 8 * [127.5]) * 3])
flow2 = np.array([(4 * [0] + 8 * [25.5]) * 4])

# Append the flow-rate column to each experiment (first data row skipped
# via [1:], presumably a header row — TODO confirm).
arr1 = np.concatenate((exp_1.values[1:], flow1.T), axis=1)
arr2 = np.concatenate((exp_2.values[1:], flow2.T), axis=1)

# Hand-picked 12-row runs for training; the remaining runs form validation.
arr_test = np.concatenate(
    (arr1[0:12], arr1[24:36], arr2[0:12], arr2[24:36], arr2[36:48]), axis=0)
arr_val = np.concatenate((arr1[12:24], arr2[12:24]), axis=0)
# Scale train+val together so both use the same statistics.
arr_temp = np.concatenate((arr_test, arr_val), axis=0)
#columns are 'Illumination','Time', 'DCW', 'nitrate', 'lutein', 'nitrogen flowrate'
arr_tempA = np.column_stack((normalizer(standardizer(arr_temp[:, 0])),
                             normalizer(standardizer(arr_temp[:, 1])),
                             normalizer(standardizer(arr_temp[:, 2])),
                             normalizer(standardizer(arr_temp[:, 3])),
                             normalizer(standardizer(arr_temp[:, 4])),
                             normalizer(standardizer(arr_temp[:, 5]))))

# Split the scaled block back into train/validation halves.
arr_testT, arr_valT = arr_tempA[:len(arr_test)], arr_tempA[len(arr_test):]

#save data so that it can be reversed later
# Per-column mean and std of the *unscaled* data, kept so the scaling can be
# inverted on model outputs.
arr_temp_means = np.mean(arr_temp, axis=0, dtype=np.float64)
#print exp_1.col
#print arr_temp
print arr_temp_means
arr_temp_stdD = np.std(arr_temp, axis=0, dtype=np.float64)
print arr_temp_stdD
コード例 #5
0
                      columns=lista_col)
# NOTE(review): .col is not a pandas attribute — ad-hoc attribute; verify.
exp_1.col = lista_col1
exp_2.col = lista_col1
# Nitrogen flow-rate profiles: 4 samples of 0 then 8 at a constant rate,
# repeated per run (3 runs in experiment 1, 4 in experiment 2).
flow1 = np.array([(4 * [0] + 8 * [127.5]) * 3])
flow2 = np.array([(4 * [0] + 8 * [25.5]) * 4])

# Append the flow-rate column; first data row skipped via [1:].
arr1 = np.concatenate((exp_1.values[1:], flow1.T), axis=1)
arr2 = np.concatenate((exp_2.values[1:], flow2.T), axis=1)

# Hand-picked 12-row runs for training; remaining runs form validation.
arr_test = np.concatenate(
    (arr1[0:12], arr1[24:36], arr2[0:12], arr2[24:36], arr2[36:48]), axis=0)
arr_val = np.concatenate((arr1[12:24], arr2[12:24]), axis=0)
# Scale train+val together so both use the same statistics.
arr_temp = np.concatenate((arr_test, arr_val), axis=0)

arr_tempA = np.column_stack(
    (standardizer(arr_temp[:, 0]), standardizer(arr_temp[:, 1]),
     standardizer(arr_temp[:, 2]), standardizer(arr_temp[:, 3]),
     standardizer(arr_temp[:, 4]), standardizer(arr_temp[:, 5])))

arr_testT, arr_valT = arr_tempA[:len(arr_test)], arr_tempA[len(arr_test):]

#the four sets of experiments are given separately. These are their start and end points
set_lengths = ((0, 13), (13, 26), (26, 39), (39, 52))
#third set is separated
#an enhanced array that will contain the noisy data

#different deltaT.Normalized. Should be 12,24,36,72
#print cross_val

#Open networks
# NOTE(review): everything below duplicates the pipeline above (same reads,
# same slices, same scaling) with different formatting — likely two script
# versions merged together; confirm which copy is authoritative.
#import as Pandas Data the excel file
exp_1 = pd.read_excel('Second/First_Exp.xlsx', sheetname='Sheet1', header=None,usecols=range(5),columns=lista_col)
exp_2 = pd.read_excel('Second/Second_Exp.xlsx', sheetname='Sheet1', header=None,usecols=range(5),columns=lista_col)
exp_1.col=lista_col1
exp_2.col=lista_col1
flow1=np.array([(4*[0]+8*[127.5])*3])
flow2=np.array([(4*[0]+8*[25.5])*4])

arr1=np.concatenate((exp_1.values[1:],flow1.T),axis=1)
arr2=np.concatenate((exp_2.values[1:],flow2.T),axis=1)

arr_test=np.concatenate((arr1[0:12],arr1[24:36],arr2[0:12],arr2[24:36],arr2[36:48]),axis=0)
arr_val=np.concatenate((arr1[12:24],arr2[12:24]),axis=0)
arr_temp=np.concatenate((arr_test,arr_val),axis=0)

arr_tempA=np.column_stack((standardizer(arr_temp[:,0]),standardizer(arr_temp[:,1]),standardizer(arr_temp[:,2]),standardizer(arr_temp[:,3]),standardizer(arr_temp[:,4]),standardizer(arr_temp[:,5])))

arr_testT,arr_valT=arr_tempA[:len(arr_test)], arr_tempA[len(arr_test):]

#the four sets of experiments are given separately. These are their start and end points
set_lengths=((0,13),(13,26),(26,39),(39,52))
#third set is separated
#an enhanced array that will contain the noisy data


#different deltaT.Normalized. Should be 12,24,36,72
#print cross_val


#Open networks
コード例 #7
0
    #augment the enhanced array
    # For each experiment set, generate 50 noisy copies (m is just a repeat
    # counter from product()).
    for i,m in product(set_lengths,xrange(50)):
        #i is the dataset's (start, end) index pair
        temp_data=arr_test[i[0]:i[1]]
        #generate an array the size of the current sub array, to be added.
        # Columns 0-1 copied unchanged; columns 2-5 get multiplicative
        # uniform noise in [-noise, +noise].
        noise_array=np.array([[temp_data[k,0]]+[temp_data[k,1]]+[temp_data[k,j]+temp_data[k,j]*np.random.uniform(-noise,noise) for j in xrange(2,6)] for k in xrange(len(temp_data))])
        enh_array=np.concatenate((enh_array,noise_array),axis=0)

    #Data normalization
    #print noise_array
    # a custom built function normalizes along the column. The columns are then put back together
    #TIME REMOVED
    #now we have in order
    #Illumination DCW nitrate Lutein n-Flowrate
    # Row 0 dropped ([1:]) — presumably a seed row; column 1 (time) excluded.
    enh_array=np.column_stack((standardizer(enh_array[1:,0]),standardizer(enh_array[1:,2]),standardizer(enh_array[1:,3]),standardizer(enh_array[1:,4]),standardizer(enh_array[1:,5])))



# (start, end) pairs covering consecutive 12-row runs; total 12*50*2*5 —
# TODO confirm this matches the augmentation output above.
r_lengths=[[i,i+12] for i in xrange(0,12*50*2*5,12)]
#print r_lengths[-1]
#print len(enh_array)
#dataset without time
#print enh_array

# Build supervised samples per run: input is the 5 scaled values at step k,
# target is the delta of columns 1-3 between steps k+1 and k.
for j in r_lengths:
    temp_data=enh_array[j[0]:j[1]]        
    for k in xrange(len(temp_data)-1):
        ds2.addSample((temp_data[k][0],temp_data[k][1],temp_data[k][2],temp_data[k][3],temp_data[k][4]), (temp_data[k+1][1]-temp_data[k][1],temp_data[k+1][2]-temp_data[k][2],temp_data[k+1][3]-temp_data[k][3]))
コード例 #8
0
# Training-epoch counts to sweep in the grid search below.
n_Epochs = [15, 50, 100, 200, 300, 400, 600]  #
#print mb2
#print mb2.head
# Collects per-configuration errors from the sweep.
error_storage = []

#----------
# build the datasets
#----------

# PyBrain dataset: 5 inputs, 3 targets per sample.
ds2 = SupervisedDataSet(5, 3)

#Data normalization
# a custom built function normalizes along the column. The columns are then put back together
#Illumination DCW nitrate Lutein n-Flowrate
# NOTE(review): here the source is beta_Val (already time-free, 5 columns),
# not arr_temp — all rows kept, standardizer only.
enh_array = np.column_stack(
    (standardizer(beta_Val[:, 0]), standardizer(beta_Val[:, 1]),
     standardizer(beta_Val[:, 2]), standardizer(beta_Val[:, 3]),
     standardizer(beta_Val[:, 4])))

#dataset with flow

# Input is the full 5-value row at step k; target is the delta of columns
# 1-3 between steps k+1 and k within each experiment set.
for j in set_lengths:
    temp_data = enh_array[j[0]:j[1]]
    for k in xrange(len(temp_data) - 1):
        ds2.addSample(
            temp_data[k],
            (temp_data[k + 1][1] - temp_data[k][1], temp_data[k + 1][2] -
             temp_data[k][2], temp_data[k + 1][3] - temp_data[k][3]))

# Grid search over hidden-layer sizes and epoch counts (body continues
# beyond this fragment).
for nH, nE in product(n_hidden, n_Epochs):
    #----------
コード例 #9
0
                      usecols=range(5),
                      columns=lista_col)
# NOTE(review): .col is not a pandas attribute — ad-hoc attribute; verify.
exp_1.col = lista_col1
exp_2.col = lista_col1
# Nitrogen flow-rate profiles: 4 samples of 0 then 8 at a constant rate,
# repeated per run (3 runs in experiment 1, 4 in experiment 2).
flow1 = np.array([(4 * [0] + 8 * [127.5]) * 3])
flow2 = np.array([(4 * [0] + 8 * [25.5]) * 4])

# Append the flow-rate column; first data row skipped via [1:].
arr1 = np.concatenate((exp_1.values[1:], flow1.T), axis=1)
arr2 = np.concatenate((exp_2.values[1:], flow2.T), axis=1)

# Hand-picked 12-row runs for training; remaining runs form validation.
arr_test = np.concatenate(
    (arr1[0:12], arr1[24:36], arr2[0:12], arr2[24:36], arr2[36:48]), axis=0)
arr_val = np.concatenate((arr1[12:24], arr2[12:24]), axis=0)
# Scale train+val together so both use the same statistics.
arr_temp = np.concatenate((arr_test, arr_val), axis=0)

arr_tempA = np.column_stack((normalizer(standardizer(arr_temp[:, 0])),
                             normalizer(standardizer(arr_temp[:, 1])),
                             normalizer(standardizer(arr_temp[:, 2])),
                             normalizer(standardizer(arr_temp[:, 3])),
                             normalizer(standardizer(arr_temp[:, 4])),
                             normalizer(standardizer(arr_temp[:, 5]))))

# Split the scaled block back into train/validation halves.
arr_testT, arr_valT = arr_tempA[:len(arr_test)], arr_tempA[len(arr_test):]

#the four sets of experiments are given separately. These are their start and end points
set_lengths = ((0, 13), (13, 26), (26, 39), (39, 52))
#third set is separated
#an enhanced array that will contain the noisy data

#different deltaT.Normalized. Should be 12,24,36,72
#print cross_val
コード例 #10
0
                      usecols=range(5),
                      columns=lista_col)
# NOTE(review): .col is not a pandas attribute — ad-hoc attribute; verify.
exp_1.col = lista_col1
exp_2.col = lista_col1
# Nitrogen flow-rate profiles: 4 samples of 0 then 8 at a constant rate,
# repeated per run (3 runs in experiment 1, 4 in experiment 2).
flow1 = np.array([(4 * [0] + 8 * [127.5]) * 3])
flow2 = np.array([(4 * [0] + 8 * [25.5]) * 4])

# Append the flow-rate column; first data row skipped via [1:].
arr1 = np.concatenate((exp_1.values[1:], flow1.T), axis=1)
arr2 = np.concatenate((exp_2.values[1:], flow2.T), axis=1)

# Hand-picked 12-row runs for training; remaining runs form validation.
arr_test = np.concatenate(
    (arr1[0:12], arr1[24:36], arr2[0:12], arr2[24:36], arr2[36:48]), axis=0)
arr_val = np.concatenate((arr1[12:24], arr2[12:24]), axis=0)
# Scale train+val together so both use the same statistics.
arr_temp = np.concatenate((arr_test, arr_val), axis=0)

arr_tempA = np.column_stack((normalizer(standardizer(arr_temp[:, 0])),
                             normalizer(standardizer(arr_temp[:, 1])),
                             normalizer(standardizer(arr_temp[:, 2])),
                             normalizer(standardizer(arr_temp[:, 3])),
                             normalizer(standardizer(arr_temp[:, 4])),
                             normalizer(standardizer(arr_temp[:, 5]))))

# Split the scaled block back into train/validation halves.
arr_testT, arr_valT = arr_tempA[:len(arr_test)], arr_tempA[len(arr_test):]

#the four sets of experiments are given separately. These are their start and end points
# Rows per experiment run (TODO confirm: sibling snippets use 13-row
# set_lengths tuples instead of this single length).
sLht = 12
#third set is separated
#an enhanced array that will contain the noisy data

#debug. Functions for scaling and rescaling seem to work fine
#print arr_temp[:,2]