import pickle

import numpy as np
from scipy.stats import norm

# Project-local modules (assumed available on the path): loader provides the
# solved-network data, PCA the centering/normalization/projection helpers, and
# KDE the Kramers-Moyal coefficient estimator. createCumulativeSumOfDaysInData
# is assumed to be defined elsewhere in the project.
import loader
import PCA
import KDE

# Module-level constants used by the generator below. The original fragment
# does not define them: tau = 1 matches the tau passed to
# KDE.kramer_moyal_coeff, and samplerate (the number of grid points used when
# evaluating a KDE) is an assumed value.
tau = 1
samplerate = 100


def calculatePCAweights(filename):
    """
    From a solved network, calculate the hourly PCA weights (amplitudes).
    :param filename: solved-network file accepted by the loader
    :return: 30 x N array of hourly amplitudes, one row per principal component
    """
    # Load the mismatch data from a solved system
    mismatch = loader.get_eu_mismatch_balancing_injection_meanload(filename)[0]
    # Total number of days in the data [hours / 24]
    numberOfDays = mismatch.shape[1] // 24
    # Center and normalize the data, for use with PCA
    mismatch_c, mean_mismatch = PCA.center(mismatch)
    h, Ntilde = PCA.normalize(mismatch_c)
    # Arrays to store the amplitudes in (the daily and semi-monthly arrays are
    # allocated here but not filled by this function).
    weights_all = np.zeros([30, mismatch.shape[1]])
    weights_daily_all = np.zeros([30, numberOfDays])
    weights_monthly_all = np.zeros([30, numberOfDays])
    # Calculate the amplitude of each principal component
    for j in range(30):
        weights_all[j, :] = PCA.get_xi_weight(h, j)
    return weights_all
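
# The generator below evolves each principal-component amplitude with a
# Langevin (Euler-Maruyama) step: a(t+tau) = a(t) + D1*tau + sqrt(D2*tau)*xi,
# where D1 and D2 are the first and second Kramers-Moyal coefficients
# estimated from the KDE of the data, and xi ~ N(0, 1). The helper below is an
# illustrative sketch of that update rule; it is not called by the code in
# this file, which inlines the same computation at each timescale.
def _langevin_step(a_prev, drift, diffusion, tau, rng=np.random):
    """One Euler-Maruyama step of da = D1*dt + sqrt(D2)*dW."""
    return a_prev + drift * tau + np.sqrt(diffusion * tau) * rng.normal(0, 1)
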
def generate8YearMismatchData(filename, weights_monthly_all, weights_daily_all,
                              weights_hourly_all, NumberOfComponents=1):
    # Data structures
    T = len(weights_hourly_all[0, :])
    a_h = np.zeros((NumberOfComponents, T))
    a_d = np.zeros((NumberOfComponents, T))
    a_m = np.zeros((NumberOfComponents, T))
    # Set the starting values from the real data
    a_h[:, 0] = weights_hourly_all[0:NumberOfComponents, 0]
    a_d[:, 0] = weights_daily_all[0:NumberOfComponents, 0]
    a_m[:, 0] = weights_monthly_all[0:NumberOfComponents, 0]
    # Cumulative hour indices of the month and semi-month boundaries
    hours_in_month = createCumulativeSumOfDaysInData(StartMonth='Jan')[1][1::2]
    hours_in_semi_month = createCumulativeSumOfDaysInData(StartMonth='Jan')[1]
    # Load the data from a solved system
    mismatch = loader.get_eu_mismatch_balancing_injection_meanload(filename)[0]
    # Center and normalize data, for use with PCA
    mismatch_c, mean_mismatch = PCA.center(mismatch)
    h, Ntilde = PCA.normalize(mismatch_c)
    # T is the number of hours
    epsilon = np.zeros((NumberOfComponents, T))
    # We have a network of 30 nodes, so the new mismatch needs to be 30 x T
    approx_mismatch = np.zeros((mismatch.shape[0], T))
    # Loop over the number of components
    for component in range(NumberOfComponents):
        # Load the relevant KDEs and associated values.
        with open('hourly_kde_full_k=' + str(component + 1) + '.pkl', 'rb') as f:
            Hourly_data = pickle.load(f)
        kdes_h = Hourly_data['kde']
        max_values_h = Hourly_data['max']
        min_values_h = Hourly_data['min']
        value_intervals_h = Hourly_data['interval']
        with open('daily_kde_full_k=' + str(component + 1) + '.pkl', 'rb') as f:
            Daily_data = pickle.load(f)
        kdes_d = Daily_data['kde']
        max_value_d = Daily_data['max']
        min_value_d = Daily_data['min']
        value_interval_d = Daily_data['interval']
        with open('monthly_kde_full_k=' + str(component + 1) + '.pkl', 'rb') as f:
            Monthly_data = pickle.load(f)
        kdes_m = Monthly_data['kde']
        max_values_m = Monthly_data['max']
        min_values_m = Monthly_data['min']
        value_intervals_m = Monthly_data['interval']
        # Begin to create the generated values
        daily_amplitude = weights_daily_all[component, 0]
        semi_monthly_amplitude = weights_monthly_all[component, 0]
        day = 0
        semi_month = 0
        month = 0
        year = 0
        time_of_day = 0
        # Since we use the real data values for hour 0, the generated data
        # starts at hour 1.
        for hour in range(1, T):
            # Keep track of what month and year we are in while generating
            if hour == hours_in_month[12 * year + month]:
                month += 1
                if month == 12:
                    month = 0
                    year += 1
            samples = np.vstack([np.repeat(a_h[component, hour - 1], samplerate),
                                 np.linspace(min_values_h[(24 * month) + time_of_day],
                                             max_values_h[(24 * month) + time_of_day],
                                             samplerate)])
            pdf = kdes_h[(24 * month) + time_of_day].evaluate(samples)
            drift = KDE.kramer_moyal_coeff(a_h[component, hour - 1],
                                           value_intervals_h[(24 * month) + time_of_day, :],
                                           pdf, n=1, tau=1)
            diffusion = KDE.kramer_moyal_coeff(a_h[component, hour - 1],
                                               value_intervals_h[(24 * month) + time_of_day, :],
                                               pdf, n=2, tau=1)
            p = norm.rvs(loc=0, scale=1)
            a_h[component, hour] = a_h[component, hour - 1] + drift * tau + np.sqrt(diffusion * tau) * p
            time_of_day += 1
            if time_of_day == 24:
                time_of_day = 0
            # Check if the new value is "legal"
            samples = np.vstack([np.repeat(a_h[component, hour], samplerate),
                                 np.linspace(min_values_h[(24 * month) + time_of_day],
                                             max_values_h[(24 * month) + time_of_day],
                                             samplerate)])
            pdf = kdes_h[(24 * month) + time_of_day].evaluate(samples)
            iteration = 0
            if np.sum(pdf) == 0.0:
                # Redraw the Gaussian increment until the value has nonzero
                # density under the KDE again.
                while np.sum(pdf) == 0.0:
                    p = norm.rvs(loc=0, scale=1)
                    a_h[component, hour] = a_h[component, hour - 1] + drift * tau + np.sqrt(diffusion * tau) * p
                    samples = np.vstack([np.repeat(a_h[component, hour], samplerate),
                                         np.linspace(min_values_h[(24 * month) + time_of_day],
                                                     max_values_h[(24 * month) + time_of_day],
                                                     samplerate)])
                    pdf = kdes_h[(24 * month) + time_of_day].evaluate(samples)
                    iteration += 1
                    if iteration > 1000:
                        print('Too many iterations')
                        break
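            # The daily and semi-monthly amplitudes below follow the same
            # drift/diffusion update as the hourly one, but are only refreshed
            # when their timescale rolls over (every 24 hours, or at a
            # semi-month boundary); in between, the last value is held, so
            # that a_m + a_d + a_h is defined at every hour.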
            ############################################################################
            ############################## DAILY PART ##################################
            ############################################################################
            # 24 hours have passed, so we need a new daily value
            if hour % 24 == 0:
                # Step from the previous hour's held daily value
                # (a_d[:, hour] has not been written yet at this point).
                samples = np.vstack([np.repeat(a_d[component, hour - 1], samplerate),
                                     np.linspace(min_value_d[month], max_value_d[month], samplerate)])
                pdf = kdes_d[month].evaluate(samples)
                drift = KDE.kramer_moyal_coeff(a_d[component, hour - 1],
                                               value_interval_d[month, :], pdf, n=1, tau=1)
                diffusion = KDE.kramer_moyal_coeff(a_d[component, hour - 1],
                                                   value_interval_d[month, :], pdf, n=2, tau=1)
                p = norm.rvs(loc=0, scale=1)
                daily_amplitude = a_d[component, hour - 1] + drift * tau + np.sqrt(diffusion * tau) * p
                samples = np.vstack([np.repeat(daily_amplitude, samplerate),
                                     np.linspace(min_value_d[month], max_value_d[month], samplerate)])
                pdf = kdes_d[month].evaluate(samples)
                iteration = 0
                if np.sum(pdf) == 0.0:
                    while np.sum(pdf) == 0.0:
                        p = norm.rvs(loc=0, scale=1)
                        daily_amplitude = a_d[component, hour - 1] + drift * tau + np.sqrt(diffusion * tau) * p
                        samples = np.vstack([np.repeat(daily_amplitude, samplerate),
                                             np.linspace(min_value_d[month], max_value_d[month], samplerate)])
                        pdf = kdes_d[month].evaluate(samples)
                        iteration += 1
                        if iteration > 1000:
                            print('Too many iterations')
                            break
            # Write the (held) daily value to the daily amplitude array
            a_d[component, hour] = daily_amplitude
            ############################################################################
            ################## MONTHLY PART ############################################
            ############################################################################
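            # hours_in_semi_month holds the cumulative hour index of each
            # semi-month boundary; crossing one triggers a single Langevin
            # step of the slow component, conditioned on the KDE of the
            # current month (the KDEs are per month, while the updates happen
            # twice a month).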
            if hour == hours_in_semi_month[semi_month]:
                semi_month += 1
                samples = np.vstack([np.repeat(a_m[component, hour - 1], samplerate),
                                     np.linspace(min_values_m[month], max_values_m[month], samplerate)])
                pdf = kdes_m[month].evaluate(samples)
                drift = KDE.kramer_moyal_coeff(a_m[component, hour - 1],
                                               value_intervals_m[month, :], pdf, n=1, tau=1)
                diffusion = KDE.kramer_moyal_coeff(a_m[component, hour - 1],
                                                   value_intervals_m[month, :], pdf, n=2, tau=1)
                p = norm.rvs(loc=0, scale=1)
                semi_monthly_amplitude = a_m[component, hour - 1] + drift * tau + np.sqrt(diffusion * tau) * p
                samples = np.vstack([np.repeat(semi_monthly_amplitude, samplerate),
                                     np.linspace(min_values_m[month], max_values_m[month], samplerate)])
                pdf = kdes_m[month].evaluate(samples)
                # Check if we go out of bounds; if so, find a new random value
                # which would bring us back.
                iteration = 0
                if np.sum(pdf) == 0.0:
                    while np.sum(pdf) == 0.0:
                        p = norm.rvs(loc=0, scale=1)
                        semi_monthly_amplitude = a_m[component, hour - 1] + drift * tau + np.sqrt(diffusion * tau) * p
                        samples = np.vstack([np.repeat(semi_monthly_amplitude, samplerate),
                                             np.linspace(min_values_m[month], max_values_m[month], samplerate)])
                        pdf = kdes_m[month].evaluate(samples)
                        iteration += 1
                        if iteration > 1000:
                            print('Too many iterations')
                            break
            # Write the (held) semi-monthly value to the amplitude array
            a_m[component, hour] = semi_monthly_amplitude
            # Progress indicator
            if hour % 10000 == 0:
                print('%d hours generated' % hour)
        print('Mean values')
        print(np.mean(a_h[component, :]))
        print(np.mean(a_d[component, :]))
        print(np.mean(a_m[component, :]))
        # Remove the mean from each generated amplitude series
        a_h[component, :] = a_h[component, :] - np.mean(a_h[component, :])
        a_d[component, :] = a_d[component, :] - np.mean(a_d[component, :])
        a_m[component, :] = a_m[component, :] - np.mean(a_m[component, :])
        # Combine the three timescales into the generated amplitude series
        for i in np.arange(0, T, tau):
            epsilon[component, i] = a_m[component, i] + a_d[component, i] + a_h[component, i]
            # Compare the generated start value with the real first weight
            if i == 0:
                print(epsilon[component, i])
                print(PCA.get_xi_weight(h, component)[0])
        lambd, princ_comp = PCA.get_principal_component(h, component)
        mismatch_PC = PCA.unnormalize_uncenter(princ_comp, Ntilde, mean_mismatch)
        approx_mismatch += np.outer(mismatch_PC, epsilon[component, :])
    # Save the generated mismatch and the generated amplitudes
    np.save('approx_mismatch_generated.npy', approx_mismatch)
    np.save('weights_generated.npy', epsilon)
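
# Example usage (illustrative sketch, not part of the original module):
# 'solved_network.npz' is a hypothetical filename, and the daily and
# semi-monthly weight arrays are assumed to be precomputed elsewhere in the
# project; only the hourly weight calculation is shown in this file.
if __name__ == '__main__':
    weights_hourly_all = calculatePCAweights('solved_network.npz')
    weights_daily_all = np.load('weights_daily_all.npy')      # hypothetical precomputed file
    weights_monthly_all = np.load('weights_monthly_all.npy')  # hypothetical precomputed file
    generate8YearMismatchData('solved_network.npz', weights_monthly_all,
                              weights_daily_all, weights_hourly_all,
                              NumberOfComponents=1)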