import pickle

import numpy as np
from scipy.stats import norm

# Project-local modules (assumed, not defined in this file): loader reads the
# solved-network data, PCA provides the centering/normalization and principal
# component helpers, and KDE estimates the Kramers-Moyal coefficients.
import loader
import PCA
import KDE

# Module-level constants used below (the values here are assumptions):
samplerate = 100  # number of grid points when evaluating a KDE
tau = 1           # Langevin time step in hours


def calculatePCAweights(filename):
    """
    From a solved network - calculate the PCA weights
    :param filename:
    :return:
    """
    # Load the data from a solved system
    mismatch = loader.get_eu_mismatch_balancing_injection_meanload(filename)[0]

    # Total number of days in the data [hours // 24]
    numberOfDays = mismatch.shape[1] // 24

    # Center and normalize data, for use with PCA
    mismatch_c, mean_mismatch = PCA.center(mismatch)
    h, Ntilde = PCA.normalize(mismatch_c)
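    # h is the centered and normalized mismatch; Ntilde holds the
    # normalization constants needed later to map principal components
    # back into mismatch units (see PCA.unnormalize_uncenter below).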

    # Arrays to store the amplitudes in
    weights_all = np.zeros([30, mismatch.shape[1]])
    weightsDaily_all = np.zeros([30, numberOfDays])    # currently unused here
    weightsMonthly_all = np.zeros([30, numberOfDays])  # currently unused here

    # Calculating the amplitude of each PC
    for j in range(30):
        weights_all[j, :] = PCA.get_xi_weight(h, j)

    return weights_all
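

# createCumulativeSumOfDaysInData is used below but is not defined in this
# file. The following is a minimal sketch of the assumed interface: element
# [1] is the cumulative hour count at every semi-month boundary (two marks
# per month, mid-month and month-end), so [1][1::2] yields the month-end
# boundaries. It ignores leap years; replace with the project's real helper.
def createCumulativeSumOfDaysInData(StartMonth='Jan', years=8):
    months = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
              'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
    days_in_month = [31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]
    start = months.index(StartMonth)
    cum_days, cum_hours = [], []
    total_days = 0
    for m in range(12 * years):
        days = days_in_month[(start + m) % 12]
        cum_hours.append((total_days + days // 2) * 24)  # mid-month mark
        total_days += days
        cum_days.append(total_days)
        cum_hours.append(total_days * 24)                # month-end mark
    return np.array(cum_days), np.array(cum_hours)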


def generate8YearMismatchData(filename, weights_monthly_all, weights_daily_all,
                              weights_hourly_all, NumberOfComponents=1):
    # Data structures
    T = len(weights_hourly_all[0,:])
    a_h = np.zeros((NumberOfComponents,T))
    a_d = np.zeros((NumberOfComponents,T))
    a_m = np.zeros((NumberOfComponents,T))

    # Setting starting values
    a_h[:,0] = weights_hourly_all[0:NumberOfComponents,0]
    a_d[:,0] = weights_daily_all[0:NumberOfComponents,0]
    a_m[:,0] = weights_monthly_all[0:NumberOfComponents,0]

    # Cumulative hour counts at semi-month boundaries (two marks per month);
    # every second entry is a month-end boundary
    hours_in_semi_month = createCumulativeSumOfDaysInData(StartMonth='Jan')[1]
    hours_in_month = hours_in_semi_month[1::2]

    # Load the data from a solved system
    mismatch = loader.get_eu_mismatch_balancing_injection_meanload(filename)[0]

    # Center and normalize data, for use with PCA
    mismatch_c, mean_mismatch = PCA.center(mismatch)
    h, Ntilde = PCA.normalize(mismatch_c)

    # epsilon holds the generated amplitude series, one row per component
    epsilon = np.zeros((NumberOfComponents,T))

    # The network has 30 nodes, so the reconstructed mismatch is 30 x T
    approx_mismatch = np.zeros((mismatch.shape[0],T))

    # Loop over number of components
    for component in range(NumberOfComponents):
        # Load the relevant KDEs and associated values for this component.
        with open('hourly_kde_full_k='+str(component+1)+'.pkl', 'rb') as f:
            Hourly_data = pickle.load(f)

        kdes_h = Hourly_data['kde']
        max_values_h = Hourly_data['max']
        min_values_h = Hourly_data['min']
        value_intervals_h = Hourly_data['interval']

        with open('daily_kde_full_k='+str(component+1)+'.pkl', 'rb') as f:
            Daily_data = pickle.load(f)

        kdes_d = Daily_data['kde']
        max_value_d = Daily_data['max']
        min_value_d = Daily_data['min']
        value_interval_d = Daily_data['interval']

        with open('monthly_kde_full_k='+str(component+1)+'.pkl', 'rb') as f:
            Monthly_data = pickle.load(f)

        kdes_m = Monthly_data['kde']
        max_values_m = Monthly_data['max']
        min_values_m = Monthly_data['min']
        value_intervals_m = Monthly_data['interval']
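
        # Each pickle is assumed to hold a dict with per-period Gaussian KDEs
        # ('kde') fitted over (previous value, next value) pairs, the observed
        # amplitude range ('min'/'max'), and the evaluation grid ('interval');
        # hourly entries are indexed per hour-of-day within each month, daily
        # and semi-monthly entries per month.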


        # Begin generating values; hour 0 keeps the real starting amplitudes
        daily_amplitude = weights_daily_all[component,0]
        semi_monthly_amplitude = weights_monthly_all[component,0]
        day = 0
        semi_month = 0
        month = 0
        year = 0
        time_of_day = 0  # Hour 0 uses the real data values, so generation starts at hour 1
        for hour in range(1,T):

            # Keep track of which month and year we are in while generating
            if hour == hours_in_month[12*year+month]:
                month += 1

            if month == 12:
                month = 0
                year += 1

            samples = np.vstack([np.repeat(a_h[component,hour-1],samplerate), np.linspace(min_values_h[(24*month)+time_of_day], max_values_h[(24*month)+time_of_day], samplerate)])
            pdf = kdes_h[(24*month)+time_of_day].evaluate(samples)

            drift = KDE.kramer_moyal_coeff(a_h[component,hour-1], value_intervals_h[(24*month)+time_of_day,:], pdf, n=1, tau=1)
            diffusion = KDE.kramer_moyal_coeff(a_h[component,hour-1], value_intervals_h[(24*month)+time_of_day,:], pdf, n=2, tau=1)
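
            # Euler-Maruyama step of the Langevin equation,
            #   a(t+tau) = a(t) + D1(a)*tau + sqrt(D2(a)*tau)*xi,  xi ~ N(0,1),
            # where D1/D2 are the first/second Kramers-Moyal coefficients above.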

            p = norm.rvs(loc=0, scale=1)

            a_h[component,hour] = a_h[component,hour-1] + drift*tau + np.sqrt(diffusion*tau)*p

            time_of_day += 1
            if time_of_day == 24:
                time_of_day = 0

            # Check that the new value is "legal" (has nonzero probability under the KDE)
            samples = np.vstack([np.repeat(a_h[component,hour],samplerate), np.linspace(min_values_h[(24*month)+time_of_day], max_values_h[(24*month)+time_of_day], samplerate)])
            pdf = kdes_h[(24*month)+time_of_day].evaluate(samples)

            iteration = 0
            if np.sum(pdf) == 0.0:
                while np.sum(pdf) == 0.0:

                    p = norm.rvs(loc=0, scale=1)

                    a_h[component,hour] = a_h[component,hour-1] + drift*tau + np.sqrt(diffusion*tau)*p

                    samples = np.vstack([np.repeat(a_h[component,hour],samplerate), np.linspace(min_values_h[(24*month)+time_of_day], max_values_h[(24*month)+time_of_day], samplerate)])
                    pdf = kdes_h[(24*month)+time_of_day].evaluate(samples)
                    iteration += 1
                    if iteration > 1000:
                        print('Too many iterations')
                        break

            ############################################################################
            ############################## DAILY PART ##################################
            ############################################################################
            # 24 hours have passed, so draw a new daily value
            if hour % 24 == 0:
                samples = np.vstack([np.repeat(a_d[component,hour-1],samplerate), np.linspace(min_value_d[month], max_value_d[month], samplerate)])
                pdf = kdes_d[month].evaluate(samples)

                drift = KDE.kramer_moyal_coeff(a_d[component,hour-1], value_interval_d[month,:], pdf, n=1, tau=1)
                diffusion = KDE.kramer_moyal_coeff(a_d[component,hour-1], value_interval_d[month,:], pdf, n=2, tau=1)

                p = norm.rvs(loc=0, scale=1)

                daily_amplitude = a_d[component,hour-1] + drift*tau + np.sqrt(diffusion*tau)*p

                samples = np.vstack([np.repeat(daily_amplitude,samplerate), np.linspace(min_value_d[month], max_value_d[month], samplerate)])
                pdf = kdes_d[month].evaluate(samples)

                iteration = 0
                if np.sum(pdf) == 0.0:

                    while np.sum(pdf) == 0.0:

                        p = norm.rvs(loc=0, scale=1)

                        daily_amplitude = a_d[component,hour-1] + drift*tau + np.sqrt(diffusion*tau)*p

                        samples = np.vstack([np.repeat(daily_amplitude,samplerate), np.linspace(min_value_d[month], max_value_d[month], samplerate)])
                        pdf = kdes_d[month].evaluate(samples)

                        iteration += 1
                        if iteration > 1000:
                            print('Too many iterations')
                            break

            # Hold the current daily value for every hour of the day
            a_d[component,hour] = daily_amplitude

            ############################################################################
            ################## MONTHLY PART ############################################
            ############################################################################
            if hour == hours_in_semi_month[semi_month]:
                semi_month += 1
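
                # A semi-month boundary was crossed: draw the next semi-monthly
                # value with the same Kramers-Moyal / Euler-Maruyama scheme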

                samples = np.vstack([np.repeat(a_m[component,hour-1],samplerate), np.linspace(min_values_m[month], max_values_m[month], samplerate)])
                pdf = kdes_m[month].evaluate(samples)

                drift = KDE.kramer_moyal_coeff(a_m[component,hour-1], value_intervals_m[month,:], pdf, n=1, tau=1)
                diffusion = KDE.kramer_moyal_coeff(a_m[component,hour-1], value_intervals_m[month,:], pdf, n=2, tau=1)

                p = norm.rvs(loc=0, scale=1)

                semi_monthly_amplitude = a_m[component,hour-1] + drift*tau + np.sqrt(diffusion*tau)*p

                samples = np.vstack([np.repeat(semi_monthly_amplitude,samplerate), np.linspace(min_values_m[month], max_values_m[month], samplerate)])
                pdf = kdes_m[month].evaluate(samples)

                # If the proposed value falls outside the KDE support, redraw until it is back inside
                iteration = 0
                if np.sum(pdf) == 0.0:
                    while np.sum(pdf) == 0.0:

                        p = norm.rvs(loc=0, scale=1)

                        semi_monthly_amplitude = a_m[component,hour-1] + drift*tau + np.sqrt(diffusion*tau)*p

                        samples = np.vstack([np.repeat(semi_monthly_amplitude,samplerate), np.linspace(min_values_m[month], max_values_m[month], samplerate)])
                        pdf = kdes_m[month].evaluate(samples)

                        iteration += 1
                        if iteration > 1000:
                            print('Too many iterations')
                            break

            a_m[component,hour] = semi_monthly_amplitude

            if hour % 10000 == 0:
                print('{} hours generated'.format(hour))

        print('Mean values')
        print(np.mean(a_h[component,:]))
        print(np.mean(a_d[component,:]))
        print(np.mean(a_m[component,:]))
        # Center each generated series on zero before summing
        a_h[component,:] = a_h[component,:] - np.mean(a_h[component,:])
        a_d[component,:] = a_d[component,:] - np.mean(a_d[component,:])
        a_m[component,:] = a_m[component,:] - np.mean(a_m[component,:])

        # Total generated amplitude: sum of the monthly, daily and hourly parts
        for i in np.arange(0, T, tau):
            epsilon[component,i] = a_m[component,i] + a_d[component,i] + a_h[component,i]
            if i == 0:
                # Sanity check: compare against the real weight at hour 0
                print(epsilon[component,i])
                print(PCA.get_xi_weight(h, component)[0])

        # Map the principal component back into mismatch units and add its
        # rank-1 contribution: (30,) spatial pattern times (T,) amplitude series
        lambd, princ_comp = PCA.get_principal_component(h, component)
        mismatch_PC = PCA.unnormalize_uncenter(princ_comp, Ntilde, mean_mismatch)

        approx_mismatch += np.outer(mismatch_PC, epsilon[component,:])

    # Save the generated amplitude series and the reconstructed mismatch
    # (without shadowing the input `filename` parameter)
    np.save('approx_mismatch_generated.npy', approx_mismatch)
    np.save('weights_generated.npy', epsilon)
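

# Hypothetical usage sketch (the file path below is an assumption, and the
# KDE pickle files are assumed to be present in the working directory). The
# daily and semi-monthly weight arrays would normally come from the same PCA
# pipeline at the corresponding resolutions; here they are placeholders of
# compatible shape, since only their first column seeds the generator.
if __name__ == '__main__':
    solved_file = 'solved_network.npz'  # assumed path to a solved network
    weights_hourly = calculatePCAweights(solved_file)
    weights_daily = weights_hourly.copy()    # placeholder
    weights_monthly = weights_hourly.copy()  # placeholder
    generate8YearMismatchData(solved_file, weights_monthly, weights_daily,
                              weights_hourly, NumberOfComponents=1)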