Example #1
import numpy as np


def prune(freqSet, H, S, rules, minConf):
    prunedH = []
    for i in range(np.shape(H)[0]):
        conseq = H[i, :]  # get a candidate consequent
        # the antecedent is the rest of the items in the itemset
        ante = np.setdiff1d(freqSet, conseq)
        freqSetSup = S[str(freqSet)]  # support for freqSet
        anteSup = S[str(ante)]        # support for the antecedent
        conseqSup = S[str(conseq)]    # support for the consequent
        conf = freqSetSup / anteSup                # confidence
        lift = freqSetSup / (anteSup * conseqSup)  # and lift
        if conf >= minConf:
            prunedH.append(conseq)
            rules.append({
                'Antecedent': ante,
                'Consequent': conseq,
                'Conf': conf,
                'Lift': lift,
                'Sup': freqSetSup
            })
    return prunedH, rules
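The pruning step can be exercised on its own. Below is a minimal sketch with made-up numbers: itemsets are index arrays, and the support dictionary S is keyed by str() of those arrays to match the lookups inside prune; all supports are illustrative.

import numpy as np

freqSet = np.array([0, 1, 2])        # hypothetical frequent 3-itemset
H = np.array([[0], [1], [2]])        # candidate 1-item consequents
S = {str(np.array([0, 1, 2])): 0.4,  # illustrative supports
     str(np.array([0, 1])): 0.5, str(np.array([0, 2])): 0.45,
     str(np.array([1, 2])): 0.5,
     str(np.array([0])): 0.6, str(np.array([1])): 0.7,
     str(np.array([2])): 0.6}

prunedH, rules = prune(freqSet, H, S, [], minConf=0.7)
for r in rules:
    print(r['Antecedent'], '->', r['Consequent'],
          'conf=%.2f, lift=%.2f' % (r['Conf'], r['Lift']))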
Example #2
    def delete(self):

        # delete class destructor
        #
        # Syntax:
        #   delete(self)
        #
        # Description:
        #   Loops through the object's attributes and, if an attribute is not
        #   itself an object, empties it. Otherwise, calls the sub-object's
        #   destructor.
        #
        # Input:
        #   self - model object
        #
        # Output:
        #   (none)
        #
        # Author:
        #   Dr. Tim Peterson, The Department of Infrastructure Engineering,
        #   The University of Melbourne.
        #
        # Date:
        #   24 Aug 2016
        ##

        propNames = list(vars(self))
        for name in propNames:
            value = getattr(self, name)
            if value is None:
                continue
            if hasattr(value, 'delete'):
                value.delete()
            else:
                setattr(self, name, None)
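A minimal sketch of the pattern this destructor implements, using a hypothetical ExampleModel that is not part of HydroSight: plain attributes are emptied, while attributes that are themselves objects expose their own delete and are destructed recursively.

class SubModel:
    def __init__(self):
        self.data = [1, 2, 3]

    def delete(self):
        self.data = None

class ExampleModel:
    def __init__(self):
        self.params = {'alpha': 1.0}
        self.sub = SubModel()

    def delete(self):
        # empty plain attributes, delegate to sub-objects' destructors
        for name in list(vars(self)):
            value = getattr(self, name)
            if hasattr(value, 'delete'):
                value.delete()
            else:
                setattr(self, name, None)

m = ExampleModel()
m.delete()  # m.params is now None and m.sub.data is now None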
Example #3
def generate(freqSet, H, S, rules, minConf):
    # generate rules recursively while the frequent itemset is longer than
    # the consequents by more than one item
    m = np.shape(H)[1]  # length of each candidate consequent
    if len(freqSet) > (m + 1):
        if m == 1:
            # prune 1-item consequents
            a, rules = prune(freqSet, H, S, rules, minConf)
        # use aprioriGen to generate longer consequents
        Hm1 = aprioriGen(H, m + 1)
        # prune the longer consequents
        Hm1, rules = prune(freqSet, Hm1, S, rules, minConf)
        if len(Hm1) > 0:  # recurse if more consequents remain
            rules = generate(freqSet, Hm1, S, rules, minConf)
    return rules
Example #4
import numpy as np
import pandas as pd


def generateFreqItemsets(transactions, minSup):

    transactions = pd.Series(transactions,
                             index=np.arange(0, len(transactions)))
    items_generate = [
        item for transaction in transactions.values for item in transaction
    ]
    # index has, for each original item, its position in uniqItems
    uniqItems, index, oneItemsets = np.unique(
        items_generate, return_inverse=True, return_counts=True)
    items = list(uniqItems)

    N = len(transactions)

    C1 = uniqItems
    supk = oneItemsets / N  # support for all 1-item candidates
    S = {}
    for j in range(len(C1)):
        # key matches the str() of the index arrays used for longer itemsets
        S[str(np.array([j]))] = supk[j]

    keep = (supk >= minSup).nonzero()[0]  # must be >= minimal support
    Lk = {'items': uniqItems[keep], 'index': keep}
    L = [Lk]

    # get all frequent k-itemsets where k >= 2
    k = 2
    while True:
        # Ck: candidate itemsets
        p = L[k - 2]['index']
        Ck = aprioriGen(p, k)

        support = np.zeros(np.shape(Ck)[0])
        for i in range(N):  # walk through all transactions
            # indices (into items) of the items in the ith transaction
            t = sorted(items.index(item) for item in transactions.values[i])
            # count a candidate only if all of its items appear in t
            hit = np.all(np.isin(Ck, t), axis=1)
            support[hit] = support[hit] + 1 / N

        Lk = {
            'items': uniqItems[Ck[support >= minSup, :]],
            'index': Ck[support >= minSup, :]
        }

        if len(support) > 0:
            for i in range(len(support)):
                S[str(Ck[i, :])] = support[i]
        else:
            break

        if len(Lk['index']) > 0:
            L.append(Lk)
            k = k + 1
        else:
            break
    return L, S, items
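The function above relies on aprioriGen, which is not shown in these examples. The toy run below therefore supplies a deliberately simple stand-in that enumerates every sorted k-combination of the surviving item indices; this over-generates candidates relative to proper Apriori joining, but the support filtering keeps the results correct, it is merely slower. All transaction data are illustrative.

import numpy as np
import pandas as pd
from itertools import combinations

def aprioriGen(p, k):
    # simplified stand-in: all sorted k-combinations of the item indices
    idx = sorted(set(np.asarray(p).ravel()))
    return np.array(list(combinations(idx, k))).reshape(-1, k)

transactions = [['beer', 'chips'], ['beer', 'diapers'],
                ['beer', 'chips', 'diapers'], ['chips']]
L, S, items = generateFreqItemsets(transactions, minSup=0.5)
print(items)                     # ['beer', 'chips', 'diapers']
print(L[1]['items'])             # frequent 2-itemsets
print(S[str(np.array([0, 1]))])  # support of {beer, chips} -> 0.5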
Example #5
import configparser
import os
import shelve
import traceback
from datetime import datetime, timedelta

import numpy as np


def main():
    # Definitions of the event, fetched from config.cfg
    config = configparser.RawConfigParser()
    config.read("config.cfg")

    startdate = config.get("input", "startdate")
    enddate = config.get("input", "enddate")
    n = config.getint("input", "n")
    diacorr = config.getfloat("input", "diacorr")

    #Convert dates to datetime
    try:
        startdt = datetime.strptime(startdate, "%Y%m%d%H%M")
        enddt = datetime.strptime(enddate, "%Y%m%d%H%M")
    except Exception:
        traceback.print_exc()

    foldername = os.path.join(config.get("input", "dataloc"),
                              datetime.strftime(startdt, '%Y%m%d'))
    if not os.path.exists(foldername):
        os.mkdir(foldername)

    paraname = os.path.join(
        foldername,
        datetime.strftime(startdt, '%H%M') + '_' +
        datetime.strftime(enddt, '%H%M'))
    print(paraname)
    parafile = shelve.open(paraname)

    #Flags for optional data
    pluvio200 = config.getboolean("flags", "pluvio200")
    pluvio400 = config.getboolean("flags", "pluvio400")
    wind_GILL = config.getboolean("flags", "wind_GILL")
    FMI_met = config.getboolean("flags", "FMI_met")
    PIP_psd = config.getboolean("flags", "PIP_psd")
    PIP_vel = config.getboolean("flags", "PIP_vel")
    PIP_par = config.getboolean("flags", "PIP_par")
    PIP_parrel = config.getboolean("flags", "PIP_parrel")
    PIP_mass = config.getboolean("flags", "PIP_mass")
    PIP_minute = config.getboolean("flags", "PIP_minute")
    imag = config.getboolean("flags", "imag")
    version = config.getboolean("flags", "version")

    #Read precipitation data
    if pluvio200 or pluvio400:
        print("Reading precipitation data (PLUVIO)")
        (tv_PL200, PL200_acc, PL200_rr, tv_PL400, PL400_acc,
         PL400_rr) = PluvioIntensity_GPM(n, startdt, enddt)
    else:
        tv_PL200 = []
        PL200_acc = []
        PL200_rr = []
        tv_PL400 = []
        PL400_acc = []
        PL400_rr = []

    #Save parameters to file for later examination
    ##parafile.write(tv_PL200, ' ' , PL200_acc, ' ' , PL200_rr , ' ' , tv_PL400 , ' ' , PL400_acc , ' ' , PL400_rr )
    #shelving(parafile)

    #Read data from FMI-met
    if FMI_met:
        print("Reading FMI MET data")
        tv_FMI, temp_FMI, rh_FMI, sn_FMI, rr_FMI, press_FMI = Read_FMIstation_GPM(
            startdt, enddt, n)
    else:
        tv_FMI = []
        temp_FMI = []
        rh_FMI = []
        sn_FMI = []
        rr_FMI = []
        press_FMI = []

    #parafile.write(tv_FMI , ' ' , temp_FMI, ' ' , rh_FMI, ' ' , sn_FMI, ' ' , rr_FMI, ' ' , press_FMI)
    #shelving(parafile)

    #Read and plot GILL wind data
    ##TODO: Sanity check
    if wind_GILL:
        print("Reading wind data (GILL)")
        time_vector_GILL, mean_vel_GILL, mode_dir_GILL = WindComparison_GPM(
            n, startdt, enddt)
    else:
        time_vector_GILL = []
        mean_vel_GILL = []
        mode_dir_GILL = []

    #parafile.write(time_vector_GILL, ' ', mean_vel_GILL, ' ', mode_dir_GILL)
    #shelving(parafile)

    #Read the PSD tables (size distribution)
    if PIP_psd:
        print("Reading PSD tables")
        (D_PIP, PIP_PSD, PIPtime_psd, N_mean_PIP, Dm_PIP, N0_PIP, lambda_PIP,
         mu_PIP, Nw_PIP, D02_exp_PIP, lambda2_exp_PIP,
         PIPtime_n) = ReadPIP_PSD_GPM(startdt, enddt, n, foldername)
    else:
        D_PIP = []
        PIP_PSD = []
        PIPtime_psd = []
        N_mean_PIP = []
        Dm_PIP = []
        N0_PIP = []
        lambda_PIP = []
        mu_PIP = []
        Nw_PIP = []
        D02_exp_PIP = []
        lambda2_exp_PIP = []
        PIPtime_n = []

    #parafile.write(D_PIP, ' ', PIP_PSD, ' ', PIPtime_psd, ' ',N_mean_PIP , ' ',Dm_PIP , ' ',N0_PIP , ' ',
    #                ' ',lambda_PIP , ' ',mu_PIP , ' ',Nw_PIP , ' ',D02_exp_PIP , ' ',lambda2_exp_PIP  , ' ',PIPtime_N)
    #shelving(parafile)

    #Read the velocity tables and perform a fit as function of D_PIP
    if PIP_vel:
        print("Reading the velocity tables (PIP_vel)")
        D_PIP, PIPtime_vel, PIPD_vel, PIPV_vel = ReadPIP_vel_GPM(
            startdt, enddt)
        avel_DPIP_vel, bvel_DPIP_vel, PIPtime_vel_n = PIPVelRel_GPM(
            startdt, enddt, D_PIP, PIPtime_vel, PIPD_vel, PIPV_vel, n,
            foldername)
    else:
        PIPtime_vel = []
        PIPD_vel = []
        PIPV_vel = []
        avel_DPIP_vel = []
        bvel_DPIP_vel = []
        DPIP_V_vel_n = []
        PIPtime_vel_n = []

    #parafile.write(D_PIP, ' ', PIPtime_vel, ' ', PIPD_vel, ' ', PIPV_vel, ' ', avel_DPIP_vel, ' ', bvel_DPIP_vel, ' ', PIPtime_vel_n)
    #shelving(parafile)

    #Read the particle tables
    if PIP_par:
        (D_PIP, PIPD_par, PIPV_par, PIPtime_par, PIPEmaj, PIPEmajmax, PIPEmin,
         PIPAR, PIPOR, PIPLen, PIPHig) = ReadPIP_par_GPM(startdt, enddt)
        temp_PIPD = PIPD_par
        temp_Dmax = 2 * PIPEmajmax
        # drop particles where either diameter estimate is NaN or infinite
        valid = np.isfinite(temp_Dmax) & np.isfinite(temp_PIPD)
        temp_PIPD = temp_PIPD[valid]
        temp_Dmax = temp_Dmax[valid]

        #Area equivalent of whole snow event
        ##TODO
        #C1 =
        #kD = C1[1]

    else:
        D_PIP = []
        PIPD_par = []
        PIPV_par = []
        PIPtime_par = []
        PIPEmaj = []
        PIPEmajmax = []
        PIPEmin = []
        PIPAR = []
        PIPlonX = []
        PIPDia = []
        PIPOR = []
        PIPLen = []
        PIPHig = []
        kD = []

    #parafile.write(D_PIP, ' ', PIPtime_vel, ' ', PIPD_vel, ' ', PIPV_vel, ' ', avel_DPIP_vel, ' ', bvel_DPIP_vel, ' ', PIPtime_vel_n)
    #shelving(parafile)

    if PIP_mass:
        # drop time steps where any of the FMI variables is NaN
        valid = ~(np.isnan(rh_FMI) | np.isnan(temp_FMI) | np.isnan(press_FMI))
        temp_FMI = temp_FMI[valid]
        press_FMI = press_FMI[valid]
        tv_FMI = tv_FMI[valid]
        rh_FMI = rh_FMI[valid]
        envr.temp = temp_FMI
        envr.press = press_FMI
        envr.time = tv_FMI
        envr.rh = rh_FMI / 100

        time_mass, amass_PIP, bmass_PIP = MassEstimate(n, D_PIP, PIPtime_par,
                                                       envr, PIPD_par,
                                                       PIPV_par, PIPEmajmax,
                                                       PIPAR, kD, diacorr,
                                                       foldername)
    else:
        time_mass = []
        amass_PIP = []
        bmass_PIP = []
    """
    Part 2
    """

    #Calculate the accumulation and reflectivity from the mass estimate
    master_time_vector = np.arange(startdt, enddt, timedelta(minutes=n))

    #PSD properties
    N0_mtv = []
    lambda_mtv = []
    mu_mtv = []

    N02_exp_mtv = []
    lambda2_exp_mtv = []
    D02_exp_mtv = []

    N_mean_PIP_mtv = []
    Dmax_mtv = []

    #Selected a, b (mass factors) and av, bv (velocity factors)
    amass_mtv = []
    bmass_mtv = []
    avel_mtv_max = []
    bvel_mtv_max = []
    """
    #Choose used diameter correction
    if diacorr == 0.9:
        time = time_mass.MH05_maxmaxD_corrconst08
        amass = amass_PIP.MH05_maxmaxD_corrconst08
        bmass = bmass_PIP.MH05_maxmaxD_corrconst08
    elif diacorr == 0.82:
        time = time_mass.MH05_maxmaxD_corrconst06
        amass = amass_PIP.MH05_maxmaxD_corrconst06
        bmass = bmass_PIP.MH05_maxmaxD_corrconst06
    elif diacorr == 0.7:
        time = time_mass.MH05_maxmaxD_corrconst04
        amass = amass_PIP.MH05_maxmaxD_corrconst04
        bmass = bmass_PIP.MH05_maxmaxD_corrconst04
    else:
        time_mtv = time_mass.MH05_maxmaxD
        amass = amass_PIP.MH05_maxmaxD
        bmass = bmass_PIP.MH05_maxmaxD
    """
    accum_PIP_fac_MH05_maxmaxD = []
    accum_PIP_fac_MH05_maxmaxD_corrconst08 = []
    accum_PIP_fac_MH05_maxmaxD_corrconst06 = []
    accum_PIP_fac_MH05_maxmaxD_corrconst04 = []

    Ze_PIP_fac_MH05_maxmaxD = []
    Ze_PIP_fac_MH05_maxmaxD_corrconst08 = []
    Ze_PIP_fac_MH05_maxmaxD_corrconst06 = []
    Ze_PIP_fac_MH05_maxmaxD_corrconst04 = []

    diff_PIP = np.zeros(np.shape(D_PIP))
    diff_PIP[0] = D_PIP[0]
    diff_PIP[1:] = np.diff(D_PIP)

    diacorr_no = kD / 1
    diacorr08 = kD / 0.9
    diacorr06 = kD / 0.82
    diacorr04 = kD / 0.70
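    # Each accumulation term in the loop below approximates, per n-minute
    # step, S = n*60*1e-3 * sum_D[ a*(c*0.1*D)**b * av*(c*D)**bv * N(D)*dD ],
    # i.e. particle mass times fall speed, weighted by the measured size
    # distribution and summed over the diameter bins; the Ze terms apply the
    # analogous Rayleigh-type sum with the squared mass term.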

    half_win = 30 / (24 * 3600)  # 30 s half-window, as a fraction of a day

    for dd in range(len(master_time_vector)):
        t0 = master_time_vector[dd]
        d_ind = np.where((time_mass.MH05_maxmaxD > t0 - half_win)
                         & (time_mass.MH05_maxmaxD < t0 + half_win))
        dn_ind = np.where((PIPtime_n > t0 - half_win)
                          & (PIPtime_n < t0 + half_win))
        dv_ind = np.where((time_vector_parrel_max > t0 - half_win)
                          & (time_vector_parrel_max < t0 + half_win))
        if (d_ind[0].size > 0 and dn_ind[0].size > 0
                and dv_ind[0].size > 0):
            # PSD parameters
            N0_mtv = np.append(N0_mtv, N0_PIP[dn_ind])
            lambda_mtv = np.append(lambda_mtv, lambda_PIP[dn_ind])
            mu_mtv = np.append(mu_mtv, mu_PIP[dn_ind])

            N02_exp_mtv = np.append(N02_exp_mtv, N02_exp_PIP[dn_ind])
            lambda2_exp_mtv = np.append(lambda2_exp_mtv,
                                        lambda2_exp_PIP[dn_ind])
            D02_exp_mtv = np.append(D02_exp_mtv, D02_exp_PIP[dn_ind])
            N_mean_PIP_mtv.append(N_mean_PIP[dn_ind, :])
            Dmax_mtv = np.append(Dmax_mtv, Dmax_PIP[dn_ind])

            # particle relations
            avel_mtv_max = np.append(avel_mtv_max, avel_PIP_par_max[dv_ind])
            bvel_mtv_max = np.append(bvel_mtv_max, bvel_PIP_par_max[dv_ind])

            accum_PIP_fac_MH05_maxmaxD = np.append(
                accum_PIP_fac_MH05_maxmaxD, n * 60 * 1e-3 *
                np.nansum(amass_PIP.MH05_maxmaxD[d_ind] *
                          (diacorr_no * 0.1 * D_PIP) **
                          bmass_PIP.MH05_maxmaxD[d_ind] *
                          avel_PIP_par_max[dv_ind] *
                          (diacorr_no * D_PIP) ** bvel_PIP_par_max[dv_ind] *
                          N_mean_PIP[dn_ind, :] * diff_PIP))
            if (np.all(bmass_PIP.MH05_maxmaxD[d_ind] >= 1)
                    and np.all(bmass_PIP.MH05_maxmaxD[d_ind] < 3.5)
                    and np.all(bvel_PIP_par_max[dv_ind] >= 0)):
                Ze_PIP_fac_MH05_maxmaxD = np.append(
                    Ze_PIP_fac_MH05_maxmaxD,
                    1e6 * 1.2076 * 0.2 / 0.93 * (6 / np.pi) ** 2 *
                    np.nansum((amass_PIP.MH05_maxmaxD[d_ind] *
                               (diacorr_no * 0.1 * D_PIP) **
                               bmass_PIP.MH05_maxmaxD[d_ind]) ** 2 *
                              N_mean_PIP[dn_ind, :] * diff_PIP))
                # the diacorr-based amass/bmass selection is commented out
                # above, so fall back to the uncorrected mass factors here
                amass_mtv = np.append(amass_mtv,
                                      amass_PIP.MH05_maxmaxD[d_ind])
                bmass_mtv = np.append(bmass_mtv,
                                      bmass_PIP.MH05_maxmaxD[d_ind])
            else:
                Ze_PIP_fac_MH05_maxmaxD = np.append(Ze_PIP_fac_MH05_maxmaxD,
                                                    0)
                amass_mtv = np.append(amass_mtv, 0)
                bmass_mtv = np.append(bmass_mtv, 0)
        else:
            accum_PIP_fac_MH05_maxmaxD = np.append(
                accum_PIP_fac_MH05_maxmaxD, 0)
            Ze_PIP_fac_MH05_maxmaxD = np.append(Ze_PIP_fac_MH05_maxmaxD, 0)
            amass_mtv = np.append(amass_mtv, 0)
            bmass_mtv = np.append(bmass_mtv, 0)
            N0_mtv = np.append(N0_mtv, 0)
            lambda_mtv = np.append(lambda_mtv, 0)
            mu_mtv = np.append(mu_mtv, 0)
            N02_exp_mtv = np.append(N02_exp_mtv, 0)
            D02_exp_mtv = np.append(D02_exp_mtv, 0)
            lambda2_exp_mtv = np.append(lambda2_exp_mtv, 0)
            N_mean_PIP_mtv.append(np.zeros(np.shape(D_PIP)))
            Dmax_mtv = np.append(Dmax_mtv, 0)
            avel_mtv_max = np.append(avel_mtv_max, 0)
            bvel_mtv_max = np.append(bvel_mtv_max, 0)

        d_ind = np.where(
            (time_mass.MH05_maxmaxD_corrconst08 > t0 - half_win)
            & (time_mass.MH05_maxmaxD_corrconst08 < t0 + half_win))
        dn_ind = np.where((PIPtime_n > t0 - half_win)
                          & (PIPtime_n < t0 + half_win))
        dv_ind = np.where((time_vector_parrel_max > t0 - half_win)
                          & (time_vector_parrel_max < t0 + half_win))
        if (d_ind[0].size > 0 and dn_ind[0].size > 0
                and dv_ind[0].size > 0):
            accum_PIP_fac_MH05_maxmaxD_corrconst08 = np.append(
                accum_PIP_fac_MH05_maxmaxD_corrconst08, n * 60 * 1e-3 *
                np.nansum(amass_PIP.MH05_maxmaxD_corrconst08[d_ind] *
                          (diacorr08 * 0.1 * D_PIP) **
                          bmass_PIP.MH05_maxmaxD_corrconst08[d_ind] *
                          avel_PIP_par_max[dv_ind] *
                          (diacorr08 * D_PIP) ** bvel_PIP_par_max[dv_ind] *
                          N_mean_PIP[dn_ind, :] * diff_PIP))
            if (np.all(bmass_PIP.MH05_maxmaxD_corrconst08[d_ind] >= 1)
                    and np.all(bmass_PIP.MH05_maxmaxD_corrconst08[d_ind] < 3.5)
                    and np.all(bvel_PIP_par_max[dv_ind] >= 0)):
                Ze_PIP_fac_MH05_maxmaxD_corrconst08 = np.append(
                    Ze_PIP_fac_MH05_maxmaxD_corrconst08,
                    1e6 * 1.2076 * 0.2 / 0.93 * (6 / np.pi) ** 2 *
                    np.nansum((amass_PIP.MH05_maxmaxD_corrconst08[d_ind] *
                               (diacorr08 * 0.1 * D_PIP) **
                               bmass_PIP.MH05_maxmaxD_corrconst08[d_ind]) ** 2 *
                              N_mean_PIP[dn_ind, :] * diff_PIP))
            else:
                Ze_PIP_fac_MH05_maxmaxD_corrconst08 = np.append(
                    Ze_PIP_fac_MH05_maxmaxD_corrconst08, 0)
        else:
            accum_PIP_fac_MH05_maxmaxD_corrconst08 = np.append(
                accum_PIP_fac_MH05_maxmaxD_corrconst08, 0)
            Ze_PIP_fac_MH05_maxmaxD_corrconst08 = np.append(
                Ze_PIP_fac_MH05_maxmaxD_corrconst08, 0)

        d_ind = np.where(
            (time_mass.MH05_maxmaxD_corrconst06 > t0 - half_win)
            & (time_mass.MH05_maxmaxD_corrconst06 < t0 + half_win))
        dn_ind = np.where((PIPtime_n > t0 - half_win)
                          & (PIPtime_n < t0 + half_win))
        dv_ind = np.where((time_vector_parrel_max > t0 - half_win)
                          & (time_vector_parrel_max < t0 + half_win))
        if (d_ind[0].size > 0 and dn_ind[0].size > 0
                and dv_ind[0].size > 0):
            accum_PIP_fac_MH05_maxmaxD_corrconst06 = np.append(
                accum_PIP_fac_MH05_maxmaxD_corrconst06, n * 60 * 1e-3 *
                np.nansum(amass_PIP.MH05_maxmaxD_corrconst06[d_ind] *
                          (diacorr06 * 0.1 * D_PIP) **
                          bmass_PIP.MH05_maxmaxD_corrconst06[d_ind] *
                          avel_PIP_par_max[dv_ind] *
                          (diacorr06 * D_PIP) ** bvel_PIP_par_max[dv_ind] *
                          N_mean_PIP[dn_ind, :] * diff_PIP))
            if (np.all(bmass_PIP.MH05_maxmaxD_corrconst06[d_ind] >= 1)
                    and np.all(bmass_PIP.MH05_maxmaxD_corrconst06[d_ind] < 3.5)
                    and np.all(bvel_PIP_par_max[dv_ind] >= 0)):
                Ze_PIP_fac_MH05_maxmaxD_corrconst06 = np.append(
                    Ze_PIP_fac_MH05_maxmaxD_corrconst06,
                    1e6 * 1.2076 * 0.2 / 0.93 * (6 / np.pi) ** 2 *
                    np.nansum((amass_PIP.MH05_maxmaxD_corrconst06[d_ind] *
                               (diacorr06 * 0.1 * D_PIP) **
                               bmass_PIP.MH05_maxmaxD_corrconst06[d_ind]) ** 2 *
                              N_mean_PIP[dn_ind, :] * diff_PIP))
            else:
                Ze_PIP_fac_MH05_maxmaxD_corrconst06 = np.append(
                    Ze_PIP_fac_MH05_maxmaxD_corrconst06, 0)
        else:
            accum_PIP_fac_MH05_maxmaxD_corrconst06 = np.append(
                accum_PIP_fac_MH05_maxmaxD_corrconst06, 0)
            Ze_PIP_fac_MH05_maxmaxD_corrconst06 = np.append(
                Ze_PIP_fac_MH05_maxmaxD_corrconst06, 0)

        d_ind = np.where(
            (time_mass.MH05_maxmaxD_corrconst04 > t0 - half_win)
            & (time_mass.MH05_maxmaxD_corrconst04 < t0 + half_win))
        dn_ind = np.where((PIPtime_n > t0 - half_win)
                          & (PIPtime_n < t0 + half_win))
        dv_ind = np.where((time_vector_parrel_max > t0 - half_win)
                          & (time_vector_parrel_max < t0 + half_win))
        if (d_ind[0].size > 0 and dn_ind[0].size > 0
                and dv_ind[0].size > 0):
            accum_PIP_fac_MH05_maxmaxD_corrconst04 = np.append(
                accum_PIP_fac_MH05_maxmaxD_corrconst04, n * 60 * 1e-3 *
                np.nansum(amass_PIP.MH05_maxmaxD_corrconst04[d_ind] *
                          (diacorr04 * 0.1 * D_PIP) **
                          bmass_PIP.MH05_maxmaxD_corrconst04[d_ind] *
                          avel_PIP_par_max[dv_ind] *
                          (diacorr04 * D_PIP) ** bvel_PIP_par_max[dv_ind] *
                          N_mean_PIP[dn_ind, :] * diff_PIP))
            if (np.all(bmass_PIP.MH05_maxmaxD_corrconst04[d_ind] >= 1)
                    and np.all(bmass_PIP.MH05_maxmaxD_corrconst04[d_ind] < 3.5)
                    and np.all(bvel_PIP_par_max[dv_ind] >= 0)):
                Ze_PIP_fac_MH05_maxmaxD_corrconst04 = np.append(
                    Ze_PIP_fac_MH05_maxmaxD_corrconst04,
                    1e6 * 1.2076 * 0.2 / 0.93 * (6 / np.pi) ** 2 *
                    np.nansum((amass_PIP.MH05_maxmaxD_corrconst04[d_ind] *
                               (diacorr04 * 0.1 * D_PIP) **
                               bmass_PIP.MH05_maxmaxD_corrconst04[d_ind]) ** 2 *
                              N_mean_PIP[dn_ind, :] * diff_PIP))
            else:
                Ze_PIP_fac_MH05_maxmaxD_corrconst04 = np.append(
                    Ze_PIP_fac_MH05_maxmaxD_corrconst04, 0)
        else:
            accum_PIP_fac_MH05_maxmaxD_corrconst04 = np.append(
                accum_PIP_fac_MH05_maxmaxD_corrconst04, 0)
            Ze_PIP_fac_MH05_maxmaxD_corrconst04 = np.append(
                Ze_PIP_fac_MH05_maxmaxD_corrconst04, 0)

    #shelving(parafile)

    #Plot the summary of the event (IMPORTANT)
    Plot_EventSummary_GPM()

    #Define Z(S) with Rayleigh approximation
    #Choose the relation
    # datestr(x, 30) in the original MATLAB corresponds to 'yyyymmddTHHMMSS'
    stamp = (datetime.strftime(startdt, '%Y%m%dT%H%M%S') + '_' +
             datetime.strftime(enddt, '%Y%m%dT%H%M%S'))
    if diacorr == 0.9:
        accum = accum_PIP_fac_MH05_maxmaxD_corrconst08
        Ze = Ze_PIP_fac_MH05_maxmaxD_corrconst08
        fname1 = os.path.join(foldername, 'ZeS_corrconst08_' + stamp)
        fname2 = os.path.join(foldername, 'ZeS_timeseries_corrconst08_' + stamp)
    elif diacorr == 0.82:
        accum = accum_PIP_fac_MH05_maxmaxD_corrconst06
        Ze = Ze_PIP_fac_MH05_maxmaxD_corrconst06
        fname1 = os.path.join(foldername, 'ZeS_corrconst06_' + stamp)
        fname2 = os.path.join(foldername, 'ZeS_timeseries_corrconst06_' + stamp)
    elif diacorr == 0.7:
        accum = accum_PIP_fac_MH05_maxmaxD_corrconst04
        Ze = Ze_PIP_fac_MH05_maxmaxD_corrconst04
        fname1 = os.path.join(foldername, 'ZeS_corrconst04_' + stamp)
        fname2 = os.path.join(foldername, 'ZeS_timeseries_corrconst04_' + stamp)
    else:
        accum = accum_PIP_fac_MH05_maxmaxD
        Ze = Ze_PIP_fac_MH05_maxmaxD
        fname1 = os.path.join(foldername, 'ZeS_nocorr_' + stamp)
        fname2 = os.path.join(foldername, 'ZeS_timeseries_nocorr_' + stamp)

    parafile.close()
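For reference, a minimal config.cfg that satisfies the reads in main() might look like the following; every value is illustrative, and only the keys actually read above are shown.

[input]
startdate = 201402150000
enddate = 201402160000
n = 5
diacorr = 0.9
dataloc = /data/gpm

[flags]
pluvio200 = true
pluvio400 = false
wind_GILL = true
FMI_met = true
PIP_psd = true
PIP_vel = true
PIP_par = true
PIP_parrel = true
PIP_mass = true
PIP_minute = false
imag = false
version = false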
Example #6
import numpy as np
import pandas as pd


def outlierDetection(headData, isOutlier, nSigma_threshold):

    # Initialise outputs
    noise_sigma = np.inf
    x_opt = []
    model_calib = []

    # Initialise 'isOutlier' if it's not supplied by the user
    if len(isOutlier) == 0:
        isOutlier = np.zeros(np.shape(headData)[0], dtype=bool)
    isOutlier = np.asarray(isOutlier, dtype=bool)
    isNewOutlier = np.zeros(np.shape(headData)[0], dtype=bool)
    isOutlier_input = isOutlier.copy()

    # Build inputs for the exponential smoothing model. Note: year, month,
    # day, hour, minute and second are date-component helpers from this
    # codebase.
    t = headData[:, 0]
    h_obs = headData[:, 1]
    dummyBoreID = 'BoreID_123'
    coordinates = [dummyBoreID, -999, -999, 'Precip', -999, -999]
    forcingData = np.arange(t[0] - 10, t[-1] + 10)
    forcingData = pd.DataFrame({'Year': year(forcingData),
                                'Month': month(forcingData),
                                'Day': day(forcingData),
                                'Precip': np.zeros(len(forcingData))})
    h_obs_model = np.column_stack([year(t), month(t), day(t), hour(t),
                                   minute(t), second(t), h_obs])
    
    # Calibrate exponential smoothing model
    summaryStr = []
    noise_sigma = 0
    i = 1
    doFinalCalibration = False
    el = 0
    while (i == 1) or (np.sum(isNewOutlier) > 0) or doFinalCalibration:

        # Build model        
        model_calib = HydroSightModel('Outlier detection', dummyBoreID, 'ExpSmooth', h_obs_model[~isOutlier,:], -999, forcingData, coordinates, False)
        
        # Calibrate model
        calibrateModel(model_calib, [], 0, np.inf, 'SPUCI', 2)
        
        # Get the standard deviation of the noise.
        noise_sigma = model_calib.model.variables.sigma_n
        
        # Exit if this pass was the final recalibration
        if doFinalCalibration:
            break
        
        # Store calibrated parameters
        alpha          = model_calib.model.parameters.alpha
        beta           = model_calib.model.parameters.beta
        gamma          = model_calib.model.parameters.gamma
        meanHead_calib = model_calib.model.variables.meanHead_calib
        initialHead    = model_calib.model.variables.initialHead
        initialTrend   = model_calib.model.variables.initialTrend 
         
        # Loop through each non-outlier observation to omit it from the
        # simulation. This is done to exclude a possible outlier point from
        # the smoothed estimate and the resulting calculation of the noise.
        # If the difference between the current obs point and the forecast is
        # greater than this noise estimate, the point is denoted an outlier.
        # Importantly, when calculating the noise, the min and max points are
        # also excluded.
        isNewOutlier = np.zeros(np.shape(isOutlier), dtype=bool)
        filt = isOutlier.copy()
        ind = np.where(~isOutlier)[0]
        for k, j in enumerate(ind[1:], start=1):
            # Get a vector of obs points excluding the current obs point j.
            filt[j] = True
            time_points_trim = headData[~filt, 0]
            delta_t = headData[j, 0] - headData[j - 1, 0]
            h_obs_trim = headData[~filt, 1]
            h_obs_trim = np.column_stack([year(time_points_trim),
                                          month(time_points_trim),
                                          day(time_points_trim),
                                          hour(time_points_trim),
                                          minute(time_points_trim),
                                          second(time_points_trim),
                                          h_obs_trim])

            # Rebuild the model without the current time point, assign the
            # calibrated parameters and solve the model.
            model = HydroSightModel('Outlier detection', dummyBoreID, 'ExpSmooth', h_obs_trim, -999, forcingData, coordinates, False)
            model.model.parameters.alpha = alpha
            model.model.parameters.beta = beta
            model.model.parameters.gamma = gamma
            model.model.variables.meanHead_calib = meanHead_calib
            model.model.variables.calibraion_time_points = time_points_trim            
            model.model.variables.initialHead = initialHead
            model.model.variables.initialTrend = initialTrend 
            
            # Add the current point back into the simulation. Note: when the
            # simulation is undertaken for a point that does not exist in the
            # model, that point is forecast.
            filt[j] = False
            time_points_trimExtended = headData[~filt, 0]
            h_mod_trim = solveModel(model, time_points_trimExtended, [], 'NoLabel', False)    
            h_forecast_trim = model.model.variables.h_forecast            
            
            # Create a filter to remove the current point from the forecast
            # and then calculate the residuals
            obs_filt = np.concatenate((np.arange(0, k),
                                       np.arange(k + 1, len(ind))))
            resid_trim = h_obs_trim[:, -1] - h_forecast_trim[obs_filt]
                        
            # To minimise the impact of outliers not yet identified, create
            # a filter to remove the most negative and most positive values
            # from the residuals
            resid_filt = ((resid_trim > np.min(resid_trim))
                          & (resid_trim < np.max(resid_trim)))
            resid_trim = resid_trim[resid_filt]
            time_points_trim = time_points_trim[resid_filt]
            
            # Calculate innovations
            innov = resid_trim[1:] - resid_trim[:-1] * np.exp(
                -10.0**model.model.parameters.beta * np.diff(time_points_trim))
                        
            # Calculate st. dev. of residuals noise
            #sigma_n_trimmed = sqrt(mean(innov.^2 ./ (1 - exp( -2 .* 10.^model.model.parameters.beta .* diff(time_points_trim) ))))        
            
            # Calculate st. dev. of the residual for the current forecast only.
            # Note: the estimate of sigma_n at a specific time step is derived
            # from von Asmuth 2015 doi:10.1029/2004WR00372 eqn A7, but with
            # the innovation at t, v_t, replaced by its mean. This was done so
            # that sigma_n,t is independent of the residual forecast.
            sigma_n_trimmed = np.sqrt(
                np.mean(innov**2) /
                (1 - np.exp(-2 * 10.0**model.model.parameters.beta * delta_t)))
                        
            # Calculate residual for omitted obs point.
            resid_point = h_obs[j] - h_forecast_trim[k]
            
            # Break for-loop if an outlier is detected.
            if np.abs(resid_point) >= nSigma_threshold * sigma_n_trimmed:
                isNewOutlier[j] = True
                el += 1
                summaryStr.append(
                    'Date : ' + str(t[j]) + ', Head : ' + str(h_obs[j]) +
                    ', Smoothed forecast head : ' + str(h_forecast_trim[k]) +
                    ', Residual head : ' + str(resid_point) +
                    ', St. dev of noise : ' + str(sigma_n_trimmed))
                break
        
        # Aggregate new outliers with previously detected outliers
        isOutlier = isOutlier | isNewOutlier
        
        # If the while loop is to exit, then set flag to do one last
        # calibration so that the noise is best estimated.
        if np.sum(isNewOutlier)==0:
            doFinalCalibration = True
        
        # update counter
        i = i + 1

    # Assign the final parameters 
    x_opt = getParameters(model_calib.model)
    
    # Exclude the outliers that were supplied as input; that is, only return
    # the outliers identified by the exponential smoothing model
    isOutlier[isOutlier_input] = False
    
    # Print summary:
    print('Summary of Outliers Detected')
    print('----------------------------')
    for i in range(el):
        print(summaryStr[i])
    print('----------------------------')

    return isOutlier, noise_sigma, x_opt, model_calib 
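A minimal calling sketch, assuming headData is an (N, 2) array of [time as a day number, head] rows and that the HydroSight helpers used above (HydroSightModel, calibrateModel, solveModel, getParameters and the year/month/day/hour/minute/second date helpers) are importable; the input file name is hypothetical.

import numpy as np

headData = np.loadtxt('bore_123_head.csv', delimiter=',')  # hypothetical input
isOutlier, noise_sigma, x_opt, model_calib = outlierDetection(
    headData, [], nSigma_threshold=3)
print(int(np.sum(isOutlier)), 'outliers; noise st. dev. =', noise_sigma)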
Example #7
import numpy as np
from scipy.special import erfc, erfinv


def qc_kk(II, QI, IQ, QQ, Ihigh, Qhigh, Ilow, Qlow, Ierr, Qerr):
    erfcinv = lambda x: erfinv(1 - x)
    tval = lambda x: 2**.5 * erfcinv(2 * x)
    rel_pwr = lambda Xhi, Xlo: 1.819745692478292 / (tval(Xhi) + tval(Xlo))**2
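    # tval(x) is the standard-normal threshold t with P(X > t) = x, i.e. the
    # sampler threshold, in units of sigma, that yields a high/low clipping
    # fraction of x; rel_pwr then estimates the relative power from those
    # thresholds.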

    relPwrQ = rel_pwr(Qhigh, Qlow)
    relPwrI = rel_pwr(Ihigh, Ilow)
    qc_power = (relPwrQ * relPwrI)**.5

    x1 = np.array([Qhigh, Ihigh, Ihigh, Qhigh])
    x2 = np.array([Qlow, Ilow, Ilow, Qlow])
    y1 = np.array([Qhigh, Ihigh, Qhigh, Ihigh])
    y2 = np.array([Qlow, Ilow, Qlow, Ilow])

    Xh, Xl, Yh, Yl = tval(x1), -tval(x2), tval(y1), -tval(y2)

    Xh = np.tile(Xh, (len(II), 1))
    Xl = np.tile(Xl, (len(II), 1))
    Yh = np.tile(Yh, (len(II), 1))
    Yl = np.tile(Yl, (len(II), 1))

    sqrt2 = 2**-.5
    x1 = erfc(sqrt2 * Xh)
    x2 = erfc(-sqrt2 * Xl)
    y1 = erfc(sqrt2 * Yh)
    y2 = erfc(-sqrt2 * Yl)

    Cxy = np.array([QQ, II, IQ, QI]).transpose()

    TC0 = .25 * (x1 * y1 + x2 * y2 - x1 * y2 - x2 * y1)

    TC1 = 1 / (2 * np.pi) * (
        (np.exp(-0.5 * Xh * Xh) + np.exp(-0.5 * Xl * Xl)) *
        (np.exp(-0.5 * Yh * Yh) + np.exp(-0.5 * Yl * Yl)))

    TC2 = 1 / (4 * np.pi) * (
        (Xh * np.exp(-0.5 * Xh * Xh) + Xl * np.exp(-0.5 * Xl * Xl)) *
        (Yh * np.exp(-0.5 * Yh * Yh) + Yl * np.exp(-0.5 * Yl * Yl)))

    TC3 = 1 / (12 * np.pi) * (
        ((1 - Xh * Xh) * np.exp(-0.5 * Xh * Xh) +
         (1 - Xl * Xl) * np.exp(-0.5 * Xl * Xl)) *
        ((1 - Yh * Yh) * np.exp(-0.5 * Yh * Yh) +
         (1 - Yl * Yl) * np.exp(-0.5 * Yl * Yl)))

    # FIND ROOTS TO 3rd DEGREE POLYNOMIAL
    a, b, c = TC2 / TC3, TC1 / TC3, (TC0 - Cxy) / TC3
    p, q = b - (1 / 3) * (a**2), c + (1 / 27) * (2 * (a**3) - 9 * a * b)
    K = 0.5 * q + np.sign(q) * np.sqrt(0.25 * (q**2) + (1 / 27) * (p**3))
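    # The correction solves TC3*r**3 + TC2*r**2 + TC1*r + (TC0 - Cxy) = 0 for
    # the true correlation r: dividing by TC3 and substituting r = t - a/3
    # yields the depressed cubic t**3 + p*t + q = 0, with K a Cardano-style
    # intermediate used below to extract the real root.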

    qc_data = Cxy * 0

    #Handle NaNs
    map_nan = np.isnan(K)
    qc_data[map_nan] = Cxy[map_nan]
    map_real = (K.imag == 0) & ~map_nan

    #Handle case K=0
    map_real_0 = (K == 0) & map_real
    qc_data[map_real_0] = -a[map_real_0] / 3

    # Handle optimization of roots (real, nonzero K)
    map_opt = map_real & ~map_real_0

    x = K[map_opt].transpose()
    r_opt = 0.5 * x
    r2 = r_opt * 1.
    e = x * 0 + 1.
    run = (e == 1)
    max_steps = 10000
    err_thres = 1e-10

    # Iterate to find roots
    while np.any(run) and (max_steps > 0):
        max_steps -= 1
        r2[run] = r_opt[run]
        r_opt[run] = 0.5 * (x[run] / (r_opt[run]**2) + r_opt[run])
        e[run] = np.abs(r2[run] - r_opt[run])
        run[run] = e[run] > err_thres

    # Insert found roots
    qc_data[map_opt] = p[map_opt] / (3 * r_opt) - r_opt - a[map_opt] / 3

    # Handle case of imaginary roots
    map_imag = (K.imag != 0) & ~map_nan

    if np.any(map_imag):
        coeff4 = TC3[map_imag]
        coeff3 = TC2[map_imag]
        coeff2 = TC1[map_imag]
        coeff1 = TC0[map_imag] - Cxy[map_imag]
        corr_compressed = Cxy[map_imag]
        res = coeff1 * 0

        for i in range(len(coeff1)):
            r_roots = np.roots([coeff4[i], coeff3[i], coeff2[i], coeff1[i]])

            if (len(r_roots) == 1 and np.isreal(r_roots[0])
                    and -1 < r_roots[0].real < 1):
                res[i] = r_roots[0].real
            else:
                # Throw away complex roots and out-of-range values
                tmp = r_roots[(np.abs(r_roots.imag) < 1e-10)
                              & (np.abs(r_roots.real) < 1)].real

                if len(tmp) == 1:  # case one root in the range
                    res[i] = tmp[0]
                elif len(tmp) > 1:
                    # multiple roots in the range: pick the root closest to
                    # the uncorrected correlation value
                    I = np.argmin(np.abs(tmp - corr_compressed[i]))
                    res[i] = tmp[I]
                elif len(tmp) == 0:  # case no roots
                    # This happens for large rho values. Set it equal to +/-1.
                    # Normally only happens for the first autocorr. values.
                    res[i] = np.sign(corr_compressed[i])
        qc_data[map_imag] = res

    # Handle big values
    map_big = np.abs(qc_data) > 1
    qc_data[map_big] = np.sign(Cxy[map_big])

    QQqq = qc_data[:, 0]
    IIqq = qc_data[:, 1]
    IQqq = qc_data[:, 2]
    QIqq = qc_data[:, 3]

    #	Rqq=IIqq+QQqq+1j*(IQqq-QIqq)	# complex version

    # real version
    Rqq = np.zeros((QQqq.size * 2, ))

    QIqq[0] = 0
    QIqq = np.roll(QIqq, -1)
    Rqq[::2] = IIqq + QQqq
    Rqq[1::2] = IQqq + QIqq
    # 0.5 correlation in Q and I channels => divide autocorrelation function
    # by 2 for it to be strictly correct
    Rqq = Rqq / 2

    w = np.hanning(Rqq.size * 2)
    yqq = qc_power * np.absolute(np.fft.hfft(Rqq * w[Rqq.size:]))

    #yqq=qc_power*np.absolute(np.fft.hfft(Rqq))

    yqq[0] = yqq[1]
    yqq = yqq[yqq.size // 2:]

    return yqq
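A smoke-test sketch for the correction above, assuming the first four arguments are same-length arrays of lag-domain quantized correlations and the threshold arguments are high/low clipping fractions; all values are illustrative.

import numpy as np

lags = np.arange(64)
II = QQ = 0.8 * np.exp(-lags / 8.0)  # synthetic decaying autocorrelations
IQ = QI = np.zeros(64)               # no cross-channel correlation
spectrum = qc_kk(II, QI, IQ, QQ, 0.16, 0.16, 0.16, 0.16, 0.0, 0.0)
print(spectrum.shape)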