def method_2_from_cycle(savemode=None):

    '''
        DEPRECATED !!!
        USE FUNCTION method_3_from_cycle()
        DEPRECATED !!!


        Calculates the timelag based on a modified method of Erskine (1991),
        utilizing the cycle approach (see documentation).

        The result is saved to an Excel file.

        The mean timelag is not saved and is only shown in the console,
        since it can easily be assessed in Excel.
    '''

    # ---------------------------------------
    # user inputs
    # ---------------------------------------
    file_folder       = '../data/SLICED_171020141500_130420150600/hydrographs/'
    amplitudes_folder = '../data/SLICED_171020141500_130420150600/amplitude/'
    file_name         = 'Farge-ALL_10min.all'
    river_name        = 'Farge-W1_1min.all'
    river_ampl_fname  = 'W_1_amplitude.all'

    path_out          = 'out/'
    fname_out         = 'timelag_calculated_for_every_cycle'
    
    E_GW_1 = 0.25218  #  Tidal Efficiency [-] for well 1
    E_GW_2 = 0.31209  #  Tidal Efficiency [-] for well 2
    E_GW_3 = 0.24625  #  Tidal Efficiency [-] for well 3
    E_GW_4 = 0.17867  #  Tidal Efficiency [-] for well 4
    E_GW_5 = 0.33024  #  Tidal Efficiency [-] for well 5
    E_GW_6 = 0.36874  #  Tidal Efficiency [-] for well 6
    # ---------------------------------------
    # END user inputs END
    # ---------------------------------------


    path = os.path.dirname(sys.argv[0])
    fname = os.path.abspath(os.path.join(path, file_folder, file_name) )
    RIVER_fname = os.path.abspath(os.path.join(path, file_folder, river_name) )

    # load data into pd.dataframe
    data       = process2pandas.read_mean_hydrographs_into_pandas(fname, datetime_indexes=True, decimal='.',  skiprows=1)
    RIVER_data = process2pandas.read_mean_hydrographs_into_pandas(RIVER_fname, datetime_indexes=True, decimal='.',  skiprows=4)
    # get hightide, lowtide for accessing TIME of each
    river_hightide, river_lowtide, _ = process2pandas.read_amplitudes_into_pandas(os.path.join(path, amplitudes_folder, river_ampl_fname))

    
    print 'shifting, amplifying well data...'
    data['shifted_amplified_GW_1'] = data['W_1'].mean() + (data['GW_1'] - data['GW_1'].mean()) / E_GW_1
    data['shifted_amplified_GW_2'] = data['W_1'].mean() + (data['GW_2'] - data['GW_2'].mean()) / E_GW_2
    data['shifted_amplified_GW_3'] = data['W_1'].mean() + (data['GW_3'] - data['GW_3'].mean()) / E_GW_3
    data['shifted_amplified_GW_4'] = data['W_1'].mean() + (data['GW_4'] - data['GW_4'].mean()) / E_GW_4
    data['shifted_amplified_GW_5'] = data['W_1'].mean() + (data['GW_5'] - data['GW_5'].mean()) / E_GW_5
    data['shifted_amplified_GW_6'] = data['W_1'].mean() + (data['GW_6'] - data['GW_6'].mean()) / E_GW_6
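    # The six lines above map each well onto the river datum: deviations from the
    # well's own mean are divided by its tidal efficiency E and re-centred on
    # mean(W_1). Illustrative numbers (not taken from the data): with E = 0.25,
    # a well fluctuation of +0.10 m becomes +0.40 m about the river mean.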

    TLAG = dict()
    TLAG['GW_1'] = []
    TLAG['GW_2'] = []
    TLAG['GW_3'] = []
    TLAG['GW_4'] = []
    TLAG['GW_5'] = []
    TLAG['GW_6'] = []
    
    
    
    number_of_cycles = len(river_lowtide['datetime'])
    print 'Looping over Tidal Cycles of a River...'
    for t_ht, t_lt, i in zip(river_hightide['datetime'], river_lowtide['datetime'], river_lowtide.index):  # iterate over RIVER cycle times...
        print '\n\n Calculating timelag... for cycle {0}/{1}'.format(i+1, number_of_cycles)
        
        TLAG_I = dict()
        # loop over gw wells and USER-DEFINED possible timelags
        #   e.g. timetuple=(20, 30) means the script will try to match every timelag in [20, 21, ..., 30]
        #   these timetuples narrow the search window to speed up the calculation,
        #   because this approach of Erskine is time-consuming
        for n1, timetuple in zip(['GW_1', 'GW_2', 'GW_3', 'GW_4', 'GW_5', 'GW_6'],
                                 [(-10, 80), (-10, 80), (-10, 80), (-10, 80), (-10, 80), (-10, 80)]):
            h = data.ix[t_ht:t_lt, 'shifted_amplified_'+n1]  # slice data for correct time (t_ht:t_lt), and select well
            SUMM_LIST = list()
            TLAG_LIST = list()
            
            # loop over possible timelag values.... (see explanation in script <calculate_timelag.py>)
            for timelag in xrange(timetuple[0], timetuple[1]+1):  # try all timelags specified in 'timetuple'
                timelag_datetime = timedelta(minutes=timelag)     # convert minutes to datetime object

                # now cycle through all records in GROUNDWATERLEVEL data...
                summ = 0.
                for time_index, h_value in h.iteritems():
                    T = RIVER_data.loc[(time_index-timelag_datetime)][0]
                    summ += (h_value - T)**2

                SUMM_LIST.append(summ)
                TLAG_LIST.append(timelag)

            TLAG_I[n1] = TLAG_LIST[SUMM_LIST.index(min(SUMM_LIST))]
        
        # save tlags of all wells into one dictionary
        for n, v in TLAG_I.iteritems():
            TLAG[n].append(v)


    # ------------------------------------------------------
    # now we have all timelags for each cycle and each well...
    # so... calculate the mean!
    # ------------------------------------------------------

    print '+'*50
    for n, v in TLAG.iteritems():
        TLAG[n] = np.array(v)
        print n, '\t >>> average tlag = ', TLAG[n].mean(), 'min'
        


    # save to EXCEL file
    df = pd.DataFrame(data=TLAG)
    outputfname = os.path.abspath(os.path.join(path, path_out, fname_out+'.xls'))
    writer = pd.ExcelWriter(outputfname)
    df.to_excel(writer, na_rep='---', index=True)
    writer.save()
    print "File created:", outputfname

import process2pandas
import plot_pandas


if __name__ == '__main__':
    # --------------------------------------------------------------------------------------
    # user inputs
    # --------------------------------------------------------------------------------------
    file_folder  = '../data/SLICED_171020141500_130420150600/hydrographs/'
    file_name    = 'Farge_mean_after_Serfes1991.csv'
    
    col_names    = ['GW_2_averaging3', 'GW_3_averaging3', 'GW_4_averaging3', 'W_1_averaging3']
    legend_names = ['GW_2 mean water-level', 'GW_3 mean water-level', 'GW_4 mean water-level', 'W_1 mean water-level']
    # --------------------------------------------------------------------------------------
    # END user inputs END
    # --------------------------------------------------------------------------------------

    path = os.path.dirname(sys.argv[0])
    fname = os.path.abspath(os.path.join(path, file_folder, file_name) )

    data = process2pandas.read_mean_hydrographs_into_pandas(fname, datetime_indexes=True, decimal=',', na_values=['---'])
    
    if _sns:
        with sns.axes_style("whitegrid"):
            plot_pandas.plot_mean_waterlevel(data, col_names, legend_names , saveName=None)
    else:
        plot_pandas.plot_mean_waterlevel(data, col_names, legend_names , saveName=None)

def method_3_from_cycle(savemode=None):
    '''
        Calculates the timelag based on a modified method of Erskine (1991),
        utilizing the cycle approach (see documentation).

        The result is saved to an Excel file.

        The mean timelag is not saved and is only shown in the console,
        since it can easily be assessed in Excel.
    '''
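    # Core idea (sketch of the criterion minimized below): for each candidate
    # timelag tau [min] the script evaluates
    #     S(tau) = sum_i ( h_i - T(t_i - tau) )**2
    # where h_i is the shifted/amplified groundwater head at time t_i and T is
    # the river stage; the timelag reported for a cycle is the tau with the
    # smallest S(tau).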
    # ---------------------------------------
    # user inputs
    # ---------------------------------------
    file_folder       = '../data/SLICED_171020141500_130420150600/hydrographs/'
    amplitudes_folder = '../data/SLICED_171020141500_130420150600/amplitude/'
    file_name         = 'Farge-ALL_10min.all'
    river_name        = 'Farge-W1_1min.all'
    river_ampl_fname  = 'W_1_amplitude.all'
    fname_Ei          = '../data/SLICED_171020141500_130420150600/output_tidal_efficiency_with_E.xls'

    path_out          = 'out/'
    fname_out         = 'timelag_calculated_for_every_cycle'

    # search limits for the timelag; (0, 80) means the script will iterate over tlag = [0, 1, 2, ..., 80]
    MIN = dict()
    MIN['GW_1'] = (0, 80)
    MIN['GW_2'] = (0, 80)
    MIN['GW_3'] = (0, 80)
    MIN['GW_4'] = (0, 80)
    MIN['GW_5'] = (0, 80)
    MIN['GW_6'] = (0, 80)
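    # (both bounds are inclusive -- the search loop below uses xrange(MIN[well][0], MIN[well][1] + 1))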
    # ---------------------------------------
    # END user inputs END
    # ---------------------------------------


    path = os.path.dirname(sys.argv[0])
    fname = os.path.abspath(os.path.join(path, file_folder, file_name) )
    RIVER_fname = os.path.abspath(os.path.join(path, file_folder, river_name) )

    # read data into pd.dataframe
    data       = process2pandas.read_mean_hydrographs_into_pandas(fname, datetime_indexes=True, decimal='.',  skiprows=1)
    RIVER_data = process2pandas.read_mean_hydrographs_into_pandas(RIVER_fname, datetime_indexes=True, decimal='.',  skiprows=4)
    # get hightide, lowtide for accessing TIME of each
    river_hightide, river_lowtide, _ = process2pandas.read_amplitudes_into_pandas(os.path.join(path, amplitudes_folder, river_ampl_fname))

    print "reading xlx with Ei"
    # read_ XLS into dictionary with key=sheet_name, value=pd.DataFrame
    xl_file = pd.ExcelFile(os.path.join(path, fname_Ei))
    dfs = {sheet_name: xl_file.parse(sheet_name)  # read
          for sheet_name in xl_file.sheet_names}
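    # dfs is now e.g. {'GW_1': DataFrame, ..., 'GW_6': DataFrame}; per the column
    # access below, each sheet is expected to contain 'Datetime High Tide',
    # 'E_i (amplitude ratio)' and 'E_i (std ratio)'.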
    
    TLAG = dict()
    # loop over wells...
    for well in ['GW_1', 'GW_2', 'GW_3', 'GW_4', 'GW_5', 'GW_6']:
        # for each well...
        TLAG[well] = []

        mean = data[well].mean()
        print 'MEAN = ', mean

        t_ht = dfs[well]['Datetime High Tide']
        E_amp = dfs[well]['E_i (amplitude ratio)']
        E_std = dfs[well]['E_i (std ratio)']

        number_of_cycles = len(t_ht)

        i = 0
        TLAG_I = list()
        for t_ht_i, E_amp_i, E_std_i in zip(t_ht, E_amp, E_std):
            # for each cycle...
            i += 1
            t_stac_gw = t_ht_i    - timedelta(minutes=180)   # start the cycle 180 min before the high-tide peak
            t_endc_gw = t_stac_gw + timedelta(minutes=720)   # end the cycle 720 min after its beginning
            # t_stac_gw, t_endc_gw - datetimes of the cycle start and end in DataFrame "data" (hydrographs, 10 min) for the current well
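            # Illustrative example (hypothetical timestamp): for a high tide at
            # 2014-10-20 14:00 the cycle window is 11:00 -> 23:00 of the same day,
            # i.e. a 12 h slice covering roughly one semidiurnal tidal cycle (~12.4 h).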

            # now, we know exact time of start and stop of cycle >>> slice data!
            h = copy.deepcopy(data.ix[t_stac_gw:t_endc_gw, well])  # slice data for the current cycle (t_stac_gw:t_endc_gw) and select the well
            mean = RIVER_data.ix[t_stac_gw:t_endc_gw].mean()[0]    # mean of a tidal stage for current cycle
            E = E_std_i                                            # tidal efficiency of current well for current cycle


            # shift, amplify data....
            h = mean + (h - mean) / E
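            # (unlike method_2_from_cycle, the re-centring here uses the river mean of the
            # current cycle and the cycle-specific efficiency E_std_i, not fixed constants)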
            
            print '\nCalculating timelag... for well={2}, cycle {0}/{1}'.format(i, number_of_cycles, well)
            print '\ttstart={0}\n\ttstop={1}\n\tE={2}'.format(t_stac_gw, t_endc_gw, E)
            
            SUMM_LIST = list()
            TLAG_LIST = list()
            for timelag in xrange(MIN[well][0], MIN[well][1]+1):  # try all timelags within the limits given in MIN[well]
                timelag_datetime = timedelta(minutes=timelag)     # convert minutes to datetime object

                # now cycle through all records in GROUNDWATERLEVEL data...
                summ = 0.
                for time_index, h_value in h.iteritems():
                    T = RIVER_data.loc[(time_index-timelag_datetime)][0]
                    summ += (h_value - T)**2
                SUMM_LIST.append(summ)
                TLAG_LIST.append(timelag)

            print '\ttlag >>>', TLAG_LIST[SUMM_LIST.index(min(SUMM_LIST))], 'min'
            TLAG_I.append(TLAG_LIST[SUMM_LIST.index(min(SUMM_LIST))])  # append correct timelag corresponding to minimum sum
        TLAG[well] = TLAG_I


    print '+'*50
    for n, v in TLAG.iteritems():
        TLAG[n] = np.array(v)
        print n, '\t >>> average tlag = ', TLAG[n].mean(), 'min'



    # save to EXCEL file
    df = pd.DataFrame(data=TLAG)
    outputfname = os.path.abspath(os.path.join(path, path_out, fname_out+'.xls'))
    writer = pd.ExcelWriter(outputfname)
    df.to_excel(writer, na_rep='---', index=True)
    writer.save()
    print "File created:", outputfname