def test_original_test(NoTrendData, TrendData, arbitrary_1d_data):
    # check with no trend data
    NoTrendRes = mk.original_test(NoTrendData)
    assert NoTrendRes.trend == 'no trend'
    assert NoTrendRes.h == False
    assert NoTrendRes.p == 1.0
    assert NoTrendRes.z == 0
    assert NoTrendRes.Tau == 0.0
    assert NoTrendRes.s == 0.0
    assert NoTrendRes.var_s == 0.0
    assert NoTrendRes.slope == 0.0

    # check with trendy data
    TrendRes = mk.original_test(TrendData)
    assert TrendRes.trend == 'increasing'
    assert TrendRes.h == True
    assert TrendRes.p == 0.0
    assert TrendRes.Tau == 1.0
    assert TrendRes.s == 64620.0
    np.testing.assert_allclose(TrendRes.slope, 1.0, rtol=1e-02)

    # check with arbitrary data
    result = mk.original_test(arbitrary_1d_data)
    assert result.trend == 'no trend'
    assert result.h == False
    assert result.p == 0.37591058740506833
    assert result.z == -0.8854562842589916
    assert result.Tau == -0.03153167653875869
    assert result.s == -1959.0
    assert result.var_s == 4889800.333333333
    assert result.slope == -0.0064516129032258064
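A quick sketch of the same result fields on synthetic data (illustrative values only; the attribute names match pymannkendall's Mann_Kendall_Test namedtuple):

import numpy as np
import pymannkendall as mk

rng = np.random.default_rng(1)
res = mk.original_test(np.arange(20) + rng.normal(size=20))
print(res.trend, res.h, round(res.p, 4), round(res.slope, 3))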
def test_residuals(model, timeperiod, reg):
    """
    Test for a residual trend, applying a Mann-Kendall-test

    Parameters
    ----------
    model : GLMObject
        Best model
    timeperiod : np.array
        considered years (not used here)

    Returns
    -------
    float
        slope in residuals
    float
        p-value

    """
    res_trend = mk.original_test(model.resid_response, alpha=0.1)

    fig, ax = plt.subplots(figsize=(12, 8))
    sm.graphics.tsa.plot_acf(model.resid_response, lags=39, ax=ax)
    ax.set_xlabel('lag')
    ax.set_title('Autocorrelation {}'.format(reg))
    #fig.savefig('/home/insauer/projects/NC_Submission/Climada_papers/Test/AutocorrResidualsGMT_{}.png'.format(reg),bbox_inches = 'tight',dpi =600)

    alt_trend_test = mk.hamed_rao_modification_test(model.resid_response)

    return res_trend.slope, res_trend.p, alt_trend_test.trend, alt_trend_test.p
Example #3
def mk_col_slope(Data, YearCol, LocCol, window, TH, alpha=0.1):
    length = Data[LocCol].shape[0]
    start_index = Data[YearCol].values[window - 1]
    final_index = Data[YearCol].values[length - 1] + 1

    Year = pd.Series(range(start_index, final_index))

    iterations = length - (window - 1)
    stats_list2 = pd.DataFrame(index=range(0, iterations),
                               columns=['Year', LocCol])

    for instance in range(0, iterations):
        stats_list2['Year'].values[instance] = Year[instance]

    for instance in range(0, iterations):
        snip = Data[LocCol].loc[instance:instance + window - 1]
        if missing_values(snip, window, TH):
            snip_test = MK.original_test(snip, alpha)
            stats_list2[LocCol].values[instance] = snip_test.slope

        else:
            stats_list2[LocCol].values[instance] = 'Not enough data'

    #print(stats_list2)
    return stats_list2
Example #4
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--granularity',
        '-g',
        type=str,
        default='minute',
        help=
        'one of [day,hour,minute], the granularity of the x-axis. default=minute'
    )
    parser.add_argument(
        '--num_points',
        '-n',
        type=int,
        default=60 * 24,
        help=
        'number of points to sample from historic prices (e.g. number of days). default=1440'
    )
    parser.add_argument(
        '--mode',
        '-m',
        type=str,
        default='close',
        help=
        'the cryptocompare price attribute to use (e.g. open or close). default=close'
    )
    parser.add_argument('--currency',
                        '-c',
                        type=str,
                        default='CAD',
                        help='currency (e.g. CAD, USD). default=CAD')

    args = parser.parse_args()

    fig, axs = plt.subplots(2, 1)

    colors = 'rgbykc'
    for ax, symbol, color in zip(axs, ['BTC', 'ETH'], colors):
        fn = getattr(cryptocompare, f'get_historical_price_{args.granularity}')

        hist = fn(symbol, args.currency, limit=args.num_points)
        data = [x[args.mode] for x in hist]
        cur = data[-1]

        result = mk.original_test(data)
        line = lambda t: result.slope * t + result.intercept
        print(symbol, ':', result, '\n\n')
        ax.plot(
            data,
            label=f'{symbol} = {cur} ({result.trend}, p={round(result.p, 4)})',
            c=color)
        ax.plot(list(map(line, range(len(data)))), label='MK fit')
        ax.legend()
        ax.set_ylabel(f'{symbol} Price ({args.mode})')
        ax.set_xlabel('previous ' + args.granularity + 's')
    fig.suptitle(
        f'Crypto prices {args.currency} for the last {args.num_points} {args.granularity}s',
        fontsize=12)
    fig.tight_layout()
    plt.show()
Example #5
def multip_function(x):
    """This function estimates the trend in the discharge time series of one grid cell.

    Parameters
    ----------
    x : tuple
        lat, lon coordinates

    Returns
    -------
    reg.slope
        slope of the trend
    reg.p
        p-value of the trend
    """
    lat, lon = x
    # exclude coordinates over sea
    if (lat == -1000) or (lon == -1000):
        return np.nan, np.nan
    print(lon, lat)
    data = get_dis_gridcell(lat, lon)

    if np.isnan(data).all():
        return np.nan, np.nan
    else:
        reg = mk.original_test(data, alpha=0.1)

    return reg.slope, reg.p
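Coordinates flagged -1000 return before any data access, so multip_function maps cleanly over a coordinate list; a minimal multiprocessing sketch (pool size and coordinates are illustrative, and get_dis_gridcell must be importable by the workers):

from multiprocessing import Pool

coords = [(-1000, -1000), (6.75, -1.25)]  # a sea cell, then a land cell
with Pool(4) as pool:
    results = pool.map(multip_function, coords)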
def test_crash(norm_stats, crash_date, norm_name):
    (V, SD, AC) = norm_stats

    sys.stdout.write(f"Results of the Mann Kendall Test "
                     f"for the {norm_name} (crash: {crash_date}): \n")
    MKV = mk.original_test(V)
    sys.stdout.write(
        f"Variance:          trend = {MKV.trend} | tau = {MKV.Tau:0.4f}\n")
    MKSD = mk.original_test(SD)
    sys.stdout.write(
        f"Spectral Density:  trend = {MKSD.trend} | tau = {MKSD.Tau:0.4f}\n"
    )
    MKAC = mk.original_test(AC)
    sys.stdout.write(
        f"Autocorrelation:   trend = {MKAC.trend} | tau = {MKAC.Tau:0.4f}\n\n"
    )
def test_residuals(model, timeperiod, reg):
    """
    Test for a residual trend, applying a Mann-Kendall-test

    Parameters
    ----------
    model : GLMObject
        Best model
    timeperiod : np.array
        considered years (not used here)

    Returns
    -------
    float
        slope in residuals
    float
        p-value

    """
    res_trend = mk.original_test(model.resid_response, alpha=0.1)

    fig, ax = plt.subplots(figsize=(12, 8))

    alt_trend_test = mk.hamed_rao_modification_test(model.resid_response)

    return res_trend.slope, res_trend.p, alt_trend_test.trend, alt_trend_test.p
Example #8
    def mann_kendall_price(self):
        """
        The Mann-Kendall trend test (sometimes called the M-K test) analyzes data
        collected over time for consistently increasing or decreasing (monotonic)
        trends in Y values. H0: there is no monotonic trend; H1: a monotonic trend
        exists, either positive or negative.

        :return: trend: trend direction, h: bool if trend exists, p: p-value, z: z-stat,
        Tau: Kendall's Tau, s: Mann-Kendall's score, var_s: variance of S,
        slope: Theil-Sen estimator/slope, intercept: intercept of the Kendall-Theil robust line
        """
        return mk.original_test(self.center_price())
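A minimal usage sketch, assuming `strategy` is an instance of the surrounding class; the printed names are the fields of pymannkendall's result namedtuple:

# `strategy` is hypothetical; any object exposing mann_kendall_price() works.
res = strategy.mann_kendall_price()
print(res.trend, res.h, res.p)
print(res.slope, res.intercept)  # Theil-Sen slope and intercept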
Example #9
def doTheAnnMeanFlowTrends(pastStats, currentStats = None):
    tSeries = []
    for key in pastStats:
        #tSeries.extend(pastStats[key][:12])
        tSeries.append(pastStats[key][12])
        #print(pastStats[key])
    
    if currentStats is not None:
        for key in currentStats:
            tSeries.append(currentStats[key][12])
    
    #print(len(tSeries), sum(tSeries)/len(tSeries))
    #print(tSeries)
    return [pmk.original_test(tSeries), pmk.hamed_rao_modification_test(tSeries)]
    def mkTest(self, series, seasonal):
        # `self` added: the body assigns self.__trend__, so this is a method
        if not seasonal:
            data_mk = mk.original_test(series)
            trend = data_mk[0]
        else:
            data_mk_seasonal_test = mk.seasonal_test(series, period=12)
            trend = data_mk_seasonal_test[0]

        if trend in ('decreasing', 'increasing'):
            self.__trend__ = 'present'
            return 'present'
        self.__trend__ = trend
        return trend
Example #11
def trendTest(output_dir, data):
    """
    Tests each of the call categories during a given time period for
    a monotonic trend by using the mann-kendall test. Results of this
    test are saved to a csv file.

    Inputs:
        - output_dir: String path to output directory
        - data: incident data of time period you want to graph
    """
    call_categories = [
        'injuries_external', 'motor', 'health', 'fire', 'mental_illness',
        'other'
    ]
    trend = []
    h = []
    p = []
    z = []
    Tau = []
    s = []
    var_s = []
    slope = []

    for category in call_categories:
        category_data = data[[category]]
        # original_test returns nine fields (including intercept), so use
        # attribute access rather than an eight-way unpack
        res = mk.original_test(category_data)
        trend.append(res.trend)
        h.append(res.h)
        p.append(res.p)
        z.append(res.z)
        Tau.append(res.Tau)
        s.append(res.s)
        var_s.append(res.var_s)
        slope.append(res.slope)

    results = pd.DataFrame({
        'Call_Category': call_categories,
        'Trend': trend,
        'h': h,
        'p': p,
        'z': z,
        'Tau': Tau,
        's': s,
        'var_s': var_s,
        'slope': slope
    })
    results.to_csv(join(output_dir, "trend_test_results.csv"), index=False)
Example #12
def getValuesMK(arry, p_sig):
    z_, lines, cols = arry.shape
    arryMk = np.empty((3, lines, cols))
    for l in range(lines):
        for c in range(cols):
            v = arry[:, l, c ]
            if  np.isnan( np.sum(v) ):
                arryMk[0, l, c] = np.nan
                arryMk[1, l, c] = np.nan
                arryMk[2, l, c] = np.nan
                continue
            r = mk.original_test( v, p_sig )
            arryMk[0, l, c] = r.s if r.p <= p_sig else np.nan
            arryMk[1, l, c] = r.p
            arryMk[2, l, c] = r.slope if r.p <= p_sig else np.nan
    return arryMk
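A minimal sketch exercising getValuesMK on synthetic data: a (time, lines, cols) stack yields a (3, lines, cols) array of S, p, and slope, with cells set to NaN where the series contains NaN or p exceeds p_sig:

import numpy as np
import pymannkendall as mk  # required by getValuesMK

rng = np.random.default_rng(0)
stack = rng.normal(size=(30, 4, 5))  # 30 time steps on a 4x5 grid
stack[:, 0, 0] = np.nan              # one masked cell
out = getValuesMK(stack, 0.05)
print(out.shape)                     # (3, 4, 5)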
def test_autocorrelation(time_series):
    """
    Test for autocorrelation

    Parameters
    ----------
    time_series : array_like
        input time series

    Returns
    -------
    float
        Kendall's Tau

    """
    auto = mk.original_test(time_series, alpha=0.1)

    return auto.Tau
Example #14
    def determine_orientation(self, positions):
        "Given a list of minimizer positions, determine the orientation of the contig"
        if len(positions) > 1:
            if all(x < y for x, y in zip(positions, positions[1:])):
                return "+"
            if all(x > y for x, y in zip(positions, positions[1:])):
                return "-"
            if self.args.mkt:
                mkt_result = mk.original_test(positions)
                if mkt_result.h and mkt_result.p <= 0.05:
                    return "+" if mkt_result.trend == "increasing" else "-"
            else:
                tally = Counter([x < y for x, y in zip(positions, positions[1:])])
                positive_perc = tally[True]/float(len(positions)-1)*100
                negative_perc = 100 - positive_perc
                if positive_perc >= self.args.m:
                    return "+"
                if negative_perc >= self.args.m:
                    return "-"

        return "?"
Example #15
def mk_column(Data, YearCol, LocCol, windows, datalist, threshold, alpha=0.1):
    for each in windows:
        length = Data[LocCol].shape[0]
        start_index = Data[YearCol].values[each - 1]
        final_index = Data[YearCol].values[length - 1] + 1

        Year = pd.Series(range(start_index, final_index))

        iterations = length - (each - 1)
        stats_list2 = pd.DataFrame(index=range(0, iterations),
                                   columns=[
                                       LocCol, 'Year', 'trend', 'Ha', 'p', 'Z',
                                       'S', 'VAR(S)', 'slope'
                                   ])
        stats_list2[LocCol].values[0] = str(each) + " Year window"

        for instance in range(0, iterations):
            stats_list2['Year'].values[instance] = Year[instance]

        for instance in range(0, iterations):
            snip = Data[LocCol].loc[instance:instance + each -
                                    1]  # INSERT TEST FOR 10%
            if missing_values(snip, each, threshold):
                snip_test = MK.original_test(snip, alpha)
                stats_list2['trend'].values[instance] = snip_test.trend
                stats_list2['Ha'].values[instance] = snip_test.h
                stats_list2['p'].values[instance] = snip_test.p
                stats_list2['Z'].values[instance] = snip_test.z
                stats_list2['S'].values[instance] = snip_test.s
                stats_list2['VAR(S)'].values[instance] = snip_test.var_s
                stats_list2['slope'].values[instance] = snip_test.slope

            else:
                stats_list2['trend'].values[instance] = 'Not enough data'

        #print(stats_list2)
        datalist.append(
            stats_list2
        )  # this is used if you want to enter different data frames in a list
    # TODO: find a way to store the results in Excel
def test_autocorrelation(time_series):
    """
    Test for a residual trend, applying a Mann-Kendall-test

    Parameters
    ----------
    model : GLMObject
        Best model
    timeperiod : np.array
        considered years (not used here)

    Returns
    -------
    float
        slope in residuals
    float
        p-value

    """
    auto = mk.original_test(time_series, alpha=0.1)

    return auto.Tau
def test_residuals(model, timeperiod, reg):
    """
    Test for a residual trend, applying a Mann-Kendall-test

    Parameters
    ----------
    model : GLMObject
        Best model
    timeperiod : np.array
        considered years (not used here)
    reg : str
        region label (not used here)

    Returns
    -------
    float
        slope in residuals
    float
        p-value

    """
    res_trend = mk.original_test(model.resid_response, alpha=0.1)

    return res_trend.slope, res_trend.p
Example #18
def calc_mann_kendall(data_file, info_file, out_file):
    """
    用 kendall tau刻画每个column的发育趋势
    """
    # load
    df = pd.read_csv(data_file)
    info_df = pd.read_csv(info_file)
    ages = np.array(info_df['age in years'])
    age_uniq = np.unique(ages)

    # calculate
    out_df = pd.DataFrame(index=('tau', 'p'), columns=df.columns)
    for col in out_df.columns:
        meas_vec = np.array(df[col])
        y = np.zeros_like(age_uniq, dtype=np.float64)
        for age_idx, age in enumerate(age_uniq):
            y[age_idx] = np.mean(meas_vec[ages == age])
        mk_test = mk.original_test(y, 0.05)
        out_df.loc['tau', col] = mk_test.Tau
        out_df.loc['p', col] = mk_test.p

    # save
    out_df.to_csv(out_file)
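A call sketch with hypothetical file names; data_file holds one measure per column, and info_file carries an 'age in years' column aligned row-wise with it:

# All three paths are placeholders for illustration.
calc_mann_kendall('measures.csv', 'subject_info.csv', 'mk_tau.csv')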
Example #19
def Trend_1(data_sku, star1):
    starting = star1
    data_sku3 = data_sku[starting:len(data_sku)]
    data_sku3 = data_sku3.reset_index(drop=True)
    index = data_sku3.ne(0).idxmax()
    data_sku4 = data_sku3[index:]  #first non zero element
    data_sku4 = data_sku4.reset_index(drop=True)
    Zero = np.where(data_sku4 == 0)[0]  # indices of zero-demand periods
    sparsity = len(Zero) / len(data_sku4)
    if len(data_sku3) >= 8 and sparsity < (0.2):
        dd = mk.original_test(data_sku3)
        if dd[0] == 'increasing':
            Type_0 = 'Growing'
        elif dd[0] == 'decreasing':
            Type_0 = 'Degrowing'
        else:
            Type_0 = 'Normal'
    elif len(data_sku3) >= 12 and sparsity > (0.2):
        #rolling-->can be applied to series
        roll_sum = pd.Series(data_sku).rolling(6).mean()
        roll_diff = np.diff(roll_sum)
        g_1 = len(np.where(roll_diff > 0)[0])
        l_1 = len(np.where(roll_diff < 0)[0])
        g_p = g_1 / len(roll_diff)
        l_p = l_1 / len(roll_diff)
        if g_p >= 0.75:
            Type_0 = 'Growing'
        elif l_p >= 0.75:
            Type_0 = 'Degrowing'
        else:
            Type_0 = 'Normal'

    else:
        Type_0 = 'Normal'

# v=c(Type_0,starting)
    return (Type_0)
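A minimal call sketch for Trend_1, assuming a pandas Series of demand and a start offset of 0 (the values are made up):

import numpy as np
import pandas as pd
import pymannkendall as mk  # required by Trend_1

demand = pd.Series([5, 7, 6, 9, 11, 10, 13, 15, 14, 17, 19, 21])
print(Trend_1(demand, 0))  # a steadily rising series should come out 'Growing'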
Example #20
def mk_column(Data, YearCol, LocCol, windows):
    for each in windows:
        length = Data[LocCol].shape[0]
        start_index = Data[YearCol].values[each - 1]
        final_index = Data[YearCol].values[length - 1] + 1

        Year = pd.Series(range(start_index, final_index))

        iterations = length - (each - 1)
        stats_list2 = pd.DataFrame(
            index=range(0, iterations),
            columns=['Year', 'trend', 'Ha', 'p', 'Z', 'S', 'VAR(S)', 'slope'])
        stats_list2.insert(0, 'Name', None)
        stats_list2['Name'].values[0] = LocCol

        for instance in range(0, iterations):
            stats_list2['Year'].values[instance] = Year[instance]

        for instance in range(0, iterations):
            snip = Data[LocCol].loc[instance:instance + each - 1]
            snip_test = MK.original_test(snip, 0.1)
            stats_list2['trend'].values[instance] = snip_test[0]
            stats_list2['Ha'].values[instance] = snip_test[1]
            stats_list2['p'].values[instance] = snip_test[2]
            stats_list2['Z'].values[instance] = snip_test[3]
            stats_list2['S'].values[instance] = snip_test[5]
            stats_list2['VAR(S)'].values[instance] = snip_test[6]
            stats_list2['slope'].values[instance] = snip_test[7]

        # stats_list2 = stats_list2.transpose()
        # stats_list2.insert(stats_list2.shape[1],None,None)
        # stats_list2 = stats_list2.transpose()
        # DataFrame.append was removed in pandas 2.0; use concat to add a spacer row
        stats_list2 = pd.concat([stats_list2, pd.DataFrame([np.nan])],
                                ignore_index=True)
        print(stats_list2)
        # stats_list2.to_excel("testNewColTranspose.xlsx", index=False)
        # TODO: find a way to store the results in Excel
Example #21
    def add_element(self, value):
        '''
        Add a new element to the statistic
        '''
        # reset parameters if a change was detected:
        if self.in_concept_change:
            self.reset()

        # append element:
        self.instance_memory.append(value)

        if len(self.instance_memory) == self.min_instances:
            self.sample_count = 1

        if len(self.instance_memory) > self.min_instances:
            self.instance_count += 1

        # start drift detection: min_instances has to be reached; the test then
        # runs once, and after that every i-th instance (instances_step)
        if len(self.instance_memory) >= self.min_instances and (
                (self.instance_count == self.instances_step) or (self.sample_count == 1)):

            # call the corresponding test from the package; an unknown test_type
            # would otherwise leave results_tuple undefined, so fail loudly instead
            if self.test_type == 'original_mk':
                print('Perform MK test')
                results_tuple = mk.original_test(self.instance_memory, self.alpha)
                print('MK test ended')
            elif self.test_type == 'hamed_rao_mod':
                results_tuple = mk.hamed_rao_modification_test(self.instance_memory, self.alpha)
            elif self.test_type == 'yue_wang_mod':
                results_tuple = mk.yue_wang_modification_test(self.instance_memory, self.alpha)
            elif self.test_type == 'trend_free_pre_whitening_mod':
                results_tuple = mk.trend_free_pre_whitening_modification_test(self.instance_memory, self.alpha)
            elif self.test_type == 'pre_whitening_mod':
                results_tuple = mk.pre_whitening_modification_test(self.instance_memory, self.alpha)
            elif self.test_type == 'seasonal':
                results_tuple = mk.seasonal_test(self.instance_memory, period=self.period, alpha=self.alpha)
            else:
                raise ValueError(f'unknown test_type: {self.test_type}')

            # reset counters every time a test was performed:
            self.sample_count = 0
            self.instance_count = 0

            # assign results; use attribute access, since results_tuple[-1] is the
            # intercept (not the slope) in pymannkendall >= 1.1
            self.p_value = results_tuple.p
            self.sens_slope = results_tuple.slope
            self.trend = results_tuple.trend

            if self.p_value < self.alpha and np.abs(self.sens_slope) > self.slope_threshold:
                self.in_concept_change = True
            else:
                self.in_concept_change = False
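A minimal driving loop for the detector above; `MKDriftDetector` is a hypothetical name for the surrounding class, and the constructor arguments mirror the attributes this method uses:

import numpy as np

# Hypothetical class name and constructor; the method above is assumed to be
# its add_element().
detector = MKDriftDetector(test_type='original_mk', alpha=0.05,
                           min_instances=30, instances_step=10,
                           slope_threshold=0.0)
stream = np.concatenate([np.random.normal(0, 1, 100),
                         np.arange(100) * 0.1])  # flat, then drifting
for t, value in enumerate(stream):
    detector.add_element(value)
    if detector.in_concept_change:
        print(f'drift detected at t={t}')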
def rel_time_attr_MK(dataFrame71):
    """
    Theil-Sen-Slope estimation and Mann-Kendall-Test to estimate the
    contribution of each driver!

    Parameters
    ----------
    dataFrame71 : pd.DataFrame
        time series starting in 1971

    Returns
    -------
    regH : list, MK output
        Sen's slope and MK-test result with uncertainty range of hazard
        (with 1980 fixed exposure) (TS_Haz) 1980-2010
    regHE : list, MK output
        Sen's slope and MK-test result with uncertainty range of TS_HazExp
        1980-2010
    regH7 : list, MK output
        Sen's slope and MK-test result with uncertainty range of hazard
        (with 1980 fixed exposure) (TS_Haz) 1971-2010
    regH107 : list, MK output
        Sen's slope and MK-test result with uncertainty range of hazard
        (with 2010 fixed exposure) (TS_Haz) 1971-2010
    regH10 : list, MK output
        Sen's slope and MK-test result with uncertainty range of hazard
        (with 2010 fixed exposure) (TS_Haz) 1980-2010
    regF : list, MK output
        Sen's slope and MK-test result with uncertainty range of TS_Full
        1980-2010
    regN : list, MK output
        Sen's slope and MK-test result with uncertainty range of observed
        damages

    """

    dataFrame = dataFrame71[dataFrame71['Year'] > 1979]

    regLHazExp = mk.original_test(dataFrame['Norm_Impact_2y_trend'], alpha=0.1)

    slopeLHazExp = stats.theilslopes(dataFrame['Norm_Impact_2y_trend'],
                                     alpha=0.1)

    regHE = [regLHazExp.slope, regLHazExp.p, slopeLHazExp[2], slopeLHazExp[3]]

    regLFull = mk.original_test(dataFrame['Norm_Impact_Pred'], alpha=0.1)

    slopeLFull = stats.theilslopes(dataFrame['Norm_Impact_Pred'], alpha=0.1)

    regF = [regLFull.slope, regLFull.p, slopeLFull[2], slopeLFull[3]]

    regHaz = mk.original_test(dataFrame['Norm_ImpFix_2y_trend'], alpha=0.1)

    slopeHaz = stats.theilslopes(dataFrame['Norm_ImpFix_2y_trend'], alpha=0.1)

    regH = [regHaz.slope, regHaz.p, slopeHaz[2], slopeHaz[3]]

    regHaz7 = mk.original_test(dataFrame71['Norm_ImpFix_2y_trend'], alpha=0.1)

    slopeHaz7 = stats.theilslopes(dataFrame71['Norm_ImpFix_2y_trend'],
                                  alpha=0.1)

    regH7 = [regHaz7.slope, regHaz7.p, slopeHaz7[2], slopeHaz7[3]]

    regHaz107 = mk.original_test(dataFrame71['Norm_Imp2010_2y_trend'],
                                 alpha=0.1)

    slopeHaz107 = stats.theilslopes(dataFrame71['Norm_Imp2010_2y_trend'],
                                    alpha=0.1)

    regH107 = [regHaz107.slope, regHaz107.p, slopeHaz107[2], slopeHaz107[3]]

    regHaz10 = mk.original_test(dataFrame['Norm_Imp2010_2y_trend'], alpha=0.1)

    slopeHaz10 = stats.theilslopes(dataFrame['Norm_Imp2010_2y_trend'],
                                   alpha=0.1)

    regH10 = [regHaz10.slope, regHaz10.p, slopeHaz10[2], slopeHaz10[3]]

    regNat = mk.original_test(dataFrame['natcat_flood_damages_2005_CPI'],
                              alpha=0.1)

    slopeNat = stats.theilslopes(dataFrame['natcat_flood_damages_2005_CPI'],
                                 alpha=0.1)

    regN = [regNat.slope, regNat.p, slopeNat[2], slopeNat[3]]

    return regH, regHE, regH7, regH107, regH10, regF, regN
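The recurring pattern above pairs pymannkendall's slope and p-value with the confidence bounds from scipy's theilslopes; a compact sketch with synthetic data (the alpha values mirror the function):

import numpy as np
import pymannkendall as mk
from scipy import stats

y = np.arange(31) * 0.5 + np.random.default_rng(2).normal(size=31)
r = mk.original_test(y, alpha=0.1)
ts = stats.theilslopes(y, alpha=0.1)  # (slope, intercept, lo_slope, hi_slope)
reg = [r.slope, r.p, ts[2], ts[3]]
print(reg)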
Example #23
import numpy as np
import xarray as xr
import pymannkendall as pmk

#Reading Data from NetCDF File
Data = xr.open_dataset(in_file)
Data_2_use = Data['tmp']

#Selecting location 0.5W, 6.5N
loc_data = Data_2_use.sel(lon=-0.5, lat=6.5, method='nearest')
loc_data.plot()

#Single Location Selection
SLS = Data_2_use.sel(lon=0.5, lat=7.5, method='nearest')
SLS.plot()

#Areal Selection & Averaging
ASA = Data_2_use.sel(lon=np.arange(-1.5, 1.5, 0.5),
                     lat=np.arange(5, 15, 0.5),
                     method='nearest')
ASA = ASA.mean(dim=('lon', 'lat'))
ASA.plot()

#Annual Averaging
Ann_avg = ASA.groupby('time.year').mean('time')
Ann_avg.plot()

### Statistics of Data
stat_result = pmk.original_test(Ann_avg)
print(stat_result)

#Seasonal Climatology
Seas_avg = ASA.groupby('time.season').mean('time')
print(Seas_avg)
Example #24
# snippet begins mid-loop: `f` is an open binary file of packed floats, and
# `ajat` (times), `ind`, `dnfind` (DNF indices) and `i` are initialized earlier
        break
    x = struct.unpack("f", x)[0]
    if (not np.isinf(x) and not np.isnan(x)):
        ajat.append(x)
        ind.append(i)
    else:
        dnfind.append(i)
    i += 1
f.close()

ajat = np.array(ajat)
dnfind = np.array(dnfind)
ind = np.array(ind)
sr = pd.Series(ajat, ind)

ts = mk.original_test(sr)
pns = stat.linregress(ind, ajat)

plt.plot(ind, ajat, 'o', color='b')
plt.plot(ind, ind * pns.slope + pns.intercept, label="pns")
plt.plot(ind, ind * ts.slope + ts.intercept, label="ts")
plt.xlim(right=np.max(ind))
plt.ylim(top=np.max(ajat))

#plotting the DNF points
ala, ula = plt.ylim()
dnfaika = np.zeros(np.shape(dnfind)) + ula
plt.plot(dnfind, dnfaika, 'o', color='r')

plt.legend()
#plt.tight_layout();
Example #25
def trend_test(df):
    result = mk.original_test(df)
    print(result)
Example #26
File: hw5new.py  Project: oykuzumrutdal/hw5
import pandas as pd
import matplotlib.pyplot as plt
import statsmodels.api as sm
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.api import Holt
from statsmodels.tsa.stattools import adfuller
import pymannkendall as mk
import os
#Import the required libraries
filenamemadrid = os.getcwd() + "\\weather_madrid_LEMD_1997_2015.csv"
df_madrid = pd.read_csv(filenamemadrid,
                        usecols=["CET", "Mean TemperatureC"],
                        sep=",")
#Read the csv files
df_madrid = df_madrid.dropna()
df_madrid = df_madrid.rename(columns={"CET": "date"})

filenamebrazil = os.getcwd() + "\\sudeste.csv"
df_brazil = pd.read_csv(filenamebrazil, usecols=["date", "temp"], sep=",")
df_brazil = df_brazil.dropna()

df_brazil = df_brazil.groupby(["date"])["temp"].mean()
df_brazil = df_brazil.to_frame()
df_brazil = df_brazil.reset_index(drop=False)
df_brazil = df_brazil.set_index('date')
df_madrid = df_madrid.set_index('date')

plt.show()
trend_brazil = mk.original_test(df_brazil)
print(trend_brazil)
trend_madrid = mk.original_test(df_madrid)
print(trend_madrid)
Example #27
def do_backtest(df, symbol, end=None):
    trade_count = 0
    trade_history = []
    balance = initial_balance
    win_count = 0
    loss_count = 0
    profit = 0
    action = HOLD
    current_tick = 0
    entry_tick = 0
    buy_mode = True
    entry_price = 0
    buy_index = 0
    window_size = 1000
    last_size = 50

    if backtest_mode == 2:
        df = df.iloc[end - window_size * 1 - 100:end + window_size * 2]
    elif backtest_mode == 3:
        df_x = df
        df = df.iloc[195267:199267]
        # fragment = detect_anomaly(df)
        #detect_anomaly(df.iloc[11706:11074])
        #plot_whole(df_x)

    df = df.reset_index()
    df = df.fillna(0)
    for i, row in df.iterrows():
        start_time = time.time()
        current_price = row['last_price']
        current_ask_price = row['best_ask_price']
        current_bid_price = row['best_bid_price']
        current_tick += 1
        if i > window_size:
            last = df.iloc[i, :]
            prev1 = df.iloc[i - 2, :]
            prev25 = df.iloc[i - 25, :]
            prev50 = df.iloc[i - 50, :]
            prev100 = df.iloc[i - 100, :]
            prev200 = df.iloc[i - 200, :]
            prev500 = df.iloc[i - 500, :]

            diffx1 = last.qav_sma500 - last.qav_sma1000
            diffx2 = prev50.qav_sma500 - prev50.qav_sma1000
            diffx3 = prev100.qav_sma500 - prev100.qav_sma1000
            diffx4 = prev200.qav_sma500 - prev200.qav_sma1000

            first_check = (
                last['last_sma600'] > prev100['last_sma600']
                and last['last_sma600'] > prev500['last_sma600']
                and last.qav_sma500 > last.qav_sma1000
                and prev50.qav_sma500 > prev50.qav_sma1000
                and prev100.qav_sma500 > prev100.qav_sma1000
                and prev200.qav_sma500 > prev200.qav_sma1000
                and last.qav_sma500 > prev50.qav_sma500 > prev100.qav_sma500 >
                prev200.qav_sma500 and diffx1 > diffx2 > diffx3 > diffx4
                and diffx1 > 0.3 and  ### this one is misleading too!!!!!
                diffx1 < 1  ### misleading!!!!!
            )
            # if last['index'] == 114395:
            #  	pdb.set_trace()
            if (first_check == True and conditions[0]['buy_mode'] == True):
                fragment = df.iloc[i - window_size:i, :]
                fragment = detect_anomaly(fragment)
                fragment = fragment.reset_index()
                last = fragment.iloc[-1, :]
                prev1 = fragment.iloc[-2, :]
                first_n = fragment[:window_size - last_size]
                last_n = fragment[-last_size:]
                mk_test = mk.original_test(fragment.change_qav.to_numpy())
                fragment_sum = fragment.groupby(
                    ['score_qav', 'label_qav'], as_index=False,
                    sort=False)[["change_qav", "change_price"]].sum()

                conditions[0]['buy_cond'] = (
                    (fragment_sum[fragment_sum['label_qav'] == 1].change_qav <
                     3).all() and mk_test.z > 1 and mk_test.z < 10
                    and mk_test.Tau < 0.1 and fragment_sum[
                        fragment_sum['label_qav'] == 1].change_qav.sum() > 4
                    and fragment_sum[fragment_sum['label_qav'] ==
                                     1].change_qav.sum() < 10
                    and fragment_sum.label_qav.iloc[0] == 0
                    and fragment_sum.label_qav.iloc[-1] == 1
                    and fragment_sum.label_qav.iloc[-2] == 1 and
                    (fragment_sum[fragment_sum['label_qav'] == 0].change_qav <
                     fragment_sum[fragment_sum['label_qav'] ==
                                  1].change_qav.max()).all()
                    and fragment_sum.iloc[-1].change_price +
                    fragment_sum.iloc[-2].change_price > 0
                    and fragment_sum.change_price.sum() > 0
                    and (last_n.label_qav == 1).count() < 50)
            elif (conditions[0]['buy_mode'] == False):
                conditions[0]['sell_cond'] = (last['last_sma600'] <
                                              prev1['last_sma600'])
            else:
                continue

            for ic, cond in enumerate(conditions):
                if cond['buy_mode'] and cond['buy_cond']:
                    conditions[ic]['action'] = BUY
                    conditions[ic]['entry_price'] = current_ask_price
                    conditions[ic]['buy_mode'] = False
                    if ic == 0:
                        printLog("CONDITION " + str(ic + 1) + " IS BUYING....")
                        printLog("##### TRADE " + str(cond['trade_count']) +
                                 " #####")
                        printLog("BUY: " + symbol + " for " +
                                 str(cond['entry_price']) + " at " +
                                 str(last.date) + " - index: " +
                                 str(last['index']))
                        printLog(fragment[[
                            'index', 'date', 'symbol', 'last_price',
                            'total_traded_quote_asset_volume', 'label_qav',
                            'score_qav', 'change_qav', 'change_price'
                        ]].tail(100))
                        printLog(
                            mk.original_test(fragment.change_qav.to_numpy()))
                        printLog(fragment_sum)
                        printLog("diffx1: " + str(diffx1))
                        printLog("last.qav_sma500: " + str(last.qav_sma500))
                        printLog("last.qav_sma1000: " + str(last.qav_sma1000))
                        printLog("prev100.qav_sma500: " +
                                 str(prev100.qav_sma500))
                        printLog("prev100.qav_sma1000: " +
                                 str(prev100.qav_sma1000))
                        #plot_whole(df)
                        #pdb.set_trace()
                elif not cond['buy_mode'] and cond['sell_cond']:
                    printLog("CONDITION " + str(ic + 1) + " IS SELLING....")
                    conditions[ic]['action'] = SELL
                    exit_price = current_bid_price
                    profit = (
                        (exit_price - cond['entry_price']) /
                        cond['entry_price'] + 1) * (1 - transaction_fee)**2 - 1
                    conditions[ic]['balance'] = conditions[ic]['balance'] * (
                        1.0 + profit)
                    conditions[ic]['trade_count'] += 1
                    conditions[ic]['buy_mode'] = True
                    printLog("SELL: " + symbol + " for " + str(exit_price) +
                             " at " + str(last.date) + " - index: " +
                             str(last['index']))
                    printLog("PROFIT: " + str(profit * 100))
                    printLog("BALANCE: " + str(cond['balance']))
                else:
                    conditions[ic]['action'] = HOLD

        if (current_tick > len(df) - 1):
            printLog("*********TOTAL RESULTS*************************")
            for ic, cond in enumerate(conditions):
                printLog("SYMBOL: " + symbol)
                printLog("CONDITION NUMBER: " + str(ic))
                printLog("TOTAL BALANCE: " + str(cond['balance']))
                printLog("TRADE COUNT: " + str(cond['trade_count']))
            printLog("**********************************")

        if i % 1000 == 0:
            printLog(symbol + "-" + str(row['index']))
Example #28
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import pymannkendall as mk

Birth_data = pd.read_csv("daily-total-female-births.csv",
                         parse_dates=['Date'],
                         index_col='Date')  #Birth_data
data = Birth_data

fig, ax = plt.subplots(figsize=(12, 8))
res = mk.original_test(data)
trend_line = np.arange(len(data)) * res.slope + res.intercept

ax.plot(data)
ax.plot(data.index, trend_line)
ax.legend(['data', 'trend line'])
ax.set(xlabel="Dates", ylabel="Births", title="Trend line")
fig.savefig('Trendline_plot1.png')
Example #29
import pandas as pd
import pymannkendall as mk

# IMPORTING THE FIRST DATA SET
Birth_data = pd.read_csv("daily-total-female-births.csv",
                         parse_dates=['Date'],
                         index_col='Date')

# SUMMARY STATISTICS
head = Birth_data.head()
Summary = Birth_data.describe()
print(head)
print(Summary)

# MANNKENDALL TREND TEST
Birth_data = pd.read_csv("daily-total-female-births.csv",
                         parse_dates=['Date'],
                         index_col='Date')  #Birth_data
MKT = mk.original_test(Birth_data, alpha=0.05)
print(MKT)

# IMPORTING SECOND DATA SET
Shampoo_data = pd.read_csv("shampoo.csv",
                           parse_dates=['Month'],
                           index_col='Month')

# SUMMARY STATISTICS
head_shampoo = Shampoo_data.head()
Summary_shampoo = Shampoo_data.describe()
print(head_shampoo)
print(Summary_shampoo)

# TREND TEST 1
MKT1 = mk.hamed_rao_modification_test(Shampoo_data)
print(MKT1)
Example #30
    for month in range(1, 13, 1):
        name_month = datetime.date(1900, int(month), 1).strftime('%B')

        TREND = []
        for index, row in list_nom.iterrows():
            data = pd.read_csv(path_m + row.iloc[0] + '_MONTH_' + varin + '_' +
                               indice + '_' + str(yearmin) + '_' +
                               str(yearmax) + '_' +
                               '{:02d}'.format(month) + '.csv',
                               skiprows=2)
            data = data.rename(columns={
                data.columns[1]: "var"
            }).set_index('datetime')

            if (valeur['name'] == 'Original Mann-Kendall test'):
                trend, h, p, z, Tau, s, var_s, slope, intercept = mk.original_test(
                    data)
            elif (valeur['name'] == 'Hamed and Rao Modified MK Test'):
                trend, h, p, z, Tau, s, var_s, slope, intercept = mk.hamed_rao_modification_test(
                    data)
            elif (valeur['name'] == 'Yue and Wang Modified MK Test'):
                trend, h, p, z, Tau, s, var_s, slope, intercept = mk.yue_wang_modification_test(
                    data)
            elif (valeur['name'] ==
                  'Modified MK test using Pre-Whitening method'):
                trend, h, p, z, Tau, s, var_s, slope, intercept = mk.pre_whitening_modification_test(
                    data)
            elif (valeur['name'] ==
                  'Modified MK test using Trend free Pre-Whitening method'):
                trend, h, p, z, Tau, s, var_s, slope, intercept = mk.trend_free_pre_whitening_modification_test(
                    data)