Beispiel #1
0
    def test_3m(self, FakeMm):
        """After handle_pre with no per-ec2 data, all 4 monthly bins keep their zeroed initial state."""
        bcs = BinCapUsed()
        bcs.handle_pre({'mainManager': FakeMm()})

        # Expected: one all-zero row per month, Jan..Apr 2019.
        # NOTE(review): dt_start holds the month END and dt_end the month START
        # here — presumably sentinel initial values before any data is binned;
        # confirm against BinCapUsed.handle_pre.
        rows = [
            (
                dt.date(2019, month, last_day),
                0,
                0,
                0,
                frozenset([]),
                dt.date(2019, month, last_day),
                dt.date(2019, month, 1),
            )
            for month, last_day in [(1, 31), (2, 28), (3, 31), (4, 30)]
        ]
        e = pd.DataFrame(rows,
                         columns=[
                             'Timestamp', 'capacity_usd', 'used_usd',
                             'count_analyzed', 'regions_set', 'dt_start',
                             'dt_end'
                         ])
        for col in ['Timestamp', 'dt_start', 'dt_end']:
            e[col] = pd.to_datetime(e[col])
        e.set_index('Timestamp', inplace=True)

        pd.testing.assert_frame_equal(e, bcs.df_bins)
Beispiel #2
0
    def test_90d(self, example_dataframe):
        """A 90-day frequency resamples into calendar-month bins."""
        dfi, dt_start, dt_end = example_dataframe

        bcs = BinCapUsed()
        bcs._set_freq(90)

        resampled_end = bcs.do_resample_end(dfi).sum()
        resampled_start = bcs.do_resample_start(dfi).sum()
        # (fix_resample_{end,start} post-processing intentionally not applied here)

        # start-labeled bins fall on the first of each month
        expected_starts = [dt.date(2019, m, 1) for m in (9, 10, 11, 12)]
        assert (resampled_start.index == expected_starts).all()

        # end-labeled bins fall on the last day of each month
        expected_ends = [
            dt.date(2019, 9, 30),
            dt.date(2019, 10, 31),
            dt.date(2019, 11, 30),
            dt.date(2019, 12, 31),
        ]
        assert (resampled_end.index == expected_ends).all()
Beispiel #3
0
    def test_30d(self, example_dataframe):
        """A 30-day frequency resamples into weekly bins."""
        dfi, dt_start, dt_end = example_dataframe

        bcs = BinCapUsed()
        bcs._set_freq(30)

        resampled_end = bcs.do_resample_end(dfi).sum()
        resampled_start = bcs.do_resample_start(dfi).sum()
        # (fix_resample_{end,start} post-processing intentionally not applied here)

        one_week = dt.timedelta(days=7)

        # 14 weekly bins; start labels step by one week from Mon 2019-08-26
        expected_starts = [dt.date(2019, 8, 26) + i * one_week for i in range(14)]
        assert (resampled_start.index == expected_starts).all()

        # end labels step by one week from Sun 2019-09-01
        expected_ends = [dt.date(2019, 9, 1) + i * one_week for i in range(14)]
        assert (resampled_end.index == expected_ends).all()
Beispiel #4
0
    def test_07d(self, example_dataframe):
        """A 7-day frequency resamples into daily bins."""
        dfi, dt_start, dt_end = example_dataframe

        bcs = BinCapUsed()
        bcs._set_freq(7)

        resampled_end = bcs.do_resample_end(dfi).sum()
        resampled_start = bcs.do_resample_start(dfi).sum()
        # (fix_resample_{end,start} post-processing intentionally not applied here)

        expected_starts = pd.date_range(dt.date(2019, 9, 1),
                                        dt.date(2019, 12, 1))
        assert (resampled_start.index == expected_starts).all()

        # notice this is different than the "df_daily.resample('1D', label='right')" below
        expected_ends = pd.date_range(dt.date(2019, 9, 1),
                                      dt.date(2019, 12, 1))
        assert (resampled_end.index == expected_ends).all()
Beispiel #5
0
    def test_60d(self, example_dataframe):
        """A 60-day frequency resamples into semi-monthly bins."""
        dfi, dt_start, dt_end = example_dataframe

        bcs = BinCapUsed()
        bcs._set_freq(60)

        resampled_end = bcs.do_resample_end(dfi).sum()
        resampled_start = bcs.do_resample_start(dfi).sum()
        # (fix_resample_{end,start} post-processing intentionally not applied here)

        # start labels land on the 1st and 15th of each month
        expected_starts = [
            dt.date(2019, 9, 1),
            dt.date(2019, 9, 15),
            dt.date(2019, 10, 1),
            dt.date(2019, 10, 15),
            dt.date(2019, 11, 1),
            dt.date(2019, 11, 15),
            dt.date(2019, 12, 1),
        ]
        assert (resampled_start.index == expected_starts).all()

        # Update 2019-12-17: without the fix_resample_* functions the end labels
        # match idx_exp_1SM_right_right (15th / month-end), not the previously
        # expected 14th / month-end labels.
        expected_ends = [
            dt.date(2019, 9, 15),
            dt.date(2019, 9, 30),
            dt.date(2019, 10, 15),
            dt.date(2019, 10, 31),
            dt.date(2019, 11, 15),
            dt.date(2019, 11, 30),
            dt.date(2019, 12, 15),
        ]
        assert (resampled_end.index == expected_ends).all()
Beispiel #6
0
    def test_1m(self, FakeMm):
        """Two identical per_ec2 calls accumulate into January's bin; later bins stay zeroed."""
        # input: 3 days of data in mid-January
        df_input = pd.DataFrame([
            (dt.date(2019, 1, 15), 10, 50),
            (dt.date(2019, 1, 16), 12, 50),
            (dt.date(2019, 1, 17), 12, 50),
        ],
                                columns=['Timestamp', 'capacity_usd', 'used_usd'])

        # calculate: feed the same ec2 context twice
        bcs = BinCapUsed()
        bcs.handle_pre({'mainManager': FakeMm()})
        ctx = {'ec2_df': df_input, 'ec2_dict': {'Region': 'us-west-2'}}
        bcs.per_ec2(ctx)
        bcs.per_ec2(ctx)

        # expected: January bin doubled (2*(10+12+12)=68 capacity, 2*150=300 used),
        # count_analyzed=2, dt_start/dt_end narrowed to the data's span;
        # Feb..Apr remain at their zeroed initial state
        rows = [(
            dt.date(2019, 1, 31),
            68,
            300,
            2,
            frozenset(['us-west-2']),
            dt.date(2019, 1, 15),
            dt.date(2019, 1, 17),
        )]
        rows += [
            (
                dt.date(2019, month, last_day),
                0,
                0,
                0,
                frozenset([]),
                dt.date(2019, month, last_day),
                dt.date(2019, month, 1),
            )
            for month, last_day in [(2, 28), (3, 31), (4, 30)]
        ]
        e = pd.DataFrame(rows,
                         columns=[
                             'Timestamp', 'capacity_usd', 'used_usd',
                             'count_analyzed', 'regions_set', 'dt_start',
                             'dt_end'
                         ])
        for col in ['Timestamp', 'dt_start', 'dt_end']:
            e[col] = pd.to_datetime(e[col])
        e.set_index('Timestamp', inplace=True)

        # test expected = actual
        pd.testing.assert_frame_equal(e, bcs.df_bins)
Beispiel #7
0
 def test_preNoBreak(self, FakeMm):
     """handle_pre returns a non-None value (i.e. it does not break the listener chain)."""
     listener = BinCapUsed()
     result = listener.handle_pre({'mainManager': FakeMm()})
     assert result is not None
Beispiel #8
0
    def test_3m(self, FakeMm):
        # prepare input
        s_ts = pd.date_range(start=dt.date(2019, 1, 15),
                             end=dt.date(2019, 4, 15),
                             freq='D')

        # parameters for simple case, no fluctuations
        cap1 = 10  # USD/day
        cap2 = 20  # USD/day

        #import numpy as np
        # s_used = np.random.rand(len(s_ts)) # random usage between 0 and 100%
        s_used = 0.3  # 30% usage

        # dataframes
        df1 = pd.DataFrame({
            'Timestamp': s_ts,
            'capacity_usd': cap1,
            'used_usd': s_used * cap1
        })
        df2 = pd.DataFrame({
            'Timestamp': s_ts,
            'capacity_usd': cap2,
            'used_usd': s_used * cap2
        })

        # int for simplicity
        df1['used_usd'] = df1['used_usd'].astype(int)
        df2['used_usd'] = df2['used_usd'].astype(int)

        # calculate
        bcs = BinCapUsed()
        bcs.handle_pre({'mainManager': FakeMm()})
        ctx1 = {'ec2_df': df1, 'ec2_dict': {'Region': 'us-west-2'}}
        bcs.per_ec2(ctx1)
        ctx2 = {'ec2_df': df2, 'ec2_dict': {'Region': 'us-west-2'}}
        bcs.per_ec2(ctx2)

        # expected
        e = pd.DataFrame([
            (
                dt.date(2019, 1, 31),
                510,
                153,
                2,
                frozenset(['us-west-2']),
                dt.date(2019, 1, 15),
                dt.date(2019, 1, 31),
            ),
            (
                dt.date(2019, 2, 28),
                840,
                252,
                2,
                frozenset(['us-west-2']),
                dt.date(2019, 2, 1),
                dt.date(2019, 2, 28),
            ),
            (
                dt.date(2019, 3, 31),
                930,
                279,
                2,
                frozenset(['us-west-2']),
                dt.date(2019, 3, 1),
                dt.date(2019, 3, 31),
            ),
            (
                dt.date(2019, 4, 30),
                450,
                135,
                2,
                frozenset(['us-west-2']),
                dt.date(2019, 4, 1),
                dt.date(2019, 4, 15),
            ),
        ],
                         columns=[
                             'Timestamp', 'capacity_usd', 'used_usd',
                             'count_analyzed', 'regions_set', 'dt_start',
                             'dt_end'
                         ])
        for fx in ['Timestamp', 'dt_start', 'dt_end']:
            e[fx] = pd.to_datetime(e[fx])
        e.set_index('Timestamp', inplace=True)

        # test expected = actual
        pd.testing.assert_frame_equal(e, bcs.df_bins)
Beispiel #9
0
def redshift_cost_core(ra, rr, share_email, filter_region, ctx, filter_tags):
    """
    Build the Redshift cost analyze/optimize pipeline.

    ra - Analyzer (calculator): CalculatorAnalyzeRedshift or CalculatorOptimizeRedshift
    rr - Reporter
    share_email - currently unused here (email listeners are commented out below)
    filter_region - region filter passed to the cluster iterator
    ctx - click context; ctx.obj['ndays'] is read for the analysis window
    filter_tags - tag filter passed to RedshiftTagFilter

    Returns the configured (not yet run) MainManager.
    """

    # data layer
    from isitfit.tqdmman import TqdmL2Verbose
    tqdmman = TqdmL2Verbose(ctx)

    ri = RedshiftPerformanceIterator(filter_region, tqdmman)

    # pipeline
    from isitfit.cost.mainManager import MainManager
    from isitfit.cost.cacheManager import RedisPandas as RedisPandasCacheManager
    from isitfit.cost.metrics_cloudwatch import CwRedshiftListener
    from isitfit.cost.ec2_common import Ec2Common
    from isitfit.cost.cloudtrail_ec2type import CloudtrailCached

    mm = MainManager("Redshift cost analyze or optimize", ctx)
    mm.set_ndays(ctx.obj['ndays'])

    cache_man = RedisPandasCacheManager()

    # manager of cloudwatch
    cwman = CwRedshiftListener(cache_man)
    cwman.set_ndays(ctx.obj['ndays'])

    # common stuff
    ec2_common = Ec2Common()

    # boto3 cloudtrail data
    # FIXME note that if two pipelines are run, one for ec2 and one for redshift, then this Object fetches the same data twice
    # because the base class behind it does both ec2+redshift at once
    # in the init_data phase
    cloudtrail_manager = CloudtrailCached(mm.EndTime, cache_man, tqdmman)

    # 'all'-phase listener: update dict and return it
    # https://stackoverflow.com/a/1453013/4126114
    # (def instead of a named lambda, per PEP 8 E731)
    def inject_analyzer(context_all):
        return dict({'analyzer': ra}, **context_all)

    # binning is only enabled for the "analyze" calculator;
    # lookup table instead of an if/elif chain, with explicit failure on unknown classes
    binning_by_calculator = {
        'CalculatorOptimizeRedshift': False,
        'CalculatorAnalyzeRedshift': True,
    }
    ra_name = type(ra).__name__
    if ra_name not in binning_by_calculator:
        raise Exception("Invalid calculator class passed: %s" % ra_name)
    do_binning = binning_by_calculator[ra_name]

    if do_binning:
        from isitfit.cost.ec2_analyze import BinCapUsed
        bcs = BinCapUsed()
        bcs.context_key = 'df_single'

    # setup pipeline
    mm.set_iterator(ri)
    mm.add_listener('pre', cache_man.handle_pre)
    mm.add_listener('pre', cloudtrail_manager.init_data)

    if do_binning:
        mm.add_listener('pre', bcs.handle_pre)

    rtf = RedshiftTagFilter(filter_tags)
    mm.add_listener('ec2', rtf.per_cluster)

    mm.add_listener('ec2', cwman.per_ec2)
    mm.add_listener('ec2', cloudtrail_manager.single)
    mm.add_listener('ec2', ra.per_ec2)

    if do_binning:
        mm.add_listener('ec2', bcs.per_ec2)

    mm.add_listener(
        'all',
        ec2_common.after_all)  # just show IDs missing cloudwatch/cloudtrail
    mm.add_listener('all', ra.after_all)
    mm.add_listener('all', ra.calculate)
    mm.add_listener('all', inject_analyzer)
    mm.add_listener('all', rr.postprocess)

    if do_binning:
        mm.add_listener('all', bcs.after_all)

    #inject_email_in_context = lambda context_all: dict({'emailTo': share_email}, **context_all)
    #mm.add_listener('all', rr.display)
    #mm.add_listener('all', inject_email_in_context)
    #mm.add_listener('all', rr.email)

    return mm