예제 #1
0
    async def test_process_and_persist_h_tiny_batch(self, prod_df_h, bind):
        """Process and persist a two-well horizontal subset and check row counts.

        The frame is narrowed to the two api14s with the smallest record
        counts, run through ``ProdExecutor.process`` with a single option set
        (FIRST range, 6 months, zeroes excluded) and then persisted.

        ``bind`` is requested but not referenced in the body -- presumably it
        provides the database binding needed by ``persist``; verify against
        the fixture definition.
        """
        # Per-api14 non-null count of the first column, smallest first.
        counts_by_api14 = prod_df_h.groupby("api14").count().iloc[:, 0]
        smallest_first = counts_by_api14.sort_values()
        api14s = smallest_first.index[:2].values.tolist()

        in_subset = prod_df_h.api14.isin(api14s)
        prod_df_h = prod_df_h.loc[in_subset]

        prodset = prod_df_h.prodstats.to_prodset()
        executor = ProdExecutor(HoleDirection.H)
        option_matrix = calc.prodstat_option_matrix(
            ProdStatRange.FIRST,
            months=[6],
            include_zeroes=False,
        )
        result = await executor.process(
            prodset,
            prodstat_opts=option_matrix,
            ratio_opts=option_matrix,
        )
        await executor.persist(result)

        stats_rows = result.stats.shape[0]
        persisted_rows = await Model.agg.count()

        # Every calculated stat row should be reported by Model.agg.count().
        assert stats_rows == persisted_rows

        # One header row per distinct api14 in the subset.
        distinct_api14s = prod_df_h.reset_index().api14.unique()
        assert result.header.shape[0] == distinct_api14s.shape[0]

        # One monthly row per unique pair of the first two index levels.
        first_per_pair = prod_df_h.groupby(level=[0, 1]).first()
        assert result.monthly.shape[0] == first_per_pair.shape[0]

        assert result.stats.shape[0] > 0
예제 #2
0
    async def test_process_and_persist_v_tiny_batch(self, prod_df_v, bind):
        """Process and persist a two-well vertical subset and check row counts.

        The frame is narrowed to the two api14s with the smallest record
        counts (keeps the test fast), run through ``ProdExecutor.process``
        with a single option set (FIRST range, 6 months, zeroes excluded) and
        then persisted.

        ``bind`` is requested but not referenced in the body -- presumably it
        provides the database binding needed by ``persist``; verify against
        the fixture definition.
        """
        # Per-api14 non-null count of the first column, smallest first.
        counts_by_api14 = prod_df_v.groupby("api14").count().iloc[:, 0]
        smallest_first = counts_by_api14.sort_values()
        api14s = smallest_first.index[:2].values.tolist()

        in_subset = prod_df_v.api14.isin(api14s)
        prod_df_v = prod_df_v.loc[in_subset]

        prodset = prod_df_v.prodstats.to_prodset()
        executor = ProdExecutor(HoleDirection.V)
        option_matrix = calc.prodstat_option_matrix(
            ProdStatRange.FIRST,
            months=[6],
            include_zeroes=False,
        )
        result = await executor.process(
            prodset,
            prodstat_opts=option_matrix,
            ratio_opts=option_matrix,
        )
        await executor.persist(result)

        stats_rows = result.stats.shape[0]
        persisted_rows = await Model.agg.count()

        # Every calculated stat row should be reported by Model.agg.count().
        assert stats_rows == persisted_rows

        # One header row per distinct api14 in the subset.
        distinct_api14s = prod_df_v.reset_index().api14.unique()
        assert result.header.shape[0] == distinct_api14s.shape[0]

        # Monthly output keeps the same number of rows as the input subset.
        assert result.monthly.shape[0] == prod_df_v.shape[0]

        assert result.stats.shape[0] > 0
예제 #3
0
 async def test_download(self, prod_dispatcher, hole_dir):
     """Download via the supplied dispatcher and check the result and metrics.

     Expects ``download`` to return a ``ProdSet`` and the executor's
     ``metrics`` to hold exactly one row afterwards.
     """
     executor = ProdExecutor(hole_dir)
     entity_ids = ["a", "b", "c"]
     downloaded = await executor.download(
         entities=entity_ids,
         dispatch=prod_dispatcher,
     )

     # the call must hand back a ProdSet
     assert isinstance(downloaded, ProdSet)

     # a single download should have recorded a single metric row
     metric_rows = executor.metrics.shape[0]
     assert metric_rows == 1
예제 #4
0
    async def test_process_and_persist_with_default_option_sets(
            self, prod_df_h, bind):
        """Process with no explicit option sets, then persist and compare counts.

        ``process`` is called with only the prodset, so whatever defaults it
        defines for the option sets are used. ``bind`` is requested but not
        referenced in the body -- presumably it provides the database binding
        needed by ``persist``; verify against the fixture definition.
        """
        prodset = prod_df_h.prodstats.to_prodset()
        executor = ProdExecutor(HoleDirection.H)
        result = await executor.process(prodset)

        # Header rows should match the number of values in index level 0.
        level0_values = prod_df_h.index.levels[0]
        assert result.header.shape[0] == level0_values.shape[0]

        # Monthly output keeps the same number of rows as the input frame.
        assert result.monthly.shape[0] == prod_df_h.shape[0]
        assert result.stats.shape[0] > 0

        await executor.persist(result)

        # Every calculated stat row should be reported by Model.agg.count().
        stats_rows = result.stats.shape[0]
        persisted_rows = await Model.agg.count()
        assert stats_rows == persisted_rows
예제 #5
0
    def test_init_model_kwargs(self):
        """Check that per-model kwargs passed to the constructor are stored.

        ``header`` and ``monthly`` kwargs are expected verbatim, while the
        ``stats`` entry is expected to contain ``batch_size=1000`` in addition
        to the supplied keys.
        """
        overrides = {
            "header_kwargs": {1: 1},
            "monthly_kwargs": {2: 2},
            "stats_kwargs": {3: 3},
        }
        executor = ProdExecutor(HoleDirection.H, **overrides)

        assert executor.model_kwargs["header"] == overrides["header_kwargs"]
        assert executor.model_kwargs["monthly"] == overrides["monthly_kwargs"]

        # stats kwargs are compared against batch_size plus the caller's keys
        expected_stats = {"batch_size": 1000}
        expected_stats.update(overrides["stats_kwargs"])
        assert executor.model_kwargs["stats"] == expected_stats
예제 #6
0
    async def test_process_and_persist_h_one_option_set(self, prod_df_h, bind):
        """Process the full horizontal frame with one option set and persist it.

        Uses a single option matrix (FIRST range, 6 months, zeroes excluded)
        for both ``prodstat_opts`` and ``ratio_opts``. ``bind`` is requested
        but not referenced in the body -- presumably it provides the database
        binding needed by ``persist``; verify against the fixture definition.
        """
        prodset = prod_df_h.prodstats.to_prodset()
        executor = ProdExecutor(HoleDirection.H)
        option_matrix = calc.prodstat_option_matrix(
            ProdStatRange.FIRST,
            months=[6],
            include_zeroes=False,
        )
        result = await executor.process(
            prodset,
            prodstat_opts=option_matrix,
            ratio_opts=option_matrix,
        )
        await executor.persist(result)

        stats_rows = result.stats.shape[0]
        persisted_rows = await Model.agg.count()

        # Every calculated stat row should be reported by Model.agg.count().
        assert stats_rows == persisted_rows

        # One header row per distinct api14 in the input frame.
        distinct_api14s = prod_df_h.reset_index().api14.unique()
        assert result.header.shape[0] == distinct_api14s.shape[0]

        # Monthly output keeps the same number of rows as the input frame.
        assert result.monthly.shape[0] == prod_df_h.shape[0]

        assert result.stats.shape[0] > 0
예제 #7
0
    async def test_arun_v_tiny_batch(self, prod_v, bind):
        """Run ``arun`` end to end on five vertical records via a mock dispatch.

        The first five records of ``prod_v`` are served by a
        ``MockAsyncDispatch`` and the header count is compared with the number
        of api14s requested. ``bind`` is requested but not referenced in the
        body -- presumably it provides the database binding; verify against
        the fixture definition.
        """
        records = prod_v[:5]
        api14s = [record["api14"] for record in records]

        option_matrix = calc.prodstat_option_matrix(
            ProdStatRange.FIRST,
            months=[6],
            include_zeroes=False,
        )
        mock_dispatch = MockAsyncDispatch({"data": records})
        process_kwargs = {
            "prodstat_opts": option_matrix,
            "ratio_opts": option_matrix,
        }
        executor = ProdExecutor(
            HoleDirection.V,
            download_kwargs={"dispatch": mock_dispatch},
            process_kwargs=process_kwargs,
        )

        # arun is unpacked as a pair; neither element is inspected further.
        _count, _prodset = await executor.arun(api14s=api14s)

        header_rows = await ProdHeader.agg.count()
        assert len(api14s) == header_rows
예제 #8
0
 async def test_download_catch_network_error(self, prod_dispatcher,
                                             hole_dir):
     """Expect ``download`` to raise when no dispatch is handed to it.

     ``prod_dispatcher`` is requested but deliberately not passed to
     ``download`` here.
     """
     executor = ProdExecutor(hole_dir)
     entity_ids = ["a", "b", "c"]
     with pytest.raises(Exception):
         await executor.download(entities=entity_ids)
예제 #9
0
 async def test_download_bad_holedir(self):
     """Expect ``download`` to raise ``ValueError`` for a non-HoleDirection value."""
     executor = ProdExecutor(HoleDirection.H)
     # Overwrite hole_dir after construction with a value of the wrong enum.
     executor.hole_dir = ProdStatRange.FIRST
     entity_ids = ["a", "b", "c"]
     with pytest.raises(ValueError):
         await executor.download(entities=entity_ids)
예제 #10
0
 def test_init_default(self):
     """Check the state of an executor built with only a hole direction."""
     executor = ProdExecutor(HoleDirection.H)

     # no metrics have been recorded yet
     assert executor.metrics.empty is True

     # stats kwargs carry only the batch size when nothing is overridden
     expected_stats = {"batch_size": 1000}
     assert executor.model_kwargs["stats"] == expected_stats
예제 #11
0
 def pexec(self):
     """Yield a ``ProdExecutor`` built for ``HoleDirection.H``.

     NOTE(review): presumably registered as a pytest fixture; the decorator
     is outside the visible source.
     """
     executor = ProdExecutor(HoleDirection.H)
     yield executor
예제 #12
0
 async def run_production(holedir: HoleDirection, api14s: List[str]):
     """Download, process and persist production for the given api14s.

     Args:
         holedir: Hole direction used to construct the ``ProdExecutor``.
         api14s: Identifiers forwarded to ``ProdExecutor.download``.

     Returns:
         None. The processed result is handed to ``persist`` and not returned.
     """
     executor = ProdExecutor(holedir)
     downloaded = await executor.download(api14s=api14s)
     processed = await executor.process(downloaded)
     await executor.persist(processed)