async def test_process_and_persist_h_tiny_batch(self, prod_df_h, bind):
    """Process and persist a two-well slice of ``prod_df_h`` with one option set.

    The frame is narrowed to the two ``api14`` values with the smallest
    first-column counts, run through ``ProdExecutor(HoleDirection.H)`` and
    persisted; the resulting row counts are then checked.
    """
    # Rank api14 values by the count of their first column and keep the
    # two smallest groups.
    first_col_counts = prod_df_h.groupby("api14").count().iloc[:, 0]
    smallest_groups = first_col_counts.sort_values().index[:2]
    api14s = smallest_groups.values.tolist()

    prod_df_h = prod_df_h.loc[prod_df_h.api14.isin(api14s)]
    prodset_h = prod_df_h.prodstats.to_prodset()

    executor = ProdExecutor(HoleDirection.H)
    # The same option matrix is used for both prodstats and ratios.
    option_matrix = calc.prodstat_option_matrix(
        ProdStatRange.FIRST,
        months=[6],
        include_zeroes=False,
    )
    result = await executor.process(
        prodset_h,
        prodstat_opts=option_matrix,
        ratio_opts=option_matrix,
    )
    await executor.persist(result)

    # Count reported by the model after persisting must match the stats rows.
    stats_rows = result.stats.shape[0]
    persisted_rows = await Model.agg.count()
    assert stats_rows == persisted_rows

    unique_api14s = prod_df_h.reset_index().api14.unique()
    assert result.header.shape[0] == unique_api14s.shape[0]

    # One monthly row is expected per distinct pair of the first two index levels.
    first_per_pair = prod_df_h.groupby(level=[0, 1]).first()
    assert result.monthly.shape[0] == first_per_pair.shape[0]

    assert result.stats.shape[0] > 0
async def test_process_and_persist_v_tiny_batch(self, prod_df_v, bind):
    """Process and persist a two-well slice of ``prod_df_v`` with one option set.

    The frame is narrowed to the two ``api14`` values with the smallest
    first-column counts, run through ``ProdExecutor(HoleDirection.V)`` and
    persisted; the resulting row counts are then checked.
    """
    # Keep only the two api14 values with the smallest first-column counts
    # so the test works on a small amount of data (for test speed).
    first_col_counts = prod_df_v.groupby("api14").count().iloc[:, 0]
    smallest_groups = first_col_counts.sort_values().index[:2]
    api14s = smallest_groups.values.tolist()

    prod_df_v = prod_df_v.loc[prod_df_v.api14.isin(api14s)]
    prodset_v = prod_df_v.prodstats.to_prodset()

    executor = ProdExecutor(HoleDirection.V)
    # The same option matrix is used for both prodstats and ratios.
    option_matrix = calc.prodstat_option_matrix(
        ProdStatRange.FIRST,
        months=[6],
        include_zeroes=False,
    )
    result = await executor.process(
        prodset_v,
        prodstat_opts=option_matrix,
        ratio_opts=option_matrix,
    )
    await executor.persist(result)

    # Count reported by the model after persisting must match the stats rows.
    stats_rows = result.stats.shape[0]
    persisted_rows = await Model.agg.count()
    assert stats_rows == persisted_rows

    unique_api14s = prod_df_v.reset_index().api14.unique()
    assert result.header.shape[0] == unique_api14s.shape[0]

    # Monthly output is expected to have one row per input row.
    assert result.monthly.shape[0] == prod_df_v.shape[0]
    assert result.stats.shape[0] > 0
async def test_download(self, prod_dispatcher, hole_dir):
    """Download through the supplied dispatcher and check the result and metrics."""
    executor = ProdExecutor(hole_dir)
    entity_ids = ["a", "b", "c"]

    downloaded = await executor.download(
        entities=entity_ids,
        dispatch=prod_dispatcher,
    )

    # The call must hand back a ProdSet ...
    assert isinstance(downloaded, ProdSet)
    # ... and leave exactly one row in the executor's metrics.
    metric_rows = executor.metrics.shape[0]
    assert metric_rows == 1
async def test_process_and_persist_with_default_option_sets(
    self, prod_df_h, bind
):
    """Process the full ``prod_df_h`` frame without explicit options, then persist.

    Shape checks run on the processed output before it is persisted; the
    persisted count is compared with the stats row count afterwards.
    """
    prodset_h = prod_df_h.prodstats.to_prodset()
    executor = ProdExecutor(HoleDirection.H)

    # No prodstat/ratio options are passed, so the executor's defaults apply.
    result = await executor.process(prodset_h)

    # One header row per value in the first index level of the input frame.
    first_level_size = prod_df_h.index.levels[0].shape[0]
    assert result.header.shape[0] == first_level_size
    assert result.monthly.shape[0] == prod_df_h.shape[0]
    assert result.stats.shape[0] > 0

    await executor.persist(result)

    stats_rows = result.stats.shape[0]
    persisted_rows = await Model.agg.count()
    assert stats_rows == persisted_rows
def test_init_model_kwargs(self):
    """Check that per-model kwargs given to the constructor land in ``model_kwargs``."""
    overrides = {
        "header_kwargs": {1: 1},
        "monthly_kwargs": {2: 2},
        "stats_kwargs": {3: 3},
    }
    executor = ProdExecutor(HoleDirection.H, **overrides)

    stored = executor.model_kwargs
    assert stored["header"] == overrides["header_kwargs"]
    assert stored["monthly"] == overrides["monthly_kwargs"]

    # Stats kwargs are expected to be merged with a batch_size of 1000.
    expected_stats = {"batch_size": 1000}
    expected_stats.update(overrides["stats_kwargs"])
    assert stored["stats"] == expected_stats
async def test_process_and_persist_h_one_option_set(self, prod_df_h, bind):
    """Process and persist the full ``prod_df_h`` frame with a single option set."""
    prodset_h = prod_df_h.prodstats.to_prodset()
    executor = ProdExecutor(HoleDirection.H)

    # The same option matrix is used for both prodstats and ratios.
    option_matrix = calc.prodstat_option_matrix(
        ProdStatRange.FIRST,
        months=[6],
        include_zeroes=False,
    )
    result = await executor.process(
        prodset_h,
        prodstat_opts=option_matrix,
        ratio_opts=option_matrix,
    )
    await executor.persist(result)

    # Count reported by the model after persisting must match the stats rows.
    stats_rows = result.stats.shape[0]
    persisted_rows = await Model.agg.count()
    assert stats_rows == persisted_rows

    unique_api14s = prod_df_h.reset_index().api14.unique()
    assert result.header.shape[0] == unique_api14s.shape[0]

    # Monthly output is expected to have one row per input row.
    assert result.monthly.shape[0] == prod_df_h.shape[0]
    assert result.stats.shape[0] > 0
async def test_arun_v_tiny_batch(self, prod_v, bind):
    """Run ``arun`` on the first five ``prod_v`` records via a mock dispatcher.

    After the run, the ``ProdHeader`` count must equal the number of api14
    values that were requested.
    """
    prod_v = prod_v[:5]
    api14s = [record["api14"] for record in prod_v]

    # Serve the sliced records from a mock dispatcher instead of a real one.
    mock_dispatch = MockAsyncDispatch({"data": prod_v})
    option_matrix = calc.prodstat_option_matrix(
        ProdStatRange.FIRST,
        months=[6],
        include_zeroes=False,
    )

    download_kwargs = {"dispatch": mock_dispatch}
    process_kwargs = {
        "prodstat_opts": option_matrix,
        "ratio_opts": option_matrix,
    }
    executor = ProdExecutor(
        HoleDirection.V,
        download_kwargs=download_kwargs,
        process_kwargs=process_kwargs,
    )

    # The two-value unpack also checks that arun returns a pair.
    _count, _prodset = await executor.arun(api14s=api14s)

    header_rows = await ProdHeader.agg.count()
    assert len(api14s) == header_rows
async def test_download_catch_network_error(self, prod_dispatcher, hole_dir):
    """Downloading without an explicit ``dispatch`` argument must raise.

    NOTE(review): ``prod_dispatcher`` is requested but never passed to
    ``download``; presumably that omission is what triggers the error --
    confirm against the executor implementation.
    """
    executor = ProdExecutor(hole_dir)
    entity_ids = ["a", "b", "c"]

    with pytest.raises(Exception):
        await executor.download(entities=entity_ids)
async def test_download_bad_holedir(self):
    """``download`` must raise ``ValueError`` when ``hole_dir`` is not a hole direction."""
    executor = ProdExecutor(HoleDirection.H)
    # Overwrite the attribute after construction with a value of another enum.
    executor.hole_dir = ProdStatRange.FIRST
    entity_ids = ["a", "b", "c"]

    with pytest.raises(ValueError):
        await executor.download(entities=entity_ids)
def test_init_default(self):
    """A freshly built executor has empty metrics and default stats kwargs."""
    executor = ProdExecutor(HoleDirection.H)

    metrics_empty = executor.metrics.empty
    assert metrics_empty is True

    default_stats_kwargs = {"batch_size": 1000}
    assert executor.model_kwargs["stats"] == default_stats_kwargs
def pexec(self):
    """Yield a ``ProdExecutor`` built with ``HoleDirection.H``.

    NOTE(review): presumably registered as a pytest fixture; the decorator
    is not visible here -- confirm.
    """
    executor = ProdExecutor(HoleDirection.H)
    yield executor
async def run_production(holedir: HoleDirection, api14s: List[str]):
    """Download, process and persist production for the given api14 values.

    Args:
        holedir: Hole direction passed to the ``ProdExecutor`` constructor.
        api14s: Identifiers forwarded to ``ProdExecutor.download``.
    """
    executor = ProdExecutor(holedir)

    # Each stage consumes the output of the previous one.
    downloaded = await executor.download(api14s=api14s)
    processed = await executor.process(downloaded)
    await executor.persist(processed)