async def test_process_and_persist_v_one_option_set(self, prod_df_v, bind):
    """Process and persist a single ``entity12`` group with one option set.

    The ``prod_df_v`` frame is narrowed to one ``entity12`` group, converted
    to a prodset, run through a ``HoleDirection.V`` executor using a single
    ``ProdStatRange.FIRST`` / 6-month option set, and persisted. The test
    then compares the row counts of the result against the persisted
    records and the input frame.
    """
    # Rank the entity12 groups by the non-null count of their first column
    # and take the third smallest: an entity that is likely to have more
    # than one, but not too many, associated wells (for test speed).
    rows_per_entity = prod_df_v.groupby("entity12").count().iloc[:, 0]
    chosen_entity = rows_per_entity.sort_values().index[2]
    belongs_to_entity = prod_df_v.entity12 == chosen_entity
    entity_df = prod_df_v.loc[belongs_to_entity].copy(deep=True)

    prodset = entity_df.prodstats.to_prodset()
    executor = ProdExecutor(HoleDirection.V)
    option_set = calc.prodstat_option_matrix(
        ProdStatRange.FIRST,
        months=[6],
        include_zeroes=False,
    )

    # The same option set drives both the prodstat and the ratio step.
    result = await executor.process(
        prodset,
        prodstat_opts=option_set,
        ratio_opts=option_set,
    )
    await executor.persist(result)

    # Every computed stat row should be counted by the model after persist.
    stat_row_count = result.stats.shape[0]
    persisted_count = await Model.agg.count()
    assert stat_row_count == persisted_count

    # One header row per distinct api14 in the selected input.
    distinct_api14s = entity_df.reset_index().api14.unique()
    assert result.header.shape[0] == distinct_api14s.shape[0]

    # Monthly output keeps one row per input row, and stats are non-empty.
    assert result.monthly.shape[0] == entity_df.shape[0]
    assert result.stats.shape[0] > 0
async def test_process_and_persist_h_one_option_set(self, prod_df_h, bind):
    """Process and persist the whole ``prod_df_h`` frame with one option set.

    The frame is converted to a prodset, run through a ``HoleDirection.H``
    executor using a single ``ProdStatRange.FIRST`` / 6-month option set,
    and persisted. The test then compares the row counts of the result
    against the persisted records and the input frame.
    """
    prodset = prod_df_h.prodstats.to_prodset()
    executor = ProdExecutor(HoleDirection.H)
    option_set = calc.prodstat_option_matrix(
        ProdStatRange.FIRST,
        months=[6],
        include_zeroes=False,
    )

    # The same option set drives both the prodstat and the ratio step.
    result = await executor.process(
        prodset,
        prodstat_opts=option_set,
        ratio_opts=option_set,
    )
    await executor.persist(result)

    # Every computed stat row should be counted by the model after persist.
    stat_row_count = result.stats.shape[0]
    persisted_count = await Model.agg.count()
    assert stat_row_count == persisted_count

    # One header row per distinct api14 in the input.
    distinct_api14s = prod_df_h.reset_index().api14.unique()
    assert result.header.shape[0] == distinct_api14s.shape[0]

    # Monthly output keeps one row per input row, and stats are non-empty.
    assert result.monthly.shape[0] == prod_df_h.shape[0]
    assert result.stats.shape[0] > 0
async def test_arun_v_tiny_batch(self, prod_v, bind):
    """Run ``ProdExecutor.arun`` end to end on the first five records.

    The download step is fed by a ``MockAsyncDispatch`` wrapping the sliced
    records, and processing uses a single ``ProdStatRange.FIRST`` / 6-month
    option set. Afterwards the ``ProdHeader`` count must equal the number of
    api14s requested.
    """
    sample_records = prod_v[:5]
    api14s = [record["api14"] for record in sample_records]

    # Serve the sliced records through the mock dispatch passed to download.
    mock_dispatch = MockAsyncDispatch({"data": sample_records})
    option_set = calc.prodstat_option_matrix(
        ProdStatRange.FIRST,
        months=[6],
        include_zeroes=False,
    )
    executor = ProdExecutor(
        HoleDirection.V,
        download_kwargs={"dispatch": mock_dispatch},
        process_kwargs={"prodstat_opts": option_set, "ratio_opts": option_set},
    )

    # The two returned values are not inspected; unpacking still requires
    # arun to return exactly two items.
    _count, _prodset = await executor.arun(api14s=api14s)

    header_count = await ProdHeader.agg.count()
    assert len(api14s) == header_count
if __name__ == "__main__":
    # Manual scratch entry point: configures logging and builds one option
    # matrix when this file is executed directly.
    import loggers
    import calc.prod  # noqa
    from db import db  # noqa
    from collector import IHSClient

    # import itertools
    # import multiprocessing as mp

    # Level 10 is presumably DEBUG (stdlib logging numbering) -- confirm
    # against loggers.config.
    loggers.config(level=10, formatter="funcname")

    # Option grid: PEAKNORM range, 3- and 6-month windows, with and without
    # zeroes. The resulting matrix is built but not used further here.
    matrix_kwargs = {
        "ranges": ProdStatRange.PEAKNORM,
        "months": [3, 6],
        "include_zeroes": [True, False],
    }
    calc.prodstat_option_matrix(**matrix_kwargs)

    # Disabled sketch for sampling well ids by area, kept for reference:
    #
    # def get_id_sets(area: str) -> Tuple[List[str], List[str]]:
    #     loop = asyncio.get_event_loop()
    #     coroh = IHSClient.get_ids_by_area(path=IHSPath.well_h_ids, area=area)
    #     corov = IHSClient.get_ids_by_area(path=IHSPath.well_v_ids, area=area)
    #     return loop.run_until_complete(asyncio.gather(coroh, corov))
    #
    # area = "tx-upton"
    # sample_size = 25
    # api14h, api14v = get_id_sets(area)
    # h_sample: List[str] = random.choices(api14h, k=sample_size)
    # v_sample: List[str] = random.choices(api14v, k=sample_size)