Esempio n. 1
0
    async def wrapper(hole_dir: HoleDirection, area: str):
        """Upsert the ids reported by the remote service for ``area`` as known entities.

        Ids are fetched for the horizontal or vertical id path (chosen from
        ``hole_dir``), stamped with the current UTC time as
        ``ihs_last_seen_at``, merged with any matching ``KnownEntity`` rows and
        bulk-upserted.

        Args:
            hole_dir: hole direction (anything accepted by ``HoleDirection``).
            area: area name forwarded to ``IHSClient.get_ids_by_area``.
        """
        hole_dir = HoleDirection(hole_dir)
        index_cols = ["entity_id", "entity_type"]

        if hole_dir == HoleDirection.H:
            path = IHSPath.well_h_ids
        else:
            path = IHSPath.well_v_ids

        # fetch ids from remote service
        ids = await IHSClient.get_ids_by_area(path, area=area)
        if len(ids) == 0:
            # Nothing was reported for this area, so there is nothing to
            # query or persist (an empty IN () lookup would be pointless).
            return

        df = pd.Series(ids, name="entity_id").to_frame()
        df["ihs_last_seen_at"] = util.utcnow()
        df["entity_type"] = "api14"
        df = df.set_index(index_cols)

        # query matching records existing in the known_entities model
        objs: List[
            db.models.KnownEntity] = await db.models.KnownEntity.query.where(
                db.models.KnownEntity.entity_id.in_(ids)).gino.all()

        if objs:
            obj_df = pd.DataFrame([x.to_dict()
                                   for x in objs]).set_index(index_cols)

            # all-NaN frame shaped like the stored records; used as the base
            # so that remote values win over stored ones in the merge below
            fresh = pd.DataFrame(index=obj_df.index, columns=obj_df.columns)

            # merge the records, prioritizing new values from the remote service
            combined = fresh.combine_first(df).combine_first(obj_df)
            combined = combined.drop(columns=["created_at", "updated_at"])
        else:
            # No stored records match: building a frame from an empty list has
            # no columns, so set_index(index_cols) would raise KeyError. The
            # remote records are already the complete payload.
            # NOTE(review): assumes bulk_upsert accepts a frame holding only
            # the remote columns -- confirm against the model mixin.
            combined = df

        # persist the new records
        await db.models.KnownEntity.bulk_upsert(combined, batch_size=1000)
Esempio n. 2
0
def status_assignment_detail(
        hole_dir: Union[HoleDirection, str],
        api14s: List[str]) -> Tuple[pd.DataFrame, pd.DataFrame]:
    """Print and return the detailed status assignment for the given wells.

    Args:
        hole_dir: hole direction (anything accepted by ``HoleDirection``).
        api14s: api14 identifiers of the wells to inspect.

    Returns:
        ``(indicators, labels)``: the results of ``assign_status(detail=True)``
        and ``assign_status(detail=True, as_labels=True)``.

    Raises:
        ValueError: if ``hole_dir`` is neither ``H`` nor ``V``.
    """
    hole_dir = HoleDirection(hole_dir)

    async def coro() -> Tuple[pd.DataFrame, pd.DataFrame]:
        # only the first element of the returned dataset is needed here
        wells, *_rest = await pd.DataFrame.wells.from_multiple(
            hole_dir=hole_dir, api14s=api14s)

        # TODO: Move to Router
        if hole_dir not in (HoleDirection.H, HoleDirection.V):
            raise ValueError(
                f"cant determine IHSPath from hole_dir ({hole_dir})")

        # NOTE(review): the H path is plural ("headers") while the V path is
        # singular ("header") -- confirm both names exist on IHSPath.
        prodpath = (IHSPath.prod_h_headers
                    if hole_dir == HoleDirection.H else IHSPath.prod_v_header)

        # attach the last production date before assigning statuses
        prod_headers = await wells.wells.last_prod_date(path=prodpath,
                                                        prefer_local=False)
        wells = wells.join(prod_headers)

        indicators = wells.wells.assign_status(detail=True)
        print(f"\n{indicators.T}\n")

        labels = wells.wells.assign_status(detail=True, as_labels=True)
        print(f"\n{labels.T}\n")

        return indicators, labels

    return asyncio.get_event_loop().run_until_complete(coro())
Esempio n. 3
0
def sync_known_entities(hole_dir: HoleDirection):
    """Queue one ``sync_known_entities_for_area`` task per area.

    The areas are fetched for the id path matching ``hole_dir``. Each task is
    scheduled with a countdown of 30 seconds plus its position in the list, so
    consecutive tasks start one second apart.

    Args:
        hole_dir: hole direction (anything accepted by ``HoleDirection``).
    """
    hole_dir = HoleDirection(hole_dir)

    path = (IHSPath.well_h_ids
            if hole_dir == HoleDirection.H else IHSPath.well_v_ids)

    areas: List[Dict] = util.aio.async_to_sync(IHSClient.get_areas(path=path))

    # enumerate from 30 so that the countdown equals (position + 30)
    for countdown, area in enumerate(areas, start=30):
        sync_known_entities_for_area.apply_async(
            args=(hole_dir, area),
            kwargs={},
            countdown=countdown,
        )
Esempio n. 4
0
def run_driftwood(hole_dir: HoleDirection, **kwargs):
    """Run the well, geometry and production executors on a fixed list of wells.

    Args:
        hole_dir: hole direction (anything accepted by ``HoleDirection``);
            selects which hard-coded api14 list is processed.
        **kwargs: forwarded unchanged to ``run_executors``.

    Raises:
        ValueError: if ``hole_dir`` is neither ``H`` nor ``V``.
    """
    hole_dir = HoleDirection(hole_dir)

    horizontal_api14s = [
        "42461409160000",
        "42383406370000",
        "42461412100000",
        "42461412090000",
        "42461411750000",
        "42461411740000",
        "42461411730000",
        "42461411720000",
        "42461411600000",
        "42461411280000",
        "42461411270000",
        "42461411260000",
        "42383406650000",
        "42383406640000",
        "42383406400000",
        "42383406390000",
        "42383406380000",
        "42461412110000",
        "42383402790000",
    ]
    vertical_api14s = [
        "42461326620001",
        "42461326620000",
        "42461328130000",
        "42461343960001",
        "42461352410000",
        "42383362120000",
        "42383362080000",
        "42383362090000",
        "42383374140000",
        "42383374130000",
        "42383362060000",
    ]

    if hole_dir == HoleDirection.H:
        api14s = horizontal_api14s
    elif hole_dir == HoleDirection.V:
        api14s = vertical_api14s
    else:
        raise ValueError(f"Invalid hole direction: {hole_dir=}")

    run_executors(
        hole_dir,
        api14s=api14s,
        executors=[WellExecutor, GeomExecutor, ProdExecutor],
        **kwargs,
    )
Esempio n. 5
0
def run_next_available(hole_dir: Union[HoleDirection, str],
                       force: bool = False,
                       **kwargs):
    """Run the executors for the next available area, if it is ready.

    Args:
        hole_dir: hole direction (anything accepted by ``HoleDirection``).
        force: run even when the area is not reported as ready.
        **kwargs: forwarded unchanged to ``run_executors``.

    Returns:
        Whatever ``util.aio.async_to_sync`` returns for the inner coroutine,
        which itself returns ``None``.
    """

    # TODO: set task meta
    hole_dir = HoleDirection(hole_dir)

    async def coro():
        # TODO: move to Router
        ids_path = (IHSPath.well_h_ids
                    if hole_dir == HoleDirection.H else IHSPath.well_v_ids)

        (area_obj, attr, is_ready,
         cooldown_hours) = await db.models.Area.next_available(hole_dir)
        utcnow = util.utcnow()
        prev_run = getattr(area_obj, attr)

        if not (is_ready or force):
            # still cooling down: report the remaining wait and stop
            next_run_in_seconds = (
                (prev_run + timedelta(hours=cooldown_hours)) -
                utcnow).total_seconds()
            print(
                f"({db.models.Area.__name__}[{hole_dir}]) Skipping {area_obj.area} next available for run in {humanize_seconds(next_run_in_seconds)}"  # noqa
            )  # noqa
            return

        # pull from IDMaster once implemented
        api14s: List[str] = await IHSClient.get_ids_by_area(
            path=ids_path, area=area_obj.area)
        run_executors(hole_dir=hole_dir, api14s=api14s, **kwargs)

        # record this run on the area before reporting it
        await area_obj.update(**{attr: utcnow}).apply()

        # from here on both timestamps are only needed as display strings
        prev_run = (prev_run.strftime(util.dt.formats.no_seconds)
                    if prev_run else None)
        utcnow = utcnow.strftime(util.dt.formats.no_seconds)
        print(
            f"({db.models.Area.__name__}[{hole_dir}]) updated {area_obj.area}.{attr}: {prev_run} -> {utcnow}"  # noqa
        )

    return util.aio.async_to_sync(coro())
Esempio n. 6
0
    def __init__(
        self,
        hole_dir: Union[HoleDirection, str],
        download_kwargs: Dict = None,
        process_kwargs: Dict = None,
        persist_kwargs: Dict = None,
    ):
        """Set up the executor's id, hole direction, stage options and metrics.

        Args:
            hole_dir: hole direction (anything accepted by ``HoleDirection``).
            download_kwargs: options stored for the download stage.
            process_kwargs: options stored for the process stage.
            persist_kwargs: options stored for the persist stage.

        Each ``*_kwargs`` argument falls back to a new empty dict when falsy.
        """
        metric_columns = [
            "executor",
            "hole_direction",
            "operation",
            "name",
            "seconds",
            "count",
        ]

        self.exec_id = shortuuid.uuid()
        self.hole_dir: HoleDirection = HoleDirection(hole_dir)
        self.download_kwargs = download_kwargs if download_kwargs else {}
        self.process_kwargs = process_kwargs if process_kwargs else {}
        self.persist_kwargs = persist_kwargs if persist_kwargs else {}

        # starts empty; only the column layout is fixed here
        self.metrics: pd.DataFrame = pd.DataFrame(columns=metric_columns)
Esempio n. 7
0
class TestWells:
    def test_instantiate_df_ext(self):
        """Accessing ``wells`` on ``pd.DataFrame`` must not raise."""
        getattr(pd.DataFrame, "wells")

    @pytest.mark.parametrize("hole_dir", HoleDirection.members())
    def test_from_records(self, hole_dir, wells_h, wells_v):
        """Each frame built by ``from_records`` has one row per input record."""
        records = wells_v
        if hole_dir == HoleDirection.H:
            records = wells_h

        expected_rows = len(records)
        wellset = pd.DataFrame.wells.from_records(records, create_index=True)

        assert wellset.wells.shape[0] == expected_rows
        assert wellset.depths.shape[0] == expected_rows
        assert wellset.fracs.shape[0] == expected_rows

    @pytest.mark.parametrize("hole_dir", HoleDirection.members())
    @pytest.mark.asyncio
    async def test_from_fracfocus(self, hole_dir, fracs_h, fracs_v):
        """The fracs index of the result holds exactly the requested api14s."""
        records = fracs_v
        if hole_dir == HoleDirection.H:
            records = fracs_h

        api14s = [record["api14"] for record in records]
        # the mock dispatcher is handed the fixture records as its payload
        mock_dispatch = MockAsyncDispatch({"data": records})

        wellset = await pd.DataFrame.wells.from_fracfocus(
            api14s=api14s,
            dispatch=mock_dispatch,
        )

        assert set(wellset.fracs.index) == set(api14s)

    @pytest.mark.parametrize("hole_dir", HoleDirection.members())
    @pytest.mark.asyncio
    async def test_from_multiple(self, hole_dir, wells_h, wells_v, fracs_h, fracs_v):
        """Wells cover every requested api14; fracs cover at most that many."""
        if hole_dir == HoleDirection.H:
            well_records, frac_records = wells_h, fracs_h
        else:
            well_records, frac_records = wells_v, fracs_v

        ihs_dispatch = MockAsyncDispatch({"data": well_records})
        fracfocus_dispatch = MockAsyncDispatch({"data": frac_records})
        api14s = [record["api14"] for record in well_records]

        wellset = await pd.DataFrame.wells.from_multiple(
            hole_dir,
            api14s=api14s,
            ihs_kwargs={"dispatch": ihs_dispatch},
            fracfocus_kwargs={"dispatch": fracfocus_dispatch},
        )

        requested = set(api14s)
        assert set(wellset.wells.index) == requested
        assert len(set(wellset.fracs.index)) <= len(requested)

    @pytest.mark.asyncio
    async def test_from_sample(self, wells_h, fracs_h):
        """Sampling the horizontal sample path yields the mocked wells."""
        ihs_dispatch = MockAsyncDispatch({"data": wells_h})
        fracfocus_dispatch = MockAsyncDispatch({"data": fracs_h})
        api14s = [record["api14"] for record in wells_h]

        wellset = await pd.DataFrame.wells.from_sample(
            IHSPath.well_h_sample,
            n=5,
            ihs_kwargs={"dispatch": ihs_dispatch},
            fracfocus_kwargs={"dispatch": fracfocus_dispatch},
        )

        requested = set(api14s)
        assert set(wellset.wells.index) == requested
        assert len(set(wellset.fracs.index)) <= len(requested)

    def test_combine_frac_parameters(self, wellset_h):
        """Rows filled in either frame survive the combine; empty rows are dropped."""
        # two frames sharing one api14 index, each filled on different rows
        left = pd.DataFrame(
            columns=["fluid", "proppant"],
            index=pd.Index(list(range(6)), name="api14"),
        )
        right = left.copy(deep=True)
        left.loc[[1, 5]] = 10
        right.loc[[2, 4]] = 5

        actual = left.wells.combine_frac_parameters(right, dropna=True)

        # expected rows correspond to api14 1, 2, 4 and 5, in that order
        expected = np.array([[10, 10], [5, 5], [5, 5], [10, 10]])
        assert np.array_equal(actual.to_numpy(), expected)

    def test_status_assignment(self, wellset_h):
        # TODO: Need good test cases
        """ input format
Esempio n. 8
0
    async def from_multiple(
        cls,
        hole_dir: Union[HoleDirection, str],
        api14s: Union[str, List[str]] = None,
        api10s: Union[str, List[str]] = None,
        create_index: bool = True,
        use_ihs: bool = True,
        use_fracfocus: bool = True,
        use_drillinginfo: bool = True,
        ihs_kwargs: Dict = None,
        fracfocus_kwargs: Dict = None,
        drillinginfo_kwargs: Dict = None,
        **kwargs,
    ) -> WellSet:
        """Return a dataset enriched from multiple sources.

        The enabled sources are fetched concurrently. The IHS result, when
        requested, is the base dataset, and frac parameters from FracFocus are
        merged into its ``fracs`` frame.

        Args:
            hole_dir: hole direction; non-enum values are upper-cased and
                converted with ``HoleDirection``.
            api14s: api14 identifier(s), forwarded to each source.
            api10s: api10 identifier(s), forwarded to each source.
            create_index: accepted for interface compatibility; not used here.
            use_ihs: fetch well data through ``from_ihs``.
            use_fracfocus: fetch frac data through ``from_fracfocus``.
            use_drillinginfo: accepted for interface compatibility; not used
                here.
            ihs_kwargs: extra keyword arguments for ``from_ihs``.
            fracfocus_kwargs: extra keyword arguments for ``from_fracfocus``.
            drillinginfo_kwargs: accepted for interface compatibility; not
                used here.
            **kwargs: accepted and ignored.

        Returns:
            The combined ``WellSet``. Every member is ``None`` when no source
            is enabled.

        Raises:
            TypeError: if ``hole_dir`` is neither ``H`` nor ``V``.
        """

        ihs_kwargs = ihs_kwargs or {}
        fracfocus_kwargs = fracfocus_kwargs or {}

        if not isinstance(hole_dir, HoleDirection):
            hole_dir = HoleDirection(str(hole_dir).upper())

        if hole_dir == HoleDirection.H:
            ihs_path = IHSPath.well_h
        elif hole_dir == HoleDirection.V:
            ihs_path = IHSPath.well_v
        else:
            raise TypeError("hole_dir must be specified!")

        # fallback result when the IHS source is disabled
        wellset = WellSet(wells=None,
                          depths=None,
                          fracs=None,
                          stats=None,
                          ips=None)
        coros: List[Coroutine] = []

        if use_ihs:
            coros.append(
                cls.from_ihs(ihs_path,
                             api14s=api14s,
                             api10s=api10s,
                             **ihs_kwargs))

        if use_fracfocus:
            coros.append(
                cls.from_fracfocus(api14s=api14s,
                                   api10s=api10s,
                                   **fracfocus_kwargs))

        # gather keeps the submission order, so results are consumed in the
        # same order the coroutines were appended above
        results: List[Union[WellSet,
                            pd.DataFrame]] = await asyncio.gather(*coros)

        if use_ihs:
            wellset = results.pop(0)

        if use_fracfocus:
            fracfocus = results.pop(0)

            if wellset.fracs is None:
                wellset.fracs = fracfocus.fracs
            elif fracfocus.fracs is not None:
                # Combine the frac parameters already on the dataset with the
                # FracFocus ones. Previously fracfocus.fracs was combined with
                # itself, which discarded the existing parameters.
                # NOTE(review): precedence on overlapping rows is decided by
                # combine_frac_parameters -- confirm the calling frame should
                # be the base.
                wellset.fracs = wellset.fracs.wells.combine_frac_parameters(
                    fracfocus.fracs)

        return wellset
Esempio n. 9
0
class TestWellExecutor:
    """Tests for ``WellExecutor`` construction, download, process and persist."""

    @staticmethod
    def _mocked_executor(hole_dir, wells, fracs, geoms, prod_headers):
        """Build a ``WellExecutor`` configured with mock dispatchers."""
        ihs_dispatch = MockAsyncDispatch({"data": wells})
        fracfocus_dispatch = MockAsyncDispatch({"data": fracs})
        geoms_dispatch = MockAsyncDispatch({"data": geoms})
        prod_headers_dispatch = MockAsyncDispatch({"data": prod_headers})

        download_kwargs = {
            "dispatch": {"dispatch": ihs_dispatch},
            "ihs_kwargs": {"dispatch": ihs_dispatch},
            "fracfocus_kwargs": {"dispatch": fracfocus_dispatch},
        }
        process_kwargs = {
            "geoms_dispatch": geoms_dispatch,
            "prod_headers_dispatch": prod_headers_dispatch,
        }
        return WellExecutor(
            hole_dir,
            download_kwargs=download_kwargs,
            process_kwargs=process_kwargs,
        )

    @pytest.fixture
    def exh(self, wells_h, fracs_h, geoms_h, prod_headers_h):
        """Horizontal executor backed by the horizontal fixtures."""
        yield self._mocked_executor(HoleDirection.H, wells_h, fracs_h,
                                    geoms_h, prod_headers_h)

    @pytest.fixture
    def exv(self, wells_v, fracs_v, geoms_v, prod_headers_v):
        """Vertical executor backed by the vertical fixtures."""
        yield self._mocked_executor(HoleDirection.V, wells_v, fracs_v,
                                    geoms_v, prod_headers_v)

    def test_init_default(self):
        """A new executor starts with no recorded metrics."""
        executor = WellExecutor(HoleDirection.H)
        assert executor.metrics.empty is True

    def test_init_model_kwargs(self):
        """Each ``<model>_kwargs`` argument is stored under ``model_kwargs[<model>]``."""
        per_model = {
            "wells": {1: 1},
            "depths": {2: 2},
            "fracs": {3: 3},
            "ips": {4: 4},
            "stats": {5: 5},
            "links": {6: 6},
        }

        executor = WellExecutor(
            HoleDirection.H,
            **{f"{model}_kwargs": opts for model, opts in per_model.items()},
        )

        for model, opts in per_model.items():
            assert executor.model_kwargs[model] == opts

    @pytest.mark.parametrize("hole_dir", HoleDirection.members())
    @pytest.mark.asyncio
    async def test_download(self, hole_dir, wells_h, wells_v, fracs_h,
                            fracs_v):
        """Download returns a ``WellSet`` and records exactly one metric row."""
        if hole_dir == HoleDirection.H:
            well_records, frac_records = wells_h, fracs_h
        else:
            well_records, frac_records = wells_v, fracs_v

        ihs_dispatch = MockAsyncDispatch({"data": well_records})
        fracfocus_dispatch = MockAsyncDispatch({"data": frac_records})

        executor = WellExecutor(hole_dir)
        wellset = await executor.download(
            api14s=["a", "b", "c"],
            ihs_kwargs={"dispatch": ihs_dispatch},
            fracfocus_kwargs={"dispatch": fracfocus_dispatch},
        )

        assert isinstance(wellset, WellSet)
        assert executor.metrics.shape[0] == 1

    @pytest.mark.asyncio
    async def test_download_bad_holedir(self):
        """A hole direction of the wrong enum type makes download raise ValueError."""
        executor = WellExecutor(HoleDirection.H)
        # swap in a member of an unrelated enum after construction
        executor.hole_dir = ProdStatRange.FIRST
        with pytest.raises(ValueError):
            await executor.download(zaza=["a", "b", "c"])

    @pytest.mark.parametrize("hole_dir", HoleDirection.members())
    @pytest.mark.asyncio
    async def test_download_catch_network_error(self, hole_dir):
        """Download without a mock dispatcher is expected to raise."""
        executor = WellExecutor(hole_dir)
        with pytest.raises(Exception):
            await executor.download(zaza=["a", "b", "c"])

    @pytest.mark.cionly
    @pytest.mark.asyncio
    async def test_process_and_persist_h_full(self, exh, wellset_h, bind):
        """Process and persist the full horizontal wellset without error."""
        processed: WellSet = await exh.process(wellset_h)
        await exh.persist(processed)

    # TODO: add a small-batch process/persist test for horizontal wells.

    @pytest.mark.cionly
    @pytest.mark.asyncio
    async def test_process_and_persist_v_full(self, exv, wellset_v, bind):
        """Process and persist the full vertical wellset without error."""
        processed: WellSet = await exv.process(wellset_v)
        await exv.persist(processed)
Esempio n. 10
0
class TestGeomExecutor:
    """Tests for ``GeomExecutor`` construction, download, process and persist."""

    @pytest.fixture
    def gexec(self):
        """Horizontal geometry executor with default options."""
        yield GeomExecutor(HoleDirection.H)

    def test_init_default(self):
        """A new executor starts with no recorded metrics."""
        executor = GeomExecutor(HoleDirection.H)
        assert executor.metrics.empty is True

    def test_init_model_kwargs(self):
        """Model options are stored; ``points`` also carries a batch size of 1000."""
        locations_opts = {1: 1}
        surveys_opts = {2: 2}
        points_opts = {3: 3}

        executor = GeomExecutor(
            HoleDirection.H,
            locations_kwargs=locations_opts,
            surveys_kwargs=surveys_opts,
            points_kwargs=points_opts,
        )

        expected_points = {"batch_size": 1000, **points_opts}
        assert executor.model_kwargs["locations"] == locations_opts
        assert executor.model_kwargs["surveys"] == surveys_opts
        assert executor.model_kwargs["points"] == expected_points

    @pytest.mark.parametrize("hole_dir", HoleDirection.members())
    @pytest.mark.asyncio
    async def test_download(self, wells_h_dispatcher, hole_dir):
        """Download returns a ``WellGeometrySet`` and records one metric row."""
        executor = GeomExecutor(hole_dir)
        geomset = await executor.download(
            api14s=["a", "b", "c"],
            dispatch=wells_h_dispatcher,
        )
        assert isinstance(geomset, WellGeometrySet)
        assert executor.metrics.shape[0] == 1

    @pytest.mark.asyncio
    async def test_download_bad_holedir(self):
        """A hole direction of the wrong enum type makes download raise ValueError."""
        executor = GeomExecutor(HoleDirection.H)
        # swap in a member of an unrelated enum after construction
        executor.hole_dir = ProdStatRange.FIRST
        with pytest.raises(ValueError):
            await executor.download(zaza=["a", "b", "c"])

    @pytest.mark.parametrize("hole_dir", HoleDirection.members())
    @pytest.mark.asyncio
    async def test_download_catch_network_error(self, hole_dir):
        """Download without a mock dispatcher is expected to raise."""
        executor = GeomExecutor(hole_dir)
        with pytest.raises(Exception):
            await executor.download(zaza=["a", "b", "c"])

    @pytest.mark.cionly
    @pytest.mark.asyncio
    async def test_process_and_persist_h_full(self, geomset_h, bind):
        """Process and persist the full horizontal geometry set without error."""
        executor = GeomExecutor(HoleDirection.H)
        processed: WellGeometrySet = await executor.process(geomset_h)
        await executor.persist(processed)

    @pytest.mark.asyncio
    async def test_process_and_persist_h_small_batch(self, geoms_h, bind):
        """Process and persist the first three horizontal geometry records."""
        sample = geoms_h[:3]
        geomset = pd.DataFrame.shapes.from_records(sample, create_index=True)
        executor = GeomExecutor(HoleDirection.H)
        processed: WellGeometrySet = await executor.process(geomset)
        await executor.persist(processed)

    @pytest.mark.cionly
    @pytest.mark.asyncio
    async def test_process_and_persist_v_full(self, geomset_v, bind):
        """Process and persist the full vertical geometry set without error."""
        executor = GeomExecutor(HoleDirection.V)
        processed: WellGeometrySet = await executor.process(geomset_v)
        await executor.persist(processed)

    @pytest.mark.asyncio
    async def test_process_and_persist_v_small_batch(self, geoms_v, bind):
        """Process and persist the first three vertical geometry records."""
        sample = geoms_v[:3]
        geomset = pd.DataFrame.shapes.from_records(sample, create_index=True)
        executor = GeomExecutor(HoleDirection.V)
        processed: WellGeometrySet = await executor.process(geomset)
        await executor.persist(processed)
Esempio n. 11
0
class TestProdExecutor:
    """Tests for ``ProdExecutor`` construction, download, process, persist and arun."""

    @staticmethod
    def _first_six_month_opts():
        """Option matrix for ``ProdStatRange.FIRST``, months ``[6]``, zeroes excluded."""
        return calc.prodstat_option_matrix(ProdStatRange.FIRST,
                                           months=[6],
                                           include_zeroes=False)

    @staticmethod
    def _two_smallest_api14s(prod_df):
        """Return the two api14s with the lowest first-column count in ``prod_df``."""
        counts = prod_df.groupby("api14").count().iloc[:, 0]
        return counts.sort_values().index[:2].values.tolist()

    @pytest.fixture
    def pexec(self):
        """Horizontal production executor with default options."""
        yield ProdExecutor(HoleDirection.H)

    def test_init_default(self):
        """Defaults: no metrics yet, and ``stats`` uses a batch size of 1000."""
        executor = ProdExecutor(HoleDirection.H)
        assert executor.metrics.empty is True
        assert executor.model_kwargs["stats"] == {"batch_size": 1000}

    def test_init_model_kwargs(self):
        """Model options are stored; ``stats`` also carries a batch size of 1000."""
        header_opts = {1: 1}
        monthly_opts = {2: 2}
        stats_opts = {3: 3}

        executor = ProdExecutor(
            HoleDirection.H,
            header_kwargs=header_opts,
            monthly_kwargs=monthly_opts,
            stats_kwargs=stats_opts,
        )

        expected_stats = {"batch_size": 1000, **stats_opts}
        assert executor.model_kwargs["header"] == header_opts
        assert executor.model_kwargs["monthly"] == monthly_opts
        assert executor.model_kwargs["stats"] == expected_stats

    @pytest.mark.parametrize("hole_dir", HoleDirection.members())
    @pytest.mark.asyncio
    async def test_download(self, prod_dispatcher, hole_dir):
        """Download returns a ``ProdSet`` and records exactly one metric row."""
        executor = ProdExecutor(hole_dir)
        prodset = await executor.download(
            entities=["a", "b", "c"],
            dispatch=prod_dispatcher,
        )
        # check prodset was returned
        assert isinstance(prodset, ProdSet)
        # check metric was added
        assert executor.metrics.shape[0] == 1

    @pytest.mark.asyncio
    async def test_download_bad_holedir(self):
        """A hole direction of the wrong enum type makes download raise ValueError."""
        executor = ProdExecutor(HoleDirection.H)
        # swap in a member of an unrelated enum after construction
        executor.hole_dir = ProdStatRange.FIRST
        with pytest.raises(ValueError):
            await executor.download(entities=["a", "b", "c"])

    @pytest.mark.parametrize("hole_dir", HoleDirection.members())
    @pytest.mark.cionly
    @pytest.mark.asyncio
    async def test_download_catch_network_error(self, prod_dispatcher,
                                                hole_dir):
        """Download without a mock dispatcher is expected to raise."""
        executor = ProdExecutor(hole_dir)
        with pytest.raises(Exception):
            await executor.download(entities=["a", "b", "c"])

    @pytest.mark.cionly
    @pytest.mark.asyncio
    async def test_process_and_persist_with_default_option_sets(
            self, prod_df_h, bind):
        """Full horizontal frame, default options: shapes match and stats persist."""
        prodset = prod_df_h.prodstats.to_prodset()
        executor = ProdExecutor(HoleDirection.H)
        ps = await executor.process(prodset)

        # shape checks run before anything is written
        assert ps.header.shape[0] == prod_df_h.index.levels[0].shape[0]
        assert ps.monthly.shape[0] == prod_df_h.shape[0]
        assert ps.stats.shape[0] > 0
        await executor.persist(ps)

        expected_count = ps.stats.shape[0]
        persisted_count = await Model.agg.count()
        assert expected_count == persisted_count

    @pytest.mark.cionly
    @pytest.mark.asyncio
    async def test_process_and_persist_h_one_option_set(self, prod_df_h, bind):
        """Full horizontal frame with a single option set."""
        prodset = prod_df_h.prodstats.to_prodset()
        executor = ProdExecutor(HoleDirection.H)
        opts = self._first_six_month_opts()

        ps = await executor.process(prodset,
                                    prodstat_opts=opts,
                                    ratio_opts=opts)
        await executor.persist(ps)

        expected_count = ps.stats.shape[0]
        persisted_count = await Model.agg.count()
        unique_api14s = prod_df_h.reset_index().api14.unique()

        assert expected_count == persisted_count
        assert ps.header.shape[0] == unique_api14s.shape[0]
        assert ps.monthly.shape[0] == prod_df_h.shape[0]
        assert ps.stats.shape[0] > 0

    @pytest.mark.cionly
    @pytest.mark.asyncio
    async def test_process_and_persist_v_one_option_set(self, prod_df_v, bind):
        """Vertical records of one entity12 with a single option set."""

        # select an entity12 from the available dataframe that is likely to have
        # more than one, but not too many, associated wells (for test speed)
        counts_by_entity = prod_df_v.groupby("entity12").count().iloc[:, 0]
        entity12 = counts_by_entity.sort_values().index[2]

        subset = prod_df_v.loc[prod_df_v.entity12 == entity12].copy(deep=True)
        prodset = subset.prodstats.to_prodset()
        executor = ProdExecutor(HoleDirection.V)
        opts = self._first_six_month_opts()

        ps = await executor.process(prodset,
                                    prodstat_opts=opts,
                                    ratio_opts=opts)
        await executor.persist(ps)

        expected_count = ps.stats.shape[0]
        persisted_count = await Model.agg.count()
        unique_api14s = subset.reset_index().api14.unique()

        assert expected_count == persisted_count
        assert ps.header.shape[0] == unique_api14s.shape[0]
        assert ps.monthly.shape[0] == subset.shape[0]
        assert ps.stats.shape[0] > 0

    # TODO: add a full vertical process/persist test using the default
    # option sets.

    @pytest.mark.asyncio
    async def test_process_and_persist_h_tiny_batch(self, prod_df_h, bind):
        """Two horizontal api14s with a single option set."""
        api14s = self._two_smallest_api14s(prod_df_h)

        subset = prod_df_h.loc[prod_df_h.api14.isin(api14s)]
        prodset = subset.prodstats.to_prodset()
        executor = ProdExecutor(HoleDirection.H)
        opts = self._first_six_month_opts()

        ps = await executor.process(prodset,
                                    prodstat_opts=opts,
                                    ratio_opts=opts)
        await executor.persist(ps)

        expected_count = ps.stats.shape[0]
        persisted_count = await Model.agg.count()
        unique_api14s = subset.reset_index().api14.unique()
        # one monthly row is expected per distinct pair of the first two
        # index levels
        monthly_rows = subset.groupby(level=[0, 1]).first().shape[0]

        assert expected_count == persisted_count
        assert ps.header.shape[0] == unique_api14s.shape[0]
        assert ps.monthly.shape[0] == monthly_rows
        assert ps.stats.shape[0] > 0

    @pytest.mark.asyncio
    async def test_process_and_persist_v_tiny_batch(self, prod_df_v, bind):
        """Two vertical api14s with a single option set."""
        api14s = self._two_smallest_api14s(prod_df_v)

        subset = prod_df_v.loc[prod_df_v.api14.isin(api14s)]
        prodset = subset.prodstats.to_prodset()
        executor = ProdExecutor(HoleDirection.V)
        opts = self._first_six_month_opts()

        ps = await executor.process(prodset,
                                    prodstat_opts=opts,
                                    ratio_opts=opts)
        await executor.persist(ps)

        expected_count = ps.stats.shape[0]
        persisted_count = await Model.agg.count()
        unique_api14s = subset.reset_index().api14.unique()

        assert expected_count == persisted_count
        assert ps.header.shape[0] == unique_api14s.shape[0]
        assert ps.monthly.shape[0] == subset.shape[0]
        assert ps.stats.shape[0] > 0

    @pytest.mark.asyncio
    async def test_arun_h_tiny_batch(self, prod_h, bind):
        """``arun`` on five horizontal records persists one header per api14."""
        records = prod_h[:5]
        api14s = [record["api14"] for record in records]

        opts = self._first_six_month_opts()
        executor = ProdExecutor(
            HoleDirection.H,
            download_kwargs={"dispatch": MockAsyncDispatch({"data": records})},
            process_kwargs={"prodstat_opts": opts, "ratio_opts": opts},
        )

        _count, _prodset = await executor.arun(api14s=api14s)
        persisted_headers = await ProdHeader.agg.count()
        assert len(api14s) == persisted_headers

    @pytest.mark.asyncio
    async def test_arun_v_tiny_batch(self, prod_v, bind):
        """``arun`` on five vertical records persists one header per api14."""
        records = prod_v[:5]
        api14s = [record["api14"] for record in records]

        opts = self._first_six_month_opts()
        executor = ProdExecutor(
            HoleDirection.V,
            download_kwargs={"dispatch": MockAsyncDispatch({"data": records})},
            process_kwargs={"prodstat_opts": opts, "ratio_opts": opts},
        )

        _count, _prodset = await executor.arun(api14s=api14s)
        persisted_headers = await ProdHeader.agg.count()
        assert len(api14s) == persisted_headers