def is_producing(self,
                 status_column: str = "status",
                 producing_states: List[str] = None) -> pd.Series:
    validate_required_columns([status_column], self._obj.columns)
    producing_states = producing_states or PRODUCING_STATES
    return self._obj[status_column].isin(producing_states)
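# Illustrative usage (a sketch with hypothetical data; assumes this method is
# exposed through the ``wells`` accessor and that PRODUCING_STATES contains
# labels like "PRODUCING"):
#
#   wells = pd.DataFrame(
#       {"status": ["PRODUCING", "PERMIT"]},
#       index=pd.Index(["api-1", "api-2"], name="api14"),
#   )
#   mask = wells.wells.is_producing()  # boolean Series: True, False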
def as_stick(self, geometry: str = "geom", label: str = "stick") -> pd.Series:
    points = self._obj
    validate_required_columns([geometry], points.columns)
    validate_required_columns(["md"], points.index.names)
    return (points.groupby(level=0)
            .agg(["first", "last"])
            .stack()
            .shapes.as_line(geometry=geometry, label=label))
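# Illustrative usage (hypothetical data; assumes survey points indexed by
# ["api14", "md"] with shapely Points in a "geom" column): collapses each
# well's survey to a two-point "stick" between its first and last points.
#
#   sticks = points.shapes.as_stick()  # Series of one LineString per api14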
def relative_trajectory_angle(
    self,
    geometry: str = "geom",
    preceding: int = 1,
    following: int = 1,
    max_soft_angle: float = 150.0,
    max_hard_angle: float = 130.0,
) -> gp.GeoDataFrame:
    """ Calculate the central angle of each point relative to an arbitrary
    number of points before and after it, element-wise. The triangle formed
    by the three points is used to calculate the measure of the angle
    touching the current point (theta).

    Keyword Arguments:
        preceding {int} -- number of points before the current point to
            consider when calculating the central angle (default: {1})
        following {int} -- number of points after the current point to
            consider when calculating the central angle (default: {1})

    Returns:
        gp.GeoDataFrame
    """
    points = self.as_gdf()
    validate_required_columns([geometry], points.columns)

    rel_prev = points.groupby(level=0).shift(preceding).shapes.as_gdf()
    rel_next = points.groupby(level=0).shift(-following).shapes.as_gdf()

    hyp = rel_prev.distance(rel_next).rename("hyp")
    adj = points.distance(rel_next).rename("adj")
    opp = points.distance(rel_prev).rename("opp")
    tri = hyp.to_frame().join(adj).join(opp)

    # law of cosines:
    # c^2 = a^2 + b^2 - 2ab*cos(C)
    #   -> C = acos((adj^2 + opp^2 - hyp^2) / (2 * adj * opp))
    tri[["hyp_sq", "adj_sq", "opp_sq"]] = tri.loc[:, ["hyp", "adj", "opp"]].pow(2)
    tri["theta"] = (tri.adj_sq.add(tri.opp_sq).sub(tri.hyp_sq)
                    .div(tri.adj.mul(tri.opp).mul(2))
                    .apply(np.arccos)
                    .apply(np.rad2deg))

    # mark the soft corners (bends)
    soft_corner_mask = (tri.theta > 0) & (tri.theta < max_soft_angle)
    tri["is_soft_corner"] = False
    tri.loc[soft_corner_mask, "is_soft_corner"] = True

    # mark the hard corners (steep bends)
    hard_corner_mask = (tri.theta > 0) & (tri.theta < max_hard_angle)
    tri["is_hard_corner"] = False
    tri.loc[hard_corner_mask, "is_hard_corner"] = True

    return tri.loc[:, ["theta", "is_soft_corner", "is_hard_corner"]].dropna(
        how="any")
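# Worked example of the law-of-cosines step above (pure arithmetic, no survey
# data required): for points P_prev=(0, 0), P=(1, 0), P_next=(1, 1), we get
# opp = |P P_prev| = 1, adj = |P P_next| = 1, hyp = |P_prev P_next| = sqrt(2),
# so theta = acos((1 + 1 - 2) / (2 * 1 * 1)) = acos(0) = 90 degrees -- a
# right-angle bend, flagged as both a soft and a hard corner under the
# default 150/130 degree thresholds.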
def mark_lateral_points(self, dip_threshold: int = None) -> pd.DataFrame:
    dip_threshold = dip_threshold or LATERAL_DIP_THRESHOLD
    points = self._obj
    validate_required_columns(["dip"], points.columns)
    points.loc[points.dip > dip_threshold, "is_in_lateral"] = True
    points["is_in_lateral"] = points.is_in_lateral.fillna(False)
    return points
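# Illustrative usage (hypothetical data; assumes LATERAL_DIP_THRESHOLD marks
# near-horizontal dips): points with dip above the threshold get
# is_in_lateral=True, everything else False.
#
#   marked = points.shapes.mark_lateral_points(dip_threshold=85)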
def index_survey_points(self, dip_threshold: int = None) -> pd.DataFrame:
    points = self._obj
    validate_required_columns(["dip"], points.columns)
    validate_required_columns(["api14", "md"], points.index.names)

    calc_column_names = [
        "is_in_lateral",
        "is_heel_point",
        "is_mid_point",
        "is_toe_point",
    ]

    # add placeholders (with defaults) for columns to be calculated
    points = pd.concat([points, pd.DataFrame(columns=calc_column_names)])
    points.loc[:, calc_column_names] = False
    points["sequence"] = points.groupby(level=0).cumcount() + 1
    points = points.shapes.mark_lateral_points(dip_threshold=dip_threshold)

    heel_point_index = (points.loc[points.is_in_lateral]
                        .reset_index(level=1)
                        .groupby(level=0)
                        .first()
                        .set_index("md", append=True)
                        .index)

    mid_point_sequence_index = (points.loc[points.is_in_lateral]
                                .groupby(level=0)
                                .sequence.median()
                                .apply(np.floor)
                                .to_frame()
                                .astype(int)
                                .set_index("sequence", append=True)
                                .index)

    mid_point_index = (points.reset_index(level=1)
                       .set_index("sequence", append=True)
                       .loc[mid_point_sequence_index]
                       .reset_index(level=1)
                       .set_index("md", append=True)
                       .index)

    toe_point_index = (points.loc[points.is_in_lateral]
                       .reset_index(level=1)
                       .groupby(level=0)
                       .last()
                       .set_index("md", append=True)
                       .index)

    points.loc[heel_point_index, "is_heel_point"] = True
    points.loc[mid_point_index, "is_mid_point"] = True
    points.loc[toe_point_index, "is_toe_point"] = True

    heel_start_seq_by_group = points.loc[heel_point_index, "sequence"]

    # ensure all points after the heel point are marked as in the lateral,
    # since the dip filter doesn't always catch all of them
    for api14 in points.groupby(level=0).groups:
        try:
            heel_start_seq = heel_start_seq_by_group.loc[api14].iloc[0]
            group = points.xs(api14, level=0, drop_level=False)
            group_in_lateral_index = group.loc[
                group.sequence >= heel_start_seq].index
            points.loc[group_in_lateral_index, "is_in_lateral"] = True
        except KeyError:
            logger.debug(
                f"{api14} has no points marked as in the lateral"
                " -- skipping lateral indexing")

    return points
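# Illustrative pipeline (a sketch; assumes a survey-point frame indexed by
# ["api14", "md"] with a "dip" column):
#
#   indexed = points.shapes.index_survey_points()
#   indexed.loc[indexed.is_heel_point]  # first in-lateral point per well
#   indexed.loc[indexed.is_toe_point]   # last in-lateral point per well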
def as_line(self, geometry: str = "geom", label: str = "line") -> pd.Series:
    points = self._obj
    validate_required_columns([geometry], points.columns)
    # .to_numpy() is implemented by both pandas and geopandas geoarrays,
    # whereas .values is not.
    return (points[geometry].groupby(level=0)
            .apply(lambda x: LineString(x.to_numpy().tolist()))
            .rename(label))
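# Illustrative usage (hypothetical data): unlike as_stick, this keeps every
# survey point, producing one detailed LineString per api14 group.
#
#   lines = points.shapes.as_line(geometry="geom", label="wellbore")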
def melt_depths(self) -> pd.DataFrame:
    validate_required_columns(["api14"], self._obj.index.names)
    depths_melted = (self._obj.dropna(how="all")
                     .reset_index()
                     .melt(id_vars=["api14"], var_name="property_name"))
    depths_melted["aggregate_type"] = None
    depths_melted["name"] = depths_melted.property_name
    depths_melted = depths_melted.set_index(
        ["api14", "property_name", "aggregate_type"])
    return depths_melted
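# Illustrative shape change (hypothetical columns): a wide frame of depth
# properties per api14, e.g. columns ["tvd", "md_total"], melts into long
# rows indexed by ["api14", "property_name", "aggregate_type"], with the
# depth itself in "value" and a copy of the property name in "name".
#
#   depths_melted = depths.wells.melt_depths()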
def find_kop(self) -> pd.DataFrame:
    """ Return an educated guess as to the location of a survey's kickoff
    point, element-wise. The calling dataframe should be a dataframe of
    survey points.
    """
    points = self._obj
    validate_required_columns(["is_in_lateral", "sequence"], points.columns)

    angles = points.shapes.relative_trajectory_angle()
    points = points.join(angles.loc[~points.is_in_lateral])
    points["theta"] = angles.theta
    points.loc[:, ["is_soft_corner", "is_hard_corner"]] = (
        points.loc[:, ["is_soft_corner", "is_hard_corner"]].fillna(False))
    points.loc[:, "theta"] = points.loc[:, "theta"].fillna(0)

    max_hard_corner_mask = (points.loc[points.is_hard_corner]
                            .sequence.groupby(level=0)
                            .max()
                            .rename("hard"))
    max_soft_corner_mask = (points.loc[points.is_soft_corner]
                            .sequence.groupby(level=0)
                            .max()
                            .rename("soft"))
    last_non_lateral_point_mask = (points.loc[~points.is_in_lateral]
                                   .sequence.groupby(level=0)
                                   .max()
                                   .rename("last_non_lateral"))

    # create a frame from the masks created above
    empty = pd.DataFrame(index=points.groupby(level=0).max().index)
    joined = (empty.join(max_hard_corner_mask)
              .join(max_soft_corner_mask)
              .join(last_non_lateral_point_mask))

    # determine the sequence index of the kop by traversing the joined
    # dataframe's columns from left to right and taking the first non-na
    # value for each row.
    kop_seq_index = (joined.bfill(axis=1)
                     .iloc[:, 0]
                     .rename("kop_seq")
                     .fillna(-1)
                     .astype(int)
                     .to_frame()
                     .set_index("kop_seq", append=True)
                     .index)

    # mark kop points using the sequence index
    points["is_kop"] = False
    kop_index = (points.reset_index(level=1)
                 .set_index("sequence", append=True)
                 .loc[kop_seq_index]
                 .reset_index(level=1)
                 .set_index("md", append=True)
                 .index)
    points.loc[kop_index, "is_kop"] = True

    return points.loc[:, ["theta", "is_soft_corner", "is_hard_corner", "is_kop"]]
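# Illustrative usage (a sketch; assumes the frame has already been run
# through index_survey_points so that is_in_lateral and sequence exist):
#
#   kop = points.shapes.find_kop()
#   points.join(kop).loc[lambda df: df.is_kop]  # estimated kickoff point(s)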
def lateral_length(self):
    points = self._obj
    if "is_in_lateral" not in points.columns:
        points = points.shapes.mark_lateral_points()
    validate_required_columns(["is_in_lateral"], points.columns)
    validate_required_columns(["md"], points.index.names)
    return (points.loc[points.is_in_lateral]
            .reset_index(level=1)
            .loc[:, "md"]
            .groupby(level=0)
            .agg(["min", "max"])
            .apply(lambda row: row["max"] - row["min"], axis=1)
            .rename("lateral_length"))
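# Worked example of the aggregation above: if a well's in-lateral points span
# measured depths 9,500 ft to 19,200 ft, lateral_length = 19,200 - 9,500
# = 9,700 ft.
#
#   lat_len = points.shapes.lateral_length()  # Series named "lateral_length"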
def as_bent_stick(self,
                  geometry: str = "geom",
                  label: str = "bent_stick") -> pd.Series:
    points = self._obj
    validate_required_columns([geometry, "is_kop"], points.columns)
    validate_required_columns(["md"], points.index.names)
    bent_stick_points = (
        pd.concat([
            # first and last points
            points.reset_index(level=1)
            .groupby(level=0)
            .agg(["first", "last"])
            .stack()
            .reset_index(level=1, drop=True)
            .set_index("md", append=True),
            # kop points
            points.loc[points.is_kop],
        ]).sort_index().loc[:, [geometry]])
    return bent_stick_points.shapes.as_line(geometry=geometry, label=label)
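# Illustrative usage (hypothetical data; requires the is_kop flags produced
# by find_kop): the "bent stick" simplifies each well to its first point,
# kickoff point, and last point, rather than the two-point stick of as_stick.
#
#   bent = points.shapes.as_bent_stick()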
def as_3d(self, geometry: str = "geom") -> pd.DataFrame:
    """ Transform 2d points into 3d points using the md index as z """
    xyz = self._obj
    validate_required_columns([geometry], xyz.columns)
    validate_required_columns(["md"], xyz.index.names)
    xyz["x"] = xyz[geometry].apply(lambda pt: pt.x)
    xyz["y"] = xyz[geometry].apply(lambda pt: pt.y)
    xyz = (xyz.reset_index(level=1)
           .set_index("md", append=True, drop=False)
           .rename(columns={"md": "z"}))
    xyz = xyz.loc[:, ["x", "y", "z", geometry]]
    # recreate points with z
    xyz[geometry] = xyz.apply(lambda row: Point(row.x, row.y, row.z), axis=1)
    return xyz
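# Illustrative behavior (hypothetical data): a 2d point at md 9500, such as
# Point(-101.5, 31.9), becomes Point(-101.5, 31.9, 9500).
#
#   xyz = points.shapes.as_3d()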
def merge_lateral_lengths(self) -> pd.DataFrame:
    """ Merge perfll and lateral_length into a single column, preferring
    perfll """
    wells = self._obj

    if "lateral_length" not in wells.columns:
        wells["lateral_length"] = np.nan

    required_columns = ["perfll", "lateral_length"]
    validate_required_columns(required_columns, wells.columns)

    # get lateral_length, preferring perfll over lateral_length
    latlens = wells.loc[:, required_columns]
    latlens.loc[latlens.perfll.notnull(), "lateral_length"] = np.nan
    latlens = (latlens.reset_index()
               .melt(
                   id_vars="api14",
                   var_name="lateral_length_type",
                   value_name="lateral_length",
               )
               .set_index("api14")
               .dropna(how="any"))
    return latlens
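# Illustrative behavior (hypothetical rows): a well with perfll=9500 and
# lateral_length=9800 keeps only the perfll value; a well with no perfll
# falls back to its computed lateral_length. The result is long-form, with
# lateral_length_type in {"perfll", "lateral_length"}.
#
#   latlens = wells.wells.merge_lateral_lengths()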
def combine_frac_parameters(self,
                            other: pd.DataFrame,
                            dropna: bool = True) -> pd.DataFrame:
    fracs = self._obj
    validate_required_columns(["fluid", "proppant"], fracs.columns)
    validate_required_columns(["fluid", "proppant"], other.columns)
    validate_required_columns(["api14"], fracs.index.names)
    validate_required_columns(["api14"], other.index.names)

    fracs = fracs.combine_first(other)

    if dropna:
        fracs = fracs[(~fracs.fluid.isna()) & (~fracs.proppant.isna())]

    return fracs
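# Illustrative usage (hypothetical frames; assumes both are indexed by
# api14): combine_first fills missing fluid/proppant values in ``fracs``
# from ``other`` without overwriting values that already exist.
#
#   merged = fracs.wells.combine_frac_parameters(other_fracs)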
def process_fracs(self):
    fracs = self._obj
    validate_required_columns(
        ["fluid", "proppant", "lateral_length", "lateral_length_type"],
        fracs.columns,
    )
    fracs = fracs.dropna(how="all", subset=["fluid", "proppant"])

    # TODO: validate fluid/proppant UOM and convert to BBL/LB where necessary

    # convert lb & bbl to lb/ft & bbl/ft
    per_ft = (fracs.loc[:, ["fluid", "proppant"]]
              .div(fracs["lateral_length"], axis=0)
              .rename(columns={
                  "fluid": "fluid_bbl_ft",
                  "proppant": "proppant_lb_ft"
              }))
    fracs = fracs.join(per_ft)

    # rename fluid & proppant and drop uoms
    fracs = fracs.rename(columns={
        "fluid": "fluid_bbl",
        "proppant": "proppant_lb"
    }).drop(columns=["fluid_uom", "proppant_uom"])

    fracs = fracs.dropna(
        how="all",
        subset=[
            "fluid_bbl",
            "proppant_lb",
            "lateral_length_type",
            "lateral_length",
            "fluid_bbl_ft",
            "proppant_lb_ft",
        ],
    )
    return fracs
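# Worked example of the per-foot conversion above: 250,000 bbl of fluid and
# 12,000,000 lb of proppant over a 10,000 ft lateral yield
# fluid_bbl_ft = 25.0 and proppant_lb_ft = 1,200.0.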
def test_validate_required_columns_raise():
    with pytest.raises(KeyError):
        validate_required_columns(required=["a", "b"], columns=["a", "c", "d"])
def assign_status(
    self,
    status_column: str = "status",
    how: str = "waterfall",
    status_indicator_map: Dict[str, str] = None,
    detail: bool = False,
    as_labels: bool = False,
    empty_label_placeholder: str = ".",
) -> pd.DataFrame:
    """ Assign well status using indicators existing in the passed DataFrame,
    or generate them with pd.DataFrame.wells.status_indicators() if they
    aren't present.

    Keyword Arguments:
        status_column {str} -- name of the column containing the original
            statuses from the data provider (default: "status")
        how {str} -- assignment methodology to use; currently the only
            available option is the default. (default: "waterfall")
        status_indicator_map {Dict[str, str]} -- mapping of indicator column
            names to the status value each assigns in the waterfall. The
            precedence of assignment is inferred from the order of items in
            the mapping (default: const.STATUS_INDICATOR_MAP).
        detail {bool} -- return the intermediate calculations used in
            assignments (default: False)

    Raises:
        ValueError

    Returns:
        pd.DataFrame
    """
    wells = self._obj
    well_columns = [
        "status",
        "spud_date",
        "comp_date",
        "permit_date",
        "last_prod_date",
    ]
    validate_required_columns(well_columns, wells.columns)

    target_column = "new_status"
    status: pd.DataFrame = wells.loc[:, well_columns]
    status = status.wells.status_indicators()
    status_indicator_map = (status_indicator_map
                            if status_indicator_map else STATUS_INDICATOR_MAP)

    # seed with keeper values from the original status column
    status.loc[status.is_keeper_status, target_column] = status.loc[
        status.is_keeper_status, status_column]

    if how == "waterfall":
        for column_name, label in status_indicator_map.items():
            selected = status.loc[status[target_column].isna()
                                  & status[column_name], column_name]
            if label is not None:
                selected = selected.replace({True: label})
            status.loc[status[target_column].isna(), target_column] = selected
    else:
        raise ValueError("Invalid how value: use 'waterfall'")

    if not detail:
        status = status.loc[:, [target_column]]

    # overwrite the original status with the new status
    if status_column in status.columns:
        status = status.drop(columns=[status_column])
    status = status.rename(columns={target_column: status_column})

    if as_labels:
        status = status.replace({
            "is_other": {True: "OTHER"},
            "is_inactive_pa": {True: "INACTIVE-PA"},
            "is_ta": {True: "TA"},
            "is_producing": {True: "PRODUCING"},
            "is_completed": {True: "COMPLETED"},
            "is_duc": {True: "DUC"},
            "is_drilling": {True: "DRILLING"},
            "is_permit": {True: "PERMIT"},
            "is_stale_permit": {True: "STALE-PERMIT"},
        }).replace({False: empty_label_placeholder})

    return status
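# Illustrative usage (a sketch with hypothetical data; assumes the ``wells``
# accessor and the default STATUS_INDICATOR_MAP ordering):
#
#   statuses = wells.wells.assign_status()  # single "status" column
#   detailed = wells.wells.assign_status(detail=True, as_labels=True)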
def status_indicators(self, indicators_only: bool = False) -> pd.DataFrame:
    df = self._obj
    required_columns = [
        "status",
        "spud_date",
        "comp_date",
        "permit_date",
        "last_prod_date",
    ]
    validate_required_columns(required_columns, df.columns)

    """ Original logic:

    Is_GoodSymCode
        if Uppercase([IHS_Status]) not in ('OIL PRODUCER', 'OIL-WO',
            'AT TOTAL DEPTH', 'WELL START', 'WELL PERMIT', 'TREATD',
            'CANCEL', 'GAS PRODUCER', 'GAS-WO', 'TA')
        then [IHS_Status] else Null() endif

    Is_Other
        # last_prod is None and (spudded before 1971 or more than 3 years ago)
        if IsNull([LastProd]) and ([SPUD_DATE] < "1971-01-01"
            or [SPUD_DATE] < DateTimeAdd(DateTimeToday(),-36,"months"))
        then "OTHER" else Null() endif

    Is_InactivePA
        if [LastProd] < DateAdd([ProductionDateCutoff],-12,"months")
            and !IsNull([LastProd])
        then "INACTIVE-PA" else Null() endif

    Is_TA
        if [LastProd] < DateAdd([ProductionDateCutoff],-3,"months")
            and [LastProd] >= DateAdd([ProductionDateCutoff],-12,"months")
        then "TA" else Null() endif

    Is_Producing
        if [LastProd] >= DateAdd([ProductionDateCutoff],-3,"months")
        then "PRODUCING" else Null() endif

    Is_Completed
        if [COMP_DATE] >= DateAdd([ProductionDateCutoff],-9,"months")
        then "COMPLETED" else Null() endif

    Is_DUC
        if [SPUD_DATE] < DateAdd(MonthStart(DateTimeToday()),-1,"months")
        then "DUC" else Null() endif

    Is_Drilling
        if !IsNull([SPUD_DATE]) then "DRILLING" else Null() endif

    Is_Permit
        if [PERMIT_DATE] >= DateAdd(DateAdd(MonthStart(DateTimeToday()),-1,"months"),-36,"months")
        then "PERMIT" else Null() endif

    Is_StalePermit
        if [PERMIT_DATE] < DateAdd(DateAdd(MonthStart(DateTimeToday()),-1,"months"),-36,"months")
        then "STALE-PERMIT" else Null() endif

    Status
        if !IsNull([Is_GoodSymCode]) then [Is_GoodSymCode]
        elseif !IsNull([Is_Other]) then [Is_Other]
        elseif !IsNull([Is_InactivePA]) then [Is_InactivePA]
        elseif !IsNull([Is_TA]) then [Is_TA]
        elseif !IsNull([Is_Producing]) then [Is_Producing]
        elseif !IsNull([Is_Completed]) then [Is_Completed]
        elseif !IsNull([Is_DUC]) then [Is_DUC]
        elseif !IsNull([Is_Drilling]) then [Is_Drilling]
        elseif !IsNull([Is_Permit]) then [Is_Permit]
        elseif !IsNull([Is_StalePermit]) then [Is_StalePermit]
        else "OTHER" endif
    """  # noqa

    last_prod_norm_date = x_months_ago(3)

    to_recategorize = [
        "OIL PRODUCER",
        "OIL-WO",
        "AT TOTAL DEPTH",
        "WELL START",
        "WELL PERMIT",
        "TREATD",
        "CANCEL",
        "GAS PRODUCER",
        "GAS-WO",
        "TA",
    ]
    df.loc[~df.status.isin(to_recategorize), "is_keeper_status"] = True

    other_mask = (df.last_prod_date.isna()) & (df.spud_date < x_months_ago(36))
    df.loc[other_mask, "is_other"] = True

    inactive_pa_mask = (~df.last_prod_date.isna()) & (
        df.last_prod_date < x_months_ago(12, relative_to=last_prod_norm_date))
    df.loc[inactive_pa_mask, "is_inactive_pa"] = True

    # TA: last production between 3 and 12 months before the cutoff
    is_ta_mask = (df.last_prod_date < x_months_ago(
        3, relative_to=last_prod_norm_date)) & (
            df.last_prod_date >= x_months_ago(
                12, relative_to=last_prod_norm_date))
    df.loc[is_ta_mask, "is_ta"] = True

    is_producing_mask = df.last_prod_date >= x_months_ago(
        3, relative_to=last_prod_norm_date)
    df.loc[is_producing_mask, "is_producing"] = True

    is_completed_mask = df.comp_date.notnull()
    df.loc[is_completed_mask, "is_completed"] = True

    is_duc_mask = df.spud_date < x_months_ago(
        3, relative_to=last_prod_norm_date)
    df.loc[is_duc_mask, "is_duc"] = True

    is_drilling_mask = df.spud_date.notnull()
    df.loc[is_drilling_mask, "is_drilling"] = True

    is_permit_mask = df.permit_date >= x_months_ago(36)
    df.loc[is_permit_mask, "is_permit"] = True

    # stale permits fall outside the 36-month permit window
    is_stale_permit_mask = df.permit_date < x_months_ago(36)
    df.loc[is_stale_permit_mask, "is_stale_permit"] = True
    indicators = [
        "is_keeper_status",
        "is_other",
        "is_inactive_pa",
        "is_ta",
        "is_producing",
        "is_completed",
        "is_duc",
        "is_drilling",
        "is_permit",
        "is_stale_permit",
    ]
    df.loc[:, indicators] = df.loc[:, indicators].fillna(False)

    if indicators_only:
        return_columns = indicators
    else:
        return_columns = required_columns + indicators

    return df.loc[:, return_columns]
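# Illustrative usage (a sketch; assumes the ``wells`` accessor): each row
# gets boolean indicator columns which assign_status() later collapses into
# a single status via the waterfall.
#
#   indicators = wells.wells.status_indicators(indicators_only=True)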