def ts_data_spec_dtos(time_series_in_cdp):
    """Yield two TimeSeriesDataSpec objects over the two CDP time-series ids.

    Both specs share the same underlying TimeSeries list and interval
    (TEST_TS_REASONABLE_INTERVAL) but differ in granularity, label, and
    missing-data strategy.
    """
    shared_series = [
        TimeSeries(id=time_series_in_cdp[0], aggregates=["step"], missing_data_strategy="ffill"),
        TimeSeries(id=time_series_in_cdp[1]),
    ]
    spec_10m = TimeSeriesDataSpec(
        time_series=shared_series,
        aggregates=["avg"],
        granularity="10m",
        label="ds1",
        start=TEST_TS_REASONABLE_INTERVAL["start"],
        end=TEST_TS_REASONABLE_INTERVAL["end"],
    )
    spec_1h = TimeSeriesDataSpec(
        time_series=shared_series,
        aggregates=["avg"],
        granularity="1h",
        start=TEST_TS_REASONABLE_INTERVAL["start"],
        end=TEST_TS_REASONABLE_INTERVAL["end"],
        missing_data_strategy="linearInterpolation",
        label="ds2",
    )
    yield [spec_10m, spec_1h]
def generate_data_spec(last_processed_timestamp, granularity="10s"):
    """Build a DataSpec covering wells D02 and D03.

    Walks ``../tags`` for the known tag CSV files, drops tags listed in
    EXCLUDE_TAGS, and builds one TimeSeriesDataSpec per well spanning
    [last_processed_timestamp, now] in epoch milliseconds.

    Args:
        last_processed_timestamp (int): Start of the interval, epoch ms.
        granularity (str): Aggregation granularity, e.g. "10s".

    Returns:
        DataSpec: with two specs labeled "d2" and "d3".
    """
    tag_files = ("well_tags.csv", "routing.csv", "riser_tags.csv", "output.csv", "template_tags.csv")
    shared_placements = ["T3 WGM", "Template", "Riser"]
    frames_d02, frames_d03 = [], []
    for root, _subdirs, files in os.walk("../tags"):
        for file in files:
            if file not in tag_files:
                continue
            with open(os.path.join(root, file)) as f:
                df = pd.read_csv(f)
            df = df[~df["tag"].isin(EXCLUDE_TAGS)]
            frames_d02.append(df[df["placement"].isin(["WellD02"] + shared_placements)])
            frames_d03.append(df[df["placement"].isin(["WellD03"] + shared_placements)])

    def build_time_series(frames):
        # Valve tags (ESV / 18HV) are step-like signals: step aggregate + forward
        # fill; all other tags are treated as continuous: avg + linear interpolation.
        tags = pd.concat(frames, ignore_index=True).drop_duplicates(subset="tag")["tag"]
        series = []
        for tag in tags:
            is_valve = "ESV" in tag or "18HV" in tag
            series.append(
                TimeSeries(
                    name=tag,
                    missing_data_strategy="ffill" if is_valve else "linearInterpolation",
                    aggregates=["step" if is_valve else "avg"],
                )
            )
        return series

    # Compute "now" once so both specs cover exactly the same interval.
    # (Previously datetime.now() was evaluated per spec, giving the two specs
    # slightly different end timestamps.)
    end = int(datetime.now().timestamp() * 1e3)

    def build_spec(series, label):
        return TimeSeriesDataSpec(
            time_series=series,
            aggregates=["avg"],
            granularity=granularity,
            start=last_processed_timestamp,
            end=end,
            label=label,
            missing_data_strategy="ffill",
        )

    return DataSpec(
        time_series_data_specs=[
            build_spec(build_time_series(frames_d02), "d2"),
            build_spec(build_time_series(frames_d03), "d3"),
        ]
    )
def ts_data_spec_dtos():
    """Yield two TimeSeriesDataSpec fixtures over the "constant" and "sinus" series.

    Both specs use the same fixed epoch-ms interval; they differ in granularity,
    label, and missing-data strategy.
    """
    start_ms, end_ms = 1522188000000, 1522620000000
    series = [
        TimeSeries(name="constant", aggregates=["step"], missing_data_strategy="ffill"),
        TimeSeries(name="sinus"),
    ]
    spec_10m = TimeSeriesDataSpec(
        time_series=series,
        aggregates=["avg"],
        granularity="10m",
        label="ds1",
        start=start_ms,
        end=end_ms,
    )
    spec_1h = TimeSeriesDataSpec(
        time_series=series,
        aggregates=["avg"],
        granularity="1h",
        start=start_ms,
        end=end_ms,
        missing_data_strategy="linearInterpolation",
        label="ds2",
    )
    yield [spec_10m, spec_1h]
def test_instantiate_ts_data_spec_duplicate_labels(self):
    """An unlabeled spec and one explicitly labeled 'default' must be rejected."""
    with pytest.raises(DataSpecValidationError):
        specs = [
            TimeSeriesDataSpec(time_series=[TimeSeries("ts1")], aggregates=["avg"], granularity="1s"),
            TimeSeriesDataSpec(
                time_series=[TimeSeries("ts1")], aggregates=["avg"], granularity="1s", label="default"
            ),
        ]
        DataSpec(time_series_data_specs=specs)
def test_get_dataframes_w_column_mapping(self, time_series_in_cdp):
    """Per-series labels should become the dataframe column names."""
    first_id, second_id = time_series_in_cdp[0], time_series_in_cdp[1]
    labeled_series = [
        TimeSeries(id=first_id, aggregates=["avg"], label="cavg"),
        TimeSeries(id=first_id, aggregates=["cv"], label="ccv"),
        TimeSeries(id=second_id, aggregates=["avg"], label="sinavg"),
    ]
    spec = TimeSeriesDataSpec(
        time_series=labeled_series, aggregates=["avg"], granularity="1h", start="300d-ago"
    )
    service = DataTransferService(DataSpec([spec]))
    frames = service.get_dataframes()
    assert list(frames["default"].columns.values) == ["timestamp", "cavg", "ccv", "sinavg"]
def data_spec(self, time_series_in_cdp):
    """Fixture: yield a DataSpec with four labeled views of the same time series."""
    source_id = time_series_in_cdp[0]
    # label -> aggregates requested for that view of the series
    aggregates_by_label = {
        "ts1": ["avg", "min"],
        "ts2": ["cv"],
        "ts3": ["max", "count"],
        "ts4": ["step"],
    }
    series = [
        TimeSeries(id=source_id, aggregates=aggs, label=label)
        for label, aggs in aggregates_by_label.items()
    ]
    spec = TimeSeriesDataSpec(
        time_series=series, aggregates=["avg"], granularity="1h", start="300d-ago"
    )
    yield DataSpec(time_series_data_specs=[spec])
def test_instantiate_ts_data_spec_time_series_not_list(self):
    """Passing a bare TimeSeries instead of a list must raise DataSpecValidationError."""
    with pytest.raises(DataSpecValidationError):
        bad_spec = TimeSeriesDataSpec(
            time_series=TimeSeries(id=1234), aggregates=["avg"], granularity="1s"
        )
        DataSpec(time_series_data_specs=[bad_spec])
def main():
    """Export D02/D03 sensor data from CDP to per-label CSV files.

    Configures a session against the "akerbp" project, builds one
    TimeSeriesDataSpec per well from the tag CSVs under ``../tags``
    (starting 2017-03-01, epoch ms), fetches the dataframes, and writes
    one CSV per spec label under ``../data/``.
    """
    configure_session(api_key=os.getenv("COGNITE_API_KEY"), project="akerbp", debug=True)

    tag_files = ("well_tags.csv", "routing.csv", "output.csv", "riser_tags.csv", "template_tags.csv")
    common_placements = ["T3 WGM", "Template", "Riser"]
    # label -> list of tag DataFrames for that well
    frames = {"d2": [], "d3": []}
    for root, _subdirs, files in os.walk("../tags"):
        for file in files:
            if file not in tag_files:
                continue
            with open(os.path.join(root, file)) as f:
                df = pd.read_csv(f)
            df = df[~df["tag"].isin(EXCLUDE_TAGS)]
            frames["d2"].append(df[df["placement"].isin(["WellD02"] + common_placements)])
            frames["d3"].append(df[df["placement"].isin(["WellD03"] + common_placements)])

    def to_time_series(frame_list):
        # Valve tags (ESV / 18HV) are step-like signals: step aggregate + forward
        # fill; all other tags are treated as continuous: avg + linear interpolation.
        tags = pd.concat(frame_list, ignore_index=True).drop_duplicates(subset="tag")["tag"]
        result = []
        for tag in tags:
            is_valve = "ESV" in tag or "18HV" in tag
            result.append(
                TimeSeries(
                    name=tag,
                    missing_data_strategy="ffill" if is_valve else "linearInterpolation",
                    aggregates=["step" if is_valve else "avg"],
                )
            )
        return result

    start = int(datetime(2017, 3, 1).timestamp() * 1e3)
    specs = [
        TimeSeriesDataSpec(
            time_series=to_time_series(frame_list),
            aggregates=["avg"],
            granularity="10s",
            start=start,
            label=label,
        )
        for label, frame_list in frames.items()
    ]
    data_spec = DataSpec(time_series_data_specs=specs)
    dts = DataTransferService(data_spec, num_of_processes=10)
    print(data_spec.to_JSON())
    for label, df in dts.get_dataframes().items():
        df.to_csv(f"../data/{label}.csv")
        print(df.shape)