Code example #1
    def test_json_dumps_after_used_by_dts(self, ts_data_spec_dtos, files_data_spec_dto):
        # Fetching dataframes must not mutate the data spec's JSON representation.
        data_spec = DataSpec(time_series_data_specs=ts_data_spec_dtos, files_data_spec=files_data_spec_dto)
        json_repr = data_spec.to_JSON()
        dts = DataTransferService(data_spec)
        dts.get_dataframes()
        json_repr_after_dts = data_spec.to_JSON()
        assert json_repr == json_repr_after_dts
Code example #2
    def test_dict_dto_equal(self, ts_data_spec_dicts, ts_data_spec_dtos):
        data_spec_dtos = DataSpec(time_series_data_specs=ts_data_spec_dtos)
        data_spec_dicts = DataSpec(time_series_data_specs=ts_data_spec_dicts)
        service = DataTransferService(data_spec_dicts)
        service2 = DataTransferService(data_spec_dtos)
        dataframes_by_dicts = service.get_dataframes()
        dataframes_by_dtos = service2.get_dataframes()

        for df1, df2 in zip(dataframes_by_dtos.values(),
                            dataframes_by_dicts.values()):
            pd.testing.assert_frame_equal(df1, df2)
Code example #3
    def test_get_dataframes(self, ts_data_spec_dtos):
        data_spec = DataSpec(time_series_data_specs=ts_data_spec_dtos)
        service = DataTransferService(data_spec)
        dataframes = service.get_dataframes()

        assert isinstance(dataframes.get("ds1"), pd.DataFrame)
        assert isinstance(dataframes.get("ds2"), pd.DataFrame)
Code example #4
    def test_get_dataframes_w_column_mapping(self, time_series_in_cdp):
        ts1 = TimeSeries(id=time_series_in_cdp[0], aggregates=["avg"], label="cavg")
        ts2 = TimeSeries(id=time_series_in_cdp[0], aggregates=["cv"], label="ccv")
        ts3 = TimeSeries(id=time_series_in_cdp[1], aggregates=["avg"], label="sinavg")

        tsds = TimeSeriesDataSpec(time_series=[ts1, ts2, ts3], aggregates=["avg"], granularity="1h", start="300d-ago")

        dts = DataTransferService(DataSpec([tsds]))
        dfs = dts.get_dataframes()
        expected = ["timestamp", "cavg", "ccv", "sinavg"]
        assert expected == list(dfs["default"].columns.values)
Code example #5
    def test_get_dataframes_column_mapping_no_drop_agg_suffix(self, data_spec):
        dts = DataTransferService(data_spec, num_of_workers=3)

        dfs = dts.get_dataframes(drop_agg_suffix=False)
        assert list(dfs["default"].columns.values) == [
            "timestamp",
            "ts1|average",
            "ts1|min",
            "ts2|continuousvariance",
            "ts3|max",
            "ts3|count",
            "ts4|stepinterpolation",
        ]
Code example #6
    def test_get_dataframes_column_mapping_drop_agg_suffixes(self, data_spec):
        dts = DataTransferService(data_spec, num_of_workers=3)

        dfs = dts.get_dataframes(drop_agg_suffix=True)
        assert list(dfs["default"].columns.values) == [
            "timestamp",
            "ts1|average",
            "ts1|min",
            "ts2",
            "ts3|max",
            "ts3|count",
            "ts4",
        ]
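Comparing the two tests: with drop_agg_suffix=True the aggregate suffix is dropped only for time series that request a single aggregate (ts2, ts4), while series with several aggregates (ts1, ts3) keep their suffixes so the columns stay unambiguous. A minimal sketch of the shared data_spec fixture consistent with both column lists, using the short aggregate names seen in example #4; the time series names, granularity, and window are assumptions:

import pytest

@pytest.fixture
def data_spec():
    # Hypothetical reconstruction inferred from the asserted columns above:
    # "cv" = continuous variance, "step" = step interpolation.
    ts1 = TimeSeries(name="ts1", aggregates=["avg", "min"])
    ts2 = TimeSeries(name="ts2", aggregates=["cv"])
    ts3 = TimeSeries(name="ts3", aggregates=["max", "count"])
    ts4 = TimeSeries(name="ts4", aggregates=["step"])
    tsds = TimeSeriesDataSpec(
        time_series=[ts1, ts2, ts3, ts4],
        aggregates=["avg"],  # default for series that do not set their own
        granularity="10m",
        start="30d-ago",
    )
    return DataSpec(time_series_data_specs=[tsds])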
Code example #7
import os
from datetime import datetime

import pandas as pd

# The import paths below are assumptions for the 0.x cognite-sdk layout these
# examples appear to target; EXCLUDE_TAGS is assumed to be defined elsewhere.
from cognite.config import configure_session
from cognite.data_transfer_service import DataSpec, DataTransferService, TimeSeriesDataSpec
from cognite.v05.dto import TimeSeries


def main():
    configure_session(api_key=os.getenv("COGNITE_API_KEY"),
                      project="akerbp",
                      debug=True)
    tags_d03 = []
    tags_d02 = []

    for root, _, files in os.walk("../tags"):
        for file in files:
            if file in ("well_tags.csv", "routing.csv", "output.csv",
                        "riser_tags.csv", "template_tags.csv"):
                with open(os.path.join(root, file)) as f:
                    df = pd.read_csv(f)

                    placements = ["T3 WGM", "Template", "Riser"]
                    placements_d03 = ["WellD03"] + placements
                    placements_d02 = ["WellD02"] + placements

                    df = df[~df["tag"].isin(EXCLUDE_TAGS)]

                    tags_d03.append(df[df["placement"].isin(placements_d03)])
                    tags_d02.append(df[df["placement"].isin(placements_d02)])

    tags_d02_concat = pd.concat(tags_d02, ignore_index=True)
    tags_d03_concat = pd.concat(tags_d03, ignore_index=True)

    tags_d02_concat = tags_d02_concat.drop_duplicates(subset="tag")
    tags_d03_concat = tags_d03_concat.drop_duplicates(subset="tag")

    d02_input_time_series = []
    d03_input_time_series = []

    for tag in tags_d02_concat["tag"]:
        aggregate = "step" if ("ESV" in tag or "18HV" in tag) else "avg"
        missing_data_strategy = "ffill" if (
            "ESV" in tag or "18HV" in tag) else "linearInterpolation"
        ts = TimeSeries(name=tag,
                        missing_data_strategy=missing_data_strategy,
                        aggregates=[aggregate])
        d02_input_time_series.append(ts)

    for tag in tags_d03_concat["tag"]:
        aggregate = "step" if ("ESV" in tag or "18HV" in tag) else "avg"
        missing_data_strategy = "ffill" if (
            "ESV" in tag or "18HV" in tag) else "linearInterpolation"
        ts = TimeSeries(name=tag,
                        missing_data_strategy=missing_data_strategy,
                        aggregates=[aggregate])
        d03_input_time_series.append(ts)

    d02_tsds = TimeSeriesDataSpec(
        time_series=d02_input_time_series,
        aggregates=["avg"],
        granularity="10s",
        start=int(datetime(2017, 3, 1).timestamp() * 1e3),
        label="d2",
    )
    d03_tsds = TimeSeriesDataSpec(
        time_series=d03_input_time_series,
        aggregates=["avg"],
        granularity="10s",
        start=int(datetime(2017, 3, 1).timestamp() * 1e3),
        label="d3",
    )

    data_spec = DataSpec(time_series_data_specs=[d02_tsds, d03_tsds])

    dts = DataTransferService(data_spec, num_of_processes=10)

    print(data_spec.to_JSON())

    df_dict = dts.get_dataframes()

    for label, df in df_dict.items():
        df.to_csv(f"../data/{label}.csv")
        print(df.shape)
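The two per-tag loops in main() are identical apart from their input list; a small helper (hypothetical, not part of the original script) would remove the duplication using the same TimeSeries constructor:

def build_input_time_series(tags):
    # ESV and 18HV tags are step signals: step-interpolate and forward-fill;
    # everything else is averaged and linearly interpolated.
    series = []
    for tag in tags:
        is_step = "ESV" in tag or "18HV" in tag
        series.append(
            TimeSeries(
                name=tag,
                aggregates=["step" if is_step else "avg"],
                missing_data_strategy="ffill" if is_step else "linearInterpolation",
            )
        )
    return series

# Usage: d02_input_time_series = build_input_time_series(tags_d02_concat["tag"])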
Code example #8
import time
from datetime import datetime, timedelta

import numpy as np
import pandas as pd

# The import paths below are assumptions for the 0.x cognite-sdk layout.
from cognite.data_transfer_service import DataTransferService
from cognite.v05 import models
from cognite.v05.dto import Datapoint
from cognite.v05.timeseries import post_datapoints


def main():
    output_columns = [
        "SKAP_18FI381-VFlLGas/Y/10sSAMP|average",
        "SKAP_18FI381-VFlLH2O/Y/10sSAMP|average",
        "SKAP_18FI381-VFlLOil/Y/10sSAMP|average",
    ]
    router = "SKAP_18HV3806/BCH/10sSAMP|stepinterpolation"
    one_hour_ago = datetime.now() - timedelta(hours=1)
    last_processed_timestamp = int(one_hour_ago.timestamp() * 1e3)  # epoch milliseconds

    is_first = True

    while True:
        # Placeholder frame; replaced by the first successful fetch below.
        d2_inputs = pd.DataFrame([[np.nan, np.nan, np.nan, np.nan, np.nan, np.nan]])
        d2_inputs.columns = ["hoho", "blaa", "hgi"] + output_columns
        input_has_nans = True
        while input_has_nans:
            ds = generate_data_spec(last_processed_timestamp)
            dts = DataTransferService(data_spec=ds)
            # Retry until the fetch succeeds. A bare except would also swallow
            # KeyboardInterrupt and SystemExit, so catch Exception instead.
            while True:
                try:
                    d2_inputs = dts.get_dataframes()["d2"]
                    break
                except Exception:
                    time.sleep(2)
            any_nans_per_column = d2_inputs.drop(output_columns, axis=1).isna().any()
            all_nans_per_column = d2_inputs.drop(output_columns, axis=1).isna().all()

            print(any_nans_per_column)
            print(all_nans_per_column)

            # Some inputs have gaps but none is entirely empty: step the window
            # back 10 seconds and retry until the inputs are complete.
            if any_nans_per_column.any() and not all_nans_per_column.any():
                last_processed_timestamp -= 10000

            print(datetime.fromtimestamp(last_processed_timestamp * 1e-3))
            time.sleep(2)
            input_has_nans = d2_inputs.drop(output_columns, axis=1).isna().any().any()

        last_ts = d2_inputs["timestamp"].iloc[-1]

        print(d2_inputs[output_columns[0]].values.tolist())
        d2_inputs_formatted = (
            d2_inputs.drop("timestamp", axis=1).drop(router, axis=1).drop(output_columns, axis=1).values.tolist()
        )
        timestamps = d2_inputs["timestamp"]
        res = models.online_predict(
            model_id=3885574571413770, version_id=4299054386152423, instances=[d2_inputs_formatted]
        )

        predictions = res["predictions"][0]
        formatted_predictions = [int(pred[0]) for pred in predictions]
        last_processed_timestamp = int(last_ts)

        dps = [Datapoint(ts, value) for ts, value in zip(timestamps.values.tolist(), formatted_predictions)]
        print([dp.value for dp in dps])
        if is_first:
            post_datapoints(name="SKAP_18FI381-VFlLGas/Y/10sSAMP_calc_D02_2", datapoints=dps)
            is_first = False
        else:
            for dp in dps:
                post_datapoints(name="SKAP_18FI381-VFlLGas/Y/10sSAMP_calc_D02_2", datapoints=[dp])
                time.sleep(5)
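Example #8 calls a generate_data_spec helper that is not shown. A plausible sketch, assuming it windows a spec labeled "d2" (the key read from get_dataframes above) from the last processed timestamp, and reusing the constructor patterns from example #7; the tag list and granularity are assumptions:

INPUT_TAGS = [...]  # the real tag list is not shown in the original

def generate_data_spec(last_processed_timestamp):
    # Start the window at the last processed millisecond timestamp so each
    # iteration only fetches datapoints that have not been scored yet.
    time_series = [TimeSeries(name=tag, aggregates=["avg"]) for tag in INPUT_TAGS]
    tsds = TimeSeriesDataSpec(
        time_series=time_series,
        aggregates=["avg"],
        granularity="10s",
        start=last_processed_timestamp,
        label="d2",
    )
    return DataSpec(time_series_data_specs=[tsds])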