예제 #1
0
 def test_json_dumps_after_used_by_dts(self, ts_data_spec_dtos, files_data_spec_dto):
     """Check that fetching dataframes through a DataTransferService leaves the spec's JSON unchanged."""
     spec = DataSpec(time_series_data_specs=ts_data_spec_dtos, files_data_spec=files_data_spec_dto)
     json_before = spec.to_JSON()

     # Only the effect on `spec` matters here; the returned dataframes are discarded.
     DataTransferService(spec).get_dataframes()

     assert json_before == spec.to_JSON()
    def test_dict_dto_equal(self, ts_data_spec_dicts, ts_data_spec_dtos):
        """Check that dict-based and DTO-based time series specs produce identical dataframes.

        Both variants must yield the same set of labels, and the dataframe stored
        under each label must be equal in the two results.
        """
        data_spec_dtos = DataSpec(time_series_data_specs=ts_data_spec_dtos)
        data_spec_dicts = DataSpec(time_series_data_specs=ts_data_spec_dicts)
        service = DataTransferService(data_spec_dicts)
        service2 = DataTransferService(data_spec_dtos)
        dataframes_by_dicts = service.get_dataframes()
        dataframes_by_dtos = service2.get_dataframes()

        # Compare label sets first: zipping the two value views would silently
        # stop at the shorter mapping and would pair frames by position only.
        assert dataframes_by_dtos.keys() == dataframes_by_dicts.keys()

        for label, df_from_dtos in dataframes_by_dtos.items():
            pd.testing.assert_frame_equal(df_from_dtos, dataframes_by_dicts[label])
예제 #3
0
    def test_get_dataframes(self, ts_data_spec_dtos):
        """Check that get_dataframes() holds a pandas DataFrame under both "ds1" and "ds2"."""
        spec = DataSpec(time_series_data_specs=ts_data_spec_dtos)
        frames = DataTransferService(spec).get_dataframes()

        for label in ("ds1", "ds2"):
            assert isinstance(frames.get(label), pd.DataFrame)
예제 #4
0
 def test_instantiate_ts_data_spec_time_series_not_list(self):
     """A spec whose time_series is a bare TimeSeries instead of a list must fail validation."""
     with pytest.raises(DataSpecValidationError):
         # Built inside the context so the error is caught whichever constructor raises it.
         bare_series_spec = TimeSeriesDataSpec(
             time_series=TimeSeries(id=1234), aggregates=["avg"], granularity="1s"
         )
         DataSpec(time_series_data_specs=[bare_series_spec])
예제 #5
0
 def test_instantiate_ts_data_spec_invalid_time_series_types(self):
     """A spec whose time_series list holds plain dicts must fail validation."""
     with pytest.raises(DataSpecValidationError):
         # Built inside the context so the error is caught whichever constructor raises it.
         dict_entries_spec = TimeSeriesDataSpec(
             time_series=[{"id": 1234}], aggregates=["avg"], granularity="1s"
         )
         DataSpec(time_series_data_specs=[dict_entries_spec])
예제 #6
0
def generate_data_spec(last_processed_timestamp, granularity="10s"):
    """Build a DataSpec with one time series spec per well ("d2" and "d3").

    Tags are read from the known CSV files found anywhere under ``../tags``.
    Tags listed in ``EXCLUDE_TAGS`` are dropped, the remaining rows are split by
    their ``placement`` column, and duplicates on ``tag`` are removed per well.

    Args:
        last_processed_timestamp: Passed through unchanged as ``start`` of both specs.
        granularity: Passed through unchanged as ``granularity`` of both specs.

    Returns:
        A DataSpec holding the "d2" spec followed by the "d3" spec.

    Raises:
        ValueError: Propagated from ``pd.concat`` when no matching tag file was
            found (there is then nothing to concatenate).
    """
    tag_files = ("well_tags.csv", "routing.csv", "riser_tags.csv", "output.csv", "template_tags.csv")

    # Placements shared by both wells; each well adds its own placement on top.
    shared_placements = ["T3 WGM", "Template", "Riser"]
    wells = (
        ("d2", ["WellD02"] + shared_placements),
        ("d3", ["WellD03"] + shared_placements),
    )
    frames_per_well = [[] for _ in wells]

    for root, _subdirs, files in os.walk("../tags"):
        for file in files:
            if file not in tag_files:
                continue
            with open(os.path.join(root, file)) as f:
                df = pd.read_csv(f)
            df = df[~df["tag"].isin(EXCLUDE_TAGS)]
            for frames, (_label, placements) in zip(frames_per_well, wells):
                frames.append(df[df["placement"].isin(placements)])

    # Evaluate "now" once so both specs end at exactly the same instant
    # (epoch milliseconds) instead of drifting by the time between two calls.
    end = int(datetime.now().timestamp() * 1e3)

    time_series_data_specs = []
    for frames, (label, _placements) in zip(frames_per_well, wells):
        tags = pd.concat(frames, ignore_index=True).drop_duplicates(subset="tag")["tag"]

        input_time_series = []
        for tag in tags:
            # Tags containing "ESV" or "18HV" use step aggregation with forward
            # fill; every other tag is averaged and linearly interpolated.
            is_step_tag = "ESV" in tag or "18HV" in tag
            input_time_series.append(
                TimeSeries(
                    name=tag,
                    missing_data_strategy="ffill" if is_step_tag else "linearInterpolation",
                    aggregates=["step" if is_step_tag else "avg"],
                )
            )

        time_series_data_specs.append(
            TimeSeriesDataSpec(
                time_series=input_time_series,
                aggregates=["avg"],
                granularity=granularity,
                start=last_processed_timestamp,
                end=end,
                label=label,
                missing_data_strategy="ffill",
            )
        )

    return DataSpec(time_series_data_specs=time_series_data_specs)
예제 #7
0
 def test_instantiate_ts_data_spec_duplicate_labels(self):
     """Two specs that end up with the same label must fail validation.

     The first spec is given no label and the second is labelled "default".
     NOTE(review): presumably an unlabelled spec falls back to "default" -- confirm.
     """
     with pytest.raises(DataSpecValidationError):
         # Built inside the context so the error is caught whichever constructor raises it.
         unlabelled_spec = TimeSeriesDataSpec(
             time_series=[TimeSeries("ts1")], aggregates=["avg"], granularity="1s"
         )
         default_labelled_spec = TimeSeriesDataSpec(
             time_series=[TimeSeries("ts1")], aggregates=["avg"], granularity="1s", label="default"
         )
         DataSpec(time_series_data_specs=[unlabelled_spec, default_labelled_spec])
예제 #8
0
    def test_get_files(self):
        """Fetch the file registered as "test" and compare its bytes with the expected content."""
        expected_content = b'import os\n\nfrom cognite.config import configure_session\nfrom cognite.v05 import files\n\nconfigure_session(os.getenv("COGNITE_TEST_API_KEY"), "mltest")\n\n\nres = files.upload_file("test.py", "./test.py")\n\nprint(res)\n'
        files_spec = FilesDataSpec(file_ids={"test": 7725800487412823})
        service = DataTransferService(DataSpec(files_data_spec=files_spec))

        fetched = service.get_file("test")

        assert isinstance(fetched, BytesIO)
        assert fetched.getvalue() == expected_content
예제 #9
0
    def data_spec(self, time_series_in_cdp):
        """Yield a DataSpec with four labelled TimeSeries built on the first entry of time_series_in_cdp.

        Each TimeSeries ("ts1".."ts4") uses that same id with its own aggregates.
        """
        series_id = time_series_in_cdp[0]
        labelled_aggregates = (
            ("ts1", ["avg", "min"]),
            ("ts2", ["cv"]),
            ("ts3", ["max", "count"]),
            ("ts4", ["step"]),
        )
        series = [
            TimeSeries(id=series_id, aggregates=aggregates, label=label)
            for label, aggregates in labelled_aggregates
        ]

        spec = TimeSeriesDataSpec(time_series=series, aggregates=["avg"], granularity="1h", start="300d-ago")
        yield DataSpec(time_series_data_specs=[spec])
예제 #10
0
    def test_get_dataframes_w_column_mapping(self, time_series_in_cdp):
        """Check that the "default" dataframe's columns are "timestamp" followed by the series labels."""
        series = [
            TimeSeries(id=time_series_in_cdp[0], aggregates=["avg"], label="cavg"),
            TimeSeries(id=time_series_in_cdp[0], aggregates=["cv"], label="ccv"),
            TimeSeries(id=time_series_in_cdp[1], aggregates=["avg"], label="sinavg"),
        ]
        spec = TimeSeriesDataSpec(time_series=series, aggregates=["avg"], granularity="1h", start="300d-ago")

        frames = DataTransferService(DataSpec([spec])).get_dataframes()

        actual_columns = list(frames["default"].columns.values)
        assert ["timestamp", "cavg", "ccv", "sinavg"] == actual_columns
예제 #11
0
def main():
    """Download the D02 and D03 well time series and write one CSV per spec label.

    Configures the session, collects tags from the known CSV files under
    ``../tags``, builds one TimeSeriesDataSpec per well ("d2", "d3"), prints the
    resulting DataSpec as JSON, fetches the dataframes, and writes each one to
    ``../data/<label>.csv`` while printing its shape.
    """
    configure_session(api_key=os.getenv("COGNITE_API_KEY"), project="akerbp", debug=True)

    tag_file_names = ("well_tags.csv", "routing.csv", "output.csv", "riser_tags.csv", "template_tags.csv")
    # Placements shared by both wells; each well adds its own placement on top.
    common_placements = ["T3 WGM", "Template", "Riser"]
    d03_placements = ["WellD03"] + common_placements
    d02_placements = ["WellD02"] + common_placements

    d03_frames = []
    d02_frames = []
    for directory, _, file_names in os.walk("../tags"):
        for file_name in file_names:
            if file_name not in tag_file_names:
                continue
            with open(os.path.join(directory, file_name)) as handle:
                frame = pd.read_csv(handle)
                frame = frame[~frame["tag"].isin(EXCLUDE_TAGS)]
                d03_frames.append(frame[frame["placement"].isin(d03_placements)])
                d02_frames.append(frame[frame["placement"].isin(d02_placements)])

    d02_tags = pd.concat(d02_frames, ignore_index=True).drop_duplicates(subset="tag")["tag"]
    d03_tags = pd.concat(d03_frames, ignore_index=True).drop_duplicates(subset="tag")["tag"]

    def as_time_series(tag):
        # Tags containing "ESV" or "18HV" use step aggregation with forward
        # fill; every other tag is averaged and linearly interpolated.
        if "ESV" in tag or "18HV" in tag:
            aggregate, strategy = "step", "ffill"
        else:
            aggregate, strategy = "avg", "linearInterpolation"
        return TimeSeries(name=tag, missing_data_strategy=strategy, aggregates=[aggregate])

    d02_series = [as_time_series(tag) for tag in d02_tags]
    d03_series = [as_time_series(tag) for tag in d03_tags]

    # Start of the requested range, expressed in epoch milliseconds.
    range_start = int(datetime(2017, 3, 1).timestamp() * 1e3)
    well_specs = [
        TimeSeriesDataSpec(
            time_series=series,
            aggregates=["avg"],
            granularity="10s",
            start=range_start,
            label=spec_label,
        )
        for spec_label, series in (("d2", d02_series), ("d3", d03_series))
    ]
    spec = DataSpec(time_series_data_specs=well_specs)

    service = DataTransferService(spec, num_of_processes=10)

    print(spec.to_JSON())

    frames_by_label = service.get_dataframes()
    for label, frame in frames_by_label.items():
        frame.to_csv(f"../data/{label}.csv")
        print(frame.shape)
예제 #12
0
 def test_instantiate_files_data_spec_file_id_invalid_type(self):
     """A file_ids mapping that mixes an int id with a str id must fail validation."""
     with pytest.raises(DataSpecValidationError):
         # Built inside the context so the error is caught whichever constructor raises it.
         mixed_id_files = FilesDataSpec(file_ids={"f1": 123, "f2": "456"})
         DataSpec(files_data_spec=mixed_id_files)
예제 #13
0
 def test_json_dumps_loads(self, ts_data_spec_dtos, files_data_spec_dto):
     """Check that a DataSpec survives a to_JSON() / from_JSON() round trip unchanged."""
     data_spec = DataSpec(time_series_data_specs=ts_data_spec_dtos, files_data_spec=files_data_spec_dto)
     json_repr = data_spec.to_JSON()
     ds = DataSpec.from_JSON(json_repr)
     # Use the == operator rather than calling __eq__ directly: a direct call can
     # return NotImplemented, which is truthy and would make the assert pass vacuously.
     assert ds == data_spec
예제 #14
0
 def test_from_JSON_str(self):
     """from_JSON() must reject a JSON string whose content is not a valid data spec."""
     invalid_payload = json.dumps({"blabla": "dada"})
     with pytest.raises(DataSpecValidationError):
         DataSpec.from_JSON(invalid_payload)
예제 #15
0
 def test_from_JSON_invalid(self):
     """from_JSON() must reject a plain dict passed in place of a JSON string."""
     invalid_payload = {"blabla": "dada"}
     with pytest.raises(DataSpecValidationError):
         DataSpec.from_JSON(invalid_payload)
예제 #16
0
 def test_instantiate_ts_data_spec_invalid_type(self):
     """A time_series_data_specs list holding a plain string must fail validation."""
     invalid_specs = ["str"]
     with pytest.raises(DataSpecValidationError):
         DataSpec(time_series_data_specs=invalid_specs)
예제 #17
0
 def test_instantiate_data_spec(self, ts_data_spec_dtos):
     """Constructing a DataSpec from time series specs plus a files spec must not raise."""
     files_spec = FilesDataSpec(file_ids={"name": 123})
     DataSpec(ts_data_spec_dtos, files_data_spec=files_spec)
예제 #18
0
 def test_instantiate_files_data_spec_file_ids_invalid_type(self):
     """A FilesDataSpec whose file_ids is a list rather than a mapping must fail validation."""
     with pytest.raises(DataSpecValidationError):
         # Built inside the context so the error is caught whichever constructor raises it.
         list_id_files = FilesDataSpec(file_ids=[1, 2, 3])
         DataSpec(files_data_spec=list_id_files)