コード例 #1
0
def generate_data_spec(last_processed_timestamp, granularity="10s"):
    """Build a DataSpec covering wells D02 and D03.

    Walks ``../tags`` for the known tag CSV files, drops EXCLUDE_TAGS, and
    builds one TimeSeriesDataSpec per well (labels "d2" and "d3") spanning
    ``last_processed_timestamp`` up to "now".

    Args:
        last_processed_timestamp: Start of the window (epoch milliseconds,
            to match the millisecond ``end`` computed below).
        granularity: Aggregation granularity string, e.g. "10s".

    Returns:
        DataSpec containing the two TimeSeriesDataSpecs.
    """
    TAG_FILES = ("well_tags.csv", "routing.csv", "riser_tags.csv", "output.csv", "template_tags.csv")
    shared_placements = ["T3 WGM", "Template", "Riser"]

    tags_d03 = []
    tags_d02 = []
    for root, _subdirs, files in os.walk("../tags"):
        for file in files:
            if file not in TAG_FILES:
                continue
            with open(os.path.join(root, file)) as f:
                df = pd.read_csv(f)
            df = df[~df["tag"].isin(EXCLUDE_TAGS)]
            tags_d03.append(df[df["placement"].isin(["WellD03"] + shared_placements)])
            tags_d02.append(df[df["placement"].isin(["WellD02"] + shared_placements)])

    tags_d02_concat = pd.concat(tags_d02, ignore_index=True).drop_duplicates(subset="tag")
    tags_d03_concat = pd.concat(tags_d03, ignore_index=True).drop_duplicates(subset="tag")

    def build_time_series(tags):
        # Valve tags (ESV/18HV) are discrete: step aggregate + forward fill.
        # All other tags are continuous: average + linear interpolation.
        series = []
        for tag in tags:
            is_valve = "ESV" in tag or "18HV" in tag
            series.append(
                TimeSeries(
                    name=tag,
                    missing_data_strategy="ffill" if is_valve else "linearInterpolation",
                    aggregates=["step" if is_valve else "avg"],
                )
            )
        return series

    # Compute "now" once so both specs share the exact same end timestamp.
    # (The original evaluated datetime.now() twice, so the two specs could
    # end at slightly different times.)
    end = int(datetime.now().timestamp() * 1e3)

    def make_spec(time_series, label):
        return TimeSeriesDataSpec(
            time_series=time_series,
            aggregates=["avg"],
            granularity=granularity,
            start=last_processed_timestamp,
            end=end,
            label=label,
            missing_data_strategy="ffill",
        )

    d02_tsds = make_spec(build_time_series(tags_d02_concat["tag"]), "d2")
    d03_tsds = make_spec(build_time_series(tags_d03_concat["tag"]), "d3")

    return DataSpec(time_series_data_specs=[d02_tsds, d03_tsds])
コード例 #2
0
def ts_data_spec_dtos(time_series_in_cdp):
    """Yield a pair of TimeSeriesDataSpecs ("ds1" @ 10m, "ds2" @ 1h) over the
    same two CDP time series and the shared reasonable test interval."""
    shared_series = [
        TimeSeries(id=time_series_in_cdp[0], aggregates=["step"], missing_data_strategy="ffill"),
        TimeSeries(id=time_series_in_cdp[1]),
    ]
    interval_start = TEST_TS_REASONABLE_INTERVAL["start"]
    interval_end = TEST_TS_REASONABLE_INTERVAL["end"]

    spec_10m = TimeSeriesDataSpec(
        time_series=shared_series,
        aggregates=["avg"],
        granularity="10m",
        label="ds1",
        start=interval_start,
        end=interval_end,
    )
    spec_1h = TimeSeriesDataSpec(
        time_series=shared_series,
        aggregates=["avg"],
        granularity="1h",
        start=interval_start,
        end=interval_end,
        missing_data_strategy="linearInterpolation",
        label="ds2",
    )

    yield [spec_10m, spec_1h]
コード例 #3
0
def ts_data_spec_dtos():
    """Yield two TimeSeriesDataSpecs ("ds1" @ 10m, "ds2" @ 1h) over the
    "constant" and "sinus" series for a fixed historical window."""
    window_start = 1522188000000
    window_end = 1522620000000
    shared_series = [
        TimeSeries(name="constant", aggregates=["step"], missing_data_strategy="ffill"),
        TimeSeries(name="sinus"),
    ]

    spec_10m = TimeSeriesDataSpec(
        time_series=shared_series,
        aggregates=["avg"],
        granularity="10m",
        label="ds1",
        start=window_start,
        end=window_end,
    )
    spec_1h = TimeSeriesDataSpec(
        time_series=shared_series,
        aggregates=["avg"],
        granularity="1h",
        start=window_start,
        end=window_end,
        missing_data_strategy="linearInterpolation",
        label="ds2",
    )

    yield [spec_10m, spec_1h]
コード例 #4
0
 def test_instantiate_ts_data_spec_duplicate_labels(self):
     """A DataSpec whose time-series specs both resolve to the label
     "default" (one implicit, one explicit) must raise."""
     with pytest.raises(DataSpecValidationError):
         implicit_default = TimeSeriesDataSpec(
             time_series=[TimeSeries("ts1")], aggregates=["avg"], granularity="1s"
         )
         explicit_default = TimeSeriesDataSpec(
             time_series=[TimeSeries("ts1")], aggregates=["avg"], granularity="1s", label="default"
         )
         DataSpec(time_series_data_specs=[implicit_default, explicit_default])
コード例 #5
0
 def test_instantiate_ts_data_spec_invalid_time_series_types(self):
     """Raw dicts are not valid TimeSeries entries and must be rejected."""
     with pytest.raises(DataSpecValidationError):
         bad_spec = TimeSeriesDataSpec(
             time_series=[{"id": 1234}], aggregates=["avg"], granularity="1s"
         )
         DataSpec(time_series_data_specs=[bad_spec])
コード例 #6
0
 def test_instantiate_ts_data_spec_time_series_not_list(self):
     """Passing a bare TimeSeries instead of a list must be rejected."""
     with pytest.raises(DataSpecValidationError):
         bad_spec = TimeSeriesDataSpec(
             time_series=TimeSeries(id=1234), aggregates=["avg"], granularity="1s"
         )
         DataSpec(time_series_data_specs=[bad_spec])
コード例 #7
0
    def test_get_dataframes_w_column_mapping(self, time_series_in_cdp):
        """Dataframe columns should follow the per-series labels, preceded
        by the timestamp column."""
        first_id = time_series_in_cdp[0]
        second_id = time_series_in_cdp[1]
        labeled_series = [
            TimeSeries(id=first_id, aggregates=["avg"], label="cavg"),
            TimeSeries(id=first_id, aggregates=["cv"], label="ccv"),
            TimeSeries(id=second_id, aggregates=["avg"], label="sinavg"),
        ]

        spec = TimeSeriesDataSpec(
            time_series=labeled_series, aggregates=["avg"], granularity="1h", start="300d-ago"
        )
        service = DataTransferService(DataSpec([spec]))

        frames = service.get_dataframes()
        assert ["timestamp", "cavg", "ccv", "sinavg"] == list(frames["default"].columns.values)
コード例 #8
0
    def data_spec(self, time_series_in_cdp):
        """Yield a DataSpec with four labeled views of the same CDP series,
        each requesting different aggregates."""
        source_id = time_series_in_cdp[0]
        per_label_aggregates = [
            ("ts1", ["avg", "min"]),
            ("ts2", ["cv"]),
            ("ts3", ["max", "count"]),
            ("ts4", ["step"]),
        ]
        series = [
            TimeSeries(id=source_id, aggregates=aggs, label=label)
            for label, aggs in per_label_aggregates
        ]

        tsds = TimeSeriesDataSpec(
            time_series=series, aggregates=["avg"], granularity="1h", start="300d-ago"
        )
        yield DataSpec(time_series_data_specs=[tsds])
コード例 #9
0
def main():
    """Fetch D02/D03 tag time series from CDP and dump each spec to CSV.

    Reads tag lists from ``../tags``, drops EXCLUDE_TAGS, builds one
    TimeSeriesDataSpec per well (labels "d2"/"d3"), downloads the
    dataframes via DataTransferService, and writes ``../data/<label>.csv``.
    """
    configure_session(api_key=os.getenv("COGNITE_API_KEY"), project="akerbp", debug=True)

    TAG_FILES = ("well_tags.csv", "routing.csv", "output.csv", "riser_tags.csv", "template_tags.csv")
    shared_placements = ["T3 WGM", "Template", "Riser"]

    tags_d03 = []
    tags_d02 = []
    for root, _subdirs, files in os.walk("../tags"):
        for file in files:
            if file not in TAG_FILES:
                continue
            with open(os.path.join(root, file)) as f:
                df = pd.read_csv(f)
            df = df[~df["tag"].isin(EXCLUDE_TAGS)]
            tags_d03.append(df[df["placement"].isin(["WellD03"] + shared_placements)])
            tags_d02.append(df[df["placement"].isin(["WellD02"] + shared_placements)])

    tags_d02_concat = pd.concat(tags_d02, ignore_index=True).drop_duplicates(subset="tag")
    tags_d03_concat = pd.concat(tags_d03, ignore_index=True).drop_duplicates(subset="tag")

    def build_time_series(tags):
        # Valve tags (ESV/18HV) are discrete: step aggregate + forward fill.
        # All other tags are continuous: average + linear interpolation.
        series = []
        for tag in tags:
            is_valve = "ESV" in tag or "18HV" in tag
            series.append(
                TimeSeries(
                    name=tag,
                    missing_data_strategy="ffill" if is_valve else "linearInterpolation",
                    aggregates=["step" if is_valve else "avg"],
                )
            )
        return series

    # Fixed historical start; both specs share it (epoch milliseconds).
    start = int(datetime(2017, 3, 1).timestamp() * 1e3)

    def make_spec(time_series, label):
        return TimeSeriesDataSpec(
            time_series=time_series,
            aggregates=["avg"],
            granularity="10s",
            start=start,
            label=label,
        )

    data_spec = DataSpec(
        time_series_data_specs=[
            make_spec(build_time_series(tags_d02_concat["tag"]), "d2"),
            make_spec(build_time_series(tags_d03_concat["tag"]), "d3"),
        ]
    )

    dts = DataTransferService(data_spec, num_of_processes=10)

    print(data_spec.to_JSON())

    for label, df in dts.get_dataframes().items():
        df.to_csv(f"../data/{label}.csv")
        print(df.shape)