Code Example #1
def test_mojo_ids():

    # Train a model
    airlines = h2o.import_file(
        path=pyunit_utils.locate("smalldata/testng/airlines_train.csv"))
    model = H2OGradientBoostingEstimator(ntrees=1)
    model.train(x=["Origin", "Dest"],
                y="IsDepDelayed",
                training_frame=airlines,
                verbose=False)

    # Save the previously created model into a temporary file
    original_model_filename = tempfile.mkdtemp()
    original_model_filename = model.save_mojo(original_model_filename)

    original_model_id = model.model_id
    print(original_model_id)

    # Import MOJO from the temporary file
    mojo_model = h2o.import_mojo(original_model_filename,
                                 model_id=original_model_id)
    print(mojo_model.model_id)
    assert_equals(mojo_model.model_id, original_model_id,
                  "Ids should be the same.")

    # Download the MOJO
    original_model_filename = model.download_mojo(original_model_filename)

    # Upload MOJO from the temporary file
    mojo_model_up = h2o.upload_mojo(original_model_filename,
                                    model_id=original_model_id)
    print(mojo_model_up.model_id)
    assert_equals(mojo_model_up.model_id, original_model_id,
                  "Ids should be the same.")

    # Load MOJO model from file
    mojo_model_from_file = H2OGenericEstimator.from_file(
        original_model_filename, original_model_id)
    print(mojo_model_from_file.model_id)
    assert_equals(mojo_model_from_file.model_id, original_model_id,
                  "Ids should be the same.")

    # Test initializing the model_id from the file path
    mojo_model_up_wid = h2o.upload_mojo(original_model_filename)
    print(mojo_model_up_wid.model_id)
    assert_equals(mojo_model_up_wid.model_id, original_model_id,
                  "Ids should be the same.")

    mojo_model_im_wid = h2o.import_mojo(original_model_filename)
    print(mojo_model_im_wid.model_id)
    assert_equals(mojo_model_im_wid.model_id, original_model_id,
                  "Ids should be the same.")
Code Example #2
def mojo_convenience():
    # Train a model
    airlines = h2o.import_file(
        path=pyunit_utils.locate("smalldata/testng/airlines_train.csv"))
    model = H2OGradientBoostingEstimator(ntrees=1)
    model.train(x=["Origin", "Dest"],
                y="IsDepDelayed",
                training_frame=airlines)

    # Save the previously created model into a temporary file
    original_model_filename = tempfile.mkdtemp()
    original_model_filename = model.save_mojo(original_model_filename)

    # Load the model from the temporary file
    mojo_model = h2o.import_mojo(original_model_filename)
    assert isinstance(mojo_model, H2OGenericEstimator)

    # Test scoring is available on the model
    predictions = mojo_model.predict(airlines)
    assert predictions is not None
    assert predictions.nrows == 24421

    #####
    # MOJO UPLOAD TEST
    #####

    # Download the MOJO
    original_model_filename = model.download_mojo(original_model_filename)
    # Load the model from the temporary file
    mojo_model = h2o.upload_mojo(original_model_filename)
    assert isinstance(mojo_model, H2OGenericEstimator)

    # Test scoring is available on the model
    predictions = mojo_model.predict(airlines)
    assert predictions is not None
    assert predictions.nrows == 24421

    #####
    # MOJO to POJO Conversion test with POJO re-import
    #####

    pojo_directory = os.path.join(pyunit_utils.locate("results"),
                                  model.model_id + ".java")
    pojo_path = model.download_pojo(path=pojo_directory)
    mojo2_model = h2o.import_mojo(pojo_path)

    predictions2 = mojo2_model.predict(airlines)
    assert predictions2 is not None
    assert predictions2.nrows == 24421
    assert_frame_equal(predictions.as_data_frame(),
                       predictions2.as_data_frame())
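Note that model.save_mojo() takes a target directory and returns the full path of the written MOJO zip, which is why the snippets above immediately reassign original_model_filename. A minimal sketch, assuming a trained model and a running cluster:

import tempfile

mojo_dir = tempfile.mkdtemp()
mojo_path = model.save_mojo(mojo_dir)  # returns e.g. <mojo_dir>/<model_id>.zip
mojo_model = h2o.import_mojo(mojo_path)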
Code Example #3
def prostate_pojo_import():
    prostate = h2o.import_file(path=pyunit_utils.locate("smalldata/logreg/prostate.csv"))
    prostate = prostate.drop("ID")
    prostate['CAPSULE'] = prostate['CAPSULE'].asfactor()

    model = H2OGradientBoostingEstimator()
    model.train(
        y="CAPSULE",
        training_frame=prostate
    )
    
    sandbox_dir = pyunit_utils.locate("results")
    pojo_path = h2o.download_pojo(model, path=sandbox_dir)

    model_imported = h2o.import_mojo(pojo_path)
    print(model_imported)

    # 1. check scoring
    preds_original = model.predict(prostate)
    preds_imported = model_imported.predict(prostate)
    assert_frame_equal(preds_original.as_data_frame(), preds_imported.as_data_frame())

    # 2. check we can get PDPs
    pdp_original = model.partial_plot(data=prostate, cols=['AGE'], server=True, plot=False)
    pdp_imported = model_imported.partial_plot(data=prostate, cols=['AGE'], server=True, plot=False)
    assert_frame_equal(pdp_original[0].as_data_frame(), pdp_imported[0].as_data_frame())
Code Example #4
def mojo_convenience():

    # Train a model
    airlines = h2o.import_file(path=pyunit_utils.locate("smalldata/testng/airlines_train.csv"))
    model = H2OGradientBoostingEstimator(ntrees=1)
    model.train(x=["Origin", "Dest"], y="IsDepDelayed", training_frame=airlines)

    # Save the previously created model into a temporary file
    original_model_filename = tempfile.mkdtemp()
    original_model_filename = model.save_mojo(original_model_filename)
    
    # Load the model from the temporary file
    mojo_model = h2o.import_mojo(original_model_filename)
    assert isinstance(mojo_model, H2OGenericEstimator)
    
    # Test scoring is available on the model
    predictions = mojo_model.predict(airlines)
    assert predictions is not None
    assert predictions.nrows == 24421

    #####
    # MOJO UPLOAD TEST
    #####

    # Download the MOJO
    original_model_filename = model.download_mojo(original_model_filename)
    # Load the model from the temporary file
    mojo_model = h2o.upload_mojo(original_model_filename)
    assert isinstance(mojo_model, H2OGenericEstimator)

    # Test scoring is available on the model
    predictions = mojo_model.predict(airlines)
    assert predictions is not None
    assert predictions.nrows == 24421
Code Example #5
def show_entry_fields_monty():
    global num2
    num2 = e2.get()
    print(f"Number of SFC's are {num2}")
    # Import the MOJO for further work; it is not needed for single-shot predictions
    mojo_model = h2o.import_mojo(monty_path)
    #print(mojo_model)
    df1 = pd.DataFrame([vals_monty], columns=cols_monty)
    hf1 = h2o.H2OFrame(df1)
    predict = mojo_model.predict(hf1) / 1000
    orig_time = f"{datetime.now():%d-%m-%Y %H:%M:%S}"
    print(predict)
    predicted_val = datetime.now() + timedelta(seconds=int(predict))
    predicted_time = f"{predicted_val:%d-%m-%Y %H:%M:%S}"
    print(f'The Start time for the SFC is {orig_time}')
    print(
        f'The estimated time of completion for the first SFC is {predicted_time}'
    )
    # Since everything is processed in parallel, the quantity should be added
    full_time = predict + int(num2)
    full_val = datetime.now() + timedelta(seconds=int(full_time))
    shoporder_time = f"{full_val:%d-%m-%Y %H:%M:%S}"
    print(
        f'The estimated time of completion for the last SFC for the Shop Order is {shoporder_time}'
    )
Code Example #6
def test_high_cardinality_eigen():
    df = h2o.create_frame(rows=10000,
                          cols=10,
                          categorical_fraction=0.6,
                          integer_fraction=0,
                          binary_fraction=0,
                          real_range=100,
                          integer_range=100,
                          missing_fraction=0,
                          factors=10,
                          seed=1234)
    autoencoder = H2OAutoEncoderEstimator(categorical_encoding="eigen",
                                          reproducible=True,
                                          hidden=[50, 30],
                                          epochs=5,
                                          seed=42)
    autoencoder.train(training_frame=df)

    mojo = pyunit_utils.download_mojo(autoencoder)
    autoencoder_mojo = h2o.import_mojo(mojo["mojo_zip_path"])

    preds_ae_h2o = autoencoder.predict(df)
    preds_ae_mojo = autoencoder_mojo.predict(df)
    assert_frame_equal(preds_ae_mojo.as_data_frame(),
                       preds_ae_h2o.as_data_frame())
Code Example #7
def import_gam_mojo_regression(family):
    np.random.seed(1234)
    n_rows = 10

    data = {
        "X1": np.random.randn(n_rows),
        "X2": np.random.randn(n_rows),
        "X3": np.random.randn(n_rows),
        "W": np.random.choice([10, 20], size=n_rows),
        "Y": np.random.choice([0, 0, 0, 0, 0, 10, 20, 30], size=n_rows) + 0.1
    }

    train = h2o.H2OFrame(pd.DataFrame(data))
    test = train.drop("W")
    print(train)
    h2o_model = H2OGeneralizedAdditiveEstimator(family=family,
                                                gam_columns=["X3"],
                                                weights_column="W",
                                                lambda_=0,
                                                bs=[2],
                                                tweedie_variance_power=1.5,
                                                tweedie_link_power=0)
    h2o_model.train(x=["X1", "X2"], y="Y", training_frame=train)
    print(h2o_model)

    predict_w = h2o_model.predict(train)
    # scoring without weight column
    predict = h2o_model.predict(test) 
    
    # get train perf on a cloned frame (to avoid re-using cached metrics - force to recalculate) 
    train_clone = h2o.H2OFrame(train.as_data_frame(use_pandas=True))
    model_perf_on_train = h2o_model.model_performance(test_data=train_clone)

    # ditto on test
    test_clone = h2o.H2OFrame(test.as_data_frame(use_pandas=True))
    model_perf_on_test = h2o_model.model_performance(test_data=test_clone)

    # should produce same frame
    pyunit_utils.compare_frames_local(predict_w, predict, prob=1, tol=1e-6)

    # Save the MOJO to a temporary file
    original_model_filename = tempfile.mkdtemp()
    original_model_filename = h2o_model.save_mojo(original_model_filename)

    # Load the model from the temporary file
    mojo_model = h2o.import_mojo(original_model_filename)

    predict_mojo_w = mojo_model.predict(train)
    predict_mojo = mojo_model.predict(test)

    # Both should produce same results as in-H2O models
    pyunit_utils.compare_frames_local(predict_mojo_w, predict, prob=1, tol=1e-6)
    pyunit_utils.compare_frames_local(predict_mojo, predict, prob=1, tol=1e-6)

    mojo_perf_on_train = mojo_model.model_performance(test_data=train_clone)
    assert abs(mojo_perf_on_train._metric_json["MSE"] - model_perf_on_train._metric_json["MSE"]) < 1e-6

    mojo_perf_on_test = mojo_model.model_performance(test_data=test_clone)
    assert abs(mojo_perf_on_test._metric_json["MSE"] - model_perf_on_test._metric_json["MSE"]) < 1e-6
Code Example #8
def onSchedule(context):
    """ onSchedule is where you load and read properties
        this function is called 1 time when the processor is scheduled to run
    """
    global mojo_model
    h2o.init()
    # instantiate H2O-3's MOJO Model
    mojo_model_filepath = context.getProperty("MOJO Model Filepath")
    mojo_model = h2o.import_mojo(mojo_model_filepath)
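Once onSchedule has populated the global mojo_model, later processor callbacks can score incoming records with it. A hedged sketch of such a helper (the function name and record layout are hypothetical):

import pandas as pd

def score_record(record):
    # record: dict mapping feature names to values, matching the MOJO's training columns
    frame = h2o.H2OFrame(pd.DataFrame([record]))
    return mojo_model.predict(frame).as_data_frame()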
Code Example #9
def read_models(path):
    """
    Return a dict of the models inside the specified folder.
    """
    models = {}
    for dirpath, dirnames, filenames in walk(path):
        for file in filenames:
            models[file] = h2o.import_mojo(f'{dirpath}/{file}')
    return models
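A hypothetical call, assuming the ensemble MOJO zips live in a relative models folder:

models = read_models('../../models/mojo_50_ensemble')
for name, mojo in models.items():
    print(name, type(mojo))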
Code Example #10
def monty_shift_so_forecaster(time, sfc_released, date_stamp=None):
    print(
        f"Time taken for one SFC's is {time} and the total SFC released for the next hour {sfc_released}"
    )
    cols_monty = [
        'shifts', 'SFC_Completed', 'SFC_Released', 'Day of week (name)',
        'Unique concatenate(ITEM_GROUP)', 'MEAN_TOTAL_TIME', 'Month (number)'
    ]
    locale.setlocale(locale.LC_ALL, 'deu_deu')
    flow_state = 1
    if date_stamp is None:
        hour = f"{datetime.now():%H}"
        day_int = f"{datetime.now():%d}"
        day_str = datetime.today().strftime('%A')
        month_num = f"{datetime.now():%m}"
    else:
        hour = f"{datetime.strptime(date_stamp,'%d-%m-%Y %H:%M:%S'):%H}"
        day_int = f"{datetime.strptime(date_stamp,'%d-%m-%Y %H:%M:%S'):%d}"
        day_str = datetime.strptime(date_stamp,
                                    '%d-%m-%Y %H:%M:%S').strftime('%A')
        month_num = f"{datetime.strptime(date_stamp,'%d-%m-%Y %H:%M:%S'):%m}"
        flow_state = 0

    print(day_str, day_int, month_num, hour)
    if int(hour) >= 6 and int(hour) < 14:
        shift_name = "Früh"
    elif int(hour) >= 14 and int(hour) < 22:
        shift_name = "Spät"
    else:
        # covers both the late-night (>= 22) and early-morning (< 6) hours
        shift_name = "Nacht"

    vals_monty = [
        shift_name,
        int(sfc_released) - random.randint(2, int(sfc_released)),
        int(sfc_released), day_str, 'A01PP000300, A01PP000100', time, month_num
    ]
    mojo_model = h2o.import_mojo(monty2_so_shifts)
    print(vals_monty)
    df3 = pd.DataFrame([vals_monty], columns=cols_monty)
    hfmty3 = h2o.H2OFrame(df3)
    predict = int(mojo_model.predict(hfmty3)) / 1000
    orig_time = f"{datetime.now():%d-%m-%Y %H:%M:%S}"
    print(predict)
    # Take the time from the last simulated result and add it to the screen;
    # it should be passed from the previous method call
    predicted_val = datetime.now() + timedelta(
        seconds=int(predict)) if flow_state == 1 else datetime.strptime(
            date_stamp, '%d-%m-%Y %H:%M:%S') + timedelta(seconds=int(predict))
    predicted_time = f"{predicted_val:%d-%m-%Y %H:%M:%S}"
    print(
        f'The estimated time of completion for the entire {sfc_released} SFCs are {predicted_time}'
    )
    return predicted_time
Code Example #11
def test_fold_column_is_used_properly_in_mojo():
    train = h2o.import_file(pu.locate("smalldata/iris/iris_train.csv"))
    test = h2o.import_file(pu.locate("smalldata/iris/iris_train.csv"))
    x = train.columns
    y = "petal_wid"
    x.remove(y)
    train["fold_col"] = h2o.H2OFrame([i % 5 for i in range(train.nrow)])
    test["fold_col"] = h2o.H2OFrame([i % 5 for i in range(test.nrow)])

    dl = H2ODeepLearningEstimator(keep_cross_validation_predictions=True,
                                  fold_column="fold_col")
    dl.train(x=x, y=y, training_frame=train)

    drf = H2ORandomForestEstimator(keep_cross_validation_predictions=True,
                                   fold_column="fold_col")
    drf.train(x=x, y=y, training_frame=train)

    gbm = H2OGradientBoostingEstimator(keep_cross_validation_predictions=True,
                                       fold_column="fold_col")
    gbm.train(x=x, y=y, training_frame=train)

    glm = H2OGeneralizedLinearEstimator(keep_cross_validation_predictions=True,
                                        fold_column="fold_col")
    glm.train(x=x, y=y, training_frame=train)

    se = H2OStackedEnsembleEstimator(training_frame=train,
                                     base_models=[gbm, drf, dl],
                                     metalearner_fold_column="fold_col")
    se.train(x=x, y=y, training_frame=train)

    try:
        tempdir = tempfile.mkdtemp()
        predictions = se.predict(test)
        mojoname = se.save_mojo(tempdir)
        mojo_model = h2o.import_mojo(mojoname)
        try:
            mojo_predictions1 = mojo_model.predict(test)
        except Exception:
            assert False, "Can't use the SE loaded from mojo to predict with the whole dataset including the fold_column"
        try:
            mojo_predictions2 = mojo_model.predict(
                test[x + [y]])  # without the fold column present
        except Exception:
            assert False, "Can't use the SE loaded from mojo to predict with the whole dataset without the fold_column"
        assert (predictions == mojo_predictions1).all()
        assert (predictions == mojo_predictions2).all()
    finally:
        shutil.rmtree(tempdir)
Code Example #12
def ts_shoporder_monty_forecaster(time, sfc_released, date_stamp=None):
    print(
        f"Time taken for one SFC's is {time} and the total SFC released for the next hour {sfc_released}"
    )
    cols_monty = [
        'SINGLE_SFC_TIME', 'SFC_Completed', 'SFC_Released', 'LineItems',
        'Unique concatenate(Day of week (name))',
        'Unique concatenate(ITEM_GROUP)'
    ]
    locale.setlocale(locale.LC_ALL, 'deu_deu')
    flow_state = 1
    if date_stamp is None:
        hour = f"{datetime.now():%H}"
        day_int = f"{datetime.now():%d}"
        day_str = datetime.today().strftime('%A')
        month_num = f"{datetime.now():%m}"
    else:
        hour = f"{datetime.strptime(date_stamp,'%Y-%m-%d %H:%M:%S'):%H}"
        day_int = f"{datetime.strptime(date_stamp,'%Y-%m-%d %H:%M:%S'):%d}"
        day_str = datetime.strptime(date_stamp,
                                    '%Y-%m-%d %H:%M:%S').strftime('%A')
        month_num = f"{datetime.strptime(date_stamp,'%Y-%m-%d %H:%M:%S'):%m}"
        flow_state = 0

    print(day_str, day_int, month_num, hour)
    vals_monty = [
        time,
        int(sfc_released) - random.randint(2, int(sfc_released)),
        int(sfc_released),
        int(sfc_released) * 10, day_str, 'A01PP000300, A01PP000100'
    ]
    # Import the MOJO for further work; it is not needed for single-shot predictions
    mojo_model = h2o.import_mojo(monty2_so_daybyday)
    print(vals_monty)
    df3 = pd.DataFrame([vals_monty], columns=cols_monty)
    hf3 = h2o.H2OFrame(df3)
    predict = int(mojo_model.predict(hf3)) / 1000000
    orig_time = f"{datetime.now():%d-%m-%Y %H:%M:%S}"
    print(predict)
    predicted_val = datetime.now() + timedelta(
        seconds=int(predict)) if flow_state == 1 else datetime.strptime(
            date_stamp, '%Y-%m-%d %H:%M:%S') + timedelta(seconds=int(predict))
    predicted_time = f"{predicted_val:%d-%m-%Y %H:%M:%S}"
    print(
        f'The estimated time of completion for the entire {sfc_released} SFCs are {predicted_time}'
    )
    return predicted_time
Code Example #13
def estimate_timeseries_shoporder_monty(time, sfc_released):
    print(
        f"Time taken for one SFC's is {time} and the total SFC released for the next hour {sfc_released}"
    )
    cols_monty = [
        'Sum(ELAPSED_QUEUE_TIME)', 'First(DATE_TIME)', 'Last(DATE_TIME)',
        'Max(transfer_time)', 'Sum(ELAPSED_TIME)', 'SFC_Released',
        'Unique count(WORK_CENTER)', 'Events', 'Min(transfer_time)', 'Hour',
        'Day of month', 'Day of week (name)', 'SFC_Completed',
        'Mode(transfer_time)', 'Month (number)', 'Sum(QTY_SCRAPPED)',
        'Sum(QTY_NON_CONFORMED)'
    ]
    locale.setlocale(locale.LC_ALL, 'deu_deu')
    hour = f"{datetime.now():%H}"
    day_int = f"{datetime.now():%d}"
    day_str = datetime.today().strftime('%A')
    month_num = f"{datetime.now():%m}"
    print(day_str, day_int, month_num, hour)
    vals_monty = [
        time, '02.07.19 16:00:07', '02.07.19 16:11:31', '5m 3s', time,
        sfc_released, 8, 1458, '7s',
        int(hour),
        int(day_int), day_str, sfc_released, '8s', month_num, 0, 0
    ]
    # Import the MOJO for further work; it is not needed for single-shot predictions
    mojo_model = h2o.import_mojo(monty_so_path)
    print(vals_monty)
    df2 = pd.DataFrame([vals_monty], columns=cols_monty)
    hf2 = h2o.H2OFrame(df2)
    predict = int(mojo_model.predict(hf2)) / 10000
    orig_time = f"{datetime.now():%d-%m-%Y %H:%M:%S}"
    print(predict)
    #predicted_val = datetime.now() + timedelta(seconds=int(predict))
    # if(day_str == 'Samstag'):
    #     predicted_val = datetime.now() + timedelta(seconds=int(predict + 172800)) if date_stamp == None else datetime.strptime(date_stamp,'%Y-%m-%d %H:%M:%S') + timedelta(seconds=int(predict))
    # elif(day_str == 'Sonntag'):
    #     predicted_val = datetime.now() + timedelta(seconds=int(predict + 86400)) if date_stamp == None else datetime.strptime(date_stamp,'%Y-%m-%d %H:%M:%S') + timedelta(seconds=int(predict))
    # else:
    #     predicted_val = datetime.now() + timedelta(seconds=int(predict)) if date_stamp == None else datetime.strptime(date_stamp,'%Y-%m-%d %H:%M:%S') + timedelta(seconds=int(predict))
    predicted_val = datetime.now() + timedelta(seconds=int(predict))
    predicted_time = f"{predicted_val:%d-%m-%Y %H:%M:%S}"
    print(
        f'The estimated time of completion for the entire {sfc_released} SFCs are {predicted_time}'
    )
    return predicted_time
Code Example #14
def estimate_shoporder(qty):
    print(f"Number of SFC's are {qty}")
    # Import the MOJO for further work; it is not needed for single-shot predictions
    mojo_model = h2o.import_mojo(model_path)
    #print(mojo_model)
    predict = mojo_model.predict(hf) / 1000
    orig_time = f"{datetime.now():%d-%m-%Y %H:%M:%S}"
    print(predict)
    predicted_val = datetime.now() + timedelta(seconds=int(predict))
    predicted_time = f"{predicted_val:%d-%m-%Y %H:%M:%S}"
    print(f'The Start time for the SFC is {orig_time}')
    print(
        f'The estimated time of completion for the first SFC is {predicted_time}'
    )
    full_time = predict + int(qty)
    full_val = datetime.now() + timedelta(seconds=int(full_time))
    shoporder_time = f"{full_val:%d-%m-%Y %H:%M:%S}"
    print(
        f'The estimated time of completion for the last SFC for the Shop Order is {shoporder_time}'
    )
    return shoporder_time
Code Example #15
def estimate_shoporder_monty(qty, casing, ventil):
    print(
        f"Number of SFC's are {qty} and casing is {casing} and chosen ventil is {ventil}"
    )
    cols_monty = [
        'Sum(ELAPSED_QUEUE_TIME)', 'Sum(ELAPSED_TIME)', 'Max(transfer_time)',
        'sfc_date_diff', 'diff_duration_secs', 'Mode(transfer_time)',
        'Last(DATE_TIME)', 'First(DATE_TIME)', 'Unique count(ROUTER)',
        'Min(transfer_time)', 'STUTZAB', 'Unique count(RESRCE)', 'VENTIL',
        'Unique count(WORK_CENTER)', 'Unique count(OPERATION)',
        'SFC_Lineitems', 'Unique count(ITEM_NO)'
    ]
    stut_val = casing
    ventil_val = ventil
    vals_monty = [
        '237580', '72481', '1m 35s', '289000.0', '21.061', '13s',
        '2019-12-06T09:26:50', '2019-12-06T09:22:01', '1', '8s', stut_val,
        '10', ventil_val, '4', '10', '10', '1'
    ]
    # Import the MOJO for further work; it is not needed for single-shot predictions
    mojo_model = h2o.import_mojo(monty_path)
    #print(mojo_model)
    model_prediction_val = mojo_model.predict(hf1)
    predict = model_prediction_val / 1000
    orig_time = f"{datetime.now():%d-%m-%Y %H:%M:%S}"
    #print(model_prediction_val)
    print(predict)
    predicted_val = datetime.now() + timedelta(seconds=int(predict))
    predicted_time = f"{predicted_val:%d-%m-%Y %H:%M:%S}"
    print(f'The Start time for the SFC is {orig_time}')
    print(
        f'The estimated time of completion for the first SFC is {predicted_time}'
    )
    # full_time = predict + int(qty)
    # full_val = datetime.now() + timedelta(seconds=int(full_time))
    # shoporder_time = f"{full_val:%d-%m-%Y %H:%M:%S}"
    # print(f'The estimated time of completion for the last SFC for the Shop Order is {shoporder_time}')
    # return shoporder_time
    return (predicted_time, int(predict) * 1000)
Code Example #16
def timeseries_shoporder_monty_millis2(time,
                                       sfc_released,
                                       sfc_factor,
                                       date_stamp=None):
    print(
        f"Time taken for one SFC's is {time} and the total SFC released for the next hour {sfc_released}"
    )
    cols_monty = [
        'SFC_Released', 'SFC_Completed', 'TOTAL_TIME',
        'Unique concatenate(ITEM_GROUP)', 'Last(DATE_TIME)',
        'First(DATE_TIME)', 'Day of month', 'Hour', 'Day of week (name)',
        'Unique count(WORK_CENTER)', 'Month (number)', 'Sum(QTY_SCRAPPED)',
        'Sum(QTY_NON_CONFORMED)'
    ]
    locale.setlocale(locale.LC_ALL, 'deu_deu')
    flow_state = 1
    if date_stamp is None:
        hour = f"{datetime.now():%H}"
        day_int = f"{datetime.now():%d}"
        day_str = datetime.today().strftime('%A')
        month_num = f"{datetime.now():%m}"
    else:
        hour = f"{datetime.strptime(date_stamp,'%Y-%m-%d %H:%M:%S'):%H}"
        day_int = f"{datetime.strptime(date_stamp,'%Y-%m-%d %H:%M:%S'):%d}"
        day_str = datetime.strptime(date_stamp,
                                    '%Y-%m-%d %H:%M:%S').strftime('%A')
        month_num = f"{datetime.strptime(date_stamp,'%Y-%m-%d %H:%M:%S'):%m}"
        flow_state = 0

    print(day_str, day_int, month_num, hour)
    vals_monty = [
        298, sfc_released, time, 'A01PP000300, A01PP000100',
        '09.09.19 12:59:58', '09.09.19 12:00:00',
        int(day_int),
        int(hour), day_str, 7, month_num, 0, 0
    ]
    # Import the MOJO for further work; it is not needed for single-shot predictions
    mojo_model = h2o.import_mojo(monty_so_path_millis)
    print(vals_monty)
    df3 = pd.DataFrame([vals_monty], columns=cols_monty)
    hf3 = h2o.H2OFrame(df3)
    predict = int(mojo_model.predict(hf3)) / 1000
    orig_time = f"{datetime.now():%d-%m-%Y %H:%M:%S}"
    print(predict)
    if sfc_factor < 1 and flow_state == 1:
        predicted_val = datetime.now() + timedelta(seconds=int(predict))
    elif sfc_factor < 1 and flow_state == 0:
        predicted_val = datetime.strptime(
            date_stamp, '%Y-%m-%d %H:%M:%S') + timedelta(seconds=int(predict))
    else:
        predict *= sfc_factor
        predicted_val = datetime.now() + timedelta(
            seconds=int(predict)) if flow_state == 1 else datetime.strptime(
                date_stamp, '%Y-%m-%d %H:%M:%S') + timedelta(
                    seconds=int(predict))

    predicted_time = f"{predicted_val:'%m/%d/%Y %H:%M:%S %p'}"
    print(
        f'The estimated time of completion for the entire {sfc_released} SFCs are {predicted_time}'
    )
    return predicted_time
Code Example #17
def estimate_sfc_totaltime(qty, casing, ventil, date_stamp=None):
    print(
        f"Number of SFC's are {qty} and casing is {casing} and chosen ventil is {ventil}"
    )
    stut_val = casing
    ventil_val = ventil
    # The feature column list is identical for every casing variant,
    # so define it once instead of repeating it in each branch.
    cols_monty = [
        'Sum(ELAPSED_QUEUE_TIME)', 'Sum(ELAPSED_TIME)',
        'Unique concatenate(ROUTER)', 'VENTIL',
        'First(Unique concatenate(DESCRIPTION))',
        'Unique count(WORK_CENTER)', 'Unique count(OPERATION)',
        'SFC_Lineitems', 'Unique count(RESRCE)',
        'First(Unique concatenate(ITEM_GROUP))', 'STUTZAB',
        'Unique count(ROUTER)', 'Unique count(ITEM_NO)'
    ]
    if stut_val == '152,4' and ventil_val != 'VE':
        vals_monty = [
            409916, 98321, 'A01PDR0017', ventil_val,
            'ATEX-Zähler für Serienaufträge, Aufträge Monti2, Rüstintervall Stutzen 1BS746',
            9, 17, 17, 17, 'A01PP000200, A01PP000100, A01PP000400', stut_val,
            1, 1
        ]
    elif stut_val == '250,0':
        vals_monty = [
            187778, 72761, 'A01PDR0023', ventil_val,
            'Rüstintervall Stutzen DN25, BK 4 V2', 4, 10, 10, 10,
            'A01PP000500, A00PP000500', stut_val, 1, 1
        ]
    elif stut_val == '220,0':
        vals_monty = [
            183803, 77224, 'A01PDR0023', ventil_val,
            'Rüstintervall Stutzen GM3/4, BK V2S, Aufträge Monti2', 4, 10, 10,
            10, 'A01PP000300, A00PP000300, A01PP000100', stut_val, 1, 1
        ]
    elif stut_val == '250,':
        vals_monty = [
            549271, 104155, 'A01PDR0019', ventil_val,
            'Rüstintervall Stutzen DN25, BK 4 V2 TC, ATEX-Zähler für Serienaufträge, Aufträge Monti2',
            7, 14, 14, 14,
            'A01PP000500, A00PP000600, A01PP000200, A01PP000100', stut_val, 1,
            1
        ]
    elif stut_val == '220':
        vals_monty = [
            564417, 88113, 'A01PDR0001', ventil_val,
            'Rüstintervall Stutzen GM3/4, Aufträge Monti2', 7, 15, 15, 15,
            'A01PP000300, A01PP000100', stut_val, 1, 1
        ]
    elif stut_val == '50':
        vals_monty = [
            0, 0, 'A01PDR0001', ventil_val,
            'Rüstintervall Stutzen GM3/4, Aufträge Monti2', 7, 15, 15, 15,
            'A01PP000300, A01PP000100', '220', 1, 1
        ]
    else:
        # Has Ventil VE (valve)
        vals_monty = [
            1547654, 99070, 'A01PDR0020', ventil_val,
            'ATEX-Zähler für Serienaufträge, Aufträge Monti2, Rüstintervall Stutzen 1BS746',
            8, 15, 15, 15, 'A01PP000200, A01PP000100, A01PP000400', stut_val,
            1, 1
        ]
    # Import the MOJO for further work; it is not needed for single-shot predictions
    mojo_model = h2o.import_mojo(monty_path_new)
    df4 = pd.DataFrame([vals_monty], columns=cols_monty)
    hf4 = h2o.H2OFrame(df4)
    #print(mojo_model)
    model_prediction_val = mojo_model.predict(hf4)
    predict = model_prediction_val / 1000
    orig_time = f"{datetime.now():%d-%m-%Y %H:%M:%S}"
    #print(model_prediction_val)
    print(predict)
    predicted_val = datetime.now() + timedelta(
        seconds=int(predict)) if date_stamp is None else datetime.strptime(
            date_stamp, '%Y-%m-%d %H:%M:%S') + timedelta(seconds=int(predict))
    #predicted_val = datetime.now() + timedelta(seconds=int(predict))
    predicted_time = f"{predicted_val:%d-%m-%Y %H:%M:%S}"
    print(f'The Start time for the SFC is {orig_time}')
    print(
        f'The estimated time of completion for the first SFC is {predicted_time}'
    )
    return (predicted_time, int(predict) * 1000)
Code Example #18
import argparse

import h2o
import pandas as pd
from h2o.estimators.random_forest import H2ORandomForestEstimator

parser = argparse.ArgumentParser(description='Random Forest Model')
parser.add_argument("-i", "--input", action='store', help="Input relative path to .csv input file (info in README.md)", type=str, default='input.csv')
parser.add_argument("-o", "--output", action='store', help="Output file name with extension", type=str, default='output.csv')
args = parser.parse_args()
input_file = args.input
output_file = args.output

h2o.init(nthreads=-1, max_mem_size=6)

# This model was created to predict the value (price per square meter) of each feature; that predicted value is then used to estimate population density.
print('----------------------- Model is being loaded -----------------------')
ModeloValor = h2o.import_mojo('ModeloValor.zip')
print('--------------------- Model loaded successfully ---------------------')

# Load the original dataset to predict on
print('------------------- Loading predicting dataset -------------------')
datosPredecirValor = h2o.upload_file(path=input_file)
# The input must be a table with the following columns: SES, Valor, CBD, Alimentador, Parques, Estaciones, Vias, Salud, Colegios
# The column names of the input table have to match the column names above; the row order is not relevant.
print('------------------- Dataset loaded successfully ------------------')
#Predict Value over each feature
print('----------------------- Predicting "Valor" -----------------------')
predictions = ModeloValor.predict(datosPredecirValor)
print('----------------- "Valor" predicted successfully -----------------')
# The model is deleted because models can be very large.
del ModeloValor
Code Example #19
test = h2o.H2OFrame(XY_test)

default_gbm_perf = gbm.model_performance(test)

res = score_estimator(gbm, X_train, X_test, XY_train, XY_test, target, formula)

mlflow.log_metrics(res)

# QA
# complete = h2o.H2OFrame(df_XY[variables+[target]])
# gbm.explain(complete)

path = "output/"
mojo_destination = gbm.save_mojo(path=path, force=True)
imported_model = h2o.import_mojo(mojo_destination)

mlflow.log_artifact(mojo_destination)

h2o.cluster().shutdown(prompt=False) 

mlflow.end_run()


# %%
# other validation functions

# https://scikit-learn.org/stable/auto_examples/linear_model/plot_tweedie_regression_insurance_claims.html
# https://scikit-learn.org/stable/auto_examples/linear_model/plot_poisson_regression_non_normal_loss.html

# from sklearn.utils import gen_even_slices
Code Example #20
def as_mojo_model(model):
    mojo_path = tempfile.mkdtemp()
    mojo_path = model.save_mojo(mojo_path)
    return h2o.import_mojo(mojo_path)
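A usage sketch for the helper above, assuming gbm is any trained H2O estimator and test_frame is an H2OFrame; round-tripping through a MOJO yields an H2OGenericEstimator whose predictions are expected to match the original model's:

mojo_gbm = as_mojo_model(gbm)
predictions = mojo_gbm.predict(test_frame)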
Code Example #21
import math
import pickle
import gzip
import os
import shutil

import pandas as pd
import numpy as np
import h2o

import settings


global _thisModelFit

h2o.init()

# Unpack the gzipped MOJO and re-save it as a .zip archive that h2o can import
with gzip.open(settings.pickle_path + 'glmFit.mojo', 'r') as fileIn, open(settings.pickle_path + 'glmFit.' + '.zip', 'wb') as fileOut:
    shutil.copyfileobj(fileIn, fileOut)
os.chmod(settings.pickle_path + 'glmFit.' + '.zip', 0o777)
_thisModelFit = h2o.import_mojo(settings.pickle_path + 'glmFit.' + '.zip')

def scoreglmFit(Speed_sensor, Vibration, Engine_Load, Coolant_Temp, Intake_Pressure, Engine_RPM, Speed_OBD, Intake_Air, Flow_Rate, Throttle_Pos, Voltage, Ambient, Accel, Engine_Oil_Temp, Speed_GPS, GPS_Longitude, GPS_Latitude, GPS_Bearing, GPS_Altitude, Turbo_Boost, Trip_Distance, Litres_Per_km, Accel_Ssor_Total, CO2, Trip_Time, CO_emission, HC_emission, PM_emission, NOx_emission, CO2_emission, Fuel_level, Oil_life, Vibration_alert, VibrationAlert_Total, Vibration_Recent, Turbo_alert, Emission_alert, Fog_control, Engine_control):
    "Output: EM_EVENTPROBABILITY, EM_CLASSIFICATION"

    try:
        _thisModelFit
    except NameError:
        # Re-import the MOJO if the module-level model is not defined
        _thisModelFit = h2o.import_mojo(settings.pickle_path + 'glmFit.' + '.zip')

    inputArray = pd.DataFrame([[Speed_sensor, Vibration, Engine_Load, Coolant_Temp, Intake_Pressure, Engine_RPM, Speed_OBD, Intake_Air, Flow_Rate, Throttle_Pos, Voltage, Ambient, Accel, Engine_Oil_Temp, Speed_GPS, GPS_Longitude, GPS_Latitude, GPS_Bearing, GPS_Altitude, Turbo_Boost, Trip_Distance, Litres_Per_km, Accel_Ssor_Total, CO2, Trip_Time, CO_emission, HC_emission, PM_emission, NOx_emission, CO2_emission, Fuel_level, Oil_life, Vibration_alert, VibrationAlert_Total, Vibration_Recent, Turbo_alert, Emission_alert, Fog_control, Engine_control]],
                              columns=['Speed_sensor', 'Vibration', 'Engine_Load', 'Coolant_Temp', 'Intake_Pressure', 'Engine_RPM', 'Speed_OBD', 'Intake_Air', 'Flow_Rate', 'Throttle_Pos', 'Voltage', 'Ambient', 'Accel', 'Engine_Oil_Temp', 'Speed_GPS', 'GPS_Longitude', 'GPS_Latitude', 'GPS_Bearing', 'GPS_Altitude', 'Turbo_Boost', 'Trip_Distance', 'Litres_Per_km', 'Accel_Ssor_Total', 'CO2', 'Trip_Time', 'CO_emission', 'HC_emission', 'PM_emission', 'NOx_emission', 'CO2_emission', 'Fuel_level', 'Oil_life', 'Vibration_alert', 'VibrationAlert_Total', 'Vibration_Recent', 'Turbo_alert', 'Emission_alert', 'Fog_control', 'Engine_control'],
                              dtype=float, index=[0])
    columnTypes = {'Speed_sensor':'numeric', 'Vibration':'numeric', 'Engine_Load':'numeric', 'Coolant_Temp':'numeric', 'Intake_Pressure':'numeric', 'Engine_RPM':'numeric', 'Speed_OBD':'numeric', 'Intake_Air':'numeric', 'Flow_Rate':'numeric', 'Throttle_Pos':'numeric', 'Voltage':'numeric', 'Ambient':'numeric', 'Accel':'numeric', 'Engine_Oil_Temp':'numeric', 'Speed_GPS':'numeric', 'GPS_Longitude':'numeric', 'GPS_Latitude':'numeric', 'GPS_Bearing':'numeric', 'GPS_Altitude':'numeric', 'Turbo_Boost':'numeric', 'Trip_Distance':'numeric', 'Litres_Per_km':'numeric', 'Accel_Ssor_Total':'numeric', 'CO2':'numeric', 'Trip_Time':'numeric', 'CO_emission':'numeric', 'HC_emission':'numeric', 'PM_emission':'numeric', 'NOx_emission':'numeric', 'CO2_emission':'numeric', 'Fuel_level':'numeric', 'Oil_life':'numeric', 'Vibration_alert':'numeric', 'VibrationAlert_Total':'numeric', 'Vibration_Recent':'numeric', 'Turbo_alert':'numeric', 'Emission_alert':'numeric', 'Fog_control':'numeric', 'Engine_control':'numeric'}
    h2oArray = h2o.H2OFrame(inputArray, column_types=columnTypes)
Code Example #22
def download_mojo_filename():
    fr = h2o.import_file(
        path=pyunit_utils.locate("smalldata/prostate/prostate.csv"))

    model = H2OGradientBoostingEstimator(ntrees=10, seed=1234)
    model.train(x=list(range(2, fr.ncol)), y=1, training_frame=fr)

    # Default location is current working directory and filename is model_id
    mojo_path = model.download_mojo()
    assert_equals(os.path.join(os.getcwd(), model.model_id + ".zip"),
                  mojo_path, "Not expected path")
    mojo_model = h2o.import_mojo(mojo_path)
    assert isinstance(mojo_model, H2OGenericEstimator)

    # Location is parent of current working directory and filename is model_id
    mojo_path = model.download_mojo("..")
    assert_equals(
        os.path.abspath(os.path.join(os.pardir, model.model_id + ".zip")),
        mojo_path, "Not expected path")
    mojo_model = h2o.import_mojo(mojo_path)
    assert isinstance(mojo_model, H2OGenericEstimator)

    # Location is home directory and filename is model_id
    mojo_path = model.download_mojo("~")
    assert_equals(
        os.path.abspath(
            os.path.expanduser(os.path.join("~", model.model_id + ".zip"))),
        mojo_path, "Not expected path")
    mojo_model = h2o.import_mojo(mojo_path)
    assert isinstance(mojo_model, H2OGenericEstimator)

    # Default location is the current working directory with a custom filename
    mojo_path = model.download_mojo("gbm_prostate.zip")
    assert_equals(os.path.join(os.getcwd(), "gbm_prostate.zip"), mojo_path,
                  "Not expected path")
    mojo_model = h2o.import_mojo(mojo_path)
    assert isinstance(mojo_model, H2OGenericEstimator)

    # Location is current working directory with custom filename
    mojo_path = model.download_mojo("./gbm_prostate.zip")
    assert_equals(os.path.join(os.getcwd(), "gbm_prostate.zip"), mojo_path,
                  "Not expected path")
    mojo_model = h2o.import_mojo(mojo_path)
    assert isinstance(mojo_model, H2OGenericEstimator)

    # Location is parent of current working directory with custom filename
    mojo_path = model.download_mojo("../gbm_prostate.zip")
    assert_equals(os.path.abspath(os.path.join(os.pardir, "gbm_prostate.zip")),
                  mojo_path, "Not expected path")
    mojo_model = h2o.import_mojo(mojo_path)
    assert isinstance(mojo_model, H2OGenericEstimator)

    # Location is home directory with custom filename
    mojo_path = model.download_mojo("~/gbm_prostate.zip")
    assert_equals(
        os.path.abspath(
            os.path.expanduser(os.path.join("~", "gbm_prostate.zip"))),
        mojo_path, "Not expected path")
    mojo_model = h2o.import_mojo(mojo_path)
    assert isinstance(mojo_model, H2OGenericEstimator)

    # Custom filename with custom path
    tmpdir = tempfile.mkdtemp()
    mojo_path = model.download_mojo(os.path.join(tmpdir, "gbm_prostate.zip"))
    assert_equals(os.path.join(tmpdir, "gbm_prostate.zip"), mojo_path,
                  "Not expected path")
    mojo_model = h2o.import_mojo(mojo_path)
    assert isinstance(mojo_model, H2OGenericEstimator)
Code Example #23
import Algorithmia
import h2o
import pandas as pd

client = Algorithmia.client()
model_file_path = "data://<USERNAME>/h2o_demo/DeepLearning_model_python_1611090304383_1.zip"

h2o.init(log_level="ERRR")

model_fpath = client.file(model_file_path).getFile().name
model = h2o.import_mojo(model_fpath)


def apply(input):
    df = pd.DataFrame(input)
    hf = h2o.H2OFrame(df)
    score = model.predict(hf).as_data_frame().to_dict()
    score = score["predict"][0]
    return {"claims": score}

# {"District": [1], "Group": "1-1.5l", "Age": ">35", "Holders": [3582]}
Code Example #24
def makeDlModel(subOpt=None,
                xCol=None,
                yCol=None,
                inpData=None,
                modelKey=None):

    log.info('[START] {}'.format('makeDlModel'))

    result = None

    try:

        saveModel = '{}/{}-{}-{}-{}-{}-{}.model'.format(
            globalVar['outPath'], serviceName, modelKey, 'final', 'h2o', 'act',
            '*')
        saveModelList = sorted(glob.glob(saveModel), reverse=True)
        xyCol = xCol.copy()
        xyCol.append(yCol)
        data = inpData[xyCol]

        # h2o.shutdown(prompt=False)

        if not subOpt['isInit']:
            h2o.init()
            h2o.no_progress()
            subOpt['isInit'] = True

        # If there is no saved model (or overwrite is requested), train a new one
        if subOpt['isOverWrite'] or (len(saveModelList) < 1):

            # 70/30 train/test split
            trainData, validData = train_test_split(data, test_size=0.3)
            # trainData = inpData

            # dlModel = H2OAutoML(max_models=30, max_runtime_secs=99999, balance_classes=True, seed=123)
            dlModel = H2OAutoML(max_models=20,
                                max_runtime_secs=99999,
                                balance_classes=True,
                                seed=123)
            dlModel.train(x=xCol,
                          y=yCol,
                          training_frame=h2o.H2OFrame(trainData),
                          validation_frame=h2o.H2OFrame(validData))
            # dlModel.train(x=xCol, y=yCol, training_frame=h2o.H2OFrame(data))
            fnlModel = dlModel.get_best_model()

            # Save the trained model
            saveModel = '{}/{}-{}-{}-{}-{}-{}.model'.format(
                globalVar['outPath'], serviceName, modelKey, 'final', 'h2o',
                'act',
                datetime.now().strftime('%Y%m%d'))
            log.info('[CHECK] saveModel : {}'.format(saveModel))
            os.makedirs(os.path.dirname(saveModel), exist_ok=True)

            # h2o.save_model(model=fnlModel, path=os.path.dirname(saveModel), filename=os.path.basename(saveModel), force=True)
            fnlModel.save_mojo(path=os.path.dirname(saveModel),
                               filename=os.path.basename(saveModel),
                               force=True)
        else:
            saveModel = saveModelList[0]
            log.info('[CHECK] saveModel : {}'.format(saveModel))
            fnlModel = h2o.import_mojo(saveModel)

        result = {
            'msg': 'succ',
            'dlModel': fnlModel,
            'saveModel': saveModel,
            'isExist': os.path.exists(saveModel)
        }

        return result

    except Exception as e:
        log.error('Exception : {}'.format(e))
        return result

    finally:
        # Always executed before the try/except block exits
        log.info('[END] {}'.format('makeDlModel'))
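A hypothetical call to the function above, assuming the surrounding script defines log, globalVar and serviceName, and that inpData is a pandas DataFrame holding the feature columns and the target:

result = makeDlModel(subOpt={'isInit': False, 'isOverWrite': False},
                     xCol=['x1', 'x2'],
                     yCol='y',
                     inpData=inpData,
                     modelKey='demo')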
Code Example #25
    def exec(self):

        log.info('[START] {}'.format("exec"))

        try:

            if platform.system() == 'Windows':

                globalVar['inpPath'] = 'E:/DATA/OUTPUT'
                globalVar['outPath'] = 'E:/DATA/OUTPUT'
                globalVar['modelPath'] = 'E:/DATA'

                # Option settings
                sysOpt = {
                    # Start/end dates
                    'srtDate': '2020-09-01',
                    'endDate': '2021-11-01',

                    # Model version (date)
                    'modelVer': '*'
                    # , 'modelVer': '20220220'
                }

            else:

                # Option settings
                sysOpt = {
                    # Start/end dates
                    'srtDate': globalVar['srtDate'],
                    'endDate': globalVar['endDate'],

                    # Model version (date)
                    'modelVer': '*'
                    # , 'modelVer': '20220220'
                }

            # modelDirKeyList = ['AI_2Y']
            # figActDirKeyList = ['ACT_2Y']
            # figForDirKeyList = ['FOR_2Y']
            #
            # for k, modelDirKey in enumerate(modelDirKeyList):
            #     figActDirKey = figActDirKeyList[k]
            #     figForDirKey = figForDirKeyList[k]

            modelDirKey = 'AI_2Y'
            figActDirKey = 'ACT_2Y'
            figForDirKey = 'FOR_2Y'
            modelVer = sysOpt['modelVer']

            isDlModelInit = False

            # DB connection info
            pymysql.install_as_MySQLdb()

            # Read environment variables
            config = configparser.ConfigParser()
            config.read(globalVar['sysPath'], encoding='utf-8')
            dbUser = config.get('mariadb', 'user')
            dbPwd = config.get('mariadb', 'pwd')
            dbHost = config.get('mariadb', 'host')
            dbPort = config.get('mariadb', 'port')
            dbName = config.get('mariadb', 'dbName')

            dbEngine = create_engine('mariadb://{0}:{1}@{2}:{3}/{4}'.format(
                dbUser, dbPwd, dbHost, dbPort, dbName))
            sessMake = sessionmaker(bind=dbEngine)
            session = sessMake()

            # Station information
            # inpPosFile = '{}/{}'.format(globalVar['cfgPath'], 'stnInfo/GA_STN_INFO.xlsx')
            # posData = pd.read_excel(inpPosFile)
            # posDataL1 = posData[['id', 'lat', 'lon']]

            res = session.execute("""
                SELECT *
                FROM TB_STN_INFO
                """).fetchall()

            posDataL1 = pd.DataFrame(res).rename(
                {
                    'ID': 'id',
                    'dtDateKst': 'DATE_TIME_KST',
                    'LAT': 'lat',
                    'LON': 'lon'
                },
                axis='columns')

            lat1D = np.array(posDataL1['lat'])
            lon1D = np.array(posDataL1['lon'])

            # *******************************************************
            # Read UM data
            # *******************************************************
            dtSrtDate = pd.to_datetime(sysOpt['srtDate'], format='%Y-%m-%d')
            dtEndDate = pd.to_datetime(sysOpt['endDate'], format='%Y-%m-%d')
            dtIncDateList = pd.date_range(start=dtSrtDate,
                                          end=dtEndDate,
                                          freq=Day(1))

            # posLon = posInfo['lon']
            # posLat = posInfo['lat']
            # lon1D = np.array(posLon).reshape(1)
            # lat1D = np.array(posLat).reshape(1)

            cfgFile = '{}/{}'.format(
                globalVar['cfgPath'],
                'modelInfo/UMKR_l015_unis_H000_202110010000.grb2')
            # log.info("[CHECK] cfgFile : {}".format(cfgFile))

            cfgInfo = pygrib.open(cfgFile).select(name='Temperature')[1]
            lat2D, lon2D = cfgInfo.latlons()

            # =======================================================================
            # Nearest grid coordinates
            # =======================================================================
            posList = []

            # Initial data for the kdTree
            for i in range(0, lon2D.shape[0]):
                for j in range(0, lon2D.shape[1]):
                    coord = [lat2D[i, j], lon2D[i, j]]
                    posList.append(cartesian(*coord))

            tree = spatial.KDTree(posList)

            # coord = cartesian(posInfo['lat'], posInfo['lon'])
            row1D = []
            col1D = []
            for ii, posInfo in posDataL1.iterrows():
                coord = cartesian(posInfo['lat'], posInfo['lon'])
                closest = tree.query([coord], k=1)
                cloIdx = closest[1][0]
                row = int(cloIdx / lon2D.shape[1])
                col = cloIdx % lon2D.shape[1]

                row1D.append(row)
                col1D.append(col)

            row2D, col2D = np.meshgrid(row1D, col1D)

            # dtIncDateInfo = dtIncDateList[0]
            dsDataL2 = xr.Dataset()
            for ii, dtIncDateInfo in enumerate(dtIncDateList):
                log.info("[CHECK] dtIncDateInfo : {}".format(dtIncDateInfo))

                dtDateYm = dtIncDateInfo.strftime('%Y%m')
                dtDateDay = dtIncDateInfo.strftime('%d')
                dtDateHour = dtIncDateInfo.strftime('%H')
                dtDateYmd = dtIncDateInfo.strftime('%Y%m%d')
                dtDateHm = dtIncDateInfo.strftime('%H%M')
                dtDateYmdHm = dtIncDateInfo.strftime('%Y%m%d%H%M')

                # UMKR_l015_unis_H001_202110010000.grb2
                inpFilePattern = 'MODEL/{}/{}/{}/UMKR_l015_unis_*_{}.grb2'.format(
                    dtDateYm, dtDateDay, dtDateHour, dtDateYmdHm)
                inpFile = '{}/{}'.format(globalVar['inpPath'], inpFilePattern)
                fileList = sorted(glob.glob(inpFile))

                if len(fileList) < 1: continue
                # raise Exception("[ERROR] fileInfo : {} : {}".format("Please check the input data.", inpFile))

                # fileInfo = fileList[2]
                for jj, fileInfo in enumerate(fileList):
                    log.info("[CHECK] fileInfo : {}".format(fileInfo))

                    try:
                        grb = pygrib.open(fileInfo)
                        grbInfo = grb.select(name='Temperature')[1]

                        validIdx = int(
                            re.findall(r'H\d{3}', fileInfo)[0].replace('H', ''))
                        dtValidDate = grbInfo.validDate
                        dtAnalDate = grbInfo.analDate

                        uVec = grb.select(
                            name='10 metre U wind component')[0].values[row2D,
                                                                        col2D]
                        vVec = grb.select(
                            name='10 metre V wind component')[0].values[row2D,
                                                                        col2D]
                        WD = (270 - np.rad2deg(np.arctan2(vVec, uVec))) % 360
                        WS = np.sqrt(np.square(uVec) + np.square(vVec))
                        PA = grb.select(
                            name='Surface pressure')[0].values[row2D, col2D]
                        TA = grbInfo.values[row2D, col2D]
                        TD = grb.select(
                            name='Dew point temperature')[0].values[row2D,
                                                                    col2D]
                        HM = grb.select(
                            name='Relative humidity')[0].values[row2D, col2D]
                        lowCA = grb.select(
                            name='Low cloud cover')[0].values[row2D, col2D]
                        medCA = grb.select(
                            name='Medium cloud cover')[0].values[row2D, col2D]
                        higCA = grb.select(
                            name='High cloud cover')[0].values[row2D, col2D]
                        CA_TOT = np.mean([lowCA, medCA, higCA], axis=0)
                        SS = grb.select(name='unknown')[0].values[row2D, col2D]

                        dsDataL1 = xr.Dataset(
                            {
                                'uVec':
                                (('anaTime', 'time', 'lat', 'lon'),
                                 (uVec).reshape(1, 1, len(lat1D), len(lon1D))),
                                'vVec':
                                (('anaTime', 'time', 'lat', 'lon'),
                                 (vVec).reshape(1, 1, len(lat1D), len(lon1D))),
                                'WD':
                                (('anaTime', 'time', 'lat', 'lon'),
                                 (WD).reshape(1, 1, len(lat1D), len(lon1D))),
                                'WS':
                                (('anaTime', 'time', 'lat', 'lon'),
                                 (WS).reshape(1, 1, len(lat1D), len(lon1D))),
                                'PA':
                                (('anaTime', 'time', 'lat', 'lon'),
                                 (PA).reshape(1, 1, len(lat1D), len(lon1D))),
                                'TA':
                                (('anaTime', 'time', 'lat', 'lon'),
                                 (TA).reshape(1, 1, len(lat1D), len(lon1D))),
                                'TD':
                                (('anaTime', 'time', 'lat', 'lon'),
                                 (TD).reshape(1, 1, len(lat1D), len(lon1D))),
                                'HM':
                                (('anaTime', 'time', 'lat', 'lon'),
                                 (HM).reshape(1, 1, len(lat1D), len(lon1D))),
                                'lowCA': (('anaTime', 'time', 'lat', 'lon'),
                                          (lowCA).reshape(
                                              1, 1, len(lat1D), len(lon1D))),
                                'medCA': (('anaTime', 'time', 'lat', 'lon'),
                                          (medCA).reshape(
                                              1, 1, len(lat1D), len(lon1D))),
                                'higCA': (('anaTime', 'time', 'lat', 'lon'),
                                          (higCA).reshape(
                                              1, 1, len(lat1D), len(lon1D))),
                                'CA_TOT': (('anaTime', 'time', 'lat', 'lon'),
                                           (CA_TOT).reshape(
                                               1, 1, len(lat1D), len(lon1D))),
                                'SS':
                                (('anaTime', 'time', 'lat', 'lon'),
                                 (SS).reshape(1, 1, len(lat1D), len(lon1D)))
                            },
                            coords={
                                'anaTime': pd.date_range(dtAnalDate,
                                                         periods=1),
                                'time': pd.date_range(dtValidDate, periods=1),
                                'lat': lat1D,
                                'lon': lon1D
                            })

                    except Exception as e:
                        log.error("Exception : {}".format(e))

                    for kk, posInfo in posDataL1.iterrows():
                        posId = int(posInfo['id'])
                        posLat = posInfo['lat']
                        posLon = posInfo['lon']

                        log.info(
                            "[CHECK] posId (posLon, posLat) : {} ({}. {})".
                            format(posId, posLon, posLat))

                        # umData = dsDataL2
                        umData = dsDataL1
                        dtAnaTimeList = umData['anaTime'].values
                        # umDataL8 = pd.DataFrame()
                        for ll, dtAnaTimeInfo in enumerate(dtAnaTimeList):
                            log.info("[CHECK] dtAnaTimeInfo : {}".format(
                                dtAnaTimeInfo))

                            try:
                                umDataL2 = umData.sel(lat=posLat,
                                                      lon=posLon,
                                                      anaTime=dtAnaTimeInfo)
                                umDataL3 = umDataL2.to_dataframe().dropna(
                                ).reset_index(drop=True)
                                # umDataL3['dtDate'] = pd.to_datetime(dtAnaTimeInfo) + (umDataL3.index.values * datetime.timedelta(hours=1))
                                umDataL3['dtDate'] = pd.to_datetime(
                                    dtAnaTimeInfo) + (
                                        validIdx * datetime.timedelta(hours=1))
                                # umDataL3['dtDateKst'] = umDataL3.index.tz_localize(tzUtc).tz_convert(tzKst)
                                umDataL3[
                                    'dtDateKst'] = umDataL3['dtDate'] + dtKst
                                umDataL4 = umDataL3.rename({'SS': 'SWR'},
                                                           axis='columns')
                                umDataL5 = umDataL4[[
                                    'dtDateKst', 'dtDate', 'CA_TOT', 'HM',
                                    'PA', 'TA', 'TD', 'WD', 'WS', 'SWR'
                                ]]
                                umDataL5['SRV'] = 'SRV{:05d}'.format(posId)
                                umDataL5['TA'] = umDataL5['TA'] - 273.15
                                umDataL5['TD'] = umDataL5['TD'] - 273.15
                                umDataL5['PA'] = umDataL5['PA'] / 100.0
                                umDataL5['CA_TOT'] = np.where(
                                    umDataL5['CA_TOT'] < 0, 0,
                                    umDataL5['CA_TOT'])
                                umDataL5['CA_TOT'] = np.where(
                                    umDataL5['CA_TOT'] > 1, 1,
                                    umDataL5['CA_TOT'])

                                umDataL6 = umDataL5
                                for i in umDataL6.index:
                                    lat = posLat
                                    lon = posLon
                                    pa = umDataL6._get_value(i, 'PA') * 100.0
                                    ta = umDataL6._get_value(i, 'TA')
                                    # dtDateTime = umDataL6._get_value(i, 'dtDateKst')
                                    dtDateTime = umDataL6._get_value(
                                        i, 'dtDate')

                                    solPosInfo = pvlib.solarposition.get_solarposition(
                                        dtDateTime,
                                        lat,
                                        lon,
                                        pressure=pa,
                                        temperature=ta,
                                        method='nrel_numpy')
                                    umDataL6._set_value(
                                        i, 'sza', solPosInfo['zenith'].values)
                                    umDataL6._set_value(
                                        i, 'aza', solPosInfo['azimuth'].values)
                                    umDataL6._set_value(
                                        i, 'et',
                                        solPosInfo['equation_of_time'].values)

                                # umDataL7 = umDataL6.merge(pvDataL2, how='left', left_on=['dtDateKst'], right_on=['dtDateKst'])
                                umDataL7 = umDataL6
                                umDataL7['anaTime'] = pd.to_datetime(
                                    dtAnaTimeInfo)

                                # umDataL8 = umDataL8.append(umDataL7)

                            except Exception as e:
                                log.error("Exception : {}".format(e))

                        # log.info("[CHECK] modelDirKey : {}".format(modelDirKey))
                        # log.info("[CHECK] figActDirKey : {}".format(figActDirKey))

                        # *******************************************************
                        # Read observation data
                        # *******************************************************
                        # inpData = pd.read_excel(fileInfo, engine='openpyxl')
                        # inpData = umDataL7
                        inpData = umDataL7
                        inpDataL1 = inpData.rename({'dtDate_x': 'dtDate'},
                                                   axis='columns')
                        # log.info("[CHECK] inpDataL1 : {}".format(inpDataL1))

                        iAnaYear = int(inpDataL1['anaTime'][0].strftime("%Y"))

                        selDbTable = 'TB_FOR_DATA_NEW_{}'.format(iAnaYear)

                        # Create the table if it does not exist
                        session.execute("""
                            CREATE TABLE IF NOT EXISTS `{}`
                            (
                                SRV           varchar(10) not null comment 'station identifier',
                                ANA_DATE      date        not null comment 'forecast issue date',
                                DATE_TIME     datetime    not null comment 'forecast datetime (UTC)',
                                DATE_TIME_KST datetime    null comment 'forecast datetime (KST)',
                                CA_TOT        float       null comment 'total cloud amount',
                                HM            float       null comment 'relative humidity',
                                PA            float       null comment 'station pressure',
                                TA            float       null comment 'air temperature',
                                TD            float       null comment 'dew point temperature',
                                WD            float       null comment 'wind direction',
                                WS            float       null comment 'wind speed',
                                SZA           float       null comment 'solar zenith angle',
                                AZA           float       null comment 'solar azimuth angle',
                                ET            float       null comment 'equation of time',
                                SWR           float       null comment 'solar irradiance',
                                ML            float       null comment 'machine learning prediction',
                                DL            float       null comment 'deep learning prediction',
                                REG_DATE      datetime    null comment 'registered at',
                                MOD_DATE      datetime    null comment 'modified at',
                                primary key (SRV, ANA_DATE, DATE_TIME)
                            )
                                comment 'weather forecast table_{}';
                            """.format(selDbTable, iAnaYear))

                        # **********************************************************************************************************
                        # Machine learning
                        # **********************************************************************************************************
                        # saveMlModel = '{}/{}/{}-SRV{:05d}-{}-{}-{}-{}.model.pkl'.format(globalVar['modelPath'], modelDirKey, serviceName, posId, 'final', 'pycaret', 'for', '*')
                        # saveMlModel = '{}/{}/{}-SRV{:05d}-{}-{}-{}-{}.model.pkl'.format(globalVar['modelPath'], modelDirKey, serviceName, posId, 'final', 'pycaret', 'for', '20220220')
                        # saveMlModelList = sorted(glob.glob(saveMlModel), reverse=True)
                        #
                        # # from pycaret.regression import *
                        #
                        # if (len(saveMlModelList) > 0):
                        #     saveMlModelInfo = saveMlModelList[0]
                        #     log.info("[CHECK] saveMlModelInfo : {}".format(saveMlModelInfo))
                        #
                        #     mlModel = load_model(os.path.splitext(saveMlModelInfo)[0])
                        #
                        # mlModelPred = predict_model(mlModel, data=inpDataL1).rename({'Label': 'ML'}, axis='columns')[['dtDateKst', 'anaTime', 'ML']]

                        # **********************************************************************************************************
                        # Deep learning
                        # **********************************************************************************************************
                        saveDlModel = '{}/{}/{}-SRV{:05d}-{}-{}-{}-{}.model'.format(
                            globalVar['modelPath'], modelDirKey, serviceName,
                            posId, 'final', 'h2o', 'for', modelVer)
                        saveDlModelList = sorted(glob.glob(saveDlModel),
                                                 reverse=True)

                        if not isDlModelInit:
                            h2o.init()
                            isDlModelInit = True

                        # Load the trained model (the saved artifact is a MOJO,
                        # so import_mojo is used; load_model is for binary models)
                        if (len(saveDlModelList) > 0):
                            saveDlModelInfo = saveDlModelList[0]
                            log.info("[CHECK] saveDlModelInfo : {}".format(
                                saveDlModelInfo))
                            # dlModel = h2o.load_model(path=saveDlModelInfo)
                            dlModel = h2o.import_mojo(saveDlModelInfo)

                        tmpData = inpDataL1[[
                            'dtDateKst', 'anaTime', 'CA_TOT', 'HM', 'PA', 'TA',
                            'TD', 'WD', 'WS', 'SWR', 'sza', 'aza', 'et'
                        ]].dropna().reset_index(drop=True)
                        dlModelPred = dlModel.predict(
                            h2o.H2OFrame(tmpData)).as_data_frame().rename(
                                {'predict': 'DL'}, axis='columns')
                        dlModelPredL1 = pd.concat(
                            [tmpData[['dtDateKst', 'anaTime']], dlModelPred],
                            axis=1)

                        # Machine learning or deep learning
                        # inpDataL2 = inpDataL1.merge(mlModelPred, how='left', left_on=['dtDateKst', 'anaTime'],right_on=['dtDateKst', 'anaTime'])\
                        #     .merge(dlModelPredL1, how='left', left_on=['dtDateKst', 'anaTime'], right_on=['dtDateKst', 'anaTime'])

                        # Deep learning only
                        inpDataL2 = inpDataL1.merge(
                            dlModelPredL1,
                            how='left',
                            left_on=['dtDateKst', 'anaTime'],
                            right_on=['dtDateKst', 'anaTime'])

                        # Drop duplicates keyed on dtDateKst and anaTime
                        inpDataL2.drop_duplicates(
                            subset=['dtDateKst', 'anaTime'], inplace=True)
                        inpDataL2 = inpDataL2.reset_index(drop=True)

                        dbData = inpDataL2.rename(
                            {
                                'anaTime': 'ANA_DATE',
                                'dtDateKst': 'DATE_TIME_KST',
                                'dtDate': 'DATE_TIME',
                                'sza': 'SZA',
                                'aza': 'AZA',
                                'et': 'ET'
                            },
                            axis='columns')

                        for kk, dbInfo in dbData.iterrows():
                            # Check whether the row already exists
                            resChk = session.execute("""
                                SELECT COUNT(*) AS CNT
                                FROM `{}`
                                WHERE SRV = '{}' AND ANA_DATE = '{}' AND DATE_TIME = '{}'
                                """.format(selDbTable, dbInfo['SRV'],
                                           dbInfo['ANA_DATE'],
                                           dbInfo['DATE_TIME'])).fetchone()

                            log.info("[CHECK] resChk : {}".format(
                                resChk['CNT']))

                            # Update the existing row, otherwise insert a new one
                            if (resChk['CNT'] > 0):
                                dbInfo['MOD_DATE'] = datetime.datetime.now()
                                session.execute("""
                                    UPDATE `{}` SET DL = '{}', MOD_DATE = '{}'
                                    WHERE SRV = '{}' AND ANA_DATE = '{}' AND DATE_TIME = '{}';
                                    """.format(selDbTable, dbInfo['DL'],
                                               dbInfo['MOD_DATE'],
                                               dbInfo['SRV'],
                                               dbInfo['ANA_DATE'],
                                               dbInfo['DATE_TIME']))

                            else:
                                dbInfo['REG_DATE'] = datetime.datetime.now()
                                session.execute("""
                                    INSERT INTO `{}` (SRV, ANA_DATE, DATE_TIME, DATE_TIME_KST, CA_TOT, HM, PA, TA, TD, WD, WS, SWR, SZA, AZA, ET, DL, REG_DATE) 
                                    VALUES ('{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}') 
                                    """.format(selDbTable, dbInfo['SRV'],
                                               dbInfo['ANA_DATE'],
                                               dbInfo['DATE_TIME'],
                                               dbInfo['DATE_TIME_KST'],
                                               dbInfo['CA_TOT'], dbInfo['HM'],
                                               dbInfo['PA'], dbInfo['TA'],
                                               dbInfo['TD'], dbInfo['WD'],
                                               dbInfo['WS'], dbInfo['SWR'],
                                               dbInfo['SZA'], dbInfo['AZA'],
                                               dbInfo['ET'], dbInfo['DL'],
                                               dbInfo['REG_DATE']))
            session.commit()
        except Exception as e:
            log.error("Exception : {}".format(e))
            session.rollback()
            raise e

        finally:
            log.info('[END] {}'.format("exec"))
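The example above derives per-timestamp solar geometry with pvlib before scoring. A minimal, self-contained sketch of that step follows; the station coordinates, pressure, and temperature below are illustrative assumptions, not values from the pipeline:

import pandas as pd
import pvlib

# Hypothetical station coordinates and forecast timestamps
lat, lon = 37.5, 127.0
times = pd.date_range('2021-10-01 00:00', periods=3, freq='h', tz='UTC')

# pvlib expects pressure in Pa and temperature in degrees Celsius
solPos = pvlib.solarposition.get_solarposition(
    times, lat, lon, pressure=101325.0, temperature=15.0,
    method='nrel_numpy')

# The script above keeps the zenith, azimuth, and equation-of-time columns
print(solPos[['zenith', 'azimuth', 'equation_of_time']])

Note that the script calls get_solarposition once per DataFrame row; passing the whole DatetimeIndex at once, as here, computes the same columns in a single vectorized call.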
Code Example #26
File: pyunit_pojo_import.py Project: h2oai/h2o-3
def generate_and_import_combined_pojo():
    if sys.version_info[0] < 3:  # Python 2
        print("This example needs Python 3.x+")
        return

    weather_orig = h2o.import_file(
        path=pyunit_utils.locate("smalldata/junit/weather.csv"))
    weather = weather_orig  # working copy

    features = list(set(weather.names) - {"Date", "RainTomorrow", "Sunshine"})
    features.sort()
    response = "RISK_MM"

    glm_model = H2OGeneralizedLinearEstimator()
    glm_model.train(x=features, y=response, training_frame=weather)
    glm_preds = glm_model.predict(weather)

    gbm_model = H2OGradientBoostingEstimator(ntrees=5)
    gbm_model.train(x=features, y=response, training_frame=weather)
    gbm_preds = gbm_model.predict(weather)

    # Drop columns that we will calculate in POJO manually (we will recreate them in POJO to be the exact same)
    weather = weather.drop("ChangeTemp")
    weather = weather.drop("ChangeTempDir")

    combined_pojo_path = generate_combined_pojo(glm_model, gbm_model)
    print("Combined POJO was stored in: " + combined_pojo_path)

    # FIXME: https://h2oai.atlassian.net/browse/PUBDEV-8561 We need to make this work for upload_mojo as well
    pojo_model = h2o.import_mojo(combined_pojo_path)

    # Testing begins

    # Sanity test - test parameterization that delegates to GLM
    weather["Bias"] = 1  # behave like GLM
    pojo_glm_preds = pojo_model.predict(weather)
    assert_frame_equal(pojo_glm_preds.as_data_frame(),
                       glm_preds.as_data_frame())

    # Sanity test - test parameterization that delegates to GBM
    weather["Bias"] = 0  # behave like GBM
    pojo_gbm_preds = pojo_model.predict(weather)
    assert_frame_equal(pojo_gbm_preds.as_data_frame(),
                       gbm_preds.as_data_frame())

    # Test per-segment specific behavior, segments are defined by ChangeWindDirect
    weather["Bias"] = float("NaN")
    for change_wind_dir in weather["ChangeWindDirect"].levels()[0]:
        weather_cwd = weather[weather["ChangeWindDirect"] == change_wind_dir]
        weather_orig_cwd = weather_orig[weather_orig["ChangeWindDirect"] ==
                                        change_wind_dir]
        pojo_weather_cwd_preds = pojo_model.predict(weather_cwd)
        if change_wind_dir == "c" or change_wind_dir == "l":
            expected = glm_model.predict(weather_orig_cwd) * 2
            assert_frame_equal(pojo_weather_cwd_preds.as_data_frame(),
                               expected.as_data_frame())
        elif change_wind_dir == "n":
            expected = (glm_model.predict(weather_orig_cwd) +
                        gbm_model.predict(weather_orig_cwd)) / 2
            assert_frame_equal(pojo_weather_cwd_preds.as_data_frame(),
                               expected.as_data_frame())
        elif change_wind_dir == "s":
            expected = gbm_model.predict(weather_orig_cwd)
            assert_frame_equal(pojo_weather_cwd_preds.as_data_frame(),
                               expected.as_data_frame())
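The three assertion branches encode what the combined POJO is expected to compute per ChangeWindDirect segment. A hedged pandas sketch of that dispatch logic, with hypothetical Series inputs, for illustration only:

import pandas as pd

def combined_expectation(glm_pred, gbm_pred, segment):
    # Mirrors the expectations asserted above: GLM doubled for 'c'/'l',
    # the GLM/GBM average for 'n', and plain GBM for 's'.
    out = pd.Series(index=segment.index, dtype=float)
    mask = segment.isin(['c', 'l'])
    out[mask] = glm_pred[mask] * 2
    mask = segment == 'n'
    out[mask] = (glm_pred[mask] + gbm_pred[mask]) / 2
    mask = segment == 's'
    out[mask] = gbm_pred[mask]
    return out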
Code Example #27
    def exec(self):

        log.info('[START] {}'.format("exec"))

        try:

            # h2o.init()

            if (platform.system() == 'Windows'):

                # Option settings
                sysOpt = {
                    # Start/end time
                    'srtDate': '2021-10-01',
                    'endDate': '2021-11-01',

                    # Model version (date)
                    'modelVer': '*'
                    # , 'modelVer': '20220220'
                }

                globalVar['inpPath'] = 'E:/DATA'
                globalVar['outPath'] = 'E:/DATA'

            else:

                # Option settings
                sysOpt = {
                    # Start/end time
                    'srtDate': globalVar['srtDate'],
                    'endDate': globalVar['endDate'],

                    # Model version (date); '*' picks the newest model via glob
                    'modelVer': '*'
                }

            inpPosFile = '{}/{}'.format(globalVar['cfgPath'],
                                        'stnInfo/GA_STN_INFO.xlsx')
            posData = pd.read_excel(inpPosFile, engine='openpyxl')
            posDataL1 = posData[['id', 'lat', 'lon']]

            isDlModelInit = False

            # modelDirKeyList = ['AI']
            # figActDirKeyList = ['ACT']
            # figForDirKeyList = ['FOR']

            # modelDirKeyList = ['AI_2Y', 'AI_7D', 'AI_15D', 'AI_1M', 'AI_3M', 'AI_6M']
            # figActDirKeyList = ['ACT_2Y', 'ACT_7D', 'ACT_15D', 'ACT_1M', 'ACT_3M', 'ACT_6M']
            # figForDirKeyList = ['FOR_2Y', 'FOR_7D', 'FOR_15D', 'FOR_1M', 'FOR_3M', 'FOR_6M']
            modelDirKeyList = ['AI_2Y']
            figActDirKeyList = ['ACT_2Y']
            figForDirKeyList = ['FOR_2Y']
            modelVer = sysOpt['modelVer']

            # DB connection info
            pymysql.install_as_MySQLdb()

            # Read connection settings from the config file
            config = configparser.ConfigParser()
            config.read(globalVar['sysPath'], encoding='utf-8')
            dbUser = config.get('mariadb', 'user')
            dbPwd = config.get('mariadb', 'pwd')
            dbHost = config.get('mariadb', 'host')
            dbPort = config.get('mariadb', 'port')
            dbName = config.get('mariadb', 'dbName')

            # dbCon = create_engine('mysql://{0}:{1}@{2}:{3}/{4}'.format(dbUser, dbPwd, dbHost, dbPort, dbName))
            dbCon = create_engine('mariadb://{0}:{1}@{2}:{3}/{4}'.format(
                dbUser, dbPwd, dbHost, dbPort, dbName))

            for k, modelDirKey in enumerate(modelDirKeyList):
                figActDirKey = figActDirKeyList[k]
                figForDirKey = figForDirKeyList[k]

                log.info("[CHECK] modelDirKey : {}".format(modelDirKey))
                log.info("[CHECK] figActDirKey : {}".format(figActDirKey))
                log.info("[CHECK] figForDirKey : {}".format(figForDirKey))

                for i, posInfo in posDataL1.iterrows():
                    posId = int(posInfo['id'])
                    posLat = posInfo['lat']
                    posLon = posInfo['lon']

                    # Process only station ids containing '17'
                    if (not re.search('17', str(posId))): continue

                    # *******************************************************
                    # Read observation data
                    # *******************************************************
                    inpFile = '{}/{}/{}-SRV{:05d}-{}-{}-{}.xlsx'.format(
                        globalVar['outPath'], 'FOR', serviceName, posId,
                        'final', 'proc', 'for')
                    fileList = sorted(glob.glob(inpFile))

                    # Handle the case where no input file exists
                    if fileList is None or len(fileList) < 1:
                        log.error('[ERROR] inpFile : {} / {}'.format(
                            inpFile, 'Please check the input file.'))
                        continue

                    fileInfo = fileList[0]
                    inpData = pd.read_excel(fileInfo, engine='openpyxl')

                    inpDataL1 = inpData.rename({'dtDate_x': 'dtDate'},
                                               axis='columns')

                    # **********************************************************************************************************
                    # Machine learning
                    # **********************************************************************************************************
                    # saveMlModel = '{}/{}/{}-SRV{:05d}-{}-{}-{}-{}.model.pkl'.format(globalVar['outPath'], modelDirKey, serviceName, posId, 'final', 'pycaret', 'for', modelVer)
                    # saveMlModelList = sorted(glob.glob(saveMlModel), reverse=True)
                    #
                    # if (len(saveMlModelList) > 0):
                    #     saveMlModelInfo = saveMlModelList[0]
                    #     log.info("[CHECK] saveMlModelInfo : {}".format(saveMlModelInfo))
                    #
                    #     mlModel = load_model(os.path.splitext(saveMlModelInfo)[0])
                    #
                    # mlModelPred = predict_model(mlModel, data=inpDataL1).rename({'Label': 'ML'}, axis='columns')[['dtDateKst', 'anaTime', 'ML']]

                    # **********************************************************************************************************
                    # Deep learning
                    # **********************************************************************************************************
                    # saveDlModel = '{}/{}/{}-SRV{:05d}-{}-{}-{}-{}.model'.format(globalVar['outPath'], modelDirKey, serviceName, posId, 'final', 'h2o', 'for', '*')
                    saveDlModel = '{}/{}/{}-SRV{:05d}-{}-{}-{}-{}.model'.format(
                        globalVar['outPath'], modelDirKey, serviceName, posId,
                        'final', 'h2o', 'for', modelVer)
                    saveDlModelList = sorted(glob.glob(saveDlModel),
                                             reverse=True)

                    # Load the trained model
                    if (len(saveDlModelList) > 0):
                        saveDlModelInfo = saveDlModelList[0]
                        log.info("[CHECK] saveDlModelInfo : {}".format(
                            saveDlModelInfo))

                        if not isDlModelInit:
                            h2o.init()
                            isDlModelInit = True

                        # dlModel = h2o.load_model(path=saveDlModelInfo)
                        dlModel = h2o.import_mojo(saveDlModelInfo)

                    inpDataL1['year'] = inpDataL1['dtDateKst'].dt.strftime(
                        '%Y').astype('int64')
                    inpDataL1['month'] = inpDataL1['dtDateKst'].dt.strftime(
                        '%m').astype('int64')
                    inpDataL1['day'] = inpDataL1['dtDateKst'].dt.strftime(
                        '%d').astype('int64')
                    inpDataL1['hour'] = inpDataL1['dtDateKst'].dt.strftime(
                        '%H').astype('int64')

                    # tmpData = inpDataL1[['dtDateKst', 'anaTime', 'CA_TOT', 'HM', 'PA', 'TA', 'TD', 'WD', 'WS', 'SWR', 'pv', 'sza', 'aza', 'et']].dropna().reset_index(drop=True)
                    tmpData = inpDataL1[[
                        'year', 'month', 'day', 'hour', 'dtDateKst', 'anaTime',
                        'CA_TOT', 'HM', 'PA', 'TA', 'TD', 'WD', 'WS', 'SWR',
                        'pv', 'sza', 'aza', 'et'
                    ]].dropna().reset_index(drop=True)
                    # tmpData = inpDataL1[['hour', 'dtDateKst', 'anaTime', 'CA_TOT', 'HM', 'PA', 'TA', 'TD', 'WD', 'WS', 'SWR', 'pv', 'sza', 'aza', 'et']].dropna().reset_index(drop=True)
                    dlModelPred = dlModel.predict(
                        h2o.H2OFrame(tmpData)).as_data_frame().rename(
                            {'predict': 'DL'}, axis='columns')
                    dlModelPredL1 = pd.concat(
                        [tmpData[['dtDateKst', 'anaTime']], dlModelPred],
                        axis=1)

                    # inpDataL2 = inpDataL1.merge(mlModelPred, how='left', left_on=['dtDateKst', 'anaTime'], right_on=['dtDateKst', 'anaTime']) \
                    #     .merge(dlModelPredL1, how='left', left_on=['dtDateKst', 'anaTime'], right_on=['dtDateKst', 'anaTime'])
                    inpDataL2 = inpDataL1.merge(
                        dlModelPredL1,
                        how='left',
                        left_on=['dtDateKst', 'anaTime'],
                        right_on=['dtDateKst', 'anaTime'])

                    # Drop duplicates keyed on dtDateKst and anaTime
                    inpDataL2.drop_duplicates(subset=['dtDateKst', 'anaTime'],
                                              inplace=True)
                    inpDataL2 = inpDataL2.reset_index(drop=True)
                    # inpDataL2['anaTime'] = inpDataL2['anaTime'].astype(str)

                    # **********************************************************************************************************
                    # Save to Excel
                    # **********************************************************************************************************
                    # saveXlsxFile = '{}/{}/{}-SRV{:05d}-{}-{}-{}.xlsx'.format(globalVar['outPath'], 'FOR', serviceName, posId, 'final', 'pred', 'for')
                    # os.makedirs(os.path.dirname(saveXlsxFile), exist_ok=True)
                    # log.info("[CHECK] saveXlsxFile : {}".format(saveXlsxFile))
                    # inpDataL2.to_excel(saveXlsxFile, index=False)

                    # **********************************************************************************************************
                    # Insert into DB
                    # **********************************************************************************************************
                    inpDataL2['anaYear'] = inpDataL2['anaTime'].dt.strftime(
                        "%Y").astype(str)
                    anaYearList = inpDataL2['anaYear'].unique()

                    # anaYearInfo = anaYearList[0]
                    # for j, anaYearInfo in enumerate(anaYearList):
                    #
                    #     inpDataL3 = inpDataL2.loc[
                    #         inpDataL2['anaYear'] == anaYearInfo
                    #         ].dropna().reset_index(drop=True)
                    #
                    #     if (len(inpDataL3) < 1): continue
                    #
                    #     inpDataL3['SRV'] = 'SRV{:05d}'.format(posId)
                    #     inpDataL3['REG_DATE'] = datetime.datetime.now()
                    #     iAnaYear = int(anaYearInfo)
                    #
                    #     dbData = inpDataL3.rename(
                    #         {
                    #             'anaTime': 'ANA_DATE'
                    #             , 'dtDateKst': 'DATE_TIME_KST'
                    #             , 'dtDate': 'DATE_TIME'
                    #             , 'sza': 'SZA'
                    #             , 'aza': 'AZA'
                    #             , 'et': 'ET'
                    #         }
                    #         , axis='columns'
                    #     )
                    #
                    #     dbData = dbData.drop(['id', 'time', 'pv', 'PlantCapacity', 'anaYear'], axis=1)
                    #
                    #     # Create the table if it does not exist
                    #     dbCon.execute(
                    #         """
                    #         create table IF NOT EXISTS TB_FOR_DATA_%s
                    #         (
                    #             SRV           varchar(10) not null comment 'station identifier',
                    #             ANA_DATE      date        not null comment 'forecast issue date',
                    #             DATE_TIME     datetime    not null comment 'forecast datetime (UTC)',
                    #             DATE_TIME_KST datetime    null comment 'forecast datetime (KST)',
                    #             CA_TOT        float       null comment 'total cloud amount',
                    #             HM            float       null comment 'relative humidity',
                    #             PA            float       null comment 'station pressure',
                    #             TA            float       null comment 'air temperature',
                    #             TD            float       null comment 'dew point temperature',
                    #             WD            float       null comment 'wind direction',
                    #             WS            float       null comment 'wind speed',
                    #             SZA           float       null comment 'solar zenith angle',
                    #             AZA           float       null comment 'solar azimuth angle',
                    #             ET            float       null comment 'equation of time',
                    #             SWR           float       null comment 'solar irradiance',
                    #             ML            float       null comment 'machine learning prediction',
                    #             DL            float       null comment 'deep learning prediction',
                    #             REG_DATE      datetime    null comment 'registered at',
                    #             MOD_DATE      datetime    null comment 'modified at',
                    #             primary key (SRV, DATE_TIME, ANA_DATE)
                    #         )
                    #             comment 'weather forecast table_%s';
                    #         """
                    #         , (iAnaYear, iAnaYear)
                    #     )
                    #
                    #     # Insert
                    #     selDbTable = 'TB_FOR_DATA_{}'.format(iAnaYear)
                    #     dbData.to_sql(name=selDbTable, con=dbCon, if_exists='append', index=False)
                    # dbData.to_sql(name=selDbTable, con=dbCon, if_exists='replace', index=False)

                    # **********************************************************************************************************
                    # Visualization
                    # **********************************************************************************************************
                    # # Delete the figure folder
                    # delFile = '{}/{}/{}/SRV{:05d}'.format(globalVar['figPath'], serviceName, figForDirKey, posId)
                    # shutil.rmtree(delFile, ignore_errors=True)

                    # idxInfo = inpDataL2.loc[inpDataL2['dtDateKst'] >= pd.to_datetime('2021-01-01', format='%Y-%m-%d')].index.to_numpy()
                    # idxinfo = inpDataL2.loc[inpDataL2['dtDateKst'] >= pd.to_datetime('2021-11-01', format='%y-%m-%d')].index.to_numpy()
                    idxInfo = inpDataL2.loc[
                        inpDataL2['dtDateKst'] >= pd.to_datetime(
                            '2021-06-01', format='%Y-%m-%d')].index.to_numpy()

                    if (len(idxInfo) < 1): continue
                    idx = idxInfo[0]
                    trainData, testData = inpDataL2[0:idx], inpDataL2[
                        idx:len(inpDataL2)]

                    if (len(testData) < 1): continue
                    log.info('[CHECK] testData : {} - {}'.format(
                        testData['dtDateKst'].min(),
                        testData['dtDateKst'].max()))

                    trainDataL1 = trainData.dropna().reset_index(drop=True)
                    testDataL1 = testData.dropna().reset_index(drop=True)

                    anaTimeList = testDataL1['anaTime'].unique()
                    # min(anaTimeList).datetime.strftime("%Y%m%d")

                    minAnaTime = pd.to_datetime(anaTimeList).min().strftime(
                        "%Y%m%d")
                    maxAnaTime = pd.to_datetime(anaTimeList).max().strftime(
                        "%Y%m%d")

                    # anaTimeInfo = anaTimeList[0]
                    # for j, anaTimeInfo in enumerate(anaTimeList):
                    #
                    #     testDataL2 = testDataL1.loc[
                    #         testDataL1['anaTime'] == anaTimeInfo
                    #         ].dropna().reset_index(drop=True)

                    # mainTitle = '[{}] {}'.format(anaTimeInfo, '48-hour forecast time series based on weather forecast (numerical model) data')
                    # saveImg = '{}/{}/{}/SRV{:05d}/{}.png'.format(globalVar['figPath'], serviceName, figForDirKey, posId, mainTitle)
                    # os.makedirs(os.path.dirname(saveImg), exist_ok=True)
                    #
                    # if (os.path.exists(saveImg)): continue
                    # rtnInfo = makeUserTimeSeriesPlot(pd.to_datetime(testDataL2['dtDate']), testDataL2['ML'], testDataL2['DL'], testDataL2['pv'], 'Prediction (ML)', 'Prediction (DL)', 'Observed (PV output)', 'Time (hour)', 'PV output', mainTitle, saveImg, True)
                    # log.info('[CHECK] rtnInfo : {}'.format(rtnInfo))

                    # mainTitle = '[{}-{}] {}'.format(min(anaTimeList), max(anaTimeList), 'Machine learning scatter plot (48-hour forecast) based on weather forecast (numerical model) data')
                    # saveImg = '{}/{}/{}/SRV{:05d}/{}.png'.format(globalVar['figPath'], serviceName, figForDirKey, posId, mainTitle)
                    # os.makedirs(os.path.dirname(saveImg), exist_ok=True)
                    # rtnInfo = makeUserScatterPlot(testDataL1['ML'], testDataL1['pv'], 'ML', 'Observed', mainTitle, saveImg, 0, 1000, 20, 60, True)
                    # log.info('[CHECK] rtnInfo : {}'.format(rtnInfo))

                    # anaTimeList.strftime("%Y%m%d")
                    # mainTitle = '[{}-{}] {}'.format(minAnaTime, maxAnaTime, 'Machine learning 2D scatter plot (48-hour forecast) based on weather forecast (numerical model) data')
                    # saveImg = '{}/{}/{}/SRV{:05d}/{}.png'.format(globalVar['figPath'], serviceName, figForDirKey, posId, mainTitle)
                    # os.makedirs(os.path.dirname(saveImg), exist_ok=True)
                    # rtnInfo = makeUserHist2DPlot(testDataL1['ML'], testDataL1['pv'], 'ML', 'Observed', mainTitle, saveImg, 0, 1000, 20, 60, 20, True)
                    # log.info('[CHECK] rtnInfo : {}'.format(rtnInfo))

                    # mainTitle = '[{}-{}] {}'.format(min(anaTimeList), max(anaTimeList), 'Deep learning scatter plot (48-hour forecast) based on weather forecast (numerical model) data')
                    # saveImg = '{}/{}/{}/SRV{:05d}/{}.png'.format(globalVar['figPath'], serviceName, figForDirKey, posId, mainTitle)
                    # os.makedirs(os.path.dirname(saveImg), exist_ok=True)
                    # rtnInfo = makeUserScatterPlot(testDataL1['DL'], testDataL1['pv'], 'DL', 'Observed', mainTitle, saveImg, 0, 1000, 20, 60, True)
                    # log.info('[CHECK] rtnInfo : {}'.format(rtnInfo))

                    mainTitle = '[{}-{}] {}'.format(
                        minAnaTime, maxAnaTime,
                        'Deep learning 2D scatter plot (48-hour forecast) based on weather forecast (numerical model) data'
                    )
                    saveImg = '{}/{}/{}/SRV{:05d}/{}.png'.format(
                        globalVar['figPath'], serviceName, figForDirKey, posId,
                        mainTitle)
                    os.makedirs(os.path.dirname(saveImg), exist_ok=True)
                    rtnInfo = makeUserHist2DPlot(testDataL1['DL'],
                                                 testDataL1['pv'], 'DL',
                                                 'Observed', mainTitle, saveImg,
                                                 0, 1000, 20, 60, 20, True)
                    log.info('[CHECK] rtnInfo : {}'.format(rtnInfo))

        except Exception as e:
            log.error("Exception : {}".format(e))
            raise e
        finally:
            log.info('[END] {}'.format("exec"))
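Both exec implementations score pandas data through H2O with the same round trip: wrap the frame in h2o.H2OFrame, call predict, and pull the result back with as_data_frame. A minimal sketch under stated assumptions follows; the MOJO path and feature values are hypothetical:

import h2o
import pandas as pd

h2o.init()

# Hypothetical MOJO path; the scripts above resolve it via glob
model = h2o.import_mojo('/path/to/model.zip')

# One illustrative row with the feature columns the scripts feed the model
df = pd.DataFrame({'CA_TOT': [0.5], 'HM': [60.0], 'PA': [1013.0],
                   'TA': [12.0], 'TD': [8.0], 'WD': [180.0], 'WS': [3.0],
                   'SWR': [450.0], 'sza': [45.0], 'aza': [170.0],
                   'et': [-3.2]})

# Convert, predict, and bring the prediction back as a pandas frame
pred = model.predict(h2o.H2OFrame(df)).as_data_frame()
pred = pred.rename({'predict': 'DL'}, axis='columns')
print(pred)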