def test_infogram_personal_loan_plot():
    """ checking plotting function of infogram for fair model """
    fr = h2o.import_file(path=pyunit_utils.locate("smalldata/admissibleml_test/Bank_Personal_Loan_Modelling.csv"))
    target = "Personal Loan"
    fr[target] = fr[target].asfactor()
    x = ["Experience", "Income", "Family", "CCAvg", "Education", "Mortgage",
         "Securities Account", "CD Account", "Online", "CreditCard"]

    infogram_model = H2OInfogram(seed=12345, protected_columns=["Age", "ZIP Code"])
    infogram_model.train(x=x, y=target, training_frame=fr)
    infogram_model.plot(server=True)

    infogram_model2 = H2OInfogram(seed=12345, protected_columns=["Age", "ZIP Code"],
                                  safety_index_threshold=0.05, relevance_index_threshold=0.05)
    infogram_model2.train(x=x, y=target, training_frame=fr)
    infogram_model2.plot(server=True)

    assert len(infogram_model.get_admissible_cmi()) <= len(infogram_model2.get_admissible_cmi())
def test_infogram_breast_cancer():
    """
    Simple breast cancer data test to check that core infogram is working:
    1. cmi/relevance in the frame equal those stored in the model output.
    2. when model and infogram parameters are specified, it uses the correct specification.
    3. cmi/relevance from Deep's code agree with ours.
    :return:
    """
    deep_rel = [0.0040477989, 0.0974455315, 0.0086303713, 0.0041002103, 0.0037914745, 0.0036801151,
                0.0257819346, 0.2808010416, 0.0005372569, 0.0036280018, 0.0032444598, 0.0002943119,
                0.0026430897, 0.0262074332, 0.0033317064, 0.0068812603, 0.0006185385, 0.0082121491,
                0.0014562177, 0.0081786997, 1.0000000000, 0.0894895310, 0.6187801784, 0.3302352775,
                0.0021346433, 0.0016077771, 0.0260198502, 0.3404628948, 0.0041384517, 0.0019399743]
    deep_cmi = [0.00000000, 0.31823883, 0.52769230, 0.00000000, 0.00000000, 0.00000000, 0.01183309,
                0.67430653, 0.00000000, 0.00000000, 0.45443221, 0.00000000, 0.24561013, 0.87720587,
                0.31939378, 0.19370515, 0.00000000, 0.16463918, 0.00000000, 0.00000000, 0.44830772,
                1.00000000, 0.00000000, 0.00000000, 0.62478098, 0.00000000, 0.00000000, 0.00000000,
                0.00000000, 0.64466111]
    fr = h2o.import_file(path=pyunit_utils.locate("smalldata/admissibleml_test/wdbc_changed.csv"))
    target = "diagnosis"
    fr[target] = fr[target].asfactor()
    x = ["radius_mean", "texture_mean", "perimeter_mean", "area_mean", "smoothness_mean",
         "compactness_mean", "concavity_mean", "concave_points_mean", "symmetry_mean",
         "fractal_dimension_mean", "radius_se", "texture_se", "perimeter_se", "area_se",
         "smoothness_se", "compactness_se", "concavity_se", "concave_points_se", "symmetry_se",
         "fractal_dimension_se", "radius_worst", "texture_worst", "perimeter_worst", "area_worst",
         "smoothness_worst", "compactness_worst", "concavity_worst", "concave_points_worst",
         "symmetry_worst", "fractal_dimension_worst"]
    infogram_model = H2OInfogram(seed=12345, top_n_features=50)
    infogram_model.train(x=x, y=target, training_frame=fr)

    # make sure our result matches Deep's
    pred_names, rel = infogram_model.get_all_predictor_relevance()
    x, cmi = infogram_model.get_all_predictor_cmi()
    assert deep_rel.sort() == rel.sort(), "Expected: {0}, actual: {1}".format(deep_rel, rel)
    assert deep_cmi.sort() == cmi.sort(), "Expected: {0}, actual: {1}".format(deep_cmi, cmi)

    gbm_params = {'ntrees': 3, 'max_depth': 5}
    infogram_model_gbm = H2OInfogram(seed=12345, top_n_features=50, algorithm='gbm',
                                     algorithm_params=gbm_params)
    infogram_model_gbm.train(x=x, y=target, training_frame=fr)
    x, cmi_gbm = infogram_model_gbm.get_all_predictor_cmi()
    assert abs(cmi_gbm[1] - cmi[1]) > 0.01, \
        "CMI from the infogram model with gbm using a different number of trees should be different but is not."
def test_infogram_breast_cancer_cv_fold_column():
    """
    Test to make sure cross-validation is implemented properly using fold_column
    """
    fr = h2o.import_file(path=pyunit_utils.locate("smalldata/admissibleml_test/wdbc_changed.csv"))
    target = "diagnosis"
    fr[target] = fr[target].asfactor()
    x = ["radius_mean", "texture_mean", "perimeter_mean", "area_mean", "smoothness_mean",
         "compactness_mean", "concavity_mean", "concave_points_mean", "symmetry_mean",
         "fractal_dimension_mean", "radius_se", "texture_se", "perimeter_se", "area_se",
         "smoothness_se", "compactness_se", "concavity_se", "concave_points_se", "symmetry_se",
         "fractal_dimension_se", "radius_worst", "texture_worst", "perimeter_worst", "area_worst",
         "smoothness_worst", "compactness_worst", "concavity_worst", "concave_points_worst",
         "symmetry_worst", "fractal_dimension_worst"]
    n_fold = 3
    infogram_model_cv = H2OInfogram(seed=12345, top_n_features=50, nfolds=n_fold,
                                    fold_assignment="modulo")  # model with cross-validation
    infogram_model_cv.train(x=x, y=target, training_frame=fr)
    relcmi_train_cv = infogram_model_cv.get_admissible_score_frame()
    relcmi_cv_cv = infogram_model_cv.get_admissible_score_frame(xval=True)

    fold_numbers = fr.modulo_kfold_column(n_folds=n_fold)
    fold_numbers.set_names(["fold_numbers"])
    fr = fr.cbind(fold_numbers)
    infogram_model_cv_fold_column = H2OInfogram(seed=12345, top_n_features=50, fold_column="fold_numbers")
    infogram_model_cv_fold_column.train(x=x, y=target, training_frame=fr)
    relcmi_train_cv_fold_column = infogram_model_cv_fold_column.get_admissible_score_frame()
    relcmi_cv_cv_fold_column = infogram_model_cv_fold_column.get_admissible_score_frame(xval=True)

    # training rel cmi frames should all be equal
    print("Comparing infogram data from training dataset")
    pyunit_utils.compare_frames_local(relcmi_train_cv, relcmi_train_cv_fold_column, prob=1)
    # cv rel cmi frames should be the same
    print("Comparing infogram data from cross-validation dataset")
    pyunit_utils.compare_frames_local(relcmi_cv_cv, relcmi_cv_cv_fold_column, prob=1)
def test_infogram_german_data():
    """
    Simple German credit data test to check that the safe infogram is working:
    1. it generates the same lists as Deep's original code.
    2. when model and infogram parameters are specified, it uses the correct specification.
    :return:
    """
    deep_rel = [1.00000000, 0.58302027, 0.43431236, 0.66177924, 0.53677082, 0.25084764, 0.34379833,
                0.13251726, 0.11473028, 0.09548423, 0.20398740, 0.16432640, 0.06875276, 0.04870468,
                0.12573930, 0.01382682, 0.04496173, 0.01273963]
    deep_cmi = [0.84946975, 0.73020930, 0.58553936, 0.75780528, 1.00000000, 0.38461582, 0.57575695,
                0.30663930, 0.07604779, 0.19979514, 0.42293369, 0.20628365, 0.25316918, 0.15096705,
                0.24501686, 0.11296778, 0.13068605, 0.03841617]
    fr = h2o.import_file(path=pyunit_utils.locate("smalldata/admissibleml_test/german_credit.csv"))
    target = "BAD"
    fr[target] = fr[target].asfactor()
    x = fr.names
    x.remove(target)
    x.remove("status_gender")
    x.remove("age")
    infogram_model = H2OInfogram(seed=12345, protected_columns=["status_gender", "age"], top_n_features=50)
    infogram_model.train(x=x, y=target, training_frame=fr)

    # make sure our result matches Deep's
    pred_names, rel = infogram_model.get_all_predictor_relevance()
    x, cmi = infogram_model.get_all_predictor_cmi()
    assert deep_rel.sort() == rel.sort(), "Expected: {0}, actual: {1}".format(deep_rel, rel)
    assert deep_cmi.sort() == cmi.sort(), "Expected: {0}, actual: {1}".format(deep_cmi, cmi)

    gbm_params = {'ntrees': 3}
    infogram_model_gbm_glm = H2OInfogram(seed=12345, protected_columns=["status_gender", "age"],
                                         top_n_features=50, algorithm='gbm', algorithm_params=gbm_params)
    infogram_model_gbm_glm.train(x=x, y=target, training_frame=fr)
    x, cmi_gbm_glm = infogram_model_gbm_glm.get_all_predictor_cmi()
    assert abs(cmi_gbm_glm[1] - cmi[1]) > 0.01, \
        "CMI from the infogram model with gbm using a different number of trees should be different but is not."
def test_infogram_iris_wrong_thresholds():
    """
    Simple Iris test to check that warnings are raised when the wrong thresholds are
    specified for a core infogram.
    """
    fr = h2o.import_file(path=pyunit_utils.locate("smalldata/admissibleml_test/irisROriginal.csv"))
    target = "Species"
    fr[target] = fr[target].asfactor()
    x = fr.names
    x.remove(target)
    with pyunit_utils.catch_warnings() as ws:
        # safety/relevance index thresholds do not apply to core infogram runs and should trigger warnings
        infogram_model = H2OInfogram(seed=12345, distribution='multinomial', safety_index_threshold=0.2,
                                     relevance_index_threshold=0.2, top_n_features=len(x))
        infogram_model.train(x=x, y=target, training_frame=fr)
        assert len(ws) == 2, "Expected two warnings but received {0} warnings instead.".format(len(ws))
        assert pyunit_utils.contains_warning(ws, 'index_threshold for core infogram runs.')
def test_infogram_personal_loan():
    """
    Simple Personal Loan test to check that warnings are generated when the wrong thresholds are specified.
    :return:
    """
    fr = h2o.import_file(path=pyunit_utils.locate("smalldata/admissibleml_test/Bank_Personal_Loan_Modelling.csv"))
    target = "Personal Loan"
    fr[target] = fr[target].asfactor()
    x = ["Experience", "Income", "Family", "CCAvg", "Education", "Mortgage",
         "Securities Account", "CD Account", "Online", "CreditCard"]
    with pyunit_utils.catch_warnings() as ws:
        infogram_model = H2OInfogram(seed=12345, protected_columns=["Age", "ZIP Code"], top_n_features=len(x),
                                     net_information_threshold=0.2, total_information_threshold=0.2)
        infogram_model.train(x=x, y=target, training_frame=fr)
        assert len(ws) == 2, "Expected two warnings but received {0} warnings instead.".format(len(ws))
        assert pyunit_utils.contains_warning(ws, 'information_threshold for fair infogram runs.')
def test_infogram_personal_loan():
    """
    Test to make sure predictors can be specified using an infogram model.
    """
    fr = h2o.import_file(path=pyunit_utils.locate("smalldata/admissibleml_test/Bank_Personal_Loan_Modelling.csv"))
    target = "Personal Loan"
    fr[target] = fr[target].asfactor()
    x = ["Experience", "Income", "Family", "CCAvg", "Education", "Mortgage",
         "Securities Account", "CD Account", "Online", "CreditCard"]
    infogram_model = H2OInfogram(seed=12345, protected_columns=["Age", "ZIP Code"])
    infogram_model.train(x=x, y=target, training_frame=fr)

    # GLM trained on the admissible features extracted from the infogram model
    glm_model1 = H2OGeneralizedLinearEstimator()
    glm_model1.train(x=infogram_model._extract_x_from_model(), y=target, training_frame=fr)
    coef1 = glm_model1.coef()

    # GLM trained by passing the infogram model directly as x
    glm_model2 = H2OGeneralizedLinearEstimator()
    glm_model2.train(x=infogram_model, y=target, training_frame=fr)
    coef2 = glm_model2.coef()

    pyunit_utils.assertCoefDictEqual(coef1, coef2, tol=1e-6)
def test_infogram_iris_x_attributes():
    """
    Test to showcase that we can specify predictors using infogram model
    """
    fr = h2o.import_file(path=pyunit_utils.locate("smalldata/admissibleml_test/irisROriginal.csv"))
    target = "Species"
    fr[target] = fr[target].asfactor()
    x = fr.names
    x.remove(target)
    infogram_model = H2OInfogram(seed=12345, distribution='multinomial')  # build infogram model with default settings
    infogram_model.train(x=x, y=target, training_frame=fr)

    glm_model1 = H2OGeneralizedLinearEstimator(family='multinomial')
    glm_model1.train(x=infogram_model._extract_x_from_model(), y=target, training_frame=fr)
    coef1 = glm_model1.coef()

    glm_model2 = H2OGeneralizedLinearEstimator(family='multinomial')
    glm_model2.train(x=infogram_model, y=target, training_frame=fr)
    coef2 = glm_model2.coef()

    coef_classes = coef1.keys()
    for key in coef_classes:
        pyunit_utils.assertCoefDictEqual(coef1[key], coef2[key], tol=1e-6)
def test_infogram_personal_loan_cv_fold_column():
    """
    Make sure safe infogram cross-validation gives the same results with nfolds and with an equivalent fold column.
    """
    fr = h2o.import_file(path=pyunit_utils.locate("smalldata/admissibleml_test/Bank_Personal_Loan_Modelling.csv"))
    target = "Personal Loan"
    fr[target] = fr[target].asfactor()
    x = ["Experience", "Income", "Family", "CCAvg", "Education", "Mortgage",
         "Securities Account", "CD Account", "Online", "CreditCard"]
    n_fold = 3
    infogram_model_cv = H2OInfogram(seed=12345, protected_columns=["Age", "ZIP Code"], nfolds=n_fold,
                                    fold_assignment='modulo')
    infogram_model_cv.train(x=x, y=target, training_frame=fr)  # model with cross-validation

    fold_numbers = fr.modulo_kfold_column(n_folds=n_fold)
    fold_numbers.set_names(["fold_numbers"])
    fr = fr.cbind(fold_numbers)
    infogram_model_cv_fold_column = H2OInfogram(seed=12345, protected_columns=["Age", "ZIP Code"],
                                                fold_column="fold_numbers")
    infogram_model_cv_fold_column.train(x=x, y=target, training_frame=fr)  # cross-validation with fold column

    print("compare rel cmi from training dataset")
    relcmi_train_cv = infogram_model_cv.get_admissible_score_frame()
    relcmi_train_cv_fold_column = infogram_model_cv_fold_column.get_admissible_score_frame()
    pyunit_utils.compare_frames_local(relcmi_train_cv, relcmi_train_cv_fold_column, prob=1.0)

    print("compare rel cmi from cross-validation hold out")
    relcmi_cv_cv = infogram_model_cv.get_admissible_score_frame(xval=True)
    relcmi_cv_cv_fold_column = infogram_model_cv_fold_column.get_admissible_score_frame(xval=True)
    pyunit_utils.compare_frames_local(relcmi_cv_cv, relcmi_cv_cv_fold_column, prob=1.0)
def test_infogram_personal_loan_cv_valid():
    """
    Make sure safe infogram works with a validation frame and supports cross-validation
    """
    fr = h2o.import_file(path=pyunit_utils.locate("smalldata/admissibleml_test/Bank_Personal_Loan_Modelling.csv"))
    target = "Personal Loan"
    fr[target] = fr[target].asfactor()
    x = ["Experience", "Income", "Family", "CCAvg", "Education", "Mortgage",
         "Securities Account", "CD Account", "Online", "CreditCard"]
    splits = fr.split_frame(ratios=[0.80])
    train = splits[0]
    test = splits[1]

    infogram_model = H2OInfogram(seed=12345, protected_columns=["Age", "ZIP Code"])  # model on training dataset
    infogram_model.train(x=x, y=target, training_frame=train)

    infogram_model_v = H2OInfogram(seed=12345, protected_columns=["Age", "ZIP Code"])  # model with validation dataset
    infogram_model_v.train(x=x, y=target, training_frame=train, validation_frame=test)

    infogram_model_cv = H2OInfogram(seed=12345, protected_columns=["Age", "ZIP Code"], nfolds=3)
    infogram_model_cv.train(x=x, y=target, training_frame=train)  # model with cross-validation

    infogram_model_cv_v = H2OInfogram(seed=12345, protected_columns=["Age", "ZIP Code"], nfolds=3)
    infogram_model_cv_v.train(x=x, y=target, training_frame=train,
                              validation_frame=test)  # cross-validation and validation

    print("compare rel cmi from training dataset")
    relcmi_train = infogram_model.get_admissible_score_frame()
    relcmi_train_v = infogram_model_v.get_admissible_score_frame()
    relcmi_train_cv = infogram_model_cv.get_admissible_score_frame()
    relcmi_train_cv_v = infogram_model_cv_v.get_admissible_score_frame()
    pyunit_utils.compare_frames_local(relcmi_train, relcmi_train_v, prob=1.0)
    pyunit_utils.compare_frames_local(relcmi_train_cv, relcmi_train_cv_v, prob=1.0)
    pyunit_utils.compare_frames_local(relcmi_train_cv, relcmi_train, prob=1.0)

    print("compare rel cmi from validation dataset")
    relcmi_valid_v = infogram_model_v.get_admissible_score_frame(valid=True)
    relcmi_valid_cv_v = infogram_model_cv_v.get_admissible_score_frame(valid=True)
    pyunit_utils.compare_frames_local(relcmi_valid_v, relcmi_valid_cv_v, prob=1.0)

    print("compare rel cmi from cross-validation hold out")
    relcmi_cv_cv = infogram_model_cv.get_admissible_score_frame(xval=True)
    relcmi_cv_cv_v = infogram_model_cv_v.get_admissible_score_frame(xval=True)
    pyunit_utils.compare_frames_local(relcmi_cv_cv, relcmi_cv_cv_v, prob=1.0)
def test_infogram_iris_plot():
    """
    Check to make sure the infogram can be plotted
    :return:
    """
    fr = h2o.import_file(path=pyunit_utils.locate("smalldata/admissibleml_test/irisROriginal.csv"))
    target = "Species"
    fr[target] = fr[target].asfactor()
    x = fr.names
    x.remove(target)

    infogram_model = H2OInfogram(seed=12345, distribution='multinomial')  # build infogram model with default settings
    infogram_model.train(x=x, y=target, training_frame=fr)
    infogram_model.plot(server=True)  # make sure graph will not show

    infogram_model2 = H2OInfogram(seed=12345, distribution='multinomial', net_information_threshold=0.05,
                                  total_information_threshold=0.05)  # build infogram model with lower thresholds
    infogram_model2.train(x=x, y=target, training_frame=fr)
    infogram_model2.plot(server=True)

    # lower thresholds should admit at least as many features
    assert len(infogram_model.get_admissible_cmi()) <= len(infogram_model2.get_admissible_cmi())
def test_infogram_breast_cancer_cv_fold_column():
    """
    Test to make sure infogram plots and admissible score frames work with cross-validation
    and a validation frame.
    """
    fr = h2o.import_file(path=pyunit_utils.locate("smalldata/admissibleml_test/wdbc_changed.csv"))
    target = "diagnosis"
    fr[target] = fr[target].asfactor()
    x = ["radius_mean", "texture_mean", "perimeter_mean", "area_mean", "smoothness_mean",
         "compactness_mean", "concavity_mean", "concave_points_mean", "symmetry_mean",
         "fractal_dimension_mean", "radius_se", "texture_se", "perimeter_se", "area_se",
         "smoothness_se", "compactness_se", "concavity_se", "concave_points_se", "symmetry_se",
         "fractal_dimension_se", "radius_worst", "texture_worst", "perimeter_worst", "area_worst",
         "smoothness_worst", "compactness_worst", "concavity_worst", "concave_points_worst",
         "symmetry_worst", "fractal_dimension_worst"]
    splits = fr.split_frame(ratios=[0.80])
    train = splits[0]
    test = splits[1]
    n_fold = 5

    infogram_model_cv_valid = H2OInfogram(seed=12345, top_n_features=50, nfolds=n_fold,
                                          fold_assignment="modulo")  # model with cross-validation
    infogram_model_cv_valid.train(x=x, y=target, training_frame=train, validation_frame=test)
    infogram_model_cv_valid.plot(title="infogram from training dataset 1", server=True)
    infogram_model_cv_valid.plot(train=True, valid=True,
                                 title="infogram from training/validation dataset 1", server=True)
    infogram_model_cv_valid.plot(train=True, valid=True, xval=True,
                                 title="infogram from training/validation/cv holdout dataset 1", server=True)

    relcmi_valid = infogram_model_cv_valid.get_admissible_score_frame(valid=True)
    relcmi_cv = infogram_model_cv_valid.get_admissible_score_frame(xval=True)
    assert relcmi_valid.nrow == relcmi_cv.nrow
def test_infogram_iris():
    """
    Simple Iris test to check that core infogram is working:
    1. it generates the same lists as Deep's original code.
    2. check and make sure the frame contains the correct information.
    3. check that the admissible features have cmi and relevance >= 0.1
    :return:
    """
    deep_rel = [0.009010006, 0.011170417, 0.755170945, 1.000000000]
    deep_cmi = [0.1038524, 0.7135458, 0.5745915, 1.0000000]
    fr = h2o.import_file(path=pyunit_utils.locate("smalldata/admissibleml_test/irisROriginal.csv"))
    target = "Species"
    fr[target] = fr[target].asfactor()
    x = fr.names
    x.remove(target)
    infogram_model = H2OInfogram(seed=12345, distribution='multinomial')  # build infogram model with default settings
    infogram_model.train(x=x, y=target, training_frame=fr)

    # make sure the frame returning all predictors, relevance and cmi contains the correct values
    pred_names, rel = infogram_model.get_all_predictor_relevance()
    x, cmi = infogram_model.get_all_predictor_cmi()
    predictor_rel_cmi_frame = infogram_model.get_admissible_score_frame()  # get relevance and cmi frame
    assert_list_frame_equal(cmi, rel, predictor_rel_cmi_frame)

    # make sure our result matches Deep's
    assert deep_rel.sort() == rel.sort(), "Expected: {0}, actual: {1}".format(deep_rel, rel)
    assert deep_cmi.sort() == cmi.sort(), "Expected: {0}, actual: {1}".format(deep_cmi, cmi)

    # check admissible feature values >= 0.1
    admissible_rel = infogram_model.get_admissible_relevance()
    admissible_cmi = infogram_model.get_admissible_cmi()
    for index in range(0, len(admissible_rel)):
        assert admissible_rel[index] >= 0.1, \
            "Admissible relevance should equal or exceed 0.1 but does not. Actual admissible relevance" \
            " is {0}".format(admissible_rel[index])
        assert admissible_cmi[index] >= 0.1, \
            "Admissible cmi should equal or exceed 0.1 but does not. Actual admissible cmi" \
            " is {0}".format(admissible_cmi[index])
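# The helper below is not part of the original listing; test_infogram_iris() above calls
# assert_list_frame_equal(), which is presumably defined elsewhere in the original test file.
# This is one possible sketch of such a helper, assuming the admissible score frame returned by
# get_admissible_score_frame() has one row per predictor and numeric columns named
# "relevance_index" and "safety_index" (both names are assumptions; adjust to the actual frame).
def assert_list_frame_equal(cmi, rel, score_frame, tol=1e-6):
    # pull the H2OFrame locally without pandas; row 0 is the header, values come back as strings
    rows = score_frame.as_data_frame(use_pandas=False)
    header, body = rows[0], rows[1:]
    rel_col = header.index("relevance_index")  # assumed column name for relevance
    cmi_col = header.index("safety_index")     # assumed column name for cmi/safety
    frame_rel = sorted(float(r[rel_col]) for r in body)
    frame_cmi = sorted(float(r[cmi_col]) for r in body)
    assert len(frame_rel) == len(rel) and len(frame_cmi) == len(cmi), \
        "score frame row count does not match number of predictors"
    # compare sorted values element-wise with a small tolerance
    for expected, actual in zip(sorted(rel), frame_rel):
        assert abs(expected - actual) < tol, "relevance mismatch: {0} vs {1}".format(expected, actual)
    for expected, actual in zip(sorted(cmi), frame_cmi):
        assert abs(expected - actual) < tol, "cmi mismatch: {0} vs {1}".format(expected, actual)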
def test_infogram_personal_loan_cv_valid():
    """
    Make sure the safe infogram plot works with cross-validation and a validation dataset.
    """
    fr = h2o.import_file(path=pyunit_utils.locate("smalldata/admissibleml_test/Bank_Personal_Loan_Modelling.csv"))
    target = "Personal Loan"
    fr[target] = fr[target].asfactor()
    x = ["Experience", "Income", "Family", "CCAvg", "Education", "Mortgage",
         "Securities Account", "CD Account", "Online", "CreditCard"]
    splits = fr.split_frame(ratios=[0.80])
    train = splits[0]
    test = splits[1]

    infogram_model_cv_v = H2OInfogram(seed=12345, protected_columns=["Age", "ZIP Code"], nfolds=5)
    infogram_model_cv_v.train(x=x, y=target, training_frame=train,
                              validation_frame=test)  # cross-validation, validation
    infogram_model_cv_v.plot(title="Infogram calculated from training dataset",
                             server=True)  # plot infogram from training dataset
    infogram_model_cv_v.plot(train=True, valid=True, title="Infogram calculated from training/validation dataset",
                             server=True)  # plot infogram from validation dataset
    infogram_model_cv_v.plot(train=True, valid=True, xval=True,
                             title="Infogram calculated from training/validation/xval holdout dataset",
                             server=True)  # plot infogram from cv holdout dataset

    relcmi_train = infogram_model_cv_v.get_admissible_score_frame()
    relcmi_valid = infogram_model_cv_v.get_admissible_score_frame(valid=True)
    assert relcmi_train.nrow == relcmi_valid.nrow
def test_infogram_breast_cancer_cv_fold_column():
    """
    Test to make sure infogram admissible score frames are consistent across training,
    validation, and cross-validation runs.
    """
    fr = h2o.import_file(path=pyunit_utils.locate("smalldata/admissibleml_test/wdbc_changed.csv"))
    target = "diagnosis"
    fr[target] = fr[target].asfactor()
    x = ["radius_mean", "texture_mean", "perimeter_mean", "area_mean", "smoothness_mean",
         "compactness_mean", "concavity_mean", "concave_points_mean", "symmetry_mean",
         "fractal_dimension_mean", "radius_se", "texture_se", "perimeter_se", "area_se",
         "smoothness_se", "compactness_se", "concavity_se", "concave_points_se", "symmetry_se",
         "fractal_dimension_se", "radius_worst", "texture_worst", "perimeter_worst", "area_worst",
         "smoothness_worst", "compactness_worst", "concavity_worst", "concave_points_worst",
         "symmetry_worst", "fractal_dimension_worst"]
    splits = fr.split_frame(ratios=[0.80])
    train = splits[0]
    test = splits[1]
    n_fold = 3

    infogram_model_cv_valid = H2OInfogram(seed=12345, top_n_features=50, nfolds=n_fold,
                                          fold_assignment="modulo")  # model with cross-validation and validation
    infogram_model_cv_valid.train(x=x, y=target, training_frame=train, validation_frame=test)
    relcmi_train_cv_valid = infogram_model_cv_valid.get_admissible_score_frame()
    relcmi_cv_cv_valid = infogram_model_cv_valid.get_admissible_score_frame(xval=True)
    relcmi_valid_cv_valid = infogram_model_cv_valid.get_admissible_score_frame(valid=True)

    infogram_model = H2OInfogram(seed=12345, top_n_features=50)  # model on training dataset only
    infogram_model.train(x=x, y=target, training_frame=train)
    relcmi_train = infogram_model.get_admissible_score_frame()

    infogram_model_valid = H2OInfogram(seed=12345, top_n_features=50)  # model with validation dataset
    infogram_model_valid.train(x=x, y=target, training_frame=train, validation_frame=test)
    relcmi_train_valid = infogram_model_valid.get_admissible_score_frame()
    relcmi_valid_valid = infogram_model_valid.get_admissible_score_frame(valid=True)

    infogram_model_cv = H2OInfogram(seed=12345, top_n_features=50, nfolds=n_fold,
                                    fold_assignment="modulo")  # model with cross-validation only
    infogram_model_cv.train(x=x, y=target, training_frame=train)
    relcmi_train_cv = infogram_model_cv.get_admissible_score_frame()
    relcmi_cv_cv = infogram_model_cv.get_admissible_score_frame(xval=True)

    # training rel cmi frames should all be equal
    print("Comparing infogram data from training dataset")
    pyunit_utils.compare_frames_local(relcmi_train_cv_valid, relcmi_train, prob=1)
    pyunit_utils.compare_frames_local(relcmi_train_cv, relcmi_train_valid, prob=1)
    pyunit_utils.compare_frames_local(relcmi_train_cv_valid, relcmi_train_cv, prob=1)

    # valid rel cmi frames should be the same
    print("Comparing infogram data from validation dataset")
    pyunit_utils.compare_frames_local(relcmi_valid_cv_valid, relcmi_valid_valid, prob=1)

    print("Comparing infogram data from cross-validation dataset")
    pyunit_utils.compare_frames_local(relcmi_cv_cv, relcmi_cv_cv_valid, prob=1)
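# Minimal runner sketch (not part of the original tests): h2o-3 pyunit tests are normally launched
# one per file through pyunit_utils.standalone_test(), which handles connecting to the H2O cluster.
# This assumes a reachable H2O backend and a local smalldata checkout that pyunit_utils.locate()
# can resolve; any of the test functions above can be passed in place of test_infogram_iris.
if __name__ == "__main__":
    pyunit_utils.standalone_test(test_infogram_iris)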