예제 #1
0
def validate_builder(builder):
    '''
    Sanity-check the 'parameters' section of a model builder.

    Asserts that builder has a 'parameters' entry, that it is a list, and that
    the list is not empty.  Every entry is then handed to
    h2o_util.assertKeysExist together with the keys each parameter is expected
    to carry.

    Raises AssertionError (without a message) when one of the structural checks
    on the list itself fails.
    '''
    expected_keys = ('name', 'label', 'help', 'required', 'type',
                     'default_value', 'actual_value', 'level', 'values')

    assert 'parameters' in builder
    entries = builder['parameters']
    assert isinstance(entries, list)
    assert len(entries) > 0

    for entry in entries:
        # A fresh list per call, exactly as the literal in the call site would give.
        h2o_util.assertKeysExist(entry, '', list(expected_keys))
예제 #2
0
    def testCheckWithModelAPI(self):
        '''
        Check the compatibility info returned by the one-model and one-frame calls.

        First fetches self.kmeans_model_name with find_compatible_frames=True and
        asserts that self.prostate_key is listed under 'compatible_frames'.
        Then fetches the 'prostate.hex' frame with find_compatible_models=True
        and asserts that self.dl_prostate_model_name and self.kmeans_model_name
        are reported as compatible models.
        '''
        ######################################################################
        # Now look for kmeans_model_name using the one-model API and find_compatible_frames, and check it
        model = self.a_node.models(key=self.kmeans_model_name,
                                   find_compatible_frames=True)
        h2o_util.assertKeysExist(model['models'][0], '', ['compatible_frames'])
        assert self.prostate_key in model['models'][0]['compatible_frames'], \
            "Failed to find " + self.prostate_key + " in compatible_frames list."
        ######################################################################
        # Now look for prostate_key using the one-frame API and find_compatible_models, and check it
        # NOTE(review): the key is hard-coded as 'prostate.hex' rather than
        # self.prostate_key -- confirm the two are always the same.
        result = self.a_node.frames(key='prostate.hex',
                                    find_compatible_models=True)
        frames = result['frames']
        frames_dict = h2o_util.list_to_dict(frames, 'key/name')
        assert_true('prostate.hex' in frames_dict,
                    "Failed to find prostate.hex in Frames list.")

        # The top-level 'compatible_models' list is indexed by its 'key' field.
        compatible_models = result['compatible_models']
        models_dict = h2o_util.list_to_dict(compatible_models, 'key')
        assert_true(self.dl_prostate_model_name in models_dict, "Failed to find " + \
                    self.dl_prostate_model_name + " in compatible models list.")

        # The frame entry itself must also list both models.
        assert_true(
            self.dl_prostate_model_name in frames[0]['compatible_models'])
        assert_true(self.kmeans_model_name in frames[0]['compatible_models'])
예제 #3
0
def validate_builder(algo, builder):
    '''
    Sanity-check the 'parameters' and 'can_build' sections of a model builder.

    algo is the algorithm name and is only used to label failure messages.
    Both sections must be present, be lists, and be non-empty.  Each entry of
    'parameters' is additionally passed to h2o_util.assertKeysExist with the
    keys a parameter is expected to carry.

    Raises AssertionError with a "FAIL: ..." message naming algo and showing
    repr(builder) when a structural check fails.
    '''
    def explain(prefix):
        # Built on demand so repr(builder) is only computed when a check fails.
        return prefix + algo + " (" + repr(builder) + ")"

    assert 'parameters' in builder, explain(
        "FAIL: Failed to find parameters list in builder: ")
    param_entries = builder['parameters']
    assert isinstance(param_entries, list), explain(
        "FAIL: 'parameters' element is not a list in builder: ")
    assert len(param_entries) > 0, explain("FAIL: parameters list is empty: ")

    for entry in param_entries:
        h2o_util.assertKeysExist(entry, '', [
            'name', 'label', 'help', 'required', 'type', 'default_value',
            'actual_value', 'level', 'values'
        ])

    assert 'can_build' in builder, explain(
        "FAIL: Failed to find can_build list in builder: ")
    buildable = builder['can_build']
    assert isinstance(buildable, list), explain(
        "FAIL: 'can_build' element is not a list in builder: ")
    assert len(buildable) > 0, explain(
        "FAIL: 'can_build' list is empty in builder: ")
예제 #4
0
def validate_builder(builder):
    '''
    Sanity-check the 'parameters' section of a model builder.

    Asserts that builder has a non-empty 'parameters' list, then passes the
    first entry only to h2o_util.assertKeysExist with the keys a parameter is
    expected to carry (including 'dependencies').

    Raises AssertionError (without a message) when 'parameters' is missing, is
    not a list, or is empty.
    '''
    assert 'parameters' in builder
    entries = builder['parameters']
    assert isinstance(entries, list)
    assert len(entries) > 0

    # Only the first entry is inspected; the remaining ones are not checked.
    first_entry = entries[0]
    expected_keys = [
        'name', 'label', 'help', 'required', 'type', 'default_value',
        'actual_value', 'level', 'dependencies', 'values'
    ]
    h2o_util.assertKeysExist(first_entry, '', expected_keys)
예제 #5
0
def validate_builder(algo, builder):
    '''
    Validate the overall shape of a model builder description.

    Checks, in order: 'parameters' exists, is a list and is non-empty; every
    parameter entry is passed to h2o_util.assertKeysExist with the expected
    parameter keys; 'can_build' exists, is a list and is non-empty.

    algo is only used to label failures.  Each failed check raises
    AssertionError with a "FAIL: ..." message that ends in the algo name and
    repr(builder).
    '''
    def failure(text):
        # Evaluated only when an assertion actually fails.
        return text + algo + " (" + repr(builder) + ")"

    # --- 'parameters' section ---
    assert 'parameters' in builder, failure("FAIL: Failed to find parameters list in builder: ")
    parameter_list = builder['parameters']
    assert isinstance(parameter_list, list), failure("FAIL: 'parameters' element is not a list in builder: ")
    assert len(parameter_list) > 0, failure("FAIL: parameters list is empty: ")
    for item in parameter_list:
        h2o_util.assertKeysExist(item, '', ['name', 'label', 'help', 'required', 'type', 'default_value', 'actual_value', 'level', 'values'])

    # --- 'can_build' section ---
    assert 'can_build' in builder, failure("FAIL: Failed to find can_build list in builder: ")
    can_build_list = builder['can_build']
    assert isinstance(can_build_list, list), failure("FAIL: 'can_build' element is not a list in builder: ")
    assert len(can_build_list) > 0, failure("FAIL: 'can_build' list is empty in builder: ")
예제 #6
0
 def testImportProstate(self):
     '''
     Import and parse prostate.csv, then check what the frame calls report for it.

     After cleanup(self.a_node) the file is imported from a hard-coded local
     path and parsed; the resulting frame name is stored in self.prostate_key.
     The frame listing, the single-frame lookup, columns(), column() and
     summary() are then queried for 'prostate.hex' and their results asserted.
     Only the summary() result is expected to carry non-null 'bins' for AGE.

     NOTE(review): the import path is an absolute path on one developer's
     machine, so this test presumably only runs there.
     '''
     cleanup(self.a_node)
     imported = self.a_node.import_files(path="/Users/radu/h2o-dev/smalldata/logreg/prostate.csv")
     first_key = imported['keys'][0]  # TODO: handle multiple files
     parsed = self.a_node.parse(key=first_key)
     self.prostate_key = parsed['frames'][0]['key']['name']

     # Test /Frames for prostate.csv
     frame_list = self.a_node.frames()['frames']
     by_name = h2o_util.list_to_dict(frame_list, 'key/name')
     assert 'prostate.hex' in by_name, "Failed to find prostate.hex in Frames list."

     # Test /Frames/{key} for prostate.csv
     frame_list = self.a_node.frames(key='prostate.hex')['frames']
     by_name = h2o_util.list_to_dict(frame_list, 'key/name')
     assert 'prostate.hex' in by_name, "Failed to find prostate.hex in Frames list."
     by_label = h2o_util.list_to_dict(frame_list[0]['columns'], 'label')
     assert 'CAPSULE' in by_label, "Failed to find CAPSULE in Frames/prostate.hex."
     assert 'AGE' in by_label, "Failed to find AGE in Frames/prostate.hex/columns."
     age = by_label['AGE']
     assert 'bins' in age, "Failed to find bins in Frames/prostate.hex/columns/AGE."
     assert age['bins'] is None, "Failed to clear bins field."  # should be cleared except for /summary

     # Same checks through a_node.columns()
     frame_list = self.a_node.columns(key='prostate.hex')['frames']
     by_label = h2o_util.list_to_dict(frame_list[0]['columns'], 'label')
     assert 'ID' in by_label, "Failed to find ID in Frames/prostate.hex/columns."
     assert 'AGE' in by_label, "Failed to find AGE in Frames/prostate.hex/columns."
     age = by_label['AGE']
     assert 'bins' in age, "Failed to find bins in Frames/prostate.hex/columns/AGE."
     assert age['bins'] is None, "Failed to clear bins field."  # should be cleared except for /summary

     # Same checks through a_node.column() for the single AGE column
     frame_list = self.a_node.column(key='prostate.hex', column='AGE')['frames']
     by_label = h2o_util.list_to_dict(frame_list[0]['columns'], 'label')
     assert 'AGE' in by_label, "Failed to find AGE in Frames/prostate.hex/columns."
     age = by_label['AGE']
     assert 'bins' in age, "Failed to find bins in Frames/prostate.hex/columns/AGE."
     assert age['bins'] is None, "Failed to clear bins field."  # should be cleared except for /summary

     # Summary of the AGE column: here every statistic must be populated
     frame_list = self.a_node.summary(key='prostate.hex', column='AGE')['frames']
     by_label = h2o_util.list_to_dict(frame_list[0]['columns'], 'label')
     assert 'AGE' in by_label, "Failed to find AGE in Frames/prostate.hex/columns/AGE/summary."
     age_summary = by_label['AGE']
     populated_keys = ['label', 'missing', 'zeros', 'pinfs', 'ninfs', 'mins',
             'maxs', 'mean', 'sigma', 'type', 'data', 'precision', 'bins', 'base', 'stride', 'pctiles']
     h2o_util.assertKeysExistAndNonNull(age_summary, '', populated_keys)
     h2o_util.assertKeysExist(age_summary, '', ['domain', 'str_data'])

     # Expected statistics for AGE; the float values are compared exactly.
     assert age_summary['mins'][0] == 43, 'Failed to find 43 as the first min for AGE.'
     assert age_summary['maxs'][0] == 79, 'Failed to find 79 as the first max for AGE.'
     assert age_summary['mean'] == 66.03947368421052, 'Failed to find 66.03947368421052 as the mean for AGE.'
     assert age_summary['sigma'] == 6.527071269173308, 'Failed to find 6.527071269173308 as the sigma for AGE.'
     assert age_summary['type'] == 'int', 'Failed to find int as the type for AGE.'
     assert age_summary['data'][0] == 65, 'Failed to find 65 as the first data for AGE.'
     assert age_summary['precision'] == -1, 'Failed to find -1 as the precision for AGE.'
     assert age_summary['bins'][0] == 1, 'Failed to find 1 as the first bin for AGE.'
     assert age_summary['base'] == 43, 'Failed to find 43 as the base for AGE.'
     assert age_summary['stride'] == 1, 'Failed to find 1 as the stride for AGE.'
     assert age_summary['pctiles'][0] == 50.5, 'Failed to find 50.5 as the first pctile for AGE.'
예제 #7
0
    def testCheckWithModelAPI(self):
        '''
        Check the compatibility info returned by the one-model and one-frame calls.

        First fetches self.kmeans_model_name with find_compatible_frames=True and
        asserts that self.prostate_key is listed under 'compatible_frames'.
        Then fetches the 'prostate.hex' frame with find_compatible_models=True
        and asserts that self.dl_prostate_model_name and self.kmeans_model_name
        are reported as compatible models.
        '''
        ######################################################################
        # Now look for kmeans_model_name using the one-model API and find_compatible_frames, and check it
        model = self.a_node.models(key=self.kmeans_model_name, find_compatible_frames=True)
        h2o_util.assertKeysExist(model['models'][0], '', ['compatible_frames'])
        assert self.prostate_key in model['models'][0]['compatible_frames'], \
            "Failed to find " + self.prostate_key + " in compatible_frames list."
        ######################################################################
        # Now look for prostate_key using the one-frame API and find_compatible_models, and check it
        # NOTE(review): the key is hard-coded as 'prostate.hex' rather than
        # self.prostate_key -- confirm the two are always the same.
        result = self.a_node.frames(key='prostate.hex', find_compatible_models=True)
        frames = result['frames']
        frames_dict = h2o_util.list_to_dict(frames, 'key/name')
        assert 'prostate.hex' in frames_dict, "Failed to find prostate.hex in Frames list."

        # The top-level 'compatible_models' list is indexed by its 'key' field.
        compatible_models = result['compatible_models']
        models_dict = h2o_util.list_to_dict(compatible_models, 'key')
        assert self.dl_prostate_model_name in models_dict, "Failed to find " + \
                    self.dl_prostate_model_name + " in compatible models list."

        # The frame entry itself must also list both models.
        assert self.dl_prostate_model_name in frames[0]['compatible_models']
        assert self.kmeans_model_name in frames[0]['compatible_models']
예제 #8
0
# Find the deep learning model in the models listing and keep its entry.
# NOTE(review): models, deep_learning_model_name, dl_parameters, prostate_key,
# a_node and validate_actual_parameters are defined outside this fragment.
found_dl = False
dl_model = None
for model in models['models']:
    # No break: if several entries share the key, the last one is kept.
    if model['key'] == deep_learning_model_name:
        found_dl = True
        dl_model = model

assert found_dl, 'Did not find ' + deep_learning_model_name + ' in the models list.'
# presumably compares the requested parameters with the ones the model reports;
# the meaning of the last two arguments is not visible here -- TODO confirm
validate_actual_parameters(dl_parameters, dl_model['parameters'], prostate_key,
                           None)

######################################################################
# Now look for kmeans_model_name using the one-model API, and check it
model = a_node.models(key=kmeans_model_name, find_compatible_frames=True)
# Not read in this section; it is reset again before the delete_model check below.
found_kmeans = False
h2o_util.assertKeysExist(model['models'][0], '', ['compatible_frames'])
h2o_util.assertKeysExist(model['models'][0]['compatible_frames'], '',
                         ['frames'])

# Here 'compatible_frames' is a dict holding a 'frames' list of frame entries;
# scan it for the frame whose key name equals prostate_key.
found = False
for frame in model['models'][0]['compatible_frames']['frames']:
    if frame['key']['name'] == prostate_key:
        found = True
assert found, "Failed to find " + prostate_key + " in compatible_frames list."

###################
# test delete_model
a_node.delete_model(kmeans_model_name)
# Re-fetch the listing after the delete.
models = a_node.models()

# Flag for the check that follows (the rest of that check is outside this fragment).
found_kmeans = False
예제 #9
0
# Test /Frames/{key}/columns/{label} for prostate.csv
# NOTE(review): the frame queried is 'prostate_binomial', while the failure
# messages below still refer to prostate.hex.
# a_node, h2o and h2o_util are defined outside this fragment.
frames = a_node.column(key='prostate_binomial', column='AGE')['frames']
# Index the returned columns by their 'label' field.
columns_dict = h2o_util.list_to_dict(frames[0]['columns'], 'label')
assert 'AGE' in columns_dict, "FAIL: Failed to find AGE in Frames/prostate.hex/columns."
assert 'bins' in columns_dict['AGE'], "FAIL: Failed to find bins in Frames/prostate.hex/columns/AGE."
h2o.H2O.verboseprint('bins: ' + repr(columns_dict['AGE']['bins']))
assert None is columns_dict['AGE']['bins'], "FAIL: Failed to clear bins field." # should be cleared except for /summary

# Test /Frames/{key}/columns/{label}/summary for prostate.csv
frames = a_node.summary(key='prostate_binomial', column='AGE')['frames']
columns_dict = h2o_util.list_to_dict(frames[0]['columns'], 'label')
assert 'AGE' in columns_dict, "FAIL: Failed to find AGE in Frames/prostate.hex/columns/AGE/summary."
col = columns_dict['AGE']
# In the summary result these statistics must all be present and non-null ...
h2o_util.assertKeysExistAndNonNull(col, '', ['label', 'missing', 'zeros', 'pinfs', 'ninfs', 'mins', 'maxs', 'mean', 'sigma', 'type', 'data', 'precision', 'bins', 'base', 'stride', 'pctiles'])
# ... while these two only need to exist.
h2o_util.assertKeysExist(col, '', ['domain', 'str_data'])
# Expected statistics for the AGE column.
# NOTE(review): mean and sigma are compared with exact float equality.
assert col['mins'][0] == 43, 'FAIL: Failed to find 43 as the first min for AGE.'
assert col['maxs'][0] == 79, 'FAIL: Failed to find 79 as the first max for AGE.'
assert col['mean'] == 66.03947368421052, 'FAIL: Failed to find 66.03947368421052 as the mean for AGE.'
assert col['sigma'] == 6.527071269173308, 'FAIL: Failed to find 6.527071269173308 as the sigma for AGE.'
assert col['type'] == 'int', 'FAIL: Failed to find int as the type for AGE.'
assert col['data'][0] == 65, 'FAIL: Failed to find 65 as the first data for AGE.'
assert col['precision'] == -1, 'FAIL: Failed to find -1 as the precision for AGE.'
assert col['bins'][0] == 1, 'FAIL: Failed to find 1 as the first bin for AGE.'
assert col['base'] == 43, 'FAIL: Failed to find 43 as the base for AGE.'
assert col['stride'] == 1, 'FAIL: Failed to find 1 as the stride for AGE.'
assert col['pctiles'][0] == 50.5, 'FAIL: Failed to find 50.5 as the first pctile for AGE.'


####################################################################################################
# Build and do basic validation checks on models
예제 #10
0
 def testImportProstate(self):
     '''
     Import and parse the configured prostate data set, then check the frame calls.

     The data location is read from self.cfg.data['prostate'], a pair whose
     first element must be "file" and whose second element is the path to
     import; anything else raises RuntimeError.  After parsing, the frame name
     is stored in self.prostate_key.  The frame listing, the single-frame
     lookup, columns(), column() and summary() are then queried for
     'prostate.hex' and their results asserted.  Only the summary() result is
     expected to carry non-null 'bins' for AGE.
     '''
     cleanup(self.a_node)

     source = self.cfg.data['prostate']
     if source[0] != "file":
         raise RuntimeError("Unsupported file type specified")
     imported = self.a_node.import_files(path=os.path.abspath(source[1]))

     first_key = imported['keys'][0]  # TODO: handle multiple files
     parsed = self.a_node.parse(key=first_key)
     self.prostate_key = parsed['frames'][0]['key']['name']

     # Test /Frames for prostate.csv
     frame_list = self.a_node.frames()['frames']
     by_name = h2o_util.list_to_dict(frame_list, 'key/name')
     assert 'prostate.hex' in by_name, "Failed to find prostate.hex in Frames list."

     # Test /Frames/{key} for prostate.csv
     frame_list = self.a_node.frames(key='prostate.hex')['frames']
     by_name = h2o_util.list_to_dict(frame_list, 'key/name')
     assert 'prostate.hex' in by_name, "Failed to find prostate.hex in Frames list."
     by_label = h2o_util.list_to_dict(frame_list[0]['columns'], 'label')
     assert 'CAPSULE' in by_label, "Failed to find CAPSULE in Frames/prostate.hex."
     assert 'AGE' in by_label, "Failed to find AGE in Frames/prostate.hex/columns."
     age = by_label['AGE']
     assert 'bins' in age, "Failed to find bins in Frames/prostate.hex/columns/AGE."
     assert age['bins'] is None, "Failed to clear bins field."  # should be cleared except for /summary

     # Same checks through a_node.columns()
     frame_list = self.a_node.columns(key='prostate.hex')['frames']
     by_label = h2o_util.list_to_dict(frame_list[0]['columns'], 'label')
     assert 'ID' in by_label, "Failed to find ID in Frames/prostate.hex/columns."
     assert 'AGE' in by_label, "Failed to find AGE in Frames/prostate.hex/columns."
     age = by_label['AGE']
     assert 'bins' in age, "Failed to find bins in Frames/prostate.hex/columns/AGE."
     assert age['bins'] is None, "Failed to clear bins field."  # should be cleared except for /summary

     # Same checks through a_node.column() for the single AGE column
     frame_list = self.a_node.column(key='prostate.hex', column='AGE')['frames']
     by_label = h2o_util.list_to_dict(frame_list[0]['columns'], 'label')
     assert 'AGE' in by_label, "Failed to find AGE in Frames/prostate.hex/columns."
     age = by_label['AGE']
     assert 'bins' in age, "Failed to find bins in Frames/prostate.hex/columns/AGE."
     assert age['bins'] is None, "Failed to clear bins field."  # should be cleared except for /summary

     # Summary of the AGE column: here every statistic must be populated
     frame_list = self.a_node.summary(key='prostate.hex', column='AGE')['frames']
     by_label = h2o_util.list_to_dict(frame_list[0]['columns'], 'label')
     assert 'AGE' in by_label, "Failed to find AGE in Frames/prostate.hex/columns/AGE/summary."
     age_summary = by_label['AGE']
     populated_keys = [
         'label', 'missing', 'zeros', 'pinfs', 'ninfs', 'mins', 'maxs',
         'mean', 'sigma', 'type', 'data', 'precision', 'bins', 'base',
         'stride', 'pctiles'
     ]
     h2o_util.assertKeysExistAndNonNull(age_summary, '', populated_keys)
     h2o_util.assertKeysExist(age_summary, '', ['domain', 'str_data'])

     # Expected statistics for AGE; the float values are compared exactly.
     assert age_summary['mins'][0] == 43, 'Failed to find 43 as the first min for AGE.'
     assert age_summary['maxs'][0] == 79, 'Failed to find 79 as the first max for AGE.'
     assert age_summary['mean'] == 66.03947368421052, 'Failed to find 66.03947368421052 as the mean for AGE.'
     assert age_summary['sigma'] == 6.527071269173308, 'Failed to find 6.527071269173308 as the sigma for AGE.'
     assert age_summary['type'] == 'int', 'Failed to find int as the type for AGE.'
     assert age_summary['data'][0] == 65, 'Failed to find 65 as the first data for AGE.'
     assert age_summary['precision'] == -1, 'Failed to find -1 as the precision for AGE.'
     assert age_summary['bins'][0] == 1, 'Failed to find 1 as the first bin for AGE.'
     assert age_summary['base'] == 43, 'Failed to find 43 as the base for AGE.'
     assert age_summary['stride'] == 1, 'Failed to find 1 as the stride for AGE.'
     assert age_summary['pctiles'][0] == 50.5, 'Failed to find 50.5 as the first pctile for AGE.'
예제 #11
0
def validate_builder(builder):
    '''
    Sanity-check the 'parameters' section of a model builder.

    Asserts that builder has a non-empty 'parameters' list, then passes the
    first entry only to h2o_util.assertKeysExist with the keys a parameter is
    expected to carry.

    Raises AssertionError (without a message) when 'parameters' is missing, is
    not a list, or is empty.
    '''
    assert 'parameters' in builder
    entries = builder['parameters']
    assert isinstance(entries, list)
    assert len(entries) > 0

    # Only the first entry is inspected; the remaining ones are not checked.
    first_entry = entries[0]
    expected_keys = [
        'name', 'label', 'help', 'required', 'type',
        'default_value', 'actual_value', 'level', 'values',
    ]
    h2o_util.assertKeysExist(first_entry, '', expected_keys)
예제 #12
0
# Test /Frames/{key}/columns/{label} for prostate.csv
# NOTE(review): the frame queried is 'prostate_binomial', while the failure
# messages below still refer to prostate.hex.
# a_node, h2o, h2o_util and verbose are defined outside this fragment.
frames = a_node.column(key='prostate_binomial', column='AGE')['frames']
# Index the returned columns by their 'label' field.
columns_dict = h2o_util.list_to_dict(frames[0]['columns'], 'label')
assert 'AGE' in columns_dict, "FAIL: Failed to find AGE in Frames/prostate.hex/columns."
assert 'histogram_bins' in columns_dict['AGE'], "FAIL: Failed to find bins in Frames/prostate.hex/columns/AGE."
h2o.H2O.verboseprint('bins: ' + repr(columns_dict['AGE']['histogram_bins']))
assert None is columns_dict['AGE']['histogram_bins'], "FAIL: Failed to clear bins field." # should be cleared except for /summary

# Test /Frames/{key}/columns/{label}/summary for prostate.csv
frames = a_node.summary(key='prostate_binomial', column='AGE')['frames']
columns_dict = h2o_util.list_to_dict(frames[0]['columns'], 'label')
assert 'AGE' in columns_dict, "FAIL: Failed to find AGE in Frames/prostate.hex/columns/AGE/summary."
col = columns_dict['AGE']
# In the summary result these statistics must all be present and non-null ...
h2o_util.assertKeysExistAndNonNull(col, '', ['label', 'missing_count', 'zero_count', 'positive_infinity_count', 'negative_infinity_count', 'mins', 'maxs', 'mean', 'sigma', 'type', 'data', 'precision', 'histogram_bins', 'histogram_base', 'histogram_stride', 'percentiles'])
# ... while these two only need to exist.
h2o_util.assertKeysExist(col, '', ['domain', 'string_data'])
# Expected statistics for the AGE column.
# NOTE(review): mean and sigma are compared with exact float equality.
assert col['mins'][0] == 43, 'FAIL: Failed to find 43 as the first min for AGE.'
assert col['maxs'][0] == 79, 'FAIL: Failed to find 79 as the first max for AGE.'
assert col['mean'] == 66.03947368421052, 'FAIL: Failed to find 66.03947368421052 as the mean for AGE.'
assert col['sigma'] == 6.527071269173308, 'FAIL: Failed to find 6.527071269173308 as the sigma for AGE.'
assert col['type'] == 'int', 'FAIL: Failed to find int as the type for AGE.'
assert col['data'][0] == 65, 'FAIL: Failed to find 65 as the first data for AGE.'
assert col['precision'] == -1, 'FAIL: Failed to find -1 as the precision for AGE.'
assert col['histogram_bins'][0] == 1, 'FAIL: Failed to find 1 as the first bin for AGE.'
assert col['histogram_base'] == 43, 'FAIL: Failed to find 43 as the histogram_base for AGE.'
assert col['histogram_stride'] == 1, 'FAIL: Failed to find 1 as the histogram_stride for AGE.'
# The failure message appends the value actually received.
assert col['percentiles'][0] == 43, 'FAIL: Failed to find 43 as the first percentile for AGE. '+str(col['percentiles'][0])

# Test /SplitFrame for prostate.csv
# (Python 2 print statement: this script does not parse under Python 3.)
if verbose: print 'Testing SplitFrame with named dest_keys. . .'
# One ratio with two destination keys; presumably the remainder goes to the
# second key -- TODO confirm against split_frame.
splits = a_node.split_frame(dataset='prostate_binomial', ratios=[0.8], dest_keys=['bigger', 'smaller'])
예제 #13
0
# Check dl_airlines_model_name
# NOTE(review): models, dl_airlines_model_name, dl_airline_1_parameters,
# airlines_key, prostate_key, kmeans_model_name, a_node and
# validate_actual_parameters are defined outside this fragment.
found_dl = False;
dl_model = None
for model in models['models']:
    # No break: if several entries share the key, the last one is kept.
    if model['key'] == dl_airlines_model_name:
        found_dl = True
        dl_model = model

assert found_dl, 'Did not find ' + dl_airlines_model_name + ' in the models list.'
# presumably compares the requested parameters with the ones the model reports;
# the meaning of the last two arguments is not visible here -- TODO confirm
validate_actual_parameters(dl_airline_1_parameters, dl_model['parameters'], airlines_key, None)

######################################################################
# Now look for kmeans_model_name using the one-model API, and check it
model = a_node.models(key=kmeans_model_name, find_compatible_frames=True)
# Not read in this section; it is reset again before the delete_model check below.
found_kmeans = False;
h2o_util.assertKeysExist(model['models'][0], '', ['compatible_frames'])
h2o_util.assertKeysExist(model['models'][0]['compatible_frames'], '', ['frames'])

# Here 'compatible_frames' is a dict holding a 'frames' list of frame entries;
# scan it for the frame whose key name equals prostate_key.
found = False
for frame in model['models'][0]['compatible_frames']['frames']:
    if frame['key']['name'] == prostate_key:
        found = True
assert found, "Failed to find " + prostate_key + " in compatible_frames list."


###################
# test delete_model
a_node.delete_model(kmeans_model_name)
# Re-fetch the listing after the delete.
models = a_node.models()

# Flag for the check that follows (the rest of that check is outside this fragment).
found_kmeans = False;