コード例 #1
0
ファイル: evaluation_steps.py プロジェクト: bigmlcom/bigmler
def i_create_all_resources_to_evaluate_with_model_and_map(step, data=None, fields_map=None, output=None):
    """Evaluate the current world model on *data* using a fields map.

    Runs ``bigmler --evaluate`` with the given test data, the stored
    model and a fields-map file; on success registers the output
    directory and file in ``world`` for later checks/cleanup.
    """
    if data is None or fields_map is None or output is None:
        assert False
    command = (
        "bigmler --evaluate --test "
        + res_filename(data)
        + " --model "
        + world.model["resource"]
        + " --output "
        + output
        + " --fields-map "
        + res_filename(fields_map)
    )
    command = check_debug(command)
    try:
        retcode = check_call(command, shell=True)
        if retcode < 0:
            assert False
        else:
            world.directory = os.path.dirname(output)
            world.folders.append(world.directory)
            world.output = output
            assert True
    except (OSError, CalledProcessError, IOError) as exc:
        # Surface the failure reason instead of a silent assert; this also
        # matches the exception handling of the sibling step functions.
        assert False, str(exc)
コード例 #2
0
def i_create_all_anomaly_resources(step, data=None, test=None, output=None):
    """Create an anomaly detector from *data* and score *test* with bigmler."""
    if data is None or test is None or output is None:
        assert False
    test = res_filename(test)
    shell_execute(
        "bigmler anomaly --train %s --test %s --store --output %s" %
        (res_filename(data), test, output),
        output, test=test)
コード例 #3
0
def i_create_all_mc_resources(step, data, max_categories=None, objective=None, test=None, output=None):
    """Run bigmler with --max-categories on *data* and predict for *test*."""
    if (max_categories is None or objective is None
            or test is None or output is None):
        assert False
    world.directory = os.path.dirname(output)
    world.folders.append(world.directory)
    test = res_filename(test)
    try:
        command = check_debug(
            "bigmler --train %s --max-categories %s --objective %s"
            " --test %s --store --output %s" %
            (res_filename(data), max_categories, objective, test, output))
        retcode = check_call(command, shell=True)
        if retcode < 0:
            assert False
        # test file has headers in it, so first line must be ignored
        world.test_lines = file_number_of_lines(test) - 1
        world.output = output
        assert True
    except (OSError, CalledProcessError, IOError) as exc:
        assert False, str(exc)
コード例 #4
0
def i_create_all_ml_resources(step, tag=None, label_separator=None, number_of_labels=None, data=None, training_separator=None, test=None, output=None):
    """Create multi-label models tagged with *tag* and predict for *test*."""
    if (tag is None or label_separator is None or training_separator is None
            or number_of_labels is None or data is None or test is None
            or output is None):
        assert False
    world.directory = os.path.dirname(output)
    world.folders.append(world.directory)
    world.number_of_models = int(number_of_labels)
    test = res_filename(test)
    try:
        command = check_debug(
            "bigmler --multi-label --train %s --label-separator \"%s\""
            " --training-separator \"%s\" --test %s --store --output %s"
            " --tag %s --max-batch-models 1" %
            (res_filename(data), label_separator, training_separator,
             test, output, tag))
        retcode = check_call(command, shell=True)
        if retcode < 0:
            assert False
        # test file has headers in it, so first line must be ignored
        world.test_lines = file_number_of_lines(test) - 1
        world.output = output
        assert True
    except (OSError, CalledProcessError, IOError) as exc:
        assert False, str(exc)
コード例 #5
0
def i_create_all_ml_resources(step,
                              tag=None,
                              label_separator=None,
                              number_of_labels=None,
                              data=None,
                              training_separator=None,
                              test=None,
                              output=None):
    """Create multi-label models tagged with *tag* and predict for *test*."""
    required = (tag, label_separator, training_separator, number_of_labels,
                data, test, output)
    if any(value is None for value in required):
        assert False
    world.directory = os.path.dirname(output)
    world.folders.append(world.directory)
    world.number_of_models = int(number_of_labels)
    test = res_filename(test)
    try:
        pieces = ["bigmler --multi-label --train ", res_filename(data),
                  " --label-separator \"", label_separator,
                  "\" --training-separator \"", training_separator,
                  "\" --test ", test, " --store --output ", output,
                  " --tag ", tag, " --max-batch-models 1"]
        command = check_debug("".join(pieces))
        retcode = check_call(command, shell=True)
        if retcode < 0:
            assert False
        # test file has headers in it, so first line must be ignored
        world.test_lines = file_number_of_lines(test) - 1
        world.output = output
        assert True
    except (OSError, CalledProcessError, IOError) as exc:
        assert False, str(exc)
コード例 #6
0
def i_create_all_anomaly_resources(step, data=None, test=None, output=None):
    """Create an anomaly detector from *data* and score *test* with bigmler."""
    if data is None or test is None or output is None:
        assert False
    train_path = res_filename(data)
    test = res_filename(test)
    command = ("bigmler anomaly --train " + train_path +
               " --test " + test + " --store --output " + output)
    shell_execute(command, output, test=test)
コード例 #7
0
def i_create_all_lr_resources(step, data=None, test=None, output=None):
    """Create linear regression resources and predictions for *test*."""
    ok_(data is not None and test is not None and output is not None)
    test = res_filename(test)
    shell_execute(
        "bigmler linear-regression --train %s --test %s"
        " --store --no-bias --default-numeric-value mean --output %s" %
        (res_filename(data), test, output),
        output, test=test)
コード例 #8
0
def i_create_all_lr_resources(step, data=None, test=None, output=None):
    """Create logistic regression resources and predictions for *test*."""
    ok_(data is not None and test is not None and output is not None)
    test = res_filename(test)
    shell_execute(
        "bigmler logistic-regression --train %s --test %s"
        " --store --no-balance-fields --no-bias --output %s" %
        (res_filename(data), test, output),
        output, test=test)
コード例 #9
0
def i_create_all_mc_resources(step,
                              data,
                              max_categories=None,
                              objective=None,
                              test=None,
                              output=None):
    """Run bigmler with --max-categories on *data* and predict for *test*."""
    if any(value is None
           for value in (max_categories, objective, test, output)):
        assert False
    world.directory = os.path.dirname(output)
    world.folders.append(world.directory)
    test = res_filename(test)
    try:
        pieces = ["bigmler --train ", res_filename(data),
                  " --max-categories ", max_categories,
                  " --objective ", objective,
                  " --test ", test,
                  " --store --output ", output]
        command = check_debug("".join(pieces))
        retcode = check_call(command, shell=True)
        if retcode < 0:
            assert False
        # test file has headers in it, so first line must be ignored
        world.test_lines = file_number_of_lines(test) - 1
        world.output = output
        assert True
    except (OSError, CalledProcessError, IOError) as exc:
        assert False, str(exc)
コード例 #10
0
def i_create_all_cluster_resources(step, data=None, test=None, output=None):
    """Create a k=8 cluster from *data* and centroids for *test*."""
    ok_(data is not None and test is not None and output is not None)
    test = res_filename(test)
    shell_execute(
        "bigmler cluster --train %s --test %s --k 8 --store --output %s" %
        (res_filename(data), test, output),
        output, test=test)
コード例 #11
0
def i_create_all_lr_resources(step, data=None, test=None, output=None):
    """Create linear regression resources and predictions for *test*."""
    ok_(data is not None and test is not None and output is not None)
    train_path = res_filename(data)
    test = res_filename(test)
    command = ("bigmler linear-regression --train " + train_path +
               " --test " + test + " --store --no-bias"
               " --default-numeric-value mean --output " + output)
    shell_execute(command, output, test=test)
コード例 #12
0
def i_create_all_dn_resources(step, data=None, test=None, output=None):
    """Create a deepnet from *data* and predictions for *test*."""
    ok_(data is not None and test is not None and output is not None)
    test = res_filename(test)
    shell_execute(
        "bigmler deepnet --train %s --test %s --store --output %s" %
        (res_filename(data), test, output),
        output, test=test)
コード例 #13
0
def i_create_all_resources_to_test_from_stdin(step, data=None, test=None, name=None, output=None):
    """Pipe the test file through stdin into a bigmler prediction run."""
    if data is None or test is None or output is None or name is None:
        assert False
    test = res_filename(test)
    shell_execute(
        "cat %s|bigmler --train %s --test --store --output %s"
        " --name \"%s\" --max-batch-models 1" %
        (test, res_filename(data), output, name),
        output, test=test)
コード例 #14
0
def i_create_all_pca_resources_with_no_headers(step, data=None, test=None, output=None):
    """Create PCA projections for headerless train and test files."""
    ok_(data is not None and test is not None and output is not None)
    test = res_filename(test)
    shell_execute(
        "bigmler pca --train %s --test %s --store --output %s"
        " --no-train-header --no-test-header" %
        (res_filename(data), test, output),
        output, test=test, options="--projection-header")
コード例 #15
0
def i_create_all_cluster_resources_with_mapping(step, data=None, test=None, fields_map=None, output=None):
    """Create remote k=8 cluster centroids for *test* using a fields map."""
    ok_(data is not None and test is not None and output is not None
        and fields_map is not None)
    test = res_filename(test)
    shell_execute(
        "bigmler cluster --remote --train %s --test %s --k 8"
        " --fields-map %s --store --output %s" %
        (res_filename(data), test, res_filename(fields_map), output),
        output, test=test)
コード例 #16
0
def i_create_all_cluster_resources_to_dataset(step, data=None, test=None, output_dir=None):
    """Create remote k=8 cluster centroids written to a dataset, no CSV."""
    ok_(data is not None and test is not None and output_dir is not None)
    test = res_filename(test)
    # NOTE: the double space before --store reproduces the original
    # command string exactly (harmless to the shell).
    shell_execute(
        "bigmler cluster --remote --train %s --test %s --k 8"
        " --to-dataset --no-csv  --store --output-dir %s" %
        (res_filename(data), test, output_dir),
        "%s/x.csv" % output_dir, test=test)
コード例 #17
0
def i_create_all_dn_resources_headers(step, data=None, test=None, output=None):
    """Create deepnet predictions with full info and a header row."""
    ok_(data is not None and test is not None and output is not None)
    test = res_filename(test)
    shell_execute(
        "bigmler deepnet --train %s --test %s"
        " --store --prediction-header --prediction-info full --output %s" %
        (res_filename(data), test, output),
        output, test=test, options='--prediction-header')
コード例 #18
0
def i_create_all_anomaly_resources_with_mapping(step, data=None, test=None, fields_map=None, output=None):
    """Create remote anomaly scores for *test* using a fields map."""
    if data is None or test is None or output is None or fields_map is None:
        assert False
    test = res_filename(test)
    shell_execute(
        "bigmler anomaly --remote --train %s --test %s"
        " --fields-map %s --store --output %s" %
        (res_filename(data), test, res_filename(fields_map), output),
        output, test=test)
コード例 #19
0
def i_create_all_anomaly_resources_with_mapping(step, data=None, test=None, fields_map=None, output=None):
    """Create remote anomaly scores for *test* using a fields map."""
    if data is None or test is None or output is None or fields_map is None:
        assert False
    train_path = res_filename(data)
    map_path = res_filename(fields_map)
    test = res_filename(test)
    command = ("bigmler anomaly --remote --train " + train_path +
               " --test " + test + " --fields-map " + map_path +
               " --store --output " + output)
    shell_execute(command, output, test=test)
コード例 #20
0
def i_create_all_cluster_resources_with_prediction_fields(step, data=None, test=None, prediction_fields=None, output=None):
    """Create remote k=8 centroids restricted to *prediction_fields*."""
    ok_(data is not None and test is not None and output is not None
        and prediction_fields is not None)
    test = res_filename(test)
    shell_execute(
        "bigmler cluster --remote --train %s --test %s --k 8"
        " --prediction-fields \"%s\" --prediction-info full"
        " --prediction-header --store --output %s" %
        (res_filename(data), test, prediction_fields, output),
        output, test=test, options='--prediction-header')
コード例 #21
0
def i_create_dn_resources_from_model_remote_with_options(
        step, test=None, options_file=None, output=None):
    """Remote batch predictions from the stored deepnet with extra options."""
    ok_(test is not None and output is not None and options_file is not None)
    test = res_filename(test)
    options_file = res_filename(options_file)
    shell_execute(
        "bigmler deepnet --deepnet %s --test %s"
        " --batch-prediction-attributes %s --store --remote --output %s" %
        (world.deepnet['resource'], test, options_file, output),
        output, test=test)
コード例 #22
0
ファイル: basic_execute_steps.py プロジェクト: mmerce/bigmler
def i_create_all_execution_with_io_resources(step, code_file=None, output_dir=None, inputs_dec=None, outputs_dec=None, inputs=None):
    """Execute a WhizzML code file with declared inputs and outputs."""
    ok_(code_file is not None and output_dir is not None and
        inputs_dec is not None and outputs_dec is not None and
        inputs is not None)
    shell_execute(
        "bigmler execute --code-file %s --store --declare-inputs %s"
        " --declare-outputs %s --inputs %s --output-dir %s" %
        (res_filename(code_file), res_filename(inputs_dec),
         res_filename(outputs_dec), res_filename(inputs), output_dir),
        "%s/xx.txt" % output_dir)
コード例 #23
0
def i_create_all_cluster_resources_to_dataset(step,
                                              data=None,
                                              test=None,
                                              output_dir=None):
    """Create remote k=8 cluster centroids written to a dataset, no CSV."""
    ok_(data is not None and test is not None and output_dir is not None)
    train_path = res_filename(data)
    test = res_filename(test)
    # NOTE: the double space before --store reproduces the original
    # command string exactly (harmless to the shell).
    command = ("bigmler cluster --remote --train " + train_path +
               " --test " + test + " --k 8 --to-dataset --no-csv "
               " --store --output-dir " + output_dir)
    shell_execute(command, "%s/x.csv" % output_dir, test=test)
コード例 #24
0
def i_create_dn_resources_from_model_remote_with_options(step, test=None, options_file=None, output=None):
    """Remote batch predictions from the stored deepnet with extra options."""
    ok_(test is not None and output is not None and options_file is not None)
    test = res_filename(test)
    options_file = res_filename(options_file)
    command = "".join(["bigmler deepnet --deepnet ",
                       world.deepnet['resource'],
                       " --test ", test,
                       " --batch-prediction-attributes ", options_file,
                       " --store --remote --output ", output])
    shell_execute(command, output, test=test)
コード例 #25
0
def i_create_resources_from_model_with_op_remote(step, operating_point=None,
                                                 test=None, output=None):
    """Remote predictions from the stored model with an operating point."""
    ok_(operating_point is not None and
        test is not None and output is not None)
    test = res_filename(test)
    operating_point = res_filename(operating_point)
    shell_execute(
        "bigmler --model %s --test %s --operating-point %s"
        " --store --remote --output %s --max-batch-models 1" %
        (world.model['resource'], test, operating_point, output),
        output, test=test)
コード例 #26
0
def i_create_fs_resources_from_model_remote_with_options(
        step, test=None, output=None, options_file=None):
    """Remote batch predictions from a fusion of the stored model and deepnet."""
    ok_(test is not None and output is not None and options_file is not None)
    test = res_filename(test)
    options_file = res_filename(options_file)

    fusion_models = ",".join([world.model["resource"],
                              world.deepnet["resource"]])
    shell_execute(
        "bigmler fusion --fusion-models %s --test \"%s\""
        " --batch-prediction-attributes %s --store --remote --output %s" %
        (fusion_models, test, options_file, output),
        output, test=test)
コード例 #27
0
ファイル: export_steps.py プロジェクト: bigmlcom/bigmler
def i_create_all_resources_to_model_with_source_attrs( \
    self, data=None, source_attributes=None, output=None):
    """Build a model from *data*, optionally with source attributes."""
    ok_(data is not None and source_attributes is not None
        and output is not None)
    if source_attributes != "":
        # Empty string means "no attributes"; otherwise add the option.
        source_attributes = " --source-attributes " + \
            res_filename(source_attributes)
    shell_execute(
        "bigmler --train %s --output %s%s"
        " --store --max-batch-models 1 --no-fast" %
        (res_filename(data), output, source_attributes),
        output)
コード例 #28
0
def i_create_all_cluster_resources_with_prediction_fields(
        step, data=None, test=None, prediction_fields=None, output=None):
    """Create remote k=8 centroids restricted to *prediction_fields*."""
    ok_(data is not None and test is not None and output is not None
        and prediction_fields is not None)
    train_path = res_filename(data)
    test = res_filename(test)
    command = ("bigmler cluster --remote --train " + train_path +
               " --test " + test + " --k 8 --prediction-fields \"" +
               prediction_fields + "\" --prediction-info full"
               " --prediction-header --store --output " + output)
    shell_execute(command, output, test=test, options='--prediction-header')
コード例 #29
0
def i_create_resources_from_model_with_op_remote(step, operating_point=None,
                                                 test=None, output=None):
    """Remote predictions from the stored model with an operating point."""
    ok_(operating_point is not None and
        test is not None and output is not None)
    test = res_filename(test)
    operating_point = res_filename(operating_point)
    command = "".join(["bigmler --model ", world.model['resource'],
                       " --test ", test,
                       " --operating-point ", operating_point,
                       " --store --remote --output ", output,
                       " --max-batch-models 1"])
    shell_execute(command, output, test=test)
コード例 #30
0
def i_create_all_resources_to_model_with_source_attrs( \
    self, data=None, source_attributes=None, output=None):
    """Build a model from *data*, optionally with source attributes."""
    ok_(data is not None and source_attributes is not None
        and output is not None)
    if source_attributes != "":
        # Empty string means "no attributes"; otherwise add the option.
        source_attributes = " --source-attributes " + \
            res_filename(source_attributes)
    command = "".join(["bigmler --train ", res_filename(data),
                       " --output ", output, source_attributes,
                       " --store --max-batch-models 1 --no-fast"])
    shell_execute(command, output)
コード例 #31
0
ファイル: basic_fusion_steps.py プロジェクト: jaor/bigmler
def i_create_fs_resources_from_model_remote_with_options(step, test=None, output=None, options_file=None):
    """Remote batch predictions from a fusion of the stored model and deepnet."""
    ok_(test is not None and output is not None and options_file is not None)
    test = res_filename(test)
    options_file = res_filename(options_file)

    fusion_models = ",".join([world.model["resource"],
                              world.deepnet["resource"]])
    command = ("bigmler fusion --fusion-models " + fusion_models +
               " --test \"" + test + "\""
               " --batch-prediction-attributes " + options_file +
               " --store --remote --output " + output)
    shell_execute(command, output, test=test)
コード例 #32
0
def i_create_all_resources_to_test_from_stdin(step, data=None, test=None, name=None, output=None):
    """Pipe the test file through stdin into a bigmler prediction run."""
    if data is None or test is None or output is None or name is None:
        assert False

    test = res_filename(test)
    if not PYTHON3:
        # Python 2 needs the name decoded to unicode before formatting.
        name = name.decode("utf-8")

    command = (CAT + test +
               u"|bigmler --train %s --test --store --output %s"
               u" --name \"%s\" --max-batch-models 1" %
               (res_filename(data), output, name))
    shell_execute(command, output, test=test)
コード例 #33
0
def i_create_lr_resources_from_model_with_op(step, test=None, output=None,
                                             operating_point=None):
    """Predictions from the stored logistic regression with an operating point."""
    ok_(test is not None and output is not None and \
        operating_point is not None)
    test = res_filename(test)
    operating_point = res_filename(operating_point)
    shell_execute(
        "bigmler logistic-regression --logistic-regression %s --test %s"
        " --operating-point %s --store --no-balance-fields --no-bias"
        " --output %s" %
        (world.logistic_regression['resource'], test, operating_point,
         output),
        output, test=test)
コード例 #34
0
def i_create_all_cluster_resources_with_mapping(step,
                                                data=None,
                                                test=None,
                                                fields_map=None,
                                                output=None):
    """Create remote k=8 cluster centroids for *test* using a fields map."""
    ok_(data is not None and test is not None and output is not None
        and fields_map is not None)
    test = res_filename(test)
    command = "".join(["bigmler cluster --remote --train ",
                       res_filename(data),
                       " --test ", test, " --k 8 --fields-map ",
                       res_filename(fields_map),
                       " --store --output ", output])
    shell_execute(command, output, test=test)
コード例 #35
0
def i_create_all_execution_with_io_resources(step,
                                             code_file=None,
                                             output_dir=None,
                                             inputs_dec=None,
                                             outputs_dec=None,
                                             inputs=None):
    """Execute a WhizzML code file with declared inputs and outputs."""
    ok_(code_file is not None and output_dir is not None
        and inputs_dec is not None and outputs_dec is not None
        and inputs is not None)
    command = "".join(["bigmler execute --code-file ",
                       res_filename(code_file),
                       " --store --declare-inputs ",
                       res_filename(inputs_dec),
                       " --declare-outputs ", res_filename(outputs_dec),
                       " --inputs ", res_filename(inputs),
                       " --output-dir ", output_dir])
    shell_execute(command, "%s/xx.txt" % output_dir)
コード例 #36
0
def create_dataset_from_batch_anomaly(filename, output=None, args=None):
    """Build a dataset from a batch anomaly score over *filename*.

    Pipeline: source -> dataset -> anomaly detector -> batch anomaly
    score (producing an output dataset) -> update that output dataset
    with *args*.  Each created resource is appended to the matching
    ``world`` list so the test teardown can delete it.
    """
    source = world.api.create_source(res_filename(filename))
    world.source = source
    world.directory = os.path.dirname(output)
    world.output = output
    world.api.ok(world.source)
    world.sources.append(source['resource'])
    world.dataset = world.api.create_dataset(source)
    world.api.ok(world.dataset)
    world.datasets.append(world.dataset['resource'])
    # fixed seeds make the anomaly detector deterministic across runs
    world.anomaly = world.api.create_anomaly(world.dataset, {
        "seed": "bigml",
        "anomaly_seed": "bigml"
    })
    world.api.ok(world.anomaly)
    world.anomalies.append(world.anomaly['resource'])
    # score the same dataset and ask for the scores as a new dataset
    world.batch_anomaly_score = world.api.create_batch_anomaly_score( \
        world.anomaly, world.dataset, {"output_dataset": True})
    world.api.ok(world.batch_anomaly_score)
    world.batch_anomaly_scores.append(world.batch_anomaly_score['resource'])
    world.batch_anomaly_score_dataset = world.api.get_dataset(
        world.batch_anomaly_score['object']['output_dataset_resource'])
    world.api.ok(world.batch_anomaly_score_dataset)
    # apply the caller-provided update args to the generated dataset
    world.batch_anomaly_score_dataset = world.api.update_dataset( \
        world.batch_anomaly_score_dataset, args)
    world.api.ok(world.batch_anomaly_score_dataset)
    world.datasets.append(world.batch_anomaly_score_dataset['resource'])
コード例 #37
0
def create_evaluation_split(filename, output=None, args=None):
    """Train/test split evaluation of a model built from *filename*.

    Creates a 70% sample dataset for training and its out-of-bag
    complement (same seed, ``out_of_bag``) for testing, builds a model
    on the training part and evaluates it on the test part with *args*.
    Each resource is appended to the matching ``world`` list for
    teardown.
    """
    source = world.api.create_source( \
        res_filename(filename), {"project": world.project_id})
    world.source = source
    world.directory = os.path.dirname(output)
    world.output = output
    world.api.ok(world.source)
    world.sources.append(source['resource'])
    world.dataset = world.api.create_dataset(source)
    world.api.ok(world.dataset)
    world.datasets.append(world.dataset['resource'])
    # 70% training sample; fixed seed keeps the split reproducible
    world.dataset_train = world.api.create_dataset(world.dataset, { \
        'sample_rate': 0.7, 'seed': 'BigML'})
    world.api.ok(world.dataset_train)
    world.datasets.append(world.dataset_train['resource'])
    # same seed + out_of_bag selects the complementary 30% for testing
    world.dataset_test = world.api.create_dataset(world.dataset, { \
        'sample_rate': 0.7, 'seed': 'BigML', 'out_of_bag': True})
    world.api.ok(world.dataset_test)
    world.datasets.append(world.dataset_test['resource'])
    world.model = world.api.create_model(world.dataset_train)
    world.api.ok(world.model)
    world.models.append(world.model['resource'])
    world.evaluation = world.api.create_evaluation( \
        world.model, world.dataset_test, args)
    world.api.ok(world.evaluation)
    world.evaluations.append(world.evaluation['resource'])
コード例 #38
0
def i_create_pca_resources_from_source( \
    step, test=None, output=None):
    """Create PCA projections for *test* from the stored source."""
    ok_(test is not None and output is not None)
    test = res_filename(test)
    shell_execute(
        "bigmler pca --source %s --test %s --store --output %s" %
        (world.source['resource'], test, output),
        output, test=test)
コード例 #39
0
def i_create_source_from_stdin(step, data=None, output_dir=None):
    """Create only a source by piping *data* through stdin."""
    if data is None or output_dir is None:
        assert False
    command = (CAT + res_filename(data) +
               u"|bigmler --train --store --no-dataset --no-model"
               u" --output-dir %s --max-batch-models 1" % output_dir)
    shell_execute(command, output_dir + "/test", test=None)
コード例 #40
0
def i_check_anomaly_scores(step, check_file):
    """Check the anomaly scores output against the expected *check_file*.

    Rows are compared cell by cell; numeric-looking cells are compared
    after rounding both values to the shorter number of decimal places.
    Fails the step with the exception message on any error.
    """
    check_file = res_filename(check_file)
    predictions_file = world.output
    try:
        # Python-3 rewrite of the original Python-2-only body
        # (print statement, `except E, exc`, `.next()`, "U" open mode —
        # which was removed in Python 3.11); `with` also closes the
        # files, which the original leaked.
        with open(predictions_file, newline="") as predictions_handle, \
                open(check_file, newline="") as check_handle:
            predictions_reader = csv.reader(predictions_handle,
                                            lineterminator="\n")
            check_reader = csv.reader(check_handle, lineterminator="\n")
            for row in predictions_reader:
                check_row = next(check_reader)
                if len(check_row) != len(row):
                    assert False
                for index in range(len(row)):
                    dot = row[index].find(".")
                    if dot > 0 or (check_row[index].find(".") > 0
                                   and check_row[index].endswith(".0")):
                        try:
                            # round both values to the shorter precision
                            decimal_places = min(len(row[index]),
                                                 len(check_row[index])) - dot - 1
                            row[index] = round(float(row[index]),
                                               decimal_places)
                            check_row[index] = round(float(check_row[index]),
                                                     decimal_places)
                        except ValueError:
                            pass
                    if check_row[index] != row[index]:
                        print(row, check_row)
                        assert False
        assert True
    except Exception as exc:
        assert False, str(exc)
コード例 #41
0
def i_create_all_anomaly_resources_with_test_split(step, data=None, test_split=None, output=None):
    """Create remote anomaly scores using a train/test split of *data*."""
    if data is None or output is None or test_split is None:
        assert False
    data = res_filename(data)
    shell_execute(
        "bigmler anomaly --remote --train %s --test-split %s"
        " --store --output %s" % (data, test_split, output),
        output, data=data, test_split=test_split)
コード例 #42
0
def i_create_cluster_resources_from_clusters_file(step, clusters_file=None, test=None, output=None):
    """Create centroids for *test* from clusters listed in a file."""
    ok_(test is not None and output is not None and clusters_file is not None)
    test = res_filename(test)
    shell_execute(
        "bigmler cluster --clusters %s --test %s --store --output %s" %
        (clusters_file, test, output),
        output, test=test)
コード例 #43
0
def i_create_cluster_resources_from_cluster(step, test=None, output=None):
    """Create k=8 centroids for *test* from the stored cluster."""
    ok_(test is not None and output is not None)
    test = res_filename(test)
    shell_execute(
        "bigmler cluster --cluster %s --test %s --k 8 --store --output %s" %
        (world.cluster['resource'], test, output),
        output, test=test)
コード例 #44
0
def i_create_anomaly_resources_with_options(step, data=None, options=None, output_dir=None):
    """Create an anomaly detector with extra *options* and an in-dataset."""
    if data is None or output_dir is None or options is None:
        assert False
    shell_execute(
        "bigmler anomaly --train %s %s"
        " --anomalies-dataset in --store --output-dir %s" %
        (res_filename(data), options, output_dir),
        "%s/x.csv" % output_dir, data=data)
コード例 #45
0
ファイル: reify_steps.py プロジェクト: bigmlcom/bigmler
def create_dataset_from_batch_anomaly(filename, output=None, args=None):
    """Build a dataset from a batch anomaly score over *filename*.

    Pipeline: source -> dataset -> anomaly detector -> batch anomaly
    score (producing an output dataset) -> update that output dataset
    with *args*.  Each created resource is appended to the matching
    ``world`` list so the test teardown can delete it.
    """
    source = world.api.create_source(res_filename(filename))
    world.source = source
    world.directory = os.path.dirname(output)
    world.output = output
    world.api.ok(world.source)
    world.sources.append(source['resource'])
    world.dataset = world.api.create_dataset(source)
    world.api.ok(world.dataset)
    world.datasets.append(world.dataset['resource'])
    # fixed seeds make the anomaly detector deterministic across runs
    world.anomaly = world.api.create_anomaly(world.dataset,
                                             {"seed": "bigml",
                                              "anomaly_seed": "bigml"})
    world.api.ok(world.anomaly)
    world.anomalies.append(world.anomaly['resource'])
    # score the same dataset and ask for the scores as a new dataset
    world.batch_anomaly_score = world.api.create_batch_anomaly_score( \
        world.anomaly, world.dataset, {"output_dataset": True})
    world.api.ok(world.batch_anomaly_score)
    world.batch_anomaly_scores.append(world.batch_anomaly_score['resource'])
    world.batch_anomaly_score_dataset = world.api.get_dataset(
        world.batch_anomaly_score['object']['output_dataset_resource'])
    world.api.ok(world.batch_anomaly_score_dataset)
    # apply the caller-provided update args to the generated dataset
    world.batch_anomaly_score_dataset = world.api.update_dataset( \
        world.batch_anomaly_score_dataset, args)
    world.api.ok(world.batch_anomaly_score_dataset)
    world.datasets.append(world.batch_anomaly_score_dataset['resource'])
コード例 #46
0
def create_dataset_from_dataset_from_batch_centroid(filename,
                                                    output=None,
                                                    args=None):
    """Build a dataset derived from a batch centroid over *filename*.

    Pipeline: source -> dataset -> cluster -> batch centroid (producing
    an output dataset) -> new dataset created from that output dataset
    with *args*.  Each created resource is appended to the matching
    ``world`` list so the test teardown can delete it.
    """
    source = world.api.create_source(res_filename(filename))
    world.source = source
    world.directory = os.path.dirname(output)
    world.output = output
    world.api.ok(world.source)
    world.sources.append(source['resource'])
    world.dataset = world.api.create_dataset(source)
    world.api.ok(world.dataset)
    world.datasets.append(world.dataset['resource'])
    # fixed seed makes the clustering deterministic across runs
    world.cluster = world.api.create_cluster( \
        world.dataset, {"cluster_seed": "bigml"})
    world.api.ok(world.cluster)
    world.clusters.append(world.cluster['resource'])
    # assign centroids over the same dataset, emitting a new dataset
    world.batch_centroid = world.api.create_batch_centroid( \
        world.cluster, world.dataset, {"output_dataset": True})
    world.api.ok(world.batch_centroid)
    world.batch_centroids.append(world.batch_centroid['resource'])
    world.batch_centroid_dataset = world.api.get_dataset(
        world.batch_centroid['object']['output_dataset_resource'])
    world.api.ok(world.batch_centroid_dataset)
    world.datasets.append(world.batch_centroid_dataset['resource'])
    # final dataset built from the centroid dataset with caller args;
    # note this rebinds world.dataset to the derived dataset
    world.dataset = world.api.create_dataset( \
        world.batch_centroid_dataset['resource'], args)
    world.api.ok(world.dataset)
    world.datasets.append(world.dataset['resource'])
コード例 #47
0
def i_create_source_with_project(step,
                                 data=None,
                                 project=None,
                                 output_dir=None):
    if data is None:
        assert False
    world.directory = output_dir
    world.folders.append(world.directory)
    #Check if the project already exists
    previous_projects = world.api.list_projects('name=%s' % project)
    while previous_projects['meta']['total_count'] > 0:
        print "the project %s already exists, trying with:" % project
        project += " " + project
        print project
        previous_projects = world.api.list_projects('name=%s' % project)
    try:
        command = (u"bigmler --train " + res_filename(data) +
                   u" --no-model --no-dataset --store --output-dir " +
                   output_dir + u" --project=\"" + project + "\"")
        if not PYTHON3:
            command = command.encode(SYSTEM_ENCODING)
        command = check_debug(command)
        retcode = check_call(command, shell=True)
        if retcode < 0:
            assert False
        else:
            world.output = output_dir
            assert True
    except (OSError, CalledProcessError, IOError) as exc:
        assert False, str(exc)
コード例 #48
0
def i_create_source_with_project(step, data=None, project=None, output_dir=None):
    ok_(data is not None)
    world.directory = output_dir
    world.folders.append(world.directory)
    #Check if the project already exists
    previous_projects = world.api.list_projects('name=%s' % project)
    while previous_projects['meta']['total_count'] > 0:
        print "the project %s already exists, trying with:" % project
        project += " " + project
        print project
        previous_projects = world.api.list_projects('name=%s' % project)
    try:
        command = (u"bigmler --train " + res_filename(data) +
                   u" --no-model --no-dataset --store --output-dir " +
                   output_dir +
                   u" --project=\"" + project + "\"")
        if not PYTHON3:
            command = command.encode(SYSTEM_ENCODING)
        command = check_debug(command)
        retcode = check_call(command, shell=True)
        if retcode < 0:
            assert False
        else:
            world.output = output_dir
    except (OSError, CalledProcessError, IOError) as exc:
        assert False, str(exc)
コード例 #49
0
def i_create_anomaly_resources_with_options(step, data=None, options=None, output_dir=None):
    """Create anomaly resources with extra CLI options.

    Runs ``bigmler anomaly`` with the caller-supplied options plus
    ``--anomalies-dataset in`` and delegates execution to shell_execute.
    """
    assert data is not None and output_dir is not None and options is not None
    command = ("bigmler anomaly --train %s %s"
               " --anomalies-dataset in --store --output-dir %s" %
               (res_filename(data), options, output_dir))
    shell_execute(command, "%s/x.csv" % output_dir, data=data)
コード例 #50
0
def i_check_anomaly_scores(step, check_file):
    """Compare the generated anomaly scores file against a reference CSV.

    Rows must match cell by cell; numeric-looking cells are rounded to
    the number of decimal places carried by the shorter representation
    before comparing. Any mismatch or I/O error fails the step.

    Fix: both files were opened and never closed (handles leaked on any
    mismatch); they are now managed by ``with``. ``next()`` and
    ``except ... as`` replace the Python-2-only spellings.
    """
    check_file = res_filename(check_file)
    predictions_file = world.output
    try:
        # NOTE(review): "U" (universal newlines) mode is kept for behavior
        # parity; it is removed in Python 3.11 — confirm target version.
        with open(predictions_file, "U") as predictions_handle, \
                open(check_file, "U") as check_handle:
            predictions_reader = csv.reader(predictions_handle,
                                            lineterminator="\n")
            check_reader = csv.reader(check_handle, lineterminator="\n")
            for row in predictions_reader:
                check_row = next(check_reader)
                if len(check_row) != len(row):
                    assert False
                for index in range(len(row)):
                    dot = row[index].find(".")
                    # A cell pair is compared numerically when either side
                    # looks like a float (reference cells ending in ".0"
                    # included).
                    if dot > 0 or (check_row[index].find(".") > 0
                                   and check_row[index].endswith(".0")):
                        try:
                            # Round to the precision of the shorter cell.
                            decimal_places = min(
                                len(row[index]),
                                len(check_row[index])) - dot - 1
                            row[index] = round(float(row[index]),
                                               decimal_places)
                            check_row[index] = round(float(check_row[index]),
                                                     decimal_places)
                        except ValueError:
                            pass
                    if check_row[index] != row[index]:
                        print("%s %s" % (row, check_row))
                        assert False
    except Exception as exc:
        assert False, str(exc)
コード例 #51
0
def i_create_source_from_file(step, data=None, output_dir=None):
    """Create only a source (no dataset, no model) from a local file.

    Fix: the original command passed ``--store`` twice; the redundant
    duplicate flag is removed (the resulting behavior is unchanged).
    """
    ok_(data is not None and output_dir is not None)
    command = ("bigmler --train " + res_filename(data) +
               " --store --output-dir " + output_dir +
               " --no-dataset --no-model")
    shell_execute(command, os.path.join(output_dir, "p.csv"), test=None,
                  project=False)
コード例 #52
0
def i_create_all_anomaly_resources_with_test_split(step, data=None, test_split=None, output=None):
    """Create anomaly resources remotely using a train/test split.

    Builds the ``bigmler anomaly --remote`` command with the requested
    split ratio and delegates execution to shell_execute.
    """
    assert data is not None and output is not None and test_split is not None
    data = res_filename(data)
    command = ("bigmler anomaly --remote --train %s --test-split %s"
               " --store --output %s" % (data, test_split, output))
    shell_execute(command, output, data=data, test_split=test_split)
コード例 #53
0
def i_check_topic_distributions(step, check_file):
    """Compare generated topic distributions against a reference CSV.

    Numeric-looking cells are rounded to a shared number of decimal
    places and compared with ``assert_almost_equal``; all other cells
    must match exactly. Any error fails the step with a traceback.
    """
    check_file = res_filename(check_file)
    predictions_file = world.output
    import traceback
    try:
        with UnicodeReader(predictions_file) as predictions_file:
            with UnicodeReader(check_file) as check_file:
                for row in predictions_file:
                    check_row = check_file.next()
                    assert len(check_row) == len(row)
                    for index in range(len(row)):
                        dot = row[index].find(".")
                        decimal_places = 1
                        # Compare numerically when either cell looks like
                        # a float (reference cells ending in ".0" count).
                        if dot > 0 or (check_row[index].find(".") > 0
                                       and check_row[index].endswith(".0")):
                            try:
                                # Round to the precision carried by the
                                # shorter of the two cell strings.
                                decimal_places = min( \
                                    len(row[index]),
                                    len(check_row[index])) - dot - 1
                                row[index] = round(float(row[index]),
                                                   decimal_places)
                                check_row[index] = round(
                                    float(check_row[index]), decimal_places)
                            except ValueError:
                                decimal_places = 1
                            # Allow one decimal place of slack in the match
                            assert_almost_equal(check_row[index],
                                                row[index],
                                                places=(decimal_places - 1))
                        else:
                            assert_equal(check_row[index], row[index])
    except Exception, exc:
        assert False, traceback.format_exc()
コード例 #54
0
ファイル: reify_steps.py プロジェクト: bigmlcom/bigmler
def create_dataset_from_dataset_from_batch_centroid(filename, output=None, args=None):
    """Build source -> dataset -> cluster -> batch centroid, then create
    a new dataset (with ``args``) from the centroid's output dataset."""
    def _ready(resource, registry):
        # Wait for the resource to finish and register it for cleanup.
        world.api.ok(resource)
        registry.append(resource['resource'])
        return resource

    world.directory = os.path.dirname(output)
    world.output = output
    world.source = _ready(world.api.create_source(res_filename(filename)),
                          world.sources)
    world.dataset = _ready(world.api.create_dataset(world.source),
                           world.datasets)
    # Fixed seed keeps the clustering reproducible across runs
    world.cluster = _ready(
        world.api.create_cluster(world.dataset, {"cluster_seed": "bigml"}),
        world.clusters)
    world.batch_centroid = _ready(
        world.api.create_batch_centroid(world.cluster, world.dataset,
                                        {"output_dataset": True}),
        world.batch_centroids)
    # The batch centroid generated a dataset; fetch and track it
    world.batch_centroid_dataset = _ready(
        world.api.get_dataset(
            world.batch_centroid['object']['output_dataset_resource']),
        world.datasets)
    # Final dataset derived from the generated one using the given args
    world.dataset = _ready(
        world.api.create_dataset(world.batch_centroid_dataset['resource'],
                                 args),
        world.datasets)
コード例 #55
0
def i_check_topic_distributions(step, check_file):
    """Compare generated topic distributions against a reference CSV.

    Numeric-looking cells are rounded to a shared number of decimal
    places and compared with ``assert_almost_equal``; all other cells
    must match exactly. Any error fails the step with a traceback.
    """
    check_file = res_filename(check_file)
    predictions_file = world.output
    import traceback
    try:
        with UnicodeReader(predictions_file) as predictions_file:
            with UnicodeReader(check_file) as check_file:
                for row in predictions_file:
                    check_row = check_file.next()
                    assert len(check_row) == len(row)
                    for index in range(len(row)):
                        dot = row[index].find(".")
                        decimal_places = 1
                        # Compare numerically when either cell looks like
                        # a float (reference cells ending in ".0" count).
                        if dot > 0 or (check_row[index].find(".") > 0
                                       and check_row[index].endswith(".0")):
                            try:
                                # Round to the precision carried by the
                                # shorter of the two cell strings.
                                decimal_places = min( \
                                    len(row[index]),
                                    len(check_row[index])) - dot - 1
                                row[index] = round(float(row[index]),
                                                   decimal_places)
                                check_row[index] = round(
                                    float(check_row[index]), decimal_places)
                            except ValueError:
                                decimal_places = 1
                            # Allow one decimal place of slack in the match
                            assert_almost_equal(check_row[index], row[index],
                                                places=(decimal_places - 1))
                        else:
                            assert_equal(check_row[index], row[index])
    except Exception, exc:
        assert False, traceback.format_exc()