def i_create_resources_from_ensemble_with_threshold(step, test=None,
                                                    output2=None,
                                                    output3=None):
    """Run two threshold-method bigmler calls against the stored ensemble.

    The first call uses ``world.number_of_models`` as the threshold and
    writes to ``output2``; the second uses a threshold of 1 and writes to
    ``output3``. A missing argument or a failing shell call fails the step.
    """
    if any(value is None for value in (test, output2, output3)):
        assert False
    try:
        # (destination file, threshold) for each of the two runs, in order.
        runs = ((output2, str(world.number_of_models)), (output3, "1"))
        for destination, threshold in runs:
            command = ("bigmler --ensemble " + world.ensemble['resource'] +
                       " --test " + test +
                       " --tag my_ensemble --store --output " + destination +
                       " --method threshold --threshold " + threshold)
            command = check_debug(command)
            exit_code = check_call(command, shell=True)
            if exit_code < 0:
                assert False
    except (OSError, CalledProcessError, IOError) as exc:
        assert False, str(exc)
def i_create_cross_validation_from_dataset(step, rate=None, dataset_file=None,
                                           output=None):
    """Run bigmler cross-validation on the dataset id stored in a file.

    The dataset id is the stripped first line of ``dataset_file``.
    ``world.number_of_models`` and ``world.number_of_evaluations`` are both
    set to ``int(MONTECARLO_FACTOR * float(rate))`` before the command runs;
    ``world.output`` is set to ``output`` on success.
    """
    if any(value is None for value in (rate, output, dataset_file)):
        assert False
    with open(dataset_file, "r") as dataset_handle:
        dataset_id = dataset_handle.readline().strip()
    world.directory = os.path.dirname(output)
    world.folders.append(world.directory)
    expected_models = int(MONTECARLO_FACTOR * float(rate))
    world.number_of_models = expected_models
    world.number_of_evaluations = expected_models
    try:
        command = check_debug("".join([
            "bigmler --dataset ", dataset_id,
            " --cross-validation-rate ", rate,
            " --store --output ", output]))
        exit_code = check_call(command, shell=True)
        if exit_code >= 0:
            world.output = output
        else:
            assert False
    except (OSError, CalledProcessError, IOError) as exc:
        assert False, str(exc)
def i_create_resources_from_ensemble_generic(step, number_of_models=None,
                                             no_replacement="", test=None,
                                             output=None):
    """Run bigmler on the stored dataset with ``--number-of-models``.

    ``no_replacement`` is appended verbatim to the end of the command line.
    On success ``world.test_lines`` holds the line count of ``test`` minus
    its header row, and ``world.output`` / ``world.number_of_models`` are
    updated from the arguments.
    """
    if any(value is None for value in (number_of_models, test, output)):
        assert False
    world.directory = os.path.dirname(output)
    world.folders.append(world.directory)
    try:
        command = check_debug("".join([
            "bigmler --dataset ", world.dataset['resource'],
            " --test ", test,
            " --number-of-models ", str(number_of_models),
            " --tag my_ensemble --store",
            " --output ", output, no_replacement]))
        exit_code = check_call(command, shell=True)
        if exit_code >= 0:
            # The test file carries a header row, which is not a test line.
            world.test_lines = file_number_of_lines(test) - 1
            world.output = output
            world.number_of_models = int(number_of_models)
        else:
            assert False
    except (OSError, CalledProcessError, IOError) as exc:
        assert False, str(exc)
def i_predict_ml_from_model_tag_with_labels(step, labels=None, tag=None,
                                            test=None, output=None):
    """Run a multi-label bigmler call with ``--model-tag`` and ``--labels``.

    On success ``world.test_lines`` is the line count of ``test`` minus its
    header row and ``world.output`` is set to ``output``.
    """
    if any(value is None for value in (tag, labels, test, output)):
        assert False
    world.directory = os.path.dirname(output)
    world.folders.append(world.directory)
    try:
        command = check_debug("".join([
            "bigmler --multi-label --model-tag ", tag,
            " --labels ", labels,
            " --test ", test,
            " --store --output ", output,
            " --max-batch-models 1"]))
        exit_code = check_call(command, shell=True)
        if exit_code >= 0:
            # The test file carries a header row, which is not a test line.
            world.test_lines = file_number_of_lines(test) - 1
            world.output = output
        else:
            assert False
    except (OSError, CalledProcessError, IOError) as exc:
        assert False, str(exc)
def i_create_ml_source(step, label_separator=None, number_of_labels=None,
                       data=None, training_separator=None,
                       multi_label_fields=None, objective=None,
                       output_dir=None):
    """Run a multi-label bigmler call with ``--no-dataset --no-model``.

    Both separators are wrapped in double quotes on the command line.
    ``number_of_labels`` is required but is not part of the command.
    ``world.output`` is set to ``output_dir`` on success.
    """
    required = (label_separator, training_separator, number_of_labels, data,
                multi_label_fields, output_dir, objective)
    if any(value is None for value in required):
        assert False
    world.directory = output_dir
    world.folders.append(world.directory)
    try:
        command = check_debug("".join([
            "bigmler --multi-label --train ", data,
            ' --label-separator "', label_separator,
            '" --training-separator "', training_separator,
            '" --multi-label-fields ', multi_label_fields,
            " --objective ", objective,
            " --store --output-dir ", output_dir,
            " --no-dataset --no-model --max-batch-models 1"]))
        exit_code = check_call(command, shell=True)
        if exit_code >= 0:
            world.output = output_dir
        else:
            assert False
    except (OSError, CalledProcessError, IOError) as exc:
        assert False, str(exc)
def i_create_all_ml_resources_and_ensembles(step, tag=None,
                                            label_separator=None,
                                            number_of_labels=None, data=None,
                                            training_separator=None,
                                            number_of_models=None, test=None,
                                            output=None):
    """Run a multi-label bigmler train-and-test call with ensembles.

    ``world.number_of_models`` is set to
    ``int(number_of_labels) * int(number_of_models)`` before the command
    runs. On success ``world.test_lines`` is the line count of ``test``
    minus its header row and ``world.output`` is set to ``output``.
    """
    required = (tag, label_separator, training_separator, number_of_labels,
                data, test, output, number_of_models)
    if any(value is None for value in required):
        assert False
    world.directory = os.path.dirname(output)
    world.folders.append(world.directory)
    world.number_of_models = int(number_of_labels) * int(number_of_models)
    try:
        command = check_debug("".join([
            "bigmler --multi-label --train ", data,
            ' --label-separator "', label_separator,
            '" --training-separator "', training_separator,
            '" --test ', test,
            " --number-of-models ", str(number_of_models),
            " --store --output ", output,
            " --tag ", tag,
            " --max-batch-models 1"]))
        exit_code = check_call(command, shell=True)
        if exit_code >= 0:
            # The test file carries a header row, which is not a test line.
            world.test_lines = file_number_of_lines(test) - 1
            world.output = output
        else:
            assert False
    except (OSError, CalledProcessError, IOError) as exc:
        assert False, str(exc)
def i_create_resources_and_ensembles_from_dataset(step, multi_label=None,
                                                  number_of_models=None,
                                                  test=None, output=None):
    """Run bigmler on the stored dataset to build ensembles and predict.

    ``multi_label`` works as a flag: any non-None value adds
    ``--multi-label`` to the command. ``number_of_models``, ``test`` and
    ``output`` are required. Previously a missing ``number_of_models`` was
    passed on as the literal text ``None`` after ``world`` had already been
    modified; it is now rejected up front like the other required arguments.

    On success ``world.test_lines`` is the line count of ``test`` minus its
    header row and ``world.output`` is set to ``output``.
    """
    if test is None or output is None or number_of_models is None:
        assert False
    world.directory = os.path.dirname(output)
    world.folders.append(world.directory)
    multi_label = "" if multi_label is None else " --multi-label "
    try:
        command = ("bigmler " + multi_label + "--dataset " +
                   world.dataset['resource'] +
                   " --number-of-models " + str(number_of_models) +
                   " --test " + test +
                   " --store --output " + output)
        command = check_debug(command)
        retcode = check_call(command, shell=True)
        if retcode < 0:
            assert False
        else:
            world.test_lines = file_number_of_lines(test)
            # test file has headers in it, so first line must be ignored
            world.test_lines -= 1
            world.output = output
    except (OSError, CalledProcessError, IOError) as exc:
        assert False, str(exc)
def i_create_all_ml_resources(step, tag=None, label_separator=None,
                              number_of_labels=None, data=None,
                              training_separator=None, test=None,
                              output=None):
    """Run a multi-label bigmler train-and-test call tagged with ``tag``.

    ``world.number_of_models`` is set to ``int(number_of_labels)`` before
    the command runs. On success ``world.test_lines`` is the line count of
    ``test`` minus its header row and ``world.output`` is set to ``output``.
    """
    required = (tag, label_separator, training_separator, number_of_labels,
                data, test, output)
    if any(value is None for value in required):
        assert False
    world.directory = os.path.dirname(output)
    world.folders.append(world.directory)
    world.number_of_models = int(number_of_labels)
    try:
        command = check_debug("".join([
            "bigmler --multi-label --train ", data,
            ' --label-separator "', label_separator,
            '" --training-separator "', training_separator,
            '" --test ', test,
            " --store --output ", output,
            " --tag ", tag,
            " --max-batch-models 1"]))
        exit_code = check_call(command, shell=True)
        if exit_code >= 0:
            # The test file carries a header row, which is not a test line.
            world.test_lines = file_number_of_lines(test) - 1
            world.output = output
        else:
            assert False
    except (OSError, CalledProcessError, IOError) as exc:
        assert False, str(exc)
def i_create_all_mc_resources_from_dataset(step, max_categories=None,
                                           objective=None, test=None,
                                           output=None):
    """Run bigmler with ``--max-categories`` on the stored dataset.

    All of ``max_categories``, ``objective``, ``test`` and ``output`` are
    required. ``objective`` is concatenated into the command, so a missing
    value used to raise an uncaught ``TypeError`` after ``world`` had been
    modified; it is now rejected by the initial validation instead.

    On success ``world.test_lines`` is the line count of ``test`` minus its
    header row and ``world.output`` is set to ``output``.
    """
    if (max_categories is None or objective is None or test is None
            or output is None):
        assert False
    world.directory = os.path.dirname(output)
    world.folders.append(world.directory)
    try:
        command = ("bigmler --dataset " + world.dataset['resource'] +
                   " --max-categories " + max_categories +
                   " --objective " + objective +
                   " --test " + test +
                   " --store --output " + output)
        command = check_debug(command)
        retcode = check_call(command, shell=True)
        if retcode < 0:
            assert False
        else:
            world.test_lines = file_number_of_lines(test)
            # test file has headers in it, so first line must be ignored
            world.test_lines -= 1
            world.output = output
    except (OSError, CalledProcessError, IOError) as exc:
        assert False, str(exc)
def shell_execute(command, output, test=None, options=None, data=None,
                  test_split=None):
    """Execute a bigmler command in the shell and record the outcome.

    The directory of ``output`` is stored in ``world.directory`` and added
    to ``world.folders`` before running. On success:

    * with ``test``, ``world.test_lines`` is its line count, minus one
      unless ``options`` contains ``--prediction-header``;
    * with ``test_split``, ``world.test_lines`` is the number of lines in
      ``data`` (minus one) scaled by ``float(test_split)``;
    * ``world.output`` is set to ``output``.
    """
    command = check_debug(command)
    world.directory = os.path.dirname(output)
    world.folders.append(world.directory)
    try:
        exit_code = check_call(command, shell=True)
        if exit_code >= 0:
            if test is not None:
                world.test_lines = file_number_of_lines(test)
                header_requested = (options is not None and
                                    options.find('--prediction-header') != -1)
                if not header_requested:
                    # The test file has a header row that is not a test line.
                    world.test_lines -= 1
            if test_split is not None:
                data_rows = file_number_of_lines(data) - 1
                world.test_lines = int(data_rows * float(test_split))
            world.output = output
        else:
            assert False
    except (OSError, CalledProcessError, IOError) as exc:
        assert False, str(exc)
def shell_execute(command, output, test=None, options=None, data=None,
                  test_split=None):
    """Execute a bigmler command in the shell and update ``world``.

    ``world.directory`` / ``world.folders`` are updated from the directory
    of ``output`` before the call. After a successful call
    ``world.test_lines`` is derived from ``test`` (header row discounted
    unless ``options`` includes ``--prediction-header``) and/or from
    ``data`` and ``test_split``, and ``world.output`` is set to ``output``.
    """
    command = check_debug(command)
    world.directory = os.path.dirname(output)
    world.folders.append(world.directory)
    try:
        status = check_call(command, shell=True)
        if status >= 0:
            if test is not None:
                world.test_lines = file_number_of_lines(test)
                if options is None or options.find('--prediction-header') < 0:
                    # Skip the header row of the test file.
                    world.test_lines -= 1
            if test_split is not None:
                rows = file_number_of_lines(data) - 1
                world.test_lines = int(rows * float(test_split))
            world.output = output
        else:
            assert False
    except (OSError, CalledProcessError, IOError) as exc:
        assert False, str(exc)
def i_create_source_with_project(step, data=None, project=None,
                                 output_dir=None):
    """Run bigmler to create a source inside a project with a unused name.

    ``data``, ``project`` and ``output_dir`` are all required. ``project``
    and ``output_dir`` are concatenated into the command, so a missing value
    used to raise ``TypeError`` only after the API had been queried and
    ``world`` modified; they are now rejected up front.

    While ``world.api.list_projects`` reports an existing project with the
    requested name, the name is extended with a space plus itself and the
    lookup is repeated. ``world.output`` is set to ``output_dir`` on success.
    """
    if data is None or project is None or output_dir is None:
        assert False
    world.directory = output_dir
    world.folders.append(world.directory)
    # Check if the project already exists
    previous_projects = world.api.list_projects('name=%s' % project)
    while previous_projects['meta']['total_count'] > 0:
        # Single-argument parenthesised print gives the same output under
        # both the Python 2 print statement and the print function.
        print("the project %s already exists, trying with:" % project)
        project += " " + project
        print(project)
        previous_projects = world.api.list_projects('name=%s' % project)
    try:
        command = (u"bigmler --train " + data +
                   u" --no-model --no-dataset --store --output-dir " +
                   output_dir +
                   u" --project=\"" + project + "\"")
        command = check_debug(command)
        retcode = check_call(command.encode(SYSTEM_ENCODING), shell=True)
        if retcode < 0:
            assert False
        else:
            world.output = output_dir
    except (OSError, CalledProcessError, IOError) as exc:
        assert False, str(exc)
def i_update_dataset_new_properties(step, json_file=None):
    """Run bigmler with ``--dataset-attributes`` on the stored dataset.

    The command writes to ``world.output`` as its output directory and
    leaves ``world`` unchanged.
    """
    if json_file is None:
        assert False
    try:
        command = check_debug("".join([
            "bigmler --dataset ", world.dataset['resource'],
            " --no-model --store --output-dir ", world.output,
            " --dataset-attributes ", json_file]))
        exit_code = check_call(command, shell=True)
        if exit_code < 0:
            assert False
    except (OSError, CalledProcessError, IOError) as exc:
        assert False, str(exc)
def i_export_the_dataset(step, filename):
    """Run bigmler with ``--to-csv`` on the stored dataset.

    ``world.directory`` is used as the output directory; ``world`` is not
    modified.
    """
    if filename is None:
        assert False
    try:
        command = check_debug("".join([
            "bigmler --dataset ", world.dataset['resource'],
            " --to-csv ", filename,
            " --output-dir ", world.directory,
            " --no-model"]))
        exit_code = check_call(command, shell=True)
        if exit_code < 0:
            assert False
    except (OSError, CalledProcessError, IOError) as exc:
        assert False, str(exc)
def i_create_objective_weighted_model(step, data=None, path=None,
                                      output_dir=None):
    """Run bigmler training with ``--objective-weights`` taken from ``path``.

    ``output_dir`` is stored in ``world.directory`` and added to
    ``world.folders`` before the command runs.
    """
    if any(value is None for value in (data, path, output_dir)):
        assert False
    world.directory = output_dir
    world.folders.append(world.directory)
    try:
        command = check_debug("".join([
            "bigmler --train ", data,
            " --objective-weights ", path,
            " --store --output-dir ", output_dir]))
        exit_code = check_call(command, shell=True)
        if exit_code < 0:
            assert False
    except (OSError, CalledProcessError, IOError) as exc:
        assert False, str(exc)
def i_filter_field_from_dataset(step, field=None, output_dir=None):
    """Run bigmler with ``--dataset-fields="-<field>"`` on the stored dataset.

    ``--new-fields`` always points at ``../data/empty.json``. ``world`` is
    not modified.
    """
    if any(value is None for value in (field, output_dir)):
        assert False
    try:
        command = check_debug("".join([
            "bigmler --dataset ", world.dataset['resource'],
            " --no-model --store --output-dir ", output_dir,
            ' --dataset-fields="-', field,
            '" --new-fields ../data/empty.json']))
        exit_code = check_call(command, shell=True)
        if exit_code < 0:
            assert False
    except (OSError, CalledProcessError, IOError) as exc:
        assert False, str(exc)
def i_create_dataset_new_fields(step, json_file=None, model_fields=None):
    """Run bigmler with ``--new-fields`` and quoted ``--model-fields``.

    The stored dataset is the input and ``world.output`` is the output
    directory; ``world`` is not modified.
    """
    if any(value is None for value in (json_file, model_fields)):
        assert False
    try:
        command = check_debug("".join([
            "bigmler --dataset ", world.dataset['resource'],
            ' --model-fields "', model_fields,
            '" --store',
            " --output-dir ", world.output,
            " --new-fields ", json_file]))
        exit_code = check_call(command, shell=True)
        if exit_code < 0:
            assert False
    except (OSError, CalledProcessError, IOError) as exc:
        assert False, str(exc)
def i_create_balanced_model(step, data=None, output_dir=None):
    """Run bigmler training on ``data`` with the ``--balance`` flag.

    ``output_dir`` is stored in ``world.directory`` and added to
    ``world.folders`` before the command runs.
    """
    if any(value is None for value in (data, output_dir)):
        assert False
    world.directory = output_dir
    world.folders.append(world.directory)
    try:
        # The doubled space after --balance matches the original command.
        command = check_debug("".join([
            "bigmler --train ", data,
            " --balance ",
            " --store --output-dir ", output_dir]))
        exit_code = check_call(command, shell=True)
        if exit_code < 0:
            assert False
    except (OSError, CalledProcessError, IOError) as exc:
        assert False, str(exc)
def i_create_dataset(step, data=None, output_dir=None):
    """Run bigmler to create a dataset (``--no-model``) from ``data``.

    Both ``data`` and ``output_dir`` are required. A missing ``output_dir``
    used to be stored in ``world.directory`` / ``world.folders`` and then
    raise an uncaught ``TypeError`` while building the command; it is now
    rejected before ``world`` is touched.

    The command is built as unicode and encoded with ``SYSTEM_ENCODING``
    before being passed to the shell. ``world.output`` is set to
    ``output_dir`` on success.
    """
    if data is None or output_dir is None:
        assert False
    world.directory = output_dir
    world.folders.append(world.directory)
    try:
        command = (u"bigmler --train " + data +
                   u" --no-model --store --output-dir " + output_dir)
        command = check_debug(command)
        retcode = check_call(command.encode(SYSTEM_ENCODING), shell=True)
        if retcode < 0:
            assert False
        else:
            world.output = output_dir
    except (OSError, CalledProcessError, IOError) as exc:
        assert False, str(exc)
def i_create_dataset(step, data=None, output_dir=None):
    """Run bigmler to create a dataset (``--no-model``) from ``data``.

    Both ``data`` and ``output_dir`` are required. A missing ``output_dir``
    used to be stored in ``world.directory`` / ``world.folders`` and then
    raise an uncaught ``TypeError`` while building the command; it is now
    rejected before ``world`` is touched.

    ``world.output`` is set to ``output_dir`` on success.
    """
    if data is None or output_dir is None:
        assert False
    world.directory = output_dir
    world.folders.append(world.directory)
    try:
        command = ("bigmler --train " + data +
                   " --no-model --store --output-dir " + output_dir)
        command = check_debug(command)
        retcode = check_call(command, shell=True)
        if retcode < 0:
            assert False
        else:
            world.output = output_dir
    except (OSError, CalledProcessError, IOError) as exc:
        assert False, str(exc)
def i_find_predictions_files(step, directory1=None, directory2=None,
                             output=None, method=None):
    """Run bigmler ``--combine-votes`` over two directories with ``--method``.

    On success ``world.test_lines`` is the line count of
    ``predictions.csv`` inside ``directory1`` (no header adjustment) and
    ``world.output`` is set to ``output``.
    """
    if any(value is None for value in (directory1, directory2, output,
                                       method)):
        assert False
    world.directory = os.path.dirname(output)
    world.folders.append(world.directory)
    try:
        command = check_debug("".join([
            "bigmler --combine-votes ", directory1, ",", directory2,
            " --store --output ", output,
            " --method ", method]))
        exit_code = check_call(command, shell=True)
        if exit_code >= 0:
            predictions_path = "%s%spredictions.csv" % (directory1, os.sep)
            world.test_lines = file_number_of_lines(predictions_path)
            world.output = output
        else:
            assert False
    except (OSError, CalledProcessError, IOError) as exc:
        assert False, str(exc)
def i_create_ml_evaluations_from_models(step, models_file=None, output=None):
    """Run a multi-label bigmler ``--evaluate`` call using ``--models``.

    The stored dataset is the evaluation input. ``world.output`` is set to
    ``output`` on success.
    """
    if any(value is None for value in (output, models_file)):
        assert False
    world.directory = os.path.dirname(output)
    world.folders.append(world.directory)
    try:
        command = check_debug("".join([
            "bigmler --multi-label --models ", models_file,
            " --dataset ", world.dataset['resource'],
            " --evaluate --store --output ", output]))
        exit_code = check_call(command, shell=True)
        if exit_code >= 0:
            world.output = output
        else:
            assert False
    except (OSError, CalledProcessError, IOError) as exc:
        assert False, str(exc)
def i_create_sample(step, options=None, output_dir=None):
    """Run the ``bigmler sample`` subcommand on the stored dataset.

    ``options`` is appended verbatim after the output directory. The command
    is built as unicode and encoded with ``SYSTEM_ENCODING`` before being
    passed to the shell. ``world.output`` is set to ``output_dir`` on
    success.
    """
    if any(value is None for value in (options, output_dir)):
        assert False
    world.directory = output_dir
    world.folders.append(world.directory)
    try:
        command = check_debug(u"".join([
            u"bigmler sample --dataset ", world.dataset['resource'],
            u" --store --output-dir ", output_dir,
            u" ", options]))
        exit_code = check_call(command.encode(SYSTEM_ENCODING), shell=True)
        if exit_code >= 0:
            world.output = output_dir
        else:
            assert False
    except (OSError, CalledProcessError, IOError) as exc:
        assert False, str(exc)
def i_create_ml_evaluations_from_tagged_models(step, tag=None, output=None):
    """Run a multi-label bigmler ``--evaluate`` call using ``--model-tag``.

    The stored dataset is the evaluation input. ``world.output`` is set to
    ``output`` on success.
    """
    if any(value is None for value in (output, tag)):
        assert False
    world.directory = os.path.dirname(output)
    world.folders.append(world.directory)
    try:
        command = check_debug("".join([
            "bigmler --multi-label --model-tag ", tag,
            " --dataset ", world.dataset['resource'],
            " --evaluate --store --output ", output]))
        exit_code = check_call(command, shell=True)
        if exit_code >= 0:
            world.output = output
        else:
            assert False
    except (OSError, CalledProcessError, IOError) as exc:
        assert False, str(exc)
def i_create_multi_dataset(step, output_dir):
    """Run bigmler ``--multi-dataset`` over the ``dataset`` file.

    The ``--datasets`` argument is the file named ``dataset`` inside the
    current ``world.directory``. ``output_dir`` is added to
    ``world.folders`` up front; ``world.directory`` and ``world.output``
    switch to ``output_dir`` only after the command succeeds.
    """
    if output_dir is None:
        assert False
    world.folders.append(output_dir)
    datasets_file = "%s%sdataset" % (world.directory, os.sep)
    try:
        command = check_debug("".join([
            "bigmler --datasets ", datasets_file,
            " --multi-dataset --no-model --store --output-dir ",
            output_dir]))
        exit_code = check_call(command, shell=True)
        if exit_code >= 0:
            world.directory = output_dir
            world.output = output_dir
        else:
            assert False
    except (OSError, CalledProcessError, IOError) as exc:
        assert False, str(exc)
def i_create_all_mc_resources_from_models(step, models_file=None, test=None,
                                          output=None):
    """Run bigmler predictions from ``--models`` with ``--method combined``.

    On success ``world.test_lines`` is the line count of ``test`` minus its
    header row and ``world.output`` is set to ``output``.
    """
    if any(value is None for value in (models_file, test, output)):
        assert False
    world.directory = os.path.dirname(output)
    world.folders.append(world.directory)
    try:
        command = check_debug("".join([
            "bigmler --models ", models_file,
            " --method combined --test ", test,
            " --store --output ", output]))
        exit_code = check_call(command, shell=True)
        if exit_code >= 0:
            # The test file carries a header row, which is not a test line.
            world.test_lines = file_number_of_lines(test) - 1
            world.output = output
        else:
            assert False
    except (OSError, CalledProcessError, IOError) as exc:
        assert False, str(exc)
def i_predict_ml_from_model_tag(step, tag=None, test=None, output=None):
    """Run a multi-label bigmler prediction call using ``--model-tag``.

    On success ``world.test_lines`` is the line count of ``test`` minus its
    header row and ``world.output`` is set to ``output``.
    """
    if any(value is None for value in (tag, test, output)):
        assert False
    world.directory = os.path.dirname(output)
    world.folders.append(world.directory)
    try:
        command = check_debug("".join([
            "bigmler --multi-label --model-tag ", tag,
            " --test ", test,
            " --store --output ", output,
            " --max-batch-models 1"]))
        exit_code = check_call(command, shell=True)
        if exit_code >= 0:
            # The test file carries a header row, which is not a test line.
            world.test_lines = file_number_of_lines(test) - 1
            world.output = output
        else:
            assert False
    except (OSError, CalledProcessError, IOError) as exc:
        assert False, str(exc)
def i_create_all_mc_resources_from_dataset_with_model_fields(
        step, max_categories=None, objective=None, model_fields=None,
        test=None, output=None):
    """Run bigmler with ``--max-categories`` and quoted ``--model-fields``.

    All of ``max_categories``, ``objective``, ``model_fields``, ``test`` and
    ``output`` are required. ``objective`` is concatenated into the command,
    so a missing value used to raise an uncaught ``TypeError`` after
    ``world`` had been modified; it is now rejected by the initial
    validation instead.

    On success ``world.test_lines`` is the line count of ``test`` minus its
    header row and ``world.output`` is set to ``output``.
    """
    if (max_categories is None or objective is None or test is None
            or output is None or model_fields is None):
        assert False
    world.directory = os.path.dirname(output)
    world.folders.append(world.directory)
    try:
        command = ("bigmler --dataset " + world.dataset['resource'] +
                   " --max-categories " + max_categories +
                   " --objective " + objective +
                   " --test " + test +
                   " --store --output " + output +
                   " --model-fields \"" + model_fields + "\"")
        command = check_debug(command)
        retcode = check_call(command, shell=True)
        if retcode < 0:
            assert False
        else:
            world.test_lines = file_number_of_lines(test)
            # test file has headers in it, so first line must be ignored
            world.test_lines -= 1
            world.output = output
    except (OSError, CalledProcessError, IOError) as exc:
        assert False, str(exc)
def i_find_predictions_files(step, directory1=None, directory2=None,
                             output=None):
    """Run bigmler ``--combine-votes`` over two prediction directories.

    On success ``world.test_lines`` is the line count of
    ``predictions.csv`` inside ``directory1`` (no header adjustment) and
    ``world.output`` is set to ``output``.
    """
    if any(value is None for value in (directory1, directory2, output)):
        assert False
    world.directory = os.path.dirname(output)
    world.folders.append(world.directory)
    try:
        command = check_debug("".join([
            "bigmler --combine-votes ", directory1, ",", directory2,
            " --store --output ", output]))
        exit_code = check_call(command, shell=True)
        if exit_code >= 0:
            predictions_path = "%s%spredictions.csv" % (directory1, os.sep)
            world.test_lines = file_number_of_lines(predictions_path)
            world.output = output
        else:
            assert False
    except (OSError, CalledProcessError, IOError) as exc:
        assert False, str(exc)
def i_create_resources_and_ensembles_from_dataset(step, multi_label=None,
                                                  number_of_models=None,
                                                  test=None, output=None):
    """Run bigmler on the stored dataset to build ensembles and predict.

    ``multi_label`` works as a flag: any non-None value adds
    ``--multi-label`` to the command. ``number_of_models``, ``test`` and
    ``output`` are required. Previously a missing ``number_of_models`` was
    passed on as the literal text ``None`` after ``world`` had already been
    modified; it is now rejected up front like the other required arguments.

    On success ``world.test_lines`` is the line count of ``test`` minus its
    header row and ``world.output`` is set to ``output``.
    """
    if test is None or output is None or number_of_models is None:
        assert False
    world.directory = os.path.dirname(output)
    world.folders.append(world.directory)
    multi_label = "" if multi_label is None else " --multi-label "
    try:
        command = ("bigmler " + multi_label + "--dataset " +
                   world.dataset['resource'] +
                   " --number-of-models " + str(number_of_models) +
                   " --test " + test +
                   " --store --output " + output)
        command = check_debug(command)
        retcode = check_call(command, shell=True)
        if retcode < 0:
            assert False
        else:
            world.test_lines = file_number_of_lines(test)
            # test file has headers in it, so first line must be ignored
            world.test_lines -= 1
            world.output = output
    except (OSError, CalledProcessError, IOError) as exc:
        assert False, str(exc)