Example #1
0
def set_library_args(args, name=None):
    """Builds the create/update arguments dict for a library resource.

    Starts from the basic shared arguments and layers on the
    project id, the parsed imports list and any user-provided
    JSON attributes for the "library" section.
    """

    # fall back to the command-line name when none is given explicitly
    library_args = set_basic_args(args, args.name if name is None else name)
    if args.project_id is not None:
        library_args["project"] = args.project_id
    # `imports` is the raw flag; `imports_` holds the parsed value
    if args.imports is not None:
        library_args["imports"] = args.imports_
    update_attributes(library_args, args.json_args.get('library'))
    return library_args
Example #2
0
def set_script_args(args, name=None):
    """Builds the create/update arguments dict for a script resource.

    Extends the basic shared arguments with the project id, imports,
    declared inputs/outputs and any user-provided JSON attributes
    for the "script" section.
    """

    # fall back to the command-line name when none is given explicitly
    script_args = set_basic_args(args, args.name if name is None else name)
    if args.project_id is not None:
        script_args["project"] = args.project_id
    # raw flags gate the update; the `_`-suffixed attributes hold parsed values
    if args.imports is not None:
        script_args["imports"] = args.imports_
    if args.parameters_ is not None:
        script_args["inputs"] = args.parameters_
    if args.declare_outputs_:
        script_args["outputs"] = args.declare_outputs_
    update_attributes(script_args, args.json_args.get('script'))
    return script_args
Example #3
0
def set_execution_args(args, name=None):
    """Returns an execution arguments dict

    Extends the basic shared arguments with the project id, the
    execution inputs/outputs, creation defaults, input maps and any
    user-provided JSON attributes for the "execution" section.
    """

    if name is None:
        name = args.name
    execution_args = set_basic_args(args, name)
    if args.project_id is not None:
        execution_args.update({"project": args.project_id})
    if args.arguments_:
        execution_args.update({"inputs": args.arguments_})
    if args.creation_defaults is not None:
        execution_args.update({"creation_defaults": args.creation_defaults_})
    if args.outputs_:
        execution_args.update({"outputs": args.outputs_})
    if args.input_maps_:
        # Fixed: the payload key was "input_maps_"; the trailing underscore
        # is this file's convention for parsed command-line values
        # (args.arguments_, args.outputs_, ...), not for API attribute
        # names, which are always the bare form ("inputs", "outputs",
        # "creation_defaults" above).
        execution_args.update({"input_maps": args.input_maps_})
    update_attributes(execution_args, args.json_args.get('execution'))
    return execution_args
Example #4
0
def set_source_args(args,
                    name=None,
                    multi_label_data=None,
                    data_set_header=None,
                    fields=None):
    """Returns a source arguments dict

    Extends the basic shared arguments with the project id, the
    source_parser settings (header, locale, separator), multi-label
    metadata and, when a previous fields structure is available,
    per-field attribute/type updates and user-provided JSON
    attributes for the "source" section.
    """

    if name is None:
        name = args.name
    source_args = set_basic_args(args, name)
    if args.project_id is not None:
        source_args.update({"project": args.project_id})
    # if header is set, use it
    if data_set_header is not None:
        source_args.update({"source_parser": {"header": data_set_header}})
    # If user has given an OS locale, try to add the locale used in bigml.com
    if args.user_locale is not None:
        source_locale = bigml_locale(args.user_locale)
        if source_locale is None:
            log_message("WARNING: %s locale equivalence not found."
                        " Using %s instead.\n" %
                        (args.user_locale, LOCALE_DEFAULT),
                        log_file=None,
                        console=True)
            source_locale = LOCALE_DEFAULT
        # Fixed: the previous `update({'source_parser': {}})` replaced the
        # whole "source_parser" dict, discarding the header set above when
        # both data_set_header and user_locale were given. setdefault only
        # creates it when missing.
        source_args.setdefault("source_parser", {})
        source_args["source_parser"].update({'locale': source_locale})
    # If user has set a training separator, use it.
    if args.training_separator is not None:
        training_separator = decode2(args.training_separator,
                                     encoding="string_escape")
        # Fixed: this branch raised KeyError when neither the header nor
        # the locale branch had created "source_parser".
        source_args.setdefault("source_parser", {})
        source_args["source_parser"].update({'separator': training_separator})
    # If uploading a multi-label file, add the user_metadata info needed to
    # manage the multi-label fields
    if (hasattr(args, 'multi_label') and args.multi_label
            and multi_label_data is not None):
        source_args.update(
            {"user_metadata": {
                "multi_label_data": multi_label_data
            }})

    # to update fields attributes or types you must have a previous fields
    # structure (at update time)
    if fields:
        if args.field_attributes_:
            update_attributes(source_args, {"fields": args.field_attributes_},
                              by_column=True,
                              fields=fields)
        if args.types_:
            update_attributes(source_args, {"fields": args.types_},
                              by_column=True,
                              fields=fields)
        if args.import_fields:
            fields_struct = fields.new_fields_structure(args.import_fields)
            check_fields_struct(fields_struct, "source")
            update_attributes(source_args, fields_struct)
        if 'source' in args.json_args:
            update_json_args(source_args, args.json_args.get('source'), fields)
    return source_args
Example #5
0
    if args.node_threshold > 0:
        ensemble_args.update(node_threshold=args.node_threshold)
    if args.balance:
        ensemble_args.update(balance_objective=True)
    if args.weight_field:
        try:
            weight_field = fields.field_id(args.weight_field)
        except ValueError, exc:
            sys.exit(exc)
        ensemble_args.update(weight_field=weight_field)
    if args.objective_weights:
        ensemble_args.update(objective_weights=args.objective_weights_json)
    if args.random_candidates:
        ensemble_args.update(random_candidates=args.random_candidates)

    update_attributes(ensemble_args, args.json_args.get('model'))

    ensemble_args = update_sample_parameters_args(ensemble_args, args)

    ensemble_args["ensemble_sample"].update( \
        {"rate": args.ensemble_sample_rate,
         "replacement": args.ensemble_sample_replacement})

    if 'ensemble' in args.json_args:
        update_json_args(ensemble_args, args.json_args.get('ensemble'), fields)

    return ensemble_args


def create_ensembles(datasets, ensemble_ids, ensemble_args, args,
                     number_of_ensembles=1,
Example #6
0
        dataset_args.update(json_filter=args.json_filter)
    elif args.lisp_filter:
        dataset_args.update(lisp_filter=args.lisp_filter)

    if args.dataset_fields_ and fields is not None:
        input_fields = configure_input_fields(fields, args.dataset_fields_)
        dataset_args.update(input_fields=input_fields)
    if (hasattr(args, 'multi_label') and args.multi_label
            and multi_label_data is not None):
        dataset_args.update(
            user_metadata={'multi_label_data': multi_label_data})

    if fields and args.import_fields:
        fields_struct = fields.new_fields_structure(args.import_fields)
        check_fields_struct(fields_struct, "dataset")
        update_attributes(dataset_args, fields_struct)
    if 'dataset' in args.json_args:
        update_json_args(dataset_args, args.json_args.get('dataset'), fields)

    return dataset_args


def set_dataset_split_args(name,
                           description,
                           args,
                           sample_rate=1,
                           out_of_bag=False,
                           multi_label_data=None):
    """Return dataset arguments dict to split a dataset

    """