Beispiel #1
0
def set_association_args(args, name=None, fields=None,
                         association_fields=None):
    """Build the arguments dict used to create an association.

    Falls back to the name and association fields stored in ``args``
    when they are not explicitly provided.
    """
    name = args.name if name is None else name
    if association_fields is None:
        association_fields = args.association_fields_

    association_args = set_basic_model_args(args, name)

    if fields is not None and association_fields:
        association_args.update(
            input_fields=configure_input_fields(fields, association_fields))
    if args.association_k:
        association_args["max_k"] = args.association_k
    if args.search_strategy:
        association_args["search_strategy"] = args.search_strategy

    association_args = update_sample_parameters_args(association_args, args)

    # user-provided JSON arguments take precedence over the options above
    if 'association' in args.json_args:
        update_json_args(association_args,
                         args.json_args.get('association'), fields)

    return association_args
Beispiel #2
0
def set_anomaly_args(args, name=None, fields=None, anomaly_fields=None):
    """Build the arguments dict used to create an anomaly detector."""
    name = args.name if name is None else name
    if anomaly_fields is None:
        anomaly_fields = args.anomaly_fields_

    anomaly_args = set_basic_model_args(args, name)
    # both seeds fall back to the default SEED when none is given
    anomaly_args["seed"] = SEED if args.seed is None else args.seed
    anomaly_args["anomaly_seed"] = (SEED if args.anomaly_seed is None
                                    else args.anomaly_seed)

    if fields is not None and anomaly_fields:
        anomaly_args.update(
            input_fields=configure_input_fields(fields, anomaly_fields))

    if args.top_n > 0:
        anomaly_args["top_n"] = args.top_n
    if args.forest_size > 0:
        anomaly_args["forest_size"] = args.forest_size

    anomaly_args = update_sample_parameters_args(anomaly_args, args)

    # user-provided JSON arguments take precedence over the options above
    if 'anomaly' in args.json_args:
        update_json_args(anomaly_args, args.json_args.get('anomaly'), fields)

    return anomaly_args
Beispiel #3
0
def set_cluster_args(args, name=None, fields=None,
                     cluster_fields=None):
    """Build the arguments dict used to create a cluster."""
    name = args.name if name is None else name
    if cluster_fields is None:
        cluster_fields = args.cluster_fields_

    cluster_args = set_basic_model_args(args, name)
    # both seeds fall back to the default SEED when none is given
    cluster_args["seed"] = SEED if args.seed is None else args.seed
    cluster_args["cluster_seed"] = (SEED if args.cluster_seed is None
                                    else args.cluster_seed)

    if args.cluster_models is not None:
        cluster_args["model_clusters"] = True
    if args.cluster_k:
        cluster_args["k"] = args.cluster_k
    if fields is not None and cluster_fields:
        cluster_args.update(
            input_fields=configure_input_fields(fields, cluster_fields))
    if args.summary_fields is not None:
        cluster_args["summary_fields"] = args.summary_fields_

    cluster_args = update_sample_parameters_args(cluster_args, args)

    # user-provided JSON arguments take precedence over the options above
    if 'cluster' in args.json_args:
        update_json_args(cluster_args, args.json_args.get('cluster'), fields)

    return cluster_args
def set_external_connector_args(args, name=None):
    """Return external connector arguments dict

    Builds the creation arguments for an external data connector from
    the user-given options, falling back to environment variables when
    no connection information is given on the command line.
    NOTE: also stores the resolved info in ``args.connection_info``.
    """
    if name is None:
        name = args.name
    external_connector_args = set_basic_args(args, name)

    # default to PostgreSQL when no source type is given
    source = "postgresql" if args.source is None else args.source
    external_connector_args.update({"source": source})

    # "source" is handled above; read every other connection attribute
    # from args. Fixed: materialize a filtered list -- dict.values() is a
    # view with no .remove() method on Python 3.
    connection_keys = [key for key in EXTERNAL_CONNECTION_ATTRS.values()
                       if key != "source"]

    connection_info = {}
    for key in connection_keys:
        if hasattr(args, key) and getattr(args, key):
            connection_info.update({key: getattr(args, key)})
    if not connection_info:
        # try to read environment variables
        connection_info = get_env_connection_info()
    args.connection_info = connection_info

    if args.hosts:
        args.connection_info.update({"hosts": args.hosts.split(",")})

    # rare arguments must be provided in a JSON file
    if args.connection_json_:
        args.connection_info.update(args.connection_json_)

    # user-provided JSON arguments take precedence over the options above
    if 'external_connector' in args.json_args:
        update_json_args(external_connector_args,
                         args.json_args.get('external_connector'), None)

    return external_connector_args
Beispiel #5
0
def set_pca_args(args, name=None, fields=None, pca_fields=None):
    """Return pca arguments dict

    Builds the creation arguments for a PCA. Fixed: ``input_fields`` was
    referenced even when ``fields`` was None, so no default had ever been
    assigned (NameError); input fields are now only configured when a
    fields structure is available.
    """
    if name is None:
        name = args.name
    if pca_fields is None:
        pca_fields = args.pca_fields_

    pca_args = set_basic_args(args, name)
    pca_args.update({
        "seed": SEED if args.seed is None else args.seed,
        # NOTE(review): mirrors "seed"; other resources use a dedicated
        # *_seed option -- confirm there is no args.pca_seed to honor here
        "pca_seed": SEED if args.seed is None else args.seed
    })

    pca_args.update({"sample_rate": args.sample_rate})
    pca_args = update_sample_parameters_args(pca_args, args)
    if fields is not None:
        # default to every field, then narrow by explicit --pca-fields
        # and/or the excluded fields
        input_fields = list(fields.fields.keys())
        if pca_fields:
            input_fields = configure_input_fields(fields, pca_fields)
        if args.exclude_objective:
            input_fields = [field for field in input_fields
                            if field not in args.exclude_fields]
        pca_args.update(input_fields=input_fields)

    # user-provided JSON arguments take precedence over the options above
    if 'pca' in args.json_args:
        update_json_args(pca_args, args.json_args.get('pca'), fields)
    return pca_args
Beispiel #6
0
def set_logistic_regression_args(args,
                                 name=None,
                                 fields=None,
                                 objective_id=None,
                                 logistic_regression_fields=None):
    """Return logistic regression arguments dict

    Builds the creation arguments for a logistic regression from the
    command-line options in ``args``, falling back to the values stored
    in ``args`` when ``name``, ``objective_id`` or
    ``logistic_regression_fields`` are not given.
    NOTE: may mutate ``args.sample_rate`` and ``args.replacement`` when
    evaluating or cross-validating.
    """
    if name is None:
        name = args.name
    if logistic_regression_fields is None:
        logistic_regression_fields = args.logistic_regression_fields_
    if objective_id is None:
        objective_id = args.objective_id_

    logistic_regression_args = set_basic_model_args(args, name)
    logistic_regression_args.update(
        {"seed": SEED if args.seed is None else args.seed})

    if objective_id is not None and fields is not None:
        logistic_regression_args.update({"objective_field": objective_id})
    if logistic_regression_fields and fields is not None:
        input_fields = configure_input_fields(fields,
                                              logistic_regression_fields)
        logistic_regression_args.update(input_fields=input_fields)
    # when evaluating without a test split/datasets, or cross-validating,
    # force the fixed SEED so the sampling split is reproducible
    if ((args.evaluate and args.test_split == 0 and args.test_datasets is None)
            or args.cross_validation_rate > 0):
        logistic_regression_args.update(seed=SEED)
        if args.cross_validation_rate > 0:
            # hold out the cross-validation fraction from training
            args.sample_rate = 1 - args.cross_validation_rate
            args.replacement = False
        elif (args.sample_rate == 1 and args.test_datasets is None
              and not args.dataset_off):
            args.sample_rate = EVALUATE_SAMPLE_RATE
    logistic_regression_args.update({"sample_rate": args.sample_rate})
    if args.lr_c:
        # --lr-c is forwarded as the API's "c" argument
        logistic_regression_args.update({"c": args.lr_c})
    logistic_regression_args.update({"bias": args.bias})
    logistic_regression_args.update( \
        {"balance_fields": args.balance_fields})
    if args.eps:
        logistic_regression_args.update({"eps": args.eps})
    if args.normalize is not None:
        logistic_regression_args.update({"normalize": args.normalize})
    if args.missing_numerics is not None:
        logistic_regression_args.update( \
            {"missing_numerics": args.missing_numerics})
    if args.field_codings is not None:
        logistic_regression_args.update(\
            {"field_codings": args.field_codings_})

    logistic_regression_args = update_sample_parameters_args( \
        logistic_regression_args, args)

    # user-provided JSON arguments take precedence over the options above
    if 'logistic_regression' in args.json_args:
        update_json_args(logistic_regression_args,
                         args.json_args.get('logistic_regression'), fields)
    return logistic_regression_args
Beispiel #7
0
def set_evaluation_args(args, fields=None,
                        dataset_fields=None, name=None):
    """Return evaluation args dict

    Builds the creation arguments for an evaluation. Depending on how
    the training/test data were provided, the result may carry a row
    range (time-series) or an out-of-bag sampling setup.
    NOTE: may mutate ``args.range_`` and ``args.sample_rate``.
    """
    if name is None:
        name = args.name
    evaluation_args = set_basic_args(args, name)

    # a combiner only applies when several models or an ensemble are used
    if hasattr(args, 'method') and (args.number_of_models > 1
                                    or args.ensemble):
        evaluation_args.update(combiner=args.method)
    if hasattr(args, 'method') and args.method:
        evaluation_args.update({"combiner": args.method})
        if args.method == THRESHOLD_CODE:
            # the threshold combiner takes its own k/class parameters
            threshold = {}
            if hasattr(args, 'threshold') and args.threshold is not None:
                threshold.update(k=args.threshold)
            if hasattr(args, 'threshold_class') \
                    and args.threshold_class is not None:
                threshold.update({"class": args.threshold_class})
            evaluation_args.update(threshold=threshold)
    if args.fields_map_ and fields is not None:
        if dataset_fields is None:
            dataset_fields = fields
        evaluation_args.update({"fields_map": map_fields(args.fields_map_,
                                                         fields,
                                                         dataset_fields)})
    if hasattr(args, 'missing_strategy') and args.missing_strategy:
        evaluation_args.update(missing_strategy=args.missing_strategy)
    # user-provided JSON arguments take precedence over the options above
    if 'evaluation' in args.json_args:
        update_json_args(
            evaluation_args, args.json_args.get('evaluation'), fields)
    # if evaluating time series we need to use ranges
    if args.subcommand == "time-series" and args.test_split == 0 and \
            not args.has_test_datasets_:
        args.range_ = [int(args.max_rows * EVALUATE_SAMPLE_RATE) + 1,
                       args.max_rows]
        evaluation_args.update({"range": args.range_})
        return evaluation_args
    # Two cases to use out_of_bag and sample_rate: standard evaluations where
    # only the training set is provided, and cross_validation
    # [--dataset|--test] [--model|--models|--model-tag|--ensemble] --evaluate
    if (((hasattr(args, "dataset") and args.dataset) or args.test_set)
            and args.has_supervised_):
        return evaluation_args
    # [--train|--dataset] --test-split --evaluate
    if args.test_split > 0 and (args.training_set or args.dataset):
        return evaluation_args
    # --datasets --test-datasets or equivalents
    #if args.datasets and (args.test_datasets or args.dataset_off):
    if args.has_datasets_ and (args.has_test_datasets_ or args.dataset_off):
        return evaluation_args
    # no separate test data anywhere: evaluate out-of-bag on a sample of
    # the training data, with the fixed SEED so the split is reproducible
    if args.sample_rate == 1:
        args.sample_rate = EVALUATE_SAMPLE_RATE
    evaluation_args.update(out_of_bag=True, seed=SEED,
                           sample_rate=args.sample_rate)
    return evaluation_args
Beispiel #8
0
def set_project_args(args, name=None):
    """Build the arguments dict used to create a project."""
    project_args = set_basic_args(args, args.name if name is None else name)
    # user-provided JSON arguments take precedence over the basic options
    if 'project' in args.json_args:
        update_json_args(project_args, args.json_args.get('project'), None)
    return project_args
Beispiel #9
0
def set_fusion_args(args, name=None, fields=None):
    """Build the arguments dict used to create a fusion."""
    fusion_args = set_basic_args(args, args.name if name is None else name)
    # user-provided JSON arguments take precedence over the basic options
    if 'fusion' in args.json_args:
        update_json_args(fusion_args, args.json_args.get('fusion'), fields)
    return fusion_args
Beispiel #10
0
def set_sample_args(args, name=None):
    """Return sample arguments dict

    Builds the creation arguments for a sample from the basic options.
    """
    if name is None:
        name = args.name

    sample_args = set_basic_args(args, name)

    if 'sample' in args.json_args:
        # pass the fields argument explicitly as None for consistency with
        # the other set_*_args helpers (samples carry no fields structure
        # here); previously the argument was omitted altogether
        update_json_args(sample_args, args.json_args.get('sample'), None)
    return sample_args
Beispiel #11
0
def set_time_series_args(args, name=None, fields=None, objective_id=None):
    """Return time-series arguments dict

    Builds the creation arguments for a time-series model from the
    command-line options in ``args``.
    NOTE: may mutate ``args.range_`` when evaluating without a previous
    test split.
    """
    if name is None:
        name = args.name
    if objective_id is None:
        objective_id = args.objective_id_

    time_series_args = set_basic_model_args(args, name)
    time_series_args.update({
        "all_numeric_objectives": args.all_numeric_objectives,
        "period": args.period
    })

    # if we need to evaluate and there's no previous split, use a range
    if args.evaluate and args.test_split == 0 and not args.has_test_datasets_:
        args.range_ = [1, int(args.max_rows * EVALUATE_SAMPLE_RATE)]
    if objective_id is not None:
        time_series_args.update({"objective_field": objective_id})
    if args.objectives:
        time_series_args.update({"objective_fields": args.objective_fields_})
    if args.damped_trend is not None:
        time_series_args.update({"damped_trend": args.damped_trend})
    if args.error is not None:
        time_series_args.update({"error": args.error})
    if args.field_parameters:
        time_series_args.update({"field_parameters": args.field_parameters_})
    if args.range_:
        time_series_args.update({"range": args.range_})
    if args.seasonality is not None:
        time_series_args.update({"seasonality": args.seasonality})
    if args.trend is not None:
        time_series_args.update({"trend": args.trend})

    # assemble the optional time_range sub-document from its parts
    if args.time_start or args.time_end or args.time_interval or \
            args.time_interval_unit:
        time_range = {}
        if args.time_start:
            time_range.update({"start": args.time_start})
        if args.time_end:
            time_range.update({"end": args.time_end})
        if args.time_interval:
            time_range.update({"interval": args.time_interval})
        if args.time_interval_unit:
            time_range.update({"interval_unit": args.time_interval_unit})
        time_series_args.update({"time_range": time_range})

    # user-provided JSON arguments take precedence over the options above
    if 'time_series' in args.json_args:
        update_json_args(time_series_args, args.json_args.get('time_series'),
                         fields)
    return time_series_args
Beispiel #12
0
def set_forecast_args(args, fields=None):
    """Build the arguments dict used to create a forecast."""
    forecast_args = set_basic_args(args, args.name)
    forecast_args["intervals"] = args.intervals

    # user-provided JSON arguments take precedence over the options above
    if 'forecast' in args.json_args:
        update_json_args(forecast_args, args.json_args.get('forecast'), fields)

    return forecast_args
Beispiel #13
0
def set_topic_model_args(args,
                         name=None,
                         fields=None,
                         topic_model_fields=None):
    """Build the arguments dict used to create a topic model."""
    name = args.name if name is None else name
    if topic_model_fields is None:
        topic_model_fields = args.topic_model_fields_

    topic_model_args = set_basic_args(args, name)
    # both seeds fall back to the default SEED when none is given
    seed = SEED if args.seed is None else args.seed
    topic_model_args.update({"seed": seed, "topicmodel_seed": seed})

    if fields is not None and topic_model_fields:
        topic_model_args.update(
            input_fields=configure_input_fields(fields, topic_model_fields))
    topic_model_args["sample_rate"] = args.sample_rate
    topic_model_args["bigrams"] = args.bigrams
    topic_model_args["case_sensitive"] = args.case_sensitive

    # optional numeric settings, forwarded only when explicitly given
    for attr in ["number_of_topics", "term_limit", "top_n_terms",
                 "minimum_name_terms"]:
        value = getattr(args, attr)
        if value is not None:
            topic_model_args[attr] = value

    if args.excluded_terms:
        topic_model_args["excluded_terms"] = args.excluded_terms_

    topic_model_args = update_sample_parameters_args(topic_model_args, args)

    # user-provided JSON arguments take precedence over the options above
    if 'topic_model' in args.json_args:
        update_json_args(topic_model_args, args.json_args.get('topic_model'),
                         fields)
    return topic_model_args
Beispiel #14
0
        batch_projection_args.update(all_fields=True)
        projection_fields = []
        if args.projection_fields != "all":
            batch_projection_args.update(all_fields=True)
            for field in args.projection_fields.split(args.args_separator):
                field = field.strip()
                if not field in dataset_fields.fields:
                    try:
                        field = dataset_fields.field_id(field)
                    except ValueError, exc:
                        sys.exit(exc)
                projection_fields.append(field)
            batch_projection_args.update(output_fields=projection_fields)
    if 'batch_projection' in args.json_args:
        update_json_args(
            batch_projection_args, args.json_args.get( \
                'batch_projection'), fields)

    return batch_projection_args


def create_batch_projection(pca,
                            test_dataset,
                            batch_projection_args,
                            args,
                            api=None,
                            session_file=None,
                            path=None,
                            log=None):
    """Creates remote batch projection
Beispiel #15
0
        ensemble_args.update(weight_field=weight_field)
    if args.objective_weights:
        ensemble_args.update(objective_weights=args.objective_weights_json)
    if args.random_candidates:
        ensemble_args.update(random_candidates=args.random_candidates)

    update_attributes(ensemble_args, args.json_args.get('model'))

    ensemble_args = update_sample_parameters_args(ensemble_args, args)

    ensemble_args["ensemble_sample"].update( \
        {"rate": args.ensemble_sample_rate,
         "replacement": args.ensemble_sample_replacement})

    if 'ensemble' in args.json_args:
        update_json_args(ensemble_args, args.json_args.get('ensemble'), fields)

    return ensemble_args


def create_ensembles(datasets, ensemble_ids, ensemble_args, args,
                     number_of_ensembles=1,
                     api=None, path=None, session_file=None, log=None):
    """Create ensembles from input data

    """

    if api is None:
        api = bigml.api.BigML()
    ensembles = ensemble_ids[:]
    existing_ensembles = len(ensembles)
Beispiel #16
0
        batch_anomaly_score_args.update(all_fields=True)
    if args.prediction_fields:
        batch_anomaly_score_args.update(all_fields=False)
        prediction_fields = []
        for field in args.prediction_fields.split(args.args_separator):
            field = field.strip()
            if not field in dataset_fields.fields:
                try:
                    field = dataset_fields.field_id(field)
                except ValueError, exc:
                    sys.exit(exc)
            prediction_fields.append(field)
        batch_anomaly_score_args.update(output_fields=prediction_fields)

    if 'batch_anomaly_score' in args.json_args:
        update_json_args(batch_anomaly_score_args,
                         args.json_args.get('batch_anomaly_score'), fields)

    return batch_anomaly_score_args


def create_batch_anomaly_score(anomaly,
                               test_dataset,
                               batch_anomaly_score_args,
                               args,
                               api=None,
                               session_file=None,
                               path=None,
                               log=None):
    """Creates remote batch anomaly score

    """
Beispiel #17
0
def set_source_args(args,
                    name=None,
                    multi_label_data=None,
                    data_set_header=None,
                    fields=None):
    """Returns a source arguments dict

    Builds the creation/update arguments for a source: project, parser
    options (header, locale, separator), multi-label metadata and, at
    update time, field attribute/type changes.
    """

    if name is None:
        name = args.name
    source_args = set_basic_args(args, name)
    if args.project_id is not None:
        source_args.update({"project": args.project_id})
    # if header is set, use it
    if data_set_header is not None:
        source_args.update({"source_parser": {"header": data_set_header}})
    # If user has given an OS locale, try to add the locale used in bigml.com
    if args.user_locale is not None:
        source_locale = bigml_locale(args.user_locale)
        if source_locale is None:
            log_message("WARNING: %s locale equivalence not found."
                        " Using %s instead.\n" %
                        (args.user_locale, LOCALE_DEFAULT),
                        log_file=None,
                        console=True)
            source_locale = LOCALE_DEFAULT
        # fixed: merge into any existing source_parser instead of replacing
        # it with an empty dict, which used to discard the "header" setting
        source_args.setdefault("source_parser", {})
        source_args["source_parser"].update({'locale': source_locale})
    # If user has set a training separator, use it.
    if args.training_separator is not None:
        training_separator = decode2(args.training_separator,
                                     encoding="string_escape")
        # fixed: create source_parser when missing; this used to raise
        # KeyError when neither header nor locale had been set before
        source_args.setdefault("source_parser", {})
        source_args["source_parser"].update({'separator': training_separator})
    # If uploading a multi-label file, add the user_metadata info needed to
    # manage the multi-label fields
    if (hasattr(args, 'multi_label') and args.multi_label
            and multi_label_data is not None):
        source_args.update(
            {"user_metadata": {
                "multi_label_data": multi_label_data
            }})

    # to update fields attributes or types you must have a previous fields
    # structure (at update time)
    if fields:
        if args.field_attributes_:
            update_attributes(source_args, {"fields": args.field_attributes_},
                              by_column=True,
                              fields=fields)
        if args.types_:
            update_attributes(source_args, {"fields": args.types_},
                              by_column=True,
                              fields=fields)
        if args.import_fields:
            fields_struct = fields.new_fields_structure(args.import_fields)
            check_fields_struct(fields_struct, "source")
            update_attributes(source_args, fields_struct)
        # user-provided JSON arguments take precedence over the options above
        if 'source' in args.json_args:
            update_json_args(source_args, args.json_args.get('source'), fields)
    return source_args
Beispiel #18
0
    if args.prediction_info == FULL_FORMAT:
        batch_centroid_args.update(all_fields=True)
    if args.prediction_fields:
        batch_centroid_args.update(all_fields=False)
        prediction_fields = []
        for field in args.prediction_fields.split(args.args_separator):
            field = field.strip()
            if not field in dataset_fields.fields:
                try:
                    field = dataset_fields.field_id(field)
                except ValueError, exc:
                    sys.exit(exc)
            prediction_fields.append(field)
        batch_centroid_args.update(output_fields=prediction_fields)
    if 'batch_centroid' in args.json_args:
        update_json_args(
            batch_centroid_args, args.json_args.get('batch_centroid'), fields)

    return batch_centroid_args


def create_batch_centroid(cluster, test_dataset,
                          batch_centroid_args, args,
                          api=None, session_file=None,
                          path=None, log=None):
    """Creates remote batch_centroid

    """
    if api is None:
        api = bigml.api.BigML()
    message = dated("Creating batch centroid.\n")
    log_message(message, log_file=session_file, console=args.verbosity)
Beispiel #19
0
def set_deepnet_args(args,
                     name=None,
                     fields=None,
                     objective_id=None,
                     deepnet_fields=None):
    """Return deepnet arguments dict

    Builds the creation arguments for a deepnet from the command-line
    options in ``args``, falling back to the values stored in ``args``
    when ``name``, ``objective_id`` or ``deepnet_fields`` are not given.
    NOTE: may mutate ``args.sample_rate`` and ``args.replacement`` when
    evaluating or cross-validating.
    """
    if name is None:
        name = args.name
    if deepnet_fields is None:
        deepnet_fields = args.deepnet_fields_
    if objective_id is None:
        objective_id = args.objective_id_

    deepnet_args = set_basic_model_args(args, name)
    deepnet_args.update({"seed": SEED if args.seed is None else args.seed})

    if objective_id is not None and fields is not None:
        deepnet_args.update({"objective_field": objective_id})
    if deepnet_fields and fields is not None:
        input_fields = configure_input_fields(fields, deepnet_fields)
        deepnet_args.update(input_fields=input_fields)
    # when evaluating without a test split/datasets, or cross-validating,
    # force the fixed SEED so the sampling split is reproducible
    if ((args.evaluate and args.test_split == 0 and args.test_datasets is None)
            or args.cross_validation_rate > 0):
        deepnet_args.update(seed=SEED)
        if args.cross_validation_rate > 0:
            # hold out the cross-validation fraction from training
            args.sample_rate = 1 - args.cross_validation_rate
            args.replacement = False
        elif (args.sample_rate == 1 and args.test_datasets is None
              and not args.dataset_off):
            args.sample_rate = EVALUATE_SAMPLE_RATE
    deepnet_args.update({"sample_rate": args.sample_rate})

    if args.batch_normalization is not None:
        deepnet_args.update({"batch_normalization": args.batch_normalization})
    if args.dropout_rate:
        deepnet_args.update({"dropout_rate": args.dropout_rate})

    if args.hidden_layers is not None:
        deepnet_args.update({"hidden_layers": args.hidden_layers_})

    if args.learn_residuals is not None:
        deepnet_args.update( \
            {"learn_residuals": args.learn_residuals})

    # fixed: the guard used to test args.max_iterations while forwarding
    # args.learning_rate, so a user-set learning rate was silently dropped
    # unless --max-iterations was also given.
    # NOTE(review): args.max_iterations is no longer read here -- confirm
    # whether a "max_iterations" creation argument should also be forwarded
    if args.learning_rate is not None:
        deepnet_args.update(\
            {"learning_rate": args.learning_rate})

    if args.max_training_time is not None:
        deepnet_args.update(\
            {"max_training_time": args.max_training_time})

    if args.number_of_hidden_layers is not None:
        deepnet_args.update(\
            {"number_of_hidden_layers": args.number_of_hidden_layers})

    if args.number_of_model_candidates is not None:
        deepnet_args.update(\
            {"number_of_model_candidates": args.number_of_model_candidates})

    if args.search is not None:
        deepnet_args.update(\
            {"search": args.search})

    if args.suggest_structure is not None:
        deepnet_args.update(\
            {"suggest_structure": args.suggest_structure})

    # presumably the service default is True, so the flag is only sent
    # when disabled -- TODO confirm against the API defaults
    if not args.missing_numerics:
        deepnet_args.update(\
            {"missing_numerics": args.missing_numerics})

    if args.tree_embedding:
        deepnet_args.update(\
            {"tree_embedding": args.tree_embedding})

    deepnet_args = update_sample_parameters_args( \
        deepnet_args, args)

    # user-provided JSON arguments take precedence over the options above
    if 'deepnet' in args.json_args:
        update_json_args(deepnet_args, args.json_args.get('deepnet'), fields)
    return deepnet_args
Beispiel #20
0
        batch_topic_distribution_args.update(all_fields=True)
    if args.prediction_fields:
        batch_topic_distribution_args.update(all_fields=False)
        prediction_fields = []
        for field in args.prediction_fields.split(args.args_separator):
            field = field.strip()
            if not field in dataset_fields.fields:
                try:
                    field = dataset_fields.field_id(field)
                except Exception, exc:
                    sys.exit(exc)
            prediction_fields.append(field)
        batch_topic_distribution_args.update(output_fields=prediction_fields)
    if 'batch_topic_distribution' in args.json_args:
        update_json_args(
            batch_topic_distribution_args, args.json_args.get( \
                'batch_topic_distribution'), fields)

    return batch_topic_distribution_args


def create_batch_topic_distribution(topic_model,
                                    test_dataset,
                                    batch_topic_distribution_args,
                                    args,
                                    api=None,
                                    session_file=None,
                                    path=None,
                                    log=None):
    """Creates remote batch topic distribution
Beispiel #21
0
        except ValueError, exc:
            sys.exit(exc)
        model_args.update(weight_field=weight_field)

    if args.objective_weights:
        model_args.update(objective_weights=args.objective_weights_json)

    if args.max_categories > 0:
        model_args.update(
            user_metadata={'other_label': other_label,
                           'max_categories': args.max_categories})

    model_args = update_sample_parameters_args(model_args, args)

    if 'model' in args.json_args:
        update_json_args(model_args, args.json_args.get('model'), fields)

    return model_args


def set_label_model_args(args, fields, labels, multi_label_data):
    """Set of args needed to build a model per label

    """

    objective_field = args.objective_field
    if not args.model_fields_:
        model_fields = []
    else:
        model_fields = relative_input_fields(fields, args.model_fields_)
    if objective_field is None:
Beispiel #22
0
                except ValueError, exc:
                    sys.exit(exc)
            prediction_fields.append(field)
        batch_prediction_args.update(output_fields=prediction_fields)
    if hasattr(args, 'missing_strategy') and args.missing_strategy:
        batch_prediction_args.update(missing_strategy=args.missing_strategy)
    if hasattr(args, "operating_point_") and args.operating_point_:
        batch_prediction_args.update(operating_point=args.operating_point_)
        if args.operating_point_.get("kind") == "probability":
            batch_prediction_args.update({
                "probability": True,
                "confidence": False
            })

    if 'batch_prediction' in args.json_args:
        update_json_args(batch_prediction_args,
                         args.json_args.get('batch_prediction'), fields)
    return batch_prediction_args


def create_batch_prediction(model_or_ensemble,
                            test_dataset,
                            batch_prediction_args,
                            args,
                            api=None,
                            session_file=None,
                            path=None,
                            log=None):
    """Creates remote batch_prediction

    """
    if api is None:
Beispiel #23
0
        dataset_args.update(lisp_filter=args.lisp_filter)

    if args.dataset_fields_ and fields is not None:
        input_fields = configure_input_fields(fields, args.dataset_fields_)
        dataset_args.update(input_fields=input_fields)
    if (hasattr(args, 'multi_label') and args.multi_label
            and multi_label_data is not None):
        dataset_args.update(
            user_metadata={'multi_label_data': multi_label_data})

    if fields and args.import_fields:
        fields_struct = fields.new_fields_structure(args.import_fields)
        check_fields_struct(fields_struct, "dataset")
        update_attributes(dataset_args, fields_struct)
    if 'dataset' in args.json_args:
        update_json_args(dataset_args, args.json_args.get('dataset'), fields)

    return dataset_args


def set_dataset_split_args(name,
                           description,
                           args,
                           sample_rate=1,
                           out_of_bag=False,
                           multi_label_data=None):
    """Return dataset arguments dict to split a dataset

    """
    dataset_args = {
        "name": name,