Ejemplo n.º 1
0
def set_anomaly_args(args, name=None, fields=None, anomaly_fields=None):
    """Build the dict of arguments used to create an anomaly detector.

    `name` and `anomaly_fields` fall back to `args.name` and
    `args.anomaly_fields_` when they are not given. The dict returned by
    `set_basic_model_args` is extended with the seeds, the input fields
    (only when both a field selection and `fields` are available), the
    strictly positive `top_n` / `forest_size` values, the result of
    `update_sample_parameters_args` and, last, whatever is stored under
    the 'anomaly' key of `args.json_args`.

    """
    detector_name = args.name if name is None else name
    selected_fields = (args.anomaly_fields_ if anomaly_fields is None
                       else anomaly_fields)

    anomaly_args = set_basic_model_args(args, detector_name)
    # values found in args take precedence over the module-level SEED
    anomaly_args.update(
        seed=args.seed if args.seed is not None else SEED,
        anomaly_seed=(args.anomaly_seed if args.anomaly_seed is not None
                      else SEED))

    if selected_fields and fields is not None:
        anomaly_args.update(
            {"input_fields": configure_input_fields(fields, selected_fields)})

    # these two options are only sent when strictly positive
    for option in ("top_n", "forest_size"):
        value = getattr(args, option)
        if value > 0:
            anomaly_args.update({option: value})

    anomaly_args = update_sample_parameters_args(anomaly_args, args)

    # applied last, after every option derived from args
    if 'anomaly' in args.json_args:
        update_json_args(anomaly_args, args.json_args.get('anomaly'), fields)

    return anomaly_args
Ejemplo n.º 2
0
def set_association_args(args, name=None, fields=None,
                         association_fields=None):
    """Build the dict of arguments used to create an association.

    `name` and `association_fields` fall back to `args.name` and
    `args.association_fields_` when they are not given. The dict returned
    by `set_basic_model_args` is extended with the input fields, `max_k`
    and `search_strategy` (each only when set), the result of
    `update_sample_parameters_args` and, last, whatever is stored under
    the 'association' key of `args.json_args`.

    """
    association_name = args.name if name is None else name
    selected_fields = (args.association_fields_
                       if association_fields is None
                       else association_fields)

    association_args = set_basic_model_args(args, association_name)

    if selected_fields and fields is not None:
        association_args.update(
            {"input_fields": configure_input_fields(fields, selected_fields)})

    # (API key, value) pairs that are only sent when the value is truthy
    optional_settings = (("max_k", args.association_k),
                         ("search_strategy", args.search_strategy))
    for key, value in optional_settings:
        if value:
            association_args.update({key: value})

    association_args = update_sample_parameters_args(association_args, args)

    # applied last, after every option derived from args
    if 'association' in args.json_args:
        json_settings = args.json_args.get('association')
        update_json_args(association_args, json_settings, fields)

    return association_args
Ejemplo n.º 3
0
def set_model_args(args, name=None, objective_id=None, fields=None,
                   model_fields=None, other_label=None):
    """Return model arguments dict

    Starts from `set_basic_model_args(args, name)` and adds the
    model-specific options read from `args`.

    args: options object. `args.sample_rate` and `args.replacement` may be
          overwritten here when evaluation or cross-validation is asked for.
    name: model name; defaults to `args.name`.
    objective_id: objective field; defaults to `args.objective_id_` when
                  `args.max_categories` is None and is replaced by
                  `args.objective_field` when `args.max_categories` > 0.
    fields: fields structure; required to set the objective, the input
            fields and the weight field (its `field_id` method is used).
    model_fields: input fields selection; defaults to `args.model_fields_`.
    other_label: not used in the visible part of this function.

    The process is ended through `sys.exit` when `fields.field_id` raises
    ValueError for `args.weight_field`.

    """
    if name is None:
        name = args.name
    # args.max_categories may be None (see the check just below); comparing
    # None with an int raises TypeError on Python 3, so treat None as
    # "not positive", which is what Python 2 did.
    has_max_categories = (args.max_categories is not None
                          and args.max_categories > 0)
    if objective_id is None and args.max_categories is None:
        objective_id = args.objective_id_
    if has_max_categories:
        objective_id = args.objective_field
    if model_fields is None:
        model_fields = args.model_fields_

    model_args = set_basic_model_args(args, name)
    model_args.update({"missing_splits": args.missing_splits})
    if objective_id is not None and fields is not None:
        model_args.update({"objective_field": objective_id})

    # If evaluate flag is on and no test_split flag is provided,
    # we choose a deterministic sampling with
    # args.sample_rate (80% by default) of the data to create the model
    # If cross_validation_rate = n/100, then we choose to run 2 * n evaluations
    # by holding out a n% of randomly sampled data.

    if ((args.evaluate and args.test_split == 0 and args.test_datasets is None)
            or args.cross_validation_rate > 0):
        model_args.update(seed=SEED)
        if args.cross_validation_rate > 0:
            args.sample_rate = 1 - args.cross_validation_rate
            args.replacement = False
        elif (args.sample_rate == 1 and args.test_datasets is None
              and not args.dataset_off):
            args.sample_rate = EVALUATE_SAMPLE_RATE
    if model_fields and fields is not None:
        input_fields = configure_input_fields(
            fields, model_fields, by_name=has_max_categories)
        model_args.update(input_fields=input_fields)

    # 'smart' pruning sends nothing; any other value sends stat_pruning,
    # which is True only for 'statistical'
    if args.pruning and args.pruning != 'smart':
        model_args.update(stat_pruning=(args.pruning == 'statistical'))

    if args.node_threshold > 0:
        model_args.update(node_threshold=args.node_threshold)

    if args.balance:
        model_args.update(balance_objective=True)

    if args.split_field:
        model_args.update(split_field=args.split_field)

    if args.focus_field:
        model_args.update(focus_field=args.focus_field)

    if args.weight_field:
        try:
            weight_field = fields.field_id(args.weight_field)
        except ValueError as exc:
            # `as` form is valid on Python 2.6+ and Python 3
            sys.exit(exc)
        model_args.update(weight_field=weight_field)
    # NOTE(review): this excerpt ends here without returning model_args;
    # presumably the function continues in the full file - confirm.
Ejemplo n.º 4
0
def set_cluster_args(args, name=None, fields=None,
                     cluster_fields=None):
    """Build the dict of arguments used to create a cluster.

    `name` and `cluster_fields` fall back to `args.name` and
    `args.cluster_fields_` when they are not given. The dict returned by
    `set_basic_model_args` is extended with the seeds, `model_clusters`,
    `k`, the input fields and the summary fields (each only when the
    matching option is set), the result of
    `update_sample_parameters_args` and, last, whatever is stored under
    the 'cluster' key of `args.json_args`.

    """
    cluster_name = args.name if name is None else name
    selected_fields = (args.cluster_fields_ if cluster_fields is None
                       else cluster_fields)

    cluster_args = set_basic_model_args(args, cluster_name)
    # values found in args take precedence over the module-level SEED
    cluster_args.update(
        seed=args.seed if args.seed is not None else SEED,
        cluster_seed=(args.cluster_seed if args.cluster_seed is not None
                      else SEED))

    if args.cluster_models is not None:
        cluster_args.update(model_clusters=True)
    if args.cluster_k:
        cluster_args.update(k=args.cluster_k)
    if selected_fields and fields is not None:
        cluster_args.update(
            {"input_fields": configure_input_fields(fields, selected_fields)})
    # the guard reads args.summary_fields, the value sent is the
    # underscore-suffixed attribute
    if args.summary_fields is not None:
        cluster_args.update(summary_fields=args.summary_fields_)

    cluster_args = update_sample_parameters_args(cluster_args, args)

    # applied last, after every option derived from args
    if 'cluster' in args.json_args:
        json_settings = args.json_args.get('cluster')
        update_json_args(cluster_args, json_settings, fields)

    return cluster_args
Ejemplo n.º 5
0
def set_logistic_regression_args(args,
                                 name=None,
                                 fields=None,
                                 objective_id=None,
                                 logistic_regression_fields=None):
    """Build the dict of arguments used to create a logistic regression.

    `name`, `logistic_regression_fields` and `objective_id` fall back to
    `args.name`, `args.logistic_regression_fields_` and
    `args.objective_id_` when they are not given. When an evaluation
    without a test split or a cross-validation is asked for, the seed is
    forced to SEED and `args.sample_rate` (and possibly
    `args.replacement`) is rewritten before being stored in the result.

    """
    lr_name = args.name if name is None else name
    selected_fields = (args.logistic_regression_fields_
                       if logistic_regression_fields is None
                       else logistic_regression_fields)
    objective = args.objective_id_ if objective_id is None else objective_id

    logistic_regression_args = set_basic_model_args(args, lr_name)
    logistic_regression_args.update(
        seed=args.seed if args.seed is not None else SEED)

    if objective is not None and fields is not None:
        logistic_regression_args.update(objective_field=objective)
    if selected_fields and fields is not None:
        logistic_regression_args.update(
            {"input_fields": configure_input_fields(fields, selected_fields)})

    cross_validating = args.cross_validation_rate > 0
    evaluating_without_split = (args.evaluate and args.test_split == 0
                                and args.test_datasets is None)
    if evaluating_without_split or cross_validating:
        # the sampling seed is fixed in this case, whatever args.seed says
        logistic_regression_args.update({"seed": SEED})
        if cross_validating:
            args.sample_rate = 1 - args.cross_validation_rate
            args.replacement = False
        elif (args.sample_rate == 1 and args.test_datasets is None
              and not args.dataset_off):
            args.sample_rate = EVALUATE_SAMPLE_RATE

    # always sent
    logistic_regression_args.update(
        sample_rate=args.sample_rate,
        bias=args.bias,
        balance_fields=args.balance_fields)

    # sent only when truthy
    if args.lr_c:
        logistic_regression_args.update(c=args.lr_c)
    if args.eps:
        logistic_regression_args.update(eps=args.eps)

    # sent whenever a value (even a falsy one) has been set
    for option in ("normalize", "missing_numerics"):
        value = getattr(args, option)
        if value is not None:
            logistic_regression_args.update({option: value})
    # the guard reads args.field_codings, the value sent is the
    # underscore-suffixed attribute
    if args.field_codings is not None:
        logistic_regression_args.update(field_codings=args.field_codings_)

    logistic_regression_args = update_sample_parameters_args(
        logistic_regression_args, args)

    # applied last, after every option derived from args
    if 'logistic_regression' in args.json_args:
        json_settings = args.json_args.get('logistic_regression')
        update_json_args(logistic_regression_args, json_settings, fields)
    return logistic_regression_args
Ejemplo n.º 6
0
def set_ensemble_args(args, name=None,
                      objective_id=None, model_fields=None, fields=None):
    """Return ensemble arguments dict

    Starts from `set_basic_model_args(args, name)` and adds the
    ensemble-specific options read from `args`.

    args: options object. `args.sample_rate` may be overwritten here when
          an evaluation without a test split is asked for.
    name: ensemble name; defaults to `args.name`.
    objective_id: objective field; defaults to `args.objective_id_`.
    model_fields: input fields selection; defaults to `args.model_fields_`.
    fields: fields structure; required to set the objective, the input
            fields and the weight field (its `field_id` method is used).

    The process is ended through `sys.exit` when `fields.field_id` raises
    ValueError for `args.weight_field`.

    """
    if name is None:
        name = args.name
    if objective_id is None:
        objective_id = args.objective_id_
    if model_fields is None:
        model_fields = args.model_fields_

    ensemble_args = set_basic_model_args(args, name)
    ensemble_args.update({
        "missing_splits": args.missing_splits,
        "ensemble_sample": {
            "seed": (SEED if args.ensemble_sample_seed is None
                     else args.ensemble_sample_seed)},
        "seed": SEED if args.seed is None else args.seed
    })
    if objective_id is not None and fields is not None:
        ensemble_args.update({"objective_field": objective_id})

    if args.boosting:
        # keep only the boosting options that exist in args with a value;
        # the "boosting" key is set even when none of them does
        boosting_args = {}
        for option in BOOSTING_OPTIONS:
            value = getattr(args, option, None)
            if value is not None:
                boosting_args[option] = value
        ensemble_args.update({"boosting": boosting_args})
    else:
        ensemble_args.update({"number_of_models": args.number_of_models})

    # If evaluate flag is on and no test_split flag is provided,
    # we choose a deterministic sampling with
    # args.sample_rate (80% by default) of the data to create the model

    if (args.evaluate and args.test_split == 0 and
            args.test_datasets is None and not args.dataset_off):
        ensemble_args.update({"seed": SEED})
        if args.sample_rate == 1:
            args.sample_rate = EVALUATE_SAMPLE_RATE

    if model_fields and fields is not None:
        input_fields = configure_input_fields(fields, model_fields)
        ensemble_args.update(input_fields=input_fields)

    # 'smart' pruning sends nothing; any other value sends stat_pruning,
    # which is True only for 'statistical'
    if args.pruning and args.pruning != 'smart':
        ensemble_args.update(stat_pruning=(args.pruning == 'statistical'))
    if args.node_threshold > 0:
        ensemble_args.update(node_threshold=args.node_threshold)
    if args.balance:
        ensemble_args.update(balance_objective=True)
    if args.weight_field:
        try:
            weight_field = fields.field_id(args.weight_field)
        except ValueError as exc:
            # `as` form is valid on Python 2.6+ and Python 3
            sys.exit(exc)
        ensemble_args.update(weight_field=weight_field)
    # NOTE(review): this excerpt ends here without returning ensemble_args;
    # presumably the function continues in the full file - confirm.
Ejemplo n.º 7
0
def set_time_series_args(args, name=None, fields=None, objective_id=None):
    """Build the dict of arguments used to create a time series.

    `name` and `objective_id` fall back to `args.name` and
    `args.objective_id_` when they are not given. When an evaluation is
    asked for with no test split and no test datasets, `args.range_` is
    overwritten with a range covering the first
    `args.max_rows * EVALUATE_SAMPLE_RATE` rows. Whatever is stored under
    the 'time_series' key of `args.json_args` is applied last.

    """
    series_name = args.name if name is None else name
    objective = args.objective_id_ if objective_id is None else objective_id

    time_series_args = set_basic_model_args(args, series_name)
    time_series_args.update(
        all_numeric_objectives=args.all_numeric_objectives,
        period=args.period)

    # if we need to evaluate and there's no previous split, use a range
    if args.evaluate and args.test_split == 0 and not args.has_test_datasets_:
        args.range_ = [1, int(args.max_rows * EVALUATE_SAMPLE_RATE)]

    if objective is not None:
        time_series_args.update(objective_field=objective)

    # guards read the plain option; the value sent is the
    # underscore-suffixed attribute
    if args.objectives:
        time_series_args.update(objective_fields=args.objective_fields_)
    if args.field_parameters:
        time_series_args.update(field_parameters=args.field_parameters_)
    if args.range_:
        time_series_args.update({"range": args.range_})

    # sent whenever a value (even a falsy one) has been set
    for option in ("damped_trend", "error", "seasonality", "trend"):
        value = getattr(args, option)
        if value is not None:
            time_series_args.update({option: value})

    # only the truthy time options make it into time_range, and the key is
    # only sent when at least one of them does
    time_range = {}
    time_options = (("start", args.time_start),
                    ("end", args.time_end),
                    ("interval", args.time_interval),
                    ("interval_unit", args.time_interval_unit))
    for key, value in time_options:
        if value:
            time_range[key] = value
    if time_range:
        time_series_args.update(time_range=time_range)

    if 'time_series' in args.json_args:
        json_settings = args.json_args.get('time_series')
        update_json_args(time_series_args, json_settings, fields)
    return time_series_args
Ejemplo n.º 8
0
def set_deepnet_args(args,
                     name=None,
                     fields=None,
                     objective_id=None,
                     deepnet_fields=None):
    """Return deepnet arguments dict

    Starts from `set_basic_model_args(args, name)` and adds the
    deepnet-specific options read from `args`.

    args: options object. `args.sample_rate` and `args.replacement` may be
          overwritten here when evaluation or cross-validation is asked for.
    name: deepnet name; defaults to `args.name`.
    fields: fields structure; required to set the objective and the
            input fields.
    objective_id: objective field; defaults to `args.objective_id_`.
    deepnet_fields: input fields selection; defaults to
                    `args.deepnet_fields_`.

    Whatever is stored under the 'deepnet' key of `args.json_args` is
    applied last through `update_json_args`.

    """
    if name is None:
        name = args.name
    if deepnet_fields is None:
        deepnet_fields = args.deepnet_fields_
    if objective_id is None:
        objective_id = args.objective_id_

    deepnet_args = set_basic_model_args(args, name)
    deepnet_args.update({"seed": SEED if args.seed is None else args.seed})

    if objective_id is not None and fields is not None:
        deepnet_args.update({"objective_field": objective_id})
    if deepnet_fields and fields is not None:
        input_fields = configure_input_fields(fields, deepnet_fields)
        deepnet_args.update(input_fields=input_fields)
    if ((args.evaluate and args.test_split == 0 and args.test_datasets is None)
            or args.cross_validation_rate > 0):
        # the sampling seed is fixed in this case, whatever args.seed says
        deepnet_args.update(seed=SEED)
        if args.cross_validation_rate > 0:
            args.sample_rate = 1 - args.cross_validation_rate
            args.replacement = False
        elif (args.sample_rate == 1 and args.test_datasets is None
              and not args.dataset_off):
            args.sample_rate = EVALUATE_SAMPLE_RATE
    deepnet_args.update({"sample_rate": args.sample_rate})

    if args.batch_normalization is not None:
        deepnet_args.update({"batch_normalization": args.batch_normalization})
    if args.dropout_rate:
        deepnet_args.update({"dropout_rate": args.dropout_rate})

    # the guard reads args.hidden_layers, the value sent is the
    # underscore-suffixed attribute
    if args.hidden_layers is not None:
        deepnet_args.update({"hidden_layers": args.hidden_layers_})

    if args.learn_residuals is not None:
        deepnet_args.update({"learn_residuals": args.learn_residuals})

    # Each option is guarded by its own value. Previously learning_rate was
    # sent only when max_iterations was set, and max_iterations itself was
    # never sent.
    if args.learning_rate is not None:
        deepnet_args.update({"learning_rate": args.learning_rate})

    if args.max_iterations is not None:
        deepnet_args.update({"max_iterations": args.max_iterations})

    if args.max_training_time is not None:
        deepnet_args.update({"max_training_time": args.max_training_time})

    if args.number_of_hidden_layers is not None:
        deepnet_args.update(
            {"number_of_hidden_layers": args.number_of_hidden_layers})

    if args.number_of_model_candidates is not None:
        deepnet_args.update(
            {"number_of_model_candidates": args.number_of_model_candidates})

    if args.search is not None:
        deepnet_args.update({"search": args.search})

    if args.suggest_structure is not None:
        deepnet_args.update({"suggest_structure": args.suggest_structure})

    # NOTE(review): only falsy values (None included) are sent here, unlike
    # the `is not None` guards above - presumably because the remote default
    # is true; confirm before changing.
    if not args.missing_numerics:
        deepnet_args.update({"missing_numerics": args.missing_numerics})

    if args.tree_embedding:
        deepnet_args.update({"tree_embedding": args.tree_embedding})

    deepnet_args = update_sample_parameters_args(deepnet_args, args)

    if 'deepnet' in args.json_args:
        update_json_args(deepnet_args, args.json_args.get('deepnet'), fields)
    return deepnet_args