def profile_table(table, group_by=None, **params):
    """Validate parameters and run ``_profile_table``, per group if requested.

    Runs ``check_required_parameters`` against ``_profile_table``, fills
    defaults through ``get_default_from_parameters_if_required`` and hands
    two checks to ``validate``: ``bins`` (greater_than_or_equal_to 1) and
    ``correlation_threshold`` (greater_than 0.0).

    Returns the result of ``_profile_table(table, **params)`` when
    ``group_by`` is None, otherwise the result of ``_function_by_group``.
    """
    check_required_parameters(_profile_table, params, ['table'])
    params = get_default_from_parameters_if_required(params, _profile_table)

    validate(
        greater_than_or_equal_to(params, 1, 'bins'),
        greater_than(params, 0.0, 'correlation_threshold'),
    )

    # Ungrouped call is the simple path; handle it first.
    if group_by is None:
        return _profile_table(table, **params)
    return _function_by_group(_profile_table, table, group_by=group_by, **params)
def plot_roc_pr_curve(table, group_by=None, **params):
    """Validate parameters and run ``_plot_roc_pr_curve``, per group if requested.

    Required-parameter check first, then defaults, then ``validate`` with
    ``fig_w`` and ``fig_h`` each checked by greater_than_or_equal_to 0.0.

    Returns the direct result of ``_plot_roc_pr_curve`` when ``group_by`` is
    None, otherwise the result of ``_function_by_group``.
    """
    check_required_parameters(_plot_roc_pr_curve, params, ['table'])
    params = get_default_from_parameters_if_required(params, _plot_roc_pr_curve)

    figure_checks = [
        greater_than_or_equal_to(params, 0.0, 'fig_w'),
        greater_than_or_equal_to(params, 0.0, 'fig_h'),
    ]
    validate(*figure_checks)

    if group_by is None:
        return _plot_roc_pr_curve(table, **params)
    return _function_by_group(_plot_roc_pr_curve, table, group_by=group_by, **params)
def autocorrelation(table, group_by=None, **params):
    """Validate parameters and run ``_autocorrelation``, per group if requested.

    After the required-parameter check and default filling, ``validate``
    receives: ``nlags`` (greater_than_or_equal_to 1) and ``conf_level``
    (from_under with bounds 0.0 and 1.0).

    Returns the result of ``_autocorrelation`` directly, or of
    ``_function_by_group`` when ``group_by`` is not None.
    """
    check_required_parameters(_autocorrelation, params, ['table'])
    params = get_default_from_parameters_if_required(params, _autocorrelation)

    validate(
        greater_than_or_equal_to(params, 1, 'nlags'),
        from_under(params, 0.0, 1.0, 'conf_level'),
    )

    if group_by is None:
        return _autocorrelation(table, **params)
    return _function_by_group(_autocorrelation, table, group_by=group_by, **params)
def timeseries_decomposition(table, group_by=None, **params):
    """Validate parameters and run ``_timeseries_decomposition``.

    ``len(table)`` is taken before anything else and used as the upper bound
    of the ``frequency`` check (from_under with lower bound 1).
    ``extrapolate_trend`` is checked with greater_than_or_equal_to 0.

    Returns the direct result of ``_timeseries_decomposition`` when
    ``group_by`` is None, otherwise the result of ``_function_by_group``.
    """
    # Computed up front, exactly as the first statement of the function.
    row_count = len(table)

    check_required_parameters(_timeseries_decomposition, params, ['table'])
    params = get_default_from_parameters_if_required(params, _timeseries_decomposition)

    validate(
        from_under(params, 1, row_count, 'frequency'),
        greater_than_or_equal_to(params, 0, 'extrapolate_trend'),
    )

    if group_by is None:
        return _timeseries_decomposition(table, **params)
    return _function_by_group(_timeseries_decomposition, table, group_by=group_by, **params)
def statistic_summary(table, group_by=None, **params):
    """Validate parameters and run ``_statistic_summary``, per group if requested.

    Defaults are filled and validation runs *before*
    ``check_required_parameters`` in this wrapper. Checks passed to
    ``validate``: ``percentile_amounts`` (all_elements_from_to 0, 100) and
    ``trimmed_mean_amounts`` (all_elements_from_under 0, 0.5).

    Returns the result of ``_statistic_summary`` directly, or of
    ``_function_by_group`` when ``group_by`` is not None.
    """
    params = get_default_from_parameters_if_required(params, _statistic_summary)

    validate(
        all_elements_from_to(params, 0, 100, 'percentile_amounts'),
        all_elements_from_under(params, 0, 0.5, 'trimmed_mean_amounts'),
    )

    check_required_parameters(_statistic_summary, params, ['table'])

    if group_by is None:
        return _statistic_summary(table, **params)
    return _function_by_group(_statistic_summary, table, group_by=group_by, **params)
def ada_boost_regression_train(table, group_by=None, **params):
    """Validate parameters and run ``_ada_boost_regression_train``.

    Checks passed to ``validate`` after the required-parameter check and
    default filling: ``max_depth`` (greater_than_or_equal_to 2),
    ``n_estimators`` (greater_than_or_equal_to 1) and ``learning_rate``
    (greater_than 0).

    Returns the direct result when ``group_by`` is None, otherwise the
    result of ``_function_by_group``.
    """
    check_required_parameters(_ada_boost_regression_train, params, ['table'])
    params = get_default_from_parameters_if_required(params, _ada_boost_regression_train)

    validate(
        greater_than_or_equal_to(params, 2, 'max_depth'),
        greater_than_or_equal_to(params, 1, 'n_estimators'),
        greater_than(params, 0, 'learning_rate'),
    )

    if group_by is None:
        return _ada_boost_regression_train(table, **params)
    return _function_by_group(_ada_boost_regression_train, table, group_by=group_by, **params)
def kmeans_train_predict(table, group_by=None, **params):
    """Validate parameters and run ``_kmeans_train_predict``.

    Checks passed to ``validate``: ``n_clusters``, ``n_init``, ``max_iter``
    and ``n_jobs`` (each greater_than_or_equal_to 1), ``tol``
    (greater_than 0.0) and ``n_samples`` (greater_than_or_equal_to 0).

    Returns the direct result when ``group_by`` is None, otherwise the
    result of ``_function_by_group``.
    """
    check_required_parameters(_kmeans_train_predict, params, ['table'])
    params = get_default_from_parameters_if_required(params, _kmeans_train_predict)

    validate(
        greater_than_or_equal_to(params, 1, 'n_clusters'),
        greater_than_or_equal_to(params, 1, 'n_init'),
        greater_than_or_equal_to(params, 1, 'max_iter'),
        greater_than(params, 0.0, 'tol'),
        greater_than_or_equal_to(params, 1, 'n_jobs'),
        greater_than_or_equal_to(params, 0, 'n_samples'),
    )

    if group_by is None:
        return _kmeans_train_predict(table, **params)
    return _function_by_group(_kmeans_train_predict, table, group_by=group_by, **params)
def linear_regression_train(table, group_by=None, **params):
    """Validate parameters and run ``_linear_regression_train``.

    Defaults are filled and ``vif_threshold`` is validated
    (greater_than_or_equal_to 1) before ``check_required_parameters`` runs.

    Returns the direct result when ``group_by`` is None, otherwise the
    result of ``_function_by_group``.

    NOTE(review): a second ``linear_regression_train`` is defined later in
    this source; if both live in the same module, the later definition
    replaces this one at import time -- confirm which is intended.
    """
    params = get_default_from_parameters_if_required(params, _linear_regression_train)

    validate(greater_than_or_equal_to(params, 1, 'vif_threshold'))

    check_required_parameters(_linear_regression_train, params, ['table'])

    if group_by is None:
        return _linear_regression_train(table, **params)
    return _function_by_group(_linear_regression_train, table, group_by=group_by, **params)
def split_data(table, group_by=None, **params):
    """Validate parameters and run ``_split_data``, per group if requested.

    Defaults and validation come before ``check_required_parameters``.
    Checks passed to ``validate``: ``train_ratio`` and ``test_ratio``
    (each greater_than 0.0) and ``random_state`` (from_to 0, 2**30).

    Returns the direct result when ``group_by`` is None, otherwise the
    result of ``_function_by_group``.
    """
    params = get_default_from_parameters_if_required(params, _split_data)

    validate(
        greater_than(params, 0.0, 'train_ratio'),
        greater_than(params, 0.0, 'test_ratio'),
        from_to(params, 0, 2**30, 'random_state'),
    )

    check_required_parameters(_split_data, params, ['table'])

    if group_by is None:
        return _split_data(table, **params)
    return _function_by_group(_split_data, table, group_by=group_by, **params)
def gaussian_mixture_train(table, group_by=None, **params):
    """Validate parameters and run ``_gaussian_mixture_train``.

    Checks passed to ``validate``: ``number_of_components`` and
    ``max_iteration`` (greater_than_or_equal_to 1), ``tolerance`` and
    ``regularize_covariance`` (greater_than 0).

    Returns the direct result when ``group_by`` is None, otherwise the
    result of ``_function_by_group``.
    """
    check_required_parameters(_gaussian_mixture_train, params, ['table'])
    params = get_default_from_parameters_if_required(params, _gaussian_mixture_train)

    validate(
        greater_than_or_equal_to(params, 1, 'number_of_components'),
        greater_than(params, 0, 'tolerance'),
        greater_than(params, 0, 'regularize_covariance'),
        greater_than_or_equal_to(params, 1, 'max_iteration'),
    )

    if group_by is None:
        return _gaussian_mixture_train(table, **params)
    return _function_by_group(_gaussian_mixture_train, table, group_by=group_by, **params)
def collaborative_filtering_recommend(table, group_by=None, **params):
    """Validate parameters and run ``_collaborative_filtering_recommend``.

    Defaults and validation come before ``check_required_parameters``.
    ``N`` and ``k`` are each checked with greater_than_or_equal_to 1.

    Returns the direct result when ``group_by`` is None, otherwise the
    result of ``_function_by_group``.
    """
    params = get_default_from_parameters_if_required(params, _collaborative_filtering_recommend)

    validate(
        greater_than_or_equal_to(params, 1, 'N'),
        greater_than_or_equal_to(params, 1, 'k'),
    )

    check_required_parameters(_collaborative_filtering_recommend, params, ['table'])

    if group_by is None:
        return _collaborative_filtering_recommend(table, **params)
    return _function_by_group(_collaborative_filtering_recommend, table, group_by=group_by, **params)
def hierarchical_clustering(table, group_by=None, **params):
    """Validate parameters and run ``_hierarchical_clustering``.

    Checks passed to ``validate``: ``num_rows`` (greater_than 0) and
    ``figure_height`` (greater_than 0.0).

    Returns the direct result when ``group_by`` is None, otherwise the
    result of ``_function_by_group``.
    """
    check_required_parameters(_hierarchical_clustering, params, ['table'])
    params = get_default_from_parameters_if_required(params, _hierarchical_clustering)

    validate(
        greater_than(params, 0, 'num_rows'),
        greater_than(params, 0.0, 'figure_height'),
    )

    if group_by is None:
        return _hierarchical_clustering(table, **params)
    return _function_by_group(_hierarchical_clustering, table, group_by=group_by, **params)
def logistic_regression_train(table, group_by=None, **params):
    """Validate parameters and run ``_logistic_regression_train``.

    Checks passed to ``validate``: ``C`` (greater_than 0.0), ``max_iter``
    (greater_than_or_equal_to 1) and ``tol`` (greater_than 0.0).

    Returns the direct result when ``group_by`` is None, otherwise the
    result of ``_function_by_group``.
    """
    check_required_parameters(_logistic_regression_train, params, ['table'])
    params = get_default_from_parameters_if_required(params, _logistic_regression_train)

    validate(
        greater_than(params, 0.0, 'C'),
        greater_than_or_equal_to(params, 1, 'max_iter'),
        greater_than(params, 0.0, 'tol'),
    )

    if group_by is None:
        return _logistic_regression_train(table, **params)
    return _function_by_group(_logistic_regression_train, table, group_by=group_by, **params)
def svm_classification_train(table, group_by=None, **params):
    """Validate parameters and run ``_svm_classification_train``.

    Checks passed to ``validate``: ``c`` (over_to with bounds 0.0 and 1.0),
    ``degree`` (greater_than_or_equal_to 0) and ``tol`` (greater_than 0.0).

    Returns the direct result when ``group_by`` is None, otherwise the
    result of ``_function_by_group``.
    """
    check_required_parameters(_svm_classification_train, params, ['table'])
    params = get_default_from_parameters_if_required(params, _svm_classification_train)

    validate(
        over_to(params, 0.0, 1.0, 'c'),
        greater_than_or_equal_to(params, 0, 'degree'),
        greater_than(params, 0.0, 'tol'),
    )

    if group_by is None:
        return _svm_classification_train(table, **params)
    return _function_by_group(_svm_classification_train, table, group_by=group_by, **params)
def xgb_regression_train(table, group_by=None, **params):
    """Validate parameters and run ``_xgb_regression_train``.

    Defaults and validation come before ``check_required_parameters``.
    Checks passed to ``validate``: ``max_depth`` and ``n_estimators``
    (greater_than_or_equal_to 1) and ``learning_rate``
    (greater_than_or_equal_to 0.0).

    Returns the direct result when ``group_by`` is None, otherwise the
    result of ``_function_by_group``.
    """
    params = get_default_from_parameters_if_required(params, _xgb_regression_train)

    validate(
        greater_than_or_equal_to(params, 1, 'max_depth'),
        greater_than_or_equal_to(params, 0.0, 'learning_rate'),
        greater_than_or_equal_to(params, 1, 'n_estimators'),
    )

    check_required_parameters(_xgb_regression_train, params, ['table'])

    if group_by is None:
        return _xgb_regression_train(table, **params)
    return _function_by_group(_xgb_regression_train, table, group_by=group_by, **params)
def als_recommend(table, group_by=None, **params):
    """Validate parameters and run ``_als_recommend``, per group if requested.

    Checks passed to ``validate`` (all greater_than_or_equal_to):
    ``number`` >= 1, ``iterations`` >= 1, ``reg_param`` >= 0.1,
    ``rank`` >= 1, ``alpha`` >= 0, ``seed`` >= 0.

    Returns the direct result when ``group_by`` is None, otherwise the
    result of ``_function_by_group``.
    """
    check_required_parameters(_als_recommend, params, ['table'])
    params = get_default_from_parameters_if_required(params, _als_recommend)

    validate(
        greater_than_or_equal_to(params, 1, 'number'),
        greater_than_or_equal_to(params, 1, 'iterations'),
        greater_than_or_equal_to(params, 0.1, 'reg_param'),
        greater_than_or_equal_to(params, 1, 'rank'),
        greater_than_or_equal_to(params, 0, 'alpha'),
        greater_than_or_equal_to(params, 0, 'seed'),
    )

    if group_by is None:
        return _als_recommend(table, **params)
    return _function_by_group(_als_recommend, table, group_by=group_by, **params)
def mlp_classification_train(table, group_by=None, **params):
    """Validate parameters and run ``_mlp_classification_train``.

    Validation depends on ``params['batch_size_auto']``:

    * truthy: only ``learning_rate_init`` and ``tol`` are checked
      (greater_than 0.0 each);
    * falsy: if ``params['batch_size']`` is falsy or not an ``int``, a
      ``require_param('batch_size')`` check is validated first; then
      ``batch_size`` (greater_than 0), ``learning_rate_init`` and ``tol``
      (greater_than 0.0) are checked.

    Returns the direct result when ``group_by`` is None, otherwise the
    result of ``_function_by_group``.
    """
    check_required_parameters(_mlp_classification_train, params, ['table'])
    params = get_default_from_parameters_if_required(params, _mlp_classification_train)

    if not params['batch_size_auto']:
        # Manual batch size: it must be present and an int before range checks.
        batch_size = params['batch_size']
        if not batch_size or not isinstance(batch_size, int):
            validate(require_param('batch_size'))
        checks = [
            greater_than(params, 0, 'batch_size'),
            greater_than(params, 0.0, 'learning_rate_init'),
            greater_than(params, 0.0, 'tol'),
        ]
    else:
        checks = [
            greater_than(params, 0.0, 'learning_rate_init'),
            greater_than(params, 0.0, 'tol'),
        ]
    validate(*checks)

    if group_by is None:
        return _mlp_classification_train(table, **params)
    return _function_by_group(_mlp_classification_train, table, group_by=group_by, **params)
def association_rule_visualization(table, group_by=None, **params):
    """Validate parameters and run ``_association_rule_visualization``.

    Defaults and validation come before ``check_required_parameters``.
    Four keys are each checked with greater_than 0:
    ``figure_size_muliplier``, ``edge_length_scaling``,
    ``node_size_scaling`` and ``font_size``.

    Returns the direct result when ``group_by`` is None, otherwise the
    result of ``_function_by_group``.
    """
    params = get_default_from_parameters_if_required(params, _association_rule_visualization)

    # NOTE(review): the key is spelled 'figure_size_muliplier' -- confirm it
    # matches the parameter name used by _association_rule_visualization.
    validate(
        greater_than(params, 0, 'figure_size_muliplier'),
        greater_than(params, 0, 'edge_length_scaling'),
        greater_than(params, 0, 'node_size_scaling'),
        greater_than(params, 0, 'font_size'),
    )

    check_required_parameters(_association_rule_visualization, params, ['table'])

    if group_by is None:
        return _association_rule_visualization(table, **params)
    return _function_by_group(_association_rule_visualization, table, group_by=group_by, **params)
def random_forest_regression_train(table, group_by=None, **params):
    """Validate parameters and run ``_random_forest_regression_train``.

    ``n_estimators``, ``max_depth``, ``min_samples_split`` and
    ``min_samples_leaf`` are each checked with greater_than_or_equal_to 1.

    Returns the direct result when ``group_by`` is None, otherwise the
    result of ``_function_by_group``.
    """
    check_required_parameters(_random_forest_regression_train, params, ['table'])
    params = get_default_from_parameters_if_required(params, _random_forest_regression_train)

    validate(
        greater_than_or_equal_to(params, 1, 'n_estimators'),
        greater_than_or_equal_to(params, 1, 'max_depth'),
        greater_than_or_equal_to(params, 1, 'min_samples_split'),
        greater_than_or_equal_to(params, 1, 'min_samples_leaf'),
    )

    if group_by is None:
        return _random_forest_regression_train(table, **params)
    return _function_by_group(_random_forest_regression_train, table, group_by=group_by, **params)
def decision_tree_regression_train(table, group_by=None, **params):
    """Validate parameters and run ``_decision_tree_regression_train``.

    Checks passed to ``validate``:
    ``min_samples_split`` >= 2, ``min_samples_leaf`` >= 1,
    ``min_weight_fraction_leaf`` >= 0.0, ``max_depth`` >= 1,
    ``max_features`` >= 1, ``max_leaf_nodes`` > 1 and
    ``min_impurity_split`` >= 0.0 (using greater_than_or_equal_to and
    greater_than respectively).

    Returns the direct result when ``group_by`` is None, otherwise the
    result of ``_function_by_group``.
    """
    check_required_parameters(_decision_tree_regression_train, params, ['table'])
    params = get_default_from_parameters_if_required(params, _decision_tree_regression_train)

    validate(
        greater_than_or_equal_to(params, 2, 'min_samples_split'),
        greater_than_or_equal_to(params, 1, 'min_samples_leaf'),
        greater_than_or_equal_to(params, 0.0, 'min_weight_fraction_leaf'),
        greater_than_or_equal_to(params, 1, 'max_depth'),
        greater_than_or_equal_to(params, 1, 'max_features'),
        greater_than(params, 1, 'max_leaf_nodes'),
        greater_than_or_equal_to(params, 0.0, 'min_impurity_split'),
    )

    if group_by is None:
        return _decision_tree_regression_train(table, **params)
    return _function_by_group(_decision_tree_regression_train, table, group_by=group_by, **params)
def one_hot_encoder_model(table, model, group_by=None, **params):
    """Run ``_one_hot_encoder_model`` on ``table`` with ``model``.

    ``check_required_parameters`` is called first. When ``group_by`` is not
    None the call is routed through ``_function_by_group``; otherwise
    ``_one_hot_encoder_model`` is called directly.
    """
    check_required_parameters(_one_hot_encoder_model, params, ['table', 'model'])

    if group_by is None:
        return _one_hot_encoder_model(table, model, **params)
    return _function_by_group(_one_hot_encoder_model, table, model, group_by=group_by, **params)
def linear_regression_predict(table, model, group_by=None, **params):
    """Run ``_linear_regression_predict`` on ``table`` with ``model``.

    ``check_required_parameters`` is called first. When ``group_by`` is not
    None the call is routed through ``_function_by_group``; otherwise
    ``_linear_regression_predict`` is called directly.
    """
    check_required_parameters(_linear_regression_predict, params, ['table', 'model'])

    if group_by is None:
        return _linear_regression_predict(table, model, **params)
    return _function_by_group(_linear_regression_predict, table, model, group_by=group_by, **params)
def linear_regression_train(table, group_by=None, **params):
    """Run ``_linear_regression_train``, per group if requested.

    Only ``check_required_parameters`` is run here; no defaults are filled
    and no value validation is performed in this wrapper.

    Returns the direct result when ``group_by`` is None, otherwise the
    result of ``_function_by_group``.

    NOTE(review): an earlier ``linear_regression_train`` in this source
    fills defaults and validates ``vif_threshold``; if both live in the same
    module, this later definition replaces it -- confirm which is intended.
    """
    check_required_parameters(_linear_regression_train, params, ['table'])

    if group_by is None:
        return _linear_regression_train(table, **params)
    return _function_by_group(_linear_regression_train, table, group_by=group_by, **params)
def decision_tree_classification_predict(table, model, **params):
    """Run ``_decision_tree_classification_predict`` on ``table`` with ``model``.

    ``check_required_parameters`` is called first. If ``model`` contains the
    key ``'_grouped_data'`` the call is routed through
    ``_function_by_group``; otherwise the implementation is called directly.
    """
    check_required_parameters(_decision_tree_classification_predict, params, ['table', 'model'])

    if '_grouped_data' not in model:
        return _decision_tree_classification_predict(table, model, **params)
    return _function_by_group(_decision_tree_classification_predict, table, model, **params)
def mean_shift_predict(table, model, **params):
    """Run ``_mean_shift_predict`` on ``table`` with ``model``.

    ``check_required_parameters`` is called first. If ``model`` contains the
    key ``'_grouped_data'`` the call is routed through
    ``_function_by_group``; otherwise ``_mean_shift_predict`` is called
    directly.
    """
    check_required_parameters(_mean_shift_predict, params, ['table', 'model'])

    if '_grouped_data' not in model:
        return _mean_shift_predict(table, model, **params)
    return _function_by_group(_mean_shift_predict, table, model, **params)
def add_row_number(table, group_by=None, **params):
    """Run ``_add_row_number`` on ``table``, per group if requested.

    ``check_required_parameters`` is called first. Returns the direct result
    of ``_add_row_number`` when ``group_by`` is None, otherwise the result
    of ``_function_by_group``.
    """
    check_required_parameters(_add_row_number, params, ['table'])

    if group_by is None:
        return _add_row_number(table, **params)
    return _function_by_group(_add_row_number, table, group_by=group_by, **params)
def scale(table, group_by=None, **params):
    """Run ``_scale`` on ``table``, per group if requested.

    ``check_required_parameters`` is called first. Returns the direct result
    of ``_scale`` when ``group_by`` is None, otherwise the result of
    ``_function_by_group``.
    """
    check_required_parameters(_scale, params, ['table'])

    if group_by is None:
        return _scale(table, **params)
    return _function_by_group(_scale, table, group_by=group_by, **params)
def label_encoder(table, group_by=None, **params):
    """Run ``_label_encoder`` on ``table``, per group if requested.

    ``check_required_parameters`` is called first. Returns the direct result
    of ``_label_encoder`` when ``group_by`` is None, otherwise the result of
    ``_function_by_group``.
    """
    check_required_parameters(_label_encoder, params, ['table'])

    if group_by is None:
        return _label_encoder(table, **params)
    return _function_by_group(_label_encoder, table, group_by=group_by, **params)
def pca_model(table, model, **params):
    """Run ``_pca_model`` on ``table`` with ``model``.

    ``check_required_parameters`` is called first. If ``model`` contains the
    key ``'_grouped_data'`` the call is routed through
    ``_function_by_group``; otherwise ``_pca_model`` is called directly.
    """
    check_required_parameters(_pca_model, params, ['table', 'model'])

    if '_grouped_data' not in model:
        return _pca_model(table, model, **params)
    return _function_by_group(_pca_model, table, model, **params)
def collaborative_filtering_predict(table, model, **params):
    """Run ``_collaborative_filtering_predict`` on ``table`` with ``model``.

    ``check_required_parameters`` is called first. If ``model`` contains the
    key ``'_grouped_data'`` the call is routed through
    ``_function_by_group``; otherwise the implementation is called directly.
    """
    check_required_parameters(_collaborative_filtering_predict, params, ['table', 'model'])

    if '_grouped_data' not in model:
        return _collaborative_filtering_predict(table, model, **params)
    return _function_by_group(_collaborative_filtering_predict, table, model, **params)