def decision_tree_classification_train(table, group_by=None, **params):
    """Check, default and validate ``params``, then train a decision tree.

    Constraints requested from the validation helpers:
    ``min_samples_split >= 2``, ``min_samples_leaf >= 1``,
    ``min_weight_fraction_leaf`` in the ``from_to`` range 0.0 .. 0.5,
    ``min_impurity_decrease >= 0.0``, ``max_depth >= 1``,
    ``max_features >= 1`` and ``max_leaf_nodes`` ``greater_than`` 1.

    With ``group_by`` set, training is delegated to ``_function_by_group``;
    otherwise ``_decision_tree_classification_train`` is called directly on
    ``table``.
    """
    impl = _decision_tree_classification_train
    check_required_parameters(impl, params, ['table'])
    params = get_default_from_parameters_if_required(params, impl)

    validate(
        greater_than_or_equal_to(params, 2, 'min_samples_split'),
        greater_than_or_equal_to(params, 1, 'min_samples_leaf'),
        from_to(params, 0.0, 0.5, 'min_weight_fraction_leaf'),
        greater_than_or_equal_to(params, 0.0, 'min_impurity_decrease'),
        greater_than_or_equal_to(params, 1, 'max_depth'),
        greater_than_or_equal_to(params, 1, 'max_features'),
        greater_than(params, 1, 'max_leaf_nodes'),
    )

    if group_by is None:
        return impl(table, **params)
    return _function_by_group(impl, table, group_by=group_by, **params)
def timeseries_decomposition(table, group_by=None, **params):
    """Check, default and validate ``params``, then decompose the series.

    ``frequency`` is validated against a lower bound of 1 and
    ``extrapolate_trend`` against a lower bound of 0. With ``group_by`` set
    the call goes through ``_function_by_group``; otherwise
    ``_timeseries_decomposition`` is invoked directly on ``table``.
    """
    impl = _timeseries_decomposition
    check_required_parameters(impl, params, ['table'])
    params = get_default_from_parameters_if_required(params, impl)
    validate(
        greater_than_or_equal_to(params, 1, 'frequency'),
        greater_than_or_equal_to(params, 0, 'extrapolate_trend'),
    )
    if group_by is None:
        return impl(table, **params)
    return _function_by_group(impl, table, group_by=group_by, **params)
Exemple #3
0
def knn_regression(train_table, test_table, **params):
    """Check, default and validate ``params``, then run ``_knn_regression``.

    ``k``, ``leaf_size`` and ``p`` are each validated against a lower bound
    of 1 before the implementation is called with both tables.
    """
    impl = _knn_regression
    check_required_parameters(impl, params, ['train_table', 'test_table'])
    params = get_default_from_parameters_if_required(params, impl)

    validate(
        greater_than_or_equal_to(params, 1, 'k'),
        greater_than_or_equal_to(params, 1, 'leaf_size'),
        greater_than_or_equal_to(params, 1, 'p'),
    )
    return impl(train_table, test_table, **params)
Exemple #4
0
def ner_crf_train(table, **params):
    """Check, default and validate ``params``, then run ``_ner_crf_train``.

    ``c1`` and ``c2`` are validated against a lower bound of 0 and
    ``max_iterations`` against a lower bound of 1.
    """
    impl = _ner_crf_train
    check_required_parameters(impl, params, ['table'])
    params = get_default_from_parameters_if_required(params, impl)
    validate(
        greater_than_or_equal_to(params, 0, 'c1'),
        greater_than_or_equal_to(params, 0, 'c2'),
        greater_than_or_equal_to(params, 1, 'max_iterations'),
    )
    return impl(table, **params)
Exemple #5
0
def plot_roc_pr_curve(table, group_by=None, **params):
    """Check, default and validate ``params``, then run ``_plot_roc_pr_curve``.

    ``fig_w`` and ``fig_h`` are validated against a lower bound of 0.0.
    With ``group_by`` set the call goes through ``_function_by_group``;
    otherwise the implementation is invoked directly on ``table``.
    """
    impl = _plot_roc_pr_curve
    check_required_parameters(impl, params, ['table'])
    params = get_default_from_parameters_if_required(params, impl)
    validate(
        greater_than_or_equal_to(params, 0.0, 'fig_w'),
        greater_than_or_equal_to(params, 0.0, 'fig_h'),
    )

    if group_by is None:
        return impl(table, **params)
    return _function_by_group(impl, table, group_by=group_by, **params)
Exemple #6
0
def collaborative_filtering_recommend(table, group_by=None, **params):
    """Default and validate ``params``, then run the recommender.

    ``N`` and ``k`` are each validated against a lower bound of 1. With
    ``group_by`` set the call goes through ``_function_by_group``; otherwise
    ``_collaborative_filtering_recommend`` is invoked directly on ``table``.
    """
    impl = _collaborative_filtering_recommend
    params = get_default_from_parameters_if_required(params, impl)
    validate(
        greater_than_or_equal_to(params, 1, 'N'),
        greater_than_or_equal_to(params, 1, 'k'),
    )
    # NOTE: in this function the required-parameter check runs after the
    # defaults have been filled in and the range validation has passed.
    check_required_parameters(impl, params, ['table'])

    if group_by is None:
        return impl(table, **params)
    return _function_by_group(impl, table, group_by=group_by, **params)
Exemple #7
0
def dtm(table, group_by=None, **params):
    """Check, default and validate ``params``, then run ``_dtm``.

    ``num_topic`` and ``num_topic_word`` are validated against a lower bound
    of 2 and ``max_iter`` against a lower bound of 1. With ``group_by`` set
    the call goes through ``_function_by_group``; otherwise the
    implementation is invoked directly on ``table``.
    """
    impl = _dtm
    check_required_parameters(impl, params, ['table'])
    params = get_default_from_parameters_if_required(params, impl)
    validate(
        greater_than_or_equal_to(params, 2, 'num_topic'),
        greater_than_or_equal_to(params, 2, 'num_topic_word'),
        greater_than_or_equal_to(params, 1, 'max_iter'),
    )
    if group_by is None:
        return impl(table, **params)
    return _function_by_group(impl, table, group_by=group_by, **params)
Exemple #8
0
def tfidf(table, group_by=None, **params):  # This will be deprecated.
    """Check, default and validate ``params``, then run ``_tfidf``.

    ``min_df`` is validated against a lower bound of 0, ``num_voca`` against
    a lower bound of 2, and ``max_df`` is checked with ``greater_than``
    against 0. With ``group_by`` set the call goes through
    ``_function_by_group``; otherwise the implementation is invoked directly.
    """
    impl = _tfidf
    check_required_parameters(impl, params, ['table'])
    params = get_default_from_parameters_if_required(params, impl)
    validate(
        greater_than_or_equal_to(params, 0, 'min_df'),
        greater_than_or_equal_to(params, 2, 'num_voca'),
        greater_than(params, 0, 'max_df'),
    )
    if group_by is None:
        return impl(table, **params)
    return _function_by_group(impl, table, group_by=group_by, **params)
def ada_boost_classification_train(table, group_by=None, **params):
    """Check, default and validate ``params``, then train AdaBoost.

    ``max_depth`` and ``n_estimators`` are validated against a lower bound
    of 1 and ``learning_rate`` is checked with ``greater_than`` against 0.
    With ``group_by`` set the call goes through ``_function_by_group``;
    otherwise ``_ada_boost_classification_train`` is invoked directly.
    """
    impl = _ada_boost_classification_train
    check_required_parameters(impl, params, ['table'])
    params = get_default_from_parameters_if_required(params, impl)
    validate(
        greater_than_or_equal_to(params, 1, 'max_depth'),
        greater_than_or_equal_to(params, 1, 'n_estimators'),
        greater_than(params, 0, 'learning_rate'),
    )
    if group_by is None:
        return impl(table, **params)
    return _function_by_group(impl, table, group_by=group_by, **params)
Exemple #10
0
def word2vec(table, **params):
    """Check, default and validate ``params``, then run ``_word2vec``.

    ``size``, ``window``, ``min_count``, ``workers`` and ``topn`` are each
    validated against a lower bound of 1.
    """
    impl = _word2vec
    check_required_parameters(impl, params, ['table'])

    params = get_default_from_parameters_if_required(params, impl)
    validate(
        greater_than_or_equal_to(params, 1, 'size'),
        greater_than_or_equal_to(params, 1, 'window'),
        greater_than_or_equal_to(params, 1, 'min_count'),
        greater_than_or_equal_to(params, 1, 'workers'),
        greater_than_or_equal_to(params, 1, 'topn'),
    )
    return impl(table, **params)
Exemple #11
0
def tfidf3(table, group_by=None, **params):
    """Check, default and validate ``params``, then run ``_tfidf3``.

    ``min_df`` is validated against a lower bound of 0, ``max_features``
    against a lower bound of 2, and ``max_df`` is checked with
    ``greater_than`` against 0. With ``group_by`` set the call goes through
    ``_function_by_group``; otherwise the implementation is invoked directly.
    """
    impl = _tfidf3
    check_required_parameters(impl, params, ['table'])
    params = get_default_from_parameters_if_required(params, impl)
    validate(
        greater_than_or_equal_to(params, 0, 'min_df'),
        greater_than_or_equal_to(params, 2, 'max_features'),
        greater_than(params, 0, 'max_df'),
    )
    if group_by is None:
        return impl(table, **params)
    return _function_by_group(impl, table, group_by=group_by, **params)
Exemple #12
0
def gaussian_mixture_train(table, group_by=None, **params):
    """Check, default and validate ``params``, then train a Gaussian mixture.

    ``number_of_components`` and ``max_iteration`` are validated against a
    lower bound of 1; ``tolerance`` and ``regularize_covariance`` are checked
    with ``greater_than`` against 0. With ``group_by`` set the call goes
    through ``_function_by_group``; otherwise ``_gaussian_mixture_train`` is
    invoked directly on ``table``.
    """
    impl = _gaussian_mixture_train
    check_required_parameters(impl, params, ['table'])
    params = get_default_from_parameters_if_required(params, impl)
    validate(
        greater_than_or_equal_to(params, 1, 'number_of_components'),
        greater_than(params, 0, 'tolerance'),
        greater_than(params, 0, 'regularize_covariance'),
        greater_than_or_equal_to(params, 1, 'max_iteration'),
    )
    if group_by is None:
        return impl(table, **params)
    return _function_by_group(impl, table, group_by=group_by, **params)
Exemple #13
0
def xgb_regression_train(table, group_by=None, **params):
    """Default and validate ``params``, then train an XGBoost regressor.

    ``max_depth`` and ``n_estimators`` are validated against a lower bound
    of 1 and ``learning_rate`` against a lower bound of 0.0. With
    ``group_by`` set the call goes through ``_function_by_group``; otherwise
    ``_xgb_regression_train`` is invoked directly on ``table``.
    """
    impl = _xgb_regression_train
    params = get_default_from_parameters_if_required(params, impl)
    validate(
        greater_than_or_equal_to(params, 1, 'max_depth'),
        greater_than_or_equal_to(params, 0.0, 'learning_rate'),
        greater_than_or_equal_to(params, 1, 'n_estimators'),
    )
    # NOTE: in this function the required-parameter check runs after the
    # defaults have been filled in and the range validation has passed.
    check_required_parameters(impl, params, ['table'])

    if group_by is None:
        return impl(table, **params)
    return _function_by_group(impl, table, group_by=group_by, **params)
Exemple #14
0
def gsdmm(table, **params):
    """Check, default and validate ``params``, then run ``_gsdmm``.

    ``K`` is validated against a lower bound of 2, ``alpha`` and ``beta``
    against a lower bound of 0.0, and ``max_iter`` and ``num_topic_words``
    against a lower bound of 1.
    """
    impl = _gsdmm
    check_required_parameters(impl, params, ['table'])
    params = get_default_from_parameters_if_required(params, impl)

    validate(
        greater_than_or_equal_to(params, 2, 'K'),
        greater_than_or_equal_to(params, 0.0, 'alpha'),
        greater_than_or_equal_to(params, 0.0, 'beta'),
        greater_than_or_equal_to(params, 1, 'max_iter'),
        greater_than_or_equal_to(params, 1, 'num_topic_words'),
    )
    return impl(table, **params)
Exemple #15
0
def bow(table, group_by=None, **params):
    """Check, default and validate ``params``, then run ``_bow``.

    ``no_below`` is validated against a lower bound of 0, ``keep_n`` against
    a lower bound of 1, and ``no_above`` is checked with both
    ``less_than_or_equal_to`` 1.0 and ``greater_than`` 0.0. With ``group_by``
    set the call goes through ``_function_by_group``; otherwise the
    implementation is invoked directly on ``table``.
    """
    impl = _bow
    check_required_parameters(impl, params, ['table'])
    params = get_default_from_parameters_if_required(params, impl)
    validate(
        greater_than_or_equal_to(params, 0, 'no_below'),
        less_than_or_equal_to(params, 1.0, 'no_above'),
        greater_than(params, 0.0, 'no_above'),
        greater_than_or_equal_to(params, 1, 'keep_n'),
    )
    if group_by is None:
        return impl(table, **params)
    return _function_by_group(impl, table, group_by=group_by, **params)
Exemple #16
0
def lda(table, group_by=None, **params):
    """Check, default and validate ``params``, then run ``_lda``.

    ``num_voca`` and ``num_topic`` are validated against a lower bound of 2,
    ``num_topic_word`` is checked with ``from_to`` between 2 and the current
    ``num_voca`` value, ``max_iter`` against a lower bound of 1, and
    ``learning_offset`` with ``greater_than`` against 1.0. With ``group_by``
    set the call goes through ``_function_by_group``; otherwise the
    implementation is invoked directly on ``table``.
    """
    impl = _lda
    check_required_parameters(impl, params, ['table'])
    params = get_default_from_parameters_if_required(params, impl)

    # The upper bound for 'num_topic_word' is read from params at this point,
    # i.e. after the defaults have been filled in.
    validate(
        greater_than_or_equal_to(params, 2, 'num_voca'),
        greater_than_or_equal_to(params, 2, 'num_topic'),
        from_to(params, 2, params['num_voca'], 'num_topic_word'),
        greater_than_or_equal_to(params, 1, 'max_iter'),
        greater_than(params, 1.0, 'learning_offset'),
    )
    if group_by is None:
        return impl(table, **params)
    return _function_by_group(impl, table, group_by=group_by, **params)
Exemple #17
0
def random_forest_regression_train(table, group_by=None, **params):
    """Check, default and validate ``params``, then train a random forest.

    ``n_estimators``, ``max_depth``, ``min_samples_split`` and
    ``min_samples_leaf`` are each validated against a lower bound of 1.
    With ``group_by`` set the call goes through ``_function_by_group``;
    otherwise ``_random_forest_regression_train`` is invoked directly.
    """
    impl = _random_forest_regression_train
    check_required_parameters(impl, params, ['table'])

    params = get_default_from_parameters_if_required(params, impl)
    validate(
        greater_than_or_equal_to(params, 1, 'n_estimators'),
        greater_than_or_equal_to(params, 1, 'max_depth'),
        greater_than_or_equal_to(params, 1, 'min_samples_split'),
        greater_than_or_equal_to(params, 1, 'min_samples_leaf'),
    )

    if group_by is None:
        return impl(table, **params)
    return _function_by_group(impl, table, group_by=group_by, **params)
Exemple #18
0
def arima_train(table, group_by=None, **params):
    """Check, default and validate ``params``, then run ``_arima_train``.

    ``p`` and ``q`` are validated against a lower bound of 0 and ``d`` is
    checked with ``from_to`` between 0 and 2. With ``group_by`` set the call
    goes through ``_function_by_group``; otherwise the implementation is
    invoked directly on ``table``.
    """
    impl = _arima_train
    check_required_parameters(impl, params, ['table'])
    params = get_default_from_parameters_if_required(params, impl)
    validate(
        greater_than_or_equal_to(params, 0, 'p'),
        from_to(params, 0, 2, 'd'),
        greater_than_or_equal_to(params, 0, 'q'),
    )
    if group_by is None:
        return impl(table, **params)
    return _function_by_group(impl, table, group_by=group_by, **params)
Exemple #19
0
def _replace_missing_number(table, input_cols, fill_method=None, fill_value='value', fill_value_to=0.0, limit=None, downcast=None):
    # Validation : limit >= 1
    if limit is not None:
        validate(greater_than_or_equal_to(limit, 1, 'limit'))

    _table = table.copy()
    
    if input_cols is None or len(input_cols) == 0:
        _raw_input_cols = _table.columns
    else:
        _raw_input_cols = input_cols
    
    if fill_method == 'ffill' or fill_method == 'bfill':
        _out_table = _table
        _out_table[input_cols] = _table[input_cols].fillna(method=fill_method, limit=limit, downcast=downcast)
    else:
        _input_cols = [x for x in _raw_input_cols if np.issubdtype(table[x].dtype, np.number)]
        if fill_value == 'mean':
            _values = {x:_table[x].dtype.type(np.mean(_table[x].dropna())) for x in _input_cols}
        elif fill_value == 'median':
            _values = {x:_table[x].dtype.type(np.median(_table[x].dropna())) for x in _input_cols}
        elif fill_value == 'min':
            _values = {x:np.min(_table[x].dropna()) for x in _input_cols}
        elif fill_value == 'max':
            _values = {x:np.max(_table[x].dropna()) for x in _input_cols}
        else:
            _values = {x:fill_value_to for x in _input_cols}

        _out_table = _table.fillna(value=_values, limit=limit, downcast=downcast)
    return {'out_table':_out_table}
Exemple #20
0
def svm_classification_train(table, group_by=None, **params):
    """Check, default and validate ``params``, then train an SVM classifier.

    When ``gamma`` is ``'other'`` a ``gamma_val`` entry must be present and
    greater than 0, otherwise a ``BFE`` error is raised; for any other
    ``gamma`` setting ``gamma_val`` is reset to ``None``. The remaining
    options (``c``, ``degree``, ``tol``, ``max_iter``) are passed through the
    validation helpers before training.

    With ``group_by`` set the call goes through ``_function_by_group``;
    otherwise ``_svm_classification_train`` is invoked directly on ``table``.
    """
    impl = _svm_classification_train
    check_required_parameters(impl, params, ['table', 'gamma_val'])
    params = get_default_from_parameters_if_required(params, impl)

    if params['gamma'] != 'other':
        # An explicit gamma value is only meaningful for the 'other' setting.
        params['gamma_val'] = None
    elif 'gamma_val' not in params:
        raise BFE.from_errors(
            [{'0100': 'Gamma value is mandatory when gamma is other'}])
    elif params['gamma_val'] <= 0:
        raise BFE.from_errors(
            [{'0100': 'Gamma value must be greater than 0'}])

    validate(
        over_to(params, 0.0, 1.0, 'c'),
        greater_than_or_equal_to(params, 0, 'degree'),
        greater_than(params, 0.0, 'tol'),
        greater_than_or_equal_to_or_equal_to(params, 1, -1, 'max_iter'),
    )

    if group_by is None:
        return impl(table, **params)
    return _function_by_group(impl, table, group_by=group_by, **params)
Exemple #21
0
def word2vec_similarity(model, **params):
    """Check, default and validate ``params``, then run ``_word2vec_similarity``.

    ``topn`` is validated against a lower bound of 1.
    """
    impl = _word2vec_similarity
    check_required_parameters(impl, params, ['model'])

    params = get_default_from_parameters_if_required(params, impl)
    validate(greater_than_or_equal_to(params, 1, 'topn'))
    return impl(model, **params)
Exemple #22
0
def fasttext_similarity(table, model, **params):
    """Check, default and validate ``params``, then run ``_fasttext_similarity``.

    ``topn`` is validated against a lower bound of 1.
    """
    impl = _fasttext_similarity
    check_required_parameters(impl, params, ['table', 'model'])

    params = get_default_from_parameters_if_required(params, impl)
    validate(greater_than_or_equal_to(params, 1, 'topn'))
    return impl(table, model, **params)
Exemple #23
0
def _replace_missing_string(table,
                            input_cols,
                            fill_method=None,
                            fill_string='',
                            limit=None,
                            downcast=None):
    # Validation : limit >= 1
    if limit is not None:
        validate(greater_than_or_equal_to(limit, 1, 'limit'))

    _table = table.copy()

    if input_cols is None or len(input_cols) == 0:
        _raw_input_cols = _table.columns
    else:
        _raw_input_cols = input_cols

    if fill_method == 'ffill' or fill_method == 'bfill':
        _out_table = _table.fillna(method=fill_method,
                                   limit=limit,
                                   downcast=downcast)
    else:
        _input_cols = [x for x in _raw_input_cols if table[x].dtype == object]
        _values = {x: fill_string for x in _input_cols}
        _out_table = _table.fillna(value=_values,
                                   limit=limit,
                                   downcast=downcast)

    return {'out_table': _out_table}
def topic_name_extraction(table, **params):
    """Check, default and validate ``params``, then run ``_topic_name_extraction``.

    ``topn`` is validated against a lower bound of 1.
    """
    impl = _topic_name_extraction
    check_required_parameters(impl, params, ['table'])
    params = get_default_from_parameters_if_required(params, impl)
    validate(greater_than_or_equal_to(params, 1, 'topn'))
    return impl(table, **params)
Exemple #25
0
def kmeans_silhouette_train_predict(table, group_by=None, **params):
    """Check, default and validate ``params``, then run silhouette k-means.

    ``n_clusters_list`` is checked with ``all_elements_greater_than``
    against 1; ``n_init``, ``max_iter`` and ``n_jobs`` are validated against
    a lower bound of 1, ``n_samples`` against a lower bound of 0, and ``tol``
    with ``greater_than`` against 0.0. With ``group_by`` set the call goes
    through ``_function_by_group``; otherwise
    ``_kmeans_silhouette_train_predict`` is invoked directly on ``table``.
    """
    impl = _kmeans_silhouette_train_predict
    check_required_parameters(impl, params, ['table'])
    params = get_default_from_parameters_if_required(params, impl)
    validate(
        all_elements_greater_than(params, 1, 'n_clusters_list'),
        greater_than_or_equal_to(params, 1, 'n_init'),
        greater_than_or_equal_to(params, 1, 'max_iter'),
        greater_than(params, 0.0, 'tol'),
        greater_than_or_equal_to(params, 1, 'n_jobs'),
        greater_than_or_equal_to(params, 0, 'n_samples'),
    )

    if group_by is None:
        return impl(table, **params)
    return _function_by_group(impl, table, group_by=group_by, **params)
Exemple #26
0
def fasttext(table, **params):
    """Check, default and validate ``params``, then run ``_fasttext``.

    ``size``, ``window``, ``min_count``, ``train_epoch``, ``workers``,
    ``negative``, ``topn``, ``min_n``, ``max_n`` and ``bucket`` are each
    validated against a lower bound of 1.
    """
    impl = _fasttext
    check_required_parameters(impl, params, ['table'])

    params = get_default_from_parameters_if_required(params, impl)
    validate(
        greater_than_or_equal_to(params, 1, 'size'),
        greater_than_or_equal_to(params, 1, 'window'),
        greater_than_or_equal_to(params, 1, 'min_count'),
        greater_than_or_equal_to(params, 1, 'train_epoch'),
        greater_than_or_equal_to(params, 1, 'workers'),
        greater_than_or_equal_to(params, 1, 'negative'),
        greater_than_or_equal_to(params, 1, 'topn'),
        greater_than_or_equal_to(params, 1, 'min_n'),
        greater_than_or_equal_to(params, 1, 'max_n'),
        greater_than_or_equal_to(params, 1, 'bucket'),
    )
    return impl(table, **params)
Exemple #27
0
def ngram(table, **params):
    """Check, default and validate ``params``, then run ``_ngram``.

    ``n`` is validated against a lower bound of 1.
    """
    impl = _ngram
    check_required_parameters(impl, params, ['table'])

    params = get_default_from_parameters_if_required(params, impl)
    validate(greater_than_or_equal_to(params, 1, 'n'))

    return impl(table, **params)
Exemple #28
0
def moving_average(table, group_by=None, **params):
    """Check, default and validate ``params``, then run ``_moving_average``.

    ``window_size`` is validated against a lower bound of 1. With
    ``group_by`` set the call goes through ``_function_by_group``; otherwise
    the implementation is invoked directly on ``table``.
    """
    impl = _moving_average
    check_required_parameters(impl, params, ['table'])
    params = get_default_from_parameters_if_required(params, impl)
    validate(greater_than_or_equal_to(params, 1, 'window_size'))
    if group_by is None:
        return impl(table, **params)
    return _function_by_group(impl, table, group_by=group_by, **params)
Exemple #29
0
def replace_missing_string(table, group_by=None, **params):
    """Check, default and validate ``params``, then run ``_replace_missing_string``.

    ``limit`` is validated against a lower bound of 1. With ``group_by`` set
    the call goes through ``_function_by_group``; otherwise the
    implementation is invoked directly on ``table``.
    """
    impl = _replace_missing_string
    check_required_parameters(impl, params, ['table'])
    params = get_default_from_parameters_if_required(params, impl)
    validate(greater_than_or_equal_to(params, 1, 'limit'))
    if group_by is None:
        return impl(table, **params)
    return _function_by_group(impl, table, group_by=group_by, **params)
Exemple #30
0
def doc_summarizer_eng(table, **params):
    """Check, default and validate ``params``, then run ``_doc_summarizer_eng``.

    ``ratio`` is checked with ``greater_than`` against 0 and
    ``num_sentence`` is validated against a lower bound of 1.
    """
    impl = _doc_summarizer_eng
    check_required_parameters(impl, params, ['table'])
    params = get_default_from_parameters_if_required(params, impl)
    validate(
        greater_than(params, 0, 'ratio'),
        greater_than_or_equal_to(params, 1, 'num_sentence'),
    )
    return impl(table, **params)