Example no. 1
from sklearn.metrics import get_scorer
from sklearn.model_selection import train_test_split

import utils  # project-local helper module


def single_split(data, estimator, scoring):
    # Split the records into attribute columns and the class column.
    attrs, classes = utils.horizontal_split(data)
    X_train, X_test, y_train, y_test = train_test_split(attrs,
                                                        classes,
                                                        test_size=0.4)
    estimator.fit(X_train, y_train)
    # Resolve the scoring name (e.g. "accuracy") to a scorer callable.
    scorer = get_scorer(scoring)
    return scorer(estimator, X_test, y_test)
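Every example in this listing leans on the project-local utils.horizontal_split helper, whose implementation is not shown here. A minimal sketch of what it presumably does, assuming utils.get_class_index reports the position of the class label in each record (both names come from the listing, but the body below is an assumption, not the project's actual code):

def horizontal_split(data):
    # Hypothetical sketch: separate each record's attribute values
    # from its class label.
    class_index = get_class_index(data)  # project-local helper, assumed available
    attrs = [row[:class_index] + row[class_index + 1:] for row in data]
    classes = [row[class_index] for row in data]
    return attrs, classes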
Example no. 2
import csv

import utils  # project-local helper module


def load(file):
    # Read every row of the CSV file; newline="" is the csv-module convention.
    with open(file, "r", newline="") as f:
        data = list(csv.reader(f))
    class_index = utils.get_class_index(data)
    # Cast every attribute value to float, leaving the class label as a string.
    for i in range(len(data)):
        for j in range(len(data[i])):
            val = data[i][j]
            data[i][j] = float(val) if j != class_index else val
    return utils.horizontal_split(data)
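Assuming a well-formed CSV in which every column except the class column is numeric, load can be exercised directly; the file name below is a placeholder:

# Hypothetical usage; "dataset.csv" is a made-up file name.
attrs, classes = load("dataset.csv")
print(len(attrs), "records,", len(attrs[0]), "attributes each")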
Example no. 3
    # Class-method snippet: the enclosing class and its helpers
    # (get_empty_classes_dict, get_attr_probs, empty_bins) are not shown here.
    def get_attr_by_class_probs(self, X, y):
        # Merge the attribute rows with their class labels back into full records.
        data = utils.merge_attrs(X, y)
        class_index = utils.get_class_index(data)
        # Group the records by class label, one list per class.
        result = self.get_empty_classes_dict([])
        for record in data:
            class_key = record[class_index]
            result[class_key].append(record)
        # Replace each group with its per-attribute probability tables.
        for key in result:
            class_X, class_y = utils.horizontal_split(result[key])
            result[key] = self.get_attr_probs(class_X, self.empty_bins())
        return result
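The method therefore maps each class label to whatever get_attr_probs builds for that class's records (its per-attribute probability tables). As a comment-level illustration of the returned structure:

# Illustrative shape of the result (keys depend on the dataset):
# {
#     "<class label A>": <per-attribute probabilities from get_attr_probs>,
#     "<class label B>": <...>,
#     ...
# }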
Example no. 4
import utils        # project-local helper module
import validation   # project-local module providing k_fold and single_split


def score(data,
          estimator_const,
          est_param,
          k=10,
          cross_val=True,
          stratified=True):
    attrs, classes = utils.horizontal_split(data)
    attr_ranges = utils.attr_ranges(attrs)
    # Collect the class labels from the whole dataset, not just the training
    # split, so the estimator knows every class it may encounter.
    unique_classes = utils.unique_classes(classes)
    estimator = estimator_const(unique_classes, est_param, attr_ranges)
    scoring = ['f1_macro', 'accuracy', 'precision_macro', 'recall_macro']
    # Either k-fold cross-validation or a single train/test split.
    score = validation.k_fold(data, estimator, scoring, k, stratified) \
        if cross_val else validation.single_split(data, estimator, scoring)
    print(("mean " if cross_val else "") + f"score: {score}")
    return score
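Calling score only requires the raw records plus an estimator constructor matching the (unique_classes, est_param, attr_ranges) signature used above; the NaiveBayes name and the parameter value below are placeholders, not part of the listing:

# Hypothetical usage; NaiveBayes and est_param=10 are assumptions, and
# records must be the raw data rows (score splits them itself).
mean_score = score(records, NaiveBayes, est_param=10, k=10,
                   cross_val=True, stratified=True)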
Example no. 5
import numpy as np
from sklearn.model_selection import KFold, StratifiedKFold, cross_validate

import utils  # project-local helper module

def k_fold(data, estimator, scoring, k, stratified=True):
    attrs, classes = utils.horizontal_split(data)
    cv = StratifiedKFold(n_splits=k) if stratified else KFold(n_splits=k)
    scores = cross_validate(estimator, attrs, classes, cv=cv, scoring=scoring)
    scores_test = [scores["test_" + scoring_elem] for scoring_elem in scoring]
    return np.mean(scores_test, (0, 1))  # average over all metrics and folds
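For context, sklearn's cross_validate returns a dict holding fit_time, score_time, and one test_<name> array of k fold scores per requested metric, so scores_test ends up as a (len(scoring), k) matrix and the final np.mean collapses it to a single scalar:

# scores looks roughly like:
# {"fit_time": array([...]), "score_time": array([...]),
#  "test_f1_macro": array of k fold scores,
#  "test_accuracy": array of k fold scores, ...}
# scores_test -> (len(scoring), k); np.mean(scores_test, (0, 1)) -> one float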