Exemple #1
0
def train_svm(name='svm', dataset='wine', C=1.0, problem='classification', **kwargs):
    """Fit an ``SVM`` wrapper on a dataset and return it with its test score.

    The dataset is loaded through ``get_dataset`` (split, non-discretized,
    one-hot encoded). When the ``rebalance`` flag, which is resolved from the
    enclosing scope, is truthy, the training split is first passed through
    ``sample_balance``.

    Args:
        name: Suffix used to build the model name ``"<dataset>-<name>"``.
        dataset: Dataset identifier handed to ``get_dataset``.
        C: Forwarded to ``SVM`` as ``C``.
        problem: Forwarded to ``SVM`` as ``problem``.
        **kwargs: Extra keyword arguments forwarded to ``SVM``.

    Returns:
        A ``(svm, acc)`` tuple: the trained model and the first of the three
        values returned by ``svm.test`` on the test split.
    """
    loaded = get_dataset(dataset, split=True, discrete=False, one_hot=True)
    # 'feature_names' is fetched alongside the splits but not used below.
    x_train, y_train, x_test, y_test, _feature_names = [
        loaded[key]
        for key in ('train_x', 'train_y', 'test_x', 'test_y', 'feature_names')
    ]

    if rebalance:
        print("balancing training data")
        x_train, y_train = sample_balance(x_train, y_train)
        print("#data after balancing:", len(y_train))

    encoder = loaded['one_hot_encoder']
    _is_categorical = loaded['is_categorical']  # read but not used below

    svm = SVM(name='-'.join((dataset, name)), problem=problem, C=C,
              one_hot_encoder=encoder, **kwargs)
    svm.train(x_train, y_train)
    svm.evaluate(x_train, y_train, stage='train')

    # ``test`` yields three values; only the first one is returned.
    test_scores = svm.test(x_test, y_test)
    acc, _loss, _auc = test_scores
    return svm, acc
Exemple #2
0
def train_nn(name='nn', dataset='wine', neurons=(20,), alpha=0.01, problem='classification', **kwargs):
    """Fit a ``NeuralNet`` wrapper on a dataset and return it with its test score.

    The dataset comes from ``get_dataset`` (split, non-discretized, one-hot
    encoded). If the ``rebalance`` flag from the enclosing scope is truthy,
    the training split is resampled with ``sample_balance`` before training.

    Args:
        name: Model-name component placed after the dataset name.
        dataset: Dataset identifier handed to ``get_dataset``.
        neurons: Iterable of layer sizes; forwarded to ``NeuralNet`` and also
            appended (stringified) to the model name.
        alpha: Forwarded to ``NeuralNet`` as ``alpha``.
        problem: Forwarded to ``NeuralNet`` as ``problem``.
        **kwargs: Extra keyword arguments forwarded to ``NeuralNet``.

    Returns:
        A ``(nn, acc)`` tuple: the trained model and the first of the three
        values returned by ``nn.test`` on the test split.
    """
    dataset_parts = get_dataset(dataset, split=True, discrete=False, one_hot=True)
    x_train = dataset_parts['train_x']
    y_train = dataset_parts['train_y']
    x_test = dataset_parts['test_x']
    y_test = dataset_parts['test_y']
    _feature_names = dataset_parts['feature_names']  # fetched but not used below

    if rebalance:
        print("balancing training data")
        x_train, y_train = sample_balance(x_train, y_train)
        print("#data after balancing:", len(y_train))

    encoder = dataset_parts['one_hot_encoder']
    _is_categorical = dataset_parts['is_categorical']  # fetched but not used below

    # Model name looks like "<dataset>-<name>-<n1>-<n2>-...".
    name_parts = [dataset, name] + list(map(str, neurons))
    model_name = '-'.join(name_parts)

    nn = NeuralNet(
        name=model_name,
        problem=problem,
        neurons=neurons,
        max_iter=5000,
        alpha=alpha,
        one_hot_encoder=encoder,
        **kwargs
    )
    nn.train(x_train, y_train)
    nn.evaluate(x_train, y_train, stage='train')

    acc, _loss, _auc = nn.test(x_test, y_test)
    return nn, acc
Exemple #3
0
def train_nn(name='nn', dataset='wine', neurons=(20,), alpha=0.01, **kwargs):
    """Train, evaluate and save an ``MLPClassifier`` wrapped in ``SKClassifier``.

    The dataset comes from ``get_dataset`` (split, non-discretized, one-hot
    encoded). If the ``rebalance`` flag from the enclosing scope is truthy,
    the training split is resampled with ``sample_balance`` first. The wrapper
    is created with ``standardize=True``.

    Args:
        name: Model-name component placed after the dataset name.
        dataset: Dataset identifier handed to ``get_dataset``.
        neurons: Iterable of layer sizes; passed as ``hidden_layer_sizes``
            and appended (stringified) to the model name.
        alpha: Forwarded to ``MLPClassifier`` as ``alpha``.
        **kwargs: Extra keyword arguments forwarded to ``MLPClassifier``.

    Returns:
        None. The result of ``nn.test`` is discarded and the model is
        persisted through ``nn.save()``.
    """
    # Imported inside the function, so sklearn is only loaded when called.
    from sklearn.neural_network import MLPClassifier

    bundle = get_dataset(dataset, split=True, discrete=False, one_hot=True)
    wanted = ('train_x', 'train_y', 'test_x', 'test_y', 'feature_names')
    # The last entry ('feature_names') is looked up but not used below.
    x_train, y_train, x_test, y_test, _feature_names = [bundle[key] for key in wanted]

    if rebalance:
        print("balancing training data")
        x_train, y_train = sample_balance(x_train, y_train)
        print("#data after balancing:", len(y_train))

    encoder = bundle['one_hot_encoder']
    _is_categorical = bundle['is_categorical']  # looked up but not used below

    # Model name looks like "<dataset>-<name>-<n1>-<n2>-...".
    model_name = '-'.join([dataset, name] + list(map(str, neurons)))

    classifier = MLPClassifier(hidden_layer_sizes=neurons, max_iter=5000,
                               alpha=alpha, **kwargs)
    nn = SKClassifier(classifier, name=model_name, standardize=True,
                      one_hot_encoder=encoder)

    nn.train(x_train, y_train)
    nn.evaluate(x_train, y_train, stage='train')
    nn.test(x_test, y_test)
    nn.save()
Exemple #4
0
def train_svm(name='svm', dataset='wine', C=1.0, problem='classification', **kwargs):
    """Train, evaluate and save an ``SVC`` wrapped in ``SKClassifier``.

    The dataset comes from ``get_dataset`` (split, non-discretized, one-hot
    encoded). If the ``rebalance`` flag from the enclosing scope is truthy,
    the training split is resampled with ``sample_balance`` first. The ``SVC``
    is built with ``probability=True``.

    Args:
        name: Suffix used to build the model name ``"<dataset>-<name>"``.
        dataset: Dataset identifier handed to ``get_dataset``.
        C: Forwarded to ``SVC`` as ``C``.
        problem: Accepted for signature compatibility; not read in this body.
        **kwargs: Extra keyword arguments forwarded to ``SVC``.

    Returns:
        None. The result of ``svm.test`` is discarded and the model is
        persisted through ``svm.save()``.
    """
    # Imported inside the function, so sklearn is only loaded when called.
    from sklearn.svm import SVC

    parts = get_dataset(dataset, split=True, discrete=False, one_hot=True)
    x_train = parts['train_x']
    y_train = parts['train_y']
    x_test = parts['test_x']
    y_test = parts['test_y']
    _feature_names = parts['feature_names']  # looked up but not used below

    if rebalance:
        print("balancing training data")
        x_train, y_train = sample_balance(x_train, y_train)
        print("#data after balancing:", len(y_train))

    encoder = parts['one_hot_encoder']
    _is_categorical = parts['is_categorical']  # looked up but not used below

    model_name = '-'.join((dataset, name))
    estimator = SVC(C=C, probability=True, **kwargs)
    svm = SKClassifier(estimator, name=model_name, one_hot_encoder=encoder)

    svm.train(x_train, y_train)
    svm.evaluate(x_train, y_train, stage='train')
    svm.test(x_test, y_test)
    svm.save()
Exemple #5
0
def train_tree(name='tree', dataset='wine', max_depth=None, min_samples_leaf=0.005, **kwargs):
    """Train a ``Tree`` model, then evaluate, describe, export and save it.

    The dataset comes from ``get_dataset`` (split, non-discretized, one-hot
    encoded). If the ``rebalance`` flag from the enclosing scope is truthy,
    the training split is resampled with ``sample_balance`` first.

    Args:
        name: Suffix used to build the model name ``"<dataset>-<name>"``.
        dataset: Dataset identifier handed to ``get_dataset``.
        max_depth: Forwarded to ``Tree`` as ``max_depth``.
        min_samples_leaf: Forwarded to ``Tree`` as ``min_samples_leaf``.
        **kwargs: Extra keyword arguments forwarded to ``Tree``.

    Returns:
        None. The model is exported to the path produced by
        ``get_path('models', '<model_name>.json')`` and then saved.
    """
    loaded = get_dataset(dataset, split=True, discrete=False, one_hot=True)
    keys = ('train_x', 'train_y', 'test_x', 'test_y', 'feature_names', 'one_hot_encoder')
    # 'feature_names' is looked up with the rest but not used below.
    x_train, y_train, x_test, y_test, _feature_names, encoder = [
        loaded[key] for key in keys
    ]

    if rebalance:
        print("balancing training data")
        x_train, y_train = sample_balance(x_train, y_train)
        print("#data after balancing:", len(y_train))

    model_name = '-'.join((dataset, name))
    tree = Tree(
        name=model_name,
        max_depth=max_depth,
        min_samples_leaf=min_samples_leaf,
        one_hot_encoder=encoder,
        **kwargs
    )

    tree.train(x_train, y_train)
    tree.evaluate(x_train, y_train, stage='train')
    tree.test(x_test, y_test)
    tree.describe()

    # The export target is resolved only after the model has been described.
    export_file = '{}.json'.format(model_name)
    tree.export(get_path('models', export_file))
    tree.save()
Exemple #6
0
def train_rule(name='rule', dataset='breast_cancer', rule_max_len=2, **kwargs):
    """Train a ``RuleList`` model, then evaluate, describe and save it.

    The dataset comes from ``get_dataset`` with ``split=True`` and
    ``discrete=True``. If the ``rebalance`` flag from the enclosing scope is
    truthy, the training split is resampled with ``sample_balance`` first.

    Args:
        name: Suffix used to build the model name ``"<dataset>-<name>"``.
        dataset: Dataset identifier handed to ``get_dataset``.
        rule_max_len: Forwarded to ``RuleList`` as ``rule_maxlen``.
        **kwargs: Extra keyword arguments forwarded to ``RuleList``.

    Returns:
        None. The result of ``brl.test`` is discarded and the model is
        persisted through ``brl.save()``.
    """
    loaded = get_dataset(dataset, split=True, discrete=True)
    x_train = loaded['train_x']
    y_train = loaded['train_y']
    x_test = loaded['test_x']
    y_test = loaded['test_y']
    feature_names = loaded['feature_names']

    # Imported inside the function, after the dataset has been loaded.
    from iml.models.rule_model import RuleList

    if rebalance:
        print("balancing training data")
        x_train, y_train = sample_balance(x_train, y_train)
        print("#data after balancing:", len(y_train))

    discretizer = loaded['discretizer']
    brl = RuleList(
        name='-'.join((dataset, name)),
        rule_maxlen=rule_max_len,
        discretizer=discretizer,
        **kwargs
    )

    brl.train(x_train, y_train)
    brl.evaluate(x_train, y_train, stage='train')
    brl.test(x_test, y_test)
    brl.describe(feature_names=feature_names)
    brl.save()