def check_vb(dirnm, datanm_train, datanm_valid, C, num_classes):
    """Learning curve for logistic regression at a fixed C.

    Trains on growing per-class subset sizes (1, 10, 20, ..., 300) and
    records train/validation log-loss for each size.

    Returns an array of shape (n_sizes, 2): column 0 is train log-loss,
    column 1 is validation log-loss.  Also saved to "logreg_bv.npz".
    """
    tdata, tlabels = load_full(dirnm + datanm_train, 10 * 70)
    vdata, vlabels = load_full(dirnm + datanm_valid, 10 * 30)

    # Per-class subset sizes: 0 would be degenerate, so start at 1.
    sizes = np.arange(0, 310, 10)
    sizes[0] += 1

    ans = np.zeros((sizes.size, 2))

    # Class-interleaved index orderings, built once for the largest
    # size and sliced per step below.
    tind = kget(tlabels, num_classes, sizes[-1])
    vind = kget(vlabels, num_classes, sizes[-1])

    for l in xrange(sizes.size):
        n = sizes[l] * num_classes

        clf = LogisticRegression(C=C, penalty='l2', multi_class='ovr',
                                 tol=0.001, n_jobs=-1, verbose=0,
                                 solver='newton-cg')
        clf.fit(tdata[tind[:n]], tlabels[tind[:n]])

        ans[l, 0] += log_loss(tlabels[tind[:n]],
                              clf.predict_proba(tdata[tind[:n]]))
        ans[l, 1] += log_loss(vlabels[vind[:n]],
                              clf.predict_proba(vdata[vind[:n]]))

    np.savez("logreg_bv", ans=ans, C=C, num_classes=num_classes)
    return ans
def check_lambda(dirnm, datanm_train, datanm_valid, datanm_orig_train, datanm_orig_valid, samples_per_class, Cs, num_classes):
    spct = 10*70
    tdata, tlabels = load_full(dirnm+datanm_train, spct)
    print tdata.shape, tlabels.shape

    spct = 10
    otdata, otlabels = load_full(dirnm+datanm_orig_train, spct)

    spct = 10*30
    vdata, vlabels = load_full(dirnm+datanm_valid, spct)

    spct = 10
    ovdata, ovlabels = load_full(dirnm+datanm_orig_valid, spct)

    # artif
    ans = np.zeros((len(Cs), 4))

    for i, C in enumerate(Cs):
        clf = LogisticRegression(C  =C,     penalty='l2', multi_class = 'ovr',
                                 tol=0.001, n_jobs = -1, verbose = 0, solver = 'newton-cg')
        clf.fit(tdata, tlabels)

        out_train = clf.predict_proba(tdata)
        out_valid = clf.predict_proba(vdata)
        out_train_real = clf.predict_proba(otdata)
        out_valid_real = clf.predict_proba(ovdata)

        ans[i, 0] += log_loss(tlabels, out_train)
        ans[i, 1] += log_loss(vlabels, out_valid)
        ans[i, 2] += log_loss(otlabels, out_train_real)
        ans[i, 3] += log_loss(ovlabels, out_valid_real)

    np.savez("logreg_lambda", ans= ans, Cs = Cs, num_classes = num_classes, samples_per_class = samples_per_class)
    return ans
def check_lambda(datanm, samples_per_class, depv, num_classes, criterion, num_iter = 100):
    """Sweep decision-tree max_depth values over stratified shuffle splits.

    For each of `num_iter` 70/30 splits and each depth in `depv`, fits a
    DecisionTreeClassifier and accumulates train/valid log-loss (columns
    0/1) and train/valid Brier score (columns 2/3), then averages over
    the splits.  Saved to "rand_forest_lambda_<criterion>.npz".

    Returns the (len(depv), 4) averaged loss array.
    """
    data, labels = load_full(datanm, samples_per_class)
    slo = StratifiedShuffleSplit(labels, n_iter=num_iter, test_size=0.3, train_size=0.7, random_state=None)
    ans = np.zeros((len(depv), 4))
    for train_index, test_index in slo:
        train_data = [data[train_index, :], labels[train_index]]
        valid_data = [data[test_index , :], labels[test_index ]]

        for i, d in enumerate(depv):
            clf = DecisionTreeClassifier(criterion=criterion, splitter='best',
                                         max_depth=d, min_samples_split=2,
                                         min_samples_leaf=1, min_weight_fraction_leaf=0.0,
                                         max_features=None, random_state=None,
                                         max_leaf_nodes=None, class_weight=None, presort=False)
            clf.fit(train_data[0], train_data[1])

            out_train = clf.predict_proba(train_data[0])
            out_valid = clf.predict_proba(valid_data[0])

            ans[i, 0] += log_loss(train_data[1], out_train)
            ans[i, 1] += log_loss(valid_data[1], out_valid)
            ans[i, 2] += brier(train_data[1], out_train, num_classes)
            ans[i, 3] += brier(valid_data[1], out_valid, num_classes)

    # Average the accumulated losses over all shuffle-split iterations.
    ans[:, :] /= num_iter

    # BUG FIX: the original passed an undefined name `mdep` here (NameError
    # at runtime); the depth grid is the `depv` parameter.  The npz key is
    # kept as "mdep" for backward compatibility with existing readers.
    np.savez("rand_forest_lambda_" + criterion, ans= ans, mdep = depv, num_iter = num_iter, num_classes = num_classes, samples_per_class = samples_per_class)
    return ans
def check_vb(datanm, samples_per_class, Cs, num_classes, num_iter = 100):
    """Bias/variance (learning-curve) sweep for logistic regression.

    For each of `num_iter` stratified 50/50 splits, trains on the first
    l+1 samples of every class (l = 0 .. samples_per_class/2 - 1, py2
    integer division) for each regularisation strength in `Cs`,
    accumulating train and validation log-loss.

    Returns an array of shape (len(Cs), samples_per_class/2, 2) averaged
    over the splits; also saved to "logreg_bv.npz".
    """
    data, labels = load_full(datanm, samples_per_class)
    slo = StratifiedShuffleSplit(labels, n_iter=num_iter, test_size=0.5, train_size=0.5, random_state=None)
    # ans[i, l, 0] = train log-loss, ans[i, l, 1] = valid log-loss
    ans = np.zeros((len(Cs), samples_per_class/2, 2))
    for train_index, test_index in slo:
        train_data = [data[train_index, :], labels[train_index]]
        valid_data = [data[test_index , :], labels[test_index ]]

        # Growing class-balanced subsets: l+1 samples from each class.
        for l in xrange(samples_per_class/2):
            ind_train = []
            ind_valid = []
            for k in xrange(num_classes):
                ind_train = ind_train + np.where(train_data[1] == k)[0].tolist()[:l+1]
                ind_valid = ind_valid + np.where(valid_data[1] == k)[0].tolist()[:l+1]

            # Current subset of the split: [features, labels].
            ctrain_data = [ train_data[0][ind_train], train_data[1][ind_train] ]
            cvalid_data = [ valid_data[0][ind_valid], valid_data[1][ind_valid] ]

            for i, C in enumerate(Cs):
                clf = LogisticRegression(C  =C   , penalty='l2', multi_class = 'ovr',
                                         tol=0.001, n_jobs = -1 , verbose = 0)#, solver = 'newton-cg')
                clf.fit(ctrain_data[0], ctrain_data[1])

                out_train = clf.predict_proba(ctrain_data[0])
                out_valid = clf.predict_proba(cvalid_data[0])

                ans[i, l, 0] += log_loss(ctrain_data[1], out_train)
                ans[i, l, 1] += log_loss(cvalid_data[1], out_valid)

    # Average accumulated losses over all shuffle-split iterations.
    ans /= num_iter

    np.savez("logreg_bv", ans= ans, Cs = Cs, num_iter = num_iter, num_classes = num_classes, samples_per_class = samples_per_class)
    return ans
# Beispiel #5
def check_lambda(datanm, samples_per_class, Cs, num_classes, gamma, num_iter = 100, kernel = 'linear', strat = 'ovr'):
    """Grid-sweep SVM C and gamma over stratified 70/30 shuffle splits.

    Accumulates train/valid hinge loss in columns 2/3 of the result
    (columns 0/1 are reserved for log-loss, unused since
    probability=False).  Averaged result is saved to
    "svm_lambda_<kernel>_<strat>.npz" and returned.
    """
    data, labels = load_full(datanm, samples_per_class)
    slo = StratifiedShuffleSplit(labels, n_iter=num_iter, test_size=0.3,
                                 train_size=0.7, random_state=None)
    ans = np.zeros((len(Cs), len(gamma), 4))
    class_range = range(num_classes)
    for train_index, test_index in slo:
        Xtr, ytr = data[train_index, :], labels[train_index]
        Xva, yva = data[test_index, :], labels[test_index]

        for j, g in enumerate(gamma):
            for i, C in enumerate(Cs):
                clf = svm.SVC(C=C, kernel=kernel, degree=3, gamma=g, coef0=0.0,
                              shrinking=True, probability=False, tol=0.001,
                              cache_size=10000, class_weight=None,
                              verbose=False, max_iter=-1,
                              decision_function_shape=strat, random_state=None)
                clf.fit(Xtr, ytr)

                # Hinge loss on the raw margin scores.
                ans[i, j, 2] += hinge_loss(ytr, clf.decision_function(Xtr), class_range)
                ans[i, j, 3] += hinge_loss(yva, clf.decision_function(Xva), class_range)

    # Average over shuffle-split iterations.
    ans /= num_iter

    np.savez("svm_lambda_" + kernel + '_' + strat, ans=ans, Cs=Cs,
             num_iter=num_iter, num_classes=num_classes,
             samples_per_class=samples_per_class)
    return ans
def main_func(datanm, samples_per_class, C, num_classes, num_iter = 100):
    """Final evaluation of logistic regression at a fixed C.

    Over `num_iter` stratified 70/30 shuffle splits, accumulates then
    averages: log-loss, accuracy, and per-class + macro precision /
    recall / F1 on both the train (column 0) and validation (column 1)
    portions.  In the per-class arrays, row `num_classes` (the last one)
    holds the macro average.  Saved to "logreg_final.npz".

    Returns [accuracy, recall, f1, precision, logloss].
    """
    data, labels = load_full(datanm, samples_per_class)
    slo = StratifiedShuffleSplit(labels, n_iter=num_iter, test_size=0.3, train_size=0.7, random_state=None)
    # Rows 0..num_classes-1: per-class scores; row -1: macro average.
    recall = np.zeros((num_classes+1, 2))
    precision = np.zeros((num_classes+1, 2))
    f1 = np.zeros((num_classes+1, 2))
    accuracy = np.zeros((2))
    logloss = np.zeros((2))
    
    for train_index, test_index in slo:
        train_data = [data[train_index, :], labels[train_index]]
        valid_data = [data[test_index , :], labels[test_index ]]

        clf = LogisticRegression(C  =C,     penalty='l2', multi_class = 'ovr',
                                 tol=0.001, n_jobs = -1, verbose = 0)#, solver = 'newton-cg')
        clf.fit(train_data[0], train_data[1])

        # Probabilistic predictions for log-loss.
        out_train = clf.predict_proba(train_data[0])
        out_valid = clf.predict_proba(valid_data[0])

        logloss[0] += log_loss(train_data[1], out_train)
        logloss[1] += log_loss(valid_data[1], out_valid)

        # Hard class predictions for the remaining metrics.
        out_train = clf.predict(train_data[0])
        out_valid = clf.predict(valid_data[0])

        accuracy[0] += accuracy_score(train_data[1], out_train)
        accuracy[1] += accuracy_score(valid_data[1], out_valid)

        precision[:-1, 0] += precision_score(train_data[1], out_train, average = None)
        precision[-1, 0] += precision_score(train_data[1], out_train, average = 'macro')
        precision[:-1, 1] += precision_score(valid_data[1], out_valid, average = None)
        precision[-1, 1] += precision_score(valid_data[1], out_valid, average = 'macro')

        recall[:-1, 0] += recall_score(train_data[1], out_train, average = None)
        recall[-1, 0] += recall_score(train_data[1], out_train, average = 'macro')
        recall[:-1, 1] += recall_score(valid_data[1], out_valid, average = None)
        recall[-1, 1] += recall_score(valid_data[1], out_valid, average = 'macro')

        f1[:-1, 0] += f1_score(train_data[1], out_train, average = None)
        f1[-1, 0] += f1_score(train_data[1], out_train, average = 'macro')
        f1[:-1, 1] += f1_score(valid_data[1], out_valid, average = None)
        f1[-1, 1] += f1_score(valid_data[1], out_valid, average = 'macro')

    # Average all accumulated metrics over the shuffle-split iterations.
    f1 /= num_iter
    recall  /= num_iter
    precision  /= num_iter
    logloss  /= num_iter
    accuracy  /= num_iter

    np.savez("logreg_final", accuracy = accuracy, recall = recall, f1 = f1,
                             precision = precision, logloss = logloss, C = C,
                             num_iter = num_iter, num_classes = num_classes,
                             samples_per_class = samples_per_class)
    return [accuracy, recall, f1, precision, logloss]
# Beispiel #7
def check_vb(datanm, samples_per_class, Cs, num_classes, gamma, num_iter = 100, kernel = 'linear', strat = 'ovr'):
    """Bias/variance (learning-curve) sweep for SVM over C and gamma.

    For each of `num_iter` stratified 50/50 splits, trains on the first
    l+1 samples of every class (l = 0 .. samples_per_class/2 - 1) for
    each (C, gamma) pair, accumulating train/valid hinge loss in slots
    2/3 (slots 0/1 are reserved for log-loss, unused since
    probability=False).  Averaged result is saved to
    "svm_bv_<kernel>_<strat>.npz" and returned.
    """
    data, labels = load_full(datanm, samples_per_class)
    slo = StratifiedShuffleSplit(labels, n_iter=num_iter, test_size=0.5, train_size=0.5, random_state=None)
    ans = np.zeros((len(Cs), len(gamma), samples_per_class/2, 4))
    for train_index, test_index in slo:
        train_data = [data[train_index, :], labels[train_index]]
        valid_data = [data[test_index , :], labels[test_index ]]

        # Growing class-balanced subsets: l+1 samples from each class.
        for l in xrange(samples_per_class/2):
            ind_train = []
            ind_valid = []
            for k in xrange(num_classes):
                ind_train = ind_train + np.where(train_data[1] == k)[0].tolist()[:l+1]
                ind_valid = ind_valid + np.where(valid_data[1] == k)[0].tolist()[:l+1]

            ctrain_data = [ train_data[0][ind_train], train_data[1][ind_train] ]
            cvalid_data = [ valid_data[0][ind_valid], valid_data[1][ind_valid] ]

            for i, C in enumerate(Cs):
                for j, g in enumerate(gamma):
                    clf = svm.SVC(C=C, kernel=kernel, degree=3, gamma=g, coef0=0.0, shrinking=True,
                                  probability=False, tol=0.001,  cache_size=10000, class_weight=None,
                                  verbose=False, max_iter=-1, decision_function_shape=strat, random_state=None)
                    clf.fit(ctrain_data[0], ctrain_data[1])

                    # BUG FIX: evaluate on the same subsampled sets the model
                    # was trained on.  The original scored the FULL split
                    # (train_data/valid_data), so the curve did not reflect
                    # the subset size l at all; the sibling logistic-regression
                    # check_vb and the commented-out predict_proba lines here
                    # both use the subsets.
                    out_train = clf.decision_function(ctrain_data[0])
                    out_valid = clf.decision_function(cvalid_data[0])

                    ans[i, j, l, 2] += hinge_loss(ctrain_data[1], out_train, range(num_classes))
                    ans[i, j, l, 3] += hinge_loss(cvalid_data[1], out_valid, range(num_classes))

    # Average over shuffle-split iterations.
    ans /= num_iter

    np.savez("svm_bv_" + kernel + '_' + strat, ans= ans, Cs = Cs, num_iter = num_iter, num_classes = num_classes, samples_per_class = samples_per_class)
    return ans
def check_vb(datanm, samples_per_class, depv, nest, num_classes, criterion, num_iter = 100):
    """Random-forest learning curve over stratified 50/50 shuffle splits.

    Accumulates train/valid log-loss (columns 0/1) and train/valid Brier
    score (columns 2/3) versus the number of samples used per class,
    then averages over the splits.  Saved to
    "rand_forest_bv_<criterion>.npz".
    """
    data, labels = load_full(datanm, samples_per_class)
    slo = StratifiedShuffleSplit(labels, n_iter=num_iter, test_size=0.5,
                                 train_size=0.5, random_state=None)
    half = samples_per_class/2
    ans = np.zeros((half, 4))
    for train_index, test_index in slo:
        Xtr, ytr = data[train_index, :], labels[train_index]
        Xva, yva = data[test_index, :], labels[test_index]

        for l in xrange(half):
            # Class-balanced subsets: the first l+1 samples of each class.
            ind_train = []
            ind_valid = []
            for k in xrange(num_classes):
                ind_train += np.where(ytr == k)[0].tolist()[:l+1]
                ind_valid += np.where(yva == k)[0].tolist()[:l+1]

            cXtr, cytr = Xtr[ind_train], ytr[ind_train]
            cXva, cyva = Xva[ind_valid], yva[ind_valid]

            clf = RandomForestClassifier(n_estimators=nest, criterion=criterion,
                                         max_depth=depv, min_samples_split=2,
                                         min_samples_leaf=1,
                                         min_weight_fraction_leaf=0.0,
                                         max_features='auto',
                                         max_leaf_nodes=None, bootstrap=True,
                                         oob_score=False, n_jobs=8,
                                         random_state=None, verbose=0,
                                         warm_start=False, class_weight=None)
            clf.fit(cXtr, cytr)

            prob_tr = clf.predict_proba(cXtr)
            prob_va = clf.predict_proba(cXva)

            ans[l, 0] += log_loss(cytr, prob_tr)
            ans[l, 1] += log_loss(cyva, prob_va)
            ans[l, 2] += brier(cytr, prob_tr, num_classes)
            ans[l, 3] += brier(cyva, prob_va, num_classes)

    # Average over shuffle-split iterations.
    ans /= num_iter

    np.savez("rand_forest_bv_" + criterion, ans=ans, depv=depv, nest=nest,
             num_iter=num_iter, num_classes=num_classes,
             samples_per_class=samples_per_class)
    return ans
def check_lambda(datanm, samples_per_class, Cs, num_classes, num_iter = 100):
    """Regularisation sweep for logistic regression.

    Over `num_iter` stratified 70/30 shuffle splits, accumulates train
    (column 0) and validation (column 1) log-loss per C, then averages.
    Saved to "logreg_lambda.npz" and returned.
    """
    data, labels = load_full(datanm, samples_per_class)
    slo = StratifiedShuffleSplit(labels, n_iter=num_iter, test_size=0.3,
                                 train_size=0.7, random_state=None)
    ans = np.zeros((len(Cs), 2))
    for train_index, test_index in slo:
        Xtr, ytr = data[train_index, :], labels[train_index]
        Xva, yva = data[test_index, :], labels[test_index]

        for i, C in enumerate(Cs):
            clf = LogisticRegression(C=C, penalty='l2', multi_class='ovr',
                                     tol=0.001, n_jobs=-1, verbose=0)
            clf.fit(Xtr, ytr)

            ans[i, 0] += log_loss(ytr, clf.predict_proba(Xtr))
            ans[i, 1] += log_loss(yva, clf.predict_proba(Xva))

    # Average over shuffle-split iterations.
    ans /= num_iter

    np.savez("logreg_lambda", ans=ans, Cs=Cs, num_iter=num_iter,
             num_classes=num_classes, samples_per_class=samples_per_class)
    return ans
# Beispiel #10
def main_func(dirnm, datanm_train, datanm_valid, datanm_orig_train, datanm_orig_valid, Cs, num_classes):
    """Final evaluation of logistic regression across a grid of C values.

    For each C, fits on the artificial training set and records log-loss,
    accuracy, and per-class + macro precision / recall / F1 on four
    datasets (columns): 0 = artificial train, 1 = artificial valid,
    2 = original train, 3 = original valid.  In the per-class arrays,
    row `num_classes` (the last one) holds the macro average.
    Saved to "logreg_final.npz"; note the saved `C` is the last value of
    the loop variable, i.e. Cs[-1].

    Returns [accuracy, recall, f1, precision, logloss].
    """
    recall = np.zeros((len(Cs), num_classes+1, 4))
    precision = np.zeros((len(Cs), num_classes+1, 4))
    f1 = np.zeros((len(Cs), num_classes+1, 4))
    accuracy = np.zeros((len(Cs), 4))
    logloss = np.zeros((len(Cs), 4))
    
    spct = 10*70
    tdata, tlabels = load_full(dirnm+datanm_train, spct)
    print tdata.shape, tlabels.shape

    spct = 10
    otdata, otlabels = load_full(dirnm+datanm_orig_train, spct)

    spct = 10*30
    vdata, vlabels = load_full(dirnm+datanm_valid, spct)

    spct = 10
    ovdata, ovlabels = load_full(dirnm+datanm_orig_valid, spct)

    for i, C in enumerate(Cs):

        clf = LogisticRegression(C  =C,     penalty='l2', multi_class = 'ovr',
                                 tol=0.001, n_jobs = -1, verbose = 0)#, solver = 'newton-cg')
        clf.fit(tdata, tlabels)

        # Probabilistic predictions for log-loss.
        out_train = clf.predict_proba(tdata)
        out_valid = clf.predict_proba(vdata)
        out_train_real = clf.predict_proba(otdata)
        out_valid_real = clf.predict_proba(ovdata)

        logloss[i, 0] += log_loss(tlabels, out_train)
        logloss[i, 1] += log_loss(vlabels, out_valid)
        logloss[i, 2] += log_loss(otlabels, out_train_real)
        logloss[i, 3] += log_loss(ovlabels, out_valid_real)

        # Hard class predictions for the remaining metrics.
        out_train = clf.predict(tdata)
        out_valid = clf.predict(vdata)
        out_train_real = clf.predict(otdata)
        out_valid_real = clf.predict(ovdata)

        accuracy[i, 0] += accuracy_score(tlabels, out_train)
        accuracy[i, 1] += accuracy_score(vlabels, out_valid)
        accuracy[i, 2] += accuracy_score(otlabels, out_train_real)
        accuracy[i, 3] += accuracy_score(ovlabels, out_valid_real)


        precision[i, :-1, 0] += precision_score(tlabels, out_train, average = None)
        precision[i, -1, 0] += precision_score(tlabels, out_train, average = 'macro')

        precision[i, :-1, 1] += precision_score(vlabels, out_valid, average = None)
        precision[i, -1, 1] += precision_score(vlabels, out_valid, average = 'macro')

        precision[i, :-1, 2] += precision_score(otlabels, out_train_real, average = None)
        precision[i, -1, 2] += precision_score(otlabels, out_train_real, average = 'macro')

        precision[i, :-1, 3] += precision_score(ovlabels, out_valid_real, average = None)
        precision[i, -1, 3] += precision_score(ovlabels, out_valid_real, average = 'macro')


        recall[i, :-1, 0] += recall_score(tlabels, out_train, average = None)
        recall[i, -1, 0] += recall_score(tlabels, out_train, average = 'macro')

        recall[i, :-1, 1] += recall_score(vlabels, out_valid, average = None)
        recall[i, -1, 1] += recall_score(vlabels, out_valid, average = 'macro')

        recall[i, :-1, 2] += recall_score(otlabels, out_train_real, average = None)
        recall[i, -1, 2] += recall_score(otlabels, out_train_real, average = 'macro')

        recall[i, :-1, 3] += recall_score(ovlabels, out_valid_real, average = None)
        recall[i, -1, 3] += recall_score(ovlabels, out_valid_real, average = 'macro')


        f1[i, :-1, 0] += f1_score(tlabels, out_train, average = None)
        f1[i, -1, 0] += f1_score(tlabels, out_train, average = 'macro')

        f1[i, :-1, 1] += f1_score(vlabels, out_valid, average = None)
        f1[i, -1, 1] += f1_score(vlabels, out_valid, average = 'macro')

        f1[i, :-1, 2] += f1_score(otlabels, out_train_real, average = None)
        f1[i, -1, 2] += f1_score(otlabels, out_train_real, average = 'macro')

        f1[i, :-1, 3] += f1_score(ovlabels, out_valid_real, average = None)
        f1[i, -1, 3] += f1_score(ovlabels, out_valid_real, average = 'macro')

    # NOTE(review): `C` here is the loop variable after the final
    # iteration, so only Cs[-1] is recorded — presumably the full grid
    # should be saved instead; confirm with downstream consumers.
    np.savez("logreg_final", accuracy = accuracy, recall = recall, f1 = f1,
                             precision = precision, logloss = logloss, C = C,
                             num_classes = num_classes)
    return [accuracy, recall, f1, precision, logloss]
def main_func(datanm, samples_per_class, depv, nest, num_classes, criterion, num_iter = 100):
    """Final evaluation of random forests over a (max_depth, n_estimators) grid.

    Over `num_iter` stratified 70/30 shuffle splits and every (depth,
    n_estimators) pair from `depv` x `nest`, accumulates then averages:
    log-loss, Brier score, accuracy, and per-class + macro precision /
    recall / F1 on train (column 0) and validation (column 1).  In the
    per-class arrays, row `num_classes` (the last one) holds the macro
    average.  Saved to "rand_forest_final.npz".

    Returns [accuracy, recall, f1, precision, logloss, brierloss].

    BUG FIX: the original referenced an undefined name `mdep` for the
    depth grid (NameError at runtime); the grid is the `depv` parameter.
    """
    data, labels = load_full(datanm, samples_per_class)
    slo = StratifiedShuffleSplit(labels, n_iter=num_iter, test_size=0.3, train_size=0.7, random_state=None)
    recall = np.zeros((len(depv), len(nest), num_classes+1, 2))
    precision = np.zeros((len(depv), len(nest), num_classes+1, 2))
    f1 = np.zeros((len(depv), len(nest), num_classes+1, 2))
    accuracy = np.zeros((len(depv), len(nest), 2))
    logloss = np.zeros((len(depv), len(nest), 2))
    brierloss = np.zeros((len(depv), len(nest), 2))
    
    for train_index, test_index in slo:
        train_data = [data[train_index, :], labels[train_index]]
        valid_data = [data[test_index , :], labels[test_index ]]

        for i, d in enumerate(depv):
            for j, n in enumerate(nest):
                clf = RandomForestClassifier(n_estimators=n, criterion=criterion, max_depth=d,
                                        min_samples_split=2, min_samples_leaf=1,
                                        min_weight_fraction_leaf=0.0, max_features='auto',
                                        max_leaf_nodes=None, bootstrap=True, oob_score=False,
                                        n_jobs=8, random_state=None, verbose=0, warm_start=False,
                                        class_weight=None)
                clf.fit(train_data[0], train_data[1])

                # Probabilistic predictions for log-loss / Brier score.
                out_train = clf.predict_proba(train_data[0])
                out_valid = clf.predict_proba(valid_data[0])

                logloss[i, j, 0] += log_loss(train_data[1], out_train)
                logloss[i, j, 1] += log_loss(valid_data[1], out_valid)

                brierloss[i, j, 0] += brier(train_data[1], out_train, num_classes)
                brierloss[i, j, 1] += brier(valid_data[1], out_valid, num_classes)


                # Hard class predictions for the remaining metrics.
                out_train = clf.predict(train_data[0])
                out_valid = clf.predict(valid_data[0])

                accuracy[i, j, 0] += accuracy_score(train_data[1], out_train)
                accuracy[i, j, 1] += accuracy_score(valid_data[1], out_valid)

                precision[i, j, :-1, 0] += precision_score(train_data[1], out_train, average = None)
                precision[i, j, -1, 0] += precision_score(train_data[1], out_train, average = 'macro')
                precision[i, j, :-1, 1] += precision_score(valid_data[1], out_valid, average = None)
                precision[i, j, -1, 1] += precision_score(valid_data[1], out_valid, average = 'macro')

                recall[i, j, :-1, 0] += recall_score(train_data[1], out_train, average = None)
                recall[i, j, -1, 0] += recall_score(train_data[1], out_train, average = 'macro')
                recall[i, j, :-1, 1] += recall_score(valid_data[1], out_valid, average = None)
                recall[i, j, -1, 1] += recall_score(valid_data[1], out_valid, average = 'macro')

                f1[i, j, :-1, 0] += f1_score(train_data[1], out_train, average = None)
                f1[i, j, -1, 0] += f1_score(train_data[1], out_train, average = 'macro')
                f1[i, j, :-1, 1] += f1_score(valid_data[1], out_valid, average = None)
                f1[i, j, -1, 1] += f1_score(valid_data[1], out_valid, average = 'macro')

    # Average all accumulated metrics over the shuffle-split iterations.
    # (brierloss is intentionally saved un-averaged upstream? No — average
    # it too for consistency with every other metric.)
    f1 /= num_iter
    recall  /= num_iter
    precision  /= num_iter
    logloss  /= num_iter
    accuracy  /= num_iter
    brierloss /= num_iter

    # Also record the n_estimators grid, which the original omitted.
    np.savez("rand_forest_final", accuracy = accuracy, recall = recall, f1 = f1,
                             precision = precision, logloss = logloss, depv = depv,
                             nest = nest, num_iter = num_iter, num_classes = num_classes,
                             samples_per_class = samples_per_class, brierloss = brierloss)
    return [accuracy, recall, f1, precision, logloss, brierloss]
# Beispiel #12
def main_func(datanm, samples_per_class, C, num_classes, gamma, num_iter = 100, kernel = 'linear', strat = 'ovr'):
    """Final evaluation of an SVM at fixed C and gamma.

    Over `num_iter` stratified 70/30 shuffle splits, accumulates then
    averages: hinge loss, accuracy, and per-class + macro precision /
    recall / F1 on train (column 0) and validation (column 1).  In the
    per-class arrays, row `num_classes` (the last one) holds the macro
    average.  `logloss` stays all zeros: probability=False, so the
    probabilistic branch is disabled.  Saved to
    "svm_final_<kernel>_<strat>.npz".

    Returns [accuracy, recall, f1, precision, logloss, hingeloss].
    """
    data, labels = load_full(datanm, samples_per_class)
    slo = StratifiedShuffleSplit(labels, n_iter=num_iter, test_size=0.3, train_size=0.7, random_state=None)
    recall = np.zeros((num_classes+1, 2))
    precision = np.zeros((num_classes+1, 2))
    f1 = np.zeros((num_classes+1, 2))
    accuracy = np.zeros((2))
    logloss = np.zeros((2))
    hingeloss = np.zeros((2))

    
    for train_index, test_index in slo:
        train_data = [data[train_index, :], labels[train_index]]
        valid_data = [data[test_index , :], labels[test_index ]]

        clf = svm.SVC(C=C, kernel=kernel, degree=3, gamma=gamma, coef0=0.0, shrinking=True,
                      probability=False, tol=0.001,  cache_size=10000, class_weight=None,
                      verbose=False, max_iter=-1, decision_function_shape=strat, random_state=None)
        clf.fit(train_data[0], train_data[1])

        # Margin scores for hinge loss.
        out_train = clf.decision_function(train_data[0])
        out_valid = clf.decision_function(valid_data[0])

        hingeloss[0] += hinge_loss(train_data[1], out_train)
        hingeloss[1] += hinge_loss(valid_data[1], out_valid)

        # Hard class predictions for the remaining metrics.
        out_train = clf.predict(train_data[0])
        out_valid = clf.predict(valid_data[0])

        accuracy[0] += accuracy_score(train_data[1], out_train)
        accuracy[1] += accuracy_score(valid_data[1], out_valid)

        precision[:-1, 0] += precision_score(train_data[1], out_train, average = None)
        precision[-1, 0] += precision_score(train_data[1], out_train, average = 'macro')
        precision[:-1, 1] += precision_score(valid_data[1], out_valid, average = None)
        precision[-1, 1] += precision_score(valid_data[1], out_valid, average = 'macro')

        recall[:-1, 0] += recall_score(train_data[1], out_train, average = None)
        recall[-1, 0] += recall_score(train_data[1], out_train, average = 'macro')
        recall[:-1, 1] += recall_score(valid_data[1], out_valid, average = None)
        recall[-1, 1] += recall_score(valid_data[1], out_valid, average = 'macro')

        f1[:-1, 0] += f1_score(train_data[1], out_train, average = None)
        f1[-1, 0] += f1_score(train_data[1], out_train, average = 'macro')
        f1[:-1, 1] += f1_score(valid_data[1], out_valid, average = None)
        f1[-1, 1] += f1_score(valid_data[1], out_valid, average = 'macro')

    # Average all accumulated metrics over the shuffle-split iterations.
    f1 /= num_iter
    recall  /= num_iter
    precision  /= num_iter
    logloss  /= num_iter
    accuracy  /= num_iter
    # BUG FIX: the original never divided hingeloss, so it was saved as a
    # sum over num_iter splits while every other metric was a mean.
    hingeloss /= num_iter

    np.savez("svm_final_" + kernel + '_' + strat, accuracy = accuracy, recall = recall, f1 = f1,
                             precision = precision, logloss = logloss, C = C,
                             num_iter = num_iter, num_classes = num_classes,
                             samples_per_class = samples_per_class,
                             hingeloss = hingeloss)
    return [accuracy, recall, f1, precision, logloss, hingeloss]