Example #1
0
    def _evaluate(self, Gs, Gs_kwargs, num_gpus):
        """Sample images, classify attributes and collect per-space conditional entropies.

        For every GPU a clone of ``Gs`` generates ``self.minibatch_per_gpu``
        images from random latents; each attribute classifier listed in
        ``self.attrib_indices`` is applied to them.  After ``self.num_samples``
        samples have been drawn, a linear SVM is fitted per attribute on the
        ``self.num_keep`` most confident samples, once on ``latents`` and once
        on ``dlatents``, and the conditional entropy of the SVM output given
        the classifier label is appended to ``conditional_entropies[space]``.

        :param Gs: generator network; must provide ``clone()``, ``input_shape``,
            ``components.mapping`` and ``get_output_for``.
        :param Gs_kwargs: keyword arguments forwarded to both ``get_output_for`` calls.
        :param num_gpus: number of GPUs to build the graph on.

        NOTE(review): the visible body ends once ``conditional_entropies`` is
        filled; nothing is returned or reported from this point.
        """
        minibatch_size = num_gpus * self.minibatch_per_gpu

        # Construct TensorFlow graph for each GPU.
        result_expr = []
        for gpu_idx in range(num_gpus):
            with tf.device('/gpu:%d' % gpu_idx):
                Gs_clone = Gs.clone()

                # Generate images.
                latents = tf.random_normal([self.minibatch_per_gpu] + Gs_clone.input_shape[1:])
                labels = self._get_random_labels_tf(self.minibatch_per_gpu)
                dlatents = Gs_clone.components.mapping.get_output_for(latents, labels, **Gs_kwargs)
                images = Gs_clone.get_output_for(latents, None, **Gs_kwargs)

                # Downsample to 256x256. The attribute classifiers were built for 256x256.
                if images.shape[2] > 256:
                    factor = images.shape[2] // 256
                    images = tf.reshape(images, [-1, images.shape[1], images.shape[2] // factor, factor, images.shape[3] // factor, factor])
                    images = tf.reduce_mean(images, axis=[3, 5])

                # Run classifier for each attribute.
                result_dict = dict(latents=latents, dlatents=dlatents[:,-1])
                for attrib_idx in self.attrib_indices:
                    classifier = misc.load_pkl(classifier_urls[attrib_idx])
                    logits = classifier.get_output_for(images, None)
                    # Two-column softmax over (logit, -logit) gives both class probabilities.
                    predictions = tf.nn.softmax(tf.concat([logits, -logits], axis=1))
                    result_dict[attrib_idx] = predictions
                result_expr.append(result_dict)

        # Sampling loop.
        results = []
        for begin in range(0, self.num_samples, minibatch_size):
            self._report_progress(begin, self.num_samples)
            results += tflib.run(result_expr)
        results = {key: np.concatenate([value[key] for value in results], axis=0) for key in results[0].keys()}

        # Calculate conditional entropy for each attribute.
        conditional_entropies = defaultdict(list)
        for attrib_idx in self.attrib_indices:
            # Prune the least confident samples.
            pruned_indices = list(range(self.num_samples))
            pruned_indices = sorted(pruned_indices, key=lambda i: -np.max(results[attrib_idx][i]))
            pruned_indices = pruned_indices[:self.num_keep]

            # Fit SVM to the remaining samples.
            svm_targets = np.argmax(results[attrib_idx][pruned_indices], axis=1)
            for space in ['latents', 'dlatents']:
                svm_inputs = results[space][pruned_indices]
                try:
                    svm = sklearn.svm.LinearSVC()
                    svm.fit(svm_inputs, svm_targets)
                    svm_outputs = svm.predict(svm_inputs)
                except Exception:
                    # Fitting can fail (e.g. only one class present); keep the
                    # best-effort fallback but no longer swallow
                    # KeyboardInterrupt / SystemExit as the bare except did.
                    svm_outputs = svm_targets # assume perfect prediction

                # Calculate conditional entropy.
                p = [[np.mean([case == (row, col) for case in zip(svm_outputs, svm_targets)]) for col in (0, 1)] for row in (0, 1)]
                conditional_entropies[space].append(conditional_entropy(p))
Example #2
0
def leave_one_out_cv(gram_matrix, labels, alg = 'SVM'):
    """
    Leave-one-out cross-validation on a precomputed Gram matrix.

    :param gram_matrix: square array indexed by sample on both axes; rows and
        columns are sliced with the train/test indices of each split.
    :param labels: array of class labels, indexable by an index array.
    :param alg: 'SVM' (SVC with a precomputed kernel) or 'kNN'
        (KNeighborsClassifier with default settings).
    :return: (preds, scores) - the list of held-out predictions and the list
        of per-split accuracies.
    :raises ValueError: if ``alg`` is not one of the supported names.

    NOTE(review): ``sklearn.cross_validation`` was removed in scikit-learn
    0.20; the call is left unchanged because the installed version cannot be
    determined from here.
    """
    # Validate up front: previously an unknown name left ``score`` unbound
    # (or stale from an earlier iteration) inside the loop.
    if alg not in ('SVM', 'kNN'):
        raise ValueError("alg must be 'SVM' or 'kNN', got %r" % (alg,))

    scores = []
    preds = []
    loo = sklearn.cross_validation.LeaveOneOut(len(labels))
    for train_index, test_index in loo:
        X_train = gram_matrix[train_index][:, train_index]
        X_test = gram_matrix[test_index][:, train_index]
        y_train, y_test = labels[train_index], labels[test_index]

        if alg == 'SVM':
            model = sklearn.svm.SVC(kernel = 'precomputed')
        else:
            model = sklearn.neighbors.KNeighborsClassifier()
        model.fit(X_train, y_train)
        preds += model.predict(X_test).tolist()
        scores.append(model.score(X_test, y_test))

    # Parenthesised single-argument print works on both Python 2 and 3.
    print("Mean accuracy: %f" % (np.mean(scores)))
    print("Stdv: %f" % (np.std(scores)))

    return preds, scores
Example #3
0
def train(df_train, df_test):
    """Fit three classifiers on ``df_train`` and print their accuracy on ``df_test``.

    Both frames must contain an ``'income'`` column, which is used as the
    target; every other column is used as a feature.  Logistic regression,
    a random forest and an SVC are trained in turn, and ``cost_times`` is
    called after each fit/score step with a label for that step.

    :param df_train: DataFrame used for fitting.
    :param df_test: DataFrame used for scoring.
    """
    X_train = df_train.drop('income', axis=1)
    y_train = df_train['income']
    # The held-out split must come from df_test; it was previously rebuilt
    # from df_train, so every reported score was a training score.
    X_test = df_test.drop('income', axis=1)
    y_test = df_test['income']

    start = time.time()
    lr = LogisticRegression(penalty='l1', tol=0.01)
    lr.fit(X_train, y_train)
    start = cost_times(start, 'lr.fit')
    score = lr.score(X_test, y_test)
    start = cost_times(start, 'lr.score')
    print("LR : ", score)

    from sklearn.ensemble import RandomForestClassifier
    rfc = RandomForestClassifier()
    rfc.fit(X_train, y_train)
    start = cost_times(start, 'rfc.fit')
    score = rfc.score(X_test, y_test)
    start = cost_times(start, 'rfc.score')
    print("RF : ", score)

    from sklearn import svm
    # Use a separate name so the imported module is not shadowed.
    svc = svm.SVC()
    svc.fit(X_train, y_train)
    start = cost_times(start, 'svc.fit')
    score = svc.score(X_test, y_test)
    start = cost_times(start, 'svc.score')
    print("SVM : ", score)
Example #4
0
def k_fold_cv(gram_matrix, labels, folds = 10, alg = 'SVM', shuffle = True):
    """
    K-fold cross-validation on a precomputed Gram matrix.

    :param gram_matrix: square array indexed by sample on both axes.
    :param labels: array of class labels, indexable by an index array.
    :param folds: number of folds.
    :param alg: 'SVM' (SVC with a precomputed kernel) or 'kNN'
        (KNeighborsClassifier with default settings).
    :param shuffle: forwarded to KFold; the shuffle seed is drawn with
        ``random.randint(0, 100)`` on every call.
    :return: (preds, scores) - the list of held-out predictions and the list
        of per-fold accuracies.
    :raises ValueError: if ``alg`` is not one of the supported names.

    NOTE(review): ``sklearn.cross_validation`` was removed in scikit-learn
    0.20; the call is left unchanged because the installed version cannot be
    determined from here.
    """
    # A leftover pdb.set_trace() used to stop every call here; it is removed.
    # Validate up front: an unknown name previously left ``score`` unbound.
    if alg not in ('SVM', 'kNN'):
        raise ValueError("alg must be 'SVM' or 'kNN', got %r" % (alg,))

    scores = []
    preds = []
    splits = sklearn.cross_validation.KFold(len(labels), folds, shuffle = shuffle, random_state = random.randint(0,100))
    for train_index, test_index in splits:
        X_train = gram_matrix[train_index][:, train_index]
        X_test = gram_matrix[test_index][:, train_index]
        y_train, y_test = labels[train_index], labels[test_index]

        if alg == 'SVM':
            model = sklearn.svm.SVC(kernel = 'precomputed')
        else:
            model = sklearn.neighbors.KNeighborsClassifier()
        model.fit(X_train, y_train)
        preds += model.predict(X_test).tolist()
        scores.append(model.score(X_test, y_test))

    # Parenthesised single-argument print works on both Python 2 and 3.
    print("Mean accuracy: %f" % (np.mean(scores)))
    print("Stdv: %f" % (np.std(scores)))

    return preds, scores
Example #5
0
def SVM(X_train, X_test, y_train, y_test):
    """Train or load nine one-vs-rest SVMs (3 values of C x 3 kernels) and score them.

    Each (C, kernel) combination is cached in its own ``.sav`` pickle in the
    current directory: an existing file is loaded, otherwise the model is
    fitted and written out.  The accuracy of every model on the test split
    is printed.

    :return: (training_time, testing_time, max_accuracy) - the longest single
        fit in seconds (0 when every model was loaded from disk), the longest
        single scoring call in seconds, and the best accuracy in percent.
    """
    # The order of this list (…5, 7, 6…) is kept exactly as it was so that
    # previously written cache files keep mapping to the same models.
    filenames = [
        'SVM1.sav', 'SVM2.sav', 'SVM3.sav', 'SVM4.sav', 'SVM5.sav', 'SVM7.sav',
        'SVM6.sav', 'SVM8.sav', 'SVM9.sav'
    ]
    c = [1, 1000, 1000000]
    ker = ['linear', 'poly', 'rbf']
    training_time, testing_time, max_accuracy = 0, 0, 0
    for i, c_value in enumerate(c):
        for j, kernel in enumerate(ker):
            # Row-major index into filenames: the row stride is the number of
            # kernels (the old len(c) only worked because both lists have 3 items).
            filename = filenames[i * len(ker) + j]
            if os.path.exists(filename):
                # NOTE(review): pickle.load runs arbitrary code from the file;
                # only safe while these cache files are written by this program.
                with open(filename, 'rb') as model_file:
                    svm = pickle.load(model_file)
            else:
                t1 = time.time()
                svm = OneVsRestClassifier(SVC(kernel=kernel,
                                              C=c_value)).fit(X_train, y_train)
                t2 = time.time()
                training_time = max(training_time, t2 - t1)
                with open(filename, 'wb') as model_file:
                    pickle.dump(svm, model_file)
            t1 = time.time()
            accuracy = svm.score(X_test, y_test) * 100
            t2 = time.time()
            testing_time = max(testing_time, t2 - t1)
            max_accuracy = max(accuracy, max_accuracy)
            print('One VS Rest SVM accuracy with kernel={} and c={} is : {}%'.
                  format(kernel, c_value, accuracy))
    # The second element used to be training_time again.
    return training_time, testing_time, max_accuracy
Example #6
0
def train_svm(train, labels_train, test, labels_test, dims):
    """Fit an SVC (one-vs-one decision function) and return its accuracy on ``test``.

    :param train: training feature matrix.
    :param labels_train: training labels.
    :param test: test feature matrix, scored as-is.
    :param labels_test: test labels.
    :param dims: 0 to train on ``train`` unchanged; otherwise the number of
        PCA components used to reconstruct ``train`` (transform followed by
        inverse_transform) before fitting.
    :return: accuracy reported by ``SVC.score`` on the test data.
    """
    if dims == 0:
        # Train as is. Previously this path crashed with a NameError because
        # the PCA reconstruction it tried to fit on was never computed.
        fit_data = train
    else:
        pca = PCA(n_components=dims)
        pca.fit(train)
        # Reconstructing from the leading components keeps the original
        # feature dimensionality, so the untouched test matrix stays compatible.
        fit_data = pca.inverse_transform(pca.transform(train))
        # The TruncatedSVD projections computed here before were never used
        # (and were refitted on the test set), so they have been dropped.

    svm = SVC(decision_function_shape='ovo')
    svm.fit(fit_data, labels_train)
    ac = svm.score(test, labels_test)
    print("Scoring ", ac)
    return ac
Example #7
0
 def train_model(self, X_train_cv, y_train, X_test_cv, y_test):
     """
     Train a LinearSVC (C=0.1) and evaluate it on the test split.

     :param X_train_cv: training feature matrix
     :param y_train: training labels
     :param X_test_cv: test feature matrix
     :param y_test: test labels
     :return: (svm, f1, accuracy) - the fitted model, the weighted F1 score
         on the test split, and the test accuracy as a whole-number percentage
     """
     svm = sklearn.svm.LinearSVC(C=0.1)
     svm.fit(X_train_cv, y_train)
     pred = svm.predict(X_test_cv)
     # f1_score expects (y_true, y_pred); with average='weighted' the weights
     # come from the first argument, so the old swapped order changed the result.
     f1 = sklearn.metrics.f1_score(y_test, pred, average='weighted')
     accuracy = int(round(svm.score(X_test_cv, y_test) * 100))
     return svm, f1, accuracy
Example #8
0
def SVR_rbf(dates, prices, test_date, sl_df, forcastingDays):
    """Fit RBF support-vector regressors on the stock data and return their predictions.

    Three independent SVR models (kernel='rbf', C=1e3, gamma=0.1) are used:

    * a forecast model trained on the scaled ``close`` column to predict the
      close price ``forcastingDays`` rows ahead;
    * a model trained on ``sl_trainX`` / ``sl_trainY``;
    * a model trained on ``sl_trainX_close`` / ``sl_trainY_close``.

    Previously one estimator was refitted for each task, so the outputs
    documented for the ``sl_trainX`` model actually came from the model fitted
    last (the close-only one).

    :param dates: unused here.
    :param prices: unused here.
    :param test_date: unused here.
    :param sl_df: DataFrame with at least a ``close`` column; also passed to
        ``create_sl_stock_preprocessed_Dataset``.
    :param forcastingDays: number of rows to forecast ahead (converted with ``int``).
    :return: (svm_decision_boundary, svm_prediction, svm_test_score,
        prediction_of_svm_close, forecast_prediction).
    :raises ValueError: if ``forcastingDays`` is not positive.

    NOTE(review): ``svm_test_score`` is the MSE on a split of ``sl_trainX``
    while the model is fitted on all of ``sl_trainX`` - confirm this is intended.
    """
    def make_svr():
        # One fresh estimator per task so that fits cannot overwrite each other.
        return SVR(kernel='rbf', C=1e3, gamma=0.1)

    (sl_trainX, sl_trainY, sl_testX, sl_testY, sl_trainX_close,
     sl_trainY_close, sl_testX_close,
     sl_testY_close) = create_sl_stock_preprocessed_Dataset(sl_df)

    _, X_test, _, y_test = train_test_split(sl_trainX,
                                            sl_trainY,
                                            test_size=0.33,
                                            random_state=42)

    forecast_out = int(forcastingDays)  # predicting forcastingDays days into future
    if forecast_out <= 0:
        # X[-0:] / X[:-0] would silently select everything / nothing.
        raise ValueError("forcastingDays must be a positive integer")

    # Work on a copy so that adding a column does not write into a slice of sl_df.
    df = sl_df[['close']].copy()
    df['Prediction'] = df[['close']].shift(-forecast_out)

    X = np.array(df.drop(['Prediction'], axis=1))
    X = preprocessing.scale(X)

    X_forecast = X[-forecast_out:]  # the last forecast_out rows have no target yet
    X = X[:-forecast_out]  # remove last forcastingDays from X

    y = np.array(df['Prediction'])
    y = y[:-forecast_out]

    X_train_f, _, y_train_f, _ = train_test_split(X, y, test_size=0.2)
    forecast_model = make_svr()
    forecast_model.fit(X_train_f, y_train_f)
    forecast_prediction = forecast_model.predict(X_forecast)

    full_model = make_svr()
    full_model.fit(sl_trainX, sl_trainY)
    close_model = make_svr()
    close_model.fit(sl_trainX_close, sl_trainY_close)

    svm_decision_boundary = full_model.predict(sl_trainX)
    svm_test_score = mean_squared_error(y_test, full_model.predict(X_test))

    svm_prediction = full_model.predict(sl_testX)[0]
    prediction_of_svm_close = close_model.predict(sl_testX_close)[0]

    return svm_decision_boundary, svm_prediction, svm_test_score, prediction_of_svm_close, forecast_prediction
Example #9
0
    def svm_inference(self, data, confidence, svm, norm=True, in_test=False):
        """Run ``svm`` over every word in ``data`` and refresh the confidence matrix.

        For each word the first ``self.dtr`` columns are joined with the
        context built by ``self.extend_context`` from the matching rows of
        ``confidence``; the classifier's decision values (squashed with a
        sigmoid when ``norm`` is true) are written to the same rows of a new
        matrix, and the classifier's accuracy on the word is accumulated.

        :param data: sequence of 2-D arrays, one per word; the last column holds the labels.
        :param confidence: 2-D array whose rows line up with the stacked rows of ``data``.
        :param svm: object providing ``decision_function`` and ``score``.
        :param norm: apply the sigmoid to the decision values.
        :param in_test: also copy the labels into ``self.test_labels``.
        :return: (row-weighted accuracy, mean per-word accuracy, new confidence matrix).
        """
        print('\tPerforming SVM inference')
        n_words = len(data)
        print(n_words)

        weighted_acc_sum = 0
        word_acc_sum = 0
        rows_seen = 0
        words_seen = 0
        updated_conf = np.zeros(confidence.shape)

        offset = 0
        for word in (data[idx] for idx in range(n_words)):
            n_rows = word.shape[0]
            stop = offset + n_rows
            labels = word[:, -1]

            if in_test:
                self.test_labels[offset:stop] = labels

            # TODO: implemented iterative context inference
            # Feature layout: [X | extended context]
            n_columns = self.dtr + self.n_classes * self.window_size * 2
            features = np.zeros((n_rows, n_columns))
            features[:, :self.dtr] = word[:, :self.dtr]
            features[:, self.dtr:] = self.extend_context(confidence[offset:stop, :])

            # Confidence measures of the predictions.
            word_conf = svm.decision_function(features)
            if norm:
                # Sigmoid function --> normalization
                denominator = 1 + np.exp(-1 * word_conf)
                word_conf = denominator**-1

            updated_conf[offset:stop, :] = word_conf
            offset = stop

            # Accumulate accuracy, both per row and per word.
            rows_seen += n_rows
            words_seen += 1
            word_acc = svm.score(features, labels)
            word_acc_sum += word_acc
            weighted_acc_sum += word_acc * n_rows

        return weighted_acc_sum / rows_seen, word_acc_sum / words_seen, updated_conf
def run(attrib_idx):
    """Fit a linear SVM for one attribute and save its weight vector.

    Loads ``principal_directions/wspace_att_<attrib_idx>.npy`` (a pickled
    dict), derives binary targets from the arg-max of the classifier output
    stored under ``attrib_idx``, fits a LinearSVC on the ``'dlatents'``
    entries and writes the first row of ``coef_`` to
    ``principal_directions/direction_<attrib_idx>`` with ``np.save``.

    :param attrib_idx: integer attribute index; used both in the file names
        and as the key into the loaded dict.
    """
    # The file stores a dict wrapped in a 0-d object array. Current NumPy
    # refuses to load object arrays unless allow_pickle=True is given.
    # NOTE(review): unpickling executes code from the file - only load
    # trusted files.
    results = np.load("principal_directions/wspace_att_%d.npy" % attrib_idx,
                      allow_pickle=True).item()

    # Every sample is kept; confidence-based pruning is not applied here.
    pruned_indices = list(range(results['latents'].shape[0]))

    # Fit SVM to the remaining samples.
    svm_targets = np.argmax(results[attrib_idx][pruned_indices], axis=1)
    space = 'dlatents'
    svm_inputs = results[space][pruned_indices]

    svm = sklearn.svm.LinearSVC(C=1.0, dual=False, max_iter=10000)
    svm.fit(svm_inputs, svm_targets)

    w = svm.coef_[0]

    np.save("principal_directions/direction_%d" % attrib_idx, w)
Example #11
0
    def svm_inference(self, data, confidence, svm, norm=True, in_test=False):
        """Apply ``svm`` word by word and return accuracies plus updated confidences.

        Each word contributes a feature block made of its first ``self.dtr``
        columns followed by ``self.extend_context`` of the corresponding
        ``confidence`` rows.  Decision values for that block (passed through
        a sigmoid when ``norm`` is true) replace the matching rows in a fresh
        confidence matrix, and ``svm.score`` on the block feeds two accuracy
        averages.

        :param data: sequence of 2-D arrays, one per word; the last column holds the labels.
        :param confidence: 2-D array whose rows line up with the stacked rows of ``data``.
        :param svm: object providing ``decision_function`` and ``score``.
        :param norm: apply the sigmoid to the decision values.
        :param in_test: also copy the labels into ``self.test_labels``.
        :return: (row-weighted accuracy, mean per-word accuracy, new confidence matrix).
        """
        print('\tPerforming SVM inference')
        word_count = len(data)
        print(word_count)

        acc_by_rows = 0
        acc_by_words = 0
        total_rows = 0
        total_words = 0
        new_confidence = np.zeros(confidence.shape)

        first_row = 0
        for position in range(word_count):
            current = data[position]
            height = current.shape[0]
            last_row = first_row + height
            targets = current[:, -1]

            if in_test:
                self.test_labels[first_row:last_row] = targets

            # TODO: implemented iterative context inference
            # Build [X | extended context] for this word.
            block = np.zeros((height, self.dtr + self.n_classes * self.window_size * 2))
            block[:, :self.dtr] = current[:, :self.dtr]
            context = self.extend_context(confidence[first_row:last_row, :])
            block[:, self.dtr:] = context

            # Confidence measures of the predictions.
            decision = svm.decision_function(block)
            if norm:
                # Sigmoid function --> normalization
                decision = (1 + np.exp(-1 * decision))**-1

            new_confidence[first_row:last_row, :] = decision
            first_row = last_row

            # Accuracy bookkeeping: per row and per word.
            total_rows += height
            total_words += 1
            block_accuracy = svm.score(block, targets)
            acc_by_words += block_accuracy
            acc_by_rows += block_accuracy * height

        return acc_by_rows / total_rows, acc_by_words / total_words, new_confidence
Example #12
0
def train_svm(cls, train_x, train_y):
    """
    Fit the binary linear SVM for one class and report its training error.

    The regularisation strength is looked up in the module-level ``C``
    mapping under ``cls``.

    :param cls: the class that this SVM is about (key into ``C``)
    :param train_x: descriptors
    :param train_y: corresponding labels
    :return: the weight (``coef_``) and bias (``intercept_``) of the SVM
    """
    classifier = sklearn.svm.LinearSVC(C=C[cls], max_iter=1000)
    classifier.fit(train_x, train_y)

    training_accuracy = classifier.score(train_x, train_y)
    error = 1 - training_accuracy
    print('Training Error for "{}" : {:.4f}'.format(cls, error))

    return classifier.coef_, classifier.intercept_
Example #13
0
def SVM(X_train, X_test, y_train, y_test, withpca, pca):
    """Train or load nine one-vs-rest SVMs, score them and print the best confusion matrix.

    Every (C, kernel) combination is cached in its own ``.sav`` pickle in the
    current directory; when ``withpca == 1`` the file names carry a
    ``pca<pca>`` suffix so PCA and non-PCA models do not collide.

    :param withpca: 1 to use the PCA-specific cache file names.
    :param pca: value embedded in the PCA cache file names.
    :return: (training_time, testing_time, max_accuracy) - the longest single
        fit in seconds (0 when every model was loaded), the longest single
        scoring call in seconds, and the best accuracy in percent.
    """
    suffix = 'pca' + str(pca) if withpca == 1 else ''
    # The numbering order (…5, 7, 6…) is kept exactly as it was so that
    # previously written cache files keep mapping to the same models.
    filenames = ['SVM%d%s.sav' % (number, suffix)
                 for number in (1, 2, 3, 4, 5, 7, 6, 8, 9)]
    c = [0.1, 1, 1000]
    ker = ['linear', 'poly', 'rbf']
    training_time, testing_time, max_accuracy = 0, 0, 0
    max_model = None
    for i, c_value in enumerate(c):
        for j, kernel in enumerate(ker):
            # Row-major index into filenames: the row stride is the number of kernels.
            filename = filenames[i * len(ker) + j]
            if os.path.exists(filename):
                # NOTE(review): pickle.load runs arbitrary code from the file;
                # only safe while these cache files are written by this program.
                with open(filename, 'rb') as model_file:
                    svm = pickle.load(model_file)
            else:
                t1 = time.time()
                svm = OneVsRestClassifier(SVC(kernel=kernel,
                                              C=c_value)).fit(X_train, y_train)
                t2 = time.time()
                training_time = max(training_time, t2 - t1)
                with open(filename, 'wb') as model_file:
                    pickle.dump(svm, model_file)
            t1 = time.time()
            accuracy = svm.score(X_test, y_test) * 100
            t2 = time.time()
            testing_time = max(testing_time, t2 - t1)
            # Always accept the first model so max_model is never None, even
            # when every accuracy is 0 (this used to crash at predict below).
            if max_model is None or accuracy > max_accuracy:
                max_accuracy = accuracy
                max_model = svm
    y_pred = max_model.predict(X_test)
    conf_mat = confusion_matrix(y_test, y_pred)
    print("conf_mat of SVM ")
    print(conf_mat)
    return training_time, testing_time, max_accuracy
Example #14
0
def SVM(request):
    """Predict the requesting user's department with a linear SVC and store it.

    The student row of the logged-in user is turned into a feature frame, an
    SVC is fitted on a 70/30 split of ``X`` / ``y``, and the prediction plus
    the test accuracy (formatted with two decimals) are written to the
    ``Student`` instance, which is then saved through ``StudentObj`` when the
    posted form validates.

    NOTE(review): ``X`` and ``y`` are not defined in this function; they must
    come from module scope - confirm.
    NOTE(review): the function returns ``request`` itself - confirm callers
    expect that.
    """
    user_pk = request.user.id
    student = Student.objects.get(user=user_pk)

    every_student = pd.DataFrame(list(Student.objects.all().values()))
    own_rows = every_student.loc[every_student['user_id'] == user_pk]
    non_feature_columns = ["user_id", "id", "Department_DS", "Department_SVM",
                           "Department_KNN", "DS_acc", "SVM_acc", "KNN_acc"]
    features = own_rows.drop(non_feature_columns, axis=1)

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, shuffle=True)
    classifier = SVC(kernel="linear", C=0.025, random_state=101)
    classifier.fit(X_train, y_train)
    classifier.predict(X_test)  # result is not used
    department = classifier.predict(features)
    accuracy = classifier.score(X_test, y_test)

    student.Department_SVM = list(department)[0]
    student.SVM_acc = format(accuracy * 100,".2f")

    StudentObj(instance=student)  # unbound form; immediately superseded below
    bound_form = StudentObj(request.POST, instance=student)
    if bound_form.is_valid():
        bound_form.save()
    return request
Example #15
0
 def svm_classifiation_score(self,
                             X,
                             y,
                             C=1.0,
                             kernel='rbf',
                             gamma='auto',
                             degree=3):
     """
     Return the classification score of an SVM trained on the learnt encoding.

     ``X`` is encoded with ``self.encode``; the encoding and the labels ``y``
     are split 90/10 into training and testing sets, an SVC with the given
     parameters is fitted on the training part, and its score on the testing
     part is returned.

     :param X: data passed to ``self.encode``
     :param y: labels aligned with ``X``
     :param C: SVC regularisation parameter
     :param kernel: SVC kernel name
     :param gamma: SVC kernel coefficient
     :param degree: degree for the polynomial kernel
     :return: value of ``SVC.score`` on the held-out split
     """
     H = self.encode(X)
     H_train, H_test, Y_train, Y_test = train_test_split(H,
                                                         y,
                                                         test_size=0.1)
     # The estimator must not be called ``svm``: assigning to that name made
     # it local to the function, so ``svm.SVC`` raised UnboundLocalError.
     classifier = svm.SVC(C=C,
                          kernel=kernel,
                          gamma=gamma,
                          degree=degree,
                          cache_size=600)
     classifier.fit(H_train, Y_train)
     return classifier.score(H_test, Y_test)
Example #16
0
def chapter_13():
    """Run the chapter-13 model-evaluation experiments on the breast-cancer data.

    The dataset is loaded and split once (stratified, random_state=66).  Each
    experiment below is guarded by a literal ``if 0:`` / ``if 1:`` switch;
    only the random-forest section is currently enabled.  Sections, in order:
    cross validation, manual grid search, GridSearchCV, confusion matrix and
    precision/recall/F1 for an SVC, threshold sweep on logistic-regression
    probabilities, ROC curve, jack-knife and bootstrap estimates, bagging,
    boosting, and random-forest feature importances.
    """
    cancer = load_breast_cancer()
    X_train, X_test, y_train, y_test = \
        train_test_split(cancer.data, cancer.target, stratify=cancer.target, random_state=66)

    if 0:  # Cross validation
        tree = DecisionTreeClassifier(criterion='entropy',
                                      max_depth=3,
                                      random_state=0)
        scores = cross_val_score(tree, cancer.data, cancer.target, cv=5)
        print("Cross validation scores      :{}".format(scores))
        print("Cross validation scores(mean):{:.5}".format(scores.mean()))

    if 0:  # Grid search ex
        best_score = 0
        best_param = {}
        for gamma in [0.001, 0.01, 0.1, 1, 10, 100]:
            for C in [0.001, 0.01, 0.1, 1, 10, 100]:
                svm = SVC(gamma=gamma, C=C)
                svm.fit(X_train, y_train)
                score = svm.score(X_test, y_test)
                if score > best_score:
                    best_score = score
                    best_param = {'C': C, 'gamma': gamma}

        print("Best score:{:.2f}".format(best_score))
        print("Parameters in best score:{}".format(best_param))

    if 0:  # Grid search with GridSearchCV
        param_grid = {
            'C': [10**i for i in range(-3, 2)],
            'gamma': [10**i for i in range(-3, 2)]
        }

        grid_search = GridSearchCV(SVC(), param_grid, cv=5)
        grid_search.fit(X_train, y_train)

        print("Test set score:{:.2f}".format(grid_search.score(X_test,
                                                               y_test)))
        print("Best parameters:{}".format(grid_search.best_params_))
        print("Best cross-validation score:{:.2f}".format(
            grid_search.best_score_))

    if 0:  # Confusion matrix, precision, recall and F1 for an SVC
        model = SVC(gamma=0.001, C=1)
        model.fit(X_train, y_train)
        # Report the class name of ``model`` itself; ``clf`` is only assigned
        # in later sections, so reading it here raised UnboundLocalError.
        print("train:", model.__class__.__name__, model.score(X_train, y_train))
        print("test :", model.__class__.__name__, model.score(X_test, y_test))

        pred_svc = model.predict(X_test)
        confusion_m = confusion_matrix(y_test, pred_svc)
        print("Confution matrix:\n{}".format(confusion_m))

        print("Precision score: {:.3f}".format(
            precision_score(y_true=y_test, y_pred=pred_svc)))
        print("Recall score   : {:.3f}".format(
            recall_score(y_true=y_test, y_pred=pred_svc)))
        print("F1 score       : {:.3f}".format(
            f1_score(y_true=y_test, y_pred=pred_svc)))

    if 0:  # Threshold sweep on logistic-regression probabilities
        model = LogisticRegression()
        lrmodelfit = model.fit(X_train, y_train)

        prob_result = pd.DataFrame(lrmodelfit.predict_proba(X_test))
        prob_result.columns = ["malignant", "benign"]
        for threhold, flg in zip([0.4, 0.3, 0.15, 0.05],
                                 ["flg_04", "flg_03", "flg_015", "flg_005"]):
            # map() runs immediately, so the lambda sees the current threshold.
            prob_result[flg] = prob_result["benign"].map(
                lambda x: 1 if x > threhold else 0)
        print(prob_result.head(15))

        fig, ax = plt.subplots()
        for flg in ["flg_04", "flg_03", "flg_015", "flg_005"]:
            confusion_m = confusion_matrix(y_test, prob_result[flg])
            print("◆ThreholdFlg:", flg)
            print("Confution matrix:\n{}".format(confusion_m))
            fpr = (confusion_m[0, 1]) / (confusion_m[0, 0] + confusion_m[0, 1])
            tpr = (confusion_m[1, 1]) / (confusion_m[1, 0] + confusion_m[1, 1])
            plt.scatter(fpr, tpr)
            plt.xlabel("fpr")
            plt.ylabel("tpr")
            ax.annotate(flg, (fpr, tpr))
        # plt.show()

    if 0:  # ROC curve
        X = cancer.data
        y = cancer.target
        y = label_binarize(y, classes=[0, 1])
        X_train, X_test, y_train, y_test = train_test_split(X,
                                                            y,
                                                            test_size=.5,
                                                            random_state=0)

        classifier = OneVsRestClassifier(
            SVC(kernel='linear', probability=True, random_state=0))
        y_score = classifier.fit(X_train, y_train).decision_function(X_test)

        # Compute fpr (false positive rate) and tpr (true positive rate).
        fpr, tpr, _ = roc_curve(y_test, y_score)
        roc_auc = auc(fpr, tpr)
        plt.plot(fpr,
                 tpr,
                 color='red',
                 lw=2,
                 label='ROC curve (area = %0.2f)' % roc_auc)
        plt.plot([0, 1], [0, 1], color='black', lw=2, linestyle='--')

        plt.xlim([0.0, 1.0])
        plt.ylim([0.0, 1.05])
        plt.xlabel('False Positive Rate')
        plt.ylabel('True Positive Rate')
        plt.title('Receiver operating characteristic')
        plt.legend(loc="best")
        plt.show()

    if 0:  # jack-knife
        # NOTE(review): ``random`` must provide seed/randn/choice(..., replace=)
        # here, i.e. presumably numpy.random rather than the stdlib module - confirm.
        random.seed(0)
        norm_random_sample_data = random.randn(1000)
        mean_array = np.array([])
        for i in range(0, len(norm_random_sample_data)):
            # Leave sample i out and record the mean of the rest.
            ind = np.ones(1000, dtype=bool)
            ind[i] = False
            mean_array = np.append(mean_array,
                                   norm_random_sample_data[ind].mean())
        x = (mean_array - mean_array.mean())**2
        print(np.sqrt(x.sum() * 999 / 1000))

        # Bootstrap: means of 500-element resamples drawn with replacement.
        mean_array_boot = np.array([])
        for i in range(0, len(norm_random_sample_data)):
            mean_array_boot = \
                np.append(mean_array_boot, random.choice(norm_random_sample_data, 500, replace=True).mean())
        x = (mean_array_boot - mean_array_boot.mean())**2
        print(np.sqrt(x.sum() / 1000))

    if 0:  # Bagging
        bagging = BaggingClassifier(KNeighborsClassifier(),
                                    max_samples=0.5,
                                    max_features=0.5)
        clf = bagging
        clf.fit(X_train, y_train)
        print("train:", clf.__class__.__name__, clf.score(X_train, y_train))
        print("test:", clf.__class__.__name__, clf.score(X_test, y_test))

    if 0:  # Boosting
        clf = AdaBoostClassifier(learning_rate=1.0)
        clf.fit(X_train, y_train)
        print("train:", clf.__class__.__name__, clf.score(X_train, y_train))
        print("test:", clf.__class__.__name__,
              clf.score(X_test, y_test))  # slightly overfitting

        score_list = []

        # Sweep the learning rate and record the test score for each value.
        for r in np.arange(0.00001, 2, 0.01):
            clf = AdaBoostClassifier(learning_rate=r)
            clf.fit(X_train, y_train)
            score_list.append([r, clf.score(X_test, y_test)])

        score_list_df = pd.DataFrame(score_list)
        score_list_df.columns = ["r", "score"]

        plt.plot(score_list_df.r, score_list_df.score)
        plt.xlabel("learning rate")
        plt.ylabel("test score")
        plt.grid(True)
        plt.show()

    if 1:  # Random forest
        f_model = RandomForestClassifier(random_state=0)
        clf = f_model.fit(X_train, y_train)
        print("train:", clf.__class__.__name__, clf.score(X_train, y_train))
        print("test:", clf.__class__.__name__, clf.score(X_test, y_test))

        # Plot the feature importances, most important first.
        importances = f_model.feature_importances_
        indi = np.argsort(importances)[::-1]

        label = cancer.feature_names

        plt.bar(range(X_train.shape[1]), importances[indi])
        plt.xticks(range(X_train.shape[1]), label[indi], rotation=90)
        plt.grid(True)
        plt.tight_layout()
        plt.show()
Example #17
0
# Normalise the Size column: drop the 'M' unit marker and turn it into floats.
df['Size'] = df['Size'].apply(lambda x: x.strip('M'))
# Only the Size cell is zeroed for apps without a fixed size. The previous
# ``df[mask] = 0`` overwrote every column of those rows, including the
# features and the Rating target used below.
df.loc[df['Size'] == 'Varies with device', 'Size'] = 0
df['Size'] = df['Size'].astype(float)

#take specific attributes
features = ['Size','Type', 'Price', 'Content Rating', 'Genres']
X = df[features]
y = df['Rating'].astype(int)


#split for train and test
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.25, random_state = 10)

# NOTE: this rebinds ``svm`` from the imported module to the fitted model;
# the name is kept because later code may refer to the model through it.
svm = svm.SVR()
svm.fit(X_train, y_train)
# For a regressor, score() is the R^2 coefficient rather than an accuracy.
accuracy = svm.score(X_test,y_test)
print('Accuracy: ' + str(np.round(accuracy*100, 2)) + '%')  




# Main application window.
window = tk.Tk()
window.title('Predict Rate App')
window.geometry('800x350')
window.configure(background='gray')


def calculate_number():
    # Copies the current text of the size and type entry widgets into the
    # module-level DataFrame ``df``.
    # NOTE(review): each assignment overwrites the whole column with a single
    # value; the rest of this function is not visible here - confirm intent.

    df['Size'] = size_entry.get()
    df['Type'] = type_entry.get()
Example #18
0
# Pair every test sample with an empty (0, 2) integer edge array.
# ``np.int`` was only an alias of the builtin ``int`` and has been removed
# from NumPy, so the builtin is used directly.
X_test = [(x, np.empty((0, 2), dtype=int)) for x in x_test]
print(len(x_test))
# One conversion is enough; the old loop repeated the same whole-array cast
# once per label.
test_labels = test_labels.astype(int)
"""
print len(test_labels)
pbl = GraphCRF(inference_method='ad3')
svm = NSlackSSVM(pbl, C=1,n_jobs = 1,verbose = 1)
start = time()
print len(X_valid)
print len(valid_Y)
svm.fit(X_valid, valid_Y)
print "fit finished"
time_svm = time() - start
print X_test[i][0].shape
print svm.score(X_valid,valid_Y)
print svm.score(X_test,test_Y)
y_pred = np.vstack(svm.predict(np.array(X_valid)))
print("Score with pystruct crf svm: %f (took %f seconds)"
      % (np.mean(y_pred == valid_Y), time_svm))
y_predt = np.vstack(svm.predict(np.array(X_test)))
print("Score with pystruct crf svm: %f (took %f seconds)"
      % (np.mean(y_predt == test_Y), time_svm))


#we throw away void superpixels and flatten everything
#y_pred, y_true = np.hstack(y_pred), np.hstack(valid_Y)
#y_pred = y_pred[y_true != 255]
#y_true = y_true[y_true != 255]

#print("Score on test set: %f" % np.mean(y_true == y_pred))
Example #19
0
def _print_error_metrics(y_true, y_pred):
    """Print MAE, MSE and RMSE of ``y_pred`` against ``y_true``."""
    mse = metrics.mean_squared_error(y_true, y_pred)
    print('Mean Absolute Error:', metrics.mean_absolute_error(y_true, y_pred))
    print('Mean Squared Error:', mse)  
    print('Root Mean Squared Error:', np.sqrt(mse))


# k-nearest neighbours with 100 neighbours
knn = neighbors.KNeighborsClassifier(n_neighbors = 100)
knn.fit(x_train,y_train)
prediction = knn.predict(x_test)
print("knn score:", knn.score(x_test,y_test))
_print_error_metrics(y_test, prediction)

#%% svm

from sklearn import svm
# The module name is rebound to the fitted classifier from here on.
svm = svm.SVC(random_state=1)
svm.fit(x_train,y_train)
prediction_svm = svm.predict(x_test)
print("svm accuary: ",svm.score(x_test,y_test))
_print_error_metrics(y_test, prediction_svm)
#%% rf classification

from sklearn import ensemble
rf= ensemble.RandomForestClassifier(n_estimators=10,random_state=1)
rf.fit(x_train,y_train)
prediction_rf = rf.predict(x_test)
print("rf accuracy: ",rf.score(x_test,y_test))
_print_error_metrics(y_test, prediction_rf)

# Report the logistic-regression results on the test set.
print('\n Testing set Accuracy:' + str(100 * np.mean((predicted_label == test_label).astype(float))) + '%')
print(confusion_matrix(test_label, predicted_label, labels = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]))
stop_time_LR = time.time() - start_time_LR
print("Time taken for Logistic Regression {}.seconds\n".format(str(stop_time_LR)))

# Code for SVM
print("Learning SVM Using Linear Kernel")
# Start the timer for this section here; nothing in the visible script
# started it before the elapsed time is computed below.
start_time_linear_kernel = time.time()

svm = SVC(kernel = 'linear')
# Fit on a random sample of 10000 rows; the indices are drawn from
# [0, 50000) with replacement.
indexes = np.random.randint(50000, size = 10000)
sample_data = train_data[indexes, :]
sample_label = train_label[indexes, :]
svm.fit(sample_data, sample_label.flatten())

traning_accuracy = svm.score(train_data, train_label)
traning_accuracy = str(100*traning_accuracy)
print("Traning data Accuracy for Linear Kernel: {}%\n".format(traning_accuracy))
validation_accuracy = svm.score(validation_data, validation_label)
validation_accuracy = str(100*validation_accuracy)
print("Validation data Accuracy for Linear Kernel: {}%\n".format(validation_accuracy))
test_accuracy = svm.score(test_data, test_label)
test_accuracy = str(100*test_accuracy)
print("Test data Accuracy for Linear Kernel: {}%\n".format(test_accuracy))
time_linear_kernel = time.time() - start_time_linear_kernel

print("Time taken for SVM using Linear Kernel {}.seconds\n\n\n".format(str(time_linear_kernel)))


print("SVM with radial basis function with value of gamma setting to 1 ")
start_time_rbf = time.time()
    # Body of a loop whose header is outside this view; ``biin`` appears to
    # be the value being swept - TODO confirm against the loop header.
    X = scaler.transform(X)

    # Split the dataset into train and test data (10% test, stratified on y,
    # fixed seed so every run uses the same split).
    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.1,
                                                        random_state=1,
                                                        stratify=y)
    # X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, stratify=y)

    # Fit the classifier ``svm`` (created outside the visible lines).

    svm.fit(X_train, y_train)
    # Record the test score together with the bin setting it belongs to.
    score_vals = dict()
    score_vals['bins'] = biin
    score_vals['score'] = svm.score(X_test, y_test)
    scores.append(score_vals)
    print(score_vals)

# calculate best run parameters
print(scores)
scores_list = [record['score'] for record in scores]
# Compute the maximum once instead of once per record inside the comprehension.
best_score = np.max(scores_list)
best_run = [record for record in scores if record['score'] == best_score]
print(best_run)

# save best trained model
# Several runs may tie for the best score; the first one is used.
biin = best_run[0]['bins']
print('Running to save the best model for bin = {}'.format(biin))
Example #22
0
# Sample 3.3: Bagging
import pandas as pd
from sklearn import svm
from sklearn.model_selection import train_test_split
from sklearn.ensemble import BaggingClassifier
from sklearn.linear_model import LogisticRegression

# Load the table: every column but the last is a feature, the last the label.
data = pd.read_csv('wifi.txt', header=None)
X, y = data.values[:, :-1], data.values[:, -1]

# Hold out 20% of the rows for evaluation.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

bagging = BaggingClassifier()
# NOTE: from here on the name `svm` refers to the estimator, not the module.
svm = svm.SVC()
logreg = LogisticRegression()

# Train the three models on the same split, in the original order.
for model in (bagging, svm, logreg):
    model.fit(X_train, y_train)

# Report test accuracy as a percentage with two decimals.
for report_format, model in (
    ("Bagging Accuracy: %.2f%s", bagging),
    ("SVM Accuracy: %.2f%s", svm),
    ("LogReg Accuracy: %.2f%s", logreg),
):
    print(report_format % (model.score(X_test, y_test) * 100, '%'))
Example #23
0
def score(test_data, test_labels):
    """Return the module-level ``svm`` estimator's score on the given data.

    Delegates to ``svm.score(test_data, test_labels)`` and returns the result
    unchanged.
    """
    # Reading a module-level name needs no ``global`` statement; that is only
    # required when rebinding it.
    result = svm.score(test_data, test_labels)
    return result
Example #24
0
def main():
    """Benchmark SVM classifiers on a random 5000-sample subset of MNIST.

    Downloads ``mnist_784`` through ``fetch_openml``, splits the subset 70/30
    into train and test sets, and then:

    1. fits a linear-kernel SVC and prints its test error;
    2. times training and prediction for four kernels and plots the results
       with ``plot_fig``;
    3. repeats the timing for five values of ``C`` between 0.1 and 1.0 and
       plots test error against training error;
    4. refits an RBF SVC with ``C=1.0``, prints its timings and 10-fold
       cross-validated error, and shows the test confusion matrix as a
       heatmap.

    Returns:
        None. Results are printed and shown as matplotlib figures.
    """
    mnist = fetch_openml(name='mnist_784')
    # Indices are drawn with replacement, so the subset may contain duplicates.
    echantillon = np.random.randint(70000, size=5000)
    data = mnist.data[echantillon]
    target = mnist.target[echantillon]

    xtrain, xtest, ytrain, ytest = train_test_split(data,
                                                    target,
                                                    train_size=0.7)

    # Baseline: linear kernel. The printed value is the test error (1 - score).
    classifier = svm.SVC(kernel='linear')
    classifier.fit(xtrain, ytrain)
    error = 1 - classifier.score(xtest, ytest)
    print(f"Score SVM linéaire : {error}")

    # Kernel comparison: (kernel, training time, prediction time, test error).
    kernels = []
    print("Modification du kernel : ")
    for kernel in ['linear', 'poly', 'rbf', 'sigmoid']:
        classifier = svm.SVC(kernel=kernel)

        start_training = time.time()
        classifier.fit(xtrain, ytrain)
        final_training = time.time() - start_training

        start_prediction = time.time()
        ypred = classifier.predict(xtest)
        final_prediction = time.time() - start_prediction

        error = metrics.zero_one_loss(ytest, ypred)
        kernels.append((kernel, final_training, final_prediction, error))
        print(f"\t {kernels[-1]}")

    # Transpose the rows into one sequence per column for plotting.
    kernels_liste = list(zip(*kernels))

    plot_fig(kernels_liste)

    # Regularisation sweep:
    # (C, training time, prediction time, test error, training error).
    tol = []
    print("Evolution de la tolérance : ")
    for tolerance in np.linspace(0.1, 1.0, num=5):
        # This estimator must not be named ``svm``: assigning to that name
        # anywhere in the function makes it local for the whole body, so every
        # ``svm.SVC`` call above would raise UnboundLocalError.
        model = svm.SVC(C=tolerance)

        start_training = time.time()
        model.fit(xtrain, ytrain)
        final_training = time.time() - start_training

        start_prediction = time.time()
        ypred = model.predict(xtest)
        final_prediction = time.time() - start_prediction

        error = metrics.zero_one_loss(ytest, ypred)
        # ``score`` is an accuracy; convert it so it is comparable with the
        # zero-one test error it is plotted against below.
        error_training = 1 - model.score(xtrain, ytrain)
        tol.append((tolerance, final_training, final_prediction, error,
                    error_training))
        print(f"\t {tol[-1]}")

    tol_list = list(zip(*tol))

    plot_fig(tol_list)

    plt.figure(figsize=(19, 9))
    plt.plot(tol_list[0], tol_list[3], 'x-', color='blue')  # test error
    plt.plot(tol_list[0], tol_list[-1], 'x-',
             color='orange')  # training error
    plt.grid(True)
    plt.show()

    # Final model with the hand-picked kernel and C.
    best_kernel = 'rbf'
    best_tolerance = 1.0
    best_svm = svm.SVC(kernel=best_kernel, C=best_tolerance)

    start_training = time.time()
    best_svm.fit(xtrain, ytrain)
    best_final_entrainement = time.time() - start_training

    start_prediction = time.time()
    ypred = best_svm.predict(xtest)
    best_final_prediction = time.time() - start_prediction

    # Cross-validate on the whole subset; report the mean error over 10 folds.
    cross_val = model_selection.cross_val_score(best_svm, data, target, cv=10)
    meilleure_erreur = 1 - np.mean(cross_val)

    print(f"Durée de l'entraînement : {best_final_entrainement}")
    print(f"Durée de la prédiction : {best_final_prediction}")
    print(f"Erreur : {meilleure_erreur}")

    # Confusion matrix of the final model, labelled with the test classes.
    cm = confusion_matrix(ytest, ypred)
    df_cm = pd.DataFrame(cm, columns=np.unique(ytest), index=np.unique(ytest))
    df_cm.index.name = 'Valeur réelle'
    df_cm.columns.name = 'Valeur prédite'
    plt.figure(figsize=(16, 9))
    sn.heatmap(df_cm, cmap="Blues", annot=True)
    plt.show()
# Report the fitted GNB model's score on the training and test splits,
# formatted to two decimals.
gnb_train_accuracy = gnb.score(X_train, Y_train)
print('Accuracy of GNB classifier on training set: {:.2f}'.format(gnb_train_accuracy))
gnb_test_accuracy = gnb.score(X_test, Y_test)
print('Accuracy of GNB classifier on test set: {:.2f}'.format(gnb_test_accuracy))


def svc_param_selection(X, y, nfolds):
    """Grid-search the ``C`` and ``kernel`` hyper-parameters of an SVC.

    Args:
        X: Training features passed to ``GridSearchCV.fit``.
        y: Training targets passed to ``GridSearchCV.fit``.
        nfolds: Value forwarded as the ``cv`` argument of ``GridSearchCV``.

    Returns:
        The ``best_params_`` dict of the fitted search, with the keys ``'C'``
        and ``'kernel'``.
    """
    # Import GridSearchCV locally, like ``svm``: the original reached it via a
    # module-level ``sklearn`` name that this function never imported.
    from sklearn import svm
    from sklearn.model_selection import GridSearchCV

    Cs = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1]
    kernels = ['linear', 'rbf']
    param_grid = {'C': Cs, 'kernel': kernels}
    grid_search = GridSearchCV(svm.SVC(), param_grid, cv=nfolds)
    grid_search.fit(X, y)
    return grid_search.best_params_


# Learning
# Choose C and kernel by grid search (cv=3), then fit an SVC with the winners.
params = svc_param_selection(X_train, Y_train, 3)
# Parenthesised so the line also parses on Python 3; with a single argument
# the output on Python 2 is unchanged.
print(params)
svm = SVC(**params)
svm.fit(X_train, Y_train)
print('Accuracy of SVM classifier on training set: {:.2f}'.format(
    svm.score(X_train, Y_train)))
print('Accuracy of SVM classifier on test set: {:.2f}'.format(
    svm.score(X_test, Y_test)))
Example #26
0
    #svm, X1, Y1, cv=bs)#, score_func=metrics.f1_score)

#print 'score: %f +- %f' % (scores.mean(), scores.std())

#pred_Y = svm.predict(test_X)

#print metrics.precision_score(test_Y, pred_Y)
#print metrics.recall_score(test_Y, pred_Y)
#print metrics.f1_score(test_Y, pred_Y)

#pred_Y = svm.predict(X1)

#print metrics.precision_score(Y1, pred_Y)
#print metrics.recall_score(Y1, pred_Y)
#print metrics.f1_score(Y1, pred_Y)

# alpha is the relative frequency of the most common label.
label_count = float(labels.shape[0])
alpha_arr = np.array(
    [np.sum(labels == label) / label_count for label in np.unique(labels)])
alpha = np.max(alpha_arr)
# Parenthesised print: same output on Python 2, valid syntax on Python 3.
print(alpha)

# NOTE(review): `cross_validation.Bootstrap` comes from an old scikit-learn
# API -- confirm it exists in the installed version.
bs = cross_validation.Bootstrap(data.shape[0], 3)
for train_indices, test_indices in bs:
    svm.fit(data[train_indices], labels[train_indices])
    score = svm.score(data[test_indices], labels[test_indices])
    # Raw score, then the score rescaled so that alpha maps to 0 and 1 to 1.
    # '%s %s' reproduces the space-separated str() output of `print a, b`.
    print('%s %s' % (score, (score - alpha) / (1 - alpha)))
Example #27
0
# Splitting data
# 33% of the rows are held out for testing; the seed makes the split repeatable.

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.33, random_state=44, shuffle=True
)


# ----------------------------------------------------
# Applying SVC Model (constructed with its default parameters)
# NOTE: this rebinds `svm` (presumably the sklearn.svm module -- confirm) to
# the estimator instance, so the module is no longer reachable by that name.


svm = svm.SVC()
svm.fit(X_train, y_train)

# Calculating Details
print("svm Train Score is : ", svm.score(X_train, y_train))
print("svm Test Score is : ", svm.score(X_test, y_test))
print("svm Classes are : ", svm.classes_)
print("----------------------------------------------------")

# Calculating Prediction
# Only the first ten predictions are printed.
y_pred = svm.predict(X_test)
print("Predicted Value for svm is : ", y_pred[:10])

# ----------------------------------------------------
# Calculating Confusion Matrix
CM = confusion_matrix(y_test, y_pred)
print("Confusion Matrix is : \n", CM)

# drawing confusion matrix
# NOTE(review): seaborn documents `center` as a numeric value; True is
# presumably treated as 1 here -- confirm this is intended.
sns.heatmap(CM, center=True)
Example #28
0
# Encode the categorical columns as integers.
# Each column's encoder is fitted on train and test values together, so a
# category gets the same integer in both frames. Refitting separately on each
# frame (as before) assigns codes independently and makes them incomparable.
le = LabelEncoder()
cat = ['genre','certificate', 'distributor']
for col in cat:
    le.fit(pd.concat([train[col], test[col]]))
    train[col] = le.transform(train[col])
    test[col] = le.transform(test[col])

# Drop the target and the identifier/text columns from the model inputs.
train_X = train.drop(['year','oscar', 'movie_name', 'actor_name', 'href'], axis=1)
test_X = test.drop(['year','oscar', 'movie_name', 'actor_name', 'href'], axis = 1)

train_Y = train['oscar']

# probability=True is required for predict_proba below; without it the call
# is not available on SVC.
# NOTE: `svm` is rebound from the module to the fitted estimator.
svm =svm.SVC(kernel='rbf',C=1, probability=True).fit(train_X,train_Y)
# Training score; the value is not stored (kept as in the original script).
svm.score(train_X, train_Y)

# Second probability column (class svm.classes_[1]), indexed by movie name.
pred_svm = svm.predict_proba(test_X)[:,1]
svm_prediction = pd.DataFrame(pred_svm, test['movie_name'])










Example #29
0
# Every column but the last is a feature; the last column is the label.
X, y = data.values[:, :-1], data.values[:, -1]

# Even 50/50 split between training and evaluation rows.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5)

# NOTE: `svm` and `tree` are rebound from modules to estimator instances.
svm = svm.SVC(probability=True)
logreg = LogisticRegression()
tree = tree.DecisionTreeClassifier()

# Report format paired with its model, in the original fit/print order.
base_models = (
    ("SVM Accuracy: %.2f%s", svm),
    ("LogReg Accuracy: %.2f%s", logreg),
    ("Tree Accuracy: %.2f%s", tree),
)

for _, model in base_models:
    model.fit(X_train, y_train)

for report_format, model in base_models:
    print(report_format % (model.score(X_test, y_test) * 100, '%'))

# Equal-weight hard-voting ensemble over the three base models.
w = [1, 1, 1]
named_estimators = [('svm', svm), ('logreg', logreg), ('tree', tree)]
ensemble = VotingClassifier(estimators=named_estimators,
                            voting='hard', weights=w)
ensemble.fit(X_train, y_train)

ensemble_accuracy = ensemble.score(X_test, y_test) * 100
print("Ensemble Accuracy: %.2f%s" % (ensemble_accuracy, '%'))
    def _evaluate(self, Gs, Gs_kwargs, num_gpus):
        """Report FID plus two linear-SVM statistics for generator ``Gs``.

        Feature activations of a pickled Inception network are collected for
        ``self.num_images`` real and generated images, then three results are
        passed to ``self._report_result``:

        * ``-FID``: squared distance between the activation means plus
          ``trace(sigma_fake + sigma_real - 2 * sqrtm(sigma_fake . sigma_real))``;
        * ``-U``: ``1 -`` the training accuracy of a ``LinearSVC`` fitted to
          tell real (label 1) from fake (label 0) activations;
        * ``-P``: fraction of index-aligned pairs whose fake decision value
          exceeds the real one.

        Args:
            Gs: Generator network; cloned once per GPU.
            Gs_kwargs: Extra keyword arguments forwarded to
                ``Gs_clone.get_output_for``.
            num_gpus: Number of GPUs to build the graph on; also scales the
                minibatch size.
        """
        minibatch_size = num_gpus * self.minibatch_per_gpu
        inception = misc.load_pkl(
            'https://drive.google.com/uc?id=1MzTY44rLToO5APn8TZmfR7_ENSe5aZUn'
        )  # inception_v3_features.pkl
        # One row of features per image, for real and generated images alike.
        real_activations = np.empty(
            [self.num_images, inception.output_shape[1]], dtype=np.float32)
        fake_activations = np.empty(
            [self.num_images, inception.output_shape[1]], dtype=np.float32)

        # Construct TensorFlow graph.
        self._configure(self.minibatch_per_gpu, hole_range=self.hole_range)
        # NOTE(review): real_img_expr / fake_img_expr are filled below but
        # never read again in this method.
        real_img_expr = []
        fake_img_expr = []
        real_result_expr = []
        fake_result_expr = []
        for gpu_idx in range(num_gpus):
            with tf.device('/gpu:%d' % gpu_idx):
                Gs_clone = Gs.clone()
                inception_clone = inception.clone()
                latents = tf.random_normal([self.minibatch_per_gpu] +
                                           Gs_clone.input_shape[1:])
                reals, labels = self._get_minibatch_tf()
                reals_tf = tflib.convert_images_from_uint8(reals)
                masks = self._get_random_masks_tf()
                # The generator receives the real images and masks in
                # addition to latents and labels.
                fakes = Gs_clone.get_output_for(latents, labels, reals_tf,
                                                masks, **Gs_kwargs)
                # Only the first three channels are converted back to uint8.
                fakes = tflib.convert_images_to_uint8(fakes[:, :3])
                reals = tflib.convert_images_to_uint8(reals_tf[:, :3])
                real_img_expr.append(reals)
                fake_img_expr.append(fakes)
                real_result_expr.append(inception_clone.get_output_for(reals))
                fake_result_expr.append(inception_clone.get_output_for(fakes))

        # Run the graph one minibatch at a time; the last batch is truncated
        # so that exactly self.num_images rows are filled.
        for begin in tqdm(range(0, self.num_images, minibatch_size)):
            self._report_progress(begin, self.num_images)
            end = min(begin + minibatch_size, self.num_images)
            real_results, fake_results = tflib.run(
                [real_result_expr, fake_result_expr])
            real_activations[begin:end] = np.concatenate(real_results,
                                                         axis=0)[:end - begin]
            fake_activations[begin:end] = np.concatenate(fake_results,
                                                         axis=0)[:end - begin]

        # Calculate FID from the activation means and covariances.
        mu_real = np.mean(real_activations, axis=0)
        sigma_real = np.cov(real_activations, rowvar=False)
        mu_fake = np.mean(fake_activations, axis=0)
        sigma_fake = np.cov(fake_activations, rowvar=False)
        m = np.square(mu_fake - mu_real).sum()
        s, _ = scipy.linalg.sqrtm(np.dot(sigma_fake, sigma_real), disp=False)
        dist = m + np.trace(sigma_fake + sigma_real - 2 * s)
        # sqrtm may return a complex matrix; only the real part is reported.
        self._report_result(np.real(dist), suffix='-FID')

        # Linear SVM separating real (1) from fake (0) activations; it is
        # scored on the same data it was fitted on.
        svm = sklearn.svm.LinearSVC(dual=False)
        svm_inputs = np.concatenate([real_activations, fake_activations])
        svm_targets = np.array([1] * real_activations.shape[0] +
                               [0] * fake_activations.shape[0])
        svm.fit(svm_inputs, svm_targets)
        self._report_result(1 - svm.score(svm_inputs, svm_targets),
                            suffix='-U')
        # Element-wise comparison of decision values at matching indices.
        real_outputs = svm.decision_function(real_activations)
        fake_outputs = svm.decision_function(fake_activations)
        self._report_result(np.mean(fake_outputs > real_outputs), suffix='-P')
#%%

# Candidate hyper-parameters, log-spaced.
Cs = numpy.logspace(0, 2, 8)
gammas = numpy.logspace(-6, -4, 10)

# Size the score grid and the tick ranges from the parameter lists instead of
# repeating the literals 8 and 10, so changing a list cannot desynchronise them.
accuracies = numpy.zeros((len(Cs), len(gammas)))
for i, c in enumerate(Cs):
    for j, gamma in enumerate(gammas):
        svm = sklearn.svm.SVR(C = c, gamma = gamma)

        # Only the first 1000 training rows are used for each fit.
        svm.fit(trainingx[:1000], trainingy[:1000])

        accuracies[i, j] = svm.score(testingx, testingy)
        # Progress output; same text as `print i, j`, valid on Python 2 and 3.
        print('%d %d' % (i, j))

# Rows are C values, columns are gamma values.
plt.imshow(accuracies, interpolation = 'NONE')
plt.xticks(range(len(gammas)), ['{:2.2e}'.format(v) for v in gammas])
plt.yticks(range(len(Cs)), ['{:2.1e}'.format(v) for v in Cs])
plt.gcf().set_size_inches((10, 10))
plt.colorbar()
plt.show()

#%%

# Refit with one hand-picked parameter pair and print its test score.
svm = sklearn.svm.SVR(C = 5.0, gamma = 1e-5)
svm.fit(trainingx[:1000], trainingy[:1000])

print(svm.score(testingx, testingy))
Example #32
0
# Remove the listed columns from df in place before modelling.
df.drop(['scan_date', 'dod', 'Measure.volume', 'Braak_Lewy', 'Braak_NFT', 'Braak_AB'], axis=1, inplace=True)

# 'Group' is the target; every remaining column is a feature.
features = df.drop('Group', axis=1)
target = df['Group']
X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.30, random_state=101)

# Logistic regression first, then a linear-kernel SVM, on the same split.
# NOTE: `svm` is rebound from the module to the estimator instance.
lr = LogisticRegression()
svm = svm.SVC(kernel='linear')
for train_tag, test_tag, model in (
    ("LR Training:", "LR Test:", lr),
    ("SVM Training:", "SVM Test:", svm),
):
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    print(train_tag, model.score(X_train, y_train))
    print(test_tag, model.score(X_test, y_test))
    print(classification_report(y_test, y_pred))

from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC, LinearSVC
from sklearn.neighbors import KNeighborsClassifier

# Define the classifiers
# Four estimators, each constructed with its default parameters.
classifiers = [LogisticRegression(), LinearSVC(), SVC(), KNeighborsClassifier()]

# Fit the classifiers
# Every estimator is trained in place on the same X_train / y_train.
for c in classifiers:
    c.fit(X_train, y_train)

# Plot the classifiers
from sklearn import svm

def _load_pickle(path):
    """Return the object unpickled from *path*, closing the file afterwards."""
    # pickle can execute arbitrary code: only load files this project produced.
    with open(path, 'rb') as handle:
        return pickle.load(handle)


#Don't forget to change this value AND also The filename on the second to last line!
experiment_number = 10

# print(...) with one string argument prints the same text on Python 2 and 3.
print("Loading datasets...")
# Alternative feature sets; swap the active line to evaluate another model.
#train_samples = _load_pickle("Models/SC/trainset%d.pkl"%experiment_number)
#train_samples = _load_pickle("Models/AutoE/trainset%d.pkl"%experiment_number)
train_samples = _load_pickle("Models/RBM/trainset%d.pkl"%experiment_number)
# Only the first 1000 targets are used.
train_outputs = _load_pickle("Models/train_outputs.pkl")[:1000]
#valid_samples = _load_pickle("Models/SC/validset%d.pkl"%experiment_number)
#valid_samples = _load_pickle("Models/AutoE/validset%d.pkl"%experiment_number)
valid_samples = _load_pickle("Models/RBM/validset%d.pkl"%experiment_number)
valid_outputs = _load_pickle("Models/valid_outputs.pkl")[:1000]

print("Training the svm")
# NOTE: `svm` is rebound from the module to the estimator instance.
svm = svm.SVC() # Default uses RBF
svm.fit(train_samples, train_outputs)

print("Predicting...")
train_score = svm.score(train_samples,train_outputs)
valid_score = svm.score(valid_samples,valid_outputs)

print("Training accuracy: %.3f, validation accuracy¸: %.3f"%(train_score, valid_score))

#Save the output to file.
# Append mode: earlier experiment results in the file are kept.
with open("Outputs/RBM_param_tests.txt", "a") as myfile:
    myfile.write("Experiment %d, training accuracy: %.3f, validation accuracy: %.3f \n"%(experiment_number,train_score, valid_score))


Example #34
0




"""#> **SVM**"""

from sklearn import svm
# NOTE: `svm` is rebound from the module to the estimator instance (C=1000).
svm = svm.SVC(C=1000)

# NOTE(review): attribute reference without a call -- it has no effect when
# run as a script; presumably a leftover notebook cell output.
svm.get_params

svm.fit(X_train,y_train)
y_svm = svm.predict(X_test)

# The two score values below are computed but not stored or printed.
svm.score(X_train,y_train)

svm.score(X_test,y_test)

# generate_model_report is defined outside this excerpt; it receives the true
# and predicted test labels.
generate_model_report(y_test,y_svm)

from yellowbrick.classifier.rocauc import roc_auc
# ROC/AUC visualisation with the three class display names listed below.
roc_auc(svm, X_train, y_train, X_test=X_test, y_test=y_test, classes=["Low_damage","Medium_damage","High_damage"])

# from sklearn.model_selection import GridSearchCV
# parameters={"C":[1,10,100,500,1000],'gamma':['auto','scale']}
# sv = GridSearchCV(svm, parameters,scoring= 'f1',
#  cv=5)
# sv.fit(X_train,y_train)
# params = sv.best_params_
# svm2 = svm.SVC()
Example #35
0
# Plot bounds: the range of the first two feature columns, padded by 0.5.
x_min, x_max = X[:, 0].min() - .5, X[:, 0].max() + .5
y_min, y_max = X[:, 1].min() - .5, X[:, 1].max() + .5
# Evaluation grid with step h.
xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))

for svm, title, ax in zip(models, titles, sub.flatten()):
    # The original also ran svm.predict over the whole mesh into an unused
    # variable `Z`; that wasted prediction pass has been removed.
    plot_contours(ax, svm, xx, yy, cmap=plt.cm.brg, alpha=0.8)
    # Overlay the test points, coloured by their true class.
    ax.scatter(X_test[:, 0],
               X_test[:, 1],
               c=Y_test,
               cmap=plt.cm.brg,
               s=20,
               edgecolors='k',
               marker='D')
    ax.set_xlim(xx.min(), xx.max())
    ax.set_ylim(yy.min(), yy.max())
    ax.set_xlabel('Sepal length')
    ax.set_ylabel('Sepal width')
    ax.set_xticks(())
    ax.set_yticks(())
    # Errors are shown as percentages rounded to two decimals.
    train_error = str(round(100 * (1 - svm.score(X_train, Y_train)), 2))
    test_error = str(round(100 * (1 - svm.score(X_test, Y_test)), 2))
    ax.set_title(title + ' \n traning error: ' + train_error + '%'
                 ' \n test error: ' + test_error + '%')

plt.show()
Example #36
0
svm.fit(X_train, y_train)

# NOTE: these SVM predictions are overwritten by the KNN ones below before
# they are used.
predictions = svm.predict(X_test)

# KNN
knn = KNeighborsClassifier()
knn.fit(X_train, y_train)
predictions = knn.predict(X_test)

# Scatter of the KNN predictions against the true test values.
plt.title("KNN Predictions vs Actual")
plt.scatter(y_test, predictions)
plt.xlabel("Actual Values")
plt.ylabel("Predictions")
plt.show()

# Adapted from https://towardsdatascience.com/solving-a-simple-classification-problem-with-python-fruits-lovers-edition-d20ab6b071d2
# Train and test accuracy for each model, two decimals, in the original order.
for train_message, test_message, model in (
    ('Accuracy of Logistic regression classifier on training set: {:.2f}',
     'Accuracy of Logistic regression classifier on test set: {:.2f}',
     lm),
    ('Accuracy of SVM classifier on training set: {:.2f}',
     'Accuracy of SVM classifier on test set: {:.2f}',
     svm),
    ('Accuracy of K-NN classifier on training set: {:.2f}',
     'Accuracy of K-NN classifier on test set: {:.2f}',
     knn),
):
    print(train_message.format(model.score(X_train, y_train)))
    print(test_message.format(model.score(X_test, y_test)))
Example #37
0
# Bag-of-words features; the token pattern also keeps single-character tokens.
cv = CountVectorizer(token_pattern=r"(?u)\b\w+\b")
X = cv.fit_transform(split_corpus).toarray()

# Build the label vector: spam is labelled 0, normal messages are labelled 1.
# NOTE(review): assumes split_corpus holds 5000 spam documents followed by
# 5000 normal ones -- confirm against the code that builds it.
y = [0] * 5000 + [1] * 5000

# Split the feature set into a training set and a test set (40% test).
X_train, X_test, y_train, y_test = model_selection.train_test_split(X, y, test_size=0.4, random_state = 0)

# Train a classification model with an SVM.
# NOTE: `svm` is rebound from the module to the estimator instance.
svm = svm.SVC(kernel='rbf', gamma=0.7, C = 1.0)
svm.fit(X_train, y_train)

# SVM classification performance.
y_pred_svm = svm.predict(X_test)

print("SVM accuracy:\n",svm.score(X_test, y_test))
print("SVM report:\n",metrics.classification_report(y_test, y_pred_svm))
print("SVM matrix:\n",metrics.confusion_matrix(y_test, y_pred_svm))

# Train a naive Bayes classifier and report its classification results.
gnb = GaussianNB()
gnb.fit(X_train,y_train)

y_pred = gnb.predict(X_test)

# Naive Bayes classification performance.
print("naive_bayes accuracy:\n",gnb.score(X_test, y_test))
print("naive_bayes report:\n",metrics.classification_report(y_test, y_pred))
print("naive_bayes matrix:\n",metrics.confusion_matrix(y_test, y_pred))