Example 1
 def __init__(self, task='spam'):
     super(TaskTrainer, self).__init__()
     from sklearn.svm import SVC as SVM
     from sklearn.tree import DecisionTreeClassifier as DT          # assumed alias for DT used below
     from sklearn.linear_model import LogisticRegression            # used in the 'spam' branch
     self.task = task
     if task == 'vehicle':
         self.env = SVM(C=1e2, kernel='rbf', random_state=0) # For vehicle task
     elif task == 'page':
         self.env = SVM(C=1e2, kernel='rbf', random_state=0, gamma=1e-2) # For page blocks
     elif task == 'credit':
         self.env = DT(max_depth=4) # For credit card task
     elif task == 'spam':
         self.env = LogisticRegression(C=1e2, random_state=0) # For spam detection task
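Most snippets in this collection bind the short name SVM (and similar aliases) to a scikit-learn estimator at import time, as Example 1 does with SVC. A minimal sketch of that convention; the exact aliases are assumptions, since each snippet's surrounding file defines its own:

    from sklearn.svm import SVC as SVM        # kernelized SVM, as imported in Example 1
    from sklearn.svm import LinearSVC         # linear SVM, aliased to SVM in Example 27
    from sklearn.svm import OneClassSVM       # outlier detector wrapped in Example 5

    clf = SVM(C=1e2, kernel='rbf', random_state=0)   # mirrors the 'vehicle' branch above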
Example 2
    def __init__(self, train_X, test_X, train_Y, test_Y, agent, classifier,
                 save_conf_mat):
        self.train_X = train_X
        self.test_X = test_X
        self.train_Y = train_Y
        self.test_Y = test_Y
        self.classifier = classifier

        if (self.classifier.lower() == 'knn'):
            self.clf = KNN()
        elif (self.classifier.lower() == 'rf'):
            self.clf = RF()
        elif (self.classifier.lower() == 'svm'):
            self.clf = SVM()
        else:
            self.clf = None
            print('\n[Error!] We don\'t currently support the {} classifier...\n'
                  .format(classifier))
            exit(1)

        self.agent = agent
        if self.agent is None:
            self.agent = np.ones(train_X.shape[1])  # default: use all features
        self.predictions = self.classify()
        self.accuracy = self.compute_accuracy()
        self.precision = self.compute_precision()
        self.recall = self.compute_recall()
        self.f1_score = self.compute_f1()
        self.confusion_matrix = self.compute_confusion_matrix()
        self.plot_confusion_matrix(save_conf_mat)
Example 3
def main(x, y, task):

    #ys = [yr, ym, y25]
    #y_names = ['readm', 'mort_h', 'pheno25']
    #xs = [x48, onehot, w2v, w48, sentences]
    #x_names = ['48h', 'sparse_dx', 'w2v', 'w2v_48h', 'sentences']

    lr = LR(C=1e-4, penalty='l2', verbose=1)  #sag if multiclass/multilabel
    svm = SVM(C=1e5, verbose=True)
    rf = RF(n_estimators=60, verbose=1)
    gbc = GBC(n_estimators=200, learning_rate=1e-3, verbose=1)

    models = [lr, svm, rf, gbc]
    names = ['LR', 'SVM', 'RF', 'GBC']
    data = {}
    for name, model in zip(names, models):
        if task != 'binary':
            data[name] = {}
            for ix in range(25):    # one run per phenotype column
                data[name][ix] = run_experiment(x, y[:, ix], model, task)
        else:
            data[name] = run_experiment(x, y, model, task)

    return data
Example 4
def _build_target_classifier(args, samples, one_vs_all_labels,
                             transferability_values):
    # Note: the callable ignores its (x, y) arguments and always builds the
    # full kernel matrix from the enclosing scope.
    compute_kernel = lambda x, y: _compute_target_classifier_kernel_matrix(
        args, samples, one_vs_all_labels, transferability_values)
    classifier = SVM(kernel=compute_kernel)
    classifier.fit(samples, one_vs_all_labels)
    return classifier
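For context, scikit-learn's SVC accepts a callable kernel, which must return the Gram matrix between its two array arguments. A self-contained sketch on synthetic data (the names here are illustrative, not from the original project):

    import numpy as np
    from sklearn.svm import SVC as SVM

    def linear_kernel(X, Y):
        # Gram matrix of shape (n_samples_X, n_samples_Y)
        return X @ Y.T

    rng = np.random.RandomState(0)
    X = rng.randn(40, 5)
    y = (X[:, 0] > 0).astype(int)
    clf = SVM(kernel=linear_kernel).fit(X, y)
    print(clf.score(X, y))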
Example 5
 def __init__(self,
              kernel='rbf',
              gamma='scale',
              tol=0.001,
              nu=0.5,
              shrinking=True,
              max_iter=1000):
     """
     Unsupervised Outlier Detection.
     Arguments
     ---------
         kernel : {'linear', 'poly', 'rbf', 'sigmoid', 'precomputed'}, optional (default='rbf')
             Specifies the kernel type to be used in the algorithm.
             It must be one of 'linear', 'poly', 'rbf', 'sigmoid', 'precomputed' or a callable.
         gamma : {'scale', 'auto'} or float, default='scale'
             Kernel coefficient for 'rbf', 'poly' and 'sigmoid'.
         tol : float, default=1e-3
             Tolerance for the stopping criterion.
         nu : float, default=0.5
             An upper bound on the fraction of training errors and a lower bound on the
             fraction of support vectors. Should be in the interval (0, 1]; 0.5 by default.
         max_iter : int, default=1000
             Hard limit on iterations within the solver, or -1 for no limit
             (this wrapper defaults to 1000).
     Reference
     ---------
         For more information, please visit https://scikit-learn.org/stable/modules/generated/sklearn.svm.OneClassSVM.html
     """
     self.model = SVM(kernel=kernel,
                      gamma=gamma,
                      tol=tol,
                      nu=nu,
                      shrinking=shrinking,
                      max_iter=max_iter)
     self.transformer = None
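A minimal usage sketch for a wrapper like this, assuming SVM is sklearn.svm.OneClassSVM (per the reference link) and that the fitted estimator is reached through self.model:

    import numpy as np
    from sklearn.svm import OneClassSVM as SVM

    rng = np.random.RandomState(0)
    X_train = 0.3 * rng.randn(100, 2)                  # mostly inliers
    X_test = np.vstack([0.3 * rng.randn(20, 2),
                        rng.uniform(low=-4, high=4, size=(5, 2))])

    model = SVM(kernel='rbf', gamma='scale', nu=0.5, max_iter=1000)
    model.fit(X_train)
    print(model.predict(X_test))                       # +1 = inlier, -1 = outlier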
Example 6
def train_model(param):
    model = SVM()
    C = 10 ** param
    print("Param C =", C)
    model.set_params(C=C, kernel='linear')
    model.fit(X_train, Y_train)
    print("Test Accuracy:", model.score(X_test, Y_test))
    return model
Example 7
 def fit(self, X, y):
     for C in self.c_values:
         svm = SVM(dual=False, penalty="l1", C=C, class_weight='balanced')  # 'auto' is no longer accepted by scikit-learn
         svm.fit(X, y)
         if np.sum(svm.coef_!=0) >= self.n_non_null:
             self.svm = svm
             self.coef_ = self.svm.coef_
             self.C = C
             break
     return self
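The loop picks the smallest C whose L1-penalized linear SVM keeps at least self.n_non_null nonzero coefficients. A standalone sketch of the sparsity effect it relies on, assuming SVM is sklearn.svm.LinearSVC (synthetic data; c_values and n_non_null live on the original class):

    import numpy as np
    from sklearn.datasets import make_classification
    from sklearn.svm import LinearSVC as SVM

    X, y = make_classification(n_samples=200, n_features=30, n_informative=5, random_state=0)
    for C in (0.01, 0.1, 1.0):
        svm = SVM(dual=False, penalty="l1", C=C, max_iter=5000).fit(X, y)
        print(C, int(np.sum(svm.coef_ != 0)))          # larger C keeps more features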
Example 8
def optimal_svm_kernel(X_train, y_train, X_validate, y_validate):
    best_kernel = None
    best_accuracy = 0
    for kernel in ['linear', 'rbf', 'poly', 'sigmoid']:
        classifier = SVM(kernel=kernel)          # assumes SVM is sklearn.svm.SVC
        classifier.fit(X_train, y_train)
        accuracy = classifier.score(X_validate, y_validate)
        if best_kernel is None or accuracy > best_accuracy:
            best_accuracy = accuracy
            best_kernel = kernel
        print(kernel, ":", accuracy)
    return best_kernel, best_accuracy
Example 9
def _get_source_class_classifiers(args, all_samples):
    # source_class_classifiers[i] corresponds to source_class_list[i]
    source_class_classifiers = []
    for source_class in args.source_class_list:
        one_vs_all_labels = [0] * len(all_samples)
        for i in range(len(args.source_samples)):
            one_vs_all_labels[i] = 1 if args.source_labels[i] == source_class else 0
        classifier = SVM(kernel="linear")
        classifier.fit(all_samples, one_vs_all_labels)
        source_class_classifiers.append(classifier)
    return source_class_classifiers
Example 10
    def train(self):
        logging.info('-' * 20)
        logging.info('Start training the %s model', self.model)
        train_data = self.feature_extractor.extract_feature(
            self.data_loader.get_trainset())
        if self.model == 'GNB':
            # Gaussian naive bayes
            self.classifier = GNB()
        elif self.model == 'BNB':
            # Bernoulli naive bayes
            self.classifier = BNB()
            # self.tok = RT(r'\w+')
            # vectorizer = Vectorizer(tokenizer=self.tok.tokenize)
            # train_data = self.data_loader.get_trainset()
            # train_data = [vectorizer.fit_transform(train_data[0]).toarray(), train_data[1]]
            # self.vocabulary = vectorizer.get_feature_names()
        elif self.model == 'MNB':
            # Multinomial naive bayes
            self.classifier = MNB()
        elif self.model == 'LR':
            # Logistic regression
            param = {'C': [10, 5, 2, 1, 0.5, 0.2, 0.1, 0.05, 0.02, 0.01]}
            self.classifier = GS(cv=5,
                                 estimator=LR(penalty=self.penalty,
                                              max_iter=self.epoch,
                                              solver='liblinear'),
                                 param_grid=param)
        elif self.model == 'SVM':
            # Support vector machine
            self.penalty = self.penalty if self.penalty in ['l1', 'l2'] else 'l2'
            dual = self.penalty == 'l2'
            #self.classifier = SVM(penalty=self.penalty, C=self.c, max_iter=self.epoch, dual=dual)
            param = {'C': [10, 5, 2, 1, 0.5, 0.2, 0.1, 0.05, 0.02, 0.01]}
            self.classifier = GS(cv=5,
                                 estimator=SVM(penalty=self.penalty,
                                               dual=dual,
                                               max_iter=self.epoch),
                                 param_grid=param)

        elif self.model == 'R':
            # RandomGuess
            self.classifier = DC(strategy='stratified')
        else:
            logging.info('Unsupported model : %s', self.model)
            exit(0)

        self.classifier.fit(train_data[0], train_data[1])
        predictions = self.classifier.predict(train_data[0])
        acc = evaluator.accuracy_score(train_data[1], predictions)
        return acc
Example 11
                def iterate(cself, svm, selectors, instances, K):
                    # classes, neg_selectors, bs, qp and K_all come from the enclosing closure
                    cself.mention('Training SVM...')
                    alphas, obj = qp.solve(cself.verbose)

                    # Construct SVM from solution
                    svm = SVM(kernel=self.kernel,
                              gamma=self.gamma,
                              p=self.p,
                              verbose=self.verbose,
                              sv_cutoff=self.sv_cutoff)
                    svm._X = instances
                    svm._y = classes
                    svm._alphas = alphas
                    svm._objective = obj
                    svm._compute_separator(K)
                    svm._K = K

                    cself.mention('Recomputing classes...')
                    p_confs = svm.predict(bs.pos_instances)
                    pos_selectors = bs.L_n + np.array([
                        l + np.argmax(p_confs[l:u])
                        for l, u in slices(bs.pos_groups)
                    ])
                    new_selectors = np.hstack([neg_selectors, pos_selectors])

                    if selectors is None:
                        sel_diff = len(new_selectors)
                    else:
                        sel_diff = np.nonzero(new_selectors -
                                              selectors)[0].size

                    cself.mention('Selector differences: %d' % sel_diff)
                    if sel_diff == 0:
                        return None, svm
                    elif sel_diff > 5:
                        # Clear results to avoid a
                        # bad starting point in
                        # the next iteration
                        qp.clear_results()

                    cself.mention('Updating QP...')
                    indices = (new_selectors, )
                    K = K_all[indices].T[indices].T
                    D = spdiag(classes)
                    qp.update_H(D * K * D)
                    return {
                        'svm': svm,
                        'selectors': new_selectors,
                        'instances': bs.instances[indices],
                        'K': K
                    }, None
Example 12
    def __init__(self, train_X, test_X, train_Y, test_Y, agent, classifier,
                 save_conf_mat, averaging):

        self.agent = agent
        if self.agent is None:
            self.agent = np.ones(train_X.shape[1])

        cols = np.flatnonzero(self.agent)
        if (cols.shape[0] == 0):
            print('[Error!] There are 0 features in the agent......')
            exit(1)

        # store the train and test features and labels
        self.train_X = train_X[:, cols]
        self.test_X = test_X[:, cols]
        self.train_Y = train_Y
        self.test_Y = test_Y

        # set the classifier type
        self.classifier = classifier

        # get the unique labels
        self.labels = np.unique(train_Y)

        # select the averaging procedure
        if (len(self.labels) == 2):
            self.averaging = "binary"
        else:
            self.averaging = averaging

        # setup the classifier
        if (self.classifier.lower() == 'knn'):
            self.clf = KNN()
        elif (self.classifier.lower() == 'rf'):
            self.clf = RF()
        elif (self.classifier.lower() == 'svm'):
            self.clf = SVM()
        else:
            self.clf = None
            print('\n[Error!] We don\'t currently support the {} classifier...\n'
                  .format(classifier))
            exit(1)

        # call the member functions
        self.predictions = self.classify()
        self.accuracy = self.compute_accuracy()
        self.precision = self.compute_precision()
        self.recall = self.compute_recall()
        self.f1_score = self.compute_f1()
        self.confusion_matrix = self.compute_confusion_matrix()
        self.plot_confusion_matrix(save_conf_mat)
Example 13
def make_model(opts, input_shape, aux_shape):
    targets, multiclass, deep = get_setup(opts)

    if opts.model == 'lstm':
        if aux_shape:
            model = hierarchical_lstm(input_shape=input_shape, aux_shape=aux_shape,
                                      targets=targets, hidden=opts.hidden_size,
                                      multiclass=multiclass, learn_rate=opts.learning_rate)
        else:
            model = lstm_model(input_shape=input_shape, targets=targets, hidden=opts.hidden_size,
                               multiclass=multiclass, learn_rate=opts.learning_rate)
    elif opts.model == 'cnn':
        if aux_shape:
            model = hierarchical_cnn(input_shape=input_shape, aux_shape=aux_shape,
                                     targets=targets, hidden=opts.hidden_size,
                                     multiclass=multiclass, learn_rate=opts.learning_rate)
        else:
            model = cnn_model(input_shape=input_shape, targets=targets, hidden=opts.hidden_size,
                              multiclass=multiclass, learn_rate=opts.learning_rate)
    elif opts.model == 'mlp':
        model = mlp_model(input_shape=input_shape, targets=targets, hidden=opts.hidden_size,
                          multiclass=multiclass, learn_rate=opts.learning_rate)
    elif opts.model == 'svm':
        if targets > 1:
            model = OneVsRestClassifier(SVM(C=50000, kernel='rbf', max_iter=1000, verbose=True,
                                            decision_function_shape='ovr', probability=True))
        else:
            model = SVM(C=1e4, kernel='linear', verbose=True, probability=True, max_iter=1000)
    elif opts.model == 'rf':
        if targets > 1:
            model = RF(n_estimators=450, class_weight='balanced', criterion='entropy',
                       bootstrap=False, verbose=1)
        else:
            model = RF(n_estimators=450, verbose=1)
    elif opts.model == 'gbc':
        if targets > 1:
            model = OneVsRestClassifier(GBC(n_estimators=484, learning_rate=0.0984, verbose=1))
        else:
            model = GBC(n_estimators=400, learning_rate=0.09, verbose=1)
    elif opts.model == 'lr':
        if targets > 1:
            model = OneVsRestClassifier(LR(max_iter=1000, class_weight='balanced', multi_class='ovr',
                                           C=.09, penalty='l1', verbose=1))  # sag if multiclass/multilabel
        else:
            model = LR(C=1e-3, penalty='l2', verbose=1)  # sag if multiclass/multilabel
    else:
        model = None
    return model
Example 14
def classifySVM(Xtrain, Ytrain, Xval, Yval):

    # set range of C parameters to test
    #cvals = np.logspace(-2,7,40)
    cvals = [405]

    # set range of gamma values to test
    #gammas = np.logspace(-10,2,40)
    gammas = [0.00015]

    # find most accurate combination of C, gamma
    best_accuracy = 0
    best_model = None
    best_c = None
    best_g = None

    # cycle through all combinations of C, gamma
    for c in cvals:
        for g in gammas:
            clf = SVM(C=c,
                      cache_size=200,
                      class_weight=None,
                      coef0=0.0,
                      degree=3,
                      gamma=g,
                      kernel='rbf',
                      max_iter=-1,
                      probability=False,
                      random_state=None,
                      shrinking=True,
                      tol=0.001,
                      verbose=False)
            clf.fit(Xtrain, Ytrain)
            accuracy = clf.score(Xval, Yval)

            if accuracy >= best_accuracy:
                print("C =", c, " gamma =", g, " -> ", accuracy)
                best_accuracy = accuracy
                best_model = clf
                best_c = c
                best_g = g

    print("Optimal SVM parameters: C =", best_c, "and best gamma =", best_g)

    print("Best validation accuracy =", best_accuracy)
    train_accuracy = best_model.score(Xtrain, Ytrain)
    print("With training accuracy =", train_accuracy)

    return best_model
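The same hand-rolled search could be written with GridSearchCV; a minimal sketch on synthetic data, assuming SVM is sklearn.svm.SVC:

    from sklearn.datasets import make_classification
    from sklearn.model_selection import GridSearchCV
    from sklearn.svm import SVC as SVM

    X, y = make_classification(n_samples=200, random_state=0)
    search = GridSearchCV(SVM(kernel='rbf'), {'C': [405], 'gamma': [0.00015]}, cv=3)
    search.fit(X, y)
    print(search.best_params_, search.best_score_)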
Example 15
    def build_classifier(self):
        """
        Build both an LDA and an SVM classifier for offline training; can later be used for online training.
        :return:
        """
        self.clf = [LDA(n_components=None, priors=None, shrinkage='auto',
                        solver='eigen', store_covariance=False, tol=0.0001),
                    SVM(kernel='rbf', shrinking=True, probability=True, gamma='scale')]
        # self.clf.fit(self.features, self.labels)
        [c.fit(self.features, self.labels) for c in self.clf]
        # possible to use methods predict(X), predict_log_proba(X) or predict_proba()

        self.cv_scores = []
        [self.cv_scores.append(cvs(estimator=c, X=self.features, y=self.labels, cv=10, n_jobs=-1)) for c in self.clf]
        [print('mean cv score of clf {:d} is'.format(i), np.mean(cv)) for i, cv in enumerate(self.cv_scores)]
Example 16
def init_SVM(params):
    C = getattr(params, 'C', 1.0)
    kernel = getattr(params, 'kernel', 'rbf')
    degree = getattr(params, 'degree', 3)
    gamma = getattr(params, 'gamma', 'auto')
    coef0 = getattr(params, 'coef0', 0.0)
    shrinking = getattr(params, 'shrinking', True)
    probability = getattr(params, 'probability', False)
    tol = getattr(params, 'tol', 0.001)
    cache_size = getattr(params, 'cache_size', 200)
    class_weight = getattr(params, 'class_weight', None)
    verbose = getattr(params, 'verbose', False)
    max_iter = getattr(params, 'max_iter', -1)
    decision_function_shape = getattr(params, 'decision_function_shape', 'ovr')
    random_state = getattr(params, 'random_state', None)
    return SVM(C=C, kernel=kernel, degree=degree, gamma=gamma, coef0=coef0,
               shrinking=shrinking, probability=probability, tol=tol,
               cache_size=cache_size, class_weight=class_weight, verbose=verbose,
               max_iter=max_iter, decision_function_shape=decision_function_shape,
               random_state=random_state)
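Since init_SVM only uses getattr, any object with attribute access works for params. A hypothetical call, assuming the SVM alias is sklearn.svm.SVC:

    from types import SimpleNamespace

    params = SimpleNamespace(C=10.0, kernel='linear', probability=True)
    clf = init_SVM(params)    # unspecified attributes fall back to the defaults above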
Example 17
def impute(data, imputer, imp_method, params_dict):
    imp_data = None

    if imp_method == 'RandomReplace':
        imp_data = imputer.replace(data, params_dict['miss_data_cond'])
    elif imp_method == 'Drop':
        imp_data = imputer.drop(data, params_dict['miss_data_cond'])
    elif imp_method == 'Summary':
        imp_data = imputer.summarize(data,
                                     params_dict['summary_func'],
                                     params_dict['miss_data_cond'])
    elif imp_method == 'RandomForest':
        clf = RandomForestClassifier(n_estimators=100, criterion='gini')
        imp_data = imputer.predict(data,
                                   params_dict['cat_cols'],
                                   params_dict['miss_data_cond'],
                                   clf)

    elif imp_method == 'SVM':
        clf = SVM()
        imp_data = imputer.predict(data,
                                   params_dict['cat_cols'],
                                   params_dict['miss_data_cond'],
                                   clf)
    elif imp_method == 'LogisticRegression':
        clf = LogisticRegression()
        imp_data = imputer.predict(data,
                                   params_dict['cat_cols'],
                                   params_dict['miss_data_cond'],
                                   clf)
    elif imp_method == 'SVD':
        imp_data = imputer.factor_analysis(data,
                                           params_dict['cat_cols'],
                                           params_dict['miss_data_cond'],
                                           technique='SVD')

    elif imp_method == 'KNN':
        imp_data = imputer.knn(data,
                               params_dict['n_neighbors'],
                               params_dict['knn_summary_func'],
                               params_dict['miss_data_cond'],
                               params_dict['cat_cols'])
    elif imp_method == 'Identity':
        imp_data = data
    else:
        raise Exception("Imputation method {} is not valid".format(imp_method))
    return imp_data
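The if/elif chain could also be table-driven. A sketch under the assumption that the same custom imputer interface is available (only methods already used above appear):

    def impute_v2(data, imputer, imp_method, params_dict):
        dispatch = {
            'RandomReplace': lambda: imputer.replace(data, params_dict['miss_data_cond']),
            'Drop': lambda: imputer.drop(data, params_dict['miss_data_cond']),
            'Identity': lambda: data,
        }
        if imp_method not in dispatch:
            raise Exception("Imputation method {} is not valid".format(imp_method))
        return dispatch[imp_method]()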
Example 18
def kv_pipe(DX, labels, groups, tr_options, report_options):
    '''
    @pre: DX hash table database of dx histories for each patient per encounter. 
    DX codes are represented by index in ICD9 groups dictionary.
    - labels is hash table in {s: {y: 0/1, t: time} ... } format for each subject s in subset S. 
    - groups is a reference hash table of indices of ICD9 group codes. 
    - tr_options is a hash table with options for X_training options (e.g., flat, window size, ...)
    - report_options is a hash_table with options for result reporting 
    @post: pickle or h5 format for model and hash table of training and testing data
    '''
    random.seed(11)
    skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=7)  # random_state requires shuffle=True
    splits = [(i[0], list(i[1].items())[1][1]) for i in list(labels.items())]
    x_split, y_split = zip(*splits)
    x_split, y_split = np.array(x_split), np.array(y_split)

    for train_index, test_index in skf.split(x_split, y_split):
        #1. downsample training split
        pos = [i for i in train_index if y_split[i] == 1]
        neg = random.sample([i for i in train_index if y_split[i] == 0],
                            len(pos))
        tr_split = x_split[neg + pos]
        random.shuffle(tr_split)
        te_split = x_split[test_index]

        #2. split labels hash table by kfold
        tr_subj = {key: labels[key] for key in tr_split}
        te_subj = {key: labels[key] for key in te_split}

        #3. generate train = [(x_tr,y_tr), ...] and test = [(x_te, y_te), ...]
        train, test = generate_x_y(DX, tr_subj, te_subj)
        x_tr, y_tr = zip(*train)
        x_te, y_te = zip(*test)

        #4. train models
        lr = LR(warm_start=True, C=1e-3, penalty='l2',
                verbose=1)  #sag if multiclass/multilabel
        svm = SVM(C=1e4,
                  kernel='linear',
                  verbose=True,
                  probability=True,
                  max_iter=1000)
        rf = RF(warm_start=True, n_estimators=450, verbose=1)

    return
Example 19
def run_grid_search(data_x, data_y, clf_name, scoring):
    if clf_name == 'lr':
        print "Classifier: Logistic regression"
        clfs = [LR()]
        param_grid = {"C": [0.1, 0.25, 0.5, 1]}

    elif clf_name == 'rf':
        print "Classifier: Random forest"
        clfs = [
            RF(n_estimators=20,
               max_depth=20,
               criterion='entropy',
               bootstrap=False,
               max_features=20,
               min_samples_split=4,
               min_samples_leaf=4)
        ]
        # use a full grid over all parameters
        param_grid = {
            "max_features": [20, 25],
            "min_samples_split": [2, 3, 4],
            "min_samples_leaf": [2, 3, 4]
        }
    elif clf_name == 'svm':
        print "Classifier: SVM"
        clfs = [SVM(penalty='l2', loss='squared_hinge', dual=False)]
        param_grid = {"C": [0.001, 0.1, 1, 5]}
    else:
        print("Unsupported classifier:", clf_name)
        return  # avoid a NameError on clfs below
    # run grid search
    for clf in clfs:
        print(clf)
        grid_search = GridSearchCV(clf, param_grid=param_grid, scoring=scoring)
        start = time()
        grid_search.fit(data_x, data_y)

        print(
            "GridSearchCV took %.2f seconds for %d candidate parameter settings."
            % (time() - start, len(grid_search.cv_results_['params'])))
        report(grid_search.cv_results_)
Example 20
def r_search(x, y):
    #random search params
    lr_params = {'penalty': ['l1', 'l2'], 'C': sp_rand(1e-5, .1)}
    svm_params = {'kernel': ['rbf', 'linear'], 'C': sp_rand(10, 1e5)}
    rf_params = {
        'criterion': ['gini', 'entropy'],
        'n_estimators': sp_randint(50, 200),
        'bootstrap': [True, False]
    }
    gbc_params = {
        'learning_rate': sp_rand(1e-6, 1e-1),
        'n_estimators': sp_randint(50, 200),
        'loss': ['deviance', 'exponential']
    }

    data = {}
    xs, ys = balanced_subsample(x, y)
    lst = [LR(verbose=1), RF(verbose=1), SVM(verbose=True), GBC(verbose=1)]
    names = ['LR', 'RF', 'SVM', 'GB']
    params = [lr_params, rf_params, svm_params, gbc_params]
    for idx in range(len(lst)):
        n_iter_search = 60
        start = time.time()
        rsearch = random_search(estimator=lst[idx],
                                param_distributions=params[idx],
                                n_iter=n_iter_search,
                                scoring='roc_auc',
                                n_jobs=1,
                                refit=True,
                                cv=5,
                                verbose=0,
                                random_state=8)  # fit_params/iid were removed from newer scikit-learn
        rsearch.fit(xs, ys)
        data[names[idx]] = rsearch.cv_results_
        print(names[idx] + " results complete.")
        print("RandomizedSearchCV took %.2f seconds for %d candidates"
              " parameter settings." % ((time.time() - start), n_iter_search))
    return data
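On newer scikit-learn the same search runs without the removed fit_params and iid arguments; a minimal sketch with one estimator and synthetic data:

    from scipy.stats import uniform as sp_rand
    from sklearn.datasets import make_classification
    from sklearn.linear_model import LogisticRegression as LR
    from sklearn.model_selection import RandomizedSearchCV

    X, y = make_classification(n_samples=200, random_state=0)
    rs = RandomizedSearchCV(LR(max_iter=1000), {'C': sp_rand(1e-5, 0.1)},
                            n_iter=10, scoring='roc_auc', cv=5, random_state=8)
    rs.fit(X, y)
    print(rs.best_params_)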
Example 21
def opt_svm(x_train, y_train, x_valid, y_valid):
    """Obtem os melhores valores de C e kernel para o SVM"""

    C = [0.01, 0.1, 1]
    kernel = ['poly', 'rbf']

    res = np.zeros((len(kernel), len(C)))

    for i, k in enumerate(kernel):
        for j, c in enumerate(C):
            svm = SVM(C=c, kernel=k, random_state=0)
            svm.fit(x_train, y_train)
            res[i][j] = math.sqrt(sk.metrics.mean_squared_error(svm.predict(x_valid),
                                                                y_valid))

    i, j = np.unravel_index(res.argmin(), res.shape)

    print(f'SVM = {res[i][j]}')
    print(f':: kernel = {kernel[i]}')
    print(f':: C      = {C[j]}')

    return res[i][j]
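np.unravel_index converts the flat argmin back into (row, column) coordinates of res; a quick illustration:

    import numpy as np

    res = np.array([[3.0, 1.0],
                    [2.0, 5.0]])
    i, j = np.unravel_index(res.argmin(), res.shape)
    print(i, j, res[i][j])    # 0 1 1.0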
Example 22
    def __init__(self, train_X, test_X, train_Y, test_Y, agent, classifier,
                 save_conf_mat):

        self.agent = agent
        if self.agent is None:
            self.agent = np.ones(train_X.shape[1])

        cols = np.flatnonzero(self.agent)
        if (cols.shape[0] == 0):
            print('[Error!] There are 0 features in the agent......')
            exit(1)

        self.train_X = train_X[:, cols]
        self.test_X = test_X[:, cols]
        self.train_Y = train_Y
        self.test_Y = test_Y
        self.classifier = classifier

        if (self.classifier.lower() == 'knn'):
            self.clf = KNN()
        elif (self.classifier.lower() == 'rf'):
            self.clf = RF()
        elif (self.classifier.lower() == 'svm'):
            self.clf = SVM()
        else:
            self.clf = None
            print('\n[Error!] We don\'t currently support the {} classifier...\n'
                  .format(classifier))
            exit(1)

        self.predictions = self.classify()
        self.accuracy = self.compute_accuracy()
        self.precision = self.compute_precision()
        self.recall = self.compute_recall()
        self.f1_score = self.compute_f1()
        self.confusion_matrix = self.compute_confusion_matrix()
        self.plot_confusion_matrix(save_conf_mat)
Example 23
                def iterate(cself, svm, classes):
                    cself.mention('Training SVM...')
                    D = spdiag(classes)
                    qp.update_H(D * K * D)
                    qp.update_Aeq(classes.T)
                    alphas, obj = qp.solve(cself.verbose)

                    # Construct SVM from solution
                    svm = SVM(kernel=self.kernel,
                              gamma=self.gamma,
                              p=self.p,
                              verbose=self.verbose,
                              sv_cutoff=self.sv_cutoff)
                    svm._X = bs.instances
                    svm._y = classes
                    svm._alphas = alphas
                    svm._objective = obj
                    svm._compute_separator(K)
                    svm._K = K

                    cself.mention('Recomputing classes...')
                    p_conf = svm._predictions[-bs.L_p:]
                    pos_classes = np.vstack([
                        _update_classes(part)
                        for part in partition(p_conf, bs.pos_groups)
                    ])
                    new_classes = np.vstack(
                        [-np.ones((bs.L_n, 1)), pos_classes])

                    class_changes = round(
                        np.sum(np.abs(classes - new_classes) / 2))
                    cself.mention('Class Changes: %d' % class_changes)
                    if class_changes == 0:
                        return None, svm

                    return {'svm': svm, 'classes': new_classes}, None
Example 24
        output = net.forwardToHidden(data)
        Y.append(target.to("cpu"))
        X.append(output.to("cpu"))

    return torch.cat(X, dim=0).data.numpy(), torch.cat(Y, dim=0).data.numpy()


print("Creating Networks")
hps = setup_hparams(sys.argv[1:])
logger, net = setup_network(hps)
net = net.to(device)

print("loading Data")
trainloader, valloader, testloader = get_dataloaders(bs=4)

print("Passing training data through network")
xtrain, ytrain = passThroughNetwork(trainloader)
# print("Passing validation data through network")
# xval, yval = passThroughNetwork(valloader)
print("Passing test data through network")
xtest, ytest = passThroughNetwork(testloader)

print("Training SVM on CPU")
svm = SVM(kernel='rbf', C=1)
svm.fit(xtrain, ytrain)
pred = svm.predict(xtest)

acc = accuracy_score(ytest, pred)

print("Accuracy on test Set:", acc * 100)
Example 25
def main():

	config = init()
	print("Mangrove Classification")
	print("SVM Classifier")
	print("Classifying with: ", config.hyperparams)
	print("Segmenting with: ", config.segment)
	print("="*50)

	if config.regenerate_features:

		print("Getting data lists...")
		(train_img_list, train_lbl_list), (test_img_list, test_lbl_list) = get_image_paths(config.image_paths)

		print("Computing average SuperPixel size...")
		avg_size = compute_avg_size(train_img_list, config.segment)
		print("Average superpixel size is: {}".format(avg_size))

		print("Generating features...")
		train_features, train_labels = extract_features(train_img_list, train_lbl_list, config, avg_size)
		test_features, test_labels = extract_features(test_img_list, test_lbl_list, config, avg_size)

		print("Preprocessing...")
		preprocessor = get_preprocessor(config.preprocess, train_features)
		train_features = preprocessor.process(train_features)
		test_features = preprocessor.process(test_features)

		print("Saving features...")
		train_file = open(config.save_path["train_features"], 'wb')
		test_file = open(config.save_path["test_features"], 'wb')

		train_save = (train_features, train_labels)
		test_save = (test_features, test_labels)

		pickle.dump(train_save, train_file)
		pickle.dump(test_save, test_file)

		train_file.close()
		test_file.close()

	else:

		print("Loading features...")
		train_file = open(config.save_path["train_features"], 'rb')
		test_file = open(config.save_path["test_features"], 'rb')

		train_features, train_labels = pickle.load(train_file)
		test_features, test_labels = pickle.load(test_file)

		train_file.close()
		test_file.close()

	print("Unique Labels:")
	print(np.unique(train_labels))

	print("Training...")
	start_time = time.time()
	classifier = SVM(**config.hyperparams)
	classifier.fit(train_features, train_labels)
	elapsed_time = time.time() - start_time
	print("Training took {0:.2f} seconds".format(elapsed_time))

	print("Predicting...")
	start_time = time.time()
	pred = classifier.predict(test_features)
	elapsed_time = time.time() - start_time
	print("Predicting took {0:.2f} seconds".format(elapsed_time))

	report, acc, iou, precision, confusion = evaluate(test_labels, pred)
	save_results(report, acc, iou, precision, confusion, config.save_path["results"])

	print(report)
	print()
	print("Accuracy: {0:.4f}".format(acc))
	print("Precision: {0:.4f}".format(precision))
	print("IOU: {0:.4f}".format(iou))
	print()
Example 26
def test_svm(data: list, label: list) -> None:
    svm = SVM(gamma="scale", kernel="linear")
    svm.fit(data, label)
    plot_model(np.array(data), svm)
Example 27
# -*- coding: utf-8 -*-
"""
Created on Thu May 23 15:21:35 2013

@author: ed203246
"""

from sklearn import datasets
from sklearn.svm import LinearSVC as SVM
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.feature_selection import SelectKBest
from epac.map_reduce.reducers import PvalPerms
import numpy

X, y = datasets.make_classification(n_samples=100,
                                    n_features=200,
                                    n_informative=2)
X = numpy.random.rand(*X.shape)

from epac import Perms, CV, Methods
perms_cv_svm = Perms(CV(Methods(SVM(loss="l1"), SVM(loss="l2"))), n_perms=100)
perms_cv_svm.run(X=X, y=y)
perms_cv_svm.reduce()

self = perms_cv_svm
key = 'LinearSVC(loss=l1)'
self = PvalPerms()
Example 28
def svm_ai(logfile, train_features, train_labels, test_features, test_labels,
           k):
    # Note: only k is used here; the returned SVM has not been fitted yet.
    return SVM(C=k, kernel='linear', cache_size=7000)
Example 29
x = vect.fit_transform(x)
print('First sample features after bag-of-words processing:')
print(x[0])

# Build TF-IDF features and normalize
tf_transformer = TfidfTransformer().fit(x)
x = tf_transformer.transform(x)
print('First sample features after TF-IDF processing:')
print(x[0])

# Randomly split into training and test sets
trainX, testX, trainY, testY = train_test_split(
    x, y, test_size=0.2, random_state=1)

# Support vector machine
model = SVM(C=1000.0, kernel='rbf', random_state=1)
start = time.perf_counter()
model.fit(trainX, trainY)
end = time.perf_counter()
print('Running time: %.3f ms' % ((end - start) * 1000))

# Print the evaluation results
expected = testY
predict = model.predict(testX)
print('Report:')
print(metrics.classification_report(expected, predict))

# Build the confusion matrix
labels = list(set(expected))
matrix = pd.DataFrame(metrics.confusion_matrix(
    expected, predict, labels=labels), index=labels, columns=labels)
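The bag-of-words, TF-IDF, and SVC steps could be chained into a single scikit-learn Pipeline; a sketch assuming vect above is a CountVectorizer:

    from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer
    from sklearn.pipeline import make_pipeline
    from sklearn.svm import SVC

    pipe = make_pipeline(CountVectorizer(), TfidfTransformer(),
                         SVC(C=1000.0, kernel='rbf', random_state=1))
    texts = ["spam spam offer", "meeting at noon", "free offer now", "lunch with team"]
    labels = [1, 0, 1, 0]
    pipe.fit(texts, labels)
    print(pipe.predict(["free spam offer"]))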
Example 30
print('imputing with one-hot')
data_onehot = imp.binarize_data(x, cat_cols)

# replace missing data with predictions using random forest
print('imputing with predicted values from random forest')
clf = RandomForestClassifier(n_estimators=100, criterion='gini')
data_rf = imp.predict(x, cat_cols, missing_data_cond, clf)

# replace missing data with predictions using SVM
print('imputing with predicted values using SVM')
clf = SVM(penalty='l2',
          loss='squared_hinge',
          dual=True,
          tol=0.0001,
          C=1.0,
          multi_class='ovr',
          fit_intercept=True,
          intercept_scaling=1,
          class_weight=None,
          verbose=0,
          random_state=None,
          max_iter=1000)
data_svm = imp.predict(x, cat_cols, missing_data_cond, clf)

# replace missing data with predictions using logistic regression
print('imputing with predicted values using logistic regression')
clf = LogisticRegression(penalty='l2',
                         dual=False,
                         tol=0.0001,
                         C=1.0,
                         fit_intercept=True,
                         intercept_scaling=1)