def __init__(self, task='spam'):
    super(TaskTrainer, self).__init__()
    from sklearn.svm import SVC as SVM
    self.task = task
    if task == 'vehicle':
        self.env = SVM(C=1e2, kernel='rbf', random_state=0)  # For vehicle task
    elif task == 'page':
        self.env = SVM(C=1e2, kernel='rbf', random_state=0, gamma=1e-2)  # For page blocks
    elif task == 'credit':
        self.env = DT(max_depth=4)  # For credit card task
    elif task == 'spam':
        self.env = LogisticRegression(C=1e2, random_state=0)  # For spam detection task
def __init__(self, train_X, test_X, train_Y, test_Y, agent, classifier, save_conf_mat):
    self.train_X = train_X
    self.test_X = test_X
    self.train_Y = train_Y
    self.test_Y = test_Y
    self.classifier = classifier
    if self.classifier.lower() == 'knn':
        self.clf = KNN()
    elif self.classifier.lower() == 'rf':
        self.clf = RF()
    elif self.classifier.lower() == 'svm':
        self.clf = SVM()
    else:
        self.clf = None
        print('\n[Error!] We don\'t currently support {} classifier...\n'.format(classifier))
        exit(1)
    # keep the agent passed in; default to all features when no agent is given
    # (the original only assigned self.agent in the None case, leaving it unset otherwise)
    self.agent = agent
    if self.agent is None:
        self.agent = np.ones(train_X.shape[1])
    self.predictions = self.classify()
    self.accuracy = self.compute_accuracy()
    self.precision = self.compute_precision()
    self.recall = self.compute_recall()
    self.f1_score = self.compute_f1()
    self.confusion_matrix = self.compute_confusion_matrix()
    self.plot_confusion_matrix(save_conf_mat)
def main(x, y, task):
    # ys = [yr, ym, y25]
    # y_names = ['readm', 'mort_h', 'pheno25']
    # xs = [x48, onehot, w2v, w48, sentences]
    # x_names = ['48h', 'sparse_dx', 'w2v', 'w2v_48h', 'sentences']
    lr = LR(C=1e-4, penalty='l2', verbose=1)  # sag solver if multiclass/multilabel
    svm = SVM(C=1e5, verbose=True)
    rf = RF(n_estimators=60, verbose=1)
    gbc = GBC(n_estimators=200, learning_rate=1e-3, verbose=1)
    models = [lr, svm, rf, gbc]
    names = ['LR', 'SVM', 'RF', 'GBC']
    data = {}
    for idx in range(len(models)):
        if task != 'binary':
            data[names[idx]] = {}
            for ix in range(25):
                dat = run_experiment(x, y[:, ix], models[idx], task)
                data[names[idx]][ix] = dat
        else:
            dat = run_experiment(x, y, models[idx], task)
            data[names[idx]] = dat
    return data
def _build_target_classifier(args, samples, one_vs_all_labels, transferability_values):
    # Note: this callable kernel ignores its (x, y) arguments and always builds
    # the Gram matrix from the full training set captured in the closure.
    compute_kernel = lambda x, y: _compute_target_classifier_kernel_matrix(
        args, samples, one_vs_all_labels, transferability_values)
    classifier = SVM(kernel=compute_kernel)
    classifier.fit(samples, one_vs_all_labels)
    return classifier
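# Below is a standalone sketch (not from the source) of sklearn's callable-kernel
# contract, assuming SVM above aliases sklearn.svm.SVC: the callable receives two
# sample matrices and must return a Gram matrix of shape (len(X), len(Y)).
import numpy as np
from sklearn.svm import SVC

def linear_kernel(X, Y):
    return X @ Y.T  # plain dot-product kernel

rng = np.random.RandomState(0)
X = rng.randn(30, 4)
y = (X[:, 0] > 0).astype(int)

clf = SVC(kernel=linear_kernel)
clf.fit(X, y)
print(clf.score(X, y))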
def __init__(self, kernel='rbf', gamma='scale', tol=0.001, nu=0.5, shrinking=True, max_iter=1000):
    """
    Unsupervised Outlier Detection.

    Arguments
    ---------
    kernel : {'linear', 'poly', 'rbf', 'sigmoid', 'precomputed'}, optional (default='rbf')
        Specifies the kernel type to be used in the algorithm. It must be one of
        'linear', 'poly', 'rbf', 'sigmoid', 'precomputed' or a callable.
    gamma : {'scale', 'auto'} or float, default='scale'
        Kernel coefficient for 'rbf', 'poly' and 'sigmoid'.
    tol : float, default=1e-3
        Tolerance for the stopping criterion.
    nu : float, default=0.5
        An upper bound on the fraction of training errors and a lower bound on
        the fraction of support vectors. Should be in the interval (0, 1].
    shrinking : bool, default=True
        Whether to use the shrinking heuristic.
    max_iter : int, default=1000
        Hard limit on iterations within the solver, or -1 for no limit.

    Reference
    ---------
    For more information, please visit
    https://scikit-learn.org/stable/modules/generated/sklearn.svm.OneClassSVM.html
    """
    self.model = SVM(kernel=kernel, gamma=gamma, tol=tol, nu=nu,
                     shrinking=shrinking, max_iter=max_iter)
    self.transformer = None
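# A minimal standalone sketch of the sklearn.svm.OneClassSVM estimator that the
# wrapper above delegates to (synthetic data; the nu value is an arbitrary choice):
import numpy as np
from sklearn.svm import OneClassSVM

rng = np.random.RandomState(0)
X_train = rng.randn(100, 2)                           # mostly inliers
X_test = np.vstack([rng.randn(5, 2), [[6.0, 6.0]]])  # one obvious outlier

ocsvm = OneClassSVM(kernel='rbf', gamma='scale', nu=0.1)
ocsvm.fit(X_train)
print(ocsvm.predict(X_test))  # +1 for inliers, -1 for outliers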
def train_model(param):
    model = SVM()
    C = 10 ** param
    print("Param C =", C)
    model.set_params(C=C, kernel='linear')
    model.fit(X_train, Y_train)
    print("Test Accuracy:", model.score(X_test, Y_test))
    return model
def fit(self, X, y):
    for C in self.c_values:
        # class_weight='auto' was removed from scikit-learn; 'balanced' is the
        # current equivalent.
        svm = SVM(dual=False, penalty="l1", C=C, class_weight='balanced')
        svm.fit(X, y)
        # Stop at the first C (in the given order) that yields at least
        # n_non_null non-zero coefficients.
        if np.sum(svm.coef_ != 0) >= self.n_non_null:
            self.svm = svm
            self.coef_ = self.svm.coef_
            self.C = C
            break
    return self
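# A standalone sketch of the selection idea above, assuming SVM aliases
# sklearn.svm.LinearSVC (penalty='l1' with dual=False matches its signature):
# the L1 penalty drives coefficients to exactly zero, so sweeping C and counting
# the non-zero entries of coef_ performs embedded feature selection.
import numpy as np
from sklearn.datasets import make_classification
from sklearn.svm import LinearSVC

X, y = make_classification(n_samples=200, n_features=20, n_informative=3,
                           random_state=0)
for C in [0.01, 0.05, 0.1, 1.0]:
    clf = LinearSVC(penalty='l1', dual=False, C=C, max_iter=5000).fit(X, y)
    print(C, int(np.sum(clf.coef_ != 0)))  # larger C keeps more features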
def optimal_svm_kernel(X_train, y_train, X_validate, y_validate):
    best_kernel = None
    best_accuracy = 0
    for kernel in ['linear', 'rbf', 'poly', 'sigmoid']:
        classifier = SVM(X_train, y_train, kernel)
        accuracy = classifier.score(X_validate, y_validate)
        if best_kernel is None or accuracy > best_accuracy:
            best_accuracy = accuracy
            best_kernel = kernel
        print(kernel, ":", accuracy)
    return best_kernel, best_accuracy
def _get_source_class_classifiers(args, all_samples):
    # To find the mapping between classifier and label, pair
    # source_class_classifiers[i] with source_class_list[i].
    source_class_classifiers = []
    for source_class in args.source_class_list:
        one_vs_all_labels = [0] * len(all_samples)
        for i in range(len(args.source_samples)):
            one_vs_all_labels[i] = 1 if args.source_labels[i] == source_class else 0
        classifier = SVM(kernel="linear")
        classifier.fit(all_samples, one_vs_all_labels)
        source_class_classifiers.append(classifier)
    return source_class_classifiers
def train(self):
    logging.info('-' * 20)
    logging.info('Start training the %s model', self.model)
    train_data = self.feature_extractor.extract_feature(self.data_loader.get_trainset())
    if self.model == 'GNB':  # Gaussian naive Bayes
        self.classifier = GNB()
    elif self.model == 'BNB':  # Bernoulli naive Bayes
        self.classifier = BNB()
        # self.tok = RT(r'\w+')
        # vectorizer = Vectorizer(tokenizer=self.tok.tokenize)
        # train_data = self.data_loader.get_trainset()
        # train_data = [vectorizer.fit_transform(train_data[0]).toarray(), train_data[1]]
        # self.vocabulary = vectorizer.get_feature_names()
    elif self.model == 'MNB':  # Multinomial naive Bayes
        self.classifier = MNB()
    elif self.model == 'LR':  # Logistic regression
        param = {'C': [10, 5, 2, 1, 0.5, 0.2, 0.1, 0.05, 0.02, 0.01]}
        self.classifier = GS(cv=5,
                             estimator=LR(penalty=self.penalty,
                                          max_iter=self.epoch,
                                          solver='liblinear'),
                             param_grid=param)
    elif self.model == 'SVM':  # Support vector machine
        self.penalty = self.penalty if self.penalty in ['l1', 'l2'] else 'l2'
        dual = self.penalty == 'l2'
        # self.classifier = SVM(penalty=self.penalty, C=self.c, max_iter=self.epoch, dual=dual)
        param = {'C': [10, 5, 2, 1, 0.5, 0.2, 0.1, 0.05, 0.02, 0.01]}
        self.classifier = GS(cv=5,
                             estimator=SVM(penalty=self.penalty, dual=dual,
                                           max_iter=self.epoch),
                             param_grid=param)
    elif self.model == 'R':  # Random guess
        self.classifier = DC(strategy='stratified')
    else:
        logging.info('Unsupported model : %s', self.model)
        exit(0)
    self.classifier.fit(train_data[0], train_data[1])
    predictions = self.classifier.predict(train_data[0])
    acc = evaluator.accuracy_score(train_data[1], predictions)
    return acc
def iterate(cself, svm, selectors, instances, K):
    # qp, classes, bs, neg_selectors and K_all come from the enclosing scope.
    cself.mention('Training SVM...')
    alphas, obj = qp.solve(cself.verbose)

    # Construct SVM from the QP solution
    svm = SVM(kernel=self.kernel, gamma=self.gamma, p=self.p,
              verbose=self.verbose, sv_cutoff=self.sv_cutoff)
    svm._X = instances
    svm._y = classes
    svm._alphas = alphas
    svm._objective = obj
    svm._compute_separator(K)
    svm._K = K

    cself.mention('Recomputing classes...')
    p_confs = svm.predict(bs.pos_instances)
    pos_selectors = bs.L_n + np.array(
        [l + np.argmax(p_confs[l:u]) for l, u in slices(bs.pos_groups)])
    new_selectors = np.hstack([neg_selectors, pos_selectors])

    if selectors is None:
        sel_diff = len(new_selectors)
    else:
        sel_diff = np.nonzero(new_selectors - selectors)[0].size

    cself.mention('Selector differences: %d' % sel_diff)
    if sel_diff == 0:
        return None, svm
    elif sel_diff > 5:
        # Clear results to avoid a bad starting point in the next iteration
        qp.clear_results()

    cself.mention('Updating QP...')
    indices = (new_selectors,)
    K = K_all[indices].T[indices].T
    D = spdiag(classes)
    qp.update_H(D * K * D)
    return {'svm': svm,
            'selectors': new_selectors,
            'instances': bs.instances[indices],
            'K': K}, None
def __init__(self, train_X, test_X, train_Y, test_Y, agent, classifier, save_conf_mat, averaging):
    self.agent = agent
    if self.agent is None:
        self.agent = np.ones(train_X.shape[1])
    cols = np.flatnonzero(self.agent)
    if cols.shape[0] == 0:
        print('[Error!] There are 0 features in the agent......')
        exit(1)

    # store the train and test features and labels
    self.train_X = train_X[:, cols]
    self.test_X = test_X[:, cols]
    self.train_Y = train_Y
    self.test_Y = test_Y

    # set the classifier type
    self.classifier = classifier

    # get the unique labels
    self.labels = np.unique(train_Y)

    # select the averaging procedure
    if len(self.labels) == 2:
        self.averaging = "binary"
    else:
        self.averaging = averaging

    # setup the classifier
    if self.classifier.lower() == 'knn':
        self.clf = KNN()
    elif self.classifier.lower() == 'rf':
        self.clf = RF()
    elif self.classifier.lower() == 'svm':
        self.clf = SVM()
    else:
        self.clf = None
        print('\n[Error!] We don\'t currently support {} classifier...\n'.format(classifier))
        exit(1)

    # call the member functions
    self.predictions = self.classify()
    self.accuracy = self.compute_accuracy()
    self.precision = self.compute_precision()
    self.recall = self.compute_recall()
    self.f1_score = self.compute_f1()
    self.confusion_matrix = self.compute_confusion_matrix()
    self.plot_confusion_matrix(save_conf_mat)
def make_model(opts, input_shape, aux_shape):
    targets, multiclass, deep = get_setup(opts)
    if opts.model == 'lstm':
        if aux_shape:
            model = hierarchical_lstm(input_shape=input_shape, aux_shape=aux_shape,
                                      targets=targets, hidden=opts.hidden_size,
                                      multiclass=multiclass, learn_rate=opts.learning_rate)
        else:
            model = lstm_model(input_shape=input_shape, targets=targets,
                               hidden=opts.hidden_size, multiclass=multiclass,
                               learn_rate=opts.learning_rate)
    elif opts.model == 'cnn':
        if aux_shape:
            model = hierarchical_cnn(input_shape=input_shape, aux_shape=aux_shape,
                                     targets=targets, hidden=opts.hidden_size,
                                     multiclass=multiclass, learn_rate=opts.learning_rate)
        else:
            model = cnn_model(input_shape=input_shape, targets=targets,
                              hidden=opts.hidden_size, multiclass=multiclass,
                              learn_rate=opts.learning_rate)
    elif opts.model == 'mlp':
        model = mlp_model(input_shape=input_shape, targets=targets,
                          hidden=opts.hidden_size, multiclass=multiclass,
                          learn_rate=opts.learning_rate)
    elif opts.model == 'svm':
        if targets > 1:
            model = OneVsRestClassifier(SVM(C=50000, kernel='rbf', max_iter=1000,
                                            verbose=True, decision_function_shape='ovr',
                                            probability=True))
        else:
            model = SVM(C=1e4, kernel='linear', verbose=True, probability=True,
                        max_iter=1000)
    elif opts.model == 'rf':
        if targets > 1:
            model = RF(n_estimators=450, class_weight='balanced', criterion='entropy',
                       bootstrap=False, verbose=1)
        else:
            model = RF(n_estimators=450, verbose=1)
    elif opts.model == 'gbc':
        if targets > 1:
            model = OneVsRestClassifier(GBC(n_estimators=484, learning_rate=0.0984,
                                            verbose=1))
        else:
            model = GBC(n_estimators=400, learning_rate=0.09, verbose=1)
    elif opts.model == 'lr':
        if targets > 1:
            model = OneVsRestClassifier(LR(max_iter=1000, class_weight='balanced',
                                           multi_class='ovr', C=.09, penalty='l1',
                                           verbose=1))  # sag solver if multiclass/multilabel
        else:
            model = LR(C=1e-3, penalty='l2', verbose=1)  # sag solver if multiclass/multilabel
    else:
        model = None
    return model
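# A standalone sketch of the targets > 1 branch above: OneVsRestClassifier fits
# one binary classifier per column of a multilabel indicator matrix (synthetic
# data; SVM is assumed to alias sklearn.svm.SVC, as in that branch).
import numpy as np
from sklearn.multiclass import OneVsRestClassifier
from sklearn.svm import SVC

rng = np.random.RandomState(0)
X = rng.randn(60, 8)
Y = (rng.rand(60, 3) > 0.5).astype(int)  # 3 binary targets per sample

ovr = OneVsRestClassifier(SVC(kernel='linear', probability=True)).fit(X, Y)
print(ovr.predict(X[:2]))        # one 0/1 column per target
print(ovr.predict_proba(X[:2]))  # per-target probabilities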
def classifySVM(Xtrain, Ytrain, Xval, Yval):
    # set range of C parameters to test
    # cvals = np.logspace(-2, 7, 40)
    cvals = [405]
    # set range of gamma values to test
    # gammas = np.logspace(-10, 2, 40)
    gammas = [0.00015]

    # find most accurate combination of C, gamma
    best_accuracy = 0
    best_model = None
    best_c = None
    best_g = None

    # cycle through all combinations of C, gamma
    for c in cvals:
        for g in gammas:
            clf = SVM(C=c, cache_size=200, class_weight=None, coef0=0.0,
                      degree=3, gamma=g, kernel='rbf', max_iter=-1,
                      probability=False, random_state=None, shrinking=True,
                      tol=0.001, verbose=False)
            clf.fit(Xtrain, Ytrain)
            accuracy = clf.score(Xval, Yval)
            if accuracy >= best_accuracy:
                print("C =", c, " gamma =", g, " ->", accuracy)
                best_accuracy = accuracy
                best_model = clf
                best_c = c
                best_g = g

    print("Optimal SVM parameters: C =", best_c, "and best gamma =", best_g)
    print("Best validation accuracy = " + str(best_accuracy))
    train_accuracy = best_model.score(Xtrain, Ytrain)
    print("With training accuracy = " + str(train_accuracy))
    return best_model
def build_classifier(self):
    """
    Build both an LDA and an SVM classifier for offline training; they can
    later be used for online training.
    :return:
    """
    self.clf = [LDA(n_components=None, priors=None, shrinkage='auto',
                    solver='eigen', store_covariance=False, tol=0.0001),
                SVM(kernel='rbf', shrinking=True, probability=True, gamma='scale')]
    # self.clf.fit(self.features, self.labels)
    [c.fit(self.features, self.labels) for c in self.clf]
    # possible to use methods predict(X), predict_log_proba(X) or predict_proba()
    self.cv_scores = []
    [self.cv_scores.append(cvs(estimator=c, X=self.features, y=self.labels,
                               cv=10, n_jobs=-1)) for c in self.clf]
    [print('mean cv score of clf {:d} is'.format(i), np.mean(cv))
     for i, cv in enumerate(self.cv_scores)]
def init_SVM(params):
    C = getattr(params, 'C', 1.0)
    kernel = getattr(params, 'kernel', 'rbf')
    degree = getattr(params, 'degree', 3)
    gamma = getattr(params, 'gamma', 'auto')
    coef0 = getattr(params, 'coef0', 0.0)
    shrinking = getattr(params, 'shrinking', True)
    probability = getattr(params, 'probability', False)
    tol = getattr(params, 'tol', 0.001)
    cache_size = getattr(params, 'cache_size', 200)
    class_weight = getattr(params, 'class_weight', None)
    verbose = getattr(params, 'verbose', False)
    max_iter = getattr(params, 'max_iter', -1)
    decision_function_shape = getattr(params, 'decision_function_shape', 'ovr')
    random_state = getattr(params, 'random_state', None)
    return SVM(C=C, kernel=kernel, degree=degree, gamma=gamma, coef0=coef0,
               shrinking=shrinking, probability=probability, tol=tol,
               cache_size=cache_size, class_weight=class_weight, verbose=verbose,
               max_iter=max_iter, decision_function_shape=decision_function_shape,
               random_state=random_state)
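# A usage sketch for init_SVM (not from the source): params can be any object
# exposing attributes, e.g. an argparse.Namespace. A hypothetical SimpleNamespace
# stands in here; unspecified attributes fall back to the defaults above.
from types import SimpleNamespace

params = SimpleNamespace(C=10.0, kernel='linear', probability=True)
clf = init_SVM(params)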
def impute(data, imputer, imp_method, params_dict):
    imp_data = None
    if imp_method == 'RandomReplace':
        imp_data = imputer.replace(data, params_dict['miss_data_cond'])
    elif imp_method == 'Drop':
        imp_data = imputer.drop(data, params_dict['miss_data_cond'])
    elif imp_method == 'Summary':
        imp_data = imputer.summarize(data, params_dict['summary_func'],
                                     params_dict['miss_data_cond'])
    elif imp_method == 'RandomForest':
        clf = RandomForestClassifier(n_estimators=100, criterion='gini')
        imp_data = imputer.predict(data, params_dict['cat_cols'],
                                   params_dict['miss_data_cond'], clf)
    elif imp_method == 'SVM':
        clf = SVM()
        imp_data = imputer.predict(data, params_dict['cat_cols'],
                                   params_dict['miss_data_cond'], clf)
    elif imp_method == 'LogisticRegression':
        clf = LogisticRegression()
        imp_data = imputer.predict(data, params_dict['cat_cols'],
                                   params_dict['miss_data_cond'], clf)
    elif imp_method == 'SVD':
        imp_data = imputer.factor_analysis(data, params_dict['cat_cols'],
                                           params_dict['miss_data_cond'],
                                           technique='SVD')
    elif imp_method == 'KNN':
        imp_data = imputer.knn(data, params_dict['n_neighbors'],
                               params_dict['knn_summary_func'],
                               params_dict['miss_data_cond'],
                               params_dict['cat_cols'])
    elif imp_method == 'Identity':
        imp_data = data
    else:
        raise Exception("Imputation method {} is not valid".format(imp_method))
    return imp_data
def kv_pipe(DX, labels, groups, tr_options, report_options):
    '''
    @pre: DX is a hash table database of dx histories for each patient per
          encounter; DX codes are represented by their index in the ICD9
          groups dictionary.
          - labels is a hash table in {s: {y: 0/1, t: time} ... } format for
            each subject s in subset S.
          - groups is a reference hash table of indices of ICD9 group codes.
          - tr_options is a hash table with options for X_training
            (e.g., flat, window size, ...)
          - report_options is a hash table with options for result reporting
    @post: pickle or h5 format for model and hash table of training and
           testing data
    '''
    random.seed(11)
    # shuffle=True is required for StratifiedKFold to accept a random_state
    skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=7)
    splits = [(i[0], list(i[1].items())[1][1]) for i in list(labels.items())]
    x_split, y_split = zip(*[(a, b) for a, b in splits])
    x_split, y_split = np.array(x_split), np.array(y_split)
    for train_index, test_index in skf.split(x_split, y_split):
        # 1. downsample the training split
        pos = [i for i in train_index if y_split[i] == 1]
        neg = random.sample([i for i in train_index if y_split[i] == 0], len(pos))
        tr_split = x_split[neg + pos]
        random.shuffle(tr_split)
        te_split = x_split[test_index]
        # 2. split the labels hash table by k-fold
        tr_subj = {key: labels[key] for key in tr_split}
        te_subj = {key: labels[key] for key in te_split}
        # 3. generate train = [(x_tr, y_tr), ...] and test = [(x_te, y_te), ...]
        train, test = generate_x_y(DX, tr_subj, te_subj)
        x_tr, y_tr = zip(*[(a, b) for a, b in train])
        x_te, y_te = zip(*[(a, b) for a, b in test])
        # 4. train models
        lr = LR(warm_start=True, C=1e-3, penalty='l2', verbose=1)  # sag solver if multiclass/multilabel
        svm = SVM(C=1e4, kernel='linear', verbose=True, probability=True, max_iter=1000)
        rf = RF(warm_start=True, n_estimators=450, verbose=1)
    return
def run_grid_search(data_x, data_y, clf_name, scoring):
    if clf_name == 'lr':
        print("Classifier: Logistic regression")
        clfs = [LR()]
        param_grid = {"C": [0.1, 0.25, 0.5, 1]}
    elif clf_name == 'rf':
        print("Classifier: Random forest")
        clfs = [RF(n_estimators=20, max_depth=20, criterion='entropy',
                   bootstrap=False, max_features=20, min_samples_split=4,
                   min_samples_leaf=4)]
        # use a full grid over all parameters
        param_grid = {"max_features": [20, 25],
                      "min_samples_split": [2, 3, 4],
                      "min_samples_leaf": [2, 3, 4]}
    elif clf_name == 'svm':
        print("Classifier: SVM")
        clfs = [SVM(penalty='l2', loss='squared_hinge', dual=False)]
        param_grid = {"C": [0.001, 0.1, 1, 5]}
    else:
        print("Unsupported classifier:", clf_name)
        return  # bail out: clfs and param_grid are undefined past this point

    # run grid search
    for clf in clfs:
        print(clf)
        grid_search = GridSearchCV(clf, param_grid=param_grid, scoring=scoring)
        start = time()
        grid_search.fit(data_x, data_y)
        print("GridSearchCV took %.2f seconds for %d candidate parameter settings."
              % (time() - start, len(grid_search.cv_results_['params'])))
        report(grid_search.cv_results_)
def r_search(x, y):
    # random search params
    lr_params = {'penalty': ['l1', 'l2'], 'C': sp_rand(1e-5, .1)}
    svm_params = {'kernel': ['rbf', 'linear'], 'C': sp_rand(10, 1e5)}
    rf_params = {'criterion': ['gini', 'entropy'],
                 'n_estimators': sp_randint(50, 200),
                 'bootstrap': [True, False]}
    gbc_params = {'learning_rate': sp_rand(1e-6, 1e-1),
                  'n_estimators': sp_randint(50, 200),
                  'loss': ['deviance', 'exponential']}
    data = {}
    xs, ys = balanced_subsample(x, y)
    lst = [LR(verbose=1), RF(verbose=1), SVM(verbose=True), GBC(verbose=1)]
    names = ['LR', 'RF', 'SVM', 'GB']
    params = [lr_params, rf_params, svm_params, gbc_params]
    for idx in range(len(lst)):
        n_iter_search = 60
        start = time.time()
        rsearch = random_search(estimator=lst[idx],
                                param_distributions=params[idx],
                                n_iter=n_iter_search, scoring='roc_auc',
                                fit_params=None, n_jobs=1, iid=True, refit=True,
                                cv=5, verbose=0, random_state=8)
        rsearch.fit(xs, ys)
        data[names[idx]] = rsearch.cv_results_
        print(names[idx] + " results complete.")
        print("RandomizedSearchCV took %.2f seconds for %d candidates"
              " parameter settings." % ((time.time() - start), n_iter_search))
    return data
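# A minimal, current-scikit-learn sketch of the same randomized search (the iid
# and fit_params arguments used above were removed in later releases; sp_rand is
# assumed to alias scipy.stats.uniform):
from scipy.stats import uniform
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import RandomizedSearchCV

X, y = make_classification(n_samples=300, random_state=8)
search = RandomizedSearchCV(LogisticRegression(max_iter=1000),
                            {'C': uniform(1e-5, 0.1)},
                            n_iter=20, scoring='roc_auc', cv=5, random_state=8)
search.fit(X, y)
print(search.best_params_)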
def opt_svm(x_train, y_train, x_valid, y_valid):
    """Find the best values of C and kernel for the SVM."""
    C = [0.01, 0.1, 1]
    kernel = ['poly', 'rbf']
    res = np.zeros((len(kernel), len(C)))
    for i, k in enumerate(kernel):
        for j, c in enumerate(C):
            svm = SVM(C=c, kernel=k, random_state=0)
            svm.fit(x_train, y_train)
            res[i][j] = math.sqrt(
                sk.metrics.mean_squared_error(svm.predict(x_valid), y_valid))
    i, j = np.unravel_index(res.argmin(), res.shape)
    print(f'SVM = {res[i][j]}')
    print(f':: kernel = {kernel[i]}')
    print(f':: C = {C[j]}')
    return res[i][j]
def __init__(self, train_X, test_X, train_Y, test_Y, agent, classifier, save_conf_mat):
    self.agent = agent
    if self.agent is None:
        self.agent = np.ones(train_X.shape[1])
    cols = np.flatnonzero(self.agent)
    if cols.shape[0] == 0:
        print('[Error!] There are 0 features in the agent......')
        exit(1)
    self.train_X = train_X[:, cols]
    self.test_X = test_X[:, cols]
    self.train_Y = train_Y
    self.test_Y = test_Y
    self.classifier = classifier
    if self.classifier.lower() == 'knn':
        self.clf = KNN()
    elif self.classifier.lower() == 'rf':
        self.clf = RF()
    elif self.classifier.lower() == 'svm':
        self.clf = SVM()
    else:
        self.clf = None
        print('\n[Error!] We don\'t currently support {} classifier...\n'.format(classifier))
        exit(1)
    self.predictions = self.classify()
    self.accuracy = self.compute_accuracy()
    self.precision = self.compute_precision()
    self.recall = self.compute_recall()
    self.f1_score = self.compute_f1()
    self.confusion_matrix = self.compute_confusion_matrix()
    self.plot_confusion_matrix(save_conf_mat)
def iterate(cself, svm, classes):
    # qp, K, bs and self come from the enclosing scope.
    cself.mention('Training SVM...')
    D = spdiag(classes)
    qp.update_H(D * K * D)
    qp.update_Aeq(classes.T)
    alphas, obj = qp.solve(cself.verbose)

    # Construct SVM from the QP solution
    svm = SVM(kernel=self.kernel, gamma=self.gamma, p=self.p,
              verbose=self.verbose, sv_cutoff=self.sv_cutoff)
    svm._X = bs.instances
    svm._y = classes
    svm._alphas = alphas
    svm._objective = obj
    svm._compute_separator(K)
    svm._K = K

    cself.mention('Recomputing classes...')
    p_conf = svm._predictions[-bs.L_p:]
    pos_classes = np.vstack([_update_classes(part)
                             for part in partition(p_conf, bs.pos_groups)])
    new_classes = np.vstack([-np.ones((bs.L_n, 1)), pos_classes])
    class_changes = round(np.sum(np.abs(classes - new_classes) / 2))
    cself.mention('Class Changes: %d' % class_changes)
    if class_changes == 0:
        return None, svm
    return {'svm': svm, 'classes': new_classes}, None
def passThroughNetwork(loader):
    # (the opening lines of this function were truncated in the source; the
    # header and loop below are a reconstruction consistent with the appended
    # X/Y lists, the return statement, and the calls further down)
    X, Y = [], []
    for data, target in loader:
        data = data.to(device)
        output = net.forwardToHidden(data)
        Y.append(target.to("cpu"))
        X.append(output.to("cpu"))
    return torch.cat(X, dim=0).data.numpy(), torch.cat(Y, dim=0).data.numpy()


print("Creating Networks")
hps = setup_hparams(sys.argv[1:])
logger, net = setup_network(hps)
net = net.to(device)

print("loading Data")
trainloader, valloader, testloader = get_dataloaders(bs=4)

print("Passing training data through network")
xtrain, ytrain = passThroughNetwork(trainloader)
# print("Passing validation data through network")
# xval, yval = passThroughNetwork(valloader)
print("Passing test data through network")
xtest, ytest = passThroughNetwork(testloader)

print("Training SVM on CPU")
svm = SVM(kernel='rbf', C=1)
svm.fit(xtrain, ytrain)
pred = svm.predict(xtest)
acc = accuracy_score(ytest, pred)
print("Accuracy on test Set:", acc * 100)
def main():
    config = init()
    print("Mangrove Classification")
    print("SVM Classifier")
    print("Classifying with: ", config.hyperparams)
    print("Segmenting with: ", config.segment)
    print("=" * 50)

    if config.regenerate_features:
        print("Getting data lists...")
        (train_img_list, train_lbl_list), (test_img_list, test_lbl_list) = \
            get_image_paths(config.image_paths)

        print("Computing average SuperPixel size...")
        avg_size = compute_avg_size(train_img_list, config.segment)
        print("Average superpixel size is: {}".format(avg_size))

        print("Generating features...")
        train_features, train_labels = extract_features(train_img_list, train_lbl_list,
                                                        config, avg_size)
        test_features, test_labels = extract_features(test_img_list, test_lbl_list,
                                                      config, avg_size)

        print("Preprocessing...")
        preprocessor = get_preprocessor(config.preprocess, train_features)
        train_features = preprocessor.process(train_features)
        test_features = preprocessor.process(test_features)

        print("Saving features...")
        train_file = open(config.save_path["train_features"], 'wb')
        test_file = open(config.save_path["test_features"], 'wb')
        train_save = (train_features, train_labels)
        test_save = (test_features, test_labels)
        pickle.dump(train_save, train_file)
        pickle.dump(test_save, test_file)
        train_file.close()
        test_file.close()
    else:
        print("Loading features...")
        train_file = open(config.save_path["train_features"], 'rb')
        test_file = open(config.save_path["test_features"], 'rb')
        train_features, train_labels = pickle.load(train_file)
        test_features, test_labels = pickle.load(test_file)
        train_file.close()
        test_file.close()

    print("Unique Labels:")
    print(np.unique(train_labels))

    print("Training...")
    start_time = time.time()
    classifier = SVM(**config.hyperparams)
    classifier.fit(train_features, train_labels)
    elapsed_time = time.time() - start_time
    print("Training took {0:.2f} seconds".format(elapsed_time))

    print("Predicting...")
    start_time = time.time()
    pred = classifier.predict(test_features)
    elapsed_time = time.time() - start_time
    print("Predicting took {0:.2f} seconds".format(elapsed_time))

    report, acc, iou, precision, confusion = evaluate(test_labels, pred)
    save_results(report, acc, iou, precision, confusion, config.save_path["results"])
    print(report)
    print()
    print("Accuracy: {0:.4f}".format(acc))
    print("Precision: {0:.4f}".format(precision))
    print("IOU: {0:.4f}".format(iou))
    print()
def test_svm(data: list, label: list) -> None:
    svm = SVM(gamma="scale", kernel="linear")
    svm.fit(data, label)
    plot_model(np.array(data), svm)
# -*- coding: utf-8 -*-
"""
Created on Thu May 23 15:21:35 2013

@author: ed203246
"""
from sklearn import datasets
from sklearn.svm import LinearSVC as SVM
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.feature_selection import SelectKBest
from epac.map_reduce.reducers import PvalPerms
import numpy

X, y = datasets.make_classification(n_samples=100, n_features=200, n_informative=2)
X = numpy.random.rand(*X.shape)

from epac import Perms, CV, Methods
# Note: loss="l1"/"l2" is the legacy LinearSVC spelling; newer scikit-learn
# releases use loss="hinge"/"squared_hinge" instead.
perms_cv_svm = Perms(CV(Methods(SVM(loss="l1"), SVM(loss="l2"))), n_perms=100)
perms_cv_svm.run(X=X, y=y)
perms_cv_svm.reduce()

self = perms_cv_svm
key = 'LinearSVC(loss=l1)'
self = PvalPerms()
def svm_ai(logfile, train_features, train_labels, test_features, test_labels, k):
    # Builds the classifier only; the logfile and data arguments are unused here.
    return SVM(C=k, kernel='linear', cache_size=7000)
x = vect.fit_transform(x)
print('First sample after bag-of-words processing:')
print(x[0])

# Build TF-IDF features and normalize
tf_transformer = TfidfTransformer().fit(x)
x = tf_transformer.transform(x)
print('First sample after TF-IDF processing:')
print(x[0])

# Randomly split into training and test sets
trainX, testX, trainY, testY = train_test_split(x, y, test_size=0.2, random_state=1)

# Support vector machine
model = SVM(C=1000.0, kernel='rbf', random_state=1)
start = time.perf_counter()
model.fit(trainX, trainY)
end = time.perf_counter()
print('Running time: %.12s ms' % ((end - start) * 1000))

# Print the evaluation results
expected = testY
predict = model.predict(testX)
print('Report:')
print(metrics.classification_report(expected, predict))

# Build the confusion matrix
labels = list(set(expected))
matrix = pd.DataFrame(metrics.confusion_matrix(expected, predict, labels=labels),
                      index=labels, columns=labels)
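# The same three steps expressed as a sklearn Pipeline (a sketch, assuming vect
# above is a CountVectorizer and SVM aliases sklearn.svm.SVC; the original keeps
# the steps separate in order to print the intermediate representations):
from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer
from sklearn.pipeline import make_pipeline
from sklearn.svm import SVC

pipe = make_pipeline(CountVectorizer(), TfidfTransformer(),
                     SVC(C=1000.0, kernel='rbf', random_state=1))
# usage: pipe.fit(train_texts, train_labels); pipe.predict(test_texts)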
print('imputing with one-hot')
data_onehot = imp.binarize_data(x, cat_cols)

# replace missing data with predictions using random forest
print('imputing with predicted values from random forest')
clf = RandomForestClassifier(n_estimators=100, criterion='gini')
data_rf = imp.predict(x, cat_cols, missing_data_cond, clf)

# replace missing data with predictions using SVM
print('imputing with predicted values using SVM')
clf = SVM(penalty='l2', loss='squared_hinge', dual=True, tol=0.0001, C=1.0,
          multi_class='ovr', fit_intercept=True, intercept_scaling=1,
          class_weight=None, verbose=0, random_state=None, max_iter=1000)
data_svm = imp.predict(x, cat_cols, missing_data_cond, clf)

# replace missing data with predictions using logistic regression
print('imputing with predicted values using logistic regression')
clf = LogisticRegression(penalty='l2', dual=False, tol=0.0001, C=1.0,
                         fit_intercept=True, intercept_scaling=1)