def bayes_generate_model(args):
    ''' Create and store a model for the bayesian classifier.

    Reads ``db_file``, ``max_token_size``, ``feats``, ``count``,
    ``n_fold_cv`` and ``model`` from ``args``.

    When ``args.feats`` is given it is evaluated as a Python expression;
    a dict result is validated with ``Entry.check_feats`` and, if the check
    fails, a message is printed and no model is created. When ``args.feats``
    is None the features come from ``BayesianTest.get_best_features``.
    '''
    # The local name `bt` is kept on purpose: eval() below runs with this
    # function's locals visible, so renaming it could change what an
    # expression passed in args.feats is able to reference.
    bt = BayesianTest(dbfile=args.db_file, max_token_size=args.max_token_size)

    if args.feats is None:
        # no explicit features: let the tester pick them
        chosen_feats = bt.get_best_features(count=args.count,
                                            n_fold_cv=args.n_fold_cv)
    else:
        # NOTE(review): eval() executes whatever expression is passed on the
        # command line; consider ast.literal_eval if only literals are needed.
        chosen_feats = eval(args.feats)
        # only dict-shaped feature specs are validated; other values pass through
        if isinstance(chosen_feats, dict):
            validator = Entry(id=None, guid=None, entry=None, language=None)
            if not validator.check_feats(chosen_feats):
                print('Incorrect format of feature dictionary')
                return

    bt.create_model(args.model, used_features=chosen_feats, count=args.count)
def bayes_test(args):
    ''' Run a test of the bayesian classifier with the given dataset and
    classifier parameters.

    Reads ``db_file``, ``max_token_size``, ``feats``, ``count`` and
    ``n_fold_cv`` from ``args``.

    When ``args.feats`` is given it is evaluated as a Python expression;
    a dict result is validated with ``Entry.check_feats`` and, if the check
    fails, a message is printed and the test is not run. When ``args.feats``
    is None the features come from ``BayesianTest.get_best_features``.
    '''
    # The local name `bt` is kept on purpose: eval() below runs with this
    # function's locals visible, so renaming it could change what an
    # expression passed in args.feats is able to reference.
    bt = BayesianTest(dbfile=args.db_file, max_token_size=args.max_token_size)

    if args.feats is None:
        # no explicit features: let the tester pick them
        selected = bt.get_best_features(count=args.count,
                                        n_fold_cv=args.n_fold_cv)
    else:
        # NOTE(review): eval() executes whatever expression is passed on the
        # command line; consider ast.literal_eval if only literals are needed.
        selected = eval(args.feats)
        # only dict-shaped feature specs are validated; other values pass through
        if isinstance(selected, dict):
            checker = Entry(id=None, guid=None, entry=None, language=None)
            feats_ok = checker.check_feats(selected)
            if not feats_ok:
                print('Incorrect format of feature dictionary')
                return

    bt.run(features=selected, count=args.count, n_fold_cv=args.n_fold_cv)
def svm_classify(args):
    ''' Manually classify given text with some SVM model.

    Loads the model named by ``args.model``, encodes ``args.text`` as a
    single-row 0/1 vector with one column per entry of the token list
    returned by ``SVM.load_model``, and prints the first predicted value
    converted to int.
    '''
    # load model and create classifier
    svm = SVM(kernel=None, C=None)
    token_list = svm.load_model(args.model)

    # Build the token -> column lookup once instead of scanning token_list
    # twice (`in` + `.index`) for every token of the text. setdefault keeps
    # the FIRST position of a repeated token, matching list.index().
    # Assumes the entries of token_list are hashable (they are compared
    # against strings) -- TODO confirm against SVM.load_model.
    column_of = {}
    for position, known_token in enumerate(token_list):
        column_of.setdefault(known_token, position)

    # convert text to vector X
    entry = Entry(id=None, guid=None, entry=args.text, language=None, max_token_size=1)
    X = np.zeros((1, len(token_list)))
    for token in entry.get_token_all():
        column = column_of.get(token.get_data_str())
        if column is not None:
            X[0, column] = 1

    # classify text
    print(int(svm.predict(X)[0]))