예제 #1
0
파일: control.py 프로젝트: tommarek/dip
def bayes_generate_model(args):
    '''
    Create and store a model for the bayesian classifier.

    Attributes read from args: db_file, max_token_size, feats, count,
    n_fold_cv and model.

    When args.feats is None the features are chosen by
    BayesianTest.get_best_features. Otherwise args.feats is evaluated as a
    Python expression; if the result is a dict it is validated with
    Entry.check_feats, and on failure a message is printed and no model is
    created. A non-dict result is passed on without validation.
    '''
    classifier = BayesianTest(dbfile=args.db_file,
                              max_token_size=args.max_token_size)

    if args.feats is None:
        # no explicit features given - let the classifier pick them
        features = classifier.get_best_features(count=args.count,
                                                n_fold_cv=args.n_fold_cv)
    else:
        # NOTE(review): eval() executes arbitrary code from args.feats;
        # only safe while the argument comes from a trusted operator.
        features = eval(args.feats)
        # an empty Entry is built only to get access to check_feats()
        if isinstance(features, dict) and not Entry(
                id=None, guid=None, entry=None,
                language=None).check_feats(features):
            print('Incorrect format of feature dictionary')
            return

    classifier.create_model(args.model, used_features=features,
                            count=args.count)
예제 #2
0
파일: control.py 프로젝트: tommarek/dip
def bayes_test(args):
    '''
    Run a test of the bayesian classifier with the given dataset and
    classifier parameters.

    Attributes read from args: db_file, max_token_size, feats, count and
    n_fold_cv.

    When args.feats is None the features are chosen by
    BayesianTest.get_best_features. Otherwise args.feats is evaluated as a
    Python expression; if the result is a dict it is validated with
    Entry.check_feats, and on failure a message is printed and the test is
    not started. A non-dict result is passed on without validation.
    '''
    tester = BayesianTest(dbfile=args.db_file,
                          max_token_size=args.max_token_size)

    if args.feats is None:
        # no explicit features given - let the classifier pick them
        features = tester.get_best_features(count=args.count,
                                            n_fold_cv=args.n_fold_cv)
    else:
        # NOTE(review): eval() executes arbitrary code from args.feats;
        # only safe while the argument comes from a trusted operator.
        features = eval(args.feats)
        # an empty Entry is built only to get access to check_feats()
        if isinstance(features, dict) and not Entry(
                id=None, guid=None, entry=None,
                language=None).check_feats(features):
            print('Incorrect format of feature dictionary')
            return

    tester.run(features=features, count=args.count,
               n_fold_cv=args.n_fold_cv)
예제 #3
0
파일: control.py 프로젝트: tommarek/dip
def svm_classify(args):
    '''
    Manually classify given text with some SVM model.

    Attributes read from args: model (handed to SVM.load_model, whose
    result is used as the list of known tokens) and text (the text to
    classify). The text is turned into a single-row vector with one column
    per known token, set to 1 where that token occurs in the text, and the
    first element of the prediction is printed as an int.
    '''
    # load model and create classifier
    svm = SVM(kernel=None, C=None)
    token_list = svm.load_model(args.model)

    # convert text to vector X
    entry = Entry(id=None, guid=None, entry=args.text, language=None,
                    max_token_size=1)
    X = np.zeros((1,len(token_list)))

    # Build the token -> column map once instead of scanning token_list
    # twice (`in` and `.index`) for every token of the text. setdefault
    # keeps the first occurrence, which is what list.index() returned.
    column_of = {}
    for column, known_token in enumerate(token_list):
        column_of.setdefault(known_token, column)

    for token in entry.get_token_all():
        column = column_of.get(token.get_data_str())
        if column is not None:
            X[0][column] = 1

    # classify text (single-argument parenthesised print behaves the same
    # under the Python 2 print statement)
    print(int(svm.predict(X)[0]))