Code example #1
0
def evaluate():
    """Evaluate the saved LSTM model on the test set and persist the report.

    Loads the model architecture from ``yml_path`` and its weights from
    ``h5_path`` (module-level globals), predicts a class for every row of
    the tab-separated test file at ``testPath``, and writes the multi-class
    report to doc/result.txt via the ``result`` helper module.
    """
    print('loading model......')
    # model_from_yaml expects the raw YAML *string*; the original parsed the
    # file with yaml.load, which yields a dict and breaks model_from_yaml.
    with open(yml_path, 'r') as f:
        yaml_string = f.read()
    model = model_from_yaml(yaml_string)

    print('loading weights......')
    model.load_weights(h5_path)
    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])

    testData = pd.read_csv(testPath, sep="\t")
    testX = list(testData["text"])
    y_pred = []
    print(testX[0])
    for text in testX:
        data = input_transform(str(text))
        # reshape returns a new array; the original call discarded the
        # result, so the model saw the un-reshaped input.
        data = data.reshape(1, -1)
        pred_value = model.predict_classes(data)
        y_pred.append(pred_value[0])

    save_path = "doc/result.txt"
    desc = "basic lstm"
    result_str = result.printMultiResult(testData["score"], y_pred)
    result.saveResult(save_path, desc, result_str)
Code example #2
0
def trainModel(xtrain, xtest, ytrain, ytest):
    """Fit each enabled classifier, print its log loss, and save the report.

    For every classifier left uncommented in ``classifiers``, fits it on the
    training split, prints the log loss on the test split, and appends the
    multi-class report to doc/result.txt via the ``result`` helper module.
    """
    classifiers = [
        # Alternative models — uncomment to include them in the run.
        # KNeighborsClassifier(3),
        # SVC(kernel="linear",  probability=True),
        # NuSVC(probability=True),
        # DecisionTreeClassifier(),
        RandomForestClassifier(),
        # AdaBoostClassifier(),
        # GradientBoostingClassifier(loss='deviance', learning_rate=0.1, n_estimators=200,
        #          subsample=1.0, criterion='friedman_mse', min_samples_split=2,
        #          min_samples_leaf=1, min_weight_fraction_leaf=0.,
        #          max_depth=5),
        # GradientBoostingClassifier(),
        # GaussianNB(),
        # LinearDiscriminantAnalysis(),
        # QuadraticDiscriminantAnalysis()
    ]

    for clf in classifiers:
        clf.fit(xtrain, ytrain)
        name = clf.__class__.__name__

        print("=" * 30)
        print(name)

        print('****Results****')
        train_predictions = clf.predict(xtest)

        # Fixed typo: was `train_porb_predictions`.
        train_prob_predictions = clf.predict_proba(xtest)
        ll = log_loss(ytest, train_prob_predictions)
        print("Log Loss: {}".format(ll))

        save_path = "doc/result.txt"
        desc = "sentiment by tfidf "
        result_str = result.printMultiResult(ytest, train_predictions)
        result.saveResult(save_path, desc, result_str)

    print("=" * 30)
Code example #3
0
def evaluate_testData(xtest, ytest):
    """Score every test document with the rule-based sentiment model.

    Maps each document's rule score to a class label, then writes the
    multi-class report for the predictions to doc/result.txt via the
    ``result`` helper module.
    """
    ypred = []
    model = senti_rule.senti_rule_model()
    # enumerate replaces the hand-rolled counter; print(...) form works on
    # both Python 2 and 3 (the original `print "index:", index` is 2-only).
    for index, text in enumerate(xtest, start=1):
        if index % 1000 == 0:
            print("index: {}".format(index))

        tokens = model.splitWord(text)
        score = model.sentiScoreDoc(tokens)
        # NOTE(review): label mapping appears to be 1=negative score,
        # 0=positive score, 2=neutral (score == 0) — confirm against the
        # labels used in the test data.
        if score < 0:
            ypred.append(1)
        elif score > 0:
            ypred.append(0)
        else:
            ypred.append(2)

    save_path = "doc/result.txt"
    desc = "sentiment by rule "
    result_str = result.printMultiResult(ytest, ypred)
    result.saveResult(save_path, desc, result_str)
Code example #4
0
def showResult():
    """Write the TextCNN multi-class report to doc/result.txt.

    Reads the module-level globals ``testData`` (expects a "score" column)
    and ``all_predictions``, builds the report with the ``result`` helper
    module, and appends it to the result file.
    """
    report = result.printMultiResult(testData["score"], all_predictions)
    result.saveResult("doc/result.txt", "text_cnn with textcnn_index", report)