def evaluate():
    """Load a serialized LSTM model and its weights, predict a sentiment
    class for every row of the test set, and persist a multi-class
    evaluation report to doc/result.txt.

    Relies on module-level globals: yml_path (model architecture YAML),
    h5_path (weights), testPath (tab-separated test data with "text" and
    "score" columns), and the helpers input_transform / result.
    """
    print('loading model......')
    # model_from_yaml expects the raw YAML text. The original parsed the
    # file with yaml.load (returning a dict, which Keras rejects; also
    # unsafe without an explicit Loader) -- read the file contents instead.
    with open(yml_path, 'r') as f:
        yaml_string = f.read()
    model = model_from_yaml(yaml_string)

    print('loading weights......')
    model.load_weights(h5_path)
    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])

    testData = pd.read_csv(testPath, sep="\t")
    testX = list(testData["text"])
    y_pred = []
    print(testX[0])
    for text in testX:
        data = input_transform(str(text))
        # reshape returns a NEW array; the original discarded the result,
        # so `data` silently kept its old shape. Rebind it.
        data = data.reshape(1, -1)
        pred_value = model.predict_classes(data)
        y_pred.append(pred_value[0])

    save_path = "doc/result.txt"
    desc = "basic lstm"
    result_str = result.printMultiResult(testData["score"], y_pred)
    result.saveResult(save_path, desc, result_str)
def trainModel(xtrain, xtest, ytrain, ytest):
    """Fit each enabled classifier on the training split, print its
    log-loss on the test split, and append a multi-class result summary
    to doc/result.txt via the `result` helper.

    The commented-out entries are alternative models the author toggles
    on and off; only RandomForestClassifier is currently active.
    """
    classifiers = [
        # KNeighborsClassifier(3),
        # SVC(kernel="linear", probability=True),
        # NuSVC(probability=True),
        # DecisionTreeClassifier(),
        RandomForestClassifier(),
        # AdaBoostClassifier(),
        # GradientBoostingClassifier(loss='deviance', learning_rate=0.1, n_estimators=200,
        #                            subsample=1.0, criterion='friedman_mse', min_samples_split=2,
        #                            min_samples_leaf=1, min_weight_fraction_leaf=0.,
        #                            max_depth=5),
        # GradientBoostingClassifier(),
        # GaussianNB(),
        # LinearDiscriminantAnalysis(),
        # QuadraticDiscriminantAnalysis()
    ]

    # Kept to pair with the commented-out accuracy logging below; unused
    # while that code stays disabled.
    log_cols = ["Classifier", "Accuracy", "Log Loss"]
    log = pd.DataFrame(columns=log_cols)

    for clf in classifiers:
        clf.fit(xtrain, ytrain)
        name = clf.__class__.__name__
        print("=" * 30)
        print(name)
        print('****Results****')

        train_predictions = clf.predict(xtest)
        # acc = accuracy_score(ytest, train_predictions)
        # print("Accuracy: {:.4%}".format(acc))

        # fixed typo: was `train_porb_predictions`
        train_prob_predictions = clf.predict_proba(xtest)
        ll = log_loss(ytest, train_prob_predictions)
        print("Log Loss: {}".format(ll))

        save_path = "doc/result.txt"
        desc = "sentiment by tfidf "
        result_str = result.printMultiResult(ytest, train_predictions)
        result.saveResult(save_path, desc, result_str)

        # log_entry = pd.DataFrame([[name, acc * 100, ll]], columns=log_cols)
        # log = log.append(log_entry)
        print("=" * 30)
def evaluate_testData(xtest, ytest):
    """Score each test document with the rule-based sentiment model and
    save a multi-class evaluation report to doc/result.txt.

    Label mapping used here: negative score -> 1, positive score -> 0,
    zero score -> 2 (presumably negative / positive / neutral class ids
    -- confirm against the training-label encoding).
    """
    ypred = []
    model = senti_rule.senti_rule_model()
    # enumerate replaces the hand-rolled counter; the Py2-only
    # `print "index:", index` statement broke under Python 3 and was
    # inconsistent with the print(...) calls used elsewhere in this file.
    for index, text in enumerate(xtest, start=1):
        if index % 1000 == 0:
            # progress heartbeat for long test sets
            print("index: %d" % index)
        tokens = model.splitWord(text)
        score = model.sentiScoreDoc(tokens)
        if score < 0:
            ypred.append(1)
        elif score > 0:
            ypred.append(0)
        else:
            ypred.append(2)

    save_path = "doc/result.txt"
    desc = "sentiment by rule "
    result_str = result.printMultiResult(ytest, ypred)
    result.saveResult(save_path, desc, result_str)
def showResult():
    """Print the multi-class metrics for the text-CNN predictions and
    append them to the shared result log.

    Reads module-level globals: testData (expects a "score" column) and
    all_predictions.
    """
    report = result.printMultiResult(testData["score"], all_predictions)
    result.saveResult("doc/result.txt", "text_cnn with textcnn_index", report)