def sections(txt):
    """Run the model on *txt* and return the outcome as a JSON response.

    The text is first passed through ``remove_tag`` (it has to be cleaned
    of HTML tags), then handed to ``model.predict``. The prediction is
    post-processed by ``metrics.compute`` and the result is serialised
    together with the cleaned query and the timing value reported by the
    model.

    Returns:
        The object built by ``app.response_class`` with status 200 and the
        ``application/json`` mimetype; its body is a JSON object with the
        keys ``data``, ``query`` and ``time``.
    """
    # Strip the HTML tags before anything else sees the text.
    query = remove_tag(txt)
    elapsed, predicted = model.predict(query)
    # Only the second element of the pair returned by compute() is used.
    _, scored = metrics.compute(query, predicted)
    payload = json.dumps(
        {'data': scored, 'query': query, 'time': elapsed},
        indent=4,
    )
    return app.response_class(
        response=payload,
        status=200,
        mimetype='application/json',
    )
def test(txt, type="S"):
    """Load a ``Doc2Vec_model``, run it on *txt* and print a JSON report.

    Args:
        txt: The query handed to ``predict`` and to ``metrics.compute``.
        type: Forwarded unchanged as the ``type`` keyword of
            ``Doc2Vec_model``.

    The printed JSON object has the keys ``query``, ``query_normalized``,
    ``res`` and ``time`` (sorted, indented by four spaces). Nothing is
    returned.
    """
    import json

    d2v = Doc2Vec_model(type=type)
    d2v.load()
    elapsed, predicted = d2v.predict(txt)
    # compute() yields a pair: the first element is reported as the
    # normalized query, the second replaces the raw prediction.
    normalized_query, scored = metrics.compute(txt, predicted)
    report = {
        'query': txt,
        'query_normalized': normalized_query,
        'res': scored,
        'time': elapsed,
    }
    print(json.dumps(report, indent=4, sort_keys=True))
import pandas as pd
from utils import data
from utils import metrics
from sklearn.naive_bayes import GaussianNB

# One entry per dataset:
# (training file, labelled test file, predictions CSV, confusion-matrix PNG).
runs = (
    # Dataset 1 (Latin letters)
    ('train_1.csv', 'test_with_label_1.csv', 'GNB-DS1.csv', 'GNB-DS1.png'),
    # Dataset 2 (Greek letters)
    ('train_2.csv', 'test_with_label_2.csv', 'GNB-DS2.csv', 'GNB-DS2.png'),
)

# Fit a fresh Gaussian Naive Bayes classifier per dataset, then evaluate it
# on that dataset's labelled test file.
for train_file, test_file, csv_out, png_out in runs:
    # Training
    trainX, trainY = data.load_data(train_file)
    clf = GaussianNB()
    clf.fit(trainX, trainY)

    # Testing
    testX, testY = data.load_data(test_file)
    predictions = pd.DataFrame(clf.predict(testX))
    data.generate_csv(predictions, csv_out)
    metrics.compute(predictions, testY, csv_out)
    data.generate_cm(predictions, testY, png_out)
import pandas as pd
from utils import data
from utils import metrics
from sklearn.neural_network import MLPClassifier

# One entry per dataset:
# (training file, labelled test file, predictions CSV, confusion-matrix PNG).
runs = (
    # Dataset 1 (Latin letters)
    ('train_1.csv', 'test_with_label_1.csv',
     'Base-MLP-DS1.csv', 'Base-MLP-DS1.png'),
    # Dataset 2 (Greek letters)
    ('train_2.csv', 'test_with_label_2.csv',
     'Base-MLP-DS2.csv', 'Base-MLP-DS2.png'),
)

# Fit a fresh baseline MLP (logistic activation, SGD solver) per dataset,
# then evaluate it on that dataset's labelled test file.
for train_file, test_file, csv_out, png_out in runs:
    # Training
    trainX, trainY = data.load_data(train_file)
    clf = MLPClassifier(activation='logistic', solver='sgd')
    clf.fit(trainX, trainY)

    # Testing
    testX, testY = data.load_data(test_file)
    predictions = pd.DataFrame(clf.predict(testX))
    data.generate_csv(predictions, csv_out)
    metrics.compute(predictions, testY, csv_out)
    data.generate_cm(predictions, testY, png_out)
}  # NOTE(review): closes a literal opened above this excerpt - presumably the param_grid used below; confirm

# Dataset 1 (Latin letters)
# Training: fit a GridSearchCV wrapper around a DecisionTreeClassifier,
# searching over param_grid (GridSearchCV is presumably scikit-learn's; its
# import is not visible in this excerpt).
trainX, trainY = data.load_data('train_1.csv')
clf = GridSearchCV(DecisionTreeClassifier(), param_grid, verbose=1)
clf.fit(trainX, trainY)

# Validation: print the fitted search's score on the data loaded from
# val_1.csv (rounded to 3 decimals) and the parameters it selected.
validX, validY = data.load_data('val_1.csv')
print(f'Score: {round(clf.score(validX, validY), 3)}')
print(f'Parameters chosen: {clf.best_params_}')

# Testing: predict on the labelled test file and hand the predictions to the
# project helpers under the 'Best-DT-DS1' name (CSV, metrics, confusion-matrix
# PNG - helper behavior inferred from their names; confirm in utils).
testX, testY = data.load_data('test_with_label_1.csv')
predictions = pd.DataFrame(clf.predict(testX))
data.generate_csv(predictions, 'Best-DT-DS1.csv')
metrics.compute(predictions, testY, 'Best-DT-DS1.csv')
data.generate_cm(predictions, testY, 'Best-DT-DS1.png')

# Dataset 2 (Greek letters)
# Training: same search as above, started from a fresh GridSearchCV instance.
trainX, trainY = data.load_data('train_2.csv')
clf = GridSearchCV(DecisionTreeClassifier(), param_grid, verbose=1)
clf.fit(trainX, trainY)

# Validation: print the score on the data loaded from val_2.csv and the
# parameters selected for this dataset.
validX, validY = data.load_data('val_2.csv')
print(f'Score: {round(clf.score(validX, validY), 3)}')
print(f'Parameters chosen: {clf.best_params_}')

# Testing
testX, testY = data.load_data('test_with_label_2.csv')
predictions = pd.DataFrame(clf.predict(testX))
data.generate_csv(predictions, 'Best-DT-DS2.csv')
# NOTE(review): the excerpt ends here; the metrics.compute / generate_cm calls
# made for dataset 1 are not visible for dataset 2 - confirm they follow.