"""Train a TF-IDF trigram SVC on the VLSP2018 restaurant corpus and export it."""
from os.path import dirname, join

from sklearn.feature_extraction.text import TfidfVectorizer

from load_data import load_dataset
from model import SVCModel

# Corpus location: <repo_root>/data/vlsp2018/corpus/restaurant, where the repo
# root is three directory levels above this file.  Computed once instead of
# repeating the dirname(dirname(dirname(...))) chain per path.
CORPUS_DIR = join(dirname(dirname(dirname(__file__))),
                  "data", "vlsp2018", "corpus", "restaurant")
data_train = join(CORPUS_DIR, "train.xlsx")
data_dev = join(CORPUS_DIR, "dev.xlsx")

X_train, y_train = load_dataset(data_train)
X_dev, y_dev = load_dataset(data_dev)

# Unigrams through trigrams, no vocabulary cap.
model = SVCModel("Tfidf Trigram", TfidfVectorizer(ngram_range=(1, 3)))
model.load_data(X_train, y_train)
model.fit_transform()
model.train()
model.evaluate(X_dev, y_dev)
model.export(folder="exported/svc")
"""Build a grid of SVC pipelines over TF-IDF / count-vectorizer configurations.

Bug fixes versus the original:
  * `join`/`dirname`, `TfidfVectorizer` and `CountVectorizer` were used but
    never imported — added the missing imports.
  * The second max-features loop builds `TfidfVectorizer`s but labelled them
    "Count Max Feature {}", duplicating the CountVectorizer model names —
    relabelled to "Tfidf Max Feature {}".
"""
from os.path import dirname, join

from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer

from load_data import load_dataset
from model import SVCModel

# Corpus location: <repo_root>/data/vlsp2018/corpus/restaurant.
CORPUS_DIR = join(dirname(dirname(dirname(__file__))),
                  "data", "vlsp2018", "corpus", "restaurant")
data_train = join(CORPUS_DIR, "train.xlsx")
data_dev = join(CORPUS_DIR, "dev.xlsx")

X_train, y_train = load_dataset(data_train)
X_dev, y_dev = load_dataset(data_dev)

# Combined split — presumably consumed by code past this chunk; TODO confirm.
X = X_train + X_dev
y = y_train + y_dev

models = [
    SVCModel("Tfidf Bigram", TfidfVectorizer(ngram_range=(1, 2))),
    SVCModel("Tfidf Trigram", TfidfVectorizer(ngram_range=(1, 3))),
    SVCModel("Count Bigram", CountVectorizer(ngram_range=(1, 2))),
    SVCModel("Count Trigram", CountVectorizer(ngram_range=(1, 3))),
]

# Vocabulary-capped variants of both vectorizer families.
for n in [2000, 5000, 10000]:
    models.append(SVCModel("Count Max Feature {}".format(n),
                           CountVectorizer(max_features=n)))
for n in [2000, 5000, 10000]:
    # These use TfidfVectorizer, so the label says "Tfidf" (was "Count").
    models.append(SVCModel("Tfidf Max Feature {}".format(n),
                           TfidfVectorizer(max_features=n)))
"""Load the serialized SVC and Keras models that back the Flask prediction app."""
import os

# Must be assigned BEFORE TensorFlow is imported, or it has no effect; the
# original set it after the tensorflow import.  '2' hides INFO and WARNING logs.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

import pickle   # bug fix: used below but never imported
import sqlite3

from flask import Flask   # bug fix: Flask was instantiated below but never imported
from tensorflow.keras.models import load_model

from model import SVCModel

# One-time database bootstrap, kept for reference:
#conn = sqlite3.connect('database.db')
#print ("Opened database successfully")
#conn.execute('CREATE TABLE table1 (Input TEXT, Prediction TEXT)')
#print ("Table created successfully")
#conn.close()

app = Flask(__name__)

model = SVCModel()

# NOTE(review): absolute per-user Windows paths — consider moving to config.
# SECURITY: pickle.load / load_model can execute arbitrary code; only load
# model files from trusted locations.
clf_path = 'C:/Users/jaspreetsingh5/Documents/Fitara_pred_flask/models/final_prediction.pkl'
with open(clf_path, 'rb') as f:
    model.clf = pickle.load(f)

vec_path = 'C:/Users/jaspreetsingh5/Documents/Fitara_pred_flask/models/CountVectorizer.pkl'
with open(vec_path, 'rb') as f:
    model.vectorizer = pickle.load(f)

# Secondary Keras (RNN) model and its tokenizer.
model.clf1 = load_model(r'C:\Users\jaspreetsingh5\Documents\flask\my_model.h5')

vec_path1 = 'C:/Users/jaspreetsingh5/Documents/Fitara_pred_flask/models/rnntokenizernn.pkl'
with open(vec_path1, 'rb') as f:
    model.vectorizer1 = pickle.load(f)
"""Train the hotel-domain TF-IDF trigram SVC on train+dev and export it."""
from os.path import dirname, join

from sklearn.feature_extraction.text import TfidfVectorizer

from load_data import load_dataset
from model import SVCModel

# Hotel corpus lives three directory levels above this file; build the
# directory path once and derive both spreadsheet paths from it.
_hotel_dir = join(dirname(dirname(dirname(__file__))),
                  "data", "vlsp2018", "corpus", "hotel")
data_train = join(_hotel_dir, "train.xlsx")
data_dev = join(_hotel_dir, "dev.xlsx")

X_train, y_train = load_dataset(data_train)
X_dev, y_dev = load_dataset(data_dev)

# Final model: fit on the union of the train and dev splits.
X = X_train + X_dev
y = y_train + y_dev

model = SVCModel("Tfidf Trigram",
                 TfidfVectorizer(ngram_range=(1, 3), max_features=7000))
model.load_data(X, y)
model.fit_transform()
model.train()
# NOTE(review): the dev split is part of the training data here, so this
# evaluation score is optimistic — it is a sanity check, not a held-out metric.
model.evaluate(X_dev, y_dev)
model.export(folder="exported/svc_full")
# NOTE(review): this span references `options` and `model_id`, which are not
# defined here — presumably bound in an enclosing scope outside this view;
# verify against the full file.
stop_words = 'english'
if 'lemmatizer' in options:
    lemmatizer = True
# NOTE(review): if 'lemmatizer' is NOT in options and `lemmatizer` is not
# initialized earlier (outside this view), the references below raise
# NameError — confirm an upstream `lemmatizer = False` exists.

# read data from files
_train_text_lst, _train_label_lst, _test_text_lst, _test_label_lst = parse_reuters()

# remove classes with very few occurrences
topics_selected = topics_with_occurences_gt(threshold=3)
train_text_lst, train_label_lst, test_text_lst, test_label_lst = remove_minor_classes(topics_selected)

# label transformer: multi-hot encode the selected topics for multi-label training
label_binarizer = preprocessing.MultiLabelBinarizer(topics_selected)
label_binarizer.fit(train_label_lst)
y_train = label_binarizer.transform(train_label_lst)
y_test = label_binarizer.transform(test_label_lst)

# model: the name encodes the preprocessing configuration.
# NOTE(review): stop_words is unconditionally 'english' above, so the
# `stop_words is None` branch never fires within this visible span.
model_name = '{}_{}_{}'.format(model_id, 'non-stopwords' if stop_words is None else 'use-stopwords', 'non-lemma' if not lemmatizer else 'use-lemma')
# Dispatch on the requested model family; all take (name, stop_words, lemmatizer).
if model_id == 'NaiveBayes':
    model = NBModel(model_name, stop_words, lemmatizer)
elif model_id == 'LR':
    model = LRModel(model_name, stop_words, lemmatizer)
elif model_id == 'SVC':
    model = SVCModel(model_name, stop_words, lemmatizer)
elif model_id == 'XGBoost':
    model = XGBModel(model_name, stop_words, lemmatizer, max_features=10000)
else:
    raise ValueError('This model %s is not supported' % model_id)
model.train_validate_predict(train_text_lst, y_train, test_text_lst, y_test, topics_selected)