Ejemplo n.º 1
0
def handle_data():
    """Flask view: render the search page on POST, classify on GET.

    GET reads the ``content`` query parameter, runs it through the pickled
    sentiment model, and responds with an image reflecting the predicted
    class (0 -> cry.jpg, otherwise smile.jpg). Any other method falls
    through and returns None, as in the original.
    """
    if request.method == 'POST':
        return render_template("search.html")
    if request.method == 'GET':
        query = request.args.get('content')

        # NOTE(review): the classifier and vectorizer are re-loaded from
        # disk on every GET request; loading them once at module import
        # time would be much cheaper. Left per-request to keep behavior.
        model = NLPModel()

        clf_path = 'lib/models/SentimentClassifier.pkl'
        with open(clf_path, 'rb') as f:
            model.clf = pickle.load(f)

        vec_path = 'lib/models/TFIDFVectorizer.pkl'
        with open(vec_path, 'rb') as f:
            model.vectorizer = pickle.load(f)

        # Vectorize the raw query (the redundant `user_query = query`
        # alias from the original is dropped).
        uq_vectorized = model.vectorizer_transform(np.array([query]))
        prediction = model.predict(uq_vectorized)
        pred_proba = model.predict_proba(uq_vectorized)

        confidence = round(pred_proba[0], 3)
        print(prediction, confidence)

        # Single exit point: the two original branches differed only in the
        # filename, so pick it with a conditional expression.
        filename = 'cry.jpg' if prediction == 0 else 'smile.jpg'
        return send_file(filename, mimetype='image/jpg')
Ejemplo n.º 2
0
def build_model():
    """Train the sentiment classifier on extreme ratings and pickle it.

    Reads data/train.csv, keeps only clearly negative (Rating <= 2) and
    clearly positive (Rating >= 4) reviews, fits the TF-IDF vectorizer and
    classifier, then pickles both artefacts via the model's own helpers.
    """
    model = NLPModel()
    with open('data/train.csv') as f:
        data = pd.read_csv(f, sep=',', header=0)

    # Use only the extreme cases so the task is a clean binary
    # positive/negative split. .copy() makes pos_neg an independent frame,
    # so the column assignments below are plain writes instead of chained
    # assignment on a view (SettingWithCopyWarning).
    pos_neg = data[(data['Rating'] <= 2) | (data['Rating'] >= 4)].copy()

    # Relabel: 0 for negative, 1 for positive.
    # BUG FIX: the original used `x['Rating'] < 2`, which labelled
    # Rating == 2 reviews as positive even though the filter above keeps
    # them as the negative extreme; use <= 2 to match the filter.
    pos_neg['Binary'] = pos_neg.apply(lambda x: 0 if x['Rating'] <= 2 else 1,
                                      axis=1)

    # Drop reviews without text before vectorizing.
    pos_neg.dropna(subset=['Review Text'], inplace=True)

    # Fit the vectorizer to the dataset vocabulary and transform in one pass.
    X = model.vectorizer_fit_transform(pos_neg.loc[:, 'Review Text'])
    print('Vectorizer fit transform complete')

    y = pos_neg.loc[:, 'Binary']

    # Default 75/25 split; random_state=1 for reproducibility.
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)
    model.train(X_train, y_train)
    print('Model training complete')

    model.pickle_clf()
    model.pickle_vectorizer()
Ejemplo n.º 3
0
def build_model():
    """Train the FITARA text classifier and pickle its artefacts.

    Merges the document texts with their Yes/No labels, maps the labels to
    1/0, fits the vectorizer and classifier, and pickles both.
    """
    model = NLPModel()

    data = pd.read_csv('extract_combined.csv')
    # NOTE(review): error_bad_lines is deprecated in modern pandas
    # (use on_bad_lines='skip'); kept for compatibility with the pinned
    # version this script was written against.
    data2 = pd.read_csv('labels.csv', error_bad_lines=False)
    merged = pd.merge(data, data2)
    yn = {'Yes': 1, 'No': 0}

    merged['is_fitara'] = [yn[i] for i in merged['is_fitara']]

    # BUG FIX: fit/transform on `merged`, not `data`. pd.merge performs an
    # inner join, so rows of `data` without a label are dropped; building X
    # from `data` while y comes from `merged` could leave X and y with
    # different lengths and row order.
    model.vectorizer_fit(merged.loc[:, 'text'])
    print('Vectorizer fit complete')

    X = model.vectorizer_transform(merged.loc[:, 'text'])
    print('Vectorizer transform complete')
    y = merged.loc[:, 'is_fitara']

    X_train, X_test, y_train, y_test = train_test_split(X, y)

    model.train(X_train, y_train)
    print('Model training complete')

    model.pickle_clf()
    model.pickle_vectorizer()
Ejemplo n.º 4
0
def build_model():
    """Train the sentiment classifier on extreme sentiments and pickle it.

    Keeps only Sentiment 0 (very negative) and 4 (very positive) phrases,
    relabels them 0/1, trains, pickles the artefacts, and plots an ROC
    curve on the held-out test split.
    """
    model = NLPModel()

    with open('lib/data/train.tsv') as f:
        data = pd.read_csv(f, sep='\t')

    # .copy() so the 'Binary' assignment is a plain write rather than a
    # chained assignment on a view (SettingWithCopyWarning).
    pos_neg = data[(data['Sentiment'] == 0) | (data['Sentiment'] == 4)].copy()
    pos_neg['Binary'] = pos_neg.apply(lambda x: 0 if x['Sentiment'] == 0 else 1, axis=1)

    model.vectorizer_fit(pos_neg.loc[:, 'Phrase'])
    print('Vectorizer fit complete')

    X = model.vectorizer_transform(pos_neg.loc[:, 'Phrase'])
    # BUG FIX: log message said 'tranform'.
    print('Vectorizer transform complete')
    # BUG FIX: the label column is 'Binary'; 'Binaryy' raised KeyError.
    y = pos_neg.loc[:, 'Binary']

    X_train, X_test, y_train, y_test = train_test_split(X, y)

    model.train(X_train, y_train)
    # BUG FIX: `Print` (capital P) raised NameError.
    print('Model training complete')

    model.pickle_clf()
    model.pickle_vectorizer()

    # BUG FIX: plot ROC on the matched test pair; the original passed
    # X_train together with y_test (mismatched lengths).
    model.plot_roc(X_test, y_test)
Ejemplo n.º 5
0
Archivo: app.py Proyecto: zzsza/TIL
from flask import Flask
from flask_restful import reqparse, abort, Api, Resource
import pickle
import numpy as np
from model import NLPModel

# Flask application plus its flask-restful API wrapper.
app = Flask(__name__)
api = Api(app)

# Load the trained sentiment classifier and its TF-IDF vectorizer ONCE at
# import time so every request handler can reuse them.
# NOTE: pickle.load is only safe here because these are locally produced
# model files — never unpickle untrusted data.
model = NLPModel()

clf_path = 'lib/models/SentimentClassifier.pkl'
with open(clf_path, 'rb') as f:
    model.clf = pickle.load(f)

vec_path = 'lib/models/TFIDFVectorizer.pkl'
with open(vec_path, 'rb') as f:
    model.vectorizer = pickle.load(f)

# argument parsing: each request supplies a single 'query' string.
parser = reqparse.RequestParser()
parser.add_argument('query')


@app.route('/')
def main():
    # Landing page: a plain-text usage hint showing how to hit the
    # /prediction endpoint with curl.
    return "Main Page\nIf you use curl, using 'curl -X GET http://127.0.0.1:5000/prediction -d query='that movie was boring''"


class PredictSentiment(Resource):
    def get(self):
Ejemplo n.º 6
0
def build_model():
    """Train the sentiment classifier on extreme sentiments and pickle it.

    Keeps only Sentiment 0 / 4 phrases, relabels them 0/1, trains the
    classifier, pickles both artefacts, and plots a 12x12 ROC curve on the
    held-out test split.
    """
    model = NLPModel()

    # filename = os.path.join(
    #     os.path.dirname(__file__), 'chalicelib', 'all/train.tsv')
    with open('lib/data/train.tsv') as f:
        data = pd.read_csv(f, sep='\t')

    # FIX: .copy() makes pos_neg an independent frame, so the 'Binary'
    # assignment below is a plain write instead of a chained assignment on
    # a view of `data` (SettingWithCopyWarning; pandas does not guarantee
    # writes through a view stick).
    pos_neg = data[(data['Sentiment'] == 0) | (data['Sentiment'] == 4)].copy()

    pos_neg['Binary'] = pos_neg.apply(lambda x: 0
                                      if x['Sentiment'] == 0 else 1,
                                      axis=1)

    model.vectorizer_fit(pos_neg.loc[:, 'Phrase'])
    print('Vectorizer fit complete')

    X = model.vectorizer_transform(pos_neg.loc[:, 'Phrase'])
    print('Vectorizer transform complete')
    y = pos_neg.loc[:, 'Binary']

    X_train, X_test, y_train, y_test = train_test_split(X, y)

    model.train(X_train, y_train)
    print('Model training complete')

    model.pickle_clf()
    model.pickle_vectorizer()

    model.plot_roc(X_test, y_test, size_x=12, size_y=12)
Ejemplo n.º 7
0
from django.shortcuts import render

from .apps import ClassifiermodelConfig

# Create your views here.
from django.http import HttpResponse, JsonResponse
from django.shortcuts import get_object_or_404
from rest_framework.views import APIView
from rest_framework.response import Response
from rest_framework import status
from .apps import WebappConfig


# BUG FIX: `from model import NLPModel()` is a SyntaxError — an import
# target cannot be a call expression.
from model import NLPModel

# Shared model instance used by the view classes below.
model = NLPModel()

# clf_path = 'lib/models/SentimentClassifier.pkl'
# with open(clf_path, 'rb') as f:
#     model.clf = pickle.load(f)
#
# vec_path = 'lib/models/TFIDFVectorizer.pkl'
# with open(vec_path, 'rb') as f:
#     model.vectorizer = pickle.load(f)

# argument parsing
# NOTE(review): `reqparse` (flask_restful) is not imported anywhere in this
# Django module — confirm the intended request-parsing mechanism.
parser = reqparse.RequestParser()
parser.add_argument('query')

class call_model(APIView):
Ejemplo n.º 8
0
if __name__ == '__main__':
    # CLI entry point: treat all command-line arguments as one input
    # sentence and greedily decode the model's answer for it.
    configs = Config()
    arg_length = len(sys.argv)

    # At least one word of input is required.
    # NOTE(review): `assert` disappears under `python -O`; raising
    # SystemExit with a usage message would be more robust.
    assert arg_length >= 2

    # Build the vocabulary mappings from the configured data.
    char2idx, idx2char, configs.vocabulary_length = load_vocabulary(configs.vocabulary_path, configs.data_path, configs.tokenize_as_morph)

    # NOTE(review): `input` shadows the builtin of the same name.
    input = " ".join(sys.argv[1:])
    print(input)
    # Encoder input from the sentence; decoder input/target start empty —
    # they exist only to satisfy the expected dataset structure.
    predict_input_enc, predict_input_enc_length = enc_processing([input], char2idx, configs.max_sequence_length, configs.tokenize_as_morph)
    predict_output_dec, predict_output_dec_length = dec_output_processing([""], char2idx, configs.max_sequence_length, configs.tokenize_as_morph)
    predict_target_dec = dec_target_processing([""], char2idx, configs.max_sequence_length, configs.tokenize_as_morph)

    # Resume from a saved checkpoint when one exists.
    model = NLPModel(configs)
    if os.path.exists(configs.f_name):
        model.load_model(configs.f_name)

    # Greedy step-by-step decoding: after the first step, feed the answer
    # produced so far back in as the decoder input.
    for i in range(configs.max_sequence_length):
        if i > 0:
            # NOTE(review): `predict_output_decLength` breaks the
            # snake_case naming used above and is never read.
            predict_output_dec, predict_output_decLength = dec_output_processing([answer], char2idx, configs.max_sequence_length, configs.tokenize_as_morph)
            predict_target_dec = dec_target_processing([answer], char2idx, configs.max_sequence_length, configs.tokenize_as_morph)
        # Prediction step (comment translated from Korean).

        dataset_test = dataset_process(predict_input_enc, predict_output_dec, predict_target_dec, 1)
        for (feature, _) in dataset_test.take(1):
            predictions = model.predict(feature)

        answer, finished = pred_next_string(predictions.numpy(), idx2char)
Ejemplo n.º 9
0
def build_model():
    """Build and pickle both models: the sentiment classifier and the
    diamond price regressor."""
    # --- sentiment classifier + TF-IDF vectorizer ---
    model = NLPModel()
    train_data_dir = 'lib/data/train.tsv'
    with open(train_data_dir) as f:
        data = pd.read_csv(f, sep='\t')

    # FIX: .copy() so the 'Binary' assignment is a plain write instead of a
    # chained assignment on a view of `data` (SettingWithCopyWarning).
    pos_neg = data[(data['Sentiment'] == 0) | (data['Sentiment'] == 4)].copy()
    pos_neg['Binary'] = pos_neg.apply(
        lambda x: 0 if x['Sentiment'] == 0 else 1, axis=1)

    model.vectorizer_fit(pos_neg.loc[:, 'Phrase'])
    X = model.vectorizer_transform(pos_neg.loc[:, 'Phrase'])
    print('Vectorizer transform complete')

    y = pos_neg.loc[:, 'Binary']
    X_train, X_test, y_train, y_test = train_test_split(X, y)
    model.train(X_train, y_train)

    model.pickle_clf()
    model.pickle_vectorizer()
    print('Sentiment Classifier Built')

    # --- diamond price regressor ---
    model_two = DiamondPredictor()
    df = sns.load_dataset('diamonds')
    train, test = train_test_split(df.copy(), random_state=0)
    # Encode cut quality as an ordinal rank (worst=1 ... best=5).
    cut_ranks = {'Fair': 1, 'Good': 2, 'Very Good': 3, 'Premium': 4, 'Ideal': 5}
    train.cut = train.cut.map(cut_ranks)
    test.cut = test.cut.map(cut_ranks)
    features = ['carat', 'cut']
    target = 'price'
    model_two.train(train[features], train[target])
    model_two.pickle_model()
    print('Diamond Regressor Built')
Ejemplo n.º 10
0
from model import NLPModel

from flask import Flask
from flask_cors import CORS
from flask_restful import Resource, Api, reqparse

# Flask app with CORS enabled, wrapped by flask-restful.
app = Flask(__name__)
CORS(app)
api = Api(app)

# Load the persisted vectorizer and classifier once at import time via the
# model's own loader helpers.
model = NLPModel()
model.load_vectorizer()
model.load_clf()

# Each request supplies a single 'query' string to classify.
parser = reqparse.RequestParser()
parser.add_argument('query')


# PredictSentiment
# Predicts the sentiment of an input review
class PredictSentiment(Resource):
    """REST resource that classifies the 'query' argument of a GET request
    as Positive or Negative using the module-level model."""

    def get(self):
        # Pull the review text out of the parsed request arguments.
        user_query = parser.parse_args()['query']

        # A prediction of [1] means positive sentiment.
        label = model.predict([user_query])
        sentiment = 'Positive' if label == [1] else 'Negative'

        return {'prediction': sentiment}
Ejemplo n.º 11
0
def main():
    """Train the seq2seq chatbot and periodically checkpoint + sample it.

    Builds the vocabulary and train/eval datasets from the configured
    paths, resumes from a saved checkpoint if one exists, then trains for
    ``configs.train_steps`` batches. Every 200 steps the model is saved and
    a fixed sample prompt is greedily decoded so progress can be eyeballed.
    (All comments translated from the original Korean.)
    """
    configs = Config()

    # Ensure the output directory exists.
    data_out_path = os.path.join(os.getcwd(), './out')
    os.makedirs(data_out_path, exist_ok=True)

    # Build the vocabulary mappings from the data.
    char2idx, idx2char, configs.vocabulary_length = load_vocabulary(
        configs.vocabulary_path, configs.data_path, configs.tokenize_as_morph)
    # Load the training and evaluation data.
    train_input, train_label, eval_input, eval_label = load_data(
        configs.data_path)

    # Encoder inputs for the training set.
    train_input_enc, train_input_enc_length = enc_processing(
        train_input, char2idx, configs.max_sequence_length,
        configs.tokenize_as_morph)
    # Decoder inputs for the training set.
    train_output_dec, train_output_dec_length = dec_output_processing(
        train_label, char2idx, configs.max_sequence_length,
        configs.tokenize_as_morph)
    # Decoder targets for the training set.
    train_target_dec = dec_target_processing(train_label, char2idx,
                                             configs.max_sequence_length,
                                             configs.tokenize_as_morph)

    # Encoder inputs for the evaluation set.
    eval_input_enc, eval_input_enc_length = enc_processing(
        eval_input, char2idx, configs.max_sequence_length,
        configs.tokenize_as_morph)
    # Decoder inputs for the evaluation set.
    eval_output_dec, eval_output_dec_length = dec_output_processing(
        eval_label, char2idx, configs.max_sequence_length,
        configs.tokenize_as_morph)
    # Decoder targets for the evaluation set.
    eval_target_dec = dec_target_processing(eval_label, char2idx,
                                            configs.max_sequence_length,
                                            configs.tokenize_as_morph)

    # Batch the processed arrays into datasets.
    dataset_train = dataset_process(train_input_enc, train_output_dec,
                                    train_target_dec, configs.batch_size)
    dataset_eval = dataset_process(eval_input_enc, eval_output_dec,
                                   eval_target_dec, configs.batch_size)

    # Resume from a checkpoint when one exists.
    model = NLPModel(configs)
    if os.path.exists(configs.f_name):
        model.load_model(configs.f_name)

    for i, (features,
            labels) in enumerate(dataset_train.take(configs.train_steps)):
        model.train(features, labels)
        if i % 200 == 0:
            model.save_model(configs.f_name)

            # Decode a fixed sample prompt to eyeball training progress.
            # NOTE(review): `predic_input_enc_length` (typo'd name) is never
            # read afterwards.
            predict_input_enc, predic_input_enc_length = enc_processing(
                ["가끔 궁금해"], char2idx, configs.max_sequence_length,
                configs.tokenize_as_morph)
            # Not training, so there is no real decoder input; an empty one
            # is supplied only to match the expected structure.
            predict_output_dec, predic_output_decLength = dec_output_processing(
                [""], char2idx, configs.max_sequence_length,
                configs.tokenize_as_morph)
            # Likewise the decoder target is empty and purely structural.
            predict_target_dec = dec_target_processing(
                [""], char2idx, configs.max_sequence_length,
                configs.tokenize_as_morph)

            # Greedy step-by-step decoding: after the first step, feed the
            # answer produced so far back in as the decoder input.
            # NOTE(review): this inner loop reuses `i`, shadowing the outer
            # step counter — harmless because enumerate rebinds `i` each
            # outer iteration, but worth renaming.
            for i in range(configs.max_sequence_length):
                if i > 0:
                    predict_output_dec, _ = dec_output_processing(
                        [answer], char2idx, configs.max_sequence_length,
                        configs.tokenize_as_morph)
                    predict_target_dec = dec_target_processing(
                        [answer], char2idx, configs.max_sequence_length,
                        configs.tokenize_as_morph)
                # Prediction step.

                dataset_test = dataset_process(predict_input_enc,
                                               predict_output_dec,
                                               predict_target_dec, 1)
                for (feature, _) in dataset_test.take(1):
                    predictions = model.predict(feature)

                # Convert logits to the next answer string; `finished` is
                # set when the model emits its end-of-sequence signal.
                answer, finished = pred_next_string(predictions.numpy(),
                                                    idx2char)

                if finished:
                    break

            # Print the decoded sample answer.
            print("answer: ", answer)
def train_model():
    """Train the sentiment model on the labelled IMDB sentences, report
    accuracy on a held-out split, and pickle the artefacts."""
    dataset = pd.read_csv('lib/data/imdb_labelled.txt', sep='\t', header=None)
    dataset.columns = ['text', 'score']

    # Hold out 20% of the sentences for the accuracy report.
    text_train, text_test, y_train, y_test = train_test_split(
        dataset['text'], dataset['score'], test_size=0.2)

    model = NLPModel()

    # Fit the vectorizer on the training text only, then transform both splits.
    model.fit_vectorizer(text_train)
    X_train = model.transform_vectorizer(text_train)
    X_test = model.transform_vectorizer(text_test)

    model.train(X_train, y_train)

    # Write the held-out accuracy to disk.
    model.report_accuracy(X_test, y_test, 'lib/model/accuracy')

    model.pickle_vectorizer()
    model.pickle_clf()
Ejemplo n.º 13
0
def build_model():
    """Train the FITARA text classifier and pickle its artefacts.

    Joins the document texts with their labels, normalizes the text
    (letters only, lemmatized), label-encodes is_fitara, trains, and
    pickles both the classifier and the vectorizer.
    """
    model = NLPModel()

    # Unzip the data files into the folder containing this file before
    # running.
    df_extract_combined = pd.read_csv('extract_combined.csv')
    df_labels = pd.read_csv('labels.csv')

    df_final = pd.merge(df_extract_combined, df_labels, on='document_name')
    # FIX: .copy() so the assignments below write to an independent frame.
    # The original mutated a slice of df_final via chained indexing
    # (df['text'][i] = ...), which pandas does not guarantee to stick and
    # which flags SettingWithCopyWarning.
    df_text_data = df_final[['text', 'is_fitara']].copy()

    # FIX: strip non-letters with a single column-wise apply instead of a
    # Python-level index loop with chained assignment.
    df_text_data['text'] = df_text_data['text'].apply(
        lambda t: re.sub('[^a-zA-Z]', ' ', t))

    df_text_data['text'] = df_text_data['text'].apply(applyLemmatizer)

    #df_text_data['text'] = df_text_data['text'].apply(stopwords)

    # Encode Yes/No (or whatever labels are present) as integers.
    le = LabelEncoder()
    df_text_data['is_fitara'] = le.fit_transform(df_text_data['is_fitara'])

    model.vectorizer_fit(df_text_data.loc[:, 'text'])

    X = model.vectorizer_transform(df_text_data.loc[:, 'text'])
    y = df_text_data.loc[:, 'is_fitara']

    X_train, X_test, y_train, y_test = train_test_split(X, y)

    model.train(X_train, y_train)

    model.pickle_clf()
    model.pickle_vectorizer()
Ejemplo n.º 14
0
def build_model():
    """Train the sentiment classifier on extreme sentiments and pickle it.

    Same pipeline as the sibling examples, but the 0/1 relabelling is done
    with a vectorized np.where instead of a row-wise apply.
    """
    model = NLPModel()
    with open('./data/train.tsv') as f:
        data = pd.read_csv(f, sep='\t')

    print(data.columns)
    # FIX: .copy() so the 'Binary' assignment below is a plain write
    # instead of a chained assignment on a view of `data`
    # (SettingWithCopyWarning).
    pos_neg = data[(data['Sentiment'] == 0) | (data['Sentiment'] == 4)].copy()

    pos_neg['Binary'] = np.where(pos_neg['Sentiment'] == 0, 0, 1)

    model.vectorizer_fit(pos_neg.loc[:, 'Phrase'])
    print('Vectorizer fit complete')

    X = model.vectorizer_transform(pos_neg.loc[:, 'Phrase'])
    print('Vectorizer transform complete')
    y = pos_neg.loc[:, 'Binary']

    X_train, X_test, y_train, y_test = train_test_split(X, y)

    model.train(X_train, y_train)
    print('Model training complete')

    model.pickle_clf()
    model.pickle_vectorizer()
Ejemplo n.º 15
0
# Otherwise, their weights will be unavailable in the threads after the session there has been set

# Load the Keras model inside a CustomObjectScope so the GlorotUniform
# initializer can be resolved, and capture the session and default graph so
# request-handler threads can reuse them.
with CustomObjectScope({'GlorotUniform': glorot_uniform()}):

    sess = tf.compat.v1.Session()
    set_session(sess)
    model = load_model('models/model.h5')
    # NOTE(review): _make_predict_function() is a private Keras API removed
    # in newer TF versions — confirm the pinned TF release still has it.
    model._make_predict_function()
    graph = tf.get_default_graph()
# Dead alternative loading path, kept disabled as a bare string literal.
'''
with CustomObjectScope({'GlorotUniform': glorot_uniform()}):
    model = tf.keras.models.load_model('models/model.h5')
    sess = tf.compat.v1.Session()
    model._make_predict_function()
    graph = tf.get_default_graph()'''
model1 = NLPModel()
# NOTE(review): mid-module import; belongs in the top-of-file import block.
import h5py
#model2 = NLPModel2()

# Restore the pickled sentiment classifier and TF-IDF vectorizer.
# NOTE: pickle.load is only safe on these locally produced files.
clf_path = 'models/Classifier.pkl'
with open(clf_path, 'rb') as f:
    model1.clf = pickle.load(f)

vec_path = 'models/TFIDFVectorizer.pkl'
with open(vec_path, 'rb') as f:
    model1.vectorizer = pickle.load(f)

# argument parsing: each request supplies a single 'query' string.
parser = reqparse.RequestParser()
parser.add_argument('query')