Esempio n. 1
0
    def load(model_dir):
        """Rebuild a saved text classifier from the files stored in *model_dir*.

        ``args.json`` inside *model_dir* supplies the training settings. The
        vocabulary, model and weights files are looked up inside *model_dir*
        using only the base name of the paths recorded in ``args.json``.

        Args:
            model_dir: Directory holding ``args.json`` and the files it names.

        Returns:
            A ``(model, vocab, setup_json, weights)`` tuple: the classifier
            with its parameters loaded from the npz file, the vocabulary read
            from JSON, the parsed ``args.json`` dict, and the weights as a
            NumPy array.
        """
        def _inside_model_dir(key):
            # Only the file name is kept; the recorded directory is ignored.
            return os.path.join(model_dir, os.path.basename(setup_json[key]))

        with open(os.path.join(model_dir, "args.json")) as handle:
            setup_json = json.load(handle)

        vocab_path, model_path, weights_path = (
            _inside_model_dir(key)
            for key in ("vocab_path", "model_path", "weights_path"))

        # Vocabulary and weights are both stored as JSON.
        with open(vocab_path) as handle:
            vocab = json.load(handle)

        with open(weights_path) as handle:
            weights = np.asarray(json.load(handle))

        n_class, dropout, embed_dim, no_layers = (
            setup_json[key]
            for key in ('n_class', 'dropout', 'embed_dim', 'no_of_layers'))

        # Recreate the architecture first, then fill in the saved parameters.
        encoder = TrainPipelineBuilder._get_encoder(
            setup_json['encoder_name'], vocab, weights, no_layers, embed_dim,
            dropout)
        model = TextClassifier.TextClassifier(encoder, n_class)
        chainer.serializers.load_npz(model_path, model)

        return model, vocab, setup_json, weights
Esempio n. 2
0
def stratifiedkfold_cv(x, y, clf_class, shuffle=True, n_folds=5, **kwargs):
    """Return out-of-fold predictions from stratified k-fold cross-validation.

    A fresh ``clf_class(**kwargs)`` is fitted on the training part of every
    fold and used to predict the held-out part, so each sample is predicted
    by a model that never saw it.

    Args:
        x: Feature matrix that supports indexing by an array of row indices.
        y: Label array that supports indexing by an array of indices and
            provides ``copy()``. It is not modified.
        clf_class: Estimator class (or factory) exposing ``fit`` and
            ``predict``.
        shuffle: Forwarded to ``StratifiedKFold``; whether to shuffle samples
            before splitting.
        n_folds: Number of folds, forwarded to ``StratifiedKFold`` as
            ``n_splits``.
        **kwargs: Keyword arguments used to construct each estimator.

    Returns:
        A copy of *y* in which every entry has been replaced by the
        prediction made for it while it was in the held-out fold.
    """
    # Honour the caller's settings instead of a hard-coded 5 unshuffled folds.
    stratifiedk_fold = StratifiedKFold(n_splits=n_folds, shuffle=shuffle)
    # A real copy is required: slicing a NumPy array (``y[:]``) yields a view,
    # so writing predictions into it would overwrite the true labels and feed
    # earlier folds' predictions into later folds' training targets.
    y_pred = y.copy()
    for train_index, test_index in stratifiedk_fold.split(x, y):
        clf = clf_class(**kwargs)
        clf.fit(x[train_index], y[train_index])
        y_pred[test_index] = clf.predict(x[test_index])
    return y_pred

    # Cross-validate MultinomialNB through stratifiedkfold_cv on the
    # vectorised samples and print the macro-averaged precision.
    NB = MultinomialNB
    print(
        precision_score(y,
                        stratifiedkfold_cv(vec.transform(x), np.array(y), NB),
                        average='macro'))

    ### Our own text classifier
    # NOTE(review): `import TextClassifier` binds the module itself, so the
    # call `TextClassifier()` below looks like it would fail unless the module
    # object is callable - confirm whether
    # `from TextClassifier import TextClassifier` was intended.
    import TextClassifier
    text_classifier = TextClassifier()
    text_classifier.fit(x_train, y_train)
    # Predict on one whitespace-tokenised sentence, then print the test score.
    print(text_classifier.predict('这 是 有史以来 最 大 的 一 次 军舰 演习'))
    print(text_classifier.score(x_test, y_test))

    ## SVM classifier (linear kernel)
    from sklearn.svm import SVC
    svm = SVC(kernel='linear')
    svm.fit(vec.transform(x_train), y_train)
    # The score is computed here but neither printed nor stored.
    svm.score(vec.transform(x_test), y_test)
Esempio n. 3
0
def textclassifier_run(data, ngram, minimum_lyric_appearance):
    """Train the classifiers on *data* and predict the held-out test set.

    Args:
        data: Passed as the first argument to ``TextClassifier``.
        ngram: Passed as the second argument to ``TextClassifier``.
        minimum_lyric_appearance: Passed as the third argument to
            ``TextClassifier``.

    Returns:
        A ``(y_predicted, y_test)`` pair. ``y_predicted`` is the mapping
        returned by ``get_predict_values`` with an extra
        ``"combined_classifier"`` entry; ``y_test`` holds the test labels
        returned by ``divide_test_and_train``.
    """
    pipeline = TextClassifier(data, ngram, minimum_lyric_appearance)

    # Split into training features/labels and test features/labels.
    split = pipeline.divide_test_and_train()
    train_features, test_features, train_labels, y_test = split

    # Fit the models, then collect their predictions on the test set.
    pipeline.create_models(train_features, train_labels)
    predictions = pipeline.get_predict_values(test_features)

    # Add the combined classifier's prediction built from the others.
    combined = pipeline.combine_classifiers(predictions)
    predictions["combined_classifier"] = combined

    return predictions, y_test
Esempio n. 4
0
    def populateDB(self, tableName, tweets, weekNo):
        """Insert every tweet not already stored in *tableName*, then commit.

        Args:
            tableName: Name of the table, concatenated into the SQL text.
            tweets: Iterable of mappings read with ``.get("tweet")`` and
                ``.get("sentiment")``.
            weekNo: Value stored in the ``weekNo`` column of each new row.
        """
        for entry in tweets:
            text = entry.get("tweet")
            label = entry.get("sentiment")
            cleaned = str(TextClassifier.stopWordRemover(text))

            # Count rows that already hold exactly this tweet text.
            cur.execute(
                """SELECT COUNT(*) FROM """ + tableName + """ WHERE tweet=?""",
                (text, ))
            already_stored = cur.fetchone()[0] >= 1
            if not already_stored:
                new_row = (text, cleaned, label, weekNo)
                cur.execute(
                    """INSERT INTO """ + tableName +
                    """ (tweet,cleanTweet,sentiment,weekNo) VALUES (?,?,?,?)""",
                    new_row)

        # One commit for the whole batch.
        con.commit()
Esempio n. 5
0
    def getTreeviewData(self):
        """Fill the tweets tree view and the tweet-count label.

        The table to read is named by the first line of ``filmTitle.txt``.
        Every row of that table is inserted into ``self.tweetsTreeview``, and
        ``self.volumeOfTweetsLabel`` is updated with the table's row count.

        Side effects: ``self.tweetID``, ``self.tweet`` and ``self.sentiment``
        are overwritten for every row and end up holding the last row's
        values.

        Raises:
            IndexError: If ``filmTitle.txt`` is empty.
        """
        # The context manager closes the file; it was previously left open.
        with open("filmTitle.txt", "r") as f:
            film = str(f.readlines()[0].strip())

        # NOTE(review): the table name comes from a file and is concatenated
        # into the SQL text (identifiers cannot be bound as parameters) -
        # confirm that filmTitle.txt is only ever written by trusted code.
        #
        # Fetch the three columns in one query so each id, text and sentiment
        # come from the same row; separate SELECTs paired by position depend
        # on the rows coming back in the same order every time.
        cur.execute("""SELECT tweetID, tweet, sentiment FROM """ + film)
        for tweetID, tweet, sentiment in cur.fetchall():
            self.tweetID = tweetID
            self.tweet = TextClassifier.cleanString(tweet)
            self.sentiment = sentiment
            self.tweetsTreeview.insert(
                "",
                tweetID,
                text=tweetID,
                values=(self.tweetID, self.tweet,
                        self.sentiment))  # inserting attributes

        cur.execute("""SELECT COUNT(*) FROM """ + film)
        self.volumeOfTweetsLabel.config(text="Total Number Of Tweets: " +
                                        str(cur.fetchone()[0]))
    def testTextClassifier(self):
        """Check the string-comparison predicates exposed by TextClassifier.

        Each case names a predicate, its two arguments and whether the result
        must be truthy. Cases run in the listed order and the first mismatch
        raises ``AssertionError``.
        """
        cases = (
            # equals: expected to match regardless of letter case
            ('equals', 'hello', 'hello', True),
            ('equals', 'hello', 'Hello', True),
            ('equals', 'hello', 'goodbye', False),
            # case_sensitive_equals: expected to match only identical strings
            ('case_sensitive_equals', 'hello', 'hello', True),
            ('case_sensitive_equals', 'hello', 'Hello', False),
            ('case_sensitive_equals', 'hello', 'goodbye', False),
            # starts_with: second argument is the prefix
            ('starts_with', 'hello', 'he', True),
            ('starts_with', 'hello', 'HE', True),
            ('starts_with', 'he', 'hello', False),
            # contains: second argument is the substring
            ('contains', 'hello', 'he', True),
            ('contains', 'hello', 'HE', True),
            ('contains', 'hello', 'll', True),
            ('contains', 'hello', 'ol', False),
            # fuzzy_equals: near misses match; identical strings do not
            ('fuzzy_equals', 'hell', 'hello', True),
            ('fuzzy_equals', 'hello', 'hell', True),
            ('fuzzy_equals', 'hello', 'hellp', True),
            ('fuzzy_equals', 'hellp', 'hello', True),
            ('fuzzy_equals', 'hello', 'hello', False),
            ('fuzzy_equals', 'hellp', 'HEllp', False),
            ('fuzzy_equals', 'help', 'pleh', False),
        )
        for predicate_name, first, second, expected in cases:
            predicate = getattr(TextClassifier, predicate_name)
            assert bool(predicate(first, second)) is expected
        assert not TextClassifier.fuzzy_equals('help', 'pleh')
# To install TensorFlow :  C:\Users\usager\AppData\Local\Programs\Python\Python36\Scripts> pip3 install --upgrade tensorflow
# To install opencv :  C:\Users\usager\AppData\Local\Programs\Python\Python36\Scripts> pip3 install opencv-python
#
#########################################################

import tensorflow as tf
import numpy as np
import sys
import cv2
import time

import Helper as Hlp
import NotesDeCours as NC
import FashionClassifier as FC
import TextClassifier as TC
#import TensorBoard as TB

#########################################################
#
# Main body
#
#########################################################
# The NC (NotesDeCours) and FC (FashionClassifier) entry points below are
# commented out; only the TextClassifier tutorial is run.
#NC.EspaceReserve()
#NC.Variable()
#NC.Matrice()
#NC.MultiNode();
#NC.BaseGraphe(4,3)

#FC.StartTutorial()
# Runs at import time: start the tutorial exposed by the TC module.
TC.StartTutorial()
Esempio n. 8
0
#Done for now

from TextClassifier import *

# Dataset locations. The two directories used to be bound the wrong way round
# (the training data was read from './data/test/' and the evaluation data from
# './data/train/'), so the model was fitted on the test split.
train_path = './data/train/'
test_path = './data/test/'
model = TextClassifier()

# Load the positive/negative examples of each split.
data = model.organize_text(train_path + 'pos/', train_path + 'neg/')
tests = model.organize_text(test_path + 'pos/', test_path + 'neg/')

# The tokenizer is initialised from the training text only, then both splits
# are converted with it.
model.init_tokenizer(data['text'])
X_train = np.array(model.proccess_text(data['text']))
X_test = np.array(model.proccess_text(tests['text']))
y_train = data['label']
y_test = tests['label']
print(X_train.shape)

# Train (the test split is passed alongside the training data), then report
# the second value returned by eval as a percentage.
model.train(X_train, y_train, X_test, y_test)
scores = model.eval(X_test, y_test)
print("Accuracy: %.2f%%" % (scores[1] * 100))

# Force a garbage-collection pass once the run is finished.
import gc
gc.collect()
Esempio n. 9
0
from flask import Flask, jsonify, request, json, session, redirect, url_for, Response
from flask_cors import CORS
import TextClassifier as textclassifier

# Flask application object that the route decorators below attach to.
app = Flask(__name__)
# NOTE(review): the session-signing key is a short literal committed in the
# source; it should presumably come from configuration or the environment -
# confirm before deploying.
app.secret_key = "1234"
# Enable CORS on the app, with credentialed requests allowed.
CORS(app, supports_credentials=True)
# Classifier instance created once at import time and used by predict().
predictions = textclassifier.TextClassifier()


@app.route("/predict", methods=['POST'])
def predict():
    """Classify the post at the URL given in the JSON request body.

    Expects a JSON object with a ``url`` key.

    Returns:
        The JSON-encoded result of ``predictions.logreg_predict_class(url)``
        on success; ``("Oops! Incorrect format", 400)`` when the body is not a
        JSON object containing ``url``; ``(str(error), 400)`` when the
        prediction raises.
    """
    # silent=True yields None for a missing or malformed JSON body, so every
    # bad request falls through to the single format check below.
    details = request.get_json(silent=True)

    # The body may also be valid JSON that is not an object (list, string,
    # number); the membership test or the lookup would then raise and surface
    # as a 500 instead of the intended 400.
    if not isinstance(details, dict) or 'url' not in details:
        return "Oops! Incorrect format", 400

    try:
        flair = predictions.logreg_predict_class(details['url'])
        return jsonify(flair)
    except Exception as e:
        # Request boundary: report any prediction failure to the client
        # rather than letting it become an unhandled server error.
        print(e)
        return str(e), 400


@app.route("/automated_testing", methods=['POST'])
def automated_testing():
    if 'upload_file' in request.files:
        f = request.files['upload_file']
        model = "log"
        if 'model' in request.form:
Esempio n. 10
0
import TextClassifier
# import GoogleSearch
import sys

########################################################################################################################

# Validate the command line before doing any work, so a missing argument
# produces a usage message instead of an IndexError after the classifier has
# already been created.
if len(sys.argv) < 3:
    sys.exit("Usage: <script> <input_file> <output_file>")

print("\nClassifying your input...")

# TextRazor API key (limited to 500 requests per day)
# NOTE(review): this credential is committed in the source; it should
# presumably be supplied through the environment or a config file - confirm.
text_razor_api_key = "a1d06d38ab618db113866db22344e6eccc49af1d4603088f11a6cd43"

# Instantiate a text classifier
classifier = TextClassifier.TextClassifier(text_razor_api_key)

# Specify the input file along with its "json" format
input_file = sys.argv[1]

# Classify the text in the input file, and save the result in an output file
output_file = sys.argv[2]
classification_res = classifier.analyze(input_file, output_file)

print("Classification is complete (see \"" + output_file + "\").")
Esempio n. 11
0
 def getTweetSentiment(self, tweet):
     """Return ``TextClassifier.getFinalSentiment(tweet)`` unchanged."""
     sentiment = TextClassifier.getFinalSentiment(tweet)
     return sentiment