Example n. 1
0
def classify_sentence(clf, user_input):
    """Classify *user_input* with the trained classifier *clf*.

    Builds the feature dict for the sentence, arranges the values in the
    fixed column order the model was trained on, drops the leading "id"
    and trailing "class" entries, and returns the predicted class label
    stripped of surrounding whitespace.
    """
    import features
    import pandas as pd
    keys = ["id",
            "wordCount",
            "stemmedCount",
            "stemmedEndNN",
            "CD",
            "NN",
            "NNP",
            "NNPS",
            "NNS",
            "PRP",
            "VBG",
            "VBZ",
            "startTuple0",
            "endTuple0",
            "endTuple1",
            "endTuple2",
            "verbBeforeNoun",
            "qMark",
            "qVerbCombo",
            "qTripleScore",
            "sTripleScore",
            "class"]
    # 'X' is a placeholder class label; feature extraction requires one
    # even outside supervised-training mode.
    feature_map = features.features_dict('1', user_input, 'X')
    series = pd.Series([feature_map[key] for key in keys])
    # Drop the first item (the id) AND the last item (the placeholder
    # class used only for supervised learning mode).
    feature_vector = series[1:len(series) - 1]
    predict = clf.predict([feature_vector])
    return predict[0].strip()
def classify_sentence(clf, user_input):
    """Predict a class for *user_input* and print a confidence-gated label.

    Builds the feature vector in the fixed key order, strips the leading id
    and trailing placeholder class, then prints "CHAT!", "QUES!" or "STAT!"
    when the model is at least 75% confident in the predicted class,
    otherwise "You tell me?".
    """
    keys = ["id", "wordCount", "stemmedCount", "stemmedEndNN", "CD", "NN",
            "NNP", "NNPS", "NNS", "PRP", "VBG", "VBZ", "startTuple0",
            "endTuple0", "endTuple1", "endTuple2", "verbBeforeNoun", "qMark",
            "qVerbCombo", "qTripleScore", "sTripleScore", "class"]
    # 'X' is a placeholder class label required by the feature extractor.
    feature_map = features.features_dict('1', user_input, 'X')
    series = pd.Series([feature_map[key] for key in keys])
    # Drop the first item (id) and the last item (placeholder class).
    vector = series[1:len(series) - 1]

    predict = clf.predict([vector])
    predictions = clf.predict_proba([vector])

    label = predict[0].strip()
    probs = predictions[0]
    # NOTE(review): the probability column indices used below (0 for "C",
    # 2 for "Q", 3 otherwise) assume a specific ordering/size of
    # clf.classes_ — confirm against the trained model.
    if label == "C":
        print("CHAT!" if probs[0] >= 0.75 else "You tell me?")
    elif label == "Q":
        print("QUES!" if probs[2] >= 0.75 else "You tell me?")
    else:
        print("STAT!" if probs[3] >= 0.75 else "You tell me?")
Example n. 3
0
def sent_class(sentence):
    """Return the classification code predicted for *sentence*.

    Builds the feature series, strips the leading ID and trailing null
    class, and asks the module-level ``loaded_model`` for a prediction.
    """
    sent_id = 1  # features needs an ID passing in at moment - maybe redundant?

    feature_series = features.features_series(
        features.features_dict(str(sent_id), sentence))
    # All but the first and last item (strip ID and null class off).
    trimmed = feature_series[1:len(feature_series) - 1]

    # Get a classification prediction from the model for these features.
    return loaded_model.predict([trimmed])[0].strip()
Example n. 4
0
def sentenceForestClass(sentence):
    """Classify *sentence* with the pickled Random Forest at MODEL_LOC."""
    # NOTE(review): pickle.load is unsafe on untrusted files — MODEL_LOC
    # is assumed to point at a trusted, locally-built model.
    with open(MODEL_LOC, 'rb') as f:
        rf = pickle.load(f, encoding='latin1')

    # features needs an ID passing in at moment - maybe redundant?
    sent_id = hashtext(sentence)
    fvec = features.features_series(features.features_dict(sent_id, sentence))
    # All but the first and last item (strip ID and null class off).
    fvec = fvec[1:len(fvec) - 1]

    # Get a classification prediction from the model for these features.
    return rf.predict([fvec])[0].strip()
Example n. 5
0
def sentence_rf_class(sentence):
    """
    Pass in a sentence and pass back a classification code.

    Loads the pre-built Random Forest model from RF_MODEL_LOCATION and
    classifies the sentence based on features extracted from it.
    """
    # Load the pre-built Random Forest model from disk.
    with open(RF_MODEL_LOCATION, 'rb') as f:
        rf = pickle.load(f)

    # features needs an ID passing in at moment - maybe redundant?
    sent_id = hashtext(sentence)
    fvec = features.features_series(features.features_dict(sent_id, sentence))
    # All but the first and last item (strip ID and null class off).
    fvec = fvec[1:len(fvec) - 1]

    # Get a classification prediction from the model for these features.
    return rf.predict([fvec])[0].strip()
Example n. 6
0
# Read sentence/class rows from the open CSV handle `fin`, compute the
# feature dict for each sentence, and print/write one comma-separated row
# per input line. Relies on module-level names: fin (input file),
# fout (output file), keys (ordered column names), features (extractor).
reader = csv.reader(fin)

loopCount = 0
next(reader)  # Assume we have a header
for line in reader:
    sentence = line[0]
    c = line[1]  # class-label
    # Generate a short, stable unique ID from the sentence text.
    # (renamed from `id`, which shadowed the builtin)
    row_id = hashlib.md5(str(sentence).encode('utf-8')).hexdigest()[:16]

    f = features.features_dict(row_id, sentence, c)

    if loopCount == 0:  # only print/write the header for the first row
        # Same text the old append-loop produced after stripping the
        # leading "," — note the deliberate leading space.
        header = " " + ", ".join(keys)
        print(header)
        fout.writelines(header + '\n')

    # Join values in key order instead of repeated string concatenation.
    output = " " + ", ".join(str(f[key]) for key in keys)

    loopCount = loopCount + 1
    print(output)
#sentence = "Can a dog see in colour?"
#
#sentence = features.strip_sentence(sentence)
#print(sentence)
#pos = features.get_pos(sentence)
#triples = features.get_triples(pos)
#print(triples)

# Demo: run feature extraction over a small batch of test sentences
# (the result printing is commented out).
sentences = [
    "Can a dog see in colour?", "Hey, How's it going?",
    "Oracle 12.2 will be released for on-premises users on 15 March 2017",
    "When will Oracle 12 be released"
]
id = 1  # NOTE(review): shadows the builtin `id`
for s in sentences:
    features_dict = features.features_dict(str(id), s)
    features_string, header = features.get_string(str(id), s)
    #    print(features_dict)
    #    print(features_string)
    id += 1

from sklearn.ensemble import RandomForestClassifier

FNAME = 'C://Users/Abhay/Downloads/NLPBot-master/NLPBot-master/analysis/featuresDump.csv'

# Load the pre-computed feature dump into a DataFrame.
df = pd.read_csv(FNAME)
#print(str(len(df)), "rows loaded")

# Strip stray whitespace from column names and from the class labels.
df.columns = df.columns.str.strip()
df['class'] = df['class'].map(str.strip)
Example n. 8
0
    "id", "wordCount", "stemmedCount", "stemmedEndNN", "CD", "NN", "NNP",
    "NNPS", "NNS", "PRP", "VBG", "VBZ", "startTuple0", "endTuple0",
    "endTuple1", "endTuple2", "verbBeforeNoun", "qMark", "qVerbCombo",
    "qTripleScore", "sTripleScore", "class"
]

# Build one feature row per input sentence and predict a class for each.
# Depends on names defined earlier (outside this view): reader, keys,
# width, clf, fin, features, pd.
rows = []

next(reader)  #Assume we have a header
for line in reader:
    sentence = line[0]
    c = line[1]  #class-label
    id = hashlib.md5(
        str(sentence).encode('utf-8')).hexdigest()[:16]  # generate a unique ID

    # Extract the full feature dict, then flatten it in `keys` order.
    f = features.features_dict(id, sentence, c)
    row = []

    for key in keys:
        value = f[key]
        row.append(value)
    rows.append(row)

faq = pd.DataFrame(rows, columns=keys)
fin.close()
# NOTE(review): `width` is assumed to be the feature-vector length,
# defined earlier — confirm against the preceding code.
featureNames = faq.columns[1:width -
                           1]  #remove the first ID col and last col=classifier
faqPreds = clf.predict(faq[featureNames])

predout = pd.DataFrame({
    'id': faq['id'],
import pandas as pd
import sys
import features

# Hard-coded project and data locations (Windows paths).
CODE_LOC = 'C:\\Users\\Vishakha Lall\\Projects\\Python\\TestNLTK'
DATA_LOC = 'C:\\Users\\Vishakha Lall\\Projects\\Python\\TestNLTK\\sentences.csv'

# Load the sentence corpus and preview the first ten rows.
sentences = pd.read_csv(filepath_or_buffer=DATA_LOC)
print(sentences.head(10))

sentence = "Can a dog see in colour?"

# Normalise the sentence, then derive POS tags and word triples from it.
sentence = features.strip_sentence(sentence)
print(sentence)
pos = features.get_pos(sentence)
triples = features.get_triples(pos)

print(triples)
# Demo: extract and print the feature dict for a batch of test sentences.
sentences = [
    "Can a dog see in colour?", "Hey, How's it going?",
    "Oracle 12.2 will be released for on-premises users on 15 March 2017",
    "When will Oracle 12 be released"
]
id = 1  # NOTE(review): shadows the builtin `id`
for s in sentences:
    features_dict = features.features_dict(str(id), s)
    features_string, header = features.get_string(str(id), s)
    print(features_dict)
    #print(features_string)
    id += 1
# Re-display the single sentence and its POS tags / triples from the
# earlier section (relies on `sentence` being assigned above).
print(sentence)
pos = features.get_pos(sentence)
triples = features.get_triples(pos)

print(triples)

#Dictionary of features
# Demo: print the feature dict for a batch of canned bot phrases.
sentence = [
    "Sorry, I don't know about the weather.",
    "That is a tricky question to answer.", "What does OCM stand for",
    "MAX is a Mobile Application Accelerator", "Can a dog see in colour?",
    "how are you"
]
id = 1
for s in sentence:
    features_dict = features.features_dict(str(id), s)
    # Fix: pass the running id (was hard-coded str(1)) so each sentence
    # gets its own identifier, matching the sibling demo loop above.
    features_string, header = features.get_string(str(id), s)
    print(features_dict)

    id += 1

#Building a machine learning model
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier

# Load the pre-computed feature dump and report how many rows came in.
df = pd.read_csv("featuresDump.csv")
print(f"{len(df)} rows loaded")

#Strip any leading space from col names
df.columns = df.columns.str.strip()