Python NaiveBayesClassifier.prob_classifyの例、textblob.classifiers.NaiveBayesClassifier.prob_classify Pythonの例

コード例 #1

0

ファイルを表示

ファイル: classifier.py プロジェクト: Jamesm4/Sentiment-Classifier

class Classifier:
  """Sentiment classifier backed by a Naive Bayes model built from CSV data.

  The training data is read from ``./data/train.csv`` at construction time.
  All reporting methods return JSON strings.
  """

  def __init__(self):
    # ``with`` closes the handle even if building the classifier raises.
    with open("./data/train.csv") as fp:
      self.cl = NaiveBayesClassifier(fp, format="csv")

  def test(self):
    """Classify a fixed smoke-test sentence and return its label."""
    return self.cl.classify("This is a test sentence")

  def classify(self, text):
    """Return the label the model assigns to ``text``."""
    return self.cl.classify(text)

  def n_classify(self, text):
    """Return a JSON string listing every label with probability >= 0.10.

    The payload has the shape ``{"sentiments": [{label: probability}, ...]}``.
    """
    dist = self.cl.prob_classify(text)

    sentiments = []
    for s in dist.samples():
      p = dist.prob(s)  # look the probability up once per label
      if p >= .10:
        sentiments.append({s: p})

    return json.dumps({"sentiments": sentiments})

  def accuracy(self):
    """Return a JSON string with the accuracy on the train and test CSV files."""
    with open('./data/train.csv') as fp:
      train_accuracy = self.cl.accuracy(fp, format="csv")
    with open('./data/test.csv') as fp:
      test_accuracy = self.cl.accuracy(fp, format="csv")
    return json.dumps({"train_accuracy": train_accuracy, "test_accuracy": test_accuracy})

  def labels(self):
    """Return a JSON string with the labels known to the model."""
    return json.dumps({"labels": self.cl.labels()})

コード例 #2

0

ファイルを表示

class Model(object):
    """Thin wrapper around a Naive Bayes text classifier."""

    def __init__(self, name='Guess', config=None):
        """Create a model with an empty training set.

        Args:
            name: Name stored on the model.
            config: Optional configuration mapping. A fresh dict is created
                when omitted, so instances never share one default object.
        """
        self.name = name
        self.config = {} if config is None else config
        self.clf = NaiveBayesClassifier([])

    def train(self, training_data):
        """Update the classifier with ``training_data``.

        Args:
            training_data: Iterable of mappings; the ``'text'`` and ``'label'``
                entries are read with ``.get`` so a missing key yields ``None``.
        """
        safe_training = [
            (example.get('text'), example.get('label'))
            for example in training_data
        ]
        self.clf.update(safe_training)

    def evaluate(self, text):
        """Return ``(label, probability of that label)`` for ``text``."""
        label = self.clf.classify(text)
        prob_dist = self.clf.prob_classify(text)
        label_prob = prob_dist.prob(label)
        return label, label_prob

    def get_classes(self):
        """Return the labels known to the classifier."""
        return self.clf.labels()

    def save(self):
        """Persistence hook; not implemented."""
        pass

    def load(self):
        """Persistence hook; not implemented."""
        pass

コード例 #3

0

ファイルを表示

def PLN_CSV(text):
    """
    Classify ``text`` as positive or negative with a Naive Bayes classifier.

    (PLN) Natural Language Processing...

    The classifier is rebuilt on every call from
    ``analysistext/pln/feelings.csv`` (semicolon separated, no header row).

    Returns:
        A one-element list holding a dict with the keys ``dist_prob_max``,
        ``dist_prob_positivo`` and ``dist_prob_negativo``, or an empty list
        when the classifier could not be built.
    """
    feelings_list = []
    try:
        feelings = pd.read_csv('analysistext/pln/feelings.csv',
                               sep=';',
                               header=None)
        clf = NaiveBayesClassifier(feelings.values, format="csv")
    except Exception:
        # Best effort: report the problem and return no result. Previously
        # clf was set to None and the next line crashed with AttributeError.
        print("Não conseguiu abrir o arquivo ou ele não existe")
        return feelings_list

    # split out the individual probabilities
    dist_prob = clf.prob_classify(text)
    feelings_list.append({
        "dist_prob_max": dist_prob.max(),
        "dist_prob_positivo": dist_prob.prob('positivo'),
        "dist_prob_negativo": dist_prob.prob('negativo')
    })
    return feelings_list

コード例 #4

0

ファイルを表示

ファイル: labels.py プロジェクト: replive/nightfury

class HelpLabeler(object):
    """Label help requests with a Naive Bayes model and record every query seen.

    ``HELP_DATA`` is both the training set of the classifier and the
    persistent text -> label store.
    """

    HELP_DATA = 'help_data.json'

    def __init__(self):
        with open(self.HELP_DATA, 'r') as fp:
            self.c = NaiveBayesClassifier(fp, format="json")
        with open(self.HELP_DATA, 'r') as fp:
            # text -> label; for duplicate texts the last entry wins
            self.help_json = {i['text']: i['label'] for i in json.load(fp)}

    def get_label(self, text, lower_placeholders=None):
        """Return the most likely label for ``text``, or ``None`` if unsure.

        The lower-cased text is first recorded through ``save_help``. A label
        is returned only when its probability, rounded to two decimals, is
        above 0.7.

        Args:
            text: Query to classify.
            lower_placeholders: Unused; kept for backward compatibility. The
                default is ``None`` rather than a shared mutable list.
        """
        text = text.lower()
        self.save_help(text)
        prob_dist = self.c.prob_classify(text)
        label = prob_dist.max()
        prob = round(prob_dist.prob(label), 2)
        return label if prob > 0.7 else None

    def save_help(self, lower_text):
        """Record ``lower_text`` as ``'unknown'`` if it is new, then rewrite ``HELP_DATA``."""
        self.help_json.setdefault(lower_text, 'unknown')

        with open(self.HELP_DATA, 'w') as fp:
            json.dump([{'text': k, 'label': v} for k, v in self.help_json.items()], fp, indent=4)

コード例 #5

0

ファイルを表示

ファイル: nb_classifier.py プロジェクト: Rositsazz/Movri

class NBClassifier:
    """Naive Bayes classifier whose training set is persisted in a JSON file."""

    def __init__(self, train_data_file):
        """Build the classifier from the JSON training data in ``train_data_file``."""
        self._train_data_file = train_data_file
        # Read-only access is sufficient here; ``with`` also closes the handle
        # when building the classifier raises.
        with open(self._train_data_file, 'r') as f:
            self._cl = NaiveBayesClassifier(f, format="json")

    def update_train_set(self, sentence):
        """Add one labelled sentence to the model and persist the training set.

        Args:
            sentence: Object exposing ``str_sentence`` and ``label`` attributes.
        """
        new_data = [(sentence.str_sentence, sentence.label)]
        self._cl.update(new_data)
        self._save_data_to_file()

    def _save_data_to_file(self):
        """Rewrite the training file as a JSON list of ``{"text", "label"}`` objects."""
        import json  # local import: the file's top-level imports are not visible here

        records = [
            {"text": str(el[0]), "label": str(el[1])}
            for el in self._cl.train_set
        ]
        # Mode 'w' truncates stale content, and json.dump escapes quotes and
        # backslashes; the previous hand-built string written in 'r+' mode
        # did neither and could leave an unparsable file behind.
        with open(self._train_data_file, 'w') as f:
            json.dump(records, f)

    def prob_classify(self, sentence):
        """Return the most probable label for ``sentence`` (not the distribution)."""
        return self._cl.prob_classify(sentence).max()

コード例 #6

0

ファイルを表示

def enginemongo(text):
    """Answer ``text`` with a Naive Bayes model trained on ``db.trainingset``.

    Every document found in ``db.trainingset`` contributes one
    (question, answer) training pair. Diagnostics are printed along the way.

    Returns:
        A dict with ``answer_key`` (the answer whose rounded probability is
        highest, ``""`` if none is above 0) and ``answer_prob`` (that
        probability rounded to two decimals).
    """
    from textblob.classifiers import NaiveBayesClassifier
    tsarr = [(doc["question"], doc["answer"]) for doc in db.trainingset.find()]

    print(tsarr)
    cl = NaiveBayesClassifier(tsarr)
    prob_dist = cl.prob_classify(text)
    print("TEST:", text, " ", prob_dist, " ", prob_dist.max())

    maxprob, maxanswer = 0, ""
    for candidate in prob_dist.samples():
        rounded = round(prob_dist.prob(candidate), 2)
        if rounded > maxprob:
            maxprob, maxanswer = rounded, candidate
        print(candidate, ":", rounded)

    print(cl.show_informative_features())
    print("RISPOSTA:", maxanswer, " --- ", maxprob)
    features = cl.extract_features(text)
    print(features)
    print("---------------------------------------")
    return {"answer_key": maxanswer, "answer_prob": maxprob}

コード例 #7

0

ファイルを表示

ファイル: labels.py プロジェクト: sampr0/nightfury

class HelpLabeler(object):
    """Label help requests with a Naive Bayes model and record every query seen.

    ``HELP_DATA`` is both the training set of the classifier and the
    persistent text -> label store.
    """

    HELP_DATA = 'help_data.json'

    def __init__(self):
        with open(self.HELP_DATA, 'r') as fp:
            self.c = NaiveBayesClassifier(fp, format="json")
        with open(self.HELP_DATA, 'r') as fp:
            # text -> label; for duplicate texts the last entry wins
            self.help_json = {i['text']: i['label'] for i in json.load(fp)}

    def get_label(self, text, lower_placeholders=None):
        """Return the most likely label for ``text``, or ``None`` if unsure.

        The lower-cased text is first recorded through ``save_help``. A label
        is returned only when its probability, rounded to two decimals, is
        above 0.7.

        Args:
            text: Query to classify.
            lower_placeholders: Unused; kept for backward compatibility. The
                default is ``None`` rather than a shared mutable list.
        """
        text = text.lower()
        self.save_help(text)
        prob_dist = self.c.prob_classify(text)
        label = prob_dist.max()
        prob = round(prob_dist.prob(label), 2)
        return label if prob > 0.7 else None

    def save_help(self, lower_text):
        """Record ``lower_text`` as ``'unknown'`` if it is new, then rewrite ``HELP_DATA``."""
        self.help_json.setdefault(lower_text, 'unknown')

        entries = [{
            'text': k,
            'label': v
        } for k, v in self.help_json.items()]
        with open(self.HELP_DATA, 'w') as fp:
            json.dump(entries, fp, indent=4)

コード例 #8

0

ファイルを表示

ファイル: server.py プロジェクト: sanchitghai24/ChatBot

def classifier(something):
    """Classify the first sentence of ``something``.

    A Naive Bayes classifier is built from the rows of ``training.csv`` on
    every call. The first sentence and its label are printed before the label
    is returned.

    Args:
        something: Text to classify.

    Returns:
        The label of the first sentence, or ``None`` when the text contains
        no sentence.
    """
    with open("training.csv") as csvfile:
        # each CSV row (a list) is used as one training example
        train = list(csv.reader(csvfile))

    # The unused read of test.csv and the discarded demo classifications that
    # used to live here were removed; they had no effect on the result.
    cl = NaiveBayesClassifier(train)
    blob = TextBlob(something, classifier=cl)
    for s in blob.sentences:
        print("\n\n\n" + str(s))
        label = s.classify()  # classified once, reused for output and result
        print("\n" + str(label))
        # Only the first sentence is reported, as before.
        return label
    return None

コード例 #9

0

ファイルを表示

def getresult():
    """Classify the JSON body of a POST request and return the top label as text.

    The classifier is rebuilt from ``pt_1.csv`` on every POST. For any other
    HTTP method nothing is classified and the string ``"None"`` is returned.
    """
    # Stays None for non-POST requests; previously the name was unbound there
    # and the final return raised UnboundLocalError.
    ab = None
    if request.method == "POST":
        print("########", request.args)

        body = request.data
        a = json.loads(body.decode('utf-8'))
        print(a)

        with open("pt_1.csv", "r", encoding="utf8") as fp:
            c1 = NaiveBayesClassifier(fp)

        prob_list = c1.prob_classify(a)
        print(prob_list.max())
        ab = prob_list.max()
        print("Ex ", round(prob_list.prob("Ex"), 3))
        print("In", round(prob_list.prob("In"), 3))
    return str(ab)

コード例 #10

0

ファイルを表示

def traintestclassifier(train_dataset, test_dataset, classifystring):
    """Train on ``train_dataset`` and print a short report.

    The report holds the probabilities (rounded to two decimals) that
    ``classifystring`` is labelled ``"dep"`` or ``"happy"``, followed by the
    accuracy on ``test_dataset``. Nothing is returned.
    """
    model = NaiveBayesClassifier(train_dataset)
    distribution = model.prob_classify(classifystring)
    dep_prob, happy_prob = (
        round(distribution.prob(label), 2) for label in ("dep", "happy")
    )
    report = (
        (dep_prob, "is the probability of this sentence being depressing"),
        (happy_prob, "is the probability of this sentence being happy"),
    )
    for value, caption in report:
        print(value, caption)
    print(model.accuracy(test_dataset), "Is the accuracy")

コード例 #11

0

ファイルを表示

def classify(text):
    """Return the most probable label for ``text``, or ``None`` if it is weak.

    The classifier is built from the module-level ``train`` data with the
    ``extract`` feature extractor on every call. The label is returned only
    when its probability is above 0.5.
    """
    model = NaiveBayesClassifier(train, feature_extractor=extract)
    distribution = model.prob_classify(text)
    best = distribution.max()
    return best if distribution.prob(best) > 0.5 else None

コード例 #12

0

ファイルを表示

ファイル: Annotator.py プロジェクト: arahim1795/4079_FYP

    def server_annotation(self, article: str, model: str) -> List[Tuple[str]]:
        """Annotate every sentence of ``article`` with a sentiment tag.

        A Naive Bayes classifier is built from the data returned by
        ``self._load(model)``. The article is split into paragraphs,
        pre-cleansed, segmented into sentences, post-cleansed and lemmatised
        before each sentence is classified.

        Returns:
            A list of ``(sentence, tag, probability)`` tuples. ``tag`` is
            ``"pos"`` for label ``"1"``, ``"neu"`` for label ``"2"`` and
            ``"neg"`` for anything else (reported with the probability of
            label ``"3"``). Probabilities are rounded to two decimals.
        """
        # init: load saved model (the success flag is not consulted)
        _, loaded = self._load(model)
        training_pairs = [(datum[0], datum[1]) for datum in loaded]
        classifier = NaiveBayesClassifier(training_pairs)

        # 1: paragraphing
        paragraphs = self._paragrapher(article)

        # 2: pre-cleansing, dropping paragraphs that end up empty
        paragraphs = [self._pre_cleanse_text(p) for p in paragraphs]
        paragraphs = [p for p in paragraphs if p]

        # 3: segmenting
        sentences = []
        for paragraph in paragraphs:
            sentences.extend(self._sentence_segmentor(paragraph))

        # 4: post-cleansing, dropping sentences that end up empty
        sentences = [self._post_cleanse_text(s) for s in sentences]
        sentences = [s for s in sentences if s]

        # 5: lemmatising
        sentences = [self._lemmatise_sentence(s) for s in sentences]

        # 6: annotating -- winning label -> (tag, label whose probability is reported)
        tag_by_label = {"1": ("pos", "1"), "2": ("neu", "2")}
        annotated = []
        for sentence in sentences:
            classification = classifier.prob_classify(sentence)
            tag, prob_key = tag_by_label.get(classification.max(), ("neg", "3"))
            annotated.append(
                (sentence, tag, round(classification.prob(prob_key), 2))
            )

        return annotated

コード例 #13

0

ファイルを表示

ファイル: run_test.py プロジェクト: cameronfabbri/smartTalk

def run_test(train, test, name):
    """Train a Naive Bayes classifier and report accuracy and confusion matrix.

    Results are printed, appended to ``test_results.txt``, and the confusion
    matrix is plotted to ``plots/<name>.pdf``.

    Args:
        train: Training examples passed to ``NaiveBayesClassifier``.
        test: Test examples; item ``[0]`` is the text, item ``[1]`` the true label.
        name: Run name used in the results file, plot title and plot file name.
    """
    # Single-argument print(...) behaves identically on Python 2 and 3,
    # unlike the print statement, which is a syntax error on Python 3.
    print("Training...")
    cll = NaiveBayesClassifier(train)
    print("Done training\n")
    accuracy = cll.accuracy(test)
    print("Accuracy: " + str(accuracy))

    # get matching lists of predicted and true labels
    pred_labels = list()
    true_labels = list()
    for obj in test:
        true_labels.append(obj[1])
        pred_labels.append(cll.prob_classify(obj[0]).max())

    # transform our labels to numbers
    labels = cll.labels()
    label_num = {label: i for i, label in enumerate(labels)}

    # match our predicted and true labels with the number representations
    true_label_nums = [label_num[true_l] for true_l in true_labels]
    pred_label_nums = [label_num[pred_l] for pred_l in pred_labels]

    cm = confusion_matrix(true_label_nums, pred_label_nums)
    print(cm)
    print("\n")

    with open("test_results.txt", "a") as tr:
        tr.write(str(name) + "\n")
        tr.write(str(accuracy) + "\n")
        tr.write(str(cm))
        tr.write("\n\n")

    import matplotlib.pyplot as plt
    fig = plt.figure()
    ax = fig.add_subplot(111)
    cax = ax.matshow(cm)
    plt.title("Confusion Matrix For " + name)
    fig.colorbar(cax)
    # the leading '' pads the tick label lists by one entry
    ax.set_xticklabels([''] + labels)
    ax.set_yticklabels([''] + labels)
    plt.xlabel("Predicted")
    plt.ylabel("True")
    plt.savefig('plots/' + name + '.pdf', bbox_inches='tight')

コード例 #14

0

ファイルを表示

ファイル: testing_naive.py プロジェクト: Mangesh242/InstituteAssistant

def classify_v1(text):
    """Clean ``text`` and classify it with a model built from train_dataset.csv.

    Args:
        text: Raw input string.

    Returns:
        The predicted label, or ``None`` when cleaning yields an empty list.
    """
    text = bc.basic_cleanning(text)  # returned value is in <list> format
    if text == []:
        return None
    with open('train_dataset.csv') as csv_file:
        cl = NaiveBayesClassifier(csv_file, format="csv")
        # The per-label probabilities that used to be computed here were
        # never used, so only the label is calculated.
        return cl.classify(text)

コード例 #15

0

ファイルを表示

ファイル: HelloPeterSentiments.py プロジェクト: dlunga/serViz

def main():
    """Train and inspect a sentiment classifier on ``hellopeter_labelled.csv``.

    Column 13 of each row holds the rating and column 8 the text. The first
    1000 usable rows form the training set, the rest the test set. Every test
    example is printed with its predicted label and per-label probabilities,
    followed by the accuracy on the test set.
    """
    # rating in the CSV -> classifier label; rows with any other rating are skipped
    label_for = {
        'strongly positive': 'pos',
        'positive': 'pos',
        'neutral': 'neu',
        'negative': 'neg',
        'strongly negative': 'neg',
    }
    data = []
    # NOTE: binary mode is what the Python 2 csv module expects; on Python 3
    # the file would have to be opened in text mode with newline=''.
    with open('hellopeter_labelled.csv', 'rb') as csvfile:
        for row in csv.reader(csvfile, delimiter=','):
            label = label_for.get(row[13])
            if label is not None:
                data.append((row[8], label))

    train = data[:1000]
    test = data[1000:]  # was data[1001:], which silently dropped example 1000

    # Single-argument print(...) behaves identically on Python 2 and 3.
    for innf in test:
        print(innf)

    cl = NaiveBayesClassifier(train)

    for tnew in test:
        print('%%%%%%%')
        print(' ')
        print(tnew[0])
        print(tnew[1])
        print('%%%%%%%')
        print('#######')
        prob_class = cl.prob_classify(tnew[0])
        print('----max prob---')
        print(prob_class.max())
        print('-----+ve-----')
        print(prob_class.prob("pos"))
        print('-----neutral-----')
        print(prob_class.prob("neu"))
        print('------ve-----')
        print(prob_class.prob("neg"))

    # The accuracy used to be computed and thrown away; report it.
    print(cl.accuracy(test))

コード例 #16

0

ファイルを表示

ファイル: HelloPeterSentiments.py プロジェクト: dlunga/serViz

def main():
    """Train and inspect a sentiment classifier on ``hellopeter_labelled.csv``.

    Column 13 of each row holds the rating and column 8 the text. The first
    1000 usable rows form the training set, the rest the test set. Every test
    example is printed with its predicted label and per-label probabilities,
    followed by the accuracy on the test set.
    """
    # rating in the CSV -> classifier label; rows with any other rating are skipped
    label_for = {
        'strongly positive': 'pos',
        'positive': 'pos',
        'neutral': 'neu',
        'negative': 'neg',
        'strongly negative': 'neg',
    }
    data = []
    # NOTE: binary mode is what the Python 2 csv module expects; on Python 3
    # the file would have to be opened in text mode with newline=''.
    with open('hellopeter_labelled.csv', 'rb') as csvfile:
        for row in csv.reader(csvfile, delimiter=','):
            label = label_for.get(row[13])
            if label is not None:
                data.append((row[8], label))

    train = data[:1000]
    test = data[1000:]  # was data[1001:], which silently dropped example 1000

    # Single-argument print(...) behaves identically on Python 2 and 3.
    for innf in test:
        print(innf)

    cl = NaiveBayesClassifier(train)

    for tnew in test:
        print('%%%%%%%')
        print(' ')
        print(tnew[0])
        print(tnew[1])
        print('%%%%%%%')
        print('#######')
        prob_class = cl.prob_classify(tnew[0])
        print('----max prob---')
        print(prob_class.max())
        print('-----+ve-----')
        print(prob_class.prob("pos"))
        print('-----neutral-----')
        print(prob_class.prob("neu"))
        print('------ve-----')
        print(prob_class.prob("neg"))

    # The accuracy used to be computed and thrown away; report it.
    print(cl.accuracy(test))

コード例 #17

0

ファイルを表示

ファイル: Text_cleanign_singleFile.py プロジェクト: rabidesk/Python_Short_Project

def ClassifyDirectory(Dictionary_Excel_File, News_Text_Directory):
    """Classify the merged ``.txt`` files of a directory as Positive or Negative.

    Training pairs come from the first sheet of ``Dictionary_Excel_File``
    (columns ``FWD`` and ``Result``): a ``Result`` above 0 is labelled
    "Positive", anything else "Negative". Reading stops at the first row with
    an empty ``FWD`` or a NaN ``Result``. The probabilities and the final
    result are printed; nothing is returned.

    Args:
        Dictionary_Excel_File: Path of the Excel training dictionary.
        News_Text_Directory: Directory holding the ``.txt`` files to classify.
    """
    # Merge all text files into one string called FILE
    parts = []
    for file_name in os.listdir(News_Text_Directory):
        if file_name.endswith(".txt"):
            # os.path.join works with or without a trailing separator on the
            # directory, unlike plain string concatenation.
            with open(os.path.join(News_Text_Directory, file_name), 'rt') as news:
                parts.append(news.read())
    FILE = "".join(parts)

    # clean text data from punctuation
    to_remove = "0123456789;.:?,#+%*/\t[]><'" + '"'
    table = {ord(char): ' ' for char in to_remove}
    FILE = FILE.translate(table)
    FILE = re.sub(' +', ' ', FILE)
    FILE = re.sub('\n ', '\n', FILE)
    FILE = re.sub('\n+', '\n', FILE)

    # extract the 'EXCEL' file data into a training data
    EXCEL = pd.read_excel(Dictionary_Excel_File, sheet_name=0, usecols=[0, 1])
    train = []
    for fwd, result in zip(EXCEL['FWD'], EXCEL['Result']):
        if fwd == "" or math.isnan(result):
            break
        train.append((fwd, "Positive" if result > 0.0 else "Negative"))

    # train the classifier with training data
    CL = NaiveBayesClassifier(train)

    # Classify the merged text file
    Result_Probability = CL.prob_classify(FILE)
    Result = Result_Probability.max()
    print("Positive Probability: " + str(
        round(Result_Probability.prob("Positive") * 100, 2)) + "%, Negative Probability: " + str(
        round(Result_Probability.prob("Negative") * 100, 2)) + "%")
    print("Final Result: ", Result)
    return

コード例 #18

0

ファイルを表示

ファイル: labels.py プロジェクト: tunnelshade/nightfury

class InputLabeler(object):
    """Label input texts with a Naive Bayes model built from ``LABELS_DATA``.

    ``LABELS_DATA`` is both the training set of the classifier and the
    persistent text -> label store.
    """

    # the data file lives next to this module
    LABELS_DATA = os.path.join(os.path.abspath(os.path.dirname(__file__)),
                               'labels_data.json')

    def __init__(self):
        with open(self.LABELS_DATA, 'r') as fp:
            self.c = NaiveBayesClassifier(fp, format="json")
        with open(self.LABELS_DATA, 'r') as fp:
            # text -> label; for duplicate texts the last entry wins
            self.labels_json = {i['text']: i['label'] for i in json.load(fp)}

    def get_num_labels(self):
        """Return the number of distinct labels in the store."""
        return len(self.get_labels())

    def get_labels(self):
        """Return the set of distinct labels in the store."""
        # dict.values() is a view without .sort() on Python 3, and sorting
        # before building an unordered set had no effect anyway.
        return set(self.labels_json.values())

    def get_label(self, text):
        """Return the most likely label for ``text``, or ``None`` if unsure.

        The text is lower-cased first. A label is returned only when its
        probability, rounded to two decimals, is above 0.7.
        """
        text = text.lower()
        prob_dist = self.c.prob_classify(text)
        label = prob_dist.max()
        prob = round(prob_dist.prob(label), 2)
        return label if prob > 0.7 else None

    def save_placeholder(self, text):
        """Record ``text`` as ``'unknown'`` if it is new, then rewrite ``LABELS_DATA``."""
        self.labels_json.setdefault(text, 'unknown')

        entries = [{
            'text': k,
            'label': v
        } for k, v in self.labels_json.items()]
        with open(self.LABELS_DATA, 'w') as fp:
            json.dump(entries, fp, indent=4)

コード例 #19

0

ファイルを表示

def engine(text):
    """Classify ``text`` with a Naive Bayes model built from ``train.csv``.

    When ``train.csv`` is missing it is downloaded from ``url_train`` and
    cached in the working directory. Diagnostics are printed before the label
    is returned.

    Raises:
        requests.HTTPError: if the server answers the download with an error
            status.
    """
    from textblob.classifiers import NaiveBayesClassifier
    url_train = "https://"
    file_train = "train.csv"
    if not os.path.isfile(file_train):
        print("Train loaded from Request:", url_train)
        response = requests.get(url_train, stream=True)
        # Fail before touching the cache file. Previously an error response
        # was ignored and its body was stored as training data.
        response.raise_for_status()
        with open(file_train, 'wb') as handle:
            for block in response.iter_content(1024):
                handle.write(block)
        print("Request DONE")
    else:
        print("Train loaded from cache:", file_train)

    with open(file_train, 'r', encoding="utf8") as fp:
        cl = NaiveBayesClassifier(fp)

    prob_dist = cl.prob_classify(text)
    print("TEST:", text, " ", prob_dist, " ", prob_dist.max())
    for a in prob_dist.samples():
        print(a, ":", round(prob_dist.prob(a), 2))
    print(cl.show_informative_features())
    aa = cl.extract_features(text)
    print(aa)
    print("---------------------------------------")

    return cl.classify(text)

コード例 #20

0

ファイルを表示

class TBSentiment(Model):
    """Sentiment model built on a ``NaiveBayesClassifier``.

    Training examples are ``(comment, polarity)`` tuples.
    """

    def __init__(self):
        # starts with an empty training set; call ``train`` to add examples
        self.cl = NaiveBayesClassifier([])

    def classify(self, comment):
        """Return ``(polarity, confidence)`` for ``comment``.

        ``polarity`` is the most probable label and ``confidence`` the
        probability the classifier assigns to it.
        """
        distribution = self.cl.prob_classify(comment)
        best = distribution.max()
        return best, distribution.prob(best)

    def train(self, data, eval=None, d_print=False):
        """Update the classifier with custom data.

        Args:
            data (:obj:`list` of :obj:`tuple`): Tuples with the format
                (comment, polarity in ["pos", "neg"]).
            eval: Accepted but not used.
            d_print: Accepted but not used.
        """
        self.cl.update(data)

    def test(self, data):
        """Placeholder for evaluation on custom data; not implemented.

        Args:
            data (:obj:`list` of :obj:`tuple`): Tuples with the format
                (comment, polarity in ["pos", "neg"]).

        Returns:
            Always ``None``.
        """
        return None

コード例 #21

0

ファイルを表示

def naive_bayes_classify(data):
    """Train and evaluate a Naive Bayes model on the ``text``/``type`` columns.

    Words that occur in fewer than five texts are removed. Rows 0-2999 are
    used for training and rows 3000-3599 for testing. Accuracy and the
    weighted F1 score are printed; nothing is returned.
    """
    class_to_predict = 'type'  # product importance
    records = data[['text', class_to_predict]].to_records(index=False)
    all_data = list(map(tuple, records))

    # number of texts each word appears in (each word counted once per text)
    text_counts = {}
    for item in all_data:
        for word in set(item[0].split()):
            text_counts[word] = text_counts.get(word, 0) + 1

    # keep frequent words only; every kept word is prefixed with a space
    for idx, (text_value, label) in enumerate(all_data):
        kept = ''.join(
            ' ' + word for word in text_value.split() if text_counts[word] >= 5
        )
        all_data[idx] = (kept, label)

    print('Finished preprocessing!')

    training_corpus = all_data[:3000]
    test_corpus = all_data[3000:3600]

    model = NBC(training_corpus, verbose=True)
    print('Done training!')
    print('Accuracy: ' + str(model.accuracy(test_corpus)))

    y_pred = [model.prob_classify(sample[0]).max() for sample in test_corpus]
    y_true = [sample[1] for sample in test_corpus]

    print('F1 score: ' + str(f1_score(y_true, y_pred, average='weighted')))

コード例 #22

0

ファイルを表示

ファイル: labels.py プロジェクト: replive/nightfury

class InputLabeler(object):
    """Label input texts with a Naive Bayes model built from ``LABELS_DATA``.

    ``LABELS_DATA`` is both the training set of the classifier and the
    persistent text -> label store.
    """

    LABELS_DATA = 'labels_data.json'

    def __init__(self):
        with open(self.LABELS_DATA, 'r') as fp:
            self.c = NaiveBayesClassifier(fp, format="json")
        with open(self.LABELS_DATA, 'r') as fp:
            # text -> label; for duplicate texts the last entry wins
            self.labels_json = {i['text']: i['label'] for i in json.load(fp)}

    def get_num_labels(self):
        """Return the number of distinct labels in the store."""
        return len(self.get_labels())

    def get_labels(self):
        """Return the set of distinct labels in the store."""
        # dict.values() is a view without .sort() on Python 3, and sorting
        # before building an unordered set had no effect anyway.
        return set(self.labels_json.values())

    def get_label(self, text):
        """Return the most likely label for ``text``, or ``None`` if unsure.

        The text is lower-cased first. A label is returned only when its
        probability, rounded to two decimals, is above 0.7.
        """
        text = text.lower()
        prob_dist = self.c.prob_classify(text)
        label = prob_dist.max()
        prob = round(prob_dist.prob(label), 2)
        return label if prob > 0.7 else None

    def save_placeholder(self, text):
        """Record ``text`` as ``'unknown'`` if it is new, then rewrite ``LABELS_DATA``."""
        self.labels_json.setdefault(text, 'unknown')

        with open(self.LABELS_DATA, 'w') as fp:
            json.dump([{'text': k, 'label': v} for k, v in self.labels_json.items()], fp, indent=4)

コード例 #23

0

ファイルを表示

import data_sets

# Data set selection: swap the commented lines to use the English corpus.
#train = data_sets.en_train
#test = data_sets.en_test
train = data_sets.subte_train
test = data_sets.subte_test

#tx_cl = "I feel amazing!"
#tx_prob = "This one's a doozy."
tx_cl = "El subte esta demorado"
tx_prob = "El subte funciona bien"

# Single-argument print(...) behaves identically on Python 2 and 3.
cl = NaiveBayesClassifier(train)
print(cl.classify(tx_cl))
print(cl.classify(tx_prob))
prob_dist = cl.prob_classify(tx_prob)
print(prob_dist.max())
print(round(prob_dist.prob("pos"), 2))
print(round(prob_dist.prob("neg"), 2))

# Evaluate on the test set that matches the selected training data; this
# used to be hard-wired to data_sets.en_test even when training on subte.
print(cl.accuracy(test))
print(cl.show_informative_features(5))

#Using TextBlob
blob = TextBlob("No funca por que hay obras para mejorar la cosa",
                classifier=cl)
print(blob.sentiment)
print(blob.classify())

blob = TextBlob("El subte funciona normal", classifier=cl)
print(blob.sentiment)

コード例 #24

0

ファイルを表示

ファイル: queryClassifier.py プロジェクト: vrunda13/HPE

def mainQuery(query):
    """Classify ``query`` as a generic or a technical question.

    A Naive Bayes classifier is trained on every call from the two built-in
    question lists below.

    Returns:
        ``"tech"`` or ``"generic"``, whichever label is most probable, or
        ``None`` if the classifier reports any other label.
    """
    generic_questions = ("Let's go","You never wanted to go out with 'me, did you?","Who knows?","What annoys you?",
                     "you've heard of him?","What were you doing?","Thank you anyway","No problem",
                     'She okay?',"Yes, I have a question.","What is your question?","What are your hobbies?",
                     "You know how sometimes you just become this 'persona'?  And you don't know how to quit?",
                     "what's up?",'sup people? I see the weather\'s getting better over there, Ben.',
                     "how are you doing?","Hi","Hello","Hey","How's you?","Have you heard the news?",
                     'i had the same problem your having so thats my i made my own.',"What is your favorite book?",
                     "good night","good morning","good afternoon","good evening","So what's your favorite color?",
                     'What good stuff?',"what's new?","How's life?","That is good to hear",
                     "I am doing well, how about you?","I am doing well, how about you?","I'm also good.",
                     "What are you then?",'What are you working on?',"Who are you?","What is it like?",
                     "How do you work?","Who is your appointment with?","What languages do you like to use?",
        )

    technical_questions=("Clearpass is extended to IT systems using which API?",
                     "Which browsers are supported for ClearPass?",
                     "Which  virtualization platforms  is supported by Clearpass?",
                     "name the authentication/authorization sources used by clearpass.",
                     "does Clearpass use ipv6 or ipv4 addressing?",
                     "how many sessioons can  be provided by ClearPass C2000 Hardware Appliance?",
                     "how does Admin/Operator access security?",
                     'Virtual Appliances are supported on which platforms?',
                     "Name the ClearPass Hardware Appliance Ports.",
                     "What is the expansion of OCSP?",
                     "what are the active Profiling Methods?",
                     "What are cookies?",
                     "what does dynamic authorisation mean?",
                     "Which standard the clearpass Guest is built on?",
                     "which protocol is used by the  NAS  to authenticate the user ?",
                     "Which network connectivity is provisioned for Clearpass Guest?",
                     "What is NAS?",
                     "What are the possible states of a session?",
                     "what does dynamic authorisation mean?",
                     'Which standard the clearpass Guest is built on?',
                     "Which network connectivity is provisioned for Clearpass Guest?",
                     "What is the use of airgroup?",
                     "What are cookies used for?",
                     'Is Windows Server 2008 "Server Core" appropriate for a SQL Server instance?',
                     "Is there any list of the network devices supported by clearpass for 802.1x auth",
                     "How can I Block my users from installing new virtual machines",
                     "Is there any list of medical devices compatible with clearpass ?",
                     "what are Good branching and merging tutorials for TortoiseSVN?",
                     "how to Add scripting functionality to .NET applications",
                     "why is VMWare Server Under Linux Secondary NIC connection",
                     "Setting up Continuous Integration with SVN",
                     "Does CruiseControl.NET run on IIS 7.0?",
                     "what to do  when there are users in both Edmonton and Toronto that access the same “Corpnet” Wireless LAN.",
                     "what are the  three hardware appliance platforms that aruba provides?",
                     "how to Powering Off the ClearPass Hardware Appliance?",
                     "what are the Supported Hypervisors for clearpass?"
                    )

    # label every question, generic ones first, then the technical ones
    training_set = [(question, 'generic') for question in generic_questions]
    training_set += [(question, 'tech') for question in technical_questions]

    Qclassifier = NaiveBayesClassifier(training_set)

    best = Qclassifier.prob_classify(query).max()
    if best == "tech":
        return "tech"
    if best == "generic":
        return "generic"
    return None

コード例 #25

0

ファイルを表示

# Evaluate the classifier on a held-out test set
test = [
    ('The beer was good.', 'pos'),
    ('I do not enjoy my job', 'neg'),
    ("I ain't feeling dandy today.", 'neg'),
    ("I feel amazing!", 'pos'),
    ('Gary is a friend of mine.', 'pos'),
    ("I can't believe I'm doing this.", 'neg'),
]

print(cls.accuracy(test))

# Classify two individual sentences
for sentence in ("Their burgers are amazing", "I don't like their pizza."):
    print(cls.classify(sentence))

from textblob import TextBlob

# Classify a multi-sentence text as a whole
blob = TextBlob(
    "The beer was amazing. "
    "But the hangover was horrible. My boss was not happy.",
    classifier=cls,
)
print(blob.classify())


# Fetching label probabilities: most likely label first, then each label's
# probability rounded to two decimals
prob_dist = cls.prob_classify("Their burgers are amazing")

print(prob_dist.max())

for label in ("pos", "neg"):
    print(round(prob_dist.prob(label), 2))

コード例 #26

0

ファイルを表示

ファイル: Catherine'sFinalProject.py プロジェクト: 4jozefbobek/SF_DAT_15_Work

 ]

# Held-out examples for the support-message classifier
test = [
    ('I am still waiting for a call back', 'neg'),
    ('Im an accountant and I had a question about balancing reports', 'pos'),
    ('declining everything', 'neg'),
    ('I have been waiting on hold for 20 minutes', 'neg'),
    ('This problem is still not resolved', 'neg'),
]

from textblob.classifiers import NaiveBayesClassifier
cl = NaiveBayesClassifier(train)

# Interactive-style exploration: each expression is evaluated but not printed
for message in ("im in offline mode",
                "we are busy with dinner service and need help"):
    cl.classify(message)

    prob_dist = cl.prob_classify(message)
    prob_dist.max()
    round(prob_dist.prob("pos"), 2)
    round(prob_dist.prob("neg"), 2)


polarity = []

コード例 #27

0

ファイルを表示

ファイル: textblob_classification_system.py プロジェクト: saimadhu-polamuri/textblob_learn

 		('I feel amazing!', 'pos'),
 		('Gary is a friend of mine.', 'pos'),
 		("I can't believe I'm doing this.", 'neg')]

print test 		

print train 		
cl = NaiveBayesClassifier(train)	# Learning classifier with NaiveBayesClassifier

#	Classifying Text ( Call the classify(text) method to use the classifier.)
test_check = cl.classify("This is an amazing library!")
print test_check

#	You can get the label probability distribution with the prob_classify(text) method.

prob_dist = cl.prob_classify("This one's a doozy.")
print prob_dist.max()
print round(prob_dist.prob("pos"), 2)
print round(prob_dist.prob("neg"), 2)
print prob_dist.prob("pos")
print prob_dist.prob("neg")

blob = TextBlob("The beer is good. But the hangover is horrible.", classifier=cl)
print blob.classify()


# Evaluating Classifiers (To compute the accuracy on our test set, use the accuracy(test_data) method.)
print cl.accuracy(test)

# Updating Classifiers with New Data (Use the update(new_data) method to update a classifier with new training data.)

コード例 #28

0

ファイルを表示

         ('thank you', 'pos'), ('thank you', 'pos'),
         ('quick question about how to add a user', 'pos'),
         ('monthly subscription charge question', 'pos')]

test = [('I am still waiting for a call back', 'neg'),
        ('Im an accountant and I had a question about balancing reports',
         'pos'), ('declining everything', 'neg'),
        ('I have been waiting on hold for 20 minutes', 'neg'),
        ('This problem is still not resolved', 'neg')]

from textblob.classifiers import NaiveBayesClassifier
cl = NaiveBayesClassifier(train)

cl.classify("im in offline mode")

prob_dist = cl.prob_classify("im in offline mode")
prob_dist.max()
round(prob_dist.prob("pos"), 2)
round(prob_dist.prob("neg"), 2)

cl.classify("we are busy with dinner service and need help")

prob_dist = cl.prob_classify("we are busy with dinner service and need help")
prob_dist.max()
round(prob_dist.prob("pos"), 2)
round(prob_dist.prob("neg"), 2)

polarity = []


def GetPolarity(string):

コード例 #29

0

ファイルを表示

ファイル: classifier.py プロジェクト: a24ibrah/Arabic_Classifier

'''Dataset source: Abdulla N. A., Mahyoub N. A., Shehab M., Al-Ayyoub M.,
        "Arabic Sentiment Analysis: Corpus-based and Lexicon-based",
        IEEE conference on Applied Electrical Engineering and Computing Technologies (AEECT 2013),
        December 3-12, 2013, Amman, Jordan. (Accepted for Publication).'''

# creating Naive Bayes Classifier
from textblob.classifiers import NaiveBayesClassifier

cl = NaiveBayesClassifier("train.csv", format="csv")
#cl = NaiveBayesClassifier(train)

# Test model with its two labels
print cl.classify(u" احسن علاج هذا")

# second cl model test
prob_dist = cl.prob_classify(u"ك يوم يا ظالم,")
print prob_dist.max()
print prob_dist.prob("positive")
print prob_dist.prob("negative")

# compute the accuracy on our test set
print "accuracy on the test set:{} ".format(cl.accuracy("testing.csv", format="csv"))

# display a listing of the most informative features.
cl.show_informative_features(5)

# add new data
new_data = [(u"كلام صحيح من شان هيك الدول اللي ما فيها بطالة والمجتمعات المفتوحة بتقل فيها المشاكل النفسية", 'positive'),
           (u"لا طبعا التقرب الى الله هو خير علاج للحالات النفسية", 'positive'),
           (u"تفائلوا بالخير تجدوه", 'positive'),
           (u"يا ترى الحكومه بدها تزيد دعم المواطن الي الله يكون في عونه", 'negative')]

コード例 #30

0

ファイルを表示

ファイル: textblob_sentiment_analysis_merging_sentences_pos_and_neg.py プロジェクト: fernando-andutta/WikiLetters

with open('test.json', 'r') as test_file:
    model1_accuracy = model1.accuracy(test_file, format=None)
    print("model1 accuray = '%s' " %model1_accuracy)


###############################################################################
###############################################################################

print("#################################################")
text3 = "We did not like his results."
#probability_classification_chosen = 'neg'
#probability_positive = '0.11'
#probability_negative = '0.89'

print("Assessing text = " + text3)
model1_prob_dist = model1.prob_classify(text3)
probability_classification_chosen = model1_prob_dist.max()
print("probability_classification = '%s' " %probability_classification_chosen)

probability_positive = round(model1_prob_dist.prob("pos"), 2)
print("probability_positive = '%s' " %probability_positive)

probability_negative = round(model1_prob_dist.prob("neg"), 2)
print("probability_negative = '%s' " %probability_negative)

text_classification = "NOT-PROPERLY-CLASSIFIED"
if probability_positive<=0.55 and probability_negative<=0.55:
    text_classification = "NEUTRAL"

elif probability_positive>0.55 and probability_positive<=0.75 and probability_negative<=0.55:
    text_classification = "SLIGHTLY-POSITIVE"

コード例 #31

0

ファイルを表示

			b = row['timestamp'].replace('-',' ').replace(':',' ').split()
			b = [int(x) for x in b]
			time = datetime.datetime(b[0],b[1],b[2],b[3],b[4],b[5])

			# Skip if not at time yet
			if start_time > time:
				continue

			# Break if past endtime
			if end_time < time:
				break

			# Add to sentiment
			n+=1
			newline=row['text'].decode('utf-8')
			prob_dist=cl.prob_classify(newline)
			line_sent = prob_dist.max()
			if line_sent==' pos':
				sentiment+=1
			elif line_sent==' neg':
				sentiment-=1

			# If interval has been reach, start a new bin
			if abs(time.minute - minutes) >= interval:

				# Record time variables
				times.append(tot_time)
				tot_time += interval
				labels.append(str(time.hour) + ':' + str(time.minute))
				minutes = time.minute

コード例 #32

0

ファイルを表示

ファイル: classifier.py プロジェクト: duneding/gensory

import data_sets

#train = data_sets.en_train
#test = data_sets.en_test
train = data_sets.subte_train
test = data_sets.subte_test

#tx_cl = "I feel amazing!"
#tx_prob = "This one's a doozy."
tx_cl = "El subte esta demorado"
tx_prob = "El subte funciona bien"

cl = NaiveBayesClassifier(train)
print cl.classify(tx_cl)
print cl.classify("El subte funciona bien")
prob_dist = cl.prob_classify(tx_prob)
print prob_dist.max()
print round(prob_dist.prob("pos"), 2)
print round(prob_dist.prob("neg"), 2)

print cl.accuracy(data_sets.en_test)
print cl.show_informative_features(5)

#Using TextBlob
blob = TextBlob("No funca por que hay obras para mejorar la cosa", classifier=cl)
print blob.sentiment
print blob.classify()

blob = TextBlob("El subte funciona normal", classifier=cl)
print blob.sentiment
print blob.classify()

コード例 #33

0

ファイルを表示

ファイル: test_classifiers.py プロジェクト: Arttii/TextBlob

class TestNaiveBayesClassifier(unittest.TestCase):
    """Tests for NaiveBayesClassifier: training, prediction and file loading."""

    def setUp(self):
        # Every test starts from a classifier trained on the shared fixture.
        self.classifier = NaiveBayesClassifier(train_set)

    def _check_loaded_from_file(self, loaded):
        """Shared assertions for a classifier that was built from a data file."""
        assert_equal(loaded.classify("I feel happy this morning"), 'pos')
        first_sentence = loaded.train_set[0][0]
        assert_true(isinstance(first_sentence, unicode))

    def test_default_extractor(self):
        """Without a custom extractor, features come from basic_extractor."""
        sentence = "I feel happy this morning."
        expected = basic_extractor(sentence, train_set)
        assert_equal(self.classifier.extract_features(sentence), expected)

    def test_classify(self):
        """Classifying returns the label and leaves the training set size alone."""
        label = self.classifier.classify("I feel happy this morning")
        assert_equal(label, 'positive')
        assert_equal(len(self.classifier.train_set), len(train_set))

    def test_classify_a_list_of_words(self):
        """A pre-tokenised document (list of words) can be classified."""
        tokens = ["I", "feel", "happy", "this", "morning"]
        assert_equal(self.classifier.classify(tokens), "positive")

    def test_train_from_lists_of_words(self):
        """Training on word lists gives the same accuracy as training on strings."""
        tokenised = [(doc.split(), label) for doc, label in train_set]
        from_tokens = NaiveBayesClassifier(tokenised)
        assert_equal(from_tokens.accuracy(test_set),
                     self.classifier.accuracy(test_set))

    def test_prob_classify(self):
        """The probability distribution favours the expected label."""
        dist = self.classifier.prob_classify("I feel happy this morning")
        assert_equal(dist.max(), "positive")
        assert_true(dist.prob("positive") > dist.prob("negative"))

    def test_accuracy(self):
        """accuracy() returns a float."""
        score = self.classifier.accuracy(test_set)
        assert_true(isinstance(score, float))

    def test_update(self):
        """update() adds one example and raises that label's probability."""
        before = self.classifier.prob_classify("lorem ipsum")
        size_before = len(self.classifier.train_set)
        self.classifier.update([("lorem ipsum", "positive")])
        size_after = len(self.classifier.train_set)
        after = self.classifier.prob_classify("lorem ipsum")
        assert_true(after.prob("positive") > before.prob("positive"))
        assert_equal(size_before + 1, size_after)

    def test_labels(self):
        """Both training labels are reported by labels()."""
        known = self.classifier.labels()
        for expected in ("positive", "negative"):
            assert_true(expected in known)

    def test_show_informative_features(self):
        """Smoke test: the call only has to complete without raising."""
        self.classifier.show_informative_features()

    def test_informative_features(self):
        """informative_features() returns a list of tuples."""
        top = self.classifier.informative_features(3)
        assert_true(isinstance(top, list))
        assert_true(isinstance(top[0], tuple))

    def test_custom_feature_extractor(self):
        """A custom extractor is accepted and the training labels are kept."""
        custom = NaiveBayesClassifier(train_set, custom_extractor)
        custom.classify("Yay! I'm so happy it works.")
        assert_equal(custom.train_features[0][1], 'positive')

    def test_init_with_csv_file(self):
        """CSV input with an explicit format specifier."""
        self._check_loaded_from_file(
            NaiveBayesClassifier(CSV_FILE, format="csv"))

    def test_init_with_csv_file_without_format_specifier(self):
        """CSV input with no format specifier."""
        self._check_loaded_from_file(NaiveBayesClassifier(CSV_FILE))

    def test_init_with_json_file(self):
        """JSON input with an explicit format specifier."""
        self._check_loaded_from_file(
            NaiveBayesClassifier(JSON_FILE, format="json"))

    def test_init_with_json_file_without_format_specifier(self):
        """JSON input with no format specifier."""
        self._check_loaded_from_file(NaiveBayesClassifier(JSON_FILE))

    def test_accuracy_on_a_csv_file(self):
        """accuracy() accepts a CSV file as the test set."""
        score = self.classifier.accuracy(CSV_FILE)
        assert_true(isinstance(score, float))

    def test_accuracy_on_json_file(self):
        """accuracy() accepts a JSON file as the test set."""
        score = self.classifier.accuracy(JSON_FILE)
        assert_true(isinstance(score, float))

    def test_init_with_tsv_file(self):
        """TSV input with no format specifier."""
        self._check_loaded_from_file(NaiveBayesClassifier(TSV_FILE))

    def test_init_with_bad_format_specifier(self):
        """An unknown format name is rejected with ValueError."""
        assert_raises(ValueError,
                      NaiveBayesClassifier, CSV_FILE, format='unknown')

    def test_repr(self):
        """repr() reports how many instances the classifier was trained on."""
        expected = "<NaiveBayesClassifier trained on {0} instances>".format(
            len(train_set))
        assert_equal(repr(self.classifier), expected)

コード例 #34

0

ファイルを表示

ファイル: movie_sentiment_TextBlob_0420.py プロジェクト: ethancheung2013/Kaggle_GA_MovieReview

# # cl = NaiveBayesClassifier(newTrMerged)

cl = NaiveBayesClassifier(newTrMerged)
print "end training"

# # open test file and evaluate prediction probabiity
test_df = read_csv('test1_org.csv')

tr_ID = test_df['ID']#[:5]
tr_review = test_df['review']#[:5]

newTestMerged = zip(tr_review,tr_ID)

with open('result.csv', 'wb') as csvfile:
    resultwriter = csv.writer(csvfile, delimiter=',', quoting=csv.QUOTE_MINIMAL)
    resultwriter.writerow(("ID","Predicted"))
    emptyCl = []
    g = (line for line in newTestMerged)
    for line in g:
        expected_label = cl.classify(line[0])
        emptyCl.append(expected_label)
        prob_dist = cl.prob_classify(line[0])
        prob_pos = prob_dist.prob("1")
        result = line[1], prob_pos 
        resultwriter.writerow(result)

print("done in %fs" % (time() - t0))

コード例 #35

0

ファイルを表示

ファイル: textblob_sentiment_analysis_testing_5classes.py プロジェクト: fernando-andutta/WikiLetters

    model_accuracy = model.accuracy(test_file, format=None)
    print("model accuray = '%s' " % model_accuracy)

###############################################################################
# CREATING A NEUTRAL CLASS FROM POSITIVE AND NEGATIVE
###############################################################################

print("#################################################")

text3 = "We did not like his results."
#probability_classification_chosen = 'neg'
#probability_positive = '0.11'
#probability_negative = '0.89'

print("Assessing text = " + text3)
model_prob_dist = model.prob_classify(text3)
#probability_classification_chosen = model_prob_dist.max()
#print("probability_classification = '%s' " %probability_classification_chosen)

probability_positive = round(model_prob_dist.prob("pos"), 2)
#print("probability_positive = '%s' " %probability_positive)

probability_negative = round(model_prob_dist.prob("neg"), 2)
#print("probability_negative = '%s' " %probability_negative)

text_classification = "NOT-PROPERLY-CLASSIFIED"
if probability_positive <= 0.55 and probability_negative <= 0.55:
    text_classification = "NEUTRAL"

elif probability_positive > 0.55 and probability_positive <= 0.75 and probability_negative <= 0.55:
    text_classification = "SLIGHTLY-POSITIVE"

コード例 #36

0

ファイルを表示

ファイル: polarity.py プロジェクト: lakeesh10/MrFavorite

with open("yelplinks.txt") as f:
    array= f.readlines()
for line in array:
    line1=line.split('\n')
    openfile= "wordstopolarity/"+line1[0]+".txt"
    outputfile = open ("polarity/"+line1[0]+".txt" , "w+")
    outputfiletest = open ("polaritytesting/"+line1[0]+".txt" , "w+")
    k=0
    with open(openfile) as s:
        for line in s:
			text = line
			if k % 200 ==0 :
				print line1[0]+ "\t"+str (k)
			k=k+1
			naivebayes=naive.prob_classify(text)
			naivebayes_max=naivebayes.max()
			naivebayes_prob=round(naivebayes.prob(naivebayes_max), 3)
			naivebayes_value=0
			if str(naivebayes_max)=="pos":
				naivebayes_value= 1
			else:
				naivebayes_value= -1

			decisionTest=[(review_features(text))]
			decisionTree=decision.classify_many(decisionTest)
			decisionTree_value=0
			if str(decisionTree[0])=="pos":
				decisionTree_value=1
			else:
				decisionTree_value= -1

コード例 #37

0

ファイルを表示

#
# w= Word('running')
# print w.lemmatize()

#Text Classify

# Labelled sentences used to train the classifier.
train = [('I love this sandwich.', 'pos'),
         ('this is an amazing place!', 'pos'),
         ('I feel very good about these beers.', 'pos'),
         ('this is my best work.', 'pos'), ("what an awesome view", 'pos'),
         ('I do not like this restaurant', 'neg'),
         ('I am tired of this stuff.', 'neg'),
         ("I can't deal with this", 'neg'), ('he is my sworn enemy!', 'neg'),
         ('my boss is horrible.', 'neg')]

# Held-out labelled sentences used only to measure accuracy.
test = [('the beer was good.', 'pos'), ('I do not enjoy my job', 'neg'),
        ("I ain't feeling dandy today.", 'neg'), ("I feel amazing!", 'pos'),
        ('Gary is a friend of mine.', 'pos'),
        ("I can't believe I'm doing this.", 'neg')]

cl = NaiveBayesClassifier(train)
print(cl.classify("This is an amazing library!"))
print(cl.accuracy(test))

# show_informative_features() writes its table to stdout itself; wrapping it
# in print would only add a stray "None" line after the table.
cl.show_informative_features(5)

prob_dist = cl.prob_classify("This one's a doozy.")
# Print the most likely label. Previously a bare `print` emitted an empty
# line and the result of max() on the following line was thrown away.
print(prob_dist.max())

コード例 #38

0

ファイルを表示

ファイル: clasifica_naive.py プロジェクト: jecr/icr-caja

# Accounts whose category is already known from earlier runs:
# account name -> 'politico' or 'medio'. The second file is read last, so
# it wins when an account appears in both.
historicos = {}
with open('util/politicos-historico.txt', 'r') as fPolH:
    for item in fPolH:
        historicos[item.strip()] = 'politico'

with open('util/medios-historico.txt', 'r') as fMedH:
    for item in fMedH:
        historicos[item.strip()] = 'medio'

print('\nClasificando:')
clasifSalida = {}
for item in clasificaEsto:
    if item in historicos:
        # A known category always takes precedence over the classifier.
        clasifSalida[item] = historicos[item]
    else:
        prob_dist = clasificador.prob_classify(clasificaEsto[item])
        etiqueta = prob_dist.max()
        # Accept the prediction only when its probability rounds to 1 at
        # three decimals; anything less certain becomes 'ciudadano'.
        if round(prob_dist.prob(etiqueta), 3) == 1:
            clasifSalida[item] = etiqueta
        else:
            clasifSalida[item] = 'ciudadano'

print('Leyendo lista completa de usuarios...')
# Users from the full list (path in sys.argv[2]) that were not classified
# above get their known category, or 'ciudadano' when there is none.
with open(sys.argv[2], 'r') as fUserList:
    for item in fUserList:
        item = item.strip()
        if item not in clasifSalida:
            clasifSalida[item] = historicos.get(item, 'ciudadano')

コード例 #39

0

ファイルを表示

pol_labels = pol_df['labels'].copy()
pol_labels[pol_mask] = 'pos'
pol_labels[~pol_mask] = 'neg'
pol_df['etc'] = pol_labels
pol_df
nb_training = set()

for i, row in pol_df.iterrows():
    nb_training.add((row[0], row[2]))
for i, row in adj_df.iterrows():
    nb_training.add((row[0], row[2]))

nb_training

nbc = NaiveBayesClassifier(nb_training)
prob_dist = nbc.prob_classify('trump hates racism')
prob_dist.max()
prob_dist.prob('neg')

nb_name = 'naivebayesclassifier.pkl'
with open(nb_name, 'wb') as f:
    pickle.dump(nbc, f)

lin_reg_training = {}

for i, row in adj_df.iterrows():
    lin_reg_training[row[0]] = round(row[1] / 10.0, 3)
for i, row in pol_df.iterrows():
    lin_reg_training[row[0]] = round(row[1] / 10.0, 3)

with open('sentiment_lexicon.pkl', 'wb') as f:

コード例 #40

0

ファイルを表示

ファイル: sms.py プロジェクト: sayantikabanik/spamclassify

for d in data:
    ol.append(d.tolist())
print ol
train=ol[:60]
test=ol[29900:]



cl = NaiveBayesClassifier(train)


accuracy = cl.accuracy(test)
print("Accuracy: {0}".format(accuracy))



res= pd.read_csv('foo.csv')


res=res.values
print res
pl=[]
for r in res:
    pl.append(r[1])
print pl
pred=cl.prob_classify(pl)
print pred.max()

# Show 5 most informative features
#cl.show_informative_features(5)

コード例 #41

0

ファイルを表示

class Emote(object):

    emoteClassOn = False  # Is Emote being used as a library or class?
    runningScript = False  # Or is Emote being run as a script directly?
    firstTime = True  # Emote running for the first time?

    pickledOn = False  # Is a pickled database detected?
    SQLDataOn = False  # Is a SQL database detected?

    fullCount = ""  # The string result detailing the full amount of classifications (sorted by type and frequency) that the current training database contains

    writtenAnalysis = False  # Turn writte analysis on?
    levelsAnalysis = True  # Turn full levels analysis on?
    defaultCorpus = ""  # What's the default corpus?

    # connectDB = sqlite3.connect('base_corpus.db') # Using SQL db for base corpus texts

    def __init__(self,
                 message="",
                 pre_result="",
                 prob_dist=0,
                 prob_dist_max=0,
                 positive=0,
                 negative=0,
                 joy=0,
                 anger=0,
                 love=0,
                 hate=0,
                 certainty=0,
                 boredom=0,
                 intensity=0,
                 regret=0,
                 challenging=0,
                 agreeable=0,
                 desire=0,
                 calm=0,
                 sarcastic=0,
                 emphatic=0,
                 pride=0,
                 accusative=0,
                 admiration=0,
                 inquisitive=0,
                 modest=0,
                 instructive=0,
                 ambivalence=0,
                 vulgarity=0,
                 train=[],
                 cl=NaiveBayesClassifier([]),
                 punctCountDict={},
                 wordCount=0,
                 sentenceCount=0,
                 normalizedProbValues={},
                 sentences=[],
                 sentencesProbValues=[],
                 massResults=[]):

        self.train = train

        # PLACE THE TRAINING DATA (TUPLES) IN SELF.TRAIN BELOW

        self.train = []

        #

        self.message = message
        self.punctCountDict = punctCountDict
        self.wordCount = wordCount
        self.sentenceCount = sentenceCount

        self.pre_result = pre_result
        self.prob_dist = prob_dist
        self.prob_dist_max = prob_dist_max

        self.positive = positive
        self.negative = negative
        self.joy = joy
        self.anger = anger
        self.love = love
        self.hate = hate
        self.certainty = certainty
        self.boredom = boredom
        self.intensity = intensity
        self.regret = regret
        self.challenging = challenging
        self.agreeable = agreeable
        self.desire = desire
        self.calm = calm
        self.sarcastic = sarcastic
        self.emphatic = emphatic
        self.pride = pride
        self.accusative = accusative
        self.admiration = admiration
        self.inquisitive = inquisitive
        self.modest = modest
        self.instructive = instructive
        self.ambivalence = ambivalence
        self.vulgarity = vulgarity
        self.prob_dist = prob_dist
        self.prob_dist_max = prob_dist_max
        self.cl = cl
        self.normalizedProbValues = normalizedProbValues
        self.sentences = sentences
        self.sentencesProbValues = sentencesProbValues
        self.massResults = massResults

    def getInput(self, _message):
        """Pick up the message to analyse and run the analysis steps on it.

        What happens depends on the ``runningScript`` and ``firstTime``
        globals:

        * script mode, first call: print the start-up banner, clear
          ``firstTime`` and hand over to ``initialTrain``;
        * script mode, later calls: ignore ``_message`` and read the message
          from standard input;
        * library mode: use ``_message`` as given and record in
          ``emoteClassOn`` whether this was the first call.

        In the last two cases the message is stored on ``self.message`` and
        passed to ``countPunct``, ``countWordSent`` and ``runAnalysis``.

        NOTE(review): the ``global`` statements resolve at module level, not
        to the class attributes of the same name - confirm that module-level
        definitions exist.
        """
        global firstTime
        global runningScript
        global emoteClassOn

        script_mode = runningScript == True

        # Script start-up: nothing to analyse yet, training comes first.
        if script_mode and firstTime != False:
            print(
                "\n\tNow starting Emote as a script. Use Emote Mass Analyzer to break down a text into individual sentence \n"
                "                 classifications, or import Emote as a library.")
            firstTime = False
            self.initialTrain()
            return

        if script_mode:
            _message = input('\n\tWrite message to be analyzed: ')
        else:
            first_library_call = firstTime == True
            if first_library_call:
                print("\n\tRunning Emote as a library..")
            emoteClassOn = first_library_call

        # The same three analysis steps apply to every remaining path.
        self.message = _message
        self.countPunct(_message)
        self.countWordSent(_message)
        self.runAnalysis(_message)

    def initialTrain(self):
        """Summarise the corpus, then train a classifier or load the cached one.

        Steps, in order:

        1. Count how many examples in ``self.train`` carry each known label,
           print the counts as a two-column table and store a one-string
           summary in the global ``fullCount``.
        2. Look for ``data/base_corpus.pickle`` under the current working
           directory and record in the global ``pickledOn`` whether a
           non-empty file is there.
        3. With no usable pickle: shuffle ``self.train`` with a fixed seed,
           train a ``NaiveBayesClassifier`` on it and pickle the result.
           Otherwise load the pickled classifier; if it cannot be read or
           unpickled, print a hint and exit via ``sys.exit()``.
        4. Continue with ``runAnalysis`` (when the global ``emoteClassOn`` is
           set) or ``getInput``, passing ``self.message``.

        NOTE(review): ``pickledOn``, ``fullCount`` and ``emoteClassOn`` are
        resolved as module-level globals, not as the class attributes of the
        same name - confirm that module-level definitions exist.
        """
        global pickledOn
        global fullCount

        # One entry per table row: (left label, right label, column gap).
        label_rows = (
            ("positive", "negative", "\t"),
            ("love", "hate", "\t\t"),
            ("joy", "anger", "\t\t"),
            ("certainty", "confusion", "\t"),
            ("amusement", "boredom", "\t"),
            ("intensity", "regret", "\t"),
            ("agreeable", "challenging", "\t"),
            ("desire", "calm", "\t"),
            ("emphatic", "sarcastic", "\t"),
            ("instructive", "accusative", "\t"),
            ("admiration", "inquisitive", "\t"),
            ("modest", "pride", "\t"),
            ("ambivalence", "vulgarity", "\t"),
        )

        # Tally labels from the examples themselves (the label is the last
        # item of each example) rather than searching the repr of the whole
        # list, which could also match quoted text inside a sentence.
        label_counts = {}
        for example in self.train:
            label = example[-1]
            label_counts[label] = label_counts.get(label, 0) + 1

        summary_rows = [
            "\t\t" + left.capitalize() + ": " + str(label_counts.get(left, 0))
            + gap + right.capitalize() + ": " + str(label_counts.get(right, 0))
            for left, right, gap in label_rows
        ]

        # Every row appears exactly once; the hand-written version repeated
        # two rows and left out Love/Hate and Modest/Pride.
        fullCount = (
            "\n\tNumbers and types of classifications in loaded database: \n"
            + "".join(summary_rows))

        print(
            """\n\tNumbers and types of classifications in database to be loaded: \n"""
        )
        for row in summary_rows:
            print(row)

        pickle_path = os.path.join(os.getcwd(), 'data', 'base_corpus.pickle')

        # The pickle counts as present only if it can be opened and is not
        # empty. An empty file is treated as missing so that it is rebuilt
        # instead of leaving pickledOn at a stale value.
        try:
            with open(pickle_path, 'rb'):
                pickledOn = os.path.getsize(pickle_path) > 0
        except IOError:
            pickledOn = False

        if pickledOn:
            print("\n\tPickled data found!")
        else:
            print(
                "\n\tNo pickled data found.. now creating and loading pickle.."
            )

        if not pickledOn:
            print("\n\tOpening training data.")

            # Fixed seed so the shuffled order is the same on every run.
            random.seed(1)
            random.shuffle(self.train)
            self.cl = NaiveBayesClassifier(self.train)
            print("\n\tTraining now..")
            with open(pickle_path, 'wb') as fp:
                print("\n\tLoaded training data into pickle file.")
                pickle.dump(self.cl, fp, protocol=pickle.HIGHEST_PROTOCOL)
            print(
                "\n\tPickling complete, and will be loaded as the default database corpus next time, skipping the training period."
            )
            currentTime = datetime.datetime.now().time()
            print("\n\n\tTIME NEW DATABASE FINISHED TRAINING AND SAVING: ",
                  currentTime)
        else:
            try:
                with open(pickle_path, "rb") as fp:
                    # SECURITY: pickle.load can execute arbitrary code. Only
                    # load a pickle file that this program wrote itself.
                    self.cl = pickle.load(fp)
                print("\n\tTraining has been loaded from the selected corpus.")
                print("\t\t" + fullCount)
            except (IOError, EOFError, pickle.UnpicklingError):
                # A truncated or corrupt pickle is reported the same way as
                # an unreadable one; the message already describes that case.
                print(
                    "\n\tError training pickle file.. system will exit. Go into the directory, delete the corrupt pickle file, and retry this script to train a new copy."
                )
                sys.exit()

        # The message to analyse lives on the instance; no local or global
        # `_message` is defined in this method.
        if emoteClassOn:
            self.runAnalysis(self.message)
        else:
            self.getInput(self.message)

    # If corpus data was stored in SQL..
    # def sendToSQL(self):
    #     c.execute("DROP TABLE IF EXISTS Base")
    #     c.execute("CREATE TABLE Base (Date_Sorted TEXT, Source TEXT, Message TEXT);")
    #     for i in self.train:
    #         # print(i)
    #         try:
    #             c.execute("INSERT INTO Base VALUES (?, ?, ?);", ('11-05-2016', 'general', i))
    #             connectDB.commit()
    #             print(i)
    #         except:
    #             print('err')
    #             pass
    #     c.close()

    # def parseFromSQL(self):
    #     global SQLData
    #     global connectDB
    #     print("Training data from SQL..")
    #     try:
    #         # connectDB.row_factory = sqlite3.Row
    #         c.execute("SELECT Message FROM base WHERE 1")
    #         # connectDB.text_factory = lambda x: x.decode("utf-8")
    #         all_rows = cursor.fetchall()
    #         # line = re.sub('[!@#$]', '', line)
    #         # all_rows = [row[0].strip for row in cursor.fetchall()]
    #         # for r in all_rows:
    #             # temp_row = r[0]
    #             # temp_row = temp_row.strip()
    #             # temp_row = re.sub('\r\n', '', temp_row)
    #             # temp_row = re.sub('\\', '', temp_row)
    #             # temp_row = unicodedata.normalize('NFKD', temp_row).encode('ascii','ignore')
    #             # print(temp_row)
    #             # temp_row = temp_row.replace("\\","")
    #             # SQLData.append(unicodedata.normalize('NFKD', temp_row))
    #             # SQLData.append(str(temp_row).strip())
    #     except:
    #         pass

    def countPunct(self, _message):
        """Tally digits and basic punctuation marks found in ``_message``.

        The tallies are stored on ``self.punctCountDict`` and also returned.

        Args:
            _message: Iterable of characters (normally a string) to scan.

        Returns:
            Dict with the keys ``numbers``, ``periods_end``,
            ``question_marks``, ``exclamation_points`` and ``commas``.
        """
        # Maps each tracked punctuation mark to the tally it feeds.
        markToKey = {
            '.': "periods_end",
            ',': "commas",
            '!': "exclamation_points",
            '?': "question_marks",
        }
        tallies = {
            "numbers": 0,
            "periods_end": 0,
            "question_marks": 0,
            "exclamation_points": 0,
            "commas": 0
        }
        for symbol in _message:
            # Digits take precedence; every other character is either one of
            # the tracked marks or ignored.
            if symbol.isdigit():
                tallies["numbers"] += 1
            elif symbol in markToKey:
                tallies[markToKey[symbol]] += 1
        self.punctCountDict = tallies
        return self.punctCountDict

    def countWordSent(self, _message):
        """Count whitespace-separated words and tokenized sentences.

        Stores the counts on ``self.wordCount`` and ``self.sentenceCount``.

        Args:
            _message: Text to measure.

        Returns:
            Tuple ``(wordCount, sentenceCount)``.
        """
        words, sentences = _message.split(), sent_tokenize(_message)
        self.wordCount, self.sentenceCount = len(words), len(sentences)
        return self.wordCount, self.sentenceCount

    def split_into_sentences(self, _message):
        """Analyse a multi-sentence message one sentence at a time.

        ``self.normalizedProbValues`` is reset to an empty list and the
        tokenized sentences are stored on ``self.sentences``.  When there is
        more than one sentence, each is passed to ``self.runAnalysis`` and the
        resulting ``self.normalizedProbValues`` is appended to
        ``self.sentencesProbValues``.

        NOTE(review): ``self.sentencesProbValues`` is not created here, so it
        must already exist on the instance -- confirm where it is initialised
        and whether it is meant to accumulate across calls.

        Args:
            _message: Text to split and analyse.

        Returns:
            ``self.sentencesProbValues`` when the message has more than one
            sentence, otherwise ``None``.
        """
        self.normalizedProbValues = []
        self.sentences = sent_tokenize(_message)
        # Single-sentence (or empty) input is not analysed here.
        if len(self.sentences) <= 1:
            return None
        for sentence in self.sentences:
            self.runAnalysis(str(sentence))
            self.sentencesProbValues.append(self.normalizedProbValues)
        return self.sentencesProbValues

    def analyzeCSV(self, path):
        """Analyse every row of a CSV file and write the results to disk.

        Each usable input row holds an identifier in its first column and the
        text to analyse in its second column; rows with fewer than two columns
        (for example blank lines) are skipped.  Every text is passed to
        ``self.runAnalysis`` and the value of ``self.normalizedProbValues``
        left behind by that call is collected in ``self.massResults``.  The
        identifier, the text and the first six entries of each result are then
        written as one row of ``static/results.csv`` under the current working
        directory.

        Args:
            path: Location of the CSV file to read.

        Returns:
            Tuple ``(csvResults, csvFile, self.massResults)``: the
            ``csv.writer`` used for the output, the output file object (already
            closed) and the list of per-row results.
        """
        csvData = []
        csvTextData = []
        # Context managers guarantee both files are closed even if a row or
        # the analysis raises.
        with open(path, 'r', newline='') as sourceFile:
            for row in csv.reader(sourceFile, delimiter=","):
                if len(row) < 2:
                    continue
                csvData.append(row[0])
                csvTextData.append(row[1])
        print("\n\t", csvData)
        print("\n\t", csvTextData)
        self.massResults = []
        for text in csvTextData:
            self.runAnalysis(text)
            # Read the result from this instance rather than from a global
            # object that may not exist or may be a different instance.
            print(self.normalizedProbValues)
            self.massResults.append(self.normalizedProbValues)
        resultsPath = os.path.join(os.getcwd(), 'static', 'results.csv')
        with open(resultsPath, 'w', newline='') as csvFile:
            # Create the writer once, before the loop, so it is defined even
            # when there are no rows to write.
            csvResults = csv.writer(csvFile, delimiter=',')
            for rowId, text, result in zip(csvData, csvTextData,
                                           self.massResults):
                csvIndRowList = [rowId, text] + list(result[:6])
                print("\n\tROW LIST", csvIndRowList)
                csvResults.writerow(csvIndRowList)
        return csvResults, csvFile, self.massResults

    def runAnalysis(self, _message):
        """Classify ``_message`` and store one rounded score per emotion label.

        On the first call (module flags ``firstTime`` and ``emoteClassOn`` both
        true) the pickled classifier is loaded from
        ``data/base_corpus.pickle`` under the current working directory and
        both flags are cleared.  The message is then classified with
        ``self.cl.prob_classify``; the probability of each of the 26 labels is
        rounded to four decimals and stored on an attribute of the same name.
        When ``positive`` and ``negative`` differ by at most 0.25, each of the
        two is multiplied by its own square root four times.  Finally the
        scores are packed into ``self.normalizedProbValues`` (a
        ``pandas.Series``) and ``self.normalizeProbabilityPunctuation`` is
        called.

        Args:
            _message: Text to classify.

        Returns:
            ``None`` when the module flag ``runningScript`` is true.  Otherwise
            a tuple of ``self.normalizedProbValues``, ``self.prob_dist``,
            ``self.prob_dist_max`` and the score attributes in label order, all
            read after ``normalizeProbabilityPunctuation`` has returned.
            ``pride`` is not part of the tuple; this matches the historical
            return shape.
        """
        global emoteClassOn
        global firstTime
        global runningScript
        emotionLabels = (
            'positive', 'negative', 'joy', 'anger', 'love', 'hate',
            'certainty', 'confusion', 'amusement', 'boredom', 'intensity',
            'regret', 'agreeable', 'challenging', 'desire', 'calm',
            'emphatic', 'sarcastic', 'instructive', 'accusative',
            'admiration', 'inquisitive', 'modest', 'pride', 'ambivalence',
            'vulgarity',
        )
        if firstTime == True and emoteClassOn == True:
            print(
                "\n\n\tFirst time running analysis.. load pickle data. The initial analysis will be slower because of the loading."
            )
            path = os.path.join(os.getcwd(), 'data', 'base_corpus.pickle')
            # NOTE: unpickling executes code embedded in the file, so this
            # file must only ever come from a trusted source.
            with open(path, 'rb') as fp:
                self.cl = pickle.load(fp)
            emoteClassOn = False
            firstTime = False

        self.prob_dist = self.cl.prob_classify(_message)
        self.prob_dist_max = self.prob_dist.max()
        for label in emotionLabels:
            setattr(self, label, round(self.prob_dist.prob(label), 4))

        # When the polarity scores are close, shrink both of them.  The
        # multiplication chain is kept as written so the floating-point
        # result does not change.
        if math.fabs(self.positive - self.negative) <= .25:
            for label in ('positive', 'negative'):
                score = getattr(self, label)
                root = math.sqrt(score)
                setattr(self, label, score * root * root * root * root)

        self.normalizedProbValues = pd.Series(
            {label: getattr(self, label) for label in emotionLabels})
        self.normalizeProbabilityPunctuation(_message)

        if runningScript == True:
            # Script mode has never returned a value from this method.
            return None
        return (self.normalizedProbValues, self.prob_dist,
                self.prob_dist_max) + tuple(
                    getattr(self, label)
                    for label in emotionLabels
                    if label != 'pride')

    def normalizeProbabilityPunctuation(self, _message):
        """Adjust ``inquisitive`` / ``intensity`` using punctuation counts.

        Reads ``self.wordCount``, ``self.sentenceCount`` and
        ``self.punctCountDict``, which must already be set on the instance.
        For a short message (fewer than 50 words and one to three sentences)
        the punctuation tallies are checked against a threshold of 1.  For a
        longer message the ranges and the threshold are rescaled from the word
        and sentence counts before the same check is applied.

        The check uses a fixed precedence: digits, then periods, then question
        marks, then exclamation points.  Only the first category that reaches
        the threshold is acted on, and digits and periods trigger no
        adjustment.  Question marks raise ``self.inquisitive`` and exclamation
        points raise ``self.intensity``: a score of 0.1 or less becomes 0.1,
        anything higher becomes ``score / sqrt(score) * count``.

        Afterwards ``self.normalizedProbValues`` is rebuilt as a
        ``pandas.Series`` of the 26 emotion attributes and
        ``self.normalizeProbability`` is called.

        Args:
            _message: The analysed text; only forwarded to
                ``self.normalizeProbability``.

        Returns:
            None.
        """
        emotionLabels = (
            'positive', 'negative', 'joy', 'anger', 'love', 'hate',
            'certainty', 'confusion', 'amusement', 'boredom', 'intensity',
            'regret', 'agreeable', 'challenging', 'desire', 'calm',
            'emphatic', 'sarcastic', 'instructive', 'accusative',
            'admiration', 'inquisitive', 'modest', 'pride', 'ambivalence',
            'vulgarity',
        )
        punctCounts = self.punctCountDict

        def boostFromPunctuation(threshold):
            # Shared by both stages so the two copies cannot drift apart (the
            # second copy used to misspell ``inquisitive``).
            if punctCounts['numbers'] >= threshold:
                return
            if punctCounts['periods_end'] >= threshold:
                return
            if punctCounts['question_marks'] >= threshold:
                if self.inquisitive <= .1:
                    self.inquisitive = .1
                else:
                    self.inquisitive = self.inquisitive / math.sqrt(
                        self.inquisitive) * punctCounts['question_marks']
            elif punctCounts['exclamation_points'] >= threshold:
                if self.intensity <= .1:
                    self.intensity = .1
                else:
                    self.intensity = self.intensity / math.sqrt(
                        self.intensity) * punctCounts['exclamation_points']
            # Commas are counted but currently trigger no adjustment.

        # Base ranges; the long-message stage rescales them linearly.
        minWordCountRange = 0
        minSentenceCountRange = 0
        maxWordCountRange = 50
        maxSentenceCountRange = 3
        punctSlidingThreshold = 1

        # Stage 1: short messages use the base threshold unchanged.
        if (minWordCountRange < self.wordCount < maxWordCountRange
                and minSentenceCountRange < self.sentenceCount <=
                maxSentenceCountRange):
            boostFromPunctuation(punctSlidingThreshold)

        # Stage 2: long messages.  The sentence test used to compare the two
        # range constants with each other (always 0 > 3); it now compares the
        # actual sentence count with the limit.
        if (self.wordCount > maxWordCountRange
                or self.sentenceCount > maxSentenceCountRange):
            msgWordCountLeveler = int(self.wordCount / maxWordCountRange)
            msgSentenceCountLeveler = int(self.sentenceCount /
                                          maxSentenceCountRange)
            minWordCountRange = 1 * msgWordCountLeveler
            minSentenceCountRange = 1 * msgSentenceCountLeveler
            maxWordCountRange = maxWordCountRange * msgWordCountLeveler
            maxSentenceCountRange = minSentenceCountRange * msgSentenceCountLeveler
            # Make sure we're not dividing by 0
            if msgSentenceCountLeveler < 1:
                msgSentenceCountLeveler = 1
            punctSlidingThreshold = int(
                (punctSlidingThreshold *
                 (msgSentenceCountLeveler * msgWordCountLeveler /
                  msgSentenceCountLeveler)))
            # NOTE(review): maxWordCountRange is now 50 * floor(wordCount / 50),
            # which never exceeds wordCount, so this range test cannot succeed
            # as written.  Left unchanged until the intended ranges are
            # confirmed.
            if (minWordCountRange < self.wordCount < maxWordCountRange
                    and minSentenceCountRange < self.sentenceCount <
                    maxSentenceCountRange):
                boostFromPunctuation(punctSlidingThreshold)

        self.normalizedProbValues = pd.Series(
            {label: getattr(self, label) for label in emotionLabels})
        self.normalizeProbability(_message)

    def normalizeProbability(self, _message):
        """Rescale the emotion scores and rank them from strongest to weakest.

        ``self.normalizedProbValues`` (a ``pandas.Series`` indexed by emotion
        label) is run through ``RobustScaler``, a pass-through
        ``StandardScaler`` and max-norm ``normalize``.  Each scaled value is
        rounded to three decimals, multiplied by 100 and written back to the
        attribute named after its label.  ``self.normalizedProbValues`` is then
        replaced by a list of ``(label, score)`` tuples sorted by score in
        descending order, and that list is printed.

        When the module flag ``runningScript`` is true, ``self.getInput`` is
        called before returning.

        NOTE(review): the scalers receive 1-D input here; scikit-learn
        releases from 0.19 on are believed to reject that -- confirm the
        targeted version before upgrading.

        Args:
            _message: The analysed text; only forwarded to ``self.getInput``.

        Returns:
            Tuple of ``self.normalizedProbValues`` followed by the score
            attributes in label order.  ``pride`` is not part of the tuple;
            this matches the historical return shape.
        """
        # Order matters twice: it is the insertion order of the dict that is
        # sorted below (ties keep this order) and the order of the returned
        # tuple.
        emotionLabels = (
            'positive', 'negative', 'joy', 'anger', 'love', 'hate',
            'certainty', 'confusion', 'amusement', 'boredom', 'intensity',
            'regret', 'agreeable', 'challenging', 'desire', 'calm',
            'emphatic', 'sarcastic', 'instructive', 'accusative',
            'admiration', 'inquisitive', 'modest', 'pride', 'ambivalence',
            'vulgarity',
        )

        rawSeries = self.normalizedProbValues
        # Remember which label sits at which position.  The scaled values used
        # to be mapped back by hard-coded alphabetical position, which is only
        # right when the Series index is alphabetically sorted.
        seriesLabels = list(rawSeries.index)

        scaled = preprocessing.RobustScaler(
            with_centering=True,
            with_scaling=True,
            quantile_range=(50.0, 100.0),
            copy=True).fit_transform(rawSeries)
        scaled = preprocessing.StandardScaler(
            with_mean=False,
            with_std=False).fit_transform(scaled)
        scaled = preprocessing.normalize(scaled, norm='max')
        scaledRow = np.array(scaled).tolist()[0]

        for label, value in zip(seriesLabels, scaledRow):
            setattr(self, label, float(round(value, 3) * 100))

        scoresByLabel = {label: getattr(self, label)
                         for label in emotionLabels}
        self.normalizedProbValues = sorted(scoresByLabel.items(),
                                           key=operator.itemgetter(1),
                                           reverse=True)
        print("\n\t", self.normalizedProbValues)
        if runningScript == True:
            self.getInput(_message)
        return (self.normalizedProbValues,) + tuple(
            getattr(self, label)
            for label in emotionLabels
            if label != 'pride')

コード例 #42

0

ファイルを表示

ファイル: classify.py プロジェクト: benhoff/vexparser

# Pair every utterance with the label of the list it came from.
experience_utterances = [(x, 'experience') for x in experience_utterances]
# NOTE(review): 'enivornment' looks like a misspelling of 'environment'. It is
# kept as-is because the label is visible to anything that consumes the
# classifier's output -- confirm before renaming.
environment_utterances = [(x, 'enivornment') for x in environment_utterances]
working_on_utterances = [(x, 'working') for x in working_on_utterances]

# All three inputs are lists at this point, so plain concatenation flattens
# them into a single training set.
training_set = (experience_utterances + environment_utterances +
                working_on_utterances)


classifier = NaiveBayesClassifier(training_set)
# show_informative_features() writes its table to stdout itself and returns
# None, so it is called on its own instead of being passed to print().
classifier.show_informative_features()
print(classifier.labels())

# Sample utterances that should not match any label; not used below.
bogus_utterances = (
        'if you going to use nltk u may want to check this out spacy .io',
        'sup people? I see the weather\'s getting better over there, Ben.',
        'i had the same problem your having so thats my i made my own.',
        'try http, instead of https'
        )

# TODO: Figure out how to make this stronger
# An utterance that touches two labels at once; not used below.
dual_utterance = ('how long have you been coding and what IDE do you use',)

test_utterances = ('what are you making',
                   'hey that nyancat is cool, how do you get that?')

# Print each test utterance with its most likely label and that label's
# probability.
for t in test_utterances:
    prob_dist = classifier.prob_classify(t)
    print(t, '\n', prob_dist.max(), prob_dist.prob(prob_dist.max()))

コード例 #43

0

ファイルを表示

ファイル: test_classifiers.py プロジェクト: Anhmike/TextBlob

class TestNaiveBayesClassifier(unittest.TestCase):
    """Tests for NaiveBayesClassifier.

    Covers classification, probability distributions, incremental updates,
    feature extraction, and construction from in-memory data as well as from
    CSV / TSV / JSON file objects and custom registered formats.
    """

    def setUp(self):
        # A fresh classifier per test, so the update() call in test_update
        # cannot affect other tests.
        self.classifier = NaiveBayesClassifier(train_set)

    def test_default_extractor(self):
        """Without a custom extractor, features come from basic_extractor."""
        text = "I feel happy this morning."
        assert_equal(self.classifier.extract_features(text), basic_extractor(text, train_set))

    def test_classify(self):
        """Classifying a string yields a label and keeps the train set intact."""
        res = self.classifier.classify("I feel happy this morning")
        assert_equal(res, 'positive')
        assert_equal(len(self.classifier.train_set), len(train_set))

    def test_classify_a_list_of_words(self):
        """A pre-tokenized document (list of words) can be classified."""
        res = self.classifier.classify(["I", "feel", "happy", "this", "morning"])
        assert_equal(res, "positive")

    def test_train_from_lists_of_words(self):
        """Training on token lists matches training on the raw strings."""
        # classifier can be trained on lists of words instead of strings
        train = [(doc.split(), label) for doc, label in train_set]
        classifier = NaiveBayesClassifier(train)
        assert_equal(classifier.accuracy(test_set),
                        self.classifier.accuracy(test_set))

    def test_prob_classify(self):
        """prob_classify returns a distribution favouring the right label."""
        res = self.classifier.prob_classify("I feel happy this morning")
        assert_equal(res.max(), "positive")
        assert_true(res.prob("positive") > res.prob("negative"))

    def test_accuracy(self):
        """accuracy() on in-memory test data returns a float."""
        acc = self.classifier.accuracy(test_set)
        assert_true(isinstance(acc, float))

    def test_update(self):
        """update() grows the train set and shifts probabilities accordingly."""
        res1 = self.classifier.prob_classify("lorem ipsum")
        original_length = len(self.classifier.train_set)
        self.classifier.update([("lorem ipsum", "positive")])
        new_length = len(self.classifier.train_set)
        res2 = self.classifier.prob_classify("lorem ipsum")
        assert_true(res2.prob("positive") > res1.prob("positive"))
        assert_equal(original_length + 1, new_length)

    def test_labels(self):
        """labels() reports every label present in the training data."""
        labels = self.classifier.labels()
        assert_true("positive" in labels)
        assert_true("negative" in labels)

    def test_show_informative_features(self):
        """Smoke test: show_informative_features() runs without raising."""
        self.classifier.show_informative_features()

    def test_informative_features(self):
        """informative_features(n) returns a list of tuples."""
        feats = self.classifier.informative_features(3)
        assert_true(isinstance(feats, list))
        assert_true(isinstance(feats[0], tuple))

    def test_custom_feature_extractor(self):
        """A user-supplied extractor is accepted as the second argument."""
        cl = NaiveBayesClassifier(train_set, custom_extractor)
        cl.classify("Yay! I'm so happy it works.")
        assert_equal(cl.train_features[0][1], 'positive')

    def test_init_with_csv_file(self):
        """A CSV file object with an explicit format can be used for training."""
        with open(CSV_FILE) as fp:
            cl = NaiveBayesClassifier(fp, format="csv")
        assert_equal(cl.classify("I feel happy this morning"), 'pos')
        training_sentence = cl.train_set[0][0]
        assert_true(isinstance(training_sentence, unicode))

    def test_init_with_csv_file_without_format_specifier(self):
        """The CSV format is detected when no format is given."""
        with open(CSV_FILE) as fp:
            cl = NaiveBayesClassifier(fp)
        assert_equal(cl.classify("I feel happy this morning"), 'pos')
        training_sentence = cl.train_set[0][0]
        assert_true(isinstance(training_sentence, unicode))

    def test_init_with_json_file(self):
        """A JSON file object with an explicit format can be used for training."""
        with open(JSON_FILE) as fp:
            cl = NaiveBayesClassifier(fp, format="json")
        assert_equal(cl.classify("I feel happy this morning"), 'pos')
        training_sentence = cl.train_set[0][0]
        assert_true(isinstance(training_sentence, unicode))

    def test_init_with_json_file_without_format_specifier(self):
        """The JSON format is detected when no format is given."""
        with open(JSON_FILE) as fp:
            cl = NaiveBayesClassifier(fp)
        assert_equal(cl.classify("I feel happy this morning"), 'pos')
        training_sentence = cl.train_set[0][0]
        assert_true(isinstance(training_sentence, unicode))

    def test_init_with_custom_format(self):
        """A format registered at runtime can supply the training data."""
        redis_train = [('I like turtles', 'pos'), ('I hate turtles', 'neg')]

        class MockRedisFormat(formats.BaseFormat):
            """Fake format that ignores its source and yields redis_train."""

            def __init__(self, client, port):
                self.client = client
                self.port = port

            @classmethod
            def detect(cls, stream):
                return True

            def to_iterable(self):
                return redis_train

        formats.register('redis', MockRedisFormat)
        mock_redis = mock.Mock()
        # Extra keyword arguments (port) are forwarded to the format class.
        cl = NaiveBayesClassifier(mock_redis, format='redis', port=1234)
        assert_equal(cl.train_set, redis_train)

    def test_data_with_no_available_format(self):
        """Data that no registered format recognises raises FormatError."""
        mock_fp = mock.Mock()
        mock_fp.read.return_value = ''

        assert_raises(FormatError, lambda: NaiveBayesClassifier(mock_fp))

    def test_accuracy_on_a_csv_file(self):
        """accuracy() accepts a CSV file object as test data."""
        with open(CSV_FILE) as fp:
            a = self.classifier.accuracy(fp)
        assert_equal(type(a), float)

    def test_accuracy_on_json_file(self):
        """accuracy() accepts a JSON file object as test data."""
        # Open the JSON fixture here; opening CSV_FILE would only repeat the
        # CSV test above and leave the JSON path unexercised.
        with open(JSON_FILE) as fp:
            a = self.classifier.accuracy(fp)
        assert_equal(type(a), float)

    def test_init_with_tsv_file(self):
        """The TSV format is detected when no format is given."""
        with open(TSV_FILE) as fp:
            cl = NaiveBayesClassifier(fp)
        assert_equal(cl.classify("I feel happy this morning"), 'pos')
        training_sentence = cl.train_set[0][0]
        assert_true(isinstance(training_sentence, unicode))

    def test_init_with_bad_format_specifier(self):
        """An unknown format name raises ValueError."""
        assert_raises(ValueError,
            lambda: NaiveBayesClassifier(CSV_FILE, format='unknown'))

    def test_repr(self):
        """repr() reports the number of training instances."""
        assert_equal(repr(self.classifier),
            "<NaiveBayesClassifier trained on {0} instances>".format(len(train_set)))

コード例 #44

0

ファイルを表示

ファイル: Text_Blob_demo.py プロジェクト: san-ag/Smart-Reader-2

# Small hand-labelled corpus: five positive and five negative sentences.
train = [
         ('I love this sandwich.', 'pos'),
         ('this is an amazing place!', 'pos'),
         ('I feel very good about these beers.', 'pos'),
         ('this is my best work.', 'pos'),
         ("what an awesome view", 'pos'),
         ('I do not like this restaurant', 'neg'),
         ('I am tired of this stuff.', 'neg'),
         ("I can't deal with this", 'neg'),
         ('he is my sworn enemy!', 'neg'),
         ('my boss is horrible.', 'neg')
         ]

cl = NaiveBayesClassifier(train)

# Label probability distribution for an unseen sentence.
prob_dist = cl.prob_classify("How you doing")

cl.show_informative_features(5)

# Active / passive sentence pairs, used to compare their part-of-speech tags.
txt_A = TextBlob("He can climb the mountain")
txt_B = TextBlob("The mountain can be climbed by him")
txt_C = TextBlob("He is doing his homework")
txt_D = TextBlob("The homework is being done by him")

# Single-argument print(...) produces the same output on Python 2 and is
# valid syntax on Python 3, unlike the bare print statement.
for blob in (txt_A, txt_B, txt_C, txt_D):
    print(blob.tags)

コード例 #45

0

ファイルを表示

class Bot(object):
    """Engine-agnostic core of the chat bot.

    Holds the registered event listeners, start-up hooks, learned sentences
    and the optional web application, and runs the main event loop. Methods
    that talk to a concrete chat service (``event_to_chat``, ``api``,
    ``send``, ``_update_channels``, ``get_channel``, ``find_channels``) raise
    ``CoreException`` here; ``_get_next_event`` and ``_setup`` are no-ops.
    A chat engine is presumably expected to subclass and override them.
    """
    instance = None
    engine = 'default'

    def __init__(self, start_web_app=False):
        """Initialise empty registries and optionally create the web app.

        Args:
            start_web_app: when true, build the web application right away
                via ``make_web_app()``.
        """
        self.module_path = ''
        self.memory: memory.Memory = None
        self.event_listeners = []
        self._web_events = []
        self._on_start = []

        self._user_id = ''
        self._user_name = ''

        self.help = help.Help()

        self._learn_map: List[Tuple[List[str], 'function']] = [
        ]  # saves all sentences to learn for a function
        # Built lazily in start() from _learn_map on the first text event.
        self._classifier: NaiveBayesClassifier = None

        self._web_app = None
        if start_web_app:
            self._web_app = self.make_web_app()

    @staticmethod
    def make_web_app():
        """Creates a web application.

        Returns:
            web.Application.
        """
        log.info('Creating a web app')
        return web.Application([(r'/health_check', HealthCheck)])

    def _start_web_app(self):
        """Creates a web server on WEB_PORT and WEB_PORT_SSL"""
        if not self._web_app:
            return
        log.info('Listing on port %s' % WEB_PORT)
        self._web_app.listen(WEB_PORT)
        if not WEB_NO_SSL:
            try:
                self._web_app.listen(
                    WEB_PORT_SSL,
                    ssl_options={
                        "certfile":
                        "/tmp/alphabot.pem",  # Generate these in your entrypoint
                        "keyfile": "/tmp/alphabot.key"
                    })
            except ValueError as e:
                # A failed SSL listener is logged but does not stop the
                # plain listener started above.
                log.error(e)
                log.error(
                    'Failed to start SSL web app on %s. To disable - set WEB_NO_SSL',
                    WEB_PORT_SSL)

    async def _setup(self):
        """Engine specific setup hook; a no-op in the base class.

        Declared ``async`` because ``setup()`` awaits the result. A plain
        function returning ``None`` would make that ``await`` raise.
        """
        pass

    def add_web_handler(self, path, handler):
        """Adds a Handler to a web app.

        Args:
            path (string): Path where the handler should be served.
            handler (web.RequestHandler): Handler to use.

        Raises:
            WebApplicationNotAvailable
        """
        if not self._web_app:
            raise WebApplicationNotAvailable

        self._web_app.add_handlers('.*', [(path, handler)])

    async def setup(self, memory_type, script_paths):
        """Prepare memory, run the engine hook, then load the scripts.

        Args:
            memory_type: key of the memory backend, see ``_setup_memory``.
            script_paths: directories to load script modules from.
        """
        await self._setup_memory(memory_type=memory_type)
        await self._setup()  # Engine specific setup
        await self._gather_scripts(script_paths)

    async def _setup_memory(self, memory_type='dict'):
        """Instantiate and set up the memory backend named by memory_type.

        Raises:
            InvalidOptions: if ``memory_type`` is not a known backend.
        """

        # TODO: memory module should provide this mapping.
        memory_map = {
            'dict': memory.MemoryDict,
            'redis': memory.MemoryRedis,
        }

        # Unknown memory types are rejected rather than defaulted.
        memoryclass = memory_map.get(memory_type)
        if not memoryclass:
            raise InvalidOptions('Memory type "%s" is not available.' %
                                 memory_type)

        self.memory = memoryclass()
        await self.memory.setup()

    def load_all_modules_from_dir(self, dirname):
        """Import every module found in ``dirname``.

        A module that fails to import is logged and its traceback is sent to
        DEBUG_CHANNEL; loading continues with the next module.
        """
        log.debug('Loading modules from "%s"' % dirname)
        for importer, package_name, _ in pkgutil.iter_modules([dirname]):
            self.module_path = "%s/%s" % (dirname, package_name)
            log.debug("Importing '%s'" % package_name)
            try:
                # NOTE(review): find_module()/load_module() are deprecated
                # importer APIs; confirm they exist on the targeted Python
                # version before upgrading.
                importer.find_module(package_name).load_module(package_name)
            except Exception as e:
                log.critical('Could not load `%s`. Error follows.' %
                             package_name)
                log.critical(e, exc_info=1)
                exc_type, exc_value, exc_traceback = sys.exc_info()
                traceback_string = StringIO()
                traceback.print_exception(exc_type,
                                          exc_value,
                                          exc_traceback,
                                          file=traceback_string)
                asyncio.ensure_future(
                    self.send(
                        'Could not load `%s` from %s.' %
                        (package_name, dirname), DEBUG_CHANNEL))

                asyncio.ensure_future(
                    self.send(traceback_string.getvalue(), DEBUG_CHANNEL))

    async def _gather_scripts(self, script_paths=None):
        """Load user scripts from ``script_paths``, then the default scripts."""
        log.info('Gathering scripts...')

        if not script_paths:
            log.warning('Warning! You did not specify any scripts to load.')
        else:
            for path in script_paths:
                log.info('Gathering functions from %s' % path)
                self.load_all_modules_from_dir(path)

        # TODO: Add a flag to control these
        log.info('Installing default scripts...')
        pwd = os.path.dirname(os.path.realpath(__file__))
        self.load_all_modules_from_dir("{path}/{default}".format(
            path=pwd, default=DEFAULT_SCRIPT_DIR))

    def _event(self, payload):
        """Append ``payload`` to the list of pending web events."""
        log.info('Adding an event on top of the stack: %s' % payload)
        self._web_events.append(payload)

    async def _get_next_event(self):
        """Return the next event to process; a no-op in the base class."""
        pass

    async def start(self):
        """Start the web app, run the start hooks and loop over events forever.

        For each event that carries text, the learned-sentence classifier is
        consulted first; if its best match is confident enough the matched
        function handles the message and the regular listeners are skipped.
        Otherwise every listener whose kwargs match the event is scheduled.
        """
        if self._web_app:
            log.info('Starting web app.')
            self._start_web_app()

        log.info('Executing the start scripts.')
        for func in self._on_start:
            log.debug('On Start: %s' % func.__name__)
            await func()

        log.info('Bot started! Listening to events.')

        while True:
            event = await self._get_next_event()

            log.debug('Received event: %s' % event)
            log.debug('Checking against %s listeners' %
                      len(self.event_listeners))

            # .get(): events without a 'text' key must not kill the loop.
            # The classifier is skipped while nothing has been learned, as
            # there are no labels to match against.
            if event.get('text') and (self._classifier or self._learn_map):
                if not self._classifier:
                    # Expand (sentences, func) pairs into one (sentence, func)
                    # training sample per sentence.
                    learn_map = []
                    for l in self._learn_map:
                        learn_map.extend([(k, l[1]) for k in l[0]])
                    self._classifier = NaiveBayesClassifier(learn_map)

                choices = self._classifier.prob_classify(event['text'])
                func = choices.max()
                prob = choices.prob(func)
                log.debug(
                    f'NLTK matched `{func.__name__}` function at {int(prob * 100)}%'
                )
                message = await self.event_to_chat(event)
                # Direct messages need less certainty than ambient chatter.
                min_prob = 0.65 if message.is_direct else 0.95
                if prob > min_prob:
                    asyncio.ensure_future(func(message))
                    continue  # Do not loop through event listeners!

            # Note: Copying the event_listeners list here to prevent
            # mid-loop modification of the list.
            for kwargs, func in list(self.event_listeners):
                match = self._check_event_kwargs(event, kwargs)
                log.debug('Function %s requires %s. Match: %s' %
                          (func.__name__, kwargs, match))
                if match:
                    future = func(event=event)
                    asyncio.ensure_future(future)
                    # TODO: add a way to detect if any of these were "REAL" Match
                    #       then execute the NLP part if none matched.

    async def wait_for_event(self, **event_args):
        """Block until an event matching ``event_args`` arrives and return it.

        A temporary listener is registered for the duration of the wait and
        is removed again even if the wait is cancelled or fails.
        """
        # Demented python scope.
        # http://stackoverflow.com/questions/4851463/python-closure-write-to-variable-in-parent-scope
        # This variable could be an object, but instead it's a single-element list.
        event_matched = []

        async def mark_true(event):
            event_matched.append(event)

        log.info('Creating a temporary listener for %s' % (event_args, ))
        listener = (event_args, mark_true)
        self.event_listeners.append(listener)

        try:
            while not event_matched:
                await asyncio.sleep(0.001)
        finally:
            # Without this, a cancelled waiter would leave its listener
            # registered forever.
            log.info('Deleting the temporary listener for %s' % (event_args, ))
            self.event_listeners.remove(listener)

        return event_matched[0]

    def add_listener(self, chat, **kwargs):
        """Register a listener that forwards matching events to ``chat.hear``.

        ``kwargs`` are the event fields to match; ``type`` defaults to
        ``'message'`` when not given.
        """
        log.info('Adding chat listener...')

        async def cmd(event):
            message = await self.event_to_chat(event)
            asyncio.ensure_future(chat.hear(message))

        # Uniquely identify this `cmd` to delete later.
        cmd._listener_chat_id = id(chat)

        if 'type' not in kwargs:
            kwargs['type'] = 'message'

        self._register_function(kwargs, cmd)

    def _remove_listener(self, chat):
        """Remove the listener previously added for ``chat`` by add_listener.

        If several listeners carry this chat's id, the last one found is
        removed. If none does, a warning is logged and nothing is removed.
        """
        match = None
        # Have to search all the event_listeners here
        for kw, cmd in self.event_listeners:
            if getattr(cmd, '_listener_chat_id', None) == id(chat):
                match = (kw, cmd)
        if match is None:
            # list.remove(None) would raise a misleading ValueError.
            log.warning('No listener found for chat %r; nothing to remove.' %
                        (chat, ))
            return
        self.event_listeners.remove(match)

    def _check_event_kwargs(self, event, kwargs):
        """Check that all expected kwargs were satisfied by the event."""
        return dict_subset(event, kwargs)

    # Decorators to be used in development of scripts

    def on_start(self, cmd):
        """Decorator: register ``cmd`` to be awaited once when start() runs."""
        self._on_start.append(cmd)
        return cmd

    def _register_function(self, kwargs, cmd):
        """Append ``(kwargs, cmd)`` to the event listeners."""
        log.debug('New Listener: %s => %s()' % (kwargs, cmd.__name__))
        self.event_listeners.append((kwargs, cmd))

    def on(self, **kwargs):
        """This decorator will invoke your function with the raw event."""
        def decorator(cmd):
            self._register_function(kwargs, cmd)
            return cmd

        return decorator

    def add_command(self, regex, direct=False):
        """Will convert the raw event into a message object for your function."""
        def decorator(cmd):
            # Register some basic help using the regex.
            self.help.update(cmd, regex)

            async def wrapper(event):
                message = await self.event_to_chat(event)
                matches_regex = message.matches_regex(regex)
                log.debug('Command %s should match the regex %s' %
                          (cmd.__name__, regex))
                if not matches_regex:
                    return False

                # Commands flagged direct only react to direct messages.
                if direct and not message.is_direct:
                    return False

                log.debug(f"Executing {cmd.__name__}")

                await cmd(message=message, **message.regex_group_dict)
                return True

            wrapper.__name__ = 'wrapped:%s' % cmd.__name__

            self._register_function({'type': 'message'}, wrapper)
            return cmd

        return decorator

    def learn(self, sentences: List[str], direct=False):
        """Learn sentences for a command.

        The decorated function becomes the label that ``start()`` trains the
        classifier with for each of the given sentences.

        :param sentences: list of example sentences that should trigger the
            decorated command
        :param direct: accepted but not used by this method
        :return: a decorator that records the command and returns it unchanged
        """
        def decorator(cmd):
            self._learn_map.append((sentences, cmd))
            return cmd

        return decorator

    def add_help(self, desc=None, usage=None, tags=None):
        """Decorator: attach help metadata (desc, usage, tags) to a command."""
        def decorator(cmd):
            self.help.update(cmd, usage=usage, desc=desc, tags=tags)
            return cmd

        return decorator

    def on_schedule(self, **schedule_keywords):
        """Invoke bot command on a schedule.

        Leverages APScheduler for asyncio.
        http://apscheduler.readthedocs.io/en/latest/modules/triggers/cron.html#api

        year (int|str) - 4-digit year
        month (int|str) - month (1-12)
        day (int|str) - day of the (1-31)
        week (int|str) - ISO week (1-53)
        day_of_week (int|str) - number or name of weekday (0-6 or mon,tue,wed,thu,fri,sat,sun)
        hour (int|str) - hour (0-23)
        minute (int|str) - minute (0-59)
        second (int|str) - second (0-59)
        start_date (datetime|str) - earliest possible date/time to trigger on (inclusive)
        end_date (datetime|str) - latest possible date/time to trigger on (inclusive)
        timezone (datetime.tzinfo|str) - time zone to use for the date/time calculations
        (defaults to scheduler timezone)
        """

        if 'second' not in schedule_keywords:
            # Default is every second. We don't want that.
            schedule_keywords['second'] = '0'

        def decorator(cmd):
            log.info('New Schedule: cron[%s] => %s()' %
                     (schedule_keywords, cmd.__name__))

            scheduler.add_job(cmd, trigger='cron', **schedule_keywords)
            return cmd

        return decorator

    # Functions that scripts can tell bot to execute.

    async def event_to_chat(self, event) -> 'Chat':
        """Convert a raw event into a Chat; not implemented in the base class."""
        raise CoreException('Chat engine "%s" is missing event_to_chat(...)' %
                            (self.__class__.__name__))

    async def api(self, text, to):
        """Not implemented in the base class; always raises CoreException."""
        raise CoreException('Chat engine "%s" is missing api(...)' %
                            (self.__class__.__name__))

    async def send(self, text, to, extra=None) -> 'Chat':
        """Send ``text`` to ``to``; not implemented in the base class."""
        raise CoreException('Chat engine "%s" is missing send(...)' %
                            (self.__class__.__name__))

    async def _update_channels(self):
        """Not implemented in the base class; always raises CoreException."""
        raise CoreException(
            'Chat engine "%s" is missing _update_channels(...)' %
            (self.__class__.__name__))

    def get_channel(self, name) -> 'Channel':
        """Look up a channel by name; not implemented in the base class."""
        raise CoreException('Chat engine "%s" is missing get_channel(...)' %
                            (self.__class__.__name__))

    def find_channels(self, pattern):
        """Find channels matching ``pattern``; not implemented in the base class."""
        raise CoreException('Chat engine "%s" is missing find_channels(...)' %
                            (self.__class__.__name__))

コード例 #46

0

ファイルを表示

ファイル: demo_sentiment_analyzer.py プロジェクト: alabarga/topic_modelling

]

if __name__ == "__main__":
    # Train on the in-memory train_set. An earlier variant of this script
    # trained from './texts/words.txt' with format="csv" instead.
    classifier = NaiveBayesClassifier(train_set)
    # Single-argument print(...) produces the same output on Python 2 and is
    # valid syntax on Python 3, unlike the bare print statement.
    print(train_set)
    print(classifier.accuracy(test_set))
    # show_informative_features() writes its own table; printing its return
    # value would only add a stray "None" line.
    classifier.show_informative_features()
    print("Ready ")
    # Interactive loop: classify each line typed on stdin until EOF or Ctrl-C.
    while True:
        try:
            line = sys.stdin.readline()
            if not line:
                # readline() returns '' only at EOF; stop before classifying
                # an empty string.
                break
            prob_dist = classifier.prob_classify(line.lower())
            print(prob_dist.max())
            print("PROB POS: " + str(round(prob_dist.prob("pos"), 2)))
            print("PROB NEG: " + str(round(prob_dist.prob("neg"), 2)))
        except KeyboardInterrupt:
            break