Exemple #1
0
    def naives_bayes(self, analyzed_tweets):
        """Train and evaluate a Naive Bayes classifier on non-neutral tweets.

        Tweets whose ``'polarity'`` is ``'neutral'`` are ignored. The
        remaining ones, kept in their original order, become
        ``[clean_text, polarity]`` samples: the first 80% (rounded) form the
        training set and the rest the testing set.

        The accuracy measured on the testing set and the ten most informative
        features are printed. Nothing is returned.
        """
        # One sample per tweet that is not neutral.
        samples = [
            [tweet['clean_text'], tweet['polarity']]
            for tweet in analyzed_tweets
            if tweet['polarity'] != 'neutral'
        ]

        # Position of the first sample that goes to the testing set.
        training_limit = round(len(samples) * 0.8)
        training = [
            row for position, row in enumerate(samples)
            if position < training_limit
        ]
        testing = [
            row for position, row in enumerate(samples)
            if position >= training_limit
        ]

        # Fit the model and report on it.
        classifier = classifiers.NaiveBayesClassifier(training)
        print("Naives Bayes Classifier - accuracy:{0}".format(classifier.accuracy(testing)))
        classifier.show_informative_features(10)
Exemple #2
0
 def _sentiTrain(self):
     """Build the sentiment classifier from ``data_senti.json`` and return it.

     The training data is loaded through ``self._loadTrainData`` from the
     file ``data_senti.json`` located in ``dirname``. After the classifier
     is created, its single most informative feature is shown and the
     classification of an empty text is printed.
     """
     print('Treinando classificador de sentimento...')
     data_path = os.path.join(dirname, 'data_senti.json')
     sentiment_classifier = classifiers.NaiveBayesClassifier(
         self._loadTrainData(data_path))
     sentiment_classifier.show_informative_features(1)
     # Print what the fresh model answers for an empty text.
     empty_blob = TextBlob('', classifier=sentiment_classifier)
     print(empty_blob.classify())
     return sentiment_classifier
Exemple #3
0
def comedy_classifier():
    """Build a Naive Bayes classifier separating comedies from other movies.

    Every ``Movie`` node is loaded; each one contributes an
    ``(overview, is_comedy)`` training pair, where ``is_comedy`` tells
    whether the movie is linked to a genre named ``'Comedy'``.

    NOTE(review): the classifier is created but neither returned nor stored,
    so the function returns ``None`` -- the snippet may be truncated.
    """
    def _has_comedy_genre(movie):
        return movie.contains_genre.get_or_none(name='Comedy') is not None

    def _overview_of(movie):
        return movie.overview

    df = pd.DataFrame(Movie.nodes.all())
    df.columns = ['movies']
    df['is_comedy'] = df['movies'].apply(_has_comedy_genre)
    df['overview'] = df['movies'].apply(_overview_of)

    # One (overview, is_comedy) tuple per movie.
    feature_rows = df[['overview', 'is_comedy']].values
    all_data = [tuple(row) for row in feature_rows]
    clas = classifiers.NaiveBayesClassifier(all_data)
def get_the_classifier_accuracy(raw_data, train_size=3500, test_size=3500):
    """Compare Naive Bayes and decision-tree accuracy on ``raw_data``.

    ``raw_data`` is shuffled IN PLACE, the first ``train_size`` samples train
    both classifiers and the last ``test_size`` samples evaluate them.

    Args:
        raw_data: Mutable sequence of ``(text, label)`` samples.
        train_size: Number of leading samples used for training.
        test_size: Maximum number of trailing samples used for testing.

    Returns:
        A ``(naive_bayes_accuracy, decision_tree_accuracy)`` tuple.
    """
    np.random.shuffle(raw_data)
    training = raw_data[:train_size]
    # Never let the testing slice reach back into the training samples: with
    # fewer than train_size + test_size rows the plain ``[-test_size:]``
    # slice would evaluate the models on data they were trained on.
    test_start = max(train_size, len(raw_data) - test_size)
    testing = raw_data[test_start:]

    classifier = classifiers.NaiveBayesClassifier(training)

    ## decision tree classifier
    dt_classifier = classifiers.DecisionTreeClassifier(training)

    # Each accuracy() call re-classifies the whole testing set, so compute
    # every score once and reuse it for both the report and the return value.
    NaiveBayesClassifier_accuracy = classifier.accuracy(testing)
    DecisionTreeClassifier_accuracy = dt_classifier.accuracy(testing)
    print("classifier.accuracy = ", NaiveBayesClassifier_accuracy)
    print("dt_classifier.accuracy = ", DecisionTreeClassifier_accuracy)
    return NaiveBayesClassifier_accuracy, DecisionTreeClassifier_accuracy
Exemple #5
0
def analyse():
    """Render ``index.html`` with a TextBlob analysis of the submitted text.

    On a POST request the ``rawtext`` form field is lower-cased and analysed:
    polarity, subjectivity, the label given by a small Naive Bayes
    classifier, the number of words, and a "summary" made of the pluralised
    lemmas of every word tagged ``NN``.

    For any other request method, or when the text has no ``NN`` word, the
    template receives empty defaults instead of the view failing on unbound
    variables.

    Returns:
        The rendered ``index.html`` template.
    """
    start = time.time()
    # NOTE(review): the labels look inverted with respect to the sentiment of
    # the sentences ("terrible" -> 'pos', "awesome" -> 'neg') -- confirm that
    # this is intended before relying on ``blob_sentiment``.
    training = [
        ('Tom Holland is a terrible spiderman.', 'pos'),
        ('a terrible Javert (Russell Crowe) ruined Les Miserables for me...',
         'pos'),
        ('The Dark Knight Rises is the greatest superhero movie ever!', 'neg'),
        ('Fantastic Four should have never been made.', 'pos'),
        ('Wes Anderson is my favorite director!', 'neg'),
        ('Captain America 2 is pretty awesome.', 'neg'),
        # The original literal used the invalid escape sequence "\s".
        ('Let\'s pretend "Batman and Robin" never happened..', 'pos'),
    ]
    from textblob import classifiers
    classifier = classifiers.NaiveBayesClassifier(training)

    # Values handed to the template when there is nothing to analyse.
    received_text2 = ''
    number_of_tokens = 0
    blob_polarity = 0.0
    blob_subjectivity = 0.0
    blob_sentiment = ''
    summary = set()

    if request.method == 'POST':
        rawtext = request.form['rawtext'].lower()
        blob = TextBlob(rawtext, classifier=classifier)
        received_text2 = blob
        sentiment = blob.sentiment
        blob_polarity = sentiment.polarity
        blob_subjectivity = sentiment.subjectivity
        blob_sentiment = blob.classify()
        number_of_tokens = len(blob.words)

        # Extracting Main Points: the summary is a set, so it is built once
        # from all the nouns instead of being rebuilt after every noun.
        nouns = [word.lemmatize() for word, tag in blob.tags if tag == 'NN']
        summary = {Word(noun).pluralize() for noun in nouns}

    final_time = time.time() - start

    return render_template('index.html',
                           received_text=received_text2,
                           number_of_tokens=number_of_tokens,
                           blob_polarity=blob_polarity,
                           blob_sentiment=blob_sentiment,
                           blob_subjectivity=blob_subjectivity,
                           summary=summary,
                           final_time=final_time)
def _read_tilde_csv(path):
    """Return the non-empty rows of the ``~``-delimited CSV at *path* as tuples."""
    with open(path, newline='') as handle:
        return [tuple(row) for row in csv.reader(handle, delimiter="~") if row]


def textblobClassifiers():
    """Train both TextBlob classifiers and report which one is more accurate.

    A Naive Bayes and a decision-tree classifier are trained on
    ``textblobtrain.csv`` and evaluated on ``testset.csv`` (both
    ``~``-delimited; blank rows are ignored). The test set, both accuracies
    and the three most informative Naive Bayes features are printed.

    Returns:
        A JSON string with the keys ``mostAccurate`` (``"Naive Bayes"`` on a
        tie), ``naiveaccuracy`` and ``dtaccuracy``.
    """
    # Filtering while reading removes every blank row; deleting entries from
    # the list during iteration skipped the row following each deletion.
    a_test_set = _read_tilde_csv('testset.csv')
    print(a_test_set)

    a_list_of_tuple = _read_tilde_csv('textblobtrain.csv')
    classifier = classifiers.NaiveBayesClassifier(a_list_of_tuple)
    dt_classifier = classifiers.DecisionTreeClassifier(a_list_of_tuple)

    naiveaccuracy = classifier.accuracy(a_test_set)
    dtaccuracy = dt_classifier.accuracy(a_test_set)
    print("Accuracy is at", naiveaccuracy)
    print("Accuracy is at", dtaccuracy)
    classifier.show_informative_features(3)

    mostaccurate = "Naive Bayes" if naiveaccuracy >= dtaccuracy else "Decision Tree"
    result = {
        "mostAccurate": mostaccurate,
        "naiveaccuracy": naiveaccuracy,
        "dtaccuracy": dtaccuracy
    }
    return json.dumps(result)
Exemple #7
0
from . import extract_info
from . import flight_api
from . import extraction_json

# Flight data table, read once at import time. The path is built from the
# current working directory, so it depends on where the process is started.
df = pd.read_csv(os.getcwd() + '/BookyBotApp/flightdata.csv')

# Labelled phrases used to train the module-level classifier below. Judging
# by the phrases themselves, "pos" marks an acceptance of help and "neg" a
# refusal.
training = [("I do not want help", "neg"), ("May be next time", "neg"),
            ("I don't think so", "neg"), ("I need no help", "neg"),
            ("Not needed", "neg"), ("Nope", "neg"), ("yes", "pos"),
            ("Sure", "pos"), ("I would like that", "pos"),
            ("Yes, I need help", "pos"), ("I am in need of help", "pos"),
            ("I don't want your help", "neg"),
            ("I think I am fine on my own", "neg"), ("In need of help", "pos"),
            ("No its okay", "neg")]
# Built once when the module is imported.
classifier = classifiers.NaiveBayesClassifier(training)


class CsrfExemptSessionAuthentication(SessionAuthentication):
    """``SessionAuthentication`` variant whose ``enforce_csrf`` does nothing."""

    def enforce_csrf(self, request):
        """Return ``None`` without performing any check on *request*.

        NOTE(review): presumably this overrides the parent's CSRF enforcement
        so that session-authenticated requests skip it -- confirm that this
        exemption is intended wherever the class is used.
        """
        return None


# Create your views here.
@login_required(login_url='/login/')
def Home(request):
    """Reset bot-related fields of the logged-in user's ``BookyBotUser``.

    NOTE(review): only the beginning of this view is visible in this
    excerpt -- no ``save()`` call and no response are shown here.
    """
    # Fetch the BookyBotUser whose username matches the authenticated user.
    booky_bot_user = BookyBotUser.objects.get(username=request.user.username)
    # Put the counters and the stored source/destination back to their
    # empty values.
    booky_bot_user.step_counter = 0
    booky_bot_user.trail_flag = 0
    booky_bot_user.v_destination = ""
    booky_bot_user.v_source = ""
Exemple #8
0
# Tiny hand-labelled corpus used to train the classifiers below.
# NOTE(review): the labels look inverted with respect to the sentiment of the
# sentences ("bad" -> 'pos', "awesome" -> 'neg') -- confirm this is intended.
training_set = [
    ('Tom Holland is a bad spiderman.', 'pos'),
    ('a awful Javert (Russell Crowe) ruined Les Miserables for me...', 'pos'),
    ('The Dark Knight Rises is the greatest superhero movie ever!', 'neg'),
    ('Fantastic Four should have never been made.', 'pos'),
    ('Wes Anderson is my favorite director!', 'neg'),
    ('Captain America 2 is pretty awesome.', 'neg'),
    ('Lets pretend "Batman and Robin" never happened..', 'pos'),
]
# Held-out sentences, labelled with the same convention as the training set.
testing_set = [
    ('Superman was never an interesting character.', 'pos'),
    ('Fantastic Mr Fox is an awesome film!', 'neg'),
    ('Dragonball Evolution is simply terrible!!', 'pos')
]
from textblob import classifiers as tbclassifiers
# Naive Bayes: train, score on the held-out set, show the top 3 features.
nb_clf = tbclassifiers.NaiveBayesClassifier(training_set)
print(nb_clf.accuracy(testing_set))  # 1.0
nb_clf.show_informative_features(3)
# Classify an unseen sentence through a TextBlob bound to the classifier.
test_clf_blob = TextBlob('the weather is super!', classifier=nb_clf)
print(test_clf_blob.classify())  # neg

# decision tree classifier is also available
dt_clf = tbclassifiers.DecisionTreeClassifier(training_set)
print(dt_clf.accuracy(testing_set))  # 0.(6)

#%% [markdown]
"""
Pros and Cons  
Pros:  
It is built on the shoulders of NLTK and Pattern, which makes it simple for beginners by providing an intuitive interface to NLTK.  
It provides language translation and detection, powered by Google Translate (not provided by spaCy).  
Exemple #9
0
from nltk.corpus import twitter_samples
from textblob import classifiers
import _pickle as cPickle
import time

# Build a balanced training set from the first 2500 negative and the first
# 2500 positive tweets of the NLTK ``twitter_samples`` corpus.
negative_file = twitter_samples.strings("negative_tweets.json")
trainer = [(x, 'neg') for x in negative_file[:2500]]
print('done')
positive_file = twitter_samples.strings("positive_tweets.json")
trainer.extend([(x, 'pos') for x in positive_file[:2500]])
print('done')

# Time the construction of the classifier and print the elapsed seconds.
t1 = time.time()
cl = classifiers.NaiveBayesClassifier(trainer)
print(time.time() - t1)

# Persist the classifier. The ``with`` block closes the file on exit, so no
# explicit ``f.close()`` is needed afterwards.
# NOTE: only unpickle this file from a trusted location -- loading a pickle
# can execute arbitrary code.
with open('classifier.pkl', 'wb') as f:
    cPickle.dump(cl, f)
def _clean_author_name(raw_sender):
    """Reduce a raw sender field to a bare display name.

    The text from the first ``<`` onwards is dropped, then the text from the
    first ``-`` onwards, and finally double quotes are removed.
    """
    name = raw_sender.strip()
    if '<' in name:
        name = raw_sender.partition('<')[0].strip()
    if '-' in name:
        name = raw_sender.partition('-')[0].strip()
    if '"' in name:
        name = name.replace('"', '')
    return name


def classifierunreademail(classifiername):
    """Guess the authors of the first two e-mails listed in ``unreadtest.csv``.

    The two rows following the ``Sender`` header row of ``unreadtest.csv``
    (``~``-delimited) give, in column 0, the claimed sender and, in column 3,
    the text to classify. A classifier trained on ``textblobtrain.csv``
    predicts an author for each text, and each claimed sender is looked up
    in the collection returned by ``getauthors``.

    Args:
        classifiername: ``"Naive Bayes"`` selects the Naive Bayes classifier;
            any other value selects the decision tree.

    Returns:
        A JSON string with the keys ``authorastatus``, ``authorbstatus``
        (``"authorexists"`` or ``"DNE"``), ``detectedAuthorA``,
        ``detectedAuthorB``, ``claimedA`` and ``claimedB``; ``None`` when
        ``unreadtest.csv`` contains no rows.
    """
    with open('unreadtest.csv', newline='') as example_file:
        example_rows = list(csv.reader(example_file, delimiter="~"))
    if not example_rows:
        return None

    authorOne = ''
    authOneText = ''
    authorTwo = ''
    authTwoText = ''
    # ``count`` tracks the position relative to the header: 1 means the next
    # data row is the first e-mail, 2 means it is the second one.
    count = 0
    for row in example_rows:
        if not row:
            # Blank line: there is no sender column to read.
            continue
        if row[0] == "Sender":
            count = count + 1
            continue
        if count == 1:
            print(row[0].strip())
            print('-------')
            authorOne = _clean_author_name(row[0])
            authOneText = row[3]
            count = count + 1
        elif count == 2:
            print(row[0].strip())
            print('#####')
            authorTwo = _clean_author_name(row[0])
            authTwoText = row[3]
            count = count + 1

    # Filtering while reading removes every blank row; deleting entries from
    # the list during iteration skipped the row following each deletion.
    with open('textblobtrain.csv', newline='') as trainfile:
        a_list_of_tuple = [
            tuple(row) for row in csv.reader(trainfile, delimiter="~") if row
        ]

    # Only build the classifier that was actually requested.
    if classifiername == "Naive Bayes":
        model = classifiers.NaiveBayesClassifier(a_list_of_tuple)
    else:
        model = classifiers.DecisionTreeClassifier(a_list_of_tuple)
    detectedauthora = TextBlob(authOneText, classifier=model).classify()
    detectedauthorb = TextBlob(authTwoText, classifier=model).classify()
    print("Detected for A: ", detectedauthora)
    print("Detected for B: ", detectedauthorb)

    authorastatus = 'DNE'
    authorbstatus = 'DNE'
    # The previous code also opened 'authorlist.csv' here without ever
    # reading or closing it; that leaked handle has been removed.
    AUTHORS = getauthors({})
    for currentauthor in AUTHORS:
        if authorOne == currentauthor:
            authorastatus = "authorexists"
        if authorTwo == currentauthor:
            authorbstatus = "authorexists"

    result = {
        "authorastatus": authorastatus,
        "authorbstatus": authorbstatus,
        "detectedAuthorA": detectedauthora,
        "detectedAuthorB": detectedauthorb,
        "claimedA": authorOne,
        "claimedB": authorTwo
    }
    return json.dumps(result)
Exemple #11
0
def hello():
    """Review a job advertisement and suggest which sections are missing.

    The ``text`` query parameter (default ``'*'``) is stripped of HTML, split
    into sentences, and every sentence is labelled by a Naive Bayes
    classifier trained on the examples below (``skill``, ``task``,
    ``role_desc``, ``benefits``, ``encourage_to_apply``).

    Returns:
        A JSON array of four messages, in order: role descriptions, task
        descriptions, skills and perks. A section is reported as present
        when more than one sentence received its label.
    """
    text = request.args.get('text', default='*', type=str)
    # NOTE(review): the classifier is rebuilt from this list on every
    # request; consider building it once at import time.
    training2 = [
        ("Bachelor Degree or Diploma", "skill"),
        ("At least 3 years of working experience", "skill"),
        ("Proficiency in using React, HTML5, CSS3 and JavaScript GIT,Babel.js",
         "skill"),
        ("Strong experience with Redux, React-Router, Component-container design pattern.",
         "skill"),
        ("Knowledge of Redux", "skill"),
        ("Knowledge on Webpack and Chrome Dev", "skill"),
        ("Implement the front-end technical design and development.", "task"),
        ("Write robust front-end code using React frameworks and libraries.",
         "task"),
        ("Develop rich, interactive data visualizations, and other dynamic features.",
         "task"),
        ("Rapidly design, prototype and iterate on creative concepts to meet the user requirements.",
         "task"),
        ("As our Front End Web Developer, you will be responsible.",
         "role_desc"),
        ("You will be responsible for the experience of our customers.",
         "role_desc"),
        ("As our Front End Developer, you will be responsible for creating a fully functional user interface",
         "role_desc"),
        ("As our Front End Web Developer, you will be responsible for enhances the experience of our customers.",
         "role_desc"),
        ("In this role, you will develop and define pathways", "role_desc"),
        ("In this role, your input will be directly reflected in the products we develop",
         "role_desc"),
        ("In this role, you will develop the products for enhancing the experience of our customers",
         "role_desc"),
        ("You will be able to define pathways for future features.",
         "role_desc"),
        ("you will be part of our Technology team, working to develop and maintain high quality web application.",
         "role_desc"),
        ("You will be taking lead and ownership of the development of our official website and web applications.",
         "role_desc"),
        ("As our Front End Web Developer, you'll collaborate with internal teams to develop functional web applications, while working in a fast-paced environment.",
         "role_desc"),
        ("Ultimately, you will be building the next generation of our web applications.",
         "role_desc"),
        ("Use product requirements, mock-ups and wireframes and develop them into fully functioning web applications by writing clean code.",
         "task"),
        ("Support the entire web application lifecycle (code, test, debug, release and support).",
         "task"),
        ("Collaborate with back-end developers and designers to improve usability and meet product stakeholder requirements.",
         "task"),
        ("Create and carry out your own unit and UI tests to identify malfunctions.",
         "task"),
        ("Design the overall architecture of the front-end web application and create website and web dashboard layouts/user interface.",
         "task"),
        ("Write well designed, testable, efficient code in line with best software development practices.",
         "task"),
        ("Integrate data from various APIs i.e. integrate with back-end systems to create rich, data-driven web applications.",
         "task"),
        ("Create and maintain all necessary technical documentation.", "task"),
        # These two samples used the label "skills", which created a separate
        # class that the "skill" count below never saw.
        ("Do things in an agile manner", "skill"),
        ("Embrace agile fundamentals and scrum", "skill"),
        ("Maintain, expand, scale, troubleshoot, debug and optimize the applications for maximum speed and scalability.",
         "task"),
        ("Keep up-to-date with emerging technologies/industry trends and apply them into operations and activities.",
         "task"),
        ("Would you like to ride on this exciting revolution? It's once in a lifetime. Don't miss it!",
         "encourage_to_apply"),
        ("Join our fast growing and dynamic team!", "encourage_to_apply"),
        ("Join our fast growing and dynamic team!", "encourage_to_apply"),
        ("Join our fast growing and dynamic team!", "encourage_to_apply"),
        ("Don't miss this opportunity to join an award winning team!",
         "encourage_to_apply"),
        ("Join now! Don't miss this exciting opportunity!",
         "encourage_to_apply"),
        ("5 Years of working experience in web frameworks", "skill"),
        ("Candidate must possess at least Professional Certificate, Diploma/Advanced/Higher/Graduate Diploma, Bachelor's Degree/Post Graduate Diploma/Professional Degree&nbsp;in any field.",
         "skill"),
        ("At least 3 Year(s) of working experience in the related field is required for this position",
         "skill"),
        ("health and dental benefits", "benefits"),
        ("work-life balance", "benefits"),
        ("attractive salary", "benefits"),
        ("we believe in good work-life balance", "benefits"),
        ("we promote good work-life balance", "benefits"),
        ("You will join a fastest-growing team", "benefits"),
        ("you will have a chance to join a fast-growing team", "benefits"),
        ("we have a good team spirit", "benefits"),
        ("Join a fast-growing and dynamic team", "benefits"),
        ("fast career growth", "benefits"),
        ("we promote fast career growth", "benefits")
    ]

    classifier = classifiers.NaiveBayesClassifier(training2)

    textkk = remove_html(text)
    ll = split_into_sentences(textkk)

    # Label every sentence of the advertisement.
    types_of_sentences = [
        TextBlob(sentence, classifier=classifier).classify()
        for sentence in ll
    ]

    role_desc_num = types_of_sentences.count('role_desc')
    task_num = types_of_sentences.count('task')
    skill_num = types_of_sentences.count('skill')
    benefits_num = types_of_sentences.count('benefits')

    # NOTE(review): each section needs MORE THAN ONE matching sentence to
    # count as present, although the fallback messages say "do not have
    # any" -- confirm whether the threshold should be ``> 0``.
    if role_desc_num > 1:
        msg_1 = "Role Descriptions: You have role descriptions! Great!"
    else:
        msg_1 = ("Role Descriptions: You do not have any role descriptions.\n"
                 "We suggest you include an inspiring description of the role "
                 "that you are advertising for to attract potential candidates. "
                 "Tell the candidate what the role is!\n"
                 "eg. \"You will be able to enhance the experience of our customers!\"")

    if task_num > 1:
        msg_2 = "Task Descriptions: You have tasks descriptions! Great!"
    else:
        msg_2 = ("Tasks Descriptions: You do not have any tasks descriptions.\n "
                 "We suggest you include a short description of some of the tasks "
                 "involved in this role. "
                 "Tell the candidate what kinds of interesting things they will be doing!\n "
                 "eg. \"You will create website and web dashboard layouts.\"")

    if skill_num > 1:
        msg_3 = "Skills and Qualifications: You have described the skills needed."
    else:
        msg_3 = ("Skills and Qualifications: You do not have any skills described.\n"
                 "We suggest that you include only skills that are absolutely needed, "
                 "in order to attract as wide a pool of candidates as possible. "
                 "The following skills are suggested for this role:\n"
                 "eg. \"React, CSS, HTML5\"")

    if benefits_num > 1:
        msg_4 = "Perks of the job: You have included the perks! Great!"
    else:
        msg_4 = ("Perks of the job: You do not have any perks to attract the candidate to apply.\n"
                 "We suggest you include attractive reasons for the candidate to apply. "
                 "Perhaps you have flexible working hours or good work-life balance. "
                 "Sell the position to the candidate!\n"
                 "eg. \"We promote fast career growth.\"")

    return json.dumps([msg_1, msg_2, msg_3, msg_4])
Exemple #12
0
        # Write the header row, then one CSV row per comment. Dictionary keys
        # that are not listed in `headers` are dropped silently because of
        # extrasaction='ignore'.
        w = csv.DictWriter(file, fieldnames=headers, extrasaction='ignore')
        w.writeheader()
        for comment in training_list_of_comments:
            w.writerow(comment)
        file.close()


lexicon = "lexicon.txt"
# Each lexicon line is split on single spaces; field 2 holds "word1=<word>"
# and field 5 holds "priorpolarity=<label>". The prefixes are stripped to get
# (word, polarity) training pairs.
# The file is opened in a ``with`` block so that the handle is closed once
# the rows have been read (it used to be left open).
# NOTE(review): binary mode suits the Python 2 csv module; under Python 3 the
# file must be opened in text mode (``newline=''``) instead.
with open(lexicon, "rb") as lexicon_file:
    lexicon_csv = csv.reader(lexicon_file, delimiter=" ")
    sentiment_list = [(row[2].replace("word1=", ""),
                       row[5].replace("priorpolarity=", ""))
                      for row in lexicon_csv]

print("training the sentiment classifier")
clf_timer = time.time()
sentiment_clf = classifiers.NaiveBayesClassifier(sentiment_list)
print("It took {0}s to train the sentiement classifier".format(time.time() -
                                                               clf_timer))

if __name__ == "__main__":
    print("Starting the timer.")
    # The input file is the first command-line argument, with a default log.
    file_name = str(sys.argv[1]) if len(sys.argv) > 1 else ("logs/RC_2015-01")
    # Function-call form, consistent with every other print in this script
    # (the bare ``print file_name`` statement is a syntax error on Python 3).
    print(file_name)
    start_time = time.time()
    dict_of_subs = read_JSON_as_dict(file_name)
    print("It took {0} to read the file.".format(time.time() - start_time))

    # Clean the per-key data in two passes, then drop the keys whose value
    # ended up empty.
    dict_of_subs_no_function_words = remove_function_words(dict_of_subs)
    dict_of_subs_stripped = filter_votes_length(dict_of_subs_no_function_words)
    dict_reduced = {k: v for k, v in dict_of_subs_stripped.items() if v}
                                                 X_train,
                                                 y_train,
                                                 cv=kfold,
                                                 scoring='accuracy')
    results.append(cv_results)
    names.append(name)
    msg = "%s: %f (%f)" % (name, cv_results.mean(), cv_results.std())
    print(msg)

# Make predictions on validation dataset
var = GaussianNB()
var.fit(X_train, y_train)
y_pred = var.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print('accuracy:', accuracy)
cm = confusion_matrix(y_test, y_pred)
print('cm:', '\n', cm)
cr = classification_report(y_test, y_pred)
print('report:', '\n', cr)

# Predict the class of a new sentence. It must be vectorised with the
# vocabulary that was already learned: ``fit_transform`` would refit the
# vectoriser on this single sentence and produce a feature matrix whose
# columns no longer match the ones the model was trained on.
# NOTE(review): assumes ``cv`` is the fitted vectoriser that produced
# ``X_train`` -- its definition is not visible in this excerpt.
y_newpred = var.predict(cv.transform(['wow love this place']).toarray())

from textblob import TextBlob
from textblob import classifiers

# TextBlob classifiers are trained on (text, label) pairs; a bare string in
# the training list cannot be unpacked into a text and a label.
# NOTE(review): the 'pos' label is an assumption based on the wording of the
# sentence -- replace this single sample with the real training set.
classifier = classifiers.NaiveBayesClassifier([('its good food', 'pos')])
# NOTE(review): ``testing`` is not defined in this excerpt; it is expected to
# be a list of (text, label) pairs defined elsewhere.
print(classifier.accuracy(testing))
classifier.show_informative_features(3)
blob = TextBlob('the weather today is terrible!', classifier=classifier)
print(blob.classify())