Esempio n. 1
0
def span(common_words):
    """Return the bounding values of column 1 for rows mentioning a term.

    Reads ``uploads/TWEETS.csv`` and, for every row that has more than five
    columns and whose column 5 contains ``common_words``, collects column 1
    (presumably the tweet date, judging by how the result is used -- confirm
    against the CSV schema).

    No classifier is loaded and no sentiment is computed: the result depends
    only on the CSV contents.

    Args:
        common_words: Substring searched for in column 5 of each row.

    Returns:
        A two-element list ``[first, last]``. Each element is itself a list
        holding at most one value: ``first`` is the *second* matching value
        (``dates[1:2]``) and ``last`` is the final matching value
        (``dates[-1:]``). Either list is empty when there are too few
        matches.
    """
    # The size limit is global to the csv module; raise it before any row
    # is parsed so oversized fields do not abort the read.
    csv.field_size_limit(20000000)

    with open('uploads/TWEETS.csv', 'r', encoding="utf8") as wash:
        reader = csv.reader(wash, delimiter=',', quotechar='|')
        dates = [
            row[1] for row in reader
            if len(row) > 5 and common_words in row[5]
        ]

    # NOTE(review): the lower bound deliberately keeps the original index 1
    # (second match) rather than 0 -- confirm whether the first match is
    # meant to be skipped.
    return [dates[1:2], dates[-1:]]
Esempio n. 2
0
def class_dataB_test(clf="Support Vector Machine"):
    """Count positive and negative rows of ``uploads/dataB.csv`` per month.

    Every CSV row with more than five columns is classified by passing
    column 5 to ``fact.get_sentiment``. Column 1, lower-cased, is used as the
    month key; rows whose key is not one of the twelve known month names are
    classified but not counted.

    Args:
        clf: Classifier name forwarded to ``fact.load_classifier``.

    Returns:
        A ``(positives, negatives)`` tuple of two 12-element lists of counts,
        ordered ``jan, feb, mar, april, may, jun, jul, aug, sep, oct, nov,
        dec``.
    """
    months = ['jan', 'feb', 'mar', 'april', 'may', 'jun',
              'jul', 'aug', 'sep', 'oct', 'nov', 'dec']
    classifier = fact.load_classifier(clf)

    # month -> [positive count, negative count]
    counts = {month: [0, 0] for month in months}

    # The size limit is global to the csv module; raise it before parsing.
    csv.field_size_limit(20000000)

    with open('uploads/dataB.csv', 'r', encoding="utf8") as wash:
        reader = csv.reader(wash, delimiter=',', quotechar='|')
        for row in reader:
            if len(row) <= 5:
                continue

            month = row[1].lower()
            # NOTE(review): per-row debug output kept from the original;
            # drop it once nothing relies on this trace.
            print(month)

            try:
                sentiment = fact.get_sentiment(row[5], classifier)
            except IndexError:
                # Best effort: skip rows the classifier cannot handle.
                continue

            if month not in counts:
                continue
            if sentiment == "positive":
                counts[month][0] += 1
            elif sentiment == "negative":
                counts[month][1] += 1

    positives = [counts[month][0] for month in months]
    negatives = [counts[month][1] for month in months]

    return positives, negatives

# print('\n')
# class_dataB()
def test(testset=testset, weight_file=args.weights):
    """Measure the accuracy of a saved classifier on a labeled test set.

    Args:
        testset: Labeled examples. They are converted with
            ``apply_features(_gender_features, ..., labeled=True)`` before
            being scored.
        weight_file: Passed to ``load_classifier`` to obtain the classifier.

    Returns:
        The accuracy value reported by ``nltk.classify.accuracy``.
    """
    began = time.time()
    model = load_classifier(weight_file)

    example_count = len(testset)
    print("Testing Naive Bayes Classifer on %d examples (%s)" %
          (example_count, time_since(began)))

    featurized = apply_features(_gender_features, testset, labeled=True)
    score = nltk.classify.accuracy(model, featurized)

    print("Testing accuracy is %.2f%% on %d examples (%s)" %
          (score * 100, len(featurized), time_since(began)))
    return score
Esempio n. 4
0
def class_dataB(clf="Support Vector Machine"):
    """Classify the items returned by ``compB()`` and summarise them by month.

    Each item is cleaned with ``fact.processTweet``. The month key is the
    second space-separated token of the second comma-separated field of the
    cleaned text, lower-cased. Items whose text does not have that shape
    (``IndexError``) are skipped entirely, but still count towards the
    percentage denominator.

    Args:
        clf: Classifier name forwarded to ``fact.load_classifier``.

    Returns:
        A tuple ``(posData, negData, p, n)``:

        * ``posData`` / ``negData``: 12-element lists of per-month counts,
          ordered ``jan, feb, mar, april, may, ..., dec``.
        * ``p`` / ``n``: percentage of positive / negative items over all
          items, formatted as strings with two decimals. Both are
          ``"0.00"`` when ``compB()`` returns no items.
    """
    months = ['jan', 'feb', 'mar', 'april', 'may', 'jun',
              'jul', 'aug', 'sep', 'oct', 'nov', 'dec']
    ts = compB()
    classifier = fact.load_classifier(clf)

    # month -> [positive count, negative count]
    counts = {month: [0, 0] for month in months}
    postweets = 0
    negtweets = 0

    for t in ts:
        text = fact.processTweet(t)
        try:
            # Parse the month first: an item without a parsable month is
            # skipped before it is classified, so it is not tallied at all.
            month = text.split(',')[1].split(' ')[1].lower()
            sentiment = fact.get_sentiment(text, classifier)
        except IndexError:
            continue

        if sentiment == "positive":
            slot = 0
            postweets += 1
        elif sentiment == "negative":
            slot = 1
            negtweets += 1
        else:
            continue

        # Totals count every classified item; the monthly table only counts
        # items whose month key is recognised.
        if month in counts:
            counts[month][slot] += 1

    posData = [counts[month][0] for month in months]
    negData = [counts[month][1] for month in months]
    print(posData, negData)

    # Guard against an empty input, which previously raised
    # ZeroDivisionError.
    total = len(ts)
    pe = 100 * postweets / total if total else 0.0
    ne = 100 * negtweets / total if total else 0.0

    p = format(pe, '.2f')
    n = format(ne, '.2f')

    return posData, negData, p, n
Esempio n. 5
0
def caTe(categories="categories"):
    """Return up to ten classified tweets whose text contains ``categories``.

    Reads every row of ``uploads/TWEETS.csv``, then walks the rows in file
    order. Rows with more than five columns whose column 5 contains
    ``categories`` are cleaned with ``act.processTweet2`` and classified with
    ``act.get_sentiment``.

    Args:
        categories: Substring searched for in column 5 of each row.

    Returns:
        A list of at most ten ``[cleaned_text, sentiment]`` pairs.
    """
    classifier = act.load_classifier()

    with open('uploads/TWEETS.csv', 'r', encoding="utf8") as wash:
        reader = csv.reader(wash, delimiter=',', quotechar='|')
        csv.field_size_limit(20000000)
        rows = list(reader)

    matches = []
    for row in rows:
        if len(row) <= 5 or categories not in row[5]:
            continue
        cleaned = act.processTweet2(row[5])
        matches.append([cleaned, act.get_sentiment(cleaned, classifier)])
        # Stop as soon as the tenth match has been collected.
        if len(matches) == 10:
            break
    return matches
Esempio n. 6
0
def viewMore(monthData, sentData):
    """Return up to ten tweets matching a column-1 substring and a sentiment.

    Reads every row of ``uploads/TWEETS.csv``, then walks the rows in file
    order. Rows with more than five columns whose column 1 contains
    ``monthData`` are cleaned with ``act.processTweet2`` and classified with
    ``act.get_sentiment``; only those whose sentiment equals ``sentData`` are
    kept.

    Args:
        monthData: Substring searched for in column 1 of each row.
        sentData: Sentiment value a row must be classified as to be kept.

    Returns:
        A list of at most ten ``[column_1, cleaned_text, sentiment]`` entries.
    """
    classifier = act.load_classifier()

    with open('uploads/TWEETS.csv', 'r', encoding="utf8") as wash:
        reader = csv.reader(wash, delimiter=',', quotechar='|')
        csv.field_size_limit(20000000)
        rows = list(reader)

    selected = []
    for row in rows:
        if len(row) <= 5 or monthData not in row[1]:
            continue
        cleaned = act.processTweet2(row[5])
        label = act.get_sentiment(cleaned, classifier)
        if label == sentData:
            selected.append([row[1], cleaned, label])
            # Stop as soon as the tenth match has been collected.
            if len(selected) == 10:
                break
    return selected
Esempio n. 7
0
def classify__(clf="Support Vector Machine"):
    """Classify the tweets from ``getTweetFromFile()`` and summarise them.

    Each tweet is cleaned with ``act.processTweet``. The month key is the
    second space-separated token of the second comma-separated field of the
    cleaned text, lower-cased. Tweets whose text does not have that shape
    (``IndexError``) are skipped, but still count towards the total used as
    the denominator.

    Prints ``"Negative"``, ``"Positive"`` or ``"Neutral"`` depending on
    whether the negative percentage is above, below or equal to the positive
    one.

    Args:
        clf: Classifier name forwarded to ``act.load_classifier``.

    Returns:
        A 9-tuple ``(pe, nee, neu, positives, negatives, neutrals, avPos,
        avNeg, getIt)``:

        * ``pe`` / ``nee``: positive / negative percentage of all tweets, as
          strings with two decimals.
        * ``neu``: percentage of tweets that are neither positive nor
          negative, computed as the remainder, so skipped tweets are
          included; string with two decimals.
        * ``positives`` / ``negatives`` / ``neutrals``: 12-element lists of
          per-month counts ordered ``jan, feb, mar, april, may, ..., dec``.
        * ``avPos`` / ``avNeg``: month-attributed positive / negative counts
          divided by the number of tweets, as strings with four decimals.
        * ``getIt``: the number of tweets.

        All percentages and averages are zero when there are no tweets.
    """
    months = [
        'jan', 'feb', 'mar', 'april', 'may', 'jun', 'jul', 'aug', 'sep', 'oct',
        'nov', 'dec'
    ]

    ts = getTweetFromFile()
    getIt = len(ts)
    classifier = act.load_classifier(clf)

    # month -> [positive count, negative count, other count]
    counts = {month: [0, 0, 0] for month in months}
    postweets = 0
    negtweets = 0

    for t in ts:
        text = act.processTweet(t)
        try:
            # Parse the month first: a tweet without a parsable month is
            # skipped before it is classified, so it is not tallied at all.
            month = text.split(',')[1].split(' ')[1].lower()
            sentiment = act.get_sentiment(text, classifier)
        except IndexError:
            continue

        if sentiment == "positive":
            slot = 0
            postweets += 1
        elif sentiment == "negative":
            slot = 1
            negtweets += 1
        else:
            slot = 2

        # Totals count every classified tweet; the monthly table only counts
        # tweets whose month key is recognised.
        if month in counts:
            counts[month][slot] += 1

    def _ratio(numerator):
        # An empty input previously raised ZeroDivisionError; report 0.
        return numerator / getIt if getIt else 0.0

    p = _ratio(100 * postweets)
    n = _ratio(100 * negtweets)
    ne = _ratio(100 * (getIt - negtweets - postweets))

    if p < n:
        print("Negative")
    elif p > n:
        print("Positive")
    else:
        print("Neutral")

    pe = format(p, '.2f')
    nee = format(n, '.2f')
    neu = format(ne, '.2f')

    positives = [counts[month][0] for month in months]
    negatives = [counts[month][1] for month in months]
    neutrals = [counts[month][2] for month in months]

    # Averages use only the counts attributed to a recognised month.
    avPos = format(_ratio(sum(positives)), '.4f')
    avNeg = format(_ratio(sum(negatives)), '.4f')

    return pe, nee, neu, positives, negatives, neutrals, avPos, avNeg, getIt