コード例 #1
0
ファイル: streaming.py プロジェクト: szlaci83/twitter_gossip
    def on_data(self, data):
        """Handle one raw message: score it, persist it, update the average.

        The message is parsed as JSON; its text, follower count and
        timestamp are copied into a record, the text is scored with
        ``sentiment.analyse``, and the record is pretty-printed and saved
        via ``self.save_as_JSON``.  When the day of the year changes, the
        previous day's data is handed to ``self.moveToZip`` first.

        Args:
            data: JSON-encoded message, passed straight to ``json.loads``.

        Returns:
            Always ``True`` (presumably tells the streaming client to keep
            listening -- confirm against the base class).
        """
        tweetJSON = json.loads(data)
        lineJSON = {}

        try:
            sentiment_line = tweetJSON['text']
            lineJSON['text'] = sentiment_line
            lineJSON['followers_count'] = tweetJSON['user']['followers_count']
            lineJSON['timestamp_ms'] = tweetJSON['timestamp_ms']
        except KeyError:
            # The message lacks one of the expected fields.  Whatever was
            # copied so far is kept, but an empty string is scored.
            sentiment_line = ""

        sentiment_score = sentiment.analyse(sentiment_line)
        lineJSON['sentiment'] = sentiment_score
        self.pp.pprint(lineJSON)

        # tm_yday is the day of the year; a change means the local date
        # rolled over since the last message.
        now = time.localtime().tm_yday
        if self.today != now:
            self.moveToZip(self.today)
            self.today = now
        self.save_as_JSON(lineJSON, self.today)

        # Messages scoring exactly 0 are left out of the running average.
        if sentiment_score != 0:
            self.no_of_tweets += 1
            self.total_sentiment_score += sentiment_score

        # Nothing to average until at least one non-zero score was seen.
        if self.no_of_tweets != 0:
            print("AVG sentiment: " +
                  str(self.total_sentiment_score / self.no_of_tweets))
        return True
コード例 #2
0
def eval(file_name, choice):
    """Screen text for suicidal content with two classifiers.

    NOTE: this name shadows the builtin ``eval``; it is kept unchanged
    because callers elsewhere refer to it.

    Args:
        file_name: When ``choice == 1`` this is handed to ``recognize`` and
            the result is treated as a single text item.  Otherwise it is
            the path of a text file whose lines are the items.
        choice: ``1`` selects single-item mode; any other value selects
            per-line file mode.

    Returns:
        In single-item mode, ``'Potentially suicidal content.'`` when both
        classifiers flag the item, else ``'No suicidal content found.'``.
        In file mode, the averaged number of flagged lines as a percentage
        of all lines; ``0.0`` when the file has no lines.
    """
    if choice == 1:
        text = [recognize(file_name)]
    else:
        with open(file_name, "rt") as f:
            text = f.readlines()
        print(text)

    nb_count = 0
    svm_count = 0
    print(text)
    total = len(text)
    for item in text:
        # Only items whose sentiment score is at most 0.1 are passed on to
        # the two classifiers.
        if analyse(item) <= 0.1:
            if predict_nb(item) == 'suicidal':
                nb_count += 1
            if predict_svm(item) == 'suicidal':
                svm_count += 1

    # Mean of the two classifiers' hit counts.
    avg_count = (nb_count + svm_count) / 2

    if choice == 1:
        # With a single item the mean is 1 only when both classifiers agree.
        if avg_count == 1:
            return 'Potentially suicidal content.'
        return 'No suicidal content found.'

    print(avg_count)
    print(total)
    if total == 0:
        # An empty file has nothing to flag; avoid dividing by zero.
        return 0.0
    return (avg_count / total) * 100
コード例 #3
0
def _write_csv(frame, path):
    """Write *frame* to *path* as UTF-8 CSV, leaving out the index column."""
    frame.to_csv(path, index=False, encoding='utf-8')


# Each stage below runs only when its flag is present in ``flags``.
# The first three stages add a column to ``news_df`` and dump the whole
# frame; the last two dump the object returned by the stage instead.

if '--chunk' in flags:
    print('Running Tag and Chunker')
    news_df['chunked_text'] = chunker.tag_and_chunk(news_df['full_text'])
    print('Dataset dumped to: news_chunked.csv')
    _write_csv(news_df, 'news_chunked.csv')

if '--stanford' in flags:
    print('Running StanfordParser')
    news_df['stanford_text'] = stanford.parse(news_df['full_text'])
    print('Dataset dumped to: news_stanford.csv')
    _write_csv(news_df, 'news_stanford.csv')

if '--dep_parse' in flags:
    print('Running Dep Parser')
    news_df['deps_text'] = dependency_parser.parse(news_df['full_text'])
    print('Dataset dumped to: news_dependency.csv')
    _write_csv(news_df, 'news_dependency.csv')

if '--named_ents' in flags:
    print('Running Named Entities Extracter')
    named_ents = named_entities.parse(news_df['full_text'])
    print('Dataset dumped to: news_named_ents.csv')
    _write_csv(named_ents, 'news_named_ents.csv')

if '--sentiment' in flags:
    print('Running Sentiment Analysis')
    sentiments = sentiment.analyse(
        news_df['full_text'],
        list(news_df['news_category']),
    )
    print('Dataset dumped to: news_sentiment.csv')
    _write_csv(sentiments, 'news_sentiment.csv')
コード例 #4
0
import sentiment
import json

# sentiment.analyse expects a list of (review string, rating) tuples.
# This example uses the Yelp academic dataset:
# https://www.yelp.com/academic_dataset
# It is around 200 MB, so it is not uploaded to GitHub.

labMTPath = "happiness.txt"
# Raw string: the backslashes are Windows path separators, not escape
# sequences ("\Y" and "\y" are invalid escapes that newer Python versions
# warn about).  The resulting value is unchanged.
yelp = r"..\Yelp\yelp_phoenix_academic_dataset\yelp_academic_dataset_review.json"


def loadYelpData(path):
    """Load reviews from a file holding one JSON object per line.

    Args:
        path: Path of the file to read.  Each non-blank line must be a JSON
            object with a "text" entry and a "stars" entry.

    Returns:
        A list of ``(text, stars)`` tuples in file order, with ``stars``
        converted to ``float``.

    Raises:
        KeyError: If a record lacks "text" or "stars".
        ValueError: If a non-blank line is not valid JSON.
    """
    # io.open accepts ``encoding`` on both Python 2 and 3; JSON text is
    # UTF-8, so do not depend on the platform's default encoding.
    import io

    reviews = []
    with io.open(path, "r", encoding="utf-8") as f:
        for line in f:
            if not line.strip():
                # Blank lines carry no record and would make json.loads fail.
                continue
            record = json.loads(line)
            reviews.append((record["text"], float(record["stars"])))
    return reviews


# Load the Yelp reviews, hand them to sentiment.analyse together with the
# labMT path and an output name, then call sentiment.visualise with that
# same name (presumably analyse writes the pickle that visualise reads --
# confirm against the sentiment module).
yelp_reviews = loadYelpData(yelp)
sentiment.analyse(yelp_reviews, labMTPath, "yelpSentiment.pkl")
sentiment.visualise("yelpSentiment.pkl")
コード例 #5
0
# sentiment.analyse expects a list of (review string, rating) tuples.
# NOTE(review): this header previously described the Yelp academic dataset
# (https://www.yelp.com/academic_dataset, around 200 MB, not uploaded to
# GitHub), but this script passes the Goodreads file at grPath to
# sentiment.analyse -- the comment looks carried over from the Yelp example;
# confirm.

# Second argument to sentiment.analyse (presumably the labMT happiness word
# list -- confirm).
labMTPath = "happiness.txt"
# Pickle file read by loadGoodreadsData.
grPath = "../../data/goodreads.20130510.txt"

def loadYelpData(path):
    """Load reviews from a file holding one JSON object per line.

    Args:
        path: Path of the file to read.  Each non-blank line must be a JSON
            object with a "text" entry and a "stars" entry.

    Returns:
        A list of ``(text, stars)`` tuples in file order, with ``stars``
        converted to ``float``.

    Raises:
        KeyError: If a record lacks "text" or "stars".
        ValueError: If a non-blank line is not valid JSON.
    """
    # io.open accepts ``encoding`` on both Python 2 and 3; JSON text is
    # UTF-8, so do not depend on the platform's default encoding.
    import io

    reviews = []
    with io.open(path, "r", encoding="utf-8") as f:
        for line in f:
            if not line.strip():
                # Blank lines carry no record and would make json.loads fail.
                continue
            record = json.loads(line)
            reviews.append((record["text"], float(record["stars"])))
    return reviews

def loadGoodreadsData(path):
    """Load reviews from a pickle file.

    The unpickled object is indexed with the keys "reviews" and "stars";
    entry ``i`` of each is paired into one tuple.

    SECURITY: ``pickle.load`` can execute arbitrary code while loading.
    Only call this on files from a trusted source.

    Args:
        path: Path of the pickle file to read.

    Returns:
        A list of ``(review, stars)`` tuples, one per entry of "stars".

    Raises:
        IndexError: If "reviews" has fewer entries than "stars".
    """
    # The context manager closes the file even if unpickling fails.
    with open(path, "rb") as input_:
        data = pickle.load(input_)

    stars = data["stars"]
    # Driven by the length of "stars" on purpose: a shorter "reviews"
    # raises IndexError instead of being silently truncated, as zip() would.
    return [(data["reviews"][i], stars[i]) for i in range(len(stars))]

# Load the Goodreads reviews, hand them to sentiment.analyse together with
# the labMT path and an output name, then call sentiment.visualise with
# that same name (presumably analyse writes the pickle that visualise
# reads -- confirm against the sentiment module).
data = loadGoodreadsData(grPath)

sentiment.analyse(data, labMTPath, "goodreadsSentiment.pkl")
sentiment.visualise("goodreadsSentiment.pkl")