Code Example #1
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.twitter import Twitter


def standardize():
    text = "This chapter is divided into sections that skip between two quite different styles. In the computing with language sections we will take on some linguistically motivated programming tasks without necessarily explaining how they work. In the closer look at Python sections we will systematically review key programming concepts. We'll flag the two styles in the section titles, but later chapters will mix both styles without being so up-front about it. We hope this style of introduction gives you an authentic taste of what will come later, while covering a range of elementary concepts in linguistics and computer science. If you have basic familiarity with both areas, you can skip to 5; we will repeat any important points in later chapters, and if you miss anything you can easily consult the online reference material at http://nltk.org/. If the material is completely new to you, this chapter will raise more questions than it answers, questions that are addressed in the rest of this book."
    # Build the stop word set once instead of re-querying it for every token.
    stop_words = set(stopwords.words('english'))
    clean_tokens = [token for token in word_tokenize(text) if token not in stop_words]
    txt = ', '.join(clean_tokens)
    print(txt)
    tw = Twitter()
    tw.tweets(keywords=txt, stream=False, limit=10)  # search past tweets
Code Example #2
File: init.py  Project: zhangzw12319/fludetect-demo
from nltk.twitter import Twitter


def get_live_twitter_data():
    tw = Twitter()
    tw.tweets(keywords='flu, health, illness, hospital',
              stream=False,
              limit=5,
              to_screen=False)  # search past tweets and write them to a file


# Alternative using the streaming client directly:
# def get_live_twitter_data():
#     oauth = credsfromfile()
#     client = Streamer(**oauth)
#     client.register(TweetWriter(limit=20))
#     client.filter(track='have a fever, flu')
Code Example #3
import json

import numpy as np
from tqdm import tqdm
from nltk.twitter import Query, Twitter, credsfromfile


def scrape_twitter(google_client):
    tw = Twitter()
    # tweets = tw.tweets(keywords='JetBlue', stream=False, limit=10)  # search past tweets
    # print(tweets)
    oauth = credsfromfile()
    client = Query(**oauth)
    tweets = client.search_tweets(
        keywords='JetBlue OR #JetBlue -filter:retweets', limit=10000)

    topics_dict = {"tweet_texts": [],
                   "ent_score": [],
                   "ent_magn": [],
                   "overall_score": [],
                   "overall_magn": []}

    for tweet in tqdm(tweets):
        topics_dict["tweet_texts"].append(tweet['text'])
        # analyze_text() is a project helper; see the stub below.
        ent_score, ent_magnitude, doc_score, doc_magnitude = analyze_text(
            google_client, text=tweet['text'])
        topics_dict["ent_score"].append(ent_score)
        topics_dict["ent_magn"].append(ent_magnitude)
        topics_dict["overall_score"].append(doc_score)
        topics_dict["overall_magn"].append(doc_magnitude)
        # pprint(tweet, depth=1)
        # print('\n\n')

    print('Total Count:', len(topics_dict["tweet_texts"]))
    metrics = ["ent_score", "ent_magn", "overall_score", "overall_magn"]
    for metric in metrics:
        metric_score = np.asarray(topics_dict[metric])
        print(metric, "Mean:", np.mean(metric_score), "St Dev:",
              np.std(metric_score))

    with open('./csvs/twitter-jetblue-sentiment.json', 'w') as fp:
        json.dump(topics_dict, fp)
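
The helper analyze_text() is defined elsewhere in the project; judging from its four return values and the google_client argument, it wraps entity-level and document-level sentiment calls, presumably to the Google Cloud Natural Language API. A hypothetical stub so the snippet runs standalone (the zero returns are placeholders, not the project's implementation):

def analyze_text(google_client, text):
    # Placeholder returning (entity_score, entity_magnitude, doc_score, doc_magnitude).
    return 0.0, 0.0, 0.0, 0.0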
Code Example #4
File: twitterapi.py  Project: GuilhermeFerreira08/Git
from nltk.twitter import Twitter


class TwitterFeed:  # hypothetical class name; the original file only shows this method
    def __init__(self, to_screen=True, follow=None, keywords=None, limit=10):
        self.follow = follow
        self.keywords = keywords
        tw = Twitter()
        tw.tweets(to_screen=to_screen,
                  follow=follow,
                  keywords=keywords,
                  limit=limit)
Code Example #5
File: twitter_demo.py  Project: DrDub/nltk
from nltk.twitter import Twitter

SPACER = '#' * 45  # stand-in for the separator defined in the demo module


def twitterclass_demo():
    """
    Use the simplified :class:`Twitter` class to write some tweets to a file.
    """
    tw = Twitter()
    print("Track from the public stream\n")
    tw.tweets(keywords='love, hate', limit=10)  # public stream
    print(SPACER)
    print("Search past Tweets\n")
    tw = Twitter()
    tw.tweets(keywords='love, hate', stream=False, limit=10)  # search past tweets
    print(SPACER)
    print("Follow two accounts in the public stream" +
          " -- be prepared to wait a few minutes\n")
    tw = Twitter()
    tw.tweets(follow=['759251', '6017542'], stream=True, limit=5)  # public stream
Code Example #6
from nltk.twitter import Twitter

# Tweets are written below the directory that the TWITTER environment
# variable points to, e.g. "/Users/oskarzhang/twitter-files/".
tw = Twitter()
tw.tweets(limit=1000)  # sample 1000 tweets from the public stream
Code Example #7
import nltk
import pprint
from nltk.twitter import Query, Streamer, Twitter, TweetViewer, TweetWriter, credsfromfile
from wordcloud import WordCloud
import matplotlib.pyplot as plt
from nltk.sentiment.vader import SentimentIntensityAnalyzer
import seaborn as sns

# REST API: search past tweets rather than streaming
tw = Twitter()
# tw.tweets(keywords='LokSabhaElection2019', limit=2)
tw.tweets(keywords='LokSabhaElection2019', stream=False, limit=20)

## Read tweets
totaltweets = 0
oauth = credsfromfile()
client = Query(**oauth)
f = open('E:/temp/twitter.txt', 'w')
tweets = client.search_tweets(keywords='LokSabhaElection2019', limit=10000)
for tweet in tweets:
    print(tweet['text'])
    try:
        f.write(tweet['text'])
        totaltweets += 1
    except Exception:
        pass
f.close()

f = open('E:/temp/twitter.txt', 'a')
oauth = credsfromfile()
Code Example #8
from nltk.twitter import Twitter
tw = Twitter()
tw.tweets(keywords='love, hate', limit=10)  # sample from the public stream
Code Example #9
from nltk.twitter import Twitter

if __name__ == '__main__':
    tw = Twitter()
    tw.tweets(to_screen=False, limit=500, repeat=True)
Code Example #10
# Test data for the data_science project.

from nltk.twitter import Twitter
tw = Twitter()
tw.tweets(to_screen=False, limit=5000, lang=["en"])
Code Example #11
# export TWITTER="twitter.txt"

import json
import pickle
from pprint import pprint

from nltk.twitter import Query, Streamer, Twitter, credsfromfile

__author__ = 'kongaloosh'

# with open('data.json') as data_file:
with open('data/investments.json') as data_file:
    oauth = credsfromfile()
    data = json.load(data_file)
    client = Query(**oauth)

    for i in range(len(data['investments'])):
        # only entries that are dicts have a 'name' to search for
        if isinstance(data['investments'][i], dict):
            tweets = client.search_tweets(keywords=data['investments'][i]['name'],
                                          limit=100)
            data['investments'][i]['tweets'] = list(tweets)

    # pickle requires binary file modes
    with open('data_pickle.pkl', 'wb') as outfile:
        pickle.dump(data, outfile)

f = pickle.load(open('data_pickle.pkl', 'rb'))
print(f)
Code Example #12
from nltk.twitter import Twitter

tw = Twitter()
# With to_screen=False the tweets are written to a file under the TWITTER
# directory; tweets() does not return them, so capturing a result is unnecessary.
tw.tweets(to_screen=False, keywords='angry, upset', limit=1000)
Code Example #13
File: Twitter_NLP.py  Project: S-Black/Examples
import re  # regular expressions
import json
import sys
from pprint import pprint

from nltk.twitter import Query, Twitter, credsfromfile


# Functions
#------------------
def frequencyDistribution(data):
    return {i: data.count(i) for i in data}


#LIVE twitter feed
#------------------
# get 10 twitter messages mentioning the brand
tw = Twitter()
tw.tweets(keywords='nationalgriduk', stream=False, limit=10)

brand = 'nationalgriduk'

#API keys
#------------------------
oauth = credsfromfile()
client = Query(**oauth)
tweets = client.search_tweets(keywords=brand, limit=20000)
tweet = next(tweets)
pprint(tweet, depth=1)

# make sure tweets can be encoded: map characters outside the Basic
# Multilingual Plane to the Unicode replacement character
non_bmp_map = dict.fromkeys(range(0x10000, sys.maxunicode + 1), 0xfffd)
# print(x.translate(non_bmp_map))
Code Example #14
File: jaya.py  Project: AbhiRathore/python
from nltk.twitter import Query, Streamer, Twitter, TweetViewer, TweetWriter, credsfromfile
import tweepy
import json
from tweepy import Stream
from tweepy.streaming import StreamListener
from tweepy import OAuthHandler

Twitt = "D:/randomt/twitter.txt"

tw = Twitter()
tw.tweets(keywords='jayalalitha', stream=False, limit=10)  # search past tweets
Code Example #15
File: twitter.py  Project: rajataneja101/Python-Lab
import twython
import nltk
from nltk.twitter import Twitter
tw = Twitter()
tw.tweets(keywords='love, hate', limit=10) #sample from the public stream
Code Example #16
# -*- coding: utf-8 -*-
# Section 2.5.4  Twitter data: how to access Twitter data
from nltk.twitter import Twitter
tw = Twitter()
tw.tweets(keywords='happy', limit=10)
Code Example #17
File: Twitter.py  Project: jennifer-wong/lin127
# ********************** READ BEFORE USE **********************
# NLTK relies on a third-party library called Twython for the Twitter API;
# install it with 'pip install twython' before running this code.
# The environment variable TWITTER must also be set to the path of the
# directory containing the credentials text file: copy the path of the folder
# that holds credentials.txt, open your system's environment variables, create
# a new user variable named TWITTER, and paste that directory as its value.
# When you specify a keyword and a number of tweets and run the code, a JSON
# file is created and its location is printed in the terminal. Move that file
# into the directory containing this code; its name is then used in
# classification.py in the two lines where each classification is called (a
# JSON file is already specified there, so just change the name).
# The NLTK Twitter documentation is at http://www.nltk.org/howto/twitter.html
from nltk.twitter import Twitter, credsfromfile
from pprint import pprint
oauth = credsfromfile()
tw = Twitter()
tw.tweets(keywords='shooting', limit=10)  # prints to the terminal
tw.tweets(keywords='shooting', to_screen=False, limit=10)  # writes to a file
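
For reference, the NLTK Twitter howto linked above also documents the layout of the credentials file; a minimal sketch of what credentials.txt contains (placeholder values):

# credentials.txt -- kept in the directory that the TWITTER variable points to
app_key=YOUR_CONSUMER_KEY
app_secret=YOUR_CONSUMER_SECRET
oauth_token=YOUR_ACCESS_TOKEN
oauth_token_secret=YOUR_ACCESS_TOKEN_SECRET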
Code Example #18
# Test data for the data_science project.

from nltk.twitter import Twitter
tw = Twitter()
tw.tweets(to_screen=False, limit=10000)
Code Example #19
import nltk
from nltk.twitter import Twitter

# The consumer key/secret and access token/secret are read from the
# credentials file that the TWITTER environment variable points to,
# so they are not hard-coded here.

tw = Twitter()

tw.tweets(keywords='cher, python, nlp, soccer, celine dion', limit=10)
Code Example #20
File: twitter.py  Project: BelkacemB/nltk
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os

from nltk.twitter import Twitter

# Point NLTK at the directory holding credentials.txt and the output files.
os.environ["TWITTER"] = "C:/Users/admin/Documents/twitter-files"

tw = Twitter()
tw.tweets(keywords='algeria, algerie', limit=10)  # sample from the public stream

# Read back the sample Tweet corpus bundled with NLTK.
from nltk.corpus import twitter_samples

strings = twitter_samples.strings('tweets.20150430-223406.json')
for string in strings[:15]:
    print(string)
Code Example #21
from nltk.twitter import Twitter
tw = Twitter()
tw.tweets(to_screen=False, limit=20000, lang=['en'])
Code Example #22
import os

import nltk
import twython

from nltk.twitter import Twitter

# equivalent shell setup: export TWITTER="/twitter-files"
os.environ['TWITTER'] = "/twitter-files"

tw = Twitter()
tw.tweets(keywords='love, hate', limit=10)  # sample from the public stream
Code Example #23
File: twitter.py  Project: eddiesherlock/twitter
from nltk.twitter.common import json2csv
from nltk.twitter.common import json2csv_entities
from nltk.corpus import twitter_samples
from nltk.twitter import Query, Streamer, Twitter, TweetViewer, TweetWriter, credsfromfile
import pandas as pd

tw = Twitter()
tw.tweets(follow=['759251'], limit=10)  # see what CNN is talking about
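
The json2csv helpers imported above go unused in this snippet; a minimal sketch of how they are typically applied to a file of collected Tweets, following the NLTK Twitter howto (the bundled twitter_samples file stands in for the output of TweetWriter):

from nltk.corpus import twitter_samples
from nltk.twitter.common import json2csv

input_file = twitter_samples.abspath('tweets.20150430-223406.json')
with open(input_file) as fp:
    # keep only the selected fields of each Tweet in the CSV
    json2csv(fp, 'tweets_text.csv', ['created_at', 'text'])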