def TwitterStream():
    # Another way to mine Twitter is to set up a stream.
    # A Twitter stream maintains an open connection to Twitter,
    # and waits for data to pour in.
    # Twitter.search() allows us to look at older tweets,
    # Twitter.stream() gives us the most recent tweets.
    for trend in Twitter().trends(cached=False):
        print(trend)
    # It might take a few seconds to set up the stream.
    stream = Twitter().stream("i love", timeout=30)
    pos_count = 0
    neg_count = 0
    for i in range(50):
        # Use float division; this is a positive-to-negative ratio,
        # not a percentage.
        if neg_count:
            ratio = float(pos_count) / neg_count
        else:
            ratio = 0
        print("%s %s %.2f" % (pos_count, neg_count, ratio))
        # Poll Twitter to see if there are new tweets.
        stream.update()
        # The stream is a list of buffered tweets so far,
        # with the latest tweet at the end of the list.
        for tweet in reversed(stream):
            print(tweet.text)
            print(tweet.language)
            sent = pol(tweet.text)
            if sent > 0:
                pos_count += 1
            else:
                neg_count += 1
        # Clear the buffer every so often.
        stream.clear()
        # Wait awhile between polls.
        time.sleep(1)
    print("Final Twitter")
    print(pos_count)
    print(neg_count)
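# pol() is not defined in the snippet above; a minimal sketch (an assumption,
# not the original helper) using the polarity score from pattern.en:
from pattern.en import sentiment

def pol(text):
    # sentiment() returns (polarity, subjectivity); polarity is in [-1.0, +1.0].
    return sentiment(text)[0]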
def obtenerTweets(request):
    twitterEn = Twitter(language='en')
    twitterEs = Twitter(language='es')
    idJuego = request.GET.get("id")
    juego = Juego.objects.get(id=idJuego)
    tweets = []
    for tweet in twitterEs.search(juego.titulo, cached=False):
        tweets.append(tweet.text)
    for tweet in twitterEn.search(juego.titulo, cached=False):
        tweets.append(tweet.text)
    return render(request, 'obtenerTweets.html', {'tweets': tweets})
def crawl(topic, N=100, Nbatch=25):
    t = Twitter()  # language='en', 'id'
    M = N // Nbatch  # number of batches (integer division)
    i, Tweets, keepCrawling = None, [], True
    for j in tqdm(range(M)):
        if keepCrawling:
            for tweet in t.search(topic, start=i, count=Nbatch):
                try:
                    Tweets.append(tweet)
                    i = tweet.id  # paginate from the last tweet id
                except:
                    print("Twitter Limit reached")
                    keepCrawling = False
                    break  # second break (the outer loop stops via keepCrawling)
        else:
            break
    print('Making sure we get the full tweets, please wait ...')
    for i, tweet in enumerate(tqdm(Tweets)):
        try:
            webPage = URL(tweet.url).download()
            soup = bs(webPage, 'html.parser')
            # Modify this to get all replies.
            full_tweet = soup.find_all('p', class_='TweetTextSize')[0]
            full_tweet = bs(str(full_tweet), 'html.parser').text
            Tweets[i]['fullTxt'] = full_tweet
        except:
            Tweets[i]['fullTxt'] = tweet.text
    print('Done! ... Found a total of {0} tweets'.format(len(Tweets)))
    return Tweets
def get_replies(reply_id):
    import json
    from pattern.web import URL, Twitter
    reply_id = reply_id - 1
    url = URL("https://api.twitter.com/1.1/statuses/mentions_timeline.json",
              method="get",
              query={"since_id": reply_id})
    twitter = Twitter(license=ccpattern)
    url = twitter._authenticate(url)
    user_replies = {}
    bot_replies = {}
    try:
        data = json.loads(url.open().read())
        for reply in data:
            name = reply["user"]["name"].strip()
            text = reply["text"].replace("@BotsVsQuotes", "").strip()
            if name == "BotsVsQuotes":
                # Bot quotes, formatted as "character name: quote".
                text = text.split(":")
                char_name = text[0]
                bot_replies[char_name] = "".join(text[1:]).strip()
            else:
                # User quotes.
                user_replies[name] = text
    except Exception as e:
        print(e)
        print(e.src)
        print(e.src.read())
        return {}, {}
    return bot_replies, user_replies
def get_pattern_data(search_param):
    twitter = Twitter(language='en')
    for tweet in twitter.search(search_param, cached=True):
        print(plaintext(tweet.text).encode('ascii', 'ignore').decode('utf-8'))
    g = Graph()
    for i in range(10):
        for result in twitter.search(search_param, start=i + 1, count=50):
            s = result.text.lower()
            s = plaintext(s)
            s = parsetree(s)
            p = '{NP} (VP) ' + search_param + ' {NP}'
            for m in search(p, s):
                x = m.group(1).string  # NP left
                y = m.group(2).string  # NP right
                if x not in g:
                    g.add_node(x)
                if y not in g:
                    g.add_node(y)
                g.add_edge(g[x], g[y], stroke=(0, 0, 0, 0.75))  # R,G,B,A
    # if len(g) > 0:
    #     g = g.split()[0]  # Largest subgraph.
    for n in g.sorted()[:40]:  # Sort by Node.weight.
        n.fill = (0, 0.5, 1, 0.75 * n.weight)
    g.export('data', directed=False, weighted=0.6)
def create_stream(phrase, queue):
    """
    Celery task that connects to the twitter stream and runs a loop,
    periodically emitting tweet information to all connected clients.
    """
    local = SocketIO(message_queue=queue)
    stream = Twitter().stream(phrase, timeout=30)
    for i in range(60):
        stream.update()
        for tweet in reversed(stream):
            sentiment = classify_tweet(tweet)
            x, y = vectorize_tweet(tweet)
            local.emit('tweet', {
                'id': str(i),
                'text': str(tweet.text.encode('ascii', 'ignore')),
                'sentiment': sentiment,
                'x': x,
                'y': y
            })
        stream.clear()
        time.sleep(1)
    return queue
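# classify_tweet() and vectorize_tweet() are defined elsewhere in this app;
# minimal stand-in sketches (assumptions, not the app's actual helpers) so
# the task above can be run in isolation, using pattern.en's sentiment():
from pattern.en import sentiment

def classify_tweet(tweet):
    # Label the tweet by the sign of its polarity score.
    return 'pos' if sentiment(tweet.text)[0] >= 0 else 'neg'

def vectorize_tweet(tweet):
    # Use (polarity, subjectivity) as 2-D plot coordinates.
    return sentiment(tweet.text)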
def Pattern_Module_Twitter_Stream():
    # Start stopwatch.
    t1 = time.time()
    # Open a stream for tweets tagged #Fail.
    stream = Twitter().stream('#Fail')
    # Poll the stream 10 times.
    for second in range(10):
        # Fetch new data; new tweets are buffered in the stream list itself.
        stream.update(bytes=1024)
        # Skip this round if the buffer is still empty.
        if not stream:
            continue
        # The latest tweet is at the end of the buffer.
        tweet = stream[-1]
        print('Tweet: %s' % tweet.text)
        # Get sentiment.
        print('Sentiment Analysis of Tweet: %s' % TextBlob(
            str(tweet.text),
            analyzer=NaiveBayesAnalyzer()).sentiment[0].upper())
        # Wait 3 seconds between queries - do not want to get blocked.
        time.sleep(3)
    return time.time() - t1
def gettweets(searchterms):
    from pattern.web import Twitter, plaintext
    tweetlist = []
    twitter = Twitter(language='en')
    for tweet in twitter.search(searchterms, cached=False):
        tweetlist.append(plaintext(tweet.text))
    return tweetlist
def search(text):
    # Avoid shadowing the builtin list().
    results = []
    twitter = Twitter(language='en')
    for tweet in twitter.search(text, count=30, cached=False):
        results.append(tweet.text)
    return results
def twitter_search():
    t = Twitter(language='es')
    i = None
    for j in range(3):  # For pagination.
        for r in t.search(query="#DonaldTrump", start=i, count=10):
            print(r.id, r.text, r.date)
            i = r.id
        print("----------------@@@@@@-------------")
def getTweetsByCoord(self, term, lat, lng):
    twitter = Twitter(language='en')
    tweets = []
    # Search for the given term near the given coordinates
    # (the original hard-coded 'traffic' here, ignoring the term parameter).
    for tweet in twitter.search(term, geo=(lat, lng)):
        tweets.append(tweet.text)
    return tweets
def trending(self):
    '''Trending sentiment.'''
    trendArray = []
    for trend in Twitter().trends(cached=False):
        trendArray.append([trend, indicoio.sentiment(trend)])
    return trendArray
def busco_en_twitter(cadena):
    t = Twitter()
    i = None
    for j in range(3):
        for tweet in t.search(cadena, start=i, count=10):
            print(tweet.text)
            print("-------")
            i = tweet.id
def find(tag):
    """
    Finds content and user ID of posts with specified hashtag
    and saves to .txt file.
    """
    twitter = open("twitter_data.txt", "r+")
    t = Twitter(language='en')
    # Compiles 1000 tweets with the specified tag and saves content in file.
    for tweet in t.search(tag, count=1000):
        twitter.write(str(tweet))
def fuzzy_find(thing):
    t = Twitter()
    fuzzy_things = fuzzy_list(thing)
    tweets = []
    for item in fuzzy_things:
        new_tweets = t.search(item, count=50, throttle=2)
        for tweet in new_tweets:
            # Skip tweets we have already collected.
            ids = [x.id for x in tweets]
            if tweet.id not in ids:
                tweets.append(tweet)
    return tweets
def get_info(search_query):
    if not isinstance(search_query, str):
        return {
            "Error": "Pass a string, from mine.py [7]",
            "Result": [None]
        }
    result = []
    engineGoogle = Google(license=None, throttle=0.5, language=None)
    engineBing = Bing(license=None, throttle=0.5, language=None)
    engineTwitter = Twitter(license=None, throttle=0.5, language=None)
    engineFacebook = Facebook(license=None, throttle=1.0, language='en')
    engineWikipedia = Wikipedia(license=None, throttle=5.0, language=None)
    engineFlickr = Flickr(license=None, throttle=5.0, language=None)
    # Only Google and Twitter are actually queried below.
    engineArray = [engineGoogle, engineTwitter]
    # Google
    for i in range(1, 5):
        result += [para.text for para in engineGoogle.search(
            search_query, type=SEARCH, start=i, count=10)]
    # Twitter
    for i in range(1, 5):
        result += [para.text for para in engineTwitter.search(
            search_query, type=SEARCH, start=i, count=10)]
    return {
        "Error": None,
        "Result": result
    }
def setStream(keywordsStr):
    # filePath and lang are expected to be module-level globals.
    assert keywordsStr
    print("\n\n==== ====\n\n")
    print("Setting up the stream for keywords = {0}".format(keywordsStr))
    print("\nKeywords Twitter QUERY = \"{0}\"".format(colorGreen(keywordsStr)))
    if filePath:
        print("Will dump to file: {0}".format(colorGreen(filePath)))
    else:
        print("No file dump")
    # It might take a few seconds to set up the stream.
    stream = Twitter(throttle=0.5, language=lang).stream(keywordsStr, timeout=30)
    print("\nStream initialized")
    return stream
def get_info(search_query):
    if not isinstance(search_query, str):
        return {"Error": "Pass a string, from mine.py [7]"}
    google = [{'text': '', 'url': '', 'title': ''}]
    twitter = [{'text': '', 'url': '', 'title': ''}]
    engineGoogle = Google(license=None, throttle=0.5, language=None)
    engineTwitter = Twitter(license=None, throttle=0.5, language=None)
    engineArray = [engineGoogle, engineTwitter]
    # Google
    for i in range(1, 2):
        for para in engineGoogle.search(search_query, type=SEARCH, start=i, count=5):
            google.append({
                'text': para.text,
                'url': para.url,
                'title': para.title
            })
    # Twitter
    for i in range(1, 2):
        for para in engineTwitter.search(search_query, type=SEARCH, start=i, count=5):
            twitter.append({
                'text': para.text,
                'url': para.url,
                'title': para.title
            })
    return {"Error": None, "Google": google, "Twitter": twitter}
def main():
    # User input.
    parser = argparse.ArgumentParser(description='Downloads tweets for a given search word')
    parser.add_argument('--term', help='Term to search tweets', required=True)
    parser.add_argument('--out', help='Output CSV file name', default='tweets.csv')
    args = parser.parse_args()
    # Twitter engine.
    engine = Twitter(language='en')
    term = " ".join(args.term.split("_"))
    mkdir_p(os.path.dirname(args.out))
    with open(args.out, "w") as outfile:
        print("Searching for tweets with '{}'".format(term))
        writer = csv.writer(outfile, delimiter=',', quotechar='\"', quoting=csv.QUOTE_ALL)
        # Download tweets.
        for tweet in engine.search(term, cached=False, start=1, count=30):
            csvrow = tweet.text.encode('utf-8')
            # Write into CSV file.
            writer.writerow([csvrow])
def setupUi(self, Dialog):
    Dialog.setObjectName("Dialog")
    Dialog.resize(823, 677)
    self.label = QtGui.QLabel(Dialog)
    self.label.setGeometry(QtCore.QRect(10, 10, 800, 400))
    self.label.setFrameShape(QtGui.QFrame.WinPanel)
    self.label.setText("")
    self.label.setObjectName("label")
    self.listWidget = QtGui.QListWidget(Dialog)
    self.listWidget.setGeometry(QtCore.QRect(10, 470, 801, 192))
    self.listWidget.setObjectName("listWidget")
    self.widget = QtGui.QWidget(Dialog)
    self.widget.setGeometry(QtCore.QRect(10, 429, 801, 25))
    self.widget.setObjectName("widget")
    self.horizontalLayout = QtGui.QHBoxLayout(self.widget)
    self.horizontalLayout.setContentsMargins(0, 0, 0, 0)
    self.horizontalLayout.setObjectName("horizontalLayout")
    self.label_2 = QtGui.QLabel(self.widget)
    self.label_2.setObjectName("label_2")
    self.horizontalLayout.addWidget(self.label_2)
    self.lineEdit = QtGui.QLineEdit(self.widget)
    self.lineEdit.setObjectName("lineEdit")
    self.horizontalLayout.addWidget(self.lineEdit)
    self.pushButton = QtGui.QPushButton(self.widget)
    self.pushButton.setObjectName("pushButton")
    self.horizontalLayout.addWidget(self.pushButton)
    self.retranslateUi(Dialog)
    QtCore.QMetaObject.connectSlotsByName(Dialog)
    # self.pushButton.clicked.connect(self.on_buttom_pressed)
    self.listWidget.doubleClicked.connect(self.goTweet)
    # self.alText = u''
    self.fullText = u''
    self.twitter = Twitter(language='tr')
    self.prevId = None
    self.timer = QtCore.QTimer(Dialog)
    self.timer.timeout.connect(self.on_timer)
    self.dialog = Dialog
    self.twIds = []
def create_stream(phrase, queue):
    local = SocketIO(message_queue=queue)
    stream = Twitter().stream(phrase, timeout=120)
    for i in range(120):
        stream.update()
        for tweet in reversed(stream):
            sentiment = classify_tweet(tweet)
            x, y = vectorize_tweet(tweet)
            local.emit('tweet', {
                'id': str(i),
                'text': str(tweet.text.encode('ascii', 'ignore')),
                'sentiment': sentiment,
                'x': x,
                'y': y
            })
        stream.clear()
        time.sleep(1)
    return queue
def reply_tweet(tweet, reply_id, reply_user="******"):
    from pattern.web import URL, Twitter
    tweet = reply_user + " " + tweet
    url = URL("https://api.twitter.com/1.1/statuses/update.json",
              method="post",
              query={
                  "status": tweet,
                  "in_reply_to_status_id": reply_id
              })
    twitter = Twitter(license=ccpattern)
    url = twitter._authenticate(url)
    try:
        # Send the post request.
        url.open()
    except Exception as e:
        print(e)
        print(e.src)
        print(e.src.read())
def get_tweets(self, search, nb, include_RT, useKey, keys):
    if not useKey:
        keys = None
    twitter = Twitter(language=self.dico_lang[self.language], license=keys)
    tweets = list()
    if not include_RT:
        # Over-fetch (nb * 3), so that filtering out retweets still yields nb tweets.
        for tweet in twitter.search(search, start=1, count=nb * 3):
            if not tweet.text.startswith('RT'):
                tweet_input = Input(tweet.text)
                annotations = {
                    'source': 'Twitter',
                    'author': tweet.author,
                    'date': tweet.date,
                    'url': tweet.url,
                    'search': search,
                }
                segment = tweet_input[0]
                segment.annotations.update(annotations)
                tweet_input[0] = segment
                tweets.append(tweet_input)
                if len(tweets) == nb:
                    break
    else:
        for tweet in twitter.search(search, start=1, count=nb):
            tweet_input = Input(tweet.text)
            annotations = {
                'source': 'Twitter',
                'author': tweet.author,
                'date': tweet.date,
                'url': tweet.url,
                'search': search,
            }
            segment = tweet_input[0]
            segment.annotations.update(annotations)
            tweet_input[0] = segment
            tweets.append(tweet_input)
    return tweets
def post_tweet(tweet):
    from pattern.web import URL, Twitter
    import json
    url = URL("https://api.twitter.com/1.1/statuses/update.json",
              method="post",
              query={"status": tweet})
    twitter = Twitter(license=ccpattern)
    url = twitter._authenticate(url)
    try:
        # Send the post request.
        data = url.open().read()
    except Exception as e:
        print(e)
        print(e.src)
        print(e.src.read())
        return None
    data = json.loads(data)
    return int(data[u'id'])
from pattern.web import Twitter
from pattern.en import parse, Sentence
from pattern.search import search
from pattern.vector import Document, Model, KNN

# Classification is a supervised machine learning method,
# where labeled documents are used as training material
# to learn how to label unlabeled documents.

# This example trains a simple classifier with Twitter messages.
# The idea is that, if you have a number of texts with a "type"
# (mail/spam, positive/negative, language, author's age, ...),
# you can predict the type of other "unknown" texts.
# The k-Nearest Neighbor algorithm classifies texts according
# to the k documents that are most similar (cosine similarity)
# to the given input document.

m = Model()
t = Twitter()

# First, we mine a model of about 1,000 tweets.
# We'll use hashtags as type.
for page in range(1, 10):
    for tweet in t.search('#win OR #fail', start=page, count=100, cached=True):
        # If the tweet contains the #win hashtag, we'll set its type to 'WIN':
        s = tweet.text.lower()                # tweet in lowercase
        p = '#win' in s and 'WIN' or 'FAIL'   # document label
        s = Sentence(parse(s))                # parse tree with part-of-speech tags
        s = search('JJ', s)                   # adjectives in the tweet
        s = [match[0].string for match in s]  # adjectives as a list of strings
        s = " ".join(s)                       # adjectives as a string
        if len(s) > 0:
            m.append(Document(s, type=p, stemmer=None))
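# The snippet above only fills the Model; a minimal sketch (not part of the
# original) of how the labeled documents could then train and test the KNN
# classifier. The 80/20 split is an assumption.
knn = KNN()
split = int(len(m.documents) * 0.8)
for document in m.documents[:split]:
    knn.train(document)
# Classify a few held-out documents by their adjectives.
for document in m.documents[split:][:5]:
    print(document.type, knn.classify(document))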
import sys, time
from pattern.web import Twitter

s = Twitter().stream(
    '#joy, #happiness, #hopeful, #pleasure, #harmony, #kindness, #affection, #love')
for i in range(250):
    time.sleep(1)
    s.update(bytes=1024)
    print(s[-1].text if s else '')
# This example retrieves tweets containing given keywords from Twitter.

from pattern.db import Datasheet, pd
from pattern.web import Twitter, hashtags

try:
    # We'll store tweets in a Datasheet.
    # A Datasheet is a table of rows and columns that can be exported as a CSV-file.
    # In the first column, we'll store a unique id for each tweet.
    # We only want to add the latest tweets, i.e., those we haven't seen yet.
    # With an index on the first column we can quickly check if an id already exists.
    # The pd() function returns the parent directory of this script + any given path.
    table = Datasheet.load(pd("eulogy.csv"))
    index = set(table.columns[0])
except:
    table = Datasheet()
    index = set()

engine = Twitter(language="en")

# With Twitter.search(cached=False), a "live" request is sent to Twitter:
# we get the most recent results instead of those in the local cache.
# Keeping a local cache can also be useful (e.g., while testing),
# because a query is instant when it is executed the second time.
prev = None
for i in range(2):
    print(i)
    for tweet in engine.search("eulogy", start=prev, count=25, cached=False):
        print("")
        print(tweet.text)
        print(tweet.author)
        print(tweet.date)
        print(hashtags(tweet.text))  # Keywords in tweets start with a "#".
        print("")
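        # The loop above only prints results; a minimal sketch (assumed,
        # following the comments above) that stores unseen tweets in the
        # Datasheet and advances the start id for the next query:
        if tweet.id not in index:
            table.append([tweet.id, tweet.text])
            index.add(tweet.id)
        prev = tweet.id

# Save the Datasheet as a CSV-file, so that the next run appends new tweets.
table.save(pd("eulogy.csv"))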
from pattern.web import Twitter, plaintext

twitter = Twitter(language='en')
for tweet in twitter.search('"more important than"', cached=False):
    print(plaintext(tweet.text))
from pattern.web import Twitter, plaintext

for tweet in Twitter().search('"more important than"', cached=False):
    # Strip non-ASCII characters before printing.
    print(plaintext(tweet.description.encode("ascii", "ignore").decode("ascii")))
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Mon Mar 25 19:37:34 2019

@author: alternatif
"""

from pattern.web import Twitter
from pattern.en import tag
from pattern.vector import KNN, count

twitter, knn = Twitter(), KNN()
for i in range(1, 3):
    for tweet in twitter.search('#win OR #fail', start=i, count=100):
        s = tweet.text.lower()
        p = '#win' in s and 'WIN' or 'FAIL'
        v = tag(s)
        v = [word for word, pos in v if pos == 'JJ']  # JJ = adjective
        v = count(v)  # e.g., {'sweet': 1}
        if v:
            knn.train(v, type=p)

print(knn.classify('sweet potato burger'))
print(knn.classify('stupid autocorrect'))