import pickle

from pattern.web import Twitter
from pattern.web.locale import geocode


def search_with_language_in_region(lang, capital_city, search_terms, file_name):
    """
    Does a Twitter search in the specified language in the area of a given capital city,
    e.g. search_with_language_in_region('en', 'Paris', '#yoloswag', 'Paris_yoloswag').

    Inputs (all strings):
        lang: the language you want to search in, e.g. 'en'
        capital_city: the city you want to search around, resolved through
            pattern's geocode() function, e.g. 'Paris'
        search_terms: the terms to search for, e.g. ['crimea', 'putin']
        file_name: the file name to save the tweets under, e.g. 'nealiscool.pickle'

    Outputs:
        a pickled dictionary of the tweets, saved on disk under file_name.
        The keys of the dictionary are the unique tweet IDs.
    """
    t = Twitter(language=lang)
    tweets_gathered = {}
    i = None
    for j in range(2):
        for tweet in t.search(search_terms, start=i, count=10, geo=geocode(capital_city)[:2]):
            print tweet.text
            print
            i = tweet.id
            tweets_gathered[tweet.id] = tweet.text
    with open(file_name, 'wb') as f:
        pickle.dump(tweets_gathered, f)
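
# Usage sketch for search_with_language_in_region() above. This is an
# illustrative assumption, not part of the original: the query, the file
# name, and the read-back step are made up for demonstration.
search_with_language_in_region('fr', 'Paris', '#yoloswag', 'paris_yoloswag.pickle')
with open('paris_yoloswag.pickle', 'rb') as f:
    tweets = pickle.load(f)  # {tweet_id: tweet_text}
print len(tweets), 'tweets gathered'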
from bs4 import BeautifulSoup as bs
from tqdm import tqdm

from pattern.web import Twitter, URL


def crawl(topic, N=100, Nbatch=25):
    t = Twitter()  # language='en', 'id'
    M = N // Nbatch  # number of batches (integer division)
    i, Tweets, keepCrawling = None, [], True
    for j in tqdm(range(M)):
        if keepCrawling:
            for tweet in t.search(topic, start=i, count=Nbatch):
                try:
                    Tweets.append(tweet)
                    i = tweet.id
                except:
                    print("Twitter limit reached")
                    keepCrawling = False  # second break (outer loop)
                    break
        else:
            break
    print('Making sure we get the full tweets, please wait ...')
    for i, tweet in enumerate(tqdm(Tweets)):
        try:
            webPage = URL(tweet.url).download()
            soup = bs(webPage, 'html.parser')
            full_tweet = soup.find_all('p', class_='TweetTextSize')[0]  # modify this to get all replies
            full_tweet = bs(str(full_tweet), 'html.parser').text
            Tweets[i]['fullTxt'] = full_tweet
        except:
            Tweets[i]['fullTxt'] = tweet.text  # fall back to the truncated text
    print('Done! ... Found {0} tweets in total'.format(len(Tweets)))
    return Tweets
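
# Hypothetical usage of crawl() above; the topic and counts are illustrative.
# Each pattern Result behaves like a dict, so the 'fullTxt' key that crawl()
# sets can be read back directly.
tweets = crawl('#machinelearning', N=50, Nbatch=25)
for tw in tweets[:3]:
    print(tw['fullTxt'])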
def search():
    query = str(raw_input("enter search query: "))
    t = Twitter()
    chances = 0
    fileSave.write(query + "\n")
    allChances = 0
    for tweet in t.search(query, start=None, count=5):
        print tweet.text
        # Calculate tweet sentiment.
        sent_int = sent(tweet.text)
        sent_str = str(sent_int)
        # Calculate the author's follower count.
        follows_int = findFollows(tweet.author)
        follows_str = str(follows_int)
        # Calculate chances; make cumulative.
        chances = follows_int * sent_int
        print str(chances) + "\n"
        # Save to file.
        save = sent_str + "\n" + follows_str + "\n \n"
        fileSave.write(save)
        allChances = allChances + chances
    print "OVERALL: " + str(allChances)
from pattern.en import parsetree
from pattern.graph import Graph
from pattern.search import search
from pattern.web import Twitter, plaintext


def get_pattern_data(search_param):
    twitter = Twitter(language='en')
    for tweet in twitter.search(search_param, cached=True):
        print(plaintext(tweet.text).encode('ascii', 'ignore').decode('utf-8'))

    g = Graph()
    for i in range(10):
        for result in twitter.search(search_param, start=i + 1, count=50):
            s = result.text.lower()
            s = plaintext(s)
            s = parsetree(s)
            # Match: noun phrase, optional verb phrase, the search term, noun phrase.
            p = '{NP} (VP) ' + search_param + ' {NP}'
            for m in search(p, s):
                x = m.group(1).string  # NP left
                y = m.group(2).string  # NP right
                if x not in g:
                    g.add_node(x)
                if y not in g:
                    g.add_node(y)
                g.add_edge(g[x], g[y], stroke=(0, 0, 0, 0.75))  # R,G,B,A
    # if len(g) > 0:
    #     g = g.split()[0]  # Largest subgraph.
    for n in g.sorted()[:40]:  # Sort by Node.weight.
        n.fill = (0, 0.5, 1, 0.75 * n.weight)
    g.export('data', directed=False, weighted=0.6)
def get_replies(reply_id):
    import json
    from pattern.web import URL, Twitter

    reply_id = reply_id - 1
    url = URL("https://api.twitter.com/1.1/statuses/mentions_timeline.json",
              method="get",
              query={"since_id": reply_id})
    twitter = Twitter(license=ccpattern)
    url = twitter._authenticate(url)
    user_replies = {}
    bot_replies = {}
    try:
        data = json.loads(url.open().read())
        for reply in data:
            name = reply["user"]["name"].encode('utf-8').strip()
            text = reply["text"].replace("@BotsVsQuotes", "").strip()
            if name == "BotsVsQuotes":
                # Bot quotes.
                text = text.split(":")
                char_name = text[0]
                bot_replies[char_name] = "".join(text[1:]).strip()
            else:
                # User quotes.
                user_replies[name] = text
    except Exception as e:
        print e
        print e.src
        print e.src.read()
        return {}, {}
    return bot_replies, user_replies
def get_tweets():
    '''This function parses Twitter to find tweets about a user-defined political figure'''
    # Introduce the program to the user.
    print 'This program measures the average sentiment of the populace towards a political candidate through the analysis of recent tweets\n'
    print 'Enter the name of a candidate:'
    x = raw_input('> ')  # receive the name of the candidate to search for
    print 'Enter number of tweets to search (max = 100)'
    twtNumstr = raw_input('> ')  # receive the number of tweets to search for
    twtNum = int(twtNumstr)  # convert to int to use in the search
    # If an invalid number was entered, correct it to the minimum or maximum allowed.
    if twtNum <= 1:
        twtNum = 2
        print 'Invalid number entered. The minimum of 2 tweets will be used.'
    elif twtNum > 100:
        twtNum = 100
        print 'Invalid number entered. The maximum of 100 tweets will be used.'
    # Search for tweets containing the user-defined keyword.
    t = Twitter()
    i = 0
    twts = []
    for j in range(1):
        for tweet in t.search(x, start=i, count=twtNum):
            twts.append(tweet.text)
    return twts
import time

from pattern.web import Twitter
from textblob import TextBlob
from textblob.sentiments import NaiveBayesAnalyzer


def Pattern_Module_Twitter_Stream():
    # Start stopwatch.
    t1 = time.time()
    # Open a stream; the stream object buffers incoming tweets in a list.
    stream = Twitter().stream('#Fail')
    # Poll the stream 10 times.
    for second in range(10):
        # Poll Twitter for new stream data.
        stream.update(bytes=1024)
        # Skip this round if the buffer is still empty.
        if len(stream) == 0:
            continue
        # Print the latest tweet.
        tweet = stream[-1]
        print('Tweet: %s' % tweet.text)
        # Print its sentiment ('POS' or 'NEG') via TextBlob's Naive Bayes analyzer.
        print('Sentiment Analysis of Tweet: %s' % TextBlob(
            str(tweet.text), analyzer=NaiveBayesAnalyzer()).sentiment[0].upper())
        # Wait 3 seconds between queries - we do not want to get blocked.
        time.sleep(3)
    return time.time() - t1
def gettweets(searchterms):
    from pattern.web import Twitter, plaintext
    tweetlist = []
    twitter = Twitter(language='en')
    for tweet in twitter.search(searchterms, cached=False):
        tweetlist.append(plaintext(tweet.text))
    return tweetlist
def getTweetsByCoord(self, term, lat, lng):
    twitter = Twitter(language='en')
    tweets = []
    # Search for the given term around the given coordinates.
    for tweet in twitter.search(term, geo=(lat, lng)):
        tweets.append(tweet.text)
    return tweets
def search_tweets(self, celeb):
    '''Pull tweets from the Twitter API that mention the given celebrity.'''
    twitter_api = Twitter(language='en')
    # TODO: up the count for the final project
    return twitter_api.search(celeb, count=3000)
def search(text):
    results = []
    twitter = Twitter(language='en')
    for tweet in twitter.search(text, count=30, cached=False):
        results.append(tweet.text)
    return results
def twitter_search():
    t = Twitter(language='es')
    i = None
    for j in range(3):  # for pagination
        for r in t.search(query="#DonaldTrump", start=i, count=10):
            print(r.id, r.text, r.date)
            i = r.id
        print("----------------@@@@@@-------------")
def busco_en_twitter(cadena):
    t = Twitter()
    i = None
    for j in range(3):
        for tweet in t.search(cadena, start=i, count=10):
            print(tweet.text)
            print("-------")
            i = tweet.id
def find(tag):
    """
    Finds the content and user ID of posts with the specified hashtag
    and saves them to a .txt file.
    """
    twitter = open("twitter_data.txt", "r+")
    t = Twitter(language='en')
    # Compile tweets with the specified tag and save their content in the file.
    for tweet in t.search(tag, count=1000):
        twitter.write(str(tweet))
def fuzzy_find(thing):
    t = Twitter()
    fuzzy_things = fuzzy_list(thing)
    tweets = []
    for item in fuzzy_things:
        new_tweets = t.search(item, count=50, throttle=2)
        for tweet in new_tweets:
            # Only keep the tweet if we have not collected its id yet.
            ids = map(lambda x: x.id, tweets)
            if ids.count(tweet.id) == 0:
                tweets.append(tweet)
    return tweets
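
# fuzzy_list() is not defined in the snippet above. A minimal sketch of what
# such a helper might look like; the variants generated here are assumptions,
# not the original logic.
def fuzzy_list(thing):
    # Generate simple spelling variants of a search term.
    variants = [thing, thing.lower(), thing.upper(), thing.title()]
    variants.append('#' + thing.lower().replace(' ', ''))  # hashtag form
    # Deduplicate while preserving order.
    seen = []
    for v in variants:
        if v not in seen:
            seen.append(v)
    return seen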
def get_info(search_query):
    if isinstance(search_query, str):
        search_query = str(search_query)
    else:
        return {
            "Error": "Pass a string, from mine.py [7]",
            "Result": [None]
        }

    result = []
    engineGoogle = Google(license=None, throttle=0.5, language=None)
    engineBing = Bing(license=None, throttle=0.5, language=None)
    engineTwitter = Twitter(license=None, throttle=0.5, language=None)
    engineFacebook = Facebook(license=None, throttle=1.0, language='en')
    engineWikipedia = Wikipedia(license=None, throttle=5.0, language=None)
    engineFlickr = Flickr(license=None, throttle=5.0, language=None)
    # Only Google and Twitter are queried below; the other engines are kept for reference.
    engineArray = [engineGoogle, engineTwitter]

    # Google
    for i in range(1, 5):
        result = result + [para.text for para in engineGoogle.search(search_query, type=SEARCH, start=i, count=10)]
    # Twitter
    for i in range(1, 5):
        result = result + [para.text for para in engineTwitter.search(search_query, type=SEARCH, start=i, count=10)]

    return {
        "Error": None,
        "Result": result
    }
def get_info(search_query):
    if isinstance(search_query, str):
        search_query = str(search_query)
    else:
        return {"Error": "Pass a string, from mine.py [7]"}

    google = [{'text': '', 'url': '', 'title': ''}]
    twitter = [{'text': '', 'url': '', 'title': ''}]

    engineGoogle = Google(license=None, throttle=0.5, language=None)
    engineTwitter = Twitter(license=None, throttle=0.5, language=None)
    # Other engines (Bing, Facebook, Wikipedia, Flickr) are available in
    # pattern.web but are not queried here.
    engineArray = [engineGoogle, engineTwitter]

    # Google
    for i in range(1, 2):
        for para in engineGoogle.search(search_query, type=SEARCH, start=i, count=5):
            google.append({
                'text': para.text,
                'url': para.url,
                'title': para.title
            })

    # Twitter
    for i in range(1, 2):
        for para in engineTwitter.search(search_query, type=SEARCH, start=i, count=5):
            twitter.append({
                'text': para.text,
                'url': para.url,
                'title': para.title
            })

    return {"Error": None, "Google": google, "Twitter": twitter}
import argparse
import csv
import os

from pattern.web import Twitter


def main():
    # Parse user input.
    parser = argparse.ArgumentParser(description='Downloads tweets for a given search word')
    parser.add_argument('--term', help='Term to search tweets', required=True)
    parser.add_argument('--out', help='Output CSV file name', default='tweets.csv')
    args = parser.parse_args()

    # Twitter engine.
    engine = Twitter(language='en')
    term = " ".join(args.term.split("_"))
    mkdir_p(os.path.dirname(args.out))
    with open(args.out, "w") as outfile:
        print("Searching for tweets with '{}'".format(term))
        writer = csv.writer(outfile, delimiter=',', quotechar='\"', quoting=csv.QUOTE_ALL)
        # Download tweets and write them into the CSV file.
        for tweet in engine.search(term, cached=False, start=1, count=30):
            csvrow = tweet.text.encode('utf-8')
            writer.writerow([csvrow])
def trending(self):
    '''Trending sentiment.'''
    trendArray = []
    for trend in Twitter().trends(cached=False):
        trendArray.append([trend, indicoio.sentiment(trend)])
    return trendArray
def search(self, args):
    """
    Usage: search [-fty] <keyword>
           search -h | --help

    Options:
        -h --help      Show this help message.
        -f --facebook  Search for keyword on Facebook.
        -t --twitter   Search for keyword on Twitter.
        -y --youtube   Search for keyword on YouTube.
    """
    # Example args information:
    # {'--facebook': False,
    #  '--help': False,
    #  '--twitter': True,
    #  '--youtube': False,
    #  '<keyword>': 'f'}
    engine = Twitter(language='en')
    ret = []
    for tweet in engine.search('is cooler than', count=25, cached=False):
        ret.append({
            'text': tweet.text,
            'author': tweet.author,
            'date': tweet.date,
            'hashtags': hashtags(tweet.text)
        })
    return str(ret)
def get_tweets(self, search, nb, include_RT, useKey, keys):
    if not useKey:
        keys = None
    twitter = Twitter(
        language=self.dico_lang[self.language],
        license=keys
    )
    tweets = list()
    if not include_RT:
        # Oversample (nb * 3) so that enough tweets remain after retweets are skipped.
        for tweet in twitter.search(search, start=1, count=nb * 3):
            if not tweet.text.startswith('RT'):
                tweet_input = Input(tweet.text)
                annotations = {
                    'source': 'Twitter',
                    'author': tweet.author,
                    'date': tweet.date,
                    'url': tweet.url,
                    'search': search,
                }
                segment = tweet_input[0]
                segment.annotations.update(annotations)
                tweet_input[0] = segment
                tweets.append(tweet_input)
                if len(tweets) == nb:
                    break
    else:
        for tweet in twitter.search(search, start=1, count=nb):
            tweet_input = Input(tweet.text)
            annotations = {
                'source': 'Twitter',
                'author': tweet.author,
                'date': tweet.date,
                'url': tweet.url,
                'search': search,
            }
            segment = tweet_input[0]
            segment.annotations.update(annotations)
            tweet_input[0] = segment
            tweets.append(tweet_input)
    return tweets
def Generate_Tweets(searchterm, filename_label):
    # throttle: time between requests.
    twitter_obj = Twitter(license=None, throttle=0.5, language='en')
    # The twitter_obj can now be searched with the following parameters.
    # Twitter returns up to 1500 results for a search term, with an hourly
    # limit of 150 queries; each call to search() is one query. So you can
    # make, e.g., 15 queries of 100 results each, or 150 queries of 10 each.
    # Parameters for Twitter:
    #   start: 1 - 1500/count
    #   count: results per page, 1-100
    #   sort:  RELEVANCY; limit: 150/hour; throttle = 0.5
    f = open(filename_label, 'a')
    for tweet in twitter_obj.search(searchterm, cached=False, language='en', sort='RELEVANCY', count=100):
        # Tweets are unicode; convert to ASCII before storing in the file.
        unicode_tweet = plaintext(tweet.description)
        ascii_tweet = unicode_tweet.encode('ascii', 'ignore')
        f.write(ascii_tweet + '\n')
    f.close()
def poli_twitter_analysis():
    """This function parses Twitter to determine the average sentiment towards political figures during an event"""
    # List of searches to use.
    candidates = ['trump', 'walker', 'fiorina', 'carson', 'cruz', 'rubio', 'huckabee',
                  'paul', 'kasich', 'christie', 'bush', 'clinton', 'sanders', "o'malley"]
    twtNum = 50  # number of tweets to search for each candidate
    t = Twitter()
    i = None
    twtstext = []
    twtsdate = []
    twtsauthor = []
    twtscandi = []
    twtssenti = []
    for item in candidates:
        for j in range(1):
            for tweet in t.search(item, start=i, count=twtNum):
                twtscandi.append(item)
                twtstext.append(tweet.text)
                m = tweet.text
                twtsdate.append(tweet.date)
                twtsauthor.append(tweet.author)
                [senti, objec] = sentiment(m)
                twtssenti.append(senti)
    zipped1 = zip(twtscandi, twtssenti)
    zipped2 = zip(twtscandi, twtsdate, twtsauthor, twtstext, twtssenti)
    timestr = time.strftime("%Y%m%d%H%M%S")
    filename = timestr + '.txt'
    f = open(filename, 'w')
    f.write(' '.join(map(str, zipped1)))
    f.close()
    filename = 'tweets_' + timestr + '.txt'
    f = open(filename, 'w')
    f.write(' '.join(map(str, zipped2)))
    f.close()
    print 'Complete'
class tweetSentiment(object):

    def __init__(self, topic, tweetCount):
        self.topic = topic
        self.tweetCount = tweetCount
        self.t = Twitter(language='EN')
        self.i = None

    def fArray(self):
        '''Full array including each tweet and its sentiment.'''
        fullArray = []
        for tweet in self.t.search(self.topic, start=self.i, count=self.tweetCount):
            fullArray.append([tweet.text, indicoio.sentiment(tweet.text)])
            self.i = tweet.id
        return fullArray

    def sArray(self):
        '''Sentiment scores only.'''
        sentimentArray = []
        for tweet in self.t.search(self.topic, start=self.i, count=self.tweetCount):
            sentimentArray.append(indicoio.sentiment(tweet.text))
            self.i = tweet.id
        return sentimentArray

    def average(self, numArray):
        '''Average sentiment.'''
        return sum(numArray) / len(numArray)

    def trending(self):
        '''Trending sentiment.'''
        trendArray = []
        for trend in Twitter().trends(cached=False):
            trendArray.append([trend, indicoio.sentiment(trend)])
        return trendArray
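
# Hypothetical usage of the tweetSentiment class above; the topic, the count,
# and the API-key setup are illustrative assumptions (indicoio requires a key).
import indicoio
indicoio.config.api_key = 'YOUR_API_KEY'

ts = tweetSentiment('#python', 20)
scores = ts.sArray()
print('average sentiment: %s' % ts.average(scores))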
def reply_tweet(tweet, reply_id, reply_user="******"):
    from pattern.web import URL, Twitter

    tweet = reply_user + " " + tweet
    url = URL("https://api.twitter.com/1.1/statuses/update.json",
              method="post",
              query={
                  "status": tweet,
                  "in_reply_to_status_id": reply_id
              })
    twitter = Twitter(license=ccpattern)
    url = twitter._authenticate(url)
    try:
        # Send the post request.
        url.open()
    except Exception as e:
        print e
        print e.src
        print e.src.read()
def create_stream(phrase, queue):
    """
    Celery task that connects to the twitter stream and runs a loop,
    periodically emitting tweet information to all connected clients.
    """
    local = SocketIO(message_queue=queue)
    stream = Twitter().stream(phrase, timeout=30)
    for i in range(60):
        stream.update()
        for tweet in reversed(stream):
            sentiment = classify_tweet(tweet)
            x, y = vectorize_tweet(tweet)
            local.emit(
                'tweet', {
                    'id': str(i),
                    'text': str(tweet.text.encode('ascii', 'ignore')),
                    'sentiment': sentiment,
                    'x': x,
                    'y': y
                })
        stream.clear()
        time.sleep(1)
    return queue
def TwitterStream():
    # Another way to mine Twitter is to set up a stream.
    # A Twitter stream maintains an open connection to Twitter,
    # and waits for data to pour in.
    # Twitter.search() allows us to look at older tweets,
    # Twitter.stream() gives us the most recent tweets.
    for trend in Twitter().trends(cached=False):
        print trend

    # It might take a few seconds to set up the stream.
    stream = Twitter().stream("i love", timeout=30)
    pos_count = 0
    neg_count = 0
    #while True:
    for i in range(50):
        # Ratio of positive to negative tweets seen so far.
        if neg_count:
            ratio = float(pos_count) / neg_count
        else:
            ratio = 0
        print str(pos_count) + " " + str(neg_count) + " " + str(ratio)
        # Poll Twitter to see if there are new tweets.
        stream.update()
        # The stream is a list of buffered tweets so far,
        # with the latest tweet at the end of the list.
        for tweet in reversed(stream):
            print tweet.text
            print tweet.language
            sent = pol(tweet.text)
            if sent > 0:
                pos_count += 1
            else:
                neg_count += 1
        # Clear the buffer every so often.
        stream.clear()
        # Wait awhile between polls.
        time.sleep(1)
    print "Final Twitter"
    print pos_count
    print neg_count
def post_tweet(tweet):
    from pattern.web import URL, Twitter
    import json

    url = URL("https://api.twitter.com/1.1/statuses/update.json",
              method="post",
              query={"status": tweet})
    twitter = Twitter(license=ccpattern)
    url = twitter._authenticate(url)
    try:
        # Send the post request.
        data = url.open().read()
    except Exception as e:
        print e
        print e.src
        print e.src.read()
        return None
    data = json.loads(data)
    return int(data[u'id'])
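
# ccpattern is defined elsewhere in the source. A hedged sketch of the
# credential setup assumed by post_tweet(), reply_tweet() and get_replies():
# pattern.web takes a Twitter OAuth license as a
# (consumer key, consumer secret, (access token, access token secret)) tuple,
# but verify the exact layout against your pattern version. The values below
# are placeholders.
ccpattern = (
    "CONSUMER_KEY",
    "CONSUMER_SECRET",
    ("ACCESS_TOKEN", "ACCESS_TOKEN_SECRET"),
)

# Illustrative usage: post a status, then reply to it.
status_id = post_tweet("Hello from pattern.web!")
if status_id is not None:
    reply_tweet("Thanks for reading.", status_id, reply_user="@BotsVsQuotes")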
def obtenerTweets(request):
    twitterEn = Twitter(language='en')
    twitterEs = Twitter(language='es')
    idJuego = request.GET.get("id")
    juego = Juego.objects.get(id=idJuego)
    tweets = []
    # Collect Spanish and English tweets about the game's title.
    for tweet in twitterEs.search(juego.titulo, cached=False):
        tweets.append(tweet.text)
    for tweet in twitterEn.search(juego.titulo, cached=False):
        tweets.append(tweet.text)
    return render(request, 'obtenerTweets.html', {'tweets': tweets})
def setStream(keywordsStr):
    assert keywordsStr
    print("\n\n==== ====\n\n")
    print("Setting up the stream for keywords = {0}".format(keywordsStr))
    print("\nKeywords Twitter QUERY = \"{0}\"".format(colorGreen(keywordsStr)))
    if filePath:
        print("Will dump to file: {0}".format(colorGreen(filePath)))
    else:
        print("No file dump")
    # It might take a few seconds to set up the stream.
    stream = Twitter(throttle=0.5, language=lang).stream(keywordsStr, timeout=30)
    print("\nStream initialized")
    return stream
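
# colorGreen(), filePath and lang are module-level globals not shown in the
# snippet above. A minimal sketch of the color helper using ANSI escape codes
# (an assumption about the original's intent):
def colorGreen(text):
    # Wrap text in ANSI escape codes for green terminal output.
    return "\033[92m{0}\033[0m".format(text)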
def create_stream(phrase, queue):
    local = SocketIO(message_queue=queue)
    stream = Twitter().stream(phrase, timeout=120)
    for i in range(120):
        stream.update()
        for tweet in reversed(stream):
            sentiment = classify_tweet(tweet)
            x, y = vectorize_tweet(tweet)
            local.emit(
                'tweet', {
                    'id': str(i),
                    'text': str(tweet.text.encode('ascii', 'ignore')),
                    'sentiment': sentiment,
                    'x': x,
                    'y': y
                })
        stream.clear()
        time.sleep(1)
    return queue
#!/usr/bin/env python
import os, sys; sys.path.insert(0, os.path.join("..", ".."))
import time

from pattern.web import Twitter
from pattern.db import Datasheet, pprint

engine1 = Twitter(language="en")
engine2 = Twitter(language="en")

print "-------------------------------------"
print "Tweets in Boston, MA ... "

bosCount = 0
sfoCount = 0
keyword = " "
# Search within a 25-mile radius of Boston, MA.
for tweet in engine1.search(keyword, geocode="42.3583333,-71.0602778,25mi", count=400, cached=True):
    print "-> BOSTON "
    print tweet.author
    print tweet.text
    print tweet.date
    bosCount += 1
print "-------------------------------------"
import sys, time

from pattern.web import Twitter

s = Twitter().stream(
    '#joy, #happiness, #hopeful, #pleasure, #harmony, #kindness, #affection, #love'
)
for i in range(250):
    time.sleep(1)
    s.update(bytes=1024)
    print s[-1].text if s else ''
from pattern.web import Twitter, plaintext

twitter = Twitter(language='en')
for tweet in twitter.search('"@snowden"', cached=False):
    print plaintext(tweet.text)
# This example retrieves tweets containing given keywords from Twitter.

from pattern.db import Datasheet, pd
from pattern.web import Twitter, hashtags

try:
    # We'll store tweets in a Datasheet.
    # A Datasheet is a table of rows and columns that can be exported as a CSV-file.
    # In the first column, we'll store a unique id for each tweet.
    # We only want to add the latest tweets, i.e., those we haven't seen yet.
    # With an index on the first column we can quickly check if an id already exists.
    # The pd() function returns the parent directory of this script + any given path.
    table = Datasheet.load(pd("eulogy.csv"))
    index = set(table.columns[0])
except:
    table = Datasheet()
    index = set()

engine = Twitter(language="en")

# With Twitter.search(cached=False), a "live" request is sent to Twitter:
# we get the most recent results instead of those in the local cache.
# Keeping a local cache can also be useful (e.g., while testing)
# because a query is instant when it is executed the second time.
prev = None
for i in range(2):
    print(i)
    for tweet in engine.search("eulogy", start=prev, count=25, cached=False):
        print("")
        print(tweet.text)
        print(tweet.author)
        print(tweet.date)
        print(hashtags(tweet.text))  # Keywords in tweets start with a "#".
        print("")
        # Only add the tweet to the table if it doesn't already exist.
        if len(table) == 0 or tweet.id not in index:
            table.append([tweet.id, tweet.text])
            index.add(tweet.id)
        # Continue mining older tweets in the next iteration.
        prev = tweet.id

# Save the table as a CSV-file.
table.save(pd("eulogy.csv"))
# This example retrieves tweets containing given keywords from Twitter (http://twitter.com).

from pattern.db import Datasheet
from pattern.web import Twitter, hashtags

try:
    # We store tweets in a Datasheet that can be saved as a text file (comma-separated).
    # In the first column, we'll store a unique ID for each tweet.
    # We only want to add the latest tweets, i.e., those we haven't previously encountered.
    # With an index on the first column we can quickly check if an ID already exists.
    # The index becomes important once more and more rows are added to the table (speed).
    table = Datasheet.load("cool.txt")
    index = dict.fromkeys(table.columns[0], True)
except:
    table = Datasheet()
    index = {}

engine = Twitter(language="en")

# With cached=False, a live request is sent to Twitter,
# so we get the latest results for the query instead of those in the local cache.
for tweet in engine.search("is cooler than", count=25, cached=False):
    print tweet.description
    print tweet.author
    print tweet.date
    print hashtags(tweet.description)  # Keywords in tweets start with a #.
    print
    # Create a unique ID based on the tweet content and author.
    id = hash(tweet.author + tweet.description)
    # Only add the tweet to the table if it doesn't already contain this ID.
    if len(table) == 0 or id not in index:
        table.append([id, tweet.description])
        index[id] = True

table.save("cool.txt")
from pattern.web import Twitter, plaintext

twitter = Twitter(language='en')
for tweet in twitter.search('"more important than"', cached=False):
    print plaintext(tweet.text)
from pattern.web import Twitter
from textblob import TextBlob

t = Twitter()
i = None
for j in range(3):
    for tweet in t.search('college', start=i, count=30):
        print tweet.id
        print tweet.name
        print tweet.text
        blob = TextBlob(tweet.text)
        # Pull noun phrases from the tweet.
        print blob.noun_phrases
        # The tweet's sentiment analysis, sentence by sentence.
        for sentence in blob.sentences:
            print(sentence.sentiment.polarity)
        print
        i = tweet.id  # remember the last id for pagination
from pattern.web import Twitter

t = Twitter()
i = None
for j in range(3):
    for tweet in t.search("signing day", start=i, count=30):
        print tweet.id
        print tweet.name
        print tweet.text
        print
        i = tweet.id  # remember the last id for pagination
#!/usr/bin/python
from pattern.web import Twitter, plaintext

twitter_api = Twitter(language='en')
tweets = twitter_api.search("@", count=2)
for tweet in tweets:
    text = tweet.text
    print text
from pattern.en import parse, Sentence
from pattern.search import search
from pattern.vector import Document, Model, KNN
from pattern.web import Twitter

# Classification is a supervised machine learning method,
# where labeled documents are used as training material
# to learn how to label unlabeled documents.

# This example trains a simple classifier with Twitter messages.
# The idea is that, if you have a number of texts with a "type"
# (mail/spam, positive/negative, language, author's age, ...),
# you can predict the type of other "unknown" texts.
# The k-Nearest Neighbor algorithm classifies texts according
# to the k documents that are most similar (cosine similarity) to the given input document.

m = Model()
t = Twitter()

# First, we mine a model of tweets (9 pages of 100).
# We'll use hashtags as type.
for page in range(1, 10):
    for tweet in t.search('#win OR #fail', start=page, count=100, cached=True):
        # If the tweet contains the #win hashtag, we'll set its type to 'WIN':
        s = tweet.text.lower()                # tweet in lowercase
        p = '#win' in s and 'WIN' or 'FAIL'   # document label
        s = Sentence(parse(s))                # parse tree with part-of-speech tags
        s = search('JJ', s)                   # adjectives in the tweet
        s = [match[0].string for match in s]  # adjectives as a list of strings
        s = " ".join(s)                       # adjectives as a string
        if len(s) > 0:
            m.append(Document(s, type=p, stemmer=None))
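
# The snippet above stops after building the model. A hedged sketch of the
# next step - training a k-NN classifier on the collected documents and
# classifying new text - modeled on pattern.vector's Classifier API
# (verify the calls against your pattern version):
classifier = KNN()
for document in m:
    classifier.train(document)

# Classify unseen text as 'WIN' or 'FAIL' based on its adjectives.
print(classifier.classify('sweet'))
print(classifier.classify('stupid'))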
class Ui_Dialog(object):

    def setupUi(self, Dialog):
        Dialog.setObjectName("Dialog")
        Dialog.resize(823, 677)
        self.label = QtGui.QLabel(Dialog)
        self.label.setGeometry(QtCore.QRect(10, 10, 800, 400))
        self.label.setFrameShape(QtGui.QFrame.WinPanel)
        self.label.setText("")
        self.label.setObjectName("label")
        self.listWidget = QtGui.QListWidget(Dialog)
        self.listWidget.setGeometry(QtCore.QRect(10, 470, 801, 192))
        self.listWidget.setObjectName("listWidget")
        self.widget = QtGui.QWidget(Dialog)
        self.widget.setGeometry(QtCore.QRect(10, 429, 801, 25))
        self.widget.setObjectName("widget")
        self.horizontalLayout = QtGui.QHBoxLayout(self.widget)
        self.horizontalLayout.setContentsMargins(0, 0, 0, 0)
        self.horizontalLayout.setObjectName("horizontalLayout")
        self.label_2 = QtGui.QLabel(self.widget)
        self.label_2.setObjectName("label_2")
        self.horizontalLayout.addWidget(self.label_2)
        self.lineEdit = QtGui.QLineEdit(self.widget)
        self.lineEdit.setObjectName("lineEdit")
        self.horizontalLayout.addWidget(self.lineEdit)
        self.pushButton = QtGui.QPushButton(self.widget)
        self.pushButton.setObjectName("pushButton")
        self.horizontalLayout.addWidget(self.pushButton)
        self.retranslateUi(Dialog)
        QtCore.QMetaObject.connectSlotsByName(Dialog)
        self.pushButton.clicked.connect(self.on_buttom_pressed)
        self.listWidget.doubleClicked.connect(self.goTweet)
        self.alText = u''
        self.fullText = u''
        self.twitter = Twitter(language='tr')
        self.prevId = None
        self.timer = QtCore.QTimer(Dialog)
        self.timer.timeout.connect(self.on_timer)
        self.dialog = Dialog
        self.twIds = []

    def retranslateUi(self, Dialog):
        Dialog.setWindowTitle(QtGui.QApplication.translate("Dialog", "Twitter Gözetleyici", None, QtGui.QApplication.UnicodeUTF8))  # "Twitter Watcher"
        self.label_2.setText(QtGui.QApplication.translate("Dialog", "Anahtar Kelime :", None, QtGui.QApplication.UnicodeUTF8))  # "Keyword:"
        self.pushButton.setText(QtGui.QApplication.translate("Dialog", "Gözetle", None, QtGui.QApplication.UnicodeUTF8))  # "Watch"

    def on_buttom_pressed(self):
        if self.timer.isActive():
            self.timer.stop()
            self.pushButton.setText(u'Gözetle')  # "Watch"
        else:
            self.listWidget.clear()
            self.twIds = []
            self.fullText = u''
            self.on_timer()
            self.timer.start(60000)
            self.pushButton.setText('Durdur !')  # "Stop!"
        return

    def on_timer(self):
        searchKey = self.lineEdit.text()
        self.getTwits(searchKey)
        self.filterWords()
        self.fullText = self.fullText + self.alText
        self.showWordCloud()

    def showWordCloud(self):
        wordcloud = WordCloud(width=800, height=400).generate(self.fullText)
        img = np.array(wordcloud.to_image())
        height, width, byteValue = img.shape
        byteValue = byteValue * width
        image = QtGui.QImage(img.data, width, height, byteValue, QtGui.QImage.Format_RGB888)
        pxmp = QtGui.QPixmap(image)
        self.label.setPixmap(pxmp)

    def filterWords(self):
        # Filter out frequently occurring words (the list is incomplete, of course ...).
        flt = [u'https', u'nin', u'bir', u'daha', u'diye', u'için', u'gibi', u'işte',
               u'ile', u'değil', u'ben', u'sen', u'çok', u'ama', u'Sen', u'den', u'htt']
        derle = re.compile("\w*", re.UNICODE)
        wL = re.findall(derle, self.alText)
        temp = []
        for w in wL:
            if len(w) < 3:
                continue
            elif w in flt:
                continue
            else:
                temp.append(w)
        self.alText = ' '.join(temp)

    def getTwits(self, keyWord):
        if len(keyWord) == 0:
            keyWord = u'"gündem"'
            self.lineEdit.setText(keyWord)
        self.alText = u''
        tList = []
        try:
            tList = self.twitter.search(keyWord, start=self.prevId, count=10, cached=False)
        except:
            # Twitter search limit reached; ask the user to wait a little.
            message = "Twitter search limit reached, please wait a while"
            QtGui.QMessageBox.information(self.dialog, "Information", message)
        for tweet in tList:
            self.listWidget.addItem(QtGui.QListWidgetItem(cleanTweet(tweet.text)))
            self.twIds.append(tweet.id)
            self.listWidget.setCurrentRow(self.listWidget.count() - 1)
            tweet.text = self.filterRT(tweet.text)
            tweet.text = self.filterLink(tweet.text)
            self.alText = self.alText + plaintext(tweet.text) + u' '
            self.prevId = tweet.id

    def filterRT(self, tweet):
        # Strip the leading "RT" header from retweets.
        buf = tweet[:2]
        if buf == u'RT':
            ix = tweet.find(':')
            tweet = tweet[ix:]
        return tweet

    def filterLink(self, tweet):
        regex = r'https?://[^\s<>"]+|www\.[^\s<>"]+'
        match = re.search(regex, tweet)
        buf = tweet
        if match:
            ixs = tweet.find(match.group())
            ixe = len(match.group())
            try:
                buf = tweet[:ixs]
            except:
                print "not removed"
        return buf

    def goTweet(self):
        i = self.listWidget.currentRow()
        urlTw = 'https:/' + '/twitter.com/statuses/' + str(self.twIds[i])
        webbrowser.open(urlTw)
# This example retrieves tweets containing given keywords from Twitter (http://twitter.com).

try:
    # We store tweets in a Table that can be saved as a text file.
    # In the first column, we'll store a unique ID for each tweet.
    # We only want to add the latest tweets, i.e. those we haven't previously encountered.
    # With an index() on the first column we can quickly check if an ID already exists.
    # The index becomes important once more and more rows are added to the table (speed).
    table = Table.load("cool.txt")
    index = table.index(table.columns[0])
except:
    table = Table()
    index = {}

engine = Twitter()

# With cached=False, a live request is sent to Twitter,
# so we get the latest results for the query instead of those in the local cache.
for tweet in engine.search("is cooler than", count=25, cached=False):
    print tweet.description
    print tweet.author
    print tweet.date
    print hashtags(tweet.description)  # Keywords in tweets start with a #.
    print
    # Create a unique ID based on the tweet content and author.
    id = hash(tweet.author + tweet.description)
    # Only add the tweet to the table if it doesn't already contain this ID.
    if len(table) == 0 or id not in index:
        table.append([id, tweet.description])
        index[id] = True

table.save("cool.txt")
from pattern.web import Twitter
import time

s = Twitter().stream('#snowday')
for i in range(25):
    time.sleep(1)
    s.update(bytes=1024)
    print s[-1].text if s else ''
s.clear()
from time import sleep
import io, json
import re

from pattern.web import Twitter

WORDS_TO_GENERATE = 50000

# If doing multiple searches: put the most recently found tweet id here
# so the search can stop if it gets there.
STOP_AT = "533785825059037184"

twitter = Twitter(language='en')

# Collect tweets about #nanowrimo.
enough = False
last = None
i = 0
sentences = []
numwords = 0
firsttime = True
while not enough:
    try:
        if not firsttime:
            sleep(15)
import os
import sys
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", ".."))

import time

from pattern.web import Twitter

# Another way to mine Twitter is to set up a stream.
# A Twitter stream maintains an open connection to Twitter,
# and waits for data to pour in.
# Twitter.search() allows us to look at older tweets,
# Twitter.stream() gives us the most recent tweets.

# It might take a few seconds to set up the stream.
stream = Twitter().stream("I hate", timeout=30)

#while True:
for i in range(10):
    print(i)
    # Poll Twitter to see if there are new tweets.
    stream.update()
    # The stream is a list of buffered tweets so far,
    # with the latest tweet at the end of the list.
    for tweet in reversed(stream):
        print(tweet.text)
        print(tweet.language)
    # Clear the buffer every so often.
    stream.clear()
    # Wait awhile between polls.
    time.sleep(1)
from pattern.web import Twitter
from textblob import TextBlob

t = Twitter()
i = None
for j in range(3):
    for tweet in t.search("college", start=i, count=30):
        print tweet.id
        print tweet.name
        print tweet.text
        # Pull noun phrases from the tweet.
        blob = TextBlob(tweet.text)
        print blob.noun_phrases
        print
        i = tweet.id  # remember the last id for pagination
tickers1 = ticklist[ticklist['MarketCap'] > 0.0]
tickers2 = list(tickers1['Symbol'])
tickers3 = [re.sub(r'\s', '', w) for w in tickers2]
tickers = list(set(tickers3))

##############################################################################################

from pattern.web import Twitter

# Number of tickers to search.
N = len(tickers)
# Number of tweets to download per ticker.
M = 2000

# Rows for the dataframe.
DF0 = []

# Loop over the cashtag of each ticker.
t = Twitter()
for j in range(N):
    tick = '$' + tickers[j]
    i = None
    for tweet in t.search(tick, start=i, count=M):
        temp_text = tweet.text.strip()
        temp_text = temp_text.replace('\n', ' ')
        DF0.append({'id': tweet.id, 'tickers': tick, 'screen_name': tweet.author,
                    'text': temp_text, 'time': tweet.date})
        i = tweet.id

DF2 = DF0
for i in range(len(DF2)):
from pattern.web import Twitter

t = Twitter()
i = None
for j in range(3):
    for tweet in t.search('SunTrust', start=i, count=30):
        print tweet.id
        print tweet.name
        print tweet.text
        print
        i = tweet.id  # remember the last id for pagination