def get_pattern_data(search_param):
    # Print recent English tweets matching `search_param`, then mine
    # "<NP> (VP) <search_param> <NP>" patterns from 10 more pages of results
    # into a pattern.graph Graph and export it as an HTML visualization.
    twitter = Twitter(language='en')
    for tweet in twitter.search(search_param, cached=True):
        # Strip non-ASCII characters before printing.
        print(plaintext(tweet.text).encode('ascii', 'ignore').decode('utf-8'))

    g = Graph()
    for i in range(10):  # 10 pages of 50 results each
        for result in twitter.search(search_param, start=i + 1, count=50):
            s = result.text.lower()
            s = plaintext(s)
            s = parsetree(s)
            # Match: captured NP, optional VP, the query term, captured NP.
            p = '{NP} (VP) ' + search_param + ' {NP}'
            for m in search(p, s):
                x = m.group(1).string  # NP left
                y = m.group(2).string  # NP right
                if x not in g:
                    g.add_node(x)
                if y not in g:
                    g.add_node(y)
                g.add_edge(g[x], g[y], stroke=(0, 0, 0, 0.75))  # R,G,B,A
    #if len(g)>0:
    #    g = g.split()[0] # Largest subgraph.
    for n in g.sorted()[:40]:  # Sort by Node.weight.
        n.fill = (0, 0.5, 1, 0.75 * n.weight)
    g.export('data', directed=False, weighted=0.6)
def get_pattern_data(search_param):
    # Duplicate of the variant above: print English tweets for the query,
    # then mine noun-phrase pairs around the query term into a Graph and
    # export it for visualization.
    twitter = Twitter(language='en')
    for tweet in twitter.search(search_param, cached=True):
        # Strip non-ASCII characters before printing.
        print(plaintext(tweet.text).encode('ascii', 'ignore').decode('utf-8'))

    g = Graph()
    for i in range(10):  # 10 pages of 50 results each
        for result in twitter.search(search_param, start=i+1,count=50):
            s = result.text.lower()
            s = plaintext(s)
            s = parsetree(s)
            # Match: captured NP, optional VP, the query term, captured NP.
            p = '{NP} (VP) ' +search_param+ ' {NP}'
            for m in search(p, s):
                x = m.group(1).string  # NP left
                y = m.group(2).string  # NP right
                if x not in g:
                    g.add_node(x)
                if y not in g:
                    g.add_node(y)
                g.add_edge(g[x], g[y], stroke=(0,0,0,0.75))  # R,G,B,A
    #if len(g)>0:
    #    g = g.split()[0] # Largest subgraph.
    for n in g.sorted()[:40]:  # Sort by Node.weight.
        n.fill = (0, 0.5, 1, 0.75 * n.weight)
    g.export('data', directed=False, weighted=0.6)
def obtenerTweets(request):
    """Django view: fetch Spanish and English tweets mentioning a game's
    title (game id taken from the `id` GET parameter) and render them."""
    juego = Juego.objects.get(id=request.GET.get("id"))
    tweets = []
    # Search Spanish first, then English, always with live (uncached) results.
    for engine in (Twitter(language='es'), Twitter(language='en')):
        for tweet in engine.search(juego.titulo, cached=False):
            tweets.append(tweet.text)
    return render(request, 'obtenerTweets.html', {'tweets': tweets})
def search_with_language_in_region(lang, capital_city, search_terms, file_name): """ Does a twitter search in the specified language in the area of a given capital city e.g. search_with_language_in_region('en', 'Paris', '#yoloswag', 'Paris_yoloswag') Inputs: expects strings for everything. lang: the language you want to search in [string], e.g. 'en' capital_city: the city you want to search around, found through pattern's geocode function, e.g. 'Paris' search_terms: duh. e.g. ['crimea','putin'] file_name: the file name you want to save the tweets as, will come out as e.g. nealiscool.pickle Outputs: a pickled dictionary of the tweets, which are saved on disk as tweets_gathered.pickle. The keys of the dicitonary are the unique tweet IDs. """ t = Twitter(language=lang) tweets_gathered = {} i = None for j in range(2): for tweet in t.search(search_terms, start=i, count=10,geo=geocode(capital_city)[:2]): print tweet.text print i = tweet.id tweets_gathered[tweet.id] = tweet.text f = open(file_name,'w') pickle.dump(tweets_gathered,f) f.close()
def get_tweets():
    '''This function parses Twitter to find tweets about a user-defined political figure '''
    # Python 2 console I/O: introduce the program and prompt the user.
    print 'This program measures the average sentiment of the populous towards a political candidate through the analysis of recent tweets\n'  #introduce program to user
    print 'Enter the name of a candidate:'
    x = raw_input('> ')  #receives name of candidate to search for
    print 'Enter number of tweets to search (max = 100)'
    twtNumstr = raw_input('> ')  #recieve number of tweets to search for
    twtNum = int(twtNumstr)  #convert to int to use in search
    if twtNum <= 1:  #check if an invalid number was entered, and if so, correct it to either the minimum or maximum allowed
        twtNum = 2
        print 'Invalid number entered. The minimum of 2 tweets will be used.'
    elif twtNum > 100:
        twtNum = 100
        print 'Invalid number entered. The maximum of 100 tweets will be used.'
    t = Twitter()  #search for tweets containing user-defined key word
    i = 0  # paging cursor for search(); never advanced in this function
    twts = []
    for j in range(1):
        for tweet in t.search(x, start=i, count=twtNum):
            twts.append(tweet.text)
    return twts
def crawl(topic, N=100, Nbatch=25):
    """Crawl up to N tweets matching `topic`, in batches of Nbatch.

    After collecting the search results, re-downloads each tweet's web page
    to recover the full (untruncated) text, stored under key 'fullTxt'.
    Returns the list of tweet results.
    """
    t = Twitter()  # language='en','id'
    M = N // Nbatch  # number of batches (integer division)
    i, Tweets, keepCrawling = None, [], True
    for j in tqdm(range(M)):
        if keepCrawling:
            for tweet in t.search(topic, start=i, count=Nbatch):
                try:
                    Tweets.append(tweet)
                    i = tweet.id  # paging cursor: continue after this tweet
                except Exception:  # narrowed from bare except
                    print("Twitter Limit reached")
                    keepCrawling = False  # Second Break (outer loop)
                    break
        else:
            break
    print('Making sure we get the full tweets, please wait ...')
    for i, tweet in enumerate(tqdm(Tweets)):
        try:
            webPage = URL(tweet.url).download()
            soup = bs(webPage, 'html.parser')
            full_tweet = soup.find_all('p', class_='TweetTextSize')[0]  # modify this to get all replies
            full_tweet = bs(str(full_tweet), 'html.parser').text
            Tweets[i]['fullTxt'] = full_tweet
        except Exception:
            # BUG FIX: fall back to the tweet's own (possibly truncated) text.
            # The original read `tweet.txt`, which is not an attribute and
            # would raise inside the except handler.
            Tweets[i]['fullTxt'] = tweet.text
    print('Done!... Total terdapat {0} tweet'.format(len(Tweets)))
    return Tweets
def search(): query = str(raw_input("enter search query: ")) t = Twitter() # i = None chances = 0 fileSave.write(query + "\n") allChances = 0 for tweet in t.search(query, start=None, count=5): print tweet.text # Calc tweet sentiment sent_int = sent(tweet.text) sent_str = str(sent_int) # print sent_str # Calc author's follower count follows_int = findFollows(tweet.author) follows_str = str(sent_int) # print follows_str # Calc chances; make cumulative chances = follows_int * sent_int print str(chances) + "\n" # File save save = sent_str + "\n" + follows_str + "\n \n" fileSave.write(save) allChances = allChances + chances print "OVERALL: " + str(allChances)
def gettweets(searchterms):
    """Return the plain-text content of fresh English tweets matching
    `searchterms` (always queried live, never from the local cache)."""
    from pattern.web import Twitter, plaintext
    engine = Twitter(language='en')
    return [plaintext(hit.text) for hit in engine.search(searchterms, cached=False)]
def getTweetsByCoord(self, term, lat, lng):
    """Return the text of English tweets matching `term` near (lat, lng).

    BUG FIX: the original ignored `term` and always searched for the
    hard-coded string 'traffic'.
    """
    twitter = Twitter(language='en')
    tweets = []
    for tweet in twitter.search(term, geo=(lat, lng)):
        tweets.append(tweet.text)
    return tweets
def twitter_search():
    """Print three pages (10 tweets each) of Spanish #DonaldTrump tweets,
    advancing the paging cursor to the last tweet id seen on each page."""
    engine = Twitter(language='es')
    cursor = None
    for _page in range(3):  # For pagination
        for hit in engine.search(query="#DonaldTrump", start=cursor, count=10):
            print(hit.id, hit.text, hit.date)
            cursor = hit.id
        print("----------------@@@@@@-------------")
def search(text):
    """Return the text of up to 30 recent English tweets matching `text`.

    Results are always fetched live (cached=False).
    """
    twitter = Twitter(language='en')
    # FIX: the original accumulated into a local named `list`, shadowing the
    # builtin; a comprehension avoids the name entirely.
    return [tweet.text for tweet in twitter.search(text, count=30, cached=False)]
def search_tweets(self, celeb):
    """Query the Twitter API for English tweets mentioning `celeb`."""
    engine = Twitter(language='en')
    # TODO: up the count for the final project
    return engine.search(celeb, count=3000)
def busco_en_twitter(cadena):
    """Print three pages (10 each) of tweets matching `cadena`, with a
    separator after every tweet; the cursor follows the last id seen."""
    engine = Twitter()
    cursor = None
    for _page in range(3):
        for tweet in engine.search(cadena, start=cursor, count=10):
            print(tweet.text)
            print("-------")
            cursor = tweet.id
def get_tweets(self, search, nb, include_RT, useKey, keys):
    """Collect `nb` tweets matching `search` as annotated Input segments.

    When include_RT is False, retweets (text starting with 'RT') are skipped
    and up to nb*3 results are scanned to find nb keepers. `keys` (the API
    license) is only passed through when useKey is true.
    """
    if not useKey:
        keys = None
    twitter = Twitter(
        language=self.dico_lang[self.language],
        license=keys
    )

    def _to_input(tweet):
        # Wrap the tweet text in an Input whose first segment carries the
        # tweet metadata. (Extracted: this was duplicated in both branches.)
        tweet_input = Input(tweet.text)
        segment = tweet_input[0]
        segment.annotations.update({
            'source': 'Twitter',
            'author': tweet.author,
            'date': tweet.date,
            'url': tweet.url,
            'search': search,
        })
        tweet_input[0] = segment
        return tweet_input

    tweets = list()
    if not include_RT:
        for tweet in twitter.search(search, start=1, count=nb*3):
            if not tweet.text.startswith('RT'):
                tweets.append(_to_input(tweet))
                if len(tweets) == nb:
                    break
    else:
        for tweet in twitter.search(search, start=1, count=nb):
            tweets.append(_to_input(tweet))
    return tweets
class tweetSentiment(object):
    # Gathers tweets on a topic and scores them with the indico sentiment API.

    def __init__(self, topic, tweetCount):
        self.topic = topic              # search query string
        self.tweetCount = tweetCount    # tweets requested per search() call
        self.t = Twitter(language='EN')
        self.i = None                   # paging cursor: id of last tweet seen

    def fArray(self):
        '''full array including tweet and sentiment'''
        fullArray = []
        for tweet in self.t.search(self.topic, start=self.i, count = self.tweetCount):
            fullArray.append([tweet.text,indicoio.sentiment(tweet.text)])
            self.i = tweet.id  # advance the cursor so the next call pages on
        return fullArray

    def sArray(self):
        '''calculate sentiment '''
        sentimentArray = []
        for tweet in self.t.search(self.topic, start=self.i, count = self.tweetCount):
            sentimentArray.append(indicoio.sentiment(tweet.text))
            self.i = tweet.id
        return sentimentArray

    def average(self,numArray):
        '''average sentiment'''
        # NOTE(review): raises ZeroDivisionError when numArray is empty.
        return sum(numArray)/len(numArray)

    def trending(self):
        '''trending sentiment'''
        trendArray = []
        for trend in Twitter().trends(cached=False):
            trendArray.append([trend,indicoio.sentiment(trend)])
        return trendArray
def find(tag):
    """ Finds content and user ID of posts with specified hashtag and saves to .txt file. """
    # FIX: use a context manager so the output file is always closed
    # (the original handle was never closed).
    with open("twitter_data.txt", "r+") as twitter:
        t = Twitter(language='en')
        # compiles 1000 tweets with the specified tag and saves content in file
        for tweet in t.search(tag, count=1000):
            twitter.write(str(tweet))
def get_tweets(self, search, nb, include_RT, useKey, keys):
    # Collect `nb` tweets matching `search` as annotated Input segments.
    # When include_RT is False, retweets ('RT...') are filtered out and up
    # to nb*3 results are scanned to find nb keepers. `keys` (API license)
    # is only used when useKey is true.
    if not useKey:
        keys = None
    twitter = Twitter(language=self.dico_lang[self.language], license=keys)
    tweets = list()
    if not include_RT:
        for tweet in twitter.search(search, start=1, count=nb * 3):
            if not tweet.text.startswith('RT'):
                tweet_input = Input(tweet.text)
                annotations = {
                    'source': 'Twitter',
                    'author': tweet.author,
                    'date': tweet.date,
                    'url': tweet.url,
                    'search': search,
                }
                # Annotate the first segment with the tweet metadata.
                segment = tweet_input[0]
                segment.annotations.update(annotations)
                tweet_input[0] = segment
                tweets.append(tweet_input)
                if len(tweets) == nb:
                    break
    else:
        for tweet in twitter.search(search, start=1, count=nb):
            tweet_input = Input(tweet.text)
            annotations = {
                'source': 'Twitter',
                'author': tweet.author,
                'date': tweet.date,
                'url': tweet.url,
                'search': search,
            }
            segment = tweet_input[0]
            segment.annotations.update(annotations)
            tweet_input[0] = segment
            tweets.append(tweet_input)
    return tweets
def fuzzy_find(thing):
    """Search Twitter for every fuzzy variant of `thing` and return the
    matching tweets, de-duplicated by tweet id (first occurrence wins).
    """
    t = Twitter()
    tweets = []
    seen = set()  # ids already collected
    for item in fuzzy_list(thing):
        # FIX: the original rebuilt `map(lambda x: x.id, tweets)` and called
        # .count() per tweet — O(n^2), and broken on Python 3 where map()
        # returns an iterator with no .count(). A seen-set is O(1) per check.
        for tweet in t.search(item, count=50, throttle=2):
            if tweet.id not in seen:
                seen.add(tweet.id)
                tweets.append(tweet)
    return tweets
def get_info(search_query):
    # Query several pattern.web engines for `search_query` and return their
    # result texts in one flat list (only Google and Twitter are active;
    # the other engines are constructed but unused).
    if isinstance(search_query, str):
        search_query = str(search_query)
    else:
        return {
            "Error": "Pass a string, from mine.py [7]",
            "Result": [None]
        }
    result = []
    engineGoogle = Google(license=None, throttle=0.5, language=None)
    engineBing = Bing(license=None, throttle=0.5, language=None)
    engineTwitter = Twitter(license=None, throttle=0.5, language=None)
    engineFacebook = Facebook(license=None, throttle=1.0, language='en')
    engineWikipedia = Wikipedia(license=None, throttle=5.0, language=None)
    engineFlickr = Flickr(license=None, throttle=5.0, language=None)
    engineArray = [engineGoogle, engineBing, engineTwitter, engineFacebook, engineWikipedia, engineFlickr]
    # NOTE(review): this rebinding narrows the (unused) engine list to two.
    engineArray = [engineGoogle, engineTwitter]
    '''
    for i in range(1,2):
    # result = result + ([repr(plaintext(para.text)) for para in engine[0].search(search_query, type=SEARCH, start=i, count=5)])
        [result.append([result.append(repr(plaintext(para.text))) for para in engine.search(search_query, type=SEARCH, start=i, count=5)]) for engine in engineArray]
    # print repr(plaintext(para.text))
    # print repr(plaintext(para.url)) + '\n\n'
    # result.append(repr(plaintext(para.text)))
    '''
    # Google: 4 pages of 10 results each.
    for i in range(1, 5):
        result = result + ([para.text for para in engineGoogle.search(search_query, type=SEARCH, start=i, count=10)])
    # Twitter: 4 pages of 10 results each.
    for i in range(1, 5):
        result = result + ([para.text for para in engineTwitter.search(search_query, type=SEARCH, start=i, count=10)])
    '''
    # for i in range(1,2):
    # result = result + ([repr(plaintext(para.text)) for para in engineBing.search(search_query, type=SEARCH, start=i, count=5)])
    for i in range(1,2):
        result = result + ([repr(plaintext(para.text)) for para in engineTwitter.search(search_query, type=SEARCH, start=i, count=10)])
    # for i in range(1,2):
    # result = result + ([repr(plaintext(para.text)) for para in engineFacebook.search(search_query, type=SEARCH, start=i, count=5)])
    # for i in range(1,2):
    # result = result + ([repr(plaintext(para.text)) for para in engineWikipedia.search(search_query, type=SEARCH, start=i, count=5)])
    # for i in range(1,2):
    # result = result + ([repr(plaintext(para.text)) for para in engineFlickr.search(search_query, type=SEARCH, start=i, count=5)])
    '''
    return {
        "Error": None,
        "Result": result
    }
    # return { "Error": None, "Result": ['Hello World', 'Bye Bye Tommy'] }
def get_info(search_query):
    # Query Google and Twitter for `search_query`; return one page of result
    # dicts (text/url/title) per engine. Each list starts with an empty
    # placeholder dict.
    if isinstance(search_query, str):
        search_query = str(search_query)
    else:
        return {"Error": "Pass a string, from mine.py [7]"}
    google = [{'text': '', 'url': '', 'title': ''}]
    twitter = [{'text': '', 'url': '', 'title': ''}]
    engineGoogle = Google(license=None, throttle=0.5, language=None)
    # engineBing = Bing(license=None, throttle=0.5, language=None)
    engineTwitter = Twitter(license=None, throttle=0.5, language=None)
    # engineFacebook = Facebook(license=None, throttle=1.0, language='en')
    # engineWikipedia = Wikipedia(license=None, throttle=5.0, language=None)
    # engineFlickr = Flickr(license=None, throttle=5.0, language=None)
    # engineArray = [engineGoogle, engineBing, engineTwitter, engineFacebook, engineWikipedia, engineFlickr]
    engineArray = [engineGoogle, engineTwitter]
    # Google: one page of 5 results.
    for i in range(1, 2):
        for para in engineGoogle.search(search_query, type=SEARCH, start=i, count=5):
            google.append({
                'text': para.text,
                'url': para.url,
                'title': para.title
            })
        #resultGoogle = resultGoogle + ([para.text for para in engineGoogle.search(search_query, type=SEARCH, start=i, count=10)])
    # Twitter: one page of 5 results.
    for i in range(1, 2):
        for para in engineTwitter.search(search_query, type=SEARCH, start=i, count=5):
            twitter.append({
                'text': para.text,
                'url': para.url,
                'title': para.title
            })
        #resultTwitter = resultTwitter + ([para.text for para in engineTwitter.search(search_query, type=SEARCH, start=i, count=10)])
    # print 'From data_mine.py --> google: ', google, ', twitter: ', twitter
    return {"Error": None, "Google": google, "Twitter": twitter}
def main():
    """Download tweets for a search term into a CSV file.

    CLI: --term (required; underscores become spaces), --out (CSV path,
    default tweets.csv).
    """
    # user input
    parser = argparse.ArgumentParser(description='Downloads tweets for a given search word')
    parser.add_argument('--term', help='Term to search tweets', required=True)
    parser.add_argument('--out', help='Output CSV file name', default='tweets.csv')
    args = parser.parse_args()

    # Twitter engine
    engine = Twitter(language='en')
    term = " ".join(args.term.split("_"))
    mkdir_p(os.path.dirname(args.out))
    # FIX: write text with an explicit utf-8 encoding and newline='' (as the
    # csv module requires). The original wrote tweet.text.encode('utf-8')
    # bytes into a text-mode writer, which produces b'...' reprs on Python 3.
    with open(args.out, "w", newline="", encoding="utf-8") as outfile:
        print("Searching for tweets with '{}'".format(term))
        writer = csv.writer(outfile, delimiter=',', quotechar='\"', quoting=csv.QUOTE_ALL)
        # download tweets
        for tweet in engine.search(term, cached=False, start=1, count=30):
            writer.writerow([tweet.text])
def search(self, args):
    """
    Usage:
        search [-fty] <keyword>
        search -h | --help

    Options:
        -h --help      Show this help message.
        -f --facebook  Search for keyword on Facebook.
        -t --twitter   Search for keyword on Twitter.
        -y --youtube   Search for keyword on YouTube.
    """
    # Example args information:
    # {'--facebook': False,
    #  '--help': False,
    #  '--twitter': True,
    #  '--youtube': False,
    #  '<keyword>': 'f'}
    engine = Twitter(language='en')
    # Collect the 25 most recent matches as plain dicts and return their repr.
    hits = [
        {
            'text': tweet.text,
            'author': tweet.author,
            'date': tweet.date,
            'hashtags': hashtags(tweet.text),
        }
        for tweet in engine.search('is cooler than', count=25, cached=False)
    ]
    return str(hits)
def Generate_Tweets(searchterm,filename_label): twitter_obj=Twitter(license=None, throttle=0.5,language='en') #throttle: time between requests. #now the twitter_obj can be searched, with the following parameters. # Twitter returns up to 1500 results for a search term. It has hourly limit of 150 queries. each call to search() is one query. So you can get like 15 queries of 100 each of 150 queries of 10 each. # Parameters for Twitter: # Start 1-1500/count # count: results per page=1-100 # SORT: RELEVANCY, Limit: 150/hour, throttle =0.5 f=open(filename_label,'a') for tweet in twitter_obj.search(searchterm,cached=False,language='en', sort ='RELEVANCY',count=100): unicode_tweet=plaintext(tweet.description) #Tweets are unicode, need to be converted to ascii before storing in file ascii_tweet=unicode_tweet.encode('ascii','ignore') f.write(ascii_tweet+'\n') f.close()
def poli_twitter_analysis():
    """This function parses Twitter to determine the average sentiment towards political figures during an event"""
    candidates = ['trump','walker', 'fiorina', 'carson', 'cruz', 'rubio', 'huckabee', 'paul', 'kasich','christie', 'bush','clinton','sanders',"o'malley"]  #list of searches to use
    twtNum = 50  #number of tweets to search for each time
    t = Twitter()
    i = None  # paging cursor; never advanced, so each search starts fresh
    twtstext = []
    twtsdate = []
    twtsauthor = []
    twtscandi = []
    twtssenti = []
    for item in candidates:
        for j in range(1):
            for tweet in t.search(item, start=i, count=twtNum):
                twtscandi.append(item)
                twtstext.append(tweet.text)
                m = tweet.text
                twtsdate.append(tweet.date)
                twtsauthor.append(tweet.author)
                # pattern's sentiment() returns (polarity, subjectivity).
                [senti,objec] = sentiment(m)
                twtssenti.append(senti)
    # Pair each candidate with its sentiment, and with the full tweet record.
    zipped1 = zip(twtscandi, twtssenti)
    zipped2 = zip(twtscandi, twtsdate, twtsauthor, twtstext, twtssenti)
    timestr = time.strftime("%Y%m%d%H%M%S")
    filename = timestr + '.txt'
    f = open(filename, 'w')
    f.write(' '.join(map(str, zipped1)))
    f.close()
    filename = 'tweets_' + timestr + '.txt'
    f = open(filename, 'w')
    f.write(' '.join(map(str, zipped2)))
    f.close()
    print 'Complete'
from pattern.web import Twitter

# Print three pages of tweets about "signing day" (Python 2 example).
# NOTE(review): `i` is never advanced, so every pass repeats the same query.
t = Twitter()
i = None
for j in range(3):
    for tweet in t.search("signing day", start=i, count=30):
        print tweet.id
        print tweet.name
        print tweet.text
        print
# We only want to add the latest tweets, i.e., those we haven't previously encountered. # With an index on the first column we can quickly check if an ID already exists. # The index becomes important once more and more rows are added to the table (speed). table = Datasheet.load("current_tweets.csv") index = dict.fromkeys(table.columns[0], True) except: table = Datasheet() index = {} engine = Twitter(language="en") tweet_csv = [] table = [] for twitter_subject in twitter_subjects: # With cached=False, a live request is sent to Twitter, # so we get the latest results for the query instead of those in the local cache. for tweet in engine.search(twitter_subject, count=275, cached=False): # Create a unique ID based on the tweet content and author. new_line = '@' + tweet.author + ' , ' + tweet.description + ' , ' + str( tweet.values()[5]) + ' , ' + str(tweet.url) id = hash(tweet.author + tweet.description) # Only add the tweet to the table if it doesn't already contain this ID. if len(table) == 0 or id not in index: tweet_csv.append(new_line) norm_descr = unicodedata.normalize('NFKD', tweet.description).encode( 'ascii', 'ignore') norm_author = unicodedata.normalize('NFKD', tweet.author).encode( 'ascii', 'ignore') table = table + ['@' + str(norm_author) + ' ' + str(norm_descr)] index[id] = True
#!/usr/bin/python from pattern.web import Twitter, plaintext twitter_api = Twitter(language='en') tweets = twitter_api.search("@", count=2) for tweet in tweets: text = tweet.text print text
from pattern.web import Twitter, plaintext

# Print the plain text of fresh English tweets containing the exact phrase
# "more important than" (Python 2 example).
twitter = Twitter(language='en')
for tweet in twitter.search('"more important than"', cached=False):
    print plaintext(tweet.text)
from pattern.web import Twitter

# Print three pages of tweets about 'college', including geolocation
# (Python 2 example).
# NOTE(review): `i` is never advanced, so every pass repeats the same query.
t = Twitter()
i = None
for j in range(3):
    for tweet in t.search('college', start=i, count=30):
        print tweet.id
        print tweet.name
        print tweet.text
        print tweet.latitude
        print
# We store tweets in a Table that can be saved as a text file. # In the first column, we'll store a unique ID for each tweet. # We only want to add the latest tweets, i.e. those we haven't previously encountered. # With an index() on the first column we can quickly check if an ID already exists. # The index becomes important once more and more rows are added to the table (speed). table = Table.load("cool.txt") index = table.index(table.columns[0]) except: table = Table() index = {} engine = Twitter() # With cached=False, a live request is sent to Twitter, # so we get the latest results for the query instead of those in the local cache. for tweet in engine.search("is cooler than", count=25, cached=False): print tweet.description print tweet.author print tweet.date print hashtags(tweet.description) # Keywords in tweets start with a #. print # Create a unique ID based on the tweet content and author. id = hash(tweet.author + tweet.description) # Only add the tweet to the table if it doesn't already contain this ID. if len(table) == 0 or id not in index: table.append([id, tweet.description]) index[id] = True table.save("cool.txt") print "Total results:", len(table)
from pattern.web import Twitter

# Print three pages of tweets about 'Snowmageddon' (Python 2 example).
# NOTE(review): count=1000 exceeds Twitter's per-page maximum of 100;
# `i` is never advanced, so every pass repeats the same query.
t = Twitter()
i = None
for j in range(3):
    for tweet in t.search('Snowmageddon', start=i, count=1000):
        print tweet.id
        print tweet.name
        print tweet.text
        print
class Ui_Dialog(object):
    # PyQt4 dialog that polls Twitter for a keyword every 60s, lists the
    # tweets, and renders an accumulated word cloud of their text.

    def setupUi(self, Dialog):
        # Build the widgets: a QLabel for the word-cloud image, a list of
        # tweets, and a keyword line-edit with a start/stop button.
        Dialog.setObjectName("Dialog")
        Dialog.resize(823, 677)
        self.label = QtGui.QLabel(Dialog)
        self.label.setGeometry(QtCore.QRect(10, 10, 800, 400))
        self.label.setFrameShape(QtGui.QFrame.WinPanel)
        self.label.setText("")
        self.label.setObjectName("label")
        self.listWidget = QtGui.QListWidget(Dialog)
        self.listWidget.setGeometry(QtCore.QRect(10, 470, 801, 192))
        self.listWidget.setObjectName("listWidget")
        self.widget = QtGui.QWidget(Dialog)
        self.widget.setGeometry(QtCore.QRect(10, 429, 801, 25))
        self.widget.setObjectName("widget")
        self.horizontalLayout = QtGui.QHBoxLayout(self.widget)
        self.horizontalLayout.setContentsMargins(0, 0, 0, 0)
        self.horizontalLayout.setObjectName("horizontalLayout")
        self.label_2 = QtGui.QLabel(self.widget)
        self.label_2.setObjectName("label_2")
        self.horizontalLayout.addWidget(self.label_2)
        self.lineEdit = QtGui.QLineEdit(self.widget)
        self.lineEdit.setObjectName("lineEdit")
        self.horizontalLayout.addWidget(self.lineEdit)
        self.pushButton = QtGui.QPushButton(self.widget)
        self.pushButton.setObjectName("pushButton")
        self.horizontalLayout.addWidget(self.pushButton)
        self.retranslateUi(Dialog)
        QtCore.QMetaObject.connectSlotsByName(Dialog)
        #
        self.pushButton.clicked.connect(self.on_buttom_pressed)
        self.listWidget.doubleClicked.connect(self.goTweet)
        #
        self.alText = u''     # text of the latest batch of tweets
        self.fullText = u''   # accumulated text for the word cloud
        self.twitter = Twitter(language='tr')
        self.prevId = None    # paging cursor: last tweet id seen
        self.timer = QtCore.QTimer(Dialog)
        self.timer.timeout.connect(self.on_timer)
        self.dialog = Dialog
        self.twIds = []       # tweet ids shown in the list widget

    def retranslateUi(self, Dialog):
        # Turkish UI strings ("Twitter Watcher", "Keyword:", "Watch").
        Dialog.setWindowTitle(
            QtGui.QApplication.translate("Dialog", "Twitter Gözetleyici", None,
                                         QtGui.QApplication.UnicodeUTF8))
        self.label_2.setText(
            QtGui.QApplication.translate("Dialog", "Anahtar Kelime :", None,
                                         QtGui.QApplication.UnicodeUTF8))
        self.pushButton.setText(
            QtGui.QApplication.translate("Dialog", "Gözetle", None,
                                         QtGui.QApplication.UnicodeUTF8))

    #
    def on_buttom_pressed(self):
        # Toggle watching: stop the timer if running, otherwise reset state
        # and start polling every 60 seconds.
        if self.timer.isActive():
            self.timer.stop()
            self.pushButton.setText(u'Gözetle')
        else:
            self.listWidget.clear()
            self.twIds = []
            self.fullText = u''
            self.on_timer()
            self.timer.start(60000)
            self.pushButton.setText('Durdur !')
        return

    def on_timer(self):
        # Fetch new tweets for the current keyword, filter stop words, and
        # redraw the word cloud from the accumulated text.
        searchKey = self.lineEdit.text()
        self.getTwits(searchKey)
        self.filterWords()
        self.fullText = self.fullText + self.alText
        self.showWordCloud()

    def showWordCloud(self):
        # Render the accumulated text as a word-cloud image into the QLabel.
        wordcloud = WordCloud(width=800, height=400).generate(self.fullText)
        img = np.array(wordcloud.to_image())
        height, width, byteValue = img.shape
        byteValue = byteValue * width  # bytes per scanline for QImage
        image = QtGui.QImage(img.data, width, height, byteValue,
                             QtGui.QImage.Format_RGB888)
        pxmp = QtGui.QPixmap(image)
        self.label.setPixmap(pxmp)

    def filterWords(self):
        # Filter frequent Turkish stop words (admittedly incomplete) and
        # words shorter than 3 characters out of the latest tweet text.
        flt = [
            u'https', u'nin', u'bir', u'daha', u'diye', u'için', u'gibi',
            u'işte', u'ile', u'değil', u'ben', u'sen', u'çok', u'ama', u'Sen',
            u'den', u'htt'
        ]
        derle = re.compile("\w*", re.UNICODE)
        wL = re.findall(derle, self.alText)
        temp = []
        for w in wL:
            if len(w) < 3:
                continue
            elif w in flt:
                continue
            else:
                #print w
                temp.append(w)
        self.alText = ' '.join(temp)

    def getTwits(self, keyWord):
        # Fetch up to 10 fresh tweets for keyWord (defaults to "gündem"),
        # append them to the list widget and to self.alText.
        if len(keyWord) == 0:
            keyWord = u'"gündem"'
            self.lineEdit.setText(keyWord)
        self.alText = u''
        try:
            tList = self.twitter.search(keyWord, start=self.prevId, count=10,
                                        cached=False)
        except:
            # Twitter search rate limit reached; tell the user to wait.
            # NOTE(review): tList is undefined after this branch, so the
            # loop below would raise NameError.
            message = "Twitter Aram Limiti Lütfen Biraz Bekleyin"
            QtGui.QMessageBox.information(self.dialog, "Information",
                                          "Python rocks!")
        for tweet in tList:
            self.listWidget.addItem(
                QtGui.QListWidgetItem(cleanTweet(tweet.text)))
            self.twIds.append(tweet.id)
            self.listWidget.setCurrentRow(self.listWidget.count() - 1)
            tweet.text = self.filterRT(tweet.text)
            tweet.text = self.filterLink(tweet.text)
            self.alText = self.alText + plaintext(tweet.text) + u' '
            self.prevId = tweet.id

    def filterRT(self, tweet):
        # Strip the leading "RT ...:" retweet header, if present.
        buf = tweet[:2]
        if buf == u'RT':
            ix = tweet.find(':')
            tweet = tweet[ix:]
        return tweet

    def filterLink(self, tweet):
        # Truncate the tweet at the first http(s)/www link, if any.
        regex = r'https?://[^\s<>"]+|www\.[^\s<>"]+'
        match = re.search(regex, tweet)
        buf = tweet
        if match:
            ixs = tweet.find(match.group())
            ixe = len(match.group())
            try:
                buf = tweet[:ixs]
            except:
                print "not removed"
        return buf

    def goTweet(self):
        # Open the double-clicked tweet in the default web browser.
        i = self.listWidget.currentRow()
        urlTw = 'https:/' + '/twitter.com/statuses/' + str(self.twIds[i])
        webbrowser.open(urlTw)
# to learn how to label unlabeled documents.
# This example trains a simple classifier with Twitter messages.
# The idea is that, if you have a number of texts with a "type"
# (mail/spam, positive/negative, language, author's age, ...),
# you can predict the type of other "unknown" texts.
# The k-Nearest Neighbor algorithm classifies texts according
# to the k documents that are most similar (cosine similarity) to the given input document.
m = Model()
t = Twitter()

# First, we mine a model of a 1000 tweets.
# We'll use hashtags as type.
for page in range(1, 10):
    for tweet in t.search('#win OR #fail', start=page, count=100, cached=True):
        # If the tweet contains #win hashtag, we'll set its type to 'WIN':
        s = tweet.text.lower()                   # tweet in lowercase
        # FIX: conditional expression instead of the fragile `and/or` idiom.
        p = 'WIN' if '#win' in s else 'FAIL'     # document label
        s = Sentence(parse(s))                   # parse tree with part-of-speech tags
        s = search('JJ', s)                      # adjectives in the tweet
        s = [match[0].string for match in s]     # adjectives as a list of strings
        s = " ".join(s)                          # adjectives as string
        if len(s) > 0:
            m.append(Document(s, type=p, stemmer=None))

# Train k-Nearest Neighbor on the model.
# Note that this is a only simple example: to build a robust classifier
# you would need a lot more training data (e.g., tens of thousands of tweets).
# The more training data, the more statistically reliable the classifier becomes.
# The only way to really know if you're classifier is working correctly
from pattern.db import Datasheet, pprint

# Compare tweet volume around Boston and San Francisco using geocoded
# searches (Python 2 example).
engine1 = Twitter(language="en")
engine2 = Twitter(language="en")
print "-------------------------------------"
print "Tweets in Boston, MA ... "
bosCount = 0
sfoCount = 0  # NOTE(review): never incremented below
keyword = " "
for tweet in engine1.search(keyword, geocode="42.3583333,-71.0602778,25mi", count=400, cached=True):
    print "-> BOSTON "
    print tweet.author
    print tweet.text
    print tweet.date
    bosCount += 1
print "-------------------------------------"
print "Tweets in San Francisco, CA ... "
for tweet in engine2.search(keyword, geocode="37.781157,-122.398720,25mi", count=400, cached=True):
    print "-> SAN FRANCISCO "
    print tweet.author
    print tweet.text
# NOTE(review): this fragment begins inside a try-block whose `try:` line is
# not visible in this chunk; the bare `except:` below falls back to an empty
# Datasheet when eulogy.csv does not exist yet.
table = Datasheet.load(pd("eulogy.csv"))
index = set(table.columns[0])  # ids of tweets already stored (for dedup)
except:
    table = Datasheet()
    index = set()
engine = Twitter(language="en")
# With Twitter.search(cached=False), a "live" request is sent to Twitter:
# we get the most recent results instead of those in the local cache.
# Keeping a local cache can also be useful (e.g., while testing)
# because a query is instant when it is executed the second time.
prev = None
for i in range(2):
    print(i)
    for tweet in engine.search("eulogy", start=prev, count=25, cached=False):
        print("")
        print(tweet.text)
        print(tweet.author)
        print(tweet.date)
        print(hashtags(tweet.text))  # Keywords in tweets start with a "#".
        print("")
        # Only add the tweet to the table if it doesn't already exists.
        if len(table) == 0 or tweet.id not in index:
            table.append([tweet.id, tweet.text])
            index.add(tweet.id)
        # Continue mining older tweets in next iteration.
        prev = tweet.id
# Create a .csv in pattern/examples/01-web/
table.save(pd("eulogy_july_21.csv"))
from pattern.web import Twitter
from textblob import TextBlob

# Print tweets about "college" plus the noun phrases TextBlob finds in each
# (Python 2 example).
# NOTE(review): `i` is never advanced, so every pass repeats the same query.
t = Twitter()
i = None
for j in range(3):
    for tweet in t.search("college", start=i, count=30):
        print tweet.id
        print tweet.name
        print tweet.text
        # Pull nouns from tweet
        blob = TextBlob(tweet.text)
        print blob.noun_phrases
        print
# Crawl #nanowrimo tweets until the tweet with id STOP_AT is reached.
# NOTE(review): fragment — the body of this while/try continues past the
# end of this chunk (the try has no visible except clause here).
enough = False
last = None        # paging cursor: id of the last tweet seen
i = 0              # number of tweets examined
sentences = []
numwords = 0
firsttime = True
while not enough:
    try:
        if not firsttime:
            sleep(15)  # back off between queries to respect rate limits
        firsttime = False
        for tweet in twitter.search('#nanowrimo', start=last, count=100):
            i = i + 1
            if tweet.id == STOP_AT:
                print "Reached STOP_AT tweet"
                enough = True
                break
            #print i, plaintext(tweet.text)
            last = tweet.id
            # skip any tweet with funny characters
            m = re.search(r"[^\w\d\s\'\"\,\.\?\(\)\!\#\@\:]", tweet.text)
            if m:
                #print i, m.group(0), tweet.text
                continue
from pattern.web import Twitter
from textblob import TextBlob

# Print tweets about 'college', their noun phrases, and the polarity of each
# sentence (Python 2 example with a stray Python-3-style print call).
# NOTE(review): `i` is never advanced, so every pass repeats the same query.
t = Twitter()
i = None
for j in range(3):
    for tweet in t.search('college', start=i, count=30):
        print tweet.id
        print tweet.name
        print tweet.text
        blob = TextBlob(tweet.text)
        # Pull nouns from tweet
        print blob.noun_phrases
        # tweet's sentiment analysis
        for sentence in blob.sentences:
            print(sentence.sentiment.polarity)
        print
#%%%Data Mining
from pattern.web import Google

def _print_hits(hits, show_text):
    # Print every result's URL, optionally followed by its text snippet.
    for hit in hits:
        print(hit.url)
        if show_text:
            print(hit.text)

google = Google()
_print_hits(google.search('Analytics India Magazine'), show_text=True)
_print_hits(google.search('Gamification'), show_text=False)

#twitter
from pattern.web import Twitter
twitter = Twitter()
_print_hits(twitter.search('Analytics India Magazine'), show_text=True)
_print_hits(twitter.search('Gamification'), show_text=False)

#flickr
from pattern.web import Flickr
flickr = Flickr(license=None)
_print_hits(flickr.search('Analytics India Magazine'), show_text=True)

#%%%Accessing Web Pages
#The URL object is used to retrieve contents from the webpages. It has several methods that can be used to open a webpage, download the contents from a webpage and read a webpage.
# NOTE(review): Python 2 code (xrange, `string.lower`, tuple-argument lambdas,
# print statements), collapsed onto four very long lines — left byte-identical
# because a behavior-preserving reformat of this nesting depth is not safe here.
# What it does: runs four twitter queries about police/kill(ed), parses each
# tweet into sentences, down-weights sentences containing PROFANITY (0.5 vs 1.0),
# strips PNP chunks by re-parsing a rebuilt string, then matches an active
# pattern 'NP kill NP' (police as SBJ -> killer, as OBJ -> killed) and, failing
# that, a passive pattern 'NP kill (PP)+ (NP)+'. Each recognized match is
# appended to examples.txt. Counters: police_killer_i / police_killed_i and the
# profanity-weighted police_killer_value / police_killed_value.
# Assumes module-level names search_list, PROFANITY and pattern.en/search
# helpers are defined elsewhere in the file — TODO confirm.
def main(): # First two vars hold the number of relevant sentences, the 2 others the float values police_killer_i = 0 police_killed_i = 0 police_killer_value = 0.0 police_killed_value = 0.0 total_sentences = 0 # Init Twitter query engine engine = Twitter(license=None, language='en') results_list = [] print('Performing twitter queries...') # 4 differents queries with 100 results each = 400 results results_list.append( engine.search('policeman kill', start=1, count=100, cached=False)) results_list.append( engine.search('policeman killed', start=1, count=100, cached=False)) results_list.append( engine.search('police kill', start=1, count=100, cached=False)) results_list.append( engine.search('police killed', start=1, count=100, cached=False)) #print lemma('shot') # Open a file to put some recognized examples examples_file = open('examples.txt', 'w') # For each list of results for ii in xrange(len(results_list)): print('Starting to analyze query results: ' + str(ii + 1) + ' out of ' + str(len(results_list))) for res in results_list[ii]: # Parse and split the tweet in sentences s = parse(string.lower(res.description), chunks=True, relations=True, lemmata=True) #s = parse(string.lower(res), chunks=True, relations=True, lemmata=True) #pprint(s) ss = split(s) # Then for each sentence for sent in ss: # Update sentences number total_sentences += 1 found = False i = 0 value = 0.0 # First check the affidability of the sentence - .5 if a bad word is found, 1.0 otherwise while (not found and (i < len(sent.words))): #print sent.words[i] if (sent.words[i].string in PROFANITY): found = True i = i + 1 if (found): #print('Found a bad word') value = 0.5 else: # No bad words found -> giving max affidability value value = 1.0 #print sent.chunks # Here, I want to clear the sentence from the PNP elements. 
I will filter out the words belonging to PNP from the list cleared_sentence_words = filter(lambda (i): i.pnp is None, sent.words) cleared_string = '' # But now, seems like there's no way to reconstruct a parsed sentence but assembling again a string and parsing it again for word in cleared_sentence_words: cleared_string += ' ' + word.string #print cleared_string cleared_sentence = parse(cleared_string, chunks=True, relations=True, lemmata=True) cleared_sentence = split(cleared_sentence) #pprint(cleared_sentence) sentence_type1 = False # Now cleared sentence is a sentence without PNP # Check if it is a standard active sentence for match in search('NP kill NP', cleared_sentence): # It is sentence_type1 = True # Check if the Subject is the police if (match.constituents()[0].role == 'SBJ'): for word in match.constituents()[0].words: if word.string in search_list: police_killer_i += 1 police_killer_value += value #print('Police killed') # Print to the examples' file the recognized match for sword in match.words: examples_file.write( str(sword.string.encode("utf-8")) + ' ') examples_file.write('\r\n') #examples_file.write(str(match.words)+'\r\n'); examples_file.write( ' Recognized as: police killed somebody' + '\r\n') examples_file.write( ' TYPE: ACTIVE - SUBJECT' + '\r\n') examples_file.write('\r\n') if (len(match.constituents()) > 2): # Or check if it is object if (match.constituents()[2].role == 'OBJ'): for word in match.constituents()[2].words: if word.string in search_list: police_killed_i += 1 police_killed_value += value #print('Killed by police') # Print to the example file the recognized match for sword in match.words: examples_file.write( str(sword.string.encode("utf-8")) + ' ') examples_file.write('\r\n') examples_file.write( ' Recognized as: police killed by somebody' + '\r\n') examples_file.write( ' TYPE: ACTIVE - OBJECT' + '\r\n') examples_file.write('\r\n') # If it was not an active sentence, check if it is a passive one if (not sentence_type1): 
#print('Try type 2') for match in search('NP kill (PP)+ (NP)+', cleared_sentence): # Here, the problem is that match.constituents returns a mixed list which can contain both Chunks and Words # We are interested in tags, hence in Chunks, hence we need to do some isinstance non-safe tricks # Checking the subject if (isinstance(match.constituents()[0], Chunk)): if (match.constituents()[0].role == 'SBJ'): #print('Is subject') for word in match.constituents()[0]: #for word in match.chunks()[0]: if word.string in search_list: police_killer_i += 1 police_killer_value += value # Print to the example file the recognized match for sword in match.words: examples_file.write( str( sword.string.encode( "utf-8")) + ' ') examples_file.write('\r\n') examples_file.write( ' Recognized as: police killed somebody' + '\r\n') examples_file.write( ' TYPE: PASSIVE - SUBJECT - CHUNK' + '\r\n') examples_file.write('\r\n') elif (isinstance(match.constituents()[0], Word)): if match.constituents()[0].string in search_list: police_killer_i += 1 police_killer_value += value #print('Killed by police') # Print to the example file the recognized match for sword in match.words: examples_file.write( str(sword.string.encode("utf-8")) + ' ') examples_file.write('\r\n') examples_file.write( ' Recognized as: police killed somebody' + '\r\n') examples_file.write( ' TYPE: PASSIVE - SUBJECT - WORD' + '\r\n') examples_file.write('\r\n') # Checking the object. 
First I have to filter out the Word objects from the match results to see if I have enough Chunks if (len( filter(lambda (i): isinstance(i, Chunk), match.constituents())) == 4): if (match.constituents()[3].role == 'OBJ'): for word in match.constituents()[3]: if word.string in search_list: police_killed_i += 1 police_killed_value += value # Print to the example file the recognized match for sword in match.words: examples_file.write( str( sword.string.encode( "utf-8")) + ' ') examples_file.write('\r\n') examples_file.write( ' Recognized as: police was killed by somebody' + '\r\n') examples_file.write( ' TYPE: PASSIVE - OBJECT - CHUNK' + '\r\n') examples_file.write('\r\n')
from pattern.web import Twitter
from pattern.en import sentiment

# Collect sentiment scores for up to 100 English tweets about 'google'.
# Each entry appended to `sent` is a one-element list holding the
# (polarity, subjectivity) tuple returned by pattern's sentiment().
sent = []
t = Twitter()
i = None
cc = 0
for tweet in t.search('google', count=100, lang='en'):
    print('{0}: {1}\n'.format(cc, tweet.text))
    sent.append([sentiment(tweet.text)])
    cc += 1
from pattern.web import Google

# Search Google for 'artificial intelligence' and print each hit.
g = Google(license=None)
for hit in g.search('artificial intelligence'):
    print(hit.url)
    print(hit.text)

# #### Twitter

from pattern.web import Twitter

# Page through tweets, using the last tweet id as the next start cursor.
twitter = Twitter()
index = None
for j in range(3):
    for tweet in twitter.search('artificial intelligence', start=index, count=3):
        print(tweet.text)
        index = tweet.id

# ### Converting HTML Data to Plain Text

from pattern.web import URL, plaintext

# Download a page and strip its markup down to readable text.
html_content = URL(
    'https://stackabuse.com/python-for-nlp-introduction-to-the-textblob-library/'
).download()
cleaned_page = plaintext(html_content.decode('utf-8'))
print(cleaned_page)

# ### Parsing PDF Documments
from pattern.web import Twitter

# Page through tweets mentioning 'SunTrust', printing id, author name and text.
t = Twitter()
i = None  # paging cursor: id of the last tweet seen
for j in range(3):
    for tweet in t.search('SunTrust', start=i, count=30):
        print(tweet.id)
        print(tweet.name)
        print(tweet.text)
        print("")
        # BUG FIX: advance the cursor; the original never updated `i`,
        # so every outer pass re-fetched the same 30 tweets.
        i = tweet.id
# We can use it to check if we have already seen a tweet,
# so we don't store it twice.

# Search for tweets containing the given search query:
q = "charlie hebdo"

twitter = Twitter(language="en", license=None)

# Paging cursor: id of the last tweet fetched. Renamed from `id`,
# which shadowed the builtin of the same name.
last_id = None
for i in range(3):
    # Look for tweets containing the search query.
    # We can get a maximum of 100 tweets per search.
    # Don't cache the results locally,
    # so we get the latest new tweets when the script runs.
    # Do this 3x.
    for tweet in twitter.search(q, start=last_id, count=100, cached=False):
        last_id = tweet.id
        if last_id not in seen:
            print(tweet.text)
            print("")
            seen.add(last_id)
            csv.append([
                tweet.id, q, tweet.author, tweet.text, tweet.retweets,
                tweet.date
            ])

# Update the CSV.
csv.save(PATH)

# Each time you run the script, new tweets will be appended to the CSV.
# For example, we have Twitter miners that automatically run 10x each day,
# and have been running for many days and weeks.
# NOTE(review): Python 2 method collapsed onto two long lines — left
# byte-identical (reformatting this nesting depth is not safe here).
# Behavior: appends ' film' to the topic, searches Twitter (English) for up to
# 8 tweets, truncates each tweet at the first 'http', de-duplicates on tweet
# text, strips non-printable characters / quotes / newlines, converts each
# tweet to JSON via self.formatData2Json, and returns the list of JSON records.
# The Datasheet-load block is commented out; a fresh table is always built.
# NOTE(review): the broad `except Exception: pass` swallows all search errors
# after printing a marker line — deliberate best-effort, but worth revisiting.
def getTweetSecureLoad(self, topic): # This example retrieves tweets containing given keywords from Twitter. self.search_topic = topic self.search_topic = topic + ' film' ''' print 'CLASS (Twitter_PatternPKG) - Twitter Secure Initial Load - Topic: ' + self.search_topic try: # We'll store tweets in a Datasheet. # A Datasheet is a table of rows and columns that can be exported as a CSV-file. # In the first column, we'll store a unique id for each tweet. # We only want to add the latest tweets, i.e., those we haven't seen yet. # With an index on the first column we can quickly check if an id already exists. # The pd() function returns the parent directory of this script + any given path. table = Datasheet.load(pd(self.FILE_STORAGE)) # index = set(table.columns[0]) index = set(table.columns[4]) # on the text except: table = Datasheet() index = set() ''' table = Datasheet() index = set() engine = Twitter(language="en") # With Twitter.search(cached=False), a "live" request is sent to Twitter: # we get the most recent results instead of those in the local cache. # Keeping a local cache can also be useful (e.g., while testing) # because a query is instant when it is executed the second time. prev = None #searchThisSubjects = search_topic # put headers table.append(["tweet_id", "tweet_date", "InputSubject", "Tweet_text"]) #for oneSubject in searchThisSubjects: oneSubject = self.search_topic # oneSubject tweet_list_Json = [] # list of JSons tweet_list = [] try: for i in range(1): for tweet in engine.search(oneSubject, start=prev, count=8, cached=False): if 'http' in tweet.text: posi = tweet.text.index('http') tweet.text = tweet.text[0:posi - 1] # Only add the tweet to the table if it doesn't already exists. 
if len(table) == 0 or tweet.text not in index: table.append( [tweet.id, tweet.date, oneSubject, tweet.text]) index.add(tweet.text) tweet_list.append( [tweet.id, tweet.date, oneSubject, tweet.text]) #tweetJson = self.formatData2Json(tweet.id, tweet.date, oneSubject, tweet.text) #tweetJson = self.formatData2Json(tweet.id, tweet.date, oneSubject.replace(' film', ''), tweet.text) tweet.text = filter(lambda x: x in string.printable, tweet.text) # remove weird stuff tweet.text = tweet.text.replace( '"', '') # remove weird stuff tweet.text = tweet.text.replace( '\n', '') # remove weird stuff tweetJson = self.formatData2Json( tweet.id, tweet.date, oneSubject.replace(' film', ''), tweet.text) # remove artificiall film tweet_list_Json.append(tweetJson) #print tweetJson # BUILD A JSON #http://stackoverflow.com/questions/14547916/how-can-i-loop-over-entries-in-json #BUILD A LIST OF DICTIONARIES #http://stackoverflow.com/questions/2733813/iterating-through-a-json-object # Continue mining older tweets in next iteration. prev = tweet.text except Exception: ''' print 'XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX - ERROR!! - ([twitter_patternPkg_connector] getTweetSecureLoad)' ''' print 'XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX - ERROR!! (film: ' + oneSubject + ')' pass # Create a .csv in pattern/examples/01-web/ # table.save(pd("OD_CK1_Source4_Tweeter_InitialLoad.csv")) print "CLASS (Twitter_PatternPKG) - Total Secure Twitter Load: " + str( len(table)) + '\n' #print json.dumps(tweet_list) # return tweet_list return tweet_list_Json
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Wed Apr 3 01:54:18 2019

@author: abhijithneilabraham
"""
from pattern.web import Twitter
from pattern.en import tag
from pattern.vector import KNN, count

# Train a k-nearest-neighbor classifier on the adjectives found in tweets:
# tweets containing '#github' are labelled WIN, the rest (#kaggle) FAIL.
twitter, knn = Twitter(), KNN()

for i in range(1, 3):
    for tweet in twitter.search('#github OR #kaggle', start=i, count=100):
        s = tweet.text.lower()
        # IDIOM FIX: replaced the fragile `cond and 'WIN' or 'FAIL'` trick
        # with a conditional expression (same result, can't misfire if the
        # truthy branch were ever a falsy value).
        p = 'WIN' if '#github' in s else 'FAIL'
        v = tag(s)
        v = [word for word, pos in v if pos == 'JJ']  # JJ = adjective
        v = count(v)  # {'sweet': 1}
        if v:
            knn.train(v, type=p)

print(knn.classify('sweet potato burger'))
print(knn.classify('stupid autocorrect'))
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Mon Mar 25 19:37:34 2019

@author: alternatif
"""
from pattern.web import Twitter
from pattern.en import tag
from pattern.vector import KNN, count

# Train a k-nearest-neighbor classifier on the adjectives found in tweets:
# tweets containing '#win' are labelled WIN, the rest (#fail) FAIL.
twitter, knn = Twitter(), KNN()

for i in range(1, 3):
    for tweet in twitter.search('#win OR #fail', start=i, count=100):
        s = tweet.text.lower()
        # IDIOM FIX: replaced the fragile `cond and 'WIN' or 'FAIL'` trick
        # with a conditional expression (same result, clearer intent).
        p = 'WIN' if '#win' in s else 'FAIL'
        v = tag(s)
        v = [word for word, pos in v if pos == 'JJ']  # JJ = adjective
        v = count(v)  # {'sweet': 1}
        if v:
            knn.train(v, type=p)

print(knn.classify('sweet potato burger'))
print(knn.classify('stupid autocorrect'))
from pattern.web import Twitter

# Page through tweets about 'baseball', printing id, author name and text.
t = Twitter()
i = None  # paging cursor: id of the last tweet seen
for j in range(3):
    for tweet in t.search('baseball', start=i, count=30):
        print(tweet.id)
        print(tweet.name)
        print(tweet.text)
        print("")
        # BUG FIX: advance the cursor; the original never updated `i`,
        # so every outer pass re-fetched the same 30 tweets.
        i = tweet.id
# We store tweets in a Datasheet that can be saved as a text file (comma-separated).
# In the first column, we'll store a unique ID for each tweet.
# We only want to add the latest tweets, i.e., those we haven't previously encountered.
# With an index on the first column we can quickly check if an ID already exists.
# The index becomes important once more and more rows are added to the table (speed).
# BUG FIX: the load was missing its `try:` — the orphaned `except:` could never run.
try:
    table = Datasheet.load("cool.txt")
    index = dict.fromkeys(table.columns[0], True)
except Exception:  # first run: no cool.txt on disk yet
    table = Datasheet()
    index = {}

engine = Twitter(language="en")

# With cached=False, a live request is sent to Twitter,
# so we get the latest results for the query instead of those in the local cache.
for tweet in engine.search("is cooler than", count=25, cached=False):
    print(tweet.description)
    print(tweet.author)
    print(tweet.date)
    print(hashtags(tweet.description))  # Keywords in tweets start with a #.
    print("")
    # Create a unique ID based on the tweet content and author
    # (renamed from `id`, which shadowed the builtin).
    tweet_id = hash(tweet.author + tweet.description)
    # Only add the tweet to the table if it doesn't already contain this ID.
    if len(table) == 0 or tweet_id not in index:
        table.append([tweet_id, tweet.description])
        index[tweet_id] = True

table.save("cool.txt")

print("Total results: " + str(len(table)))
# NOTE(review): Python 2 method collapsed onto two long lines — left
# byte-identical (reformatting this nesting depth is not safe here). This is
# an earlier variant of the same method elsewhere in the file: it appends
# ' film' to the topic, tries to load the Datasheet at self.FILE_STORAGE
# (de-duplication index built on the text column), falls back to an empty
# table on any load failure, fetches up to 8 live tweets, truncates each at
# the first 'http', filters non-printable characters / quotes / newlines,
# builds JSON records via self.formatData2Json, and returns them.
# NOTE(review): the bare `except:` and the broad `except Exception: pass`
# swallow all errors — deliberate best-effort, but worth revisiting.
def getTweetSecureLoad(self, topic): # This example retrieves tweets containing given keywords from Twitter. self.search_topic = topic print 'CLASS (Twitter_PatternPKG) - Twitter Secure Initial Load - Topic: ' + self.search_topic self.search_topic = topic + ' film' try: # We'll store tweets in a Datasheet. # A Datasheet is a table of rows and columns that can be exported as a CSV-file. # In the first column, we'll store a unique id for each tweet. # We only want to add the latest tweets, i.e., those we haven't seen yet. # With an index on the first column we can quickly check if an id already exists. # The pd() function returns the parent directory of this script + any given path. table = Datasheet.load(pd(self.FILE_STORAGE)) # index = set(table.columns[0]) index = set(table.columns[4]) # on the text except: table = Datasheet() index = set() engine = Twitter(language="en") # With Twitter.search(cached=False), a "live" request is sent to Twitter: # we get the most recent results instead of those in the local cache. # Keeping a local cache can also be useful (e.g., while testing) # because a query is instant when it is executed the second time. prev = None #searchThisSubjects = search_topic # put headers table.append(["tweet_id", "tweet_date", "InputSubject", "Tweet_text"]) #for oneSubject in searchThisSubjects: oneSubject = self.search_topic # oneSubject tweet_list_Json = [] # list of JSons tweet_list = [] try: for i in range(1): for tweet in engine.search(oneSubject, start=prev, count=8, cached=False): if 'http' in tweet.text: posi = tweet.text.index('http') tweet.text = tweet.text[0:posi-1] # Only add the tweet to the table if it doesn't already exists. 
if len(table) == 0 or tweet.text not in index : table.append([tweet.id, tweet.date, oneSubject, tweet.text]) index.add(tweet.text) tweet_list.append([tweet.id, tweet.date, oneSubject, tweet.text]) #tweetJson = self.formatData2Json(tweet.id, tweet.date, oneSubject, tweet.text) #tweetJson = self.formatData2Json(tweet.id, tweet.date, oneSubject.replace(' film', ''), tweet.text) tweet.text = filter(lambda x: x in string.printable, tweet.text) # remove weird stuff tweet.text = tweet.text.replace('"', '') # remove weird stuff tweet.text = tweet.text.replace('\n', '') # remove weird stuff tweetJson = self.formatData2Json(tweet.id, tweet.date, oneSubject.replace(' film', ''), tweet.text) # remove artificiall film tweet_list_Json.append(tweetJson) #print tweetJson # BUILD A JSON #http://stackoverflow.com/questions/14547916/how-can-i-loop-over-entries-in-json #BUILD A LIST OF DICTIONARIES #http://stackoverflow.com/questions/2733813/iterating-through-a-json-object # Continue mining older tweets in next iteration. prev = tweet.text except Exception: print 'XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX - ERROR!! - ([twitter_patternPkg_connector] getTweetSecureLoad)' print 'XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX - ERROR!! (film: ' + oneSubject +')' pass # Create a .csv in pattern/examples/01-web/ # table.save(pd("OD_CK1_Source4_Tweeter_InitialLoad.csv")) print "CLASS (Twitter_PatternPKG) - Total Secure Twitter Load: " + str(len(table)) + '\n' #print json.dumps(tweet_list) # return tweet_list return tweet_list_Json
from pattern.web import Twitter, plaintext

# Fetch the latest (uncached) English tweets mentioning "@snowden"
# and print each one stripped to plain text.
feed = Twitter(language='en')
for tweet in feed.search('"@snowden"', cached=False):
    print(plaintext(tweet.text))
index = set()

engine = Twitter(language="en")

# With Twitter.search(cached=False), a "live" request is sent to Twitter:
# we get the most recent results instead of those in the local cache.
# Keeping a local cache can also be useful (e.g., while testing)
# because a query is instant when it is executed the second time.
prev = None
groups = 1
for i in range(groups):
    # 49.253000,-123.111432,25mi
    # results = engine.search("#feelthebern", start=prev, count=100, cached=False, date='2016-02-14', geo=(latitude, longitude, radius))
    # results = engine.search("geocode:49.253000,-123.111432,50mi", start=prev, count=100, cached=False)
    # results = engine.search("#SingleLifeIn3Words", start=prev, count=100, cached=False, date='2016-02-14')
    results = engine.search("#Valentines OR #ValentinesDay", start=prev, count=100,
                            cached=False, date='2016-02-15', since='2016-02-13')
    for tweet in results:
        print("")
        # print str(tweet.text)
        print(tweet.author)
        print(tweet.date)
        print(hashtags(tweet.text))  # Keywords in tweets start with a "#".
        print("")
        # Only add the tweet to the table if it doesn't already exists.
        if len(table) == 0 or tweet.id not in index:
            # remove new lines
            tweet.text = tweet.text.replace("\n", "")
            # tweet.raw = unicode(tweet.raw).encode('utf8').replace("\n", "")
            tweet.raw = json.dumps(tweet.raw, separators=(',', ': ')).replace("\n", "")
            table.append([tweet.id, tweet.text, tweet.date, tweet.language,
                          tweet.shares, tweet.geo, tweet.geo_lat, tweet.geo_long,
                          tweet.user_id, tweet.location, tweet.statuses_count,
                          tweet.followers_count, tweet.friends_count, tweet.raw])
            index.add(tweet.id)
        # BUG FIX: advance the paging cursor; the original never set `prev`,
        # so `start=prev` would re-fetch the same 100 tweets when groups > 1.
        prev = tweet.id
# NOTE(review): PyQt4 GUI class (Python 2), collapsed onto three long lines —
# left byte-identical; a reformat is unsafe (the `regex = ` assignment is even
# split across the chunk boundary). What it does: builds a dialog with an image
# label (word cloud), a tweet list and a keyword field; the button toggles a
# 60-second QTimer that, on each tick, fetches 10 live Turkish tweets for the
# keyword (defaulting to "gündem"), strips RT prefixes / links / common stop
# words, accumulates the text and re-renders a WordCloud into the label.
# Double-clicking a list item opens the tweet in the browser.
# Assumes cleanTweet, WordCloud, np, re, plaintext and webbrowser are provided
# elsewhere in the file — TODO confirm.
class Ui_Dialog(object): def setupUi(self, Dialog): Dialog.setObjectName("Dialog") Dialog.resize(823, 677) self.label = QtGui.QLabel(Dialog) self.label.setGeometry(QtCore.QRect(10, 10, 800, 400)) self.label.setFrameShape(QtGui.QFrame.WinPanel) self.label.setText("") self.label.setObjectName("label") self.listWidget = QtGui.QListWidget(Dialog) self.listWidget.setGeometry(QtCore.QRect(10, 470, 801, 192)) self.listWidget.setObjectName("listWidget") self.widget = QtGui.QWidget(Dialog) self.widget.setGeometry(QtCore.QRect(10, 429, 801, 25)) self.widget.setObjectName("widget") self.horizontalLayout = QtGui.QHBoxLayout(self.widget) self.horizontalLayout.setContentsMargins(0, 0, 0, 0) self.horizontalLayout.setObjectName("horizontalLayout") self.label_2 = QtGui.QLabel(self.widget) self.label_2.setObjectName("label_2") self.horizontalLayout.addWidget(self.label_2) self.lineEdit = QtGui.QLineEdit(self.widget) self.lineEdit.setObjectName("lineEdit") self.horizontalLayout.addWidget(self.lineEdit) self.pushButton = QtGui.QPushButton(self.widget) self.pushButton.setObjectName("pushButton") self.horizontalLayout.addWidget(self.pushButton) self.retranslateUi(Dialog) QtCore.QMetaObject.connectSlotsByName(Dialog) # self.pushButton.clicked.connect(self.on_buttom_pressed) self.listWidget.doubleClicked.connect(self.goTweet) # self.alText = u'' self.fullText = u'' self.twitter = Twitter(language='tr') self.prevId = None self.timer = QtCore.QTimer(Dialog) self.timer.timeout.connect(self.on_timer) self.dialog = Dialog self.twIds = [] def retranslateUi(self, Dialog): Dialog.setWindowTitle(QtGui.QApplication.translate("Dialog", "Twitter Gözetleyici", None, QtGui.QApplication.UnicodeUTF8)) self.label_2.setText(QtGui.QApplication.translate("Dialog", "Anahtar Kelime :", None, QtGui.QApplication.UnicodeUTF8)) self.pushButton.setText(QtGui.QApplication.translate("Dialog", "Gözetle", None, QtGui.QApplication.UnicodeUTF8)) # def on_buttom_pressed(self): if self.timer.isActive() : self.timer.stop() 
self.pushButton.setText(u'Gözetle') else: self.listWidget.clear() self.twIds = [] self.fullText = u'' self.on_timer() self.timer.start(60000) self.pushButton.setText('Durdur !') return def on_timer(self): searchKey = self.lineEdit.text() self.getTwits(searchKey) self.filterWords() self.fullText = self.fullText + self.alText self.showWordCloud() def showWordCloud(self): wordcloud = WordCloud(width=800, height=400).generate(self.fullText) img = np.array(wordcloud.to_image()) height, width, byteValue = img.shape byteValue = byteValue * width image = QtGui.QImage(img.data, width, height, byteValue, QtGui.QImage.Format_RGB888) pxmp = QtGui.QPixmap(image) self.label.setPixmap(pxmp) def filterWords(self): # sık geçen kelimeler filitreleniyor eksik elbette.... flt = [u'https', u'nin', u'bir', u'daha', u'diye', u'için', u'gibi', u'işte', u'ile', u'değil', u'ben', u'sen', u'çok', u'ama', u'Sen',u'den',u'htt'] derle = re.compile("\w*", re.UNICODE) wL = re.findall(derle, self.alText) temp = [] for w in wL: if len(w) < 3: continue elif w in flt: continue else: #print w temp.append(w) self.alText = ' '.join(temp) def getTwits(self,keyWord): if len(keyWord) == 0: keyWord =u'"gündem"' self.lineEdit.setText(keyWord) self.alText = u'' try : tList = self.twitter.search(keyWord, start=self.prevId, count=10, cached=False) except: message = "Twitter Aram Limiti Lütfen Biraz Bekleyin" QtGui.QMessageBox.information(self.dialog, "Information", "Python rocks!") for tweet in tList: self.listWidget.addItem(QtGui.QListWidgetItem(cleanTweet(tweet.text))) self.twIds.append(tweet.id) self.listWidget.setCurrentRow(self.listWidget.count()-1) tweet.text = self.filterRT(tweet.text) tweet.text = self.filterLink(tweet.text) self.alText = self.alText + plaintext(tweet.text) + u' ' self.prevId = tweet.id def filterRT(self,tweet): # RT başlığı filtreleniyor buf = tweet[:2] if buf == u'RT': ix = tweet.find(':') tweet = tweet[ix:] return tweet def filterLink(self,tweet): regex = 
r'https?://[^\s<>"]+|www\.[^\s<>"]+' match = re.search(regex, tweet) buf = tweet if match: ixs= tweet.find(match.group()) ixe= len(match.group()) try: buf = tweet[:ixs] except: print "not removed" return buf def goTweet(self): i = self.listWidget.currentRow() urlTw = 'https:/'+'/twitter.com/statuses/'+ str(self.twIds[i]) webbrowser.open(urlTw)
"""Scrapes tweets from Twitter and svaes the results in a dictionary that is the pickled""" from pattern.en import * from pattern.web import Twitter import pickle MY_LICENSE = ('WgRmLC6IAhx27bRIG54ngxaRp', 'ldjhjaWF2G6jtPlg3mudc1IZV0V7PN7YZaSjuDqlw7QpvwF7ra', ('700461301575905284-PMu8wIBN2Qt1dW2T1nrytKjC0GYPgF3', 'OszrgU2gVUyBuNAmQc70CAARcpbqvu26DKwEKE0lAQ1ZG')) # creates dictionary with weather conditions (ex. #snow) as keys and a list of 1000 tweet strings as the value dictionary = {} weather_conditions = ['#snow', '#rain', '#cold', '#storm', "#blizzard", '#sun', '#warm', '#drizzle', '#cloudy'] t = Twitter(license = MY_LICENSE) for hashtag in weather_conditions: dictionary[hashtag] = [] for tweet in t.search(hashtag, start = None, count = 1000): dictionary[hashtag].append(tweet.text) # pickles the tweet dictionary f = open('weather2.pickle', 'w') pickle.dump(dictionary, f) f.close()
##############################################################################################
##############################################################################################
##############################################################################################
from pattern.web import Twitter

# Collect up to M tweets for each "$TICKER" cashtag into a list of row dicts.
# number of tickers to search
N = len(tickers)
# number of tweets to download
M = 2000
# Dataframe rows
DF0 = []
# loop
t = Twitter()
for j in range(N):
    tick = '$' + tickers[j]
    i = None  # paging cursor: id of the last tweet seen
    for tweet in t.search(tick, start=i, count=M):
        # temp_text=re.sub('[,;"\'?():_`/\.]','',tweet.text)
        # temp_text=temp_text.strip()
        temp_text = tweet.text.strip()
        # BUG FIX: str.replace returns a new string — the original discarded
        # the result, so newlines were never actually replaced.
        temp_text = temp_text.replace('\n', ' ')
        DF0.append({'id': tweet.id, 'tickers': tick, 'screen_name': tweet.author,
                    'text': temp_text, 'time': tweet.date})
        # print tweet.text
        i = tweet.id

DF2 = DF0
for i in range(len(DF2)):
    # DF2[i]['text']=DF2[i]['text'].encode('utf-8')
    # DF2[i]['text'].encode('utf-8')
    # BUG FIX: assign the replace() results back; the original statements
    # computed new strings and threw them away.
    DF2[i]['text'] = DF2[i]['text'].replace(r"\\", '')
    DF2[i]['text'] = DF2[i]['text'].replace('\n', '')
from pattern.web import Crawler
from pattern.web import download
from pattern.web import plaintext
from textblob import TextBlob
from pattern.web import Twitter

# Page through #SuperBowl tweets: print each tweet's text, the polarity of
# every sentence, and the tweet's noun phrases.
t = Twitter()
i = None  # paging cursor: id of the last tweet seen
for j in range(4):
    for tweet in t.search('#SuperBowl', start=i, count=40):
        # print tweet.id
        # print tweet.name
        print(tweet.text)
        # print tweet.coordinates
        blob = TextBlob(plaintext(tweet.text))
        for sentence in blob.sentences:
            print(sentence.sentiment.polarity)
        print(blob.noun_phrases)
        print("")
        # BUG FIX: advance the cursor; the original never updated `i`,
        # so every outer pass re-fetched the same 40 tweets.
        i = tweet.id