Example #1
from pattern.web import Twitter
from pattern.web.locale import geocode  # assumed import path for pattern's geocode() helper
import pickle


def search_with_language_in_region(lang, capital_city, search_terms, file_name):
    """
    Runs a Twitter search in the specified language in the area of a given capital city,
    e.g. search_with_language_in_region('en', 'Paris', '#yoloswag', 'Paris_yoloswag')

    Inputs: expects strings for everything.
        lang: the language you want to search in, e.g. 'en'
        capital_city: the city you want to search around, resolved
        through pattern's geocode() function, e.g. 'Paris'
        search_terms: the term(s) to search for, e.g. '#yoloswag'
        file_name: the file name you want to save the tweets as, e.g. 'nealiscool.pickle'

    Outputs: a pickled dictionary of the tweets, saved on disk under file_name.
    The keys of the dictionary are the unique tweet IDs.
    """
    t = Twitter(language=lang)
    tweets_gathered = {}
    i = None
    for j in range(2):  # two pages; start=i resumes after the last tweet id seen
        for tweet in t.search(search_terms, start=i, count=10, geo=geocode(capital_city)[:2]):
            print tweet.text
            print
            i = tweet.id
            tweets_gathered[tweet.id] = tweet.text
    f = open(file_name, 'wb')
    pickle.dump(tweets_gathered, f)
    f.close()
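A minimal sketch of reading the pickled results back (the helper name is ours, not part of the original):

import pickle

def load_gathered_tweets(file_name):
    # Returns the {tweet id: tweet text} dictionary written by search_with_language_in_region().
    with open(file_name, 'rb') as f:
        return pickle.load(f)

# e.g. tweets = load_gathered_tweets('Paris_yoloswag')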
Example #2
from tqdm import tqdm
from bs4 import BeautifulSoup as bs
from pattern.web import Twitter, URL


def crawl(topic, N=100, Nbatch=25):
    t = Twitter()  # language='en', 'id', ...
    M = N // Nbatch  # number of batches (integer division)
    i, Tweets, keepCrawling = None, [], True
    for j in tqdm(range(M)):
        if not keepCrawling:
            break
        try:
            # A rate-limit error is raised while iterating the search results,
            # so the whole loop sits inside the try block.
            for tweet in t.search(topic, start=i, count=Nbatch):
                Tweets.append(tweet)
                i = tweet.id
        except Exception:
            print("Twitter limit reached")
            keepCrawling = False  # stop the outer loop as well
    print('Making sure we get the full tweets, please wait ...')
    for i, tweet in enumerate(tqdm(Tweets)):
        try:
            webPage = URL(tweet.url).download()
            soup = bs(webPage, 'html.parser')
            full_tweet = soup.find_all(
                'p',
                class_='TweetTextSize')[0]  # modify this to get all replies
            full_tweet = bs(str(full_tweet), 'html.parser').text
            Tweets[i]['fullTxt'] = full_tweet
        except Exception:
            Tweets[i]['fullTxt'] = tweet.text
    print('Done! ... {0} tweets in total'.format(len(Tweets)))
    return Tweets
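Hypothetical usage of crawl() above; the topic is a placeholder:

tweets = crawl('#python', N=100, Nbatch=25)
for tw in tweets[:3]:
    print(tw['fullTxt'])  # full text scraped from each tweet's web page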
Example #3
def search():

    query = str(raw_input("enter search query: "))
    t = Twitter()
    chances = 0
    fileSave.write(query + "\n")  # fileSave, sent() and findFollows() are defined elsewhere

    allChances = 0
    for tweet in t.search(query, start=None, count=5):

        print tweet.text

        # Calc tweet sentiment
        sent_int = sent(tweet.text)
        sent_str = str(sent_int)

        # Calc author's follower count
        follows_int = findFollows(tweet.author)
        follows_str = str(follows_int)

        # Calc chances; make cumulative
        chances = follows_int * sent_int
        print str(chances) + "\n"

        # File save
        save = sent_str + "\n" + follows_str + "\n \n"
        fileSave.write(save)

        allChances = allChances + chances

        print "OVERALL: " + str(allChances)
Example #4
from pattern.web import Twitter, plaintext
from pattern.en import parsetree
from pattern.search import search
from pattern.graph import Graph


def get_pattern_data(search_param):

    twitter = Twitter(language='en')

    for tweet in twitter.search(search_param, cached=True):
        print(plaintext(tweet.text).encode('ascii', 'ignore').decode('utf-8'))

    g = Graph()
    for i in range(10):
        for result in twitter.search(search_param, start=i + 1, count=50):
            s = result.text.lower()
            s = plaintext(s)
            s = parsetree(s)
            p = '{NP} (VP) ' + search_param + ' {NP}'
            for m in search(p, s):
                x = m.group(1).string  # NP left
                y = m.group(2).string  # NP right
                if x not in g:
                    g.add_node(x)
                if y not in g:
                    g.add_node(y)
                g.add_edge(g[x], g[y], stroke=(0, 0, 0, 0.75))  # R,G,B,A

    #if len(g)>0:
    #   g = g.split()[0] # Largest subgraph.

    for n in g.sorted()[:40]:  # Sort by Node.weight.
        n.fill = (0, 0.5, 1, 0.75 * n.weight)

    g.export('data', directed=False, weighted=0.6)
Example #5
def get_replies(reply_id):
    import json
    from pattern.web import URL, Twitter

    reply_id = reply_id - 1  # since_id is exclusive, so subtract 1 to include reply_id itself
    url = URL("https://api.twitter.com/1.1/statuses/mentions_timeline.json",
              method="get",
              query={"since_id": reply_id})

    twitter = Twitter(license=ccpattern)  # ccpattern: your Twitter API license keys, defined elsewhere
    url = twitter._authenticate(url)

    user_replies = {}
    bot_replies = {}
    try:
        data = json.loads(url.open().read())
        for reply in data:
            name = reply["user"]["name"].encode('utf-8').strip()
            text = reply["text"].replace("@BotsVsQuotes", "").strip()
            if name == "BotsVsQuotes":
                #bot quotes
                text = text.split(":")
                char_name = text[0]
                bot_replies[char_name] = "".join(text[1:]).strip()
            else:
                #user quotes
                user_replies[name] = text
    except Exception as e:
        print e
        print e.src
        print e.src.read()
        return {}, {}
    return bot_replies, user_replies
def get_tweets():
	'''This function parses Twitter to find tweets about a user-defined political figure
	'''

	print 'This program measures the average sentiment of the populace towards a political candidate through the analysis of recent tweets\n' #introduce program to user
	print 'Enter the name of a candidate:'
	x = raw_input('> ') #receive name of candidate to search for
	print 'Enter number of tweets to search (max = 100)'
	twtNumstr = raw_input('> ') #receive number of tweets to search for
	twtNum = int(twtNumstr) #convert to int to use in search

	if twtNum <= 1: #check if an invalid number was entered, and if so, correct it to the minimum or maximum allowed
		twtNum = 2
		print 'Invalid number entered. The minimum of 2 tweets will be used.'
	elif twtNum > 100:
		twtNum = 100
		print 'Invalid number entered. The maximum of 100 tweets will be used.'

	t = Twitter() #search for tweets containing the user-defined keyword
	twts = []
	for tweet in t.search(x, count=twtNum):
		twts.append(tweet.text)

	return twts
Example #8
def gettweets(searchterms):
    tweetlist = []
    from pattern.web import Twitter, plaintext
    twitter = Twitter(language='en')
    for tweet in twitter.search(searchterms, cached=False):
        tweetlist.append(plaintext(tweet.text))
    return tweetlist
Example #11
import time
from pattern.web import Twitter
from textblob import TextBlob
from textblob.sentiments import NaiveBayesAnalyzer


def Pattern_Module_Twitter_Stream():

    # Start stopwatch
    t1 = time.time()

    # Open the stream; it buffers incoming tweets itself
    stream = Twitter().stream('#Fail')

    # Poll ten times
    for second in range(10):
        # Get stream data
        value = stream.update(bytes=1024)
        # Skip this round if nothing new arrived
        if not value:
            continue
        # Print tweet
        print('Tweet: %s' % value.text)
        # Get sentiment
        print('Sentiment Analysis of Tweet: %s' % TextBlob(
            str(value.text),
            analyzer=NaiveBayesAnalyzer()).sentiment[0].upper())
        # Wait 3 seconds between queries - do not want to get blocked
        time.sleep(3)

    return time.time() - t1
Example #12
    def getTweetsByCoord(self, term, lat, lng):

        twitter = Twitter(language='en')
        tweets = []
        for tweet in twitter.search(term, geo=(lat, lng)):  # search for the given term around (lat, lng)
            tweets.append(tweet.text)

        return tweets
Example #14
 def search_tweets(self, celeb):
     '''
     Pull tweets from the Twitter API that mention 
     the given celebrity
     '''
     twitter_api = Twitter(language='en')
     #TODO: up the count for the final project
     return twitter_api.search(celeb, count=3000)
Example #15
def search(text):
    results = []  # avoid shadowing the built-in list

    twitter = Twitter(language='en')
    for tweet in twitter.search(text, count=30, cached=False):
        results.append(tweet.text)

    return results
Example #16
def twitter_search():
    t = Twitter(language='es')
    i = None
    for j in range(3):  # For pagination
        for r in t.search(query="#DonaldTrump", start=i, count=10):
            print(r.id, r.text, r.date)
            i = r.id  # remember the last id seen so the next page starts after it
        print("----------------@@@@@@-------------")
Example #17
def busco_en_twitter(cadena):

    t = Twitter()
    i = None
    for j in range(3):
        for tweet in t.search(cadena, start=i, count=10):
            print(tweet.text)
            print("-------")
            i = tweet.id
Example #19
def find(tag):
    """
    Finds content and user ID of posts with the specified hashtag and saves them
    to a .txt file.
    """
    twitter = open("twitter_data.txt", "a")
    t = Twitter(language='en')

    # gathers tweets with the specified tag and saves their content in the file
    # (note: a single search() call returns at most 100 results)
    for tweet in t.search(tag, count=1000):
        twitter.write(str(tweet))
Example #20
def fuzzy_find(thing):
    t = Twitter()

    fuzzy_things = fuzzy_list(thing)  # fuzzy_list() is defined elsewhere

    tweets = []
    seen_ids = set()
    for item in fuzzy_things:
        new_tweets = t.search(item, count=50, throttle=2)
        for tweet in new_tweets:
            # Keep each tweet only once, keyed on its unique id.
            if tweet.id not in seen_ids:
                seen_ids.add(tweet.id)
                tweets.append(tweet)

    return tweets
Example #21
def get_info(search_query):
	if isinstance(search_query, str):
		search_query = str(search_query)
	else:
		return { "Error": "Pass a string, from mine.py [7]", "Result": [None] }

	result = []
	engineGoogle = Google(license=None, throttle=0.5, language=None)
	engineBing = Bing(license=None, throttle=0.5, language=None)
	engineTwitter = Twitter(license=None, throttle=0.5, language=None)
	engineFacebook = Facebook(license=None, throttle=1.0, language='en')
	engineWikipedia = Wikipedia(license=None, throttle=5.0, language=None)
	engineFlickr = Flickr(license=None, throttle=5.0, language=None)
	# engineArray = [engineGoogle, engineBing, engineTwitter, engineFacebook, engineWikipedia, engineFlickr]
	engineArray = [engineGoogle, engineTwitter]  # only Google and Twitter are actually queried below

	'''
	for i in range(1,2):
		# result = result + ([repr(plaintext(para.text)) for para in engine[0].search(search_query, type=SEARCH, start=i, count=5)])
		[result.append([result.append(repr(plaintext(para.text))) for para in engine.search(search_query, type=SEARCH, start=i, count=5)]) for engine in engineArray]
			# print repr(plaintext(para.text))
			# print repr(plaintext(para.url)) + '\n\n'
			# result.append(repr(plaintext(para.text)))
	'''

	# Google
	for i in range(1, 5):
		result = result + ([para.text for para in engineGoogle.search(search_query, type=SEARCH, start=i, count=10)])

	# Twitter
	for i in range(1, 5):
		result = result + ([para.text for para in engineTwitter.search(search_query, type=SEARCH, start=i, count=10)])
	'''
	# for i in range(1,2):
		# result = result + ([repr(plaintext(para.text)) for para in engineBing.search(search_query, type=SEARCH, start=i, count=5)])
	for i in range(1,2):
		result = result + ([repr(plaintext(para.text)) for para in engineTwitter.search(search_query, type=SEARCH, start=i, count=10)])
	# for i in range(1,2):
		# result = result + ([repr(plaintext(para.text)) for para in engineFacebook.search(search_query, type=SEARCH, start=i, count=5)])
	# for i in range(1,2):
		# result = result + ([repr(plaintext(para.text)) for para in engineWikipedia.search(search_query, type=SEARCH, start=i, count=5)])
	# for i in range(1,2):
		# result = result + ([repr(plaintext(para.text)) for para in engineFlickr.search(search_query, type=SEARCH, start=i, count=5)])
	'''

	return { "Error": None, "Result": result }

	# return { "Error": None, "Result": ['Hello World', 'Bye Bye Tommy'] }
Example #22
def get_info(search_query):
    if isinstance(search_query, str):
        search_query = str(search_query)
    else:
        return {"Error": "Pass a string, from mine.py [7]"}

    google = []   # filled with {'text', 'url', 'title'} dicts below
    twitter = []
    engineGoogle = Google(license=None, throttle=0.5, language=None)
    # engineBing = Bing(license=None, throttle=0.5, language=None)
    engineTwitter = Twitter(license=None, throttle=0.5, language=None)
    # engineFacebook = Facebook(license=None, throttle=1.0, language='en')
    # engineWikipedia = Wikipedia(license=None, throttle=5.0, language=None)
    # engineFlickr = Flickr(license=None, throttle=5.0, language=None)
    # engineArray = [engineGoogle, engineBing, engineTwitter, engineFacebook, engineWikipedia, engineFlickr]
    engineArray = [engineGoogle, engineTwitter]

    # Google
    for i in range(1, 2):
        for para in engineGoogle.search(search_query,
                                        type=SEARCH,
                                        start=i,
                                        count=5):
            google.append({
                'text': para.text,
                'url': para.url,
                'title': para.title
            })
        #resultGoogle = resultGoogle + ([para.text for para in engineGoogle.search(search_query, type=SEARCH, start=i, count=10)])
    # Twitter
    for i in range(1, 2):
        for para in engineTwitter.search(search_query,
                                         type=SEARCH,
                                         start=i,
                                         count=5):
            twitter.append({
                'text': para.text,
                'url': para.url,
                'title': para.title
            })
        #resultTwitter = resultTwitter + ([para.text for para in engineTwitter.search(search_query, type=SEARCH, start=i, count=10)])

    # print 'From data_mine.py --> google: ', google, ', twitter: ', twitter

    return {"Error": None, "Google": google, "Twitter": twitter}
Example #23
def main():
    # user input
    parser = argparse.ArgumentParser(description='Downloads tweets for a given search word')
    parser.add_argument('--term', help='Term to search tweets',required=True)
    parser.add_argument('--out', help='Output CSV file name', default='tweets.csv')
    args = parser.parse_args()
    # Twitter engine
    engine = Twitter(language='en')
    term = " ".join(args.term.split("_"))
    mkdir_p(os.path.dirname(args.out))
    with open(args.out, "w") as outfile:
        print("Searching for tweets with '{}'".format(term))
        writer = csv.writer(outfile, delimiter=',', quotechar='"', quoting=csv.QUOTE_ALL)
        # download tweets
        for tweet in engine.search(term, cached=False, start=1, count=30):
            csvrow = tweet.text.encode('utf-8')
            # write into CSV file
            writer.writerow([csvrow])
Example #25
	def search(self, args):
		"""
		Usage:
		  search [-fty] <keyword>
		  search -h | --help

		Options:
		  -h --help      Show this help message.
		  -f --facebook  Search for keyword on Facebook.
		  -t --twitter   Search for keyword on Twitter.
		  -y --youtube   Search for keyword on YouTube.
		"""

		# Example args information:
		# {'--facebook': False,
		# '--help': False,
		# '--twitter': True,
		# '--youtube': False,
		# '': 'f'}

		engine = Twitter(language='en')
		ret = []

		'''
		generator = ({
			'text': tweet.text,
			'author': tweet.author,
			'date': tweet.date,
			'hashtags': hashtags(tweet.text)
		} for tweet in engine.search('is cooler than', count=25, cached=False))

		self.db.bulk_insert('test', generator)
		'''
		
		for tweet in engine.search('is cooler than', count=25, cached=False):
			ret.append({
				'text': tweet.text,
				'author': tweet.author,
				'date': tweet.date,
				'hashtags': hashtags(tweet.text)
			})
		

		return str(ret)
def Generate_Tweets(searchterm, filename_label):
	twitter_obj = Twitter(license=None, throttle=0.5, language='en')
	# throttle: time between requests.
	# The twitter_obj can now be searched with the following parameters.

	# Twitter returns up to 1500 results for a search term and has an hourly limit
	# of 150 queries; each call to search() is one query. So you can run
	# 15 queries of 100 results each, or 150 queries of 10 each.
	# Parameters for Twitter:
	#   start: 1 to 1500/count
	#   count: results per page, 1-100
	#   sort: RELEVANCY; limit: 150/hour; throttle = 0.5
	f = open(filename_label, 'a')

	for tweet in twitter_obj.search(searchterm, cached=False, sort='RELEVANCY', count=100):
		unicode_tweet = plaintext(tweet.description)
		# Tweets are unicode and need to be converted to ascii before storing in the file
		ascii_tweet = unicode_tweet.encode('ascii', 'ignore')
		f.write(ascii_tweet + '\n')

	f.close()
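Given the limits described in the comments above (at most 100 results per query, 150 queries per hour, 1500 results per term), a sketch of paging through everything a term can return; the function name and search term are placeholders:

from pattern.web import Twitter, plaintext

def generate_all_tweets(searchterm, filename_label):
    twitter_obj = Twitter(license=None, throttle=0.5, language='en')
    f = open(filename_label, 'a')
    for page in range(1, 16):  # 15 queries of 100 results each = the 1500-result ceiling
        for tweet in twitter_obj.search(searchterm, start=page, count=100, cached=False):
            f.write(plaintext(tweet.description).encode('ascii', 'ignore') + '\n')
    f.close()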
Example #28
    def setupUi(self, Dialog):
        Dialog.setObjectName("Dialog")
        Dialog.resize(823, 677)
        self.label = QtGui.QLabel(Dialog)
        self.label.setGeometry(QtCore.QRect(10, 10, 800, 400))
        self.label.setFrameShape(QtGui.QFrame.WinPanel)
        self.label.setText("")
        self.label.setObjectName("label")
        self.listWidget = QtGui.QListWidget(Dialog)
        self.listWidget.setGeometry(QtCore.QRect(10, 470, 801, 192))
        self.listWidget.setObjectName("listWidget")
        self.widget = QtGui.QWidget(Dialog)
        self.widget.setGeometry(QtCore.QRect(10, 429, 801, 25))
        self.widget.setObjectName("widget")
        self.horizontalLayout = QtGui.QHBoxLayout(self.widget)
        self.horizontalLayout.setContentsMargins(0, 0, 0, 0)
        self.horizontalLayout.setObjectName("horizontalLayout")
        self.label_2 = QtGui.QLabel(self.widget)
        self.label_2.setObjectName("label_2")
        self.horizontalLayout.addWidget(self.label_2)
        self.lineEdit = QtGui.QLineEdit(self.widget)
        self.lineEdit.setObjectName("lineEdit")
        self.horizontalLayout.addWidget(self.lineEdit)
        self.pushButton = QtGui.QPushButton(self.widget)
        self.pushButton.setObjectName("pushButton")
        self.horizontalLayout.addWidget(self.pushButton)

        self.retranslateUi(Dialog)
        QtCore.QMetaObject.connectSlotsByName(Dialog)
        #
        self.pushButton.clicked.connect(self.on_buttom_pressed)
        self.listWidget.doubleClicked.connect(self.goTweet)

        #
        self.alText = u''
        self.fullText = u''
        self.twitter = Twitter(language='tr')
        self.prevId = None
        self.timer = QtCore.QTimer(Dialog)
        self.timer.timeout.connect(self.on_timer)
        self.dialog = Dialog
        self.twIds = []
def poli_twitter_analysis():
	"""This function parses Twitter to determine the average sentiment towards political figures during an event"""
	
	candidates = ['trump','walker', 'fiorina', 'carson', 'cruz', 'rubio', 'huckabee', 'paul', 'kasich','christie', 'bush','clinton','sanders',"o'malley"] #list of searches to use

	twtNum = 50 #number of tweets to search for each time
	
	t = Twitter() 
	i = None
	twtstext = []
	twtsdate = []
	twtsauthor = []
	twtscandi = []
	twtssenti = []

	for item in candidates:
		for tweet in t.search(item, start=i, count=twtNum):
			twtscandi.append(item)
			twtstext.append(tweet.text)
			m = tweet.text
			twtsdate.append(tweet.date)
			twtsauthor.append(tweet.author)
			[senti, objec] = sentiment(m)  # pattern's sentiment(): (polarity, subjectivity)
			twtssenti.append(senti)

	zipped1 = zip(twtscandi, twtssenti)
	zipped2 = zip(twtscandi, twtsdate, twtsauthor, twtstext, twtssenti)
	
	timestr = time.strftime("%Y%m%d%H%M%S")

	filename = timestr + '.txt'
	f = open(filename, 'w')
	f.write(' '.join(map(str, zipped1)))
	f.close()

	filename = 'tweets_' + timestr + '.txt'
	f = open(filename, 'w')
	f.write(' '.join(map(str, zipped2)))
	f.close()

	print 'Complete'
class tweetSentiment(object):


	def __init__(self, topic, tweetCount):
		self.topic = topic
		self.tweetCount = tweetCount
		self.t = Twitter(language='en')  # pattern expects lowercase language codes
		self.i = None

	def fArray(self):
		'''full array including tweet and sentiment'''	
		fullArray = []

		for tweet in self.t.search(self.topic, start=self.i, count = self.tweetCount):
			fullArray.append([tweet.text,indicoio.sentiment(tweet.text)])
			self.i = tweet.id

		return fullArray

	def sArray(self):
		'''calculate sentiment '''
		sentimentArray = []

		for tweet in self.t.search(self.topic, start=self.i, count = self.tweetCount):
			sentimentArray.append(indicoio.sentiment(tweet.text))
			self.i = tweet.id

		return sentimentArray

	def average(self,numArray):
		'''average sentiment'''
		return sum(numArray)/len(numArray)

	def trending(self):
		'''trending sentiment'''

		trendArray = []

		for trend in Twitter().trends(cached=False):
			trendArray.append([trend,indicoio.sentiment(trend)])

		return trendArray
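Hypothetical usage of the tweetSentiment class above (assumes indicoio is configured with a valid API key):

ts = tweetSentiment('python', 30)
scores = ts.sArray()
print ts.average(scores)  # average sentiment over the 30 tweets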
Example #31
def reply_tweet(tweet, reply_id, reply_user="******"):
    from pattern.web import URL, Twitter

    tweet = reply_user + " " + tweet
    url = URL("https://api.twitter.com/1.1/statuses/update.json",
              method="post",
              query={
                  "status": tweet,
                  "in_reply_to_status_id": reply_id
              })

    twitter = Twitter(license=ccpattern)
    url = twitter._authenticate(url)

    try:
        # Send the post request.
        url.open()
    except Exception as e:
        print e
        print e.src
        print e.src.read()
Example #32
def create_stream(phrase, queue):
    """
    Celery task that connects to the twitter stream and runs a loop, periodically
    emitting tweet information to all connected clients.
    """
    local = SocketIO(message_queue=queue)
    stream = Twitter().stream(phrase, timeout=30)

    for i in range(60):
        stream.update()
        for tweet in reversed(stream):
            sentiment = classify_tweet(tweet)
            x, y = vectorize_tweet(tweet)
            local.emit(
                'tweet', {
                    'id': str(i),
                    'text': str(tweet.text.encode('ascii', 'ignore')),
                    'sentiment': sentiment,
                    'x': x,
                    'y': y
                })
        stream.clear()
        time.sleep(1)

    return queue
Example #33
    def get_tweets(self, search, nb, include_RT, useKey, keys):

        if not useKey:
            keys = None

        twitter = Twitter(language=self.dico_lang[self.language], license=keys)

        tweets = list()
        if not include_RT:
            for tweet in twitter.search(search, start=1, count=nb * 3):
                if not tweet.text.startswith('RT'):
                    tweet_input = Input(tweet.text)
                    annotations = {
                        'source': 'Twitter',
                        'author': tweet.author,
                        'date': tweet.date,
                        'url': tweet.url,
                        'search': search,
                    }
                    segment = tweet_input[0]
                    segment.annotations.update(annotations)
                    tweet_input[0] = segment
                    tweets.append(tweet_input)
                if len(tweets) == nb:
                    break
        else:
            for tweet in twitter.search(search, start=1, count=nb):
                tweet_input = Input(tweet.text)
                annotations = {
                    'source': 'Twitter',
                    'author': tweet.author,
                    'date': tweet.date,
                    'url': tweet.url,
                    'search': search,
                }
                segment = tweet_input[0]
                segment.annotations.update(annotations)
                tweet_input[0] = segment
                tweets.append(tweet_input)
        return tweets
Example #34
def TwitterStream():
    # Another way to mine Twitter is to set up a stream.
    # A Twitter stream maintains an open connection to Twitter, 
    # and waits for data to pour in.
    # Twitter.search() allows us to look at older tweets,
    # Twitter.stream() gives us the most recent tweets.
    for trend in Twitter().trends(cached=False):
        print trend

    # It might take a few seconds to set up the stream.
    stream = Twitter().stream("i love", timeout=30)

    pos_count = 0
    neg_count = 0

    #while True:
    for i in range(50):
        if neg_count:
            ratio = float(pos_count) / neg_count  # avoid integer division under Python 2
        else:
            ratio = 0

        print str(pos_count) + " " + str(neg_count) + " " + str(ratio)
        
        #print i
        #print "+ " + str(pos_count)
        #print "- " + str(neg_count)
        #print "- - -"

        # Poll Twitter to see if there are new tweets.
        stream.update()
        
        # The stream is a list of buffered tweets so far,
        # with the latest tweet at the end of the list.
        for tweet in reversed(stream):
            print tweet.text
            print tweet.language

            sent = pol(tweet.text)  # pol(): sentiment-polarity helper assumed defined elsewhere

            if(sent>0):
                pos_count+=1
            else:
                neg_count+=1
            
        # Clear the buffer every so often.
        stream.clear()
        
        # Wait awhile between polls.
        time.sleep(1)


    print "Final Twitter"
    print pos_count
    print neg_count
Example #36
def post_tweet(tweet):
    from pattern.web import URL, Twitter
    import json

    url = URL("https://api.twitter.com/1.1/statuses/update.json",
              method="post",
              query={"status": tweet})

    twitter = Twitter(license=ccpattern)
    url = twitter._authenticate(url)

    try:
        # Send the post request.
        data = url.open().read()
    except Exception as e:
        print e
        print e.src
        print e.src.read()
        return None

    data = json.loads(data)
    return int(data[u'id'])
Example #37
def obtenerTweets(request):
    twitterEn = Twitter(language='en')
    twitterEs = Twitter(language='es')
    idJuego = request.GET.get("id")
    juego = Juego.objects.get(id=idJuego)
    tweets = []
    for tweet in twitterEs.search(juego.titulo, cached=False):
        tweets.append(tweet.text)
    for tweet in twitterEn.search(juego.titulo, cached=False):
        tweets.append(tweet.text)
    return render(request, 'obtenerTweets.html', {'tweets': tweets})
def setStream(keywordsStr):
    assert keywordsStr
    print("\n\n==== ====\n\n")
    print("Setting up the stream for keywords = {0}".format(keywords))
    print("\nKeywords Twitter QUERY = \"{0}\"".format(colorGreen(keywordsStr)))

    if filePath:
        print("Will dump to file: {0}".format(colorGreen(filePath)))
    else:
        print("No file dump")

    # It might take a few seconds to set up the stream.
    stream = Twitter(throttle=0.5, language=lang).stream(keywordsStr,
                                                         timeout=30)
    print("\nStream initialized")

    return stream
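A sketch of polling the stream returned by setStream(), mirroring the other stream examples on this page (assumes the module-level filePath and lang globals it references are defined):

import time

stream = setStream('#python, #code')
for i in range(10):
    stream.update(bytes=1024)  # poll Twitter for new tweets
    for tweet in reversed(stream):
        print(tweet.text)
    stream.clear()             # clear the buffer between polls
    time.sleep(3)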
def create_stream(phrase, queue):
    local = SocketIO(message_queue=queue)
    stream = Twitter().stream(phrase, timeout=120)

    for i in range(120):
        stream.update()
        for tweet in reversed(stream):
            sentiment = classify_tweet(tweet)
            x, y = vectorize_tweet(tweet)
            local.emit(
                'tweet', {
                    'id': str(i),
                    'text': str(tweet.text.encode('ascii', 'ignore')),
                    'sentiment': sentiment,
                    'x': x,
                    'y': y
                })
        stream.clear()
        time.sleep(1)

    return queue
Example #41
#!/usr/bin/env python

import os, sys; sys.path.insert(0, os.path.join("..", ".."))
import time

from pattern.web import Twitter
from pattern.db  import Datasheet, pprint

engine1 = Twitter(language="en")
engine2 = Twitter(language="en")  # unused below; presumably for a second city in the full script

print "-------------------------------------"
print "Tweets in Boston, MA ... "

bosCount = 0
sfoCount = 0  # unused below; presumably for San Francisco in the full script

keyword = " "

for tweet in engine1.search(keyword , geocode="42.3583333,-71.0602778,25mi" , count=400, cached=True):
    print "-> BOSTON "
    print tweet.author
    print tweet.text
    print tweet.date
    bosCount += 1



print "-------------------------------------"
Example #42
import sys, time
from pattern.web import Twitter

s = Twitter().stream(
    '#joy, #happiness, #hopeful, #pleasure, #harmony, #kindness, #affection, #love'
)
for i in range(250):
    time.sleep(1)
    s.update(bytes=1024)
    print s[-1].text if s else ''
Example #43
from pattern.web import Twitter, plaintext
twitter = Twitter(language='en') 
for tweet in twitter.search('"@snowden"', cached=False):
    print plaintext(tweet.text)
# This example retrieves tweets containing given keywords from Twitter.

from pattern.db import Datasheet, pd
from pattern.web import Twitter, hashtags

try:
    # We'll store tweets in a Datasheet.
    # A Datasheet is a table of rows and columns that can be exported as a CSV-file.
    # In the first column, we'll store a unique id for each tweet.
    # We only want to add the latest tweets, i.e., those we haven't seen yet.
    # With an index on the first column we can quickly check if an id already exists.
    # The pd() function returns the parent directory of this script + any given path.
    table = Datasheet.load(pd("eulogy.csv"))
    index = set(table.columns[0])
except:
    table = Datasheet()
    index = set()

engine = Twitter(language="en")

# With Twitter.search(cached=False), a "live" request is sent to Twitter:
# we get the most recent results instead of those in the local cache.
# Keeping a local cache can also be useful (e.g., while testing)
# because a query is instant when it is executed the second time.
prev = None
for i in range(2):
    print(i)
    for tweet in engine.search("eulogy", start=prev, count=25, cached=False):
        print("")
        print(tweet.text)
        print(tweet.author)
        print(tweet.date)
        print(hashtags(tweet.text))  # Keywords in tweets start with a "#".
        print("")
Example #45
# This example retrieves tweets containing given keywords from Twitter (http://twitter.com).

try:
    # We store tweets in a Datasheet that can be saved as a text file (comma-separated).
    # In the first column, we'll store a unique ID for each tweet.
    # We only want to add the latest tweets, i.e., those we haven't previously encountered.
    # With an index on the first column we can quickly check if an ID already exists.
    # The index becomes important once more and more rows are added to the table (speed).
    table = Datasheet.load("cool.txt")
    index = dict.fromkeys(table.columns[0], True)
except:
    table = Datasheet()
    index = {}

engine = Twitter(language="en")

# With cached=False, a live request is sent to Twitter,
# so we get the latest results for the query instead of those in the local cache.
for tweet in engine.search("is cooler than", count=25, cached=False):
    print tweet.description
    print tweet.author
    print tweet.date
    print hashtags(tweet.description)  # Keywords in tweets start with a #.
    print
    # Create a unique ID based on the tweet content and author.
    id = hash(tweet.author + tweet.description)
    # Only add the tweet to the table if it doesn't already contain this ID.
    if len(table) == 0 or id not in index:
        table.append([id, tweet.description])
        index[id] = True
Example #46
from pattern.web import Twitter, plaintext

twitter = Twitter(language='en')
for tweet in twitter.search('"more important than"', cached=False):
    print plaintext(tweet.text)
from pattern.web import Twitter
from textblob import TextBlob

t = Twitter()
i = None
for j in range(3):
    for tweet in t.search('college', start=i, count=30):
        print tweet.id
        print tweet.name
        print tweet.text

        blob = TextBlob(tweet.text)
        # Pull nouns from tweet
        print blob.noun_phrases
        # tweet's sentiment analysis
        for sentence in blob.sentences:
            print(sentence.sentiment.polarity)

        print
from pattern.web import Twitter

t = Twitter()
i = None
for j in range(3):
    for tweet in t.search("signing day", start=i, count=30):
        print tweet.id
        print tweet.name
        print tweet.text
        print
Example #49
#!/usr/bin/python

from pattern.web import Twitter, plaintext


twitter_api = Twitter(language='en')

tweets = twitter_api.search("@", count=2)
for tweet in tweets:
    text = tweet.text
    print text
Example #50
from pattern.web import Twitter
from pattern.en import Sentence, parse
from pattern.search import search
from pattern.vector import Document, Model, KNN

# Classification is a supervised machine learning method,
# where labeled documents are used as training material
# to learn how to label unlabeled documents.

# This example trains a simple classifier with Twitter messages.
# The idea is that, if you have a number of texts with a "type"
# (mail/spam, positive/negative, language, author's age, ...),
# you can predict the type of other "unknown" texts.
# The k-Nearest Neighbor algorithm classifies texts according
# to the k documents that are most similar (cosine similarity) to the given input document.

m = Model()
t = Twitter()

# First, we mine a model of ~1000 tweets.
# We'll use the hashtags as type labels.
for page in range(1, 10):
    for tweet in t.search('#win OR #fail', start=page, count=100, cached=True):
        # If the tweet contains #win hashtag, we'll set its type to 'WIN':
        s = tweet.text.lower()  # tweet in lowercase
        p = '#win' in s and 'WIN' or 'FAIL'  # document labels
        s = Sentence(parse(s))  # parse tree with part-of-speech tags
        s = search('JJ', s)  # adjectives in the tweet
        s = [match[0].string for match in s]  # adjectives as a list of strings
        s = " ".join(s)  # adjectives as string
        if len(s) > 0:
            m.append(Document(s, type=p, stemmer=None))
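The snippet above builds the model but stops before the classification step described in the comments; a hedged sketch of that step (the test adjectives are placeholders):

classifier = KNN()
for document in m:
    classifier.train(document)

# Classify unseen text by its adjectives; with enough training data,
# positive adjectives should come back as 'WIN' and negative ones as 'FAIL'.
print(classifier.classify('awesome'))
print(classifier.classify('horrible'))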
class Ui_Dialog(object):
    def setupUi(self, Dialog):
        Dialog.setObjectName("Dialog")
        Dialog.resize(823, 677)
        self.label = QtGui.QLabel(Dialog)
        self.label.setGeometry(QtCore.QRect(10, 10, 800, 400))
        self.label.setFrameShape(QtGui.QFrame.WinPanel)
        self.label.setText("")
        self.label.setObjectName("label")
        self.listWidget = QtGui.QListWidget(Dialog)
        self.listWidget.setGeometry(QtCore.QRect(10, 470, 801, 192))
        self.listWidget.setObjectName("listWidget")
        self.widget = QtGui.QWidget(Dialog)
        self.widget.setGeometry(QtCore.QRect(10, 429, 801, 25))
        self.widget.setObjectName("widget")
        self.horizontalLayout = QtGui.QHBoxLayout(self.widget)
        self.horizontalLayout.setContentsMargins(0, 0, 0, 0)
        self.horizontalLayout.setObjectName("horizontalLayout")
        self.label_2 = QtGui.QLabel(self.widget)
        self.label_2.setObjectName("label_2")
        self.horizontalLayout.addWidget(self.label_2)
        self.lineEdit = QtGui.QLineEdit(self.widget)
        self.lineEdit.setObjectName("lineEdit")
        self.horizontalLayout.addWidget(self.lineEdit)
        self.pushButton = QtGui.QPushButton(self.widget)
        self.pushButton.setObjectName("pushButton")
        self.horizontalLayout.addWidget(self.pushButton)

        self.retranslateUi(Dialog)
        QtCore.QMetaObject.connectSlotsByName(Dialog)
        #
        self.pushButton.clicked.connect(self.on_buttom_pressed)
        self.listWidget.doubleClicked.connect(self.goTweet)

        #
        self.alText = u''
        self.fullText = u''
        self.twitter = Twitter(language='tr')
        self.prevId = None
        self.timer = QtCore.QTimer(Dialog)
        self.timer.timeout.connect(self.on_timer)
        self.dialog = Dialog
        self.twIds = []


    def retranslateUi(self, Dialog):
        Dialog.setWindowTitle(QtGui.QApplication.translate("Dialog", "Twitter Gözetleyici", None, QtGui.QApplication.UnicodeUTF8))
        self.label_2.setText(QtGui.QApplication.translate("Dialog", "Anahtar Kelime :", None, QtGui.QApplication.UnicodeUTF8))
        self.pushButton.setText(QtGui.QApplication.translate("Dialog", "Gözetle", None, QtGui.QApplication.UnicodeUTF8))
    #
    def on_buttom_pressed(self):
        if self.timer.isActive() :
            self.timer.stop()
            self.pushButton.setText(u'Gözetle')
        else:
            self.listWidget.clear()
            self.twIds = []
            self.fullText = u''
            self.on_timer()
            self.timer.start(60000)
            self.pushButton.setText('Durdur !')

        return

    def on_timer(self):
        searchKey = self.lineEdit.text()
        self.getTwits(searchKey)
        self.filterWords()
        self.fullText = self.fullText + self.alText
        self.showWordCloud()


    def showWordCloud(self):
        wordcloud = WordCloud(width=800, height=400).generate(self.fullText)
        img = np.array(wordcloud.to_image())
        height, width, byteValue = img.shape
        byteValue = byteValue * width
        image = QtGui.QImage(img.data, width, height, byteValue, QtGui.QImage.Format_RGB888)
        pxmp = QtGui.QPixmap(image)
        self.label.setPixmap(pxmp)

    def filterWords(self):
        # frequently occurring words are filtered out; the list is incomplete, of course...
        flt = [u'https', u'nin', u'bir', u'daha', u'diye', u'için', u'gibi', u'işte', u'ile', u'değil', u'ben', u'sen',
               u'çok', u'ama', u'Sen',u'den',u'htt']
        derle = re.compile("\w*", re.UNICODE)
        wL = re.findall(derle, self.alText)
        temp = []
        for w in wL:
            if len(w) < 3:
                continue
            elif w in flt:
                continue
            else:
                #print w
                temp.append(w)
        self.alText = ' '.join(temp)

    def getTwits(self,keyWord):
        if len(keyWord) == 0:
            keyWord =u'"gündem"'
            self.lineEdit.setText(keyWord)
        self.alText = u''
        try:
            tList = self.twitter.search(keyWord, start=self.prevId, count=10, cached=False)
        except:
            message = u"Twitter search limit reached, please wait a bit"
            QtGui.QMessageBox.information(self.dialog, "Information", message)
            return

        for tweet in tList:
            self.listWidget.addItem(QtGui.QListWidgetItem(cleanTweet(tweet.text)))
            self.twIds.append(tweet.id)
            self.listWidget.setCurrentRow(self.listWidget.count()-1)
            tweet.text = self.filterRT(tweet.text)
            tweet.text = self.filterLink(tweet.text)
            self.alText = self.alText + plaintext(tweet.text) + u' '
            self.prevId = tweet.id

    def filterRT(self, tweet):
        # the "RT" prefix is filtered out
        buf = tweet[:2]
        if buf == u'RT':
            ix = tweet.find(':')
            tweet = tweet[ix:]
        return tweet

    def filterLink(self,tweet):
        regex = r'https?://[^\s<>"]+|www\.[^\s<>"]+'
        match = re.search(regex, tweet)
        buf = tweet
        if match:
            ixs= tweet.find(match.group())
            ixe= len(match.group())
            try:
                buf = tweet[:ixs]
            except:
                print "not removed"
        return buf

    def goTweet(self):
        i = self.listWidget.currentRow()
        urlTw = 'https:/'+'/twitter.com/statuses/'+ str(self.twIds[i])
        webbrowser.open(urlTw)
Example #52
0
# This example retrieves tweets containing given keywords from Twitter (http://twitter.com).

try:
    # We store tweets in a Table that can be saved as a text file.
    # In the first column, we'll store a unique ID for each tweet.
    # We only want to add the latest tweets, i.e. those we haven't previously encountered.
    # With an index() on the first column we can quickly check if an ID already exists.
    # The index becomes important once more and more rows are added to the table (speed).
    table = Table.load("cool.txt")
    index = table.index(table.columns[0])
except:
    table = Table()
    index = {}

engine = Twitter()

# With cached=False, a live request is sent to Twitter,
# so we get the latest results for the query instead of those in the local cache.
for tweet in engine.search("is cooler than", count=25, cached=False):
    print tweet.description
    print tweet.author
    print tweet.date
    print hashtags(tweet.description)  # Keywords in tweets start with a #.
    print
    # Create a unique ID based on the tweet content and author.
    id = hash(tweet.author + tweet.description)
    # Only add the tweet to the table if it doesn't already contain this ID.
    if len(table) == 0 or id not in index:
        table.append([id, tweet.description])
        index[id] = True
Example #53
0
from pattern.web import Twitter
import time

s = Twitter().stream('#snowday')
for i in range(25):
    time.sleep(1)
    s.update(bytes=1024)
    print s[-1].text if s else ''
    s.clear()
Example #54
from time import sleep

import io, json

import re

from pattern.web import Twitter

WORDS_TO_GENERATE = 50000

# if doing multiple searches: put most recent found tweet id here
# so search can stop if it gets there
STOP_AT = "533785825059037184"




twitter = Twitter(language='en')

# collect tweets about #nanowrimo
enough = False
last = None
i = 0
sentences = []
numwords = 0

firsttime = True

while not enough:
	try:
	
		if not firsttime:
			sleep(15)
Example #55
import os
import sys
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", ".."))

import time

from pattern.web import Twitter

# Another way to mine Twitter is to set up a stream.
# A Twitter stream maintains an open connection to Twitter,
# and waits for data to pour in.
# Twitter.search() allows us to look at older tweets,
# Twitter.stream() gives us the most recent tweets.

# It might take a few seconds to set up the stream.
stream = Twitter().stream("I hate", timeout=30)

#while True:
for i in range(10):
    print(i)
    # Poll Twitter to see if there are new tweets.
    stream.update()
    # The stream is a list of buffered tweets so far,
    # with the latest tweet at the end of the list.
    for tweet in reversed(stream):
        print(tweet.text)
        print(tweet.language)
    # Clear the buffer every so often.
    stream.clear()
    # Wait awhile between polls.
    time.sleep(1)
Example #56
from pattern.web import Twitter
from textblob import TextBlob

t = Twitter()
i = None
for j in range(3):
    for tweet in t.search("college", start=i, count=30):
        print tweet.id
        print tweet.name
        print tweet.text

        # Pull nouns from tweet
        blob = TextBlob(tweet.text)
        print blob.noun_phrases

        print
tickers1=ticklist[ticklist['MarketCap']>0.0]
tickers2=list(tickers1['Symbol'])
tickers3=[re.sub(r'\s','',w) for w in tickers2]
tickers=list(set(tickers3))
##############################################################################################
##############################################################################################
##############################################################################################
from pattern.web import Twitter
#number of tickers to search
N=len(tickers)
#number of tweets to download
M=2000
#Dataframe
DF0=[]
#loop
t = Twitter()
for j in range(N):
    tick='$'+tickers[j]
    i = None
    for tweet in t.search(tick, start=i, count=M):
#        temp_text=re.sub('[,;"\'?():_`/\.]','',tweet.text)
#        temp_text=temp_text.strip()
        temp_text=tweet.text.strip()
        temp_text=temp_text.replace('\n',' ')  # assign the result; str.replace() does not work in place
        DF0.append({'id':tweet.id,'tickers':tick,'screen_name':tweet.author,'text':temp_text,'time':tweet.date})
#        print tweet.text
        i = tweet.id

DF2=DF0

for i in range(len(DF2)):
Example #58
from pattern.web import Twitter

t = Twitter()
i = None
for j in range(3):
    for tweet in t.search('SunTrust', start=i, count=30):
        print tweet.id
        print tweet.name
        print tweet.text
        print