Example #1
import time
from pattern.web import Twitter


def TwitterStream():
    # Another way to mine Twitter is to set up a stream.
    # A Twitter stream maintains an open connection to Twitter, 
    # and waits for data to pour in.
    # Twitter.search() lets us look at older tweets, while
    # Twitter.stream() gives us the most recent tweets.
    for trend in Twitter().trends(cached=False):
        print(trend)

    # It might take a few seconds to set up the stream.
    stream = Twitter().stream("i love", timeout=30)

    pos_count = 0
    neg_count = 0

    #while True:
    for i in range(50):
        if neg_count:
            ratio = float(pos_count) / neg_count
        else:
            ratio = 0

        print("%s positive / %s negative (ratio %.2f)" % (pos_count, neg_count, ratio))
        
        #print i
        #print "+ " + str(pos_count)
        #print "- " + str(neg_count)
        #print "- - -"

        # Poll Twitter to see if there are new tweets.
        stream.update()
        
        # The stream is a list of buffered tweets so far,
        # with the latest tweet at the end of the list.
        for tweet in reversed(stream):
            print(tweet.text)
            print(tweet.language)

            # pol() is assumed to be a sentiment-polarity helper defined
            # elsewhere; a sketch follows this example.
            sent = pol(tweet.text)

            if sent > 0:
                pos_count += 1
            else:
                neg_count += 1
            
        # Clear the buffer every so often.
        stream.clear()
        
        # Wait awhile between polls.
        time.sleep(1)


    print "Final Twitter"
    print pos_count
    print neg_count
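# A minimal sketch of the pol() helper assumed above (hypothetical; the
# original snippet does not define it). pattern.en's sentiment() returns a
# (polarity, subjectivity) tuple, where polarity is a float in [-1.0, +1.0]:
from pattern.en import sentiment

def pol(text):
    return sentiment(text)[0]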
Example #2
def obtenerTweets(request):
    twitterEn = Twitter(language='en')
    twitterEs = Twitter(language='es')
    idJuego = request.GET.get("id")
    juego = Juego.objects.get(id=idJuego)
    tweets = []
    for tweet in twitterEs.search(juego.titulo, cached=False):
        tweets.append(tweet.text)
    for tweet in twitterEn.search(juego.titulo, cached=False):
        tweets.append(tweet.text)
    return render(request, 'obtenerTweets.html', {'tweets': tweets})
Example #3
from pattern.web import Twitter, URL
from bs4 import BeautifulSoup as bs
from tqdm import tqdm


def crawl(topic, N=100, Nbatch=25):
    t = Twitter()  # language='en', 'id'
    M = N // Nbatch  # number of batches (integer division)
    i, Tweets, keepCrawling = None, [], True
    for j in tqdm(range(M)):
        if not keepCrawling:
            break
        # Rate-limit errors are raised by the search call itself,
        # so it has to sit inside the try block.
        try:
            for tweet in t.search(topic, start=i, count=Nbatch):
                Tweets.append(tweet)
                i = tweet.id  # paginate: continue after the last seen id
        except:
            print("Twitter limit reached")
            keepCrawling = False  # stop the outer loop
    print('Making sure we get the full tweets, please wait ...')
    for i, tweet in enumerate(tqdm(Tweets)):
        try:
            webPage = URL(tweet.url).download()
            soup = bs(webPage, 'html.parser')
            full_tweet = soup.find_all(
                'p',
                class_='TweetTextSize')[0]  #modify this to get all replies
            full_tweet = bs(str(full_tweet), 'html.parser').text
            Tweets[i]['fullTxt'] = full_tweet
        except:
            Tweets[i]['fullTxt'] = tweet.text
    print('Done!... {0} tweets in total'.format(len(Tweets)))
    return Tweets
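# Example usage sketch (assumed, not part of the original snippet): collect
# ~100 tweets about a topic in batches of 25, then inspect the scraped text.
#
#   tweets = crawl('python', N=100, Nbatch=25)
#   print(tweets[0]['fullTxt'])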
Example #4
def get_replies(reply_id):
    import json
    from pattern.web import URL, Twitter

    # since_id is exclusive, so subtract 1 to include reply_id itself.
    reply_id = reply_id - 1
    url = URL("https://api.twitter.com/1.1/statuses/mentions_timeline.json",
              method="get",
              query={"since_id": reply_id})

    twitter = Twitter(license=ccpattern)  # ccpattern: OAuth credentials, defined elsewhere
    url = twitter._authenticate(url)

    user_replies = {}
    bot_replies = {}
    try:
        data = json.loads(url.open().read())
        for reply in data:
            name = reply["user"]["name"].encode('utf-8').strip()
            text = reply["text"].replace("@BotsVsQuotes", "").strip()
            if name == "BotsVsQuotes":
                #bot quotes
                text = text.split(":")
                char_name = text[0]
                bot_replies[char_name] = "".join(text[1:]).strip()
            else:
                #user quotes
                user_replies[name] = text
    except Exception as e:
        print(e)
        print(e.src)
        print(e.src.read())
        return {}, {}
    return bot_replies, user_replies
Example #5
from pattern.web import Twitter, plaintext
from pattern.en import parsetree
from pattern.search import search
from pattern.graph import Graph


def get_pattern_data(search_param):

    twitter = Twitter(language='en')

    for tweet in twitter.search(search_param, cached=True):
        print(plaintext(tweet.text).encode('ascii', 'ignore').decode('utf-8'))

    g = Graph()
    for i in range(10):
        for result in twitter.search(search_param, start=i + 1, count=50):
            s = result.text.lower()
            s = plaintext(s)
            s = parsetree(s)
            p = '{NP} (VP) ' + search_param + ' {NP}'
            for m in search(p, s):
                x = m.group(1).string  # NP left
                y = m.group(2).string  # NP right
                if x not in g:
                    g.add_node(x)
                if y not in g:
                    g.add_node(y)
                g.add_edge(g[x], g[y], stroke=(0, 0, 0, 0.75))  # R,G,B,A

    #if len(g)>0:
    #   g = g.split()[0] # Largest subgraph.

    for n in g.sorted()[:40]:  # Sort by Node.weight.
        n.fill = (0, 0.5, 1, 0.75 * n.weight)

    g.export('data', directed=False, weighted=0.6)
Example #6
def create_stream(phrase, queue):
    """
    Celery task that connects to the twitter stream and runs a loop, periodically
    emitting tweet information to all connected clients.
    """
    # SocketIO (flask-socketio), classify_tweet() and vectorize_tweet() are
    # assumed to be imported/defined elsewhere in the project.
    local = SocketIO(message_queue=queue)
    stream = Twitter().stream(phrase, timeout=30)

    for i in range(60):
        stream.update()
        for tweet in reversed(stream):
            sentiment = classify_tweet(tweet)
            x, y = vectorize_tweet(tweet)
            local.emit(
                'tweet', {
                    'id': str(i),
                    'text': str(tweet.text.encode('ascii', 'ignore')),
                    'sentiment': sentiment,
                    'x': x,
                    'y': y
                })
        stream.clear()
        time.sleep(1)

    return queue
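# Hypothetical usage sketch, assuming create_stream is registered as a Celery
# task (e.g. decorated with @celery.task) and the queue argument is a Redis
# URL backing the SocketIO message queue:
#
#   create_stream.delay("python", "redis://localhost:6379/0")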
Example #7
import time
from pattern.web import Twitter
from textblob import TextBlob
from textblob.sentiments import NaiveBayesAnalyzer


def Pattern_Module_Twitter_Stream():

    # Start Stop Watch
    t1 = time.time()

    # Set Up a Stream (a list-like buffer of incoming tweets)
    stream = Twitter().stream('#Fail')

    # For 10 Instances
    for second in range(10):
        # Pull New Stream Data into the Buffer
        stream.update(bytes=1024)
        # Skip this Round if the Buffer is Still Empty
        if not stream:
            continue
        # Latest Buffered Tweet
        value = stream[-1]
        # Print Tweet
        print('Tweet: %s' % value.text)
        # Get Sentiment
        print('Sentiment Analysis of Tweet: %s' % TextBlob(
            str(value.text),
            analyzer=NaiveBayesAnalyzer()).sentiment[0].upper())
        # Wait 3 Seconds between Queries - Do Not Want to Get Blocked
        time.sleep(3)

    return time.time() - t1
Example #8
def gettweets(searchterms):
    from pattern.web import Twitter, plaintext
    tweetlist = []
    twitter = Twitter(language='en')
    for tweet in twitter.search(searchterms, cached=False):
        tweetlist.append(plaintext(tweet.text))
    return tweetlist
Example #9
def search(text):
    results = []

    twitter = Twitter(language='en')
    for tweet in twitter.search(text, count=30, cached=False):
        results.append(tweet.text)

    return results
Example #10
def twitter_search():
    t = Twitter(language='es')
    i = None
    for j in range(3):  # For pagination
        for r in t.search(query="#DonaldTrump", start=i, count=10):
            print(r.id, r.text, r.date)
            i = r.id  # continue after the last seen id
        print("----------------@@@@@@-------------")
Example #11
    def getTweetsByCoord(self, term, lat, lng):

        twitter = Twitter(language='en')
        tweets = []
        for tweet in twitter.search(term, geo=(lat, lng)):
            tweets.append(tweet.text)

        return tweets
Example #12
    def trending(self):
        '''trending sentiment'''

        trendArray = []

        for trend in Twitter().trends(cached=False):
            trendArray.append([trend, indicoio.sentiment(trend)])

        return trendArray
Example #13
def busco_en_twitter(cadena):

    t = Twitter()
    i = None
    for j in range(3):
        for tweet in t.search(cadena, start=i, count=10):
            print(tweet.text)
            print("-------")
            i = tweet.id
Example #14
def find(tag):
    """
    Finds content and user ID of posts with the specified hashtag and saves
    them to a .txt file.
    """
    t = Twitter(language='en')

    # Compiles up to 1000 tweets with the specified tag and saves their
    # author and content to the file.
    with open("twitter_data.txt", "a") as twitter:
        for tweet in t.search(tag, count=1000):
            twitter.write("%s\t%s\n" % (tweet.author, tweet.text))
Example #15
def fuzzy_find(thing):
    t = Twitter()

    fuzzy_things = fuzzy_list(thing)

    tweets = []
    seen_ids = set()
    for item in fuzzy_things:
        for tweet in t.search(item, count=50, throttle=2):
            # Deduplicate across the fuzzy variants by tweet id.
            if tweet.id not in seen_ids:
                seen_ids.add(tweet.id)
                tweets.append(tweet)

    return tweets
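# Hypothetical sketch of the fuzzy_list() helper assumed above; the original
# does not define it. Any function that returns search-query variants works:
def fuzzy_list(thing):
    thing = thing.lower()
    return [thing, thing.title(), "#" + thing.replace(" ", "")]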
Example #16
def get_info(search_query):
	if isinstance(search_query, str):
		search_query = str(search_query)
	else:
		return { "Error": "Pass a string, from mine.py [7]", "Result": [None] }

	result = []
	engineGoogle = Google(license=None, throttle=0.5, language=None)
	engineBing = Bing(license=None, throttle=0.5, language=None)
	engineTwitter = Twitter(license=None, throttle=0.5, language=None)
	engineFacebook = Facebook(license=None, throttle=1.0, language='en')
	engineWikipedia = Wikipedia(license=None, throttle=5.0, language=None)
	engineFlickr = Flickr(license=None, throttle=5.0, language=None)
	engineArray = [engineGoogle, engineTwitter]  # only Google and Twitter are used below

	'''
	for i in range(1,2):
		# result = result + ([repr(plaintext(para.text)) for para in engine[0].search(search_query, type=SEARCH, start=i, count=5)])
		[result.append([result.append(repr(plaintext(para.text))) for para in engine.search(search_query, type=SEARCH, start=i, count=5)]) for engine in engineArray]
			# print repr(plaintext(para.text))
			# print repr(plaintext(para.url)) + '\n\n'
			# result.append(repr(plaintext(para.text)))
	'''

	# Google
	for i in range(1, 5):
		result = result + ([para.text for para in engineGoogle.search(search_query, type=SEARCH, start=i, count=10)])

	# Twitter
	for i in range(1, 5):
		result = result + ([para.text for para in engineTwitter.search(search_query, type=SEARCH, start=i, count=10)])
	'''
	# for i in range(1,2):
		# result = result + ([repr(plaintext(para.text)) for para in engineBing.search(search_query, type=SEARCH, start=i, count=5)])
	for i in range(1,2):
		result = result + ([repr(plaintext(para.text)) for para in engineTwitter.search(search_query, type=SEARCH, start=i, count=10)])
	# for i in range(1,2):
		# result = result + ([repr(plaintext(para.text)) for para in engineFacebook.search(search_query, type=SEARCH, start=i, count=5)])
	# for i in range(1,2):
		# result = result + ([repr(plaintext(para.text)) for para in engineWikipedia.search(search_query, type=SEARCH, start=i, count=5)])
	# for i in range(1,2):
		# result = result + ([repr(plaintext(para.text)) for para in engineFlickr.search(search_query, type=SEARCH, start=i, count=5)])
	'''

	return { "Error": None, "Result": result }

	# return { "Error": None, "Result": ['Hello World', 'Bye Bye Tommy'] }
Example #17
def setStream(keywordsStr):
    assert keywordsStr
    print("\n\n==== ====\n\n")
    print("Setting up the stream for keywords = {0}".format(keywordsStr))
    print("\nKeywords Twitter QUERY = \"{0}\"".format(colorGreen(keywordsStr)))

    if filePath:  # filePath, lang and colorGreen() are module-level globals defined elsewhere
        print("Will dump to file: {0}".format(colorGreen(filePath)))
    else:
        print("No file dump")

    # It might take a few seconds to set up the stream.
    stream = Twitter(throttle=0.5, language=lang).stream(keywordsStr,
                                                         timeout=30)
    print("\nStream initialized")

    return stream
Example #18
def get_info(search_query):
    if isinstance(search_query, str):
        search_query = str(search_query)
    else:
        return {"Error": "Pass a string, from mine.py [7]"}

    google = []  # each result: {'text': ..., 'url': ..., 'title': ...}
    twitter = []
    engineGoogle = Google(license=None, throttle=0.5, language=None)
    # engineBing = Bing(license=None, throttle=0.5, language=None)
    engineTwitter = Twitter(license=None, throttle=0.5, language=None)
    # engineFacebook = Facebook(license=None, throttle=1.0, language='en')
    # engineWikipedia = Wikipedia(license=None, throttle=5.0, language=None)
    # engineFlickr = Flickr(license=None, throttle=5.0, language=None)
    # engineArray = [engineGoogle, engineBing, engineTwitter, engineFacebook, engineWikipedia, engineFlickr]
    engineArray = [engineGoogle, engineTwitter]

    # Google
    for i in range(1, 2):
        for para in engineGoogle.search(search_query,
                                        type=SEARCH,
                                        start=i,
                                        count=5):
            google.append({
                'text': para.text,
                'url': para.url,
                'title': para.title
            })
        #resultGoogle = resultGoogle + ([para.text for para in engineGoogle.search(search_query, type=SEARCH, start=i, count=10)])
    # Twitter
    for i in range(1, 2):
        for para in engineTwitter.search(search_query,
                                         type=SEARCH,
                                         start=i,
                                         count=5):
            twitter.append({
                'text': para.text,
                'url': para.url,
                'title': para.title
            })
        #resultTwitter = resultTwitter + ([para.text for para in engineTwitter.search(search_query, type=SEARCH, start=i, count=10)])

    # print 'From data_mine.py --> google: ', google, ', twitter: ', twitter

    return {"Error": None, "Google": google, "Twitter": twitter}
Example #19
import argparse
import csv
import os

from pattern.web import Twitter


def main():
    # user input
    parser = argparse.ArgumentParser(description='Downloads tweets for a given search word')
    parser.add_argument('--term', help='Term to search tweets',required=True)
    parser.add_argument('--out', help='Output CSV file name', default='tweets.csv')
    args = parser.parse_args()
    # Twitter engine
    engine = Twitter(language='en')
    term = " ".join(args.term.split("_"))
    # mkdir_p() is assumed to be a makedirs-style helper defined elsewhere.
    mkdir_p(os.path.dirname(args.out))
    with open(args.out, "w") as outfile:
        print("Searching for tweets with '{}'".format(term))
        writer = csv.writer(outfile, delimiter=',', quotechar='\"', quoting=csv.QUOTE_ALL)
        # download tweets
        for tweet in engine.search(term, cached=False, start=1, count=30):
            csvrow = tweet.text.encode('utf-8')
            # write into CSV file
            writer.writerow([csvrow])
Example #20
    def setupUi(self, Dialog):
        Dialog.setObjectName("Dialog")
        Dialog.resize(823, 677)
        self.label = QtGui.QLabel(Dialog)
        self.label.setGeometry(QtCore.QRect(10, 10, 800, 400))
        self.label.setFrameShape(QtGui.QFrame.WinPanel)
        self.label.setText("")
        self.label.setObjectName("label")
        self.listWidget = QtGui.QListWidget(Dialog)
        self.listWidget.setGeometry(QtCore.QRect(10, 470, 801, 192))
        self.listWidget.setObjectName("listWidget")
        self.widget = QtGui.QWidget(Dialog)
        self.widget.setGeometry(QtCore.QRect(10, 429, 801, 25))
        self.widget.setObjectName("widget")
        self.horizontalLayout = QtGui.QHBoxLayout(self.widget)
        self.horizontalLayout.setContentsMargins(0, 0, 0, 0)
        self.horizontalLayout.setObjectName("horizontalLayout")
        self.label_2 = QtGui.QLabel(self.widget)
        self.label_2.setObjectName("label_2")
        self.horizontalLayout.addWidget(self.label_2)
        self.lineEdit = QtGui.QLineEdit(self.widget)
        self.lineEdit.setObjectName("lineEdit")
        self.horizontalLayout.addWidget(self.lineEdit)
        self.pushButton = QtGui.QPushButton(self.widget)
        self.pushButton.setObjectName("pushButton")
        self.horizontalLayout.addWidget(self.pushButton)

        self.retranslateUi(Dialog)
        QtCore.QMetaObject.connectSlotsByName(Dialog)
        #
        self.pushButton.clicked.connect(self.on_buttom_pressed)
        self.listWidget.doubleClicked.connect(self.goTweet)

        #
        self.alText = u''
        self.fullText = u''
        self.twitter = Twitter(language='tr')
        self.prevId = None
        self.timer = QtCore.QTimer(Dialog)
        self.timer.timeout.connect(self.on_timer)
        self.dialog = Dialog
        self.twIds = []
Example #21
def create_stream(phrase, queue):
    local = SocketIO(message_queue=queue)
    stream = Twitter().stream(phrase, timeout=120)

    for i in range(120):
        stream.update()
        for tweet in reversed(stream):
            sentiment = classify_tweet(tweet)
            x, y = vectorize_tweet(tweet)
            local.emit(
                'tweet', {
                    'id': str(i),
                    'text': str(tweet.text.encode('ascii', 'ignore')),
                    'sentiment': sentiment,
                    'x': x,
                    'y': y
                })
        stream.clear()
        time.sleep(1)

    return queue
Example #22
def reply_tweet(tweet, reply_id, reply_user="******"):
    from pattern.web import URL, Twitter

    tweet = reply_user + " " + tweet
    url = URL("https://api.twitter.com/1.1/statuses/update.json",
              method="post",
              query={
                  "status": tweet,
                  "in_reply_to_status_id": reply_id
              })

    twitter = Twitter(license=ccpattern)
    url = twitter._authenticate(url)

    try:
        # Send the post request.
        url.open()
    except Exception as e:
        print(e)
        print(e.src)
        print(e.src.read())
Example #23
    def get_tweets(self, search, nb, include_RT, useKey, keys):

        if not useKey:
            keys = None

        twitter = Twitter(language=self.dico_lang[self.language], license=keys)

        tweets = list()
        if not include_RT:
            for tweet in twitter.search(search, start=1, count=nb * 3):
                if not tweet.text.startswith('RT'):
                    tweet_input = Input(tweet.text)
                    annotations = {
                        'source': 'Twitter',
                        'author': tweet.author,
                        'date': tweet.date,
                        'url': tweet.url,
                        'search': search,
                    }
                    segment = tweet_input[0]
                    segment.annotations.update(annotations)
                    tweet_input[0] = segment
                    tweets.append(tweet_input)
                if len(tweets) == nb:
                    break
        else:
            for tweet in twitter.search(search, start=1, count=nb):
                tweet_input = Input(tweet.text)
                annotations = {
                    'source': 'Twitter',
                    'author': tweet.author,
                    'date': tweet.date,
                    'url': tweet.url,
                    'search': search,
                }
                segment = tweet_input[0]
                segment.annotations.update(annotations)
                tweet_input[0] = segment
                tweets.append(tweet_input)
        return tweets
Example #24
def post_tweet(tweet):
    from pattern.web import URL, Twitter
    import json

    url = URL("https://api.twitter.com/1.1/statuses/update.json",
              method="post",
              query={"status": tweet})

    twitter = Twitter(license=ccpattern)
    url = twitter._authenticate(url)

    try:
        # Send the post request.
        data = url.open().read()
    except Exception as e:
        print(e)
        print(e.src)
        print(e.src.read())
        return None

    data = json.loads(data)
    return int(data[u'id'])
Example #25
from pattern.web import Twitter
from pattern.en import parse, Sentence
from pattern.search import search
from pattern.vector import Document, Model, KNN

# Classification is a supervised machine learning method,
# where labeled documents are used as training material
# to learn how to label unlabeled documents.

# This example trains a simple classifier with Twitter messages.
# The idea is that, if you have a number of texts with a "type"
# (mail/spam, positive/negative, language, author's age, ...),
# you can predict the type of other "unknown" texts.
# The k-Nearest Neighbor algorithm classifies texts according
# to the k documents that are most similar (cosine similarity) to the given input document.

m = Model()
t = Twitter()

# First, we mine a model of up to 900 tweets (9 pages of 100 results).
# We'll use hashtags as type.
for page in range(1, 10):
    for tweet in t.search('#win OR #fail', start=page, count=100, cached=True):
        # If the tweet contains #win hashtag, we'll set its type to 'WIN':
        s = tweet.text.lower()  # tweet in lowercase
        p = 'WIN' if '#win' in s else 'FAIL'  # document label
        s = Sentence(parse(s))  # parse tree with part-of-speech tags
        s = search('JJ', s)  # adjectives in the tweet
        s = [match[0].string for match in s]  # adjectives as a list of strings
        s = " ".join(s)  # adjectives as string
        if len(s) > 0:
            m.append(Document(s, type=p, stemmer=None))
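# A minimal sketch of the classification step described in the comments above
# (assumed continuation; not part of the original snippet): train a k-NN
# classifier on the labeled documents, then label unseen text.
classifier = KNN(baseline=None)
for document in m:
    classifier.train(document)
print(classifier.classify(Document('awesome sweet win')))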
Example #26
import time
from pattern.web import Twitter

s = Twitter().stream(
    '#joy, #happiness, #hopeful, #pleasure, #harmony, #kindness, #affection, #love'
)
for i in range(250):
    time.sleep(1)
    s.update(bytes=1024)
    print(s[-1].text if s else '')
Example #27
# This example retrieves tweets containing given keywords from Twitter.

from pattern.db import Datasheet, pd
from pattern.web import Twitter, hashtags

try:
    # We'll store tweets in a Datasheet.
    # A Datasheet is a table of rows and columns that can be exported as a CSV-file.
    # In the first column, we'll store a unique id for each tweet.
    # We only want to add the latest tweets, i.e., those we haven't seen yet.
    # With an index on the first column we can quickly check if an id already exists.
    # The pd() function returns the parent directory of this script + any given path.
    table = Datasheet.load(pd("eulogy.csv"))
    index = set(table.columns[0])
except:
    table = Datasheet()
    index = set()

engine = Twitter(language="en")

# With Twitter.search(cached=False), a "live" request is sent to Twitter:
# we get the most recent results instead of those in the local cache.
# Keeping a local cache can also be useful (e.g., while testing)
# because a query is instant when it is executed the second time.
prev = None
for i in range(2):
    print(i)
    for tweet in engine.search("eulogy", start=prev, count=25, cached=False):
        print("")
        print(tweet.text)
        print(tweet.author)
        print(tweet.date)
        print(hashtags(tweet.text))  # Keywords in tweets start with a "#".
        print("")
        # Continue mining older tweets in the next iteration.
        prev = tweet.id
Example #28
from pattern.web import Twitter, plaintext

twitter = Twitter(language='en')
for tweet in twitter.search('"more important than"', cached=False):
    print(plaintext(tweet.text))
Example #29
from pattern.web import Twitter, plaintext

for tweet in Twitter().search('"more important than"', cached=False):
    print(plaintext(tweet.description.encode("ASCII", "ignore")))
Example #30
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Mon Mar 25 19:37:34 2019

@author: alternatif
"""

from pattern.web import Twitter
from pattern.en import tag
from pattern.vector import KNN, count

twitter, knn = Twitter(), KNN()

for i in range(1, 3):
    for tweet in twitter.search('#win OR #fail', start=i, count=100):
        s = tweet.text.lower()
        p = 'WIN' if '#win' in s else 'FAIL'
        v = tag(s)
        v = [word for word, pos in v if pos == 'JJ']  # JJ = adjective
        v = count(v)  # {'sweet': 1}
        if v:
            knn.train(v, type=p)

print(knn.classify('sweet potato burger'))
print(knn.classify('stupid autocorrect'))