Ejemplo n.º 1
0
def main(search_name):
    """Stream tweets matching ``search_name`` into a Kinesis Firehose stream.

    The first entry of ``search_name`` is used as the Firehose delivery
    stream name and the whole list is used as the Twitter ``track`` filter.
    The function never returns normally: it keeps re-opening the Twitter
    stream until the process is interrupted.

    Args:
        search_name: Non-empty sequence of track terms.
    """
    stream_name = search_name[0]
    client = boto3.client('firehose',
                          region_name='us-east-2',
                          aws_access_key_id=aws_key_id,
                          aws_secret_access_key=aws_key)

    try:
        create_stream(client, stream_name)
        # Parenthesised single-argument print works on Python 2 and 3.
        print('Creating Kinesis stream... Please wait...')
        time.sleep(60)
    except Exception as exc:
        # Best effort, as in the original: carry on and let the status check
        # below report on the stream. Presumably creation fails when the
        # stream already exists -- TODO confirm against create_stream.
        print('Could not create Kinesis stream: %s' % (exc,))

    stream_status = client.describe_delivery_stream(
        DeliveryStreamName=stream_name)
    if stream_status['DeliveryStreamDescription'][
            'DeliveryStreamStatus'] == 'ACTIVE':
        print("\n ==== KINESES ONLINE ====")

    auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_token, access_token_secret)
    api = tweepy.API(auth)

    streamListener = StreamListener(client, search_name)
    stream = tweepy.Stream(auth=api.auth, listener=streamListener)

    # Reconnect forever. Only Exception is caught so Ctrl-C / SystemExit can
    # still stop the process, and a short pause avoids hammering the API when
    # the connection keeps failing.
    while True:
        try:
            stream.filter(track=search_name)
        except Exception:
            time.sleep(5)
Ejemplo n.º 2
0
    def stream_tweets(self, hash_tag_list):
        """Search Twitter once per hashtag and store each result list.

        For every entry of ``hash_tag_list`` up to ``self.num_of_tweets``
        search results are collected and appended, as one list per hashtag,
        to ``self.tweet_matrix``. A failure during the search is written to
        the status bar instead of being raised.
        """
        # Start from an empty status bar.
        self.statusbar_table.clear()

        # Listener/stream pair; the stream is disconnected again at the end.
        self.listener = StreamListener()
        self.stream = Stream(self.auth, self.listener)

        try:
            for hashtag in hash_tag_list:
                results = Cursor(self.twitter_client.search, q=hashtag, result_type='mixed', tweet_mode='extended', include_entities=True, include_rts=True, lang="en")
                # One list of tweets per hashtag.
                self.tweet_matrix.append(list(results.items(self.num_of_tweets)))
        except Exception as e:
            self.statusbar_table.append('<center>Search Error: {0}'.format(e))

        # Stream Disconnect
        self.stream.disconnect()
def main(search_list):
    """Forward a followed Twitter account's stream to Kinesis Firehose.

    The first entry of ``search_list`` names the Firehose delivery stream;
    the full list is handed to the listener. The function never returns
    normally: the Twitter stream is re-opened until the process is
    interrupted.

    Args:
        search_list: Non-empty sequence whose first item is the stream name.
    """
    stream_name = search_list[0]
    client = boto3.client('firehose', region_name='us-east-1')
    try:
        create_stream(client, stream_name)
        print('Creating Kinesis stream... Please wait...')
        time.sleep(60)
    except Exception as exc:
        # Best effort: report why creation failed and continue with the
        # status check below.
        print("Failed to create Stream")
        print(exc)
    stream_status = client.describe_delivery_stream(
        DeliveryStreamName=stream_name)
    if stream_status['DeliveryStreamDescription'][
            'DeliveryStreamStatus'] == 'ACTIVE':
        print("\n ==== KINESES ONLINE ====")
    auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_token, access_token_secret)
    api = tweepy.API(auth)

    streamListener = StreamListener(client, search_list)
    stream = tweepy.Stream(auth=api.auth, listener=streamListener)

    print('Starting streaming')
    # NOTE(review): the followed user id is hard-coded and search_list is not
    # used as a filter here -- confirm this is intended.
    while True:
        try:
            stream.filter(follow=["1094299419556626432"])
        except Exception:
            # Only Exception is caught so Ctrl-C still stops the loop; the
            # message is printed before the pause it announces.
            print('Sleeping 5 sec before next filter')
            time.sleep(5)
Ejemplo n.º 4
0
def extract_by_keyword(search_string):
    """Start an English-language Twitter stream for a fixed set of keywords.

    Credentials are read from ``../auth/oauth.txt`` (one value per line, in
    the order access key, access secret, consumer key, consumer secret) and
    ``../data/out.csv`` is truncated and given a CSV header before streaming
    starts.

    Args:
        search_string: Accepted for interface compatibility; the tracked
            keywords are the fixed list below.

    Raises:
        ValueError: If the credential file holds fewer than four values.
    """
    # Retrieve credentials: leading non-empty lines, stopping at the first
    # blank line or end of file.
    oauth_file = '../auth/oauth.txt'
    keys = []
    with open(oauth_file) as fp:
        for raw_line in fp:
            entry = raw_line.strip()
            if not entry:
                break
            keys.append(entry)
    if len(keys) < 4:
        # Fail loudly instead of silently authenticating with empty values.
        raise ValueError(
            '{0} must contain 4 credential lines, found {1}'.format(
                oauth_file, len(keys)))

    # Twitter credentials for the app
    access_key, access_secret, consumer_key, consumer_secret = keys[:4]

    # Pass Twitter credentials to tweepy
    auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_key, access_secret)
    api = tweepy.API(auth,
                     wait_on_rate_limit=True,
                     wait_on_rate_limit_notify=True)

    streamListener = StreamListener()
    stream = tweepy.Stream(auth=api.auth,
                           listener=streamListener,
                           tweet_mode='extended')
    # Reset the output file so it contains only the header row.
    with open("../data/out.csv", "w", encoding='utf-8') as f:
        f.write("date,user,is_retweet,is_quote,text,quoted_text\n")
    tags = ['corona', 'covid', 'virus', 'flu']
    stream.filter(track=tags, languages=['en'])
Ejemplo n.º 5
0
def main():
    """Track two film titles on the Twitter stream, reconnecting forever.

    A fresh listener and stream are created for every connection attempt.
    The function never returns normally.
    """
    import time  # local import: this snippet does not show the file's imports

    while True:
        sl = StreamListener()
        stream = tweepy.Stream(OAuth, sl)
        try:
            stream.filter(languages=["en"],
                          track=['The Imitation Game', 'Fifty Shades of Grey'])
        except Exception:
            # Catch Exception only, so Ctrl-C still stops the loop, and pause
            # briefly instead of reconnecting in a tight loop.
            print('Exception occur!')
            time.sleep(5)
Ejemplo n.º 6
0
 def start(self):
     """Configure ParallelDots and start the filter and processing threads.

     One thread runs ``self.tweetfilter`` with the Twitter stream, a second
     runs ``self.processtweets``; both are started before this returns.
     """
     paralleldots.set_api_key(paralleldots_key)

     tweet_listener = StreamListener(self.batchedtweets, self.lock)
     oauth = OAuthHandler(consumer_key, consumer_secret)
     oauth.set_access_token(access_token, access_token_secret)
     tweet_stream = Stream(oauth, tweet_listener)

     filter_worker = Thread(target=self.tweetfilter, args=(tweet_stream, ))
     filter_worker.start()
     process_worker = Thread(target=self.processtweets)
     process_worker.start()
     print("Started...")
Ejemplo n.º 7
0
def getTweets(hashtags):
    """Stream tweets matching ``hashtags``, reconnecting after failures.

    The function never returns normally; it loops until the process is
    interrupted.

    Args:
        hashtags: List of terms passed to the stream's ``track`` filter.
    """
    import time  # local import: this snippet does not show the file's imports

    auth = OAuthHandler(APIkey, APIsecretkey)
    auth.set_access_token(AccessToken, AccessTokenSecret)
    streamListener = StreamListener()
    stream = Stream(auth, streamListener)
    while True:
        try:
            stream.filter(track=hashtags)
        except Exception:
            # Exception only (Ctrl-C must still work), with a short pause so
            # a persistent failure does not spin the CPU or hammer the API.
            time.sleep(5)
Ejemplo n.º 8
0
def main():
    """Collect tweets for eight hours in the background, then parse them.

    The stream runs on tweepy's own thread. This function restarts it if it
    stops before the deadline, disconnects it at the end and finally calls
    ``parse_tweets()``.
    """
    myStream = Stream(auth, StreamListener())
    end_time = time.time() + 60 * 60 * 8  # 8 hours in seconds
    try:
        while time.time() < end_time:
            # Only (re)connect when the stream is not running; calling
            # filter() on a connected stream raises TweepError.
            if not myStream.running:
                try:
                    # ``async`` is a reserved word since Python 3.7; tweepy
                    # renamed the keyword to ``is_async`` (tweepy >= 3.7).
                    myStream.filter(track=keywords, languages=["en"],
                                    is_async=True)
                except tweepy.TweepError as e:
                    print(" Twitter API error: " + str(e))
            time.sleep(1)
    finally:
        # Stop the background thread before post-processing.
        myStream.disconnect()
    parse_tweets()
def main():
	"""Stream tweets for the keywords stored in the SQL database.

	The keywords come from ``get_queries(con)``; the database connection is
	closed when streaming ends, including when streaming raises.
	"""
	# Start the listener; it receives the tweets selected by the filter.
	stream_listener = StreamListener()
	stream = tweepy.Stream(auth=api.auth, listener=stream_listener)
	# Establish a connection to the SQL database.
	con = getConnection()
	try:
		# Keywords (hashtags) the stream should track, as returned by
		# get_queries for this connection.
		rest_track = get_queries(con)
		stream.filter(track=rest_track)
		# NOTE(review): confirm the stream object used here provides flush().
		stream.flush()
	finally:
		# Always release the database connection.
		con.close()
def main():
    """Stream tweets tagged ``#Disney``; on any error print it and exit(1).

    Raises:
        SystemExit: With code 1 when setting up or running the stream fails.
    """
    try:
        listener = StreamListener()
        auth = OAuthHandler(consumer_key, consumer_secret)
        auth.set_access_token(access_token, access_token_secret)
        stream = Stream(auth, listener)
        stream.filter(track=['#Disney'])

    except Exception as e:
        print("EXCEPTION IN MAIN FUNCTION!!!")
        print(e)
        print(type(e))
        print(e.__dict__)
        # ``exit`` is injected by the ``site`` module and may be absent
        # (e.g. ``python -S``); raising SystemExit has the same effect.
        raise SystemExit(1)
Ejemplo n.º 11
0
def collect_tweets():
    """Stream tweets containing the tracked words using a time-limited listener.

    Twitter credentials are read from the environment variables
    ``TWITTER_CONSUMER_KEY``, ``TWITTER_CONSUMER_SECRET``,
    ``TWITTER_ACCESS_TOKEN`` and ``TWITTER_ACCESS_TOKEN_SECRET`` so that no
    secret is stored in the source file.

    Raises:
        RuntimeError: If any of the credential variables is missing or empty.
    """
    import os  # local import: this snippet does not show the file's imports

    WORDS = ['zappy', 'FictionFone', 'fictionfone', 'Egypt']
    # Alternative word list:
    #  ['#bigdata', '#AI', '#datascience', '#machinelearning', '#ml', '#iot']

    env_names = ("TWITTER_CONSUMER_KEY", "TWITTER_CONSUMER_SECRET",
                 "TWITTER_ACCESS_TOKEN", "TWITTER_ACCESS_TOKEN_SECRET")
    missing = [name for name in env_names if not os.environ.get(name)]
    if missing:
        raise RuntimeError("Missing Twitter credentials in environment: "
                           + ", ".join(missing))
    consumer_key, consumer_secret, access_token, access_token_secret = [
        os.environ[name] for name in env_names]

    start_time = time.time()
    time_limit = 10

    auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_token, access_token_secret)
    # The listener receives the start time and the time limit; how it uses
    # them is defined by the listener class, which is not shown here.
    listener = StreamListener(start_time, time_limit)
    streamer = tweepy.Stream(auth=auth, listener=listener)
    print("Tracking: " + str(WORDS))
    streamer.filter(track=WORDS)
    print("done collecting tweets")
Ejemplo n.º 12
0
def start_stream():
    """Run the hashtag stream until the global runtime limit is reached.

    While less than ``time_limit`` seconds have passed since ``start_time``
    the stream is (re)started; once the limit is reached the process exits
    via ``sys.exit()``. ``start_time``, ``time_limit``, ``runtime`` and
    ``HASHTAGS`` are read from module scope.

    Raises:
        SystemExit: When the runtime limit has been reached.
    """
    while True:
        if (time.time() - start_time) >= time_limit:
            # The messages label the timestamp as UTC, so format UTC time.
            current_time = time.strftime("%H:%M:%S", time.gmtime())
            print('Runtime limit of', time_limit, ' seconds reached, stopping connection at UTC time.',current_time)
            sys.exit()
        try:
            auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
            auth.set_access_token(access_key, access_secret)
            streamer = tweepy.Stream(
                auth=auth, listener=StreamListener(time_limit=runtime),tweet_mode='extended')
            current_time = time.strftime("%H:%M:%S", time.gmtime())
            print("Started tracking process for", runtime, "seconds at UTC time",current_time)
            print("Tracking hashtags:", HASHTAGS)
            streamer.filter(track=HASHTAGS)
        except Exception as e:
            print('Error in main():')
            # Show the actual error; ``e.__doc__`` is only the exception
            # class's docstring and carries no detail about this failure.
            print(repr(e))
Ejemplo n.º 13
0
def start_stream(auth, track):
    """Open a Twitter stream filtered on the single term ``track``.

    ``track`` is handed to the listener as-is and to the stream filter as a
    one-element list.
    """
    listener = StreamListener(track)
    stream = tweepy.Stream(auth=auth, listener=listener)
    stream.filter(track=[track])
Ejemplo n.º 14
0
 def launch_streamer(api, track):
     """Start a background Twitter stream filtered on ``track``.

     Args:
         api: Object whose ``auth`` attribute is used to authenticate.
         track: Terms passed to the stream's ``track`` filter.
     """
     stream_listener = StreamListener()
     stream = tweepy.Stream(auth=api.auth, listener=stream_listener)
     # ``async`` is a reserved word since Python 3.7; tweepy renamed the
     # keyword to ``is_async`` (tweepy >= 3.7).
     stream.filter(track=track, is_async=True)
# SECURITY: the access-token secret was hard-coded here. It is now read from
# the environment; the previously committed value should be revoked.
import os

access_token_secret = os.environ['TWITTER_ACCESS_TOKEN_SECRET']



# Basic listener: prints each received tweet followed by its TextBlob sentiment.
class StreamListener(StreamListener):
    """Print every status text and its sentiment; stop on HTTP 420."""

    def on_status(self, status):
        """Print the status text, its sentiment and an empty line."""
        print(status.text)
        sentiment = TextBlob(status.text).sentiment
        print(sentiment)
        print()

    def on_error(self, status_code):
        """Return False for status 420, otherwise None."""
        return False if status_code == 420 else None


# Script entry point: authenticate, open a stream and filter it on one subject.
if __name__ == '__main__':

    # Build the listener and the OAuth handler, then bind both to a Stream.
    # consumer_key, consumer_secret, access_token and access_token_secret are
    # module-level names defined outside this block.
    l = StreamListener()
    auth = OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_token, access_token_secret)
    stream = Stream(auth, l)

    # Filter the stream on the single keyword held in `subject`.
    subject = 'brexit'
    stream.filter(track=[subject])
def call_stream():
    """Stream tweets mentioning 'australia'; warn instead of raising on failure.

    Any exception raised while creating or running the stream is turned into
    a warning that includes the original error.
    """
    try:
        stream = tweepy.streaming.Stream(auth, StreamListener())
        stream.filter(track=['australia'])
    except Exception as ex:
        # Include the cause so the warning is actionable.
        warnings.warn("Streaming failed: %r" % (ex,))
Ejemplo n.º 17
0
def call_stream():
    """Stream tweets from a fixed bounding box; warn instead of raising.

    The box is passed to the stream's ``locations`` filter. Any exception
    raised while creating or running the stream is turned into a warning
    that includes the original error.
    """
    try:
        stream = tweepy.streaming.Stream(auth, StreamListener())
        stream.filter(locations=[144.5937, -38.59, 145.5125, -37.5113])
    except Exception as ex:
        # Include the cause so the warning is actionable.
        warnings.warn("Streaming failed: %r" % (ex,))
Ejemplo n.º 18
0
        if status_code == 420:
            #returning False in on_data disconnects the stream
            return False
        if status_code == 403:
            print(
                "The request is understood, but it has been refused or access is not allowed. Limit is maybe reached"
            )
            return False


# API object whose auth handler is reused for the stream.
# NOTE(review): retry_errors is given a bare integer here; tweepy documents it
# as the HTTP status codes to retry on -- confirm the intended value.
api = tweepy.API(auth,
                 wait_on_rate_limit=True,
                 wait_on_rate_limit_notify=True,
                 retry_count=10,
                 retry_delay=5,
                 retry_errors=5)
# Stream object
myStreamListener = StreamListener()
myStream = tweepy.Stream(auth=api.auth, listener=myStreamListener)
# Start the stream for the given keywords on a background thread.
# ``async`` is a reserved word since Python 3.7; tweepy renamed the keyword
# to ``is_async`` (tweepy >= 3.7).
myStream.filter(languages=["en"],
                track=[
                    'python', 'html', 'javascript', 'java', 'nodejs',
                    'elasticsearch', 'c++', 'stackoverflow', 'MongoDB'
                ],
                is_async=True)

# Useful Links:
# ElasticSearch Data : http://localhost:9200/_cat/indices?v
# Search : http://localhost:9200/my-tweets/_search?q='input query'&size=100
Ejemplo n.º 19
0
Archivo: main.py Proyecto: gabkk/binbot
def checkTwitter(currency):
    """Track the cashtag ``'$' + currency`` on the Twitter stream.

    Marked by the original author as not working for the moment.
    """
    listener = StreamListener()
    twitter_stream = tweepy.Stream(auth=auth.auth(), listener=listener)
    track_terms = ['$' + currency]
    twitter_stream.filter(track=track_terms)
Ejemplo n.º 20
0
# OAuth handler for the Twitter API; the four credential names are defined
# outside this block.
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)

# Elasticsearch client created with default arguments; used by the listener
# below to store tweets.
es = Elasticsearch()


class StreamListener(tweepy.StreamListener):
    """Listener that stores the JSON of every received status in Elasticsearch."""

    # Text wrapper with a four-space indent; not used by the methods below.
    status_wrapper = TextWrapper(
        width=60,
        initial_indent='    ',
        subsequent_indent='    ',
    )

    def on_status(self, status):
        """Index ``status._json`` into ``idx_twp``; print any error raised."""
        try:
            tweet_doc = status._json
            es.create(index="idx_twp", doc_type="twitter_twp", body=tweet_doc)
        except Exception as err:
            print(err)


# Bind the OAuth handler and a fresh listener instance to a stream.
streamer = tweepy.Stream(auth=auth, listener=StreamListener())

# Replace the keywords in the filter call below with your own, for example:
# terms = ['big data', 'cloud computing']

# Start streaming tweets that match the tracked keywords.
streamer.filter(track=['cloud computing', 'bigdata'])
Ejemplo n.º 21
0
def stream_tweets(limit, callback):
    """Stream English tweets for 'OnThisDay' or from user id 2278940227.

    ``limit`` and ``callback`` are accepted but not used by this function.
    """
    listener = StreamListener()
    twitter_stream = tweepy.Stream(auth=api.auth, listener=listener)
    filter_args = {
        'track': ['OnThisDay'],
        'follow': ['2278940227'],
        'languages': ['en'],
    }
    twitter_stream.filter(**filter_args)
Ejemplo n.º 22
0
                     'location': location, 'coordinates': cor, 'hashtags': hashtags, 'created': created}


            # Save the refined Tweet data to MongoDB
            collection.insert_one(tweet)

            # Optional - Print the username and text of each Tweet to your console in realtime as they are pulled from the stream
            print(username + ':' + ' ' + text)
            return True
        except Exception as e:
            print(e)

        #     t = json.loads(data)
        #     # grab the created_at data from the Tweet to use for display and change it to Data object
        #     created_at = t['created_at']
        #     dt = datetime.datetime.strptime(created_at, '%a %b %d %H:%M:%S +0000 %Y')
        #     t['created_at'] = dt
        #     # print a message to the screen that we have collected a tweet
        #     print('tweet inserted')
        # except Exception as e:
        #     print(e)


# OAuth handler; the four credential names are defined outside this block.
auth = OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
# Set up the listener. The API object handed to it is created with
# wait_on_rate_limit=True, which the original author notes is needed to help
# with Twitter API rate limiting.
listener = StreamListener(
    api=tweepy.API(wait_on_rate_limit=True, wait_on_rate_limit_notify=True, compression=True))
# NOTE(review): `tweet_mode` and `until` are passed as extra keywords to
# tweepy.Stream -- confirm the installed tweepy version honours them.
streamer = tweepy.Stream(auth=auth, listener=listener, tweet_mode='extended', until='2020-04-22')
# WORDS and language are module-level names defined outside this block.
print("Tracking: " + str(WORDS))
streamer.filter(track=WORDS, languages=language)
Ejemplo n.º 23
0
    def __init__(self, key):
        """Store ``key`` and create a listener that calls back into this object.

        The listener's ``on_data`` and ``on_error`` hooks are replaced by this
        instance's ``on_messages_received`` and ``on_error`` methods.
        """
        self.key = key

        listener = StreamListener()
        self.listener = listener
        listener.on_data = self.on_messages_received
        listener.on_error = self.on_error
Ejemplo n.º 24
0
def readData(socket):
    """Stream tweets for a fixed tag list through a listener bound to ``socket``."""
    oauth = OAuthHandler(consumer_key, consumer_secret)
    oauth.set_access_token(access_token, access_token_secret)
    socket_listener = StreamListener(socket)
    twitter_stream = Stream(oauth, socket_listener)
    twitter_stream.filter(track=["layoffs", "covid", "corona", "wfh"])
        return False

    def on_data(self, data):
        """Store geotagged US tweets from one raw stream message in MongoDB.

        Messages without coordinates, without a place, or whose place is not
        in the US are skipped. Errors are printed, never raised.

        Args:
            data: Raw JSON text of a single stream message.

        Returns:
            None in every case.
        """
        try:
            # Decode the JSON from Twitter
            datajson = json.loads(data)
            coordinates = datajson.get('coordinates')
            # 'place' may be null or absent; treat that as "no country"
            # instead of failing on every such message.
            place = datajson.get('place') or {}
            if not coordinates or place.get('country_code') != "US":
                return
            print("Tweet collected at " + str(datajson.get('created_at')),
                  str(coordinates))
            # Reuse one client per listener instead of opening (and never
            # closing) a new connection for every message.
            client = getattr(self, '_mongo_client', None)
            if client is None:
                client = MongoClient(MONGO_HOST)
                self._mongo_client = client
            # The usa_db database and usa_tweets_collection collection are
            # created on first insert if they do not exist. insert_one
            # replaces the deprecated Collection.insert.
            client.usa_db.usa_tweets_collection.insert_one(datajson)
        except Exception as e:
            print(e)


# OAuth handler; the four credential names are defined outside this block.
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
# Set up the listener. The API object handed to it is created with
# wait_on_rate_limit=True, which the original author notes is needed to help
# with Twitter API rate limiting.
listener = StreamListener(api=tweepy.API(wait_on_rate_limit=True))
streamer = tweepy.Stream(auth=auth, listener=listener)
# Filter by a bounding box of four coordinates; combined with the listener's
# "US" country check this presumably covers the contiguous United States --
# TODO confirm the coordinate order expected by tweepy.
streamer.filter(locations=[-124.848974, 24.396308, -66.885444, 49.384358])
Ejemplo n.º 26
0
        status_dict['mapping_location'] = getMappingLocation(
            status_dict['geo_coordinates'],
            status_dict['author_geo_coordinates'])
        #add record to kinesis / data producer
        kinesis.put_record(StreamName="wegmans_tweets",
                           Data=json.dumps(status_dict),
                           PartitionKey="filler")

    def on_error(self, status_code):
        """Print the status code; return False for 420, otherwise None."""
        print(status_code)
        return False if status_code == 420 else None


# Script entry point: authenticate with Twitter and stream the #Wegmans hashtag.
if __name__ == '__main__':
    # Get Twitter credentials from the `creds` object (defined or imported
    # outside this block).
    consumer_key = creds.consumer_key
    consumer_secret = creds.consumer_secret
    access_token = creds.access_token
    access_token_secret = creds.access_token_secret

    # Initialise the Twitter API
    auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_token, access_token_secret)
    api = tweepy.API(auth)

    stream_listener = StreamListener()
    stream = tweepy.Stream(auth=api.auth, listener=stream_listener)
    # The return value of filter() is kept in `stat` but not used in the
    # visible code.
    stat = stream.filter(track=["#Wegmans"])
                              create_stream(client,stream_name)
                              print 'Creating Kinesis stream... Please wait...'
                                  time.sleep(60)
                              except:
                                  pass

# Check the delivery stream; start the Twitter stream only if it is ACTIVE.
stream_status = client.describe_delivery_stream(DeliveryStreamName=stream_name)
if stream_status['DeliveryStreamDescription']['DeliveryStreamStatus'] == 'ACTIVE':
    # Parenthesised single-argument print works on Python 2 and 3.
    print("\n ==== KINESES ONLINE ====")
    auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_token, access_token_secret)
    api = tweepy.API(auth)

    searched_list = search_name

    streamListener = StreamListener(client, searched_list)
    stream = tweepy.Stream(auth=api.auth, listener=streamListener)

    # Reconnect forever. Only Exception is caught so Ctrl-C can still stop
    # the script, and a short pause avoids a tight reconnect loop.
    while True:
        try:
            stream.filter(track=searched_list)
        except Exception:
            time.sleep(5)

stream_status = client.describe_delivery_stream(DeliveryStreamName=stream_name)
if stream_status['DeliveryStreamDescription']['DeliveryStreamStatus'] == 'ACTIVE':
    print "\n ==== KINESES ONLINE ===="
    auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_token, access_token_secret)
    api = tweepy.API(auth)
    
Ejemplo n.º 28
0
def run_stream_listener():
    """Start a background Twitter stream tracking the module-level ``WORDS``."""
    sl = StreamListener()
    stream = Stream(auth=twitter_api.auth, listener=sl)
    # ``async`` is a reserved word since Python 3.7; tweepy renamed the
    # keyword to ``is_async`` (tweepy >= 3.7).
    stream.filter(track=WORDS, is_async=True)
Ejemplo n.º 29
0
                        status.coordinates)  #convert coordinates to string

                table.insert(
                    dict(
                        user_description=status.user.description,
                        user_location=status.user.location,
                        coordinates=coords,
                        text=status.text,
                        user_name=status.user.screen_name,
                        user_created=status.user.created_at,
                        user_followers=status.user.followers_count,
                        id_str=status.id_str,
                        created=status.created_at,
                        retweet_count=status.retweet_count,
                        user_bg_color=status.user.profile_background_color,
                    ))

        except:
            print('This one returned nothing')

    def on_error(self, status_code):
        """Return False for status code 420, otherwise None."""
        return False if status_code == 420 else None


# Bind the auth handler and a fresh listener to a stream.
twitter_stream = Stream(auth, StreamListener())
# TRACK_TERMS and LANGUAGES are module-level names defined outside this block.
# NOTE(review): filter() is called without an async flag, so sample() below
# presumably only runs after the filtered stream has stopped -- confirm that
# this ordering is intended.
twitter_stream.filter(track=TRACK_TERMS, languages=LANGUAGES)
twitter_stream.sample(languages=['en', 'nl'])

# This script runs indefinitely; a time parameter still needs to be added.
Ejemplo n.º 30
0
    def on_status(self, status):
        """Print the text of the received status."""
        # The attribute is ``text`` (as used by the other listeners in this
        # file), not ``txt``. Parenthesised print works on Python 2 and 3.
        print(status.text)

    def on_data(self, data):
        """Send the raw message to the 'twitter' delivery stream via ``self.kinesis``."""
        record = {'Data': data}
        self.kinesis.put_record(DeliveryStreamName='twitter', Record=record)

    def on_error(self, status):
        """Print the error status and return False."""
        # Parenthesised single-argument print works on Python 2 and 3.
        print(status)
        return False


# Firehose client and the delivery stream whose status is checked below.
stream_name = 'project4_capstone_stream'
client = boto3.client('firehose')

stream_status = client.describe_delivery_stream(DeliveryStreamName=stream_name)
if stream_status['DeliveryStreamDescription']['DeliveryStreamStatus'] == 'ACTIVE':
    # Parenthesised single-argument print works on Python 2 and 3.
    print("\n ==== KINESES ONLINE ====")

# Twitter authentication; the credential names are defined outside this block.
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_secret)
api = tweepy.API(auth)

# The listener receives the Firehose client.
streamListener = StreamListener(client)
stream = tweepy.Stream(auth=api.auth, listener=streamListener)

# Stream English tweets that mention the tracked term.
stream.filter(track=['WWDC2017'],
              languages=['en'])