Example No. 1
    def __init__(self, is_ai=False):
        StreamListener.__init__(self)
        self._is_ai = is_ai
        self._kafka_producer = KafkaProducer(
            bootstrap_servers=['kfk-brk-1.au.adaltas.cloud:6667',
                               'kfk-brk-2.au.adaltas.cloud:6667',
                               'kfk-brk-3.au.adaltas.cloud:6667'],
            security_protocol='SASL_PLAINTEXT',
            sasl_mechanism='GSSAPI')
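
A listener like this typically pushes each incoming tweet to Kafka from its on_data callback. The following is a minimal, hypothetical sketch of that step; the topic name 'tweets' and the bytes encoding are assumptions, not part of the original example.

    # Hypothetical companion method (not from the original source)
    def on_data(self, raw_data):
        # kafka-python expects bytes unless a value_serializer is configured
        self._kafka_producer.send('tweets', value=raw_data.encode('utf-8'))
        return True  # keep the stream connected
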
Example No. 2
 def __init__(self, db_path, verbose, *args, **kwargs):
     self._verbose = verbose
     #setup database:
     self._conn = sqlite3.connect(db_path)
     c = self._conn.cursor()
     # Create table
     #             c.execute('''CREATE TABLE IF NOT EXISTS GeoTweet
     #                          (text text, lat real, lng real, time text, rt text, rt_count text, tw_id text)''')
     cmd = """
         CREATE TABLE IF NOT EXISTS "capture_geotweet" (
             "id" integer NOT NULL PRIMARY KEY AUTOINCREMENT, -- table ID
             "time" varchar(100) NOT NULL, -- time in string, e.g., "Sun Jun 22 14:24:56 +0000 2014"
             "text" varchar(200) NOT NULL, -- content of tweet
             "lat" decimal NOT NULL, -- latitude of tweet (all tweets in the database are geotagged)
             "lng" decimal NOT NULL, -- longitude of tweet
             "rt" bool NOT NULL,  -- whether the tweet was retweeted
             "rt_count" integer NOT NULL, -- the number of times the tweet has been retweeted
             "tw_id" varchar(50) NOT NULL -- the Twitter assigned ID
         , "epoch_time" integer);  -- time of tweet in seconds since Thursday Jan 1st, 1970 00:00:00 GMT
     """
     c.execute(cmd)
     self._conn.commit()
     self._time_format = '%a %b %d %H:%M:%S +0000 %Y'
     self._pattern = re.compile(r"[\W,.:/']+")
     StreamListener.__init__(self, *args, **kwargs)
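
As the CREATE TABLE statement suggests, a matching on_status handler would insert one row per geotagged tweet. This is a hedged sketch, not the original code; it assumes `calendar` is imported and that the rt column comes from status.retweeted.

 # Hypothetical companion method (not from the original source)
 def on_status(self, status):
     if status.coordinates is None:
         return True  # the table only stores geotagged tweets
     lng, lat = status.coordinates['coordinates']  # GeoJSON order is (lng, lat)
     created = status.created_at.strftime(self._time_format)
     epoch = calendar.timegm(status.created_at.utctimetuple())  # requires `import calendar`
     c = self._conn.cursor()
     c.execute('INSERT INTO capture_geotweet '
               '(time, text, lat, lng, rt, rt_count, tw_id, epoch_time) '
               'VALUES (?, ?, ?, ?, ?, ?, ?, ?)',
               (created, status.text, lat, lng, status.retweeted,
                status.retweet_count, status.id_str, epoch))
     self._conn.commit()
     return True
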
Example No. 3
    def __init__(self, collection, stream_name, data_dir, seed=False, duration_minutes=15, tweets_per_record=25000):
        StreamListener.__init__(self)
        log.info("Streaming %s tweets for %s collection into %s. Rotating files every %s minutes. Rotating "
                 "records every %s tweets",
                 stream_name, collection, data_dir, duration_minutes, tweets_per_record)
        self.collection = collection
        self.stream_name = stream_name
        self.duration_minutes = duration_minutes
        self.tweets_per_record = tweets_per_record
        self.data_dir = data_dir
        self.seed = seed

        #Create data_dir
        if not os.path.exists(self.data_dir):
            os.makedirs(self.data_dir)

        self.period_start = None
        self.period_start_time = None
        self.warc = None
        self.warc_filepath = None
        self.segment_origin_id = None
        self.payload = ""
        self.segment = 1
        self.segment_total_length = 0
        self.tweet_count = 0
        #These will be set by StreamDecorator
        self.session = None
        self.url = None
Example No. 4
 def __init__(self, api=None):
     StreamListener.__init__(self, api)
     self.d = datetime.date.today()
     self.out_prefix = "D:/workspace/python/MyPythonApp/works/output/filter"
     self.fout = open("%s-%s.jsonl" % (self.out_prefix, self.d.isoformat()),
                      'a')
     self.day = self.d.day
Example No. 5
        def __init__(self,db_path,verbose,*args,**kwargs):
            self._verbose = verbose
            #setup database:
            self._conn = sqlite3.connect(db_path)
            c = self._conn.cursor()
            # Create table
#             c.execute('''CREATE TABLE IF NOT EXISTS GeoTweet
#                          (text text, lat real, lng real, time text, rt text, rt_count text, tw_id text)''')
            cmd = """
                CREATE TABLE IF NOT EXISTS "capture_geotweet" (
                    "id" integer NOT NULL PRIMARY KEY AUTOINCREMENT, -- table ID
                    "time" varchar(100) NOT NULL, -- time in string, e.g., "Sun Jun 22 14:24:56 +0000 2014"
                    "text" varchar(200) NOT NULL, -- content of tweet
                    "lat" decimal NOT NULL, -- latitude of tweet (all tweets in the database are geotagged)
                    "lng" decimal NOT NULL, -- longitude of tweet
                    "rt" bool NOT NULL,  -- whether the tweet was retweeted
                    "rt_count" integer NOT NULL, -- the number of times the tweet has been retweeted
                    "tw_id" varchar(50) NOT NULL -- the Twitter assigned ID
                , "epoch_time" integer);  -- time of tweet in seconds since Thursday Jan 1st, 1970 00:00:00 GMT
            """
            c.execute(cmd)
            self._conn.commit()
            self._time_format = '%a %b %d %H:%M:%S +0000 %Y'
            self._pattern = re.compile(r"[\W,.:/']+")
            StreamListener.__init__(self,*args,**kwargs)
Example No. 6
 def __init__(self, api=None, fobj=None):
     StreamListener.__init__(self, api)
     if fobj is None:
         fobj = sys.stdout
     self.fobj = fobj
     self.ratelimit = open('ratelimits.log', 'a')
     self.ratelimit.write('\n\n# Stream restarted on {0}'.format(time.strftime('%c')))
Example No. 7
 def __init__(self, keywords, collection, *args):
     """
     add list of keywords and pymongo connection to a mongodb collection to
     StreamListener attributes
     """
     self.keywords = keywords
     self.collection = collection
     StreamListener.__init__(self, *args)
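
The docstring above describes a listener that pairs tracked keywords with a pymongo collection, so a matching handler would write each status into that collection. The sketch below is a hypothetical illustration (the stored fields are assumptions), not the project's actual method.

 # Hypothetical companion method (not from the original source)
 def on_status(self, status):
     self.collection.insert_one({
         'id_str': status.id_str,
         'created_at': status.created_at,
         'text': status.text,
         'user': status.user.screen_name,
     })
     return True
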
Example No. 8
    def __init__(self, hashtags, session, engine):
        StreamListener.__init__(self)
        self.cpt = 0   # FIXME: test if useful
        self.eu = EncodingUtils()

        self.hashtags = self.format_hashtags(hashtags)
        self.session = session  # bridge to the db
        self.engine = engine
Example No. 9
 def __init__(self, botname, taghandler, customhandler):
     self.botname = botname
     self.rsettag = re.compile(r"^@%s +@?([a-zA-Z0-9_-]+) *= *(\S+)$" % self.botname, re.IGNORECASE)
     self.rgettag = re.compile(r"^@%s +@?([a-zA-Z0-9_-]+) *$" % self.botname, re.IGNORECASE)
     self.rdeltag = re.compile(r"^@%s +/del +(\S+) *$" % self.botname, re.IGNORECASE)
     StreamListener.__init__(self)
     self.taghandler = taghandler
     self.customhandler = customhandler
Example No. 10
 def __init__(self, api=None):
     StreamListener.__init__(self, api=api)
     ts = time.strftime("./data/%Y%m%d%H%M")
     self.statusf = open(ts+'_status.csv','w',newline='')
     self.statusw = csv.writer(self.statusf)
     self.statusw.writerow(['id', 'created_at', 'coordinates',\
                            'hashtags', 'user_mentions', 'symbols', 'urls', \
                            'media', \
                            'in_reply_to_screen_name', \
                            'in_reply_to_user_id_str', \
                            'in_reply_to_status_id_str', \
                            'place', 'retweeted_status_id', 'source', \
                             'text', 'user id'
                             # other attributes exist; they are listed below:
                             #, status.withheld_copyright,  # optional
                             #status.withheld_in_countries,  # optional
                             #status.withheld_scope,  # optional
                             #status.truncated,  # defaults to False
                             #status.retweeted, status.retweet_count,  # for no rt
                             #status.scopes, possibly_sensitive,
                             #status.lang, status.filter_level,  # lang=en
                             #status.favorited, status.favorite_count,
                             #status.current_user_retweet,
                             #status.contributors, status.annotations
                            ])
     self.userf = open(ts+'_user.csv','w',newline='')
     self.userw = csv.writer(self.userf)
     self.userw.writerow(['created_at', 'default_profile', \
                          #user.default_profile_image, \
                          'description', \
                          #user.entities, \
                          'favourites_count', \
                          #user.follow_request_sent, user.following,\#relate to given user
                          'followers_count', 'friends_count', \
                          'geo_enabled', 'id_str', 'is_translator', \
                          'lang', 'listed_count', 'location', \
                          #user.notifications, \
                          'name', \
                          #user.profile_background_color, user.profile_background_image_url, \
                          #user.profile_background_image_url_https, user.profile_background_tile, \
                          #user.profile_banner_url, user.profile_image_url, \
                          #user.profile_image_url_https, user.profile_link_color, \
                          #user.profile_sidebar_border_color, user.profile_sidebar_fill_color, \
                          #user.profile_text_color, user.profile_use_background_image, \
                          'protected', 'screen_name', \
                          #user.show_all_inline_media, user.status, \
                          'statuses_count', 'time_zone', 'user.url', \
                          #user.utc_offset, \
                          #user.withheld_in_countries, user.withheld_scope, 
                          'verified'])
     self.deletef = open(ts+'_delete.csv','w',newline='')
     self.deletew = csv.writer(self.deletef)
     self.deletew.writerow(['status_id','user_id'])
     self.logf = open('stream.log','a')
     self.count = 0
     self.err_count = 0
     self.time = 0.0
Example No. 11
 def __init__(self,timer):
     self.inc = 0
     StreamListener.__init__(self)
     # report the start of data collection...
     print "Gathering data at %s"%(str(ctime()))
     self.startTime = time()
     print "Start Time = %s"%(str(ctime()))
     self.timer = timer
     self.count = 0
Example No. 12
 def __init__(self, timer):
     self.inc = 0
     StreamListener.__init__(self)
     # report the start of data collection...
     print "Gathering data at %s" % (str(ctime()))
     self.startTime = time()
     print "Start Time = %s" % (str(ctime()))
     self.timer = timer
     self.count = 0
Example No. 13
 def __init__(self, output_file, time_limit):
     # attribute to get listener start time
     self.start_time = datetime.datetime.now()
     # attribute to set time limit for listening
     self.time_limit = time_limit
     # attribute to set the output file
     self.output_file = output_file
     # initiate superclass's constructor
     StreamListener.__init__(self)
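
Given the start_time and time_limit attributes set above, the listener presumably stops itself once the limit is reached. A minimal sketch of such an on_data, assuming time_limit is expressed in seconds and output_file collects one raw JSON tweet per line; this is not the original method.

 # Hypothetical companion method (not from the original source)
 def on_data(self, raw_data):
     elapsed = (datetime.datetime.now() - self.start_time).total_seconds()
     if elapsed > self.time_limit:
         return False  # returning False tells tweepy to disconnect
     with open(self.output_file, 'a') as f:
         f.write(raw_data if raw_data.endswith('\n') else raw_data + '\n')
     return True
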
Example No. 14
 def __init__(self):
     StreamListener.__init__(self)
     self.dataAmount = 0
     self.client = MongoClient()
     self.twitterCollection = self.client.test.twitters
     # for text preprocessing
     self.wordMacher = re.compile(r'[a-z]+')
     self.urlMacher = re.compile(r'http[s]?://[\S]+')
     self.atMacher = re.compile(r'[\S]*@[\S]+')
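
The three compiled patterns above suggest a preprocessing step before tweets are stored in MongoDB. A hedged sketch of that step, assuming the cleaned word list is what gets persisted:

 # Hypothetical companion method (not from the original source)
 def on_status(self, status):
     text = self.urlMacher.sub('', status.text.lower())
     text = self.atMacher.sub('', text)
     words = self.wordMacher.findall(text)
     self.twitterCollection.insert_one({'id_str': status.id_str,
                                        'text': status.text,
                                        'words': words})
     self.dataAmount += 1
     return True
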
Example No. 15
 def __init__(self,timer):
     self.inc = 0
     StreamListener.__init__(self)
     # report the start of data collection...
     print "Gathering data at %s"%(str(ctime()))
     self.startTime = time()
     print "Start Time = %s"%(str(ctime()))
     self.timer = timer
     self.count = 0
     self.conn = psycopg2.connect(database="postgres", user="******", password="******", host="localhost", port="5432")
Example No. 16
 def __init__(self, count=100):
     """
     creates a Custom Listener of Tweets that will end after a given amount of streamed Tweets
     :param count: number of Tweets to stream
     """
     # instantiating the super class StreamListener
     StreamListener.__init__(self)
     self.max_count = count
     self.counter = 0
     self.tweets = []
Example No. 17
 def __init__(self,useBrowser,ofile,colorDict):
     StreamListener.__init__(self)
     self.useBrowser = useBrowser
     self.ofile = ofile
     (self.twMap,self.controller) = mapper.initMap(self.ofile,self.useBrowser)
     self.allCoordinates = []
     self.flog = codecs.open("log.txt",mode="w",encoding="utf-8")
     self.gcCount = 0
     self.tweetCount = 0
     self.colorDict = colorDict
Example No. 18
 def __init__(self, count=100):
     """
     creates a Custom Listener of Tweets that will end after a given amount of streamed Tweets
     :param count: number of Tweets to stream
     """
     # instantiating the super class StreamListener
     StreamListener.__init__(self)
     self.max_count = count
     self.counter = 0
     self.tweets = []
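
As the docstring says, this listener ends after a given number of streamed tweets. A minimal sketch of the corresponding handler, not part of the original example:

 # Hypothetical companion method (not from the original source)
 def on_status(self, status):
     self.tweets.append(status)
     self.counter += 1
     # returning False disconnects the stream once max_count tweets were collected
     return self.counter < self.max_count
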
Example No. 19
    def __init__(self, *args, **kwargs):
        state_dir = kwargs.pop('state_dir')
        self.post_replies = kwargs.pop('post_replies', False)
        self.gather = kwargs.pop('gather', None)
        StreamListener.__init__(self, *args, **kwargs)
        self.me = self.api.me()

        self._state = State.load(self.me.screen_name, state_dir)

        if self.gather:
            os.makedirs(self.gather, exist_ok=True)
Example No. 20
 def __init__(self, path, fileName, StartDate):
     StreamListener.__init__(self)
     self.fileName = r'{0}/{1}'.format(path, fileName)
     self.start_date = StartDate
     self.fileNumber = 1
     self.base_file = open(r'{0}_{1}_{2}.json'.format(
         self.fileName, self.start_date, self.fileNumber),
                           'a+',
                           encoding="utf8")
     self.dataCount = 0
     self.maxCount = 100
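
The fileNumber, dataCount and maxCount attributes imply that output rolls over to a new numbered JSON file every maxCount tweets. A hedged sketch of that rotation logic, not the original method:

 # Hypothetical companion method (not from the original source)
 def on_data(self, raw_data):
     self.base_file.write(raw_data)
     self.dataCount += 1
     if self.dataCount >= self.maxCount:
         self.base_file.close()
         self.fileNumber += 1
         self.dataCount = 0
         self.base_file = open(r'{0}_{1}_{2}.json'.format(
             self.fileName, self.start_date, self.fileNumber),
                               'a+',
                               encoding="utf8")
     return True
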
Example No. 21
def main(search_name):
    stream_name = search_name[0]
    client = boto3.client('firehose',
                          region_name='us-east-2',
                          aws_access_key_id=aws_key_id,
                          aws_secret_access_key=aws_key)

    try:
        create_stream(client, stream_name)
        print 'Creating Kinesis stream... Please wait...'
        time.sleep(60)
    except:
        pass

    stream_status = client.describe_delivery_stream(
        DeliveryStreamName=stream_name)
    if stream_status['DeliveryStreamDescription'][
            'DeliveryStreamStatus'] == 'ACTIVE':
        print "\n ==== KINESES ONLINE ===="
    auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_token, access_token_secret)
    api = tweepy.API(auth)

    searched_list = search_name

    streamListener = StreamListener(client, searched_list)
    stream = tweepy.Stream(auth=api.auth, listener=streamListener)

    while True:
        try:
            stream.filter(track=searched_list)
        except:
            time.sleep(5)
            continue
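
The StreamListener used here is constructed with the Firehose client and the search list, so a listener along these lines would push each raw tweet into the delivery stream from on_data. The class below is a hypothetical reconstruction (the newline framing of records is an assumption), not the code from this example.

# Hypothetical listener (not from the original source)
class StreamListener(tweepy.StreamListener):
    def __init__(self, client, searched_list):
        super(StreamListener, self).__init__()
        self.client = client
        self.stream_name = searched_list[0]

    def on_data(self, raw_data):
        data = raw_data if isinstance(raw_data, bytes) else raw_data.encode('utf-8')
        # one Firehose record per tweet, newline-delimited for downstream batching
        self.client.put_record(DeliveryStreamName=self.stream_name,
                               Record={'Data': data + b'\n'})
        return True
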
Example No. 22
 def __init__(self, useBrowser, filename, colorDict):
     StreamListener.__init__(self)
     self.useBrowser = useBrowser
     self.ofile = filename
     (self.twMap, self.controller) = mapper.initMap(self.ofile,
                                                    self.useBrowser)
     self.allCoordinates = []
     self.flog = codecs.open("log.txt", mode="w", encoding="utf-8")
     self.gcCount = 0
     self.tweetCount = 0
     self.colorDict = colorDict
Example No. 23
 def __init__(self,timer):
     self.inc = 0
     StreamListener.__init__(self)
     # report the start of data collection...
     print "Gathering data at %s"%(str(ctime()))
     self.startTime = time()
     print "Start Time = %s"%(str(ctime()))
     ### When initialized, connects to MongoDB database tweets
     self.db=pymongo.MongoClient().tweets
     self.timer = timer
     self.count = 0
Example No. 24
	def __init__(self,timer):
		self.inc = 0
		StreamListener.__init__(self)
		# report the start of data collection...
		print "Gathering data at %s"%(str(ctime()))
		self.startTime = time()
		print "Start Time = %s"%(str(ctime()))
		self.timer = timer
		self.count = 0

		self.f = open('sentiment_corpus.csv', 'a')
		self.writer = csv.writer(self.f, lineterminator='\n')
Example No. 25
    def __init__(self, user_id_to_monitor, handle_status_callback):
        """ user_id_to_monitor: a valid Twitter user handle

            handle_status_callback: a callback that takes 
            (hashtag, originating_twitter_user_id, tweet) as parameters.
            
            The client callback will be invoked on every hashtag in a tweet
            that mentions it."""
                       
        StreamListener.__init__(self)
        self.user_id_to_monitor = user_id_to_monitor
        self.handle_status = handle_status_callback
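
Per the docstring, the callback fires once for every hashtag in any tweet that mentions the monitored handle. A hedged sketch of that dispatch logic (the mention check by screen name is an assumption), not the original method:

    # Hypothetical companion method (not from the original source)
    def on_status(self, status):
        handle = self.user_id_to_monitor.lstrip('@').lower()
        mentions = status.entities.get('user_mentions', [])
        if not any(m['screen_name'].lower() == handle for m in mentions):
            return True
        for tag in status.entities.get('hashtags', []):
            self.handle_status(tag['text'], status.user.id_str, status)
        return True
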
Example No. 26
 def __init__(self, api=None, **options):
     StreamListener.__init__(self, api)
     self.json = import_simplejson()
     self.print_data = options.get("print_data", False)
     self.print_data = options.get("created_at", False)
     self.print_data = options.get("user", False)
     self.print_data = options.get("id", False)
     self.print_data = options.get("favorite_count", False)
     self.print_data = options.get("retweet_count", False)
     self.print_data = options.get("text", False)
     self.print_data = options.get("source_url", False)
     self.print_data = options.get("print_data", False)
     self.print_data = options.get("print_data", False)
     self.delay = int(options.get("delay", 0))
Example No. 27
    def __init__(self, name, output_dir):
        StreamListener.__init__(self)
        self.name = name
        self.output_dir = output_dir

        if not os.path.exists(output_dir):
            os.mkdir(output_dir)

        current_dt = datetime.utcfromtimestamp(int(
            time.time())).strftime('%Y-%m-%d-%H')
        self.output = bz2.open(
            os.path.join(self.output_dir, '{0}.bz2'.format(current_dt)), 'at')
        self.next_hour_ts = (datetime.strptime(current_dt, '%Y-%m-%d-%H') -
                             datetime(1970, 1, 1)).total_seconds() + 3600
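
The next_hour_ts bookkeeping above points to hourly file rotation. A minimal sketch of the corresponding on_data, assuming one raw JSON tweet per line is appended to the current bz2 file; this is not the original method.

    # Hypothetical companion method (not from the original source)
    def on_data(self, raw_data):
        if time.time() >= self.next_hour_ts:
            self.output.close()
            current_dt = datetime.utcfromtimestamp(int(
                time.time())).strftime('%Y-%m-%d-%H')
            self.output = bz2.open(
                os.path.join(self.output_dir, '{0}.bz2'.format(current_dt)), 'at')
            self.next_hour_ts = (datetime.strptime(current_dt, '%Y-%m-%d-%H') -
                                 datetime(1970, 1, 1)).total_seconds() + 3600
        self.output.write(raw_data if raw_data.endswith('\n') else raw_data + '\n')
        return True
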
Example No. 28
    def stream_tweets(self, hash_tag_list):

        """ Initialize Stream & Use Search Engine with Hashtags """

        # Clean The Status Bar
        self.statusbar_table.clear()

        # Handles Twitter authentication and the connection to the Twitter Streaming API
        self.listener = StreamListener()
        self.stream = Stream(self.auth, self.listener)

        # Capture the Tweets in a List
        tweetsList = []

        try:

            for hashtag in hash_tag_list:

                for tweet in Cursor(self.twitter_client.search, q=hashtag, result_type='mixed', tweet_mode='extended', include_entities=True, include_rts=True, lang="en").items(self.num_of_tweets):
                    tweetsList.append(tweet)

                self.tweet_matrix.append(tweetsList.copy())
                tweetsList.clear()

        except Exception as e:
            self.statusbar_table.append('<center>Search Error: {0}'.format(e))

        # Stream Disconnect
        self.stream.disconnect()
Example No. 29
def extract_by_keyword(search_string):

    # Retrieve Credentials
    oauth_file = '../auth/oauth.txt'
    keys = []
    line = 'a'
    with open(oauth_file) as fp:
        while line:
            line = fp.readline().strip()
            keys.append(line)

    #Twitter credentials for the app
    consumer_key = keys[2]
    consumer_secret = keys[3]
    access_key = keys[0]
    access_secret = keys[1]

    #pass twitter credentials to tweepy
    auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_key, access_secret)
    api = tweepy.API(auth,
                     wait_on_rate_limit=True,
                     wait_on_rate_limit_notify=True)

    streamListener = StreamListener()
    stream = tweepy.Stream(auth=api.auth,
                           listener=streamListener,
                           tweet_mode='extended')
    with open("../data/out.csv", "w", encoding='utf-8') as f:
        f.write("date,user,is_retweet,is_quote,text,quoted_text\n")
    tags = ['corona', 'covid', 'virus', 'flu']
    stream.filter(track=tags, languages=['en'])
Example No. 30
def main(search_list):
    stream_name = search_list[0]
    client = boto3.client('firehose', region_name='us-east-1')
    try:
        create_stream(client, stream_name)
        print('Creating Kinesis stream... Please wait...')
        time.sleep(60)
    except:
        print("Failed to create Stream")
        pass
    stream_status = client.describe_delivery_stream(
        DeliveryStreamName=stream_name)
    if stream_status['DeliveryStreamDescription'][
            'DeliveryStreamStatus'] == 'ACTIVE':
        print("\n ==== KINESES ONLINE ====")
    auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_token, access_token_secret)
    api = tweepy.API(auth)

    streamListener = StreamListener(client, search_list)
    stream = tweepy.Stream(auth=api.auth, listener=streamListener)

    print('Starting streaming')
    while True:
        try:
            stream.filter(follow=["1094299419556626432"])
        except:
            time.sleep(5)
            print('Sleeping 5 sec before next filter')
            pass
Example No. 31
 def on_data(self, raw_data):
     if raw_data is None:
         mylog.error(
             "Tweepy says raw_data=None.  Wat.  Dropping tweet, or whatever it was."
         )
         return
     return StreamListener.on_data(self, raw_data)
Example No. 32
    def __init__(self,
                 api=None,
                 stream_options=None,
                 engine=None,
                 session_maker=None):
        if stream_options is None:
            stream_options = dict()

        StreamListener.__init__(self, api=api)
        AlchemyDatabase.__init__(self,
                                 engine=engine,
                                 session_maker=session_maker,
                                 declarative_base=SQLTwitterBase)
        self._auth = None
        self._stream = None
        self._stream_options = stream_options
        self.session = self.get_session()
Example No. 33
    def __init__(self, api=None, basename=None):
        StreamListener.__init__(self, api)
        if basename is None:
            basename = 'tweets'

        logHandler = TimedRotatingFileHandler(basename, when="H", interval=8)
        logFormatter = logging.Formatter('%(message)s')
        logHandler.setFormatter(logFormatter)
        logger = logging.getLogger('MyLogger')
        logger.addHandler(logHandler)
        logger.setLevel(logging.INFO)

        self.logHandler = logHandler
        self.logger = logger
        self.ratelimit = open('ratelimit.log', 'a')
        data = "\n\n# Stream restarted: {0}\n".format(time.strftime('%c'))
        print(data, file=self.ratelimit)
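
With the 8-hour TimedRotatingFileHandler configured above, writing tweets is just a matter of logging them. A minimal sketch, assuming each raw tweet is logged as one line; this is not the original handler.

    # Hypothetical companion method (not from the original source)
    def on_data(self, raw_data):
        self.logger.info(raw_data.strip())
        return True
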
Example No. 34
def main():
    while True:
        sl = StreamListener()
        stream = tweepy.Stream(OAuth, sl)
        try:
            stream.filter(languages=["en"],
                          track=['The Imitation Game', 'Fifty Shades of Grey'])
        except:
            print 'Exception occurred!'
Example No. 35
def getTweets(hashtags):
    auth = OAuthHandler(APIkey, APIsecretkey)
    auth.set_access_token(AccessToken, AccessTokenSecret)
    streamListener = StreamListener()
    stream = Stream(auth, streamListener)
    while True:
        try:
            stream.filter(track=hashtags)
        except:
            continue
Example No. 36
    def __init__(self, node, southwest, northeast, extras):

        logging.info("Beginning TwitterStreamer init")

        StreamListener.__init__(self)

        # Lock and target location
        self.lock = threading.Lock()
        self.buckets = {}
        self.deltas = {}
        self.uploaders = {}
        self.location = [southwest[LONG],southwest[LAT],northeast[LONG],northeast[LAT]]
        self.event_manager = EventManager()


        # Upload handler
        self.node = node

        logging.info("TwitterStreamer init successful")
Example No. 37
 def start(self):
     paralleldots.set_api_key(paralleldots_key)
     listener = StreamListener(self.batchedtweets, self.lock)
     auth = OAuthHandler(consumer_key, consumer_secret)
     auth.set_access_token(access_token, access_token_secret)
     stream = Stream(auth, listener)
     Thread(target=self.tweetfilter, args=(stream, )).start()
     thread = Thread(target=self.processtweets)
     thread.start()
     print("Started...")
Example No. 38
 def on_data(self, data):
     jsonData = json.loads(data)
     text1 = jsonData['text']
     text2 = jsonData['entities']['hashtags']
     #Change the file name if different files are needed.
     f1 = open('./tweetFile1.txt', 'ab')
     a = csv.writer(f1, delimiter=',')
     print(text1)
     a.writerow([str(text1.encode('utf-8'))])
     return StreamListener.on_data(self, data)
Example No. 39
def main():
    myStream = Stream(auth, StreamListener())
    start_time = time.time()
    end_time = start_time + 60*60*8 # 8 hours in seconds
    while (time.time() < end_time):
        try:
            # 'async' was renamed to 'is_async' in tweepy 3.7 (async is a Python keyword)
            myStream.filter(track=keywords, languages=["en"], is_async=True)
        except tweepy.TweepError as e:
            # covers rate limiting and other Twitter API errors
            print(" Twitter API error: " + str(e))
    parse_tweets()
Example No. 40
def main():
	#start the listener
	#this will stream tweets of the topic indicated in the filter
	stream_listener= StreamListener()
	stream = tweepy.Stream(auth=api.auth, listener=stream_listener)
	#establish a connection to the sql database
	con = getConnection()
	#get the keywords (hashtags) about the restaurant that the stream should pull, from the twitter_queries table, using the get_queries function
	rest_track=get_queries(con)
	stream.filter(track=rest_track)
	stream.flush()
	#Closes Connection to the SQL Database
	con.close()
Example No. 41
    def __init__(self, api=None, **options):

        StreamListener.__init__(self, api)
        Verbose.__init__(self, options.get("verbose_level", 1))

        self.queue = Queue(maxsize=options.get("queue_size", 20))
        self.tweet_list = OrganizedList(options.get("list_size", 250))
        self.queue_thread = Thread(target=self._listen)
        self.queue_thread.daemon = True
        self.running = False

        self.locked = options.get("locked", True)
        self.retweet_time = options.get("retweet_time", 10)
        self.fav_time = options.get("fav_time", 10)
        self.follow_time = options.get("follow_time", 10)
        self.error_time = options.get("error_time", 10)
        self.empty_time = options.get("empty_time", 10)
        self.interaction_time = options.get("interaction_time", 0)

        if options.get("load_timeline", True):
            self._load_timeline()
        if options.get("autostart", True):
            self.start()
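
The constructor creates a daemon thread targeting self._listen, which is not shown in this example. A speculative sketch of such a worker loop, assuming OrganizedList exposes an add() method:

    # Hypothetical worker loop (not from the original source)
    def _listen(self):
        while True:
            status = self.queue.get()    # blocks until a tweet is queued
            self.tweet_list.add(status)  # assumes OrganizedList has an add() method
            self.queue.task_done()
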
Example No. 42
 def on_data(self, data):
     jsonData = json.loads(data)
     text1 = jsonData['text']
     text2 = jsonData['entities']['hashtags']
     for hashtag in text2:
         text2 = hashtag['text']
         print text2, ":", text1
         f1 = open('./tweetFile_superBOWL6.csv', 'ab')
         a = csv.writer(f1, delimiter=',')
         a.writerow([
             "UJG123469",
             str(text2.encode('utf-8')),
             str(text1.encode('utf-8'))
         ])
     return StreamListener.on_data(self, data)
Example No. 43
def main():

    try:
        l = StreamListener()
        auth = OAuthHandler(consumer_key, consumer_secret)
        auth.set_access_token(access_token, access_token_secret)
        api = tweepy.API(auth)
        stream = Stream(auth, l)
        stream.filter(track=['#Disney'])

    except Exception as e:
        print("EXCEPTION IN MAIN FUNCTION!!!")
        print(e)
        print(type(e))
        print(e.__dict__)
        exit(1)
Example No. 44
def collect_tweets():
    MONGO_HOST= 'mongodb://localhost/twitterdb'  
    WORDS = ['zappy', 'FictionFone', 'fictionfone', 'Egypt']
#  ['#bigdata', '#AI', '#datascience', '#machinelearning', '#ml', '#iot']
 
    CONSUMER_KEY = "<redacted>"
    CONSUMER_SECRET = "<redacted>"
    ACCESS_TOKEN = "<redacted>"
    ACCESS_TOKEN_SECRET = "<redacted>"
    
    start_time = time.time()
    time_limit = 10

    auth = tweepy.OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
    auth.set_access_token(ACCESS_TOKEN, ACCESS_TOKEN_SECRET)
    #Set up the listener. The 'wait_on_rate_limit=True' is needed to help with Twitter API rate limiting.
    listener = StreamListener(start_time, time_limit) #, wait_on_rate_limit=True 
    streamer = tweepy.Stream(auth=auth, listener=listener)
    print("Tracking: " + str(WORDS))
    streamer.filter(track=WORDS)
    print("done collecting tweets")
Example No. 45
 def __init__(self, queue, *args, **kwargs):
     StreamListener.__init__(self, *args, **kwargs)
     self.queue = queue
Example No. 46
 def __init__(self):
     StreamListener.__init__(self)
Example No. 47
 def __init__(self, bot):
     StreamListener.__init__(self, api=None)
     self.bot = bot
     self.count = 0
Example No. 48
 def __init__(self, output):
     self.output = output
     StreamListener.__init__(self)
Example No. 49
 def __init__(self,count,keyword,api=None):
     StreamListener.__init__(self,api=api)
     self.count = count
     self.keyword = keyword
Example No. 50
 def __init__(self, output_file):
     StreamListener.__init__(self)
     self.output_file = os.path.join("/scratch/data/", output_file)
     self.tmpData = ""
     self.tweet_count = 0
Example No. 51
 def __init__(self, api=None):
     StreamListener.__init__(self, api)
     self.d = datetime.date.today()
     self.out_prefix = "D:/workspace/python/MyPythonApp/works/output/filter"
     self.fout = open("%s-%s.jsonl" % (self.out_prefix,  self.d.isoformat()), 'a' )
     self.day = self.d.day
Example No. 52
 def __init__(self, api=None):
     StreamListener.__init__(self, api=api)
Example No. 53
 def __init__(self, handler_class, api=None, **opts):
     StreamListener.__init__(self)
     self.handler = handler_class(**opts)
Example No. 54
 def __init__(self, player, api=None):
     StreamListener.__init__(self, api)
     self._player = player
Example No. 55
 def on_data(self, raw_data):
     if raw_data is None:
         mylog.error("Tweepy says raw_data=None.  Wat.  Dropping tweet, or whatever it was.")
         return
     return StreamListener.on_data(self, raw_data)
Example No. 56
 def __init__(self, monitor_these_user_ids, api=None):
     StreamListener.__init__(self)
     self.monitor_these_user_ids = monitor_these_user_ids
     self.pickle_file = open('/tmp/tweet_status.pickle', 'wb')  # pickle requires a binary-mode file
Example No. 57
 def __init__(self):
     StreamListener.__init__(self)
     self.destinations = []
Example No. 58
 def __init__(self, producer, count=10000, verbose=False):
     StreamListener.__init__(self)
     self.max_count = count
     self.counter = 0
     self.verbose = verbose
     self.producer = producer
Example No. 59
 def __init__(self):
     StreamListener.__init__(self)
     self.s3 = S3Connection(aws_access_key, aws_secret_key)
     self.start_new_tweet_file()
Example No. 60
 def __init__(self):
     StreamListener.__init__(self)
     
     # Create a niconico handler instance once and reuse it inside the on_status method
     self.n = niconico()