Beispiel #1
0
def update_twitter_profile(user):
    """Refresh a user's local profile from their Twitter account.

    Looks up the Twitter account whose id is stored on the user's profile
    (``profile.twitter_profile.twitter_id``) and copies its name, URL,
    avatar URL, description, screen name and location onto the local
    ``user`` / profile records, saving both.

    The update is best-effort: if the profile cannot be loaded or the
    Twitter lookup fails, nothing is changed and no error is raised.

    Args:
        user: Object exposing ``get_profile()``; the returned profile must
            expose ``twitter_profile.twitter_id``, ``user`` and ``save()``.
    """
    a = API()
    try:
        profile = user.get_profile()
        twitter_user = a.get_user(user_id=profile.twitter_profile.twitter_id)
    except Exception:
        # Deliberately best-effort, but do not swallow KeyboardInterrupt /
        # SystemExit the way a bare ``except:`` would.
        twitter_user = None

    if not twitter_user:
        return

    # First word becomes the first name; everything after the first space
    # (possibly empty) becomes the last name.
    first_name, _, last_name = twitter_user.name.partition(" ")
    profile.user.first_name = first_name
    profile.user.last_name = last_name
    profile.user.save()

    profile.website = twitter_user.url
    profile.profile_image_url = twitter_user.profile_image_url
    profile.description = twitter_user.description
    profile.twitter_name = twitter_user.screen_name
    profile.location = twitter_user.location
    profile.save()
Beispiel #2
0
def go(collection_type, project_id, collector_id, rawdir, logdir):
    if collection_type not in ['track', 'follow', 'none']:
        print "ThreadedCollector accepts inputs 'track', 'follow', or 'none'."
        print 'Exiting with invalid params...'
        sys.exit()
    else:
        # Grab collector & project details from DB
        project = db.get_project_detail(project_id)
        resp = db.get_collector_detail(project_id, collector_id)

        if project['status'] and resp['status']:
            collector = resp['collector']
            configdb = project['project_config_db']
            project_config_db = db.connection[configdb]
            project_config_db = project_config_db.config
            collector_name = collector['collector_name']
            project_name = project['project_name']
        else:
            'Invalid project account & collector. Try again!'

    # module_config = project_config_db.find_one({'module': 'twitter'})

    # Reference for controller if script is active or not.
    project_config_db.update({'_id': ObjectId(collector_id)},
                             {'$set': {
                                 'active': 1
                             }})

    Config = ConfigParser.ConfigParser()
    Config.read(PLATFORM_CONFIG_FILE)

    # Creates logger w/ level INFO
    logger = logging.getLogger(collector_name)
    logger.setLevel(logging.INFO)
    # Creates rotating file handler w/ level INFO
    fh = logging.handlers.TimedRotatingFileHandler(
        logdir + '/' + project_name + '-' + collector_name + '-' +
        collection_type + '-collector-log-' + collector_id + '.out', 'D', 1,
        30, None, False, False)
    fh.setLevel(logging.INFO)
    # Creates formatter and applies to rotating handler
    format = '%(asctime)s %(name)-12s %(levelname)-8s %(message)s'
    datefmt = '%m-%d %H:%M'
    formatter = logging.Formatter(format, datefmt)
    fh.setFormatter(formatter)
    # Finishes by adding the rotating, formatted handler
    logger.addHandler(fh)

    # Sets current date as starting point
    tmpDate = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    logger.info('Starting collection system at %s' % tmpDate)
    logger.info('Collector name: %s' % collector_name)

    # Grabs tweets out file info from config
    # TODO - move this info to Mongo
    tweetsOutFilePath = rawdir + '/'
    if not os.path.exists(tweetsOutFilePath):
        os.makedirs(tweetsOutFilePath)
    tweetsOutFileDateFrmt = Config.get('files', 'tweets_file_date_frmt', 0)
    tweetsOutFile = Config.get('files', 'tweets_file', 0)

    # NOTE - proper naming for api_auth dictionary from front_end
    oauth_info = collector['api_auth']

    consumerKey = oauth_info['consumer_key']
    consumerSecret = oauth_info['consumer_secret']
    accessToken = oauth_info['access_token']
    accessTokenSecret = oauth_info['access_token_secret']

    # Authenticates via app info
    auth = OAuthHandler(consumerKey, consumerSecret)
    auth.set_access_token(accessToken, accessTokenSecret)

    # Sets Mongo collection; sets rate_limitng & error counts to 0
    if 'stream_limit_loss' not in collector:
        project_config_db.update(
            {'_id': ObjectId(collector_id)},
            {'$set': {
                'stream_limit_loss': {
                    'counts': [],
                    'total': 0
                }
            }})

    if 'rate_limit_count' not in collector:
        project_config_db.update({'_id': ObjectId(collector_id)},
                                 {'$set': {
                                     'rate_limit_count': 0
                                 }})

    if 'error_code' not in collector:
        project_config_db.update({"_id": ObjectId(collector_id)},
                                 {'$set': {
                                     'error_code': 0
                                 }})

    runCollector = collector['collector']['run']

    if runCollector:
        print 'Starting process w/ start signal %d' % runCollector
        logger.info('Starting process w/ start signal %d' % runCollector)
    collectingData = False

    i = 0
    myThreadCounter = 0
    runLoopSleep = 0

    while runCollector:
        i += 1

        # Finds Mongo collection & grabs signal info
        # If Mongo is offline throws an acception and continues
        exception = None
        try:
            resp = db.get_collector_detail(project_id, collector_id)
            collector = resp['collector']
            flags = collector['collector']
            runCollector = flags['run']
            collectSignal = flags['collect']
            updateSignal = flags['update']
        except Exception, exception:
            logger.info('Mongo connection refused with exception: %s' %
                        exception)
        """
        Collection process is running, and:
        A) An update has been triggered -OR-
        B) The collection signal is not set -OR-
        C) Run signal is not set
        """
        if collectingData and (updateSignal or not collectSignal
                               or not runCollector):
            # Update has been triggered
            if updateSignal:
                logger.info(
                    'MAIN: received UPDATE signal. Attempting to stop collection thread'
                )
                resp = db.set_collector_status(project_id,
                                               collector_id,
                                               collector_status=1)
            # Collection thread triggered to stop
            if not collectSignal:
                logger.info(
                    'MAIN: received STOP signal. Attempting to stop collection thread'
                )
            # Entire process trigerred to stop
            if not runCollector:
                logger.info(
                    'MAIN: received EXIT signal. Attempting to stop collection thread'
                )
                resp = db.set_collector_status(project_id,
                                               collector_id,
                                               collector_status=0)
                collectSignal = 0

            # Send stream disconnect signal, kills thread
            stream.disconnect()
            wait_count = 0
            while e.isSet() is False:
                wait_count += 1
                print '%d) Waiting on collection thread shutdown' % wait_count
                sleep(wait_count)

            collectingData = False

            logger.info('COLLECTION THREAD: stream stopped after %d tweets' %
                        l.tweet_count)
            logger.info('COLLECTION THREAD: collected %d error tweets' %
                        l.delete_count)
            print 'COLLECTION THREAD: collected %d error tweets' % l.delete_count
            logger.info(
                'COLLECTION THREAD: lost %d tweets to stream rate limit' %
                l.limit_count)
            print 'COLLECTION THREAD: lost %d tweets to stream rate limit' % l.limit_count
            print 'COLLECTION THREAD: stream stopped after %d tweets' % l.tweet_count

            if not l.error_code == 0:
                resp = db.set_collector_status(project_id,
                                               collector_id,
                                               collector_status=0)
                project_config_db.update(
                    {"_id": ObjectId(collector_id)},
                    {'$set': {
                        'error_code': l.error_code
                    }})

            if not l.limit_count == 0:
                project_config_db.update(
                    {'_id': ObjectId(collector_id)},
                    {'$set': {
                        'stream_limit_loss.total': l.limit_count
                    }})

            if not l.rate_limit_count == 0:
                project_config_db.update({'_id': ObjectId(collector_id)},
                                         {'$set': {
                                             'rate_limit_count': 0
                                         }})

        # Collection has been signaled & main program thread is running
        # TODO - Check Mongo for handle:ID pairs
        # Only call for new pairs
        if collectSignal and (threading.activeCount() == 1):
            # Names collection thread & adds to counter
            myThreadCounter += 1
            myThreadName = 'collector-' + collection_type + '%s' % myThreadCounter

            termsList = collector['terms_list']
            if termsList:
                print 'Terms list length: ' + str(len(termsList))

                # Grab IDs for follow stream
                if collection_type == 'follow':
                    """
                    TODO - Update Mongo terms w/ set for collect status 0 or 1
                    # Updates current stored handles to collect 0 if no longer listed in terms file
                    stored_terms = doc['termsList']
                    for user in stored_terms:
                        if user['handle'] not in termsList:
                            user_id = user['id']
                            mongo_config.update({'module': 'collector-follow'},
                                {'$pull': {'termsList': {'handle': user['handle']}}})
                            mongo_config.update({'module': 'collecting-follow'},
                                {'$set': {'termsList': {'handle': user['handle'], 'id': user_id, 'collect': 0 }}})

                    # Loops thru current stored handles and adds list if both:
                    #   A) Value isn't set to None (not valid OR no longer in use)
                    all_stored_handles = [user['handle'] for user in stored_terms]
                    stored_handles = [user['handle'] for user in stored_terms if user['id'] and user['collect']]

                    print 'MAIN: %d user ids for collection found in Mongo!' % len(stored_handles)
                    """

                    # Loop thru & query (except handles that have been stored)
                    print 'MAIN: Querying Twitter API for handle:id pairs...'
                    logger.info(
                        'MAIN: Querying Twitter API for handle:id pairs...')
                    # Initiates REST API connection
                    twitter_api = API(auth_handler=auth)
                    failed_handles = []
                    success_handles = []
                    # Loops thru user-given terms list
                    for item in termsList:
                        term = item['term']
                        # If term already has a valid ID, pass
                        if item['id'] is not None:
                            pass
                        # Queries the Twitter API for the ID value of the handle
                        else:
                            try:
                                user = twitter_api.get_user(screen_name=term)
                            except TweepError as tweepy_exception:
                                error_message = tweepy_exception.args[0][0][
                                    'message']
                                code = tweepy_exception.args[0][0]['code']
                                # Rate limited for 15 minutes w/ code 88
                                if code == 88:
                                    print 'MAIN: User ID grab rate limited. Sleeping for 15 minutes.'
                                    logger.exception(
                                        'MAIN: User ID grab rate limited. Sleeping for 15 minutes.'
                                    )
                                    time.sleep(900)
                                # Handle doesn't exist, added to Mongo as None
                                elif code == 34:
                                    print 'MAIN: User w/ handle %s does not exist.' % term
                                    logger.exception(
                                        'MAIN: User w/ handle %s does not exist.'
                                        % term)
                                    item['collect'] = 0
                                    item['id'] = None
                                    failed_handles.append(term)
                            # Success - handle:ID pair stored in Mongo
                            else:
                                user_id = user._json['id_str']
                                item['id'] = user_id
                                success_handles.append(term)

                    print 'MAIN: Collected %d new ids for follow stream.' % len(
                        success_handles)
                    logger.info(
                        'MAIN: Collected %d new ids for follow stream.' %
                        len(success_handles))
                    print 'MAIN: %d handles failed to be found.' % len(
                        failed_handles)
                    logger.info('MAIN: %d handles failed to be found.' %
                                len(failed_handles))
                    logger.info(failed_handles)
                    print failed_handles
                    print 'MAIN: Grabbing full list of follow stream IDs from Mongo.'
                    logger.info(
                        'MAIN: Grabbing full list of follow stream IDs from Mongo.'
                    )

                    # Updates term list with follow values
                    project_config_db.update(
                        {'_id': ObjectId(collector_id)},
                        {'$set': {
                            'terms_list': termsList
                        }})

                    # Loops thru current stored handles and adds to list if:
                    #   A) Value isn't set to None (not valid OR no longer in use)
                    ids = [
                        item['id'] for item in termsList
                        if item['id'] and item['collect']
                    ]
                    noncoll = [
                        item['term'] for item in termsList
                        if not item['collect']
                    ]
                    termsList = ids
                else:
                    terms = [
                        item['term'] for item in termsList if item['collect']
                    ]
                    noncoll = [
                        item['term'] for item in termsList
                        if not item['collect']
                    ]
                    termsList = terms

                print 'Terms List: '
                print termsList
                print ''
                print 'Not collecting for: '
                print noncoll
                print ''

                logger.info('Terms list: %s' % str(termsList).strip('[]'))
                logger.info('Not collecting for: %s' %
                            str(noncoll).strip('[]'))

            print 'COLLECTION THREAD: Initializing Tweepy listener instance...'
            logger.info(
                'COLLECTION THREAD: Initializing Tweepy listener instance...')
            l = fileOutListener(tweetsOutFilePath, tweetsOutFileDateFrmt,
                                tweetsOutFile, logger, collection_type,
                                project_id, collector_id)

            print 'TOOLKIT STREAM: Initializing Tweepy stream listener...'
            logger.info(
                'TOOLKIT STREAM: Initializing Tweepy stream listener...')

            # Initiates async stream via Tweepy, which handles the threading
            # TODO - location & language

            languages = collector['languages']
            location = collector['location']

            if languages:
                print '%s language codes found!' % len(languages)
            if location:
                print 'Location points found!'
                for i in range(len(location)):
                    location[i] = float(location[i])

            stream = ToolkitStream(auth,
                                   l,
                                   logger,
                                   project_id,
                                   collector_id,
                                   retry_count=100)
            if collection_type == 'track':
                stream.filter(track=termsList,
                              languages=languages,
                              locations=location,
                              async=True)
            elif collection_type == 'follow':
                stream.filter(follow=termsList,
                              languages=languages,
                              locations=location,
                              async=True)
            elif collection_type == 'none':
                stream.filter(locations=location,
                              languages=languages,
                              async=True)
            else:
                sys.exit('ERROR: Unrecognized stream filter.')

            collectingData = True
            print 'MAIN: Collection thread started (%s)' % myThreadName
            logger.info('MAIN: Collection thread started (%s)' % myThreadName)

        #if threading.activeCount() == 1:
        #    print "MAIN: %d iteration with no collection thread running" % i
        #else:
        #    print "MAIN: %d iteration with collection thread running (%d)" % (i, threading.activeCount())

        # Incrementally delays loop if Mongo is offline, otherwise 2 seconds
        max_sleep_time = 1800
        if exception:
            if runLoopSleep < max_sleep_time:
                runLoopSleep += 2
            else:
                runLoopSleep = max_sleep_time
            print "Exception caught, sleeping for: %d" % runLoopSleep
            time.sleep(runLoopSleep)
        else:
            time.sleep(2)
 # Loop thru & query (except handles that have been stored)
 print 'MAIN: Querying Twitter API for new handle:id pairs...'
 logger.info('MAIN: Querying Twitter API for new handle:id pairs...')
 # Initiates REST API connection
 twitter_api = API(auth_handler=auth)
 failed_handles = []
 success_handles = []
 # Loops thru user-given terms list
 for handle in termsList:
     # If handle already stored, no need to query for ID
     if handle in stored_handles:
         pass
     # Queries the Twitter API for the ID value of the handle
     else:
         try:
             user = twitter_api.get_user(screen_name=handle)
         except TweepError as tweepy_exception:
             error_message = tweepy_exception.args[0][0]['message']
             code = tweepy_exception.args[0][0]['code']
             # Rate limited for 15 minutes w/ code 88
             if code == 88:
                 print 'MAIN: User ID grab rate limited. Sleeping for 15 minutes.'
                 logger.exception('MAIN: User ID grab rate limited. Sleeping for 15 minutes.')
                 time.sleep(900)
             # Handle doesn't exist, added to Mongo as None
             elif code == 34:
                 print 'MAIN: User w/ handle %s does not exist.' % handle
                 logger.exception('MAIN: User w/ handle %s does not exist.' % handle)
                 if handle not in all_stored_handles:
                     terms_info = { 'handle': handle, 'id': None, 'collect': 0 }
                     mongo_config.update({'module':'collector-follow'},
    while 1:
        try:
    	    forward = []
    	    backward = []
            #outfile = sys.argv[1]
            #auth = tweepy.BasicAuthHandler('reza_shz', 'mehdireza') 
            auth = tweepy.OAuthHandler("xg2hLKvf1nxw1TUALvx5xA", "MkX0lDUik0mJuc6nxserddbQDWd7ZTErQN6Tf0OhOM")
            auth.set_access_token("174566652-MOGbxytlmUHIN5tEMgl5rgqWdWaIQXYZ6XPyYKl1", "yem38OfoUbsoPZvOVr3k0n3X7JSUDYD8oxAKXvrJw6k")
            twitterApi = API(auth_handler=auth,
                    host='api.twitter.com', search_host='search.twitter.com',
                     cache=FileCache("cache", timeout = -1), secure=False, api_root='/1', search_root='',
                    retry_count=0, retry_delay=0, retry_errors=None,
                    parser=None)
                        
            #username1, username2,listUsernames = readFile(outfile)
            user1 = twitterApi.get_user(sys.argv[1]) #@UndefinedVariable
            user2 = twitterApi.get_user(sys.argv[2]) #@UndefinedVariable
            
            forward.append({"obj":user1, "cursor":-1, "friends":[], "cursor_obj":-1, "path":[]})
            backward.append({"obj":user2, "cursor":-1, "cursor_obj":-1,"path":[], "followers":[] })
            reqs = 0
            while 1:
        	fin, path = go_backward()
		reqs +=1;print reqs
		if fin: print path;reqs=-2;break
        	while has_node(backward):
		    fin, path = go_backward()
		    reqs +=1;print reqs
		    if fin or reqs >= 1000: print path;break
		if fin: break
		if reqs >= 1000: reqs=-2;break
                "yem38OfoUbsoPZvOVr3k0n3X7JSUDYD8oxAKXvrJw6k")
            twitterApi = API(auth_handler=auth,
                             host='api.twitter.com',
                             search_host='search.twitter.com',
                             cache=FileCache("cache", timeout=-1),
                             secure=False,
                             api_root='/1',
                             search_root='',
                             retry_count=0,
                             retry_delay=0,
                             retry_errors=None,
                             parser=None)

            username1 = sys.argv[1]
            username2 = sys.argv[2]
            user1 = twitterApi.get_user(username1)  #@UndefinedVariable
            user2 = twitterApi.get_user(username2)  #@UndefinedVariable

            forward = []
            forward_ds = [1]
            backward_ds = [1]
            forward_min = backward_min = 0
            backward = []
            is_opt = 0

            forward.append({
                "obj": user1,
                "cursor": -1,
                "friends": [],
                "cursor_obj": -1,
                "path": []
Beispiel #6
0
def go(collection_type, project_id, collector_id, rawdir, logdir):
    if collection_type not in ['track', 'follow', 'none']:
        print "ThreadedCollector accepts inputs 'track', 'follow', or 'none'."
        print 'Exiting with invalid params...'
        sys.exit()
    else:
        # Grab collector & project details from DB
        project = db.get_project_detail(project_id)
        resp = db.get_collector_detail(project_id, collector_id)

        if project['status'] and resp['status']:
            collector = resp['collector']
            configdb = project['project_config_db']
            project_config_db = db.connection[configdb]
            project_config_db = project_config_db.config
            collector_name = collector['collector_name']
            project_name = project['project_name']
        else:
            'Invalid project account & collector. Try again!'

    # module_config = project_config_db.find_one({'module': 'twitter'})

    # Reference for controller if script is active or not.
    project_config_db.update({'_id': ObjectId(collector_id)}, {'$set': {'active': 1}})

    Config = ConfigParser.ConfigParser()
    Config.read(PLATFORM_CONFIG_FILE)

    # Creates logger w/ level INFO
    logger = logging.getLogger(collector_name)
    logger.setLevel(logging.INFO)
    # Creates rotating file handler w/ level INFO
    fh = logging.handlers.TimedRotatingFileHandler(logdir + '/' + project_name + '-' + collector_name + '-' + collection_type + '-collector-log-' + collector_id + '.out', 'D', 1, 30, None, False, False)
    fh.setLevel(logging.INFO)
    # Creates formatter and applies to rotating handler
    format = '%(asctime)s %(name)-12s %(levelname)-8s %(message)s'
    datefmt = '%m-%d %H:%M'
    formatter = logging.Formatter(format, datefmt)
    fh.setFormatter(formatter)
    # Finishes by adding the rotating, formatted handler
    logger.addHandler(fh)

    # Sets current date as starting point
    tmpDate = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    logger.info('Starting collection system at %s' % tmpDate)
    logger.info('Collector name: %s' % collector_name)

    # Grabs tweets out file info from config
    # TODO - move this info to Mongo
    tweetsOutFilePath = rawdir + '/'
    if not os.path.exists(tweetsOutFilePath):
        os.makedirs(tweetsOutFilePath)
    tweetsOutFileDateFrmt = Config.get('files', 'tweets_file_date_frmt', 0)
    tweetsOutFile = Config.get('files', 'tweets_file', 0)

    # NOTE - proper naming for api_auth dictionary from front_end
    oauth_info = collector['api_auth']

    consumerKey = oauth_info['consumer_key']
    consumerSecret = oauth_info['consumer_secret']
    accessToken = oauth_info['access_token']
    accessTokenSecret = oauth_info['access_token_secret']

    # Authenticates via app info
    auth = OAuthHandler(consumerKey, consumerSecret)
    auth.set_access_token(accessToken, accessTokenSecret)

    # Sets Mongo collection; sets rate_limitng & error counts to 0
    if 'stream_limit_loss' not in collector:
        project_config_db.update({'_id': ObjectId(collector_id)}, {'$set' : { 'stream_limit_loss': { 'counts': [], 'total': 0 }}})

    if 'rate_limit_count' not in collector:
        project_config_db.update({'_id': ObjectId(collector_id)}, {'$set': {'rate_limit_count': 0}})

    if 'error_code' not in collector:
        project_config_db.update({"_id" : ObjectId(collector_id)}, {'$set' : {'error_code': 0}})

    runCollector = collector['collector']['run']

    if runCollector:
        print 'Starting process w/ start signal %d' % runCollector
        logger.info('Starting process w/ start signal %d' % runCollector)
    collectingData = False

    i = 0
    myThreadCounter = 0
    runLoopSleep = 0

    while runCollector:
        i += 1

        # Finds Mongo collection & grabs signal info
        # If Mongo is offline throws an acception and continues
        exception = None
        try:
            resp = db.get_collector_detail(project_id, collector_id)
            collector = resp['collector']
            flags = collector['collector']
            runCollector = flags['run']
            collectSignal = flags['collect']
            updateSignal = flags['update']
        except Exception, exception:
            logger.info('Mongo connection refused with exception: %s' % exception)

        """
        Collection process is running, and:
        A) An update has been triggered -OR-
        B) The collection signal is not set -OR-
        C) Run signal is not set
        """
        if collectingData and (updateSignal or not collectSignal or not runCollector):
            # Update has been triggered
            if updateSignal:
                logger.info('MAIN: received UPDATE signal. Attempting to stop collection thread')
                resp = db.set_collector_status(project_id, collector_id, collector_status=1)
            # Collection thread triggered to stop
            if not collectSignal:
                logger.info('MAIN: received STOP signal. Attempting to stop collection thread')
            # Entire process trigerred to stop
            if not runCollector:
                logger.info('MAIN: received EXIT signal. Attempting to stop collection thread')
                resp = db.set_collector_status(project_id, collector_id, collector_status=0)
                collectSignal = 0

            # Send stream disconnect signal, kills thread
            stream.disconnect()
            wait_count = 0
            while e.isSet() is False:
                wait_count += 1
                print '%d) Waiting on collection thread shutdown' % wait_count
                sleep(wait_count)

            collectingData = False

            logger.info('COLLECTION THREAD: stream stopped after %d tweets' % l.tweet_count)
            logger.info('COLLECTION THREAD: collected %d error tweets' % l.delete_count)
            print 'COLLECTION THREAD: collected %d error tweets' % l.delete_count
            logger.info('COLLECTION THREAD: lost %d tweets to stream rate limit' % l.limit_count)
            print 'COLLECTION THREAD: lost %d tweets to stream rate limit' % l.limit_count
            print 'COLLECTION THREAD: stream stopped after %d tweets' % l.tweet_count

            if not l.error_code == 0:
                resp = db.set_collector_status(project_id, collector_id, collector_status=0)
                project_config_db.update({"_id" : ObjectId(collector_id)}, {'$set' : {'error_code': l.error_code}})

            if not l.limit_count == 0:
                project_config_db.update({'_id': ObjectId(collector_id)}, {'$set' : { 'stream_limit_loss.total': l.limit_count}})

            if not l.rate_limit_count == 0:
                project_config_db.update({'_id': ObjectId(collector_id)}, {'$set': {'rate_limit_count': 0}})

        # Collection has been signaled & main program thread is running
        # TODO - Check Mongo for handle:ID pairs
        # Only call for new pairs
        if collectSignal and (threading.activeCount() == 1):
            # Names collection thread & adds to counter
            myThreadCounter += 1
            myThreadName = 'collector-' + collection_type + '%s' % myThreadCounter

            termsList = collector['terms_list']
            if termsList:
                print 'Terms list length: ' + str(len(termsList))

                # Grab IDs for follow stream
                if collection_type == 'follow':
                    """
                    TODO - Update Mongo terms w/ set for collect status 0 or 1
                    # Updates current stored handles to collect 0 if no longer listed in terms file
                    stored_terms = doc['termsList']
                    for user in stored_terms:
                        if user['handle'] not in termsList:
                            user_id = user['id']
                            mongo_config.update({'module': 'collector-follow'},
                                {'$pull': {'termsList': {'handle': user['handle']}}})
                            mongo_config.update({'module': 'collecting-follow'},
                                {'$set': {'termsList': {'handle': user['handle'], 'id': user_id, 'collect': 0 }}})

                    # Loops thru current stored handles and adds list if both:
                    #   A) Value isn't set to None (not valid OR no longer in use)
                    all_stored_handles = [user['handle'] for user in stored_terms]
                    stored_handles = [user['handle'] for user in stored_terms if user['id'] and user['collect']]

                    print 'MAIN: %d user ids for collection found in Mongo!' % len(stored_handles)
                    """

                    # Loop thru & query (except handles that have been stored)
                    print 'MAIN: Querying Twitter API for handle:id pairs...'
                    logger.info('MAIN: Querying Twitter API for handle:id pairs...')
                    # Initiates REST API connection
                    twitter_api = API(auth_handler=auth)
                    failed_handles = []
                    success_handles = []
                    # Loops thru user-given terms list
                    for item in termsList:
                        term = item['term']
                        # If term already has a valid ID, pass
                        if item['id'] is not None:
                            pass
                        # Queries the Twitter API for the ID value of the handle
                        else:
                            try:
                                user = twitter_api.get_user(screen_name=term)
                            except TweepError as tweepy_exception:
                                error_message = tweepy_exception.args[0][0]['message']
                                code = tweepy_exception.args[0][0]['code']
                                # Rate limited for 15 minutes w/ code 88
                                if code == 88:
                                    print 'MAIN: User ID grab rate limited. Sleeping for 15 minutes.'
                                    logger.exception('MAIN: User ID grab rate limited. Sleeping for 15 minutes.')
                                    time.sleep(900)
                                # Handle doesn't exist, added to Mongo as None
                                elif code == 34:
                                    print 'MAIN: User w/ handle %s does not exist.' % term
                                    logger.exception('MAIN: User w/ handle %s does not exist.' % term)
                                    item['collect'] = 0
                                    item['id'] = None
                                    failed_handles.append(term)
                            # Success - handle:ID pair stored in Mongo
                            else:
                                user_id = user._json['id_str']
                                item['id'] = user_id
                                success_handles.append(term)

                    print 'MAIN: Collected %d new ids for follow stream.' % len(success_handles)
                    logger.info('MAIN: Collected %d new ids for follow stream.' % len(success_handles))
                    print 'MAIN: %d handles failed to be found.' % len(failed_handles)
                    logger.info('MAIN: %d handles failed to be found.' % len(failed_handles))
                    logger.info(failed_handles)
                    print failed_handles
                    print 'MAIN: Grabbing full list of follow stream IDs from Mongo.'
                    logger.info('MAIN: Grabbing full list of follow stream IDs from Mongo.')

                    # Updates term list with follow values
                    project_config_db.update({'_id': ObjectId(collector_id)},
                        {'$set': {'terms_list': termsList}})

                    # Loops thru current stored handles and adds to list if:
                    #   A) Value isn't set to None (not valid OR no longer in use)
                    ids = [item['id'] for item in termsList if item['id'] and item['collect']]
                    noncoll = [item['term'] for item in termsList if not item['collect']]
                    termsList = ids
                else:
                    terms = [item['term'] for item in termsList if item['collect']]
                    noncoll = [item['term'] for item in termsList if not item['collect']]
                    termsList = terms

                print 'Terms List: '
                print termsList
                print ''
                print 'Not collecting for: '
                print noncoll
                print ''

                logger.info('Terms list: %s' % str(termsList).strip('[]'))
                logger.info('Not collecting for: %s' % str(noncoll).strip('[]'))

            print 'COLLECTION THREAD: Initializing Tweepy listener instance...'
            logger.info('COLLECTION THREAD: Initializing Tweepy listener instance...')
            l = fileOutListener(tweetsOutFilePath, tweetsOutFileDateFrmt, tweetsOutFile, logger, collection_type, project_id, collector_id)

            print 'TOOLKIT STREAM: Initializing Tweepy stream listener...'
            logger.info('TOOLKIT STREAM: Initializing Tweepy stream listener...')

            # Initiates async stream via Tweepy, which handles the threading
            # TODO - location & language

            languages = collector['languages']
            location = collector['location']

            if languages:
                print '%s language codes found!' % len(languages)
            if location:
                print 'Location points found!'
                for i in range(len(location)):
                    location[i] = float(location[i])

            stream = ToolkitStream(auth, l, logger, project_id, collector_id, retry_count=100)
            if collection_type == 'track':
                stream.filter(track=termsList, languages=languages, locations=location, async=True)
            elif collection_type == 'follow':
                stream.filter(follow=termsList, languages=languages, locations=location, async=True)
            elif collection_type == 'none':
                stream.filter(locations=location, languages=languages, async=True)
            else:
                sys.exit('ERROR: Unrecognized stream filter.')

            collectingData = True
            print 'MAIN: Collection thread started (%s)' % myThreadName
            logger.info('MAIN: Collection thread started (%s)' % myThreadName)


        #if threading.activeCount() == 1:
        #    print "MAIN: %d iteration with no collection thread running" % i
        #else:
        #    print "MAIN: %d iteration with collection thread running (%d)" % (i, threading.activeCount())

        # Incrementally delays loop if Mongo is offline, otherwise 2 seconds
        max_sleep_time = 1800
        if exception:
            if runLoopSleep < max_sleep_time:
                runLoopSleep += 2
            else:
                runLoopSleep = max_sleep_time
            print "Exception caught, sleeping for: %d" % runLoopSleep
            time.sleep(runLoopSleep)
        else:
            time.sleep( 2 )
Beispiel #7
0
class TweepyAPITests(unittest.TestCase):
    """Smoke tests that exercise the Tweepy ``API`` wrapper method by method.

    ``setUp`` builds an OAuth-authenticated ``API`` client from credentials
    defined outside this class. Most tests only check that a call completes
    without raising; a few also assert on the returned object.

    NOTE(review): these calls appear to go to the real service, and several
    tests modify the authenticated account (statuses, friendships, blocks,
    profile, lists) -- confirm before running against an account you care
    about.
    """

    def setUp(self):
        # A fresh client per test, with a small retry budget.
        auth = OAuthHandler(oauth_consumer_key, oauth_consumer_secret)
        auth.set_access_token(oauth_token, oauth_token_secret)
        self.api = API(auth)
        self.api.retry_count = 2
        self.api.retry_delay = 5

    def testhometimeline(self):
        self.api.home_timeline()

    def testfriendstimeline(self):
        self.api.friends_timeline()

    def testusertimeline(self):
        self.api.user_timeline()
        self.api.user_timeline('twitter')

    def testmentions(self):
        self.api.mentions()

    def testretweetedbyme(self):
        self.api.retweeted_by_me()

    def testretweetedbyuser(self):
        self.api.retweeted_by_user('twitter')

    def testretweetedtome(self):
        self.api.retweeted_to_me()

    def testretweetsofme(self):
        self.api.retweets_of_me()

    def testretweet(self):
        s = self.api.retweet(123)
        s.destroy()

    def testretweets(self):
        self.api.retweets(123)

    def testgetstatus(self):
        self.api.get_status(id=123)

    def testupdateanddestroystatus(self):
        # test update; the random suffix makes the status text vary per run
        text = 'testing %i' % random.randint(0, 1000)
        update = self.api.update_status(status=text)
        self.assertEqual(update.text, text)

        # test destroy
        deleted = self.api.destroy_status(id=update.id)
        self.assertEqual(deleted.id, update.id)

    def testgetuser(self):
        # lookup by screen name
        u = self.api.get_user('twitter')
        self.assertEqual(u.screen_name, 'twitter')

        # lookup by numeric id; expected to resolve to the same account
        u = self.api.get_user(783214)
        self.assertEqual(u.screen_name, 'twitter')

    def testsearchusers(self):
        self.api.search_users('twitter')

    def testme(self):
        me = self.api.me()
        self.assertEqual(me.screen_name, username)

    def testfriends(self):
        self.api.friends()

    def testfollowers(self):
        self.api.followers()

    def testdirectmessages(self):
        self.api.direct_messages()

    def testsentdirectmessages(self):
        self.api.sent_direct_messages()

    def testsendanddestroydirectmessage(self):
        # send (to ourselves, so sender and recipient are both `username`)
        sent_dm = self.api.send_direct_message(username, text='test message')
        self.assertEqual(sent_dm.text, 'test message')
        self.assertEqual(sent_dm.sender.screen_name, username)
        self.assertEqual(sent_dm.recipient.screen_name, username)

        # destroy
        destroyed_dm = self.api.destroy_direct_message(sent_dm.id)
        self.assertEqual(destroyed_dm.text, sent_dm.text)
        self.assertEqual(destroyed_dm.id, sent_dm.id)
        self.assertEqual(destroyed_dm.sender.screen_name, username)
        self.assertEqual(destroyed_dm.recipient.screen_name, username)

    def testcreatedestroyfriendship(self):
        enemy = self.api.destroy_friendship('twitter')
        self.assertEqual(enemy.screen_name, 'twitter')
        self.assertFalse(self.api.exists_friendship(username, 'twitter'))

        friend = self.api.create_friendship('twitter')
        self.assertEqual(friend.screen_name, 'twitter')
        self.assertTrue(self.api.exists_friendship(username, 'twitter'))

    def testshowfriendship(self):
        # Screen name was misspelled as 'twtiter'; every other test in this
        # class targets the 'twitter' account.
        source, target = self.api.show_friendship(target_screen_name='twitter')
        # assertTrue replaces the deprecated assert_ alias.
        self.assertTrue(isinstance(source, Friendship))
        self.assertTrue(isinstance(target, Friendship))

    def testfriendsids(self):
        self.api.friends_ids(username)

    def testfollowersids(self):
        self.api.followers_ids(username)

    def testverifycredentials(self):
        self.assertNotEqual(self.api.verify_credentials(), False)

        # make sure that `me.status.entities` is not an empty dict
        me = self.api.verify_credentials(include_entities=True)
        self.assertTrue(me.status.entities)

        # `status` shouldn't be included
        me = self.api.verify_credentials(skip_status=True)
        self.assertFalse(hasattr(me, 'status'))

    def testratelimitstatus(self):
        self.api.rate_limit_status()

    def testupdateprofilecolors(self):
        original = self.api.me()
        updated = self.api.update_profile_colors(
            '000', '000', '000', '000', '000')

        # restore colors before asserting, so a failed assertion below
        # does not leave the account with the test colors
        self.api.update_profile_colors(
            original.profile_background_color,
            original.profile_text_color,
            original.profile_link_color,
            original.profile_sidebar_fill_color,
            original.profile_sidebar_border_color
        )

        self.assertEqual(updated.profile_background_color, '000')
        self.assertEqual(updated.profile_text_color, '000')
        self.assertEqual(updated.profile_link_color, '000')
        self.assertEqual(updated.profile_sidebar_fill_color, '000')
        self.assertEqual(updated.profile_sidebar_border_color, '000')

    # Disabled tests, kept for reference (they reference image files under
    # the examples/ directory):
    #
    # def testupateprofileimage(self):
    #     self.api.update_profile_image('examples/profile.png')
    #
    # def testupdateprofilebg(self):
    #     self.api.update_profile_background_image('examples/bg.png')

    def testupdateprofile(self):
        original = self.api.me()
        profile = {
            'name': 'Tweepy test 123',
            'url': 'http://www.example.com',
            'location': 'pytopia',
            'description': 'just testing things out'
        }
        updated = self.api.update_profile(**profile)
        # restore the original profile before asserting on the update
        self.api.update_profile(
            name=original.name, url=original.url,
            location=original.location, description=original.description
        )

        # every field that was sent must be reflected on the returned user
        for k, v in profile.items():
            self.assertEqual(getattr(updated, k), v)

    def testfavorites(self):
        self.api.favorites()

    def testcreatedestroyfavorite(self):
        self.api.create_favorite(4901062372)
        self.api.destroy_favorite(4901062372)

    def testenabledisablenotifications(self):
        self.api.enable_notifications('twitter')
        self.api.disable_notifications('twitter')

    def testcreatedestroyblock(self):
        self.api.create_block('twitter')
        self.assertEqual(self.api.exists_block('twitter'), True)
        self.api.destroy_block('twitter')
        self.assertEqual(self.api.exists_block('twitter'), False)
        self.api.create_friendship('twitter')  # restore

    def testblocks(self):
        self.api.blocks()

    def testblocksids(self):
        self.api.blocks_ids()

    def testcreateupdatedestroylist(self):
        self.api.create_list('tweeps')
        # XXX: right now twitter throws a 500 here,
        # issue is being looked into by twitter.
        # self.api.update_list('tweeps', mode='private')
        self.api.destroy_list('tweeps')

    def testlists(self):
        self.api.lists()

    def testlistsmemberships(self):
        self.api.lists_memberships()

    def testlistssubscriptions(self):
        self.api.lists_subscriptions()

    def testlisttimeline(self):
        self.api.list_timeline('applepie', 'stars')

    def testgetlist(self):
        self.api.get_list('applepie', 'stars')

    def testlistmembers(self):
        self.api.list_members('applepie', 'stars')

    def testislistmember(self):
        uid = self.api.get_user('applepie').id
        self.api.is_list_member('applepie', 'stars', uid)

    def testsubscribeunsubscribelist(self):
        self.api.subscribe_list('applepie', 'stars')
        self.api.unsubscribe_list('applepie', 'stars')

    def testlistsubscribers(self):
        self.api.list_subscribers('applepie', 'stars')

    def testissubscribedlist(self):
        uid = self.api.get_user('applepie').id
        self.api.is_subscribed_list('applepie', 'stars', uid)

    def testsavedsearches(self):
        s = self.api.create_saved_search('test')
        self.api.saved_searches()
        self.assertEqual(self.api.get_saved_search(s.id).query, 'test')
        self.api.destroy_saved_search(s.id)

    def testsearch(self):
        self.api.search('tweepy')

    def testtrends(self):
        self.api.trends_daily()
        self.api.trends_weekly()

    def testgeoapis(self):
        self.api.geo_id(id='c3f37afa9efcf94b')  # Austin, TX, USA
        self.api.nearby_places(lat=30.267370168467806,
                               long=-97.74261474609375)  # Austin, TX, USA
        self.api.reverse_geocode(lat=30.267370168467806,
                                 long=-97.74261474609375)  # Austin, TX, USA
                "174566652-MOGbxytlmUHIN5tEMgl5rgqWdWaIQXYZ6XPyYKl1",
                "yem38OfoUbsoPZvOVr3k0n3X7JSUDYD8oxAKXvrJw6k")
            twitterApi = API(auth_handler=auth,
                             host='api.twitter.com',
                             search_host='search.twitter.com',
                             cache=FileCache("cache", timeout=-1),
                             secure=False,
                             api_root='/1',
                             search_root='',
                             retry_count=0,
                             retry_delay=0,
                             retry_errors=None,
                             parser=None)

            #username1, username2,listUsernames = readFile(outfile)
            user1 = twitterApi.get_user(sys.argv[1])  #@UndefinedVariable
            user2 = twitterApi.get_user(sys.argv[2])  #@UndefinedVariable

            forward.append({
                "obj": user1,
                "cursor": -1,
                "friends": [],
                "cursor_obj": -1,
                "path": []
            })
            backward.append({
                "obj": user2,
                "cursor": -1,
                "cursor_obj": -1,
                "path": [],
                "followers": []
Beispiel #9
0
class TwicorderStream(Stream):
    """``Stream`` subclass whose parameters come from ``Config``.

    Every stream parameter (follow, track, locations, languages, ...) is a
    property that reads the current configuration on access. Constructing an
    instance immediately starts the stream in the configured ``stream_mode``
    (``"filter"`` by default, or ``"sample"``).
    """

    def __init__(self, auth, listener, **options):
        """Initialise the stream and start it in the configured mode.

        Args:
            auth: authentication handler, passed to ``Stream`` and ``API``.
            listener: stream listener, passed to ``Stream``.
            **options: forwarded unchanged to ``Stream.__init__``.
        """
        super(TwicorderStream, self).__init__(auth, listener, **options)
        msg = 'Listener starting at {:%d %b %Y %H:%M:%S}'.format(datetime.now())
        utils.message('Info', msg)
        self.api = API(auth)
        # Cache of follow id -> '@screen_name' and the time it was last filled.
        self._id_to_screenname_time = None
        self._id_to_screenname = {}
        stream_mode = self.config.get('stream_mode') or 'filter'
        if stream_mode == 'filter':
            self.filter(
                follow=self.follow,
                track=self.track,
                locations=self.locations,
                stall_warnings=self.stall_warnings,
                languages=self.languages,
                encoding=self.encoding,
                filter_level=self.filter_level
            )
        elif stream_mode == 'sample':
            self.sample(
                languages=self.languages,
                stall_warnings=self.stall_warnings
            )
        else:
            utils.message('Error', 'stream_mode must be "filter" or "sample"')

    def _read_loop(self, resp):
        """Read length-delimited messages from ``resp`` and dispatch them.

        Each message is expected as a line holding its length in digits,
        followed by that many characters of payload. Blank lines are treated
        as keep-alives. Errors while reading one message are logged and the
        loop carries on with the next one.

        Args:
            resp: the streaming HTTP response; ``resp.headers`` and
                ``resp.raw`` are read.
        """
        charset = resp.headers.get('content-type', default='')
        # Raw string: '\S' is a regex class, not a string escape.
        enc_search = re.search(r'charset=(?P<enc>\S*)', charset)
        if enc_search is not None:
            encoding = enc_search.group('enc')
        else:
            encoding = 'utf-8'

        buf = ReadBuffer(resp.raw, self.chunk_size, encoding=encoding)

        while self.running and not resp.raw.closed:
            length = 0
            try:
                while not resp.raw.closed:
                    line = buf.read_line() or ''
                    stripped_line = line.strip()
                    if not stripped_line:
                        # keep-alive new lines are expected
                        self.listener.keep_alive()
                    elif stripped_line.isdigit():
                        length = int(stripped_line)
                        break
                    else:
                        raise TweepError('Expecting length, unexpected value found')

                next_status_obj = buf.read_len(length)
            except Exception:
                # Deliberate best-effort: log with traceback and keep reading.
                TwiLogger.exception('Unable to process response: \n')
                continue
            if self.running and next_status_obj:
                self._data(next_status_obj)

        if resp.raw.closed:
            self.on_closed(resp)

    @property
    def config(self):
        """Current configuration, as returned by ``Config.get()``."""
        return Config.get()

    @property
    def id_to_screenname(self):
        """Mapping of followed user id to ``'@screen_name'``.

        The mapping is cached; it is refreshed through ``API.get_user`` when
        it is empty or older than 15 minutes. A missing ``follow`` setting is
        treated as an empty list instead of raising ``TypeError``.
        """
        now = datetime.now()
        time_since_lookup = now - (self._id_to_screenname_time or now)
        expiry = timedelta(minutes=15)
        if self._id_to_screenname and time_since_lookup <= expiry:
            return self._id_to_screenname
        for follow_id in self.follow or []:
            user = self.api.get_user(follow_id)
            self._id_to_screenname[follow_id] = '@{}'.format(user.screen_name)
        self._id_to_screenname_time = datetime.now()
        TwiLogger.info(self._id_to_screenname)
        return self._id_to_screenname

    @property
    def track(self):
        """Non-empty track terms from the config, or ``None`` if there are none.

        When ``follow_also_tracks`` is set and at least one track term exists,
        the screen names of the followed users are appended.
        """
        track_list = [t for t in self.config.get('track') or [] if t] or None
        if track_list and self.follow_also_tracks:
            track_list += self.id_to_screenname.values()
        # '%s' placeholder added: the list was previously passed as a
        # formatting argument to a message that had no placeholder for it.
        TwiLogger.info('Tracking: %s', track_list)
        return track_list

    @property
    def follow(self):
        """The ``follow`` config value (``None`` when not configured)."""
        return self.config.get('follow')

    @property
    def locations(self):
        """The ``locations`` config value (``None`` when not configured)."""
        return self.config.get('locations')

    @property
    def stall_warnings(self):
        """The ``stall_warnings`` config value; defaults to ``False``."""
        return self.config.get('stall_warnings', False)

    @property
    def languages(self):
        """The ``languages`` config value (``None`` when not configured)."""
        return self.config.get('languages')

    @property
    def encoding(self):
        """The ``encoding`` config value; defaults to ``'utf8'``."""
        return self.config.get('encoding', 'utf8')

    @property
    def filter_level(self):
        """The ``filter_level`` config value (``None`` when not configured)."""
        return self.config.get('filter_level')

    @property
    def follow_also_tracks(self):
        """The ``follow_also_tracks`` config value; defaults to ``False``."""
        return self.config.get('follow_also_tracks', False)
if __name__ == "__main__":
    while 1:
        try:
            #outfile = sys.argv[1]
            #auth = tweepy.BasicAuthHandler('reza_shz', 'mehdireza') 
            auth = tweepy.OAuthHandler("xg2hLKvf1nxw1TUALvx5xA", "MkX0lDUik0mJuc6nxserddbQDWd7ZTErQN6Tf0OhOM")
            auth.set_access_token("174566652-MOGbxytlmUHIN5tEMgl5rgqWdWaIQXYZ6XPyYKl1", "yem38OfoUbsoPZvOVr3k0n3X7JSUDYD8oxAKXvrJw6k")
            twitterApi = API(auth_handler=auth,
                    host='api.twitter.com', search_host='search.twitter.com',
                     cache=FileCache("cache", timeout = -1), secure=False, api_root='/1', search_root='',
                    retry_count=0, retry_delay=0, retry_errors=None,
                    parser=None)
                        
            username1 = sys.argv[1]
            username2 = sys.argv[2]
            user1 = twitterApi.get_user(username1) #@UndefinedVariable
            user2 = twitterApi.get_user(username2) #@UndefinedVariable
            
            
            forward = []
	    forward_ds = [1]
	    backward_ds = [1]
	    forward_min = backward_min = 0
	    backward = []
	    is_opt = 0
            
            forward.append({"obj":user1, "cursor":-1, "friends":[], "cursor_obj":-1, "path":[]})
            backward.append({"obj":user2, "cursor":-1, "cursor_obj":-1,"path":[], "followers":[] })
            reqs = 0
            while 1:
		fin, path = go_backward()