def handle(self, *args, **options):
    """Fetch every tweet id listed in ``--inputfile`` in batches of 100.

    Options read:
        inputfile  -- path of a text file holding one tweet id per line.
            When it is None a usage hint is printed and nothing is done.
        outputfile -- path opened for writing and handed to ``self.fetch``;
            ``sys.stdout`` is used when it is empty.
        limit      -- optional maximum number of tweet ids to process.

    Each batch is passed to ``self.fetch`` together with the api, the
    output stream and the ``<inputfile>.log`` file.  A truthy return value
    from ``self.fetch`` is remembered and reported once at the end.
    """
    if options['inputfile'] is None:
        print('Please specify a valid input file using --inputfile')
        return
    infile = options['inputfile']
    logfile = infile + '.log'
    errors_occurred = False
    # Every stream appended here is closed in the ``finally`` below, even
    # when the API or a fetch raises.  sys.stdout is never appended, so it
    # is never closed.
    to_close = []
    try:
        # Read-only: the id file is never written to, and 'r+' would make
        # the command fail on read-only inputs.
        fin = open(infile, 'r')
        to_close.append(fin)
        flog = open(logfile, 'w')
        to_close.append(flog)
        if options['outputfile']:
            outstream = open(options['outputfile'], 'w')
            to_close.append(outstream)
        else:
            outstream = sys.stdout
        limit = int(options['limit']) if options['limit'] else None
        api = authenticated_api(username=settings.TWITTER_DEFAULT_USERNAME)
        assert api

        batch = []
        queued = 0
        for line in fin:
            # strip() rather than [:-1]: a final line without a trailing
            # newline must not lose its last digit, and '\r\n' endings
            # must not leave a stray '\r' on the id.
            tweet_id = line.strip()
            if not tweet_id:
                # blank lines carry no id
                continue
            if limit and queued == limit:
                print("Reached limit of %s tweets" % limit)
                break
            batch.append(tweet_id)
            queued += 1
            if len(batch) == 100:
                errors_occurred = (
                    self.fetch(batch, api, outstream, flog)
                    or errors_occurred)
                batch = []
        # Final fetch, only when something is left over
        if batch:
            errors_occurred = (
                self.fetch(batch, api, outstream, flog) or errors_occurred)
    finally:
        for stream in to_close:
            stream.close()
    if errors_occurred:
        print('Completed with errors. Please view the log file (%s) for '
              'details' % logfile)
def handle(self, *args, **options):
    """Refresh the stored screen name of every active TwitterUser.

    Options read:
        user -- optional name; when given only the TwitterUser with that
            ``name`` is checked.

    For each user with a non-zero ``uid`` the current profile is fetched
    with ``api.get_user``.  When the returned ``screen_name`` differs from
    the stored ``name``, the old name is added to the JSON object kept in
    ``former_names`` (keyed by a timestamp) and the record is saved with
    the new name.  TweepErrors are printed and the user is skipped.
    """
    api = authenticated_api(username=settings.TWITTER_DEFAULT_USERNAME)
    qs_tweeps = TwitterUser.objects.filter(is_active=True)
    user_name = options.get('user', None)
    if user_name:
        qs_tweeps = qs_tweeps.filter(name=user_name)
    for tweep in qs_tweeps:
        print('user: %s' % tweep.name)
        # check user status, update twitter user name if it has changed
        if tweep.uid == 0:
            print('uid has not been set yet - skipping.')
            continue
        try:
            user_status = api.get_user(id=tweep.uid)
            if user_status['screen_name'] != tweep.name:
                print(' -- updating screen name to %s' %
                      user_status['screen_name'])
                oldnames = json.loads(tweep.former_names or '{}')
                # UTC ISO-8601 key instead of local-time '%c': '%c' is
                # locale dependent and does not sort chronologically.
                stamp = datetime.datetime.utcnow().strftime(
                    '%Y-%m-%dT%H:%M:%SZ')
                oldnames[stamp] = tweep.name
                tweep.former_names = json.dumps(oldnames)
                tweep.name = user_status['screen_name']
                tweep.save()
        except tweepy.error.TweepError as e:
            print('Error: %s' % e)
            # go to the next tweep in the for loop
            continue
        finally:
            # getattr() so a missing last_response cannot raise from
            # inside ``finally`` and mask the real error.
            # NOTE(review): assumes set_wait_time accepts None - confirm.
            time.sleep(set_wait_time(getattr(api, 'last_response', None)))
def handle(self, *args, **options):
    """Fetch each tweet listed in ``--inputfile`` and write it out as JSON.

    Options read:
        inputfile  -- path of a text file holding one tweet id per line.
            When it is None a usage hint is printed and nothing is done.
        outputfile -- path to write the JSON to; ``sys.stdout`` is used
            when it is empty.

    Every id is looked up with ``api.get_status``; the result is dumped
    with ``json.dumps`` followed by a blank line.  TweepErrors are written
    to ``<inputfile>.log`` and reported once at the end.
    """
    if options['inputfile'] is None:
        print('Please specify a valid input file using --inputfile')
        return
    infile = options['inputfile']
    errors_occurred = False
    # Every stream appended here is closed in the ``finally`` below, even
    # when something raises.  sys.stdout is never appended.
    to_close = []
    try:
        # Read-only: the id file is never written to.
        fin = open(infile, 'r')
        to_close.append(fin)
        flog = open(infile + '.log', 'w')
        to_close.append(flog)
        if options['outputfile']:
            outstream = open(options['outputfile'], 'w')
            to_close.append(outstream)
        else:
            outstream = sys.stdout
        api = authenticated_api(username=settings.TWITTER_DEFAULT_USERNAME)
        for line in fin:
            # Drop the line terminator so the API receives a clean id,
            # and skip blank lines.
            tweet_id = line.strip()
            if not tweet_id:
                continue
            try:
                status = api.get_status(id=tweet_id)
                outstream.write(json.dumps(status) + '\n\n')
            except tweepy.error.TweepError as e:
                flog.write('Error: %s for the tweetid: %s\n' % (e, tweet_id))
                errors_occurred = True
    finally:
        for stream in to_close:
            stream.close()
    if errors_occurred:
        print('Completed with errors. Please view the log file for details')
def handle(self, *args, **options):
    """Refresh the stored screen name of every active TwitterUser.

    Options read:
        user -- optional name; when given only the TwitterUser with that
            ``name`` is checked.

    Users whose ``uid`` is 0 are skipped.  For the others the profile is
    fetched with ``api.get_user``; if its ``screen_name`` differs from the
    stored ``name``, the old name is recorded in the ``former_names`` JSON
    object under a UTC timestamp key and the record is saved with the new
    name.  TweepErrors are printed.  After every lookup the command sleeps
    for ``set_wait_time(api.last_response)``.
    """
    api = authenticated_api(username=settings.TWITTER_DEFAULT_USERNAME)
    tweeps = TwitterUser.objects.filter(is_active=True)
    only_user = options.get('user', None)
    if only_user:
        tweeps = tweeps.filter(name=only_user)

    for tweep in tweeps:
        print('user: %s' % tweep.name)
        if tweep.uid == 0:
            print('uid has not been set yet - skipping.')
            continue
        try:
            profile = api.get_user(id=tweep.uid)
            if profile['screen_name'] != tweep.name:
                print(' -- updating screen name to %s' %
                      profile['screen_name'])
                # an empty former_names field counts as an empty object
                history = json.loads(tweep.former_names or '{}')
                stamp = datetime.datetime.utcnow().strftime(
                    '%Y-%m-%dT%H:%M:%SZ')
                history[stamp] = tweep.name
                tweep.former_names = json.dumps(history)
                tweep.name = profile['screen_name']
                tweep.save()
        except tweepy.error.TweepError as e:
            print('Error: %s' % e)
            # move on to the next user
            continue
        finally:
            time.sleep(set_wait_time(api.last_response))
def handle(self, *args, **options):
    """Call ``populate_uid`` for every active TwitterUser.

    Options read:
        user -- optional name; when given only the TwitterUser with that
            ``name`` is processed.
    """
    api = authenticated_api(username=settings.TWITTER_DEFAULT_USERNAME)
    tweeps = TwitterUser.objects.filter(is_active=True)
    # narrow the queryset when a single username was requested
    only_user = options.get('user', None)
    if only_user:
        tweeps = tweeps.filter(name=only_user)
    for tweep in tweeps:
        print('user: %s' % tweep.name)
        populate_uid(tweep.name, api)
def forwards(self, orm):
    """Add ``TwitterFilter.uids`` and back-fill it for active filters.

    For every active TwitterFilter:
      * ``people`` and ``words`` are rewritten from whitespace-separated
        to comma-space-separated form;
      * the names in ``people`` (leading '@' removed) are resolved with
        ``api.lookup_users`` and the returned ids are stored,
        comma-space-separated, in ``uids``;
      * the filter is saved.

    Names for which no profile came back, across all filters, are written
    once to ``0026_migration.log`` in the current directory.  A failure on
    one filter is printed and does not stop the migration.
    """
    # Adding field 'TwitterFilter.uids'
    db.add_column(u'ui_twitterfilter', 'uids',
                  self.gf('django.db.models.fields.TextField')(
                      default='', blank=True),
                  keep_default=False)

    def comma_separate(text):
        # r'\s+' also covers '\n' and '\r'.  Empty tokens produced by
        # leading/trailing whitespace are dropped so the result never
        # starts or ends with ', '.
        return ', '.join(tok for tok in re.split(r'\s+', text) if tok)

    api = None
    invalid_accounts = set()
    for twitter_filter in orm.TwitterFilter.objects.all():
        try:
            if twitter_filter.is_active is not True:
                continue
            #add a comma-space for people
            twitter_filter.people = comma_separate(twitter_filter.people)
            #add a comma-space for words
            twitter_filter.words = comma_separate(twitter_filter.words)

            #fetch uids for twitterusers
            names = [name.lstrip().lstrip('@').rstrip()
                     for name in twitter_filter.people.split(',')]
            names = [name for name in names if name]
            if names:
                if api is None:
                    # built lazily, once, instead of once per filter
                    api = authenticated_api(
                        username=settings.TWITTER_DEFAULT_USERNAME)
                try:
                    profiles = api.lookup_users(screen_names=names)
                except Exception as e:
                    print('%s %s' % (e, names))
                    # Without this the loop below would hit an unbound
                    # (or previous filter's) result.
                    profiles = []
                #store values
                twitter_filter.uids = ', '.join(
                    str(profile['id']) for profile in profiles)
                #find invalid accounts; compared case-insensitively so a
                #name submitted in a different case is not reported
                found = set(profile['screen_name'].lower()
                            for profile in profiles)
                invalid_accounts.update(
                    name for name in names if name.lower() not in found)
            twitter_filter.save()
        except Exception as e:
            print('id:  %s %s' % (twitter_filter.id, e))

    if invalid_accounts:
        # Written once, after the loop, so the log lists every filter's
        # unresolved names and the file handle is closed.
        with open('0026_migration.log', 'w') as fp:
            fp.write(
                'Unable to retrieve uid for the following Twitter users:\n\n'
            )
            for t_usr in sorted(invalid_accounts):
                fp.write('%s\n' % t_usr)
        print('Please view log file for invalid accounts')
def forwards(self, orm):
    """Add ``TwitterFilter.uids`` and back-fill it for active filters.

    For every active TwitterFilter:
      * ``people`` and ``words`` are rewritten from whitespace-separated
        to comma-space-separated form;
      * the names in ``people`` (leading "@" removed) are resolved with
        ``api.lookup_users`` and the returned ids are stored,
        comma-space-separated, in ``uids``;
      * the filter is saved.

    Names for which no profile came back, across all filters, are written
    once to ``0026_migration.log`` in the current directory.  A failure on
    one filter is printed and does not stop the migration.
    """
    # Adding field 'TwitterFilter.uids'
    db.add_column(
        u"ui_twitterfilter",
        "uids",
        self.gf("django.db.models.fields.TextField")(default="", blank=True),
        keep_default=False,
    )

    def comma_separate(text):
        # r"\s+" also covers "\n" and "\r".  Empty tokens produced by
        # leading/trailing whitespace are dropped so the result never
        # starts or ends with ", ".
        return ", ".join(tok for tok in re.split(r"\s+", text) if tok)

    api = None
    invalid_accounts = set()
    for twitter_filter in orm.TwitterFilter.objects.all():
        try:
            if twitter_filter.is_active is not True:
                continue
            # add a comma-space for people
            twitter_filter.people = comma_separate(twitter_filter.people)
            # add a comma-space for words
            twitter_filter.words = comma_separate(twitter_filter.words)

            # fetch uids for twitterusers
            names = [
                name.lstrip().lstrip("@").rstrip()
                for name in twitter_filter.people.split(",")
            ]
            names = [name for name in names if name]
            if names:
                if api is None:
                    # built lazily, once, instead of once per filter
                    api = authenticated_api(
                        username=settings.TWITTER_DEFAULT_USERNAME
                    )
                try:
                    profiles = api.lookup_users(screen_names=names)
                except Exception as e:
                    print("%s %s" % (e, names))
                    # Without this the loop below would hit an unbound
                    # (or previous filter's) result.
                    profiles = []
                # store values
                twitter_filter.uids = ", ".join(
                    str(profile["id"]) for profile in profiles
                )
                # find invalid accounts; compared case-insensitively so a
                # name submitted in a different case is not reported
                found = set(
                    profile["screen_name"].lower() for profile in profiles
                )
                invalid_accounts.update(
                    name for name in names if name.lower() not in found
                )
            twitter_filter.save()
        except Exception as e:
            print("id:  %s %s" % (twitter_filter.id, e))

    if invalid_accounts:
        # Written once, after the loop, so the log lists every filter's
        # unresolved names and the file handle is closed.
        with open("0026_migration.log", "w") as fp:
            fp.write("Unable to retrieve uid for the following Twitter users:\n\n")
            for t_usr in sorted(invalid_accounts):
                fp.write("%s\n" % t_usr)
        print("Please view log file for invalid accounts")
def handle(self, *args, **options):
    """Refresh stored screen names of active TwitterUsers, 100 per call.

    Options read:
        user -- optional name; when given only the TwitterUser with that
            ``name`` is checked.

    The queryset is walked in pages of 100.  Users whose ``uid`` is 0 are
    reported and left out; the remaining uids of a page are resolved with
    one ``api.lookup_users`` call.  When a returned ``screen_name``
    differs from the stored ``name``, the old name is recorded in the
    ``former_names`` JSON object under a UTC timestamp key and the record
    is saved with the new name.  TweepErrors are printed.  After each
    lookup the command sleeps for ``set_wait_time(api.last_response)``.
    """
    api = authenticated_api(username=settings.TWITTER_DEFAULT_USERNAME)
    tweeps = TwitterUser.objects.filter(is_active=True)
    only_user = options.get('user', None)
    if only_user:
        tweeps = tweeps.filter(name=only_user)

    pages = Paginator(tweeps, 100)
    total_pages = pages.num_pages
    for page_number in range(1, total_pages + 1):
        print("Page %s of %s" % (page_number, total_pages))

        # uid -> TwitterUser for everybody on this page that has a uid
        by_uid = {}
        for tweep in pages.page(page_number):
            if tweep.uid != 0:
                by_uid[tweep.uid] = tweep
            else:
                print('user: %s' % tweep.name)
                print(' -- uid has not been set yet - skipping.')

        if not by_uid:
            continue

        try:
            for profile in api.lookup_users(user_ids=list(by_uid)):
                tweep = by_uid[profile['id']]
                print('user: %s' % tweep.name)
                if profile['screen_name'] != tweep.name:
                    print(' -- updating screen name to %s' %
                          profile['screen_name'])
                    # an empty former_names field counts as an empty object
                    history = json.loads(tweep.former_names or '{}')
                    stamp = datetime.datetime.utcnow().strftime(
                        '%Y-%m-%dT%H:%M:%SZ')
                    history[stamp] = tweep.name
                    tweep.former_names = json.dumps(history)
                    tweep.name = profile['screen_name']
                    tweep.save()
        except tweepy.error.TweepError as e:
            # nothing follows in the loop body, so the next page is next
            print('Error: %s' % e)
        finally:
            time.sleep(set_wait_time(api.last_response))
def handle(self, *args, **options):
    """Fetch new timeline statuses for active TwitterUsers as one job.

    Options read:
        user -- optional name; when given only the TwitterUser with that
            ``name`` is processed, otherwise all active users are visited
            in random order.

    A TwitterUserTimelineJob is created up front.  For each user with a
    non-zero ``uid`` the timeline is paged with ``api.user_timeline``
    (200 per call) starting after the highest ``twitter_id`` already
    stored, each status is stored through ``get_or_create``, and
    ``job.num_added`` is increased by the number of items created.
    Problems are printed and stored as TwitterUserTimelineError rows.
    Paging for a user stops when a call returns nothing, creates fewer
    than 150 items, has walked back past ``since_id``, or the last
    response status is >= 400.
    """
    api = authenticated_api(username=settings.TWITTER_DEFAULT_USERNAME)
    job = TwitterUserTimelineJob()
    job.save()
    qs_tweeps = TwitterUser.objects.filter(is_active=True)
    if options.get('user', None):
        qs_tweeps = qs_tweeps.filter(name=options.get('user'))
    else:
        # NOTE: randomizing here might be healthier when considering
        # possibility of multiple parallel jobs running and competing
        # for api calls but this is an instinctual call, not data-driven
        qs_tweeps = qs_tweeps.order_by('?')
    for tweep in qs_tweeps:
        print('user: %s' % tweep.name)
        # can't do this unless we have a twitter user_id stored
        if tweep.uid == 0:
            skipmsg = 'uid has not been set yet - skipping this ' + \
                      'user.  May need to run populate_uids if this ' + \
                      'is an old database.'
            print(skipmsg)
            error = TwitterUserTimelineError(job=job, user=tweep,
                                             error=skipmsg)
            error.save()
            continue
        # now move on to determining first tweet id to get
        since_id = 1
        # set since_id if they have any statuses recorded
        if tweep.items.count() > 0:
            max_dict = tweep.items.all().aggregate(Max('twitter_id'))
            since_id = max_dict['twitter_id__max']
        max_id = 0
        # update their record (auto_now) as we're checking it now
        tweep.save()
        while True:
            stop = False
            # Error raised by THIS call, if any.  Kept in its own
            # variable because the ``except`` target would otherwise be
            # unbound, or left over from an earlier call, further down.
            fetch_error = None
            try:
                print('since: %s' % (since_id))
                if max_id:
                    print('max: %s' % max_id)
                    timeline = api.user_timeline(id=tweep.uid,
                                                 since_id=since_id,
                                                 max_id=max_id, count=200)
                else:
                    timeline = api.user_timeline(id=tweep.uid,
                                                 since_id=since_id,
                                                 count=200)
            except tweepy.error.TweepError as e:
                print('ERROR: %s' % e)
                fetch_error = e
                error = TwitterUserTimelineError(job=job, user=tweep,
                                                 error=e)
                error.save()
                timeline = []
            if len(timeline) == 0:
                # Nothing new; stop for this user
                stop = True
            new_status_count = 0
            for status in timeline:
                # eg 'Mon Oct 15 20:15:12 +0000 2012'
                dt_aware = dt_aware_from_created_at(status['created_at'])
                try:
                    item, created = TwitterUserItem.objects.get_or_create(
                        twitter_user=tweep,
                        twitter_id=status['id'],
                        date_published=dt_aware,
                        item_text=status['text'],
                        item_json=json.dumps(status),
                        place=status['place'] or '',
                        source=status['source'])
                    if created:
                        max_id = item.twitter_id - 1
                        new_status_count += 1
                    else:
                        print('skip: id %s' % item.id)
                except IntegrityError as ie:
                    print('ERROR: %s' % ie)
                    error = TwitterUserTimelineError(job=job, user=tweep,
                                                     error=ie)
                    error.save()
            print('saved: %s item(s)' % new_status_count)
            job.num_added += new_status_count
            # max new statuses per call is 200, so check for less than
            # a reasonable fraction of that to see if we should stop
            if new_status_count < 150:
                print('stop: < 150 new statuses')
                stop = True
            if max_id < since_id:
                # Got 'em all, stop for this user
                print('stop: max_id < since_id')
                stop = True
            # Check response codes for issues
            response_status = api.last_response.status
            if response_status >= 400:
                status_header = api.last_response.getheader('status')
                print('error: %s' % (status_header,))
                # Store this call's exception when there was one,
                # otherwise the status header of the failing response.
                error = TwitterUserTimelineError(
                    job=job, user=tweep,
                    error=(fetch_error if fetch_error is not None
                           else status_header))
                error.save()
                stop = True
            job.save()
            # wait before next call no matter what
            time.sleep(set_wait_time(api.last_response))
            if stop:
                break
def handle(self, *args, **options):
    """Fetch new timeline statuses for active TwitterUsers.

    Options read:
        user -- optional name; when given only the TwitterUser with that
            ``name`` is processed.

    Users are visited ordered by ``date_last_checked``; those whose
    ``uid`` is 0 are skipped.  For each remaining user the timeline is
    paged with ``api.user_timeline`` (200 per call), starting after the
    highest ``twitter_id`` already stored, and every status is stored
    through ``get_or_create``.  Paging for a user stops when a call
    returns nothing, creates fewer than 150 items, has walked back past
    ``since_id``, or the last response status is >= 400.  The command
    sleeps for ``set_wait_time(api.last_response)`` after every call.
    """
    api = authenticated_api(username=settings.TWITTER_DEFAULT_USERNAME)
    tweeps = TwitterUser.objects.filter(is_active=True)
    only_user = options.get('user', None)
    if only_user:
        tweeps = tweeps.filter(name=only_user)
    tweeps = tweeps.order_by('date_last_checked')

    for tweep in tweeps:
        print('user: %s' % tweep.name)
        # a stored twitter user id is required for the timeline call
        if tweep.uid == 0:
            print('uid has not been set yet - skipping this user.  '
                  'May need to run populate_uids if this is an old '
                  'database.')
            continue

        # start after the newest status already on record, if any
        since_id = 1
        if tweep.items.count() > 0:
            newest = tweep.items.all().aggregate(Max('twitter_id'))
            since_id = newest['twitter_id__max']
        max_id = 0
        # saving bumps the record (auto_now), since it is being checked
        tweep.save()

        while True:
            query = {'id': tweep.uid, 'since_id': since_id, 'count': 200}
            try:
                print('since: %s' % (since_id))
                if max_id:
                    print('max: %s' % max_id)
                    query['max_id'] = max_id
                timeline = api.user_timeline(**query)
            except tweepy.error.TweepError as e:
                print('ERROR: %s' % e)
                timeline = []

            # an empty page means there is nothing new for this user
            stop = len(timeline) == 0

            saved = 0
            for status in timeline:
                # eg 'Mon Oct 15 20:15:12 +0000 2012'
                published = dt_aware_from_created_at(status['created_at'])
                try:
                    item, created = TwitterUserItem.objects.get_or_create(
                        twitter_user=tweep,
                        twitter_id=status['id'],
                        date_published=published,
                        item_text=status['text'],
                        item_json=json.dumps(status),
                        place=status['place'] or '',
                        source=status['source'])
                except IntegrityError as ie:
                    print('ERROR: %s' % ie)
                    continue
                if created:
                    max_id = item.twitter_id - 1
                    saved += 1
                else:
                    print('skip: id %s' % item.id)
            print('saved: %s item(s)' % saved)

            # a call returns at most 200 statuses; well under that means
            # the end of the new material has been reached
            if saved < 150:
                print('stop: < 150 new statuses')
                stop = True
            if max_id < since_id:
                # everything newer than since_id has been collected
                print('stop: max_id < since_id')
                stop = True

            # an HTTP error status also ends paging for this user
            last_response = api.last_response
            if last_response.status >= 400:
                print('error: %s' % (last_response.getheader('status'),))
                stop = True

            # wait before next call no matter what
            time.sleep(set_wait_time(last_response))
            if stop:
                break
def handle(self, *args, **options):
    """Fetch new timeline statuses for active TwitterUsers as one job.

    Options read:
        user -- optional name; when given only the TwitterUser with that
            ``name`` is processed, otherwise all active users are visited
            in random order.

    A TwitterUserTimelineJob is created up front.  For each user with a
    non-zero ``uid`` the timeline is paged with ``api.user_timeline``
    (200 per call) starting after the highest ``twitter_id`` already
    stored, each status is stored through ``get_or_create``, and
    ``job.num_added`` is increased by the number of items created.
    Problems are printed and stored as TwitterUserTimelineError rows.
    Paging for a user stops on a TweepError, when a call returns nothing,
    creates fewer than 150 items, has walked back past ``since_id``, or
    the last response status code is >= 400.  The command sleeps before
    every API call.
    """
    api = authenticated_api(username=settings.TWITTER_DEFAULT_USERNAME)
    job = TwitterUserTimelineJob()
    job.save()
    qs_tweeps = TwitterUser.objects.filter(is_active=True)
    if options.get('user', None):
        qs_tweeps = qs_tweeps.filter(name=options.get('user'))
    else:
        # NOTE: randomizing here might be healthier when considering
        # possibility of multiple parallel jobs running and competing
        # for api calls but this is an instinctual call, not data-driven
        qs_tweeps = qs_tweeps.order_by('?')
    for tweep in qs_tweeps:
        print('user: %s' % tweep.name)
        # can't do this unless we have a twitter user_id stored
        if tweep.uid == 0:
            skipmsg = 'uid has not been set yet - skipping this ' + \
                      'user.  May need to run populate_uids if this ' + \
                      'is an old database.'
            print(skipmsg)
            error = TwitterUserTimelineError(job=job, user=tweep,
                                             error=skipmsg)
            error.save()
            continue
        # now move on to determining first tweet id to get
        since_id = 1
        # set since_id if they have any statuses recorded
        if tweep.items.count() > 0:
            max_dict = tweep.items.all().aggregate(Max('twitter_id'))
            since_id = max_dict['twitter_id__max']
        max_id = 0
        # update their record (auto_now) as we're checking it now
        tweep.save()
        while True:
            # wait before next call no matter what;
            # use getattr() because api might be None the first time or
            # after errors
            time.sleep(set_wait_time(getattr(api, 'last_response', None)))
            job.save()
            stop = False
            try:
                print('since: %s' % (since_id))
                if max_id:
                    print('max: %s' % max_id)
                    timeline = api.user_timeline(id=tweep.uid,
                                                 since_id=since_id,
                                                 max_id=max_id, count=200)
                else:
                    timeline = api.user_timeline(id=tweep.uid,
                                                 since_id=since_id,
                                                 count=200)
            except tweepy.error.TweepError as e:
                print('ERROR: %s' % e)
                error = TwitterUserTimelineError(job=job, user=tweep,
                                                 error=e)
                error.save()
                break
            if len(timeline) == 0:
                # Nothing new; stop for this user
                stop = True
            new_status_count = 0
            for status in timeline:
                # eg 'Mon Oct 15 20:15:12 +0000 2012'
                dt_aware = dt_aware_from_created_at(status['created_at'])
                try:
                    item, created = TwitterUserItem.objects.get_or_create(
                        twitter_user=tweep,
                        twitter_id=status['id'],
                        date_published=dt_aware,
                        item_text=status['text'],
                        item_json=json.dumps(status),
                        place=status['place'] or '',
                        source=status['source'])
                    if created:
                        max_id = item.twitter_id - 1
                        new_status_count += 1
                    else:
                        print('skip: id %s' % item.id)
                except IntegrityError as ie:
                    print('ERROR: %s' % ie)
                    error = TwitterUserTimelineError(job=job, user=tweep,
                                                     error=ie)
                    error.save()
            print('saved: %s item(s)' % new_status_count)
            job.num_added += new_status_count
            # max new statuses per call is 200, so check for less than
            # a reasonable fraction of that to see if we should stop
            if new_status_count < 150:
                print('stop: < 150 new statuses')
                stop = True
            if max_id < since_id:
                # Got 'em all, stop for this user
                print('stop: max_id < since_id')
                stop = True
            # Check response codes for issues
            response_status = api.last_response.status_code
            if response_status >= 400:
                # No exception is in flight here (the except branch above
                # breaks out of the loop), so the error row is built from
                # the status code that was just read.
                # NOTE(review): last_response exposes status_code, so it
                # is presumably a requests-style response without
                # getheader() - confirm.
                status_msg = 'HTTP status %s' % response_status
                print('error: %s' % status_msg)
                error = TwitterUserTimelineError(job=job, user=tweep,
                                                 error=status_msg)
                error.save()
                stop = True
            if stop:
                break
        # job.save() otherwise only runs at the top of the paging loop,
        # so the count added by a user's final page would stay unsaved.
        job.save()
def save_model(self, request, obj, form, change):
    """Resolve the filter's ``people`` to Twitter uids, then save it.

    ``obj.people`` is split on commas and each entry is stripped of
    surrounding whitespace and a leading '@'.  The names are looked up
    with ``api.lookup_users``; the ids found are stored comma-separated
    in ``obj.uids`` and the cleaned names are written back to
    ``obj.people``.  With no names, ``obj.uids`` is set to ''.

    After the object has been saved, warnings are added to the request
    for: a Twitter API error, submitted names that were not found, and a
    missing ``settings.SUPERVISOR_UNIX_SOCKET_FILE``.
    """
    uids = []
    ppl_found = []
    ppl_not_found = []
    warn_msg = {'supervisor_not_running': False, 'tweep_error': False,
                'invalid_acc': False}

    # cleaned-up usernames; empty entries (e.g. from a trailing comma)
    # are dropped so they are neither looked up nor reported as invalid
    ppl_submitted = [
        name for name in
        (person.lstrip().lstrip('@').rstrip()
         for person in obj.people.split(','))
        if name]

    if not ppl_submitted:
        obj.uids = ''
    else:
        try:
            api = m.authenticated_api(
                username=settings.TWITTER_DEFAULT_USERNAME)
            profiles_found = api.lookup_users(screen_names=ppl_submitted)
            for profile in profiles_found:
                uids.append(profile['id'])
                ppl_found.append(str(profile['screen_name']))
            # set the filter's uids to a comma-separated list of uids
            # of found profiles
            obj.uids = ', '.join(map(str, uids))
            # lower-cased names, so the comparison ignores case
            ppl_found_lower = set(n.lower() for n in ppl_found)
            # people_not_found is needed when we display the warning
            ppl_not_found = [p for p in ppl_submitted
                             if p.lower() not in ppl_found_lower]
            # At least one account name wasn't found
            if ppl_not_found:
                warn_msg['invalid_acc'] = True
            # save people list back to the model without @ symbols
            obj.people = ', '.join(ppl_submitted)
        except tweepy.error.TweepError as e:
            # The payload is expected to look like
            # [{'code': ..., 'message': ...}]; fall back to the plain
            # text of the error when it does not.
            try:
                detail = e.args[0][0]
                code, message = detail['code'], detail['message']
            except (IndexError, KeyError, TypeError):
                code, message = None, str(e)
            if code == 17:
                # NOTE(review): code 17 is treated as "none of the
                # submitted names exist" - confirm against the API docs.
                warn_msg['invalid_acc'] = True
                ppl_not_found = ppl_submitted
            else:
                warn_msg['tweep_error'] = True
                warn_msg['error'] = message

    if os.path.exists(settings.SUPERVISOR_UNIX_SOCKET_FILE) is False:
        warn_msg['supervisor_not_running'] = True

    # Save first: a newly created object has no id until it is saved, and
    # every message below says the filter "was saved".
    super(TwitterFilterAdmin, self).save_model(request, obj, form, change)

    if warn_msg['tweep_error']:
        messages.add_message(request, messages.WARNING,
                             'TwitterFilter %s was saved with the '
                             'exception: %s' %
                             (obj.id, warn_msg['error']))
    if warn_msg['supervisor_not_running']:
        messages.add_message(request, messages.WARNING,
                             'Supervisord is not running, TwitterFilter %s'
                             ' saved but not added to supervisor'
                             ' subprocesses' % (obj.id))
    if warn_msg['invalid_acc']:
        messages.add_message(request, messages.WARNING,
                             'TwitterFilter %s was saved with the'
                             ' following invalid accounts: %s' %
                             (obj.id, ', '.join(map(str, ppl_not_found))))