def maybe_ingest_app_info(now_offset=0):
    now_timestamp = time.time() + now_offset
    app_ids_needing_ingestion = []

    # Atomically find the next app ids to check.
    redis = redis_wrap.client()
    while True:
        with redis.pipeline() as pipe:
            try:
                pipe.watch(APP_INFO_INGESTION_ZSET_KEY)
                app_ids_needing_ingestion = pipe.zrangebyscore(
                    APP_INFO_INGESTION_ZSET_KEY, 0, now_timestamp,
                    start=0, num=30)
                if app_ids_needing_ingestion:
                    rescored_app_ids = []
                    for app_id in app_ids_needing_ingestion:
                        rescored_app_ids.append(next_ingestion_time())
                        rescored_app_ids.append(app_id)
                    pipe.multi()
                    pipe.zadd(APP_INFO_INGESTION_ZSET_KEY, *rescored_app_ids)
                    pipe.execute()
                break
            except WatchError:
                logging.info('Interrupted while fetching app ids to ingest...')

    if not app_ids_needing_ingestion:
        return

    logging.info('Ingesting app infos for %d app(s)...',
                 len(app_ids_needing_ingestion))
    for app_id in app_ids_needing_ingestion:
        try:
            app = AppStoreApp.objects.get(pk=app_id)
        except AppStoreApp.DoesNotExist:
            logging.warning('Deleted app id, removing from ingestion queue: %s',
                            app_id)
            redis.zrem(APP_INFO_INGESTION_ZSET_KEY, app_id)
            continue

        # NOTE: This should be unique but it isn't right now for whatever reason.
        unique_countries = set(app.app_info_countries)
        for country in unique_countries:
            try:
                fetch_info_and_maybe_update_app(app, country)
            except appstore_fetch.RateLimitedError:
                # This is not likely to fix itself soon. Give up.
                logging.info('Rate limited by App Store, giving up for now...')
                time.sleep(15)
                return
            except Exception:
                logging.exception(
                    'Problem ingesting app info for app id: %s - %s (%s - %s)',
                    app.id, country, app.itunes_id, app.bundle_id)

        app.app_info_ingestion_time = datetime.now()
        app.save(update_fields=['app_info_ingestion_time'])
        time.sleep(0.3333)


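# A minimal sketch of a worker loop that could drive maybe_ingest_app_info on
# a schedule. The function name and the 30-second poll interval are
# assumptions for illustration, not part of the original module.
def run_app_info_ingestion_worker(poll_interval=30):
    while True:
        maybe_ingest_app_info()
        time.sleep(poll_interval)

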
def finish_auth(user, token, verifier):
    auth = get_auth_handler()

    redis = redis_wrap.client()
    redis_key = get_redis_key(token)
    token_secret = redis.get(redis_key)

    auth.request_token = {
        'oauth_token': token,
        'oauth_token_secret': token_secret,
    }
    try:
        auth.get_access_token(verifier)
    except tweepy.TweepError as e:
        logging.error('tweepy error during finish auth: %s', e)
        return False, None

    access_token = auth.access_token
    access_token_secret = auth.access_token_secret
    twitter_handle = auth.get_username()

    TwitterAccessToken.objects.get_or_create(
        handle=twitter_handle, token=access_token,
        token_secret=access_token_secret, user=user)

    return True, twitter_handle


def update_labels_for_user(sdk_user, future_days_offset=0):
    old_labels = set(sdk_user.labels or [])
    new_labels = labels_for_user(sdk_user, future_days_offset=future_days_offset)
    if old_labels == new_labels:
        return False

    change_events = []
    increment_label_by = collections.defaultdict(lambda: 0)

    removed_labels = old_labels - new_labels
    for label in removed_labels:
        change_event = SDKUserLabelChangeEvent(
            app_id=sdk_user.app_id, sdk_user_id=sdk_user.id,
            user_id=sdk_user.user_id, label=label, kind='removed')
        increment_label_by[label] -= 1
        change_events.append(change_event)

    added_labels = new_labels - old_labels
    for label in added_labels:
        change_event = SDKUserLabelChangeEvent(
            app_id=sdk_user.app_id, sdk_user_id=sdk_user.id,
            user_id=sdk_user.user_id, label=label, kind='added')
        increment_label_by[label] += 1
        change_events.append(change_event)

    # Create change events and update labels on user.
    SDKUserLabelChangeEvent.objects.bulk_create(change_events)

    labels = sorted(new_labels)
    sdk_user.labels = labels
    sdk_user.save(update_fields=['labels'])

    # Now track the changes we've made in our counts.
    for l1, l2 in TRACK_LABEL_COMBINATIONS:
        had_label = l1 in old_labels and l2 in old_labels
        has_label = l1 in new_labels and l2 in new_labels
        if had_label != has_label:
            if had_label:
                # removed label
                increment = -1
            else:
                # added label
                increment = 1
            combined_label = '%s-%s' % (l1, l2)
            increment_label_by[combined_label] = increment

    redis = redis_wrap.client()
    redis_key = CURRENT_COUNT_REDIS_KEY_FORMAT % sdk_user.app_id
    with redis.pipeline() as pipe:
        for label, increment in increment_label_by.items():
            pipe.hincrby(redis_key, label, increment)
        pipe.execute()

    # logging.info('Updated sdk user %s labels: %s', sdk_user.id, labels)
    return True


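# For illustration only: TRACK_LABEL_COMBINATIONS is assumed to be a sequence
# of label pairs whose co-occurrence is counted under a combined 'l1-l2' hash
# field, along the lines of the hypothetical value below. A user gaining both
# labels of a pair increments the combined count; losing either decrements it.
#
#     TRACK_LABEL_COMBINATIONS = [
#         ('active', 'paying'),
#         ('active', 'churn-risk'),
#     ]

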
def process_sdkuser_labels_periodically():
    redis = redis_wrap.client()
    max_id = redis.get(LAST_SDKUSER_LABELS_PROCESSED_KEY) or 0

    with transaction.atomic():
        sdk_users = list(
            SDKUser.objects
            .select_for_update()
            .filter(id__gt=max_id)
            .order_by('id')[:PROCESS_SDKUSERS_LABELS_LIMIT]
        )
        if sdk_users:
            new_max_id = sdk_users[-1].id
            updates = 0
            for sdk_user in sdk_users:
                if update_labels_for_user(sdk_user):
                    updates += 1
            if updates:
                logging.info('Updated %s sdk user labels periodically', updates)
        else:
            logging.info('No more users to update labels, starting over')
            new_max_id = 0

    redis.set(LAST_SDKUSER_LABELS_PROCESSED_KEY, new_max_id)


def add_all_apps_to_ingestion_queue():
    apps_countries = list(
        AppStoreAppReviewTracker.objects.all().values_list('app_id', 'country'))
    redis = redis_wrap.client()
    for app_id, country in apps_countries:
        app = AppStoreApp.objects.get(pk=app_id)
        appstore_app_info.decorate_app(app, country)
        target_time = next_ingestion_time(app, country)
        redis.zadd(APP_REVIEWS_INGESTION_ZSET_KEY, target_time,
                   '%s:%s' % (app_id, country))


def process_sdkuser_days_active():
    redis = redis_wrap.client()
    max_id = redis.get(LAST_SDKUSER_PROCESSED_KEY) or 0
    new_max_id = process_sdkuser_days_active_from_id(max_id)
    if new_max_id:
        logging.info('New max id: %s', new_max_id)
        redis.set(LAST_SDKUSER_PROCESSED_KEY, new_max_id)


def mark_app_needs_ingestion(app, country, force=False):
    t = AppStoreAppReviewTracker.objects.get(app_id=app.id, country=country)
    if not force and t.last_ingestion_time:
        return

    # It needs ingestion, so schedule it for right now.
    target_time = time.time()

    # Add '<app id>:<country>' to the zset, scored at target_time.
    redis = redis_wrap.client()
    redis.zadd(APP_REVIEWS_INGESTION_ZSET_KEY, target_time,
               '%s:%s' % (app.id, country))


def get_auth_redirect(app_id=None, custom_cb=None):
    auth = get_auth_handler(app_id, custom_cb)
    try:
        redirect_url = auth.get_authorization_url()
    except tweepy.TweepError as e:
        logging.error('tweepy error getting auth url: %s', e)
        return None

    redis = redis_wrap.client()
    redis_key = get_redis_key(auth.request_token['oauth_token'])
    redis.setex(redis_key, 3600, auth.request_token['oauth_token_secret'])

    return redirect_url


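# A rough sketch of how the two halves of the OAuth dance (get_auth_redirect
# above, finish_auth earlier) might be wired into Django views. The view
# names, query parameter handling, and responses are assumptions for
# illustration.
from django.http import HttpResponse, HttpResponseRedirect


def twitter_auth_start(request):
    redirect_url = get_auth_redirect()
    if not redirect_url:
        return HttpResponse('Could not reach Twitter', status=502)
    return HttpResponseRedirect(redirect_url)


def twitter_auth_callback(request):
    token = request.GET.get('oauth_token')
    verifier = request.GET.get('oauth_verifier')
    success, twitter_handle = finish_auth(request.user, token, verifier)
    if not success:
        return HttpResponse('Twitter auth failed', status=400)
    return HttpResponse('Connected @%s' % twitter_handle)

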
def apps_countries_to_ingest(limit, now_offset=0):
    now_timestamp = time.time() + now_offset
    apps_needing_ingestion = []
    countries = []

    # Atomically find the next app ids to check.
    redis = redis_wrap.client()
    while True:
        with redis.pipeline() as pipe:
            try:
                pipe.watch(APP_REVIEWS_INGESTION_ZSET_KEY)
                app_ids_countries_needing_ingestion = pipe.zrangebyscore(
                    APP_REVIEWS_INGESTION_ZSET_KEY, 0, now_timestamp,
                    start=0, num=limit)
                if app_ids_countries_needing_ingestion:
                    app_ids = []
                    countries = []
                    for ac in app_ids_countries_needing_ingestion:
                        app_id, country = ac.split(':')
                        app_id = long(app_id)
                        app_ids.append(app_id)
                        countries.append(country)

                    apps_by_id = dict(
                        (a.id, a)
                        for a in AppStoreApp.objects.filter(id__in=app_ids))
                    apps_needing_ingestion = [
                        apps_by_id[app_id] for app_id in app_ids]

                    rescored_app_ids = []
                    for app, country in zip(apps_needing_ingestion, countries):
                        rescored_app_ids.append(next_ingestion_time(app, country))
                        rescored_app_ids.append('%s:%s' % (app.id, country))
                    if rescored_app_ids:
                        pipe.multi()
                        pipe.zadd(APP_REVIEWS_INGESTION_ZSET_KEY,
                                  *rescored_app_ids)
                        pipe.execute()
                break
            except WatchError:
                logging.info('Interrupted while fetching app ids to ingest...')

    for app, country in zip(apps_needing_ingestion, countries):
        yield app, country


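# A sketch of a consumer for the generator above; ingest_reviews_for is a
# hypothetical stand-in for whatever actually fetches and stores the reviews.
def ingest_due_app_reviews(limit=10):
    for app, country in apps_countries_to_ingest(limit):
        ingest_reviews_for(app, country)  # hypothetical helper

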
def mark_sdk_user_labels_dirty(sdk_user_ids):
    if not sdk_user_ids:
        return

    t = time.time()
    rescored_user_ids = []
    user_ids_uniq = set(sdk_user_ids)
    for user_id in user_ids_uniq:
        rescored_user_ids.append(t)
        rescored_user_ids.append(user_id)

    redis = redis_wrap.client()
    redis.zadd(DIRTY_SDK_USER_LABELS_REDIS_KEY, *rescored_user_ids)


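# Example usage (the ids are placeholders): mark a batch of users dirty after
# mutating them, then let process_dirty_sdk_user_labels (below) pick them up.
#
#     mark_sdk_user_labels_dirty([101, 102, 103])
#     process_dirty_sdk_user_labels()

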
def label_counts_by_app_ids(app_ids):
    redis = redis_wrap.client()
    results = None
    with redis.pipeline() as pipe:
        for app_id in app_ids:
            redis_key = CURRENT_COUNT_REDIS_KEY_FORMAT % app_id
            pipe.hgetall(redis_key)
        results = pipe.execute()

    response = {}
    for app_id, result in zip(app_ids, results):
        response[app_id] = {k: int(v) for k, v in result.items()}
    return response


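# For illustration, the return value maps each app id to its current label
# counts; the label names and numbers below are made up, and apps with no
# counted labels yet come back as empty dicts.
#
#     >>> label_counts_by_app_ids([1, 2])
#     {1: {'active': 42, 'paying': 7}, 2: {}}

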
def process_dirty_sdk_user_labels():
    now_timestamp = time.time()
    sdk_user_ids_needing_attention = []

    # Atomically find the next sdk user ids to check.
    redis = redis_wrap.client()
    while True:
        with redis.pipeline() as pipe:
            try:
                pipe.watch(DIRTY_SDK_USER_LABELS_REDIS_KEY)
                sdk_user_ids_needing_attention = pipe.zrangebyscore(
                    DIRTY_SDK_USER_LABELS_REDIS_KEY, 0, now_timestamp,
                    start=0, num=50)
                if sdk_user_ids_needing_attention:
                    pipe.multi()
                    pipe.zrem(DIRTY_SDK_USER_LABELS_REDIS_KEY,
                              *sdk_user_ids_needing_attention)
                    pipe.execute()
                break
            except WatchError:
                logging.info('Interrupted while fetching sdk user ids to process...')

    updates = 0
    with transaction.atomic():
        # Select these in a single transaction instead of individually
        # to avoid lock contention.
        sdk_users_needing_attention = list(
            SDKUser.objects
            .filter(id__in=sdk_user_ids_needing_attention)
            .select_for_update()
            .order_by('id'))
        for sdk_user in sdk_users_needing_attention:
            if update_labels_for_user(sdk_user):
                updates += 1

    if updates:
        logging.info('Updated labels for %s sdk user rows...', updates)


def passes_health_check():
    try:
        users = list(User.objects.all()[:1])
    except Exception as e:
        # Note: This won't usually happen because session middleware will try
        # accessing the DB cursor before we get here, which will cause an
        # error farther up the stack.
        return 'Database unreachable with error: %s' % e
    if not users or not users[0].date_joined:
        return 'Could not find a user, database must be FUBAR'

    redis = redis_wrap.client()
    if not redis:
        return 'Redis unreachable, see log for more detail'
    try:
        redis.echo('Hello, world!')
    except Exception as e:
        return 'Redis unreachable with error: %s' % e

    return None


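# A sketch of a health-check endpoint wrapping the function above (reusing the
# django.http import from the OAuth sketch earlier); the view name is an
# assumption for illustration. The function returns an error string on
# failure and None when everything passes.
def health_check_view(request):
    error = passes_health_check()
    if error:
        return HttpResponse(error, status=500)
    return HttpResponse('OK')

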
def mark_app_needs_info_ingestion(app):
    target_time = time.time()

    # Add app.id to the zset, scored at target_time.
    redis = redis_wrap.client()
    redis.zadd(APP_INFO_INGESTION_ZSET_KEY, target_time, app.id)