import collections
import logging
import time
from datetime import datetime

import tweepy  # tweepy < 4.0, where tweepy.TweepError still exists
from django.db import transaction
from redis.exceptions import WatchError

# The project-local modules and names used below (redis_wrap, appstore_fetch,
# appstore_app_info, the Django models, and the various *_KEY / *_FORMAT
# constants) are assumed to come from the surrounding codebase; they are not
# part of this listing. Note that the redis calls use redis-py 2.x calling
# conventions (e.g. zadd(key, score, member, ...)).


def maybe_ingest_app_info(now_offset=0):
  now_timestamp = time.time() + now_offset

  app_ids_needing_ingestion = []

  # Atomically find next app ids to check.
  redis = redis_wrap.client()
  while True:
    with redis.pipeline() as pipe:
      try:
        pipe.watch(APP_INFO_INGESTION_ZSET_KEY)
        app_ids_needing_ingestion = pipe.zrangebyscore(APP_INFO_INGESTION_ZSET_KEY,
            0, now_timestamp,
            start=0, num=30)

        if app_ids_needing_ingestion:
          rescored_app_ids = []
          for app_id in app_ids_needing_ingestion:
            rescored_app_ids.append(next_ingestion_time())
            rescored_app_ids.append(app_id)
          pipe.multi()
          pipe.zadd(APP_INFO_INGESTION_ZSET_KEY, *rescored_app_ids)
          pipe.execute()

        break
      except WatchError:
        logging.info('Interrupted while fetching app ids to ingest...')

  if not app_ids_needing_ingestion:
    return

  logging.info('Ingesting app infos for %d app(s)...', len(app_ids_needing_ingestion))
  for app_id in app_ids_needing_ingestion:
    try:
      app = AppStoreApp.objects.get(pk=app_id)
    except AppStoreApp.DoesNotExist:
      logging.warning('Deleted app id, removing from ingestion queue: %s', app_id)
      redis.zrem(APP_INFO_INGESTION_ZSET_KEY, app_id)
      continue

    # NOTE: app_info_countries should already contain unique entries, but it
    # currently has duplicates for unknown reasons, so dedupe defensively.
    unique_countries = set(app.app_info_countries)
    for country in unique_countries:
      try:
        fetch_info_and_maybe_update_app(app, country)

      except appstore_fetch.RateLimitedError:
        # This is not likely to fix itself soon. Give up.
        logging.info('Rate limited by App Store, giving up for now...')
        time.sleep(15)
        return

      except Exception:
        logging.exception('Problem ingesting app info for app id: %s - %s (%s - %s)',
            app.id, country, app.itunes_id, app.bundle_id)

    app.app_info_ingestion_time = datetime.now()
    app.save(update_fields=['app_info_ingestion_time'])

    time.sleep(0.3333)
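
# A minimal sketch of a periodic driver for maybe_ingest_app_info; the real
# scheduler is not part of this listing, so the worker name and cadence below
# are illustrative only.
def run_app_info_ingestion_worker():
  while True:
    maybe_ingest_app_info()
    time.sleep(5)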
def finish_auth(user, token, verifier):
  auth = get_auth_handler()
  redis = redis_wrap.client()
  redis_key = get_redis_key(token)
  token_secret = redis.get(redis_key)
  if token_secret is None:
    # The request token secret expires out of redis (see get_auth_redirect);
    # treat a missing secret the same as any other auth failure.
    logging.error('No request token secret in redis for token: %s', token)
    return False, None

  auth.request_token = {'oauth_token': token, 'oauth_token_secret': token_secret}
  try:
    auth.get_access_token(verifier)
  except tweepy.TweepError as e:
    logging.error('tweepy error during finish auth: %s', e)
    return False, None

  access_token = auth.access_token
  access_token_secret = auth.access_token_secret
  twitter_handle = auth.get_username()

  TwitterAccessToken.objects.get_or_create(
    handle=twitter_handle,
    token=access_token,
    token_secret=access_token_secret,
    user=user
  )

  return True, twitter_handle
def update_labels_for_user(sdk_user, future_days_offset=0):
  old_labels = set(sdk_user.labels or [])
  new_labels = labels_for_user(sdk_user, future_days_offset=future_days_offset)

  if old_labels == new_labels:
    return False

  change_events = []
  increment_label_by = collections.defaultdict(int)

  removed_labels = old_labels - new_labels
  for label in removed_labels:
    change_event = SDKUserLabelChangeEvent(app_id=sdk_user.app_id, sdk_user_id=sdk_user.id, user_id=sdk_user.user_id,
                                           label=label, kind='removed')
    increment_label_by[label] -= 1
    change_events.append(change_event)

  added_labels = new_labels - old_labels
  for label in added_labels:
    change_event = SDKUserLabelChangeEvent(app_id=sdk_user.app_id, sdk_user_id=sdk_user.id, user_id=sdk_user.user_id,
                                           label=label, kind='added')
    increment_label_by[label] += 1
    change_events.append(change_event)

  # Create change events and update labels on user.

  SDKUserLabelChangeEvent.objects.bulk_create(change_events)

  labels = sorted(new_labels)
  sdk_user.labels = labels
  sdk_user.save(update_fields=['labels'])

  # Now track the changes we've made in our counts.

  for l1, l2 in TRACK_LABEL_COMBINATIONS:
    had_label = l1 in old_labels and l2 in old_labels
    has_label = l1 in new_labels and l2 in new_labels

    if had_label != has_label:
      if had_label:
        # removed label
        increment = -1
      else:
        # added label
        increment = 1

      combined_label = '%s-%s' % (l1, l2)
      increment_label_by[combined_label] = increment

  redis = redis_wrap.client()
  redis_key = CURRENT_COUNT_REDIS_KEY_FORMAT % sdk_user.app_id

  with redis.pipeline() as pipe:
    for label, increment in increment_label_by.items():
      pipe.hincrby(redis_key, label, increment)
    pipe.execute()

  # logging.info('Updated sdk user %s labels: %s', sdk_user.id, labels)

  return True
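
# For context, a purely hypothetical shape for the module-level values that
# update_labels_for_user relies on; the real definitions live elsewhere in
# the codebase:
#
#   CURRENT_COUNT_REDIS_KEY_FORMAT = 'sdk_user_label_counts:%s'
#   TRACK_LABEL_COMBINATIONS = [('ios', 'weekly-active'), ('android', 'weekly-active')]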
def process_sdkuser_labels_periodically():
  redis = redis_wrap.client()
  max_id = redis.get(LAST_SDKUSER_LABELS_PROCESSED_KEY) or 0

  with transaction.atomic():
    sdk_users = list(
      SDKUser.objects
        .select_for_update()
        .filter(id__gt=max_id)
        .order_by('id')[:PROCESS_SDKUSERS_LABELS_LIMIT]
    )

    if sdk_users:
      new_max_id = sdk_users[-1].id
      updates = 0
      for sdk_user in sdk_users:
        if update_labels_for_user(sdk_user):
          updates += 1

      if updates:
        logging.info('Updated %s sdk user labels periodically', updates)

    else:
      logging.info('No more users to update labels, starting over')
      new_max_id = 0

  redis.set(LAST_SDKUSER_LABELS_PROCESSED_KEY, new_max_id)
def add_all_apps_to_ingestion_queue():
  apps_countries = list(AppStoreAppReviewTracker.objects.all().values_list('app_id', 'country'))
  redis = redis_wrap.client()
  for app_id, country in apps_countries:
    app = AppStoreApp.objects.get(pk=app_id)
    appstore_app_info.decorate_app(app, country)
    target_time = next_ingestion_time(app, country)
    redis.zadd(APP_REVIEWS_INGESTION_ZSET_KEY, target_time, '%s:%s' % (app_id, country))
def process_sdkuser_days_active():
  redis = redis_wrap.client()
  max_id = redis.get(LAST_SDKUSER_PROCESSED_KEY) or 0

  new_max_id = process_sdkuser_days_active_from_id(max_id)

  if new_max_id:
    logging.info('New max id: %s', new_max_id)
    redis.set(LAST_SDKUSER_PROCESSED_KEY, new_max_id)
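
# Both process_sdkuser_* entry points above are cursor-style batch jobs meant
# to be invoked repeatedly (e.g. from cron or a task queue); each run resumes
# from the max id stored in redis.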
def mark_app_needs_ingestion(app, country, force=False):
  t = AppStoreAppReviewTracker.objects.get(app_id=app.id, country=country)
  if not force and t.last_ingestion_time:
    return

  # If it needs ingestion, do it now.
  target_time = time.time()

  # Add '<app_id>:<country>' to the reviews-ingestion zset, scored at target_time.
  redis = redis_wrap.client()
  redis.zadd(APP_REVIEWS_INGESTION_ZSET_KEY, target_time, '%s:%s' % (app.id, country))
def get_auth_redirect(app_id=None, custom_cb=None):
  auth = get_auth_handler(app_id, custom_cb)
  try:
    redirect_url = auth.get_authorization_url()
  except tweepy.TweepError as e:
    logging.error('tweepy error getting auth url: %s', e)
    return None

  redis = redis_wrap.client()
  redis_key = get_redis_key(auth.request_token['oauth_token'])
  redis.setex(redis_key, 3600, auth.request_token['oauth_token_secret'])
  return redirect_url
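
# A sketch of how the two halves of the OAuth dance (get_auth_redirect and
# finish_auth) might be wired into views; everything here - view names, URL
# params, redirect targets - is illustrative, not part of the original module.
from django.http import HttpResponseRedirect, HttpResponseServerError

def twitter_login_view(request):
  redirect_url = get_auth_redirect()
  if not redirect_url:
    return HttpResponseServerError('Could not reach Twitter')
  return HttpResponseRedirect(redirect_url)

def twitter_callback_view(request):
  success, handle = finish_auth(request.user,
                                request.GET.get('oauth_token'),
                                request.GET.get('oauth_verifier'))
  if not success:
    return HttpResponseServerError('Twitter auth failed')
  return HttpResponseRedirect('/settings')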
def mark_sdk_user_labels_dirty(sdk_user_ids):
  if not sdk_user_ids:
    return

  t = time.time()
  rescored_user_ids = []

  user_ids_uniq = set(sdk_user_ids)
  for user_id in user_ids_uniq:
    rescored_user_ids.append(t)
    rescored_user_ids.append(user_id)

  redis = redis_wrap.client()
  redis.zadd(DIRTY_SDK_USER_LABELS_REDIS_KEY, *rescored_user_ids)
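
# Illustrative producer-side usage: code that mutates anything labels depend
# on enqueues the affected users, and process_dirty_sdk_user_labels (below)
# drains the zset in batches of 50:
#
#   mark_sdk_user_labels_dirty([sdk_user.id for sdk_user in touched_users])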
def label_counts_by_app_ids(app_ids):
  redis = redis_wrap.client()

  results = None
  with redis.pipeline() as pipe:
    for app_id in app_ids:
      redis_key = CURRENT_COUNT_REDIS_KEY_FORMAT % app_id
      pipe.hgetall(redis_key)

    results = pipe.execute()

  response = {}
  for app_id, result in zip(app_ids, results):
    response[app_id] = {k: int(v) for k, v in result.items()}
  return response
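
# Example of the response shape (values illustrative); apps with no recorded
# counts come back as empty dicts, since HGETALL on a missing key returns an
# empty hash:
#
#   label_counts_by_app_ids([12, 34])
#   => {12: {'ios': 120, 'ios-weekly-active': 80}, 34: {}}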
def apps_countries_to_ingest(limit, now_offset=0):
  now_timestamp = time.time() + now_offset

  apps_needing_ingestion = []
  countries = []

  # Atomically find next app ids to check.
  redis = redis_wrap.client()
  while True:
    with redis.pipeline() as pipe:
      try:
        pipe.watch(APP_REVIEWS_INGESTION_ZSET_KEY)
        app_ids_countries_needing_ingestion = pipe.zrangebyscore(APP_REVIEWS_INGESTION_ZSET_KEY,
            0, now_timestamp,
            start=0, num=limit)

        if app_ids_countries_needing_ingestion:
          app_ids = []
          countries = []
          for ac in app_ids_countries_needing_ingestion:
            app_id, country = ac.split(':')
            app_id = int(app_id)
            app_ids.append(app_id)
            countries.append(country)

          apps_by_id = dict((a.id, a) for a in AppStoreApp.objects.filter(id__in=app_ids))
          apps_needing_ingestion = [apps_by_id[app_id] for app_id in app_ids]

          rescored_app_ids = []
          for app, country in zip(apps_needing_ingestion, countries):
            rescored_app_ids.append(next_ingestion_time(app, country))
            rescored_app_ids.append('%s:%s' % (app.id, country))

          if rescored_app_ids:
            pipe.multi()
            pipe.zadd(APP_REVIEWS_INGESTION_ZSET_KEY, *rescored_app_ids)
            pipe.execute()

        break
      except WatchError:
        logging.info('Interrupted while fetching app ids to ingest...')

  for app, country in zip(apps_needing_ingestion, countries):
    yield app, country
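
# apps_countries_to_ingest is a generator that has already rescored everything
# it yields back into the zset, so a crash mid-iteration just delays the app
# until its next scheduled time. A consumer simply iterates (ingest_reviews_for
# is a hypothetical stand-in for the real ingestion entry point):
#
#   for app, country in apps_countries_to_ingest(limit=25):
#     ingest_reviews_for(app, country)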
def process_dirty_sdk_user_labels():
  now_timestamp = time.time()

  sdk_user_ids_needing_attention = []

  # Atomically find the next batch of dirty sdk user ids.
  redis = redis_wrap.client()
  while True:
    with redis.pipeline() as pipe:
      try:
        pipe.watch(DIRTY_SDK_USER_LABELS_REDIS_KEY)
        sdk_user_ids_needing_attention = pipe.zrangebyscore(DIRTY_SDK_USER_LABELS_REDIS_KEY,
            0, now_timestamp,
            start=0, num=50)

        if sdk_user_ids_needing_attention:
          pipe.multi()
          pipe.zrem(DIRTY_SDK_USER_LABELS_REDIS_KEY, *sdk_user_ids_needing_attention)
          pipe.execute()

        break
      except WatchError:
        logging.info('Interrupted while fetching dirty sdk user ids...')

  updates = 0
  with transaction.atomic():
    # Select these in a single transaction instead of individually to avoid lock contention.
    sdk_users_needing_attention = list(
        SDKUser.objects
          .filter(id__in=sdk_user_ids_needing_attention)
          .select_for_update()
          .order_by('id')
    )
    for sdk_user in sdk_users_needing_attention:
      if update_labels_for_user(sdk_user):
        updates += 1

  if updates:
    logging.info('Updated labels for %s sdk user rows...', updates)
def passes_health_check():
  # Returns an error string describing the failure, or None if healthy.
  try:
    users = list(User.objects.all()[:1])
  except Exception as e:
    # Note: This won't usually happen because session middleware will try
    # accessing the DB cursor before we get here, which will cause an error
    # farther up the stack.
    return 'Database unreachable with error: %s' % e

  if not users or not users[0].date_joined:
    return 'Could not find a user, database must be FUBAR'

  redis = redis_wrap.client()
  if not redis:
    return 'Redis unreachable, see log for more detail'

  try:
    redis.echo('Hello, world!')
  except Exception as e:
    return 'Redis unreachable with error: %s' % e

  return None
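
# A sketch of a health-check endpoint built on passes_health_check; the view
# name and response bodies are illustrative.
from django.http import HttpResponse, HttpResponseServerError

def health_check_view(request):
  error = passes_health_check()
  if error:
    return HttpResponseServerError(error)
  return HttpResponse('OK')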
def mark_app_needs_info_ingestion(app):
  target_time = time.time()

  # Add app.id to the info-ingestion zset, scored at target_time.
  redis = redis_wrap.client()
  redis.zadd(APP_INFO_INGESTION_ZSET_KEY, target_time, app.id)
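
# mark_app_needs_info_ingestion is the producer half of the queue that
# maybe_ingest_app_info (at the top of this listing) drains; a typical call
# site would be wherever an AppStoreApp row is first created (illustrative):
#
#   app = AppStoreApp.objects.create(itunes_id=itunes_id, bundle_id=bundle_id)
#   mark_app_needs_info_ingestion(app)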