def _process_message():
    if not g.sitemap_sqs_queue:
        return

    sqs = SQSConnection()
    sqs_q = sqs.get_queue(g.sitemap_sqs_queue)
    messages = sqs.receive_message(sqs_q, number_messages=1)

    if not messages:
        return

    message, = messages
    js = json.loads(message.get_body())
    s3path = parse_s3_path(js['location'])

    # There are some error cases that allow us to get messages
    # for sitemap creation that are now out of date.
    timestamp = js.get('timestamp')
    if timestamp is not None and _before_last_sitemap(timestamp):
        sqs_q.delete_message(message)
        return

    g.log.info("Got import job %r", js)
    subreddits = find_all_subreddits(s3path)
    store_sitemaps_in_s3(subreddits)
    sqs_q.delete_message(message)
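
# Both functions here lean on a parse_s3_path helper defined elsewhere
# in the module. A minimal sketch of what it might look like, assuming
# S3Path is a simple (bucket, key) pair as the docstring below suggests;
# this is a hypothetical reconstruction, not the module's actual
# implementation:
from collections import namedtuple

S3Path = namedtuple('S3Path', ['bucket', 'key'])


def parse_s3_path(location):
    """Split an "s3://bucket/key" location string into an S3Path."""
    prefix = 's3://'
    if not location.startswith(prefix):
        raise ValueError('not an s3 path: %r' % location)
    bucket, _, key = location[len(prefix):].partition('/')
    return S3Path(bucket, key)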
def _normalize_sqs_message(message):
    """Simplifies raw SQS messages to make them easier to work with.

    We currently handle two types of keys:

    location: Expected to be a string of the form s3://bucket/key. If it
        exists, we add a new key called s3path that holds an S3Path
        version of the original location.
    locations: Expected to be an array of s3:// path strings. Similarly,
        we add a new key called s3paths, an array of S3Path objects
        representing the original locations.
    """
    if 'location' in message:
        s3path = parse_s3_path(message['location'])
        message = dict(s3path=s3path, **message)
    if 'locations' in message:
        s3paths = [parse_s3_path(loc) for loc in message['locations']]
        message = dict(s3paths=s3paths, **message)
    return message
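
def _example_normalize_message():
    # Hedged illustration, not part of the original module: shows how
    # _normalize_sqs_message augments a message without dropping the raw
    # keys. The payload below is invented for the example, and the
    # S3Path comparison assumes the parse_s3_path sketch above.
    raw = {'location': 's3://sitemap-bucket/subreddit-listing',
           'timestamp': 1400000000}
    normalized = _normalize_sqs_message(raw)
    assert normalized['location'] == raw['location']
    assert normalized['s3path'] == parse_s3_path(raw['location'])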