def download(self, file_name, award_levels, award_types=None, agency=None, sub_agency=None, date_type=None,
             start_date=None, end_date=None, columns=None, file_format="csv", monthly_download=True,
             cleanup=False, use_sqs=False):
    """Create a DownloadJob for the given filters and either generate the CSVs locally or enqueue the job on SQS.

    Args:
        file_name: Target zip file name (expected to end in 'YYYYMMDD.zip' when cleanup is used).
        award_levels: Award levels passed straight into the download request.
        award_types: Optional award type filters.
        agency: Agency identifier; stringified into the request filters.
        sub_agency: Currently unused by this method (kept for interface compatibility).
        date_type: Date type filter (e.g. action_date).
        start_date / end_date: Optional bounds for the request's date_range filter.
        columns: Optional list of columns to include; defaults to all (empty list).
        file_format: Output format for the generated files (default 'csv').
        monthly_download: Stored on the DownloadJob record. NOTE(review): this was previously
            hardcoded to True in the create() call, ignoring the parameter; the default is now
            True so callers that omitted it see unchanged behavior, while explicit values are honored.
        cleanup: When generating locally, delete older files sharing this file's prefix from the bucket.
        use_sqs: When True, enqueue the job on SQS instead of generating CSVs in-process.
    """
    date_range = {}
    if start_date:
        date_range['start_date'] = start_date
    if end_date:
        date_range['end_date'] = end_date
    json_request = {
        'award_levels': award_levels,
        'filters': {
            'award_types': award_types,
            'agency': str(agency),
            'date_type': date_type,
            'date_range': date_range,
        },
        # Bug fix: `columns=[]` as a default argument is a shared mutable object; use a None
        # sentinel and materialize a fresh list per call.
        'columns': columns if columns is not None else [],
        'file_format': file_format
    }
    download_viewset = YearLimitedDownloadViewSet()
    download_viewset.process_filters(json_request)
    validated_request = download_viewset.validate_request(json_request)
    download_job = DownloadJob.objects.create(
        job_status_id=JOB_STATUS_DICT['ready'],
        file_name=file_name,
        json_request=json.dumps(order_nested_object(validated_request)),
        # Bug fix: previously hardcoded True, silently discarding the caller's argument.
        monthly_download=monthly_download)
    if not use_sqs:
        # Note: Because of the line below, it's advised to only run this script on a separate instance as this will
        # modify your bulk download settings.
        settings.BULK_DOWNLOAD_S3_BUCKET_NAME = settings.MONTHLY_DOWNLOAD_S3_BUCKET_NAME
        csv_generation.generate_csvs(download_job=download_job)
        if cleanup:
            # Get all the files that have the same prefix except for the update date
            file_name_prefix = file_name[:-12]  # subtracting the 'YYYYMMDD.zip'
            for key in self.bucket.list(prefix=file_name_prefix):
                if key.name == file_name:
                    # ignore the one we just uploaded
                    continue
                self.bucket.delete_key(key.name)
                logger.info('Deleting {} from bucket'.format(key.name))
    else:
        # Send a SQS message that will be processed by another server, which will eventually run
        # csv_generation.generate_csvs(download_job, message) (see generate_zip.py)
        queue = sqs_queue(region_name=settings.BULK_DOWNLOAD_AWS_REGION,
                          QueueName=settings.BULK_DOWNLOAD_SQS_QUEUE_NAME)
        queue.send_message(MessageBody=str(download_job.download_job_id))
def process_request(self, download_job):
    """Dispatch a download job: run CSV generation in-process locally, otherwise enqueue it on SQS."""
    if settings.IS_LOCAL:
        # Locally, we do not use SQS
        csv_generation.generate_csvs(download_job=download_job)
        return
    # Send a SQS message that will be processed by another server which will eventually run
    # csv_generation.write_csvs(**kwargs) (see generate_zip.py)
    job_id = download_job.download_job_id
    write_to_log(message='Passing download_job {} to SQS'.format(job_id), download_job=download_job)
    sqs = sqs_queue(QueueName=settings.BULK_DOWNLOAD_SQS_QUEUE_NAME)
    sqs.send_message(MessageBody=str(job_id))
def handle(self, *args, **options):
    """Run the application: poll the bulk-download SQS queue forever and process each job message."""
    queue = sqs_queue(region_name=settings.BULK_DOWNLOAD_AWS_REGION,
                      QueueName=settings.BULK_DOWNLOAD_SQS_QUEUE_NAME)
    write_to_log(message='Starting SQS polling')
    while True:
        second_attempt = True
        # Bug fix: both names below were previously assigned only inside the `try`, so an
        # exception raised before those assignments (e.g. by receive_messages) caused a
        # NameError in the `except`/`finally` blocks instead of the intended handling.
        download_job = None
        messages = []
        try:
            # Grabs one (or more) messages from the queue
            messages = queue.receive_messages(
                WaitTimeSeconds=10,
                MessageAttributeNames=['All'],
                VisibilityTimeout=DEFAULT_VISIBILITY_TIMEOUT)
            for message in messages:
                write_to_log(message='Message Received: {}'.format(message))
                if message.body is not None:
                    # Retrieve and update the job
                    # NOTE(review): .first() can return None for an unknown job id, which would
                    # raise AttributeError on the next line and be handled by the except block
                    # (matching prior behavior) — confirm whether such messages should be deleted.
                    download_job = DownloadJob.objects.filter(
                        download_job_id=int(message.body)).first()
                    # A job that already carries an error message has failed once before
                    second_attempt = download_job.error_message is not None
                    # Retrieve the data and write to the CSV(s)
                    write_to_log(
                        message='Starting to work on DownloadJob {}'.format(download_job.download_job_id),
                        download_job=download_job)
                    csv_generation.generate_csvs(download_job=download_job, sqs_message=message)
                    # If successful, we do not want to run again; delete
                    message.delete()
        except Exception as e:
            # Handle uncaught exceptions in validation process
            logger.error(e)
            write_to_log(message=str(e), download_job=download_job, is_error=True)
            if download_job:
                download_job.error_message = str(e)
                # First failure is retried ('ready'); a second failure is terminal ('failed')
                download_job.job_status_id = JOB_STATUS_DICT['failed' if second_attempt else 'ready']
                download_job.save()
        finally:
            # Set visibility to 0 so that another attempt can be made to process in SQS immediately, instead of
            # waiting for the timeout window to expire
            for message in messages:
                try:
                    message.change_visibility(VisibilityTimeout=0)
                except botocore.exceptions.ClientError:
                    # TODO: check existence instead of catching error
                    continue