def download(self, file_name, award_levels, award_types=None, agency=None, sub_agency=None,
             date_type=None, start_date=None, end_date=None, columns=None, file_format="csv",
             monthly_download=False, cleanup=False, use_sqs=False):
    """Create a DownloadJob for the given filters and either generate the CSVs
    in-process or enqueue the job on SQS for another worker.

    Args:
        file_name: target zip name; expected to end in 'YYYYMMDD.zip' (the
            cleanup step strips those last 12 characters to build a prefix).
        award_levels: award levels to include in the download request.
        award_types, agency, sub_agency, date_type, start_date, end_date:
            filter values forwarded in the request payload.
        columns: optional list of columns to include; defaults to an empty list.
        file_format: output format for the generated files (default "csv").
        monthly_download: NOTE(review): this parameter is currently ignored —
            the created job is always flagged monthly_download=True; confirm intent.
        cleanup: when generating locally, delete older files in the bucket that
            share this file's prefix.
        use_sqs: enqueue the job on SQS instead of generating locally.
    """
    # Fix: use None as the default instead of a shared mutable list.
    if columns is None:
        columns = []
    date_range = {}
    if start_date:
        date_range['start_date'] = start_date
    if end_date:
        date_range['end_date'] = end_date
    json_request = {
        'award_levels': award_levels,
        'filters': {
            'award_types': award_types,
            'agency': str(agency),
            'date_type': date_type,
            'date_range': date_range,
        },
        'columns': columns,
        'file_format': file_format
    }
    download_viewset = YearLimitedDownloadViewSet()
    download_viewset.process_filters(json_request)
    validated_request = download_viewset.validate_request(json_request)
    download_job = DownloadJob.objects.create(
        job_status_id=JOB_STATUS_DICT['ready'],
        file_name=file_name,
        json_request=json.dumps(order_nested_object(validated_request)),
        monthly_download=True)
    if not use_sqs:
        # Note: Because of the line below, it's advised to only run this script on a
        # separate instance as this will modify your bulk download settings.
        settings.BULK_DOWNLOAD_S3_BUCKET_NAME = settings.MONTHLY_DOWNLOAD_S3_BUCKET_NAME
        csv_generation.generate_csvs(download_job=download_job)
        if cleanup:
            # Get all the files that have the same prefix except for the update date
            file_name_prefix = file_name[:-12]  # subtracting the 'YYYYMMDD.zip'
            for key in self.bucket.list(prefix=file_name_prefix):
                if key.name == file_name:
                    # ignore the one we just uploaded
                    continue
                self.bucket.delete_key(key.name)
                logger.info('Deleting {} from bucket'.format(key.name))
    else:
        # Send a SQS message that will be processed by another server, which will eventually run
        # csv_generation.generate_csvs(download_job, message) (see generate_zip.py)
        queue = sqs_queue(region_name=settings.BULK_DOWNLOAD_AWS_REGION,
                          QueueName=settings.BULK_DOWNLOAD_SQS_QUEUE_NAME)
        queue.send_message(MessageBody=str(download_job.download_job_id))
def restart_download_operation(self):
    """Restart this download job, clearing any previous error message.

    When the process is local the CSVs are regenerated in-process; otherwise
    the job is pushed back onto the queue and its status set to queued.
    """
    if not process_is_local():
        self.push_job_to_queue()
        self.update_download_job(job_status_id=JOB_STATUS_DICT["queued"], error_message=None)
        return
    self.update_download_job(job_status_id=JOB_STATUS_DICT["ready"], error_message=None)
    csv_generation.generate_csvs(download_job=self.download_job)
def process_request(self, download_job):
    """Generate the CSVs in-process when running locally; otherwise hand the
    job off via an SQS message."""
    if not settings.IS_LOCAL:
        # Another server picks this message up and eventually runs
        # csv_generation.write_csvs(**kwargs) (see generate_zip.py)
        write_to_log(
            message='Passing download_job {} to SQS'.format(download_job.download_job_id),
            download_job=download_job)
        sqs_queue(QueueName=settings.BULK_DOWNLOAD_SQS_QUEUE_NAME).send_message(
            MessageBody=str(download_job.download_job_id))
        return
    # Locally, we do not use SQS
    csv_generation.generate_csvs(download_job=download_job)
def handle(self, *args, **options):
    """Run the application: poll SQS indefinitely and generate CSVs for each
    DownloadJob referenced by a received message."""
    queue = sqs_queue(region_name=settings.BULK_DOWNLOAD_AWS_REGION,
                      QueueName=settings.BULK_DOWNLOAD_SQS_QUEUE_NAME)
    write_to_log(message='Starting SQS polling')
    while True:
        second_attempt = True
        # Fix: initialize these before the try so the except/finally clauses
        # cannot hit a NameError (or act on a job left over from a previous
        # iteration) when receive_messages itself raises.
        messages = []
        download_job = None
        try:
            # Grabs one (or more) messages from the queue
            messages = queue.receive_messages(
                WaitTimeSeconds=10,
                MessageAttributeNames=['All'],
                VisibilityTimeout=DEFAULT_VISIBILITY_TIMEOUT)
            for message in messages:
                write_to_log(message='Message Received: {}'.format(message))
                if message.body is not None:
                    # Retrieve and update the job
                    download_job = DownloadJob.objects.filter(
                        download_job_id=int(message.body)).first()
                    # A job that already carries an error message has failed once
                    # before; the next failure is terminal ('failed' vs 'ready').
                    second_attempt = download_job.error_message is not None
                    # Retrieve the data and write to the CSV(s)
                    write_to_log(
                        message='Starting to work on DownloadJob {}'.format(
                            download_job.download_job_id),
                        download_job=download_job)
                    csv_generation.generate_csvs(download_job=download_job, sqs_message=message)
                    # If successful, we do not want to run again; delete
                    message.delete()
        except Exception as e:
            # Handle uncaught exceptions in validation process
            logger.error(e)
            write_to_log(message=str(e), download_job=download_job, is_error=True)
            if download_job:
                download_job.error_message = str(e)
                download_job.job_status_id = JOB_STATUS_DICT['failed' if second_attempt else 'ready']
                download_job.save()
        finally:
            # Set visibility to 0 so that another attempt can be made to process in SQS
            # immediately, instead of waiting for the timeout window to expire
            for message in messages:
                try:
                    message.change_visibility(VisibilityTimeout=0)
                except botocore.exceptions.ClientError:
                    # TODO: check existence instead of catching error
                    continue
def process_request(self, download_job):
    """Run CSV generation in-process locally; otherwise enqueue the job on SQS."""
    if settings.IS_LOCAL:
        # Locally, we do not use SQS
        csv_generation.generate_csvs(download_job=download_job)
        return
    # A worker on another server will pick this message up and eventually run
    # csv_generation.write_csvs(**kwargs) (see download_sqs_worker.py)
    write_to_log(
        message='Passing download_job {} to SQS'.format(download_job.download_job_id),
        download_job=download_job
    )
    sqs = get_sqs_queue_resource(queue_name=settings.BULK_DOWNLOAD_SQS_QUEUE_NAME)
    sqs.send_message(MessageBody=str(download_job.download_job_id))
def download_service_app(download_job_id):
    """Fetch the download job and generate its CSVs.

    Returns 0 on success and a non-zero integer (11) when generation raises.
    """
    job = retrieve_download_job_from_db(download_job_id)
    write_to_log(
        message="Starting new Download Service App with pid {}".format(os.getpid()),
        download_job=job)
    # Retrieve the data and write to the CSV(s)
    try:
        csv_generation.generate_csvs(download_job=job)
    except Exception:
        write_to_log(message="Caught exception", download_job=job, is_error=True)
        return 11  # arbitrary positive integer
    return 0
def download_service_app(download_job_id):
    """Entry point for the download service worker process.

    Looks up the DownloadJob by id, runs CSV generation for it, and reports
    the outcome via the return code (0 = success, 11 = failure).
    """
    download_job = retrieve_download_job_from_db(download_job_id)
    startup_message = "Starting new Download Service App with pid {}".format(os.getpid())
    write_to_log(message=startup_message, download_job=download_job)
    try:
        # Retrieve the data and write to the CSV(s)
        csv_generation.generate_csvs(download_job=download_job)
    except Exception:
        write_to_log(message="Caught exception", download_job=download_job, is_error=True)
        return 11  # arbitrary positive integer
    return 0
def download(self, file_name, award_levels, award_types=None, agency=None, sub_agency=None,
             date_type=None, start_date=None, end_date=None, columns=None, file_format="csv",
             monthly_download=False, cleanup=False, use_sqs=False):
    """Create a DownloadJob for the given filters and either generate the CSVs
    in-process or enqueue the job on SQS for another worker.

    Args:
        file_name: target zip name; expected to end in 'YYYYMMDD.zip' (the
            cleanup step strips those last 12 characters to build a prefix).
        award_levels: award levels to include in the download request.
        award_types, agency, sub_agency, date_type, start_date, end_date:
            filter values forwarded in the request payload.
        columns: optional list of columns to include; defaults to an empty list.
        file_format: output format for the generated files (default "csv").
        monthly_download: NOTE(review): this parameter is currently ignored —
            the created job is always flagged monthly_download=True; confirm intent.
        cleanup: when generating locally, delete older files in the bucket that
            share this file's prefix.
        use_sqs: enqueue the job on SQS instead of generating locally.
    """
    # Fix: use None as the default instead of a shared mutable list.
    if columns is None:
        columns = []
    date_range = {}
    if start_date:
        date_range['start_date'] = start_date
    if end_date:
        date_range['end_date'] = end_date
    json_request = {
        'constraint_type': 'year',
        'award_levels': award_levels,
        'filters': {
            'award_types': award_types,
            'agency': str(agency),
            'date_type': date_type,
            'date_range': date_range,
        },
        'columns': columns,
        'file_format': file_format
    }
    download_viewset = YearLimitedDownloadViewSet()
    download_viewset.process_filters(json_request)
    validated_request = download_viewset.validate_award_request(json_request)
    download_job = DownloadJob.objects.create(
        job_status_id=JOB_STATUS_DICT['ready'],
        file_name=file_name,
        json_request=json.dumps(order_nested_object(validated_request)),
        monthly_download=True)
    if not use_sqs:
        # Note: Because of the line below, it's advised to only run this script on a
        # separate instance as this will modify your bulk download settings.
        settings.BULK_DOWNLOAD_S3_BUCKET_NAME = settings.MONTHLY_DOWNLOAD_S3_BUCKET_NAME
        csv_generation.generate_csvs(download_job=download_job)
        if cleanup:
            # Get all the files that have the same prefix except for the update date
            file_name_prefix = file_name[:-12]  # subtracting the 'YYYYMMDD.zip'
            for key in self.bucket.objects.filter(Prefix=file_name_prefix):
                if key.key == file_name:
                    # ignore the one we just uploaded
                    continue
                key.delete()
                logger.info('Deleting {} from bucket'.format(key.key))
    else:
        queue = get_sqs_queue_resource(queue_name=settings.BULK_DOWNLOAD_SQS_QUEUE_NAME)
        queue.send_message(MessageBody=str(download_job.download_job_id))
def download(
    self,
    file_name,
    award_levels,
    award_types=None,
    agency=None,
    sub_agency=None,
    date_type=None,
    start_date=None,
    end_date=None,
    columns=None,
    file_format="csv",
    monthly_download=False,
    cleanup=False,
    use_sqs=False,
):
    """Create a DownloadJob for the given filters and either generate the CSVs
    in-process or enqueue the job on SQS for another worker.

    Args:
        file_name: target zip name; expected to end in 'YYYYMMDD.zip' (the
            cleanup step strips those last 12 characters to build a prefix).
        award_levels: award levels to include in the download request.
        award_types, agency, sub_agency, date_type, start_date, end_date:
            filter values forwarded in the request payload.
        columns: optional list of columns to include; defaults to an empty list.
        file_format: output format for the generated files (default "csv").
        monthly_download: NOTE(review): this parameter is currently ignored —
            the created job is always flagged monthly_download=True; confirm intent.
        cleanup: when generating locally, delete older files in the bucket that
            share this file's prefix.
        use_sqs: enqueue the job on SQS instead of generating locally.
    """
    # Fix: use None as the default instead of a shared mutable list.
    if columns is None:
        columns = []
    date_range = {}
    if start_date:
        date_range["start_date"] = start_date
    if end_date:
        date_range["end_date"] = end_date
    json_request = {
        "constraint_type": "year",
        "award_levels": award_levels,
        "filters": {
            "award_types": award_types,
            "agency": str(agency),
            "date_type": date_type,
            "date_range": date_range,
        },
        "columns": columns,
        "file_format": file_format,
    }
    download_viewset = YearLimitedDownloadViewSet()
    download_viewset.process_filters(json_request)
    validated_request = validate_award_request(json_request)
    download_job = DownloadJob.objects.create(
        job_status_id=JOB_STATUS_DICT["ready"],
        file_name=file_name,
        json_request=json.dumps(order_nested_object(validated_request)),
        monthly_download=True,
    )
    if not use_sqs:
        # Note: Because of the line below, it's advised to only run this script on a
        # separate instance as this will modify your bulk download settings.
        settings.BULK_DOWNLOAD_S3_BUCKET_NAME = settings.MONTHLY_DOWNLOAD_S3_BUCKET_NAME
        csv_generation.generate_csvs(download_job=download_job)
        if cleanup:
            # Get all the files that have the same prefix except for the update date
            file_name_prefix = file_name[:-12]  # subtracting the 'YYYYMMDD.zip'
            for key in self.bucket.objects.filter(Prefix=file_name_prefix):
                if key.key == file_name:
                    # ignore the one we just uploaded
                    continue
                key.delete()
                logger.info("Deleting {} from bucket".format(key.key))
    else:
        queue = get_sqs_queue_resource(queue_name=settings.BULK_DOWNLOAD_SQS_QUEUE_NAME)
        queue.send_message(MessageBody=str(download_job.download_job_id))