Example #1
0
    def post(self, request):
        """Push a message to SQS with the validated request JSON.

        Returns a cached download response when an identical request has
        already been run today (outside local environments); otherwise
        creates a new DownloadJob and starts processing it.
        """
        json_request = self.validate_request(request.data)
        # Canonically order the request JSON so semantically-equal requests
        # serialize identically and hit the same cache row below.
        ordered_json_request = json.dumps(order_nested_object(json_request))

        # Check if the same request has been called today
        updated_date_timestamp = datetime.datetime.strftime(
            datetime.datetime.utcnow(), '%Y-%m-%d')
        # NOTE(review): job_status_id=4 is presumably a failed/terminal status
        # excluded from the cache -- confirm against JOB_STATUS_DICT.
        cached_download = DownloadJob.objects.filter(
            json_request=ordered_json_request,
            update_date__gte=updated_date_timestamp).exclude(
                job_status_id=4).values('file_name')
        if cached_download and not settings.IS_LOCAL:
            # By returning the cached files, there should be no duplicates on a daily basis
            cached_filename = cached_download[0]['file_name']
            return self.get_download_response(file_name=cached_filename)

        # Create download name and timestamped name for uniqueness
        download_name = '_'.join(
            VALUE_MAPPINGS[award_level]['download_name']
            for award_level in json_request['award_levels'])
        timestamped_file_name = self.s3_handler.get_timestamped_filename(
            download_name + '.zip')
        download_job = DownloadJob.objects.create(
            job_status_id=JOB_STATUS_DICT['ready'],
            file_name=timestamped_file_name,
            json_request=ordered_json_request)

        # BUG FIX: the original message string had no '{}' placeholder, so the
        # job id passed to .format() was silently dropped from the log line.
        write_to_log(message='Starting new download job {}'.format(
            download_job.download_job_id),
                     download_job=download_job,
                     other_params={'request_addr': get_remote_addr(request)})
        self.process_request(download_job)

        return self.get_download_response(file_name=timestamped_file_name)
    def download(self,
                 file_name,
                 award_levels,
                 award_types=None,
                 agency=None,
                 sub_agency=None,
                 date_type=None,
                 start_date=None,
                 end_date=None,
                 columns=None,
                 file_format="csv",
                 monthly_download=False,
                 cleanup=False,
                 use_sqs=False):
        """Create and persist a monthly DownloadJob, then either generate the
        CSVs inline or enqueue the job on SQS for another worker.

        Args:
            file_name: target zip name; when ``cleanup`` is True it is assumed
                to end with 'YYYYMMDD.zip' (last 12 characters are stripped to
                build the bucket prefix).
            award_levels: list of award levels placed in the request JSON.
            columns: optional list of column names (defaults to an empty list).
            use_sqs: when True, enqueue the job id on SQS instead of
                generating CSVs locally.

        NOTE(review): ``sub_agency`` is accepted but never used, and
        ``monthly_download`` is ignored -- the job is always created with
        ``monthly_download=True``. Confirm whether these should be wired in.
        """
        # BUG FIX: 'columns=[]' was a mutable default argument shared across
        # calls; use None as the sentinel and substitute a fresh list.
        if columns is None:
            columns = []

        date_range = {}
        if start_date:
            date_range['start_date'] = start_date
        if end_date:
            date_range['end_date'] = end_date
        json_request = {
            'award_levels': award_levels,
            'filters': {
                'award_types': award_types,
                'agency': str(agency),
                'date_type': date_type,
                'date_range': date_range,
            },
            'columns': columns,
            'file_format': file_format
        }
        download_viewset = YearLimitedDownloadViewSet()
        download_viewset.process_filters(json_request)
        validated_request = download_viewset.validate_request(json_request)
        download_job = DownloadJob.objects.create(
            job_status_id=JOB_STATUS_DICT['ready'],
            file_name=file_name,
            json_request=json.dumps(order_nested_object(validated_request)),
            monthly_download=True)

        if not use_sqs:
            # Note: Because of the line below, it's advised to only run this script on a separate instance as this will
            #       modify your bulk download settings.
            settings.BULK_DOWNLOAD_S3_BUCKET_NAME = settings.MONTHLY_DOWNLOAD_S3_BUCKET_NAME
            csv_generation.generate_csvs(download_job=download_job)
            if cleanup:
                # Get all the files that have the same prefix except for the update date
                file_name_prefix = file_name[:-12]  # subtracting the 'YYYYMMDD.zip'
                for key in self.bucket.list(prefix=file_name_prefix):
                    if key.name == file_name:
                        # ignore the one we just uploaded
                        continue
                    self.bucket.delete_key(key.name)
                    logger.info('Deleting {} from bucket'.format(key.name))
        else:
            # Send a SQS message that will be processed by another server, which will eventually run
            # csv_generation.generate_csvs(download_job, message) (see generate_zip.py)
            queue = sqs_queue(region_name=settings.BULK_DOWNLOAD_AWS_REGION,
                              QueueName=settings.BULK_DOWNLOAD_SQS_QUEUE_NAME)
            queue.send_message(MessageBody=str(download_job.download_job_id))
Example #3
0
 def get_source_dict(self, params, view_instance, view_method, request,
                     args, kwargs):
     """Build the cache-key source dict from the request's query params and
     body, dropping any 'auditTrail' entry so it never affects the key."""
     merged = {**dict(request.query_params), **dict(request.data)}
     merged.pop('auditTrail', None)
     return {'request': json.dumps(order_nested_object(merged))}
Example #4
0
 def prepare_key(self, key_dict):
     """Return a deterministic MD5 hex digest for *key_dict*.

     The dict is canonically ordered via order_nested_object before JSON
     serialization so logically-equal dicts always map to the same cache key.
     """
     canonical = json.dumps(order_nested_object(key_dict))
     return hashlib.md5(canonical.encode('utf-8')).hexdigest()