def download(self, file_name, award_levels, award_types=None, agency=None, sub_agency=None,
             date_type=None, start_date=None, end_date=None, columns=None, file_format="csv",
             monthly_download=False, cleanup=False, use_sqs=False):
    """Create a DownloadJob for the given filters and either generate the CSVs
    in-process or enqueue the job on SQS for another worker.

    Args:
        file_name: target zip name; expected to end in 'YYYYMMDD.zip' (the
            cleanup step strips those last 12 characters to build a prefix).
        award_levels: award levels to include in the download request.
        award_types, agency, sub_agency, date_type, start_date, end_date:
            filter values forwarded in the request payload.
        columns: optional list of columns to include; defaults to an empty list.
        file_format: output format for the generated files (default "csv").
        monthly_download: NOTE(review): this parameter is currently ignored —
            the created job is always flagged monthly_download=True; confirm intent.
        cleanup: when generating locally, delete older files in the bucket that
            share this file's prefix.
        use_sqs: enqueue the job on SQS instead of generating locally.
    """
    # Fix: use None as the default instead of a shared mutable list.
    if columns is None:
        columns = []
    date_range = {}
    if start_date:
        date_range['start_date'] = start_date
    if end_date:
        date_range['end_date'] = end_date
    json_request = {
        'award_levels': award_levels,
        'filters': {
            'award_types': award_types,
            'agency': str(agency),
            'date_type': date_type,
            'date_range': date_range,
        },
        'columns': columns,
        'file_format': file_format
    }
    download_viewset = YearLimitedDownloadViewSet()
    download_viewset.process_filters(json_request)
    validated_request = download_viewset.validate_request(json_request)
    download_job = DownloadJob.objects.create(
        job_status_id=JOB_STATUS_DICT['ready'],
        file_name=file_name,
        json_request=json.dumps(order_nested_object(validated_request)),
        monthly_download=True)
    if not use_sqs:
        # Note: Because of the line below, it's advised to only run this script on a
        # separate instance as this will modify your bulk download settings.
        settings.BULK_DOWNLOAD_S3_BUCKET_NAME = settings.MONTHLY_DOWNLOAD_S3_BUCKET_NAME
        csv_generation.generate_csvs(download_job=download_job)
        if cleanup:
            # Get all the files that have the same prefix except for the update date
            file_name_prefix = file_name[:-12]  # subtracting the 'YYYYMMDD.zip'
            for key in self.bucket.list(prefix=file_name_prefix):
                if key.name == file_name:
                    # ignore the one we just uploaded
                    continue
                self.bucket.delete_key(key.name)
                logger.info('Deleting {} from bucket'.format(key.name))
    else:
        # Send a SQS message that will be processed by another server, which will eventually run
        # csv_generation.generate_csvs(download_job, message) (see generate_zip.py)
        queue = sqs_queue(region_name=settings.BULK_DOWNLOAD_AWS_REGION,
                          QueueName=settings.BULK_DOWNLOAD_SQS_QUEUE_NAME)
        queue.send_message(MessageBody=str(download_job.download_job_id))
def restart_download_operation(self):
    """Restart this download job, clearing any previous error message.

    When the process is local the CSVs are regenerated in-process; otherwise
    the job is pushed back onto the queue and its status set to queued.
    """
    if not process_is_local():
        self.push_job_to_queue()
        self.update_download_job(job_status_id=JOB_STATUS_DICT["queued"], error_message=None)
        return
    self.update_download_job(job_status_id=JOB_STATUS_DICT["ready"], error_message=None)
    csv_generation.generate_csvs(download_job=self.download_job)
def process_request(self, download_job):
    """Generate the CSVs in-process when running locally; otherwise hand the
    job off via an SQS message."""
    if not settings.IS_LOCAL:
        # Another server picks this message up and eventually runs
        # csv_generation.write_csvs(**kwargs) (see generate_zip.py)
        write_to_log(
            message='Passing download_job {} to SQS'.format(download_job.download_job_id),
            download_job=download_job)
        sqs_queue(QueueName=settings.BULK_DOWNLOAD_SQS_QUEUE_NAME).send_message(
            MessageBody=str(download_job.download_job_id))
        return
    # Locally, we do not use SQS
    csv_generation.generate_csvs(download_job=download_job)
def handle(self, *args, **options):
    """Run the application: poll SQS indefinitely and generate CSVs for each
    DownloadJob referenced by a received message."""
    queue = sqs_queue(region_name=settings.BULK_DOWNLOAD_AWS_REGION,
                      QueueName=settings.BULK_DOWNLOAD_SQS_QUEUE_NAME)
    write_to_log(message='Starting SQS polling')
    while True:
        second_attempt = True
        # Fix: initialize these before the try so the except/finally clauses
        # cannot hit a NameError (or act on a job left over from a previous
        # iteration) when receive_messages itself raises.
        messages = []
        download_job = None
        try:
            # Grabs one (or more) messages from the queue
            messages = queue.receive_messages(
                WaitTimeSeconds=10,
                MessageAttributeNames=['All'],
                VisibilityTimeout=DEFAULT_VISIBILITY_TIMEOUT)
            for message in messages:
                write_to_log(message='Message Received: {}'.format(message))
                if message.body is not None:
                    # Retrieve and update the job
                    download_job = DownloadJob.objects.filter(
                        download_job_id=int(message.body)).first()
                    # A job that already carries an error message has failed once
                    # before; the next failure is terminal ('failed' vs 'ready').
                    second_attempt = download_job.error_message is not None
                    # Retrieve the data and write to the CSV(s)
                    write_to_log(
                        message='Starting to work on DownloadJob {}'.format(
                            download_job.download_job_id),
                        download_job=download_job)
                    csv_generation.generate_csvs(download_job=download_job, sqs_message=message)
                    # If successful, we do not want to run again; delete
                    message.delete()
        except Exception as e:
            # Handle uncaught exceptions in validation process
            logger.error(e)
            write_to_log(message=str(e), download_job=download_job, is_error=True)
            if download_job:
                download_job.error_message = str(e)
                download_job.job_status_id = JOB_STATUS_DICT['failed' if second_attempt else 'ready']
                download_job.save()
        finally:
            # Set visibility to 0 so that another attempt can be made to process in SQS
            # immediately, instead of waiting for the timeout window to expire
            for message in messages:
                try:
                    message.change_visibility(VisibilityTimeout=0)
                except botocore.exceptions.ClientError:
                    # TODO: check existence instead of catching error
                    continue
def process_request(self, download_job):
    """Run CSV generation in-process locally; otherwise enqueue the job on SQS."""
    if settings.IS_LOCAL:
        # Locally, we do not use SQS
        csv_generation.generate_csvs(download_job=download_job)
        return
    # A worker on another server will pick this message up and eventually run
    # csv_generation.write_csvs(**kwargs) (see download_sqs_worker.py)
    write_to_log(
        message='Passing download_job {} to SQS'.format(download_job.download_job_id),
        download_job=download_job
    )
    sqs = get_sqs_queue_resource(queue_name=settings.BULK_DOWNLOAD_SQS_QUEUE_NAME)
    sqs.send_message(MessageBody=str(download_job.download_job_id))
def download_service_app(download_job_id):
    """Fetch the download job and generate its CSVs.

    Returns 0 on success and a non-zero integer (11) when generation raises.
    """
    job = retrieve_download_job_from_db(download_job_id)
    write_to_log(
        message="Starting new Download Service App with pid {}".format(os.getpid()),
        download_job=job)
    # Retrieve the data and write to the CSV(s)
    try:
        csv_generation.generate_csvs(download_job=job)
    except Exception:
        write_to_log(message="Caught exception", download_job=job, is_error=True)
        return 11  # arbitrary positive integer
    return 0
def download_service_app(download_job_id):
    """Entry point for the download service worker process.

    Looks up the DownloadJob by id, runs CSV generation for it, and reports
    the outcome via the return code (0 = success, 11 = failure).
    """
    download_job = retrieve_download_job_from_db(download_job_id)
    startup_message = "Starting new Download Service App with pid {}".format(os.getpid())
    write_to_log(message=startup_message, download_job=download_job)
    try:
        # Retrieve the data and write to the CSV(s)
        csv_generation.generate_csvs(download_job=download_job)
    except Exception:
        write_to_log(message="Caught exception", download_job=download_job, is_error=True)
        return 11  # arbitrary positive integer
    return 0
def download(self, file_name, award_levels, award_types=None, agency=None, sub_agency=None,
             date_type=None, start_date=None, end_date=None, columns=None, file_format="csv",
             monthly_download=False, cleanup=False, use_sqs=False):
    """Create a DownloadJob for the given filters and either generate the CSVs
    in-process or enqueue the job on SQS for another worker.

    Args:
        file_name: target zip name; expected to end in 'YYYYMMDD.zip' (the
            cleanup step strips those last 12 characters to build a prefix).
        award_levels: award levels to include in the download request.
        award_types, agency, sub_agency, date_type, start_date, end_date:
            filter values forwarded in the request payload.
        columns: optional list of columns to include; defaults to an empty list.
        file_format: output format for the generated files (default "csv").
        monthly_download: NOTE(review): this parameter is currently ignored —
            the created job is always flagged monthly_download=True; confirm intent.
        cleanup: when generating locally, delete older files in the bucket that
            share this file's prefix.
        use_sqs: enqueue the job on SQS instead of generating locally.
    """
    # Fix: use None as the default instead of a shared mutable list.
    if columns is None:
        columns = []
    date_range = {}
    if start_date:
        date_range['start_date'] = start_date
    if end_date:
        date_range['end_date'] = end_date
    json_request = {
        'constraint_type': 'year',
        'award_levels': award_levels,
        'filters': {
            'award_types': award_types,
            'agency': str(agency),
            'date_type': date_type,
            'date_range': date_range,
        },
        'columns': columns,
        'file_format': file_format
    }
    download_viewset = YearLimitedDownloadViewSet()
    download_viewset.process_filters(json_request)
    validated_request = download_viewset.validate_award_request(json_request)
    download_job = DownloadJob.objects.create(
        job_status_id=JOB_STATUS_DICT['ready'],
        file_name=file_name,
        json_request=json.dumps(order_nested_object(validated_request)),
        monthly_download=True)
    if not use_sqs:
        # Note: Because of the line below, it's advised to only run this script on a
        # separate instance as this will modify your bulk download settings.
        settings.BULK_DOWNLOAD_S3_BUCKET_NAME = settings.MONTHLY_DOWNLOAD_S3_BUCKET_NAME
        csv_generation.generate_csvs(download_job=download_job)
        if cleanup:
            # Get all the files that have the same prefix except for the update date
            file_name_prefix = file_name[:-12]  # subtracting the 'YYYYMMDD.zip'
            for key in self.bucket.objects.filter(Prefix=file_name_prefix):
                if key.key == file_name:
                    # ignore the one we just uploaded
                    continue
                key.delete()
                logger.info('Deleting {} from bucket'.format(key.key))
    else:
        queue = get_sqs_queue_resource(queue_name=settings.BULK_DOWNLOAD_SQS_QUEUE_NAME)
        queue.send_message(MessageBody=str(download_job.download_job_id))
def download(
    self,
    file_name,
    award_levels,
    award_types=None,
    agency=None,
    sub_agency=None,
    date_type=None,
    start_date=None,
    end_date=None,
    columns=None,
    file_format="csv",
    monthly_download=False,
    cleanup=False,
    use_sqs=False,
):
    """Create a DownloadJob for the given filters and either generate the CSVs
    in-process or enqueue the job on SQS for another worker.

    Args:
        file_name: target zip name; expected to end in 'YYYYMMDD.zip' (the
            cleanup step strips those last 12 characters to build a prefix).
        award_levels: award levels to include in the download request.
        award_types, agency, sub_agency, date_type, start_date, end_date:
            filter values forwarded in the request payload.
        columns: optional list of columns to include; defaults to an empty list.
        file_format: output format for the generated files (default "csv").
        monthly_download: NOTE(review): this parameter is currently ignored —
            the created job is always flagged monthly_download=True; confirm intent.
        cleanup: when generating locally, delete older files in the bucket that
            share this file's prefix.
        use_sqs: enqueue the job on SQS instead of generating locally.
    """
    # Fix: use None as the default instead of a shared mutable list.
    if columns is None:
        columns = []
    date_range = {}
    if start_date:
        date_range["start_date"] = start_date
    if end_date:
        date_range["end_date"] = end_date
    json_request = {
        "constraint_type": "year",
        "award_levels": award_levels,
        "filters": {
            "award_types": award_types,
            "agency": str(agency),
            "date_type": date_type,
            "date_range": date_range,
        },
        "columns": columns,
        "file_format": file_format,
    }
    download_viewset = YearLimitedDownloadViewSet()
    download_viewset.process_filters(json_request)
    validated_request = validate_award_request(json_request)
    download_job = DownloadJob.objects.create(
        job_status_id=JOB_STATUS_DICT["ready"],
        file_name=file_name,
        json_request=json.dumps(order_nested_object(validated_request)),
        monthly_download=True,
    )
    if not use_sqs:
        # Note: Because of the line below, it's advised to only run this script on a
        # separate instance as this will modify your bulk download settings.
        settings.BULK_DOWNLOAD_S3_BUCKET_NAME = settings.MONTHLY_DOWNLOAD_S3_BUCKET_NAME
        csv_generation.generate_csvs(download_job=download_job)
        if cleanup:
            # Get all the files that have the same prefix except for the update date
            file_name_prefix = file_name[:-12]  # subtracting the 'YYYYMMDD.zip'
            for key in self.bucket.objects.filter(Prefix=file_name_prefix):
                if key.key == file_name:
                    # ignore the one we just uploaded
                    continue
                key.delete()
                logger.info("Deleting {} from bucket".format(key.key))
    else:
        queue = get_sqs_queue_resource(queue_name=settings.BULK_DOWNLOAD_SQS_QUEUE_NAME)
        queue.send_message(MessageBody=str(download_job.download_job_id))