Ejemplo n.º 1
0
    def handle(self, *args, **options):
        """Generate a delta download for each selected agency and award type.

        By default the selection is every toptier agency whose
        ``toptier_code`` is returned by ``pull_modified_agencies_cgacs()``,
        plus the aggregate ``"all"`` entry. A non-empty ``agencies`` option
        replaces that selection with the listed ``toptier_agency_id`` values,
        and ``"all"`` is then only kept when it was listed explicitly.

        ``self.download`` is called once per (agency, award type) pair with
        the capitalized award type, the agency (a values dict or the string
        ``"all"``) and the ``last_date`` option.
        """
        requested_agencies = options["agencies"]
        requested_award_types = options["award_types"]
        generate_since = options["last_date"]
        self.debugging_end_date = options["debugging_end_date"]
        self.debugging_skip_deleted = options["debugging_skip_deleted"]

        # Default selection: agencies whose code the helper reports.
        modified_codes = set(pull_modified_agencies_cgacs())
        agency_queryset = ToptierAgency.objects.filter(
            toptier_code__in=modified_codes)

        with_all_entry = True
        if requested_agencies:
            with_all_entry = "all" in requested_agencies
            if with_all_entry:
                # "all" is a marker rather than an agency id, so it is
                # dropped before the list is used as a query filter.
                requested_agencies.remove("all")
            agency_queryset = ToptierAgency.objects.filter(
                toptier_agency_id__in=requested_agencies)

        ordered_agencies = agency_queryset.order_by("toptier_code")
        download_targets = list(
            ordered_agencies.values("name", "toptier_agency_id",
                                    "toptier_code"))
        if with_all_entry:
            download_targets.append("all")

        for target in download_targets:
            for award_type in requested_award_types:
                self.download(award_type.capitalize(), target, generate_since)

        reminder = (
            "IMPORTANT: Be sure to run synchronize_transaction_delta management command "
            "after a successful monthly delta run.")
        logger.info(reminder)
Ejemplo n.º 2
0
    def handle(self, *args, **options):
        """Generate a delta download for each selected agency and award type.

        By default the selection is every toptier agency whose ``cgac_code``
        is returned by ``pull_modified_agencies_cgacs()``, plus the aggregate
        ``'all'`` entry. A non-empty ``agencies`` option replaces that
        selection with the listed ``toptier_agency_id`` values, and ``'all'``
        is then only kept when it was listed explicitly.

        ``self.download`` is called once per (agency, award type) pair.
        """
        requested_agencies = options['agencies']
        requested_award_types = options['award_types']
        generate_since = options['last_date']

        # Default selection: agencies whose code the helper reports.
        modified_cgacs = set(pull_modified_agencies_cgacs())
        agency_queryset = ToptierAgency.objects.filter(
            cgac_code__in=modified_cgacs)

        with_all_entry = True
        if requested_agencies:
            with_all_entry = 'all' in requested_agencies
            if with_all_entry:
                # 'all' is a marker rather than an agency id, so it is
                # dropped before the list is used as a query filter.
                requested_agencies.remove('all')
            agency_queryset = ToptierAgency.objects.filter(
                toptier_agency_id__in=requested_agencies)

        ordered_agencies = agency_queryset.order_by('cgac_code')
        download_targets = list(
            ordered_agencies.values('name', 'toptier_agency_id', 'cgac_code'))
        if with_all_entry:
            download_targets.append('all')

        for target in download_targets:
            for award_type in requested_award_types:
                self.download(award_type.capitalize(), target, generate_since)
    def handle(self, *args, **options):
        """Generate a delta download for each selected agency and award type.

        By default the selection is every toptier agency whose ``cgac_code``
        is returned by ``pull_modified_agencies_cgacs()``, plus the aggregate
        ``'all'`` entry. A non-empty ``agencies`` option replaces that
        selection with the listed ``toptier_agency_id`` values, and ``'all'``
        is then only kept when it was listed explicitly.

        ``self.download`` is called once per (agency, award type) pair, and a
        reminder about the follow-up management command is logged at the end.
        """
        requested_agencies = options['agencies']
        requested_award_types = options['award_types']
        generate_since = options['last_date']

        # Default selection: agencies whose code the helper reports.
        modified_cgacs = set(pull_modified_agencies_cgacs())
        agency_queryset = ToptierAgency.objects.filter(
            cgac_code__in=modified_cgacs)

        with_all_entry = True
        if requested_agencies:
            with_all_entry = 'all' in requested_agencies
            if with_all_entry:
                # 'all' is a marker rather than an agency id, so it is
                # dropped before the list is used as a query filter.
                requested_agencies.remove('all')
            agency_queryset = ToptierAgency.objects.filter(
                toptier_agency_id__in=requested_agencies)

        ordered_agencies = agency_queryset.order_by('cgac_code')
        download_targets = list(
            ordered_agencies.values('name', 'toptier_agency_id', 'cgac_code'))
        if with_all_entry:
            download_targets.append('all')

        for target in download_targets:
            for award_type in requested_award_types:
                self.download(award_type.capitalize(), target, generate_since)

        reminder = (
            'IMPORTANT: Be sure to run synchronize_transaction_delta management command '
            'after a successful monthly delta run')
        logger.info(reminder)
    def handle(self, *args, **options):
        """Generate a delta download for each selected agency and award type.

        By default the selection is every toptier agency whose ``cgac_code``
        is returned by ``pull_modified_agencies_cgacs()``, plus the aggregate
        ``'all'`` entry. A non-empty ``agencies`` option replaces that
        selection with the listed ``toptier_agency_id`` values, and ``'all'``
        is then only kept when it was listed explicitly.

        ``self.download`` is called once per (agency, award type) pair.
        """
        requested_agencies = options['agencies']
        requested_award_types = options['award_types']
        generate_since = options['last_date']

        # Default selection: agencies whose code the helper reports.
        modified_cgacs = set(pull_modified_agencies_cgacs())
        agency_queryset = ToptierAgency.objects.filter(cgac_code__in=modified_cgacs)

        with_all_entry = True
        if requested_agencies:
            with_all_entry = 'all' in requested_agencies
            if with_all_entry:
                # 'all' is a marker rather than an agency id, so it is dropped before querying.
                requested_agencies.remove('all')
            agency_queryset = ToptierAgency.objects.filter(toptier_agency_id__in=requested_agencies)

        ordered_agencies = agency_queryset.order_by('cgac_code')
        download_targets = list(ordered_agencies.values('name', 'toptier_agency_id', 'cgac_code'))
        if with_all_entry:
            download_targets.append('all')

        for target in download_targets:
            for award_type in requested_award_types:
                self.download(award_type.capitalize(), target, generate_since)
Ejemplo n.º 5
0
    def handle(self, *args, **options):
        """Generate, or upload placeholders for, the monthly full download files.

        One archive named
        ``<fiscal_year>_<cgac_code>_<Award type>_Full_<YYYYMMDD>.zip`` is
        handled for every combination of agency, fiscal year and award type.
        Each fiscal year spans ``<year - 1>-10-01`` through ``<year>-09-30``.

        Options read from ``options``:
            local: when truthy, ``self.download`` is called with
                ``use_sqs=False``.
            clobber: when falsy, archives already present in the bucket for
                today's date are skipped.
            use_modified_list: restrict agencies to the CGAC codes returned
                by ``pull_modified_agencies_cgacs()``.
            agencies: toptier agency ids; when non-empty it replaces any
                other agency selection, and the combined "All" archive is
                only produced if ``'all'`` is listed.
            award_types: any of ``'contracts'`` / ``'assistance'``.
            fiscal_years: years to process; when empty, 2001 through
                ``generate_fiscal_year(today)`` inclusive.
            placeholders: upload the supplied empty file instead of
                generating the archive.
            cleanup: forwarded unchanged to ``self.download``.
            empty_asssistance_file, empty_contracts_file: files uploaded in
                placeholder mode.

        Raises:
            Exception: for an unknown award type, or when ``placeholders`` is
                set without both empty files.
        """

        # Settings read directly by this method:
        #   settings.MONTHLY_DOWNLOAD_S3_BUCKET_NAME
        #   settings.BULK_DOWNLOAD_AWS_REGION
        # The earlier note here also listed BULK_DOWNLOAD_S3_BUCKET_NAME and
        # BULK_DOWNLOAD_SQS_QUEUE_NAME; presumably self.download() relies on
        # them -- confirm before dropping either from the deployment config.

        local = options['local']
        clobber = options['clobber']
        use_modified_list = options['use_modified_list']
        agencies = options['agencies']
        award_types = options['award_types']
        for award_type in award_types:
            if award_type not in ['contracts', 'assistance']:
                raise Exception(
                    'Unacceptable award type: {}'.format(award_type))
        fiscal_years = options['fiscal_years']
        placeholders = options['placeholders']
        cleanup = options['cleanup']
        empty_asssistance_file = options['empty_asssistance_file']
        empty_contracts_file = options['empty_contracts_file']
        if placeholders and (not empty_asssistance_file
                             or not empty_contracts_file):
            raise Exception(
                'Placeholder arg provided but empty files not provided')

        current_date = datetime.date.today()
        updated_date_timestamp = current_date.strftime('%Y%m%d')

        toptier_agencies = ToptierAgency.objects.all()
        include_all = True
        if use_modified_list:
            used_cgacs = set(pull_modified_agencies_cgacs())
            toptier_agencies = ToptierAgency.objects.filter(
                cgac_code__in=used_cgacs)
        if agencies:
            # An explicit agency list wins over use_modified_list.
            if 'all' in agencies:
                agencies.remove('all')
            else:
                include_all = False
            toptier_agencies = ToptierAgency.objects.filter(
                toptier_agency_id__in=agencies)
        toptier_agencies = list(
            toptier_agencies.values('name', 'toptier_agency_id', 'cgac_code'))
        # Adding 'all' to prevent duplication of code
        if include_all:
            toptier_agencies.append({
                'name': 'All',
                'toptier_agency_id': 'all',
                'cgac_code': 'all'
            })
        if not fiscal_years:
            fiscal_years = range(2001, generate_fiscal_year(current_date) + 1)

        # moving it to self.bucket as it may be used in different cases
        bucket_name = settings.MONTHLY_DOWNLOAD_S3_BUCKET_NAME
        region_name = settings.BULK_DOWNLOAD_AWS_REGION
        self.bucket = boto.s3.connect_to_region(region_name).get_bucket(
            bucket_name)

        # Base names (everything before "_Full_<date>.zip") already uploaded
        # today. A set gives O(1) membership tests inside the loops below and
        # stays empty when clobbering, so nothing is skipped in that case.
        reuploads = set()
        if not clobber:
            # Compiled once; the dot before "zip" is escaped so that it only
            # matches a literal '.'.
            uploaded_today = re.compile(
                r'(.*)_Full_{}\.zip'.format(updated_date_timestamp))
            for key in self.bucket.list():
                re_match = uploaded_today.search(key.name)
                if re_match:
                    reuploads.add(re_match.group(1))

        # Count by the award types actually requested rather than assuming
        # both are always present.
        logger.info('Generating {} files...'.format(
            len(toptier_agencies) * len(fiscal_years) * len(award_types)))
        for agency in toptier_agencies:
            for fiscal_year in fiscal_years:
                start_date = '{}-10-01'.format(fiscal_year - 1)
                end_date = '{}-09-30'.format(fiscal_year)
                for award_type in award_types:
                    file_name = '{}_{}_{}'.format(fiscal_year,
                                                  agency['cgac_code'],
                                                  award_type.capitalize())
                    full_file_name = '{}_Full_{}.zip'.format(
                        file_name, updated_date_timestamp)
                    if file_name in reuploads:
                        logger.info('Skipping already uploaded: {}'.format(
                            full_file_name))
                        continue
                    if placeholders:
                        empty_file = empty_contracts_file if award_type == 'contracts' else empty_asssistance_file
                        self.upload_placeholder(file_name=full_file_name,
                                                empty_file=empty_file)
                    else:
                        self.download(full_file_name, ['prime_awards'],
                                      award_types=award_mappings[award_type],
                                      agency=agency['toptier_agency_id'],
                                      date_type='action_date',
                                      start_date=start_date,
                                      end_date=end_date,
                                      monthly_download=True,
                                      cleanup=cleanup,
                                      use_sqs=(not local))
        logger.info('Populate Monthly Files complete')
    def handle(self, *args, **options):
        """Generate, or upload placeholders for, the monthly full download files.

        One archive named
        ``FY<fiscal_year>_<toptier_code>_<Award type>_Full_<YYYYMMDD>.zip`` is
        handled for every combination of agency, fiscal year and award type.
        Each fiscal year spans ``<year - 1>-10-01`` through ``<year>-09-30``.

        Options read from ``options``:
            local: when truthy, ``self.download`` is called with
                ``use_sqs=False``.
            clobber: when falsy, archives already present in the bucket for
                today's date are skipped.
            use_modified_list: restrict agencies to the codes returned by
                ``pull_modified_agencies_cgacs()``.
            agencies: toptier agency ids; when non-empty it replaces any
                other agency selection, and the combined "All" archive is
                only produced if ``"all"`` is listed.
            award_types: any of ``"contracts"`` / ``"assistance"``.
            fiscal_years: years to process; when empty, 2001 through
                ``generate_fiscal_year(today)`` inclusive.
            placeholders: upload the supplied empty file instead of
                generating the archive.
            cleanup: forwarded unchanged to ``self.download``.
            empty_asssistance_file, empty_contracts_file: files uploaded in
                placeholder mode.

        Raises:
            Exception: for an unknown award type, or when ``placeholders`` is
                set without both empty files.
        """

        # Settings read directly by this method:
        #   settings.MONTHLY_DOWNLOAD_S3_BUCKET_NAME
        #   settings.USASPENDING_AWS_REGION
        # The earlier note here also listed BULK_DOWNLOAD_S3_BUCKET_NAME and
        # BULK_DOWNLOAD_SQS_QUEUE_NAME; presumably self.download() relies on
        # them -- confirm before dropping either from the deployment config.

        local = options["local"]
        clobber = options["clobber"]
        use_modified_list = options["use_modified_list"]
        agencies = options["agencies"]
        award_types = options["award_types"]
        for award_type in award_types:
            if award_type not in ["contracts", "assistance"]:
                raise Exception(
                    "Unacceptable award type: {}".format(award_type))
        fiscal_years = options["fiscal_years"]
        placeholders = options["placeholders"]
        cleanup = options["cleanup"]
        empty_asssistance_file = options["empty_asssistance_file"]
        empty_contracts_file = options["empty_contracts_file"]
        if placeholders and (not empty_asssistance_file
                             or not empty_contracts_file):
            raise Exception(
                "Placeholder arg provided but empty files not provided")

        current_date = datetime.date.today()
        updated_date_timestamp = current_date.strftime("%Y%m%d")

        toptier_agencies = ToptierAgency.objects.all()
        include_all = True
        if use_modified_list:
            used_cgacs = set(pull_modified_agencies_cgacs())
            toptier_agencies = ToptierAgency.objects.filter(
                toptier_code__in=used_cgacs)
        if agencies:
            # An explicit agency list wins over use_modified_list.
            if "all" in agencies:
                agencies.remove("all")
            else:
                include_all = False
            toptier_agencies = ToptierAgency.objects.filter(
                toptier_agency_id__in=agencies)
        toptier_agencies = list(
            toptier_agencies.values("name", "toptier_agency_id",
                                    "toptier_code"))
        # Adding 'all' to prevent duplication of code
        if include_all:
            toptier_agencies.append({
                "name": "All",
                "toptier_agency_id": "all",
                "toptier_code": "All"
            })
        if not fiscal_years:
            fiscal_years = range(2001, generate_fiscal_year(current_date) + 1)

        # moving it to self.bucket as it may be used in different cases
        bucket_name = settings.MONTHLY_DOWNLOAD_S3_BUCKET_NAME
        region_name = settings.USASPENDING_AWS_REGION
        self.bucket = boto3.resource(
            "s3", region_name=region_name).Bucket(bucket_name)

        # Base names (everything before "_Full_<date>.zip") already uploaded
        # today. A set gives O(1) membership tests inside the loops below and
        # stays empty when clobbering, so nothing is skipped in that case.
        reuploads = set()
        if not clobber:
            # Compiled once; the dot before "zip" is escaped so that it only
            # matches a literal '.'.
            uploaded_today = re.compile(
                r"(.*)_Full_{}\.zip".format(updated_date_timestamp))
            for key in self.bucket.objects.all():
                re_match = uploaded_today.search(key.key)
                if re_match:
                    reuploads.add(re_match.group(1))

        # Count by the award types actually requested rather than assuming
        # both are always present.
        logger.info("Generating {} files...".format(
            len(toptier_agencies) * len(fiscal_years) * len(award_types)))
        for agency in toptier_agencies:
            for fiscal_year in fiscal_years:
                start_date = "{}-10-01".format(fiscal_year - 1)
                end_date = "{}-09-30".format(fiscal_year)
                for award_type in award_types:
                    file_name = f"FY{fiscal_year}_{agency['toptier_code']}_{award_type.capitalize()}"
                    full_file_name = f"{file_name}_Full_{updated_date_timestamp}.zip"
                    if file_name in reuploads:
                        logger.info(
                            f"Skipping already uploaded: {full_file_name}")
                        continue
                    if placeholders:
                        empty_file = empty_contracts_file if award_type == "contracts" else empty_asssistance_file
                        self.upload_placeholder(file_name=full_file_name,
                                                empty_file=empty_file)
                    else:
                        self.download(
                            file_name=full_file_name,
                            prime_award_types=award_mappings[award_type],
                            agency=agency["toptier_agency_id"],
                            date_type="action_date",
                            start_date=start_date,
                            end_date=end_date,
                            monthly_download=True,
                            cleanup=cleanup,
                            use_sqs=(not local),
                        )
        logger.info("Populate Monthly Files complete")
    def handle(self, *args, **options):
        """Generate, or upload placeholders for, the monthly full download files.

        One archive named ``<fiscal_year>_<cgac_code>_<Award type>_Full_<YYYYMMDD>.zip`` is handled for
        every combination of agency, fiscal year and award type. Each fiscal year spans
        ``<year - 1>-10-01`` through ``<year>-09-30``.

        Options read from ``options``:
            local: when truthy, ``self.download`` is called with ``use_sqs=False``.
            clobber: when falsy, archives already present in the bucket for today's date are skipped.
            use_modified_list: restrict agencies to the CGAC codes returned by
                ``pull_modified_agencies_cgacs()``.
            agencies: toptier agency ids; when non-empty it replaces any other agency selection, and the
                combined "All" archive is only produced if ``'all'`` is listed.
            award_types: any of ``'contracts'`` / ``'assistance'``.
            fiscal_years: years to process; when empty, 2001 through ``generate_fiscal_year(today)``
                inclusive.
            placeholders: upload the supplied empty file instead of generating the archive.
            cleanup: forwarded unchanged to ``self.download``.
            empty_asssistance_file, empty_contracts_file: files uploaded in placeholder mode.

        Raises:
            Exception: for an unknown award type, or when ``placeholders`` is set without both empty files.
        """

        # Settings read directly by this method:
        #   settings.MONTHLY_DOWNLOAD_S3_BUCKET_NAME
        #   settings.USASPENDING_AWS_REGION
        # The earlier note here also listed BULK_DOWNLOAD_S3_BUCKET_NAME and BULK_DOWNLOAD_SQS_QUEUE_NAME;
        # presumably self.download() relies on them -- confirm before dropping either from the config.

        local = options['local']
        clobber = options['clobber']
        use_modified_list = options['use_modified_list']
        agencies = options['agencies']
        award_types = options['award_types']
        for award_type in award_types:
            if award_type not in ['contracts', 'assistance']:
                raise Exception('Unacceptable award type: {}'.format(award_type))
        fiscal_years = options['fiscal_years']
        placeholders = options['placeholders']
        cleanup = options['cleanup']
        empty_asssistance_file = options['empty_asssistance_file']
        empty_contracts_file = options['empty_contracts_file']
        if placeholders and (not empty_asssistance_file or not empty_contracts_file):
            raise Exception('Placeholder arg provided but empty files not provided')

        current_date = datetime.date.today()
        updated_date_timestamp = current_date.strftime('%Y%m%d')

        toptier_agencies = ToptierAgency.objects.all()
        include_all = True
        if use_modified_list:
            used_cgacs = set(pull_modified_agencies_cgacs())
            toptier_agencies = ToptierAgency.objects.filter(cgac_code__in=used_cgacs)
        if agencies:
            # An explicit agency list wins over use_modified_list.
            if 'all' in agencies:
                agencies.remove('all')
            else:
                include_all = False
            toptier_agencies = ToptierAgency.objects.filter(toptier_agency_id__in=agencies)
        toptier_agencies = list(toptier_agencies.values('name', 'toptier_agency_id', 'cgac_code'))
        # Adding 'all' to prevent duplication of code
        if include_all:
            toptier_agencies.append({'name': 'All', 'toptier_agency_id': 'all', 'cgac_code': 'all'})
        if not fiscal_years:
            fiscal_years = range(2001, generate_fiscal_year(current_date) + 1)

        # moving it to self.bucket as it may be used in different cases
        bucket_name = settings.MONTHLY_DOWNLOAD_S3_BUCKET_NAME
        region_name = settings.USASPENDING_AWS_REGION
        self.bucket = boto3.resource('s3', region_name=region_name).Bucket(bucket_name)

        # Base names (everything before "_Full_<date>.zip") already uploaded today. A set gives O(1)
        # membership tests inside the loops below and stays empty when clobbering, so nothing is skipped.
        reuploads = set()
        if not clobber:
            # Compiled once; the dot before "zip" is escaped so that it only matches a literal '.'.
            uploaded_today = re.compile(r'(.*)_Full_{}\.zip'.format(updated_date_timestamp))
            for key in self.bucket.objects.all():
                re_match = uploaded_today.search(key.key)
                if re_match:
                    reuploads.add(re_match.group(1))

        # Count by the award types actually requested rather than assuming both are always present.
        logger.info('Generating {} files...'.format(len(toptier_agencies) * len(fiscal_years) * len(award_types)))
        for agency in toptier_agencies:
            for fiscal_year in fiscal_years:
                start_date = '{}-10-01'.format(fiscal_year - 1)
                end_date = '{}-09-30'.format(fiscal_year)
                for award_type in award_types:
                    file_name = '{}_{}_{}'.format(fiscal_year, agency['cgac_code'], award_type.capitalize())
                    full_file_name = '{}_Full_{}.zip'.format(file_name, updated_date_timestamp)
                    if file_name in reuploads:
                        logger.info('Skipping already uploaded: {}'.format(full_file_name))
                        continue
                    if placeholders:
                        empty_file = empty_contracts_file if award_type == 'contracts' else empty_asssistance_file
                        self.upload_placeholder(file_name=full_file_name, empty_file=empty_file)
                    else:
                        self.download(full_file_name, ['prime_awards'], award_types=award_mappings[award_type],
                                      agency=agency['toptier_agency_id'], date_type='action_date',
                                      start_date=start_date, end_date=end_date, monthly_download=True, cleanup=cleanup,
                                      use_sqs=(not local))
        logger.info('Populate Monthly Files complete')