def handle(self, *args, **options):
    """Run a delta download for every selected agency and award type.

    Options read, in order: ``agencies``, ``award_types``, ``last_date``,
    ``debugging_end_date`` and ``debugging_skip_deleted`` (the last two are
    stored on ``self``).

    Agency selection:
      * no ``agencies`` given: agencies whose ``toptier_code`` is in the set
        returned by ``pull_modified_agencies_cgacs()``, followed by ``"all"``;
      * ``agencies`` given: only those ``toptier_agency_id`` values, followed
        by ``"all"`` only when ``"all"`` was part of the list.

    ``self.download`` is invoked once per (agency, award type) pair with the
    capitalized award type, the agency entry (a values-dict or the string
    ``"all"``) and ``last_date``.
    """
    requested_ids = options["agencies"]
    award_types = options["award_types"]
    last_date = options["last_date"]
    self.debugging_end_date = options["debugging_end_date"]
    self.debugging_skip_deleted = options["debugging_skip_deleted"]

    # Default selection, replaced below when explicit agencies were passed.
    modified_codes = set(pull_modified_agencies_cgacs())
    agency_queryset = ToptierAgency.objects.filter(toptier_code__in=modified_codes)

    include_all = True
    if requested_ids:
        include_all = "all" in requested_ids
        if include_all:
            # "all" is a marker rather than an agency ID, so it is dropped
            # before filtering (this mutates the list held in ``options``).
            requested_ids.remove("all")
        agency_queryset = ToptierAgency.objects.filter(toptier_agency_id__in=requested_ids)

    ordered_agencies = agency_queryset.order_by("toptier_code")
    targets = list(ordered_agencies.values("name", "toptier_agency_id", "toptier_code"))
    if include_all:
        targets.append("all")

    for target in targets:
        for award_type in award_types:
            self.download(award_type.capitalize(), target, last_date)

    logger.info(
        "IMPORTANT: Be sure to run synchronize_transaction_delta management command "
        "after a successful monthly delta run."
    )
def handle(self, *args, **options):
    """Run a delta download for every selected agency and award type.

    Options read: ``agencies``, ``award_types`` and ``last_date``.

    When ``agencies`` is empty, the agencies whose ``cgac_code`` is in the set
    returned by ``pull_modified_agencies_cgacs()`` are processed, followed by
    the string ``'all'``.  When ``agencies`` is given, only those
    ``toptier_agency_id`` values are processed, and ``'all'`` is appended only
    if it was present in the list.

    ``self.download`` receives the capitalized award type, the agency entry
    (a values-dict or ``'all'``) and ``last_date``.
    """
    agency_ids = options['agencies']
    award_types = options['award_types']
    last_date = options['last_date']

    # Default selection, replaced below when explicit agencies were passed.
    cgac_codes = set(pull_modified_agencies_cgacs())
    selection = ToptierAgency.objects.filter(cgac_code__in=cgac_codes)

    include_all = True
    if agency_ids:
        include_all = 'all' in agency_ids
        if include_all:
            # 'all' is a marker, not an ID; removing it mutates the list
            # stored in ``options``.
            agency_ids.remove('all')
        selection = ToptierAgency.objects.filter(toptier_agency_id__in=agency_ids)

    selection = selection.order_by('cgac_code')
    work_items = list(selection.values('name', 'toptier_agency_id', 'cgac_code'))
    if include_all:
        work_items.append('all')

    for item in work_items:
        for award_type in award_types:
            self.download(award_type.capitalize(), item, last_date)
def handle(self, *args, **options):
    """Run a delta download for every selected agency and award type.

    Options read: ``agencies``, ``award_types`` and ``last_date``.

    Without explicit ``agencies`` the agencies whose ``cgac_code`` is in the
    set returned by ``pull_modified_agencies_cgacs()`` are used and ``'all'``
    is processed last.  With explicit ``agencies`` only those
    ``toptier_agency_id`` values are used; ``'all'`` is processed only if it
    appeared in the list.

    After all downloads, a reminder about the
    ``synchronize_transaction_delta`` command is logged.
    """
    chosen_ids = options['agencies']
    award_types = options['award_types']
    last_date = options['last_date']

    # Default selection, replaced below when explicit agencies were passed.
    changed_cgacs = set(pull_modified_agencies_cgacs())
    agency_rows = ToptierAgency.objects.filter(cgac_code__in=changed_cgacs)

    include_all = True
    if chosen_ids:
        include_all = 'all' in chosen_ids
        if include_all:
            # Strip the marker value so only real IDs reach the filter
            # (mutates the list stored in ``options``).
            chosen_ids.remove('all')
        agency_rows = ToptierAgency.objects.filter(toptier_agency_id__in=chosen_ids)

    agency_rows = agency_rows.order_by('cgac_code')
    download_targets = list(agency_rows.values('name', 'toptier_agency_id', 'cgac_code'))
    if include_all:
        download_targets.append('all')

    for target in download_targets:
        for award_type in award_types:
            self.download(award_type.capitalize(), target, last_date)

    logger.info(
        'IMPORTANT: Be sure to run synchronize_transaction_delta management command '
        'after a successful monthly delta run'
    )
def handle(self, *args, **options):
    """Run a delta download for every selected agency and award type.

    Options read: ``agencies``, ``award_types`` and ``last_date``.

    The agency list defaults to those whose ``cgac_code`` is in the set
    returned by ``pull_modified_agencies_cgacs()``; an explicit ``agencies``
    option replaces it with a ``toptier_agency_id`` filter.  The string
    ``'all'`` is appended as a final entry unless explicit agencies were given
    without ``'all'`` among them.
    """
    explicit_ids = options['agencies']
    award_types = options['award_types']
    last_date = options['last_date']

    # Default selection, replaced below when explicit agencies were passed.
    modified = set(pull_modified_agencies_cgacs())
    queryset = ToptierAgency.objects.filter(cgac_code__in=modified)

    include_all = True
    if explicit_ids:
        include_all = 'all' in explicit_ids
        if include_all:
            # Remove the marker before using the list as an ID filter; this
            # mutates the list stored in ``options``.
            explicit_ids.remove('all')
        queryset = ToptierAgency.objects.filter(toptier_agency_id__in=explicit_ids)

    sorted_queryset = queryset.order_by('cgac_code')
    entries = list(sorted_queryset.values('name', 'toptier_agency_id', 'cgac_code'))
    if include_all:
        entries.append('all')

    for entry in entries:
        for award_type in award_types:
            self.download(award_type.capitalize(), entry, last_date)
def handle(self, *args, **options):
    """Generate, or upload placeholders for, the monthly download files.

    One file is handled per (agency, fiscal year, award type) combination.
    Its name is ``<fiscal_year>_<cgac_code>_<Award_type>_Full_<YYYYMMDD>.zip``
    where the date is today's date.

    Options read: ``local``, ``clobber``, ``use_modified_list``, ``agencies``,
    ``award_types``, ``fiscal_years``, ``placeholders``, ``cleanup``,
    ``empty_asssistance_file`` and ``empty_contracts_file``.

    Raises:
        Exception: if an award type is neither ``contracts`` nor
            ``assistance``, or if ``placeholders`` is set without both empty
            files being provided.
    """
    # Make sure
    #   settings.BULK_DOWNLOAD_S3_BUCKET_NAME
    #   settings.BULK_DOWNLOAD_SQS_QUEUE_NAME
    #   settings.BULK_DOWNLOAD_AWS_REGION
    # are properly configured!
    # NOTE(review): the bucket used below comes from
    # settings.MONTHLY_DOWNLOAD_S3_BUCKET_NAME; the other two names are
    # presumably consumed by self.download -- confirm.
    local = options['local']
    clobber = options['clobber']
    use_modified_list = options['use_modified_list']
    agencies = options['agencies']
    award_types = options['award_types']
    for award_type in award_types:
        if award_type not in ['contracts', 'assistance']:
            raise Exception('Unacceptable award type: {}'.format(award_type))
    fiscal_years = options['fiscal_years']
    placeholders = options['placeholders']
    cleanup = options['cleanup']
    empty_asssistance_file = options['empty_asssistance_file']
    empty_contracts_file = options['empty_contracts_file']
    if placeholders and (not empty_asssistance_file or not empty_contracts_file):
        raise Exception('Placeholder arg provided but empty files not provided')

    current_date = datetime.date.today()
    updated_date_timestamp = current_date.strftime('%Y%m%d')

    toptier_agencies = ToptierAgency.objects.all()
    include_all = True
    if use_modified_list:
        used_cgacs = set(pull_modified_agencies_cgacs())
        toptier_agencies = ToptierAgency.objects.filter(cgac_code__in=used_cgacs)
    if agencies:
        # An explicit agency list overrides both defaults above.
        if 'all' in agencies:
            agencies.remove('all')
        else:
            include_all = False
        toptier_agencies = ToptierAgency.objects.filter(toptier_agency_id__in=agencies)
    toptier_agencies = list(toptier_agencies.values('name', 'toptier_agency_id', 'cgac_code'))
    # Adding 'all' to prevent duplication of code
    if include_all:
        toptier_agencies.append({'name': 'All', 'toptier_agency_id': 'all', 'cgac_code': 'all'})

    if not fiscal_years:
        fiscal_years = range(2001, generate_fiscal_year(current_date) + 1)

    # moving it to self.bucket as it may be used in different cases
    bucket_name = settings.MONTHLY_DOWNLOAD_S3_BUCKET_NAME
    region_name = settings.BULK_DOWNLOAD_AWS_REGION
    self.bucket = boto.s3.connect_to_region(region_name).get_bucket(bucket_name)

    # File-name prefixes already uploaded today.  Left empty when clobbering,
    # so nothing is skipped in that case.
    reuploads = set()
    if not clobber:
        uploaded_today = re.compile('(.*)_Full_{}.zip'.format(updated_date_timestamp))
        for key in self.bucket.list():
            re_match = uploaded_today.findall(key.name)
            if re_match:
                reuploads.add(re_match[0])

    # Count by the award types actually requested, not a fixed two.
    logger.info('Generating {} files...'.format(
        len(toptier_agencies) * len(fiscal_years) * len(award_types)))
    for agency in toptier_agencies:
        for fiscal_year in fiscal_years:
            # The date range runs from Oct 1 of the prior year to Sep 30.
            start_date = '{}-10-01'.format(fiscal_year - 1)
            end_date = '{}-09-30'.format(fiscal_year)
            for award_type in award_types:
                file_name = '{}_{}_{}'.format(fiscal_year, agency['cgac_code'], award_type.capitalize())
                full_file_name = '{}_Full_{}.zip'.format(file_name, updated_date_timestamp)

                if file_name in reuploads:
                    logger.info('Skipping already uploaded: {}'.format(full_file_name))
                    continue
                if placeholders:
                    empty_file = empty_contracts_file if award_type == 'contracts' else empty_asssistance_file
                    self.upload_placeholder(file_name=full_file_name, empty_file=empty_file)
                else:
                    self.download(
                        full_file_name,
                        ['prime_awards'],
                        award_types=award_mappings[award_type],
                        agency=agency['toptier_agency_id'],
                        date_type='action_date',
                        start_date=start_date,
                        end_date=end_date,
                        monthly_download=True,
                        cleanup=cleanup,
                        use_sqs=(not local),
                    )
    logger.info('Populate Monthly Files complete')
def handle(self, *args, **options):
    """Generate, or upload placeholders for, the monthly download files.

    One file is handled per (agency, fiscal year, award type) combination.
    Its name is
    ``FY<fiscal_year>_<toptier_code>_<Award_type>_Full_<YYYYMMDD>.zip`` where
    the date is today's date.

    Options read: ``local``, ``clobber``, ``use_modified_list``, ``agencies``,
    ``award_types``, ``fiscal_years``, ``placeholders``, ``cleanup``,
    ``empty_asssistance_file`` and ``empty_contracts_file``.

    Raises:
        Exception: if an award type is neither ``contracts`` nor
            ``assistance``, or if ``placeholders`` is set without both empty
            files being provided.
    """
    # Make sure
    #   settings.BULK_DOWNLOAD_S3_BUCKET_NAME
    #   settings.BULK_DOWNLOAD_SQS_QUEUE_NAME
    #   settings.USASPENDING_AWS_REGION
    # are properly configured!
    # NOTE(review): the bucket used below comes from
    # settings.MONTHLY_DOWNLOAD_S3_BUCKET_NAME; the BULK_DOWNLOAD_* names are
    # presumably consumed by self.download -- confirm.
    local = options["local"]
    clobber = options["clobber"]
    use_modified_list = options["use_modified_list"]
    agencies = options["agencies"]
    award_types = options["award_types"]
    for award_type in award_types:
        if award_type not in ["contracts", "assistance"]:
            raise Exception("Unacceptable award type: {}".format(award_type))
    fiscal_years = options["fiscal_years"]
    placeholders = options["placeholders"]
    cleanup = options["cleanup"]
    empty_asssistance_file = options["empty_asssistance_file"]
    empty_contracts_file = options["empty_contracts_file"]
    if placeholders and (not empty_asssistance_file or not empty_contracts_file):
        raise Exception("Placeholder arg provided but empty files not provided")

    current_date = datetime.date.today()
    updated_date_timestamp = current_date.strftime("%Y%m%d")

    toptier_agencies = ToptierAgency.objects.all()
    include_all = True
    if use_modified_list:
        used_cgacs = set(pull_modified_agencies_cgacs())
        toptier_agencies = ToptierAgency.objects.filter(toptier_code__in=used_cgacs)
    if agencies:
        # An explicit agency list overrides both defaults above.
        if "all" in agencies:
            agencies.remove("all")
        else:
            include_all = False
        toptier_agencies = ToptierAgency.objects.filter(toptier_agency_id__in=agencies)
    toptier_agencies = list(toptier_agencies.values("name", "toptier_agency_id", "toptier_code"))
    # Adding 'all' to prevent duplication of code
    if include_all:
        toptier_agencies.append({"name": "All", "toptier_agency_id": "all", "toptier_code": "All"})

    if not fiscal_years:
        fiscal_years = range(2001, generate_fiscal_year(current_date) + 1)

    # moving it to self.bucket as it may be used in different cases
    bucket_name = settings.MONTHLY_DOWNLOAD_S3_BUCKET_NAME
    region_name = settings.USASPENDING_AWS_REGION
    self.bucket = boto3.resource("s3", region_name=region_name).Bucket(bucket_name)

    # File-name prefixes already uploaded today.  Left empty when clobbering,
    # so nothing is skipped in that case.
    reuploads = set()
    if not clobber:
        uploaded_today = re.compile("(.*)_Full_{}.zip".format(updated_date_timestamp))
        for key in self.bucket.objects.all():
            re_match = uploaded_today.findall(key.key)
            if re_match:
                reuploads.add(re_match[0])

    # Count by the award types actually requested, not a fixed two.
    logger.info("Generating {} files...".format(
        len(toptier_agencies) * len(fiscal_years) * len(award_types)))
    for agency in toptier_agencies:
        for fiscal_year in fiscal_years:
            # The date range runs from Oct 1 of the prior year to Sep 30.
            start_date = "{}-10-01".format(fiscal_year - 1)
            end_date = "{}-09-30".format(fiscal_year)
            for award_type in award_types:
                file_name = f"FY{fiscal_year}_{agency['toptier_code']}_{award_type.capitalize()}"
                full_file_name = f"{file_name}_Full_{updated_date_timestamp}.zip"

                if file_name in reuploads:
                    logger.info(f"Skipping already uploaded: {full_file_name}")
                    continue
                if placeholders:
                    empty_file = empty_contracts_file if award_type == "contracts" else empty_asssistance_file
                    self.upload_placeholder(file_name=full_file_name, empty_file=empty_file)
                else:
                    self.download(
                        file_name=full_file_name,
                        prime_award_types=award_mappings[award_type],
                        agency=agency["toptier_agency_id"],
                        date_type="action_date",
                        start_date=start_date,
                        end_date=end_date,
                        monthly_download=True,
                        cleanup=cleanup,
                        use_sqs=(not local),
                    )
    logger.info("Populate Monthly Files complete")
def handle(self, *args, **options):
    """Generate, or upload placeholders for, the monthly download files.

    One file is handled per (agency, fiscal year, award type) combination.
    Its name is ``<fiscal_year>_<cgac_code>_<Award_type>_Full_<YYYYMMDD>.zip``
    where the date is today's date.

    Options read: ``local``, ``clobber``, ``use_modified_list``, ``agencies``,
    ``award_types``, ``fiscal_years``, ``placeholders``, ``cleanup``,
    ``empty_asssistance_file`` and ``empty_contracts_file``.

    Raises:
        Exception: if an award type is neither ``contracts`` nor
            ``assistance``, or if ``placeholders`` is set without both empty
            files being provided.
    """
    # Make sure
    #   settings.BULK_DOWNLOAD_S3_BUCKET_NAME
    #   settings.BULK_DOWNLOAD_SQS_QUEUE_NAME
    #   settings.USASPENDING_AWS_REGION
    # are properly configured!
    # NOTE(review): the bucket used below comes from
    # settings.MONTHLY_DOWNLOAD_S3_BUCKET_NAME; the BULK_DOWNLOAD_* names are
    # presumably consumed by self.download -- confirm.
    local = options['local']
    clobber = options['clobber']
    use_modified_list = options['use_modified_list']
    agencies = options['agencies']
    award_types = options['award_types']
    for award_type in award_types:
        if award_type not in ['contracts', 'assistance']:
            raise Exception('Unacceptable award type: {}'.format(award_type))
    fiscal_years = options['fiscal_years']
    placeholders = options['placeholders']
    cleanup = options['cleanup']
    empty_asssistance_file = options['empty_asssistance_file']
    empty_contracts_file = options['empty_contracts_file']
    if placeholders and (not empty_asssistance_file or not empty_contracts_file):
        raise Exception('Placeholder arg provided but empty files not provided')

    current_date = datetime.date.today()
    updated_date_timestamp = current_date.strftime('%Y%m%d')

    toptier_agencies = ToptierAgency.objects.all()
    include_all = True
    if use_modified_list:
        used_cgacs = set(pull_modified_agencies_cgacs())
        toptier_agencies = ToptierAgency.objects.filter(cgac_code__in=used_cgacs)
    if agencies:
        # An explicit agency list overrides both defaults above.
        if 'all' in agencies:
            agencies.remove('all')
        else:
            include_all = False
        toptier_agencies = ToptierAgency.objects.filter(toptier_agency_id__in=agencies)
    toptier_agencies = list(toptier_agencies.values('name', 'toptier_agency_id', 'cgac_code'))
    # Adding 'all' to prevent duplication of code
    if include_all:
        toptier_agencies.append({'name': 'All', 'toptier_agency_id': 'all', 'cgac_code': 'all'})

    if not fiscal_years:
        fiscal_years = range(2001, generate_fiscal_year(current_date) + 1)

    # moving it to self.bucket as it may be used in different cases
    bucket_name = settings.MONTHLY_DOWNLOAD_S3_BUCKET_NAME
    region_name = settings.USASPENDING_AWS_REGION
    self.bucket = boto3.resource('s3', region_name=region_name).Bucket(bucket_name)

    # File-name prefixes already uploaded today.  Left empty when clobbering,
    # so nothing is skipped in that case.
    reuploads = set()
    if not clobber:
        uploaded_today = re.compile('(.*)_Full_{}.zip'.format(updated_date_timestamp))
        for key in self.bucket.objects.all():
            re_match = uploaded_today.findall(key.key)
            if re_match:
                reuploads.add(re_match[0])

    # Count by the award types actually requested, not a fixed two.
    logger.info('Generating {} files...'.format(
        len(toptier_agencies) * len(fiscal_years) * len(award_types)))
    for agency in toptier_agencies:
        for fiscal_year in fiscal_years:
            # The date range runs from Oct 1 of the prior year to Sep 30.
            start_date = '{}-10-01'.format(fiscal_year - 1)
            end_date = '{}-09-30'.format(fiscal_year)
            for award_type in award_types:
                file_name = '{}_{}_{}'.format(fiscal_year, agency['cgac_code'], award_type.capitalize())
                full_file_name = '{}_Full_{}.zip'.format(file_name, updated_date_timestamp)

                if file_name in reuploads:
                    logger.info('Skipping already uploaded: {}'.format(full_file_name))
                    continue
                if placeholders:
                    empty_file = empty_contracts_file if award_type == 'contracts' else empty_asssistance_file
                    self.upload_placeholder(file_name=full_file_name, empty_file=empty_file)
                else:
                    self.download(
                        full_file_name,
                        ['prime_awards'],
                        award_types=award_mappings[award_type],
                        agency=agency['toptier_agency_id'],
                        date_type='action_date',
                        start_date=start_date,
                        end_date=end_date,
                        monthly_download=True,
                        cleanup=cleanup,
                        use_sqs=(not local),
                    )
    logger.info('Populate Monthly Files complete')