def retrieve_cached_file_generation(job, agency_type, agency_code, file_format='csv'):
    """ Retrieves a cached FileGeneration for the D file request, if there is one.

        Args:
            job: Upload Job for the generation file
            agency_type: Type of Agency to generate files by: "awarding" or "funding"
            agency_code: Agency code to generate file for
            file_format: Format of the generated file ("csv" or "txt")

        Returns:
            FileGeneration object matching the criteria, or None
    """
    sess = GlobalDB.db().session
    logger.info({'message': 'Checking for a cached FileGeneration to pull file from', 'message_type': 'BrokerInfo',
                 'submission_id': job.submission_id, 'job_id': job.job_id, 'file_type': job.file_type.letter_name})

    # Find the current date and the date of the last FPDS pull
    current_date = datetime.now().date()
    last_update = sess.query(FPDSUpdate).one_or_none()
    fpds_date = last_update.update_date if last_update else current_date

    # Check if a cached FileGeneration already exists using these criteria
    file_generation = None
    file_gen = sess.query(FileGeneration).filter(
        FileGeneration.start_date == job.start_date, FileGeneration.end_date == job.end_date,
        FileGeneration.agency_code == agency_code, FileGeneration.agency_type == agency_type,
        FileGeneration.file_type == job.file_type.letter_name, FileGeneration.file_format == file_format,
        FileGeneration.is_cached_file.is_(True)).one_or_none()

    if file_gen and (file_gen.file_type == 'D1' and file_gen.request_date < fpds_date):
        # Uncache the expired D1 FileGeneration
        file_gen.is_cached_file = False
        sess.commit()
    elif file_gen:
        file_generation = file_gen

    return file_generation
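# Illustrative sketch: the cache-expiry rule above reduces to "a cached D1 file is stale once
# FPDS has been refreshed after the file was generated; other file types stay valid."
# `is_cached_generation_stale` is a hypothetical helper name used only for this sketch,
# assuming the same date semantics as the query above.
from datetime import date

def is_cached_generation_stale(file_type, request_date, last_fpds_pull):
    """ Return True when a cached generation should be uncached. """
    return file_type == 'D1' and request_date < last_fpds_pull

# Example: a D1 file requested before the latest FPDS pull is stale; a D2 file is not
assert is_cached_generation_stale('D1', date(2019, 1, 1), date(2019, 1, 2)) is True
assert is_cached_generation_stale('D2', date(2019, 1, 1), date(2019, 1, 2)) is False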
def get_submission_data(submission, file_type=''): """ Get data for the submission specified Args: submission: submission to retrieve metadata for file_type: the type of job to retrieve metadata for Returns: JsonResponse containing the error information or the object containing metadata for all relevant file types """ sess = GlobalDB.db().session file_type = file_type.lower() # Make sure the file type provided is valid if file_type and file_type not in FILE_TYPE_DICT and file_type != 'cross': return JsonResponse.error(ValueError(file_type + ' is not a valid file type'), StatusCode.CLIENT_ERROR) # Make sure the file type provided is valid for the submission type is_fabs = submission.d2_submission if file_type and (is_fabs and file_type != 'fabs') or (not is_fabs and file_type == 'fabs'): return JsonResponse.error(ValueError(file_type + ' is not a valid file type for this submission'), StatusCode.CLIENT_ERROR) job_query = sess.query(Job).filter(Job.submission_id == submission.submission_id) if not file_type: relevant_job_types = (JOB_TYPE_DICT['csv_record_validation'], JOB_TYPE_DICT['validation']) job_query = job_query.filter(Job.job_type_id.in_(relevant_job_types)) elif file_type == 'cross': job_query = job_query.filter(Job.job_type_id == JOB_TYPE_DICT['validation']) else: job_query = job_query.filter(Job.file_type_id == FILE_TYPE_DICT[file_type]) job_dict = {'jobs': [job_to_dict(job) for job in job_query]} return JsonResponse.create(StatusCode.OK, job_dict)
def start_a_generation(job, start_date, end_date, agency_code): """ Validates the start and end dates of the generation and sends the job information to SQS. Args: job: File generation job to start start_date: String to parse as the start date of the generation end_date: String to parse as the end date of the generation agency_code: Agency code for A file generations """ if not (StringCleaner.is_date(start_date) and StringCleaner.is_date(end_date)): raise ResponseException("Start or end date cannot be parsed into a date of format MM/DD/YYYY", StatusCode.CLIENT_ERROR) # Update the Job's start and end dates sess = GlobalDB.db().session job.start_date = start_date job.end_date = end_date sess.commit() mark_job_status(job.job_id, "waiting") file_type = job.file_type.letter_name log_data = {'message': 'Sending {} file generation job {} to Validator in SQS'.format(file_type, job.job_id), 'message_type': 'BrokerInfo', 'job_id': job.job_id, 'file_type': file_type} logger.info(log_data) # Set SQS message attributes message_attr = {'agency_code': {'DataType': 'String', 'StringValue': agency_code}} # Add job_id to the SQS job queue queue = sqs_queue() msg_response = queue.send_message(MessageBody=str(job.job_id), MessageAttributes=message_attr) log_data['message'] = 'SQS message response: {}'.format(msg_response) logger.debug(log_data)
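# Illustrative sketch: StringCleaner.is_date is assumed to accept dates like "01/31/2018".
# A minimal stand-in using only the standard library could look like this; the helper name
# and the exact accepted format are assumptions for illustration, not the broker's implementation.
from datetime import datetime

def looks_like_mmddyyyy(value):
    """ Return True if the value parses as an MM/DD/YYYY date. """
    try:
        datetime.strptime(value, '%m/%d/%Y')
        return True
    except (TypeError, ValueError):
        return False

assert looks_like_mmddyyyy('01/31/2018')
assert not looks_like_mmddyyyy('2018-01-31')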
def load_frec(file_name): """ Load FREC (high-level agency names) lookup table. Args: file_name: path/url to the file to be read """ sess = GlobalDB.db().session models = {frec.frec_code: frec for frec in sess.query(FREC)} # read FREC values from csv data = pd.read_csv(file_name, dtype=str) # clean data data = clean_data( data, FREC, {"frec": "frec_code", "cgac_agency_code": "cgac_code", "frec_entity_description": "agency_name", "agency_abbreviation": "agency_abbreviation"}, {"frec": {"keep_null": False}, "cgac_code": {"pad_to_length": 3}, "frec_code": {"pad_to_length": 4}} ) # de-dupe data.drop_duplicates(subset=['frec_code'], inplace=True) # create foreign key dicts cgac_dict = {str(cgac.cgac_code): cgac.cgac_id for cgac in sess.query(CGAC).filter(CGAC.cgac_code.in_(data["cgac_code"])).all()} # insert to db delete_missing_frecs(models, data) update_frecs(models, data, cgac_dict) sess.add_all(models.values()) sess.commit() logger.info('%s FREC records inserted', len(models))
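# Illustrative sketch: clean_data is assumed to rename columns and zero-pad codes
# ("pad_to_length") before de-duplication. The snippet below reproduces that behavior with
# plain pandas so the transformation is easy to verify in isolation; the sample values are made up.
import pandas as pd

raw = pd.DataFrame({'frec': ['100', '100', '2000'], 'cgac_agency_code': ['9', '9', '12']})
cleaned = raw.rename(columns={'frec': 'frec_code', 'cgac_agency_code': 'cgac_code'})
cleaned['cgac_code'] = cleaned['cgac_code'].str.zfill(3)   # pad_to_length: 3
cleaned['frec_code'] = cleaned['frec_code'].str.zfill(4)   # pad_to_length: 4
cleaned = cleaned.drop_duplicates(subset=['frec_code'])
print(cleaned)  # two rows remain: frec_code 0100/2000 with cgac_code 009/012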
def load_cgac(file_name): """ Load CGAC (high-level agency names) lookup table. Args: file_name: path/url to the file to be read """ sess = GlobalDB.db().session models = {cgac.cgac_code: cgac for cgac in sess.query(CGAC)} # read CGAC values from csv data = pd.read_csv(file_name, dtype=str) # clean data data = clean_data( data, CGAC, {"cgac_agency_code": "cgac_code", "agency_name": "agency_name", "agency_abbreviation": "agency_abbreviation"}, {"cgac_code": {"pad_to_length": 3}} ) # de-dupe data.drop_duplicates(subset=['cgac_code'], inplace=True) delete_missing_cgacs(models, data) update_cgacs(models, data) sess.add_all(models.values()) sess.commit() logger.info('%s CGAC records inserted', len(models))
def main(): """ Loads DUNS from the DUNS export file (comprised of DUNS pre-2014) """ parser = argparse.ArgumentParser(description='Adding historical DUNS to Broker.') parser.add_argument('-size', '--block_size', help='Number of rows to batch load', type=int, default=10000) args = parser.parse_args() sess = GlobalDB.db().session client = sam_config_is_valid() logger.info('Retrieving historical DUNS file') start = datetime.now() if CONFIG_BROKER["use_aws"]: s3_client = boto3.client('s3', region_name=CONFIG_BROKER['aws_region']) duns_file = s3_client.generate_presigned_url('get_object', {'Bucket': CONFIG_BROKER['archive_bucket'], 'Key': "DUNS_export_deduped.csv"}, ExpiresIn=10000) else: duns_file = os.path.join(CONFIG_BROKER["broker_files"], "DUNS_export_deduped.csv") if not duns_file: raise OSError("No DUNS_export_deduped.csv found.") logger.info("Retrieved historical DUNS file in {} s".format((datetime.now()-start).total_seconds())) try: run_duns_batches(duns_file, sess, client, args.block_size) except Exception as e: logger.exception(e) sess.rollback() logger.info("Updating historical DUNS complete") sess.close()
def delete_submission(submission): """ Deletes all data associated with the specified submission NOTE: THERE IS NO WAY TO UNDO THIS """ if submission.publish_status_id != PUBLISH_STATUS_DICT['unpublished']: return JsonResponse.error(ValueError("Submissions that have been certified cannot be deleted"), StatusCode.CLIENT_ERROR) sess = GlobalDB.db().session # Check if the submission has any jobs that are currently running, if so, do not allow deletion jobs = sess.query(Job).filter(Job.submission_id == submission.submission_id, Job.job_status_id == JOB_STATUS_DICT['running']).all() if jobs: return JsonResponse.error(ValueError("Submissions with running jobs cannot be deleted"), StatusCode.CLIENT_ERROR) sess.query(SubmissionSubTierAffiliation).filter( SubmissionSubTierAffiliation.submission_id == submission.submission_id).delete( synchronize_session=False) sess.query(Submission).filter(Submission.submission_id == submission.submission_id).delete( synchronize_session=False) sess.expire_all() return JsonResponse.create(StatusCode.OK, {"message": "Success"})
def main(): parser = argparse.ArgumentParser(description='Update contract transaction and/or fabs rows based on updates to the ' 'agency list') parser.add_argument('-a', '--missing_agency', help='Perform an update on 999 agency codes', action='store_true', required=False, default=False) parser.add_argument('-s', '--subtier_code', help='Select specific subtier to update. Must be a 4-digit code', type=str, required=False) parser.add_argument('-t', '--tables', help='Which tables (fabs, fpds, or both) to update. Defaults to both.', required=False, default='both', choices=['fabs', 'fpds', 'both']) args = parser.parse_args() if not args.subtier_code and not args.missing_agency: logger.error('Missing either subtier_code or missing_agency argument') elif args.subtier_code and len(args.subtier_code) != 4: logger.error('Subtier not a correct format, must be 4 digits') else: sess = GlobalDB.db().session if args.tables in ('fpds', 'both'): update_table(sess, 'awarding', 'detached_award_procurement', args) update_table(sess, 'funding', 'detached_award_procurement', args) logger.info("Procurement Update Complete") if args.tables in ('fabs', 'both'): update_table(sess, 'awarding', 'published_award_financial_assistance', args) update_table(sess, 'funding', 'published_award_financial_assistance', args) logger.info("Award Financial Assistance Update Complete")
def main(): sess = GlobalDB.db().session # delete any data in the PublishedAwardFinancialAssistance table logger.info('deleting PublishedAwardFinancialAssistance data') sess.query(PublishedAwardFinancialAssistance).delete(synchronize_session=False) sess.commit() if CONFIG_BROKER["use_aws"]: s3connection = boto.s3.connect_to_region(CONFIG_BROKER['aws_region']) s3bucket = s3connection.lookup(CONFIG_BROKER['archive_bucket']) for key in s3bucket.list(): if re.match('^\d{4}_All_(DirectPayments|Grants|Insurance|Loans|Other)_Full_\d{8}.csv.zip', key.name): file_path = key.generate_url(expires_in=600) parse_fabs_file(urllib.request.urlopen(file_path), sess) else: base_path = os.path.join(CONFIG_BROKER["path"], "dataactvalidator", "config", "fabs") file_list = [f for f in os.listdir(base_path)] for file in file_list: if re.match('^\d{4}_All_(DirectPayments|Grants|Insurance|Loans|Other)_Full_\d{8}.csv.zip', file): parse_fabs_file(open(os.path.join(base_path, file)), sess) set_active_rows(sess) logger.info("Historical FABS script complete")
def update_offices(csv_path): """ Load office data from the provided CSV and replace/insert any office lookups. Args: csv_path: path/url to the file to read from """ sess = GlobalDB.db().session data = clean_office(csv_path) add_existing_id(data) old_data = data[data['existing_id'].notnull()] del old_data['existing_id'] new_data = data[data['existing_id'].isnull()] del new_data['existing_id'] # instead of using the pandas to_sql dataframe method like some of the other domain load processes, iterate through # the dataframe rows so we can load using the orm model (note: toyed with the SQLAlchemy bulk load options but # ultimately decided not to go outside the unit of work for the sake of a performance gain) for _, row in old_data.iterrows(): sess.query(FPDSContractingOffice).filter_by(contracting_office_code=row['contracting_office_code'])\ .update(row, synchronize_session=False) for _, row in new_data.iterrows(): sess.add(FPDSContractingOffice(**row)) sess.commit() logger.info('%s records in CSV, %s existing', len(data.index), sum(data['existing_id'].notnull()))
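# Illustrative sketch: the update/insert split above hinges on the `existing_id` column produced
# by add_existing_id; rows with a non-null id are updated and rows without one are inserted.
# A self-contained version of that partition, with made-up office codes:
import pandas as pd

data = pd.DataFrame({'contracting_office_code': ['A1', 'B2', 'C3'],
                     'existing_id': [10, None, 12]})
updates = data[data['existing_id'].notnull()].drop(columns=['existing_id'])
inserts = data[data['existing_id'].isnull()].drop(columns=['existing_id'])
print(len(updates), 'to update;', len(inserts), 'to insert')  # 2 to update; 1 to insert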
def submission_procurements(submission_id): """Fetch procurements and subcontracts""" sess = GlobalDB.db().session logger.debug('Starting submission procurements') award_proc_sub = sess.query(AwardProcurement.piid, AwardProcurement.parent_award_id, AwardProcurement.naics_description, AwardProcurement.awarding_sub_tier_agency_c, AwardProcurement.submission_id).\ filter(AwardProcurement.submission_id == submission_id).distinct().cte("award_proc_sub") results = sess.query(award_proc_sub, FSRSProcurement, FSRSSubcontract).\ filter(FSRSProcurement.contract_number == award_proc_sub.c.piid).\ filter(FSRSProcurement.idv_reference_number.isnot_distinct_from(award_proc_sub.c.parent_award_id)).\ filter(FSRSProcurement.contracting_office_aid == award_proc_sub.c.awarding_sub_tier_agency_c).\ filter(FSRSSubcontract.parent_id == FSRSProcurement.id) # The cte returns a set of columns, not an AwardProcurement object, so we have to unpack each column for award_piid, award_parent_id, award_naics_desc, award_sub_tier, award_sub_id, proc, sub in results: # need to combine those columns again here so we can get a proper ModelRow award = AwardProcurement(piid=award_piid, parent_award_id=award_parent_id, naics_description=award_naics_desc, awarding_sub_tier_agency_c=award_sub_tier, submission_id=award_sub_id) yield ModelRow(award, proc, sub, naics_desc=award.naics_description) logger.debug('Finished submission procurements')
def main(): sess = GlobalDB.db().session start = datetime.datetime.now() logger.info("FPDS IDV delete started") # get and read the file del_file = get_delete_file() data = pd.read_csv(del_file, dtype=str, encoding='utf_8_sig') # Clean up the data so it's usable data = clean_delete_data(data) # Gather list of records to delete gather_start = datetime.datetime.now() logger.info("Starting gathering of records to delete.") delete_list, delete_dict = get_deletes(sess, data) gather_end = datetime.datetime.now() logger.info("Finished gathering records in {} seconds. Total records to delete: {}".format(gather_end-gather_start, len(delete_list))) # Delete records logger.info("Deleting records") delete_records(sess, delete_list, delete_dict) sess.commit() end = datetime.datetime.now() logger.info("FPDS IDV delete finished in %s seconds", end-start)
def insert_from_table(file_type_id, submission_id): """ Insert the data from the base staging table into the corresponding Certified table. Params: file_type_id: Database file type ID for files A, B, or C submission_id: Database ID for the submission being loaded """ sess = GlobalDB.db().session logger.info('Copying submission {} data from base table into {}'.format(submission_id, FTI_TABLENAME_DICT[file_type_id])) table_type = FTI_BASETABLE_DICT[file_type_id].__table__.name column_list = [col.key for col in FTI_BASETABLE_DICT[file_type_id].__table__.columns] column_list.remove('created_at') column_list.remove('updated_at') column_list.remove(table_type + '_id') col_string = ", ".join(column_list) executed = sess.execute( "INSERT INTO certified_{} (created_at, updated_at, {}) " "SELECT NOW() AS created_at, NOW() AS updated_at, {} " "FROM {} " "WHERE submission_id={}".format(table_type, col_string, col_string, table_type, submission_id)) sess.commit() logger.info('Loaded {} records into the {} table'.format(executed.rowcount, FTI_TABLENAME_DICT[file_type_id]))
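# Illustrative sketch: the INSERT ... SELECT above copies every column except the surrogate key
# and the timestamp columns, which are regenerated with NOW(). The string assembly can be checked
# in isolation; the table and column names below are stand-ins, not the real lookup dictionaries.
column_list = ['created_at', 'updated_at', 'appropriation_id', 'submission_id', 'total_budgetary_resources_cpe']
table_name = 'appropriation'
columns = [col for col in column_list if col not in ('created_at', 'updated_at', table_name + '_id')]
col_string = ', '.join(columns)
sql = ('INSERT INTO certified_{table} (created_at, updated_at, {cols}) '
       'SELECT NOW() AS created_at, NOW() AS updated_at, {cols} '
       'FROM {table} WHERE submission_id={sub}').format(table=table_name, cols=col_string, sub=1234)
print(sql)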
def list_all_agencies(): sess = GlobalDB.db().session cgacs = sess.query(CGAC).all() agency_list = [{'agency_name': cgac.agency_name, 'cgac_code': cgac.cgac_code} for cgac in cgacs] frecs = sess.query(FREC).all() shared_list = [{'agency_name': frec.agency_name, 'frec_code': frec.frec_code} for frec in frecs] return JsonResponse.create(StatusCode.OK, {'agency_list': agency_list, 'shared_agency_list': shared_list})
def create_generation_job(file_type_name, start_date, end_date):
    """ Add details to jobs for generating files

        Args:
            file_type_name: the name of the file type being generated
            start_date: The start date for the generation job, only used for detached files
            end_date: The end date for the generation job, only used for detached files

        Returns:
            the file generation job
    """
    sess = GlobalDB.db().session

    # Create a new job for a detached generation
    job = Job(job_type_id=lookups.JOB_TYPE_DICT['file_upload'], user_id=g.user.user_id,
              file_type_id=lookups.FILE_TYPE_DICT[file_type_name], start_date=start_date, end_date=end_date)
    sess.add(job)

    # Update the job details
    job.message = None
    job.job_status_id = lookups.JOB_STATUS_DICT["ready"]
    sess.commit()
    sess.refresh(job)

    return job
def set_skip_guide(self): """ Set current user's skip guide parameter Returns: JsonResponse object containing results of setting the skip guide or details of the error that occurred. Possible errors include the request not containing a skip_guide parameter or it not being a boolean value """ sess = GlobalDB.db().session request_dict = RequestDictionary.derive(self.request) try: if 'skip_guide' not in request_dict: raise ResponseException( "Must include skip_guide parameter", StatusCode.CLIENT_ERROR ) skip_guide = str(request_dict['skip_guide']).lower() if skip_guide not in ("true", "false"): raise ResponseException( "skip_guide must be true or false", StatusCode.CLIENT_ERROR ) g.user.skip_guide = skip_guide == "true" except ResponseException as exc: return JsonResponse.error(exc, exc.status) sess.commit() return JsonResponse.create(StatusCode.OK, {"message": "skip_guide set successfully", "skip_guide": skip_guide})
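# Illustrative sketch: the skip_guide value arrives as a string and is normalized before being
# stored as a boolean. The same normalization in isolation (the helper name is hypothetical):
def parse_bool_param(value):
    """ Return True/False for "true"/"false" (any case), or raise ValueError otherwise. """
    normalized = str(value).lower()
    if normalized not in ('true', 'false'):
        raise ValueError('skip_guide must be true or false')
    return normalized == 'true'

assert parse_bool_param('True') is True
assert parse_bool_param('false') is False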
def check_generation_prereqs(submission_id, file_type): """ Make sure the prerequisite jobs for this file type are complete without errors. Args: submission_id: the submission id for which we're checking file generation prerequisites file_type: the type of file being generated Returns: A boolean indicating if the job has no incomplete prerequisites (True if the job is clear to start) """ sess = GlobalDB.db().session prereq_query = sess.query(Job).filter(Job.submission_id == submission_id, or_(Job.job_status_id != lookups.JOB_STATUS_DICT['finished'], Job.number_of_errors > 0)) # Check cross-file validation if generating E or F if file_type in ['E', 'F']: unfinished_prereqs = prereq_query.filter(Job.job_type_id == lookups.JOB_TYPE_DICT['validation']).count() # Check A, B, C files if generating a D file elif file_type in ['D1', 'D2']: unfinished_prereqs = prereq_query.filter(Job.file_type_id.in_( [lookups.FILE_TYPE_DICT['appropriations'], lookups.FILE_TYPE_DICT['program_activity'], lookups.FILE_TYPE_DICT['award_financial']])).count() else: raise ResponseException('Invalid type for file generation', StatusCode.CLIENT_ERROR) return unfinished_prereqs == 0
def main():
    sess = GlobalDB.db().session
    parser = argparse.ArgumentParser(description='Update business categories for existing transaction data')
    parser.add_argument('-fpds', help='Update only FPDS business categories', action='store_true')
    parser.add_argument('-fabs', help='Update only FABS business categories', action='store_true')
    args = parser.parse_args()

    overall_start = datetime.now()
    logger.info("Starting business categories updates")

    start = datetime.now()
    logger.info("Recreating business category SQL functions")
    sess.execute(BUSINESS_CATEGORY_FUNCTION_SQL)
    sess.commit()
    logger.info("Finished recreating business category SQL functions in %s seconds" % str(datetime.now() - start))

    # If neither flag was provided, update both; otherwise only update the requested type
    if not (args.fpds or args.fabs):
        update_fpds_business_categories(sess)
        update_fabs_business_categories(sess)
    elif args.fpds:
        update_fpds_business_categories(sess)
    elif args.fabs:
        update_fabs_business_categories(sess)

    logger.info("Completed business categories updates in %s seconds" % str(datetime.now() - overall_start))
def submit_files():
    file_manager = FileHandler(request, is_local=is_local, server_path=server_path)

    sess = GlobalDB.db().session
    start_date = request.json.get('reporting_period_start_date')
    end_date = request.json.get('reporting_period_end_date')
    is_quarter = request.json.get('is_quarter_format', False)
    if not (start_date is None or end_date is None):
        formatted_start_date, formatted_end_date = FileHandler.check_submission_dates(start_date, end_date,
                                                                                      is_quarter)

        submissions = sess.query(Submission).filter(
            Submission.cgac_code == request.json.get('cgac_code'),
            Submission.frec_code == request.json.get('frec_code'),
            Submission.reporting_start_date == formatted_start_date,
            Submission.reporting_end_date == formatted_end_date,
            Submission.is_quarter_format == is_quarter,
            Submission.publish_status_id != PUBLISH_STATUS_DICT['unpublished'])

        if 'existing_submission_id' in request.json:
            # filter() returns a new query rather than modifying it in place, so reassign it
            submissions = submissions.filter(Submission.submission_id != request.json['existing_submission_id'])

        submissions = submissions.order_by(desc(Submission.created_at))

        if submissions.count() > 0:
            data = {
                "message": "A submission with the same period already exists.",
                "submissionId": submissions[0].submission_id
            }
            return JsonResponse.create(StatusCode.CLIENT_ERROR, data)

    return file_manager.submit(create_credentials)
def read_zips(): with create_app().app_context(): sess = GlobalDB.db().session # delete old values in case something changed and one is now invalid sess.query(Zips).delete(synchronize_session=False) sess.commit() if CONFIG_BROKER["use_aws"]: s3connection = boto.s3.connect_to_region(CONFIG_BROKER['aws_region']) s3bucket = s3connection.lookup(CONFIG_BROKER['sf_133_bucket']) zip_folder = CONFIG_BROKER["zip_folder"] + "/" for key in s3bucket.list(prefix=zip_folder): if key.name != zip_folder: zip_4_file_path = key.generate_url(expires_in=600) parse_zip4_file(urllib.request.urlopen(zip_4_file_path), sess) # parse remaining 5 digit zips that weren't in the first file citystate_file = s3bucket.get_key("ctystate.txt").generate_url(expires_in=600) parse_citystate_file(urllib.request.urlopen(citystate_file), sess) else: base_path = os.path.join(CONFIG_BROKER["path"], "dataactvalidator", "config", CONFIG_BROKER["zip_folder"]) # creating the list while ignoring hidden files on mac file_list = [f for f in os.listdir(base_path) if not re.match('^\.', f)] for file in file_list: parse_zip4_file(open(os.path.join(base_path, file)), sess) # parse remaining 5 digit zips that weren't in the first file citystate_file = os.path.join(CONFIG_BROKER["path"], "dataactvalidator", "config", "ctystate.txt") parse_citystate_file(open(citystate_file), sess) logger.info("Zipcode script complete")
def setup_emails(): """Create email templates from model metadata.""" with create_app().app_context(): sess = GlobalDB.db().session # insert email template types type_list = [ ('review_submission', '') ] for t in type_list: email_id = sess.query( EmailTemplateType.email_template_type_id).filter( EmailTemplateType.name == t[0]).one_or_none() if not email_id: email_type = EmailTemplateType(name=t[0], description=t[1]) sess.add(email_type) sess.commit() # insert email templates # Submission Review template = ("[REV_USER_NAME] has shared a DATA Act broker submission with you from [REV_AGENCY]. Click " "<a href='[REV_URL]'>here</a> to review their submission. For questions or comments, please visit " "the Service Desk at https://servicedesk.usaspending.gov/ or e-mail [email protected].") load_email_template(sess, "DATA Act Broker - Submission Ready for Review", template, "review_submission")
def get_fabs_meta(submission_id): """Return the total rows, valid rows, publish date, and publish file for FABS submissions""" sess = GlobalDB.db().session # get row counts from the DetachedAwardFinancialAssistance table dafa = DetachedAwardFinancialAssistance total_rows = sess.query(dafa).filter(dafa.submission_id == submission_id) valid_rows = total_rows.filter(dafa.is_valid) # retrieve the published data and file submission = sess.query(Submission).filter(Submission.submission_id == submission_id).one() publish_date, published_file = None, None certify_data = get_lastest_certified_date(submission, is_fabs=True) try: iter(certify_data) except TypeError: publish_date = certify_data else: publish_date, file_path = certify_data if CONFIG_BROKER["use_aws"] and file_path: path, file_name = file_path.rsplit('/', 1) # split by last instance of / published_file = S3Handler().get_signed_url(path=path, file_name=file_name, bucket_route=CONFIG_BROKER['certified_bucket'], url_mapping=CONFIG_BROKER["certified_bucket_mapping"], method="get_object") elif file_path: published_file = file_path return { 'valid_rows': valid_rows.count(), 'total_rows': total_rows.count(), 'publish_date': publish_date.strftime('%-I:%M%p %m/%d/%Y') if publish_date else None, 'published_file': published_file }
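# Illustrative sketch: get_lastest_certified_date apparently returns either a bare publish date
# or a (publish_date, file_path) tuple, and the try/iter pattern above tells the two apart.
# The same unpacking logic in isolation, with placeholder values:
from datetime import datetime

def split_certify_data(certify_data):
    try:
        iter(certify_data)
    except TypeError:
        return certify_data, None      # bare date, no published file
    publish_date, file_path = certify_data
    return publish_date, file_path

assert split_certify_data(datetime(2018, 1, 1)) == (datetime(2018, 1, 1), None)
assert split_certify_data((datetime(2018, 1, 1), 'path/to/file')) == (datetime(2018, 1, 1), 'path/to/file')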
def setUpClass(cls): """Set up class-wide resources like submissions and jobs.""" super(UserTests, cls).setUpClass() with create_app().app_context(): sess = GlobalDB.db().session # Add submissions for agency user sess.query(Submission).filter(Submission.user_id == cls.agency_user_id).delete() sess.commit() for i in range(0, 6): sub = Submission(user_id=cls.agency_user_id) sub.reporting_start_date = datetime(2015, 10, 1) sub.reporting_end_date = datetime(2015, 12, 31) sub.cgac_code = cls.admin_cgac_code sess.add(sub) sess.commit() if i == 0: cls.submission_id = sub.submission_id # Add job to first submission job = Job( submission_id=cls.submission_id, job_status_id=JOB_STATUS_DICT['running'], job_type_id=JOB_TYPE_DICT['file_upload'], file_type_id=FILE_TYPE_DICT['appropriations'] ) sess.add(job) sess.commit() cls.uploadId = job.job_id
def write_file_error(job_id, filename, error_type, extra_info=None): """ Write a file-level error to the file table Args: job_id: ID of job in job tracker filename: name of error report in S3 error_type: type of error, value will be mapped to ValidationError class extra_info: list of extra information to be included in file """ sess = GlobalDB.db().session try: int(job_id) except: raise ValueError("".join(["Bad jobId: ", str(job_id)])) # Get File object for this job ID or create it if it doesn't exist file_rec = create_file_if_needed(job_id, filename) # Mark error type and add header info if present file_rec.file_status_id = FILE_STATUS_DICT[ValidationError.get_error_type_string(error_type)] if extra_info is not None: if "missing_headers" in extra_info: file_rec.headers_missing = extra_info["missing_headers"] if "duplicated_headers" in extra_info: file_rec.headers_duplicated = extra_info["duplicated_headers"] sess.add(file_rec) sess.commit()
def get_window(): sess = GlobalDB.db().session curr_date = datetime.now().date() return sess.query(SubmissionWindow).filter( SubmissionWindow.start_date <= curr_date, SubmissionWindow.end_date >= curr_date)
def check_year_and_quarter(cgac_code, frec_code, reporting_fiscal_year, reporting_fiscal_period): """ Check if cgac (or frec) code, year, and quarter already has a published submission """ if not cgac_code and not frec_code: return JsonResponse.error(ValueError("CGAC or FR Entity Code required"), StatusCode.CLIENT_ERROR) sess = GlobalDB.db().session return find_existing_submissions_in_period(sess, cgac_code, frec_code, reporting_fiscal_year, reporting_fiscal_period)
def before_request(): # Set global value for local g.is_local = local sess = GlobalDB.db().session # setup user g.user = None if session.get('name') is not None: g.user = sess.query(User).filter_by(user_id=session['name']).one_or_none()
def update_tas_ids(model_class, submission_id): sess = GlobalDB.db().session submission = sess.query(Submission).filter_by(submission_id=submission_id).one() subquery = matching_cars_subquery(sess, model_class, submission.reporting_start_date, submission.reporting_end_date) sess.query(model_class).filter_by(submission_id=submission_id).\ update({getattr(model_class, 'tas_id'): subquery}, synchronize_session=False) sess.commit()
def test_w_perms_broker_user(create_session_mock, max_dict_mock, database, monkeypatch):
    ah = max_login_func(create_session_mock, max_dict_mock, monkeypatch, MAX_RESPONSE_W_PERMS)
    res = ah.max_login({})
    sess = GlobalDB.db().session
    # This is to prevent an integrity error with other tests that create users.
    sess.query(User).filter(func.lower(User.email) == func.lower("*****@*****.**")).delete(synchronize_session=False)
    sess.commit()
    assert res is True
def load_tas(backfill_historic=False): """ Load TAS file into broker database. Args: backfill_historic: if set to true, this will only update certain columns if budget_function_code is null """ # read TAS file to dataframe, to make sure all is well with the file before firing up a db transaction tas_files = [] if CONFIG_BROKER["use_aws"]: # Storing version dictionaries in the list to prevent getting all the links at once and possibly work with # expired AWS links s3connection = boto3.client('s3', region_name=CONFIG_BROKER['aws_region']) # list_object_versions returns the versions in reverse chronological order if not backfill_historic: # get the latest tas_file tas_files = [s3connection.generate_presigned_url(ClientMethod='get_object', Params={'Bucket': CONFIG_BROKER['sf_133_bucket'], 'Key': 'cars_tas.csv'}, ExpiresIn=600)] else: tas_files = s3connection.list_object_versions(Bucket=CONFIG_BROKER['sf_133_bucket'], Prefix='cars_tas.csv')['Versions'] # getting the latest file (see the reversed) from each day for performance and accuracy tas_files_grouped = {tas_file['LastModified'].date(): tas_file for tas_file in reversed(tas_files)} # sorting them back to chronological order tas_files = sorted([tas_file for date, tas_file in tas_files_grouped.items()], key=lambda k: k['LastModified']) elif backfill_historic: logger.error('Unable to attain historical versions of cars_tas without aws access.') return else: tas_file = os.path.join(CONFIG_BROKER["path"], "dataactvalidator", "config", "cars_tas.csv") tas_files.append(tas_file) sess = GlobalDB.db().session for tas_file in reversed(tas_files): update_missing = missing_records(sess) if backfill_historic else [] if backfill_historic and not update_missing: # no more missing, we're done here break if CONFIG_BROKER["use_aws"] and backfill_historic: # generate url from the version dict logger.info('Working with remote cars_tas.csv (from {})'.format(tas_file['LastModified'])) tas_file = s3connection.generate_presigned_url( ClientMethod='get_object', Params={ 'Bucket': CONFIG_BROKER['sf_133_bucket'], 'Key': 'cars_tas.csv', 'VersionId': tas_file['VersionId'] }, ExpiresIn=600 ) elif CONFIG_BROKER["use_aws"]: logger.info('Working with latest remote cars_tas.csv') else: logger.info('Working with local cars_tas.csv') update_tas_lookups(sess, tas_file, update_missing=update_missing)
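# Illustrative sketch: when backfilling, the loader keeps only the newest cars_tas.csv version
# from each day and then processes the days in chronological order. The grouping trick (iterate
# the reverse-chronological list in reverse so the last write per date wins) in isolation,
# with made-up version IDs:
from datetime import datetime

versions = [  # newest first, as list_object_versions returns them
    {'VersionId': 'v3', 'LastModified': datetime(2019, 3, 2, 18, 0)},
    {'VersionId': 'v2', 'LastModified': datetime(2019, 3, 2, 9, 0)},
    {'VersionId': 'v1', 'LastModified': datetime(2019, 3, 1, 12, 0)},
]
latest_per_day = {v['LastModified'].date(): v for v in reversed(versions)}
ordered = sorted(latest_per_day.values(), key=lambda v: v['LastModified'])
print([v['VersionId'] for v in ordered])  # ['v1', 'v3']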
def setUpClass(cls): """Set up resources to be shared within a test class""" cls.session_id = "" with createValidatorApp().app_context(): # update application's db config options so unittests # run against test databases suite = cls.__name__.lower() config = dataactcore.config.CONFIG_DB cls.num = randint(1, 9999) config['db_name'] = 'unittest{}_{}_data_broker'.format( cls.num, suite) dataactcore.config.CONFIG_DB = config createDatabase(CONFIG_DB['db_name']) runMigrations() # drop and re-create test user db/tables setupUserDB() # drop and re-create test job db/tables setupJobTrackerDB() # drop and re-create test error db/tables setupErrorDB() # drop and re-create test validation db/tables setupValidationDB() # load e-mail templates setupEmails() # set up default e-mails for tests test_users = {} test_users['admin_email'] = '*****@*****.**' test_users['change_user_email'] = '*****@*****.**' test_users['password_reset_email'] = '*****@*****.**' test_users['inactive_email'] = '*****@*****.**' test_users['password_lock_email'] = '*****@*****.**' test_users['expired_lock_email'] = '*****@*****.**' test_users['agency_admin_email'] = '*****@*****.**' # this email is for a regular agency_user email that is to be used for # testing functionality expected by a normal, base user test_users['agency_user'] = '******' test_users['approved_email'] = '*****@*****.**' test_users['submission_email'] = '*****@*****.**' user_password = '******' admin_password = '******' # set up users for status tests StatusTestUser = namedtuple( 'StatusTestUser', ['email', 'user_status', 'permissions', 'user_type']) StatusTestUser.__new__.__defaults__ = (None, None, AccountType.AGENCY_USER, None) status_test_users = [] status_test_users.append( StatusTestUser('*****@*****.**', 'awaiting_confirmation', 0)) status_test_users.append( StatusTestUser('*****@*****.**', 'email_confirmed')) status_test_users.append( StatusTestUser('*****@*****.**', 'awaiting_approval')) status_test_users.append( StatusTestUser('*****@*****.**', 'awaiting_approval')) status_test_users.append( StatusTestUser('*****@*****.**', 'awaiting_approval')) status_test_users.append( StatusTestUser( test_users['admin_email'], 'approved', AccountType.WEBSITE_ADMIN + AccountType.AGENCY_USER)) status_test_users.append( StatusTestUser(test_users['approved_email'], 'approved')) status_test_users.append( StatusTestUser('*****@*****.**', 'denied')) # add new users createUserWithPassword(test_users["submission_email"], user_password, Bcrypt()) createUserWithPassword(test_users["change_user_email"], user_password, Bcrypt()) createUserWithPassword(test_users["password_reset_email"], user_password, Bcrypt()) createUserWithPassword(test_users["inactive_email"], user_password, Bcrypt()) createUserWithPassword(test_users["password_lock_email"], user_password, Bcrypt()) createUserWithPassword(test_users['expired_lock_email'], user_password, Bcrypt()) createUserWithPassword(test_users['agency_admin_email'], admin_password, Bcrypt(), permission=4) createUserWithPassword(test_users['agency_user'], user_password, Bcrypt()) # get user info and save as class variables for use by tests sess = GlobalDB.db().session agencyUser = sess.query(User).filter( User.email == test_users['agency_user']).one() cls.agency_user_id = agencyUser.user_id # set the specified account to be expired expiredUser = sess.query(User).filter( User.email == test_users['expired_lock_email']).one() today = parse(time.strftime("%c")) expiredUser.last_login_date = (today - timedelta(days=120)).strftime("%c") 
sess.add(expiredUser) # create users for status testing for u in status_test_users: user = User(email=u.email, permissions=u.permissions, user_status=sess.query(UserStatus).filter( UserStatus.name == u.user_status).one()) sess.add(user) # set up approved user user = sess.query(User).filter( User.email == test_users['approved_email']).one() user.username = "******" user.cgac_code = "000" user.salt, user.password_hash = getPasswordHash( user_password, Bcrypt()) sess.add(user) cls.approved_user_id = user.user_id # set up admin user admin = sess.query(User).filter( User.email == test_users['admin_email']).one() admin.salt, admin.password_hash = getPasswordHash( admin_password, Bcrypt()) admin.name = "Mr. Manager" admin.cgac_code = "SYS" sess.add(admin) # set up status changed user statusChangedUser = sess.query(User).filter( User.email == test_users['change_user_email']).one() statusChangedUser.name = "Test User" statusChangedUser.user_status = sess.query(UserStatus).filter( UserStatus.name == 'email_confirmed').one() sess.add(statusChangedUser) cls.status_change_user_id = statusChangedUser.user_id # set up deactivated user deactivated_user = sess.query(User).filter( User.email == test_users['inactive_email']).one() deactivated_user.last_login_date = time.strftime("%c") deactivated_user.is_active = False sess.add(deactivated_user) sess.commit() # get lookup dictionaries cls.jobStatusDict = lookups.JOB_STATUS_DICT cls.jobTypeDict = lookups.JOB_TYPE_DICT cls.fileTypeDict = lookups.FILE_TYPE_DICT cls.fileStatusDict = lookups.FILE_STATUS_DICT cls.ruleSeverityDict = lookups.RULE_SEVERITY_DICT cls.errorTypeDict = lookups.ERROR_TYPE_DICT cls.publishStatusDict = lookups.PUBLISH_STATUS_DICT cls.userStatusDict = lookups.USER_STATUS_DICT # set up info needed by the individual test classes cls.test_users = test_users cls.user_password = user_password cls.admin_password = admin_password cls.local = CONFIG_BROKER['local']
def receive_messages(WaitTimeSeconds, MessageAttributeNames=None): # noqa sess = GlobalDB.db().session messages = [] for sqs in sess.query(SQS): messages.append(SQSMockMessage(sqs)) return messages
def add_jobs_for_uploaded_file(upload_file, submission_id, existing_submission): """ Add upload and validation jobs for a single filetype Arguments: upload_file: UploadFile named tuple submission_id: submission ID to attach to jobs existing_submission: true if we should update existing jobs rather than creating new ones Returns: the validation job id for this file type (if any) the upload job id for this file type """ sess = GlobalDB.db().session file_type_id = FILE_TYPE_DICT[upload_file.file_type] validation_job_id = None # Create a file upload job or, for an existing submission, modify the # existing upload job. if existing_submission: # mark existing upload job as running upload_job = sess.query(Job).filter_by( submission_id=submission_id, file_type_id=file_type_id, job_type_id=JOB_TYPE_DICT['file_upload'] ).one() # mark as running and set new file name and path upload_job.job_status_id = JOB_STATUS_DICT['running'] upload_job.original_filename = upload_file.file_name upload_job.filename = upload_file.upload_name else: if upload_file.file_type in ["award", "award_procurement"]: # file generation handled on backend, mark as ready upload_status = JOB_STATUS_DICT['ready'] elif upload_file.file_type in ["awardee_attributes", "sub_award"]: # these are dependent on file D2 validation upload_status = JOB_STATUS_DICT['waiting'] else: # mark as running since frontend should be doing this upload upload_status = JOB_STATUS_DICT['running'] upload_job = Job( original_filename=upload_file.file_name, filename=upload_file.upload_name, file_type_id=file_type_id, job_status_id=upload_status, job_type_id=JOB_TYPE_DICT['file_upload'], submission_id=submission_id) sess.add(upload_job) sess.flush() if existing_submission: # if the file's validation job is attached to an existing submission, # reset its status and delete any validation artifacts (e.g., error metadata) that # might exist from a previous run. 
val_job = sess.query(Job).filter_by( submission_id=submission_id, file_type_id=file_type_id, job_type_id=JOB_TYPE_DICT['csv_record_validation'] ).one() val_job.job_status_id = JOB_STATUS_DICT['waiting'] val_job.original_filename = upload_file.file_name val_job.filename = upload_file.upload_name # reset file size and number of rows to be set during validation of new file val_job.file_size = None val_job.number_of_rows = None # delete error metadata this might exist from a previous run of this validation job sess.query(ErrorMetadata).\ filter(ErrorMetadata.job_id == val_job.job_id).\ delete(synchronize_session='fetch') # delete file error information that might exist from a previous run of this validation job sess.query(File).filter(File.job_id == val_job.job_id).delete(synchronize_session='fetch') else: # create a new record validation job and add dependencies if necessary if upload_file.file_type == "awardee_attributes": d1_val_job = sess.query(Job).\ filter(Job.submission_id == submission_id, Job.file_type_id == FILE_TYPE_DICT['award_procurement'], Job.job_type_id == JOB_TYPE_DICT['csv_record_validation']).\ one_or_none() if d1_val_job is None: raise Exception("Cannot create E job without a D1 job") # Add dependency on D1 validation job d1_dependency = JobDependency(job_id=upload_job.job_id, prerequisite_id=d1_val_job.job_id) sess.add(d1_dependency) elif upload_file.file_type == "sub_award": # todo: check for C validation job c_val_job = sess.query(Job).\ filter(Job.submission_id == submission_id, Job.file_type_id == FILE_TYPE_DICT['award_financial'], Job.job_type_id == JOB_TYPE_DICT['csv_record_validation']).\ one_or_none() if c_val_job is None: raise Exception("Cannot create F job without a C job") # add dependency on C validation job c_dependency = JobDependency(job_id=upload_job.job_id, prerequisite_id=c_val_job.job_id) sess.add(c_dependency) else: # E and F don't get validation jobs val_job = Job( original_filename=upload_file.file_name, filename=upload_file.upload_name, file_type_id=file_type_id, job_status_id=JOB_STATUS_DICT['waiting'], job_type_id=JOB_TYPE_DICT['csv_record_validation'], submission_id=submission_id) sess.add(val_job) sess.flush() # add dependency between file upload job and file validation job upload_dependency = JobDependency(job_id=val_job.job_id, prerequisite_id=upload_job.job_id) sess.add(upload_dependency) validation_job_id = val_job.job_id sess.commit() return validation_job_id, upload_job.job_id
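# Illustrative sketch: the dependency wiring above boils down to "file E waits on D1 validation,
# file F waits on C validation, and every validation job waits on its own upload job." A compact
# view of that relationship; this mapping is an illustration, not a table in the broker database.
UPLOAD_PREREQUISITES = {
    'awardee_attributes': 'award_procurement',   # E depends on the D1 validation job
    'sub_award': 'award_financial',              # F depends on the C validation job
}

def prerequisite_for(file_type):
    """ Return the file type whose validation must finish first, or None if there is no prerequisite. """
    return UPLOAD_PREREQUISITES.get(file_type)

assert prerequisite_for('awardee_attributes') == 'award_procurement'
assert prerequisite_for('appropriations') is None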
def setUserActive(user, is_active): """ Sets the is_active field for the specified user """ sess = GlobalDB.db().session user.is_active = is_active sess.commit()
def main(): parser = argparse.ArgumentParser( description= 'Update county information for historical FABS and FPDS data') parser.add_argument('-t', '--type', help='Which data type, argument must be fpds or fabs', nargs=1, type=str, required=True) parser.add_argument('-s', '--start', help='Start id, must be number', nargs=1, type=int, required=True) parser.add_argument('-e', '--end', help='End id, must be number', nargs=1, type=int, required=True) args = parser.parse_args() sess = GlobalDB.db().session data_type = args.type[0] global g_country_list global g_state_by_code global g_state_code_by_fips global g_state_by_name global g_zip_list global g_county_by_city global g_county_by_code global g_county_by_name logger.info("Starting location dictionary compilation") # get and create list of country code -> name mappings countries = sess.query(CountryCode).all() for country in countries: g_country_list[country.country_code] = country.country_name del countries # get and create list of state code -> state name mappings. Prime the county lists with state codes states = sess.query(States).all() for state in states: g_county_by_name[state.state_code] = {} g_county_by_code[state.state_code] = {} # we want to capitalize it if it's FPDS because that's how we store it state_name = state.state_name state_code = state.state_code if data_type == 'fpds': state_name = state_name.upper() g_state_by_code[state_code] = state_name g_state_code_by_fips[state.fips_code] = state_code g_state_by_name[state_name.upper()] = state_code del states # Fill the county lists with data (code -> name mappings and name -> code mappings) county_codes = sess.query(CountyCode.county_number, CountyCode.state_code, CountyCode.county_name).all() for county_code in county_codes: state_code = county_code.state_code county_num = county_code.county_number county_name = county_code.county_name.strip() if data_type == 'fpds': # we don't want any "(CA)" endings for FPDS, so strip those (also strip all extra whitespace) county_name = county_name.replace(' (CA)', '').strip().upper() # we want all the counties in our by-code lookup because we'd be using this table anyway for derivations g_county_by_code[state_code][county_num] = county_name # if the county name has only letters/spaces then we want it in our by-name lookup, the rest have the # potential to be different from the FPDS feed (and won't be used in FABS) if re.match('^[A-Z\s]+$', county_name): g_county_by_name[state_code][county_name] = county_num del county_codes # pull in all city codes city_codes = sess.query(CityCode.city_code, CityCode.state_code, CityCode.county_number).all() for city_code in city_codes: g_county_by_city[city_code.state_code + city_code.city_code] = city_code.county_number del city_codes # pull in all the zip codes start_slice = 0 while True: end_slice = start_slice + ZIP_SLICE zip_codes = sess.query(Zips.zip5, Zips.zip_last4, Zips.state_abbreviation, Zips.county_number).\ slice(start_slice, end_slice).all() for zip_data in zip_codes: if zip_data.zip5 not in g_zip_list: g_zip_list[zip_data.zip5] = {} g_zip_list[zip_data.zip5]['default'] = { "state_abbreviation": zip_data.state_abbreviation, "county_number": zip_data.county_number } g_zip_list[zip_data.zip5][zip_data.zip_last4] = { "state_abbreviation": zip_data.state_abbreviation, "county_number": zip_data.county_number } logger.info("Added %s rows to zip dict", str(end_slice)) start_slice = end_slice # break the loop if we've hit the last records if len(zip_codes) < ZIP_SLICE: break del zip_codes if 
data_type == 'fpds': update_historical_fpds(sess, args.start[0], args.end[0]) elif data_type == 'fabs': update_historical_fabs(sess, args.start[0], args.end[0]) else: logger.error("Type must be fpds or fabs.") # delete all global variables just in case del g_country_list del g_state_by_code del g_state_code_by_fips del g_state_by_name del g_zip_list del g_county_by_city del g_county_by_code del g_county_by_name logger.info("Completed location derivations")
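# Illustrative sketch: the zip-code loading loop above pages through the table with
# query.slice(start, end) and stops when a page comes back short. The same pagination pattern
# over a plain list, with a small batch size so the exit condition is easy to see:
ZIP_SLICE = 3
records = list(range(8))  # stand-in for the Zips query results

start_slice = 0
while True:
    end_slice = start_slice + ZIP_SLICE
    batch = records[start_slice:end_slice]
    print('processing', batch)
    start_slice = end_slice
    if len(batch) < ZIP_SLICE:   # a short (or empty) page means we've reached the end
        break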
def validator_process_file_generation(file_gen_id, is_retry=False): """ Retrieves a FileGeneration object based on its ID, and kicks off a file generation. Handles errors by ensuring the FileGeneration (if exists) is no longer cached. Args: file_gen_id: ID of a FileGeneration object is_retry: If this is not the very first time handling execution of this job. If True, cleanup is performed before proceeding to retry the job Raises: Any Exceptions raised by the FileGenerationManager """ if is_retry: if cleanup_generation(file_gen_id): log_job_message( logger=logger, message= "Attempting a retry of {} after successful retry-cleanup.". format(inspect.stack()[0][3]), job_id=file_gen_id, is_debug=True) else: log_job_message( logger=logger, message="Retry of {} found to be not necessary after cleanup. " "Returning from job with success.".format( inspect.stack()[0][3]), job_id=file_gen_id, is_debug=True) return sess = GlobalDB.db().session file_generation = None try: file_generation = sess.query(FileGeneration).filter_by( file_generation_id=file_gen_id).one_or_none() if file_generation is None: raise ResponseException( 'FileGeneration ID {} not found in database'.format( file_gen_id), StatusCode.CLIENT_ERROR, None) file_generation_manager = FileGenerationManager( sess, g.is_local, file_generation=file_generation) file_generation_manager.generate_file() except Exception as e: # Log uncaught exceptions and fail all Jobs referencing this FileGeneration error_data = { 'message': 'An unhandled exception occurred in the Validator during file generation', 'message_type': 'ValidatorInfo', 'file_generation_id': file_gen_id, 'traceback': traceback.format_exc() } if file_generation: error_data.update({ 'agency_code': file_generation.agency_code, 'agency_type': file_generation.agency_type, 'start_date': file_generation.start_date, 'end_date': file_generation.end_date, 'file_type': file_generation.file_type, 'file_path': file_generation.file_path, }) logger.error(error_data) # Try to mark the Jobs as failed, but continue raising the original Exception if not possible try: if file_generation: # Uncache the FileGeneration sess.refresh(file_generation) file_generation.is_cached_file = False # Mark all Jobs waiting on this FileGeneration as failed generation_jobs = sess.query(Job).filter_by( file_generation_id=file_gen_id).all() for job in generation_jobs: if job.job_status in [ JOB_STATUS_DICT['waiting'], JOB_STATUS_DICT['ready'], JOB_STATUS_DICT['running'] ]: mark_job_status(job.job_id, 'failed') sess.refresh(job) job.file_generation_id = None job.error_message = str(e) sess.commit() except: pass # ResponseExceptions only occur at very specific times, and should not affect the Validator's future attempts # at handling messages from SQS if not isinstance(e, ResponseException): raise e
def start_d_generation(job, start_date, end_date, agency_type, agency_code=None, file_format='csv'): """ Validates the start and end dates of the generation, updates the submission's publish status and progress (if its not detached generation), and sends the job information to SQS. Args: job: File generation job to start start_date: String to parse as the start date of the generation end_date: String to parse as the end date of the generation agency_type: Type of Agency to generate files by: "awarding" or "funding" agency_code: Agency code for detached D file generations file_format: determines if the file generated is a txt or a csv """ if not (StringCleaner.is_date(start_date) and StringCleaner.is_date(end_date)): raise ResponseException( "Start or end date cannot be parsed into a date of format MM/DD/YYYY", StatusCode.CLIENT_ERROR) # Update the Job's start and end dates sess = GlobalDB.db().session job.start_date = start_date job.end_date = end_date sess.commit() # Update submission if job.submission_id: agency_code = update_generation_submission(sess, job) mark_job_status(job.job_id, 'waiting') file_generation = retrieve_cached_file_generation(job, agency_type, agency_code, file_format) if file_generation: try: copy_file_generation_to_job(job, file_generation, g.is_local) except Exception as e: logger.error(traceback.format_exc()) mark_job_status(job.job_id, 'failed') job.error_message = str(e) sess.commit() else: # Create new FileGeneration and reset Jobs file_generation = FileGeneration(request_date=datetime.now().date(), start_date=job.start_date, end_date=job.end_date, file_type=job.file_type.letter_name, agency_code=agency_code, agency_type=agency_type, file_format=file_format, is_cached_file=True) sess.add(file_generation) sess.commit() try: job.file_generation_id = file_generation.file_generation_id sess.commit() reset_generation_jobs(sess, job) logger.info({ 'message': 'Sending new FileGeneration {} to SQS'.format( file_generation.file_generation_id), 'message_type': 'BrokerInfo', 'file_type': job.file_type.letter_name, 'job_id': job.job_id, 'submission_id': job.submission_id, 'file_generation_id': file_generation.file_generation_id }) # Add file_generation_id to the SQS job queue queue = sqs_queue() message_attr = { "validation_type": { "DataType": "String", "StringValue": "generation" } } queue.send_message(MessageBody=str( file_generation.file_generation_id), MessageAttributes=message_attr) except Exception as e: logger.error(traceback.format_exc()) mark_job_status(job.job_id, 'failed') job.error_message = str(e) file_generation.is_cached_file = False sess.commit()
def max_login(self, session): """ Logs a user in if their password matches using MAX Args: session: Session object from flask Returns: A JsonResponse containing the user information or details on which error occurred, such as whether a type was wrong, something wasn't implemented, invalid keys were provided, login was denied, or a different, unexpected error occurred. """ try: safe_dictionary = RequestDictionary(self.request) ticket = safe_dictionary.get_value("ticket") service = safe_dictionary.get_value('service') # Call MAX's serviceValidate endpoint and retrieve the response max_dict = get_max_dict(ticket, service) if 'cas:authenticationSuccess' not in max_dict[ 'cas:serviceResponse']: raise ValueError( "The Max CAS endpoint was unable to locate your session " "using the ticket/service combination you provided.") cas_attrs = max_dict['cas:serviceResponse'][ 'cas:authenticationSuccess']['cas:attributes'] # Grab MAX ID to see if a service account is being logged in max_id_components = cas_attrs['maxAttribute:MAX-ID'].split('_') service_account_flag = (len(max_id_components) > 1 and max_id_components[0].lower() == 's') # Grab the email and list of groups from MAX's response email = cas_attrs['maxAttribute:Email-Address'] try: sess = GlobalDB.db().session user = sess.query(User).filter( func.lower(User.email) == func.lower(email)).one_or_none() # If the user does not exist, create them since they are allowed to access the site because they got # past the above group membership checks if user is None: user = User() user.email = email set_user_name(user, cas_attrs) set_max_perms(user, cas_attrs['maxAttribute:GroupList'], service_account_flag) sess.add(user) sess.commit() except MultipleResultsFound: raise ValueError("An error occurred during login.") return self.create_session_and_response(session, user) # Catch any specifically raised errors or any other errors that may have happened and return them cleanly. # We add the error parameter here because this endpoint needs to provide better feedback, and to avoid changing # the default behavior of the JsonResponse class globally. except (TypeError, KeyError, NotImplementedError) as e: # Return a 400 with appropriate message return JsonResponse.error(e, StatusCode.CLIENT_ERROR, error=str(e)) except ValueError as e: # Return a 401 for login denied return JsonResponse.error(e, StatusCode.LOGIN_REQUIRED, error=str(e)) except Exception as e: # Return 500 return JsonResponse.error(e, StatusCode.INTERNAL_ERROR, error=str(e))
def perms_to_affiliations(perms, user_id, service_account_flag=False): """ Convert a list of perms from MAX to a list of UserAffiliations. Filter out and log any malformed perms Args: perms: list of permissions (as strings) for the user user_id: the ID of the user service_account_flag: flag to indicate a service account Yields: UserAffiliations based on the permissions provided """ available_cgacs = { cgac.cgac_code: cgac for cgac in GlobalDB.db().session.query(CGAC) } available_frecs = { frec.frec_code: frec for frec in GlobalDB.db().session.query(FREC) } log_data = {'message_type': 'BrokerWarning', 'user_id': user_id} for perm in perms: log_data[ 'message'] = 'User with ID {} has malformed permission: {}'.format( user_id, perm) components = perm.split('-PERM_') if len(components) != 2: logger.warning(log_data) continue codes, perm_level = components split_codes = codes.split('-FREC_') frec_code, cgac_code = None, None if len(split_codes) == 2: # permissions for FR entity code and readonly CGAC frec_code, cgac_code = split_codes[1], split_codes[0] if frec_code not in available_frecs or cgac_code not in available_cgacs: logger.warning(log_data) continue else: # permissions for CGAC cgac_code = codes if cgac_code not in available_cgacs: logger.warning(log_data) continue perm_level = perm_level.lower() if service_account_flag: # Replace MAX Service Account permissions with Broker "write" and "editfabs" permissions perm_level = 'we' elif perm_level not in 'rwsef': logger.warning(log_data) continue for permission in perm_level: if frec_code: yield UserAffiliation( cgac=available_cgacs[cgac_code], frec=None, permission_type_id=PERMISSION_SHORT_DICT['r']) yield UserAffiliation( cgac=None, frec=available_frecs[frec_code], permission_type_id=PERMISSION_SHORT_DICT[permission]) else: yield UserAffiliation( cgac=available_cgacs[cgac_code] if cgac_code else None, frec=None, permission_type_id=PERMISSION_SHORT_DICT[permission])
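# Illustrative sketch: MAX permission strings follow the shape "<CGAC>[-FREC_<FREC>]-PERM_<levels>",
# and the parsing above splits on "-PERM_" and then "-FREC_". A standalone version of just the
# string handling; the codes in the examples are made up.
def parse_max_perm(perm):
    """ Return (cgac_code, frec_code, perm_level) or None when the string is malformed. """
    components = perm.split('-PERM_')
    if len(components) != 2:
        return None
    codes, perm_level = components
    split_codes = codes.split('-FREC_')
    if len(split_codes) == 2:
        cgac_code, frec_code = split_codes[0], split_codes[1]
    else:
        cgac_code, frec_code = codes, None
    return cgac_code, frec_code, perm_level.lower()

assert parse_max_perm('020-PERM_R') == ('020', None, 'r')
assert parse_max_perm('020-FREC_0203-PERM_W') == ('020', '0203', 'w')
assert parse_max_perm('garbage') is None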
def list_submission_users(is_fabs): """ List user IDs and names that have submissions that the requesting user can see. Arguments: is_fabs: boolean indicating whether it is a DABS or FABS submission (True if FABS) Returns: A JsonResponse containing a list of users that have submissions that the requesting user can see """ sess = GlobalDB.db().session # subquery to create the EXISTS portion of the query exists_query = sess.query(Submission).filter( Submission.user_id == User.user_id, Submission.is_fabs.is_(is_fabs)) # if user is not an admin, we have to adjust the exists query to limit submissions if not g.user.website_admin: # split affiliations into frec and cgac cgac_affiliations = [aff for aff in g.user.affiliations if aff.cgac] frec_affiliations = [aff for aff in g.user.affiliations if aff.frec] # Don't list FABS permissions users if the user only has DABS permissions if not is_fabs: cgac_affiliations = [ aff for aff in cgac_affiliations if aff.permission_type_id in DABS_PERMISSION_ID_LIST ] frec_affiliations = [ aff for aff in frec_affiliations if aff.permission_type_id in DABS_PERMISSION_ID_LIST ] # Make a list of cgac and frec codes cgac_list = [aff.cgac.cgac_code for aff in cgac_affiliations] frec_list = [aff.frec.frec_code for aff in frec_affiliations] # Add filters where applicable affiliation_filters = [Submission.user_id == g.user.user_id] if cgac_list: affiliation_filters.append(Submission.cgac_code.in_(cgac_list)) if frec_list: affiliation_filters.append(Submission.frec_code.in_(frec_list)) exists_query = exists_query.filter(or_(*affiliation_filters)) # Add an exists onto the query, couldn't do this earlier because then the filters couldn't get added in the if exists_query = exists_query.exists() # Get all the relevant users user_results = sess.query(User.user_id, User.name, User.email).filter(exists_query).order_by( User.name).all() # Create an array containing relevant users in a readable format user_list = [] for user in user_results: user_list.append({ 'user_id': user[0], 'name': user[1], 'email': user[2] }) return JsonResponse.create(StatusCode.OK, {"users": user_list})
def delete(self): sess = GlobalDB.db().session sess.delete(self.sqs) sess.commit()
def copy_file_generation_to_job(job, file_generation, is_local):
    """ Copy cached FileGeneration data to a Job requesting a file.

        Args:
            job: Job object to copy the data to
            file_generation: Cached FileGeneration object to copy the data from
            is_local: A boolean flag indicating whether the application is being run locally or not
    """
    sess = GlobalDB.db().session
    log_data = {'message': 'Copying FileGeneration {} data to Job {}'.format(file_generation.file_generation_id,
                                                                             job.job_id),
                'message_type': 'BrokerInfo', 'job_id': job.job_id, 'file_type': job.file_type.name,
                'file_generation_id': file_generation.file_generation_id}
    logger.info(log_data)

    # Do not edit submissions that have already successfully completed
    sess.refresh(job)
    if job.job_status_id == lookups.JOB_STATUS_DICT['finished']:
        return

    job.file_generation_id = file_generation.file_generation_id

    # File is still being generated, just mark the FileGeneration ID in the Job and wait
    # FileGeneration will update all child Jobs when it finishes
    if not file_generation.file_path:
        sess.commit()
        return

    # Generate file path for child Job's filename
    filepath = CONFIG_BROKER['broker_files'] if is_local else "{}/".format(str(job.submission_id))
    original_filename = file_generation.file_path.split('/')[-1]
    filename = '{}{}'.format(filepath, original_filename)

    # Copy parent job's data
    job.filename = filename
    job.original_filename = original_filename
    job.number_of_errors = 0
    job.number_of_warnings = 0

    # Change the validation job's file data when within a submission
    if job.submission_id is not None:
        val_job = sess.query(Job).filter(Job.submission_id == job.submission_id,
                                         Job.file_type_id == job.file_type_id,
                                         Job.job_type_id == lookups.JOB_TYPE_DICT['csv_record_validation']).one()
        val_job.filename = filename
        val_job.original_filename = original_filename

    # Copy the data to the Submission's bucket
    if not is_local and file_generation.file_path != job.filename:
        # Check to see if the same file exists in the child bucket
        s3 = boto3.client('s3', region_name=CONFIG_BROKER["aws_region"])
        bucket = CONFIG_BROKER['aws_bucket']
        response = s3.list_objects_v2(Bucket=bucket, Prefix=job.filename)
        for obj in response.get('Contents', []):
            if obj['Key'] == job.filename:
                # The file already exists in this location; do not overwrite it
                log_data['message'] = '{} file already exists in this location: {}; not overwriting.'.format(
                    job.file_type.name, job.filename)
                logger.info(log_data)
                mark_job_status(job.job_id, 'finished')
                return

        S3Handler.copy_file(bucket, bucket, file_generation.file_path, job.filename)
    sess.commit()

    # Mark Job status last so the validation job doesn't start until everything is done
    mark_job_status(job.job_id, 'finished')
def main(): now = datetime.now() parser = argparse.ArgumentParser( description='Pull data from the Federal Hierarchy API.') parser.add_argument('-a', '--all', help='Clear out the database and get historical data', action='store_true') parser.add_argument('-f', '--filename', help='Generate a local CSV file from the data.', nargs=1, type=str) parser.add_argument('-o', '--export_office', help='Export the current office table. ' 'Please provide the file name/path.', nargs=1, type=str) parser.add_argument('-d', '--pull_date', help='Date from which to start the pull', nargs=1, type=str) parser.add_argument('-i', '--ignore_db', help='Do not update the DB tables', action='store_true') args = parser.parse_args() if args.all and args.pull_date: logger.error('The -a and -d flags conflict, cannot use both at once.') sys.exit(1) metrics_json = { 'script_name': 'load_federal_hierarchy.py', 'start_time': str(now), 'level_1_records': 0, 'level_2_records': 0, 'level_3_records': 0, 'level_4_records': 0, 'level_5_records': 0, 'level_6_records': 0, 'level_7_records': 0, 'missing_cgacs': [], 'missing_subtier_codes': [] } # Handle the pull_date parameter updated_date_from = None if args.pull_date: try: updated_date_from = args.pull_date[0] datetime.strptime(updated_date_from, '%Y-%m-%d') except ValueError: logger.error('The date given to the -d flag was not parseable.') sys.exit(1) # Get or create the start date sess = GlobalDB.db().session if not args.all and not updated_date_from: last_pull_date = sess.query(ExternalDataLoadDate.last_load_date_start).\ filter_by(external_data_type_id=EXTERNAL_DATA_TYPE_DICT['office']).one_or_none() if not last_pull_date: logger.error( 'The -a or -d flag must be set when there is no latest run in the database.' ) sys.exit(1) # We want to make the date one day earlier to account for any timing weirdness between the two systems updated_date_from = last_pull_date[0].date() - timedelta(days=1) # Handle the filename parameter filename = args.filename[0] if args.filename else None # Handle the export office parameter export_office = args.export_office[0] if args.export_office else None # Handle a complete data reload if args.all and not args.ignore_db: logger.info('Emptying out the Office table for a complete reload.') sess.execute('''TRUNCATE TABLE office RESTART IDENTITY''') try: pull_offices(sess, filename, not args.ignore_db, args.all, updated_date_from, export_office, metrics_json) except Exception as e: logger.exception(e) sys.exit(1) # find if there were any new cgacs/subtiers added all_cgacs = [cgac.cgac_code for cgac in sess.query(CGAC.cgac_code)] all_frecs = [frec.frec_code for frec in sess.query(FREC.frec_code)] all_subtiers = [ subtier.sub_tier_agency_code for subtier in sess.query(SubTierAgency.sub_tier_agency_code) ] metrics_json['missing_cgacs'] = list( set(metrics_json['missing_cgacs']) - set(all_cgacs + all_frecs)) metrics_json['missing_subtier_codes'] = list( set(metrics_json['missing_subtier_codes']) - set(all_subtiers)) update_external_data_load_date(now, datetime.now(), 'office') metrics_json['duration'] = str(datetime.now() - now) with open('load_federal_hierarchy_metrics.json', 'w+') as metrics_file: json.dump(metrics_json, metrics_file) logger.info('Script complete')
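# Example invocations of this script, based on the flags defined above
# (file names and dates are illustrative only):
#
#   python load_federal_hierarchy.py -a                    # wipe the office table and reload everything
#   python load_federal_hierarchy.py -d 2019-01-01         # pull records updated since a given date
#   python load_federal_hierarchy.py -f offices.csv -i     # write a local CSV without updating the DB
#   python load_federal_hierarchy.py -o office_export.csv  # export the current office table
#
# Omitting both -a and -d falls back to the last recorded load date (minus one day) and fails if no
# previous load exists.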
def run_validation(self, job): """ Run validations for specified job Args: job: Job to be validated Returns: True if successful """ sess = GlobalDB.db().session error_list = ErrorInterface() job_id = job.job_id submission_id = job.submission_id row_number = 1 file_type = job.file_type.name validation_start = datetime.now() log_str = 'on submission_id: {}, job_id: {}, file_type: {}'.format( str(submission_id), str(job_id), file_type) logger.info({ 'message': 'Beginning run_validation {}'.format(log_str), 'message_type': 'ValidatorInfo', 'submission_id': submission_id, 'job_id': job_id, 'file_type': file_type, 'action': 'run_validations', 'status': 'start', 'start_time': validation_start }) # Get orm model for this file model = [ft.model for ft in FILE_TYPE if ft.name == file_type][0] # Delete existing file level errors for this submission sess.query(ErrorMetadata).filter( ErrorMetadata.job_id == job_id).delete() sess.commit() # Clear existing records for this submission sess.query(model).filter_by(submission_id=submission_id).delete() sess.commit() # Clear existing flex fields for this job sess.query(FlexField).filter_by(job_id=job_id).delete() sess.commit() # If local, make the error report directory if self.is_local and not os.path.exists(self.directory): os.makedirs(self.directory) # Get bucket name and file name file_name = job.filename bucket_name = CONFIG_BROKER['aws_bucket'] region_name = CONFIG_BROKER['aws_region'] error_file_name = report_file_name(job.submission_id, False, job.file_type.name) error_file_path = "".join( [CONFIG_SERVICES['error_report_path'], error_file_name]) warning_file_name = report_file_name(job.submission_id, True, job.file_type.name) warning_file_path = "".join( [CONFIG_SERVICES['error_report_path'], warning_file_name]) # Create File Status object create_file_if_needed(job_id, file_name) reader = CsvReader() # Get file size and write to jobs table if CONFIG_BROKER["use_aws"]: file_size = S3Handler.get_file_size(file_name) else: file_size = os.path.getsize(file_name) job.file_size = file_size sess.commit() # Get fields for this file fields = sess.query(FileColumn).filter( FileColumn.file_id == FILE_TYPE_DICT[file_type]).all() for field in fields: sess.expunge(field) csv_schema = {row.name_short: row for row in fields} try: extension = os.path.splitext(file_name)[1] if not extension or extension.lower() not in ['.csv', '.txt']: raise ResponseException("", StatusCode.CLIENT_ERROR, None, ValidationError.fileTypeError) # Count file rows: throws a File Level Error for non-UTF8 characters temp_file = open(reader.get_filename(region_name, bucket_name, file_name), encoding='utf-8') file_row_count = len(list(csv.reader(temp_file))) try: temp_file.close() except AttributeError: # File does not exist, and so does not need to be closed pass # Pull file and return info on whether it's using short or long col headers reader.open_file(region_name, bucket_name, file_name, fields, bucket_name, self.get_file_name(error_file_name), self.long_to_short_dict[job.file_type_id], is_local=self.is_local) # list to keep track of rows that fail validations error_rows = [] # While not done, pull one row and put it into staging table if it passes # the Validator loading_start = datetime.now() logger.info({ 'message': 'Beginning data loading {}'.format(log_str), 'message_type': 'ValidatorInfo', 'submission_id': submission_id, 'job_id': job_id, 'file_type': file_type, 'action': 'data_loading', 'status': 'start', 'start_time': loading_start }) with open(error_file_path, 'w', newline='') as 
error_file,\ open(warning_file_path, 'w', newline='') as warning_file: error_csv = csv.writer(error_file, delimiter=',', quoting=csv.QUOTE_MINIMAL, lineterminator='\n') warning_csv = csv.writer(warning_file, delimiter=',', quoting=csv.QUOTE_MINIMAL, lineterminator='\n') required_list = None type_list = None if file_type == "fabs": # create a list of all required/type labels for FABS labels = sess.query(ValidationLabel).all() required_list = {} type_list = {} for label in labels: if label.label_type == "requirement": required_list[label.column_name] = label.label else: type_list[label.column_name] = label.label # write headers to file error_csv.writerow(self.reportHeaders) warning_csv.writerow(self.reportHeaders) while not reader.is_finished: row_number += 1 if row_number % 100 == 0: elapsed_time = (datetime.now() - loading_start).total_seconds() logger.info({ 'message': 'Loading row: {} {}'.format( str(row_number), log_str), 'message_type': 'ValidatorInfo', 'submission_id': submission_id, 'job_id': job_id, 'file_type': file_type, 'action': 'data_loading', 'status': 'loading', 'rows_loaded': row_number, 'start_time': loading_start, 'elapsed_time': elapsed_time }) # # first phase of validations: read record and record a # formatting error if there's a problem # (record, reduceRow, skip_row, doneReading, rowErrorHere, flex_cols) = \ self.read_record(reader, error_csv, row_number, job, fields, error_list) if reduceRow: row_number -= 1 if rowErrorHere: error_rows.append(row_number) if doneReading: # Stop reading from input file break elif skip_row: # Do not write this row to staging, but continue processing future rows continue # # second phase of validations: do basic schema checks # (e.g., require fields, field length, data type) # # D files are obtained from upstream systems (ASP and FPDS) that perform their own basic # validations, so these validations are not repeated here if file_type in ["award", "award_procurement"]: # Skip basic validations for D files, set as valid to trigger write to staging passed_validations = True valid = True else: if file_type == "fabs": record['afa_generated_unique'] = (record['award_modification_amendme'] or '-none-') + "_" +\ (record['awarding_sub_tier_agency_c'] or '-none-') + \ "_" + (record['fain'] or '-none-') + "_" + \ (record['uri'] or '-none-') passed_validations, failures, valid = Validator.validate( record, csv_schema, file_type == "fabs", required_list, type_list) if valid: # todo: update this logic later when we have actual validations if file_type == "fabs": record["is_valid"] = True model_instance = model(job_id=job_id, submission_id=submission_id, valid_record=passed_validations, **record) skip_row = not insert_staging_model( model_instance, job, error_csv, error_list) if flex_cols: sess.add_all(flex_cols) sess.commit() if skip_row: error_rows.append(row_number) continue if not passed_validations: fatal = write_errors( failures, job, self.short_to_long_dict[job.file_type_id], error_csv, warning_csv, row_number, error_list, flex_cols) if fatal: error_rows.append(row_number) loading_duration = (datetime.now() - loading_start).total_seconds() logger.info({ 'message': 'Completed data loading {}'.format(log_str), 'message_type': 'ValidatorInfo', 'submission_id': submission_id, 'job_id': job_id, 'file_type': file_type, 'action': 'data_loading', 'status': 'finish', 'start_time': loading_start, 'end_time': datetime.now(), 'duration': loading_duration, 'total_rows': row_number }) if file_type in ('appropriations', 'program_activity', 'award_financial'): 
update_tas_ids(model, submission_id) # # third phase of validations: run validation rules as specified # in the schema guidance. these validations are sql-based. # sql_error_rows = self.run_sql_validations( job, file_type, self.short_to_long_dict[job.file_type_id], error_csv, warning_csv, row_number, error_list) error_rows.extend(sql_error_rows) error_file.close() warning_file.close() # stream file to S3 when not local if not self.is_local: # stream error file with open(error_file_path, 'rb') as csv_file: with smart_open.smart_open(S3Handler.create_file_path(self.get_file_name(error_file_name)), 'w')\ as writer: while True: chunk = csv_file.read(CHUNK_SIZE) if chunk: writer.write(chunk) else: break csv_file.close() os.remove(error_file_path) # stream warning file with open(warning_file_path, 'rb') as warning_csv_file: with smart_open.smart_open(S3Handler.create_file_path(self.get_file_name(warning_file_name)), 'w')\ as warning_writer: while True: chunk = warning_csv_file.read(CHUNK_SIZE) if chunk: warning_writer.write(chunk) else: break warning_csv_file.close() os.remove(warning_file_path) # Calculate total number of rows in file # that passed validations error_rows_unique = set(error_rows) total_rows_excluding_header = row_number - 1 valid_rows = total_rows_excluding_header - len(error_rows_unique) # Update fabs is_valid rows where applicable # Update submission to include action dates where applicable if file_type == "fabs": sess.query(DetachedAwardFinancialAssistance).\ filter(DetachedAwardFinancialAssistance.row_number.in_(error_rows_unique), DetachedAwardFinancialAssistance.submission_id == submission_id).\ update({"is_valid": False}, synchronize_session=False) sess.commit() min_action_date, max_action_date = get_action_dates( submission_id) sess.query(Submission).filter(Submission.submission_id == submission_id).\ update({"reporting_start_date": min_action_date, "reporting_end_date": max_action_date}, synchronize_session=False) # Ensure validated rows match initial row count if file_row_count != row_number: raise ResponseException("", StatusCode.CLIENT_ERROR, None, ValidationError.rowCountError) # Update job metadata job.number_of_rows = row_number job.number_of_rows_valid = valid_rows sess.commit() error_list.write_all_row_errors(job_id) # Update error info for submission populate_job_error_info(job) if file_type == "fabs": # set number of errors and warnings for detached submission populate_submission_error_info(submission_id) # Mark validation as finished in job tracker mark_job_status(job_id, "finished") mark_file_complete(job_id, file_name) finally: # Ensure the files always close reader.close() validation_duration = (datetime.now() - validation_start).total_seconds() logger.info({ 'message': 'Completed run_validation {}'.format(log_str), 'message_type': 'ValidatorInfo', 'submission_id': submission_id, 'job_id': job_id, 'file_type': file_type, 'action': 'run_validation', 'status': 'finish', 'start_time': validation_start, 'end_time': datetime.now(), 'duration': validation_duration }) return True
def validator_process_job(job_id, agency_code, is_retry=False): """ Retrieves a Job based on its ID, and kicks off a validation. Handles errors by ensuring the Job (if exists) is no longer running. Args: job_id: ID of a Job agency_code: CGAC or FREC code for agency, only required for file generations by Job is_retry: If this is not the very first time handling execution of this job. If True, cleanup is performed before proceeding to retry the job Raises: Any Exceptions raised by the GenerationManager or ValidationManager, excluding those explicitly handled """ if is_retry: if cleanup_validation(job_id): log_job_message( logger=logger, message= "Attempting a retry of {} after successful retry-cleanup.". format(inspect.stack()[0][3]), job_id=job_id, is_debug=True) else: log_job_message( logger=logger, message="Retry of {} found to be not necessary after cleanup. " "Returning from job with success.".format( inspect.stack()[0][3]), job_id=job_id, is_debug=True) return sess = GlobalDB.db().session job = None try: # Get the job job = sess.query(Job).filter_by(job_id=job_id).one_or_none() if job is None: validation_error_type = ValidationError.jobError write_file_error(job_id, None, validation_error_type) raise ResponseException( 'Job ID {} not found in database'.format(job_id), StatusCode.CLIENT_ERROR, None, validation_error_type) mark_job_status(job_id, 'ready') # We can either validate or generate a file based on Job ID if job.job_type.name == 'file_upload': # Generate A, E, or F file file_generation_manager = FileGenerationManager(sess, g.is_local, job=job) file_generation_manager.generate_file(agency_code) else: # Run validations validation_manager = ValidationManager( g.is_local, CONFIG_SERVICES['error_report_path']) validation_manager.validate_job(job.job_id) except (ResponseException, csv.Error, UnicodeDecodeError, ValueError) as e: # Handle exceptions explicitly raised during validation error_data = { 'message': 'An exception occurred in the Validator', 'message_type': 'ValidatorInfo', 'job_id': job_id, 'traceback': traceback.format_exc() } if job: error_data.update({ 'submission_id': job.submission_id, 'file_type': job.file_type.name }) logger.error(error_data) sess.refresh(job) job.error_message = str(e) if job.filename is not None: error_type = ValidationError.unknownError if isinstance(e, UnicodeDecodeError): error_type = ValidationError.encodingError elif isinstance(e, ResponseException): error_type = e.errorType write_file_error(job.job_id, job.filename, error_type) mark_job_status(job.job_id, 'invalid') else: logger.error(error_data) raise e except Exception as e: # Log uncaught exceptions and fail the Job error_data = { 'message': 'An unhandled exception occurred in the Validator', 'message_type': 'ValidatorInfo', 'job_id': job_id, 'traceback': traceback.format_exc() } if job: error_data.update({ 'submission_id': job.submission_id, 'file_type': job.file_type.name }) logger.error(error_data) # Try to mark the Job as failed, but continue raising the original Exception if not possible try: mark_job_status(job_id, 'failed') sess.refresh(job) job.error_message = str(e) sess.commit() except: pass raise e
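# Hedged sketch (not the actual consumer) of how an SQS worker loop might hand messages to
# validator_process_job. The message body and the 'agency_code' attribute match what
# start_generation_job (further down) sends; the polling parameters and error handling here
# are assumptions.
def poll_and_process(queue):
    for message in queue.receive_messages(WaitTimeSeconds=10, MessageAttributeNames=['All']):
        attrs = message.message_attributes or {}
        agency_code = attrs.get('agency_code', {}).get('StringValue')
        try:
            # the body is the Job ID as a string
            validator_process_job(int(message.body), agency_code)
        finally:
            # the real worker may instead leave failed messages on the queue for a retry
            message.delete()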
def run_cross_validation(self, job): """ Cross file validation job. Test all rules with matching rule_timing. Run each cross-file rule and create error report. Args: job: Current job """ sess = GlobalDB.db().session job_id = job.job_id # Create File Status object create_file_if_needed(job_id) # Create list of errors error_list = ErrorInterface() submission_id = job.submission_id job_start = datetime.now() logger.info({ 'message': 'Beginning cross-file validations on submission_id: ' + str(submission_id), 'message_type': 'ValidatorInfo', 'submission_id': submission_id, 'job_id': job.job_id, 'action': 'run_cross_validations', 'start': job_start, 'status': 'start' }) # Delete existing cross file errors for this submission sess.query(ErrorMetadata).filter( ErrorMetadata.job_id == job_id).delete() sess.commit() # get all cross file rules from db cross_file_rules = sess.query(RuleSql).filter_by( rule_cross_file_flag=True) # for each cross-file combo, run associated rules and create error report for c in get_cross_file_pairs(): first_file = c[0] second_file = c[1] combo_rules = cross_file_rules.filter( or_( and_(RuleSql.file_id == first_file.id, RuleSql.target_file_id == second_file.id), and_(RuleSql.file_id == second_file.id, RuleSql.target_file_id == first_file.id))) # get error file name/path error_file_name = report_file_name(submission_id, False, first_file.name, second_file.name) error_file_path = "".join( [CONFIG_SERVICES['error_report_path'], error_file_name]) warning_file_name = report_file_name(submission_id, True, first_file.name, second_file.name) warning_file_path = "".join( [CONFIG_SERVICES['error_report_path'], warning_file_name]) # open error report and gather failed rules within it with open(error_file_path, 'w', newline='') as error_file,\ open(warning_file_path, 'w', newline='') as warning_file: error_csv = csv.writer(error_file, delimiter=',', quoting=csv.QUOTE_MINIMAL, lineterminator='\n') warning_csv = csv.writer(warning_file, delimiter=',', quoting=csv.QUOTE_MINIMAL, lineterminator='\n') # write headers to file error_csv.writerow(self.crossFileReportHeaders) warning_csv.writerow(self.crossFileReportHeaders) # send comboRules to validator.crossValidate sql current_cols_short_to_long = self.short_to_long_dict[ first_file.id].copy() current_cols_short_to_long.update( self.short_to_long_dict[second_file.id].copy()) cross_validate_sql(combo_rules.all(), submission_id, current_cols_short_to_long, first_file.id, second_file.id, job, error_csv, warning_csv, error_list, job_id) # close files error_file.close() warning_file.close() # stream file to S3 when not local if not self.is_local: # stream error file with open(error_file_path, 'rb') as csv_file: with smart_open.smart_open( S3Handler.create_file_path( self.get_file_name(error_file_name)), 'w') as writer: while True: chunk = csv_file.read(CHUNK_SIZE) if chunk: writer.write(chunk) else: break csv_file.close() os.remove(error_file_path) # stream warning file with open(warning_file_path, 'rb') as warning_csv_file: with smart_open.smart_open( S3Handler.create_file_path( self.get_file_name(warning_file_name)), 'w') as warning_writer: while True: chunk = warning_csv_file.read(CHUNK_SIZE) if chunk: warning_writer.write(chunk) else: break warning_csv_file.close() os.remove(warning_file_path) # write all recorded errors to database error_list.write_all_row_errors(job_id) # Update error info for submission populate_job_error_info(job) # mark job status as "finished" mark_job_status(job_id, "finished") job_duration = (datetime.now() - 
job_start).total_seconds() logger.info({ 'message': 'Completed cross-file validations on submission_id: ' + str(submission_id), 'message_type': 'ValidatorInfo', 'submission_id': submission_id, 'job_id': job.job_id, 'action': 'run_cross_validations', 'status': 'finish', 'start': job_start, 'duration': job_duration }) # set number of errors and warnings for submission. submission = populate_submission_error_info(submission_id) # TODO: Remove temporary step below # Temporarily set publishable flag at end of cross file, remove this once users are able to mark their # submissions as publishable # Publish only if no errors are present if submission.number_of_errors == 0: submission.publishable = True sess.commit() # Mark validation complete mark_file_complete(job_id)
def setUpClass(cls): """Set up class-wide resources (test data)""" super(JobTests, cls).setUpClass() user = cls.userId # Flag for testing a million+ errors (can take ~30 min to run) cls.includeLongTests = False with createApp().app_context(): # get the submission test user sess = GlobalDB.db().session # Create test submissions and jobs, also uploading # the files needed for each job. jobDict = {} submissionId = cls.insertSubmission(sess, user) job_info = Job(job_status_id=JOB_STATUS_DICT['ready'], job_type_id=JOB_TYPE_DICT['file_upload'], file_type_id=FILE_TYPE_DICT['appropriations'], submission_id=submissionId) sess.add(job_info) sess.flush() jobDict['bad_upload'] = job_info.job_id submissionId = cls.insertSubmission(sess, user) job_info = Job(job_status_id=JOB_STATUS_DICT['ready'], job_type_id=JOB_TYPE_DICT['csv_record_validation'], file_type_id=FILE_TYPE_DICT['appropriations'], submission_id=submissionId) sess.add(job_info) sess.flush() jobDict['bad_prereq'] = job_info.job_id submissionId = cls.insertSubmission(sess, user) job_info = Job(job_status_id=JOB_STATUS_DICT['ready'], job_type_id=JOB_TYPE_DICT['external_validation'], file_type_id=FILE_TYPE_DICT['appropriations'], submission_id=submissionId) sess.add(job_info) sess.flush() jobDict['wrong_type'] = job_info.job_id submissionId = cls.insertSubmission(sess, user) job_info = Job(job_status_id=JOB_STATUS_DICT['finished'], job_type_id=JOB_TYPE_DICT['csv_record_validation'], file_type_id=FILE_TYPE_DICT['appropriations'], submission_id=submissionId) sess.add(job_info) sess.flush() jobDict['not_ready'] = job_info.job_id submissionId = cls.insertSubmission(sess, user) job_info = Job(filename=cls.uploadFile('testEmpty.csv', user), job_status_id=JOB_STATUS_DICT['ready'], job_type_id=JOB_TYPE_DICT['csv_record_validation'], file_type_id=FILE_TYPE_DICT['appropriations'], submission_id=submissionId) sess.add(job_info) sess.flush() jobDict['empty'] = job_info.job_id # create dependency dependency = JobDependency(job_id=jobDict["bad_prereq"], prerequisite_id=jobDict["bad_upload"]) sess.add(dependency) colIdDict = {} for fileId in range(1, 5): for columnId in range(1, 6): if columnId < 3: fieldType = FIELD_TYPE_DICT['INT'] else: fieldType = FIELD_TYPE_DICT['STRING'] columnName = "header_{}".format(columnId) fileCol = FileColumn( file_id=fileId, field_types_id=fieldType, name=columnName, required=(columnId != FIELD_TYPE_DICT['STRING'])) sess.add(fileCol) sess.flush() colIdDict["header_{}_file_type_{}".format( columnId, fileId)] = fileCol.file_column_id # commit submissions/jobs and output IDs sess.commit() for job_type, job_id in jobDict.items(): print('{}: {}'.format(job_type, job_id)) cls.jobDict = jobDict
def setUpClass(cls): """ Set up class-wide resources (test data) """ super(FABSUploadTests, cls).setUpClass() # TODO: refactor into a pytest fixture with create_app().app_context(): # get the submission test users sess = GlobalDB.db().session cls.session = sess admin_user = sess.query(User).filter( User.email == cls.test_users['admin_user']).one() agency_user = sess.query(User).filter( User.email == cls.test_users['agency_user']).one() editfabs_user = sess.query(User).filter( User.email == cls.test_users['editfabs_user']).one() cls.admin_user_id = admin_user.user_id cls.agency_user_id = agency_user.user_id cls.agency_user_email = agency_user.email cls.editfabs_email = editfabs_user.email # setup submission/jobs data for test_check_status cls.d2_submission = cls.insert_submission(sess, cls.admin_user_id, cgac_code="SYS", start_date="10/2015", end_date="12/2015", is_quarter=True) cls.d2_submission_2 = cls.insert_submission(sess, cls.agency_user_id, cgac_code="SYS", start_date="10/2015", end_date="12/2015", is_quarter=True) cls.published_submission = cls.insert_submission( sess, cls.admin_user_id, cgac_code="SYS", start_date="10/2015", end_date="12/2015", is_quarter=True, publish_status_id=PUBLISH_STATUS_DICT["published"]) cls.other_submission = cls.insert_submission(sess, cls.admin_user_id, cgac_code="SYS", start_date="07/2015", end_date="09/2015", is_quarter=True, d2_submission=False) cls.test_agency_user_submission_id = cls.insert_submission( sess, cls.agency_user_id, cgac_code="NOT", start_date="10/2015", end_date="12/2015", is_quarter=True, d2_submission=True) cls.insert_agency_user_submission_data( sess, cls.test_agency_user_submission_id)
def updateLastLogin(user, unlock_user=False):
    """ Update the user's last login date to the current datetime upon successful login, or clear it
        when unlocking the user.

        Args:
            user: User whose last login date should be updated
            unlock_user: if True, clear the last login date instead of setting it
    """
    sess = GlobalDB.db().session

    user.last_login_date = time.strftime("%c") if not unlock_user else None
    sess.commit()
def load_sf133(filename, fiscal_year, fiscal_period, force_load=False): """Load SF 133 (budget execution report) lookup table.""" with createApp().app_context(): sess = GlobalDB.db().session existing_records = sess.query(SF133).filter( SF133.fiscal_year == fiscal_year, SF133.period == fiscal_period) if force_load: # force a reload of this period's current data logger.info('Force SF 133 load: deleting existing records for {} {}'.format( fiscal_year, fiscal_period)) delete_count = existing_records.delete() logger.info('{} records deleted'.format(delete_count)) elif existing_records.count(): # if there's existing data & we're not forcing a load, skip logger.info('SF133 {} {} already in database ({} records). Skipping file.'.format( fiscal_year, fiscal_period, existing_records.count())) return data = pd.read_csv(filename, dtype=str) data = LoaderUtils.cleanData( data, SF133, {"ata": "allocation_transfer_agency", "aid": "agency_identifier", "availability_type_code": "availability_type_code", "bpoa": "beginning_period_of_availa", "epoa": "ending_period_of_availabil", "main_account": "main_account_code", "sub_account": "sub_account_code", "fiscal_year": "fiscal_year", "period": "period", "line_num": "line", "amount_summed": "amount"}, {"allocation_transfer_agency": {"pad_to_length": 3}, "agency_identifier": {"pad_to_length": 3}, "main_account_code": {"pad_to_length": 4}, "sub_account_code": {"pad_to_length": 3}, # next 3 lines handle the TAS fields that shouldn't # be padded but should still be empty spaces rather # than NULLs. this ensures that the downstream pivot & melt # (which insert the missing 0-value SF-133 lines) # will work as expected (values used in the pivot # index cannot be NULL). # the "pad_to_length: 0" works around the fact # that sometimes the incoming data for these columns # is a single space and sometimes it is blank/NULL. "beginning_period_of_availa": {"pad_to_length": 0}, "ending_period_of_availabil": {"pad_to_length": 0}, "availability_type_code": {"pad_to_length": 0}, "amount": {"strip_commas": True}} ) # todo: find out how to handle dup rows (e.g., same tas/period/line number) # line numbers 2002 and 2012 are the only duped SF 133 report line numbers, # and they are not used by the validation rules, so for now # just remove them before loading our SF-133 table dupe_line_numbers = ['2002', '2102'] data = data[~data.line.isin(dupe_line_numbers)] # add concatenated TAS field for internal use (i.e., joining to staging tables) data['tas'] = data.apply(lambda row: format_internal_tas(row), axis=1) # incoming .csv does not always include rows for zero-value SF-133 lines # so we add those here because they're needed for the SF-133 validations. # 1. "pivot" the sf-133 dataset to explode it horizontally, creating one # row for each tas/fiscal year/period, with columns for each SF-133 line. # the "fill_value=0" parameter puts a 0 into any Sf-133 line number cell # with a missing value for a specific tas/fiscal year/period. # 2. Once the zeroes are filled in, "melt" the pivoted data back to its normal # format of one row per tas/fiscal year/period. 
        # NOTE: fields used for the pivot in step #1 (i.e., items in pivot_idx) cannot
        # have NULL values, else they will be silently dropped by pandas :(
        pivot_idx = ['created_at', 'updated_at', 'agency_identifier', 'allocation_transfer_agency',
                     'availability_type_code', 'beginning_period_of_availa', 'ending_period_of_availabil',
                     'main_account_code', 'sub_account_code', 'tas', 'fiscal_year', 'period']
        data.amount = data.amount.astype(float)
        data = pd.pivot_table(data, values='amount', index=pivot_idx, columns=['line'],
                              fill_value=0).reset_index()
        data = pd.melt(data, id_vars=pivot_idx, value_name='amount')

        # Now that we've added zero lines for EVERY tas and SF 133 line number, get rid of the ones
        # we don't actually use in the validations. Arguably, it would be better just to include
        # everything, but that drastically increases the number of records we're inserting to the
        # sf_133 table. If we ever decide that we need *all* SF 133 lines that are zero value,
        # remove the next two lines.
        sf_133_validation_lines = [
            '1000', '1010', '1011', '1012', '1013', '1020', '1021', '1022', '1023', '1024',
            '1025', '1026', '1029', '1030', '1031', '1032', '1033', '1040', '1041', '1042',
            '1160', '1180', '1260', '1280', '1340', '1440', '1540', '1640', '1750', '1850',
            '1910', '2190', '2490', '2500', '3020', '4801', '4802', '4881', '4882', '4901',
            '4902', '4908', '4981', '4982'
        ]
        data = data[(data.line.isin(sf_133_validation_lines)) | (data.amount != 0)]

        # we didn't use the 'keep_null' option when padding allocation transfer agency,
        # because nulls in that column break the pivot (see above comments).
        # so, replace the ata '000' with an empty value before inserting to db
        data['allocation_transfer_agency'] = data['allocation_transfer_agency'].str.replace('000', '')

        # make a pass through the dataframe, changing any empty values to None, to ensure
        # that those are represented as NULL in the db.
        data = data.applymap(lambda x: str(x).strip() if len(str(x).strip()) else None)

        # insert to db
        table_name = SF133.__table__.name
        num = LoaderUtils.insertDataframe(data, table_name, sess.connection())
        sess.commit()
        logger.info('{} records inserted to {}'.format(num, table_name))
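# Standalone illustration (toy data, not part of the loader) of the pivot/melt zero-fill used in
# load_sf133 above: a tas/line combination missing from the incoming file comes back as an explicit
# zero-amount row after the round trip.
import pandas as pd

toy = pd.DataFrame({
    'tas': ['A', 'A', 'B'],
    'line': ['1000', '2500', '1000'],
    'amount': [5.0, 7.0, 3.0],
})
wide = pd.pivot_table(toy, values='amount', index=['tas'], columns=['line'], fill_value=0).reset_index()
tall = pd.melt(wide, id_vars=['tas'], value_name='amount')
# 'tall' now includes a row for tas 'B', line '2500' with amount 0.0
print(tall)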
def create_jobs(upload_files, submission, existing_submission=False): """Create the set of jobs associated with the specified submission Arguments: upload_files -- list of named tuples that describe files uploaded to the broker submission -- submission existing_submission -- true if we should update jobs in an existing submission rather than creating new jobs Returns: Dictionary of upload ids by filename to return to client, used for calling finalize_submission route """ sess = GlobalDB.db().session submission_id = submission.submission_id # create the file upload and single-file validation jobs and # set up the dependencies between them # before starting, sort the incoming list of jobs by letter # to ensure that jobs dependent on the awards jobs being present # are processed last. jobs_required = [] upload_dict= {} sorted_uploads = sorted(upload_files, key=attrgetter('file_letter')) for upload_file in sorted_uploads: validation_job_id, upload_job_id = add_jobs_for_uploaded_file(upload_file, submission_id, existing_submission) if validation_job_id: jobs_required.append(validation_job_id) upload_dict[upload_file.file_type] = upload_job_id # once single-file upload/validation jobs are created, create the cross-file # validation job and dependencies # todo: remove external validation jobs from the code-base--they aren't used if existing_submission: # find cross-file and external validation jobs and mark them as waiting # (note: job_type of 'validation' is a cross-file job) val_job = sess.query(Job).\ filter_by( submission_id = submission_id, job_type_id = JOB_TYPE_DICT["validation"]).\ one() val_job.job_status_id = JOB_STATUS_DICT["waiting"] ext_job = sess.query(Job).\ filter_by( submission_id = submission_id, job_type_id = JOB_TYPE_DICT["external_validation"]).\ one() ext_job.job_status_id = JOB_STATUS_DICT["waiting"] submission.updated_at = time.strftime("%c") else: # create cross-file validation job validation_job = Job( job_status_id=JOB_STATUS_DICT["waiting"], job_type_id=JOB_TYPE_DICT["validation"], submission_id=submission_id) sess.add(validation_job) # create external validation job external_job = Job( job_status_id=JOB_STATUS_DICT["waiting"], job_type_id=JOB_TYPE_DICT["external_validation"], submission_id=submission_id) sess.add(external_job) sess.flush() # create dependencies for validation jobs for job_id in jobs_required: val_dependency = JobDependency(job_id=validation_job.job_id, prerequisite_id=job_id) sess.add(val_dependency) ext_dependency = JobDependency(job_id=external_job.job_id, prerequisite_id=job_id) sess.add(ext_dependency) sess.commit() upload_dict["submission_id"] = submission_id return upload_dict
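# create_jobs only reads .file_type and .file_letter from each item in upload_files. A minimal
# stand-in for that named tuple (the real one may carry more fields, e.g. the uploaded file's key)
# could look like this; the values below are illustrative only:
from collections import namedtuple

ExampleUploadFile = namedtuple('ExampleUploadFile', ['file_type', 'file_letter'])

example_uploads = [
    ExampleUploadFile(file_type='appropriations', file_letter='A'),
    ExampleUploadFile(file_type='award_financial', file_letter='C'),
]
# upload_ids = create_jobs(example_uploads, submission)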
def certify_dabs_submission(submission, file_manager): """ Certify a DABS submission Args: submission: the submission to be certified file_manager: a FileHandler object to be used to call move_certified_files Returns: Nothing if successful, JsonResponse error containing the details of the error if something went wrong """ current_user_id = g.user.user_id if not submission.publishable: return JsonResponse.error( ValueError( "Submission cannot be certified due to critical errors"), StatusCode.CLIENT_ERROR) if not submission.is_quarter_format: return JsonResponse.error( ValueError("Monthly submissions cannot be certified"), StatusCode.CLIENT_ERROR) if submission.publish_status_id == PUBLISH_STATUS_DICT['published']: return JsonResponse.error( ValueError("Submission has already been certified"), StatusCode.CLIENT_ERROR) windows = get_windows() for window in windows: if window.block_certification: return JsonResponse.error(ValueError(window.message), StatusCode.CLIENT_ERROR) response = find_existing_submissions_in_period( submission.cgac_code, submission.frec_code, submission.reporting_fiscal_year, submission.reporting_fiscal_period, submission.submission_id) if response.status_code == StatusCode.OK: sess = GlobalDB.db().session # create the certify_history entry certify_history = CertifyHistory( created_at=datetime.utcnow(), user_id=current_user_id, submission_id=submission.submission_id) sess.add(certify_history) sess.commit() # get the certify_history entry including the PK certify_history = sess.query(CertifyHistory).filter_by(submission_id=submission.submission_id).\ order_by(CertifyHistory.created_at.desc()).first() # move files (locally we don't move but we still need to populate the certified_files_history table) file_manager.move_certified_files(submission, certify_history, file_manager.is_local) # set submission contents submission.certifying_user_id = current_user_id submission.publish_status_id = PUBLISH_STATUS_DICT['published'] sess.commit() print(submission.__dict__)
def start_generation_job(job, start_date, end_date, agency_code=None): """ Validates the dates for a D file generation job and passes the Job ID to SQS Args: job: File generation job to start start_date: Start date of the file generation end_date: End date of the file generation agency_code: Agency code for detached D file generations Returns: Tuple of boolean indicating successful start, and error response if False """ sess = GlobalDB.db().session file_type = job.file_type.letter_name try: if file_type in ['D1', 'D2']: # Validate and set Job's start and end dates if not (StringCleaner.is_date(start_date) and StringCleaner.is_date(end_date)): raise ResponseException( "Start or end date cannot be parsed into a date", StatusCode.CLIENT_ERROR) job.start_date = start_date job.end_date = end_date sess.commit() elif file_type not in ["E", "F"]: raise ResponseException("File type must be either D1, D2, E or F", StatusCode.CLIENT_ERROR) except ResponseException as e: return False, JsonResponse.error(e, e.status, file_type=file_type, status='failed') mark_job_status(job.job_id, "waiting") # Add job_id to the SQS job queue logger.info({ 'message_type': 'ValidatorInfo', 'job_id': job.job_id, 'message': 'Sending file generation job {} to Validator in SQS'.format(job.job_id) }) queue = sqs_queue() message_attr = { 'agency_code': { 'DataType': 'String', 'StringValue': agency_code } } if agency_code else {} response = queue.send_message(MessageBody=str(job.job_id), MessageAttributes=message_attr) logger.debug({ 'message_type': 'ValidatorInfo', 'job_id': job.job_id, 'message': 'Send message response: {}'.format(response) }) return True, None
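# Illustrative call (the Job is assumed to already exist as a file_upload job; the dates and agency
# code below are made up):
#
#   success, error_response = start_generation_job(d1_job, '01/01/2017', '03/31/2017', agency_code='097')
#   if not success:
#       return error_response  # JsonResponse describing why the generation could not start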
def check_current_page(submission):
    """ Check which page of the submission process a submission is currently on.

        Args:
            submission: submission to check the progress of

        Returns:
            A JsonResponse indicating the furthest Broker page the submission has reached
    """
    sess = GlobalDB.db().session

    submission_id = submission.submission_id

    # /v1/uploadDetachedFiles/
    # DetachedFiles
    if submission.d2_submission:
        data = {
            "message": "The current progress of this submission ID is on /v1/uploadDetachedFiles/ page.",
            "step": "6"
        }
        return JsonResponse.create(StatusCode.OK, data)

    # /v1/reviewData/
    # Checks that both E and F files are finished
    review_data = sess.query(Job).filter(Job.submission_id == submission_id, Job.file_type_id.in_([6, 7]),
                                         Job.job_status_id == 4)

    # Need to check that cross file is done as well
    generate_ef = sess.query(Job).filter(Job.submission_id == submission_id, Job.job_type_id == 4,
                                         Job.number_of_errors == 0, Job.job_status_id == 4)

    if review_data.count() == 2 and generate_ef.count() > 0:
        data = {
            "message": "The current progress of this submission ID is on /v1/reviewData/ page.",
            "step": "5"
        }
        return JsonResponse.create(StatusCode.OK, data)

    # /v1/generateEF/
    if generate_ef.count() > 0:
        data = {
            "message": "The current progress of this submission ID is on /v1/generateEF/ page.",
            "step": "4"
        }
        return JsonResponse.create(StatusCode.OK, data)

    validate_cross_file = sess.query(Job).filter(Job.submission_id == submission_id, Job.file_type_id.in_([4, 5]),
                                                 Job.job_type_id == 2, Job.number_of_errors == 0,
                                                 Job.file_size.isnot(None), Job.job_status_id == 4)

    generate_files = sess.query(Job).filter(Job.submission_id == submission_id, Job.file_type_id.in_([1, 2, 3]),
                                            Job.job_type_id == 2, Job.number_of_errors == 0,
                                            Job.file_size.isnot(None), Job.job_status_id == 4)

    # /v1/validateCrossFile/
    if validate_cross_file.count() == 2 and generate_files.count() == 3:
        data = {
            "message": "The current progress of this submission ID is on /v1/validateCrossFile/ page.",
            "step": "3"
        }
        return JsonResponse.create(StatusCode.OK, data)

    # /v1/generateFiles/
    if generate_files.count() == 3:
        data = {
            "message": "The current progress of this submission ID is on /v1/generateFiles/ page.",
            "step": "2"
        }
        return JsonResponse.create(StatusCode.OK, data)

    # /v1/validateData/
    validate_data = sess.query(Job).filter(Job.submission_id == submission_id, Job.file_type_id.in_([1, 2, 3]),
                                           Job.job_type_id == 2, Job.number_of_errors != 0,
                                           Job.file_size.isnot(None))
    check_header_errors = sess.query(Job).filter(Job.submission_id == submission_id, Job.file_type_id.in_([1, 2, 3]),
                                                 Job.job_type_id == 2, Job.job_status_id != 4,
                                                 Job.file_size.isnot(None))
    if validate_data.count() or check_header_errors.count() > 0:
        data = {
            "message": "The current progress of this submission ID is on /v1/validateData/ page.",
            "step": "1"
        }
        return JsonResponse.create(StatusCode.OK, data)
    else:
        return JsonResponse.error(ValueError("The submission ID returns no response"), StatusCode.CLIENT_ERROR)
def setupJobTrackerDB(): """Create job tracker tables from model metadata.""" with createApp().app_context(): sess = GlobalDB.db().session insertCodes(sess) sess.commit()
def setupValidationDB(): """Create validation tables from model metadata and do initial inserts.""" with createApp().app_context(): sess = GlobalDB.db().session insertCodes(sess) sess.commit()
def purge():
    """ Remove all SQS records from the database. """
    sess = GlobalDB.db().session
    sess.query(SQS).delete()
    sess.commit()
def load_program_activity_data(base_path, force_reload=False, export=False):
    """ Load program activity lookup table.

        Args:
            base_path: directory of domain config files
            force_reload: whether or not to force a reload
            export: whether or not to export a public copy of the file
    """
    now = datetime.datetime.now()
    metrics_json = {
        'script_name': 'load_program_activity.py',
        'start_time': str(now),
        'records_received': 0,
        'duplicates_dropped': 0,
        'invalid_records_dropped': 0,
        'records_deleted': 0,
        'records_inserted': 0
    }
    dropped_count = 0

    logger.info('Checking PA upload dates to see if we can skip.')
    last_upload = get_date_of_current_pa_upload(base_path)
    if not (last_upload > get_stored_pa_last_upload()) and not force_reload:
        logger.info('Skipping load as it\'s already been done')
    else:
        logger.info('Getting the program activity file')
        program_activity_file = get_program_activity_file(base_path)

        logger.info('Loading program activity: {}'.format(PA_FILE_NAME))

        with create_app().app_context():
            sess = GlobalDB.db().session
            try:
                raw_data = pd.read_csv(program_activity_file, dtype=str)
            except pd.io.common.EmptyDataError:
                log_blank_file()
                exit_if_nonlocal(4)  # exit code chosen arbitrarily, to indicate distinct failure states
                return
            headers = set([header.upper() for header in list(raw_data)])

            if not VALID_HEADERS.issubset(headers):
                logger.error('Missing required headers. Required headers include: %s' % str(VALID_HEADERS))
                exit_if_nonlocal(4)
                return

            try:
                dropped_count, data = clean_data(
                    raw_data,
                    ProgramActivity,
                    {'fyq': 'fiscal_year_period', 'agency_code': 'agency_id',
                     'allocation_id': 'allocation_transfer_id', 'account_code': 'account_number',
                     'pa_code': 'program_activity_code', 'pa_title': 'program_activity_name'},
                    {'program_activity_code': {'pad_to_length': 4}, 'agency_id': {'pad_to_length': 3},
                     'allocation_transfer_id': {'pad_to_length': 3, 'keep_null': True},
                     'account_number': {'pad_to_length': 4}},
                    ['agency_id', 'program_activity_code', 'account_number', 'program_activity_name'],
                    True
                )
            except FailureThresholdExceededException as e:
                if e.count == 0:
                    log_blank_file()
                    exit_if_nonlocal(4)
                    return
                else:
                    logger.error('Loading of program activity file failed due to exceeded failure threshold. '
                                 'Application tried to drop {} rows'.format(e.count))
                    exit_if_nonlocal(5)
                    return

            metrics_json['records_deleted'] = sess.query(ProgramActivity).delete()
            metrics_json['invalid_records_dropped'] = dropped_count

            # Lowercase Program Activity Name
            data['program_activity_name'] = data['program_activity_name'].apply(lambda x: lowercase_or_notify(x))
            # Convert FYQ to FYP
            data['fiscal_year_period'] = data['fiscal_year_period'].apply(lambda x: convert_fyq_to_fyp(x))

            # because we're only loading a subset of program activity info, there will be duplicate records in the
            # dataframe. this is ok, but it needs to be de-duped before the db load. We also need to log them.
base_count = len(data.index) metrics_json['records_received'] = base_count data.drop_duplicates(inplace=True) dupe_count = base_count - len(data.index) logger.info('Dropped {} duplicate rows.'.format(dupe_count)) metrics_json['duplicates_dropped'] = dupe_count # insert to db table_name = ProgramActivity.__table__.name num = insert_dataframe(data, table_name, sess.connection()) sess.commit() if export: export_public_pa(raw_data) end_time = datetime.datetime.now() update_external_data_load_date(now, end_time, 'program_activity') update_external_data_load_date(last_upload, end_time, 'program_activity_upload') logger.info('{} records inserted to {}'.format(num, table_name)) metrics_json['records_inserted'] = num metrics_json['duration'] = str(end_time - now) with open('load_program_activity_metrics.json', 'w+') as metrics_file: json.dump(metrics_json, metrics_file) if dropped_count > 0: exit_if_nonlocal(3) return
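# convert_fyq_to_fyp (used above) is defined elsewhere; the sketch below only illustrates the idea
# of the fiscal-year-quarter to fiscal-year-period conversion. The 'FY18Q2' input format and the
# quarter-to-last-period mapping (Q2 -> P06) are assumptions here, not the helper's actual contract.
import re


def fyq_to_fyp_sketch(fyq):
    match = re.match(r'^(FY\d{2,4})Q([1-4])$', str(fyq).upper())
    if not match:
        return fyq  # leave unrecognized values untouched
    year, quarter = match.group(1), int(match.group(2))
    return '{}P{:02d}'.format(year, quarter * 3)

# fyq_to_fyp_sketch('FY18Q2') -> 'FY18P06'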
if __name__ == '__main__': parser = get_parser() args = parser.parse_args() historic = args.historic local = args.local monthly = args.monthly daily = args.daily benchmarks = args.benchmarks update = args.update with create_app().app_context(): configure_logging() sess = GlobalDB.db().session if monthly and daily: logger.error("For loading a single local file, you must provide either monthly or daily.") sys.exit(1) if historic and update: logger.error("For multiple file loads you must choose either historic or update.") sys.exit(1) elif (monthly or daily) and local: logger.error("Local directory specified with a local file.") sys.exit(1) elif monthly: parse_sam_file(monthly, sess=sess, monthly=True, benchmarks=benchmarks) elif daily: parse_sam_file(daily, sess=sess, benchmarks=benchmarks) else:
def setup_submission_type_db():
    """Create submission type tables from model metadata and do initial inserts."""
    with create_app().app_context():
        sess = GlobalDB.db().session
        insert_codes(sess)
        sess.commit()