def setupErrorDB():
    """Run ``insertCodes`` against the global DB session and commit the result."""
    app = createApp()
    with app.app_context():
        session = GlobalDB.db().session
        insertCodes(session)
        session.commit()
def loadObjectClass(filename):
    """Replace the object class lookup table with the contents of a CSV file.

    Args:
        filename: path of the CSV file to load (every column is read as a string)
    """
    column_map = {
        "max_oc_code": "object_class_code",
        "max_object_class_name": "object_class_name",
    }
    with createApp().app_context():
        session = GlobalDB.db().session

        # object class rows are wiped and replaced wholesale
        session.query(ObjectClass).delete()

        frame = LoaderUtils.cleanData(
            pd.read_csv(filename, dtype=str), ObjectClass, column_map, {})

        # one row per object class code
        frame.drop_duplicates(subset=['object_class_code'], inplace=True)

        # write the cleaned rows and report how many went in
        target_table = ObjectClass.__table__.name
        inserted = LoaderUtils.insertDataframe(
            frame, target_table, session.connection())
        session.commit()

        logger.info('{} records inserted to {}'.format(inserted, target_table))
def loadCgac(filename):
    """Replace the CGAC (high-level agency names) lookup table from a CSV file.

    Args:
        filename: path of the CSV file to load (every column is read as a string)
    """
    column_map = {"cgac": "cgac_code", "agency": "agency_name"}
    column_options = {"cgac_code": {"pad_to_length": 3}}

    with createApp().app_context():
        session = GlobalDB.db().session

        # CGAC rows are wiped and replaced wholesale
        session.query(CGAC).delete()

        # read the raw values, then rename/clean the columns
        raw = pd.read_csv(filename, dtype=str)
        frame = LoaderUtils.cleanData(raw, CGAC, column_map, column_options)

        # one row per CGAC code
        frame.drop_duplicates(subset=['cgac_code'], inplace=True)

        # write the cleaned rows and report how many went in
        target_table = CGAC.__table__.name
        inserted = LoaderUtils.insertDataframe(
            frame, target_table, session.connection())
        session.commit()

        logger.info('{} records inserted to {}'.format(inserted, target_table))
def test_cross_file(self):
    """Run the single-file jobs, then the cross-file job, and check its results."""
    crossId = self.jobDict["crossFile"]

    # Run jobs for A, B, C, and D2, then cross file validation job
    # Note: test files used for cross validation use the short column names
    # as a way to ensure those are handled correctly by the validator
    prerequisiteJobs = ("crossAwardFin", "crossAward", "crossApprop", "crossPgmAct")
    for jobKey in prerequisiteJobs:
        singleFileResponse = self.validateJob(self.jobDict[jobKey])
        self.assertEqual(singleFileResponse.status_code, 200,
                         msg=str(singleFileResponse.json))

    crossFileResponse = self.validateJob(crossId)
    self.assertEqual(crossFileResponse.status_code, 200,
                     msg=str(crossFileResponse.json))

    with createApp().app_context():
        sess = GlobalDB.db().session
        job = sess.query(Job).filter(Job.job_id == crossId).one()

        # Check number of cross file validation errors in DB for this job
        self.assertEqual(checkNumberOfErrorsByJobId(crossId, "fatal"), 0)
        self.assertEqual(checkNumberOfErrorsByJobId(crossId, "warning"), 3)
        self.assertEqual(job.job_status_id, JOB_STATUS_DICT['finished'])

        # Check that cross file validation report exists and is the right size
        submissionId = job.submission_id
        expectedReports = [
            (89, getCrossReportName(
                submissionId, "appropriations", "program_activity")),
            (89, getCrossReportName(
                submissionId, "award_financial", "award")),
            (2348, getCrossWarningReportName(
                submissionId, "appropriations", "program_activity")),
            (89, getCrossWarningReportName(
                submissionId, "award_financial", "award")),
        ]

        for expectedSize, reportName in expectedReports:
            if not self.local:
                # remote reports may differ from the expected size by a few bytes
                s3Key = "errors/" + reportName
                self.assertGreater(
                    s3UrlHandler.getFileSize(s3Key), expectedSize - 5)
                self.assertLess(
                    s3UrlHandler.getFileSize(s3Key), expectedSize + 5)
            else:
                self.assertFileSizeAppxy(expectedSize, reportName)
def test_file_submission(self):
    """Test broker file submission and response.

    Submits the standard test files, then checks the upload keys, the
    temporary credentials and bucket name in the response, the job ids
    and original filenames stored in the DB, the submission's owner and
    publish status, and finally the upload-complete route.
    """
    response = self.call_file_submission()
    self.assertEqual(response.status_code, 200)
    self.assertEqual(response.headers.get("Content-Type"), "application/json")

    # named responseDict (not "json") so the json module name is not shadowed
    responseDict = response.json
    self.assertIn("test1.csv", responseDict["appropriations_key"])
    self.assertIn("test2.csv", responseDict["award_financial_key"])
    self.assertIn(CONFIG_BROKER["award_file_name"], responseDict["award_key"])
    self.assertIn("test4.csv", responseDict["program_activity_key"])

    # every credential field must be present and non-empty
    # (the original list repeated "SessionToken"; the duplicate check added nothing)
    self.assertIn("credentials", responseDict)
    credentials = responseDict["credentials"]
    for requiredField in ("AccessKeyId", "SecretAccessKey", "SessionToken"):
        self.assertIn(requiredField, credentials)
        self.assertTrue(len(credentials[requiredField]))

    self.assertIn("bucket_name", responseDict)
    self.assertTrue(len(responseDict["bucket_name"]))

    fileResults = self.uploadFileByURL(
        "/" + responseDict["appropriations_key"], "test1.csv")
    self.assertGreater(fileResults['bytesWritten'], 0)

    # Test that job ids are returned
    fileKeys = ["program_activity", "award_financial", "appropriations"]
    with createApp().app_context():
        sess = GlobalDB.db().session
        for key in fileKeys:
            idKey = '{}_id'.format(key)
            self.assertIn(idKey, responseDict)
            jobId = responseDict[idKey]
            self.assertIsInstance(jobId, int)
            # Check that original filenames were stored in DB
            originalFilename = sess.query(Job).filter(
                Job.job_id == jobId).one().original_filename
            # assertEqual: the assertEquals alias is deprecated
            self.assertEqual(originalFilename, self.filenames[key])

        # check that submission got mapped to the correct user
        submissionId = responseDict["submission_id"]
        self.file_submission_id = submissionId
        submission = sess.query(Submission).filter(
            Submission.submission_id == submissionId).one()
        self.assertEqual(submission.user_id, self.submission_user_id)
        # Check that new submission is unpublished
        self.assertEqual(submission.publish_status_id,
                         self.publishStatusDict['unpublished'])

    # Call upload complete route
    finalizeResponse = self.check_upload_complete(
        responseDict["appropriations_id"])
    self.assertEqual(finalizeResponse.status_code, 200)
def loadTas(tasFile=None):
    """Update the TAS lookups from a TAS file.

    Args:
        tasFile: path of the TAS file; when omitted (or otherwise falsy),
            cars_tas.csv under the validator's config directory is used
    """
    # only fall back to the bundled file when the caller gave no path
    source = tasFile if tasFile else os.path.join(
        CONFIG_BROKER["path"], "dataactvalidator", "config", "cars_tas.csv")

    app = createApp()
    with app.app_context():
        updateTASLookups(source)
def setUpClass(cls):
    """Set up resources to be shared within a test class.

    Points every database name in the shared DB config at a randomly
    numbered per-suite test database, builds the test app, sets the
    class-level flags used by the tests, runs the DB setup routines and
    finally exposes the DB interfaces on the class.
    """
    # TODO: refactor into a pytest class fixtures and inject as necessary

    # update application's db config options so unittests
    # run against test databases
    suite = cls.__name__.lower()
    config = dataactcore.config.CONFIG_DB
    cls.num = randint(1, 9999)
    config['error_db_name'] = 'unittest{}_{}_error_data'.format(
        cls.num, suite)
    config['job_db_name'] = 'unittest{}_{}_job_tracker'.format(
        cls.num, suite)
    config['user_db_name'] = 'unittest{}_{}_user_manager'.format(
        cls.num, suite)
    config['validator_db_name'] = 'unittest{}_{}_validator'.format(
        cls.num, suite)
    config['staging_db_name'] = 'unittest{}_{}_staging'.format(
        cls.num, suite)
    dataactcore.config.CONFIG_DB = config

    app = createApp()
    app.config['TESTING'] = True
    cls.app = TestApp(app)

    # Allow us to augment default test failure msg w/ more detail
    cls.longMessage = True
    # Flag for each route call to launch a new thread
    cls.useThreads = False
    # Upload files to S3 (False = skip re-uploading on subsequent runs)
    cls.uploadFiles = True
    # Run tests for local broker or not
    cls.local = CONFIG_BROKER['local']
    # This needs to be set to the local directory for error reports if local is True
    cls.local_file_directory = CONFIG_SERVICES['error_report_path']

    # suppress logging at WARNING level and below while the databases are
    # built (e.g. the output of Alembic migrations)
    logging.disable(logging.WARN)
    try:
        # drop and re-create test job db/tables
        setupJobTrackerDB()
        # drop and re-create test error db/tables
        setupErrorDB()
        # drop and re-create test staging db
        setupStagingDB()
        # drop and re-create test validation db
        setupValidationDB(True)
    finally:
        # reset logging defaults even when a setup step raises, so a failed
        # setup does not leave logging disabled for the rest of the run
        logging.disable(logging.NOTSET)

    cls.interfaces = InterfaceHolder()
    cls.jobTracker = cls.interfaces.jobDb
    cls.stagingDb = cls.interfaces.stagingDb
    cls.errorInterface = cls.interfaces.errorDb
    cls.validationDb = cls.interfaces.validationDb
    cls.userId = 1
def reset_alembic(alembic_version):
    """Overwrite the version number stored in the alembic_version table.

    Args:
        alembic_version: value to write to the table's version_num column
    """
    with createApp().app_context():
        database = GlobalDB.db()
        engine, session = database.engine, database.session

        # load the existing table definition from the database itself
        version_table = Table(
            'alembic_version', MetaData(bind=engine), autoload=True)

        # no WHERE clause: every row of the table gets the new version
        statement = update(version_table).values(
            {"version_num": alembic_version})
        session.execute(statement)
        session.commit()
def test_update_submission(self):
    """ Test submit_files with an existing submission ID """
    self.call_file_submission()

    if CONFIG_BROKER["use_aws"]:
        awardFinancialFile = "updated.csv"
    else:
        # If local must use full destination path
        awardFinancialFile = os.path.join(
            CONFIG_BROKER["broker_files"], "updated.csv")

    # note: this is a quarterly test submission, so
    # updated dates must still reflect a quarter
    updateJson = {
        "existing_submission_id": self.updateSubmissionId,
        "award_financial": awardFinancialFile,
        "reporting_period_start_date": "04/2016",
        "reporting_period_end_date": "06/2016"
    }

    with createApp().app_context():
        sess = GlobalDB.db().session

        # Mark submission as published
        publishedSubmission = sess.query(Submission).filter(
            Submission.submission_id == self.updateSubmissionId).one()
        publishedSubmission.publish_status_id = \
            self.publishStatusDict['published']
        sess.commit()

        updateResponse = self.app.post_json(
            "/v1/submit_files/", updateJson,
            headers={"x-session-id": self.session_id})
        self.assertEqual(updateResponse.status_code, 200)
        self.assertEqual(updateResponse.headers.get("Content-Type"),
                         "application/json")

        responseBody = updateResponse.json
        self.assertIn("updated.csv", responseBody["award_financial_key"])

        # re-read the submission named in the response and check what changed
        submission = sess.query(Submission).filter(
            Submission.submission_id == responseBody["submission_id"]).one()
        # Should not have changed agency name
        self.assertEqual(submission.cgac_code, "SYS")
        self.assertEqual(
            submission.reporting_start_date.strftime("%m/%Y"), "04/2016")
        self.assertEqual(
            submission.reporting_end_date.strftime("%m/%Y"), "06/2016")
        self.assertEqual(submission.publish_status_id,
                         self.publishStatusDict['updated'])
def createAdmin():
    """Return the admin user named in CONFIG_BROKER, creating it if absent.

    Returns:
        the existing user whose email matches CONFIG_BROKER['admin_email'],
        or the result of createUserWithPassword() when there is none
    """
    adminEmail = CONFIG_BROKER['admin_email']
    adminPass = CONFIG_BROKER['admin_password']

    with createApp().app_context():
        session = GlobalDB.db().session
        existing = session.query(User).filter(
            User.email == adminEmail).one_or_none()
        if existing:
            return existing

        # once the rest of the setup scripts are updated to use
        # GlobalDB instead of databaseSession, move the app_context
        # creation up to initialize()
        return createUserWithPassword(
            adminEmail, adminPass, Bcrypt(), permission=2)
def loadProgramActivity(filename):
    """Replace the program activity lookup table from a CSV file.

    Args:
        filename: path of the CSV file to load (every column is read as a string)
    """
    column_map = {
        "year": "budget_year",
        "agency_id": "agency_id",
        "alloc_id": "allocation_transfer_id",
        "account": "account_number",
        "pa_code": "program_activity_code",
        "pa_name": "program_activity_name",
    }
    column_options = {
        "program_activity_code": {"pad_to_length": 4},
        "agency_id": {"pad_to_length": 3},
        "allocation_transfer_id": {"pad_to_length": 3, "keep_null": True},
        "account_number": {"pad_to_length": 4},
    }

    with createApp().app_context():
        session = GlobalDB.db().session

        # for program activity, delete and replace values??
        session.query(ProgramActivity).delete()

        raw = pd.read_csv(filename, dtype=str)
        frame = LoaderUtils.cleanData(
            raw, ProgramActivity, column_map, column_options)

        # only a subset of the program activity columns is loaded, so the
        # frame can contain fully identical rows; that is expected, but
        # they have to be collapsed before the db load
        frame.drop_duplicates(inplace=True)

        # write the cleaned rows and report how many went in
        target_table = ProgramActivity.__table__.name
        inserted = LoaderUtils.insertDataframe(
            frame, target_table, session.connection())
        session.commit()

        logger.info('{} records inserted to {}'.format(inserted, target_table))
def test_award_fin_mixed(self):
    """Test mixed award job with some rows failing."""
    jobId = self.jobDict["awardFinMixed"]
    self.passed = self.run_test(
        jobId, 200, "finished", 7537, 6, "complete", 47, 36, 9091)

    with createApp().app_context():
        sess = GlobalDB.db().session
        submissionId = sess.query(Job).filter(
            Job.job_id == jobId).one().submission_id

        def firstRowForParentAward(parentAwardId):
            # first staged row of this submission with the given parent award id
            return sess.query(AwardFinancial).filter(
                AwardFinancial.parent_award_id == parentAwardId,
                AwardFinancial.submission_id == submissionId).first()

        # todo: these whitespace and comma cases probably belong in unit tests
        # Test that whitespace is converted to null
        whitespaceRow = firstRowForParentAward("ZZZZ")
        self.assertIsNone(whitespaceRow.agency_identifier)
        self.assertIsNone(whitespaceRow.piid)

        # Test that commas are removed for numeric values
        commaRow = firstRowForParentAward("YYYY")
        self.assertEqual(commaRow.deobligations_recov_by_awa_cpe, 26000)
def loadFields(fileTypeName, schemaFileName):
    """Load specified schema from a .csv.

    Removes the columns currently stored for the file type, then adds one
    column per record of the schema file.

    Args:
        fileTypeName: name of the FileType the schema belongs to
        schemaFileName: path of the schema CSV file

    Raises:
        ValueError: a record lacks one of the fieldname / required /
            data_type columns
    """
    # columns every schema record must carry (constant, so built once)
    requiredFields = ("fieldname", "required", "data_type")

    with createApp().app_context():
        sess = GlobalDB.db().session

        # get file type object for specified fileTypeName
        fileType = sess.query(FileType).filter(
            FileType.name == fileTypeName).one()

        # delete existing schema from database
        SchemaLoader.removeColumnsByFileType(sess, fileType)

        # get allowable datatypes
        typeQuery = sess.query(FieldType.name, FieldType.field_type_id).all()
        types = {fieldType.name: fieldType.field_type_id
                 for fieldType in typeQuery}

        # add schema to database
        # NOTE: opened in plain text mode; the former 'rU' mode was a
        # deprecated no-op on Python 3 and raises ValueError from 3.11 on
        file_column_count = 0
        with open(schemaFileName, 'r') as csvfile:
            for record in csv.DictReader(csvfile):
                record = FieldCleaner.cleanRecord(record)
                if not all(field in record for field in requiredFields):
                    raise ValueError('CSV File does not follow schema')

                SchemaLoader.addColumnByFileType(
                    sess,
                    types,
                    fileType,
                    FieldCleaner.cleanString(record["fieldname"]),
                    FieldCleaner.cleanString(record["fieldname_short"]),
                    record["required"],
                    record["data_type"],
                    record["padded_flag"],
                    record["field_length"])
                file_column_count += 1

        sess.commit()
        logger.info('{} {} schema records added to {}'.format(
            file_column_count, fileTypeName, FileColumn.__tablename__))
def setUpClass(cls):
    """Prepare the database, test app and class attributes shared by a test class."""
    # TODO: refactor into a pytest class fixtures and inject as necessary

    # point the application's db config at a per-suite, randomly numbered
    # database so unittests run against test databases
    suiteName = cls.__name__.lower()
    dbConfig = dataactcore.config.CONFIG_DB
    cls.num = randint(1, 9999)
    dbConfig['db_name'] = 'unittest{}_{}_data_broker'.format(cls.num, suiteName)
    dataactcore.config.CONFIG_DB = dbConfig

    # create the test database and run the migrations
    createDatabase(CONFIG_DB['db_name'])
    runMigrations()

    flaskApp = createApp()
    flaskApp.config['TESTING'] = True
    flaskApp.config['DEBUG'] = False
    cls.app = TestApp(flaskApp)

    # Allow us to augment default test failure msg w/ more detail
    cls.longMessage = True
    # Upload files to S3 (False = skip re-uploading on subsequent runs)
    cls.uploadFiles = True
    # Run tests for local broker or not
    cls.local = CONFIG_BROKER['local']
    # This needs to be set to the local directory for error reports if local is True
    cls.local_file_directory = CONFIG_SERVICES['error_report_path']

    # set up, in order, the job tracker, error and validation data
    setupJobTrackerDB()
    setupErrorDB()
    setupValidationDB()

    cls.userId = None
    # constants to use for default submission start and end dates
    cls.SUBMISSION_START_DEFAULT = datetime(2015, 10, 1)
    cls.SUBMISSION_END_DEFAULT = datetime(2015, 10, 31)
def loadSql(cls, filename):
    """Load SQL-based validation rules to db.

    Deletes every RuleSql row, then merges one RuleSql per data row of
    the rules CSV and commits.

    Args:
        filename: name of the rules CSV, resolved against cls.sql_rules_path

    Raises:
        KeyError: the CSV header has fields that are not in cls.headers
        ValueError: the CSV header lacks fields that are in cls.headers
        Exception: a row's file type or target file could not be looked up
    """
    with createApp().app_context():
        sess = GlobalDB.db().session

        # Delete all records currently in table
        sess.query(RuleSql).delete()

        # Create rule severity and file type lookups
        severityDict = {s.name: s.rule_severity_id
                        for s in sess.query(RuleSeverity).all()}
        fileTypeDict = {f.name: f.file_id
                        for f in sess.query(FileTypeValidation).all()}

        filename = os.path.join(cls.sql_rules_path, filename)

        # open csv
        # NOTE: plain text mode; the former 'rU' mode was a deprecated
        # no-op on Python 3 and raises ValueError from 3.11 on
        with open(filename, 'r') as csvfile:
            # read header, split it into field names and clean those
            header = csvfile.readline()
            fieldNames = [FieldCleaner.cleanString(field)
                          for field in header.split(',')]

            unknownFields = set(fieldNames) - set(cls.headers)
            if unknownFields:
                raise KeyError("Found unexpected fields: {}".format(
                    str(list(unknownFields))))

            missingFields = set(cls.headers) - set(fieldNames)
            if missingFields:
                raise ValueError("Missing required fields: {}".format(
                    str(list(missingFields))))

            # the header line is already consumed, so the reader starts
            # at the first data row and is given the cleaned names
            reader = csv.DictReader(csvfile, fieldnames=fieldNames)
            for row in reader:
                sql = cls.readSqlStr(row['query_name'])

                rule_sql = RuleSql(
                    rule_sql=sql,
                    rule_label=row['rule_label'],
                    rule_description=row['rule_description'],
                    rule_error_message=row['rule_error_message'],
                    query_name=row['query_name'])

                # look up file type id
                try:
                    fileId = fileTypeDict[row["file_type"]]
                except Exception as e:
                    raise Exception(
                        "{}: file type={}, rule label={}. Rule not loaded."
                        .format(e, row["file_type"], row["rule_label"]))

                # look up target file id (blank means no target file)
                try:
                    if row["target_file"].strip() == "":
                        # No target file provided
                        targetFileId = None
                    else:
                        targetFileId = fileTypeDict[row["target_file"]]
                except Exception as e:
                    raise Exception(
                        "{}: file type={}, rule label={}. Rule not loaded."
                        .format(e, row["target_file"], row["rule_label"]))

                # set cross file flag
                cross_file_flag = (
                    FieldCleaner.cleanString(row["rule_cross_file_flag"])
                    in ['true', 't', 'y', 'yes'])

                rule_sql.rule_severity_id = severityDict[row['severity_name']]
                rule_sql.file_id = fileId
                rule_sql.target_file_id = targetFileId
                rule_sql.rule_cross_file_flag = cross_file_flag

                sess.merge(rule_sql)

        sess.commit()
def load_sf133(filename, fiscal_year, fiscal_period, force_load=False):
    """Load SF 133 (budget execution report) lookup table.

    Args:
        filename: path of the SF 133 CSV file to load
        fiscal_year: fiscal year the file covers
        fiscal_period: fiscal period the file covers
        force_load: when True, rows already stored for this year/period
            are deleted and the file is loaded; when False, the file is
            skipped if any rows for this year/period already exist
    """
    with createApp().app_context():
        sess = GlobalDB.db().session

        existing_records = sess.query(SF133).filter(
            SF133.fiscal_year == fiscal_year, SF133.period == fiscal_period)
        if force_load:
            # force a reload of this period's current data
            logger.info('Force SF 133 load: deleting existing records for {} {}'.format(
                fiscal_year, fiscal_period))
            delete_count = existing_records.delete()
            logger.info('{} records deleted'.format(delete_count))
        else:
            # count once and reuse the number for both the test and the log line
            existing_count = existing_records.count()
            if existing_count:
                # if there's existing data & we're not forcing a load, skip
                logger.info('SF133 {} {} already in database ({} records). Skipping file.'.format(
                    fiscal_year, fiscal_period, existing_count))
                return

        data = pd.read_csv(filename, dtype=str)
        data = LoaderUtils.cleanData(
            data,
            SF133,
            {"ata": "allocation_transfer_agency",
             "aid": "agency_identifier",
             "availability_type_code": "availability_type_code",
             "bpoa": "beginning_period_of_availa",
             "epoa": "ending_period_of_availabil",
             "main_account": "main_account_code",
             "sub_account": "sub_account_code",
             "fiscal_year": "fiscal_year",
             "period": "period",
             "line_num": "line",
             "amount_summed": "amount"},
            {"allocation_transfer_agency": {"pad_to_length": 3},
             "agency_identifier": {"pad_to_length": 3},
             "main_account_code": {"pad_to_length": 4},
             "sub_account_code": {"pad_to_length": 3},
             # next 3 lines handle the TAS fields that shouldn't
             # be padded but should still be empty spaces rather
             # than NULLs. this ensures that the downstream pivot & melt
             # (which insert the missing 0-value SF-133 lines)
             # will work as expected (values used in the pivot
             # index cannot be NULL).
             # the "pad_to_length: 0" works around the fact
             # that sometimes the incoming data for these columns
             # is a single space and sometimes it is blank/NULL.
             "beginning_period_of_availa": {"pad_to_length": 0},
             "ending_period_of_availabil": {"pad_to_length": 0},
             "availability_type_code": {"pad_to_length": 0},
             "amount": {"strip_commas": True}}
        )

        # todo: find out how to handle dup rows (e.g., same tas/period/line number)
        # line numbers 2002 and 2102 are the only duped SF 133 report line numbers,
        # and they are not used by the validation rules, so for now
        # just remove them before loading our SF-133 table
        dupe_line_numbers = ['2002', '2102']
        data = data[~data.line.isin(dupe_line_numbers)]

        # add concatenated TAS field for internal use (i.e., joining to staging tables)
        data['tas'] = data.apply(format_internal_tas, axis=1)

        # incoming .csv does not always include rows for zero-value SF-133 lines
        # so we add those here because they're needed for the SF-133 validations.
        # 1. "pivot" the sf-133 dataset to explode it horizontally, creating one
        # row for each tas/fiscal year/period, with columns for each SF-133 line.
        # the "fill_value=0" parameter puts a 0 into any Sf-133 line number cell
        # with a missing value for a specific tas/fiscal year/period.
        # 2. Once the zeroes are filled in, "melt" the pivoted data back to its normal
        # format of one row per tas/fiscal year/period/line.
        # NOTE: fields used for the pivot in step #1 (i.e., items in pivot_idx) cannot
        # have NULL values, else they will be silently dropped by pandas :(
        pivot_idx = ['created_at', 'updated_at', 'agency_identifier', 'allocation_transfer_agency',
                     'availability_type_code', 'beginning_period_of_availa', 'ending_period_of_availabil',
                     'main_account_code', 'sub_account_code', 'tas', 'fiscal_year', 'period']
        data.amount = data.amount.astype(float)
        data = pd.pivot_table(data, values='amount', index=pivot_idx, columns=['line'], fill_value=0).reset_index()
        data = pd.melt(data, id_vars=pivot_idx, value_name='amount')

        # Now that we've added zero lines for EVERY tas and SF 133 line number, get rid of the ones
        # we don't actually use in the validations. Arguably, it would be better just to include
        # everything, but that drastically increases the number of records we're inserting to the
        # sf_133 table. If we ever decide that we need *all* SF 133 lines that are zero value,
        # remove the list and the filter that follow.
        sf_133_validation_lines = [
            '1000', '1010', '1011', '1012', '1013', '1020', '1021', '1022',
            '1023', '1024', '1025', '1026', '1029', '1030', '1031', '1032',
            '1033', '1040', '1041', '1042', '1160', '1180', '1260', '1280',
            '1340', '1440', '1540', '1640', '1750', '1850', '1910', '2190',
            '2490', '2500', '3020', '4801', '4802', '4881', '4882', '4901',
            '4902', '4908', '4981', '4982'
        ]
        # keep a row if it is a validation line (even at zero) or has a non-zero amount
        data = data[(data.line.isin(sf_133_validation_lines)) | (data.amount != 0)]

        # we didn't use the 'keep_null' option when padding allocation transfer agency,
        # because nulls in that column break the pivot (see above comments).
        # so, replace the ata '000' with an empty value before inserting to db
        data['allocation_transfer_agency'] = data['allocation_transfer_agency'].str.replace('000', '')
        # make a pass through the dataframe, changing any empty values to None, to ensure
        # that those are represented as NULL in the db.
        data = data.applymap(lambda x: str(x).strip() if len(str(x).strip()) else None)

        # insert to db
        table_name = SF133.__table__.name
        num = LoaderUtils.insertDataframe(data, table_name, sess.connection())
        sess.commit()

        logger.info('{} records inserted to {}'.format(num, table_name))
def setupValidationDB():
    """Run ``insertCodes`` for the validation data and commit the result."""
    app = createApp()
    with app.app_context():
        session = GlobalDB.db().session
        insertCodes(session)
        session.commit()
def setUpClass(cls):
    """Create the submissions and jobs used by the file type tests."""
    super(FileTypeTests, cls).setUpClass()
    # TODO: refactor into a pytest fixture

    user = cls.userId
    # TODO: get rid of this flag once we're using a tempdb for test fixtures
    force_tas_load = False

    # One entry per job, in creation order:
    # (jobDict key, file to upload, file type name, starts a new submission?)
    # The last two jobs deliberately share one submission.
    jobSpecs = (
        ('valid', "appropValid.csv", 'appropriations', True),
        ('programValid', "programActivityValid.csv", 'program_activity', True),
        ('awardFinValid', "awardFinancialValid.csv", 'award_financial', True),
        ('awardValid', "awardValid.csv", 'award', True),
        ('awardProcValid', "awardProcValid.csv", 'award_procurement', False),
    )

    with createApp().app_context():
        sess = GlobalDB.db().session

        # Create submissions and jobs, also uploading
        # the files needed for each job.
        statusReadyId = JOB_STATUS_DICT['ready']
        jobTypeCsvId = JOB_TYPE_DICT['csv_record_validation']

        jobDict = {}
        submissionId = None
        for jobKey, uploadName, fileTypeName, newSubmission in jobSpecs:
            if newSubmission:
                submissionId = cls.insertSubmission(sess, user)
            job_info = Job(
                filename=cls.uploadFile(uploadName, user),
                job_status_id=statusReadyId,
                job_type_id=jobTypeCsvId,
                file_type_id=FILE_TYPE_DICT[fileTypeName],
                submission_id=submissionId)
            sess.add(job_info)
            # flush so the job id is assigned before it is recorded
            sess.flush()
            jobDict[jobKey] = job_info.job_id

        # commit submissions/jobs and output IDs
        sess.commit()
        for job_type, job_id in jobDict.items():
            print('{}: {}'.format(job_type, job_id))

        # Load fields and rules
        FileTypeTests.load_definitions(sess, force_tas_load)

        cls.jobDict = jobDict
def test_check_status(self):
    """Test broker status route response.

    Calls /v1/check_status/ for the status-check submission and verifies
    the appropriations job, its error and warning metadata, the
    cross-file job's error metadata, and the submission-level fields.
    """
    postJson = {"submission_id": self.status_check_submission_id}

    with createApp().app_context():
        sess = GlobalDB.db().session

        # Populating error info before calling route to avoid changing last update time
        populateSubmissionErrorInfo(self.status_check_submission_id)

        response = self.app.post_json(
            "/v1/check_status/", postJson,
            headers={"x-session-id": self.session_id})

        self.assertEqual(response.status_code, 200, msg=str(response.json))
        self.assertEqual(response.headers.get("Content-Type"),
                         "application/json")
        statusJson = response.json

        # response ids are coming back as string, so patch the jobIdDict
        jobIdDict = {k: str(v) for k, v in self.jobIdDict.items()}

        appropJob = None
        crossJob = None
        for job in statusJson["jobs"]:
            if str(job["job_id"]) == jobIdDict["appropriations"]:
                # Found the job to be checked
                appropJob = job
            elif str(job["job_id"]) == jobIdDict["cross_file"]:
                # Found cross file job
                crossJob = job

        # Must have an approp job and cross-file job
        self.assertIsNotNone(appropJob)
        self.assertIsNotNone(crossJob)

        # And that job must have the following
        self.assertEqual(appropJob["job_status"], "ready")
        self.assertEqual(appropJob["job_type"], "csv_record_validation")
        self.assertEqual(appropJob["file_type"], "appropriations")
        self.assertEqual(appropJob["filename"], "approp.csv")
        self.assertEqual(appropJob["file_status"], "complete")
        self.assertIn("missing_header_one", appropJob["missing_headers"])
        self.assertIn("missing_header_two", appropJob["missing_headers"])
        self.assertIn("duplicated_header_one", appropJob["duplicated_headers"])
        self.assertIn("duplicated_header_two", appropJob["duplicated_headers"])

        # Check file size and number of rows
        self.assertEqual(appropJob["file_size"], 2345)
        self.assertEqual(appropJob["number_of_rows"], 567)
        self.assertEqual(appropJob["error_type"], "row_errors")

        # Check error metadata for specified error
        ruleErrorData = None
        for data in appropJob["error_data"]:
            if data["field_name"] == "header_three":
                ruleErrorData = data
        self.assertIsNotNone(ruleErrorData)
        self.assertEqual(ruleErrorData["field_name"], "header_three")
        self.assertEqual(ruleErrorData["error_name"], "rule_failed")
        self.assertEqual(ruleErrorData["error_description"],
                         "A rule failed for this value")
        self.assertEqual(ruleErrorData["occurrences"], "7")
        self.assertEqual(ruleErrorData["rule_failed"],
                         "Header three value must be real")
        self.assertEqual(ruleErrorData["original_label"], "A1")

        # Check warning metadata for specified warning
        warningErrorData = None
        for data in appropJob["warning_data"]:
            if data["field_name"] == "header_three":
                warningErrorData = data
        self.assertIsNotNone(warningErrorData)
        self.assertEqual(warningErrorData["field_name"], "header_three")
        self.assertEqual(warningErrorData["error_name"], "rule_failed")
        self.assertEqual(warningErrorData["error_description"],
                         "A rule failed for this value")
        self.assertEqual(warningErrorData["occurrences"], "7")
        self.assertEqual(warningErrorData["rule_failed"],
                         "Header three value looks odd")
        self.assertEqual(warningErrorData["original_label"], "A2")

        # Check cross-file error metadata
        ruleErrorData = None
        for data in crossJob["error_data"]:
            if data["field_name"] == "header_four":
                ruleErrorData = data
        # fail with a clear assertion (not a TypeError on None) when the
        # cross-file error entry is missing
        self.assertIsNotNone(ruleErrorData)
        self.assertEqual(ruleErrorData["source_file"], "appropriations")
        self.assertEqual(ruleErrorData["target_file"], "award")

        # Check submission metadata
        self.assertEqual(statusJson["cgac_code"], "SYS")
        self.assertEqual(statusJson["reporting_period_start_date"], "Q1/2016")
        self.assertEqual(statusJson["reporting_period_end_date"], "Q1/2016")

        # Check submission level info
        self.assertEqual(statusJson["number_of_errors"], 17)
        self.assertEqual(statusJson["number_of_rows"], 667)

        # Get submission from db for attribute checks
        submission = sess.query(Submission).filter(
            Submission.submission_id == self.status_check_submission_id).one()

        # Check number of errors and warnings in submission table
        self.assertEqual(submission.number_of_errors, 17)
        self.assertEqual(submission.number_of_warnings, 7)

        # Check that submission was created today, this test may fail if run right at midnight UTC
        self.assertEqual(statusJson["created_on"],
                         datetime.utcnow().strftime("%m/%d/%Y"))
        self.assertEqual(
            statusJson["last_updated"],
            submission.updated_at.strftime("%Y-%m-%dT%H:%M:%S"))
def setUpClass(cls):
    """Create the users' ids, submissions and jobs shared by the file tests."""
    super(FileTests, cls).setUpClass()
    # TODO: refactor into a pytest fixture

    with createApp().app_context():
        sess = GlobalDB.db().session
        cls.session = sess

        def userIdFor(emailKey):
            # id of the test user whose email is stored under emailKey
            return sess.query(User).filter(
                User.email == cls.test_users[emailKey]).one().user_id

        def newSubmission(**options):
            # every submission here belongs to the submission test user
            # and uses the "SYS" cgac code
            return cls.insertSubmission(
                sess, cls.submission_user_id, cgac_code="SYS", **options)

        # get the submission test user (and a second user)
        cls.submission_user_id = userIdFor('submission_email')
        cls.other_user_id = userIdFor('inactive_email')

        # setup submission/jobs data for test_check_status
        cls.status_check_submission_id = newSubmission(
            startDate="10/2015", endDate="12/2015", is_quarter=True)

        cls.generation_submission_id = newSubmission(
            startDate="07/2015", endDate="09/2015", is_quarter=True)
        cls.setupFileGenerationSubmission(sess)

        cls.jobIdDict = cls.setupJobsForStatusCheck(
            sess, cls.status_check_submission_id)

        # setup submission/jobs data for test_error_report
        cls.error_report_submission_id = newSubmission(
            startDate="10/2015", endDate="10/2015")
        cls.setupJobsForReports(sess, cls.error_report_submission_id)

        # setup file status data for test_metrics
        cls.test_metrics_submission_id = newSubmission(
            startDate="08/2015", endDate="08/2015")
        cls.setupFileData(sess, cls.test_metrics_submission_id)

        # submission that already carries one error
        cls.row_error_submission_id = newSubmission(
            startDate="10/2015", endDate="12/2015", is_quarter=True,
            number_of_errors=1)
        cls.setupSubmissionWithError(sess, cls.row_error_submission_id)
def run_test(self, jobId, statusId, statusName, fileSize, stagingRows,
             errorStatus, numErrors, numWarnings=0, warningFileSize=None):
    """ Runs a validation test

    Args:
        jobId: ID of job for this validation
        statusId: Expected HTTP status code for this test
        statusName: Expected status in job tracker, False to skip the check
        fileSize: Expected file size of error report, False to skip the check
        stagingRows: Expected value of the job's number_of_rows_valid,
            False to skip the check
        errorStatus: Expected status in file table of error DB, False to
            skip the check (this also skips the numErrors/numWarnings checks)
        numErrors: Expected number of 'fatal' errors recorded for the job
        numWarnings: Expected number of 'warning' errors recorded for the job
        warningFileSize: Expected file size of warning report, None or False
            to skip the check

    Returns:
        The response object returned by self.validateJob(jobId)
    """
    with createApp().app_context():
        sess = GlobalDB.db().session

        response = self.validateJob(jobId)
        self.assertEqual(response.status_code, statusId,
                         str(self.getResponseInfo(response)))

        # get the job from db
        job = sess.query(Job).filter(Job.job_id == jobId).one()
        if statusName is not False:
            self.assertEqual(job.job_status_id, JOB_STATUS_DICT[statusName])

        self.assertEqual(
            response.headers.get("Content-Type"), "application/json")

        # Check valid row count for this job
        if stagingRows is not False:
            self.assertEqual(job.number_of_rows_valid, stagingRows)

        # NOTE(review): the error/warning counts are assumed to belong to the
        # errorStatus branch; confirm against the original indentation.
        if errorStatus is not False:
            self.assertEqual(
                sess.query(File).filter(File.job_id == jobId).one().file_status_id,
                FILE_STATUS_DICT[errorStatus]
            )
            self.assertEqual(checkNumberOfErrorsByJobId(jobId, 'fatal'), numErrors)
            self.assertEqual(checkNumberOfErrorsByJobId(jobId, 'warning'), numWarnings)

        # Collect the report-size checks that were requested; the error
        # report is always checked before the warning report.
        reportChecks = []
        if fileSize is not False:
            reportChecks.append(('error', fileSize))
        if warningFileSize is not None and warningFileSize is not False:
            reportChecks.append(('warning', warningFileSize))

        for reportType, expectedSize in reportChecks:
            reportPath = get_report_path(job, reportType)
            if self.local:
                self.assertFileSizeAppxy(expectedSize, reportPath)
            else:
                # Fetch the remote size once and compare it against both
                # bounds instead of querying S3 for each assertion.
                actualSize = s3UrlHandler.getFileSize(
                    'errors/{}'.format(reportPath))
                self.assertGreater(actualSize, expectedSize - 5)
                self.assertLess(actualSize, expectedSize + 5)

    return response
import logging
import sys

from dataactcore.interfaces.db import GlobalDB
from dataactbroker.fsrs import (
    configValid, fetchAndReplaceBatch, GRANT, PROCUREMENT)
from dataactvalidator.app import createApp


logger = logging.getLogger(__name__)


# Script entry point: runs fetchAndReplaceBatch for PROCUREMENT and then
# GRANT, and logs how many awards/subawards came back.
if __name__ == '__main__':
    logging.basicConfig(level=logging.INFO)
    with createApp().app_context():
        sess = GlobalDB.db().session

        # Bail out with a non-zero exit code when configValid() reports
        # that the FSRS configuration is not usable.
        if not configValid():
            logger.error("No config for broker/fsrs/[service]/wsdl")
            sys.exit(1)

        procurementAwards = fetchAndReplaceBatch(sess, PROCUREMENT)
        grantAwards = fetchAndReplaceBatch(sess, GRANT)
        allAwards = procurementAwards + grantAwards

        subAwardCount = 0
        for award in allAwards:
            subAwardCount += len(award.subawards)

        logger.info("Inserted/Updated %s awards, %s subawards",
                    len(allAwards), subAwardCount)
def setUpClass(cls):
    """Create the submissions and jobs (uploading their files) used by MixedFileTests."""
    super(MixedFileTests, cls).setUpClass()
    user = cls.userId
    force_tas_load = False

    with createApp().app_context():
        sess = GlobalDB.db().session

        statusReadyId = JOB_STATUS_DICT['ready']
        jobTypeCsvId = JOB_TYPE_DICT['csv_record_validation']
        jobDict = {}

        def registerJob(key, **jobFields):
            """Add a Job built from jobFields, flush, and record its job_id under key."""
            newJob = Job(**jobFields)
            sess.add(newJob)
            sess.flush()
            jobDict[key] = newJob.job_id

        def addCsvJobs(submissionId, specs):
            """Upload each file in specs and register a ready csv validation job for it.

            specs is a sequence of (jobDict key, file name, FILE_TYPE_DICT key).
            """
            for key, csvName, fileType in specs:
                registerJob(
                    key,
                    filename=cls.uploadFile(csvName, user),
                    job_status_id=statusReadyId,
                    job_type_id=jobTypeCsvId,
                    file_type_id=FILE_TYPE_DICT[fileType],
                    submission_id=submissionId)

        # single-file validations that contain failing rows; all three
        # jobs share one submission
        addCsvJobs(cls.insertSubmission(sess, user), [
            ('mixed', "appropMixed.csv", 'appropriations'),
            ('programMixed', "programActivityMixed.csv", 'program_activity'),
            ('awardMixed', "awardMixed.csv", 'award'),
        ])

        # single-file validation for award_financial
        # (submission has a non-Q1 end date)
        addCsvJobs(cls.insertSubmission(sess, user, datetime(2015, 3, 15)), [
            ('awardFinMixed', "awardFinancialMixed.csv", 'award_financial'),
        ])

        # file that has a mixed-delimiter heading
        addCsvJobs(cls.insertSubmission(sess, user), [
            ('awardMixedDelimiter', "awardMixedDelimiter.csv", 'award'),
        ])

        # cross-file: four csv jobs plus the cross-file validation job,
        # all on the same submission
        crossSubmissionId = cls.insertSubmission(sess, user)
        addCsvJobs(crossSubmissionId, [
            ('crossApprop', "cross_file_A.csv", 'appropriations'),
            ('crossPgmAct', "cross_file_B.csv", 'program_activity'),
            ('crossAwardFin', "cross_file_C.csv", 'award_financial'),
            ('crossAward', "cross_file_D2.csv", 'award'),
        ])
        registerJob(
            'crossFile',
            job_status_id=statusReadyId,
            job_type_id=JOB_TYPE_DICT['validation'],
            submission_id=crossSubmissionId)

        # short column names; all four jobs share one submission
        addCsvJobs(cls.insertSubmission(sess, user), [
            ('appropValidShortcols', "appropValidShortcols.csv",
             'appropriations'),
            ('programMixedShortcols', "programActivityMixedShortcols.csv",
             'program_activity'),
            ('awardFinMixedShortcols', "awardFinancialMixedShortcols.csv",
             'award_financial'),
            ('awardValidShortcols', "awardValidShortcols.csv", 'award'),
        ])

        # persist submissions/jobs and echo the generated IDs
        sess.commit()
        for jobKey in jobDict:
            print('{}: {}'.format(jobKey, jobDict[jobKey]))

        # Load fields and rules
        FileTypeTests.load_definitions(sess, force_tas_load, cls.RULES_TO_APPLY)

        cls.jobDict = jobDict
def setUpClass(cls):
    """Set up class-wide resources (test data).

    Creates one submission per test job, a JobDependency between two of
    the jobs, and a grid of FileColumn rows, then stores the job ids in
    cls.jobDict keyed by a short descriptive name.
    """
    super(JobTests, cls).setUpClass()
    user = cls.userId

    # Flag for testing a million+ errors (can take ~30 min to run)
    cls.includeLongTests = False

    with createApp().app_context():
        sess = GlobalDB.db().session
        jobDict = {}

        def addJob(key, statusName, jobTypeName, csvName=None):
            """Insert a fresh submission plus one 'appropriations' job on it.

            The file is uploaded (and `filename` set on the job) only when
            csvName is given; otherwise `filename` is not passed to Job.
            """
            submissionId = cls.insertSubmission(sess, user)
            jobFields = {}
            if csvName is not None:
                jobFields['filename'] = cls.uploadFile(csvName, user)
            newJob = Job(job_status_id=JOB_STATUS_DICT[statusName],
                         job_type_id=JOB_TYPE_DICT[jobTypeName],
                         file_type_id=FILE_TYPE_DICT['appropriations'],
                         submission_id=submissionId,
                         **jobFields)
            sess.add(newJob)
            # flush so the generated job_id is available right away
            sess.flush()
            jobDict[key] = newJob.job_id

        addJob('bad_upload', 'ready', 'file_upload')
        addJob('bad_prereq', 'ready', 'csv_record_validation')
        addJob('wrong_type', 'ready', 'external_validation')
        addJob('not_ready', 'finished', 'csv_record_validation')
        addJob('empty', 'ready', 'csv_record_validation',
               csvName='testEmpty.csv')

        # create dependency
        sess.add(JobDependency(job_id=jobDict["bad_prereq"],
                               prerequisite_id=jobDict["bad_upload"]))

        # Five columns for each of file ids 1-4: the first two are INT,
        # the remaining three are STRING. The generated column ids were
        # previously gathered into a local dict that nothing read, so the
        # rows are now simply queued and written by the commit below.
        for fileId in range(1, 5):
            for columnId in range(1, 6):
                if columnId < 3:
                    fieldType = FIELD_TYPE_DICT['INT']
                else:
                    fieldType = FIELD_TYPE_DICT['STRING']
                # NOTE(review): `required` compares the column index with
                # a field-type id, which looks unintended - confirm before
                # changing, since existing tests may depend on it.
                sess.add(FileColumn(
                    file_id=fileId,
                    field_types_id=fieldType,
                    name="header_{}".format(columnId),
                    required=(columnId != FIELD_TYPE_DICT['STRING'])))

        # commit submissions/jobs and output IDs
        sess.commit()
        for job_type, job_id in jobDict.items():
            print('{}: {}'.format(job_type, job_id))

        cls.jobDict = jobDict
def setupEmails():
    """Create email templates from model metadata.

    Adds an EmailTemplateType row for every known template type whose
    name is not in the table yet, commits, and then passes each template's
    subject, body and type name to loadEmailTemplate.
    """
    with createApp().app_context():
        sess = GlobalDB.db().session

        # insert email template types
        typeList = [('validate_email', ''),
                    ('account_approved', ''),
                    ('account_rejected', ''),
                    ('reset_password', ''),
                    ('account_creation', ''),
                    ('account_creation_user', ''),
                    ('unlock_account', ''),
                    ('review_submission', '')]

        for name, description in typeList:
            existingId = sess.query(
                EmailTemplateType.email_template_type_id).filter(
                EmailTemplateType.name == name).one_or_none()
            # only add types that are missing, so re-running is harmless
            if not existingId:
                sess.add(EmailTemplateType(name=name, description=description))

        sess.commit()

        # insert email templates
        # NOTE(review): the "[email protected]" text in the templates below
        # looks like a redacted address placeholder - confirm the intended
        # helpdesk address before relying on these bodies.

        # Confirm
        template = "This email address was just used to create a user account with the DATA Act Broker. To continue the registration process, please click <a href='[URL]'>here</a>. The link will expire in 24 hours. <br /> <br /> If you did not initiate this process, you may disregard this email.<br /><br />The DATA Act Broker Helpdesk<br />[email protected] "
        loadEmailTemplate(sess, "DATA Act Broker - Registration", template, "validate_email")

        # Approve
        template = "Thank you for registering for a user account with the DATA Act Broker. Your request has been approved by the DATA Act Broker Help Desk. You may now log into the Data Broker portal, using the password you created at registration, by clicking <a href='[URL]'>here</a>.<br /><br /> If you have any questions, please contact the DATA Act Broker Help Desk at [EMAIL].<br /><br />DATA Act Broker Helpdesk<br />[email protected]"
        loadEmailTemplate(sess, "DATA Act Broker - Access Approved", template, "account_approved")

        # Reject
        template = "Thank you for requesting log-in credentials for the DATA Act Broker. Your attempt to register has been denied. If you believe this determination was made in error, please contact the DATA Act Broker Helpdesk at [email protected].<br /><br />DATA Act Broker Helpdesk<br />[email protected]"
        loadEmailTemplate(sess, "DATA Act Broker - Access Denied", template, "account_rejected")

        # Password Reset (typo "processs" corrected to "process")
        template = "You have requested your password to be reset for your account. Please click the following link <a href='[URL]'>here</a> to start the process. The link will expire in 24 hours. <br/> <br/> If you did not request this password reset, please notify the DATA Act Broker Helpdesk ([email protected]) <br /><br />DATA Act Broker Helpdesk<br /><br />[email protected]"
        loadEmailTemplate(sess, "DATA Act Broker - Password Reset", template, "reset_password")

        # Admin Email
        template = "This email is to notify you that the following person has requested an account for the DATA Act Broker:<br /><br />Name: [REG_NAME]<br /><br />Title: [REG_TITLE]<br /><br />Agency Name: [REG_AGENCY_NAME]<br /><br />CGAC Code: [REG_CGAC_CODE]<br /><br />Email: [REG_EMAIL]<br /><br /><br /><br />To approve or deny this user for access to the Data Broker, please click <a href='[URL]'>here</a>.<br /><br />This action must be taken within 24 hours. <br /><br />Thank you for your prompt attention.<br /><br />DATA Act Broker Helpdesk<br />[email protected]"
        loadEmailTemplate(sess, "New Data Broker registration - Action Required", template, "account_creation")

        # User Email When finished submitting
        template = (
            "Thank you for registering a DATA Act Broker user account. "
            "The final registration step is for the Help Desk to review your "
            "request. You should receive an e-mail update from them within one "
            "business day, saying whether they've approved or denied your access."
            "<br /><br />"
            "Until the Help Desk approves your request, you won't be able to log "
            "into the Broker. Thanks for being patient with the security process--"
            "we appreciate your interest and look forward to working with you."
            "<br /><br/>"
            "If you have any questions or haven't received a follow-up e-mail "
            "within one business day, please get in touch with the Help Desk at "
            "[EMAIL]."
            "<br /><br />"
            "The DATA Act Implementation Team <br />"
            "[EMAIL]")
        loadEmailTemplate(sess, "DATA Act Broker - Registration", template, "account_creation_user")

        # Unlock account email (typo "processs" corrected to "process")
        template = "Your account has been unlocked and requires your password to be reset. Please click the following link <a href='[URL]'>here</a> to start the process. The link will expire in 24 hours. <br/> <br/> If you did not request your account to be unlocked, please notify the DATA Act Broker Helpdesk ([email protected]) <br /><br />DATA Act Broker Helpdesk<br /><br />[email protected]"
        loadEmailTemplate(sess, "DATA Act Broker - Password Reset", template, "unlock_account")

        # Submission Review
        template = "[REV_USER_NAME] has shared a DATA Act broker submission with you. Click <a href='[REV_URL]'>here</a> to review their submission. For questions or comments, please email the DATA Act Broker Helpdesk ([email protected])."
        loadEmailTemplate(sess, "DATA Act Broker - Submission Ready for Review", template, "review_submission")