def test_wrong_file_name_case(self):
    """Check that the pipeline raises DataError for files named '...wrong-name.csv'.

    Two uploads are exercised in turn: a genomic set file, then a manifest
    result file placed in the fake biobank bucket/result folder. After each
    upload, running the pipeline is expected to raise DataError.
    """
    time_format = genomic_set_file_handler.INPUT_CSV_TIME_FORMAT

    # 1) Genomic set upload whose name carries the 'wrong-name' suffix.
    set_contents = test_data.open_genomic_set_file('Genomic-Test-Set-test-3.csv')
    set_stamp = self._naive_utc_to_naive_central(clock.CLOCK.now()).strftime(time_format)
    bad_set_name = 'Genomic-Test-Set-v1%swrong-name.csv' % set_stamp
    self._write_cloud_csv(bad_set_name, set_contents)
    with self.assertRaises(DataError):
        genomic_pipeline.process_genomic_water_line()

    # 2) Manifest result upload with the same kind of bad name.
    result_contents = test_data.open_genomic_set_file('Genomic-Manifest-Result-test.csv')
    result_stamp = self._naive_utc_to_naive_central(clock.CLOCK.now()).strftime(time_format)
    bad_result_name = 'Genomic-Manifest-Result-AoU-1-v1%swrong-name.csv' % result_stamp
    self._write_cloud_csv(
        bad_result_name,
        result_contents,
        bucket=_FAKE_BIOBANK_SAMPLE_BUCKET,
        folder=_FAKE_BUCKET_RESULT_FOLDER,
    )
    with self.assertRaises(DataError):
        genomic_pipeline.process_genomic_water_line()
def test_read_from_csv_file(self):
    """Load a genomic set CSV from the bucket and inspect the stored set and members.

    Three participants (each with a summary and a biobank order) are inserted
    first, then 'Genomic-Test-Set-test-1.csv' is uploaded and read via
    read_genomic_set_from_bucket(). The first stored set and every stored
    member are checked against the allowed values.
    """
    # (participantId, biobankId, biobankOrderId, order identifier value)
    seed_rows = (
        (123, 1234, '123', u'12345678'),
        (124, 1235, '124', u'12345679'),
        (125, 1236, '125', u'12345680'),
    )
    for participant_id, biobank_id, order_id, client_id in seed_rows:
        person = self.participant_dao.insert(
            Participant(participantId=participant_id, biobankId=biobank_id))
        self.summary_dao.insert(self.participant_summary(person))
        order = self._make_biobank_order(
            participantId=person.participantId,
            biobankOrderId=order_id,
            identifiers=[
                BiobankOrderIdentifier(system=u'https://www.pmi-ops.org', value=client_id),
            ])
        BiobankOrderDao().insert(order)

    csv_contents = test_data.open_genomic_set_file('Genomic-Test-Set-test-1.csv')
    stamp = self._naive_utc_to_naive_central(clock.CLOCK.now()).strftime(
        genomic_set_file_handler.INPUT_CSV_TIME_FORMAT)
    self._write_cloud_csv('cloud%s.csv' % stamp, csv_contents)

    genomic_set_file_handler.read_genomic_set_from_bucket()

    # Only the first stored genomic set is inspected.
    stored_set = GenomicSetDao().get_all()[0]
    self.assertEqual(stored_set.genomicSetName, 'name_xxx')
    self.assertEqual(stored_set.genomicSetCriteria, 'criteria_xxx')
    self.assertEqual(stored_set.genomicSetVersion, 1)

    allowed_participant_ids = [123, 124, 125]
    allowed_order_ids = ['123', '124', '125']
    allowed_biobank_ids = ['1234', '1235', '1236']
    allowed_client_ids = ['12345678', '12345679', '12345680']
    allowed_genome_types = ['aou_wgs', 'aou_array']
    allowed_ny_flags = [0, 1]
    allowed_sexes = ['F', 'M']

    for member in GenomicSetMemberDao().get_all():
        self.assertIn(member.participantId, allowed_participant_ids)
        self.assertIn(member.biobankOrderId, allowed_order_ids)
        self.assertIn(member.biobankId, allowed_biobank_ids)
        self.assertIn(member.biobankOrderClientId, allowed_client_ids)
        self.assertEqual(member.genomicSetId, 1)
        self.assertIn(member.genomeType, allowed_genome_types)
        self.assertIn(member.nyFlag, allowed_ny_flags)
        self.assertIn(member.sexAtBirth, allowed_sexes)
def test_over_24hours_genomic_set_file_case(self):
    """Check that a genomic set file stamped 25 hours ago produces no set members."""
    csv_contents = test_data.open_genomic_set_file('Genomic-Test-Set-test-3.csv')

    # Build a file name whose embedded timestamp is 25 hours before "now".
    stale_moment = clock.CLOCK.now() - datetime.timedelta(hours=25)
    stale_stamp = self._naive_utc_to_naive_central(stale_moment).strftime(
        genomic_set_file_handler.INPUT_CSV_TIME_FORMAT)
    stale_filename = 'Genomic-Test-Set-v1%s.csv' % stale_stamp
    self._write_cloud_csv(stale_filename, csv_contents)

    genomic_pipeline.process_genomic_water_line()

    stored_members = GenomicSetMemberDao().get_all()
    self.assertEqual(len(stored_members), 0)
def test_end_to_end_invalid_case(self):
    """Run the pipeline on four participants and expect four 'invalid' result rows.

    The participants differ in one setup detail each (date of birth, consent
    time, empty zip code, and a biobank order identifier with system u'c').
    After 'Genomic-Test-Set-test-3.csv' is uploaded and the pipeline runs, the
    latest 'Validation-Result' CSV must contain every expected column and
    exactly four rows, each with status 'invalid' and its own invalid_reason.
    """
    pmi_system = u'https://www.pmi-ops.org'
    # (summary overrides, identifier system, identifier value), in creation order.
    participant_specs = (
        ({'dateOfBirth': '2018-02-14'}, pmi_system, u'12345678'),
        ({'consentForStudyEnrollmentTime': datetime.datetime(1990, 1, 1)},
         pmi_system, u'12345679'),
        ({'zipCode': ''}, pmi_system, u'12345680'),
        ({}, u'c', u'e'),
    )
    for summary_overrides, id_system, id_value in participant_specs:
        participant = self._make_participant()
        self._make_summary(participant, **summary_overrides)
        self._make_biobank_order(
            participantId=participant.participantId,
            biobankOrderId=participant.participantId,
            identifiers=[BiobankOrderIdentifier(system=id_system, value=id_value)])

    samples_file = test_data.open_genomic_set_file('Genomic-Test-Set-test-3.csv')
    timestamp = self._naive_utc_to_naive_central(clock.CLOCK.now()).strftime(
        genomic_set_file_handler.INPUT_CSV_TIME_FORMAT)
    self._write_cloud_csv('Genomic-Test-Set-v1%s.csv' % timestamp, samples_file)

    genomic_pipeline.process_genomic_water_line()

    class ResultCsvColumns(object):
        """Names of the CSV columns checked in the validation result file."""
        GENOMIC_SET_NAME = 'genomic_set_name'
        GENOMIC_SET_CRITERIA = 'genomic_set_criteria'
        PID = 'pid'
        BIOBANK_ORDER_ID = 'biobank_order_id'
        NY_FLAG = 'ny_flag'
        SEX_AT_BIRTH = 'sex_at_birth'
        GENOME_TYPE = 'genome_type'
        STATUS = 'status'
        INVALID_REASON = 'invalid_reason'

        ALL = (GENOMIC_SET_NAME, GENOMIC_SET_CRITERIA, PID, BIOBANK_ORDER_ID,
               NY_FLAG, SEX_AT_BIRTH, GENOME_TYPE, STATUS, INVALID_REASON)

    # verify result file
    bucket_name = config.getSetting(config.GENOMIC_SET_BUCKET_NAME)
    path = self._find_latest_genomic_set_csv(bucket_name, 'Validation-Result')
    csv_file = cloudstorage_api.open(path)
    try:
        csv_reader = csv.DictReader(csv_file, delimiter=',')
        # fieldnames is populated lazily from the header row, so read it (and
        # all data rows) before the handle is closed.
        fieldnames = csv_reader.fieldnames
        rows = list(csv_reader)
    finally:
        # Always release the cloud storage handle, even if parsing fails.
        csv_file.close()

    missing_cols = set(ResultCsvColumns.ALL) - set(fieldnames)
    self.assertEqual(len(missing_cols), 0)
    self.assertEqual(len(rows), 4)

    # Column order matches the order in which each row's values are checked.
    checked_columns = (
        ResultCsvColumns.GENOMIC_SET_NAME,
        ResultCsvColumns.GENOMIC_SET_CRITERIA,
        ResultCsvColumns.STATUS,
        ResultCsvColumns.INVALID_REASON,
        ResultCsvColumns.PID,
        ResultCsvColumns.BIOBANK_ORDER_ID,
        ResultCsvColumns.NY_FLAG,
        ResultCsvColumns.GENOME_TYPE,
        ResultCsvColumns.SEX_AT_BIRTH,
    )
    expected_rows = (
        ('name_xxx', 'criteria_xxx', 'invalid', 'INVALID_AGE',
         '1', '1', 'Y', 'aou_wgs', 'M'),
        ('name_xxx', 'criteria_xxx', 'invalid', 'INVALID_CONSENT',
         '2', '2', 'N', 'aou_array', 'F'),
        ('name_xxx', 'criteria_xxx', 'invalid', 'INVALID_NY_ZIPCODE',
         '3', '3', 'N', 'aou_array', 'M'),
        ('name_xxx', 'criteria_xxx', 'invalid', 'INVALID_BIOBANK_ORDER_CLIENT_ID',
         '4', '4', 'Y', 'aou_wgs', 'F'),
    )
    for row_index, expected_row in enumerate(expected_rows):
        for column, expected in zip(checked_columns, expected_row):
            actual = rows[row_index][column]
            self.assertEqual(
                actual, expected,
                'row %d, column %r: got %r, expected %r'
                % (row_index, column, actual, expected))
def test_end_to_end_valid_case(self):
    """Run the pipeline on three participants and verify every produced artifact.

    After the genomic set file ('Genomic-Test-Set-test-2.csv') and a manifest
    result file are uploaded and the pipeline runs, this checks:
      * the latest 'Validation-Result' CSV (three rows, all 'valid'),
      * the latest 'Manifest' CSV in the biobank samples bucket,
      * the latest 'Manifest-Result' CSV, including package ids,
      * the packageId stored on every genomic set member.
    """
    for id_value in (u'12345678', u'12345679', u'12345680'):
        participant = self._make_participant()
        self._make_summary(participant)
        self._make_biobank_order(
            participantId=participant.participantId,
            biobankOrderId=participant.participantId,
            identifiers=[
                BiobankOrderIdentifier(system=u'https://www.pmi-ops.org', value=id_value),
            ])

    samples_file = test_data.open_genomic_set_file('Genomic-Test-Set-test-2.csv')
    input_filename = 'Genomic-Test-Set-v1%s.csv' % (
        self._naive_utc_to_naive_central(clock.CLOCK.now()).strftime(
            genomic_set_file_handler.INPUT_CSV_TIME_FORMAT))
    self._write_cloud_csv(input_filename, samples_file)

    manifest_result_file = test_data.open_genomic_set_file(
        'Genomic-Manifest-Result-test.csv')
    manifest_result_filename = 'Genomic-Manifest-Result-AoU-1-v1%s.csv' % (
        self._naive_utc_to_naive_central(clock.CLOCK.now()).strftime(
            genomic_set_file_handler.INPUT_CSV_TIME_FORMAT))
    self._write_cloud_csv(manifest_result_filename, manifest_result_file,
                          bucket=_FAKE_BIOBANK_SAMPLE_BUCKET,
                          folder=_FAKE_BUCKET_RESULT_FOLDER)

    genomic_pipeline.process_genomic_water_line()

    def read_latest_csv(bucket, name_fragment):
        """Return (fieldnames, rows) of the latest matching CSV, closing the handle."""
        path = self._find_latest_genomic_set_csv(bucket, name_fragment)
        csv_file = cloudstorage_api.open(path)
        try:
            reader = csv.DictReader(csv_file, delimiter=',')
            # fieldnames is read lazily from the header; fetch it before rows.
            fieldnames = reader.fieldnames
            return fieldnames, list(reader)
        finally:
            # Release the cloud storage handle even if parsing fails.
            csv_file.close()

    def assert_rows(rows, columns, expected_rows):
        """Compare rows[i][column] against the expected table, in table order."""
        for row_index, expected_row in enumerate(expected_rows):
            for column, expected in zip(columns, expected_row):
                actual = rows[row_index][column]
                self.assertEqual(
                    actual, expected,
                    'row %d, column %r: got %r, expected %r'
                    % (row_index, column, actual, expected))

    class ResultCsvColumns(object):
        """Names of the CSV columns checked in the validation result file."""
        GENOMIC_SET_NAME = 'genomic_set_name'
        GENOMIC_SET_CRITERIA = 'genomic_set_criteria'
        PID = 'pid'
        BIOBANK_ORDER_ID = 'biobank_order_id'
        NY_FLAG = 'ny_flag'
        SEX_AT_BIRTH = 'sex_at_birth'
        GENOME_TYPE = 'genome_type'
        STATUS = 'status'
        INVALID_REASON = 'invalid_reason'

        ALL = (GENOMIC_SET_NAME, GENOMIC_SET_CRITERIA, PID, BIOBANK_ORDER_ID,
               NY_FLAG, SEX_AT_BIRTH, GENOME_TYPE, STATUS, INVALID_REASON)

    class ExpectedCsvColumns(object):
        """Names of the CSV columns checked in the manifest and manifest result files."""
        VALUE = 'value'
        BIOBANK_ID = 'biobank_id'
        SEX_AT_BIRTH = 'sex_at_birth'
        GENOME_TYPE = 'genome_type'
        NY_FLAG = 'ny_flag'
        REQUEST_ID = 'request_id'
        PACKAGE_ID = 'package_id'

        # BIOBANK_ID is included because its values are asserted below.
        ALL = (VALUE, BIOBANK_ID, SEX_AT_BIRTH, GENOME_TYPE, NY_FLAG,
               REQUEST_ID, PACKAGE_ID)

    # verify result file
    bucket_name = config.getSetting(config.GENOMIC_SET_BUCKET_NAME)
    fieldnames, rows = read_latest_csv(bucket_name, 'Validation-Result')
    missing_cols = set(ResultCsvColumns.ALL) - set(fieldnames)
    self.assertEqual(len(missing_cols), 0)
    self.assertEqual(len(rows), 3)
    assert_rows(
        rows,
        (ResultCsvColumns.GENOMIC_SET_NAME,
         ResultCsvColumns.GENOMIC_SET_CRITERIA,
         ResultCsvColumns.STATUS,
         ResultCsvColumns.INVALID_REASON,
         ResultCsvColumns.PID,
         ResultCsvColumns.BIOBANK_ORDER_ID,
         ResultCsvColumns.NY_FLAG,
         ResultCsvColumns.GENOME_TYPE,
         ResultCsvColumns.SEX_AT_BIRTH),
        (
            ('name_xxx', 'criteria_xxx', 'valid', '', '1', '1', 'Y', 'aou_wgs', 'M'),
            ('name_xxx', 'criteria_xxx', 'valid', '', '2', '2', 'N', 'aou_array', 'F'),
            ('name_xxx', 'criteria_xxx', 'valid', '', '3', '3', 'N', 'aou_array', 'M'),
        ))

    # verify manifest files
    bucket_name = config.getSetting(config.BIOBANK_SAMPLES_BUCKET_NAME)
    fieldnames, rows = read_latest_csv(bucket_name, 'Manifest')
    missing_cols = set(ExpectedCsvColumns.ALL) - set(fieldnames)
    self.assertEqual(len(missing_cols), 0)
    assert_rows(
        rows,
        (ExpectedCsvColumns.VALUE,
         ExpectedCsvColumns.BIOBANK_ID,
         ExpectedCsvColumns.SEX_AT_BIRTH,
         ExpectedCsvColumns.GENOME_TYPE,
         ExpectedCsvColumns.NY_FLAG),
        (
            ('12345678', '1', 'M', 'aou_wgs', 'Y'),
            ('12345679', '2', 'F', 'aou_array', 'N'),
            ('12345680', '3', 'M', 'aou_array', 'N'),
        ))

    # verify manifest result files
    bucket_name = config.getSetting(config.BIOBANK_SAMPLES_BUCKET_NAME)
    fieldnames, rows = read_latest_csv(bucket_name, 'Manifest-Result')
    missing_cols = set(ExpectedCsvColumns.ALL) - set(fieldnames)
    self.assertEqual(len(missing_cols), 0)
    assert_rows(
        rows,
        (ExpectedCsvColumns.VALUE,
         ExpectedCsvColumns.BIOBANK_ID,
         ExpectedCsvColumns.SEX_AT_BIRTH,
         ExpectedCsvColumns.GENOME_TYPE,
         ExpectedCsvColumns.NY_FLAG,
         ExpectedCsvColumns.PACKAGE_ID),
        (
            ('12345678', '1', 'M', 'aou_wgs', 'Y', 'PKG-XXXX-XXXX1'),
            ('12345679', '2', 'F', 'aou_array', 'N', 'PKG-XXXX-XXXX2'),
            ('12345680', '3', 'M', 'aou_array', 'N', 'PKG-XXXX-XXXX3'),
        ))

    # verify package id in database
    member_dao = GenomicSetMemberDao()
    members = member_dao.get_all()
    for member in members:
        self.assertIn(
            member.packageId,
            ['PKG-XXXX-XXXX1', 'PKG-XXXX-XXXX2', 'PKG-XXXX-XXXX3'])