def test_end_to_end(self):
    """Round-trip: write a 3-row samples CSV to cloud storage, run the pipeline,
    and verify the stored samples and participant summaries.

    NOTE(review): a second `test_end_to_end` is defined later in this file and
    shadows this one, so this copy never runs under unittest discovery --
    confirm which version is intended and rename or delete the other.
    """
    dao = BiobankStoredSampleDao()
    self.assertEqual(dao.count(), 0)

    # Create 3 participants and pass their (random) IDs into sample rows.
    summary_dao = ParticipantSummaryDao()
    biobank_ids = []
    participant_ids = []
    for _ in xrange(3):
        participant = self.participant_dao.insert(Participant())
        summary_dao.insert(self.participant_summary(participant))
        participant_ids.append(participant.participantId)
        biobank_ids.append(participant.biobankId)
        # No samples have arrived yet for a freshly created participant.
        self.assertEqual(
            summary_dao.get(participant.participantId).numBaselineSamplesArrived, 0)

    # Assign three distinct baseline test codes to the three sample rows.
    test1, test2, test3 = random.sample(_BASELINE_TESTS, 3)
    samples_file = test_data.open_biobank_samples(
        *biobank_ids, test1=test1, test2=test2, test3=test3)
    # The pipeline selects the latest CSV by the timestamp embedded in its name.
    input_filename = 'cloud%s.csv' % self._naive_utc_to_naive_central(
        clock.CLOCK.now()).strftime(biobank_samples_pipeline.INPUT_CSV_TIME_FORMAT)
    self._write_cloud_csv(input_filename, samples_file.read())

    biobank_samples_pipeline.upsert_from_latest_csv()

    self.assertEqual(dao.count(), 3)
    # Confirmed timestamps come from the fixture CSV contents.
    self._check_summary(participant_ids[0], test1, '2016-11-29T12:19:32')
    self._check_summary(participant_ids[1], test2, '2016-11-29T12:38:58')
    self._check_summary(participant_ids[2], test3, '2016-11-29T12:41:26')
def test_sample_from_row_wrong_prefix(self):
    """Rows whose biobank ID has a non-matching prefix are skipped (None returned)."""
    samples_file = test_data.open_biobank_samples(111, 222, 333)
    reader = csv.DictReader(samples_file, delimiter='\t')
    row = next(reader)  # builtin next() replaces Py2-only reader.next()
    # Deliberately malformed date: with a non-matching prefix ('Q') the row must be
    # rejected (None) before the date is parsed -- contrast with
    # test_sample_from_row_invalid, where a matching prefix makes this date raise.
    row[biobank_samples_pipeline._Columns.CONFIRMED_DATE] = '2016 11 19'
    self.assertIsNone(
        biobank_samples_pipeline._create_sample_from_row(row, 'Q'))
def test_sample_from_row_invalid(self):
    """A malformed confirmed-date raises DataError when the biobank prefix matches."""
    samples_file = test_data.open_biobank_samples(111, 222, 333)
    reader = csv.DictReader(samples_file, delimiter='\t')
    row = next(reader)  # builtin next() replaces Py2-only reader.next()
    # Spaces instead of the expected timestamp format make the date unparseable.
    row[biobank_samples_pipeline._Columns.CONFIRMED_DATE] = '2016 11 19'
    with self.assertRaises(biobank_samples_pipeline.DataError):
        biobank_samples_pipeline._create_sample_from_row(
            row, get_biobank_id_prefix())
def test_end_to_end(self):
    """Round-trip: upload a 16-parent-row samples CSV, run the pipeline, and
    verify stored-sample counts plus per-participant summary status/timestamps.
    """
    dao = BiobankStoredSampleDao()
    self.assertEqual(dao.count(), 0)

    # Create one participant per parent row and pass their (random) IDs into sample rows.
    summary_dao = ParticipantSummaryDao()
    biobank_ids = []
    participant_ids = []
    nids = 16  # equal to the number of parent rows in 'biobank_samples_1.csv'
    cids = 1  # equal to the number of child rows in 'biobank_samples_1.csv'
    for _ in xrange(nids):
        participant = self.participant_dao.insert(Participant())
        summary_dao.insert(self.participant_summary(participant))
        participant_ids.append(participant.participantId)
        biobank_ids.append(participant.biobankId)
        # No samples have arrived yet for a freshly created participant.
        self.assertEqual(
            summary_dao.get(participant.participantId).numBaselineSamplesArrived, 0)

    test_codes = random.sample(_BASELINE_TESTS, nids)
    samples_file = test_data.open_biobank_samples(
        biobank_ids=biobank_ids, tests=test_codes)
    lines = samples_file.split('\n')[1:]  # remove field name line

    # The pipeline selects the latest CSV by the timestamp embedded in its name.
    input_filename = 'cloud%s.csv' % self._naive_utc_to_naive_central(
        clock.CLOCK.now()).strftime(biobank_samples_pipeline.INPUT_CSV_TIME_FORMAT)
    self._write_cloud_csv(input_filename, samples_file)
    biobank_samples_pipeline.upsert_from_latest_csv()

    # Child rows are not stored, so expect one fewer stored sample than rows.
    self.assertEqual(dao.count(), nids - cids)

    for i in range(nids):
        cols = lines[i].split('\t')
        if cols[10].strip():  # skip child sample
            continue
        # If status is 'In Prep', then sample confirmed timestamp should be empty.
        if cols[2] == 'In Prep':
            self.assertEqual(len(cols[11]), 0)
        else:
            status = SampleStatus.RECEIVED
            ts_str = cols[11]
            # DA-814 - Participant Summary test status should be: Unset, Received
            # or Disposed only. If sample is disposed, then check disposed
            # timestamp, otherwise check confirmed timestamp.
            # DA-871 - Only check status is disposed when reason code is a bad
            # disposal (maps above SampleStatus.UNKNOWN).
            if (cols[2] == 'Disposed'
                    and get_sample_status_enum_value(cols[8]) > SampleStatus.UNKNOWN):
                status = SampleStatus.DISPOSED
                ts_str = cols[9]
            ts = datetime.datetime.strptime(ts_str, '%Y/%m/%d %H:%M:%S')
            self._check_summary(participant_ids[i], test_codes[i], ts, status)
def test_sample_from_row_old_test(self):
    """Rows carrying an obsolete test code still produce a stored sample."""
    samples_file = test_data.open_biobank_samples(111, 222, 333)
    reader = csv.DictReader(samples_file, delimiter='\t')
    row = next(reader)  # builtin next() replaces Py2-only reader.next()
    row[biobank_samples_pipeline._Columns.TEST_CODE] = '2PST8'
    sample = biobank_samples_pipeline._create_sample_from_row(
        row, get_biobank_id_prefix())
    self.assertIsNotNone(sample)
    # The sample's ID and (old) test code are taken verbatim from the row.
    cols = biobank_samples_pipeline._Columns
    self.assertEqual(sample.biobankStoredSampleId, row[cols.SAMPLE_ID])
    self.assertEqual(sample.test, row[cols.TEST_CODE])
def test_sample_from_row(self):
    """A well-formed CSV row maps onto a stored sample field-by-field."""
    samples_file = test_data.open_biobank_samples(112, 222, 333)
    reader = csv.DictReader(samples_file, delimiter='\t')
    row = next(reader)  # builtin next() replaces Py2-only reader.next()
    sample = biobank_samples_pipeline._create_sample_from_row(
        row, get_biobank_id_prefix())
    self.assertIsNotNone(sample)

    cols = biobank_samples_pipeline._Columns
    self.assertEqual(sample.biobankStoredSampleId, row[cols.SAMPLE_ID])
    self.assertEqual(to_client_biobank_id(sample.biobankId),
                     row[cols.EXTERNAL_PARTICIPANT_ID])
    self.assertEqual(sample.test, row[cols.TEST_CODE])
    # Timestamps are stored naive-UTC; convert back to Central to compare with
    # the CSV's original text.
    confirmed_date = self._naive_utc_to_naive_central(sample.confirmed)
    self.assertEqual(
        confirmed_date.strftime(biobank_samples_pipeline._INPUT_TIMESTAMP_FORMAT),
        row[cols.CONFIRMED_DATE])
    received_date = self._naive_utc_to_naive_central(sample.created)
    self.assertEqual(
        received_date.strftime(biobank_samples_pipeline._INPUT_TIMESTAMP_FORMAT),
        row[cols.CREATE_DATE])