def test_delete_old_metrics(self):
  """Old metrics versions (and their child buckets) are purged after 3 days.

  Inserts a version with two buckets at TIME, then verifies delete_old_versions()
  keeps it at TIME_4 (within the retention window) and removes it at TIME_5.
  """
  with FakeClock(TIME):
    self.metrics_version_dao.set_pipeline_in_progress()
    metrics_bucket_1 = MetricsBucket(metricsVersionId=1, date=datetime.date.today(),
                                     hpoId='', metrics='foo')
    metrics_bucket_2 = MetricsBucket(metricsVersionId=1, date=datetime.date.today(),
                                     hpoId=PITT, metrics='bar')
    self.metrics_bucket_dao.insert(metrics_bucket_1)
    self.metrics_bucket_dao.insert(metrics_bucket_2)
  # For up to 3 days, the metrics stay around.
  with FakeClock(TIME_4):
    self.metrics_version_dao.delete_old_versions()
    expected_mv = MetricsVersion(metricsVersionId=1, inProgress=True, complete=False,
                                 date=TIME, dataVersion=SERVING_METRICS_DATA_VERSION)
    expected_mv.buckets.append(metrics_bucket_1)
    expected_mv.buckets.append(metrics_bucket_2)
    # assertEqual: assertEquals is a deprecated unittest alias.
    self.assertEqual(
        expected_mv.asdict(follow=['buckets']),
        self.metrics_version_dao.get_with_children(1).asdict(follow=['buckets']))
  # After 3 days, the metrics are gone.
  with FakeClock(TIME_5):
    self.metrics_version_dao.delete_old_versions()
    self.assertIsNone(self.metrics_version_dao.get_with_children(1))
def setup_buckets(self):
  """Populate a finished metrics version (id 1) with three buckets.

  Two buckets are dated today (one cross-HPO '', one for 'PITT'); the third
  is a cross-HPO bucket dated tomorrow. The pipeline is marked finished so
  the version becomes servable.
  """
  self.version_dao.set_pipeline_in_progress()
  buckets = [
      MetricsBucket(metricsVersionId=1, date=self.today, hpoId='',
                    metrics='{ "x": "a" }'),
      MetricsBucket(metricsVersionId=1, date=self.today, hpoId='PITT',
                    metrics='{ "x": "b" }'),
      MetricsBucket(metricsVersionId=1, date=self.tomorrow, hpoId='',
                    metrics='{ "y": "c" }'),
  ]
  for bucket in buckets:
    self.bucket_dao.insert(bucket)
  self.version_dao.set_pipeline_finished(True)
def reduce_hpo_date_metric_counts_to_database_buckets(reducer_key, reducer_values,
                                                      version_id=None):
  """Emits a metrics bucket with counts for metrics for a given hpoId + date to SQL.

  Args:
    reducer_key: hpoId|date ('*' for hpoId for cross-HPO counts)
    reducer_values: list of participant_type|metric|count strings
    version_id: metrics version to attribute the bucket to; when None, read
      from the running mapreduce's mapper params.
  """
  # defaultdict(int) is the idiomatic zero-initialized counter
  # (int() == 0, equivalent to lambda: 0).
  metrics_dict = collections.defaultdict(int)
  (hpo_id, date_str) = parse_tuple(reducer_key)
  if hpo_id == '*':
    hpo_id = ''  # '' marks the cross-HPO (all-HPO) bucket.
  date = datetime.strptime(date_str, DATE_FORMAT)
  for reducer_value in reducer_values:
    (participant_type, metric_key, count) = parse_tuple(reducer_value)
    # NOTE(review): the participant-count metric is only tallied for registered
    # participants; all other metrics are prefixed with the participant kind.
    if metric_key == PARTICIPANT_KIND:
      if participant_type == _REGISTERED_PARTICIPANT:
        metrics_dict[metric_key] += int(count)
    else:
      kind = (FULL_PARTICIPANT_KIND if participant_type == _FULL_PARTICIPANT
              else PARTICIPANT_KIND)
      metrics_dict['%s.%s' % (kind, metric_key)] += int(count)
  version_id = version_id or context.get().mapreduce_spec.mapper.params.get('version_id')
  bucket = MetricsBucket(metricsVersionId=version_id, date=date, hpoId=hpo_id,
                         metrics=json.dumps(metrics_dict))
  # Use upsert here; when reducer shards retry, we will just replace any metrics
  # bucket that was written before, rather than failing.
  MetricsBucketDao().upsert(bucket)
def test_insert_duplicate_bucket(self):
  """A second insert with the same bucket key fails; upsert replaces instead."""
  with FakeClock(TIME):
    self.metrics_version_dao.set_pipeline_in_progress()
    metrics_bucket_1 = MetricsBucket(metricsVersionId=1, date=datetime.date.today(),
                                     hpoId=PITT, metrics='foo')
    metrics_bucket_2 = MetricsBucket(metricsVersionId=1, date=datetime.date.today(),
                                     hpoId=PITT, metrics='bar')
    self.metrics_bucket_dao.insert(metrics_bucket_1)
    # Same (metricsVersionId, date, hpoId) key -> constraint violation on insert.
    with self.assertRaises(IntegrityError):
      self.metrics_bucket_dao.insert(metrics_bucket_2)
    # Upsert should work, and replace the bucket.
    self.metrics_bucket_dao.upsert(metrics_bucket_2)
    # assertEqual: assertEquals is a deprecated unittest alias.
    self.assertEqual(
        metrics_bucket_2.asdict(),
        self.metrics_bucket_dao.get([1, datetime.date.today(), PITT]).asdict())
def test_schema(self):
  """Smoke-test the schema: insert one row of each model and commit.

  There are no assertions; the test passes if every insert/commit succeeds
  against the schema (foreign keys, column types, constraints). Rows are
  inserted in dependency order.
  """
  session = self.database.make_session()

  # HPO and code book have no foreign-key dependencies; insert them first.
  hpo = HPO(hpoId=1, name='UNSET', displayName='No organization set',
            organizationType=OrganizationType.UNSET)
  code_book = CodeBook(codeBookId=1, created=datetime.datetime.now(), latest=True,
                       name="pmi", system="http://foo/bar", version="v0.1.1")
  session.add(hpo)
  session.add(code_book)
  session.commit()

  # Organization references the HPO inserted above.
  organization = Organization(organizationId=1, externalId='org',
                              displayName='Organization', hpoId=1)
  session.add(organization)
  session.commit()

  # Site belongs to the organization; code/codeHistory reference the code book.
  site = Site(siteId=1, siteName='site', googleGroup='*****@*****.**',
              mayolinkClientNumber=12345, organizationId=1)
  code1 = Code(codeId=1, codeBookId=1, system="a", value="b", shortValue="q",
               display=u"c", topic=u"d", codeType=CodeType.MODULE, mapped=True,
               created=datetime.datetime.now())
  codeHistory1 = CodeHistory(codeId=1, codeBookId=1, system="a", value="b",
                             shortValue="q", display=u"c", topic=u"d",
                             codeType=CodeType.MODULE, mapped=True,
                             created=datetime.datetime.now())
  session.add(site)
  session.add(code1)
  session.add(codeHistory1)
  session.commit()

  # Child codes: a QUESTION under the MODULE, then an ANSWER under the QUESTION.
  code2 = Code(codeId=2, codeBookId=1, parentId=1, system="a", value="c",
               display=u"X", topic=u"d", codeType=CodeType.QUESTION, mapped=True,
               created=datetime.datetime.now())
  codeHistory2 = CodeHistory(codeId=2, codeBookId=1, parentId=1, system="a",
                             value="c", display=u"X", topic=u"d",
                             codeType=CodeType.QUESTION, mapped=True,
                             created=datetime.datetime.now())
  session.add(code2)
  session.add(codeHistory2)
  session.commit()
  code3 = Code(codeId=3, codeBookId=1, parentId=2, system="a", value="d",
               display=u"Y", topic=u"d", codeType=CodeType.ANSWER, mapped=False,
               created=datetime.datetime.now())
  codeHistory3 = CodeHistory(codeId=3, codeBookId=1, parentId=2, system="a",
                             value="d", display=u"Y", topic=u"d",
                             codeType=CodeType.ANSWER, mapped=False,
                             created=datetime.datetime.now())
  session.add(code3)
  session.add(codeHistory3)
  session.commit()
  session.commit()  # NOTE(review): duplicate commit in original; kept as-is.

  # Participant plus its summary and history rows.
  p = self._participant_with_defaults(
      participantId=1, version=1, biobankId=2, clientId='*****@*****.**',
      hpoId=hpo.hpoId, signUpTime=datetime.datetime.now(),
      lastModified=datetime.datetime.now())
  ps = self._participant_summary_with_defaults(
      participantId=1, biobankId=2, lastModified=datetime.datetime.now(),
      hpoId=hpo.hpoId, firstName=self.fake.first_name(),
      middleName=self.fake.first_name(), lastName=self.fake.last_name(),
      email=self.fake.email(), zipCode='78751', dateOfBirth=datetime.date.today(),
      genderIdentityId=1, consentForStudyEnrollment=QuestionnaireStatus.SUBMITTED,
      consentForStudyEnrollmentTime=datetime.datetime.now(),
      numBaselineSamplesArrived=2)
  p.participantSummary = ps
  session.add(p)
  ph = self._participant_history_with_defaults(
      participantId=1, biobankId=2, clientId='*****@*****.**', hpoId=hpo.hpoId,
      signUpTime=datetime.datetime.now(), lastModified=datetime.datetime.now())
  session.add(ph)
  session.commit()

  # Two stored samples for the same participant and test (IDs must differ).
  session.add(
      BiobankStoredSample(biobankStoredSampleId='WEB1234542', biobankId=p.biobankId,
                          biobankOrderIdentifier='KIT', test='1UR10',
                          confirmed=datetime.datetime.utcnow()))
  session.add(
      BiobankStoredSample(
          biobankStoredSampleId='WEB99999',  # Sample ID must be unique.
          biobankId=p.biobankId,  # Participant ID and test may be duplicated.
          biobankOrderIdentifier='KIT', test='1UR10',
          confirmed=datetime.datetime.utcnow()))

  # Physical measurements: pm2 amends pm (amendedMeasurementsId=1).
  pm = PhysicalMeasurements(physicalMeasurementsId=1, participantId=1,
                            created=datetime.datetime.now(), resource='blah',
                            final=False, logPosition=LogPosition())
  pm2 = PhysicalMeasurements(physicalMeasurementsId=2, participantId=1,
                             created=datetime.datetime.now(), resource='blah',
                             final=True, amendedMeasurementsId=1,
                             logPosition=LogPosition())
  session.add(pm)
  session.add(pm2)
  session.commit()

  # Measurements: q1 is the qualifier for m1 and m2; m2 is a child of m1.
  q1 = Measurement(measurementId=3, physicalMeasurementsId=pm.physicalMeasurementsId,
                   codeSystem='codeSystem', codeValue='codeValue',
                   measurementTime=datetime.datetime.now(),
                   valueCodeSystem='valueCodeSystem', valueCodeValue='value3')
  session.add(q1)
  session.commit()
  m1 = Measurement(measurementId=1, physicalMeasurementsId=pm.physicalMeasurementsId,
                   codeSystem='codeSystem', codeValue='codeValue',
                   measurementTime=datetime.datetime.now(),
                   bodySiteCodeSystem='bodySiteCodeSystem',
                   bodySiteCodeValue='bodySiteCodeValue', valueString='a',
                   valueDecimal=1.2, valueUnit='cm',
                   valueCodeSystem='valueCodeSystem', valueCodeValue='value',
                   valueDateTime=datetime.datetime.now(),
                   qualifierId=q1.measurementId)
  session.add(m1)
  session.commit()
  m2 = Measurement(measurementId=2, physicalMeasurementsId=pm.physicalMeasurementsId,
                   codeSystem='codeSystem', codeValue='codeValue',
                   measurementTime=datetime.datetime.now(),
                   valueCodeSystem='valueCodeSystem', valueCodeValue='value2',
                   parentId=m1.measurementId, qualifierId=q1.measurementId)
  session.add(m2)
  session.commit()

  # Questionnaire and its history with one question and one concept.
  q = Questionnaire(questionnaireId=1, version=1, created=datetime.datetime.now(),
                    lastModified=datetime.datetime.now(), resource='what?')
  qh = QuestionnaireHistory(questionnaireId=1, version=1,
                            created=datetime.datetime.now(),
                            lastModified=datetime.datetime.now(), resource='what?')
  qh.questions.append(
      QuestionnaireQuestion(questionnaireQuestionId=1, questionnaireId=1,
                            questionnaireVersion=1, linkId="1.2.3", codeId=2,
                            repeats=True))
  qh.concepts.append(
      QuestionnaireConcept(questionnaireConceptId=1, questionnaireId=1,
                           questionnaireVersion=1, codeId=1))
  session.add(q)
  session.add(qh)
  session.commit()

  # Questionnaire response with one answer (valueCodeId references code3).
  qr = QuestionnaireResponse(questionnaireResponseId=1, questionnaireId=1,
                             questionnaireVersion=1, participantId=1,
                             created=datetime.datetime.now(), resource='blah')
  qr.answers.append(
      QuestionnaireResponseAnswer(questionnaireResponseAnswerId=1,
                                  questionnaireResponseId=1, questionId=1,
                                  endTime=datetime.datetime.now(), valueSystem='a',
                                  valueCodeId=3, valueDecimal=123,
                                  valueString=self.fake.first_name(),
                                  valueDate=datetime.date.today()))
  session.add(qr)
  session.commit()

  # Metrics version and a bucket belonging to it.
  mv = MetricsVersion(metricsVersionId=1, inProgress=False, complete=True,
                      date=datetime.datetime.utcnow(), dataVersion=1)
  session.add(mv)
  session.commit()
  mb = MetricsBucket(metricsVersionId=1, date=datetime.date.today(), hpoId='PITT',
                     metrics='blah')
  session.add(mb)
  session.commit()
def test_set_pipeline_finished_in_progress_with_buckets(self):
  """Finishing a pipeline makes its version serving and its buckets active.

  Inserts three buckets (two today, one tomorrow), finishes the pipeline, then
  checks get_serving_version() and start/end-date filtering of
  get_active_buckets().
  """
  with FakeClock(TIME):
    self.metrics_version_dao.set_pipeline_in_progress()
    metrics_bucket_1 = MetricsBucket(metricsVersionId=1, date=datetime.date.today(),
                                     hpoId='', metrics='foo')
    metrics_bucket_2 = MetricsBucket(metricsVersionId=1, date=datetime.date.today(),
                                     hpoId=PITT, metrics='bar')
    tomorrow = datetime.date.today() + datetime.timedelta(days=1)
    metrics_bucket_3 = MetricsBucket(metricsVersionId=1, date=tomorrow,
                                     hpoId='', metrics='baz')
    self.metrics_bucket_dao.insert(metrics_bucket_1)
    self.metrics_bucket_dao.insert(metrics_bucket_2)
    self.metrics_bucket_dao.insert(metrics_bucket_3)
  with FakeClock(TIME_2):
    self.metrics_version_dao.set_pipeline_finished(True)
  # The version keeps its start date (TIME) but is now complete and serving.
  # assertEqual throughout: assertEquals is a deprecated unittest alias.
  expected_mv = MetricsVersion(metricsVersionId=1, inProgress=False, complete=True,
                               date=TIME, dataVersion=SERVING_METRICS_DATA_VERSION)
  self.assertEqual(expected_mv.asdict(),
                   self.metrics_version_dao.get_serving_version().asdict())

  # No filter: all three buckets are active.
  active_buckets = self.metrics_bucket_dao.get_active_buckets()
  self.assertEqual(3, len(active_buckets))
  self.assertEqual(metrics_bucket_1.asdict(), active_buckets[0].asdict())
  self.assertEqual(metrics_bucket_2.asdict(), active_buckets[1].asdict())
  self.assertEqual(metrics_bucket_3.asdict(), active_buckets[2].asdict())

  # Filter on start date (inclusive).
  active_buckets = self.metrics_bucket_dao.get_active_buckets(
      start_date=datetime.date.today())
  self.assertEqual(3, len(active_buckets))
  self.assertEqual(metrics_bucket_1.asdict(), active_buckets[0].asdict())
  self.assertEqual(metrics_bucket_2.asdict(), active_buckets[1].asdict())
  self.assertEqual(metrics_bucket_3.asdict(), active_buckets[2].asdict())
  active_buckets = self.metrics_bucket_dao.get_active_buckets(start_date=tomorrow)
  self.assertEqual(1, len(active_buckets))
  self.assertEqual(metrics_bucket_3.asdict(), active_buckets[0].asdict())

  # Filter on end date (inclusive).
  active_buckets = self.metrics_bucket_dao.get_active_buckets(end_date=tomorrow)
  self.assertEqual(3, len(active_buckets))
  self.assertEqual(metrics_bucket_1.asdict(), active_buckets[0].asdict())
  self.assertEqual(metrics_bucket_2.asdict(), active_buckets[1].asdict())
  self.assertEqual(metrics_bucket_3.asdict(), active_buckets[2].asdict())
  active_buckets = self.metrics_bucket_dao.get_active_buckets(
      end_date=datetime.date.today())
  self.assertEqual(2, len(active_buckets))
  self.assertEqual(metrics_bucket_1.asdict(), active_buckets[0].asdict())
  self.assertEqual(metrics_bucket_2.asdict(), active_buckets[1].asdict())