Example #1
    def test_delete_old_metrics(self):
        """Metrics versions survive delete_old_versions() for up to 3 days.

        Starts a pipeline run at TIME, inserts two buckets for it, then
        verifies delete_old_versions() keeps the version (and its buckets)
        at TIME_4 but removes it entirely at TIME_5.
        """
        with FakeClock(TIME):
            self.metrics_version_dao.set_pipeline_in_progress()
        metrics_bucket_1 = MetricsBucket(metricsVersionId=1,
                                         date=datetime.date.today(),
                                         hpoId='',
                                         metrics='foo')
        metrics_bucket_2 = MetricsBucket(metricsVersionId=1,
                                         date=datetime.date.today(),
                                         hpoId=PITT,
                                         metrics='bar')
        self.metrics_bucket_dao.insert(metrics_bucket_1)
        self.metrics_bucket_dao.insert(metrics_bucket_2)

        # For up to 3 days, the metrics stay around.
        with FakeClock(TIME_4):
            self.metrics_version_dao.delete_old_versions()
            expected_mv = MetricsVersion(
                metricsVersionId=1,
                inProgress=True,
                complete=False,
                date=TIME,
                dataVersion=SERVING_METRICS_DATA_VERSION)
            expected_mv.buckets.append(metrics_bucket_1)
            expected_mv.buckets.append(metrics_bucket_2)
            # assertEqual, not the deprecated assertEquals alias.
            self.assertEqual(
                expected_mv.asdict(follow=['buckets']),
                self.metrics_version_dao.get_with_children(1).asdict(
                    follow=['buckets']))

        # After 3 days, the metrics are gone.
        with FakeClock(TIME_5):
            self.metrics_version_dao.delete_old_versions()
            self.assertIsNone(self.metrics_version_dao.get_with_children(1))
  def setup_buckets(self):
    """Run one metrics pipeline cycle: start it, insert three buckets, finish it.

    Inserts two buckets dated today (cross-HPO and PITT) and one dated
    tomorrow (cross-HPO), all under metrics version 1.
    """
    self.version_dao.set_pipeline_in_progress()
    # (date, hpoId, metrics JSON) for each bucket, in insertion order.
    bucket_specs = [
        (self.today, '', '{ "x": "a" }'),
        (self.today, 'PITT', '{ "x": "b" }'),
        (self.tomorrow, '', '{ "y": "c" }'),
    ]
    for bucket_date, hpo_id, metrics_json in bucket_specs:
      self.bucket_dao.insert(MetricsBucket(metricsVersionId=1,
                                           date=bucket_date,
                                           hpoId=hpo_id,
                                           metrics=metrics_json))
    self.version_dao.set_pipeline_finished(True)
def reduce_hpo_date_metric_counts_to_database_buckets(reducer_key, reducer_values, version_id=None):
  """Emits a metrics bucket with counts for metrics for a given hpoId + date to SQL.

  Args:
     reducer_key: hpoId|date ('*' for hpoId for cross-HPO counts)
     reducer_values: list of participant_type|metric|count strings
     version_id: metrics version to attach the bucket to; when None it is read
       from the mapreduce mapper params.
  """
  # defaultdict(int) is the idiomatic zero-default counter (int() == 0).
  metrics_dict = collections.defaultdict(int)
  (hpo_id, date_str) = parse_tuple(reducer_key)
  if hpo_id == '*':
    # '*' marks the cross-HPO aggregate; it is stored with an empty hpoId.
    hpo_id = ''
  date = datetime.strptime(date_str, DATE_FORMAT)
  for reducer_value in reducer_values:
    (participant_type, metric_key, count) = parse_tuple(reducer_value)
    if metric_key == PARTICIPANT_KIND:
      # Plain participant counts only include registered participants.
      if participant_type == _REGISTERED_PARTICIPANT:
        metrics_dict[metric_key] += int(count)
    else:
      # All other metrics are namespaced by the participant kind.
      kind = FULL_PARTICIPANT_KIND if participant_type == _FULL_PARTICIPANT else PARTICIPANT_KIND
      metrics_dict['%s.%s' % (kind, metric_key)] += int(count)

  version_id = version_id or context.get().mapreduce_spec.mapper.params.get('version_id')
  bucket = MetricsBucket(metricsVersionId=version_id,
                         date=date,
                         hpoId=hpo_id,
                         metrics=json.dumps(metrics_dict))
  # Use upsert here; when reducer shards retry, we will just replace any metrics bucket that was
  # written before, rather than failing.
  MetricsBucketDao().upsert(bucket)
Example #4
    def test_insert_duplicate_bucket(self):
        """Inserting a duplicate (version, date, hpo) bucket fails; upsert replaces it."""
        with FakeClock(TIME):
            self.metrics_version_dao.set_pipeline_in_progress()
        metrics_bucket_1 = MetricsBucket(metricsVersionId=1,
                                         date=datetime.date.today(),
                                         hpoId=PITT,
                                         metrics='foo')
        metrics_bucket_2 = MetricsBucket(metricsVersionId=1,
                                         date=datetime.date.today(),
                                         hpoId=PITT,
                                         metrics='bar')
        self.metrics_bucket_dao.insert(metrics_bucket_1)
        # Same primary key as bucket 1, so a plain insert must fail.
        with self.assertRaises(IntegrityError):
            self.metrics_bucket_dao.insert(metrics_bucket_2)

        # Upsert should work, and replace the bucket.
        self.metrics_bucket_dao.upsert(metrics_bucket_2)
        # assertEqual, not the deprecated assertEquals alias.
        self.assertEqual(
            metrics_bucket_2.asdict(),
            self.metrics_bucket_dao.get([1, datetime.date.today(),
                                         PITT]).asdict())
Example #5
    def test_schema(self):
        """Smoke-test the SQL schema by inserting one row of each model type.

        Rows are inserted in foreign-key dependency order, committing after
        each group, so a schema or relationship error surfaces at the
        offending model. (Removed a redundant back-to-back session.commit()
        after the code3/codeHistory3 group.)
        """
        session = self.database.make_session()

        # HPO and code book have no dependencies on other rows.
        hpo = HPO(hpoId=1,
                  name='UNSET',
                  displayName='No organization set',
                  organizationType=OrganizationType.UNSET)
        code_book = CodeBook(codeBookId=1,
                             created=datetime.datetime.now(),
                             latest=True,
                             name="pmi",
                             system="http://foo/bar",
                             version="v0.1.1")
        session.add(hpo)
        session.add(code_book)
        session.commit()

        # Organization references the HPO; site references the organization.
        organization = Organization(organizationId=1,
                                    externalId='org',
                                    displayName='Organization',
                                    hpoId=1)
        session.add(organization)
        session.commit()

        site = Site(siteId=1,
                    siteName='site',
                    googleGroup='*****@*****.**',
                    mayolinkClientNumber=12345,
                    organizationId=1)
        # A three-level code hierarchy (module -> question -> answer),
        # each with its matching history row.
        code1 = Code(codeId=1,
                     codeBookId=1,
                     system="a",
                     value="b",
                     shortValue="q",
                     display=u"c",
                     topic=u"d",
                     codeType=CodeType.MODULE,
                     mapped=True,
                     created=datetime.datetime.now())
        codeHistory1 = CodeHistory(codeId=1,
                                   codeBookId=1,
                                   system="a",
                                   value="b",
                                   shortValue="q",
                                   display=u"c",
                                   topic=u"d",
                                   codeType=CodeType.MODULE,
                                   mapped=True,
                                   created=datetime.datetime.now())
        session.add(site)
        session.add(code1)
        session.add(codeHistory1)
        session.commit()

        code2 = Code(codeId=2,
                     codeBookId=1,
                     parentId=1,
                     system="a",
                     value="c",
                     display=u"X",
                     topic=u"d",
                     codeType=CodeType.QUESTION,
                     mapped=True,
                     created=datetime.datetime.now())
        codeHistory2 = CodeHistory(codeId=2,
                                   codeBookId=1,
                                   parentId=1,
                                   system="a",
                                   value="c",
                                   display=u"X",
                                   topic=u"d",
                                   codeType=CodeType.QUESTION,
                                   mapped=True,
                                   created=datetime.datetime.now())
        session.add(code2)
        session.add(codeHistory2)
        session.commit()

        code3 = Code(codeId=3,
                     codeBookId=1,
                     parentId=2,
                     system="a",
                     value="d",
                     display=u"Y",
                     topic=u"d",
                     codeType=CodeType.ANSWER,
                     mapped=False,
                     created=datetime.datetime.now())
        codeHistory3 = CodeHistory(codeId=3,
                                   codeBookId=1,
                                   parentId=2,
                                   system="a",
                                   value="d",
                                   display=u"Y",
                                   topic=u"d",
                                   codeType=CodeType.ANSWER,
                                   mapped=False,
                                   created=datetime.datetime.now())
        session.add(code3)
        session.add(codeHistory3)
        session.commit()

        # Participant with its summary and history rows.
        p = self._participant_with_defaults(
            participantId=1,
            version=1,
            biobankId=2,
            clientId='*****@*****.**',
            hpoId=hpo.hpoId,
            signUpTime=datetime.datetime.now(),
            lastModified=datetime.datetime.now())
        ps = self._participant_summary_with_defaults(
            participantId=1,
            biobankId=2,
            lastModified=datetime.datetime.now(),
            hpoId=hpo.hpoId,
            firstName=self.fake.first_name(),
            middleName=self.fake.first_name(),
            lastName=self.fake.last_name(),
            email=self.fake.email(),
            zipCode='78751',
            dateOfBirth=datetime.date.today(),
            genderIdentityId=1,
            consentForStudyEnrollment=QuestionnaireStatus.SUBMITTED,
            consentForStudyEnrollmentTime=datetime.datetime.now(),
            numBaselineSamplesArrived=2)
        p.participantSummary = ps
        session.add(p)
        ph = self._participant_history_with_defaults(
            participantId=1,
            biobankId=2,
            clientId='*****@*****.**',
            hpoId=hpo.hpoId,
            signUpTime=datetime.datetime.now(),
            lastModified=datetime.datetime.now())
        session.add(ph)
        session.commit()

        session.add(
            BiobankStoredSample(biobankStoredSampleId='WEB1234542',
                                biobankId=p.biobankId,
                                biobankOrderIdentifier='KIT',
                                test='1UR10',
                                confirmed=datetime.datetime.utcnow()))
        session.add(
            BiobankStoredSample(
                biobankStoredSampleId='WEB99999',  # Sample ID must be unique.
                # Participant ID and test may be duplicated.
                biobankId=p.biobankId,
                biobankOrderIdentifier='KIT',
                test='1UR10',
                confirmed=datetime.datetime.utcnow()))

        # Physical measurements: pm2 amends pm.
        pm = PhysicalMeasurements(physicalMeasurementsId=1,
                                  participantId=1,
                                  created=datetime.datetime.now(),
                                  resource='blah',
                                  final=False,
                                  logPosition=LogPosition())
        pm2 = PhysicalMeasurements(physicalMeasurementsId=2,
                                   participantId=1,
                                   created=datetime.datetime.now(),
                                   resource='blah',
                                   final=True,
                                   amendedMeasurementsId=1,
                                   logPosition=LogPosition())
        session.add(pm)
        session.add(pm2)
        session.commit()

        # q1 acts as the qualifier for m1 and m2; m2 is a child of m1.
        q1 = Measurement(measurementId=3,
                         physicalMeasurementsId=pm.physicalMeasurementsId,
                         codeSystem='codeSystem',
                         codeValue='codeValue',
                         measurementTime=datetime.datetime.now(),
                         valueCodeSystem='valueCodeSystem',
                         valueCodeValue='value3')
        session.add(q1)
        session.commit()

        m1 = Measurement(measurementId=1,
                         physicalMeasurementsId=pm.physicalMeasurementsId,
                         codeSystem='codeSystem',
                         codeValue='codeValue',
                         measurementTime=datetime.datetime.now(),
                         bodySiteCodeSystem='bodySiteCodeSystem',
                         bodySiteCodeValue='bodySiteCodeValue',
                         valueString='a',
                         valueDecimal=1.2,
                         valueUnit='cm',
                         valueCodeSystem='valueCodeSystem',
                         valueCodeValue='value',
                         valueDateTime=datetime.datetime.now(),
                         qualifierId=q1.measurementId)
        session.add(m1)
        session.commit()

        m2 = Measurement(measurementId=2,
                         physicalMeasurementsId=pm.physicalMeasurementsId,
                         codeSystem='codeSystem',
                         codeValue='codeValue',
                         measurementTime=datetime.datetime.now(),
                         valueCodeSystem='valueCodeSystem',
                         valueCodeValue='value2',
                         parentId=m1.measurementId,
                         qualifierId=q1.measurementId)
        session.add(m2)
        session.commit()

        # Questionnaire with history, one concept and one question.
        q = Questionnaire(questionnaireId=1,
                          version=1,
                          created=datetime.datetime.now(),
                          lastModified=datetime.datetime.now(),
                          resource='what?')
        qh = QuestionnaireHistory(questionnaireId=1,
                                  version=1,
                                  created=datetime.datetime.now(),
                                  lastModified=datetime.datetime.now(),
                                  resource='what?')
        qh.questions.append(
            QuestionnaireQuestion(questionnaireQuestionId=1,
                                  questionnaireId=1,
                                  questionnaireVersion=1,
                                  linkId="1.2.3",
                                  codeId=2,
                                  repeats=True))
        qh.concepts.append(
            QuestionnaireConcept(questionnaireConceptId=1,
                                 questionnaireId=1,
                                 questionnaireVersion=1,
                                 codeId=1))
        session.add(q)
        session.add(qh)
        session.commit()

        # A questionnaire response with one answer.
        qr = QuestionnaireResponse(questionnaireResponseId=1,
                                   questionnaireId=1,
                                   questionnaireVersion=1,
                                   participantId=1,
                                   created=datetime.datetime.now(),
                                   resource='blah')
        qr.answers.append(
            QuestionnaireResponseAnswer(questionnaireResponseAnswerId=1,
                                        questionnaireResponseId=1,
                                        questionId=1,
                                        endTime=datetime.datetime.now(),
                                        valueSystem='a',
                                        valueCodeId=3,
                                        valueDecimal=123,
                                        valueString=self.fake.first_name(),
                                        valueDate=datetime.date.today()))

        session.add(qr)
        session.commit()

        # Metrics version and one bucket under it.
        mv = MetricsVersion(metricsVersionId=1,
                            inProgress=False,
                            complete=True,
                            date=datetime.datetime.utcnow(),
                            dataVersion=1)
        session.add(mv)
        session.commit()

        mb = MetricsBucket(metricsVersionId=1,
                           date=datetime.date.today(),
                           hpoId='PITT',
                           metrics='blah')
        session.add(mb)
        session.commit()
Example #6
    def test_set_pipeline_finished_in_progress_with_buckets(self):
        """Finishing a pipeline run makes its version serving and its buckets active.

        Also exercises get_active_buckets() date filtering: start_date and
        end_date are inclusive bounds on the bucket date.
        """

        def assert_buckets(expected, actual):
            # Active buckets must match the expected ones, in order.
            self.assertEqual(len(expected), len(actual))
            for expected_bucket, actual_bucket in zip(expected, actual):
                self.assertEqual(expected_bucket.asdict(),
                                 actual_bucket.asdict())

        with FakeClock(TIME):
            self.metrics_version_dao.set_pipeline_in_progress()
        metrics_bucket_1 = MetricsBucket(metricsVersionId=1,
                                         date=datetime.date.today(),
                                         hpoId='',
                                         metrics='foo')
        metrics_bucket_2 = MetricsBucket(metricsVersionId=1,
                                         date=datetime.date.today(),
                                         hpoId=PITT,
                                         metrics='bar')
        tomorrow = datetime.date.today() + datetime.timedelta(days=1)
        metrics_bucket_3 = MetricsBucket(metricsVersionId=1,
                                         date=tomorrow,
                                         hpoId='',
                                         metrics='baz')
        self.metrics_bucket_dao.insert(metrics_bucket_1)
        self.metrics_bucket_dao.insert(metrics_bucket_2)
        self.metrics_bucket_dao.insert(metrics_bucket_3)
        with FakeClock(TIME_2):
            self.metrics_version_dao.set_pipeline_finished(True)
        expected_mv = MetricsVersion(metricsVersionId=1,
                                     inProgress=False,
                                     complete=True,
                                     date=TIME,
                                     dataVersion=SERVING_METRICS_DATA_VERSION)
        # assertEqual, not the deprecated assertEquals alias.
        self.assertEqual(
            expected_mv.asdict(),
            self.metrics_version_dao.get_serving_version().asdict())
        assert_buckets(
            [metrics_bucket_1, metrics_bucket_2, metrics_bucket_3],
            self.metrics_bucket_dao.get_active_buckets())

        # Filter on start date.
        assert_buckets(
            [metrics_bucket_1, metrics_bucket_2, metrics_bucket_3],
            self.metrics_bucket_dao.get_active_buckets(
                start_date=datetime.date.today()))

        assert_buckets(
            [metrics_bucket_3],
            self.metrics_bucket_dao.get_active_buckets(start_date=tomorrow))

        # Filter on end date.
        assert_buckets(
            [metrics_bucket_1, metrics_bucket_2, metrics_bucket_3],
            self.metrics_bucket_dao.get_active_buckets(end_date=tomorrow))

        assert_buckets(
            [metrics_bucket_1, metrics_bucket_2],
            self.metrics_bucket_dao.get_active_buckets(
                end_date=datetime.date.today()))