def _query_and_write_reports(exporter, now, report_type, path_received, path_missing, path_modified,
                             path_withdrawals):
  """Runs the reconciliation MySQL queries and writes result rows to the given CSV writers.

  Note that due to syntax differences, the query runs on MySQL only (not SQLite in unit tests).
  """

  report_cover_range = 10
  if report_type == 'monthly':
    report_cover_range = 60

  # Gets all sample/order pairs where everything arrived, within the past n days.
  received_predicate = lambda result: (result[_RECEIVED_TEST_INDEX] and
                                       result[_SENT_COUNT_INDEX] <= result[_RECEIVED_COUNT_INDEX]
                                       and
                                       in_past_n_days(result, now, report_cover_range))

  # Gets samples or orders where something has gone missing within the past n days, and if an order
  # was placed, it was placed at least 36 hours ago.
  missing_predicate = lambda result: ((result[_SENT_COUNT_INDEX] != result[_RECEIVED_COUNT_INDEX] or
                                      (result[_SENT_FINALIZED_INDEX] and
                                      not result[_RECEIVED_TEST_INDEX])) and
                                      in_past_n_days(result, now, report_cover_range,
                                      ordered_before=now - _THIRTY_SIX_HOURS_AGO))

  # Gets samples or orders where something has been modified within the past n days.
  modified_predicate = lambda result: (result[_EDITED_CANCELLED_RESTORED_STATUS_FLAG_INDEX] and
                                       in_past_n_days(result, now, report_cover_range))

  code_dao = CodeDao()
  race_question_code = code_dao.get_code(PPI_SYSTEM, RACE_QUESTION_CODE)
  native_american_race_code = code_dao.get_code(PPI_SYSTEM, RACE_AIAN_CODE)

  # Break the export into three passes to avoid OOM issues.
  report_paths = [path_received, path_missing, path_modified]
  report_predicates = [received_predicate, missing_predicate, modified_predicate]

  for report_path, report_predicate in zip(report_paths, report_predicates):
    with exporter.open_writer(report_path, report_predicate) as report_writer:
      exporter.run_export_with_writer(report_writer, replace_isodate(_RECONCILIATION_REPORT_SQL),
                                      {'race_question_code_id': race_question_code.codeId,
                                       'native_american_race_code_id':
                                         native_american_race_code.codeId,
                                       'biobank_id_prefix': get_biobank_id_prefix(),
                                       'pmi_ops_system': _PMI_OPS_SYSTEM,
                                       'kit_id_system': _KIT_ID_SYSTEM,
                                       'tracking_number_system': _TRACKING_NUMBER_SYSTEM,
                                       'n_days_ago': now - datetime.timedelta(
                                         days=(report_cover_range + 1))})

  # Now generate the withdrawal report, within the past n days.
  exporter.run_export(path_withdrawals, replace_isodate(_WITHDRAWAL_REPORT_SQL),
                      {'race_question_code_id': race_question_code.codeId,
                       'native_american_race_code_id': native_american_race_code.codeId,
                       'n_days_ago': now - datetime.timedelta(days=report_cover_range),
                       'biobank_id_prefix': get_biobank_id_prefix()})
def _query_and_write_reports(exporter, now, path_received, path_late,
                             path_missing, path_withdrawals):
    """Runs the reconciliation MySQL queries and writes result rows to the given CSV writers.

    Note that due to syntax differences, the query runs on MySQL only (not SQLite in unit tests).
    """
    # Gets all sample/order pairs where everything arrived, regardless of timing.
    received_predicate = lambda result: (result[_RECEIVED_TEST_INDEX] and
                                         result[_SENT_COUNT_INDEX] == result[
                                             _RECEIVED_COUNT_INDEX])

    # Gets orders for which the samples arrived, but they arrived late, within the past 7 days.
    late_predicate = lambda result: (result[_ELAPSED_HOURS_INDEX] and int(
        result[_ELAPSED_HOURS_INDEX]) >= 24 and in_past_week(result, now))

    # Gets samples or orders where something has gone missing within the past 7 days, and if an order
    # was placed, it was placed at least 36 hours ago.
    missing_predicate = lambda result: (
        (result[_SENT_COUNT_INDEX] != result[_RECEIVED_COUNT_INDEX] or (result[
            _SENT_FINALIZED_INDEX] and not result[_RECEIVED_TEST_INDEX])) and
        in_past_week(result, now, ordered_before=now - _THIRTY_SIX_HOURS_AGO))

    code_dao = CodeDao()
    race_question_code = code_dao.get_code(PPI_SYSTEM, RACE_QUESTION_CODE)
    native_american_race_code = code_dao.get_code(PPI_SYSTEM, RACE_AIAN_CODE)

    # Open three files and a database session; run the reconciliation query and pipe the output
    # to the files, using per-file predicates to filter out results.
    with exporter.open_writer(path_received, received_predicate) as received_writer, \
         exporter.open_writer(path_late, late_predicate) as late_writer, \
         exporter.open_writer(path_missing, missing_predicate) as missing_writer, \
         database_factory.get_database().session() as session:
        writer = CompositeSqlExportWriter(
            [received_writer, late_writer, missing_writer])
        exporter.run_export_with_session(
            writer, session, replace_isodate(_RECONCILIATION_REPORT_SQL), {
                'race_question_code_id': race_question_code.codeId,
                'native_american_race_code_id':
                native_american_race_code.codeId,
                'biobank_id_prefix': get_biobank_id_prefix(),
                'pmi_ops_system': _PMI_OPS_SYSTEM,
                'kit_id_system': _KIT_ID_SYSTEM,
                'tracking_number_system': _TRACKING_NUMBER_SYSTEM
            })

    # Now generate the withdrawal report.
    exporter.run_export(
        path_withdrawals, replace_isodate(_WITHDRAWAL_REPORT_SQL), {
            'race_question_code_id': race_question_code.codeId,
            'native_american_race_code_id': native_american_race_code.codeId,
            'seven_days_ago': now - datetime.timedelta(days=7),
            'biobank_id_prefix': get_biobank_id_prefix()
        })
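This variant makes a single query pass and fans each row out through CompositeSqlExportWriter, with each underlying writer applying its own predicate. A self-contained sketch of that fan-out pattern follows; the write_row interface and class names here are assumptions, not the project's actual API:

class PredicateWriterSketch(object):
    """Sketch of one child writer: wraps a csv.writer and drops rows that
    fail its predicate."""

    def __init__(self, csv_writer, predicate):
        self._csv_writer = csv_writer
        self._predicate = predicate

    def write_row(self, row):
        if self._predicate(row):
            self._csv_writer.writerow(row)


class CompositeWriterSketch(object):
    """Sketch of the fan-out: one query pass, each row offered to every child
    writer; each child's predicate decides whether the row lands in its CSV."""

    def __init__(self, writers):
        self._writers = writers

    def write_row(self, row):
        for writer in self._writers:
            writer.write_row(row)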
def _upsert_samples_from_csv(csv_reader):
  """Inserts/updates BiobankStoredSamples from a csv.DictReader."""
  missing_cols = set(CsvColumns.ALL) - set(csv_reader.fieldnames)
  if missing_cols:
    raise DataError(
        'CSV is missing columns %s, had columns %s.' % (missing_cols, csv_reader.fieldnames))
  samples_dao = BiobankStoredSampleDao()
  biobank_id_prefix = get_biobank_id_prefix()
  written = 0
  try:
    samples = []
    with ParticipantDao().session() as session:

      for row in csv_reader:
        sample = _create_sample_from_row(row, biobank_id_prefix)
        if sample:
          # DA-601 - Ensure biobank_id exists before accepting a sample record.
          if session.query(Participant).filter(
                      Participant.biobankId == sample.biobankId).count() < 1:
            logging.error('Biobank ID ({0}) does not exist in the Participant table.'.
                          format(sample.biobankId))
            continue

          samples.append(sample)
          if len(samples) >= _BATCH_SIZE:
            written += samples_dao.upsert_all(samples)
            samples = []

      if samples:
        written += samples_dao.upsert_all(samples)

    return written
  except ValueError as e:
    raise DataError(e)
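Note that the DA-601 check above issues one query per incoming row. A hedged alternative, reusing the Participant model and session already in scope in this version, would prefetch the valid IDs for each batch in a single IN query and test set membership instead:

def _existing_biobank_ids(session, samples):
  """Sketch: one IN query per batch instead of one query per row."""
  wanted = set(sample.biobankId for sample in samples)
  rows = session.query(Participant.biobankId).filter(
      Participant.biobankId.in_(wanted)).all()
  return set(row[0] for row in rows)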
Example #4
    def test_sample_from_row_invalid(self):
        samples_file = test_data.open_biobank_samples(111, 222, 333)
        reader = csv.DictReader(samples_file, delimiter='\t')
        row = next(reader)
        row[biobank_samples_pipeline._Columns.CONFIRMED_DATE] = '2016 11 19'
        with self.assertRaises(biobank_samples_pipeline.DataError):
            biobank_samples_pipeline._create_sample_from_row(
                row, get_biobank_id_prefix())
Example #5
    def test_sample_from_row_old_test(self):
        samples_file = test_data.open_biobank_samples(111, 222, 333)
        reader = csv.DictReader(samples_file, delimiter='\t')
        row = next(reader)
        row[biobank_samples_pipeline._Columns.TEST_CODE] = '2PST8'
        sample = biobank_samples_pipeline._create_sample_from_row(
            row, get_biobank_id_prefix())
        self.assertIsNotNone(sample)
        cols = biobank_samples_pipeline._Columns
        self.assertEquals(sample.biobankStoredSampleId, row[cols.SAMPLE_ID])
        self.assertEquals(sample.test, row[cols.TEST_CODE])
def _upsert_samples_from_csv(csv_reader):
    """Inserts/updates BiobankStoredSamples from a csv.DictReader."""
    missing_cols = _Columns.ALL - set(csv_reader.fieldnames)
    if missing_cols:
        raise DataError('CSV is missing columns %s, had columns %s.' %
                        (missing_cols, csv_reader.fieldnames))
    samples_dao = BiobankStoredSampleDao()
    biobank_id_prefix = get_biobank_id_prefix()
    written = 0
    try:
        samples = []
        for row in csv_reader:
            sample = _create_sample_from_row(row, biobank_id_prefix)
            if sample:
                samples.append(sample)
                if len(samples) >= _BATCH_SIZE:
                    written += samples_dao.upsert_all(samples)
                    samples = []
        if samples:
            written += samples_dao.upsert_all(samples)
        return written
    except ValueError as e:
        raise DataError(e)
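Neither upsert variant shows _create_sample_from_row itself. Judging from the tests in this listing, a rough, self-contained reconstruction might look like the following; the column keys, the timestamp format, and the return type are all assumptions:

import datetime

_INPUT_TIMESTAMP_FORMAT = '%Y/%m/%d %H:%M:%S'  # assumed; the real format may differ


class DataError(Exception):
    """Stand-in for the pipeline's DataError, to keep the sketch self-contained."""


def _create_sample_from_row_sketch(row, biobank_id_prefix):
    """Hypothetical: parse one TSV row into a dict of sample fields, returning
    None for rows whose participant ID lacks this environment's prefix and
    raising DataError on an unparseable confirmed date."""
    external_id = row['EXTERNAL_PARTICIPANT_ID']
    if not external_id.startswith(biobank_id_prefix):
        return None
    try:
        confirmed = datetime.datetime.strptime(row['CONFIRMED_DATE'],
                                               _INPUT_TIMESTAMP_FORMAT)
    except ValueError as e:
        raise DataError(e)
    return {
        'biobankStoredSampleId': row['SAMPLE_ID'],
        'biobankId': int(external_id[len(biobank_id_prefix):]),
        'test': row['TEST_CODE'],
        'confirmed': confirmed,
    }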
Example #7
    def test_sample_from_row(self):
        samples_file = test_data.open_biobank_samples(112, 222, 333)
        reader = csv.DictReader(samples_file, delimiter='\t')
        row = next(reader)
        sample = biobank_samples_pipeline._create_sample_from_row(
            row, get_biobank_id_prefix())
        self.assertIsNotNone(sample)

        cols = biobank_samples_pipeline._Columns
        self.assertEquals(sample.biobankStoredSampleId, row[cols.SAMPLE_ID])
        self.assertEquals(to_client_biobank_id(sample.biobankId),
                          row[cols.EXTERNAL_PARTICIPANT_ID])
        self.assertEquals(sample.test, row[cols.TEST_CODE])
        confirmed_date = self._naive_utc_to_naive_central(sample.confirmed)
        self.assertEquals(
            confirmed_date.strftime(
                biobank_samples_pipeline._INPUT_TIMESTAMP_FORMAT),
            row[cols.CONFIRMED_DATE])
        received_date = self._naive_utc_to_naive_central(sample.created)
        self.assertEquals(
            received_date.strftime(
                biobank_samples_pipeline._INPUT_TIMESTAMP_FORMAT),
            row[cols.CREATE_DATE])
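The assertions above convert timestamps with a _naive_utc_to_naive_central helper defined on the test case but not shown here. A plausible implementation using pytz (an assumption; the project may convert differently):

import pytz


def _naive_utc_to_naive_central(utc_dt):
    """Sketch: treat a naive datetime as UTC, convert to US/Central, then drop
    the tzinfo so it can be formatted with a naive strftime."""
    return pytz.utc.localize(utc_dt).astimezone(
        pytz.timezone('US/Central')).replace(tzinfo=None)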