Ejemplo n.º 1
0
def process_genomic_water_line():
    """Run one pass of the genomic pipeline; executed as a cron job.

    Steps, in order:
      1. Try to read a new genomic set file from the bucket.
      2. If a set was read, validate it. When the set ends up VALID, create
         and upload the biobank manifest file. In either case write the
         genomic set status result file.
      3. Regardless of step 1, process the biobank manifest result file and
         the genotyping manifest files.
    """
    genomic_set_id = genomic_set_file_handler.read_genomic_set_from_bucket()

    if genomic_set_id is None:
        logging.info('No file found or nothing read from genomic set file')
    else:
        logging.info('Read input genomic set file successfully.')
        set_dao = GenomicSetDao()
        validation.validate_and_update_genomic_set_by_id(
            genomic_set_id, set_dao)

        # Re-read the set so the status written by validation is visible.
        validated_set = set_dao.get(genomic_set_id)
        passed = validated_set.genomicSetStatus == GenomicSetStatus.VALID
        if not passed:
            logging.info('Validation failed.')
        else:
            genomic_biobank_menifest_handler.create_and_upload_genomic_biobank_manifest_file(
                genomic_set_id)
            logging.info(
                'Validation passed, generate biobank manifest file successfully.'
            )

        # The status result file is produced for valid and invalid sets alike.
        genomic_set_file_handler.create_genomic_set_status_result_file(
            genomic_set_id)

    # These two steps do not depend on whether a new set file was found.
    genomic_biobank_menifest_handler.process_genomic_manifest_result_file_from_bucket()
    genomic_center_menifest_handler.process_genotyping_manifest_files()
def _insert_genomic_set_from_row(row, csv_filename, timestamp):
    """Create and persist a new GenomicSet from a CSV row.

    The set name and criteria are read from ``row``. The version number is
    requested from ``GenomicSetDao.get_new_version_number`` for that name, and
    the new set starts with status ``GenomicSetStatus.UNSET``.

    Args:
      row: A CSV row indexable by the ``CsvColumns`` keys.
      csv_filename: Stored on the set as ``genomicSetFile``.
      timestamp: Stored on the set as ``genomicSetFileTime``.

    Raises:
      DataError if the row is invalid (contract carried over from the original
      docstring; presumably raised by the DAO layer -- not visible here).

    Returns:
      The newly inserted GenomicSet.
    """
    # NOTE: there must be no trailing comma on this assignment. A trailing
    # comma turns the value into a 1-tuple, which would then be handed to the
    # DAO and stored as the set name instead of the plain string.
    genomic_set_name = row[CsvColumns.GENOMIC_SET_NAME]

    set_dao = GenomicSetDao()
    genomic_set_version = set_dao.get_new_version_number(genomic_set_name)

    genomic_set = GenomicSet(
        genomicSetName=genomic_set_name,
        genomicSetCriteria=row[CsvColumns.GENOMIC_SET_CRITERIA],
        genomicSetFile=csv_filename,
        genomicSetFileTime=timestamp,
        genomicSetStatus=GenomicSetStatus.UNSET,
        genomicSetVersion=genomic_set_version,
    )
    set_dao.insert(genomic_set)

    return genomic_set
Ejemplo n.º 3
0
 def setUp(self, with_data=True, use_mysql=False):
     """Initialise the base fixture, the DAOs and the participant counter.

     Calls the parent ``setUp`` with the same flags, creates the DAOs used
     by the helper methods, resets the participant counter to 0 and finally
     calls ``setup_data()``.
     """
     parent = super(GenomicSetValidationBaseTestCase, self)
     parent.setUp(with_data=with_data, use_mysql=use_mysql)

     # One DAO per table touched by the tests.
     self.participant_dao = ParticipantDao()
     self.summary_dao = ParticipantSummaryDao()
     self.genomic_set_dao = GenomicSetDao()
     self.genomic_member_dao = GenomicSetMemberDao()

     # Counter starts at 0 before any test data is created.
     self._participant_i = 0

     self.setup_data()
  def test_read_from_csv_file(self):
    """Read a genomic set CSV from the bucket and check what gets stored.

    Three participants (each with a summary and one biobank order) are
    inserted first, then the sample CSV is written to the cloud bucket and
    read through ``genomic_set_file_handler``. The stored set and every
    stored member are then checked against the expected values.
    """
    # (participantId, biobankId, biobankOrderId, order identifier value)
    seed_rows = (
        (123, 1234, '123', u'12345678'),
        (124, 1235, '124', u'12345679'),
        (125, 1236, '125', u'12345680'),
    )
    for participant_id, biobank_id, order_id, client_id in seed_rows:
      person = self.participant_dao.insert(
          Participant(participantId=participant_id, biobankId=biobank_id))
      self.summary_dao.insert(self.participant_summary(person))
      identifier = BiobankOrderIdentifier(system=u'https://www.pmi-ops.org',
                                          value=client_id)
      order = self._make_biobank_order(participantId=person.participantId,
                                       biobankOrderId=order_id,
                                       identifiers=[identifier])
      BiobankOrderDao().insert(order)

    samples_file = test_data.open_genomic_set_file('Genomic-Test-Set-test-1.csv')

    # The input file name embeds the current time, formatted with the
    # handler's INPUT_CSV_TIME_FORMAT.
    central_now = self._naive_utc_to_naive_central(clock.CLOCK.now())
    time_suffix = central_now.strftime(genomic_set_file_handler.INPUT_CSV_TIME_FORMAT)
    input_filename = 'cloud%s.csv' % time_suffix

    self._write_cloud_csv(input_filename, samples_file)
    genomic_set_file_handler.read_genomic_set_from_bucket()

    stored_set = GenomicSetDao().get_all()[0]
    self.assertEqual(stored_set.genomicSetName, 'name_xxx')
    self.assertEqual(stored_set.genomicSetCriteria, 'criteria_xxx')
    self.assertEqual(stored_set.genomicSetVersion, 1)

    expected_participant_ids = [123, 124, 125]
    expected_order_ids = ['123', '124', '125']
    expected_biobank_ids = ['1234', '1235', '1236']
    expected_client_ids = ['12345678', '12345679', '12345680']
    expected_genome_types = ['aou_wgs', 'aou_array']
    expected_ny_flags = [0, 1]
    expected_sexes = ['F', 'M']

    for member in GenomicSetMemberDao().get_all():
      self.assertIn(member.participantId, expected_participant_ids)
      self.assertIn(member.biobankOrderId, expected_order_ids)
      self.assertIn(member.biobankId, expected_biobank_ids)
      self.assertIn(member.biobankOrderClientId, expected_client_ids)
      self.assertEqual(member.genomicSetId, 1)
      self.assertIn(member.genomeType, expected_genome_types)
      self.assertIn(member.nyFlag, expected_ny_flags)
      self.assertIn(member.sexAtBirth, expected_sexes)
Ejemplo n.º 5
0
    def _create_fake_genomic_set(self, genomic_set_name, genomic_set_criteria,
                                 genomic_set_filename):
        """Insert and return a GenomicSet with status INVALID.

        The file time is the current clock time and the version number is
        whatever ``GenomicSetDao.get_new_version_number`` returns for the
        given name.
        """
        file_time = clock.CLOCK.now()
        fake_set = GenomicSet(
            genomicSetName=genomic_set_name,
            genomicSetCriteria=genomic_set_criteria,
            genomicSetFile=genomic_set_filename,
            genomicSetFileTime=file_time,
            genomicSetStatus=GenomicSetStatus.INVALID,
        )

        dao = GenomicSetDao()
        # The version is looked up from the name already stored on the set.
        fake_set.genomicSetVersion = dao.get_new_version_number(
            fake_set.genomicSetName)
        dao.insert(fake_set)

        return fake_set
Ejemplo n.º 6
0
def validate_and_update_genomic_set_by_id(genomic_set_id, dao=None):
    """Validate a GenomicSet and its members and persist the results.

    Determines and writes validation statuses and times for the specified
    GenomicSet and all of its GenomicSetMembers using a single session. If
    anything raises, the session is rolled back and the exception re-raised.

    The set is marked INVALID when at least one member is not VALID;
    otherwise it is marked VALID and ``validatedTime`` is set to the current
    clock time. The set status is recomputed from the members on every call,
    so a set that was INVALID on an earlier run becomes VALID once all of its
    members validate.

    :param genomic_set_id: The id of the GenomicSet to validate
    :param dao: (optional) DAO to use; a new GenomicSetDao is created if falsy
    :type dao: GenomicSetDao or None
    """
    now = clock.CLOCK.now()

    # Same calendar day, GENOMIC_VALID_AGE years earlier. On Feb 29 the
    # target year may not be a leap year, in which case datetime.date raises
    # ValueError; fall back to Feb 28 of that year.
    cutoff_year = now.year - GENOMIC_VALID_AGE
    try:
        date_of_birth_cutoff = datetime.date(year=cutoff_year,
                                             month=now.month,
                                             day=now.day)
    except ValueError:
        if (now.month, now.day) != (2, 29):
            raise
        date_of_birth_cutoff = datetime.date(year=cutoff_year, month=2, day=28)

    dao = dao or GenomicSetDao()

    MemberIdStatusPair = collections.namedtuple('MemberIdStatusPair', [
        'member_id',
        'status',
    ])
    update_queue = collections.deque()

    with dao.member_dao.session() as session:
        try:
            # Compute every member's status first, then write them in bulk.
            for row in dao.iter_validation_data_for_genomic_set_id_with_session(
                    session, genomic_set_id):
                update_queue.append(
                    MemberIdStatusPair(
                        row.id,
                        _get_validation_status(row, date_of_birth_cutoff),
                    ))

            dao.member_dao.bulk_update_validation_status_with_session(
                session, update_queue)

            genomic_set = dao.get_with_session(session, genomic_set_id)
            has_invalid_member = any(
                task.status != GenomicValidationStatus.VALID
                for task in update_queue)
            if has_invalid_member:
                genomic_set.genomicSetStatus = GenomicSetStatus.INVALID
            else:
                genomic_set.genomicSetStatus = GenomicSetStatus.VALID
                genomic_set.validatedTime = now
            dao.update_with_session(session, genomic_set)
        except Exception:
            session.rollback()
            raise
Ejemplo n.º 7
0
def validate_and_update_genomic_set_by_id(genomic_set_id, dao=None):
    """Validate a GenomicSet and its members and persist the results.

    Determines and writes validation statuses, flags and times for the
    specified GenomicSet and all of its GenomicSetMembers using a single
    session. If anything raises, the session is rolled back and the exception
    re-raised.

    A member is INVALID when ``_iter_validation_flags`` yields at least one
    flag for it, and VALID otherwise. The set is INVALID when any member is
    INVALID; otherwise it is VALID and ``validatedTime`` is set to the current
    clock time.

    :param genomic_set_id: The id of the GenomicSet to validate
    :param dao: (optional) DAO to use; a new GenomicSetDao is created if falsy
    :type dao: GenomicSetDao or None
    """
    now = clock.CLOCK.now()

    # Same calendar day, GENOMIC_VALID_AGE years earlier. On Feb 29 the
    # target year may not be a leap year, in which case datetime.date raises
    # ValueError; fall back to Feb 28 of that year.
    cutoff_year = now.year - GENOMIC_VALID_AGE
    try:
        date_of_birth_cutoff = datetime.date(year=cutoff_year,
                                             month=now.month,
                                             day=now.day)
    except ValueError:
        if (now.month, now.day) != (2, 29):
            raise
        date_of_birth_cutoff = datetime.date(year=cutoff_year, month=2, day=28)

    dao = dao or GenomicSetDao()

    update_queue = collections.deque()

    with dao.member_dao.session() as session:
        try:
            # Compute every member's status and flags, then write in bulk.
            for row in dao.iter_validation_data_for_genomic_set_id_with_session(
                    session, genomic_set_id):
                flags = list(_iter_validation_flags(row, date_of_birth_cutoff))
                if flags:
                    status = GenomicSetMemberStatus.INVALID
                else:
                    status = GenomicSetMemberStatus.VALID
                update_queue.append(
                    dao.member_dao.BulkUpdateValidationParams(
                        row.id, status, flags))

            dao.member_dao.bulk_update_validation_status_with_session(
                session, update_queue)

            genomic_set = dao.get_with_session(session, genomic_set_id)
            # A generator expression works on both Python 2 and Python 3;
            # itertools.imap exists only on Python 2.
            has_invalid_member = any(
                task.status == GenomicSetMemberStatus.INVALID
                for task in update_queue)
            if has_invalid_member:
                genomic_set.genomicSetStatus = GenomicSetStatus.INVALID
            else:
                genomic_set.genomicSetStatus = GenomicSetStatus.VALID
                genomic_set.validatedTime = now
            dao.update_with_session(session, genomic_set)
        except Exception:
            session.rollback()
            raise
def create_genomic_set_status_result_file(genomic_set_id):
    """Load the GenomicSet with the given id and create/upload its result file."""
    _create_and_upload_result_file(GenomicSetDao().get(genomic_set_id))
def _is_filename_exist(csv_filename):
    """Return True when the DAO lookup by ``csv_filename`` yields a truthy result."""
    existing = GenomicSetDao().get_one_by_file_name(csv_filename)
    return bool(existing)
Ejemplo n.º 10
0
class GenomicSetValidationBaseTestCase(SqlTestBase):
    """Base test case with helpers that build genomic validation fixtures.

    Each ``make_*`` helper fills in a set of default field values, applies
    any keyword overrides from the caller, inserts the resulting object
    through the matching DAO and returns it.
    """

    def setUp(self, with_data=True, use_mysql=False):
        """Initialise the parent fixture, the DAOs and the participant counter."""
        parent = super(GenomicSetValidationBaseTestCase, self)
        parent.setUp(with_data=with_data, use_mysql=use_mysql)

        self.participant_dao = ParticipantDao()
        self.summary_dao = ParticipantSummaryDao()
        self.genomic_set_dao = GenomicSetDao()
        self.genomic_member_dao = GenomicSetMemberDao()

        # Source of the ids handed out by make_participant().
        self._participant_i = 0

        self.setup_data()

    def setup_data(self):
        """Hook called at the end of setUp(); does nothing in this class."""
        pass

    def make_participant(self, **kwargs):
        """Insert and return a participant with custom settings.

        The current counter value is used for both ``participantId`` and
        ``biobankId``, and the counter is then advanced by one. With no
        overrides this should create a valid participant.
        """
        index = self._participant_i
        self._participant_i = index + 1

        new_participant = Participant(participantId=index,
                                      biobankId=index,
                                      **kwargs)
        self.participant_dao.insert(new_participant)
        return new_participant

    def make_summary(self, participant, **override_kwargs):
        """Insert and return a participant summary with custom settings.

        Ids and withdrawal status are copied from ``participant``; the other
        defaults are fixed values. With no overrides this should create a
        valid summary.
        """
        fields = {
            'participantId': participant.participantId,
            'biobankId': participant.biobankId,
            'withdrawalStatus': participant.withdrawalStatus,
            'dateOfBirth': datetime.datetime(2000, 1, 1),
            'firstName': 'foo',
            'lastName': 'bar',
            'zipCode': '12345',
            'sampleStatus1ED04': SampleStatus.RECEIVED,
            'sampleStatus1SAL2': SampleStatus.RECEIVED,
            'samplesToIsolateDNA': SampleStatus.RECEIVED,
            'consentForStudyEnrollmentTime': datetime.datetime(2019, 1, 1),
        }
        # Caller-supplied values take precedence over the defaults.
        fields.update(override_kwargs)

        new_summary = self._participant_summary_with_defaults(**fields)
        self.summary_dao.insert(new_summary)
        return new_summary

    def make_genomic_set(self, **override_kwargs):
        """Insert and return a genomic set with custom settings.

        With no overrides this should create a valid set.
        """
        fields = {
            'genomicSetName': 'foo',
            'genomicSetCriteria': 'something',
            'genomicSetVersion': 1,
            'genomicSetStatus': GenomicSetStatus.UNSET,
        }
        fields.update(override_kwargs)

        new_set = GenomicSet(**fields)
        self.genomic_set_dao.insert(new_set)
        return new_set

    def make_genomic_member(self, genomic_set, participant, **override_kwargs):
        """Insert and return a genomic set member with custom settings.

        The member is linked to ``genomic_set`` and ``participant`` by id.
        With no overrides this should create a valid member.
        """
        fields = {
            'genomicSetId': genomic_set.id,
            'participantId': participant.participantId,
            'sexAtBirth': 'F',
            'biobankId': participant.biobankId,
            'biobankOrderClientId': '12345678',
        }
        fields.update(override_kwargs)

        new_member = GenomicSetMember(**fields)
        self.genomic_member_dao.insert(new_member)
        return new_member