Example #1
0
    def test_get_participant_data(self, mock_get):
        """
        A 200 response carrying the canned JSON entry yields the expected
        participant data.
        """
        # configure the mocked getter: success status plus the fixture payload
        fake_response = mock_get.return_value
        fake_response.status_code = 200
        fake_response.json.return_value = self.json_response_entry

        actual = psr.get_participant_data(self.fake_url, self.fake_headers)

        self.assertEqual(actual, self.participant_data)
    def test_fake_website(self, mock_get, mock_token):
        """
        Non-success HTTP statuses surface as a RuntimeError naming the response.

        Runs the same check for a 500 and then a 404 response, and confirms
        the mocked getter was invoked exactly once per attempt.
        """
        mock_token.return_value = self.fake_token
        error_msg = 'Error: API request failed because <Response [{status_code}]>'

        # expected_calls tracks the cumulative number of calls to the mock
        for expected_calls, status_code in enumerate((500, 404), start=1):
            mock_get.return_value = FakeHTTPResponse(status_code=status_code)

            with self.assertRaises(RuntimeError) as raised:
                psr.get_participant_data(self.fake_url, self.fake_headers)

            self.assertEqual(str(raised.exception),
                             error_msg.format(status_code=status_code))
            self.assertEqual(mock_get.call_count, expected_calls)
    def test_get_participant_data(self, mock_get_session, mock_token):
        """
        A session whose GET returns a 200 with the canned JSON entry yields
        the expected participant data.
        """
        mock_token.return_value = self.fake_token

        # build a stand-in session whose get() reports success with the fixture
        session = MagicMock()
        response = session.get.return_value
        response.status_code = 200
        response.json.return_value = self.json_response_entry
        mock_get_session.return_value = session

        result = psr.get_participant_data(self.fake_url, self.fake_headers)

        self.assertEqual(result, self.participant_data)
    def test_get_participant_data(self, mock_get):
        """
        Exercises get_participant_data against a mocked participant summary
        api call that succeeds.
        """
        # pre conditions: the mocked call returns a 200 with the fixture JSON
        mocked_reply = mock_get.return_value
        mocked_reply.status_code = 200
        mocked_reply.json.return_value = self.json_response_entry

        # test
        actual_response = psr.get_participant_data(self.url, self.headers)

        # post conditions
        self.assertEqual(actual_response, self.participant_data)
def get_participants(api_project_id, existing_pids):
    """
    Method to hit the participant summary API based on cutoff dates and max age. Filters out participants that already
    exist in the pipeline_tables._deid_map table.

    Three requests are made, one per consent-date bin, each with a freshly generated access token. Every returned
    entry is reduced to its numeric participant id (the leading 'P' is stripped) and ids already present in
    ``existing_pids`` are discarded.

    :param api_project_id: project_id to send to API call
    :param existing_pids: list of pids that already exist in mapping table
    :return: dataframe with single column person_id from participant summary API, which needs RIDS created for
    """

    # create datetimes from cutoff dates and datetimes to bin API call requests
    timestamp_format = '%Y-%m-%dT%H:%M:%S'
    bin_1_gt_datetime = datetime.strptime('2019-08-31T23:59:59',
                                          timestamp_format)
    bin_1_lt_datetime = datetime.strptime('2020-08-02T00:00:00',
                                          timestamp_format)
    bin_2_datetime = datetime.strptime('2019-01-01T00:00:00', timestamp_format)
    bin_3_gt_datetime = datetime.strptime('2018-12-31T23:59:59',
                                          timestamp_format)
    bin_3_lt_datetime = datetime.strptime('2019-09-01T00:00:00',
                                          timestamp_format)

    # Build the ParticipantSummary request urls, one per consent-date bin.
    # See https://github.com/all-of-us/raw-data-repository/blob/master/opsdataAPI.md for documentation of this api.
    # NOTE(review): the datetimes are interpolated with str(), so the url carries 'YYYY-MM-DD HH:MM:SS' (space
    # separator, not 'T'). Kept as-is to preserve the requests that are sent today -- confirm against the API.
    base_url = ("https://{0}.appspot.com/rdr/v1/ParticipantSummary?_sort="
                "consentForStudyEnrollmentAuthored&withdrawalStatus={1}"
               ).format(api_project_id, 'NOT_WITHDRAWN')
    authored_gt = "&consentForStudyEnrollmentAuthored=gt{0}"
    authored_lt = "&consentForStudyEnrollmentAuthored=lt{0}"

    request_url_cutoff_participants = (
        base_url + authored_gt.format(bin_1_gt_datetime) +
        authored_lt.format(bin_1_lt_datetime))
    request_url_max_age_participants_1 = (base_url +
                                          authored_lt.format(bin_2_datetime))
    request_url_max_age_participants_2 = (
        base_url + authored_gt.format(bin_3_gt_datetime) +
        authored_lt.format(bin_3_lt_datetime))

    list_url_requests = [
        request_url_cutoff_participants, request_url_max_age_participants_1,
        request_url_max_age_participants_2
    ]
    participant_data = []

    # loop through urls and create new tokens each request to avoid API timing out
    for url in list_url_requests:
        token = get_access_token()
        headers = {
            'content-type': 'application/json',
            'Authorization': 'Bearer {0}'.format(token)
        }
        participant_data.extend(get_participant_data(url, headers))

    # build the lookup once so each membership test below is O(1)
    known_pids = set(existing_pids)

    # Loop through participant_data to retrieve only person_id
    new_person_ids = []
    for entry in participant_data:
        resource = entry['resource']
        participant_id = int(resource['participantId'].replace('P', ''))

        # remove pids that already exist in mapping table
        if participant_id not in known_pids:
            new_person_ids.append(participant_id)

    # Construct the frame in a single step: DataFrame.append was removed in pandas 2.0 and copied the whole
    # frame on every call. Rows keep their 0..n-1 positions, so drop_duplicates retains the same first
    # occurrences (and index labels) as the row-by-row version did.
    participants = pd.DataFrame(new_person_ids, columns=['person_id'])

    return participants.drop_duplicates()