Ejemplo n.º 1
0
def _post_to_bigquery(log_records, logplex_frame_id):
    """Insert log records into BigQuery via the tabledata.insertAll REST API.

    Each record becomes one row whose insertId combines the Logplex frame id
    with the record's position, giving BigQuery a best-effort de-duplication
    key. Module-level diagnostics counters are updated with the outcome.
    """
    payload = {
        'kind': 'bigquery#tableDataInsertAllRequest',
        'skipInvalidRows': settings.BIG_QUERY_SKIP_INVALID_ROWS,
        'ignoreUnknownValues': settings.BIG_QUERY_IGNORE_UNKNOWN_VALUES,
        'rows': [
            {'insertId': '%s-%d' % (logplex_frame_id, index), 'json': record}
            for index, record in enumerate(log_records)
        ],
    }
    endpoint = 'https://www.googleapis.com/bigquery/v2/projects/%s/datasets/%s/tables/%s/insertAll' % (
        settings.BIG_QUERY_PROJECT_ID, settings.BIG_QUERY_DATASET_ID,
        settings.BIG_QUERY_TABLE_ID)
    http = AuthorizedHttp(credentials)
    response = http.request(
        'POST',
        endpoint,
        body=json.dumps(payload),
        headers={'Content-Type': 'application/json'})
    diagnostics.big_query_response_codes[response.status] += 1
    # A 200 can still carry per-row insert errors in its body, so check both.
    succeeded = (response.status == 200
                 and not _json_from_response(response).get('error'))
    if succeeded:
        diagnostics.big_query_rows_inserted += len(log_records)
    else:
        diagnostics.big_query_rows_failed += len(log_records)
        diagnostics.sample_big_query_insert_errors.append(response.data)
Ejemplo n.º 2
0
def datastore_to_storage(request):
    """Export selected Datastore kinds to a Cloud Storage bucket.

    Authenticates with a service-account key file and POSTs an export
    request to the Datastore admin REST API.

    Args:
        request: The incoming (e.g. Cloud Function / Flask) request object;
            its contents are not used.

    Returns:
        The string 'ok' unconditionally; the API response status and body
        are only printed.
    """
    import json  # local import keeps this self-contained handler runnable

    projectid = 'pantree-198302'
    request_body = {
        "outputUrlPrefix": "gs://pantree_datastore_kind_backup",
        "entityFilter": {
            "kinds": [
                "User", "Order", "Product", "Recipe", "Conversion", "Location",
                "ShoppingList", "Chef", "Session"
            ],
            "namespaceIds": ["development.amete"]
        }
    }

    ### Authenticate and Call Server to Server API ###
    ### Define scope and authentication file
    SCOPES = [
        'https://www.googleapis.com/auth/datastore',
        'https://www.googleapis.com/auth/cloud-platform'
    ]
    SERVICE_ACCOUNT_FILE = 'google_cloud_key.development.json'
    credentials = service_account.Credentials.from_service_account_file(
        SERVICE_ACCOUNT_FILE, scopes=SCOPES)
    ### Authorize credentials and send a post request to export data from
    ### datastore to cloud storage
    authed_http = AuthorizedHttp(credentials)
    # BUG FIX: str(dict) produces single-quoted Python repr, not valid JSON,
    # which the API rejects. Serialize with json.dumps and declare the
    # content type. Also reuse `projectid` instead of duplicating it in the
    # URL literal.
    response = authed_http.request(
        'POST',
        "https://datastore.googleapis.com/v1/projects/%s:export" % projectid,
        body=json.dumps(request_body),
        headers={'Content-Type': 'application/json'})
    print(response.status)
    print(response.data)
    return 'ok'
Ejemplo n.º 3
0
        def request(self, method, url, fields=None, headers=None):
            """Issue `method` against `url` through an OAuth-authorized
            urllib3 client built from this object's credentials."""
            client = AuthorizedHttp(self.credentials)
            response = client.request(method,
                                      url,
                                      fields=fields,
                                      headers=headers)
            return response
Ejemplo n.º 4
0
def main(data, context):
  """Extracts features from a patient bundle for online prediction.

  This process is broken down into a few steps:

  1. Fetch the Resource we get triggered on, and fetch/extract the patient that
     it is related to.
  2. Fetch everything for the patient from step 1, and extract the
     features we are interested in.
  3. Send the features to Cloud ML for online prediction, and write the
     results back to the FHIR store.

  Args:
    data (dict): Cloud PubSub payload. The `data` field is what we are looking
      for.
    context (google.cloud.functions.Context): Metadata for the event.
  """

  if 'data' not in data:
    LOGGER.info('`data` field is not present, skipping...')
    return

  name = base64.b64decode(data['data']).decode('utf-8')
  # Only Condition, Patient and Observation resources feed the model.
  relevant = (utils.CONDITION_TYPE, utils.PATIENT_TYPE, utils.OBSERVATION_TYPE)
  if not any(rtype in name for rtype in relevant):
    LOGGER.info('Skipping resource %s which is irrelevant for prediction.',
                name)
    return

  credentials, _ = google.auth.default()
  authed_http = AuthorizedHttp(credentials)

  resource = get_resource(authed_http, name)
  if resource is None:
    return

  patient = get_corresponding_patient(authed_http, name, resource)
  if patient is None:
    LOGGER.error('Could not find corresponding patient in resource %s', name)
    return

  project_id, location, dataset_id, fhir_store_id, _ = _parse_resource_name(
      name)
  patient_name = _construct_resource_name(
      project_id, location, dataset_id, fhir_store_id,
      'Patient/{}'.format(patient['id']))

  bundle = get_patient_everything(authed_http, patient_name)
  if bundle is None:
    return

  predictions = predict(features.build_example(bundle))
  if predictions is None:
    return

  create_or_update_risk_assessment(authed_http, patient_name, predictions,
                                   get_action(data))
Ejemplo n.º 5
0
def trip():
    """Render the trip page for a Google-authenticated user, redirecting
    to the Google login flow when not (or no longer) authorized."""
    if not google.authorized:
        return redirect(url_for('google.login'))

    creds = Credentials(google.token['access_token'])
    print(google.token)
    authed = AuthorizedHttp(creds)

    try:
        people_response = authed.request(
            'GET',
            'https://people.googleapis.com/v1/people/me?personFields=names,emailAddresses'
        )
        auth = helpers.handling_authorization(creds, people_response)
        return render_trip(auth)
    except Exception as exc:
        # Any failure (expired token, API error) restarts the login flow.
        print("Exception in auth:")
        print(repr(exc))
        return redirect(url_for('google.login'))
Ejemplo n.º 6
0
 def _http_client(self) -> urllib3.PoolManager:
     """
     A urllib3 HTTP client that attaches OAuth 2.0 credentials to requests.
     """
     # Refresh-on-401 is deliberately disabled (empty refresh_status_codes):
     # service-account tokens are minted fresh per lambda invocation and
     # never expire within one, while attempting to refresh a user token
     # raises `google.auth.exceptions.RefreshError` because the credentials
     # lack (among other fields) the client secret.
     authorized = AuthorizedHttp(self.credentials,
                                 http_client(),
                                 refresh_status_codes=())
     return authorized
Ejemplo n.º 7
0
    def __init__(self, merlin_url: str, use_google_oauth: bool = True):
        """Create a Merlin API client session.

        :param merlin_url: base URL of the Merlin API server
        :param use_google_oauth: attach Google application-default
            credentials to every request when True
        """
        self._merlin_url = merlin_url
        config = Configuration()
        config.host = self._merlin_url + "/v1"
        self._api_client = ApiClient(config)

        if use_google_oauth:
            credentials, _project = google.auth.default(scopes=OAUTH_SCOPES)
            # Swap the generated client's pool manager for an OAuth-aware one.
            self._api_client.rest_client.pool_manager = AuthorizedHttp(
                credentials, urllib3.PoolManager())

        self._project_api = ProjectApi(self._api_client)
        self._model_api = ModelsApi(self._api_client)
        self._version_api = VersionApi(self._api_client)
        self._endpoint_api = EndpointApi(self._api_client)
        self._env_api = EnvironmentApi(self._api_client)
Ejemplo n.º 8
0
    def __init__(self, host: str, project_name: str = None, use_google_oauth: bool = True):
        """
        Create new session

        :param host: URL of Turing API
        :param project_name: name of the project, this session should stick to
        :param use_google_oauth: should be True if Turing API is protected with Google OAuth
        """
        self._api_client = ApiClient(Configuration(host=os.path.join(host, 'v1')))

        if use_google_oauth:
            # Imported lazily so the OAuth dependency is only needed when used.
            import google.auth
            from google.auth.transport.urllib3 import urllib3, AuthorizedHttp

            credentials, _ = google.auth.default(scopes=TuringSession.OAUTH_SCOPES)
            self._api_client.rest_client.pool_manager = AuthorizedHttp(
                credentials, urllib3.PoolManager())

        self._project = None
        if project_name:
            self.set_project(project_name)
Ejemplo n.º 9
0
 def oauthed_http(self) -> AuthorizedHttp:
     """
     A urllib3 client whose requests carry OAuth credentials restricted to
     ``self.oauth_scopes``.
     """
     scoped_credentials = self.credentials.with_scopes(self.oauth_scopes)
     pool = urllib3.PoolManager(ca_certs=certifi.where())
     return AuthorizedHttp(scoped_credentials, pool)
Ejemplo n.º 10
0
def main(data, context):
  """Extracts features from a patient bundle for online prediction.

  This process is broken down into a few steps:

  1. Fetch the QuestionnaireResponse we get triggered on (note that we
     only react to this resource type), and extract the patient that
     answered it.
  2. Fetch everything for the patient from step 1, and extract the
     features we are interested in.
  3. Send the features to Cloud ML for online prediction, and write the
     results back to the FHIR store as RiskAssessment resources, one per
     disease in DISEASE_MAP, creating or updating depending on the
     triggering action.

  Args:
    data (dict): Cloud PubSub payload. The `data` field is what we are
      looking for.
    context (google.cloud.functions.Context): Metadata for the event.
  """

  if 'data' not in data:
    LOGGER.info('`data` field is not present, skipping...')
    return

  resource_name = base64.b64decode(data['data']).decode('utf-8')
  if QUESTIONNAIRERESPONSE_TYPE not in resource_name:
    # Lazy %-args (not eager interpolation) so the message is only built
    # when INFO logging is enabled, matching the logging style elsewhere.
    LOGGER.info("Skipping resource %s which is irrelevant for prediction.",
                resource_name)
    return

  credentials, _ = google.auth.default()
  http = AuthorizedHttp(credentials)
  questionnaire_response = get_resource(http, resource_name)
  if questionnaire_response is None:
    return

  # The response's subject reference (e.g. "Patient/123") locates the
  # patient within the same FHIR store as the triggering resource.
  patient_id = questionnaire_response['subject']['reference']
  project_id, location, dataset_id, fhir_store_id, _ = _parse_resource_name(
    resource_name)
  patient = get_resource(http, _construct_resource_name(project_id, location,
    dataset_id, fhir_store_id, patient_id))
  if patient is None:
    return

  predictions = predict(build_examples(patient, questionnaire_response))
  if predictions is None:
    return

  pid = "%s/%s" % (PATIENT_TYPE, patient['id'])
  qid = "%s/%s" % (QUESTIONNAIRERESPONSE_TYPE, questionnaire_response['id'])

  action = get_action(data)
  for disease, idx in DISEASE_MAP.items():
    scores = predictions[idx]['probabilities']
    LOGGER.info("Prediction results: %s", scores)
    # Last element represents risk. Bucket the probability into 0.2-wide
    # risk bands; score == 1 is special-cased because it would otherwise
    # index one past the end of RISKS.
    score = scores[1]
    risk = RISKS[-1] if score == 1 else RISKS[int(score / 0.2)]

    path = _construct_resource_name(project_id, location, dataset_id,
      fhir_store_id, RISKASSESSMENT_TYPE)
    if action == UPDATE_RESOURCE_ACTION:
      resources = search_resource(http, path, "subject=%s" % pid)
      res = filter_resource(resources, qid, disease)
      if res is None:
        # Typo fixed in the log message ("createing" -> "creating").
        LOGGER.info("No existing RiskAssessment, creating a new one...")
        create_or_update_resource(http, path, build_risk_assessment(pid,
          qid, disease, risk))
        continue
      rid = res['id']

      # Re-target the path at the specific RiskAssessment so the write
      # updates it in place rather than creating a duplicate.
      path = _construct_resource_name(project_id, location, dataset_id,
        fhir_store_id, "%s/%s" % (RISKASSESSMENT_TYPE, rid))
      create_or_update_resource(http, path, build_risk_assessment(pid,
        qid, disease, risk, rid=rid))
    elif action == CREATE_RESOURCE_ACTION:
      create_or_update_resource(http, path, build_risk_assessment(pid,
        qid, disease, risk))
Ejemplo n.º 11
0
 def _http_client(self) -> urllib3.PoolManager:
     """A urllib3 pool manager that signs requests with OAuth credentials."""
     base_client = http_client()
     return AuthorizedHttp(self.credentials, base_client)
Ejemplo n.º 12
0
class TestTDRRepositoryProxy(RepositoryPluginTestCase):
    """Exercise the service's /repository endpoints against a mocked Terra
    Data Repository (TDR) backend.

    NOTE(review): both test methods accept a `mock_get_cached_sources`
    parameter, but no matching patch decorator is visible in this chunk —
    presumably applied at class level or by the base class; verify.
    """
    # Fake TDR service endpoint, derived from the deployment's domain name.
    mock_service_url = f'https://serpentine.datarepo-dev.broadinstitute.net.test.{config.domain_name}'
    # Snapshot names the mocked TDR exposes, and the source specs built from
    # them ('tdr:mock:snapshot/<name>:').
    mock_source_names = ['mock_snapshot_1', 'mock_snapshot_2']
    make_mock_source_spec = 'tdr:mock:snapshot/{}:'.format
    mock_sources = set(map(make_mock_source_spec, mock_source_names))

    # Single-catalog configuration wired to the 'hca' metadata plugin and
    # the 'tdr' repository plugin, scoped to the mock sources above.
    catalog = 'testtdr'
    catalog_config = {
        catalog:
        config.Catalog(name=catalog,
                       atlas='hca',
                       internal=False,
                       plugins=dict(
                           metadata=config.Catalog.Plugin(name='hca'),
                           repository=config.Catalog.Plugin(name='tdr')),
                       sources=mock_sources)
    }

    @mock.patch.dict(os.environ, AZUL_TDR_SERVICE_URL=mock_service_url)
    @mock.patch.object(TerraClient, '_http_client',
                       AuthorizedHttp(
                           MagicMock(),
                           urllib3.PoolManager(ca_certs=certifi.where())))
    def test_repository_files_proxy(self, mock_get_cached_sources):
        """The files endpoint resolves a file's DRS path to a pre-signed GCS
        URL: returned in a JSON body (with a 302 'Status' field) when the
        /fetch variant is used, or as an HTTP 302 Location header otherwise.
        """
        mock_get_cached_sources.return_value = []
        client = http_client()

        file_uuid = '701c9a63-23da-4978-946b-7576b6ad088a'
        file_version = '2018-09-12T121154.054628Z'
        organic_file_name = 'foo.txt'
        drs_path_id = 'v1_c99baa6f-24ce-4837-8c4a-47ca4ec9d292_b967ecc9-98b2-43c6-8bac-28c0a4fa7812'
        file_doc = {
            'name': organic_file_name,
            'version': file_version,
            'drs_path': drs_path_id,
            'size': 1,
        }
        for fetch in True, False:
            with self.subTest(fetch=fetch):
                # Bypass the index: serve a canned file document directly.
                with mock.patch.object(RepositoryService,
                                       'get_data_file',
                                       return_value=file_doc):
                    azul_url = self.base_url.set(
                        path=['repository', 'files', file_uuid],
                        args=dict(catalog=self.catalog, version=file_version))
                    if fetch:
                        azul_url.path.segments.insert(0, 'fetch')

                    file_name = 'foo.gz'
                    gs_bucket_name = 'gringotts-wizarding-bank'
                    gs_drs_id = 'some_dataset_id/some_object_id'
                    gs_file_url = f'gs://{gs_bucket_name}/{gs_drs_id}/{file_name}'

                    # The signed URL the mocked DRS resolution will hand back.
                    pre_signed_gs = furl(url=gs_file_url,
                                         args={
                                             'X-Goog-Algorithm':
                                             'SOMEALGORITHM',
                                             'X-Goog-Credential':
                                             'SOMECREDENTIAL',
                                             'X-Goog-Date': 'CURRENTDATE',
                                             'X-Goog-Expires': '900',
                                             'X-Goog-SignedHeaders': 'host',
                                             'X-Goog-Signature':
                                             'SOMESIGNATURE',
                                         })
                    with mock.patch.object(DRSClient,
                                           'get_object',
                                           return_value=Access(
                                               method=AccessMethod.https,
                                               url=str(pre_signed_gs))):
                        response = client.request('GET',
                                                  str(azul_url),
                                                  redirect=False)
                        self.assertEqual(200 if fetch else 302,
                                         response.status)
                        if fetch:
                            # /fetch wraps the redirect in a JSON body.
                            response = json.loads(response.data)
                            self.assertUrlEqual(pre_signed_gs,
                                                response['Location'])
                            self.assertEqual(302, response["Status"])
                        else:
                            # Plain variant: redirect via response headers.
                            response = dict(response.headers)
                            self.assertUrlEqual(pre_signed_gs,
                                                response['Location'])

    def test_list_sources(
        self,
        mock_get_cached_sources,
    ):
        """The sources endpoint lists only the current catalog's sources,
        whether served from the cache or, on a cache miss, fetched from TDR,
        and independently of request authentication.
        """
        # Includes extra sources to check that the endpoint only returns results
        # for the current catalog
        extra_sources = ['foo', 'bar']
        mock_source_names_by_id = {
            str(i): source_name
            for i, source_name in enumerate(self.mock_source_names +
                                            extra_sources)
        }
        # Expected endpoint output: id/spec pairs for this catalog only.
        mock_source_jsons = [{
            'id':
            id,
            'spec':
            str(
                TDRSourceSpec.parse(
                    self.make_mock_source_spec(name)).effective)
        } for id, name in mock_source_names_by_id.items()
                             if name not in extra_sources]
        client = http_client()
        azul_url = furl(self.base_url,
                        path='/repository/sources',
                        query_params=dict(catalog=self.catalog))

        def _list_sources(headers) -> JSON:
            # GET the endpoint and return its parsed JSON body.
            response = client.request('GET', str(azul_url), headers=headers)
            self.assertEqual(response.status, 200)
            return json.loads(response.data)

        def _test(*, authenticate: bool, cache: bool):
            with self.subTest(authenticate=authenticate, cache=cache):
                response = _list_sources({'Authorization': 'Bearer foo_token'}
                                         if authenticate else {})
                self.assertEqual(
                    response, {
                        'sources': [{
                            'sourceId': source['id'],
                            'sourceSpec': source['spec']
                        } for source in mock_source_jsons]
                    })

        # Cache hit: sources come straight from the cache.
        mock_get_cached_sources.return_value = mock_source_jsons
        _test(authenticate=True, cache=True)
        _test(authenticate=False, cache=True)
        # Cache miss: fall back to querying TDR for snapshot names.
        mock_get_cached_sources.return_value = None
        mock_get_cached_sources.side_effect = NotFound('foo_token')
        with mock.patch('azul.terra.TDRClient.snapshot_names_by_id',
                        return_value=mock_source_names_by_id):
            _test(authenticate=True, cache=False)
            _test(authenticate=False, cache=False)