def do_query(project_id, project_name, dataset_name, table_name, gene_symbol, value_field,
             cohort_dataset, cohort_table, cohort_id_array):
    """Run a synchronous BigQuery query for a gene/cohort combination.

    Builds the SQL via build_query, submits it through the jobs().query
    endpoint of project *project_id*, and flattens the response rows into
    dicts with 'patient_id', 'sample_id', 'aliquot_id' and a float 'value'.
    Returns an empty list when the query matched no rows.
    """
    service = authorize_credentials_with_Google()

    sql = build_query(project_name, dataset_name, table_name, gene_symbol, value_field,
                      cohort_dataset, cohort_table, cohort_id_array)
    response = service.jobs().query(projectId=project_id, body={'query': sql}).execute()

    # Nothing matched: skip the 'rows' key entirely (it may be absent).
    if int(response['totalRows']) == 0:
        return []

    return [
        {
            'patient_id': fields[0]['v'],
            'sample_id': fields[1]['v'],
            'aliquot_id': fields[2]['v'],
            'value': float(fields[3]['v'])
        }
        for fields in (row['f'] for row in response['rows'])
    ]
    def _streaming_insert(self, rows):
        """Stream *rows* into the configured BigQuery table.

        Authorizes a fresh service client, converts the rows into an
        insertAll request body via self._build_request_body_from_rows, and
        returns the raw API response from tabledata().insertAll.
        """
        service = authorize_credentials_with_Google()
        tabledata = service.tabledata()

        request = tabledata.insertAll(
            projectId=self.project_id,
            datasetId=self.dataset_id,
            tableId=self.table_id,
            body=self._build_request_body_from_rows(rows))

        return request.execute()
# Exemple #3 (score: 0)
def get_feature_vectors_tcga_only(params_array, poll_retry_limit=20, skip_formatting_for_plot=False):
    """Submit one TCGA BigQuery job per parameter object and collect results.

    Each entry in *params_array* becomes a job submitted through
    submit_tcga_job; the resulting job descriptors are then polled (up to
    *poll_retry_limit* attempts) by get_submitted_job_results, whose result
    is returned unchanged.
    """
    service = authorize_credentials_with_Google()
    cohort_settings = settings.GET_BQ_COHORT_SETTINGS()

    # Fan out: one BigQuery job per requested feature.
    providers = [submit_tcga_job(param, service, cohort_settings) for param in params_array]

    return get_submitted_job_results(providers, settings.BQ_PROJECT_ID,
                                     poll_retry_limit, skip_formatting_for_plot)
# Exemple #4 (score: 0)
def do_query(
    project_id,
    project_name,
    dataset_name,
    table_name,
    gene_label,
    value_field,
    cohort_dataset,
    cohort_table,
    cohort_id_array,
):
    """Run a synchronous BigQuery query and emit two records per result row.

    The query (built by build_query) returns rows carrying one patient id
    (field 0), two sample/aliquot pairs (fields 1/2 and 3/4) and a shared
    value (field 5). Each BigQuery row therefore yields two output dicts.
    Returns an empty list when the query matched no rows.
    """
    service = authorize_credentials_with_Google()

    sql = build_query(
        project_name, dataset_name, table_name, gene_label, value_field, cohort_dataset, cohort_table, cohort_id_array
    )

    response = service.jobs().query(projectId=project_id, body={"query": sql}).execute()

    records = []
    # Nothing matched: skip the 'rows' key entirely (it may be absent).
    if int(response["totalRows"]) == 0:
        return records

    for row in response["rows"]:
        fields = row["f"]
        # Two (sample, aliquot) pairs share one patient id and one value.
        for sample_idx, aliquot_idx in ((1, 2), (3, 4)):
            records.append(
                {
                    "patient_id": fields[0]["v"],
                    "sample_id": fields[sample_idx]["v"],
                    "aliquot_id": fields[aliquot_idx]["v"],
                    "value": fields[5]["v"],
                }
            )

    return records
# Exemple #5 (score: 0)
 def precomputed_results(self, request):
     """Return precomputed pairwise associations from the BRCA test table.

     Used by the web application. Runs a fixed synchronous query against
     [isb-cgc:test.brca_pwpv] (LIMIT 50) and, for each result row, builds a
     pair of Feature nodes plus a logged p-value, wrapped in a CircvizOutput.

     Fix: the original query was assembled from adjacent string literals
     without separating whitespace ('..."null"' immediately followed by
     'and A_startPos...' and by 'LIMIT 50;'), which glued SQL tokens
     together ("null"and / "null"LIMIT); the fragments now end in a space.

     NOTE(review): the .encode("utf-8") calls are Python-2 idioms; under
     Python 3 the subsequent str-argument .replace("chr", "") on bytes
     would raise TypeError — confirm the target runtime.
     """
     bq_table = "brca_pwpv"  # NOTE(review): unused; the table name is hard-coded in the query below
     query = (
         "SELECT A_valueType, A_chr, A_startPos, A_endPos, A_featureName, A_N, A_dataType,"
         "B_valueType, B_chr, B_startPos, B_endPos, B_featureName, B_N, B_dataType,"
         "logP  FROM [isb-cgc:test.brca_pwpv] "
         'where B_chr != "null" '
         'and A_chr != "null" '
         'and A_startPos != "null" and A_endPos != "null" '
         'and B_startPos != "null" and B_endPos != "null" '
         "LIMIT 50;"
     )
     query_body = {"query": query}
     bigquery_service = authorize_credentials_with_Google()
     table_data = bigquery_service.jobs()
     # Synchronous query against the project configured in Django settings.
     query_response = table_data.query(projectId=settings.BQ_PROJECT_ID, body=query_body).execute()
     association_list = []
     feature_list = []  # populated below but never returned — kept for parity with the original
     for row in query_response["rows"]:
         # Row layout: fields 0-6 describe feature A, 7-13 feature B, 14 is logP.
         node1 = Feature(
             annotated_type=row["f"][0]["v"].encode("utf-8") if row["f"][0]["v"] else None,
             chr=row["f"][1]["v"].encode("utf-8").replace("chr", "") if row["f"][1]["v"] else None,
             start=int(row["f"][2]["v"]) if row["f"][2]["v"] else None,
             end=int(row["f"][3]["v"]) if row["f"][3]["v"] else None,
             label=row["f"][4]["v"].encode("utf-8") if row["f"][4]["v"] else "",
             mutation_count=int(row["f"][5]["v"]) if row["f"][5]["v"] else None,
             source=row["f"][6]["v"].encode("utf-8") if row["f"][6]["v"] else None,
         )
         node2 = Feature(
             annotated_type=row["f"][7]["v"].encode("utf-8") if row["f"][7]["v"] else None,
             chr=row["f"][8]["v"].encode("utf-8").replace("chr", "") if row["f"][8]["v"] else None,
             start=int(row["f"][9]["v"]) if row["f"][9]["v"] else None,
             end=int(row["f"][10]["v"]) if row["f"][10]["v"] else None,
             label=row["f"][11]["v"].encode("utf-8") if row["f"][11]["v"] else "",
             mutation_count=int(row["f"][12]["v"]) if row["f"][12]["v"] else None,
             source=row["f"][13]["v"].encode("utf-8") if row["f"][13]["v"] else None,
         )
         logP = float(row["f"][14]["v"])
         association_list.append(Association(node1=node1, node2=node2, logged_pvalue=logP))
         feature_list.append(node1)
         feature_list.append(node2)
     return CircvizOutput(items=association_list)
# Exemple #6 (score: 0)
def submit_jobs_with_user_data(params_array):
    """Submit BigQuery jobs for both TCGA and user-supplied feature data.

    For each parameter object: a TCGA job is submitted when the feature
    handler flags 'include_tcga', and a UserFeatureProvider job is submitted
    for each feature that has user studies and is actually queryable for the
    given cohorts. Returns the list of job descriptors for polling.
    """
    bigquery_service = authorize_credentials_with_Google()
    cohort_settings = settings.GET_BQ_COHORT_SETTINGS()
    provider_array = []

    for parameter_object in params_array:
        feature_id = parameter_object.feature_id
        cohort_id_array = parameter_object.cohort_id_array

        user_data = user_feature_handler(feature_id, cohort_id_array)

        if user_data['include_tcga']:
            provider_array.append(
                submit_tcga_job(parameter_object, bigquery_service, cohort_settings))

        if not user_data['user_studies']:
            continue

        converted_feature_id = user_data['converted_feature_id']
        user_feature_id = user_data['user_feature_id']
        logging.debug("user_feature_id: {0}".format(user_feature_id))
        provider = UserFeatureProvider(converted_feature_id, user_feature_id=user_feature_id)

        # The UserFeatureProvider instance might not generate a BigQuery query
        # and job at all given the combination of cohort(s) and feature
        # identifiers; such providers are skipped so the polling loop never
        # waits on a job that was never submitted.
        if not provider.is_queryable(cohort_id_array):
            logging.debug("No UserFeatureDefs for '{0}'".format(converted_feature_id))
            continue

        job_reference = provider.get_data_job_reference(
            cohort_id_array, cohort_settings.dataset_id, cohort_settings.table_id)

        logging.info("Submitted USER {job_id}: {fid} - {cohorts}".format(job_id=job_reference['jobId'], fid=feature_id,
                                                                         cohorts=str(cohort_id_array)))
        provider_array.append({
            'feature_id': feature_id,
            'provider': provider,
            'ready': False,
            'job_reference': job_reference
        })

    return provider_array
    def get_bq_service(self):
        """Lazily build and cache the authorized BigQuery service client.

        The client is created on first use and stored on the instance; every
        subsequent call returns the cached object.
        """
        service = self.bigquery_service
        if service is None:
            service = authorize_credentials_with_Google()
            self.bigquery_service = service
        return service