def _calc_label_stats_cat_feature(self) -> pd.DataFrame:
        """Computes label statistics grouped by categorical feature values.

        A struct-column SQL segment is generated for the categorical feature
        list and patched, together with the features table path and the label
        column, into the `calc_num_label_stats` template taken from
        `_NUMERICAL_LABEL_SQL_FILES`. The resulting query is then executed
        with `self._bq_client`.

        Returns:
          The query output produced by `viz_utils.execute_sql`.
        """
        logging.info('Calculating statistics from label.')

        logging.info('Creating the sql code.')
        struct_columns_sql = self._create_struct_column_list_sql(
            self._categorical_feature_list)
        template_params = dict(
            bq_features_table=self._features_table_path,
            label_column=self._label_column,
            sql_code_segment=struct_columns_sql,
        )
        template_path = _NUMERICAL_LABEL_SQL_FILES['calc_num_label_stats']
        query = viz_utils.patch_sql(template_path, template_params)
        logging.info('Finished creating the sql code.')

        logging.info('Executing the sql code.')
        label_stats = viz_utils.execute_sql(self._bq_client, query)
        logging.info('Finished executing the sql code.')
        return label_stats
Ejemplo n.º 2
0
    def _extract_numerical_feature_sample(self) -> pd.DataFrame:
        """Pulls a random sample of values for the selected numerical features.

        The template at `_EXTRACT_NUM_FEATURE_SAMPLE_SQL_PATH` is configured
        with the features table path, the label column, the positive and
        negative class labels, the per-class instance counts and the column
        list generated from `self._numerical_feature_list`. The configured
        query is then executed with `self._bq_client`.

        Returns:
          The extracted values as returned by `viz_utils.execute_sql`.
        """
        logging.info('Extracting a random sample of numerical features.')

        logging.info('Creating the sql code.')
        column_list_sql = self._create_column_list_sql(
            self._numerical_feature_list)
        template_params = dict(
            bq_features_table=self._features_table_path,
            label_column=self._label_column,
            positive_class_label=self._positive_class_label,
            negative_class_label=self._negative_class_label,
            num_pos_instances=self._num_pos_instances,
            num_neg_instances=self._num_neg_instances,
            sql_code_segment=column_list_sql,
        )
        query = utils.configure_sql(_EXTRACT_NUM_FEATURE_SAMPLE_SQL_PATH,
                                    template_params)
        logging.info('Finished creating the sql code.')

        logging.info('Executing the sql code.')
        feature_sample = viz_utils.execute_sql(self._bq_client, query)
        logging.info('Finished executing the sql code.')
        return feature_sample
    def _calc_categorical_feature_stats(self) -> pd.DataFrame:
        """Computes statistics for the selected categorical features.

        The `calc_cat_feature_stats` template is taken from
        `_BINARY_LABEL_SQL_FILES` when `self._label_type` equals 'binary' and
        from `_NUMERICAL_LABEL_SQL_FILES` for any other label type. It is
        patched with the features table path and the struct-column SQL segment
        built from the categorical feature list, then executed.

        Returns:
          The query output produced by `viz_utils.execute_sql`.
        """
        logging.info('Calculating statistics from categorical features.')

        logging.info('Creating the sql code.')
        struct_columns_sql = self._create_struct_column_list_sql(
            self._categorical_feature_list)
        template_params = dict(
            bq_features_table=self._features_table_path,
            sql_code_segment=struct_columns_sql,
        )

        # Only the template mapping depends on the label type; the key is
        # shared by both mappings.
        template_path = (
            _BINARY_LABEL_SQL_FILES['calc_cat_feature_stats']
            if self._label_type == 'binary' else
            _NUMERICAL_LABEL_SQL_FILES['calc_cat_feature_stats'])

        query = viz_utils.patch_sql(template_path, template_params)
        logging.info('Finished creating the sql code.')

        logging.info('Executing the sql code.')
        feature_stats = viz_utils.execute_sql(self._bq_client, query)
        logging.info('Finished executing the sql code.')
        return feature_stats
Ejemplo n.º 4
0
    def test_execute_sql_returns_pd_dataframe(self):
        """Checks that execute_sql returns the DataFrame of the query job.

        The mocked client's query job is stubbed so that `to_dataframe()`
        yields `TESTDATA_1`; the test then verifies that `result()` was called
        exactly once on that job and that the returned frame equals the stub.
        """
        mocked_query_job = self.mock_bq_client.query.return_value
        mocked_query_job.to_dataframe.return_value = TESTDATA_1

        actual = viz_utils.execute_sql(
            self.mock_bq_client, 'SELECT * FROM project.dataset.table;')

        mocked_query_job.result.assert_called_once()
        pd.testing.assert_frame_equal(actual, TESTDATA_1)
    def _extract_numerical_feature_sample(self) -> pd.DataFrame:
        """Pulls a random sample of values for the selected numerical features.

        For a 'binary' label type the `extract_num_feature` template from
        `_BINARY_LABEL_SQL_FILES` is used and the query parameters are extended
        with both class labels and the per-class instance counts. For any other
        label type the template comes from `_NUMERICAL_LABEL_SQL_FILES` and
        only `num_instances` is added.

        Returns:
          The extracted values as returned by `viz_utils.execute_sql`.
        """

        def _as_sql_value(label):
            # String labels are wrapped in single quotes; any other type is
            # handed to the template unchanged.
            return f"'{label}'" if isinstance(label, str) else label

        logging.info('Extracting a random sample of numerical features.')

        logging.info('Creating the sql code.')
        column_list_sql = self._create_column_list_sql(
            self._numerical_feature_list)
        template_params = dict(
            bq_features_table=self._features_table_path,
            label_column=self._label_column,
            column_list_sql=column_list_sql,
        )

        if self._label_type == 'binary':
            template_path = _BINARY_LABEL_SQL_FILES['extract_num_feature']
            template_params['positive_class_label'] = _as_sql_value(
                self._positive_class_label)
            template_params['negative_class_label'] = _as_sql_value(
                self._negative_class_label)
            template_params['num_pos_instances'] = self._num_pos_instances
            template_params['num_neg_instances'] = self._num_neg_instances
        else:
            template_path = _NUMERICAL_LABEL_SQL_FILES['extract_num_feature']
            template_params['num_instances'] = self._num_instances

        query = viz_utils.patch_sql(template_path, template_params)
        logging.info('Finished creating the sql code.')

        logging.info('Executing the sql code.')
        feature_sample = viz_utils.execute_sql(self._bq_client, query)
        logging.info('Finished executing the sql code.')
        return feature_sample
Ejemplo n.º 6
0
    def _calc_numerical_fact_stats(self) -> pd.DataFrame:
        """Computes statistics for the selected numerical fact variables.

        The template at `_CALC_NUM_FACT_STATS_SQL_PATH` is configured with the
        numerical facts table path and executed with `self._bq_client`. The
        `date` column of the result is converted with `pd.to_datetime` before
        the frame is returned.

        Returns:
          The calculated statistics, with `date` converted by `pd.to_datetime`.
        """
        logging.info('Calculating statistics from numerical facts.')

        logging.info('Reading the sql query from the file.')
        template_params = dict(
            bq_facts_table=self._numerical_facts_table_path)
        query = utils.configure_sql(_CALC_NUM_FACT_STATS_SQL_PATH,
                                    template_params)

        fact_stats = viz_utils.execute_sql(self._bq_client, query)
        logging.info('Finished calculating statistics from numerical facts.')

        # Assign in place so the object returned by execute_sql is the one
        # handed back to the caller.
        fact_stats['date'] = pd.to_datetime(fact_stats['date'])
        return fact_stats
Ejemplo n.º 7
0
    def _calc_categorical_fact_stats(self) -> pd.DataFrame:
        """Computes statistics for the selected categorical fact variables.

        The template at `_CALC_CAT_FACT_STATS_SQL_PATH` is configured with the
        facts table path, the categorical fact list and the number of top
        levels, then executed with `self._bq_client`.

        Returns:
          The query output produced by `viz_utils.execute_sql`.
        """
        logging.info('Calculating statistics from categorical facts.')

        logging.info('Reading the sql query from the file.')
        template_params = dict(
            bq_facts_table=self._facts_table_path,
            categorical_fact_list=self._categorical_facts,
            number_top_levels=self._number_top_levels,
        )
        query = utils.configure_sql(_CALC_CAT_FACT_STATS_SQL_PATH,
                                    template_params)

        fact_stats = viz_utils.execute_sql(self._bq_client, query)
        logging.info('Finished calculating statistics from categorical facts.')
        return fact_stats
Ejemplo n.º 8
0
    def _calc_categorical_feature_stats(self) -> pd.DataFrame:
        """Computes statistics for the selected categorical features.

        The template at `_CALC_CAT_FEATURE_STATS_SQL_PATH` is configured with
        the features table path and the struct-column SQL segment built from
        the categorical feature list, then executed with `self._bq_client`.

        Returns:
          The query output produced by `viz_utils.execute_sql`.
        """
        logging.info('Calculating statistics from categorical features.')

        logging.info('Creating the sql code.')
        struct_columns_sql = self._create_struct_column_list_sql(
            self._categorical_feature_list)
        template_params = dict(
            bq_features_table=self._features_table_path,
            sql_code_segment=struct_columns_sql,
        )
        query = utils.configure_sql(_CALC_CAT_FEATURE_STATS_SQL_PATH,
                                    template_params)
        logging.info('Finished creating the sql code.')

        logging.info('Executing the sql code.')
        feature_stats = viz_utils.execute_sql(self._bq_client, query)
        logging.info('Finished executing the sql code.')
        return feature_stats