def test_database_opens_new_connection(self, mock_connect, mock_fetch):
    db = Database()
    mock_connect.side_effect = test_connect
    mock_fetch.side_effect = test_fetch

    connection_1 = db.fetch(db, 'SELECT a from abc')
    connection_2 = db.fetch(db, 'SELECT b from def')

    self.assertEqual(2, mock_connect.call_count)
    self.assertNotEqual(connection_1, connection_2)

def fetch_data(
    database: Database,
    queries: List[Type[QueryBuilder]],
    dimensions: Iterable[Field],
    share_dimensions: Iterable[Field] = (),
    reference_groups=(),
) -> Tuple[int, pd.DataFrame]:
    queries = [str(query) for query in queries]

    # Indicate which dimensions need to be parsed as date types.
    # For this we create a dictionary with the dimension alias as key
    # and PANDAS_TO_DATETIME_FORMAT as value.
    pandas_parse_dates = {}
    for dimension in dimensions:
        unmodified_dimension = find_field_in_modified_field(dimension)
        if unmodified_dimension.data_type == DataType.date:
            pandas_parse_dates[alias_selector(unmodified_dimension.alias)] = PANDAS_TO_DATETIME_FORMAT

    results = database.fetch_dataframes(*queries, parse_dates=pandas_parse_dates)

    max_rows_returned = max([len(x) for x in results], default=0)
    logger.info('max_rows_returned', extra={'row_count': max_rows_returned, 'database': str(database)})

    return max_rows_returned, reduce_result_set(results, reference_groups, dimensions, share_dimensions)

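# The `pandas_parse_dates` mapping above follows the `pandas.read_sql`
# convention of column name -> datetime format. A minimal self-contained
# sketch of the same idea, applied by hand with `pd.to_datetime`; the format
# string is an assumption here, not necessarily the library's
# PANDAS_TO_DATETIME_FORMAT.
import pandas as pd

DATETIME_FORMAT = '%Y-%m-%d %H:%M:%S'  # hypothetical stand-in

df = pd.DataFrame({'$timestamp': ['2020-01-01 00:00:00', '2020-01-02 00:00:00']})
parse_dates = {'$timestamp': DATETIME_FORMAT}

# read_sql applies the parsing per column; the equivalent by hand:
for column, fmt in parse_dates.items():
    df[column] = pd.to_datetime(df[column], format=fmt)

assert df['$timestamp'].dtype.kind == 'M'  # parsed as datetime64
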
def test_database_reuse_passed_connection(self, mock_connect, mock_fetch):
    db = Database()
    mock_connect.side_effect = test_connect
    mock_fetch.side_effect = test_fetch

    with db.connect() as connection:
        connection_1 = db.fetch(db, 'SELECT a from abc', connection=connection)
        connection_2 = db.fetch(db, 'SELECT b from def', connection=connection)

    self.assertEqual(1, mock_connect.call_count)
    self.assertEqual(connection_1, connection_2)

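# The two connection tests above reference `test_connect` and `test_fetch`
# side-effect helpers that are not shown in this listing. A hypothetical
# minimal sketch of what they could look like, assuming the patched fetch
# receives the database explicitly and echoes back the connection it used:
from contextlib import contextmanager
from unittest.mock import MagicMock


@contextmanager
def test_connect():
    # Yield a fresh mock connection on every call so the tests can tell
    # newly opened connections apart from a reused one.
    yield MagicMock(name='connection')


def test_fetch(database, query, connection=None):
    # Return the connection that was used so the tests can compare
    # connection identity across calls.
    return connection
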
def test_fetch_dataframe(self, mock_connect, mock_read_sql):
    query = 'SELECT 1'
    mock_read_sql.return_value = 'OK'

    result = Database().fetch_dataframe(query)

    self.assertEqual(mock_read_sql.return_value, result)
    mock_read_sql.assert_called_once_with(query, mock_connect().__enter__())

def test_fetch(self, mock_connect):
    mock_cursor_func = mock_connect.return_value.__enter__.return_value.cursor
    mock_cursor = mock_cursor_func.return_value = MagicMock(name='mock_cursor')
    mock_cursor.fetchall.return_value = 'OK'

    result = Database().fetch('SELECT 1')

    self.assertEqual(mock_cursor.fetchall.return_value, result)
    mock_cursor_func.assert_called_once_with()
    mock_cursor.execute.assert_called_once_with('SELECT 1')
    mock_cursor.fetchall.assert_called_once_with()

def _do_fetch_data(query: str, database: Database):
    """
    Executes a query to fetch data from database middleware and builds/cleans
    the data as a data frame. The query execution is logged with its duration.

    :param query: Query string
    :param database: instance of `fireant.Database`, database middleware
    :return: `pd.DataFrame` constructed from the result of the query
    """
    with database.connect() as connection:
        return pd.read_sql(query, connection, coerce_float=True, parse_dates=True)

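# A self-contained sketch of the same pattern with sqlite3 standing in for
# the database middleware (illustrative only; real `fireant.Database`
# connections are dialect-specific):
import sqlite3
from contextlib import closing

import pandas as pd

with closing(sqlite3.connect(':memory:')) as connection:
    connection.execute('CREATE TABLE abc (a INTEGER)')
    connection.execute('INSERT INTO abc VALUES (1), (2)')
    df = pd.read_sql('SELECT a FROM abc', connection, coerce_float=True, parse_dates=True)

assert len(df) == 2 and list(df.columns) == ['a']
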
def test_database_api(self):
    db = Database()

    with self.assertRaises(NotImplementedError):
        db.connect()

    with self.assertRaises(NotImplementedError):
        db.trunc_date(Field('abc'), 'day')

def fetch_data(
    database: Database,
    queries: Union[Sized, Iterable],
    dimensions: Iterable[Field],
    share_dimensions: Iterable[Field] = (),
    reference_groups=(),
):
    queries = [
        str(query.limit(min(query._limit or float("inf"), database.max_result_set_size)))
        for query in queries
    ]
    results = database.fetch_dataframes(*queries)

    return reduce_result_set(results, reference_groups, dimensions, share_dimensions)

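# The effective limit computed above is simply the smaller of the query's own
# limit (None meaning "no limit requested") and the database-wide cap. A
# standalone sketch of that computation:
def effective_limit(query_limit, max_result_set_size):
    # float('inf') makes min() fall back to the cap when no limit is set.
    return min(query_limit or float('inf'), max_result_set_size)


assert effective_limit(None, 200_000) == 200_000
assert effective_limit(50, 200_000) == 50
assert effective_limit(500_000, 200_000) == 200_000
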
def fetch_data(
    database: Database,
    queries: List[Type[QueryBuilder]],
    dimensions: Iterable[Field],
    share_dimensions: Iterable[Field] = (),
    reference_groups=(),
) -> Tuple[int, pd.DataFrame]:
    queries = [str(query) for query in queries]

    # Indicate which dimensions need to be parsed as date types.
    # For this we create a dictionary with the dimension alias as key
    # and PANDAS_TO_DATETIME_FORMAT as value.
    pandas_parse_dates = {}
    for dimension in dimensions:
        unmodified_dimension = find_field_in_modified_field(dimension)
        if unmodified_dimension.data_type == DataType.date:
            pandas_parse_dates[alias_selector(unmodified_dimension.alias)] = PANDAS_TO_DATETIME_FORMAT

    results = database.fetch_dataframes(*queries, parse_dates=pandas_parse_dates)

    max_rows_returned = 0
    for result_df in results:
        row_count = len(result_df)
        if row_count > max_rows_returned:
            max_rows_returned = row_count
        if row_count > database.max_result_set_size:
            logger.warning('row_count_over_max', extra={'row_count': row_count, 'database': str(database)})
            # Drop all result rows above database.max_result_set_size in place.
            result_df.drop(result_df.index[database.max_result_set_size:], inplace=True)

    logger.info('max_rows_returned', extra={'row_count': max_rows_returned, 'database': str(database)})

    return max_rows_returned, reduce_result_set(results, reference_groups, dimensions, share_dimensions)

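# The in-place truncation step above can be seen in isolation with a plain
# DataFrame; a minimal sketch (the cap value is arbitrary here):
import pandas as pd

max_result_set_size = 3  # arbitrary cap for illustration

df = pd.DataFrame({'a': range(5)})
if len(df) > max_result_set_size:
    # Drop everything past the cap in place, mirroring the loop above.
    df.drop(df.index[max_result_set_size:], inplace=True)

assert len(df) == 3
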
def test_to_char(self):
    db = Database()

    to_char = db.to_char(Field('field'))
    self.assertEqual(str(to_char), 'CAST("field" AS VARCHAR)')

def test_no_custom_middlewares_specified_still_gives_connection_middleware(self):
    db = Database()

    self.assertEqual(1, len(db.middlewares))
    self.assertIs(db.middlewares[0], connection_middleware)
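
# For reference, a minimal sketch of the kind of `connection_middleware` this
# test assumes: a decorator that reuses an explicitly supplied connection, or
# opens one from the database when none is given. The library's actual
# implementation may differ.
from functools import wraps


def connection_middleware(func):
    @wraps(func)
    def wrapper(database, *args, connection=None, **kwargs):
        if connection is not None:
            # Reuse the caller's connection; do not open a new one.
            return func(database, *args, connection=connection, **kwargs)
        with database.connect() as connection:
            return func(database, *args, connection=connection, **kwargs)

    return wrapper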