def _get_source_data(sql, hostname):
    """Run a query and return the value in its first row and first column.

    Args:
        sql(str): The SQL query to execute
        hostname(str): Host passed to ``rds_connection`` to open the
            database connection

    Returns:
        The value found in the first column of the first row of the result

    Raises:
        IndexError: If the query returns no rows
    """
    connection = rds_connection(hostname)
    try:
        data = pdsql.read_sql(sql, connection)
    finally:
        # Release the connection even when the query fails
        connection.close()

    # Explicit positional lookup; integer `[]` on a label-indexed Series
    # relies on a positional fallback that newer pandas deprecates
    return data.iloc[0, 0]
def _get_source_data(sql, hostname, sample_size):
    """Gets a DataFrame with the rows returned by the sampled query

    Every occurrence of ``LIMIT_PLACEHOLDER`` in ``sql`` (matched
    case-insensitively) is replaced with ``sample_size`` before the query
    is executed. The DataFrame is indexed by every column except the last.

    Args:
        sql(str): The query to run, containing ``LIMIT_PLACEHOLDER``
        hostname(str): Host passed to ``rds_connection`` to open the
            database connection
        sample_size(int): Value substituted for ``LIMIT_PLACEHOLDER``

    Returns:
        DataFrame: The query result, indexed by all columns but the last
    """
    query = re.sub(
        r'(?i)LIMIT_PLACEHOLDER',
        str(sample_size),
        sql,
    )

    connection = rds_connection(hostname)
    try:
        data = pdsql.read_sql(query, connection)
    finally:
        # Release the connection even when the query fails
        connection.close()

    # All columns apart from last are PK columns
    return data.set_index(list(data.columns[:-1]))
def get_source_data(sql, hostname, sample_size):
    """Gets a DataFrame with the rows returned by the sampled query

    Every occurrence of ``LIMIT_PLACEHOLDER`` in ``sql`` (matched
    case-insensitively) is replaced with ``sample_size`` before the query
    is executed. The DataFrame is indexed by every column except the last.

    Args:
        sql(str): The query to run, containing ``LIMIT_PLACEHOLDER``
        hostname(str): Host passed to ``rds_connection`` to open the
            database connection
        sample_size(int): Value substituted for ``LIMIT_PLACEHOLDER``

    Returns:
        DataFrame: The query result, indexed by all columns but the last
    """
    query = re.sub(
        r'(?i)LIMIT_PLACEHOLDER',
        str(sample_size),
        sql,
    )

    connection = rds_connection(hostname)
    try:
        data = pdsql.read_sql(query, connection)
    finally:
        # Release the connection even when the query fails
        connection.close()

    # All columns apart from last are PK columns
    return data.set_index(list(data.columns[:-1]))