def _get_source_data(sql, hostname):
    """Run a query and return the value in its first row and first column.

    Args:
        sql(str): The SQL query to execute
        hostname(str): Host identifier handed to ``rds_connection`` to open
            the database connection

    Returns:
        The single cell at row 0, column 0 of the query result

    Raises:
        IndexError: If the query returns no rows
    """
    connection = rds_connection(hostname)
    try:
        data = pdsql.read_sql(sql, connection)
    finally:
        # Release the connection even when the query itself fails
        connection.close()
    # Purely positional lookup; ``data.iloc[0][0]`` depended on the
    # deprecated integer-key fallback of Series.__getitem__
    return data.iloc[0, 0]
def _get_source_data(sql, hostname):
    """Run a query and return the value in its first row and first column.

    Args:
        sql(str): The SQL query to execute
        hostname(str): Host identifier handed to ``rds_connection`` to open
            the database connection

    Returns:
        The single cell at row 0, column 0 of the query result

    Raises:
        IndexError: If the query returns no rows
    """
    connection = rds_connection(hostname)
    try:
        data = pdsql.read_sql(sql, connection)
    finally:
        # Release the connection even when the query itself fails
        connection.close()
    # Purely positional lookup; ``data.iloc[0][0]`` depended on the
    # deprecated integer-key fallback of Series.__getitem__
    return data.iloc[0, 0]
def _get_source_data(sql, hostname, sample_size):
    """Run a size-limited query and return its rows as an indexed DataFrame.

    Every occurrence of the token ``LIMIT_PLACEHOLDER`` in ``sql`` (matched
    case-insensitively) is replaced by ``sample_size`` before execution.
    The result is indexed by all of its columns except the last one.

    Args:
        sql(str): The SQL query template containing ``LIMIT_PLACEHOLDER``
        hostname(str): Host identifier handed to ``rds_connection`` to open
            the database connection
        sample_size(int): Value substituted for the placeholder.
            NOTE(review): it is spliced into the SQL text, not bound as a
            parameter, so it should come from a trusted source

    Returns:
        DataFrame: The rows of the query, indexed by every column but the last
    """
    connection = rds_connection(hostname)
    try:
        query = re.sub(
            r'(?i)LIMIT_PLACEHOLDER',
            str(sample_size),
            sql,
        )
        data = pdsql.read_sql(query, connection)
    finally:
        # Release the connection even when the substitution or query fails
        connection.close()
    # All columns apart from last are PK columns
    return data.set_index(list(data.columns[:-1]))
Exemple #4
0
def get_source_data(sql, hostname, sample_size):
    """Run a size-limited query and return its rows as an indexed DataFrame.

    Every occurrence of the token ``LIMIT_PLACEHOLDER`` in ``sql`` (matched
    case-insensitively) is replaced by ``sample_size`` before execution.
    The result is indexed by all of its columns except the last one.

    Args:
        sql(str): The SQL query template containing ``LIMIT_PLACEHOLDER``
        hostname(str): Host identifier handed to ``rds_connection`` to open
            the database connection
        sample_size(int): Value substituted for the placeholder.
            NOTE(review): it is spliced into the SQL text, not bound as a
            parameter, so it should come from a trusted source

    Returns:
        DataFrame: The rows of the query, indexed by every column but the last
    """
    connection = rds_connection(hostname)
    try:
        query = re.sub(
            r'(?i)LIMIT_PLACEHOLDER',
            str(sample_size),
            sql,
        )
        data = pdsql.read_sql(query, connection)
    finally:
        # Release the connection even when the substitution or query fails
        connection.close()
    # All columns apart from last are PK columns
    return data.set_index(list(data.columns[:-1]))