def test_is_dataframe(self):
        """A real pandas DataFrame must be accepted by filters.is_dataframe."""
        columns = {
            'category': ['a', 'b', 'c'],
            'gender': ['F', 'M', 'F'],
            'age': [1, 5, 4],
        }
        frame = pd.DataFrame(columns)

        verdict = filters.is_dataframe(frame)
        self.assertTrue(verdict)
Exemple #2
0
    def test_is_dataframe(self):
        """filters.is_dataframe reports True when handed a pandas DataFrame."""
        # Small mixed-type frame: two string columns and one integer column.
        sample = pd.DataFrame(
            {
                'category': ['a', 'b', 'c'],
                'gender': ['F', 'M', 'F'],
                'age': [1, 5, 4],
            }
        )
        recognised = filters.is_dataframe(sample)

        self.assertTrue(recognised)
    def test_is_not_dataframe(self):
        """Values that are not DataFrames must each be rejected by filters.is_dataframe."""
        # One representative per builtin kind: str, int, flat list, nested list, dict.
        not_dataframes = (
            'foo',
            42,
            [1, 2, 3],
            [[1, 2, 3], [1, 2, 3], [1, 2, 3]],
            {'a': 1},
        )

        for candidate in not_dataframes:
            verdict = filters.is_dataframe(candidate)
            self.assertFalse(verdict)
Exemple #4
0
    def test_is_not_dataframe(self):
        """filters.is_dataframe returns a falsy result for every non-DataFrame input."""
        text_value = 'foo'
        number_value = 42
        flat_list = [1, 2, 3]
        nested_list = [[1, 2, 3], [1, 2, 3], [1, 2, 3]]
        mapping = {'a': 1}

        # Checked in the same order as they are declared above.
        for impostor in [text_value, number_value, flat_list, nested_list, mapping]:
            self.assertFalse(filters.is_dataframe(impostor))
def write_to_db_agnostic(engine, table, dataframe, schema=None):
    """
    Given an sqlalchemy engine or sqlite connection, appends a dataframe to an existing table.

    The row count of the destination table is read before and after the insert and the difference is
    printed alongside the number of rows in the dataframe.

    Args:
        engine (sqlalchemy.engine.base.Engine, sqlite3.Connection): the database engine or connection object
        table (str): destination table
        dataframe (pandas.DataFrame): the data to write
        schema (str): the optional database schema; only applied when `engine` is an sqlalchemy engine

    Raises:
        HealthcareAIError: if `engine`, `dataframe` or `table` has the wrong type, if the destination table
            is reported missing for an sqlalchemy engine, or if the count/insert statements fail.
    """
    # Validate inputs
    is_engine = isinstance(engine, sqlalchemy.engine.base.Engine)

    # sqlite3 is an optional dependency: only touch the module when the loader flag says it imported
    is_sqlite_connection = bool(sqlite3_is_loaded) and isinstance(engine, sqlite3.Connection)

    if not is_engine and not is_sqlite_connection:
        raise HealthcareAIError('sqlalchemy engine or sqlite connection required, a {} was given'.format(type(engine)))
    if not is_dataframe(dataframe):
        raise HealthcareAIError('Dataframe required, a {} was given'.format(type(dataframe)))
    if not isinstance(table, str):
        raise HealthcareAIError('Table name required, a {} was given'.format(type(table)))

    # Verify that tables exist for databases
    if is_engine and not healthcareai.common.database_validators.does_table_exist(engine, table, schema):
        raise HealthcareAIError('Destination table ({}) does not exist. Please create it.'.format(table))
    elif is_sqlite_connection:
        # NOTE(review): presumably raises when the table is missing - confirm against the validator
        healthcareai.common.database_validators.verify_sqlite_table_exists(engine, table)

    # The schema was used for the existence check above, so the insert and the counts must target the
    # same schema-qualified table. The sqlite path never consulted the schema, so it stays unqualified.
    target_schema = schema if is_engine else None
    counted_table = '{}.{}'.format(target_schema, table) if target_schema else table

    # Identifiers cannot be bound as SQL parameters; `table` and `schema` must come from trusted callers.
    count_query = 'select count(*) from {}'.format(counted_table)

    # Referencing sqlite3.Error unconditionally would raise NameError when sqlite3 failed to import,
    # masking the real database error. Build the tuple of catchable errors from what is available.
    insert_errors = [sqlalchemy.exc.SQLAlchemyError, pd.io.sql.DatabaseError]
    if sqlite3_is_loaded:
        insert_errors.append(sqlite3.Error)

    try:
        # Count before (iloc[0, 0] avoids the deprecated positional Series[int] lookup)
        before_count = pd.read_sql(count_query, engine).iloc[0, 0]

        # Insert into database
        dataframe.to_sql(table, engine, schema=target_schema, if_exists='append', index=False)

        # Count after
        after_count = pd.read_sql(count_query, engine).iloc[0, 0]
        delta = after_count - before_count
        print('\nSuccessfully inserted {} rows. Dataframe contained {} rows'.format(delta, len(dataframe)))

    # TODO catch other errors here:
    except tuple(insert_errors) as err:
        # Chain the driver error so the root cause stays visible in the traceback
        raise HealthcareAIError("""Failed to insert values into {}.\n
        Please verify that the table [{}] exists.\n
        Was your test insert successful earlier?\n
        If so, what has changed with your database/table/entity since then?""".format(table, table)) from err
def write_to_db_agnostic(engine, table, dataframe, schema=None):
    """
    Given an sqlalchemy engine or sqlite connection, appends a dataframe to an existing table.

    Counts the rows in the destination table before and after the insert and prints the difference
    together with the number of rows in the dataframe.

    Args:
        engine (sqlalchemy.engine.base.Engine, sqlite3.Connection): the database engine or connection object
        table (str): destination table
        dataframe (pandas.DataFrame): the data to write
        schema (str): the optional database schema; only applied when `engine` is an sqlalchemy engine

    Raises:
        HealthcareAIError: if `engine`, `dataframe` or `table` has the wrong type, if the destination table
            is reported missing for an sqlalchemy engine, or if the count/insert statements fail.
    """
    # Validate inputs
    is_engine = isinstance(engine, sqlalchemy.engine.base.Engine)

    # sqlite3 may not have been importable; the module is only referenced when the flag is set
    if sqlite3_is_loaded:
        is_sqlite_connection = isinstance(engine, sqlite3.Connection)
    else:
        is_sqlite_connection = False

    if not (is_engine or is_sqlite_connection):
        raise HealthcareAIError(
            'sqlalchemy engine or sqlite connection required, a {} was given'.
            format(type(engine)))
    if not is_dataframe(dataframe):
        raise HealthcareAIError('Dataframe required, a {} was given'.format(
            type(dataframe)))
    if not isinstance(table, str):
        raise HealthcareAIError('Table name required, a {} was given'.format(
            type(table)))

    # Verify that tables exist for databases
    validators = healthcareai.common.database_validators
    if is_engine and not validators.does_table_exist(engine, table, schema):
        raise HealthcareAIError(
            'Destination table ({}) does not exist. Please create it.'.format(
                table))
    elif is_sqlite_connection:
        # NOTE(review): presumably raises when the table is missing - confirm against the validator
        validators.verify_sqlite_table_exists(engine, table)

    # Keep the insert and the counts on the same table the existence check looked at: qualify with the
    # schema for sqlalchemy engines. The sqlite branch never used the schema, so it is left out there.
    if is_engine and schema:
        effective_schema = schema
        qualified_table = '{}.{}'.format(schema, table)
    else:
        effective_schema = None
        qualified_table = table

    def _count_rows():
        # Identifiers cannot be bound as SQL parameters; names must come from trusted callers.
        # iloc[0, 0] reads the single scalar without the deprecated positional Series[int] lookup.
        counts = pd.read_sql('select count(*) from {}'.format(qualified_table),
                             engine)
        return counts.iloc[0, 0]

    # sqlite3.Error can only be named when sqlite3 imported; otherwise evaluating the except clause
    # would itself raise NameError and hide the real database failure.
    if sqlite3_is_loaded:
        insert_errors = (sqlalchemy.exc.SQLAlchemyError, sqlite3.Error,
                         pd.io.sql.DatabaseError)
    else:
        insert_errors = (sqlalchemy.exc.SQLAlchemyError,
                         pd.io.sql.DatabaseError)

    try:
        # Count before
        before_count = _count_rows()

        # Insert into database
        dataframe.to_sql(table,
                         engine,
                         schema=effective_schema,
                         if_exists='append',
                         index=False)

        # Count after
        after_count = _count_rows()
        delta = after_count - before_count
        print('\nSuccessfully inserted {} rows. Dataframe contained {} rows'.
              format(delta, len(dataframe)))

    # TODO catch other errors here:
    except insert_errors as err:
        # Chain the driver error so the root cause stays visible in the traceback
        raise HealthcareAIError("""Failed to insert values into {}.\n
        Please verify that the table [{}] exists.\n
        Was your test insert successful earlier?\n
        If so, what has changed with your database/table/entity since then?""".
                                format(table, table)) from err