예제 #1
0
def test_csv_import_dupcols():
    """Importing a CSV whose header repeats a column name must raise IOError.

    Two headers are exercised: an exact repeat (``foo,foo``) and a repeat
    that differs only in letter case (``foo,FOO``).
    """
    for csv_text in ('foo,foo\n0,1\n', 'foo,FOO\n0,1\n'):
        with bayesdb_csv_stream(csv_text) as (db, stream):
            with pytest.raises(IOError):
                bayeslite.bayesdb_read_csv(
                    db, 'bad', stream, header=True, create=True)
예제 #2
0
def test_csv_import():
    """Smoke test: load ``csv_data`` into table ``employees`` with create=True."""
    with bayesdb_csv_stream(csv_data) as (db, stream):
        bayeslite.bayesdb_read_csv(
            db, 'employees', stream, header=True, create=True)
예제 #3
0
def test_csv_import_dupcols():
    """A header with a repeated column name is expected to raise IOError.

    The second header differs from the first only in the case of the
    repeated name.
    """
    duplicate_headers = ('foo,foo', 'foo,FOO')
    for header_line in duplicate_headers:
        # Every case shares the same single data row.
        csv_text = header_line + '\n0,1\n'
        with bayesdb_csv_stream(csv_text) as (db, stream):
            with pytest.raises(IOError):
                bayeslite.bayesdb_read_csv(
                    db, 'bad', stream, header=True, create=True)
예제 #4
0
    def dot_csv(self, line):
        '''create table from CSV file
        <table> </path/to/data.csv>

        Create a SQL table named <table> from the data in
        </path/to/data.csv>.
        '''
        # XXX Lousy, lousy tokenizer.
        # Whitespace splitting means a pathname containing spaces is
        # rejected with the usage message.
        args = line.split()
        if len(args) == 2:
            table, pathname = args
        else:
            self.stdout.write('Usage: .csv <table> </path/to/data.csv>\n')
            return
        try:
            # NOTE(review): the 'U' mode flag was removed in Python 3.11;
            # it is kept here because this code targets Python 2.
            with open(pathname, 'rU') as csv_file:
                bayeslite.bayesdb_read_csv(
                    self._bdb, table, csv_file,
                    header=True, create=True, ifnotexists=False)
        except IOError as err:
            # I/O failures are reported as a one-line message.
            self.stdout.write('%s\n' % (err, ))
        except Exception:
            # Anything else gets the full traceback written to stdout.
            self.stdout.write(traceback.format_exc())
예제 #5
0
def test_csv_import_empty():
    """A completely empty CSV stream is expected to raise IOError on import."""
    with bayesdb_csv_stream('') as (db, stream):
        with pytest.raises(IOError):
            bayeslite.bayesdb_read_csv(
                db, 'empty', stream, header=True, create=True)
예제 #6
0
def test_csv_import_onecol_key():
    """A single-column table imports, but guessing a generator raises.

    The import itself must succeed; ``bayesdb_guess_generator`` is then
    expected to raise ValueError.
    """
    single_column_csv = 'foo\n0\none\n2\n'
    with bayesdb_csv_stream(single_column_csv) as (db, stream):
        # foo will be a key column, hence no columns to model.
        bayeslite.bayesdb_read_csv(
            db, 'onecol_key', stream, header=True, create=True)
        with pytest.raises(ValueError):
            bayeslite.guess.bayesdb_guess_generator(
                db, 'onecol_key_cc', 'onecol_key', 'crosscat')
예제 #7
0
def test_csv_import_nocols():
    """A CSV consisting of one blank line is expected to raise IOError."""
    with bayesdb_csv_stream('\n') as (db, stream):
        # CSV import rejects no columns.
        with pytest.raises(IOError):
            bayeslite.bayesdb_read_csv(
                db, 'nocols', stream, header=True, create=True)
예제 #8
0
def test_insert():
    """Insert one extra row through ``bqlfn.bayesdb_insert``.

    The table is loaded from the shared test CSV, a crosscat generator is
    guessed, initialized and analyzed, and then a single row is inserted.
    The test passes if no step raises.
    """
    with test_csv.bayesdb_csv_stream(test_csv.csv_data) as (db, stream):
        bayeslite.bayesdb_read_csv(db, 't', stream, header=True, create=True)
        guess.bayesdb_guess_generator(db, 't_cc', 't', 'crosscat')
        for bql in ('initialize 2 models for t_cc',
                    'analyze t_cc for 1 iteration wait'):
            db.execute(bql)
        gen_id = core.bayesdb_get_generator(db, 't_cc')
        new_row = (41, 'F', 96000, 73, 'data science', 2)
        bqlfn.bayesdb_insert(db, gen_id, new_row)
예제 #9
0
def test_insert():
    """Exercise ``bqlfn.bayesdb_insert`` on a freshly analyzed generator."""
    setup_statements = [
        'initialize 2 models for t_cc',
        'analyze t_cc for 1 iteration wait',
    ]
    with test_csv.bayesdb_csv_stream(test_csv.csv_data) as (db, csv_file):
        # Build the table and a crosscat generator over it.
        bayeslite.bayesdb_read_csv(
            db, 't', csv_file, header=True, create=True)
        guess.bayesdb_guess_generator(db, 't_cc', 't', 'crosscat')
        for statement in setup_statements:
            db.execute(statement)
        # Insert one additional row into the generator.
        bqlfn.bayesdb_insert(
            db,
            core.bayesdb_get_generator(db, 't_cc'),
            (41, 'F', 96000, 73, 'data science', 2))
예제 #10
0
def test_insert():
    """Load the sample CSV, model it with crosscat, then insert one row."""
    with test_csv.bayesdb_csv_stream(test_csv.csv_data) as (db, stream):
        bayeslite.bayesdb_read_csv(
            db, "t", stream, header=True, create=True
        )
        guess.bayesdb_guess_generator(db, "t_cc", "t", "crosscat")

        # Models must exist and be analyzed before the insert below.
        db.execute("initialize 2 models for t_cc")
        db.execute("analyze t_cc for 1 iteration wait")

        gen_id = core.bayesdb_get_generator(db, "t_cc")
        extra_row = (41, "F", 96000, 73, "data science", 2)
        bqlfn.bayesdb_insert(db, gen_id, extra_row)
예제 #11
0
def test_csv_import_onecol_key():
    """A single-column table imports, but guessing a population raises.

    ``bayesdb_guess_population`` is expected to raise ValueError for the
    imported one-column table.
    """
    with bayesdb_csv_stream('foo\n0\none\n2\n') as (db, stream):
        # foo will be a key column, hence no columns to model.
        bayeslite.bayesdb_read_csv(
            db, 'onecol_key', stream, header=True, create=True)
        with pytest.raises(ValueError):
            bayeslite.guess.bayesdb_guess_population(
                db, 'p_onecol_key', 'onecol_key')
예제 #12
0
def test_codebook_value_map():
    '''
    A categorical column in crosscat can only take on a fixed number of values
    v1, v2, ..., vN.  In this test, we have a categorical column called
    `city` which takes on values `RIO, LA, SF, DC` as specified in the codebook
    value map.

        INITIALIZE dummy table with only RIO and SF appearing in dataset
        ANALYZE dummy_cc
        INSERT rows with `city` names `LA` and `DC`
        ANALYZE dummy_cc
        SIMULATE specifying `city` = `LA` (throws KeyError)
    '''

    # Open without the built-in metamodels and register a crosscat
    # metamodel backed by a local engine with a fixed seed.
    with bayeslite.bayesdb_open(builtin_metamodels=False) as bdb:
        cc = crosscat.LocalEngine.LocalEngine(seed=0)
        ccme = CrosscatMetamodel(cc)
        bayeslite.bayesdb_register_metamodel(bdb, ccme)

        bayeslite.bayesdb_read_csv(bdb,
                                   'dummy',
                                   dummy_data,
                                   header=True,
                                   create=True)

        # The codebook loader takes a pathname, so the codebook text is
        # written to a named temporary file first.
        with tempfile.NamedTemporaryFile(prefix='bayeslite') as tempbook:
            with open(tempbook.name, 'w') as f:
                f.write(dummy_codebook)
            bayeslite.bayesdb_load_codebook_csv_file(bdb, 'dummy',
                                                     tempbook.name)

        bdb.execute('''
            CREATE GENERATOR dummy_cc FOR dummy
                USING crosscat(
                    GUESS(*),
                    kerberos IGNORE,
                    age NUMERICAL,
                    city CATEGORICAL
                )
        ''')
        bdb.execute('INITIALIZE 10 MODELS FOR dummy_cc')
        bdb.execute('ANALYZE dummy_cc FOR 20 ITERATIONS WAIT')
        # Simulating given a city value already used here must not raise.
        bdb.execute('SIMULATE age FROM dummy_cc GIVEN city = RIO LIMIT 5')
        # Add rows through plain SQL using the cities LA and DC.
        bdb.sql_execute('''
            INSERT INTO dummy (kerberos, age, city) VALUES
                ('jackie', 18, 'LA'), ('rocker', 22, 'DC')
        ''')
        bdb.execute('ANALYZE dummy_cc FOR 20 ITERATIONS WAIT')
        # NOTE(review): the cursor `c` is never read afterwards.
        c = bdb.sql_execute('SELECT * FROM dummy')
        with pytest.raises(KeyError):
            bdb.execute('SIMULATE age FROM dummy_cc GIVEN city = LA LIMIT 5')
예제 #13
0
def test_codebook_value_map():
    """
    A categorical column in crosscat can only take on a fixed number of values
    v1, v2, ..., vN.  In this test, we have a categorical column called
    `city` which takes on values `RIO, LA, SF, DC` as specified in the codebook
    value map.

        INITIALIZE dummy table with only RIO and SF appearing in dataset
        ANALYZE dummy_cc
        INSERT rows with `city` names `LA` and `DC`
        ANALYZE dummy_cc
        SIMULATE specifying `city` = `LA` (throws KeyError)
    """

    # Open without the built-in metamodels and register a crosscat
    # metamodel backed by a local engine with a fixed seed.
    with bayeslite.bayesdb_open(builtin_metamodels=False) as bdb:
        cc = crosscat.LocalEngine.LocalEngine(seed=0)
        ccme = CrosscatMetamodel(cc)
        bayeslite.bayesdb_register_metamodel(bdb, ccme)

        bayeslite.bayesdb_read_csv(bdb, "dummy", dummy_data, header=True, create=True)

        # The codebook loader takes a pathname, so the codebook text is
        # written to a named temporary file first.
        with tempfile.NamedTemporaryFile(prefix="bayeslite") as tempbook:
            with open(tempbook.name, "w") as f:
                f.write(dummy_codebook)
            bayeslite.bayesdb_load_codebook_csv_file(bdb, "dummy", tempbook.name)

        bdb.execute(
            """
            CREATE GENERATOR dummy_cc FOR dummy
                USING crosscat(
                    GUESS(*),
                    kerberos IGNORE,
                    age NUMERICAL,
                    city CATEGORICAL
                )
        """
        )
        bdb.execute("INITIALIZE 10 MODELS FOR dummy_cc")
        bdb.execute("ANALYZE dummy_cc FOR 20 ITERATIONS WAIT")
        # Simulating given a city value already used here must not raise.
        bdb.execute("SIMULATE age FROM dummy_cc GIVEN city = RIO LIMIT 5")
        # Add rows through plain SQL using the cities LA and DC.
        bdb.sql_execute(
            """
            INSERT INTO dummy (kerberos, age, city) VALUES
                ('jackie', 18, 'LA'), ('rocker', 22, 'DC')
        """
        )
        bdb.execute("ANALYZE dummy_cc FOR 20 ITERATIONS WAIT")
        # NOTE(review): the cursor `c` is never read afterwards.
        c = bdb.sql_execute("SELECT * FROM dummy")
        with pytest.raises(KeyError):
            bdb.execute("SIMULATE age FROM dummy_cc GIVEN city = LA LIMIT 5")
예제 #14
0
def test_csv_import_badschema0():
    """Importing into a pre-created table with a column missing must fail.

    The schema below has its salary column commented out; importing
    ``csv_data`` with create=False is expected to raise IOError.
    """
    create_table_sql = '''
            CREATE TABLE emPloyEES(
                AGE INTeger,
                geNder Text,
                -- saLAry REal,
                heighT inteGER,
                DIVision TEXt,
                rank INTEGER
            )
        '''
    with bayesdb_csv_stream(csv_data) as (db, stream):
        db.sql_execute(create_table_sql)
        with pytest.raises(IOError):
            bayeslite.bayesdb_read_csv(
                db, 'employees', stream, header=True, create=False)
예제 #15
0
def test_csv_import_badschema1():
    """Importing into a pre-created table with a mismatched schema must fail.

    The existing ``employees`` table declares a ``zorblaxianism`` column;
    importing ``csv_data`` with create=False is expected to raise IOError.
    """
    create_table_sql = '''
            CREATE TABLE employees(
                age INTEGER,
                zorblaxianism TEXT,
                salary INTEGER,
                height INTEGER NOT NULL PRIMARY KEY,
                division TEXT,
                rank CATEGORICAL
            )
        '''
    with bayesdb_csv_stream(csv_data) as (db, stream):
        db.sql_execute(create_table_sql)
        with pytest.raises(IOError):
            bayeslite.bayesdb_read_csv(
                db, 'employees', stream, header=True, create=False)
예제 #16
0
def test_csv_import_schema_case():
    """Import into a table whose identifiers use mixed letter case.

    The table and its columns are declared with irregular capitalisation,
    while the import refers to the table as ``employees``.  Both the import
    and the subsequent generator guess must complete without raising.
    """
    create_table_sql = '''
            CREATE TABLE emPloyEES(
                AGE INTeger,
                geNder Text,
                saLAry REal,
                heighT inteGER,
                DIVision TEXt,
                rank INTEGER
            )
        '''
    with bayesdb_csv_stream(csv_data) as (db, stream):
        db.sql_execute(create_table_sql)
        bayeslite.bayesdb_read_csv(
            db, 'employees', stream, header=True, create=False)
        bayeslite.guess.bayesdb_guess_generator(
            db, 'employees_cc', 'employees', 'crosscat')
예제 #17
0
def test_engine_stamp_two_clients():
    """Confirm analysis by one worker makes cache in other worker stale."""
    # A named temporary file backs the database so that several handles
    # (bdb0, bdb1, bdb2) can be opened on the same path.
    with tempfile.NamedTemporaryFile(prefix='bayeslite') as f:
        with bayeslite.bayesdb_open(f.name) as bdb0:
            bayeslite.bayesdb_read_csv(bdb0,
                                       't',
                                       StringIO(test_csv.csv_data),
                                       header=True,
                                       create=True)
            bdb0.execute('''
                CREATE POPULATION p FOR t (
                    age NUMERICAL;
                    gender CATEGORICAL;
                    salary NUMERICAL;
                    height IGNORE;
                    division CATEGORICAL;
                    rank CATEGORICAL
                )
            ''')

            bdb0.execute('CREATE METAMODEL m FOR p WITH BASELINE crosscat;')
            # The metamodel object is taken from bdb0 but is queried below
            # with each of the handles in turn.
            cgpm_metamodel = bdb0.metamodels['cgpm']
            population_id = bayeslite.core.bayesdb_get_population(bdb0, 'p')
            generator_id = bayeslite.core.bayesdb_get_generator(
                bdb0, population_id, 'm')

            # Before any models are initialized the stamp is zero.
            assert cgpm_metamodel._engine_stamp(bdb0, generator_id) == 0

            # A second handle initializes the model; both handles must
            # then see a stamp of 1.
            with bayeslite.bayesdb_open(f.name) as bdb1:
                bdb1.execute('INITIALIZE 1 MODEL FOR m')
                assert cgpm_metamodel._engine_stamp(bdb0, generator_id) == 1
                assert cgpm_metamodel._engine_stamp(bdb1, generator_id) == 1

            # Analysis through bdb0 bumps the stamp to 2 and leaves an
            # 'engine' entry in bdb0's cache.
            bdb0.execute('ANALYZE m FOR 1 ITERATION WAIT')
            assert cgpm_metamodel._engine_stamp(bdb0, generator_id) == 2
            assert cgpm_metamodel._get_cache_entry(bdb0, generator_id,
                                                   'engine') is not None

            # Analysis through a third handle bumps the stamp to 3, which
            # must be visible from bdb0 as well.
            with bayeslite.bayesdb_open(f.name) as bdb2:
                bdb2.execute('ANALYZE m FOR 1 ITERATION WAIT')
                assert cgpm_metamodel._engine_stamp(bdb2, generator_id) == 3
                assert cgpm_metamodel._engine_stamp(bdb0, generator_id) == 3

            # Engine in cache of bdb0 should be stale, since bdb2 analyzed.
            assert cgpm_metamodel._engine_latest(bdb0, generator_id) is None
예제 #18
0
def test_csv_import_badschema1():
    """An existing table with a mismatched schema must reject the import.

    ``employees`` is created up front with a ``zorblaxianism`` column, and
    the import of ``csv_data`` with create=False is expected to raise
    IOError.
    """
    with bayesdb_csv_stream(csv_data) as (db, stream):
        mismatched_schema = '''
            CREATE TABLE employees(
                age INTEGER,
                zorblaxianism TEXT,
                salary INTEGER,
                height INTEGER NOT NULL PRIMARY KEY,
                division TEXT,
                rank CATEGORICAL
            )
        '''
        db.sql_execute(mismatched_schema)
        import_options = dict(header=True, create=False)
        with pytest.raises(IOError):
            bayeslite.bayesdb_read_csv(
                db, 'employees', stream, **import_options)
예제 #19
0
def test_csv_import_badschema0():
    """An existing table lacking a column must reject the import.

    The salary column is commented out of the CREATE TABLE statement, and
    the import of ``csv_data`` with create=False is expected to raise
    IOError.
    """
    with bayesdb_csv_stream(csv_data) as (db, stream):
        incomplete_schema = '''
            CREATE TABLE emPloyEES(
                AGE INTeger,
                geNder Text,
                -- saLAry REal,
                heighT inteGER,
                DIVision TEXt,
                rank INTEGER
            )
        '''
        db.sql_execute(incomplete_schema)
        import_options = dict(header=True, create=False)
        with pytest.raises(IOError):
            bayeslite.bayesdb_read_csv(
                db, 'employees', stream, **import_options)
예제 #20
0
def test_csv_import_schema():
    """Import CSV data into a pre-created table, then query and model it.

    After the import, the test selects an existing column, checks the
    handling of a nonexistent column (see the XXX note below), creates a
    population and a cgpm generator, and finally expects an
    ESTIMATE PREDICT query on ``height`` to raise BQLError.
    """
    with bayesdb_csv_stream(csv_data) as (bdb, f):
        bdb.sql_execute('''
            CREATE TABLE employees(
                age INTEGER,
                gender TEXT,
                salary REAL,
                height INTEGER,
                division TEXT,
                rank INTEGER
            )
        ''')
        # create=False: the rows go into the table created just above.
        bayeslite.bayesdb_read_csv(bdb,
                                   'employees',
                                   f,
                                   header=True,
                                   create=False)
        bdb.execute('select height from employees').fetchall()
        # XXX Currently this test fails because we compile the query
        # into `SELECT "idontexist" FROM "employees"', and for
        # compatibility with MySQL idiocy or something, SQLite treats
        # double-quotes as single-quotes if the alternative would be
        # an error.
        #
        # The unconditional raise inside the block means the
        # pytest.raises check passes whether or not bdb.execute raises.
        with pytest.raises(apsw.SQLError):
            bdb.execute('select idontexist from employees')
            raise apsw.SQLError('BQL compiler is broken;'
                                ' a.k.a. sqlite3 is stupid.')
        # height is declared IGNORE in the population schema.
        bdb.execute('''
            CREATE POPULATION p_employees FOR employees (
                height IGNORE;
                age NUMERICAL;
                gender NOMINAL;
                salary CYCLIC;
                division NOMINAL;
                rank NOMINAL
            )
        ''')
        bdb.execute('''
            CREATE GENERATOR p_employees_cc for p_employees USING cgpm;
        ''')
        # Plain ESTIMATE of the ignored column must work, whereas
        # ESTIMATE PREDICT on it is expected to raise BQLError.
        bdb.execute('estimate height from p_employees').fetchall()
        with pytest.raises(bayeslite.BQLError):
            bdb.execute('estimate predict height with confidence 0.9'
                        ' from p_employees')
예제 #21
0
def test_engine_increment_stamp():
    """Confirm the engine stamp is incremented appropriately."""
    with bayeslite.bayesdb_open(':memory:') as bdb:
        bayeslite.bayesdb_read_csv(
            bdb, 't', StringIO(test_csv.csv_data), header=True, create=True)
        bdb.execute('''
            CREATE POPULATION p FOR t (
                age NUMERICAL;
                gender CATEGORICAL;
                salary NUMERICAL;
                height IGNORE;
                division CATEGORICAL;
                rank CATEGORICAL
            )
        ''')
        bdb.execute('CREATE METAMODEL m FOR p WITH BASELINE crosscat;')
        metamodel = bdb.metamodels['cgpm']
        pop_id = bayeslite.core.bayesdb_get_population(bdb, 'p')
        gen_id = bayeslite.core.bayesdb_get_generator(bdb, pop_id, 'm')

        def current_stamp():
            # Engine stamp for the generator under test.
            return metamodel._engine_stamp(bdb, gen_id)

        def cached_engine():
            # Cached 'engine' entry for the generator, or None.
            return metamodel._get_cache_entry(bdb, gen_id, 'engine')

        # Without models the stamp is zero.
        assert current_stamp() == 0

        # Initializing models sets the stamp to one and caches nothing.
        bdb.execute('INITIALIZE 2 MODELS FOR m;')
        assert current_stamp() == 1
        assert cached_engine() is None

        # Analysis increments the stamp and populates the cache.
        bdb.execute('ANALYZE m FOR 1 ITERATIONS WAIT;')
        assert current_stamp() == 2
        assert cached_engine() is not None

        # Wipe the cache, run a simulation, and confirm the caching.
        metamodel._del_cache_entry(bdb, gen_id, 'engine')
        assert cached_engine() is None
        bdb.execute('SIMULATE age FROM p LIMIT 1;').fetchall()
        assert cached_engine() is not None
예제 #22
0
def test_csv_missing():
    """Import a CSV with missing cells and normalise them to NULL.

    The data is loaded into a pre-created table of REAL columns.  Empty
    strings and values matching ``LIKE 'NaN'`` are then set to NULL in
    each column, and the rows are compared against the expected values.
    """
    expected_rows = [
        (1.0, 2.0, 3.0),
        (10.0, None, 30.0),
        (100.0, 200.0, None),
        (4.0, 5.0, 6.0),
    ]
    with bayesdb_csv_stream(csv_data_missing) as (db, stream):
        # XXX Test the automatic column type guessing too.
        db.sql_execute('CREATE TABLE t(a REAL, b REAL, c REAL)')
        bayeslite.bayesdb_read_csv(db, 't', stream, header=True, create=False)
        for column_name in ('a', 'b', 'c'):
            quoted = bql_quote_name(column_name)
            db.sql_execute(
                "UPDATE t SET %s = NULL WHERE %s = '' OR %s LIKE 'NaN'"
                % (quoted, quoted, quoted))
        assert db.execute('select * from t').fetchall() == expected_rows
예제 #23
0
def test_csv_import_schema_case():
    """Import into a table declared with mixed-case identifiers.

    The import names the table ``employees`` although it was created as
    ``emPloyEES``.  The import and the population guess that follows must
    both complete without raising.
    """
    with bayesdb_csv_stream(csv_data) as (db, stream):
        mixed_case_schema = '''
            CREATE TABLE emPloyEES(
                AGE INTeger,
                geNder Text,
                saLAry REal,
                heighT inteGER,
                DIVision TEXt,
                rank INTEGER
            )
        '''
        db.sql_execute(mixed_case_schema)
        bayeslite.bayesdb_read_csv(
            db, 'employees', stream, header=True, create=False)
        bayeslite.guess.bayesdb_guess_population(
            db, 'p_employees', 'employees')
예제 #24
0
def test_engine_stamp_two_clients():
    """Confirm analysis by one worker makes cache in other worker stale."""
    # A named temporary file backs the database so that several handles
    # (bdb0, bdb1, bdb2) can be opened on the same path.
    with tempfile.NamedTemporaryFile(prefix='bayeslite') as f:
        with bayeslite.bayesdb_open(f.name) as bdb0:
            bayeslite.bayesdb_read_csv(bdb0, 't', StringIO(test_csv.csv_data),
                header=True, create=True)
            bdb0.execute('''
                CREATE POPULATION p FOR t (
                    age NUMERICAL;
                    gender NOMINAL;
                    salary NUMERICAL;
                    height IGNORE;
                    division NOMINAL;
                    rank NOMINAL;
                )
            ''')

            bdb0.execute('CREATE GENERATOR m FOR p;')
            # The backend object is taken from bdb0 but is queried below
            # with each of the handles in turn.
            cgpm_backend = bdb0.backends['cgpm']
            population_id = bayeslite.core.bayesdb_get_population(bdb0, 'p')
            generator_id = bayeslite.core.bayesdb_get_generator(
                bdb0, population_id, 'm')

            # Before any models are initialized the stamp is zero.
            assert cgpm_backend._engine_stamp(bdb0, generator_id) == 0

            # A second handle initializes the model; both handles must
            # then see a stamp of 1.
            with bayeslite.bayesdb_open(f.name) as bdb1:
                bdb1.execute('INITIALIZE 1 MODEL FOR m')
                assert cgpm_backend._engine_stamp(bdb0, generator_id) == 1
                assert cgpm_backend._engine_stamp(bdb1, generator_id) == 1

            # Analysis through bdb0 bumps the stamp to 2 and leaves an
            # 'engine' entry in bdb0's cache.
            bdb0.execute('ANALYZE m FOR 1 ITERATION')
            assert cgpm_backend._engine_stamp(bdb0, generator_id) == 2
            assert cgpm_backend._get_cache_entry(
                bdb0, generator_id, 'engine') is not None

            # Analysis through a third handle bumps the stamp to 3, which
            # must be visible from bdb0 as well.
            with bayeslite.bayesdb_open(f.name) as bdb2:
                bdb2.execute('ANALYZE m FOR 1 ITERATION')
                assert cgpm_backend._engine_stamp(bdb2, generator_id) == 3
                assert cgpm_backend._engine_stamp(bdb0, generator_id) == 3

            # Engine in cache of bdb0 should be stale, since bdb2 analyzed.
            assert cgpm_backend._engine_latest(bdb0, generator_id) is None
예제 #25
0
def test_engine_increment_stamp():
    """Confirm the engine stamp is incremented appropriately."""
    with bayeslite.bayesdb_open(':memory:') as bdb:
        bayeslite.bayesdb_read_csv(
            bdb, 't', StringIO(test_csv.csv_data), header=True, create=True)
        bdb.execute('''
            CREATE POPULATION p FOR t (
                age NUMERICAL;
                gender NOMINAL;
                salary NUMERICAL;
                height IGNORE;
                division NOMINAL;
                rank NOMINAL;
            )
        ''')
        bdb.execute('CREATE GENERATOR m FOR p;')
        backend = bdb.backends['cgpm']
        pop_id = bayeslite.core.bayesdb_get_population(bdb, 'p')
        gen_id = bayeslite.core.bayesdb_get_generator(bdb, pop_id, 'm')

        def current_stamp():
            # Engine stamp for the generator under test.
            return backend._engine_stamp(bdb, gen_id)

        def cached_engine():
            # Cached 'engine' entry for the generator, or None.
            return backend._get_cache_entry(bdb, gen_id, 'engine')

        # Without models the stamp is zero.
        assert current_stamp() == 0

        # Initializing models sets the stamp to one and caches nothing.
        bdb.execute('INITIALIZE 2 MODELS FOR m;')
        assert current_stamp() == 1
        assert cached_engine() is None

        # Analysis increments the stamp and populates the cache.
        bdb.execute('ANALYZE m FOR 1 ITERATIONS')
        assert current_stamp() == 2
        assert cached_engine() is not None

        # Wipe the cache, run a simulation, and confirm the caching.
        backend._del_cache_entry(bdb, gen_id, 'engine')
        assert cached_engine() is None
        bdb.execute('SIMULATE age FROM p LIMIT 1;').fetchall()
        assert cached_engine() is not None
예제 #26
0
def test_csv_import_schema():
    """Import CSV data into a pre-created table, then query and model it.

    After the import, the test selects an existing column, checks the
    handling of a nonexistent column (see the XXX note below), creates a
    population and a cgpm generator, and finally expects an
    ESTIMATE PREDICT query on ``height`` to raise BQLError.
    """
    with bayesdb_csv_stream(csv_data) as (bdb, f):
        bdb.sql_execute('''
            CREATE TABLE employees(
                age INTEGER,
                gender TEXT,
                salary REAL,
                height INTEGER,
                division TEXT,
                rank INTEGER
            )
        ''')
        # create=False: the rows go into the table created just above.
        bayeslite.bayesdb_read_csv(bdb, 'employees', f, header=True,
            create=False)
        bdb.execute('select height from employees').fetchall()
        # XXX Currently this test fails because we compile the query
        # into `SELECT "idontexist" FROM "employees"', and for
        # compatibility with MySQL idiocy or something, SQLite treats
        # double-quotes as single-quotes if the alternative would be
        # an error.
        #
        # The unconditional raise inside the block means the
        # pytest.raises check passes whether or not bdb.execute raises.
        with pytest.raises(apsw.SQLError):
            bdb.execute('select idontexist from employees')
            raise apsw.SQLError('BQL compiler is broken;'
                ' a.k.a. sqlite3 is stupid.')
        # height is declared IGNORE in the population schema.
        bdb.execute('''
            CREATE POPULATION p_employees FOR employees (
                height IGNORE;
                age NUMERICAL;
                gender NOMINAL;
                salary CYCLIC;
                division NOMINAL;
                rank NOMINAL
            )
        ''')
        bdb.execute('''
            CREATE GENERATOR p_employees_cc for p_employees USING cgpm;
        ''')
        # Plain ESTIMATE of the ignored column must work, whereas
        # ESTIMATE PREDICT on it is expected to raise BQLError.
        bdb.execute('estimate height from p_employees').fetchall()
        with pytest.raises(bayeslite.BQLError):
            bdb.execute('estimate predict height with confidence 0.9'
                ' from p_employees')
예제 #27
0
    def dot_csv(self, line):
        """create table from CSV file
        <table> </path/to/data.csv>

        Create a SQL table named <table> from the data in
        </path/to/data.csv>.
        """
        # XXX Lousy, lousy tokenizer.
        # Whitespace splitting means a pathname containing spaces is
        # rejected with the usage message.
        args = line.split()
        if len(args) == 2:
            table, pathname = args
        else:
            self.stdout.write("Usage: .csv <table> </path/to/data.csv>\n")
            return
        try:
            # NOTE(review): the "U" mode flag was removed in Python 3.11;
            # it is kept here because this code targets Python 2.
            with open(pathname, "rU") as csv_file:
                bayeslite.bayesdb_read_csv(
                    self._bdb,
                    table,
                    csv_file,
                    header=True,
                    create=True,
                    ifnotexists=False,
                )
        except IOError as err:
            # I/O failures are reported as a one-line message.
            self.stdout.write("%s\n" % (err,))
        except Exception:
            # Anything else gets the full traceback written to stdout.
            self.stdout.write(traceback.format_exc())
예제 #28
0
def test_csv_missing():
    """Check that missing CSV cells become NULL after a cleanup pass.

    Rows are imported into an existing table of REAL columns, each column
    is scrubbed of empty strings and values matching ``LIKE 'NaN'``, and
    the final table contents are compared with the expected rows.
    """
    with bayesdb_csv_stream(csv_data_missing) as (db, stream):
        # XXX Test the automatic column type guessing too.
        db.sql_execute('CREATE TABLE t(a REAL, b REAL, c REAL)')
        bayeslite.bayesdb_read_csv(db, 't', stream, header=True, create=False)

        def null_out_missing(column_name):
            # The quoted name is substituted three times into the UPDATE.
            quoted = bql_quote_name(column_name)
            statement = \
                "UPDATE t SET %s = NULL WHERE %s = '' OR %s LIKE 'NaN'" % \
                ((quoted,) * 3)
            db.sql_execute(statement)

        for column_name in 'abc':
            null_out_missing(column_name)

        rows = db.execute('select * from t').fetchall()
        assert rows == [
            (1.0, 2.0, 3.0),
            (10.0, None, 30.0),
            (100.0, 200.0, None),
            (4.0, 5.0, 6.0),
        ]
예제 #29
0
def test_csv_import():
    """Smoke test: importing ``csv_data`` into ``employees`` must not raise."""
    import_options = dict(header=True, create=True)
    with bayesdb_csv_stream(csv_data) as (db, stream):
        bayeslite.bayesdb_read_csv(db, 'employees', stream, **import_options)
예제 #30
0
def test_csv_import_toomanycols():
    """A data row with more fields than the header is expected to raise IOError."""
    # Two header columns, but the second data row carries three values.
    ragged_csv = 'foo,bar\n0,1\n0,1,2\n'
    with bayesdb_csv_stream(ragged_csv) as (db, stream):
        with pytest.raises(IOError):
            bayeslite.bayesdb_read_csv(
                db, 'bad', stream, header=True, create=True)
예제 #31
0
def test_csv_import_onecol():
    """Importing a single-column CSV into a new table must not raise."""
    single_column_csv = 'foo\n0\none\n2\n0\n'
    with bayesdb_csv_stream(single_column_csv) as (db, stream):
        bayeslite.bayesdb_read_csv(
            db, 'onecol', stream, header=True, create=True)
예제 #32
0
def test_read_csv():
    """Exercise bayesdb_read_csv across header/create/ifnotexists settings.

    The first part checks combinations that are expected to raise.  The
    second part imports the same data repeatedly into table ``t`` and
    checks that the rows accumulate, then covers bayesdb_read_csv_file,
    the BQL ``CREATE TABLE ... FROM`` syntax, and a header with empty
    column names.
    """
    with bayeslite.bayesdb_open(builtin_metamodels=False) as bdb:

        # A fresh StringIO is built before each attempt.
        f = StringIO.StringIO(csv_data)
        with pytest.raises(ValueError):
            # Table must already exist for create=False.
            bayeslite.bayesdb_read_csv(bdb,
                                       't',
                                       f,
                                       header=False,
                                       create=False,
                                       ifnotexists=False)

        f = StringIO.StringIO(csv_data)
        with pytest.raises(ValueError):
            # Must pass create=True for ifnotexists=True.
            bayeslite.bayesdb_read_csv(bdb,
                                       't',
                                       f,
                                       header=False,
                                       create=False,
                                       ifnotexists=True)

        f = StringIO.StringIO(csv_data)
        with pytest.raises(ValueError):
            # Must pass create=False for header=False.
            bayeslite.bayesdb_read_csv(bdb,
                                       't',
                                       f,
                                       header=False,
                                       create=True,
                                       ifnotexists=False)

        f = StringIO.StringIO(csv_data)
        with pytest.raises(ValueError):
            # Must pass create=False for header=False.
            bayeslite.bayesdb_read_csv(bdb,
                                       't',
                                       f,
                                       header=False,
                                       create=True,
                                       ifnotexists=True)

        f = StringIO.StringIO(csv_hdrdata)
        with pytest.raises(ValueError):
            # Table must already exist for create=False.
            bayeslite.bayesdb_read_csv(bdb,
                                       't',
                                       f,
                                       header=True,
                                       create=False,
                                       ifnotexists=False)

        f = StringIO.StringIO(csv_hdrdata)
        with pytest.raises(ValueError):
            # Must pass create=True for ifnotexists=True.
            bayeslite.bayesdb_read_csv(bdb,
                                       't',
                                       f,
                                       header=True,
                                       create=False,
                                       ifnotexists=True)

        f = StringIO.StringIO(csv_hdrdata)
        with pytest.raises(ValueError):
            # The CREATE TABLE runs inside a savepoint; presumably the
            # ValueError rolls it back, since `t` is created again by the
            # import further below -- TODO confirm.
            with bdb.savepoint():
                # Table must not exist if ifnotexists=False.
                bdb.sql_execute('CREATE TABLE t(x)')
                bayeslite.bayesdb_read_csv(bdb,
                                           't',
                                           f,
                                           header=True,
                                           create=True,
                                           ifnotexists=False)
        with pytest.raises(IOError):
            # Table must have no empty values in header.
            # Dropping the first character presumably leaves an empty
            # first column name; csv_hdrdata is defined elsewhere.
            csv_hdrdata_prime = csv_hdrdata[1:]
            f = StringIO.StringIO(csv_hdrdata_prime)
            with bdb.savepoint():
                bayeslite.bayesdb_read_csv(bdb,
                                           't',
                                           f,
                                           header=True,
                                           create=True,
                                           ifnotexists=False)

        # First successful import: creates `t` and loads the rows.
        f = StringIO.StringIO(csv_hdrdata)
        bayeslite.bayesdb_read_csv(bdb,
                                   't',
                                   f,
                                   header=True,
                                   create=True,
                                   ifnotexists=False)
        data = bdb.sql_execute('SELECT * FROM t').fetchall()
        assert data == [
            # XXX Would be nice if the NaN could actually be that, or
            # at least None/NULL.
            (1, 2, 3, 'foo', 'bar', u'nan', u'', u'quagga'),
            (4, 5, 6, 'baz', 'quux', 42.0, u'', u'eland'),
            (7, 8, 6, 'zot', 'mumble', 87.0, u'zoot', u'caribou'),
        ]

        # Importing csv_hdr with ifnotexists=True into the existing table
        # must leave its rows unchanged.
        f = StringIO.StringIO(csv_hdr)
        bayeslite.bayesdb_read_csv(bdb,
                                   't',
                                   f,
                                   header=True,
                                   create=True,
                                   ifnotexists=True)
        assert bdb.sql_execute('SELECT * FROM t').fetchall() == data
        # The stored schema must declare every column as NUMERIC.
        assert cursor_value(bdb.sql_execute('SELECT sql FROM sqlite_master'
                    ' WHERE name = ?', ('t',))) == \
            'CREATE TABLE "t"' \
            '("a" NUMERIC,"b" NUMERIC,"c" NUMERIC,"name" NUMERIC,' \
            '"nick" NUMERIC,"age" NUMERIC,"muppet" NUMERIC,"animal" NUMERIC)'

        # Header-less import appends a second copy of the rows.
        f = StringIO.StringIO(csv_data)
        bayeslite.bayesdb_read_csv(bdb,
                                   't',
                                   f,
                                   header=False,
                                   create=False,
                                   ifnotexists=False)
        assert bdb.sql_execute('SELECT * FROM t').fetchall() == data + data

        # Import with a header into the existing table appends a third copy.
        f = StringIO.StringIO(csv_hdrdata)
        bayeslite.bayesdb_read_csv(bdb,
                                   't',
                                   f,
                                   header=True,
                                   create=False,
                                   ifnotexists=False)
        assert bdb.sql_execute('SELECT * FROM t').fetchall() == \
            data + data + data
        # The pathname-based variant appends a fourth copy.
        with tempfile.NamedTemporaryFile(prefix='bayeslite') as temp:
            with open(temp.name, 'w') as f:
                f.write(csv_hdrdata)
            bayeslite.bayesdb_read_csv_file(bdb,
                                            't',
                                            temp.name,
                                            header=True,
                                            create=False,
                                            ifnotexists=False)
        assert bdb.sql_execute('SELECT * FROM t').fetchall() == \
            data + data + data + data

        # Test the BQL CREATE TABLE FROM <csv-file> syntax.
        # NOTE(review): this StringIO is never read; `f` is rebound by the
        # `with open(...)` below.
        f = StringIO.StringIO(csv_hdrdata)
        with tempfile.NamedTemporaryFile(prefix='bayeslite') as temp:
            with open(temp.name, 'w') as f:
                f.write(csv_hdrdata)
            bdb.execute('CREATE TABLE t2 FROM \'%s\'' % (temp.name, ))
            assert bdb.sql_execute('SELECT * FROM t2').fetchall() == data

        # Trying to read a csv with an empty column name should fail.
        csv_header_corrupt = csv_hdr.replace('a,b', ',')
        csv_hdrdata_corrupt = csv_header_corrupt + csv_data
        with tempfile.NamedTemporaryFile(prefix='bayeslite') as temp:
            with open(temp.name, 'w') as f:
                f.write(csv_hdrdata_corrupt)
            with pytest.raises(IOError):
                bayeslite.bayesdb_read_csv_file(bdb,
                                                't3',
                                                temp.name,
                                                header=True,
                                                create=True)
예제 #33
0
def test_csv_import_nocols():
    """A CSV stream holding only a newline must be refused by the importer."""
    # The header line is blank, so there are no columns to build a table
    # from; the import is expected to raise IOError.
    with bayesdb_csv_stream('\n') as (bdb, stream), pytest.raises(IOError):
        bayeslite.bayesdb_read_csv(
            bdb, 'nocols', stream, header=True, create=True)
예제 #34
0
def test_csv_import_empty():
    """A completely empty CSV stream must make the import raise IOError."""
    empty_csv = ''
    with bayesdb_csv_stream(empty_csv) as (bdb, stream):
        # header=True is requested but there is no header line to read.
        with pytest.raises(IOError):
            bayeslite.bayesdb_read_csv(
                bdb, 'empty', stream, header=True, create=True)
예제 #35
0
def test_csv_import_onecol():
    """Importing a one-column CSV (header plus four rows) must not raise."""
    # The rows mix numeric-looking values with the text value 'one'.
    single_column_csv = 'foo\n0\none\n2\n0\n'
    with bayesdb_csv_stream(single_column_csv) as (bdb, stream):
        bayeslite.bayesdb_read_csv(
            bdb, 'onecol', stream, header=True, create=True)
예제 #36
0
def test_csv_import_toomanycols():
    """A data row with more fields than the header must raise IOError."""
    # Two header columns, one well-formed row, then a three-field row.
    ragged_csv = 'foo,bar\n0,1\n0,1,2\n'
    with bayesdb_csv_stream(ragged_csv) as (bdb, stream), \
            pytest.raises(IOError):
        bayeslite.bayesdb_read_csv(
            bdb, 'bad', stream, header=True, create=True)
예제 #37
0
def test_add_variable():
    """Exercise ALTER POPULATION ... ADD VARIABLE with cgpm metamodels.

    The population initially ignores `height`, `division` and `rank`.
    The test checks that queries on `height` fail before it is added,
    then succeed on an existing metamodel (m0), on a newly created one
    (m1), and on the population as a whole.  It then adds `rank` and
    checks OPTIMIZED analysis is accepted for m0 but raises ValueError
    for m1, which uses custom category models.
    """
    with bayesdb_open() as bdb:
        csv_stream = StringIO.StringIO(test_csv.csv_data)
        bayesdb_read_csv(bdb, 't', csv_stream, header=True, create=True)
        bdb.execute('''
            CREATE POPULATION p FOR t WITH SCHEMA(
                age         numerical;
                gender      nominal;
                salary      numerical;
                height      ignore;
                division    ignore;
                rank        ignore;
            )
        ''')
        bdb.metamodels['cgpm'].set_multiprocess(False)
        bdb.execute('CREATE METAMODEL m0 FOR p WITH BASELINE crosscat;')
        bdb.execute('INITIALIZE 1 MODELS FOR m0;')
        bdb.execute('ANALYZE m0 FOR 5 ITERATION WAIT;')

        def query_variable(variable, model):
            # Issue density, dependence and similarity queries about
            # `variable`, restricted to `model` or, when `model` is
            # None, over the whole population.
            if model is None:
                modeled_by = ''
            else:
                modeled_by = 'MODELED BY %s' % (model,)
            density = bdb.execute('''
                ESTIMATE PROBABILITY DENSITY OF %s = 1 BY p %s
            ''' % (variable, modeled_by))
            density.fetchall()
            for partner in ('age', 'gender', 'salary'):
                dependence = bdb.execute('''
                    ESTIMATE DEPENDENCE PROBABILITY OF %s WITH %s
                    BY p %s
                ''' % (variable, partner, modeled_by))
                assert cursor_value(dependence) >= 0
            similarity = bdb.execute('''
                ESTIMATE SIMILARITY IN THE CONTEXT OF %s
                FROM PAIRWISE p %s;
            ''' % (variable, modeled_by))
            similarity.fetchall()

        # height is not part of the population yet, so querying it fails.
        with pytest.raises(BQLError):
            query_variable('height', 'm0')

        # Bring height into the population, then analyze m0 both in
        # general and targeted at the new variable.
        bdb.execute('ALTER POPULATION p ADD VARIABLE height numerical;')
        bdb.execute('ANALYZE m0 FOR 5 ITERATION WAIT;')
        bdb.execute('ANALYZE m0 FOR 5 ITERATION WAIT (VARIABLES height);')
        # The same queries are now expected to go through.
        query_variable('height', 'm0')

        # A second metamodel with custom category models, including one
        # for the freshly added height variable.
        bdb.execute('''
            CREATE METAMODEL m1 FOR p WITH BASELINE crosscat(
                SET CATEGORY MODEL FOR age TO exponential;
                SET CATEGORY MODEL FOR height TO lognormal;
            )
        ''')
        bdb.execute('INITIALIZE 2 MODELS FOR m1')
        bdb.execute('ANALYZE m1 FOR 2 ITERATION WAIT;')
        # Query height through m1 alone, then aggregated over m0 and m1.
        for model in ('m1', None):
            query_variable('height', model)

        # Introduce a third variable, rank.
        bdb.execute('ALTER POPULATION p ADD VARIABLE rank numerical;')
        # OPTIMIZED analysis of m0 restricted to rank ...
        bdb.execute('''
            ANALYZE m0 FOR 2 ITERATION WAIT (OPTIMIZED; VARIABLES rank);
        ''')
        # ... and of everything in m0 but rank.
        bdb.execute('''
            ANALYZE m0 FOR 2 ITERATION WAIT (OPTIMIZED; SKIP rank);
        ''')
        # OPTIMIZED is rejected for m1 because of its non-standard
        # category models.
        with pytest.raises(ValueError):
            bdb.execute('''
                ANALYZE m1 FOR 2 ITERATION WAIT (OPTIMIZED; VARIABLES rank);
            ''')
        # Plain (non-optimized) analysis of m1 still works.
        bdb.execute('ANALYZE m1 FOR 2 ITERATION WAIT;')
        # Finally query rank per metamodel and over the population.
        for model in ('m0', 'm1', None):
            query_variable('rank', model)
예제 #38
0
def test_add_variable():
    """Exercise ALTER POPULATION ... ADD VARIABLE with cgpm generators.

    The population initially ignores `height`, `division` and `rank`.
    The test checks that queries on `height` fail before it is added,
    then succeed on an existing generator (m0), on a newly created one
    (m1), and on the population as a whole.  It then adds `rank` and
    checks OPTIMIZED analysis is accepted for m0 but raises ValueError
    for m1, which uses custom category models.
    """
    with bayesdb_open() as bdb:
        csv_stream = StringIO.StringIO(test_csv.csv_data)
        bayesdb_read_csv(bdb, 't', csv_stream, header=True, create=True)
        bdb.execute('''
            CREATE POPULATION p FOR t WITH SCHEMA(
                age         numerical;
                gender      nominal;
                salary      numerical;
                height      ignore;
                division    ignore;
                rank        ignore;
            )
        ''')
        bdb.backends['cgpm'].set_multiprocess(False)
        bdb.execute('CREATE GENERATOR m0 FOR p;')
        bdb.execute('INITIALIZE 1 MODELS FOR m0;')
        bdb.execute('ANALYZE m0 FOR 5 ITERATION')

        def query_variable(variable, generator):
            # Issue density, dependence and similarity queries about
            # `variable`, restricted to `generator` or, when it is None,
            # over the whole population.
            if generator is None:
                modeled_by = ''
            else:
                modeled_by = 'MODELED BY %s' % (generator,)
            density = bdb.execute('''
                ESTIMATE PROBABILITY DENSITY OF %s = 1 BY p %s
            ''' % (variable, modeled_by))
            density.fetchall()
            for partner in ('age', 'gender', 'salary'):
                dependence = bdb.execute('''
                    ESTIMATE DEPENDENCE PROBABILITY OF %s WITH %s
                    BY p %s
                ''' % (variable, partner, modeled_by))
                assert cursor_value(dependence) >= 0
            similarity = bdb.execute('''
                ESTIMATE SIMILARITY IN THE CONTEXT OF %s
                FROM PAIRWISE p %s;
            ''' % (variable, modeled_by))
            similarity.fetchall()

        # height is not part of the population yet, so querying it fails.
        with pytest.raises(BQLError):
            query_variable('height', 'm0')

        # Bring height into the population, then analyze m0 both in
        # general and targeted at the new variable.
        bdb.execute('ALTER POPULATION p ADD VARIABLE height numerical;')
        bdb.execute('ANALYZE m0 FOR 5 ITERATION')
        bdb.execute('ANALYZE m0 FOR 5 ITERATION (VARIABLES height);')
        # The same queries are now expected to go through.
        query_variable('height', 'm0')

        # A second generator with custom category models, including one
        # for the freshly added height variable.
        bdb.execute('''
            CREATE GENERATOR m1 FOR p(
                SET CATEGORY MODEL FOR age TO exponential;
                SET CATEGORY MODEL FOR height TO lognormal;
            )
        ''')
        bdb.execute('INITIALIZE 2 MODELS FOR m1')
        bdb.execute('ANALYZE m1 FOR 2 ITERATION')
        # Query height through m1 alone, then aggregated over m0 and m1.
        for generator in ('m1', None):
            query_variable('height', generator)

        # Introduce a third variable, rank.
        bdb.execute('ALTER POPULATION p ADD VARIABLE rank numerical;')
        # OPTIMIZED analysis of m0 restricted to rank ...
        bdb.execute('''
            ANALYZE m0 FOR 2 ITERATION (OPTIMIZED; VARIABLES rank);
        ''')
        # ... and of everything in m0 but rank.
        bdb.execute('''
            ANALYZE m0 FOR 2 ITERATION (OPTIMIZED; SKIP rank);
        ''')
        # OPTIMIZED is rejected for m1 because of its non-standard
        # category models.
        with pytest.raises(ValueError):
            bdb.execute('''
                ANALYZE m1 FOR 2 ITERATION (OPTIMIZED; VARIABLES rank);
            ''')
        # Plain (non-optimized) analysis of m1 still works.
        bdb.execute('ANALYZE m1 FOR 2 ITERATION')
        # Finally query rank per generator and over the population.
        for generator in ('m0', 'm1', None):
            query_variable('rank', generator)
예제 #39
0
def test_read_csv():
    """Check bayesdb_read_csv / bayesdb_read_csv_file end to end.

    Against a single bdb opened with builtin_backends=False, the test
    covers, in order:

    * flag combinations of header/create/ifnotexists that must raise
      ValueError before any table exists;
    * create=True with ifnotexists=False on an existing table
      (ValueError) and a header with an empty column name (IOError);
    * a successful create-and-import, including the stored rows and the
      generated CREATE TABLE statement;
    * appending to the existing table from a stream and from a file;
    * the BQL `CREATE TABLE ... FROM '<path>'` form;
    * rejection of a file whose header has empty column names.
    """
    with bayeslite.bayesdb_open(builtin_backends=False) as bdb:

        f = StringIO.StringIO(csv_data)
        with pytest.raises(ValueError):
            # Table must already exist for create=False.
            bayeslite.bayesdb_read_csv(bdb, 't', f, header=False, create=False,
                ifnotexists=False)

        f = StringIO.StringIO(csv_data)
        with pytest.raises(ValueError):
            # Must pass create=True for ifnotexists=True.
            bayeslite.bayesdb_read_csv(bdb, 't', f, header=False, create=False,
                ifnotexists=True)

        f = StringIO.StringIO(csv_data)
        with pytest.raises(ValueError):
            # Must pass create=False for header=False.
            bayeslite.bayesdb_read_csv(bdb, 't', f, header=False, create=True,
                ifnotexists=False)

        f = StringIO.StringIO(csv_data)
        with pytest.raises(ValueError):
            # Must pass create=False for header=False.
            bayeslite.bayesdb_read_csv(bdb, 't', f, header=False, create=True,
                ifnotexists=True)

        f = StringIO.StringIO(csv_hdrdata)
        with pytest.raises(ValueError):
            # Table must already exist for create=False.
            bayeslite.bayesdb_read_csv(bdb, 't', f, header=True, create=False,
                ifnotexists=False)

        f = StringIO.StringIO(csv_hdrdata)
        with pytest.raises(ValueError):
            # Must pass create=True for ifnotexists=True.
            bayeslite.bayesdb_read_csv(bdb, 't', f, header=True, create=False,
                ifnotexists=True)

        f = StringIO.StringIO(csv_hdrdata)
        with pytest.raises(ValueError):
            # The savepoint wraps the temporary CREATE TABLE together
            # with the failing import.
            with bdb.savepoint():
                # Table must not exist if ifnotexists=False.
                bdb.sql_execute('CREATE TABLE t(x)')
                bayeslite.bayesdb_read_csv(bdb, 't', f, header=True,
                    create=True, ifnotexists=False)

        # Table must have no empty values in header.  Dropping the first
        # character of the header presumably leaves the first column
        # name empty.  The fixture is built outside pytest.raises so
        # that only the import itself can satisfy the expectation.
        csv_hdrdata_prime = csv_hdrdata[1:]
        f = StringIO.StringIO(csv_hdrdata_prime)
        with pytest.raises(IOError):
            with bdb.savepoint():
                bayeslite.bayesdb_read_csv(bdb, 't', f, header=True,
                    create=True, ifnotexists=False)

        # First successful import: creates t and loads three rows.
        f = StringIO.StringIO(csv_hdrdata)
        bayeslite.bayesdb_read_csv(bdb, 't', f, header=True, create=True,
            ifnotexists=False)
        data = bdb.sql_execute('SELECT * FROM t').fetchall()
        assert data == [
            # XXX Would be nice if the NaN could actually be that, or
            # at least None/NULL.
            (1,2,3,'foo','bar',u'nan',u'',u'quagga'),
            (4,5,6,'baz','quux',42.0,u'',u'eland'),
            (7,8,6,'zot','mumble',87.0,u'zoot',u'caribou'),
        ]

        # Header-only input with ifnotexists=True on the existing table:
        # rows and schema must be unchanged.
        f = StringIO.StringIO(csv_hdr)
        bayeslite.bayesdb_read_csv(bdb, 't', f, header=True, create=True,
            ifnotexists=True)
        assert bdb.sql_execute('SELECT * FROM t').fetchall() == data
        assert cursor_value(bdb.sql_execute('SELECT sql FROM sqlite_master'
                    ' WHERE name = ?', ('t',))) == \
            'CREATE TABLE "t"' \
            '("a" NUMERIC,"b" NUMERIC,"c" NUMERIC,"name" NUMERIC,' \
            '"nick" NUMERIC,"age" NUMERIC,"muppet" NUMERIC,"animal" NUMERIC)'

        # Append headerless data to the existing table.
        f = StringIO.StringIO(csv_data)
        bayeslite.bayesdb_read_csv(bdb, 't', f, header=False, create=False,
            ifnotexists=False)
        assert bdb.sql_execute('SELECT * FROM t').fetchall() == data + data

        # Append data that carries a header to the existing table.
        f = StringIO.StringIO(csv_hdrdata)
        bayeslite.bayesdb_read_csv(bdb, 't', f, header=True, create=False,
            ifnotexists=False)
        assert bdb.sql_execute('SELECT * FROM t').fetchall() == \
            data + data + data

        # Same append, but through the pathname-based entry point.
        with tempfile.NamedTemporaryFile(prefix='bayeslite') as temp:
            with open(temp.name, 'w') as f:
                f.write(csv_hdrdata)
            bayeslite.bayesdb_read_csv_file(bdb, 't', temp.name, header=True,
                create=False, ifnotexists=False)
        assert bdb.sql_execute('SELECT * FROM t').fetchall() == \
            data + data + data + data

        # Test the BQL CREATE TABLE FROM <csv-file> syntax.
        with tempfile.NamedTemporaryFile(prefix='bayeslite') as temp:
            with open(temp.name, 'w') as f:
                f.write(csv_hdrdata)
            bdb.execute('CREATE TABLE t2 FROM \'%s\'' % (temp.name,))
            assert bdb.sql_execute('SELECT * FROM t2').fetchall() == data

        # Trying to read a csv with an empty column name should fail.
        csv_header_corrupt = csv_hdr.replace('a,b',',')
        csv_hdrdata_corrupt = csv_header_corrupt + csv_data
        with tempfile.NamedTemporaryFile(prefix='bayeslite') as temp:
            with open(temp.name, 'w') as f:
                f.write(csv_hdrdata_corrupt)
            with pytest.raises(IOError):
                bayeslite.bayesdb_read_csv_file(
                    bdb, 't3', temp.name, header=True, create=True)
예제 #40
0
def test_read_csv():
    with bayeslite.bayesdb_open(builtin_metamodels=False) as bdb:
        f = StringIO.StringIO(csv_data)
        with pytest.raises(ValueError):
            # Table must already exist for create=False.
            bayeslite.bayesdb_read_csv(bdb, 't', f, header=False, create=False,
                ifnotexists=False)
        f = StringIO.StringIO(csv_data)
        with pytest.raises(ValueError):
            # Must pass create=True for ifnotexists=True.
            bayeslite.bayesdb_read_csv(bdb, 't', f, header=False, create=False,
                ifnotexists=True)
        f = StringIO.StringIO(csv_data)
        with pytest.raises(ValueError):
            # Must pass create=False for header=False.
            bayeslite.bayesdb_read_csv(bdb, 't', f, header=False, create=True,
                ifnotexists=False)
        f = StringIO.StringIO(csv_data)
        with pytest.raises(ValueError):
            # Must pass create=False for header=False.
            bayeslite.bayesdb_read_csv(bdb, 't', f, header=False, create=True,
                ifnotexists=True)
        f = StringIO.StringIO(csv_hdrdata)
        with pytest.raises(ValueError):
            # Table must already exist for create=False.
            bayeslite.bayesdb_read_csv(bdb, 't', f, header=True, create=False,
                ifnotexists=False)
        f = StringIO.StringIO(csv_hdrdata)
        with pytest.raises(ValueError):
            # Must pass create=True for ifnotexists=True.
            bayeslite.bayesdb_read_csv(bdb, 't', f, header=True, create=False,
                ifnotexists=True)
        f = StringIO.StringIO(csv_hdrdata)
        with pytest.raises(ValueError):
            with bdb.savepoint():
                # Table must not exist if ifnotexists=False.
                bdb.sql_execute('CREATE TABLE t(x)')
                bayeslite.bayesdb_read_csv(bdb, 't', f, header=True,
                    create=True, ifnotexists=False)
        f = StringIO.StringIO(csv_hdrdata)
        bayeslite.bayesdb_read_csv(bdb, 't', f, header=True, create=True,
            ifnotexists=False)
        data = bdb.sql_execute('SELECT * FROM t').fetchall()
        assert data == [
            # XXX Would be nice if the NaN could actually be that, or
            # at least None/NULL.
            (1,2,3,'foo','bar',u'nan',u'',u'quagga'),
            (4,5,6,'baz','quux',42.0,u'',u'eland'),
            (7,8,6,'zot','mumble',87.0,u'zoot',u'caribou'),
        ]
        f = StringIO.StringIO(csv_hdr)
        bayeslite.bayesdb_read_csv(bdb, 't', f, header=True, create=True,
            ifnotexists=True)
        assert bdb.sql_execute('SELECT * FROM t').fetchall() == data
        assert cursor_value(bdb.sql_execute('SELECT sql FROM sqlite_master'
                    ' WHERE name = ?', ('t',))) == \
            'CREATE TABLE "t"' \
            '("a" NUMERIC,"b" NUMERIC,"c" NUMERIC,"name" NUMERIC,' \
            '"nick" NUMERIC,"age" NUMERIC,"muppet" NUMERIC,"animal" NUMERIC)'
        f = StringIO.StringIO(csv_data)
        bayeslite.bayesdb_read_csv(bdb, 't', f, header=False, create=False,
            ifnotexists=False)
        assert bdb.sql_execute('SELECT * FROM t').fetchall() == data + data
        f = StringIO.StringIO(csv_hdrdata)
        bayeslite.bayesdb_read_csv(bdb, 't', f, header=True, create=False,
            ifnotexists=False)
        assert bdb.sql_execute('SELECT * FROM t').fetchall() == \
            data + data + data
        with tempfile.NamedTemporaryFile(prefix='bayeslite') as temp:
            with open(temp.name, 'w') as f:
                f.write(csv_hdrdata)
            bayeslite.bayesdb_read_csv_file(bdb, 't', temp.name, header=True,
                create=False, ifnotexists=False)
        assert bdb.sql_execute('SELECT * FROM t').fetchall() == \
            data + data + data + data