def test_legacy_models_slow():
    """Load legacy crosscat models for the `dha` table and query them.

    Checks that loading legacy models before the `dha` table exists
    raises ValueError, that loading succeeds once the table has been read
    from CSV, that a codebook can be loaded twice in a row, and that two
    ESTIMATE queries return the expected orderings of names.
    """
    # Open the database as a context manager so the handle is released
    # even when one of the assertions below fails.
    with bayeslite.bayesdb_open(builtin_metamodels=False) as bdb:
        # Fixed seed; the expected orderings below presumably depend on it.
        engine = crosscat.LocalEngine.LocalEngine(seed=0)
        bayeslite.bayesdb_register_metamodel(bdb, CrosscatMetamodel(engine))
        # The `dha` table has not been created yet, so loading must fail.
        with pytest.raises(ValueError):
            bayeslite.bayesdb_load_legacy_models(bdb, 'dha_cc', 'dha',
                'crosscat', dha_models, create=True)
        # NOTE(review): the 'U' flag in 'rU' is rejected by open() on
        # Python 3.11+; left unchanged to preserve behavior here.
        with open(dha_csv, 'rU') as f:
            read_csv.bayesdb_read_csv(bdb, 'dha', f, header=True, create=True)
        bayeslite.bayesdb_load_legacy_models(bdb, 'dha_cc', 'dha', 'crosscat',
            dha_models, create=True)
        # Make sure guessing also works.
        bdb.execute(
            'create generator dha_cc0 for dha using crosscat(guess(*))')
        bayeslite.bayesdb_load_codebook_csv_file(bdb, 'dha', dha_codebook)
        # Need to be able to overwrite existing codebook.
        #
        # XXX Not sure this is the right API.  What if overwrite is a
        # mistake?
        bayeslite.bayesdb_load_codebook_csv_file(bdb, 'dha', dha_codebook)
        similarity_bql = '''
        ESTIMATE name FROM dha_cc
            ORDER BY SIMILARITY TO (name = ?) DESC
            LIMIT 10
    '''
        expected_similar = [
            ('Albany NY',),
            ('Scranton PA',),
            ('United States US',),
            ('Norfolk VA',),
            ('Reading PA',),
            ('Salisbury MD',),
            ('Louisville KY',),
            ('Cleveland OH',),
            ('Covington KY',),
            ('Akron OH',),
        ]
        with bdb.savepoint():
            rows = bdb.execute(similarity_bql, ('Albany NY',)).fetchall()
            assert rows == expected_similar
        # Tickles an issue in case-folding of column names.
        predprob_bql = '''
        ESTIMATE name
            FROM dha_cc
            ORDER BY PREDICTIVE PROBABILITY OF mdcr_spnd_amblnc ASC
            LIMIT 10
    '''
        expected_least_probable = [
            ('McAllen TX',),
            ('Worcester MA',),
            ('Beaumont TX',),
            ('Temple TX',),
            ('Corpus Christi TX',),
            ('Takoma Park MD',),
            ('Kingsport TN',),
            ('Bangor ME',),
            ('Lebanon NH',),
            ('Panama City FL',),
        ]
        with bdb.savepoint():
            assert bdb.execute(predprob_bql).fetchall() == \
                expected_least_probable
Beispiel #2
0
def test_legacy_models__ci_slow():
    """Load legacy crosscat models for the `dha` table and query them.

    Checks that loading legacy models before the `dha` table exists
    raises ValueError, that loading succeeds once the table has been read
    from CSV, that a codebook can be loaded twice in a row, and that two
    ESTIMATE queries return the expected orderings of names.
    """
    # Open the database as a context manager so the handle is released
    # even when one of the assertions below fails.
    with bayeslite.bayesdb_open(builtin_metamodels=False) as bdb:
        # Fixed seed; the expected orderings below presumably depend on it.
        engine = crosscat.LocalEngine.LocalEngine(seed=0)
        bayeslite.bayesdb_register_metamodel(bdb, CrosscatMetamodel(engine))
        # The `dha` table has not been created yet, so loading must fail.
        with pytest.raises(ValueError):
            bayeslite.bayesdb_load_legacy_models(bdb, "dha_cc", "dha", "crosscat", dha_models, create=True)
        # NOTE(review): the 'U' flag in "rU" is rejected by open() on
        # Python 3.11+; left unchanged to preserve behavior here.
        with open(dha_csv, "rU") as f:
            read_csv.bayesdb_read_csv(bdb, "dha", f, header=True, create=True)
        bayeslite.bayesdb_load_legacy_models(bdb, "dha_cc", "dha", "crosscat", dha_models, create=True)
        # Make sure guessing also works.
        bdb.execute("create generator dha_cc0 for dha using crosscat(guess(*))")
        bayeslite.bayesdb_load_codebook_csv_file(bdb, "dha", dha_codebook)
        # Need to be able to overwrite existing codebook.
        #
        # XXX Not sure this is the right API.  What if overwrite is a
        # mistake?
        bayeslite.bayesdb_load_codebook_csv_file(bdb, "dha", dha_codebook)
        similarity_bql = """
        ESTIMATE name FROM dha_cc
            ORDER BY SIMILARITY TO (name = ?) DESC
            LIMIT 10
    """
        expected_similar = [
            ("Albany NY",),
            ("Scranton PA",),
            ("United States US",),
            ("Norfolk VA",),
            ("Reading PA",),
            ("Salisbury MD",),
            ("Louisville KY",),
            ("Cleveland OH",),
            ("Covington KY",),
            ("Akron OH",),
        ]
        with bdb.savepoint():
            rows = bdb.execute(similarity_bql, ("Albany NY",)).fetchall()
            assert rows == expected_similar
        # Tickles an issue in case-folding of column names.
        predprob_bql = """
        ESTIMATE name
            FROM dha_cc
            ORDER BY PREDICTIVE PROBABILITY OF mdcr_spnd_amblnc ASC
            LIMIT 10
    """
        expected_least_probable = [
            ("McAllen TX",),
            ("Worcester MA",),
            ("Beaumont TX",),
            ("Temple TX",),
            ("Corpus Christi TX",),
            ("Takoma Park MD",),
            ("Kingsport TN",),
            ("Bangor ME",),
            ("Lebanon NH",),
            ("Panama City FL",),
        ]
        with bdb.savepoint():
            assert bdb.execute(predprob_bql).fetchall() == expected_least_probable
Beispiel #3
0
def test_codebook_value_map():
    """
    A categorical column in crosscat can only take on a fixed number of values
    v1, v2, ..., vn.  In this test, we have a categorical column called
    `city` which takes on values `RIO, LA, SF, DC` as specified in the codebook
    value map.

        INITIALIZE dummy table with only RIO and SF appearing in dataset
        ANALYZE dummy_cc
        INSERT rows with `city` names `LA` and `DC`
        ANALYZE dummy_cc
        SIMULATE specifying `city` = `LA` (throws KeyError)
    """

    with bayeslite.bayesdb_open(builtin_metamodels=False) as bdb:
        engine = crosscat.LocalEngine.LocalEngine(seed=0)
        bayeslite.bayesdb_register_metamodel(bdb, CrosscatMetamodel(engine))

        bayeslite.bayesdb_read_csv(
            bdb, 'dummy', dummy_data, header=True, create=True)

        # The codebook loader takes a pathname, so the codebook text is
        # written to a named temporary file and loaded from there.
        with tempfile.NamedTemporaryFile(prefix='bayeslite') as tempbook:
            with open(tempbook.name, 'w') as f:
                f.write(dummy_codebook)
            bayeslite.bayesdb_load_codebook_csv_file(
                bdb, 'dummy', tempbook.name)

        bdb.execute('''
            CREATE GENERATOR dummy_cc FOR dummy
                USING crosscat(
                    GUESS(*),
                    kerberos IGNORE,
                    age NUMERICAL,
                    city CATEGORICAL
                )
        ''')
        bdb.execute('INITIALIZE 10 MODELS FOR dummy_cc')
        bdb.execute('ANALYZE dummy_cc FOR 20 ITERATIONS WAIT')
        # Simulating given a city that was present at initialization works.
        bdb.execute('SIMULATE age FROM dummy_cc GIVEN city = RIO LIMIT 5')
        # Add rows whose cities were not in the data at initialization.
        bdb.sql_execute('''
            INSERT INTO dummy (kerberos, age, city) VALUES
                ('jackie', 18, 'LA'), ('rocker', 22, 'DC')
        ''')
        bdb.execute('ANALYZE dummy_cc FOR 20 ITERATIONS WAIT')
        # The unused `SELECT * FROM dummy` cursor that used to be created
        # here was never read, so it has been dropped.
        with pytest.raises(KeyError):
            bdb.execute('SIMULATE age FROM dummy_cc GIVEN city = LA LIMIT 5')
def test_codebook_value_map():
    """
    A categorical column in crosscat can only take on a fixed number of values
    v1, v2, ..., vn.  In this test, we have a categorical column called
    `city` which takes on values `RIO, LA, SF, DC` as specified in the codebook
    value map.

        INITIALIZE dummy table with only RIO and SF appearing in dataset
        ANALYZE dummy_cc
        INSERT rows with `city` names `LA` and `DC`
        ANALYZE dummy_cc
        SIMULATE specifying `city` = `LA` (throws KeyError)
    """

    with bayeslite.bayesdb_open(builtin_metamodels=False) as bdb:
        engine = crosscat.LocalEngine.LocalEngine(seed=0)
        bayeslite.bayesdb_register_metamodel(bdb, CrosscatMetamodel(engine))

        bayeslite.bayesdb_read_csv(bdb, "dummy", dummy_data, header=True, create=True)

        # The codebook loader takes a pathname, so the codebook text is
        # written to a named temporary file and loaded from there.
        with tempfile.NamedTemporaryFile(prefix="bayeslite") as tempbook:
            with open(tempbook.name, "w") as f:
                f.write(dummy_codebook)
            bayeslite.bayesdb_load_codebook_csv_file(bdb, "dummy", tempbook.name)

        bdb.execute(
            """
            CREATE GENERATOR dummy_cc FOR dummy
                USING crosscat(
                    GUESS(*),
                    kerberos IGNORE,
                    age NUMERICAL,
                    city CATEGORICAL
                )
        """
        )
        bdb.execute("INITIALIZE 10 MODELS FOR dummy_cc")
        bdb.execute("ANALYZE dummy_cc FOR 20 ITERATIONS WAIT")
        # Simulating given a city that was present at initialization works.
        bdb.execute("SIMULATE age FROM dummy_cc GIVEN city = RIO LIMIT 5")
        # Add rows whose cities were not in the data at initialization.
        bdb.sql_execute(
            """
            INSERT INTO dummy (kerberos, age, city) VALUES
                ('jackie', 18, 'LA'), ('rocker', 22, 'DC')
        """
        )
        bdb.execute("ANALYZE dummy_cc FOR 20 ITERATIONS WAIT")
        # The unused `SELECT * FROM dummy` cursor that used to be created
        # here was never read, so it has been dropped.
        with pytest.raises(KeyError):
            bdb.execute("SIMULATE age FROM dummy_cc GIVEN city = LA LIMIT 5")
Beispiel #5
0
    def dot_codebook(self, line):
        """load codebook for table
        <table> </path/to/codebook.csv>

        Load a codebook -- short names, descriptions, and value
        descriptions for the columns of a table -- from a CSV file.
        """
        # NOTE(review): the docstring above is kept verbatim in case the
        # shell surfaces it as help text -- confirm before rewording.
        #
        # XXX Lousy, lousy tokenizer.
        args = line.split()
        if len(args) == 2:
            table, pathname = args
        else:
            # Anything other than exactly two whitespace-separated words
            # gets the usage message and nothing else.
            self.stdout.write("Usage: .codebook <table> </path/to/codebook.csv>\n")
            return
        try:
            bayeslite.bayesdb_load_codebook_csv_file(self._bdb, table, pathname)
        except IOError as err:
            # I/O errors are reported as a single line, without a traceback.
            self.stdout.write("%s\n" % (err,))
        except Exception:
            # Any other failure is reported with its full traceback.
            self.stdout.write(traceback.format_exc())
Beispiel #6
0
    def dot_codebook(self, line):
        '''load codebook for table
        <table> </path/to/codebook.csv>

        Load a codebook -- short names, descriptions, and value
        descriptions for the columns of a table -- from a CSV file.
        '''
        # NOTE(review): the docstring above is kept verbatim in case the
        # shell surfaces it as help text -- confirm before rewording.
        #
        # XXX Lousy, lousy tokenizer.
        args = line.split()
        if len(args) == 2:
            table, pathname = args
        else:
            # Anything other than exactly two whitespace-separated words
            # gets the usage message and nothing else.
            usage = 'Usage: .codebook <table> </path/to/codebook.csv>\n'
            self.stdout.write(usage)
            return
        try:
            bayeslite.bayesdb_load_codebook_csv_file(
                self._bdb, table, pathname)
        except IOError as err:
            # I/O errors are reported as a single line, without a traceback.
            self.stdout.write('%s\n' % (err,))
        except Exception:
            # Any other failure is reported with its full traceback.
            self.stdout.write(traceback.format_exc())
Beispiel #7
0
def test_legacy_models__ci_slow():
    """Load legacy crosscat models for the `dha` table and query them.

    Checks that loading legacy models before the `dha` table exists
    raises ValueError, that loading succeeds once the table has been read
    from CSV, that a codebook can be loaded twice in a row, and that two
    ESTIMATE queries return the expected orderings of names.
    """
    # Open the database as a context manager so the handle is released
    # even when one of the assertions below fails.
    with bayeslite.bayesdb_open(builtin_metamodels=False) as bdb:
        # Fixed seed; the expected orderings below presumably depend on it.
        engine = crosscat.LocalEngine.LocalEngine(seed=0)
        bayeslite.bayesdb_register_metamodel(bdb, CrosscatMetamodel(engine))
        # The `dha` table has not been created yet, so loading must fail.
        with pytest.raises(ValueError):
            bayeslite.bayesdb_load_legacy_models(
                bdb, 'dha_cc', 'dha', 'crosscat', dha_models, create=True)
        # NOTE(review): the 'U' flag in 'rU' is rejected by open() on
        # Python 3.11+; left unchanged to preserve behavior here.
        with open(dha_csv, 'rU') as f:
            read_csv.bayesdb_read_csv(bdb, 'dha', f, header=True, create=True)
        bayeslite.bayesdb_load_legacy_models(
            bdb, 'dha_cc', 'dha', 'crosscat', dha_models, create=True)
        # Make sure guessing also works.
        bdb.execute(
            'create generator dha_cc0 for dha using crosscat(guess(*))')
        bayeslite.bayesdb_load_codebook_csv_file(bdb, 'dha', dha_codebook)
        # Need to be able to overwrite existing codebook.
        #
        # XXX Not sure this is the right API.  What if overwrite is a
        # mistake?
        bayeslite.bayesdb_load_codebook_csv_file(bdb, 'dha', dha_codebook)
        similarity_bql = '''
        ESTIMATE name FROM dha_cc
            ORDER BY SIMILARITY TO (name = ?) DESC
            LIMIT 10
    '''
        expected_similar = [
            ('Albany NY',),
            ('Scranton PA',),
            ('United States US',),
            ('Norfolk VA',),
            ('Reading PA',),
            ('Salisbury MD',),
            ('Louisville KY',),
            ('Cleveland OH',),
            ('Covington KY',),
            ('Akron OH',),
        ]
        with bdb.savepoint():
            rows = bdb.execute(similarity_bql, ('Albany NY',)).fetchall()
            assert rows == expected_similar
        # Tickles an issue in case-folding of column names.
        predprob_bql = '''
        ESTIMATE name
            FROM dha_cc
            ORDER BY PREDICTIVE PROBABILITY OF mdcr_spnd_amblnc ASC
            LIMIT 10
    '''
        expected_least_probable = [
            ('McAllen TX',),
            ('Worcester MA',),
            ('Beaumont TX',),
            ('Temple TX',),
            ('Corpus Christi TX',),
            ('Takoma Park MD',),
            ('Kingsport TN',),
            ('Bangor ME',),
            ('Lebanon NH',),
            ('Panama City FL',),
        ]
        with bdb.savepoint():
            assert bdb.execute(predprob_bql).fetchall() == \
                expected_least_probable
Beispiel #8
0
def test_empty_codebook():
    """Check how the codebook loader reacts to empty and malformed files.

    A table `t(x, y)` is created, then one temporary file is rewritten
    several times with different contents.  After each rewrite the file is
    loaded as the codebook for `t`, and the test expects either IOError or
    a clean load.
    """
    with bayeslite.bayesdb_open(builtin_metamodels=False) as bdb:
        bdb.sql_execute('create table t(x, y)')
        with tempfile.NamedTemporaryFile(prefix='bayeslite') as tf:
            # Nothing has been written to the file yet: must be rejected.
            with pytest.raises(IOError):
                bayeslite.bayesdb_load_codebook_csv_file(bdb, 't', tf.name)
            # Header row with misspelled column names: must be rejected.
            with open(tf.name, 'w') as f:
                f.write('gnome, shotname, descruption, value_mop\n')
            with pytest.raises(IOError):
                bayeslite.bayesdb_load_codebook_csv_file(bdb, 't', tf.name)
            # Correct header row and no data rows: loads without error.
            with open(tf.name, 'w') as f:
                f.write('name, shortname, description, value_map\n')
            bayeslite.bayesdb_load_codebook_csv_file(bdb, 't', tf.name)
            # The row for `y` has only three fields: must be rejected.
            with open(tf.name, 'w') as f:
                f.write('name, shortname, description, value_map\n')
                f.write('x, eks, Greek chi,\n')
                f.write('y, why, quagga\n')
            with pytest.raises(IOError):
                bayeslite.bayesdb_load_codebook_csv_file(bdb, 't', tf.name)
            # value_map `{x=42}` must be rejected -- presumably because it
            # is not valid JSON; confirm against the loader.
            with open(tf.name, 'w') as f:
                f.write('name, shortname, description, value_map\n')
                f.write('x, eks, Greek chi,\n')
                f.write('y, why, quagga,{x=42}\n')
            with pytest.raises(IOError):
                bayeslite.bayesdb_load_codebook_csv_file(bdb, 't', tf.name)
            # value_map `[1,2,3]` must be rejected -- presumably because it
            # is a list rather than a mapping; confirm against the loader.
            with open(tf.name, 'w') as f:
                f.write('name, shortname, description, value_map\n')
                f.write('x, eks, Greek chi,\n')
                f.write('y, why, quagga,"[1,2,3]"\n')
            with pytest.raises(IOError):
                bayeslite.bayesdb_load_codebook_csv_file(bdb, 't', tf.name)
            # Includes a row for `z`, which is not a column of `t(x, y)`:
            # must be rejected.  NOTE(review): which row triggers the
            # IOError cannot be told from this test alone.
            with open(tf.name, 'w') as f:
                f.write('name, shortname, description, value_map\n')
                f.write('x, eks, Greek chi,\n')
                f.write('y, why, quagga,{"x":42}\n')
                f.write('z, zee, eland,\n')
            with pytest.raises(IOError):
                bayeslite.bayesdb_load_codebook_csv_file(bdb, 't', tf.name)