Esempio n. 1
0
    def dot_legacymodels(self, line):
        '''load legacy models
        <generator> <table> </path/to/models.pkl.gz>

        Create a Crosscat generator named <generator> for the table
        <table> from the legacy models stored in
        </path/to/models.pkl.gz>.
        '''
        # NOTE(review): the docstring above is kept verbatim; its layout
        # looks like shell help text -- confirm before rewording it.
        #
        # XXX Lousy, lousy tokenizer: plain whitespace splitting, so a
        # pathname containing spaces can never yield exactly three tokens.
        try:
            generator, table, pathname = line.split()
        except ValueError:
            # Anything other than exactly three tokens: print usage, stop.
            usage = ('Usage: .legacymodels <generator> <table>'
                     ' </path/to/models.pkl.gz>\n')
            self.stdout.write(usage)
            return
        try:
            bayeslite.bayesdb_load_legacy_models(
                self._bdb, generator, table, self._metamodel, pathname,
                create=True)
        except IOError as e:
            # I/O failures are reported as just the exception message.
            self.stdout.write('%s\n' % (e,))
        except Exception:
            # Any other failure is reported as a full traceback on the
            # shell's output stream instead of propagating to the caller.
            self.stdout.write(traceback.format_exc())
Esempio n. 2
0
    def dot_legacymodels(self, line):
        '''load legacy models
        <generator> <table> </path/to/models.pkl.gz>

        Create a Crosscat generator named <generator> for the table
        <table> from the legacy models stored in
        </path/to/models.pkl.gz>.
        '''
        # NOTE(review): docstring left byte-identical; it reads like
        # user-facing help text -- verify before changing its wording.
        #
        # XXX Lousy, lousy tokenizer (whitespace splitting only).
        words = line.split()
        if len(words) == 3:
            generator, table, pathname = words
            try:
                bayeslite.bayesdb_load_legacy_models(
                    self._bdb,
                    generator,
                    table,
                    self._metamodel,
                    pathname,
                    create=True,
                )
            except IOError as err:
                # Only the message is shown for I/O errors.
                self.stdout.write('%s\n' % (err,))
            except Exception:
                # Other errors are written out as a traceback rather than
                # being raised to the caller.
                self.stdout.write(traceback.format_exc())
        else:
            # Wrong number of arguments: print the usage line and do nothing.
            self.stdout.write(
                'Usage: .legacymodels <generator> <table>'
                ' </path/to/models.pkl.gz>\n')
Esempio n. 3
0
def test_legacy_models_slow():
    '''Load legacy models for the dha table and query the generator.

    Checks that the load raises ValueError before the dha CSV has been
    read in, that it goes through afterwards, and that two ESTIMATE
    queries against dha_cc return the expected names in order.
    '''
    def load_dha_models():
        # Same call is attempted before and after the table is created.
        return bayeslite.bayesdb_load_legacy_models(
            bdb, 'dha_cc', 'dha', 'crosscat', dha_models, create=True)

    def name_rows(*names):
        # Query results come back as 1-tuples, one per name.
        return [(name,) for name in names]

    bdb = bayeslite.bayesdb_open(builtin_metamodels=False)
    engine = crosscat.LocalEngine.LocalEngine(seed=0)
    cc_metamodel = CrosscatMetamodel(engine)
    bayeslite.bayesdb_register_metamodel(bdb, cc_metamodel)

    # First attempt happens before the CSV is read and must fail.
    with pytest.raises(ValueError):
        load_dha_models()
    with open(dha_csv, 'rU') as csv_file:
        read_csv.bayesdb_read_csv(
            bdb, 'dha', csv_file, header=True, create=True)
    load_dha_models()

    # Guessing the column types should work as well.
    bdb.execute('create generator dha_cc0 for dha using crosscat(guess(*))')

    # Loading the codebook twice: the second load must be able to
    # overwrite the first.
    #
    # XXX Not sure this is the right API.  What if overwrite is a
    # mistake?
    bayeslite.bayesdb_load_codebook_csv_file(bdb, 'dha', dha_codebook)
    bayeslite.bayesdb_load_codebook_csv_file(bdb, 'dha', dha_codebook)

    similar_to_albany = name_rows(
        'Albany NY',
        'Scranton PA',
        'United States US',
        'Norfolk VA',
        'Reading PA',
        'Salisbury MD',
        'Louisville KY',
        'Cleveland OH',
        'Covington KY',
        'Akron OH',
    )
    bql = '''
        ESTIMATE name FROM dha_cc
            ORDER BY SIMILARITY TO (name = ?) DESC
            LIMIT 10
    '''
    with bdb.savepoint():
        found = bdb.execute(bql, ('Albany NY',)).fetchall()
        assert found == similar_to_albany

    # Tickles an issue in case-folding of column names.
    least_probable_amblnc = name_rows(
        'McAllen TX',
        'Worcester MA',
        'Beaumont TX',
        'Temple TX',
        'Corpus Christi TX',
        'Takoma Park MD',
        'Kingsport TN',
        'Bangor ME',
        'Lebanon NH',
        'Panama City FL',
    )
    bql = '''
        ESTIMATE name
            FROM dha_cc
            ORDER BY PREDICTIVE PROBABILITY OF mdcr_spnd_amblnc ASC
            LIMIT 10
    '''
    with bdb.savepoint():
        found = bdb.execute(bql).fetchall()
        assert found == least_probable_amblnc
Esempio n. 4
0
def test_legacy_models__ci_slow():
    """Exercise legacy-model loading and two ESTIMATE queries on dha.

    The load is expected to raise ValueError while the dha table has not
    yet been read from CSV, and to succeed once it has. The two queries
    must then return the expected ten names, in order.
    """
    bdb = bayeslite.bayesdb_open(builtin_metamodels=False)
    local_engine = crosscat.LocalEngine.LocalEngine(seed=0)
    crosscat_metamodel = CrosscatMetamodel(local_engine)
    bayeslite.bayesdb_register_metamodel(bdb, crosscat_metamodel)

    # Attempted before the CSV is read: must be rejected.
    with pytest.raises(ValueError):
        bayeslite.bayesdb_load_legacy_models(
            bdb, "dha_cc", "dha", "crosscat", dha_models, create=True
        )

    with open(dha_csv, "rU") as handle:
        read_csv.bayesdb_read_csv(bdb, "dha", handle, header=True, create=True)

    bayeslite.bayesdb_load_legacy_models(
        bdb, "dha_cc", "dha", "crosscat", dha_models, create=True
    )

    # A generator built by guessing should be creatable too.
    bdb.execute("create generator dha_cc0 for dha using crosscat(guess(*))")

    # The codebook is loaded twice on purpose: an existing codebook
    # needs to be overwritable.
    #
    # XXX Not sure this is the right API.  What if overwrite is a
    # mistake?
    bayeslite.bayesdb_load_codebook_csv_file(bdb, "dha", dha_codebook)
    bayeslite.bayesdb_load_codebook_csv_file(bdb, "dha", dha_codebook)

    expected_names = (
        "Albany NY",
        "Scranton PA",
        "United States US",
        "Norfolk VA",
        "Reading PA",
        "Salisbury MD",
        "Louisville KY",
        "Cleveland OH",
        "Covington KY",
        "Akron OH",
    )
    bql = """
        ESTIMATE name FROM dha_cc
            ORDER BY SIMILARITY TO (name = ?) DESC
            LIMIT 10
    """
    with bdb.savepoint():
        cursor = bdb.execute(bql, ("Albany NY",))
        # Rows are 1-tuples, so wrap each expected name accordingly.
        assert cursor.fetchall() == [(name,) for name in expected_names]

    # Tickles an issue in case-folding of column names.
    expected_names = (
        "McAllen TX",
        "Worcester MA",
        "Beaumont TX",
        "Temple TX",
        "Corpus Christi TX",
        "Takoma Park MD",
        "Kingsport TN",
        "Bangor ME",
        "Lebanon NH",
        "Panama City FL",
    )
    bql = """
        ESTIMATE name
            FROM dha_cc
            ORDER BY PREDICTIVE PROBABILITY OF mdcr_spnd_amblnc ASC
            LIMIT 10
    """
    with bdb.savepoint():
        cursor = bdb.execute(bql)
        assert cursor.fetchall() == [(name,) for name in expected_names]
Esempio n. 5
0
def test_legacy_models__ci_slow():
    '''Check legacy-model loading for dha plus two ordered ESTIMATEs.

    Loading must raise ValueError until the dha table has been read
    from its CSV file; after that the load, a guessed generator, a
    repeated codebook load and two queries are all exercised.
    '''
    bdb = bayeslite.bayesdb_open(builtin_metamodels=False)
    cc_engine = crosscat.LocalEngine.LocalEngine(seed=0)
    cc_metamodel = CrosscatMetamodel(cc_engine)
    bayeslite.bayesdb_register_metamodel(bdb, cc_metamodel)

    # Positional arguments shared by both load attempts below.
    load_args = (bdb, 'dha_cc', 'dha', 'crosscat', dha_models)

    # Before the CSV has been read, the load is expected to fail.
    with pytest.raises(ValueError):
        bayeslite.bayesdb_load_legacy_models(*load_args, create=True)
    with open(dha_csv, 'rU') as src:
        read_csv.bayesdb_read_csv(bdb, 'dha', src, header=True, create=True)
    bayeslite.bayesdb_load_legacy_models(*load_args, create=True)

    # Make sure a guessed generator can be created as well.
    bdb.execute('create generator dha_cc0 for dha using crosscat(guess(*))')

    # Second codebook load checks that an existing one can be overwritten.
    #
    # XXX Not sure this is the right API.  What if overwrite is a
    # mistake?
    bayeslite.bayesdb_load_codebook_csv_file(bdb, 'dha', dha_codebook)
    bayeslite.bayesdb_load_codebook_csv_file(bdb, 'dha', dha_codebook)

    by_similarity = [
        ('Albany NY', ),
        ('Scranton PA', ),
        ('United States US', ),
        ('Norfolk VA', ),
        ('Reading PA', ),
        ('Salisbury MD', ),
        ('Louisville KY', ),
        ('Cleveland OH', ),
        ('Covington KY', ),
        ('Akron OH', ),
    ]
    by_predictive_probability = [
        ('McAllen TX', ),
        ('Worcester MA', ),
        ('Beaumont TX', ),
        ('Temple TX', ),
        ('Corpus Christi TX', ),
        ('Takoma Park MD', ),
        ('Kingsport TN', ),
        ('Bangor ME', ),
        ('Lebanon NH', ),
        ('Panama City FL', ),
    ]

    bql = '''
        ESTIMATE name FROM dha_cc
            ORDER BY SIMILARITY TO (name = ?) DESC
            LIMIT 10
    '''
    with bdb.savepoint():
        result = bdb.execute(bql, ('Albany NY', )).fetchall()
        assert result == by_similarity

    # Tickles an issue in case-folding of column names.
    bql = '''
        ESTIMATE name
            FROM dha_cc
            ORDER BY PREDICTIVE PROBABILITY OF mdcr_spnd_amblnc ASC
            LIMIT 10
    '''
    with bdb.savepoint():
        result = bdb.execute(bql).fetchall()
        assert result == by_predictive_probability