Example #1
0
def _check_composer_registered(bdb):
    """Register a Composer on `bdb` and verify what registration created.

    Checks that a row for the composer exists in bayesdb_metamodel and that
    every expected composer table/trigger is listed in sqlite_master.  A
    missing entry is reported through pytest.fail.
    """
    composer = Composer(n_samples=5)
    bayeslite.bayesdb_register_metamodel(bdb, composer)
    # Check if globally registered.
    try:
        bdb.sql_execute('''
            SELECT * FROM bayesdb_metamodel WHERE name={}
        '''.format(quote(composer.name()))).next()
    except StopIteration:
        pytest.fail('Composer not registered in bayesdb_metamodel.')
    # Check all tables/triggers.
    schema = [
        ('table', 'bayesdb_composer_cc_id'),
        ('table', 'bayesdb_composer_column_owner'),
        ('table', 'bayesdb_composer_column_toposort'),
        ('trigger', 'bayesdb_composer_column_toposort_check'),
        ('table', 'bayesdb_composer_column_parents'),
        ('table', 'bayesdb_composer_column_foreign_predictor'),
        ('trigger', 'bayesdb_composer_column_foreign_predictor_check')
    ]
    for kind, name in schema:
        try:
            # An empty result raises StopIteration on the first .next().
            bdb.sql_execute('''
                SELECT * FROM sqlite_master WHERE type={} AND name={}
            '''.format(quote(kind), quote(name))).next()
        except StopIteration:
            pytest.fail('Missing from Composer schema: {}'.format((kind,name)))


def test_register():
    """Registering the Composer metamodel should create its schema."""
    bdb = bayeslite.bayesdb_open()
    try:
        _check_composer_registered(bdb)
    finally:
        # Close the database even when one of the checks fails.
        bdb.close()
def _check_composer_registered(bdb):
    """Register a Composer on `bdb` and verify what registration created.

    Checks that a row for the composer exists in bayesdb_metamodel and that
    every expected composer table/trigger is listed in sqlite_master.  A
    missing entry is reported through pytest.fail.
    """
    composer = Composer(n_samples=5)
    bayeslite.bayesdb_register_metamodel(bdb, composer)
    # Check if globally registered.
    try:
        bdb.sql_execute('''
            SELECT * FROM bayesdb_metamodel WHERE name={}
        '''.format(quote(composer.name()))).next()
    except StopIteration:
        pytest.fail('Composer not registered in bayesdb_metamodel.')
    # Check all tables/triggers.
    schema = [('table', 'bayesdb_composer_cc_id'),
              ('table', 'bayesdb_composer_column_owner'),
              ('table', 'bayesdb_composer_column_toposort'),
              ('trigger', 'bayesdb_composer_column_toposort_check'),
              ('table', 'bayesdb_composer_column_parents'),
              ('table', 'bayesdb_composer_column_foreign_predictor'),
              ('trigger', 'bayesdb_composer_column_foreign_predictor_check')]
    for kind, name in schema:
        try:
            # An empty result raises StopIteration on the first .next().
            bdb.sql_execute('''
                SELECT * FROM sqlite_master WHERE type={} AND name={}
            '''.format(quote(kind), quote(name))).next()
        except StopIteration:
            pytest.fail('Missing from Composer schema: {}'.format(
                (kind, name)))


def test_register():
    """Registering the Composer metamodel should create its schema."""
    bdb = bayeslite.bayesdb_open()
    try:
        _check_composer_registered(bdb)
    finally:
        # Close the database even when one of the checks fails.
        bdb.close()
Example #3
0
def test_topological_sort():
    """Check Composer.topological_sort on an acyclic and a cyclic graph."""
    # Acyclic graph should return a correct topo sort.
    graph = {1:[], 2:[1], 3:[2,6], 4:[2], 5:[4], 6:[4,1]}
    topo = Composer.topological_sort(graph)
    for i, (_node, parents) in enumerate(topo):
        # Each entry of topo unpacks as (node, parents), so the later *node*
        # must be compared against parents; comparing the whole entry against
        # a list of node ids could never fail.
        for later_node, _later_parents in topo[i+1:]:
            assert later_node not in parents
    # Cyclic graph should throw an error.
    graph = {1:[], 2:[1], 3:[2,4,6], 4:[2], 5:[3,4], 6:[4,5,1]}
    with pytest.raises(BLE):
        Composer.topological_sort(graph)
def test_topological_sort():
    """Check Composer.topological_sort on an acyclic and a cyclic graph."""
    # Acyclic graph should return a correct topo sort.
    graph = {1: [], 2: [1], 3: [2, 6], 4: [2], 5: [4], 6: [4, 1]}
    topo = Composer.topological_sort(graph)
    for i, (_node, parents) in enumerate(topo):
        # Each entry of topo unpacks as (node, parents), so the later *node*
        # must be compared against parents; comparing the whole entry against
        # a list of node ids could never fail.
        for later_node, _later_parents in topo[i + 1:]:
            assert later_node not in parents
    # Cyclic graph should throw an error.
    graph = {1: [], 2: [1], 3: [2, 4, 6], 4: [2], 5: [3, 4], 6: [4, 5, 1]}
    with pytest.raises(BLE):
        Composer.topological_sort(graph)
Example #5
0
from bdbcontrib.predictors import random_forest
from bdbcontrib.predictors import keplers_law

# Output directory: first command-line argument, else the current directory.
outdir = sys.argv[1] if len(sys.argv) >= 2 else '.'

# Find the satellites file: ../satellites/satellites.csv relative to this
# script's directory.
PATH_KEPLER = os.path.dirname(os.path.abspath(__file__))
PATH_EXAMPLES = os.path.dirname(PATH_KEPLER)
PATH_SATELLITES = os.path.join(PATH_EXAMPLES, 'satellites')
PATH_SATELLITES_CSV = os.path.join(PATH_SATELLITES, 'satellites.csv')

# Composer metamodel with the Kepler's-law and random-forest predictors.
composer = Composer()
composer.register_foreign_predictor(keplers_law.KeplersLaw)
composer.register_foreign_predictor(random_forest.RandomForest)

# Remove any database left over from a previous run before reopening it.
PATH_KEPLER_BDB = os.path.join(outdir, 'kepler.bdb')
if os.path.exists(PATH_KEPLER_BDB):
    os.remove(PATH_KEPLER_BDB)

bdb = bayeslite.bayesdb_open(PATH_KEPLER_BDB)
bayeslite.bayesdb_register_metamodel(bdb, composer)
bayeslite.bayesdb_read_csv_file(
    bdb, 'satellites', PATH_SATELLITES_CSV, header=True, create=True)

bdbcontrib.query(bdb, '''
    CREATE GENERATOR sat_kepler FOR satellites USING composer(
        default (
            Country_of_Operator CATEGORICAL, Operator_Owner CATEGORICAL,
Example #6
0
def _check_create_generator_schema(bdb):
    """Run the CREATE GENERATOR ... USING composer schema checks on `bdb`.

    Loads the satellites CSV, registers a Composer, and then issues a series
    of CREATE GENERATOR statements: valid schemas must succeed, invalid ones
    must raise the exception named in the surrounding pytest.raises.
    Foreign predictors are registered progressively, so statement order
    matters.
    """
    bayeslite.bayesdb_read_csv_file(bdb, 'satellites', PATH_SATELLITES_CSV,
        header=True, create=True)
    composer = Composer(n_samples=5)
    bayeslite.bayesdb_register_metamodel(bdb, composer)
    # Using crosscat and default to specify models should work.
    bdb.execute('''
        CREATE GENERATOR t1 FOR satellites USING composer(
            default (
                Country_of_Operator CATEGORICAL, Operator_Owner CATEGORICAL,
                Users CATEGORICAL, Purpose CATEGORICAL,
                Class_of_orbit CATEGORICAL, Perigee_km NUMERICAL,
                Apogee_km NUMERICAL, Eccentricity NUMERICAL
            ),
            crosscat (
                Anticipated_Lifetime NUMERICAL, Contractor CATEGORICAL
            )
        );''')
    assert bayeslite.core.bayesdb_has_generator(bdb, 't1_cc')
    # IGNORE and GUESS(*) are forbidden and should crash.
    with pytest.raises(AttributeError):
        bdb.execute('''
            CREATE GENERATOR t2 FOR satellites USING composer(
                default (
                    GUESS(*), Country_of_Operator IGNORE,
                    Apogee_km NUMERICAL, Eccentricity NUMERICAL
                )
            );''')
    # Test unregistered foreign predictor.
    with pytest.raises(BLE):
        bdb.execute('''
            CREATE GENERATOR t3 FOR satellites USING composer(
                default (
                    Country_of_Operator CATEGORICAL, Operator_Owner CATEGORICAL,
                    Users CATEGORICAL, Purpose CATEGORICAL,
                    Class_of_orbit CATEGORICAL, Perigee_km NUMERICAL,
                    Apogee_km NUMERICAL, Eccentricity NUMERICAL
                ),
                random_forest (
                    Apogee_km NUMERICAL GIVEN Operator_Owner
                )
            );''')
    # Unregistered foreign predictor should crash.
    with pytest.raises(BLE):
        bdb.execute('''
            CREATE GENERATOR t4 FOR satellites USING composer(
                default (
                    Country_of_Operator CATEGORICAL, Operator_Owner CATEGORICAL,
                    Users CATEGORICAL, Purpose CATEGORICAL,
                    Class_of_orbit CATEGORICAL, Perigee_km NUMERICAL,
                    Apogee_km NUMERICAL, Eccentricity NUMERICAL
                ),
                random_forest (
                    Apogee_km NUMERICAL GIVEN Operator_Owner
                )
            );''')
    # Registered foreign predictor should work.
    composer.register_foreign_predictor(random_forest.RandomForest)
    bdb.execute('''
        CREATE GENERATOR t5 FOR satellites USING composer(
            default (
                Country_of_Operator CATEGORICAL, Operator_Owner CATEGORICAL,
                Users CATEGORICAL, Purpose CATEGORICAL,
                Class_of_orbit CATEGORICAL, Perigee_km NUMERICAL,
                Eccentricity NUMERICAL
            ),
            random_forest (
                Apogee_km NUMERICAL GIVEN Operator_Owner
            )
        );''')
    # Wrong stattype in predictor should crash.
    with pytest.raises(BLE):
        bdb.execute('''
            CREATE GENERATOR t6 FOR satellites USING composer(
                default (
                    Country_of_Operator CATEGORICAL, Operator_Owner CATEGORICAL,
                    Users CATEGORICAL, Purpose CATEGORICAL,
                    Class_of_orbit CATEGORICAL, Perigee_km NUMERICAL,
                    Apogee_km NUMERICAL, Eccentricity NUMERICAL
                ),
                random_forest (
                    Apogee_km RADIAL GIVEN Operator_Owner
                )
            );''')
    # Missing GIVEN keyword should crash.
    with pytest.raises(BLE):
        bdb.execute('''
            CREATE GENERATOR t6 FOR satellites USING composer(
                default (
                    Country_of_Operator CATEGORICAL, Operator_Owner CATEGORICAL,
                    Users CATEGORICAL, Purpose CATEGORICAL,
                    Class_of_orbit CATEGORICAL, Perigee_km NUMERICAL,
                    Apogee_km NUMERICAL, Eccentricity NUMERICAL
                ),
                random_forest (
                    Apogee_km NUMERICAL, Operator_Owner
                )
            );''')
    # Missing conditions in random forest conditions should crash.
    with pytest.raises(BLE):
        bdb.execute('''
            CREATE GENERATOR t7 FOR satellites USING composer(
                default (
                    Country_of_Operator CATEGORICAL,
                    Users CATEGORICAL, Purpose CATEGORICAL,
                    Class_of_orbit CATEGORICAL, Perigee_km NUMERICAL,
                    Apogee_km NUMERICAL, Eccentricity NUMERICAL
                ),
                random_forest (
                    Apogee_km NUMERICAL GIVEN Operator_Owner
                )
            );''')
    # Test duplicate declarations.
    with pytest.raises(BLE):
        bdb.execute('''
            CREATE GENERATOR t7 FOR satellites USING composer(
                default (
                    Country_of_Operator CATEGORICAL,
                    Users CATEGORICAL, Purpose CATEGORICAL,
                    Class_of_orbit CATEGORICAL, Perigee_km NUMERICAL,
                    Apogee_km NUMERICAL, Eccentricity NUMERICAL
                ),
                random_forest (
                    Class_of_orbit CATEGORICAL GIVEN Operator_Owner
                )
            );''')
    # Arbitrary DAG with foreign predictors.
    composer.register_foreign_predictor(multiple_regression.MultipleRegression)
    bdb.execute('''
        CREATE GENERATOR t8 FOR satellites USING composer(
            default (
                Country_of_Operator CATEGORICAL, Operator_Owner CATEGORICAL,
                Users CATEGORICAL, Purpose CATEGORICAL,
                Class_of_orbit CATEGORICAL, Perigee_km NUMERICAL,
            ),
            random_forest (
                Apogee_km NUMERICAL GIVEN Operator_Owner, Users
            ),
            multiple_regression (
                Eccentricity NUMERICAL GIVEN Apogee_km, Users, Perigee_km
            )
        );''')
    # Duplicate declarations in foreign predictors should crash.
    with pytest.raises(BLE):
        bdb.execute('''
            CREATE GENERATOR t9 FOR satellites USING composer(
                default (
                    Country_of_Operator CATEGORICAL, Operator_Owner CATEGORICAL,
                    Users CATEGORICAL, Purpose CATEGORICAL,
                    Class_of_orbit CATEGORICAL, Perigee_km NUMERICAL,
                    Apogee_km NUMERICAL, Eccentricity NUMERICAL
                ),
                random_forest (
                    Perigee_km NUMERICAL GIVEN Purpose
                ),
                multiple_regression (
                    Perigee_km NUMERICAL GIVEN Operator_Owner
                )
            );''')
    # MML for default models should work.
    bdb.execute('''
        CREATE GENERATOR t10 FOR satellites USING composer(
            default (
                Country_of_Operator CATEGORICAL,
                Users CATEGORICAL, Purpose CATEGORICAL,
                Class_of_orbit CATEGORICAL, Apogee_km NUMERICAL
            )
            random_forest (
                Perigee_km NUMERICAL GIVEN Purpose
            )
            multiple_regression (
                Eccentricity NUMERICAL GIVEN Operator_Owner, Class_of_orbit
            )
            DEPENDENT(Apogee_km, Perigee_km, Purpose),
            INDEPENDENT(Country_of_Operator, Purpose)
        );''')
    # MML for foreign predictors should crash.
    with pytest.raises(BLE):
        bdb.execute('''
            CREATE GENERATOR t11 FOR satellites USING composer(
                default (
                    Country_of_Operator CATEGORICAL, Operator_Owner CATEGORICAL,
                    Users CATEGORICAL, Purpose CATEGORICAL,
                    Class_of_orbit CATEGORICAL,
                    Apogee_km NUMERICAL
                ),
                random_forest (
                    Perigee_km NUMERICAL GIVEN Purpose
                ),
                multiple_regression (
                    Eccentricity NUMERICAL GIVEN Operator_Owner, Class_of_orbit
                )
                DEPENDENT(Apogee_km, Eccentricity, Country_of_Operator),
                INDEPENDENT(Perigee_km, Purpose)
            );''')
    # Test full generator.
    composer.register_foreign_predictor(keplers_law.KeplersLaw)
    bdb.execute('''
        CREATE GENERATOR t12 FOR satellites USING composer(
            default (
                Country_of_Operator CATEGORICAL, Operator_Owner CATEGORICAL,
                Users CATEGORICAL, Purpose CATEGORICAL,
                Class_of_orbit CATEGORICAL, Perigee_km NUMERICAL,
                Apogee_km NUMERICAL, Eccentricity NUMERICAL,
                Launch_Mass_kg NUMERICAL, Dry_Mass_kg NUMERICAL,
                Power_watts NUMERICAL, Date_of_Launch NUMERICAL,
                Contractor CATEGORICAL,
                Country_of_Contractor CATEGORICAL, Launch_Site CATEGORICAL,
                Launch_Vehicle CATEGORICAL,
                Source_Used_for_Orbital_Data CATEGORICAL,
                longitude_radians_of_geo NUMERICAL,
                Inclination_radians NUMERICAL,
            ),
            random_forest (
                Type_of_Orbit CATEGORICAL
                    GIVEN Apogee_km, Perigee_km,
                        Eccentricity, Period_minutes, Launch_Mass_kg,
                        Power_watts, Anticipated_Lifetime, Class_of_orbit
            ),
            keplers_law (
                Period_minutes NUMERICAL
                    GIVEN Perigee_km, Apogee_km
            ),
            multiple_regression (
                Anticipated_Lifetime NUMERICAL
                    GIVEN Dry_Mass_kg, Power_watts, Launch_Mass_kg, Contractor
            ),
            DEPENDENT(Apogee_km, Perigee_km, Eccentricity),
            INDEPENDENT(Country_of_Operator, longitude_radians_of_geo)
        );''')


def test_create_generator_schema():
    """Composer should accept valid generator schemas and reject bad ones."""
    bdb = bayeslite.bayesdb_open()
    try:
        _check_create_generator_schema(bdb)
    finally:
        # Close the database even when one of the checks fails.
        bdb.close()
Example #7
0
def _check_composer_integration(bdb):
    """Run the sequential Composer integration checks against `bdb`.

    Builds one complex generator on the satellites table, then exercises
    INITIALIZE, ANALYZE, DEPENDENCE PROBABILITY, SIMULATE, PROBABILITY,
    MUTUAL INFORMATION and PREDICT CONFIDENCE in that order.  Later sections
    rely on the models created and analyzed by earlier ones.
    """
    # SETUP
    # -----
    # Dataset.
    bayeslite.bayesdb_read_csv_file(bdb, 'satellites', PATH_SATELLITES_CSV,
        header=True, create=True)
    bdbcontrib.nullify(bdb, 'satellites', 'NaN')
    # Composer.
    composer = Composer(n_samples=5)
    composer.register_foreign_predictor(
        multiple_regression.MultipleRegression)
    composer.register_foreign_predictor(keplers_law.KeplersLaw)
    composer.register_foreign_predictor(random_forest.RandomForest)
    # Use complex generator for interesting test cases.
    bayeslite.bayesdb_register_metamodel(bdb, composer)
    bdb.execute('''
        CREATE GENERATOR t1 FOR satellites USING composer(
            default (
                Country_of_Operator CATEGORICAL, Operator_Owner CATEGORICAL,
                Users CATEGORICAL, Purpose CATEGORICAL,
                Class_of_orbit CATEGORICAL, Perigee_km NUMERICAL,
                Apogee_km NUMERICAL, Eccentricity NUMERICAL,
                Launch_Mass_kg NUMERICAL, Dry_Mass_kg NUMERICAL,
                Power_watts NUMERICAL, Date_of_Launch NUMERICAL,
                Contractor CATEGORICAL,
                Country_of_Contractor CATEGORICAL, Launch_Site CATEGORICAL,
                Launch_Vehicle CATEGORICAL,
                Source_Used_for_Orbital_Data CATEGORICAL,
                longitude_radians_of_geo NUMERICAL,
                Inclination_radians NUMERICAL,
            ),
            random_forest (
                Type_of_Orbit CATEGORICAL
                    GIVEN Apogee_km, Perigee_km,
                        Eccentricity, Period_minutes, Launch_Mass_kg,
                        Power_watts, Anticipated_Lifetime, Class_of_orbit
            ),
            keplers_law (
                Period_minutes NUMERICAL
                    GIVEN Perigee_km, Apogee_km
            ),
            multiple_regression (
                Anticipated_Lifetime NUMERICAL
                    GIVEN Dry_Mass_kg, Power_watts, Launch_Mass_kg,
                    Contractor
            ),
            DEPENDENT(Apogee_km, Perigee_km, Eccentricity),
            DEPENDENT(Contractor, Country_of_Contractor),
            INDEPENDENT(Country_of_Operator, Date_of_Launch)
        );''')


    # ----------------------
    # TEST INITIALIZE MODELS
    # ----------------------

    bdb.execute('INITIALIZE 2 MODELS FOR t1')
    # Check number of models.
    df = bdbcontrib.describe_generator_models(bdb, 't1')
    assert len(df) == 2
    df = bdbcontrib.describe_generator_models(bdb, 't1_cc')
    assert len(df) == 2

    # -------------------
    # TEST ANALYZE MODELS
    # -------------------

    bdb.execute('ANALYZE t1 FOR 2 ITERATIONS WAIT;')
    # Check number of iterations of composer.
    df = bdbcontrib.describe_generator_models(bdb, 't1')
    for index, modelno, iterations in df.itertuples():
        assert iterations == 2
    # Check number of iterations of composer_cc.
    df = bdbcontrib.describe_generator_models(bdb, 't1_cc')
    for index, modelno, iterations in df.itertuples():
        assert iterations == 2

    # ----------------------------------
    # TEST COLUMN DEPENDENCE PROBABILITY
    # ----------------------------------

    # Special 0/1 regimes.
    # Local with a INDEPENDENT local should be 0.
    curs = bdb.execute('''
        ESTIMATE DEPENDENCE PROBABILITY OF Date_of_Launch
            WITH Country_of_Operator FROM t1 LIMIT 1
    ''')
    assert curs.next()[0] == 0
    # Local with a DEPENDENT local should be 1.
    curs = bdb.execute('''
        ESTIMATE DEPENDENCE PROBABILITY OF Perigee_km WITH Eccentricity
            FROM t1 LIMIT 1
    ''')
    assert curs.next()[0] == 1
    curs = bdb.execute('''
        ESTIMATE DEPENDENCE PROBABILITY OF Apogee_km WITH Eccentricity
            FROM t1 LIMIT 1
    ''')
    assert curs.next()[0] == 1
    # Foreign with a local parent should be 1.
    curs = bdb.execute('''
        ESTIMATE DEPENDENCE PROBABILITY OF Period_minutes WITH Apogee_km
            FROM t1 LIMIT 1
    ''')
    assert curs.next()[0] == 1.
    curs = bdb.execute('''
        ESTIMATE DEPENDENCE PROBABILITY OF Anticipated_Lifetime WITH Power_watts
            FROM t1 LIMIT 1
    ''')
    assert curs.next()[0] == 1.
    # Foreign with a foreign parent should be 1.
    curs = bdb.execute('''
        ESTIMATE DEPENDENCE PROBABILITY OF Type_of_Orbit WITH
            Anticipated_Lifetime FROM t1 LIMIT 1
    ''')
    assert curs.next()[0] == 1.
    # Foreign with a local non-parent DEPENDENT with local parent should be 1.
    curs = bdb.execute('''
        ESTIMATE DEPENDENCE PROBABILITY OF Period_minutes WITH
            Eccentricity FROM t1 LIMIT 1
    ''')
    assert curs.next()[0] == 1.
    # Foreign with foreign sharing common direct ancestor should be 1.
    # Launch_Mass_kg is the common parent.
    curs = bdb.execute('''
        ESTIMATE DEPENDENCE PROBABILITY OF Anticipated_Lifetime WITH
            Type_of_Orbit FROM t1 LIMIT 1
    ''')
    assert curs.next()[0] == 1.
    # Foreign with a foreign sharing a common DEPENDENT ancestor should be 1.
    # Eccentricity is a parent of Type_of_orbit, and is dependent
    # with Period_minutes through DEPENDENT(Apogee_km, Perigee_km, Eccentricity)
    curs = bdb.execute('''
        ESTIMATE DEPENDENCE PROBABILITY OF Period_minutes WITH
            Type_of_Orbit FROM t1 LIMIT 1
    ''')
    assert curs.next()[0] == 1.
    # Column with itself should be 1.
    curs = bdb.execute('''
        ESTIMATE DEPENDENCE PROBABILITY OF Anticipated_Lifetime WITH
            Anticipated_Lifetime FROM t1 LIMIT 1
    ''')
    assert curs.next()[0] == 1.

    # Unknown [0,1] regimes.
    # Foreign with a local of unknown relation with parents.
    curs = bdb.execute('''
        ESTIMATE DEPENDENCE PROBABILITY OF Anticipated_Lifetime WITH
            longitude_radians_of_geo FROM t1 LIMIT 1
    ''')
    assert 0 <= curs.next()[0] <= 1.
    curs = bdb.execute('''
        ESTIMATE DEPENDENCE PROBABILITY OF Period_minutes WITH
            longitude_radians_of_geo FROM t1 LIMIT 1
    ''')
    assert 0 <= curs.next()[0] <= 1.
    # Foreign with a foreign of unknown ancestry relation.
    curs = bdb.execute('''
        ESTIMATE DEPENDENCE PROBABILITY OF Anticipated_Lifetime WITH
            Period_minutes FROM t1 LIMIT 1
    ''')
    assert 0 <= curs.next()[0] <= 1.

    # ----------------------------------
    # TEST SIMULATE
    # ----------------------------------

    # Crash tests for various code paths. Quality of simulations ignored.
    # Joint local.
    curs = bdb.execute('''
        SIMULATE Power_watts, Launch_Mass_kg FROM t1 LIMIT 2;
    ''')
    assert len(curs.fetchall()) == 2
    # Forward simulate foreign.
    curs = bdb.execute('''
        SIMULATE Period_minutes FROM t1 GIVEN Apogee_km = 1000, Perigee_km = 980
            LIMIT 2;
    ''')
    assert len(curs.fetchall()) == 2
    # Forward simulate foreign with missing parents.
    curs = bdb.execute('''
        SIMULATE Anticipated_Lifetime FROM t1 GIVEN Dry_Mass_kg = 2894,
            Launch_Mass_kg = 1730 LIMIT 2;
    ''')
    assert len(curs.fetchall()) == 2
    # Joint simulate foreign with parents, and missing parents.
    curs = bdb.execute('''
        SIMULATE Type_of_Orbit, Eccentricity FROM t1 GIVEN Dry_Mass_kg = 2894,
            Launch_Mass_kg = 1730 LIMIT 2;
    ''')
    assert len(curs.fetchall()) == 2
    # Joint simulate foreign with non-parents.
    curs = bdb.execute('''
        SIMULATE Period_minutes, Eccentricity FROM t1 GIVEN Apogee_km = 38000
            LIMIT 2;
    ''')
    assert len(curs.fetchall()) == 2
    # Simulate joint local conditioned on two foreigns.
    curs = bdb.execute('''
        SIMULATE Country_of_Operator, Inclination_radians FROM t1
            GIVEN Period_minutes = 1432, Anticipated_Lifetime = 5 LIMIT 2;
    ''')
    assert len(curs.fetchall()) == 2
    # Simulate joint foreign conditioned on third foreign.
    curs = bdb.execute('''
        SIMULATE Period_minutes, Anticipated_Lifetime FROM t1
            GIVEN Type_of_Orbit = 'Deep Highly Eccentric' LIMIT 2
    ''')
    assert len(curs.fetchall()) == 2
    # Simulate foreign conditioned on itself.
    curs = bdb.execute('''
        SIMULATE Period_minutes, Apogee_km FROM t1
            GIVEN Period_minutes = 102 LIMIT 2
    ''')
    assert [s[0] for s in curs] == [102] * 2

    # -----------------------------
    # TEST COLUMN VALUE PROBABILITY
    # -----------------------------

    # Crash tests for various code path. Quality of logpdf ignored.
    # Conditional local.
    curs = bdb.execute('''
        ESTIMATE PROBABILITY OF Power_watts = 800 GIVEN (Perigee_km = 980,
            Launch_Mass_kg = 890) FROM t1 LIMIT 1;
    ''')
    assert 0. <= curs.next()[0]
    # Unconditional foreign
    curs = bdb.execute('''
        ESTIMATE PROBABILITY OF Period_minutes = 1020 FROM t1 LIMIT 1;
    ''')
    assert 0. <= curs.next()[0]
    # Conditional foreign on parent and non-parents.
    curs = bdb.execute('''
        ESTIMATE PROBABILITY OF Period_minutes = 1020 GIVEN
            (Apogee_km = 38000, Eccentricity = 0.03) FROM t1 LIMIT 1;
    ''')
    assert 0 <= curs.next()[0]
    # Conditional foreign on foreign.
    curs = bdb.execute('''
        ESTIMATE PROBABILITY OF Anticipated_Lifetime = 4.09 GIVEN
            (Class_of_Orbit = 'LEO', Purpose='Astrophysics',
                Period_minutes = 1436) FROM t1 LIMIT 1;
    ''')
    assert 0. <= curs.next()[0]
    # Categorical foreign should be less than 1.
    curs = bdb.execute('''
        ESTIMATE PROBABILITY OF Type_of_Orbit = 'Polar' FROM t1 LIMIT 1;
    ''')
    assert curs.next()[0] <= 1.
    # Query inconsistent with evidence should be 0.
    curs = bdb.execute('''
        ESTIMATE PROBABILITY OF "Type_of_Orbit" = 'Polar'
            GIVEN ("Type_of_Orbit" = 'Deep Highly Eccentric') FROM t1 LIMIT 1;
    ''')
    assert curs.next()[0] == 0.
    # In theory, query consistent with evidence should be 1, but this is very
    # hard to ensure due to stochastic sampling giving different estimates of
    # P(Y), once in joint and once in marginal Monte Carlo estimation.

    # -----------------------
    # TEST MUTUAL INFORMATION
    # -----------------------

    # Two local columns.
    curs = bdb.execute('''
        ESTIMATE MUTUAL INFORMATION OF Country_of_Contractor WITH
            longitude_radians_of_geo USING 5 SAMPLES FROM t1 LIMIT 1;
    ''')
    # XXX Small sample sizes non-deterministically produce negative MI
    assert -1 <= curs.next()[0]
    # One local and one foreign column.
    curs = bdb.execute('''
        ESTIMATE MUTUAL INFORMATION OF Period_minutes WITH
            longitude_radians_of_geo USING 5 SAMPLES FROM t1 LIMIT 1;
    ''')
    # XXX This non-deterministically fails when sample sizes are small
    # assert 0. <= curs.next()[0]
    assert float("-inf") <= curs.next()[0]
    # Two foreign columns.
    curs = bdb.execute('''
        ESTIMATE MUTUAL INFORMATION OF Period_minutes WITH
            Anticipated_Lifetime USING 5 SAMPLES FROM t1 LIMIT 1;
    ''')
    # XXX This non-deterministically fails when sample sizes are small
    # assert 0. <= curs.next()[0]
    assert float("-inf") <= curs.next()[0]

    # -----------------------
    # TEST PREDICT CONFIDENCE
    # -----------------------

    # Continuous local column.
    curs = bdb.execute('''
        INFER EXPLICIT PREDICT Dry_Mass_kg CONFIDENCE c FROM t1 LIMIT 1;
    ''')
    assert curs.next()[1] >= 0.
    # Discrete local column with no children.
    curs = bdb.execute('''
        INFER EXPLICIT PREDICT Purpose CONFIDENCE c FROM t1 LIMIT 1;
    ''')
    assert 0 <= curs.next()[1] <= 1
    # Discrete local column with children.
    curs = bdb.execute('''
        INFER EXPLICIT PREDICT Contractor CONFIDENCE c FROM t1 LIMIT 1;
    ''')
    assert 0 <= curs.next()[1] <= 1
    # Continuous foreign columns.
    curs = bdb.execute('''
        INFER EXPLICIT PREDICT Period_minutes CONFIDENCE c FROM t1 LIMIT 1;
    ''')
    assert curs.next()[1] >= 0.
    # Discrete foreign column.
    curs = bdb.execute('''
        INFER EXPLICIT PREDICT Type_of_Orbit CONFIDENCE c FROM t1 LIMIT 1;
    ''')
    assert 0 <= curs.next()[1] <= 1


def test_composer_integration__ci_slow():
    """End-to-end Composer test covering every query type in sequence.

    It is currently difficult to separate these checks into smaller tests
    because of their sequential nature.  All internal functions are still
    tested under different regimes of operation.
    """
    bdb = bayeslite.bayesdb_open()
    try:
        _check_composer_integration(bdb)
    finally:
        # Close the database even when one of the checks fails.
        bdb.close()
Example #8
0
def _check_drop_generator(bdb):
    """Create generator t1 on `bdb`, drop it, and verify its rows are gone.

    Before the drop every composer table must hold a row for the generator;
    afterwards the same lookups must come back empty and neither t1 nor
    t1_cc may still be a known generator.
    """
    # Initialize the database
    bayeslite.bayesdb_read_csv_file(bdb, 'satellites', PATH_SATELLITES_CSV,
        header=True, create=True)
    composer = Composer(n_samples=5)
    bayeslite.bayesdb_register_metamodel(bdb, composer)
    composer.register_foreign_predictor(random_forest.RandomForest)
    composer.register_foreign_predictor(multiple_regression.MultipleRegression)
    composer.register_foreign_predictor(keplers_law.KeplersLaw)
    bdb.execute('''
        CREATE GENERATOR t1 FOR satellites USING composer(
            default (
                Country_of_Operator CATEGORICAL, Operator_Owner CATEGORICAL,
                Users CATEGORICAL, Purpose CATEGORICAL,
                Class_of_orbit CATEGORICAL, Perigee_km NUMERICAL,
                Apogee_km NUMERICAL, Eccentricity NUMERICAL,
                Launch_Mass_kg NUMERICAL, Dry_Mass_kg NUMERICAL,
                Power_watts NUMERICAL, Date_of_Launch NUMERICAL,
                Contractor CATEGORICAL,
                Country_of_Contractor CATEGORICAL, Launch_Site CATEGORICAL,
                Launch_Vehicle CATEGORICAL,
                Source_Used_for_Orbital_Data CATEGORICAL,
                longitude_radians_of_geo NUMERICAL,
                Inclination_radians NUMERICAL,
            ),
            random_forest (
                Type_of_Orbit CATEGORICAL
                    GIVEN Apogee_km, Perigee_km,
                        Eccentricity, Period_minutes, Launch_Mass_kg,
                        Power_watts, Anticipated_Lifetime, Class_of_orbit
            ),
            keplers_law (
                Period_minutes NUMERICAL
                    GIVEN Perigee_km, Apogee_km
            ),
            multiple_regression (
                Anticipated_Lifetime NUMERICAL
                    GIVEN Dry_Mass_kg, Power_watts, Launch_Mass_kg,
                    Contractor
            ),
            DEPENDENT(Apogee_km, Perigee_km, Eccentricity),
            DEPENDENT(Contractor, Country_of_Contractor),
            INDEPENDENT(Country_of_Operator, Date_of_Launch)
        );''')
    generator_id = bayeslite.core.bayesdb_get_generator(bdb, 't1')
    schema = [
        ('table', 'bayesdb_composer_cc_id'),
        ('table', 'bayesdb_composer_column_owner'),
        ('table', 'bayesdb_composer_column_toposort'),
        ('table', 'bayesdb_composer_column_parents'),
        ('table', 'bayesdb_composer_column_foreign_predictor'),
    ]
    # Iterate through tables before dropping.
    # .next() raises StopIteration if a table has no row for the generator.
    for _, name in schema:
        bdb.sql_execute('''
            SELECT * FROM {} WHERE generator_id=?
        '''.format(quote(name)), (generator_id,)).next()
    # Drop generator and ensure table lookups with generator_id throw error.
    bdb.execute('DROP GENERATOR t1')
    for _, name in schema:
        with pytest.raises(StopIteration):
            bdb.sql_execute('''
                SELECT * FROM {} WHERE generator_id=?
            '''.format(quote(name)), (generator_id,)).next()
    assert not bayeslite.core.bayesdb_has_generator(bdb, 't1')
    assert not bayeslite.core.bayesdb_has_generator(bdb, 't1_cc')


def test_drop_generator():
    """Dropping a composer generator should remove all of its bookkeeping."""
    bdb = bayeslite.bayesdb_open()
    try:
        _check_drop_generator(bdb)
    finally:
        # Close the database even when one of the checks fails.
        bdb.close()
Example #9
0
def test_register_foreign_predictor():
    """Composer should accept each predictor once and reject bad input.

    Valid predictors register cleanly, re-registering any of them raises
    BLE, and non-predictor arguments raise AssertionError.
    """
    bdb = bayeslite.bayesdb_open()
    try:
        composer = Composer(n_samples=5)
        bayeslite.bayesdb_register_metamodel(bdb, composer)
        # Register valid predictors.
        composer.register_foreign_predictor(random_forest.RandomForest)
        composer.register_foreign_predictor(
            multiple_regression.MultipleRegression)
        composer.register_foreign_predictor(keplers_law.KeplersLaw)
        # Register duplicates.
        with pytest.raises(BLE):
            composer.register_foreign_predictor(keplers_law.KeplersLaw)
        with pytest.raises(BLE):
            composer.register_foreign_predictor(
                multiple_regression.MultipleRegression)
        with pytest.raises(BLE):
            composer.register_foreign_predictor(random_forest.RandomForest)
        # Register invalid predictors.
        with pytest.raises(AssertionError):
            composer.register_foreign_predictor(None)
        with pytest.raises(AssertionError):
            composer.register_foreign_predictor('bans')
    finally:
        # The database was previously never closed by this test.
        bdb.close()
Example #10
0
from bdbcontrib.predictors import random_forest
from bdbcontrib.predictors import keplers_law

# Output directory comes from the first command-line argument when given.
outdir = '.' if len(sys.argv) < 2 else sys.argv[1]

# Find the satellites file: it sits in the 'satellites' directory next to
# the directory containing this script.
PATH_KEPLER = os.path.dirname(os.path.abspath(__file__))
PATH_EXAMPLES = os.path.dirname(PATH_KEPLER)
PATH_SATELLITES = os.path.join(PATH_EXAMPLES, 'satellites')
PATH_SATELLITES_CSV = os.path.join(PATH_SATELLITES, 'satellites.csv')

# Composer metamodel with the Kepler's-law and random-forest predictors.
composer = Composer()
composer.register_foreign_predictor(keplers_law.KeplersLaw)
composer.register_foreign_predictor(random_forest.RandomForest)

# Delete a stale database file so the run starts from scratch.
PATH_KEPLER_BDB = os.path.join(outdir, 'kepler.bdb')
if os.path.exists(PATH_KEPLER_BDB):
    os.remove(PATH_KEPLER_BDB)

bdb = bayeslite.bayesdb_open(PATH_KEPLER_BDB)
bayeslite.bayesdb_register_metamodel(bdb, composer)
bayeslite.bayesdb_read_csv_file(
    bdb, 'satellites', PATH_SATELLITES_CSV, header=True, create=True)

bdbcontrib.query(
Example #11
0
def test_create_generator_schema():
    """Exercise composer CREATE GENERATOR schema validation.

    Opens a BayesDB without a pathname, runs the accepted and rejected
    schemas in `_check_create_generator_schema`, and always closes the
    database, even when one of the checks fails.
    """
    bdb = bayeslite.bayesdb_open()
    try:
        _check_create_generator_schema(bdb)
    finally:
        # Close on failure too, so a failing check does not leak the handle.
        bdb.close()


def _check_create_generator_schema(bdb):
    """Run the schema checks of `test_create_generator_schema` against `bdb`.

    Loads the satellites CSV, registers a `Composer` and issues a sequence of
    CREATE GENERATOR statements, some expected to succeed and others expected
    to raise.  Foreign predictors are registered part-way through, so the
    statements must run in this order.
    """
    bayeslite.bayesdb_read_csv_file(bdb,
                                    'satellites',
                                    PATH_SATELLITES_CSV,
                                    header=True,
                                    create=True)
    composer = Composer(n_samples=5)
    bayeslite.bayesdb_register_metamodel(bdb, composer)
    # Using crosscat and default to specify models should work.
    bdb.execute('''
        CREATE GENERATOR t1 FOR satellites USING composer(
            default (
                Country_of_Operator CATEGORICAL, Operator_Owner CATEGORICAL,
                Users CATEGORICAL, Purpose CATEGORICAL,
                Class_of_orbit CATEGORICAL, Perigee_km NUMERICAL,
                Apogee_km NUMERICAL, Eccentricity NUMERICAL
            ),
            crosscat (
                Anticipated_Lifetime NUMERICAL, Contractor CATEGORICAL
            )
        );''')
    # Creating t1 must also have created the companion generator t1_cc.
    assert bayeslite.core.bayesdb_has_generator(bdb, 't1_cc')
    # IGNORE and GUESS(*) are forbidden and should crash.
    with pytest.raises(AttributeError):
        bdb.execute('''
            CREATE GENERATOR t2 FOR satellites USING composer(
                default (
                    GUESS(*), Country_of_Operator IGNORE,
                    Apogee_km NUMERICAL, Eccentricity NUMERICAL
                )
            );''')
    # random_forest has not been registered yet, so naming it should crash.
    with pytest.raises(BLE):
        bdb.execute('''
            CREATE GENERATOR t3 FOR satellites USING composer(
                default (
                    Country_of_Operator CATEGORICAL, Operator_Owner CATEGORICAL,
                    Users CATEGORICAL, Purpose CATEGORICAL,
                    Class_of_orbit CATEGORICAL, Perigee_km NUMERICAL,
                    Apogee_km NUMERICAL, Eccentricity NUMERICAL
                ),
                random_forest (
                    Apogee_km NUMERICAL GIVEN Operator_Owner
                )
            );''')
    # Same schema under a new generator name: the predictor is still
    # unregistered, so this should crash as well.
    with pytest.raises(BLE):
        bdb.execute('''
            CREATE GENERATOR t4 FOR satellites USING composer(
                default (
                    Country_of_Operator CATEGORICAL, Operator_Owner CATEGORICAL,
                    Users CATEGORICAL, Purpose CATEGORICAL,
                    Class_of_orbit CATEGORICAL, Perigee_km NUMERICAL,
                    Apogee_km NUMERICAL, Eccentricity NUMERICAL
                ),
                random_forest (
                    Apogee_km NUMERICAL GIVEN Operator_Owner
                )
            );''')
    # Registered foreign predictor should work.
    composer.register_foreign_predictor(random_forest.RandomForest)
    bdb.execute('''
        CREATE GENERATOR t5 FOR satellites USING composer(
            default (
                Country_of_Operator CATEGORICAL, Operator_Owner CATEGORICAL,
                Users CATEGORICAL, Purpose CATEGORICAL,
                Class_of_orbit CATEGORICAL, Perigee_km NUMERICAL,
                Eccentricity NUMERICAL
            ),
            random_forest (
                Apogee_km NUMERICAL GIVEN Operator_Owner
            )
        );''')
    # Wrong stattype in predictor should crash (RADIAL target for
    # random_forest).
    with pytest.raises(BLE):
        bdb.execute('''
            CREATE GENERATOR t6 FOR satellites USING composer(
                default (
                    Country_of_Operator CATEGORICAL, Operator_Owner CATEGORICAL,
                    Users CATEGORICAL, Purpose CATEGORICAL,
                    Class_of_orbit CATEGORICAL, Perigee_km NUMERICAL,
                    Apogee_km NUMERICAL, Eccentricity NUMERICAL
                ),
                random_forest (
                    Apogee_km RADIAL GIVEN Operator_Owner
                )
            );''')
    # Missing GIVEN keyword should crash.  The name t6 is reused here; the
    # previous t6 statement is expected to have failed.
    with pytest.raises(BLE):
        bdb.execute('''
            CREATE GENERATOR t6 FOR satellites USING composer(
                default (
                    Country_of_Operator CATEGORICAL, Operator_Owner CATEGORICAL,
                    Users CATEGORICAL, Purpose CATEGORICAL,
                    Class_of_orbit CATEGORICAL, Perigee_km NUMERICAL,
                    Apogee_km NUMERICAL, Eccentricity NUMERICAL
                ),
                random_forest (
                    Apogee_km NUMERICAL, Operator_Owner
                )
            );''')
    # Conditioning on a column that is declared nowhere in the schema
    # (Operator_Owner is absent from `default`) should crash.
    with pytest.raises(BLE):
        bdb.execute('''
            CREATE GENERATOR t7 FOR satellites USING composer(
                default (
                    Country_of_Operator CATEGORICAL,
                    Users CATEGORICAL, Purpose CATEGORICAL,
                    Class_of_orbit CATEGORICAL, Perigee_km NUMERICAL,
                    Apogee_km NUMERICAL, Eccentricity NUMERICAL
                ),
                random_forest (
                    Apogee_km NUMERICAL GIVEN Operator_Owner
                )
            );''')
    # Test duplicate declarations: Class_of_orbit is declared both in
    # `default` and as the random_forest target.
    # NOTE(review): Operator_Owner is also undeclared in this schema, so the
    # BLE may be raised for the missing condition rather than for the
    # duplicate -- confirm which check this actually exercises.
    with pytest.raises(BLE):
        bdb.execute('''
            CREATE GENERATOR t7 FOR satellites USING composer(
                default (
                    Country_of_Operator CATEGORICAL,
                    Users CATEGORICAL, Purpose CATEGORICAL,
                    Class_of_orbit CATEGORICAL, Perigee_km NUMERICAL,
                    Apogee_km NUMERICAL, Eccentricity NUMERICAL
                ),
                random_forest (
                    Class_of_orbit CATEGORICAL GIVEN Operator_Owner
                )
            );''')
    # Arbitrary DAG with foreign predictors: Eccentricity is conditioned on
    # Apogee_km, which is itself modelled by a foreign predictor.
    composer.register_foreign_predictor(multiple_regression.MultipleRegression)
    bdb.execute('''
        CREATE GENERATOR t8 FOR satellites USING composer(
            default (
                Country_of_Operator CATEGORICAL, Operator_Owner CATEGORICAL,
                Users CATEGORICAL, Purpose CATEGORICAL,
                Class_of_orbit CATEGORICAL, Perigee_km NUMERICAL,
            ),
            random_forest (
                Apogee_km NUMERICAL GIVEN Operator_Owner, Users
            ),
            multiple_regression (
                Eccentricity NUMERICAL GIVEN Apogee_km, Users, Perigee_km
            )
        );''')
    # Duplicate declarations in foreign predictors should crash: Perigee_km
    # is the target of both random_forest and multiple_regression.
    with pytest.raises(BLE):
        bdb.execute('''
            CREATE GENERATOR t9 FOR satellites USING composer(
                default (
                    Country_of_Operator CATEGORICAL, Operator_Owner CATEGORICAL,
                    Users CATEGORICAL, Purpose CATEGORICAL,
                    Class_of_orbit CATEGORICAL, Perigee_km NUMERICAL,
                    Apogee_km NUMERICAL, Eccentricity NUMERICAL
                ),
                random_forest (
                    Perigee_km NUMERICAL GIVEN Purpose
                ),
                multiple_regression (
                    Eccentricity NUMERICAL GIVEN Operator_Owner
                )
            );''')
    # MML for default models should work.
    # NOTE(review): unlike t12 below, the clauses of this schema are not
    # separated by commas, and Operator_Owner is not declared in `default`;
    # confirm the parser treats this statement as intended.
    bdb.execute('''
        CREATE GENERATOR t10 FOR satellites USING composer(
            default (
                Country_of_Operator CATEGORICAL,
                Users CATEGORICAL, Purpose CATEGORICAL,
                Class_of_orbit CATEGORICAL, Apogee_km NUMERICAL
            )
            random_forest (
                Perigee_km NUMERICAL GIVEN Purpose
            )
            multiple_regression (
                Eccentricity NUMERICAL GIVEN Operator_Owner, Class_of_orbit
            )
            DEPENDENT(Apogee_km, Perigee_km, Purpose),
            INDEPENDENT(Country_of_Operator, Purpose)
        );''')
    # MML for foreign predictors should crash.
    with pytest.raises(BLE):
        bdb.execute('''
            CREATE GENERATOR t11 FOR satellites USING composer(
                default (
                    Country_of_Operator CATEGORICAL, Operator_Owner CATEGORICAL,
                    Users CATEGORICAL, Purpose CATEGORICAL,
                    Class_of_orbit CATEGORICAL,
                    Apogee_km NUMERICAL
                ),
                random_forest (
                    Perigee_km NUMERICAL GIVEN Purpose
                ),
                multiple_regression (
                    Eccentricity NUMERICAL GIVEN Operator_Owner, Class_of_orbit
                )
                DEPENDENT(Apogee_km, Eccentricity, Country_of_Operator),
                INDEPENDENT(Perigee_km, Purpose)
            );''')
    # Test full generator: all three foreign predictors plus dependence
    # constraints among default columns.
    composer.register_foreign_predictor(keplers_law.KeplersLaw)
    bdb.execute('''
        CREATE GENERATOR t12 FOR satellites USING composer(
            default (
                Country_of_Operator CATEGORICAL, Operator_Owner CATEGORICAL,
                Users CATEGORICAL, Purpose CATEGORICAL,
                Class_of_orbit CATEGORICAL, Perigee_km NUMERICAL,
                Apogee_km NUMERICAL, Eccentricity NUMERICAL,
                Launch_Mass_kg NUMERICAL, Dry_Mass_kg NUMERICAL,
                Power_watts NUMERICAL, Date_of_Launch NUMERICAL,
                Contractor CATEGORICAL,
                Country_of_Contractor CATEGORICAL, Launch_Site CATEGORICAL,
                Launch_Vehicle CATEGORICAL,
                Source_Used_for_Orbital_Data CATEGORICAL,
                longitude_radians_of_geo NUMERICAL,
                Inclination_radians NUMERICAL,
            ),
            random_forest (
                Type_of_Orbit CATEGORICAL
                    GIVEN Apogee_km, Perigee_km,
                        Eccentricity, Period_minutes, Launch_Mass_kg,
                        Power_watts, Anticipated_Lifetime, Class_of_orbit
            ),
            keplers_law (
                Period_minutes NUMERICAL
                    GIVEN Perigee_km, Apogee_km
            ),
            multiple_regression (
                Anticipated_Lifetime NUMERICAL
                    GIVEN Dry_Mass_kg, Power_watts, Launch_Mass_kg, Contractor
            ),
            DEPENDENT(Apogee_km, Perigee_km, Eccentricity),
            INDEPENDENT(Country_of_Operator, longitude_radians_of_geo)
        );''')
Example #12
0
def test_composer_integration__ci_slow():
    """End-to-end test of a composer generator over the satellites table.

    The stages are sequential (create the generator, initialize and analyze
    models, then query it), so they are kept in a single test rather than
    split into independent tests.  Each stage lives in its own helper and the
    helpers are called in the original order.  The database is closed even
    when a stage fails.
    """
    bdb = bayeslite.bayesdb_open()
    try:
        _integration_create_generator(bdb)
        _integration_check_initialize_and_analyze(bdb)
        _integration_check_dependence_probability(bdb)
        _integration_check_simulate(bdb)
        _integration_check_column_value_probability(bdb)
        _integration_check_mutual_information(bdb)
        _integration_check_predict_confidence(bdb)
    finally:
        # Close on failure too, so a failing stage does not leak the handle.
        bdb.close()


def _integration_create_generator(bdb):
    """Load the dataset into `bdb` and create the composer generator t1."""
    # Dataset.
    bayeslite.bayesdb_read_csv_file(bdb,
                                    'satellites',
                                    PATH_SATELLITES_CSV,
                                    header=True,
                                    create=True)
    bdbcontrib.bql_utils.nullify(bdb, 'satellites', 'NaN')
    # Composer.
    composer = Composer(n_samples=5)
    composer.register_foreign_predictor(multiple_regression.MultipleRegression)
    composer.register_foreign_predictor(keplers_law.KeplersLaw)
    composer.register_foreign_predictor(random_forest.RandomForest)
    # Use complex generator for interesting test cases.
    bayeslite.bayesdb_register_metamodel(bdb, composer)
    bdb.execute('''
        CREATE GENERATOR t1 FOR satellites USING composer(
            default (
                Country_of_Operator CATEGORICAL, Operator_Owner CATEGORICAL,
                Users CATEGORICAL, Purpose CATEGORICAL,
                Class_of_orbit CATEGORICAL, Perigee_km NUMERICAL,
                Apogee_km NUMERICAL, Eccentricity NUMERICAL,
                Launch_Mass_kg NUMERICAL, Dry_Mass_kg NUMERICAL,
                Power_watts NUMERICAL, Date_of_Launch NUMERICAL,
                Contractor CATEGORICAL,
                Country_of_Contractor CATEGORICAL, Launch_Site CATEGORICAL,
                Launch_Vehicle CATEGORICAL,
                Source_Used_for_Orbital_Data CATEGORICAL,
                longitude_radians_of_geo NUMERICAL,
                Inclination_radians NUMERICAL,
            ),
            random_forest (
                Type_of_Orbit CATEGORICAL
                    GIVEN Apogee_km, Perigee_km,
                        Eccentricity, Period_minutes, Launch_Mass_kg,
                        Power_watts, Anticipated_Lifetime, Class_of_orbit
            ),
            keplers_law (
                Period_minutes NUMERICAL
                    GIVEN Perigee_km, Apogee_km
            ),
            multiple_regression (
                Anticipated_Lifetime NUMERICAL
                    GIVEN Dry_Mass_kg, Power_watts, Launch_Mass_kg,
                    Contractor
            ),
            DEPENDENT(Apogee_km, Perigee_km, Eccentricity),
            DEPENDENT(Contractor, Country_of_Contractor),
            INDEPENDENT(Country_of_Operator, Date_of_Launch)
        );''')


def _integration_check_initialize_and_analyze(bdb):
    """Initialize and analyze t1; check model and iteration counts."""
    # ----------------------
    # TEST INITIALIZE MODELS
    # ----------------------

    bdb.execute('INITIALIZE 2 MODELS FOR t1')
    # Check number of models, for the composer and for its t1_cc generator.
    df = describe_generator_models(bdb, 't1')
    assert len(df) == 2
    df = describe_generator_models(bdb, 't1_cc')
    assert len(df) == 2

    # -------------------
    # TEST ANALYZE MODELS
    # -------------------

    bdb.execute('ANALYZE t1 FOR 2 ITERATIONS WAIT;')
    # Check number of iterations of composer.
    df = describe_generator_models(bdb, 't1')
    for index, modelno, iterations in df.itertuples():
        assert iterations == 2
    # Check number of iterations of composer_cc.
    df = describe_generator_models(bdb, 't1_cc')
    for index, modelno, iterations in df.itertuples():
        assert iterations == 2


def _integration_check_dependence_probability(bdb):
    """Check DEPENDENCE PROBABILITY in the 0, 1 and unknown regimes."""
    # ----------------------------------
    # TEST COLUMN DEPENDENCE PROBABILITY
    # ----------------------------------

    # Special 0/1 regimes.
    # Local with a INDEPENDENT local should be 0.
    curs = bdb.execute('''
        ESTIMATE DEPENDENCE PROBABILITY OF Date_of_Launch
            WITH Country_of_Operator FROM t1 LIMIT 1
    ''')
    assert curs.next()[0] == 0
    # Local with a DEPENDENT local should be 1.
    curs = bdb.execute('''
        ESTIMATE DEPENDENCE PROBABILITY OF Perigee_km WITH Eccentricity
            FROM t1 LIMIT 1
    ''')
    assert curs.next()[0] == 1
    curs = bdb.execute('''
        ESTIMATE DEPENDENCE PROBABILITY OF Apogee_km WITH Eccentricity
            FROM t1 LIMIT 1
    ''')
    assert curs.next()[0] == 1
    # Foreign with a local parent should be 1.
    curs = bdb.execute('''
        ESTIMATE DEPENDENCE PROBABILITY OF Period_minutes WITH Apogee_km
            FROM t1 LIMIT 1
    ''')
    assert curs.next()[0] == 1.
    curs = bdb.execute('''
        ESTIMATE DEPENDENCE PROBABILITY OF Anticipated_Lifetime WITH Power_watts
            FROM t1 LIMIT 1
    ''')
    assert curs.next()[0] == 1.
    # Foreign with a foreign parent should be 1.
    curs = bdb.execute('''
        ESTIMATE DEPENDENCE PROBABILITY OF Type_of_Orbit WITH
            Anticipated_Lifetime FROM t1 LIMIT 1
    ''')
    assert curs.next()[0] == 1.
    # Foreign with a local non-parent DEPENDENT with local parent should be 1.
    curs = bdb.execute('''
        ESTIMATE DEPENDENCE PROBABILITY OF Period_minutes WITH
            Eccentricity FROM t1 LIMIT 1
    ''')
    assert curs.next()[0] == 1.
    # Foreign with foreign sharing common direct ancestor should be 1.
    # Launch_Mass_kg is the common parent.
    curs = bdb.execute('''
        ESTIMATE DEPENDENCE PROBABILITY OF Anticipated_Lifetime WITH
            Type_of_Orbit FROM t1 LIMIT 1
    ''')
    assert curs.next()[0] == 1.
    # Foreign with a foreign sharing a common DEPENDENT ancestor should be 1.
    # Eccentricity is a parent of Type_of_orbit, and is dependent
    # with Period_minutes through DEPENDENT(Apogee_km, Perigee_km, Eccentricity)
    curs = bdb.execute('''
        ESTIMATE DEPENDENCE PROBABILITY OF Period_minutes WITH
            Type_of_Orbit FROM t1 LIMIT 1
    ''')
    assert curs.next()[0] == 1.
    # Column with itself should be 1.
    curs = bdb.execute('''
        ESTIMATE DEPENDENCE PROBABILITY OF Anticipated_Lifetime WITH
            Anticipated_Lifetime FROM t1 LIMIT 1
    ''')
    assert curs.next()[0] == 1.

    # Unknown [0,1] regimes.
    # Foreign with a local of unknown relation with parents.
    curs = bdb.execute('''
        ESTIMATE DEPENDENCE PROBABILITY OF Anticipated_Lifetime WITH
            longitude_radians_of_geo FROM t1 LIMIT 1
    ''')
    assert 0 <= curs.next()[0] <= 1.
    curs = bdb.execute('''
        ESTIMATE DEPENDENCE PROBABILITY OF Period_minutes WITH
            longitude_radians_of_geo FROM t1 LIMIT 1
    ''')
    assert 0 <= curs.next()[0] <= 1.
    # Foreign with a foreign of unknown ancestry relation.
    curs = bdb.execute('''
        ESTIMATE DEPENDENCE PROBABILITY OF Anticipated_Lifetime WITH
            Period_minutes FROM t1 LIMIT 1
    ''')
    assert 0 <= curs.next()[0] <= 1.


def _integration_check_simulate(bdb):
    """Crash-test SIMULATE over local and foreign columns of t1."""
    # ----------------------------------
    # TEST SIMULATE
    # ----------------------------------

    # Crash tests for various code paths. Quality of simulations ignored.
    # Joint local.
    curs = bdb.execute('''
        SIMULATE Power_watts, Launch_Mass_kg FROM t1 LIMIT 2;
    ''')
    assert len(curs.fetchall()) == 2
    # Forward simulate foreign.
    curs = bdb.execute('''
        SIMULATE Period_minutes FROM t1 GIVEN Apogee_km = 1000, Perigee_km = 980
            LIMIT 2;
    ''')
    assert len(curs.fetchall()) == 2
    # Forward simulate foreign with missing parents.
    curs = bdb.execute('''
        SIMULATE Anticipated_Lifetime FROM t1 GIVEN Dry_Mass_kg = 2894,
            Launch_Mass_kg = 1730 LIMIT 2;
    ''')
    assert len(curs.fetchall()) == 2
    # Joint simulate foreign with parents, and missing parents.
    curs = bdb.execute('''
        SIMULATE Type_of_Orbit, Eccentricity FROM t1 GIVEN Dry_Mass_kg = 2894,
            Launch_Mass_kg = 1730 LIMIT 2;
    ''')
    assert len(curs.fetchall()) == 2
    # Joint simulate foreign with non-parents.
    curs = bdb.execute('''
        SIMULATE Period_minutes, Eccentricity FROM t1 GIVEN Apogee_km = 38000
            LIMIT 2;
    ''')
    assert len(curs.fetchall()) == 2
    # Simulate joint local conditioned on two foreigns.
    curs = bdb.execute('''
        SIMULATE Country_of_Operator, Inclination_radians FROM t1
            GIVEN Period_minutes = 1432, Anticipated_Lifetime = 5 LIMIT 2;
    ''')
    assert len(curs.fetchall()) == 2
    # Simulate joint foreign conditioned on third foreign.
    curs = bdb.execute('''
        SIMULATE Period_minutes, Anticipated_Lifetime FROM t1
            GIVEN Type_of_Orbit = 'Deep Highly Eccentric' LIMIT 2
    ''')
    assert len(curs.fetchall()) == 2
    # Simulate foreign conditioned on itself: the given value must be
    # returned unchanged in every sample.
    curs = bdb.execute('''
        SIMULATE Period_minutes, Apogee_km FROM t1
            GIVEN Period_minutes = 102 LIMIT 2
    ''')
    assert [s[0] for s in curs] == [102] * 2


def _integration_check_column_value_probability(bdb):
    """Crash-test ESTIMATE PROBABILITY OF over local and foreign columns."""
    # -----------------------------
    # TEST COLUMN VALUE PROBABILITY
    # -----------------------------

    # Crash tests for various code path. Quality of logpdf ignored.
    # Conditional local.
    curs = bdb.execute('''
        ESTIMATE PROBABILITY OF Power_watts = 800 GIVEN (Perigee_km = 980,
            Launch_Mass_kg = 890) FROM t1 LIMIT 1;
    ''')
    assert 0. <= curs.next()[0]
    # Unconditional foreign
    curs = bdb.execute('''
        ESTIMATE PROBABILITY OF Period_minutes = 1020 FROM t1 LIMIT 1;
    ''')
    assert 0. <= curs.next()[0]
    # Conditional foreign on parent and non-parents.
    curs = bdb.execute('''
        ESTIMATE PROBABILITY OF Period_minutes = 1020 GIVEN
            (Apogee_km = 38000, Eccentricity = 0.03) FROM t1 LIMIT 1;
    ''')
    assert 0 <= curs.next()[0]
    # Conditional foreign on foreign.
    curs = bdb.execute('''
        ESTIMATE PROBABILITY OF Anticipated_Lifetime = 4.09 GIVEN
            (Class_of_Orbit = 'LEO', Purpose='Astrophysics',
                Period_minutes = 1436) FROM t1 LIMIT 1;
    ''')
    assert 0. <= curs.next()[0]
    # Categorical foreign should be less than 1.
    curs = bdb.execute('''
        ESTIMATE PROBABILITY OF Type_of_Orbit = 'Polar' FROM t1 LIMIT 1;
    ''')
    assert curs.next()[0] <= 1.
    # Query inconsistent with evidence should be 0.
    curs = bdb.execute('''
        ESTIMATE PROBABILITY OF "Type_of_Orbit" = 'Polar'
            GIVEN ("Type_of_Orbit" = 'Deep Highly Eccentric') FROM t1 LIMIT 1;
    ''')
    assert curs.next()[0] == 0.
    # In theory, query consistent with evidence should be 1, but this is very
    # hard to ensure due to stochastic sampling giving different estimates of
    # P(Y), once in joint and once in marginal Monte Carlo estimation.


def _integration_check_mutual_information(bdb):
    """Crash-test ESTIMATE MUTUAL INFORMATION with small sample sizes."""
    # -----------------------
    # TEST MUTUAL INFORMATION
    # -----------------------

    # Two local columns.
    curs = bdb.execute('''
        ESTIMATE MUTUAL INFORMATION OF Country_of_Contractor WITH
            longitude_radians_of_geo USING 5 SAMPLES FROM t1 LIMIT 1;
    ''')
    # XXX Small sample sizes non-deterministically produce negative MI
    assert -1 <= curs.next()[0]
    # One local and one foreign column.
    curs = bdb.execute('''
        ESTIMATE MUTUAL INFORMATION OF Period_minutes WITH
            longitude_radians_of_geo USING 5 SAMPLES FROM t1 LIMIT 1;
    ''')
    # XXX Asserting 0. <= MI non-deterministically fails when sample sizes
    # are small, so only require that a comparable (non-NaN) number is
    # returned.
    assert float("-inf") <= curs.next()[0]
    # Two foreign columns.
    curs = bdb.execute('''
        ESTIMATE MUTUAL INFORMATION OF Period_minutes WITH
            Anticipated_Lifetime USING 5 SAMPLES FROM t1 LIMIT 1;
    ''')
    # XXX Same relaxation as above, for the same reason.
    assert float("-inf") <= curs.next()[0]


def _integration_check_predict_confidence(bdb):
    """Check confidences returned by INFER EXPLICIT PREDICT ... CONFIDENCE."""
    # -----------------------
    # TEST PREDICT CONFIDENCE
    # -----------------------

    # Continuous local column.
    curs = bdb.execute('''
        INFER EXPLICIT PREDICT Dry_Mass_kg CONFIDENCE c FROM t1 LIMIT 1;
    ''')
    assert curs.next()[1] >= 0.
    # Discrete local column with no children.
    curs = bdb.execute('''
        INFER EXPLICIT PREDICT Purpose CONFIDENCE c FROM t1 LIMIT 1;
    ''')
    assert 0 <= curs.next()[1] <= 1
    # Discrete local column with children.
    curs = bdb.execute('''
        INFER EXPLICIT PREDICT Contractor CONFIDENCE c FROM t1 LIMIT 1;
    ''')
    assert 0 <= curs.next()[1] <= 1
    # Continuous foreign columns.
    curs = bdb.execute('''
        INFER EXPLICIT PREDICT Period_minutes CONFIDENCE c FROM t1 LIMIT 1;
    ''')
    assert curs.next()[1] >= 0.
    # Discrete foreign column.
    curs = bdb.execute('''
        INFER EXPLICIT PREDICT Type_of_Orbit CONFIDENCE c FROM t1 LIMIT 1;
    ''')
    assert 0 <= curs.next()[1] <= 1
Example #13
0
def test_drop_generator():
    """Check that DROP GENERATOR removes all composer bookkeeping for t1.

    The checks live in `_check_drop_generator`; the database is closed even
    when one of them fails.
    """
    bdb = bayeslite.bayesdb_open()
    try:
        _check_drop_generator(bdb)
    finally:
        # Close on failure too, so a failing check does not leak the handle.
        bdb.close()


def _check_drop_generator(bdb):
    """Create composer generator t1 in `bdb`, drop it, and verify cleanup.

    Before the drop every composer table must hold at least one row for the
    generator; after the drop none may, and neither t1 nor t1_cc may exist.
    """
    # Initialize the database
    bayeslite.bayesdb_read_csv_file(bdb,
                                    'satellites',
                                    PATH_SATELLITES_CSV,
                                    header=True,
                                    create=True)
    composer = Composer(n_samples=5)
    bayeslite.bayesdb_register_metamodel(bdb, composer)
    composer.register_foreign_predictor(random_forest.RandomForest)
    composer.register_foreign_predictor(multiple_regression.MultipleRegression)
    composer.register_foreign_predictor(keplers_law.KeplersLaw)
    bdb.execute('''
        CREATE GENERATOR t1 FOR satellites USING composer(
            default (
                Country_of_Operator CATEGORICAL, Operator_Owner CATEGORICAL,
                Users CATEGORICAL, Purpose CATEGORICAL,
                Class_of_orbit CATEGORICAL, Perigee_km NUMERICAL,
                Apogee_km NUMERICAL, Eccentricity NUMERICAL,
                Launch_Mass_kg NUMERICAL, Dry_Mass_kg NUMERICAL,
                Power_watts NUMERICAL, Date_of_Launch NUMERICAL,
                Contractor CATEGORICAL,
                Country_of_Contractor CATEGORICAL, Launch_Site CATEGORICAL,
                Launch_Vehicle CATEGORICAL,
                Source_Used_for_Orbital_Data CATEGORICAL,
                longitude_radians_of_geo NUMERICAL,
                Inclination_radians NUMERICAL,
            ),
            random_forest (
                Type_of_Orbit CATEGORICAL
                    GIVEN Apogee_km, Perigee_km,
                        Eccentricity, Period_minutes, Launch_Mass_kg,
                        Power_watts, Anticipated_Lifetime, Class_of_orbit
            ),
            keplers_law (
                Period_minutes NUMERICAL
                    GIVEN Perigee_km, Apogee_km
            ),
            multiple_regression (
                Anticipated_Lifetime NUMERICAL
                    GIVEN Dry_Mass_kg, Power_watts, Launch_Mass_kg,
                    Contractor
            ),
            DEPENDENT(Apogee_km, Perigee_km, Eccentricity),
            DEPENDENT(Contractor, Country_of_Contractor),
            INDEPENDENT(Country_of_Operator, Date_of_Launch)
        );''')
    # Remember the id now: it is needed for lookups after the drop.
    generator_id = bayeslite.core.bayesdb_get_generator(bdb, 't1')
    schema = [
        ('table', 'bayesdb_composer_cc_id'),
        ('table', 'bayesdb_composer_column_owner'),
        ('table', 'bayesdb_composer_column_toposort'),
        ('table', 'bayesdb_composer_column_parents'),
        ('table', 'bayesdb_composer_column_foreign_predictor'),
    ]
    # Iterate through tables before dropping.  `.next()` raises StopIteration
    # (failing the test) if a table has no row for this generator.
    for _, name in schema:
        bdb.sql_execute(
            '''
            SELECT * FROM {} WHERE generator_id=?
        '''.format(quote(name)), (generator_id, )).next()
    # Drop generator and ensure table lookups with generator_id throw error.
    bdb.execute('DROP GENERATOR t1')
    for _, name in schema:
        with pytest.raises(StopIteration):
            bdb.sql_execute(
                '''
                SELECT * FROM {} WHERE generator_id=?
            '''.format(quote(name)), (generator_id, )).next()
    assert not bayeslite.core.bayesdb_has_generator(bdb, 't1')
    assert not bayeslite.core.bayesdb_has_generator(bdb, 't1_cc')
Example #14
0
def test_register_foreign_predictor():
    """Check registration of foreign predictors on a `Composer`.

    Each valid predictor class registers once; registering any of them a
    second time raises `BLE`; arguments that are not predictor classes raise
    `AssertionError`.  The database is closed when the test finishes,
    whether it passes or fails.
    """
    bdb = bayeslite.bayesdb_open()
    try:
        composer = Composer(n_samples=5)
        bayeslite.bayesdb_register_metamodel(bdb, composer)
        # Register valid predictors.
        composer.register_foreign_predictor(random_forest.RandomForest)
        composer.register_foreign_predictor(
            multiple_regression.MultipleRegression)
        composer.register_foreign_predictor(keplers_law.KeplersLaw)
        # Register duplicates.
        with pytest.raises(BLE):
            composer.register_foreign_predictor(keplers_law.KeplersLaw)
        with pytest.raises(BLE):
            composer.register_foreign_predictor(
                multiple_regression.MultipleRegression)
        with pytest.raises(BLE):
            composer.register_foreign_predictor(random_forest.RandomForest)
        # Register invalid predictors.
        with pytest.raises(AssertionError):
            composer.register_foreign_predictor(None)
        with pytest.raises(AssertionError):
            composer.register_foreign_predictor('bans')
    finally:
        # The original never closed the database; release it explicitly.
        bdb.close()