예제 #1
0
def test_initialize_models():
    """Check that initialize_models creates the requested number of fresh models.

    Loads a table from ``data/dha_missing.csv`` via ``create_dha``, asks a
    seeded ``Engine`` to initialize ``num_models`` models, then verifies
    through the engine's persistence layer that the stored model ids are
    exactly ``0 .. num_models - 1`` and that every model reports zero
    iterations.
    """
    test_tablename, _ = create_dha(path='data/dha_missing.csv')

    engine = Engine(seed=0)
    num_models = 5
    engine.initialize_models(test_tablename, num_models)

    model_ids = engine.persistence_layer.get_model_ids(test_tablename)
    # sorted() always returns a list, but range() is only a list on Python 2;
    # materialize it so the comparison is list-to-list on every version.
    assert sorted(model_ids) == list(range(num_models))
    for i in range(num_models):
        model = engine.persistence_layer.get_models(test_tablename, i)
        assert model['iterations'] == 0
예제 #2
0
def test_initialize_models():
    """Check that initialize_models creates the requested number of fresh models.

    Loads a table from ``data/dha_missing.csv`` via ``create_dha``, asks a
    seeded ``Engine`` to initialize ``num_models`` models, then verifies
    through the engine's persistence layer that the stored model ids are
    exactly ``0 .. num_models - 1`` and that every model reports zero
    iterations.
    """
    test_tablename, _ = create_dha(path='data/dha_missing.csv')

    engine = Engine(seed=0)
    num_models = 5
    engine.initialize_models(test_tablename, num_models)

    model_ids = engine.persistence_layer.get_model_ids(test_tablename)
    # sorted() always returns a list, but range() is only a list on Python 2;
    # materialize it so the comparison is list-to-list on every version.
    assert sorted(model_ids) == list(range(num_models))
    for i in range(num_models):
        model = engine.persistence_layer.get_models(test_tablename, i)
        assert model['iterations'] == 0
예제 #3
0
def test_infer():
    """Exercise Engine.infer on a table with missing names and qual_scores.

    First runs ``infer name, qual_score`` with confidence 0 and checks the
    shape and element types of the result, that every filled-in name is one
    of the names seen in the other rows, and that every filled-in
    qual_score lies strictly between the smallest and largest of the other
    rows' scores. Then repeats the query with confidence 0.9 and checks that
    the previously filled-in cells are NaN.
    """
    # TODO: whereclauses
    test_tablename, _ = create_dha(path='data/dha_missing.csv')

    # dha_missing has missing qual_score in first 5 rows, and missing name in rows 6-10.
    engine = Engine(seed=0)
    engine.initialize_models(test_tablename, 20)

    functions = bql.bql_statement.parseString('infer name, qual_score from test',
                                              parseAll=True).functions
    whereclause = None
    limit = float('inf')
    order_by = False
    numsamples = 30
    confidence = 0
    infer_result = engine.infer(test_tablename, functions, confidence, whereclause, limit,
                                numsamples, order_by)
    assert 'column_labels' in infer_result
    assert 'data' in infer_result
    assert infer_result['column_labels'] == ['key', 'name', 'qual_score']

    data = infer_result['data']
    # 307 is the total number of rows in the dataset.
    assert len(data) == 307
    assert len(data[0]) == len(infer_result['column_labels'])
    assert type(data[0][0]) == numpy.string_  # key comes back as a numpy string
    # type(u'') is `unicode` on Python 2 and `str` on Python 3, so this check
    # does not depend on the Python-2-only `unicode` builtin.
    assert type(data[0][1]) in (type(u''), numpy.string_)  # type of name is string
    assert type(data[0][2]) == float  # type of qual_score is float

    # Row indices 0-4 lack qual_score and 5-9 lack name (0-based), so the
    # reference values are taken from the complementary rows. The ranges are
    # wrapped in list() because range objects cannot be concatenated with `+`
    # on Python 3.
    rows_with_name = list(range(5)) + list(range(10, 307))
    all_possible_names = [data[row][1] for row in rows_with_name]
    all_observed_qual_scores = [data[row][2] for row in range(5, 307)]
    # The bounds do not change between iterations; compute them once.
    lowest_observed = min(all_observed_qual_scores)
    highest_observed = max(all_observed_qual_scores)

    for row in range(5):
        inferred_name = data[row + 5][1]
        inferred_qual_score = data[row][2]
        assert inferred_name in all_possible_names
        assert type(inferred_qual_score) == float
        assert inferred_qual_score > lowest_observed
        assert inferred_qual_score < highest_observed

    # Now, try infer with higher confidence, and make sure that name isn't inferred anymore.
    confidence = 0.9
    infer_result = engine.infer(test_tablename, functions, confidence, whereclause, limit,
                                numsamples, order_by)

    for row in range(5):
        # TODO: what do missing values look like? these should be missing
        inferred_name = infer_result['data'][row + 5][1]
        inferred_qual_score = infer_result['data'][row][2]
        assert numpy.isnan(inferred_name)
        assert numpy.isnan(inferred_qual_score)
예제 #4
0
def test_infer():
    """Exercise Engine.infer on a table with missing names and qual_scores.

    First runs ``infer name, qual_score`` with confidence 0 and checks the
    shape and element types of the result, that every filled-in name is one
    of the names seen in the other rows, and that every filled-in
    qual_score lies strictly between the smallest and largest of the other
    rows' scores. Then repeats the query with confidence 0.9 and checks that
    the previously filled-in cells are NaN.
    """
    # TODO: whereclauses
    test_tablename, _ = create_dha(path='data/dha_missing.csv')

    # dha_missing has missing qual_score in first 5 rows, and missing name in rows 6-10.
    engine = Engine(seed=0)
    engine.initialize_models(test_tablename, 20)

    functions = bql.bql_statement.parseString('infer name, qual_score from test',
                                              parseAll=True).functions
    whereclause = None
    limit = float('inf')
    order_by = False
    numsamples = 30
    confidence = 0
    infer_result = engine.infer(test_tablename, functions, confidence, whereclause, limit,
                                numsamples, order_by)
    assert 'column_labels' in infer_result
    assert 'data' in infer_result
    assert infer_result['column_labels'] == ['key', 'name', 'qual_score']

    data = infer_result['data']
    # 307 is the total number of rows in the dataset.
    assert len(data) == 307
    assert len(data[0]) == len(infer_result['column_labels'])
    assert type(data[0][0]) == numpy.string_  # key comes back as a numpy string
    # type(u'') is `unicode` on Python 2 and `str` on Python 3, so this check
    # does not depend on the Python-2-only `unicode` builtin.
    assert type(data[0][1]) in (type(u''), numpy.string_)  # type of name is string
    assert type(data[0][2]) == float  # type of qual_score is float

    # Row indices 0-4 lack qual_score and 5-9 lack name (0-based), so the
    # reference values are taken from the complementary rows. The ranges are
    # wrapped in list() because range objects cannot be concatenated with `+`
    # on Python 3.
    rows_with_name = list(range(5)) + list(range(10, 307))
    all_possible_names = [data[row][1] for row in rows_with_name]
    all_observed_qual_scores = [data[row][2] for row in range(5, 307)]
    # The bounds do not change between iterations; compute them once.
    lowest_observed = min(all_observed_qual_scores)
    highest_observed = max(all_observed_qual_scores)

    for row in range(5):
        inferred_name = data[row + 5][1]
        inferred_qual_score = data[row][2]
        assert inferred_name in all_possible_names
        assert type(inferred_qual_score) == float
        assert inferred_qual_score > lowest_observed
        assert inferred_qual_score < highest_observed

    # Now, try infer with higher confidence, and make sure that name isn't inferred anymore.
    confidence = 0.9
    infer_result = engine.infer(test_tablename, functions, confidence, whereclause, limit,
                                numsamples, order_by)

    for row in range(5):
        # TODO: what do missing values look like? these should be missing
        inferred_name = infer_result['data'][row + 5][1]
        inferred_qual_score = infer_result['data'][row][2]
        assert numpy.isnan(inferred_name)
        assert numpy.isnan(inferred_qual_score)
예제 #5
0
#  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
#

from twisted.web import server, resource  # iweb
# from twisted.web.resource import EncodingResourceWrapper

# from twisted.internet import ssl
# import traceback

from twisted.internet import reactor

from jsonrpc.server import ServerEvents, JSON_RPC

from bayesdb.engine import Engine
# Module-level Engine instance, constructed with default arguments at import time.
engine = Engine()

from bayesdb.client import Client
# Module-level Client instance, likewise constructed at import time.
client = Client()


class ExampleServer(ServerEvents):
    """ServerEvents subclass whose ``log`` hook prints request/response info.

    NOTE(review): this snippet appears to be cut off right after the ``else``
    branch of ``log`` starts; any remaining statements and methods of the
    class are not visible here.
    """

    # inherited hooks
    def log(self, responses, txrequest, error):
        """Print the request's status code and a message for each response.

        ``responses`` may be a single response or a list of them; each one is
        turned into a message with ``self._get_msg``. ``error`` is not used in
        the visible part of the method.
        """
        # Print the status code followed by a space instead of a newline, so
        # the next print continues on the same line.
        print(txrequest.code, end=' ')
        if isinstance(responses, list):
            # Batch case: one output line per response.
            for response in responses:
                msg = self._get_msg(response)
                print(txrequest, msg)
        else:
            # Single-response case; presumably the message is printed just
            # after this line, beyond the end of the snippet -- TODO confirm.
            msg = self._get_msg(responses)
예제 #6
0
def setup_function(function):
    """Reset the module-level fixtures to a clean state.

    Rebinds the global ``test_tablenames`` to a new empty list and the global
    ``engine`` to a newly constructed ``Engine``. The ``function`` argument is
    accepted but not used. Presumably invoked by the test runner before each
    test function -- confirm against the runner configuration.
    """
    global test_tablenames, engine
    test_tablenames = []
    engine = Engine()
예제 #7
0
#
#   Unless required by applicable law or agreed to in writing, software
#   distributed under the License is distributed on an "AS IS" BASIS,
#   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#   See the License for the specific language governing permissions and
#   limitations under the License.
#

import time
import inspect
import psycopg2
import pickle
import os

from bayesdb.engine import Engine
# Module-level Engine instance created at import time. The meaning of the
# 'local' argument is defined by Engine and is not visible in this file.
engine = Engine('local')


def run_test(hostname='localhost', middleware_port=8008, online=False):
    """Run ``run_test_with`` once for each bundled sample table.

    Builds the middleware URI from ``hostname`` and ``middleware_port``, reads
    the CSV for each of the tables ``dha_small`` and ``anneal_small`` from the
    ``www/data`` directory two levels above this file, and passes the table
    name, CSV text, URI and ``online`` flag on to ``run_test_with``.

    Args:
        hostname: Host part of the middleware URI.
        middleware_port: Integer port of the middleware URI.
        online: Forwarded unchanged to ``run_test_with``.

    Raises:
        IOError/OSError: If one of the sample CSV files cannot be opened.
    """
    URI = 'http://' + hostname + ':%d' % middleware_port
    cur_dir = os.path.dirname(os.path.abspath(__file__))
    test_tablenames = ['dha_small', 'anneal_small']

    for tablename in test_tablenames:
        csv_path = '%s/../../www/data/%s.csv' % (cur_dir, tablename)
        # Read inside a context manager so the file handle is closed as soon
        # as the contents are loaded, instead of being left to the garbage
        # collector.
        with open(csv_path, 'r') as csv_file:
            table_csv = csv_file.read()
        run_test_with(tablename, table_csv, URI, online)


def run_test_with(tablename,
                  table_csv,