def test_initialize_models():
    """Check that initialize_models stores the requested number of fresh models.

    Creates the table from data/dha_missing.csv, initializes ``num_models``
    models on a seeded engine, then asserts that the persistence layer
    reports the ids 0..num_models-1 and that every stored model has an
    'iterations' count of 0.
    """
    test_tablename, _ = create_dha(path='data/dha_missing.csv')
    engine = Engine(seed=0)
    num_models = 5
    engine.initialize_models(test_tablename, num_models)

    model_ids = engine.persistence_layer.get_model_ids(test_tablename)
    # Compare against a real list: a list never equals a range object on
    # Python 3, while list(range(n)) is the same list as before on Python 2.
    assert sorted(model_ids) == list(range(num_models))

    for model_id in range(num_models):
        model = engine.persistence_layer.get_models(test_tablename, model_id)
        assert model['iterations'] == 0
def test_infer():
    """Exercise engine.infer on dha_missing at confidence 0 and at 0.9.

    At confidence 0 the result must contain all 307 rows with the columns
    key, name and qual_score; the cells that are missing in the CSV are
    checked to be filled with plausible values. At confidence 0.9 the same
    cells are asserted to be NaN.
    """
    # TODO: whereclauses
    test_tablename, _ = create_dha(path='data/dha_missing.csv')
    # dha_missing has missing qual_score in first 5 rows, and missing name in rows 6-10.
    engine = Engine(seed=0)
    engine.initialize_models(test_tablename, 20)

    functions = bql.bql_statement.parseString('infer name, qual_score from test', parseAll=True).functions
    whereclause = None
    limit = float('inf')
    order_by = False
    numsamples = 30
    confidence = 0
    infer_result = engine.infer(test_tablename, functions, confidence, whereclause, limit, numsamples, order_by)
    assert 'column_labels' in infer_result
    assert 'data' in infer_result
    assert infer_result['column_labels'] == ['key', 'name', 'qual_score']

    rows = infer_result['data']
    # 307 is the total number of rows in the dataset.
    assert len(rows) == 307 and len(rows[0]) == len(infer_result['column_labels'])

    # The key column is checked to be a numpy string (not an int).
    assert type(rows[0][0]) == numpy.string_
    # type of name is string. type(u'') is `unicode` on Python 2 and `str` on
    # Python 3, which avoids a NameError on the bare name `unicode`.
    name_type = type(rows[0][1])
    assert name_type in (type(u''), numpy.string_)
    assert type(rows[0][2]) == float  # type of qual_score is float

    # Rows whose name / qual_score are present in the CSV. The two ranges are
    # turned into lists before concatenating so this also works on Python 3.
    rows_with_name = list(range(5)) + list(range(10, 307))
    all_possible_names = [rows[row][1] for row in rows_with_name]
    all_observed_qual_scores = [rows[row][2] for row in range(5, 307)]
    # Loop-invariant bounds, computed once.
    lowest_score = min(all_observed_qual_scores)
    highest_score = max(all_observed_qual_scores)

    for row in range(5):
        inferred_name = rows[row + 5][1]
        inferred_qual_score = rows[row][2]

        assert inferred_name in all_possible_names
        assert type(inferred_qual_score) == float
        assert inferred_qual_score > lowest_score
        assert inferred_qual_score < highest_score

    # Now, try infer with higher confidence, and make sure that name isn't inferred anymore.
    confidence = 0.9
    infer_result = engine.infer(test_tablename, functions, confidence, whereclause, limit, numsamples, order_by)
    confident_rows = infer_result['data']

    for row in range(5):
        # TODO: what do missing values look like? these should be missing
        inferred_name = confident_rows[row + 5][1]
        inferred_qual_score = confident_rows[row][2]

        assert numpy.isnan(inferred_name)
        assert numpy.isnan(inferred_qual_score)
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # # from twisted.web import server, resource # iweb # from twisted.web.resource import EncodingResourceWrapper # from twisted.internet import ssl # import traceback from twisted.internet import reactor from jsonrpc.server import ServerEvents, JSON_RPC from bayesdb.engine import Engine engine = Engine() from bayesdb.client import Client client = Client() class ExampleServer(ServerEvents): # inherited hooks def log(self, responses, txrequest, error): print(txrequest.code, end=' ') if isinstance(responses, list): for response in responses: msg = self._get_msg(response) print(txrequest, msg) else: msg = self._get_msg(responses)
def setup_function(function):
    """Reset the module-level test fixtures.

    Rebinds the global ``test_tablenames`` to an empty list and the global
    ``engine`` to a newly constructed ``Engine()``. The ``function``
    argument is accepted but not used.

    NOTE(review): the name matches pytest's per-test setup hook, so this
    presumably runs before each test function - confirm against the runner.
    """
    global test_tablenames, engine
    test_tablenames = []
    engine = Engine()
# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # import time import inspect import psycopg2 import pickle import os from bayesdb.engine import Engine engine = Engine('local') def run_test(hostname='localhost', middleware_port=8008, online=False): URI = 'http://' + hostname + ':%d' % middleware_port cur_dir = os.path.dirname(os.path.abspath(__file__)) test_tablenames = ['dha_small', 'anneal_small'] for tablename in test_tablenames: table_csv = open('%s/../../www/data/%s.csv' % (cur_dir, tablename), 'r').read() run_test_with(tablename, table_csv, URI, online) def run_test_with(tablename, table_csv,