def test_flights():
    """Run a fixed script of commands against the flight-data table.

    Every command is echoed with a '>>> ' prefix, executed through a new
    Client with timing enabled, and the returned result is printed.
    Nothing is asserted about the results.
    """
    bql_client = Client()
    statements = [
        'drop ptable jayt;',
        'create ptable jayt from /home/sgeadmin/tabular_predDB/Examples/flight_data_subset.csv;',
        'create 2 models for jayt;',
        'analyze jayt for 1 iterations;',
        'select dayofweek, deptime, crsdeptime, actualelapsedtime from jayt where distance > 800 limit 20;',
        'select dayofweek, deptime, crsdeptime, actualelapsedtime from jayt where distance > 800 limit 20 order by similarity to 0;',
        'select dayofweek, deptime, crsdeptime, actualelapsedtime from jayt where distance > 800 limit 20 order by similarity to 0 with respect to actualelapsedtime;',
        'select dayofweek, actualelapsedtime, similarity to 0 with respect to actualelapsedtime from jayt where distance > 800 limit 20 order by similarity to 0 with respect to actualelapsedtime, dayofweek;',
        'select dayofweek, actualelapsedtime, similarity to 0 from jayt where distance > 800 limit 5;',
        'select dayofweek, actualelapsedtime, arrtime, similarity to 0 with respect to arrtime from jayt where distance > 800 limit 5 order by similarity to 0 with respect to arrtime, dayofweek;',
        'select probability(actualelapsedtime=200) from jayt where distance > 800 limit 20;',
        # Disabled:
        # 'select * from jayt limit 5;',
        # 'infer actualelapsedtime from jayt with confidence 0.8 limit 20;',
        'simulate dayofweek, deptime, crsdeptime FROM jayt where dayofweek = 7 TIMES 3;',
        'estimate dependence probabilities from jayt;',
        'estimate dependence probabilities from jayt referencing actualelapsedtime limit 6 save to fz;',
        'estimate dependence probabilities from jayt referencing actualelapsedtime with confidence 0.5;',
        # Disabled:
        # 'drop ptable jayt;',
        # 'estimate dependence probabilities from dan_kiva referencing activity limit 10 save to activity_z;',
        # 'select * from dha_small;',
        # 'select probability(mdcr_spnd_outp=1), probability(mdcr_spnd_outp=2), probability(mdcr_spnd_outp=3) from dha_small;',
    ]
    for statement in statements:
        # Single-argument print(...) behaves the same as the print statement.
        print('>>> %s' % statement)
        print(bql_client.execute(statement, timing=True))
def test_flights():
    """Exercise the client with a scripted session on the 'jayt' table.

    The commands are sent one at a time, in order, through a new Client
    with timing turned on. Each command is printed (prefixed with '>>> ')
    before it runs and its result is printed afterwards; no result is
    checked.
    """
    session = Client()
    script = (
        'drop ptable jayt;',
        'create ptable jayt from /home/sgeadmin/tabular_predDB/Examples/flight_data_subset.csv;',
        'create 2 models for jayt;',
        'analyze jayt for 1 iterations;',
        'select dayofweek, deptime, crsdeptime, actualelapsedtime from jayt where distance > 800 limit 20;',
        'select dayofweek, deptime, crsdeptime, actualelapsedtime from jayt where distance > 800 limit 20 order by similarity to 0;',
        'select dayofweek, deptime, crsdeptime, actualelapsedtime from jayt where distance > 800 limit 20 order by similarity to 0 with respect to actualelapsedtime;',
        'select dayofweek, actualelapsedtime, similarity to 0 with respect to actualelapsedtime from jayt where distance > 800 limit 20 order by similarity to 0 with respect to actualelapsedtime, dayofweek;',
        'select dayofweek, actualelapsedtime, similarity to 0 from jayt where distance > 800 limit 5;',
        'select dayofweek, actualelapsedtime, arrtime, similarity to 0 with respect to arrtime from jayt where distance > 800 limit 5 order by similarity to 0 with respect to arrtime, dayofweek;',
        'select probability(actualelapsedtime=200) from jayt where distance > 800 limit 20;',
        # Currently switched off:
        # 'select * from jayt limit 5;',
        # 'infer actualelapsedtime from jayt with confidence 0.8 limit 20;',
        'simulate dayofweek, deptime, crsdeptime FROM jayt where dayofweek = 7 TIMES 3;',
        'estimate dependence probabilities from jayt;',
        'estimate dependence probabilities from jayt referencing actualelapsedtime limit 6 save to fz;',
        'estimate dependence probabilities from jayt referencing actualelapsedtime with confidence 0.5;',
        # Currently switched off:
        # 'drop ptable jayt;',
        # 'estimate dependence probabilities from dan_kiva referencing activity limit 10 save to activity_z;',
        # 'select * from dha_small;',
        # 'select probability(mdcr_spnd_outp=1), probability(mdcr_spnd_outp=2), probability(mdcr_spnd_outp=3) from dha_small;',
    )
    for command in script:
        # A parenthesised single argument prints exactly like `print x`.
        print('>>> %s' % command)
        outcome = session.execute(command, timing=True)
        print(outcome)
def test_dha_story_demo():
    """Regression test that replays the DHA demo commands.

    Runs a fixed list of commands through a new Client. The mode is
    selected by the first command-line argument:

    * ``record``: the result of every command is collected and pickled
      to ``regression_test_output/dha_story_results_record.pkl`` next to
      this file.
    * anything else (or no argument): the previously recorded results
      are loaded from that file, and every dict-valued result is compared
      entry by entry with the recorded one. Results that are not dicts
      are not compared.

    Raises:
        AssertionError: if a value in a dict result differs from the
            recorded value under the same key.
    """
    client = Client()
    tests_dir = os.path.split(os.path.realpath(__file__))[0]
    dha_csv_path = os.path.join(tests_dir, 'data/dha.csv')
    dha_samples_path = os.path.join(tests_dir, 'samples/dha_samples.pkl.gz')
    test_results_path = os.path.join(
        tests_dir, 'regression_test_output/dha_story_results_record.pkl')

    cmd_list = [
        'DROP BTABLE dha_demo;',
        'CREATE BTABLE dha_demo FROM %s;' % dha_csv_path,
        'IMPORT SAMPLES %s INTO dha_demo;' % dha_samples_path,
        'SELECT name, qual_score, ami_score, pymt_p_visit_ratio, ttl_mdcr_spnd, hosp_reimb_ratio, hosp_reimb_p_dcd, md_copay_p_dcd, ttl_copay_p_dcd FROM dha_demo LIMIT 10;',
        'ESTIMATE DEPENDENCE PROBABILITIES FROM dha_demo;',
        'ESTIMATE DEPENDENCE PROBABILITIES FROM dha_demo REFERENCING qual_score LIMIT 6;',
        'ESTIMATE DEPENDENCE PROBABILITIES FROM dha_demo REFERENCING qual_score WITH CONFIDENCE 0.9;',
        'ESTIMATE DEPENDENCE PROBABILITIES FROM dha_demo REFERENCING pymt_p_md_visit LIMIT 6;',
        # 'SELECT name, qual_score, ami_score, pymt_p_visit_ratio, ttl_mdcr_spnd, hosp_reimb_ratio, hosp_reimb_p_dcd, md_copay_p_dcd, ttl_copay_p_dcd FROM dha_demo ORDER BY similarity_to(name=\'Albany NY\') LIMIT 10;',
        'SELECT name, qual_score, ami_score, pymt_p_visit_ratio, ttl_mdcr_spnd, hosp_reimb_ratio, hosp_reimb_p_dcd, md_copay_p_dcd, ttl_copay_p_dcd FROM dha_demo ORDER BY similarity_to(name=\'Albany NY\', qual_score), ami_score LIMIT 10;',
        'SELECT name, qual_score, ami_score, pymt_p_visit_ratio, ttl_mdcr_spnd, hosp_reimb_ratio, hosp_reimb_p_dcd, md_copay_p_dcd, ttl_copay_p_dcd FROM dha_demo ORDER BY similarity_to(name=\'Albany NY\', pymt_p_visit_ratio), ttl_mdcr_spnd LIMIT 10;',
        # 'SIMULATE name, qual_score, ami_score, pymt_p_visit_ratio, ttl_mdcr_spnd, hosp_reimb_ratio, hosp_reimb_p_dcd, md_copay_p_dcd, ttl_copay_p_dcd FROM dha_demo WHERE ami_score=95.0 TIMES 10;',
        # 'SIMULATE name, qual_score, ami_score, pymt_p_visit_ratio, ttl_mdcr_spnd, hosp_reimb_ratio, hosp_reimb_p_dcd, md_copay_p_dcd, ttl_copay_p_dcd FROM dha_demo WHERE ttl_mdcr_spnd=50000 TIMES 10;',
    ]

    record = len(sys.argv) > 1 and sys.argv[1] == 'record'
    if record:
        print('Recording new dha_story_results to %s' % test_results_path)
        dha_story_results = []
    else:
        # Testing: load the recorded results. Binary mode is what pickle
        # expects, and the context manager closes the file promptly.
        with open(test_results_path, 'rb') as results_file:
            dha_story_results = pickle.load(results_file)

    for i, cmd in enumerate(cmd_list):
        print(cmd)
        result = client.execute(cmd, timing=False, pretty=True)
        if record:
            dha_story_results.append(result)
            continue
        if not isinstance(result, dict):
            # Non-dict results are intentionally not compared:
            # assert result == dha_story_results[i], (result, dha_story_results[i])
            continue
        expected = dha_story_results[i]
        for k, v in result.items():
            if isinstance(v, numpy.ndarray):
                # Element-wise comparison; every element has to match.
                assert (v == expected[k]).all(), (v, expected[k])
            else:
                assert v == expected[k], (v, expected[k])

    if record:
        # Write to test_results_path, the same file the comparison mode
        # reads and the path announced above. (This used to reference an
        # undefined name, so recording always failed.)
        with open(test_results_path, 'wb') as results_file:
            pickle.dump(dha_story_results, results_file)
def test_dha_story_demo():
    """Regression test that replays the DHA demo commands.

    Runs a fixed list of commands through a new Client. The mode is
    selected by the first command-line argument:

    * ``record``: the result of every command is collected and pickled
      to ``regression_test_output/dha_story_results_record.pkl`` next to
      this file.
    * anything else (or no argument): the previously recorded results
      are loaded from that file, and every dict-valued result is compared
      entry by entry with the recorded one. Results that are not dicts
      are not compared.

    Raises:
        AssertionError: if a value in a dict result differs from the
            recorded value under the same key.
    """
    client = Client()
    tests_dir = os.path.split(os.path.realpath(__file__))[0]
    dha_csv_path = os.path.join(tests_dir, 'data/dha.csv')
    dha_samples_path = os.path.join(tests_dir, 'samples/dha_samples.pkl.gz')
    test_results_path = os.path.join(
        tests_dir, 'regression_test_output/dha_story_results_record.pkl')

    cmd_list = [
        'DROP BTABLE dha_demo;',
        'CREATE BTABLE dha_demo FROM %s;' % dha_csv_path,
        'IMPORT SAMPLES %s INTO dha_demo;' % dha_samples_path,
        'SELECT name, qual_score, ami_score, pymt_p_visit_ratio, ttl_mdcr_spnd, hosp_reimb_ratio, hosp_reimb_p_dcd, md_copay_p_dcd, ttl_copay_p_dcd FROM dha_demo LIMIT 10;',
        'ESTIMATE DEPENDENCE PROBABILITIES FROM dha_demo;',
        'ESTIMATE DEPENDENCE PROBABILITIES FROM dha_demo REFERENCING qual_score LIMIT 6;',
        'ESTIMATE DEPENDENCE PROBABILITIES FROM dha_demo REFERENCING qual_score WITH CONFIDENCE 0.9;',
        'ESTIMATE DEPENDENCE PROBABILITIES FROM dha_demo REFERENCING pymt_p_md_visit LIMIT 6;',
        # 'SELECT name, qual_score, ami_score, pymt_p_visit_ratio, ttl_mdcr_spnd, hosp_reimb_ratio, hosp_reimb_p_dcd, md_copay_p_dcd, ttl_copay_p_dcd FROM dha_demo ORDER BY similarity_to(name=\'Albany NY\') LIMIT 10;',
        'SELECT name, qual_score, ami_score, pymt_p_visit_ratio, ttl_mdcr_spnd, hosp_reimb_ratio, hosp_reimb_p_dcd, md_copay_p_dcd, ttl_copay_p_dcd FROM dha_demo ORDER BY similarity_to(name=\'Albany NY\', qual_score), ami_score LIMIT 10;',
        'SELECT name, qual_score, ami_score, pymt_p_visit_ratio, ttl_mdcr_spnd, hosp_reimb_ratio, hosp_reimb_p_dcd, md_copay_p_dcd, ttl_copay_p_dcd FROM dha_demo ORDER BY similarity_to(name=\'Albany NY\', pymt_p_visit_ratio), ttl_mdcr_spnd LIMIT 10;',
        # 'SIMULATE name, qual_score, ami_score, pymt_p_visit_ratio, ttl_mdcr_spnd, hosp_reimb_ratio, hosp_reimb_p_dcd, md_copay_p_dcd, ttl_copay_p_dcd FROM dha_demo WHERE ami_score=95.0 TIMES 10;',
        # 'SIMULATE name, qual_score, ami_score, pymt_p_visit_ratio, ttl_mdcr_spnd, hosp_reimb_ratio, hosp_reimb_p_dcd, md_copay_p_dcd, ttl_copay_p_dcd FROM dha_demo WHERE ttl_mdcr_spnd=50000 TIMES 10;',
    ]

    record = len(sys.argv) > 1 and sys.argv[1] == 'record'
    if record:
        print('Recording new dha_story_results to %s' % test_results_path)
        dha_story_results = []
    else:
        # Testing: load the recorded results. Binary mode is what pickle
        # expects, and the context manager closes the file promptly.
        with open(test_results_path, 'rb') as results_file:
            dha_story_results = pickle.load(results_file)

    for i, cmd in enumerate(cmd_list):
        print(cmd)
        result = client.execute(cmd, timing=False, pretty=True)
        if record:
            dha_story_results.append(result)
            continue
        if not isinstance(result, dict):
            # Non-dict results are intentionally not compared:
            # assert result == dha_story_results[i], (result, dha_story_results[i])
            continue
        expected = dha_story_results[i]
        for k, v in result.items():
            if isinstance(v, numpy.ndarray):
                # Element-wise comparison; every element has to match.
                assert (v == expected[k]).all(), (v, expected[k])
            else:
                assert v == expected[k], (v, expected[k])

    if record:
        # Write to test_results_path, the same file the comparison mode
        # reads and the path announced above. (This used to reference an
        # undefined name, so recording always failed.)
        with open(test_results_path, 'wb') as results_file:
            pickle.dump(dha_story_results, results_file)