Esempio n. 1
0
def test_flights():
    """Run a fixed script of commands against the ``jayt`` ptable.

    A new ``Client`` is created, then every command in the script is echoed
    with a ``>>> `` prefix, passed to ``client.execute(..., timing=True)``
    and the returned value is printed.  Nothing is asserted here: the only
    way this function fails is if ``Client()`` or ``execute`` raises.

    NOTE(review): the CSV location in the ``create ptable`` command is an
    absolute path; presumably it only exists on the original author's
    machine -- confirm before relying on this test elsewhere.
    """
    script = [
        # Rebuild the table from scratch and fit a minimal model.
        'drop ptable jayt;',
        'create ptable jayt from /home/sgeadmin/tabular_predDB/Examples/flight_data_subset.csv;',
        'create 2 models for jayt;',
        'analyze jayt for 1 iterations;',
        # Row selection, with and without similarity ordering/columns.
        'select dayofweek, deptime, crsdeptime, actualelapsedtime from jayt where distance > 800 limit 20;',
        'select dayofweek, deptime, crsdeptime, actualelapsedtime from jayt where distance > 800 limit 20 order by similarity to 0;',
        'select dayofweek, deptime, crsdeptime, actualelapsedtime from jayt where distance > 800 limit 20 order by similarity to 0 with respect to actualelapsedtime;',
        'select dayofweek, actualelapsedtime, similarity to 0 with respect to actualelapsedtime from jayt where distance > 800 limit 20 order by similarity to 0 with respect to actualelapsedtime, dayofweek;',
        'select dayofweek, actualelapsedtime, similarity to 0 from jayt where distance > 800 limit 5;',
        'select dayofweek, actualelapsedtime, arrtime, similarity to 0 with respect to arrtime from jayt where distance > 800 limit 5 order by similarity to 0 with respect to arrtime, dayofweek;',
        'select probability(actualelapsedtime=200) from jayt where distance > 800 limit 20;',
        # Disabled: 'select * from jayt limit 5;'
        # Disabled: 'infer actualelapsedtime from jayt with confidence 0.8 limit 20;'
        'simulate dayofweek, deptime, crsdeptime FROM jayt where dayofweek = 7 TIMES 3;',
        # Dependence-probability estimates.
        'estimate dependence probabilities from jayt;',
        'estimate dependence probabilities from jayt referencing actualelapsedtime limit 6 save to fz;',
        'estimate dependence probabilities from jayt referencing actualelapsedtime with confidence 0.5;',
        # Disabled: 'drop ptable jayt;'
        # Disabled: 'estimate dependence probabilities from dan_kiva referencing activity limit 10 save to activity_z;'
        # Disabled: 'select * from dha_small;'
        # Disabled: 'select probability(mdcr_spnd_outp=1), probability(mdcr_spnd_outp=2), probability(mdcr_spnd_outp=3) from dha_small;'
    ]

    client = Client()
    for command in script:
        # Single-argument parenthesised print behaves the same as the
        # Python 2 print statement.
        print('>>> %s' % command)
        outcome = client.execute(command, timing=True)
        print(outcome)
Esempio n. 2
0
def test_flights():
    """Exercise the ``jayt`` ptable with a scripted sequence of commands.

    The commands are grouped by phase (setup, selects, simulate/estimate)
    purely for readability; they are executed in exactly the listed order.
    Each one is printed with a ``>>> `` prefix, run through
    ``client.execute`` with ``timing=True``, and its result is printed.
    No result is checked -- this is a smoke run, not an assertion test.

    NOTE(review): the ``create ptable`` command points at an absolute CSV
    path; presumably specific to the original environment -- confirm.
    """
    setup_cmds = (
        'drop ptable jayt;',
        'create ptable jayt from /home/sgeadmin/tabular_predDB/Examples/flight_data_subset.csv;',
        'create 2 models for jayt;',
        'analyze jayt for 1 iterations;',
    )

    select_cmds = (
        'select dayofweek, deptime, crsdeptime, actualelapsedtime from jayt where distance > 800 limit 20;',
        'select dayofweek, deptime, crsdeptime, actualelapsedtime from jayt where distance > 800 limit 20 order by similarity to 0;',
        'select dayofweek, deptime, crsdeptime, actualelapsedtime from jayt where distance > 800 limit 20 order by similarity to 0 with respect to actualelapsedtime;',
        'select dayofweek, actualelapsedtime, similarity to 0 with respect to actualelapsedtime from jayt where distance > 800 limit 20 order by similarity to 0 with respect to actualelapsedtime, dayofweek;',
        'select dayofweek, actualelapsedtime, similarity to 0 from jayt where distance > 800 limit 5;',
        'select dayofweek, actualelapsedtime, arrtime, similarity to 0 with respect to arrtime from jayt where distance > 800 limit 5 order by similarity to 0 with respect to arrtime, dayofweek;',
        'select probability(actualelapsedtime=200) from jayt where distance > 800 limit 20;',
        # Disabled: 'select * from jayt limit 5;'
        # Disabled: 'infer actualelapsedtime from jayt with confidence 0.8 limit 20;'
    )

    model_cmds = (
        'simulate dayofweek, deptime, crsdeptime FROM jayt where dayofweek = 7 TIMES 3;',
        'estimate dependence probabilities from jayt;',
        'estimate dependence probabilities from jayt referencing actualelapsedtime limit 6 save to fz;',
        'estimate dependence probabilities from jayt referencing actualelapsedtime with confidence 0.5;',
        # Disabled: 'drop ptable jayt;'
        # Disabled: 'estimate dependence probabilities from dan_kiva referencing activity limit 10 save to activity_z;'
        # Disabled: 'select * from dha_small;'
        # Disabled: 'select probability(mdcr_spnd_outp=1), probability(mdcr_spnd_outp=2), probability(mdcr_spnd_outp=3) from dha_small;'
    )

    client = Client()
    for statement in setup_cmds + select_cmds + model_cmds:
        # Parenthesised single-argument print matches the Python 2 print
        # statement's output.
        print('>>> %s' % statement)
        response = client.execute(statement, timing=True)
        print(response)
Esempio n. 3
0
def test_dha_story_demo():
    """Replay the DHA "story" commands and check them against a recording.

    The mode is taken from the command line:

    * If the first argument (``sys.argv[1]``) is ``'record'``, every value
      returned by ``client.execute`` is collected and pickled to
      ``test_results_path`` once all commands have run.
    * Otherwise the previously pickled list is loaded from
      ``test_results_path`` and each dict-valued result is compared, key by
      key, with the recorded entry at the same command index.  Values that
      are ``numpy.ndarray`` are compared element-wise; everything else with
      ``==``.  Results that are not dicts are not compared.

    Raises:
        AssertionError: a dict result differs from the recorded one; the
            message carries the ``(actual, expected)`` pair.
    """
    client = Client()

    # All fixture paths are resolved relative to this test file.
    tests_dir = os.path.split(os.path.realpath(__file__))[0]
    dha_csv_path = os.path.join(tests_dir, 'data/dha.csv')
    dha_samples_path = os.path.join(tests_dir, 'samples/dha_samples.pkl.gz')
    test_results_path = os.path.join(
        tests_dir, 'regression_test_output/dha_story_results_record.pkl')

    cmd_list = [
        'DROP BTABLE dha_demo;',
        'CREATE BTABLE dha_demo FROM %s;' % dha_csv_path,
        'IMPORT SAMPLES %s INTO dha_demo;' % dha_samples_path,
        'SELECT name, qual_score, ami_score, pymt_p_visit_ratio, ttl_mdcr_spnd, hosp_reimb_ratio, hosp_reimb_p_dcd, md_copay_p_dcd, ttl_copay_p_dcd FROM dha_demo LIMIT 10;',
        'ESTIMATE DEPENDENCE PROBABILITIES FROM dha_demo;',
        'ESTIMATE DEPENDENCE PROBABILITIES FROM dha_demo REFERENCING qual_score LIMIT 6;',
        'ESTIMATE DEPENDENCE PROBABILITIES FROM dha_demo REFERENCING qual_score WITH CONFIDENCE 0.9;',
        'ESTIMATE DEPENDENCE PROBABILITIES FROM dha_demo REFERENCING pymt_p_md_visit LIMIT 6;',
        # Disabled: 'SELECT name, qual_score, ami_score, pymt_p_visit_ratio, ttl_mdcr_spnd, hosp_reimb_ratio, hosp_reimb_p_dcd, md_copay_p_dcd, ttl_copay_p_dcd FROM dha_demo ORDER BY similarity_to(name=\'Albany NY\') LIMIT 10;',
        'SELECT name, qual_score, ami_score, pymt_p_visit_ratio, ttl_mdcr_spnd, hosp_reimb_ratio, hosp_reimb_p_dcd, md_copay_p_dcd, ttl_copay_p_dcd FROM dha_demo ORDER BY similarity_to(name=\'Albany NY\', qual_score), ami_score  LIMIT 10;',
        'SELECT name, qual_score, ami_score,  pymt_p_visit_ratio, ttl_mdcr_spnd, hosp_reimb_ratio, hosp_reimb_p_dcd, md_copay_p_dcd, ttl_copay_p_dcd FROM dha_demo ORDER BY similarity_to(name=\'Albany NY\', pymt_p_visit_ratio), ttl_mdcr_spnd  LIMIT 10;',
        # Disabled: 'SIMULATE name, qual_score, ami_score, pymt_p_visit_ratio, ttl_mdcr_spnd, hosp_reimb_ratio, hosp_reimb_p_dcd, md_copay_p_dcd, ttl_copay_p_dcd FROM dha_demo WHERE ami_score=95.0  TIMES 10;',
        # Disabled: 'SIMULATE name, qual_score, ami_score, pymt_p_visit_ratio, ttl_mdcr_spnd, hosp_reimb_ratio, hosp_reimb_p_dcd, md_copay_p_dcd, ttl_copay_p_dcd FROM dha_demo WHERE ttl_mdcr_spnd=50000 TIMES 10;',
    ]

    record = len(sys.argv) > 1 and sys.argv[1] == 'record'
    if record:
        print('Recording new dha_story_results to %s' % test_results_path)
        dha_story_results = []
    else:
        # Testing mode.  Pickle data is bytes, so read it in binary mode and
        # close the handle deterministically.
        # NOTE: unpickling can execute arbitrary code; this file must only
        # ever be a recording produced by this test itself.
        with open(test_results_path, 'rb') as results_file:
            dha_story_results = pickle.load(results_file)

    for i, cmd in enumerate(cmd_list):
        print(cmd)
        result = client.execute(cmd, timing=False, pretty=True)
        if record:
            dha_story_results.append(result)
            continue

        if not isinstance(result, dict):
            # Comparison of non-dict results is intentionally disabled
            # (the whole-result equality check was switched off upstream).
            continue

        expected = dha_story_results[i]
        for k, v in result.items():
            if isinstance(v, numpy.ndarray):
                # ``==`` on arrays is element-wise; reduce it to one bool.
                assert (v == expected[k]).all(), (v, expected[k])
            else:
                assert v == expected[k], (v, expected[k])

    if record:
        # Write to the path announced above; the previous code referenced an
        # undefined name here and raised NameError instead of saving.
        with open(test_results_path, 'wb') as results_file:
            pickle.dump(dha_story_results, results_file)
Esempio n. 4
0
def test_dha_story_demo():
    """Run the DHA demo command list as a record/replay regression test.

    With ``record`` as the first command-line argument (``sys.argv[1]``),
    the value returned by ``client.execute`` for each command is appended to
    a list that is pickled to ``test_results_path`` at the end.

    Without it, the pickled list is loaded from ``test_results_path`` and,
    for every command whose result is a dict, each value is checked against
    the recorded entry at the same index: ``numpy.ndarray`` values
    element-wise, all other values with ``==``.  Non-dict results are left
    unchecked.

    Raises:
        AssertionError: a value in a dict result does not match the
            recording; the message is the ``(actual, expected)`` pair.
    """
    client = Client()

    # Fixture locations are derived from the directory of this test file.
    tests_dir = os.path.split(os.path.realpath(__file__))[0]
    dha_csv_path = os.path.join(tests_dir, 'data/dha.csv')
    dha_samples_path = os.path.join(tests_dir, 'samples/dha_samples.pkl.gz')
    test_results_path = os.path.join(tests_dir, 'regression_test_output/dha_story_results_record.pkl')

    cmd_list = [
        'DROP BTABLE dha_demo;',
        'CREATE BTABLE dha_demo FROM %s;' % dha_csv_path,
        'IMPORT SAMPLES %s INTO dha_demo;' % dha_samples_path,
        'SELECT name, qual_score, ami_score, pymt_p_visit_ratio, ttl_mdcr_spnd, hosp_reimb_ratio, hosp_reimb_p_dcd, md_copay_p_dcd, ttl_copay_p_dcd FROM dha_demo LIMIT 10;',
        'ESTIMATE DEPENDENCE PROBABILITIES FROM dha_demo;',
        'ESTIMATE DEPENDENCE PROBABILITIES FROM dha_demo REFERENCING qual_score LIMIT 6;',
        'ESTIMATE DEPENDENCE PROBABILITIES FROM dha_demo REFERENCING qual_score WITH CONFIDENCE 0.9;',
        'ESTIMATE DEPENDENCE PROBABILITIES FROM dha_demo REFERENCING pymt_p_md_visit LIMIT 6;',
        # Disabled: 'SELECT name, qual_score, ami_score, pymt_p_visit_ratio, ttl_mdcr_spnd, hosp_reimb_ratio, hosp_reimb_p_dcd, md_copay_p_dcd, ttl_copay_p_dcd FROM dha_demo ORDER BY similarity_to(name=\'Albany NY\') LIMIT 10;',
        'SELECT name, qual_score, ami_score, pymt_p_visit_ratio, ttl_mdcr_spnd, hosp_reimb_ratio, hosp_reimb_p_dcd, md_copay_p_dcd, ttl_copay_p_dcd FROM dha_demo ORDER BY similarity_to(name=\'Albany NY\', qual_score), ami_score  LIMIT 10;',
        'SELECT name, qual_score, ami_score,  pymt_p_visit_ratio, ttl_mdcr_spnd, hosp_reimb_ratio, hosp_reimb_p_dcd, md_copay_p_dcd, ttl_copay_p_dcd FROM dha_demo ORDER BY similarity_to(name=\'Albany NY\', pymt_p_visit_ratio), ttl_mdcr_spnd  LIMIT 10;',
        # Disabled: 'SIMULATE name, qual_score, ami_score, pymt_p_visit_ratio, ttl_mdcr_spnd, hosp_reimb_ratio, hosp_reimb_p_dcd, md_copay_p_dcd, ttl_copay_p_dcd FROM dha_demo WHERE ami_score=95.0  TIMES 10;',
        # Disabled: 'SIMULATE name, qual_score, ami_score, pymt_p_visit_ratio, ttl_mdcr_spnd, hosp_reimb_ratio, hosp_reimb_p_dcd, md_copay_p_dcd, ttl_copay_p_dcd FROM dha_demo WHERE ttl_mdcr_spnd=50000 TIMES 10;',
    ]

    record = len(sys.argv) > 1 and sys.argv[1] == 'record'
    if record:
        print('Recording new dha_story_results to %s' % test_results_path)
        dha_story_results = []
    else:
        # Testing mode: load the recording in binary mode (pickle data is
        # bytes) and make sure the file handle is closed afterwards.
        # NOTE: unpickling can execute arbitrary code; only load recordings
        # written by this test.
        with open(test_results_path, 'rb') as results_file:
            dha_story_results = pickle.load(results_file)

    for i, cmd in enumerate(cmd_list):
        print(cmd)
        result = client.execute(cmd, timing=False, pretty=True)
        if record:
            dha_story_results.append(result)
            continue

        if not isinstance(result, dict):
            # Whole-result comparison for non-dict results is deliberately
            # switched off; such results are only executed, not verified.
            continue

        expected = dha_story_results[i]
        for k, v in result.items():
            if isinstance(v, numpy.ndarray):
                # Array ``==`` is element-wise, so collapse it with all().
                assert (v == expected[k]).all(), (v, expected[k])
            else:
                assert v == expected[k], (v, expected[k])

    if record:
        # Save to the announced results path.  The earlier code used an
        # undefined variable here, so record mode crashed with NameError
        # before anything was written.
        with open(test_results_path, 'wb') as results_file:
            pickle.dump(dha_story_results, results_file)