Example #1
def test2():
    metadata = MetaData()
    eng = predictiondb.get_engine('prediction')
    con = predictiondb.get_connection(eng)
    tbl = predictiondb.get_active_table(metadata, eng)
    s = select([tbl])
    rp = con.execute(s)
    results = rp.fetchall()
    print(results)
Example #2
def test2(dbname):
    metadata = MetaData()
    eng = predictiondb.get_engine(dbname)
    con = predictiondb.get_connection(eng)
    tbl = predictiondb.get_active_table(metadata, eng)
    s = select([tbl])
    rp = con.execute(s)
    results = rp.fetchall()
    print(results)
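
Every example on this page leans on a small predictiondb helper module that is never shown, plus module-level imports such as `import predictiondb` and `from sqlalchemy import MetaData, select`. The sketch below is a hypothetical reconstruction of those helpers, inferred only from the call sites above and written against the SQLAlchemy 1.x API; the SQLite storage, table name, and column layout are all assumptions, not the real implementation.

# Hypothetical sketch of the assumed predictiondb helpers (SQLAlchemy 1.x).
from sqlalchemy import Column, Float, Integer, Table, create_engine

def get_engine(dbname):
    # Assumption: predictions live in a local SQLite file named after dbname.
    return create_engine('sqlite:///' + dbname + '.db')

def get_connection(eng):
    return eng.connect()

def _prediction_table(metadata, nprobs):
    # Event identifiers, the predicted segment, and one column per class.
    cols = [Column('run', Integer), Column('subrun', Integer),
            Column('gate', Integer), Column('phys_evt', Integer),
            Column('segment', Integer)]
    cols += [Column('prob%02d' % i, Float) for i in range(nprobs)]
    return Table('zsegment_prediction', metadata, *cols)

def get_11segment_prediction_table(metadata):
    return _prediction_table(metadata, 11)

def get_67segment_prediction_table(metadata):
    return _prediction_table(metadata, 67)

def get_active_table(metadata, eng, get_table_fn=get_11segment_prediction_table):
    tbl = get_table_fn(metadata)
    metadata.create_all(eng)  # create the table on first use
    return tbl
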
Example #3
def test_make_file_of_unique_runs_and_subs(dbname):
    metadata = MetaData()
    eng = predictiondb.get_engine(dbname)
    con = predictiondb.get_connection(eng)
    tbl = predictiondb.get_active_table(metadata, eng)
    s = select([tbl.c.run, tbl.c.subrun])
    rp = con.execute(s)
    results = rp.fetchall()
    # Deduplicate the (run, subrun) pairs, then sort by subrun.
    runs_subs = sorted({tuple(i) for i in results}, key=lambda x: x[1])
    with open('runs_subs.txt', 'w') as f:
        for run_sub in runs_subs:
            f.write(str(run_sub) + '\n')
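
For comparison, the deduplication and ordering can also be pushed into the database itself. A hypothetical variant follows (the `outpath` parameter is mine; same assumed helpers and the SQLAlchemy 1.x `select` API):

def make_sorted_runs_subs(dbname, outpath='runs_subs.txt'):
    metadata = MetaData()
    eng = predictiondb.get_engine(dbname)
    con = predictiondb.get_connection(eng)
    tbl = predictiondb.get_active_table(metadata, eng)
    # DISTINCT + ORDER BY do the dedup and the sort server-side.
    s = select([tbl.c.run, tbl.c.subrun]).distinct().order_by(tbl.c.subrun)
    with open(outpath, 'w') as f:
        for row in con.execute(s):
            f.write(str(tuple(row)) + '\n')
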
Example #5
def test1():
    metadata = MetaData()
    eng = predictiondb.get_engine('prediction')
    con = predictiondb.get_connection(eng)
    tbl = predictiondb.get_active_table(metadata, eng)
    ins = tbl.insert().values(
        run=1,
        subrun=1,
        gate=1,
        phys_evt=1,
        segment=0,
        prob00=0.95,
        prob01=0.05,
        prob02=0.00,
        prob03=0.00,
        prob04=0.00,
        prob05=0.00,
        prob06=0.00,
        prob07=0.00,
        prob08=0.00,
        prob09=0.00,
        prob10=0.00)
    result = con.execute(ins)
    return result
Example #6
def test1(dbname):
    metadata = MetaData()
    eng = predictiondb.get_engine(dbname)
    con = predictiondb.get_connection(eng)
    tbl = predictiondb.get_active_table(metadata, eng)
    ins = tbl.insert().values(
        run=1,
        subrun=1,
        gate=1,
        phys_evt=1,
        segment=0,
        prob00=0.95,
        prob01=0.05,
        prob02=0.00,
        prob03=0.00,
        prob04=0.00,
        prob05=0.00,
        prob06=0.00,
        prob07=0.00,
        prob08=0.00,
        prob09=0.00,
        prob10=0.00)
    result = con.execute(ins)
    return result
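
The categorical_predict examples below call a filldb helper that never appears on this page. Here is a minimal sketch of what such a function might look like, assuming evtid already unpacks to (run, subrun, gate, phys_evt); the real code may instead decode a packed integer event id:

def filldb(tbl, con, evtid, pred, probs):
    # Assumption: evtid is a (run, subrun, gate, phys_evt) tuple.
    run, subrun, gate, phys_evt = evtid
    vals = {'run': run, 'subrun': subrun, 'gate': gate,
            'phys_evt': phys_evt, 'segment': int(pred)}
    # One probability column per class: prob00, prob01, ...
    for i, p in enumerate(probs):
        vals['prob%02d' % i] = float(p)
    return con.execute(tbl.insert().values(**vals))
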
Example #7
def categorical_predict(build_cnn_fn, hyperpars, imgdat, runopts, networkstr,
                        get_eventids_hits_and_targets_fn,
                        get_id_tagged_inputlist_fn):
    """
    Make predictions based on the model _only_ (e.g., this routine should
    be used to produce prediction db's quickly or for data)

    `get_eventids_hits_and_targets_fn` needs to extract from a data slice
    a tuple of (eventids, [inputs], targets), where `[inputs]` might hold
    a single view or all three, etc.
    """
    logger.info("Loading data for prediction...")
    train_sizes, valid_sizes, test_sizes = \
        get_and_print_dataset_subsizes(runopts['data_file_list'])
    used_sizes, used_data_size = get_used_data_sizes_for_testing(
        train_sizes, valid_sizes, test_sizes, runopts['test_all_data'])

    metadata = None
    write_db = runopts['write_db']
    try:
        import predictiondb
        from sqlalchemy import MetaData
    except ImportError:
        logger.info("Cannot import sqlalchemy...")
        write_db = False  # fall back to running without a prediction DB
    if write_db:
        db_tbl_fun = None
        if networkstr['noutputs'] == 67:
            db_tbl_fun = predictiondb.get_67segment_prediction_table
        elif networkstr['noutputs'] == 11:
            db_tbl_fun = predictiondb.get_11segment_prediction_table
        else:
            raise Exception('Invalid number of outputs for DB tables.')
        tstamp = get_tstamp_from_model_name(runopts['save_model_file'])
        metadata = MetaData()
        dbname = 'prediction' + tstamp
        eng = predictiondb.get_engine(dbname)
        con = predictiondb.get_connection(eng)
        tbl = predictiondb.get_active_table(metadata,
                                            eng,
                                            get_table_fn=db_tbl_fun)

    # Prepare Theano variables for inputs
    inputlist = networkstr['input_list']

    # Build the model
    network = build_cnn_fn(inputlist=inputlist,
                           imgw=imgdat['imgw'],
                           imgh=imgdat['imgh'],
                           convpooldictlist=networkstr['topology'],
                           nhidden=networkstr['nhidden'],
                           dropoutp=networkstr['dropoutp'],
                           noutputs=networkstr['noutputs'],
                           depth=networkstr['img_depth'])
    with np.load(runopts['save_model_file']) as f:
        param_values = [f['arr_%d' % i] for i in range(len(f.files))]
    lasagne.layers.set_all_param_values(network, param_values)

    # Compute the prediction
    test_prediction = lasagne.layers.get_output(network, deterministic=True)
    test_prediction_values = T.argmax(test_prediction, axis=1)
    pred_fn = theano.function(inputlist,
                              [test_prediction, test_prediction_values],
                              allow_input_downcast=True)

    logger.info("Starting prediction...")

    test_slices = []
    for tsize in used_sizes:
        test_slices.append(slices_maker(tsize, slice_size=50000))
    test_set = None

    evtcounter = 0
    verbose_evt_print_freq = hyperpars['batchsize'] * 4
    for i, data_file in enumerate(runopts['data_file_list']):

        for tslice in test_slices[i]:
            t0 = time.time()
            test_set = None
            if runopts['test_all_data']:
                test_set = load_all_datasubsets(data_file, tslice)
            else:
                test_set = load_datasubset(data_file, 'test', tslice)
            _, test_dstream = make_scheme_and_stream(test_set,
                                                     hyperpars['batchsize'],
                                                     shuffle=False)
            t1 = time.time()
            logger.info("  Loading slice {} from {} took {:.3f}s.".format(
                tslice, data_file, t1 - t0))
            logger.debug("   dset sources: {}".format(
                test_set.provides_sources))

            t0 = time.time()
            for data in test_dstream.get_epoch_iterator():
                eventids, hits_list = get_id_tagged_inputlist_fn(data)
                probs, pred = pred_fn(*hits_list)
                evtcounter += hyperpars['batchsize']
                if write_db:
                    # j, not i: do not shadow the data-file index above
                    for j, evtid in enumerate(eventids):
                        filldb(tbl, con, evtid, pred[j], probs[j])
                if runopts['be_verbose']:
                    if evtcounter % verbose_evt_print_freq == 0:
                        logger.info("processed {}/{}".format(
                            evtcounter, used_data_size))
            t1 = time.time()
            logger.info("  -Iterating over the slice took {:.3f}s.".format(t1 -
                                                                           t0))

            del test_set
            del test_dstream

    logger.info("Finished producing predictions!")
Example #8
def categorical_predict(
        build_cnn_fn, hyperpars, imgdat, runopts, networkstr,
        get_eventids_hits_and_targets_fn, get_id_tagged_inputlist_fn
):
    """
    Make predictions based on the model _only_ (e.g., this routine should
    be used to produce prediction db's quickly or for data)

    `get_eventids_hits_and_targets_fn` needs to extract from a data slice
    a tuple of (eventids, [inputs], targets), where `[inputs]` might hold
    a single view or all three, etc.
    """
    logger.info("Loading data for prediction...")
    train_sizes, valid_sizes, test_sizes = \
        get_and_print_dataset_subsizes(runopts['data_file_list'])
    used_sizes, used_data_size = get_used_data_sizes_for_testing(
        train_sizes, valid_sizes, test_sizes, runopts['test_all_data']
    )

    metadata = None
    write_db = runopts['write_db']
    try:
        import predictiondb
        from sqlalchemy import MetaData
    except ImportError:
        logger.info("Cannot import sqlalchemy...")
        write_db = False  # fall back to running without a prediction DB
    if write_db:
        tstamp = get_tstamp_from_model_name(runopts['save_model_file'])
        metadata = MetaData()
        dbname = 'prediction' + tstamp
        eng = predictiondb.get_engine(dbname)
        con = predictiondb.get_connection(eng)
        tbl = predictiondb.get_active_table(metadata, eng)

    # Prepare Theano variables for inputs
    inputlist = networkstr['input_list']

    # Build the model
    network = build_cnn_fn(inputlist=inputlist,
                           imgw=imgdat['imgw'], imgh=imgdat['imgh'],
                           convpooldictlist=networkstr['topology'],
                           nhidden=networkstr['nhidden'],
                           dropoutp=networkstr['dropoutp'],
                           noutputs=networkstr['noutputs'],
                           depth=networkstr['img_depth'])
    with np.load(runopts['save_model_file']) as f:
        param_values = [f['arr_%d' % i] for i in range(len(f.files))]
    lasagne.layers.set_all_param_values(network, param_values)

    # Compute the prediction
    test_prediction = lasagne.layers.get_output(network, deterministic=True)
    test_prediction_values = T.argmax(test_prediction, axis=1)
    pred_fn = theano.function(inputlist,
                              [test_prediction, test_prediction_values],
                              allow_input_downcast=True)

    logger.info("Starting prediction...")

    test_slices = []
    for tsize in used_sizes:
        test_slices.append(slices_maker(tsize, slice_size=50000))
    test_set = None

    evtcounter = 0
    verbose_evt_print_freq = hyperpars['batchsize'] * 4
    for i, data_file in enumerate(runopts['data_file_list']):

        for tslice in test_slices[i]:
            t0 = time.time()
            test_set = None
            if runopts['test_all_data']:
                test_set = load_all_datasubsets(data_file, tslice)
            else:
                test_set = load_datasubset(data_file, 'test', tslice)
            _, test_dstream = make_scheme_and_stream(test_set,
                                                     hyperpars['batchsize'],
                                                     shuffle=False)
            t1 = time.time()
            logger.info("  Loading slice {} from {} took {:.3f}s.".format(
                tslice, data_file, t1 - t0)
            )
            logger.debug(
                "   dset sources: {}".format(test_set.provides_sources)
            )

            t0 = time.time()
            for data in test_dstream.get_epoch_iterator():
                eventids, hits_list = get_id_tagged_inputlist_fn(data)
                probs, pred = pred_fn(*hits_list)
                evtcounter += hyperpars['batchsize']
                if write_db:
                    # j, not i: do not shadow the data-file index above
                    for j, evtid in enumerate(eventids):
                        filldb(tbl, con, evtid, pred[j], probs[j])
                if runopts['be_verbose']:
                    if evtcounter % verbose_evt_print_freq == 0:
                        logger.info("processed {}/{}". format(evtcounter,
                                                        used_data_size))
            t1 = time.time()
            logger.info("  -Iterating over the slice took {:.3f}s.".format(t1 - t0))

            del test_set
            del test_dstream

    logger.info("Finished producing predictions!")
Example #9
def categorical_predict(build_cnn=None, data_file_list=None,
                        views='xuv', imgw=50, imgh=50, target_idx=5,
                        save_model_file='./params_file.npz',
                        be_verbose=False, convpooldictlist=None,
                        nhidden=None, dropoutp=None, write_db=True,
                        test_all_data=False, debug_print=False,
                        noutputs=11):
    """
    Make predictions based on the model _only_ (e.g., this routine should
    be used to produce prediction db's quickly or for data)

    noutputs=11 for zsegments, other vals for planecodes, etc.
    """
    print("Loading data for testing...")
    train_sizes, valid_sizes, test_sizes = \
        get_and_print_dataset_subsizes(data_file_list)
    used_sizes, used_data_size = get_used_data_sizes_for_testing(train_sizes,
                                                                 valid_sizes,
                                                                 test_sizes,
                                                                 test_all_data)

    metadata = None
    try:
        import predictiondb
        from sqlalchemy import MetaData
    except ImportError:
        print("Cannot import sqlalchemy...")
        write_db = False
    if write_db:
        tstamp = get_tstamp_from_model_name(save_model_file)
        metadata = MetaData()
        dbname = 'prediction' + tstamp
        eng = predictiondb.get_engine(dbname)
        con = predictiondb.get_connection(eng)
        tbl = predictiondb.get_active_table(metadata, eng)

    # Prepare Theano variables for inputs
    input_var_x = T.tensor4('inputs')
    input_var_u = T.tensor4('inputs')
    input_var_v = T.tensor4('inputs')
    inputlist = build_inputlist(input_var_x, input_var_u, input_var_v, views)

    # Build the model
    network = build_cnn(inputlist=inputlist, imgw=imgw, imgh=imgh,
                        convpooldictlist=convpooldictlist, nhidden=nhidden,
                        dropoutp=dropoutp, noutputs=noutputs)
    with np.load(save_model_file) as f:
        param_values = [f['arr_%d' % i] for i in range(len(f.files))]
    lasagne.layers.set_all_param_values(network, param_values)

    # Compute the prediction
    test_prediction = lasagne.layers.get_output(network, deterministic=True)
    test_prediction_values = T.argmax(test_prediction, axis=1)
    pred_fn = theano.function(inputlist,
                              [test_prediction, test_prediction_values],
                              allow_input_downcast=True)

    print("Starting prediction...")

    test_slices = []
    for tsize in used_sizes:
        test_slices.append(slices_maker(tsize, slice_size=50000))
    test_set = None

    evtcounter = 0
    batch_size = 500
    evt_print_freq = batch_size * 4
    for i, data_file in enumerate(data_file_list):

        for tslice in test_slices[i]:
            t0 = time.time()
            test_set = None
            if test_all_data:
                test_set = load_all_datasubsets(data_file, tslice)
            else:
                test_set = load_datasubset(data_file, 'test', tslice)
            _, test_dstream = make_scheme_and_stream(test_set,
                                                     batch_size,
                                                     shuffle=False)
            t1 = time.time()
            print("  Loading slice {} from {} took {:.3f}s.".format(
                tslice, data_file, t1 - t0))
            if debug_print:
                print("   dset sources:", test_set.provides_sources)

            t0 = time.time()
            for data in test_dstream.get_epoch_iterator():
                eventids, hits_list = \
                    get_id_tagged_inputlist_from_data(data, views)
                probs, pred = pred_fn(*hits_list)
                evtcounter += batch_size
                if write_db:
                    # j, not i: do not shadow the data-file index above
                    for j, evtid in enumerate(eventids):
                        filldb(tbl, con, evtid, pred[j], probs[j])
                if be_verbose:
                    if evtcounter % evt_print_freq == 0:
                        print("processed {}/{}". format(evtcounter,
                                                        used_data_size))
            t1 = time.time()
            print("  -Iterating over the slice took {:.3f}s.".format(t1 - t0))

            del test_set
            del test_dstream

    print("Finished producing predictions!")
Example #10
def test3():
    metadata = MetaData()
    eng = predictiondb.get_engine('prediction')
    con = predictiondb.get_connection(eng)
    tbl = predictiondb.get_active_table(
        metadata,
        eng,
        predictiondb.get_67segment_prediction_table
    )
    ins = tbl.insert().values(
        run=1,
        subrun=1,
        gate=1,
        phys_evt=1,
        segment=0,
        prob00=0.80,
        prob01=0.05,
        prob02=0.00,
        prob03=0.00,
        prob04=0.00,
        prob05=0.01,
        prob06=0.00,
        prob07=0.00,
        prob08=0.00,
        prob09=0.00,
        prob10=0.01,
        prob11=0.01,
        prob12=0.00,
        prob13=0.00,
        prob14=0.00,
        prob15=0.01,
        prob16=0.01,
        prob17=0.00,
        prob18=0.00,
        prob19=0.00,
        prob20=0.01,
        prob21=0.00,
        prob22=0.00,
        prob23=0.00,
        prob24=0.00,
        prob25=0.01,
        prob26=0.00,
        prob27=0.00,
        prob28=0.00,
        prob29=0.00,
        prob30=0.01,
        prob31=0.00,
        prob32=0.00,
        prob33=0.00,
        prob34=0.00,
        prob35=0.01,
        prob36=0.00,
        prob37=0.00,
        prob38=0.00,
        prob39=0.00,
        prob40=0.01,
        prob41=0.00,
        prob42=0.00,
        prob43=0.00,
        prob44=0.00,
        prob45=0.01,
        prob46=0.00,
        prob47=0.00,
        prob48=0.00,
        prob49=0.00,
        prob50=0.01,
        prob51=0.00,
        prob52=0.00,
        prob53=0.00,
        prob54=0.00,
        prob55=0.01,
        prob56=0.00,
        prob57=0.00,
        prob58=0.00,
        prob59=0.00,
        prob60=0.01,
        prob61=0.00,
        prob62=0.00,
        prob63=0.00,
        prob64=0.00,
        prob65=0.01,
        prob66=0.00
    )
    result = con.execute(ins)
    return result
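
A quick way to read the row back and confirm the insert landed (a sketch reusing the same assumed helpers and SQLAlchemy 1.x `select`):

def test3_check():
    metadata = MetaData()
    eng = predictiondb.get_engine('prediction')
    con = predictiondb.get_connection(eng)
    tbl = predictiondb.get_active_table(
        metadata,
        eng,
        predictiondb.get_67segment_prediction_table
    )
    # Filter on the identifiers used in test3 above.
    s = select([tbl]).where(tbl.c.run == 1)
    print(con.execute(s).fetchall())
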