Example #1
def main():
    session, cluster = connect_db()
    
    file_name = 'event_data_file_new'
    file_path = os.getcwd() + '/' + file_name + '.csv'
    
    data = DataPrep(
        filepath_in=os.getcwd() + '/event_data', 
        filepath_out=file_name
    )

    data.write_csv()
    execute_query_1(session, file_path, sessionId=338, itemInSession=4, verbose=True)
    execute_query_2(session, file_path, userId=10, sessionId=182, verbose=True)
    execute_query_3(session, file_path, song='All Hands Against His Own', verbose=True)

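    # clean up: drop the tables used above, then shut down the session and cluster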
    session.execute("DROP TABLE IF EXISTS song_features")
    session.execute("DROP TABLE IF EXISTS artist_song_by_user")
    session.execute("DROP TABLE IF EXISTS user_name")

    session.shutdown()
    cluster.shutdown()
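connect_db() and the execute_query_* helpers are not part of this snippet. The session/cluster pair and the CQL DROP TABLE statements suggest the DataStax cassandra-driver; below is a minimal sketch of what connect_db() might look like under that assumption (the node address and the keyspace name are placeholders, not taken from the original project).

from cassandra.cluster import Cluster

def connect_db():
    # connect to a local Cassandra node (address is a placeholder)
    cluster = Cluster(['127.0.0.1'])
    session = cluster.connect()

    # create and select a working keyspace (name is hypothetical)
    session.execute(
        "CREATE KEYSPACE IF NOT EXISTS event_keyspace "
        "WITH REPLICATION = {'class': 'SimpleStrategy', 'replication_factor': 1}"
    )
    session.set_keyspace('event_keyspace')
    return session, cluster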
Example #2
# data scraped on Sep 14th, ahead of week 7 of the season
# meta_str = "../data/metadata/meta_stats_week7_sep14.csv"
# top_stats_str = "../data/top_stats/top_stats_week7_sep14.csv"
# season_str = "../data/season_stats/season_stats_week7_sep14.csv"

# data scraped Sep 14th, filtered for only players who have played at least 1 minute
meta_str = "../data/metadata/meta_stats_have_played_week7.csv"
top_stats_str = "../data/top_stats/top_stats_have_played_week7.csv"
season_str = "../data/season_stats/season_stats_have_played_week7.csv"

#model_locale = "../models/nn_3layers_sep4.pt"
#model_locale = "../models/nn_3layers_week7_sep14.pt"
model_locale = "../models/nn_2layers_sep17.pt"

dataprepped = DataPrep(meta_str, top_stats_str, season_str)
players = create_player_dict(dataprepped, model_locale, 7)

salaries, pred_points, teams = create_lp_dicts(players)

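# one binary selection variable per player in pred_points and, below, per team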
_variables = {
    k: pulp.LpVariable.dicts(k, v, cat="Binary")
    for k, v in pred_points.items()
}
_variables_teams = {
    k: pulp.LpVariable.dicts(k, v, cat="Binary")
    for k, v in teams.items()
}

results = []
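The LP itself is not shown in this snippet. Assuming salaries and pred_points are nested dicts keyed the same way as _variables (group -> player -> value), a typical PuLP formulation of the lineup problem would look roughly like the sketch below; the salary cap value is a placeholder.

prob = pulp.LpProblem("fantasy_lineup", pulp.LpMaximize)

# objective: maximise the total predicted points of the selected players
prob += pulp.lpSum(
    pred_points[group][player] * _variables[group][player]
    for group in _variables
    for player in _variables[group]
)

# stay under the salary cap (the cap value here is a placeholder)
SALARY_CAP = 60000
prob += pulp.lpSum(
    salaries[group][player] * _variables[group][player]
    for group in _variables
    for player in _variables[group]
) <= SALARY_CAP

prob.solve()

# record which players ended up in the optimal lineup
results.append([
    v.name
    for group in _variables
    for v in _variables[group].values()
    if v.value() == 1
])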
Example #3
def main(unused_argv):
    model_function = model_fn
    """
    if FLAGS.multi_gpu:
        validate_batch_size_for_multi_gpu(FLAGS.batch_size)

        # There are two steps required if using multi-GPU: (1) wrap the model_fn,
        # and (2) wrap the optimizer. The first happens here, and (2) happens
        # in the model_fn itself when the optimizer is defined.
        model_function = tf.contrib.estimator.replicate_model_fn(
            model_fn, loss_reduction=tf.losses.Reduction.MEAN)

    data_format = FLAGS.data_format
    if data_format is None:
        data_format = ('channels_first'
                       if tf.test.is_built_with_cuda() else 'channels_last')
    """

    # run_config=tf.estimator.RunConfig(model_dir=os.path.join(os.environ['PIPELINE_OUTPUT_PATH'],
    #                                                            'pipeline_tfserving/0')),

    # define a DataPrep object
    dp = DataPrep(FLAGS.data_dir, FLAGS.xColName, FLAGS.yColName,
                  FLAGS.zColName, FLAGS.propColName, FLAGS.wellColName,
                  FLAGS.sill, FLAGS.hNugget, FLAGS.hRange, FLAGS.vNugget,
                  FLAGS.vRange, FLAGS.nNeighborWells)

    # define the predictor estimator
    petroDDN_predictor = tf.estimator.Estimator(
        model_fn=model_function,
        model_dir=FLAGS.model_dir,
        params={
            'nLayers': FLAGS.nLayers,
            'nUnits': FLAGS.nUnits,
            'initRate': FLAGS.initRate,
            'batch_size': FLAGS.batch_size
        },
    )

    #    config=run_config)

    # Train the model
    def train_input_fn():
        ds = dp.train()
        # shuffle individual examples before batching, then repeat for the requested epochs
        ds = ds.cache().shuffle(buffer_size=50000)
        ds = ds.batch(FLAGS.batch_size).repeat(FLAGS.train_epochs)

        # Return the next batch of data.
        features, labels = ds.make_one_shot_iterator().get_next()
        return features, labels

    # Set up training hook that logs the training MSE every 100 steps.
    tensors_to_log = {'train_MSE': 'train_MSE'}
    logging_hook = tf.train.LoggingTensorHook(tensors=tensors_to_log,
                                              every_n_iter=100)

    # Train the model
    petroDDN_predictor.train(input_fn=train_input_fn, hooks=[logging_hook])

    # Evaluate the model and print results
    def eval_input_fn():
        return dp.validate().batch(
            FLAGS.batch_size).make_one_shot_iterator().get_next()

    eval_results = petroDDN_predictor.evaluate(input_fn=eval_input_fn)
    print()
    print('Evaluation results:\n\t%s' % eval_results)

    # Export the model
    if FLAGS.export_dir is not None:
        X = tf.placeholder(tf.float32,
                           shape=(FLAGS.batch_size, FLAGS.nNeighborWells * 9))
        input_fn = tf.estimator.export.build_raw_serving_input_receiver_fn({
            'inputs':
            X,
        })
        petroDDN_predictor.export_savedmodel(FLAGS.export_dir, input_fn)

    # Predict property values at locations in the pointcloud file
    # indices where there is missing data
    nanIdxs = dp.processPointCloudData(FLAGS.input_pointcloud_file)

    def predict_input_fn():
        return dp.predict().batch(
            FLAGS.batch_size).make_one_shot_iterator().get_next(), None

    predictions = petroDDN_predictor.predict(input_fn=predict_input_fn)
    values = np.array([item["predictions"][0] for item in predictions])
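    # invert the min-max normalisation so predictions are in original property units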
    values = values * (dp.b4rReg.propMax -
                       dp.b4rReg.propMin) + dp.b4rReg.propMin
    values[nanIdxs] = dp.propNDV
    #print('\n\nPrediction results:\n\t%s' % values)

    # write the predictions to the output pointcloud file
    op = pd.DataFrame(data=values, columns=[FLAGS.propColName])
    op.to_csv(FLAGS.output_pointcloud_file, index=False)
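This main() gets its configuration through FLAGS and is typically launched via tf.app.run(). A minimal sketch of that wiring, assuming TensorFlow 1.x; only a few of the flags referenced above are defined here, and all default values are placeholders.

import tensorflow as tf

FLAGS = tf.app.flags.FLAGS

# a subset of the flags used in main(); defaults are placeholders
tf.app.flags.DEFINE_string('data_dir', './data', 'Directory containing the well data')
tf.app.flags.DEFINE_string('model_dir', './model', 'Directory for checkpoints')
tf.app.flags.DEFINE_integer('batch_size', 32, 'Batch size')
tf.app.flags.DEFINE_integer('train_epochs', 10, 'Number of training epochs')
tf.app.flags.DEFINE_integer('nLayers', 3, 'Number of hidden layers')
tf.app.flags.DEFINE_integer('nUnits', 64, 'Units per hidden layer')
tf.app.flags.DEFINE_float('initRate', 1e-3, 'Initial learning rate')
tf.app.flags.DEFINE_string('export_dir', None, 'Where to export the SavedModel')

if __name__ == '__main__':
    tf.logging.set_verbosity(tf.logging.INFO)
    tf.app.run(main=main)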
Example #4
import pickle as pkl
import os
import time as clock
import numpy as np

WRITE_LOG = True
accuracy_threshold = 0.50

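# hyperparameters for the search: batch size, training iterations, and population size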
search_hyper_param = {
    'batch_size': 16,
    'training_iterations': 50,
    'population_size': 64
}
dir_name = 'umbrella_3layerConv_0.975'

data = DataPrep()
data.mnist()

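# three MNIST sub-tasks: digits 5-9, digits 0-4, and all ten classes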
x1, y1, x_test1, y_test1 = data.sample_dataset([5, 6, 7, 8, 9])
x2, y2, x_test2, y_test2 = data.sample_dataset([0, 1, 2, 3, 4])
x3, y3, x_test3, y_test3 = data.sample_dataset([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

try:
    with open(dir_name + '/log.pkl', 'rb') as file:
        log = pkl.load(file)
except FileNotFoundError:
    log = {
        'path1': [],
        'path2': [],
        'path3': [],
        'eval1': [],