def main():
    # Connect to the Cassandra cluster and prepare the denormalized CSV.
    session, cluster = connect_db()

    file_name = 'event_data_file_new'
    file_path = os.getcwd() + '/' + file_name + '.csv'

    data = DataPrep(
        filepath_in=os.getcwd() + '/event_data',
        filepath_out=file_name
    )
    data.write_csv()

    # Run the three analysis queries.
    execute_query_1(session, file_path, sessionId=338, itemInSession=4, verbose=True)
    execute_query_2(session, file_path, userId=10, sessionId=182, verbose=True)
    execute_query_3(session, file_path, song='All Hands Against His Own', verbose=True)

    # Clean up: drop the tables and close the connection.
    session.execute("DROP TABLE IF EXISTS song_features")
    session.execute("DROP TABLE IF EXISTS artist_song_by_user")
    session.execute("DROP TABLE IF EXISTS user_name")

    session.shutdown()
    cluster.shutdown()
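# connect_db() is used above but not defined in this snippet. A minimal sketch
# of what it could look like with the DataStax cassandra-driver, assuming a
# local node and a keyspace named 'sparkify' (both assumptions, not taken from
# the original code):
from cassandra.cluster import Cluster


def connect_db():
    # Connect to a locally running Cassandra instance.
    cluster = Cluster(['127.0.0.1'])
    session = cluster.connect()

    # Create and switch to the working keyspace (name assumed).
    session.execute(
        "CREATE KEYSPACE IF NOT EXISTS sparkify "
        "WITH REPLICATION = {'class': 'SimpleStrategy', 'replication_factor': 1}"
    )
    session.set_keyspace('sparkify')
    return session, cluster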
# data scraped on Sep 14th, ahead of week 7 of the season
# meta_str = "../data/metadata/meta_stats_week7_sep14.csv"
# top_stats_str = "../data/top_stats/top_stats_week7_sep14.csv"
# season_str = "../data/season_stats/season_stats_week7_sep14.csv"

# data scraped Sep 14th, filtered to players who have played at least 1 minute
meta_str = "../data/metadata/meta_stats_have_played_week7.csv"
top_stats_str = "../data/top_stats/top_stats_have_played_week7.csv"
season_str = "../data/season_stats/season_stats_have_played_week7.csv"

# model_locale = "../models/nn_3layers_sep4.pt"
# model_locale = "../models/nn_3layers_week7_sep14.pt"
model_locale = "../models/nn_2layers_sep17.pt"

dataprepped = DataPrep(meta_str, top_stats_str, season_str)
players = create_player_dict(dataprepped, model_locale, 7)
salaries, pred_points, teams = create_lp_dicts(players)

_variables = {
    k: pulp.LpVariable.dicts(k, v, cat="Binary")
    for k, v in pred_points.items()
}
_variables_teams = {
    k: pulp.LpVariable.dicts(k, v, cat="Binary")
    for k, v in teams.items()
}

results = []
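# A minimal sketch of how the binary decision variables above could be turned
# into a lineup optimization, assuming pred_points[k] and salaries[k] are dicts
# keyed by player name and that 50000 is the salary cap (all assumptions, not
# taken from the original code):
prob = pulp.LpProblem("fantasy_lineup", pulp.LpMaximize)

# Objective: maximize the total predicted points of the selected players.
prob += pulp.lpSum(
    pred_points[k][p] * _variables[k][p]
    for k in _variables for p in _variables[k]
)

# Constraint: keep the total salary of the selected players under the cap.
prob += pulp.lpSum(
    salaries[k][p] * _variables[k][p]
    for k in _variables for p in _variables[k]
) <= 50000

prob.solve()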
parser.add_argument("--welch", dest="welch", action=ConflictHandler, nargs=0,
                    help="Plot a Welch periodogram.")
parser.add_argument("--nfft", dest="nfft", type=int, default=256,
                    help="Window size for the Welch periodogram.")
parser.add_argument(
    "-d", "--down-sampling-factor", dest="dsfac", type=int, default=0,
    help=("Downsample the data by this factor "
          "(default=0, i.e. no downsampling)"),
)
args = parser.parse_args()

bgcounts = np.recfromtxt(args.bgfile, comments="#", delimiter="\t",
                         dtype=int, names=True)
counts = np.recfromtxt(args.file, comments="#", delimiter="\t",
                       names=True, dtype=int)

dp = DataPrep(counts, bgcounts, crop=args.crop, fill=args.fill,
              normalize=False, down_sampling=args.dsfac)
dp.gap_stats()

if args.lomb_scargle:
    # Calculate the Lomb-Scargle periodogram in the
    # frequency range of interest.
    freq_ls = np.linspace(5.0e-7, 5.0e-4, 500)
    ls = LombScargle(np.float_(dp.position), np.float_(dp.counts), freq_ls)
    f, pgram = ls()
    ax = plots.samples(f, pgram, window_title="Lomb-Scargle Periodogram")
    ax.semilogy()

if args.welch:
    plots.samples(dp.position, dp.counts, window_title="Raw data")
    # xlim = (0.0, 5e-5)
    fs = 1.0 / np.diff(dp.position).min()
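    # A minimal sketch of computing the Welch periodogram from fs and --nfft,
    # assuming scipy is available and that plots.samples accepts a
    # frequency/power pair the same way as in the Lomb-Scargle branch
    # (assumptions, not part of the original code):
    from scipy import signal

    f_w, pxx = signal.welch(np.float_(dp.counts), fs=fs, nperseg=args.nfft)
    ax = plots.samples(f_w, pxx, window_title="Welch periodogram")
    ax.semilogy()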
def main(unused_argv):
    model_function = model_fn

    """
    if FLAGS.multi_gpu:
        validate_batch_size_for_multi_gpu(FLAGS.batch_size)

        # There are two steps required if using multi-GPU: (1) wrap the model_fn,
        # and (2) wrap the optimizer. The first happens here, and (2) happens
        # in the model_fn itself when the optimizer is defined.
        model_function = tf.contrib.estimator.replicate_model_fn(
            model_fn, loss_reduction=tf.losses.Reduction.MEAN)

    data_format = FLAGS.data_format
    if data_format is None:
        data_format = ('channels_first'
                       if tf.test.is_built_with_cuda() else 'channels_last')
    """

    # run_config = tf.estimator.RunConfig(
    #     model_dir=os.path.join(os.environ['PIPELINE_OUTPUT_PATH'],
    #                            'pipeline_tfserving/0'))

    # Define a DataPrep object.
    dp = DataPrep(FLAGS.data_dir, FLAGS.xColName, FLAGS.yColName,
                  FLAGS.zColName, FLAGS.propColName, FLAGS.wellColName,
                  FLAGS.sill, FLAGS.hNugget, FLAGS.hRange,
                  FLAGS.vNugget, FLAGS.vRange, FLAGS.nNeighborWells)

    # Define the predictor estimator.
    petroDDN_predictor = tf.estimator.Estimator(
        model_fn=model_function,
        model_dir=FLAGS.model_dir,
        params={
            'nLayers': FLAGS.nLayers,
            'nUnits': FLAGS.nUnits,
            'initRate': FLAGS.initRate,
            'batch_size': FLAGS.batch_size
        },
    )
    # config=run_config)

    # Train the model.
    def train_input_fn():
        ds = dp.train()
        ds = ds.cache().batch(FLAGS.batch_size).repeat(FLAGS.train_epochs)
        ds = ds.shuffle(buffer_size=50000)

        # Return the next batch of data.
        features, labels = ds.make_one_shot_iterator().get_next()
        return features, labels

    # Set up a training hook that logs the training MSE every 100 steps.
    tensors_to_log = {'train_MSE': 'train_MSE'}
    logging_hook = tf.train.LoggingTensorHook(tensors=tensors_to_log,
                                              every_n_iter=100)

    # Train the model.
    petroDDN_predictor.train(input_fn=train_input_fn, hooks=[logging_hook])

    # Evaluate the model and print results.
    def eval_input_fn():
        return dp.validate().batch(
            FLAGS.batch_size).make_one_shot_iterator().get_next()

    eval_results = petroDDN_predictor.evaluate(input_fn=eval_input_fn)
    print()
    print('Evaluation results:\n\t%s' % eval_results)

    # Export the model.
    if FLAGS.export_dir is not None:
        X = tf.placeholder(tf.float32,
                           shape=(FLAGS.batch_size, FLAGS.nNeighborWells * 9))
        input_fn = tf.estimator.export.build_raw_serving_input_receiver_fn({
            'inputs': X,
        })
        petroDDN_predictor.export_savedmodel(FLAGS.export_dir, input_fn)

    # Predict property values at locations in the pointcloud file.
    # Indices where there is missing data.
    nanIdxs = dp.processPointCloudData(FLAGS.input_pointcloud_file)

    def predict_input_fn():
        return dp.predict().batch(
            FLAGS.batch_size).make_one_shot_iterator().get_next(), None

    predictions = petroDDN_predictor.predict(input_fn=predict_input_fn)
    values = np.array(
        list(
            map(lambda item: item["predictions"][0],
                list(itertools.islice(predictions, 0, None)))))

    # Undo the property normalization and mark missing locations with the NDV.
    values = values * (dp.b4rReg.propMax - dp.b4rReg.propMin) + dp.b4rReg.propMin
    values[nanIdxs] = dp.propNDV
    # print('\n\nPrediction results:\n\t%s' % values)

    # Write the predictions to the output pointcloud file.
    op = pd.DataFrame(data=values, columns=[FLAGS.propColName])
    op.to_csv(FLAGS.output_pointcloud_file, index=None)
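# The LoggingTensorHook above looks up a graph tensor by the name 'train_MSE',
# so model_fn (not shown here) has to expose the training MSE under that name.
# A minimal sketch of the relevant lines inside model_fn, assuming the usual
# TF 1.x naming pattern (an assumption about code not part of this snippet):
#
#     mse = tf.losses.mean_squared_error(labels=labels, predictions=predictions)
#     tf.identity(mse, name='train_MSE')
#     tf.summary.scalar('train_MSE', mse)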
import pickle as pkl
import os
import time as clock

import numpy as np

WRITE_LOG = True
accuracy_threshold = 0.50
search_hyper_param = {
    'batch_size': 16,
    'training_iterations': 50,
    'population_size': 64
}
dir_name = 'umbrella_3layerConv_0.975'

data = DataPrep()
data.mnist()
x1, y1, x_test1, y_test1 = data.sample_dataset([5, 6, 7, 8, 9])
x2, y2, x_test2, y_test2 = data.sample_dataset([0, 1, 2, 3, 4])
x3, y3, x_test3, y_test3 = data.sample_dataset([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

try:
    with open(dir_name + '/log.pkl', 'rb') as file:
        log = pkl.load(file)
except FileNotFoundError:
    log = {
        'path1': [],
        'path2': [],
        'path3': [],
        'eval1': [],
    thread = Flatten()(thread)
    thread = Dense(20, activation='relu')(thread)
    thread = Dense(2, activation='softmax')(thread)

    model = Model(inputs=inp, outputs=thread)
    model.compile(optimizer=optim, loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model


optim = RMSprop()
epochs = 10
batch_size = 16

data = DataPrep()
data.cifar10()
x1, y1, x_test1, y_test1 = data.sample_dataset([0, 6])
x2, y2, x_test2, y_test2 = data.sample_dataset([2, 3])

model = give_model(optim)
model.summary()
log1 = model.fit(x1, y1, batch_size=batch_size, epochs=epochs, verbose=True,
                 validation_data=[x_test1, y_test1]).history['val_acc']

log2 = []
model = give_model(optim)
for e in range(epochs):
import pickle as pkl  # used below to load the persisted experiment log
import os
import time as clock

import numpy as np

repeates = 1
WRITE_LOG = True
accuracy_threshold = 0.975
noise = False
search_hyper_param = {
    'batch_size': 16,
    'training_iterations': 50,
    'population_size': 64
}
dir_name = 'mnist_3layerConv_0.975'

data = DataPrep()
data.mnist()
if noise:
    data.add_noise()
x1, y1, x_test1, y_test1 = data.sample_dataset([0, 1, 2, 3, 4])
x2, y2, x_test2, y_test2 = data.sample_dataset([5, 6, 7, 8, 9])

try:
    with open(dir_name + '/log.pkl', 'rb') as file:
        log = pkl.load(file)
except FileNotFoundError:
    log = {
        's+s:path1': [],
        's+s:path2': [],
        's+s:eval1': [],
        's+s:eval2': [],
from pathnet_keras import PathNet
from dataprep import DataPrep
from path_search import PathSearch
from analytic import Analytic
from matplotlib import pyplot as plt

data = DataPrep()
data.mnist()
x1, y1, x_test, y_test = data.sample_dataset([1, 2])
x2, y2, x_test, y_test = data.sample_dataset([3, 4])

log = []
for i in range(1, 2):
    print('\t\t\tRound', i)

    pathnet, first = PathNet.binary_mnist()
    pathsearch = PathSearch(pathnet)
    analytic = Analytic(pathnet)

    path, fitness, l = pathsearch.binary_mnist_tournamet_search(
        x1, y1, first, stop_when_reached=0.99)
    l['training_counter'] = pathnet.training_counter
    log.append(l)

for l in log:
    paths = l['path']
    fit = l['fitness']
    trcnt = l['training_counter']
    train = l['avg_training']
from path_search import PathSearch
from analytic import Analytic
from dataprep import DataPrep
from plot_pathnet import PathNetPlotter
from datetime import datetime as dt
from matplotlib import pyplot as plt
import pickle as pkl
import os
import time as clock

import numpy as np

repeates = 2
WRITE_LOG = True
accuracy_threshold = 0.99

data = DataPrep()
data.mnist()
data.add_noise(noise_factor=0.5, prob=0.15)
x1, y1, x_test1, y_test1 = data.sample_dataset([3, 4])
x2, y2, x_test2, y_test2 = data.sample_dataset([1, 2])

log = {
    's+s:path1': [],
    's+s:path2': [],
    's+s:eval1': [],
    's+s:eval2': [],
    's+s:gen1': [],
    's+s:gen2': [],
    's+s:avg_training1': [],
    's+s:avg_training2': [],
    's+s:module_reuse': [],