Example #1
def main():
    session, cluster = connect_db()
    
    file_name = 'event_data_file_new'
    file_path = os.getcwd() + '/'+file_name+'.csv'
    
    data = DataPrep(
        filepath_in=os.getcwd() + '/event_data', 
        filepath_out=file_name
    )

    data.write_csv()
    execute_query_1(session, file_path, sessionId=338, itemInSession=4, verbose=True)
    execute_query_2(session, file_path, userId=10, sessionId=182, verbose=True)
    execute_query_3(session, file_path, song='All Hands Against His Own', verbose=True)

    session.execute("DROP TABLE IF EXISTS song_features")
    session.execute("DROP TABLE IF EXISTS artist_song_by_user")
    session.execute("DROP TABLE IF EXISTS user_name")                                                

    session.shutdown()
    cluster.shutdown()
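connect_db, execute_query_1/2/3, and DataPrep are defined elsewhere in this project. Since the example works with a session/cluster pair and calls session.execute and cluster.shutdown, connect_db most likely wraps the cassandra-driver package; a minimal sketch under that assumption follows (the contact point and the keyspace name 'sparkify' are placeholders, not taken from the original):

from cassandra.cluster import Cluster

def connect_db(keyspace='sparkify'):
    # connect to a local Apache Cassandra node
    cluster = Cluster(['127.0.0.1'])
    session = cluster.connect()
    # create and switch to a simple single-replica keyspace
    session.execute(
        "CREATE KEYSPACE IF NOT EXISTS " + keyspace +
        " WITH REPLICATION = {'class': 'SimpleStrategy', 'replication_factor': 1}"
    )
    session.set_keyspace(keyspace)
    return session, cluster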
Example #2
# data scraped on Sep 14th, ahead of week 7 of the season
# meta_str = "../data/metadata/meta_stats_week7_sep14.csv"
# top_stats_str = "../data/top_stats/top_stats_week7_sep14.csv"
# season_str = "../data/season_stats/season_stats_week7_sep14.csv"

# data scraped Sep 14th, filtered for only players who have played at least 1 minute
meta_str = "../data/metadata/meta_stats_have_played_week7.csv"
top_stats_str = "../data/top_stats/top_stats_have_played_week7.csv"
season_str = "../data/season_stats/season_stats_have_played_week7.csv"

#model_locale = "../models/nn_3layers_sep4.pt"
#model_locale = "../models/nn_3layers_week7_sep14.pt"
model_locale = "../models/nn_2layers_sep17.pt"

dataprepped = DataPrep(meta_str, top_stats_str, season_str)
players = create_player_dict(dataprepped, model_locale, 7)

salaries, pred_points, teams = create_lp_dicts(players)

_variables = {
    k: pulp.LpVariable.dicts(k, v, cat="Binary")
    for k, v in pred_points.items()
}
_variables_teams = {
    k: pulp.LpVariable.dicts(k, v, cat="Binary")
    for k, v in teams.items()
}

results = []
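The binary variables above are the decision variables of a lineup optimization. A rough sketch of how they might be combined into a PuLP problem follows; the 50000 salary cap and the assumption that salaries and pred_points share the same nested {position: {player: value}} layout as _variables are illustrative, not taken from the original script:

# maximize predicted points subject to a salary cap (sketch only)
prob = pulp.LpProblem("fantasy_lineup", pulp.LpMaximize)

# objective: total predicted points of the selected players
prob += pulp.lpSum(
    pred_points[pos][player] * _variables[pos][player]
    for pos in pred_points for player in pred_points[pos]
)

# constraint: total salary stays under the (assumed) cap
prob += pulp.lpSum(
    salaries[pos][player] * _variables[pos][player]
    for pos in salaries for player in salaries[pos]
) <= 50000

# _variables_teams would enter analogous team-level constraints
prob.solve()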
Example #3
    parser.add_argument("--welch", dest="welch", action=ConflictHandler, nargs=0, help="Plot a welche periodigram.")
    parser.add_argument("--nfft", dest="nfft", type=int, default=256, help="Window size for the Welch periodigram.")
    parser.add_argument(
        "-d",
        "--down-sampling-factor",
        dest="dsfac",
        type=int,
        default=0,
        help="Down-sampling factor for the data (default=0, i.e. no down-sampling).",
    )
    args = parser.parse_args()

    bgcounts = np.recfromtxt(args.bgfile, comments="#", delimiter="\t", dtype=int, names=True)
    counts = np.recfromtxt(args.file, comments="#", delimiter="\t", names=True, dtype=int)

    dp = DataPrep(counts, bgcounts, crop=args.crop, fill=args.fill, normalize=False, down_sampling=args.dsfac)
    dp.gap_stats()

    if args.lomb_scargle:
        # calculate the Lomb-Scargle periodogram in the
        # frequency range of interest
        freq_ls = np.linspace(5.0e-7, 5.0e-4, 500)
        ls = LombScargle(np.float_(dp.position), np.float_(dp.counts), freq_ls)
        f, pgram = ls()
        ax = plots.samples(f, pgram, window_title="Lomb-Scargle Periodogram")
        ax.semilogy()

    if args.welch:
        plots.samples(dp.position, dp.counts, window_title="Raw data")
        # xlim = (0.0, 5e-5)
        fs = 1.0 / np.diff(dp.position).min()
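ConflictHandler, used for the --welch flag above, is a custom argparse action defined elsewhere in this script; nargs=0 means the flag takes no value. A minimal stand-in, assuming it simply behaves like store_true (the real class presumably also guards against conflicting plot options):

import argparse

class ConflictHandler(argparse.Action):
    """Flag-style action: record that the option was given."""
    def __call__(self, parser, namespace, values, option_string=None):
        # with nargs=0, `values` is an empty list; just set the flag
        setattr(namespace, self.dest, True)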
Example #4
def main(unused_argv):
    model_function = model_fn
    """
    if FLAGS.multi_gpu:
        validate_batch_size_for_multi_gpu(FLAGS.batch_size)

        # There are two steps required if using multi-GPU: (1) wrap the model_fn,
        # and (2) wrap the optimizer. The first happens here, and (2) happens
        # in the model_fn itself when the optimizer is defined.
        model_function = tf.contrib.estimator.replicate_model_fn(
            model_fn, loss_reduction=tf.losses.Reduction.MEAN)

    data_format = FLAGS.data_format
    if data_format is None:
        data_format = ('channels_first'
                       if tf.test.is_built_with_cuda() else 'channels_last')
    """

    # run_config=tf.estimator.RunConfig(model_dir=os.path.join(os.environ['PIPELINE_OUTPUT_PATH'],
    #                                                            'pipeline_tfserving/0')),

    # define a DataPrep object
    dp = DataPrep(FLAGS.data_dir, FLAGS.xColName, FLAGS.yColName,
                  FLAGS.zColName, FLAGS.propColName, FLAGS.wellColName,
                  FLAGS.sill, FLAGS.hNugget, FLAGS.hRange, FLAGS.vNugget,
                  FLAGS.vRange, FLAGS.nNeighborWells)

    # define the predictor estimator
    petroDDN_predictor = tf.estimator.Estimator(
        model_fn=model_function,
        model_dir=FLAGS.model_dir,
        params={
            'nLayers': FLAGS.nLayers,
            'nUnits': FLAGS.nUnits,
            'initRate': FLAGS.initRate,
            'batch_size': FLAGS.batch_size
        },
    )

    #    config=run_config)

    # Train the model
    def train_input_fn():
        ds = dp.train()
        ds = ds.cache().batch(FLAGS.batch_size).repeat(FLAGS.train_epochs)
        ds = ds.shuffle(buffer_size=50000)

        # Return the next batch of data.
        features, labels = ds.make_one_shot_iterator().get_next()
        return features, labels

    # Set up training hook that logs the training MSE every 100 steps.
    tensors_to_log = {'train_MSE': 'train_MSE'}
    logging_hook = tf.train.LoggingTensorHook(tensors=tensors_to_log,
                                              every_n_iter=100)

    # Train the model
    petroDDN_predictor.train(input_fn=train_input_fn, hooks=[logging_hook])

    # Evaluate the model and print results
    def eval_input_fn():
        return dp.validate().batch(
            FLAGS.batch_size).make_one_shot_iterator().get_next()

    eval_results = petroDDN_predictor.evaluate(input_fn=eval_input_fn)
    print()
    print('Evaluation results:\n\t%s' % eval_results)

    # Export the model
    if FLAGS.export_dir is not None:
        X = tf.placeholder(tf.float32,
                           shape=(FLAGS.batch_size, FLAGS.nNeighborWells * 9))
        input_fn = tf.estimator.export.build_raw_serving_input_receiver_fn({
            'inputs':
            X,
        })
        petroDDN_predictor.export_savedmodel(FLAGS.export_dir, input_fn)

    # Predict property values at locations in the pointcloud file
    # indices where there is missing data
    nanIdxs = dp.processPointCloudData(FLAGS.input_pointcloud_file)

    def predict_input_fn():
        return dp.predict().batch(
            FLAGS.batch_size).make_one_shot_iterator().get_next(), None

    predictions = petroDDN_predictor.predict(input_fn=predict_input_fn)
    values = np.array(
        list(
            map(lambda item: item["predictions"][0],
                list(itertools.islice(predictions, 0, None)))))
    values = values * (dp.b4rReg.propMax -
                       dp.b4rReg.propMin) + dp.b4rReg.propMin
    values[nanIdxs] = dp.propNDV
    #print('\n\nPrediction results:\n\t%s' % values)

    # write the predictions to the output pointcloud file
    op = pd.DataFrame(data=values, columns=[FLAGS.propColName])
    op.to_csv(FLAGS.output_pointcloud_file, index=None)
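One note on train_input_fn above: shuffle() runs after cache().batch().repeat(), so whole batches rather than individual examples are shuffled. A sketch of the more common TF 1.x ordering (shuffle before batch), offered only as a variant and not as the original project's code:

def train_input_fn():
    ds = dp.train()
    # shuffle individual examples, then batch and repeat for all epochs
    ds = ds.cache().shuffle(buffer_size=50000)
    ds = ds.batch(FLAGS.batch_size).repeat(FLAGS.train_epochs)
    features, labels = ds.make_one_shot_iterator().get_next()
    return features, labels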
Example #5
import pickle as pkl
import os
import time as clock
import numpy as np
# DataPrep is instantiated below but not imported in this excerpt;
# assuming the same module as in Example #8:
from dataprep import DataPrep

WRITE_LOG = True
accuracy_threshold = 0.50

search_hyper_param = {
    'batch_size': 16,
    'training_iterations': 50,
    'population_size': 64
}
dir_name = 'umbrella_3layerConv_0.975'

data = DataPrep()
data.mnist()

x1, y1, x_test1, y_test1 = data.sample_dataset([5, 6, 7, 8, 9])
x2, y2, x_test2, y_test2 = data.sample_dataset([0, 1, 2, 3, 4])
x3, y3, x_test3, y_test3 = data.sample_dataset([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

try:
    with open(dir_name + '/log.pkl', 'rb') as file:
        log = pkl.load(file)
except FileNotFoundError:
    log = {
        'path1': [],
        'path2': [],
        'path3': [],
        'eval1': [],
Example #6
    thread = Flatten()(thread)
    thread = Dense(20, activation='relu')(thread)
    thread = Dense(2, activation='softmax')(thread)

    model = Model(inputs=inp, outputs=thread)
    model.compile(optimizer=optim,
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model


optim = RMSprop()
epochs = 10
batch_size = 16

data = DataPrep()
data.cifar10()
x1, y1, x_test1, y_test1 = data.sample_dataset([0, 6])
x2, y2, x_test2, y_test2 = data.sample_dataset([2, 3])

model = give_model(optim)
model.summary()
log1 = model.fit(x1,
                 y1,
                 batch_size=batch_size,
                 epochs=epochs,
                 verbose=True,
                 validation_data=[x_test1, y_test1]).history['val_acc']
log2 = []
model = give_model(optim)
for e in range(epochs):
Example #7
import pickle as pkl  # pkl.load() is used below but pickle is not imported in this excerpt
import os
import time as clock
import numpy as np
# DataPrep is instantiated below but not imported in this excerpt;
# assuming the same module as in Example #8:
from dataprep import DataPrep

repeates = 1
WRITE_LOG = True
accuracy_threshold = 0.975
noise = False
search_hyper_param = {
    'batch_size': 16,
    'training_iterations': 50,
    'population_size': 64
}
dir_name = 'mnist_3layerConv_0.975'

data = DataPrep()
data.mnist()
if noise: data.add_noise()

x1, y1, x_test1, y_test1 = data.sample_dataset([0, 1, 2, 3, 4])
x2, y2, x_test2, y_test2 = data.sample_dataset([5, 6, 7, 8, 9])

try:
    with open(dir_name + '/log.pkl', 'rb') as file:
        log = pkl.load(file)
except FileNotFoundError:
    log = {
        's+s:path1': [],
        's+s:path2': [],
        's+s:eval1': [],
        's+s:eval2': [],
Example #8
from pathnet_keras import PathNet
from dataprep import DataPrep
from path_search import PathSearch
from analytic import Analytic
from matplotlib import pyplot as plt

data = DataPrep()
data.mnist()
x1, y1, x_test, y_test = data.sample_dataset([1, 2])
x2, y2, x_test, y_test = data.sample_dataset([3, 4])

log = []

for i in range(1, 2):
    print('\t\t\tRound', i)

    pathnet, first = PathNet.binary_mnist()
    pathsearch = PathSearch(pathnet)
    analytic = Analytic(pathnet)

    path, fitness, l = pathsearch.binary_mnist_tournamet_search(
        x1, y1, first, stop_when_reached=0.99)
    l['training_counter'] = pathnet.training_counter
    log.append(l)

for l in log:
    paths = l['path']
    fit = l['fitness']
    trcnt = l['training_counter']
    train = l['avg_training']
Example #9
from path_search import PathSearch
from analytic import Analytic
from dataprep import DataPrep
from plot_pathnet import PathNetPlotter
from datetime import datetime as dt
from matplotlib import pyplot as plt
import pickle as pkl
import os
import time as clock
import numpy as np

repeates = 2
WRITE_LOG = True
accuracy_threshold = 0.99

data = DataPrep()
data.mnist()
data.add_noise(noise_factor=0.5, prob=0.15)
x1, y1, x_test1, y_test1 = data.sample_dataset([3, 4])
x2, y2, x_test2, y_test2 = data.sample_dataset([1, 2])

log = {
    's+s:path1': [],
    's+s:path2': [],
    's+s:eval1': [],
    's+s:eval2': [],
    's+s:gen1': [],
    's+s:gen2': [],
    's+s:avg_training1': [],
    's+s:avg_training2': [],
    's+s:module_reuse': [],