parser.add_argument("--n_layers", "-nl", type=int, default=3) parser.add_argument("--encoder_hidden_size", "-ehs", type=int, default=64) parser.add_argument("--decoder_hidden_size", "-dhs", type=int, default=64) parser.add_argument("--seq_len", "-sl", type=int, default=7) parser.add_argument("--num_obs_to_train", "-not", type=int, default=1) parser.add_argument("--embedding_size", "-es", type=int, default=10) parser.add_argument("--standard_scaler", "-ss", action="store_true") parser.add_argument("--log_scaler", "-ls", action="store_true") parser.add_argument("--mean_scaler", "-ms", action="store_true") parser.add_argument("--show_plot", "-sp", action="store_true") parser.add_argument("--run_test", "-rt", action="store_true") parser.add_argument("--batch_size", "-b", type=int, default=64) args = parser.parse_args() if args.run_test: data_path = util.get_data_path() data = pd.read_csv(os.path.join(data_path, "LD_MT200_hour.csv"), parse_dates=["date"]) data["year"] = data["date"].apply(lambda x: x.year) data["day_of_week"] = data["date"].apply(lambda x: x.dayofweek) data = data.loc[(data["date"] >= date(2014, 1, 1)) & (data["date"] <= date(2014, 3, 1))] features = ["hour", "day_of_week"] # hours = pd.get_dummies(data["hour"]) # dows = pd.get_dummies(data["day_of_week"]) hours = data["hour"] dows = data["day_of_week"] X = np.c_[np.asarray(hours), np.asarray(dows)] num_features = X.shape[1] num_periods = len(data)
import findspark
findspark.init()

import pyspark
import configparser

from pyspark.sql import SparkSession

import util

config = configparser.ConfigParser()

spark = SparkSession.builder.master("local[1]").appName(
    "SparkLocal").getOrCreate()

# txt is an RDD of the file's lines
txt = spark.sparkContext.textFile(util.get_data_path('LICENSE.txt'))
print(txt.count())

# count the lines that mention "python" (case-insensitive)
python_lines = txt.filter(lambda line: 'python' in line.lower())
print(python_lines.count())
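# --- Hedged alternative (not in the original script): the same two counts via
# the DataFrame API; spark.read.text yields one row per line in a column
# named `value`.
from pyspark.sql import functions as F

df = spark.read.text(util.get_data_path('LICENSE.txt'))
print(df.count())
print(df.filter(F.lower(F.col('value')).contains('python')).count())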
import os
import sys
import random

import numpy as np
import theano.tensor as T
import matplotlib.pyplot as plt

sys.path.append('src')
from assemble_net import Tracer, MLP, AutoEncoder
from run_net_recursively import run_net_recursively
from view import view_reconstruction
from train import train_net
from test_model import Tester, test_autoencoder
from util import get_data_path, setupLogging
from load_data import load_data, load_test_id
from params import *

ROOT = get_data_path()  # glob=True if data is in the global datasets directory

# load data and params
dataname = 'tracer_data_multi_full_33_17_17_nC20_3360_switch0.60_2718_noise'
#dataname = 'tracer_data_multi_full_33_17_17_nC20_17424_switch0.60_2718_noise'
#dataname = 'tracer_data_multi_full_33_17_17_nC20_87544_switch0.60_2718_noise'
#dataname = 'tracer_data_expert_test_norm'

# will not reload if data is already in workspace
try:
    datasets
except NameError:
    dataset = os.path.join(ROOT, dataname + '.pkl')
    datasets = load_data(dataset, target_is_int=True)

# set rng
rng = np.random.RandomState(23455)
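# Note: the try/except NameError guard above is an interactive-workspace
# caching idiom: when the script is rerun in the same IPython session,
# `datasets` already exists and the expensive load_data call is skipped.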
import os
import pickle

import numpy as np
import theano
import matplotlib.pyplot as plt

from util import get_data_path
from test_model import test_autoencoder
from load_data import load_data

# Debug visualization fragment (depends on a `features` array defined
# elsewhere in the original workspace):
# plt.figure()
# plt.imshow(features)
# plt.show()


def make_params_shared(params):
    """Wrap raw numpy parameter arrays as theano shared variables."""
    shared_params = []
    for p in params:
        p = theano.shared(np.asarray(p, dtype=theano.config.floatX))
        shared_params.append(p)
    return shared_params
    # [[shared_params[0], shared_params[1]], [shared_params[0].T, shared_params[2]]]


if __name__ == '__main__':
    ROOT = get_data_path()
    param_file = os.path.join('data', 'results', 'tracer_data_expert_test/nh_2000_dropout1/LRi_0.00100_reg_0.00/2013_10_11_18_50/params_tracer_data_expert_test_nh_2000_dropout1_LRi_0.00100_reg_0.00_73.450010.pkl')
    data_file = os.path.join(ROOT, 'tracer_data_expert_test.pkl')

    with open(param_file, 'rb') as f:
        params, details = pickle.load(f)

    train, valid, test = load_data(data_file, True)
    details['mode'] = 'test'
    params = make_params_shared(params)
    test_set_x = test[0]
    test_autoencoder(test_set_x, params, details)
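# --- Hedged usage sketch (not in the original file): make_params_shared in
# isolation; the W and b arrays here are hypothetical.
rng = np.random.RandomState(0)
W = rng.randn(784, 100)
b = np.zeros(100)
shared = make_params_shared([W, b])
print(shared[0].get_value().shape)  # -> (784, 100)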
import gzip
import os
import pickle
import sys
import time
import datetime

import numpy as np
import theano
import theano.tensor as T

sys.path.append('src')
from util import get_data_path

ROOT = get_data_path()
LOAD_PARAMS = False
PARAM_PATH = None


def load_convpool_params(rng, img_size, n_in, n_kern, kern_size, n_hid,
                         n_out, pool_ds):
    """Initialize parameters for a stack of conv/pool layers."""
    params = []
    new_size = img_size
    for i in range(len(n_kern)):
        # filter bank shape: (n_filters, n_input_maps, height, width)
        shape = (n_kern[i], n_in, kern_size[i], kern_size[i])
        fan_in = np.prod(shape[1:])
        # each lower-layer unit receives gradients from
        # n_filters * filter_size / pooling_size units above
        fan_out = shape[0] * np.prod(shape[2:]) / (pool_ds[i] ** 2)
        params.append(set_params_random(rng, fan_in, fan_out, shape))
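# --- Hedged reconstruction (set_params_random is referenced above but not
# shown in this fragment): a plausible Glorot-style uniform initializer over
# [-sqrt(6/(fan_in+fan_out)), +sqrt(6/(fan_in+fan_out))], matching the
# fan-in/fan-out bookkeeping the function above appears to follow.
def set_params_random(rng, fan_in, fan_out, shape):
    bound = np.sqrt(6.0 / (fan_in + fan_out))
    return np.asarray(rng.uniform(low=-bound, high=bound, size=shape),
                      dtype=theano.config.floatX)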
""" Test script to visualize some of the random Curve data. @author Graham Taylor """ from PIL import Image import numpy as np import matplotlib.pyplot as plt import sys sys.path.append('code') sys.path.append('code/util') from util import get_data_path, dispims, tile_raster_images from util.serialization import deserialize_object import os.path data_path = get_data_path() # will not reload if data is already in workspace try: datasets except NameError: print 'loading data' datasets = deserialize_object(os.path.join(data_path, 'results/params_tracer_data_multi_full_33_17_17_nC1_11000_switch1.00_2718_noise_nh1a_300_nh1b_50_nh2_500_nout_9_preTrainFalse_LRi_0.01000_reg_10.00_dropoutFalse_2.647312.pkl')) w0 = datasets[2].T print w0.shape n_cases, n_dims = w0.shape im_w = int(np.sqrt(n_dims)) # assume square case_w = int(np.sqrt(n_cases))+1
""" Test script to visualize some of the random Curve data. @author Graham Taylor """ from PIL import Image import numpy as np import matplotlib.pyplot as plt import sys sys.path.append('code') sys.path.append('code/util') from util import get_data_path, dispims, tile_raster_images from util.serialization import deserialize_object import os.path data_path = get_data_path() # will not reload if data is already in workspace try: datasets except NameError: print 'loading data' datasets = deserialize_object( os.path.join( data_path, 'results/params_tracer_data_multi_full_33_17_17_nC1_11000_switch1.00_2718_noise_nh1a_300_nh1b_50_nh2_500_nout_9_preTrainFalse_LRi_0.01000_reg_10.00_dropoutFalse_2.647312.pkl' )) w0 = datasets[2].T print w0.shape