def normal_schedule(self):

    def in_between(now, start, end):
        if start < end:
            return start <= now < end
        elif end < start:
            return start <= now or now < end
        else:
            return True

    def getDatetime(date_string):
        """Gets datetime from string with format HH:MM.
        Should be changed to a datetime built-in function.
        """
        return datetime.time(int(date_string.split(":")[0]),
                             int(date_string.split(":")[1]))

    setpoints_array = self.advise_cfg["Advise"]["Baseline"][self.now.weekday()]
    for j in setpoints_array:
        if in_between(self.now.time(),
                      datetime.time(int(j[0].split(":")[0]), int(j[0].split(":")[1])),
                      datetime.time(int(j[1].split(":")[0]), int(j[1].split(":")[1]))):
            SetpointLow = j[2]
            SetpointHigh = j[3]
            break

    dataManager = DataManager(self.cfg, self.advise_cfg, None,
                              now=self.now, zone=self.zone)
    Safety_temps = dataManager.safety_constraints()

    if not isinstance(SetpointLow, (int, float, long)):
        SetpointLow = Safety_temps[0][0]
    if not isinstance(SetpointHigh, (int, float, long)):
        SetpointHigh = Safety_temps[0][1]

    if (self.cfg["Pricing"]["DR"]
        and in_between(self.now.time(),
                       getDatetime(self.cfg["Pricing"]["DR_Start"]),
                       getDatetime(self.cfg["Pricing"]["DR_Finish"]))) \
            or self.now.weekday() == 4:  # TODO remove always having DR on Friday when DR subscribe is implemented
        SetpointHigh += self.advise_cfg["Advise"]["Baseline_Dr_Extend_Percent"]
        SetpointLow -= self.advise_cfg["Advise"]["Baseline_Dr_Extend_Percent"]

    # Make sure the difference between SetpointHigh and SetpointLow is at least the comfortband.
    if SetpointHigh - SetpointLow < self.advise_cfg["Advise"]["Minimum_Comfortband_Height"]:
        raise Exception(
            "Warning, the difference between SetpointHigh and SetpointLow is too narrow. "
            "Difference: %s. Check the config file schedule." % str(SetpointHigh - SetpointLow))

    # Make sure we are not exceeding the safety temperatures.
    # Only violates the comfortband height if the safety temperatures violate it.
    if SetpointLow < Safety_temps[0][0]:
        diff = Safety_temps[0][0] - SetpointLow
        SetpointLow = Safety_temps[0][0]
        SetpointHigh = min(Safety_temps[0][1], SetpointHigh + diff)
    elif SetpointHigh > Safety_temps[0][1]:
        diff = SetpointHigh - Safety_temps[0][1]
        SetpointHigh = Safety_temps[0][1]
        SetpointLow = max(Safety_temps[0][0], SetpointLow - diff)

    p = {"override": True,
         "heating_setpoint": SetpointLow,
         "cooling_setpoint": SetpointHigh,
         "mode": 3}

    for i in range(self.advise_cfg["Advise"]["Thermostat_Write_Tries"]):
        try:
            self.tstat.write(p)
            print("For zone: %s writing Baseline: %s" % (self.zone, str(p)))
            break
        except:
            if i == self.advise_cfg["Advise"]["Thermostat_Write_Tries"] - 1:
                e = sys.exc_info()[0]
                print e
                return False, p
            continue

    return True, p
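# NOTE: the isinstance(..., (int, float, long)) checks above rely on the Python 2 'long'
# type. A minimal sketch of a version-agnostic numeric check (an assumption, not part of
# the original code; assumes the setpoints are plain numeric scalars) would test against
# numbers.Real from the standard library instead:
#
#     import numbers
#
#     def _is_numeric_setpoint(value):
#         # bool subclasses int, so exclude it explicitly; numpy scalars also register as numbers.Real
#         return isinstance(value, numbers.Real) and not isinstance(value, bool)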
tf.flags.DEFINE_boolean("allow_soft_placement", True, "Allow soft device placement")
tf.flags.DEFINE_boolean("log_device_placement", False, "Log placement of ops on devices")

FLAGS = tf.flags.FLAGS
FLAGS._parse_flags()
print("\nParameters:")
for attr, value in sorted(FLAGS.__flags.items()):
    print("{}={}".format(attr.upper(), value))
print("")

# Data Preparation
# ==================================================
dataManager = DataManager()

# Load data
print("Loading training data...")
x_text, y, _ = dataManager.load_training_data()
print("Finished loading data")

x = []
for data in x_text:
    a = 100 - len(data)
    if a > 0:
        front = a // 2  # pad half in front, the rest in back
        back = a - front
        front_vec = [np.zeros(dataManager.wordvector_dim + 2) for j in range(front)]
conn = psycopg2.connect(database=url.path[1:],
                        user=url.username,
                        password=url.password,
                        host=url.hostname,
                        port=url.port)
"""

##########################################
# Init bot.
##########################################
from DataManager import DataManager
data_manager = DataManager(conn)

from CianCianBot import CianCianBot
bot = CianCianBot(data_manager)

##########################################
# Init flask backend and linebot facility.
##########################################
from flask import Flask, request, abort
from linebot import (LineBotApi, WebhookHandler)
from linebot.exceptions import (InvalidSignatureError)
from linebot.models import (
    MessageEvent, TextMessage,
def __init__(self, shop_name=None, *args, **kwargs):
    super(QuotesSpider, self).__init__(*args, **kwargs)
    self.database = DataManager(shop_name)
    self.shop_name = shop_name.lower()
    self.start_urls = self.database.getScrapyUrl()
    self.page_index = 1
from PIL import Image

# torch
import torch
import torch.nn.functional as F
import torchvision
from torchvision import datasets, transforms, models
from torch import nn

# parameters loading
from AppParametersLoader import AppParametersLoader
parameters = AppParametersLoader()
parameters.print_all()

# data loading
from DataManager import DataManager
data_manager = DataManager()
data_manager.load_TrainTestValid(parameters.data_dir())

# model definition
from ModelManager import ModelManager
if parameters.arch() == 'vgg16':
    model = models.vgg16(pretrained=True)
    input_nodes = 25088
elif parameters.arch() == 'densenet121':
    model = models.densenet121(pretrained=True)
    input_nodes = 1024

classifier = nn.Sequential(
    nn.Linear(input_nodes, parameters.hidden_units()),
    nn.ReLU(),
    nn.Dropout(0.2),
    nn.Linear(parameters.hidden_units(),
action="store_true", help="Plot % of active cases of population") args = p.parse_args() if args.all: args.active = True args.recovered = True args.deaths = True args.population_percent = True logger = Logger("log", autosave=True) if not args.summary and not args.summary_only and not ( args.active or args.recovered or args.deaths or args.population_percent): logger.warning( "No output specified (active/recovered etc.). Use the -h option to get more information." ) exit(0) manager = DataManager(logger, args.countries, True) if args.summary_only: manager.load_summary() print_summary() exit(0) elif args.summary: manager.load_summary() print_summary() present_history(args.countries)
def train():
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    TAG_EMBEDDING_DIM = 64
    VAL_EMBEDDING_DIM = 128
    HIDDEN_DIM = 1500
    NUM_EPOCHS = 2
    LAYER_NUM = 1
    BATCH_SIZE = 256

    data_manager_train = DataManager(TRAIN)
    data_manager_eval = DataManager(TEST)
    warnings.filterwarnings("ignore")

    tag_to_idx, idx_to_tag = data_manager_train.get_tag_dicts()
    val_to_idx, idx_to_val = data_manager_train.get_val_dicts()

    validate_split_idx = int(len(data_manager_eval.get_data()) * 0.04)  # 2000 for eval

    data_train = torch.Tensor([
        (tag_to_idx.get((tag, have_children, have_sibling), tag_to_idx["UNK"]),
         val_to_idx.get(val, val_to_idx["UNK"]))
        for tag, val, have_children, have_sibling in data_manager_train.get_data()
    ])
    data_eval = torch.Tensor([
        (tag_to_idx.get((tag, have_children, have_sibling), tag_to_idx["UNK"]),
         val_to_idx.get(val, val_to_idx["UNK"]))
        for tag, val, have_children, have_sibling in data_manager_eval.get_data()[:validate_split_idx]
    ])

    train_data_loader = torch.utils.data.DataLoader(Dataset(data_train), BATCH_SIZE,
                                                    shuffle=True, drop_last=True, num_workers=8)
    eval_data_loader = torch.utils.data.DataLoader(Dataset(data_eval), BATCH_SIZE,
                                                   shuffle=False, drop_last=True, num_workers=8)

    model_tag = nn.DataParallel(
        AtentionModel(len(tag_to_idx), len(val_to_idx), TAG_EMBEDDING_DIM,
                      VAL_EMBEDDING_DIM, HIDDEN_DIM, LAYER_NUM, False))
    model_val = nn.DataParallel(
        AtentionModel(len(tag_to_idx), len(val_to_idx), TAG_EMBEDDING_DIM,
                      VAL_EMBEDDING_DIM, HIDDEN_DIM, LAYER_NUM, True))
    # model = torch.load(f"D://data//model_attention_1.pickle")

    loss_function = nn.NLLLoss()
    optimizer_tag = optim.Adam(model_tag.parameters())
    optimizer_val = optim.Adam(model_val.parameters())

    # ----------- putting models on GPU -------------
    model_tag.cuda()
    model_val.cuda()
    # ------------------------------------------------

    model_iter = 1
    # used for TensorBoard
    summary_writer = SummaryWriter()

    for epoch in range(NUM_EPOCHS):
        model_tag.train()
        model_val.train()
        for i, (sentence, y) in tqdm(
                enumerate(train_data_loader),
                total=len(train_data_loader),
                desc=f"Epoch: {epoch}",
                unit="batches",
        ):
            global_step = epoch * len(train_data_loader) + i
            size = int(sentence.size(0))

            model_tag.zero_grad()
            model_val.zero_grad()
            model_tag.train()
            model_val.train()

            unk_idx = val_to_idx["UNK"]
            mask_unk = y[:, 1] != unk_idx  # mask for all y val that are not UNK

            sentence_tag = sentence.to(device)
            y_pred_tag = model_tag(sentence_tag)
            y = y.to(device)

            correct_tag = (y_pred_tag.argmax(dim=1) == y[:, 0]).sum().item()
            loss_tag = loss_function(y_pred_tag, y[:, 0].long())

            summary_writer.add_scalar("model_tag: train loss", loss_tag, global_step)
            summary_writer.add_scalar("model_tag: accuracy",
                                      100 * (correct_tag / size), global_step)

            loss_tag.backward()
            nn.utils.clip_grad_value_(model_tag.parameters(), 5.0)
            optimizer_tag.step()

            loss_val = 0
            if mask_unk.sum() > 0:
                # do forward for val_model
                sentence_val = sentence[mask_unk, :, :].to(device)
                y_pred_val = model_val(sentence_val)
                y = y.to(device)

                correct_val = (y_pred_val.argmax(dim=1) == y[mask_unk, 1]).sum().item()
                loss_val = loss_function(y_pred_val, y[mask_unk, 1].long())

                summary_writer.add_scalar("model_value: train loss", loss_val, global_step)
                summary_writer.add_scalar("model_value: train accuracy",
                                          100 * (correct_val / size), global_step)

                loss_val.backward()
                nn.utils.clip_grad_value_(model_val.parameters(), 5.0)
                optimizer_val.step()

            if (i + 1) % 200 == 0:
                tag = f"TRAIN tag accuracy: {100 * (correct_tag / size)}, tag loss: {loss_tag}, "
                val = f"val accuracy: {100 * (correct_val / size)}, val loss: {loss_val}\n"
                with open(f'{DATA_ROOT}log.txt', 'a') as log:
                    log.write(tag)
                    log.write(val)

            TIME_FOR_EVAL = 2500
            if (i + 1) % TIME_FOR_EVAL == 0:
                # evaluation
                torch.save(model_tag, f"D://data//models//tag//budala_{model_iter}.pickle")
                torch.save(model_val, f"D://data//models//val//budala_{model_iter}.pickle")
                model_iter += 1

                model_tag.eval()
                model_val.eval()

                correct_sum_tag = 0
                correct_sum_val = 0
                loss_sum_tag = 0
                loss_sum_val = 0
                size_sum_eval = 0

                with torch.no_grad():
                    for i_eval, (sentence_eval, y_eval) in tqdm(
                            enumerate(eval_data_loader),
                            total=len(eval_data_loader),
                            desc=f"Epoch eval: {global_step//TIME_FOR_EVAL}",
                            unit="batches",
                    ):
                        global_step_eval = (global_step // TIME_FOR_EVAL) * len(eval_data_loader) + i_eval
                        size_eval = int(sentence_eval.size(0))
                        size_sum_eval += size_eval

                        sentence_eval = sentence_eval.to(device)
                        unk_idx = val_to_idx["UNK"]
                        mask_unk = y_eval[:, 1] != unk_idx

                        # tag
                        sentence_tag = sentence_eval.to(device)
                        y_pred_tag = model_tag(sentence_tag)
                        y_eval = y_eval.to(device)

                        correct_tag = (y_pred_tag.argmax(dim=1) == y_eval[:, 0]).sum().item()
                        loss_tag = loss_function(y_pred_tag, y_eval[:, 0].long())
                        correct_sum_tag += correct_tag
                        loss_sum_tag += loss_tag

                        summary_writer.add_scalar("model_tag: evaluation loss",
                                                  loss_tag, global_step_eval)
                        summary_writer.add_scalar("model_tag: evaluation accuracy",
                                                  100 * (correct_tag / size_eval), global_step_eval)

                        if mask_unk.sum() > 0:
                            sentence_eval = sentence_eval[mask_unk].to(device)
                            y_pred_val = model_val(sentence_eval)
                            y_eval = y_eval.to(device)

                            correct_val = (y_pred_val.argmax(dim=1) == y_eval[mask_unk, 1]).sum().item()
                            loss_val = loss_function(y_pred_val, y_eval[mask_unk, 1].long())
                            correct_sum_val += correct_val
                            loss_sum_val += loss_val

                            summary_writer.add_scalar("model_value: evaluation loss",
                                                      loss_val, global_step_eval)
                            summary_writer.add_scalar("model_value: evaluation accuracy",
                                                      100 * (correct_val / size_eval), global_step_eval)

                summary_writer.add_scalar("model_tag: average evaluation loss",
                                          loss_sum_tag / len(eval_data_loader),
                                          global_step // TIME_FOR_EVAL)
                summary_writer.add_scalar("model_tag: average evaluation accuracy",
                                          100 * (correct_sum_tag / size_sum_eval),
                                          global_step // TIME_FOR_EVAL)
                summary_writer.add_scalar("model_value: average evaluation loss",
                                          loss_sum_val / len(eval_data_loader),
                                          global_step // TIME_FOR_EVAL)
                summary_writer.add_scalar("model_value: average evaluation accuracy",
                                          100 * (correct_sum_val / size_sum_eval),
                                          global_step // TIME_FOR_EVAL)

                tag = (f"EVAL: tag accuracy: {100 * (correct_sum_tag / size_sum_eval)}, "
                       f"tag loss: {loss_sum_tag/len(eval_data_loader)}, ")
                val = (f"val accuracy: {100 * (correct_sum_val / size_sum_eval)}, "
                       f"val loss: {loss_sum_val/len(eval_data_loader)}\n")
                with open(f'{DATA_ROOT}log.txt', 'a') as log:
                    log.write(tag)
                    log.write(val)
        )

    # function that runs the shortest path algorithm and returns the action produced by the MPC
    def advise(self):
        self.advise_unit.shortest_path(self.root)
        path = self.advise_unit.reconstruct_path()
        action = self.advise_unit.g[path[0]][path[1]]['action']
        if self.plot:
            fig = plotly_figure(self.advise_unit.g, path=path)
            py.plot(fig)
        return action


if __name__ == '__main__':
    from DataManager import DataManager

    with open("config_south.yml", 'r') as ymlfile:
        cfg = yaml.load(ymlfile)

    dm = DataManager(cfg)
    adv = Advise(
        datetime.datetime.utcnow().replace(tzinfo=pytz.timezone("UTC")).astimezone(
            tz=pytz.timezone("America/Los_Angeles")),
        dm.preprocess_occ(),
        dm.preprocess_therm(),
        dm.weather_fetch(),
        "winter_rates",
        0.99995, 15, 1, True, 87, 55, 0.075, 1.25, 400, 400.)

    print adv.advise()
# 452 Assignment 2
# Written by: Connor Moore
# Student # : 20011955
# Date: Feb 26, 2019

from Glass_BPNV4 import BPNetwork
from DataManager import DataManager
import numpy as np
from sklearn.metrics import confusion_matrix, classification_report

# NNet = BPNetwork(9, 8, 6)
fileWrite = False
data = DataManager()
test = BPNetwork(9, 8, 7)

# initial weights
init_weights = test.weights_ih


# convert back to integer
def convert_decode(arr):
    if arr == [1, 0, 0, 0, 0, 0, 0]:
        return 1
    if arr == [0, 1, 0, 0, 0, 0, 0]:
        return 2
    if arr == [0, 0, 1, 0, 0, 0, 0]:
        return 3
    if arr == [0, 0, 0, 1, 0, 0, 0]:
        return 4
    if arr == [0, 0, 0, 0, 1, 0, 0]:
        return 5
    if arr == [0, 0, 0, 0, 0, 1, 0]:
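# A more compact alternative to the exact-list comparisons in convert_decode (a sketch,
# not part of the original code; assumes the input is a one-hot vector and classes are
# numbered from 1, matching the mapping above, and uses the numpy already imported as np):
#
#     def convert_decode_argmax(arr):
#         # index of the single "hot" entry, shifted to 1-based class labels
#         return int(np.argmax(arr)) + 1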
def mainTF(options):
    import tensorflow as tf
    from CreateModel import CreateModel
    from DataManager import DataManager
    from DataSet import DataSet

    print "PROCESSING VALIDATION DATA"

    dgSig = DataGetter.DefinedVariables(options.netOp.vNames, signal=True, background=False)
    dgBg = DataGetter.DefinedVariables(options.netOp.vNames, signal=False, background=True)

    validDataSig = [((dataPath + "/trainingTuple_0_division_1_rpv_stop_850_validation_0.h5", ), 2), ]
    validDataSig2 = [((dataPath + "/trainingTuple_0_division_1_stealth_stop_350_SHuHd_validation_0.h5", ), 2), ]
    validDataSig3 = [((dataPath + "/trainingTuple_0_division_1_rpv_stop_350_validation_0.h5", ), 2), ]
    validDataBgTTbar = [((dataPath + "/trainingTuple_20_division_1_TT_validation_0.h5", ), 1),
                        ((dataPath + "/trainingTuple_2110_division_1_TT_validation_0.h5", ), 1), ]

    print "Input Variables: ", len(dgSig.getList())

    # Import data
    #print options.runOp.validationSamples
    validDataSig = getValidData(dgSig, validDataSig, options)
    validDataSig2 = getValidData(dgSig, validDataSig2, options)
    validDataSig3 = getValidData(dgSig, validDataSig3, options)
    validDataBgTTbar = getValidData(dgBg, validDataBgTTbar, options)

    validDataTTbar = combineValidationData(validDataSig, validDataBgTTbar)
    validDataQCDMC = combineValidationData(validDataSig2, validDataBgTTbar)
    validDataQCDData = combineValidationData(validDataSig3, validDataBgTTbar)

    # get input/output sizes
    #print validData["data"].shape
    nFeatures = validDataTTbar["data"].shape[1]
    nLabels = validDataTTbar["labels"].shape[1]
    nWeights = validDataTTbar["weights"].shape[1]
    nDomain = validDataSig["domain"].shape[1]

    # Training parameters
    l2Reg = options.runOp.l2Reg
    MiniBatchSize = options.runOp.minibatchSize
    nEpoch = options.runOp.nepoch
    ReportInterval = options.runOp.reportInterval
    validationCount = min(options.runOp.nValidationEvents, validDataTTbar["data"].shape[0])

    # scale data inputs to mean 0, stddev 1
    categories = numpy.array(options.netOp.vCategories)
    mins = numpy.zeros(categories.shape, dtype=numpy.float32)
    ptps = numpy.zeros(categories.shape, dtype=numpy.float32)
    for i in xrange(categories.max()):
        selectedCategory = categories == i
        mins[selectedCategory] = validDataTTbar["data"][:, selectedCategory].mean()
        ptps[selectedCategory] = validDataTTbar["data"][:, selectedCategory].std()
    ptps[ptps < 1e-10] = 1.0

    ## Create data manager, this class controls how data is fed to the network for training
    # DataSet(fileGlob, xsec, Nevts, kFactor, sig, prescale, rescale)
    signalDataSets = [
        #DataSet(dataPath + "/trainingTuple_*_division_*_rpv_stop_350_training_0.h5", 365.4, 61878989, 1.0, True, 0, 1.0, 1.0, 1),
        #DataSet(dataPath + "/trainingTuple_*_division_*_rpv_stop_450_training_0.h5", 365.4, 61878989, 1.0, True, 0, 1.0, 1.0, 1),
        DataSet(dataPath + "/trainingTuple_*_division_*_rpv_stop_550_training_0.h5", 365.4, 61878989, 1.0, True, 0, 1.0, 1.0, 1),
        DataSet(dataPath + "/trainingTuple_*_division_*_rpv_stop_650_training_0.h5", 365.4, 61878989, 1.0, True, 0, 1.0, 1.0, 1),
        DataSet(dataPath + "/trainingTuple_*_division_*_rpv_stop_750_training_0.h5", 365.4, 61878989, 1.0, True, 0, 1.0, 1.0, 1),
        DataSet(dataPath + "/trainingTuple_*_division_*_rpv_stop_850_training_0.h5", 365.4, 61878989, 1.0, True, 0, 1.0, 1.0, 1),
        #DataSet(dataPath + "/trainingTuple_*_division_*_stealth_stop_350_SHuHd_training_0.h5", 365.4, 61878989, 1.0, True, 0, 1.0, 1.0, 1),
        #DataSet(dataPath + "/trainingTuple_*_division_*_stealth_stop_450_SHuHd_training_0.h5", 365.4, 61878989, 1.0, True, 0, 1.0, 1.0, 1),
        DataSet(dataPath + "/trainingTuple_*_division_*_stealth_stop_550_SHuHd_training_0.h5", 365.4, 61878989, 1.0, True, 0, 1.0, 1.0, 1),
        DataSet(dataPath + "/trainingTuple_*_division_*_stealth_stop_650_SHuHd_training_0.h5", 365.4, 61878989, 1.0, True, 0, 1.0, 1.0, 1),
        DataSet(dataPath + "/trainingTuple_*_division_*_stealth_stop_750_SHuHd_training_0.h5", 365.4, 61878989, 1.0, True, 0, 1.0, 1.0, 1),
        DataSet(dataPath + "/trainingTuple_*_division_*_stealth_stop_850_SHuHd_training_0.h5", 365.4, 61878989, 1.0, True, 0, 1.0, 1.0, 1),
        #DataSet(dataPath + "/trainingTuple_*_division_*_stealth_stop_350_SYY_training_0.h5", 365.4, 61878989, 1.0, True, 0, 1.0, 1.0, 1),
        #DataSet(dataPath + "/trainingTuple_*_division_*_stealth_stop_450_SYY_training_0.h5", 365.4, 61878989, 1.0, True, 0, 1.0, 1.0, 1),
        DataSet(dataPath + "/trainingTuple_*_division_*_stealth_stop_550_SYY_training_0.h5", 365.4, 61878989, 1.0, True, 0, 1.0, 1.0, 1),
        DataSet(dataPath + "/trainingTuple_*_division_*_stealth_stop_650_SYY_training_0.h5", 365.4, 61878989, 1.0, True, 0, 1.0, 1.0, 1),
        DataSet(dataPath + "/trainingTuple_*_division_*_stealth_stop_750_SYY_training_0.h5", 365.4, 61878989, 1.0, True, 0, 1.0, 1.0, 1),
        DataSet(dataPath + "/trainingTuple_*_division_*_stealth_stop_850_SYY_training_0.h5", 365.4, 61878989, 1.0, True, 0, 1.0, 1.0, 1),
        #DataSet(dataPath + "/trainingTuple_*_division_*_rpv_stop_*_training_0.h5", 365.4, 61878989, 1.0, True, 0, 1.0, 1.0, 1),
        #DataSet(dataPath + "/trainingTuple_*_division_*_stealth_stop_*_SHuHd_training_0.h5", 365.4, 61901450, 1.0, True, 0, 1.0, 1.0, 1),
        #DataSet(dataPath + "/trainingTuple_*_division_*_stealth_stop_*_SYY_training_0.h5", 365.4, 61901450, 1.0, True, 0, 1.0, 1.0, 1),
    ]

    backgroundDataSets = [DataSet(dataPath + "/trainingTuple_*_division_0_TT_training_0.h5", 365.4, 61878989, 1.0, False, 0, 1.0, 1.0, len(signalDataSets)), ]

    dm = DataManager(options.netOp.vNames, nEpoch, nFeatures, nLabels, nDomain, nWeights,
                     options.runOp.ptReweight, signalDataSets, backgroundDataSets)

    # Build the graph
    denseNetwork = [nFeatures] + options.netOp.denseLayers + [nLabels]
    convLayers = options.netOp.convLayers
    rnnNodes = options.netOp.rnnNodes
    rnnLayers = options.netOp.rnnLayers
    mlp = CreateModel(options, denseNetwork, convLayers, rnnNodes, rnnLayers,
                      dm.inputDataQueue, MiniBatchSize, mins, 1.0 / ptps)

    # summary writer
    summary_path = "/storage/local/data1/gpuscratch/%s" % (USER)
    os.makedirs(summary_path)
    summary_writer = tf.summary.FileWriter(summary_path + "/log_graph", graph=tf.get_default_graph())

    print "TRAINING NETWORK"

    with tf.Session(config=tf.ConfigProto(intra_op_parallelism_threads=8)) as sess:
        sess.run(tf.global_variables_initializer())

        # start queue runners
        dm.launchQueueThreads(sess)

        print "Reporting validation loss every %i batches with %i events per batch for %i epochs" % (ReportInterval, MiniBatchSize, nEpoch)

        # preload the first data into staging area
        sess.run([mlp.stagingOp],
                 feed_dict={mlp.reg: l2Reg, mlp.keep_prob: options.runOp.keepProb})

        i = 0
        N_TRAIN_SUMMARY = 10

        # flush queue until the sample fraction is approximately equal
        while dm.continueTrainingLoop():
            result = sess.run(dm.inputDataQueue.dequeue_many(MiniBatchSize))
            signalFraction = result[1][:, 0].sum() / MiniBatchSize
            # the first time this fraction drops below 0.5 means we are close enough to equal signal/bg fraction
            if signalFraction < 0.5:
                break

        try:
            while dm.continueTrainingLoop():
                grw = 1.0  # 1*(2/(1+exp(-i/10000.0)) - 1)  # 2/(1+exp(-i/10000.0)) - 1  # 1000000000000.0*(2/(1+exp(-i/500000.0)) - 1)

                # run validation operations
                if i == 0 or not i % ReportInterval:
                    validation_loss, accuracy, summary_vl = sess.run(
                        [mlp.loss_ph, mlp.accuracy, mlp.merged_valid_summary_op],
                        feed_dict={mlp.x_ph: validDataTTbar["data"][:validationCount],
                                   mlp.y_ph_: validDataTTbar["labels"][:validationCount],
                                   mlp.p_ph_: validDataTTbar["domain"][:validationCount],
                                   mlp.reg: l2Reg,
                                   mlp.gradientReversalWeight: grw,
                                   mlp.wgt_ph: validDataTTbar["weights"][:validationCount]})
                    summary_writer.add_summary(summary_vl, i / N_TRAIN_SUMMARY)
                    print('Interval %d, validation accuracy %0.6f, validation loss %0.6f' % (i / ReportInterval, accuracy, validation_loss))

                    validation_loss, accuracy, summary_vl_QCDMC = sess.run(
                        [mlp.loss_ph, mlp.accuracy, mlp.merged_valid_QCDMC_summary_op],
                        feed_dict={mlp.x_ph: validDataQCDMC["data"][:validationCount],
                                   mlp.y_ph_: validDataQCDMC["labels"][:validationCount],
                                   mlp.p_ph_: validDataQCDMC["domain"][:validationCount],
                                   mlp.reg: l2Reg,
                                   mlp.gradientReversalWeight: grw,
                                   mlp.wgt_ph: validDataQCDMC["weights"][:validationCount]})
                    summary_writer.add_summary(summary_vl_QCDMC, i / N_TRAIN_SUMMARY)

                    validation_loss, accuracy, summary_vl_QCDData = sess.run(
                        [mlp.loss_ph, mlp.accuracy, mlp.merged_valid_QCDData_summary_op],
                        feed_dict={mlp.x_ph: validDataQCDData["data"][:validationCount],
                                   mlp.y_ph_: validDataQCDData["labels"][:validationCount],
                                   mlp.p_ph_: validDataQCDData["domain"][:validationCount],
                                   mlp.reg: l2Reg,
                                   mlp.gradientReversalWeight: grw,
                                   mlp.wgt_ph: validDataQCDData["weights"][:validationCount]})
                    summary_writer.add_summary(summary_vl_QCDData, i / N_TRAIN_SUMMARY)
                    #print(sess.run(mlp.x))

                # run training operations
                if i % N_TRAIN_SUMMARY == 0:
                    _, _, summary = sess.run(
                        [mlp.stagingOp, mlp.train_step, mlp.merged_train_summary_op],
                        feed_dict={mlp.reg: l2Reg,
                                   mlp.keep_prob: options.runOp.keepProb,
                                   mlp.training: True,
                                   mlp.gradientReversalWeight: grw})
                    summary_writer.add_summary(summary, i / N_TRAIN_SUMMARY)
                else:
                    sess.run([mlp.stagingOp, mlp.train_step],
                             feed_dict={mlp.reg: l2Reg,
                                        mlp.keep_prob: options.runOp.keepProb,
                                        mlp.training: True})
                i += 1

            # Should fix bad end of training state
            while dm.continueFlushingQueue():
                sess.run(dm.inputDataQueue.dequeue_many(MiniBatchSize))

        except Exception, e:
            # Report exceptions to the coordinator.
            dm.requestStop(e)
        finally:
def __init__(self): """Default constructor""" AssociationAnalysis.__init__(self) self.connFactory = DBUtil.ConnectionFactory() # Default connection source self.dataManager = DataManager()
def exitValue(self, ctx):
    if ctx.ID() is not None:
        if ctx.ID().getText() in self.memory:
            ctx.data_manager = self.memory[ctx.ID().getText()]
        elif ctx.ID().getText() in self.functions:
            value = LLVMGenerator.call_fun(ctx.ID().getText())
            llvm_name = "%" + str(LLVMGenerator.str_i)
            python_name = llvm_name
            var_type = "i32"
            size = "4"
            is_const = True
            data_manager = DataManager(llvm_name=llvm_name, python_name=python_name,
                                       var_type=var_type, size=size, is_const=is_const)
            ctx.data_manager = data_manager
            self.memory[python_name] = data_manager
            LLVMGenerator.allocate(data_manager=data_manager, value=value)
        else:
            raise RuntimeError("Variable: '" + str(ctx.ID().getText()) + "' not recognized")

    if ctx.INT() is not None:
        llvm_name = "%" + str(LLVMGenerator.str_i)
        python_name = llvm_name
        var_type = "i32"
        size = "4"
        is_const = True
        data_manager = DataManager(llvm_name=llvm_name, python_name=python_name,
                                   var_type=var_type, size=size, is_const=is_const)
        ctx.data_manager = data_manager
        self.memory[python_name] = data_manager
        value = ctx.INT().getText()
        LLVMGenerator.allocate(data_manager=data_manager, value=value)

    if ctx.DOUBLE() is not None:
        llvm_name = "%" + str(LLVMGenerator.str_i)
        python_name = llvm_name
        var_type = "double"
        size = "8"
        is_const = True
        data_manager = DataManager(llvm_name=llvm_name, python_name=python_name,
                                   var_type=var_type, size=size, is_const=is_const)
        ctx.data_manager = data_manager
        self.memory[python_name] = data_manager
        value = ctx.DOUBLE().getText()
        LLVMGenerator.allocate(data_manager=data_manager, value=value)

    if ctx.STRING() is not None:
        llvm_name = "@str" + str(LLVMGenerator.str_i)
        python_name = llvm_name
        string = ctx.STRING().getText()[1:-1]
        length = len(string) + 2
        var_type = "[{} x i8]".format(length)
        size = "1"
        is_const = True
        data_manager = DataManager(llvm_name=llvm_name, python_name=python_name,
                                   var_type=var_type, size=size, is_const=is_const,
                                   length=length)
        ctx.data_manager = data_manager
        self.memory[python_name] = data_manager
        LLVMGenerator.allocate_string(data_manager=data_manager, string=string)

    if ctx.arr() is not None:
        llvm_name = "%" + str(LLVMGenerator.str_i)
        python_name = llvm_name
        length = len(ctx.arr().value())
        sub_type = ctx.arr().value()[0].data_manager.var_type
        var_type = "[{} x {}]".format(length, sub_type)
        size = ctx.arr().value()[0].data_manager.size
        is_const = True
        data_manager = DataManager(llvm_name=llvm_name, python_name=python_name,
                                   var_type=var_type, size=size, is_const=is_const,
                                   sub_type=sub_type, length=length)
        ctx.data_manager = data_manager
        self.memory[python_name] = data_manager
        values = ctx.arr().value()
        LLVMGenerator.allocate_array(data_manager=data_manager, values=values)

    if ctx.arr_element() is not None:
        if ctx.arr_element().ID().getText() in self.memory:
            id_dm = self.memory[ctx.arr_element().ID().getText()]
            value_dm = ctx.arr_element().value().data_manager
            llvm_name = LLVMGenerator.get_elem(id_dm=id_dm, value_dm=value_dm)
            python_name = llvm_name
            var_type = id_dm.sub_type
            size = id_dm.size
            is_const = True
            data_manager = DataManager(llvm_name=llvm_name, python_name=python_name,
                                       var_type=var_type, size=size, is_const=is_const)
            self.memory[python_name] = data_manager
            ctx.data_manager = data_manager
        else:
            raise RuntimeError("Array not recognized")

    if ctx.struct_elem() is not None:
        id_1 = ctx.struct_elem().ID()[0].getText()
        id_2 = ctx.struct_elem().ID()[1].getText()
        struct = self.structures_obj[id_1]
        parent = struct.parent
        index = parent.get_number(id_2)
        id = LLVMGenerator.get_struct_elem(parent, struct, index)
        llvm_name = id
        python_name = llvm_name
        var_type = parent.types[index]
        size = 4
        is_const = False
        data_manager = DataManager(llvm_name, python_name, var_type, size, is_const)
        ctx.data_manager = data_manager
from xbos.services.hod import HodClient
from xbos.devices.thermostat import Thermostat

hc = HodClient("xbos/hod", c)

q = """SELECT ?uri ?zone FROM %s WHERE {
    ?tstat rdf:type/rdfs:subClassOf* brick:Thermostat .
    ?tstat bf:uri ?uri .
    ?tstat bf:controls/bf:feeds ?zone .
};""" % cfg["Building"]

import pickle
with open("../Thermal Data/ciee_thermal_data_demo", "r") as f:
    thermal_data = pickle.load(f)

dm = DataManager(cfg, advise_cfg, c, ZONE)

tstat_query_data = hc.do_query(q)['Rows']
tstats = {tstat["?zone"]: Thermostat(c, tstat["?uri"]) for tstat in tstat_query_data}

# TODO: the interval should not be in config_file.yml; there should be a different interval for each zone
from ThermalModel import *
thermal_model = MPCThermalModel(thermal_data, interval_length=cfg["Interval_Length"])
thermal_model.setZoneTemperaturesAndFit(
    {dict_zone: dict_tstat.temperature for dict_zone, dict_tstat in tstats.items()},
    dt=cfg["Interval_Length"])
thermal_model.setWeahterPredictions(dm.weather_fetch())

adv = Advise(["HVAC_Zone_Centralzone"],
             datetime.datetime.utcnow().replace(tzinfo=pytz.utc).astimezone(
                 tz=pytz.timezone("America/Los_Angeles")),
             dm.preprocess_occ(),
def generate_theoretical_data(self, ticker_tgt, ticker_src, step=0.00005,
                              pos_adj=None, neg_adj=None):
    """Generates theoretical data for a stock based on another stock.

    Given two tickers, a granularity/precision step, and manual
    offsets/adjustments, generates more data for the first stock (gen) to
    match the length of data in the second stock (src). The generation is
    based on averages in existing real data and assumes an existing
    correlation between the two stocks (e.g. UPRO and SPY supposedly have a
    correlation, or leverage factor, of 3).

    Args:
        ticker_tgt: Ticker of the stock for which data should be generated,
            i.e. the target of the generation.
        ticker_src: Ticker of the stock to be used as the data source to aid
            in data generation. NOTE: this implies the source data should be
            longer than the data for the stock for which the generation
            occurs.
        step: A value corresponding to a level of precision, i.e. the number
            of averages calculated and then used to generate the data.
            NOTE: precision != accuracy; the default of 0.00005 is used if
            none is given, based on testing done on different values.
        pos_adj: A value used when adjusting movements in the positive
            direction, i.e. a higher value leads to more pronounced positive
            moves (default: None; if None, a hardcoded default value is used
            depending on the ticker, typically 0).
        neg_adj: A value used when adjusting movements in the negative
            direction, i.e. a higher value leads to more pronounced negative
            moves (default: None; if None, a hardcoded default value is used
            depending on the ticker, typically 0).

    Returns:
        A tuple of price LUTs, one containing real data appended to a part of
        the generated data, the other containing a full set of generated
        data. The former is intended for backtesting strategies, while the
        latter is intended for verifying generation accuracy against existing
        real data.
    """
    db = DataManager()

    # get prices for tickers
    price_lut_tgt = db.build_price_lut(ticker_tgt)
    price_lut_src = db.build_price_lut(ticker_src)

    # before doing any calculations, check if all data is on disk already
    # NOTE: feature disabled for now, as it didn't respond to changes
    # price_lut_gen_part = db.build_price_lut(ticker_tgt + '--GEN-PART')
    # price_lut_gen_full = db.build_price_lut(ticker_tgt + '--GEN-FULL')
    # if (len(price_lut_gen_part) == len(price_lut_src)
    #         and len(price_lut_gen_full) == len(price_lut_src)):
    #     return (price_lut_gen_part, price_lut_gen_full)

    # sorted dates needed later
    src_dates = sorted(price_lut_src.keys())
    gen_dates = sorted(price_lut_tgt.keys())

    # part of data will be real data
    price_lut_gen_part = price_lut_tgt.copy()
    # fully generated data needs a real point as an anchor
    price_lut_gen_full = {gen_dates[0]: price_lut_tgt[gen_dates[0]]}

    # a set of adjustments to use if not otherwise specified
    adjustments = {'UPRO': (0, 0),
                   'TMF': (0.01, 0.05),
                   'TQQQ': (0.025, 0),
                   'UDOW': (0, 0.01)}
    if step == 0.00005 and pos_adj is None and neg_adj is None:
        try:
            pos_adj = adjustments[ticker_tgt.upper()][0]
            neg_adj = adjustments[ticker_tgt.upper()][1]
        except KeyError:
            pos_adj = 0
            neg_adj = 0

    # calculate % movements and leverage ratio, to use for the SA-LUT
    moves = {}
    ratios = {}
    for i in range(len(gen_dates) - 1):
        change_src = (price_lut_src[gen_dates[i + 1]]
                      / price_lut_src[gen_dates[i]] - 1)
        change_gen = (price_lut_tgt[gen_dates[i + 1]]
                      / price_lut_tgt[gen_dates[i]] - 1)
        moves[gen_dates[i + 1]] = change_src
        if change_src == 0:
            ratios[gen_dates[i + 1]] = 0.0
        else:
            ratios[gen_dates[i + 1]] = change_gen / change_src
    sa_lut = SteppedAvgLookup(step,
                              [moves[d] for d in gen_dates[1:]],
                              [ratios[d] for d in gen_dates[1:]])

    # generate data going forward from gen data's anchor point
    for i in range(len(gen_dates) - 1):
        move = moves[gen_dates[i + 1]]
        if move >= 0:
            adj = pos_adj
        else:
            adj = neg_adj
        price_lut_gen_full[gen_dates[i + 1]] = \
            (price_lut_gen_full[gen_dates[i]]
             * (move * (sa_lut.get(move) + adj) + 1))

    # generate data going backwards from gen data's anchor point
    for i in range(len(src_dates) - len(gen_dates) - 1, -1, -1):
        move = (price_lut_src[src_dates[i + 1]]
                / price_lut_src[src_dates[i]] - 1)
        if move >= 0:
            adj = pos_adj
        else:
            adj = neg_adj
        gen_price = (price_lut_gen_full[src_dates[i + 1]]
                     / (move * (sa_lut.get(move) + adj) + 1))
        price_lut_gen_full[src_dates[i]] = gen_price
        price_lut_gen_part[src_dates[i]] = gen_price

    # save data to disk for faster retrieval next time
    db.write_stock_data(
        ticker_tgt + '--GEN-FULL',
        [[date, '-', '-', '-', str(price_lut_gen_full[date]), '-']
         for date in src_dates],
        False)
    db.write_stock_data(
        ticker_tgt + '--GEN-PART',
        [[date, '-', '-', '-', str(price_lut_gen_part[date]), '-']
         for date in src_dates],
        False)

    return (price_lut_gen_part, price_lut_gen_full)
print("epoch ", e, ": dev F1: ", devF1, ", test F1: ", testF1) f.write("epoch "+ str(e)+ ": dev F1: "+ str(devF1)+ ", test F1: "+ str(testF1)+ "\n") f.close() torch.save(model, "checkpoints/model_"+args.logfile+"_"+str(e)) if __name__ == "__main__": torch.manual_seed(1) if not os.path.exists('checkpoints'): os.mkdir('checkpoints') argv = sys.argv[1:] parser = Parser().getParser() args, _ = parser.parse_known_args(argv) print("Load data start...") dm = DataManager(args.datapath, args.testfile) wv = dm.vector train_data, test_data, dev_data = dm.data['train'], dm.data['test'], dm.data['dev'] print("train_data count: ", len(train_data)) print("test_data count: ", len(test_data)) print("dev_data count: ", len(dev_data)) model = Model(args.lr, args.dim, args.statedim, wv, dm.relation_count) model.cuda() if args.start != '': pretrain_model = torch.load(args.start) model_dict = model.state_dict() pretrained_dict = pretrain_model.state_dict() pretrained_dict = {k: v for k, v in pretrained_dict.items() if k in model_dict} model_dict.update(pretrained_dict)
parser.add_argument('--grained', type=int, default=3)
parser.add_argument('--lr', type=float, default=0.01)
parser.add_argument('--lr_word_vector', type=float, default=0.1)
parser.add_argument('--epoch', type=int, default=25)
parser.add_argument('--batch', type=int, default=25)
parser.add_argument('--patience', type=int, default=5)
args, _ = parser.parse_known_args(argv)

fold = args.fold
seed = args.seed
random.seed(seed)
os.environ['PYTHONHASHSEED'] = str(seed)
np.random.seed(seed)

data = DataManager(args.dataset, args.seed, grained=3)
wordlist = data.gen_word()
train_data, dev_data, test_data = data.gen_data(args.grained)
print 'Data Generated'

model = Model(wordlist, argv, len(data.dict_target))
print 'model instantiated'

batch_n = (len(train_data) - 1) / args.batch + 1
optimizer = OptimizerList[args.optimizer](model.params, args.lr, args.lr_word_vector)
details = {'loss': [], 'loss_train': [], 'loss_dev': [], 'loss_test': [],
           'acc_train': [], 'acc_dev': [], 'acc_test': [], 'loss_l2': []}
patience = args.patience
        skip_first = 0
        for pair, df in self.returns_dict.items():
            if skip_first == 0:
                skip_first = 1
                continue
            combined_returns = pd.concat([combined_returns, df], ignore_index=True, axis=0)
        self.combined_returns = combined_returns

        self.total_returns = 1
        for returns in combined_returns['returns'].values:
            self.total_returns = self.total_returns * (1 + returns)


if __name__ == "__main__":
    dm = DataManager()
    # This code will just do it for one sector
    # x.data = x.getOneSector(sector="Energy", fromDate="2015-01-01", toDate="2016-09-21")
    dm.getOneSector(sector="Energy", fromDate="2013-01-01", toDate="2015-01-01")
    # x.calcReturns()

    strat = CointStrategyStopLoss
    bt = Backtester(strat, dm.data)
    bt.backtest()
    bt.plot_stuff()
    # bt.strat.CA.plot_pair(['MA','V'], fromDate="2014-01-01", toDate="2018-01-01")
    print(bt.total_returns)
    plt.show()
    # handle downloading from a list of tickers
    if args.download:
        for ticker in args.download:
            download_and_write(ticker, args.using)
        exit()


if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Downloader for historical stock data.")
    parser.add_argument('--using', default='google', nargs=1,
                        help=('a source/API from which to get the data, '
                              'default: google'))
    download_group = parser.add_mutually_exclusive_group(required=True)
    download_group.add_argument('--download', nargs='+',
                                help='the stock ticker(s) to download')
    download_group.add_argument('--download-from', nargs='+',
                                help=('file(s) containing the stock tickers '
                                      'to download'))

    downloader = Downloader()
    db = DataManager()

    main()
    print("Did nothing.")
    exit()
parser.add_argument('--fast', type=int, choices=[0, 1], default=0)
parser.add_argument('--screen', type=int, choices=[0, 1], default=0)
parser.add_argument('--optimizer', type=str, default='ADAGRAD')
parser.add_argument('--grained', type=int, default=2)
parser.add_argument('--lr', type=float, default=0.0001)
parser.add_argument('--lr_word_vector', type=float, default=0.000007)
parser.add_argument('--epoch', type=int, default=25)
parser.add_argument('--batch', type=int, default=10)
parser.add_argument('--doc_num', type=int, default=50000)
# parser.add_argument('--reload', type=str, default=True)
parser.add_argument('--saveto', type=str, default='best_model17.pkl')
parser.add_argument('--reload_dic', type=str, default=False)
# parser.add_argument('--reload_dic', type=str, default='dic.pkl')
args, _ = parser.parse_known_args(argv)
random.seed(args.seed)

data = DataManager(args.dataset)
if args.reload_dic:
    print('reloading dictionary...')
    wordlist = data.load_word(args.reload_dic)
else:
    print('building dictionary...')
    wordlist = data.gen_word()
    print('saving dictionary...')
    pkl.dump(wordlist, open('dic.pkl', 'wb'), -1)
print('%d unique words in total' % len(wordlist))

train_data, test_data = data.gen_data(args.grained)
random.shuffle(train_data)
num = int(len(train_data) * 0.11)
dev_data = train_data[:num]
train_data_new = train_data[num:]
def get_all():
    return jsonify(DataManager().get_all_data())
parser.add_argument('--interval', type=int, default=10)
# parse the configured arguments
args, _ = parser.parse_known_args(argv)

# configure the log file format
logging.basicConfig(
    filename=('log/%s.log' % args.name) * (1 - args.screen),
    level=logging.DEBUG,
    format='%(asctime)s %(filename)s[line:%(lineno)d] %(message)s',
    datefmt='%H:%M:%S')

# load the corpus text plus the sentiment, negation and intensifier word lists
dm = DataManager(args.dataset, {
    'negation': 'negation.txt',
    'intensifier': 'intensifier.txt',
    'sentiment': 'sentiment.txt'
})
# extract each category of words from the raw corpus
dm.gen_word_list()
# convert the words into numeric lists and build the train, dev and test sets
dm.gen_data()

# build the model
model = Model(dm.words, dm.grained, argv)
# instantiate the evaluator
Evaluator = EvaluatorList[dm.grained]


def do_train(label, data):
def get_node(node):
    res = DataManager().get_node(node, request.json)
    return jsonify(res)
import numpy as np
from keras.models import Sequential
from keras.layers import LSTM, Dense, BatchNormalization, Flatten, Reshape
from keras import regularizers
from keras.preprocessing.image import ImageDataGenerator
from keras import utils

from DataManager import DataManager

print("Loading training data...")
dm = DataManager(random_state=0)
training_data, training_labels = dm.loadTrainingData()
testing_data, testing_labels = dm.loadTestingData()
validation_data, validation_labels = dm.loadValidationData()

print('Loaded shapes')
for i in (training_data, training_labels, testing_data, testing_labels,
          validation_data, validation_labels):
    print(i.shape)

input_shape = tuple(training_data.shape[1:])
num_classes = len(np.unique(training_labels))
print("input_shape: {}".format(input_shape))
print("num_classes: {}".format(num_classes))

# Convert to categorical classes
training_labels = utils.to_categorical(training_labels, num_classes)
testing_labels = utils.to_categorical(testing_labels, num_classes)
validation_labels = utils.to_categorical(validation_labels, num_classes)

data_generator = ImageDataGenerator(featurewise_center=True,
                                    featurewise_std_normalization=True,
                                    rotation_range=20,
def create(node):
    if not request.json:
        abort(400)
    return jsonify(DataManager().add_row(node, request.json))
def mainTF(options):
    import tensorflow as tf
    from CreateModel import CreateModel
    from DataManager import DataManager
    from DataSet import DataSet

    print "PROCESSING VALIDATION DATA"

    dgSig = DataGetter.DefinedVariables(options.netOp.vNames, signal=True)
    dgBg = DataGetter.DefinedVariables(options.netOp.vNames, background=True)

    validDataSig = [
        (("/cms/data/pastika/trainData_pt20_30_40_dRPi_tightMass_deepFlavor_v6p1/trainingTuple_0_division_1_TTbarSingleLepT_validation_0.h5", ), 1),
        (("/cms/data/pastika/trainData_pt20_30_40_dRPi_tightMass_deepFlavor_v6p1/trainingTuple_0_division_1_TTbarSingleLepTbar_validation_0.h5", ), 1),
    ]

    validDataBgTTbar = [
        (("/cms/data/pastika/trainData_pt20_30_40_dRPi_tightMass_deepFlavor_v6/trainingTuple_0_division_1_TTbarSingleLepT_validation_0.h5", ), 1),
        (("/cms/data/pastika/trainData_pt20_30_40_dRPi_tightMass_deepFlavor_v6/trainingTuple_0_division_1_TTbarSingleLepTbar_validation_0.h5", ), 1),
    ]

    validDataBgQCDMC = [
        (("/cms/data/pastika/trainData_pt20_30_40_dRPi_tightMass_deepFlavor_v6/trainingTuple_0_division_1_QCD_HT100to200_validation_0.h5", ), 1),
        (("/cms/data/pastika/trainData_pt20_30_40_dRPi_tightMass_deepFlavor_v6/trainingTuple_0_division_1_QCD_HT200to300_validation_0.h5", ), 1),
        (("/cms/data/pastika/trainData_pt20_30_40_dRPi_tightMass_deepFlavor_v6/trainingTuple_0_division_1_QCD_HT300to500_validation_0.h5", ), 1),
        (("/cms/data/pastika/trainData_pt20_30_40_dRPi_tightMass_deepFlavor_v6/trainingTuple_0_division_1_QCD_HT500to700_validation_0.h5", ), 1),
        (("/cms/data/pastika/trainData_pt20_30_40_dRPi_tightMass_deepFlavor_v6/trainingTuple_0_division_1_QCD_HT700to1000_validation_0.h5", ), 1),
        (("/cms/data/pastika/trainData_pt20_30_40_dRPi_tightMass_deepFlavor_v6/trainingTuple_0_division_1_QCD_HT1000to1500_validation_0.h5", ), 1),
        (("/cms/data/pastika/trainData_pt20_30_40_dRPi_tightMass_deepFlavor_v6/trainingTuple_0_division_1_QCD_HT1500to2000_validation_0.h5", ), 1),
        (("/cms/data/pastika/trainData_pt20_30_40_dRPi_tightMass_deepFlavor_v6/trainingTuple_0_division_1_QCD_HT2000toInf_validation_0.h5", ), 1),
    ]

    validDataBgQCDData = [
        (("/cms/data/pastika/trainData_pt20_30_40_dRPi_tightMass_deepFlavor_v6/trainingTuple_0_division_1_Data_JetHT_2016_validation_0.h5", ), 1),
    ]

    print "Input Variables: ", len(dgSig.getList())

    # Import data
    #print options.runOp.validationSamples
    validDataSig = getValidData(dgSig, validDataSig, options)
    validDataBgTTbar = getValidData(dgBg, validDataBgTTbar, options)
    validDataBgQCDMC = getValidData(dgBg, validDataBgQCDMC, options)
    validDataBgQCDData = getValidData(dgBg, validDataBgQCDData, options)

    validDataTTbar = combineValidationData(validDataSig, validDataBgTTbar)
    validDataQCDMC = combineValidationData(validDataSig, validDataBgQCDMC)
    validDataQCDData = combineValidationData(validDataSig, validDataBgQCDData)

    # get input/output sizes
    #print validData["data"].shape
    nFeatures = validDataTTbar["data"].shape[1]
    nLabels = validDataTTbar["labels"].shape[1]
    nWeights = validDataTTbar["weights"].shape[1]

    # Training parameters
    l2Reg = options.runOp.l2Reg
    MiniBatchSize = options.runOp.minibatchSize
    nEpoch = options.runOp.nepoch
    ReportInterval = options.runOp.reportInterval
    validationCount = min(options.runOp.nValidationEvents, validDataTTbar["data"].shape[0])

    # scale data inputs to mean 0, stddev 1
    categories = numpy.array(options.netOp.vCategories)
    mins = numpy.zeros(categories.shape, dtype=numpy.float32)
    ptps = numpy.zeros(categories.shape, dtype=numpy.float32)
    for i in xrange(categories.max()):
        selectedCategory = categories == i
        mins[selectedCategory] = validDataTTbar["data"][:, selectedCategory].mean()
        ptps[selectedCategory] = validDataTTbar["data"][:, selectedCategory].std()
    ptps[ptps < 1e-10] = 1.0

    ## Create data manager, this class controls how data is fed to the network for training
    # DataSet(fileGlob, xsec, Nevts, kFactor, sig, prescale, rescale)
    signalDataSets = [
        DataSet("/cms/data/pastika/trainData_pt20_30_40_dRPi_tightMass_deepFlavor_v6p1/trainingTuple_*_division_0_TTbarSingleLepT_training_*.h5", 365.4, 61878989, 1.0, True, 0, 1.0, 1.0, 8),
        DataSet("/cms/data/pastika/trainData_pt20_30_40_dRPi_tightMass_deepFlavor_v6p1/trainingTuple_*_division_0_TTbarSingleLepTbar_training_*.h5", 365.4, 61901450, 1.0, True, 0, 1.0, 1.0, 8),
    ]

    # pt reweighting histograms
    ttbarRatio = (numpy.array([0.7976347, 1.010679, 1.0329635, 1.0712056, 1.1147588,
                               1.0072196, 0.79854023, 0.7216115, 0.7717652, 0.851551, 0.8372917]),
                  numpy.array([0., 50., 100., 150., 200., 250., 300., 350., 400., 450., 500., 1e10]))
    QCDDataRatio = (numpy.array([0.50125164, 0.70985824, 1.007087, 1.6701245, 2.5925348,
                                 3.6850858, 4.924969, 6.2674766, 7.5736594, 8.406105, 7.7529635]),
                    numpy.array([0., 50., 100., 150., 200., 250., 300., 350., 400., 450., 500., 1e10]))
    QCDMCRatio = (numpy.array([0.75231355, 1.0563549, 1.2571484, 1.3007764, 1.0678109,
                               0.83444154, 0.641499, 0.49130705, 0.36807108, 0.24333349, 0.06963781]),
                  numpy.array([0., 50., 100., 150., 200., 250., 300., 350., 400., 450., 500., 1e10]))

    backgroundDataSets = [
        DataSet("/cms/data/pastika/trainData_pt20_30_40_dRPi_tightMass_deepFlavor_v6/trainingTuple_*_division_0_TTbarSingleLepT_training_*.h5", 365.4, 61878989, 1.0, False, 0, 1.0, 1.0, 8, ttbarRatio),
        DataSet("/cms/data/pastika/trainData_pt20_30_40_dRPi_tightMass_deepFlavor_v6/trainingTuple_*_division_0_TTbarSingleLepTbar_training_*.h5", 365.4, 61901450, 1.0, False, 0, 1.0, 1.0, 8, ttbarRatio),
        DataSet("/cms/data/pastika/trainData_pt20_30_40_dRPi_tightMass_deepFlavor_v6/trainingTuple_*_division_0_Data_JetHT_2016_training_*.h5", 1.0, 1, 1.0, False, 1, 1.0, 1.0, 8, include=False),  #QCDDataRatio),
        DataSet("/cms/data/pastika/trainData_pt20_30_40_dRPi_tightMass_deepFlavor_v6/trainingTuple_*_division_0_QCD_HT100to200_training_*.h5", 27990000, 80684349, 0.0, False, 2, 1.0, 1.0, 1, include=False),  #QCDMCRatio),
        DataSet("/cms/data/pastika/trainData_pt20_30_40_dRPi_tightMass_deepFlavor_v6/trainingTuple_*_division_0_QCD_HT200to300_training_*.h5", 1712000, 57580393, 0.0, False, 2, 1.0, 1.0, 1, include=False),  #QCDMCRatio),
        DataSet("/cms/data/pastika/trainData_pt20_30_40_dRPi_tightMass_deepFlavor_v6/trainingTuple_*_division_0_QCD_HT300to500_training_*.h5", 347700, 54537903, 0.0, False, 2, 1.0, 1.0, 1, include=False),  #QCDMCRatio),
        DataSet("/cms/data/pastika/trainData_pt20_30_40_dRPi_tightMass_deepFlavor_v6/trainingTuple_*_division_0_QCD_HT500to700_training_*.h5", 32100, 62271343, 0.0, False, 2, 1.0, 1.0, 1, include=False),  #QCDMCRatio),
        DataSet("/cms/data/pastika/trainData_pt20_30_40_dRPi_tightMass_deepFlavor_v6/trainingTuple_*_division_0_QCD_HT700to1000_training_*.h5", 6831, 45232316, 0.0, False, 2, 1.0, 1.0, 1, include=False),  #QCDMCRatio),
        DataSet("/cms/data/pastika/trainData_pt20_30_40_dRPi_tightMass_deepFlavor_v6/trainingTuple_*_division_0_QCD_HT1000to1500_training_*.h5", 1207, 15127293, 0.0, False, 2, 1.0, 1.0, 1, include=False),  #QCDMCRatio),
        DataSet("/cms/data/pastika/trainData_pt20_30_40_dRPi_tightMass_deepFlavor_v6/trainingTuple_*_division_0_QCD_HT1500to2000_training_*.h5", 119.9, 11826702, 0.0, False, 2, 1.0, 1.0, 1, include=False),  #QCDMCRatio),
        DataSet("/cms/data/pastika/trainData_pt20_30_40_dRPi_tightMass_deepFlavor_v6/trainingTuple_*_division_0_QCD_HT2000toInf_training_*.h5", 25.24, 6039005, 0.0, False, 2, 1.0, 1.0, 1, include=False),  #QCDMCRatio),
    ]

    dm = DataManager(options.netOp.vNames, nEpoch, nFeatures, nLabels, 2, nWeights,
                     options.runOp.ptReweight, signalDataSets, backgroundDataSets)

    # Build the graph
    denseNetwork = [nFeatures] + options.netOp.denseLayers + [nLabels]
    convLayers = options.netOp.convLayers
    rnnNodes = options.netOp.rnnNodes
    rnnLayers = options.netOp.rnnLayers
    mlp = CreateModel(options, denseNetwork, convLayers, rnnNodes, rnnLayers,
                      dm.inputDataQueue, MiniBatchSize, mins, 1.0 / ptps)

    # summary writer
    summary_writer = tf.summary.FileWriter(options.runOp.directory + "log_graph",
                                           graph=tf.get_default_graph())

    print "TRAINING NETWORK"

    with tf.Session(config=tf.ConfigProto(intra_op_parallelism_threads=8)) as sess:
        sess.run(tf.global_variables_initializer())

        # start queue runners
        dm.launchQueueThreads(sess)

        print "Reporting validation loss every %i batches with %i events per batch for %i epochs" % (ReportInterval, MiniBatchSize, nEpoch)

        # preload the first data into staging area
        sess.run([mlp.stagingOp],
                 feed_dict={mlp.reg: l2Reg, mlp.keep_prob: options.runOp.keepProb})

        i = 0
        N_TRAIN_SUMMARY = 10

        # flush queue until the sample fraction is approximately equal
        flushctr = 200
        while dm.continueTrainingLoop():
            result = sess.run(dm.inputDataQueue.dequeue_many(MiniBatchSize))
            signalCount = result[1][:, 0].sum()
            bgCount = result[1][:, 1].sum()
            signalFraction = signalCount / (signalCount + bgCount)
            # the first time this fraction drops below 0.5 means we are close enough to equal signal/bg fraction
            if signalFraction < 0.5:
                flushctr -= 1
                if flushctr <= 0:
                    break

        try:
            while dm.continueTrainingLoop():
                grw = 2 / (1 + exp(-i / 10000.0)) - 1

                # run validation operations
                if i == 0 or not i % ReportInterval:
                    validation_loss, accuracy, summary_vl = sess.run(
                        [mlp.loss_ph, mlp.accuracy, mlp.merged_valid_summary_op],
                        feed_dict={mlp.x_ph: validDataTTbar["data"][:validationCount],
                                   mlp.y_ph_: validDataTTbar["labels"][:validationCount],
                                   mlp.p_ph_: validDataTTbar["domain"][:validationCount],
                                   mlp.reg: l2Reg,
                                   mlp.gradientReversalWeight: grw,
                                   mlp.wgt_ph: validDataTTbar["weights"][:validationCount]})
                    summary_writer.add_summary(summary_vl, i / N_TRAIN_SUMMARY)
                    print('Interval %d, validation accuracy %0.6f, validation loss %0.6f' % (i / ReportInterval, accuracy, validation_loss))

                    validation_loss, accuracy, summary_vl_QCDMC = sess.run(
                        [mlp.loss_ph, mlp.accuracy, mlp.merged_valid_QCDMC_summary_op],
                        feed_dict={mlp.x_ph: validDataQCDMC["data"][:validationCount],
                                   mlp.y_ph_: validDataQCDMC["labels"][:validationCount],
                                   mlp.p_ph_: validDataQCDMC["domain"][:validationCount],
                                   mlp.reg: l2Reg,
                                   mlp.gradientReversalWeight: grw,
                                   mlp.wgt_ph: validDataQCDMC["weights"][:validationCount]})
                    summary_writer.add_summary(summary_vl_QCDMC, i / N_TRAIN_SUMMARY)

                    validation_loss, accuracy, summary_vl_QCDData = sess.run(
                        [mlp.loss_ph, mlp.accuracy, mlp.merged_valid_QCDData_summary_op],
                        feed_dict={mlp.x_ph: validDataQCDData["data"][:validationCount],
                                   mlp.y_ph_: validDataQCDData["labels"][:validationCount],
                                   mlp.p_ph_: validDataQCDData["domain"][:validationCount],
                                   mlp.reg: l2Reg,
                                   mlp.gradientReversalWeight: grw,
                                   mlp.wgt_ph: validDataQCDData["weights"][:validationCount]})
                    summary_writer.add_summary(summary_vl_QCDData, i / N_TRAIN_SUMMARY)

                # run training operations
                if i % N_TRAIN_SUMMARY == 0:
                    _, _, summary = sess.run(
                        [mlp.stagingOp, mlp.train_step, mlp.merged_train_summary_op],
                        feed_dict={mlp.reg: l2Reg,
                                   mlp.keep_prob: options.runOp.keepProb,
                                   mlp.training: True,
                                   mlp.gradientReversalWeight: grw})
                    summary_writer.add_summary(summary, i / N_TRAIN_SUMMARY)
                else:
                    sess.run([mlp.stagingOp, mlp.train_step],
                             feed_dict={mlp.reg: l2Reg,
                                        mlp.keep_prob: options.runOp.keepProb,
                                        mlp.training: True})
                i += 1

            while dm.continueFlushingQueue():
                sess.run(dm.inputDataQueue.dequeue_many(MiniBatchSize))

        except Exception, e:
            # Report exceptions to the coordinator.
            dm.requestStop(e)
        finally:
def update(node, row_id):
    if not request.json:
        abort(400)
    return jsonify(DataManager().edit_row(node, row_id, request.json))
# ========== Instantiate NetManager ==========
net_cls = NetManager()

# ========== Instantiate PathManager ==========
path_cls = PathManager(tfrecord_folder=TFRECORD_FOLDER,
                       output_rootfolder=OUT_ROOT_FOLDER,
                       epoch_output_rootfolder=EPOCH_OUT_ROOT_FOLDER)
path_cls.all_makedirs()  # create the result output folders

# ========== Build the DataSets ==========
# load the property data
df = pd.read_csv(path_cls.get_property_path())

shuf_train_ds_cls = DataManager(
    tfrecord_path=path_cls.get_train_ds_path(),
    img_root=IMAGE_ROOT_PATH,
    batch_size=SHUF_LEARN_BATCH_SIZE,
    net_cls=net_cls,
    data_n=df.at[0, 'total_learn_data'],
    suffle_buffer=SUFFLE_BUFFER_SIZE,
)
train_ds_cls = DataManager(
    tfrecord_path=path_cls.get_train_ds_path(),
    img_root=IMAGE_ROOT_PATH,
    batch_size=LEARN_BATCH_SIZE,
    net_cls=net_cls,
    data_n=df.at[0, 'total_learn_data'],
)
test_ds_cls = DataManager(
    tfrecord_path=path_cls.get_test_ds_path(),
    img_root=IMAGE_ROOT_PATH,
    batch_size=TEST_BATCH_SIZE,
    net_cls=net_cls,
def delete(node, row_id):
    return jsonify(DataManager().delete(node, row_id))
            return self.predictions[0][now_time]
        else:
            return self.predictions[now_time]


if __name__ == '__main__':
    import yaml
    import sys
    sys.path.insert(0, '..')
    from DataManager import DataManager
    from xbos import get_client

    with open("../config_file.yml", 'r') as ymlfile:
        cfg = yaml.load(ymlfile)

    with open("../Buildings/ciee/ZoneConfigs/HVAC_Zone_Eastzone.yml", 'r') as ymlfile:
        advise_cfg = yaml.load(ymlfile)

    if cfg["Server"]:
        c = get_client(agent=cfg["Agent_IP"], entity=cfg["Entity_File"])
    else:
        c = get_client()

    dm = DataManager(cfg, advise_cfg, c, "HVAC_Zone_Eastzone")
    occ = Occupancy(dm.preprocess_occ(), 15, 4, 4,
                    advise_cfg["Advise"]["Occupancy_Sensors"])

    for i in range(10):
        print "Intervals ahead: " + str(i)
        print occ.occ(i)
def main():
    '''
    Runs cross validation on the input Twitter data.
    '''
    args = parser.parse_args()

    # Extract the data for LDA and divide it into folds
    dm = DataManager(args.train_path, 'twitter')
    if settings.DEBUG:
        print("Loading data...")

    # Time the process of loading in the data.
    start = time.perf_counter()
    # Load the data (possibly from the cache, if it exists)
    dm.load_data(args.cache_path)
    # The number of folds is passed in as a command-line arg
    dm.divide_into_folds(args.num_folds)
    end = time.perf_counter()
    if settings.DEBUG:
        print(f"Preparing the data (loading, dividing into folds) took {end-start:0.4f} seconds.")

    # Initialize the best k and best likelihood, along with the list of k values to try
    best_k = None
    best_likelihood = -float("inf")
    # Get the list of topic numbers to try as a command-line arg too.
    possible_k_values = args.topic_numbers

    # Store the results at the results path. Add the headers if the file doesn't exist yet.
    if not os.path.exists(args.results_path):
        fout = open(args.results_path, "w")
        out_writer = csv.writer(fout)
        out_writer.writerow(["Model", "k", "Average Likelihood", "Number of Documents", "Source"])
    else:
        # Append so previously stored results are kept; the headers are already in place.
        fout = open(args.results_path, "a")
        out_writer = csv.writer(fout)

    # Run cross validation once for each parameter value
    for k in possible_k_values:
        if settings.DEBUG:
            print(f"Trying k={k} components...")
        # We will collect a likelihood for each validation set
        likelihoods = []
        for i in range(dm.get_num_folds()):
            if settings.DEBUG:
                print(f"  Iteration {i+1}/{dm.get_num_folds()}")
            # Update the validation fold.
            dm.set_validation(i)
            # Retrieve the training data and validation set.
            train, validate = get_data_for_LDA(dm)

            start = time.perf_counter()
            # Train the model with the param choice.
            lda_model = run_LDA_for_CV(train, k)
            # Compute the resulting likelihood on the validation set.
            likelihood = lda_model.score(validate)
            end = time.perf_counter()

            if settings.DEBUG:
                print(f"    likelihood = {likelihood}")
                print(f"    Training took {end-start:0.4f} seconds.")
            likelihoods.append(likelihood)

        avg_likelihood = sum(likelihoods) / len(likelihoods)
        out_writer.writerow(["LDA", k, avg_likelihood, len(dm.get_all_fold_data()), settings.TWITTER_DIR])
        if settings.DEBUG:
            print(f"  avg_likelihood = {avg_likelihood}")

        if avg_likelihood > best_likelihood:
            best_likelihood = avg_likelihood
            best_k = k

    print(f"Best average likelihood found was {best_likelihood} with parameter value k={best_k}")
    fout.close()