def __init__(self, data_set, batch_size, noise_dim, loss_type, noise_type,
             learning_rate=0.00015, sess=None, name="gan"):
    self.name = name
    self.learning_rate = learning_rate
    # Create a TensorFlow session if none was passed in
    if sess is None:
        self.sess = tf.Session()
    else:
        self.sess = sess
    self.disc_updates = 2
    self.data_set = data_set
    self.DataSet_train, self.DataSet_test, self.img_size = get_data(data_set, False)
    self.batch_size = batch_size
    self.noise_type = noise_type
    self.noise_dim = noise_dim
    self.noise_placeholder = tf.placeholder(dtype=tf.float32,
                                            shape=[None, self.noise_dim],
                                            name=self.name + "/generator_noise")
    self.input_placeholder = tf.placeholder(shape=[None] + self.img_size,
                                            dtype=tf.float32,
                                            name=self.name + "/discriminator_input")
    self.build_generator_discriminator()
    self.is_training = True
    self.build_loss(loss_type)
    self.build_optimizers()
    self.saver = tf.train.Saver(max_to_keep=1000)
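# A minimal sketch of how the noise placeholder above might be fed during
# training, assuming noise_type is one of "uniform" or "normal" (the values
# actually accepted are defined elsewhere in this class). sample_noise is a
# hypothetical helper, not part of the original code.
import numpy as np

def sample_noise(batch_size, noise_dim, noise_type="uniform"):
    """Draw a batch of latent vectors for the generator."""
    if noise_type == "uniform":
        return np.random.uniform(-1.0, 1.0, size=(batch_size, noise_dim))
    elif noise_type == "normal":
        return np.random.normal(0.0, 1.0, size=(batch_size, noise_dim))
    raise ValueError("unknown noise_type: %s" % noise_type)

# Example feed_dict usage with the placeholders defined in __init__:
# noise = sample_noise(self.batch_size, self.noise_dim, self.noise_type)
# self.sess.run(train_op, feed_dict={self.noise_placeholder: noise,
#                                    self.input_placeholder: image_batch})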
name = args.name
if not args.classmap:
    classmap = ['nonspeech', 'speech', 'music']
else:
    classmap = args.classmap
n_classes = len(classmap)
if not args.dest:
    args.dest = args.path
if args.test:
    d = get_data(classmap=classmap, max_gb=args.max_gb, reuse=args.reuse,
                 memory_friendly=args.mem_friendly, data_path=args.path,
                 doscale=args.scale, dest=args.dest)
else:
    d = (load_csv('train', max_gb=args.max_gb, classmap=classmap,
                  reuse=args.reuse, memory_friendly=args.mem_friendly,
                  data_path=args.path, doscale=args.scale, dest=args.dest),
         None)
if args.conv:
    # Step decay for the learning-rate multiplier; see the sketch below
    decay_op = lambda x: 10 ** (-x // 10)
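# A quick standalone check of the step-decay lambda above. Because -x // 10
# floors toward negative infinity, the multiplier already drops to 0.1 at
# epoch 1 and then falls another factor of 10 every 10 epochs:
decay = lambda epoch: 10 ** (-epoch // 10)
for epoch in (0, 1, 10, 11, 20, 21):
    print("epoch %2d -> lr multiplier %g" % (epoch, decay(epoch)))
# epoch  0 -> lr multiplier 1
# epoch  1 -> lr multiplier 0.1
# epoch 10 -> lr multiplier 0.1
# epoch 11 -> lr multiplier 0.01
# epoch 20 -> lr multiplier 0.01
# epoch 21 -> lr multiplier 0.001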
# Sunny with a Chance of Asteroids
from DataGetter import get_data
from Ship import IntcodeComputer
from Timer import timer

DAY = 5
data = get_data(DAY)
data = list(map(int, data.strip('\n').split(',')))
TEST_comp = IntcodeComputer(data)

# problem 1 - ID: 1
# problem 2 - ID: 5
@timer
def compute(val):
    try:
        val = int(val)
    except ValueError:
        print('{} is not a valid integer.'.format(val))
        exit(1)
    TEST_comp.set_memory(val)
    TEST_comp.compute()
    while not TEST_comp.memory_empty():
        print(TEST_comp.retr_memory())
    TEST_comp.reset()

# compute() handles the int conversion and error message, so pass the
# raw string through rather than converting here
val = input('ID:')
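# The timer decorator imported from Timer above is not shown in this
# snippet; here is a minimal sketch of what such a decorator typically
# looks like (an assumed implementation, not necessarily the repo's):
import time
from functools import wraps

def timer(func):
    """Print the wall-clock time a function call takes."""
    @wraps(func)
    def wrapper(*args, **kwargs):
        start = time.perf_counter()
        result = func(*args, **kwargs)
        print('{} took {:.6f}s'.format(func.__name__,
                                       time.perf_counter() - start))
        return result
    return wrapper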
# NB: a dict default argument is shared across calls; pass a fresh config
# explicitly when scanning hyperparameters.
def train(config={"minNJetBin": 7, "maxNJetBin": 11, "gr_lambda": 0, "nNodes": 70,
                  "nNodesD": 10, "nHLayers": 1, "nHLayersD": 1, "drop_out": 0.7,
                  "batch_size": 2048, "epochs": 100, "lr": 0.001, "verbose": 1,
                  "Mask": False, "Mask_nJet": 7}):
    # Define outputDir based on input config
    outputDir = "Output_1/"
    for key in sorted(config.keys()):
        outputDir += key + "_" + str(config[key]) + "_"
    config["outputDir"] = outputDir

    # Define vars for training
    jVec = ["Jet_pt_", "Jet_eta_", "Jet_phi_", "Jet_m_"]
    lepton = ["GoodLeptons_pt_1", "GoodLeptons_eta_1", "GoodLeptons_phi_1", "GoodLeptons_m_1"]
    MET = ["lvMET_cm_pt", "lvMET_cm_eta", "lvMET_cm_phi", "lvMET_cm_m"]
    eventShapeVars = ["fwm2_top6", "fwm3_top6", "fwm4_top6", "fwm5_top6",
                      "jmt_ev0_top6", "jmt_ev1_top6", "jmt_ev2_top6"]
    numJets = ["NGoodJets_double"]
    nJets = 7
    if config["Mask"]:
        nJets = config["Mask_nJet"]
    jVecs = list(y + str(x + 1) for y in jVec for x in range(nJets))
    config["allVars"] = jVecs + lepton

    # Import data
    print("----------------Preparing data------------------")
    #config["dataSet"] = "EventShapeTrainingData_V3/"
    #config["dataSet"] = "BackGroundMVA_V4_CM_GoodJets/"
    #config["dataSet"] = "BackGroundMVA_V5_CM_Jets/"
    #config["dataSet"] = "BackGroundMVA_V6_noCM_GoodJets/"
    #config["dataSet"] = "BackGroundMVA_V8_All_GoodJets/"
    config["dataSet"] = "BackGroundMVA_V9_CM_All_GoodJets_Inclusive/"
    config["massModels"] = ["350", "450", "550", "650", "750", "850"]
    #ttMClist = ["TTJets*", "TT"]
    ttMClist = ["T*", "TT"]
    config["ttbarMC"] = ttMClist[0]
    config["otherttbarMC"] = ttMClist[1]
    print("Using " + config["dataSet"] + " data set")
    print("Training variables:")
    print(config["allVars"])
    print("Training on mass models: ", config["massModels"])
    print("Training on ttbarMC: ", config["ttbarMC"])
    if os.path.exists(config["outputDir"]):
        print("Removing old training files: ", config["outputDir"])
        shutil.rmtree(config["outputDir"])
    os.makedirs(config["outputDir"] + "/log_graph")

    sgTrainSet = sum((glob(config["dataSet"] + "trainingTuple_*_division_0_*_" + mass + "*_training_0.h5") for mass in config["massModels"]), [])
    bgTrainSet = glob(config["dataSet"] + "trainingTuple_*_division_0_" + config["ttbarMC"] + "_training_0.h5")
    sgTestSet = sum((glob(config["dataSet"] + "trainingTuple_*_division_2_*_" + mass + "*_test_0.h5") for mass in config["massModels"]), [])
    bgTestSet = glob(config["dataSet"] + "trainingTuple_*_division_2_" + config["ttbarMC"] + "_test_0.h5")

    trainData, trainSg, trainBg = get_data(sgTrainSet, bgTrainSet, config)
    testData, testSg, testBg = get_data(sgTestSet, bgTestSet, config)
    bgTrainTT = glob(config["dataSet"] + "trainingTuple_*_division_0_TT_training_0.h5")
    trainDataTT, trainSgTT, trainBgTT = get_data(sgTrainSet, bgTrainTT, config)

    # Make and train model
    print("----------------Preparing training model------------------")
    gr_lambda = 4
    nNodes = 70
    nNodesD = 10
    nHLayers = 1
    nHLayersD = 1
    drop_out = 0.7
    batch_size = 2048
    epochs = 100
    lr = 0.001
    class_weight = {0: {0: 1.0, 1: 1.0},
                    1: {0: 1.0, 1: 5.0, 2: 25.0, 3: 125.0, 4: 625.0}}
    sample_weight = None  #{0: trainData["Weight"][:,0].tolist(), 1: trainData["Weight"][:,0].tolist()}
    #optimizer = keras.optimizers.Adagrad(lr=0.01, epsilon=None, decay=0.0)
    optimizer = keras.optimizers.Adam(lr=config["lr"], beta_1=0.9, beta_2=0.999,
                                      epsilon=None, decay=0.0, amsgrad=False)
    n_hidden_layers = list(config["nNodes"] for x in range(config["nHLayers"]))
    n_hidden_layers_D = list(config["nNodesD"] for x in range(config["nHLayersD"]))
    # Gradient reversal layer for the adversarial branch (a sketch of such a
    # layer follows after this function)
    Flip = GradientReversal(config["gr_lambda"])

    main_input = keras.layers.Input(shape=(trainData["data"].shape[1],), name='main_input')
    # Rescale the inputs: center at 0 and scale to unit variance
    layer = keras.layers.Lambda(lambda x: (x - K.constant(trainDataTT["mean"])) * K.constant(trainDataTT["scale"]),
                                name='normalizeData')(main_input)
    layer = keras.layers.Dense(config["nNodes"], activation='relu')(layer)
    for n in n_hidden_layers:
        layer = keras.layers.BatchNormalization()(layer)
        layer = keras.layers.Dense(n, activation='relu')(layer)
        layer = keras.layers.Dropout(config["drop_out"])(layer)
    first_output = keras.layers.Dense(trainData["labels"].shape[1], activation='softmax', name='first_output')(layer)
    layer = Flip(first_output)
    #layer = keras.layers.Dense(nNodesD, activation='relu')(first_output)
    for n in n_hidden_layers_D:
        layer = keras.layers.BatchNormalization()(layer)
        layer = keras.layers.Dense(n, activation='relu')(layer)
        layer = keras.layers.Dropout(config["drop_out"])(layer)
    second_output = keras.layers.Dense(trainData["domain"].shape[1], activation='softmax', name='second_output')(layer)

    model = keras.models.Model(inputs=main_input, outputs=[first_output, second_output], name='model')
    model.compile(loss=[make_loss_model(c=1.0), make_loss_adversary(c=1.0)],
                  optimizer=optimizer, metrics=['accuracy'])
    tbCallBack = keras.callbacks.TensorBoard(log_dir="./" + outputDir + "/log_graph",
                                             histogram_freq=0, write_graph=True, write_images=True)
    log_model = keras.callbacks.ModelCheckpoint(outputDir + "/BestNN.hdf5", monitor='val_loss',
                                                verbose=config["verbose"], save_best_only=True)
    callbacks = []
    if config["verbose"] == 1:
        callbacks = [log_model, tbCallBack]
    result_log = model.fit(trainData["data"], [trainData["labels"], trainData["domain"]],
                           batch_size=config["batch_size"], epochs=config["epochs"],
                           class_weight=class_weight,
                           validation_data=(testData["data"], [testData["labels"], testData["domain"]]),
                           callbacks=callbacks, sample_weight=sample_weight)

    # Model visualization
    keras.utils.plot_model(model, to_file=outputDir + "/model.png", show_shapes=True)

    # Save the trained model as a protocol buffers file
    inputName = model.input.op.name.split(':')[0]
    outputName = model.output[0].op.name.split(':')[0]
    print("Input name:", inputName)
    print("Output name:", outputName)
    config["input_output"] = [inputName, outputName]
    saver = tf.train.Saver()
    saver.save(keras.backend.get_session(), outputDir + "/keras_model.ckpt")
    export_path = "./" + outputDir + "/"
    freeze_graph_binary = "python freeze_graph.py"
    graph_file = export_path + "keras_model.ckpt.meta"
    ckpt_file = export_path + "keras_model.ckpt"
    output_file = export_path + "keras_frozen.pb"
    command = (freeze_graph_binary + " --input_meta_graph=" + graph_file +
               " --input_checkpoint=" + ckpt_file + " --output_graph=" + output_file +
               " --output_node_names=" + outputName + " --input_binary=true")
    os.system(command)

    # Plot results
    print("----------------Validation of training------------------")
    val = Validation(model, config, sgTrainSet, trainData, trainSg, trainBg, result_log)
    config, metric = val.plot()

    # Clean up training
    K.clear_session()
    tf.reset_default_graph()

    return config, metric
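# GradientReversal is imported from elsewhere in this repo and its
# implementation is not shown. Below is a minimal sketch of a gradient
# reversal layer assuming TF 1.x (>= 1.7, where tf.custom_gradient exists);
# the class name GradientReversalSketch is illustrative, not the repo's.
import tensorflow as tf
import keras

class GradientReversalSketch(keras.layers.Layer):
    """Identity in the forward pass; scales gradients by -hp_lambda in the backward pass."""

    def __init__(self, hp_lambda=1.0, **kwargs):
        super(GradientReversalSketch, self).__init__(**kwargs)
        self.hp_lambda = hp_lambda

    def call(self, inputs):
        @tf.custom_gradient
        def _reverse(x):
            def grad(dy):
                return -self.hp_lambda * dy  # flip the gradient sign
            return tf.identity(x), grad
        return _reverse(inputs)

# Usage would mirror the training code above:
# Flip = GradientReversalSketch(config["gr_lambda"])
# layer = Flip(first_output)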
def plot(self):
    sgValSet = sum((glob(self.config["dataSet"] + "trainingTuple_*_division_1_*_" + mass + "*_validation_0.h5") for mass in self.config["massModels"]), [])
    bgValSet = glob(self.config["dataSet"] + "trainingTuple_*_division_1_" + self.config["ttbarMC"] + "_validation_0.h5")
    bgOTrainSet = glob(self.config["dataSet"] + "trainingTuple_*_division_0_" + self.config["otherttbarMC"] + "_training_0.h5")
    valData, valSg, valBg = get_data(sgValSet, bgValSet, self.config)
    trainOData, trainOSg, trainOBg = get_data(self.sgTrainSet, bgOTrainSet, self.config)

    y_Val = self.model.predict(valData["data"])[0][:, 0].ravel()
    y_Val_Sg = self.model.predict(valSg["data"])[0][:, 0].ravel()
    y_Val_Bg = self.model.predict(valBg["data"])[0][:, 0].ravel()
    y_Train = self.model.predict(self.trainData["data"])[0][:, 0].ravel()
    y_Train_Sg = self.model.predict(self.trainSg["data"])[0][:, 0].ravel()
    y_Train_Bg = self.model.predict(self.trainBg["data"])[0][:, 0].ravel()
    y_OTrain = self.model.predict(trainOData["data"])[0][:, 0].ravel()
    y_OTrain_Bg = self.model.predict(trainOBg["data"])[0][:, 0].ravel()

    ## Make input variable plots
    #index = 0
    #for var in self.config["allVars"]:
    #    fig = plt.figure()
    #    plt.hist(self.trainBg["data"][:,index], bins=30, histtype='step', density=True, log=False, label=var+" Bg", weights=self.trainBg["Weight"])
    #    plt.hist(self.trainSg["data"][:,index], bins=30, histtype='step', density=True, log=False, label=var+" Sg", weights=self.trainSg["Weight"])
    #    plt.legend(loc='upper right')
    #    plt.ylabel('norm')
    #    plt.xlabel(var)
    #    fig.savefig(self.config["outputDir"]+"/"+var+".png", dpi=fig.dpi)
    #    index += 1

    ## Normalize
    #index = 0
    #tBg = self.trainData["scale"]*(self.trainBg["data"] - self.trainData["mean"])
    #tSg = self.trainData["scale"]*(self.trainSg["data"] - self.trainData["mean"])
    #for var in self.config["allVars"]:
    #    fig = plt.figure()
    #    plt.hist(tBg[:,index], bins=30, histtype='step', density=True, log=False, label=var+" Bg", weights=self.trainBg["Weight"])
    #    plt.hist(tSg[:,index], bins=30, histtype='step', density=True, log=False, label=var+" Sg", weights=self.trainSg["Weight"])
    #    plt.legend(loc='upper right')
    #    plt.ylabel('norm')
    #    plt.xlabel("norm "+var)
    #    fig.savefig(self.config["outputDir"]+"/norm_"+var+".png", dpi=fig.dpi)
    #    index += 1

    # Plot loss of training vs test
    fig = plt.figure()
    plt.plot(self.result_log.history['loss'])
    plt.plot(self.result_log.history['val_loss'])
    plt.title('model loss')
    plt.ylabel('loss')
    plt.xlabel('epoch')
    plt.legend(['train', 'test'], loc='upper left')
    fig.savefig(self.config["outputDir"] + "/loss_train_val.png", dpi=fig.dpi)

    fig = plt.figure()
    plt.plot(self.result_log.history['first_output_loss'])
    plt.plot(self.result_log.history['val_first_output_loss'])
    plt.title('first output loss')
    plt.ylabel('loss')
    plt.xlabel('epoch')
    plt.legend(['train', 'test'], loc='upper left')
    fig.savefig(self.config["outputDir"] + "/first_output_loss_train_val.png", dpi=fig.dpi)

    fig = plt.figure()
    plt.plot(self.result_log.history['first_output_acc'])
    plt.plot(self.result_log.history['val_first_output_acc'])
    plt.title('first output acc')
    plt.ylabel('acc')
    plt.xlabel('epoch')
    plt.legend(['train', 'test'], loc='upper left')
    fig.savefig(self.config["outputDir"] + "/first_output_acc_train_val.png", dpi=fig.dpi)

    fig = plt.figure()
    plt.plot(self.result_log.history['second_output_loss'])
    plt.plot(self.result_log.history['val_second_output_loss'])
    plt.title('second output loss')
    plt.ylabel('loss')
    plt.xlabel('epoch')
    plt.legend(['train', 'test'], loc='upper left')
    fig.savefig(self.config["outputDir"] + "/second_output_loss_train_val.png", dpi=fig.dpi)

    fig = plt.figure()
    plt.plot(self.result_log.history['second_output_acc'])
    plt.plot(self.result_log.history['val_second_output_acc'])
    plt.title('second output acc')
    plt.ylabel('acc')
    plt.xlabel('epoch')
    plt.legend(['train', 'test'], loc='upper left')
    fig.savefig(self.config["outputDir"] + "/second_output_acc_train_val.png", dpi=fig.dpi)

    # Plot discriminator distribution
    bins = np.linspace(0, 1, 100)
    fig, ax = plt.subplots(figsize=(6, 6))
    ax.set_title('')
    ax.set_ylabel('Norm Events')
    ax.set_xlabel('Discriminator')
    plt.hist(y_Train_Sg, bins, color='xkcd:red', alpha=0.9, histtype='step', lw=2, label='Sg Train', density=True)
    plt.hist(y_Val_Sg, bins, color='xkcd:green', alpha=0.9, histtype='step', lw=2, label='Sg Val', density=True)
    plt.hist(y_Train_Bg, bins, color='xkcd:blue', alpha=0.9, histtype='step', lw=2, label='Bg Train', density=True, weights=self.trainBg["Weight"])
    plt.hist(y_Val_Bg, bins, color='xkcd:magenta', alpha=0.9, histtype='step', lw=2, label='Bg Val', density=True, weights=valBg["Weight"])
    ax.legend(loc='best', frameon=False)
    fig.savefig(self.config["outputDir"] + "/discriminator.png", dpi=fig.dpi)

    samples = {"Bg": [self.trainBg, y_Train_Bg, self.trainBg["Weight"]],
               "Sg": [self.trainSg, y_Train_Sg, self.trainSg["Weight"]]}
    for sample in samples:
        trainSample = samples[sample][0]
        y_train_Sp = samples[sample][1]
        weights = samples[sample][2]
        bins = np.linspace(0, 1, 100)
        fig, ax = plt.subplots(figsize=(6, 6))
        ax.set_title('')
        ax.set_ylabel('Norm Events')
        ax.set_xlabel('Discriminator')
        for key in sorted(trainSample.keys()):
            if key.find("mask") != -1:
                yt = y_train_Sp[trainSample[key]]
                wt = weights[trainSample[key]]
                plt.hist(yt, bins, alpha=0.9, histtype='step', lw=2, label=sample + " Train " + key, density=True, weights=wt)
        plt.legend(loc='best')
        fig.savefig(self.config["outputDir"] + "/discriminator_nJet_" + sample + ".png", dpi=fig.dpi)

    # Plot validation ROC curve
    fpr_Val, tpr_Val, thresholds_Val = roc_curve(valData["labels"][:, 0], y_Val, sample_weight=valData["Weight"])
    fpr_Train, tpr_Train, thresholds_Train = roc_curve(self.trainData["labels"][:, 0], y_Train, sample_weight=self.trainData["Weight"])
    fpr_OTrain, tpr_OTrain, thresholds_OTrain = roc_curve(trainOData["labels"][:, 0], y_OTrain, sample_weight=trainOData["Weight"])
    auc_Val = auc(fpr_Val, tpr_Val)
    auc_Train = auc(fpr_Train, tpr_Train)
    auc_OTrain = auc(fpr_OTrain, tpr_OTrain)
    #self.metric["OverTrain"] = abs(auc_Val - auc_Train)
    #self.metric["Performance"] = abs(1 - auc_Train)

    fig = plt.figure()
    plt.plot([0, 1], [0, 1], 'k--')
    plt.plot(fpr_Val, tpr_Val, color='xkcd:black', label='Val (area = {:.3f})'.format(auc_Val))
    plt.plot(fpr_Train, tpr_Train, color='xkcd:red', label='Train (area = {:.3f})'.format(auc_Train))
    plt.xlabel('False positive rate')
    plt.ylabel('True positive rate')
    plt.title('ROC curve')
    plt.legend(loc='best')
    fig.savefig(self.config["outputDir"] + "/roc_plot.png", dpi=fig.dpi)

    fig = plt.figure()
    plt.plot([0, 1], [0, 1], 'k--')
    plt.plot(fpr_OTrain, tpr_OTrain, color='xkcd:black', label="Train " + self.config["otherttbarMC"] + " (area = {:.3f})".format(auc_OTrain))
    plt.plot(fpr_Train, tpr_Train, color='xkcd:red', label="Train " + self.config["ttbarMC"] + " (area = {:.3f})".format(auc_Train))
    plt.xlabel('False positive rate')
    plt.ylabel('True positive rate')
    plt.title('ROC curve')
    plt.legend(loc='best')
    fig.savefig(self.config["outputDir"] + "/roc_plot_TT_TTJets.png", dpi=fig.dpi)

    fig = plt.figure()
    plt.plot([0, 1], [0, 1], 'k--')
    plt.xlabel('False positive rate')
    plt.ylabel('True positive rate')
    plt.title('ROC curve')
    njetPerformance = []
    # Use loop-local names here so the inclusive fpr/tpr/auc_Train computed
    # above are not clobbered before the metrics are filled in below
    for key in sorted(self.trainData.keys()):
        if key.find("mask") != -1:
            labels = self.trainData["labels"][self.trainData[key]]
            weights = self.trainData["Weight"][self.trainData[key]]
            y = y_Train[self.trainData[key]]
            if len(y) == 0:
                continue
            fpr_key, tpr_key, thresholds_key = roc_curve(labels[:, 0], y, sample_weight=weights)
            auc_key = auc(fpr_key, tpr_key)
            njetPerformance.append(auc_key)
            plt.plot(fpr_key, tpr_key, label="Train " + key + " (area = {:.3f})".format(auc_key))
    plt.legend(loc='best')
    fig.savefig(self.config["outputDir"] + "/roc_plot_" + self.config["ttbarMC"] + "_nJet.png", dpi=fig.dpi)
    #if not self.config["Mask"]:
    #    self.metric["nJetPerformance"] = 0.0
    #    for i in njetPerformance:
    #        self.metric["nJetPerformance"] += abs(i - self.metric["Performance"])

    fig = plt.figure()
    plt.plot([0, 1], [0, 1], 'k--')
    plt.xlabel('False positive rate')
    plt.ylabel('True positive rate')
    plt.title(self.config["otherttbarMC"] + " ROC curve")
    njetPerformance = []
    for key in sorted(trainOData.keys()):
        if key.find("mask") != -1:
            labels = trainOData["labels"][trainOData[key]]
            weights = trainOData["Weight"][trainOData[key]]
            y = y_OTrain[trainOData[key]]
            if len(y) == 0:
                continue
            fpr_key, tpr_key, thresholds_key = roc_curve(labels[:, 0], y, sample_weight=weights)
            auc_key = auc(fpr_key, tpr_key)
            njetPerformance.append(auc_key)
            plt.plot(fpr_key, tpr_key, label="Train " + key + " (area = {:.3f})".format(auc_key))
    plt.legend(loc='best')
    fig.savefig(self.config["outputDir"] + "/roc_plot_" + self.config["otherttbarMC"] + "_nJet.png", dpi=fig.dpi)

    # Plot nJet dependence
    binxl = self.config["minNJetBin"]
    binxh = self.config["maxNJetBin"] + 1
    numbin = binxh - binxl
    self.plot2DVar(name="nJet", binxl=binxl, binxh=binxh, numbin=numbin,
                   xIn=self.trainBg["nJet"][:, 0], yIn=y_Train_Bg, nbiny=50)
    #for i in range(len(self.config["allVars"])):
    #    binxl = np.amin(self.trainBg["data"][:,i])
    #    binxh = np.amax(self.trainBg["data"][:,i])
    #    numbin = abs(int(binxh - binxl))
    #    plot2DVar(name=self.config["allVars"][i], binxl=binxl, binxh=binxh, numbin=numbin, xIn=self.trainBg["data"][:,i], yIn=y_Train_Bg, nbiny=50)

    # Make the nJet distribution for 4 different bins
    nMVABins = 4
    inds = y_Train_Bg.argsort()
    sortednJet = self.trainBg["nJet"][:, 0][inds[::-1]]
    sorted_y = y_Train_Bg[inds[::-1]]
    nJetDeepESMBins = np.array_split(sortednJet, nMVABins)
    sorted_y_split = np.array_split(sorted_y, nMVABins)
    index = 0
    fig = plt.figure()
    bins = []
    for a in nJetDeepESMBins:
        print("DeepESM bin ", len(nJetDeepESMBins) - index, ": ", " NEvents: ", len(a),
              " bin cuts: ", sorted_y_split[index][0], " ", sorted_y_split[index][-1])
        plt.hist(a, bins=numbin, range=(binxl, binxh), histtype='step', density=True, log=True,
                 label='Bin {}'.format(len(nJetDeepESMBins) - index))
        bins.append([str(sorted_y_split[index][0]), str(sorted_y_split[index][-1])])
        index += 1
    plt.hist(sortednJet, bins=numbin, range=(binxl, binxh), histtype='step', density=True, log=True, label='Total')
    plt.legend(loc='upper right')
    fig.savefig(self.config["outputDir"] + "/nJet_log.png", dpi=fig.dpi)

    index = 0
    MVABinNJetShapeContent = []
    fig = plt.figure()
    for a in nJetDeepESMBins:
        n, _, _ = plt.hist(a, bins=numbin, range=(binxl, binxh), histtype='step', density=True, log=False,
                           label='Bin {}'.format(len(nJetDeepESMBins) - index))
        MVABinNJetShapeContent.append(n)
        index += 1
    TotalMVAnJetShape, _, _ = plt.hist(sortednJet, bins=numbin, range=(binxl, binxh),
                                       histtype='step', density=True, log=False, label='Total')
    plt.legend(loc='upper right')
    fig.savefig(self.config["outputDir"] + "/nJet.png", dpi=fig.dpi)
    #if not self.config["Mask"]:
    #    self.metric["nJetShape"] = 0.0
    #    for l in MVABinNJetShapeContent:
    #        for i in range(len(l)):
    #            self.metric["nJetShape"] += abs(l[i] - TotalMVAnJetShape[i])

    # Define metrics for the training
    self.metric["OverTrain"] = abs(auc_Val - auc_Train)
    self.metric["Performance"] = abs(1 - auc_Train)
    if not self.config["Mask"]:
        self.metric["nJetPerformance"] = 0.0
        for i in njetPerformance:
            self.metric["nJetPerformance"] += abs(i - self.metric["Performance"])
    if not self.config["Mask"]:
        self.metric["nJetShape"] = 0.0
        for l in MVABinNJetShapeContent:
            for i in range(len(l)):
                self.metric["nJetShape"] += abs(l[i] - TotalMVAnJetShape[i])

    # Save useful stuff
    np.save(self.config["outputDir"] + "/deepESMbin_dis_nJet.npy",
            {"nJetBins": nJetDeepESMBins, "y": sorted_y_split, "nJet": sortednJet})
    self.config["bins"] = bins
    with open(self.config["outputDir"] + "/config.json", 'w') as configFile:
        json.dump(self.config, configFile, indent=4, sort_keys=True)

    return self.config, self.metric
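# The four "MVA bins" built above are equal-population slices of the events
# sorted by discriminator output. A standalone sketch of that technique with
# toy numbers (the variable names here are illustrative, not the repo's):
import numpy as np

scores = np.random.uniform(0, 1, size=1000)  # stand-in for y_Train_Bg
njet = np.random.randint(7, 12, size=1000)   # stand-in for nJet per event

inds = scores.argsort()                 # sort events by discriminator value
sorted_njet = njet[inds[::-1]]          # reorder to descending score
sorted_scores = scores[inds[::-1]]
bins = np.array_split(sorted_njet, 4)   # 4 bins with (nearly) equal event counts
edges = np.array_split(sorted_scores, 4)
for i, (b, e) in enumerate(zip(bins, edges)):
    # e is descending, so e[-1] / e[0] are the low / high score cuts
    print("bin %d: %d events, score range [%.3f, %.3f]" % (len(bins) - i, len(b), e[-1], e[0]))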