def load_data(self, hps):
    """Read both jet samples, turn them into Lund images and preprocess them.

    `hps` is indexed with the keys 'labelA', 'labelB', 'fnA', 'fnB', 'nev',
    'navg', 'zca' and 'scaler'.  The method sets `dataset_name`, `lund`,
    `avg`, `preproc`, `imagesA` and `imagesB` on the instance; it returns
    nothing.
    """
    self.dataset_name = '%s2%s' % (hps['labelA'], hps['labelB'])
    self.lund = LundImage(npxlx=self.img_rows, npxly=self.img_cols)

    # Fill one list per sample; every image gets a new trailing axis.
    self.imagesA, self.imagesB = [], []
    for images, file_key in ((self.imagesA, 'fnA'), (self.imagesB, 'fnB')):
        jets = Jets(hps[file_key], hps['nev']).values()
        images.extend(
            self.lund(JetTree(jet))[:, :, np.newaxis] for jet in jets)

    # Batch averaging, applied to each sample separately.
    self.avg = Averager(hps['navg'])
    self.imagesA = self.avg.transform(np.array(self.imagesA))
    self.imagesB = self.avg.transform(np.array(self.imagesB))

    # Both preprocessor flavours take the same keyword arguments, so only
    # the class depends on the 'zca' switch.
    preproc_cls = PreprocessorZCA if hps['zca'] else Preprocessor
    self.preproc = preproc_cls(scaler=hps['scaler'], flatten=False,
                               remove_zero=False, pxl_by_pxl=False)

    # The preprocessor is fitted on the union of both samples and then
    # applied to each of them.
    self.preproc.fit(np.concatenate((self.imagesA, self.imagesB)))
    self.imagesA = self.preproc.transform(self.imagesA)
    self.imagesB = self.preproc.transform(self.imagesB)
def main(args):
    """Plot the soft-drop multiplicity of the labelled samples.

    Reads `args.data`, `args.nev`, `args.npx`, `args.yaxis`, `args.output`
    and `args.label_data_pairs` (a flat sequence alternating label and
    data entry).  The figure is written to `<output>/softdropmult.pdf`, or
    to `softdropmult.pdf` when `args.output` is empty.

    When `args.data` is set, the reference jets are read and both their
    Lund images and their soft-drop multiplicities are handed to
    `plot_sdmult`; otherwise `None` is passed for both.
    NOTE(review): whether `plot_sdmult` accepts `None` references is not
    visible from this function -- confirm against its implementation.
    """
    zcut = 0.007
    beta = -1
    thetacut = 0.0009

    # Bind the reference inputs up front so that the plotting call below
    # never hits an unbound name when no reference data is given.
    imgs_ref = None
    nsd_ref = None
    if args.data:
        sdmult = SoftDropMult(zcut=zcut, beta=beta, thetacut=thetacut)
        reader = Jets(args.data, args.nev)
        events = reader.values()
        imgs_ref = np.zeros((len(events), args.npx, args.npx))
        li_gen = LundImage(npxlx=args.npx, y_axis=args.yaxis)
        nsd_ref = []
        for i, jet in enumerate(events):
            tree = JetTree(jet)
            nsd_ref.append(sdmult(tree))
            imgs_ref[i] = li_gen(tree)

    # Only trailing slashes are removed: stripping both ends would turn an
    # absolute path such as '/tmp/out' into the relative 'tmp/out'.
    folder = args.output.rstrip('/') + '/' if args.output else ''

    pairs = args.label_data_pairs
    assert (len(pairs) % 2 == 0)
    filedic = {}
    for i in range(0, len(pairs), 2):
        filedic[pairs[i]] = pairs[i + 1]

    print('Plotting soft drop multiplicity')
    plot_sdmult(filedic, imgs_ref, folder + 'softdropmult.pdf', nsd_ref,
                npx=args.npx, zcut=zcut, beta=beta, thetacut=thetacut)
def main(args):
    """Plot slices of the labelled samples, with ratios if a reference is given.

    Reads `args.data`, `args.nev`, `args.npx`, `args.yaxis`, `args.output`
    and `args.label_data_pairs` (a flat sequence alternating label and
    data entry).  Output files are written below `args.output`, or into
    the current directory when it is empty.

    With `args.data` set, the reference images and their average are used
    for the activation plot and the two slice plots; without it the
    `*_noratio` variants of the slice plots are produced instead.
    """
    imgref = None
    if args.data:
        reader = Jets(args.data, args.nev)
        events = reader.values()
        imgs_ref = np.zeros((len(events), args.npx, args.npx))
        li_gen = LundImage(npxlx=args.npx, y_axis=args.yaxis)
        for i, jet in enumerate(events):
            imgs_ref[i] = li_gen(JetTree(jet))
        imgref = np.average(imgs_ref, axis=0)

    # Only trailing slashes are removed: stripping both ends would turn an
    # absolute path such as '/tmp/out' into the relative 'tmp/out'.
    folder = args.output.rstrip('/') + '/' if args.output else ''

    pairs = args.label_data_pairs
    assert (len(pairs) % 2 == 0)
    filedic = {}
    for i in range(0, len(pairs), 2):
        filedic[pairs[i]] = pairs[i + 1]

    print('Plotting slices')
    if imgref is not None:
        plot_activation(filedic, imgs_ref, folder + 'activation.pdf')
        plot_slice_delta(filedic, imgref, folder + 'delta_slice.pdf',
                         args.npx)
        plot_slice_kt(filedic, imgref, folder + 'kt_slice.pdf', args.npx)
    else:
        plot_slice_delta_noratio(filedic, folder + 'delta_slice.pdf',
                                 args.npx)
        plot_slice_kt_noratio(filedic, folder + 'kt_slice.pdf', args.npx)
def main(args):
    """Round-trip images through a preprocessor and show input next to output.

    Reads `args.keepzero`, `args.noscaler`, `args.noflat`, `args.mnist`,
    `args.data`, `args.nev`, `args.npx`, `args.yaxis`, `args.pca`,
    `args.autoencoder`, `args.epochs` and `args.zca`.

    The training images are fitted, transformed and inverted with the
    selected preprocessor.  A random selection of 2*r images (r = 5) is
    then displayed in a four-column figure: two columns of inputs and two
    columns of the corresponding decoded images.  The norm of the average
    difference over that selection is printed as the loss.
    """
    rem0 = not args.keepzero
    scaler = not args.noscaler
    flatten = not args.noflat

    # read in the data set
    if args.mnist:
        # for debugging purposes, we have the option of loading in the
        # mnist data and training the model on this.
        (img_train, _), (_, _) = mnist.load_data()
        # Divide by 255 (this does not centre the values around zero).
        img_train = img_train.astype('float32') / 255
        img_train = np.expand_dims(img_train, axis=3)
    else:
        # load in the jets from file, and create an array of lund images
        reader = Jets(args.data, args.nev)
        events = reader.values()
        img_train = np.zeros((len(events), args.npx, args.npx, 1))
        li_gen = LundImage(npxlx=args.npx, y_axis=args.yaxis)
        for i, jet in enumerate(events):
            tree = JetTree(jet)
            img_train[i] = li_gen(tree).reshape(args.npx, args.npx, 1)

    # set up the preprocessing pipeline
    if args.pca:
        preprocess = PreprocessorPCA(args.pca, whiten=True, scaler=scaler,
                                     flatten=flatten, remove_zero=rem0)
    elif args.autoencoder:
        preprocess = PreprocessorAE(args.autoencoder, args.epochs,
                                    scaler=scaler, flatten=flatten,
                                    remove_zero=rem0)
    elif args.zca:
        preprocess = PreprocessorZCA(scaler=scaler, flatten=flatten,
                                     remove_zero=rem0)
    else:
        preprocess = Preprocessor(scaler=scaler, flatten=flatten,
                                  remove_zero=rem0)

    # fit the preprocessing unit
    preprocess.fit(img_train)

    # transform the images
    # NB: for ZCA, the zca factor is set during this step
    img_transf = preprocess.transform(img_train)
    print('Shape after preprocessing:', img_transf.shape)

    # now transform back to images
    img_transf = preprocess.inverse(img_transf)

    r = 5
    selec = np.random.choice(img_transf.shape[0], 2 * r, replace=True)

    if not args.mnist:
        # Interpret the probabilistic sample as physical images: a pixel
        # becomes 0 when its value is below a fresh uniform draw from
        # [0, 1) and 1 otherwise.  One draw per pixel, done in a single
        # array operation instead of a Python-level loop over every pixel.
        thresholds = np.random.uniform(0, 1, size=img_transf.shape)
        img_transf[...] = ~(img_transf < thresholds)

    ref_transf = img_transf.reshape(
        img_transf.shape[0], args.npx, args.npx)[selec, :]
    ref_train = img_train.reshape(
        img_train.shape[0], args.npx, args.npx)[selec, :]

    loss = np.linalg.norm(np.average(ref_train - ref_transf, axis=0))
    print('# loss: ', loss)

    fig, axs = plt.subplots(r, 4)
    axs[0, 0].title.set_text('Input')
    axs[0, 3].title.set_text('Decoded')
    for i in range(r):
        # columns 0/1 show selections i and r+i of the input, columns 2/3
        # the same two selections after the round trip
        axs[i, 0].imshow(ref_train[i, :, :], cmap='gray')
        axs[i, 0].axis('off')
        axs[i, 1].imshow(ref_train[r + i, :, :], cmap='gray')
        axs[i, 1].axis('off')
        axs[i, 2].imshow(ref_transf[i, :, :], cmap='gray')
        axs[i, 2].axis('off')
        axs[i, 3].imshow(ref_transf[r + i, :, :], cmap='gray')
        axs[i, 3].axis('off')

    # vertical separator between the input and the decoded half
    plt.plot([0.5, 0.5], [0, 1], color='lightgray', lw=5,
             transform=plt.gcf().transFigure, clip_on=False)
    plt.show()
    plt.close()
def plot_events(gen_sample, avg, preproc, datafile, setup, folder):
    """Write `<folder>/plot_events.pdf`, one page per processing stage.

    Up to 5000 jets are read from `datafile` and converted to Lund images
    of `setup['npx']` x `setup['npx']` pixels (`setup['y_axis']` selects
    the y axis, defaulting to 'kt').  The first image of each of the
    following arrays is drawn on its own page, in this order:

    1. the raw images read from file,
    2. those images after `avg.transform`,
    3. `preproc.unmask(preproc.transform(...))` of the averaged images,
    4. `preproc.unmask(gen_sample)`,
    5. `preproc.inverse(gen_sample)`,
    6. `avg.inverse` of the previous array.
    """
    npx = setup['npx']

    # load in the data
    reader = Jets(datafile, 5000)
    events = reader.values()
    img_data = np.zeros((len(events), npx, npx, 1))
    li_gen = LundImage(
        npxlx=npx,
        y_axis=setup['y_axis'] if 'y_axis' in setup else 'kt')
    for i, jet in enumerate(events):
        img_data[i] = li_gen(JetTree(jet)).reshape(npx, npx, 1)

    # now reformat the training set as its average over n elements
    img_input = avg.transform(img_data)

    # set up the preprocessed input
    img_unmask = preproc.unmask(preproc.transform(img_input))

    # set up the generated images
    gen_unmask = preproc.unmask(gen_sample)
    gen_processed = preproc.inverse(gen_sample)
    gen_final = avg.inverse(gen_processed)

    pages = (
        ('Raw input', img_data),
        ('Averaged input', img_input),
        ('Preprocessed input', img_unmask),
        ('Raw generated output', gen_unmask),
        ('Processed generated sample', gen_processed),
        ('Generated sample', gen_final),
    )

    with PdfPages(f'{folder}/plot_events.pdf') as pdf:
        cbartics = [-1.0, -0.5, 0.0, 0.5, 1.0]
        for title, images in pages:
            plt.figure(figsize=(4.5, 4))
            plt.title(title)
            plt.imshow(images[0].reshape(npx, npx).transpose(),
                       vmin=-1.0, vmax=1.0, cmap=cm.seismic,
                       origin='lower', aspect='auto',
                       extent=[LundImage.xval[0], LundImage.xval[1],
                               LundImage.yval[0], LundImage.yval[1]])
            plt.colorbar(orientation='vertical', label=r'$\rho$',
                         ticks=cbartics)
            # Raw strings: '\l', '\D' and '\m' are not valid escape
            # sequences in a normal string literal.
            # NOTE(review): the y label is fixed to k_t even when
            # setup['y_axis'] selects something else -- confirm.
            plt.xlabel(r'$\ln(1 / \Delta_{ab})$')
            plt.ylabel(r'$\ln(k_{t} / \mathrm{GeV})$')
            pdf.savefig(bbox_inches='tight')
            plt.close()
def plot_events_debug(gen_sample, preproc, datafile, setup, folder):
    """Save a 5 x 12 grid of debug images to `<folder>/plot_debug.pdf`.

    The twelve columns form four groups of three: averaged input,
    preprocessed input, raw generated sample and final generated sample.
    Row `j`, position `k` inside a group shows image `k + 3*j` of the
    corresponding array.  `datafile` is either the string 'mnist' or is
    passed to `Jets`.
    """
    npx = setup['npx']

    # load in the data
    if datafile == 'mnist':
        # if mnist data, load the images from keras
        from keras.datasets import mnist
        (img_data, _), (_, _) = mnist.load_data()
        # Rescale -1 to 1
        img_data = (img_data.astype(np.float32) - 127.5) / 127.5
        img_data = np.expand_dims(img_data, axis=3)
    else:
        events = Jets(datafile, 5000).values()
        img_data = np.zeros((len(events), npx, npx, 1))
        li_gen = LundImage(
            npxlx=npx,
            y_axis=setup['y_axis'] if 'y_axis' in setup else 'kt')
        for idx, jet in enumerate(events):
            img_data[idx] = li_gen(JetTree(jet)).reshape(npx, npx, 1)

    # each entry of the input is the average of `navg` images drawn
    # without replacement from the data
    n_img = len(img_data)
    img_input = np.zeros((n_img, npx, npx, 1))
    for idx in range(n_img):
        picks = np.random.choice(img_data.shape[0], setup['navg'],
                                 replace=False)
        img_input[idx] = np.average(img_data[picks, :], axis=0)

    # set up the preprocessed input
    img_unmask = preproc.unmask(preproc.transform(img_input))

    # set up the generated images
    gen_unmask = preproc.unmask(gen_sample)
    gen_final = preproc.inverse(gen_sample)

    # one entry per group of three columns: title, images, imshow options
    groups = (
        ('Input image', img_input,
         dict(vmin=0.0, vmax=0.5, origin='lower')),
        ('Preprocessed input', img_unmask,
         dict(vmin=-1.0, vmax=1, cmap=cm.seismic, origin='lower')),
        ('Raw generated sample', gen_unmask,
         dict(vmin=-1.0, vmax=1, cmap=cm.seismic, origin='lower')),
        ('Final generated sample', gen_final,
         dict(vmin=0.0, vmax=0.5, origin='lower')),
    )

    fig, ax = plt.subplots(figsize=(15, 6), nrows=5, ncols=12)
    for row_idx, row in enumerate(ax):
        for col_idx, cell in enumerate(row):
            cell.axis('off')
            title, images, options = groups[col_idx // 3]
            pos = col_idx % 3
            # the title goes on the middle column of each group, top row
            if pos == 1 and row_idx == 0:
                cell.set_title(title)
            cell.imshow(
                images[pos + 3 * row_idx].reshape(npx, npx).transpose(),
                **options)
    plt.savefig(f'{folder}/plot_debug.pdf')
def plot_lund_with_ref(filename, reference, figname, y_axis='kt'):
    """Write a six-page PDF comparing generated images with a reference.

    `filename` is loaded with `np.load` and indexed as a stack of 2D
    images.  `reference` is either the string 'mnist' or is passed to
    `Jets`, from which as many jets as there are generated images are
    requested.  The pages of `figname` are, in order:

    1. a 5 x 5 grid of the first 25 generated images,
    2. a 5 x 5 grid of the first 25 reference images,
    3. the average generated image,
    4. the average reference image,
    5. the ratio of the two averages,
    6. histograms of the per-image pixel sums of both samples.
    """
    r, c = 5, 5
    imgs = np.load(filename)
    # NOTE(review): earlier code also drew r*c random images from both
    # samples but never displayed them; the grids show the first r*c
    # images.  The unused draws were dropped -- confirm that random
    # sampling was not the intended behaviour.
    if reference == 'mnist':
        # if mnist data, load the images from keras
        from keras.datasets import mnist
        (imgs_ref, _), (_, _) = mnist.load_data()
        # Rescale -1 to 1
        imgs_ref = (imgs_ref.astype(np.float32) - 127.5) / 127.5
    else:
        # now read in the pythia reference sample
        reader = Jets(reference, imgs.shape[0])
        events = reader.values()
        imgs_ref = np.zeros((len(events), imgs.shape[1], imgs.shape[2]))
        li_gen = LundImage(npxlx=imgs.shape[1], npxly=imgs.shape[2],
                           y_axis=y_axis)
        for i, jet in enumerate(events):
            imgs_ref[i] = li_gen(JetTree(jet)).reshape(imgs.shape[1],
                                                       imgs.shape[2])

    samples = (('generated', imgs), ('reference', imgs_ref))

    with PdfPages(figname) as pdf:
        # pages 1-2: grids of individual images
        for title, stack in samples:
            fig, axs = plt.subplots(r, c)
            plt.suptitle(title)
            for i in range(r):
                for j in range(c):
                    axs[i, j].imshow(stack[i * c + j, :, :].transpose(),
                                     origin='lower', cmap='gray',
                                     vmin=0.0, vmax=1.0)
                    axs[i, j].axis('off')
            pdf.savefig()
            plt.close()

        # pages 3-4: average image of each sample
        cbartics = [0, 0.05, 0.1, 0.15, 0.2, 0.25]
        averages = {}
        for title, stack in samples:
            averages[title] = np.average(stack, axis=0).transpose()
            fig = plt.figure(figsize=(6, 6))
            plt.title(title)
            plt.imshow(averages[title], origin='lower', vmin=0.0, vmax=0.2,
                       aspect='auto',
                       extent=[LundImage.xval[0], LundImage.xval[1],
                               LundImage.yval[0], LundImage.yval[1]])
            plt.colorbar(orientation='vertical', label=r'$\rho$',
                         ticks=cbartics)
            # Raw strings: '\l', '\D' and '\m' are not valid escape
            # sequences in a normal string literal.
            plt.xlabel(r'$\ln(1 / \Delta_{ab})$')
            plt.ylabel(r'$\ln(k_{t} / \mathrm{GeV})$')
            pdf.savefig()
            plt.close()

        # page 5: ratio of the two averages
        fig = plt.figure(figsize=(6, 6))
        plt.title('generated/reference')
        plt.imshow(np.divide(averages['generated'], averages['reference']),
                   origin='lower', vmin=0.5, vmax=1.5, cmap=cm.seismic,
                   aspect='auto',
                   extent=[LundImage.xval[0], LundImage.xval[1],
                           LundImage.yval[0], LundImage.yval[1]])
        plt.colorbar(orientation='vertical')
        plt.xlabel(r'$\ln(1 / \Delta_{ab})$')
        plt.ylabel(r'$\ln(k_{t} / \mathrm{GeV})$')
        pdf.savefig()
        plt.close()

        # page 6: distribution of the summed pixel values per image
        fig = plt.figure()
        bins = np.arange(0, 101, 1)
        gen_act = [np.sum(img) for img in imgs]
        ref_act = [np.sum(img) for img in imgs_ref]
        plt.hist(gen_act, bins=bins, density=True, color='C0', alpha=0.3,
                 label='generated')
        plt.hist(ref_act, bins=bins, density=True, color='C1', alpha=0.3,
                 label='reference')
        plt.title('activated pixels')
        plt.xlim((0, 50))
        plt.legend()
        # save first, then close, like every other page
        pdf.savefig(fig)
        plt.close()
def build_and_train_model(setup):
    """Train the generative model described by `setup` and evaluate it.

    `setup` is indexed with the keys 'model', 'data', 'nev', 'npx',
    'navg', 'epochs', 'batch_size', 'ngen', 'scan' and (when 'scan' is
    truthy) 'monitor_final_loss'; 'y_axis' is optional and defaults to
    'kt'.  `setup` is also forwarded to `build_preprocessor` and
    `build_model`.

    Returns:
        If `setup['scan']` is truthy, a dict with the keys 'loss' and
        'status'.  'loss' is the final loss when
        `setup['monitor_final_loss']` is truthy and the raw loss
        otherwise.  If `setup['scan']` is falsy, the tuple
        `(model, gen_sample, loss, preprocess)`, where `loss` is the pair
        `(final loss, raw loss)`.

    Raises:
        ValueError: if `setup['model']` is not one of the supported names.
    """
    print('[+] Training model')
    K.clear_session()
    if setup['model'] not in ('gan', 'dcgan', 'wgan', 'wgangp', 'vae',
                              'aae', 'bgan', 'lsgan'):
        raise ValueError('Invalid input: choose one model at a time.')

    # read in the data set
    if setup['data'] == 'mnist':
        print('[+] Loading mnist data')
        from keras.datasets import mnist
        # for debugging purposes, we have the option of loading in the
        # mnist data and training the model on this.
        (img_data, _), (_, _) = mnist.load_data()
        # Compare by value: an identity test against a string literal
        # depends on interning and can pick the wrong branch.
        if setup['model'] != 'vae':
            # Rescale -1 to 1
            img_data = (img_data.astype(np.float32) - 127.5) / 127.5
        else:
            # the vae gets the pixel values divided by 255 instead
            img_data = img_data.astype('float32') / 255
        img_data = np.expand_dims(img_data, axis=3)
    else:
        # load in the jets from file, and create an array of lund images
        print('[+] Reading jet data from file')
        reader = Jets(setup['data'], setup['nev'])
        events = reader.values()
        img_data = np.zeros((len(events), setup['npx'], setup['npx'], 1))
        li_gen = LundImage(
            npxlx=setup['npx'],
            y_axis=setup['y_axis'] if 'y_axis' in setup else 'kt')
        for i, jet in enumerate(events):
            tree = JetTree(jet)
            img_data[i] = li_gen(tree).reshape(setup['npx'], setup['npx'], 1)

    avg = Averager(setup['navg'])
    img_train = avg.transform(img_data)

    # set up a preprocessing pipeline
    preprocess = build_preprocessor(setup)

    # prepare the training data for the model training
    print('[+] Fitting the preprocessor')
    preprocess.fit(img_train)

    # NB: for ZCA, the zca factor is set in the process.transform call
    img_train = preprocess.transform(img_train)

    # now set up the model
    model = build_model(setup, length=(img_train.shape[1]))

    # train on the images
    print('[+] Training the generative model')
    model.train(img_train, epochs=setup['epochs'],
                batch_size=setup['batch_size'])

    # now generate a test sample and save it
    gen_sample = model.generate(setup['ngen'])

    # get the raw loss, evaluated on gan input and generated sample;
    # both sides are cut to the same number of leading entries
    print('[+] Calculating loss on raw training data')
    n_cmp = min(setup['ngen'], len(img_train))
    loss_raw = loss_calc_raw(preprocess.unmask(gen_sample[:n_cmp]),
                             preprocess.unmask(img_train[:n_cmp]))

    # retransform the generated sample to image space
    gen_sample = preprocess.inverse(gen_sample)
    gen_sample = avg.inverse(gen_sample)

    # get reference sample and generated sample for tests: as many
    # unaveraged images as were generated, drawn with replacement
    flat_data = img_data.reshape(img_data.shape[0], setup['npx'],
                                 setup['npx'])
    ref_sample = flat_data[
        np.random.choice(img_data.shape[0], len(gen_sample), replace=True),
        :]

    print('[+] Calculating final loss after inverting preprocessing')
    loss = (loss_calc(gen_sample, ref_sample), loss_raw)
    if setup['scan']:
        res = {
            'loss': loss[0] if setup['monitor_final_loss'] else loss[1],
            'status': STATUS_OK
        }
    else:
        res = model, gen_sample, loss, preprocess
    return res