# --- resolve input file path -------------------------------------------------
# Use the given path verbatim when it exists, otherwise look inside --inp-dir.
if os.path.exists(options.inp_file):
    inp_file = options.inp_file
else:
    inp_file = options.inp_dir + '/' + options.inp_file

# --- optional hyper-parameter overrides --------------------------------------
hparams = {}
if options.hparams is not None:
    with open(options.hparams) as hf:
        hparams = json.loads(hf.read())

# --- read data ---------------------------------------------------------------
# Regression target is the generated energy; inputs come from `features`.
columns = features + ['genEnergy']
data = io.read_data(inp_file, columns=columns)
data.describe()

# --- normalise shower-shape energies -----------------------------------------
# NOTE: statement order matters and is preserved from the original code:
# e5x5 is first expressed as a fraction of the raw supercluster energy, and
# every other energy is then divided by that *already rescaled* e5x5 column.
data['full5x5_e5x5'] = data['full5x5_e5x5'] / data['scRawEnergy']
for _col in ('full5x5_eMax', 'full5x5_e2nd', 'full5x5_eTop',
             'full5x5_eBottom', 'full5x5_eLeft', 'full5x5_eRight',
             'full5x5_e2x5Max', 'full5x5_e2x5Left', 'full5x5_e2x5Right',
             'full5x5_e2x5Top'):
    data[_col] = data[_col] / data['full5x5_e5x5']
# --- command-line interface --------------------------------------------------
parser = OptionParser(option_list=[
    make_option("--inp-dir", type='string', dest="inp_dir",
                default=os.environ['SCRATCH'] + '/bregression'),
    make_option("--out-dir", type='string', dest="out_dir", default=None),
    make_option("--inp-file", type='string', dest='inp_file',
                default='ttbar_unweighted_full80M_selected.hd5'),
    make_option("--seed", type='int', dest='seed', default=87532),
    make_option("--test-frac", type='float', dest='test_frac', default=0.05),
])

## parse options
(options, args) = parser.parse_args()

# Resolve the input path: use it verbatim when it exists, otherwise
# look inside --inp-dir (os.path.join handles the separator robustly).
if os.path.exists(options.inp_file):
    inp_file = options.inp_file
else:
    inp_file = os.path.join(options.inp_dir, options.inp_file)

# Default the output directory to the directory holding the input file.
if options.out_dir is None:
    options.out_dir = os.path.dirname(inp_file)

# Read all columns.
data = io.read_data(inp_file, columns=None)

# Split into train/test; fixed seed keeps the split reproducible.
train, test = train_test_split(data, test_size=options.test_frac,
                               random_state=options.seed)

# Write the two splits next to each other, named after the input file
# (extension stripped) with "_train"/"_test" suffixes.
fname = os.path.join(options.out_dir,
                     os.path.basename(inp_file).rsplit(".", 1)[0])
print(fname)
train.to_hdf(fname + "_train.hd5", key='train', mode='w', format='t')
test.to_hdf(fname + "_test.hd5", key='test', mode='w', format='t')
import glob
import json

# --- command-line interface --------------------------------------------------
parser = OptionParser(option_list=[
    make_option("--training", type='string', dest="training", default='HybridLoss'),
    make_option("--inp-dir", type='string', dest="inp_dir",
                default='/users/nchernya//HHbbgg_ETH/root_files/'),
    make_option("--target-dir", type='string', dest="target_dir",
                default='/scratch/snx3000/nchernya/bregression/NN_output/'),
    make_option("--inp-file", type='string', dest='inp_file',
                default='ttbar_RegressionPerJet_heppy_energyRings3_forTesting.hd5'),
    make_option("--out-dir", type='string', dest="out_dir",
                default='/scratch/snx3000/nchernya/bregression/output_root/'),
])

## parse options
(options, args) = parser.parse_args()

# One or more training names, comma-separated.
input_trainings = options.training.split(',')

# ## Read test data and model
# NOTE: dir and file are concatenated with no separator — this relies on the
# trailing '/' in --inp-dir; kept as-is for backward compatibility.
data = io.read_data('%s%s' % (options.inp_dir, options.inp_file), columns=None)

# Re-express pt/mt at the raw-energy scale, then apply the jet correction.
data['Jet_pt'] = data['Jet_pt'] * data['Jet_rawEnergy'] / data['Jet_e'] * data['Jet_corr']
data['Jet_mt'] = data['Jet_mt'] * data['Jet_rawEnergy'] / data['Jet_e'] * data['Jet_corr']

for idx, name in enumerate(input_trainings):
    target = options.target_dir
    # List model checkpoints newest-first. This is a portable replacement for
    # the IPython-only shell-out `ls -t $target/*.hdf5`, preserving the same
    # ordering (most recently modified first).
    models = sorted(glob.glob(os.path.join(target, '*.hdf5')),
                    key=os.path.getmtime, reverse=True)

    # Read the training configuration stored alongside the models.
    with open('%s/config.json' % target) as fin:
        config = json.loads(fin.read())
# --- inputs ------------------------------------------------------------------
features = options.features.split(',')
inp_file_valid = options.inp_dir + '/' + options.inp_file_valid
inp_files = [options.inp_dir + '/' + c.strip()
             for c in options.inp_files.split(',')]

# --- optional hyper-parameter overrides --------------------------------------
# If the same parameter appears in several files, the last file wins.
hparams = {}
if options.hparams is not None:
    for fname in options.hparams.split(','):
        with open(fname) as hf:
            hparams.update(json.loads(hf.read()))

## read data
columns = features + ['Jet_mcPt'] + ['Jet_corr_JEC'] + ['Jet_corr_JER']
data_valid = io.read_data(inp_file_valid, columns=None)
df_list = [io.read_data(inf, columns=None) for inf in inp_files]

# Add the regression target (mcPt/pt) to every training frame, then drop
# extreme outliers.
# BUGFIX: the original wrote `data = data.query('Jet_mcPt_Jet_pt < 10')`
# inside `for data in df_list:`, which only rebound the loop variable and
# silently discarded the filtered frame — the cut never reached df_list.
# Rebuilding the list makes the cut actually take effect.
for df in df_list:
    df['Jet_mcPt_Jet_pt'] = df['Jet_mcPt'] / df['Jet_pt']
df_list = [df.query('Jet_mcPt_Jet_pt < 10') for df in df_list]

# Validation arrays: target is mcPt/pt, inputs are the feature columns.
X_shape = (data_valid[features].values).shape[1:]
y_valid = (data_valid['Jet_mcPt'] / data_valid['Jet_pt']).values.reshape(-1, 1)
X_valid = data_valid[features].values

# Batch generator over the (now filtered) training frames.
mygen = ffwd.Generator(df_list, options.batch_size, features, 'Jet_mcPt_Jet_pt')