def makeFileList(name) :
    # Nothing to do if a cached file list already exists.
    if os.path.exists(self.inputFilesListFile(name)) and self.useCachedFileLists() :
        return
    fileNames = eval(self.sampleDict[name].filesCommand)
    assert fileNames, "The command '%s' produced an empty list of files" % self.sampleDict[name].filesCommand
    # Pickle the list to a local temporary area, then copy it to the global location.
    tmpDir, localFileName, globalFileName = self.globalToLocal(self.inputFilesListFile(name))
    utils.writePickle(localFileName, fileNames)
    self.localToGlobal(tmpDir, localFileName, globalFileName)
def makeFileList(name) :
    fName = self.inputFilesListFile(name)
    # Nothing to do if a cached file list already exists.
    if os.path.exists(fName) and self.useCachedFileLists() :
        return
    fileNames = eval(self.sampleDict[name].filesCommand)
    assert fileNames, "The command '%s' produced an empty list of files" % self.sampleDict[name].filesCommand
    # Cache (fileName, nEvents) pairs so the event counts are stored alongside the file names.
    utils.mkdir(os.path.dirname(fName))
    utils.writePickle(fName, zip(fileNames, map(nEventsFile, fileNames)))
def writePickle(self) :
    def pickleJar(step) :
        # 'nPass', 'nFail', and 'outputFileName' are reserved for use by analysisStep itself.
        inter = set(step.varsToPickle()).intersection(set(['nPass', 'nFail', 'outputFileName']))
        assert not inter, "%s is trying to pickle %s, which %s reserved for use by analysisStep." % (step.name, str(inter), "is" if len(inter)==1 else "are")
        return dict([(item, getattr(step, item)) for item in step.varsToPickle() + ['nPass', 'nFail']] +
                    [('outputFileName', getattr(step, 'outputFileName').replace(self.outputDir, self.globalDir))])
    utils.writePickle(self.pickleFileName,
                      [[pickleJar(step) for step in self.steps], self.calculablesUsed, self.leavesUsed])
def __init__(self, options) :
    self.__batch    = options.batch
    self.__resubmit = options.resubmit
    self.__loop     = options.loop
    self.__profile  = options.profile
    self.__jobId    = options.jobId
    self.__tag      = options.tag
    self.__sample   = options.sample
    self.__site     = options.site if options.site!=None else sites.prefix()
    self.__tags     = options.tags.split(',') if type(options.tags)==str else options.tags
    self.__samples  = options.samples.split(',') if type(options.samples)==str else options.samples
    self.__omit     = options.omit.split(',')
    self.__nocheck  = options.nocheck
    self.__quiet    = options.quiet
    self.__skip     = options.skip
    self.moveOutputFiles = (self.__jobId is None) or sites.info(site = self.__site, key = "moveOutputFilesBatch")
    self.localStem  = "%s/%s"%(sites.info(site = self.__site, key = "localOutputDir" ), self.name)
    self.globalStem = "%s/%s"%(sites.info(site = self.__site, key = "globalOutputDir"), self.name)

    self.sampleDict = samples.SampleHolder()
    map(self.sampleDict.update, self.listOfSampleDictionaries())

    # If only a listing of tags or samples was requested, print it and exit.
    if self.__tags is True or self.__samples is True :
        for conf in self.configurations :
            print conf['tag']
            if self.__samples is True :
                print '\n'.join(' '+sample.weightedName for sample in self.filteredSamples(conf))
        sys.exit(0)

    if self.__loop:
        os.system("mkdir -p %s"%self.localStem)
        if self.__jobId==None :
            os.system("mkdir -p %s"%self.globalStem)
            self.makeInputFileLists()

    for conf in self.configurations:
        # A negative options.slices means "this many events per slice" rather than a fixed number of slices.
        if options.slices < 0:
            nSlicesFixed = None
            nEventsPerSlice = -options.slices
        else:
            nSlicesFixed = options.slices
            nEventsPerSlice = None
        self.listsOfLoopers[conf['tag']] = self.sampleLoopers(conf, nSlicesFixed=nSlicesFixed,
                                                              nEventsPerSlice=nEventsPerSlice,
                                                              byEvents=options.byEvents)
        # Record each looper's slice count so that batch jobs can locate their slice.
        if self.__jobId is None and self.__loop:
            for looper in self.listsOfLoopers[conf['tag']]:
                utils.writePickle(self.jobsFile(conf['tag'], looper.name, clean=True), looper.nSlices)
def __init__(self, options) :
    self.__batch   = options.batch
    self.__loop    = int(options.loop)   if options.loop!=None   else None
    self.__nSlices = int(options.slices) if options.slices!=None else 1
    self.__profile = options.profile
    self.__jobId   = options.jobId
    self.__tag     = options.tag
    self.__sample  = options.sample
    self.__site    = options.site if options.site!=None else configuration.sitePrefix()
    self.__tags    = options.tags.split(',') if type(options.tags)==str else options.tags
    self.__samples = options.samples.split(',') if type(options.samples)==str else options.samples
    self.__omit    = options.omit.split(',')
    self.__nocheck = options.nocheck
    self.localStem  = "%s/%s"%(configuration.siteInfo(site = self.__site, key = "localOutputDir" ), self.name)
    self.globalStem = "%s/%s"%(configuration.siteInfo(site = self.__site, key = "globalOutputDir"), self.name)

    self.sampleDict = samples.SampleHolder()
    map(self.sampleDict.update, self.listOfSampleDictionaries())

    # If only a listing of tags or samples was requested, print it and exit.
    if self.__tags is True or self.__samples is True :
        for conf in self.configurations :
            print conf['tag']
            if self.__samples is True :
                print '\n'.join(' '+sample.weightedName for sample in self.filteredSamples(conf))
        sys.exit(0)

    if self.__loop!=None :
        os.system("mkdir -p %s"%self.localStem)
        if self.__jobId==None :
            os.system("mkdir -p %s"%self.globalStem)
            self.makeInputFileLists()

    for conf in self.configurations :
        self.listsOfLoopers[conf['tag']] = self.sampleLoopers(conf)
        # Record the requested number of slices so that batch jobs can locate their slice.
        if self.__jobId==None and self.__loop!=None :
            for looper in self.listsOfLoopers[conf['tag']] :
                utils.writePickle(self.jobsFile(conf['tag'], looper.name), self.__nSlices)
#t = timeit.Timer(stmt=code, setup='from __main__ import utils')
#print(filename + '_' + str(i+1) + '.pickle')
#print('%f' % float(t.timeit(10/10)))
# Announce the token file name over the socket, then send its pickled contents.
sock = getConn()
sock.sendall('token' + filename + '_' + str(i) + '.pickle' + "\n")
received = sock.recv(1024)
print "Received: {}".format(received)
data = open('./pickles/token' + filename + '_' + str(i) + '.pickle', 'rb').read()
sock.sendall(data)
sock.close()


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--filename', '-f', help='Path of file to encrypt')
    parser.add_argument('--window', '-w', help='window size of the bucket')
    parser.add_argument('--chars', '-c', help='number of chars')
    args = parser.parse_args()

    # Same key used for different files!
    key = nacl.utils.random(nacl.secret.SecretBox.KEY_SIZE)
    nonce = nacl.utils.random(nacl.secret.SecretBox.NONCE_SIZE)
    utils.writePickle('./pickles/key.pickle', key)

    parseRealData(args.filename, int(args.window), int(args.chars))
    #createData()
    #sendFM()
    #createTokens()
def worker(pickle, json) :
    if not os.path.exists(pickle) :
        utils.writePickle(pickle, recordedInvMicrobarns(json))
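The snippets above all delegate serialization to a small utils.writePickle helper whose definition is not shown here. As a rough sketch only, assuming the helper is a thin wrapper around the standard pickle module, it and a matching readPickle counterpart could look like this (the names and behaviour below are assumptions, not the projects' actual code):

import pickle

def writePickle(fileName, payload) :
    # Assumed helper: serialize 'payload' to 'fileName' with the standard pickle module.
    outFile = open(fileName, 'wb')
    pickle.dump(payload, outFile, protocol=pickle.HIGHEST_PROTOCOL)
    outFile.close()

def readPickle(fileName) :
    # Assumed counterpart: load whatever writePickle stored.
    inFile = open(fileName, 'rb')
    payload = pickle.load(inFile)
    inFile.close()
    return payload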
def train():
    X, Y, testX, testFid = prepare_data()
    X, Y, testX = str2float(X, Y, testX)
    X, testX = scale_data(X, testX)

    if args.select_feature:
        # select feature
        pass

    for iter in range(args.sample_num):
        np.random.seed(iter)
        logger.info('Iteration %2d, Current random seed: %2d' % (iter, np.random.get_state()[1][0]))

        # sampling data
        # trainX, trainY, validX, validY = sample_data(X, Y)
        trainX = X
        trainY = Y

        global dTrain
        dTrain = xgb.DMatrix(trainX, trainY, nthread=args.nthread)
        global dTest
        dTest = xgb.DMatrix(testX, nthread=args.nthread)

        # bayes_opt selection: bounds of the parameters to be tuned
        logger.info('Setting parameters for BayesianOptimization')
        params = {
            'max_depth': (10, args.xgb_max_depth),
            'subsample': (args.xgb_subsample, 1),
            'min_child_weight': (1, args.xgb_min_child_weight),
            'alpha': (0, args.xgb_alpha),
            'gamma': (0, args.xgb_gamma),
            'colsample_bytree': (args.xgb_colsample_bytree, 1),
            'colsample_bylevel': (args.xgb_colsample_bylevel, 1),
            'learning_rate': (args.xgb_learning_rate_lower, args.xgb_learning_rate_upper)
        }

        logger.info('Running BayesianOptimization')
        xgb_bayesopt = BayesianOptimization(train_xgb, params)
        xgb_bayesopt.maximize(init_points=5, n_iter=25)

        # get the best params
        best_params = xgb_bayesopt.res['max']['max_params']
        logger.info('Iteration: %d, XGBoost max auc: %f' % (iter, xgb_bayesopt.res['max']['max_val']))
        for param, val in best_params.items():
            logger.info('Param %s: %r' % (param, val))

        # set the best parameters found by BayesianOptimization
        logger.info('Setting best parameters from BayesianOptimization')
        xgb_params = {
            'nthread': args.nthread,
            'n_estimators': args.xgb_n_estimators,
            'eta': args.xgb_eta,
            'silent': args.xgb_silent,
            # for _train_internal
            'eval_metric': [args.xgb_eval_metric],
            # tuned values, clipped to their valid ranges
            'max_depth': int(best_params['max_depth']),
            'subsample': max(min(best_params['subsample'], 1), 0),
            'min_child_weight': int(best_params['min_child_weight']),
            'alpha': max(best_params['alpha'], 0),
            'gamma': max(best_params['gamma'], 0),
            'colsample_bytree': max(min(best_params['colsample_bytree'], 1), 0),
            'colsample_bylevel': max(min(best_params['colsample_bylevel'], 1), 0),
            'learning_rate': max(min(best_params['learning_rate'], 1), 0),
        }

        # training
        model = xgb.train(xgb_params, dTrain,
                          num_boost_round=args.xgb_num_boost_rounds,
                          verbose_eval=args.xgb_verbose_eval,
                          maximize=args.xgb_maximize)
        writePickle(model, os.path.join(
            'mdl', 'model_iter%d_%dfold_%f.pkl' % (iter, args.xgb_nfold, xgb_bayesopt.res['max']['max_val'])))

        # predict on the test set
        predY = model.predict(dTest)
        result_df = pd.DataFrame(data={'y': predY})
        joined_df = pd.DataFrame(testFid).join(result_df)
        joined_df.to_csv(os.path.join(
            'result', 'xgb_result%d_%dfold.csv' % (iter, args.xgb_nfold)), index=False)
        # re-join the fid since the data were split randomly
        logger.info('----------------------------------------------------------------------\n\n\n')
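The train_xgb objective handed to BayesianOptimization above is not included in the snippet. A minimal sketch of such an objective, assuming it cross-validates each candidate parameter set on the global dTrain matrix with xgb.cv and returns the mean test AUC for bayes_opt to maximize (the objective, metric, and seed below are assumptions):

def train_xgb(max_depth, subsample, min_child_weight, alpha, gamma,
              colsample_bytree, colsample_bylevel, learning_rate):
    # Hypothetical objective: bayes_opt passes one candidate value per tuned
    # parameter and maximizes the returned score.
    params = {
        'objective': 'binary:logistic',      # assumption: binary task scored by AUC
        'eval_metric': 'auc',
        'max_depth': int(max_depth),         # tree depth must be an integer
        'subsample': max(min(subsample, 1), 0),
        'min_child_weight': int(min_child_weight),
        'alpha': max(alpha, 0),
        'gamma': max(gamma, 0),
        'colsample_bytree': max(min(colsample_bytree, 1), 0),
        'colsample_bylevel': max(min(colsample_bylevel, 1), 0),
        'learning_rate': max(min(learning_rate, 1), 0),
    }
    cv = xgb.cv(params, dTrain,
                num_boost_round=args.xgb_num_boost_rounds,
                nfold=args.xgb_nfold,
                metrics='auc',
                seed=0)
    # Return the best mean test AUC across boosting rounds.
    return cv['test-auc-mean'].max()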