Beispiel #1
0
def main():
    """Upload every configuration file named on the command line to couch.

    The command line is parsed with the parser returned by
    ``prepare_parser()``. When the ``wmtest`` option is set,
    ``wma.testbed`` is called with ``options.wmtesturl`` before any upload.
    Each positional argument is then handed to ``wma.upload_to_couch``
    together with the ``label``, ``user`` and ``group`` options.
    """
    parser = prepare_parser()
    options, args = parser.parse_args()
    if options.wmtest:
        # One pre-formatted argument: identical output under the Python 2
        # print statement and the Python 3 print function (the two spaces
        # after the colon reproduce the original separator).
        print("Setting to injection in cmswebtest :  %s" % (options.wmtesturl,))
        wma.testbed(options.wmtesturl)
    for cfg_name in args:
        wma.upload_to_couch(cfg_name, options.label, options.user, options.group)
Beispiel #2
0
def main():
    """Command-line entry point: upload the given cfg files to couch.

    Options and positional arguments come from ``prepare_parser()``. If the
    ``wmtest`` option is set, ``wma.testbed`` is called with the
    ``wmtesturl`` option first; afterwards every positional argument is
    uploaded through ``wma.upload_to_couch``.
    """
    cli_options, cfg_files = prepare_parser().parse_args()

    testbed_requested = cli_options.wmtest
    if testbed_requested:
        print("Setting to injection in cmswebtest : ", cli_options.wmtesturl)
        wma.testbed(cli_options.wmtesturl)

    for cfg_file in cfg_files:
        wma.upload_to_couch(
            cfg_file,
            cli_options.label,
            cli_options.user,
            cli_options.group,
        )
Beispiel #3
0
 def uploadConf(self, filePath, label, where):
     """Upload a configuration file to couch and return its identifier.

     In test mode nothing is uploaded: ``self.count`` is incremented and
     returned as a fake identifier. Otherwise the identifier is looked up
     in ``self.couchCache`` (keyed by the last ``/``-separated component
     of ``filePath``) and the file is uploaded only on a cache miss.

     Args:
         filePath: path of the configuration file.
         label: suffix appended to ``self.label`` to form the couch label.
         where: value passed to ``upload_to_couch`` as ``url``.

     Returns:
         ``self.count`` in test mode, otherwise the cached or freshly
         obtained return value of ``upload_to_couch``.

     Exits the process with status -16 when ``modules.wma`` cannot be
     imported.
     """
     labelInCouch = self.label + '_' + label
     cacheName = filePath.split('/')[-1]

     if self.testMode:
         self.count += 1
         print('\tFake upload to couch with label %s' % (labelInCouch,))
         return self.count

     try:
         from modules.wma import upload_to_couch
     except ImportError:
         # Only a failed import means "module not found"; any other error
         # raised while importing is allowed to propagate unchanged.
         print('\n\tUnable to find wmcontrol modules. Please include it in your python path\n')
         print('\n\t QUIT\n')
         sys.exit(-16)

     if cacheName in self.couchCache:
         print("Not re-uploading %s to %s for %s" % (filePath, where, label))
         cacheId = self.couchCache[cacheName]
     else:
         print("Loading %s to %s for %s" % (filePath, where, label))
         cacheId = upload_to_couch(filePath,
                                   labelInCouch,
                                   self.user,
                                   self.group,
                                   test_mode=False,
                                   url=where)
         # Remember the identifier so the same file name is uploaded once.
         self.couchCache[cacheName] = cacheId
     return cacheId
 def uploadConf(self,filePath,label,where):
     """Upload a configuration file to couch and return its identifier.

     In test mode nothing is uploaded: ``self.count`` is incremented and
     returned as a fake identifier. Otherwise the identifier is looked up
     in ``self.couchCache`` (keyed by the last ``/``-separated component
     of ``filePath``) and the file is uploaded only on a cache miss.

     Args:
         filePath: path of the configuration file.
         label: suffix appended to ``self.label`` to form the couch label.
         where: value passed to ``upload_to_couch`` as ``url``.

     Returns:
         ``self.count`` in test mode, otherwise the cached or freshly
         obtained return value of ``upload_to_couch``.

     Exits the process with status -16 when ``modules.wma`` cannot be
     imported.
     """
     labelInCouch = self.label + '_' + label
     cacheName = filePath.split('/')[-1]

     if self.testMode:
         self.count += 1
         print('\tFake upload of %s to couch with label %s' % (filePath, labelInCouch))
         return self.count

     try:
         from modules.wma import upload_to_couch
     except ImportError:
         # Only a failed import means "module not found"; any other error
         # raised while importing is allowed to propagate unchanged.
         print('\n\tUnable to find wmcontrol modules. Please include it in your python path\n')
         print('\n\t QUIT\n')
         sys.exit(-16)

     if cacheName in self.couchCache:
         print("Not re-uploading %s to %s for %s" % (filePath, where, label))
         cacheId = self.couchCache[cacheName]
     else:
         print("Loading %s to %s for %s" % (filePath, where, label))
         cacheId = upload_to_couch(filePath,
                                   labelInCouch,
                                   self.user,
                                   self.group,
                                   test_mode=False,
                                   url=where)
         # Remember the identifier so the same file name is uploaded once.
         self.couchCache[cacheName] = cacheId
     return cacheId
Beispiel #5
0
def upload_to_couch_oneArg(arguments):
    """Call ``upload_to_couch`` with its arguments packed in one sequence.

    ``arguments`` must unpack into exactly five items:
    ``(filePath, labelInCouch, user, group, where)``. ``where`` is passed
    as the ``url`` keyword and ``test_mode`` is always ``False``.

    Returns whatever ``upload_to_couch`` returns.
    """
    from modules.wma import upload_to_couch

    path, couch_label, owner, owner_group, couch_url = arguments
    return upload_to_couch(path, couch_label, owner, owner_group,
                           test_mode=False, url=couch_url)
Beispiel #6
0
def upload_to_couch_oneArg(arguments):
    """Single-argument wrapper around ``upload_to_couch``.

    ``arguments`` is a 5-item sequence
    ``(filePath, labelInCouch, user, group, where)``; the last item is
    forwarded as ``url`` and ``test_mode`` is fixed to ``False``.

    Returns the value produced by ``upload_to_couch``.
    """
    from modules.wma import upload_to_couch

    cfg_file, label, user, group, target = arguments
    keyword_options = {'test_mode': False, 'url': target}
    doc_id = upload_to_couch(cfg_file, label, user, group, **keyword_options)
    return doc_id
Beispiel #7
0
def build_params_dict(section, cfg):
    '''
    Build the parameters dictionary for the request. Assumes the presence of an input .conf file or commandline.
    For the moment the defaults of the parameters are stored here.
    Put a dictionary on top?

    Every setting is read with ``cfg.get_param(name, default, section)``.
    Configuration files that have a cfg name but no docID are looked up in
    the cfg/docid dictionary built by ``make_cfg_docid_dict`` and, failing
    that, uploaded with ``wma.upload_to_couch``.

    Returns a tuple ``(params, service_params)``: ``params`` is the request
    dictionary (entries equal to "" or [] are removed before returning) and
    ``service_params`` carries the bookkeeping values collected here.

    Raises ``Exception`` for an unknown request type, for a ReDigi request
    that keeps none of its outputs, and when the campaign cannot be
    extracted from ``request_id``. Calls ``sys.exit(-1)`` when neither a
    step-1 docID nor ``url_dict`` is available.
    '''

    # wm testing
    wmtest = cfg.get_param('wmtest', False, section)

    url_dict = cfg.get_param('url_dict', "", section)

    # fetch some important parameters
    # 'step1_docID' overrides the older 'docID' key, so that old cfg files
    # can still be read
    step1_docID = cfg.get_param('docID', '', section)
    dummy = cfg.get_param('step1_docID', '', section)
    if dummy != '':
        step1_docID = dummy

    # elaborate the file containing the name docid pairs
    cfg_db_file = cfg.get_param('cfg_db_file', '', section)
    cfg_docid_dict = make_cfg_docid_dict(cfg_db_file)

    release = cfg.get_param('release', '', section)
    globaltag = cfg.get_param('globaltag', '', section)
    pileup_dataset = cfg.get_param('pu_dataset', '', section)
    primary_dataset = cfg.get_param('primary_dataset', '', section)

    filter_eff = cfg.get_param('filter_eff', '', section)
    if not filter_eff:
        filter_eff = 1.0

    number_events = int(cfg.get_param('number_events', 0, section))
    version = cfg.get_param('version', '', section)

    # new values for renewed Request Agent
    time_event = float(cfg.get_param('time_event', 20, section))
    size_memory = int(float(cfg.get_param('size_memory', 2300, section)))
    size_event = int(float(cfg.get_param('size_event', 2000, section)))
    if size_event < 0:
        size_event = 2000

    # parameters with fallback
    scramarch = cfg.get_param('scramarch', default_parameters['scramarch'], section)
    identifier = cfg.get_param('identifier', '', section)
    dbsurl = cfg.get_param('dbsurl', default_parameters['dbsurl'], section)

    includeparents = cfg.get_param('includeparents', default_parameters['includeparents'], section)

    req_name = cfg.get_param('req_name', '', section)
    process_string = cfg.get_param('process_string', '', section)
    processing_string = cfg.get_param('processing_string', '', section)
    batch = cfg.get_param('batch', '', section)
    open_running_timeout = int(float(cfg.get_param('open_running_timeout', '43200', section)))  # 12h is legacy

    # for the user and group
    user, group = get_user_group(cfg, section)

    # for the skims
    skim_cfg = cfg.get_param('skim_cfg', '', section)
    skim_docid = cfg.get_param('skim_docID', '', section)
    skim_name = cfg.get_param('skim_name', '', section)
    skim_input = cfg.get_param('skim_input', 'RECOoutput', section)

    if not skim_docid and skim_cfg:
        if skim_cfg in cfg_docid_dict:
            skim_docid = cfg_docid_dict[skim_cfg]
        else:
            skim_docid = wma.upload_to_couch(skim_cfg, section, user, group, test_mode)

    # priority
    priority = cfg.get_param('priority', default_parameters['priority'], section)

    # blocks
    blocks = cfg.get_param('blocks', [], section)

    # Now the service ones
    # 'step1_cfg' overrides the older 'cfg_path' key
    step1_cfg = cfg.get_param('cfg_path', '', section)
    dummy = cfg.get_param('step1_cfg', '', section)
    if dummy != '':
        step1_cfg = dummy

    harvest_cfg = cfg.get_param('harvest_cfg', '', section)
    harvest_docID = cfg.get_param('harvest_docID', '', section)

    step1_output = cfg.get_param('step1_output', '', section)
    keep_step1 = cfg.get_param('keep_step1', False, section)

    step2_cfg = cfg.get_param('step2_cfg', '', section)
    step2_docID = cfg.get_param('step2_docID', '', section)
    step2_output = cfg.get_param('step2_output', '', section)
    keep_step2 = cfg.get_param('keep_step2', False, section)

    step3_cfg = cfg.get_param('step3_cfg', '', section)
    step3_docID = cfg.get_param('step3_docID', '', section)
    step3_output = cfg.get_param('step3_output', '', section)

    transient_output = cfg.get_param('transient_output', [], section)

    request_type = cfg.get_param('request_type', default_parameters['request_type'], section)
    request_id = cfg.get_param('request_id', '', section)
    events_per_job = cfg.get_param('events_per_job', '', section)
    events_per_lumi = int(float(cfg.get_param('events_per_lumi', 100, section)))  # 100 is legacy
    force_lumis = cfg.get_param('force_lumis', False, section)
    brute_force = cfg.get_param('brute_force', False, section)
    margin = cfg.get_param('margin', 0.05, section)

    # Upload to couch if needed or check in the cfg dict if there
    docIDs = [step1_docID, step2_docID, step3_docID]
    cfgs = [step1_cfg, step2_cfg, step3_cfg]
    for step, step_cfg_name in enumerate(cfgs):
        if step_cfg_name != '' and docIDs[step] == '':
            # try to see if it is in the cfg name dict
            if step_cfg_name in cfg_docid_dict:
                print("Using the one in the cfg-docid dictionary.")
                docIDs[step] = cfg_docid_dict[step_cfg_name]
            else:
                print("No DocId found for section %s. Uploading the cfg to the couch." % section)
                docIDs[step] = wma.upload_to_couch(step_cfg_name, section, user, group, test_mode)

    step1_docID, step2_docID, step3_docID = docIDs
    if harvest_docID == '' and harvest_cfg != '':
        harvest_docID = wma.upload_to_couch(harvest_cfg, section, user, group, test_mode)

    # check if the request is valid
    if step1_docID == '' and url_dict == "":
        print("Invalid request, no docID configuration specified.")
        sys.stderr.write("[wmcontrol exception] Invalid request, no docID configuration specified.")
        sys.exit(-1)

    # Extract Campaign from PREP-ID if necessary
    campaign = cfg.get_param('campaign', '', section)
    if campaign == "" and request_id == "":
        print("Campaign and request-id are not set. Provide at least the Campaign.")
    elif campaign == "" and request_id != "":
        campaign_match = re.match(".*-(.*)-.*", request_id)
        if campaign_match is None:
            # Fail with an explicit message instead of an AttributeError
            # on None when request_id has no '<x>-<campaign>-<y>' shape.
            raise Exception("Cannot extract the campaign from request_id '%s'" % request_id)
        campaign = campaign_match.group(1)
    elif campaign != "" and request_id != "":
        print("Campaign and request-id are set. Using %s as campaign." % campaign)

    # a per-campaign time per event, when available, overrides the cfg value
    time_per_campaign = wma.time_per_events(campaign)
    if time_per_campaign:
        time_event = time_per_campaign

    service_params = {"section": section,
                      "version": version,
                      "request_type": request_type,
                      "step1_cfg": step1_cfg,
                      "step1_output": step1_output,
                      "keep_step1": keep_step1,
                      #
                      "step2_cfg": step2_cfg,
                      "step2_output": step2_output,
                      "keep_step2": keep_step2,
                      #
                      "step3_cfg": step3_cfg,
                      "step3_output": step3_output,
                      #
                      'cfg_docid_dict': cfg_docid_dict,
                      'req_name': req_name,
                      "batch": batch,
                      "process_string": process_string,
                      'force_lumis': force_lumis,
                      'brute_force': brute_force,
                      'margin': margin
                      }

    # According to the request type, cook a request!
    params = {"CMSSWVersion": release,
              "ScramArch": scramarch,
              "RequestPriority": priority,
              "RunWhitelist": ['Will Be replaced'],
              "InputDataset": 'Will Be replaced',
              "RunBlacklist": [],
              "BlockWhitelist": blocks,
              "BlockBlacklist": [],
              "DbsUrl": dbsurl,
              "RequestType": request_type,
              "GlobalTag": globaltag,
              "inputMode": "couchDB",
              "RequestString": "Will Be dynamically created",
              "Group": group,
              "Requestor": user,
              "Campaign": campaign,
              "Memory": size_memory,
              "SizePerEvent": size_event,
              "TimePerEvent": time_event,
              "OpenRunningTimeout": open_running_timeout,
              "ProcessingString": processing_string
              }

    if url_dict != "":
        # The whole request dictionary is fetched from url_dict and
        # replaces the one assembled above.
        # NOTE(review): url_dict is interpolated into a shell command line;
        # shell metacharacters in it (e.g. '&', ';') are interpreted by the
        # shell. Confirm the value is always trusted.
        params = json.loads(os.popen('curl -s --insecure %s' % (url_dict)).read())
        service_params["request_type"] = params["RequestType"]
        service_params["version"] = params["ProcessingVersion"]
        service_params["process_string"] = "T"
        service_params["pid"] = params["RequestString"]
        params["DbsUrl"] = "https://" + wma.WMAGENT_URL + wma.DBS3_URL
        params["CouchURL"] = wma.COUCH_DB_ADDRESS
        params["ConfigCacheURL"] = wma.COUCH_DB_ADDRESS

    elif request_type == "ReReco":
        if number_events:
            if blocks:
                # cannot perform automatic block selection
                print("\n\n\n WARNING number_events is not functionnal because you specified blocks in input\n\n\n")
            else:
                print("\n\n\n WARNING automated block selection performed \n\n\n")
                params.update({"RequestNumEvents": number_events})

        params.update({"ConfigCacheID": step1_docID,
                       "Scenario": "pp",
                       "IncludeParents": includeparents,
                       "TransientOutputModules": transient_output})

        if skim_docid != '':
            print("This is a skim")
            params.update({"SkimName1": skim_name,
                           "SkimInput1": skim_input,
                           "Skim1ConfigCacheID": skim_docid,
                           "nskims": 1})

    elif request_type == 'MonteCarlo':
        params.update({"RequestString": identifier,
                       "FirstEvent": 1,
                       "FirstLumi": 1,
                       "TimePerEvent": time_event,
                       "FilterEfficiency": filter_eff,
                       "LheInputFiles": cfg.get_param('lhe_input', False, section),
                       "RequestNumEvents": number_events,
                       "ConfigCacheID": step1_docID,
                       "PrimaryDataset": primary_dataset,
                       "PrepID": request_id,
                       })

        events_per_lumi = int(float(events_per_lumi) / float(filter_eff))
        # EventsPerLumi is left out of the request when wmtest is set
        if not wmtest:
            params["EventsPerLumi"] = events_per_lumi

        lhe_input_files = params["LheInputFiles"]
        # the option may arrive as the string 'True' or as a boolean
        if lhe_input_files == 'True' or lhe_input_files == True:
            # max out to 500K for "lhe step zero"
            print("Setting events per job here !!!! %s %s" % (type(lhe_input_files), lhe_input_files))
            events_per_job = 500000
            if wmtest:
                events_per_job = 15000

        if events_per_job and int(events_per_job):
            params.update({"EventsPerJob": int(events_per_job)})

        for unused_key in ('BlockBlacklist', 'BlockWhitelist', 'InputDataset', 'RunBlacklist'):
            params.pop(unused_key)

    elif request_type == 'MonteCarloFromGEN':
        params.update({"TimePerEvent": time_event,
                       "FilterEfficiency": filter_eff,
                       "ConfigCacheID": step1_docID,
                       "PrepID": request_id,
                       "TotalTime": 28800})
        if primary_dataset:
            params.update({"PrimaryDataset": primary_dataset})

        if int(number_events):
            params.update({"RequestNumEvents": number_events})

    elif request_type == 'LHEStepZero':
        # NOTE(review): events_per_job defaults to '' above, so
        # int(events_per_job) raises ValueError when the option is not
        # given for this request type -- confirm it is always set.
        params.update({"RequestString": identifier,
                       "TimePerEvent": time_event,
                       "FirstEvent": 1,
                       "FirstLumi": 1,
                       "LheInputFiles": cfg.get_param('lhe_input', False, section),
                       "Memory": 2300,
                       "SizePerEvent": size_event,
                       "ConfigCacheID": step1_docID,
                       "RequestNumEvents": number_events,
                       "PrimaryDataset": primary_dataset,
                       "PrepID": request_id,
                       "TotalTime": 28800,
                       "EventsPerLumi": 300,
                       "ProdJobSplitAlgo": "EventBased",
                       "ProdJobSplitArgs": {"events_per_job": int(events_per_job),
                                            "events_per_lumi": int(events_per_lumi)}
                       })

        for unused_key in ('BlockBlacklist', 'BlockWhitelist', 'InputDataset',
                           'RunBlacklist', 'RunWhitelist'):
            params.pop(unused_key)

    elif request_type == 'ReDigi':
        if number_events:
            if blocks:
                # cannot perform automatic block selection
                print("\n\n\n WARNING number_events is not functionnal because you specified blocks in input\n\n\n")
            else:
                print("\n\n\n WARNING automated block selection performed \n\n\n")
                params.update({"RequestNumEvents": number_events})

        params.update({"RequestString": identifier,
                       "StepOneConfigCacheID": step1_docID,
                       "KeepStepOneOutput": keep_step1,
                       "StepOneOutputModuleName": step1_output,
                       "MCPileup": pileup_dataset,
                       "PrepID": request_id})

        if primary_dataset:
            params.update({"PrimaryDataset": primary_dataset})

        if step2_cfg != '' or step2_docID != '':
            params.update({"StepTwoConfigCacheID": step2_docID,
                           "KeepStepTwoOutput": keep_step2,
                           "StepTwoOutputModuleName": step2_output})

            if step3_cfg != '' or step3_docID != '':
                params['StepThreeConfigCacheID'] = step3_docID
            else:
                if not keep_step2:
                    print('Request not keeping its step 2 output')
                    raise Exception("The request has a second step, no third step and not keeping it's second output")
        else:
            if not keep_step1:
                print('Request not keeping anything')
                raise Exception('The request has one step and not keeping anything')

    elif request_type == 'TaskChain':
        for unused_key in ('RunBlacklist', 'BlockWhitelist', 'BlockBlacklist'):
            params.pop(unused_key)

        task1_dict = {'SplittingAlgorithm': 'LumiBased',
                      'SplittingArguments': {'lumis_per_job': 8},
                      'TaskName': 'Task1'
                      }
        task1_dict['GlobalTag'] = cfg.get_param('step1_globaltag', globaltag, section)
        task1_dict['ConfigCacheID'] = step1_docID
        task1_dict['KeepOutput'] = keep_step1
        params['Task1'] = task1_dict
        params['TaskChain'] = 1

        if step2_cfg or step2_docID:
            task2_dict = {'SplittingAlgorithm': 'LumiBased',
                          'SplittingArguments': {'lumis_per_job': 8},
                          'TaskName': 'Task2'
                          }
            task2_dict['GlobalTag'] = cfg.get_param('step2_globaltag', globaltag, section)
            task2_dict['ConfigCacheID'] = step2_docID
            task2_dict['InputFromOutputModule'] = step2_output
            task2_dict['InputTask'] = cfg.get_param('step2_input', 'Task1', section)
            params['Task2'] = task2_dict
            params['TaskChain'] = 2

            if step3_cfg or step3_docID:
                task3_dict = {'SplittingAlgorithm': 'LumiBased',
                              'SplittingArguments': {'lumis_per_job': 8},
                              'TaskName': 'Task3'
                              }
                task3_dict['GlobalTag'] = cfg.get_param('step3_globaltag', globaltag, section)
                task3_dict['ConfigCacheID'] = step3_docID
                task3_dict['InputFromOutputModule'] = step3_output
                task3_dict['InputTask'] = cfg.get_param('step3_input', 'Task2', section)
                params['Task3'] = task3_dict
                params['TaskChain'] = 3

    else:
        print("Request type chose: " + str(request_type))
        raise Exception('Unknown request type, aborting')

    if harvest_docID:
        # setup automatic harvesting
        params.update({"EnableDQMHarvest": 1,
                       "DQMUploadUrl": "https://cmsweb.cern.ch/dqm/offline",
                       "DQMConfigCacheID": harvest_docID})

    # pop any empty parameters; iterate over a snapshot because the
    # dictionary is modified inside the loop
    for (param, value) in list(params.items()):
        if value in ["", []]:
            params.pop(param)

    return params, service_params