def testConvertToArchiverFormat(self):
    """Convert SAMPLE_FWJR and the bundled FWJR samples to archiver format and check the run/lumi conversion."""
    job = {}
    job["id"] = "1-0"
    job['doc'] = {"fwjr": SAMPLE_FWJR,
                  "jobtype": "Processing",
                  "jobstate": "success",
                  "timestamp": int(time.time())}
    newData = createArchiverDoc(job)
    from pprint import pprint
    pprint(newData)

    #outputModules = set([a['outputModule'] for a in newData['steps']['cmsRun1']['output']])
    #outModules = set(SAMPLE_FWJR['steps']['cmsRun1']['output'].keys())
    #self.assertEqual(outputModules - outModules, set())

    run = SAMPLE_FWJR['steps']['cmsRun1']['output']['ALCARECOStreamMuAlCalIsolatedMu'][0]['runs']
    for step in newData['steps']:
        if step['name'] == 'cmsRun1':
            runInfo = step['output'][0]['runs'][0]
            self.assertEqual(run[str(runInfo['runNumber'])], runInfo['lumis'])

    fwjrSamples = ["ErrorCodeFail.json", "FailedByAgent.json",
                   "HarvestSuccessFwjr.json",
                   "LogCollectFailedFwjr.json", "LogCollectSuccessFwjr.json",
                   "MergeFailedFwjr.json", "MergeSuccessFwjr.json",
                   "NoJobReportFail.json",
                   "ProcessingFailedFwjr.json", "ProcessingPerformanceFailed.json",
                   "ProcessingSuccessFwjr.json",
                   "ProductionFailedFwjr.json", "ProductionSuccessFwjr.json",
                   "SkimSuccessFwjr.json"]
    for sample in fwjrSamples:
        sPath = os.path.join(getTestBase(),
                             "WMCore_t/Services_t/WMArchiver_t/FWJRSamples/%s" % sample)
        with open(sPath, 'r') as infile:
            fwjr = json.load(infile)
        job = {}
        job["id"] = fwjr["_id"]
        job['doc'] = {"fwjr": fwjr["fwjr"],
                      "jobtype": fwjr["jobtype"],
                      "jobstate": fwjr['jobstate'],
                      "timestamp": fwjr["timestamp"]}
        newData = createArchiverDoc(job)
        print("\n\n==========\n%s" % sPath)
        pprint(newData)
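# The assertion above depends on createArchiverDoc reshaping the FWJR runs
# mapping (run-number string -> lumi list) into a list of dicts. A minimal
# sketch of that reshaping with a hypothetical helper and made-up run data,
# for illustration only:
def _runsDictToList(runs):
    """Convert {"123": [...]} into [{"runNumber": 123, "lumis": [...]}]."""
    return [{"runNumber": int(runNum), "lumis": lumis}
            for runNum, lumis in runs.items()]

# Example: _runsDictToList({"206466": [27414, 27415]})
# -> [{'runNumber': 206466, 'lumis': [27414, 27415]}]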
def algorithm(self, parameters):
    """
    Poll FWJRs that are ready to be archived from the local couch db
    and upload them to WMArchive.
    """
    try:
        logging.info("Getting not-archived data info from FWJR db...")
        data = self.fwjrAPI.getFWJRByArchiveStatus('ready')['rows']

        # TODO: need to send the archive-status change as a bulk update
        jobIDs = []
        archiverDocs = []
        for job in data:
            doc = createArchiverDoc(job["id"], job['doc']["fwjr"])
            archiverDocs.append(doc)
            jobIDs.append(job["id"])

        response = self.wmarchiver.archiveData(archiverDocs)

        # Partial success is not allowed: either every insert succeeds or none does.
        if response[0]['status'] == "ok" and len(response[0]['ids']) == len(jobIDs):
            for docID in jobIDs:
                self.fwjrAPI.updateArchiveUploadedStatus(docID)
    except Exception as ex:
        logging.error("Error occurred, will retry later:")
        logging.error(str(ex))
        logging.error("Trace back: \n%s", traceback.format_exc())
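# The all-or-nothing guard above only marks documents as uploaded when the
# archiver reports "ok" and returns one ID per submitted job; anything else
# leaves every FWJR in the 'ready' state to be retried on the next cycle.
# A small sketch of that predicate, using the response shape implied by the
# checks above (not a documented WMArchive payload):
def _uploadSucceeded(response, jobIDs):
    """True only when the archiver accepted every submitted document."""
    return response[0]['status'] == "ok" and len(response[0]['ids']) == len(jobIDs)

# Example: _uploadSucceeded([{"status": "ok", "ids": ["a", "b"]}], ["1-0", "2-0"]) -> True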
def testConvertToArchiverFormat(self):
    jobid = "1-0"
    newData = createArchiverDoc(jobid, SAMPLE_FWJR)
    import pprint
    pprint.pprint(newData)

    #outputModules = set([a['outputModule'] for a in newData['steps']['cmsRun1']['output']])
    #outModules = set(SAMPLE_FWJR['steps']['cmsRun1']['output'].keys())
    #self.assertEqual(outputModules - outModules, set())

    run = SAMPLE_FWJR['steps']['cmsRun1']['output']['ALCARECOStreamMuAlCalIsolatedMu'][0]['runs']
    for step in newData['steps']:
        if step['name'] == 'cmsRun1':
            runInfo = step['output'][0]['runs'][0]
            self.assertEqual(run[str(runInfo['runNumber'])], runInfo['lumis'])
def algorithm(self, parameters):
    """
    Poll FWJRs that are ready to be archived from the local couch db
    and upload them to WMArchive in chunks.
    """
    try:
        logging.info("Getting not-archived data info from FWJR db...")
        data = self.fwjrAPI.getFWJRByArchiveStatus('ready', limit=1000)['rows']

        for slicedData in grouper(data, self.numDocsUploadPerCall):
            jobIDs = []
            archiverDocs = []
            for job in slicedData:
                doc = createArchiverDoc(job)
                archiverDocs.append(doc)
                jobIDs.append(job["id"])

            response = self.wmarchiver.archiveData(archiverDocs)

            # Partial success is not allowed: either every insert succeeds or none does.
            if response[0]['status'] == "ok" and len(response[0]['ids']) == len(jobIDs):
                archiveIDs = response[0]['ids']
                for docID in jobIDs:
                    self.fwjrAPI.updateArchiveUploadedStatus(docID)
                logging.info("...successfully uploaded %d docs", len(jobIDs))
                logging.debug("JobIDs uploaded: %s", jobIDs)
                logging.debug("Archive IDs returned: %s", archiveIDs)

                # Flag any IDs the archiver returned more than once.
                if len(set(archiveIDs)) != len(archiveIDs):
                    duplicateIDs = set(x for x in archiveIDs if archiveIDs.count(x) > 1)
                    logging.info("There are duplicate entries: %s", duplicateIDs)
            else:
                logging.warning("Upload failed: %s: %s", response[0]['status'], response[0]['reason'])
                logging.debug("failed JobIds %s", jobIDs)
    except Exception as ex:
        logging.error("Error occurred, will retry later:")
        logging.error(str(ex))
        logging.error("Trace back: \n%s", traceback.format_exc())
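# grouper() above slices the result rows so that each archiveData() call
# uploads at most numDocsUploadPerCall documents. A minimal sketch of such a
# chunking helper, assuming none is already in scope (WMCore provides its own
# in Utils.IteratorTools):
from itertools import islice

def grouper(iterable, n):
    """Yield successive lists of at most n items from iterable."""
    it = iter(iterable)
    while True:
        chunk = list(islice(it, n))
        if not chunk:
            return
        yield chunk

# Example: list(grouper(range(5), 2)) -> [[0, 1], [2, 3], [4]]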