def fixDBSmissingFileAssoc():
    os.environ['WMAGENT_CONFIG'] = '/data/srv/wmagent/current/config/wmagent/config.py'
    connectToDB()
    myThread = threading.currentThread()
    formatter = DBFormatter(logging, myThread.dbi)
    problemFilesSql = """
        select dbsbuffer_file.id as fileid, dbsbuffer_location.id as seid
          from wmbs_file_location fl
          inner join wmbs_file_details fd on fd.id = fl.fileid
          inner join wmbs_location_pnns wls on wls.location = fl.location
          inner join wmbs_location wl on wl.id = fl.location
          inner join dbsbuffer_location on dbsbuffer_location.pnn = wls.pnn
          inner join dbsbuffer_file on dbsbuffer_file.lfn = fd.lfn
         where fd.lfn in (select df.lfn from dbsbuffer_file df
                            left outer join dbsbuffer_file_location dfl on df.id = dfl.filename
                           where dfl.location is null)
        """
    unfinishedTasks = formatter.formatDict(formatter.dbi.processData(problemFilesSql))
    print("%s length" % len(unfinishedTasks))
    result = {}
    for row in unfinishedTasks:
        result.setdefault(row["fileid"], row)
        print(row)
    print("trimmed %s length" % len(result))
    insertSQL = """INSERT INTO dbsbuffer_file_location (filename, location)
                   VALUES (:fileid, :seid)"""
    done = formatter.dbi.processData(insertSQL, result.values())
    print("inserted %s" % done)
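# Hypothetical follow-up check, not part of the original script: after the fix,
# re-running the same problem-file query should return zero rows, since every
# dbsbuffer_file now has a dbsbuffer_file_location entry. A minimal sketch,
# assuming the query string is passed in alongside the formatter:
def verifyDBSFileAssoc(formatter, problemFilesSql):
    """Sanity check: the fixer above should leave no orphan files behind."""
    leftovers = formatter.formatDict(formatter.dbi.processData(problemFilesSql))
    print("%s orphan files remain after the fix" % len(leftovers))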
def __init__(self, logger, connectUrl, owner):
    object.__init__(self)
    dbFactory = DBFactory(logger, connectUrl, options={})
    self.dbi = dbFactory.connect()
    self.dbFormatter = DBFormatter(logger, self.dbi)
    self.owner = owner
    self.sqlDict = {
        'trig': """
            SELECT TABLE_NAME, TRIGGER_BODY
              FROM USER_TRIGGERS
             WHERE TABLE_OWNER='%s'
            """ % (owner),
        'primaryKey': """
            SELECT cols.table_name table_name, cols.column_name primaryk
              FROM all_constraints cons, all_cons_columns cols
             WHERE cols.table_name = :table_name
               AND cons.OWNER='%s'
               AND cons.constraint_type = 'P'
               AND cons.constraint_name = cols.constraint_name
               AND cons.owner = cols.owner
            """ % (owner),
        'sequen': """
            SELECT INCREMENT_BY inc, CACHE_SIZE csz
              from USER_SEQUENCES
             where SEQUENCE_NAME=:seq_name
            """
    }
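# Illustrative helper (an assumption, not part of the original class): running
# the 'primaryKey' entry from sqlDict above through the same DBI/formatter pair.
# That query expects a :table_name bind; 'primaryk' is the alias it defines.
def getPrimaryKey(self, tableName):
    """Return the primary-key column of tableName, or None if none exists."""
    result = self.dbi.processData(self.sqlDict['primaryKey'],
                                  {'table_name': tableName})
    rows = self.dbFormatter.formatDict(result)
    return rows[0]['primaryk'] if rows else None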
def main():
    if 'WMAGENT_CONFIG' not in os.environ:
        os.environ['WMAGENT_CONFIG'] = '/data/srv/wmagent/current/config/wmagent/config.py'
    if 'manage' not in os.environ:
        os.environ['manage'] = '/data/srv/wmagent/current/config/wmagent/manage'

    # first, break free from old condor jobs
    condorCleanup()

    connectToDB()
    myThread = threading.currentThread()
    formatter = DBFormatter(logging, myThread.dbi)

    time5d = int(time.time()) - 5 * 24 * 3600
    binds = [{'timestamp': time5d}]
    activeRunJobs = formatter.formatDict(myThread.dbi.processData(getRunJobsActive, binds))
    print "Found %d active jobs in BossAir older than 5 days" % len(activeRunJobs)

    # now mark these jobs as complete and in Timeout status
    binds = activeRunJobs[:10000]
    myThread.dbi.processData(updateState, binds)

    print "Done!"
    sys.exit(0)
def check_sites(oldsites, badsites, conn):
    """
    Make sure that all sites from the old schema that aren't in the bad
    sites list made it into the new schema.
    """
    logger = logging.getLogger('SiteDB Schema Upgrade: check_sites')
    sql = "select * from siteinfo where cms_name = :cms_name"
    formatter = DBFormatter(logger, conn)
    print "checking %s sites" % len(oldsites)
    for site in oldsites:
        test = None
        if site not in badsites:
            try:
                data = conn.processData(sql, {'cms_name': site['cms_name']})
                test = formatter.formatDict(data)
                if len(test) > 0:
                    testsite = test[0]
                    if testsite == site:
                        logger.debug("%s migrated correctly!" % site['cms_name'])
                    else:
                        logger.warning("Problem with %s" % site['cms_name'])
                        logger.warning("%s != %s" % (testsite, site))
                else:
                    logger.warning("Problem with %s" % site['cms_name'])
                    logger.warning("%s != %s" % (test, site))
            except Exception, e:
                logger.warning("Problem with %s" % site['cms_name'])
                logger.warning("%s != %s" % (test, site))
                logger.warning(e)
def testBFormatting(self):
    """
    Test various formats
    """
    myThread = threading.currentThread()
    dbformatter = DBFormatter(myThread.logger, myThread.dbi)
    myThread.transaction.begin()

    result = myThread.transaction.processData(myThread.select)
    output = dbformatter.format(result)
    self.assertEqual(output, [['value1a', 'value2a'],
                              ['value1b', 'value2b'],
                              ['value1c', 'value2d']])

    result = myThread.transaction.processData(myThread.select)
    output = dbformatter.formatOne(result)
    print('test1 ' + str(output))
    self.assertEqual(output, ['value1a', 'value2a'])

    result = myThread.transaction.processData(myThread.select)
    output = dbformatter.formatDict(result)
    self.assertEqual(output, [{'bind2': 'value2a', 'bind1': 'value1a'},
                              {'bind2': 'value2b', 'bind1': 'value1b'},
                              {'bind2': 'value2d', 'bind1': 'value1c'}])

    result = myThread.transaction.processData(myThread.select)
    output = dbformatter.formatOneDict(result)
    self.assertEqual(output, {'bind2': 'value2a', 'bind1': 'value1a'})
def main():
    if 'WMAGENT_CONFIG' not in os.environ:
        os.environ['WMAGENT_CONFIG'] = '/data/srv/wmagent/current/config/wmagent/config.py'
    if 'manage' not in os.environ:
        os.environ['manage'] = '/data/srv/wmagent/current/config/wmagent/manage'

    timenow = int(time.time())
    time6d = timenow - 6 * 24 * 3600

    connectToDB()
    myThread = threading.currentThread()
    formatter = DBFormatter(logging, myThread.dbi)

    # Get list of workflows and number of jobs executing for more than 6 days
    binds = [{'timestamp': time6d}]
    wmbsJobsPerWf = formatter.formatDict(myThread.dbi.processData(getJobsExecuting, binds))
    totalJobs = sum([int(item['count']) for item in wmbsJobsPerWf])
    print "Found %d workflows with a total of %d jobs" % (len(wmbsJobsPerWf), totalJobs)

    # Retrieve all jobs from the condor schedd. xquery returns an iterator,
    # so make it a list such that we can iterate over it several times.
    schedd = condor.Schedd()
    jobs = list(schedd.xquery('true', ['ClusterID', 'ProcId', 'WMAgent_RequestName',
                                       'JobStatus', 'WMAgent_JobID']))

    # Retrieve their status from reqmgr2 and add their wmbs ids to the dict
    for item in wmbsJobsPerWf:
        item['status'] = getStatus(item['name'])
        item['condorjobs'] = []
        for job in jobs:
            if job['WMAgent_RequestName'] == item['name']:
                item['condorjobs'].append(job['WMAgent_JobID'])

    # time to have some ACTION
    for item in wmbsJobsPerWf:
        binds = [{'timestamp': time6d, 'wfname': item['name']}]
        jobIds = formatter.formatDict(myThread.dbi.processData(getWMBSIds, binds))
        wmbsIds = [x['id'] for x in jobIds]
        print "%-100s in %s. Has %d wmbs and %d condor jobs" % (item['name'], item['status'],
                                                                len(wmbsIds), len(item['condorjobs']))
        # Just skip it if there are condor jobs out there
        if len(item['condorjobs']) > 0 or item['status'] == 'UNKNOWN':
            continue
        newstatus = 'jobfailed' if item['status'] in ('acquired', 'running-open', 'running-closed') else 'cleanout'
        var = raw_input("Marking jobs from %s to %s: (Y/N) " % (item['status'], newstatus))
        if var in ['Y', 'y']:
            print "UPDATED %s" % item['name']
            binds = []
            for x in jobIds:
                x['state'] = newstatus
                binds.append(x)
            myThread.dbi.processData(updateState, binds)

    print "Done!"
    sys.exit(0)
def main():
    """
    _main_
    """
    # Start services
    if 'WMAGENT_CONFIG' not in os.environ:
        os.environ['WMAGENT_CONFIG'] = '/data/srv/wmagent/current/config/wmagent/config.py'

    connectToDB()
    myPhEDEx = PhEDEx()
    myThread = threading.currentThread()

    print "Please remember to shut down the PhEDExInjector first; you have 10 seconds before the script starts."
    time.sleep(10)

    # Get the files that the PhEDExInjector would look for
    formatter = DBFormatter(logging, myThread.dbi)
    formatter.sql = query
    results = formatter.execute()
    sortedBlocks = defaultdict(set)
    for lfn, block in results:
        sortedBlocks[block].add(lfn)

    # Check with block-level calls
    foundFiles = set()
    for block in sortedBlocks:
        result = myPhEDEx._getResult('data', args={'block': block}, verb='GET')
        for dbs in result['phedex']['dbs']:
            for dataset in dbs['dataset']:
                blockChunk = dataset['block']
                for blockInfo in blockChunk:
                    for fileInfo in blockInfo['file']:
                        if fileInfo['lfn'] in sortedBlocks[block]:
                            foundFiles.add(fileInfo['lfn'])

    if not foundFiles:
        print "I didn't find any abnormal file, feel free to panic! Please contact a developer."
        return 0

    print "Found %d files that are already registered in PhEDEx but the buffer doesn't know about" % len(foundFiles)
    print "Fixing them now..."

    # Fix it!
    binds = []
    for lfn in foundFiles:
        binds.append({'lfn': lfn})
    formatter.dbi.processData(modification, binds, conn=None,
                              transaction=False, returnCursor=False)

    print "Fixed them! :)"
    print "You can restart the PhEDExInjector now, have a nice day!"
    return 0
def initialize(self, db_config):
    self.db_config = db_config
    self.sourceUrl = None
    self.migration_req_id = 0
    self.block_names = []
    self.migration_block_ids = []
    self.inserted = True

    dbowner = self.db_config.get('dbowner')
    connectUrl = self.db_config.get('connectUrl')

    dbFactory = DBFactory(MgrLogger, connectUrl, options={})
    self.dbi = dbFactory.connect()
    self.dbFormatter = DBFormatter(MgrLogger, self.dbi)
    self.dbsMigrate = DBSMigrate(MgrLogger, self.dbi, dbowner)
    self.DBSBlockInsert = DBSBlockInsert(MgrLogger, self.dbi, dbowner)
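# A minimal sketch of the db_config mapping initialize() reads; only 'dbowner'
# and 'connectUrl' are used above, and the values here are placeholders.
#
#   db_config = {'dbowner': 'CMS_DBS3_OWNER',
#                'connectUrl': 'oracle://user:password@tns_alias'}
#   handler.initialize(db_config)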
def get_siteinfo(conn):
    formatter = DBFormatter(logging.getLogger('SiteDB Schema Upgrade'), conn)
    sql = """select site.id, cms_name.name cms_name, site.name site_name,
                    site.country, site.usage, site.url, site.logourl
               from site
               join SITE_CMS_NAME_MAP on SITE_CMS_NAME_MAP.SITE_ID=site.id
               join CMS_NAME on CMS_NAME.ID = SITE_CMS_NAME_MAP.CMS_NAME_ID
          """
    data = conn.processData(sql)
    return formatter.formatDict(data)
def selectOne(self, query):
    """
    Execute a query with only one result expected.
    """
    # db connect
    self.session.connect()
    # execute query
    results = self.session.processData(query)
    if results.rowcount > 0:
        formatter = DBFormatter(self.logger, self.session)
        out = formatter.formatOne(results)
    else:
        out = None
    return out
def select(self, query):
    """
    Execute a query.
    """
    # db connect
    self.session.connect()
    # results is a WMCore.Database.ResultSet.ResultSet
    results = self.session.processData(query)
    if results.rowcount > 0:
        formatter = DBFormatter(self.logger, self.session)
        out = formatter.format(results)
    else:
        out = None
    return out
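# Hedged usage sketch showing how the two helpers above differ: selectOne()
# returns a single row via DBFormatter.formatOne, select() returns the full
# result set via DBFormatter.format, and both return None for an empty result.
# The queries below are illustrative only.
#
#   count = self.selectOne("SELECT COUNT(*) FROM wmbs_job")
#   states = self.select("SELECT id, name FROM wmbs_job_state")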
def testBFormatting(self):
    """
    Test various formats
    """
    # fill the database with some initial data
    self.stuffDB()

    myThread = threading.currentThread()
    dbformatter = DBFormatter(myThread.logger, myThread.dbi)

    result = myThread.dbi.processData(self.selectSQL)
    output = dbformatter.format(result)
    self.assertEqual(output, [['value1a', 1, 'value2a'],
                              ['value1b', 2, 'value2b'],
                              ['value1c', 3, 'value2d']])

    result = myThread.dbi.processData(self.selectSQL)
    output = dbformatter.formatOne(result)
    print('test1 ' + str(output))
    self.assertEqual(output, ['value1a', 1, 'value2a'])

    result = myThread.dbi.processData(self.selectSQL)
    output = dbformatter.formatDict(result)
    self.assertEqual(output, [{'column3': 'value2a', 'column2': 1, 'column1': 'value1a'},
                              {'column3': 'value2b', 'column2': 2, 'column1': 'value1b'},
                              {'column3': 'value2d', 'column2': 3, 'column1': 'value1c'}])

    result = myThread.dbi.processData(self.selectSQL)
    output = dbformatter.formatOneDict(result)
    self.assertEqual(output, {'column3': 'value2a', 'column2': 1, 'column1': 'value1a'})
def check_badsites(badsites, conn):
    """
    Go through the list of bad sites and check whether they are _exactly_
    in the DB already. If they are, remove them from the badsites list;
    if not, leave them in purgatory.
    """
    for constraint in ['cms_name', 'id']:
        formatter = DBFormatter(logger, conn)
        sql = "select * from siteinfo where %s = :%s" % (constraint, constraint)
        # iterate over a copy, since we remove entries from badsites below
        for site in list(badsites):
            try:
                data = conn.processData(sql, {constraint: site[constraint]})
                test = formatter.formatDict(data)[0]
                if test == site:
                    logger.debug("%s actually did make it..." % site['cms_name'])
                    badsites.remove(site)
            except:
                logger.warning("%s really didn't make it..." % site['cms_name'])
    return badsites
def migrate_badsites(badsites, conn):
    awfulsites = []
    goodsites = []
    for site in badsites:
        sql = """insert into siteinfo (id, cms_name, site_name, country, usage, url, logourl)
                 values (site_sq.nextval, :cms_name, :site_name, :country, :usage, :url, :logourl)"""
        try:
            logger.debug("trying %s" % site['cms_name'])
            oldid = site['id']
            del site['id']
            conn.processData(sql, site)
            formatter = DBFormatter(logger, conn)
            data = conn.processData("select id from siteinfo where cms_name=:cms_name",
                                    {'cms_name': site['cms_name']})
            data = formatter.formatOneDict(data)
            print data
            logger.warning("%s migrated with a new id: %s (old id was %s)" %
                           (site['cms_name'], data['id'], oldid))
            goodsites.append(site)
        except Exception, e:
            awfulsites.append(site)
            logger.warning(e)
def main():
    """
    _main_
    """
    if 'WMAGENT_CONFIG' not in os.environ:
        os.environ['WMAGENT_CONFIG'] = '/data/srv/wmagent/current/config/wmagent/config.py'
    if 'manage' not in os.environ:
        os.environ['manage'] = '/data/srv/wmagent/current/config/wmagent/manage'

    connectToDB()
    myThread = threading.currentThread()
    formatter = DBFormatter(logging, myThread.dbi)

    # Get all the files available for each subscription
    print "Getting files available without location..."
    availFiles = formatter.formatDict(myThread.dbi.processData(getFilesAvailable))
    print "Total files available: %s" % len(availFiles)
    uniqAvailFiles = list(set([x['fileid'] for x in availFiles]))
    availFiles = [{'fileid': x} for x in uniqAvailFiles]
    print "Total unique files available: %s" % len(uniqAvailFiles)

    # check the result before indexing into it, so an empty result
    # produces the error message instead of an IndexError
    cernLocation = formatter.formatDict(myThread.dbi.processData(getCERNLocation))
    if not cernLocation:
        print "You need to add T0_CH_CERN to the resource control db"
        sys.exit(1)
    cernID = cernLocation[0]
    print "CERN location id: %s" % cernID

    for fid in availFiles:
        fid.update(cernID)

    myThread.dbi.processData(updateFileLocation, availFiles)
    print "Done!"
    sys.exit(0)
def getWMBSInfo(config):
    connectToDB()
    myThread = threading.currentThread()
    formatter = DBFormatter(logging, myThread.dbi)

    workflows = formatter.formatDict(myThread.dbi.processData(knownWorkflows))
    workflows = [wf['name'] for wf in workflows]
    print("\n*** WORKFLOWS: found %d distinct workflows in this agent." % len(workflows))
    workflowsDict = fetchWorkflowsSpec(config, workflows)
    printWfStatus(workflows, workflowsDict)

    for st in ('Available', 'Negotiating', 'Acquired', 'Running'):
        print("\n*** WORKQUEUE: elements still marked as %s in LQ workqueue / workqueue_inbox." % st)
        checkLocalWQStatus(config, st)

    for st in ("Acquired", "Running"):
        print("\n*** WORKQUEUE: elements still marked as %s in GQ workqueue." % st)
        checkGlobalWQStatus(config, st)

    workflows = formatter.formatDict(myThread.dbi.processData(incompleteWfs))
    workflows = [wf['name'] for wf in workflows]
    print("\n*** WORKFLOWS: there are %d distinct workflows not completed." % len(workflows))
    printWfStatus(workflows, workflowsDict)

    wfsNotInjected = flattenList(formatter.format(myThread.dbi.processData(workflowsNotInjected)))
    print("\n*** WORKFLOWS: found %d workflows not fully injected." % len(wfsNotInjected))
    printWfStatus(wfsNotInjected, workflowsDict)

    jobsByState = formatter.formatDict(myThread.dbi.processData(jobCountByState))
    print("\n*** WMBS: amount of wmbs jobs in each status:\n%s" % jobsByState)
    # If we have executing jobs in wmbs and nothing in condor, then investigate the wfs
    if 'executing' in [item['name'] for item in jobsByState]:
        wfsJobCount = formatter.formatDict(myThread.dbi.processData(workflowsExecuting))
        print("\n*** WMBS: %d workflows with executing jobs in wmbs:" % len(wfsJobCount))
        workflows = [wf['name'] for wf in wfsJobCount]
        printWfStatus(workflows, workflowsDict)

    unfinishedSubs = formatter.formatDict(myThread.dbi.processData(unfinishedSubscriptions))
    unfinishedSubs = [wf['wfname'] for wf in unfinishedSubs]
    print("\n*** SUBSCRIPTIONS: subscriptions not finished: %d" % len(unfinishedSubs))
    printWfStatus(unfinishedSubs, workflowsDict)

    filesAvailable = formatter.formatDict(myThread.dbi.processData(filesAvailWMBS))
    print("\n*** SUBSCRIPTIONS: found %d files available in WMBS (waiting for job creation):\n%s"
          % (len(filesAvailable), filesAvailable))

    filesAcquired = formatter.formatDict(myThread.dbi.processData(filesAcqWMBS))
    print("\n*** SUBSCRIPTIONS: found %d files acquired in WMBS (waiting for jobs to finish):\n%s"
          % (len(filesAcquired), filesAcquired))

    blocksopenDBS = formatter.formatDict(myThread.dbi.processData(blocksOpenDBS))
    print("\n*** DBS: found %d blocks open in DBS." % len(blocksopenDBS), end="")
    print(" Printing the first 20 blocks only:\n%s" % blocksopenDBS[:20])

    filesnotinDBS = flattenList(formatter.format(myThread.dbi.processData(filesNotInDBS)))
    print("\n*** DBS: found %d files not uploaded to DBS.\n" % len(filesnotinDBS))
    getDsetAndWf(filesnotinDBS, workflowsDict)

    filesnotinPhedex = flattenList(formatter.format(myThread.dbi.processData(filesNotInPhedex)))
    print("\n*** PHEDEX: found %d files not injected in PhEDEx, with valid block id (recoverable)."
          % len(filesnotinPhedex))
    getDsetAndWf(filesnotinPhedex, workflowsDict)

    filesnotinPhedexNull = flattenList(formatter.format(myThread.dbi.processData(filesNotInPhedexNull)))
    print("\n*** PHEDEX: found %d files not injected in PhEDEx, with null block id (unrecoverable)."
          % len(filesnotinPhedexNull))
    getDsetAndWf(filesnotinPhedexNull, workflowsDict)
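# Hedged usage sketch: getWMBSInfo() needs a loaded WMAgent configuration
# object. loadConfigurationFile is the standard WMCore loader; the path
# mirrors the one hard-coded in the other scripts here.
#
#   from WMCore.Configuration import loadConfigurationFile
#   config = loadConfigurationFile('/data/srv/wmagent/current/config/wmagent/config.py')
#   getWMBSInfo(config)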
def __init__(self, logger, connectUrl, ownerDBS3, ownerDBS2):
    object.__init__(self)

    dbFactory = DBFactory(logger, connectUrl, options={})
    self.dbi = dbFactory.connect()
    self.dbFormatter = DBFormatter(logger, self.dbi)

    self.sqlPrimaryKey = {'AcquisitionEras': 'acquisition_era_name',
                          'ApplicationExecutables': 'app_exec_id',
                          'Block': 'block_id',
                          'BlockParents': 'this_block_id',
                          'Dataset': 'dataset_id',
                          'DatasetAccessTypes': 'dataset_access_type_id',
                          'DatasetOutputModConfigs': 'ds_output_mod_conf_id',
                          'DatasetParents': 'this_dataset_id',
                          'DatasetRuns': 'dataset_run_id',
                          'DataTier': 'data_tier_id',
                          'Files': 'file_id',
                          'FileDataTypes': 'file_type_id',
                          'FileLumis': 'file_lumi_id',
                          'FileOutputModConfigs': 'file_output_config_id',
                          'FileParents': 'this_file_id',
                          'OriginSiteName': 'block_id',
                          'OutputModule': 'output_mod_config_id',
                          'ParametersetHashes': 'parameter_set_hash_id',
                          'PhysicsGroups': 'physics_group_id',
                          'PrimaryDS': 'primary_ds_id',
                          'PrimaryDSTypes': 'primary_ds_type_id',
                          'ProcessedDatasets': 'processed_ds_name',
                          'ReleaseVersions': 'release_version_id'}

    self.sqlDict = {
        'AcquisitionEras': """
            SELECT ACQUISITION_ERA_NAME, START_DATE, END_DATE, CREATION_DATE, CREATE_BY, DESCRIPTION
            FROM(
                SELECT DISTINCT AE.ACQUISITION_ERA_NAME, AE.START_DATE, AE.END_DATE,
                       AE.CREATION_DATE, AE.CREATE_BY, AE.DESCRIPTION
                FROM {ownerDBS3}.ACQUISITION_ERAS AE
                UNION ALL
                SELECT DISTINCT PCD.AQUISITIONERA ACQUISITION_ERA_NAME, 0 START_DATE, NULL END_DATE,
                       NULL CREATION_DATE, NULL CREATE_BY, NULL DESCRIPTION
                FROM {ownerDBS2}.PROCESSEDDATASET PCD
                WHERE AQUISITIONERA IS NOT NULL
            )
            GROUP BY ACQUISITION_ERA_NAME, START_DATE, END_DATE, CREATION_DATE, CREATE_BY, DESCRIPTION
            HAVING COUNT(*) <> 2
            ORDER BY ACQUISITION_ERA_NAME
            """.format(ownerDBS3=ownerDBS3, ownerDBS2=ownerDBS2),
        ##############################################
        'ApplicationExecutables': """
            SELECT APP_EXEC_ID, APP_NAME
            FROM(
                SELECT AE.APP_EXEC_ID, AE.APP_NAME
                FROM {ownerDBS3}.APPLICATION_EXECUTABLES AE
                UNION ALL
                SELECT AE2.ID APP_EXEC_ID, AE2.EXECUTABLENAME APP_NAME
                FROM {ownerDBS2}.APPEXECUTABLE AE2
            )
            GROUP BY APP_EXEC_ID, APP_NAME
            HAVING COUNT(*) <> 2
            ORDER BY APP_EXEC_ID
            """.format(ownerDBS3=ownerDBS3, ownerDBS2=ownerDBS2),
        ##############################################
        'Block': """
            SELECT BLOCK_ID, BLOCK_NAME, DATASET_ID, PATH, OPEN_FOR_WRITING, ORIGIN_SITE_NAME,
                   BLOCK_SIZE, FILE_COUNT, CREATION_DATE, CREATE_BY,
                   LAST_MODIFICATION_DATE, LAST_MODIFIED_BY
            FROM(
                SELECT BL.BLOCK_ID, BL.BLOCK_NAME, BL.DATASET_ID, DS.DATASET PATH,
                       BL.OPEN_FOR_WRITING, BL.ORIGIN_SITE_NAME, BL.BLOCK_SIZE, BL.FILE_COUNT,
                       BL.CREATION_DATE, BL.CREATE_BY, BL.LAST_MODIFICATION_DATE, BL.LAST_MODIFIED_BY
                FROM {ownerDBS3}.BLOCKS BL
                JOIN {ownerDBS3}.DATASETS DS ON BL.DATASET_ID=DS.DATASET_ID
                UNION ALL
                SELECT BL2.ID BLOCK_ID, BL2.NAME BLOCK_NAME, BL2.DATASET DATASET_ID, BL2.PATH,
                       BL2.OPENFORWRITING OPEN_FOR_WRITING, 'UNKNOWN' ORIGIN_SITE_NAME,
                       BL2.BLOCKSIZE BLOCK_SIZE, BL2.NUMBEROFFILES FILE_COUNT,
                       BL2.CREATIONDATE CREATION_DATE, PS1.DISTINGUISHEDNAME CREATE_BY,
                       BL2.LASTMODIFICATIONDATE LAST_MODIFICATION_DATE,
                       PS2.DISTINGUISHEDNAME LAST_MODIFIED_BY
                FROM {ownerDBS2}.BLOCK BL2
                JOIN {ownerDBS2}.PERSON PS1 ON BL2.CREATEDBY=PS1.ID
                JOIN {ownerDBS2}.PERSON PS2 ON BL2.LASTMODIFIEDBY=PS2.ID
                JOIN {ownerDBS2}.PROCESSEDDATASET DS ON DS.ID=BL2.DATASET
                JOIN {ownerDBS2}.PRIMARYDATASET PD on DS.PRIMARYDATASET=PD.ID
                JOIN {ownerDBS2}.DATATIER DT ON DS.DATATIER=DT.ID
            )
            GROUP BY BLOCK_ID, BLOCK_NAME, DATASET_ID, PATH, OPEN_FOR_WRITING, ORIGIN_SITE_NAME,
                     BLOCK_SIZE, FILE_COUNT, CREATION_DATE, CREATE_BY,
                     LAST_MODIFICATION_DATE, LAST_MODIFIED_BY
            HAVING COUNT(*) <> 2
            ORDER BY BLOCK_ID
            """.format(ownerDBS3=ownerDBS3, ownerDBS2=ownerDBS2),
        ##############################################
        'BlockParents': """
            SELECT THIS_BLOCK_ID, PARENT_BLOCK_ID
            FROM (
                SELECT BP.THIS_BLOCK_ID, BP.PARENT_BLOCK_ID
                FROM {ownerDBS3}.BLOCK_PARENTS BP
                UNION ALL
                SELECT THISBLOCK this_block_id, ITSPARENT parent_block_id
                FROM {ownerDBS2}.BLOCKPARENT
            )
            GROUP BY THIS_BLOCK_ID, PARENT_BLOCK_ID
            HAVING COUNT(*) <> 2
            ORDER BY THIS_BLOCK_ID, PARENT_BLOCK_ID
            """.format(ownerDBS3=ownerDBS3, ownerDBS2=ownerDBS2),
        ##############################################
        'DataTier': """
            SELECT DATA_TIER_ID, DATA_TIER_NAME, CREATION_DATE, CREATE_BY
            FROM(
                SELECT DT.DATA_TIER_ID, DT.DATA_TIER_NAME, DT.CREATION_DATE, DT.CREATE_BY
                FROM {ownerDBS3}.DATA_TIERS DT
                UNION ALL
                SELECT DT.ID DATA_TIER_ID, DT.NAME DATA_TIER_NAME,
                       DT.CREATIONDATE CREATION_DATE, PS.DISTINGUISHEDNAME CREATE_BY
                FROM {ownerDBS2}.DATATIER DT
                JOIN {ownerDBS2}.PERSON PS ON PS.ID=DT.CREATEDBY
            )
            GROUP BY DATA_TIER_ID, DATA_TIER_NAME, CREATION_DATE, CREATE_BY
            HAVING COUNT(*) <> 2
            ORDER BY data_tier_id
            """.format(ownerDBS3=ownerDBS3, ownerDBS2=ownerDBS2),
        ##############################################
        'Dataset': """
            SELECT DATASET_ID, DATASET, XTCROSSSECTION, CREATION_DATE, CREATE_BY,
                   LAST_MODIFICATION_DATE, LAST_MODIFIED_BY, PRIMARY_DS_NAME, PRIMARY_DS_TYPE,
                   PROCESSED_DS_NAME, DATA_TIER_NAME, DATASET_ACCESS_TYPE, ACQUISITION_ERA_NAME,
                   PROCESSING_ERA_ID, PHYSICS_GROUP_NAME, PREP_ID
            FROM(
                SELECT D.DATASET_ID, D.DATASET, D.XTCROSSSECTION, D.CREATION_DATE, D.CREATE_BY,
                       D.LAST_MODIFICATION_DATE, D.LAST_MODIFIED_BY, P.PRIMARY_DS_NAME,
                       PDT.PRIMARY_DS_TYPE, PD.PROCESSED_DS_NAME, DT.DATA_TIER_NAME,
                       DP.DATASET_ACCESS_TYPE, AE.ACQUISITION_ERA_NAME, D.PROCESSING_ERA_ID,
                       PH.PHYSICS_GROUP_NAME, D.PREP_ID
                FROM {ownerDBS3}.DATASETS D
                JOIN {ownerDBS3}.PRIMARY_DATASETS P ON P.PRIMARY_DS_ID = D.PRIMARY_DS_ID
                JOIN {ownerDBS3}.PRIMARY_DS_TYPES PDT ON PDT.PRIMARY_DS_TYPE_ID = P.PRIMARY_DS_TYPE_ID
                JOIN {ownerDBS3}.PROCESSED_DATASETS PD ON PD.PROCESSED_DS_ID = D.PROCESSED_DS_ID
                JOIN {ownerDBS3}.DATA_TIERS DT ON DT.DATA_TIER_ID = D.DATA_TIER_ID
                JOIN {ownerDBS3}.DATASET_ACCESS_TYPES DP on DP.DATASET_ACCESS_TYPE_ID= D.DATASET_ACCESS_TYPE_ID
                LEFT OUTER JOIN {ownerDBS3}.ACQUISITION_ERAS AE ON AE.ACQUISITION_ERA_ID = D.ACQUISITION_ERA_ID
                LEFT OUTER JOIN {ownerDBS3}.PHYSICS_GROUPS PH ON PH.PHYSICS_GROUP_ID = D.PHYSICS_GROUP_ID
                UNION ALL
                SELECT DS.ID DATASET_ID,
                       '/' || PD2.NAME || '/' || DS.NAME || '/' || DT2.NAME DATASET,
                       DS.XTCROSSSECTION, DS.CREATIONDATE CREATION_DATE,
                       PS1.DISTINGUISHEDNAME CREATE_BY,
                       DS.LASTMODIFICATIONDATE LAST_MODIFICATION_DATE,
                       PS2.DISTINGUISHEDNAME LAST_MODIFIED_BY, PD2.NAME PRIMARY_DS_NAME,
                       PT.TYPE PRIMARY_DS_TYPE, DS.NAME PROCESSED_DS_NAME, DT2.NAME DATA_TIER_NAME,
                       ST.STATUS DATASET_ACCESS_TYPE, DS.AQUISITIONERA ACQUISITION_ERA_NAME,
                       NULL PROCESSING_ERA_ID, PG.PHYSICSGROUPNAME physics_group_name, NULL PREP_ID
                FROM {ownerDBS2}.PROCESSEDDATASET DS
                JOIN {ownerDBS2}.DATATIER DT2 ON DS.DATATIER=DT2.ID
                JOIN {ownerDBS2}.PRIMARYDATASET PD2 ON PD2.ID=DS.PRIMARYDATASET
                JOIN {ownerDBS2}.PHYSICSGROUP PG ON PG.ID=DS.PHYSICSGROUP
                JOIN {ownerDBS2}.PROCDSSTATUS ST ON ST.ID=DS.STATUS
                JOIN {ownerDBS2}.PERSON PS1 ON DS.CREATEDBY=PS1.ID
                JOIN {ownerDBS2}.PERSON PS2 ON DS.LASTMODIFIEDBY=PS2.ID
                JOIN {ownerDBS2}.PRIMARYDSTYPE PT ON PT.ID=PD2.TYPE
            )
            GROUP BY DATASET_ID, DATASET, XTCROSSSECTION, CREATION_DATE, CREATE_BY,
                     LAST_MODIFICATION_DATE, LAST_MODIFIED_BY, PRIMARY_DS_NAME, PRIMARY_DS_TYPE,
                     PROCESSED_DS_NAME, DATA_TIER_NAME, DATASET_ACCESS_TYPE, ACQUISITION_ERA_NAME,
                     PROCESSING_ERA_ID, PHYSICS_GROUP_NAME, PREP_ID
            HAVING COUNT(*) <> 2
            ORDER BY DATASET_ID
            """.format(ownerDBS3=ownerDBS3, ownerDBS2=ownerDBS2),
        ##############################################
        ## Some data types no longer exist in DBS3
        'DatasetAccessTypes': """
            SELECT DATASET_ACCESS_TYPE_ID, DATASET_ACCESS_TYPE
            FROM(
                SELECT DAT.DATASET_ACCESS_TYPE_ID, DAT.DATASET_ACCESS_TYPE
                FROM {ownerDBS3}.DATASET_ACCESS_TYPES DAT
                UNION ALL
                SELECT PDS.ID DATASET_ACCESS_TYPE_ID, PDS.STATUS DATASET_ACCESS_TYPE
                FROM {ownerDBS2}.PROCDSSTATUS PDS
                WHERE PDS.ID!=3 AND PDS.ID!=4 AND PDS.ID!=21 AND PDS.ID!=61
            )
            GROUP BY DATASET_ACCESS_TYPE_ID, DATASET_ACCESS_TYPE
            HAVING COUNT(*) <> 2
            ORDER BY DATASET_ACCESS_TYPE_ID
            """.format(ownerDBS3=ownerDBS3, ownerDBS2=ownerDBS2),
        ##############################################
        'DatasetOutputModConfigs': """
            SELECT DS_OUTPUT_MOD_CONF_ID, DATASET_ID, OUTPUT_MOD_CONFIG_ID
            FROM(
                SELECT DOMC.DS_OUTPUT_MOD_CONF_ID, DOMC.DATASET_ID, DOMC.OUTPUT_MOD_CONFIG_ID
                FROM {ownerDBS3}.DATASET_OUTPUT_MOD_CONFIGS DOMC
                UNION ALL
                SELECT PA.ID ds_output_mod_conf_id, PA.DATASET dataset_id,
                       PA.ALGORITHM output_mod_config_id
                FROM {ownerDBS2}.PROCALGO PA
            )
            GROUP BY DS_OUTPUT_MOD_CONF_ID, DATASET_ID, OUTPUT_MOD_CONFIG_ID
            HAVING COUNT(*) <> 2
            ORDER BY DS_OUTPUT_MOD_CONF_ID
            """.format(ownerDBS3=ownerDBS3, ownerDBS2=ownerDBS2),
        ##############################################
        'DatasetParents': """
            SELECT THIS_DATASET_ID, PARENT_DATASET_ID
            FROM(
                SELECT DP.THIS_DATASET_ID, DP.PARENT_DATASET_ID
                FROM {ownerDBS3}.DATASET_PARENTS DP
                UNION ALL
                SELECT DP2.THISDATASET this_dataset_id, DP2.ITSPARENT parent_dataset_id
                FROM {ownerDBS2}.PROCDSPARENT DP2
            )
            GROUP BY THIS_DATASET_ID, PARENT_DATASET_ID
            HAVING COUNT(*) <> 2
            ORDER BY this_dataset_id,parent_dataset_id
            """.format(ownerDBS3=ownerDBS3, ownerDBS2=ownerDBS2),
        ##############################################
        'File': """
            SELECT FILE_ID, LOGICAL_FILE_NAME, IS_FILE_VALID, DATASET_ID, DATASET, BLOCK_ID,
                   BLOCK_NAME, FILE_TYPE_ID, FILE_TYPE, CHECK_SUM, EVENT_COUNT, FILE_SIZE,
                   BRANCH_HASH_ID, ADLER32, MD5, AUTO_CROSS_SECTION, CREATION_DATE, CREATE_BY,
                   LAST_MODIFICATION_DATE, LAST_MODIFIED_BY
            FROM (
                SELECT F.FILE_ID, F.LOGICAL_FILE_NAME, F.IS_FILE_VALID, F.DATASET_ID, D.DATASET,
                       F.BLOCK_ID, B.BLOCK_NAME, F.FILE_TYPE_ID, FT.FILE_TYPE, F.CHECK_SUM,
                       F.EVENT_COUNT, F.FILE_SIZE, F.BRANCH_HASH_ID, F.ADLER32, F.MD5,
                       F.AUTO_CROSS_SECTION, F.CREATION_DATE, F.CREATE_BY,
                       F.LAST_MODIFICATION_DATE, F.LAST_MODIFIED_BY
                FROM {ownerDBS3}.FILES F
                JOIN {ownerDBS3}.FILE_DATA_TYPES FT ON FT.FILE_TYPE_ID = F.FILE_TYPE_ID
                JOIN {ownerDBS3}.DATASETS D ON D.DATASET_ID = F.DATASET_ID
                JOIN {ownerDBS3}.BLOCKS B ON B.BLOCK_ID = F.BLOCK_ID
                UNION ALL
                SELECT FS2.ID file_id, FS2.LOGICALFILENAME logical_file_name,
                       CASE WHEN FST.STATUS='VALID' THEN 1 ELSE 0 END AS IS_FILE_VALID,
                       FS2.DATASET dataset_id,
                       '/' || PD2.NAME || '/' || DS2.NAME || '/' || DT2.NAME dataset,
                       FS2.BLOCK block_id, BL2.NAME block_name, FS2.FILETYPE file_type_id,
                       FT2.TYPE file_type, FS2.CHECKSUM check_sum, FS2.NUMBEROFEVENTS event_count,
                       FS2.FILESIZE file_size, FS2.FILEBRANCH branch_hash_id, FS2.ADLER32, FS2.MD5,
                       FS2.AUTOCROSSSECTION auto_cross_section, FS2.CREATIONDATE creation_date,
                       PS12.DISTINGUISHEDNAME create_by,
                       FS2.LASTMODIFICATIONDATE last_modification_date,
                       PS22.DISTINGUISHEDNAME last_modified_by
                FROM {ownerDBS2}.FILES FS2
                JOIN {ownerDBS2}.PROCESSEDDATASET DS2 ON DS2.ID=FS2.DATASET
                JOIN {ownerDBS2}.PRIMARYDATASET PD2 on DS2.PRIMARYDATASET=PD2.ID
                JOIN {ownerDBS2}.DATATIER DT2 ON DS2.DATATIER=DT2.ID
                JOIN {ownerDBS2}.PERSON PS12 ON FS2.CREATEDBY=PS12.ID
                JOIN {ownerDBS2}.PERSON PS22 ON FS2.LASTMODIFIEDBY=PS22.ID
                JOIN {ownerDBS2}.BLOCK BL2 ON FS2.BLOCK=BL2.ID
                JOIN {ownerDBS2}.FILETYPE FT2 ON FT2.ID=FS2.FILETYPE
                JOIN {ownerDBS2}.FILESTATUS FST ON FST.ID=FS2.FILESTATUS
            )
            GROUP BY FILE_ID, LOGICAL_FILE_NAME, IS_FILE_VALID, DATASET_ID, DATASET, BLOCK_ID,
                     BLOCK_NAME, FILE_TYPE_ID, FILE_TYPE, CHECK_SUM, EVENT_COUNT, FILE_SIZE,
                     BRANCH_HASH_ID, ADLER32, MD5, AUTO_CROSS_SECTION, CREATION_DATE, CREATE_BY,
                     LAST_MODIFICATION_DATE, LAST_MODIFIED_BY
            HAVING COUNT(*) <> 2
            ORDER BY FILE_ID
            """.format(ownerDBS3=ownerDBS3, ownerDBS2=ownerDBS2),
        ##############################################
        'FileDataTypes': """
            SELECT FILE_TYPE_ID, FILE_TYPE
            FROM(
                SELECT FDT.FILE_TYPE_ID, FDT.FILE_TYPE
                FROM {ownerDBS3}.FILE_DATA_TYPES FDT
                UNION ALL
                SELECT FDT2.ID FILE_TYPE_ID, FDT2.TYPE FILE_TYPE
                FROM {ownerDBS2}.FILETYPE FDT2
            )
            GROUP BY FILE_TYPE_ID, FILE_TYPE
            HAVING COUNT(*) <> 2
            ORDER BY FILE_TYPE_ID
            """.format(ownerDBS3=ownerDBS3, ownerDBS2=ownerDBS2),
        ##############################################
        'FileLumis': """
            SELECT RUN_NUM,LUMI_SECTION_NUM,FILE_ID
            FROM (
                SELECT FL.RUN_NUM,FL.LUMI_SECTION_NUM,FL.FILE_ID
                FROM {ownerDBS3}.FILE_LUMIS FL
                UNION ALL
                SELECT RU.RUNNUMBER RUN_NUM, LU.LUMISECTIONNUMBER LUMI_SECTION_NUM,
                       FRL.FILEID FILE_ID
                FROM {ownerDBS2}.FILERUNLUMI FRL
                JOIN {ownerDBS2}.RUNS RU ON FRL.RUN=RU.ID
                JOIN {ownerDBS2}.LUMISECTION LU ON FRL.LUMI=LU.ID
            )
            GROUP BY RUN_NUM,LUMI_SECTION_NUM,FILE_ID
            HAVING COUNT(*) <> 2
            ORDER BY FILE_ID
            """.format(ownerDBS3=ownerDBS3, ownerDBS2=ownerDBS2),
        ##############################################
        'FileLumisMinMax': """
            SELECT MIN(FRL.FILEID) AS MIN_ID, MAX(FRL.FILEID) AS MAX_ID
            FROM {ownerDBS2}.FILERUNLUMI FRL
            """.format(ownerDBS2=ownerDBS2),
        ##############################################
        'FileLumisSplited': """
            SELECT RUN_NUM,LUMI_SECTION_NUM,FILE_ID
            FROM (
                SELECT FL.RUN_NUM,FL.LUMI_SECTION_NUM,FL.FILE_ID
                FROM {ownerDBS3}.FILE_LUMIS FL
                UNION ALL
                SELECT RU.RUNNUMBER RUN_NUM, LU.LUMISECTIONNUMBER LUMI_SECTION_NUM,
                       FRL.FILEID file_id
                FROM {ownerDBS2}.FILERUNLUMI FRL
                JOIN {ownerDBS2}.RUNS RU ON FRL.RUN=RU.ID
                JOIN {ownerDBS2}.LUMISECTION LU ON FRL.LUMI=LU.ID
            )
            WHERE FILE_ID >= :min_id AND FILE_ID <= :max_id
            GROUP BY RUN_NUM,LUMI_SECTION_NUM,FILE_ID
            HAVING COUNT(*) <> 2
            ORDER BY FILE_ID
            """.format(ownerDBS3=ownerDBS3, ownerDBS2=ownerDBS2),
        ##############################################
        'FileOutputModConfigs': """
            SELECT FILE_OUTPUT_CONFIG_ID,FILE_ID,OUTPUT_MOD_CONFIG_ID
            FROM (
                SELECT FOMC.FILE_OUTPUT_CONFIG_ID,FOMC.FILE_ID,FOMC.OUTPUT_MOD_CONFIG_ID
                FROM {ownerDBS3}.FILE_OUTPUT_MOD_CONFIGS FOMC
                UNION ALL
                SELECT FA.ID file_output_config_id, FA.FILEID file_id,
                       FA.ALGORITHM output_mod_config_id
                FROM {ownerDBS2}.FILEALGO FA
            )
            GROUP BY FILE_OUTPUT_CONFIG_ID,FILE_ID,OUTPUT_MOD_CONFIG_ID
            HAVING COUNT(*) <> 2
            ORDER BY FILE_OUTPUT_CONFIG_ID
            """.format(ownerDBS3=ownerDBS3, ownerDBS2=ownerDBS2),
        ##############################################
        'FileParents': """
            SELECT THIS_FILE_ID,PARENT_FILE_ID
            FROM (
                SELECT FP.THIS_FILE_ID,FP.PARENT_FILE_ID
                FROM {ownerDBS3}.FILE_PARENTS FP
                UNION ALL
                SELECT FP2.THISFILE this_file_id, FP2.ITSPARENT parent_file_id
                FROM {ownerDBS2}.FILEPARENTAGE FP2
            )
            GROUP BY THIS_FILE_ID,PARENT_FILE_ID
            HAVING COUNT(*) <> 2
            ORDER BY THIS_FILE_ID,PARENT_FILE_ID
            """.format(ownerDBS3=ownerDBS3, ownerDBS2=ownerDBS2),
        ##############################################
        'OriginSiteName': """
            SELECT BLOCK_ID, ORIGIN_SITE_NAME
            FROM (
                SELECT DISTINCT B3.BLOCK_ID, B3.ORIGIN_SITE_NAME
                FROM {ownerDBS3}.BLOCKS B3
                WHERE B3.ORIGIN_SITE_NAME!='UNKNOWN'
                UNION ALL
                SELECT DISTINCT B1.ID AS BLOCK_ID, SE1.SENAME AS ORIGIN_SITE_NAME
                FROM {ownerDBS2}.BLOCK B1
                JOIN {ownerDBS2}.SEBLOCK SEB1 ON B1.ID=SEB1.BLOCKID
                JOIN {ownerDBS2}.STORAGEELEMENT SE1 ON SEB1.SEID=SE1.ID
                WHERE B1.ID IN (SELECT B2.ID
                                FROM {ownerDBS2}.BLOCK B2
                                JOIN {ownerDBS2}.SEBLOCK SEB2 ON B2.ID=SEB2.BLOCKID
                                JOIN {ownerDBS2}.STORAGEELEMENT SE2 ON SEB2.SEID=SE2.ID
                                WHERE B2.ID=B1.ID
                                GROUP BY B2.ID
                                HAVING COUNT(B2.ID)=1)
            )
            GROUP BY BLOCK_ID, ORIGIN_SITE_NAME
            HAVING COUNT(*)<>2
            ORDER BY BLOCK_ID
            """.format(ownerDBS3=ownerDBS3, ownerDBS2=ownerDBS2),
        ##############################################
        'OutputModule': """
            SELECT OUTPUT_MOD_CONFIG_ID, APP_NAME, RELEASE_VERSION, PARAMETER_SET_HASH_ID,
                   PSET_HASH, PSET_NAME, OUTPUT_MODULE_LABEL, GLOBAL_TAG, SCENARIO,
                   CREATION_DATE, CREATE_BY
            FROM(
                SELECT O.OUTPUT_MOD_CONFIG_ID, AE.APP_NAME, RV.RELEASE_VERSION,
                       PSH.PARAMETER_SET_HASH_ID, PSH.PSET_HASH, PSH.PSET_NAME,
                       O.OUTPUT_MODULE_LABEL, O.GLOBAL_TAG, O.SCENARIO, O.CREATION_DATE, O.CREATE_BY
                FROM {ownerDBS3}.OUTPUT_MODULE_CONFIGS O
                JOIN {ownerDBS3}.APPLICATION_EXECUTABLES AE ON O.APP_EXEC_ID=AE.APP_EXEC_ID
                JOIN {ownerDBS3}.RELEASE_VERSIONS RV ON O.RELEASE_VERSION_ID=RV.RELEASE_VERSION_ID
                JOIN {ownerDBS3}.PARAMETER_SET_HASHES PSH ON O.PARAMETER_SET_HASH_ID=PSH.PARAMETER_SET_HASH_ID
                UNION ALL
                SELECT DISTINCT AC.ID OUTPUT_MOD_CONFIG_ID, APPEX.EXECUTABLENAME APP_NAME,
                       APPVER.VERSION RELEASE_VERSION, AC.PARAMETERSETID PARAMETER_SET_HASH_ID,
                       QPS.HASH PSET_HASH, QPS.NAME PSET_NAME,
                       TO_CHAR(AC.APPLICATIONFAMILY) OUTPUT_MODULE_LABEL,
                       CASE WHEN (SELECT COUNT(DISTINCT PDS.GLOBALTAG)
                                  FROM {ownerDBS2}.PROCALGO PA
                                  INNER JOIN {ownerDBS2}.PROCESSEDDATASET PDS ON PA.DATASET = PDS.ID
                                  INNER JOIN {ownerDBS2}.ALGORITHMCONFIG AC2 on AC2.ID = PA.ALGORITHM
                                  WHERE PDS.GLOBALTAG IS NOT NULL) = 1
                            THEN (SELECT DISTINCT PDS.GLOBALTAG
                                  FROM {ownerDBS2}.PROCALGO PA
                                  LEFT JOIN {ownerDBS2}.PROCESSEDDATASET PDS ON PA.DATASET = PDS.ID
                                  WHERE PDS.GLOBALTAG IS NOT NULL
                                    AND AC.ID = PA.ALGORITHM)
                            ELSE 'UNKNOWN'
                       END AS GLOBAL_TAG,
                       NULL SCENARIO, AC.CREATIONDATE CREATION_DATE, PS.DISTINGUISHEDNAME CREATE_BY
                FROM {ownerDBS2}.ALGORITHMCONFIG AC
                JOIN {ownerDBS2}.APPEXECUTABLE APPEX ON APPEX.ID=AC.EXECUTABLENAME
                JOIN {ownerDBS2}.APPVERSION APPVER ON APPVER.ID=AC.APPLICATIONVERSION
                JOIN {ownerDBS2}.PERSON PS ON PS.ID=AC.CREATEDBY
                JOIN {ownerDBS2}.QUERYABLEPARAMETERSET QPS ON QPS.ID=AC.PARAMETERSETID
            )
            GROUP BY OUTPUT_MOD_CONFIG_ID, APP_NAME, RELEASE_VERSION, PARAMETER_SET_HASH_ID,
                     PSET_HASH, PSET_NAME, OUTPUT_MODULE_LABEL, GLOBAL_TAG, SCENARIO,
                     CREATION_DATE, CREATE_BY
            HAVING COUNT(*) <> 2
            ORDER BY OUTPUT_MOD_CONFIG_ID
            """.format(ownerDBS3=ownerDBS3, ownerDBS2=ownerDBS2),
        ##############################################
        'ParametersetHashes': """
            SELECT PARAMETER_SET_HASH_ID, PSET_HASH, PSET_NAME
            FROM(
                SELECT PH.PARAMETER_SET_HASH_ID, PH.PSET_HASH, PH.PSET_NAME
                FROM {ownerDBS3}.PARAMETER_SET_HASHES PH
                UNION ALL
                SELECT QP.ID PARAMETER_SET_HASH_ID, QP.HASH PSET_HASH, QP.NAME PSET_NAME
                FROM {ownerDBS2}.QUERYABLEPARAMETERSET QP
            )
            GROUP BY PARAMETER_SET_HASH_ID, PSET_HASH, PSET_NAME
            HAVING COUNT(*) <> 2
            ORDER BY PARAMETER_SET_HASH_ID
            """.format(ownerDBS3=ownerDBS3, ownerDBS2=ownerDBS2),
        ##############################################
        'PhysicsGroups': """
            SELECT PHYSICS_GROUP_ID, PHYSICS_GROUP_NAME
            FROM(
                SELECT PG.PHYSICS_GROUP_ID, PG.PHYSICS_GROUP_NAME
                FROM {ownerDBS3}.PHYSICS_GROUPS PG
                UNION ALL
                SELECT PG2.ID PHYSICS_GROUP_ID, PG2.PHYSICSGROUPNAME PHYSICS_GROUP_NAME
                FROM {ownerDBS2}.PHYSICSGROUP PG2
            )
            GROUP BY PHYSICS_GROUP_ID, PHYSICS_GROUP_NAME
            HAVING COUNT(*) <> 2
            ORDER BY PHYSICS_GROUP_ID
            """.format(ownerDBS3=ownerDBS3, ownerDBS2=ownerDBS2),
        ##############################################
        'PrimaryDS': """
            SELECT PRIMARY_DS_ID, PRIMARY_DS_NAME, CREATION_DATE, CREATE_BY, PRIMARY_DS_TYPE
            FROM(
                SELECT P.PRIMARY_DS_ID, P.PRIMARY_DS_NAME, P.CREATION_DATE, P.CREATE_BY,
                       PT.PRIMARY_DS_TYPE
                FROM {ownerDBS3}.PRIMARY_DATASETS P
                JOIN {ownerDBS3}.PRIMARY_DS_TYPES PT ON PT.PRIMARY_DS_TYPE_ID = P.PRIMARY_DS_TYPE_ID
                UNION ALL
                SELECT PD.ID PRIMARY_DS_ID, PD.NAME PRIMARY_DS_NAME,
                       PD.CREATIONDATE CREATION_DATE, PS.DISTINGUISHEDNAME CREATE_BY,
                       PT2.TYPE PRIMARY_DS_TYPE
                FROM {ownerDBS2}.PRIMARYDATASET PD
                JOIN {ownerDBS2}.PERSON PS ON PS.ID=PD.CREATEDBY
                JOIN {ownerDBS2}.PRIMARYDSTYPE PT2 ON PT2.ID=PD.TYPE
            )
            GROUP BY PRIMARY_DS_ID, PRIMARY_DS_NAME, CREATION_DATE, CREATE_BY, PRIMARY_DS_TYPE
            HAVING COUNT(*) <> 2
            ORDER BY PRIMARY_DS_ID
            """.format(ownerDBS3=ownerDBS3, ownerDBS2=ownerDBS2),
        ##############################################
        'PrimaryDSTypes': """
            SELECT PRIMARY_DS_TYPE_ID, PRIMARY_DS_TYPE
            FROM(
                SELECT PDST.PRIMARY_DS_TYPE_ID, PDST.PRIMARY_DS_TYPE
                FROM {ownerDBS3}.PRIMARY_DS_TYPES PDST
                UNION ALL
                SELECT PDST.ID PRIMARY_DS_TYPE_ID, PDST.TYPE PRIMARY_DS_TYPE
                FROM {ownerDBS2}.PRIMARYDSTYPE PDST
            )
            GROUP BY PRIMARY_DS_TYPE_ID, PRIMARY_DS_TYPE
            HAVING COUNT(*) <> 2
            ORDER BY PRIMARY_DS_TYPE_ID
            """.format(ownerDBS3=ownerDBS3, ownerDBS2=ownerDBS2),
        ##############################################
        'ProcessedDatasets': """
            SELECT PROCESSED_DS_NAME
            FROM(
                SELECT DISTINCT PCD.PROCESSED_DS_NAME
                FROM {ownerDBS3}.PROCESSED_DATASETS PCD
                UNION ALL
                SELECT DISTINCT PCD2.NAME PROCESSED_DS_NAME
                FROM {ownerDBS2}.PROCESSEDDATASET PCD2
            )
            GROUP BY PROCESSED_DS_NAME
            HAVING COUNT(*) <> 2
            ORDER BY PROCESSED_DS_NAME
            """.format(ownerDBS3=ownerDBS3, ownerDBS2=ownerDBS2),
        ##############################################
        'ReleaseVersions': """
            SELECT RELEASE_VERSION_ID, RELEASE_VERSION
            FROM (
                SELECT RV.RELEASE_VERSION_ID, RV.RELEASE_VERSION
                FROM {ownerDBS3}.RELEASE_VERSIONS RV
                UNION ALL
                SELECT RV.ID RELEASE_VERSION_ID, RV.VERSION RELEASE_VERSION
                FROM {ownerDBS2}.APPVERSION RV
            )
            GROUP BY RELEASE_VERSION_ID, RELEASE_VERSION
            HAVING COUNT(*) <> 2
            ORDER BY RELEASE_VERSION_ID
            """.format(ownerDBS3=ownerDBS3, ownerDBS2=ownerDBS2),
    }
def checkJobCountsAgent(requestName):
    os.environ['WMAGENT_CONFIG'] = '/data/srv/wmagent/current/config/wmagent/config.py'
    connectToDB()
    myThread = threading.currentThread()
    formatter = DBFormatter(logging, myThread.dbi)

    unfinishedTasks = formatter.formatDict(myThread.dbi.processData(
        """SELECT wmbs_workflow.task, wmbs_job_state.name, COUNT(wmbs_job.id) AS jobcount
             FROM wmbs_workflow
             INNER JOIN wmbs_subscription ON wmbs_subscription.workflow = wmbs_workflow.id
             INNER JOIN wmbs_jobgroup ON wmbs_jobgroup.subscription = wmbs_subscription.id
             INNER JOIN wmbs_job ON wmbs_job.jobgroup = wmbs_jobgroup.id
             INNER JOIN wmbs_job_state ON wmbs_job.state = wmbs_job_state.id
            WHERE wmbs_workflow.name = '%s'
              AND wmbs_subscription.finished = 0
              AND wmbs_job_state.name != 'cleanout'
            GROUP BY wmbs_workflow.task, wmbs_job_state.name""" % requestName))

    result = {}
    for row in unfinishedTasks:
        if row['task'] not in result:
            result[row['task']] = {}
        result[row['task']][row['name']] = row['jobcount']

    for task in result:
        msg = "Task %s has " % task
        for state in result[task]:
            msg += '%d jobs %s ' % (result[task][state], state)
        print msg

    if not result:
        print "Check #1 failed, there are apparently no unfinished tasks in the system."
    else:
        return

    unfinishedSubs = formatter.formatDict(myThread.dbi.processData(
        """SELECT wmbs_subscription.id, wmbs_workflow.task
             FROM wmbs_workflow
             INNER JOIN wmbs_subscription ON wmbs_subscription.workflow = wmbs_workflow.id
            WHERE wmbs_workflow.name = '%s'
              AND wmbs_subscription.finished = 0""" % requestName))
    totalSubs = formatter.formatDict(myThread.dbi.processData(
        """SELECT wmbs_subscription.id, wmbs_workflow.task
             FROM wmbs_workflow
             INNER JOIN wmbs_subscription ON wmbs_subscription.workflow = wmbs_workflow.id
            WHERE wmbs_workflow.name = '%s'""" % requestName))
    print "There are %d subscriptions for this workflow, %d are incomplete." % (len(totalSubs), len(unfinishedSubs))

    if len(unfinishedSubs) != 0:
        for sub in unfinishedSubs:
            subId = sub['id']
            availableFiles = formatter.formatDict(myThread.dbi.processData(
                """SELECT COUNT(wmbs_sub_files_available.fileid) AS count
                     FROM wmbs_sub_files_available
                    WHERE wmbs_sub_files_available.subscription = %s""" % subId))
            acquiredFiles = formatter.formatDict(myThread.dbi.processData(
                """SELECT COUNT(wmbs_sub_files_acquired.fileid) AS count
                     FROM wmbs_sub_files_acquired
                    WHERE wmbs_sub_files_acquired.subscription = %s""" % subId))
            print "There are %s files available and %s files acquired in subscription %s. If the JobCreator is up, more jobs will appear soon." % (availableFiles[0]['count'], acquiredFiles[0]['count'], subId)
    else:
        print "This workflow has all subscriptions finished; the TaskArchiver should be eating through it now. This can take time though."
    return
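# Usage sketch: the checker takes a single workflow (request) name; the value
# below is a made-up placeholder.
#
#   checkJobCountsAgent('pdmvserv_task_EXAMPLE-Request-Name_v1')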
def __init__(self, connectUrl):
    logger = logging.getLogger()
    dbFactory = DBFactory(logger, connectUrl, options={})
    self.dbi = dbFactory.connect()
    self.dbFormatter = DBFormatter(logger, self.dbi)
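# Minimal usage sketch (the class name and connect URL are placeholders): the
# wrapper just wires a DBFactory connection to a DBFormatter, so raw queries go
# through self.dbi and get shaped by self.dbFormatter.
#
#   helper = DBHelper('mysql://user:password@localhost/wmagent')
#   rows = helper.dbFormatter.formatDict(helper.dbi.processData("SELECT 1"))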
def main():
    """
    _main_
    """
    if 'WMAGENT_CONFIG' not in os.environ:
        os.environ['WMAGENT_CONFIG'] = '/data/srv/wmagent/current/config/wmagent/config.py'
    if 'manage' not in os.environ:
        os.environ['manage'] = '/data/srv/wmagent/current/config/wmagent/manage'

    connectToDB()
    myPhEDEx = PhEDEx()
    myDBS = DBS3Reader('https://cmsweb.cern.ch/dbs/prod/global/DBSReader/')
    myThread = threading.currentThread()

    print "Shutting down PhEDExInjector..."
    subprocess.call([os.environ['manage'], "execute-agent", "wmcoreD",
                     "--shutdown", "--component=PhEDExInjector"],
                    stdout=open(os.devnull, 'wb'))
    time.sleep(5)

    ## TASK1: query DB for files not injected in phedex yet.
    # Get the files that the PhEDExInjector would look for.
    formatter = DBFormatter(logging, myThread.dbi)
    formatter.sql = getQuery
    results = formatter.execute()
    fileList = [lfn[0] for lfn in results]

    ## TASK2: make lfns a bit shorter, then sort and uniq them.
    reducedLfns = [lfn.rsplit('/', 2)[0] for lfn in fileList]
    reducedLfns = list(set(reducedLfns))

    ## TASK3: build unique dataset names and check whether PhEDEx and DBS contain
    ## the same number of files. If so, then those lfns are healthy.
    print "Checking %d datasets in both PhEDEx and DBS ..." % len(reducedLfns)
    crippleLfns, healthyLfns = [], []
    i = 0
    n = len(reducedLfns)
    for lfn in reducedLfns:
        try:
            lfnAux = lfn.split('/')
            dset = '/' + lfnAux[4] + '/' + lfnAux[3] + '-' + lfnAux[6] + '/' + lfnAux[5]
            result = myPhEDEx._getResult('blockreplicas', args={'dataset': dset}, verb='GET')
            phedexFiles = 0
            for item in result["phedex"]["block"]:
                phedexFiles += item['files']

            ## TODO: ValidFile is only available for > 0.9.95pre5. Once all agents
            ## are upgraded, then we can start using this new query.
            #result = myDBS.listDatasetFileDetails(dset)
            #dbsFiles = 0
            #for item in result.itervalues():
            #    dbsFiles += 1 if item['ValidFile'] else 0

            # This call returns the valid+invalid number of files
            result = myDBS.listDatasetFiles(dset)
            dbsFiles = len(result)

            if phedexFiles == dbsFiles:
                healthyLfns.append(lfn)
            else:
                crippleLfns.append(lfn)
        except:
            print "Error with:", lfn
        i += 1
        if i % 100 == 0:
            print '%d/%d files processed' % (i, n)

    ## TASK4: map the short cripple and healthy lists back to the full original lfns.
    if crippleLfns:
        filesToCheck = []
        for lfn in crippleLfns:
            for fname in fileList:
                if lfn in fname:
                    filesToCheck.append(fname)
    else:
        filesToCheck = []

    if healthyLfns:
        filesInPhedex = []
        for lfn in healthyLfns:
            for fname in fileList:
                if lfn in fname:
                    filesInPhedex.append(fname)
    else:
        filesInPhedex = []

    ## TASK5: query PhEDEx for each cripple file (filesToCheck)
    ## and build the final file lists.
    missingFiles = []
    i = 0
    n = len(filesToCheck)
    for fname in filesToCheck:
        try:
            result = myPhEDEx._getResult('data', args={'file': fname}, verb='GET')
            if len(result['phedex']['dbs']):
                filesInPhedex.append(fname)
            else:
                missingFiles.append(fname)
        except:
            print "Error contacting Phedex", fname
        i += 1
        if i % 100 == 0:
            print '%d/%d files processed' % (i, n)

    if not filesInPhedex:
        print "There are no files to be updated in the buffer. Contact a developer."
        print "Starting PhEDExInjector now ..."
        subprocess.call([os.environ['manage'], "execute-agent", "wmcoreD",
                         "--start", "--component=PhEDExInjector"],
                        stdout=open(os.devnull, 'wb'))
        return 0

    print "Found %d out of %d files that are already registered in PhEDEx but the buffer doesn't know about" % (len(filesInPhedex), len(fileList))
    print "Fixing them now, it may take several minutes ..."

    ## TASK6: time to actually fix these files.
    binds = []
    for fname in filesInPhedex:
        binds.append({'lfn': fname})
    formatter.dbi.processData(setQuery, binds, conn=None,
                              transaction=False, returnCursor=False)
    print "Rows were successfully updated! Good job!"

    print "Starting PhEDExInjector now ..."
    subprocess.call([os.environ['manage'], "execute-agent", "wmcoreD",
                     "--start", "--component=PhEDExInjector"],
                    stdout=open(os.devnull, 'wb'))
    print "Done!"
    return 0
print "Couldn't retrieve the priority of request %s" % workflow print "Error: %s" % ex continue if priority != priorityCache[workflow]: workflowsToUpdate[workflow] = priorityCache[workflow] print "%d wfs need to be updated in WQ. Final list is: %s" % ( len(workflowsToUpdate), workflowsToUpdate) for workflow in workflowsToUpdate: pass # Update priority of a workflow, its spec and all the available elements #workqueue.updatePriority(workflow, workflowsToUpdate[workflow]) print "\nSecond attempt, now check workflows in WMBS" connectToDB() myThread = threading.currentThread() formatter = DBFormatter(logging, myThread.dbi) priorityCache = {} workflowsToUpdateWMBS = {} # Querying "Workflow.ListForJobUpdater" output = myThread.transaction.processData(getQuery) workflowsToCheck = formatter.formatDict(output) print "Retrieved %d workflows from WMBS" % len(workflowsToCheck) for workflowEntry in workflowsToCheck: workflow = workflowEntry['name'] if workflow not in priorityCache: try: priorityCache[workflow] = reqmgr2.getRequestByNames( workflow)[workflow]['RequestPriority'] except Exception, ex: